aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/Smack.txt20
-rw-r--r--Documentation/kernel-parameters.txt6
-rw-r--r--Documentation/sysctl/kernel.txt11
-rw-r--r--MAINTAINERS6
-rw-r--r--arch/arm/plat-mxc/include/mach/imx-uart.h5
-rw-r--r--arch/frv/Kconfig1
-rw-r--r--arch/frv/include/asm/bitops.h29
-rw-r--r--arch/frv/include/asm/elf.h1
-rw-r--r--arch/frv/include/asm/pci.h7
-rw-r--r--arch/frv/include/asm/ptrace.h11
-rw-r--r--arch/frv/include/asm/syscall.h123
-rw-r--r--arch/frv/include/asm/thread_info.h10
-rw-r--r--arch/frv/kernel/entry.S13
-rw-r--r--arch/frv/kernel/ptrace.c755
-rw-r--r--arch/frv/kernel/signal.c10
-rw-r--r--arch/frv/kernel/uaccess.c6
-rw-r--r--arch/frv/mb93090-mb00/pci-dma-nommu.c6
-rw-r--r--arch/frv/mb93090-mb00/pci-dma.c6
-rw-r--r--arch/ia64/include/asm/kvm_host.h6
-rw-r--r--arch/ia64/include/asm/pgtable.h2
-rw-r--r--arch/ia64/kernel/irq_ia64.c3
-rw-r--r--arch/ia64/kvm/Kconfig2
-rw-r--r--arch/ia64/kvm/kvm-ia64.c263
-rw-r--r--arch/ia64/kvm/kvm_fw.c28
-rw-r--r--arch/ia64/kvm/lapic.h6
-rw-r--r--arch/ia64/kvm/optvfault.S30
-rw-r--r--arch/ia64/kvm/process.c5
-rw-r--r--arch/ia64/kvm/vcpu.c20
-rw-r--r--arch/ia64/kvm/vmm.c12
-rw-r--r--arch/ia64/kvm/vmm_ivt.S18
-rw-r--r--arch/ia64/kvm/vtlb.c3
-rw-r--r--arch/mn10300/Kconfig1
-rw-r--r--arch/mn10300/include/asm/elf.h3
-rw-r--r--arch/mn10300/include/asm/processor.h8
-rw-r--r--arch/mn10300/include/asm/ptrace.h8
-rw-r--r--arch/mn10300/kernel/entry.S13
-rw-r--r--arch/mn10300/kernel/ptrace.c454
-rw-r--r--arch/mn10300/kernel/signal.c9
-rw-r--r--arch/mn10300/mm/tlb-mn10300.S18
-rw-r--r--arch/powerpc/kvm/powerpc.c6
-rw-r--r--arch/s390/include/asm/kvm_host.h5
-rw-r--r--arch/s390/kvm/intercept.c28
-rw-r--r--arch/s390/kvm/interrupt.c59
-rw-r--r--arch/s390/kvm/kvm-s390.c63
-rw-r--r--arch/s390/kvm/kvm-s390.h4
-rw-r--r--arch/s390/kvm/priv.c4
-rw-r--r--arch/s390/kvm/sigp.c16
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/kvm.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h45
-rw-r--r--arch/x86/include/asm/kvm_x86_emulate.h6
-rw-r--r--arch/x86/include/asm/svm.h1
-rw-r--r--arch/x86/include/asm/termios.h1
-rw-r--r--arch/x86/include/asm/vmx.h1
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c1
-rw-r--r--arch/x86/kernel/kvm.c4
-rw-r--r--arch/x86/kernel/smp.c3
-rw-r--r--arch/x86/kvm/Kconfig6
-rw-r--r--arch/x86/kvm/Makefile2
-rw-r--r--arch/x86/kvm/i8254.c109
-rw-r--r--arch/x86/kvm/i8254.h12
-rw-r--r--arch/x86/kvm/irq.c7
-rw-r--r--arch/x86/kvm/kvm_timer.h18
-rw-r--r--arch/x86/kvm/lapic.c251
-rw-r--r--arch/x86/kvm/lapic.h12
-rw-r--r--arch/x86/kvm/mmu.c194
-rw-r--r--arch/x86/kvm/mmu.h5
-rw-r--r--arch/x86/kvm/paging_tmpl.h16
-rw-r--r--arch/x86/kvm/svm.c415
-rw-r--r--arch/x86/kvm/timer.c46
-rw-r--r--arch/x86/kvm/vmx.c721
-rw-r--r--arch/x86/kvm/x86.c409
-rw-r--r--arch/x86/kvm/x86.h14
-rw-r--r--arch/x86/kvm/x86_emulate.c141
-rw-r--r--block/blk-core.c10
-rw-r--r--drivers/bluetooth/hci_ldisc.c5
-rw-r--r--drivers/char/Kconfig13
-rw-r--r--drivers/char/Makefile1
-rw-r--r--drivers/char/bfin_jtag_comm.c365
-rw-r--r--drivers/char/cyclades.c290
-rw-r--r--drivers/char/epca.c17
-rw-r--r--drivers/char/ip2/i2lib.c4
-rw-r--r--drivers/char/ip2/ip2main.c4
-rw-r--r--drivers/char/isicom.c19
-rw-r--r--drivers/char/istallion.c8
-rw-r--r--drivers/char/moxa.c5
-rw-r--r--drivers/char/mxser.c12
-rw-r--r--drivers/char/n_hdlc.c4
-rw-r--r--drivers/char/n_tty.c29
-rw-r--r--drivers/char/pcmcia/synclink_cs.c11
-rw-r--r--drivers/char/pty.c11
-rw-r--r--drivers/char/rocket.c19
-rw-r--r--drivers/char/selection.c2
-rw-r--r--drivers/char/stallion.c6
-rw-r--r--drivers/char/synclink.c9
-rw-r--r--drivers/char/synclink_gt.c86
-rw-r--r--drivers/char/synclinkmp.c9
-rw-r--r--drivers/char/tty_audit.c10
-rw-r--r--drivers/char/tty_io.c122
-rw-r--r--drivers/char/tty_ioctl.c88
-rw-r--r--drivers/char/tty_ldisc.c549
-rw-r--r--drivers/char/tty_port.c47
-rw-r--r--drivers/ide/alim15x3.c10
-rw-r--r--drivers/ide/ide-atapi.c168
-rw-r--r--drivers/ide/ide-cd.c88
-rw-r--r--drivers/ide/ide-cd.h4
-rw-r--r--drivers/ide/ide-disk.c1
-rw-r--r--drivers/ide/ide-dma.c20
-rw-r--r--drivers/ide/ide-floppy.c24
-rw-r--r--drivers/ide/ide-io.c14
-rw-r--r--drivers/ide/ide-ioctls.c1
-rw-r--r--drivers/ide/ide-park.c7
-rw-r--r--drivers/ide/ide-pm.c38
-rw-r--r--drivers/ide/ide-tape.c734
-rw-r--r--drivers/ide/ide-taskfile.c18
-rw-r--r--drivers/parport/parport_pc.c1802
-rw-r--r--drivers/serial/8250.c7
-rw-r--r--drivers/serial/8250_pci.c3
-rw-r--r--drivers/serial/Kconfig8
-rw-r--r--drivers/serial/Makefile1
-rw-r--r--drivers/serial/bfin_5xx.c77
-rw-r--r--drivers/serial/bfin_sport_uart.c58
-rw-r--r--drivers/serial/icom.c20
-rw-r--r--drivers/serial/imx.c294
-rw-r--r--drivers/serial/jsm/jsm.h1
-rw-r--r--drivers/serial/jsm/jsm_tty.c14
-rw-r--r--drivers/serial/timbuart.c526
-rw-r--r--drivers/serial/timbuart.h58
-rw-r--r--drivers/usb/class/cdc-acm.c442
-rw-r--r--drivers/usb/class/cdc-acm.h5
-rw-r--r--drivers/usb/serial/belkin_sa.c6
-rw-r--r--drivers/usb/serial/ch341.c46
-rw-r--r--drivers/usb/serial/console.c6
-rw-r--r--drivers/usb/serial/cp210x.c253
-rw-r--r--drivers/usb/serial/cyberjack.c6
-rw-r--r--drivers/usb/serial/cypress_m8.c81
-rw-r--r--drivers/usb/serial/digi_acceleport.c75
-rw-r--r--drivers/usb/serial/empeg.c6
-rw-r--r--drivers/usb/serial/ftdi_sio.c149
-rw-r--r--drivers/usb/serial/garmin_gps.c3
-rw-r--r--drivers/usb/serial/generic.c3
-rw-r--r--drivers/usb/serial/io_edgeport.c10
-rw-r--r--drivers/usb/serial/io_ti.c3
-rw-r--r--drivers/usb/serial/ipaq.c6
-rw-r--r--drivers/usb/serial/ipw.c18
-rw-r--r--drivers/usb/serial/ir-usb.c6
-rw-r--r--drivers/usb/serial/iuu_phoenix.c102
-rw-r--r--drivers/usb/serial/keyspan.c13
-rw-r--r--drivers/usb/serial/keyspan.h8
-rw-r--r--drivers/usb/serial/keyspan_pda.c48
-rw-r--r--drivers/usb/serial/kl5kusb105.c6
-rw-r--r--drivers/usb/serial/kobil_sct.c9
-rw-r--r--drivers/usb/serial/mct_u232.c37
-rw-r--r--drivers/usb/serial/mos7720.c3
-rw-r--r--drivers/usb/serial/mos7840.c48
-rw-r--r--drivers/usb/serial/navman.c3
-rw-r--r--drivers/usb/serial/omninet.c6
-rw-r--r--drivers/usb/serial/opticon.c3
-rw-r--r--drivers/usb/serial/option.c68
-rw-r--r--drivers/usb/serial/oti6858.c57
-rw-r--r--drivers/usb/serial/pl2303.c79
-rw-r--r--drivers/usb/serial/sierra.c351
-rw-r--r--drivers/usb/serial/spcp8x5.c85
-rw-r--r--drivers/usb/serial/symbolserial.c3
-rw-r--r--drivers/usb/serial/ti_usb_3410_5052.c6
-rw-r--r--drivers/usb/serial/usb-serial.c144
-rw-r--r--drivers/usb/serial/visor.c6
-rw-r--r--drivers/usb/serial/whiteheat.c33
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/cifs/CHANGES11
-rw-r--r--fs/cifs/README16
-rw-r--r--fs/cifs/cifs_spnego.c6
-rw-r--r--fs/cifs/cifsacl.c178
-rw-r--r--fs/cifs/cifsfs.c2
-rw-r--r--fs/cifs/cifsfs.h4
-rw-r--r--fs/cifs/cifsproto.h10
-rw-r--r--fs/cifs/cifssmb.c7
-rw-r--r--fs/cifs/connect.c34
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/cifs/inode.c19
-rw-r--r--fs/cifs/netmisc.c24
-rw-r--r--fs/cifs/readdir.c44
-rw-r--r--fs/compat.c6
-rw-r--r--fs/devpts/inode.c4
-rw-r--r--fs/exec.c10
-rw-r--r--fs/ext2/super.c1
-rw-r--r--fs/ext3/super.c1
-rw-r--r--fs/ext4/Makefile4
-rw-r--r--fs/ext4/balloc.c28
-rw-r--r--fs/ext4/block_validity.c244
-rw-r--r--fs/ext4/dir.c3
-rw-r--r--fs/ext4/ext4.h354
-rw-r--r--fs/ext4/ext4_i.h140
-rw-r--r--fs/ext4/ext4_sb.h161
-rw-r--r--fs/ext4/extents.c85
-rw-r--r--fs/ext4/group.h29
-rw-r--r--fs/ext4/ialloc.c73
-rw-r--r--fs/ext4/inode.c593
-rw-r--r--fs/ext4/mballoc.c166
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/ext4/namei.c27
-rw-r--r--fs/ext4/namei.h8
-rw-r--r--fs/ext4/resize.c36
-rw-r--r--fs/ext4/super.c831
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/ioctl.c14
-rw-r--r--fs/jbd2/journal.c8
-rw-r--r--fs/mpage.c6
-rw-r--r--fs/namei.c6
-rw-r--r--fs/nfsd/vfs.c14
-rw-r--r--fs/proc/base.c6
-rw-r--r--include/linux/cyclades.h37
-rw-r--r--include/linux/ide.h27
-rw-r--r--include/linux/ima.h11
-rw-r--r--include/linux/init_task.h4
-rw-r--r--include/linux/kvm.h46
-rw-r--r--include/linux/kvm_host.h21
-rw-r--r--include/linux/kvm_types.h27
-rw-r--r--include/linux/lsm_audit.h111
-rw-r--r--include/linux/magic.h1
-rw-r--r--include/linux/mm.h2
-rw-r--r--include/linux/pci_ids.h2
-rw-r--r--include/linux/rational.h19
-rw-r--r--include/linux/sched.h5
-rw-r--r--include/linux/security.h2
-rw-r--r--include/linux/serial.h116
-rw-r--r--include/linux/serial_core.h6
-rw-r--r--include/linux/tty.h18
-rw-r--r--include/linux/tty_driver.h6
-rw-r--r--include/linux/usb/serial.h10
-rw-r--r--ipc/shm.c3
-rw-r--r--kernel/cred.c4
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/module.c7
-rw-r--r--kernel/ptrace.c9
-rw-r--r--kernel/signal.c11
-rw-r--r--kernel/sysctl.c14
-rw-r--r--lib/Kconfig3
-rw-r--r--lib/Makefile1
-rw-r--r--lib/rational.c62
-rw-r--r--mm/Kconfig19
-rw-r--r--mm/mmap.c3
-rw-r--r--mm/nommu.c3
-rw-r--r--mm/shmem.c2
-rw-r--r--security/Kconfig22
-rw-r--r--security/Makefile3
-rw-r--r--security/commoncap.c32
-rw-r--r--security/inode.c2
-rw-r--r--security/integrity/ima/ima_audit.c32
-rw-r--r--security/integrity/ima/ima_crypto.c4
-rw-r--r--security/integrity/ima/ima_fs.c8
-rw-r--r--security/integrity/ima/ima_iint.c2
-rw-r--r--security/integrity/ima/ima_init.c4
-rw-r--r--security/integrity/ima/ima_main.c86
-rw-r--r--security/integrity/ima/ima_policy.c48
-rw-r--r--security/lsm_audit.c386
-rw-r--r--security/root_plug.c12
-rw-r--r--security/security.c3
-rw-r--r--security/selinux/avc.c2
-rw-r--r--security/selinux/hooks.c24
-rw-r--r--security/selinux/include/security.h7
-rw-r--r--security/selinux/nlmsgtab.c2
-rw-r--r--security/selinux/selinuxfs.c8
-rw-r--r--security/selinux/ss/services.c30
-rw-r--r--security/smack/smack.h108
-rw-r--r--security/smack/smack_access.c143
-rw-r--r--security/smack/smack_lsm.c405
-rw-r--r--security/smack/smackfs.c68
-rw-r--r--security/tomoyo/common.c119
-rw-r--r--security/tomoyo/common.h134
-rw-r--r--security/tomoyo/domain.c240
-rw-r--r--security/tomoyo/file.c156
-rw-r--r--security/tomoyo/realpath.c23
-rw-r--r--security/tomoyo/tomoyo.c4
-rw-r--r--security/tomoyo/tomoyo.h13
-rw-r--r--virt/kvm/ioapic.c153
-rw-r--r--virt/kvm/ioapic.h27
-rw-r--r--virt/kvm/iommu.c27
-rw-r--r--virt/kvm/irq_comm.c111
-rw-r--r--virt/kvm/kvm_main.c678
280 files changed, 11642 insertions, 7876 deletions
diff --git a/Documentation/Smack.txt b/Documentation/Smack.txt
index 629c92e99783..34614b4c708e 100644
--- a/Documentation/Smack.txt
+++ b/Documentation/Smack.txt
@@ -184,8 +184,9 @@ length. Single character labels using special characters, that being anything
184other than a letter or digit, are reserved for use by the Smack development 184other than a letter or digit, are reserved for use by the Smack development
185team. Smack labels are unstructured, case sensitive, and the only operation 185team. Smack labels are unstructured, case sensitive, and the only operation
186ever performed on them is comparison for equality. Smack labels cannot 186ever performed on them is comparison for equality. Smack labels cannot
187contain unprintable characters or the "/" (slash) character. Smack labels 187contain unprintable characters, the "/" (slash), the "\" (backslash), the "'"
188cannot begin with a '-', which is reserved for special options. 188(quote) and '"' (double-quote) characters.
189Smack labels cannot begin with a '-', which is reserved for special options.
189 190
190There are some predefined labels: 191There are some predefined labels:
191 192
@@ -523,3 +524,18 @@ Smack supports some mount options:
523 524
524These mount options apply to all file system types. 525These mount options apply to all file system types.
525 526
527Smack auditing
528
529If you want Smack auditing of security events, you need to set CONFIG_AUDIT
530in your kernel configuration.
531By default, all denied events will be audited. You can change this behavior by
532writing a single character to the /smack/logging file :
5330 : no logging
5341 : log denied (default)
5352 : log accepted
5363 : log denied & accepted
537
538Events are logged as 'key=value' pairs, for each event you at least will get
539the subjet, the object, the rights requested, the action, the kernel function
540that triggered the event, plus other pairs depending on the type of event
541audited.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 4a3c2209a124..72d3bf08d79b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -928,6 +928,12 @@ and is between 256 and 4096 characters. It is defined in the file
928 Formt: { "sha1" | "md5" } 928 Formt: { "sha1" | "md5" }
929 default: "sha1" 929 default: "sha1"
930 930
931 ima_tcb [IMA]
932 Load a policy which meets the needs of the Trusted
933 Computing Base. This means IMA will measure all
934 programs exec'd, files mmap'd for exec, and all files
935 opened for read by uid=0.
936
931 in2000= [HW,SCSI] 937 in2000= [HW,SCSI]
932 See header of drivers/scsi/in2000.c. 938 See header of drivers/scsi/in2000.c.
933 939
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index f11ca7979fa6..322a00bb99d9 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -32,6 +32,7 @@ show up in /proc/sys/kernel:
32- kstack_depth_to_print [ X86 only ] 32- kstack_depth_to_print [ X86 only ]
33- l2cr [ PPC only ] 33- l2cr [ PPC only ]
34- modprobe ==> Documentation/debugging-modules.txt 34- modprobe ==> Documentation/debugging-modules.txt
35- modules_disabled
35- msgmax 36- msgmax
36- msgmnb 37- msgmnb
37- msgmni 38- msgmni
@@ -184,6 +185,16 @@ kernel stack.
184 185
185============================================================== 186==============================================================
186 187
188modules_disabled:
189
190A toggle value indicating if modules are allowed to be loaded
191in an otherwise modular kernel. This toggle defaults to off
192(0), but can be set true (1). Once true, modules can be
193neither loaded nor unloaded, and the toggle cannot be set back
194to false.
195
196==============================================================
197
187osrelease, ostype & version: 198osrelease, ostype & version:
188 199
189# cat osrelease 200# cat osrelease
diff --git a/MAINTAINERS b/MAINTAINERS
index cf4abddfc8a4..84285b5ba359 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -71,7 +71,7 @@ P: Person
71M: Mail patches to 71M: Mail patches to
72L: Mailing list that is relevant to this area 72L: Mailing list that is relevant to this area
73W: Web-page with status/info 73W: Web-page with status/info
74T: SCM tree type and location. Type is one of: git, hg, quilt. 74T: SCM tree type and location. Type is one of: git, hg, quilt, stgit.
75S: Status, one of the following: 75S: Status, one of the following:
76 76
77 Supported: Someone is actually paid to look after this. 77 Supported: Someone is actually paid to look after this.
@@ -159,7 +159,8 @@ F: drivers/net/r8169.c
1598250/16?50 (AND CLONE UARTS) SERIAL DRIVER 1598250/16?50 (AND CLONE UARTS) SERIAL DRIVER
160L: linux-serial@vger.kernel.org 160L: linux-serial@vger.kernel.org
161W: http://serial.sourceforge.net 161W: http://serial.sourceforge.net
162S: Orphan 162M: alan@lxorguk.ukuu.org.uk
163S: Odd Fixes
163F: drivers/serial/8250* 164F: drivers/serial/8250*
164F: include/linux/serial_8250.h 165F: include/linux/serial_8250.h
165 166
@@ -5629,6 +5630,7 @@ P: Alan Cox
5629M: alan@lxorguk.ukuu.org.uk 5630M: alan@lxorguk.ukuu.org.uk
5630L: linux-kernel@vger.kernel.org 5631L: linux-kernel@vger.kernel.org
5631S: Maintained 5632S: Maintained
5633T: stgit http://zeniv.linux.org.uk/~alan/ttydev/
5632 5634
5633TULIP NETWORK DRIVERS 5635TULIP NETWORK DRIVERS
5634P: Grant Grundler 5636P: Grant Grundler
diff --git a/arch/arm/plat-mxc/include/mach/imx-uart.h b/arch/arm/plat-mxc/include/mach/imx-uart.h
index 599217b2e13f..f9bd17dd8dd7 100644
--- a/arch/arm/plat-mxc/include/mach/imx-uart.h
+++ b/arch/arm/plat-mxc/include/mach/imx-uart.h
@@ -20,11 +20,16 @@
20#define ASMARM_ARCH_UART_H 20#define ASMARM_ARCH_UART_H
21 21
22#define IMXUART_HAVE_RTSCTS (1<<0) 22#define IMXUART_HAVE_RTSCTS (1<<0)
23#define IMXUART_IRDA (1<<1)
23 24
24struct imxuart_platform_data { 25struct imxuart_platform_data {
25 int (*init)(struct platform_device *pdev); 26 int (*init)(struct platform_device *pdev);
26 int (*exit)(struct platform_device *pdev); 27 int (*exit)(struct platform_device *pdev);
27 unsigned int flags; 28 unsigned int flags;
29 void (*irda_enable)(int enable);
30 unsigned int irda_inv_rx:1;
31 unsigned int irda_inv_tx:1;
32 unsigned short transceiver_delay;
28}; 33};
29 34
30#endif 35#endif
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 9d1552a9ee2c..8a5bd7a9c6f5 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -6,6 +6,7 @@ config FRV
6 bool 6 bool
7 default y 7 default y
8 select HAVE_IDE 8 select HAVE_IDE
9 select HAVE_ARCH_TRACEHOOK
9 10
10config ZONE_DMA 11config ZONE_DMA
11 bool 12 bool
diff --git a/arch/frv/include/asm/bitops.h b/arch/frv/include/asm/bitops.h
index 287f6f697ce2..50ae91b29674 100644
--- a/arch/frv/include/asm/bitops.h
+++ b/arch/frv/include/asm/bitops.h
@@ -112,7 +112,7 @@ extern unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsig
112#define atomic_clear_mask(mask, v) atomic_test_and_ANDNOT_mask((mask), (v)) 112#define atomic_clear_mask(mask, v) atomic_test_and_ANDNOT_mask((mask), (v))
113#define atomic_set_mask(mask, v) atomic_test_and_OR_mask((mask), (v)) 113#define atomic_set_mask(mask, v) atomic_test_and_OR_mask((mask), (v))
114 114
115static inline int test_and_clear_bit(int nr, volatile void *addr) 115static inline int test_and_clear_bit(unsigned long nr, volatile void *addr)
116{ 116{
117 volatile unsigned long *ptr = addr; 117 volatile unsigned long *ptr = addr;
118 unsigned long mask = 1UL << (nr & 31); 118 unsigned long mask = 1UL << (nr & 31);
@@ -120,7 +120,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr)
120 return (atomic_test_and_ANDNOT_mask(mask, ptr) & mask) != 0; 120 return (atomic_test_and_ANDNOT_mask(mask, ptr) & mask) != 0;
121} 121}
122 122
123static inline int test_and_set_bit(int nr, volatile void *addr) 123static inline int test_and_set_bit(unsigned long nr, volatile void *addr)
124{ 124{
125 volatile unsigned long *ptr = addr; 125 volatile unsigned long *ptr = addr;
126 unsigned long mask = 1UL << (nr & 31); 126 unsigned long mask = 1UL << (nr & 31);
@@ -128,7 +128,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr)
128 return (atomic_test_and_OR_mask(mask, ptr) & mask) != 0; 128 return (atomic_test_and_OR_mask(mask, ptr) & mask) != 0;
129} 129}
130 130
131static inline int test_and_change_bit(int nr, volatile void *addr) 131static inline int test_and_change_bit(unsigned long nr, volatile void *addr)
132{ 132{
133 volatile unsigned long *ptr = addr; 133 volatile unsigned long *ptr = addr;
134 unsigned long mask = 1UL << (nr & 31); 134 unsigned long mask = 1UL << (nr & 31);
@@ -136,22 +136,22 @@ static inline int test_and_change_bit(int nr, volatile void *addr)
136 return (atomic_test_and_XOR_mask(mask, ptr) & mask) != 0; 136 return (atomic_test_and_XOR_mask(mask, ptr) & mask) != 0;
137} 137}
138 138
139static inline void clear_bit(int nr, volatile void *addr) 139static inline void clear_bit(unsigned long nr, volatile void *addr)
140{ 140{
141 test_and_clear_bit(nr, addr); 141 test_and_clear_bit(nr, addr);
142} 142}
143 143
144static inline void set_bit(int nr, volatile void *addr) 144static inline void set_bit(unsigned long nr, volatile void *addr)
145{ 145{
146 test_and_set_bit(nr, addr); 146 test_and_set_bit(nr, addr);
147} 147}
148 148
149static inline void change_bit(int nr, volatile void * addr) 149static inline void change_bit(unsigned long nr, volatile void *addr)
150{ 150{
151 test_and_change_bit(nr, addr); 151 test_and_change_bit(nr, addr);
152} 152}
153 153
154static inline void __clear_bit(int nr, volatile void * addr) 154static inline void __clear_bit(unsigned long nr, volatile void *addr)
155{ 155{
156 volatile unsigned long *a = addr; 156 volatile unsigned long *a = addr;
157 int mask; 157 int mask;
@@ -161,7 +161,7 @@ static inline void __clear_bit(int nr, volatile void * addr)
161 *a &= ~mask; 161 *a &= ~mask;
162} 162}
163 163
164static inline void __set_bit(int nr, volatile void * addr) 164static inline void __set_bit(unsigned long nr, volatile void *addr)
165{ 165{
166 volatile unsigned long *a = addr; 166 volatile unsigned long *a = addr;
167 int mask; 167 int mask;
@@ -171,7 +171,7 @@ static inline void __set_bit(int nr, volatile void * addr)
171 *a |= mask; 171 *a |= mask;
172} 172}
173 173
174static inline void __change_bit(int nr, volatile void *addr) 174static inline void __change_bit(unsigned long nr, volatile void *addr)
175{ 175{
176 volatile unsigned long *a = addr; 176 volatile unsigned long *a = addr;
177 int mask; 177 int mask;
@@ -181,7 +181,7 @@ static inline void __change_bit(int nr, volatile void *addr)
181 *a ^= mask; 181 *a ^= mask;
182} 182}
183 183
184static inline int __test_and_clear_bit(int nr, volatile void * addr) 184static inline int __test_and_clear_bit(unsigned long nr, volatile void *addr)
185{ 185{
186 volatile unsigned long *a = addr; 186 volatile unsigned long *a = addr;
187 int mask, retval; 187 int mask, retval;
@@ -193,7 +193,7 @@ static inline int __test_and_clear_bit(int nr, volatile void * addr)
193 return retval; 193 return retval;
194} 194}
195 195
196static inline int __test_and_set_bit(int nr, volatile void * addr) 196static inline int __test_and_set_bit(unsigned long nr, volatile void *addr)
197{ 197{
198 volatile unsigned long *a = addr; 198 volatile unsigned long *a = addr;
199 int mask, retval; 199 int mask, retval;
@@ -205,7 +205,7 @@ static inline int __test_and_set_bit(int nr, volatile void * addr)
205 return retval; 205 return retval;
206} 206}
207 207
208static inline int __test_and_change_bit(int nr, volatile void * addr) 208static inline int __test_and_change_bit(unsigned long nr, volatile void *addr)
209{ 209{
210 volatile unsigned long *a = addr; 210 volatile unsigned long *a = addr;
211 int mask, retval; 211 int mask, retval;
@@ -220,12 +220,13 @@ static inline int __test_and_change_bit(int nr, volatile void * addr)
220/* 220/*
221 * This routine doesn't need to be atomic. 221 * This routine doesn't need to be atomic.
222 */ 222 */
223static inline int __constant_test_bit(int nr, const volatile void * addr) 223static inline int
224__constant_test_bit(unsigned long nr, const volatile void *addr)
224{ 225{
225 return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; 226 return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
226} 227}
227 228
228static inline int __test_bit(int nr, const volatile void * addr) 229static inline int __test_bit(unsigned long nr, const volatile void *addr)
229{ 230{
230 int * a = (int *) addr; 231 int * a = (int *) addr;
231 int mask; 232 int mask;
diff --git a/arch/frv/include/asm/elf.h b/arch/frv/include/asm/elf.h
index 7279ec07d62e..7bbf6e47f8c8 100644
--- a/arch/frv/include/asm/elf.h
+++ b/arch/frv/include/asm/elf.h
@@ -116,6 +116,7 @@ do { \
116} while(0) 116} while(0)
117 117
118#define USE_ELF_CORE_DUMP 118#define USE_ELF_CORE_DUMP
119#define CORE_DUMP_USE_REGSET
119#define ELF_FDPIC_CORE_EFLAGS EF_FRV_FDPIC 120#define ELF_FDPIC_CORE_EFLAGS EF_FRV_FDPIC
120#define ELF_EXEC_PAGESIZE 16384 121#define ELF_EXEC_PAGESIZE 16384
121 122
diff --git a/arch/frv/include/asm/pci.h b/arch/frv/include/asm/pci.h
index 585d9b49949a..cc685e60b0f9 100644
--- a/arch/frv/include/asm/pci.h
+++ b/arch/frv/include/asm/pci.h
@@ -87,8 +87,7 @@ static inline void pci_dma_sync_single(struct pci_dev *hwdev,
87 dma_addr_t dma_handle, 87 dma_addr_t dma_handle,
88 size_t size, int direction) 88 size_t size, int direction)
89{ 89{
90 if (direction == PCI_DMA_NONE) 90 BUG_ON(direction == PCI_DMA_NONE);
91 BUG();
92 91
93 frv_cache_wback_inv((unsigned long)bus_to_virt(dma_handle), 92 frv_cache_wback_inv((unsigned long)bus_to_virt(dma_handle),
94 (unsigned long)bus_to_virt(dma_handle) + size); 93 (unsigned long)bus_to_virt(dma_handle) + size);
@@ -105,9 +104,7 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
105 int nelems, int direction) 104 int nelems, int direction)
106{ 105{
107 int i; 106 int i;
108 107 BUG_ON(direction == PCI_DMA_NONE);
109 if (direction == PCI_DMA_NONE)
110 BUG();
111 108
112 for (i = 0; i < nelems; i++) 109 for (i = 0; i < nelems; i++)
113 frv_cache_wback_inv(sg_dma_address(&sg[i]), 110 frv_cache_wback_inv(sg_dma_address(&sg[i]),
diff --git a/arch/frv/include/asm/ptrace.h b/arch/frv/include/asm/ptrace.h
index cf6934012b64..a54b535c9e49 100644
--- a/arch/frv/include/asm/ptrace.h
+++ b/arch/frv/include/asm/ptrace.h
@@ -65,6 +65,8 @@
65#ifdef __KERNEL__ 65#ifdef __KERNEL__
66#ifndef __ASSEMBLY__ 66#ifndef __ASSEMBLY__
67 67
68struct task_struct;
69
68/* 70/*
69 * we dedicate GR28 to keeping a pointer to the current exception frame 71 * we dedicate GR28 to keeping a pointer to the current exception frame
70 * - gr28 is destroyed on entry to the kernel from userspace 72 * - gr28 is destroyed on entry to the kernel from userspace
@@ -73,11 +75,18 @@ register struct pt_regs *__frame asm("gr28");
73 75
74#define user_mode(regs) (!((regs)->psr & PSR_S)) 76#define user_mode(regs) (!((regs)->psr & PSR_S))
75#define instruction_pointer(regs) ((regs)->pc) 77#define instruction_pointer(regs) ((regs)->pc)
78#define user_stack_pointer(regs) ((regs)->sp)
76 79
77extern unsigned long user_stack(const struct pt_regs *); 80extern unsigned long user_stack(const struct pt_regs *);
78extern void show_regs(struct pt_regs *); 81extern void show_regs(struct pt_regs *);
79#define profile_pc(regs) ((regs)->pc) 82#define profile_pc(regs) ((regs)->pc)
80#endif 83
84#define task_pt_regs(task) ((task)->thread.frame0)
85
86#define arch_has_single_step() (1)
87extern void user_enable_single_step(struct task_struct *);
88extern void user_disable_single_step(struct task_struct *);
81 89
82#endif /* !__ASSEMBLY__ */ 90#endif /* !__ASSEMBLY__ */
91#endif /* __KERNEL__ */
83#endif /* _ASM_PTRACE_H */ 92#endif /* _ASM_PTRACE_H */
diff --git a/arch/frv/include/asm/syscall.h b/arch/frv/include/asm/syscall.h
new file mode 100644
index 000000000000..70689eb29b98
--- /dev/null
+++ b/arch/frv/include/asm/syscall.h
@@ -0,0 +1,123 @@
1/* syscall parameter access functions
2 *
3 * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#ifndef _ASM_SYSCALL_H
13#define _ASM_SYSCALL_H
14
15#include <linux/err.h>
16#include <asm/ptrace.h>
17
18/*
19 * Get the system call number or -1
20 */
21static inline long syscall_get_nr(struct task_struct *task,
22 struct pt_regs *regs)
23{
24 return regs->syscallno;
25}
26
27/*
28 * Restore the clobbered GR8 register
29 * (1st syscall arg was overwritten with syscall return or error)
30 */
31static inline void syscall_rollback(struct task_struct *task,
32 struct pt_regs *regs)
33{
34 regs->gr8 = regs->orig_gr8;
35}
36
37/*
38 * See if the syscall return value is an error, returning it if it is and 0 if
39 * not
40 */
41static inline long syscall_get_error(struct task_struct *task,
42 struct pt_regs *regs)
43{
44 return IS_ERR_VALUE(regs->gr8) ? regs->gr8 : 0;
45}
46
47/*
48 * Get the syscall return value
49 */
50static inline long syscall_get_return_value(struct task_struct *task,
51 struct pt_regs *regs)
52{
53 return regs->gr8;
54}
55
56/*
57 * Set the syscall return value
58 */
59static inline void syscall_set_return_value(struct task_struct *task,
60 struct pt_regs *regs,
61 int error, long val)
62{
63 if (error)
64 regs->gr8 = -error;
65 else
66 regs->gr8 = val;
67}
68
69/*
70 * Retrieve the system call arguments
71 */
72static inline void syscall_get_arguments(struct task_struct *task,
73 struct pt_regs *regs,
74 unsigned int i, unsigned int n,
75 unsigned long *args)
76{
77 /*
78 * Do this simply for now. If we need to start supporting
79 * fetching arguments from arbitrary indices, this will need some
80 * extra logic. Presently there are no in-tree users that depend
81 * on this behaviour.
82 */
83 BUG_ON(i);
84
85 /* Argument pattern is: GR8, GR9, GR10, GR11, GR12, GR13 */
86 switch (n) {
87 case 6: args[5] = regs->gr13;
88 case 5: args[4] = regs->gr12;
89 case 4: args[3] = regs->gr11;
90 case 3: args[2] = regs->gr10;
91 case 2: args[1] = regs->gr9;
92 case 1: args[0] = regs->gr8;
93 break;
94 default:
95 BUG();
96 }
97}
98
99/*
100 * Alter the system call arguments
101 */
102static inline void syscall_set_arguments(struct task_struct *task,
103 struct pt_regs *regs,
104 unsigned int i, unsigned int n,
105 const unsigned long *args)
106{
107 /* Same note as above applies */
108 BUG_ON(i);
109
110 switch (n) {
111 case 6: regs->gr13 = args[5];
112 case 5: regs->gr12 = args[4];
113 case 4: regs->gr11 = args[3];
114 case 3: regs->gr10 = args[2];
115 case 2: regs->gr9 = args[1];
116 case 1: regs->gr8 = args[0];
117 break;
118 default:
119 BUG();
120 }
121}
122
123#endif /* _ASM_SYSCALL_H */
diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h
index bb53ab753ffb..e8a5ed7be021 100644
--- a/arch/frv/include/asm/thread_info.h
+++ b/arch/frv/include/asm/thread_info.h
@@ -109,20 +109,20 @@ register struct thread_info *__current_thread_info asm("gr15");
109 * - other flags in MSW 109 * - other flags in MSW
110 */ 110 */
111#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ 111#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
112#define TIF_SIGPENDING 1 /* signal pending */ 112#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
113#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ 113#define TIF_SIGPENDING 2 /* signal pending */
114#define TIF_SINGLESTEP 3 /* restore singlestep on return to user mode */ 114#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
115#define TIF_IRET 4 /* return with iret */ 115#define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */
116#define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */ 116#define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */
117#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ 117#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
118#define TIF_MEMDIE 17 /* OOM killer killed process */ 118#define TIF_MEMDIE 17 /* OOM killer killed process */
119#define TIF_FREEZE 18 /* freezing for suspend */ 119#define TIF_FREEZE 18 /* freezing for suspend */
120 120
121#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) 121#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
122#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
122#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) 123#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
123#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) 124#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
124#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) 125#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
125#define _TIF_IRET (1 << TIF_IRET)
126#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) 126#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
127#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) 127#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
128#define _TIF_FREEZE (1 << TIF_FREEZE) 128#define _TIF_FREEZE (1 << TIF_FREEZE)
diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S
index 1da523b3298e..356e0e327a89 100644
--- a/arch/frv/kernel/entry.S
+++ b/arch/frv/kernel/entry.S
@@ -886,7 +886,6 @@ system_call:
886 bnc icc0,#0,__syscall_badsys 886 bnc icc0,#0,__syscall_badsys
887 887
888 ldi @(gr15,#TI_FLAGS),gr4 888 ldi @(gr15,#TI_FLAGS),gr4
889 ori gr4,#_TIF_SYSCALL_TRACE,gr4
890 andicc gr4,#_TIF_SYSCALL_TRACE,gr0,icc0 889 andicc gr4,#_TIF_SYSCALL_TRACE,gr0,icc0
891 bne icc0,#0,__syscall_trace_entry 890 bne icc0,#0,__syscall_trace_entry
892 891
@@ -1150,11 +1149,10 @@ __entry_work_notifysig:
1150 # perform syscall entry tracing 1149 # perform syscall entry tracing
1151__syscall_trace_entry: 1150__syscall_trace_entry:
1152 LEDS 0x6320 1151 LEDS 0x6320
1153 setlos.p #0,gr8 1152 call syscall_trace_entry
1154 call do_syscall_trace
1155 1153
1156 ldi @(gr28,#REG_SYSCALLNO),gr7 1154 lddi.p @(gr28,#REG_GR(8)) ,gr8
1157 lddi @(gr28,#REG_GR(8)) ,gr8 1155 ori gr8,#0,gr7 ; syscall_trace_entry() returned new syscallno
1158 lddi @(gr28,#REG_GR(10)),gr10 1156 lddi @(gr28,#REG_GR(10)),gr10
1159 lddi.p @(gr28,#REG_GR(12)),gr12 1157 lddi.p @(gr28,#REG_GR(12)),gr12
1160 1158
@@ -1169,11 +1167,10 @@ __syscall_exit_work:
1169 beq icc0,#1,__entry_work_pending 1167 beq icc0,#1,__entry_work_pending
1170 1168
1171 movsg psr,gr23 1169 movsg psr,gr23
1172 andi gr23,#~PSR_PIL,gr23 ; could let do_syscall_trace() call schedule() 1170 andi gr23,#~PSR_PIL,gr23 ; could let syscall_trace_exit() call schedule()
1173 movgs gr23,psr 1171 movgs gr23,psr
1174 1172
1175 setlos.p #1,gr8 1173 call syscall_trace_exit
1176 call do_syscall_trace
1177 bra __entry_resume_userspace 1174 bra __entry_resume_userspace
1178 1175
1179__syscall_badsys: 1176__syscall_badsys:
diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c
index 5e7d401d21e7..60eeed3694c0 100644
--- a/arch/frv/kernel/ptrace.c
+++ b/arch/frv/kernel/ptrace.c
@@ -19,6 +19,9 @@
19#include <linux/user.h> 19#include <linux/user.h>
20#include <linux/security.h> 20#include <linux/security.h>
21#include <linux/signal.h> 21#include <linux/signal.h>
22#include <linux/regset.h>
23#include <linux/elf.h>
24#include <linux/tracehook.h>
22 25
23#include <asm/uaccess.h> 26#include <asm/uaccess.h>
24#include <asm/page.h> 27#include <asm/page.h>
@@ -33,6 +36,169 @@
33 */ 36 */
34 37
35/* 38/*
39 * retrieve the contents of FRV userspace general registers
40 */
41static int genregs_get(struct task_struct *target,
42 const struct user_regset *regset,
43 unsigned int pos, unsigned int count,
44 void *kbuf, void __user *ubuf)
45{
46 const struct user_int_regs *iregs = &target->thread.user->i;
47 int ret;
48
49 ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
50 iregs, 0, sizeof(*iregs));
51 if (ret < 0)
52 return ret;
53
54 return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
55 sizeof(*iregs), -1);
56}
57
58/*
59 * update the contents of the FRV userspace general registers
60 */
61static int genregs_set(struct task_struct *target,
62 const struct user_regset *regset,
63 unsigned int pos, unsigned int count,
64 const void *kbuf, const void __user *ubuf)
65{
66 struct user_int_regs *iregs = &target->thread.user->i;
67 unsigned int offs_gr0, offs_gr1;
68 int ret;
69
70 /* not allowed to set PSR or __status */
71 if (pos < offsetof(struct user_int_regs, psr) + sizeof(long) &&
72 pos + count > offsetof(struct user_int_regs, psr))
73 return -EIO;
74
75 if (pos < offsetof(struct user_int_regs, __status) + sizeof(long) &&
76 pos + count > offsetof(struct user_int_regs, __status))
77 return -EIO;
78
79 /* set the control regs */
80 offs_gr0 = offsetof(struct user_int_regs, gr[0]);
81 offs_gr1 = offsetof(struct user_int_regs, gr[1]);
82 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
83 iregs, 0, offs_gr0);
84 if (ret < 0)
85 return ret;
86
87 /* skip GR0/TBR */
88 ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
89 offs_gr0, offs_gr1);
90 if (ret < 0)
91 return ret;
92
93 /* set the general regs */
94 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
95 &iregs->gr[1], offs_gr1, sizeof(*iregs));
96 if (ret < 0)
97 return ret;
98
99 return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
100 sizeof(*iregs), -1);
101}
102
103/*
104 * retrieve the contents of FRV userspace FP/Media registers
105 */
106static int fpmregs_get(struct task_struct *target,
107 const struct user_regset *regset,
108 unsigned int pos, unsigned int count,
109 void *kbuf, void __user *ubuf)
110{
111 const struct user_fpmedia_regs *fpregs = &target->thread.user->f;
112 int ret;
113
114 ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
115 fpregs, 0, sizeof(*fpregs));
116 if (ret < 0)
117 return ret;
118
119 return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
120 sizeof(*fpregs), -1);
121}
122
123/*
124 * update the contents of the FRV userspace FP/Media registers
125 */
126static int fpmregs_set(struct task_struct *target,
127 const struct user_regset *regset,
128 unsigned int pos, unsigned int count,
129 const void *kbuf, const void __user *ubuf)
130{
131 struct user_fpmedia_regs *fpregs = &target->thread.user->f;
132 int ret;
133
134 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
135 fpregs, 0, sizeof(*fpregs));
136 if (ret < 0)
137 return ret;
138
139 return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
140 sizeof(*fpregs), -1);
141}
142
143/*
144 * determine if the FP/Media registers have actually been used
145 */
146static int fpmregs_active(struct task_struct *target,
147 const struct user_regset *regset)
148{
149 return tsk_used_math(target) ? regset->n : 0;
150}
151
152/*
153 * Define the register sets available on the FRV under Linux
154 */
155enum frv_regset {
156 REGSET_GENERAL,
157 REGSET_FPMEDIA,
158};
159
160static const struct user_regset frv_regsets[] = {
161 /*
162 * General register format is:
163 * PSR, ISR, CCR, CCCR, LR, LCR, PC, (STATUS), SYSCALLNO, ORIG_G8
164 * GNER0-1, IACC0, TBR, GR1-63
165 */
166 [REGSET_GENERAL] = {
167 .core_note_type = NT_PRSTATUS,
168 .n = ELF_NGREG,
169 .size = sizeof(long),
170 .align = sizeof(long),
171 .get = genregs_get,
172 .set = genregs_set,
173 },
174 /*
175 * FPU/Media register format is:
176 * FR0-63, FNER0-1, MSR0-1, ACC0-7, ACCG0-8, FSR
177 */
178 [REGSET_FPMEDIA] = {
179 .core_note_type = NT_PRFPREG,
180 .n = sizeof(struct user_fpmedia_regs) / sizeof(long),
181 .size = sizeof(long),
182 .align = sizeof(long),
183 .get = fpmregs_get,
184 .set = fpmregs_set,
185 .active = fpmregs_active,
186 },
187};
188
189static const struct user_regset_view user_frv_native_view = {
190 .name = "frv",
191 .e_machine = EM_FRV,
192 .regsets = frv_regsets,
193 .n = ARRAY_SIZE(frv_regsets),
194};
195
196const struct user_regset_view *task_user_regset_view(struct task_struct *task)
197{
198 return &user_frv_native_view;
199}
200
201/*
36 * Get contents of register REGNO in task TASK. 202 * Get contents of register REGNO in task TASK.
37 */ 203 */
38static inline long get_reg(struct task_struct *task, int regno) 204static inline long get_reg(struct task_struct *task, int regno)
@@ -69,40 +235,23 @@ static inline int put_reg(struct task_struct *task, int regno,
69} 235}
70 236
71/* 237/*
72 * check that an address falls within the bounds of the target process's memory
73 * mappings
74 */
75static inline int is_user_addr_valid(struct task_struct *child,
76 unsigned long start, unsigned long len)
77{
78#ifdef CONFIG_MMU
79 if (start >= PAGE_OFFSET || len > PAGE_OFFSET - start)
80 return -EIO;
81 return 0;
82#else
83 struct vm_area_struct *vma;
84
85 vma = find_vma(child->mm, start);
86 if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
87 return 0;
88
89 return -EIO;
90#endif
91}
92
93/*
94 * Called by kernel/ptrace.c when detaching.. 238 * Called by kernel/ptrace.c when detaching..
95 * 239 *
96 * Control h/w single stepping 240 * Control h/w single stepping
97 */ 241 */
98void ptrace_disable(struct task_struct *child) 242void user_enable_single_step(struct task_struct *child)
243{
244 child->thread.frame0->__status |= REG__STATUS_STEP;
245}
246
247void user_disable_single_step(struct task_struct *child)
99{ 248{
100 child->thread.frame0->__status &= ~REG__STATUS_STEP; 249 child->thread.frame0->__status &= ~REG__STATUS_STEP;
101} 250}
102 251
103void ptrace_enable(struct task_struct *child) 252void ptrace_disable(struct task_struct *child)
104{ 253{
105 child->thread.frame0->__status |= REG__STATUS_STEP; 254 user_disable_single_step(child);
106} 255}
107 256
108long arch_ptrace(struct task_struct *child, long request, long addr, long data) 257long arch_ptrace(struct task_struct *child, long request, long addr, long data)
@@ -111,15 +260,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
111 int ret; 260 int ret;
112 261
113 switch (request) { 262 switch (request) {
114 /* when I and D space are separate, these will need to be fixed. */
115 case PTRACE_PEEKTEXT: /* read word at location addr. */
116 case PTRACE_PEEKDATA:
117 ret = -EIO;
118 if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0)
119 break;
120 ret = generic_ptrace_peekdata(child, addr, data);
121 break;
122
123 /* read the word at location addr in the USER area. */ 263 /* read the word at location addr in the USER area. */
124 case PTRACE_PEEKUSR: { 264 case PTRACE_PEEKUSR: {
125 tmp = 0; 265 tmp = 0;
@@ -163,15 +303,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
163 break; 303 break;
164 } 304 }
165 305
166 /* when I and D space are separate, this will have to be fixed. */
167 case PTRACE_POKETEXT: /* write the word at location addr. */
168 case PTRACE_POKEDATA:
169 ret = -EIO;
170 if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0)
171 break;
172 ret = generic_ptrace_pokedata(child, addr, data);
173 break;
174
175 case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ 306 case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
176 ret = -EIO; 307 ret = -EIO;
177 if ((addr & 3) || addr < 0) 308 if ((addr & 3) || addr < 0)
@@ -179,7 +310,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
179 310
180 ret = 0; 311 ret = 0;
181 switch (addr >> 2) { 312 switch (addr >> 2) {
182 case 0 ... PT__END-1: 313 case 0 ... PT__END - 1:
183 ret = put_reg(child, addr >> 2, data); 314 ret = put_reg(child, addr >> 2, data);
184 break; 315 break;
185 316
@@ -189,95 +320,29 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
189 } 320 }
190 break; 321 break;
191 322
192 case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ 323 case PTRACE_GETREGS: /* Get all integer regs from the child. */
193 case PTRACE_CONT: /* restart after signal. */ 324 return copy_regset_to_user(child, &user_frv_native_view,
194 ret = -EIO; 325 REGSET_GENERAL,
195 if (!valid_signal(data)) 326 0, sizeof(child->thread.user->i),
196 break; 327 (void __user *)data);
197 if (request == PTRACE_SYSCALL) 328
198 set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); 329 case PTRACE_SETREGS: /* Set all integer regs in the child. */
199 else 330 return copy_regset_from_user(child, &user_frv_native_view,
200 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); 331 REGSET_GENERAL,
201 child->exit_code = data; 332 0, sizeof(child->thread.user->i),
202 ptrace_disable(child); 333 (const void __user *)data);
203 wake_up_process(child); 334
204 ret = 0; 335 case PTRACE_GETFPREGS: /* Get the child FP/Media state. */
205 break; 336 return copy_regset_to_user(child, &user_frv_native_view,
206 337 REGSET_FPMEDIA,
207 /* make the child exit. Best I can do is send it a sigkill. 338 0, sizeof(child->thread.user->f),
208 * perhaps it should be put in the status that it wants to 339 (void __user *)data);
209 * exit. 340
210 */ 341 case PTRACE_SETFPREGS: /* Set the child FP/Media state. */
211 case PTRACE_KILL: 342 return copy_regset_from_user(child, &user_frv_native_view,
212 ret = 0; 343 REGSET_FPMEDIA,
213 if (child->exit_state == EXIT_ZOMBIE) /* already dead */ 344 0, sizeof(child->thread.user->f),
214 break; 345 (const void __user *)data);
215 child->exit_code = SIGKILL;
216 clear_tsk_thread_flag(child, TIF_SINGLESTEP);
217 ptrace_disable(child);
218 wake_up_process(child);
219 break;
220
221 case PTRACE_SINGLESTEP: /* set the trap flag. */
222 ret = -EIO;
223 if (!valid_signal(data))
224 break;
225 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
226 ptrace_enable(child);
227 child->exit_code = data;
228 wake_up_process(child);
229 ret = 0;
230 break;
231
232 case PTRACE_DETACH: /* detach a process that was attached. */
233 ret = ptrace_detach(child, data);
234 break;
235
236 case PTRACE_GETREGS: { /* Get all integer regs from the child. */
237 int i;
238 for (i = 0; i < PT__GPEND; i++) {
239 tmp = get_reg(child, i);
240 if (put_user(tmp, (unsigned long *) data)) {
241 ret = -EFAULT;
242 break;
243 }
244 data += sizeof(long);
245 }
246 ret = 0;
247 break;
248 }
249
250 case PTRACE_SETREGS: { /* Set all integer regs in the child. */
251 int i;
252 for (i = 0; i < PT__GPEND; i++) {
253 if (get_user(tmp, (unsigned long *) data)) {
254 ret = -EFAULT;
255 break;
256 }
257 put_reg(child, i, tmp);
258 data += sizeof(long);
259 }
260 ret = 0;
261 break;
262 }
263
264 case PTRACE_GETFPREGS: { /* Get the child FP/Media state. */
265 ret = 0;
266 if (copy_to_user((void *) data,
267 &child->thread.user->f,
268 sizeof(child->thread.user->f)))
269 ret = -EFAULT;
270 break;
271 }
272
273 case PTRACE_SETFPREGS: { /* Set the child FP/Media state. */
274 ret = 0;
275 if (copy_from_user(&child->thread.user->f,
276 (void *) data,
277 sizeof(child->thread.user->f)))
278 ret = -EFAULT;
279 break;
280 }
281 346
282 case PTRACE_GETFDPIC: 347 case PTRACE_GETFDPIC:
283 tmp = 0; 348 tmp = 0;
@@ -300,414 +365,36 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
300 break; 365 break;
301 366
302 default: 367 default:
303 ret = -EIO; 368 ret = ptrace_request(child, request, addr, data);
304 break; 369 break;
305 } 370 }
306 return ret; 371 return ret;
307} 372}
308 373
309int __nongprelbss kstrace; 374/*
310 375 * handle tracing of system call entry
311static const struct { 376 * - return the revised system call number or ULONG_MAX to cause ENOSYS
312 const char *name; 377 */
313 unsigned argmask; 378asmlinkage unsigned long syscall_trace_entry(void)
314} __syscall_name_table[NR_syscalls] = {
315 [0] = { "restart_syscall" },
316 [1] = { "exit", 0x000001 },
317 [2] = { "fork", 0xffffff },
318 [3] = { "read", 0x000141 },
319 [4] = { "write", 0x000141 },
320 [5] = { "open", 0x000235 },
321 [6] = { "close", 0x000001 },
322 [7] = { "waitpid", 0x000141 },
323 [8] = { "creat", 0x000025 },
324 [9] = { "link", 0x000055 },
325 [10] = { "unlink", 0x000005 },
326 [11] = { "execve", 0x000445 },
327 [12] = { "chdir", 0x000005 },
328 [13] = { "time", 0x000004 },
329 [14] = { "mknod", 0x000325 },
330 [15] = { "chmod", 0x000025 },
331 [16] = { "lchown", 0x000025 },
332 [17] = { "break" },
333 [18] = { "oldstat", 0x000045 },
334 [19] = { "lseek", 0x000131 },
335 [20] = { "getpid", 0xffffff },
336 [21] = { "mount", 0x043555 },
337 [22] = { "umount", 0x000005 },
338 [23] = { "setuid", 0x000001 },
339 [24] = { "getuid", 0xffffff },
340 [25] = { "stime", 0x000004 },
341 [26] = { "ptrace", 0x004413 },
342 [27] = { "alarm", 0x000001 },
343 [28] = { "oldfstat", 0x000041 },
344 [29] = { "pause", 0xffffff },
345 [30] = { "utime", 0x000045 },
346 [31] = { "stty" },
347 [32] = { "gtty" },
348 [33] = { "access", 0x000025 },
349 [34] = { "nice", 0x000001 },
350 [35] = { "ftime" },
351 [36] = { "sync", 0xffffff },
352 [37] = { "kill", 0x000011 },
353 [38] = { "rename", 0x000055 },
354 [39] = { "mkdir", 0x000025 },
355 [40] = { "rmdir", 0x000005 },
356 [41] = { "dup", 0x000001 },
357 [42] = { "pipe", 0x000004 },
358 [43] = { "times", 0x000004 },
359 [44] = { "prof" },
360 [45] = { "brk", 0x000004 },
361 [46] = { "setgid", 0x000001 },
362 [47] = { "getgid", 0xffffff },
363 [48] = { "signal", 0x000041 },
364 [49] = { "geteuid", 0xffffff },
365 [50] = { "getegid", 0xffffff },
366 [51] = { "acct", 0x000005 },
367 [52] = { "umount2", 0x000035 },
368 [53] = { "lock" },
369 [54] = { "ioctl", 0x000331 },
370 [55] = { "fcntl", 0x000331 },
371 [56] = { "mpx" },
372 [57] = { "setpgid", 0x000011 },
373 [58] = { "ulimit" },
374 [60] = { "umask", 0x000002 },
375 [61] = { "chroot", 0x000005 },
376 [62] = { "ustat", 0x000043 },
377 [63] = { "dup2", 0x000011 },
378 [64] = { "getppid", 0xffffff },
379 [65] = { "getpgrp", 0xffffff },
380 [66] = { "setsid", 0xffffff },
381 [67] = { "sigaction" },
382 [68] = { "sgetmask" },
383 [69] = { "ssetmask" },
384 [70] = { "setreuid" },
385 [71] = { "setregid" },
386 [72] = { "sigsuspend" },
387 [73] = { "sigpending" },
388 [74] = { "sethostname" },
389 [75] = { "setrlimit" },
390 [76] = { "getrlimit" },
391 [77] = { "getrusage" },
392 [78] = { "gettimeofday" },
393 [79] = { "settimeofday" },
394 [80] = { "getgroups" },
395 [81] = { "setgroups" },
396 [82] = { "select" },
397 [83] = { "symlink" },
398 [84] = { "oldlstat" },
399 [85] = { "readlink" },
400 [86] = { "uselib" },
401 [87] = { "swapon" },
402 [88] = { "reboot" },
403 [89] = { "readdir" },
404 [91] = { "munmap", 0x000034 },
405 [92] = { "truncate" },
406 [93] = { "ftruncate" },
407 [94] = { "fchmod" },
408 [95] = { "fchown" },
409 [96] = { "getpriority" },
410 [97] = { "setpriority" },
411 [99] = { "statfs" },
412 [100] = { "fstatfs" },
413 [102] = { "socketcall" },
414 [103] = { "syslog" },
415 [104] = { "setitimer" },
416 [105] = { "getitimer" },
417 [106] = { "stat" },
418 [107] = { "lstat" },
419 [108] = { "fstat" },
420 [111] = { "vhangup" },
421 [114] = { "wait4" },
422 [115] = { "swapoff" },
423 [116] = { "sysinfo" },
424 [117] = { "ipc" },
425 [118] = { "fsync" },
426 [119] = { "sigreturn" },
427 [120] = { "clone" },
428 [121] = { "setdomainname" },
429 [122] = { "uname" },
430 [123] = { "modify_ldt" },
431 [123] = { "cacheflush" },
432 [124] = { "adjtimex" },
433 [125] = { "mprotect" },
434 [126] = { "sigprocmask" },
435 [127] = { "create_module" },
436 [128] = { "init_module" },
437 [129] = { "delete_module" },
438 [130] = { "get_kernel_syms" },
439 [131] = { "quotactl" },
440 [132] = { "getpgid" },
441 [133] = { "fchdir" },
442 [134] = { "bdflush" },
443 [135] = { "sysfs" },
444 [136] = { "personality" },
445 [137] = { "afs_syscall" },
446 [138] = { "setfsuid" },
447 [139] = { "setfsgid" },
448 [140] = { "_llseek", 0x014331 },
449 [141] = { "getdents" },
450 [142] = { "_newselect", 0x000141 },
451 [143] = { "flock" },
452 [144] = { "msync" },
453 [145] = { "readv" },
454 [146] = { "writev" },
455 [147] = { "getsid", 0x000001 },
456 [148] = { "fdatasync", 0x000001 },
457 [149] = { "_sysctl", 0x000004 },
458 [150] = { "mlock" },
459 [151] = { "munlock" },
460 [152] = { "mlockall" },
461 [153] = { "munlockall" },
462 [154] = { "sched_setparam" },
463 [155] = { "sched_getparam" },
464 [156] = { "sched_setscheduler" },
465 [157] = { "sched_getscheduler" },
466 [158] = { "sched_yield" },
467 [159] = { "sched_get_priority_max" },
468 [160] = { "sched_get_priority_min" },
469 [161] = { "sched_rr_get_interval" },
470 [162] = { "nanosleep", 0x000044 },
471 [163] = { "mremap" },
472 [164] = { "setresuid" },
473 [165] = { "getresuid" },
474 [166] = { "vm86" },
475 [167] = { "query_module" },
476 [168] = { "poll" },
477 [169] = { "nfsservctl" },
478 [170] = { "setresgid" },
479 [171] = { "getresgid" },
480 [172] = { "prctl", 0x333331 },
481 [173] = { "rt_sigreturn", 0xffffff },
482 [174] = { "rt_sigaction", 0x001441 },
483 [175] = { "rt_sigprocmask", 0x001441 },
484 [176] = { "rt_sigpending", 0x000014 },
485 [177] = { "rt_sigtimedwait", 0x001444 },
486 [178] = { "rt_sigqueueinfo", 0x000411 },
487 [179] = { "rt_sigsuspend", 0x000014 },
488 [180] = { "pread", 0x003341 },
489 [181] = { "pwrite", 0x003341 },
490 [182] = { "chown", 0x000115 },
491 [183] = { "getcwd" },
492 [184] = { "capget" },
493 [185] = { "capset" },
494 [186] = { "sigaltstack" },
495 [187] = { "sendfile" },
496 [188] = { "getpmsg" },
497 [189] = { "putpmsg" },
498 [190] = { "vfork", 0xffffff },
499 [191] = { "ugetrlimit" },
500 [192] = { "mmap2", 0x313314 },
501 [193] = { "truncate64" },
502 [194] = { "ftruncate64" },
503 [195] = { "stat64", 0x000045 },
504 [196] = { "lstat64", 0x000045 },
505 [197] = { "fstat64", 0x000041 },
506 [198] = { "lchown32" },
507 [199] = { "getuid32", 0xffffff },
508 [200] = { "getgid32", 0xffffff },
509 [201] = { "geteuid32", 0xffffff },
510 [202] = { "getegid32", 0xffffff },
511 [203] = { "setreuid32" },
512 [204] = { "setregid32" },
513 [205] = { "getgroups32" },
514 [206] = { "setgroups32" },
515 [207] = { "fchown32" },
516 [208] = { "setresuid32" },
517 [209] = { "getresuid32" },
518 [210] = { "setresgid32" },
519 [211] = { "getresgid32" },
520 [212] = { "chown32" },
521 [213] = { "setuid32" },
522 [214] = { "setgid32" },
523 [215] = { "setfsuid32" },
524 [216] = { "setfsgid32" },
525 [217] = { "pivot_root" },
526 [218] = { "mincore" },
527 [219] = { "madvise" },
528 [220] = { "getdents64" },
529 [221] = { "fcntl64" },
530 [223] = { "security" },
531 [224] = { "gettid" },
532 [225] = { "readahead" },
533 [226] = { "setxattr" },
534 [227] = { "lsetxattr" },
535 [228] = { "fsetxattr" },
536 [229] = { "getxattr" },
537 [230] = { "lgetxattr" },
538 [231] = { "fgetxattr" },
539 [232] = { "listxattr" },
540 [233] = { "llistxattr" },
541 [234] = { "flistxattr" },
542 [235] = { "removexattr" },
543 [236] = { "lremovexattr" },
544 [237] = { "fremovexattr" },
545 [238] = { "tkill" },
546 [239] = { "sendfile64" },
547 [240] = { "futex" },
548 [241] = { "sched_setaffinity" },
549 [242] = { "sched_getaffinity" },
550 [243] = { "set_thread_area" },
551 [244] = { "get_thread_area" },
552 [245] = { "io_setup" },
553 [246] = { "io_destroy" },
554 [247] = { "io_getevents" },
555 [248] = { "io_submit" },
556 [249] = { "io_cancel" },
557 [250] = { "fadvise64" },
558 [252] = { "exit_group", 0x000001 },
559 [253] = { "lookup_dcookie" },
560 [254] = { "epoll_create" },
561 [255] = { "epoll_ctl" },
562 [256] = { "epoll_wait" },
563 [257] = { "remap_file_pages" },
564 [258] = { "set_tid_address" },
565 [259] = { "timer_create" },
566 [260] = { "timer_settime" },
567 [261] = { "timer_gettime" },
568 [262] = { "timer_getoverrun" },
569 [263] = { "timer_delete" },
570 [264] = { "clock_settime" },
571 [265] = { "clock_gettime" },
572 [266] = { "clock_getres" },
573 [267] = { "clock_nanosleep" },
574 [268] = { "statfs64" },
575 [269] = { "fstatfs64" },
576 [270] = { "tgkill" },
577 [271] = { "utimes" },
578 [272] = { "fadvise64_64" },
579 [273] = { "vserver" },
580 [274] = { "mbind" },
581 [275] = { "get_mempolicy" },
582 [276] = { "set_mempolicy" },
583 [277] = { "mq_open" },
584 [278] = { "mq_unlink" },
585 [279] = { "mq_timedsend" },
586 [280] = { "mq_timedreceive" },
587 [281] = { "mq_notify" },
588 [282] = { "mq_getsetattr" },
589 [283] = { "sys_kexec_load" },
590};
591
592asmlinkage void do_syscall_trace(int leaving)
593{ 379{
594#if 0 380 __frame->__status |= REG__STATUS_SYSC_ENTRY;
595 unsigned long *argp; 381 if (tracehook_report_syscall_entry(__frame)) {
596 const char *name; 382 /* tracing decided this syscall should not happen, so
597 unsigned argmask; 383 * We'll return a bogus call number to get an ENOSYS
598 char buffer[16]; 384 * error, but leave the original number in
599 385 * __frame->syscallno
600 if (!kstrace) 386 */
601 return; 387 return ULONG_MAX;
602
603 if (!current->mm)
604 return;
605
606 if (__frame->gr7 == __NR_close)
607 return;
608
609#if 0
610 if (__frame->gr7 != __NR_mmap2 &&
611 __frame->gr7 != __NR_vfork &&
612 __frame->gr7 != __NR_execve &&
613 __frame->gr7 != __NR_exit)
614 return;
615#endif
616
617 argmask = 0;
618 name = NULL;
619 if (__frame->gr7 < NR_syscalls) {
620 name = __syscall_name_table[__frame->gr7].name;
621 argmask = __syscall_name_table[__frame->gr7].argmask;
622 }
623 if (!name) {
624 sprintf(buffer, "sys_%lx", __frame->gr7);
625 name = buffer;
626 }
627
628 if (!leaving) {
629 if (!argmask) {
630 printk(KERN_CRIT "[%d] %s(%lx,%lx,%lx,%lx,%lx,%lx)\n",
631 current->pid,
632 name,
633 __frame->gr8,
634 __frame->gr9,
635 __frame->gr10,
636 __frame->gr11,
637 __frame->gr12,
638 __frame->gr13);
639 }
640 else if (argmask == 0xffffff) {
641 printk(KERN_CRIT "[%d] %s()\n",
642 current->pid,
643 name);
644 }
645 else {
646 printk(KERN_CRIT "[%d] %s(",
647 current->pid,
648 name);
649
650 argp = &__frame->gr8;
651
652 do {
653 switch (argmask & 0xf) {
654 case 1:
655 printk("%ld", (long) *argp);
656 break;
657 case 2:
658 printk("%lo", *argp);
659 break;
660 case 3:
661 printk("%lx", *argp);
662 break;
663 case 4:
664 printk("%p", (void *) *argp);
665 break;
666 case 5:
667 printk("\"%s\"", (char *) *argp);
668 break;
669 }
670
671 argp++;
672 argmask >>= 4;
673 if (argmask)
674 printk(",");
675
676 } while (argmask);
677
678 printk(")\n");
679 }
680 }
681 else {
682 if ((int)__frame->gr8 > -4096 && (int)__frame->gr8 < 4096)
683 printk(KERN_CRIT "[%d] %s() = %ld\n", current->pid, name, __frame->gr8);
684 else
685 printk(KERN_CRIT "[%d] %s() = %lx\n", current->pid, name, __frame->gr8);
686 } 388 }
687 return;
688#endif
689
690 if (!test_thread_flag(TIF_SYSCALL_TRACE))
691 return;
692
693 if (!(current->ptrace & PT_PTRACED))
694 return;
695 389
696 /* we need to indicate entry or exit to strace */ 390 return __frame->syscallno;
697 if (leaving) 391}
698 __frame->__status |= REG__STATUS_SYSC_EXIT;
699 else
700 __frame->__status |= REG__STATUS_SYSC_ENTRY;
701
702 ptrace_notify(SIGTRAP);
703 392
704 /* 393/*
705 * this isn't the same as continuing with a signal, but it will do 394 * handle tracing of system call exit
706 * for normal use. strace only continues with a signal if the 395 */
707 * stopping signal is not SIGTRAP. -brl 396asmlinkage void syscall_trace_exit(void)
708 */ 397{
709 if (current->exit_code) { 398 __frame->__status |= REG__STATUS_SYSC_EXIT;
710 send_sig(current->exit_code, current, 1); 399 tracehook_report_syscall_exit(__frame, 0);
711 current->exit_code = 0;
712 }
713} 400}
diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c
index 3bdb368292a8..4a7a62c6e783 100644
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@@ -21,6 +21,7 @@
21#include <linux/unistd.h> 21#include <linux/unistd.h>
22#include <linux/personality.h> 22#include <linux/personality.h>
23#include <linux/freezer.h> 23#include <linux/freezer.h>
24#include <linux/tracehook.h>
24#include <asm/ucontext.h> 25#include <asm/ucontext.h>
25#include <asm/uaccess.h> 26#include <asm/uaccess.h>
26#include <asm/cacheflush.h> 27#include <asm/cacheflush.h>
@@ -516,6 +517,9 @@ static void do_signal(void)
516 * clear the TIF_RESTORE_SIGMASK flag */ 517 * clear the TIF_RESTORE_SIGMASK flag */
517 if (test_thread_flag(TIF_RESTORE_SIGMASK)) 518 if (test_thread_flag(TIF_RESTORE_SIGMASK))
518 clear_thread_flag(TIF_RESTORE_SIGMASK); 519 clear_thread_flag(TIF_RESTORE_SIGMASK);
520
521 tracehook_signal_handler(signr, &info, &ka, __frame,
522 test_thread_flag(TIF_SINGLESTEP));
519 } 523 }
520 524
521 return; 525 return;
@@ -564,4 +568,10 @@ asmlinkage void do_notify_resume(__u32 thread_info_flags)
564 if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) 568 if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
565 do_signal(); 569 do_signal();
566 570
571 /* deal with notification on about to resume userspace execution */
572 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
573 clear_thread_flag(TIF_NOTIFY_RESUME);
574 tracehook_notify_resume(__frame);
575 }
576
567} /* end do_notify_resume() */ 577} /* end do_notify_resume() */
diff --git a/arch/frv/kernel/uaccess.c b/arch/frv/kernel/uaccess.c
index 9fb771a20df3..374f88d6cc00 100644
--- a/arch/frv/kernel/uaccess.c
+++ b/arch/frv/kernel/uaccess.c
@@ -23,8 +23,7 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
23 char *p, ch; 23 char *p, ch;
24 long err = -EFAULT; 24 long err = -EFAULT;
25 25
26 if (count < 0) 26 BUG_ON(count < 0);
27 BUG();
28 27
29 p = dst; 28 p = dst;
30 29
@@ -76,8 +75,7 @@ long strnlen_user(const char __user *src, long count)
76 long err = 0; 75 long err = 0;
77 char ch; 76 char ch;
78 77
79 if (count < 0) 78 BUG_ON(count < 0);
80 BUG();
81 79
82#ifndef CONFIG_MMU 80#ifndef CONFIG_MMU
83 if ((unsigned long) src < memory_start) 81 if ((unsigned long) src < memory_start)
diff --git a/arch/frv/mb93090-mb00/pci-dma-nommu.c b/arch/frv/mb93090-mb00/pci-dma-nommu.c
index 52ff9aec799d..4e1ba0b15443 100644
--- a/arch/frv/mb93090-mb00/pci-dma-nommu.c
+++ b/arch/frv/mb93090-mb00/pci-dma-nommu.c
@@ -116,8 +116,7 @@ EXPORT_SYMBOL(dma_free_coherent);
116dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, 116dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
117 enum dma_data_direction direction) 117 enum dma_data_direction direction)
118{ 118{
119 if (direction == DMA_NONE) 119 BUG_ON(direction == DMA_NONE);
120 BUG();
121 120
122 frv_cache_wback_inv((unsigned long) ptr, (unsigned long) ptr + size); 121 frv_cache_wback_inv((unsigned long) ptr, (unsigned long) ptr + size);
123 122
@@ -151,8 +150,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
151 frv_cache_wback_inv(sg_dma_address(&sg[i]), 150 frv_cache_wback_inv(sg_dma_address(&sg[i]),
152 sg_dma_address(&sg[i]) + sg_dma_len(&sg[i])); 151 sg_dma_address(&sg[i]) + sg_dma_len(&sg[i]));
153 152
154 if (direction == DMA_NONE) 153 BUG_ON(direction == DMA_NONE);
155 BUG();
156 154
157 return nents; 155 return nents;
158} 156}
diff --git a/arch/frv/mb93090-mb00/pci-dma.c b/arch/frv/mb93090-mb00/pci-dma.c
index 3ddedebc4eb3..45954f0813dc 100644
--- a/arch/frv/mb93090-mb00/pci-dma.c
+++ b/arch/frv/mb93090-mb00/pci-dma.c
@@ -48,8 +48,7 @@ EXPORT_SYMBOL(dma_free_coherent);
48dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, 48dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
49 enum dma_data_direction direction) 49 enum dma_data_direction direction)
50{ 50{
51 if (direction == DMA_NONE) 51 BUG_ON(direction == DMA_NONE);
52 BUG();
53 52
54 frv_cache_wback_inv((unsigned long) ptr, (unsigned long) ptr + size); 53 frv_cache_wback_inv((unsigned long) ptr, (unsigned long) ptr + size);
55 54
@@ -81,8 +80,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
81 void *vaddr; 80 void *vaddr;
82 int i; 81 int i;
83 82
84 if (direction == DMA_NONE) 83 BUG_ON(direction == DMA_NONE);
85 BUG();
86 84
87 dampr2 = __get_DAMPR(2); 85 dampr2 = __get_DAMPR(2);
88 86
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 4542651e6acb..5f43697aed30 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -371,6 +371,7 @@ struct kvm_vcpu_arch {
371 int last_run_cpu; 371 int last_run_cpu;
372 int vmm_tr_slot; 372 int vmm_tr_slot;
373 int vm_tr_slot; 373 int vm_tr_slot;
374 int sn_rtc_tr_slot;
374 375
375#define KVM_MP_STATE_RUNNABLE 0 376#define KVM_MP_STATE_RUNNABLE 0
376#define KVM_MP_STATE_UNINITIALIZED 1 377#define KVM_MP_STATE_UNINITIALIZED 1
@@ -465,6 +466,7 @@ struct kvm_arch {
465 unsigned long vmm_init_rr; 466 unsigned long vmm_init_rr;
466 467
467 int online_vcpus; 468 int online_vcpus;
469 int is_sn2;
468 470
469 struct kvm_ioapic *vioapic; 471 struct kvm_ioapic *vioapic;
470 struct kvm_vm_stat stat; 472 struct kvm_vm_stat stat;
@@ -472,6 +474,7 @@ struct kvm_arch {
472 474
473 struct list_head assigned_dev_head; 475 struct list_head assigned_dev_head;
474 struct iommu_domain *iommu_domain; 476 struct iommu_domain *iommu_domain;
477 int iommu_flags;
475 struct hlist_head irq_ack_notifier_list; 478 struct hlist_head irq_ack_notifier_list;
476 479
477 unsigned long irq_sources_bitmap; 480 unsigned long irq_sources_bitmap;
@@ -578,6 +581,8 @@ struct kvm_vmm_info{
578 kvm_vmm_entry *vmm_entry; 581 kvm_vmm_entry *vmm_entry;
579 kvm_tramp_entry *tramp_entry; 582 kvm_tramp_entry *tramp_entry;
580 unsigned long vmm_ivt; 583 unsigned long vmm_ivt;
584 unsigned long patch_mov_ar;
585 unsigned long patch_mov_ar_sn2;
581}; 586};
582 587
583int kvm_highest_pending_irq(struct kvm_vcpu *vcpu); 588int kvm_highest_pending_irq(struct kvm_vcpu *vcpu);
@@ -585,7 +590,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu);
585int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); 590int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
586void kvm_sal_emul(struct kvm_vcpu *vcpu); 591void kvm_sal_emul(struct kvm_vcpu *vcpu);
587 592
588static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {}
589#endif /* __ASSEMBLY__*/ 593#endif /* __ASSEMBLY__*/
590 594
591#endif 595#endif
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 7a9bff47564f..0a9cc73d35c7 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -146,6 +146,8 @@
146#define PAGE_GATE __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX) 146#define PAGE_GATE __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX)
147#define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX) 147#define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX)
148#define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX) 148#define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX)
149#define PAGE_KERNEL_UC __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX | \
150 _PAGE_MA_UC)
149 151
150# ifndef __ASSEMBLY__ 152# ifndef __ASSEMBLY__
151 153
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index acc4d19ae62a..b448197728be 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -610,6 +610,9 @@ static struct irqaction ipi_irqaction = {
610 .name = "IPI" 610 .name = "IPI"
611}; 611};
612 612
613/*
614 * KVM uses this interrupt to force a cpu out of guest mode
615 */
613static struct irqaction resched_irqaction = { 616static struct irqaction resched_irqaction = {
614 .handler = dummy_handler, 617 .handler = dummy_handler,
615 .flags = IRQF_DISABLED, 618 .flags = IRQF_DISABLED,
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index 0a2d6b86075a..64d520937874 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -23,7 +23,7 @@ if VIRTUALIZATION
23 23
24config KVM 24config KVM
25 tristate "Kernel-based Virtual Machine (KVM) support" 25 tristate "Kernel-based Virtual Machine (KVM) support"
26 depends on HAVE_KVM && EXPERIMENTAL 26 depends on HAVE_KVM && MODULES && EXPERIMENTAL
27 # for device assignment: 27 # for device assignment:
28 depends on PCI 28 depends on PCI
29 select PREEMPT_NOTIFIERS 29 select PREEMPT_NOTIFIERS
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index d20a5db4c4dd..80c57b0a21c4 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -41,6 +41,9 @@
41#include <asm/div64.h> 41#include <asm/div64.h>
42#include <asm/tlb.h> 42#include <asm/tlb.h>
43#include <asm/elf.h> 43#include <asm/elf.h>
44#include <asm/sn/addrs.h>
45#include <asm/sn/clksupport.h>
46#include <asm/sn/shub_mmr.h>
44 47
45#include "misc.h" 48#include "misc.h"
46#include "vti.h" 49#include "vti.h"
@@ -65,6 +68,16 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
65 { NULL } 68 { NULL }
66}; 69};
67 70
71static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu)
72{
73#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
74 if (vcpu->kvm->arch.is_sn2)
75 return rtc_time();
76 else
77#endif
78 return ia64_getreg(_IA64_REG_AR_ITC);
79}
80
68static void kvm_flush_icache(unsigned long start, unsigned long len) 81static void kvm_flush_icache(unsigned long start, unsigned long len)
69{ 82{
70 int l; 83 int l;
@@ -119,8 +132,7 @@ void kvm_arch_hardware_enable(void *garbage)
119 unsigned long saved_psr; 132 unsigned long saved_psr;
120 int slot; 133 int slot;
121 134
122 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), 135 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
123 PAGE_KERNEL));
124 local_irq_save(saved_psr); 136 local_irq_save(saved_psr);
125 slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); 137 slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
126 local_irq_restore(saved_psr); 138 local_irq_restore(saved_psr);
@@ -283,6 +295,18 @@ static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
283 295
284} 296}
285 297
298static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector)
299{
300 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
301
302 if (!test_and_set_bit(vector, &vpd->irr[0])) {
303 vcpu->arch.irq_new_pending = 1;
304 kvm_vcpu_kick(vcpu);
305 return 1;
306 }
307 return 0;
308}
309
286/* 310/*
287 * offset: address offset to IPI space. 311 * offset: address offset to IPI space.
288 * value: deliver value. 312 * value: deliver value.
@@ -292,20 +316,20 @@ static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
292{ 316{
293 switch (dm) { 317 switch (dm) {
294 case SAPIC_FIXED: 318 case SAPIC_FIXED:
295 kvm_apic_set_irq(vcpu, vector, 0);
296 break; 319 break;
297 case SAPIC_NMI: 320 case SAPIC_NMI:
298 kvm_apic_set_irq(vcpu, 2, 0); 321 vector = 2;
299 break; 322 break;
300 case SAPIC_EXTINT: 323 case SAPIC_EXTINT:
301 kvm_apic_set_irq(vcpu, 0, 0); 324 vector = 0;
302 break; 325 break;
303 case SAPIC_INIT: 326 case SAPIC_INIT:
304 case SAPIC_PMI: 327 case SAPIC_PMI:
305 default: 328 default:
306 printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n"); 329 printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
307 break; 330 return;
308 } 331 }
332 __apic_accept_irq(vcpu, vector);
309} 333}
310 334
311static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, 335static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
@@ -413,6 +437,23 @@ static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
413 return 1; 437 return 1;
414} 438}
415 439
440static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu)
441{
442 unsigned long pte, rtc_phys_addr, map_addr;
443 int slot;
444
445 map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT);
446 rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC;
447 pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC));
448 slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT);
449 vcpu->arch.sn_rtc_tr_slot = slot;
450 if (slot < 0) {
451 printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n");
452 slot = 0;
453 }
454 return slot;
455}
456
416int kvm_emulate_halt(struct kvm_vcpu *vcpu) 457int kvm_emulate_halt(struct kvm_vcpu *vcpu)
417{ 458{
418 459
@@ -426,7 +467,7 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
426 467
427 if (irqchip_in_kernel(vcpu->kvm)) { 468 if (irqchip_in_kernel(vcpu->kvm)) {
428 469
429 vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset; 470 vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset;
430 471
431 if (time_after(vcpu_now_itc, vpd->itm)) { 472 if (time_after(vcpu_now_itc, vpd->itm)) {
432 vcpu->arch.timer_check = 1; 473 vcpu->arch.timer_check = 1;
@@ -447,10 +488,10 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
447 hrtimer_cancel(p_ht); 488 hrtimer_cancel(p_ht);
448 vcpu->arch.ht_active = 0; 489 vcpu->arch.ht_active = 0;
449 490
450 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) 491 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) ||
492 kvm_cpu_has_pending_timer(vcpu))
451 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) 493 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
452 vcpu->arch.mp_state = 494 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
453 KVM_MP_STATE_RUNNABLE;
454 495
455 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) 496 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
456 return -EINTR; 497 return -EINTR;
@@ -551,22 +592,35 @@ static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
551 if (r < 0) 592 if (r < 0)
552 goto out; 593 goto out;
553 vcpu->arch.vm_tr_slot = r; 594 vcpu->arch.vm_tr_slot = r;
595
596#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
597 if (kvm->arch.is_sn2) {
598 r = kvm_sn2_setup_mappings(vcpu);
599 if (r < 0)
600 goto out;
601 }
602#endif
603
554 r = 0; 604 r = 0;
555out: 605out:
556 return r; 606 return r;
557
558} 607}
559 608
560static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu) 609static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
561{ 610{
562 611 struct kvm *kvm = vcpu->kvm;
563 ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot); 612 ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
564 ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot); 613 ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
565 614#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
615 if (kvm->arch.is_sn2)
616 ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot);
617#endif
566} 618}
567 619
568static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu) 620static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
569{ 621{
622 unsigned long psr;
623 int r;
570 int cpu = smp_processor_id(); 624 int cpu = smp_processor_id();
571 625
572 if (vcpu->arch.last_run_cpu != cpu || 626 if (vcpu->arch.last_run_cpu != cpu ||
@@ -578,36 +632,27 @@ static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
578 632
579 vcpu->arch.host_rr6 = ia64_get_rr(RR6); 633 vcpu->arch.host_rr6 = ia64_get_rr(RR6);
580 vti_set_rr6(vcpu->arch.vmm_rr); 634 vti_set_rr6(vcpu->arch.vmm_rr);
581 return kvm_insert_vmm_mapping(vcpu); 635 local_irq_save(psr);
636 r = kvm_insert_vmm_mapping(vcpu);
637 local_irq_restore(psr);
638 return r;
582} 639}
640
583static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu) 641static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
584{ 642{
585 kvm_purge_vmm_mapping(vcpu); 643 kvm_purge_vmm_mapping(vcpu);
586 vti_set_rr6(vcpu->arch.host_rr6); 644 vti_set_rr6(vcpu->arch.host_rr6);
587} 645}
588 646
589static int vti_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 647static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
590{ 648{
591 union context *host_ctx, *guest_ctx; 649 union context *host_ctx, *guest_ctx;
592 int r; 650 int r;
593 651
594 /*Get host and guest context with guest address space.*/ 652 /*
595 host_ctx = kvm_get_host_context(vcpu); 653 * down_read() may sleep and return with interrupts enabled
596 guest_ctx = kvm_get_guest_context(vcpu); 654 */
597 655 down_read(&vcpu->kvm->slots_lock);
598 r = kvm_vcpu_pre_transition(vcpu);
599 if (r < 0)
600 goto out;
601 kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
602 kvm_vcpu_post_transition(vcpu);
603 r = 0;
604out:
605 return r;
606}
607
608static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
609{
610 int r;
611 656
612again: 657again:
613 if (signal_pending(current)) { 658 if (signal_pending(current)) {
@@ -616,26 +661,31 @@ again:
616 goto out; 661 goto out;
617 } 662 }
618 663
619 /*
620 * down_read() may sleep and return with interrupts enabled
621 */
622 down_read(&vcpu->kvm->slots_lock);
623
624 preempt_disable(); 664 preempt_disable();
625 local_irq_disable(); 665 local_irq_disable();
626 666
627 vcpu->guest_mode = 1; 667 /*Get host and guest context with guest address space.*/
668 host_ctx = kvm_get_host_context(vcpu);
669 guest_ctx = kvm_get_guest_context(vcpu);
670
671 clear_bit(KVM_REQ_KICK, &vcpu->requests);
672
673 r = kvm_vcpu_pre_transition(vcpu);
674 if (r < 0)
675 goto vcpu_run_fail;
676
677 up_read(&vcpu->kvm->slots_lock);
628 kvm_guest_enter(); 678 kvm_guest_enter();
629 r = vti_vcpu_run(vcpu, kvm_run); 679
630 if (r < 0) { 680 /*
631 local_irq_enable(); 681 * Transition to the guest
632 preempt_enable(); 682 */
633 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 683 kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
634 goto out; 684
635 } 685 kvm_vcpu_post_transition(vcpu);
636 686
637 vcpu->arch.launched = 1; 687 vcpu->arch.launched = 1;
638 vcpu->guest_mode = 0; 688 set_bit(KVM_REQ_KICK, &vcpu->requests);
639 local_irq_enable(); 689 local_irq_enable();
640 690
641 /* 691 /*
@@ -646,9 +696,10 @@ again:
646 */ 696 */
647 barrier(); 697 barrier();
648 kvm_guest_exit(); 698 kvm_guest_exit();
649 up_read(&vcpu->kvm->slots_lock);
650 preempt_enable(); 699 preempt_enable();
651 700
701 down_read(&vcpu->kvm->slots_lock);
702
652 r = kvm_handle_exit(kvm_run, vcpu); 703 r = kvm_handle_exit(kvm_run, vcpu);
653 704
654 if (r > 0) { 705 if (r > 0) {
@@ -657,12 +708,20 @@ again:
657 } 708 }
658 709
659out: 710out:
711 up_read(&vcpu->kvm->slots_lock);
660 if (r > 0) { 712 if (r > 0) {
661 kvm_resched(vcpu); 713 kvm_resched(vcpu);
714 down_read(&vcpu->kvm->slots_lock);
662 goto again; 715 goto again;
663 } 716 }
664 717
665 return r; 718 return r;
719
720vcpu_run_fail:
721 local_irq_enable();
722 preempt_enable();
723 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
724 goto out;
666} 725}
667 726
668static void kvm_set_mmio_data(struct kvm_vcpu *vcpu) 727static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
@@ -788,6 +847,9 @@ struct kvm *kvm_arch_create_vm(void)
788 847
789 if (IS_ERR(kvm)) 848 if (IS_ERR(kvm))
790 return ERR_PTR(-ENOMEM); 849 return ERR_PTR(-ENOMEM);
850
851 kvm->arch.is_sn2 = ia64_platform_is("sn2");
852
791 kvm_init_vm(kvm); 853 kvm_init_vm(kvm);
792 854
793 kvm->arch.online_vcpus = 0; 855 kvm->arch.online_vcpus = 0;
@@ -884,7 +946,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
884 RESTORE_REGS(saved_gp); 946 RESTORE_REGS(saved_gp);
885 947
886 vcpu->arch.irq_new_pending = 1; 948 vcpu->arch.irq_new_pending = 1;
887 vcpu->arch.itc_offset = regs->saved_itc - ia64_getreg(_IA64_REG_AR_ITC); 949 vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu);
888 set_bit(KVM_REQ_RESUME, &vcpu->requests); 950 set_bit(KVM_REQ_RESUME, &vcpu->requests);
889 951
890 vcpu_put(vcpu); 952 vcpu_put(vcpu);
@@ -1043,10 +1105,6 @@ static void kvm_free_vmm_area(void)
1043 } 1105 }
1044} 1106}
1045 1107
1046static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1047{
1048}
1049
1050static int vti_init_vpd(struct kvm_vcpu *vcpu) 1108static int vti_init_vpd(struct kvm_vcpu *vcpu)
1051{ 1109{
1052 int i; 1110 int i;
@@ -1165,7 +1223,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1165 regs->cr_iip = PALE_RESET_ENTRY; 1223 regs->cr_iip = PALE_RESET_ENTRY;
1166 1224
1167 /*Initialize itc offset for vcpus*/ 1225 /*Initialize itc offset for vcpus*/
1168 itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC); 1226 itc_offset = 0UL - kvm_get_itc(vcpu);
1169 for (i = 0; i < kvm->arch.online_vcpus; i++) { 1227 for (i = 0; i < kvm->arch.online_vcpus; i++) {
1170 v = (struct kvm_vcpu *)((char *)vcpu + 1228 v = (struct kvm_vcpu *)((char *)vcpu +
1171 sizeof(struct kvm_vcpu_data) * i); 1229 sizeof(struct kvm_vcpu_data) * i);
@@ -1237,6 +1295,7 @@ static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
1237 1295
1238 local_irq_save(psr); 1296 local_irq_save(psr);
1239 r = kvm_insert_vmm_mapping(vcpu); 1297 r = kvm_insert_vmm_mapping(vcpu);
1298 local_irq_restore(psr);
1240 if (r) 1299 if (r)
1241 goto fail; 1300 goto fail;
1242 r = kvm_vcpu_init(vcpu, vcpu->kvm, id); 1301 r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
@@ -1254,13 +1313,11 @@ static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
1254 goto uninit; 1313 goto uninit;
1255 1314
1256 kvm_purge_vmm_mapping(vcpu); 1315 kvm_purge_vmm_mapping(vcpu);
1257 local_irq_restore(psr);
1258 1316
1259 return 0; 1317 return 0;
1260uninit: 1318uninit:
1261 kvm_vcpu_uninit(vcpu); 1319 kvm_vcpu_uninit(vcpu);
1262fail: 1320fail:
1263 local_irq_restore(psr);
1264 return r; 1321 return r;
1265} 1322}
1266 1323
@@ -1291,7 +1348,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1291 vcpu->kvm = kvm; 1348 vcpu->kvm = kvm;
1292 1349
1293 cpu = get_cpu(); 1350 cpu = get_cpu();
1294 vti_vcpu_load(vcpu, cpu);
1295 r = vti_vcpu_setup(vcpu, id); 1351 r = vti_vcpu_setup(vcpu, id);
1296 put_cpu(); 1352 put_cpu();
1297 1353
@@ -1427,7 +1483,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1427 } 1483 }
1428 for (i = 0; i < 4; i++) 1484 for (i = 0; i < 4; i++)
1429 regs->insvc[i] = vcpu->arch.insvc[i]; 1485 regs->insvc[i] = vcpu->arch.insvc[i];
1430 regs->saved_itc = vcpu->arch.itc_offset + ia64_getreg(_IA64_REG_AR_ITC); 1486 regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu);
1431 SAVE_REGS(xtp); 1487 SAVE_REGS(xtp);
1432 SAVE_REGS(metaphysical_rr0); 1488 SAVE_REGS(metaphysical_rr0);
1433 SAVE_REGS(metaphysical_rr4); 1489 SAVE_REGS(metaphysical_rr4);
@@ -1574,6 +1630,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
1574 1630
1575void kvm_arch_flush_shadow(struct kvm *kvm) 1631void kvm_arch_flush_shadow(struct kvm *kvm)
1576{ 1632{
1633 kvm_flush_remote_tlbs(kvm);
1577} 1634}
1578 1635
1579long kvm_arch_dev_ioctl(struct file *filp, 1636long kvm_arch_dev_ioctl(struct file *filp,
@@ -1616,8 +1673,37 @@ out:
1616 return 0; 1673 return 0;
1617} 1674}
1618 1675
1676
1677/*
1678 * On SN2, the ITC isn't stable, so copy in fast path code to use the
1679 * SN2 RTC, replacing the ITC based default verion.
1680 */
1681static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info,
1682 struct module *module)
1683{
1684 unsigned long new_ar, new_ar_sn2;
1685 unsigned long module_base;
1686
1687 if (!ia64_platform_is("sn2"))
1688 return;
1689
1690 module_base = (unsigned long)module->module_core;
1691
1692 new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base;
1693 new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base;
1694
1695 printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC "
1696 "as source\n");
1697
1698 /*
1699 * Copy the SN2 version of mov_ar into place. They are both
1700 * the same size, so 6 bundles is sufficient (6 * 0x10).
1701 */
1702 memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60);
1703}
1704
1619static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info, 1705static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
1620 struct module *module) 1706 struct module *module)
1621{ 1707{
1622 unsigned long module_base; 1708 unsigned long module_base;
1623 unsigned long vmm_size; 1709 unsigned long vmm_size;
@@ -1639,6 +1725,7 @@ static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
1639 return -EFAULT; 1725 return -EFAULT;
1640 1726
1641 memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size); 1727 memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
1728 kvm_patch_vmm(vmm_info, module);
1642 kvm_flush_icache(kvm_vmm_base, vmm_size); 1729 kvm_flush_icache(kvm_vmm_base, vmm_size);
1643 1730
1644 /*Recalculate kvm_vmm_info based on new VMM*/ 1731 /*Recalculate kvm_vmm_info based on new VMM*/
@@ -1792,38 +1879,24 @@ void kvm_arch_hardware_unsetup(void)
1792{ 1879{
1793} 1880}
1794 1881
1795static void vcpu_kick_intr(void *info)
1796{
1797#ifdef DEBUG
1798 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
1799 printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu);
1800#endif
1801}
1802
1803void kvm_vcpu_kick(struct kvm_vcpu *vcpu) 1882void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
1804{ 1883{
1805 int ipi_pcpu = vcpu->cpu; 1884 int me;
1806 int cpu = get_cpu(); 1885 int cpu = vcpu->cpu;
1807 1886
1808 if (waitqueue_active(&vcpu->wq)) 1887 if (waitqueue_active(&vcpu->wq))
1809 wake_up_interruptible(&vcpu->wq); 1888 wake_up_interruptible(&vcpu->wq);
1810 1889
1811 if (vcpu->guest_mode && cpu != ipi_pcpu) 1890 me = get_cpu();
1812 smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0); 1891 if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu))
1892 if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
1893 smp_send_reschedule(cpu);
1813 put_cpu(); 1894 put_cpu();
1814} 1895}
1815 1896
1816int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig) 1897int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
1817{ 1898{
1818 1899 return __apic_accept_irq(vcpu, irq->vector);
1819 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1820
1821 if (!test_and_set_bit(vec, &vpd->irr[0])) {
1822 vcpu->arch.irq_new_pending = 1;
1823 kvm_vcpu_kick(vcpu);
1824 return 1;
1825 }
1826 return 0;
1827} 1900}
1828 1901
1829int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) 1902int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
@@ -1836,20 +1909,18 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
1836 return 0; 1909 return 0;
1837} 1910}
1838 1911
1839struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, 1912int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
1840 unsigned long bitmap)
1841{ 1913{
1842 struct kvm_vcpu *lvcpu = kvm->vcpus[0]; 1914 return vcpu1->arch.xtp - vcpu2->arch.xtp;
1843 int i; 1915}
1844
1845 for (i = 1; i < kvm->arch.online_vcpus; i++) {
1846 if (!kvm->vcpus[i])
1847 continue;
1848 if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp)
1849 lvcpu = kvm->vcpus[i];
1850 }
1851 1916
1852 return lvcpu; 1917int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
1918 int short_hand, int dest, int dest_mode)
1919{
1920 struct kvm_lapic *target = vcpu->arch.apic;
1921 return (dest_mode == 0) ?
1922 kvm_apic_match_physical_addr(target, dest) :
1923 kvm_apic_match_logical_addr(target, dest);
1853} 1924}
1854 1925
1855static int find_highest_bits(int *dat) 1926static int find_highest_bits(int *dat)
@@ -1888,6 +1959,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
1888 return 0; 1959 return 0;
1889} 1960}
1890 1961
1962int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
1963{
1964 /* do real check here */
1965 return 1;
1966}
1967
1891int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) 1968int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
1892{ 1969{
1893 return vcpu->arch.timer_fired; 1970 return vcpu->arch.timer_fired;
@@ -1918,6 +1995,7 @@ static int vcpu_reset(struct kvm_vcpu *vcpu)
1918 long psr; 1995 long psr;
1919 local_irq_save(psr); 1996 local_irq_save(psr);
1920 r = kvm_insert_vmm_mapping(vcpu); 1997 r = kvm_insert_vmm_mapping(vcpu);
1998 local_irq_restore(psr);
1921 if (r) 1999 if (r)
1922 goto fail; 2000 goto fail;
1923 2001
@@ -1930,7 +2008,6 @@ static int vcpu_reset(struct kvm_vcpu *vcpu)
1930 kvm_purge_vmm_mapping(vcpu); 2008 kvm_purge_vmm_mapping(vcpu);
1931 r = 0; 2009 r = 0;
1932fail: 2010fail:
1933 local_irq_restore(psr);
1934 return r; 2011 return r;
1935} 2012}
1936 2013
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
index a8ae52ed5635..e4b82319881d 100644
--- a/arch/ia64/kvm/kvm_fw.c
+++ b/arch/ia64/kvm/kvm_fw.c
@@ -21,6 +21,9 @@
21 21
22#include <linux/kvm_host.h> 22#include <linux/kvm_host.h>
23#include <linux/smp.h> 23#include <linux/smp.h>
24#include <asm/sn/addrs.h>
25#include <asm/sn/clksupport.h>
26#include <asm/sn/shub_mmr.h>
24 27
25#include "vti.h" 28#include "vti.h"
26#include "misc.h" 29#include "misc.h"
@@ -188,12 +191,35 @@ static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu)
188 return result; 191 return result;
189} 192}
190 193
191static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu) 194/*
195 * On the SGI SN2, the ITC isn't stable. Emulation backed by the SN2
196 * RTC is used instead. This function patches the ratios from SAL
197 * to match the RTC before providing them to the guest.
198 */
199static void sn2_patch_itc_freq_ratios(struct ia64_pal_retval *result)
192{ 200{
201 struct pal_freq_ratio *ratio;
202 unsigned long sal_freq, sal_drift, factor;
203
204 result->status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
205 &sal_freq, &sal_drift);
206 ratio = (struct pal_freq_ratio *)&result->v2;
207 factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) /
208 sn_rtc_cycles_per_second;
209
210 ratio->num = 3;
211 ratio->den = factor;
212}
193 213
214static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
215{
194 struct ia64_pal_retval result; 216 struct ia64_pal_retval result;
195 217
196 PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0); 218 PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0);
219
220 if (vcpu->kvm->arch.is_sn2)
221 sn2_patch_itc_freq_ratios(&result);
222
197 return result; 223 return result;
198} 224}
199 225
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
index 6d6cbcb14893..ee541cebcd78 100644
--- a/arch/ia64/kvm/lapic.h
+++ b/arch/ia64/kvm/lapic.h
@@ -20,6 +20,10 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu);
20 20
21int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); 21int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
22int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); 22int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
23int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig); 23int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
24 int short_hand, int dest, int dest_mode);
25int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
26int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
27#define kvm_apic_present(x) (true)
24 28
25#endif 29#endif
diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S
index 32254ce9a1bd..f793be3effff 100644
--- a/arch/ia64/kvm/optvfault.S
+++ b/arch/ia64/kvm/optvfault.S
@@ -11,6 +11,7 @@
11 11
12#include <asm/asmmacro.h> 12#include <asm/asmmacro.h>
13#include <asm/processor.h> 13#include <asm/processor.h>
14#include <asm/kvm_host.h>
14 15
15#include "vti.h" 16#include "vti.h"
16#include "asm-offsets.h" 17#include "asm-offsets.h"
@@ -140,6 +141,35 @@ GLOBAL_ENTRY(kvm_asm_mov_from_ar)
140 ;; 141 ;;
141END(kvm_asm_mov_from_ar) 142END(kvm_asm_mov_from_ar)
142 143
144/*
145 * Special SGI SN2 optimized version of mov_from_ar using the SN2 RTC
146 * clock as it's source for emulating the ITC. This version will be
147 * copied on top of the original version if the host is determined to
148 * be an SN2.
149 */
150GLOBAL_ENTRY(kvm_asm_mov_from_ar_sn2)
151 add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
152 movl r19 = (KVM_VMM_BASE+(1<<KVM_VMM_SHIFT))
153
154 add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
155 extr.u r17=r25,6,7
156 mov r24=b0
157 ;;
158 ld8 r18=[r18]
159 ld8 r19=[r19]
160 addl r20=@gprel(asm_mov_to_reg),gp
161 ;;
162 add r19=r19,r18
163 shladd r17=r17,4,r20
164 ;;
165 adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
166 st8 [r16] = r19
167 mov b0=r17
168 br.sptk.few b0
169 ;;
170END(kvm_asm_mov_from_ar_sn2)
171
172
143 173
144// mov r1=rr[r3] 174// mov r1=rr[r3]
145GLOBAL_ENTRY(kvm_asm_mov_from_rr) 175GLOBAL_ENTRY(kvm_asm_mov_from_rr)
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index b1dc80952d91..a8f84da04b49 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -652,20 +652,25 @@ void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
652 unsigned long isr, unsigned long iim) 652 unsigned long isr, unsigned long iim)
653{ 653{
654 struct kvm_vcpu *v = current_vcpu; 654 struct kvm_vcpu *v = current_vcpu;
655 long psr;
655 656
656 if (ia64_psr(regs)->cpl == 0) { 657 if (ia64_psr(regs)->cpl == 0) {
657 /* Allow hypercalls only when cpl = 0. */ 658 /* Allow hypercalls only when cpl = 0. */
658 if (iim == DOMN_PAL_REQUEST) { 659 if (iim == DOMN_PAL_REQUEST) {
660 local_irq_save(psr);
659 set_pal_call_data(v); 661 set_pal_call_data(v);
660 vmm_transition(v); 662 vmm_transition(v);
661 get_pal_call_result(v); 663 get_pal_call_result(v);
662 vcpu_increment_iip(v); 664 vcpu_increment_iip(v);
665 local_irq_restore(psr);
663 return; 666 return;
664 } else if (iim == DOMN_SAL_REQUEST) { 667 } else if (iim == DOMN_SAL_REQUEST) {
668 local_irq_save(psr);
665 set_sal_call_data(v); 669 set_sal_call_data(v);
666 vmm_transition(v); 670 vmm_transition(v);
667 get_sal_call_result(v); 671 get_sal_call_result(v);
668 vcpu_increment_iip(v); 672 vcpu_increment_iip(v);
673 local_irq_restore(psr);
669 return; 674 return;
670 } 675 }
671 } 676 }
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index a18ee17b9192..a2c6c15e4761 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -788,13 +788,29 @@ void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
788 setfpreg(reg, val, regs); /* FIXME: handle NATs later*/ 788 setfpreg(reg, val, regs); /* FIXME: handle NATs later*/
789} 789}
790 790
791/*
792 * The Altix RTC is mapped specially here for the vmm module
793 */
794#define SN_RTC_BASE (u64 *)(KVM_VMM_BASE+(1UL<<KVM_VMM_SHIFT))
795static long kvm_get_itc(struct kvm_vcpu *vcpu)
796{
797#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
798 struct kvm *kvm = (struct kvm *)KVM_VM_BASE;
799
800 if (kvm->arch.is_sn2)
801 return (*SN_RTC_BASE);
802 else
803#endif
804 return ia64_getreg(_IA64_REG_AR_ITC);
805}
806
791/************************************************************************ 807/************************************************************************
792 * lsapic timer 808 * lsapic timer
793 ***********************************************************************/ 809 ***********************************************************************/
794u64 vcpu_get_itc(struct kvm_vcpu *vcpu) 810u64 vcpu_get_itc(struct kvm_vcpu *vcpu)
795{ 811{
796 unsigned long guest_itc; 812 unsigned long guest_itc;
797 guest_itc = VMX(vcpu, itc_offset) + ia64_getreg(_IA64_REG_AR_ITC); 813 guest_itc = VMX(vcpu, itc_offset) + kvm_get_itc(vcpu);
798 814
799 if (guest_itc >= VMX(vcpu, last_itc)) { 815 if (guest_itc >= VMX(vcpu, last_itc)) {
800 VMX(vcpu, last_itc) = guest_itc; 816 VMX(vcpu, last_itc) = guest_itc;
@@ -809,7 +825,7 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
809 struct kvm_vcpu *v; 825 struct kvm_vcpu *v;
810 struct kvm *kvm; 826 struct kvm *kvm;
811 int i; 827 int i;
812 long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC); 828 long itc_offset = val - kvm_get_itc(vcpu);
813 unsigned long vitv = VCPU(vcpu, itv); 829 unsigned long vitv = VCPU(vcpu, itv);
814 830
815 kvm = (struct kvm *)KVM_VM_BASE; 831 kvm = (struct kvm *)KVM_VM_BASE;
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
index 9eee5c04bacc..f4b4c899bb6c 100644
--- a/arch/ia64/kvm/vmm.c
+++ b/arch/ia64/kvm/vmm.c
@@ -30,15 +30,19 @@ MODULE_AUTHOR("Intel");
30MODULE_LICENSE("GPL"); 30MODULE_LICENSE("GPL");
31 31
32extern char kvm_ia64_ivt; 32extern char kvm_ia64_ivt;
33extern char kvm_asm_mov_from_ar;
34extern char kvm_asm_mov_from_ar_sn2;
33extern fpswa_interface_t *vmm_fpswa_interface; 35extern fpswa_interface_t *vmm_fpswa_interface;
34 36
35long vmm_sanity = 1; 37long vmm_sanity = 1;
36 38
37struct kvm_vmm_info vmm_info = { 39struct kvm_vmm_info vmm_info = {
38 .module = THIS_MODULE, 40 .module = THIS_MODULE,
39 .vmm_entry = vmm_entry, 41 .vmm_entry = vmm_entry,
40 .tramp_entry = vmm_trampoline, 42 .tramp_entry = vmm_trampoline,
41 .vmm_ivt = (unsigned long)&kvm_ia64_ivt, 43 .vmm_ivt = (unsigned long)&kvm_ia64_ivt,
44 .patch_mov_ar = (unsigned long)&kvm_asm_mov_from_ar,
45 .patch_mov_ar_sn2 = (unsigned long)&kvm_asm_mov_from_ar_sn2,
42}; 46};
43 47
44static int __init kvm_vmm_init(void) 48static int __init kvm_vmm_init(void)
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
index 3ef1a017a318..40920c630649 100644
--- a/arch/ia64/kvm/vmm_ivt.S
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -95,7 +95,7 @@ GLOBAL_ENTRY(kvm_vmm_panic)
95 ;; 95 ;;
96 srlz.i // guarantee that interruption collection is on 96 srlz.i // guarantee that interruption collection is on
97 ;; 97 ;;
98 //(p15) ssm psr.i // restore psr.i 98 (p15) ssm psr.i // restore psr.
99 addl r14=@gprel(ia64_leave_hypervisor),gp 99 addl r14=@gprel(ia64_leave_hypervisor),gp
100 ;; 100 ;;
101 KVM_SAVE_REST 101 KVM_SAVE_REST
@@ -249,7 +249,7 @@ ENTRY(kvm_break_fault)
249 ;; 249 ;;
250 srlz.i // guarantee that interruption collection is on 250 srlz.i // guarantee that interruption collection is on
251 ;; 251 ;;
252 //(p15)ssm psr.i // restore psr.i 252 (p15)ssm psr.i // restore psr.i
253 addl r14=@gprel(ia64_leave_hypervisor),gp 253 addl r14=@gprel(ia64_leave_hypervisor),gp
254 ;; 254 ;;
255 KVM_SAVE_REST 255 KVM_SAVE_REST
@@ -439,7 +439,7 @@ kvm_dispatch_vexirq:
439 ;; 439 ;;
440 srlz.i // guarantee that interruption collection is on 440 srlz.i // guarantee that interruption collection is on
441 ;; 441 ;;
442 //(p15) ssm psr.i // restore psr.i 442 (p15) ssm psr.i // restore psr.i
443 adds r3=8,r2 // set up second base pointer 443 adds r3=8,r2 // set up second base pointer
444 ;; 444 ;;
445 KVM_SAVE_REST 445 KVM_SAVE_REST
@@ -819,7 +819,7 @@ ENTRY(kvm_dtlb_miss_dispatch)
819 ;; 819 ;;
820 srlz.i // guarantee that interruption collection is on 820 srlz.i // guarantee that interruption collection is on
821 ;; 821 ;;
822 //(p15) ssm psr.i // restore psr.i 822 (p15) ssm psr.i // restore psr.i
823 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp 823 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
824 ;; 824 ;;
825 KVM_SAVE_REST 825 KVM_SAVE_REST
@@ -842,7 +842,7 @@ ENTRY(kvm_itlb_miss_dispatch)
842 ;; 842 ;;
843 srlz.i // guarantee that interruption collection is on 843 srlz.i // guarantee that interruption collection is on
844 ;; 844 ;;
845 //(p15) ssm psr.i // restore psr.i 845 (p15) ssm psr.i // restore psr.i
846 addl r14=@gprel(ia64_leave_hypervisor),gp 846 addl r14=@gprel(ia64_leave_hypervisor),gp
847 ;; 847 ;;
848 KVM_SAVE_REST 848 KVM_SAVE_REST
@@ -871,7 +871,7 @@ ENTRY(kvm_dispatch_reflection)
871 ;; 871 ;;
872 srlz.i // guarantee that interruption collection is on 872 srlz.i // guarantee that interruption collection is on
873 ;; 873 ;;
874 //(p15) ssm psr.i // restore psr.i 874 (p15) ssm psr.i // restore psr.i
875 addl r14=@gprel(ia64_leave_hypervisor),gp 875 addl r14=@gprel(ia64_leave_hypervisor),gp
876 ;; 876 ;;
877 KVM_SAVE_REST 877 KVM_SAVE_REST
@@ -898,7 +898,7 @@ ENTRY(kvm_dispatch_virtualization_fault)
898 ;; 898 ;;
899 srlz.i // guarantee that interruption collection is on 899 srlz.i // guarantee that interruption collection is on
900 ;; 900 ;;
901 //(p15) ssm psr.i // restore psr.i 901 (p15) ssm psr.i // restore psr.i
902 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp 902 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
903 ;; 903 ;;
904 KVM_SAVE_REST 904 KVM_SAVE_REST
@@ -920,7 +920,7 @@ ENTRY(kvm_dispatch_interrupt)
920 ;; 920 ;;
921 srlz.i 921 srlz.i
922 ;; 922 ;;
923 //(p15) ssm psr.i 923 (p15) ssm psr.i
924 addl r14=@gprel(ia64_leave_hypervisor),gp 924 addl r14=@gprel(ia64_leave_hypervisor),gp
925 ;; 925 ;;
926 KVM_SAVE_REST 926 KVM_SAVE_REST
@@ -1333,7 +1333,7 @@ hostret = r24
1333 ;; 1333 ;;
1334(p7) srlz.i 1334(p7) srlz.i
1335 ;; 1335 ;;
1336//(p6) ssm psr.i 1336(p6) ssm psr.i
1337 ;; 1337 ;;
1338 mov rp=rpsave 1338 mov rp=rpsave
1339 mov ar.pfs=pfssave 1339 mov ar.pfs=pfssave
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index 2c2501f13159..4290a429bf7c 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
@@ -254,7 +254,8 @@ u64 guest_vhpt_lookup(u64 iha, u64 *pte)
254 "(p7) st8 [%2]=r9;;" 254 "(p7) st8 [%2]=r9;;"
255 "ssm psr.ic;;" 255 "ssm psr.ic;;"
256 "srlz.d;;" 256 "srlz.d;;"
257 /* "ssm psr.i;;" Once interrupts in vmm open, need fix*/ 257 "ssm psr.i;;"
258 "srlz.d;;"
258 : "=r"(ret) : "r"(iha), "r"(pte):"memory"); 259 : "=r"(ret) : "r"(iha), "r"(pte):"memory");
259 260
260 return ret; 261 return ret;
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 355926730e8d..89faacad5d17 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -8,6 +8,7 @@ mainmenu "Linux Kernel Configuration"
8config MN10300 8config MN10300
9 def_bool y 9 def_bool y
10 select HAVE_OPROFILE 10 select HAVE_OPROFILE
11 select HAVE_ARCH_TRACEHOOK
11 12
12config AM33 13config AM33
13 def_bool y 14 def_bool y
diff --git a/arch/mn10300/include/asm/elf.h b/arch/mn10300/include/asm/elf.h
index bf09f8bb392e..49105462e6fc 100644
--- a/arch/mn10300/include/asm/elf.h
+++ b/arch/mn10300/include/asm/elf.h
@@ -34,7 +34,7 @@
34 */ 34 */
35typedef unsigned long elf_greg_t; 35typedef unsigned long elf_greg_t;
36 36
37#define ELF_NGREG (sizeof (struct pt_regs) / sizeof(elf_greg_t)) 37#define ELF_NGREG ((sizeof(struct pt_regs) / sizeof(elf_greg_t)) - 1)
38typedef elf_greg_t elf_gregset_t[ELF_NGREG]; 38typedef elf_greg_t elf_gregset_t[ELF_NGREG];
39 39
40#define ELF_NFPREG 32 40#define ELF_NFPREG 32
@@ -76,6 +76,7 @@ do { \
76} while (0) 76} while (0)
77 77
78#define USE_ELF_CORE_DUMP 78#define USE_ELF_CORE_DUMP
79#define CORE_DUMP_USE_REGSET
79#define ELF_EXEC_PAGESIZE 4096 80#define ELF_EXEC_PAGESIZE 4096
80 81
81/* 82/*
diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h
index 73239271873d..f7d4b0d285e8 100644
--- a/arch/mn10300/include/asm/processor.h
+++ b/arch/mn10300/include/asm/processor.h
@@ -143,13 +143,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
143 143
144unsigned long get_wchan(struct task_struct *p); 144unsigned long get_wchan(struct task_struct *p);
145 145
146#define task_pt_regs(task) \ 146#define task_pt_regs(task) ((task)->thread.uregs)
147({ \
148 struct pt_regs *__regs__; \
149 __regs__ = (struct pt_regs *) (KSTK_TOP(task_stack_page(task)) - 8); \
150 __regs__ - 1; \
151})
152
153#define KSTK_EIP(task) (task_pt_regs(task)->pc) 147#define KSTK_EIP(task) (task_pt_regs(task)->pc)
154#define KSTK_ESP(task) (task_pt_regs(task)->sp) 148#define KSTK_ESP(task) (task_pt_regs(task)->sp)
155 149
diff --git a/arch/mn10300/include/asm/ptrace.h b/arch/mn10300/include/asm/ptrace.h
index 7b06cc623d8b..921942ed1b03 100644
--- a/arch/mn10300/include/asm/ptrace.h
+++ b/arch/mn10300/include/asm/ptrace.h
@@ -91,9 +91,17 @@ extern struct pt_regs *__frame; /* current frame pointer */
91#if defined(__KERNEL__) 91#if defined(__KERNEL__)
92 92
93#if !defined(__ASSEMBLY__) 93#if !defined(__ASSEMBLY__)
94struct task_struct;
95
94#define user_mode(regs) (((regs)->epsw & EPSW_nSL) == EPSW_nSL) 96#define user_mode(regs) (((regs)->epsw & EPSW_nSL) == EPSW_nSL)
95#define instruction_pointer(regs) ((regs)->pc) 97#define instruction_pointer(regs) ((regs)->pc)
98#define user_stack_pointer(regs) ((regs)->sp)
96extern void show_regs(struct pt_regs *); 99extern void show_regs(struct pt_regs *);
100
101#define arch_has_single_step() (1)
102extern void user_enable_single_step(struct task_struct *);
103extern void user_disable_single_step(struct task_struct *);
104
97#endif /* !__ASSEMBLY */ 105#endif /* !__ASSEMBLY */
98 106
99#define profile_pc(regs) ((regs)->pc) 107#define profile_pc(regs) ((regs)->pc)
diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S
index 3dc3e462f92a..7408a27199f3 100644
--- a/arch/mn10300/kernel/entry.S
+++ b/arch/mn10300/kernel/entry.S
@@ -76,7 +76,7 @@ ENTRY(system_call)
76 cmp nr_syscalls,d0 76 cmp nr_syscalls,d0
77 bcc syscall_badsys 77 bcc syscall_badsys
78 btst _TIF_SYSCALL_TRACE,(TI_flags,a2) 78 btst _TIF_SYSCALL_TRACE,(TI_flags,a2)
79 bne syscall_trace_entry 79 bne syscall_entry_trace
80syscall_call: 80syscall_call:
81 add d0,d0,a1 81 add d0,d0,a1
82 add a1,a1 82 add a1,a1
@@ -104,11 +104,10 @@ restore_all:
104syscall_exit_work: 104syscall_exit_work:
105 btst _TIF_SYSCALL_TRACE,d2 105 btst _TIF_SYSCALL_TRACE,d2
106 beq work_pending 106 beq work_pending
107 __sti # could let do_syscall_trace() call 107 __sti # could let syscall_trace_exit() call
108 # schedule() instead 108 # schedule() instead
109 mov fp,d0 109 mov fp,d0
110 mov 1,d1 110 call syscall_trace_exit[],0 # do_syscall_trace(regs)
111 call do_syscall_trace[],0 # do_syscall_trace(regs,entryexit)
112 jmp resume_userspace 111 jmp resume_userspace
113 112
114 ALIGN 113 ALIGN
@@ -138,13 +137,11 @@ work_notifysig:
138 jmp resume_userspace 137 jmp resume_userspace
139 138
140 # perform syscall entry tracing 139 # perform syscall entry tracing
141syscall_trace_entry: 140syscall_entry_trace:
142 mov -ENOSYS,d0 141 mov -ENOSYS,d0
143 mov d0,(REG_D0,fp) 142 mov d0,(REG_D0,fp)
144 mov fp,d0 143 mov fp,d0
145 clr d1 144 call syscall_trace_entry[],0 # returns the syscall number to actually use
146 call do_syscall_trace[],0
147 mov (REG_ORIG_D0,fp),d0
148 mov (REG_D1,fp),d1 145 mov (REG_D1,fp),d1
149 cmp nr_syscalls,d0 146 cmp nr_syscalls,d0
150 bcs syscall_call 147 bcs syscall_call
diff --git a/arch/mn10300/kernel/ptrace.c b/arch/mn10300/kernel/ptrace.c
index d6d6cdc75c52..e143339ad28e 100644
--- a/arch/mn10300/kernel/ptrace.c
+++ b/arch/mn10300/kernel/ptrace.c
@@ -17,6 +17,9 @@
17#include <linux/errno.h> 17#include <linux/errno.h>
18#include <linux/ptrace.h> 18#include <linux/ptrace.h>
19#include <linux/user.h> 19#include <linux/user.h>
20#include <linux/regset.h>
21#include <linux/elf.h>
22#include <linux/tracehook.h>
20#include <asm/uaccess.h> 23#include <asm/uaccess.h>
21#include <asm/pgtable.h> 24#include <asm/pgtable.h>
22#include <asm/system.h> 25#include <asm/system.h>
@@ -64,12 +67,6 @@ static inline int get_stack_long(struct task_struct *task, int offset)
64 ((unsigned long) task->thread.uregs + offset); 67 ((unsigned long) task->thread.uregs + offset);
65} 68}
66 69
67/*
68 * this routine will put a word on the processes privileged stack.
69 * the offset is how far from the base addr as stored in the TSS.
70 * this routine assumes that all the privileged stacks are in our
71 * data space.
72 */
73static inline 70static inline
74int put_stack_long(struct task_struct *task, int offset, unsigned long data) 71int put_stack_long(struct task_struct *task, int offset, unsigned long data)
75{ 72{
@@ -80,94 +77,233 @@ int put_stack_long(struct task_struct *task, int offset, unsigned long data)
80 return 0; 77 return 0;
81} 78}
82 79
83static inline unsigned long get_fpregs(struct fpu_state_struct *buf, 80/*
84 struct task_struct *tsk) 81 * retrieve the contents of MN10300 userspace general registers
82 */
83static int genregs_get(struct task_struct *target,
84 const struct user_regset *regset,
85 unsigned int pos, unsigned int count,
86 void *kbuf, void __user *ubuf)
85{ 87{
86 return __copy_to_user(buf, &tsk->thread.fpu_state, 88 const struct pt_regs *regs = task_pt_regs(target);
87 sizeof(struct fpu_state_struct)); 89 int ret;
90
91 /* we need to skip regs->next */
92 ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
93 regs, 0, PT_ORIG_D0 * sizeof(long));
94 if (ret < 0)
95 return ret;
96
97 ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
98 &regs->orig_d0, PT_ORIG_D0 * sizeof(long),
99 NR_PTREGS * sizeof(long));
100 if (ret < 0)
101 return ret;
102
103 return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
104 NR_PTREGS * sizeof(long), -1);
88} 105}
89 106
90static inline unsigned long set_fpregs(struct task_struct *tsk, 107/*
91 struct fpu_state_struct *buf) 108 * update the contents of the MN10300 userspace general registers
109 */
110static int genregs_set(struct task_struct *target,
111 const struct user_regset *regset,
112 unsigned int pos, unsigned int count,
113 const void *kbuf, const void __user *ubuf)
92{ 114{
93 return __copy_from_user(&tsk->thread.fpu_state, buf, 115 struct pt_regs *regs = task_pt_regs(target);
94 sizeof(struct fpu_state_struct)); 116 unsigned long tmp;
117 int ret;
118
119 /* we need to skip regs->next */
120 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
121 regs, 0, PT_ORIG_D0 * sizeof(long));
122 if (ret < 0)
123 return ret;
124
125 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
126 &regs->orig_d0, PT_ORIG_D0 * sizeof(long),
127 PT_EPSW * sizeof(long));
128 if (ret < 0)
129 return ret;
130
131 /* we need to mask off changes to EPSW */
132 tmp = regs->epsw;
133 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
134 &tmp, PT_EPSW * sizeof(long),
135 PT_PC * sizeof(long));
136 tmp &= EPSW_FLAG_V | EPSW_FLAG_C | EPSW_FLAG_N | EPSW_FLAG_Z;
137 tmp |= regs->epsw & ~(EPSW_FLAG_V | EPSW_FLAG_C | EPSW_FLAG_N |
138 EPSW_FLAG_Z);
139 regs->epsw = tmp;
140
141 if (ret < 0)
142 return ret;
143
144 /* and finally load the PC */
145 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
146 &regs->pc, PT_PC * sizeof(long),
147 NR_PTREGS * sizeof(long));
148
149 if (ret < 0)
150 return ret;
151
152 return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
153 NR_PTREGS * sizeof(long), -1);
95} 154}
96 155
97static inline void fpsave_init(struct task_struct *task) 156/*
157 * retrieve the contents of MN10300 userspace FPU registers
158 */
159static int fpuregs_get(struct task_struct *target,
160 const struct user_regset *regset,
161 unsigned int pos, unsigned int count,
162 void *kbuf, void __user *ubuf)
98{ 163{
99 memset(&task->thread.fpu_state, 0, sizeof(struct fpu_state_struct)); 164 const struct fpu_state_struct *fpregs = &target->thread.fpu_state;
165 int ret;
166
167 unlazy_fpu(target);
168
169 ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
170 fpregs, 0, sizeof(*fpregs));
171 if (ret < 0)
172 return ret;
173
174 return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
175 sizeof(*fpregs), -1);
100} 176}
101 177
102/* 178/*
103 * make sure the single step bit is not set 179 * update the contents of the MN10300 userspace FPU registers
104 */ 180 */
105void ptrace_disable(struct task_struct *child) 181static int fpuregs_set(struct task_struct *target,
182 const struct user_regset *regset,
183 unsigned int pos, unsigned int count,
184 const void *kbuf, const void __user *ubuf)
185{
186 struct fpu_state_struct fpu_state = target->thread.fpu_state;
187 int ret;
188
189 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
190 &fpu_state, 0, sizeof(fpu_state));
191 if (ret < 0)
192 return ret;
193
194 fpu_kill_state(target);
195 target->thread.fpu_state = fpu_state;
196 set_using_fpu(target);
197
198 return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
199 sizeof(fpu_state), -1);
200}
201
202/*
203 * determine if the FPU registers have actually been used
204 */
205static int fpuregs_active(struct task_struct *target,
206 const struct user_regset *regset)
207{
208 return is_using_fpu(target) ? regset->n : 0;
209}
210
211/*
212 * Define the register sets available on the MN10300 under Linux
213 */
214enum mn10300_regset {
215 REGSET_GENERAL,
216 REGSET_FPU,
217};
218
219static const struct user_regset mn10300_regsets[] = {
220 /*
221 * General register format is:
222 * A3, A2, D3, D2, MCVF, MCRL, MCRH, MDRQ
223 * E1, E0, E7...E2, SP, LAR, LIR, MDR
224 * A1, A0, D1, D0, ORIG_D0, EPSW, PC
225 */
226 [REGSET_GENERAL] = {
227 .core_note_type = NT_PRSTATUS,
228 .n = ELF_NGREG,
229 .size = sizeof(long),
230 .align = sizeof(long),
231 .get = genregs_get,
232 .set = genregs_set,
233 },
234 /*
235 * FPU register format is:
236 * FS0-31, FPCR
237 */
238 [REGSET_FPU] = {
239 .core_note_type = NT_PRFPREG,
240 .n = sizeof(struct fpu_state_struct) / sizeof(long),
241 .size = sizeof(long),
242 .align = sizeof(long),
243 .get = fpuregs_get,
244 .set = fpuregs_set,
245 .active = fpuregs_active,
246 },
247};
248
249static const struct user_regset_view user_mn10300_native_view = {
250 .name = "mn10300",
251 .e_machine = EM_MN10300,
252 .regsets = mn10300_regsets,
253 .n = ARRAY_SIZE(mn10300_regsets),
254};
255
256const struct user_regset_view *task_user_regset_view(struct task_struct *task)
257{
258 return &user_mn10300_native_view;
259}
260
261/*
262 * set the single-step bit
263 */
264void user_enable_single_step(struct task_struct *child)
106{ 265{
107#ifndef CONFIG_MN10300_USING_JTAG 266#ifndef CONFIG_MN10300_USING_JTAG
108 struct user *dummy = NULL; 267 struct user *dummy = NULL;
109 long tmp; 268 long tmp;
110 269
111 tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw); 270 tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw);
112 tmp &= ~EPSW_T; 271 tmp |= EPSW_T;
113 put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp); 272 put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp);
114#endif 273#endif
115} 274}
116 275
117/* 276/*
118 * set the single step bit 277 * make sure the single-step bit is not set
119 */ 278 */
120void ptrace_enable(struct task_struct *child) 279void user_disable_single_step(struct task_struct *child)
121{ 280{
122#ifndef CONFIG_MN10300_USING_JTAG 281#ifndef CONFIG_MN10300_USING_JTAG
123 struct user *dummy = NULL; 282 struct user *dummy = NULL;
124 long tmp; 283 long tmp;
125 284
126 tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw); 285 tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw);
127 tmp |= EPSW_T; 286 tmp &= ~EPSW_T;
128 put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp); 287 put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp);
129#endif 288#endif
130} 289}
131 290
291void ptrace_disable(struct task_struct *child)
292{
293 user_disable_single_step(child);
294}
295
132/* 296/*
133 * handle the arch-specific side of process tracing 297 * handle the arch-specific side of process tracing
134 */ 298 */
135long arch_ptrace(struct task_struct *child, long request, long addr, long data) 299long arch_ptrace(struct task_struct *child, long request, long addr, long data)
136{ 300{
137 struct fpu_state_struct fpu_state; 301 unsigned long tmp;
138 int i, ret; 302 int ret;
139 303
140 switch (request) { 304 switch (request) {
141 /* read the word at location addr. */
142 case PTRACE_PEEKTEXT: {
143 unsigned long tmp;
144 int copied;
145
146 copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
147 ret = -EIO;
148 if (copied != sizeof(tmp))
149 break;
150 ret = put_user(tmp, (unsigned long *) data);
151 break;
152 }
153
154 /* read the word at location addr. */
155 case PTRACE_PEEKDATA: {
156 unsigned long tmp;
157 int copied;
158
159 copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
160 ret = -EIO;
161 if (copied != sizeof(tmp))
162 break;
163 ret = put_user(tmp, (unsigned long *) data);
164 break;
165 }
166
167 /* read the word at location addr in the USER area. */ 305 /* read the word at location addr in the USER area. */
168 case PTRACE_PEEKUSR: { 306 case PTRACE_PEEKUSR:
169 unsigned long tmp;
170
171 ret = -EIO; 307 ret = -EIO;
172 if ((addr & 3) || addr < 0 || 308 if ((addr & 3) || addr < 0 ||
173 addr > sizeof(struct user) - 3) 309 addr > sizeof(struct user) - 3)
@@ -179,17 +315,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
179 ptrace_regid_to_frame[addr]); 315 ptrace_regid_to_frame[addr]);
180 ret = put_user(tmp, (unsigned long *) data); 316 ret = put_user(tmp, (unsigned long *) data);
181 break; 317 break;
182 }
183
184 /* write the word at location addr. */
185 case PTRACE_POKETEXT:
186 case PTRACE_POKEDATA:
187 if (access_process_vm(child, addr, &data, sizeof(data), 1) ==
188 sizeof(data))
189 ret = 0;
190 else
191 ret = -EIO;
192 break;
193 318
194 /* write the word at location addr in the USER area */ 319 /* write the word at location addr in the USER area */
195 case PTRACE_POKEUSR: 320 case PTRACE_POKEUSR:
@@ -204,132 +329,32 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
204 data); 329 data);
205 break; 330 break;
206 331
207 /* continue and stop at next (return from) syscall */ 332 case PTRACE_GETREGS: /* Get all integer regs from the child. */
208 case PTRACE_SYSCALL: 333 return copy_regset_to_user(child, &user_mn10300_native_view,
209 /* restart after signal. */ 334 REGSET_GENERAL,
210 case PTRACE_CONT: 335 0, NR_PTREGS * sizeof(long),
211 ret = -EIO; 336 (void __user *)data);
212 if ((unsigned long) data > _NSIG) 337
213 break; 338 case PTRACE_SETREGS: /* Set all integer regs in the child. */
214 if (request == PTRACE_SYSCALL) 339 return copy_regset_from_user(child, &user_mn10300_native_view,
215 set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); 340 REGSET_GENERAL,
216 else 341 0, NR_PTREGS * sizeof(long),
217 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); 342 (const void __user *)data);
218 child->exit_code = data; 343
219 ptrace_disable(child); 344 case PTRACE_GETFPREGS: /* Get the child FPU state. */
220 wake_up_process(child); 345 return copy_regset_to_user(child, &user_mn10300_native_view,
221 ret = 0; 346 REGSET_FPU,
222 break; 347 0, sizeof(struct fpu_state_struct),
223 348 (void __user *)data);
224 /* 349
225 * make the child exit 350 case PTRACE_SETFPREGS: /* Set the child FPU state. */
226 * - the best I can do is send it a sigkill 351 return copy_regset_from_user(child, &user_mn10300_native_view,
227 * - perhaps it should be put in the status that it wants to 352 REGSET_FPU,
228 * exit 353 0, sizeof(struct fpu_state_struct),
229 */ 354 (const void __user *)data);
230 case PTRACE_KILL:
231 ret = 0;
232 if (child->exit_state == EXIT_ZOMBIE) /* already dead */
233 break;
234 child->exit_code = SIGKILL;
235 clear_tsk_thread_flag(child, TIF_SINGLESTEP);
236 ptrace_disable(child);
237 wake_up_process(child);
238 break;
239
240 case PTRACE_SINGLESTEP: /* set the trap flag. */
241#ifndef CONFIG_MN10300_USING_JTAG
242 ret = -EIO;
243 if ((unsigned long) data > _NSIG)
244 break;
245 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
246 ptrace_enable(child);
247 child->exit_code = data;
248 wake_up_process(child);
249 ret = 0;
250#else
251 ret = -EINVAL;
252#endif
253 break;
254
255 case PTRACE_DETACH: /* detach a process that was attached. */
256 ret = ptrace_detach(child, data);
257 break;
258
259 /* Get all gp regs from the child. */
260 case PTRACE_GETREGS: {
261 unsigned long tmp;
262
263 if (!access_ok(VERIFY_WRITE, (unsigned *) data, NR_PTREGS << 2)) {
264 ret = -EIO;
265 break;
266 }
267
268 for (i = 0; i < NR_PTREGS << 2; i += 4) {
269 tmp = get_stack_long(child, ptrace_regid_to_frame[i]);
270 __put_user(tmp, (unsigned long *) data);
271 data += sizeof(tmp);
272 }
273 ret = 0;
274 break;
275 }
276
277 case PTRACE_SETREGS: { /* Set all gp regs in the child. */
278 unsigned long tmp;
279
280 if (!access_ok(VERIFY_READ, (unsigned long *)data,
281 sizeof(struct pt_regs))) {
282 ret = -EIO;
283 break;
284 }
285
286 for (i = 0; i < NR_PTREGS << 2; i += 4) {
287 __get_user(tmp, (unsigned long *) data);
288 put_stack_long(child, ptrace_regid_to_frame[i], tmp);
289 data += sizeof(tmp);
290 }
291 ret = 0;
292 break;
293 }
294
295 case PTRACE_GETFPREGS: { /* Get the child FPU state. */
296 if (is_using_fpu(child)) {
297 unlazy_fpu(child);
298 fpu_state = child->thread.fpu_state;
299 } else {
300 memset(&fpu_state, 0, sizeof(fpu_state));
301 }
302
303 ret = -EIO;
304 if (copy_to_user((void *) data, &fpu_state,
305 sizeof(fpu_state)) == 0)
306 ret = 0;
307 break;
308 }
309
310 case PTRACE_SETFPREGS: { /* Set the child FPU state. */
311 ret = -EFAULT;
312 if (copy_from_user(&fpu_state, (const void *) data,
313 sizeof(fpu_state)) == 0) {
314 fpu_kill_state(child);
315 child->thread.fpu_state = fpu_state;
316 set_using_fpu(child);
317 ret = 0;
318 }
319 break;
320 }
321
322 case PTRACE_SETOPTIONS: {
323 if (data & PTRACE_O_TRACESYSGOOD)
324 child->ptrace |= PT_TRACESYSGOOD;
325 else
326 child->ptrace &= ~PT_TRACESYSGOOD;
327 ret = 0;
328 break;
329 }
330 355
331 default: 356 default:
332 ret = -EIO; 357 ret = ptrace_request(child, request, addr, data);
333 break; 358 break;
334 } 359 }
335 360
@@ -337,43 +362,26 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
337} 362}
338 363
339/* 364/*
340 * notification of system call entry/exit 365 * handle tracing of system call entry
341 * - triggered by current->work.syscall_trace 366 * - return the revised system call number or ULONG_MAX to cause ENOSYS
342 */ 367 */
343asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit) 368asmlinkage unsigned long syscall_trace_entry(struct pt_regs *regs)
344{ 369{
345#if 0 370 if (tracehook_report_syscall_entry(regs))
346 /* just in case... */ 371 /* tracing decided this syscall should not happen, so
347 printk(KERN_DEBUG "[%d] syscall_%lu(%lx,%lx,%lx,%lx) = %lx\n", 372 * We'll return a bogus call number to get an ENOSYS
348 current->pid, 373 * error, but leave the original number in
349 regs->orig_d0, 374 * regs->orig_d0
350 regs->a0, 375 */
351 regs->d1, 376 return ULONG_MAX;
352 regs->a3,
353 regs->a2,
354 regs->d0);
355 return;
356#endif
357
358 if (!test_thread_flag(TIF_SYSCALL_TRACE) &&
359 !test_thread_flag(TIF_SINGLESTEP))
360 return;
361 if (!(current->ptrace & PT_PTRACED))
362 return;
363 377
364 /* the 0x80 provides a way for the tracing parent to distinguish 378 return regs->orig_d0;
365 between a syscall stop and SIGTRAP delivery */ 379}
366 ptrace_notify(SIGTRAP |
367 ((current->ptrace & PT_TRACESYSGOOD) &&
368 !test_thread_flag(TIF_SINGLESTEP) ? 0x80 : 0));
369 380
370 /* 381/*
371 * this isn't the same as continuing with a signal, but it will do 382 * handle tracing of system call exit
372 * for normal use. strace only continues with a signal if the 383 */
373 * stopping signal is not SIGTRAP. -brl 384asmlinkage void syscall_trace_exit(struct pt_regs *regs)
374 */ 385{
375 if (current->exit_code) { 386 tracehook_report_syscall_exit(regs, 0);
376 send_sig(current->exit_code, current, 1);
377 current->exit_code = 0;
378 }
379} 387}
diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c
index 841ca9955a18..9f7572a0f578 100644
--- a/arch/mn10300/kernel/signal.c
+++ b/arch/mn10300/kernel/signal.c
@@ -23,6 +23,7 @@
23#include <linux/tty.h> 23#include <linux/tty.h>
24#include <linux/personality.h> 24#include <linux/personality.h>
25#include <linux/suspend.h> 25#include <linux/suspend.h>
26#include <linux/tracehook.h>
26#include <asm/cacheflush.h> 27#include <asm/cacheflush.h>
27#include <asm/ucontext.h> 28#include <asm/ucontext.h>
28#include <asm/uaccess.h> 29#include <asm/uaccess.h>
@@ -511,6 +512,9 @@ static void do_signal(struct pt_regs *regs)
511 * clear the TIF_RESTORE_SIGMASK flag */ 512 * clear the TIF_RESTORE_SIGMASK flag */
512 if (test_thread_flag(TIF_RESTORE_SIGMASK)) 513 if (test_thread_flag(TIF_RESTORE_SIGMASK))
513 clear_thread_flag(TIF_RESTORE_SIGMASK); 514 clear_thread_flag(TIF_RESTORE_SIGMASK);
515
516 tracehook_signal_handler(signr, &info, &ka, regs,
517 test_thread_flag(TIF_SINGLESTEP));
514 } 518 }
515 519
516 return; 520 return;
@@ -561,4 +565,9 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags)
561 /* deal with pending signal delivery */ 565 /* deal with pending signal delivery */
562 if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) 566 if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
563 do_signal(regs); 567 do_signal(regs);
568
569 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
570 clear_thread_flag(TIF_NOTIFY_RESUME);
571 tracehook_notify_resume(__frame);
572 }
564} 573}
diff --git a/arch/mn10300/mm/tlb-mn10300.S b/arch/mn10300/mm/tlb-mn10300.S
index 789208094e98..7095147dcb8b 100644
--- a/arch/mn10300/mm/tlb-mn10300.S
+++ b/arch/mn10300/mm/tlb-mn10300.S
@@ -165,24 +165,6 @@ ENTRY(itlb_aerror)
165ENTRY(dtlb_aerror) 165ENTRY(dtlb_aerror)
166 and ~EPSW_NMID,epsw 166 and ~EPSW_NMID,epsw
167 add -4,sp 167 add -4,sp
168 mov d1,(sp)
169
170 movhu (MMUFCR_DFC),d1 # is it the initial valid write
171 # to this page?
172 and MMUFCR_xFC_INITWR,d1
173 beq dtlb_pagefault # jump if not
174
175 mov (DPTEL),d1 # set the dirty bit
176 # (don't replace with BSET!)
177 or _PAGE_DIRTY,d1
178 mov d1,(DPTEL)
179 mov (sp),d1
180 add 4,sp
181 rti
182
183 ALIGN
184dtlb_pagefault:
185 mov (sp),d1
186 SAVE_ALL 168 SAVE_ALL
187 add -4,sp # need to pass three params 169 add -4,sp # need to pass three params
188 170
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 9057335fdc61..2cf915e51e7e 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -41,6 +41,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
41 return !!(v->arch.pending_exceptions); 41 return !!(v->arch.pending_exceptions);
42} 42}
43 43
44int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
45{
46 /* do real check here */
47 return 1;
48}
49
44int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) 50int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
45{ 51{
46 return !(v->arch.msr & MSR_WE); 52 return !(v->arch.msr & MSR_WE);
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 54ea39f96ecd..a27d0d5a6f86 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -13,6 +13,8 @@
13 13
14#ifndef ASM_KVM_HOST_H 14#ifndef ASM_KVM_HOST_H
15#define ASM_KVM_HOST_H 15#define ASM_KVM_HOST_H
16#include <linux/hrtimer.h>
17#include <linux/interrupt.h>
16#include <linux/kvm_host.h> 18#include <linux/kvm_host.h>
17#include <asm/debug.h> 19#include <asm/debug.h>
18#include <asm/cpuid.h> 20#include <asm/cpuid.h>
@@ -210,7 +212,8 @@ struct kvm_vcpu_arch {
210 s390_fp_regs guest_fpregs; 212 s390_fp_regs guest_fpregs;
211 unsigned int guest_acrs[NUM_ACRS]; 213 unsigned int guest_acrs[NUM_ACRS];
212 struct kvm_s390_local_interrupt local_int; 214 struct kvm_s390_local_interrupt local_int;
213 struct timer_list ckc_timer; 215 struct hrtimer ckc_timer;
216 struct tasklet_struct tasklet;
214 union { 217 union {
215 cpuid_t cpu_id; 218 cpuid_t cpu_id;
216 u64 stidp_data; 219 u64 stidp_data;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 9d19803111ba..98997ccba501 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -154,17 +154,25 @@ static int handle_stop(struct kvm_vcpu *vcpu)
154static int handle_validity(struct kvm_vcpu *vcpu) 154static int handle_validity(struct kvm_vcpu *vcpu)
155{ 155{
156 int viwhy = vcpu->arch.sie_block->ipb >> 16; 156 int viwhy = vcpu->arch.sie_block->ipb >> 16;
157 int rc;
158
157 vcpu->stat.exit_validity++; 159 vcpu->stat.exit_validity++;
158 if (viwhy == 0x37) { 160 if ((viwhy == 0x37) && (vcpu->arch.sie_block->prefix
159 fault_in_pages_writeable((char __user *) 161 <= vcpu->kvm->arch.guest_memsize - 2*PAGE_SIZE)){
160 vcpu->kvm->arch.guest_origin + 162 rc = fault_in_pages_writeable((char __user *)
161 vcpu->arch.sie_block->prefix, 163 vcpu->kvm->arch.guest_origin +
162 PAGE_SIZE); 164 vcpu->arch.sie_block->prefix,
163 return 0; 165 2*PAGE_SIZE);
164 } 166 if (rc)
165 VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d", 167 /* user will receive sigsegv, exit to user */
166 viwhy); 168 rc = -ENOTSUPP;
167 return -ENOTSUPP; 169 } else
170 rc = -ENOTSUPP;
171
172 if (rc)
173 VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
174 viwhy);
175 return rc;
168} 176}
169 177
170static int handle_instruction(struct kvm_vcpu *vcpu) 178static int handle_instruction(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 0189356fe209..f04f5301b1b4 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -12,6 +12,8 @@
12 12
13#include <asm/lowcore.h> 13#include <asm/lowcore.h>
14#include <asm/uaccess.h> 14#include <asm/uaccess.h>
15#include <linux/hrtimer.h>
16#include <linux/interrupt.h>
15#include <linux/kvm_host.h> 17#include <linux/kvm_host.h>
16#include <linux/signal.h> 18#include <linux/signal.h>
17#include "kvm-s390.h" 19#include "kvm-s390.h"
@@ -299,13 +301,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
299 } 301 }
300 302
301 if ((!rc) && atomic_read(&fi->active)) { 303 if ((!rc) && atomic_read(&fi->active)) {
302 spin_lock_bh(&fi->lock); 304 spin_lock(&fi->lock);
303 list_for_each_entry(inti, &fi->list, list) 305 list_for_each_entry(inti, &fi->list, list)
304 if (__interrupt_is_deliverable(vcpu, inti)) { 306 if (__interrupt_is_deliverable(vcpu, inti)) {
305 rc = 1; 307 rc = 1;
306 break; 308 break;
307 } 309 }
308 spin_unlock_bh(&fi->lock); 310 spin_unlock(&fi->lock);
309 } 311 }
310 312
311 if ((!rc) && (vcpu->arch.sie_block->ckc < 313 if ((!rc) && (vcpu->arch.sie_block->ckc <
@@ -318,6 +320,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
318 return rc; 320 return rc;
319} 321}
320 322
323int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
324{
325 /* do real check here */
326 return 1;
327}
328
321int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) 329int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
322{ 330{
323 return 0; 331 return 0;
@@ -355,14 +363,12 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
355 return 0; 363 return 0;
356 } 364 }
357 365
358 sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1; 366 sltime = ((vcpu->arch.sie_block->ckc - now)*125)>>9;
359 367
360 vcpu->arch.ckc_timer.expires = jiffies + sltime; 368 hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
361 369 VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
362 add_timer(&vcpu->arch.ckc_timer);
363 VCPU_EVENT(vcpu, 5, "enabled wait timer:%llx jiffies", sltime);
364no_timer: 370no_timer:
365 spin_lock_bh(&vcpu->arch.local_int.float_int->lock); 371 spin_lock(&vcpu->arch.local_int.float_int->lock);
366 spin_lock_bh(&vcpu->arch.local_int.lock); 372 spin_lock_bh(&vcpu->arch.local_int.lock);
367 add_wait_queue(&vcpu->arch.local_int.wq, &wait); 373 add_wait_queue(&vcpu->arch.local_int.wq, &wait);
368 while (list_empty(&vcpu->arch.local_int.list) && 374 while (list_empty(&vcpu->arch.local_int.list) &&
@@ -371,33 +377,46 @@ no_timer:
371 !signal_pending(current)) { 377 !signal_pending(current)) {
372 set_current_state(TASK_INTERRUPTIBLE); 378 set_current_state(TASK_INTERRUPTIBLE);
373 spin_unlock_bh(&vcpu->arch.local_int.lock); 379 spin_unlock_bh(&vcpu->arch.local_int.lock);
374 spin_unlock_bh(&vcpu->arch.local_int.float_int->lock); 380 spin_unlock(&vcpu->arch.local_int.float_int->lock);
375 vcpu_put(vcpu); 381 vcpu_put(vcpu);
376 schedule(); 382 schedule();
377 vcpu_load(vcpu); 383 vcpu_load(vcpu);
378 spin_lock_bh(&vcpu->arch.local_int.float_int->lock); 384 spin_lock(&vcpu->arch.local_int.float_int->lock);
379 spin_lock_bh(&vcpu->arch.local_int.lock); 385 spin_lock_bh(&vcpu->arch.local_int.lock);
380 } 386 }
381 __unset_cpu_idle(vcpu); 387 __unset_cpu_idle(vcpu);
382 __set_current_state(TASK_RUNNING); 388 __set_current_state(TASK_RUNNING);
383 remove_wait_queue(&vcpu->wq, &wait); 389 remove_wait_queue(&vcpu->wq, &wait);
384 spin_unlock_bh(&vcpu->arch.local_int.lock); 390 spin_unlock_bh(&vcpu->arch.local_int.lock);
385 spin_unlock_bh(&vcpu->arch.local_int.float_int->lock); 391 spin_unlock(&vcpu->arch.local_int.float_int->lock);
386 del_timer(&vcpu->arch.ckc_timer); 392 hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
387 return 0; 393 return 0;
388} 394}
389 395
390void kvm_s390_idle_wakeup(unsigned long data) 396void kvm_s390_tasklet(unsigned long parm)
391{ 397{
392 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; 398 struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm;
393 399
394 spin_lock_bh(&vcpu->arch.local_int.lock); 400 spin_lock(&vcpu->arch.local_int.lock);
395 vcpu->arch.local_int.timer_due = 1; 401 vcpu->arch.local_int.timer_due = 1;
396 if (waitqueue_active(&vcpu->arch.local_int.wq)) 402 if (waitqueue_active(&vcpu->arch.local_int.wq))
397 wake_up_interruptible(&vcpu->arch.local_int.wq); 403 wake_up_interruptible(&vcpu->arch.local_int.wq);
398 spin_unlock_bh(&vcpu->arch.local_int.lock); 404 spin_unlock(&vcpu->arch.local_int.lock);
399} 405}
400 406
407/*
408 * low level hrtimer wake routine. Because this runs in hardirq context
409 * we schedule a tasklet to do the real work.
410 */
411enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
412{
413 struct kvm_vcpu *vcpu;
414
415 vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
416 tasklet_schedule(&vcpu->arch.tasklet);
417
418 return HRTIMER_NORESTART;
419}
401 420
402void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) 421void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
403{ 422{
@@ -436,7 +455,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
436 if (atomic_read(&fi->active)) { 455 if (atomic_read(&fi->active)) {
437 do { 456 do {
438 deliver = 0; 457 deliver = 0;
439 spin_lock_bh(&fi->lock); 458 spin_lock(&fi->lock);
440 list_for_each_entry_safe(inti, n, &fi->list, list) { 459 list_for_each_entry_safe(inti, n, &fi->list, list) {
441 if (__interrupt_is_deliverable(vcpu, inti)) { 460 if (__interrupt_is_deliverable(vcpu, inti)) {
442 list_del(&inti->list); 461 list_del(&inti->list);
@@ -447,7 +466,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
447 } 466 }
448 if (list_empty(&fi->list)) 467 if (list_empty(&fi->list))
449 atomic_set(&fi->active, 0); 468 atomic_set(&fi->active, 0);
450 spin_unlock_bh(&fi->lock); 469 spin_unlock(&fi->lock);
451 if (deliver) { 470 if (deliver) {
452 __do_deliver_interrupt(vcpu, inti); 471 __do_deliver_interrupt(vcpu, inti);
453 kfree(inti); 472 kfree(inti);
@@ -512,7 +531,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
512 531
513 mutex_lock(&kvm->lock); 532 mutex_lock(&kvm->lock);
514 fi = &kvm->arch.float_int; 533 fi = &kvm->arch.float_int;
515 spin_lock_bh(&fi->lock); 534 spin_lock(&fi->lock);
516 list_add_tail(&inti->list, &fi->list); 535 list_add_tail(&inti->list, &fi->list);
517 atomic_set(&fi->active, 1); 536 atomic_set(&fi->active, 1);
518 sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); 537 sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
@@ -529,7 +548,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
529 if (waitqueue_active(&li->wq)) 548 if (waitqueue_active(&li->wq))
530 wake_up_interruptible(&li->wq); 549 wake_up_interruptible(&li->wq);
531 spin_unlock_bh(&li->lock); 550 spin_unlock_bh(&li->lock);
532 spin_unlock_bh(&fi->lock); 551 spin_unlock(&fi->lock);
533 mutex_unlock(&kvm->lock); 552 mutex_unlock(&kvm->lock);
534 return 0; 553 return 0;
535} 554}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index f4d56e9939c9..10bccd1f8aee 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -15,6 +15,7 @@
15#include <linux/compiler.h> 15#include <linux/compiler.h>
16#include <linux/err.h> 16#include <linux/err.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/hrtimer.h>
18#include <linux/init.h> 19#include <linux/init.h>
19#include <linux/kvm.h> 20#include <linux/kvm.h>
20#include <linux/kvm_host.h> 21#include <linux/kvm_host.h>
@@ -195,6 +196,10 @@ out_nokvm:
195void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 196void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
196{ 197{
197 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 198 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
199 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
200 (__u64) vcpu->arch.sie_block)
201 vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
202 smp_mb();
198 free_page((unsigned long)(vcpu->arch.sie_block)); 203 free_page((unsigned long)(vcpu->arch.sie_block));
199 kvm_vcpu_uninit(vcpu); 204 kvm_vcpu_uninit(vcpu);
200 kfree(vcpu); 205 kfree(vcpu);
@@ -283,8 +288,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
283 vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin; 288 vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
284 vcpu->arch.sie_block->ecb = 2; 289 vcpu->arch.sie_block->ecb = 2;
285 vcpu->arch.sie_block->eca = 0xC1002001U; 290 vcpu->arch.sie_block->eca = 0xC1002001U;
286 setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, 291 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
287 (unsigned long) vcpu); 292 tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
293 (unsigned long) vcpu);
294 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
288 get_cpu_id(&vcpu->arch.cpu_id); 295 get_cpu_id(&vcpu->arch.cpu_id);
289 vcpu->arch.cpu_id.version = 0xff; 296 vcpu->arch.cpu_id.version = 0xff;
290 return 0; 297 return 0;
@@ -307,19 +314,21 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
307 314
308 vcpu->arch.sie_block->icpua = id; 315 vcpu->arch.sie_block->icpua = id;
309 BUG_ON(!kvm->arch.sca); 316 BUG_ON(!kvm->arch.sca);
310 BUG_ON(kvm->arch.sca->cpu[id].sda); 317 if (!kvm->arch.sca->cpu[id].sda)
311 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; 318 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
319 else
320 BUG_ON(!kvm->vcpus[id]); /* vcpu does already exist */
312 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32); 321 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
313 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; 322 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
314 323
315 spin_lock_init(&vcpu->arch.local_int.lock); 324 spin_lock_init(&vcpu->arch.local_int.lock);
316 INIT_LIST_HEAD(&vcpu->arch.local_int.list); 325 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
317 vcpu->arch.local_int.float_int = &kvm->arch.float_int; 326 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
318 spin_lock_bh(&kvm->arch.float_int.lock); 327 spin_lock(&kvm->arch.float_int.lock);
319 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; 328 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
320 init_waitqueue_head(&vcpu->arch.local_int.wq); 329 init_waitqueue_head(&vcpu->arch.local_int.wq);
321 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; 330 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
322 spin_unlock_bh(&kvm->arch.float_int.lock); 331 spin_unlock(&kvm->arch.float_int.lock);
323 332
324 rc = kvm_vcpu_init(vcpu, kvm, id); 333 rc = kvm_vcpu_init(vcpu, kvm, id);
325 if (rc) 334 if (rc)
@@ -478,6 +487,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
478 487
479 vcpu_load(vcpu); 488 vcpu_load(vcpu);
480 489
490 /* verify, that memory has been registered */
491 if (!vcpu->kvm->arch.guest_memsize) {
492 vcpu_put(vcpu);
493 return -EINVAL;
494 }
495
481 if (vcpu->sigset_active) 496 if (vcpu->sigset_active)
482 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 497 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
483 498
@@ -657,6 +672,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
657 struct kvm_memory_slot old, 672 struct kvm_memory_slot old,
658 int user_alloc) 673 int user_alloc)
659{ 674{
675 int i;
676
660 /* A few sanity checks. We can have exactly one memory slot which has 677 /* A few sanity checks. We can have exactly one memory slot which has
661 to start at guest virtual zero and which has to be located at a 678 to start at guest virtual zero and which has to be located at a
662 page boundary in userland and which has to end at a page boundary. 679 page boundary in userland and which has to end at a page boundary.
@@ -664,7 +681,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
664 vmas. It is okay to mmap() and munmap() stuff in this slot after 681 vmas. It is okay to mmap() and munmap() stuff in this slot after
665 doing this call at any time */ 682 doing this call at any time */
666 683
667 if (mem->slot) 684 if (mem->slot || kvm->arch.guest_memsize)
668 return -EINVAL; 685 return -EINVAL;
669 686
670 if (mem->guest_phys_addr) 687 if (mem->guest_phys_addr)
@@ -676,15 +693,39 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
676 if (mem->memory_size & (PAGE_SIZE - 1)) 693 if (mem->memory_size & (PAGE_SIZE - 1))
677 return -EINVAL; 694 return -EINVAL;
678 695
696 if (!user_alloc)
697 return -EINVAL;
698
699 /* lock all vcpus */
700 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
701 if (!kvm->vcpus[i])
702 continue;
703 if (!mutex_trylock(&kvm->vcpus[i]->mutex))
704 goto fail_out;
705 }
706
679 kvm->arch.guest_origin = mem->userspace_addr; 707 kvm->arch.guest_origin = mem->userspace_addr;
680 kvm->arch.guest_memsize = mem->memory_size; 708 kvm->arch.guest_memsize = mem->memory_size;
681 709
682 /* FIXME: we do want to interrupt running CPUs and update their memory 710 /* update sie control blocks, and unlock all vcpus */
683 configuration now to avoid race conditions. But hey, changing the 711 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
684 memory layout while virtual CPUs are running is usually bad 712 if (kvm->vcpus[i]) {
685 programming practice. */ 713 kvm->vcpus[i]->arch.sie_block->gmsor =
714 kvm->arch.guest_origin;
715 kvm->vcpus[i]->arch.sie_block->gmslm =
716 kvm->arch.guest_memsize +
717 kvm->arch.guest_origin +
718 VIRTIODESCSPACE - 1ul;
719 mutex_unlock(&kvm->vcpus[i]->mutex);
720 }
721 }
686 722
687 return 0; 723 return 0;
724
725fail_out:
726 for (; i >= 0; i--)
727 mutex_unlock(&kvm->vcpus[i]->mutex);
728 return -EINVAL;
688} 729}
689 730
690void kvm_arch_flush_shadow(struct kvm *kvm) 731void kvm_arch_flush_shadow(struct kvm *kvm)
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 00bbe69b78da..748fee872323 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -14,6 +14,7 @@
14#ifndef ARCH_S390_KVM_S390_H 14#ifndef ARCH_S390_KVM_S390_H
15#define ARCH_S390_KVM_S390_H 15#define ARCH_S390_KVM_S390_H
16 16
17#include <linux/hrtimer.h>
17#include <linux/kvm.h> 18#include <linux/kvm.h>
18#include <linux/kvm_host.h> 19#include <linux/kvm_host.h>
19 20
@@ -41,7 +42,8 @@ static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu)
41} 42}
42 43
43int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); 44int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
44void kvm_s390_idle_wakeup(unsigned long data); 45enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
46void kvm_s390_tasklet(unsigned long parm);
45void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); 47void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
46int kvm_s390_inject_vm(struct kvm *kvm, 48int kvm_s390_inject_vm(struct kvm *kvm,
47 struct kvm_s390_interrupt *s390int); 49 struct kvm_s390_interrupt *s390int);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 4b88834b8dd8..93ecd06e1a74 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -204,11 +204,11 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
204 int cpus = 0; 204 int cpus = 0;
205 int n; 205 int n;
206 206
207 spin_lock_bh(&fi->lock); 207 spin_lock(&fi->lock);
208 for (n = 0; n < KVM_MAX_VCPUS; n++) 208 for (n = 0; n < KVM_MAX_VCPUS; n++)
209 if (fi->local_int[n]) 209 if (fi->local_int[n])
210 cpus++; 210 cpus++;
211 spin_unlock_bh(&fi->lock); 211 spin_unlock(&fi->lock);
212 212
213 /* deal with other level 3 hypervisors */ 213 /* deal with other level 3 hypervisors */
214 if (stsi(mem, 3, 2, 2) == -ENOSYS) 214 if (stsi(mem, 3, 2, 2) == -ENOSYS)
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index f27dbedf0866..36678835034d 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -52,7 +52,7 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
52 if (cpu_addr >= KVM_MAX_VCPUS) 52 if (cpu_addr >= KVM_MAX_VCPUS)
53 return 3; /* not operational */ 53 return 3; /* not operational */
54 54
55 spin_lock_bh(&fi->lock); 55 spin_lock(&fi->lock);
56 if (fi->local_int[cpu_addr] == NULL) 56 if (fi->local_int[cpu_addr] == NULL)
57 rc = 3; /* not operational */ 57 rc = 3; /* not operational */
58 else if (atomic_read(fi->local_int[cpu_addr]->cpuflags) 58 else if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
@@ -64,7 +64,7 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
64 *reg |= SIGP_STAT_STOPPED; 64 *reg |= SIGP_STAT_STOPPED;
65 rc = 1; /* status stored */ 65 rc = 1; /* status stored */
66 } 66 }
67 spin_unlock_bh(&fi->lock); 67 spin_unlock(&fi->lock);
68 68
69 VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc); 69 VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
70 return rc; 70 return rc;
@@ -86,7 +86,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
86 86
87 inti->type = KVM_S390_INT_EMERGENCY; 87 inti->type = KVM_S390_INT_EMERGENCY;
88 88
89 spin_lock_bh(&fi->lock); 89 spin_lock(&fi->lock);
90 li = fi->local_int[cpu_addr]; 90 li = fi->local_int[cpu_addr];
91 if (li == NULL) { 91 if (li == NULL) {
92 rc = 3; /* not operational */ 92 rc = 3; /* not operational */
@@ -102,7 +102,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
102 spin_unlock_bh(&li->lock); 102 spin_unlock_bh(&li->lock);
103 rc = 0; /* order accepted */ 103 rc = 0; /* order accepted */
104unlock: 104unlock:
105 spin_unlock_bh(&fi->lock); 105 spin_unlock(&fi->lock);
106 VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); 106 VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
107 return rc; 107 return rc;
108} 108}
@@ -123,7 +123,7 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
123 123
124 inti->type = KVM_S390_SIGP_STOP; 124 inti->type = KVM_S390_SIGP_STOP;
125 125
126 spin_lock_bh(&fi->lock); 126 spin_lock(&fi->lock);
127 li = fi->local_int[cpu_addr]; 127 li = fi->local_int[cpu_addr];
128 if (li == NULL) { 128 if (li == NULL) {
129 rc = 3; /* not operational */ 129 rc = 3; /* not operational */
@@ -142,7 +142,7 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
142 spin_unlock_bh(&li->lock); 142 spin_unlock_bh(&li->lock);
143 rc = 0; /* order accepted */ 143 rc = 0; /* order accepted */
144unlock: 144unlock:
145 spin_unlock_bh(&fi->lock); 145 spin_unlock(&fi->lock);
146 VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); 146 VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
147 return rc; 147 return rc;
148} 148}
@@ -188,7 +188,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
188 if (!inti) 188 if (!inti)
189 return 2; /* busy */ 189 return 2; /* busy */
190 190
191 spin_lock_bh(&fi->lock); 191 spin_lock(&fi->lock);
192 li = fi->local_int[cpu_addr]; 192 li = fi->local_int[cpu_addr];
193 193
194 if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) { 194 if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) {
@@ -220,7 +220,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
220out_li: 220out_li:
221 spin_unlock_bh(&li->lock); 221 spin_unlock_bh(&li->lock);
222out_fi: 222out_fi:
223 spin_unlock_bh(&fi->lock); 223 spin_unlock(&fi->lock);
224 return rc; 224 return rc;
225} 225}
226 226
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 19af42138f78..4a28d22d4793 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -116,6 +116,8 @@
116#define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */ 116#define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */
117#define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */ 117#define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */
118#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ 118#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */
119#define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */
120#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */
119#define X86_FEATURE_AES (4*32+25) /* AES instructions */ 121#define X86_FEATURE_AES (4*32+25) /* AES instructions */
120#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ 122#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
121#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ 123#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index dc3f6cf11704..125be8b19568 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -16,6 +16,7 @@
16#define __KVM_HAVE_MSI 16#define __KVM_HAVE_MSI
17#define __KVM_HAVE_USER_NMI 17#define __KVM_HAVE_USER_NMI
18#define __KVM_HAVE_GUEST_DEBUG 18#define __KVM_HAVE_GUEST_DEBUG
19#define __KVM_HAVE_MSIX
19 20
20/* Architectural interrupt line count. */ 21/* Architectural interrupt line count. */
21#define KVM_NR_INTERRUPTS 256 22#define KVM_NR_INTERRUPTS 256
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f0faf58044ff..eabdc1cfab5c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -185,6 +185,7 @@ union kvm_mmu_page_role {
185 unsigned access:3; 185 unsigned access:3;
186 unsigned invalid:1; 186 unsigned invalid:1;
187 unsigned cr4_pge:1; 187 unsigned cr4_pge:1;
188 unsigned nxe:1;
188 }; 189 };
189}; 190};
190 191
@@ -212,7 +213,6 @@ struct kvm_mmu_page {
212 int multimapped; /* More than one parent_pte? */ 213 int multimapped; /* More than one parent_pte? */
213 int root_count; /* Currently serving as active root */ 214 int root_count; /* Currently serving as active root */
214 bool unsync; 215 bool unsync;
215 bool global;
216 unsigned int unsync_children; 216 unsigned int unsync_children;
217 union { 217 union {
218 u64 *parent_pte; /* !multimapped */ 218 u64 *parent_pte; /* !multimapped */
@@ -261,13 +261,11 @@ struct kvm_mmu {
261 union kvm_mmu_page_role base_role; 261 union kvm_mmu_page_role base_role;
262 262
263 u64 *pae_root; 263 u64 *pae_root;
264 u64 rsvd_bits_mask[2][4];
264}; 265};
265 266
266struct kvm_vcpu_arch { 267struct kvm_vcpu_arch {
267 u64 host_tsc; 268 u64 host_tsc;
268 int interrupt_window_open;
269 unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
270 DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
271 /* 269 /*
272 * rip and regs accesses must go through 270 * rip and regs accesses must go through
273 * kvm_{register,rip}_{read,write} functions. 271 * kvm_{register,rip}_{read,write} functions.
@@ -286,6 +284,7 @@ struct kvm_vcpu_arch {
286 u64 shadow_efer; 284 u64 shadow_efer;
287 u64 apic_base; 285 u64 apic_base;
288 struct kvm_lapic *apic; /* kernel irqchip context */ 286 struct kvm_lapic *apic; /* kernel irqchip context */
287 int32_t apic_arb_prio;
289 int mp_state; 288 int mp_state;
290 int sipi_vector; 289 int sipi_vector;
291 u64 ia32_misc_enable_msr; 290 u64 ia32_misc_enable_msr;
@@ -320,6 +319,8 @@ struct kvm_vcpu_arch {
320 struct kvm_pio_request pio; 319 struct kvm_pio_request pio;
321 void *pio_data; 320 void *pio_data;
322 321
322 u8 event_exit_inst_len;
323
323 struct kvm_queued_exception { 324 struct kvm_queued_exception {
324 bool pending; 325 bool pending;
325 bool has_error_code; 326 bool has_error_code;
@@ -329,11 +330,12 @@ struct kvm_vcpu_arch {
329 330
330 struct kvm_queued_interrupt { 331 struct kvm_queued_interrupt {
331 bool pending; 332 bool pending;
333 bool soft;
332 u8 nr; 334 u8 nr;
333 } interrupt; 335 } interrupt;
334 336
335 struct { 337 struct {
336 int active; 338 int vm86_active;
337 u8 save_iopl; 339 u8 save_iopl;
338 struct kvm_save_segment { 340 struct kvm_save_segment {
339 u16 selector; 341 u16 selector;
@@ -356,9 +358,9 @@ struct kvm_vcpu_arch {
356 unsigned int time_offset; 358 unsigned int time_offset;
357 struct page *time_page; 359 struct page *time_page;
358 360
361 bool singlestep; /* guest is single stepped by KVM */
359 bool nmi_pending; 362 bool nmi_pending;
360 bool nmi_injected; 363 bool nmi_injected;
361 bool nmi_window_open;
362 364
363 struct mtrr_state_type mtrr_state; 365 struct mtrr_state_type mtrr_state;
364 u32 pat; 366 u32 pat;
@@ -392,15 +394,14 @@ struct kvm_arch{
392 */ 394 */
393 struct list_head active_mmu_pages; 395 struct list_head active_mmu_pages;
394 struct list_head assigned_dev_head; 396 struct list_head assigned_dev_head;
395 struct list_head oos_global_pages;
396 struct iommu_domain *iommu_domain; 397 struct iommu_domain *iommu_domain;
398 int iommu_flags;
397 struct kvm_pic *vpic; 399 struct kvm_pic *vpic;
398 struct kvm_ioapic *vioapic; 400 struct kvm_ioapic *vioapic;
399 struct kvm_pit *vpit; 401 struct kvm_pit *vpit;
400 struct hlist_head irq_ack_notifier_list; 402 struct hlist_head irq_ack_notifier_list;
401 int vapics_in_nmi_mode; 403 int vapics_in_nmi_mode;
402 404
403 int round_robin_prev_vcpu;
404 unsigned int tss_addr; 405 unsigned int tss_addr;
405 struct page *apic_access_page; 406 struct page *apic_access_page;
406 407
@@ -423,7 +424,6 @@ struct kvm_vm_stat {
423 u32 mmu_recycled; 424 u32 mmu_recycled;
424 u32 mmu_cache_miss; 425 u32 mmu_cache_miss;
425 u32 mmu_unsync; 426 u32 mmu_unsync;
426 u32 mmu_unsync_global;
427 u32 remote_tlb_flush; 427 u32 remote_tlb_flush;
428 u32 lpages; 428 u32 lpages;
429}; 429};
@@ -443,7 +443,6 @@ struct kvm_vcpu_stat {
443 u32 halt_exits; 443 u32 halt_exits;
444 u32 halt_wakeup; 444 u32 halt_wakeup;
445 u32 request_irq_exits; 445 u32 request_irq_exits;
446 u32 request_nmi_exits;
447 u32 irq_exits; 446 u32 irq_exits;
448 u32 host_state_reload; 447 u32 host_state_reload;
449 u32 efer_reload; 448 u32 efer_reload;
@@ -511,20 +510,22 @@ struct kvm_x86_ops {
511 void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); 510 void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
512 int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); 511 int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
513 void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); 512 void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
513 void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
514 u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
514 void (*patch_hypercall)(struct kvm_vcpu *vcpu, 515 void (*patch_hypercall)(struct kvm_vcpu *vcpu,
515 unsigned char *hypercall_addr); 516 unsigned char *hypercall_addr);
516 int (*get_irq)(struct kvm_vcpu *vcpu); 517 void (*set_irq)(struct kvm_vcpu *vcpu);
517 void (*set_irq)(struct kvm_vcpu *vcpu, int vec); 518 void (*set_nmi)(struct kvm_vcpu *vcpu);
518 void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, 519 void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
519 bool has_error_code, u32 error_code); 520 bool has_error_code, u32 error_code);
520 bool (*exception_injected)(struct kvm_vcpu *vcpu); 521 int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
521 void (*inject_pending_irq)(struct kvm_vcpu *vcpu); 522 int (*nmi_allowed)(struct kvm_vcpu *vcpu);
522 void (*inject_pending_vectors)(struct kvm_vcpu *vcpu, 523 void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
523 struct kvm_run *run); 524 void (*enable_irq_window)(struct kvm_vcpu *vcpu);
524 525 void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
525 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); 526 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
526 int (*get_tdp_level)(void); 527 int (*get_tdp_level)(void);
527 int (*get_mt_mask_shift)(void); 528 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
528}; 529};
529 530
530extern struct kvm_x86_ops *kvm_x86_ops; 531extern struct kvm_x86_ops *kvm_x86_ops;
@@ -538,7 +539,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu);
538void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); 539void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
539void kvm_mmu_set_base_ptes(u64 base_pte); 540void kvm_mmu_set_base_ptes(u64 base_pte);
540void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, 541void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
541 u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask); 542 u64 dirty_mask, u64 nx_mask, u64 x_mask);
542 543
543int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 544int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
544void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); 545void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
@@ -552,6 +553,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
552 const void *val, int bytes); 553 const void *val, int bytes);
553int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, 554int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
554 gpa_t addr, unsigned long *ret); 555 gpa_t addr, unsigned long *ret);
556u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
555 557
556extern bool tdp_enabled; 558extern bool tdp_enabled;
557 559
@@ -563,6 +565,7 @@ enum emulation_result {
563 565
564#define EMULTYPE_NO_DECODE (1 << 0) 566#define EMULTYPE_NO_DECODE (1 << 0)
565#define EMULTYPE_TRAP_UD (1 << 1) 567#define EMULTYPE_TRAP_UD (1 << 1)
568#define EMULTYPE_SKIP (1 << 2)
566int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, 569int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
567 unsigned long cr2, u16 error_code, int emulation_type); 570 unsigned long cr2, u16 error_code, int emulation_type);
568void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); 571void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context);
@@ -638,7 +641,6 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
638int kvm_mmu_load(struct kvm_vcpu *vcpu); 641int kvm_mmu_load(struct kvm_vcpu *vcpu);
639void kvm_mmu_unload(struct kvm_vcpu *vcpu); 642void kvm_mmu_unload(struct kvm_vcpu *vcpu);
640void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); 643void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
641void kvm_mmu_sync_global(struct kvm_vcpu *vcpu);
642 644
643int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); 645int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
644 646
@@ -769,6 +771,8 @@ enum {
769#define HF_GIF_MASK (1 << 0) 771#define HF_GIF_MASK (1 << 0)
770#define HF_HIF_MASK (1 << 1) 772#define HF_HIF_MASK (1 << 1)
771#define HF_VINTR_MASK (1 << 2) 773#define HF_VINTR_MASK (1 << 2)
774#define HF_NMI_MASK (1 << 3)
775#define HF_IRET_MASK (1 << 4)
772 776
773/* 777/*
774 * Hardware virtualization extension instructions may fault if a 778 * Hardware virtualization extension instructions may fault if a
@@ -791,5 +795,6 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
791#define KVM_ARCH_WANT_MMU_NOTIFIER 795#define KVM_ARCH_WANT_MMU_NOTIFIER
792int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); 796int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
793int kvm_age_hva(struct kvm *kvm, unsigned long hva); 797int kvm_age_hva(struct kvm *kvm, unsigned long hva);
798int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
794 799
795#endif /* _ASM_X86_KVM_HOST_H */ 800#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h
index 6a159732881a..b7ed2c423116 100644
--- a/arch/x86/include/asm/kvm_x86_emulate.h
+++ b/arch/x86/include/asm/kvm_x86_emulate.h
@@ -143,6 +143,9 @@ struct decode_cache {
143 struct fetch_cache fetch; 143 struct fetch_cache fetch;
144}; 144};
145 145
146#define X86_SHADOW_INT_MOV_SS 1
147#define X86_SHADOW_INT_STI 2
148
146struct x86_emulate_ctxt { 149struct x86_emulate_ctxt {
147 /* Register state before/after emulation. */ 150 /* Register state before/after emulation. */
148 struct kvm_vcpu *vcpu; 151 struct kvm_vcpu *vcpu;
@@ -152,6 +155,9 @@ struct x86_emulate_ctxt {
152 int mode; 155 int mode;
153 u32 cs_base; 156 u32 cs_base;
154 157
158 /* interruptibility state, as a result of execution of STI or MOV SS */
159 int interruptibility;
160
155 /* decode cache */ 161 /* decode cache */
156 struct decode_cache decode; 162 struct decode_cache decode;
157}; 163};
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 82ada75f3ebf..85574b7c1bc1 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -225,6 +225,7 @@ struct __attribute__ ((__packed__)) vmcb {
225#define SVM_EVTINJ_VALID_ERR (1 << 11) 225#define SVM_EVTINJ_VALID_ERR (1 << 11)
226 226
227#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK 227#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
228#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
228 229
229#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR 230#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
230#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI 231#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
diff --git a/arch/x86/include/asm/termios.h b/arch/x86/include/asm/termios.h
index f72956331c49..c4ee8056baca 100644
--- a/arch/x86/include/asm/termios.h
+++ b/arch/x86/include/asm/termios.h
@@ -67,6 +67,7 @@ static inline int user_termio_to_kernel_termios(struct ktermios *termios,
67 SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); 67 SET_LOW_TERMIOS_BITS(termios, termio, c_oflag);
68 SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); 68 SET_LOW_TERMIOS_BITS(termios, termio, c_cflag);
69 SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); 69 SET_LOW_TERMIOS_BITS(termios, termio, c_lflag);
70 get_user(termios->c_line, &termio->c_line);
70 return copy_from_user(termios->c_cc, termio->c_cc, NCC); 71 return copy_from_user(termios->c_cc, termio->c_cc, NCC);
71} 72}
72 73
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 498f944010b9..11be5ad2e0e9 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -247,6 +247,7 @@ enum vmcs_field {
247#define EXIT_REASON_MSR_READ 31 247#define EXIT_REASON_MSR_READ 31
248#define EXIT_REASON_MSR_WRITE 32 248#define EXIT_REASON_MSR_WRITE 32
249#define EXIT_REASON_MWAIT_INSTRUCTION 36 249#define EXIT_REASON_MWAIT_INSTRUCTION 36
250#define EXIT_REASON_MCE_DURING_VMENTRY 41
250#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 251#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
251#define EXIT_REASON_APIC_ACCESS 44 252#define EXIT_REASON_APIC_ACCESS 44
252#define EXIT_REASON_EPT_VIOLATION 48 253#define EXIT_REASON_EPT_VIOLATION 48
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 09dd1d414fc3..289cc4815028 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -420,6 +420,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
420 out2: 420 out2:
421 atomic_dec(&mce_entry); 421 atomic_dec(&mce_entry);
422} 422}
423EXPORT_SYMBOL_GPL(do_machine_check);
423 424
424#ifdef CONFIG_X86_MCE_INTEL 425#ifdef CONFIG_X86_MCE_INTEL
425/*** 426/***
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 6551dedee20c..a78ecad0c900 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -27,6 +27,7 @@
27#include <linux/mm.h> 27#include <linux/mm.h>
28#include <linux/highmem.h> 28#include <linux/highmem.h>
29#include <linux/hardirq.h> 29#include <linux/hardirq.h>
30#include <asm/timer.h>
30 31
31#define MMU_QUEUE_SIZE 1024 32#define MMU_QUEUE_SIZE 1024
32 33
@@ -230,6 +231,9 @@ static void paravirt_ops_setup(void)
230 pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu; 231 pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
231 pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu; 232 pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
232 } 233 }
234#ifdef CONFIG_X86_IO_APIC
235 no_timer_check = 1;
236#endif
233} 237}
234 238
235void __init kvm_guest_init(void) 239void __init kvm_guest_init(void)
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index f6db48c405b8..28f5fb495a66 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -172,6 +172,9 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
172{ 172{
173 ack_APIC_irq(); 173 ack_APIC_irq();
174 inc_irq_stat(irq_resched_count); 174 inc_irq_stat(irq_resched_count);
175 /*
176 * KVM uses this interrupt to force a cpu out of guest mode
177 */
175} 178}
176 179
177void smp_call_function_interrupt(struct pt_regs *regs) 180void smp_call_function_interrupt(struct pt_regs *regs)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a58504ea78cc..8600a09e0c6c 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -50,6 +50,9 @@ config KVM_INTEL
50 Provides support for KVM on Intel processors equipped with the VT 50 Provides support for KVM on Intel processors equipped with the VT
51 extensions. 51 extensions.
52 52
53 To compile this as a module, choose M here: the module
54 will be called kvm-intel.
55
53config KVM_AMD 56config KVM_AMD
54 tristate "KVM for AMD processors support" 57 tristate "KVM for AMD processors support"
55 depends on KVM 58 depends on KVM
@@ -57,6 +60,9 @@ config KVM_AMD
57 Provides support for KVM on AMD processors equipped with the AMD-V 60 Provides support for KVM on AMD processors equipped with the AMD-V
58 (SVM) extensions. 61 (SVM) extensions.
59 62
63 To compile this as a module, choose M here: the module
64 will be called kvm-amd.
65
60config KVM_TRACE 66config KVM_TRACE
61 bool "KVM trace support" 67 bool "KVM trace support"
62 depends on KVM && SYSFS 68 depends on KVM && SYSFS
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d3ec292f00f2..b43c4efafe80 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -14,7 +14,7 @@ endif
14EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm 14EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
15 15
16kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \ 16kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
17 i8254.o 17 i8254.o timer.o
18obj-$(CONFIG_KVM) += kvm.o 18obj-$(CONFIG_KVM) += kvm.o
19kvm-intel-objs = vmx.o 19kvm-intel-objs = vmx.o
20obj-$(CONFIG_KVM_INTEL) += kvm-intel.o 20obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index c13bb92d3157..4d6f0d293ee2 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -98,6 +98,37 @@ static int pit_get_gate(struct kvm *kvm, int channel)
98 return kvm->arch.vpit->pit_state.channels[channel].gate; 98 return kvm->arch.vpit->pit_state.channels[channel].gate;
99} 99}
100 100
101static s64 __kpit_elapsed(struct kvm *kvm)
102{
103 s64 elapsed;
104 ktime_t remaining;
105 struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
106
107 /*
108 * The Counter does not stop when it reaches zero. In
109 * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to
110 * the highest count, either FFFF hex for binary counting
111 * or 9999 for BCD counting, and continues counting.
112 * Modes 2 and 3 are periodic; the Counter reloads
113 * itself with the initial count and continues counting
114 * from there.
115 */
116 remaining = hrtimer_expires_remaining(&ps->pit_timer.timer);
117 elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
118 elapsed = mod_64(elapsed, ps->pit_timer.period);
119
120 return elapsed;
121}
122
123static s64 kpit_elapsed(struct kvm *kvm, struct kvm_kpit_channel_state *c,
124 int channel)
125{
126 if (channel == 0)
127 return __kpit_elapsed(kvm);
128
129 return ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
130}
131
101static int pit_get_count(struct kvm *kvm, int channel) 132static int pit_get_count(struct kvm *kvm, int channel)
102{ 133{
103 struct kvm_kpit_channel_state *c = 134 struct kvm_kpit_channel_state *c =
@@ -107,7 +138,7 @@ static int pit_get_count(struct kvm *kvm, int channel)
107 138
108 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); 139 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
109 140
110 t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); 141 t = kpit_elapsed(kvm, c, channel);
111 d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); 142 d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
112 143
113 switch (c->mode) { 144 switch (c->mode) {
@@ -137,7 +168,7 @@ static int pit_get_out(struct kvm *kvm, int channel)
137 168
138 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); 169 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
139 170
140 t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); 171 t = kpit_elapsed(kvm, c, channel);
141 d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); 172 d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
142 173
143 switch (c->mode) { 174 switch (c->mode) {
@@ -193,28 +224,6 @@ static void pit_latch_status(struct kvm *kvm, int channel)
193 } 224 }
194} 225}
195 226
196static int __pit_timer_fn(struct kvm_kpit_state *ps)
197{
198 struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
199 struct kvm_kpit_timer *pt = &ps->pit_timer;
200
201 if (!atomic_inc_and_test(&pt->pending))
202 set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
203
204 if (!pt->reinject)
205 atomic_set(&pt->pending, 1);
206
207 if (vcpu0 && waitqueue_active(&vcpu0->wq))
208 wake_up_interruptible(&vcpu0->wq);
209
210 hrtimer_add_expires_ns(&pt->timer, pt->period);
211 pt->scheduled = hrtimer_get_expires_ns(&pt->timer);
212 if (pt->period)
213 ps->channels[0].count_load_time = ktime_get();
214
215 return (pt->period == 0 ? 0 : 1);
216}
217
218int pit_has_pending_timer(struct kvm_vcpu *vcpu) 227int pit_has_pending_timer(struct kvm_vcpu *vcpu)
219{ 228{
220 struct kvm_pit *pit = vcpu->kvm->arch.vpit; 229 struct kvm_pit *pit = vcpu->kvm->arch.vpit;
@@ -235,21 +244,6 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
235 spin_unlock(&ps->inject_lock); 244 spin_unlock(&ps->inject_lock);
236} 245}
237 246
238static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
239{
240 struct kvm_kpit_state *ps;
241 int restart_timer = 0;
242
243 ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
244
245 restart_timer = __pit_timer_fn(ps);
246
247 if (restart_timer)
248 return HRTIMER_RESTART;
249 else
250 return HRTIMER_NORESTART;
251}
252
253void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) 247void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
254{ 248{
255 struct kvm_pit *pit = vcpu->kvm->arch.vpit; 249 struct kvm_pit *pit = vcpu->kvm->arch.vpit;
@@ -263,15 +257,26 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
263 hrtimer_start_expires(timer, HRTIMER_MODE_ABS); 257 hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
264} 258}
265 259
266static void destroy_pit_timer(struct kvm_kpit_timer *pt) 260static void destroy_pit_timer(struct kvm_timer *pt)
267{ 261{
268 pr_debug("pit: execute del timer!\n"); 262 pr_debug("pit: execute del timer!\n");
269 hrtimer_cancel(&pt->timer); 263 hrtimer_cancel(&pt->timer);
270} 264}
271 265
266static bool kpit_is_periodic(struct kvm_timer *ktimer)
267{
268 struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state,
269 pit_timer);
270 return ps->is_periodic;
271}
272
273static struct kvm_timer_ops kpit_ops = {
274 .is_periodic = kpit_is_periodic,
275};
276
272static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) 277static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
273{ 278{
274 struct kvm_kpit_timer *pt = &ps->pit_timer; 279 struct kvm_timer *pt = &ps->pit_timer;
275 s64 interval; 280 s64 interval;
276 281
277 interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); 282 interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
@@ -280,8 +285,14 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
280 285
281 /* TODO The new value only affected after the retriggered */ 286 /* TODO The new value only affected after the retriggered */
282 hrtimer_cancel(&pt->timer); 287 hrtimer_cancel(&pt->timer);
283 pt->period = (is_period == 0) ? 0 : interval; 288 pt->period = interval;
284 pt->timer.function = pit_timer_fn; 289 ps->is_periodic = is_period;
290
291 pt->timer.function = kvm_timer_fn;
292 pt->t_ops = &kpit_ops;
293 pt->kvm = ps->pit->kvm;
294 pt->vcpu_id = 0;
295
285 atomic_set(&pt->pending, 0); 296 atomic_set(&pt->pending, 0);
286 ps->irq_ack = 1; 297 ps->irq_ack = 1;
287 298
@@ -298,23 +309,23 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
298 pr_debug("pit: load_count val is %d, channel is %d\n", val, channel); 309 pr_debug("pit: load_count val is %d, channel is %d\n", val, channel);
299 310
300 /* 311 /*
301 * Though spec said the state of 8254 is undefined after power-up, 312 * The largest possible initial count is 0; this is equivalent
302 * seems some tricky OS like Windows XP depends on IRQ0 interrupt 313 * to 216 for binary counting and 104 for BCD counting.
303 * when booting up.
304 * So here setting initialize rate for it, and not a specific number
305 */ 314 */
306 if (val == 0) 315 if (val == 0)
307 val = 0x10000; 316 val = 0x10000;
308 317
309 ps->channels[channel].count_load_time = ktime_get();
310 ps->channels[channel].count = val; 318 ps->channels[channel].count = val;
311 319
312 if (channel != 0) 320 if (channel != 0) {
321 ps->channels[channel].count_load_time = ktime_get();
313 return; 322 return;
323 }
314 324
315 /* Two types of timer 325 /* Two types of timer
316 * mode 1 is one shot, mode 2 is period, otherwise del timer */ 326 * mode 1 is one shot, mode 2 is period, otherwise del timer */
317 switch (ps->channels[0].mode) { 327 switch (ps->channels[0].mode) {
328 case 0:
318 case 1: 329 case 1:
319 /* FIXME: enhance mode 4 precision */ 330 /* FIXME: enhance mode 4 precision */
320 case 4: 331 case 4:
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 6acbe4b505d5..bbd863ff60b7 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -3,15 +3,6 @@
3 3
4#include "iodev.h" 4#include "iodev.h"
5 5
6struct kvm_kpit_timer {
7 struct hrtimer timer;
8 int irq;
9 s64 period; /* unit: ns */
10 s64 scheduled;
11 atomic_t pending;
12 bool reinject;
13};
14
15struct kvm_kpit_channel_state { 6struct kvm_kpit_channel_state {
16 u32 count; /* can be 65536 */ 7 u32 count; /* can be 65536 */
17 u16 latched_count; 8 u16 latched_count;
@@ -30,7 +21,8 @@ struct kvm_kpit_channel_state {
30 21
31struct kvm_kpit_state { 22struct kvm_kpit_state {
32 struct kvm_kpit_channel_state channels[3]; 23 struct kvm_kpit_channel_state channels[3];
33 struct kvm_kpit_timer pit_timer; 24 struct kvm_timer pit_timer;
25 bool is_periodic;
34 u32 speaker_data_on; 26 u32 speaker_data_on;
35 struct mutex lock; 27 struct mutex lock;
36 struct kvm_pit *pit; 28 struct kvm_pit *pit;
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index cf17ed52f6fb..96dfbb6ad2a9 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -24,6 +24,7 @@
24 24
25#include "irq.h" 25#include "irq.h"
26#include "i8254.h" 26#include "i8254.h"
27#include "x86.h"
27 28
28/* 29/*
29 * check if there are pending timer events 30 * check if there are pending timer events
@@ -48,6 +49,9 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
48{ 49{
49 struct kvm_pic *s; 50 struct kvm_pic *s;
50 51
52 if (!irqchip_in_kernel(v->kvm))
53 return v->arch.interrupt.pending;
54
51 if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ 55 if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */
52 if (kvm_apic_accept_pic_intr(v)) { 56 if (kvm_apic_accept_pic_intr(v)) {
53 s = pic_irqchip(v->kvm); /* PIC */ 57 s = pic_irqchip(v->kvm); /* PIC */
@@ -67,6 +71,9 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
67 struct kvm_pic *s; 71 struct kvm_pic *s;
68 int vector; 72 int vector;
69 73
74 if (!irqchip_in_kernel(v->kvm))
75 return v->arch.interrupt.nr;
76
70 vector = kvm_get_apic_interrupt(v); /* APIC */ 77 vector = kvm_get_apic_interrupt(v); /* APIC */
71 if (vector == -1) { 78 if (vector == -1) {
72 if (kvm_apic_accept_pic_intr(v)) { 79 if (kvm_apic_accept_pic_intr(v)) {
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h
new file mode 100644
index 000000000000..26bd6ba74e1c
--- /dev/null
+++ b/arch/x86/kvm/kvm_timer.h
@@ -0,0 +1,18 @@
1
2struct kvm_timer {
3 struct hrtimer timer;
4 s64 period; /* unit: ns */
5 atomic_t pending; /* accumulated triggered timers */
6 bool reinject;
7 struct kvm_timer_ops *t_ops;
8 struct kvm *kvm;
9 int vcpu_id;
10};
11
12struct kvm_timer_ops {
13 bool (*is_periodic)(struct kvm_timer *);
14};
15
16
17enum hrtimer_restart kvm_timer_fn(struct hrtimer *data);
18
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index f0b67f2cdd69..ae99d83f81a3 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -196,20 +196,15 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
196} 196}
197EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); 197EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
198 198
199int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig) 199static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
200 int vector, int level, int trig_mode);
201
202int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
200{ 203{
201 struct kvm_lapic *apic = vcpu->arch.apic; 204 struct kvm_lapic *apic = vcpu->arch.apic;
202 205
203 if (!apic_test_and_set_irr(vec, apic)) { 206 return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
204 /* a new pending irq is set in IRR */ 207 irq->level, irq->trig_mode);
205 if (trig)
206 apic_set_vector(vec, apic->regs + APIC_TMR);
207 else
208 apic_clear_vector(vec, apic->regs + APIC_TMR);
209 kvm_vcpu_kick(apic->vcpu);
210 return 1;
211 }
212 return 0;
213} 208}
214 209
215static inline int apic_find_highest_isr(struct kvm_lapic *apic) 210static inline int apic_find_highest_isr(struct kvm_lapic *apic)
@@ -250,7 +245,7 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
250 245
251int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) 246int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
252{ 247{
253 return kvm_apic_id(apic) == dest; 248 return dest == 0xff || kvm_apic_id(apic) == dest;
254} 249}
255 250
256int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) 251int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
@@ -279,37 +274,34 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
279 return result; 274 return result;
280} 275}
281 276
282static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, 277int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
283 int short_hand, int dest, int dest_mode) 278 int short_hand, int dest, int dest_mode)
284{ 279{
285 int result = 0; 280 int result = 0;
286 struct kvm_lapic *target = vcpu->arch.apic; 281 struct kvm_lapic *target = vcpu->arch.apic;
287 282
288 apic_debug("target %p, source %p, dest 0x%x, " 283 apic_debug("target %p, source %p, dest 0x%x, "
289 "dest_mode 0x%x, short_hand 0x%x", 284 "dest_mode 0x%x, short_hand 0x%x\n",
290 target, source, dest, dest_mode, short_hand); 285 target, source, dest, dest_mode, short_hand);
291 286
292 ASSERT(!target); 287 ASSERT(!target);
293 switch (short_hand) { 288 switch (short_hand) {
294 case APIC_DEST_NOSHORT: 289 case APIC_DEST_NOSHORT:
295 if (dest_mode == 0) { 290 if (dest_mode == 0)
296 /* Physical mode. */ 291 /* Physical mode. */
297 if ((dest == 0xFF) || (dest == kvm_apic_id(target))) 292 result = kvm_apic_match_physical_addr(target, dest);
298 result = 1; 293 else
299 } else
300 /* Logical mode. */ 294 /* Logical mode. */
301 result = kvm_apic_match_logical_addr(target, dest); 295 result = kvm_apic_match_logical_addr(target, dest);
302 break; 296 break;
303 case APIC_DEST_SELF: 297 case APIC_DEST_SELF:
304 if (target == source) 298 result = (target == source);
305 result = 1;
306 break; 299 break;
307 case APIC_DEST_ALLINC: 300 case APIC_DEST_ALLINC:
308 result = 1; 301 result = 1;
309 break; 302 break;
310 case APIC_DEST_ALLBUT: 303 case APIC_DEST_ALLBUT:
311 if (target != source) 304 result = (target != source);
312 result = 1;
313 break; 305 break;
314 default: 306 default:
315 printk(KERN_WARNING "Bad dest shorthand value %x\n", 307 printk(KERN_WARNING "Bad dest shorthand value %x\n",
@@ -327,20 +319,22 @@ static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
327static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, 319static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
328 int vector, int level, int trig_mode) 320 int vector, int level, int trig_mode)
329{ 321{
330 int orig_irr, result = 0; 322 int result = 0;
331 struct kvm_vcpu *vcpu = apic->vcpu; 323 struct kvm_vcpu *vcpu = apic->vcpu;
332 324
333 switch (delivery_mode) { 325 switch (delivery_mode) {
334 case APIC_DM_FIXED:
335 case APIC_DM_LOWEST: 326 case APIC_DM_LOWEST:
327 vcpu->arch.apic_arb_prio++;
328 case APIC_DM_FIXED:
336 /* FIXME add logic for vcpu on reset */ 329 /* FIXME add logic for vcpu on reset */
337 if (unlikely(!apic_enabled(apic))) 330 if (unlikely(!apic_enabled(apic)))
338 break; 331 break;
339 332
340 orig_irr = apic_test_and_set_irr(vector, apic); 333 result = !apic_test_and_set_irr(vector, apic);
341 if (orig_irr && trig_mode) { 334 if (!result) {
342 apic_debug("level trig mode repeatedly for vector %d", 335 if (trig_mode)
343 vector); 336 apic_debug("level trig mode repeatedly for "
337 "vector %d", vector);
344 break; 338 break;
345 } 339 }
346 340
@@ -349,10 +343,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
349 apic_set_vector(vector, apic->regs + APIC_TMR); 343 apic_set_vector(vector, apic->regs + APIC_TMR);
350 } else 344 } else
351 apic_clear_vector(vector, apic->regs + APIC_TMR); 345 apic_clear_vector(vector, apic->regs + APIC_TMR);
352
353 kvm_vcpu_kick(vcpu); 346 kvm_vcpu_kick(vcpu);
354
355 result = (orig_irr == 0);
356 break; 347 break;
357 348
358 case APIC_DM_REMRD: 349 case APIC_DM_REMRD:
@@ -364,12 +355,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
364 break; 355 break;
365 356
366 case APIC_DM_NMI: 357 case APIC_DM_NMI:
358 result = 1;
367 kvm_inject_nmi(vcpu); 359 kvm_inject_nmi(vcpu);
368 kvm_vcpu_kick(vcpu); 360 kvm_vcpu_kick(vcpu);
369 break; 361 break;
370 362
371 case APIC_DM_INIT: 363 case APIC_DM_INIT:
372 if (level) { 364 if (level) {
365 result = 1;
373 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) 366 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
374 printk(KERN_DEBUG 367 printk(KERN_DEBUG
375 "INIT on a runnable vcpu %d\n", 368 "INIT on a runnable vcpu %d\n",
@@ -386,6 +379,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
386 apic_debug("SIPI to vcpu %d vector 0x%02x\n", 379 apic_debug("SIPI to vcpu %d vector 0x%02x\n",
387 vcpu->vcpu_id, vector); 380 vcpu->vcpu_id, vector);
388 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { 381 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
382 result = 1;
389 vcpu->arch.sipi_vector = vector; 383 vcpu->arch.sipi_vector = vector;
390 vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; 384 vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
391 kvm_vcpu_kick(vcpu); 385 kvm_vcpu_kick(vcpu);
@@ -408,43 +402,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
408 return result; 402 return result;
409} 403}
410 404
411static struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector, 405int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
412 unsigned long bitmap)
413{
414 int last;
415 int next;
416 struct kvm_lapic *apic = NULL;
417
418 last = kvm->arch.round_robin_prev_vcpu;
419 next = last;
420
421 do {
422 if (++next == KVM_MAX_VCPUS)
423 next = 0;
424 if (kvm->vcpus[next] == NULL || !test_bit(next, &bitmap))
425 continue;
426 apic = kvm->vcpus[next]->arch.apic;
427 if (apic && apic_enabled(apic))
428 break;
429 apic = NULL;
430 } while (next != last);
431 kvm->arch.round_robin_prev_vcpu = next;
432
433 if (!apic)
434 printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n");
435
436 return apic;
437}
438
439struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
440 unsigned long bitmap)
441{ 406{
442 struct kvm_lapic *apic; 407 return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
443
444 apic = kvm_apic_round_robin(kvm, vector, bitmap);
445 if (apic)
446 return apic->vcpu;
447 return NULL;
448} 408}
449 409
450static void apic_set_eoi(struct kvm_lapic *apic) 410static void apic_set_eoi(struct kvm_lapic *apic)
@@ -472,47 +432,24 @@ static void apic_send_ipi(struct kvm_lapic *apic)
472{ 432{
473 u32 icr_low = apic_get_reg(apic, APIC_ICR); 433 u32 icr_low = apic_get_reg(apic, APIC_ICR);
474 u32 icr_high = apic_get_reg(apic, APIC_ICR2); 434 u32 icr_high = apic_get_reg(apic, APIC_ICR2);
435 struct kvm_lapic_irq irq;
475 436
476 unsigned int dest = GET_APIC_DEST_FIELD(icr_high); 437 irq.vector = icr_low & APIC_VECTOR_MASK;
477 unsigned int short_hand = icr_low & APIC_SHORT_MASK; 438 irq.delivery_mode = icr_low & APIC_MODE_MASK;
478 unsigned int trig_mode = icr_low & APIC_INT_LEVELTRIG; 439 irq.dest_mode = icr_low & APIC_DEST_MASK;
479 unsigned int level = icr_low & APIC_INT_ASSERT; 440 irq.level = icr_low & APIC_INT_ASSERT;
480 unsigned int dest_mode = icr_low & APIC_DEST_MASK; 441 irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
481 unsigned int delivery_mode = icr_low & APIC_MODE_MASK; 442 irq.shorthand = icr_low & APIC_SHORT_MASK;
482 unsigned int vector = icr_low & APIC_VECTOR_MASK; 443 irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
483
484 struct kvm_vcpu *target;
485 struct kvm_vcpu *vcpu;
486 unsigned long lpr_map = 0;
487 int i;
488 444
489 apic_debug("icr_high 0x%x, icr_low 0x%x, " 445 apic_debug("icr_high 0x%x, icr_low 0x%x, "
490 "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " 446 "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
491 "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n", 447 "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
492 icr_high, icr_low, short_hand, dest, 448 icr_high, icr_low, irq.shorthand, irq.dest_id,
493 trig_mode, level, dest_mode, delivery_mode, vector); 449 irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
494 450 irq.vector);
495 for (i = 0; i < KVM_MAX_VCPUS; i++) {
496 vcpu = apic->vcpu->kvm->vcpus[i];
497 if (!vcpu)
498 continue;
499
500 if (vcpu->arch.apic &&
501 apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) {
502 if (delivery_mode == APIC_DM_LOWEST)
503 set_bit(vcpu->vcpu_id, &lpr_map);
504 else
505 __apic_accept_irq(vcpu->arch.apic, delivery_mode,
506 vector, level, trig_mode);
507 }
508 }
509 451
510 if (delivery_mode == APIC_DM_LOWEST) { 452 kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
511 target = kvm_get_lowest_prio_vcpu(vcpu->kvm, vector, lpr_map);
512 if (target != NULL)
513 __apic_accept_irq(target->arch.apic, delivery_mode,
514 vector, level, trig_mode);
515 }
516} 453}
517 454
518static u32 apic_get_tmcct(struct kvm_lapic *apic) 455static u32 apic_get_tmcct(struct kvm_lapic *apic)
@@ -527,12 +464,13 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
527 if (apic_get_reg(apic, APIC_TMICT) == 0) 464 if (apic_get_reg(apic, APIC_TMICT) == 0)
528 return 0; 465 return 0;
529 466
530 remaining = hrtimer_expires_remaining(&apic->timer.dev); 467 remaining = hrtimer_expires_remaining(&apic->lapic_timer.timer);
531 if (ktime_to_ns(remaining) < 0) 468 if (ktime_to_ns(remaining) < 0)
532 remaining = ktime_set(0, 0); 469 remaining = ktime_set(0, 0);
533 470
534 ns = mod_64(ktime_to_ns(remaining), apic->timer.period); 471 ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
535 tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); 472 tmcct = div64_u64(ns,
473 (APIC_BUS_CYCLE_NS * apic->divide_count));
536 474
537 return tmcct; 475 return tmcct;
538} 476}
@@ -619,25 +557,25 @@ static void update_divide_count(struct kvm_lapic *apic)
619 tdcr = apic_get_reg(apic, APIC_TDCR); 557 tdcr = apic_get_reg(apic, APIC_TDCR);
620 tmp1 = tdcr & 0xf; 558 tmp1 = tdcr & 0xf;
621 tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; 559 tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
622 apic->timer.divide_count = 0x1 << (tmp2 & 0x7); 560 apic->divide_count = 0x1 << (tmp2 & 0x7);
623 561
624 apic_debug("timer divide count is 0x%x\n", 562 apic_debug("timer divide count is 0x%x\n",
625 apic->timer.divide_count); 563 apic->divide_count);
626} 564}
627 565
628static void start_apic_timer(struct kvm_lapic *apic) 566static void start_apic_timer(struct kvm_lapic *apic)
629{ 567{
630 ktime_t now = apic->timer.dev.base->get_time(); 568 ktime_t now = apic->lapic_timer.timer.base->get_time();
631 569
632 apic->timer.period = apic_get_reg(apic, APIC_TMICT) * 570 apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) *
633 APIC_BUS_CYCLE_NS * apic->timer.divide_count; 571 APIC_BUS_CYCLE_NS * apic->divide_count;
634 atomic_set(&apic->timer.pending, 0); 572 atomic_set(&apic->lapic_timer.pending, 0);
635 573
636 if (!apic->timer.period) 574 if (!apic->lapic_timer.period)
637 return; 575 return;
638 576
639 hrtimer_start(&apic->timer.dev, 577 hrtimer_start(&apic->lapic_timer.timer,
640 ktime_add_ns(now, apic->timer.period), 578 ktime_add_ns(now, apic->lapic_timer.period),
641 HRTIMER_MODE_ABS); 579 HRTIMER_MODE_ABS);
642 580
643 apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" 581 apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
@@ -646,9 +584,9 @@ static void start_apic_timer(struct kvm_lapic *apic)
646 "expire @ 0x%016" PRIx64 ".\n", __func__, 584 "expire @ 0x%016" PRIx64 ".\n", __func__,
647 APIC_BUS_CYCLE_NS, ktime_to_ns(now), 585 APIC_BUS_CYCLE_NS, ktime_to_ns(now),
648 apic_get_reg(apic, APIC_TMICT), 586 apic_get_reg(apic, APIC_TMICT),
649 apic->timer.period, 587 apic->lapic_timer.period,
650 ktime_to_ns(ktime_add_ns(now, 588 ktime_to_ns(ktime_add_ns(now,
651 apic->timer.period))); 589 apic->lapic_timer.period)));
652} 590}
653 591
654static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) 592static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
@@ -730,7 +668,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
730 apic_set_reg(apic, APIC_LVTT + 0x10 * i, 668 apic_set_reg(apic, APIC_LVTT + 0x10 * i,
731 lvt_val | APIC_LVT_MASKED); 669 lvt_val | APIC_LVT_MASKED);
732 } 670 }
733 atomic_set(&apic->timer.pending, 0); 671 atomic_set(&apic->lapic_timer.pending, 0);
734 672
735 } 673 }
736 break; 674 break;
@@ -762,7 +700,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
762 break; 700 break;
763 701
764 case APIC_TMICT: 702 case APIC_TMICT:
765 hrtimer_cancel(&apic->timer.dev); 703 hrtimer_cancel(&apic->lapic_timer.timer);
766 apic_set_reg(apic, APIC_TMICT, val); 704 apic_set_reg(apic, APIC_TMICT, val);
767 start_apic_timer(apic); 705 start_apic_timer(apic);
768 return; 706 return;
@@ -802,7 +740,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
802 if (!vcpu->arch.apic) 740 if (!vcpu->arch.apic)
803 return; 741 return;
804 742
805 hrtimer_cancel(&vcpu->arch.apic->timer.dev); 743 hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer);
806 744
807 if (vcpu->arch.apic->regs_page) 745 if (vcpu->arch.apic->regs_page)
808 __free_page(vcpu->arch.apic->regs_page); 746 __free_page(vcpu->arch.apic->regs_page);
@@ -880,7 +818,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
880 ASSERT(apic != NULL); 818 ASSERT(apic != NULL);
881 819
882 /* Stop the timer in case it's a reset to an active apic */ 820 /* Stop the timer in case it's a reset to an active apic */
883 hrtimer_cancel(&apic->timer.dev); 821 hrtimer_cancel(&apic->lapic_timer.timer);
884 822
885 apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24); 823 apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
886 apic_set_reg(apic, APIC_LVR, APIC_VERSION); 824 apic_set_reg(apic, APIC_LVR, APIC_VERSION);
@@ -905,11 +843,13 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
905 apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); 843 apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
906 } 844 }
907 update_divide_count(apic); 845 update_divide_count(apic);
908 atomic_set(&apic->timer.pending, 0); 846 atomic_set(&apic->lapic_timer.pending, 0);
909 if (vcpu->vcpu_id == 0) 847 if (vcpu->vcpu_id == 0)
910 vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; 848 vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
911 apic_update_ppr(apic); 849 apic_update_ppr(apic);
912 850
851 vcpu->arch.apic_arb_prio = 0;
852
913 apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" 853 apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
914 "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, 854 "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
915 vcpu, kvm_apic_id(apic), 855 vcpu, kvm_apic_id(apic),
@@ -917,16 +857,14 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
917} 857}
918EXPORT_SYMBOL_GPL(kvm_lapic_reset); 858EXPORT_SYMBOL_GPL(kvm_lapic_reset);
919 859
920int kvm_lapic_enabled(struct kvm_vcpu *vcpu) 860bool kvm_apic_present(struct kvm_vcpu *vcpu)
921{ 861{
922 struct kvm_lapic *apic = vcpu->arch.apic; 862 return vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic);
923 int ret = 0; 863}
924
925 if (!apic)
926 return 0;
927 ret = apic_enabled(apic);
928 864
929 return ret; 865int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
866{
867 return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic);
930} 868}
931EXPORT_SYMBOL_GPL(kvm_lapic_enabled); 869EXPORT_SYMBOL_GPL(kvm_lapic_enabled);
932 870
@@ -936,22 +874,11 @@ EXPORT_SYMBOL_GPL(kvm_lapic_enabled);
936 *---------------------------------------------------------------------- 874 *----------------------------------------------------------------------
937 */ 875 */
938 876
939/* TODO: make sure __apic_timer_fn runs in current pCPU */ 877static bool lapic_is_periodic(struct kvm_timer *ktimer)
940static int __apic_timer_fn(struct kvm_lapic *apic)
941{ 878{
942 int result = 0; 879 struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic,
943 wait_queue_head_t *q = &apic->vcpu->wq; 880 lapic_timer);
944 881 return apic_lvtt_period(apic);
945 if(!atomic_inc_and_test(&apic->timer.pending))
946 set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests);
947 if (waitqueue_active(q))
948 wake_up_interruptible(q);
949
950 if (apic_lvtt_period(apic)) {
951 result = 1;
952 hrtimer_add_expires_ns(&apic->timer.dev, apic->timer.period);
953 }
954 return result;
955} 882}
956 883
957int apic_has_pending_timer(struct kvm_vcpu *vcpu) 884int apic_has_pending_timer(struct kvm_vcpu *vcpu)
@@ -959,7 +886,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
959 struct kvm_lapic *lapic = vcpu->arch.apic; 886 struct kvm_lapic *lapic = vcpu->arch.apic;
960 887
961 if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT)) 888 if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT))
962 return atomic_read(&lapic->timer.pending); 889 return atomic_read(&lapic->lapic_timer.pending);
963 890
964 return 0; 891 return 0;
965} 892}
@@ -986,20 +913,9 @@ void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
986 kvm_apic_local_deliver(apic, APIC_LVT0); 913 kvm_apic_local_deliver(apic, APIC_LVT0);
987} 914}
988 915
989static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) 916static struct kvm_timer_ops lapic_timer_ops = {
990{ 917 .is_periodic = lapic_is_periodic,
991 struct kvm_lapic *apic; 918};
992 int restart_timer = 0;
993
994 apic = container_of(data, struct kvm_lapic, timer.dev);
995
996 restart_timer = __apic_timer_fn(apic);
997
998 if (restart_timer)
999 return HRTIMER_RESTART;
1000 else
1001 return HRTIMER_NORESTART;
1002}
1003 919
1004int kvm_create_lapic(struct kvm_vcpu *vcpu) 920int kvm_create_lapic(struct kvm_vcpu *vcpu)
1005{ 921{
@@ -1024,8 +940,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
1024 memset(apic->regs, 0, PAGE_SIZE); 940 memset(apic->regs, 0, PAGE_SIZE);
1025 apic->vcpu = vcpu; 941 apic->vcpu = vcpu;
1026 942
1027 hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 943 hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
1028 apic->timer.dev.function = apic_timer_fn; 944 HRTIMER_MODE_ABS);
945 apic->lapic_timer.timer.function = kvm_timer_fn;
946 apic->lapic_timer.t_ops = &lapic_timer_ops;
947 apic->lapic_timer.kvm = vcpu->kvm;
948 apic->lapic_timer.vcpu_id = vcpu->vcpu_id;
949
1029 apic->base_address = APIC_DEFAULT_PHYS_BASE; 950 apic->base_address = APIC_DEFAULT_PHYS_BASE;
1030 vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE; 951 vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
1031 952
@@ -1078,9 +999,9 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
1078{ 999{
1079 struct kvm_lapic *apic = vcpu->arch.apic; 1000 struct kvm_lapic *apic = vcpu->arch.apic;
1080 1001
1081 if (apic && atomic_read(&apic->timer.pending) > 0) { 1002 if (apic && atomic_read(&apic->lapic_timer.pending) > 0) {
1082 if (kvm_apic_local_deliver(apic, APIC_LVTT)) 1003 if (kvm_apic_local_deliver(apic, APIC_LVTT))
1083 atomic_dec(&apic->timer.pending); 1004 atomic_dec(&apic->lapic_timer.pending);
1084 } 1005 }
1085} 1006}
1086 1007
@@ -1106,7 +1027,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
1106 MSR_IA32_APICBASE_BASE; 1027 MSR_IA32_APICBASE_BASE;
1107 apic_set_reg(apic, APIC_LVR, APIC_VERSION); 1028 apic_set_reg(apic, APIC_LVR, APIC_VERSION);
1108 apic_update_ppr(apic); 1029 apic_update_ppr(apic);
1109 hrtimer_cancel(&apic->timer.dev); 1030 hrtimer_cancel(&apic->lapic_timer.timer);
1110 update_divide_count(apic); 1031 update_divide_count(apic);
1111 start_apic_timer(apic); 1032 start_apic_timer(apic);
1112} 1033}
@@ -1119,7 +1040,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
1119 if (!apic) 1040 if (!apic)
1120 return; 1041 return;
1121 1042
1122 timer = &apic->timer.dev; 1043 timer = &apic->lapic_timer.timer;
1123 if (hrtimer_cancel(timer)) 1044 if (hrtimer_cancel(timer))
1124 hrtimer_start_expires(timer, HRTIMER_MODE_ABS); 1045 hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
1125} 1046}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 45ab6ee71209..a587f8349c46 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -2,18 +2,15 @@
2#define __KVM_X86_LAPIC_H 2#define __KVM_X86_LAPIC_H
3 3
4#include "iodev.h" 4#include "iodev.h"
5#include "kvm_timer.h"
5 6
6#include <linux/kvm_host.h> 7#include <linux/kvm_host.h>
7 8
8struct kvm_lapic { 9struct kvm_lapic {
9 unsigned long base_address; 10 unsigned long base_address;
10 struct kvm_io_device dev; 11 struct kvm_io_device dev;
11 struct { 12 struct kvm_timer lapic_timer;
12 atomic_t pending; 13 u32 divide_count;
13 s64 period; /* unit: ns */
14 u32 divide_count;
15 struct hrtimer dev;
16 } timer;
17 struct kvm_vcpu *vcpu; 14 struct kvm_vcpu *vcpu;
18 struct page *regs_page; 15 struct page *regs_page;
19 void *regs; 16 void *regs;
@@ -34,12 +31,13 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
34 31
35int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); 32int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
36int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); 33int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
37int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig); 34int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
38 35
39u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); 36u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
40void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); 37void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
41void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); 38void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
42int kvm_lapic_enabled(struct kvm_vcpu *vcpu); 39int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
40bool kvm_apic_present(struct kvm_vcpu *vcpu);
43int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); 41int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
44 42
45void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); 43void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 32cf11e5728a..5c3d6e81a7dc 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -126,6 +126,7 @@ module_param(oos_shadow, bool, 0644);
126#define PFERR_PRESENT_MASK (1U << 0) 126#define PFERR_PRESENT_MASK (1U << 0)
127#define PFERR_WRITE_MASK (1U << 1) 127#define PFERR_WRITE_MASK (1U << 1)
128#define PFERR_USER_MASK (1U << 2) 128#define PFERR_USER_MASK (1U << 2)
129#define PFERR_RSVD_MASK (1U << 3)
129#define PFERR_FETCH_MASK (1U << 4) 130#define PFERR_FETCH_MASK (1U << 4)
130 131
131#define PT_DIRECTORY_LEVEL 2 132#define PT_DIRECTORY_LEVEL 2
@@ -177,7 +178,11 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
177static u64 __read_mostly shadow_user_mask; 178static u64 __read_mostly shadow_user_mask;
178static u64 __read_mostly shadow_accessed_mask; 179static u64 __read_mostly shadow_accessed_mask;
179static u64 __read_mostly shadow_dirty_mask; 180static u64 __read_mostly shadow_dirty_mask;
180static u64 __read_mostly shadow_mt_mask; 181
182static inline u64 rsvd_bits(int s, int e)
183{
184 return ((1ULL << (e - s + 1)) - 1) << s;
185}
181 186
182void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) 187void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
183{ 188{
@@ -193,14 +198,13 @@ void kvm_mmu_set_base_ptes(u64 base_pte)
193EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); 198EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
194 199
195void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, 200void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
196 u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask) 201 u64 dirty_mask, u64 nx_mask, u64 x_mask)
197{ 202{
198 shadow_user_mask = user_mask; 203 shadow_user_mask = user_mask;
199 shadow_accessed_mask = accessed_mask; 204 shadow_accessed_mask = accessed_mask;
200 shadow_dirty_mask = dirty_mask; 205 shadow_dirty_mask = dirty_mask;
201 shadow_nx_mask = nx_mask; 206 shadow_nx_mask = nx_mask;
202 shadow_x_mask = x_mask; 207 shadow_x_mask = x_mask;
203 shadow_mt_mask = mt_mask;
204} 208}
205EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); 209EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
206 210
@@ -219,11 +223,6 @@ static int is_nx(struct kvm_vcpu *vcpu)
219 return vcpu->arch.shadow_efer & EFER_NX; 223 return vcpu->arch.shadow_efer & EFER_NX;
220} 224}
221 225
222static int is_present_pte(unsigned long pte)
223{
224 return pte & PT_PRESENT_MASK;
225}
226
227static int is_shadow_present_pte(u64 pte) 226static int is_shadow_present_pte(u64 pte)
228{ 227{
229 return pte != shadow_trap_nonpresent_pte 228 return pte != shadow_trap_nonpresent_pte
@@ -1074,18 +1073,10 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
1074 return NULL; 1073 return NULL;
1075} 1074}
1076 1075
1077static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp)
1078{
1079 list_del(&sp->oos_link);
1080 --kvm->stat.mmu_unsync_global;
1081}
1082
1083static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) 1076static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
1084{ 1077{
1085 WARN_ON(!sp->unsync); 1078 WARN_ON(!sp->unsync);
1086 sp->unsync = 0; 1079 sp->unsync = 0;
1087 if (sp->global)
1088 kvm_unlink_unsync_global(kvm, sp);
1089 --kvm->stat.mmu_unsync; 1080 --kvm->stat.mmu_unsync;
1090} 1081}
1091 1082
@@ -1248,7 +1239,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1248 pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word); 1239 pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
1249 sp->gfn = gfn; 1240 sp->gfn = gfn;
1250 sp->role = role; 1241 sp->role = role;
1251 sp->global = 0;
1252 hlist_add_head(&sp->hash_link, bucket); 1242 hlist_add_head(&sp->hash_link, bucket);
1253 if (!direct) { 1243 if (!direct) {
1254 if (rmap_write_protect(vcpu->kvm, gfn)) 1244 if (rmap_write_protect(vcpu->kvm, gfn))
@@ -1616,7 +1606,7 @@ static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
1616 return mtrr_state->def_type; 1606 return mtrr_state->def_type;
1617} 1607}
1618 1608
1619static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) 1609u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
1620{ 1610{
1621 u8 mtrr; 1611 u8 mtrr;
1622 1612
@@ -1626,6 +1616,7 @@ static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
1626 mtrr = MTRR_TYPE_WRBACK; 1616 mtrr = MTRR_TYPE_WRBACK;
1627 return mtrr; 1617 return mtrr;
1628} 1618}
1619EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type);
1629 1620
1630static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 1621static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1631{ 1622{
@@ -1646,11 +1637,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1646 ++vcpu->kvm->stat.mmu_unsync; 1637 ++vcpu->kvm->stat.mmu_unsync;
1647 sp->unsync = 1; 1638 sp->unsync = 1;
1648 1639
1649 if (sp->global) { 1640 kvm_mmu_mark_parents_unsync(vcpu, sp);
1650 list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages);
1651 ++vcpu->kvm->stat.mmu_unsync_global;
1652 } else
1653 kvm_mmu_mark_parents_unsync(vcpu, sp);
1654 1641
1655 mmu_convert_notrap(sp); 1642 mmu_convert_notrap(sp);
1656 return 0; 1643 return 0;
@@ -1677,21 +1664,11 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
1677static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, 1664static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1678 unsigned pte_access, int user_fault, 1665 unsigned pte_access, int user_fault,
1679 int write_fault, int dirty, int largepage, 1666 int write_fault, int dirty, int largepage,
1680 int global, gfn_t gfn, pfn_t pfn, bool speculative, 1667 gfn_t gfn, pfn_t pfn, bool speculative,
1681 bool can_unsync) 1668 bool can_unsync)
1682{ 1669{
1683 u64 spte; 1670 u64 spte;
1684 int ret = 0; 1671 int ret = 0;
1685 u64 mt_mask = shadow_mt_mask;
1686 struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
1687
1688 if (!global && sp->global) {
1689 sp->global = 0;
1690 if (sp->unsync) {
1691 kvm_unlink_unsync_global(vcpu->kvm, sp);
1692 kvm_mmu_mark_parents_unsync(vcpu, sp);
1693 }
1694 }
1695 1672
1696 /* 1673 /*
1697 * We don't set the accessed bit, since we sometimes want to see 1674 * We don't set the accessed bit, since we sometimes want to see
@@ -1711,16 +1688,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1711 spte |= shadow_user_mask; 1688 spte |= shadow_user_mask;
1712 if (largepage) 1689 if (largepage)
1713 spte |= PT_PAGE_SIZE_MASK; 1690 spte |= PT_PAGE_SIZE_MASK;
1714 if (mt_mask) { 1691 if (tdp_enabled)
1715 if (!kvm_is_mmio_pfn(pfn)) { 1692 spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
1716 mt_mask = get_memory_type(vcpu, gfn) << 1693 kvm_is_mmio_pfn(pfn));
1717 kvm_x86_ops->get_mt_mask_shift();
1718 mt_mask |= VMX_EPT_IGMT_BIT;
1719 } else
1720 mt_mask = MTRR_TYPE_UNCACHABLE <<
1721 kvm_x86_ops->get_mt_mask_shift();
1722 spte |= mt_mask;
1723 }
1724 1694
1725 spte |= (u64)pfn << PAGE_SHIFT; 1695 spte |= (u64)pfn << PAGE_SHIFT;
1726 1696
@@ -1765,8 +1735,8 @@ set_pte:
1765static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, 1735static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1766 unsigned pt_access, unsigned pte_access, 1736 unsigned pt_access, unsigned pte_access,
1767 int user_fault, int write_fault, int dirty, 1737 int user_fault, int write_fault, int dirty,
1768 int *ptwrite, int largepage, int global, 1738 int *ptwrite, int largepage, gfn_t gfn,
1769 gfn_t gfn, pfn_t pfn, bool speculative) 1739 pfn_t pfn, bool speculative)
1770{ 1740{
1771 int was_rmapped = 0; 1741 int was_rmapped = 0;
1772 int was_writeble = is_writeble_pte(*shadow_pte); 1742 int was_writeble = is_writeble_pte(*shadow_pte);
@@ -1795,7 +1765,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1795 was_rmapped = 1; 1765 was_rmapped = 1;
1796 } 1766 }
1797 if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, 1767 if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
1798 dirty, largepage, global, gfn, pfn, speculative, true)) { 1768 dirty, largepage, gfn, pfn, speculative, true)) {
1799 if (write_fault) 1769 if (write_fault)
1800 *ptwrite = 1; 1770 *ptwrite = 1;
1801 kvm_x86_ops->tlb_flush(vcpu); 1771 kvm_x86_ops->tlb_flush(vcpu);
@@ -1843,7 +1813,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
1843 || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) { 1813 || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) {
1844 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, 1814 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
1845 0, write, 1, &pt_write, 1815 0, write, 1, &pt_write,
1846 largepage, 0, gfn, pfn, false); 1816 largepage, gfn, pfn, false);
1847 ++vcpu->stat.pf_fixed; 1817 ++vcpu->stat.pf_fixed;
1848 break; 1818 break;
1849 } 1819 }
@@ -1942,7 +1912,19 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
1942 vcpu->arch.mmu.root_hpa = INVALID_PAGE; 1912 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
1943} 1913}
1944 1914
1945static void mmu_alloc_roots(struct kvm_vcpu *vcpu) 1915static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
1916{
1917 int ret = 0;
1918
1919 if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) {
1920 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
1921 ret = 1;
1922 }
1923
1924 return ret;
1925}
1926
1927static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
1946{ 1928{
1947 int i; 1929 int i;
1948 gfn_t root_gfn; 1930 gfn_t root_gfn;
@@ -1957,13 +1939,15 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1957 ASSERT(!VALID_PAGE(root)); 1939 ASSERT(!VALID_PAGE(root));
1958 if (tdp_enabled) 1940 if (tdp_enabled)
1959 direct = 1; 1941 direct = 1;
1942 if (mmu_check_root(vcpu, root_gfn))
1943 return 1;
1960 sp = kvm_mmu_get_page(vcpu, root_gfn, 0, 1944 sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
1961 PT64_ROOT_LEVEL, direct, 1945 PT64_ROOT_LEVEL, direct,
1962 ACC_ALL, NULL); 1946 ACC_ALL, NULL);
1963 root = __pa(sp->spt); 1947 root = __pa(sp->spt);
1964 ++sp->root_count; 1948 ++sp->root_count;
1965 vcpu->arch.mmu.root_hpa = root; 1949 vcpu->arch.mmu.root_hpa = root;
1966 return; 1950 return 0;
1967 } 1951 }
1968 direct = !is_paging(vcpu); 1952 direct = !is_paging(vcpu);
1969 if (tdp_enabled) 1953 if (tdp_enabled)
@@ -1980,6 +1964,8 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1980 root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT; 1964 root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT;
1981 } else if (vcpu->arch.mmu.root_level == 0) 1965 } else if (vcpu->arch.mmu.root_level == 0)
1982 root_gfn = 0; 1966 root_gfn = 0;
1967 if (mmu_check_root(vcpu, root_gfn))
1968 return 1;
1983 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, 1969 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
1984 PT32_ROOT_LEVEL, direct, 1970 PT32_ROOT_LEVEL, direct,
1985 ACC_ALL, NULL); 1971 ACC_ALL, NULL);
@@ -1988,6 +1974,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1988 vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; 1974 vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
1989 } 1975 }
1990 vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); 1976 vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
1977 return 0;
1991} 1978}
1992 1979
1993static void mmu_sync_roots(struct kvm_vcpu *vcpu) 1980static void mmu_sync_roots(struct kvm_vcpu *vcpu)
@@ -2006,7 +1993,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
2006 for (i = 0; i < 4; ++i) { 1993 for (i = 0; i < 4; ++i) {
2007 hpa_t root = vcpu->arch.mmu.pae_root[i]; 1994 hpa_t root = vcpu->arch.mmu.pae_root[i];
2008 1995
2009 if (root) { 1996 if (root && VALID_PAGE(root)) {
2010 root &= PT64_BASE_ADDR_MASK; 1997 root &= PT64_BASE_ADDR_MASK;
2011 sp = page_header(root); 1998 sp = page_header(root);
2012 mmu_sync_children(vcpu, sp); 1999 mmu_sync_children(vcpu, sp);
@@ -2014,15 +2001,6 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
2014 } 2001 }
2015} 2002}
2016 2003
2017static void mmu_sync_global(struct kvm_vcpu *vcpu)
2018{
2019 struct kvm *kvm = vcpu->kvm;
2020 struct kvm_mmu_page *sp, *n;
2021
2022 list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link)
2023 kvm_sync_page(vcpu, sp);
2024}
2025
2026void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) 2004void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
2027{ 2005{
2028 spin_lock(&vcpu->kvm->mmu_lock); 2006 spin_lock(&vcpu->kvm->mmu_lock);
@@ -2030,13 +2008,6 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
2030 spin_unlock(&vcpu->kvm->mmu_lock); 2008 spin_unlock(&vcpu->kvm->mmu_lock);
2031} 2009}
2032 2010
2033void kvm_mmu_sync_global(struct kvm_vcpu *vcpu)
2034{
2035 spin_lock(&vcpu->kvm->mmu_lock);
2036 mmu_sync_global(vcpu);
2037 spin_unlock(&vcpu->kvm->mmu_lock);
2038}
2039
2040static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) 2011static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
2041{ 2012{
2042 return vaddr; 2013 return vaddr;
@@ -2151,6 +2122,14 @@ static void paging_free(struct kvm_vcpu *vcpu)
2151 nonpaging_free(vcpu); 2122 nonpaging_free(vcpu);
2152} 2123}
2153 2124
2125static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
2126{
2127 int bit7;
2128
2129 bit7 = (gpte >> 7) & 1;
2130 return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0;
2131}
2132
2154#define PTTYPE 64 2133#define PTTYPE 64
2155#include "paging_tmpl.h" 2134#include "paging_tmpl.h"
2156#undef PTTYPE 2135#undef PTTYPE
@@ -2159,6 +2138,59 @@ static void paging_free(struct kvm_vcpu *vcpu)
2159#include "paging_tmpl.h" 2138#include "paging_tmpl.h"
2160#undef PTTYPE 2139#undef PTTYPE
2161 2140
2141static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
2142{
2143 struct kvm_mmu *context = &vcpu->arch.mmu;
2144 int maxphyaddr = cpuid_maxphyaddr(vcpu);
2145 u64 exb_bit_rsvd = 0;
2146
2147 if (!is_nx(vcpu))
2148 exb_bit_rsvd = rsvd_bits(63, 63);
2149 switch (level) {
2150 case PT32_ROOT_LEVEL:
2151 /* no rsvd bits for 2 level 4K page table entries */
2152 context->rsvd_bits_mask[0][1] = 0;
2153 context->rsvd_bits_mask[0][0] = 0;
2154 if (is_cpuid_PSE36())
2155 /* 36bits PSE 4MB page */
2156 context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
2157 else
2158 /* 32 bits PSE 4MB page */
2159 context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
2160 context->rsvd_bits_mask[1][0] = ~0ull;
2161 break;
2162 case PT32E_ROOT_LEVEL:
2163 context->rsvd_bits_mask[0][2] =
2164 rsvd_bits(maxphyaddr, 63) |
2165 rsvd_bits(7, 8) | rsvd_bits(1, 2); /* PDPTE */
2166 context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
2167 rsvd_bits(maxphyaddr, 62); /* PDE */
2168 context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
2169 rsvd_bits(maxphyaddr, 62); /* PTE */
2170 context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
2171 rsvd_bits(maxphyaddr, 62) |
2172 rsvd_bits(13, 20); /* large page */
2173 context->rsvd_bits_mask[1][0] = ~0ull;
2174 break;
2175 case PT64_ROOT_LEVEL:
2176 context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
2177 rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
2178 context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
2179 rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
2180 context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
2181 rsvd_bits(maxphyaddr, 51);
2182 context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
2183 rsvd_bits(maxphyaddr, 51);
2184 context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
2185 context->rsvd_bits_mask[1][2] = context->rsvd_bits_mask[0][2];
2186 context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
2187 rsvd_bits(maxphyaddr, 51) |
2188 rsvd_bits(13, 20); /* large page */
2189 context->rsvd_bits_mask[1][0] = ~0ull;
2190 break;
2191 }
2192}
2193
2162static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) 2194static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
2163{ 2195{
2164 struct kvm_mmu *context = &vcpu->arch.mmu; 2196 struct kvm_mmu *context = &vcpu->arch.mmu;
@@ -2179,6 +2211,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
2179 2211
2180static int paging64_init_context(struct kvm_vcpu *vcpu) 2212static int paging64_init_context(struct kvm_vcpu *vcpu)
2181{ 2213{
2214 reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL);
2182 return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL); 2215 return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
2183} 2216}
2184 2217
@@ -2186,6 +2219,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
2186{ 2219{
2187 struct kvm_mmu *context = &vcpu->arch.mmu; 2220 struct kvm_mmu *context = &vcpu->arch.mmu;
2188 2221
2222 reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);
2189 context->new_cr3 = paging_new_cr3; 2223 context->new_cr3 = paging_new_cr3;
2190 context->page_fault = paging32_page_fault; 2224 context->page_fault = paging32_page_fault;
2191 context->gva_to_gpa = paging32_gva_to_gpa; 2225 context->gva_to_gpa = paging32_gva_to_gpa;
@@ -2201,6 +2235,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
2201 2235
2202static int paging32E_init_context(struct kvm_vcpu *vcpu) 2236static int paging32E_init_context(struct kvm_vcpu *vcpu)
2203{ 2237{
2238 reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL);
2204 return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); 2239 return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
2205} 2240}
2206 2241
@@ -2221,12 +2256,15 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
2221 context->gva_to_gpa = nonpaging_gva_to_gpa; 2256 context->gva_to_gpa = nonpaging_gva_to_gpa;
2222 context->root_level = 0; 2257 context->root_level = 0;
2223 } else if (is_long_mode(vcpu)) { 2258 } else if (is_long_mode(vcpu)) {
2259 reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL);
2224 context->gva_to_gpa = paging64_gva_to_gpa; 2260 context->gva_to_gpa = paging64_gva_to_gpa;
2225 context->root_level = PT64_ROOT_LEVEL; 2261 context->root_level = PT64_ROOT_LEVEL;
2226 } else if (is_pae(vcpu)) { 2262 } else if (is_pae(vcpu)) {
2263 reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL);
2227 context->gva_to_gpa = paging64_gva_to_gpa; 2264 context->gva_to_gpa = paging64_gva_to_gpa;
2228 context->root_level = PT32E_ROOT_LEVEL; 2265 context->root_level = PT32E_ROOT_LEVEL;
2229 } else { 2266 } else {
2267 reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);
2230 context->gva_to_gpa = paging32_gva_to_gpa; 2268 context->gva_to_gpa = paging32_gva_to_gpa;
2231 context->root_level = PT32_ROOT_LEVEL; 2269 context->root_level = PT32_ROOT_LEVEL;
2232 } 2270 }
@@ -2290,9 +2328,11 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
2290 goto out; 2328 goto out;
2291 spin_lock(&vcpu->kvm->mmu_lock); 2329 spin_lock(&vcpu->kvm->mmu_lock);
2292 kvm_mmu_free_some_pages(vcpu); 2330 kvm_mmu_free_some_pages(vcpu);
2293 mmu_alloc_roots(vcpu); 2331 r = mmu_alloc_roots(vcpu);
2294 mmu_sync_roots(vcpu); 2332 mmu_sync_roots(vcpu);
2295 spin_unlock(&vcpu->kvm->mmu_lock); 2333 spin_unlock(&vcpu->kvm->mmu_lock);
2334 if (r)
2335 goto out;
2296 kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); 2336 kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
2297 kvm_mmu_flush_tlb(vcpu); 2337 kvm_mmu_flush_tlb(vcpu);
2298out: 2338out:
@@ -2638,14 +2678,6 @@ EXPORT_SYMBOL_GPL(kvm_disable_tdp);
2638 2678
2639static void free_mmu_pages(struct kvm_vcpu *vcpu) 2679static void free_mmu_pages(struct kvm_vcpu *vcpu)
2640{ 2680{
2641 struct kvm_mmu_page *sp;
2642
2643 while (!list_empty(&vcpu->kvm->arch.active_mmu_pages)) {
2644 sp = container_of(vcpu->kvm->arch.active_mmu_pages.next,
2645 struct kvm_mmu_page, link);
2646 kvm_mmu_zap_page(vcpu->kvm, sp);
2647 cond_resched();
2648 }
2649 free_page((unsigned long)vcpu->arch.mmu.pae_root); 2681 free_page((unsigned long)vcpu->arch.mmu.pae_root);
2650} 2682}
2651 2683
@@ -2710,7 +2742,6 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
2710{ 2742{
2711 struct kvm_mmu_page *sp; 2743 struct kvm_mmu_page *sp;
2712 2744
2713 spin_lock(&kvm->mmu_lock);
2714 list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { 2745 list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) {
2715 int i; 2746 int i;
2716 u64 *pt; 2747 u64 *pt;
@@ -2725,7 +2756,6 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
2725 pt[i] &= ~PT_WRITABLE_MASK; 2756 pt[i] &= ~PT_WRITABLE_MASK;
2726 } 2757 }
2727 kvm_flush_remote_tlbs(kvm); 2758 kvm_flush_remote_tlbs(kvm);
2728 spin_unlock(&kvm->mmu_lock);
2729} 2759}
2730 2760
2731void kvm_mmu_zap_all(struct kvm *kvm) 2761void kvm_mmu_zap_all(struct kvm *kvm)
@@ -3007,11 +3037,13 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
3007 " in nonleaf level: levels %d gva %lx" 3037 " in nonleaf level: levels %d gva %lx"
3008 " level %d pte %llx\n", audit_msg, 3038 " level %d pte %llx\n", audit_msg,
3009 vcpu->arch.mmu.root_level, va, level, ent); 3039 vcpu->arch.mmu.root_level, va, level, ent);
3010 3040 else
3011 audit_mappings_page(vcpu, ent, va, level - 1); 3041 audit_mappings_page(vcpu, ent, va, level - 1);
3012 } else { 3042 } else {
3013 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); 3043 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
3014 hpa_t hpa = (hpa_t)gpa_to_pfn(vcpu, gpa) << PAGE_SHIFT; 3044 gfn_t gfn = gpa >> PAGE_SHIFT;
3045 pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);
3046 hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT;
3015 3047
3016 if (is_shadow_present_pte(ent) 3048 if (is_shadow_present_pte(ent)
3017 && (ent & PT64_BASE_ADDR_MASK) != hpa) 3049 && (ent & PT64_BASE_ADDR_MASK) != hpa)
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index eaab2145f62b..3494a2fb136e 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -75,4 +75,9 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
75 return vcpu->arch.cr0 & X86_CR0_PG; 75 return vcpu->arch.cr0 & X86_CR0_PG;
76} 76}
77 77
78static inline int is_present_pte(unsigned long pte)
79{
80 return pte & PT_PRESENT_MASK;
81}
82
78#endif 83#endif
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6bd70206c561..258e4591e1ca 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
123 gfn_t table_gfn; 123 gfn_t table_gfn;
124 unsigned index, pt_access, pte_access; 124 unsigned index, pt_access, pte_access;
125 gpa_t pte_gpa; 125 gpa_t pte_gpa;
126 int rsvd_fault = 0;
126 127
127 pgprintk("%s: addr %lx\n", __func__, addr); 128 pgprintk("%s: addr %lx\n", __func__, addr);
128walk: 129walk:
@@ -157,6 +158,10 @@ walk:
157 if (!is_present_pte(pte)) 158 if (!is_present_pte(pte))
158 goto not_present; 159 goto not_present;
159 160
161 rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
162 if (rsvd_fault)
163 goto access_error;
164
160 if (write_fault && !is_writeble_pte(pte)) 165 if (write_fault && !is_writeble_pte(pte))
161 if (user_fault || is_write_protection(vcpu)) 166 if (user_fault || is_write_protection(vcpu))
162 goto access_error; 167 goto access_error;
@@ -209,7 +214,6 @@ walk:
209 if (ret) 214 if (ret)
210 goto walk; 215 goto walk;
211 pte |= PT_DIRTY_MASK; 216 pte |= PT_DIRTY_MASK;
212 kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0);
213 walker->ptes[walker->level - 1] = pte; 217 walker->ptes[walker->level - 1] = pte;
214 } 218 }
215 219
@@ -233,6 +237,8 @@ err:
233 walker->error_code |= PFERR_USER_MASK; 237 walker->error_code |= PFERR_USER_MASK;
234 if (fetch_fault) 238 if (fetch_fault)
235 walker->error_code |= PFERR_FETCH_MASK; 239 walker->error_code |= PFERR_FETCH_MASK;
240 if (rsvd_fault)
241 walker->error_code |= PFERR_RSVD_MASK;
236 return 0; 242 return 0;
237} 243}
238 244
@@ -262,8 +268,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
262 kvm_get_pfn(pfn); 268 kvm_get_pfn(pfn);
263 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, 269 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
264 gpte & PT_DIRTY_MASK, NULL, largepage, 270 gpte & PT_DIRTY_MASK, NULL, largepage,
265 gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte), 271 gpte_to_gfn(gpte), pfn, true);
266 pfn, true);
267} 272}
268 273
269/* 274/*
@@ -297,7 +302,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
297 user_fault, write_fault, 302 user_fault, write_fault,
298 gw->ptes[gw->level-1] & PT_DIRTY_MASK, 303 gw->ptes[gw->level-1] & PT_DIRTY_MASK,
299 ptwrite, largepage, 304 ptwrite, largepage,
300 gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
301 gw->gfn, pfn, false); 305 gw->gfn, pfn, false);
302 break; 306 break;
303 } 307 }
@@ -380,7 +384,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
380 return r; 384 return r;
381 385
382 /* 386 /*
383 * Look up the shadow pte for the faulting address. 387 * Look up the guest pte for the faulting address.
384 */ 388 */
385 r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault, 389 r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
386 fetch_fault); 390 fetch_fault);
@@ -586,7 +590,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
586 nr_present++; 590 nr_present++;
587 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); 591 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
588 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, 592 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
589 is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn, 593 is_dirty_pte(gpte), 0, gfn,
590 spte_to_pfn(sp->spt[i]), true, false); 594 spte_to_pfn(sp->spt[i]), true, false);
591 } 595 }
592 596
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1f8510c51d6e..71510e07e69e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -19,6 +19,7 @@
19#include "irq.h" 19#include "irq.h"
20#include "mmu.h" 20#include "mmu.h"
21#include "kvm_cache_regs.h" 21#include "kvm_cache_regs.h"
22#include "x86.h"
22 23
23#include <linux/module.h> 24#include <linux/module.h>
24#include <linux/kernel.h> 25#include <linux/kernel.h>
@@ -69,7 +70,6 @@ module_param(npt, int, S_IRUGO);
69static int nested = 0; 70static int nested = 0;
70module_param(nested, int, S_IRUGO); 71module_param(nested, int, S_IRUGO);
71 72
72static void kvm_reput_irq(struct vcpu_svm *svm);
73static void svm_flush_tlb(struct kvm_vcpu *vcpu); 73static void svm_flush_tlb(struct kvm_vcpu *vcpu);
74 74
75static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override); 75static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
@@ -132,24 +132,6 @@ static inline u32 svm_has(u32 feat)
132 return svm_features & feat; 132 return svm_features & feat;
133} 133}
134 134
135static inline u8 pop_irq(struct kvm_vcpu *vcpu)
136{
137 int word_index = __ffs(vcpu->arch.irq_summary);
138 int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
139 int irq = word_index * BITS_PER_LONG + bit_index;
140
141 clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
142 if (!vcpu->arch.irq_pending[word_index])
143 clear_bit(word_index, &vcpu->arch.irq_summary);
144 return irq;
145}
146
147static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
148{
149 set_bit(irq, vcpu->arch.irq_pending);
150 set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
151}
152
153static inline void clgi(void) 135static inline void clgi(void)
154{ 136{
155 asm volatile (__ex(SVM_CLGI)); 137 asm volatile (__ex(SVM_CLGI));
@@ -214,17 +196,31 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
214 svm->vmcb->control.event_inj_err = error_code; 196 svm->vmcb->control.event_inj_err = error_code;
215} 197}
216 198
217static bool svm_exception_injected(struct kvm_vcpu *vcpu) 199static int is_external_interrupt(u32 info)
200{
201 info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
202 return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
203}
204
205static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
218{ 206{
219 struct vcpu_svm *svm = to_svm(vcpu); 207 struct vcpu_svm *svm = to_svm(vcpu);
208 u32 ret = 0;
220 209
221 return !(svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID); 210 if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
211 ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS;
212 return ret & mask;
222} 213}
223 214
224static int is_external_interrupt(u32 info) 215static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
225{ 216{
226 info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; 217 struct vcpu_svm *svm = to_svm(vcpu);
227 return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); 218
219 if (mask == 0)
220 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
221 else
222 svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
223
228} 224}
229 225
230static void skip_emulated_instruction(struct kvm_vcpu *vcpu) 226static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
@@ -232,7 +228,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
232 struct vcpu_svm *svm = to_svm(vcpu); 228 struct vcpu_svm *svm = to_svm(vcpu);
233 229
234 if (!svm->next_rip) { 230 if (!svm->next_rip) {
235 printk(KERN_DEBUG "%s: NOP\n", __func__); 231 if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) !=
232 EMULATE_DONE)
233 printk(KERN_DEBUG "%s: NOP\n", __func__);
236 return; 234 return;
237 } 235 }
238 if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) 236 if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
@@ -240,9 +238,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
240 __func__, kvm_rip_read(vcpu), svm->next_rip); 238 __func__, kvm_rip_read(vcpu), svm->next_rip);
241 239
242 kvm_rip_write(vcpu, svm->next_rip); 240 kvm_rip_write(vcpu, svm->next_rip);
243 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; 241 svm_set_interrupt_shadow(vcpu, 0);
244
245 vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
246} 242}
247 243
248static int has_svm(void) 244static int has_svm(void)
@@ -830,6 +826,15 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
830 if (!var->unusable) 826 if (!var->unusable)
831 var->type |= 0x1; 827 var->type |= 0x1;
832 break; 828 break;
829 case VCPU_SREG_SS:
830 /* On AMD CPUs sometimes the DB bit in the segment
831 * descriptor is left as 1, although the whole segment has
832 * been made unusable. Clear it here to pass an Intel VMX
833 * entry check when cross vendor migrating.
834 */
835 if (var->unusable)
836 var->db = 0;
837 break;
833 } 838 }
834} 839}
835 840
@@ -960,15 +965,16 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
960 965
961} 966}
962 967
963static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) 968static void update_db_intercept(struct kvm_vcpu *vcpu)
964{ 969{
965 int old_debug = vcpu->guest_debug;
966 struct vcpu_svm *svm = to_svm(vcpu); 970 struct vcpu_svm *svm = to_svm(vcpu);
967 971
968 vcpu->guest_debug = dbg->control;
969
970 svm->vmcb->control.intercept_exceptions &= 972 svm->vmcb->control.intercept_exceptions &=
971 ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); 973 ~((1 << DB_VECTOR) | (1 << BP_VECTOR));
974
975 if (vcpu->arch.singlestep)
976 svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
977
972 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { 978 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
973 if (vcpu->guest_debug & 979 if (vcpu->guest_debug &
974 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) 980 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
@@ -979,6 +985,16 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
979 1 << BP_VECTOR; 985 1 << BP_VECTOR;
980 } else 986 } else
981 vcpu->guest_debug = 0; 987 vcpu->guest_debug = 0;
988}
989
990static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
991{
992 int old_debug = vcpu->guest_debug;
993 struct vcpu_svm *svm = to_svm(vcpu);
994
995 vcpu->guest_debug = dbg->control;
996
997 update_db_intercept(vcpu);
982 998
983 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) 999 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
984 svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; 1000 svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
@@ -993,16 +1009,6 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
993 return 0; 1009 return 0;
994} 1010}
995 1011
996static int svm_get_irq(struct kvm_vcpu *vcpu)
997{
998 struct vcpu_svm *svm = to_svm(vcpu);
999 u32 exit_int_info = svm->vmcb->control.exit_int_info;
1000
1001 if (is_external_interrupt(exit_int_info))
1002 return exit_int_info & SVM_EVTINJ_VEC_MASK;
1003 return -1;
1004}
1005
1006static void load_host_msrs(struct kvm_vcpu *vcpu) 1012static void load_host_msrs(struct kvm_vcpu *vcpu)
1007{ 1013{
1008#ifdef CONFIG_X86_64 1014#ifdef CONFIG_X86_64
@@ -1107,17 +1113,8 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
1107 1113
1108static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1114static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1109{ 1115{
1110 u32 exit_int_info = svm->vmcb->control.exit_int_info;
1111 struct kvm *kvm = svm->vcpu.kvm;
1112 u64 fault_address; 1116 u64 fault_address;
1113 u32 error_code; 1117 u32 error_code;
1114 bool event_injection = false;
1115
1116 if (!irqchip_in_kernel(kvm) &&
1117 is_external_interrupt(exit_int_info)) {
1118 event_injection = true;
1119 push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
1120 }
1121 1118
1122 fault_address = svm->vmcb->control.exit_info_2; 1119 fault_address = svm->vmcb->control.exit_info_2;
1123 error_code = svm->vmcb->control.exit_info_1; 1120 error_code = svm->vmcb->control.exit_info_1;
@@ -1137,23 +1134,40 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1137 */ 1134 */
1138 if (npt_enabled) 1135 if (npt_enabled)
1139 svm_flush_tlb(&svm->vcpu); 1136 svm_flush_tlb(&svm->vcpu);
1140 1137 else {
1141 if (!npt_enabled && event_injection) 1138 if (kvm_event_needs_reinjection(&svm->vcpu))
1142 kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); 1139 kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
1140 }
1143 return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); 1141 return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
1144} 1142}
1145 1143
1146static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1144static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1147{ 1145{
1148 if (!(svm->vcpu.guest_debug & 1146 if (!(svm->vcpu.guest_debug &
1149 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { 1147 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
1148 !svm->vcpu.arch.singlestep) {
1150 kvm_queue_exception(&svm->vcpu, DB_VECTOR); 1149 kvm_queue_exception(&svm->vcpu, DB_VECTOR);
1151 return 1; 1150 return 1;
1152 } 1151 }
1153 kvm_run->exit_reason = KVM_EXIT_DEBUG; 1152
1154 kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; 1153 if (svm->vcpu.arch.singlestep) {
1155 kvm_run->debug.arch.exception = DB_VECTOR; 1154 svm->vcpu.arch.singlestep = false;
1156 return 0; 1155 if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
1156 svm->vmcb->save.rflags &=
1157 ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
1158 update_db_intercept(&svm->vcpu);
1159 }
1160
1161 if (svm->vcpu.guest_debug &
1162 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){
1163 kvm_run->exit_reason = KVM_EXIT_DEBUG;
1164 kvm_run->debug.arch.pc =
1165 svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1166 kvm_run->debug.arch.exception = DB_VECTOR;
1167 return 0;
1168 }
1169
1170 return 1;
1157} 1171}
1158 1172
1159static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1173static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
@@ -1842,17 +1856,51 @@ static int task_switch_interception(struct vcpu_svm *svm,
1842 struct kvm_run *kvm_run) 1856 struct kvm_run *kvm_run)
1843{ 1857{
1844 u16 tss_selector; 1858 u16 tss_selector;
1859 int reason;
1860 int int_type = svm->vmcb->control.exit_int_info &
1861 SVM_EXITINTINFO_TYPE_MASK;
1862 int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
1863 uint32_t type =
1864 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
1865 uint32_t idt_v =
1866 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
1845 1867
1846 tss_selector = (u16)svm->vmcb->control.exit_info_1; 1868 tss_selector = (u16)svm->vmcb->control.exit_info_1;
1869
1847 if (svm->vmcb->control.exit_info_2 & 1870 if (svm->vmcb->control.exit_info_2 &
1848 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET)) 1871 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
1849 return kvm_task_switch(&svm->vcpu, tss_selector, 1872 reason = TASK_SWITCH_IRET;
1850 TASK_SWITCH_IRET); 1873 else if (svm->vmcb->control.exit_info_2 &
1851 if (svm->vmcb->control.exit_info_2 & 1874 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
1852 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) 1875 reason = TASK_SWITCH_JMP;
1853 return kvm_task_switch(&svm->vcpu, tss_selector, 1876 else if (idt_v)
1854 TASK_SWITCH_JMP); 1877 reason = TASK_SWITCH_GATE;
1855 return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL); 1878 else
1879 reason = TASK_SWITCH_CALL;
1880
1881 if (reason == TASK_SWITCH_GATE) {
1882 switch (type) {
1883 case SVM_EXITINTINFO_TYPE_NMI:
1884 svm->vcpu.arch.nmi_injected = false;
1885 break;
1886 case SVM_EXITINTINFO_TYPE_EXEPT:
1887 kvm_clear_exception_queue(&svm->vcpu);
1888 break;
1889 case SVM_EXITINTINFO_TYPE_INTR:
1890 kvm_clear_interrupt_queue(&svm->vcpu);
1891 break;
1892 default:
1893 break;
1894 }
1895 }
1896
1897 if (reason != TASK_SWITCH_GATE ||
1898 int_type == SVM_EXITINTINFO_TYPE_SOFT ||
1899 (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
1900 (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
1901 skip_emulated_instruction(&svm->vcpu);
1902
1903 return kvm_task_switch(&svm->vcpu, tss_selector, reason);
1856} 1904}
1857 1905
1858static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1906static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
@@ -1862,6 +1910,14 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1862 return 1; 1910 return 1;
1863} 1911}
1864 1912
1913static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1914{
1915 ++svm->vcpu.stat.nmi_window_exits;
1916 svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
1917 svm->vcpu.arch.hflags |= HF_IRET_MASK;
1918 return 1;
1919}
1920
1865static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1921static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1866{ 1922{
1867 if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE) 1923 if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
@@ -1879,8 +1935,14 @@ static int emulate_on_interception(struct vcpu_svm *svm,
1879 1935
1880static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1936static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1881{ 1937{
1938 u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
1939 /* instruction emulation calls kvm_set_cr8() */
1882 emulate_instruction(&svm->vcpu, NULL, 0, 0, 0); 1940 emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
1883 if (irqchip_in_kernel(svm->vcpu.kvm)) 1941 if (irqchip_in_kernel(svm->vcpu.kvm)) {
1942 svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
1943 return 1;
1944 }
1945 if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
1884 return 1; 1946 return 1;
1885 kvm_run->exit_reason = KVM_EXIT_SET_TPR; 1947 kvm_run->exit_reason = KVM_EXIT_SET_TPR;
1886 return 0; 1948 return 0;
@@ -2090,8 +2152,9 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
2090 * If the user space waits to inject interrupts, exit as soon as 2152 * If the user space waits to inject interrupts, exit as soon as
2091 * possible 2153 * possible
2092 */ 2154 */
2093 if (kvm_run->request_interrupt_window && 2155 if (!irqchip_in_kernel(svm->vcpu.kvm) &&
2094 !svm->vcpu.arch.irq_summary) { 2156 kvm_run->request_interrupt_window &&
2157 !kvm_cpu_has_interrupt(&svm->vcpu)) {
2095 ++svm->vcpu.stat.irq_window_exits; 2158 ++svm->vcpu.stat.irq_window_exits;
2096 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; 2159 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
2097 return 0; 2160 return 0;
@@ -2134,6 +2197,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
2134 [SVM_EXIT_VINTR] = interrupt_window_interception, 2197 [SVM_EXIT_VINTR] = interrupt_window_interception,
2135 /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ 2198 /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */
2136 [SVM_EXIT_CPUID] = cpuid_interception, 2199 [SVM_EXIT_CPUID] = cpuid_interception,
2200 [SVM_EXIT_IRET] = iret_interception,
2137 [SVM_EXIT_INVD] = emulate_on_interception, 2201 [SVM_EXIT_INVD] = emulate_on_interception,
2138 [SVM_EXIT_HLT] = halt_interception, 2202 [SVM_EXIT_HLT] = halt_interception,
2139 [SVM_EXIT_INVLPG] = invlpg_interception, 2203 [SVM_EXIT_INVLPG] = invlpg_interception,
@@ -2194,7 +2258,6 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2194 } 2258 }
2195 } 2259 }
2196 2260
2197 kvm_reput_irq(svm);
2198 2261
2199 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { 2262 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
2200 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 2263 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -2205,7 +2268,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2205 2268
2206 if (is_external_interrupt(svm->vmcb->control.exit_int_info) && 2269 if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
2207 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && 2270 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
2208 exit_code != SVM_EXIT_NPF) 2271 exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH)
2209 printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " 2272 printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
2210 "exit_code 0x%x\n", 2273 "exit_code 0x%x\n",
2211 __func__, svm->vmcb->control.exit_int_info, 2274 __func__, svm->vmcb->control.exit_int_info,
@@ -2242,6 +2305,15 @@ static void pre_svm_run(struct vcpu_svm *svm)
2242 new_asid(svm, svm_data); 2305 new_asid(svm, svm_data);
2243} 2306}
2244 2307
2308static void svm_inject_nmi(struct kvm_vcpu *vcpu)
2309{
2310 struct vcpu_svm *svm = to_svm(vcpu);
2311
2312 svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
2313 vcpu->arch.hflags |= HF_NMI_MASK;
2314 svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
2315 ++vcpu->stat.nmi_injections;
2316}
2245 2317
2246static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) 2318static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
2247{ 2319{
@@ -2257,134 +2329,71 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
2257 ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); 2329 ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
2258} 2330}
2259 2331
2260static void svm_set_irq(struct kvm_vcpu *vcpu, int irq) 2332static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr)
2261{ 2333{
2262 struct vcpu_svm *svm = to_svm(vcpu); 2334 struct vcpu_svm *svm = to_svm(vcpu);
2263 2335
2264 nested_svm_intr(svm); 2336 svm->vmcb->control.event_inj = nr |
2265 2337 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
2266 svm_inject_irq(svm, irq);
2267} 2338}
2268 2339
2269static void update_cr8_intercept(struct kvm_vcpu *vcpu) 2340static void svm_set_irq(struct kvm_vcpu *vcpu)
2270{ 2341{
2271 struct vcpu_svm *svm = to_svm(vcpu); 2342 struct vcpu_svm *svm = to_svm(vcpu);
2272 struct vmcb *vmcb = svm->vmcb;
2273 int max_irr, tpr;
2274 2343
2275 if (!irqchip_in_kernel(vcpu->kvm) || vcpu->arch.apic->vapic_addr) 2344 nested_svm_intr(svm);
2276 return;
2277 2345
2278 vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; 2346 svm_queue_irq(vcpu, vcpu->arch.interrupt.nr);
2347}
2279 2348
2280 max_irr = kvm_lapic_find_highest_irr(vcpu); 2349static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
2281 if (max_irr == -1) 2350{
2282 return; 2351 struct vcpu_svm *svm = to_svm(vcpu);
2283 2352
2284 tpr = kvm_lapic_get_cr8(vcpu) << 4; 2353 if (irr == -1)
2354 return;
2285 2355
2286 if (tpr >= (max_irr & 0xf0)) 2356 if (tpr >= irr)
2287 vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK; 2357 svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
2288} 2358}
2289 2359
2290static void svm_intr_assist(struct kvm_vcpu *vcpu) 2360static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
2291{ 2361{
2292 struct vcpu_svm *svm = to_svm(vcpu); 2362 struct vcpu_svm *svm = to_svm(vcpu);
2293 struct vmcb *vmcb = svm->vmcb; 2363 struct vmcb *vmcb = svm->vmcb;
2294 int intr_vector = -1; 2364 return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
2295 2365 !(svm->vcpu.arch.hflags & HF_NMI_MASK);
2296 if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) &&
2297 ((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) {
2298 intr_vector = vmcb->control.exit_int_info &
2299 SVM_EVTINJ_VEC_MASK;
2300 vmcb->control.exit_int_info = 0;
2301 svm_inject_irq(svm, intr_vector);
2302 goto out;
2303 }
2304
2305 if (vmcb->control.int_ctl & V_IRQ_MASK)
2306 goto out;
2307
2308 if (!kvm_cpu_has_interrupt(vcpu))
2309 goto out;
2310
2311 if (nested_svm_intr(svm))
2312 goto out;
2313
2314 if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
2315 goto out;
2316
2317 if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
2318 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
2319 (vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
2320 /* unable to deliver irq, set pending irq */
2321 svm_set_vintr(svm);
2322 svm_inject_irq(svm, 0x0);
2323 goto out;
2324 }
2325 /* Okay, we can deliver the interrupt: grab it and update PIC state. */
2326 intr_vector = kvm_cpu_get_interrupt(vcpu);
2327 svm_inject_irq(svm, intr_vector);
2328out:
2329 update_cr8_intercept(vcpu);
2330} 2366}
2331 2367
2332static void kvm_reput_irq(struct vcpu_svm *svm) 2368static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
2333{ 2369{
2334 struct vmcb_control_area *control = &svm->vmcb->control; 2370 struct vcpu_svm *svm = to_svm(vcpu);
2335 2371 struct vmcb *vmcb = svm->vmcb;
2336 if ((control->int_ctl & V_IRQ_MASK) 2372 return (vmcb->save.rflags & X86_EFLAGS_IF) &&
2337 && !irqchip_in_kernel(svm->vcpu.kvm)) { 2373 !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
2338 control->int_ctl &= ~V_IRQ_MASK; 2374 (svm->vcpu.arch.hflags & HF_GIF_MASK);
2339 push_irq(&svm->vcpu, control->int_vector);
2340 }
2341
2342 svm->vcpu.arch.interrupt_window_open =
2343 !(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
2344 (svm->vcpu.arch.hflags & HF_GIF_MASK);
2345} 2375}
2346 2376
2347static void svm_do_inject_vector(struct vcpu_svm *svm) 2377static void enable_irq_window(struct kvm_vcpu *vcpu)
2348{ 2378{
2349 struct kvm_vcpu *vcpu = &svm->vcpu; 2379 svm_set_vintr(to_svm(vcpu));
2350 int word_index = __ffs(vcpu->arch.irq_summary); 2380 svm_inject_irq(to_svm(vcpu), 0x0);
2351 int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
2352 int irq = word_index * BITS_PER_LONG + bit_index;
2353
2354 clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
2355 if (!vcpu->arch.irq_pending[word_index])
2356 clear_bit(word_index, &vcpu->arch.irq_summary);
2357 svm_inject_irq(svm, irq);
2358} 2381}
2359 2382
2360static void do_interrupt_requests(struct kvm_vcpu *vcpu, 2383static void enable_nmi_window(struct kvm_vcpu *vcpu)
2361 struct kvm_run *kvm_run)
2362{ 2384{
2363 struct vcpu_svm *svm = to_svm(vcpu); 2385 struct vcpu_svm *svm = to_svm(vcpu);
2364 struct vmcb_control_area *control = &svm->vmcb->control;
2365
2366 if (nested_svm_intr(svm))
2367 return;
2368 2386
2369 svm->vcpu.arch.interrupt_window_open = 2387 if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
2370 (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && 2388 == HF_NMI_MASK)
2371 (svm->vmcb->save.rflags & X86_EFLAGS_IF) && 2389 return; /* IRET will cause a vm exit */
2372 (svm->vcpu.arch.hflags & HF_GIF_MASK));
2373 2390
2374 if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary) 2391 /* Something prevents NMI from been injected. Single step over
2375 /* 2392 possible problem (IRET or exception injection or interrupt
2376 * If interrupts enabled, and not blocked by sti or mov ss. Good. 2393 shadow) */
2377 */ 2394 vcpu->arch.singlestep = true;
2378 svm_do_inject_vector(svm); 2395 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
2379 2396 update_db_intercept(vcpu);
2380 /*
2381 * Interrupts blocked. Wait for unblock.
2382 */
2383 if (!svm->vcpu.arch.interrupt_window_open &&
2384 (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
2385 svm_set_vintr(svm);
2386 else
2387 svm_clear_vintr(svm);
2388} 2397}
2389 2398
2390static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) 2399static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -2407,7 +2416,7 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
2407 2416
2408 if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { 2417 if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
2409 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; 2418 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
2410 kvm_lapic_set_tpr(vcpu, cr8); 2419 kvm_set_cr8(vcpu, cr8);
2411 } 2420 }
2412} 2421}
2413 2422
@@ -2416,14 +2425,54 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
2416 struct vcpu_svm *svm = to_svm(vcpu); 2425 struct vcpu_svm *svm = to_svm(vcpu);
2417 u64 cr8; 2426 u64 cr8;
2418 2427
2419 if (!irqchip_in_kernel(vcpu->kvm))
2420 return;
2421
2422 cr8 = kvm_get_cr8(vcpu); 2428 cr8 = kvm_get_cr8(vcpu);
2423 svm->vmcb->control.int_ctl &= ~V_TPR_MASK; 2429 svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
2424 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; 2430 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
2425} 2431}
2426 2432
2433static void svm_complete_interrupts(struct vcpu_svm *svm)
2434{
2435 u8 vector;
2436 int type;
2437 u32 exitintinfo = svm->vmcb->control.exit_int_info;
2438
2439 if (svm->vcpu.arch.hflags & HF_IRET_MASK)
2440 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
2441
2442 svm->vcpu.arch.nmi_injected = false;
2443 kvm_clear_exception_queue(&svm->vcpu);
2444 kvm_clear_interrupt_queue(&svm->vcpu);
2445
2446 if (!(exitintinfo & SVM_EXITINTINFO_VALID))
2447 return;
2448
2449 vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
2450 type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
2451
2452 switch (type) {
2453 case SVM_EXITINTINFO_TYPE_NMI:
2454 svm->vcpu.arch.nmi_injected = true;
2455 break;
2456 case SVM_EXITINTINFO_TYPE_EXEPT:
2457 /* In case of software exception do not reinject an exception
2458 vector, but re-execute and instruction instead */
2459 if (kvm_exception_is_soft(vector))
2460 break;
2461 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
2462 u32 err = svm->vmcb->control.exit_int_info_err;
2463 kvm_queue_exception_e(&svm->vcpu, vector, err);
2464
2465 } else
2466 kvm_queue_exception(&svm->vcpu, vector);
2467 break;
2468 case SVM_EXITINTINFO_TYPE_INTR:
2469 kvm_queue_interrupt(&svm->vcpu, vector, false);
2470 break;
2471 default:
2472 break;
2473 }
2474}
2475
2427#ifdef CONFIG_X86_64 2476#ifdef CONFIG_X86_64
2428#define R "r" 2477#define R "r"
2429#else 2478#else
@@ -2552,6 +2601,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2552 sync_cr8_to_lapic(vcpu); 2601 sync_cr8_to_lapic(vcpu);
2553 2602
2554 svm->next_rip = 0; 2603 svm->next_rip = 0;
2604
2605 svm_complete_interrupts(svm);
2555} 2606}
2556 2607
2557#undef R 2608#undef R
@@ -2617,7 +2668,7 @@ static int get_npt_level(void)
2617#endif 2668#endif
2618} 2669}
2619 2670
2620static int svm_get_mt_mask_shift(void) 2671static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
2621{ 2672{
2622 return 0; 2673 return 0;
2623} 2674}
@@ -2667,17 +2718,21 @@ static struct kvm_x86_ops svm_x86_ops = {
2667 .run = svm_vcpu_run, 2718 .run = svm_vcpu_run,
2668 .handle_exit = handle_exit, 2719 .handle_exit = handle_exit,
2669 .skip_emulated_instruction = skip_emulated_instruction, 2720 .skip_emulated_instruction = skip_emulated_instruction,
2721 .set_interrupt_shadow = svm_set_interrupt_shadow,
2722 .get_interrupt_shadow = svm_get_interrupt_shadow,
2670 .patch_hypercall = svm_patch_hypercall, 2723 .patch_hypercall = svm_patch_hypercall,
2671 .get_irq = svm_get_irq,
2672 .set_irq = svm_set_irq, 2724 .set_irq = svm_set_irq,
2725 .set_nmi = svm_inject_nmi,
2673 .queue_exception = svm_queue_exception, 2726 .queue_exception = svm_queue_exception,
2674 .exception_injected = svm_exception_injected, 2727 .interrupt_allowed = svm_interrupt_allowed,
2675 .inject_pending_irq = svm_intr_assist, 2728 .nmi_allowed = svm_nmi_allowed,
2676 .inject_pending_vectors = do_interrupt_requests, 2729 .enable_nmi_window = enable_nmi_window,
2730 .enable_irq_window = enable_irq_window,
2731 .update_cr8_intercept = update_cr8_intercept,
2677 2732
2678 .set_tss_addr = svm_set_tss_addr, 2733 .set_tss_addr = svm_set_tss_addr,
2679 .get_tdp_level = get_npt_level, 2734 .get_tdp_level = get_npt_level,
2680 .get_mt_mask_shift = svm_get_mt_mask_shift, 2735 .get_mt_mask = svm_get_mt_mask,
2681}; 2736};
2682 2737
2683static int __init svm_init(void) 2738static int __init svm_init(void)
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c
new file mode 100644
index 000000000000..86dbac072d0c
--- /dev/null
+++ b/arch/x86/kvm/timer.c
@@ -0,0 +1,46 @@
1#include <linux/kvm_host.h>
2#include <linux/kvm.h>
3#include <linux/hrtimer.h>
4#include <asm/atomic.h>
5#include "kvm_timer.h"
6
7static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer)
8{
9 int restart_timer = 0;
10 wait_queue_head_t *q = &vcpu->wq;
11
12 /* FIXME: this code should not know anything about vcpus */
13 if (!atomic_inc_and_test(&ktimer->pending))
14 set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
15
16 if (!ktimer->reinject)
17 atomic_set(&ktimer->pending, 1);
18
19 if (waitqueue_active(q))
20 wake_up_interruptible(q);
21
22 if (ktimer->t_ops->is_periodic(ktimer)) {
23 hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
24 restart_timer = 1;
25 }
26
27 return restart_timer;
28}
29
30enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
31{
32 int restart_timer;
33 struct kvm_vcpu *vcpu;
34 struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
35
36 vcpu = ktimer->kvm->vcpus[ktimer->vcpu_id];
37 if (!vcpu)
38 return HRTIMER_NORESTART;
39
40 restart_timer = __kvm_timer_fn(vcpu, ktimer);
41 if (restart_timer)
42 return HRTIMER_RESTART;
43 else
44 return HRTIMER_NORESTART;
45}
46
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bb481330716f..32d6ae8fb60e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -32,26 +32,27 @@
32#include <asm/desc.h> 32#include <asm/desc.h>
33#include <asm/vmx.h> 33#include <asm/vmx.h>
34#include <asm/virtext.h> 34#include <asm/virtext.h>
35#include <asm/mce.h>
35 36
36#define __ex(x) __kvm_handle_fault_on_reboot(x) 37#define __ex(x) __kvm_handle_fault_on_reboot(x)
37 38
38MODULE_AUTHOR("Qumranet"); 39MODULE_AUTHOR("Qumranet");
39MODULE_LICENSE("GPL"); 40MODULE_LICENSE("GPL");
40 41
41static int bypass_guest_pf = 1; 42static int __read_mostly bypass_guest_pf = 1;
42module_param(bypass_guest_pf, bool, 0); 43module_param(bypass_guest_pf, bool, S_IRUGO);
43 44
44static int enable_vpid = 1; 45static int __read_mostly enable_vpid = 1;
45module_param(enable_vpid, bool, 0); 46module_param_named(vpid, enable_vpid, bool, 0444);
46 47
47static int flexpriority_enabled = 1; 48static int __read_mostly flexpriority_enabled = 1;
48module_param(flexpriority_enabled, bool, 0); 49module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
49 50
50static int enable_ept = 1; 51static int __read_mostly enable_ept = 1;
51module_param(enable_ept, bool, 0); 52module_param_named(ept, enable_ept, bool, S_IRUGO);
52 53
53static int emulate_invalid_guest_state = 0; 54static int __read_mostly emulate_invalid_guest_state = 0;
54module_param(emulate_invalid_guest_state, bool, 0); 55module_param(emulate_invalid_guest_state, bool, S_IRUGO);
55 56
56struct vmcs { 57struct vmcs {
57 u32 revision_id; 58 u32 revision_id;
@@ -97,6 +98,7 @@ struct vcpu_vmx {
97 int soft_vnmi_blocked; 98 int soft_vnmi_blocked;
98 ktime_t entry_time; 99 ktime_t entry_time;
99 s64 vnmi_blocked_time; 100 s64 vnmi_blocked_time;
101 u32 exit_reason;
100}; 102};
101 103
102static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) 104static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -111,9 +113,10 @@ static DEFINE_PER_CPU(struct vmcs *, vmxarea);
111static DEFINE_PER_CPU(struct vmcs *, current_vmcs); 113static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
112static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu); 114static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
113 115
114static struct page *vmx_io_bitmap_a; 116static unsigned long *vmx_io_bitmap_a;
115static struct page *vmx_io_bitmap_b; 117static unsigned long *vmx_io_bitmap_b;
116static struct page *vmx_msr_bitmap; 118static unsigned long *vmx_msr_bitmap_legacy;
119static unsigned long *vmx_msr_bitmap_longmode;
117 120
118static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); 121static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
119static DEFINE_SPINLOCK(vmx_vpid_lock); 122static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -213,70 +216,78 @@ static inline int is_external_interrupt(u32 intr_info)
213 == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); 216 == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
214} 217}
215 218
219static inline int is_machine_check(u32 intr_info)
220{
221 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
222 INTR_INFO_VALID_MASK)) ==
223 (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
224}
225
216static inline int cpu_has_vmx_msr_bitmap(void) 226static inline int cpu_has_vmx_msr_bitmap(void)
217{ 227{
218 return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS); 228 return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS;
219} 229}
220 230
221static inline int cpu_has_vmx_tpr_shadow(void) 231static inline int cpu_has_vmx_tpr_shadow(void)
222{ 232{
223 return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW); 233 return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW;
224} 234}
225 235
226static inline int vm_need_tpr_shadow(struct kvm *kvm) 236static inline int vm_need_tpr_shadow(struct kvm *kvm)
227{ 237{
228 return ((cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm))); 238 return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm));
229} 239}
230 240
231static inline int cpu_has_secondary_exec_ctrls(void) 241static inline int cpu_has_secondary_exec_ctrls(void)
232{ 242{
233 return (vmcs_config.cpu_based_exec_ctrl & 243 return vmcs_config.cpu_based_exec_ctrl &
234 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS); 244 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
235} 245}
236 246
237static inline bool cpu_has_vmx_virtualize_apic_accesses(void) 247static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
238{ 248{
239 return flexpriority_enabled 249 return vmcs_config.cpu_based_2nd_exec_ctrl &
240 && (vmcs_config.cpu_based_2nd_exec_ctrl & 250 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
241 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); 251}
252
253static inline bool cpu_has_vmx_flexpriority(void)
254{
255 return cpu_has_vmx_tpr_shadow() &&
256 cpu_has_vmx_virtualize_apic_accesses();
242} 257}
243 258
244static inline int cpu_has_vmx_invept_individual_addr(void) 259static inline int cpu_has_vmx_invept_individual_addr(void)
245{ 260{
246 return (!!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT)); 261 return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT);
247} 262}
248 263
249static inline int cpu_has_vmx_invept_context(void) 264static inline int cpu_has_vmx_invept_context(void)
250{ 265{
251 return (!!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT)); 266 return !!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT);
252} 267}
253 268
254static inline int cpu_has_vmx_invept_global(void) 269static inline int cpu_has_vmx_invept_global(void)
255{ 270{
256 return (!!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT)); 271 return !!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT);
257} 272}
258 273
259static inline int cpu_has_vmx_ept(void) 274static inline int cpu_has_vmx_ept(void)
260{ 275{
261 return (vmcs_config.cpu_based_2nd_exec_ctrl & 276 return vmcs_config.cpu_based_2nd_exec_ctrl &
262 SECONDARY_EXEC_ENABLE_EPT); 277 SECONDARY_EXEC_ENABLE_EPT;
263}
264
265static inline int vm_need_ept(void)
266{
267 return (cpu_has_vmx_ept() && enable_ept);
268} 278}
269 279
270static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) 280static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
271{ 281{
272 return ((cpu_has_vmx_virtualize_apic_accesses()) && 282 return flexpriority_enabled &&
273 (irqchip_in_kernel(kvm))); 283 (cpu_has_vmx_virtualize_apic_accesses()) &&
284 (irqchip_in_kernel(kvm));
274} 285}
275 286
276static inline int cpu_has_vmx_vpid(void) 287static inline int cpu_has_vmx_vpid(void)
277{ 288{
278 return (vmcs_config.cpu_based_2nd_exec_ctrl & 289 return vmcs_config.cpu_based_2nd_exec_ctrl &
279 SECONDARY_EXEC_ENABLE_VPID); 290 SECONDARY_EXEC_ENABLE_VPID;
280} 291}
281 292
282static inline int cpu_has_virtual_nmis(void) 293static inline int cpu_has_virtual_nmis(void)
@@ -284,6 +295,11 @@ static inline int cpu_has_virtual_nmis(void)
284 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; 295 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
285} 296}
286 297
298static inline bool report_flexpriority(void)
299{
300 return flexpriority_enabled;
301}
302
287static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) 303static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
288{ 304{
289 int i; 305 int i;
@@ -381,7 +397,7 @@ static inline void ept_sync_global(void)
381 397
382static inline void ept_sync_context(u64 eptp) 398static inline void ept_sync_context(u64 eptp)
383{ 399{
384 if (vm_need_ept()) { 400 if (enable_ept) {
385 if (cpu_has_vmx_invept_context()) 401 if (cpu_has_vmx_invept_context())
386 __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0); 402 __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
387 else 403 else
@@ -391,7 +407,7 @@ static inline void ept_sync_context(u64 eptp)
391 407
392static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa) 408static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
393{ 409{
394 if (vm_need_ept()) { 410 if (enable_ept) {
395 if (cpu_has_vmx_invept_individual_addr()) 411 if (cpu_has_vmx_invept_individual_addr())
396 __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR, 412 __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
397 eptp, gpa); 413 eptp, gpa);
@@ -478,7 +494,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
478{ 494{
479 u32 eb; 495 u32 eb;
480 496
481 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR); 497 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR);
482 if (!vcpu->fpu_active) 498 if (!vcpu->fpu_active)
483 eb |= 1u << NM_VECTOR; 499 eb |= 1u << NM_VECTOR;
484 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { 500 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
@@ -488,9 +504,9 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
488 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) 504 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
489 eb |= 1u << BP_VECTOR; 505 eb |= 1u << BP_VECTOR;
490 } 506 }
491 if (vcpu->arch.rmode.active) 507 if (vcpu->arch.rmode.vm86_active)
492 eb = ~0; 508 eb = ~0;
493 if (vm_need_ept()) 509 if (enable_ept)
494 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ 510 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
495 vmcs_write32(EXCEPTION_BITMAP, eb); 511 vmcs_write32(EXCEPTION_BITMAP, eb);
496} 512}
@@ -724,29 +740,50 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
724 740
725static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 741static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
726{ 742{
727 if (vcpu->arch.rmode.active) 743 if (vcpu->arch.rmode.vm86_active)
728 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; 744 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
729 vmcs_writel(GUEST_RFLAGS, rflags); 745 vmcs_writel(GUEST_RFLAGS, rflags);
730} 746}
731 747
748static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
749{
750 u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
751 int ret = 0;
752
753 if (interruptibility & GUEST_INTR_STATE_STI)
754 ret |= X86_SHADOW_INT_STI;
755 if (interruptibility & GUEST_INTR_STATE_MOV_SS)
756 ret |= X86_SHADOW_INT_MOV_SS;
757
758 return ret & mask;
759}
760
761static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
762{
763 u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
764 u32 interruptibility = interruptibility_old;
765
766 interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
767
768 if (mask & X86_SHADOW_INT_MOV_SS)
769 interruptibility |= GUEST_INTR_STATE_MOV_SS;
770 if (mask & X86_SHADOW_INT_STI)
771 interruptibility |= GUEST_INTR_STATE_STI;
772
773 if ((interruptibility != interruptibility_old))
774 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
775}
776
732static void skip_emulated_instruction(struct kvm_vcpu *vcpu) 777static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
733{ 778{
734 unsigned long rip; 779 unsigned long rip;
735 u32 interruptibility;
736 780
737 rip = kvm_rip_read(vcpu); 781 rip = kvm_rip_read(vcpu);
738 rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); 782 rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
739 kvm_rip_write(vcpu, rip); 783 kvm_rip_write(vcpu, rip);
740 784
741 /* 785 /* skipping an emulated instruction also counts */
742 * We emulated an instruction, so temporary interrupt blocking 786 vmx_set_interrupt_shadow(vcpu, 0);
743 * should be removed, if set.
744 */
745 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
746 if (interruptibility & 3)
747 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
748 interruptibility & ~3);
749 vcpu->arch.interrupt_window_open = 1;
750} 787}
751 788
752static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, 789static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
@@ -760,7 +797,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
760 intr_info |= INTR_INFO_DELIVER_CODE_MASK; 797 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
761 } 798 }
762 799
763 if (vcpu->arch.rmode.active) { 800 if (vcpu->arch.rmode.vm86_active) {
764 vmx->rmode.irq.pending = true; 801 vmx->rmode.irq.pending = true;
765 vmx->rmode.irq.vector = nr; 802 vmx->rmode.irq.vector = nr;
766 vmx->rmode.irq.rip = kvm_rip_read(vcpu); 803 vmx->rmode.irq.rip = kvm_rip_read(vcpu);
@@ -773,8 +810,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
773 return; 810 return;
774 } 811 }
775 812
776 if (nr == BP_VECTOR || nr == OF_VECTOR) { 813 if (kvm_exception_is_soft(nr)) {
777 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); 814 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
815 vmx->vcpu.arch.event_exit_inst_len);
778 intr_info |= INTR_TYPE_SOFT_EXCEPTION; 816 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
779 } else 817 } else
780 intr_info |= INTR_TYPE_HARD_EXCEPTION; 818 intr_info |= INTR_TYPE_HARD_EXCEPTION;
@@ -782,11 +820,6 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
782 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); 820 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
783} 821}
784 822
785static bool vmx_exception_injected(struct kvm_vcpu *vcpu)
786{
787 return false;
788}
789
790/* 823/*
791 * Swap MSR entry in host/guest MSR entry array. 824 * Swap MSR entry in host/guest MSR entry array.
792 */ 825 */
@@ -812,6 +845,7 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
812static void setup_msrs(struct vcpu_vmx *vmx) 845static void setup_msrs(struct vcpu_vmx *vmx)
813{ 846{
814 int save_nmsrs; 847 int save_nmsrs;
848 unsigned long *msr_bitmap;
815 849
816 vmx_load_host_state(vmx); 850 vmx_load_host_state(vmx);
817 save_nmsrs = 0; 851 save_nmsrs = 0;
@@ -847,6 +881,15 @@ static void setup_msrs(struct vcpu_vmx *vmx)
847 __find_msr_index(vmx, MSR_KERNEL_GS_BASE); 881 __find_msr_index(vmx, MSR_KERNEL_GS_BASE);
848#endif 882#endif
849 vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER); 883 vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER);
884
885 if (cpu_has_vmx_msr_bitmap()) {
886 if (is_long_mode(&vmx->vcpu))
887 msr_bitmap = vmx_msr_bitmap_longmode;
888 else
889 msr_bitmap = vmx_msr_bitmap_legacy;
890
891 vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
892 }
850} 893}
851 894
852/* 895/*
@@ -1034,13 +1077,6 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
1034 return 0; 1077 return 0;
1035} 1078}
1036 1079
1037static int vmx_get_irq(struct kvm_vcpu *vcpu)
1038{
1039 if (!vcpu->arch.interrupt.pending)
1040 return -1;
1041 return vcpu->arch.interrupt.nr;
1042}
1043
1044static __init int cpu_has_kvm_support(void) 1080static __init int cpu_has_kvm_support(void)
1045{ 1081{
1046 return cpu_has_vmx(); 1082 return cpu_has_vmx();
@@ -1294,6 +1330,18 @@ static __init int hardware_setup(void)
1294 if (boot_cpu_has(X86_FEATURE_NX)) 1330 if (boot_cpu_has(X86_FEATURE_NX))
1295 kvm_enable_efer_bits(EFER_NX); 1331 kvm_enable_efer_bits(EFER_NX);
1296 1332
1333 if (!cpu_has_vmx_vpid())
1334 enable_vpid = 0;
1335
1336 if (!cpu_has_vmx_ept())
1337 enable_ept = 0;
1338
1339 if (!cpu_has_vmx_flexpriority())
1340 flexpriority_enabled = 0;
1341
1342 if (!cpu_has_vmx_tpr_shadow())
1343 kvm_x86_ops->update_cr8_intercept = NULL;
1344
1297 return alloc_kvm_area(); 1345 return alloc_kvm_area();
1298} 1346}
1299 1347
@@ -1324,7 +1372,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
1324 struct vcpu_vmx *vmx = to_vmx(vcpu); 1372 struct vcpu_vmx *vmx = to_vmx(vcpu);
1325 1373
1326 vmx->emulation_required = 1; 1374 vmx->emulation_required = 1;
1327 vcpu->arch.rmode.active = 0; 1375 vcpu->arch.rmode.vm86_active = 0;
1328 1376
1329 vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base); 1377 vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base);
1330 vmcs_write32(GUEST_TR_LIMIT, vcpu->arch.rmode.tr.limit); 1378 vmcs_write32(GUEST_TR_LIMIT, vcpu->arch.rmode.tr.limit);
@@ -1386,7 +1434,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
1386 struct vcpu_vmx *vmx = to_vmx(vcpu); 1434 struct vcpu_vmx *vmx = to_vmx(vcpu);
1387 1435
1388 vmx->emulation_required = 1; 1436 vmx->emulation_required = 1;
1389 vcpu->arch.rmode.active = 1; 1437 vcpu->arch.rmode.vm86_active = 1;
1390 1438
1391 vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE); 1439 vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
1392 vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); 1440 vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
@@ -1485,7 +1533,7 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
1485static void vmx_flush_tlb(struct kvm_vcpu *vcpu) 1533static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
1486{ 1534{
1487 vpid_sync_vcpu_all(to_vmx(vcpu)); 1535 vpid_sync_vcpu_all(to_vmx(vcpu));
1488 if (vm_need_ept()) 1536 if (enable_ept)
1489 ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); 1537 ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
1490} 1538}
1491 1539
@@ -1555,10 +1603,10 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1555 1603
1556 vmx_fpu_deactivate(vcpu); 1604 vmx_fpu_deactivate(vcpu);
1557 1605
1558 if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE)) 1606 if (vcpu->arch.rmode.vm86_active && (cr0 & X86_CR0_PE))
1559 enter_pmode(vcpu); 1607 enter_pmode(vcpu);
1560 1608
1561 if (!vcpu->arch.rmode.active && !(cr0 & X86_CR0_PE)) 1609 if (!vcpu->arch.rmode.vm86_active && !(cr0 & X86_CR0_PE))
1562 enter_rmode(vcpu); 1610 enter_rmode(vcpu);
1563 1611
1564#ifdef CONFIG_X86_64 1612#ifdef CONFIG_X86_64
@@ -1570,7 +1618,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1570 } 1618 }
1571#endif 1619#endif
1572 1620
1573 if (vm_need_ept()) 1621 if (enable_ept)
1574 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); 1622 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
1575 1623
1576 vmcs_writel(CR0_READ_SHADOW, cr0); 1624 vmcs_writel(CR0_READ_SHADOW, cr0);
@@ -1599,7 +1647,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1599 u64 eptp; 1647 u64 eptp;
1600 1648
1601 guest_cr3 = cr3; 1649 guest_cr3 = cr3;
1602 if (vm_need_ept()) { 1650 if (enable_ept) {
1603 eptp = construct_eptp(cr3); 1651 eptp = construct_eptp(cr3);
1604 vmcs_write64(EPT_POINTER, eptp); 1652 vmcs_write64(EPT_POINTER, eptp);
1605 ept_sync_context(eptp); 1653 ept_sync_context(eptp);
@@ -1616,11 +1664,11 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1616 1664
1617static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 1665static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1618{ 1666{
1619 unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ? 1667 unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.vm86_active ?
1620 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); 1668 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
1621 1669
1622 vcpu->arch.cr4 = cr4; 1670 vcpu->arch.cr4 = cr4;
1623 if (vm_need_ept()) 1671 if (enable_ept)
1624 ept_update_paging_mode_cr4(&hw_cr4, vcpu); 1672 ept_update_paging_mode_cr4(&hw_cr4, vcpu);
1625 1673
1626 vmcs_writel(CR4_READ_SHADOW, cr4); 1674 vmcs_writel(CR4_READ_SHADOW, cr4);
@@ -1699,7 +1747,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
1699 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 1747 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
1700 u32 ar; 1748 u32 ar;
1701 1749
1702 if (vcpu->arch.rmode.active && seg == VCPU_SREG_TR) { 1750 if (vcpu->arch.rmode.vm86_active && seg == VCPU_SREG_TR) {
1703 vcpu->arch.rmode.tr.selector = var->selector; 1751 vcpu->arch.rmode.tr.selector = var->selector;
1704 vcpu->arch.rmode.tr.base = var->base; 1752 vcpu->arch.rmode.tr.base = var->base;
1705 vcpu->arch.rmode.tr.limit = var->limit; 1753 vcpu->arch.rmode.tr.limit = var->limit;
@@ -1709,7 +1757,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
1709 vmcs_writel(sf->base, var->base); 1757 vmcs_writel(sf->base, var->base);
1710 vmcs_write32(sf->limit, var->limit); 1758 vmcs_write32(sf->limit, var->limit);
1711 vmcs_write16(sf->selector, var->selector); 1759 vmcs_write16(sf->selector, var->selector);
1712 if (vcpu->arch.rmode.active && var->s) { 1760 if (vcpu->arch.rmode.vm86_active && var->s) {
1713 /* 1761 /*
1714 * Hack real-mode segments into vm86 compatibility. 1762 * Hack real-mode segments into vm86 compatibility.
1715 */ 1763 */
@@ -1982,7 +2030,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
1982 pfn_t identity_map_pfn; 2030 pfn_t identity_map_pfn;
1983 u32 tmp; 2031 u32 tmp;
1984 2032
1985 if (!vm_need_ept()) 2033 if (!enable_ept)
1986 return 1; 2034 return 1;
1987 if (unlikely(!kvm->arch.ept_identity_pagetable)) { 2035 if (unlikely(!kvm->arch.ept_identity_pagetable)) {
1988 printk(KERN_ERR "EPT: identity-mapping pagetable " 2036 printk(KERN_ERR "EPT: identity-mapping pagetable "
@@ -2071,7 +2119,7 @@ static void allocate_vpid(struct vcpu_vmx *vmx)
2071 int vpid; 2119 int vpid;
2072 2120
2073 vmx->vpid = 0; 2121 vmx->vpid = 0;
2074 if (!enable_vpid || !cpu_has_vmx_vpid()) 2122 if (!enable_vpid)
2075 return; 2123 return;
2076 spin_lock(&vmx_vpid_lock); 2124 spin_lock(&vmx_vpid_lock);
2077 vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); 2125 vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
@@ -2082,9 +2130,9 @@ static void allocate_vpid(struct vcpu_vmx *vmx)
2082 spin_unlock(&vmx_vpid_lock); 2130 spin_unlock(&vmx_vpid_lock);
2083} 2131}
2084 2132
2085static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) 2133static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
2086{ 2134{
2087 void *va; 2135 int f = sizeof(unsigned long);
2088 2136
2089 if (!cpu_has_vmx_msr_bitmap()) 2137 if (!cpu_has_vmx_msr_bitmap())
2090 return; 2138 return;
@@ -2094,16 +2142,21 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
2094 * have the write-low and read-high bitmap offsets the wrong way round. 2142 * have the write-low and read-high bitmap offsets the wrong way round.
2095 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. 2143 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
2096 */ 2144 */
2097 va = kmap(msr_bitmap);
2098 if (msr <= 0x1fff) { 2145 if (msr <= 0x1fff) {
2099 __clear_bit(msr, va + 0x000); /* read-low */ 2146 __clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
2100 __clear_bit(msr, va + 0x800); /* write-low */ 2147 __clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
2101 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { 2148 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
2102 msr &= 0x1fff; 2149 msr &= 0x1fff;
2103 __clear_bit(msr, va + 0x400); /* read-high */ 2150 __clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
2104 __clear_bit(msr, va + 0xc00); /* write-high */ 2151 __clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
2105 } 2152 }
2106 kunmap(msr_bitmap); 2153}
2154
2155static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
2156{
2157 if (!longmode_only)
2158 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
2159 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
2107} 2160}
2108 2161
2109/* 2162/*
@@ -2121,11 +2174,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2121 u32 exec_control; 2174 u32 exec_control;
2122 2175
2123 /* I/O */ 2176 /* I/O */
2124 vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a)); 2177 vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a));
2125 vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b)); 2178 vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b));
2126 2179
2127 if (cpu_has_vmx_msr_bitmap()) 2180 if (cpu_has_vmx_msr_bitmap())
2128 vmcs_write64(MSR_BITMAP, page_to_phys(vmx_msr_bitmap)); 2181 vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
2129 2182
2130 vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ 2183 vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
2131 2184
@@ -2141,7 +2194,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2141 CPU_BASED_CR8_LOAD_EXITING; 2194 CPU_BASED_CR8_LOAD_EXITING;
2142#endif 2195#endif
2143 } 2196 }
2144 if (!vm_need_ept()) 2197 if (!enable_ept)
2145 exec_control |= CPU_BASED_CR3_STORE_EXITING | 2198 exec_control |= CPU_BASED_CR3_STORE_EXITING |
2146 CPU_BASED_CR3_LOAD_EXITING | 2199 CPU_BASED_CR3_LOAD_EXITING |
2147 CPU_BASED_INVLPG_EXITING; 2200 CPU_BASED_INVLPG_EXITING;
@@ -2154,7 +2207,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2154 ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; 2207 ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
2155 if (vmx->vpid == 0) 2208 if (vmx->vpid == 0)
2156 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; 2209 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
2157 if (!vm_need_ept()) 2210 if (!enable_ept)
2158 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; 2211 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
2159 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); 2212 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
2160 } 2213 }
@@ -2273,7 +2326,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2273 goto out; 2326 goto out;
2274 } 2327 }
2275 2328
2276 vmx->vcpu.arch.rmode.active = 0; 2329 vmx->vcpu.arch.rmode.vm86_active = 0;
2277 2330
2278 vmx->soft_vnmi_blocked = 0; 2331 vmx->soft_vnmi_blocked = 0;
2279 2332
@@ -2402,14 +2455,16 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
2402 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 2455 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2403} 2456}
2404 2457
2405static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) 2458static void vmx_inject_irq(struct kvm_vcpu *vcpu)
2406{ 2459{
2407 struct vcpu_vmx *vmx = to_vmx(vcpu); 2460 struct vcpu_vmx *vmx = to_vmx(vcpu);
2461 uint32_t intr;
2462 int irq = vcpu->arch.interrupt.nr;
2408 2463
2409 KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler); 2464 KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
2410 2465
2411 ++vcpu->stat.irq_injections; 2466 ++vcpu->stat.irq_injections;
2412 if (vcpu->arch.rmode.active) { 2467 if (vcpu->arch.rmode.vm86_active) {
2413 vmx->rmode.irq.pending = true; 2468 vmx->rmode.irq.pending = true;
2414 vmx->rmode.irq.vector = irq; 2469 vmx->rmode.irq.vector = irq;
2415 vmx->rmode.irq.rip = kvm_rip_read(vcpu); 2470 vmx->rmode.irq.rip = kvm_rip_read(vcpu);
@@ -2419,8 +2474,14 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
2419 kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); 2474 kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
2420 return; 2475 return;
2421 } 2476 }
2422 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 2477 intr = irq | INTR_INFO_VALID_MASK;
2423 irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); 2478 if (vcpu->arch.interrupt.soft) {
2479 intr |= INTR_TYPE_SOFT_INTR;
2480 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
2481 vmx->vcpu.arch.event_exit_inst_len);
2482 } else
2483 intr |= INTR_TYPE_EXT_INTR;
2484 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
2424} 2485}
2425 2486
2426static void vmx_inject_nmi(struct kvm_vcpu *vcpu) 2487static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -2441,7 +2502,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
2441 } 2502 }
2442 2503
2443 ++vcpu->stat.nmi_injections; 2504 ++vcpu->stat.nmi_injections;
2444 if (vcpu->arch.rmode.active) { 2505 if (vcpu->arch.rmode.vm86_active) {
2445 vmx->rmode.irq.pending = true; 2506 vmx->rmode.irq.pending = true;
2446 vmx->rmode.irq.vector = NMI_VECTOR; 2507 vmx->rmode.irq.vector = NMI_VECTOR;
2447 vmx->rmode.irq.rip = kvm_rip_read(vcpu); 2508 vmx->rmode.irq.rip = kvm_rip_read(vcpu);
@@ -2456,76 +2517,21 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
2456 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); 2517 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
2457} 2518}
2458 2519
2459static void vmx_update_window_states(struct kvm_vcpu *vcpu) 2520static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
2460{ 2521{
2461 u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
2462
2463 vcpu->arch.nmi_window_open =
2464 !(guest_intr & (GUEST_INTR_STATE_STI |
2465 GUEST_INTR_STATE_MOV_SS |
2466 GUEST_INTR_STATE_NMI));
2467 if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) 2522 if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
2468 vcpu->arch.nmi_window_open = 0; 2523 return 0;
2469
2470 vcpu->arch.interrupt_window_open =
2471 ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
2472 !(guest_intr & (GUEST_INTR_STATE_STI |
2473 GUEST_INTR_STATE_MOV_SS)));
2474}
2475
2476static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
2477{
2478 int word_index = __ffs(vcpu->arch.irq_summary);
2479 int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
2480 int irq = word_index * BITS_PER_LONG + bit_index;
2481 2524
2482 clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]); 2525 return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
2483 if (!vcpu->arch.irq_pending[word_index]) 2526 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS |
2484 clear_bit(word_index, &vcpu->arch.irq_summary); 2527 GUEST_INTR_STATE_NMI));
2485 kvm_queue_interrupt(vcpu, irq);
2486} 2528}
2487 2529
2488static void do_interrupt_requests(struct kvm_vcpu *vcpu, 2530static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
2489 struct kvm_run *kvm_run)
2490{ 2531{
2491 vmx_update_window_states(vcpu); 2532 return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
2492 2533 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
2493 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) 2534 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
2494 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
2495 GUEST_INTR_STATE_STI |
2496 GUEST_INTR_STATE_MOV_SS);
2497
2498 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
2499 if (vcpu->arch.interrupt.pending) {
2500 enable_nmi_window(vcpu);
2501 } else if (vcpu->arch.nmi_window_open) {
2502 vcpu->arch.nmi_pending = false;
2503 vcpu->arch.nmi_injected = true;
2504 } else {
2505 enable_nmi_window(vcpu);
2506 return;
2507 }
2508 }
2509 if (vcpu->arch.nmi_injected) {
2510 vmx_inject_nmi(vcpu);
2511 if (vcpu->arch.nmi_pending)
2512 enable_nmi_window(vcpu);
2513 else if (vcpu->arch.irq_summary
2514 || kvm_run->request_interrupt_window)
2515 enable_irq_window(vcpu);
2516 return;
2517 }
2518
2519 if (vcpu->arch.interrupt_window_open) {
2520 if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
2521 kvm_do_inject_irq(vcpu);
2522
2523 if (vcpu->arch.interrupt.pending)
2524 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
2525 }
2526 if (!vcpu->arch.interrupt_window_open &&
2527 (vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
2528 enable_irq_window(vcpu);
2529} 2535}
2530 2536
2531static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) 2537static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -2585,6 +2591,31 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
2585 return 0; 2591 return 0;
2586} 2592}
2587 2593
2594/*
2595 * Trigger machine check on the host. We assume all the MSRs are already set up
2596 * by the CPU and that we still run on the same CPU as the MCE occurred on.
2597 * We pass a fake environment to the machine check handler because we want
2598 * the guest to be always treated like user space, no matter what context
2599 * it used internally.
2600 */
2601static void kvm_machine_check(void)
2602{
2603#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64)
2604 struct pt_regs regs = {
2605 .cs = 3, /* Fake ring 3 no matter what the guest ran on */
2606 .flags = X86_EFLAGS_IF,
2607 };
2608
2609 do_machine_check(&regs, 0);
2610#endif
2611}
2612
2613static int handle_machine_check(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2614{
2615 /* already handled by vcpu_run */
2616 return 1;
2617}
2618
2588static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2619static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2589{ 2620{
2590 struct vcpu_vmx *vmx = to_vmx(vcpu); 2621 struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2596,17 +2627,14 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2596 vect_info = vmx->idt_vectoring_info; 2627 vect_info = vmx->idt_vectoring_info;
2597 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 2628 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
2598 2629
2630 if (is_machine_check(intr_info))
2631 return handle_machine_check(vcpu, kvm_run);
2632
2599 if ((vect_info & VECTORING_INFO_VALID_MASK) && 2633 if ((vect_info & VECTORING_INFO_VALID_MASK) &&
2600 !is_page_fault(intr_info)) 2634 !is_page_fault(intr_info))
2601 printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " 2635 printk(KERN_ERR "%s: unexpected, vectoring info 0x%x "
2602 "intr info 0x%x\n", __func__, vect_info, intr_info); 2636 "intr info 0x%x\n", __func__, vect_info, intr_info);
2603 2637
2604 if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) {
2605 int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
2606 set_bit(irq, vcpu->arch.irq_pending);
2607 set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
2608 }
2609
2610 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) 2638 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
2611 return 1; /* already handled by vmx_vcpu_run() */ 2639 return 1; /* already handled by vmx_vcpu_run() */
2612 2640
@@ -2628,17 +2656,17 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2628 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); 2656 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
2629 if (is_page_fault(intr_info)) { 2657 if (is_page_fault(intr_info)) {
2630 /* EPT won't cause page fault directly */ 2658 /* EPT won't cause page fault directly */
2631 if (vm_need_ept()) 2659 if (enable_ept)
2632 BUG(); 2660 BUG();
2633 cr2 = vmcs_readl(EXIT_QUALIFICATION); 2661 cr2 = vmcs_readl(EXIT_QUALIFICATION);
2634 KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, 2662 KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
2635 (u32)((u64)cr2 >> 32), handler); 2663 (u32)((u64)cr2 >> 32), handler);
2636 if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending) 2664 if (kvm_event_needs_reinjection(vcpu))
2637 kvm_mmu_unprotect_page_virt(vcpu, cr2); 2665 kvm_mmu_unprotect_page_virt(vcpu, cr2);
2638 return kvm_mmu_page_fault(vcpu, cr2, error_code); 2666 return kvm_mmu_page_fault(vcpu, cr2, error_code);
2639 } 2667 }
2640 2668
2641 if (vcpu->arch.rmode.active && 2669 if (vcpu->arch.rmode.vm86_active &&
2642 handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, 2670 handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
2643 error_code)) { 2671 error_code)) {
2644 if (vcpu->arch.halt_request) { 2672 if (vcpu->arch.halt_request) {
@@ -2753,13 +2781,18 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2753 kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg)); 2781 kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg));
2754 skip_emulated_instruction(vcpu); 2782 skip_emulated_instruction(vcpu);
2755 return 1; 2783 return 1;
2756 case 8: 2784 case 8: {
2757 kvm_set_cr8(vcpu, kvm_register_read(vcpu, reg)); 2785 u8 cr8_prev = kvm_get_cr8(vcpu);
2758 skip_emulated_instruction(vcpu); 2786 u8 cr8 = kvm_register_read(vcpu, reg);
2759 if (irqchip_in_kernel(vcpu->kvm)) 2787 kvm_set_cr8(vcpu, cr8);
2760 return 1; 2788 skip_emulated_instruction(vcpu);
2761 kvm_run->exit_reason = KVM_EXIT_SET_TPR; 2789 if (irqchip_in_kernel(vcpu->kvm))
2762 return 0; 2790 return 1;
2791 if (cr8_prev <= cr8)
2792 return 1;
2793 kvm_run->exit_reason = KVM_EXIT_SET_TPR;
2794 return 0;
2795 }
2763 }; 2796 };
2764 break; 2797 break;
2765 case 2: /* clts */ 2798 case 2: /* clts */
@@ -2957,8 +2990,9 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
2957 * If the user space waits to inject interrupts, exit as soon as 2990 * If the user space waits to inject interrupts, exit as soon as
2958 * possible 2991 * possible
2959 */ 2992 */
2960 if (kvm_run->request_interrupt_window && 2993 if (!irqchip_in_kernel(vcpu->kvm) &&
2961 !vcpu->arch.irq_summary) { 2994 kvm_run->request_interrupt_window &&
2995 !kvm_cpu_has_interrupt(vcpu)) {
2962 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; 2996 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
2963 return 0; 2997 return 0;
2964 } 2998 }
@@ -2980,7 +3014,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2980 3014
2981static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3015static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2982{ 3016{
2983 u64 exit_qualification = vmcs_read64(EXIT_QUALIFICATION); 3017 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
2984 3018
2985 kvm_mmu_invlpg(vcpu, exit_qualification); 3019 kvm_mmu_invlpg(vcpu, exit_qualification);
2986 skip_emulated_instruction(vcpu); 3020 skip_emulated_instruction(vcpu);
@@ -2996,11 +3030,11 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2996 3030
2997static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3031static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2998{ 3032{
2999 u64 exit_qualification; 3033 unsigned long exit_qualification;
3000 enum emulation_result er; 3034 enum emulation_result er;
3001 unsigned long offset; 3035 unsigned long offset;
3002 3036
3003 exit_qualification = vmcs_read64(EXIT_QUALIFICATION); 3037 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
3004 offset = exit_qualification & 0xffful; 3038 offset = exit_qualification & 0xffful;
3005 3039
3006 er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); 3040 er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
@@ -3019,22 +3053,41 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3019 struct vcpu_vmx *vmx = to_vmx(vcpu); 3053 struct vcpu_vmx *vmx = to_vmx(vcpu);
3020 unsigned long exit_qualification; 3054 unsigned long exit_qualification;
3021 u16 tss_selector; 3055 u16 tss_selector;
3022 int reason; 3056 int reason, type, idt_v;
3057
3058 idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
3059 type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
3023 3060
3024 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 3061 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
3025 3062
3026 reason = (u32)exit_qualification >> 30; 3063 reason = (u32)exit_qualification >> 30;
3027 if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected && 3064 if (reason == TASK_SWITCH_GATE && idt_v) {
3028 (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && 3065 switch (type) {
3029 (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK) 3066 case INTR_TYPE_NMI_INTR:
3030 == INTR_TYPE_NMI_INTR) { 3067 vcpu->arch.nmi_injected = false;
3031 vcpu->arch.nmi_injected = false; 3068 if (cpu_has_virtual_nmis())
3032 if (cpu_has_virtual_nmis()) 3069 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
3033 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 3070 GUEST_INTR_STATE_NMI);
3034 GUEST_INTR_STATE_NMI); 3071 break;
3072 case INTR_TYPE_EXT_INTR:
3073 case INTR_TYPE_SOFT_INTR:
3074 kvm_clear_interrupt_queue(vcpu);
3075 break;
3076 case INTR_TYPE_HARD_EXCEPTION:
3077 case INTR_TYPE_SOFT_EXCEPTION:
3078 kvm_clear_exception_queue(vcpu);
3079 break;
3080 default:
3081 break;
3082 }
3035 } 3083 }
3036 tss_selector = exit_qualification; 3084 tss_selector = exit_qualification;
3037 3085
3086 if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
3087 type != INTR_TYPE_EXT_INTR &&
3088 type != INTR_TYPE_NMI_INTR))
3089 skip_emulated_instruction(vcpu);
3090
3038 if (!kvm_task_switch(vcpu, tss_selector, reason)) 3091 if (!kvm_task_switch(vcpu, tss_selector, reason))
3039 return 0; 3092 return 0;
3040 3093
@@ -3051,11 +3104,11 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3051 3104
3052static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3105static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3053{ 3106{
3054 u64 exit_qualification; 3107 unsigned long exit_qualification;
3055 gpa_t gpa; 3108 gpa_t gpa;
3056 int gla_validity; 3109 int gla_validity;
3057 3110
3058 exit_qualification = vmcs_read64(EXIT_QUALIFICATION); 3111 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
3059 3112
3060 if (exit_qualification & (1 << 6)) { 3113 if (exit_qualification & (1 << 6)) {
3061 printk(KERN_ERR "EPT: GPA exceeds GAW!\n"); 3114 printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
@@ -3067,7 +3120,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3067 printk(KERN_ERR "EPT: Handling EPT violation failed!\n"); 3120 printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
3068 printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", 3121 printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
3069 (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), 3122 (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
3070 (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS)); 3123 vmcs_readl(GUEST_LINEAR_ADDRESS));
3071 printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", 3124 printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
3072 (long unsigned int)exit_qualification); 3125 (long unsigned int)exit_qualification);
3073 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 3126 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -3150,6 +3203,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
3150 [EXIT_REASON_WBINVD] = handle_wbinvd, 3203 [EXIT_REASON_WBINVD] = handle_wbinvd,
3151 [EXIT_REASON_TASK_SWITCH] = handle_task_switch, 3204 [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
3152 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, 3205 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
3206 [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
3153}; 3207};
3154 3208
3155static const int kvm_vmx_max_exit_handlers = 3209static const int kvm_vmx_max_exit_handlers =
@@ -3159,10 +3213,10 @@ static const int kvm_vmx_max_exit_handlers =
3159 * The guest has exited. See if we can fix it or if we need userspace 3213 * The guest has exited. See if we can fix it or if we need userspace
3160 * assistance. 3214 * assistance.
3161 */ 3215 */
3162static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 3216static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3163{ 3217{
3164 u32 exit_reason = vmcs_read32(VM_EXIT_REASON);
3165 struct vcpu_vmx *vmx = to_vmx(vcpu); 3218 struct vcpu_vmx *vmx = to_vmx(vcpu);
3219 u32 exit_reason = vmx->exit_reason;
3166 u32 vectoring_info = vmx->idt_vectoring_info; 3220 u32 vectoring_info = vmx->idt_vectoring_info;
3167 3221
3168 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), 3222 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu),
@@ -3178,7 +3232,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3178 3232
3179 /* Access CR3 don't cause VMExit in paging mode, so we need 3233 /* Access CR3 don't cause VMExit in paging mode, so we need
3180 * to sync with guest real CR3. */ 3234 * to sync with guest real CR3. */
3181 if (vm_need_ept() && is_paging(vcpu)) { 3235 if (enable_ept && is_paging(vcpu)) {
3182 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); 3236 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
3183 ept_load_pdptrs(vcpu); 3237 ept_load_pdptrs(vcpu);
3184 } 3238 }
@@ -3199,9 +3253,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3199 __func__, vectoring_info, exit_reason); 3253 __func__, vectoring_info, exit_reason);
3200 3254
3201 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) { 3255 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) {
3202 if (vcpu->arch.interrupt_window_open) { 3256 if (vmx_interrupt_allowed(vcpu)) {
3203 vmx->soft_vnmi_blocked = 0; 3257 vmx->soft_vnmi_blocked = 0;
3204 vcpu->arch.nmi_window_open = 1;
3205 } else if (vmx->vnmi_blocked_time > 1000000000LL && 3258 } else if (vmx->vnmi_blocked_time > 1000000000LL &&
3206 vcpu->arch.nmi_pending) { 3259 vcpu->arch.nmi_pending) {
3207 /* 3260 /*
@@ -3214,7 +3267,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3214 "state on VCPU %d after 1 s timeout\n", 3267 "state on VCPU %d after 1 s timeout\n",
3215 __func__, vcpu->vcpu_id); 3268 __func__, vcpu->vcpu_id);
3216 vmx->soft_vnmi_blocked = 0; 3269 vmx->soft_vnmi_blocked = 0;
3217 vmx->vcpu.arch.nmi_window_open = 1;
3218 } 3270 }
3219 } 3271 }
3220 3272
@@ -3228,122 +3280,107 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3228 return 0; 3280 return 0;
3229} 3281}
3230 3282
3231static void update_tpr_threshold(struct kvm_vcpu *vcpu) 3283static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
3232{ 3284{
3233 int max_irr, tpr; 3285 if (irr == -1 || tpr < irr) {
3234
3235 if (!vm_need_tpr_shadow(vcpu->kvm))
3236 return;
3237
3238 if (!kvm_lapic_enabled(vcpu) ||
3239 ((max_irr = kvm_lapic_find_highest_irr(vcpu)) == -1)) {
3240 vmcs_write32(TPR_THRESHOLD, 0); 3286 vmcs_write32(TPR_THRESHOLD, 0);
3241 return; 3287 return;
3242 } 3288 }
3243 3289
3244 tpr = (kvm_lapic_get_cr8(vcpu) & 0x0f) << 4; 3290 vmcs_write32(TPR_THRESHOLD, irr);
3245 vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
3246} 3291}
3247 3292
3248static void vmx_complete_interrupts(struct vcpu_vmx *vmx) 3293static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3249{ 3294{
3250 u32 exit_intr_info; 3295 u32 exit_intr_info;
3251 u32 idt_vectoring_info; 3296 u32 idt_vectoring_info = vmx->idt_vectoring_info;
3252 bool unblock_nmi; 3297 bool unblock_nmi;
3253 u8 vector; 3298 u8 vector;
3254 int type; 3299 int type;
3255 bool idtv_info_valid; 3300 bool idtv_info_valid;
3256 u32 error;
3257 3301
3258 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 3302 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
3303
3304 vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
3305
3306 /* Handle machine checks before interrupts are enabled */
3307 if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
3308 || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI
3309 && is_machine_check(exit_intr_info)))
3310 kvm_machine_check();
3311
3312 /* We need to handle NMIs before interrupts are enabled */
3313 if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
3314 (exit_intr_info & INTR_INFO_VALID_MASK)) {
3315 KVMTRACE_0D(NMI, &vmx->vcpu, handler);
3316 asm("int $2");
3317 }
3318
3319 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
3320
3259 if (cpu_has_virtual_nmis()) { 3321 if (cpu_has_virtual_nmis()) {
3260 unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; 3322 unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
3261 vector = exit_intr_info & INTR_INFO_VECTOR_MASK; 3323 vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
3262 /* 3324 /*
3263 * SDM 3: 25.7.1.2 3325 * SDM 3: 27.7.1.2 (September 2008)
3264 * Re-set bit "block by NMI" before VM entry if vmexit caused by 3326 * Re-set bit "block by NMI" before VM entry if vmexit caused by
3265 * a guest IRET fault. 3327 * a guest IRET fault.
3328 * SDM 3: 23.2.2 (September 2008)
3329 * Bit 12 is undefined in any of the following cases:
3330 * If the VM exit sets the valid bit in the IDT-vectoring
3331 * information field.
3332 * If the VM exit is due to a double fault.
3266 */ 3333 */
3267 if (unblock_nmi && vector != DF_VECTOR) 3334 if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
3335 vector != DF_VECTOR && !idtv_info_valid)
3268 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 3336 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
3269 GUEST_INTR_STATE_NMI); 3337 GUEST_INTR_STATE_NMI);
3270 } else if (unlikely(vmx->soft_vnmi_blocked)) 3338 } else if (unlikely(vmx->soft_vnmi_blocked))
3271 vmx->vnmi_blocked_time += 3339 vmx->vnmi_blocked_time +=
3272 ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); 3340 ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
3273 3341
3274 idt_vectoring_info = vmx->idt_vectoring_info; 3342 vmx->vcpu.arch.nmi_injected = false;
3275 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; 3343 kvm_clear_exception_queue(&vmx->vcpu);
3344 kvm_clear_interrupt_queue(&vmx->vcpu);
3345
3346 if (!idtv_info_valid)
3347 return;
3348
3276 vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; 3349 vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
3277 type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; 3350 type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
3278 if (vmx->vcpu.arch.nmi_injected) { 3351
3352 switch (type) {
3353 case INTR_TYPE_NMI_INTR:
3354 vmx->vcpu.arch.nmi_injected = true;
3279 /* 3355 /*
3280 * SDM 3: 25.7.1.2 3356 * SDM 3: 27.7.1.2 (September 2008)
3281 * Clear bit "block by NMI" before VM entry if a NMI delivery 3357 * Clear bit "block by NMI" before VM entry if a NMI
3282 * faulted. 3358 * delivery faulted.
3283 */ 3359 */
3284 if (idtv_info_valid && type == INTR_TYPE_NMI_INTR) 3360 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
3285 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, 3361 GUEST_INTR_STATE_NMI);
3286 GUEST_INTR_STATE_NMI); 3362 break;
3287 else 3363 case INTR_TYPE_SOFT_EXCEPTION:
3288 vmx->vcpu.arch.nmi_injected = false; 3364 vmx->vcpu.arch.event_exit_inst_len =
3289 } 3365 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
3290 kvm_clear_exception_queue(&vmx->vcpu); 3366 /* fall through */
3291 if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION || 3367 case INTR_TYPE_HARD_EXCEPTION:
3292 type == INTR_TYPE_SOFT_EXCEPTION)) {
3293 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { 3368 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
3294 error = vmcs_read32(IDT_VECTORING_ERROR_CODE); 3369 u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE);
3295 kvm_queue_exception_e(&vmx->vcpu, vector, error); 3370 kvm_queue_exception_e(&vmx->vcpu, vector, err);
3296 } else 3371 } else
3297 kvm_queue_exception(&vmx->vcpu, vector); 3372 kvm_queue_exception(&vmx->vcpu, vector);
3298 vmx->idt_vectoring_info = 0; 3373 break;
3299 } 3374 case INTR_TYPE_SOFT_INTR:
3300 kvm_clear_interrupt_queue(&vmx->vcpu); 3375 vmx->vcpu.arch.event_exit_inst_len =
3301 if (idtv_info_valid && type == INTR_TYPE_EXT_INTR) { 3376 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
3302 kvm_queue_interrupt(&vmx->vcpu, vector); 3377 /* fall through */
3303 vmx->idt_vectoring_info = 0; 3378 case INTR_TYPE_EXT_INTR:
3304 } 3379 kvm_queue_interrupt(&vmx->vcpu, vector,
3305} 3380 type == INTR_TYPE_SOFT_INTR);
3306 3381 break;
3307static void vmx_intr_assist(struct kvm_vcpu *vcpu) 3382 default:
3308{ 3383 break;
3309 update_tpr_threshold(vcpu);
3310
3311 vmx_update_window_states(vcpu);
3312
3313 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
3314 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
3315 GUEST_INTR_STATE_STI |
3316 GUEST_INTR_STATE_MOV_SS);
3317
3318 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
3319 if (vcpu->arch.interrupt.pending) {
3320 enable_nmi_window(vcpu);
3321 } else if (vcpu->arch.nmi_window_open) {
3322 vcpu->arch.nmi_pending = false;
3323 vcpu->arch.nmi_injected = true;
3324 } else {
3325 enable_nmi_window(vcpu);
3326 return;
3327 }
3328 }
3329 if (vcpu->arch.nmi_injected) {
3330 vmx_inject_nmi(vcpu);
3331 if (vcpu->arch.nmi_pending)
3332 enable_nmi_window(vcpu);
3333 else if (kvm_cpu_has_interrupt(vcpu))
3334 enable_irq_window(vcpu);
3335 return;
3336 }
3337 if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
3338 if (vcpu->arch.interrupt_window_open)
3339 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
3340 else
3341 enable_irq_window(vcpu);
3342 }
3343 if (vcpu->arch.interrupt.pending) {
3344 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
3345 if (kvm_cpu_has_interrupt(vcpu))
3346 enable_irq_window(vcpu);
3347 } 3384 }
3348} 3385}
3349 3386
@@ -3381,7 +3418,6 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx)
3381static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3418static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3382{ 3419{
3383 struct vcpu_vmx *vmx = to_vmx(vcpu); 3420 struct vcpu_vmx *vmx = to_vmx(vcpu);
3384 u32 intr_info;
3385 3421
3386 /* Record the guest's net vcpu time for enforced NMI injections. */ 3422 /* Record the guest's net vcpu time for enforced NMI injections. */
3387 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) 3423 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
@@ -3505,20 +3541,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3505 if (vmx->rmode.irq.pending) 3541 if (vmx->rmode.irq.pending)
3506 fixup_rmode_irq(vmx); 3542 fixup_rmode_irq(vmx);
3507 3543
3508 vmx_update_window_states(vcpu);
3509
3510 asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); 3544 asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
3511 vmx->launched = 1; 3545 vmx->launched = 1;
3512 3546
3513 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
3514
3515 /* We need to handle NMIs before interrupts are enabled */
3516 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
3517 (intr_info & INTR_INFO_VALID_MASK)) {
3518 KVMTRACE_0D(NMI, vcpu, handler);
3519 asm("int $2");
3520 }
3521
3522 vmx_complete_interrupts(vmx); 3547 vmx_complete_interrupts(vmx);
3523} 3548}
3524 3549
@@ -3593,7 +3618,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
3593 if (alloc_apic_access_page(kvm) != 0) 3618 if (alloc_apic_access_page(kvm) != 0)
3594 goto free_vmcs; 3619 goto free_vmcs;
3595 3620
3596 if (vm_need_ept()) 3621 if (enable_ept)
3597 if (alloc_identity_pagetable(kvm) != 0) 3622 if (alloc_identity_pagetable(kvm) != 0)
3598 goto free_vmcs; 3623 goto free_vmcs;
3599 3624
@@ -3631,9 +3656,32 @@ static int get_ept_level(void)
3631 return VMX_EPT_DEFAULT_GAW + 1; 3656 return VMX_EPT_DEFAULT_GAW + 1;
3632} 3657}
3633 3658
3634static int vmx_get_mt_mask_shift(void) 3659static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
3635{ 3660{
3636 return VMX_EPT_MT_EPTE_SHIFT; 3661 u64 ret;
3662
3663 /* For VT-d and EPT combination
3664 * 1. MMIO: always map as UC
3665 * 2. EPT with VT-d:
3666 * a. VT-d without snooping control feature: can't guarantee the
3667 * result, try to trust guest.
3668 * b. VT-d with snooping control feature: snooping control feature of
3669 * VT-d engine can guarantee the cache correctness. Just set it
3670 * to WB to keep consistent with host. So the same as item 3.
3671 * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep
3672 * consistent with host MTRR
3673 */
3674 if (is_mmio)
3675 ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
3676 else if (vcpu->kvm->arch.iommu_domain &&
3677 !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY))
3678 ret = kvm_get_guest_memory_type(vcpu, gfn) <<
3679 VMX_EPT_MT_EPTE_SHIFT;
3680 else
3681 ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
3682 | VMX_EPT_IGMT_BIT;
3683
3684 return ret;
3637} 3685}
3638 3686
3639static struct kvm_x86_ops vmx_x86_ops = { 3687static struct kvm_x86_ops vmx_x86_ops = {
@@ -3644,7 +3692,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
3644 .check_processor_compatibility = vmx_check_processor_compat, 3692 .check_processor_compatibility = vmx_check_processor_compat,
3645 .hardware_enable = hardware_enable, 3693 .hardware_enable = hardware_enable,
3646 .hardware_disable = hardware_disable, 3694 .hardware_disable = hardware_disable,
3647 .cpu_has_accelerated_tpr = cpu_has_vmx_virtualize_apic_accesses, 3695 .cpu_has_accelerated_tpr = report_flexpriority,
3648 3696
3649 .vcpu_create = vmx_create_vcpu, 3697 .vcpu_create = vmx_create_vcpu,
3650 .vcpu_free = vmx_free_vcpu, 3698 .vcpu_free = vmx_free_vcpu,
@@ -3678,78 +3726,82 @@ static struct kvm_x86_ops vmx_x86_ops = {
3678 .tlb_flush = vmx_flush_tlb, 3726 .tlb_flush = vmx_flush_tlb,
3679 3727
3680 .run = vmx_vcpu_run, 3728 .run = vmx_vcpu_run,
3681 .handle_exit = kvm_handle_exit, 3729 .handle_exit = vmx_handle_exit,
3682 .skip_emulated_instruction = skip_emulated_instruction, 3730 .skip_emulated_instruction = skip_emulated_instruction,
3731 .set_interrupt_shadow = vmx_set_interrupt_shadow,
3732 .get_interrupt_shadow = vmx_get_interrupt_shadow,
3683 .patch_hypercall = vmx_patch_hypercall, 3733 .patch_hypercall = vmx_patch_hypercall,
3684 .get_irq = vmx_get_irq,
3685 .set_irq = vmx_inject_irq, 3734 .set_irq = vmx_inject_irq,
3735 .set_nmi = vmx_inject_nmi,
3686 .queue_exception = vmx_queue_exception, 3736 .queue_exception = vmx_queue_exception,
3687 .exception_injected = vmx_exception_injected, 3737 .interrupt_allowed = vmx_interrupt_allowed,
3688 .inject_pending_irq = vmx_intr_assist, 3738 .nmi_allowed = vmx_nmi_allowed,
3689 .inject_pending_vectors = do_interrupt_requests, 3739 .enable_nmi_window = enable_nmi_window,
3740 .enable_irq_window = enable_irq_window,
3741 .update_cr8_intercept = update_cr8_intercept,
3690 3742
3691 .set_tss_addr = vmx_set_tss_addr, 3743 .set_tss_addr = vmx_set_tss_addr,
3692 .get_tdp_level = get_ept_level, 3744 .get_tdp_level = get_ept_level,
3693 .get_mt_mask_shift = vmx_get_mt_mask_shift, 3745 .get_mt_mask = vmx_get_mt_mask,
3694}; 3746};
3695 3747
3696static int __init vmx_init(void) 3748static int __init vmx_init(void)
3697{ 3749{
3698 void *va;
3699 int r; 3750 int r;
3700 3751
3701 vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); 3752 vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
3702 if (!vmx_io_bitmap_a) 3753 if (!vmx_io_bitmap_a)
3703 return -ENOMEM; 3754 return -ENOMEM;
3704 3755
3705 vmx_io_bitmap_b = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); 3756 vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
3706 if (!vmx_io_bitmap_b) { 3757 if (!vmx_io_bitmap_b) {
3707 r = -ENOMEM; 3758 r = -ENOMEM;
3708 goto out; 3759 goto out;
3709 } 3760 }
3710 3761
3711 vmx_msr_bitmap = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); 3762 vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
3712 if (!vmx_msr_bitmap) { 3763 if (!vmx_msr_bitmap_legacy) {
3713 r = -ENOMEM; 3764 r = -ENOMEM;
3714 goto out1; 3765 goto out1;
3715 } 3766 }
3716 3767
3768 vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
3769 if (!vmx_msr_bitmap_longmode) {
3770 r = -ENOMEM;
3771 goto out2;
3772 }
3773
3717 /* 3774 /*
3718 * Allow direct access to the PC debug port (it is often used for I/O 3775 * Allow direct access to the PC debug port (it is often used for I/O
3719 * delays, but the vmexits simply slow things down). 3776 * delays, but the vmexits simply slow things down).
3720 */ 3777 */
3721 va = kmap(vmx_io_bitmap_a); 3778 memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
3722 memset(va, 0xff, PAGE_SIZE); 3779 clear_bit(0x80, vmx_io_bitmap_a);
3723 clear_bit(0x80, va);
3724 kunmap(vmx_io_bitmap_a);
3725 3780
3726 va = kmap(vmx_io_bitmap_b); 3781 memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
3727 memset(va, 0xff, PAGE_SIZE);
3728 kunmap(vmx_io_bitmap_b);
3729 3782
3730 va = kmap(vmx_msr_bitmap); 3783 memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
3731 memset(va, 0xff, PAGE_SIZE); 3784 memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
3732 kunmap(vmx_msr_bitmap);
3733 3785
3734 set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ 3786 set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
3735 3787
3736 r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); 3788 r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE);
3737 if (r) 3789 if (r)
3738 goto out2; 3790 goto out3;
3739 3791
3740 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_FS_BASE); 3792 vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
3741 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_GS_BASE); 3793 vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
3742 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_CS); 3794 vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
3743 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); 3795 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
3744 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); 3796 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
3797 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
3745 3798
3746 if (vm_need_ept()) { 3799 if (enable_ept) {
3747 bypass_guest_pf = 0; 3800 bypass_guest_pf = 0;
3748 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | 3801 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
3749 VMX_EPT_WRITABLE_MASK); 3802 VMX_EPT_WRITABLE_MASK);
3750 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, 3803 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
3751 VMX_EPT_EXECUTABLE_MASK, 3804 VMX_EPT_EXECUTABLE_MASK);
3752 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
3753 kvm_enable_tdp(); 3805 kvm_enable_tdp();
3754 } else 3806 } else
3755 kvm_disable_tdp(); 3807 kvm_disable_tdp();
@@ -3761,20 +3813,23 @@ static int __init vmx_init(void)
3761 3813
3762 return 0; 3814 return 0;
3763 3815
3816out3:
3817 free_page((unsigned long)vmx_msr_bitmap_longmode);
3764out2: 3818out2:
3765 __free_page(vmx_msr_bitmap); 3819 free_page((unsigned long)vmx_msr_bitmap_legacy);
3766out1: 3820out1:
3767 __free_page(vmx_io_bitmap_b); 3821 free_page((unsigned long)vmx_io_bitmap_b);
3768out: 3822out:
3769 __free_page(vmx_io_bitmap_a); 3823 free_page((unsigned long)vmx_io_bitmap_a);
3770 return r; 3824 return r;
3771} 3825}
3772 3826
3773static void __exit vmx_exit(void) 3827static void __exit vmx_exit(void)
3774{ 3828{
3775 __free_page(vmx_msr_bitmap); 3829 free_page((unsigned long)vmx_msr_bitmap_legacy);
3776 __free_page(vmx_io_bitmap_b); 3830 free_page((unsigned long)vmx_msr_bitmap_longmode);
3777 __free_page(vmx_io_bitmap_a); 3831 free_page((unsigned long)vmx_io_bitmap_b);
3832 free_page((unsigned long)vmx_io_bitmap_a);
3778 3833
3779 kvm_exit(); 3834 kvm_exit();
3780} 3835}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3944e917e794..249540f98513 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -91,7 +91,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
91 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 91 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
92 { "hypercalls", VCPU_STAT(hypercalls) }, 92 { "hypercalls", VCPU_STAT(hypercalls) },
93 { "request_irq", VCPU_STAT(request_irq_exits) }, 93 { "request_irq", VCPU_STAT(request_irq_exits) },
94 { "request_nmi", VCPU_STAT(request_nmi_exits) },
95 { "irq_exits", VCPU_STAT(irq_exits) }, 94 { "irq_exits", VCPU_STAT(irq_exits) },
96 { "host_state_reload", VCPU_STAT(host_state_reload) }, 95 { "host_state_reload", VCPU_STAT(host_state_reload) },
97 { "efer_reload", VCPU_STAT(efer_reload) }, 96 { "efer_reload", VCPU_STAT(efer_reload) },
@@ -108,7 +107,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
108 { "mmu_recycled", VM_STAT(mmu_recycled) }, 107 { "mmu_recycled", VM_STAT(mmu_recycled) },
109 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, 108 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
110 { "mmu_unsync", VM_STAT(mmu_unsync) }, 109 { "mmu_unsync", VM_STAT(mmu_unsync) },
111 { "mmu_unsync_global", VM_STAT(mmu_unsync_global) },
112 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, 110 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
113 { "largepages", VM_STAT(lpages) }, 111 { "largepages", VM_STAT(lpages) },
114 { NULL } 112 { NULL }
@@ -234,7 +232,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
234 goto out; 232 goto out;
235 } 233 }
236 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { 234 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
237 if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) { 235 if (is_present_pte(pdpte[i]) &&
236 (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
238 ret = 0; 237 ret = 0;
239 goto out; 238 goto out;
240 } 239 }
@@ -321,7 +320,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
321 kvm_x86_ops->set_cr0(vcpu, cr0); 320 kvm_x86_ops->set_cr0(vcpu, cr0);
322 vcpu->arch.cr0 = cr0; 321 vcpu->arch.cr0 = cr0;
323 322
324 kvm_mmu_sync_global(vcpu);
325 kvm_mmu_reset_context(vcpu); 323 kvm_mmu_reset_context(vcpu);
326 return; 324 return;
327} 325}
@@ -370,7 +368,6 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
370 kvm_x86_ops->set_cr4(vcpu, cr4); 368 kvm_x86_ops->set_cr4(vcpu, cr4);
371 vcpu->arch.cr4 = cr4; 369 vcpu->arch.cr4 = cr4;
372 vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled; 370 vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled;
373 kvm_mmu_sync_global(vcpu);
374 kvm_mmu_reset_context(vcpu); 371 kvm_mmu_reset_context(vcpu);
375} 372}
376EXPORT_SYMBOL_GPL(kvm_set_cr4); 373EXPORT_SYMBOL_GPL(kvm_set_cr4);
@@ -523,6 +520,9 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
523 efer |= vcpu->arch.shadow_efer & EFER_LMA; 520 efer |= vcpu->arch.shadow_efer & EFER_LMA;
524 521
525 vcpu->arch.shadow_efer = efer; 522 vcpu->arch.shadow_efer = efer;
523
524 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
525 kvm_mmu_reset_context(vcpu);
526} 526}
527 527
528void kvm_enable_efer_bits(u64 mask) 528void kvm_enable_efer_bits(u64 mask)
@@ -630,14 +630,17 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
630 unsigned long flags; 630 unsigned long flags;
631 struct kvm_vcpu_arch *vcpu = &v->arch; 631 struct kvm_vcpu_arch *vcpu = &v->arch;
632 void *shared_kaddr; 632 void *shared_kaddr;
633 unsigned long this_tsc_khz;
633 634
634 if ((!vcpu->time_page)) 635 if ((!vcpu->time_page))
635 return; 636 return;
636 637
637 if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) { 638 this_tsc_khz = get_cpu_var(cpu_tsc_khz);
638 kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock); 639 if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) {
639 vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz); 640 kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock);
641 vcpu->hv_clock_tsc_khz = this_tsc_khz;
640 } 642 }
643 put_cpu_var(cpu_tsc_khz);
641 644
642 /* Keep irq disabled to prevent changes to the clock */ 645 /* Keep irq disabled to prevent changes to the clock */
643 local_irq_save(flags); 646 local_irq_save(flags);
@@ -893,6 +896,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
893 case MSR_IA32_LASTINTFROMIP: 896 case MSR_IA32_LASTINTFROMIP:
894 case MSR_IA32_LASTINTTOIP: 897 case MSR_IA32_LASTINTTOIP:
895 case MSR_VM_HSAVE_PA: 898 case MSR_VM_HSAVE_PA:
899 case MSR_P6_EVNTSEL0:
900 case MSR_P6_EVNTSEL1:
896 data = 0; 901 data = 0;
897 break; 902 break;
898 case MSR_MTRRcap: 903 case MSR_MTRRcap:
@@ -1024,6 +1029,7 @@ int kvm_dev_ioctl_check_extension(long ext)
1024 case KVM_CAP_SYNC_MMU: 1029 case KVM_CAP_SYNC_MMU:
1025 case KVM_CAP_REINJECT_CONTROL: 1030 case KVM_CAP_REINJECT_CONTROL:
1026 case KVM_CAP_IRQ_INJECT_STATUS: 1031 case KVM_CAP_IRQ_INJECT_STATUS:
1032 case KVM_CAP_ASSIGN_DEV_IRQ:
1027 r = 1; 1033 r = 1;
1028 break; 1034 break;
1029 case KVM_CAP_COALESCED_MMIO: 1035 case KVM_CAP_COALESCED_MMIO:
@@ -1241,41 +1247,53 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1241 entry->flags = 0; 1247 entry->flags = 0;
1242} 1248}
1243 1249
1250#define F(x) bit(X86_FEATURE_##x)
1251
1244static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, 1252static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1245 u32 index, int *nent, int maxnent) 1253 u32 index, int *nent, int maxnent)
1246{ 1254{
1247 const u32 kvm_supported_word0_x86_features = bit(X86_FEATURE_FPU) | 1255 unsigned f_nx = is_efer_nx() ? F(NX) : 0;
1248 bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
1249 bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
1250 bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
1251 bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
1252 bit(X86_FEATURE_SEP) | bit(X86_FEATURE_PGE) |
1253 bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
1254 bit(X86_FEATURE_CLFLSH) | bit(X86_FEATURE_MMX) |
1255 bit(X86_FEATURE_FXSR) | bit(X86_FEATURE_XMM) |
1256 bit(X86_FEATURE_XMM2) | bit(X86_FEATURE_SELFSNOOP);
1257 const u32 kvm_supported_word1_x86_features = bit(X86_FEATURE_FPU) |
1258 bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
1259 bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
1260 bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
1261 bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
1262 bit(X86_FEATURE_PGE) |
1263 bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
1264 bit(X86_FEATURE_MMX) | bit(X86_FEATURE_FXSR) |
1265 bit(X86_FEATURE_SYSCALL) |
1266 (is_efer_nx() ? bit(X86_FEATURE_NX) : 0) |
1267#ifdef CONFIG_X86_64 1256#ifdef CONFIG_X86_64
1268 bit(X86_FEATURE_LM) | 1257 unsigned f_lm = F(LM);
1258#else
1259 unsigned f_lm = 0;
1269#endif 1260#endif
1270 bit(X86_FEATURE_FXSR_OPT) | 1261
1271 bit(X86_FEATURE_MMXEXT) | 1262 /* cpuid 1.edx */
1272 bit(X86_FEATURE_3DNOWEXT) | 1263 const u32 kvm_supported_word0_x86_features =
1273 bit(X86_FEATURE_3DNOW); 1264 F(FPU) | F(VME) | F(DE) | F(PSE) |
1274 const u32 kvm_supported_word3_x86_features = 1265 F(TSC) | F(MSR) | F(PAE) | F(MCE) |
1275 bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16); 1266 F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
1267 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
1268 F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) |
1269 0 /* Reserved, DS, ACPI */ | F(MMX) |
1270 F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
1271 0 /* HTT, TM, Reserved, PBE */;
1272 /* cpuid 0x80000001.edx */
1273 const u32 kvm_supported_word1_x86_features =
1274 F(FPU) | F(VME) | F(DE) | F(PSE) |
1275 F(TSC) | F(MSR) | F(PAE) | F(MCE) |
1276 F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
1277 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
1278 F(PAT) | F(PSE36) | 0 /* Reserved */ |
1279 f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
1280 F(FXSR) | F(FXSR_OPT) | 0 /* GBPAGES */ | 0 /* RDTSCP */ |
1281 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
1282 /* cpuid 1.ecx */
1283 const u32 kvm_supported_word4_x86_features =
1284 F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ |
1285 0 /* DS-CPL, VMX, SMX, EST */ |
1286 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
1287 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
1288 0 /* Reserved, DCA */ | F(XMM4_1) |
1289 F(XMM4_2) | 0 /* x2APIC */ | F(MOVBE) | F(POPCNT) |
1290 0 /* Reserved, XSAVE, OSXSAVE */;
1291 /* cpuid 0x80000001.ecx */
1276 const u32 kvm_supported_word6_x86_features = 1292 const u32 kvm_supported_word6_x86_features =
1277 bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) | 1293 F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
1278 bit(X86_FEATURE_SVM); 1294 F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
1295 F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) |
1296 0 /* SKINIT */ | 0 /* WDT */;
1279 1297
1280 /* all calls to cpuid_count() should be made on the same cpu */ 1298 /* all calls to cpuid_count() should be made on the same cpu */
1281 get_cpu(); 1299 get_cpu();
@@ -1288,7 +1306,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1288 break; 1306 break;
1289 case 1: 1307 case 1:
1290 entry->edx &= kvm_supported_word0_x86_features; 1308 entry->edx &= kvm_supported_word0_x86_features;
1291 entry->ecx &= kvm_supported_word3_x86_features; 1309 entry->ecx &= kvm_supported_word4_x86_features;
1292 break; 1310 break;
1293 /* function 2 entries are STATEFUL. That is, repeated cpuid commands 1311 /* function 2 entries are STATEFUL. That is, repeated cpuid commands
1294 * may return different values. This forces us to get_cpu() before 1312 * may return different values. This forces us to get_cpu() before
@@ -1350,6 +1368,8 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1350 put_cpu(); 1368 put_cpu();
1351} 1369}
1352 1370
1371#undef F
1372
1353static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, 1373static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
1354 struct kvm_cpuid_entry2 __user *entries) 1374 struct kvm_cpuid_entry2 __user *entries)
1355{ 1375{
@@ -1421,8 +1441,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
1421 return -ENXIO; 1441 return -ENXIO;
1422 vcpu_load(vcpu); 1442 vcpu_load(vcpu);
1423 1443
1424 set_bit(irq->irq, vcpu->arch.irq_pending); 1444 kvm_queue_interrupt(vcpu, irq->irq, false);
1425 set_bit(irq->irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
1426 1445
1427 vcpu_put(vcpu); 1446 vcpu_put(vcpu);
1428 1447
@@ -1584,8 +1603,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1584 r = -EINVAL; 1603 r = -EINVAL;
1585 } 1604 }
1586out: 1605out:
1587 if (lapic) 1606 kfree(lapic);
1588 kfree(lapic);
1589 return r; 1607 return r;
1590} 1608}
1591 1609
@@ -1606,10 +1624,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
1606 return -EINVAL; 1624 return -EINVAL;
1607 1625
1608 down_write(&kvm->slots_lock); 1626 down_write(&kvm->slots_lock);
1627 spin_lock(&kvm->mmu_lock);
1609 1628
1610 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); 1629 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
1611 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; 1630 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
1612 1631
1632 spin_unlock(&kvm->mmu_lock);
1613 up_write(&kvm->slots_lock); 1633 up_write(&kvm->slots_lock);
1614 return 0; 1634 return 0;
1615} 1635}
@@ -1785,7 +1805,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
1785 1805
1786 /* If nothing is dirty, don't bother messing with page tables. */ 1806 /* If nothing is dirty, don't bother messing with page tables. */
1787 if (is_dirty) { 1807 if (is_dirty) {
1808 spin_lock(&kvm->mmu_lock);
1788 kvm_mmu_slot_remove_write_access(kvm, log->slot); 1809 kvm_mmu_slot_remove_write_access(kvm, log->slot);
1810 spin_unlock(&kvm->mmu_lock);
1789 kvm_flush_remote_tlbs(kvm); 1811 kvm_flush_remote_tlbs(kvm);
1790 memslot = &kvm->memslots[log->slot]; 1812 memslot = &kvm->memslots[log->slot];
1791 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 1813 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
@@ -2360,7 +2382,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
2360 u16 error_code, 2382 u16 error_code,
2361 int emulation_type) 2383 int emulation_type)
2362{ 2384{
2363 int r; 2385 int r, shadow_mask;
2364 struct decode_cache *c; 2386 struct decode_cache *c;
2365 2387
2366 kvm_clear_exception_queue(vcpu); 2388 kvm_clear_exception_queue(vcpu);
@@ -2408,7 +2430,16 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
2408 } 2430 }
2409 } 2431 }
2410 2432
2433 if (emulation_type & EMULTYPE_SKIP) {
2434 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip);
2435 return EMULATE_DONE;
2436 }
2437
2411 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 2438 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
2439 shadow_mask = vcpu->arch.emulate_ctxt.interruptibility;
2440
2441 if (r == 0)
2442 kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask);
2412 2443
2413 if (vcpu->arch.pio.string) 2444 if (vcpu->arch.pio.string)
2414 return EMULATE_DO_MMIO; 2445 return EMULATE_DO_MMIO;
@@ -2761,7 +2792,7 @@ int kvm_arch_init(void *opaque)
2761 kvm_mmu_set_nonpresent_ptes(0ull, 0ull); 2792 kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
2762 kvm_mmu_set_base_ptes(PT_PRESENT_MASK); 2793 kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
2763 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, 2794 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
2764 PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); 2795 PT_DIRTY_MASK, PT64_NX_MASK, 0);
2765 2796
2766 for_each_possible_cpu(cpu) 2797 for_each_possible_cpu(cpu)
2767 per_cpu(cpu_tsc_khz, cpu) = tsc_khz; 2798 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
@@ -3012,6 +3043,16 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
3012 return best; 3043 return best;
3013} 3044}
3014 3045
3046int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
3047{
3048 struct kvm_cpuid_entry2 *best;
3049
3050 best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
3051 if (best)
3052 return best->eax & 0xff;
3053 return 36;
3054}
3055
3015void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) 3056void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
3016{ 3057{
3017 u32 function, index; 3058 u32 function, index;
@@ -3048,10 +3089,9 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
3048static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, 3089static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
3049 struct kvm_run *kvm_run) 3090 struct kvm_run *kvm_run)
3050{ 3091{
3051 return (!vcpu->arch.irq_summary && 3092 return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
3052 kvm_run->request_interrupt_window && 3093 kvm_run->request_interrupt_window &&
3053 vcpu->arch.interrupt_window_open && 3094 kvm_arch_interrupt_allowed(vcpu));
3054 (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
3055} 3095}
3056 3096
3057static void post_kvm_run_save(struct kvm_vcpu *vcpu, 3097static void post_kvm_run_save(struct kvm_vcpu *vcpu,
@@ -3064,8 +3104,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
3064 kvm_run->ready_for_interrupt_injection = 1; 3104 kvm_run->ready_for_interrupt_injection = 1;
3065 else 3105 else
3066 kvm_run->ready_for_interrupt_injection = 3106 kvm_run->ready_for_interrupt_injection =
3067 (vcpu->arch.interrupt_window_open && 3107 kvm_arch_interrupt_allowed(vcpu) &&
3068 vcpu->arch.irq_summary == 0); 3108 !kvm_cpu_has_interrupt(vcpu) &&
3109 !kvm_event_needs_reinjection(vcpu);
3069} 3110}
3070 3111
3071static void vapic_enter(struct kvm_vcpu *vcpu) 3112static void vapic_enter(struct kvm_vcpu *vcpu)
@@ -3094,9 +3135,63 @@ static void vapic_exit(struct kvm_vcpu *vcpu)
3094 up_read(&vcpu->kvm->slots_lock); 3135 up_read(&vcpu->kvm->slots_lock);
3095} 3136}
3096 3137
3138static void update_cr8_intercept(struct kvm_vcpu *vcpu)
3139{
3140 int max_irr, tpr;
3141
3142 if (!kvm_x86_ops->update_cr8_intercept)
3143 return;
3144
3145 if (!vcpu->arch.apic->vapic_addr)
3146 max_irr = kvm_lapic_find_highest_irr(vcpu);
3147 else
3148 max_irr = -1;
3149
3150 if (max_irr != -1)
3151 max_irr >>= 4;
3152
3153 tpr = kvm_lapic_get_cr8(vcpu);
3154
3155 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
3156}
3157
3158static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3159{
3160 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
3161 kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
3162
3163 /* try to reinject previous events if any */
3164 if (vcpu->arch.nmi_injected) {
3165 kvm_x86_ops->set_nmi(vcpu);
3166 return;
3167 }
3168
3169 if (vcpu->arch.interrupt.pending) {
3170 kvm_x86_ops->set_irq(vcpu);
3171 return;
3172 }
3173
3174 /* try to inject new event if pending */
3175 if (vcpu->arch.nmi_pending) {
3176 if (kvm_x86_ops->nmi_allowed(vcpu)) {
3177 vcpu->arch.nmi_pending = false;
3178 vcpu->arch.nmi_injected = true;
3179 kvm_x86_ops->set_nmi(vcpu);
3180 }
3181 } else if (kvm_cpu_has_interrupt(vcpu)) {
3182 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
3183 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
3184 false);
3185 kvm_x86_ops->set_irq(vcpu);
3186 }
3187 }
3188}
3189
3097static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3190static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3098{ 3191{
3099 int r; 3192 int r;
3193 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
3194 kvm_run->request_interrupt_window;
3100 3195
3101 if (vcpu->requests) 3196 if (vcpu->requests)
3102 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) 3197 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
@@ -3128,9 +3223,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3128 } 3223 }
3129 } 3224 }
3130 3225
3131 clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
3132 kvm_inject_pending_timer_irqs(vcpu);
3133
3134 preempt_disable(); 3226 preempt_disable();
3135 3227
3136 kvm_x86_ops->prepare_guest_switch(vcpu); 3228 kvm_x86_ops->prepare_guest_switch(vcpu);
@@ -3138,6 +3230,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3138 3230
3139 local_irq_disable(); 3231 local_irq_disable();
3140 3232
3233 clear_bit(KVM_REQ_KICK, &vcpu->requests);
3234 smp_mb__after_clear_bit();
3235
3141 if (vcpu->requests || need_resched() || signal_pending(current)) { 3236 if (vcpu->requests || need_resched() || signal_pending(current)) {
3142 local_irq_enable(); 3237 local_irq_enable();
3143 preempt_enable(); 3238 preempt_enable();
@@ -3145,21 +3240,21 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3145 goto out; 3240 goto out;
3146 } 3241 }
3147 3242
3148 vcpu->guest_mode = 1;
3149 /*
3150 * Make sure that guest_mode assignment won't happen after
3151 * testing the pending IRQ vector bitmap.
3152 */
3153 smp_wmb();
3154
3155 if (vcpu->arch.exception.pending) 3243 if (vcpu->arch.exception.pending)
3156 __queue_exception(vcpu); 3244 __queue_exception(vcpu);
3157 else if (irqchip_in_kernel(vcpu->kvm))
3158 kvm_x86_ops->inject_pending_irq(vcpu);
3159 else 3245 else
3160 kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run); 3246 inject_pending_irq(vcpu, kvm_run);
3161 3247
3162 kvm_lapic_sync_to_vapic(vcpu); 3248 /* enable NMI/IRQ window open exits if needed */
3249 if (vcpu->arch.nmi_pending)
3250 kvm_x86_ops->enable_nmi_window(vcpu);
3251 else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
3252 kvm_x86_ops->enable_irq_window(vcpu);
3253
3254 if (kvm_lapic_enabled(vcpu)) {
3255 update_cr8_intercept(vcpu);
3256 kvm_lapic_sync_to_vapic(vcpu);
3257 }
3163 3258
3164 up_read(&vcpu->kvm->slots_lock); 3259 up_read(&vcpu->kvm->slots_lock);
3165 3260
@@ -3193,7 +3288,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3193 set_debugreg(vcpu->arch.host_dr6, 6); 3288 set_debugreg(vcpu->arch.host_dr6, 6);
3194 set_debugreg(vcpu->arch.host_dr7, 7); 3289 set_debugreg(vcpu->arch.host_dr7, 7);
3195 3290
3196 vcpu->guest_mode = 0; 3291 set_bit(KVM_REQ_KICK, &vcpu->requests);
3197 local_irq_enable(); 3292 local_irq_enable();
3198 3293
3199 ++vcpu->stat.exits; 3294 ++vcpu->stat.exits;
@@ -3220,8 +3315,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3220 profile_hit(KVM_PROFILING, (void *)rip); 3315 profile_hit(KVM_PROFILING, (void *)rip);
3221 } 3316 }
3222 3317
3223 if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu))
3224 vcpu->arch.exception.pending = false;
3225 3318
3226 kvm_lapic_sync_from_vapic(vcpu); 3319 kvm_lapic_sync_from_vapic(vcpu);
3227 3320
@@ -3230,6 +3323,7 @@ out:
3230 return r; 3323 return r;
3231} 3324}
3232 3325
3326
3233static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3327static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3234{ 3328{
3235 int r; 3329 int r;
@@ -3256,29 +3350,42 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3256 kvm_vcpu_block(vcpu); 3350 kvm_vcpu_block(vcpu);
3257 down_read(&vcpu->kvm->slots_lock); 3351 down_read(&vcpu->kvm->slots_lock);
3258 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) 3352 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
3259 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) 3353 {
3354 switch(vcpu->arch.mp_state) {
3355 case KVM_MP_STATE_HALTED:
3260 vcpu->arch.mp_state = 3356 vcpu->arch.mp_state =
3261 KVM_MP_STATE_RUNNABLE; 3357 KVM_MP_STATE_RUNNABLE;
3262 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) 3358 case KVM_MP_STATE_RUNNABLE:
3263 r = -EINTR; 3359 break;
3360 case KVM_MP_STATE_SIPI_RECEIVED:
3361 default:
3362 r = -EINTR;
3363 break;
3364 }
3365 }
3264 } 3366 }
3265 3367
3266 if (r > 0) { 3368 if (r <= 0)
3267 if (dm_request_for_irq_injection(vcpu, kvm_run)) { 3369 break;
3268 r = -EINTR; 3370
3269 kvm_run->exit_reason = KVM_EXIT_INTR; 3371 clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
3270 ++vcpu->stat.request_irq_exits; 3372 if (kvm_cpu_has_pending_timer(vcpu))
3271 } 3373 kvm_inject_pending_timer_irqs(vcpu);
3272 if (signal_pending(current)) { 3374
3273 r = -EINTR; 3375 if (dm_request_for_irq_injection(vcpu, kvm_run)) {
3274 kvm_run->exit_reason = KVM_EXIT_INTR; 3376 r = -EINTR;
3275 ++vcpu->stat.signal_exits; 3377 kvm_run->exit_reason = KVM_EXIT_INTR;
3276 } 3378 ++vcpu->stat.request_irq_exits;
3277 if (need_resched()) { 3379 }
3278 up_read(&vcpu->kvm->slots_lock); 3380 if (signal_pending(current)) {
3279 kvm_resched(vcpu); 3381 r = -EINTR;
3280 down_read(&vcpu->kvm->slots_lock); 3382 kvm_run->exit_reason = KVM_EXIT_INTR;
3281 } 3383 ++vcpu->stat.signal_exits;
3384 }
3385 if (need_resched()) {
3386 up_read(&vcpu->kvm->slots_lock);
3387 kvm_resched(vcpu);
3388 down_read(&vcpu->kvm->slots_lock);
3282 } 3389 }
3283 } 3390 }
3284 3391
@@ -3442,7 +3549,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3442 struct kvm_sregs *sregs) 3549 struct kvm_sregs *sregs)
3443{ 3550{
3444 struct descriptor_table dt; 3551 struct descriptor_table dt;
3445 int pending_vec;
3446 3552
3447 vcpu_load(vcpu); 3553 vcpu_load(vcpu);
3448 3554
@@ -3472,16 +3578,11 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3472 sregs->efer = vcpu->arch.shadow_efer; 3578 sregs->efer = vcpu->arch.shadow_efer;
3473 sregs->apic_base = kvm_get_apic_base(vcpu); 3579 sregs->apic_base = kvm_get_apic_base(vcpu);
3474 3580
3475 if (irqchip_in_kernel(vcpu->kvm)) { 3581 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
3476 memset(sregs->interrupt_bitmap, 0, 3582
3477 sizeof sregs->interrupt_bitmap); 3583 if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
3478 pending_vec = kvm_x86_ops->get_irq(vcpu); 3584 set_bit(vcpu->arch.interrupt.nr,
3479 if (pending_vec >= 0) 3585 (unsigned long *)sregs->interrupt_bitmap);
3480 set_bit(pending_vec,
3481 (unsigned long *)sregs->interrupt_bitmap);
3482 } else
3483 memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending,
3484 sizeof sregs->interrupt_bitmap);
3485 3586
3486 vcpu_put(vcpu); 3587 vcpu_put(vcpu);
3487 3588
@@ -3688,7 +3789,6 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu,
3688 tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); 3789 tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
3689 tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); 3790 tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
3690 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); 3791 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
3691 tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
3692} 3792}
3693 3793
3694static int load_state_from_tss32(struct kvm_vcpu *vcpu, 3794static int load_state_from_tss32(struct kvm_vcpu *vcpu,
@@ -3785,8 +3885,8 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu,
3785} 3885}
3786 3886
3787static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, 3887static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
3788 u32 old_tss_base, 3888 u16 old_tss_sel, u32 old_tss_base,
3789 struct desc_struct *nseg_desc) 3889 struct desc_struct *nseg_desc)
3790{ 3890{
3791 struct tss_segment_16 tss_segment_16; 3891 struct tss_segment_16 tss_segment_16;
3792 int ret = 0; 3892 int ret = 0;
@@ -3805,6 +3905,16 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
3805 &tss_segment_16, sizeof tss_segment_16)) 3905 &tss_segment_16, sizeof tss_segment_16))
3806 goto out; 3906 goto out;
3807 3907
3908 if (old_tss_sel != 0xffff) {
3909 tss_segment_16.prev_task_link = old_tss_sel;
3910
3911 if (kvm_write_guest(vcpu->kvm,
3912 get_tss_base_addr(vcpu, nseg_desc),
3913 &tss_segment_16.prev_task_link,
3914 sizeof tss_segment_16.prev_task_link))
3915 goto out;
3916 }
3917
3808 if (load_state_from_tss16(vcpu, &tss_segment_16)) 3918 if (load_state_from_tss16(vcpu, &tss_segment_16))
3809 goto out; 3919 goto out;
3810 3920
@@ -3814,7 +3924,7 @@ out:
3814} 3924}
3815 3925
3816static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, 3926static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
3817 u32 old_tss_base, 3927 u16 old_tss_sel, u32 old_tss_base,
3818 struct desc_struct *nseg_desc) 3928 struct desc_struct *nseg_desc)
3819{ 3929{
3820 struct tss_segment_32 tss_segment_32; 3930 struct tss_segment_32 tss_segment_32;
@@ -3834,6 +3944,16 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
3834 &tss_segment_32, sizeof tss_segment_32)) 3944 &tss_segment_32, sizeof tss_segment_32))
3835 goto out; 3945 goto out;
3836 3946
3947 if (old_tss_sel != 0xffff) {
3948 tss_segment_32.prev_task_link = old_tss_sel;
3949
3950 if (kvm_write_guest(vcpu->kvm,
3951 get_tss_base_addr(vcpu, nseg_desc),
3952 &tss_segment_32.prev_task_link,
3953 sizeof tss_segment_32.prev_task_link))
3954 goto out;
3955 }
3956
3837 if (load_state_from_tss32(vcpu, &tss_segment_32)) 3957 if (load_state_from_tss32(vcpu, &tss_segment_32))
3838 goto out; 3958 goto out;
3839 3959
@@ -3887,14 +4007,22 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
3887 kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); 4007 kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
3888 } 4008 }
3889 4009
3890 kvm_x86_ops->skip_emulated_instruction(vcpu); 4010 /* set back link to prev task only if NT bit is set in eflags
4011 note that old_tss_sel is not used afetr this point */
4012 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
4013 old_tss_sel = 0xffff;
4014
4015 /* set back link to prev task only if NT bit is set in eflags
4016 note that old_tss_sel is not used afetr this point */
4017 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
4018 old_tss_sel = 0xffff;
3891 4019
3892 if (nseg_desc.type & 8) 4020 if (nseg_desc.type & 8)
3893 ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base, 4021 ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel,
3894 &nseg_desc); 4022 old_tss_base, &nseg_desc);
3895 else 4023 else
3896 ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base, 4024 ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel,
3897 &nseg_desc); 4025 old_tss_base, &nseg_desc);
3898 4026
3899 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { 4027 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
3900 u32 eflags = kvm_x86_ops->get_rflags(vcpu); 4028 u32 eflags = kvm_x86_ops->get_rflags(vcpu);
@@ -3920,7 +4048,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3920 struct kvm_sregs *sregs) 4048 struct kvm_sregs *sregs)
3921{ 4049{
3922 int mmu_reset_needed = 0; 4050 int mmu_reset_needed = 0;
3923 int i, pending_vec, max_bits; 4051 int pending_vec, max_bits;
3924 struct descriptor_table dt; 4052 struct descriptor_table dt;
3925 4053
3926 vcpu_load(vcpu); 4054 vcpu_load(vcpu);
@@ -3934,7 +4062,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3934 4062
3935 vcpu->arch.cr2 = sregs->cr2; 4063 vcpu->arch.cr2 = sregs->cr2;
3936 mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; 4064 mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
3937 vcpu->arch.cr3 = sregs->cr3; 4065
4066 down_read(&vcpu->kvm->slots_lock);
4067 if (gfn_to_memslot(vcpu->kvm, sregs->cr3 >> PAGE_SHIFT))
4068 vcpu->arch.cr3 = sregs->cr3;
4069 else
4070 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
4071 up_read(&vcpu->kvm->slots_lock);
3938 4072
3939 kvm_set_cr8(vcpu, sregs->cr8); 4073 kvm_set_cr8(vcpu, sregs->cr8);
3940 4074
@@ -3956,25 +4090,14 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3956 if (mmu_reset_needed) 4090 if (mmu_reset_needed)
3957 kvm_mmu_reset_context(vcpu); 4091 kvm_mmu_reset_context(vcpu);
3958 4092
3959 if (!irqchip_in_kernel(vcpu->kvm)) { 4093 max_bits = (sizeof sregs->interrupt_bitmap) << 3;
3960 memcpy(vcpu->arch.irq_pending, sregs->interrupt_bitmap, 4094 pending_vec = find_first_bit(
3961 sizeof vcpu->arch.irq_pending); 4095 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
3962 vcpu->arch.irq_summary = 0; 4096 if (pending_vec < max_bits) {
3963 for (i = 0; i < ARRAY_SIZE(vcpu->arch.irq_pending); ++i) 4097 kvm_queue_interrupt(vcpu, pending_vec, false);
3964 if (vcpu->arch.irq_pending[i]) 4098 pr_debug("Set back pending irq %d\n", pending_vec);
3965 __set_bit(i, &vcpu->arch.irq_summary); 4099 if (irqchip_in_kernel(vcpu->kvm))
3966 } else { 4100 kvm_pic_clear_isr_ack(vcpu->kvm);
3967 max_bits = (sizeof sregs->interrupt_bitmap) << 3;
3968 pending_vec = find_first_bit(
3969 (const unsigned long *)sregs->interrupt_bitmap,
3970 max_bits);
3971 /* Only pending external irq is handled here */
3972 if (pending_vec < max_bits) {
3973 kvm_x86_ops->set_irq(vcpu, pending_vec);
3974 pr_debug("Set back pending irq %d\n",
3975 pending_vec);
3976 }
3977 kvm_pic_clear_isr_ack(vcpu->kvm);
3978 } 4101 }
3979 4102
3980 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); 4103 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
@@ -4308,7 +4431,6 @@ struct kvm *kvm_arch_create_vm(void)
4308 return ERR_PTR(-ENOMEM); 4431 return ERR_PTR(-ENOMEM);
4309 4432
4310 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 4433 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
4311 INIT_LIST_HEAD(&kvm->arch.oos_global_pages);
4312 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 4434 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
4313 4435
4314 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ 4436 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
@@ -4411,12 +4533,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
4411 } 4533 }
4412 } 4534 }
4413 4535
4536 spin_lock(&kvm->mmu_lock);
4414 if (!kvm->arch.n_requested_mmu_pages) { 4537 if (!kvm->arch.n_requested_mmu_pages) {
4415 unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); 4538 unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
4416 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); 4539 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
4417 } 4540 }
4418 4541
4419 kvm_mmu_slot_remove_write_access(kvm, mem->slot); 4542 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
4543 spin_unlock(&kvm->mmu_lock);
4420 kvm_flush_remote_tlbs(kvm); 4544 kvm_flush_remote_tlbs(kvm);
4421 4545
4422 return 0; 4546 return 0;
@@ -4425,6 +4549,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
4425void kvm_arch_flush_shadow(struct kvm *kvm) 4549void kvm_arch_flush_shadow(struct kvm *kvm)
4426{ 4550{
4427 kvm_mmu_zap_all(kvm); 4551 kvm_mmu_zap_all(kvm);
4552 kvm_reload_remote_mmus(kvm);
4428} 4553}
4429 4554
4430int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 4555int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
@@ -4434,28 +4559,24 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
4434 || vcpu->arch.nmi_pending; 4559 || vcpu->arch.nmi_pending;
4435} 4560}
4436 4561
4437static void vcpu_kick_intr(void *info)
4438{
4439#ifdef DEBUG
4440 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
4441 printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
4442#endif
4443}
4444
4445void kvm_vcpu_kick(struct kvm_vcpu *vcpu) 4562void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
4446{ 4563{
4447 int ipi_pcpu = vcpu->cpu; 4564 int me;
4448 int cpu = get_cpu(); 4565 int cpu = vcpu->cpu;
4449 4566
4450 if (waitqueue_active(&vcpu->wq)) { 4567 if (waitqueue_active(&vcpu->wq)) {
4451 wake_up_interruptible(&vcpu->wq); 4568 wake_up_interruptible(&vcpu->wq);
4452 ++vcpu->stat.halt_wakeup; 4569 ++vcpu->stat.halt_wakeup;
4453 } 4570 }
4454 /* 4571
4455 * We may be called synchronously with irqs disabled in guest mode, 4572 me = get_cpu();
4456 * So need not to call smp_call_function_single() in that case. 4573 if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
4457 */ 4574 if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
4458 if (vcpu->guest_mode && vcpu->cpu != cpu) 4575 smp_send_reschedule(cpu);
4459 smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
4460 put_cpu(); 4576 put_cpu();
4461} 4577}
4578
4579int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
4580{
4581 return kvm_x86_ops->interrupt_allowed(vcpu);
4582}
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 6a4be78a7384..4c8e10af78e8 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -8,9 +8,11 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
8 vcpu->arch.exception.pending = false; 8 vcpu->arch.exception.pending = false;
9} 9}
10 10
11static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector) 11static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
12 bool soft)
12{ 13{
13 vcpu->arch.interrupt.pending = true; 14 vcpu->arch.interrupt.pending = true;
15 vcpu->arch.interrupt.soft = soft;
14 vcpu->arch.interrupt.nr = vector; 16 vcpu->arch.interrupt.nr = vector;
15} 17}
16 18
@@ -19,4 +21,14 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
19 vcpu->arch.interrupt.pending = false; 21 vcpu->arch.interrupt.pending = false;
20} 22}
21 23
24static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
25{
26 return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
27 vcpu->arch.nmi_injected;
28}
29
30static inline bool kvm_exception_is_soft(unsigned int nr)
31{
32 return (nr == BP_VECTOR) || (nr == OF_VECTOR);
33}
22#endif 34#endif
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index ca91749d2083..c1b6c232e02b 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -59,13 +59,14 @@
59#define SrcImm (5<<4) /* Immediate operand. */ 59#define SrcImm (5<<4) /* Immediate operand. */
60#define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ 60#define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */
61#define SrcOne (7<<4) /* Implied '1' */ 61#define SrcOne (7<<4) /* Implied '1' */
62#define SrcMask (7<<4) 62#define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */
63#define SrcMask (0xf<<4)
63/* Generic ModRM decode. */ 64/* Generic ModRM decode. */
64#define ModRM (1<<7) 65#define ModRM (1<<8)
65/* Destination is only written; never read. */ 66/* Destination is only written; never read. */
66#define Mov (1<<8) 67#define Mov (1<<9)
67#define BitOp (1<<9) 68#define BitOp (1<<10)
68#define MemAbs (1<<10) /* Memory operand is absolute displacement */ 69#define MemAbs (1<<11) /* Memory operand is absolute displacement */
69#define String (1<<12) /* String instruction (rep capable) */ 70#define String (1<<12) /* String instruction (rep capable) */
70#define Stack (1<<13) /* Stack instruction (push/pop) */ 71#define Stack (1<<13) /* Stack instruction (push/pop) */
71#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ 72#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
@@ -76,6 +77,7 @@
76#define Src2CL (1<<29) 77#define Src2CL (1<<29)
77#define Src2ImmByte (2<<29) 78#define Src2ImmByte (2<<29)
78#define Src2One (3<<29) 79#define Src2One (3<<29)
80#define Src2Imm16 (4<<29)
79#define Src2Mask (7<<29) 81#define Src2Mask (7<<29)
80 82
81enum { 83enum {
@@ -135,11 +137,11 @@ static u32 opcode_table[256] = {
135 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ 137 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */
136 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ 138 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */
137 /* 0x70 - 0x77 */ 139 /* 0x70 - 0x77 */
138 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, 140 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
139 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, 141 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
140 /* 0x78 - 0x7F */ 142 /* 0x78 - 0x7F */
141 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, 143 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
142 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, 144 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
143 /* 0x80 - 0x87 */ 145 /* 0x80 - 0x87 */
144 Group | Group1_80, Group | Group1_81, 146 Group | Group1_80, Group | Group1_81,
145 Group | Group1_82, Group | Group1_83, 147 Group | Group1_82, Group | Group1_83,
@@ -153,7 +155,8 @@ static u32 opcode_table[256] = {
153 /* 0x90 - 0x97 */ 155 /* 0x90 - 0x97 */
154 DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, 156 DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
155 /* 0x98 - 0x9F */ 157 /* 0x98 - 0x9F */
156 0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, 158 0, 0, SrcImm | Src2Imm16, 0,
159 ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
157 /* 0xA0 - 0xA7 */ 160 /* 0xA0 - 0xA7 */
158 ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, 161 ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
159 ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, 162 ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
@@ -178,7 +181,8 @@ static u32 opcode_table[256] = {
178 0, ImplicitOps | Stack, 0, 0, 181 0, ImplicitOps | Stack, 0, 0,
179 ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, 182 ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
180 /* 0xC8 - 0xCF */ 183 /* 0xC8 - 0xCF */
181 0, 0, 0, ImplicitOps | Stack, 0, 0, 0, 0, 184 0, 0, 0, ImplicitOps | Stack,
185 ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps,
182 /* 0xD0 - 0xD7 */ 186 /* 0xD0 - 0xD7 */
183 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, 187 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
184 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, 188 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
@@ -187,11 +191,11 @@ static u32 opcode_table[256] = {
187 0, 0, 0, 0, 0, 0, 0, 0, 191 0, 0, 0, 0, 0, 0, 0, 0,
188 /* 0xE0 - 0xE7 */ 192 /* 0xE0 - 0xE7 */
189 0, 0, 0, 0, 193 0, 0, 0, 0,
190 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, 194 ByteOp | SrcImmUByte, SrcImmUByte,
191 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, 195 ByteOp | SrcImmUByte, SrcImmUByte,
192 /* 0xE8 - 0xEF */ 196 /* 0xE8 - 0xEF */
193 ImplicitOps | Stack, SrcImm | ImplicitOps, 197 SrcImm | Stack, SrcImm | ImplicitOps,
194 ImplicitOps, SrcImmByte | ImplicitOps, 198 SrcImm | Src2Imm16, SrcImmByte | ImplicitOps,
195 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, 199 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
196 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, 200 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
197 /* 0xF0 - 0xF7 */ 201 /* 0xF0 - 0xF7 */
@@ -230,10 +234,8 @@ static u32 twobyte_table[256] = {
230 /* 0x70 - 0x7F */ 234 /* 0x70 - 0x7F */
231 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 235 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
232 /* 0x80 - 0x8F */ 236 /* 0x80 - 0x8F */
233 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, 237 SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
234 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, 238 SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
235 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
236 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
237 /* 0x90 - 0x9F */ 239 /* 0x90 - 0x9F */
238 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 240 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
239 /* 0xA0 - 0xA7 */ 241 /* 0xA0 - 0xA7 */
@@ -1044,10 +1046,14 @@ done_prefixes:
1044 } 1046 }
1045 break; 1047 break;
1046 case SrcImmByte: 1048 case SrcImmByte:
1049 case SrcImmUByte:
1047 c->src.type = OP_IMM; 1050 c->src.type = OP_IMM;
1048 c->src.ptr = (unsigned long *)c->eip; 1051 c->src.ptr = (unsigned long *)c->eip;
1049 c->src.bytes = 1; 1052 c->src.bytes = 1;
1050 c->src.val = insn_fetch(s8, 1, c->eip); 1053 if ((c->d & SrcMask) == SrcImmByte)
1054 c->src.val = insn_fetch(s8, 1, c->eip);
1055 else
1056 c->src.val = insn_fetch(u8, 1, c->eip);
1051 break; 1057 break;
1052 case SrcOne: 1058 case SrcOne:
1053 c->src.bytes = 1; 1059 c->src.bytes = 1;
@@ -1072,6 +1078,12 @@ done_prefixes:
1072 c->src2.bytes = 1; 1078 c->src2.bytes = 1;
1073 c->src2.val = insn_fetch(u8, 1, c->eip); 1079 c->src2.val = insn_fetch(u8, 1, c->eip);
1074 break; 1080 break;
1081 case Src2Imm16:
1082 c->src2.type = OP_IMM;
1083 c->src2.ptr = (unsigned long *)c->eip;
1084 c->src2.bytes = 2;
1085 c->src2.val = insn_fetch(u16, 2, c->eip);
1086 break;
1075 case Src2One: 1087 case Src2One:
1076 c->src2.bytes = 1; 1088 c->src2.bytes = 1;
1077 c->src2.val = 1; 1089 c->src2.val = 1;
@@ -1349,6 +1361,20 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
1349 return 0; 1361 return 0;
1350} 1362}
1351 1363
1364void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
1365{
1366 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask);
1367 /*
1368 * an sti; sti; sequence only disable interrupts for the first
1369 * instruction. So, if the last instruction, be it emulated or
1370 * not, left the system with the INT_STI flag enabled, it
1371 * means that the last instruction is an sti. We should not
1372 * leave the flag on in this case. The same goes for mov ss
1373 */
1374 if (!(int_shadow & mask))
1375 ctxt->interruptibility = mask;
1376}
1377
1352int 1378int
1353x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) 1379x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1354{ 1380{
@@ -1360,6 +1386,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1360 int io_dir_in; 1386 int io_dir_in;
1361 int rc = 0; 1387 int rc = 0;
1362 1388
1389 ctxt->interruptibility = 0;
1390
1363 /* Shadow copy of register state. Committed on successful emulation. 1391 /* Shadow copy of register state. Committed on successful emulation.
1364 * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't 1392 * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't
1365 * modify them. 1393 * modify them.
@@ -1531,13 +1559,10 @@ special_insn:
1531 return -1; 1559 return -1;
1532 } 1560 }
1533 return 0; 1561 return 0;
1534 case 0x70 ... 0x7f: /* jcc (short) */ { 1562 case 0x70 ... 0x7f: /* jcc (short) */
1535 int rel = insn_fetch(s8, 1, c->eip);
1536
1537 if (test_cc(c->b, ctxt->eflags)) 1563 if (test_cc(c->b, ctxt->eflags))
1538 jmp_rel(c, rel); 1564 jmp_rel(c, c->src.val);
1539 break; 1565 break;
1540 }
1541 case 0x80 ... 0x83: /* Grp1 */ 1566 case 0x80 ... 0x83: /* Grp1 */
1542 switch (c->modrm_reg) { 1567 switch (c->modrm_reg) {
1543 case 0: 1568 case 0:
@@ -1609,6 +1634,9 @@ special_insn:
1609 int err; 1634 int err;
1610 1635
1611 sel = c->src.val; 1636 sel = c->src.val;
1637 if (c->modrm_reg == VCPU_SREG_SS)
1638 toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
1639
1612 if (c->modrm_reg <= 5) { 1640 if (c->modrm_reg <= 5) {
1613 type_bits = (c->modrm_reg == 1) ? 9 : 1; 1641 type_bits = (c->modrm_reg == 1) ? 9 : 1;
1614 err = kvm_load_segment_descriptor(ctxt->vcpu, sel, 1642 err = kvm_load_segment_descriptor(ctxt->vcpu, sel,
@@ -1769,59 +1797,32 @@ special_insn:
1769 break; 1797 break;
1770 case 0xe4: /* inb */ 1798 case 0xe4: /* inb */
1771 case 0xe5: /* in */ 1799 case 0xe5: /* in */
1772 port = insn_fetch(u8, 1, c->eip); 1800 port = c->src.val;
1773 io_dir_in = 1; 1801 io_dir_in = 1;
1774 goto do_io; 1802 goto do_io;
1775 case 0xe6: /* outb */ 1803 case 0xe6: /* outb */
1776 case 0xe7: /* out */ 1804 case 0xe7: /* out */
1777 port = insn_fetch(u8, 1, c->eip); 1805 port = c->src.val;
1778 io_dir_in = 0; 1806 io_dir_in = 0;
1779 goto do_io; 1807 goto do_io;
1780 case 0xe8: /* call (near) */ { 1808 case 0xe8: /* call (near) */ {
1781 long int rel; 1809 long int rel = c->src.val;
1782 switch (c->op_bytes) {
1783 case 2:
1784 rel = insn_fetch(s16, 2, c->eip);
1785 break;
1786 case 4:
1787 rel = insn_fetch(s32, 4, c->eip);
1788 break;
1789 default:
1790 DPRINTF("Call: Invalid op_bytes\n");
1791 goto cannot_emulate;
1792 }
1793 c->src.val = (unsigned long) c->eip; 1810 c->src.val = (unsigned long) c->eip;
1794 jmp_rel(c, rel); 1811 jmp_rel(c, rel);
1795 c->op_bytes = c->ad_bytes;
1796 emulate_push(ctxt); 1812 emulate_push(ctxt);
1797 break; 1813 break;
1798 } 1814 }
1799 case 0xe9: /* jmp rel */ 1815 case 0xe9: /* jmp rel */
1800 goto jmp; 1816 goto jmp;
1801 case 0xea: /* jmp far */ { 1817 case 0xea: /* jmp far */
1802 uint32_t eip; 1818 if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9,
1803 uint16_t sel; 1819 VCPU_SREG_CS) < 0) {
1804
1805 switch (c->op_bytes) {
1806 case 2:
1807 eip = insn_fetch(u16, 2, c->eip);
1808 break;
1809 case 4:
1810 eip = insn_fetch(u32, 4, c->eip);
1811 break;
1812 default:
1813 DPRINTF("jmp far: Invalid op_bytes\n");
1814 goto cannot_emulate;
1815 }
1816 sel = insn_fetch(u16, 2, c->eip);
1817 if (kvm_load_segment_descriptor(ctxt->vcpu, sel, 9, VCPU_SREG_CS) < 0) {
1818 DPRINTF("jmp far: Failed to load CS descriptor\n"); 1820 DPRINTF("jmp far: Failed to load CS descriptor\n");
1819 goto cannot_emulate; 1821 goto cannot_emulate;
1820 } 1822 }
1821 1823
1822 c->eip = eip; 1824 c->eip = c->src.val;
1823 break; 1825 break;
1824 }
1825 case 0xeb: 1826 case 0xeb:
1826 jmp: /* jmp rel short */ 1827 jmp: /* jmp rel short */
1827 jmp_rel(c, c->src.val); 1828 jmp_rel(c, c->src.val);
@@ -1865,6 +1866,7 @@ special_insn:
1865 c->dst.type = OP_NONE; /* Disable writeback. */ 1866 c->dst.type = OP_NONE; /* Disable writeback. */
1866 break; 1867 break;
1867 case 0xfb: /* sti */ 1868 case 0xfb: /* sti */
1869 toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
1868 ctxt->eflags |= X86_EFLAGS_IF; 1870 ctxt->eflags |= X86_EFLAGS_IF;
1869 c->dst.type = OP_NONE; /* Disable writeback. */ 1871 c->dst.type = OP_NONE; /* Disable writeback. */
1870 break; 1872 break;
@@ -2039,28 +2041,11 @@ twobyte_insn:
2039 if (!test_cc(c->b, ctxt->eflags)) 2041 if (!test_cc(c->b, ctxt->eflags))
2040 c->dst.type = OP_NONE; /* no writeback */ 2042 c->dst.type = OP_NONE; /* no writeback */
2041 break; 2043 break;
2042 case 0x80 ... 0x8f: /* jnz rel, etc*/ { 2044 case 0x80 ... 0x8f: /* jnz rel, etc*/
2043 long int rel;
2044
2045 switch (c->op_bytes) {
2046 case 2:
2047 rel = insn_fetch(s16, 2, c->eip);
2048 break;
2049 case 4:
2050 rel = insn_fetch(s32, 4, c->eip);
2051 break;
2052 case 8:
2053 rel = insn_fetch(s64, 8, c->eip);
2054 break;
2055 default:
2056 DPRINTF("jnz: Invalid op_bytes\n");
2057 goto cannot_emulate;
2058 }
2059 if (test_cc(c->b, ctxt->eflags)) 2045 if (test_cc(c->b, ctxt->eflags))
2060 jmp_rel(c, rel); 2046 jmp_rel(c, c->src.val);
2061 c->dst.type = OP_NONE; 2047 c->dst.type = OP_NONE;
2062 break; 2048 break;
2063 }
2064 case 0xa3: 2049 case 0xa3:
2065 bt: /* bt */ 2050 bt: /* bt */
2066 c->dst.type = OP_NONE; 2051 c->dst.type = OP_NONE;
diff --git a/block/blk-core.c b/block/blk-core.c
index 9475bf99b891..648f15cb41f1 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1732,10 +1732,14 @@ static int __end_that_request_first(struct request *req, int error,
1732 trace_block_rq_complete(req->q, req); 1732 trace_block_rq_complete(req->q, req);
1733 1733
1734 /* 1734 /*
1735 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual 1735 * For fs requests, rq is just carrier of independent bio's
1736 * sense key with us all the way through 1736 * and each partial completion should be handled separately.
1737 * Reset per-request error on each partial completion.
1738 *
1739 * TODO: tj: This is too subtle. It would be better to let
1740 * low level drivers do what they see fit.
1737 */ 1741 */
1738 if (!blk_pc_request(req)) 1742 if (blk_fs_request(req))
1739 req->errors = 0; 1743 req->errors = 0;
1740 1744
1741 if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) { 1745 if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) {
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index af761dc434f6..4895f0e05322 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -277,8 +277,8 @@ static int hci_uart_tty_open(struct tty_struct *tty)
277 /* FIXME: why is this needed. Note don't use ldisc_ref here as the 277 /* FIXME: why is this needed. Note don't use ldisc_ref here as the
278 open path is before the ldisc is referencable */ 278 open path is before the ldisc is referencable */
279 279
280 if (tty->ldisc.ops->flush_buffer) 280 if (tty->ldisc->ops->flush_buffer)
281 tty->ldisc.ops->flush_buffer(tty); 281 tty->ldisc->ops->flush_buffer(tty);
282 tty_driver_flush_buffer(tty); 282 tty_driver_flush_buffer(tty);
283 283
284 return 0; 284 return 0;
@@ -463,7 +463,6 @@ static int hci_uart_tty_ioctl(struct tty_struct *tty, struct file * file,
463 clear_bit(HCI_UART_PROTO_SET, &hu->flags); 463 clear_bit(HCI_UART_PROTO_SET, &hu->flags);
464 return err; 464 return err;
465 } 465 }
466 tty->low_latency = 1;
467 } else 466 } else
468 return -EBUSY; 467 return -EBUSY;
469 break; 468 break;
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 735bbe2be51a..02ecfd5fa61c 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -97,6 +97,19 @@ config DEVKMEM
97 kind of kernel debugging operations. 97 kind of kernel debugging operations.
98 When in doubt, say "N". 98 When in doubt, say "N".
99 99
100config BFIN_JTAG_COMM
101 tristate "Blackfin JTAG Communication"
102 depends on BLACKFIN
103 help
104 Add support for emulating a TTY device over the Blackfin JTAG.
105
106 To compile this driver as a module, choose M here: the
107 module will be called bfin_jtag_comm.
108
109config BFIN_JTAG_COMM_CONSOLE
110 bool "Console on Blackfin JTAG"
111 depends on BFIN_JTAG_COMM=y
112
100config SERIAL_NONSTANDARD 113config SERIAL_NONSTANDARD
101 bool "Non-standard serial port support" 114 bool "Non-standard serial port support"
102 depends on HAS_IOMEM 115 depends on HAS_IOMEM
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 9caf5b5ad1c0..189efcff08ce 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_LEGACY_PTYS) += pty.o
13obj-$(CONFIG_UNIX98_PTYS) += pty.o 13obj-$(CONFIG_UNIX98_PTYS) += pty.o
14obj-y += misc.o 14obj-y += misc.o
15obj-$(CONFIG_VT) += vt_ioctl.o vc_screen.o selection.o keyboard.o 15obj-$(CONFIG_VT) += vt_ioctl.o vc_screen.o selection.o keyboard.o
16obj-$(CONFIG_BFIN_JTAG_COMM) += bfin_jtag_comm.o
16obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o 17obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o
17obj-$(CONFIG_HW_CONSOLE) += vt.o defkeymap.o 18obj-$(CONFIG_HW_CONSOLE) += vt.o defkeymap.o
18obj-$(CONFIG_AUDIT) += tty_audit.o 19obj-$(CONFIG_AUDIT) += tty_audit.o
diff --git a/drivers/char/bfin_jtag_comm.c b/drivers/char/bfin_jtag_comm.c
new file mode 100644
index 000000000000..44c113d56045
--- /dev/null
+++ b/drivers/char/bfin_jtag_comm.c
@@ -0,0 +1,365 @@
1/*
2 * TTY over Blackfin JTAG Communication
3 *
4 * Copyright 2008-2009 Analog Devices Inc.
5 *
6 * Enter bugs at http://blackfin.uclinux.org/
7 *
8 * Licensed under the GPL-2 or later.
9 */
10
11#include <linux/circ_buf.h>
12#include <linux/console.h>
13#include <linux/delay.h>
14#include <linux/err.h>
15#include <linux/kernel.h>
16#include <linux/kthread.h>
17#include <linux/module.h>
18#include <linux/mutex.h>
19#include <linux/sched.h>
20#include <linux/tty.h>
21#include <linux/tty_driver.h>
22#include <linux/tty_flip.h>
23#include <asm/atomic.h>
24
25/* See the Debug/Emulation chapter in the HRM */
26#define EMUDOF 0x00000001 /* EMUDAT_OUT full & valid */
27#define EMUDIF 0x00000002 /* EMUDAT_IN full & valid */
28#define EMUDOOVF 0x00000004 /* EMUDAT_OUT overflow */
29#define EMUDIOVF 0x00000008 /* EMUDAT_IN overflow */
30
31#define DRV_NAME "bfin-jtag-comm"
32#define DEV_NAME "ttyBFJC"
33
34#define pr_init(fmt, args...) ({ static const __initdata char __fmt[] = fmt; printk(__fmt, ## args); })
35#define debug(fmt, args...) pr_debug(DRV_NAME ": " fmt, ## args)
36
37static inline uint32_t bfin_write_emudat(uint32_t emudat)
38{
39 __asm__ __volatile__("emudat = %0;" : : "d"(emudat));
40 return emudat;
41}
42
43static inline uint32_t bfin_read_emudat(void)
44{
45 uint32_t emudat;
46 __asm__ __volatile__("%0 = emudat;" : "=d"(emudat));
47 return emudat;
48}
49
50static inline uint32_t bfin_write_emudat_chars(char a, char b, char c, char d)
51{
52 return bfin_write_emudat((a << 0) | (b << 8) | (c << 16) | (d << 24));
53}
54
55#define CIRC_SIZE 2048 /* see comment in tty_io.c:do_tty_write() */
56#define CIRC_MASK (CIRC_SIZE - 1)
57#define circ_empty(circ) ((circ)->head == (circ)->tail)
58#define circ_free(circ) CIRC_SPACE((circ)->head, (circ)->tail, CIRC_SIZE)
59#define circ_cnt(circ) CIRC_CNT((circ)->head, (circ)->tail, CIRC_SIZE)
60#define circ_byte(circ, idx) ((circ)->buf[(idx) & CIRC_MASK])
61
62static struct tty_driver *bfin_jc_driver;
63static struct task_struct *bfin_jc_kthread;
64static struct tty_struct * volatile bfin_jc_tty;
65static unsigned long bfin_jc_count;
66static DEFINE_MUTEX(bfin_jc_tty_mutex);
67static volatile struct circ_buf bfin_jc_write_buf;
68
69static int
70bfin_jc_emudat_manager(void *arg)
71{
72 uint32_t inbound_len = 0, outbound_len = 0;
73
74 while (!kthread_should_stop()) {
75 /* no one left to give data to, so sleep */
76 if (bfin_jc_tty == NULL && circ_empty(&bfin_jc_write_buf)) {
77 debug("waiting for readers\n");
78 __set_current_state(TASK_UNINTERRUPTIBLE);
79 schedule();
80 __set_current_state(TASK_RUNNING);
81 }
82
83 /* no data available, so just chill */
84 if (!(bfin_read_DBGSTAT() & EMUDIF) && circ_empty(&bfin_jc_write_buf)) {
85 debug("waiting for data (in_len = %i) (circ: %i %i)\n",
86 inbound_len, bfin_jc_write_buf.tail, bfin_jc_write_buf.head);
87 if (inbound_len)
88 schedule();
89 else
90 schedule_timeout_interruptible(HZ);
91 continue;
92 }
93
94 /* if incoming data is ready, eat it */
95 if (bfin_read_DBGSTAT() & EMUDIF) {
96 struct tty_struct *tty;
97 mutex_lock(&bfin_jc_tty_mutex);
98 tty = (struct tty_struct *)bfin_jc_tty;
99 if (tty != NULL) {
100 uint32_t emudat = bfin_read_emudat();
101 if (inbound_len == 0) {
102 debug("incoming length: 0x%08x\n", emudat);
103 inbound_len = emudat;
104 } else {
105 size_t num_chars = (4 <= inbound_len ? 4 : inbound_len);
106 debug(" incoming data: 0x%08x (pushing %zu)\n", emudat, num_chars);
107 inbound_len -= num_chars;
108 tty_insert_flip_string(tty, (unsigned char *)&emudat, num_chars);
109 tty_flip_buffer_push(tty);
110 }
111 }
112 mutex_unlock(&bfin_jc_tty_mutex);
113 }
114
115 /* if outgoing data is ready, post it */
116 if (!(bfin_read_DBGSTAT() & EMUDOF) && !circ_empty(&bfin_jc_write_buf)) {
117 if (outbound_len == 0) {
118 outbound_len = circ_cnt(&bfin_jc_write_buf);
119 bfin_write_emudat(outbound_len);
120 debug("outgoing length: 0x%08x\n", outbound_len);
121 } else {
122 struct tty_struct *tty;
123 int tail = bfin_jc_write_buf.tail;
124 size_t ate = (4 <= outbound_len ? 4 : outbound_len);
125 uint32_t emudat =
126 bfin_write_emudat_chars(
127 circ_byte(&bfin_jc_write_buf, tail + 0),
128 circ_byte(&bfin_jc_write_buf, tail + 1),
129 circ_byte(&bfin_jc_write_buf, tail + 2),
130 circ_byte(&bfin_jc_write_buf, tail + 3)
131 );
132 bfin_jc_write_buf.tail += ate;
133 outbound_len -= ate;
134 mutex_lock(&bfin_jc_tty_mutex);
135 tty = (struct tty_struct *)bfin_jc_tty;
136 if (tty)
137 tty_wakeup(tty);
138 mutex_unlock(&bfin_jc_tty_mutex);
139 debug(" outgoing data: 0x%08x (pushing %zu)\n", emudat, ate);
140 }
141 }
142 }
143
144 __set_current_state(TASK_RUNNING);
145 return 0;
146}
147
148static int
149bfin_jc_open(struct tty_struct *tty, struct file *filp)
150{
151 mutex_lock(&bfin_jc_tty_mutex);
152 debug("open %lu\n", bfin_jc_count);
153 ++bfin_jc_count;
154 bfin_jc_tty = tty;
155 wake_up_process(bfin_jc_kthread);
156 mutex_unlock(&bfin_jc_tty_mutex);
157 return 0;
158}
159
160static void
161bfin_jc_close(struct tty_struct *tty, struct file *filp)
162{
163 mutex_lock(&bfin_jc_tty_mutex);
164 debug("close %lu\n", bfin_jc_count);
165 if (--bfin_jc_count == 0)
166 bfin_jc_tty = NULL;
167 wake_up_process(bfin_jc_kthread);
168 mutex_unlock(&bfin_jc_tty_mutex);
169}
170
171/* XXX: we dont handle the put_char() case where we must handle count = 1 */
172static int
173bfin_jc_circ_write(const unsigned char *buf, int count)
174{
175 int i;
176 count = min(count, circ_free(&bfin_jc_write_buf));
177 debug("going to write chunk of %i bytes\n", count);
178 for (i = 0; i < count; ++i)
179 circ_byte(&bfin_jc_write_buf, bfin_jc_write_buf.head + i) = buf[i];
180 bfin_jc_write_buf.head += i;
181 return i;
182}
183
184#ifndef CONFIG_BFIN_JTAG_COMM_CONSOLE
185# define acquire_console_sem()
186# define release_console_sem()
187#endif
188static int
189bfin_jc_write(struct tty_struct *tty, const unsigned char *buf, int count)
190{
191 int i;
192 acquire_console_sem();
193 i = bfin_jc_circ_write(buf, count);
194 release_console_sem();
195 wake_up_process(bfin_jc_kthread);
196 return i;
197}
198
199static void
200bfin_jc_flush_chars(struct tty_struct *tty)
201{
202 wake_up_process(bfin_jc_kthread);
203}
204
205static int
206bfin_jc_write_room(struct tty_struct *tty)
207{
208 return circ_free(&bfin_jc_write_buf);
209}
210
211static int
212bfin_jc_chars_in_buffer(struct tty_struct *tty)
213{
214 return circ_cnt(&bfin_jc_write_buf);
215}
216
217static void
218bfin_jc_wait_until_sent(struct tty_struct *tty, int timeout)
219{
220 unsigned long expire = jiffies + timeout;
221 while (!circ_empty(&bfin_jc_write_buf)) {
222 if (signal_pending(current))
223 break;
224 if (time_after(jiffies, expire))
225 break;
226 }
227}
228
229static struct tty_operations bfin_jc_ops = {
230 .open = bfin_jc_open,
231 .close = bfin_jc_close,
232 .write = bfin_jc_write,
233 /*.put_char = bfin_jc_put_char,*/
234 .flush_chars = bfin_jc_flush_chars,
235 .write_room = bfin_jc_write_room,
236 .chars_in_buffer = bfin_jc_chars_in_buffer,
237 .wait_until_sent = bfin_jc_wait_until_sent,
238};
239
240static int __init bfin_jc_init(void)
241{
242 int ret;
243
244 bfin_jc_kthread = kthread_create(bfin_jc_emudat_manager, NULL, DRV_NAME);
245 if (IS_ERR(bfin_jc_kthread))
246 return PTR_ERR(bfin_jc_kthread);
247
248 ret = -ENOMEM;
249
250 bfin_jc_write_buf.head = bfin_jc_write_buf.tail = 0;
251 bfin_jc_write_buf.buf = kmalloc(CIRC_SIZE, GFP_KERNEL);
252 if (!bfin_jc_write_buf.buf)
253 goto err;
254
255 bfin_jc_driver = alloc_tty_driver(1);
256 if (!bfin_jc_driver)
257 goto err;
258
259 bfin_jc_driver->owner = THIS_MODULE;
260 bfin_jc_driver->driver_name = DRV_NAME;
261 bfin_jc_driver->name = DEV_NAME;
262 bfin_jc_driver->type = TTY_DRIVER_TYPE_SERIAL;
263 bfin_jc_driver->subtype = SERIAL_TYPE_NORMAL;
264 bfin_jc_driver->init_termios = tty_std_termios;
265 tty_set_operations(bfin_jc_driver, &bfin_jc_ops);
266
267 ret = tty_register_driver(bfin_jc_driver);
268 if (ret)
269 goto err;
270
271 pr_init(KERN_INFO DRV_NAME ": initialized\n");
272
273 return 0;
274
275 err:
276 put_tty_driver(bfin_jc_driver);
277 kfree(bfin_jc_write_buf.buf);
278 kthread_stop(bfin_jc_kthread);
279 return ret;
280}
281module_init(bfin_jc_init);
282
283static void __exit bfin_jc_exit(void)
284{
285 kthread_stop(bfin_jc_kthread);
286 kfree(bfin_jc_write_buf.buf);
287 tty_unregister_driver(bfin_jc_driver);
288 put_tty_driver(bfin_jc_driver);
289}
290module_exit(bfin_jc_exit);
291
292#if defined(CONFIG_BFIN_JTAG_COMM_CONSOLE) || defined(CONFIG_EARLY_PRINTK)
293static void
294bfin_jc_straight_buffer_write(const char *buf, unsigned count)
295{
296 unsigned ate = 0;
297 while (bfin_read_DBGSTAT() & EMUDOF)
298 continue;
299 bfin_write_emudat(count);
300 while (ate < count) {
301 while (bfin_read_DBGSTAT() & EMUDOF)
302 continue;
303 bfin_write_emudat_chars(buf[ate], buf[ate+1], buf[ate+2], buf[ate+3]);
304 ate += 4;
305 }
306}
307#endif
308
309#ifdef CONFIG_BFIN_JTAG_COMM_CONSOLE
310static void
311bfin_jc_console_write(struct console *co, const char *buf, unsigned count)
312{
313 if (bfin_jc_kthread == NULL)
314 bfin_jc_straight_buffer_write(buf, count);
315 else
316 bfin_jc_circ_write(buf, count);
317}
318
319static struct tty_driver *
320bfin_jc_console_device(struct console *co, int *index)
321{
322 *index = co->index;
323 return bfin_jc_driver;
324}
325
326static struct console bfin_jc_console = {
327 .name = DEV_NAME,
328 .write = bfin_jc_console_write,
329 .device = bfin_jc_console_device,
330 .flags = CON_ANYTIME | CON_PRINTBUFFER,
331 .index = -1,
332};
333
334static int __init bfin_jc_console_init(void)
335{
336 register_console(&bfin_jc_console);
337 return 0;
338}
339console_initcall(bfin_jc_console_init);
340#endif
341
342#ifdef CONFIG_EARLY_PRINTK
343static void __init
344bfin_jc_early_write(struct console *co, const char *buf, unsigned int count)
345{
346 bfin_jc_straight_buffer_write(buf, count);
347}
348
349static struct __initdata console bfin_jc_early_console = {
350 .name = "early_BFJC",
351 .write = bfin_jc_early_write,
352 .flags = CON_ANYTIME | CON_PRINTBUFFER,
353 .index = -1,
354};
355
356struct console * __init
357bfin_jc_early_init(unsigned int port, unsigned int cflag)
358{
359 return &bfin_jc_early_console;
360}
361#endif
362
363MODULE_AUTHOR("Mike Frysinger <vapier@gentoo.org>");
364MODULE_DESCRIPTION("TTY over Blackfin JTAG Communication");
365MODULE_LICENSE("GPL");
diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index 1fdb9f657d8f..f3366d3f06cf 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -604,7 +604,6 @@
604 604
605#define NR_PORTS 256 605#define NR_PORTS 256
606 606
607#define ZE_V1_NPORTS 64
608#define ZO_V1 0 607#define ZO_V1 0
609#define ZO_V2 1 608#define ZO_V2 1
610#define ZE_V1 2 609#define ZE_V1 2
@@ -663,18 +662,6 @@
663static void cy_throttle(struct tty_struct *tty); 662static void cy_throttle(struct tty_struct *tty);
664static void cy_send_xchar(struct tty_struct *tty, char ch); 663static void cy_send_xchar(struct tty_struct *tty, char ch);
665 664
666#define IS_CYC_Z(card) ((card).num_chips == (unsigned int)-1)
667
668#define Z_FPGA_CHECK(card) \
669 ((readl(&((struct RUNTIME_9060 __iomem *) \
670 ((card).ctl_addr))->init_ctrl) & (1<<17)) != 0)
671
672#define ISZLOADED(card) (((ZO_V1 == readl(&((struct RUNTIME_9060 __iomem *) \
673 ((card).ctl_addr))->mail_box_0)) || \
674 Z_FPGA_CHECK(card)) && \
675 (ZFIRM_ID == readl(&((struct FIRM_ID __iomem *) \
676 ((card).base_addr+ID_ADDRESS))->signature)))
677
678#ifndef SERIAL_XMIT_SIZE 665#ifndef SERIAL_XMIT_SIZE
679#define SERIAL_XMIT_SIZE (min(PAGE_SIZE, 4096)) 666#define SERIAL_XMIT_SIZE (min(PAGE_SIZE, 4096))
680#endif 667#endif
@@ -687,8 +674,6 @@ static void cy_send_xchar(struct tty_struct *tty, char ch);
687#define DRIVER_VERSION 0x02010203 674#define DRIVER_VERSION 0x02010203
688#define RAM_SIZE 0x80000 675#define RAM_SIZE 0x80000
689 676
690#define Z_FPGA_LOADED(X) ((readl(&(X)->init_ctrl) & (1<<17)) != 0)
691
692enum zblock_type { 677enum zblock_type {
693 ZBLOCK_PRG = 0, 678 ZBLOCK_PRG = 0,
694 ZBLOCK_FPGA = 1 679 ZBLOCK_FPGA = 1
@@ -883,6 +868,29 @@ static void cyz_rx_restart(unsigned long);
883static struct timer_list cyz_rx_full_timer[NR_PORTS]; 868static struct timer_list cyz_rx_full_timer[NR_PORTS];
884#endif /* CONFIG_CYZ_INTR */ 869#endif /* CONFIG_CYZ_INTR */
885 870
871static inline bool cy_is_Z(struct cyclades_card *card)
872{
873 return card->num_chips == (unsigned int)-1;
874}
875
876static inline bool __cyz_fpga_loaded(struct RUNTIME_9060 __iomem *ctl_addr)
877{
878 return readl(&ctl_addr->init_ctrl) & (1 << 17);
879}
880
881static inline bool cyz_fpga_loaded(struct cyclades_card *card)
882{
883 return __cyz_fpga_loaded(card->ctl_addr.p9060);
884}
885
886static inline bool cyz_is_loaded(struct cyclades_card *card)
887{
888 struct FIRM_ID __iomem *fw_id = card->base_addr + ID_ADDRESS;
889
890 return (card->hw_ver == ZO_V1 || cyz_fpga_loaded(card)) &&
891 readl(&fw_id->signature) == ZFIRM_ID;
892}
893
886static inline int serial_paranoia_check(struct cyclades_port *info, 894static inline int serial_paranoia_check(struct cyclades_port *info,
887 char *name, const char *routine) 895 char *name, const char *routine)
888{ 896{
@@ -1395,19 +1403,15 @@ cyz_fetch_msg(struct cyclades_card *cinfo,
1395 unsigned long loc_doorbell; 1403 unsigned long loc_doorbell;
1396 1404
1397 firm_id = cinfo->base_addr + ID_ADDRESS; 1405 firm_id = cinfo->base_addr + ID_ADDRESS;
1398 if (!ISZLOADED(*cinfo))
1399 return -1;
1400 zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); 1406 zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
1401 board_ctrl = &zfw_ctrl->board_ctrl; 1407 board_ctrl = &zfw_ctrl->board_ctrl;
1402 1408
1403 loc_doorbell = readl(&((struct RUNTIME_9060 __iomem *) 1409 loc_doorbell = readl(&cinfo->ctl_addr.p9060->loc_doorbell);
1404 (cinfo->ctl_addr))->loc_doorbell);
1405 if (loc_doorbell) { 1410 if (loc_doorbell) {
1406 *cmd = (char)(0xff & loc_doorbell); 1411 *cmd = (char)(0xff & loc_doorbell);
1407 *channel = readl(&board_ctrl->fwcmd_channel); 1412 *channel = readl(&board_ctrl->fwcmd_channel);
1408 *param = (__u32) readl(&board_ctrl->fwcmd_param); 1413 *param = (__u32) readl(&board_ctrl->fwcmd_param);
1409 cy_writel(&((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))-> 1414 cy_writel(&cinfo->ctl_addr.p9060->loc_doorbell, 0xffffffff);
1410 loc_doorbell, 0xffffffff);
1411 return 1; 1415 return 1;
1412 } 1416 }
1413 return 0; 1417 return 0;
@@ -1424,15 +1428,14 @@ cyz_issue_cmd(struct cyclades_card *cinfo,
1424 unsigned int index; 1428 unsigned int index;
1425 1429
1426 firm_id = cinfo->base_addr + ID_ADDRESS; 1430 firm_id = cinfo->base_addr + ID_ADDRESS;
1427 if (!ISZLOADED(*cinfo)) 1431 if (!cyz_is_loaded(cinfo))
1428 return -1; 1432 return -1;
1429 1433
1430 zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); 1434 zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
1431 board_ctrl = &zfw_ctrl->board_ctrl; 1435 board_ctrl = &zfw_ctrl->board_ctrl;
1432 1436
1433 index = 0; 1437 index = 0;
1434 pci_doorbell = 1438 pci_doorbell = &cinfo->ctl_addr.p9060->pci_doorbell;
1435 &((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->pci_doorbell;
1436 while ((readl(pci_doorbell) & 0xff) != 0) { 1439 while ((readl(pci_doorbell) & 0xff) != 0) {
1437 if (index++ == 1000) 1440 if (index++ == 1000)
1438 return (int)(readl(pci_doorbell) & 0xff); 1441 return (int)(readl(pci_doorbell) & 0xff);
@@ -1624,10 +1627,8 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo)
1624 static struct BOARD_CTRL __iomem *board_ctrl; 1627 static struct BOARD_CTRL __iomem *board_ctrl;
1625 static struct CH_CTRL __iomem *ch_ctrl; 1628 static struct CH_CTRL __iomem *ch_ctrl;
1626 static struct BUF_CTRL __iomem *buf_ctrl; 1629 static struct BUF_CTRL __iomem *buf_ctrl;
1627 __u32 channel; 1630 __u32 channel, param, fw_ver;
1628 __u8 cmd; 1631 __u8 cmd;
1629 __u32 param;
1630 __u32 hw_ver, fw_ver;
1631 int special_count; 1632 int special_count;
1632 int delta_count; 1633 int delta_count;
1633 1634
@@ -1635,8 +1636,6 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo)
1635 zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); 1636 zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
1636 board_ctrl = &zfw_ctrl->board_ctrl; 1637 board_ctrl = &zfw_ctrl->board_ctrl;
1637 fw_ver = readl(&board_ctrl->fw_version); 1638 fw_ver = readl(&board_ctrl->fw_version);
1638 hw_ver = readl(&((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->
1639 mail_box_0);
1640 1639
1641 while (cyz_fetch_msg(cinfo, &channel, &cmd, &param) == 1) { 1640 while (cyz_fetch_msg(cinfo, &channel, &cmd, &param) == 1) {
1642 special_count = 0; 1641 special_count = 0;
@@ -1737,15 +1736,7 @@ static irqreturn_t cyz_interrupt(int irq, void *dev_id)
1737{ 1736{
1738 struct cyclades_card *cinfo = dev_id; 1737 struct cyclades_card *cinfo = dev_id;
1739 1738
1740 if (unlikely(cinfo == NULL)) { 1739 if (unlikely(!cyz_is_loaded(cinfo))) {
1741#ifdef CY_DEBUG_INTERRUPTS
1742 printk(KERN_DEBUG "cyz_interrupt: spurious interrupt %d\n",
1743 irq);
1744#endif
1745 return IRQ_NONE; /* spurious interrupt */
1746 }
1747
1748 if (unlikely(!ISZLOADED(*cinfo))) {
1749#ifdef CY_DEBUG_INTERRUPTS 1740#ifdef CY_DEBUG_INTERRUPTS
1750 printk(KERN_DEBUG "cyz_interrupt: board not yet loaded " 1741 printk(KERN_DEBUG "cyz_interrupt: board not yet loaded "
1751 "(IRQ%d).\n", irq); 1742 "(IRQ%d).\n", irq);
@@ -1785,7 +1776,6 @@ static void cyz_poll(unsigned long arg)
1785 struct tty_struct *tty; 1776 struct tty_struct *tty;
1786 struct FIRM_ID __iomem *firm_id; 1777 struct FIRM_ID __iomem *firm_id;
1787 struct ZFW_CTRL __iomem *zfw_ctrl; 1778 struct ZFW_CTRL __iomem *zfw_ctrl;
1788 struct BOARD_CTRL __iomem *board_ctrl;
1789 struct BUF_CTRL __iomem *buf_ctrl; 1779 struct BUF_CTRL __iomem *buf_ctrl;
1790 unsigned long expires = jiffies + HZ; 1780 unsigned long expires = jiffies + HZ;
1791 unsigned int port, card; 1781 unsigned int port, card;
@@ -1793,19 +1783,17 @@ static void cyz_poll(unsigned long arg)
1793 for (card = 0; card < NR_CARDS; card++) { 1783 for (card = 0; card < NR_CARDS; card++) {
1794 cinfo = &cy_card[card]; 1784 cinfo = &cy_card[card];
1795 1785
1796 if (!IS_CYC_Z(*cinfo)) 1786 if (!cy_is_Z(cinfo))
1797 continue; 1787 continue;
1798 if (!ISZLOADED(*cinfo)) 1788 if (!cyz_is_loaded(cinfo))
1799 continue; 1789 continue;
1800 1790
1801 firm_id = cinfo->base_addr + ID_ADDRESS; 1791 firm_id = cinfo->base_addr + ID_ADDRESS;
1802 zfw_ctrl = cinfo->base_addr + 1792 zfw_ctrl = cinfo->base_addr +
1803 (readl(&firm_id->zfwctrl_addr) & 0xfffff); 1793 (readl(&firm_id->zfwctrl_addr) & 0xfffff);
1804 board_ctrl = &(zfw_ctrl->board_ctrl);
1805 1794
1806 /* Skip first polling cycle to avoid racing conditions with the FW */ 1795 /* Skip first polling cycle to avoid racing conditions with the FW */
1807 if (!cinfo->intr_enabled) { 1796 if (!cinfo->intr_enabled) {
1808 cinfo->nports = (int)readl(&board_ctrl->n_channel);
1809 cinfo->intr_enabled = 1; 1797 cinfo->intr_enabled = 1;
1810 continue; 1798 continue;
1811 } 1799 }
@@ -1874,7 +1862,7 @@ static int startup(struct cyclades_port *info)
1874 1862
1875 set_line_char(info); 1863 set_line_char(info);
1876 1864
1877 if (!IS_CYC_Z(*card)) { 1865 if (!cy_is_Z(card)) {
1878 chip = channel >> 2; 1866 chip = channel >> 2;
1879 channel &= 0x03; 1867 channel &= 0x03;
1880 index = card->bus_index; 1868 index = card->bus_index;
@@ -1931,7 +1919,7 @@ static int startup(struct cyclades_port *info)
1931 base_addr = card->base_addr; 1919 base_addr = card->base_addr;
1932 1920
1933 firm_id = base_addr + ID_ADDRESS; 1921 firm_id = base_addr + ID_ADDRESS;
1934 if (!ISZLOADED(*card)) 1922 if (!cyz_is_loaded(card))
1935 return -ENODEV; 1923 return -ENODEV;
1936 1924
1937 zfw_ctrl = card->base_addr + 1925 zfw_ctrl = card->base_addr +
@@ -2026,7 +2014,7 @@ static void start_xmit(struct cyclades_port *info)
2026 2014
2027 card = info->card; 2015 card = info->card;
2028 channel = info->line - card->first_line; 2016 channel = info->line - card->first_line;
2029 if (!IS_CYC_Z(*card)) { 2017 if (!cy_is_Z(card)) {
2030 chip = channel >> 2; 2018 chip = channel >> 2;
2031 channel &= 0x03; 2019 channel &= 0x03;
2032 index = card->bus_index; 2020 index = card->bus_index;
@@ -2070,7 +2058,7 @@ static void shutdown(struct cyclades_port *info)
2070 2058
2071 card = info->card; 2059 card = info->card;
2072 channel = info->line - card->first_line; 2060 channel = info->line - card->first_line;
2073 if (!IS_CYC_Z(*card)) { 2061 if (!cy_is_Z(card)) {
2074 chip = channel >> 2; 2062 chip = channel >> 2;
2075 channel &= 0x03; 2063 channel &= 0x03;
2076 index = card->bus_index; 2064 index = card->bus_index;
@@ -2126,7 +2114,7 @@ static void shutdown(struct cyclades_port *info)
2126#endif 2114#endif
2127 2115
2128 firm_id = base_addr + ID_ADDRESS; 2116 firm_id = base_addr + ID_ADDRESS;
2129 if (!ISZLOADED(*card)) 2117 if (!cyz_is_loaded(card))
2130 return; 2118 return;
2131 2119
2132 zfw_ctrl = card->base_addr + 2120 zfw_ctrl = card->base_addr +
@@ -2233,7 +2221,7 @@ block_til_ready(struct tty_struct *tty, struct file *filp,
2233#endif 2221#endif
2234 info->port.blocked_open++; 2222 info->port.blocked_open++;
2235 2223
2236 if (!IS_CYC_Z(*cinfo)) { 2224 if (!cy_is_Z(cinfo)) {
2237 chip = channel >> 2; 2225 chip = channel >> 2;
2238 channel &= 0x03; 2226 channel &= 0x03;
2239 index = cinfo->bus_index; 2227 index = cinfo->bus_index;
@@ -2296,7 +2284,7 @@ block_til_ready(struct tty_struct *tty, struct file *filp,
2296 2284
2297 base_addr = cinfo->base_addr; 2285 base_addr = cinfo->base_addr;
2298 firm_id = base_addr + ID_ADDRESS; 2286 firm_id = base_addr + ID_ADDRESS;
2299 if (!ISZLOADED(*cinfo)) { 2287 if (!cyz_is_loaded(cinfo)) {
2300 __set_current_state(TASK_RUNNING); 2288 __set_current_state(TASK_RUNNING);
2301 remove_wait_queue(&info->port.open_wait, &wait); 2289 remove_wait_queue(&info->port.open_wait, &wait);
2302 return -EINVAL; 2290 return -EINVAL;
@@ -2397,16 +2385,14 @@ static int cy_open(struct tty_struct *tty, struct file *filp)
2397 treat it as absent from the system. This 2385 treat it as absent from the system. This
2398 will make the user pay attention. 2386 will make the user pay attention.
2399 */ 2387 */
2400 if (IS_CYC_Z(*info->card)) { 2388 if (cy_is_Z(info->card)) {
2401 struct cyclades_card *cinfo = info->card; 2389 struct cyclades_card *cinfo = info->card;
2402 struct FIRM_ID __iomem *firm_id = cinfo->base_addr + ID_ADDRESS; 2390 struct FIRM_ID __iomem *firm_id = cinfo->base_addr + ID_ADDRESS;
2403 2391
2404 if (!ISZLOADED(*cinfo)) { 2392 if (!cyz_is_loaded(cinfo)) {
2405 if (((ZE_V1 == readl(&((struct RUNTIME_9060 __iomem *) 2393 if (cinfo->hw_ver == ZE_V1 && cyz_fpga_loaded(cinfo) &&
2406 (cinfo->ctl_addr))->mail_box_0)) && 2394 readl(&firm_id->signature) ==
2407 Z_FPGA_CHECK(*cinfo)) && 2395 ZFIRM_HLT) {
2408 (ZFIRM_HLT == readl(
2409 &firm_id->signature))) {
2410 printk(KERN_ERR "cyc:Cyclades-Z Error: you " 2396 printk(KERN_ERR "cyc:Cyclades-Z Error: you "
2411 "need an external power supply for " 2397 "need an external power supply for "
2412 "this number of ports.\nFirmware " 2398 "this number of ports.\nFirmware "
@@ -2423,18 +2409,13 @@ static int cy_open(struct tty_struct *tty, struct file *filp)
2423 interrupts should be enabled as soon as the first open 2409 interrupts should be enabled as soon as the first open
2424 happens to one of its ports. */ 2410 happens to one of its ports. */
2425 if (!cinfo->intr_enabled) { 2411 if (!cinfo->intr_enabled) {
2426 struct ZFW_CTRL __iomem *zfw_ctrl; 2412 u16 intr;
2427 struct BOARD_CTRL __iomem *board_ctrl;
2428
2429 zfw_ctrl = cinfo->base_addr +
2430 (readl(&firm_id->zfwctrl_addr) &
2431 0xfffff);
2432
2433 board_ctrl = &zfw_ctrl->board_ctrl;
2434 2413
2435 /* Enable interrupts on the PLX chip */ 2414 /* Enable interrupts on the PLX chip */
2436 cy_writew(cinfo->ctl_addr + 0x68, 2415 intr = readw(&cinfo->ctl_addr.p9060->
2437 readw(cinfo->ctl_addr + 0x68) | 0x0900); 2416 intr_ctrl_stat) | 0x0900;
2417 cy_writew(&cinfo->ctl_addr.p9060->
2418 intr_ctrl_stat, intr);
2438 /* Enable interrupts on the FW */ 2419 /* Enable interrupts on the FW */
2439 retval = cyz_issue_cmd(cinfo, 0, 2420 retval = cyz_issue_cmd(cinfo, 0,
2440 C_CM_IRQ_ENBL, 0L); 2421 C_CM_IRQ_ENBL, 0L);
@@ -2442,8 +2423,6 @@ static int cy_open(struct tty_struct *tty, struct file *filp)
2442 printk(KERN_ERR "cyc:IRQ enable retval " 2423 printk(KERN_ERR "cyc:IRQ enable retval "
2443 "was %x\n", retval); 2424 "was %x\n", retval);
2444 } 2425 }
2445 cinfo->nports =
2446 (int)readl(&board_ctrl->n_channel);
2447 cinfo->intr_enabled = 1; 2426 cinfo->intr_enabled = 1;
2448 } 2427 }
2449 } 2428 }
@@ -2556,7 +2535,7 @@ static void cy_wait_until_sent(struct tty_struct *tty, int timeout)
2556#endif 2535#endif
2557 card = info->card; 2536 card = info->card;
2558 channel = (info->line) - (card->first_line); 2537 channel = (info->line) - (card->first_line);
2559 if (!IS_CYC_Z(*card)) { 2538 if (!cy_is_Z(card)) {
2560 chip = channel >> 2; 2539 chip = channel >> 2;
2561 channel &= 0x03; 2540 channel &= 0x03;
2562 index = card->bus_index; 2541 index = card->bus_index;
@@ -2601,7 +2580,7 @@ static void cy_flush_buffer(struct tty_struct *tty)
2601 info->xmit_cnt = info->xmit_head = info->xmit_tail = 0; 2580 info->xmit_cnt = info->xmit_head = info->xmit_tail = 0;
2602 spin_unlock_irqrestore(&card->card_lock, flags); 2581 spin_unlock_irqrestore(&card->card_lock, flags);
2603 2582
2604 if (IS_CYC_Z(*card)) { /* If it is a Z card, flush the on-board 2583 if (cy_is_Z(card)) { /* If it is a Z card, flush the on-board
2605 buffers as well */ 2584 buffers as well */
2606 spin_lock_irqsave(&card->card_lock, flags); 2585 spin_lock_irqsave(&card->card_lock, flags);
2607 retval = cyz_issue_cmd(card, channel, C_CM_FLUSH_TX, 0L); 2586 retval = cyz_issue_cmd(card, channel, C_CM_FLUSH_TX, 0L);
@@ -2682,7 +2661,7 @@ static void cy_close(struct tty_struct *tty, struct file *filp)
2682 2661
2683 spin_lock_irqsave(&card->card_lock, flags); 2662 spin_lock_irqsave(&card->card_lock, flags);
2684 2663
2685 if (!IS_CYC_Z(*card)) { 2664 if (!cy_is_Z(card)) {
2686 int channel = info->line - card->first_line; 2665 int channel = info->line - card->first_line;
2687 int index = card->bus_index; 2666 int index = card->bus_index;
2688 void __iomem *base_addr = card->base_addr + 2667 void __iomem *base_addr = card->base_addr +
@@ -2902,7 +2881,7 @@ static int cy_chars_in_buffer(struct tty_struct *tty)
2902 channel = (info->line) - (card->first_line); 2881 channel = (info->line) - (card->first_line);
2903 2882
2904#ifdef Z_EXT_CHARS_IN_BUFFER 2883#ifdef Z_EXT_CHARS_IN_BUFFER
2905 if (!IS_CYC_Z(cy_card[card])) { 2884 if (!cy_is_Z(card)) {
2906#endif /* Z_EXT_CHARS_IN_BUFFER */ 2885#endif /* Z_EXT_CHARS_IN_BUFFER */
2907#ifdef CY_DEBUG_IO 2886#ifdef CY_DEBUG_IO
2908 printk(KERN_DEBUG "cyc:cy_chars_in_buffer ttyC%d %d\n", 2887 printk(KERN_DEBUG "cyc:cy_chars_in_buffer ttyC%d %d\n",
@@ -2984,7 +2963,6 @@ static void set_line_char(struct cyclades_port *info)
2984 void __iomem *base_addr; 2963 void __iomem *base_addr;
2985 int chip, channel, index; 2964 int chip, channel, index;
2986 unsigned cflag, iflag; 2965 unsigned cflag, iflag;
2987 unsigned short chip_number;
2988 int baud, baud_rate = 0; 2966 int baud, baud_rate = 0;
2989 int i; 2967 int i;
2990 2968
@@ -3013,9 +2991,8 @@ static void set_line_char(struct cyclades_port *info)
3013 2991
3014 card = info->card; 2992 card = info->card;
3015 channel = info->line - card->first_line; 2993 channel = info->line - card->first_line;
3016 chip_number = channel / 4;
3017 2994
3018 if (!IS_CYC_Z(*card)) { 2995 if (!cy_is_Z(card)) {
3019 2996
3020 index = card->bus_index; 2997 index = card->bus_index;
3021 2998
@@ -3233,21 +3210,17 @@ static void set_line_char(struct cyclades_port *info)
3233 } else { 3210 } else {
3234 struct FIRM_ID __iomem *firm_id; 3211 struct FIRM_ID __iomem *firm_id;
3235 struct ZFW_CTRL __iomem *zfw_ctrl; 3212 struct ZFW_CTRL __iomem *zfw_ctrl;
3236 struct BOARD_CTRL __iomem *board_ctrl;
3237 struct CH_CTRL __iomem *ch_ctrl; 3213 struct CH_CTRL __iomem *ch_ctrl;
3238 struct BUF_CTRL __iomem *buf_ctrl;
3239 __u32 sw_flow; 3214 __u32 sw_flow;
3240 int retval; 3215 int retval;
3241 3216
3242 firm_id = card->base_addr + ID_ADDRESS; 3217 firm_id = card->base_addr + ID_ADDRESS;
3243 if (!ISZLOADED(*card)) 3218 if (!cyz_is_loaded(card))
3244 return; 3219 return;
3245 3220
3246 zfw_ctrl = card->base_addr + 3221 zfw_ctrl = card->base_addr +
3247 (readl(&firm_id->zfwctrl_addr) & 0xfffff); 3222 (readl(&firm_id->zfwctrl_addr) & 0xfffff);
3248 board_ctrl = &zfw_ctrl->board_ctrl;
3249 ch_ctrl = &(zfw_ctrl->ch_ctrl[channel]); 3223 ch_ctrl = &(zfw_ctrl->ch_ctrl[channel]);
3250 buf_ctrl = &zfw_ctrl->buf_ctrl[channel];
3251 3224
3252 /* baud rate */ 3225 /* baud rate */
3253 baud = tty_get_baud_rate(info->port.tty); 3226 baud = tty_get_baud_rate(info->port.tty);
@@ -3457,7 +3430,7 @@ static int get_lsr_info(struct cyclades_port *info, unsigned int __user *value)
3457 3430
3458 card = info->card; 3431 card = info->card;
3459 channel = (info->line) - (card->first_line); 3432 channel = (info->line) - (card->first_line);
3460 if (!IS_CYC_Z(*card)) { 3433 if (!cy_is_Z(card)) {
3461 chip = channel >> 2; 3434 chip = channel >> 2;
3462 channel &= 0x03; 3435 channel &= 0x03;
3463 index = card->bus_index; 3436 index = card->bus_index;
@@ -3497,7 +3470,7 @@ static int cy_tiocmget(struct tty_struct *tty, struct file *file)
3497 3470
3498 card = info->card; 3471 card = info->card;
3499 channel = info->line - card->first_line; 3472 channel = info->line - card->first_line;
3500 if (!IS_CYC_Z(*card)) { 3473 if (!cy_is_Z(card)) {
3501 chip = channel >> 2; 3474 chip = channel >> 2;
3502 channel &= 0x03; 3475 channel &= 0x03;
3503 index = card->bus_index; 3476 index = card->bus_index;
@@ -3523,7 +3496,7 @@ static int cy_tiocmget(struct tty_struct *tty, struct file *file)
3523 } else { 3496 } else {
3524 base_addr = card->base_addr; 3497 base_addr = card->base_addr;
3525 firm_id = card->base_addr + ID_ADDRESS; 3498 firm_id = card->base_addr + ID_ADDRESS;
3526 if (ISZLOADED(*card)) { 3499 if (cyz_is_loaded(card)) {
3527 zfw_ctrl = card->base_addr + 3500 zfw_ctrl = card->base_addr +
3528 (readl(&firm_id->zfwctrl_addr) & 0xfffff); 3501 (readl(&firm_id->zfwctrl_addr) & 0xfffff);
3529 board_ctrl = &zfw_ctrl->board_ctrl; 3502 board_ctrl = &zfw_ctrl->board_ctrl;
@@ -3566,7 +3539,7 @@ cy_tiocmset(struct tty_struct *tty, struct file *file,
3566 3539
3567 card = info->card; 3540 card = info->card;
3568 channel = (info->line) - (card->first_line); 3541 channel = (info->line) - (card->first_line);
3569 if (!IS_CYC_Z(*card)) { 3542 if (!cy_is_Z(card)) {
3570 chip = channel >> 2; 3543 chip = channel >> 2;
3571 channel &= 0x03; 3544 channel &= 0x03;
3572 index = card->bus_index; 3545 index = card->bus_index;
@@ -3641,7 +3614,7 @@ cy_tiocmset(struct tty_struct *tty, struct file *file,
3641 base_addr = card->base_addr; 3614 base_addr = card->base_addr;
3642 3615
3643 firm_id = card->base_addr + ID_ADDRESS; 3616 firm_id = card->base_addr + ID_ADDRESS;
3644 if (ISZLOADED(*card)) { 3617 if (cyz_is_loaded(card)) {
3645 zfw_ctrl = card->base_addr + 3618 zfw_ctrl = card->base_addr +
3646 (readl(&firm_id->zfwctrl_addr) & 0xfffff); 3619 (readl(&firm_id->zfwctrl_addr) & 0xfffff);
3647 board_ctrl = &zfw_ctrl->board_ctrl; 3620 board_ctrl = &zfw_ctrl->board_ctrl;
@@ -3713,7 +3686,7 @@ static int cy_break(struct tty_struct *tty, int break_state)
3713 card = info->card; 3686 card = info->card;
3714 3687
3715 spin_lock_irqsave(&card->card_lock, flags); 3688 spin_lock_irqsave(&card->card_lock, flags);
3716 if (!IS_CYC_Z(*card)) { 3689 if (!cy_is_Z(card)) {
3717 /* Let the transmit ISR take care of this (since it 3690 /* Let the transmit ISR take care of this (since it
3718 requires stuffing characters into the output stream). 3691 requires stuffing characters into the output stream).
3719 */ 3692 */
@@ -3782,7 +3755,7 @@ static int set_threshold(struct cyclades_port *info, unsigned long value)
3782 3755
3783 card = info->card; 3756 card = info->card;
3784 channel = info->line - card->first_line; 3757 channel = info->line - card->first_line;
3785 if (!IS_CYC_Z(*card)) { 3758 if (!cy_is_Z(card)) {
3786 chip = channel >> 2; 3759 chip = channel >> 2;
3787 channel &= 0x03; 3760 channel &= 0x03;
3788 index = card->bus_index; 3761 index = card->bus_index;
@@ -3810,7 +3783,7 @@ static int get_threshold(struct cyclades_port *info,
3810 3783
3811 card = info->card; 3784 card = info->card;
3812 channel = info->line - card->first_line; 3785 channel = info->line - card->first_line;
3813 if (!IS_CYC_Z(*card)) { 3786 if (!cy_is_Z(card)) {
3814 chip = channel >> 2; 3787 chip = channel >> 2;
3815 channel &= 0x03; 3788 channel &= 0x03;
3816 index = card->bus_index; 3789 index = card->bus_index;
@@ -3844,7 +3817,7 @@ static int set_timeout(struct cyclades_port *info, unsigned long value)
3844 3817
3845 card = info->card; 3818 card = info->card;
3846 channel = info->line - card->first_line; 3819 channel = info->line - card->first_line;
3847 if (!IS_CYC_Z(*card)) { 3820 if (!cy_is_Z(card)) {
3848 chip = channel >> 2; 3821 chip = channel >> 2;
3849 channel &= 0x03; 3822 channel &= 0x03;
3850 index = card->bus_index; 3823 index = card->bus_index;
@@ -3867,7 +3840,7 @@ static int get_timeout(struct cyclades_port *info,
3867 3840
3868 card = info->card; 3841 card = info->card;
3869 channel = info->line - card->first_line; 3842 channel = info->line - card->first_line;
3870 if (!IS_CYC_Z(*card)) { 3843 if (!cy_is_Z(card)) {
3871 chip = channel >> 2; 3844 chip = channel >> 2;
3872 channel &= 0x03; 3845 channel &= 0x03;
3873 index = card->bus_index; 3846 index = card->bus_index;
@@ -4121,7 +4094,7 @@ static void cy_send_xchar(struct tty_struct *tty, char ch)
4121 card = info->card; 4094 card = info->card;
4122 channel = info->line - card->first_line; 4095 channel = info->line - card->first_line;
4123 4096
4124 if (IS_CYC_Z(*card)) { 4097 if (cy_is_Z(card)) {
4125 if (ch == STOP_CHAR(tty)) 4098 if (ch == STOP_CHAR(tty))
4126 cyz_issue_cmd(card, channel, C_CM_SENDXOFF, 0L); 4099 cyz_issue_cmd(card, channel, C_CM_SENDXOFF, 0L);
4127 else if (ch == START_CHAR(tty)) 4100 else if (ch == START_CHAR(tty))
@@ -4154,7 +4127,7 @@ static void cy_throttle(struct tty_struct *tty)
4154 card = info->card; 4127 card = info->card;
4155 4128
4156 if (I_IXOFF(tty)) { 4129 if (I_IXOFF(tty)) {
4157 if (!IS_CYC_Z(*card)) 4130 if (!cy_is_Z(card))
4158 cy_send_xchar(tty, STOP_CHAR(tty)); 4131 cy_send_xchar(tty, STOP_CHAR(tty));
4159 else 4132 else
4160 info->throttle = 1; 4133 info->throttle = 1;
@@ -4162,7 +4135,7 @@ static void cy_throttle(struct tty_struct *tty)
4162 4135
4163 if (tty->termios->c_cflag & CRTSCTS) { 4136 if (tty->termios->c_cflag & CRTSCTS) {
4164 channel = info->line - card->first_line; 4137 channel = info->line - card->first_line;
4165 if (!IS_CYC_Z(*card)) { 4138 if (!cy_is_Z(card)) {
4166 chip = channel >> 2; 4139 chip = channel >> 2;
4167 channel &= 0x03; 4140 channel &= 0x03;
4168 index = card->bus_index; 4141 index = card->bus_index;
@@ -4219,7 +4192,7 @@ static void cy_unthrottle(struct tty_struct *tty)
4219 if (tty->termios->c_cflag & CRTSCTS) { 4192 if (tty->termios->c_cflag & CRTSCTS) {
4220 card = info->card; 4193 card = info->card;
4221 channel = info->line - card->first_line; 4194 channel = info->line - card->first_line;
4222 if (!IS_CYC_Z(*card)) { 4195 if (!cy_is_Z(card)) {
4223 chip = channel >> 2; 4196 chip = channel >> 2;
4224 channel &= 0x03; 4197 channel &= 0x03;
4225 index = card->bus_index; 4198 index = card->bus_index;
@@ -4263,7 +4236,7 @@ static void cy_stop(struct tty_struct *tty)
4263 4236
4264 cinfo = info->card; 4237 cinfo = info->card;
4265 channel = info->line - cinfo->first_line; 4238 channel = info->line - cinfo->first_line;
4266 if (!IS_CYC_Z(*cinfo)) { 4239 if (!cy_is_Z(cinfo)) {
4267 index = cinfo->bus_index; 4240 index = cinfo->bus_index;
4268 chip = channel >> 2; 4241 chip = channel >> 2;
4269 channel &= 0x03; 4242 channel &= 0x03;
@@ -4296,7 +4269,7 @@ static void cy_start(struct tty_struct *tty)
4296 cinfo = info->card; 4269 cinfo = info->card;
4297 channel = info->line - cinfo->first_line; 4270 channel = info->line - cinfo->first_line;
4298 index = cinfo->bus_index; 4271 index = cinfo->bus_index;
4299 if (!IS_CYC_Z(*cinfo)) { 4272 if (!cy_is_Z(cinfo)) {
4300 chip = channel >> 2; 4273 chip = channel >> 2;
4301 channel &= 0x03; 4274 channel &= 0x03;
4302 base_addr = cinfo->base_addr + (cy_chip_offset[chip] << index); 4275 base_addr = cinfo->base_addr + (cy_chip_offset[chip] << index);
@@ -4347,33 +4320,20 @@ static void cy_hangup(struct tty_struct *tty)
4347static int __devinit cy_init_card(struct cyclades_card *cinfo) 4320static int __devinit cy_init_card(struct cyclades_card *cinfo)
4348{ 4321{
4349 struct cyclades_port *info; 4322 struct cyclades_port *info;
4350 u32 uninitialized_var(mailbox); 4323 unsigned int port;
4351 unsigned int nports, port;
4352 unsigned short chip_number; 4324 unsigned short chip_number;
4353 int uninitialized_var(index);
4354 4325
4355 spin_lock_init(&cinfo->card_lock); 4326 spin_lock_init(&cinfo->card_lock);
4327 cinfo->intr_enabled = 0;
4356 4328
4357 if (IS_CYC_Z(*cinfo)) { /* Cyclades-Z */ 4329 cinfo->ports = kcalloc(cinfo->nports, sizeof(*cinfo->ports),
4358 mailbox = readl(&((struct RUNTIME_9060 __iomem *) 4330 GFP_KERNEL);
4359 cinfo->ctl_addr)->mail_box_0);
4360 nports = (mailbox == ZE_V1) ? ZE_V1_NPORTS : 8;
4361 cinfo->intr_enabled = 0;
4362 cinfo->nports = 0; /* Will be correctly set later, after
4363 Z FW is loaded */
4364 } else {
4365 index = cinfo->bus_index;
4366 nports = cinfo->nports = CyPORTS_PER_CHIP * cinfo->num_chips;
4367 }
4368
4369 cinfo->ports = kzalloc(sizeof(*cinfo->ports) * nports, GFP_KERNEL);
4370 if (cinfo->ports == NULL) { 4331 if (cinfo->ports == NULL) {
4371 printk(KERN_ERR "Cyclades: cannot allocate ports\n"); 4332 printk(KERN_ERR "Cyclades: cannot allocate ports\n");
4372 cinfo->nports = 0;
4373 return -ENOMEM; 4333 return -ENOMEM;
4374 } 4334 }
4375 4335
4376 for (port = cinfo->first_line; port < cinfo->first_line + nports; 4336 for (port = cinfo->first_line; port < cinfo->first_line + cinfo->nports;
4377 port++) { 4337 port++) {
4378 info = &cinfo->ports[port - cinfo->first_line]; 4338 info = &cinfo->ports[port - cinfo->first_line];
4379 tty_port_init(&info->port); 4339 tty_port_init(&info->port);
@@ -4387,9 +4347,9 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo)
4387 init_completion(&info->shutdown_wait); 4347 init_completion(&info->shutdown_wait);
4388 init_waitqueue_head(&info->delta_msr_wait); 4348 init_waitqueue_head(&info->delta_msr_wait);
4389 4349
4390 if (IS_CYC_Z(*cinfo)) { 4350 if (cy_is_Z(cinfo)) {
4391 info->type = PORT_STARTECH; 4351 info->type = PORT_STARTECH;
4392 if (mailbox == ZO_V1) 4352 if (cinfo->hw_ver == ZO_V1)
4393 info->xmit_fifo_size = CYZ_FIFO_SIZE; 4353 info->xmit_fifo_size = CYZ_FIFO_SIZE;
4394 else 4354 else
4395 info->xmit_fifo_size = 4 * CYZ_FIFO_SIZE; 4355 info->xmit_fifo_size = 4 * CYZ_FIFO_SIZE;
@@ -4398,6 +4358,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo)
4398 cyz_rx_restart, (unsigned long)info); 4358 cyz_rx_restart, (unsigned long)info);
4399#endif 4359#endif
4400 } else { 4360 } else {
4361 int index = cinfo->bus_index;
4401 info->type = PORT_CIRRUS; 4362 info->type = PORT_CIRRUS;
4402 info->xmit_fifo_size = CyMAX_CHAR_FIFO; 4363 info->xmit_fifo_size = CyMAX_CHAR_FIFO;
4403 info->cor1 = CyPARITY_NONE | Cy_1_STOP | Cy_8_BITS; 4364 info->cor1 = CyPARITY_NONE | Cy_1_STOP | Cy_8_BITS;
@@ -4430,7 +4391,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo)
4430 } 4391 }
4431 4392
4432#ifndef CONFIG_CYZ_INTR 4393#ifndef CONFIG_CYZ_INTR
4433 if (IS_CYC_Z(*cinfo) && !timer_pending(&cyz_timerlist)) { 4394 if (cy_is_Z(cinfo) && !timer_pending(&cyz_timerlist)) {
4434 mod_timer(&cyz_timerlist, jiffies + 1); 4395 mod_timer(&cyz_timerlist, jiffies + 1);
4435#ifdef CY_PCI_DEBUG 4396#ifdef CY_PCI_DEBUG
4436 printk(KERN_DEBUG "Cyclades-Z polling initialized\n"); 4397 printk(KERN_DEBUG "Cyclades-Z polling initialized\n");
@@ -4621,11 +4582,12 @@ static int __init cy_detect_isa(void)
4621 4582
4622 /* set cy_card */ 4583 /* set cy_card */
4623 cy_card[j].base_addr = cy_isa_address; 4584 cy_card[j].base_addr = cy_isa_address;
4624 cy_card[j].ctl_addr = NULL; 4585 cy_card[j].ctl_addr.p9050 = NULL;
4625 cy_card[j].irq = (int)cy_isa_irq; 4586 cy_card[j].irq = (int)cy_isa_irq;
4626 cy_card[j].bus_index = 0; 4587 cy_card[j].bus_index = 0;
4627 cy_card[j].first_line = cy_next_channel; 4588 cy_card[j].first_line = cy_next_channel;
4628 cy_card[j].num_chips = cy_isa_nchan / 4; 4589 cy_card[j].num_chips = cy_isa_nchan / CyPORTS_PER_CHIP;
4590 cy_card[j].nports = cy_isa_nchan;
4629 if (cy_init_card(&cy_card[j])) { 4591 if (cy_init_card(&cy_card[j])) {
4630 cy_card[j].base_addr = NULL; 4592 cy_card[j].base_addr = NULL;
4631 free_irq(cy_isa_irq, &cy_card[j]); 4593 free_irq(cy_isa_irq, &cy_card[j]);
@@ -4781,7 +4743,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
4781 struct CUSTOM_REG __iomem *cust = base_addr; 4743 struct CUSTOM_REG __iomem *cust = base_addr;
4782 struct ZFW_CTRL __iomem *pt_zfwctrl; 4744 struct ZFW_CTRL __iomem *pt_zfwctrl;
4783 void __iomem *tmp; 4745 void __iomem *tmp;
4784 u32 mailbox, status; 4746 u32 mailbox, status, nchan;
4785 unsigned int i; 4747 unsigned int i;
4786 int retval; 4748 int retval;
4787 4749
@@ -4793,7 +4755,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
4793 4755
4794 /* Check whether the firmware is already loaded and running. If 4756 /* Check whether the firmware is already loaded and running. If
4795 positive, skip this board */ 4757 positive, skip this board */
4796 if (Z_FPGA_LOADED(ctl_addr) && readl(&fid->signature) == ZFIRM_ID) { 4758 if (__cyz_fpga_loaded(ctl_addr) && readl(&fid->signature) == ZFIRM_ID) {
4797 u32 cntval = readl(base_addr + 0x190); 4759 u32 cntval = readl(base_addr + 0x190);
4798 4760
4799 udelay(100); 4761 udelay(100);
@@ -4812,7 +4774,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
4812 4774
4813 mailbox = readl(&ctl_addr->mail_box_0); 4775 mailbox = readl(&ctl_addr->mail_box_0);
4814 4776
4815 if (mailbox == 0 || Z_FPGA_LOADED(ctl_addr)) { 4777 if (mailbox == 0 || __cyz_fpga_loaded(ctl_addr)) {
4816 /* stops CPU and set window to beginning of RAM */ 4778 /* stops CPU and set window to beginning of RAM */
4817 cy_writel(&ctl_addr->loc_addr_base, WIN_CREG); 4779 cy_writel(&ctl_addr->loc_addr_base, WIN_CREG);
4818 cy_writel(&cust->cpu_stop, 0); 4780 cy_writel(&cust->cpu_stop, 0);
@@ -4828,7 +4790,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
4828 base_addr); 4790 base_addr);
4829 if (retval) 4791 if (retval)
4830 goto err_rel; 4792 goto err_rel;
4831 if (!Z_FPGA_LOADED(ctl_addr)) { 4793 if (!__cyz_fpga_loaded(ctl_addr)) {
4832 dev_err(&pdev->dev, "fw upload successful, but fw is " 4794 dev_err(&pdev->dev, "fw upload successful, but fw is "
4833 "not loaded\n"); 4795 "not loaded\n");
4834 goto err_rel; 4796 goto err_rel;
@@ -4887,7 +4849,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
4887 "system before loading the new FW to the " 4849 "system before loading the new FW to the "
4888 "Cyclades-Z.\n"); 4850 "Cyclades-Z.\n");
4889 4851
4890 if (Z_FPGA_LOADED(ctl_addr)) 4852 if (__cyz_fpga_loaded(ctl_addr))
4891 plx_init(pdev, irq, ctl_addr); 4853 plx_init(pdev, irq, ctl_addr);
4892 4854
4893 retval = -EIO; 4855 retval = -EIO;
@@ -4902,16 +4864,16 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
4902 base_addr + ID_ADDRESS, readl(&fid->zfwctrl_addr), 4864 base_addr + ID_ADDRESS, readl(&fid->zfwctrl_addr),
4903 base_addr + readl(&fid->zfwctrl_addr)); 4865 base_addr + readl(&fid->zfwctrl_addr));
4904 4866
4867 nchan = readl(&pt_zfwctrl->board_ctrl.n_channel);
4905 dev_info(&pdev->dev, "Cyclades-Z FW loaded: version = %x, ports = %u\n", 4868 dev_info(&pdev->dev, "Cyclades-Z FW loaded: version = %x, ports = %u\n",
4906 readl(&pt_zfwctrl->board_ctrl.fw_version), 4869 readl(&pt_zfwctrl->board_ctrl.fw_version), nchan);
4907 readl(&pt_zfwctrl->board_ctrl.n_channel));
4908 4870
4909 if (readl(&pt_zfwctrl->board_ctrl.n_channel) == 0) { 4871 if (nchan == 0) {
4910 dev_warn(&pdev->dev, "no Cyclades-Z ports were found. Please " 4872 dev_warn(&pdev->dev, "no Cyclades-Z ports were found. Please "
4911 "check the connection between the Z host card and the " 4873 "check the connection between the Z host card and the "
4912 "serial expanders.\n"); 4874 "serial expanders.\n");
4913 4875
4914 if (Z_FPGA_LOADED(ctl_addr)) 4876 if (__cyz_fpga_loaded(ctl_addr))
4915 plx_init(pdev, irq, ctl_addr); 4877 plx_init(pdev, irq, ctl_addr);
4916 4878
4917 dev_info(&pdev->dev, "Null number of ports detected. Board " 4879 dev_info(&pdev->dev, "Null number of ports detected. Board "
@@ -4932,9 +4894,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
4932 cy_writel(&ctl_addr->intr_ctrl_stat, readl(&ctl_addr->intr_ctrl_stat) | 4894 cy_writel(&ctl_addr->intr_ctrl_stat, readl(&ctl_addr->intr_ctrl_stat) |
4933 0x00030800UL); 4895 0x00030800UL);
4934 4896
4935 plx_init(pdev, irq, ctl_addr); 4897 return nchan;
4936
4937 return 0;
4938err_rel: 4898err_rel:
4939 release_firmware(fw); 4899 release_firmware(fw);
4940err: 4900err:
@@ -4946,7 +4906,7 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
4946{ 4906{
4947 void __iomem *addr0 = NULL, *addr2 = NULL; 4907 void __iomem *addr0 = NULL, *addr2 = NULL;
4948 char *card_name = NULL; 4908 char *card_name = NULL;
4949 u32 mailbox; 4909 u32 uninitialized_var(mailbox);
4950 unsigned int device_id, nchan = 0, card_no, i; 4910 unsigned int device_id, nchan = 0, card_no, i;
4951 unsigned char plx_ver; 4911 unsigned char plx_ver;
4952 int retval, irq; 4912 int retval, irq;
@@ -5023,11 +4983,12 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
5023 } 4983 }
5024 4984
5025 /* Disable interrupts on the PLX before resetting it */ 4985 /* Disable interrupts on the PLX before resetting it */
5026 cy_writew(addr0 + 0x68, readw(addr0 + 0x68) & ~0x0900); 4986 cy_writew(&ctl_addr->intr_ctrl_stat,
4987 readw(&ctl_addr->intr_ctrl_stat) & ~0x0900);
5027 4988
5028 plx_init(pdev, irq, addr0); 4989 plx_init(pdev, irq, addr0);
5029 4990
5030 mailbox = (u32)readl(&ctl_addr->mail_box_0); 4991 mailbox = readl(&ctl_addr->mail_box_0);
5031 4992
5032 addr2 = ioremap_nocache(pci_resource_start(pdev, 2), 4993 addr2 = ioremap_nocache(pci_resource_start(pdev, 2),
5033 mailbox == ZE_V1 ? CyPCI_Ze_win : CyPCI_Zwin); 4994 mailbox == ZE_V1 ? CyPCI_Ze_win : CyPCI_Zwin);
@@ -5038,12 +4999,8 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
5038 4999
5039 if (mailbox == ZE_V1) { 5000 if (mailbox == ZE_V1) {
5040 card_name = "Cyclades-Ze"; 5001 card_name = "Cyclades-Ze";
5041
5042 readl(&ctl_addr->mail_box_0);
5043 nchan = ZE_V1_NPORTS;
5044 } else { 5002 } else {
5045 card_name = "Cyclades-8Zo"; 5003 card_name = "Cyclades-8Zo";
5046
5047#ifdef CY_PCI_DEBUG 5004#ifdef CY_PCI_DEBUG
5048 if (mailbox == ZO_V1) { 5005 if (mailbox == ZO_V1) {
5049 cy_writel(&ctl_addr->loc_addr_base, WIN_CREG); 5006 cy_writel(&ctl_addr->loc_addr_base, WIN_CREG);
@@ -5065,15 +5022,12 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
5065 */ 5022 */
5066 if ((mailbox == ZO_V1) || (mailbox == ZO_V2)) 5023 if ((mailbox == ZO_V1) || (mailbox == ZO_V2))
5067 cy_writel(addr2 + ID_ADDRESS, 0L); 5024 cy_writel(addr2 + ID_ADDRESS, 0L);
5068
5069 retval = cyz_load_fw(pdev, addr2, addr0, irq);
5070 if (retval)
5071 goto err_unmap;
5072 /* This must be a Cyclades-8Zo/PCI. The extendable
5073 version will have a different device_id and will
5074 be allocated its maximum number of ports. */
5075 nchan = 8;
5076 } 5025 }
5026
5027 retval = cyz_load_fw(pdev, addr2, addr0, irq);
5028 if (retval <= 0)
5029 goto err_unmap;
5030 nchan = retval;
5077 } 5031 }
5078 5032
5079 if ((cy_next_channel + nchan) > NR_PORTS) { 5033 if ((cy_next_channel + nchan) > NR_PORTS) {
@@ -5103,8 +5057,10 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
5103 dev_err(&pdev->dev, "could not allocate IRQ\n"); 5057 dev_err(&pdev->dev, "could not allocate IRQ\n");
5104 goto err_unmap; 5058 goto err_unmap;
5105 } 5059 }
5106 cy_card[card_no].num_chips = nchan / 4; 5060 cy_card[card_no].num_chips = nchan / CyPORTS_PER_CHIP;
5107 } else { 5061 } else {
5062 cy_card[card_no].hw_ver = mailbox;
5063 cy_card[card_no].num_chips = (unsigned int)-1;
5108#ifdef CONFIG_CYZ_INTR 5064#ifdef CONFIG_CYZ_INTR
5109 /* allocate IRQ only if board has an IRQ */ 5065 /* allocate IRQ only if board has an IRQ */
5110 if (irq != 0 && irq != 255) { 5066 if (irq != 0 && irq != 255) {
@@ -5117,15 +5073,15 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
5117 } 5073 }
5118 } 5074 }
5119#endif /* CONFIG_CYZ_INTR */ 5075#endif /* CONFIG_CYZ_INTR */
5120 cy_card[card_no].num_chips = (unsigned int)-1;
5121 } 5076 }
5122 5077
5123 /* set cy_card */ 5078 /* set cy_card */
5124 cy_card[card_no].base_addr = addr2; 5079 cy_card[card_no].base_addr = addr2;
5125 cy_card[card_no].ctl_addr = addr0; 5080 cy_card[card_no].ctl_addr.p9050 = addr0;
5126 cy_card[card_no].irq = irq; 5081 cy_card[card_no].irq = irq;
5127 cy_card[card_no].bus_index = 1; 5082 cy_card[card_no].bus_index = 1;
5128 cy_card[card_no].first_line = cy_next_channel; 5083 cy_card[card_no].first_line = cy_next_channel;
5084 cy_card[card_no].nports = nchan;
5129 retval = cy_init_card(&cy_card[card_no]); 5085 retval = cy_init_card(&cy_card[card_no]);
5130 if (retval) 5086 if (retval)
5131 goto err_null; 5087 goto err_null;
@@ -5138,17 +5094,20 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
5138 plx_ver = readb(addr2 + CyPLX_VER) & 0x0f; 5094 plx_ver = readb(addr2 + CyPLX_VER) & 0x0f;
5139 switch (plx_ver) { 5095 switch (plx_ver) {
5140 case PLX_9050: 5096 case PLX_9050:
5141
5142 cy_writeb(addr0 + 0x4c, 0x43); 5097 cy_writeb(addr0 + 0x4c, 0x43);
5143 break; 5098 break;
5144 5099
5145 case PLX_9060: 5100 case PLX_9060:
5146 case PLX_9080: 5101 case PLX_9080:
5147 default: /* Old boards, use PLX_9060 */ 5102 default: /* Old boards, use PLX_9060 */
5148 plx_init(pdev, irq, addr0); 5103 {
5149 cy_writew(addr0 + 0x68, readw(addr0 + 0x68) | 0x0900); 5104 struct RUNTIME_9060 __iomem *ctl_addr = addr0;
5105 plx_init(pdev, irq, ctl_addr);
5106 cy_writew(&ctl_addr->intr_ctrl_stat,
5107 readw(&ctl_addr->intr_ctrl_stat) | 0x0900);
5150 break; 5108 break;
5151 } 5109 }
5110 }
5152 } 5111 }
5153 5112
5154 dev_info(&pdev->dev, "%s/PCI #%d found: %d channels starting from " 5113 dev_info(&pdev->dev, "%s/PCI #%d found: %d channels starting from "
@@ -5179,22 +5138,23 @@ static void __devexit cy_pci_remove(struct pci_dev *pdev)
5179 unsigned int i; 5138 unsigned int i;
5180 5139
5181 /* non-Z with old PLX */ 5140 /* non-Z with old PLX */
5182 if (!IS_CYC_Z(*cinfo) && (readb(cinfo->base_addr + CyPLX_VER) & 0x0f) == 5141 if (!cy_is_Z(cinfo) && (readb(cinfo->base_addr + CyPLX_VER) & 0x0f) ==
5183 PLX_9050) 5142 PLX_9050)
5184 cy_writeb(cinfo->ctl_addr + 0x4c, 0); 5143 cy_writeb(cinfo->ctl_addr.p9050 + 0x4c, 0);
5185 else 5144 else
5186#ifndef CONFIG_CYZ_INTR 5145#ifndef CONFIG_CYZ_INTR
5187 if (!IS_CYC_Z(*cinfo)) 5146 if (!cy_is_Z(cinfo))
5188#endif 5147#endif
5189 cy_writew(cinfo->ctl_addr + 0x68, 5148 cy_writew(&cinfo->ctl_addr.p9060->intr_ctrl_stat,
5190 readw(cinfo->ctl_addr + 0x68) & ~0x0900); 5149 readw(&cinfo->ctl_addr.p9060->intr_ctrl_stat) &
5150 ~0x0900);
5191 5151
5192 iounmap(cinfo->base_addr); 5152 iounmap(cinfo->base_addr);
5193 if (cinfo->ctl_addr) 5153 if (cinfo->ctl_addr.p9050)
5194 iounmap(cinfo->ctl_addr); 5154 iounmap(cinfo->ctl_addr.p9050);
5195 if (cinfo->irq 5155 if (cinfo->irq
5196#ifndef CONFIG_CYZ_INTR 5156#ifndef CONFIG_CYZ_INTR
5197 && !IS_CYC_Z(*cinfo) 5157 && !cy_is_Z(cinfo)
5198#endif /* CONFIG_CYZ_INTR */ 5158#endif /* CONFIG_CYZ_INTR */
5199 ) 5159 )
5200 free_irq(cinfo->irq, cinfo); 5160 free_irq(cinfo->irq, cinfo);
@@ -5240,7 +5200,7 @@ static int cyclades_proc_show(struct seq_file *m, void *v)
5240 (cur_jifs - info->idle_stats.recv_idle)/ 5200 (cur_jifs - info->idle_stats.recv_idle)/
5241 HZ, info->idle_stats.overruns, 5201 HZ, info->idle_stats.overruns,
5242 /* FIXME: double check locking */ 5202 /* FIXME: double check locking */
5243 (long)info->port.tty->ldisc.ops->num); 5203 (long)info->port.tty->ldisc->ops->num);
5244 else 5204 else
5245 seq_printf(m, "%3d %8lu %10lu %8lu " 5205 seq_printf(m, "%3d %8lu %10lu %8lu "
5246 "%10lu %8lu %9lu %6ld\n", 5206 "%10lu %8lu %9lu %6ld\n",
@@ -5386,11 +5346,11 @@ static void __exit cy_cleanup_module(void)
5386 /* clear interrupt */ 5346 /* clear interrupt */
5387 cy_writeb(card->base_addr + Cy_ClrIntr, 0); 5347 cy_writeb(card->base_addr + Cy_ClrIntr, 0);
5388 iounmap(card->base_addr); 5348 iounmap(card->base_addr);
5389 if (card->ctl_addr) 5349 if (card->ctl_addr.p9050)
5390 iounmap(card->ctl_addr); 5350 iounmap(card->ctl_addr.p9050);
5391 if (card->irq 5351 if (card->irq
5392#ifndef CONFIG_CYZ_INTR 5352#ifndef CONFIG_CYZ_INTR
5393 && !IS_CYC_Z(*card) 5353 && !cy_is_Z(card)
5394#endif /* CONFIG_CYZ_INTR */ 5354#endif /* CONFIG_CYZ_INTR */
5395 ) 5355 )
5396 free_irq(card->irq, card); 5356 free_irq(card->irq, card);
diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index af7c13ca9493..abef1f7d84fe 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -745,7 +745,7 @@ static int epca_carrier_raised(struct tty_port *port)
745 return 0; 745 return 0;
746} 746}
747 747
748static void epca_raise_dtr_rts(struct tty_port *port) 748static void epca_dtr_rts(struct tty_port *port, int onoff)
749{ 749{
750} 750}
751 751
@@ -925,7 +925,7 @@ static const struct tty_operations pc_ops = {
925 925
926static const struct tty_port_operations epca_port_ops = { 926static const struct tty_port_operations epca_port_ops = {
927 .carrier_raised = epca_carrier_raised, 927 .carrier_raised = epca_carrier_raised,
928 .raise_dtr_rts = epca_raise_dtr_rts, 928 .dtr_rts = epca_dtr_rts,
929}; 929};
930 930
931static int info_open(struct tty_struct *tty, struct file *filp) 931static int info_open(struct tty_struct *tty, struct file *filp)
@@ -1518,7 +1518,7 @@ static void doevent(int crd)
1518 if (event & MODEMCHG_IND) { 1518 if (event & MODEMCHG_IND) {
1519 /* A modem signal change has been indicated */ 1519 /* A modem signal change has been indicated */
1520 ch->imodem = mstat; 1520 ch->imodem = mstat;
1521 if (test_bit(ASYNC_CHECK_CD, &ch->port.flags)) { 1521 if (test_bit(ASYNCB_CHECK_CD, &ch->port.flags)) {
1522 /* We are now receiving dcd */ 1522 /* We are now receiving dcd */
1523 if (mstat & ch->dcd) 1523 if (mstat & ch->dcd)
1524 wake_up_interruptible(&ch->port.open_wait); 1524 wake_up_interruptible(&ch->port.open_wait);
@@ -1765,9 +1765,9 @@ static void epcaparam(struct tty_struct *tty, struct channel *ch)
1765 * that the driver will wait on carrier detect. 1765 * that the driver will wait on carrier detect.
1766 */ 1766 */
1767 if (ts->c_cflag & CLOCAL) 1767 if (ts->c_cflag & CLOCAL)
1768 clear_bit(ASYNC_CHECK_CD, &ch->port.flags); 1768 clear_bit(ASYNCB_CHECK_CD, &ch->port.flags);
1769 else 1769 else
1770 set_bit(ASYNC_CHECK_CD, &ch->port.flags); 1770 set_bit(ASYNCB_CHECK_CD, &ch->port.flags);
1771 mval = ch->m_dtr | ch->m_rts; 1771 mval = ch->m_dtr | ch->m_rts;
1772 } /* End CBAUD not detected */ 1772 } /* End CBAUD not detected */
1773 iflag = termios2digi_i(ch, ts->c_iflag); 1773 iflag = termios2digi_i(ch, ts->c_iflag);
@@ -2114,8 +2114,8 @@ static int pc_ioctl(struct tty_struct *tty, struct file *file,
2114 tty_wait_until_sent(tty, 0); 2114 tty_wait_until_sent(tty, 0);
2115 } else { 2115 } else {
2116 /* ldisc lock already held in ioctl */ 2116 /* ldisc lock already held in ioctl */
2117 if (tty->ldisc.ops->flush_buffer) 2117 if (tty->ldisc->ops->flush_buffer)
2118 tty->ldisc.ops->flush_buffer(tty); 2118 tty->ldisc->ops->flush_buffer(tty);
2119 } 2119 }
2120 unlock_kernel(); 2120 unlock_kernel();
2121 /* Fall Thru */ 2121 /* Fall Thru */
@@ -2244,7 +2244,8 @@ static void do_softint(struct work_struct *work)
2244 if (test_and_clear_bit(EPCA_EVENT_HANGUP, &ch->event)) { 2244 if (test_and_clear_bit(EPCA_EVENT_HANGUP, &ch->event)) {
2245 tty_hangup(tty); 2245 tty_hangup(tty);
2246 wake_up_interruptible(&ch->port.open_wait); 2246 wake_up_interruptible(&ch->port.open_wait);
2247 clear_bit(ASYNC_NORMAL_ACTIVE, &ch->port.flags); 2247 clear_bit(ASYNCB_NORMAL_ACTIVE,
2248 &ch->port.flags);
2248 } 2249 }
2249 } 2250 }
2250 tty_kref_put(tty); 2251 tty_kref_put(tty);
diff --git a/drivers/char/ip2/i2lib.c b/drivers/char/ip2/i2lib.c
index 0061e18aff60..0d10b89218ed 100644
--- a/drivers/char/ip2/i2lib.c
+++ b/drivers/char/ip2/i2lib.c
@@ -868,11 +868,11 @@ i2Input(i2ChanStrPtr pCh)
868 amountToMove = count; 868 amountToMove = count;
869 } 869 }
870 // Move the first block 870 // Move the first block
871 pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY, 871 pCh->pTTY->ldisc->ops->receive_buf( pCh->pTTY,
872 &(pCh->Ibuf[stripIndex]), NULL, amountToMove ); 872 &(pCh->Ibuf[stripIndex]), NULL, amountToMove );
873 // If we needed to wrap, do the second data move 873 // If we needed to wrap, do the second data move
874 if (count > amountToMove) { 874 if (count > amountToMove) {
875 pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY, 875 pCh->pTTY->ldisc->ops->receive_buf( pCh->pTTY,
876 pCh->Ibuf, NULL, count - amountToMove ); 876 pCh->Ibuf, NULL, count - amountToMove );
877 } 877 }
878 // Bump and wrap the stripIndex all at once by the amount of data read. This 878 // Bump and wrap the stripIndex all at once by the amount of data read. This
diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c
index afd9247cf082..517271c762e6 100644
--- a/drivers/char/ip2/ip2main.c
+++ b/drivers/char/ip2/ip2main.c
@@ -1315,8 +1315,8 @@ static inline void isig(int sig, struct tty_struct *tty, int flush)
1315 if (tty->pgrp) 1315 if (tty->pgrp)
1316 kill_pgrp(tty->pgrp, sig, 1); 1316 kill_pgrp(tty->pgrp, sig, 1);
1317 if (flush || !L_NOFLSH(tty)) { 1317 if (flush || !L_NOFLSH(tty)) {
1318 if ( tty->ldisc.ops->flush_buffer ) 1318 if ( tty->ldisc->ops->flush_buffer )
1319 tty->ldisc.ops->flush_buffer(tty); 1319 tty->ldisc->ops->flush_buffer(tty);
1320 i2InputFlush( tty->driver_data ); 1320 i2InputFlush( tty->driver_data );
1321 } 1321 }
1322} 1322}
diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index a59eac584d16..4d745a89504f 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -329,7 +329,7 @@ static inline void drop_rts(struct isi_port *port)
329 329
330/* card->lock MUST NOT be held */ 330/* card->lock MUST NOT be held */
331 331
332static void isicom_raise_dtr_rts(struct tty_port *port) 332static void isicom_dtr_rts(struct tty_port *port, int on)
333{ 333{
334 struct isi_port *ip = container_of(port, struct isi_port, port); 334 struct isi_port *ip = container_of(port, struct isi_port, port);
335 struct isi_board *card = ip->card; 335 struct isi_board *card = ip->card;
@@ -339,10 +339,17 @@ static void isicom_raise_dtr_rts(struct tty_port *port)
339 if (!lock_card(card)) 339 if (!lock_card(card))
340 return; 340 return;
341 341
342 outw(0x8000 | (channel << card->shift_count) | 0x02, base); 342 if (on) {
343 outw(0x0f04, base); 343 outw(0x8000 | (channel << card->shift_count) | 0x02, base);
344 InterruptTheCard(base); 344 outw(0x0f04, base);
345 ip->status |= (ISI_DTR | ISI_RTS); 345 InterruptTheCard(base);
346 ip->status |= (ISI_DTR | ISI_RTS);
347 } else {
348 outw(0x8000 | (channel << card->shift_count) | 0x02, base);
349 outw(0x0C04, base);
350 InterruptTheCard(base);
351 ip->status &= ~(ISI_DTR | ISI_RTS);
352 }
346 unlock_card(card); 353 unlock_card(card);
347} 354}
348 355
@@ -1339,7 +1346,7 @@ static const struct tty_operations isicom_ops = {
1339 1346
1340static const struct tty_port_operations isicom_port_ops = { 1347static const struct tty_port_operations isicom_port_ops = {
1341 .carrier_raised = isicom_carrier_raised, 1348 .carrier_raised = isicom_carrier_raised,
1342 .raise_dtr_rts = isicom_raise_dtr_rts, 1349 .dtr_rts = isicom_dtr_rts,
1343}; 1350};
1344 1351
1345static int __devinit reset_card(struct pci_dev *pdev, 1352static int __devinit reset_card(struct pci_dev *pdev,
diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index fff19f7e29d2..e18800c400b1 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -1140,14 +1140,14 @@ static int stli_carrier_raised(struct tty_port *port)
1140 return (portp->sigs & TIOCM_CD) ? 1 : 0; 1140 return (portp->sigs & TIOCM_CD) ? 1 : 0;
1141} 1141}
1142 1142
1143static void stli_raise_dtr_rts(struct tty_port *port) 1143static void stli_dtr_rts(struct tty_port *port, int on)
1144{ 1144{
1145 struct stliport *portp = container_of(port, struct stliport, port); 1145 struct stliport *portp = container_of(port, struct stliport, port);
1146 struct stlibrd *brdp = stli_brds[portp->brdnr]; 1146 struct stlibrd *brdp = stli_brds[portp->brdnr];
1147 stli_mkasysigs(&portp->asig, 1, 1); 1147 stli_mkasysigs(&portp->asig, on, on);
1148 if (stli_cmdwait(brdp, portp, A_SETSIGNALS, &portp->asig, 1148 if (stli_cmdwait(brdp, portp, A_SETSIGNALS, &portp->asig,
1149 sizeof(asysigs_t), 0) < 0) 1149 sizeof(asysigs_t), 0) < 0)
1150 printk(KERN_WARNING "istallion: dtr raise failed.\n"); 1150 printk(KERN_WARNING "istallion: dtr set failed.\n");
1151} 1151}
1152 1152
1153 1153
@@ -4417,7 +4417,7 @@ static const struct tty_operations stli_ops = {
4417 4417
4418static const struct tty_port_operations stli_port_ops = { 4418static const struct tty_port_operations stli_port_ops = {
4419 .carrier_raised = stli_carrier_raised, 4419 .carrier_raised = stli_carrier_raised,
4420 .raise_dtr_rts = stli_raise_dtr_rts, 4420 .dtr_rts = stli_dtr_rts,
4421}; 4421};
4422 4422
4423/*****************************************************************************/ 4423/*****************************************************************************/
diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c
index 4a4cab73d0be..65b6ff2442c6 100644
--- a/drivers/char/moxa.c
+++ b/drivers/char/moxa.c
@@ -1184,6 +1184,11 @@ static int moxa_open(struct tty_struct *tty, struct file *filp)
1184 return -ENODEV; 1184 return -ENODEV;
1185 } 1185 }
1186 1186
1187 if (port % MAX_PORTS_PER_BOARD >= brd->numPorts) {
1188 mutex_unlock(&moxa_openlock);
1189 return -ENODEV;
1190 }
1191
1187 ch = &brd->ports[port % MAX_PORTS_PER_BOARD]; 1192 ch = &brd->ports[port % MAX_PORTS_PER_BOARD];
1188 ch->port.count++; 1193 ch->port.count++;
1189 tty->driver_data = ch; 1194 tty->driver_data = ch;
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 13f8871e5b21..9533f43a30bb 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -547,14 +547,18 @@ static int mxser_carrier_raised(struct tty_port *port)
547 return (inb(mp->ioaddr + UART_MSR) & UART_MSR_DCD)?1:0; 547 return (inb(mp->ioaddr + UART_MSR) & UART_MSR_DCD)?1:0;
548} 548}
549 549
550static void mxser_raise_dtr_rts(struct tty_port *port) 550static void mxser_dtr_rts(struct tty_port *port, int on)
551{ 551{
552 struct mxser_port *mp = container_of(port, struct mxser_port, port); 552 struct mxser_port *mp = container_of(port, struct mxser_port, port);
553 unsigned long flags; 553 unsigned long flags;
554 554
555 spin_lock_irqsave(&mp->slock, flags); 555 spin_lock_irqsave(&mp->slock, flags);
556 outb(inb(mp->ioaddr + UART_MCR) | 556 if (on)
557 UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR); 557 outb(inb(mp->ioaddr + UART_MCR) |
558 UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR);
559 else
560 outb(inb(mp->ioaddr + UART_MCR)&~(UART_MCR_DTR | UART_MCR_RTS),
561 mp->ioaddr + UART_MCR);
558 spin_unlock_irqrestore(&mp->slock, flags); 562 spin_unlock_irqrestore(&mp->slock, flags);
559} 563}
560 564
@@ -2356,7 +2360,7 @@ static const struct tty_operations mxser_ops = {
2356 2360
2357struct tty_port_operations mxser_port_ops = { 2361struct tty_port_operations mxser_port_ops = {
2358 .carrier_raised = mxser_carrier_raised, 2362 .carrier_raised = mxser_carrier_raised,
2359 .raise_dtr_rts = mxser_raise_dtr_rts, 2363 .dtr_rts = mxser_dtr_rts,
2360}; 2364};
2361 2365
2362/* 2366/*
diff --git a/drivers/char/n_hdlc.c b/drivers/char/n_hdlc.c
index bacb3e2872ae..461ece591a5b 100644
--- a/drivers/char/n_hdlc.c
+++ b/drivers/char/n_hdlc.c
@@ -342,8 +342,8 @@ static int n_hdlc_tty_open (struct tty_struct *tty)
342#endif 342#endif
343 343
344 /* Flush any pending characters in the driver and discipline. */ 344 /* Flush any pending characters in the driver and discipline. */
345 if (tty->ldisc.ops->flush_buffer) 345 if (tty->ldisc->ops->flush_buffer)
346 tty->ldisc.ops->flush_buffer(tty); 346 tty->ldisc->ops->flush_buffer(tty);
347 347
348 tty_driver_flush_buffer(tty); 348 tty_driver_flush_buffer(tty);
349 349
diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index f6f0e4ec2b51..94a5d5020abc 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -73,24 +73,6 @@
73#define ECHO_OP_SET_CANON_COL 0x81 73#define ECHO_OP_SET_CANON_COL 0x81
74#define ECHO_OP_ERASE_TAB 0x82 74#define ECHO_OP_ERASE_TAB 0x82
75 75
76static inline unsigned char *alloc_buf(void)
77{
78 gfp_t prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
79
80 if (PAGE_SIZE != N_TTY_BUF_SIZE)
81 return kmalloc(N_TTY_BUF_SIZE, prio);
82 else
83 return (unsigned char *)__get_free_page(prio);
84}
85
86static inline void free_buf(unsigned char *buf)
87{
88 if (PAGE_SIZE != N_TTY_BUF_SIZE)
89 kfree(buf);
90 else
91 free_page((unsigned long) buf);
92}
93
94static inline int tty_put_user(struct tty_struct *tty, unsigned char x, 76static inline int tty_put_user(struct tty_struct *tty, unsigned char x,
95 unsigned char __user *ptr) 77 unsigned char __user *ptr)
96{ 78{
@@ -1558,11 +1540,11 @@ static void n_tty_close(struct tty_struct *tty)
1558{ 1540{
1559 n_tty_flush_buffer(tty); 1541 n_tty_flush_buffer(tty);
1560 if (tty->read_buf) { 1542 if (tty->read_buf) {
1561 free_buf(tty->read_buf); 1543 kfree(tty->read_buf);
1562 tty->read_buf = NULL; 1544 tty->read_buf = NULL;
1563 } 1545 }
1564 if (tty->echo_buf) { 1546 if (tty->echo_buf) {
1565 free_buf(tty->echo_buf); 1547 kfree(tty->echo_buf);
1566 tty->echo_buf = NULL; 1548 tty->echo_buf = NULL;
1567 } 1549 }
1568} 1550}
@@ -1584,17 +1566,16 @@ static int n_tty_open(struct tty_struct *tty)
1584 1566
1585 /* These are ugly. Currently a malloc failure here can panic */ 1567 /* These are ugly. Currently a malloc failure here can panic */
1586 if (!tty->read_buf) { 1568 if (!tty->read_buf) {
1587 tty->read_buf = alloc_buf(); 1569 tty->read_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
1588 if (!tty->read_buf) 1570 if (!tty->read_buf)
1589 return -ENOMEM; 1571 return -ENOMEM;
1590 } 1572 }
1591 if (!tty->echo_buf) { 1573 if (!tty->echo_buf) {
1592 tty->echo_buf = alloc_buf(); 1574 tty->echo_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
1575
1593 if (!tty->echo_buf) 1576 if (!tty->echo_buf)
1594 return -ENOMEM; 1577 return -ENOMEM;
1595 } 1578 }
1596 memset(tty->read_buf, 0, N_TTY_BUF_SIZE);
1597 memset(tty->echo_buf, 0, N_TTY_BUF_SIZE);
1598 reset_buffer_flags(tty); 1579 reset_buffer_flags(tty);
1599 tty->column = 0; 1580 tty->column = 0;
1600 n_tty_set_termios(tty, NULL); 1581 n_tty_set_termios(tty, NULL);
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 19d79fc54461..77b364889224 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -383,7 +383,7 @@ static void async_mode(MGSLPC_INFO *info);
383static void tx_timeout(unsigned long context); 383static void tx_timeout(unsigned long context);
384 384
385static int carrier_raised(struct tty_port *port); 385static int carrier_raised(struct tty_port *port);
386static void raise_dtr_rts(struct tty_port *port); 386static void dtr_rts(struct tty_port *port, int onoff);
387 387
388#if SYNCLINK_GENERIC_HDLC 388#if SYNCLINK_GENERIC_HDLC
389#define dev_to_port(D) (dev_to_hdlc(D)->priv) 389#define dev_to_port(D) (dev_to_hdlc(D)->priv)
@@ -513,7 +513,7 @@ static void ldisc_receive_buf(struct tty_struct *tty,
513 513
514static const struct tty_port_operations mgslpc_port_ops = { 514static const struct tty_port_operations mgslpc_port_ops = {
515 .carrier_raised = carrier_raised, 515 .carrier_raised = carrier_raised,
516 .raise_dtr_rts = raise_dtr_rts 516 .dtr_rts = dtr_rts
517}; 517};
518 518
519static int mgslpc_probe(struct pcmcia_device *link) 519static int mgslpc_probe(struct pcmcia_device *link)
@@ -2528,13 +2528,16 @@ static int carrier_raised(struct tty_port *port)
2528 return 0; 2528 return 0;
2529} 2529}
2530 2530
2531static void raise_dtr_rts(struct tty_port *port) 2531static void dtr_rts(struct tty_port *port, int onoff)
2532{ 2532{
2533 MGSLPC_INFO *info = container_of(port, MGSLPC_INFO, port); 2533 MGSLPC_INFO *info = container_of(port, MGSLPC_INFO, port);
2534 unsigned long flags; 2534 unsigned long flags;
2535 2535
2536 spin_lock_irqsave(&info->lock,flags); 2536 spin_lock_irqsave(&info->lock,flags);
2537 info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; 2537 if (onoff)
2538 info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
2539 else
2540 info->serial_signals &= ~SerialSignal_RTS + SerialSignal_DTR;
2538 set_signals(info); 2541 set_signals(info);
2539 spin_unlock_irqrestore(&info->lock,flags); 2542 spin_unlock_irqrestore(&info->lock,flags);
2540} 2543}
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index 31038a0052a2..5acd29e6e043 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -30,7 +30,6 @@
30 30
31#include <asm/system.h> 31#include <asm/system.h>
32 32
33/* These are global because they are accessed in tty_io.c */
34#ifdef CONFIG_UNIX98_PTYS 33#ifdef CONFIG_UNIX98_PTYS
35static struct tty_driver *ptm_driver; 34static struct tty_driver *ptm_driver;
36static struct tty_driver *pts_driver; 35static struct tty_driver *pts_driver;
@@ -111,7 +110,7 @@ static int pty_write(struct tty_struct *tty, const unsigned char *buf,
111 c = to->receive_room; 110 c = to->receive_room;
112 if (c > count) 111 if (c > count)
113 c = count; 112 c = count;
114 to->ldisc.ops->receive_buf(to, buf, NULL, c); 113 to->ldisc->ops->receive_buf(to, buf, NULL, c);
115 114
116 return c; 115 return c;
117} 116}
@@ -149,11 +148,11 @@ static int pty_chars_in_buffer(struct tty_struct *tty)
149 int count; 148 int count;
150 149
151 /* We should get the line discipline lock for "tty->link" */ 150 /* We should get the line discipline lock for "tty->link" */
152 if (!to || !to->ldisc.ops->chars_in_buffer) 151 if (!to || !to->ldisc->ops->chars_in_buffer)
153 return 0; 152 return 0;
154 153
155 /* The ldisc must report 0 if no characters available to be read */ 154 /* The ldisc must report 0 if no characters available to be read */
156 count = to->ldisc.ops->chars_in_buffer(to); 155 count = to->ldisc->ops->chars_in_buffer(to);
157 156
158 if (tty->driver->subtype == PTY_TYPE_SLAVE) 157 if (tty->driver->subtype == PTY_TYPE_SLAVE)
159 return count; 158 return count;
@@ -187,8 +186,8 @@ static void pty_flush_buffer(struct tty_struct *tty)
187 if (!to) 186 if (!to)
188 return; 187 return;
189 188
190 if (to->ldisc.ops->flush_buffer) 189 if (to->ldisc->ops->flush_buffer)
191 to->ldisc.ops->flush_buffer(to); 190 to->ldisc->ops->flush_buffer(to);
192 191
193 if (to->packet) { 192 if (to->packet) {
194 spin_lock_irqsave(&tty->ctrl_lock, flags); 193 spin_lock_irqsave(&tty->ctrl_lock, flags);
diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index f59fc5cea067..63d5b628477a 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -872,11 +872,16 @@ static int carrier_raised(struct tty_port *port)
872 return (sGetChanStatusLo(&info->channel) & CD_ACT) ? 1 : 0; 872 return (sGetChanStatusLo(&info->channel) & CD_ACT) ? 1 : 0;
873} 873}
874 874
875static void raise_dtr_rts(struct tty_port *port) 875static void dtr_rts(struct tty_port *port, int on)
876{ 876{
877 struct r_port *info = container_of(port, struct r_port, port); 877 struct r_port *info = container_of(port, struct r_port, port);
878 sSetDTR(&info->channel); 878 if (on) {
879 sSetRTS(&info->channel); 879 sSetDTR(&info->channel);
880 sSetRTS(&info->channel);
881 } else {
882 sClrDTR(&info->channel);
883 sClrRTS(&info->channel);
884 }
880} 885}
881 886
882/* 887/*
@@ -934,7 +939,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
934 /* 939 /*
935 * Info->count is now 1; so it's safe to sleep now. 940 * Info->count is now 1; so it's safe to sleep now.
936 */ 941 */
937 if (!test_bit(ASYNC_INITIALIZED, &port->flags)) { 942 if (!test_bit(ASYNCB_INITIALIZED, &port->flags)) {
938 cp = &info->channel; 943 cp = &info->channel;
939 sSetRxTrigger(cp, TRIG_1); 944 sSetRxTrigger(cp, TRIG_1);
940 if (sGetChanStatus(cp) & CD_ACT) 945 if (sGetChanStatus(cp) & CD_ACT)
@@ -958,7 +963,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
958 sEnRxFIFO(cp); 963 sEnRxFIFO(cp);
959 sEnTransmit(cp); 964 sEnTransmit(cp);
960 965
961 set_bit(ASYNC_INITIALIZED, &info->port.flags); 966 set_bit(ASYNCB_INITIALIZED, &info->port.flags);
962 967
963 /* 968 /*
964 * Set up the tty->alt_speed kludge 969 * Set up the tty->alt_speed kludge
@@ -1641,7 +1646,7 @@ static int rp_write(struct tty_struct *tty,
1641 /* Write remaining data into the port's xmit_buf */ 1646 /* Write remaining data into the port's xmit_buf */
1642 while (1) { 1647 while (1) {
1643 /* Hung up ? */ 1648 /* Hung up ? */
1644 if (!test_bit(ASYNC_NORMAL_ACTIVE, &info->port.flags)) 1649 if (!test_bit(ASYNCB_NORMAL_ACTIVE, &info->port.flags))
1645 goto end; 1650 goto end;
1646 c = min(count, XMIT_BUF_SIZE - info->xmit_cnt - 1); 1651 c = min(count, XMIT_BUF_SIZE - info->xmit_cnt - 1);
1647 c = min(c, XMIT_BUF_SIZE - info->xmit_head); 1652 c = min(c, XMIT_BUF_SIZE - info->xmit_head);
@@ -2250,7 +2255,7 @@ static const struct tty_operations rocket_ops = {
2250 2255
2251static const struct tty_port_operations rocket_port_ops = { 2256static const struct tty_port_operations rocket_port_ops = {
2252 .carrier_raised = carrier_raised, 2257 .carrier_raised = carrier_raised,
2253 .raise_dtr_rts = raise_dtr_rts, 2258 .dtr_rts = dtr_rts,
2254}; 2259};
2255 2260
2256/* 2261/*
diff --git a/drivers/char/selection.c b/drivers/char/selection.c
index cb8ca5698963..f97b9e848064 100644
--- a/drivers/char/selection.c
+++ b/drivers/char/selection.c
@@ -327,7 +327,7 @@ int paste_selection(struct tty_struct *tty)
327 } 327 }
328 count = sel_buffer_lth - pasted; 328 count = sel_buffer_lth - pasted;
329 count = min(count, tty->receive_room); 329 count = min(count, tty->receive_room);
330 tty->ldisc.ops->receive_buf(tty, sel_buffer + pasted, 330 tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted,
331 NULL, count); 331 NULL, count);
332 pasted += count; 332 pasted += count;
333 } 333 }
diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c
index 2ad813a801dc..53e504f41b20 100644
--- a/drivers/char/stallion.c
+++ b/drivers/char/stallion.c
@@ -772,11 +772,11 @@ static int stl_carrier_raised(struct tty_port *port)
772 return (portp->sigs & TIOCM_CD) ? 1 : 0; 772 return (portp->sigs & TIOCM_CD) ? 1 : 0;
773} 773}
774 774
775static void stl_raise_dtr_rts(struct tty_port *port) 775static void stl_dtr_rts(struct tty_port *port, int on)
776{ 776{
777 struct stlport *portp = container_of(port, struct stlport, port); 777 struct stlport *portp = container_of(port, struct stlport, port);
778 /* Takes brd_lock internally */ 778 /* Takes brd_lock internally */
779 stl_setsignals(portp, 1, 1); 779 stl_setsignals(portp, on, on);
780} 780}
781 781
782/*****************************************************************************/ 782/*****************************************************************************/
@@ -2547,7 +2547,7 @@ static const struct tty_operations stl_ops = {
2547 2547
2548static const struct tty_port_operations stl_port_ops = { 2548static const struct tty_port_operations stl_port_ops = {
2549 .carrier_raised = stl_carrier_raised, 2549 .carrier_raised = stl_carrier_raised,
2550 .raise_dtr_rts = stl_raise_dtr_rts, 2550 .dtr_rts = stl_dtr_rts,
2551}; 2551};
2552 2552
2553/*****************************************************************************/ 2553/*****************************************************************************/
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index afd0b26ca056..afded3a2379c 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -3247,13 +3247,16 @@ static int carrier_raised(struct tty_port *port)
3247 return (info->serial_signals & SerialSignal_DCD) ? 1 : 0; 3247 return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
3248} 3248}
3249 3249
3250static void raise_dtr_rts(struct tty_port *port) 3250static void dtr_rts(struct tty_port *port, int on)
3251{ 3251{
3252 struct mgsl_struct *info = container_of(port, struct mgsl_struct, port); 3252 struct mgsl_struct *info = container_of(port, struct mgsl_struct, port);
3253 unsigned long flags; 3253 unsigned long flags;
3254 3254
3255 spin_lock_irqsave(&info->irq_spinlock,flags); 3255 spin_lock_irqsave(&info->irq_spinlock,flags);
3256 info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; 3256 if (on)
3257 info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
3258 else
3259 info->serial_signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
3257 usc_set_serial_signals(info); 3260 usc_set_serial_signals(info);
3258 spin_unlock_irqrestore(&info->irq_spinlock,flags); 3261 spin_unlock_irqrestore(&info->irq_spinlock,flags);
3259} 3262}
@@ -4258,7 +4261,7 @@ static void mgsl_add_device( struct mgsl_struct *info )
4258 4261
4259static const struct tty_port_operations mgsl_port_ops = { 4262static const struct tty_port_operations mgsl_port_ops = {
4260 .carrier_raised = carrier_raised, 4263 .carrier_raised = carrier_raised,
4261 .raise_dtr_rts = raise_dtr_rts, 4264 .dtr_rts = dtr_rts,
4262}; 4265};
4263 4266
4264 4267
diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index 5e256494686a..1386625fc4ca 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -214,6 +214,7 @@ struct slgt_desc
214#define set_desc_next(a,b) (a).next = cpu_to_le32((unsigned int)(b)) 214#define set_desc_next(a,b) (a).next = cpu_to_le32((unsigned int)(b))
215#define set_desc_count(a,b)(a).count = cpu_to_le16((unsigned short)(b)) 215#define set_desc_count(a,b)(a).count = cpu_to_le16((unsigned short)(b))
216#define set_desc_eof(a,b) (a).status = cpu_to_le16((b) ? (le16_to_cpu((a).status) | BIT0) : (le16_to_cpu((a).status) & ~BIT0)) 216#define set_desc_eof(a,b) (a).status = cpu_to_le16((b) ? (le16_to_cpu((a).status) | BIT0) : (le16_to_cpu((a).status) & ~BIT0))
217#define set_desc_status(a, b) (a).status = cpu_to_le16((unsigned short)(b))
217#define desc_count(a) (le16_to_cpu((a).count)) 218#define desc_count(a) (le16_to_cpu((a).count))
218#define desc_status(a) (le16_to_cpu((a).status)) 219#define desc_status(a) (le16_to_cpu((a).status))
219#define desc_complete(a) (le16_to_cpu((a).status) & BIT15) 220#define desc_complete(a) (le16_to_cpu((a).status) & BIT15)
@@ -297,6 +298,7 @@ struct slgt_info {
297 u32 max_frame_size; /* as set by device config */ 298 u32 max_frame_size; /* as set by device config */
298 299
299 unsigned int rbuf_fill_level; 300 unsigned int rbuf_fill_level;
301 unsigned int rx_pio;
300 unsigned int if_mode; 302 unsigned int if_mode;
301 unsigned int base_clock; 303 unsigned int base_clock;
302 304
@@ -331,6 +333,8 @@ struct slgt_info {
331 struct slgt_desc *rbufs; 333 struct slgt_desc *rbufs;
332 unsigned int rbuf_current; 334 unsigned int rbuf_current;
333 unsigned int rbuf_index; 335 unsigned int rbuf_index;
336 unsigned int rbuf_fill_index;
337 unsigned short rbuf_fill_count;
334 338
335 unsigned int tbuf_count; 339 unsigned int tbuf_count;
336 struct slgt_desc *tbufs; 340 struct slgt_desc *tbufs;
@@ -2110,6 +2114,40 @@ static void ri_change(struct slgt_info *info, unsigned short status)
2110 info->pending_bh |= BH_STATUS; 2114 info->pending_bh |= BH_STATUS;
2111} 2115}
2112 2116
2117static void isr_rxdata(struct slgt_info *info)
2118{
2119 unsigned int count = info->rbuf_fill_count;
2120 unsigned int i = info->rbuf_fill_index;
2121 unsigned short reg;
2122
2123 while (rd_reg16(info, SSR) & IRQ_RXDATA) {
2124 reg = rd_reg16(info, RDR);
2125 DBGISR(("isr_rxdata %s RDR=%04X\n", info->device_name, reg));
2126 if (desc_complete(info->rbufs[i])) {
2127 /* all buffers full */
2128 rx_stop(info);
2129 info->rx_restart = 1;
2130 continue;
2131 }
2132 info->rbufs[i].buf[count++] = (unsigned char)reg;
2133 /* async mode saves status byte to buffer for each data byte */
2134 if (info->params.mode == MGSL_MODE_ASYNC)
2135 info->rbufs[i].buf[count++] = (unsigned char)(reg >> 8);
2136 if (count == info->rbuf_fill_level || (reg & BIT10)) {
2137 /* buffer full or end of frame */
2138 set_desc_count(info->rbufs[i], count);
2139 set_desc_status(info->rbufs[i], BIT15 | (reg >> 8));
2140 info->rbuf_fill_count = count = 0;
2141 if (++i == info->rbuf_count)
2142 i = 0;
2143 info->pending_bh |= BH_RECEIVE;
2144 }
2145 }
2146
2147 info->rbuf_fill_index = i;
2148 info->rbuf_fill_count = count;
2149}
2150
2113static void isr_serial(struct slgt_info *info) 2151static void isr_serial(struct slgt_info *info)
2114{ 2152{
2115 unsigned short status = rd_reg16(info, SSR); 2153 unsigned short status = rd_reg16(info, SSR);
@@ -2125,6 +2163,8 @@ static void isr_serial(struct slgt_info *info)
2125 if (info->tx_count) 2163 if (info->tx_count)
2126 isr_txeom(info, status); 2164 isr_txeom(info, status);
2127 } 2165 }
2166 if (info->rx_pio && (status & IRQ_RXDATA))
2167 isr_rxdata(info);
2128 if ((status & IRQ_RXBREAK) && (status & RXBREAK)) { 2168 if ((status & IRQ_RXBREAK) && (status & RXBREAK)) {
2129 info->icount.brk++; 2169 info->icount.brk++;
2130 /* process break detection if tty control allows */ 2170 /* process break detection if tty control allows */
@@ -2141,7 +2181,8 @@ static void isr_serial(struct slgt_info *info)
2141 } else { 2181 } else {
2142 if (status & (IRQ_TXIDLE + IRQ_TXUNDER)) 2182 if (status & (IRQ_TXIDLE + IRQ_TXUNDER))
2143 isr_txeom(info, status); 2183 isr_txeom(info, status);
2144 2184 if (info->rx_pio && (status & IRQ_RXDATA))
2185 isr_rxdata(info);
2145 if (status & IRQ_RXIDLE) { 2186 if (status & IRQ_RXIDLE) {
2146 if (status & RXIDLE) 2187 if (status & RXIDLE)
2147 info->icount.rxidle++; 2188 info->icount.rxidle++;
@@ -2642,6 +2683,10 @@ static int rx_enable(struct slgt_info *info, int enable)
2642 return -EINVAL; 2683 return -EINVAL;
2643 } 2684 }
2644 info->rbuf_fill_level = rbuf_fill_level; 2685 info->rbuf_fill_level = rbuf_fill_level;
2686 if (rbuf_fill_level < 128)
2687 info->rx_pio = 1; /* PIO mode */
2688 else
2689 info->rx_pio = 0; /* DMA mode */
2645 rx_stop(info); /* restart receiver to use new fill level */ 2690 rx_stop(info); /* restart receiver to use new fill level */
2646 } 2691 }
2647 2692
@@ -3099,13 +3144,16 @@ static int carrier_raised(struct tty_port *port)
3099 return (info->signals & SerialSignal_DCD) ? 1 : 0; 3144 return (info->signals & SerialSignal_DCD) ? 1 : 0;
3100} 3145}
3101 3146
3102static void raise_dtr_rts(struct tty_port *port) 3147static void dtr_rts(struct tty_port *port, int on)
3103{ 3148{
3104 unsigned long flags; 3149 unsigned long flags;
3105 struct slgt_info *info = container_of(port, struct slgt_info, port); 3150 struct slgt_info *info = container_of(port, struct slgt_info, port);
3106 3151
3107 spin_lock_irqsave(&info->lock,flags); 3152 spin_lock_irqsave(&info->lock,flags);
3108 info->signals |= SerialSignal_RTS + SerialSignal_DTR; 3153 if (on)
3154 info->signals |= SerialSignal_RTS + SerialSignal_DTR;
3155 else
3156 info->signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
3109 set_signals(info); 3157 set_signals(info);
3110 spin_unlock_irqrestore(&info->lock,flags); 3158 spin_unlock_irqrestore(&info->lock,flags);
3111} 3159}
@@ -3419,7 +3467,7 @@ static void add_device(struct slgt_info *info)
3419 3467
3420static const struct tty_port_operations slgt_port_ops = { 3468static const struct tty_port_operations slgt_port_ops = {
3421 .carrier_raised = carrier_raised, 3469 .carrier_raised = carrier_raised,
3422 .raise_dtr_rts = raise_dtr_rts, 3470 .dtr_rts = dtr_rts,
3423}; 3471};
3424 3472
3425/* 3473/*
@@ -3841,15 +3889,27 @@ static void rx_start(struct slgt_info *info)
3841 rdma_reset(info); 3889 rdma_reset(info);
3842 reset_rbufs(info); 3890 reset_rbufs(info);
3843 3891
3844 /* set 1st descriptor address */ 3892 if (info->rx_pio) {
3845 wr_reg32(info, RDDAR, info->rbufs[0].pdesc); 3893 /* rx request when rx FIFO not empty */
3846 3894 wr_reg16(info, SCR, (unsigned short)(rd_reg16(info, SCR) & ~BIT14));
3847 if (info->params.mode != MGSL_MODE_ASYNC) { 3895 slgt_irq_on(info, IRQ_RXDATA);
3848 /* enable rx DMA and DMA interrupt */ 3896 if (info->params.mode == MGSL_MODE_ASYNC) {
3849 wr_reg32(info, RDCSR, (BIT2 + BIT0)); 3897 /* enable saving of rx status */
3898 wr_reg32(info, RDCSR, BIT6);
3899 }
3850 } else { 3900 } else {
3851 /* enable saving of rx status, rx DMA and DMA interrupt */ 3901 /* rx request when rx FIFO half full */
3852 wr_reg32(info, RDCSR, (BIT6 + BIT2 + BIT0)); 3902 wr_reg16(info, SCR, (unsigned short)(rd_reg16(info, SCR) | BIT14));
3903 /* set 1st descriptor address */
3904 wr_reg32(info, RDDAR, info->rbufs[0].pdesc);
3905
3906 if (info->params.mode != MGSL_MODE_ASYNC) {
3907 /* enable rx DMA and DMA interrupt */
3908 wr_reg32(info, RDCSR, (BIT2 + BIT0));
3909 } else {
3910 /* enable saving of rx status, rx DMA and DMA interrupt */
3911 wr_reg32(info, RDCSR, (BIT6 + BIT2 + BIT0));
3912 }
3853 } 3913 }
3854 3914
3855 slgt_irq_on(info, IRQ_RXOVER); 3915 slgt_irq_on(info, IRQ_RXOVER);
@@ -4467,6 +4527,8 @@ static void free_rbufs(struct slgt_info *info, unsigned int i, unsigned int last
4467static void reset_rbufs(struct slgt_info *info) 4527static void reset_rbufs(struct slgt_info *info)
4468{ 4528{
4469 free_rbufs(info, 0, info->rbuf_count - 1); 4529 free_rbufs(info, 0, info->rbuf_count - 1);
4530 info->rbuf_fill_index = 0;
4531 info->rbuf_fill_count = 0;
4470} 4532}
4471 4533
4472/* 4534/*
diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c
index 26de60efe4b2..6f727e3c53ad 100644
--- a/drivers/char/synclinkmp.c
+++ b/drivers/char/synclinkmp.c
@@ -3277,13 +3277,16 @@ static int carrier_raised(struct tty_port *port)
3277 return (info->serial_signals & SerialSignal_DCD) ? 1 : 0; 3277 return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
3278} 3278}
3279 3279
3280static void raise_dtr_rts(struct tty_port *port) 3280static void dtr_rts(struct tty_port *port, int on)
3281{ 3281{
3282 SLMP_INFO *info = container_of(port, SLMP_INFO, port); 3282 SLMP_INFO *info = container_of(port, SLMP_INFO, port);
3283 unsigned long flags; 3283 unsigned long flags;
3284 3284
3285 spin_lock_irqsave(&info->lock,flags); 3285 spin_lock_irqsave(&info->lock,flags);
3286 info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; 3286 if (on)
3287 info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
3288 else
3289 info->serial_signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
3287 set_signals(info); 3290 set_signals(info);
3288 spin_unlock_irqrestore(&info->lock,flags); 3291 spin_unlock_irqrestore(&info->lock,flags);
3289} 3292}
@@ -3746,7 +3749,7 @@ static void add_device(SLMP_INFO *info)
3746 3749
3747static const struct tty_port_operations port_ops = { 3750static const struct tty_port_operations port_ops = {
3748 .carrier_raised = carrier_raised, 3751 .carrier_raised = carrier_raised,
3749 .raise_dtr_rts = raise_dtr_rts, 3752 .dtr_rts = dtr_rts,
3750}; 3753};
3751 3754
3752/* Allocate and initialize a device instance structure 3755/* Allocate and initialize a device instance structure
diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c
index 55ba6f142883..ac16fbec72d0 100644
--- a/drivers/char/tty_audit.c
+++ b/drivers/char/tty_audit.c
@@ -29,10 +29,7 @@ static struct tty_audit_buf *tty_audit_buf_alloc(int major, int minor,
29 buf = kmalloc(sizeof(*buf), GFP_KERNEL); 29 buf = kmalloc(sizeof(*buf), GFP_KERNEL);
30 if (!buf) 30 if (!buf)
31 goto err; 31 goto err;
32 if (PAGE_SIZE != N_TTY_BUF_SIZE) 32 buf->data = kmalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
33 buf->data = kmalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
34 else
35 buf->data = (unsigned char *)__get_free_page(GFP_KERNEL);
36 if (!buf->data) 33 if (!buf->data)
37 goto err_buf; 34 goto err_buf;
38 atomic_set(&buf->count, 1); 35 atomic_set(&buf->count, 1);
@@ -52,10 +49,7 @@ err:
52static void tty_audit_buf_free(struct tty_audit_buf *buf) 49static void tty_audit_buf_free(struct tty_audit_buf *buf)
53{ 50{
54 WARN_ON(buf->valid != 0); 51 WARN_ON(buf->valid != 0);
55 if (PAGE_SIZE != N_TTY_BUF_SIZE) 52 kfree(buf->data);
56 kfree(buf->data);
57 else
58 free_page((unsigned long)buf->data);
59 kfree(buf); 53 kfree(buf);
60} 54}
61 55
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 66b99a2049e3..939e198d7670 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -295,7 +295,7 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line)
295 struct tty_driver *p, *res = NULL; 295 struct tty_driver *p, *res = NULL;
296 int tty_line = 0; 296 int tty_line = 0;
297 int len; 297 int len;
298 char *str; 298 char *str, *stp;
299 299
300 for (str = name; *str; str++) 300 for (str = name; *str; str++)
301 if ((*str >= '0' && *str <= '9') || *str == ',') 301 if ((*str >= '0' && *str <= '9') || *str == ',')
@@ -311,13 +311,14 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line)
311 list_for_each_entry(p, &tty_drivers, tty_drivers) { 311 list_for_each_entry(p, &tty_drivers, tty_drivers) {
312 if (strncmp(name, p->name, len) != 0) 312 if (strncmp(name, p->name, len) != 0)
313 continue; 313 continue;
314 if (*str == ',') 314 stp = str;
315 str++; 315 if (*stp == ',')
316 if (*str == '\0') 316 stp++;
317 str = NULL; 317 if (*stp == '\0')
318 stp = NULL;
318 319
319 if (tty_line >= 0 && tty_line <= p->num && p->ops && 320 if (tty_line >= 0 && tty_line <= p->num && p->ops &&
320 p->ops->poll_init && !p->ops->poll_init(p, tty_line, str)) { 321 p->ops->poll_init && !p->ops->poll_init(p, tty_line, stp)) {
321 res = tty_driver_kref_get(p); 322 res = tty_driver_kref_get(p);
322 *line = tty_line; 323 *line = tty_line;
323 break; 324 break;
@@ -470,43 +471,6 @@ void tty_wakeup(struct tty_struct *tty)
470EXPORT_SYMBOL_GPL(tty_wakeup); 471EXPORT_SYMBOL_GPL(tty_wakeup);
471 472
472/** 473/**
473 * tty_ldisc_flush - flush line discipline queue
474 * @tty: tty
475 *
476 * Flush the line discipline queue (if any) for this tty. If there
477 * is no line discipline active this is a no-op.
478 */
479
480void tty_ldisc_flush(struct tty_struct *tty)
481{
482 struct tty_ldisc *ld = tty_ldisc_ref(tty);
483 if (ld) {
484 if (ld->ops->flush_buffer)
485 ld->ops->flush_buffer(tty);
486 tty_ldisc_deref(ld);
487 }
488 tty_buffer_flush(tty);
489}
490
491EXPORT_SYMBOL_GPL(tty_ldisc_flush);
492
493/**
494 * tty_reset_termios - reset terminal state
495 * @tty: tty to reset
496 *
497 * Restore a terminal to the driver default state
498 */
499
500static void tty_reset_termios(struct tty_struct *tty)
501{
502 mutex_lock(&tty->termios_mutex);
503 *tty->termios = tty->driver->init_termios;
504 tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios);
505 tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios);
506 mutex_unlock(&tty->termios_mutex);
507}
508
509/**
510 * do_tty_hangup - actual handler for hangup events 474 * do_tty_hangup - actual handler for hangup events
511 * @work: tty device 475 * @work: tty device
512 * 476 *
@@ -535,7 +499,6 @@ static void do_tty_hangup(struct work_struct *work)
535 struct file *cons_filp = NULL; 499 struct file *cons_filp = NULL;
536 struct file *filp, *f = NULL; 500 struct file *filp, *f = NULL;
537 struct task_struct *p; 501 struct task_struct *p;
538 struct tty_ldisc *ld;
539 int closecount = 0, n; 502 int closecount = 0, n;
540 unsigned long flags; 503 unsigned long flags;
541 int refs = 0; 504 int refs = 0;
@@ -566,40 +529,8 @@ static void do_tty_hangup(struct work_struct *work)
566 filp->f_op = &hung_up_tty_fops; 529 filp->f_op = &hung_up_tty_fops;
567 } 530 }
568 file_list_unlock(); 531 file_list_unlock();
569 /*
570 * FIXME! What are the locking issues here? This may me overdoing
571 * things... This question is especially important now that we've
572 * removed the irqlock.
573 */
574 ld = tty_ldisc_ref(tty);
575 if (ld != NULL) {
576 /* We may have no line discipline at this point */
577 if (ld->ops->flush_buffer)
578 ld->ops->flush_buffer(tty);
579 tty_driver_flush_buffer(tty);
580 if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
581 ld->ops->write_wakeup)
582 ld->ops->write_wakeup(tty);
583 if (ld->ops->hangup)
584 ld->ops->hangup(tty);
585 }
586 /*
587 * FIXME: Once we trust the LDISC code better we can wait here for
588 * ldisc completion and fix the driver call race
589 */
590 wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
591 wake_up_interruptible_poll(&tty->read_wait, POLLIN);
592 /*
593 * Shutdown the current line discipline, and reset it to
594 * N_TTY.
595 */
596 if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS)
597 tty_reset_termios(tty);
598 /* Defer ldisc switch */
599 /* tty_deferred_ldisc_switch(N_TTY);
600 532
601 This should get done automatically when the port closes and 533 tty_ldisc_hangup(tty);
602 tty_release is called */
603 534
604 read_lock(&tasklist_lock); 535 read_lock(&tasklist_lock);
605 if (tty->session) { 536 if (tty->session) {
@@ -628,12 +559,15 @@ static void do_tty_hangup(struct work_struct *work)
628 read_unlock(&tasklist_lock); 559 read_unlock(&tasklist_lock);
629 560
630 spin_lock_irqsave(&tty->ctrl_lock, flags); 561 spin_lock_irqsave(&tty->ctrl_lock, flags);
631 tty->flags = 0; 562 clear_bit(TTY_THROTTLED, &tty->flags);
563 clear_bit(TTY_PUSH, &tty->flags);
564 clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
632 put_pid(tty->session); 565 put_pid(tty->session);
633 put_pid(tty->pgrp); 566 put_pid(tty->pgrp);
634 tty->session = NULL; 567 tty->session = NULL;
635 tty->pgrp = NULL; 568 tty->pgrp = NULL;
636 tty->ctrl_status = 0; 569 tty->ctrl_status = 0;
570 set_bit(TTY_HUPPED, &tty->flags);
637 spin_unlock_irqrestore(&tty->ctrl_lock, flags); 571 spin_unlock_irqrestore(&tty->ctrl_lock, flags);
638 572
639 /* Account for the p->signal references we killed */ 573 /* Account for the p->signal references we killed */
@@ -659,10 +593,7 @@ static void do_tty_hangup(struct work_struct *work)
659 * can't yet guarantee all that. 593 * can't yet guarantee all that.
660 */ 594 */
661 set_bit(TTY_HUPPED, &tty->flags); 595 set_bit(TTY_HUPPED, &tty->flags);
662 if (ld) { 596 tty_ldisc_enable(tty);
663 tty_ldisc_enable(tty);
664 tty_ldisc_deref(ld);
665 }
666 unlock_kernel(); 597 unlock_kernel();
667 if (f) 598 if (f)
668 fput(f); 599 fput(f);
@@ -2480,6 +2411,24 @@ static int tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int
2480 return tty->ops->tiocmset(tty, file, set, clear); 2411 return tty->ops->tiocmset(tty, file, set, clear);
2481} 2412}
2482 2413
2414struct tty_struct *tty_pair_get_tty(struct tty_struct *tty)
2415{
2416 if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
2417 tty->driver->subtype == PTY_TYPE_MASTER)
2418 tty = tty->link;
2419 return tty;
2420}
2421EXPORT_SYMBOL(tty_pair_get_tty);
2422
2423struct tty_struct *tty_pair_get_pty(struct tty_struct *tty)
2424{
2425 if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
2426 tty->driver->subtype == PTY_TYPE_MASTER)
2427 return tty;
2428 return tty->link;
2429}
2430EXPORT_SYMBOL(tty_pair_get_pty);
2431
2483/* 2432/*
2484 * Split this up, as gcc can choke on it otherwise.. 2433 * Split this up, as gcc can choke on it otherwise..
2485 */ 2434 */
@@ -2495,11 +2444,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2495 if (tty_paranoia_check(tty, inode, "tty_ioctl")) 2444 if (tty_paranoia_check(tty, inode, "tty_ioctl"))
2496 return -EINVAL; 2445 return -EINVAL;
2497 2446
2498 real_tty = tty; 2447 real_tty = tty_pair_get_tty(tty);
2499 if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
2500 tty->driver->subtype == PTY_TYPE_MASTER)
2501 real_tty = tty->link;
2502
2503 2448
2504 /* 2449 /*
2505 * Factor out some common prep work 2450 * Factor out some common prep work
@@ -2555,7 +2500,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2555 case TIOCGSID: 2500 case TIOCGSID:
2556 return tiocgsid(tty, real_tty, p); 2501 return tiocgsid(tty, real_tty, p);
2557 case TIOCGETD: 2502 case TIOCGETD:
2558 return put_user(tty->ldisc.ops->num, (int __user *)p); 2503 return put_user(tty->ldisc->ops->num, (int __user *)p);
2559 case TIOCSETD: 2504 case TIOCSETD:
2560 return tiocsetd(tty, p); 2505 return tiocsetd(tty, p);
2561 /* 2506 /*
@@ -2770,6 +2715,7 @@ void initialize_tty_struct(struct tty_struct *tty,
2770 tty->buf.head = tty->buf.tail = NULL; 2715 tty->buf.head = tty->buf.tail = NULL;
2771 tty_buffer_init(tty); 2716 tty_buffer_init(tty);
2772 mutex_init(&tty->termios_mutex); 2717 mutex_init(&tty->termios_mutex);
2718 mutex_init(&tty->ldisc_mutex);
2773 init_waitqueue_head(&tty->write_wait); 2719 init_waitqueue_head(&tty->write_wait);
2774 init_waitqueue_head(&tty->read_wait); 2720 init_waitqueue_head(&tty->read_wait);
2775 INIT_WORK(&tty->hangup_work, do_tty_hangup); 2721 INIT_WORK(&tty->hangup_work, do_tty_hangup);
diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c
index 6f4c7d0a53bf..8116bb1c8f80 100644
--- a/drivers/char/tty_ioctl.c
+++ b/drivers/char/tty_ioctl.c
@@ -97,14 +97,19 @@ EXPORT_SYMBOL(tty_driver_flush_buffer);
97 * @tty: terminal 97 * @tty: terminal
98 * 98 *
99 * Indicate that a tty should stop transmitting data down the stack. 99 * Indicate that a tty should stop transmitting data down the stack.
100 * Takes the termios mutex to protect against parallel throttle/unthrottle
101 * and also to ensure the driver can consistently reference its own
102 * termios data at this point when implementing software flow control.
100 */ 103 */
101 104
102void tty_throttle(struct tty_struct *tty) 105void tty_throttle(struct tty_struct *tty)
103{ 106{
107 mutex_lock(&tty->termios_mutex);
104 /* check TTY_THROTTLED first so it indicates our state */ 108 /* check TTY_THROTTLED first so it indicates our state */
105 if (!test_and_set_bit(TTY_THROTTLED, &tty->flags) && 109 if (!test_and_set_bit(TTY_THROTTLED, &tty->flags) &&
106 tty->ops->throttle) 110 tty->ops->throttle)
107 tty->ops->throttle(tty); 111 tty->ops->throttle(tty);
112 mutex_unlock(&tty->termios_mutex);
108} 113}
109EXPORT_SYMBOL(tty_throttle); 114EXPORT_SYMBOL(tty_throttle);
110 115
@@ -113,13 +118,21 @@ EXPORT_SYMBOL(tty_throttle);
113 * @tty: terminal 118 * @tty: terminal
114 * 119 *
115 * Indicate that a tty may continue transmitting data down the stack. 120 * Indicate that a tty may continue transmitting data down the stack.
121 * Takes the termios mutex to protect against parallel throttle/unthrottle
122 * and also to ensure the driver can consistently reference its own
123 * termios data at this point when implementing software flow control.
124 *
125 * Drivers should however remember that the stack can issue a throttle,
126 * then change flow control method, then unthrottle.
116 */ 127 */
117 128
118void tty_unthrottle(struct tty_struct *tty) 129void tty_unthrottle(struct tty_struct *tty)
119{ 130{
131 mutex_lock(&tty->termios_mutex);
120 if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) && 132 if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) &&
121 tty->ops->unthrottle) 133 tty->ops->unthrottle)
122 tty->ops->unthrottle(tty); 134 tty->ops->unthrottle(tty);
135 mutex_unlock(&tty->termios_mutex);
123} 136}
124EXPORT_SYMBOL(tty_unthrottle); 137EXPORT_SYMBOL(tty_unthrottle);
125 138
@@ -613,9 +626,25 @@ static int set_termios(struct tty_struct *tty, void __user *arg, int opt)
613 return 0; 626 return 0;
614} 627}
615 628
629static void copy_termios(struct tty_struct *tty, struct ktermios *kterm)
630{
631 mutex_lock(&tty->termios_mutex);
632 memcpy(kterm, tty->termios, sizeof(struct ktermios));
633 mutex_unlock(&tty->termios_mutex);
634}
635
636static void copy_termios_locked(struct tty_struct *tty, struct ktermios *kterm)
637{
638 mutex_lock(&tty->termios_mutex);
639 memcpy(kterm, tty->termios_locked, sizeof(struct ktermios));
640 mutex_unlock(&tty->termios_mutex);
641}
642
616static int get_termio(struct tty_struct *tty, struct termio __user *termio) 643static int get_termio(struct tty_struct *tty, struct termio __user *termio)
617{ 644{
618 if (kernel_termios_to_user_termio(termio, tty->termios)) 645 struct ktermios kterm;
646 copy_termios(tty, &kterm);
647 if (kernel_termios_to_user_termio(termio, &kterm))
619 return -EFAULT; 648 return -EFAULT;
620 return 0; 649 return 0;
621} 650}
@@ -917,6 +946,8 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
917 struct tty_struct *real_tty; 946 struct tty_struct *real_tty;
918 void __user *p = (void __user *)arg; 947 void __user *p = (void __user *)arg;
919 int ret = 0; 948 int ret = 0;
949 struct ktermios kterm;
950 struct termiox ktermx;
920 951
921 if (tty->driver->type == TTY_DRIVER_TYPE_PTY && 952 if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
922 tty->driver->subtype == PTY_TYPE_MASTER) 953 tty->driver->subtype == PTY_TYPE_MASTER)
@@ -952,23 +983,20 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
952 return set_termios(real_tty, p, TERMIOS_OLD); 983 return set_termios(real_tty, p, TERMIOS_OLD);
953#ifndef TCGETS2 984#ifndef TCGETS2
954 case TCGETS: 985 case TCGETS:
955 mutex_lock(&real_tty->termios_mutex); 986 copy_termios(real_tty, &kterm);
956 if (kernel_termios_to_user_termios((struct termios __user *)arg, real_tty->termios)) 987 if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm))
957 ret = -EFAULT; 988 ret = -EFAULT;
958 mutex_unlock(&real_tty->termios_mutex);
959 return ret; 989 return ret;
960#else 990#else
961 case TCGETS: 991 case TCGETS:
962 mutex_lock(&real_tty->termios_mutex); 992 copy_termios(real_tty, &kterm);
963 if (kernel_termios_to_user_termios_1((struct termios __user *)arg, real_tty->termios)) 993 if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm))
964 ret = -EFAULT; 994 ret = -EFAULT;
965 mutex_unlock(&real_tty->termios_mutex);
966 return ret; 995 return ret;
967 case TCGETS2: 996 case TCGETS2:
968 mutex_lock(&real_tty->termios_mutex); 997 copy_termios(real_tty, &kterm);
969 if (kernel_termios_to_user_termios((struct termios2 __user *)arg, real_tty->termios)) 998 if (kernel_termios_to_user_termios((struct termios2 __user *)arg, &kterm))
970 ret = -EFAULT; 999 ret = -EFAULT;
971 mutex_unlock(&real_tty->termios_mutex);
972 return ret; 1000 return ret;
973 case TCSETSF2: 1001 case TCSETSF2:
974 return set_termios(real_tty, p, TERMIOS_FLUSH | TERMIOS_WAIT); 1002 return set_termios(real_tty, p, TERMIOS_FLUSH | TERMIOS_WAIT);
@@ -987,34 +1015,36 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
987 return set_termios(real_tty, p, TERMIOS_TERMIO); 1015 return set_termios(real_tty, p, TERMIOS_TERMIO);
988#ifndef TCGETS2 1016#ifndef TCGETS2
989 case TIOCGLCKTRMIOS: 1017 case TIOCGLCKTRMIOS:
990 mutex_lock(&real_tty->termios_mutex); 1018 copy_termios_locked(real_tty, &kterm);
991 if (kernel_termios_to_user_termios((struct termios __user *)arg, real_tty->termios_locked)) 1019 if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm))
992 ret = -EFAULT; 1020 ret = -EFAULT;
993 mutex_unlock(&real_tty->termios_mutex);
994 return ret; 1021 return ret;
995 case TIOCSLCKTRMIOS: 1022 case TIOCSLCKTRMIOS:
996 if (!capable(CAP_SYS_ADMIN)) 1023 if (!capable(CAP_SYS_ADMIN))
997 return -EPERM; 1024 return -EPERM;
998 mutex_lock(&real_tty->termios_mutex); 1025 copy_termios_locked(real_tty, &kterm);
999 if (user_termios_to_kernel_termios(real_tty->termios_locked, 1026 if (user_termios_to_kernel_termios(&kterm,
1000 (struct termios __user *) arg)) 1027 (struct termios __user *) arg))
1001 ret = -EFAULT; 1028 return -EFAULT;
1029 mutex_lock(&real_tty->termios_mutex);
1030 memcpy(real_tty->termios_locked, &kterm, sizeof(struct ktermios));
1002 mutex_unlock(&real_tty->termios_mutex); 1031 mutex_unlock(&real_tty->termios_mutex);
1003 return ret; 1032 return 0;
1004#else 1033#else
1005 case TIOCGLCKTRMIOS: 1034 case TIOCGLCKTRMIOS:
1006 mutex_lock(&real_tty->termios_mutex); 1035 copy_termios_locked(real_tty, &kterm);
1007 if (kernel_termios_to_user_termios_1((struct termios __user *)arg, real_tty->termios_locked)) 1036 if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm))
1008 ret = -EFAULT; 1037 ret = -EFAULT;
1009 mutex_unlock(&real_tty->termios_mutex);
1010 return ret; 1038 return ret;
1011 case TIOCSLCKTRMIOS: 1039 case TIOCSLCKTRMIOS:
1012 if (!capable(CAP_SYS_ADMIN)) 1040 if (!capable(CAP_SYS_ADMIN))
1013 ret = -EPERM; 1041 return -EPERM;
1014 mutex_lock(&real_tty->termios_mutex); 1042 copy_termios_locked(real_tty, &kterm);
1015 if (user_termios_to_kernel_termios_1(real_tty->termios_locked, 1043 if (user_termios_to_kernel_termios_1(&kterm,
1016 (struct termios __user *) arg)) 1044 (struct termios __user *) arg))
1017 ret = -EFAULT; 1045 return -EFAULT;
1046 mutex_lock(&real_tty->termios_mutex);
1047 memcpy(real_tty->termios_locked, &kterm, sizeof(struct ktermios));
1018 mutex_unlock(&real_tty->termios_mutex); 1048 mutex_unlock(&real_tty->termios_mutex);
1019 return ret; 1049 return ret;
1020#endif 1050#endif
@@ -1023,9 +1053,10 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
1023 if (real_tty->termiox == NULL) 1053 if (real_tty->termiox == NULL)
1024 return -EINVAL; 1054 return -EINVAL;
1025 mutex_lock(&real_tty->termios_mutex); 1055 mutex_lock(&real_tty->termios_mutex);
1026 if (copy_to_user(p, real_tty->termiox, sizeof(struct termiox))) 1056 memcpy(&ktermx, real_tty->termiox, sizeof(struct termiox));
1027 ret = -EFAULT;
1028 mutex_unlock(&real_tty->termios_mutex); 1057 mutex_unlock(&real_tty->termios_mutex);
1058 if (copy_to_user(p, &ktermx, sizeof(struct termiox)))
1059 ret = -EFAULT;
1029 return ret; 1060 return ret;
1030 case TCSETX: 1061 case TCSETX:
1031 return set_termiox(real_tty, p, 0); 1062 return set_termiox(real_tty, p, 0);
@@ -1035,10 +1066,9 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
1035 return set_termiox(real_tty, p, TERMIOS_FLUSH); 1066 return set_termiox(real_tty, p, TERMIOS_FLUSH);
1036#endif 1067#endif
1037 case TIOCGSOFTCAR: 1068 case TIOCGSOFTCAR:
1038 mutex_lock(&real_tty->termios_mutex); 1069 copy_termios(real_tty, &kterm);
1039 ret = put_user(C_CLOCAL(real_tty) ? 1 : 0, 1070 ret = put_user((kterm.c_cflag & CLOCAL) ? 1 : 0,
1040 (int __user *)arg); 1071 (int __user *)arg);
1041 mutex_unlock(&real_tty->termios_mutex);
1042 return ret; 1072 return ret;
1043 case TIOCSSOFTCAR: 1073 case TIOCSSOFTCAR:
1044 if (get_user(arg, (unsigned int __user *) arg)) 1074 if (get_user(arg, (unsigned int __user *) arg))
diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c
index f78f5b0127a8..39c8f86dedd4 100644
--- a/drivers/char/tty_ldisc.c
+++ b/drivers/char/tty_ldisc.c
@@ -115,19 +115,22 @@ EXPORT_SYMBOL(tty_unregister_ldisc);
115/** 115/**
116 * tty_ldisc_try_get - try and reference an ldisc 116 * tty_ldisc_try_get - try and reference an ldisc
117 * @disc: ldisc number 117 * @disc: ldisc number
118 * @ld: tty ldisc structure to complete
119 * 118 *
120 * Attempt to open and lock a line discipline into place. Return 119 * Attempt to open and lock a line discipline into place. Return
121 * the line discipline refcounted and assigned in ld. On an error 120 * the line discipline refcounted or an error.
122 * report the error code back
123 */ 121 */
124 122
125static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld) 123static struct tty_ldisc *tty_ldisc_try_get(int disc)
126{ 124{
127 unsigned long flags; 125 unsigned long flags;
126 struct tty_ldisc *ld;
128 struct tty_ldisc_ops *ldops; 127 struct tty_ldisc_ops *ldops;
129 int err = -EINVAL; 128 int err = -EINVAL;
130 129
130 ld = kmalloc(sizeof(struct tty_ldisc), GFP_KERNEL);
131 if (ld == NULL)
132 return ERR_PTR(-ENOMEM);
133
131 spin_lock_irqsave(&tty_ldisc_lock, flags); 134 spin_lock_irqsave(&tty_ldisc_lock, flags);
132 ld->ops = NULL; 135 ld->ops = NULL;
133 ldops = tty_ldiscs[disc]; 136 ldops = tty_ldiscs[disc];
@@ -140,17 +143,19 @@ static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld)
140 /* lock it */ 143 /* lock it */
141 ldops->refcount++; 144 ldops->refcount++;
142 ld->ops = ldops; 145 ld->ops = ldops;
146 ld->refcount = 0;
143 err = 0; 147 err = 0;
144 } 148 }
145 } 149 }
146 spin_unlock_irqrestore(&tty_ldisc_lock, flags); 150 spin_unlock_irqrestore(&tty_ldisc_lock, flags);
147 return err; 151 if (err)
152 return ERR_PTR(err);
153 return ld;
148} 154}
149 155
150/** 156/**
151 * tty_ldisc_get - take a reference to an ldisc 157 * tty_ldisc_get - take a reference to an ldisc
152 * @disc: ldisc number 158 * @disc: ldisc number
153 * @ld: tty line discipline structure to use
154 * 159 *
155 * Takes a reference to a line discipline. Deals with refcounts and 160 * Takes a reference to a line discipline. Deals with refcounts and
156 * module locking counts. Returns NULL if the discipline is not available. 161 * module locking counts. Returns NULL if the discipline is not available.
@@ -161,52 +166,54 @@ static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld)
161 * takes tty_ldisc_lock to guard against ldisc races 166 * takes tty_ldisc_lock to guard against ldisc races
162 */ 167 */
163 168
164static int tty_ldisc_get(int disc, struct tty_ldisc *ld) 169static struct tty_ldisc *tty_ldisc_get(int disc)
165{ 170{
166 int err; 171 struct tty_ldisc *ld;
167 172
168 if (disc < N_TTY || disc >= NR_LDISCS) 173 if (disc < N_TTY || disc >= NR_LDISCS)
169 return -EINVAL; 174 return ERR_PTR(-EINVAL);
170 err = tty_ldisc_try_get(disc, ld); 175 ld = tty_ldisc_try_get(disc);
171 if (err < 0) { 176 if (IS_ERR(ld)) {
172 request_module("tty-ldisc-%d", disc); 177 request_module("tty-ldisc-%d", disc);
173 err = tty_ldisc_try_get(disc, ld); 178 ld = tty_ldisc_try_get(disc);
174 } 179 }
175 return err; 180 return ld;
176} 181}
177 182
178/** 183/**
179 * tty_ldisc_put - drop ldisc reference 184 * tty_ldisc_put - drop ldisc reference
180 * @disc: ldisc number 185 * @ld: ldisc
181 * 186 *
182 * Drop a reference to a line discipline. Manage refcounts and 187 * Drop a reference to a line discipline. Manage refcounts and
183 * module usage counts 188 * module usage counts. Free the ldisc once the recount hits zero.
184 * 189 *
185 * Locking: 190 * Locking:
186 * takes tty_ldisc_lock to guard against ldisc races 191 * takes tty_ldisc_lock to guard against ldisc races
187 */ 192 */
188 193
189static void tty_ldisc_put(struct tty_ldisc_ops *ld) 194static void tty_ldisc_put(struct tty_ldisc *ld)
190{ 195{
191 unsigned long flags; 196 unsigned long flags;
192 int disc = ld->num; 197 int disc = ld->ops->num;
198 struct tty_ldisc_ops *ldo;
193 199
194 BUG_ON(disc < N_TTY || disc >= NR_LDISCS); 200 BUG_ON(disc < N_TTY || disc >= NR_LDISCS);
195 201
196 spin_lock_irqsave(&tty_ldisc_lock, flags); 202 spin_lock_irqsave(&tty_ldisc_lock, flags);
197 ld = tty_ldiscs[disc]; 203 ldo = tty_ldiscs[disc];
198 BUG_ON(ld->refcount == 0); 204 BUG_ON(ldo->refcount == 0);
199 ld->refcount--; 205 ldo->refcount--;
200 module_put(ld->owner); 206 module_put(ldo->owner);
201 spin_unlock_irqrestore(&tty_ldisc_lock, flags); 207 spin_unlock_irqrestore(&tty_ldisc_lock, flags);
208 kfree(ld);
202} 209}
203 210
204static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos) 211static void *tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
205{ 212{
206 return (*pos < NR_LDISCS) ? pos : NULL; 213 return (*pos < NR_LDISCS) ? pos : NULL;
207} 214}
208 215
209static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos) 216static void *tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
210{ 217{
211 (*pos)++; 218 (*pos)++;
212 return (*pos < NR_LDISCS) ? pos : NULL; 219 return (*pos < NR_LDISCS) ? pos : NULL;
@@ -219,12 +226,13 @@ static void tty_ldiscs_seq_stop(struct seq_file *m, void *v)
219static int tty_ldiscs_seq_show(struct seq_file *m, void *v) 226static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
220{ 227{
221 int i = *(loff_t *)v; 228 int i = *(loff_t *)v;
222 struct tty_ldisc ld; 229 struct tty_ldisc *ld;
223 230
224 if (tty_ldisc_get(i, &ld) < 0) 231 ld = tty_ldisc_try_get(i);
232 if (IS_ERR(ld))
225 return 0; 233 return 0;
226 seq_printf(m, "%-10s %2d\n", ld.ops->name ? ld.ops->name : "???", i); 234 seq_printf(m, "%-10s %2d\n", ld->ops->name ? ld->ops->name : "???", i);
227 tty_ldisc_put(ld.ops); 235 tty_ldisc_put(ld);
228 return 0; 236 return 0;
229} 237}
230 238
@@ -263,8 +271,7 @@ const struct file_operations tty_ldiscs_proc_fops = {
263 271
264static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld) 272static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld)
265{ 273{
266 ld->refcount = 0; 274 tty->ldisc = ld;
267 tty->ldisc = *ld;
268} 275}
269 276
270/** 277/**
@@ -286,7 +293,7 @@ static int tty_ldisc_try(struct tty_struct *tty)
286 int ret = 0; 293 int ret = 0;
287 294
288 spin_lock_irqsave(&tty_ldisc_lock, flags); 295 spin_lock_irqsave(&tty_ldisc_lock, flags);
289 ld = &tty->ldisc; 296 ld = tty->ldisc;
290 if (test_bit(TTY_LDISC, &tty->flags)) { 297 if (test_bit(TTY_LDISC, &tty->flags)) {
291 ld->refcount++; 298 ld->refcount++;
292 ret = 1; 299 ret = 1;
@@ -315,10 +322,9 @@ struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty)
315{ 322{
316 /* wait_event is a macro */ 323 /* wait_event is a macro */
317 wait_event(tty_ldisc_wait, tty_ldisc_try(tty)); 324 wait_event(tty_ldisc_wait, tty_ldisc_try(tty));
318 WARN_ON(tty->ldisc.refcount == 0); 325 WARN_ON(tty->ldisc->refcount == 0);
319 return &tty->ldisc; 326 return tty->ldisc;
320} 327}
321
322EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); 328EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait);
323 329
324/** 330/**
@@ -335,10 +341,9 @@ EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait);
335struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty) 341struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty)
336{ 342{
337 if (tty_ldisc_try(tty)) 343 if (tty_ldisc_try(tty))
338 return &tty->ldisc; 344 return tty->ldisc;
339 return NULL; 345 return NULL;
340} 346}
341
342EXPORT_SYMBOL_GPL(tty_ldisc_ref); 347EXPORT_SYMBOL_GPL(tty_ldisc_ref);
343 348
344/** 349/**
@@ -366,7 +371,6 @@ void tty_ldisc_deref(struct tty_ldisc *ld)
366 wake_up(&tty_ldisc_wait); 371 wake_up(&tty_ldisc_wait);
367 spin_unlock_irqrestore(&tty_ldisc_lock, flags); 372 spin_unlock_irqrestore(&tty_ldisc_lock, flags);
368} 373}
369
370EXPORT_SYMBOL_GPL(tty_ldisc_deref); 374EXPORT_SYMBOL_GPL(tty_ldisc_deref);
371 375
372/** 376/**
@@ -389,6 +393,26 @@ void tty_ldisc_enable(struct tty_struct *tty)
389} 393}
390 394
391/** 395/**
396 * tty_ldisc_flush - flush line discipline queue
397 * @tty: tty
398 *
399 * Flush the line discipline queue (if any) for this tty. If there
400 * is no line discipline active this is a no-op.
401 */
402
403void tty_ldisc_flush(struct tty_struct *tty)
404{
405 struct tty_ldisc *ld = tty_ldisc_ref(tty);
406 if (ld) {
407 if (ld->ops->flush_buffer)
408 ld->ops->flush_buffer(tty);
409 tty_ldisc_deref(ld);
410 }
411 tty_buffer_flush(tty);
412}
413EXPORT_SYMBOL_GPL(tty_ldisc_flush);
414
415/**
392 * tty_set_termios_ldisc - set ldisc field 416 * tty_set_termios_ldisc - set ldisc field
393 * @tty: tty structure 417 * @tty: tty structure
394 * @num: line discipline number 418 * @num: line discipline number
@@ -407,6 +431,39 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
407 mutex_unlock(&tty->termios_mutex); 431 mutex_unlock(&tty->termios_mutex);
408} 432}
409 433
434/**
435 * tty_ldisc_open - open a line discipline
436 * @tty: tty we are opening the ldisc on
437 * @ld: discipline to open
438 *
439 * A helper opening method. Also a convenient debugging and check
440 * point.
441 */
442
443static int tty_ldisc_open(struct tty_struct *tty, struct tty_ldisc *ld)
444{
445 WARN_ON(test_and_set_bit(TTY_LDISC_OPEN, &tty->flags));
446 if (ld->ops->open)
447 return ld->ops->open(tty);
448 return 0;
449}
450
451/**
452 * tty_ldisc_close - close a line discipline
453 * @tty: tty we are opening the ldisc on
454 * @ld: discipline to close
455 *
456 * A helper close method. Also a convenient debugging and check
457 * point.
458 */
459
460static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld)
461{
462 WARN_ON(!test_bit(TTY_LDISC_OPEN, &tty->flags));
463 clear_bit(TTY_LDISC_OPEN, &tty->flags);
464 if (ld->ops->close)
465 ld->ops->close(tty);
466}
410 467
411/** 468/**
412 * tty_ldisc_restore - helper for tty ldisc change 469 * tty_ldisc_restore - helper for tty ldisc change
@@ -420,66 +477,136 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
420static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) 477static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
421{ 478{
422 char buf[64]; 479 char buf[64];
423 struct tty_ldisc new_ldisc; 480 struct tty_ldisc *new_ldisc;
481 int r;
424 482
425 /* There is an outstanding reference here so this is safe */ 483 /* There is an outstanding reference here so this is safe */
426 tty_ldisc_get(old->ops->num, old); 484 old = tty_ldisc_get(old->ops->num);
485 WARN_ON(IS_ERR(old));
427 tty_ldisc_assign(tty, old); 486 tty_ldisc_assign(tty, old);
428 tty_set_termios_ldisc(tty, old->ops->num); 487 tty_set_termios_ldisc(tty, old->ops->num);
429 if (old->ops->open && (old->ops->open(tty) < 0)) { 488 if (tty_ldisc_open(tty, old) < 0) {
430 tty_ldisc_put(old->ops); 489 tty_ldisc_put(old);
431 /* This driver is always present */ 490 /* This driver is always present */
432 if (tty_ldisc_get(N_TTY, &new_ldisc) < 0) 491 new_ldisc = tty_ldisc_get(N_TTY);
492 if (IS_ERR(new_ldisc))
433 panic("n_tty: get"); 493 panic("n_tty: get");
434 tty_ldisc_assign(tty, &new_ldisc); 494 tty_ldisc_assign(tty, new_ldisc);
435 tty_set_termios_ldisc(tty, N_TTY); 495 tty_set_termios_ldisc(tty, N_TTY);
436 if (new_ldisc.ops->open) { 496 r = tty_ldisc_open(tty, new_ldisc);
437 int r = new_ldisc.ops->open(tty); 497 if (r < 0)
438 if (r < 0) 498 panic("Couldn't open N_TTY ldisc for "
439 panic("Couldn't open N_TTY ldisc for " 499 "%s --- error %d.",
440 "%s --- error %d.", 500 tty_name(tty, buf), r);
441 tty_name(tty, buf), r);
442 }
443 } 501 }
444} 502}
445 503
446/** 504/**
505 * tty_ldisc_halt - shut down the line discipline
506 * @tty: tty device
507 *
508 * Shut down the line discipline and work queue for this tty device.
509 * The TTY_LDISC flag being cleared ensures no further references can
510 * be obtained while the delayed work queue halt ensures that no more
511 * data is fed to the ldisc.
512 *
513 * In order to wait for any existing references to complete see
514 * tty_ldisc_wait_idle.
515 */
516
517static int tty_ldisc_halt(struct tty_struct *tty)
518{
519 clear_bit(TTY_LDISC, &tty->flags);
520 return cancel_delayed_work(&tty->buf.work);
521}
522
523/**
524 * tty_ldisc_wait_idle - wait for the ldisc to become idle
525 * @tty: tty to wait for
526 *
527 * Wait for the line discipline to become idle. The discipline must
528 * have been halted for this to guarantee it remains idle.
529 *
530 * tty_ldisc_lock protects the ref counts currently.
531 */
532
533static int tty_ldisc_wait_idle(struct tty_struct *tty)
534{
535 unsigned long flags;
536 spin_lock_irqsave(&tty_ldisc_lock, flags);
537 while (tty->ldisc->refcount) {
538 spin_unlock_irqrestore(&tty_ldisc_lock, flags);
539 if (wait_event_timeout(tty_ldisc_wait,
540 tty->ldisc->refcount == 0, 5 * HZ) == 0)
541 return -EBUSY;
542 spin_lock_irqsave(&tty_ldisc_lock, flags);
543 }
544 spin_unlock_irqrestore(&tty_ldisc_lock, flags);
545 return 0;
546}
547
548/**
447 * tty_set_ldisc - set line discipline 549 * tty_set_ldisc - set line discipline
448 * @tty: the terminal to set 550 * @tty: the terminal to set
449 * @ldisc: the line discipline 551 * @ldisc: the line discipline
450 * 552 *
451 * Set the discipline of a tty line. Must be called from a process 553 * Set the discipline of a tty line. Must be called from a process
452 * context. 554 * context. The ldisc change logic has to protect itself against any
555 * overlapping ldisc change (including on the other end of pty pairs),
556 * the close of one side of a tty/pty pair, and eventually hangup.
453 * 557 *
454 * Locking: takes tty_ldisc_lock. 558 * Locking: takes tty_ldisc_lock, termios_mutex
455 * called functions take termios_mutex
456 */ 559 */
457 560
458int tty_set_ldisc(struct tty_struct *tty, int ldisc) 561int tty_set_ldisc(struct tty_struct *tty, int ldisc)
459{ 562{
460 int retval; 563 int retval;
461 struct tty_ldisc o_ldisc, new_ldisc; 564 struct tty_ldisc *o_ldisc, *new_ldisc;
462 int work; 565 int work, o_work = 0;
463 unsigned long flags;
464 struct tty_struct *o_tty; 566 struct tty_struct *o_tty;
465 567
466restart: 568 new_ldisc = tty_ldisc_get(ldisc);
467 /* This is a bit ugly for now but means we can break the 'ldisc 569 if (IS_ERR(new_ldisc))
468 is part of the tty struct' assumption later */ 570 return PTR_ERR(new_ldisc);
469 retval = tty_ldisc_get(ldisc, &new_ldisc); 571
470 if (retval) 572 /*
471 return retval; 573 * We need to look at the tty locking here for pty/tty pairs
574 * when both sides try to change in parallel.
575 */
576
577 o_tty = tty->link; /* o_tty is the pty side or NULL */
578
579
580 /*
581 * Check the no-op case
582 */
583
584 if (tty->ldisc->ops->num == ldisc) {
585 tty_ldisc_put(new_ldisc);
586 return 0;
587 }
472 588
473 /* 589 /*
474 * Problem: What do we do if this blocks ? 590 * Problem: What do we do if this blocks ?
591 * We could deadlock here
475 */ 592 */
476 593
477 tty_wait_until_sent(tty, 0); 594 tty_wait_until_sent(tty, 0);
478 595
479 if (tty->ldisc.ops->num == ldisc) { 596 mutex_lock(&tty->ldisc_mutex);
480 tty_ldisc_put(new_ldisc.ops); 597
481 return 0; 598 /*
599 * We could be midstream of another ldisc change which has
600 * dropped the lock during processing. If so we need to wait.
601 */
602
603 while (test_bit(TTY_LDISC_CHANGING, &tty->flags)) {
604 mutex_unlock(&tty->ldisc_mutex);
605 wait_event(tty_ldisc_wait,
606 test_bit(TTY_LDISC_CHANGING, &tty->flags) == 0);
607 mutex_lock(&tty->ldisc_mutex);
482 } 608 }
609 set_bit(TTY_LDISC_CHANGING, &tty->flags);
483 610
484 /* 611 /*
485 * No more input please, we are switching. The new ldisc 612 * No more input please, we are switching. The new ldisc
@@ -489,8 +616,6 @@ restart:
489 tty->receive_room = 0; 616 tty->receive_room = 0;
490 617
491 o_ldisc = tty->ldisc; 618 o_ldisc = tty->ldisc;
492 o_tty = tty->link;
493
494 /* 619 /*
495 * Make sure we don't change while someone holds a 620 * Make sure we don't change while someone holds a
496 * reference to the line discipline. The TTY_LDISC bit 621 * reference to the line discipline. The TTY_LDISC bit
@@ -501,108 +626,181 @@ restart:
501 * with a userspace app continually trying to use the tty in 626 * with a userspace app continually trying to use the tty in
502 * parallel to the change and re-referencing the tty. 627 * parallel to the change and re-referencing the tty.
503 */ 628 */
504 clear_bit(TTY_LDISC, &tty->flags);
505 if (o_tty)
506 clear_bit(TTY_LDISC, &o_tty->flags);
507 629
508 spin_lock_irqsave(&tty_ldisc_lock, flags); 630 work = tty_ldisc_halt(tty);
509 if (tty->ldisc.refcount || (o_tty && o_tty->ldisc.refcount)) {
510 if (tty->ldisc.refcount) {
511 /* Free the new ldisc we grabbed. Must drop the lock
512 first. */
513 spin_unlock_irqrestore(&tty_ldisc_lock, flags);
514 tty_ldisc_put(o_ldisc.ops);
515 /*
516 * There are several reasons we may be busy, including
517 * random momentary I/O traffic. We must therefore
518 * retry. We could distinguish between blocking ops
519 * and retries if we made tty_ldisc_wait() smarter.
520 * That is up for discussion.
521 */
522 if (wait_event_interruptible(tty_ldisc_wait, tty->ldisc.refcount == 0) < 0)
523 return -ERESTARTSYS;
524 goto restart;
525 }
526 if (o_tty && o_tty->ldisc.refcount) {
527 spin_unlock_irqrestore(&tty_ldisc_lock, flags);
528 tty_ldisc_put(o_tty->ldisc.ops);
529 if (wait_event_interruptible(tty_ldisc_wait, o_tty->ldisc.refcount == 0) < 0)
530 return -ERESTARTSYS;
531 goto restart;
532 }
533 }
534 /*
535 * If the TTY_LDISC bit is set, then we are racing against
536 * another ldisc change
537 */
538 if (test_bit(TTY_LDISC_CHANGING, &tty->flags)) {
539 struct tty_ldisc *ld;
540 spin_unlock_irqrestore(&tty_ldisc_lock, flags);
541 tty_ldisc_put(new_ldisc.ops);
542 ld = tty_ldisc_ref_wait(tty);
543 tty_ldisc_deref(ld);
544 goto restart;
545 }
546 /*
547 * This flag is used to avoid two parallel ldisc changes. Once
548 * open and close are fine grained locked this may work better
549 * as a mutex shared with the open/close/hup paths
550 */
551 set_bit(TTY_LDISC_CHANGING, &tty->flags);
552 if (o_tty) 631 if (o_tty)
553 set_bit(TTY_LDISC_CHANGING, &o_tty->flags); 632 o_work = tty_ldisc_halt(o_tty);
554 spin_unlock_irqrestore(&tty_ldisc_lock, flags);
555
556 /*
557 * From this point on we know nobody has an ldisc
558 * usage reference, nor can they obtain one until
559 * we say so later on.
560 */
561 633
562 work = cancel_delayed_work(&tty->buf.work);
563 /* 634 /*
564 * Wait for ->hangup_work and ->buf.work handlers to terminate 635 * Wait for ->hangup_work and ->buf.work handlers to terminate.
565 * MUST NOT hold locks here. 636 * We must drop the mutex here in case a hangup is also in process.
566 */ 637 */
638
639 mutex_unlock(&tty->ldisc_mutex);
640
567 flush_scheduled_work(); 641 flush_scheduled_work();
642
643 /* Let any existing reference holders finish */
644 retval = tty_ldisc_wait_idle(tty);
645 if (retval < 0) {
646 clear_bit(TTY_LDISC_CHANGING, &tty->flags);
647 tty_ldisc_put(new_ldisc);
648 return retval;
649 }
650
651 mutex_lock(&tty->ldisc_mutex);
652 if (test_bit(TTY_HUPPED, &tty->flags)) {
653 /* We were raced by the hangup method. It will have stomped
654 the ldisc data and closed the ldisc down */
655 clear_bit(TTY_LDISC_CHANGING, &tty->flags);
656 mutex_unlock(&tty->ldisc_mutex);
657 tty_ldisc_put(new_ldisc);
658 return -EIO;
659 }
660
568 /* Shutdown the current discipline. */ 661 /* Shutdown the current discipline. */
569 if (o_ldisc.ops->close) 662 tty_ldisc_close(tty, o_ldisc);
570 (o_ldisc.ops->close)(tty);
571 663
572 /* Now set up the new line discipline. */ 664 /* Now set up the new line discipline. */
573 tty_ldisc_assign(tty, &new_ldisc); 665 tty_ldisc_assign(tty, new_ldisc);
574 tty_set_termios_ldisc(tty, ldisc); 666 tty_set_termios_ldisc(tty, ldisc);
575 if (new_ldisc.ops->open) 667
576 retval = (new_ldisc.ops->open)(tty); 668 retval = tty_ldisc_open(tty, new_ldisc);
577 if (retval < 0) { 669 if (retval < 0) {
578 tty_ldisc_put(new_ldisc.ops); 670 /* Back to the old one or N_TTY if we can't */
579 tty_ldisc_restore(tty, &o_ldisc); 671 tty_ldisc_put(new_ldisc);
672 tty_ldisc_restore(tty, o_ldisc);
580 } 673 }
674
581 /* At this point we hold a reference to the new ldisc and a 675 /* At this point we hold a reference to the new ldisc and a
582 a reference to the old ldisc. If we ended up flipping back 676 a reference to the old ldisc. If we ended up flipping back
583 to the existing ldisc we have two references to it */ 677 to the existing ldisc we have two references to it */
584 678
585 if (tty->ldisc.ops->num != o_ldisc.ops->num && tty->ops->set_ldisc) 679 if (tty->ldisc->ops->num != o_ldisc->ops->num && tty->ops->set_ldisc)
586 tty->ops->set_ldisc(tty); 680 tty->ops->set_ldisc(tty);
587 681
588 tty_ldisc_put(o_ldisc.ops); 682 tty_ldisc_put(o_ldisc);
589 683
590 /* 684 /*
591 * Allow ldisc referencing to occur as soon as the driver 685 * Allow ldisc referencing to occur again
592 * ldisc callback completes.
593 */ 686 */
594 687
595 tty_ldisc_enable(tty); 688 tty_ldisc_enable(tty);
596 if (o_tty) 689 if (o_tty)
597 tty_ldisc_enable(o_tty); 690 tty_ldisc_enable(o_tty);
598 691
599 /* Restart it in case no characters kick it off. Safe if 692 /* Restart the work queue in case no characters kick it off. Safe if
600 already running */ 693 already running */
601 if (work) 694 if (work)
602 schedule_delayed_work(&tty->buf.work, 1); 695 schedule_delayed_work(&tty->buf.work, 1);
696 if (o_work)
697 schedule_delayed_work(&o_tty->buf.work, 1);
698 mutex_unlock(&tty->ldisc_mutex);
603 return retval; 699 return retval;
604} 700}
605 701
702/**
703 * tty_reset_termios - reset terminal state
704 * @tty: tty to reset
705 *
706 * Restore a terminal to the driver default state.
707 */
708
709static void tty_reset_termios(struct tty_struct *tty)
710{
711 mutex_lock(&tty->termios_mutex);
712 *tty->termios = tty->driver->init_termios;
713 tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios);
714 tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios);
715 mutex_unlock(&tty->termios_mutex);
716}
717
718
719/**
720 * tty_ldisc_reinit - reinitialise the tty ldisc
721 * @tty: tty to reinit
722 *
723 * Switch the tty back to N_TTY line discipline and leave the
724 * ldisc state closed
725 */
726
727static void tty_ldisc_reinit(struct tty_struct *tty)
728{
729 struct tty_ldisc *ld;
730
731 tty_ldisc_close(tty, tty->ldisc);
732 tty_ldisc_put(tty->ldisc);
733 tty->ldisc = NULL;
734 /*
735 * Switch the line discipline back
736 */
737 ld = tty_ldisc_get(N_TTY);
738 BUG_ON(IS_ERR(ld));
739 tty_ldisc_assign(tty, ld);
740 tty_set_termios_ldisc(tty, N_TTY);
741}
742
743/**
744 * tty_ldisc_hangup - hangup ldisc reset
745 * @tty: tty being hung up
746 *
747 * Some tty devices reset their termios when they receive a hangup
748 * event. In that situation we must also switch back to N_TTY properly
749 * before we reset the termios data.
750 *
751 * Locking: We can take the ldisc mutex as the rest of the code is
752 * careful to allow for this.
753 *
754 * In the pty pair case this occurs in the close() path of the
755 * tty itself so we must be careful about locking rules.
756 */
757
758void tty_ldisc_hangup(struct tty_struct *tty)
759{
760 struct tty_ldisc *ld;
761
762 /*
763 * FIXME! What are the locking issues here? This may me overdoing
764 * things... This question is especially important now that we've
765 * removed the irqlock.
766 */
767 ld = tty_ldisc_ref(tty);
768 if (ld != NULL) {
769 /* We may have no line discipline at this point */
770 if (ld->ops->flush_buffer)
771 ld->ops->flush_buffer(tty);
772 tty_driver_flush_buffer(tty);
773 if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
774 ld->ops->write_wakeup)
775 ld->ops->write_wakeup(tty);
776 if (ld->ops->hangup)
777 ld->ops->hangup(tty);
778 tty_ldisc_deref(ld);
779 }
780 /*
781 * FIXME: Once we trust the LDISC code better we can wait here for
782 * ldisc completion and fix the driver call race
783 */
784 wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
785 wake_up_interruptible_poll(&tty->read_wait, POLLIN);
786 /*
787 * Shutdown the current line discipline, and reset it to
788 * N_TTY.
789 */
790 if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) {
791 /* Avoid racing set_ldisc */
792 mutex_lock(&tty->ldisc_mutex);
793 /* Switch back to N_TTY */
794 tty_ldisc_reinit(tty);
795 /* At this point we have a closed ldisc and we want to
796 reopen it. We could defer this to the next open but
797 it means auditing a lot of other paths so this is a FIXME */
798 WARN_ON(tty_ldisc_open(tty, tty->ldisc));
799 tty_ldisc_enable(tty);
800 mutex_unlock(&tty->ldisc_mutex);
801 tty_reset_termios(tty);
802 }
803}
606 804
607/** 805/**
608 * tty_ldisc_setup - open line discipline 806 * tty_ldisc_setup - open line discipline
@@ -610,24 +808,23 @@ restart:
610 * @o_tty: pair tty for pty/tty pairs 808 * @o_tty: pair tty for pty/tty pairs
611 * 809 *
612 * Called during the initial open of a tty/pty pair in order to set up the 810 * Called during the initial open of a tty/pty pair in order to set up the
613 * line discplines and bind them to the tty. 811 * line disciplines and bind them to the tty. This has no locking issues
812 * as the device isn't yet active.
614 */ 813 */
615 814
616int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty) 815int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty)
617{ 816{
618 struct tty_ldisc *ld = &tty->ldisc; 817 struct tty_ldisc *ld = tty->ldisc;
619 int retval; 818 int retval;
620 819
621 if (ld->ops->open) { 820 retval = tty_ldisc_open(tty, ld);
622 retval = (ld->ops->open)(tty); 821 if (retval)
623 if (retval) 822 return retval;
624 return retval; 823
625 } 824 if (o_tty) {
626 if (o_tty && o_tty->ldisc.ops->open) { 825 retval = tty_ldisc_open(o_tty, o_tty->ldisc);
627 retval = (o_tty->ldisc.ops->open)(o_tty);
628 if (retval) { 826 if (retval) {
629 if (ld->ops->close) 827 tty_ldisc_close(tty, ld);
630 (ld->ops->close)(tty);
631 return retval; 828 return retval;
632 } 829 }
633 tty_ldisc_enable(o_tty); 830 tty_ldisc_enable(o_tty);
@@ -635,32 +832,25 @@ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty)
635 tty_ldisc_enable(tty); 832 tty_ldisc_enable(tty);
636 return 0; 833 return 0;
637} 834}
638
639/** 835/**
640 * tty_ldisc_release - release line discipline 836 * tty_ldisc_release - release line discipline
641 * @tty: tty being shut down 837 * @tty: tty being shut down
642 * @o_tty: pair tty for pty/tty pairs 838 * @o_tty: pair tty for pty/tty pairs
643 * 839 *
644 * Called during the final close of a tty/pty pair in order to shut down the 840 * Called during the final close of a tty/pty pair in order to shut down
645 * line discpline layer. 841 * the line discpline layer. On exit the ldisc assigned is N_TTY and the
842 * ldisc has not been opened.
646 */ 843 */
647 844
648void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty) 845void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
649{ 846{
650 unsigned long flags;
651 struct tty_ldisc ld;
652 /* 847 /*
653 * Prevent flush_to_ldisc() from rescheduling the work for later. Then 848 * Prevent flush_to_ldisc() from rescheduling the work for later. Then
654 * kill any delayed work. As this is the final close it does not 849 * kill any delayed work. As this is the final close it does not
655 * race with the set_ldisc code path. 850 * race with the set_ldisc code path.
656 */ 851 */
657 clear_bit(TTY_LDISC, &tty->flags);
658 cancel_delayed_work(&tty->buf.work);
659
660 /*
661 * Wait for ->hangup_work and ->buf.work handlers to terminate
662 */
663 852
853 tty_ldisc_halt(tty);
664 flush_scheduled_work(); 854 flush_scheduled_work();
665 855
666 /* 856 /*
@@ -668,38 +858,19 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
668 * side waiters as the file is closing so user count on the file 858 * side waiters as the file is closing so user count on the file
669 * side is zero. 859 * side is zero.
670 */ 860 */
671 spin_lock_irqsave(&tty_ldisc_lock, flags); 861
672 while (tty->ldisc.refcount) { 862 tty_ldisc_wait_idle(tty);
673 spin_unlock_irqrestore(&tty_ldisc_lock, flags); 863
674 wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0);
675 spin_lock_irqsave(&tty_ldisc_lock, flags);
676 }
677 spin_unlock_irqrestore(&tty_ldisc_lock, flags);
678 /* 864 /*
679 * Shutdown the current line discipline, and reset it to N_TTY. 865 * Shutdown the current line discipline, and reset it to N_TTY.
680 * 866 *
681 * FIXME: this MUST get fixed for the new reflocking 867 * FIXME: this MUST get fixed for the new reflocking
682 */ 868 */
683 if (tty->ldisc.ops->close)
684 (tty->ldisc.ops->close)(tty);
685 tty_ldisc_put(tty->ldisc.ops);
686 869
687 /* 870 tty_ldisc_reinit(tty);
688 * Switch the line discipline back 871 /* This will need doing differently if we need to lock */
689 */ 872 if (o_tty)
690 WARN_ON(tty_ldisc_get(N_TTY, &ld)); 873 tty_ldisc_release(o_tty, NULL);
691 tty_ldisc_assign(tty, &ld);
692 tty_set_termios_ldisc(tty, N_TTY);
693 if (o_tty) {
694 /* FIXME: could o_tty be in setldisc here ? */
695 clear_bit(TTY_LDISC, &o_tty->flags);
696 if (o_tty->ldisc.ops->close)
697 (o_tty->ldisc.ops->close)(o_tty);
698 tty_ldisc_put(o_tty->ldisc.ops);
699 WARN_ON(tty_ldisc_get(N_TTY, &ld));
700 tty_ldisc_assign(o_tty, &ld);
701 tty_set_termios_ldisc(o_tty, N_TTY);
702 }
703} 874}
704 875
705/** 876/**
@@ -712,10 +883,10 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
712 883
713void tty_ldisc_init(struct tty_struct *tty) 884void tty_ldisc_init(struct tty_struct *tty)
714{ 885{
715 struct tty_ldisc ld; 886 struct tty_ldisc *ld = tty_ldisc_get(N_TTY);
716 if (tty_ldisc_get(N_TTY, &ld) < 0) 887 if (IS_ERR(ld))
717 panic("n_tty: init_tty"); 888 panic("n_tty: init_tty");
718 tty_ldisc_assign(tty, &ld); 889 tty_ldisc_assign(tty, ld);
719} 890}
720 891
721void tty_ldisc_begin(void) 892void tty_ldisc_begin(void)
diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index 9b8004c72686..62dadfc95e34 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -137,7 +137,7 @@ int tty_port_carrier_raised(struct tty_port *port)
137EXPORT_SYMBOL(tty_port_carrier_raised); 137EXPORT_SYMBOL(tty_port_carrier_raised);
138 138
139/** 139/**
140 * tty_port_raise_dtr_rts - Riase DTR/RTS 140 * tty_port_raise_dtr_rts - Raise DTR/RTS
141 * @port: tty port 141 * @port: tty port
142 * 142 *
143 * Wrapper for the DTR/RTS raise logic. For the moment this is used 143 * Wrapper for the DTR/RTS raise logic. For the moment this is used
@@ -147,12 +147,28 @@ EXPORT_SYMBOL(tty_port_carrier_raised);
147 147
148void tty_port_raise_dtr_rts(struct tty_port *port) 148void tty_port_raise_dtr_rts(struct tty_port *port)
149{ 149{
150 if (port->ops->raise_dtr_rts) 150 if (port->ops->dtr_rts)
151 port->ops->raise_dtr_rts(port); 151 port->ops->dtr_rts(port, 1);
152} 152}
153EXPORT_SYMBOL(tty_port_raise_dtr_rts); 153EXPORT_SYMBOL(tty_port_raise_dtr_rts);
154 154
155/** 155/**
156 * tty_port_lower_dtr_rts - Lower DTR/RTS
157 * @port: tty port
158 *
159 * Wrapper for the DTR/RTS raise logic. For the moment this is used
160 * to hide some internal details. This will eventually become entirely
161 * internal to the tty port.
162 */
163
164void tty_port_lower_dtr_rts(struct tty_port *port)
165{
166 if (port->ops->dtr_rts)
167 port->ops->dtr_rts(port, 0);
168}
169EXPORT_SYMBOL(tty_port_lower_dtr_rts);
170
171/**
156 * tty_port_block_til_ready - Waiting logic for tty open 172 * tty_port_block_til_ready - Waiting logic for tty open
157 * @port: the tty port being opened 173 * @port: the tty port being opened
158 * @tty: the tty device being bound 174 * @tty: the tty device being bound
@@ -167,7 +183,7 @@ EXPORT_SYMBOL(tty_port_raise_dtr_rts);
167 * - port flags and counts 183 * - port flags and counts
168 * 184 *
169 * The passed tty_port must implement the carrier_raised method if it can 185 * The passed tty_port must implement the carrier_raised method if it can
170 * do carrier detect and the raise_dtr_rts method if it supports software 186 * do carrier detect and the dtr_rts method if it supports software
171 * management of these lines. Note that the dtr/rts raise is done each 187 * management of these lines. Note that the dtr/rts raise is done each
172 * iteration as a hangup may have previously dropped them while we wait. 188 * iteration as a hangup may have previously dropped them while we wait.
173 */ 189 */
@@ -182,7 +198,8 @@ int tty_port_block_til_ready(struct tty_port *port,
182 198
183 /* block if port is in the process of being closed */ 199 /* block if port is in the process of being closed */
184 if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) { 200 if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
185 interruptible_sleep_on(&port->close_wait); 201 wait_event_interruptible(port->close_wait,
202 !(port->flags & ASYNC_CLOSING));
186 if (port->flags & ASYNC_HUP_NOTIFY) 203 if (port->flags & ASYNC_HUP_NOTIFY)
187 return -EAGAIN; 204 return -EAGAIN;
188 else 205 else
@@ -205,7 +222,6 @@ int tty_port_block_til_ready(struct tty_port *port,
205 before the next open may complete */ 222 before the next open may complete */
206 223
207 retval = 0; 224 retval = 0;
208 add_wait_queue(&port->open_wait, &wait);
209 225
210 /* The port lock protects the port counts */ 226 /* The port lock protects the port counts */
211 spin_lock_irqsave(&port->lock, flags); 227 spin_lock_irqsave(&port->lock, flags);
@@ -219,7 +235,7 @@ int tty_port_block_til_ready(struct tty_port *port,
219 if (tty->termios->c_cflag & CBAUD) 235 if (tty->termios->c_cflag & CBAUD)
220 tty_port_raise_dtr_rts(port); 236 tty_port_raise_dtr_rts(port);
221 237
222 set_current_state(TASK_INTERRUPTIBLE); 238 prepare_to_wait(&port->open_wait, &wait, TASK_INTERRUPTIBLE);
223 /* Check for a hangup or uninitialised port. Return accordingly */ 239 /* Check for a hangup or uninitialised port. Return accordingly */
224 if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) { 240 if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
225 if (port->flags & ASYNC_HUP_NOTIFY) 241 if (port->flags & ASYNC_HUP_NOTIFY)
@@ -240,8 +256,7 @@ int tty_port_block_til_ready(struct tty_port *port,
240 } 256 }
241 schedule(); 257 schedule();
242 } 258 }
243 set_current_state(TASK_RUNNING); 259 finish_wait(&port->open_wait, &wait);
244 remove_wait_queue(&port->open_wait, &wait);
245 260
246 /* Update counts. A parallel hangup will have set count to zero and 261 /* Update counts. A parallel hangup will have set count to zero and
247 we must not mess that up further */ 262 we must not mess that up further */
@@ -292,6 +307,17 @@ int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct f
292 if (port->flags & ASYNC_INITIALIZED && 307 if (port->flags & ASYNC_INITIALIZED &&
293 port->closing_wait != ASYNC_CLOSING_WAIT_NONE) 308 port->closing_wait != ASYNC_CLOSING_WAIT_NONE)
294 tty_wait_until_sent(tty, port->closing_wait); 309 tty_wait_until_sent(tty, port->closing_wait);
310 if (port->drain_delay) {
311 unsigned int bps = tty_get_baud_rate(tty);
312 long timeout;
313
314 if (bps > 1200)
315 timeout = max_t(long, (HZ * 10 * port->drain_delay) / bps,
316 HZ / 10);
317 else
318 timeout = 2 * HZ;
319 schedule_timeout_interruptible(timeout);
320 }
295 return 1; 321 return 1;
296} 322}
297EXPORT_SYMBOL(tty_port_close_start); 323EXPORT_SYMBOL(tty_port_close_start);
@@ -302,6 +328,9 @@ void tty_port_close_end(struct tty_port *port, struct tty_struct *tty)
302 328
303 tty_ldisc_flush(tty); 329 tty_ldisc_flush(tty);
304 330
331 if (tty->termios->c_cflag & HUPCL)
332 tty_port_lower_dtr_rts(port);
333
305 spin_lock_irqsave(&port->lock, flags); 334 spin_lock_irqsave(&port->lock, flags);
306 tty->closing = 0; 335 tty->closing = 0;
307 336
diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c
index 537da1cde16d..e59b6dee9ae2 100644
--- a/drivers/ide/alim15x3.c
+++ b/drivers/ide/alim15x3.c
@@ -402,27 +402,23 @@ static u8 ali_cable_detect(ide_hwif_t *hwif)
402 return cbl; 402 return cbl;
403} 403}
404 404
405#if !defined(CONFIG_SPARC64) && !defined(CONFIG_PPC) 405#ifndef CONFIG_SPARC64
406/** 406/**
407 * init_hwif_ali15x3 - Initialize the ALI IDE x86 stuff 407 * init_hwif_ali15x3 - Initialize the ALI IDE x86 stuff
408 * @hwif: interface to configure 408 * @hwif: interface to configure
409 * 409 *
410 * Obtain the IRQ tables for an ALi based IDE solution on the PC 410 * Obtain the IRQ tables for an ALi based IDE solution on the PC
411 * class platforms. This part of the code isn't applicable to the 411 * class platforms. This part of the code isn't applicable to the
412 * Sparc and PowerPC systems. 412 * Sparc systems.
413 */ 413 */
414 414
415static void __devinit init_hwif_ali15x3 (ide_hwif_t *hwif) 415static void __devinit init_hwif_ali15x3 (ide_hwif_t *hwif)
416{ 416{
417 struct pci_dev *dev = to_pci_dev(hwif->dev);
418 u8 ideic, inmir; 417 u8 ideic, inmir;
419 s8 irq_routing_table[] = { -1, 9, 3, 10, 4, 5, 7, 6, 418 s8 irq_routing_table[] = { -1, 9, 3, 10, 4, 5, 7, 6,
420 1, 11, 0, 12, 0, 14, 0, 15 }; 419 1, 11, 0, 12, 0, 14, 0, 15 };
421 int irq = -1; 420 int irq = -1;
422 421
423 if (dev->device == PCI_DEVICE_ID_AL_M5229)
424 hwif->irq = hwif->channel ? 15 : 14;
425
426 if (isa_dev) { 422 if (isa_dev) {
427 /* 423 /*
428 * read IDE interface control 424 * read IDE interface control
@@ -455,7 +451,7 @@ static void __devinit init_hwif_ali15x3 (ide_hwif_t *hwif)
455} 451}
456#else 452#else
457#define init_hwif_ali15x3 NULL 453#define init_hwif_ali15x3 NULL
458#endif /* !defined(CONFIG_SPARC64) && !defined(CONFIG_PPC) */ 454#endif /* CONFIG_SPARC64 */
459 455
460/** 456/**
461 * init_dma_ali15x3 - set up DMA on ALi15x3 457 * init_dma_ali15x3 - set up DMA on ALi15x3
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 7201b176d75b..afe5a4323879 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -80,34 +80,6 @@ void ide_init_pc(struct ide_atapi_pc *pc)
80EXPORT_SYMBOL_GPL(ide_init_pc); 80EXPORT_SYMBOL_GPL(ide_init_pc);
81 81
82/* 82/*
83 * Generate a new packet command request in front of the request queue, before
84 * the current request, so that it will be processed immediately, on the next
85 * pass through the driver.
86 */
87static void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk,
88 struct ide_atapi_pc *pc, struct request *rq)
89{
90 blk_rq_init(NULL, rq);
91 rq->cmd_type = REQ_TYPE_SPECIAL;
92 rq->cmd_flags |= REQ_PREEMPT;
93 rq->buffer = (char *)pc;
94 rq->rq_disk = disk;
95
96 if (pc->req_xfer) {
97 rq->data = pc->buf;
98 rq->data_len = pc->req_xfer;
99 }
100
101 memcpy(rq->cmd, pc->c, 12);
102 if (drive->media == ide_tape)
103 rq->cmd[13] = REQ_IDETAPE_PC1;
104
105 drive->hwif->rq = NULL;
106
107 elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0);
108}
109
110/*
111 * Add a special packet command request to the tail of the request queue, 83 * Add a special packet command request to the tail of the request queue,
112 * and wait for it to be serviced. 84 * and wait for it to be serviced.
113 */ 85 */
@@ -119,19 +91,21 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
119 91
120 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 92 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
121 rq->cmd_type = REQ_TYPE_SPECIAL; 93 rq->cmd_type = REQ_TYPE_SPECIAL;
122 rq->buffer = (char *)pc; 94 rq->special = (char *)pc;
123 95
124 if (pc->req_xfer) { 96 if (pc->req_xfer) {
125 rq->data = pc->buf; 97 error = blk_rq_map_kern(drive->queue, rq, pc->buf, pc->req_xfer,
126 rq->data_len = pc->req_xfer; 98 GFP_NOIO);
99 if (error)
100 goto put_req;
127 } 101 }
128 102
129 memcpy(rq->cmd, pc->c, 12); 103 memcpy(rq->cmd, pc->c, 12);
130 if (drive->media == ide_tape) 104 if (drive->media == ide_tape)
131 rq->cmd[13] = REQ_IDETAPE_PC1; 105 rq->cmd[13] = REQ_IDETAPE_PC1;
132 error = blk_execute_rq(drive->queue, disk, rq, 0); 106 error = blk_execute_rq(drive->queue, disk, rq, 0);
107put_req:
133 blk_put_request(rq); 108 blk_put_request(rq);
134
135 return error; 109 return error;
136} 110}
137EXPORT_SYMBOL_GPL(ide_queue_pc_tail); 111EXPORT_SYMBOL_GPL(ide_queue_pc_tail);
@@ -191,20 +165,103 @@ void ide_create_request_sense_cmd(ide_drive_t *drive, struct ide_atapi_pc *pc)
191} 165}
192EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd); 166EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd);
193 167
168void ide_prep_sense(ide_drive_t *drive, struct request *rq)
169{
170 struct request_sense *sense = &drive->sense_data;
171 struct request *sense_rq = &drive->sense_rq;
172 unsigned int cmd_len, sense_len;
173 int err;
174
175 debug_log("%s: enter\n", __func__);
176
177 switch (drive->media) {
178 case ide_floppy:
179 cmd_len = 255;
180 sense_len = 18;
181 break;
182 case ide_tape:
183 cmd_len = 20;
184 sense_len = 20;
185 break;
186 default:
187 cmd_len = 18;
188 sense_len = 18;
189 }
190
191 BUG_ON(sense_len > sizeof(*sense));
192
193 if (blk_sense_request(rq) || drive->sense_rq_armed)
194 return;
195
196 memset(sense, 0, sizeof(*sense));
197
198 blk_rq_init(rq->q, sense_rq);
199
200 err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
201 GFP_NOIO);
202 if (unlikely(err)) {
203 if (printk_ratelimit())
204 printk(KERN_WARNING "%s: failed to map sense buffer\n",
205 drive->name);
206 return;
207 }
208
209 sense_rq->rq_disk = rq->rq_disk;
210 sense_rq->cmd[0] = GPCMD_REQUEST_SENSE;
211 sense_rq->cmd[4] = cmd_len;
212 sense_rq->cmd_type = REQ_TYPE_SENSE;
213 sense_rq->cmd_flags |= REQ_PREEMPT;
214
215 if (drive->media == ide_tape)
216 sense_rq->cmd[13] = REQ_IDETAPE_PC1;
217
218 drive->sense_rq_armed = true;
219}
220EXPORT_SYMBOL_GPL(ide_prep_sense);
221
222int ide_queue_sense_rq(ide_drive_t *drive, void *special)
223{
224 /* deferred failure from ide_prep_sense() */
225 if (!drive->sense_rq_armed) {
226 printk(KERN_WARNING "%s: failed queue sense request\n",
227 drive->name);
228 return -ENOMEM;
229 }
230
231 drive->sense_rq.special = special;
232 drive->sense_rq_armed = false;
233
234 drive->hwif->rq = NULL;
235
236 elv_add_request(drive->queue, &drive->sense_rq,
237 ELEVATOR_INSERT_FRONT, 0);
238 return 0;
239}
240EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
241
194/* 242/*
195 * Called when an error was detected during the last packet command. 243 * Called when an error was detected during the last packet command.
196 * We queue a request sense packet command in the head of the request list. 244 * We queue a request sense packet command at the head of the request
245 * queue.
197 */ 246 */
198void ide_retry_pc(ide_drive_t *drive, struct gendisk *disk) 247void ide_retry_pc(ide_drive_t *drive)
199{ 248{
200 struct request *rq = &drive->request_sense_rq; 249 struct request *sense_rq = &drive->sense_rq;
201 struct ide_atapi_pc *pc = &drive->request_sense_pc; 250 struct ide_atapi_pc *pc = &drive->request_sense_pc;
202 251
203 (void)ide_read_error(drive); 252 (void)ide_read_error(drive);
204 ide_create_request_sense_cmd(drive, pc); 253
254 /* init pc from sense_rq */
255 ide_init_pc(pc);
256 memcpy(pc->c, sense_rq->cmd, 12);
257 pc->buf = bio_data(sense_rq->bio); /* pointer to mapped address */
258 pc->req_xfer = sense_rq->data_len;
259
205 if (drive->media == ide_tape) 260 if (drive->media == ide_tape)
206 set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags); 261 set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
207 ide_queue_pc_head(drive, disk, pc, rq); 262
263 if (ide_queue_sense_rq(drive, pc))
264 ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq));
208} 265}
209EXPORT_SYMBOL_GPL(ide_retry_pc); 266EXPORT_SYMBOL_GPL(ide_retry_pc);
210 267
@@ -276,7 +333,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
276 struct ide_cmd *cmd = &hwif->cmd; 333 struct ide_cmd *cmd = &hwif->cmd;
277 struct request *rq = hwif->rq; 334 struct request *rq = hwif->rq;
278 const struct ide_tp_ops *tp_ops = hwif->tp_ops; 335 const struct ide_tp_ops *tp_ops = hwif->tp_ops;
279 xfer_func_t *xferfunc;
280 unsigned int timeout, done; 336 unsigned int timeout, done;
281 u16 bcount; 337 u16 bcount;
282 u8 stat, ireason, dsc = 0; 338 u8 stat, ireason, dsc = 0;
@@ -303,11 +359,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
303 drive->name, rq_data_dir(pc->rq) 359 drive->name, rq_data_dir(pc->rq)
304 ? "write" : "read"); 360 ? "write" : "read");
305 pc->flags |= PC_FLAG_DMA_ERROR; 361 pc->flags |= PC_FLAG_DMA_ERROR;
306 } else { 362 } else
307 pc->xferred = pc->req_xfer; 363 pc->xferred = pc->req_xfer;
308 if (drive->pc_update_buffers)
309 drive->pc_update_buffers(drive, pc);
310 }
311 debug_log("%s: DMA finished\n", drive->name); 364 debug_log("%s: DMA finished\n", drive->name);
312 } 365 }
313 366
@@ -343,7 +396,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
343 debug_log("[cmd %x]: check condition\n", rq->cmd[0]); 396 debug_log("[cmd %x]: check condition\n", rq->cmd[0]);
344 397
345 /* Retry operation */ 398 /* Retry operation */
346 ide_retry_pc(drive, rq->rq_disk); 399 ide_retry_pc(drive);
347 400
348 /* queued, but not started */ 401 /* queued, but not started */
349 return ide_stopped; 402 return ide_stopped;
@@ -353,6 +406,12 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
353 if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && (stat & ATA_DSC) == 0) 406 if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && (stat & ATA_DSC) == 0)
354 dsc = 1; 407 dsc = 1;
355 408
409 /*
410 * ->pc_callback() might change rq->data_len for
411 * residual count, cache total length.
412 */
413 done = blk_rq_bytes(rq);
414
356 /* Command finished - Call the callback function */ 415 /* Command finished - Call the callback function */
357 uptodate = drive->pc_callback(drive, dsc); 416 uptodate = drive->pc_callback(drive, dsc);
358 417
@@ -361,7 +420,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
361 420
362 if (blk_special_request(rq)) { 421 if (blk_special_request(rq)) {
363 rq->errors = 0; 422 rq->errors = 0;
364 done = blk_rq_bytes(rq);
365 error = 0; 423 error = 0;
366 } else { 424 } else {
367 425
@@ -370,11 +428,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
370 rq->errors = -EIO; 428 rq->errors = -EIO;
371 } 429 }
372 430
373 if (drive->media == ide_tape)
374 done = ide_rq_bytes(rq); /* FIXME */
375 else
376 done = blk_rq_bytes(rq);
377
378 error = uptodate ? 0 : -EIO; 431 error = uptodate ? 0 : -EIO;
379 } 432 }
380 433
@@ -407,21 +460,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
407 return ide_do_reset(drive); 460 return ide_do_reset(drive);
408 } 461 }
409 462
410 xferfunc = write ? tp_ops->output_data : tp_ops->input_data; 463 done = min_t(unsigned int, bcount, cmd->nleft);
411 464 ide_pio_bytes(drive, cmd, write, done);
412 if (drive->media == ide_floppy && pc->buf == NULL) {
413 done = min_t(unsigned int, bcount, cmd->nleft);
414 ide_pio_bytes(drive, cmd, write, done);
415 } else if (drive->media == ide_tape && pc->bh) {
416 done = drive->pc_io_buffers(drive, pc, bcount, write);
417 } else {
418 done = min_t(unsigned int, bcount, pc->req_xfer - pc->xferred);
419 xferfunc(drive, NULL, pc->cur_pos, done);
420 }
421 465
422 /* Update the current position */ 466 /* Update transferred byte count */
423 pc->xferred += done; 467 pc->xferred += done;
424 pc->cur_pos += done;
425 468
426 bcount -= done; 469 bcount -= done;
427 470
@@ -599,7 +642,6 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd)
599 642
600 /* We haven't transferred any data yet */ 643 /* We haven't transferred any data yet */
601 pc->xferred = 0; 644 pc->xferred = 0;
602 pc->cur_pos = pc->buf;
603 645
604 valid_tf = IDE_VALID_DEVICE; 646 valid_tf = IDE_VALID_DEVICE;
605 bcount = ((drive->media == ide_tape) ? 647 bcount = ((drive->media == ide_tape) ?
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 925eb9e245d1..a75e4ee1cd17 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -206,54 +206,25 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive,
206 ide_cd_log_error(drive->name, failed_command, sense); 206 ide_cd_log_error(drive->name, failed_command, sense);
207} 207}
208 208
209static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
210 struct request *failed_command)
211{
212 struct cdrom_info *info = drive->driver_data;
213 struct request *rq = &drive->request_sense_rq;
214
215 ide_debug_log(IDE_DBG_SENSE, "enter");
216
217 if (sense == NULL)
218 sense = &info->sense_data;
219
220 /* stuff the sense request in front of our current request */
221 blk_rq_init(NULL, rq);
222 rq->cmd_type = REQ_TYPE_ATA_PC;
223 rq->rq_disk = info->disk;
224
225 rq->data = sense;
226 rq->cmd[0] = GPCMD_REQUEST_SENSE;
227 rq->cmd[4] = 18;
228 rq->data_len = 18;
229
230 rq->cmd_type = REQ_TYPE_SENSE;
231 rq->cmd_flags |= REQ_PREEMPT;
232
233 /* NOTE! Save the failed command in "rq->buffer" */
234 rq->buffer = (void *) failed_command;
235
236 if (failed_command)
237 ide_debug_log(IDE_DBG_SENSE, "failed_cmd: 0x%x",
238 failed_command->cmd[0]);
239
240 drive->hwif->rq = NULL;
241
242 elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0);
243}
244
245static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) 209static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
246{ 210{
247 /* 211 /*
248 * For REQ_TYPE_SENSE, "rq->buffer" points to the original 212 * For REQ_TYPE_SENSE, "rq->special" points to the original
249 * failed request 213 * failed request. Also, the sense data should be read
214 * directly from rq which might be different from the original
215 * sense buffer if it got copied during mapping.
250 */ 216 */
251 struct request *failed = (struct request *)rq->buffer; 217 struct request *failed = (struct request *)rq->special;
252 struct cdrom_info *info = drive->driver_data; 218 void *sense = bio_data(rq->bio);
253 void *sense = &info->sense_data;
254 219
255 if (failed) { 220 if (failed) {
256 if (failed->sense) { 221 if (failed->sense) {
222 /*
223 * Sense is always read into drive->sense_data.
224 * Copy back if the failed request has its
225 * sense pointer set.
226 */
227 memcpy(failed->sense, sense, 18);
257 sense = failed->sense; 228 sense = failed->sense;
258 failed->sense_len = rq->sense_len; 229 failed->sense_len = rq->sense_len;
259 } 230 }
@@ -428,7 +399,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
428 399
429 /* if we got a CHECK_CONDITION status, queue a request sense command */ 400 /* if we got a CHECK_CONDITION status, queue a request sense command */
430 if (stat & ATA_ERR) 401 if (stat & ATA_ERR)
431 cdrom_queue_request_sense(drive, NULL, NULL); 402 return ide_queue_sense_rq(drive, NULL) ? 2 : 1;
432 return 1; 403 return 1;
433 404
434end_request: 405end_request:
@@ -442,8 +413,7 @@ end_request:
442 413
443 hwif->rq = NULL; 414 hwif->rq = NULL;
444 415
445 cdrom_queue_request_sense(drive, rq->sense, rq); 416 return ide_queue_sense_rq(drive, rq) ? 2 : 1;
446 return 1;
447 } else 417 } else
448 return 2; 418 return 2;
449} 419}
@@ -503,14 +473,8 @@ static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd)
503 * and some drives don't send them. Sigh. 473 * and some drives don't send them. Sigh.
504 */ 474 */
505 if (rq->cmd[0] == GPCMD_REQUEST_SENSE && 475 if (rq->cmd[0] == GPCMD_REQUEST_SENSE &&
506 cmd->nleft > 0 && cmd->nleft <= 5) { 476 cmd->nleft > 0 && cmd->nleft <= 5)
507 unsigned int ofs = cmd->nbytes - cmd->nleft; 477 cmd->nleft = 0;
508
509 while (cmd->nleft > 0) {
510 *((u8 *)rq->data + ofs++) = 0;
511 cmd->nleft--;
512 }
513 }
514} 478}
515 479
516int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, 480int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
@@ -543,8 +507,12 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
543 rq->cmd_flags |= cmd_flags; 507 rq->cmd_flags |= cmd_flags;
544 rq->timeout = timeout; 508 rq->timeout = timeout;
545 if (buffer) { 509 if (buffer) {
546 rq->data = buffer; 510 error = blk_rq_map_kern(drive->queue, rq, buffer,
547 rq->data_len = *bufflen; 511 *bufflen, GFP_NOIO);
512 if (error) {
513 blk_put_request(rq);
514 return error;
515 }
548 } 516 }
549 517
550 error = blk_execute_rq(drive->queue, info->disk, rq, 0); 518 error = blk_execute_rq(drive->queue, info->disk, rq, 0);
@@ -838,15 +806,10 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
838 drive->dma = 0; 806 drive->dma = 0;
839 807
840 /* sg request */ 808 /* sg request */
841 if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) { 809 if (rq->bio) {
842 struct request_queue *q = drive->queue; 810 struct request_queue *q = drive->queue;
811 char *buf = bio_data(rq->bio);
843 unsigned int alignment; 812 unsigned int alignment;
844 char *buf;
845
846 if (rq->bio)
847 buf = bio_data(rq->bio);
848 else
849 buf = rq->data;
850 813
851 drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA); 814 drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
852 815
@@ -896,6 +859,9 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
896 goto out_end; 859 goto out_end;
897 } 860 }
898 861
862 /* prepare sense request for this command */
863 ide_prep_sense(drive, rq);
864
899 memset(&cmd, 0, sizeof(cmd)); 865 memset(&cmd, 0, sizeof(cmd));
900 866
901 if (rq_data_dir(rq)) 867 if (rq_data_dir(rq))
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index 1d97101099ce..93a3cf1b0f3f 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -87,10 +87,6 @@ struct cdrom_info {
87 87
88 struct atapi_toc *toc; 88 struct atapi_toc *toc;
89 89
90 /* The result of the last successful request sense command
91 on this device. */
92 struct request_sense sense_data;
93
94 u8 max_speed; /* Max speed of the drive. */ 90 u8 max_speed; /* Max speed of the drive. */
95 u8 current_speed; /* Current speed of the drive. */ 91 u8 current_speed; /* Current speed of the drive. */
96 92
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index a9fbe2c31210..c2438804d3c4 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -411,7 +411,6 @@ static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
411 cmd->protocol = ATA_PROT_NODATA; 411 cmd->protocol = ATA_PROT_NODATA;
412 412
413 rq->cmd_type = REQ_TYPE_ATA_TASKFILE; 413 rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
414 rq->cmd_flags |= REQ_SOFTBARRIER;
415 rq->special = cmd; 414 rq->special = cmd;
416} 415}
417 416
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index a0b8cab1d9a6..d9123ecae4a9 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -510,23 +510,11 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error)
510 /* 510 /*
511 * un-busy drive etc and make sure request is sane 511 * un-busy drive etc and make sure request is sane
512 */ 512 */
513
514 rq = hwif->rq; 513 rq = hwif->rq;
515 if (!rq) 514 if (rq) {
516 goto out; 515 hwif->rq = NULL;
517 516 rq->errors = 0;
518 hwif->rq = NULL; 517 }
519
520 rq->errors = 0;
521
522 if (!rq->bio)
523 goto out;
524
525 rq->sector = rq->bio->bi_sector;
526 rq->current_nr_sectors = bio_iovec(rq->bio)->bv_len >> 9;
527 rq->hard_cur_sectors = rq->current_nr_sectors;
528 rq->buffer = bio_data(rq->bio);
529out:
530 return ret; 518 return ret;
531} 519}
532 520
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 2b4868d95f8b..537b7c558033 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -134,13 +134,17 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive,
134 drive->pc = pc; 134 drive->pc = pc;
135 135
136 if (pc->retries > IDEFLOPPY_MAX_PC_RETRIES) { 136 if (pc->retries > IDEFLOPPY_MAX_PC_RETRIES) {
137 unsigned int done = blk_rq_bytes(drive->hwif->rq);
138
137 if (!(pc->flags & PC_FLAG_SUPPRESS_ERROR)) 139 if (!(pc->flags & PC_FLAG_SUPPRESS_ERROR))
138 ide_floppy_report_error(floppy, pc); 140 ide_floppy_report_error(floppy, pc);
141
139 /* Giving up */ 142 /* Giving up */
140 pc->error = IDE_DRV_ERROR_GENERAL; 143 pc->error = IDE_DRV_ERROR_GENERAL;
141 144
142 drive->failed_pc = NULL; 145 drive->failed_pc = NULL;
143 drive->pc_callback(drive, 0); 146 drive->pc_callback(drive, 0);
147 ide_complete_rq(drive, -EIO, done);
144 return ide_stopped; 148 return ide_stopped;
145 } 149 }
146 150
@@ -216,15 +220,13 @@ static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy,
216 ide_init_pc(pc); 220 ide_init_pc(pc);
217 memcpy(pc->c, rq->cmd, sizeof(pc->c)); 221 memcpy(pc->c, rq->cmd, sizeof(pc->c));
218 pc->rq = rq; 222 pc->rq = rq;
219 if (rq->data_len && rq_data_dir(rq) == WRITE) 223 if (rq->data_len) {
220 pc->flags |= PC_FLAG_WRITING;
221 pc->buf = rq->data;
222 if (rq->bio)
223 pc->flags |= PC_FLAG_DMA_OK; 224 pc->flags |= PC_FLAG_DMA_OK;
224 /* 225 if (rq_data_dir(rq) == WRITE)
225 * possibly problematic, doesn't look like ide-floppy correctly 226 pc->flags |= PC_FLAG_WRITING;
226 * handled scattered requests if dma fails... 227 }
227 */ 228 /* pio will be performed by ide_pio_bytes() which handles sg fine */
229 pc->buf = NULL;
228 pc->req_xfer = pc->buf_size = rq->data_len; 230 pc->req_xfer = pc->buf_size = rq->data_len;
229} 231}
230 232
@@ -265,8 +267,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
265 } 267 }
266 pc = &floppy->queued_pc; 268 pc = &floppy->queued_pc;
267 idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block); 269 idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
268 } else if (blk_special_request(rq)) { 270 } else if (blk_special_request(rq) || blk_sense_request(rq)) {
269 pc = (struct ide_atapi_pc *) rq->buffer; 271 pc = (struct ide_atapi_pc *)rq->special;
270 } else if (blk_pc_request(rq)) { 272 } else if (blk_pc_request(rq)) {
271 pc = &floppy->queued_pc; 273 pc = &floppy->queued_pc;
272 idefloppy_blockpc_cmd(floppy, pc, rq); 274 idefloppy_blockpc_cmd(floppy, pc, rq);
@@ -275,6 +277,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
275 goto out_end; 277 goto out_end;
276 } 278 }
277 279
280 ide_prep_sense(drive, rq);
281
278 memset(&cmd, 0, sizeof(cmd)); 282 memset(&cmd, 0, sizeof(cmd));
279 283
280 if (rq_data_dir(rq)) 284 if (rq_data_dir(rq))
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 6415a2e2ba87..41d804065d38 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -248,14 +248,7 @@ void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd)
248 struct scatterlist *sg = hwif->sg_table; 248 struct scatterlist *sg = hwif->sg_table;
249 struct request *rq = cmd->rq; 249 struct request *rq = cmd->rq;
250 250
251 if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { 251 cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
252 sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE);
253 cmd->sg_nents = 1;
254 } else if (!rq->bio) {
255 sg_init_one(sg, rq->data, rq->data_len);
256 cmd->sg_nents = 1;
257 } else
258 cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
259} 252}
260EXPORT_SYMBOL_GPL(ide_map_sg); 253EXPORT_SYMBOL_GPL(ide_map_sg);
261 254
@@ -371,7 +364,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
371 if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) 364 if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
372 return execute_drive_cmd(drive, rq); 365 return execute_drive_cmd(drive, rq);
373 else if (blk_pm_request(rq)) { 366 else if (blk_pm_request(rq)) {
374 struct request_pm_state *pm = rq->data; 367 struct request_pm_state *pm = rq->special;
375#ifdef DEBUG_PM 368#ifdef DEBUG_PM
376 printk("%s: start_power_step(step: %d)\n", 369 printk("%s: start_power_step(step: %d)\n",
377 drive->name, pm->pm_step); 370 drive->name, pm->pm_step);
@@ -484,6 +477,9 @@ void do_ide_request(struct request_queue *q)
484 477
485 spin_unlock_irq(q->queue_lock); 478 spin_unlock_irq(q->queue_lock);
486 479
480 /* HLD do_request() callback might sleep, make sure it's okay */
481 might_sleep();
482
487 if (ide_lock_host(host, hwif)) 483 if (ide_lock_host(host, hwif))
488 goto plug_device_2; 484 goto plug_device_2;
489 485
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index c1c25ebbaa1f..5991b23793f2 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -231,7 +231,6 @@ static int generic_drive_reset(ide_drive_t *drive)
231 rq->cmd_type = REQ_TYPE_SPECIAL; 231 rq->cmd_type = REQ_TYPE_SPECIAL;
232 rq->cmd_len = 1; 232 rq->cmd_len = 1;
233 rq->cmd[0] = REQ_DRIVE_RESET; 233 rq->cmd[0] = REQ_DRIVE_RESET;
234 rq->cmd_flags |= REQ_SOFTBARRIER;
235 if (blk_execute_rq(drive->queue, NULL, rq, 1)) 234 if (blk_execute_rq(drive->queue, NULL, rq, 1))
236 ret = rq->errors; 235 ret = rq->errors;
237 blk_put_request(rq); 236 blk_put_request(rq);
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 310d03f2b5b7..a914023d6d03 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -24,11 +24,8 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
24 start_queue = 1; 24 start_queue = 1;
25 spin_unlock_irq(&hwif->lock); 25 spin_unlock_irq(&hwif->lock);
26 26
27 if (start_queue) { 27 if (start_queue)
28 spin_lock_irq(q->queue_lock); 28 blk_run_queue(q);
29 blk_start_queueing(q);
30 spin_unlock_irq(q->queue_lock);
31 }
32 return; 29 return;
33 } 30 }
34 spin_unlock_irq(&hwif->lock); 31 spin_unlock_irq(&hwif->lock);
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 0d8a151c0a01..ba1488bd8430 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -7,7 +7,6 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
7 ide_hwif_t *hwif = drive->hwif; 7 ide_hwif_t *hwif = drive->hwif;
8 struct request *rq; 8 struct request *rq;
9 struct request_pm_state rqpm; 9 struct request_pm_state rqpm;
10 struct ide_cmd cmd;
11 int ret; 10 int ret;
12 11
13 /* call ACPI _GTM only once */ 12 /* call ACPI _GTM only once */
@@ -15,11 +14,9 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
15 ide_acpi_get_timing(hwif); 14 ide_acpi_get_timing(hwif);
16 15
17 memset(&rqpm, 0, sizeof(rqpm)); 16 memset(&rqpm, 0, sizeof(rqpm));
18 memset(&cmd, 0, sizeof(cmd));
19 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 17 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
20 rq->cmd_type = REQ_TYPE_PM_SUSPEND; 18 rq->cmd_type = REQ_TYPE_PM_SUSPEND;
21 rq->special = &cmd; 19 rq->special = &rqpm;
22 rq->data = &rqpm;
23 rqpm.pm_step = IDE_PM_START_SUSPEND; 20 rqpm.pm_step = IDE_PM_START_SUSPEND;
24 if (mesg.event == PM_EVENT_PRETHAW) 21 if (mesg.event == PM_EVENT_PRETHAW)
25 mesg.event = PM_EVENT_FREEZE; 22 mesg.event = PM_EVENT_FREEZE;
@@ -41,7 +38,6 @@ int generic_ide_resume(struct device *dev)
41 ide_hwif_t *hwif = drive->hwif; 38 ide_hwif_t *hwif = drive->hwif;
42 struct request *rq; 39 struct request *rq;
43 struct request_pm_state rqpm; 40 struct request_pm_state rqpm;
44 struct ide_cmd cmd;
45 int err; 41 int err;
46 42
47 /* call ACPI _PS0 / _STM only once */ 43 /* call ACPI _PS0 / _STM only once */
@@ -53,12 +49,10 @@ int generic_ide_resume(struct device *dev)
53 ide_acpi_exec_tfs(drive); 49 ide_acpi_exec_tfs(drive);
54 50
55 memset(&rqpm, 0, sizeof(rqpm)); 51 memset(&rqpm, 0, sizeof(rqpm));
56 memset(&cmd, 0, sizeof(cmd));
57 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 52 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
58 rq->cmd_type = REQ_TYPE_PM_RESUME; 53 rq->cmd_type = REQ_TYPE_PM_RESUME;
59 rq->cmd_flags |= REQ_PREEMPT; 54 rq->cmd_flags |= REQ_PREEMPT;
60 rq->special = &cmd; 55 rq->special = &rqpm;
61 rq->data = &rqpm;
62 rqpm.pm_step = IDE_PM_START_RESUME; 56 rqpm.pm_step = IDE_PM_START_RESUME;
63 rqpm.pm_state = PM_EVENT_ON; 57 rqpm.pm_state = PM_EVENT_ON;
64 58
@@ -77,7 +71,7 @@ int generic_ide_resume(struct device *dev)
77 71
78void ide_complete_power_step(ide_drive_t *drive, struct request *rq) 72void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
79{ 73{
80 struct request_pm_state *pm = rq->data; 74 struct request_pm_state *pm = rq->special;
81 75
82#ifdef DEBUG_PM 76#ifdef DEBUG_PM
83 printk(KERN_INFO "%s: complete_power_step(step: %d)\n", 77 printk(KERN_INFO "%s: complete_power_step(step: %d)\n",
@@ -107,10 +101,8 @@ void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
107 101
108ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) 102ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
109{ 103{
110 struct request_pm_state *pm = rq->data; 104 struct request_pm_state *pm = rq->special;
111 struct ide_cmd *cmd = rq->special; 105 struct ide_cmd cmd = { };
112
113 memset(cmd, 0, sizeof(*cmd));
114 106
115 switch (pm->pm_step) { 107 switch (pm->pm_step) {
116 case IDE_PM_FLUSH_CACHE: /* Suspend step 1 (flush cache) */ 108 case IDE_PM_FLUSH_CACHE: /* Suspend step 1 (flush cache) */
@@ -123,12 +115,12 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
123 return ide_stopped; 115 return ide_stopped;
124 } 116 }
125 if (ata_id_flush_ext_enabled(drive->id)) 117 if (ata_id_flush_ext_enabled(drive->id))
126 cmd->tf.command = ATA_CMD_FLUSH_EXT; 118 cmd.tf.command = ATA_CMD_FLUSH_EXT;
127 else 119 else
128 cmd->tf.command = ATA_CMD_FLUSH; 120 cmd.tf.command = ATA_CMD_FLUSH;
129 goto out_do_tf; 121 goto out_do_tf;
130 case IDE_PM_STANDBY: /* Suspend step 2 (standby) */ 122 case IDE_PM_STANDBY: /* Suspend step 2 (standby) */
131 cmd->tf.command = ATA_CMD_STANDBYNOW1; 123 cmd.tf.command = ATA_CMD_STANDBYNOW1;
132 goto out_do_tf; 124 goto out_do_tf;
133 case IDE_PM_RESTORE_PIO: /* Resume step 1 (restore PIO) */ 125 case IDE_PM_RESTORE_PIO: /* Resume step 1 (restore PIO) */
134 ide_set_max_pio(drive); 126 ide_set_max_pio(drive);
@@ -141,7 +133,7 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
141 ide_complete_power_step(drive, rq); 133 ide_complete_power_step(drive, rq);
142 return ide_stopped; 134 return ide_stopped;
143 case IDE_PM_IDLE: /* Resume step 2 (idle) */ 135 case IDE_PM_IDLE: /* Resume step 2 (idle) */
144 cmd->tf.command = ATA_CMD_IDLEIMMEDIATE; 136 cmd.tf.command = ATA_CMD_IDLEIMMEDIATE;
145 goto out_do_tf; 137 goto out_do_tf;
146 case IDE_PM_RESTORE_DMA: /* Resume step 3 (restore DMA) */ 138 case IDE_PM_RESTORE_DMA: /* Resume step 3 (restore DMA) */
147 /* 139 /*
@@ -163,11 +155,11 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
163 return ide_stopped; 155 return ide_stopped;
164 156
165out_do_tf: 157out_do_tf:
166 cmd->valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; 158 cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
167 cmd->valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; 159 cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE;
168 cmd->protocol = ATA_PROT_NODATA; 160 cmd.protocol = ATA_PROT_NODATA;
169 161
170 return do_rw_taskfile(drive, cmd); 162 return do_rw_taskfile(drive, &cmd);
171} 163}
172 164
173/** 165/**
@@ -181,7 +173,7 @@ out_do_tf:
181void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) 173void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
182{ 174{
183 struct request_queue *q = drive->queue; 175 struct request_queue *q = drive->queue;
184 struct request_pm_state *pm = rq->data; 176 struct request_pm_state *pm = rq->special;
185 unsigned long flags; 177 unsigned long flags;
186 178
187 ide_complete_power_step(drive, rq); 179 ide_complete_power_step(drive, rq);
@@ -207,7 +199,7 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
207 199
208void ide_check_pm_state(ide_drive_t *drive, struct request *rq) 200void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
209{ 201{
210 struct request_pm_state *pm = rq->data; 202 struct request_pm_state *pm = rq->special;
211 203
212 if (blk_pm_suspend_request(rq) && 204 if (blk_pm_suspend_request(rq) &&
213 pm->pm_step == IDE_PM_START_SUSPEND) 205 pm->pm_step == IDE_PM_START_SUSPEND)
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 3a53e0834cf7..203bbeac182f 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -131,13 +131,6 @@ enum {
131 IDETAPE_DIR_WRITE = (1 << 2), 131 IDETAPE_DIR_WRITE = (1 << 2),
132}; 132};
133 133
134struct idetape_bh {
135 u32 b_size;
136 atomic_t b_count;
137 struct idetape_bh *b_reqnext;
138 char *b_data;
139};
140
141/* Tape door status */ 134/* Tape door status */
142#define DOOR_UNLOCKED 0 135#define DOOR_UNLOCKED 0
143#define DOOR_LOCKED 1 136#define DOOR_LOCKED 1
@@ -219,18 +212,12 @@ typedef struct ide_tape_obj {
219 212
220 /* Data buffer size chosen based on the tape's recommendation */ 213 /* Data buffer size chosen based on the tape's recommendation */
221 int buffer_size; 214 int buffer_size;
222 /* merge buffer */ 215 /* Staging buffer of buffer_size bytes */
223 struct idetape_bh *merge_bh; 216 void *buf;
224 /* size of the merge buffer */ 217 /* The read/write cursor */
225 int merge_bh_size; 218 void *cur;
226 /* pointer to current buffer head within the merge buffer */ 219 /* The number of valid bytes in buf */
227 struct idetape_bh *bh; 220 size_t valid;
228 char *b_data;
229 int b_count;
230
231 int pages_per_buffer;
232 /* Wasted space in each stage */
233 int excess_bh_size;
234 221
235 /* Measures average tape speed */ 222 /* Measures average tape speed */
236 unsigned long avg_time; 223 unsigned long avg_time;
@@ -297,84 +284,6 @@ static struct ide_tape_obj *ide_tape_chrdev_get(unsigned int i)
297 return tape; 284 return tape;
298} 285}
299 286
300static int idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
301 unsigned int bcount)
302{
303 struct idetape_bh *bh = pc->bh;
304 int count;
305
306 while (bcount) {
307 if (bh == NULL)
308 break;
309 count = min(
310 (unsigned int)(bh->b_size - atomic_read(&bh->b_count)),
311 bcount);
312 drive->hwif->tp_ops->input_data(drive, NULL, bh->b_data +
313 atomic_read(&bh->b_count), count);
314 bcount -= count;
315 atomic_add(count, &bh->b_count);
316 if (atomic_read(&bh->b_count) == bh->b_size) {
317 bh = bh->b_reqnext;
318 if (bh)
319 atomic_set(&bh->b_count, 0);
320 }
321 }
322
323 pc->bh = bh;
324
325 return bcount;
326}
327
328static int idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
329 unsigned int bcount)
330{
331 struct idetape_bh *bh = pc->bh;
332 int count;
333
334 while (bcount) {
335 if (bh == NULL)
336 break;
337 count = min((unsigned int)pc->b_count, (unsigned int)bcount);
338 drive->hwif->tp_ops->output_data(drive, NULL, pc->b_data, count);
339 bcount -= count;
340 pc->b_data += count;
341 pc->b_count -= count;
342 if (!pc->b_count) {
343 bh = bh->b_reqnext;
344 pc->bh = bh;
345 if (bh) {
346 pc->b_data = bh->b_data;
347 pc->b_count = atomic_read(&bh->b_count);
348 }
349 }
350 }
351
352 return bcount;
353}
354
355static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc)
356{
357 struct idetape_bh *bh = pc->bh;
358 int count;
359 unsigned int bcount = pc->xferred;
360
361 if (pc->flags & PC_FLAG_WRITING)
362 return;
363 while (bcount) {
364 if (bh == NULL) {
365 printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
366 __func__);
367 return;
368 }
369 count = min((unsigned int)bh->b_size, (unsigned int)bcount);
370 atomic_set(&bh->b_count, count);
371 if (atomic_read(&bh->b_count) == bh->b_size)
372 bh = bh->b_reqnext;
373 bcount -= count;
374 }
375 pc->bh = bh;
376}
377
378/* 287/*
379 * called on each failed packet command retry to analyze the request sense. We 288 * called on each failed packet command retry to analyze the request sense. We
380 * currently do not utilize this information. 289 * currently do not utilize this information.
@@ -392,12 +301,10 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense)
392 pc->c[0], tape->sense_key, tape->asc, tape->ascq); 301 pc->c[0], tape->sense_key, tape->asc, tape->ascq);
393 302
394 /* Correct pc->xferred by asking the tape. */ 303 /* Correct pc->xferred by asking the tape. */
395 if (pc->flags & PC_FLAG_DMA_ERROR) { 304 if (pc->flags & PC_FLAG_DMA_ERROR)
396 pc->xferred = pc->req_xfer - 305 pc->xferred = pc->req_xfer -
397 tape->blk_size * 306 tape->blk_size *
398 get_unaligned_be32(&sense[3]); 307 get_unaligned_be32(&sense[3]);
399 idetape_update_buffers(drive, pc);
400 }
401 308
402 /* 309 /*
403 * If error was the result of a zero-length read or write command, 310 * If error was the result of a zero-length read or write command,
@@ -436,29 +343,6 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense)
436 } 343 }
437} 344}
438 345
439/* Free data buffers completely. */
440static void ide_tape_kfree_buffer(idetape_tape_t *tape)
441{
442 struct idetape_bh *prev_bh, *bh = tape->merge_bh;
443
444 while (bh) {
445 u32 size = bh->b_size;
446
447 while (size) {
448 unsigned int order = fls(size >> PAGE_SHIFT)-1;
449
450 if (bh->b_data)
451 free_pages((unsigned long)bh->b_data, order);
452
453 size &= (order-1);
454 bh->b_data += (1 << order) * PAGE_SIZE;
455 }
456 prev_bh = bh;
457 bh = bh->b_reqnext;
458 kfree(prev_bh);
459 }
460}
461
462static void ide_tape_handle_dsc(ide_drive_t *); 346static void ide_tape_handle_dsc(ide_drive_t *);
463 347
464static int ide_tape_callback(ide_drive_t *drive, int dsc) 348static int ide_tape_callback(ide_drive_t *drive, int dsc)
@@ -496,7 +380,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
496 } 380 }
497 381
498 tape->first_frame += blocks; 382 tape->first_frame += blocks;
499 rq->current_nr_sectors -= blocks; 383 rq->data_len -= blocks * tape->blk_size;
500 384
501 if (pc->error) { 385 if (pc->error) {
502 uptodate = 0; 386 uptodate = 0;
@@ -558,19 +442,6 @@ static void ide_tape_handle_dsc(ide_drive_t *drive)
558 idetape_postpone_request(drive); 442 idetape_postpone_request(drive);
559} 443}
560 444
561static int ide_tape_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
562 unsigned int bcount, int write)
563{
564 unsigned int bleft;
565
566 if (write)
567 bleft = idetape_output_buffers(drive, pc, bcount);
568 else
569 bleft = idetape_input_buffers(drive, pc, bcount);
570
571 return bcount - bleft;
572}
573
574/* 445/*
575 * Packet Command Interface 446 * Packet Command Interface
576 * 447 *
@@ -622,6 +493,8 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
622 493
623 if (pc->retries > IDETAPE_MAX_PC_RETRIES || 494 if (pc->retries > IDETAPE_MAX_PC_RETRIES ||
624 (pc->flags & PC_FLAG_ABORT)) { 495 (pc->flags & PC_FLAG_ABORT)) {
496 unsigned int done = blk_rq_bytes(drive->hwif->rq);
497
625 /* 498 /*
626 * We will "abort" retrying a packet command in case legitimate 499 * We will "abort" retrying a packet command in case legitimate
627 * error code was received (crossing a filemark, or end of the 500 * error code was received (crossing a filemark, or end of the
@@ -641,8 +514,10 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
641 /* Giving up */ 514 /* Giving up */
642 pc->error = IDE_DRV_ERROR_GENERAL; 515 pc->error = IDE_DRV_ERROR_GENERAL;
643 } 516 }
517
644 drive->failed_pc = NULL; 518 drive->failed_pc = NULL;
645 drive->pc_callback(drive, 0); 519 drive->pc_callback(drive, 0);
520 ide_complete_rq(drive, -EIO, done);
646 return ide_stopped; 521 return ide_stopped;
647 } 522 }
648 debug_log(DBG_SENSE, "Retry #%d, cmd = %02X\n", pc->retries, pc->c[0]); 523 debug_log(DBG_SENSE, "Retry #%d, cmd = %02X\n", pc->retries, pc->c[0]);
@@ -695,7 +570,7 @@ static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive)
695 printk(KERN_ERR "ide-tape: %s: I/O error, ", 570 printk(KERN_ERR "ide-tape: %s: I/O error, ",
696 tape->name); 571 tape->name);
697 /* Retry operation */ 572 /* Retry operation */
698 ide_retry_pc(drive, tape->disk); 573 ide_retry_pc(drive);
699 return ide_stopped; 574 return ide_stopped;
700 } 575 }
701 pc->error = 0; 576 pc->error = 0;
@@ -711,27 +586,22 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
711 struct ide_atapi_pc *pc, struct request *rq, 586 struct ide_atapi_pc *pc, struct request *rq,
712 u8 opcode) 587 u8 opcode)
713{ 588{
714 struct idetape_bh *bh = (struct idetape_bh *)rq->special; 589 unsigned int length = rq->nr_sectors;
715 unsigned int length = rq->current_nr_sectors;
716 590
717 ide_init_pc(pc); 591 ide_init_pc(pc);
718 put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]); 592 put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]);
719 pc->c[1] = 1; 593 pc->c[1] = 1;
720 pc->bh = bh;
721 pc->buf = NULL; 594 pc->buf = NULL;
722 pc->buf_size = length * tape->blk_size; 595 pc->buf_size = length * tape->blk_size;
723 pc->req_xfer = pc->buf_size; 596 pc->req_xfer = pc->buf_size;
724 if (pc->req_xfer == tape->buffer_size) 597 if (pc->req_xfer == tape->buffer_size)
725 pc->flags |= PC_FLAG_DMA_OK; 598 pc->flags |= PC_FLAG_DMA_OK;
726 599
727 if (opcode == READ_6) { 600 if (opcode == READ_6)
728 pc->c[0] = READ_6; 601 pc->c[0] = READ_6;
729 atomic_set(&bh->b_count, 0); 602 else if (opcode == WRITE_6) {
730 } else if (opcode == WRITE_6) {
731 pc->c[0] = WRITE_6; 603 pc->c[0] = WRITE_6;
732 pc->flags |= PC_FLAG_WRITING; 604 pc->flags |= PC_FLAG_WRITING;
733 pc->b_data = bh->b_data;
734 pc->b_count = atomic_read(&bh->b_count);
735 } 605 }
736 606
737 memcpy(rq->cmd, pc->c, 12); 607 memcpy(rq->cmd, pc->c, 12);
@@ -747,12 +617,10 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
747 struct ide_cmd cmd; 617 struct ide_cmd cmd;
748 u8 stat; 618 u8 stat;
749 619
750 debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu," 620 debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu\n",
751 " current_nr_sectors: %u\n", 621 (unsigned long long)rq->sector, rq->nr_sectors);
752 (unsigned long long)rq->sector, rq->nr_sectors,
753 rq->current_nr_sectors);
754 622
755 if (!blk_special_request(rq)) { 623 if (!(blk_special_request(rq) || blk_sense_request(rq))) {
756 /* We do not support buffer cache originated requests. */ 624 /* We do not support buffer cache originated requests. */
757 printk(KERN_NOTICE "ide-tape: %s: Unsupported request in " 625 printk(KERN_NOTICE "ide-tape: %s: Unsupported request in "
758 "request queue (%d)\n", drive->name, rq->cmd_type); 626 "request queue (%d)\n", drive->name, rq->cmd_type);
@@ -828,7 +696,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
828 goto out; 696 goto out;
829 } 697 }
830 if (rq->cmd[13] & REQ_IDETAPE_PC1) { 698 if (rq->cmd[13] & REQ_IDETAPE_PC1) {
831 pc = (struct ide_atapi_pc *) rq->buffer; 699 pc = (struct ide_atapi_pc *)rq->special;
832 rq->cmd[13] &= ~(REQ_IDETAPE_PC1); 700 rq->cmd[13] &= ~(REQ_IDETAPE_PC1);
833 rq->cmd[13] |= REQ_IDETAPE_PC2; 701 rq->cmd[13] |= REQ_IDETAPE_PC2;
834 goto out; 702 goto out;
@@ -840,6 +708,9 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
840 BUG(); 708 BUG();
841 709
842out: 710out:
711 /* prepare sense request for this command */
712 ide_prep_sense(drive, rq);
713
843 memset(&cmd, 0, sizeof(cmd)); 714 memset(&cmd, 0, sizeof(cmd));
844 715
845 if (rq_data_dir(rq)) 716 if (rq_data_dir(rq))
@@ -847,167 +718,10 @@ out:
847 718
848 cmd.rq = rq; 719 cmd.rq = rq;
849 720
850 return ide_tape_issue_pc(drive, &cmd, pc); 721 ide_init_sg_cmd(&cmd, pc->req_xfer);
851} 722 ide_map_sg(drive, &cmd);
852
853/*
854 * The function below uses __get_free_pages to allocate a data buffer of size
855 * tape->buffer_size (or a bit more). We attempt to combine sequential pages as
856 * much as possible.
857 *
858 * It returns a pointer to the newly allocated buffer, or NULL in case of
859 * failure.
860 */
861static struct idetape_bh *ide_tape_kmalloc_buffer(idetape_tape_t *tape,
862 int full, int clear)
863{
864 struct idetape_bh *prev_bh, *bh, *merge_bh;
865 int pages = tape->pages_per_buffer;
866 unsigned int order, b_allocd;
867 char *b_data = NULL;
868
869 merge_bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL);
870 bh = merge_bh;
871 if (bh == NULL)
872 goto abort;
873
874 order = fls(pages) - 1;
875 bh->b_data = (char *) __get_free_pages(GFP_KERNEL, order);
876 if (!bh->b_data)
877 goto abort;
878 b_allocd = (1 << order) * PAGE_SIZE;
879 pages &= (order-1);
880
881 if (clear)
882 memset(bh->b_data, 0, b_allocd);
883 bh->b_reqnext = NULL;
884 bh->b_size = b_allocd;
885 atomic_set(&bh->b_count, full ? bh->b_size : 0);
886
887 while (pages) {
888 order = fls(pages) - 1;
889 b_data = (char *) __get_free_pages(GFP_KERNEL, order);
890 if (!b_data)
891 goto abort;
892 b_allocd = (1 << order) * PAGE_SIZE;
893
894 if (clear)
895 memset(b_data, 0, b_allocd);
896
897 /* newly allocated page frames below buffer header or ...*/
898 if (bh->b_data == b_data + b_allocd) {
899 bh->b_size += b_allocd;
900 bh->b_data -= b_allocd;
901 if (full)
902 atomic_add(b_allocd, &bh->b_count);
903 continue;
904 }
905 /* they are above the header */
906 if (b_data == bh->b_data + bh->b_size) {
907 bh->b_size += b_allocd;
908 if (full)
909 atomic_add(b_allocd, &bh->b_count);
910 continue;
911 }
912 prev_bh = bh;
913 bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL);
914 if (!bh) {
915 free_pages((unsigned long) b_data, order);
916 goto abort;
917 }
918 bh->b_reqnext = NULL;
919 bh->b_data = b_data;
920 bh->b_size = b_allocd;
921 atomic_set(&bh->b_count, full ? bh->b_size : 0);
922 prev_bh->b_reqnext = bh;
923
924 pages &= (order-1);
925 }
926
927 bh->b_size -= tape->excess_bh_size;
928 if (full)
929 atomic_sub(tape->excess_bh_size, &bh->b_count);
930 return merge_bh;
931abort:
932 ide_tape_kfree_buffer(tape);
933 return NULL;
934}
935 723
936static int idetape_copy_stage_from_user(idetape_tape_t *tape, 724 return ide_tape_issue_pc(drive, &cmd, pc);
937 const char __user *buf, int n)
938{
939 struct idetape_bh *bh = tape->bh;
940 int count;
941 int ret = 0;
942
943 while (n) {
944 if (bh == NULL) {
945 printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
946 __func__);
947 return 1;
948 }
949 count = min((unsigned int)
950 (bh->b_size - atomic_read(&bh->b_count)),
951 (unsigned int)n);
952 if (copy_from_user(bh->b_data + atomic_read(&bh->b_count), buf,
953 count))
954 ret = 1;
955 n -= count;
956 atomic_add(count, &bh->b_count);
957 buf += count;
958 if (atomic_read(&bh->b_count) == bh->b_size) {
959 bh = bh->b_reqnext;
960 if (bh)
961 atomic_set(&bh->b_count, 0);
962 }
963 }
964 tape->bh = bh;
965 return ret;
966}
967
968static int idetape_copy_stage_to_user(idetape_tape_t *tape, char __user *buf,
969 int n)
970{
971 struct idetape_bh *bh = tape->bh;
972 int count;
973 int ret = 0;
974
975 while (n) {
976 if (bh == NULL) {
977 printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
978 __func__);
979 return 1;
980 }
981 count = min(tape->b_count, n);
982 if (copy_to_user(buf, tape->b_data, count))
983 ret = 1;
984 n -= count;
985 tape->b_data += count;
986 tape->b_count -= count;
987 buf += count;
988 if (!tape->b_count) {
989 bh = bh->b_reqnext;
990 tape->bh = bh;
991 if (bh) {
992 tape->b_data = bh->b_data;
993 tape->b_count = atomic_read(&bh->b_count);
994 }
995 }
996 }
997 return ret;
998}
999
1000static void idetape_init_merge_buffer(idetape_tape_t *tape)
1001{
1002 struct idetape_bh *bh = tape->merge_bh;
1003 tape->bh = tape->merge_bh;
1004
1005 if (tape->chrdev_dir == IDETAPE_DIR_WRITE)
1006 atomic_set(&bh->b_count, 0);
1007 else {
1008 tape->b_data = bh->b_data;
1009 tape->b_count = atomic_read(&bh->b_count);
1010 }
1011} 725}
1012 726
1013/* 727/*
@@ -1107,10 +821,10 @@ static void __ide_tape_discard_merge_buffer(ide_drive_t *drive)
1107 return; 821 return;
1108 822
1109 clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags); 823 clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags);
1110 tape->merge_bh_size = 0; 824 tape->valid = 0;
1111 if (tape->merge_bh != NULL) { 825 if (tape->buf != NULL) {
1112 ide_tape_kfree_buffer(tape); 826 kfree(tape->buf);
1113 tape->merge_bh = NULL; 827 tape->buf = NULL;
1114 } 828 }
1115 829
1116 tape->chrdev_dir = IDETAPE_DIR_NONE; 830 tape->chrdev_dir = IDETAPE_DIR_NONE;
@@ -1164,36 +878,44 @@ static void ide_tape_discard_merge_buffer(ide_drive_t *drive,
1164 * Generate a read/write request for the block device interface and wait for it 878 * Generate a read/write request for the block device interface and wait for it
1165 * to be serviced. 879 * to be serviced.
1166 */ 880 */
1167static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks, 881static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
1168 struct idetape_bh *bh)
1169{ 882{
1170 idetape_tape_t *tape = drive->driver_data; 883 idetape_tape_t *tape = drive->driver_data;
1171 struct request *rq; 884 struct request *rq;
1172 int ret, errors; 885 int ret;
1173 886
1174 debug_log(DBG_SENSE, "%s: cmd=%d\n", __func__, cmd); 887 debug_log(DBG_SENSE, "%s: cmd=%d\n", __func__, cmd);
888 BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE);
889 BUG_ON(size < 0 || size % tape->blk_size);
1175 890
1176 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 891 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
1177 rq->cmd_type = REQ_TYPE_SPECIAL; 892 rq->cmd_type = REQ_TYPE_SPECIAL;
1178 rq->cmd[13] = cmd; 893 rq->cmd[13] = cmd;
1179 rq->rq_disk = tape->disk; 894 rq->rq_disk = tape->disk;
1180 rq->special = (void *)bh;
1181 rq->sector = tape->first_frame; 895 rq->sector = tape->first_frame;
1182 rq->nr_sectors = blocks;
1183 rq->current_nr_sectors = blocks;
1184 blk_execute_rq(drive->queue, tape->disk, rq, 0);
1185 896
1186 errors = rq->errors; 897 if (size) {
1187 ret = tape->blk_size * (blocks - rq->current_nr_sectors); 898 ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size,
1188 blk_put_request(rq); 899 __GFP_WAIT);
900 if (ret)
901 goto out_put;
902 }
1189 903
1190 if ((cmd & (REQ_IDETAPE_READ | REQ_IDETAPE_WRITE)) == 0) 904 blk_execute_rq(drive->queue, tape->disk, rq, 0);
1191 return 0;
1192 905
1193 if (tape->merge_bh) 906 /* calculate the number of transferred bytes and update buffer state */
1194 idetape_init_merge_buffer(tape); 907 size -= rq->data_len;
1195 if (errors == IDE_DRV_ERROR_GENERAL) 908 tape->cur = tape->buf;
1196 return -EIO; 909 if (cmd == REQ_IDETAPE_READ)
910 tape->valid = size;
911 else
912 tape->valid = 0;
913
914 ret = size;
915 if (rq->errors == IDE_DRV_ERROR_GENERAL)
916 ret = -EIO;
917out_put:
918 blk_put_request(rq);
1197 return ret; 919 return ret;
1198} 920}
1199 921
@@ -1230,153 +952,87 @@ static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd)
1230 pc->flags |= PC_FLAG_WAIT_FOR_DSC; 952 pc->flags |= PC_FLAG_WAIT_FOR_DSC;
1231} 953}
1232 954
1233/* Queue up a character device originated write request. */
1234static int idetape_add_chrdev_write_request(ide_drive_t *drive, int blocks)
1235{
1236 idetape_tape_t *tape = drive->driver_data;
1237
1238 debug_log(DBG_CHRDEV, "Enter %s\n", __func__);
1239
1240 return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE,
1241 blocks, tape->merge_bh);
1242}
1243
1244static void ide_tape_flush_merge_buffer(ide_drive_t *drive) 955static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
1245{ 956{
1246 idetape_tape_t *tape = drive->driver_data; 957 idetape_tape_t *tape = drive->driver_data;
1247 int blocks, min;
1248 struct idetape_bh *bh;
1249 958
1250 if (tape->chrdev_dir != IDETAPE_DIR_WRITE) { 959 if (tape->chrdev_dir != IDETAPE_DIR_WRITE) {
1251 printk(KERN_ERR "ide-tape: bug: Trying to empty merge buffer" 960 printk(KERN_ERR "ide-tape: bug: Trying to empty merge buffer"
1252 " but we are not writing.\n"); 961 " but we are not writing.\n");
1253 return; 962 return;
1254 } 963 }
1255 if (tape->merge_bh_size > tape->buffer_size) { 964 if (tape->buf) {
1256 printk(KERN_ERR "ide-tape: bug: merge_buffer too big\n"); 965 size_t aligned = roundup(tape->valid, tape->blk_size);
1257 tape->merge_bh_size = tape->buffer_size; 966
1258 } 967 memset(tape->cur, 0, aligned - tape->valid);
1259 if (tape->merge_bh_size) { 968 idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, aligned);
1260 blocks = tape->merge_bh_size / tape->blk_size; 969 kfree(tape->buf);
1261 if (tape->merge_bh_size % tape->blk_size) { 970 tape->buf = NULL;
1262 unsigned int i;
1263
1264 blocks++;
1265 i = tape->blk_size - tape->merge_bh_size %
1266 tape->blk_size;
1267 bh = tape->bh->b_reqnext;
1268 while (bh) {
1269 atomic_set(&bh->b_count, 0);
1270 bh = bh->b_reqnext;
1271 }
1272 bh = tape->bh;
1273 while (i) {
1274 if (bh == NULL) {
1275 printk(KERN_INFO "ide-tape: bug,"
1276 " bh NULL\n");
1277 break;
1278 }
1279 min = min(i, (unsigned int)(bh->b_size -
1280 atomic_read(&bh->b_count)));
1281 memset(bh->b_data + atomic_read(&bh->b_count),
1282 0, min);
1283 atomic_add(min, &bh->b_count);
1284 i -= min;
1285 bh = bh->b_reqnext;
1286 }
1287 }
1288 (void) idetape_add_chrdev_write_request(drive, blocks);
1289 tape->merge_bh_size = 0;
1290 }
1291 if (tape->merge_bh != NULL) {
1292 ide_tape_kfree_buffer(tape);
1293 tape->merge_bh = NULL;
1294 } 971 }
1295 tape->chrdev_dir = IDETAPE_DIR_NONE; 972 tape->chrdev_dir = IDETAPE_DIR_NONE;
1296} 973}
1297 974
1298static int idetape_init_read(ide_drive_t *drive) 975static int idetape_init_rw(ide_drive_t *drive, int dir)
1299{ 976{
1300 idetape_tape_t *tape = drive->driver_data; 977 idetape_tape_t *tape = drive->driver_data;
1301 int bytes_read; 978 int rc;
1302 979
1303 /* Initialize read operation */ 980 BUG_ON(dir != IDETAPE_DIR_READ && dir != IDETAPE_DIR_WRITE);
1304 if (tape->chrdev_dir != IDETAPE_DIR_READ) {
1305 if (tape->chrdev_dir == IDETAPE_DIR_WRITE) {
1306 ide_tape_flush_merge_buffer(drive);
1307 idetape_flush_tape_buffers(drive);
1308 }
1309 if (tape->merge_bh || tape->merge_bh_size) {
1310 printk(KERN_ERR "ide-tape: merge_bh_size should be"
1311 " 0 now\n");
1312 tape->merge_bh_size = 0;
1313 }
1314 tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0);
1315 if (!tape->merge_bh)
1316 return -ENOMEM;
1317 tape->chrdev_dir = IDETAPE_DIR_READ;
1318 981
1319 /* 982 if (tape->chrdev_dir == dir)
1320 * Issue a read 0 command to ensure that DSC handshake is 983 return 0;
1321 * switched from completion mode to buffer available mode.
1322 * No point in issuing this if DSC overlap isn't supported, some
1323 * drives (Seagate STT3401A) will return an error.
1324 */
1325 if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
1326 bytes_read = idetape_queue_rw_tail(drive,
1327 REQ_IDETAPE_READ, 0,
1328 tape->merge_bh);
1329 if (bytes_read < 0) {
1330 ide_tape_kfree_buffer(tape);
1331 tape->merge_bh = NULL;
1332 tape->chrdev_dir = IDETAPE_DIR_NONE;
1333 return bytes_read;
1334 }
1335 }
1336 }
1337 984
1338 return 0; 985 if (tape->chrdev_dir == IDETAPE_DIR_READ)
1339} 986 ide_tape_discard_merge_buffer(drive, 1);
987 else if (tape->chrdev_dir == IDETAPE_DIR_WRITE) {
988 ide_tape_flush_merge_buffer(drive);
989 idetape_flush_tape_buffers(drive);
990 }
1340 991
1341/* called from idetape_chrdev_read() to service a chrdev read request. */ 992 if (tape->buf || tape->valid) {
1342static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks) 993 printk(KERN_ERR "ide-tape: valid should be 0 now\n");
1343{ 994 tape->valid = 0;
1344 idetape_tape_t *tape = drive->driver_data; 995 }
1345 996
1346 debug_log(DBG_PROCS, "Enter %s, %d blocks\n", __func__, blocks); 997 tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
998 if (!tape->buf)
999 return -ENOMEM;
1000 tape->chrdev_dir = dir;
1001 tape->cur = tape->buf;
1347 1002
1348 /* If we are at a filemark, return a read length of 0 */ 1003 /*
1349 if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) 1004 * Issue a 0 rw command to ensure that DSC handshake is
1350 return 0; 1005 * switched from completion mode to buffer available mode. No
1351 1006 * point in issuing this if DSC overlap isn't supported, some
1352 idetape_init_read(drive); 1007 * drives (Seagate STT3401A) will return an error.
1008 */
1009 if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
1010 int cmd = dir == IDETAPE_DIR_READ ? REQ_IDETAPE_READ
1011 : REQ_IDETAPE_WRITE;
1012
1013 rc = idetape_queue_rw_tail(drive, cmd, 0);
1014 if (rc < 0) {
1015 kfree(tape->buf);
1016 tape->buf = NULL;
1017 tape->chrdev_dir = IDETAPE_DIR_NONE;
1018 return rc;
1019 }
1020 }
1353 1021
1354 return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks, 1022 return 0;
1355 tape->merge_bh);
1356} 1023}
1357 1024
1358static void idetape_pad_zeros(ide_drive_t *drive, int bcount) 1025static void idetape_pad_zeros(ide_drive_t *drive, int bcount)
1359{ 1026{
1360 idetape_tape_t *tape = drive->driver_data; 1027 idetape_tape_t *tape = drive->driver_data;
1361 struct idetape_bh *bh; 1028
1362 int blocks; 1029 memset(tape->buf, 0, tape->buffer_size);
1363 1030
1364 while (bcount) { 1031 while (bcount) {
1365 unsigned int count; 1032 unsigned int count = min(tape->buffer_size, bcount);
1366 1033
1367 bh = tape->merge_bh; 1034 idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, count);
1368 count = min(tape->buffer_size, bcount);
1369 bcount -= count; 1035 bcount -= count;
1370 blocks = count / tape->blk_size;
1371 while (count) {
1372 atomic_set(&bh->b_count,
1373 min(count, (unsigned int)bh->b_size));
1374 memset(bh->b_data, 0, atomic_read(&bh->b_count));
1375 count -= atomic_read(&bh->b_count);
1376 bh = bh->b_reqnext;
1377 }
1378 idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks,
1379 tape->merge_bh);
1380 } 1036 }
1381} 1037}
1382 1038
@@ -1456,7 +1112,7 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op,
1456 } 1112 }
1457 1113
1458 if (tape->chrdev_dir == IDETAPE_DIR_READ) { 1114 if (tape->chrdev_dir == IDETAPE_DIR_READ) {
1459 tape->merge_bh_size = 0; 1115 tape->valid = 0;
1460 if (test_and_clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) 1116 if (test_and_clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
1461 ++count; 1117 ++count;
1462 ide_tape_discard_merge_buffer(drive, 0); 1118 ide_tape_discard_merge_buffer(drive, 0);
@@ -1505,9 +1161,9 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
1505{ 1161{
1506 struct ide_tape_obj *tape = file->private_data; 1162 struct ide_tape_obj *tape = file->private_data;
1507 ide_drive_t *drive = tape->drive; 1163 ide_drive_t *drive = tape->drive;
1508 ssize_t bytes_read, temp, actually_read = 0, rc; 1164 size_t done = 0;
1509 ssize_t ret = 0; 1165 ssize_t ret = 0;
1510 u16 ctl = *(u16 *)&tape->caps[12]; 1166 int rc;
1511 1167
1512 debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count); 1168 debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count);
1513 1169
@@ -1517,49 +1173,43 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
1517 (count % tape->blk_size) == 0) 1173 (count % tape->blk_size) == 0)
1518 tape->user_bs_factor = count / tape->blk_size; 1174 tape->user_bs_factor = count / tape->blk_size;
1519 } 1175 }
1520 rc = idetape_init_read(drive); 1176
1177 rc = idetape_init_rw(drive, IDETAPE_DIR_READ);
1521 if (rc < 0) 1178 if (rc < 0)
1522 return rc; 1179 return rc;
1523 if (count == 0) 1180
1524 return (0); 1181 while (done < count) {
1525 if (tape->merge_bh_size) { 1182 size_t todo;
1526 actually_read = min((unsigned int)(tape->merge_bh_size), 1183
1527 (unsigned int)count); 1184 /* refill if staging buffer is empty */
1528 if (idetape_copy_stage_to_user(tape, buf, actually_read)) 1185 if (!tape->valid) {
1529 ret = -EFAULT; 1186 /* If we are at a filemark, nothing more to read */
1530 buf += actually_read; 1187 if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
1531 tape->merge_bh_size -= actually_read; 1188 break;
1532 count -= actually_read; 1189 /* read */
1533 } 1190 if (idetape_queue_rw_tail(drive, REQ_IDETAPE_READ,
1534 while (count >= tape->buffer_size) { 1191 tape->buffer_size) <= 0)
1535 bytes_read = idetape_add_chrdev_read_request(drive, ctl); 1192 break;
1536 if (bytes_read <= 0) 1193 }
1537 goto finish; 1194
1538 if (idetape_copy_stage_to_user(tape, buf, bytes_read)) 1195 /* copy out */
1539 ret = -EFAULT; 1196 todo = min_t(size_t, count - done, tape->valid);
1540 buf += bytes_read; 1197 if (copy_to_user(buf + done, tape->cur, todo))
1541 count -= bytes_read;
1542 actually_read += bytes_read;
1543 }
1544 if (count) {
1545 bytes_read = idetape_add_chrdev_read_request(drive, ctl);
1546 if (bytes_read <= 0)
1547 goto finish;
1548 temp = min((unsigned long)count, (unsigned long)bytes_read);
1549 if (idetape_copy_stage_to_user(tape, buf, temp))
1550 ret = -EFAULT; 1198 ret = -EFAULT;
1551 actually_read += temp; 1199
1552 tape->merge_bh_size = bytes_read-temp; 1200 tape->cur += todo;
1201 tape->valid -= todo;
1202 done += todo;
1553 } 1203 }
1554finish: 1204
1555 if (!actually_read && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) { 1205 if (!done && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) {
1556 debug_log(DBG_SENSE, "%s: spacing over filemark\n", tape->name); 1206 debug_log(DBG_SENSE, "%s: spacing over filemark\n", tape->name);
1557 1207
1558 idetape_space_over_filemarks(drive, MTFSF, 1); 1208 idetape_space_over_filemarks(drive, MTFSF, 1);
1559 return 0; 1209 return 0;
1560 } 1210 }
1561 1211
1562 return ret ? ret : actually_read; 1212 return ret ? ret : done;
1563} 1213}
1564 1214
1565static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, 1215static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
@@ -1567,9 +1217,9 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
1567{ 1217{
1568 struct ide_tape_obj *tape = file->private_data; 1218 struct ide_tape_obj *tape = file->private_data;
1569 ide_drive_t *drive = tape->drive; 1219 ide_drive_t *drive = tape->drive;
1570 ssize_t actually_written = 0; 1220 size_t done = 0;
1571 ssize_t ret = 0; 1221 ssize_t ret = 0;
1572 u16 ctl = *(u16 *)&tape->caps[12]; 1222 int rc;
1573 1223
1574 /* The drive is write protected. */ 1224 /* The drive is write protected. */
1575 if (tape->write_prot) 1225 if (tape->write_prot)
@@ -1578,80 +1228,31 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
1578 debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count); 1228 debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count);
1579 1229
1580 /* Initialize write operation */ 1230 /* Initialize write operation */
1581 if (tape->chrdev_dir != IDETAPE_DIR_WRITE) { 1231 rc = idetape_init_rw(drive, IDETAPE_DIR_WRITE);
1582 if (tape->chrdev_dir == IDETAPE_DIR_READ) 1232 if (rc < 0)
1583 ide_tape_discard_merge_buffer(drive, 1); 1233 return rc;
1584 if (tape->merge_bh || tape->merge_bh_size) {
1585 printk(KERN_ERR "ide-tape: merge_bh_size "
1586 "should be 0 now\n");
1587 tape->merge_bh_size = 0;
1588 }
1589 tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0);
1590 if (!tape->merge_bh)
1591 return -ENOMEM;
1592 tape->chrdev_dir = IDETAPE_DIR_WRITE;
1593 idetape_init_merge_buffer(tape);
1594 1234
1595 /* 1235 while (done < count) {
1596 * Issue a write 0 command to ensure that DSC handshake is 1236 size_t todo;
1597 * switched from completion mode to buffer available mode. No 1237
1598 * point in issuing this if DSC overlap isn't supported, some 1238 /* flush if staging buffer is full */
1599 * drives (Seagate STT3401A) will return an error. 1239 if (tape->valid == tape->buffer_size &&
1600 */ 1240 idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE,
1601 if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) { 1241 tape->buffer_size) <= 0)
1602 ssize_t retval = idetape_queue_rw_tail(drive, 1242 return rc;
1603 REQ_IDETAPE_WRITE, 0, 1243
1604 tape->merge_bh); 1244 /* copy in */
1605 if (retval < 0) { 1245 todo = min_t(size_t, count - done,
1606 ide_tape_kfree_buffer(tape); 1246 tape->buffer_size - tape->valid);
1607 tape->merge_bh = NULL; 1247 if (copy_from_user(tape->cur, buf + done, todo))
1608 tape->chrdev_dir = IDETAPE_DIR_NONE;
1609 return retval;
1610 }
1611 }
1612 }
1613 if (count == 0)
1614 return (0);
1615 if (tape->merge_bh_size) {
1616 if (tape->merge_bh_size >= tape->buffer_size) {
1617 printk(KERN_ERR "ide-tape: bug: merge buf too big\n");
1618 tape->merge_bh_size = 0;
1619 }
1620 actually_written = min((unsigned int)
1621 (tape->buffer_size - tape->merge_bh_size),
1622 (unsigned int)count);
1623 if (idetape_copy_stage_from_user(tape, buf, actually_written))
1624 ret = -EFAULT;
1625 buf += actually_written;
1626 tape->merge_bh_size += actually_written;
1627 count -= actually_written;
1628
1629 if (tape->merge_bh_size == tape->buffer_size) {
1630 ssize_t retval;
1631 tape->merge_bh_size = 0;
1632 retval = idetape_add_chrdev_write_request(drive, ctl);
1633 if (retval <= 0)
1634 return (retval);
1635 }
1636 }
1637 while (count >= tape->buffer_size) {
1638 ssize_t retval;
1639 if (idetape_copy_stage_from_user(tape, buf, tape->buffer_size))
1640 ret = -EFAULT;
1641 buf += tape->buffer_size;
1642 count -= tape->buffer_size;
1643 retval = idetape_add_chrdev_write_request(drive, ctl);
1644 actually_written += tape->buffer_size;
1645 if (retval <= 0)
1646 return (retval);
1647 }
1648 if (count) {
1649 actually_written += count;
1650 if (idetape_copy_stage_from_user(tape, buf, count))
1651 ret = -EFAULT; 1248 ret = -EFAULT;
1652 tape->merge_bh_size += count; 1249
1250 tape->cur += todo;
1251 tape->valid += todo;
1252 done += todo;
1653 } 1253 }
1654 return ret ? ret : actually_written; 1254
1255 return ret ? ret : done;
1655} 1256}
1656 1257
1657static int idetape_write_filemark(ide_drive_t *drive) 1258static int idetape_write_filemark(ide_drive_t *drive)
@@ -1812,7 +1413,7 @@ static int idetape_chrdev_ioctl(struct inode *inode, struct file *file,
1812 idetape_flush_tape_buffers(drive); 1413 idetape_flush_tape_buffers(drive);
1813 } 1414 }
1814 if (cmd == MTIOCGET || cmd == MTIOCPOS) { 1415 if (cmd == MTIOCGET || cmd == MTIOCPOS) {
1815 block_offset = tape->merge_bh_size / 1416 block_offset = tape->valid /
1816 (tape->blk_size * tape->user_bs_factor); 1417 (tape->blk_size * tape->user_bs_factor);
1817 position = idetape_read_position(drive); 1418 position = idetape_read_position(drive);
1818 if (position < 0) 1419 if (position < 0)
@@ -1960,12 +1561,12 @@ static void idetape_write_release(ide_drive_t *drive, unsigned int minor)
1960 idetape_tape_t *tape = drive->driver_data; 1561 idetape_tape_t *tape = drive->driver_data;
1961 1562
1962 ide_tape_flush_merge_buffer(drive); 1563 ide_tape_flush_merge_buffer(drive);
1963 tape->merge_bh = ide_tape_kmalloc_buffer(tape, 1, 0); 1564 tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
1964 if (tape->merge_bh != NULL) { 1565 if (tape->buf != NULL) {
1965 idetape_pad_zeros(drive, tape->blk_size * 1566 idetape_pad_zeros(drive, tape->blk_size *
1966 (tape->user_bs_factor - 1)); 1567 (tape->user_bs_factor - 1));
1967 ide_tape_kfree_buffer(tape); 1568 kfree(tape->buf);
1968 tape->merge_bh = NULL; 1569 tape->buf = NULL;
1969 } 1570 }
1970 idetape_write_filemark(drive); 1571 idetape_write_filemark(drive);
1971 idetape_flush_tape_buffers(drive); 1572 idetape_flush_tape_buffers(drive);
@@ -2159,8 +1760,6 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor)
2159 u16 *ctl = (u16 *)&tape->caps[12]; 1760 u16 *ctl = (u16 *)&tape->caps[12];
2160 1761
2161 drive->pc_callback = ide_tape_callback; 1762 drive->pc_callback = ide_tape_callback;
2162 drive->pc_update_buffers = idetape_update_buffers;
2163 drive->pc_io_buffers = ide_tape_io_buffers;
2164 1763
2165 drive->dev_flags |= IDE_DFLAG_DSC_OVERLAP; 1764 drive->dev_flags |= IDE_DFLAG_DSC_OVERLAP;
2166 1765
@@ -2191,11 +1790,6 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor)
2191 tape->buffer_size = *ctl * tape->blk_size; 1790 tape->buffer_size = *ctl * tape->blk_size;
2192 } 1791 }
2193 buffer_size = tape->buffer_size; 1792 buffer_size = tape->buffer_size;
2194 tape->pages_per_buffer = buffer_size / PAGE_SIZE;
2195 if (buffer_size % PAGE_SIZE) {
2196 tape->pages_per_buffer++;
2197 tape->excess_bh_size = PAGE_SIZE - buffer_size % PAGE_SIZE;
2198 }
2199 1793
2200 /* select the "best" DSC read/write polling freq */ 1794 /* select the "best" DSC read/write polling freq */
2201 speed = max(*(u16 *)&tape->caps[14], *(u16 *)&tape->caps[8]); 1795 speed = max(*(u16 *)&tape->caps[14], *(u16 *)&tape->caps[8]);
@@ -2238,7 +1832,7 @@ static void ide_tape_release(struct device *dev)
2238 ide_drive_t *drive = tape->drive; 1832 ide_drive_t *drive = tape->drive;
2239 struct gendisk *g = tape->disk; 1833 struct gendisk *g = tape->disk;
2240 1834
2241 BUG_ON(tape->merge_bh_size); 1835 BUG_ON(tape->valid);
2242 1836
2243 drive->dev_flags &= ~IDE_DFLAG_DSC_OVERLAP; 1837 drive->dev_flags &= ~IDE_DFLAG_DSC_OVERLAP;
2244 drive->driver_data = NULL; 1838 drive->driver_data = NULL;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 4aa6223c11be..f400eb4d4aff 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -424,7 +424,9 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
424 424
425 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 425 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
426 rq->cmd_type = REQ_TYPE_ATA_TASKFILE; 426 rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
427 rq->buffer = buf; 427
428 if (cmd->tf_flags & IDE_TFLAG_WRITE)
429 rq->cmd_flags |= REQ_RW;
428 430
429 /* 431 /*
430 * (ks) We transfer currently only whole sectors. 432 * (ks) We transfer currently only whole sectors.
@@ -432,18 +434,20 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
432 * if we would find a solution to transfer any size. 434 * if we would find a solution to transfer any size.
433 * To support special commands like READ LONG. 435 * To support special commands like READ LONG.
434 */ 436 */
435 rq->hard_nr_sectors = rq->nr_sectors = nsect; 437 if (nsect) {
436 rq->hard_cur_sectors = rq->current_nr_sectors = nsect; 438 error = blk_rq_map_kern(drive->queue, rq, buf,
437 439 nsect * SECTOR_SIZE, __GFP_WAIT);
438 if (cmd->tf_flags & IDE_TFLAG_WRITE) 440 if (error)
439 rq->cmd_flags |= REQ_RW; 441 goto put_req;
442 }
440 443
441 rq->special = cmd; 444 rq->special = cmd;
442 cmd->rq = rq; 445 cmd->rq = rq;
443 446
444 error = blk_execute_rq(drive->queue, NULL, rq, 0); 447 error = blk_execute_rq(drive->queue, NULL, rq, 0);
445 blk_put_request(rq);
446 448
449put_req:
450 blk_put_request(rq);
447 return error; 451 return error;
448} 452}
449 453
diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
index 4e63cc9e2778..151bf5bc8afe 100644
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c
@@ -1,5 +1,5 @@
1/* Low-level parallel-port routines for 8255-based PC-style hardware. 1/* Low-level parallel-port routines for 8255-based PC-style hardware.
2 * 2 *
3 * Authors: Phil Blundell <philb@gnu.org> 3 * Authors: Phil Blundell <philb@gnu.org>
4 * Tim Waugh <tim@cyberelk.demon.co.uk> 4 * Tim Waugh <tim@cyberelk.demon.co.uk>
5 * Jose Renau <renau@acm.org> 5 * Jose Renau <renau@acm.org>
@@ -11,7 +11,7 @@
11 * Cleaned up include files - Russell King <linux@arm.uk.linux.org> 11 * Cleaned up include files - Russell King <linux@arm.uk.linux.org>
12 * DMA support - Bert De Jonghe <bert@sophis.be> 12 * DMA support - Bert De Jonghe <bert@sophis.be>
13 * Many ECP bugs fixed. Fred Barnes & Jamie Lokier, 1999 13 * Many ECP bugs fixed. Fred Barnes & Jamie Lokier, 1999
14 * More PCI support now conditional on CONFIG_PCI, 03/2001, Paul G. 14 * More PCI support now conditional on CONFIG_PCI, 03/2001, Paul G.
15 * Various hacks, Fred Barnes, 04/2001 15 * Various hacks, Fred Barnes, 04/2001
16 * Updated probing logic - Adam Belay <ambx1@neo.rr.com> 16 * Updated probing logic - Adam Belay <ambx1@neo.rr.com>
17 */ 17 */
@@ -56,10 +56,10 @@
56#include <linux/pnp.h> 56#include <linux/pnp.h>
57#include <linux/platform_device.h> 57#include <linux/platform_device.h>
58#include <linux/sysctl.h> 58#include <linux/sysctl.h>
59#include <linux/io.h>
60#include <linux/uaccess.h>
59 61
60#include <asm/io.h>
61#include <asm/dma.h> 62#include <asm/dma.h>
62#include <asm/uaccess.h>
63 63
64#include <linux/parport.h> 64#include <linux/parport.h>
65#include <linux/parport_pc.h> 65#include <linux/parport_pc.h>
@@ -82,7 +82,7 @@
82#define ECR_TST 06 82#define ECR_TST 06
83#define ECR_CNF 07 83#define ECR_CNF 07
84#define ECR_MODE_MASK 0xe0 84#define ECR_MODE_MASK 0xe0
85#define ECR_WRITE(p,v) frob_econtrol((p),0xff,(v)) 85#define ECR_WRITE(p, v) frob_econtrol((p), 0xff, (v))
86 86
87#undef DEBUG 87#undef DEBUG
88 88
@@ -109,27 +109,27 @@ static int pci_registered_parport;
109static int pnp_registered_parport; 109static int pnp_registered_parport;
110 110
111/* frob_control, but for ECR */ 111/* frob_control, but for ECR */
112static void frob_econtrol (struct parport *pb, unsigned char m, 112static void frob_econtrol(struct parport *pb, unsigned char m,
113 unsigned char v) 113 unsigned char v)
114{ 114{
115 unsigned char ectr = 0; 115 unsigned char ectr = 0;
116 116
117 if (m != 0xff) 117 if (m != 0xff)
118 ectr = inb (ECONTROL (pb)); 118 ectr = inb(ECONTROL(pb));
119 119
120 DPRINTK (KERN_DEBUG "frob_econtrol(%02x,%02x): %02x -> %02x\n", 120 DPRINTK(KERN_DEBUG "frob_econtrol(%02x,%02x): %02x -> %02x\n",
121 m, v, ectr, (ectr & ~m) ^ v); 121 m, v, ectr, (ectr & ~m) ^ v);
122 122
123 outb ((ectr & ~m) ^ v, ECONTROL (pb)); 123 outb((ectr & ~m) ^ v, ECONTROL(pb));
124} 124}
125 125
126static __inline__ void frob_set_mode (struct parport *p, int mode) 126static inline void frob_set_mode(struct parport *p, int mode)
127{ 127{
128 frob_econtrol (p, ECR_MODE_MASK, mode << 5); 128 frob_econtrol(p, ECR_MODE_MASK, mode << 5);
129} 129}
130 130
131#ifdef CONFIG_PARPORT_PC_FIFO 131#ifdef CONFIG_PARPORT_PC_FIFO
132/* Safely change the mode bits in the ECR 132/* Safely change the mode bits in the ECR
133 Returns: 133 Returns:
134 0 : Success 134 0 : Success
135 -EBUSY: Could not drain FIFO in some finite amount of time, 135 -EBUSY: Could not drain FIFO in some finite amount of time,
@@ -141,17 +141,18 @@ static int change_mode(struct parport *p, int m)
141 unsigned char oecr; 141 unsigned char oecr;
142 int mode; 142 int mode;
143 143
144 DPRINTK(KERN_INFO "parport change_mode ECP-ISA to mode 0x%02x\n",m); 144 DPRINTK(KERN_INFO "parport change_mode ECP-ISA to mode 0x%02x\n", m);
145 145
146 if (!priv->ecr) { 146 if (!priv->ecr) {
147 printk (KERN_DEBUG "change_mode: but there's no ECR!\n"); 147 printk(KERN_DEBUG "change_mode: but there's no ECR!\n");
148 return 0; 148 return 0;
149 } 149 }
150 150
151 /* Bits <7:5> contain the mode. */ 151 /* Bits <7:5> contain the mode. */
152 oecr = inb (ECONTROL (p)); 152 oecr = inb(ECONTROL(p));
153 mode = (oecr >> 5) & 0x7; 153 mode = (oecr >> 5) & 0x7;
154 if (mode == m) return 0; 154 if (mode == m)
155 return 0;
155 156
156 if (mode >= 2 && !(priv->ctr & 0x20)) { 157 if (mode >= 2 && !(priv->ctr & 0x20)) {
157 /* This mode resets the FIFO, so we may 158 /* This mode resets the FIFO, so we may
@@ -163,19 +164,21 @@ static int change_mode(struct parport *p, int m)
163 case ECR_ECP: /* ECP Parallel Port mode */ 164 case ECR_ECP: /* ECP Parallel Port mode */
164 /* Busy wait for 200us */ 165 /* Busy wait for 200us */
165 for (counter = 0; counter < 40; counter++) { 166 for (counter = 0; counter < 40; counter++) {
166 if (inb (ECONTROL (p)) & 0x01) 167 if (inb(ECONTROL(p)) & 0x01)
168 break;
169 if (signal_pending(current))
167 break; 170 break;
168 if (signal_pending (current)) break; 171 udelay(5);
169 udelay (5);
170 } 172 }
171 173
172 /* Poll slowly. */ 174 /* Poll slowly. */
173 while (!(inb (ECONTROL (p)) & 0x01)) { 175 while (!(inb(ECONTROL(p)) & 0x01)) {
174 if (time_after_eq (jiffies, expire)) 176 if (time_after_eq(jiffies, expire))
175 /* The FIFO is stuck. */ 177 /* The FIFO is stuck. */
176 return -EBUSY; 178 return -EBUSY;
177 schedule_timeout_interruptible(msecs_to_jiffies(10)); 179 schedule_timeout_interruptible(
178 if (signal_pending (current)) 180 msecs_to_jiffies(10));
181 if (signal_pending(current))
179 break; 182 break;
180 } 183 }
181 } 184 }
@@ -185,20 +188,20 @@ static int change_mode(struct parport *p, int m)
185 /* We have to go through mode 001 */ 188 /* We have to go through mode 001 */
186 oecr &= ~(7 << 5); 189 oecr &= ~(7 << 5);
187 oecr |= ECR_PS2 << 5; 190 oecr |= ECR_PS2 << 5;
188 ECR_WRITE (p, oecr); 191 ECR_WRITE(p, oecr);
189 } 192 }
190 193
191 /* Set the mode. */ 194 /* Set the mode. */
192 oecr &= ~(7 << 5); 195 oecr &= ~(7 << 5);
193 oecr |= m << 5; 196 oecr |= m << 5;
194 ECR_WRITE (p, oecr); 197 ECR_WRITE(p, oecr);
195 return 0; 198 return 0;
196} 199}
197 200
198#ifdef CONFIG_PARPORT_1284 201#ifdef CONFIG_PARPORT_1284
199/* Find FIFO lossage; FIFO is reset */ 202/* Find FIFO lossage; FIFO is reset */
200#if 0 203#if 0
201static int get_fifo_residue (struct parport *p) 204static int get_fifo_residue(struct parport *p)
202{ 205{
203 int residue; 206 int residue;
204 int cnfga; 207 int cnfga;
@@ -206,26 +209,26 @@ static int get_fifo_residue (struct parport *p)
206 209
207 /* Adjust for the contents of the FIFO. */ 210 /* Adjust for the contents of the FIFO. */
208 for (residue = priv->fifo_depth; ; residue--) { 211 for (residue = priv->fifo_depth; ; residue--) {
209 if (inb (ECONTROL (p)) & 0x2) 212 if (inb(ECONTROL(p)) & 0x2)
210 /* Full up. */ 213 /* Full up. */
211 break; 214 break;
212 215
213 outb (0, FIFO (p)); 216 outb(0, FIFO(p));
214 } 217 }
215 218
216 printk (KERN_DEBUG "%s: %d PWords were left in FIFO\n", p->name, 219 printk(KERN_DEBUG "%s: %d PWords were left in FIFO\n", p->name,
217 residue); 220 residue);
218 221
219 /* Reset the FIFO. */ 222 /* Reset the FIFO. */
220 frob_set_mode (p, ECR_PS2); 223 frob_set_mode(p, ECR_PS2);
221 224
222 /* Now change to config mode and clean up. FIXME */ 225 /* Now change to config mode and clean up. FIXME */
223 frob_set_mode (p, ECR_CNF); 226 frob_set_mode(p, ECR_CNF);
224 cnfga = inb (CONFIGA (p)); 227 cnfga = inb(CONFIGA(p));
225 printk (KERN_DEBUG "%s: cnfgA contains 0x%02x\n", p->name, cnfga); 228 printk(KERN_DEBUG "%s: cnfgA contains 0x%02x\n", p->name, cnfga);
226 229
227 if (!(cnfga & (1<<2))) { 230 if (!(cnfga & (1<<2))) {
228 printk (KERN_DEBUG "%s: Accounting for extra byte\n", p->name); 231 printk(KERN_DEBUG "%s: Accounting for extra byte\n", p->name);
229 residue++; 232 residue++;
230 } 233 }
231 234
@@ -233,9 +236,11 @@ static int get_fifo_residue (struct parport *p)
233 * PWord != 1 byte. */ 236 * PWord != 1 byte. */
234 237
235 /* Back to PS2 mode. */ 238 /* Back to PS2 mode. */
236 frob_set_mode (p, ECR_PS2); 239 frob_set_mode(p, ECR_PS2);
237 240
238 DPRINTK (KERN_DEBUG "*** get_fifo_residue: done residue collecting (ecr = 0x%2.2x)\n", inb (ECONTROL (p))); 241 DPRINTK(KERN_DEBUG
242 "*** get_fifo_residue: done residue collecting (ecr = 0x%2.2x)\n",
243 inb(ECONTROL(p)));
239 return residue; 244 return residue;
240} 245}
241#endif /* 0 */ 246#endif /* 0 */
@@ -257,8 +262,8 @@ static int clear_epp_timeout(struct parport *pb)
257 /* To clear timeout some chips require double read */ 262 /* To clear timeout some chips require double read */
258 parport_pc_read_status(pb); 263 parport_pc_read_status(pb);
259 r = parport_pc_read_status(pb); 264 r = parport_pc_read_status(pb);
260 outb (r | 0x01, STATUS (pb)); /* Some reset by writing 1 */ 265 outb(r | 0x01, STATUS(pb)); /* Some reset by writing 1 */
261 outb (r & 0xfe, STATUS (pb)); /* Others by writing 0 */ 266 outb(r & 0xfe, STATUS(pb)); /* Others by writing 0 */
262 r = parport_pc_read_status(pb); 267 r = parport_pc_read_status(pb);
263 268
264 return !(r & 0x01); 269 return !(r & 0x01);
@@ -272,7 +277,8 @@ static int clear_epp_timeout(struct parport *pb)
272 * of these are in parport_pc.h. 277 * of these are in parport_pc.h.
273 */ 278 */
274 279
275static void parport_pc_init_state(struct pardevice *dev, struct parport_state *s) 280static void parport_pc_init_state(struct pardevice *dev,
281 struct parport_state *s)
276{ 282{
277 s->u.pc.ctr = 0xc; 283 s->u.pc.ctr = 0xc;
278 if (dev->irq_func && 284 if (dev->irq_func &&
@@ -289,22 +295,23 @@ static void parport_pc_save_state(struct parport *p, struct parport_state *s)
289 const struct parport_pc_private *priv = p->physport->private_data; 295 const struct parport_pc_private *priv = p->physport->private_data;
290 s->u.pc.ctr = priv->ctr; 296 s->u.pc.ctr = priv->ctr;
291 if (priv->ecr) 297 if (priv->ecr)
292 s->u.pc.ecr = inb (ECONTROL (p)); 298 s->u.pc.ecr = inb(ECONTROL(p));
293} 299}
294 300
295static void parport_pc_restore_state(struct parport *p, struct parport_state *s) 301static void parport_pc_restore_state(struct parport *p,
302 struct parport_state *s)
296{ 303{
297 struct parport_pc_private *priv = p->physport->private_data; 304 struct parport_pc_private *priv = p->physport->private_data;
298 register unsigned char c = s->u.pc.ctr & priv->ctr_writable; 305 register unsigned char c = s->u.pc.ctr & priv->ctr_writable;
299 outb (c, CONTROL (p)); 306 outb(c, CONTROL(p));
300 priv->ctr = c; 307 priv->ctr = c;
301 if (priv->ecr) 308 if (priv->ecr)
302 ECR_WRITE (p, s->u.pc.ecr); 309 ECR_WRITE(p, s->u.pc.ecr);
303} 310}
304 311
305#ifdef CONFIG_PARPORT_1284 312#ifdef CONFIG_PARPORT_1284
306static size_t parport_pc_epp_read_data (struct parport *port, void *buf, 313static size_t parport_pc_epp_read_data(struct parport *port, void *buf,
307 size_t length, int flags) 314 size_t length, int flags)
308{ 315{
309 size_t got = 0; 316 size_t got = 0;
310 317
@@ -316,54 +323,52 @@ static size_t parport_pc_epp_read_data (struct parport *port, void *buf,
316 * nFault is 0 if there is at least 1 byte in the Warp's FIFO 323 * nFault is 0 if there is at least 1 byte in the Warp's FIFO
317 * pError is 1 if there are 16 bytes in the Warp's FIFO 324 * pError is 1 if there are 16 bytes in the Warp's FIFO
318 */ 325 */
319 status = inb (STATUS (port)); 326 status = inb(STATUS(port));
320 327
321 while (!(status & 0x08) && (got < length)) { 328 while (!(status & 0x08) && got < length) {
322 if ((left >= 16) && (status & 0x20) && !(status & 0x08)) { 329 if (left >= 16 && (status & 0x20) && !(status & 0x08)) {
323 /* can grab 16 bytes from warp fifo */ 330 /* can grab 16 bytes from warp fifo */
324 if (!((long)buf & 0x03)) { 331 if (!((long)buf & 0x03))
325 insl (EPPDATA (port), buf, 4); 332 insl(EPPDATA(port), buf, 4);
326 } else { 333 else
327 insb (EPPDATA (port), buf, 16); 334 insb(EPPDATA(port), buf, 16);
328 }
329 buf += 16; 335 buf += 16;
330 got += 16; 336 got += 16;
331 left -= 16; 337 left -= 16;
332 } else { 338 } else {
333 /* grab single byte from the warp fifo */ 339 /* grab single byte from the warp fifo */
334 *((char *)buf) = inb (EPPDATA (port)); 340 *((char *)buf) = inb(EPPDATA(port));
335 buf++; 341 buf++;
336 got++; 342 got++;
337 left--; 343 left--;
338 } 344 }
339 status = inb (STATUS (port)); 345 status = inb(STATUS(port));
340 if (status & 0x01) { 346 if (status & 0x01) {
341 /* EPP timeout should never occur... */ 347 /* EPP timeout should never occur... */
342 printk (KERN_DEBUG "%s: EPP timeout occurred while talking to " 348 printk(KERN_DEBUG
343 "w91284pic (should not have done)\n", port->name); 349"%s: EPP timeout occurred while talking to w91284pic (should not have done)\n", port->name);
344 clear_epp_timeout (port); 350 clear_epp_timeout(port);
345 } 351 }
346 } 352 }
347 return got; 353 return got;
348 } 354 }
349 if ((flags & PARPORT_EPP_FAST) && (length > 1)) { 355 if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
350 if (!(((long)buf | length) & 0x03)) { 356 if (!(((long)buf | length) & 0x03))
351 insl (EPPDATA (port), buf, (length >> 2)); 357 insl(EPPDATA(port), buf, (length >> 2));
352 } else { 358 else
353 insb (EPPDATA (port), buf, length); 359 insb(EPPDATA(port), buf, length);
354 } 360 if (inb(STATUS(port)) & 0x01) {
355 if (inb (STATUS (port)) & 0x01) { 361 clear_epp_timeout(port);
356 clear_epp_timeout (port);
357 return -EIO; 362 return -EIO;
358 } 363 }
359 return length; 364 return length;
360 } 365 }
361 for (; got < length; got++) { 366 for (; got < length; got++) {
362 *((char*)buf) = inb (EPPDATA(port)); 367 *((char *)buf) = inb(EPPDATA(port));
363 buf++; 368 buf++;
364 if (inb (STATUS (port)) & 0x01) { 369 if (inb(STATUS(port)) & 0x01) {
365 /* EPP timeout */ 370 /* EPP timeout */
366 clear_epp_timeout (port); 371 clear_epp_timeout(port);
367 break; 372 break;
368 } 373 }
369 } 374 }
@@ -371,28 +376,27 @@ static size_t parport_pc_epp_read_data (struct parport *port, void *buf,
371 return got; 376 return got;
372} 377}
373 378
374static size_t parport_pc_epp_write_data (struct parport *port, const void *buf, 379static size_t parport_pc_epp_write_data(struct parport *port, const void *buf,
375 size_t length, int flags) 380 size_t length, int flags)
376{ 381{
377 size_t written = 0; 382 size_t written = 0;
378 383
379 if ((flags & PARPORT_EPP_FAST) && (length > 1)) { 384 if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
380 if (!(((long)buf | length) & 0x03)) { 385 if (!(((long)buf | length) & 0x03))
381 outsl (EPPDATA (port), buf, (length >> 2)); 386 outsl(EPPDATA(port), buf, (length >> 2));
382 } else { 387 else
383 outsb (EPPDATA (port), buf, length); 388 outsb(EPPDATA(port), buf, length);
384 } 389 if (inb(STATUS(port)) & 0x01) {
385 if (inb (STATUS (port)) & 0x01) { 390 clear_epp_timeout(port);
386 clear_epp_timeout (port);
387 return -EIO; 391 return -EIO;
388 } 392 }
389 return length; 393 return length;
390 } 394 }
391 for (; written < length; written++) { 395 for (; written < length; written++) {
392 outb (*((char*)buf), EPPDATA(port)); 396 outb(*((char *)buf), EPPDATA(port));
393 buf++; 397 buf++;
394 if (inb (STATUS(port)) & 0x01) { 398 if (inb(STATUS(port)) & 0x01) {
395 clear_epp_timeout (port); 399 clear_epp_timeout(port);
396 break; 400 break;
397 } 401 }
398 } 402 }
@@ -400,24 +404,24 @@ static size_t parport_pc_epp_write_data (struct parport *port, const void *buf,
400 return written; 404 return written;
401} 405}
402 406
403static size_t parport_pc_epp_read_addr (struct parport *port, void *buf, 407static size_t parport_pc_epp_read_addr(struct parport *port, void *buf,
404 size_t length, int flags) 408 size_t length, int flags)
405{ 409{
406 size_t got = 0; 410 size_t got = 0;
407 411
408 if ((flags & PARPORT_EPP_FAST) && (length > 1)) { 412 if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
409 insb (EPPADDR (port), buf, length); 413 insb(EPPADDR(port), buf, length);
410 if (inb (STATUS (port)) & 0x01) { 414 if (inb(STATUS(port)) & 0x01) {
411 clear_epp_timeout (port); 415 clear_epp_timeout(port);
412 return -EIO; 416 return -EIO;
413 } 417 }
414 return length; 418 return length;
415 } 419 }
416 for (; got < length; got++) { 420 for (; got < length; got++) {
417 *((char*)buf) = inb (EPPADDR (port)); 421 *((char *)buf) = inb(EPPADDR(port));
418 buf++; 422 buf++;
419 if (inb (STATUS (port)) & 0x01) { 423 if (inb(STATUS(port)) & 0x01) {
420 clear_epp_timeout (port); 424 clear_epp_timeout(port);
421 break; 425 break;
422 } 426 }
423 } 427 }
@@ -425,25 +429,25 @@ static size_t parport_pc_epp_read_addr (struct parport *port, void *buf,
425 return got; 429 return got;
426} 430}
427 431
428static size_t parport_pc_epp_write_addr (struct parport *port, 432static size_t parport_pc_epp_write_addr(struct parport *port,
429 const void *buf, size_t length, 433 const void *buf, size_t length,
430 int flags) 434 int flags)
431{ 435{
432 size_t written = 0; 436 size_t written = 0;
433 437
434 if ((flags & PARPORT_EPP_FAST) && (length > 1)) { 438 if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
435 outsb (EPPADDR (port), buf, length); 439 outsb(EPPADDR(port), buf, length);
436 if (inb (STATUS (port)) & 0x01) { 440 if (inb(STATUS(port)) & 0x01) {
437 clear_epp_timeout (port); 441 clear_epp_timeout(port);
438 return -EIO; 442 return -EIO;
439 } 443 }
440 return length; 444 return length;
441 } 445 }
442 for (; written < length; written++) { 446 for (; written < length; written++) {
443 outb (*((char*)buf), EPPADDR (port)); 447 outb(*((char *)buf), EPPADDR(port));
444 buf++; 448 buf++;
445 if (inb (STATUS (port)) & 0x01) { 449 if (inb(STATUS(port)) & 0x01) {
446 clear_epp_timeout (port); 450 clear_epp_timeout(port);
447 break; 451 break;
448 } 452 }
449 } 453 }
@@ -451,74 +455,74 @@ static size_t parport_pc_epp_write_addr (struct parport *port,
451 return written; 455 return written;
452} 456}
453 457
454static size_t parport_pc_ecpepp_read_data (struct parport *port, void *buf, 458static size_t parport_pc_ecpepp_read_data(struct parport *port, void *buf,
455 size_t length, int flags) 459 size_t length, int flags)
456{ 460{
457 size_t got; 461 size_t got;
458 462
459 frob_set_mode (port, ECR_EPP); 463 frob_set_mode(port, ECR_EPP);
460 parport_pc_data_reverse (port); 464 parport_pc_data_reverse(port);
461 parport_pc_write_control (port, 0x4); 465 parport_pc_write_control(port, 0x4);
462 got = parport_pc_epp_read_data (port, buf, length, flags); 466 got = parport_pc_epp_read_data(port, buf, length, flags);
463 frob_set_mode (port, ECR_PS2); 467 frob_set_mode(port, ECR_PS2);
464 468
465 return got; 469 return got;
466} 470}
467 471
468static size_t parport_pc_ecpepp_write_data (struct parport *port, 472static size_t parport_pc_ecpepp_write_data(struct parport *port,
469 const void *buf, size_t length, 473 const void *buf, size_t length,
470 int flags) 474 int flags)
471{ 475{
472 size_t written; 476 size_t written;
473 477
474 frob_set_mode (port, ECR_EPP); 478 frob_set_mode(port, ECR_EPP);
475 parport_pc_write_control (port, 0x4); 479 parport_pc_write_control(port, 0x4);
476 parport_pc_data_forward (port); 480 parport_pc_data_forward(port);
477 written = parport_pc_epp_write_data (port, buf, length, flags); 481 written = parport_pc_epp_write_data(port, buf, length, flags);
478 frob_set_mode (port, ECR_PS2); 482 frob_set_mode(port, ECR_PS2);
479 483
480 return written; 484 return written;
481} 485}
482 486
483static size_t parport_pc_ecpepp_read_addr (struct parport *port, void *buf, 487static size_t parport_pc_ecpepp_read_addr(struct parport *port, void *buf,
484 size_t length, int flags) 488 size_t length, int flags)
485{ 489{
486 size_t got; 490 size_t got;
487 491
488 frob_set_mode (port, ECR_EPP); 492 frob_set_mode(port, ECR_EPP);
489 parport_pc_data_reverse (port); 493 parport_pc_data_reverse(port);
490 parport_pc_write_control (port, 0x4); 494 parport_pc_write_control(port, 0x4);
491 got = parport_pc_epp_read_addr (port, buf, length, flags); 495 got = parport_pc_epp_read_addr(port, buf, length, flags);
492 frob_set_mode (port, ECR_PS2); 496 frob_set_mode(port, ECR_PS2);
493 497
494 return got; 498 return got;
495} 499}
496 500
497static size_t parport_pc_ecpepp_write_addr (struct parport *port, 501static size_t parport_pc_ecpepp_write_addr(struct parport *port,
498 const void *buf, size_t length, 502 const void *buf, size_t length,
499 int flags) 503 int flags)
500{ 504{
501 size_t written; 505 size_t written;
502 506
503 frob_set_mode (port, ECR_EPP); 507 frob_set_mode(port, ECR_EPP);
504 parport_pc_write_control (port, 0x4); 508 parport_pc_write_control(port, 0x4);
505 parport_pc_data_forward (port); 509 parport_pc_data_forward(port);
506 written = parport_pc_epp_write_addr (port, buf, length, flags); 510 written = parport_pc_epp_write_addr(port, buf, length, flags);
507 frob_set_mode (port, ECR_PS2); 511 frob_set_mode(port, ECR_PS2);
508 512
509 return written; 513 return written;
510} 514}
511#endif /* IEEE 1284 support */ 515#endif /* IEEE 1284 support */
512 516
513#ifdef CONFIG_PARPORT_PC_FIFO 517#ifdef CONFIG_PARPORT_PC_FIFO
514static size_t parport_pc_fifo_write_block_pio (struct parport *port, 518static size_t parport_pc_fifo_write_block_pio(struct parport *port,
515 const void *buf, size_t length) 519 const void *buf, size_t length)
516{ 520{
517 int ret = 0; 521 int ret = 0;
518 const unsigned char *bufp = buf; 522 const unsigned char *bufp = buf;
519 size_t left = length; 523 size_t left = length;
520 unsigned long expire = jiffies + port->physport->cad->timeout; 524 unsigned long expire = jiffies + port->physport->cad->timeout;
521 const int fifo = FIFO (port); 525 const int fifo = FIFO(port);
522 int poll_for = 8; /* 80 usecs */ 526 int poll_for = 8; /* 80 usecs */
523 const struct parport_pc_private *priv = port->physport->private_data; 527 const struct parport_pc_private *priv = port->physport->private_data;
524 const int fifo_depth = priv->fifo_depth; 528 const int fifo_depth = priv->fifo_depth;
@@ -526,25 +530,25 @@ static size_t parport_pc_fifo_write_block_pio (struct parport *port,
526 port = port->physport; 530 port = port->physport;
527 531
528 /* We don't want to be interrupted every character. */ 532 /* We don't want to be interrupted every character. */
529 parport_pc_disable_irq (port); 533 parport_pc_disable_irq(port);
530 /* set nErrIntrEn and serviceIntr */ 534 /* set nErrIntrEn and serviceIntr */
531 frob_econtrol (port, (1<<4) | (1<<2), (1<<4) | (1<<2)); 535 frob_econtrol(port, (1<<4) | (1<<2), (1<<4) | (1<<2));
532 536
533 /* Forward mode. */ 537 /* Forward mode. */
534 parport_pc_data_forward (port); /* Must be in PS2 mode */ 538 parport_pc_data_forward(port); /* Must be in PS2 mode */
535 539
536 while (left) { 540 while (left) {
537 unsigned char byte; 541 unsigned char byte;
538 unsigned char ecrval = inb (ECONTROL (port)); 542 unsigned char ecrval = inb(ECONTROL(port));
539 int i = 0; 543 int i = 0;
540 544
541 if (need_resched() && time_before (jiffies, expire)) 545 if (need_resched() && time_before(jiffies, expire))
542 /* Can't yield the port. */ 546 /* Can't yield the port. */
543 schedule (); 547 schedule();
544 548
545 /* Anyone else waiting for the port? */ 549 /* Anyone else waiting for the port? */
546 if (port->waithead) { 550 if (port->waithead) {
547 printk (KERN_DEBUG "Somebody wants the port\n"); 551 printk(KERN_DEBUG "Somebody wants the port\n");
548 break; 552 break;
549 } 553 }
550 554
@@ -552,21 +556,22 @@ static size_t parport_pc_fifo_write_block_pio (struct parport *port,
552 /* FIFO is full. Wait for interrupt. */ 556 /* FIFO is full. Wait for interrupt. */
553 557
554 /* Clear serviceIntr */ 558 /* Clear serviceIntr */
555 ECR_WRITE (port, ecrval & ~(1<<2)); 559 ECR_WRITE(port, ecrval & ~(1<<2));
556 false_alarm: 560false_alarm:
557 ret = parport_wait_event (port, HZ); 561 ret = parport_wait_event(port, HZ);
558 if (ret < 0) break; 562 if (ret < 0)
563 break;
559 ret = 0; 564 ret = 0;
560 if (!time_before (jiffies, expire)) { 565 if (!time_before(jiffies, expire)) {
561 /* Timed out. */ 566 /* Timed out. */
562 printk (KERN_DEBUG "FIFO write timed out\n"); 567 printk(KERN_DEBUG "FIFO write timed out\n");
563 break; 568 break;
564 } 569 }
565 ecrval = inb (ECONTROL (port)); 570 ecrval = inb(ECONTROL(port));
566 if (!(ecrval & (1<<2))) { 571 if (!(ecrval & (1<<2))) {
567 if (need_resched() && 572 if (need_resched() &&
568 time_before (jiffies, expire)) 573 time_before(jiffies, expire))
569 schedule (); 574 schedule();
570 575
571 goto false_alarm; 576 goto false_alarm;
572 } 577 }
@@ -577,38 +582,38 @@ static size_t parport_pc_fifo_write_block_pio (struct parport *port,
577 /* Can't fail now. */ 582 /* Can't fail now. */
578 expire = jiffies + port->cad->timeout; 583 expire = jiffies + port->cad->timeout;
579 584
580 poll: 585poll:
581 if (signal_pending (current)) 586 if (signal_pending(current))
582 break; 587 break;
583 588
584 if (ecrval & 0x01) { 589 if (ecrval & 0x01) {
585 /* FIFO is empty. Blast it full. */ 590 /* FIFO is empty. Blast it full. */
586 const int n = left < fifo_depth ? left : fifo_depth; 591 const int n = left < fifo_depth ? left : fifo_depth;
587 outsb (fifo, bufp, n); 592 outsb(fifo, bufp, n);
588 bufp += n; 593 bufp += n;
589 left -= n; 594 left -= n;
590 595
591 /* Adjust the poll time. */ 596 /* Adjust the poll time. */
592 if (i < (poll_for - 2)) poll_for--; 597 if (i < (poll_for - 2))
598 poll_for--;
593 continue; 599 continue;
594 } else if (i++ < poll_for) { 600 } else if (i++ < poll_for) {
595 udelay (10); 601 udelay(10);
596 ecrval = inb (ECONTROL (port)); 602 ecrval = inb(ECONTROL(port));
597 goto poll; 603 goto poll;
598 } 604 }
599 605
600 /* Half-full (call me an optimist) */ 606 /* Half-full(call me an optimist) */
601 byte = *bufp++; 607 byte = *bufp++;
602 outb (byte, fifo); 608 outb(byte, fifo);
603 left--; 609 left--;
604 } 610 }
605 611 dump_parport_state("leave fifo_write_block_pio", port);
606dump_parport_state ("leave fifo_write_block_pio", port);
607 return length - left; 612 return length - left;
608} 613}
609 614
610#ifdef HAS_DMA 615#ifdef HAS_DMA
611static size_t parport_pc_fifo_write_block_dma (struct parport *port, 616static size_t parport_pc_fifo_write_block_dma(struct parport *port,
612 const void *buf, size_t length) 617 const void *buf, size_t length)
613{ 618{
614 int ret = 0; 619 int ret = 0;
@@ -621,7 +626,7 @@ static size_t parport_pc_fifo_write_block_dma (struct parport *port,
621 unsigned long start = (unsigned long) buf; 626 unsigned long start = (unsigned long) buf;
622 unsigned long end = (unsigned long) buf + length - 1; 627 unsigned long end = (unsigned long) buf + length - 1;
623 628
624dump_parport_state ("enter fifo_write_block_dma", port); 629 dump_parport_state("enter fifo_write_block_dma", port);
625 if (end < MAX_DMA_ADDRESS) { 630 if (end < MAX_DMA_ADDRESS) {
626 /* If it would cross a 64k boundary, cap it at the end. */ 631 /* If it would cross a 64k boundary, cap it at the end. */
627 if ((start ^ end) & ~0xffffUL) 632 if ((start ^ end) & ~0xffffUL)
@@ -629,8 +634,9 @@ dump_parport_state ("enter fifo_write_block_dma", port);
629 634
630 dma_addr = dma_handle = dma_map_single(dev, (void *)buf, length, 635 dma_addr = dma_handle = dma_map_single(dev, (void *)buf, length,
631 DMA_TO_DEVICE); 636 DMA_TO_DEVICE);
632 } else { 637 } else {
633 /* above 16 MB we use a bounce buffer as ISA-DMA is not possible */ 638 /* above 16 MB we use a bounce buffer as ISA-DMA
639 is not possible */
634 maxlen = PAGE_SIZE; /* sizeof(priv->dma_buf) */ 640 maxlen = PAGE_SIZE; /* sizeof(priv->dma_buf) */
635 dma_addr = priv->dma_handle; 641 dma_addr = priv->dma_handle;
636 dma_handle = 0; 642 dma_handle = 0;
@@ -639,12 +645,12 @@ dump_parport_state ("enter fifo_write_block_dma", port);
639 port = port->physport; 645 port = port->physport;
640 646
641 /* We don't want to be interrupted every character. */ 647 /* We don't want to be interrupted every character. */
642 parport_pc_disable_irq (port); 648 parport_pc_disable_irq(port);
643 /* set nErrIntrEn and serviceIntr */ 649 /* set nErrIntrEn and serviceIntr */
644 frob_econtrol (port, (1<<4) | (1<<2), (1<<4) | (1<<2)); 650 frob_econtrol(port, (1<<4) | (1<<2), (1<<4) | (1<<2));
645 651
646 /* Forward mode. */ 652 /* Forward mode. */
647 parport_pc_data_forward (port); /* Must be in PS2 mode */ 653 parport_pc_data_forward(port); /* Must be in PS2 mode */
648 654
649 while (left) { 655 while (left) {
650 unsigned long expire = jiffies + port->physport->cad->timeout; 656 unsigned long expire = jiffies + port->physport->cad->timeout;
@@ -665,10 +671,10 @@ dump_parport_state ("enter fifo_write_block_dma", port);
665 set_dma_count(port->dma, count); 671 set_dma_count(port->dma, count);
666 672
667 /* Set DMA mode */ 673 /* Set DMA mode */
668 frob_econtrol (port, 1<<3, 1<<3); 674 frob_econtrol(port, 1<<3, 1<<3);
669 675
670 /* Clear serviceIntr */ 676 /* Clear serviceIntr */
671 frob_econtrol (port, 1<<2, 0); 677 frob_econtrol(port, 1<<2, 0);
672 678
673 enable_dma(port->dma); 679 enable_dma(port->dma);
674 release_dma_lock(dmaflag); 680 release_dma_lock(dmaflag);
@@ -676,20 +682,22 @@ dump_parport_state ("enter fifo_write_block_dma", port);
676 /* assume DMA will be successful */ 682 /* assume DMA will be successful */
677 left -= count; 683 left -= count;
678 buf += count; 684 buf += count;
679 if (dma_handle) dma_addr += count; 685 if (dma_handle)
686 dma_addr += count;
680 687
681 /* Wait for interrupt. */ 688 /* Wait for interrupt. */
682 false_alarm: 689false_alarm:
683 ret = parport_wait_event (port, HZ); 690 ret = parport_wait_event(port, HZ);
684 if (ret < 0) break; 691 if (ret < 0)
692 break;
685 ret = 0; 693 ret = 0;
686 if (!time_before (jiffies, expire)) { 694 if (!time_before(jiffies, expire)) {
687 /* Timed out. */ 695 /* Timed out. */
688 printk (KERN_DEBUG "DMA write timed out\n"); 696 printk(KERN_DEBUG "DMA write timed out\n");
689 break; 697 break;
690 } 698 }
691 /* Is serviceIntr set? */ 699 /* Is serviceIntr set? */
692 if (!(inb (ECONTROL (port)) & (1<<2))) { 700 if (!(inb(ECONTROL(port)) & (1<<2))) {
693 cond_resched(); 701 cond_resched();
694 702
695 goto false_alarm; 703 goto false_alarm;
@@ -705,14 +713,15 @@ dump_parport_state ("enter fifo_write_block_dma", port);
705 713
706 /* Anyone else waiting for the port? */ 714 /* Anyone else waiting for the port? */
707 if (port->waithead) { 715 if (port->waithead) {
708 printk (KERN_DEBUG "Somebody wants the port\n"); 716 printk(KERN_DEBUG "Somebody wants the port\n");
709 break; 717 break;
710 } 718 }
711 719
712 /* update for possible DMA residue ! */ 720 /* update for possible DMA residue ! */
713 buf -= count; 721 buf -= count;
714 left += count; 722 left += count;
715 if (dma_handle) dma_addr -= count; 723 if (dma_handle)
724 dma_addr -= count;
716 } 725 }
717 726
718 /* Maybe got here through break, so adjust for DMA residue! */ 727 /* Maybe got here through break, so adjust for DMA residue! */
@@ -723,12 +732,12 @@ dump_parport_state ("enter fifo_write_block_dma", port);
723 release_dma_lock(dmaflag); 732 release_dma_lock(dmaflag);
724 733
725 /* Turn off DMA mode */ 734 /* Turn off DMA mode */
726 frob_econtrol (port, 1<<3, 0); 735 frob_econtrol(port, 1<<3, 0);
727 736
728 if (dma_handle) 737 if (dma_handle)
729 dma_unmap_single(dev, dma_handle, length, DMA_TO_DEVICE); 738 dma_unmap_single(dev, dma_handle, length, DMA_TO_DEVICE);
730 739
731dump_parport_state ("leave fifo_write_block_dma", port); 740 dump_parport_state("leave fifo_write_block_dma", port);
732 return length - left; 741 return length - left;
733} 742}
734#endif 743#endif
@@ -738,13 +747,13 @@ static inline size_t parport_pc_fifo_write_block(struct parport *port,
738{ 747{
739#ifdef HAS_DMA 748#ifdef HAS_DMA
740 if (port->dma != PARPORT_DMA_NONE) 749 if (port->dma != PARPORT_DMA_NONE)
741 return parport_pc_fifo_write_block_dma (port, buf, length); 750 return parport_pc_fifo_write_block_dma(port, buf, length);
742#endif 751#endif
743 return parport_pc_fifo_write_block_pio (port, buf, length); 752 return parport_pc_fifo_write_block_pio(port, buf, length);
744} 753}
745 754
746/* Parallel Port FIFO mode (ECP chipsets) */ 755/* Parallel Port FIFO mode (ECP chipsets) */
747static size_t parport_pc_compat_write_block_pio (struct parport *port, 756static size_t parport_pc_compat_write_block_pio(struct parport *port,
748 const void *buf, size_t length, 757 const void *buf, size_t length,
749 int flags) 758 int flags)
750{ 759{
@@ -756,14 +765,16 @@ static size_t parport_pc_compat_write_block_pio (struct parport *port,
756 /* Special case: a timeout of zero means we cannot call schedule(). 765 /* Special case: a timeout of zero means we cannot call schedule().
757 * Also if O_NONBLOCK is set then use the default implementation. */ 766 * Also if O_NONBLOCK is set then use the default implementation. */
758 if (port->physport->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK) 767 if (port->physport->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK)
759 return parport_ieee1284_write_compat (port, buf, 768 return parport_ieee1284_write_compat(port, buf,
760 length, flags); 769 length, flags);
761 770
762 /* Set up parallel port FIFO mode.*/ 771 /* Set up parallel port FIFO mode.*/
763 parport_pc_data_forward (port); /* Must be in PS2 mode */ 772 parport_pc_data_forward(port); /* Must be in PS2 mode */
764 parport_pc_frob_control (port, PARPORT_CONTROL_STROBE, 0); 773 parport_pc_frob_control(port, PARPORT_CONTROL_STROBE, 0);
765 r = change_mode (port, ECR_PPF); /* Parallel port FIFO */ 774 r = change_mode(port, ECR_PPF); /* Parallel port FIFO */
766 if (r) printk (KERN_DEBUG "%s: Warning change_mode ECR_PPF failed\n", port->name); 775 if (r)
776 printk(KERN_DEBUG "%s: Warning change_mode ECR_PPF failed\n",
777 port->name);
767 778
768 port->physport->ieee1284.phase = IEEE1284_PH_FWD_DATA; 779 port->physport->ieee1284.phase = IEEE1284_PH_FWD_DATA;
769 780
@@ -775,40 +786,39 @@ static size_t parport_pc_compat_write_block_pio (struct parport *port,
775 * the FIFO is empty, so allow 4 seconds for each position 786 * the FIFO is empty, so allow 4 seconds for each position
776 * in the fifo. 787 * in the fifo.
777 */ 788 */
778 expire = jiffies + (priv->fifo_depth * HZ * 4); 789 expire = jiffies + (priv->fifo_depth * HZ * 4);
779 do { 790 do {
780 /* Wait for the FIFO to empty */ 791 /* Wait for the FIFO to empty */
781 r = change_mode (port, ECR_PS2); 792 r = change_mode(port, ECR_PS2);
782 if (r != -EBUSY) { 793 if (r != -EBUSY)
783 break; 794 break;
784 } 795 } while (time_before(jiffies, expire));
785 } while (time_before (jiffies, expire));
786 if (r == -EBUSY) { 796 if (r == -EBUSY) {
787 797
788 printk (KERN_DEBUG "%s: FIFO is stuck\n", port->name); 798 printk(KERN_DEBUG "%s: FIFO is stuck\n", port->name);
789 799
790 /* Prevent further data transfer. */ 800 /* Prevent further data transfer. */
791 frob_set_mode (port, ECR_TST); 801 frob_set_mode(port, ECR_TST);
792 802
793 /* Adjust for the contents of the FIFO. */ 803 /* Adjust for the contents of the FIFO. */
794 for (written -= priv->fifo_depth; ; written++) { 804 for (written -= priv->fifo_depth; ; written++) {
795 if (inb (ECONTROL (port)) & 0x2) { 805 if (inb(ECONTROL(port)) & 0x2) {
796 /* Full up. */ 806 /* Full up. */
797 break; 807 break;
798 } 808 }
799 outb (0, FIFO (port)); 809 outb(0, FIFO(port));
800 } 810 }
801 811
802 /* Reset the FIFO and return to PS2 mode. */ 812 /* Reset the FIFO and return to PS2 mode. */
803 frob_set_mode (port, ECR_PS2); 813 frob_set_mode(port, ECR_PS2);
804 } 814 }
805 815
806 r = parport_wait_peripheral (port, 816 r = parport_wait_peripheral(port,
807 PARPORT_STATUS_BUSY, 817 PARPORT_STATUS_BUSY,
808 PARPORT_STATUS_BUSY); 818 PARPORT_STATUS_BUSY);
809 if (r) 819 if (r)
810 printk (KERN_DEBUG 820 printk(KERN_DEBUG
811 "%s: BUSY timeout (%d) in compat_write_block_pio\n", 821 "%s: BUSY timeout (%d) in compat_write_block_pio\n",
812 port->name, r); 822 port->name, r);
813 823
814 port->physport->ieee1284.phase = IEEE1284_PH_FWD_IDLE; 824 port->physport->ieee1284.phase = IEEE1284_PH_FWD_IDLE;
@@ -818,7 +828,7 @@ static size_t parport_pc_compat_write_block_pio (struct parport *port,
818 828
819/* ECP */ 829/* ECP */
820#ifdef CONFIG_PARPORT_1284 830#ifdef CONFIG_PARPORT_1284
821static size_t parport_pc_ecp_write_block_pio (struct parport *port, 831static size_t parport_pc_ecp_write_block_pio(struct parport *port,
822 const void *buf, size_t length, 832 const void *buf, size_t length,
823 int flags) 833 int flags)
824{ 834{
@@ -830,36 +840,38 @@ static size_t parport_pc_ecp_write_block_pio (struct parport *port,
830 /* Special case: a timeout of zero means we cannot call schedule(). 840 /* Special case: a timeout of zero means we cannot call schedule().
831 * Also if O_NONBLOCK is set then use the default implementation. */ 841 * Also if O_NONBLOCK is set then use the default implementation. */
832 if (port->physport->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK) 842 if (port->physport->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK)
833 return parport_ieee1284_ecp_write_data (port, buf, 843 return parport_ieee1284_ecp_write_data(port, buf,
834 length, flags); 844 length, flags);
835 845
836 /* Switch to forward mode if necessary. */ 846 /* Switch to forward mode if necessary. */
837 if (port->physport->ieee1284.phase != IEEE1284_PH_FWD_IDLE) { 847 if (port->physport->ieee1284.phase != IEEE1284_PH_FWD_IDLE) {
838 /* Event 47: Set nInit high. */ 848 /* Event 47: Set nInit high. */
839 parport_frob_control (port, 849 parport_frob_control(port,
840 PARPORT_CONTROL_INIT 850 PARPORT_CONTROL_INIT
841 | PARPORT_CONTROL_AUTOFD, 851 | PARPORT_CONTROL_AUTOFD,
842 PARPORT_CONTROL_INIT 852 PARPORT_CONTROL_INIT
843 | PARPORT_CONTROL_AUTOFD); 853 | PARPORT_CONTROL_AUTOFD);
844 854
845 /* Event 49: PError goes high. */ 855 /* Event 49: PError goes high. */
846 r = parport_wait_peripheral (port, 856 r = parport_wait_peripheral(port,
847 PARPORT_STATUS_PAPEROUT, 857 PARPORT_STATUS_PAPEROUT,
848 PARPORT_STATUS_PAPEROUT); 858 PARPORT_STATUS_PAPEROUT);
849 if (r) { 859 if (r) {
850 printk (KERN_DEBUG "%s: PError timeout (%d) " 860 printk(KERN_DEBUG "%s: PError timeout (%d) "
851 "in ecp_write_block_pio\n", port->name, r); 861 "in ecp_write_block_pio\n", port->name, r);
852 } 862 }
853 } 863 }
854 864
855 /* Set up ECP parallel port mode.*/ 865 /* Set up ECP parallel port mode.*/
856 parport_pc_data_forward (port); /* Must be in PS2 mode */ 866 parport_pc_data_forward(port); /* Must be in PS2 mode */
857 parport_pc_frob_control (port, 867 parport_pc_frob_control(port,
858 PARPORT_CONTROL_STROBE | 868 PARPORT_CONTROL_STROBE |
859 PARPORT_CONTROL_AUTOFD, 869 PARPORT_CONTROL_AUTOFD,
860 0); 870 0);
861 r = change_mode (port, ECR_ECP); /* ECP FIFO */ 871 r = change_mode(port, ECR_ECP); /* ECP FIFO */
862 if (r) printk (KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n", port->name); 872 if (r)
873 printk(KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n",
874 port->name);
863 port->physport->ieee1284.phase = IEEE1284_PH_FWD_DATA; 875 port->physport->ieee1284.phase = IEEE1284_PH_FWD_DATA;
864 876
865 /* Write the data to the FIFO. */ 877 /* Write the data to the FIFO. */
@@ -873,55 +885,54 @@ static size_t parport_pc_ecp_write_block_pio (struct parport *port,
873 expire = jiffies + (priv->fifo_depth * (HZ * 4)); 885 expire = jiffies + (priv->fifo_depth * (HZ * 4));
874 do { 886 do {
875 /* Wait for the FIFO to empty */ 887 /* Wait for the FIFO to empty */
876 r = change_mode (port, ECR_PS2); 888 r = change_mode(port, ECR_PS2);
877 if (r != -EBUSY) { 889 if (r != -EBUSY)
878 break; 890 break;
879 } 891 } while (time_before(jiffies, expire));
880 } while (time_before (jiffies, expire));
881 if (r == -EBUSY) { 892 if (r == -EBUSY) {
882 893
883 printk (KERN_DEBUG "%s: FIFO is stuck\n", port->name); 894 printk(KERN_DEBUG "%s: FIFO is stuck\n", port->name);
884 895
885 /* Prevent further data transfer. */ 896 /* Prevent further data transfer. */
886 frob_set_mode (port, ECR_TST); 897 frob_set_mode(port, ECR_TST);
887 898
888 /* Adjust for the contents of the FIFO. */ 899 /* Adjust for the contents of the FIFO. */
889 for (written -= priv->fifo_depth; ; written++) { 900 for (written -= priv->fifo_depth; ; written++) {
890 if (inb (ECONTROL (port)) & 0x2) { 901 if (inb(ECONTROL(port)) & 0x2) {
891 /* Full up. */ 902 /* Full up. */
892 break; 903 break;
893 } 904 }
894 outb (0, FIFO (port)); 905 outb(0, FIFO(port));
895 } 906 }
896 907
897 /* Reset the FIFO and return to PS2 mode. */ 908 /* Reset the FIFO and return to PS2 mode. */
898 frob_set_mode (port, ECR_PS2); 909 frob_set_mode(port, ECR_PS2);
899 910
900 /* Host transfer recovery. */ 911 /* Host transfer recovery. */
901 parport_pc_data_reverse (port); /* Must be in PS2 mode */ 912 parport_pc_data_reverse(port); /* Must be in PS2 mode */
902 udelay (5); 913 udelay(5);
903 parport_frob_control (port, PARPORT_CONTROL_INIT, 0); 914 parport_frob_control(port, PARPORT_CONTROL_INIT, 0);
904 r = parport_wait_peripheral (port, PARPORT_STATUS_PAPEROUT, 0); 915 r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, 0);
905 if (r) 916 if (r)
906 printk (KERN_DEBUG "%s: PE,1 timeout (%d) " 917 printk(KERN_DEBUG "%s: PE,1 timeout (%d) "
907 "in ecp_write_block_pio\n", port->name, r); 918 "in ecp_write_block_pio\n", port->name, r);
908 919
909 parport_frob_control (port, 920 parport_frob_control(port,
910 PARPORT_CONTROL_INIT, 921 PARPORT_CONTROL_INIT,
911 PARPORT_CONTROL_INIT); 922 PARPORT_CONTROL_INIT);
912 r = parport_wait_peripheral (port, 923 r = parport_wait_peripheral(port,
913 PARPORT_STATUS_PAPEROUT, 924 PARPORT_STATUS_PAPEROUT,
914 PARPORT_STATUS_PAPEROUT); 925 PARPORT_STATUS_PAPEROUT);
915 if (r) 926 if (r)
916 printk (KERN_DEBUG "%s: PE,2 timeout (%d) " 927 printk(KERN_DEBUG "%s: PE,2 timeout (%d) "
917 "in ecp_write_block_pio\n", port->name, r); 928 "in ecp_write_block_pio\n", port->name, r);
918 } 929 }
919 930
920 r = parport_wait_peripheral (port, 931 r = parport_wait_peripheral(port,
921 PARPORT_STATUS_BUSY, 932 PARPORT_STATUS_BUSY,
922 PARPORT_STATUS_BUSY); 933 PARPORT_STATUS_BUSY);
923 if(r) 934 if (r)
924 printk (KERN_DEBUG 935 printk(KERN_DEBUG
925 "%s: BUSY timeout (%d) in ecp_write_block_pio\n", 936 "%s: BUSY timeout (%d) in ecp_write_block_pio\n",
926 port->name, r); 937 port->name, r);
927 938
@@ -931,7 +942,7 @@ static size_t parport_pc_ecp_write_block_pio (struct parport *port,
931} 942}
932 943
933#if 0 944#if 0
934static size_t parport_pc_ecp_read_block_pio (struct parport *port, 945static size_t parport_pc_ecp_read_block_pio(struct parport *port,
935 void *buf, size_t length, 946 void *buf, size_t length,
936 int flags) 947 int flags)
937{ 948{
@@ -944,13 +955,13 @@ static size_t parport_pc_ecp_read_block_pio (struct parport *port,
944 char *bufp = buf; 955 char *bufp = buf;
945 956
946 port = port->physport; 957 port = port->physport;
947DPRINTK (KERN_DEBUG "parport_pc: parport_pc_ecp_read_block_pio\n"); 958 DPRINTK(KERN_DEBUG "parport_pc: parport_pc_ecp_read_block_pio\n");
948dump_parport_state ("enter fcn", port); 959 dump_parport_state("enter fcn", port);
949 960
950 /* Special case: a timeout of zero means we cannot call schedule(). 961 /* Special case: a timeout of zero means we cannot call schedule().
951 * Also if O_NONBLOCK is set then use the default implementation. */ 962 * Also if O_NONBLOCK is set then use the default implementation. */
952 if (port->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK) 963 if (port->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK)
953 return parport_ieee1284_ecp_read_data (port, buf, 964 return parport_ieee1284_ecp_read_data(port, buf,
954 length, flags); 965 length, flags);
955 966
956 if (port->ieee1284.mode == IEEE1284_MODE_ECPRLE) { 967 if (port->ieee1284.mode == IEEE1284_MODE_ECPRLE) {
@@ -966,173 +977,178 @@ dump_parport_state ("enter fcn", port);
966 * go through software emulation. Otherwise we may have to throw 977 * go through software emulation. Otherwise we may have to throw
967 * away data. */ 978 * away data. */
968 if (length < fifofull) 979 if (length < fifofull)
969 return parport_ieee1284_ecp_read_data (port, buf, 980 return parport_ieee1284_ecp_read_data(port, buf,
970 length, flags); 981 length, flags);
971 982
972 if (port->ieee1284.phase != IEEE1284_PH_REV_IDLE) { 983 if (port->ieee1284.phase != IEEE1284_PH_REV_IDLE) {
973 /* change to reverse-idle phase (must be in forward-idle) */ 984 /* change to reverse-idle phase (must be in forward-idle) */
974 985
975 /* Event 38: Set nAutoFd low (also make sure nStrobe is high) */ 986 /* Event 38: Set nAutoFd low (also make sure nStrobe is high) */
976 parport_frob_control (port, 987 parport_frob_control(port,
977 PARPORT_CONTROL_AUTOFD 988 PARPORT_CONTROL_AUTOFD
978 | PARPORT_CONTROL_STROBE, 989 | PARPORT_CONTROL_STROBE,
979 PARPORT_CONTROL_AUTOFD); 990 PARPORT_CONTROL_AUTOFD);
980 parport_pc_data_reverse (port); /* Must be in PS2 mode */ 991 parport_pc_data_reverse(port); /* Must be in PS2 mode */
981 udelay (5); 992 udelay(5);
982 /* Event 39: Set nInit low to initiate bus reversal */ 993 /* Event 39: Set nInit low to initiate bus reversal */
983 parport_frob_control (port, 994 parport_frob_control(port,
984 PARPORT_CONTROL_INIT, 995 PARPORT_CONTROL_INIT,
985 0); 996 0);
986 /* Event 40: Wait for nAckReverse (PError) to go low */ 997 /* Event 40: Wait for nAckReverse (PError) to go low */
987 r = parport_wait_peripheral (port, PARPORT_STATUS_PAPEROUT, 0); 998 r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, 0);
988 if (r) { 999 if (r) {
989 printk (KERN_DEBUG "%s: PE timeout Event 40 (%d) " 1000 printk(KERN_DEBUG "%s: PE timeout Event 40 (%d) "
990 "in ecp_read_block_pio\n", port->name, r); 1001 "in ecp_read_block_pio\n", port->name, r);
991 return 0; 1002 return 0;
992 } 1003 }
993 } 1004 }
994 1005
995 /* Set up ECP FIFO mode.*/ 1006 /* Set up ECP FIFO mode.*/
996/* parport_pc_frob_control (port, 1007/* parport_pc_frob_control(port,
997 PARPORT_CONTROL_STROBE | 1008 PARPORT_CONTROL_STROBE |
998 PARPORT_CONTROL_AUTOFD, 1009 PARPORT_CONTROL_AUTOFD,
999 PARPORT_CONTROL_AUTOFD); */ 1010 PARPORT_CONTROL_AUTOFD); */
1000 r = change_mode (port, ECR_ECP); /* ECP FIFO */ 1011 r = change_mode(port, ECR_ECP); /* ECP FIFO */
1001 if (r) printk (KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n", port->name); 1012 if (r)
1013 printk(KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n",
1014 port->name);
1002 1015
1003 port->ieee1284.phase = IEEE1284_PH_REV_DATA; 1016 port->ieee1284.phase = IEEE1284_PH_REV_DATA;
1004 1017
1005 /* the first byte must be collected manually */ 1018 /* the first byte must be collected manually */
1006dump_parport_state ("pre 43", port); 1019 dump_parport_state("pre 43", port);
1007 /* Event 43: Wait for nAck to go low */ 1020 /* Event 43: Wait for nAck to go low */
1008 r = parport_wait_peripheral (port, PARPORT_STATUS_ACK, 0); 1021 r = parport_wait_peripheral(port, PARPORT_STATUS_ACK, 0);
1009 if (r) { 1022 if (r) {
1010 /* timed out while reading -- no data */ 1023 /* timed out while reading -- no data */
1011 printk (KERN_DEBUG "PIO read timed out (initial byte)\n"); 1024 printk(KERN_DEBUG "PIO read timed out (initial byte)\n");
1012 goto out_no_data; 1025 goto out_no_data;
1013 } 1026 }
1014 /* read byte */ 1027 /* read byte */
1015 *bufp++ = inb (DATA (port)); 1028 *bufp++ = inb(DATA(port));
1016 left--; 1029 left--;
1017dump_parport_state ("43-44", port); 1030 dump_parport_state("43-44", port);
1018 /* Event 44: nAutoFd (HostAck) goes high to acknowledge */ 1031 /* Event 44: nAutoFd (HostAck) goes high to acknowledge */
1019 parport_pc_frob_control (port, 1032 parport_pc_frob_control(port,
1020 PARPORT_CONTROL_AUTOFD, 1033 PARPORT_CONTROL_AUTOFD,
1021 0); 1034 0);
1022dump_parport_state ("pre 45", port); 1035 dump_parport_state("pre 45", port);
1023 /* Event 45: Wait for nAck to go high */ 1036 /* Event 45: Wait for nAck to go high */
1024/* r = parport_wait_peripheral (port, PARPORT_STATUS_ACK, PARPORT_STATUS_ACK); */ 1037 /* r = parport_wait_peripheral(port, PARPORT_STATUS_ACK,
1025dump_parport_state ("post 45", port); 1038 PARPORT_STATUS_ACK); */
1026r = 0; 1039 dump_parport_state("post 45", port);
1040 r = 0;
1027 if (r) { 1041 if (r) {
1028 /* timed out while waiting for peripheral to respond to ack */ 1042 /* timed out while waiting for peripheral to respond to ack */
1029 printk (KERN_DEBUG "ECP PIO read timed out (waiting for nAck)\n"); 1043 printk(KERN_DEBUG "ECP PIO read timed out (waiting for nAck)\n");
1030 1044
1031 /* keep hold of the byte we've got already */ 1045 /* keep hold of the byte we've got already */
1032 goto out_no_data; 1046 goto out_no_data;
1033 } 1047 }
1034 /* Event 46: nAutoFd (HostAck) goes low to accept more data */ 1048 /* Event 46: nAutoFd (HostAck) goes low to accept more data */
1035 parport_pc_frob_control (port, 1049 parport_pc_frob_control(port,
1036 PARPORT_CONTROL_AUTOFD, 1050 PARPORT_CONTROL_AUTOFD,
1037 PARPORT_CONTROL_AUTOFD); 1051 PARPORT_CONTROL_AUTOFD);
1038 1052
1039 1053
1040dump_parport_state ("rev idle", port); 1054 dump_parport_state("rev idle", port);
1041 /* Do the transfer. */ 1055 /* Do the transfer. */
1042 while (left > fifofull) { 1056 while (left > fifofull) {
1043 int ret; 1057 int ret;
1044 unsigned long expire = jiffies + port->cad->timeout; 1058 unsigned long expire = jiffies + port->cad->timeout;
1045 unsigned char ecrval = inb (ECONTROL (port)); 1059 unsigned char ecrval = inb(ECONTROL(port));
1046 1060
1047 if (need_resched() && time_before (jiffies, expire)) 1061 if (need_resched() && time_before(jiffies, expire))
1048 /* Can't yield the port. */ 1062 /* Can't yield the port. */
1049 schedule (); 1063 schedule();
1050 1064
1051 /* At this point, the FIFO may already be full. In 1065 /* At this point, the FIFO may already be full. In
1052 * that case ECP is already holding back the 1066 * that case ECP is already holding back the
1053 * peripheral (assuming proper design) with a delayed 1067 * peripheral (assuming proper design) with a delayed
1054 * handshake. Work fast to avoid a peripheral 1068 * handshake. Work fast to avoid a peripheral
1055 * timeout. */ 1069 * timeout. */
1056 1070
1057 if (ecrval & 0x01) { 1071 if (ecrval & 0x01) {
1058 /* FIFO is empty. Wait for interrupt. */ 1072 /* FIFO is empty. Wait for interrupt. */
1059dump_parport_state ("FIFO empty", port); 1073 dump_parport_state("FIFO empty", port);
1060 1074
1061 /* Anyone else waiting for the port? */ 1075 /* Anyone else waiting for the port? */
1062 if (port->waithead) { 1076 if (port->waithead) {
1063 printk (KERN_DEBUG "Somebody wants the port\n"); 1077 printk(KERN_DEBUG "Somebody wants the port\n");
1064 break; 1078 break;
1065 } 1079 }
1066 1080
1067 /* Clear serviceIntr */ 1081 /* Clear serviceIntr */
1068 ECR_WRITE (port, ecrval & ~(1<<2)); 1082 ECR_WRITE(port, ecrval & ~(1<<2));
1069 false_alarm: 1083false_alarm:
1070dump_parport_state ("waiting", port); 1084 dump_parport_state("waiting", port);
1071 ret = parport_wait_event (port, HZ); 1085 ret = parport_wait_event(port, HZ);
1072DPRINTK (KERN_DEBUG "parport_wait_event returned %d\n", ret); 1086 DPRINTK(KERN_DEBUG "parport_wait_event returned %d\n",
1087 ret);
1073 if (ret < 0) 1088 if (ret < 0)
1074 break; 1089 break;
1075 ret = 0; 1090 ret = 0;
1076 if (!time_before (jiffies, expire)) { 1091 if (!time_before(jiffies, expire)) {
1077 /* Timed out. */ 1092 /* Timed out. */
1078dump_parport_state ("timeout", port); 1093 dump_parport_state("timeout", port);
1079 printk (KERN_DEBUG "PIO read timed out\n"); 1094 printk(KERN_DEBUG "PIO read timed out\n");
1080 break; 1095 break;
1081 } 1096 }
1082 ecrval = inb (ECONTROL (port)); 1097 ecrval = inb(ECONTROL(port));
1083 if (!(ecrval & (1<<2))) { 1098 if (!(ecrval & (1<<2))) {
1084 if (need_resched() && 1099 if (need_resched() &&
1085 time_before (jiffies, expire)) { 1100 time_before(jiffies, expire)) {
1086 schedule (); 1101 schedule();
1087 } 1102 }
1088 goto false_alarm; 1103 goto false_alarm;
1089 } 1104 }
1090 1105
1091 /* Depending on how the FIFO threshold was 1106 /* Depending on how the FIFO threshold was
1092 * set, how long interrupt service took, and 1107 * set, how long interrupt service took, and
1093 * how fast the peripheral is, we might be 1108 * how fast the peripheral is, we might be
1094 * lucky and have a just filled FIFO. */ 1109 * lucky and have a just filled FIFO. */
1095 continue; 1110 continue;
1096 } 1111 }
1097 1112
1098 if (ecrval & 0x02) { 1113 if (ecrval & 0x02) {
1099 /* FIFO is full. */ 1114 /* FIFO is full. */
1100dump_parport_state ("FIFO full", port); 1115 dump_parport_state("FIFO full", port);
1101 insb (fifo, bufp, fifo_depth); 1116 insb(fifo, bufp, fifo_depth);
1102 bufp += fifo_depth; 1117 bufp += fifo_depth;
1103 left -= fifo_depth; 1118 left -= fifo_depth;
1104 continue; 1119 continue;
1105 } 1120 }
1106 1121
1107DPRINTK (KERN_DEBUG "*** ecp_read_block_pio: reading one byte from the FIFO\n"); 1122 DPRINTK(KERN_DEBUG
1123 "*** ecp_read_block_pio: reading one byte from the FIFO\n");
1108 1124
1109 /* FIFO not filled. We will cycle this loop for a while 1125 /* FIFO not filled. We will cycle this loop for a while
1110 * and either the peripheral will fill it faster, 1126 * and either the peripheral will fill it faster,
1111 * tripping a fast empty with insb, or we empty it. */ 1127 * tripping a fast empty with insb, or we empty it. */
1112 *bufp++ = inb (fifo); 1128 *bufp++ = inb(fifo);
1113 left--; 1129 left--;
1114 } 1130 }
1115 1131
1116 /* scoop up anything left in the FIFO */ 1132 /* scoop up anything left in the FIFO */
1117 while (left && !(inb (ECONTROL (port) & 0x01))) { 1133 while (left && !(inb(ECONTROL(port) & 0x01))) {
1118 *bufp++ = inb (fifo); 1134 *bufp++ = inb(fifo);
1119 left--; 1135 left--;
1120 } 1136 }
1121 1137
1122 port->ieee1284.phase = IEEE1284_PH_REV_IDLE; 1138 port->ieee1284.phase = IEEE1284_PH_REV_IDLE;
1123dump_parport_state ("rev idle2", port); 1139 dump_parport_state("rev idle2", port);
1124 1140
1125out_no_data: 1141out_no_data:
1126 1142
1127 /* Go to forward idle mode to shut the peripheral up (event 47). */ 1143 /* Go to forward idle mode to shut the peripheral up (event 47). */
1128 parport_frob_control (port, PARPORT_CONTROL_INIT, PARPORT_CONTROL_INIT); 1144 parport_frob_control(port, PARPORT_CONTROL_INIT, PARPORT_CONTROL_INIT);
1129 1145
1130 /* event 49: PError goes high */ 1146 /* event 49: PError goes high */
1131 r = parport_wait_peripheral (port, 1147 r = parport_wait_peripheral(port,
1132 PARPORT_STATUS_PAPEROUT, 1148 PARPORT_STATUS_PAPEROUT,
1133 PARPORT_STATUS_PAPEROUT); 1149 PARPORT_STATUS_PAPEROUT);
1134 if (r) { 1150 if (r) {
1135 printk (KERN_DEBUG 1151 printk(KERN_DEBUG
1136 "%s: PE timeout FWDIDLE (%d) in ecp_read_block_pio\n", 1152 "%s: PE timeout FWDIDLE (%d) in ecp_read_block_pio\n",
1137 port->name, r); 1153 port->name, r);
1138 } 1154 }
@@ -1141,14 +1157,14 @@ out_no_data:
1141 1157
1142 /* Finish up. */ 1158 /* Finish up. */
1143 { 1159 {
1144 int lost = get_fifo_residue (port); 1160 int lost = get_fifo_residue(port);
1145 if (lost) 1161 if (lost)
1146 /* Shouldn't happen with compliant peripherals. */ 1162 /* Shouldn't happen with compliant peripherals. */
1147 printk (KERN_DEBUG "%s: DATA LOSS (%d bytes)!\n", 1163 printk(KERN_DEBUG "%s: DATA LOSS (%d bytes)!\n",
1148 port->name, lost); 1164 port->name, lost);
1149 } 1165 }
1150 1166
1151dump_parport_state ("fwd idle", port); 1167 dump_parport_state("fwd idle", port);
1152 return length - left; 1168 return length - left;
1153} 1169}
1154#endif /* 0 */ 1170#endif /* 0 */
@@ -1164,8 +1180,7 @@ dump_parport_state ("fwd idle", port);
1164 1180
1165/* GCC is not inlining extern inline function later overwriten to non-inline, 1181/* GCC is not inlining extern inline function later overwriten to non-inline,
1166 so we use outlined_ variants here. */ 1182 so we use outlined_ variants here. */
1167static const struct parport_operations parport_pc_ops = 1183static const struct parport_operations parport_pc_ops = {
1168{
1169 .write_data = parport_pc_write_data, 1184 .write_data = parport_pc_write_data,
1170 .read_data = parport_pc_read_data, 1185 .read_data = parport_pc_read_data,
1171 1186
@@ -1202,88 +1217,107 @@ static const struct parport_operations parport_pc_ops =
1202}; 1217};
1203 1218
1204#ifdef CONFIG_PARPORT_PC_SUPERIO 1219#ifdef CONFIG_PARPORT_PC_SUPERIO
1220
1221static struct superio_struct *find_free_superio(void)
1222{
1223 int i;
1224 for (i = 0; i < NR_SUPERIOS; i++)
1225 if (superios[i].io == 0)
1226 return &superios[i];
1227 return NULL;
1228}
1229
1230
1205/* Super-IO chipset detection, Winbond, SMSC */ 1231/* Super-IO chipset detection, Winbond, SMSC */
1206static void __devinit show_parconfig_smsc37c669(int io, int key) 1232static void __devinit show_parconfig_smsc37c669(int io, int key)
1207{ 1233{
1208 int cr1,cr4,cra,cr23,cr26,cr27,i=0; 1234 int cr1, cr4, cra, cr23, cr26, cr27;
1209 static const char *const modes[]={ 1235 struct superio_struct *s;
1236
1237 static const char *const modes[] = {
1210 "SPP and Bidirectional (PS/2)", 1238 "SPP and Bidirectional (PS/2)",
1211 "EPP and SPP", 1239 "EPP and SPP",
1212 "ECP", 1240 "ECP",
1213 "ECP and EPP" }; 1241 "ECP and EPP" };
1214 1242
1215 outb(key,io); 1243 outb(key, io);
1216 outb(key,io); 1244 outb(key, io);
1217 outb(1,io); 1245 outb(1, io);
1218 cr1=inb(io+1); 1246 cr1 = inb(io + 1);
1219 outb(4,io); 1247 outb(4, io);
1220 cr4=inb(io+1); 1248 cr4 = inb(io + 1);
1221 outb(0x0a,io); 1249 outb(0x0a, io);
1222 cra=inb(io+1); 1250 cra = inb(io + 1);
1223 outb(0x23,io); 1251 outb(0x23, io);
1224 cr23=inb(io+1); 1252 cr23 = inb(io + 1);
1225 outb(0x26,io); 1253 outb(0x26, io);
1226 cr26=inb(io+1); 1254 cr26 = inb(io + 1);
1227 outb(0x27,io); 1255 outb(0x27, io);
1228 cr27=inb(io+1); 1256 cr27 = inb(io + 1);
1229 outb(0xaa,io); 1257 outb(0xaa, io);
1230 1258
1231 if (verbose_probing) { 1259 if (verbose_probing) {
1232 printk (KERN_INFO "SMSC 37c669 LPT Config: cr_1=0x%02x, 4=0x%02x, " 1260 printk(KERN_INFO
1261 "SMSC 37c669 LPT Config: cr_1=0x%02x, 4=0x%02x, "
1233 "A=0x%2x, 23=0x%02x, 26=0x%02x, 27=0x%02x\n", 1262 "A=0x%2x, 23=0x%02x, 26=0x%02x, 27=0x%02x\n",
1234 cr1,cr4,cra,cr23,cr26,cr27); 1263 cr1, cr4, cra, cr23, cr26, cr27);
1235 1264
1236 /* The documentation calls DMA and IRQ-Lines by letters, so 1265 /* The documentation calls DMA and IRQ-Lines by letters, so
1237 the board maker can/will wire them 1266 the board maker can/will wire them
1238 appropriately/randomly... G=reserved H=IDE-irq, */ 1267 appropriately/randomly... G=reserved H=IDE-irq, */
1239 printk (KERN_INFO "SMSC LPT Config: io=0x%04x, irq=%c, dma=%c, " 1268 printk(KERN_INFO
1240 "fifo threshold=%d\n", cr23*4, 1269 "SMSC LPT Config: io=0x%04x, irq=%c, dma=%c, fifo threshold=%d\n",
1241 (cr27 &0x0f) ? 'A'-1+(cr27 &0x0f): '-', 1270 cr23 * 4,
1242 (cr26 &0x0f) ? 'A'-1+(cr26 &0x0f): '-', cra & 0x0f); 1271 (cr27 & 0x0f) ? 'A' - 1 + (cr27 & 0x0f) : '-',
1272 (cr26 & 0x0f) ? 'A' - 1 + (cr26 & 0x0f) : '-',
1273 cra & 0x0f);
1243 printk(KERN_INFO "SMSC LPT Config: enabled=%s power=%s\n", 1274 printk(KERN_INFO "SMSC LPT Config: enabled=%s power=%s\n",
1244 (cr23*4 >=0x100) ?"yes":"no", (cr1 & 4) ? "yes" : "no"); 1275 (cr23 * 4 >= 0x100) ? "yes" : "no",
1245 printk(KERN_INFO "SMSC LPT Config: Port mode=%s, EPP version =%s\n", 1276 (cr1 & 4) ? "yes" : "no");
1246 (cr1 & 0x08 ) ? "Standard mode only (SPP)" : modes[cr4 & 0x03], 1277 printk(KERN_INFO
1247 (cr4 & 0x40) ? "1.7" : "1.9"); 1278 "SMSC LPT Config: Port mode=%s, EPP version =%s\n",
1279 (cr1 & 0x08) ? "Standard mode only (SPP)"
1280 : modes[cr4 & 0x03],
1281 (cr4 & 0x40) ? "1.7" : "1.9");
1248 } 1282 }
1249 1283
1250 /* Heuristics ! BIOS setup for this mainboard device limits 1284 /* Heuristics ! BIOS setup for this mainboard device limits
1251 the choices to standard settings, i.e. io-address and IRQ 1285 the choices to standard settings, i.e. io-address and IRQ
1252 are related, however DMA can be 1 or 3, assume DMA_A=DMA1, 1286 are related, however DMA can be 1 or 3, assume DMA_A=DMA1,
1253 DMA_C=DMA3 (this is true e.g. for TYAN 1564D Tomcat IV) */ 1287 DMA_C=DMA3 (this is true e.g. for TYAN 1564D Tomcat IV) */
1254 if(cr23*4 >=0x100) { /* if active */ 1288 if (cr23 * 4 >= 0x100) { /* if active */
1255 while((superios[i].io!= 0) && (i<NR_SUPERIOS)) 1289 s = find_free_superio();
1256 i++; 1290 if (s == NULL)
1257 if(i==NR_SUPERIOS)
1258 printk(KERN_INFO "Super-IO: too many chips!\n"); 1291 printk(KERN_INFO "Super-IO: too many chips!\n");
1259 else { 1292 else {
1260 int d; 1293 int d;
1261 switch (cr23*4) { 1294 switch (cr23 * 4) {
1262 case 0x3bc: 1295 case 0x3bc:
1263 superios[i].io = 0x3bc; 1296 s->io = 0x3bc;
1264 superios[i].irq = 7; 1297 s->irq = 7;
1265 break; 1298 break;
1266 case 0x378: 1299 case 0x378:
1267 superios[i].io = 0x378; 1300 s->io = 0x378;
1268 superios[i].irq = 7; 1301 s->irq = 7;
1269 break; 1302 break;
1270 case 0x278: 1303 case 0x278:
1271 superios[i].io = 0x278; 1304 s->io = 0x278;
1272 superios[i].irq = 5; 1305 s->irq = 5;
1273 } 1306 }
1274 d=(cr26 &0x0f); 1307 d = (cr26 & 0x0f);
1275 if((d==1) || (d==3)) 1308 if (d == 1 || d == 3)
1276 superios[i].dma= d; 1309 s->dma = d;
1277 else 1310 else
1278 superios[i].dma= PARPORT_DMA_NONE; 1311 s->dma = PARPORT_DMA_NONE;
1279 } 1312 }
1280 } 1313 }
1281} 1314}
1282 1315
1283 1316
1284static void __devinit show_parconfig_winbond(int io, int key) 1317static void __devinit show_parconfig_winbond(int io, int key)
1285{ 1318{
1286 int cr30,cr60,cr61,cr70,cr74,crf0,i=0; 1319 int cr30, cr60, cr61, cr70, cr74, crf0;
1320 struct superio_struct *s;
1287 static const char *const modes[] = { 1321 static const char *const modes[] = {
1288 "Standard (SPP) and Bidirectional(PS/2)", /* 0 */ 1322 "Standard (SPP) and Bidirectional(PS/2)", /* 0 */
1289 "EPP-1.9 and SPP", 1323 "EPP-1.9 and SPP",
@@ -1296,110 +1330,134 @@ static void __devinit show_parconfig_winbond(int io, int key)
1296 static char *const irqtypes[] = { 1330 static char *const irqtypes[] = {
1297 "pulsed low, high-Z", 1331 "pulsed low, high-Z",
1298 "follows nACK" }; 1332 "follows nACK" };
1299 1333
1300 /* The registers are called compatible-PnP because the 1334 /* The registers are called compatible-PnP because the
1301 register layout is modelled after ISA-PnP, the access 1335 register layout is modelled after ISA-PnP, the access
1302 method is just another ... */ 1336 method is just another ... */
1303 outb(key,io); 1337 outb(key, io);
1304 outb(key,io); 1338 outb(key, io);
1305 outb(0x07,io); /* Register 7: Select Logical Device */ 1339 outb(0x07, io); /* Register 7: Select Logical Device */
1306 outb(0x01,io+1); /* LD1 is Parallel Port */ 1340 outb(0x01, io + 1); /* LD1 is Parallel Port */
1307 outb(0x30,io); 1341 outb(0x30, io);
1308 cr30=inb(io+1); 1342 cr30 = inb(io + 1);
1309 outb(0x60,io); 1343 outb(0x60, io);
1310 cr60=inb(io+1); 1344 cr60 = inb(io + 1);
1311 outb(0x61,io); 1345 outb(0x61, io);
1312 cr61=inb(io+1); 1346 cr61 = inb(io + 1);
1313 outb(0x70,io); 1347 outb(0x70, io);
1314 cr70=inb(io+1); 1348 cr70 = inb(io + 1);
1315 outb(0x74,io); 1349 outb(0x74, io);
1316 cr74=inb(io+1); 1350 cr74 = inb(io + 1);
1317 outb(0xf0,io); 1351 outb(0xf0, io);
1318 crf0=inb(io+1); 1352 crf0 = inb(io + 1);
1319 outb(0xaa,io); 1353 outb(0xaa, io);
1320 1354
1321 if (verbose_probing) { 1355 if (verbose_probing) {
1322 printk(KERN_INFO "Winbond LPT Config: cr_30=%02x 60,61=%02x%02x " 1356 printk(KERN_INFO
1323 "70=%02x 74=%02x, f0=%02x\n", cr30,cr60,cr61,cr70,cr74,crf0); 1357 "Winbond LPT Config: cr_30=%02x 60,61=%02x%02x 70=%02x 74=%02x, f0=%02x\n",
1324 printk(KERN_INFO "Winbond LPT Config: active=%s, io=0x%02x%02x irq=%d, ", 1358 cr30, cr60, cr61, cr70, cr74, crf0);
1325 (cr30 & 0x01) ? "yes":"no", cr60,cr61,cr70&0x0f ); 1359 printk(KERN_INFO "Winbond LPT Config: active=%s, io=0x%02x%02x irq=%d, ",
1360 (cr30 & 0x01) ? "yes" : "no", cr60, cr61, cr70 & 0x0f);
1326 if ((cr74 & 0x07) > 3) 1361 if ((cr74 & 0x07) > 3)
1327 printk("dma=none\n"); 1362 printk("dma=none\n");
1328 else 1363 else
1329 printk("dma=%d\n",cr74 & 0x07); 1364 printk("dma=%d\n", cr74 & 0x07);
1330 printk(KERN_INFO "Winbond LPT Config: irqtype=%s, ECP fifo threshold=%d\n", 1365 printk(KERN_INFO
1331 irqtypes[crf0>>7], (crf0>>3)&0x0f); 1366 "Winbond LPT Config: irqtype=%s, ECP fifo threshold=%d\n",
1332 printk(KERN_INFO "Winbond LPT Config: Port mode=%s\n", modes[crf0 & 0x07]); 1367 irqtypes[crf0>>7], (crf0>>3)&0x0f);
1368 printk(KERN_INFO "Winbond LPT Config: Port mode=%s\n",
1369 modes[crf0 & 0x07]);
1333 } 1370 }
1334 1371
1335 if(cr30 & 0x01) { /* the settings can be interrogated later ... */ 1372 if (cr30 & 0x01) { /* the settings can be interrogated later ... */
1336 while((superios[i].io!= 0) && (i<NR_SUPERIOS)) 1373 s = find_free_superio();
1337 i++; 1374 if (s == NULL)
1338 if(i==NR_SUPERIOS)
1339 printk(KERN_INFO "Super-IO: too many chips!\n"); 1375 printk(KERN_INFO "Super-IO: too many chips!\n");
1340 else { 1376 else {
1341 superios[i].io = (cr60<<8)|cr61; 1377 s->io = (cr60 << 8) | cr61;
1342 superios[i].irq = cr70&0x0f; 1378 s->irq = cr70 & 0x0f;
1343 superios[i].dma = (((cr74 & 0x07) > 3) ? 1379 s->dma = (((cr74 & 0x07) > 3) ?
1344 PARPORT_DMA_NONE : (cr74 & 0x07)); 1380 PARPORT_DMA_NONE : (cr74 & 0x07));
1345 } 1381 }
1346 } 1382 }
1347} 1383}
1348 1384
1349static void __devinit decode_winbond(int efer, int key, int devid, int devrev, int oldid) 1385static void __devinit decode_winbond(int efer, int key, int devid,
1386 int devrev, int oldid)
1350{ 1387{
1351 const char *type = "unknown"; 1388 const char *type = "unknown";
1352 int id,progif=2; 1389 int id, progif = 2;
1353 1390
1354 if (devid == devrev) 1391 if (devid == devrev)
1355 /* simple heuristics, we happened to read some 1392 /* simple heuristics, we happened to read some
1356 non-winbond register */ 1393 non-winbond register */
1357 return; 1394 return;
1358 1395
1359 id=(devid<<8) | devrev; 1396 id = (devid << 8) | devrev;
1360 1397
1361 /* Values are from public data sheets pdf files, I can just 1398 /* Values are from public data sheets pdf files, I can just
1362 confirm 83977TF is correct :-) */ 1399 confirm 83977TF is correct :-) */
1363 if (id == 0x9771) type="83977F/AF"; 1400 if (id == 0x9771)
1364 else if (id == 0x9773) type="83977TF / SMSC 97w33x/97w34x"; 1401 type = "83977F/AF";
1365 else if (id == 0x9774) type="83977ATF"; 1402 else if (id == 0x9773)
1366 else if ((id & ~0x0f) == 0x5270) type="83977CTF / SMSC 97w36x"; 1403 type = "83977TF / SMSC 97w33x/97w34x";
1367 else if ((id & ~0x0f) == 0x52f0) type="83977EF / SMSC 97w35x"; 1404 else if (id == 0x9774)
1368 else if ((id & ~0x0f) == 0x5210) type="83627"; 1405 type = "83977ATF";
1369 else if ((id & ~0x0f) == 0x6010) type="83697HF"; 1406 else if ((id & ~0x0f) == 0x5270)
1370 else if ((oldid &0x0f ) == 0x0a) { type="83877F"; progif=1;} 1407 type = "83977CTF / SMSC 97w36x";
1371 else if ((oldid &0x0f ) == 0x0b) { type="83877AF"; progif=1;} 1408 else if ((id & ~0x0f) == 0x52f0)
1372 else if ((oldid &0x0f ) == 0x0c) { type="83877TF"; progif=1;} 1409 type = "83977EF / SMSC 97w35x";
1373 else if ((oldid &0x0f ) == 0x0d) { type="83877ATF"; progif=1;} 1410 else if ((id & ~0x0f) == 0x5210)
1374 else progif=0; 1411 type = "83627";
1412 else if ((id & ~0x0f) == 0x6010)
1413 type = "83697HF";
1414 else if ((oldid & 0x0f) == 0x0a) {
1415 type = "83877F";
1416 progif = 1;
1417 } else if ((oldid & 0x0f) == 0x0b) {
1418 type = "83877AF";
1419 progif = 1;
1420 } else if ((oldid & 0x0f) == 0x0c) {
1421 type = "83877TF";
1422 progif = 1;
1423 } else if ((oldid & 0x0f) == 0x0d) {
1424 type = "83877ATF";
1425 progif = 1;
1426 } else
1427 progif = 0;
1375 1428
1376 if (verbose_probing) 1429 if (verbose_probing)
1377 printk(KERN_INFO "Winbond chip at EFER=0x%x key=0x%02x " 1430 printk(KERN_INFO "Winbond chip at EFER=0x%x key=0x%02x "
1378 "devid=%02x devrev=%02x oldid=%02x type=%s\n", 1431 "devid=%02x devrev=%02x oldid=%02x type=%s\n",
1379 efer, key, devid, devrev, oldid, type); 1432 efer, key, devid, devrev, oldid, type);
1380 1433
1381 if (progif == 2) 1434 if (progif == 2)
1382 show_parconfig_winbond(efer,key); 1435 show_parconfig_winbond(efer, key);
1383} 1436}
1384 1437
1385static void __devinit decode_smsc(int efer, int key, int devid, int devrev) 1438static void __devinit decode_smsc(int efer, int key, int devid, int devrev)
1386{ 1439{
1387 const char *type = "unknown"; 1440 const char *type = "unknown";
1388 void (*func)(int io, int key); 1441 void (*func)(int io, int key);
1389 int id; 1442 int id;
1390 1443
1391 if (devid == devrev) 1444 if (devid == devrev)
1392 /* simple heuristics, we happened to read some 1445 /* simple heuristics, we happened to read some
1393 non-smsc register */ 1446 non-smsc register */
1394 return; 1447 return;
1395 1448
1396 func=NULL; 1449 func = NULL;
1397 id=(devid<<8) | devrev; 1450 id = (devid << 8) | devrev;
1398 1451
1399 if (id==0x0302) {type="37c669"; func=show_parconfig_smsc37c669;} 1452 if (id == 0x0302) {
1400 else if (id==0x6582) type="37c665IR"; 1453 type = "37c669";
1401 else if (devid==0x65) type="37c665GT"; 1454 func = show_parconfig_smsc37c669;
1402 else if (devid==0x66) type="37c666GT"; 1455 } else if (id == 0x6582)
1456 type = "37c665IR";
1457 else if (devid == 0x65)
1458 type = "37c665GT";
1459 else if (devid == 0x66)
1460 type = "37c666GT";
1403 1461
1404 if (verbose_probing) 1462 if (verbose_probing)
1405 printk(KERN_INFO "SMSC chip at EFER=0x%x " 1463 printk(KERN_INFO "SMSC chip at EFER=0x%x "
@@ -1407,138 +1465,138 @@ static void __devinit decode_smsc(int efer, int key, int devid, int devrev)
1407 efer, key, devid, devrev, type); 1465 efer, key, devid, devrev, type);
1408 1466
1409 if (func) 1467 if (func)
1410 func(efer,key); 1468 func(efer, key);
1411} 1469}
1412 1470
1413 1471
1414static void __devinit winbond_check(int io, int key) 1472static void __devinit winbond_check(int io, int key)
1415{ 1473{
1416 int devid,devrev,oldid,x_devid,x_devrev,x_oldid; 1474 int devid, devrev, oldid, x_devid, x_devrev, x_oldid;
1417 1475
1418 if (!request_region(io, 3, __func__)) 1476 if (!request_region(io, 3, __func__))
1419 return; 1477 return;
1420 1478
1421 /* First probe without key */ 1479 /* First probe without key */
1422 outb(0x20,io); 1480 outb(0x20, io);
1423 x_devid=inb(io+1); 1481 x_devid = inb(io + 1);
1424 outb(0x21,io); 1482 outb(0x21, io);
1425 x_devrev=inb(io+1); 1483 x_devrev = inb(io + 1);
1426 outb(0x09,io); 1484 outb(0x09, io);
1427 x_oldid=inb(io+1); 1485 x_oldid = inb(io + 1);
1428 1486
1429 outb(key,io); 1487 outb(key, io);
1430 outb(key,io); /* Write Magic Sequence to EFER, extended 1488 outb(key, io); /* Write Magic Sequence to EFER, extended
1431 funtion enable register */ 1489 funtion enable register */
1432 outb(0x20,io); /* Write EFIR, extended function index register */ 1490 outb(0x20, io); /* Write EFIR, extended function index register */
1433 devid=inb(io+1); /* Read EFDR, extended function data register */ 1491 devid = inb(io + 1); /* Read EFDR, extended function data register */
1434 outb(0x21,io); 1492 outb(0x21, io);
1435 devrev=inb(io+1); 1493 devrev = inb(io + 1);
1436 outb(0x09,io); 1494 outb(0x09, io);
1437 oldid=inb(io+1); 1495 oldid = inb(io + 1);
1438 outb(0xaa,io); /* Magic Seal */ 1496 outb(0xaa, io); /* Magic Seal */
1439 1497
1440 if ((x_devid == devid) && (x_devrev == devrev) && (x_oldid == oldid)) 1498 if ((x_devid == devid) && (x_devrev == devrev) && (x_oldid == oldid))
1441 goto out; /* protection against false positives */ 1499 goto out; /* protection against false positives */
1442 1500
1443 decode_winbond(io,key,devid,devrev,oldid); 1501 decode_winbond(io, key, devid, devrev, oldid);
1444out: 1502out:
1445 release_region(io, 3); 1503 release_region(io, 3);
1446} 1504}
1447 1505
1448static void __devinit winbond_check2(int io,int key) 1506static void __devinit winbond_check2(int io, int key)
1449{ 1507{
1450 int devid,devrev,oldid,x_devid,x_devrev,x_oldid; 1508 int devid, devrev, oldid, x_devid, x_devrev, x_oldid;
1451 1509
1452 if (!request_region(io, 3, __func__)) 1510 if (!request_region(io, 3, __func__))
1453 return; 1511 return;
1454 1512
1455 /* First probe without the key */ 1513 /* First probe without the key */
1456 outb(0x20,io+2); 1514 outb(0x20, io + 2);
1457 x_devid=inb(io+2); 1515 x_devid = inb(io + 2);
1458 outb(0x21,io+1); 1516 outb(0x21, io + 1);
1459 x_devrev=inb(io+2); 1517 x_devrev = inb(io + 2);
1460 outb(0x09,io+1); 1518 outb(0x09, io + 1);
1461 x_oldid=inb(io+2); 1519 x_oldid = inb(io + 2);
1462 1520
1463 outb(key,io); /* Write Magic Byte to EFER, extended 1521 outb(key, io); /* Write Magic Byte to EFER, extended
1464 funtion enable register */ 1522 funtion enable register */
1465 outb(0x20,io+2); /* Write EFIR, extended function index register */ 1523 outb(0x20, io + 2); /* Write EFIR, extended function index register */
1466 devid=inb(io+2); /* Read EFDR, extended function data register */ 1524 devid = inb(io + 2); /* Read EFDR, extended function data register */
1467 outb(0x21,io+1); 1525 outb(0x21, io + 1);
1468 devrev=inb(io+2); 1526 devrev = inb(io + 2);
1469 outb(0x09,io+1); 1527 outb(0x09, io + 1);
1470 oldid=inb(io+2); 1528 oldid = inb(io + 2);
1471 outb(0xaa,io); /* Magic Seal */ 1529 outb(0xaa, io); /* Magic Seal */
1472 1530
1473 if ((x_devid == devid) && (x_devrev == devrev) && (x_oldid == oldid)) 1531 if (x_devid == devid && x_devrev == devrev && x_oldid == oldid)
1474 goto out; /* protection against false positives */ 1532 goto out; /* protection against false positives */
1475 1533
1476 decode_winbond(io,key,devid,devrev,oldid); 1534 decode_winbond(io, key, devid, devrev, oldid);
1477out: 1535out:
1478 release_region(io, 3); 1536 release_region(io, 3);
1479} 1537}
1480 1538
1481static void __devinit smsc_check(int io, int key) 1539static void __devinit smsc_check(int io, int key)
1482{ 1540{
1483 int id,rev,oldid,oldrev,x_id,x_rev,x_oldid,x_oldrev; 1541 int id, rev, oldid, oldrev, x_id, x_rev, x_oldid, x_oldrev;
1484 1542
1485 if (!request_region(io, 3, __func__)) 1543 if (!request_region(io, 3, __func__))
1486 return; 1544 return;
1487 1545
1488 /* First probe without the key */ 1546 /* First probe without the key */
1489 outb(0x0d,io); 1547 outb(0x0d, io);
1490 x_oldid=inb(io+1); 1548 x_oldid = inb(io + 1);
1491 outb(0x0e,io); 1549 outb(0x0e, io);
1492 x_oldrev=inb(io+1); 1550 x_oldrev = inb(io + 1);
1493 outb(0x20,io); 1551 outb(0x20, io);
1494 x_id=inb(io+1); 1552 x_id = inb(io + 1);
1495 outb(0x21,io); 1553 outb(0x21, io);
1496 x_rev=inb(io+1); 1554 x_rev = inb(io + 1);
1497 1555
1498 outb(key,io); 1556 outb(key, io);
1499 outb(key,io); /* Write Magic Sequence to EFER, extended 1557 outb(key, io); /* Write Magic Sequence to EFER, extended
1500 funtion enable register */ 1558 funtion enable register */
1501 outb(0x0d,io); /* Write EFIR, extended function index register */ 1559 outb(0x0d, io); /* Write EFIR, extended function index register */
1502 oldid=inb(io+1); /* Read EFDR, extended function data register */ 1560 oldid = inb(io + 1); /* Read EFDR, extended function data register */
1503 outb(0x0e,io); 1561 outb(0x0e, io);
1504 oldrev=inb(io+1); 1562 oldrev = inb(io + 1);
1505 outb(0x20,io); 1563 outb(0x20, io);
1506 id=inb(io+1); 1564 id = inb(io + 1);
1507 outb(0x21,io); 1565 outb(0x21, io);
1508 rev=inb(io+1); 1566 rev = inb(io + 1);
1509 outb(0xaa,io); /* Magic Seal */ 1567 outb(0xaa, io); /* Magic Seal */
1510 1568
1511 if ((x_id == id) && (x_oldrev == oldrev) && 1569 if (x_id == id && x_oldrev == oldrev &&
1512 (x_oldid == oldid) && (x_rev == rev)) 1570 x_oldid == oldid && x_rev == rev)
1513 goto out; /* protection against false positives */ 1571 goto out; /* protection against false positives */
1514 1572
1515 decode_smsc(io,key,oldid,oldrev); 1573 decode_smsc(io, key, oldid, oldrev);
1516out: 1574out:
1517 release_region(io, 3); 1575 release_region(io, 3);
1518} 1576}
1519 1577
1520 1578
1521static void __devinit detect_and_report_winbond (void) 1579static void __devinit detect_and_report_winbond(void)
1522{ 1580{
1523 if (verbose_probing) 1581 if (verbose_probing)
1524 printk(KERN_DEBUG "Winbond Super-IO detection, now testing ports 3F0,370,250,4E,2E ...\n"); 1582 printk(KERN_DEBUG "Winbond Super-IO detection, now testing ports 3F0,370,250,4E,2E ...\n");
1525 winbond_check(0x3f0,0x87); 1583 winbond_check(0x3f0, 0x87);
1526 winbond_check(0x370,0x87); 1584 winbond_check(0x370, 0x87);
1527 winbond_check(0x2e ,0x87); 1585 winbond_check(0x2e , 0x87);
1528 winbond_check(0x4e ,0x87); 1586 winbond_check(0x4e , 0x87);
1529 winbond_check(0x3f0,0x86); 1587 winbond_check(0x3f0, 0x86);
1530 winbond_check2(0x250,0x88); 1588 winbond_check2(0x250, 0x88);
1531 winbond_check2(0x250,0x89); 1589 winbond_check2(0x250, 0x89);
1532} 1590}
1533 1591
1534static void __devinit detect_and_report_smsc (void) 1592static void __devinit detect_and_report_smsc(void)
1535{ 1593{
1536 if (verbose_probing) 1594 if (verbose_probing)
1537 printk(KERN_DEBUG "SMSC Super-IO detection, now testing Ports 2F0, 370 ...\n"); 1595 printk(KERN_DEBUG "SMSC Super-IO detection, now testing Ports 2F0, 370 ...\n");
1538 smsc_check(0x3f0,0x55); 1596 smsc_check(0x3f0, 0x55);
1539 smsc_check(0x370,0x55); 1597 smsc_check(0x370, 0x55);
1540 smsc_check(0x3f0,0x44); 1598 smsc_check(0x3f0, 0x44);
1541 smsc_check(0x370,0x44); 1599 smsc_check(0x370, 0x44);
1542} 1600}
1543 1601
1544static void __devinit detect_and_report_it87(void) 1602static void __devinit detect_and_report_it87(void)
@@ -1573,34 +1631,39 @@ static void __devinit detect_and_report_it87(void)
1573} 1631}
1574#endif /* CONFIG_PARPORT_PC_SUPERIO */ 1632#endif /* CONFIG_PARPORT_PC_SUPERIO */
1575 1633
1576static int get_superio_dma (struct parport *p) 1634static struct superio_struct *find_superio(struct parport *p)
1577{ 1635{
1578 int i=0; 1636 int i;
1579 while( (superios[i].io != p->base) && (i<NR_SUPERIOS)) 1637 for (i = 0; i < NR_SUPERIOS; i++)
1580 i++; 1638 if (superios[i].io != p->base)
1581 if (i!=NR_SUPERIOS) 1639 return &superios[i];
1582 return superios[i].dma; 1640 return NULL;
1641}
1642
1643static int get_superio_dma(struct parport *p)
1644{
1645 struct superio_struct *s = find_superio(p);
1646 if (s)
1647 return s->dma;
1583 return PARPORT_DMA_NONE; 1648 return PARPORT_DMA_NONE;
1584} 1649}
1585 1650
1586static int get_superio_irq (struct parport *p) 1651static int get_superio_irq(struct parport *p)
1587{ 1652{
1588 int i=0; 1653 struct superio_struct *s = find_superio(p);
1589 while( (superios[i].io != p->base) && (i<NR_SUPERIOS)) 1654 if (s)
1590 i++; 1655 return s->irq;
1591 if (i!=NR_SUPERIOS) 1656 return PARPORT_IRQ_NONE;
1592 return superios[i].irq;
1593 return PARPORT_IRQ_NONE;
1594} 1657}
1595 1658
1596 1659
1597/* --- Mode detection ------------------------------------- */ 1660/* --- Mode detection ------------------------------------- */
1598 1661
1599/* 1662/*
1600 * Checks for port existence, all ports support SPP MODE 1663 * Checks for port existence, all ports support SPP MODE
1601 * Returns: 1664 * Returns:
1602 * 0 : No parallel port at this address 1665 * 0 : No parallel port at this address
1603 * PARPORT_MODE_PCSPP : SPP port detected 1666 * PARPORT_MODE_PCSPP : SPP port detected
1604 * (if the user specified an ioport himself, 1667 * (if the user specified an ioport himself,
1605 * this shall always be the case!) 1668 * this shall always be the case!)
1606 * 1669 *
@@ -1610,7 +1673,7 @@ static int parport_SPP_supported(struct parport *pb)
1610 unsigned char r, w; 1673 unsigned char r, w;
1611 1674
1612 /* 1675 /*
1613 * first clear an eventually pending EPP timeout 1676 * first clear an eventually pending EPP timeout
1614 * I (sailer@ife.ee.ethz.ch) have an SMSC chipset 1677 * I (sailer@ife.ee.ethz.ch) have an SMSC chipset
1615 * that does not even respond to SPP cycles if an EPP 1678 * that does not even respond to SPP cycles if an EPP
1616 * timeout is pending 1679 * timeout is pending
@@ -1619,19 +1682,19 @@ static int parport_SPP_supported(struct parport *pb)
1619 1682
1620 /* Do a simple read-write test to make sure the port exists. */ 1683 /* Do a simple read-write test to make sure the port exists. */
1621 w = 0xc; 1684 w = 0xc;
1622 outb (w, CONTROL (pb)); 1685 outb(w, CONTROL(pb));
1623 1686
1624 /* Is there a control register that we can read from? Some 1687 /* Is there a control register that we can read from? Some
1625 * ports don't allow reads, so read_control just returns a 1688 * ports don't allow reads, so read_control just returns a
1626 * software copy. Some ports _do_ allow reads, so bypass the 1689 * software copy. Some ports _do_ allow reads, so bypass the
1627 * software copy here. In addition, some bits aren't 1690 * software copy here. In addition, some bits aren't
1628 * writable. */ 1691 * writable. */
1629 r = inb (CONTROL (pb)); 1692 r = inb(CONTROL(pb));
1630 if ((r & 0xf) == w) { 1693 if ((r & 0xf) == w) {
1631 w = 0xe; 1694 w = 0xe;
1632 outb (w, CONTROL (pb)); 1695 outb(w, CONTROL(pb));
1633 r = inb (CONTROL (pb)); 1696 r = inb(CONTROL(pb));
1634 outb (0xc, CONTROL (pb)); 1697 outb(0xc, CONTROL(pb));
1635 if ((r & 0xf) == w) 1698 if ((r & 0xf) == w)
1636 return PARPORT_MODE_PCSPP; 1699 return PARPORT_MODE_PCSPP;
1637 } 1700 }
@@ -1639,18 +1702,18 @@ static int parport_SPP_supported(struct parport *pb)
1639 if (user_specified) 1702 if (user_specified)
1640 /* That didn't work, but the user thinks there's a 1703 /* That didn't work, but the user thinks there's a
1641 * port here. */ 1704 * port here. */
1642 printk (KERN_INFO "parport 0x%lx (WARNING): CTR: " 1705 printk(KERN_INFO "parport 0x%lx (WARNING): CTR: "
1643 "wrote 0x%02x, read 0x%02x\n", pb->base, w, r); 1706 "wrote 0x%02x, read 0x%02x\n", pb->base, w, r);
1644 1707
1645 /* Try the data register. The data lines aren't tri-stated at 1708 /* Try the data register. The data lines aren't tri-stated at
1646 * this stage, so we expect back what we wrote. */ 1709 * this stage, so we expect back what we wrote. */
1647 w = 0xaa; 1710 w = 0xaa;
1648 parport_pc_write_data (pb, w); 1711 parport_pc_write_data(pb, w);
1649 r = parport_pc_read_data (pb); 1712 r = parport_pc_read_data(pb);
1650 if (r == w) { 1713 if (r == w) {
1651 w = 0x55; 1714 w = 0x55;
1652 parport_pc_write_data (pb, w); 1715 parport_pc_write_data(pb, w);
1653 r = parport_pc_read_data (pb); 1716 r = parport_pc_read_data(pb);
1654 if (r == w) 1717 if (r == w)
1655 return PARPORT_MODE_PCSPP; 1718 return PARPORT_MODE_PCSPP;
1656 } 1719 }
@@ -1658,9 +1721,9 @@ static int parport_SPP_supported(struct parport *pb)
1658 if (user_specified) { 1721 if (user_specified) {
1659 /* Didn't work, but the user is convinced this is the 1722 /* Didn't work, but the user is convinced this is the
1660 * place. */ 1723 * place. */
1661 printk (KERN_INFO "parport 0x%lx (WARNING): DATA: " 1724 printk(KERN_INFO "parport 0x%lx (WARNING): DATA: "
1662 "wrote 0x%02x, read 0x%02x\n", pb->base, w, r); 1725 "wrote 0x%02x, read 0x%02x\n", pb->base, w, r);
1663 printk (KERN_INFO "parport 0x%lx: You gave this address, " 1726 printk(KERN_INFO "parport 0x%lx: You gave this address, "
1664 "but there is probably no parallel port there!\n", 1727 "but there is probably no parallel port there!\n",
1665 pb->base); 1728 pb->base);
1666 } 1729 }
@@ -1691,33 +1754,33 @@ static int parport_ECR_present(struct parport *pb)
1691 struct parport_pc_private *priv = pb->private_data; 1754 struct parport_pc_private *priv = pb->private_data;
1692 unsigned char r = 0xc; 1755 unsigned char r = 0xc;
1693 1756
1694 outb (r, CONTROL (pb)); 1757 outb(r, CONTROL(pb));
1695 if ((inb (ECONTROL (pb)) & 0x3) == (r & 0x3)) { 1758 if ((inb(ECONTROL(pb)) & 0x3) == (r & 0x3)) {
1696 outb (r ^ 0x2, CONTROL (pb)); /* Toggle bit 1 */ 1759 outb(r ^ 0x2, CONTROL(pb)); /* Toggle bit 1 */
1697 1760
1698 r = inb (CONTROL (pb)); 1761 r = inb(CONTROL(pb));
1699 if ((inb (ECONTROL (pb)) & 0x2) == (r & 0x2)) 1762 if ((inb(ECONTROL(pb)) & 0x2) == (r & 0x2))
1700 goto no_reg; /* Sure that no ECR register exists */ 1763 goto no_reg; /* Sure that no ECR register exists */
1701 } 1764 }
1702 1765
1703 if ((inb (ECONTROL (pb)) & 0x3 ) != 0x1) 1766 if ((inb(ECONTROL(pb)) & 0x3) != 0x1)
1704 goto no_reg; 1767 goto no_reg;
1705 1768
1706 ECR_WRITE (pb, 0x34); 1769 ECR_WRITE(pb, 0x34);
1707 if (inb (ECONTROL (pb)) != 0x35) 1770 if (inb(ECONTROL(pb)) != 0x35)
1708 goto no_reg; 1771 goto no_reg;
1709 1772
1710 priv->ecr = 1; 1773 priv->ecr = 1;
1711 outb (0xc, CONTROL (pb)); 1774 outb(0xc, CONTROL(pb));
1712 1775
1713 /* Go to mode 000 */ 1776 /* Go to mode 000 */
1714 frob_set_mode (pb, ECR_SPP); 1777 frob_set_mode(pb, ECR_SPP);
1715 1778
1716 return 1; 1779 return 1;
1717 1780
1718 no_reg: 1781 no_reg:
1719 outb (0xc, CONTROL (pb)); 1782 outb(0xc, CONTROL(pb));
1720 return 0; 1783 return 0;
1721} 1784}
1722 1785
1723#ifdef CONFIG_PARPORT_1284 1786#ifdef CONFIG_PARPORT_1284
@@ -1727,7 +1790,7 @@ static int parport_ECR_present(struct parport *pb)
1727 * allows us to read data from the data lines. In theory we would get back 1790 * allows us to read data from the data lines. In theory we would get back
1728 * 0xff but any peripheral attached to the port may drag some or all of the 1791 * 0xff but any peripheral attached to the port may drag some or all of the
1729 * lines down to zero. So if we get back anything that isn't the contents 1792 * lines down to zero. So if we get back anything that isn't the contents
1730 * of the data register we deem PS/2 support to be present. 1793 * of the data register we deem PS/2 support to be present.
1731 * 1794 *
1732 * Some SPP ports have "half PS/2" ability - you can't turn off the line 1795 * Some SPP ports have "half PS/2" ability - you can't turn off the line
1733 * drivers, but an external peripheral with sufficiently beefy drivers of 1796 * drivers, but an external peripheral with sufficiently beefy drivers of
@@ -1735,26 +1798,28 @@ static int parport_ECR_present(struct parport *pb)
1735 * where they can then be read back as normal. Ports with this property 1798 * where they can then be read back as normal. Ports with this property
1736 * and the right type of device attached are likely to fail the SPP test, 1799 * and the right type of device attached are likely to fail the SPP test,
1737 * (as they will appear to have stuck bits) and so the fact that they might 1800 * (as they will appear to have stuck bits) and so the fact that they might
1738 * be misdetected here is rather academic. 1801 * be misdetected here is rather academic.
1739 */ 1802 */
1740 1803
1741static int parport_PS2_supported(struct parport *pb) 1804static int parport_PS2_supported(struct parport *pb)
1742{ 1805{
1743 int ok = 0; 1806 int ok = 0;
1744 1807
1745 clear_epp_timeout(pb); 1808 clear_epp_timeout(pb);
1746 1809
1747 /* try to tri-state the buffer */ 1810 /* try to tri-state the buffer */
1748 parport_pc_data_reverse (pb); 1811 parport_pc_data_reverse(pb);
1749 1812
1750 parport_pc_write_data(pb, 0x55); 1813 parport_pc_write_data(pb, 0x55);
1751 if (parport_pc_read_data(pb) != 0x55) ok++; 1814 if (parport_pc_read_data(pb) != 0x55)
1815 ok++;
1752 1816
1753 parport_pc_write_data(pb, 0xaa); 1817 parport_pc_write_data(pb, 0xaa);
1754 if (parport_pc_read_data(pb) != 0xaa) ok++; 1818 if (parport_pc_read_data(pb) != 0xaa)
1819 ok++;
1755 1820
1756 /* cancel input mode */ 1821 /* cancel input mode */
1757 parport_pc_data_forward (pb); 1822 parport_pc_data_forward(pb);
1758 1823
1759 if (ok) { 1824 if (ok) {
1760 pb->modes |= PARPORT_MODE_TRISTATE; 1825 pb->modes |= PARPORT_MODE_TRISTATE;
@@ -1773,68 +1838,68 @@ static int parport_ECP_supported(struct parport *pb)
1773 int config, configb; 1838 int config, configb;
1774 int pword; 1839 int pword;
1775 struct parport_pc_private *priv = pb->private_data; 1840 struct parport_pc_private *priv = pb->private_data;
1776 /* Translate ECP intrLine to ISA irq value */ 1841 /* Translate ECP intrLine to ISA irq value */
1777 static const int intrline[]= { 0, 7, 9, 10, 11, 14, 15, 5 }; 1842 static const int intrline[] = { 0, 7, 9, 10, 11, 14, 15, 5 };
1778 1843
1779 /* If there is no ECR, we have no hope of supporting ECP. */ 1844 /* If there is no ECR, we have no hope of supporting ECP. */
1780 if (!priv->ecr) 1845 if (!priv->ecr)
1781 return 0; 1846 return 0;
1782 1847
1783 /* Find out FIFO depth */ 1848 /* Find out FIFO depth */
1784 ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */ 1849 ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */
1785 ECR_WRITE (pb, ECR_TST << 5); /* TEST FIFO */ 1850 ECR_WRITE(pb, ECR_TST << 5); /* TEST FIFO */
1786 for (i=0; i < 1024 && !(inb (ECONTROL (pb)) & 0x02); i++) 1851 for (i = 0; i < 1024 && !(inb(ECONTROL(pb)) & 0x02); i++)
1787 outb (0xaa, FIFO (pb)); 1852 outb(0xaa, FIFO(pb));
1788 1853
1789 /* 1854 /*
1790 * Using LGS chipset it uses ECR register, but 1855 * Using LGS chipset it uses ECR register, but
1791 * it doesn't support ECP or FIFO MODE 1856 * it doesn't support ECP or FIFO MODE
1792 */ 1857 */
1793 if (i == 1024) { 1858 if (i == 1024) {
1794 ECR_WRITE (pb, ECR_SPP << 5); 1859 ECR_WRITE(pb, ECR_SPP << 5);
1795 return 0; 1860 return 0;
1796 } 1861 }
1797 1862
1798 priv->fifo_depth = i; 1863 priv->fifo_depth = i;
1799 if (verbose_probing) 1864 if (verbose_probing)
1800 printk (KERN_DEBUG "0x%lx: FIFO is %d bytes\n", pb->base, i); 1865 printk(KERN_DEBUG "0x%lx: FIFO is %d bytes\n", pb->base, i);
1801 1866
1802 /* Find out writeIntrThreshold */ 1867 /* Find out writeIntrThreshold */
1803 frob_econtrol (pb, 1<<2, 1<<2); 1868 frob_econtrol(pb, 1<<2, 1<<2);
1804 frob_econtrol (pb, 1<<2, 0); 1869 frob_econtrol(pb, 1<<2, 0);
1805 for (i = 1; i <= priv->fifo_depth; i++) { 1870 for (i = 1; i <= priv->fifo_depth; i++) {
1806 inb (FIFO (pb)); 1871 inb(FIFO(pb));
1807 udelay (50); 1872 udelay(50);
1808 if (inb (ECONTROL (pb)) & (1<<2)) 1873 if (inb(ECONTROL(pb)) & (1<<2))
1809 break; 1874 break;
1810 } 1875 }
1811 1876
1812 if (i <= priv->fifo_depth) { 1877 if (i <= priv->fifo_depth) {
1813 if (verbose_probing) 1878 if (verbose_probing)
1814 printk (KERN_DEBUG "0x%lx: writeIntrThreshold is %d\n", 1879 printk(KERN_DEBUG "0x%lx: writeIntrThreshold is %d\n",
1815 pb->base, i); 1880 pb->base, i);
1816 } else 1881 } else
1817 /* Number of bytes we know we can write if we get an 1882 /* Number of bytes we know we can write if we get an
1818 interrupt. */ 1883 interrupt. */
1819 i = 0; 1884 i = 0;
1820 1885
1821 priv->writeIntrThreshold = i; 1886 priv->writeIntrThreshold = i;
1822 1887
1823 /* Find out readIntrThreshold */ 1888 /* Find out readIntrThreshold */
1824 frob_set_mode (pb, ECR_PS2); /* Reset FIFO and enable PS2 */ 1889 frob_set_mode(pb, ECR_PS2); /* Reset FIFO and enable PS2 */
1825 parport_pc_data_reverse (pb); /* Must be in PS2 mode */ 1890 parport_pc_data_reverse(pb); /* Must be in PS2 mode */
1826 frob_set_mode (pb, ECR_TST); /* Test FIFO */ 1891 frob_set_mode(pb, ECR_TST); /* Test FIFO */
1827 frob_econtrol (pb, 1<<2, 1<<2); 1892 frob_econtrol(pb, 1<<2, 1<<2);
1828 frob_econtrol (pb, 1<<2, 0); 1893 frob_econtrol(pb, 1<<2, 0);
1829 for (i = 1; i <= priv->fifo_depth; i++) { 1894 for (i = 1; i <= priv->fifo_depth; i++) {
1830 outb (0xaa, FIFO (pb)); 1895 outb(0xaa, FIFO(pb));
1831 if (inb (ECONTROL (pb)) & (1<<2)) 1896 if (inb(ECONTROL(pb)) & (1<<2))
1832 break; 1897 break;
1833 } 1898 }
1834 1899
1835 if (i <= priv->fifo_depth) { 1900 if (i <= priv->fifo_depth) {
1836 if (verbose_probing) 1901 if (verbose_probing)
1837 printk (KERN_INFO "0x%lx: readIntrThreshold is %d\n", 1902 printk(KERN_INFO "0x%lx: readIntrThreshold is %d\n",
1838 pb->base, i); 1903 pb->base, i);
1839 } else 1904 } else
1840 /* Number of bytes we can read if we get an interrupt. */ 1905 /* Number of bytes we can read if we get an interrupt. */
@@ -1842,23 +1907,23 @@ static int parport_ECP_supported(struct parport *pb)
1842 1907
1843 priv->readIntrThreshold = i; 1908 priv->readIntrThreshold = i;
1844 1909
1845 ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */ 1910 ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */
1846 ECR_WRITE (pb, 0xf4); /* Configuration mode */ 1911 ECR_WRITE(pb, 0xf4); /* Configuration mode */
1847 config = inb (CONFIGA (pb)); 1912 config = inb(CONFIGA(pb));
1848 pword = (config >> 4) & 0x7; 1913 pword = (config >> 4) & 0x7;
1849 switch (pword) { 1914 switch (pword) {
1850 case 0: 1915 case 0:
1851 pword = 2; 1916 pword = 2;
1852 printk (KERN_WARNING "0x%lx: Unsupported pword size!\n", 1917 printk(KERN_WARNING "0x%lx: Unsupported pword size!\n",
1853 pb->base); 1918 pb->base);
1854 break; 1919 break;
1855 case 2: 1920 case 2:
1856 pword = 4; 1921 pword = 4;
1857 printk (KERN_WARNING "0x%lx: Unsupported pword size!\n", 1922 printk(KERN_WARNING "0x%lx: Unsupported pword size!\n",
1858 pb->base); 1923 pb->base);
1859 break; 1924 break;
1860 default: 1925 default:
1861 printk (KERN_WARNING "0x%lx: Unknown implementation ID\n", 1926 printk(KERN_WARNING "0x%lx: Unknown implementation ID\n",
1862 pb->base); 1927 pb->base);
1863 /* Assume 1 */ 1928 /* Assume 1 */
1864 case 1: 1929 case 1:
@@ -1867,28 +1932,29 @@ static int parport_ECP_supported(struct parport *pb)
1867 priv->pword = pword; 1932 priv->pword = pword;
1868 1933
1869 if (verbose_probing) { 1934 if (verbose_probing) {
1870 printk (KERN_DEBUG "0x%lx: PWord is %d bits\n", pb->base, 8 * pword); 1935 printk(KERN_DEBUG "0x%lx: PWord is %d bits\n",
1871 1936 pb->base, 8 * pword);
1872 printk (KERN_DEBUG "0x%lx: Interrupts are ISA-%s\n", pb->base, 1937
1938 printk(KERN_DEBUG "0x%lx: Interrupts are ISA-%s\n", pb->base,
1873 config & 0x80 ? "Level" : "Pulses"); 1939 config & 0x80 ? "Level" : "Pulses");
1874 1940
1875 configb = inb (CONFIGB (pb)); 1941 configb = inb(CONFIGB(pb));
1876 printk (KERN_DEBUG "0x%lx: ECP port cfgA=0x%02x cfgB=0x%02x\n", 1942 printk(KERN_DEBUG "0x%lx: ECP port cfgA=0x%02x cfgB=0x%02x\n",
1877 pb->base, config, configb); 1943 pb->base, config, configb);
1878 printk (KERN_DEBUG "0x%lx: ECP settings irq=", pb->base); 1944 printk(KERN_DEBUG "0x%lx: ECP settings irq=", pb->base);
1879 if ((configb >>3) & 0x07) 1945 if ((configb >> 3) & 0x07)
1880 printk("%d",intrline[(configb >>3) & 0x07]); 1946 printk("%d", intrline[(configb >> 3) & 0x07]);
1881 else 1947 else
1882 printk("<none or set by other means>"); 1948 printk("<none or set by other means>");
1883 printk (" dma="); 1949 printk(" dma=");
1884 if( (configb & 0x03 ) == 0x00) 1950 if ((configb & 0x03) == 0x00)
1885 printk("<none or set by other means>\n"); 1951 printk("<none or set by other means>\n");
1886 else 1952 else
1887 printk("%d\n",configb & 0x07); 1953 printk("%d\n", configb & 0x07);
1888 } 1954 }
1889 1955
1890 /* Go back to mode 000 */ 1956 /* Go back to mode 000 */
1891 frob_set_mode (pb, ECR_SPP); 1957 frob_set_mode(pb, ECR_SPP);
1892 1958
1893 return 1; 1959 return 1;
1894} 1960}
@@ -1903,10 +1969,10 @@ static int parport_ECPPS2_supported(struct parport *pb)
1903 if (!priv->ecr) 1969 if (!priv->ecr)
1904 return 0; 1970 return 0;
1905 1971
1906 oecr = inb (ECONTROL (pb)); 1972 oecr = inb(ECONTROL(pb));
1907 ECR_WRITE (pb, ECR_PS2 << 5); 1973 ECR_WRITE(pb, ECR_PS2 << 5);
1908 result = parport_PS2_supported(pb); 1974 result = parport_PS2_supported(pb);
1909 ECR_WRITE (pb, oecr); 1975 ECR_WRITE(pb, oecr);
1910 return result; 1976 return result;
1911} 1977}
1912 1978
@@ -1930,16 +1996,15 @@ static int parport_EPP_supported(struct parport *pb)
1930 */ 1996 */
1931 1997
1932 /* If EPP timeout bit clear then EPP available */ 1998 /* If EPP timeout bit clear then EPP available */
1933 if (!clear_epp_timeout(pb)) { 1999 if (!clear_epp_timeout(pb))
1934 return 0; /* No way to clear timeout */ 2000 return 0; /* No way to clear timeout */
1935 }
1936 2001
1937 /* Check for Intel bug. */ 2002 /* Check for Intel bug. */
1938 if (priv->ecr) { 2003 if (priv->ecr) {
1939 unsigned char i; 2004 unsigned char i;
1940 for (i = 0x00; i < 0x80; i += 0x20) { 2005 for (i = 0x00; i < 0x80; i += 0x20) {
1941 ECR_WRITE (pb, i); 2006 ECR_WRITE(pb, i);
1942 if (clear_epp_timeout (pb)) { 2007 if (clear_epp_timeout(pb)) {
1943 /* Phony EPP in ECP. */ 2008 /* Phony EPP in ECP. */
1944 return 0; 2009 return 0;
1945 } 2010 }
@@ -1963,17 +2028,16 @@ static int parport_ECPEPP_supported(struct parport *pb)
1963 int result; 2028 int result;
1964 unsigned char oecr; 2029 unsigned char oecr;
1965 2030
1966 if (!priv->ecr) { 2031 if (!priv->ecr)
1967 return 0; 2032 return 0;
1968 }
1969 2033
1970 oecr = inb (ECONTROL (pb)); 2034 oecr = inb(ECONTROL(pb));
1971 /* Search for SMC style EPP+ECP mode */ 2035 /* Search for SMC style EPP+ECP mode */
1972 ECR_WRITE (pb, 0x80); 2036 ECR_WRITE(pb, 0x80);
1973 outb (0x04, CONTROL (pb)); 2037 outb(0x04, CONTROL(pb));
1974 result = parport_EPP_supported(pb); 2038 result = parport_EPP_supported(pb);
1975 2039
1976 ECR_WRITE (pb, oecr); 2040 ECR_WRITE(pb, oecr);
1977 2041
1978 if (result) { 2042 if (result) {
1979 /* Set up access functions to use ECP+EPP hardware. */ 2043 /* Set up access functions to use ECP+EPP hardware. */
@@ -1991,11 +2055,25 @@ static int parport_ECPEPP_supported(struct parport *pb)
1991/* Don't bother probing for modes we know we won't use. */ 2055/* Don't bother probing for modes we know we won't use. */
1992static int __devinit parport_PS2_supported(struct parport *pb) { return 0; } 2056static int __devinit parport_PS2_supported(struct parport *pb) { return 0; }
1993#ifdef CONFIG_PARPORT_PC_FIFO 2057#ifdef CONFIG_PARPORT_PC_FIFO
1994static int parport_ECP_supported(struct parport *pb) { return 0; } 2058static int parport_ECP_supported(struct parport *pb)
2059{
2060 return 0;
2061}
1995#endif 2062#endif
1996static int __devinit parport_EPP_supported(struct parport *pb) { return 0; } 2063static int __devinit parport_EPP_supported(struct parport *pb)
1997static int __devinit parport_ECPEPP_supported(struct parport *pb){return 0;} 2064{
1998static int __devinit parport_ECPPS2_supported(struct parport *pb){return 0;} 2065 return 0;
2066}
2067
2068static int __devinit parport_ECPEPP_supported(struct parport *pb)
2069{
2070 return 0;
2071}
2072
2073static int __devinit parport_ECPPS2_supported(struct parport *pb)
2074{
2075 return 0;
2076}
1999 2077
2000#endif /* No IEEE 1284 support */ 2078#endif /* No IEEE 1284 support */
2001 2079
@@ -2005,17 +2083,17 @@ static int __devinit parport_ECPPS2_supported(struct parport *pb){return 0;}
2005static int programmable_irq_support(struct parport *pb) 2083static int programmable_irq_support(struct parport *pb)
2006{ 2084{
2007 int irq, intrLine; 2085 int irq, intrLine;
2008 unsigned char oecr = inb (ECONTROL (pb)); 2086 unsigned char oecr = inb(ECONTROL(pb));
2009 static const int lookup[8] = { 2087 static const int lookup[8] = {
2010 PARPORT_IRQ_NONE, 7, 9, 10, 11, 14, 15, 5 2088 PARPORT_IRQ_NONE, 7, 9, 10, 11, 14, 15, 5
2011 }; 2089 };
2012 2090
2013 ECR_WRITE (pb, ECR_CNF << 5); /* Configuration MODE */ 2091 ECR_WRITE(pb, ECR_CNF << 5); /* Configuration MODE */
2014 2092
2015 intrLine = (inb (CONFIGB (pb)) >> 3) & 0x07; 2093 intrLine = (inb(CONFIGB(pb)) >> 3) & 0x07;
2016 irq = lookup[intrLine]; 2094 irq = lookup[intrLine];
2017 2095
2018 ECR_WRITE (pb, oecr); 2096 ECR_WRITE(pb, oecr);
2019 return irq; 2097 return irq;
2020} 2098}
2021 2099
@@ -2025,17 +2103,17 @@ static int irq_probe_ECP(struct parport *pb)
2025 unsigned long irqs; 2103 unsigned long irqs;
2026 2104
2027 irqs = probe_irq_on(); 2105 irqs = probe_irq_on();
2028 2106
2029 ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */ 2107 ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */
2030 ECR_WRITE (pb, (ECR_TST << 5) | 0x04); 2108 ECR_WRITE(pb, (ECR_TST << 5) | 0x04);
2031 ECR_WRITE (pb, ECR_TST << 5); 2109 ECR_WRITE(pb, ECR_TST << 5);
2032 2110
2033 /* If Full FIFO sure that writeIntrThreshold is generated */ 2111 /* If Full FIFO sure that writeIntrThreshold is generated */
2034 for (i=0; i < 1024 && !(inb (ECONTROL (pb)) & 0x02) ; i++) 2112 for (i = 0; i < 1024 && !(inb(ECONTROL(pb)) & 0x02) ; i++)
2035 outb (0xaa, FIFO (pb)); 2113 outb(0xaa, FIFO(pb));
2036 2114
2037 pb->irq = probe_irq_off(irqs); 2115 pb->irq = probe_irq_off(irqs);
2038 ECR_WRITE (pb, ECR_SPP << 5); 2116 ECR_WRITE(pb, ECR_SPP << 5);
2039 2117
2040 if (pb->irq <= 0) 2118 if (pb->irq <= 0)
2041 pb->irq = PARPORT_IRQ_NONE; 2119 pb->irq = PARPORT_IRQ_NONE;
@@ -2045,7 +2123,7 @@ static int irq_probe_ECP(struct parport *pb)
2045 2123
2046/* 2124/*
2047 * This detection seems that only works in National Semiconductors 2125 * This detection seems that only works in National Semiconductors
2048 * This doesn't work in SMC, LGS, and Winbond 2126 * This doesn't work in SMC, LGS, and Winbond
2049 */ 2127 */
2050static int irq_probe_EPP(struct parport *pb) 2128static int irq_probe_EPP(struct parport *pb)
2051{ 2129{
@@ -2056,16 +2134,16 @@ static int irq_probe_EPP(struct parport *pb)
2056 unsigned char oecr; 2134 unsigned char oecr;
2057 2135
2058 if (pb->modes & PARPORT_MODE_PCECR) 2136 if (pb->modes & PARPORT_MODE_PCECR)
2059 oecr = inb (ECONTROL (pb)); 2137 oecr = inb(ECONTROL(pb));
2060 2138
2061 irqs = probe_irq_on(); 2139 irqs = probe_irq_on();
2062 2140
2063 if (pb->modes & PARPORT_MODE_PCECR) 2141 if (pb->modes & PARPORT_MODE_PCECR)
2064 frob_econtrol (pb, 0x10, 0x10); 2142 frob_econtrol(pb, 0x10, 0x10);
2065 2143
2066 clear_epp_timeout(pb); 2144 clear_epp_timeout(pb);
2067 parport_pc_frob_control (pb, 0x20, 0x20); 2145 parport_pc_frob_control(pb, 0x20, 0x20);
2068 parport_pc_frob_control (pb, 0x10, 0x10); 2146 parport_pc_frob_control(pb, 0x10, 0x10);
2069 clear_epp_timeout(pb); 2147 clear_epp_timeout(pb);
2070 2148
2071 /* Device isn't expecting an EPP read 2149 /* Device isn't expecting an EPP read
@@ -2074,9 +2152,9 @@ static int irq_probe_EPP(struct parport *pb)
2074 parport_pc_read_epp(pb); 2152 parport_pc_read_epp(pb);
2075 udelay(20); 2153 udelay(20);
2076 2154
2077 pb->irq = probe_irq_off (irqs); 2155 pb->irq = probe_irq_off(irqs);
2078 if (pb->modes & PARPORT_MODE_PCECR) 2156 if (pb->modes & PARPORT_MODE_PCECR)
2079 ECR_WRITE (pb, oecr); 2157 ECR_WRITE(pb, oecr);
2080 parport_pc_write_control(pb, 0xc); 2158 parport_pc_write_control(pb, 0xc);
2081 2159
2082 if (pb->irq <= 0) 2160 if (pb->irq <= 0)
@@ -2133,28 +2211,28 @@ static int parport_irq_probe(struct parport *pb)
2133/* --- DMA detection -------------------------------------- */ 2211/* --- DMA detection -------------------------------------- */
2134 2212
2135/* Only if chipset conforms to ECP ISA Interface Standard */ 2213/* Only if chipset conforms to ECP ISA Interface Standard */
2136static int programmable_dma_support (struct parport *p) 2214static int programmable_dma_support(struct parport *p)
2137{ 2215{
2138 unsigned char oecr = inb (ECONTROL (p)); 2216 unsigned char oecr = inb(ECONTROL(p));
2139 int dma; 2217 int dma;
2140 2218
2141 frob_set_mode (p, ECR_CNF); 2219 frob_set_mode(p, ECR_CNF);
2142 2220
2143 dma = inb (CONFIGB(p)) & 0x07; 2221 dma = inb(CONFIGB(p)) & 0x07;
2144 /* 000: Indicates jumpered 8-bit DMA if read-only. 2222 /* 000: Indicates jumpered 8-bit DMA if read-only.
2145 100: Indicates jumpered 16-bit DMA if read-only. */ 2223 100: Indicates jumpered 16-bit DMA if read-only. */
2146 if ((dma & 0x03) == 0) 2224 if ((dma & 0x03) == 0)
2147 dma = PARPORT_DMA_NONE; 2225 dma = PARPORT_DMA_NONE;
2148 2226
2149 ECR_WRITE (p, oecr); 2227 ECR_WRITE(p, oecr);
2150 return dma; 2228 return dma;
2151} 2229}
2152 2230
2153static int parport_dma_probe (struct parport *p) 2231static int parport_dma_probe(struct parport *p)
2154{ 2232{
2155 const struct parport_pc_private *priv = p->private_data; 2233 const struct parport_pc_private *priv = p->private_data;
2156 if (priv->ecr) 2234 if (priv->ecr) /* ask ECP chipset first */
2157 p->dma = programmable_dma_support(p); /* ask ECP chipset first */ 2235 p->dma = programmable_dma_support(p);
2158 if (p->dma == PARPORT_DMA_NONE) { 2236 if (p->dma == PARPORT_DMA_NONE) {
2159 /* ask known Super-IO chips proper, although these 2237 /* ask known Super-IO chips proper, although these
2160 claim ECP compatible, some don't report their DMA 2238 claim ECP compatible, some don't report their DMA
@@ -2212,7 +2290,7 @@ struct parport *parport_pc_probe_port(unsigned long int base,
2212 if (!base_res) 2290 if (!base_res)
2213 goto out4; 2291 goto out4;
2214 2292
2215 memcpy(ops, &parport_pc_ops, sizeof (struct parport_operations)); 2293 memcpy(ops, &parport_pc_ops, sizeof(struct parport_operations));
2216 priv->ctr = 0xc; 2294 priv->ctr = 0xc;
2217 priv->ctr_writable = ~0x10; 2295 priv->ctr_writable = ~0x10;
2218 priv->ecr = 0; 2296 priv->ecr = 0;
@@ -2239,7 +2317,7 @@ struct parport *parport_pc_probe_port(unsigned long int base,
2239 if (!parport_EPP_supported(p)) 2317 if (!parport_EPP_supported(p))
2240 parport_ECPEPP_supported(p); 2318 parport_ECPEPP_supported(p);
2241 } 2319 }
2242 if (!parport_SPP_supported (p)) 2320 if (!parport_SPP_supported(p))
2243 /* No port. */ 2321 /* No port. */
2244 goto out5; 2322 goto out5;
2245 if (priv->ecr) 2323 if (priv->ecr)
@@ -2247,7 +2325,7 @@ struct parport *parport_pc_probe_port(unsigned long int base,
2247 else 2325 else
2248 parport_PS2_supported(p); 2326 parport_PS2_supported(p);
2249 2327
2250 p->size = (p->modes & PARPORT_MODE_EPP)?8:3; 2328 p->size = (p->modes & PARPORT_MODE_EPP) ? 8 : 3;
2251 2329
2252 printk(KERN_INFO "%s: PC-style at 0x%lx", p->name, p->base); 2330 printk(KERN_INFO "%s: PC-style at 0x%lx", p->name, p->base);
2253 if (p->base_hi && priv->ecr) 2331 if (p->base_hi && priv->ecr)
@@ -2271,7 +2349,7 @@ struct parport *parport_pc_probe_port(unsigned long int base,
2271 } 2349 }
2272 } 2350 }
2273 if (p->dma == PARPORT_DMA_AUTO) /* To use DMA, giving the irq 2351 if (p->dma == PARPORT_DMA_AUTO) /* To use DMA, giving the irq
2274 is mandatory (see above) */ 2352 is mandatory (see above) */
2275 p->dma = PARPORT_DMA_NONE; 2353 p->dma = PARPORT_DMA_NONE;
2276 2354
2277#ifdef CONFIG_PARPORT_PC_FIFO 2355#ifdef CONFIG_PARPORT_PC_FIFO
@@ -2288,16 +2366,23 @@ struct parport *parport_pc_probe_port(unsigned long int base,
2288 if (p->dma != PARPORT_DMA_NONE) { 2366 if (p->dma != PARPORT_DMA_NONE) {
2289 printk(", dma %d", p->dma); 2367 printk(", dma %d", p->dma);
2290 p->modes |= PARPORT_MODE_DMA; 2368 p->modes |= PARPORT_MODE_DMA;
2291 } 2369 } else
2292 else printk(", using FIFO"); 2370 printk(", using FIFO");
2293 } 2371 } else
2294 else
2295 /* We can't use the DMA channel after all. */ 2372 /* We can't use the DMA channel after all. */
2296 p->dma = PARPORT_DMA_NONE; 2373 p->dma = PARPORT_DMA_NONE;
2297#endif /* Allowed to use FIFO/DMA */ 2374#endif /* Allowed to use FIFO/DMA */
2298 2375
2299 printk(" ["); 2376 printk(" [");
2300#define printmode(x) {if(p->modes&PARPORT_MODE_##x){printk("%s%s",f?",":"",#x);f++;}} 2377
2378#define printmode(x) \
2379 {\
2380 if (p->modes & PARPORT_MODE_##x) {\
2381 printk("%s%s", f ? "," : "", #x);\
2382 f++;\
2383 } \
2384 }
2385
2301 { 2386 {
2302 int f = 0; 2387 int f = 0;
2303 printmode(PCSPP); 2388 printmode(PCSPP);
@@ -2309,10 +2394,10 @@ struct parport *parport_pc_probe_port(unsigned long int base,
2309 } 2394 }
2310#undef printmode 2395#undef printmode
2311#ifndef CONFIG_PARPORT_1284 2396#ifndef CONFIG_PARPORT_1284
2312 printk ("(,...)"); 2397 printk("(,...)");
2313#endif /* CONFIG_PARPORT_1284 */ 2398#endif /* CONFIG_PARPORT_1284 */
2314 printk("]\n"); 2399 printk("]\n");
2315 if (probedirq != PARPORT_IRQ_NONE) 2400 if (probedirq != PARPORT_IRQ_NONE)
2316 printk(KERN_INFO "%s: irq %d detected\n", p->name, probedirq); 2401 printk(KERN_INFO "%s: irq %d detected\n", p->name, probedirq);
2317 2402
2318 /* If No ECP release the ports grabbed above. */ 2403 /* If No ECP release the ports grabbed above. */
@@ -2328,7 +2413,7 @@ struct parport *parport_pc_probe_port(unsigned long int base,
2328 if (p->irq != PARPORT_IRQ_NONE) { 2413 if (p->irq != PARPORT_IRQ_NONE) {
2329 if (request_irq(p->irq, parport_irq_handler, 2414 if (request_irq(p->irq, parport_irq_handler,
2330 irqflags, p->name, p)) { 2415 irqflags, p->name, p)) {
2331 printk (KERN_WARNING "%s: irq %d in use, " 2416 printk(KERN_WARNING "%s: irq %d in use, "
2332 "resorting to polled operation\n", 2417 "resorting to polled operation\n",
2333 p->name, p->irq); 2418 p->name, p->irq);
2334 p->irq = PARPORT_IRQ_NONE; 2419 p->irq = PARPORT_IRQ_NONE;
@@ -2338,8 +2423,8 @@ struct parport *parport_pc_probe_port(unsigned long int base,
2338#ifdef CONFIG_PARPORT_PC_FIFO 2423#ifdef CONFIG_PARPORT_PC_FIFO
2339#ifdef HAS_DMA 2424#ifdef HAS_DMA
2340 if (p->dma != PARPORT_DMA_NONE) { 2425 if (p->dma != PARPORT_DMA_NONE) {
2341 if (request_dma (p->dma, p->name)) { 2426 if (request_dma(p->dma, p->name)) {
2342 printk (KERN_WARNING "%s: dma %d in use, " 2427 printk(KERN_WARNING "%s: dma %d in use, "
2343 "resorting to PIO operation\n", 2428 "resorting to PIO operation\n",
2344 p->name, p->dma); 2429 p->name, p->dma);
2345 p->dma = PARPORT_DMA_NONE; 2430 p->dma = PARPORT_DMA_NONE;
@@ -2349,8 +2434,8 @@ struct parport *parport_pc_probe_port(unsigned long int base,
2349 PAGE_SIZE, 2434 PAGE_SIZE,
2350 &priv->dma_handle, 2435 &priv->dma_handle,
2351 GFP_KERNEL); 2436 GFP_KERNEL);
2352 if (! priv->dma_buf) { 2437 if (!priv->dma_buf) {
2353 printk (KERN_WARNING "%s: " 2438 printk(KERN_WARNING "%s: "
2354 "cannot get buffer for DMA, " 2439 "cannot get buffer for DMA, "
2355 "resorting to PIO operation\n", 2440 "resorting to PIO operation\n",
2356 p->name); 2441 p->name);
@@ -2369,10 +2454,10 @@ struct parport *parport_pc_probe_port(unsigned long int base,
2369 * Put the ECP detected port in PS2 mode. 2454 * Put the ECP detected port in PS2 mode.
2370 * Do this also for ports that have ECR but don't do ECP. 2455 * Do this also for ports that have ECR but don't do ECP.
2371 */ 2456 */
2372 ECR_WRITE (p, 0x34); 2457 ECR_WRITE(p, 0x34);
2373 2458
2374 parport_pc_write_data(p, 0); 2459 parport_pc_write_data(p, 0);
2375 parport_pc_data_forward (p); 2460 parport_pc_data_forward(p);
2376 2461
2377 /* Now that we've told the sharing engine about the port, and 2462 /* Now that we've told the sharing engine about the port, and
2378 found out its characteristics, let the high-level drivers 2463 found out its characteristics, let the high-level drivers
@@ -2380,7 +2465,7 @@ struct parport *parport_pc_probe_port(unsigned long int base,
2380 spin_lock(&ports_lock); 2465 spin_lock(&ports_lock);
2381 list_add(&priv->list, &ports_list); 2466 list_add(&priv->list, &ports_list);
2382 spin_unlock(&ports_lock); 2467 spin_unlock(&ports_lock);
2383 parport_announce_port (p); 2468 parport_announce_port(p);
2384 2469
2385 return p; 2470 return p;
2386 2471
@@ -2393,18 +2478,17 @@ out5:
2393out4: 2478out4:
2394 parport_put_port(p); 2479 parport_put_port(p);
2395out3: 2480out3:
2396 kfree (priv); 2481 kfree(priv);
2397out2: 2482out2:
2398 kfree (ops); 2483 kfree(ops);
2399out1: 2484out1:
2400 if (pdev) 2485 if (pdev)
2401 platform_device_unregister(pdev); 2486 platform_device_unregister(pdev);
2402 return NULL; 2487 return NULL;
2403} 2488}
2489EXPORT_SYMBOL(parport_pc_probe_port);
2404 2490
2405EXPORT_SYMBOL (parport_pc_probe_port); 2491void parport_pc_unregister_port(struct parport *p)
2406
2407void parport_pc_unregister_port (struct parport *p)
2408{ 2492{
2409 struct parport_pc_private *priv = p->private_data; 2493 struct parport_pc_private *priv = p->private_data;
2410 struct parport_operations *ops = p->ops; 2494 struct parport_operations *ops = p->ops;
@@ -2430,17 +2514,16 @@ void parport_pc_unregister_port (struct parport *p)
2430 priv->dma_buf, 2514 priv->dma_buf,
2431 priv->dma_handle); 2515 priv->dma_handle);
2432#endif 2516#endif
2433 kfree (p->private_data); 2517 kfree(p->private_data);
2434 parport_put_port(p); 2518 parport_put_port(p);
2435 kfree (ops); /* hope no-one cached it */ 2519 kfree(ops); /* hope no-one cached it */
2436} 2520}
2437 2521EXPORT_SYMBOL(parport_pc_unregister_port);
2438EXPORT_SYMBOL (parport_pc_unregister_port);
2439 2522
2440#ifdef CONFIG_PCI 2523#ifdef CONFIG_PCI
2441 2524
2442/* ITE support maintained by Rich Liu <richliu@poorman.org> */ 2525/* ITE support maintained by Rich Liu <richliu@poorman.org> */
2443static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq, 2526static int __devinit sio_ite_8872_probe(struct pci_dev *pdev, int autoirq,
2444 int autodma, 2527 int autodma,
2445 const struct parport_pc_via_data *via) 2528 const struct parport_pc_via_data *via)
2446{ 2529{
@@ -2452,73 +2535,74 @@ static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq,
2452 int irq; 2535 int irq;
2453 int i; 2536 int i;
2454 2537
2455 DPRINTK (KERN_DEBUG "sio_ite_8872_probe()\n"); 2538 DPRINTK(KERN_DEBUG "sio_ite_8872_probe()\n");
2456 2539
2457 // make sure which one chip 2540 /* make sure which one chip */
2458 for(i = 0; i < 5; i++) { 2541 for (i = 0; i < 5; i++) {
2459 base_res = request_region(inta_addr[i], 32, "it887x"); 2542 base_res = request_region(inta_addr[i], 32, "it887x");
2460 if (base_res) { 2543 if (base_res) {
2461 int test; 2544 int test;
2462 pci_write_config_dword (pdev, 0x60, 2545 pci_write_config_dword(pdev, 0x60,
2463 0xe5000000 | inta_addr[i]); 2546 0xe5000000 | inta_addr[i]);
2464 pci_write_config_dword (pdev, 0x78, 2547 pci_write_config_dword(pdev, 0x78,
2465 0x00000000 | inta_addr[i]); 2548 0x00000000 | inta_addr[i]);
2466 test = inb (inta_addr[i]); 2549 test = inb(inta_addr[i]);
2467 if (test != 0xff) break; 2550 if (test != 0xff)
2551 break;
2468 release_region(inta_addr[i], 0x8); 2552 release_region(inta_addr[i], 0x8);
2469 } 2553 }
2470 } 2554 }
2471 if(i >= 5) { 2555 if (i >= 5) {
2472 printk (KERN_INFO "parport_pc: cannot find ITE8872 INTA\n"); 2556 printk(KERN_INFO "parport_pc: cannot find ITE8872 INTA\n");
2473 return 0; 2557 return 0;
2474 } 2558 }
2475 2559
2476 type = inb (inta_addr[i] + 0x18); 2560 type = inb(inta_addr[i] + 0x18);
2477 type &= 0x0f; 2561 type &= 0x0f;
2478 2562
2479 switch (type) { 2563 switch (type) {
2480 case 0x2: 2564 case 0x2:
2481 printk (KERN_INFO "parport_pc: ITE8871 found (1P)\n"); 2565 printk(KERN_INFO "parport_pc: ITE8871 found (1P)\n");
2482 ite8872set = 0x64200000; 2566 ite8872set = 0x64200000;
2483 break; 2567 break;
2484 case 0xa: 2568 case 0xa:
2485 printk (KERN_INFO "parport_pc: ITE8875 found (1P)\n"); 2569 printk(KERN_INFO "parport_pc: ITE8875 found (1P)\n");
2486 ite8872set = 0x64200000; 2570 ite8872set = 0x64200000;
2487 break; 2571 break;
2488 case 0xe: 2572 case 0xe:
2489 printk (KERN_INFO "parport_pc: ITE8872 found (2S1P)\n"); 2573 printk(KERN_INFO "parport_pc: ITE8872 found (2S1P)\n");
2490 ite8872set = 0x64e00000; 2574 ite8872set = 0x64e00000;
2491 break; 2575 break;
2492 case 0x6: 2576 case 0x6:
2493 printk (KERN_INFO "parport_pc: ITE8873 found (1S)\n"); 2577 printk(KERN_INFO "parport_pc: ITE8873 found (1S)\n");
2494 return 0; 2578 return 0;
2495 case 0x8: 2579 case 0x8:
2496 DPRINTK (KERN_DEBUG "parport_pc: ITE8874 found (2S)\n"); 2580 DPRINTK(KERN_DEBUG "parport_pc: ITE8874 found (2S)\n");
2497 return 0; 2581 return 0;
2498 default: 2582 default:
2499 printk (KERN_INFO "parport_pc: unknown ITE887x\n"); 2583 printk(KERN_INFO "parport_pc: unknown ITE887x\n");
2500 printk (KERN_INFO "parport_pc: please mail 'lspci -nvv' " 2584 printk(KERN_INFO "parport_pc: please mail 'lspci -nvv' "
2501 "output to Rich.Liu@ite.com.tw\n"); 2585 "output to Rich.Liu@ite.com.tw\n");
2502 return 0; 2586 return 0;
2503 } 2587 }
2504 2588
2505 pci_read_config_byte (pdev, 0x3c, &ite8872_irq); 2589 pci_read_config_byte(pdev, 0x3c, &ite8872_irq);
2506 pci_read_config_dword (pdev, 0x1c, &ite8872_lpt); 2590 pci_read_config_dword(pdev, 0x1c, &ite8872_lpt);
2507 ite8872_lpt &= 0x0000ff00; 2591 ite8872_lpt &= 0x0000ff00;
2508 pci_read_config_dword (pdev, 0x20, &ite8872_lpthi); 2592 pci_read_config_dword(pdev, 0x20, &ite8872_lpthi);
2509 ite8872_lpthi &= 0x0000ff00; 2593 ite8872_lpthi &= 0x0000ff00;
2510 pci_write_config_dword (pdev, 0x6c, 0xe3000000 | ite8872_lpt); 2594 pci_write_config_dword(pdev, 0x6c, 0xe3000000 | ite8872_lpt);
2511 pci_write_config_dword (pdev, 0x70, 0xe3000000 | ite8872_lpthi); 2595 pci_write_config_dword(pdev, 0x70, 0xe3000000 | ite8872_lpthi);
2512 pci_write_config_dword (pdev, 0x80, (ite8872_lpthi<<16) | ite8872_lpt); 2596 pci_write_config_dword(pdev, 0x80, (ite8872_lpthi<<16) | ite8872_lpt);
2513 // SET SPP&EPP , Parallel Port NO DMA , Enable All Function 2597 /* SET SPP&EPP , Parallel Port NO DMA , Enable All Function */
2514 // SET Parallel IRQ 2598 /* SET Parallel IRQ */
2515 pci_write_config_dword (pdev, 0x9c, 2599 pci_write_config_dword(pdev, 0x9c,
2516 ite8872set | (ite8872_irq * 0x11111)); 2600 ite8872set | (ite8872_irq * 0x11111));
2517 2601
2518 DPRINTK (KERN_DEBUG "ITE887x: The IRQ is %d.\n", ite8872_irq); 2602 DPRINTK(KERN_DEBUG "ITE887x: The IRQ is %d.\n", ite8872_irq);
2519 DPRINTK (KERN_DEBUG "ITE887x: The PARALLEL I/O port is 0x%x.\n", 2603 DPRINTK(KERN_DEBUG "ITE887x: The PARALLEL I/O port is 0x%x.\n",
2520 ite8872_lpt); 2604 ite8872_lpt);
2521 DPRINTK (KERN_DEBUG "ITE887x: The PARALLEL I/O porthi is 0x%x.\n", 2605 DPRINTK(KERN_DEBUG "ITE887x: The PARALLEL I/O porthi is 0x%x.\n",
2522 ite8872_lpthi); 2606 ite8872_lpthi);
2523 2607
2524 /* Let the user (or defaults) steer us away from interrupts */ 2608 /* Let the user (or defaults) steer us away from interrupts */
@@ -2530,14 +2614,14 @@ static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq,
2530 * Release the resource so that parport_pc_probe_port can get it. 2614 * Release the resource so that parport_pc_probe_port can get it.
2531 */ 2615 */
2532 release_resource(base_res); 2616 release_resource(base_res);
2533 if (parport_pc_probe_port (ite8872_lpt, ite8872_lpthi, 2617 if (parport_pc_probe_port(ite8872_lpt, ite8872_lpthi,
2534 irq, PARPORT_DMA_NONE, &pdev->dev, 0)) { 2618 irq, PARPORT_DMA_NONE, &pdev->dev, 0)) {
2535 printk (KERN_INFO 2619 printk(KERN_INFO
2536 "parport_pc: ITE 8872 parallel port: io=0x%X", 2620 "parport_pc: ITE 8872 parallel port: io=0x%X",
2537 ite8872_lpt); 2621 ite8872_lpt);
2538 if (irq != PARPORT_IRQ_NONE) 2622 if (irq != PARPORT_IRQ_NONE)
2539 printk (", irq=%d", irq); 2623 printk(", irq=%d", irq);
2540 printk ("\n"); 2624 printk("\n");
2541 return 1; 2625 return 1;
2542 } 2626 }
2543 2627
@@ -2546,7 +2630,7 @@ static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq,
2546 2630
2547/* VIA 8231 support by Pavel Fedin <sonic_amiga@rambler.ru> 2631/* VIA 8231 support by Pavel Fedin <sonic_amiga@rambler.ru>
2548 based on VIA 686a support code by Jeff Garzik <jgarzik@pobox.com> */ 2632 based on VIA 686a support code by Jeff Garzik <jgarzik@pobox.com> */
2549static int __devinitdata parport_init_mode = 0; 2633static int __devinitdata parport_init_mode;
2550 2634
2551/* Data for two known VIA chips */ 2635/* Data for two known VIA chips */
2552static struct parport_pc_via_data via_686a_data __devinitdata = { 2636static struct parport_pc_via_data via_686a_data __devinitdata = {
@@ -2568,7 +2652,7 @@ static struct parport_pc_via_data via_8231_data __devinitdata = {
2568 0xF6 2652 0xF6
2569}; 2653};
2570 2654
2571static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq, 2655static int __devinit sio_via_probe(struct pci_dev *pdev, int autoirq,
2572 int autodma, 2656 int autodma,
2573 const struct parport_pc_via_data *via) 2657 const struct parport_pc_via_data *via)
2574{ 2658{
@@ -2580,38 +2664,38 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
2580 2664
2581 printk(KERN_DEBUG "parport_pc: VIA 686A/8231 detected\n"); 2665 printk(KERN_DEBUG "parport_pc: VIA 686A/8231 detected\n");
2582 2666
2583 switch(parport_init_mode) 2667 switch (parport_init_mode) {
2584 {
2585 case 1: 2668 case 1:
2586 printk(KERN_DEBUG "parport_pc: setting SPP mode\n"); 2669 printk(KERN_DEBUG "parport_pc: setting SPP mode\n");
2587 siofunc = VIA_FUNCTION_PARPORT_SPP; 2670 siofunc = VIA_FUNCTION_PARPORT_SPP;
2588 break; 2671 break;
2589 case 2: 2672 case 2:
2590 printk(KERN_DEBUG "parport_pc: setting PS/2 mode\n"); 2673 printk(KERN_DEBUG "parport_pc: setting PS/2 mode\n");
2591 siofunc = VIA_FUNCTION_PARPORT_SPP; 2674 siofunc = VIA_FUNCTION_PARPORT_SPP;
2592 ppcontrol = VIA_PARPORT_BIDIR; 2675 ppcontrol = VIA_PARPORT_BIDIR;
2593 break; 2676 break;
2594 case 3: 2677 case 3:
2595 printk(KERN_DEBUG "parport_pc: setting EPP mode\n"); 2678 printk(KERN_DEBUG "parport_pc: setting EPP mode\n");
2596 siofunc = VIA_FUNCTION_PARPORT_EPP; 2679 siofunc = VIA_FUNCTION_PARPORT_EPP;
2597 ppcontrol = VIA_PARPORT_BIDIR; 2680 ppcontrol = VIA_PARPORT_BIDIR;
2598 have_epp = 1; 2681 have_epp = 1;
2599 break; 2682 break;
2600 case 4: 2683 case 4:
2601 printk(KERN_DEBUG "parport_pc: setting ECP mode\n"); 2684 printk(KERN_DEBUG "parport_pc: setting ECP mode\n");
2602 siofunc = VIA_FUNCTION_PARPORT_ECP; 2685 siofunc = VIA_FUNCTION_PARPORT_ECP;
2603 ppcontrol = VIA_PARPORT_BIDIR; 2686 ppcontrol = VIA_PARPORT_BIDIR;
2604 break; 2687 break;
2605 case 5: 2688 case 5:
2606 printk(KERN_DEBUG "parport_pc: setting EPP+ECP mode\n"); 2689 printk(KERN_DEBUG "parport_pc: setting EPP+ECP mode\n");
2607 siofunc = VIA_FUNCTION_PARPORT_ECP; 2690 siofunc = VIA_FUNCTION_PARPORT_ECP;
2608 ppcontrol = VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP; 2691 ppcontrol = VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP;
2609 have_epp = 1; 2692 have_epp = 1;
2610 break; 2693 break;
2611 default: 2694 default:
2612 printk(KERN_DEBUG "parport_pc: probing current configuration\n"); 2695 printk(KERN_DEBUG
2613 siofunc = VIA_FUNCTION_PROBE; 2696 "parport_pc: probing current configuration\n");
2614 break; 2697 siofunc = VIA_FUNCTION_PROBE;
2698 break;
2615 } 2699 }
2616 /* 2700 /*
2617 * unlock super i/o configuration 2701 * unlock super i/o configuration
@@ -2622,38 +2706,36 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
2622 2706
2623 /* Bits 1-0: Parallel Port Mode / Enable */ 2707 /* Bits 1-0: Parallel Port Mode / Enable */
2624 outb(via->viacfg_function, VIA_CONFIG_INDEX); 2708 outb(via->viacfg_function, VIA_CONFIG_INDEX);
2625 tmp = inb (VIA_CONFIG_DATA); 2709 tmp = inb(VIA_CONFIG_DATA);
2626 /* Bit 5: EPP+ECP enable; bit 7: PS/2 bidirectional port enable */ 2710 /* Bit 5: EPP+ECP enable; bit 7: PS/2 bidirectional port enable */
2627 outb(via->viacfg_parport_control, VIA_CONFIG_INDEX); 2711 outb(via->viacfg_parport_control, VIA_CONFIG_INDEX);
2628 tmp2 = inb (VIA_CONFIG_DATA); 2712 tmp2 = inb(VIA_CONFIG_DATA);
2629 if (siofunc == VIA_FUNCTION_PROBE) 2713 if (siofunc == VIA_FUNCTION_PROBE) {
2630 { 2714 siofunc = tmp & VIA_FUNCTION_PARPORT_DISABLE;
2631 siofunc = tmp & VIA_FUNCTION_PARPORT_DISABLE; 2715 ppcontrol = tmp2;
2632 ppcontrol = tmp2; 2716 } else {
2717 tmp &= ~VIA_FUNCTION_PARPORT_DISABLE;
2718 tmp |= siofunc;
2719 outb(via->viacfg_function, VIA_CONFIG_INDEX);
2720 outb(tmp, VIA_CONFIG_DATA);
2721 tmp2 &= ~(VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP);
2722 tmp2 |= ppcontrol;
2723 outb(via->viacfg_parport_control, VIA_CONFIG_INDEX);
2724 outb(tmp2, VIA_CONFIG_DATA);
2633 } 2725 }
2634 else 2726
2635 {
2636 tmp &= ~VIA_FUNCTION_PARPORT_DISABLE;
2637 tmp |= siofunc;
2638 outb(via->viacfg_function, VIA_CONFIG_INDEX);
2639 outb(tmp, VIA_CONFIG_DATA);
2640 tmp2 &= ~(VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP);
2641 tmp2 |= ppcontrol;
2642 outb(via->viacfg_parport_control, VIA_CONFIG_INDEX);
2643 outb(tmp2, VIA_CONFIG_DATA);
2644 }
2645
2646 /* Parallel Port I/O Base Address, bits 9-2 */ 2727 /* Parallel Port I/O Base Address, bits 9-2 */
2647 outb(via->viacfg_parport_base, VIA_CONFIG_INDEX); 2728 outb(via->viacfg_parport_base, VIA_CONFIG_INDEX);
2648 port1 = inb(VIA_CONFIG_DATA) << 2; 2729 port1 = inb(VIA_CONFIG_DATA) << 2;
2649 2730
2650 printk (KERN_DEBUG "parport_pc: Current parallel port base: 0x%X\n",port1); 2731 printk(KERN_DEBUG "parport_pc: Current parallel port base: 0x%X\n",
2651 if ((port1 == 0x3BC) && have_epp) 2732 port1);
2652 { 2733 if (port1 == 0x3BC && have_epp) {
2653 outb(via->viacfg_parport_base, VIA_CONFIG_INDEX); 2734 outb(via->viacfg_parport_base, VIA_CONFIG_INDEX);
2654 outb((0x378 >> 2), VIA_CONFIG_DATA); 2735 outb((0x378 >> 2), VIA_CONFIG_DATA);
2655 printk(KERN_DEBUG "parport_pc: Parallel port base changed to 0x378\n"); 2736 printk(KERN_DEBUG
2656 port1 = 0x378; 2737 "parport_pc: Parallel port base changed to 0x378\n");
2738 port1 = 0x378;
2657 } 2739 }
2658 2740
2659 /* 2741 /*
@@ -2667,36 +2749,39 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
2667 printk(KERN_INFO "parport_pc: VIA parallel port disabled in BIOS\n"); 2749 printk(KERN_INFO "parport_pc: VIA parallel port disabled in BIOS\n");
2668 return 0; 2750 return 0;
2669 } 2751 }
2670 2752
2671 /* Bits 7-4: PnP Routing for Parallel Port IRQ */ 2753 /* Bits 7-4: PnP Routing for Parallel Port IRQ */
2672 pci_read_config_byte(pdev, via->via_pci_parport_irq_reg, &tmp); 2754 pci_read_config_byte(pdev, via->via_pci_parport_irq_reg, &tmp);
2673 irq = ((tmp & VIA_IRQCONTROL_PARALLEL) >> 4); 2755 irq = ((tmp & VIA_IRQCONTROL_PARALLEL) >> 4);
2674 2756
2675 if (siofunc == VIA_FUNCTION_PARPORT_ECP) 2757 if (siofunc == VIA_FUNCTION_PARPORT_ECP) {
2676 { 2758 /* Bits 3-2: PnP Routing for Parallel Port DMA */
2677 /* Bits 3-2: PnP Routing for Parallel Port DMA */ 2759 pci_read_config_byte(pdev, via->via_pci_parport_dma_reg, &tmp);
2678 pci_read_config_byte(pdev, via->via_pci_parport_dma_reg, &tmp); 2760 dma = ((tmp & VIA_DMACONTROL_PARALLEL) >> 2);
2679 dma = ((tmp & VIA_DMACONTROL_PARALLEL) >> 2); 2761 } else
2680 } 2762 /* if ECP not enabled, DMA is not enabled, assumed
2681 else 2763 bogus 'dma' value */
2682 /* if ECP not enabled, DMA is not enabled, assumed bogus 'dma' value */ 2764 dma = PARPORT_DMA_NONE;
2683 dma = PARPORT_DMA_NONE;
2684 2765
2685 /* Let the user (or defaults) steer us away from interrupts and DMA */ 2766 /* Let the user (or defaults) steer us away from interrupts and DMA */
2686 if (autoirq == PARPORT_IRQ_NONE) { 2767 if (autoirq == PARPORT_IRQ_NONE) {
2687 irq = PARPORT_IRQ_NONE; 2768 irq = PARPORT_IRQ_NONE;
2688 dma = PARPORT_DMA_NONE; 2769 dma = PARPORT_DMA_NONE;
2689 } 2770 }
2690 if (autodma == PARPORT_DMA_NONE) 2771 if (autodma == PARPORT_DMA_NONE)
2691 dma = PARPORT_DMA_NONE; 2772 dma = PARPORT_DMA_NONE;
2692 2773
2693 switch (port1) { 2774 switch (port1) {
2694 case 0x3bc: port2 = 0x7bc; break; 2775 case 0x3bc:
2695 case 0x378: port2 = 0x778; break; 2776 port2 = 0x7bc; break;
2696 case 0x278: port2 = 0x678; break; 2777 case 0x378:
2778 port2 = 0x778; break;
2779 case 0x278:
2780 port2 = 0x678; break;
2697 default: 2781 default:
2698 printk(KERN_INFO "parport_pc: Weird VIA parport base 0x%X, ignoring\n", 2782 printk(KERN_INFO
2699 port1); 2783 "parport_pc: Weird VIA parport base 0x%X, ignoring\n",
2784 port1);
2700 return 0; 2785 return 0;
2701 } 2786 }
2702 2787
@@ -2714,17 +2799,17 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
2714 } 2799 }
2715 2800
2716 /* finally, do the probe with values obtained */ 2801 /* finally, do the probe with values obtained */
2717 if (parport_pc_probe_port (port1, port2, irq, dma, &pdev->dev, 0)) { 2802 if (parport_pc_probe_port(port1, port2, irq, dma, &pdev->dev, 0)) {
2718 printk (KERN_INFO 2803 printk(KERN_INFO
2719 "parport_pc: VIA parallel port: io=0x%X", port1); 2804 "parport_pc: VIA parallel port: io=0x%X", port1);
2720 if (irq != PARPORT_IRQ_NONE) 2805 if (irq != PARPORT_IRQ_NONE)
2721 printk (", irq=%d", irq); 2806 printk(", irq=%d", irq);
2722 if (dma != PARPORT_DMA_NONE) 2807 if (dma != PARPORT_DMA_NONE)
2723 printk (", dma=%d", dma); 2808 printk(", dma=%d", dma);
2724 printk ("\n"); 2809 printk("\n");
2725 return 1; 2810 return 1;
2726 } 2811 }
2727 2812
2728 printk(KERN_WARNING "parport_pc: Strange, can't probe VIA parallel port: io=0x%X, irq=%d, dma=%d\n", 2813 printk(KERN_WARNING "parport_pc: Strange, can't probe VIA parallel port: io=0x%X, irq=%d, dma=%d\n",
2729 port1, irq, dma); 2814 port1, irq, dma);
2730 return 0; 2815 return 0;
@@ -2732,8 +2817,8 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
2732 2817
2733 2818
2734enum parport_pc_sio_types { 2819enum parport_pc_sio_types {
2735 sio_via_686a = 0, /* Via VT82C686A motherboard Super I/O */ 2820 sio_via_686a = 0, /* Via VT82C686A motherboard Super I/O */
2736 sio_via_8231, /* Via VT8231 south bridge integrated Super IO */ 2821 sio_via_8231, /* Via VT8231 south bridge integrated Super IO */
2737 sio_ite_8872, 2822 sio_ite_8872,
2738 last_sio 2823 last_sio
2739}; 2824};
@@ -2804,15 +2889,15 @@ enum parport_pc_pci_cards {
2804}; 2889};
2805 2890
2806 2891
2807/* each element directly indexed from enum list, above 2892/* each element directly indexed from enum list, above
2808 * (but offset by last_sio) */ 2893 * (but offset by last_sio) */
2809static struct parport_pc_pci { 2894static struct parport_pc_pci {
2810 int numports; 2895 int numports;
2811 struct { /* BAR (base address registers) numbers in the config 2896 struct { /* BAR (base address registers) numbers in the config
2812 space header */ 2897 space header */
2813 int lo; 2898 int lo;
2814 int hi; /* -1 if not there, >6 for offset-method (max 2899 int hi;
2815 BAR is 6) */ 2900 /* -1 if not there, >6 for offset-method (max BAR is 6) */
2816 } addr[4]; 2901 } addr[4];
2817 2902
2818 /* If set, this is called immediately after pci_enable_device. 2903 /* If set, this is called immediately after pci_enable_device.
@@ -2857,7 +2942,7 @@ static struct parport_pc_pci {
2857 /* timedia_4018 */ { 2, { { 0, 1 }, { 2, 3 }, } }, 2942 /* timedia_4018 */ { 2, { { 0, 1 }, { 2, 3 }, } },
2858 /* timedia_9018a */ { 2, { { 0, 1 }, { 2, 3 }, } }, 2943 /* timedia_9018a */ { 2, { { 0, 1 }, { 2, 3 }, } },
2859 /* SYBA uses fixed offsets in 2944 /* SYBA uses fixed offsets in
2860 a 1K io window */ 2945 a 1K io window */
2861 /* syba_2p_epp AP138B */ { 2, { { 0, 0x078 }, { 0, 0x178 }, } }, 2946 /* syba_2p_epp AP138B */ { 2, { { 0, 0x078 }, { 0, 0x178 }, } },
2862 /* syba_1p_ecp W83787 */ { 1, { { 0, 0x078 }, } }, 2947 /* syba_1p_ecp W83787 */ { 1, { { 0, 0x078 }, } },
2863 /* titan_010l */ { 1, { { 3, -1 }, } }, 2948 /* titan_010l */ { 1, { { 3, -1 }, } },
@@ -2873,11 +2958,14 @@ static struct parport_pc_pci {
2873 /* oxsemi_pcie_pport */ { 1, { { 0, 1 }, } }, 2958 /* oxsemi_pcie_pport */ { 1, { { 0, 1 }, } },
2874 /* aks_0100 */ { 1, { { 0, -1 }, } }, 2959 /* aks_0100 */ { 1, { { 0, -1 }, } },
2875 /* mobility_pp */ { 1, { { 0, 1 }, } }, 2960 /* mobility_pp */ { 1, { { 0, 1 }, } },
2876 /* netmos_9705 */ { 1, { { 0, -1 }, } }, /* untested */ 2961
2877 /* netmos_9715 */ { 2, { { 0, 1 }, { 2, 3 },} }, /* untested */ 2962 /* The netmos entries below are untested */
2878 /* netmos_9755 */ { 2, { { 0, 1 }, { 2, 3 },} }, /* untested */ 2963 /* netmos_9705 */ { 1, { { 0, -1 }, } },
2879 /* netmos_9805 */ { 1, { { 0, -1 }, } }, /* untested */ 2964 /* netmos_9715 */ { 2, { { 0, 1 }, { 2, 3 },} },
2880 /* netmos_9815 */ { 2, { { 0, -1 }, { 2, -1 }, } }, /* untested */ 2965 /* netmos_9755 */ { 2, { { 0, 1 }, { 2, 3 },} },
2966 /* netmos_9805 */ { 1, { { 0, -1 }, } },
2967 /* netmos_9815 */ { 2, { { 0, -1 }, { 2, -1 }, } },
2968
2881 /* quatech_sppxp100 */ { 1, { { 0, 1 }, } }, 2969 /* quatech_sppxp100 */ { 1, { { 0, 1 }, } },
2882}; 2970};
2883 2971
@@ -2906,7 +2994,7 @@ static const struct pci_device_id parport_pc_pci_tbl[] = {
2906 { PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_BOCA_IOPPAR, 2994 { PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_BOCA_IOPPAR,
2907 PCI_ANY_ID, PCI_ANY_ID, 0, 0, boca_ioppar }, 2995 PCI_ANY_ID, PCI_ANY_ID, 0, 0, boca_ioppar },
2908 { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050, 2996 { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050,
2909 PCI_SUBVENDOR_ID_EXSYS, PCI_SUBDEVICE_ID_EXSYS_4014, 0,0, plx_9050 }, 2997 PCI_SUBVENDOR_ID_EXSYS, PCI_SUBDEVICE_ID_EXSYS_4014, 0, 0, plx_9050 },
2910 /* PCI_VENDOR_ID_TIMEDIA/SUNIX has many differing cards ...*/ 2998 /* PCI_VENDOR_ID_TIMEDIA/SUNIX has many differing cards ...*/
2911 { 0x1409, 0x7168, 0x1409, 0x4078, 0, 0, timedia_4078a }, 2999 { 0x1409, 0x7168, 0x1409, 0x4078, 0, 0, timedia_4078a },
2912 { 0x1409, 0x7168, 0x1409, 0x4079, 0, 0, timedia_4079h }, 3000 { 0x1409, 0x7168, 0x1409, 0x4079, 0, 0, timedia_4079h },
@@ -2940,7 +3028,8 @@ static const struct pci_device_id parport_pc_pci_tbl[] = {
2940 { 0x9710, 0x9805, 0x1000, 0x0010, 0, 0, titan_1284p1 }, 3028 { 0x9710, 0x9805, 0x1000, 0x0010, 0, 0, titan_1284p1 },
2941 { 0x9710, 0x9815, 0x1000, 0x0020, 0, 0, titan_1284p2 }, 3029 { 0x9710, 0x9815, 0x1000, 0x0020, 0, 0, titan_1284p2 },
2942 /* PCI_VENDOR_ID_AVLAB/Intek21 has another bunch of cards ...*/ 3030 /* PCI_VENDOR_ID_AVLAB/Intek21 has another bunch of cards ...*/
2943 { 0x14db, 0x2120, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_1p}, /* AFAVLAB_TK9902 */ 3031 /* AFAVLAB_TK9902 */
3032 { 0x14db, 0x2120, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_1p},
2944 { 0x14db, 0x2121, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_2p}, 3033 { 0x14db, 0x2121, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_2p},
2945 { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI952PP, 3034 { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI952PP,
2946 PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_952 }, 3035 PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_952 },
@@ -2983,14 +3072,14 @@ static const struct pci_device_id parport_pc_pci_tbl[] = {
2983 PCI_ANY_ID, PCI_ANY_ID, 0, 0, quatech_sppxp100 }, 3072 PCI_ANY_ID, PCI_ANY_ID, 0, 0, quatech_sppxp100 },
2984 { 0, } /* terminate list */ 3073 { 0, } /* terminate list */
2985}; 3074};
2986MODULE_DEVICE_TABLE(pci,parport_pc_pci_tbl); 3075MODULE_DEVICE_TABLE(pci, parport_pc_pci_tbl);
2987 3076
2988struct pci_parport_data { 3077struct pci_parport_data {
2989 int num; 3078 int num;
2990 struct parport *ports[2]; 3079 struct parport *ports[2];
2991}; 3080};
2992 3081
2993static int parport_pc_pci_probe (struct pci_dev *dev, 3082static int parport_pc_pci_probe(struct pci_dev *dev,
2994 const struct pci_device_id *id) 3083 const struct pci_device_id *id)
2995{ 3084{
2996 int err, count, n, i = id->driver_data; 3085 int err, count, n, i = id->driver_data;
@@ -3003,7 +3092,8 @@ static int parport_pc_pci_probe (struct pci_dev *dev,
3003 /* This is a PCI card */ 3092 /* This is a PCI card */
3004 i -= last_sio; 3093 i -= last_sio;
3005 count = 0; 3094 count = 0;
3006 if ((err = pci_enable_device (dev)) != 0) 3095 err = pci_enable_device(dev);
3096 if (err)
3007 return err; 3097 return err;
3008 3098
3009 data = kmalloc(sizeof(struct pci_parport_data), GFP_KERNEL); 3099 data = kmalloc(sizeof(struct pci_parport_data), GFP_KERNEL);
@@ -3011,7 +3101,7 @@ static int parport_pc_pci_probe (struct pci_dev *dev,
3011 return -ENOMEM; 3101 return -ENOMEM;
3012 3102
3013 if (cards[i].preinit_hook && 3103 if (cards[i].preinit_hook &&
3014 cards[i].preinit_hook (dev, PARPORT_IRQ_NONE, PARPORT_DMA_NONE)) { 3104 cards[i].preinit_hook(dev, PARPORT_IRQ_NONE, PARPORT_DMA_NONE)) {
3015 kfree(data); 3105 kfree(data);
3016 return -ENODEV; 3106 return -ENODEV;
3017 } 3107 }
@@ -3021,25 +3111,25 @@ static int parport_pc_pci_probe (struct pci_dev *dev,
3021 int hi = cards[i].addr[n].hi; 3111 int hi = cards[i].addr[n].hi;
3022 int irq; 3112 int irq;
3023 unsigned long io_lo, io_hi; 3113 unsigned long io_lo, io_hi;
3024 io_lo = pci_resource_start (dev, lo); 3114 io_lo = pci_resource_start(dev, lo);
3025 io_hi = 0; 3115 io_hi = 0;
3026 if ((hi >= 0) && (hi <= 6)) 3116 if ((hi >= 0) && (hi <= 6))
3027 io_hi = pci_resource_start (dev, hi); 3117 io_hi = pci_resource_start(dev, hi);
3028 else if (hi > 6) 3118 else if (hi > 6)
3029 io_lo += hi; /* Reinterpret the meaning of 3119 io_lo += hi; /* Reinterpret the meaning of
3030 "hi" as an offset (see SYBA 3120 "hi" as an offset (see SYBA
3031 def.) */ 3121 def.) */
3032 /* TODO: test if sharing interrupts works */ 3122 /* TODO: test if sharing interrupts works */
3033 irq = dev->irq; 3123 irq = dev->irq;
3034 if (irq == IRQ_NONE) { 3124 if (irq == IRQ_NONE) {
3035 printk (KERN_DEBUG 3125 printk(KERN_DEBUG
3036 "PCI parallel port detected: %04x:%04x, I/O at %#lx(%#lx)\n", 3126 "PCI parallel port detected: %04x:%04x, I/O at %#lx(%#lx)\n",
3037 parport_pc_pci_tbl[i + last_sio].vendor, 3127 parport_pc_pci_tbl[i + last_sio].vendor,
3038 parport_pc_pci_tbl[i + last_sio].device, 3128 parport_pc_pci_tbl[i + last_sio].device,
3039 io_lo, io_hi); 3129 io_lo, io_hi);
3040 irq = PARPORT_IRQ_NONE; 3130 irq = PARPORT_IRQ_NONE;
3041 } else { 3131 } else {
3042 printk (KERN_DEBUG 3132 printk(KERN_DEBUG
3043 "PCI parallel port detected: %04x:%04x, I/O at %#lx(%#lx), IRQ %d\n", 3133 "PCI parallel port detected: %04x:%04x, I/O at %#lx(%#lx), IRQ %d\n",
3044 parport_pc_pci_tbl[i + last_sio].vendor, 3134 parport_pc_pci_tbl[i + last_sio].vendor,
3045 parport_pc_pci_tbl[i + last_sio].device, 3135 parport_pc_pci_tbl[i + last_sio].device,
@@ -3056,7 +3146,7 @@ static int parport_pc_pci_probe (struct pci_dev *dev,
3056 data->num = count; 3146 data->num = count;
3057 3147
3058 if (cards[i].postinit_hook) 3148 if (cards[i].postinit_hook)
3059 cards[i].postinit_hook (dev, count == 0); 3149 cards[i].postinit_hook(dev, count == 0);
3060 3150
3061 if (count) { 3151 if (count) {
3062 pci_set_drvdata(dev, data); 3152 pci_set_drvdata(dev, data);
@@ -3090,7 +3180,7 @@ static struct pci_driver parport_pc_pci_driver = {
3090 .remove = __devexit_p(parport_pc_pci_remove), 3180 .remove = __devexit_p(parport_pc_pci_remove),
3091}; 3181};
3092 3182
3093static int __init parport_pc_init_superio (int autoirq, int autodma) 3183static int __init parport_pc_init_superio(int autoirq, int autodma)
3094{ 3184{
3095 const struct pci_device_id *id; 3185 const struct pci_device_id *id;
3096 struct pci_dev *pdev = NULL; 3186 struct pci_dev *pdev = NULL;
@@ -3101,8 +3191,9 @@ static int __init parport_pc_init_superio (int autoirq, int autodma)
3101 if (id == NULL || id->driver_data >= last_sio) 3191 if (id == NULL || id->driver_data >= last_sio)
3102 continue; 3192 continue;
3103 3193
3104 if (parport_pc_superio_info[id->driver_data].probe 3194 if (parport_pc_superio_info[id->driver_data].probe(
3105 (pdev, autoirq, autodma,parport_pc_superio_info[id->driver_data].via)) { 3195 pdev, autoirq, autodma,
3196 parport_pc_superio_info[id->driver_data].via)) {
3106 ret++; 3197 ret++;
3107 } 3198 }
3108 } 3199 }
@@ -3111,7 +3202,10 @@ static int __init parport_pc_init_superio (int autoirq, int autodma)
3111} 3202}
3112#else 3203#else
3113static struct pci_driver parport_pc_pci_driver; 3204static struct pci_driver parport_pc_pci_driver;
3114static int __init parport_pc_init_superio(int autoirq, int autodma) {return 0;} 3205static int __init parport_pc_init_superio(int autoirq, int autodma)
3206{
3207 return 0;
3208}
3115#endif /* CONFIG_PCI */ 3209#endif /* CONFIG_PCI */
3116 3210
3117#ifdef CONFIG_PNP 3211#ifdef CONFIG_PNP
@@ -3124,44 +3218,45 @@ static const struct pnp_device_id parport_pc_pnp_tbl[] = {
3124 { } 3218 { }
3125}; 3219};
3126 3220
3127MODULE_DEVICE_TABLE(pnp,parport_pc_pnp_tbl); 3221MODULE_DEVICE_TABLE(pnp, parport_pc_pnp_tbl);
3128 3222
3129static int parport_pc_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *id) 3223static int parport_pc_pnp_probe(struct pnp_dev *dev,
3224 const struct pnp_device_id *id)
3130{ 3225{
3131 struct parport *pdata; 3226 struct parport *pdata;
3132 unsigned long io_lo, io_hi; 3227 unsigned long io_lo, io_hi;
3133 int dma, irq; 3228 int dma, irq;
3134 3229
3135 if (pnp_port_valid(dev,0) && 3230 if (pnp_port_valid(dev, 0) &&
3136 !(pnp_port_flags(dev,0) & IORESOURCE_DISABLED)) { 3231 !(pnp_port_flags(dev, 0) & IORESOURCE_DISABLED)) {
3137 io_lo = pnp_port_start(dev,0); 3232 io_lo = pnp_port_start(dev, 0);
3138 } else 3233 } else
3139 return -EINVAL; 3234 return -EINVAL;
3140 3235
3141 if (pnp_port_valid(dev,1) && 3236 if (pnp_port_valid(dev, 1) &&
3142 !(pnp_port_flags(dev,1) & IORESOURCE_DISABLED)) { 3237 !(pnp_port_flags(dev, 1) & IORESOURCE_DISABLED)) {
3143 io_hi = pnp_port_start(dev,1); 3238 io_hi = pnp_port_start(dev, 1);
3144 } else 3239 } else
3145 io_hi = 0; 3240 io_hi = 0;
3146 3241
3147 if (pnp_irq_valid(dev,0) && 3242 if (pnp_irq_valid(dev, 0) &&
3148 !(pnp_irq_flags(dev,0) & IORESOURCE_DISABLED)) { 3243 !(pnp_irq_flags(dev, 0) & IORESOURCE_DISABLED)) {
3149 irq = pnp_irq(dev,0); 3244 irq = pnp_irq(dev, 0);
3150 } else 3245 } else
3151 irq = PARPORT_IRQ_NONE; 3246 irq = PARPORT_IRQ_NONE;
3152 3247
3153 if (pnp_dma_valid(dev,0) && 3248 if (pnp_dma_valid(dev, 0) &&
3154 !(pnp_dma_flags(dev,0) & IORESOURCE_DISABLED)) { 3249 !(pnp_dma_flags(dev, 0) & IORESOURCE_DISABLED)) {
3155 dma = pnp_dma(dev,0); 3250 dma = pnp_dma(dev, 0);
3156 } else 3251 } else
3157 dma = PARPORT_DMA_NONE; 3252 dma = PARPORT_DMA_NONE;
3158 3253
3159 dev_info(&dev->dev, "reported by %s\n", dev->protocol->name); 3254 dev_info(&dev->dev, "reported by %s\n", dev->protocol->name);
3160 if (!(pdata = parport_pc_probe_port(io_lo, io_hi, 3255 pdata = parport_pc_probe_port(io_lo, io_hi, irq, dma, &dev->dev, 0);
3161 irq, dma, &dev->dev, 0))) 3256 if (pdata == NULL)
3162 return -ENODEV; 3257 return -ENODEV;
3163 3258
3164 pnp_set_drvdata(dev,pdata); 3259 pnp_set_drvdata(dev, pdata);
3165 return 0; 3260 return 0;
3166} 3261}
3167 3262
@@ -3203,7 +3298,7 @@ static struct platform_driver parport_pc_platform_driver = {
3203 3298
3204/* This is called by parport_pc_find_nonpci_ports (in asm/parport.h) */ 3299/* This is called by parport_pc_find_nonpci_ports (in asm/parport.h) */
3205static int __devinit __attribute__((unused)) 3300static int __devinit __attribute__((unused))
3206parport_pc_find_isa_ports (int autoirq, int autodma) 3301parport_pc_find_isa_ports(int autoirq, int autodma)
3207{ 3302{
3208 int count = 0; 3303 int count = 0;
3209 3304
@@ -3227,7 +3322,7 @@ parport_pc_find_isa_ports (int autoirq, int autodma)
3227 * autoirq is PARPORT_IRQ_NONE, PARPORT_IRQ_AUTO, or PARPORT_IRQ_PROBEONLY 3322 * autoirq is PARPORT_IRQ_NONE, PARPORT_IRQ_AUTO, or PARPORT_IRQ_PROBEONLY
3228 * autodma is PARPORT_DMA_NONE or PARPORT_DMA_AUTO 3323 * autodma is PARPORT_DMA_NONE or PARPORT_DMA_AUTO
3229 */ 3324 */
3230static void __init parport_pc_find_ports (int autoirq, int autodma) 3325static void __init parport_pc_find_ports(int autoirq, int autodma)
3231{ 3326{
3232 int count = 0, err; 3327 int count = 0, err;
3233 3328
@@ -3261,11 +3356,18 @@ static void __init parport_pc_find_ports (int autoirq, int autodma)
3261 * syntax and keep in mind that code below is a cleaned up version. 3356 * syntax and keep in mind that code below is a cleaned up version.
3262 */ 3357 */
3263 3358
3264static int __initdata io[PARPORT_PC_MAX_PORTS+1] = { [0 ... PARPORT_PC_MAX_PORTS] = 0 }; 3359static int __initdata io[PARPORT_PC_MAX_PORTS+1] = {
3265static int __initdata io_hi[PARPORT_PC_MAX_PORTS+1] = 3360 [0 ... PARPORT_PC_MAX_PORTS] = 0
3266 { [0 ... PARPORT_PC_MAX_PORTS] = PARPORT_IOHI_AUTO }; 3361};
3267static int __initdata dmaval[PARPORT_PC_MAX_PORTS] = { [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_DMA_NONE }; 3362static int __initdata io_hi[PARPORT_PC_MAX_PORTS+1] = {
3268static int __initdata irqval[PARPORT_PC_MAX_PORTS] = { [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_IRQ_PROBEONLY }; 3363 [0 ... PARPORT_PC_MAX_PORTS] = PARPORT_IOHI_AUTO
3364};
3365static int __initdata dmaval[PARPORT_PC_MAX_PORTS] = {
3366 [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_DMA_NONE
3367};
3368static int __initdata irqval[PARPORT_PC_MAX_PORTS] = {
3369 [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_IRQ_PROBEONLY
3370};
3269 3371
3270static int __init parport_parse_param(const char *s, int *val, 3372static int __init parport_parse_param(const char *s, int *val,
3271 int automatic, int none, int nofifo) 3373 int automatic, int none, int nofifo)
@@ -3306,18 +3408,19 @@ static int __init parport_parse_dma(const char *dmastr, int *val)
3306#ifdef CONFIG_PCI 3408#ifdef CONFIG_PCI
3307static int __init parport_init_mode_setup(char *str) 3409static int __init parport_init_mode_setup(char *str)
3308{ 3410{
3309 printk(KERN_DEBUG "parport_pc.c: Specified parameter parport_init_mode=%s\n", str); 3411 printk(KERN_DEBUG
3310 3412 "parport_pc.c: Specified parameter parport_init_mode=%s\n", str);
3311 if (!strcmp (str, "spp")) 3413
3312 parport_init_mode=1; 3414 if (!strcmp(str, "spp"))
3313 if (!strcmp (str, "ps2")) 3415 parport_init_mode = 1;
3314 parport_init_mode=2; 3416 if (!strcmp(str, "ps2"))
3315 if (!strcmp (str, "epp")) 3417 parport_init_mode = 2;
3316 parport_init_mode=3; 3418 if (!strcmp(str, "epp"))
3317 if (!strcmp (str, "ecp")) 3419 parport_init_mode = 3;
3318 parport_init_mode=4; 3420 if (!strcmp(str, "ecp"))
3319 if (!strcmp (str, "ecpepp")) 3421 parport_init_mode = 4;
3320 parport_init_mode=5; 3422 if (!strcmp(str, "ecpepp"))
3423 parport_init_mode = 5;
3321 return 1; 3424 return 1;
3322} 3425}
3323#endif 3426#endif
@@ -3341,7 +3444,8 @@ module_param(verbose_probing, int, 0644);
3341#endif 3444#endif
3342#ifdef CONFIG_PCI 3445#ifdef CONFIG_PCI
3343static char *init_mode; 3446static char *init_mode;
3344MODULE_PARM_DESC(init_mode, "Initialise mode for VIA VT8231 port (spp, ps2, epp, ecp or ecpepp)"); 3447MODULE_PARM_DESC(init_mode,
3448 "Initialise mode for VIA VT8231 port (spp, ps2, epp, ecp or ecpepp)");
3345module_param(init_mode, charp, 0); 3449module_param(init_mode, charp, 0);
3346#endif 3450#endif
3347 3451
@@ -3372,7 +3476,7 @@ static int __init parse_parport_params(void)
3372 irqval[0] = val; 3476 irqval[0] = val;
3373 break; 3477 break;
3374 default: 3478 default:
3375 printk (KERN_WARNING 3479 printk(KERN_WARNING
3376 "parport_pc: irq specified " 3480 "parport_pc: irq specified "
3377 "without base address. Use 'io=' " 3481 "without base address. Use 'io=' "
3378 "to specify one\n"); 3482 "to specify one\n");
@@ -3385,7 +3489,7 @@ static int __init parse_parport_params(void)
3385 dmaval[0] = val; 3489 dmaval[0] = val;
3386 break; 3490 break;
3387 default: 3491 default:
3388 printk (KERN_WARNING 3492 printk(KERN_WARNING
3389 "parport_pc: dma specified " 3493 "parport_pc: dma specified "
3390 "without base address. Use 'io=' " 3494 "without base address. Use 'io=' "
3391 "to specify one\n"); 3495 "to specify one\n");
@@ -3396,7 +3500,7 @@ static int __init parse_parport_params(void)
3396 3500
3397#else 3501#else
3398 3502
3399static int parport_setup_ptr __initdata = 0; 3503static int parport_setup_ptr __initdata;
3400 3504
3401/* 3505/*
3402 * Acceptable parameters: 3506 * Acceptable parameters:
@@ -3407,7 +3511,7 @@ static int parport_setup_ptr __initdata = 0;
3407 * 3511 *
3408 * IRQ/DMA may be numeric or 'auto' or 'none' 3512 * IRQ/DMA may be numeric or 'auto' or 'none'
3409 */ 3513 */
3410static int __init parport_setup (char *str) 3514static int __init parport_setup(char *str)
3411{ 3515{
3412 char *endptr; 3516 char *endptr;
3413 char *sep; 3517 char *sep;
@@ -3419,15 +3523,15 @@ static int __init parport_setup (char *str)
3419 return 1; 3523 return 1;
3420 } 3524 }
3421 3525
3422 if (!strncmp (str, "auto", 4)) { 3526 if (!strncmp(str, "auto", 4)) {
3423 irqval[0] = PARPORT_IRQ_AUTO; 3527 irqval[0] = PARPORT_IRQ_AUTO;
3424 dmaval[0] = PARPORT_DMA_AUTO; 3528 dmaval[0] = PARPORT_DMA_AUTO;
3425 return 1; 3529 return 1;
3426 } 3530 }
3427 3531
3428 val = simple_strtoul (str, &endptr, 0); 3532 val = simple_strtoul(str, &endptr, 0);
3429 if (endptr == str) { 3533 if (endptr == str) {
3430 printk (KERN_WARNING "parport=%s not understood\n", str); 3534 printk(KERN_WARNING "parport=%s not understood\n", str);
3431 return 1; 3535 return 1;
3432 } 3536 }
3433 3537
@@ -3461,7 +3565,7 @@ static int __init parse_parport_params(void)
3461 return io[0] == PARPORT_DISABLE; 3565 return io[0] == PARPORT_DISABLE;
3462} 3566}
3463 3567
3464__setup ("parport=", parport_setup); 3568__setup("parport=", parport_setup);
3465 3569
3466/* 3570/*
3467 * Acceptable parameters: 3571 * Acceptable parameters:
@@ -3469,7 +3573,7 @@ __setup ("parport=", parport_setup);
3469 * parport_init_mode=[spp|ps2|epp|ecp|ecpepp] 3573 * parport_init_mode=[spp|ps2|epp|ecp|ecpepp]
3470 */ 3574 */
3471#ifdef CONFIG_PCI 3575#ifdef CONFIG_PCI
3472__setup("parport_init_mode=",parport_init_mode_setup); 3576__setup("parport_init_mode=", parport_init_mode_setup);
3473#endif 3577#endif
3474#endif 3578#endif
3475 3579
@@ -3493,13 +3597,13 @@ static int __init parport_pc_init(void)
3493 for (i = 0; i < PARPORT_PC_MAX_PORTS; i++) { 3597 for (i = 0; i < PARPORT_PC_MAX_PORTS; i++) {
3494 if (!io[i]) 3598 if (!io[i])
3495 break; 3599 break;
3496 if ((io_hi[i]) == PARPORT_IOHI_AUTO) 3600 if (io_hi[i] == PARPORT_IOHI_AUTO)
3497 io_hi[i] = 0x400 + io[i]; 3601 io_hi[i] = 0x400 + io[i];
3498 parport_pc_probe_port(io[i], io_hi[i], 3602 parport_pc_probe_port(io[i], io_hi[i],
3499 irqval[i], dmaval[i], NULL, 0); 3603 irqval[i], dmaval[i], NULL, 0);
3500 } 3604 }
3501 } else 3605 } else
3502 parport_pc_find_ports (irqval[0], dmaval[0]); 3606 parport_pc_find_ports(irqval[0], dmaval[0]);
3503 3607
3504 return 0; 3608 return 0;
3505} 3609}
@@ -3507,9 +3611,9 @@ static int __init parport_pc_init(void)
3507static void __exit parport_pc_exit(void) 3611static void __exit parport_pc_exit(void)
3508{ 3612{
3509 if (pci_registered_parport) 3613 if (pci_registered_parport)
3510 pci_unregister_driver (&parport_pc_pci_driver); 3614 pci_unregister_driver(&parport_pc_pci_driver);
3511 if (pnp_registered_parport) 3615 if (pnp_registered_parport)
3512 pnp_unregister_driver (&parport_pc_pnp_driver); 3616 pnp_unregister_driver(&parport_pc_pnp_driver);
3513 platform_driver_unregister(&parport_pc_platform_driver); 3617 platform_driver_unregister(&parport_pc_platform_driver);
3514 3618
3515 while (!list_empty(&ports_list)) { 3619 while (!list_empty(&ports_list)) {
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index a0127e93ade0..fb867a9f55e9 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -287,6 +287,13 @@ static const struct serial8250_config uart_config[] = {
287 .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, 287 .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
288 .flags = UART_CAP_FIFO, 288 .flags = UART_CAP_FIFO,
289 }, 289 },
290 [PORT_AR7] = {
291 .name = "AR7",
292 .fifo_size = 16,
293 .tx_loadsz = 16,
294 .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00,
295 .flags = UART_CAP_FIFO | UART_CAP_AFE,
296 },
290}; 297};
291 298
292#if defined (CONFIG_SERIAL_8250_AU1X00) 299#if defined (CONFIG_SERIAL_8250_AU1X00)
diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 938bc1b6c3fa..e371a9c15341 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -2776,6 +2776,9 @@ static struct pci_device_id serial_pci_tbl[] = {
2776 { PCI_VENDOR_ID_OXSEMI, 0x950a, 2776 { PCI_VENDOR_ID_OXSEMI, 0x950a,
2777 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 2777 PCI_ANY_ID, PCI_ANY_ID, 0, 0,
2778 pbn_b0_2_1130000 }, 2778 pbn_b0_2_1130000 },
2779 { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_C950,
2780 PCI_VENDOR_ID_OXSEMI, PCI_SUBDEVICE_ID_OXSEMI_C950, 0, 0,
2781 pbn_b0_1_921600 },
2779 { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI954, 2782 { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI954,
2780 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 2783 PCI_ANY_ID, PCI_ANY_ID, 0, 0,
2781 pbn_b0_4_115200 }, 2784 pbn_b0_4_115200 },
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index 343e3a35b6a3..641e800ed693 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -833,6 +833,7 @@ config SERIAL_IMX
833 bool "IMX serial port support" 833 bool "IMX serial port support"
834 depends on ARM && (ARCH_IMX || ARCH_MXC) 834 depends on ARM && (ARCH_IMX || ARCH_MXC)
835 select SERIAL_CORE 835 select SERIAL_CORE
836 select RATIONAL
836 help 837 help
837 If you have a machine based on a Motorola IMX CPU you 838 If you have a machine based on a Motorola IMX CPU you
838 can enable its onboard serial port by enabling this option. 839 can enable its onboard serial port by enabling this option.
@@ -1433,4 +1434,11 @@ config SPORT_BAUD_RATE
1433 default 19200 if (SERIAL_SPORT_BAUD_RATE_19200) 1434 default 19200 if (SERIAL_SPORT_BAUD_RATE_19200)
1434 default 9600 if (SERIAL_SPORT_BAUD_RATE_9600) 1435 default 9600 if (SERIAL_SPORT_BAUD_RATE_9600)
1435 1436
1437config SERIAL_TIMBERDALE
1438 tristate "Support for timberdale UART"
1439 depends on MFD_TIMBERDALE
1440 select SERIAL_CORE
1441 ---help---
1442 Add support for UART controller on timberdale.
1443
1436endmenu 1444endmenu
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index d438eb2a73de..45a8658f54d5 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -77,3 +77,4 @@ obj-$(CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL) += nwpserial.o
77obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o 77obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o
78obj-$(CONFIG_KGDB_SERIAL_CONSOLE) += kgdboc.o 78obj-$(CONFIG_KGDB_SERIAL_CONSOLE) += kgdboc.o
79obj-$(CONFIG_SERIAL_QE) += ucc_uart.o 79obj-$(CONFIG_SERIAL_QE) += ucc_uart.o
80obj-$(CONFIG_SERIAL_TIMBERDALE) += timbuart.o
diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index d86123e03391..e2f6b1bfac98 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -330,6 +330,11 @@ static void bfin_serial_tx_chars(struct bfin_serial_port *uart)
330 /* Clear TFI bit */ 330 /* Clear TFI bit */
331 UART_PUT_LSR(uart, TFI); 331 UART_PUT_LSR(uart, TFI);
332#endif 332#endif
333 /* Anomaly notes:
334 * 05000215 - we always clear ETBEI within last UART TX
335 * interrupt to end a string. It is always set
336 * when start a new tx.
337 */
333 UART_CLEAR_IER(uart, ETBEI); 338 UART_CLEAR_IER(uart, ETBEI);
334 return; 339 return;
335 } 340 }
@@ -415,6 +420,7 @@ static void bfin_serial_dma_tx_chars(struct bfin_serial_port *uart)
415 set_dma_start_addr(uart->tx_dma_channel, (unsigned long)(xmit->buf+xmit->tail)); 420 set_dma_start_addr(uart->tx_dma_channel, (unsigned long)(xmit->buf+xmit->tail));
416 set_dma_x_count(uart->tx_dma_channel, uart->tx_count); 421 set_dma_x_count(uart->tx_dma_channel, uart->tx_count);
417 set_dma_x_modify(uart->tx_dma_channel, 1); 422 set_dma_x_modify(uart->tx_dma_channel, 1);
423 SSYNC();
418 enable_dma(uart->tx_dma_channel); 424 enable_dma(uart->tx_dma_channel);
419 425
420 UART_SET_IER(uart, ETBEI); 426 UART_SET_IER(uart, ETBEI);
@@ -473,27 +479,41 @@ static void bfin_serial_dma_rx_chars(struct bfin_serial_port *uart)
473void bfin_serial_rx_dma_timeout(struct bfin_serial_port *uart) 479void bfin_serial_rx_dma_timeout(struct bfin_serial_port *uart)
474{ 480{
475 int x_pos, pos; 481 int x_pos, pos;
476 unsigned long flags;
477
478 spin_lock_irqsave(&uart->port.lock, flags);
479 482
483 dma_disable_irq(uart->rx_dma_channel);
484 spin_lock_bh(&uart->port.lock);
485
486 /* 2D DMA RX buffer ring is used. Because curr_y_count and
487 * curr_x_count can't be read as an atomic operation,
488 * curr_y_count should be read before curr_x_count. When
489 * curr_x_count is read, curr_y_count may already indicate
490 * next buffer line. But, the position calculated here is
491 * still indicate the old line. The wrong position data may
492 * be smaller than current buffer tail, which cause garbages
493 * are received if it is not prohibit.
494 */
480 uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel); 495 uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel);
481 x_pos = get_dma_curr_xcount(uart->rx_dma_channel); 496 x_pos = get_dma_curr_xcount(uart->rx_dma_channel);
482 uart->rx_dma_nrows = DMA_RX_YCOUNT - uart->rx_dma_nrows; 497 uart->rx_dma_nrows = DMA_RX_YCOUNT - uart->rx_dma_nrows;
483 if (uart->rx_dma_nrows == DMA_RX_YCOUNT) 498 if (uart->rx_dma_nrows == DMA_RX_YCOUNT || x_pos == 0)
484 uart->rx_dma_nrows = 0; 499 uart->rx_dma_nrows = 0;
485 x_pos = DMA_RX_XCOUNT - x_pos; 500 x_pos = DMA_RX_XCOUNT - x_pos;
486 if (x_pos == DMA_RX_XCOUNT) 501 if (x_pos == DMA_RX_XCOUNT)
487 x_pos = 0; 502 x_pos = 0;
488 503
489 pos = uart->rx_dma_nrows * DMA_RX_XCOUNT + x_pos; 504 pos = uart->rx_dma_nrows * DMA_RX_XCOUNT + x_pos;
490 if (pos != uart->rx_dma_buf.tail) { 505 /* Ignore receiving data if new position is in the same line of
506 * current buffer tail and small.
507 */
508 if (pos > uart->rx_dma_buf.tail ||
509 uart->rx_dma_nrows < (uart->rx_dma_buf.tail/DMA_RX_XCOUNT)) {
491 uart->rx_dma_buf.head = pos; 510 uart->rx_dma_buf.head = pos;
492 bfin_serial_dma_rx_chars(uart); 511 bfin_serial_dma_rx_chars(uart);
493 uart->rx_dma_buf.tail = uart->rx_dma_buf.head; 512 uart->rx_dma_buf.tail = uart->rx_dma_buf.head;
494 } 513 }
495 514
496 spin_unlock_irqrestore(&uart->port.lock, flags); 515 spin_unlock_bh(&uart->port.lock);
516 dma_enable_irq(uart->rx_dma_channel);
497 517
498 mod_timer(&(uart->rx_dma_timer), jiffies + DMA_RX_FLUSH_JIFFIES); 518 mod_timer(&(uart->rx_dma_timer), jiffies + DMA_RX_FLUSH_JIFFIES);
499} 519}
@@ -514,6 +534,11 @@ static irqreturn_t bfin_serial_dma_tx_int(int irq, void *dev_id)
514 if (!(get_dma_curr_irqstat(uart->tx_dma_channel)&DMA_RUN)) { 534 if (!(get_dma_curr_irqstat(uart->tx_dma_channel)&DMA_RUN)) {
515 disable_dma(uart->tx_dma_channel); 535 disable_dma(uart->tx_dma_channel);
516 clear_dma_irqstat(uart->tx_dma_channel); 536 clear_dma_irqstat(uart->tx_dma_channel);
537 /* Anomaly notes:
538 * 05000215 - we always clear ETBEI within last UART TX
539 * interrupt to end a string. It is always set
540 * when start a new tx.
541 */
517 UART_CLEAR_IER(uart, ETBEI); 542 UART_CLEAR_IER(uart, ETBEI);
518 xmit->tail = (xmit->tail + uart->tx_count) & (UART_XMIT_SIZE - 1); 543 xmit->tail = (xmit->tail + uart->tx_count) & (UART_XMIT_SIZE - 1);
519 uart->port.icount.tx += uart->tx_count; 544 uart->port.icount.tx += uart->tx_count;
@@ -532,11 +557,26 @@ static irqreturn_t bfin_serial_dma_rx_int(int irq, void *dev_id)
532{ 557{
533 struct bfin_serial_port *uart = dev_id; 558 struct bfin_serial_port *uart = dev_id;
534 unsigned short irqstat; 559 unsigned short irqstat;
560 int x_pos, pos;
535 561
536 spin_lock(&uart->port.lock); 562 spin_lock(&uart->port.lock);
537 irqstat = get_dma_curr_irqstat(uart->rx_dma_channel); 563 irqstat = get_dma_curr_irqstat(uart->rx_dma_channel);
538 clear_dma_irqstat(uart->rx_dma_channel); 564 clear_dma_irqstat(uart->rx_dma_channel);
539 bfin_serial_dma_rx_chars(uart); 565
566 uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel);
567 x_pos = get_dma_curr_xcount(uart->rx_dma_channel);
568 uart->rx_dma_nrows = DMA_RX_YCOUNT - uart->rx_dma_nrows;
569 if (uart->rx_dma_nrows == DMA_RX_YCOUNT || x_pos == 0)
570 uart->rx_dma_nrows = 0;
571
572 pos = uart->rx_dma_nrows * DMA_RX_XCOUNT;
573 if (pos > uart->rx_dma_buf.tail ||
574 uart->rx_dma_nrows < (uart->rx_dma_buf.tail/DMA_RX_XCOUNT)) {
575 uart->rx_dma_buf.head = pos;
576 bfin_serial_dma_rx_chars(uart);
577 uart->rx_dma_buf.tail = uart->rx_dma_buf.head;
578 }
579
540 spin_unlock(&uart->port.lock); 580 spin_unlock(&uart->port.lock);
541 581
542 return IRQ_HANDLED; 582 return IRQ_HANDLED;
@@ -789,8 +829,16 @@ bfin_serial_set_termios(struct uart_port *port, struct ktermios *termios,
789 __func__); 829 __func__);
790 } 830 }
791 831
792 if (termios->c_cflag & CSTOPB) 832 /* Anomaly notes:
793 lcr |= STB; 833 * 05000231 - STOP bit is always set to 1 whatever the user is set.
834 */
835 if (termios->c_cflag & CSTOPB) {
836 if (ANOMALY_05000231)
837 printk(KERN_WARNING "STOP bits other than 1 is not "
838 "supported in case of anomaly 05000231.\n");
839 else
840 lcr |= STB;
841 }
794 if (termios->c_cflag & PARENB) 842 if (termios->c_cflag & PARENB)
795 lcr |= PEN; 843 lcr |= PEN;
796 if (!(termios->c_cflag & PARODD)) 844 if (!(termios->c_cflag & PARODD))
@@ -940,6 +988,10 @@ static void bfin_serial_reset_irda(struct uart_port *port)
940} 988}
941 989
942#ifdef CONFIG_CONSOLE_POLL 990#ifdef CONFIG_CONSOLE_POLL
991/* Anomaly notes:
992 * 05000099 - Because we only use THRE in poll_put and DR in poll_get,
993 * losing other bits of UART_LSR is not a problem here.
994 */
943static void bfin_serial_poll_put_char(struct uart_port *port, unsigned char chr) 995static void bfin_serial_poll_put_char(struct uart_port *port, unsigned char chr)
944{ 996{
945 struct bfin_serial_port *uart = (struct bfin_serial_port *)port; 997 struct bfin_serial_port *uart = (struct bfin_serial_port *)port;
@@ -1245,12 +1297,17 @@ static __init void early_serial_write(struct console *con, const char *s,
1245 } 1297 }
1246} 1298}
1247 1299
1300/*
1301 * This should have a .setup or .early_setup in it, but then things get called
1302 * without the command line options, and the baud rate gets messed up - so
1303 * don't let the common infrastructure play with things. (see calls to setup
1304 * & earlysetup in ./kernel/printk.c:register_console()
1305 */
1248static struct __initdata console bfin_early_serial_console = { 1306static struct __initdata console bfin_early_serial_console = {
1249 .name = "early_BFuart", 1307 .name = "early_BFuart",
1250 .write = early_serial_write, 1308 .write = early_serial_write,
1251 .device = uart_console_device, 1309 .device = uart_console_device,
1252 .flags = CON_PRINTBUFFER, 1310 .flags = CON_PRINTBUFFER,
1253 .setup = bfin_serial_console_setup,
1254 .index = -1, 1311 .index = -1,
1255 .data = &bfin_serial_reg, 1312 .data = &bfin_serial_reg,
1256}; 1313};
diff --git a/drivers/serial/bfin_sport_uart.c b/drivers/serial/bfin_sport_uart.c
index 529c0ff7952c..34b4ae0fe760 100644
--- a/drivers/serial/bfin_sport_uart.c
+++ b/drivers/serial/bfin_sport_uart.c
@@ -101,15 +101,16 @@ static inline void tx_one_byte(struct sport_uart_port *up, unsigned int value)
101{ 101{
102 pr_debug("%s value:%x\n", __func__, value); 102 pr_debug("%s value:%x\n", __func__, value);
103 /* Place a Start and Stop bit */ 103 /* Place a Start and Stop bit */
104 __asm__ volatile ( 104 __asm__ __volatile__ (
105 "R2 = b#01111111100;\n\t" 105 "R2 = b#01111111100;"
106 "R3 = b#10000000001;\n\t" 106 "R3 = b#10000000001;"
107 "%0 <<= 2;\n\t" 107 "%0 <<= 2;"
108 "%0 = %0 & R2;\n\t" 108 "%0 = %0 & R2;"
109 "%0 = %0 | R3;\n\t" 109 "%0 = %0 | R3;"
110 :"=r"(value) 110 : "=d"(value)
111 :"0"(value) 111 : "d"(value)
112 :"R2", "R3"); 112 : "ASTAT", "R2", "R3"
113 );
113 pr_debug("%s value:%x\n", __func__, value); 114 pr_debug("%s value:%x\n", __func__, value);
114 115
115 SPORT_PUT_TX(up, value); 116 SPORT_PUT_TX(up, value);
@@ -118,27 +119,30 @@ static inline void tx_one_byte(struct sport_uart_port *up, unsigned int value)
118static inline unsigned int rx_one_byte(struct sport_uart_port *up) 119static inline unsigned int rx_one_byte(struct sport_uart_port *up)
119{ 120{
120 unsigned int value, extract; 121 unsigned int value, extract;
122 u32 tmp_mask1, tmp_mask2, tmp_shift, tmp;
121 123
122 value = SPORT_GET_RX32(up); 124 value = SPORT_GET_RX32(up);
123 pr_debug("%s value:%x\n", __func__, value); 125 pr_debug("%s value:%x\n", __func__, value);
124 126
125 /* Extract 8 bits data */ 127 /* Extract 8 bits data */
126 __asm__ volatile ( 128 __asm__ __volatile__ (
127 "R5 = 0;\n\t" 129 "%[extr] = 0;"
128 "P0 = 8;\n\t" 130 "%[mask1] = 0x1801(Z);"
129 "R1 = 0x1801(Z);\n\t" 131 "%[mask2] = 0x0300(Z);"
130 "R3 = 0x0300(Z);\n\t" 132 "%[shift] = 0;"
131 "R4 = 0;\n\t" 133 "LSETUP(.Lloop_s, .Lloop_e) LC0 = %[lc];"
132 "LSETUP(loop_s, loop_e) LC0 = P0;\nloop_s:\t" 134 ".Lloop_s:"
133 "R2 = extract(%1, R1.L)(Z);\n\t" 135 "%[tmp] = extract(%[val], %[mask1].L)(Z);"
134 "R2 <<= R4;\n\t" 136 "%[tmp] <<= %[shift];"
135 "R5 = R5 | R2;\n\t" 137 "%[extr] = %[extr] | %[tmp];"
136 "R1 = R1 - R3;\nloop_e:\t" 138 "%[mask1] = %[mask1] - %[mask2];"
137 "R4 += 1;\n\t" 139 ".Lloop_e:"
138 "%0 = R5;\n\t" 140 "%[shift] += 1;"
139 :"=r"(extract) 141 : [val]"=d"(value), [extr]"=d"(extract), [shift]"=d"(tmp_shift), [tmp]"=d"(tmp),
140 :"r"(value) 142 [mask1]"=d"(tmp_mask1), [mask2]"=d"(tmp_mask2)
141 :"P0", "R1", "R2","R3","R4", "R5"); 143 : "d"(value), [lc]"a"(8)
144 : "ASTAT", "LB0", "LC0", "LT0"
145 );
142 146
143 pr_debug(" extract:%x\n", extract); 147 pr_debug(" extract:%x\n", extract);
144 return extract; 148 return extract;
@@ -149,7 +153,7 @@ static int sport_uart_setup(struct sport_uart_port *up, int sclk, int baud_rate)
149 int tclkdiv, tfsdiv, rclkdiv; 153 int tclkdiv, tfsdiv, rclkdiv;
150 154
151 /* Set TCR1 and TCR2 */ 155 /* Set TCR1 and TCR2 */
152 SPORT_PUT_TCR1(up, (LTFS | ITFS | TFSR | TLSBIT | ITCLK)); 156 SPORT_PUT_TCR1(up, (LATFS | ITFS | TFSR | TLSBIT | ITCLK));
153 SPORT_PUT_TCR2(up, 10); 157 SPORT_PUT_TCR2(up, 10);
154 pr_debug("%s TCR1:%x, TCR2:%x\n", __func__, SPORT_GET_TCR1(up), SPORT_GET_TCR2(up)); 158 pr_debug("%s TCR1:%x, TCR2:%x\n", __func__, SPORT_GET_TCR1(up), SPORT_GET_TCR2(up));
155 159
@@ -419,7 +423,7 @@ static void sport_shutdown(struct uart_port *port)
419} 423}
420 424
421static void sport_set_termios(struct uart_port *port, 425static void sport_set_termios(struct uart_port *port,
422 struct termios *termios, struct termios *old) 426 struct ktermios *termios, struct ktermios *old)
423{ 427{
424 pr_debug("%s enter, c_cflag:%08x\n", __func__, termios->c_cflag); 428 pr_debug("%s enter, c_cflag:%08x\n", __func__, termios->c_cflag);
425 uart_update_timeout(port, CS8 ,port->uartclk); 429 uart_update_timeout(port, CS8 ,port->uartclk);
diff --git a/drivers/serial/icom.c b/drivers/serial/icom.c
index a461b3b2c72d..9f2891c2c4a2 100644
--- a/drivers/serial/icom.c
+++ b/drivers/serial/icom.c
@@ -137,7 +137,12 @@ static LIST_HEAD(icom_adapter_head);
137static spinlock_t icom_lock; 137static spinlock_t icom_lock;
138 138
139#ifdef ICOM_TRACE 139#ifdef ICOM_TRACE
140static inline void trace(struct icom_port *, char *, unsigned long) {}; 140static inline void trace(struct icom_port *icom_port, char *trace_pt,
141 unsigned long trace_data)
142{
143 dev_info(&icom_port->adapter->pci_dev->dev, ":%d:%s - %lx\n",
144 icom_port->port, trace_pt, trace_data);
145}
141#else 146#else
142static inline void trace(struct icom_port *icom_port, char *trace_pt, unsigned long trace_data) {}; 147static inline void trace(struct icom_port *icom_port, char *trace_pt, unsigned long trace_data) {};
143#endif 148#endif
@@ -408,7 +413,7 @@ static void load_code(struct icom_port *icom_port)
408 release_firmware(fw); 413 release_firmware(fw);
409 414
410 /* Set Hardware level */ 415 /* Set Hardware level */
411 if ((icom_port->adapter->version | ADAPTER_V2) == ADAPTER_V2) 416 if (icom_port->adapter->version == ADAPTER_V2)
412 writeb(V2_HARDWARE, &(icom_port->dram->misc_flags)); 417 writeb(V2_HARDWARE, &(icom_port->dram->misc_flags));
413 418
414 /* Start the processor in Adapter */ 419 /* Start the processor in Adapter */
@@ -861,7 +866,7 @@ static irqreturn_t icom_interrupt(int irq, void *dev_id)
861 /* find icom_port for this interrupt */ 866 /* find icom_port for this interrupt */
862 icom_adapter = (struct icom_adapter *) dev_id; 867 icom_adapter = (struct icom_adapter *) dev_id;
863 868
864 if ((icom_adapter->version | ADAPTER_V2) == ADAPTER_V2) { 869 if (icom_adapter->version == ADAPTER_V2) {
865 int_reg = icom_adapter->base_addr + 0x8024; 870 int_reg = icom_adapter->base_addr + 0x8024;
866 871
867 adapter_interrupts = readl(int_reg); 872 adapter_interrupts = readl(int_reg);
@@ -1647,15 +1652,6 @@ static void __exit icom_exit(void)
1647module_init(icom_init); 1652module_init(icom_init);
1648module_exit(icom_exit); 1653module_exit(icom_exit);
1649 1654
1650#ifdef ICOM_TRACE
1651static inline void trace(struct icom_port *icom_port, char *trace_pt,
1652 unsigned long trace_data)
1653{
1654 dev_info(&icom_port->adapter->pci_dev->dev, ":%d:%s - %lx\n",
1655 icom_port->port, trace_pt, trace_data);
1656}
1657#endif
1658
1659MODULE_AUTHOR("Michael Anderson <mjanders@us.ibm.com>"); 1655MODULE_AUTHOR("Michael Anderson <mjanders@us.ibm.com>");
1660MODULE_DESCRIPTION("IBM iSeries Serial IOA driver"); 1656MODULE_DESCRIPTION("IBM iSeries Serial IOA driver");
1661MODULE_SUPPORTED_DEVICE 1657MODULE_SUPPORTED_DEVICE
diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c
index 5f0be40dfdab..7b5d1de9cfe3 100644
--- a/drivers/serial/imx.c
+++ b/drivers/serial/imx.c
@@ -8,6 +8,9 @@
8 * Author: Sascha Hauer <sascha@saschahauer.de> 8 * Author: Sascha Hauer <sascha@saschahauer.de>
9 * Copyright (C) 2004 Pengutronix 9 * Copyright (C) 2004 Pengutronix
10 * 10 *
11 * Copyright (C) 2009 emlix GmbH
12 * Author: Fabian Godehardt (added IrDA support for iMX)
13 *
11 * This program is free software; you can redistribute it and/or modify 14 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by 15 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or 16 * the Free Software Foundation; either version 2 of the License, or
@@ -41,6 +44,8 @@
41#include <linux/serial_core.h> 44#include <linux/serial_core.h>
42#include <linux/serial.h> 45#include <linux/serial.h>
43#include <linux/clk.h> 46#include <linux/clk.h>
47#include <linux/delay.h>
48#include <linux/rational.h>
44 49
45#include <asm/io.h> 50#include <asm/io.h>
46#include <asm/irq.h> 51#include <asm/irq.h>
@@ -148,6 +153,7 @@
148#define UCR4_DREN (1<<0) /* Recv data ready interrupt enable */ 153#define UCR4_DREN (1<<0) /* Recv data ready interrupt enable */
149#define UFCR_RXTL_SHF 0 /* Receiver trigger level shift */ 154#define UFCR_RXTL_SHF 0 /* Receiver trigger level shift */
150#define UFCR_RFDIV (7<<7) /* Reference freq divider mask */ 155#define UFCR_RFDIV (7<<7) /* Reference freq divider mask */
156#define UFCR_RFDIV_REG(x) (((x) < 7 ? 6 - (x) : 6) << 7)
151#define UFCR_TXTL_SHF 10 /* Transmitter trigger level shift */ 157#define UFCR_TXTL_SHF 10 /* Transmitter trigger level shift */
152#define USR1_PARITYERR (1<<15) /* Parity error interrupt flag */ 158#define USR1_PARITYERR (1<<15) /* Parity error interrupt flag */
153#define USR1_RTSS (1<<14) /* RTS pin status */ 159#define USR1_RTSS (1<<14) /* RTS pin status */
@@ -211,10 +217,20 @@ struct imx_port {
211 struct timer_list timer; 217 struct timer_list timer;
212 unsigned int old_status; 218 unsigned int old_status;
213 int txirq,rxirq,rtsirq; 219 int txirq,rxirq,rtsirq;
214 int have_rtscts:1; 220 unsigned int have_rtscts:1;
221 unsigned int use_irda:1;
222 unsigned int irda_inv_rx:1;
223 unsigned int irda_inv_tx:1;
224 unsigned short trcv_delay; /* transceiver delay */
215 struct clk *clk; 225 struct clk *clk;
216}; 226};
217 227
228#ifdef CONFIG_IRDA
229#define USE_IRDA(sport) ((sport)->use_irda)
230#else
231#define USE_IRDA(sport) (0)
232#endif
233
218/* 234/*
219 * Handle any change of modem status signal since we were last called. 235 * Handle any change of modem status signal since we were last called.
220 */ 236 */
@@ -268,6 +284,48 @@ static void imx_stop_tx(struct uart_port *port)
268 struct imx_port *sport = (struct imx_port *)port; 284 struct imx_port *sport = (struct imx_port *)port;
269 unsigned long temp; 285 unsigned long temp;
270 286
287 if (USE_IRDA(sport)) {
288 /* half duplex - wait for end of transmission */
289 int n = 256;
290 while ((--n > 0) &&
291 !(readl(sport->port.membase + USR2) & USR2_TXDC)) {
292 udelay(5);
293 barrier();
294 }
295 /*
296 * irda transceiver - wait a bit more to avoid
297 * cutoff, hardware dependent
298 */
299 udelay(sport->trcv_delay);
300
301 /*
302 * half duplex - reactivate receive mode,
303 * flush receive pipe echo crap
304 */
305 if (readl(sport->port.membase + USR2) & USR2_TXDC) {
306 temp = readl(sport->port.membase + UCR1);
307 temp &= ~(UCR1_TXMPTYEN | UCR1_TRDYEN);
308 writel(temp, sport->port.membase + UCR1);
309
310 temp = readl(sport->port.membase + UCR4);
311 temp &= ~(UCR4_TCEN);
312 writel(temp, sport->port.membase + UCR4);
313
314 while (readl(sport->port.membase + URXD0) &
315 URXD_CHARRDY)
316 barrier();
317
318 temp = readl(sport->port.membase + UCR1);
319 temp |= UCR1_RRDYEN;
320 writel(temp, sport->port.membase + UCR1);
321
322 temp = readl(sport->port.membase + UCR4);
323 temp |= UCR4_DREN;
324 writel(temp, sport->port.membase + UCR4);
325 }
326 return;
327 }
328
271 temp = readl(sport->port.membase + UCR1); 329 temp = readl(sport->port.membase + UCR1);
272 writel(temp & ~UCR1_TXMPTYEN, sport->port.membase + UCR1); 330 writel(temp & ~UCR1_TXMPTYEN, sport->port.membase + UCR1);
273} 331}
@@ -302,13 +360,15 @@ static inline void imx_transmit_buffer(struct imx_port *sport)
302 /* send xmit->buf[xmit->tail] 360 /* send xmit->buf[xmit->tail]
303 * out the port here */ 361 * out the port here */
304 writel(xmit->buf[xmit->tail], sport->port.membase + URTX0); 362 writel(xmit->buf[xmit->tail], sport->port.membase + URTX0);
305 xmit->tail = (xmit->tail + 1) & 363 xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
306 (UART_XMIT_SIZE - 1);
307 sport->port.icount.tx++; 364 sport->port.icount.tx++;
308 if (uart_circ_empty(xmit)) 365 if (uart_circ_empty(xmit))
309 break; 366 break;
310 } 367 }
311 368
369 if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
370 uart_write_wakeup(&sport->port);
371
312 if (uart_circ_empty(xmit)) 372 if (uart_circ_empty(xmit))
313 imx_stop_tx(&sport->port); 373 imx_stop_tx(&sport->port);
314} 374}
@@ -321,9 +381,30 @@ static void imx_start_tx(struct uart_port *port)
321 struct imx_port *sport = (struct imx_port *)port; 381 struct imx_port *sport = (struct imx_port *)port;
322 unsigned long temp; 382 unsigned long temp;
323 383
384 if (USE_IRDA(sport)) {
385 /* half duplex in IrDA mode; have to disable receive mode */
386 temp = readl(sport->port.membase + UCR4);
387 temp &= ~(UCR4_DREN);
388 writel(temp, sport->port.membase + UCR4);
389
390 temp = readl(sport->port.membase + UCR1);
391 temp &= ~(UCR1_RRDYEN);
392 writel(temp, sport->port.membase + UCR1);
393 }
394
324 temp = readl(sport->port.membase + UCR1); 395 temp = readl(sport->port.membase + UCR1);
325 writel(temp | UCR1_TXMPTYEN, sport->port.membase + UCR1); 396 writel(temp | UCR1_TXMPTYEN, sport->port.membase + UCR1);
326 397
398 if (USE_IRDA(sport)) {
399 temp = readl(sport->port.membase + UCR1);
400 temp |= UCR1_TRDYEN;
401 writel(temp, sport->port.membase + UCR1);
402
403 temp = readl(sport->port.membase + UCR4);
404 temp |= UCR4_TCEN;
405 writel(temp, sport->port.membase + UCR4);
406 }
407
327 if (readl(sport->port.membase + UTS) & UTS_TXEMPTY) 408 if (readl(sport->port.membase + UTS) & UTS_TXEMPTY)
328 imx_transmit_buffer(sport); 409 imx_transmit_buffer(sport);
329} 410}
@@ -395,8 +476,7 @@ static irqreturn_t imx_rxint(int irq, void *dev_id)
395 continue; 476 continue;
396 } 477 }
397 478
398 if (uart_handle_sysrq_char 479 if (uart_handle_sysrq_char(&sport->port, (unsigned char)rx))
399 (&sport->port, (unsigned char)rx))
400 continue; 480 continue;
401 481
402 if (rx & (URXD_PRERR | URXD_OVRRUN | URXD_FRMERR) ) { 482 if (rx & (URXD_PRERR | URXD_OVRRUN | URXD_FRMERR) ) {
@@ -471,26 +551,26 @@ static unsigned int imx_tx_empty(struct uart_port *port)
471 */ 551 */
472static unsigned int imx_get_mctrl(struct uart_port *port) 552static unsigned int imx_get_mctrl(struct uart_port *port)
473{ 553{
474 struct imx_port *sport = (struct imx_port *)port; 554 struct imx_port *sport = (struct imx_port *)port;
475 unsigned int tmp = TIOCM_DSR | TIOCM_CAR; 555 unsigned int tmp = TIOCM_DSR | TIOCM_CAR;
476 556
477 if (readl(sport->port.membase + USR1) & USR1_RTSS) 557 if (readl(sport->port.membase + USR1) & USR1_RTSS)
478 tmp |= TIOCM_CTS; 558 tmp |= TIOCM_CTS;
479 559
480 if (readl(sport->port.membase + UCR2) & UCR2_CTS) 560 if (readl(sport->port.membase + UCR2) & UCR2_CTS)
481 tmp |= TIOCM_RTS; 561 tmp |= TIOCM_RTS;
482 562
483 return tmp; 563 return tmp;
484} 564}
485 565
486static void imx_set_mctrl(struct uart_port *port, unsigned int mctrl) 566static void imx_set_mctrl(struct uart_port *port, unsigned int mctrl)
487{ 567{
488 struct imx_port *sport = (struct imx_port *)port; 568 struct imx_port *sport = (struct imx_port *)port;
489 unsigned long temp; 569 unsigned long temp;
490 570
491 temp = readl(sport->port.membase + UCR2) & ~UCR2_CTS; 571 temp = readl(sport->port.membase + UCR2) & ~UCR2_CTS;
492 572
493 if (mctrl & TIOCM_RTS) 573 if (mctrl & TIOCM_RTS)
494 temp |= UCR2_CTS; 574 temp |= UCR2_CTS;
495 575
496 writel(temp, sport->port.membase + UCR2); 576 writel(temp, sport->port.membase + UCR2);
@@ -534,12 +614,7 @@ static int imx_setup_ufcr(struct imx_port *sport, unsigned int mode)
534 if(!ufcr_rfdiv) 614 if(!ufcr_rfdiv)
535 ufcr_rfdiv = 1; 615 ufcr_rfdiv = 1;
536 616
537 if(ufcr_rfdiv >= 7) 617 val |= UFCR_RFDIV_REG(ufcr_rfdiv);
538 ufcr_rfdiv = 6;
539 else
540 ufcr_rfdiv = 6 - ufcr_rfdiv;
541
542 val |= UFCR_RFDIV & (ufcr_rfdiv << 7);
543 618
544 writel(val, sport->port.membase + UFCR); 619 writel(val, sport->port.membase + UFCR);
545 620
@@ -558,8 +633,24 @@ static int imx_startup(struct uart_port *port)
558 * requesting IRQs 633 * requesting IRQs
559 */ 634 */
560 temp = readl(sport->port.membase + UCR4); 635 temp = readl(sport->port.membase + UCR4);
636
637 if (USE_IRDA(sport))
638 temp |= UCR4_IRSC;
639
561 writel(temp & ~UCR4_DREN, sport->port.membase + UCR4); 640 writel(temp & ~UCR4_DREN, sport->port.membase + UCR4);
562 641
642 if (USE_IRDA(sport)) {
643 /* reset fifo's and state machines */
644 int i = 100;
645 temp = readl(sport->port.membase + UCR2);
646 temp &= ~UCR2_SRST;
647 writel(temp, sport->port.membase + UCR2);
648 while (!(readl(sport->port.membase + UCR2) & UCR2_SRST) &&
649 (--i > 0)) {
650 udelay(1);
651 }
652 }
653
563 /* 654 /*
564 * Allocate the IRQ(s) i.MX1 has three interrupts whereas later 655 * Allocate the IRQ(s) i.MX1 has three interrupts whereas later
565 * chips only have one interrupt. 656 * chips only have one interrupt.
@@ -575,12 +666,16 @@ static int imx_startup(struct uart_port *port)
575 if (retval) 666 if (retval)
576 goto error_out2; 667 goto error_out2;
577 668
578 retval = request_irq(sport->rtsirq, imx_rtsint, 669 /* do not use RTS IRQ on IrDA */
579 (sport->rtsirq < MAX_INTERNAL_IRQ) ? 0 : 670 if (!USE_IRDA(sport)) {
580 IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING, 671 retval = request_irq(sport->rtsirq, imx_rtsint,
581 DRIVER_NAME, sport); 672 (sport->rtsirq < MAX_INTERNAL_IRQ) ? 0 :
582 if (retval) 673 IRQF_TRIGGER_FALLING |
583 goto error_out3; 674 IRQF_TRIGGER_RISING,
675 DRIVER_NAME, sport);
676 if (retval)
677 goto error_out3;
678 }
584 } else { 679 } else {
585 retval = request_irq(sport->port.irq, imx_int, 0, 680 retval = request_irq(sport->port.irq, imx_int, 0,
586 DRIVER_NAME, sport); 681 DRIVER_NAME, sport);
@@ -597,18 +692,49 @@ static int imx_startup(struct uart_port *port)
597 692
598 temp = readl(sport->port.membase + UCR1); 693 temp = readl(sport->port.membase + UCR1);
599 temp |= UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN; 694 temp |= UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN;
695
696 if (USE_IRDA(sport)) {
697 temp |= UCR1_IREN;
698 temp &= ~(UCR1_RTSDEN);
699 }
700
600 writel(temp, sport->port.membase + UCR1); 701 writel(temp, sport->port.membase + UCR1);
601 702
602 temp = readl(sport->port.membase + UCR2); 703 temp = readl(sport->port.membase + UCR2);
603 temp |= (UCR2_RXEN | UCR2_TXEN); 704 temp |= (UCR2_RXEN | UCR2_TXEN);
604 writel(temp, sport->port.membase + UCR2); 705 writel(temp, sport->port.membase + UCR2);
605 706
707 if (USE_IRDA(sport)) {
708 /* clear RX-FIFO */
709 int i = 64;
710 while ((--i > 0) &&
711 (readl(sport->port.membase + URXD0) & URXD_CHARRDY)) {
712 barrier();
713 }
714 }
715
606#if defined CONFIG_ARCH_MX2 || defined CONFIG_ARCH_MX3 716#if defined CONFIG_ARCH_MX2 || defined CONFIG_ARCH_MX3
607 temp = readl(sport->port.membase + UCR3); 717 temp = readl(sport->port.membase + UCR3);
608 temp |= UCR3_RXDMUXSEL; 718 temp |= UCR3_RXDMUXSEL;
609 writel(temp, sport->port.membase + UCR3); 719 writel(temp, sport->port.membase + UCR3);
610#endif 720#endif
611 721
722 if (USE_IRDA(sport)) {
723 temp = readl(sport->port.membase + UCR4);
724 if (sport->irda_inv_rx)
725 temp |= UCR4_INVR;
726 else
727 temp &= ~(UCR4_INVR);
728 writel(temp | UCR4_DREN, sport->port.membase + UCR4);
729
730 temp = readl(sport->port.membase + UCR3);
731 if (sport->irda_inv_tx)
732 temp |= UCR3_INVT;
733 else
734 temp &= ~(UCR3_INVT);
735 writel(temp, sport->port.membase + UCR3);
736 }
737
612 /* 738 /*
613 * Enable modem status interrupts 739 * Enable modem status interrupts
614 */ 740 */
@@ -616,6 +742,16 @@ static int imx_startup(struct uart_port *port)
616 imx_enable_ms(&sport->port); 742 imx_enable_ms(&sport->port);
617 spin_unlock_irqrestore(&sport->port.lock,flags); 743 spin_unlock_irqrestore(&sport->port.lock,flags);
618 744
745 if (USE_IRDA(sport)) {
746 struct imxuart_platform_data *pdata;
747 pdata = sport->port.dev->platform_data;
748 sport->irda_inv_rx = pdata->irda_inv_rx;
749 sport->irda_inv_tx = pdata->irda_inv_tx;
750 sport->trcv_delay = pdata->transceiver_delay;
751 if (pdata->irda_enable)
752 pdata->irda_enable(1);
753 }
754
619 return 0; 755 return 0;
620 756
621error_out3: 757error_out3:
@@ -633,6 +769,17 @@ static void imx_shutdown(struct uart_port *port)
633 struct imx_port *sport = (struct imx_port *)port; 769 struct imx_port *sport = (struct imx_port *)port;
634 unsigned long temp; 770 unsigned long temp;
635 771
772 temp = readl(sport->port.membase + UCR2);
773 temp &= ~(UCR2_TXEN);
774 writel(temp, sport->port.membase + UCR2);
775
776 if (USE_IRDA(sport)) {
777 struct imxuart_platform_data *pdata;
778 pdata = sport->port.dev->platform_data;
779 if (pdata->irda_enable)
780 pdata->irda_enable(0);
781 }
782
636 /* 783 /*
637 * Stop our timer. 784 * Stop our timer.
638 */ 785 */
@@ -642,7 +789,8 @@ static void imx_shutdown(struct uart_port *port)
642 * Free the interrupts 789 * Free the interrupts
643 */ 790 */
644 if (sport->txirq > 0) { 791 if (sport->txirq > 0) {
645 free_irq(sport->rtsirq, sport); 792 if (!USE_IRDA(sport))
793 free_irq(sport->rtsirq, sport);
646 free_irq(sport->txirq, sport); 794 free_irq(sport->txirq, sport);
647 free_irq(sport->rxirq, sport); 795 free_irq(sport->rxirq, sport);
648 } else 796 } else
@@ -654,6 +802,9 @@ static void imx_shutdown(struct uart_port *port)
654 802
655 temp = readl(sport->port.membase + UCR1); 803 temp = readl(sport->port.membase + UCR1);
656 temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN); 804 temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN);
805 if (USE_IRDA(sport))
806 temp &= ~(UCR1_IREN);
807
657 writel(temp, sport->port.membase + UCR1); 808 writel(temp, sport->port.membase + UCR1);
658} 809}
659 810
@@ -665,7 +816,9 @@ imx_set_termios(struct uart_port *port, struct ktermios *termios,
665 unsigned long flags; 816 unsigned long flags;
666 unsigned int ucr2, old_ucr1, old_txrxen, baud, quot; 817 unsigned int ucr2, old_ucr1, old_txrxen, baud, quot;
667 unsigned int old_csize = old ? old->c_cflag & CSIZE : CS8; 818 unsigned int old_csize = old ? old->c_cflag & CSIZE : CS8;
668 unsigned int div, num, denom, ufcr; 819 unsigned int div, ufcr;
820 unsigned long num, denom;
821 uint64_t tdiv64;
669 822
670 /* 823 /*
671 * If we don't support modem control lines, don't allow 824 * If we don't support modem control lines, don't allow
@@ -761,38 +914,39 @@ imx_set_termios(struct uart_port *port, struct ktermios *termios,
761 sport->port.membase + UCR2); 914 sport->port.membase + UCR2);
762 old_txrxen &= (UCR2_TXEN | UCR2_RXEN); 915 old_txrxen &= (UCR2_TXEN | UCR2_RXEN);
763 916
764 div = sport->port.uartclk / (baud * 16); 917 if (USE_IRDA(sport)) {
765 if (div > 7) 918 /*
766 div = 7; 919 * use maximum available submodule frequency to
767 if (!div) 920 * avoid missing short pulses due to low sampling rate
921 */
768 div = 1; 922 div = 1;
769 923 } else {
770 num = baud; 924 div = sport->port.uartclk / (baud * 16);
771 denom = port->uartclk / div / 16; 925 if (div > 7)
772 926 div = 7;
773 /* shift num and denom right until they fit into 16 bits */ 927 if (!div)
774 while (num > 0x10000 || denom > 0x10000) { 928 div = 1;
775 num >>= 1;
776 denom >>= 1;
777 } 929 }
778 if (num > 0)
779 num -= 1;
780 if (denom > 0)
781 denom -= 1;
782 930
783 writel(num, sport->port.membase + UBIR); 931 rational_best_approximation(16 * div * baud, sport->port.uartclk,
784 writel(denom, sport->port.membase + UBMR); 932 1 << 16, 1 << 16, &num, &denom);
785 933
786 if (div == 7) 934 tdiv64 = sport->port.uartclk;
787 div = 6; /* 6 in RFDIV means divide by 7 */ 935 tdiv64 *= num;
788 else 936 do_div(tdiv64, denom * 16 * div);
789 div = 6 - div; 937 tty_encode_baud_rate(sport->port.info->port.tty,
938 (speed_t)tdiv64, (speed_t)tdiv64);
939
940 num -= 1;
941 denom -= 1;
790 942
791 ufcr = readl(sport->port.membase + UFCR); 943 ufcr = readl(sport->port.membase + UFCR);
792 ufcr = (ufcr & (~UFCR_RFDIV)) | 944 ufcr = (ufcr & (~UFCR_RFDIV)) | UFCR_RFDIV_REG(div);
793 (div << 7);
794 writel(ufcr, sport->port.membase + UFCR); 945 writel(ufcr, sport->port.membase + UFCR);
795 946
947 writel(num, sport->port.membase + UBIR);
948 writel(denom, sport->port.membase + UBMR);
949
796#ifdef ONEMS 950#ifdef ONEMS
797 writel(sport->port.uartclk / div / 1000, sport->port.membase + ONEMS); 951 writel(sport->port.uartclk / div / 1000, sport->port.membase + ONEMS);
798#endif 952#endif
@@ -1072,22 +1226,22 @@ static struct uart_driver imx_reg = {
1072 1226
1073static int serial_imx_suspend(struct platform_device *dev, pm_message_t state) 1227static int serial_imx_suspend(struct platform_device *dev, pm_message_t state)
1074{ 1228{
1075 struct imx_port *sport = platform_get_drvdata(dev); 1229 struct imx_port *sport = platform_get_drvdata(dev);
1076 1230
1077 if (sport) 1231 if (sport)
1078 uart_suspend_port(&imx_reg, &sport->port); 1232 uart_suspend_port(&imx_reg, &sport->port);
1079 1233
1080 return 0; 1234 return 0;
1081} 1235}
1082 1236
1083static int serial_imx_resume(struct platform_device *dev) 1237static int serial_imx_resume(struct platform_device *dev)
1084{ 1238{
1085 struct imx_port *sport = platform_get_drvdata(dev); 1239 struct imx_port *sport = platform_get_drvdata(dev);
1086 1240
1087 if (sport) 1241 if (sport)
1088 uart_resume_port(&imx_reg, &sport->port); 1242 uart_resume_port(&imx_reg, &sport->port);
1089 1243
1090 return 0; 1244 return 0;
1091} 1245}
1092 1246
1093static int serial_imx_probe(struct platform_device *pdev) 1247static int serial_imx_probe(struct platform_device *pdev)
@@ -1143,19 +1297,29 @@ static int serial_imx_probe(struct platform_device *pdev)
1143 imx_ports[pdev->id] = sport; 1297 imx_ports[pdev->id] = sport;
1144 1298
1145 pdata = pdev->dev.platform_data; 1299 pdata = pdev->dev.platform_data;
1146 if(pdata && (pdata->flags & IMXUART_HAVE_RTSCTS)) 1300 if (pdata && (pdata->flags & IMXUART_HAVE_RTSCTS))
1147 sport->have_rtscts = 1; 1301 sport->have_rtscts = 1;
1148 1302
1303#ifdef CONFIG_IRDA
1304 if (pdata && (pdata->flags & IMXUART_IRDA))
1305 sport->use_irda = 1;
1306#endif
1307
1149 if (pdata->init) { 1308 if (pdata->init) {
1150 ret = pdata->init(pdev); 1309 ret = pdata->init(pdev);
1151 if (ret) 1310 if (ret)
1152 goto clkput; 1311 goto clkput;
1153 } 1312 }
1154 1313
1155 uart_add_one_port(&imx_reg, &sport->port); 1314 ret = uart_add_one_port(&imx_reg, &sport->port);
1315 if (ret)
1316 goto deinit;
1156 platform_set_drvdata(pdev, &sport->port); 1317 platform_set_drvdata(pdev, &sport->port);
1157 1318
1158 return 0; 1319 return 0;
1320deinit:
1321 if (pdata->exit)
1322 pdata->exit(pdev);
1159clkput: 1323clkput:
1160 clk_put(sport->clk); 1324 clk_put(sport->clk);
1161 clk_disable(sport->clk); 1325 clk_disable(sport->clk);
@@ -1193,13 +1357,13 @@ static int serial_imx_remove(struct platform_device *pdev)
1193} 1357}
1194 1358
1195static struct platform_driver serial_imx_driver = { 1359static struct platform_driver serial_imx_driver = {
1196 .probe = serial_imx_probe, 1360 .probe = serial_imx_probe,
1197 .remove = serial_imx_remove, 1361 .remove = serial_imx_remove,
1198 1362
1199 .suspend = serial_imx_suspend, 1363 .suspend = serial_imx_suspend,
1200 .resume = serial_imx_resume, 1364 .resume = serial_imx_resume,
1201 .driver = { 1365 .driver = {
1202 .name = "imx-uart", 1366 .name = "imx-uart",
1203 .owner = THIS_MODULE, 1367 .owner = THIS_MODULE,
1204 }, 1368 },
1205}; 1369};
diff --git a/drivers/serial/jsm/jsm.h b/drivers/serial/jsm/jsm.h
index c0a3e2734e24..4e5f3bde0461 100644
--- a/drivers/serial/jsm/jsm.h
+++ b/drivers/serial/jsm/jsm.h
@@ -61,6 +61,7 @@ enum {
61 if ((DBG_##nlevel & jsm_debug)) \ 61 if ((DBG_##nlevel & jsm_debug)) \
62 dev_printk(KERN_##klevel, pdev->dev, fmt, ## args) 62 dev_printk(KERN_##klevel, pdev->dev, fmt, ## args)
63 63
64#define MAXLINES 256
64#define MAXPORTS 8 65#define MAXPORTS 8
65#define MAX_STOPS_SENT 5 66#define MAX_STOPS_SENT 5
66 67
diff --git a/drivers/serial/jsm/jsm_tty.c b/drivers/serial/jsm/jsm_tty.c
index 31496dc0a0d1..107ce2e187b8 100644
--- a/drivers/serial/jsm/jsm_tty.c
+++ b/drivers/serial/jsm/jsm_tty.c
@@ -33,6 +33,8 @@
33 33
34#include "jsm.h" 34#include "jsm.h"
35 35
36static DECLARE_BITMAP(linemap, MAXLINES);
37
36static void jsm_carrier(struct jsm_channel *ch); 38static void jsm_carrier(struct jsm_channel *ch);
37 39
38static inline int jsm_get_mstat(struct jsm_channel *ch) 40static inline int jsm_get_mstat(struct jsm_channel *ch)
@@ -433,6 +435,7 @@ int __devinit jsm_tty_init(struct jsm_board *brd)
433int __devinit jsm_uart_port_init(struct jsm_board *brd) 435int __devinit jsm_uart_port_init(struct jsm_board *brd)
434{ 436{
435 int i; 437 int i;
438 unsigned int line;
436 struct jsm_channel *ch; 439 struct jsm_channel *ch;
437 440
438 if (!brd) 441 if (!brd)
@@ -459,9 +462,15 @@ int __devinit jsm_uart_port_init(struct jsm_board *brd)
459 brd->channels[i]->uart_port.membase = brd->re_map_membase; 462 brd->channels[i]->uart_port.membase = brd->re_map_membase;
460 brd->channels[i]->uart_port.fifosize = 16; 463 brd->channels[i]->uart_port.fifosize = 16;
461 brd->channels[i]->uart_port.ops = &jsm_ops; 464 brd->channels[i]->uart_port.ops = &jsm_ops;
462 brd->channels[i]->uart_port.line = brd->channels[i]->ch_portnum + brd->boardnum * 2; 465 line = find_first_zero_bit(linemap, MAXLINES);
466 if (line >= MAXLINES) {
467 printk(KERN_INFO "jsm: linemap is full, added device failed\n");
468 continue;
469 } else
470 set_bit((int)line, linemap);
471 brd->channels[i]->uart_port.line = line;
463 if (uart_add_one_port (&jsm_uart_driver, &brd->channels[i]->uart_port)) 472 if (uart_add_one_port (&jsm_uart_driver, &brd->channels[i]->uart_port))
464 printk(KERN_INFO "Added device failed\n"); 473 printk(KERN_INFO "jsm: add device failed\n");
465 else 474 else
466 printk(KERN_INFO "Added device \n"); 475 printk(KERN_INFO "Added device \n");
467 } 476 }
@@ -494,6 +503,7 @@ int jsm_remove_uart_port(struct jsm_board *brd)
494 503
495 ch = brd->channels[i]; 504 ch = brd->channels[i];
496 505
506 clear_bit((int)(ch->uart_port.line), linemap);
497 uart_remove_one_port(&jsm_uart_driver, &brd->channels[i]->uart_port); 507 uart_remove_one_port(&jsm_uart_driver, &brd->channels[i]->uart_port);
498 } 508 }
499 509
diff --git a/drivers/serial/timbuart.c b/drivers/serial/timbuart.c
new file mode 100644
index 000000000000..ac9e5d5f742e
--- /dev/null
+++ b/drivers/serial/timbuart.c
@@ -0,0 +1,526 @@
1/*
2 * timbuart.c timberdale FPGA UART driver
3 * Copyright (c) 2009 Intel Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19/* Supports:
20 * Timberdale FPGA UART
21 */
22
23#include <linux/pci.h>
24#include <linux/interrupt.h>
25#include <linux/serial_core.h>
26#include <linux/kernel.h>
27#include <linux/platform_device.h>
28#include <linux/ioport.h>
29
30#include "timbuart.h"
31
/* Per-port driver state wrapping the generic uart_port. */
struct timbuart_port {
	struct uart_port	port;
	struct tasklet_struct	tasklet;	/* bottom half doing TX/RX/modem work */
	int			usedma;		/* DMA mode flag; always 0 in this driver */
	u8			last_ier;	/* IER snapshot taken in the hard IRQ handler */
	struct platform_device  *dev;
};

/* Discrete rates supported by the FPGA; the array index is what gets
 * written to the TIMBUART_BAUDRATE register (see get_bindex()). */
static int baudrates[] = {9600, 19200, 38400, 57600, 115200, 230400, 460800,
	921600, 1843200, 3250000};

static void timbuart_mctrl_check(struct uart_port *port, u8 isr, u8 *ier);

static irqreturn_t timbuart_handleinterrupt(int irq, void *devid);
46
47static void timbuart_stop_rx(struct uart_port *port)
48{
49 /* spin lock held by upper layer, disable all RX interrupts */
50 u8 ier = ioread8(port->membase + TIMBUART_IER) & ~RXFLAGS;
51 iowrite8(ier, port->membase + TIMBUART_IER);
52}
53
54static void timbuart_stop_tx(struct uart_port *port)
55{
56 /* spinlock held by upper layer, disable TX interrupt */
57 u8 ier = ioread8(port->membase + TIMBUART_IER) & ~TXBAE;
58 iowrite8(ier, port->membase + TIMBUART_IER);
59}
60
61static void timbuart_start_tx(struct uart_port *port)
62{
63 struct timbuart_port *uart =
64 container_of(port, struct timbuart_port, port);
65
66 /* do not transfer anything here -> fire off the tasklet */
67 tasklet_schedule(&uart->tasklet);
68}
69
70static void timbuart_flush_buffer(struct uart_port *port)
71{
72 u8 ctl = ioread8(port->membase + TIMBUART_CTRL) | TIMBUART_CTRL_FLSHTX;
73
74 iowrite8(ctl, port->membase + TIMBUART_CTRL);
75 iowrite8(TXBF, port->membase + TIMBUART_ISR);
76}
77
/*
 * Drain the RX FIFO into the tty flip buffer.
 * Called with port->lock held; the lock is dropped around
 * tty_flip_buffer_push() because pushing may re-enter the tty layer.
 */
static void timbuart_rx_chars(struct uart_port *port)
{
	struct tty_struct *tty = port->info->port.tty;

	/* RXDP set means at least one byte is pending in the FIFO */
	while (ioread8(port->membase + TIMBUART_ISR) & RXDP) {
		u8 ch = ioread8(port->membase + TIMBUART_RXFIFO);
		port->icount.rx++;
		tty_insert_flip_char(tty, ch, TTY_NORMAL);
	}

	spin_unlock(&port->lock);
	tty_flip_buffer_push(port->info->port.tty);
	spin_lock(&port->lock);

	dev_dbg(port->dev, "%s - total read %d bytes\n",
		__func__, port->icount.rx);
}
95
/*
 * Feed bytes from the circular transmit buffer into the TX FIFO until
 * the FIFO reports full (TXBF) or the buffer runs dry.
 * Caller holds port->lock.
 */
static void timbuart_tx_chars(struct uart_port *port)
{
	struct circ_buf *xmit = &port->info->xmit;

	while (!(ioread8(port->membase + TIMBUART_ISR) & TXBF) &&
		!uart_circ_empty(xmit)) {
		iowrite8(xmit->buf[xmit->tail],
			port->membase + TIMBUART_TXFIFO);
		/* circular buffer: wrap the tail at UART_XMIT_SIZE */
		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
		port->icount.tx++;
	}

	dev_dbg(port->dev,
		"%s - total written %d bytes, CTL: %x, RTS: %x, baud: %x\n",
		__func__,
		port->icount.tx,
		ioread8(port->membase + TIMBUART_CTRL),
		port->mctrl & TIOCM_RTS,
		ioread8(port->membase + TIMBUART_BAUDRATE));
}
116
/*
 * TX half of the tasklet: push data when a TX interrupt fired and
 * accumulate into *ier the interrupt-enable bits needed to be woken
 * again.  Caller holds the port lock and writes *ier back afterwards.
 */
static void timbuart_handle_tx_port(struct uart_port *port, u8 isr, u8 *ier)
{
	struct timbuart_port *uart =
		container_of(port, struct timbuart_port, port);
	struct circ_buf *xmit = &port->info->xmit;

	if (uart_circ_empty(xmit) || uart_tx_stopped(port))
		return;

	/* NOTE(review): a pending x_char (XON/XOFF) is never transmitted,
	 * only bailed out on — confirm this is intended */
	if (port->x_char)
		return;

	if (isr & TXFLAGS) {
		timbuart_tx_chars(port);
		/* clear all TX interrupts */
		iowrite8(TXFLAGS, port->membase + TIMBUART_ISR);

		/* tell the tty layer there is room again */
		if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
			uart_write_wakeup(port);
	} else
		/* Re-enable any tx interrupt */
		*ier |= uart->last_ier & TXFLAGS;

	/* enable interrupts if there are chars in the transmit buffer,
	 * Or if we delivered some bytes and want the almost empty interrupt
	 * we wake up the upper layer later when we got the interrupt
	 * to give it some time to go out...
	 */
	if (!uart_circ_empty(xmit))
		*ier |= TXBAE;

	dev_dbg(port->dev, "%s - leaving\n", __func__);
}
150
/*
 * RX half of the tasklet: on a buffer-full overrun flush the RX FIFO
 * (the data is lost and counted), otherwise drain pending bytes; then
 * ack the RX status bits and request RX interrupts via *ier.
 * NOTE(review): not declared static, unlike the TX handler — consider
 * making it static if no other file references it.
 */
void timbuart_handle_rx_port(struct uart_port *port, u8 isr, u8 *ier)
{
	if (isr & RXFLAGS) {
		/* Some RX status is set */
		if (isr & RXBF) {
			/* RX buffer full: flush and account the overrun */
			u8 ctl = ioread8(port->membase + TIMBUART_CTRL) |
				TIMBUART_CTRL_FLSHRX;
			iowrite8(ctl, port->membase + TIMBUART_CTRL);
			port->icount.overrun++;
		} else if (isr & (RXDP))
			timbuart_rx_chars(port);

		/* ack all RX interrupts */
		iowrite8(RXFLAGS, port->membase + TIMBUART_ISR);
	}

	/* always have the RX interrupts enabled */
	*ier |= RXBAF | RXBF | RXTT;

	dev_dbg(port->dev, "%s - leaving\n", __func__);
}
172
/*
 * Bottom half: read the interrupt status once, let the TX, modem and
 * RX handlers accumulate the IER bits they need, then write the
 * combined mask back (re-enabling what timbuart_handleinterrupt()
 * masked off).
 * NOTE(review): not declared static — consider making it static.
 */
void timbuart_tasklet(unsigned long arg)
{
	struct timbuart_port *uart = (struct timbuart_port *)arg;
	u8 isr, ier = 0;

	spin_lock(&uart->port.lock);

	isr = ioread8(uart->port.membase + TIMBUART_ISR);
	dev_dbg(uart->port.dev, "%s ISR: %x\n", __func__, isr);

	/* PIO paths only run when DMA is not in use (usedma is 0 here) */
	if (!uart->usedma)
		timbuart_handle_tx_port(&uart->port, isr, &ier);

	timbuart_mctrl_check(&uart->port, isr, &ier);

	if (!uart->usedma)
		timbuart_handle_rx_port(&uart->port, isr, &ier);

	iowrite8(ier, uart->port.membase + TIMBUART_IER);

	spin_unlock(&uart->port.lock);
	dev_dbg(uart->port.dev, "%s leaving\n", __func__);
}
196
197static unsigned int timbuart_tx_empty(struct uart_port *port)
198{
199 u8 isr = ioread8(port->membase + TIMBUART_ISR);
200
201 return (isr & TXBAE) ? TIOCSER_TEMT : 0;
202}
203
204static unsigned int timbuart_get_mctrl(struct uart_port *port)
205{
206 u8 cts = ioread8(port->membase + TIMBUART_CTRL);
207 dev_dbg(port->dev, "%s - cts %x\n", __func__, cts);
208
209 if (cts & TIMBUART_CTRL_CTS)
210 return TIOCM_CTS | TIOCM_DSR | TIOCM_CAR;
211 else
212 return TIOCM_DSR | TIOCM_CAR;
213}
214
215static void timbuart_set_mctrl(struct uart_port *port, unsigned int mctrl)
216{
217 dev_dbg(port->dev, "%s - %x\n", __func__, mctrl);
218
219 if (mctrl & TIOCM_RTS)
220 iowrite8(TIMBUART_CTRL_RTS, port->membase + TIMBUART_CTRL);
221 else
222 iowrite8(TIMBUART_CTRL_RTS, port->membase + TIMBUART_CTRL);
223}
224
/*
 * Handle a CTS change from the tasklet: ack the delta interrupt,
 * report the new CTS state to the serial core and wake TIOCMIWAIT
 * sleepers.  The CTS_DELTA interrupt is always re-armed via *ier.
 */
static void timbuart_mctrl_check(struct uart_port *port, u8 isr, u8 *ier)
{
	unsigned int cts;

	if (isr & CTS_DELTA) {
		/* ack */
		iowrite8(CTS_DELTA, port->membase + TIMBUART_ISR);
		cts = timbuart_get_mctrl(port);
		uart_handle_cts_change(port, cts & TIOCM_CTS);
		wake_up_interruptible(&port->info->delta_msr_wait);
	}

	*ier |= CTS_DELTA;
}
239
/* Modem-status interrupts: nothing to enable here; CTS changes are
 * already delivered through the tasklet via timbuart_mctrl_check(). */
static void timbuart_enable_ms(struct uart_port *port)
{
	/* N/A */
}
244
/* Break control: intentionally a no-op (no break support implemented). */
static void timbuart_break_ctl(struct uart_port *port, int ctl)
{
	/* N/A */
}
249
/*
 * Open path: flush stale RX data, ack every pending interrupt,
 * enable the RX and CTS interrupt sources and grab the (shared) IRQ.
 * Returns 0 or the request_irq() error.
 */
static int timbuart_startup(struct uart_port *port)
{
	struct timbuart_port *uart =
		container_of(port, struct timbuart_port, port);

	dev_dbg(port->dev, "%s\n", __func__);

	iowrite8(TIMBUART_CTRL_FLSHRX, port->membase + TIMBUART_CTRL);
	/* writing 1s acks the corresponding status bits */
	iowrite8(0xff, port->membase + TIMBUART_ISR);
	/* Enable all but TX interrupts */
	iowrite8(RXBAF | RXBF | RXTT | CTS_DELTA,
		port->membase + TIMBUART_IER);

	return request_irq(port->irq, timbuart_handleinterrupt, IRQF_SHARED,
		"timb-uart", uart);
}
266
267static void timbuart_shutdown(struct uart_port *port)
268{
269 struct timbuart_port *uart =
270 container_of(port, struct timbuart_port, port);
271 dev_dbg(port->dev, "%s\n", __func__);
272 free_irq(port->irq, uart);
273 iowrite8(0, port->membase + TIMBUART_IER);
274}
275
276static int get_bindex(int baud)
277{
278 int i;
279
280 for (i = 0; i < ARRAY_SIZE(baudrates); i++)
281 if (baud <= baudrates[i])
282 return i;
283
284 return -1;
285}
286
/*
 * Apply termios settings.  Only the baud rate is programmable; it is
 * quantized to the nearest supported rate (rounding up) and its table
 * index is written to the BAUDRATE register.
 */
static void timbuart_set_termios(struct uart_port *port,
	struct ktermios *termios,
	struct ktermios *old)
{
	unsigned int baud;
	short bindex;
	unsigned long flags;

	baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk / 16);
	bindex = get_bindex(baud);
	dev_dbg(port->dev, "%s - bindex %d\n", __func__, bindex);

	/* NOTE(review): a request above the maximum rate falls back to
	 * index 0 (the slowest rate) — confirm this is intended */
	if (bindex < 0)
		bindex = 0;
	baud = baudrates[bindex];

	/* The serial layer calls into this once with old = NULL when setting
	   up initially */
	if (old)
		tty_termios_copy_hw(termios, old);
	tty_termios_encode_baud_rate(termios, baud, baud);

	spin_lock_irqsave(&port->lock, flags);
	iowrite8((u8)bindex, port->membase + TIMBUART_BAUDRATE);
	uart_update_timeout(port, termios->c_cflag, baud);
	spin_unlock_irqrestore(&port->lock, flags);
}
314
315static const char *timbuart_type(struct uart_port *port)
316{
317 return port->type == PORT_UNKNOWN ? "timbuart" : NULL;
318}
319
/* We do not request/release mappings of the registers here,
 * currently it's done in the probe function.
 */
323static void timbuart_release_port(struct uart_port *port)
324{
325 struct platform_device *pdev = to_platform_device(port->dev);
326 int size =
327 resource_size(platform_get_resource(pdev, IORESOURCE_MEM, 0));
328
329 if (port->flags & UPF_IOREMAP) {
330 iounmap(port->membase);
331 port->membase = NULL;
332 }
333
334 release_mem_region(port->mapbase, size);
335}
336
337static int timbuart_request_port(struct uart_port *port)
338{
339 struct platform_device *pdev = to_platform_device(port->dev);
340 int size =
341 resource_size(platform_get_resource(pdev, IORESOURCE_MEM, 0));
342
343 if (!request_mem_region(port->mapbase, size, "timb-uart"))
344 return -EBUSY;
345
346 if (port->flags & UPF_IOREMAP) {
347 port->membase = ioremap(port->mapbase, size);
348 if (port->membase == NULL) {
349 release_mem_region(port->mapbase, size);
350 return -ENOMEM;
351 }
352 }
353
354 return 0;
355}
356
357static irqreturn_t timbuart_handleinterrupt(int irq, void *devid)
358{
359 struct timbuart_port *uart = (struct timbuart_port *)devid;
360
361 if (ioread8(uart->port.membase + TIMBUART_IPR)) {
362 uart->last_ier = ioread8(uart->port.membase + TIMBUART_IER);
363
364 /* disable interrupts, the tasklet enables them again */
365 iowrite8(0, uart->port.membase + TIMBUART_IER);
366
367 /* fire off bottom half */
368 tasklet_schedule(&uart->tasklet);
369
370 return IRQ_HANDLED;
371 } else
372 return IRQ_NONE;
373}
374
375/*
376 * Configure/autoconfigure the port.
377 */
378static void timbuart_config_port(struct uart_port *port, int flags)
379{
380 if (flags & UART_CONFIG_TYPE) {
381 port->type = PORT_TIMBUART;
382 timbuart_request_port(port);
383 }
384}
385
386static int timbuart_verify_port(struct uart_port *port,
387 struct serial_struct *ser)
388{
389 /* we don't want the core code to modify any port params */
390 return -EINVAL;
391}
392
/* uart_ops vtable hooking this driver into the serial core. */
static struct uart_ops timbuart_ops = {
	.tx_empty = timbuart_tx_empty,
	.set_mctrl = timbuart_set_mctrl,
	.get_mctrl = timbuart_get_mctrl,
	.stop_tx = timbuart_stop_tx,
	.start_tx = timbuart_start_tx,
	.flush_buffer = timbuart_flush_buffer,
	.stop_rx = timbuart_stop_rx,
	.enable_ms = timbuart_enable_ms,
	.break_ctl = timbuart_break_ctl,
	.startup = timbuart_startup,
	.shutdown = timbuart_shutdown,
	.set_termios = timbuart_set_termios,
	.type = timbuart_type,
	.release_port = timbuart_release_port,
	.request_port = timbuart_request_port,
	.config_port = timbuart_config_port,
	.verify_port = timbuart_verify_port
};
412
/* Single-port (.nr = 1) uart_driver; device nodes appear as ttyTU<n>. */
static struct uart_driver timbuart_driver = {
	.owner = THIS_MODULE,
	.driver_name = "timberdale_uart",
	.dev_name = "ttyTU",
	.major = TIMBUART_MAJOR,
	.minor = TIMBUART_MINOR,
	.nr = 1
};
421
422static int timbuart_probe(struct platform_device *dev)
423{
424 int err;
425 struct timbuart_port *uart;
426 struct resource *iomem;
427
428 dev_dbg(&dev->dev, "%s\n", __func__);
429
430 uart = kzalloc(sizeof(*uart), GFP_KERNEL);
431 if (!uart) {
432 err = -EINVAL;
433 goto err_mem;
434 }
435
436 uart->usedma = 0;
437
438 uart->port.uartclk = 3250000 * 16;
439 uart->port.fifosize = TIMBUART_FIFO_SIZE;
440 uart->port.regshift = 2;
441 uart->port.iotype = UPIO_MEM;
442 uart->port.ops = &timbuart_ops;
443 uart->port.irq = 0;
444 uart->port.flags = UPF_BOOT_AUTOCONF | UPF_IOREMAP;
445 uart->port.line = 0;
446 uart->port.dev = &dev->dev;
447
448 iomem = platform_get_resource(dev, IORESOURCE_MEM, 0);
449 if (!iomem) {
450 err = -ENOMEM;
451 goto err_register;
452 }
453 uart->port.mapbase = iomem->start;
454 uart->port.membase = NULL;
455
456 uart->port.irq = platform_get_irq(dev, 0);
457 if (uart->port.irq < 0) {
458 err = -EINVAL;
459 goto err_register;
460 }
461
462 tasklet_init(&uart->tasklet, timbuart_tasklet, (unsigned long)uart);
463
464 err = uart_register_driver(&timbuart_driver);
465 if (err)
466 goto err_register;
467
468 err = uart_add_one_port(&timbuart_driver, &uart->port);
469 if (err)
470 goto err_add_port;
471
472 platform_set_drvdata(dev, uart);
473
474 return 0;
475
476err_add_port:
477 uart_unregister_driver(&timbuart_driver);
478err_register:
479 kfree(uart);
480err_mem:
481 printk(KERN_ERR "timberdale: Failed to register Timberdale UART: %d\n",
482 err);
483
484 return err;
485}
486
/* Remove: stop the bottom half first, then tear the port and driver
 * registration down and free the per-port state. */
static int timbuart_remove(struct platform_device *dev)
{
	struct timbuart_port *uart = platform_get_drvdata(dev);

	tasklet_kill(&uart->tasklet);
	uart_remove_one_port(&timbuart_driver, &uart->port);
	uart_unregister_driver(&timbuart_driver);
	kfree(uart);

	return 0;
}
498
/* Binds to the "timb-uart" platform device created by the timberdale MFD. */
static struct platform_driver timbuart_platform_driver = {
	.driver = {
		.name	= "timb-uart",
		.owner	= THIS_MODULE,
	},
	.probe	= timbuart_probe,
	.remove	= timbuart_remove,
};
507
/*--------------------------------------------------------------------------*/

/* Module entry point: register the platform driver. */
static int __init timbuart_init(void)
{
	return platform_driver_register(&timbuart_platform_driver);
}

/* Module exit point: unregister the platform driver. */
static void __exit timbuart_exit(void)
{
	platform_driver_unregister(&timbuart_platform_driver);
}

module_init(timbuart_init);
module_exit(timbuart_exit);

MODULE_DESCRIPTION("Timberdale UART driver");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("platform:timb-uart");
526
diff --git a/drivers/serial/timbuart.h b/drivers/serial/timbuart.h
new file mode 100644
index 000000000000..7e566766bc43
--- /dev/null
+++ b/drivers/serial/timbuart.h
@@ -0,0 +1,58 @@
1/*
 * timbuart.h timberdale FPGA UART driver
3 * Copyright (c) 2009 Intel Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19/* Supports:
20 * Timberdale FPGA UART
21 */
22
#ifndef _TIMBUART_H
#define _TIMBUART_H

/* hardware FIFO depth in bytes */
#define TIMBUART_FIFO_SIZE	2048

/* register offsets */
#define TIMBUART_RXFIFO		0x08
#define TIMBUART_TXFIFO		0x0c
#define TIMBUART_IER		0x10	/* interrupt enable */
#define TIMBUART_IPR		0x14	/* interrupt pending */
#define TIMBUART_ISR		0x18	/* interrupt status (write 1 to ack) */
#define TIMBUART_CTRL		0x1c
#define TIMBUART_BAUDRATE	0x20	/* index into the driver's rate table */

/* CTRL register bits */
#define TIMBUART_CTRL_RTS	0x01
#define TIMBUART_CTRL_CTS	0x02
#define TIMBUART_CTRL_FLSHTX	0x40	/* flush TX FIFO */
#define TIMBUART_CTRL_FLSHRX	0x80	/* flush RX FIFO */

/* interrupt/status bits (IER/IPR/ISR) */
#define TXBF		0x01	/* TX buffer full */
#define TXBAE		0x02	/* TX buffer almost empty */
#define CTS_DELTA	0x04	/* CTS changed */
#define RXDP		0x08	/* RX data pending */
#define RXBAF		0x10	/* RX buffer almost full */
#define RXBF		0x20	/* RX buffer full */
#define RXTT		0x40	/* RX timeout trigger */
#define RXBNAE		0x80	/* RX buffer not almost empty */
/* NOTE(review): TXBE does not fit in the u8 register accesses used by
 * the driver, so it can never be observed via ioread8 — confirm */
#define TXBE		0x100	/* TX buffer empty */

#define RXFLAGS (RXDP | RXBAF | RXBF | RXTT | RXBNAE)
#define TXFLAGS (TXBF | TXBAE)

/* char device numbers for the ttyTU uart_driver */
#define TIMBUART_MAJOR 204
#define TIMBUART_MINOR 192

#endif /* _TIMBUART_H */
58
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 7a1164dd1d37..ddeb69192537 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -16,7 +16,8 @@
16 * v0.9 - thorough cleaning, URBification, almost a rewrite 16 * v0.9 - thorough cleaning, URBification, almost a rewrite
17 * v0.10 - some more cleanups 17 * v0.10 - some more cleanups
18 * v0.11 - fixed flow control, read error doesn't stop reads 18 * v0.11 - fixed flow control, read error doesn't stop reads
19 * v0.12 - added TIOCM ioctls, added break handling, made struct acm kmalloced 19 * v0.12 - added TIOCM ioctls, added break handling, made struct acm
20 * kmalloced
20 * v0.13 - added termios, added hangup 21 * v0.13 - added termios, added hangup
21 * v0.14 - sized down struct acm 22 * v0.14 - sized down struct acm
22 * v0.15 - fixed flow control again - characters could be lost 23 * v0.15 - fixed flow control again - characters could be lost
@@ -62,7 +63,7 @@
62#include <linux/tty_flip.h> 63#include <linux/tty_flip.h>
63#include <linux/module.h> 64#include <linux/module.h>
64#include <linux/mutex.h> 65#include <linux/mutex.h>
65#include <asm/uaccess.h> 66#include <linux/uaccess.h>
66#include <linux/usb.h> 67#include <linux/usb.h>
67#include <linux/usb/cdc.h> 68#include <linux/usb/cdc.h>
68#include <asm/byteorder.h> 69#include <asm/byteorder.h>
@@ -87,7 +88,10 @@ static struct acm *acm_table[ACM_TTY_MINORS];
87 88
88static DEFINE_MUTEX(open_mutex); 89static DEFINE_MUTEX(open_mutex);
89 90
90#define ACM_READY(acm) (acm && acm->dev && acm->used) 91#define ACM_READY(acm) (acm && acm->dev && acm->port.count)
92
93static const struct tty_port_operations acm_port_ops = {
94};
91 95
92#ifdef VERBOSE_DEBUG 96#ifdef VERBOSE_DEBUG
93#define verbose 1 97#define verbose 1
@@ -99,13 +103,15 @@ static DEFINE_MUTEX(open_mutex);
99 * Functions for ACM control messages. 103 * Functions for ACM control messages.
100 */ 104 */
101 105
102static int acm_ctrl_msg(struct acm *acm, int request, int value, void *buf, int len) 106static int acm_ctrl_msg(struct acm *acm, int request, int value,
107 void *buf, int len)
103{ 108{
104 int retval = usb_control_msg(acm->dev, usb_sndctrlpipe(acm->dev, 0), 109 int retval = usb_control_msg(acm->dev, usb_sndctrlpipe(acm->dev, 0),
105 request, USB_RT_ACM, value, 110 request, USB_RT_ACM, value,
106 acm->control->altsetting[0].desc.bInterfaceNumber, 111 acm->control->altsetting[0].desc.bInterfaceNumber,
107 buf, len, 5000); 112 buf, len, 5000);
108 dbg("acm_control_msg: rq: 0x%02x val: %#x len: %#x result: %d", request, value, len, retval); 113 dbg("acm_control_msg: rq: 0x%02x val: %#x len: %#x result: %d",
114 request, value, len, retval);
109 return retval < 0 ? retval : 0; 115 return retval < 0 ? retval : 0;
110} 116}
111 117
@@ -150,9 +156,8 @@ static int acm_wb_is_avail(struct acm *acm)
150 156
151 n = ACM_NW; 157 n = ACM_NW;
152 spin_lock_irqsave(&acm->write_lock, flags); 158 spin_lock_irqsave(&acm->write_lock, flags);
153 for (i = 0; i < ACM_NW; i++) { 159 for (i = 0; i < ACM_NW; i++)
154 n -= acm->wb[i].use; 160 n -= acm->wb[i].use;
155 }
156 spin_unlock_irqrestore(&acm->write_lock, flags); 161 spin_unlock_irqrestore(&acm->write_lock, flags);
157 return n; 162 return n;
158} 163}
@@ -183,7 +188,8 @@ static int acm_start_wb(struct acm *acm, struct acm_wb *wb)
183 wb->urb->transfer_buffer_length = wb->len; 188 wb->urb->transfer_buffer_length = wb->len;
184 wb->urb->dev = acm->dev; 189 wb->urb->dev = acm->dev;
185 190
186 if ((rc = usb_submit_urb(wb->urb, GFP_ATOMIC)) < 0) { 191 rc = usb_submit_urb(wb->urb, GFP_ATOMIC);
192 if (rc < 0) {
187 dbg("usb_submit_urb(write bulk) failed: %d", rc); 193 dbg("usb_submit_urb(write bulk) failed: %d", rc);
188 acm_write_done(acm, wb); 194 acm_write_done(acm, wb);
189 } 195 }
@@ -262,6 +268,7 @@ static void acm_ctrl_irq(struct urb *urb)
262{ 268{
263 struct acm *acm = urb->context; 269 struct acm *acm = urb->context;
264 struct usb_cdc_notification *dr = urb->transfer_buffer; 270 struct usb_cdc_notification *dr = urb->transfer_buffer;
271 struct tty_struct *tty;
265 unsigned char *data; 272 unsigned char *data;
266 int newctrl; 273 int newctrl;
267 int retval; 274 int retval;
@@ -287,40 +294,45 @@ static void acm_ctrl_irq(struct urb *urb)
287 294
288 data = (unsigned char *)(dr + 1); 295 data = (unsigned char *)(dr + 1);
289 switch (dr->bNotificationType) { 296 switch (dr->bNotificationType) {
297 case USB_CDC_NOTIFY_NETWORK_CONNECTION:
298 dbg("%s network", dr->wValue ?
299 "connected to" : "disconnected from");
300 break;
290 301
291 case USB_CDC_NOTIFY_NETWORK_CONNECTION: 302 case USB_CDC_NOTIFY_SERIAL_STATE:
292 303 tty = tty_port_tty_get(&acm->port);
293 dbg("%s network", dr->wValue ? "connected to" : "disconnected from"); 304 newctrl = get_unaligned_le16(data);
294 break;
295
296 case USB_CDC_NOTIFY_SERIAL_STATE:
297
298 newctrl = get_unaligned_le16(data);
299 305
300 if (acm->tty && !acm->clocal && (acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) { 306 if (tty) {
307 if (!acm->clocal &&
308 (acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) {
301 dbg("calling hangup"); 309 dbg("calling hangup");
302 tty_hangup(acm->tty); 310 tty_hangup(tty);
303 } 311 }
312 tty_kref_put(tty);
313 }
304 314
305 acm->ctrlin = newctrl; 315 acm->ctrlin = newctrl;
306
307 dbg("input control lines: dcd%c dsr%c break%c ring%c framing%c parity%c overrun%c",
308 acm->ctrlin & ACM_CTRL_DCD ? '+' : '-', acm->ctrlin & ACM_CTRL_DSR ? '+' : '-',
309 acm->ctrlin & ACM_CTRL_BRK ? '+' : '-', acm->ctrlin & ACM_CTRL_RI ? '+' : '-',
310 acm->ctrlin & ACM_CTRL_FRAMING ? '+' : '-', acm->ctrlin & ACM_CTRL_PARITY ? '+' : '-',
311 acm->ctrlin & ACM_CTRL_OVERRUN ? '+' : '-');
312 316
317 dbg("input control lines: dcd%c dsr%c break%c ring%c framing%c parity%c overrun%c",
318 acm->ctrlin & ACM_CTRL_DCD ? '+' : '-',
319 acm->ctrlin & ACM_CTRL_DSR ? '+' : '-',
320 acm->ctrlin & ACM_CTRL_BRK ? '+' : '-',
321 acm->ctrlin & ACM_CTRL_RI ? '+' : '-',
322 acm->ctrlin & ACM_CTRL_FRAMING ? '+' : '-',
323 acm->ctrlin & ACM_CTRL_PARITY ? '+' : '-',
324 acm->ctrlin & ACM_CTRL_OVERRUN ? '+' : '-');
313 break; 325 break;
314 326
315 default: 327 default:
316 dbg("unknown notification %d received: index %d len %d data0 %d data1 %d", 328 dbg("unknown notification %d received: index %d len %d data0 %d data1 %d",
317 dr->bNotificationType, dr->wIndex, 329 dr->bNotificationType, dr->wIndex,
318 dr->wLength, data[0], data[1]); 330 dr->wLength, data[0], data[1]);
319 break; 331 break;
320 } 332 }
321exit: 333exit:
322 usb_mark_last_busy(acm->dev); 334 usb_mark_last_busy(acm->dev);
323 retval = usb_submit_urb (urb, GFP_ATOMIC); 335 retval = usb_submit_urb(urb, GFP_ATOMIC);
324 if (retval) 336 if (retval)
325 dev_err(&urb->dev->dev, "%s - usb_submit_urb failed with " 337 dev_err(&urb->dev->dev, "%s - usb_submit_urb failed with "
326 "result %d", __func__, retval); 338 "result %d", __func__, retval);
@@ -371,15 +383,14 @@ static void acm_rx_tasklet(unsigned long _acm)
371{ 383{
372 struct acm *acm = (void *)_acm; 384 struct acm *acm = (void *)_acm;
373 struct acm_rb *buf; 385 struct acm_rb *buf;
374 struct tty_struct *tty = acm->tty; 386 struct tty_struct *tty;
375 struct acm_ru *rcv; 387 struct acm_ru *rcv;
376 unsigned long flags; 388 unsigned long flags;
377 unsigned char throttled; 389 unsigned char throttled;
378 390
379 dbg("Entering acm_rx_tasklet"); 391 dbg("Entering acm_rx_tasklet");
380 392
381 if (!ACM_READY(acm)) 393 if (!ACM_READY(acm)) {
382 {
383 dbg("acm_rx_tasklet: ACM not ready"); 394 dbg("acm_rx_tasklet: ACM not ready");
384 return; 395 return;
385 } 396 }
@@ -387,12 +398,13 @@ static void acm_rx_tasklet(unsigned long _acm)
387 spin_lock_irqsave(&acm->throttle_lock, flags); 398 spin_lock_irqsave(&acm->throttle_lock, flags);
388 throttled = acm->throttle; 399 throttled = acm->throttle;
389 spin_unlock_irqrestore(&acm->throttle_lock, flags); 400 spin_unlock_irqrestore(&acm->throttle_lock, flags);
390 if (throttled) 401 if (throttled) {
391 {
392 dbg("acm_rx_tasklet: throttled"); 402 dbg("acm_rx_tasklet: throttled");
393 return; 403 return;
394 } 404 }
395 405
406 tty = tty_port_tty_get(&acm->port);
407
396next_buffer: 408next_buffer:
397 spin_lock_irqsave(&acm->read_lock, flags); 409 spin_lock_irqsave(&acm->read_lock, flags);
398 if (list_empty(&acm->filled_read_bufs)) { 410 if (list_empty(&acm->filled_read_bufs)) {
@@ -406,20 +418,22 @@ next_buffer:
406 418
407 dbg("acm_rx_tasklet: procesing buf 0x%p, size = %d", buf, buf->size); 419 dbg("acm_rx_tasklet: procesing buf 0x%p, size = %d", buf, buf->size);
408 420
409 tty_buffer_request_room(tty, buf->size); 421 if (tty) {
410 spin_lock_irqsave(&acm->throttle_lock, flags); 422 spin_lock_irqsave(&acm->throttle_lock, flags);
411 throttled = acm->throttle; 423 throttled = acm->throttle;
412 spin_unlock_irqrestore(&acm->throttle_lock, flags); 424 spin_unlock_irqrestore(&acm->throttle_lock, flags);
413 if (!throttled) 425 if (!throttled) {
414 tty_insert_flip_string(tty, buf->base, buf->size); 426 tty_buffer_request_room(tty, buf->size);
415 tty_flip_buffer_push(tty); 427 tty_insert_flip_string(tty, buf->base, buf->size);
416 428 tty_flip_buffer_push(tty);
417 if (throttled) { 429 } else {
418 dbg("Throttling noticed"); 430 tty_kref_put(tty);
419 spin_lock_irqsave(&acm->read_lock, flags); 431 dbg("Throttling noticed");
420 list_add(&buf->list, &acm->filled_read_bufs); 432 spin_lock_irqsave(&acm->read_lock, flags);
421 spin_unlock_irqrestore(&acm->read_lock, flags); 433 list_add(&buf->list, &acm->filled_read_bufs);
422 return; 434 spin_unlock_irqrestore(&acm->read_lock, flags);
435 return;
436 }
423 } 437 }
424 438
425 spin_lock_irqsave(&acm->read_lock, flags); 439 spin_lock_irqsave(&acm->read_lock, flags);
@@ -428,6 +442,8 @@ next_buffer:
428 goto next_buffer; 442 goto next_buffer;
429 443
430urbs: 444urbs:
445 tty_kref_put(tty);
446
431 while (!list_empty(&acm->spare_read_bufs)) { 447 while (!list_empty(&acm->spare_read_bufs)) {
432 spin_lock_irqsave(&acm->read_lock, flags); 448 spin_lock_irqsave(&acm->read_lock, flags);
433 if (list_empty(&acm->spare_read_urbs)) { 449 if (list_empty(&acm->spare_read_urbs)) {
@@ -454,10 +470,11 @@ urbs:
454 rcv->urb->transfer_dma = buf->dma; 470 rcv->urb->transfer_dma = buf->dma;
455 rcv->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; 471 rcv->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
456 472
457 /* This shouldn't kill the driver as unsuccessful URBs are returned to the 473 /* This shouldn't kill the driver as unsuccessful URBs are
458 free-urbs-pool and resubmited ASAP */ 474 returned to the free-urbs-pool and resubmited ASAP */
459 spin_lock_irqsave(&acm->read_lock, flags); 475 spin_lock_irqsave(&acm->read_lock, flags);
460 if (acm->susp_count || usb_submit_urb(rcv->urb, GFP_ATOMIC) < 0) { 476 if (acm->susp_count ||
477 usb_submit_urb(rcv->urb, GFP_ATOMIC) < 0) {
461 list_add(&buf->list, &acm->spare_read_bufs); 478 list_add(&buf->list, &acm->spare_read_bufs);
462 list_add(&rcv->list, &acm->spare_read_urbs); 479 list_add(&rcv->list, &acm->spare_read_urbs);
463 acm->processing = 0; 480 acm->processing = 0;
@@ -499,11 +516,14 @@ static void acm_write_bulk(struct urb *urb)
499static void acm_softint(struct work_struct *work) 516static void acm_softint(struct work_struct *work)
500{ 517{
501 struct acm *acm = container_of(work, struct acm, work); 518 struct acm *acm = container_of(work, struct acm, work);
519 struct tty_struct *tty;
502 520
503 dev_vdbg(&acm->data->dev, "tx work\n"); 521 dev_vdbg(&acm->data->dev, "tx work\n");
504 if (!ACM_READY(acm)) 522 if (!ACM_READY(acm))
505 return; 523 return;
506 tty_wakeup(acm->tty); 524 tty = tty_port_tty_get(&acm->port);
525 tty_wakeup(tty);
526 tty_kref_put(tty);
507} 527}
508 528
509static void acm_waker(struct work_struct *waker) 529static void acm_waker(struct work_struct *waker)
@@ -543,8 +563,9 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp)
543 rv = 0; 563 rv = 0;
544 564
545 set_bit(TTY_NO_WRITE_SPLIT, &tty->flags); 565 set_bit(TTY_NO_WRITE_SPLIT, &tty->flags);
566
546 tty->driver_data = acm; 567 tty->driver_data = acm;
547 acm->tty = tty; 568 tty_port_tty_set(&acm->port, tty);
548 569
549 if (usb_autopm_get_interface(acm->control) < 0) 570 if (usb_autopm_get_interface(acm->control) < 0)
550 goto early_bail; 571 goto early_bail;
@@ -552,11 +573,10 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp)
552 acm->control->needs_remote_wakeup = 1; 573 acm->control->needs_remote_wakeup = 1;
553 574
554 mutex_lock(&acm->mutex); 575 mutex_lock(&acm->mutex);
555 if (acm->used++) { 576 if (acm->port.count++) {
556 usb_autopm_put_interface(acm->control); 577 usb_autopm_put_interface(acm->control);
557 goto done; 578 goto done;
558 } 579 }
559
560 580
561 acm->ctrlurb->dev = acm->dev; 581 acm->ctrlurb->dev = acm->dev;
562 if (usb_submit_urb(acm->ctrlurb, GFP_KERNEL)) { 582 if (usb_submit_urb(acm->ctrlurb, GFP_KERNEL)) {
@@ -567,22 +587,22 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp)
567 if (0 > acm_set_control(acm, acm->ctrlout = ACM_CTRL_DTR | ACM_CTRL_RTS) && 587 if (0 > acm_set_control(acm, acm->ctrlout = ACM_CTRL_DTR | ACM_CTRL_RTS) &&
568 (acm->ctrl_caps & USB_CDC_CAP_LINE)) 588 (acm->ctrl_caps & USB_CDC_CAP_LINE))
569 goto full_bailout; 589 goto full_bailout;
590
570 usb_autopm_put_interface(acm->control); 591 usb_autopm_put_interface(acm->control);
571 592
572 INIT_LIST_HEAD(&acm->spare_read_urbs); 593 INIT_LIST_HEAD(&acm->spare_read_urbs);
573 INIT_LIST_HEAD(&acm->spare_read_bufs); 594 INIT_LIST_HEAD(&acm->spare_read_bufs);
574 INIT_LIST_HEAD(&acm->filled_read_bufs); 595 INIT_LIST_HEAD(&acm->filled_read_bufs);
575 for (i = 0; i < acm->rx_buflimit; i++) { 596
597 for (i = 0; i < acm->rx_buflimit; i++)
576 list_add(&(acm->ru[i].list), &acm->spare_read_urbs); 598 list_add(&(acm->ru[i].list), &acm->spare_read_urbs);
577 } 599 for (i = 0; i < acm->rx_buflimit; i++)
578 for (i = 0; i < acm->rx_buflimit; i++) {
579 list_add(&(acm->rb[i].list), &acm->spare_read_bufs); 600 list_add(&(acm->rb[i].list), &acm->spare_read_bufs);
580 }
581 601
582 acm->throttle = 0; 602 acm->throttle = 0;
583 603
584 tasklet_schedule(&acm->urb_task); 604 tasklet_schedule(&acm->urb_task);
585 605 rv = tty_port_block_til_ready(&acm->port, tty, filp);
586done: 606done:
587 mutex_unlock(&acm->mutex); 607 mutex_unlock(&acm->mutex);
588err_out: 608err_out:
@@ -593,16 +613,17 @@ full_bailout:
593 usb_kill_urb(acm->ctrlurb); 613 usb_kill_urb(acm->ctrlurb);
594bail_out: 614bail_out:
595 usb_autopm_put_interface(acm->control); 615 usb_autopm_put_interface(acm->control);
596 acm->used--; 616 acm->port.count--;
597 mutex_unlock(&acm->mutex); 617 mutex_unlock(&acm->mutex);
598early_bail: 618early_bail:
599 mutex_unlock(&open_mutex); 619 mutex_unlock(&open_mutex);
620 tty_port_tty_set(&acm->port, NULL);
600 return -EIO; 621 return -EIO;
601} 622}
602 623
603static void acm_tty_unregister(struct acm *acm) 624static void acm_tty_unregister(struct acm *acm)
604{ 625{
605 int i,nr; 626 int i, nr;
606 627
607 nr = acm->rx_buflimit; 628 nr = acm->rx_buflimit;
608 tty_unregister_device(acm_tty_driver, acm->minor); 629 tty_unregister_device(acm_tty_driver, acm->minor);
@@ -619,41 +640,56 @@ static void acm_tty_unregister(struct acm *acm)
619 640
620static int acm_tty_chars_in_buffer(struct tty_struct *tty); 641static int acm_tty_chars_in_buffer(struct tty_struct *tty);
621 642
643static void acm_port_down(struct acm *acm, int drain)
644{
645 int i, nr = acm->rx_buflimit;
646 mutex_lock(&open_mutex);
647 if (acm->dev) {
648 usb_autopm_get_interface(acm->control);
649 acm_set_control(acm, acm->ctrlout = 0);
650 /* try letting the last writes drain naturally */
651 if (drain) {
652 wait_event_interruptible_timeout(acm->drain_wait,
653 (ACM_NW == acm_wb_is_avail(acm)) || !acm->dev,
654 ACM_CLOSE_TIMEOUT * HZ);
655 }
656 usb_kill_urb(acm->ctrlurb);
657 for (i = 0; i < ACM_NW; i++)
658 usb_kill_urb(acm->wb[i].urb);
659 for (i = 0; i < nr; i++)
660 usb_kill_urb(acm->ru[i].urb);
661 acm->control->needs_remote_wakeup = 0;
662 usb_autopm_put_interface(acm->control);
663 }
664 mutex_unlock(&open_mutex);
665}
666
667static void acm_tty_hangup(struct tty_struct *tty)
668{
669 struct acm *acm = tty->driver_data;
670 tty_port_hangup(&acm->port);
671 acm_port_down(acm, 0);
672}
673
622static void acm_tty_close(struct tty_struct *tty, struct file *filp) 674static void acm_tty_close(struct tty_struct *tty, struct file *filp)
623{ 675{
624 struct acm *acm = tty->driver_data; 676 struct acm *acm = tty->driver_data;
625 int i,nr;
626 677
627 if (!acm || !acm->used) 678 /* Perform the closing process and see if we need to do the hardware
679 shutdown */
680 if (tty_port_close_start(&acm->port, tty, filp) == 0)
628 return; 681 return;
629 682 acm_port_down(acm, 0);
630 nr = acm->rx_buflimit; 683 tty_port_close_end(&acm->port, tty);
631 mutex_lock(&open_mutex); 684 mutex_lock(&open_mutex);
632 if (!--acm->used) { 685 tty_port_tty_set(&acm->port, NULL);
633 if (acm->dev) { 686 if (!acm->dev)
634 usb_autopm_get_interface(acm->control); 687 acm_tty_unregister(acm);
635 acm_set_control(acm, acm->ctrlout = 0);
636
637 /* try letting the last writes drain naturally */
638 wait_event_interruptible_timeout(acm->drain_wait,
639 (ACM_NW == acm_wb_is_avail(acm))
640 || !acm->dev,
641 ACM_CLOSE_TIMEOUT * HZ);
642
643 usb_kill_urb(acm->ctrlurb);
644 for (i = 0; i < ACM_NW; i++)
645 usb_kill_urb(acm->wb[i].urb);
646 for (i = 0; i < nr; i++)
647 usb_kill_urb(acm->ru[i].urb);
648 acm->control->needs_remote_wakeup = 0;
649 usb_autopm_put_interface(acm->control);
650 } else
651 acm_tty_unregister(acm);
652 }
653 mutex_unlock(&open_mutex); 688 mutex_unlock(&open_mutex);
654} 689}
655 690
656static int acm_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) 691static int acm_tty_write(struct tty_struct *tty,
692 const unsigned char *buf, int count)
657{ 693{
658 struct acm *acm = tty->driver_data; 694 struct acm *acm = tty->driver_data;
659 int stat; 695 int stat;
@@ -669,7 +705,8 @@ static int acm_tty_write(struct tty_struct *tty, const unsigned char *buf, int c
669 return 0; 705 return 0;
670 706
671 spin_lock_irqsave(&acm->write_lock, flags); 707 spin_lock_irqsave(&acm->write_lock, flags);
672 if ((wbn = acm_wb_alloc(acm)) < 0) { 708 wbn = acm_wb_alloc(acm);
709 if (wbn < 0) {
673 spin_unlock_irqrestore(&acm->write_lock, flags); 710 spin_unlock_irqrestore(&acm->write_lock, flags);
674 return 0; 711 return 0;
675 } 712 }
@@ -681,7 +718,8 @@ static int acm_tty_write(struct tty_struct *tty, const unsigned char *buf, int c
681 wb->len = count; 718 wb->len = count;
682 spin_unlock_irqrestore(&acm->write_lock, flags); 719 spin_unlock_irqrestore(&acm->write_lock, flags);
683 720
684 if ((stat = acm_write_start(acm, wbn)) < 0) 721 stat = acm_write_start(acm, wbn);
722 if (stat < 0)
685 return stat; 723 return stat;
686 return count; 724 return count;
687} 725}
@@ -767,8 +805,10 @@ static int acm_tty_tiocmset(struct tty_struct *tty, struct file *file,
767 return -EINVAL; 805 return -EINVAL;
768 806
769 newctrl = acm->ctrlout; 807 newctrl = acm->ctrlout;
770 set = (set & TIOCM_DTR ? ACM_CTRL_DTR : 0) | (set & TIOCM_RTS ? ACM_CTRL_RTS : 0); 808 set = (set & TIOCM_DTR ? ACM_CTRL_DTR : 0) |
771 clear = (clear & TIOCM_DTR ? ACM_CTRL_DTR : 0) | (clear & TIOCM_RTS ? ACM_CTRL_RTS : 0); 809 (set & TIOCM_RTS ? ACM_CTRL_RTS : 0);
810 clear = (clear & TIOCM_DTR ? ACM_CTRL_DTR : 0) |
811 (clear & TIOCM_RTS ? ACM_CTRL_RTS : 0);
772 812
773 newctrl = (newctrl & ~clear) | set; 813 newctrl = (newctrl & ~clear) | set;
774 814
@@ -777,7 +817,8 @@ static int acm_tty_tiocmset(struct tty_struct *tty, struct file *file,
777 return acm_set_control(acm, acm->ctrlout = newctrl); 817 return acm_set_control(acm, acm->ctrlout = newctrl);
778} 818}
779 819
780static int acm_tty_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg) 820static int acm_tty_ioctl(struct tty_struct *tty, struct file *file,
821 unsigned int cmd, unsigned long arg)
781{ 822{
782 struct acm *acm = tty->driver_data; 823 struct acm *acm = tty->driver_data;
783 824
@@ -799,7 +840,8 @@ static const __u8 acm_tty_size[] = {
799 5, 6, 7, 8 840 5, 6, 7, 8
800}; 841};
801 842
802static void acm_tty_set_termios(struct tty_struct *tty, struct ktermios *termios_old) 843static void acm_tty_set_termios(struct tty_struct *tty,
844 struct ktermios *termios_old)
803{ 845{
804 struct acm *acm = tty->driver_data; 846 struct acm *acm = tty->driver_data;
805 struct ktermios *termios = tty->termios; 847 struct ktermios *termios = tty->termios;
@@ -809,19 +851,23 @@ static void acm_tty_set_termios(struct tty_struct *tty, struct ktermios *termios
809 if (!ACM_READY(acm)) 851 if (!ACM_READY(acm))
810 return; 852 return;
811 853
854 /* FIXME: Needs to support the tty_baud interface */
855 /* FIXME: Broken on sparc */
812 newline.dwDTERate = cpu_to_le32p(acm_tty_speed + 856 newline.dwDTERate = cpu_to_le32p(acm_tty_speed +
813 (termios->c_cflag & CBAUD & ~CBAUDEX) + (termios->c_cflag & CBAUDEX ? 15 : 0)); 857 (termios->c_cflag & CBAUD & ~CBAUDEX) + (termios->c_cflag & CBAUDEX ? 15 : 0));
814 newline.bCharFormat = termios->c_cflag & CSTOPB ? 2 : 0; 858 newline.bCharFormat = termios->c_cflag & CSTOPB ? 2 : 0;
815 newline.bParityType = termios->c_cflag & PARENB ? 859 newline.bParityType = termios->c_cflag & PARENB ?
816 (termios->c_cflag & PARODD ? 1 : 2) + (termios->c_cflag & CMSPAR ? 2 : 0) : 0; 860 (termios->c_cflag & PARODD ? 1 : 2) +
861 (termios->c_cflag & CMSPAR ? 2 : 0) : 0;
817 newline.bDataBits = acm_tty_size[(termios->c_cflag & CSIZE) >> 4]; 862 newline.bDataBits = acm_tty_size[(termios->c_cflag & CSIZE) >> 4];
818 863 /* FIXME: Needs to clear unsupported bits in the termios */
819 acm->clocal = ((termios->c_cflag & CLOCAL) != 0); 864 acm->clocal = ((termios->c_cflag & CLOCAL) != 0);
820 865
821 if (!newline.dwDTERate) { 866 if (!newline.dwDTERate) {
822 newline.dwDTERate = acm->line.dwDTERate; 867 newline.dwDTERate = acm->line.dwDTERate;
823 newctrl &= ~ACM_CTRL_DTR; 868 newctrl &= ~ACM_CTRL_DTR;
824 } else newctrl |= ACM_CTRL_DTR; 869 } else
870 newctrl |= ACM_CTRL_DTR;
825 871
826 if (newctrl != acm->ctrlout) 872 if (newctrl != acm->ctrlout)
827 acm_set_control(acm, acm->ctrlout = newctrl); 873 acm_set_control(acm, acm->ctrlout = newctrl);
@@ -846,9 +892,8 @@ static void acm_write_buffers_free(struct acm *acm)
846 struct acm_wb *wb; 892 struct acm_wb *wb;
847 struct usb_device *usb_dev = interface_to_usbdev(acm->control); 893 struct usb_device *usb_dev = interface_to_usbdev(acm->control);
848 894
849 for (wb = &acm->wb[0], i = 0; i < ACM_NW; i++, wb++) { 895 for (wb = &acm->wb[0], i = 0; i < ACM_NW; i++, wb++)
850 usb_buffer_free(usb_dev, acm->writesize, wb->buf, wb->dmah); 896 usb_buffer_free(usb_dev, acm->writesize, wb->buf, wb->dmah);
851 }
852} 897}
853 898
854static void acm_read_buffers_free(struct acm *acm) 899static void acm_read_buffers_free(struct acm *acm)
@@ -857,7 +902,8 @@ static void acm_read_buffers_free(struct acm *acm)
857 int i, n = acm->rx_buflimit; 902 int i, n = acm->rx_buflimit;
858 903
859 for (i = 0; i < n; i++) 904 for (i = 0; i < n; i++)
860 usb_buffer_free(usb_dev, acm->readsize, acm->rb[i].base, acm->rb[i].dma); 905 usb_buffer_free(usb_dev, acm->readsize,
906 acm->rb[i].base, acm->rb[i].dma);
861} 907}
862 908
863/* Little helper: write buffers allocate */ 909/* Little helper: write buffers allocate */
@@ -882,8 +928,8 @@ static int acm_write_buffers_alloc(struct acm *acm)
882 return 0; 928 return 0;
883} 929}
884 930
885static int acm_probe (struct usb_interface *intf, 931static int acm_probe(struct usb_interface *intf,
886 const struct usb_device_id *id) 932 const struct usb_device_id *id)
887{ 933{
888 struct usb_cdc_union_desc *union_header = NULL; 934 struct usb_cdc_union_desc *union_header = NULL;
889 struct usb_cdc_country_functional_desc *cfd = NULL; 935 struct usb_cdc_country_functional_desc *cfd = NULL;
@@ -897,7 +943,7 @@ static int acm_probe (struct usb_interface *intf,
897 struct usb_device *usb_dev = interface_to_usbdev(intf); 943 struct usb_device *usb_dev = interface_to_usbdev(intf);
898 struct acm *acm; 944 struct acm *acm;
899 int minor; 945 int minor;
900 int ctrlsize,readsize; 946 int ctrlsize, readsize;
901 u8 *buf; 947 u8 *buf;
902 u8 ac_management_function = 0; 948 u8 ac_management_function = 0;
903 u8 call_management_function = 0; 949 u8 call_management_function = 0;
@@ -917,7 +963,7 @@ static int acm_probe (struct usb_interface *intf,
917 control_interface = usb_ifnum_to_if(usb_dev, 0); 963 control_interface = usb_ifnum_to_if(usb_dev, 0);
918 goto skip_normal_probe; 964 goto skip_normal_probe;
919 } 965 }
920 966
921 /* normal probing*/ 967 /* normal probing*/
922 if (!buffer) { 968 if (!buffer) {
923 dev_err(&intf->dev, "Weird descriptor references\n"); 969 dev_err(&intf->dev, "Weird descriptor references\n");
@@ -925,8 +971,10 @@ static int acm_probe (struct usb_interface *intf,
925 } 971 }
926 972
927 if (!buflen) { 973 if (!buflen) {
928 if (intf->cur_altsetting->endpoint->extralen && intf->cur_altsetting->endpoint->extra) { 974 if (intf->cur_altsetting->endpoint->extralen &&
929 dev_dbg(&intf->dev,"Seeking extra descriptors on endpoint\n"); 975 intf->cur_altsetting->endpoint->extra) {
976 dev_dbg(&intf->dev,
977 "Seeking extra descriptors on endpoint\n");
930 buflen = intf->cur_altsetting->endpoint->extralen; 978 buflen = intf->cur_altsetting->endpoint->extralen;
931 buffer = intf->cur_altsetting->endpoint->extra; 979 buffer = intf->cur_altsetting->endpoint->extra;
932 } else { 980 } else {
@@ -937,47 +985,43 @@ static int acm_probe (struct usb_interface *intf,
937 } 985 }
938 986
939 while (buflen > 0) { 987 while (buflen > 0) {
940 if (buffer [1] != USB_DT_CS_INTERFACE) { 988 if (buffer[1] != USB_DT_CS_INTERFACE) {
941 dev_err(&intf->dev, "skipping garbage\n"); 989 dev_err(&intf->dev, "skipping garbage\n");
942 goto next_desc; 990 goto next_desc;
943 } 991 }
944 992
945 switch (buffer [2]) { 993 switch (buffer[2]) {
946 case USB_CDC_UNION_TYPE: /* we've found it */ 994 case USB_CDC_UNION_TYPE: /* we've found it */
947 if (union_header) { 995 if (union_header) {
948 dev_err(&intf->dev, "More than one " 996 dev_err(&intf->dev, "More than one "
949 "union descriptor, " 997 "union descriptor, skipping ...\n");
950 "skipping ...\n"); 998 goto next_desc;
951 goto next_desc;
952 }
953 union_header = (struct usb_cdc_union_desc *)
954 buffer;
955 break;
956 case USB_CDC_COUNTRY_TYPE: /* export through sysfs*/
957 cfd = (struct usb_cdc_country_functional_desc *)buffer;
958 break;
959 case USB_CDC_HEADER_TYPE: /* maybe check version */
960 break; /* for now we ignore it */
961 case USB_CDC_ACM_TYPE:
962 ac_management_function = buffer[3];
963 break;
964 case USB_CDC_CALL_MANAGEMENT_TYPE:
965 call_management_function = buffer[3];
966 call_interface_num = buffer[4];
967 if ((call_management_function & 3) != 3)
968 dev_err(&intf->dev, "This device "
969 "cannot do calls on its own. "
970 "It is no modem.\n");
971 break;
972 default:
973 /* there are LOTS more CDC descriptors that
974 * could legitimately be found here.
975 */
976 dev_dbg(&intf->dev, "Ignoring descriptor: "
977 "type %02x, length %d\n",
978 buffer[2], buffer[0]);
979 break;
980 } 999 }
1000 union_header = (struct usb_cdc_union_desc *)buffer;
1001 break;
1002 case USB_CDC_COUNTRY_TYPE: /* export through sysfs*/
1003 cfd = (struct usb_cdc_country_functional_desc *)buffer;
1004 break;
1005 case USB_CDC_HEADER_TYPE: /* maybe check version */
1006 break; /* for now we ignore it */
1007 case USB_CDC_ACM_TYPE:
1008 ac_management_function = buffer[3];
1009 break;
1010 case USB_CDC_CALL_MANAGEMENT_TYPE:
1011 call_management_function = buffer[3];
1012 call_interface_num = buffer[4];
1013 if ((call_management_function & 3) != 3)
1014 dev_err(&intf->dev, "This device cannot do calls on its own. It is not a modem.\n");
1015 break;
1016 default:
1017 /* there are LOTS more CDC descriptors that
1018 * could legitimately be found here.
1019 */
1020 dev_dbg(&intf->dev, "Ignoring descriptor: "
1021 "type %02x, length %d\n",
1022 buffer[2], buffer[0]);
1023 break;
1024 }
981next_desc: 1025next_desc:
982 buflen -= buffer[0]; 1026 buflen -= buffer[0];
983 buffer += buffer[0]; 1027 buffer += buffer[0];
@@ -985,33 +1029,36 @@ next_desc:
985 1029
986 if (!union_header) { 1030 if (!union_header) {
987 if (call_interface_num > 0) { 1031 if (call_interface_num > 0) {
988 dev_dbg(&intf->dev,"No union descriptor, using call management descriptor\n"); 1032 dev_dbg(&intf->dev, "No union descriptor, using call management descriptor\n");
989 data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = call_interface_num)); 1033 data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = call_interface_num));
990 control_interface = intf; 1034 control_interface = intf;
991 } else { 1035 } else {
992 dev_dbg(&intf->dev,"No union descriptor, giving up\n"); 1036 dev_dbg(&intf->dev,
1037 "No union descriptor, giving up\n");
993 return -ENODEV; 1038 return -ENODEV;
994 } 1039 }
995 } else { 1040 } else {
996 control_interface = usb_ifnum_to_if(usb_dev, union_header->bMasterInterface0); 1041 control_interface = usb_ifnum_to_if(usb_dev, union_header->bMasterInterface0);
997 data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = union_header->bSlaveInterface0)); 1042 data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = union_header->bSlaveInterface0));
998 if (!control_interface || !data_interface) { 1043 if (!control_interface || !data_interface) {
999 dev_dbg(&intf->dev,"no interfaces\n"); 1044 dev_dbg(&intf->dev, "no interfaces\n");
1000 return -ENODEV; 1045 return -ENODEV;
1001 } 1046 }
1002 } 1047 }
1003 1048
1004 if (data_interface_num != call_interface_num) 1049 if (data_interface_num != call_interface_num)
1005 dev_dbg(&intf->dev,"Separate call control interface. That is not fully supported.\n"); 1050 dev_dbg(&intf->dev, "Separate call control interface. That is not fully supported.\n");
1006 1051
1007skip_normal_probe: 1052skip_normal_probe:
1008 1053
1009 /*workaround for switched interfaces */ 1054 /*workaround for switched interfaces */
1010 if (data_interface->cur_altsetting->desc.bInterfaceClass != CDC_DATA_INTERFACE_TYPE) { 1055 if (data_interface->cur_altsetting->desc.bInterfaceClass
1011 if (control_interface->cur_altsetting->desc.bInterfaceClass == CDC_DATA_INTERFACE_TYPE) { 1056 != CDC_DATA_INTERFACE_TYPE) {
1057 if (control_interface->cur_altsetting->desc.bInterfaceClass
1058 == CDC_DATA_INTERFACE_TYPE) {
1012 struct usb_interface *t; 1059 struct usb_interface *t;
1013 dev_dbg(&intf->dev,"Your device has switched interfaces.\n"); 1060 dev_dbg(&intf->dev,
1014 1061 "Your device has switched interfaces.\n");
1015 t = control_interface; 1062 t = control_interface;
1016 control_interface = data_interface; 1063 control_interface = data_interface;
1017 data_interface = t; 1064 data_interface = t;
@@ -1023,9 +1070,9 @@ skip_normal_probe:
1023 /* Accept probe requests only for the control interface */ 1070 /* Accept probe requests only for the control interface */
1024 if (intf != control_interface) 1071 if (intf != control_interface)
1025 return -ENODEV; 1072 return -ENODEV;
1026 1073
1027 if (usb_interface_claimed(data_interface)) { /* valid in this context */ 1074 if (usb_interface_claimed(data_interface)) { /* valid in this context */
1028 dev_dbg(&intf->dev,"The data interface isn't available\n"); 1075 dev_dbg(&intf->dev, "The data interface isn't available\n");
1029 return -EBUSY; 1076 return -EBUSY;
1030 } 1077 }
1031 1078
@@ -1042,8 +1089,8 @@ skip_normal_probe:
1042 if (!usb_endpoint_dir_in(epread)) { 1089 if (!usb_endpoint_dir_in(epread)) {
1043 /* descriptors are swapped */ 1090 /* descriptors are swapped */
1044 struct usb_endpoint_descriptor *t; 1091 struct usb_endpoint_descriptor *t;
1045 dev_dbg(&intf->dev,"The data interface has switched endpoints\n"); 1092 dev_dbg(&intf->dev,
1046 1093 "The data interface has switched endpoints\n");
1047 t = epread; 1094 t = epread;
1048 epread = epwrite; 1095 epread = epwrite;
1049 epwrite = t; 1096 epwrite = t;
@@ -1056,13 +1103,15 @@ skip_normal_probe:
1056 return -ENODEV; 1103 return -ENODEV;
1057 } 1104 }
1058 1105
1059 if (!(acm = kzalloc(sizeof(struct acm), GFP_KERNEL))) { 1106 acm = kzalloc(sizeof(struct acm), GFP_KERNEL);
1107 if (acm == NULL) {
1060 dev_dbg(&intf->dev, "out of memory (acm kzalloc)\n"); 1108 dev_dbg(&intf->dev, "out of memory (acm kzalloc)\n");
1061 goto alloc_fail; 1109 goto alloc_fail;
1062 } 1110 }
1063 1111
1064 ctrlsize = le16_to_cpu(epctrl->wMaxPacketSize); 1112 ctrlsize = le16_to_cpu(epctrl->wMaxPacketSize);
1065 readsize = le16_to_cpu(epread->wMaxPacketSize)* ( quirks == SINGLE_RX_URB ? 1 : 2); 1113 readsize = le16_to_cpu(epread->wMaxPacketSize) *
1114 (quirks == SINGLE_RX_URB ? 1 : 2);
1066 acm->writesize = le16_to_cpu(epwrite->wMaxPacketSize) * 20; 1115 acm->writesize = le16_to_cpu(epwrite->wMaxPacketSize) * 20;
1067 acm->control = control_interface; 1116 acm->control = control_interface;
1068 acm->data = data_interface; 1117 acm->data = data_interface;
@@ -1082,6 +1131,8 @@ skip_normal_probe:
1082 spin_lock_init(&acm->read_lock); 1131 spin_lock_init(&acm->read_lock);
1083 mutex_init(&acm->mutex); 1132 mutex_init(&acm->mutex);
1084 acm->rx_endpoint = usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress); 1133 acm->rx_endpoint = usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress);
1134 tty_port_init(&acm->port);
1135 acm->port.ops = &acm_port_ops;
1085 1136
1086 buf = usb_buffer_alloc(usb_dev, ctrlsize, GFP_KERNEL, &acm->ctrl_dma); 1137 buf = usb_buffer_alloc(usb_dev, ctrlsize, GFP_KERNEL, &acm->ctrl_dma);
1087 if (!buf) { 1138 if (!buf) {
@@ -1103,8 +1154,10 @@ skip_normal_probe:
1103 for (i = 0; i < num_rx_buf; i++) { 1154 for (i = 0; i < num_rx_buf; i++) {
1104 struct acm_ru *rcv = &(acm->ru[i]); 1155 struct acm_ru *rcv = &(acm->ru[i]);
1105 1156
1106 if (!(rcv->urb = usb_alloc_urb(0, GFP_KERNEL))) { 1157 rcv->urb = usb_alloc_urb(0, GFP_KERNEL);
1107 dev_dbg(&intf->dev, "out of memory (read urbs usb_alloc_urb)\n"); 1158 if (rcv->urb == NULL) {
1159 dev_dbg(&intf->dev,
1160 "out of memory (read urbs usb_alloc_urb)\n");
1108 goto alloc_fail7; 1161 goto alloc_fail7;
1109 } 1162 }
1110 1163
@@ -1117,26 +1170,29 @@ skip_normal_probe:
1117 rb->base = usb_buffer_alloc(acm->dev, readsize, 1170 rb->base = usb_buffer_alloc(acm->dev, readsize,
1118 GFP_KERNEL, &rb->dma); 1171 GFP_KERNEL, &rb->dma);
1119 if (!rb->base) { 1172 if (!rb->base) {
1120 dev_dbg(&intf->dev, "out of memory (read bufs usb_buffer_alloc)\n"); 1173 dev_dbg(&intf->dev,
1174 "out of memory (read bufs usb_buffer_alloc)\n");
1121 goto alloc_fail7; 1175 goto alloc_fail7;
1122 } 1176 }
1123 } 1177 }
1124 for(i = 0; i < ACM_NW; i++) 1178 for (i = 0; i < ACM_NW; i++) {
1125 {
1126 struct acm_wb *snd = &(acm->wb[i]); 1179 struct acm_wb *snd = &(acm->wb[i]);
1127 1180
1128 if (!(snd->urb = usb_alloc_urb(0, GFP_KERNEL))) { 1181 snd->urb = usb_alloc_urb(0, GFP_KERNEL);
1129 dev_dbg(&intf->dev, "out of memory (write urbs usb_alloc_urb)"); 1182 if (snd->urb == NULL) {
1183 dev_dbg(&intf->dev,
1184 "out of memory (write urbs usb_alloc_urb)");
1130 goto alloc_fail7; 1185 goto alloc_fail7;
1131 } 1186 }
1132 1187
1133 usb_fill_bulk_urb(snd->urb, usb_dev, usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress), 1188 usb_fill_bulk_urb(snd->urb, usb_dev,
1134 NULL, acm->writesize, acm_write_bulk, snd); 1189 usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress),
1190 NULL, acm->writesize, acm_write_bulk, snd);
1135 snd->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; 1191 snd->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
1136 snd->instance = acm; 1192 snd->instance = acm;
1137 } 1193 }
1138 1194
1139 usb_set_intfdata (intf, acm); 1195 usb_set_intfdata(intf, acm);
1140 1196
1141 i = device_create_file(&intf->dev, &dev_attr_bmCapabilities); 1197 i = device_create_file(&intf->dev, &dev_attr_bmCapabilities);
1142 if (i < 0) 1198 if (i < 0)
@@ -1147,7 +1203,8 @@ skip_normal_probe:
1147 if (!acm->country_codes) 1203 if (!acm->country_codes)
1148 goto skip_countries; 1204 goto skip_countries;
1149 acm->country_code_size = cfd->bLength - 4; 1205 acm->country_code_size = cfd->bLength - 4;
1150 memcpy(acm->country_codes, (u8 *)&cfd->wCountyCode0, cfd->bLength - 4); 1206 memcpy(acm->country_codes, (u8 *)&cfd->wCountyCode0,
1207 cfd->bLength - 4);
1151 acm->country_rel_date = cfd->iCountryCodeRelDate; 1208 acm->country_rel_date = cfd->iCountryCodeRelDate;
1152 1209
1153 i = device_create_file(&intf->dev, &dev_attr_wCountryCodes); 1210 i = device_create_file(&intf->dev, &dev_attr_wCountryCodes);
@@ -1156,7 +1213,8 @@ skip_normal_probe:
1156 goto skip_countries; 1213 goto skip_countries;
1157 } 1214 }
1158 1215
1159 i = device_create_file(&intf->dev, &dev_attr_iCountryCodeRelDate); 1216 i = device_create_file(&intf->dev,
1217 &dev_attr_iCountryCodeRelDate);
1160 if (i < 0) { 1218 if (i < 0) {
1161 kfree(acm->country_codes); 1219 kfree(acm->country_codes);
1162 goto skip_countries; 1220 goto skip_countries;
@@ -1164,8 +1222,10 @@ skip_normal_probe:
1164 } 1222 }
1165 1223
1166skip_countries: 1224skip_countries:
1167 usb_fill_int_urb(acm->ctrlurb, usb_dev, usb_rcvintpipe(usb_dev, epctrl->bEndpointAddress), 1225 usb_fill_int_urb(acm->ctrlurb, usb_dev,
1168 acm->ctrl_buffer, ctrlsize, acm_ctrl_irq, acm, epctrl->bInterval); 1226 usb_rcvintpipe(usb_dev, epctrl->bEndpointAddress),
1227 acm->ctrl_buffer, ctrlsize, acm_ctrl_irq, acm,
1228 epctrl->bInterval);
1169 acm->ctrlurb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; 1229 acm->ctrlurb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
1170 acm->ctrlurb->transfer_dma = acm->ctrl_dma; 1230 acm->ctrlurb->transfer_dma = acm->ctrl_dma;
1171 1231
@@ -1212,7 +1272,7 @@ static void stop_data_traffic(struct acm *acm)
1212 tasklet_disable(&acm->urb_task); 1272 tasklet_disable(&acm->urb_task);
1213 1273
1214 usb_kill_urb(acm->ctrlurb); 1274 usb_kill_urb(acm->ctrlurb);
1215 for(i = 0; i < ACM_NW; i++) 1275 for (i = 0; i < ACM_NW; i++)
1216 usb_kill_urb(acm->wb[i].urb); 1276 usb_kill_urb(acm->wb[i].urb);
1217 for (i = 0; i < acm->rx_buflimit; i++) 1277 for (i = 0; i < acm->rx_buflimit; i++)
1218 usb_kill_urb(acm->ru[i].urb); 1278 usb_kill_urb(acm->ru[i].urb);
@@ -1227,13 +1287,14 @@ static void acm_disconnect(struct usb_interface *intf)
1227{ 1287{
1228 struct acm *acm = usb_get_intfdata(intf); 1288 struct acm *acm = usb_get_intfdata(intf);
1229 struct usb_device *usb_dev = interface_to_usbdev(intf); 1289 struct usb_device *usb_dev = interface_to_usbdev(intf);
1290 struct tty_struct *tty;
1230 1291
1231 /* sibling interface is already cleaning up */ 1292 /* sibling interface is already cleaning up */
1232 if (!acm) 1293 if (!acm)
1233 return; 1294 return;
1234 1295
1235 mutex_lock(&open_mutex); 1296 mutex_lock(&open_mutex);
1236 if (acm->country_codes){ 1297 if (acm->country_codes) {
1237 device_remove_file(&acm->control->dev, 1298 device_remove_file(&acm->control->dev,
1238 &dev_attr_wCountryCodes); 1299 &dev_attr_wCountryCodes);
1239 device_remove_file(&acm->control->dev, 1300 device_remove_file(&acm->control->dev,
@@ -1247,22 +1308,25 @@ static void acm_disconnect(struct usb_interface *intf)
1247 stop_data_traffic(acm); 1308 stop_data_traffic(acm);
1248 1309
1249 acm_write_buffers_free(acm); 1310 acm_write_buffers_free(acm);
1250 usb_buffer_free(usb_dev, acm->ctrlsize, acm->ctrl_buffer, acm->ctrl_dma); 1311 usb_buffer_free(usb_dev, acm->ctrlsize, acm->ctrl_buffer,
1312 acm->ctrl_dma);
1251 acm_read_buffers_free(acm); 1313 acm_read_buffers_free(acm);
1252 1314
1253 usb_driver_release_interface(&acm_driver, intf == acm->control ? 1315 usb_driver_release_interface(&acm_driver, intf == acm->control ?
1254 acm->data : acm->control); 1316 acm->data : acm->control);
1255 1317
1256 if (!acm->used) { 1318 if (acm->port.count == 0) {
1257 acm_tty_unregister(acm); 1319 acm_tty_unregister(acm);
1258 mutex_unlock(&open_mutex); 1320 mutex_unlock(&open_mutex);
1259 return; 1321 return;
1260 } 1322 }
1261 1323
1262 mutex_unlock(&open_mutex); 1324 mutex_unlock(&open_mutex);
1263 1325 tty = tty_port_tty_get(&acm->port);
1264 if (acm->tty) 1326 if (tty) {
1265 tty_hangup(acm->tty); 1327 tty_hangup(tty);
1328 tty_kref_put(tty);
1329 }
1266} 1330}
1267 1331
1268#ifdef CONFIG_PM 1332#ifdef CONFIG_PM
@@ -1297,7 +1361,7 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message)
1297 */ 1361 */
1298 mutex_lock(&acm->mutex); 1362 mutex_lock(&acm->mutex);
1299 1363
1300 if (acm->used) 1364 if (acm->port.count)
1301 stop_data_traffic(acm); 1365 stop_data_traffic(acm);
1302 1366
1303 mutex_unlock(&acm->mutex); 1367 mutex_unlock(&acm->mutex);
@@ -1319,7 +1383,7 @@ static int acm_resume(struct usb_interface *intf)
1319 return 0; 1383 return 0;
1320 1384
1321 mutex_lock(&acm->mutex); 1385 mutex_lock(&acm->mutex);
1322 if (acm->used) { 1386 if (acm->port.count) {
1323 rv = usb_submit_urb(acm->ctrlurb, GFP_NOIO); 1387 rv = usb_submit_urb(acm->ctrlurb, GFP_NOIO);
1324 if (rv < 0) 1388 if (rv < 0)
1325 goto err_out; 1389 goto err_out;
@@ -1398,7 +1462,7 @@ static struct usb_device_id acm_ids[] = {
1398 { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, 1462 { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
1399 USB_CDC_ACM_PROTO_AT_GSM) }, 1463 USB_CDC_ACM_PROTO_AT_GSM) },
1400 { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, 1464 { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
1401 USB_CDC_ACM_PROTO_AT_3G ) }, 1465 USB_CDC_ACM_PROTO_AT_3G) },
1402 { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, 1466 { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
1403 USB_CDC_ACM_PROTO_AT_CDMA) }, 1467 USB_CDC_ACM_PROTO_AT_CDMA) },
1404 1468
@@ -1406,7 +1470,7 @@ static struct usb_device_id acm_ids[] = {
1406 { } 1470 { }
1407}; 1471};
1408 1472
1409MODULE_DEVICE_TABLE (usb, acm_ids); 1473MODULE_DEVICE_TABLE(usb, acm_ids);
1410 1474
1411static struct usb_driver acm_driver = { 1475static struct usb_driver acm_driver = {
1412 .name = "cdc_acm", 1476 .name = "cdc_acm",
@@ -1429,6 +1493,7 @@ static struct usb_driver acm_driver = {
1429static const struct tty_operations acm_ops = { 1493static const struct tty_operations acm_ops = {
1430 .open = acm_tty_open, 1494 .open = acm_tty_open,
1431 .close = acm_tty_close, 1495 .close = acm_tty_close,
1496 .hangup = acm_tty_hangup,
1432 .write = acm_tty_write, 1497 .write = acm_tty_write,
1433 .write_room = acm_tty_write_room, 1498 .write_room = acm_tty_write_room,
1434 .ioctl = acm_tty_ioctl, 1499 .ioctl = acm_tty_ioctl,
@@ -1460,7 +1525,8 @@ static int __init acm_init(void)
1460 acm_tty_driver->subtype = SERIAL_TYPE_NORMAL, 1525 acm_tty_driver->subtype = SERIAL_TYPE_NORMAL,
1461 acm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; 1526 acm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
1462 acm_tty_driver->init_termios = tty_std_termios; 1527 acm_tty_driver->init_termios = tty_std_termios;
1463 acm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; 1528 acm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD |
1529 HUPCL | CLOCAL;
1464 tty_set_operations(acm_tty_driver, &acm_ops); 1530 tty_set_operations(acm_tty_driver, &acm_ops);
1465 1531
1466 retval = tty_register_driver(acm_tty_driver); 1532 retval = tty_register_driver(acm_tty_driver);
@@ -1492,7 +1558,7 @@ static void __exit acm_exit(void)
1492module_init(acm_init); 1558module_init(acm_init);
1493module_exit(acm_exit); 1559module_exit(acm_exit);
1494 1560
1495MODULE_AUTHOR( DRIVER_AUTHOR ); 1561MODULE_AUTHOR(DRIVER_AUTHOR);
1496MODULE_DESCRIPTION( DRIVER_DESC ); 1562MODULE_DESCRIPTION(DRIVER_DESC);
1497MODULE_LICENSE("GPL"); 1563MODULE_LICENSE("GPL");
1498MODULE_ALIAS_CHARDEV_MAJOR(ACM_TTY_MAJOR); 1564MODULE_ALIAS_CHARDEV_MAJOR(ACM_TTY_MAJOR);
diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h
index 1f95e7aa1b66..4c3856420add 100644
--- a/drivers/usb/class/cdc-acm.h
+++ b/drivers/usb/class/cdc-acm.h
@@ -89,8 +89,8 @@ struct acm {
89 struct usb_device *dev; /* the corresponding usb device */ 89 struct usb_device *dev; /* the corresponding usb device */
90 struct usb_interface *control; /* control interface */ 90 struct usb_interface *control; /* control interface */
91 struct usb_interface *data; /* data interface */ 91 struct usb_interface *data; /* data interface */
92 struct tty_struct *tty; /* the corresponding tty */ 92 struct tty_port port; /* our tty port data */
93 struct urb *ctrlurb; /* urbs */ 93 struct urb *ctrlurb; /* urbs */
94 u8 *ctrl_buffer; /* buffers of urbs */ 94 u8 *ctrl_buffer; /* buffers of urbs */
95 dma_addr_t ctrl_dma; /* dma handles of buffers */ 95 dma_addr_t ctrl_dma; /* dma handles of buffers */
96 u8 *country_codes; /* country codes from device */ 96 u8 *country_codes; /* country codes from device */
@@ -120,7 +120,6 @@ struct acm {
120 unsigned int ctrlout; /* output control lines (DTR, RTS) */ 120 unsigned int ctrlout; /* output control lines (DTR, RTS) */
121 unsigned int writesize; /* max packet size for the output bulk endpoint */ 121 unsigned int writesize; /* max packet size for the output bulk endpoint */
122 unsigned int readsize,ctrlsize; /* buffer sizes for freeing */ 122 unsigned int readsize,ctrlsize; /* buffer sizes for freeing */
123 unsigned int used; /* someone has this acm's device open */
124 unsigned int minor; /* acm minor number */ 123 unsigned int minor; /* acm minor number */
125 unsigned char throttle; /* throttled by tty layer */ 124 unsigned char throttle; /* throttled by tty layer */
126 unsigned char clocal; /* termios CLOCAL */ 125 unsigned char clocal; /* termios CLOCAL */
diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c
index b7eacad4d48c..2bfd6dd85b5a 100644
--- a/drivers/usb/serial/belkin_sa.c
+++ b/drivers/usb/serial/belkin_sa.c
@@ -93,8 +93,7 @@ static int belkin_sa_startup(struct usb_serial *serial);
93static void belkin_sa_shutdown(struct usb_serial *serial); 93static void belkin_sa_shutdown(struct usb_serial *serial);
94static int belkin_sa_open(struct tty_struct *tty, 94static int belkin_sa_open(struct tty_struct *tty,
95 struct usb_serial_port *port, struct file *filp); 95 struct usb_serial_port *port, struct file *filp);
96static void belkin_sa_close(struct tty_struct *tty, 96static void belkin_sa_close(struct usb_serial_port *port);
97 struct usb_serial_port *port, struct file *filp);
98static void belkin_sa_read_int_callback(struct urb *urb); 97static void belkin_sa_read_int_callback(struct urb *urb);
99static void belkin_sa_set_termios(struct tty_struct *tty, 98static void belkin_sa_set_termios(struct tty_struct *tty,
100 struct usb_serial_port *port, struct ktermios * old); 99 struct usb_serial_port *port, struct ktermios * old);
@@ -244,8 +243,7 @@ exit:
244} /* belkin_sa_open */ 243} /* belkin_sa_open */
245 244
246 245
247static void belkin_sa_close(struct tty_struct *tty, 246static void belkin_sa_close(struct usb_serial_port *port)
248 struct usb_serial_port *port, struct file *filp)
249{ 247{
250 dbg("%s port %d", __func__, port->number); 248 dbg("%s port %d", __func__, port->number);
251 249
diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index ab4cc277aa65..2830766f5b39 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -262,32 +262,40 @@ error: kfree(priv);
262 return r; 262 return r;
263} 263}
264 264
265static void ch341_close(struct tty_struct *tty, struct usb_serial_port *port, 265static int ch341_carrier_raised(struct usb_serial_port *port)
266 struct file *filp) 266{
267 struct ch341_private *priv = usb_get_serial_port_data(port);
268 if (priv->line_status & CH341_BIT_DCD)
269 return 1;
270 return 0;
271}
272
273static void ch341_dtr_rts(struct usb_serial_port *port, int on)
267{ 274{
268 struct ch341_private *priv = usb_get_serial_port_data(port); 275 struct ch341_private *priv = usb_get_serial_port_data(port);
269 unsigned long flags; 276 unsigned long flags;
270 unsigned int c_cflag;
271 277
272 dbg("%s - port %d", __func__, port->number); 278 dbg("%s - port %d", __func__, port->number);
279 /* drop DTR and RTS */
280 spin_lock_irqsave(&priv->lock, flags);
281 if (on)
282 priv->line_control |= CH341_BIT_RTS | CH341_BIT_DTR;
283 else
284 priv->line_control &= ~(CH341_BIT_RTS | CH341_BIT_DTR);
285 spin_unlock_irqrestore(&priv->lock, flags);
286 ch341_set_handshake(port->serial->dev, priv->line_control);
287 wake_up_interruptible(&priv->delta_msr_wait);
288}
289
290static void ch341_close(struct usb_serial_port *port)
291{
292 dbg("%s - port %d", __func__, port->number);
273 293
274 /* shutdown our urbs */ 294 /* shutdown our urbs */
275 dbg("%s - shutting down urbs", __func__); 295 dbg("%s - shutting down urbs", __func__);
276 usb_kill_urb(port->write_urb); 296 usb_kill_urb(port->write_urb);
277 usb_kill_urb(port->read_urb); 297 usb_kill_urb(port->read_urb);
278 usb_kill_urb(port->interrupt_in_urb); 298 usb_kill_urb(port->interrupt_in_urb);
279
280 if (tty) {
281 c_cflag = tty->termios->c_cflag;
282 if (c_cflag & HUPCL) {
283 /* drop DTR and RTS */
284 spin_lock_irqsave(&priv->lock, flags);
285 priv->line_control = 0;
286 spin_unlock_irqrestore(&priv->lock, flags);
287 ch341_set_handshake(port->serial->dev, 0);
288 }
289 }
290 wake_up_interruptible(&priv->delta_msr_wait);
291} 299}
292 300
293 301
@@ -302,7 +310,6 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port,
302 dbg("ch341_open()"); 310 dbg("ch341_open()");
303 311
304 priv->baud_rate = DEFAULT_BAUD_RATE; 312 priv->baud_rate = DEFAULT_BAUD_RATE;
305 priv->line_control = CH341_BIT_RTS | CH341_BIT_DTR;
306 313
307 r = ch341_configure(serial->dev, priv); 314 r = ch341_configure(serial->dev, priv);
308 if (r) 315 if (r)
@@ -322,7 +329,7 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port,
322 if (r) { 329 if (r) {
323 dev_err(&port->dev, "%s - failed submitting interrupt urb," 330 dev_err(&port->dev, "%s - failed submitting interrupt urb,"
324 " error %d\n", __func__, r); 331 " error %d\n", __func__, r);
325 ch341_close(tty, port, NULL); 332 ch341_close(port);
326 return -EPROTO; 333 return -EPROTO;
327 } 334 }
328 335
@@ -343,9 +350,6 @@ static void ch341_set_termios(struct tty_struct *tty,
343 350
344 dbg("ch341_set_termios()"); 351 dbg("ch341_set_termios()");
345 352
346 if (!tty || !tty->termios)
347 return;
348
349 baud_rate = tty_get_baud_rate(tty); 353 baud_rate = tty_get_baud_rate(tty);
350 354
351 priv->baud_rate = baud_rate; 355 priv->baud_rate = baud_rate;
@@ -568,6 +572,8 @@ static struct usb_serial_driver ch341_device = {
568 .usb_driver = &ch341_driver, 572 .usb_driver = &ch341_driver,
569 .num_ports = 1, 573 .num_ports = 1,
570 .open = ch341_open, 574 .open = ch341_open,
575 .dtr_rts = ch341_dtr_rts,
576 .carrier_raised = ch341_carrier_raised,
571 .close = ch341_close, 577 .close = ch341_close,
572 .ioctl = ch341_ioctl, 578 .ioctl = ch341_ioctl,
573 .set_termios = ch341_set_termios, 579 .set_termios = ch341_set_termios,
diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c
index 19e24045b137..247b61bfb7f4 100644
--- a/drivers/usb/serial/console.c
+++ b/drivers/usb/serial/console.c
@@ -169,7 +169,9 @@ static int usb_console_setup(struct console *co, char *options)
169 kfree(tty); 169 kfree(tty);
170 } 170 }
171 } 171 }
172 172 /* So we know not to kill the hardware on a hangup on this
173 port. We have also bumped the use count by one so it won't go
174 idle */
173 port->console = 1; 175 port->console = 1;
174 retval = 0; 176 retval = 0;
175 177
@@ -182,7 +184,7 @@ free_tty:
182 kfree(tty); 184 kfree(tty);
183reset_open_count: 185reset_open_count:
184 port->port.count = 0; 186 port->port.count = 0;
185goto out; 187 goto out;
186} 188}
187 189
188static void usb_console_write(struct console *co, 190static void usb_console_write(struct console *co,
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index e8d5133ce9c8..16a154d3b2fe 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Silicon Laboratories CP2101/CP2102 USB to RS232 serial adaptor driver 2 * Silicon Laboratories CP210x USB to RS232 serial adaptor driver
3 * 3 *
4 * Copyright (C) 2005 Craig Shelley (craig@microtron.org.uk) 4 * Copyright (C) 2005 Craig Shelley (craig@microtron.org.uk)
5 * 5 *
@@ -27,44 +27,46 @@
27/* 27/*
28 * Version Information 28 * Version Information
29 */ 29 */
30#define DRIVER_VERSION "v0.08" 30#define DRIVER_VERSION "v0.09"
31#define DRIVER_DESC "Silicon Labs CP2101/CP2102 RS232 serial adaptor driver" 31#define DRIVER_DESC "Silicon Labs CP210x RS232 serial adaptor driver"
32 32
33/* 33/*
34 * Function Prototypes 34 * Function Prototypes
35 */ 35 */
36static int cp2101_open(struct tty_struct *, struct usb_serial_port *, 36static int cp210x_open(struct tty_struct *, struct usb_serial_port *,
37 struct file *); 37 struct file *);
38static void cp2101_cleanup(struct usb_serial_port *); 38static void cp210x_cleanup(struct usb_serial_port *);
39static void cp2101_close(struct tty_struct *, struct usb_serial_port *, 39static void cp210x_close(struct usb_serial_port *);
40 struct file*); 40static void cp210x_get_termios(struct tty_struct *,
41static void cp2101_get_termios(struct tty_struct *,
42 struct usb_serial_port *port); 41 struct usb_serial_port *port);
43static void cp2101_get_termios_port(struct usb_serial_port *port, 42static void cp210x_get_termios_port(struct usb_serial_port *port,
44 unsigned int *cflagp, unsigned int *baudp); 43 unsigned int *cflagp, unsigned int *baudp);
45static void cp2101_set_termios(struct tty_struct *, struct usb_serial_port *, 44static void cp210x_set_termios(struct tty_struct *, struct usb_serial_port *,
46 struct ktermios*); 45 struct ktermios*);
47static int cp2101_tiocmget(struct tty_struct *, struct file *); 46static int cp210x_tiocmget(struct tty_struct *, struct file *);
48static int cp2101_tiocmset(struct tty_struct *, struct file *, 47static int cp210x_tiocmset(struct tty_struct *, struct file *,
49 unsigned int, unsigned int); 48 unsigned int, unsigned int);
50static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *, 49static int cp210x_tiocmset_port(struct usb_serial_port *port, struct file *,
51 unsigned int, unsigned int); 50 unsigned int, unsigned int);
52static void cp2101_break_ctl(struct tty_struct *, int); 51static void cp210x_break_ctl(struct tty_struct *, int);
53static int cp2101_startup(struct usb_serial *); 52static int cp210x_startup(struct usb_serial *);
54static void cp2101_shutdown(struct usb_serial *); 53static void cp210x_shutdown(struct usb_serial *);
55 54
56static int debug; 55static int debug;
57 56
58static struct usb_device_id id_table [] = { 57static struct usb_device_id id_table [] = {
59 { USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */ 58 { USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */
60 { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ 59 { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */
60 { USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */
61 { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ 61 { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */
62 { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */
62 { USB_DEVICE(0x0FCF, 0x1003) }, /* Dynastream ANT development board */ 63 { USB_DEVICE(0x0FCF, 0x1003) }, /* Dynastream ANT development board */
63 { USB_DEVICE(0x0FCF, 0x1004) }, /* Dynastream ANT2USB */ 64 { USB_DEVICE(0x0FCF, 0x1004) }, /* Dynastream ANT2USB */
64 { USB_DEVICE(0x0FCF, 0x1006) }, /* Dynastream ANT development board */ 65 { USB_DEVICE(0x0FCF, 0x1006) }, /* Dynastream ANT development board */
65 { USB_DEVICE(0x10A6, 0xAA26) }, /* Knock-off DCU-11 cable */ 66 { USB_DEVICE(0x10A6, 0xAA26) }, /* Knock-off DCU-11 cable */
66 { USB_DEVICE(0x10AB, 0x10C5) }, /* Siemens MC60 Cable */ 67 { USB_DEVICE(0x10AB, 0x10C5) }, /* Siemens MC60 Cable */
67 { USB_DEVICE(0x10B5, 0xAC70) }, /* Nokia CA-42 USB */ 68 { USB_DEVICE(0x10B5, 0xAC70) }, /* Nokia CA-42 USB */
69 { USB_DEVICE(0x10C4, 0x0F91) }, /* Vstabi */
68 { USB_DEVICE(0x10C4, 0x800A) }, /* SPORTident BSM7-D-USB main station */ 70 { USB_DEVICE(0x10C4, 0x800A) }, /* SPORTident BSM7-D-USB main station */
69 { USB_DEVICE(0x10C4, 0x803B) }, /* Pololu USB-serial converter */ 71 { USB_DEVICE(0x10C4, 0x803B) }, /* Pololu USB-serial converter */
70 { USB_DEVICE(0x10C4, 0x8053) }, /* Enfora EDG1228 */ 72 { USB_DEVICE(0x10C4, 0x8053) }, /* Enfora EDG1228 */
@@ -85,10 +87,12 @@ static struct usb_device_id id_table [] = {
85 { USB_DEVICE(0x10C4, 0x81C8) }, /* Lipowsky Industrie Elektronik GmbH, Baby-JTAG */ 87 { USB_DEVICE(0x10C4, 0x81C8) }, /* Lipowsky Industrie Elektronik GmbH, Baby-JTAG */
86 { USB_DEVICE(0x10C4, 0x81E2) }, /* Lipowsky Industrie Elektronik GmbH, Baby-LIN */ 88 { USB_DEVICE(0x10C4, 0x81E2) }, /* Lipowsky Industrie Elektronik GmbH, Baby-LIN */
87 { USB_DEVICE(0x10C4, 0x81E7) }, /* Aerocomm Radio */ 89 { USB_DEVICE(0x10C4, 0x81E7) }, /* Aerocomm Radio */
90 { USB_DEVICE(0x10C4, 0x81F2) }, /* C1007 HF band RFID controller */
88 { USB_DEVICE(0x10C4, 0x8218) }, /* Lipowsky Industrie Elektronik GmbH, HARP-1 */ 91 { USB_DEVICE(0x10C4, 0x8218) }, /* Lipowsky Industrie Elektronik GmbH, HARP-1 */
89 { USB_DEVICE(0x10C4, 0x822B) }, /* Modem EDGE(GSM) Comander 2 */ 92 { USB_DEVICE(0x10C4, 0x822B) }, /* Modem EDGE(GSM) Comander 2 */
90 { USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demostration module */ 93 { USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demostration module */
91 { USB_DEVICE(0x10c4, 0x8293) }, /* Telegesys ETRX2USB */ 94 { USB_DEVICE(0x10c4, 0x8293) }, /* Telegesys ETRX2USB */
95 { USB_DEVICE(0x10C4, 0x82F9) }, /* Procyon AVS */
92 { USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */ 96 { USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */
93 { USB_DEVICE(0x10C4, 0x83A8) }, /* Amber Wireless AMB2560 */ 97 { USB_DEVICE(0x10C4, 0x83A8) }, /* Amber Wireless AMB2560 */
94 { USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */ 98 { USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */
@@ -99,7 +103,9 @@ static struct usb_device_id id_table [] = {
99 { USB_DEVICE(0x10C4, 0xF003) }, /* Elan Digital Systems USBpulse100 */ 103 { USB_DEVICE(0x10C4, 0xF003) }, /* Elan Digital Systems USBpulse100 */
100 { USB_DEVICE(0x10C4, 0xF004) }, /* Elan Digital Systems USBcount50 */ 104 { USB_DEVICE(0x10C4, 0xF004) }, /* Elan Digital Systems USBcount50 */
101 { USB_DEVICE(0x10C5, 0xEA61) }, /* Silicon Labs MobiData GPRS USB Modem */ 105 { USB_DEVICE(0x10C5, 0xEA61) }, /* Silicon Labs MobiData GPRS USB Modem */
106 { USB_DEVICE(0x10CE, 0xEA6A) }, /* Silicon Labs MobiData GPRS USB Modem 100EU */
102 { USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */ 107 { USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */
108 { USB_DEVICE(0x1555, 0x0004) }, /* Owen AC4 USB-RS485 Converter */
103 { USB_DEVICE(0x166A, 0x0303) }, /* Clipsal 5500PCU C-Bus USB interface */ 109 { USB_DEVICE(0x166A, 0x0303) }, /* Clipsal 5500PCU C-Bus USB interface */
104 { USB_DEVICE(0x16D6, 0x0001) }, /* Jablotron serial interface */ 110 { USB_DEVICE(0x16D6, 0x0001) }, /* Jablotron serial interface */
105 { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ 111 { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
@@ -108,53 +114,70 @@ static struct usb_device_id id_table [] = {
108 114
109MODULE_DEVICE_TABLE(usb, id_table); 115MODULE_DEVICE_TABLE(usb, id_table);
110 116
111static struct usb_driver cp2101_driver = { 117static struct usb_driver cp210x_driver = {
112 .name = "cp2101", 118 .name = "cp210x",
113 .probe = usb_serial_probe, 119 .probe = usb_serial_probe,
114 .disconnect = usb_serial_disconnect, 120 .disconnect = usb_serial_disconnect,
115 .id_table = id_table, 121 .id_table = id_table,
116 .no_dynamic_id = 1, 122 .no_dynamic_id = 1,
117}; 123};
118 124
119static struct usb_serial_driver cp2101_device = { 125static struct usb_serial_driver cp210x_device = {
120 .driver = { 126 .driver = {
121 .owner = THIS_MODULE, 127 .owner = THIS_MODULE,
122 .name = "cp2101", 128 .name = "cp210x",
123 }, 129 },
124 .usb_driver = &cp2101_driver, 130 .usb_driver = &cp210x_driver,
125 .id_table = id_table, 131 .id_table = id_table,
126 .num_ports = 1, 132 .num_ports = 1,
127 .open = cp2101_open, 133 .open = cp210x_open,
128 .close = cp2101_close, 134 .close = cp210x_close,
129 .break_ctl = cp2101_break_ctl, 135 .break_ctl = cp210x_break_ctl,
130 .set_termios = cp2101_set_termios, 136 .set_termios = cp210x_set_termios,
131 .tiocmget = cp2101_tiocmget, 137 .tiocmget = cp210x_tiocmget,
132 .tiocmset = cp2101_tiocmset, 138 .tiocmset = cp210x_tiocmset,
133 .attach = cp2101_startup, 139 .attach = cp210x_startup,
134 .shutdown = cp2101_shutdown, 140 .shutdown = cp210x_shutdown,
135}; 141};
136 142
137/* Config request types */ 143/* Config request types */
138#define REQTYPE_HOST_TO_DEVICE 0x41 144#define REQTYPE_HOST_TO_DEVICE 0x41
139#define REQTYPE_DEVICE_TO_HOST 0xc1 145#define REQTYPE_DEVICE_TO_HOST 0xc1
140 146
141/* Config SET requests. To GET, add 1 to the request number */ 147/* Config request codes */
142#define CP2101_UART 0x00 /* Enable / Disable */ 148#define CP210X_IFC_ENABLE 0x00
143#define CP2101_BAUDRATE 0x01 /* (BAUD_RATE_GEN_FREQ / baudrate) */ 149#define CP210X_SET_BAUDDIV 0x01
144#define CP2101_BITS 0x03 /* 0x(0)(databits)(parity)(stopbits) */ 150#define CP210X_GET_BAUDDIV 0x02
145#define CP2101_BREAK 0x05 /* On / Off */ 151#define CP210X_SET_LINE_CTL 0x03
146#define CP2101_CONTROL 0x07 /* Flow control line states */ 152#define CP210X_GET_LINE_CTL 0x04
147#define CP2101_MODEMCTL 0x13 /* Modem controls */ 153#define CP210X_SET_BREAK 0x05
148#define CP2101_CONFIG_6 0x19 /* 6 bytes of config data ??? */ 154#define CP210X_IMM_CHAR 0x06
149 155#define CP210X_SET_MHS 0x07
150/* CP2101_UART */ 156#define CP210X_GET_MDMSTS 0x08
157#define CP210X_SET_XON 0x09
158#define CP210X_SET_XOFF 0x0A
159#define CP210X_SET_EVENTMASK 0x0B
160#define CP210X_GET_EVENTMASK 0x0C
161#define CP210X_SET_CHAR 0x0D
162#define CP210X_GET_CHARS 0x0E
163#define CP210X_GET_PROPS 0x0F
164#define CP210X_GET_COMM_STATUS 0x10
165#define CP210X_RESET 0x11
166#define CP210X_PURGE 0x12
167#define CP210X_SET_FLOW 0x13
168#define CP210X_GET_FLOW 0x14
169#define CP210X_EMBED_EVENTS 0x15
170#define CP210X_GET_EVENTSTATE 0x16
171#define CP210X_SET_CHARS 0x19
172
173/* CP210X_IFC_ENABLE */
151#define UART_ENABLE 0x0001 174#define UART_ENABLE 0x0001
152#define UART_DISABLE 0x0000 175#define UART_DISABLE 0x0000
153 176
154/* CP2101_BAUDRATE */ 177/* CP210X_(SET|GET)_BAUDDIV */
155#define BAUD_RATE_GEN_FREQ 0x384000 178#define BAUD_RATE_GEN_FREQ 0x384000
156 179
157/* CP2101_BITS */ 180/* CP210X_(SET|GET)_LINE_CTL */
158#define BITS_DATA_MASK 0X0f00 181#define BITS_DATA_MASK 0X0f00
159#define BITS_DATA_5 0X0500 182#define BITS_DATA_5 0X0500
160#define BITS_DATA_6 0X0600 183#define BITS_DATA_6 0X0600
@@ -174,11 +197,11 @@ static struct usb_serial_driver cp2101_device = {
174#define BITS_STOP_1_5 0x0001 197#define BITS_STOP_1_5 0x0001
175#define BITS_STOP_2 0x0002 198#define BITS_STOP_2 0x0002
176 199
177/* CP2101_BREAK */ 200/* CP210X_SET_BREAK */
178#define BREAK_ON 0x0000 201#define BREAK_ON 0x0000
179#define BREAK_OFF 0x0001 202#define BREAK_OFF 0x0001
180 203
181/* CP2101_CONTROL */ 204/* CP210X_(SET_MHS|GET_MDMSTS) */
182#define CONTROL_DTR 0x0001 205#define CONTROL_DTR 0x0001
183#define CONTROL_RTS 0x0002 206#define CONTROL_RTS 0x0002
184#define CONTROL_CTS 0x0010 207#define CONTROL_CTS 0x0010
@@ -189,13 +212,13 @@ static struct usb_serial_driver cp2101_device = {
189#define CONTROL_WRITE_RTS 0x0200 212#define CONTROL_WRITE_RTS 0x0200
190 213
191/* 214/*
192 * cp2101_get_config 215 * cp210x_get_config
193 * Reads from the CP2101 configuration registers 216 * Reads from the CP210x configuration registers
194 * 'size' is specified in bytes. 217 * 'size' is specified in bytes.
195 * 'data' is a pointer to a pre-allocated array of integers large 218 * 'data' is a pointer to a pre-allocated array of integers large
196 * enough to hold 'size' bytes (with 4 bytes to each integer) 219 * enough to hold 'size' bytes (with 4 bytes to each integer)
197 */ 220 */
198static int cp2101_get_config(struct usb_serial_port *port, u8 request, 221static int cp210x_get_config(struct usb_serial_port *port, u8 request,
199 unsigned int *data, int size) 222 unsigned int *data, int size)
200{ 223{
201 struct usb_serial *serial = port->serial; 224 struct usb_serial *serial = port->serial;
@@ -211,9 +234,6 @@ static int cp2101_get_config(struct usb_serial_port *port, u8 request,
211 return -ENOMEM; 234 return -ENOMEM;
212 } 235 }
213 236
214 /* For get requests, the request number must be incremented */
215 request++;
216
217 /* Issue the request, attempting to read 'size' bytes */ 237 /* Issue the request, attempting to read 'size' bytes */
218 result = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), 238 result = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0),
219 request, REQTYPE_DEVICE_TO_HOST, 0x0000, 239 request, REQTYPE_DEVICE_TO_HOST, 0x0000,
@@ -236,12 +256,12 @@ static int cp2101_get_config(struct usb_serial_port *port, u8 request,
236} 256}
237 257
238/* 258/*
239 * cp2101_set_config 259 * cp210x_set_config
240 * Writes to the CP2101 configuration registers 260 * Writes to the CP210x configuration registers
241 * Values less than 16 bits wide are sent directly 261 * Values less than 16 bits wide are sent directly
242 * 'size' is specified in bytes. 262 * 'size' is specified in bytes.
243 */ 263 */
244static int cp2101_set_config(struct usb_serial_port *port, u8 request, 264static int cp210x_set_config(struct usb_serial_port *port, u8 request,
245 unsigned int *data, int size) 265 unsigned int *data, int size)
246{ 266{
247 struct usb_serial *serial = port->serial; 267 struct usb_serial *serial = port->serial;
@@ -292,21 +312,21 @@ static int cp2101_set_config(struct usb_serial_port *port, u8 request,
292} 312}
293 313
294/* 314/*
295 * cp2101_set_config_single 315 * cp210x_set_config_single
296 * Convenience function for calling cp2101_set_config on single data values 316 * Convenience function for calling cp210x_set_config on single data values
297 * without requiring an integer pointer 317 * without requiring an integer pointer
298 */ 318 */
299static inline int cp2101_set_config_single(struct usb_serial_port *port, 319static inline int cp210x_set_config_single(struct usb_serial_port *port,
300 u8 request, unsigned int data) 320 u8 request, unsigned int data)
301{ 321{
302 return cp2101_set_config(port, request, &data, 2); 322 return cp210x_set_config(port, request, &data, 2);
303} 323}
304 324
305/* 325/*
306 * cp2101_quantise_baudrate 326 * cp210x_quantise_baudrate
307 * Quantises the baud rate as per AN205 Table 1 327 * Quantises the baud rate as per AN205 Table 1
308 */ 328 */
309static unsigned int cp2101_quantise_baudrate(unsigned int baud) { 329static unsigned int cp210x_quantise_baudrate(unsigned int baud) {
310 if (baud <= 56) baud = 0; 330 if (baud <= 56) baud = 0;
311 else if (baud <= 300) baud = 300; 331 else if (baud <= 300) baud = 300;
312 else if (baud <= 600) baud = 600; 332 else if (baud <= 600) baud = 600;
@@ -343,7 +363,7 @@ static unsigned int cp2101_quantise_baudrate(unsigned int baud) {
343 return baud; 363 return baud;
344} 364}
345 365
346static int cp2101_open(struct tty_struct *tty, struct usb_serial_port *port, 366static int cp210x_open(struct tty_struct *tty, struct usb_serial_port *port,
347 struct file *filp) 367 struct file *filp)
348{ 368{
349 struct usb_serial *serial = port->serial; 369 struct usb_serial *serial = port->serial;
@@ -351,7 +371,7 @@ static int cp2101_open(struct tty_struct *tty, struct usb_serial_port *port,
351 371
352 dbg("%s - port %d", __func__, port->number); 372 dbg("%s - port %d", __func__, port->number);
353 373
354 if (cp2101_set_config_single(port, CP2101_UART, UART_ENABLE)) { 374 if (cp210x_set_config_single(port, CP210X_IFC_ENABLE, UART_ENABLE)) {
355 dev_err(&port->dev, "%s - Unable to enable UART\n", 375 dev_err(&port->dev, "%s - Unable to enable UART\n",
356 __func__); 376 __func__);
357 return -EPROTO; 377 return -EPROTO;
@@ -373,17 +393,17 @@ static int cp2101_open(struct tty_struct *tty, struct usb_serial_port *port,
373 } 393 }
374 394
375 /* Configure the termios structure */ 395 /* Configure the termios structure */
376 cp2101_get_termios(tty, port); 396 cp210x_get_termios(tty, port);
377 397
378 /* Set the DTR and RTS pins low */ 398 /* Set the DTR and RTS pins low */
379 cp2101_tiocmset_port(tty ? (struct usb_serial_port *) tty->driver_data 399 cp210x_tiocmset_port(tty ? (struct usb_serial_port *) tty->driver_data
380 : port, 400 : port,
381 NULL, TIOCM_DTR | TIOCM_RTS, 0); 401 NULL, TIOCM_DTR | TIOCM_RTS, 0);
382 402
383 return 0; 403 return 0;
384} 404}
385 405
386static void cp2101_cleanup(struct usb_serial_port *port) 406static void cp210x_cleanup(struct usb_serial_port *port)
387{ 407{
388 struct usb_serial *serial = port->serial; 408 struct usb_serial *serial = port->serial;
389 409
@@ -398,8 +418,7 @@ static void cp2101_cleanup(struct usb_serial_port *port)
398 } 418 }
399} 419}
400 420
401static void cp2101_close(struct tty_struct *tty, struct usb_serial_port *port, 421static void cp210x_close(struct usb_serial_port *port)
402 struct file *filp)
403{ 422{
404 dbg("%s - port %d", __func__, port->number); 423 dbg("%s - port %d", __func__, port->number);
405 424
@@ -410,23 +429,23 @@ static void cp2101_close(struct tty_struct *tty, struct usb_serial_port *port,
410 429
411 mutex_lock(&port->serial->disc_mutex); 430 mutex_lock(&port->serial->disc_mutex);
412 if (!port->serial->disconnected) 431 if (!port->serial->disconnected)
413 cp2101_set_config_single(port, CP2101_UART, UART_DISABLE); 432 cp210x_set_config_single(port, CP210X_IFC_ENABLE, UART_DISABLE);
414 mutex_unlock(&port->serial->disc_mutex); 433 mutex_unlock(&port->serial->disc_mutex);
415} 434}
416 435
417/* 436/*
418 * cp2101_get_termios 437 * cp210x_get_termios
419 * Reads the baud rate, data bits, parity, stop bits and flow control mode 438 * Reads the baud rate, data bits, parity, stop bits and flow control mode
420 * from the device, corrects any unsupported values, and configures the 439 * from the device, corrects any unsupported values, and configures the
421 * termios structure to reflect the state of the device 440 * termios structure to reflect the state of the device
422 */ 441 */
423static void cp2101_get_termios(struct tty_struct *tty, 442static void cp210x_get_termios(struct tty_struct *tty,
424 struct usb_serial_port *port) 443 struct usb_serial_port *port)
425{ 444{
426 unsigned int baud; 445 unsigned int baud;
427 446
428 if (tty) { 447 if (tty) {
429 cp2101_get_termios_port(tty->driver_data, 448 cp210x_get_termios_port(tty->driver_data,
430 &tty->termios->c_cflag, &baud); 449 &tty->termios->c_cflag, &baud);
431 tty_encode_baud_rate(tty, baud, baud); 450 tty_encode_baud_rate(tty, baud, baud);
432 } 451 }
@@ -434,15 +453,15 @@ static void cp2101_get_termios(struct tty_struct *tty,
434 else { 453 else {
435 unsigned int cflag; 454 unsigned int cflag;
436 cflag = 0; 455 cflag = 0;
437 cp2101_get_termios_port(port, &cflag, &baud); 456 cp210x_get_termios_port(port, &cflag, &baud);
438 } 457 }
439} 458}
440 459
441/* 460/*
442 * cp2101_get_termios_port 461 * cp210x_get_termios_port
443 * This is the heart of cp2101_get_termios which always uses a &usb_serial_port. 462 * This is the heart of cp210x_get_termios which always uses a &usb_serial_port.
444 */ 463 */
445static void cp2101_get_termios_port(struct usb_serial_port *port, 464static void cp210x_get_termios_port(struct usb_serial_port *port,
446 unsigned int *cflagp, unsigned int *baudp) 465 unsigned int *cflagp, unsigned int *baudp)
447{ 466{
448 unsigned int cflag, modem_ctl[4]; 467 unsigned int cflag, modem_ctl[4];
@@ -451,17 +470,17 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
451 470
452 dbg("%s - port %d", __func__, port->number); 471 dbg("%s - port %d", __func__, port->number);
453 472
454 cp2101_get_config(port, CP2101_BAUDRATE, &baud, 2); 473 cp210x_get_config(port, CP210X_GET_BAUDDIV, &baud, 2);
455 /* Convert to baudrate */ 474 /* Convert to baudrate */
456 if (baud) 475 if (baud)
457 baud = cp2101_quantise_baudrate((BAUD_RATE_GEN_FREQ + baud/2)/ baud); 476 baud = cp210x_quantise_baudrate((BAUD_RATE_GEN_FREQ + baud/2)/ baud);
458 477
459 dbg("%s - baud rate = %d", __func__, baud); 478 dbg("%s - baud rate = %d", __func__, baud);
460 *baudp = baud; 479 *baudp = baud;
461 480
462 cflag = *cflagp; 481 cflag = *cflagp;
463 482
464 cp2101_get_config(port, CP2101_BITS, &bits, 2); 483 cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
465 cflag &= ~CSIZE; 484 cflag &= ~CSIZE;
466 switch (bits & BITS_DATA_MASK) { 485 switch (bits & BITS_DATA_MASK) {
467 case BITS_DATA_5: 486 case BITS_DATA_5:
@@ -486,14 +505,14 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
486 cflag |= CS8; 505 cflag |= CS8;
487 bits &= ~BITS_DATA_MASK; 506 bits &= ~BITS_DATA_MASK;
488 bits |= BITS_DATA_8; 507 bits |= BITS_DATA_8;
489 cp2101_set_config(port, CP2101_BITS, &bits, 2); 508 cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
490 break; 509 break;
491 default: 510 default:
492 dbg("%s - Unknown number of data bits, using 8", __func__); 511 dbg("%s - Unknown number of data bits, using 8", __func__);
493 cflag |= CS8; 512 cflag |= CS8;
494 bits &= ~BITS_DATA_MASK; 513 bits &= ~BITS_DATA_MASK;
495 bits |= BITS_DATA_8; 514 bits |= BITS_DATA_8;
496 cp2101_set_config(port, CP2101_BITS, &bits, 2); 515 cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
497 break; 516 break;
498 } 517 }
499 518
@@ -516,20 +535,20 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
516 __func__); 535 __func__);
517 cflag &= ~PARENB; 536 cflag &= ~PARENB;
518 bits &= ~BITS_PARITY_MASK; 537 bits &= ~BITS_PARITY_MASK;
519 cp2101_set_config(port, CP2101_BITS, &bits, 2); 538 cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
520 break; 539 break;
521 case BITS_PARITY_SPACE: 540 case BITS_PARITY_SPACE:
522 dbg("%s - parity = SPACE (not supported, disabling parity)", 541 dbg("%s - parity = SPACE (not supported, disabling parity)",
523 __func__); 542 __func__);
524 cflag &= ~PARENB; 543 cflag &= ~PARENB;
525 bits &= ~BITS_PARITY_MASK; 544 bits &= ~BITS_PARITY_MASK;
526 cp2101_set_config(port, CP2101_BITS, &bits, 2); 545 cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
527 break; 546 break;
528 default: 547 default:
529 dbg("%s - Unknown parity mode, disabling parity", __func__); 548 dbg("%s - Unknown parity mode, disabling parity", __func__);
530 cflag &= ~PARENB; 549 cflag &= ~PARENB;
531 bits &= ~BITS_PARITY_MASK; 550 bits &= ~BITS_PARITY_MASK;
532 cp2101_set_config(port, CP2101_BITS, &bits, 2); 551 cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
533 break; 552 break;
534 } 553 }
535 554
@@ -542,7 +561,7 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
542 dbg("%s - stop bits = 1.5 (not supported, using 1 stop bit)", 561 dbg("%s - stop bits = 1.5 (not supported, using 1 stop bit)",
543 __func__); 562 __func__);
544 bits &= ~BITS_STOP_MASK; 563 bits &= ~BITS_STOP_MASK;
545 cp2101_set_config(port, CP2101_BITS, &bits, 2); 564 cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
546 break; 565 break;
547 case BITS_STOP_2: 566 case BITS_STOP_2:
548 dbg("%s - stop bits = 2", __func__); 567 dbg("%s - stop bits = 2", __func__);
@@ -552,11 +571,11 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
552 dbg("%s - Unknown number of stop bits, using 1 stop bit", 571 dbg("%s - Unknown number of stop bits, using 1 stop bit",
553 __func__); 572 __func__);
554 bits &= ~BITS_STOP_MASK; 573 bits &= ~BITS_STOP_MASK;
555 cp2101_set_config(port, CP2101_BITS, &bits, 2); 574 cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
556 break; 575 break;
557 } 576 }
558 577
559 cp2101_get_config(port, CP2101_MODEMCTL, modem_ctl, 16); 578 cp210x_get_config(port, CP210X_GET_FLOW, modem_ctl, 16);
560 if (modem_ctl[0] & 0x0008) { 579 if (modem_ctl[0] & 0x0008) {
561 dbg("%s - flow control = CRTSCTS", __func__); 580 dbg("%s - flow control = CRTSCTS", __func__);
562 cflag |= CRTSCTS; 581 cflag |= CRTSCTS;
@@ -568,7 +587,7 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
568 *cflagp = cflag; 587 *cflagp = cflag;
569} 588}
570 589
571static void cp2101_set_termios(struct tty_struct *tty, 590static void cp210x_set_termios(struct tty_struct *tty,
572 struct usb_serial_port *port, struct ktermios *old_termios) 591 struct usb_serial_port *port, struct ktermios *old_termios)
573{ 592{
574 unsigned int cflag, old_cflag; 593 unsigned int cflag, old_cflag;
@@ -583,13 +602,13 @@ static void cp2101_set_termios(struct tty_struct *tty,
583 tty->termios->c_cflag &= ~CMSPAR; 602 tty->termios->c_cflag &= ~CMSPAR;
584 cflag = tty->termios->c_cflag; 603 cflag = tty->termios->c_cflag;
585 old_cflag = old_termios->c_cflag; 604 old_cflag = old_termios->c_cflag;
586 baud = cp2101_quantise_baudrate(tty_get_baud_rate(tty)); 605 baud = cp210x_quantise_baudrate(tty_get_baud_rate(tty));
587 606
588 /* If the baud rate is to be updated*/ 607 /* If the baud rate is to be updated*/
589 if (baud != tty_termios_baud_rate(old_termios) && baud != 0) { 608 if (baud != tty_termios_baud_rate(old_termios) && baud != 0) {
590 dbg("%s - Setting baud rate to %d baud", __func__, 609 dbg("%s - Setting baud rate to %d baud", __func__,
591 baud); 610 baud);
592 if (cp2101_set_config_single(port, CP2101_BAUDRATE, 611 if (cp210x_set_config_single(port, CP210X_SET_BAUDDIV,
593 ((BAUD_RATE_GEN_FREQ + baud/2) / baud))) { 612 ((BAUD_RATE_GEN_FREQ + baud/2) / baud))) {
594 dbg("Baud rate requested not supported by device\n"); 613 dbg("Baud rate requested not supported by device\n");
595 baud = tty_termios_baud_rate(old_termios); 614 baud = tty_termios_baud_rate(old_termios);
@@ -600,7 +619,7 @@ static void cp2101_set_termios(struct tty_struct *tty,
600 619
601 /* If the number of data bits is to be updated */ 620 /* If the number of data bits is to be updated */
602 if ((cflag & CSIZE) != (old_cflag & CSIZE)) { 621 if ((cflag & CSIZE) != (old_cflag & CSIZE)) {
603 cp2101_get_config(port, CP2101_BITS, &bits, 2); 622 cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
604 bits &= ~BITS_DATA_MASK; 623 bits &= ~BITS_DATA_MASK;
605 switch (cflag & CSIZE) { 624 switch (cflag & CSIZE) {
606 case CS5: 625 case CS5:
@@ -624,19 +643,19 @@ static void cp2101_set_termios(struct tty_struct *tty,
624 dbg("%s - data bits = 9", __func__); 643 dbg("%s - data bits = 9", __func__);
625 break;*/ 644 break;*/
626 default: 645 default:
627 dbg("cp2101 driver does not " 646 dbg("cp210x driver does not "
628 "support the number of bits requested," 647 "support the number of bits requested,"
629 " using 8 bit mode\n"); 648 " using 8 bit mode\n");
630 bits |= BITS_DATA_8; 649 bits |= BITS_DATA_8;
631 break; 650 break;
632 } 651 }
633 if (cp2101_set_config(port, CP2101_BITS, &bits, 2)) 652 if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
634 dbg("Number of data bits requested " 653 dbg("Number of data bits requested "
635 "not supported by device\n"); 654 "not supported by device\n");
636 } 655 }
637 656
638 if ((cflag & (PARENB|PARODD)) != (old_cflag & (PARENB|PARODD))) { 657 if ((cflag & (PARENB|PARODD)) != (old_cflag & (PARENB|PARODD))) {
639 cp2101_get_config(port, CP2101_BITS, &bits, 2); 658 cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
640 bits &= ~BITS_PARITY_MASK; 659 bits &= ~BITS_PARITY_MASK;
641 if (cflag & PARENB) { 660 if (cflag & PARENB) {
642 if (cflag & PARODD) { 661 if (cflag & PARODD) {
@@ -647,13 +666,13 @@ static void cp2101_set_termios(struct tty_struct *tty,
647 dbg("%s - parity = EVEN", __func__); 666 dbg("%s - parity = EVEN", __func__);
648 } 667 }
649 } 668 }
650 if (cp2101_set_config(port, CP2101_BITS, &bits, 2)) 669 if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
651 dbg("Parity mode not supported " 670 dbg("Parity mode not supported "
652 "by device\n"); 671 "by device\n");
653 } 672 }
654 673
655 if ((cflag & CSTOPB) != (old_cflag & CSTOPB)) { 674 if ((cflag & CSTOPB) != (old_cflag & CSTOPB)) {
656 cp2101_get_config(port, CP2101_BITS, &bits, 2); 675 cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
657 bits &= ~BITS_STOP_MASK; 676 bits &= ~BITS_STOP_MASK;
658 if (cflag & CSTOPB) { 677 if (cflag & CSTOPB) {
659 bits |= BITS_STOP_2; 678 bits |= BITS_STOP_2;
@@ -662,13 +681,13 @@ static void cp2101_set_termios(struct tty_struct *tty,
662 bits |= BITS_STOP_1; 681 bits |= BITS_STOP_1;
663 dbg("%s - stop bits = 1", __func__); 682 dbg("%s - stop bits = 1", __func__);
664 } 683 }
665 if (cp2101_set_config(port, CP2101_BITS, &bits, 2)) 684 if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
666 dbg("Number of stop bits requested " 685 dbg("Number of stop bits requested "
667 "not supported by device\n"); 686 "not supported by device\n");
668 } 687 }
669 688
670 if ((cflag & CRTSCTS) != (old_cflag & CRTSCTS)) { 689 if ((cflag & CRTSCTS) != (old_cflag & CRTSCTS)) {
671 cp2101_get_config(port, CP2101_MODEMCTL, modem_ctl, 16); 690 cp210x_get_config(port, CP210X_GET_FLOW, modem_ctl, 16);
672 dbg("%s - read modem controls = 0x%.4x 0x%.4x 0x%.4x 0x%.4x", 691 dbg("%s - read modem controls = 0x%.4x 0x%.4x 0x%.4x 0x%.4x",
673 __func__, modem_ctl[0], modem_ctl[1], 692 __func__, modem_ctl[0], modem_ctl[1],
674 modem_ctl[2], modem_ctl[3]); 693 modem_ctl[2], modem_ctl[3]);
@@ -688,19 +707,19 @@ static void cp2101_set_termios(struct tty_struct *tty,
688 dbg("%s - write modem controls = 0x%.4x 0x%.4x 0x%.4x 0x%.4x", 707 dbg("%s - write modem controls = 0x%.4x 0x%.4x 0x%.4x 0x%.4x",
689 __func__, modem_ctl[0], modem_ctl[1], 708 __func__, modem_ctl[0], modem_ctl[1],
690 modem_ctl[2], modem_ctl[3]); 709 modem_ctl[2], modem_ctl[3]);
691 cp2101_set_config(port, CP2101_MODEMCTL, modem_ctl, 16); 710 cp210x_set_config(port, CP210X_SET_FLOW, modem_ctl, 16);
692 } 711 }
693 712
694} 713}
695 714
696static int cp2101_tiocmset (struct tty_struct *tty, struct file *file, 715static int cp210x_tiocmset (struct tty_struct *tty, struct file *file,
697 unsigned int set, unsigned int clear) 716 unsigned int set, unsigned int clear)
698{ 717{
699 struct usb_serial_port *port = tty->driver_data; 718 struct usb_serial_port *port = tty->driver_data;
700 return cp2101_tiocmset_port(port, file, set, clear); 719 return cp210x_tiocmset_port(port, file, set, clear);
701} 720}
702 721
703static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *file, 722static int cp210x_tiocmset_port(struct usb_serial_port *port, struct file *file,
704 unsigned int set, unsigned int clear) 723 unsigned int set, unsigned int clear)
705{ 724{
706 unsigned int control = 0; 725 unsigned int control = 0;
@@ -726,10 +745,10 @@ static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *file,
726 745
727 dbg("%s - control = 0x%.4x", __func__, control); 746 dbg("%s - control = 0x%.4x", __func__, control);
728 747
729 return cp2101_set_config(port, CP2101_CONTROL, &control, 2); 748 return cp210x_set_config(port, CP210X_SET_MHS, &control, 2);
730} 749}
731 750
732static int cp2101_tiocmget (struct tty_struct *tty, struct file *file) 751static int cp210x_tiocmget (struct tty_struct *tty, struct file *file)
733{ 752{
734 struct usb_serial_port *port = tty->driver_data; 753 struct usb_serial_port *port = tty->driver_data;
735 unsigned int control; 754 unsigned int control;
@@ -737,7 +756,7 @@ static int cp2101_tiocmget (struct tty_struct *tty, struct file *file)
737 756
738 dbg("%s - port %d", __func__, port->number); 757 dbg("%s - port %d", __func__, port->number);
739 758
740 cp2101_get_config(port, CP2101_CONTROL, &control, 1); 759 cp210x_get_config(port, CP210X_GET_MDMSTS, &control, 1);
741 760
742 result = ((control & CONTROL_DTR) ? TIOCM_DTR : 0) 761 result = ((control & CONTROL_DTR) ? TIOCM_DTR : 0)
743 |((control & CONTROL_RTS) ? TIOCM_RTS : 0) 762 |((control & CONTROL_RTS) ? TIOCM_RTS : 0)
@@ -751,7 +770,7 @@ static int cp2101_tiocmget (struct tty_struct *tty, struct file *file)
751 return result; 770 return result;
752} 771}
753 772
754static void cp2101_break_ctl (struct tty_struct *tty, int break_state) 773static void cp210x_break_ctl (struct tty_struct *tty, int break_state)
755{ 774{
756 struct usb_serial_port *port = tty->driver_data; 775 struct usb_serial_port *port = tty->driver_data;
757 unsigned int state; 776 unsigned int state;
@@ -763,17 +782,17 @@ static void cp2101_break_ctl (struct tty_struct *tty, int break_state)
763 state = BREAK_ON; 782 state = BREAK_ON;
764 dbg("%s - turning break %s", __func__, 783 dbg("%s - turning break %s", __func__,
765 state == BREAK_OFF ? "off" : "on"); 784 state == BREAK_OFF ? "off" : "on");
766 cp2101_set_config(port, CP2101_BREAK, &state, 2); 785 cp210x_set_config(port, CP210X_SET_BREAK, &state, 2);
767} 786}
768 787
769static int cp2101_startup(struct usb_serial *serial) 788static int cp210x_startup(struct usb_serial *serial)
770{ 789{
771 /* CP2101 buffers behave strangely unless device is reset */ 790 /* cp210x buffers behave strangely unless device is reset */
772 usb_reset_device(serial->dev); 791 usb_reset_device(serial->dev);
773 return 0; 792 return 0;
774} 793}
775 794
776static void cp2101_shutdown(struct usb_serial *serial) 795static void cp210x_shutdown(struct usb_serial *serial)
777{ 796{
778 int i; 797 int i;
779 798
@@ -781,21 +800,21 @@ static void cp2101_shutdown(struct usb_serial *serial)
781 800
782 /* Stop reads and writes on all ports */ 801 /* Stop reads and writes on all ports */
783 for (i = 0; i < serial->num_ports; ++i) 802 for (i = 0; i < serial->num_ports; ++i)
784 cp2101_cleanup(serial->port[i]); 803 cp210x_cleanup(serial->port[i]);
785} 804}
786 805
787static int __init cp2101_init(void) 806static int __init cp210x_init(void)
788{ 807{
789 int retval; 808 int retval;
790 809
791 retval = usb_serial_register(&cp2101_device); 810 retval = usb_serial_register(&cp210x_device);
792 if (retval) 811 if (retval)
793 return retval; /* Failed to register */ 812 return retval; /* Failed to register */
794 813
795 retval = usb_register(&cp2101_driver); 814 retval = usb_register(&cp210x_driver);
796 if (retval) { 815 if (retval) {
797 /* Failed to register */ 816 /* Failed to register */
798 usb_serial_deregister(&cp2101_device); 817 usb_serial_deregister(&cp210x_device);
799 return retval; 818 return retval;
800 } 819 }
801 820
@@ -805,14 +824,14 @@ static int __init cp2101_init(void)
805 return 0; 824 return 0;
806} 825}
807 826
808static void __exit cp2101_exit(void) 827static void __exit cp210x_exit(void)
809{ 828{
810 usb_deregister(&cp2101_driver); 829 usb_deregister(&cp210x_driver);
811 usb_serial_deregister(&cp2101_device); 830 usb_serial_deregister(&cp210x_device);
812} 831}
813 832
814module_init(cp2101_init); 833module_init(cp210x_init);
815module_exit(cp2101_exit); 834module_exit(cp210x_exit);
816 835
817MODULE_DESCRIPTION(DRIVER_DESC); 836MODULE_DESCRIPTION(DRIVER_DESC);
818MODULE_VERSION(DRIVER_VERSION); 837MODULE_VERSION(DRIVER_VERSION);
diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c
index dd501bb63ed6..933ba913e66c 100644
--- a/drivers/usb/serial/cyberjack.c
+++ b/drivers/usb/serial/cyberjack.c
@@ -61,8 +61,7 @@ static int cyberjack_startup(struct usb_serial *serial);
61static void cyberjack_shutdown(struct usb_serial *serial); 61static void cyberjack_shutdown(struct usb_serial *serial);
62static int cyberjack_open(struct tty_struct *tty, 62static int cyberjack_open(struct tty_struct *tty,
63 struct usb_serial_port *port, struct file *filp); 63 struct usb_serial_port *port, struct file *filp);
64static void cyberjack_close(struct tty_struct *tty, 64static void cyberjack_close(struct usb_serial_port *port);
65 struct usb_serial_port *port, struct file *filp);
66static int cyberjack_write(struct tty_struct *tty, 65static int cyberjack_write(struct tty_struct *tty,
67 struct usb_serial_port *port, const unsigned char *buf, int count); 66 struct usb_serial_port *port, const unsigned char *buf, int count);
68static int cyberjack_write_room(struct tty_struct *tty); 67static int cyberjack_write_room(struct tty_struct *tty);
@@ -185,8 +184,7 @@ static int cyberjack_open(struct tty_struct *tty,
185 return result; 184 return result;
186} 185}
187 186
188static void cyberjack_close(struct tty_struct *tty, 187static void cyberjack_close(struct usb_serial_port *port)
189 struct usb_serial_port *port, struct file *filp)
190{ 188{
191 dbg("%s - port %d", __func__, port->number); 189 dbg("%s - port %d", __func__, port->number);
192 190
diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c
index e568710b263f..669f93848539 100644
--- a/drivers/usb/serial/cypress_m8.c
+++ b/drivers/usb/serial/cypress_m8.c
@@ -174,8 +174,8 @@ static int cypress_ca42v2_startup(struct usb_serial *serial);
174static void cypress_shutdown(struct usb_serial *serial); 174static void cypress_shutdown(struct usb_serial *serial);
175static int cypress_open(struct tty_struct *tty, 175static int cypress_open(struct tty_struct *tty,
176 struct usb_serial_port *port, struct file *filp); 176 struct usb_serial_port *port, struct file *filp);
177static void cypress_close(struct tty_struct *tty, 177static void cypress_close(struct usb_serial_port *port);
178 struct usb_serial_port *port, struct file *filp); 178static void cypress_dtr_rts(struct usb_serial_port *port, int on);
179static int cypress_write(struct tty_struct *tty, struct usb_serial_port *port, 179static int cypress_write(struct tty_struct *tty, struct usb_serial_port *port,
180 const unsigned char *buf, int count); 180 const unsigned char *buf, int count);
181static void cypress_send(struct usb_serial_port *port); 181static void cypress_send(struct usb_serial_port *port);
@@ -218,6 +218,7 @@ static struct usb_serial_driver cypress_earthmate_device = {
218 .shutdown = cypress_shutdown, 218 .shutdown = cypress_shutdown,
219 .open = cypress_open, 219 .open = cypress_open,
220 .close = cypress_close, 220 .close = cypress_close,
221 .dtr_rts = cypress_dtr_rts,
221 .write = cypress_write, 222 .write = cypress_write,
222 .write_room = cypress_write_room, 223 .write_room = cypress_write_room,
223 .ioctl = cypress_ioctl, 224 .ioctl = cypress_ioctl,
@@ -244,6 +245,7 @@ static struct usb_serial_driver cypress_hidcom_device = {
244 .shutdown = cypress_shutdown, 245 .shutdown = cypress_shutdown,
245 .open = cypress_open, 246 .open = cypress_open,
246 .close = cypress_close, 247 .close = cypress_close,
248 .dtr_rts = cypress_dtr_rts,
247 .write = cypress_write, 249 .write = cypress_write,
248 .write_room = cypress_write_room, 250 .write_room = cypress_write_room,
249 .ioctl = cypress_ioctl, 251 .ioctl = cypress_ioctl,
@@ -270,6 +272,7 @@ static struct usb_serial_driver cypress_ca42v2_device = {
270 .shutdown = cypress_shutdown, 272 .shutdown = cypress_shutdown,
271 .open = cypress_open, 273 .open = cypress_open,
272 .close = cypress_close, 274 .close = cypress_close,
275 .dtr_rts = cypress_dtr_rts,
273 .write = cypress_write, 276 .write = cypress_write,
274 .write_room = cypress_write_room, 277 .write_room = cypress_write_room,
275 .ioctl = cypress_ioctl, 278 .ioctl = cypress_ioctl,
@@ -656,11 +659,7 @@ static int cypress_open(struct tty_struct *tty,
656 priv->rx_flags = 0; 659 priv->rx_flags = 0;
657 spin_unlock_irqrestore(&priv->lock, flags); 660 spin_unlock_irqrestore(&priv->lock, flags);
658 661
659 /* raise both lines and set termios */ 662 /* Set termios */
660 spin_lock_irqsave(&priv->lock, flags);
661 priv->line_control = CONTROL_DTR | CONTROL_RTS;
662 priv->cmd_ctrl = 1;
663 spin_unlock_irqrestore(&priv->lock, flags);
664 result = cypress_write(tty, port, NULL, 0); 663 result = cypress_write(tty, port, NULL, 0);
665 664
666 if (result) { 665 if (result) {
@@ -694,76 +693,42 @@ static int cypress_open(struct tty_struct *tty,
694 __func__, result); 693 __func__, result);
695 cypress_set_dead(port); 694 cypress_set_dead(port);
696 } 695 }
697 696 port->port.drain_delay = 256;
698 return result; 697 return result;
699} /* cypress_open */ 698} /* cypress_open */
700 699
700static void cypress_dtr_rts(struct usb_serial_port *port, int on)
701{
702 struct cypress_private *priv = usb_get_serial_port_data(port);
703 /* drop dtr and rts */
704 priv = usb_get_serial_port_data(port);
705 spin_lock_irq(&priv->lock);
706 if (on == 0)
707 priv->line_control = 0;
708 else
709 priv->line_control = CONTROL_DTR | CONTROL_RTS;
710 priv->cmd_ctrl = 1;
711 spin_unlock_irq(&priv->lock);
712 cypress_write(NULL, port, NULL, 0);
713}
701 714
702static void cypress_close(struct tty_struct *tty, 715static void cypress_close(struct usb_serial_port *port)
703 struct usb_serial_port *port, struct file *filp)
704{ 716{
705 struct cypress_private *priv = usb_get_serial_port_data(port); 717 struct cypress_private *priv = usb_get_serial_port_data(port);
706 unsigned int c_cflag;
707 int bps;
708 long timeout;
709 wait_queue_t wait;
710 718
711 dbg("%s - port %d", __func__, port->number); 719 dbg("%s - port %d", __func__, port->number);
712 720
713 /* wait for data to drain from buffer */
714 spin_lock_irq(&priv->lock);
715 timeout = CYPRESS_CLOSING_WAIT;
716 init_waitqueue_entry(&wait, current);
717 add_wait_queue(&tty->write_wait, &wait);
718 for (;;) {
719 set_current_state(TASK_INTERRUPTIBLE);
720 if (cypress_buf_data_avail(priv->buf) == 0
721 || timeout == 0 || signal_pending(current)
722 /* without mutex, allowed due to harmless failure mode */
723 || port->serial->disconnected)
724 break;
725 spin_unlock_irq(&priv->lock);
726 timeout = schedule_timeout(timeout);
727 spin_lock_irq(&priv->lock);
728 }
729 set_current_state(TASK_RUNNING);
730 remove_wait_queue(&tty->write_wait, &wait);
731 /* clear out any remaining data in the buffer */
732 cypress_buf_clear(priv->buf);
733 spin_unlock_irq(&priv->lock);
734
735 /* writing is potentially harmful, lock must be taken */ 721 /* writing is potentially harmful, lock must be taken */
736 mutex_lock(&port->serial->disc_mutex); 722 mutex_lock(&port->serial->disc_mutex);
737 if (port->serial->disconnected) { 723 if (port->serial->disconnected) {
738 mutex_unlock(&port->serial->disc_mutex); 724 mutex_unlock(&port->serial->disc_mutex);
739 return; 725 return;
740 } 726 }
741 /* wait for characters to drain from device */ 727 cypress_buf_clear(priv->buf);
742 if (tty) {
743 bps = tty_get_baud_rate(tty);
744 if (bps > 1200)
745 timeout = max((HZ * 2560) / bps, HZ / 10);
746 else
747 timeout = 2 * HZ;
748 schedule_timeout_interruptible(timeout);
749 }
750
751 dbg("%s - stopping urbs", __func__); 728 dbg("%s - stopping urbs", __func__);
752 usb_kill_urb(port->interrupt_in_urb); 729 usb_kill_urb(port->interrupt_in_urb);
753 usb_kill_urb(port->interrupt_out_urb); 730 usb_kill_urb(port->interrupt_out_urb);
754 731
755 if (tty) {
756 c_cflag = tty->termios->c_cflag;
757 if (c_cflag & HUPCL) {
758 /* drop dtr and rts */
759 priv = usb_get_serial_port_data(port);
760 spin_lock_irq(&priv->lock);
761 priv->line_control = 0;
762 priv->cmd_ctrl = 1;
763 spin_unlock_irq(&priv->lock);
764 cypress_write(tty, port, NULL, 0);
765 }
766 }
767 732
768 if (stats) 733 if (stats)
769 dev_info(&port->dev, "Statistics: %d Bytes In | %d Bytes Out | %d Commands Issued\n", 734 dev_info(&port->dev, "Statistics: %d Bytes In | %d Bytes Out | %d Commands Issued\n",
diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c
index 38ba4ea8b6bf..30f5140eff03 100644
--- a/drivers/usb/serial/digi_acceleport.c
+++ b/drivers/usb/serial/digi_acceleport.c
@@ -422,7 +422,6 @@ struct digi_port {
422 int dp_throttled; 422 int dp_throttled;
423 int dp_throttle_restart; 423 int dp_throttle_restart;
424 wait_queue_head_t dp_flush_wait; 424 wait_queue_head_t dp_flush_wait;
425 int dp_in_close; /* close in progress */
426 wait_queue_head_t dp_close_wait; /* wait queue for close */ 425 wait_queue_head_t dp_close_wait; /* wait queue for close */
427 struct work_struct dp_wakeup_work; 426 struct work_struct dp_wakeup_work;
428 struct usb_serial_port *dp_port; 427 struct usb_serial_port *dp_port;
@@ -456,8 +455,9 @@ static int digi_write_room(struct tty_struct *tty);
456static int digi_chars_in_buffer(struct tty_struct *tty); 455static int digi_chars_in_buffer(struct tty_struct *tty);
457static int digi_open(struct tty_struct *tty, struct usb_serial_port *port, 456static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
458 struct file *filp); 457 struct file *filp);
459static void digi_close(struct tty_struct *tty, struct usb_serial_port *port, 458static void digi_close(struct usb_serial_port *port);
460 struct file *filp); 459static int digi_carrier_raised(struct usb_serial_port *port);
460static void digi_dtr_rts(struct usb_serial_port *port, int on);
461static int digi_startup_device(struct usb_serial *serial); 461static int digi_startup_device(struct usb_serial *serial);
462static int digi_startup(struct usb_serial *serial); 462static int digi_startup(struct usb_serial *serial);
463static void digi_shutdown(struct usb_serial *serial); 463static void digi_shutdown(struct usb_serial *serial);
@@ -510,6 +510,8 @@ static struct usb_serial_driver digi_acceleport_2_device = {
510 .num_ports = 3, 510 .num_ports = 3,
511 .open = digi_open, 511 .open = digi_open,
512 .close = digi_close, 512 .close = digi_close,
513 .dtr_rts = digi_dtr_rts,
514 .carrier_raised = digi_carrier_raised,
513 .write = digi_write, 515 .write = digi_write,
514 .write_room = digi_write_room, 516 .write_room = digi_write_room,
515 .write_bulk_callback = digi_write_bulk_callback, 517 .write_bulk_callback = digi_write_bulk_callback,
@@ -1328,6 +1330,19 @@ static int digi_chars_in_buffer(struct tty_struct *tty)
1328 1330
1329} 1331}
1330 1332
1333static void digi_dtr_rts(struct usb_serial_port *port, int on)
1334{
1335 /* Adjust DTR and RTS */
1336 digi_set_modem_signals(port, on * (TIOCM_DTR|TIOCM_RTS), 1);
1337}
1338
1339static int digi_carrier_raised(struct usb_serial_port *port)
1340{
1341 struct digi_port *priv = usb_get_serial_port_data(port);
1342 if (priv->dp_modem_signals & TIOCM_CD)
1343 return 1;
1344 return 0;
1345}
1331 1346
1332static int digi_open(struct tty_struct *tty, struct usb_serial_port *port, 1347static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
1333 struct file *filp) 1348 struct file *filp)
@@ -1336,7 +1351,6 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
1336 unsigned char buf[32]; 1351 unsigned char buf[32];
1337 struct digi_port *priv = usb_get_serial_port_data(port); 1352 struct digi_port *priv = usb_get_serial_port_data(port);
1338 struct ktermios not_termios; 1353 struct ktermios not_termios;
1339 unsigned long flags = 0;
1340 1354
1341 dbg("digi_open: TOP: port=%d, open_count=%d", 1355 dbg("digi_open: TOP: port=%d, open_count=%d",
1342 priv->dp_port_num, port->port.count); 1356 priv->dp_port_num, port->port.count);
@@ -1345,26 +1359,6 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
1345 if (digi_startup_device(port->serial) != 0) 1359 if (digi_startup_device(port->serial) != 0)
1346 return -ENXIO; 1360 return -ENXIO;
1347 1361
1348 spin_lock_irqsave(&priv->dp_port_lock, flags);
1349
1350 /* don't wait on a close in progress for non-blocking opens */
1351 if (priv->dp_in_close && (filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0) {
1352 spin_unlock_irqrestore(&priv->dp_port_lock, flags);
1353 return -EAGAIN;
1354 }
1355
1356 /* wait for a close in progress to finish */
1357 while (priv->dp_in_close) {
1358 cond_wait_interruptible_timeout_irqrestore(
1359 &priv->dp_close_wait, DIGI_RETRY_TIMEOUT,
1360 &priv->dp_port_lock, flags);
1361 if (signal_pending(current))
1362 return -EINTR;
1363 spin_lock_irqsave(&priv->dp_port_lock, flags);
1364 }
1365
1366 spin_unlock_irqrestore(&priv->dp_port_lock, flags);
1367
1368 /* read modem signals automatically whenever they change */ 1362 /* read modem signals automatically whenever they change */
1369 buf[0] = DIGI_CMD_READ_INPUT_SIGNALS; 1363 buf[0] = DIGI_CMD_READ_INPUT_SIGNALS;
1370 buf[1] = priv->dp_port_num; 1364 buf[1] = priv->dp_port_num;
@@ -1387,16 +1381,11 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
1387 not_termios.c_iflag = ~tty->termios->c_iflag; 1381 not_termios.c_iflag = ~tty->termios->c_iflag;
1388 digi_set_termios(tty, port, &not_termios); 1382 digi_set_termios(tty, port, &not_termios);
1389 } 1383 }
1390
1391 /* set DTR and RTS */
1392 digi_set_modem_signals(port, TIOCM_DTR|TIOCM_RTS, 1);
1393
1394 return 0; 1384 return 0;
1395} 1385}
1396 1386
1397 1387
1398static void digi_close(struct tty_struct *tty, struct usb_serial_port *port, 1388static void digi_close(struct usb_serial_port *port)
1399 struct file *filp)
1400{ 1389{
1401 DEFINE_WAIT(wait); 1390 DEFINE_WAIT(wait);
1402 int ret; 1391 int ret;
@@ -1411,28 +1400,9 @@ static void digi_close(struct tty_struct *tty, struct usb_serial_port *port,
1411 if (port->serial->disconnected) 1400 if (port->serial->disconnected)
1412 goto exit; 1401 goto exit;
1413 1402
1414 /* do cleanup only after final close on this port */
1415 spin_lock_irq(&priv->dp_port_lock);
1416 priv->dp_in_close = 1;
1417 spin_unlock_irq(&priv->dp_port_lock);
1418
1419 /* tell line discipline to process only XON/XOFF */
1420 tty->closing = 1;
1421
1422 /* wait for output to drain */
1423 if ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0)
1424 tty_wait_until_sent(tty, DIGI_CLOSE_TIMEOUT);
1425
1426 /* flush driver and line discipline buffers */
1427 tty_driver_flush_buffer(tty);
1428 tty_ldisc_flush(tty);
1429
1430 if (port->serial->dev) { 1403 if (port->serial->dev) {
1431 /* wait for transmit idle */ 1404 /* FIXME: Transmit idle belongs in the wait_unti_sent path */
1432 if ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0) 1405 digi_transmit_idle(port, DIGI_CLOSE_TIMEOUT);
1433 digi_transmit_idle(port, DIGI_CLOSE_TIMEOUT);
1434 /* drop DTR and RTS */
1435 digi_set_modem_signals(port, 0, 0);
1436 1406
1437 /* disable input flow control */ 1407 /* disable input flow control */
1438 buf[0] = DIGI_CMD_SET_INPUT_FLOW_CONTROL; 1408 buf[0] = DIGI_CMD_SET_INPUT_FLOW_CONTROL;
@@ -1477,11 +1447,9 @@ static void digi_close(struct tty_struct *tty, struct usb_serial_port *port,
1477 /* shutdown any outstanding bulk writes */ 1447 /* shutdown any outstanding bulk writes */
1478 usb_kill_urb(port->write_urb); 1448 usb_kill_urb(port->write_urb);
1479 } 1449 }
1480 tty->closing = 0;
1481exit: 1450exit:
1482 spin_lock_irq(&priv->dp_port_lock); 1451 spin_lock_irq(&priv->dp_port_lock);
1483 priv->dp_write_urb_in_use = 0; 1452 priv->dp_write_urb_in_use = 0;
1484 priv->dp_in_close = 0;
1485 wake_up_interruptible(&priv->dp_close_wait); 1453 wake_up_interruptible(&priv->dp_close_wait);
1486 spin_unlock_irq(&priv->dp_port_lock); 1454 spin_unlock_irq(&priv->dp_port_lock);
1487 mutex_unlock(&port->serial->disc_mutex); 1455 mutex_unlock(&port->serial->disc_mutex);
@@ -1560,7 +1528,6 @@ static int digi_startup(struct usb_serial *serial)
1560 priv->dp_throttled = 0; 1528 priv->dp_throttled = 0;
1561 priv->dp_throttle_restart = 0; 1529 priv->dp_throttle_restart = 0;
1562 init_waitqueue_head(&priv->dp_flush_wait); 1530 init_waitqueue_head(&priv->dp_flush_wait);
1563 priv->dp_in_close = 0;
1564 init_waitqueue_head(&priv->dp_close_wait); 1531 init_waitqueue_head(&priv->dp_close_wait);
1565 INIT_WORK(&priv->dp_wakeup_work, digi_wakeup_write_lock); 1532 INIT_WORK(&priv->dp_wakeup_work, digi_wakeup_write_lock);
1566 priv->dp_port = serial->port[i]; 1533 priv->dp_port = serial->port[i];
diff --git a/drivers/usb/serial/empeg.c b/drivers/usb/serial/empeg.c
index c709ec474a80..2b141ccb0cd9 100644
--- a/drivers/usb/serial/empeg.c
+++ b/drivers/usb/serial/empeg.c
@@ -81,8 +81,7 @@ static int debug;
81/* function prototypes for an empeg-car player */ 81/* function prototypes for an empeg-car player */
82static int empeg_open(struct tty_struct *tty, struct usb_serial_port *port, 82static int empeg_open(struct tty_struct *tty, struct usb_serial_port *port,
83 struct file *filp); 83 struct file *filp);
84static void empeg_close(struct tty_struct *tty, struct usb_serial_port *port, 84static void empeg_close(struct usb_serial_port *port);
85 struct file *filp);
86static int empeg_write(struct tty_struct *tty, struct usb_serial_port *port, 85static int empeg_write(struct tty_struct *tty, struct usb_serial_port *port,
87 const unsigned char *buf, 86 const unsigned char *buf,
88 int count); 87 int count);
@@ -181,8 +180,7 @@ static int empeg_open(struct tty_struct *tty, struct usb_serial_port *port,
181} 180}
182 181
183 182
184static void empeg_close(struct tty_struct *tty, struct usb_serial_port *port, 183static void empeg_close(struct usb_serial_port *port)
185 struct file *filp)
186{ 184{
187 dbg("%s - port %d", __func__, port->number); 185 dbg("%s - port %d", __func__, port->number);
188 186
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index d9fcdaedf389..683304d60615 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -89,6 +89,7 @@ struct ftdi_private {
89 int force_rtscts; /* if non-zero, force RTS-CTS to always 89 int force_rtscts; /* if non-zero, force RTS-CTS to always
90 be enabled */ 90 be enabled */
91 91
92 unsigned int latency; /* latency setting in use */
92 spinlock_t tx_lock; /* spinlock for transmit state */ 93 spinlock_t tx_lock; /* spinlock for transmit state */
93 unsigned long tx_bytes; 94 unsigned long tx_bytes;
94 unsigned long tx_outstanding_bytes; 95 unsigned long tx_outstanding_bytes;
@@ -719,8 +720,8 @@ static int ftdi_sio_port_probe(struct usb_serial_port *port);
719static int ftdi_sio_port_remove(struct usb_serial_port *port); 720static int ftdi_sio_port_remove(struct usb_serial_port *port);
720static int ftdi_open(struct tty_struct *tty, 721static int ftdi_open(struct tty_struct *tty,
721 struct usb_serial_port *port, struct file *filp); 722 struct usb_serial_port *port, struct file *filp);
722static void ftdi_close(struct tty_struct *tty, 723static void ftdi_close(struct usb_serial_port *port);
723 struct usb_serial_port *port, struct file *filp); 724static void ftdi_dtr_rts(struct usb_serial_port *port, int on);
724static int ftdi_write(struct tty_struct *tty, struct usb_serial_port *port, 725static int ftdi_write(struct tty_struct *tty, struct usb_serial_port *port,
725 const unsigned char *buf, int count); 726 const unsigned char *buf, int count);
726static int ftdi_write_room(struct tty_struct *tty); 727static int ftdi_write_room(struct tty_struct *tty);
@@ -758,6 +759,7 @@ static struct usb_serial_driver ftdi_sio_device = {
758 .port_remove = ftdi_sio_port_remove, 759 .port_remove = ftdi_sio_port_remove,
759 .open = ftdi_open, 760 .open = ftdi_open,
760 .close = ftdi_close, 761 .close = ftdi_close,
762 .dtr_rts = ftdi_dtr_rts,
761 .throttle = ftdi_throttle, 763 .throttle = ftdi_throttle,
762 .unthrottle = ftdi_unthrottle, 764 .unthrottle = ftdi_unthrottle,
763 .write = ftdi_write, 765 .write = ftdi_write,
@@ -1037,7 +1039,54 @@ static int change_speed(struct tty_struct *tty, struct usb_serial_port *port)
1037 return rv; 1039 return rv;
1038} 1040}
1039 1041
1042static int write_latency_timer(struct usb_serial_port *port)
1043{
1044 struct ftdi_private *priv = usb_get_serial_port_data(port);
1045 struct usb_device *udev = port->serial->dev;
1046 char buf[1];
1047 int rv = 0;
1048 int l = priv->latency;
1049
1050 if (priv->flags & ASYNC_LOW_LATENCY)
1051 l = 1;
1052
1053 dbg("%s: setting latency timer = %i", __func__, l);
1054
1055 rv = usb_control_msg(udev,
1056 usb_sndctrlpipe(udev, 0),
1057 FTDI_SIO_SET_LATENCY_TIMER_REQUEST,
1058 FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE,
1059 l, priv->interface,
1060 buf, 0, WDR_TIMEOUT);
1061
1062 if (rv < 0)
1063 dev_err(&port->dev, "Unable to write latency timer: %i\n", rv);
1064 return rv;
1065}
1066
1067static int read_latency_timer(struct usb_serial_port *port)
1068{
1069 struct ftdi_private *priv = usb_get_serial_port_data(port);
1070 struct usb_device *udev = port->serial->dev;
1071 unsigned short latency = 0;
1072 int rv = 0;
1073
1040 1074
1075 dbg("%s", __func__);
1076
1077 rv = usb_control_msg(udev,
1078 usb_rcvctrlpipe(udev, 0),
1079 FTDI_SIO_GET_LATENCY_TIMER_REQUEST,
1080 FTDI_SIO_GET_LATENCY_TIMER_REQUEST_TYPE,
1081 0, priv->interface,
1082 (char *) &latency, 1, WDR_TIMEOUT);
1083
1084 if (rv < 0) {
1085 dev_err(&port->dev, "Unable to read latency timer: %i\n", rv);
1086 return -EIO;
1087 }
1088 return latency;
1089}
1041 1090
1042static int get_serial_info(struct usb_serial_port *port, 1091static int get_serial_info(struct usb_serial_port *port,
1043 struct serial_struct __user *retinfo) 1092 struct serial_struct __user *retinfo)
@@ -1097,6 +1146,7 @@ static int set_serial_info(struct tty_struct *tty,
1097 priv->custom_divisor = new_serial.custom_divisor; 1146 priv->custom_divisor = new_serial.custom_divisor;
1098 1147
1099 tty->low_latency = (priv->flags & ASYNC_LOW_LATENCY) ? 1 : 0; 1148 tty->low_latency = (priv->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
1149 write_latency_timer(port);
1100 1150
1101check_and_exit: 1151check_and_exit:
1102 if ((old_priv.flags & ASYNC_SPD_MASK) != 1152 if ((old_priv.flags & ASYNC_SPD_MASK) !=
@@ -1192,27 +1242,13 @@ static ssize_t show_latency_timer(struct device *dev,
1192{ 1242{
1193 struct usb_serial_port *port = to_usb_serial_port(dev); 1243 struct usb_serial_port *port = to_usb_serial_port(dev);
1194 struct ftdi_private *priv = usb_get_serial_port_data(port); 1244 struct ftdi_private *priv = usb_get_serial_port_data(port);
1195 struct usb_device *udev = port->serial->dev; 1245 if (priv->flags & ASYNC_LOW_LATENCY)
1196 unsigned short latency = 0; 1246 return sprintf(buf, "1\n");
1197 int rv = 0; 1247 else
1198 1248 return sprintf(buf, "%i\n", priv->latency);
1199
1200 dbg("%s", __func__);
1201
1202 rv = usb_control_msg(udev,
1203 usb_rcvctrlpipe(udev, 0),
1204 FTDI_SIO_GET_LATENCY_TIMER_REQUEST,
1205 FTDI_SIO_GET_LATENCY_TIMER_REQUEST_TYPE,
1206 0, priv->interface,
1207 (char *) &latency, 1, WDR_TIMEOUT);
1208
1209 if (rv < 0) {
1210 dev_err(dev, "Unable to read latency timer: %i\n", rv);
1211 return -EIO;
1212 }
1213 return sprintf(buf, "%i\n", latency);
1214} 1249}
1215 1250
1251
1216/* Write a new value of the latency timer, in units of milliseconds. */ 1252/* Write a new value of the latency timer, in units of milliseconds. */
1217static ssize_t store_latency_timer(struct device *dev, 1253static ssize_t store_latency_timer(struct device *dev,
1218 struct device_attribute *attr, const char *valbuf, 1254 struct device_attribute *attr, const char *valbuf,
@@ -1220,25 +1256,13 @@ static ssize_t store_latency_timer(struct device *dev,
1220{ 1256{
1221 struct usb_serial_port *port = to_usb_serial_port(dev); 1257 struct usb_serial_port *port = to_usb_serial_port(dev);
1222 struct ftdi_private *priv = usb_get_serial_port_data(port); 1258 struct ftdi_private *priv = usb_get_serial_port_data(port);
1223 struct usb_device *udev = port->serial->dev;
1224 char buf[1];
1225 int v = simple_strtoul(valbuf, NULL, 10); 1259 int v = simple_strtoul(valbuf, NULL, 10);
1226 int rv = 0; 1260 int rv = 0;
1227 1261
1228 dbg("%s: setting latency timer = %i", __func__, v); 1262 priv->latency = v;
1229 1263 rv = write_latency_timer(port);
1230 rv = usb_control_msg(udev, 1264 if (rv < 0)
1231 usb_sndctrlpipe(udev, 0),
1232 FTDI_SIO_SET_LATENCY_TIMER_REQUEST,
1233 FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE,
1234 v, priv->interface,
1235 buf, 0, WDR_TIMEOUT);
1236
1237 if (rv < 0) {
1238 dev_err(dev, "Unable to write latency timer: %i\n", rv);
1239 return -EIO; 1265 return -EIO;
1240 }
1241
1242 return count; 1266 return count;
1243} 1267}
1244 1268
@@ -1392,6 +1416,7 @@ static int ftdi_sio_port_probe(struct usb_serial_port *port)
1392 usb_set_serial_port_data(port, priv); 1416 usb_set_serial_port_data(port, priv);
1393 1417
1394 ftdi_determine_type(port); 1418 ftdi_determine_type(port);
1419 read_latency_timer(port);
1395 create_sysfs_attrs(port); 1420 create_sysfs_attrs(port);
1396 return 0; 1421 return 0;
1397} 1422}
@@ -1514,6 +1539,8 @@ static int ftdi_open(struct tty_struct *tty,
1514 if (tty) 1539 if (tty)
1515 tty->low_latency = (priv->flags & ASYNC_LOW_LATENCY) ? 1 : 0; 1540 tty->low_latency = (priv->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
1516 1541
1542 write_latency_timer(port);
1543
1517 /* No error checking for this (will get errors later anyway) */ 1544 /* No error checking for this (will get errors later anyway) */
1518 /* See ftdi_sio.h for description of what is reset */ 1545 /* See ftdi_sio.h for description of what is reset */
1519 usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 1546 usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
@@ -1529,11 +1556,6 @@ static int ftdi_open(struct tty_struct *tty,
1529 if (tty) 1556 if (tty)
1530 ftdi_set_termios(tty, port, tty->termios); 1557 ftdi_set_termios(tty, port, tty->termios);
1531 1558
1532 /* FIXME: Flow control might be enabled, so it should be checked -
1533 we have no control of defaults! */
1534 /* Turn on RTS and DTR since we are not flow controlling by default */
1535 set_mctrl(port, TIOCM_DTR | TIOCM_RTS);
1536
1537 /* Not throttled */ 1559 /* Not throttled */
1538 spin_lock_irqsave(&priv->rx_lock, flags); 1560 spin_lock_irqsave(&priv->rx_lock, flags);
1539 priv->rx_flags &= ~(THROTTLED | ACTUALLY_THROTTLED); 1561 priv->rx_flags &= ~(THROTTLED | ACTUALLY_THROTTLED);
@@ -1558,6 +1580,30 @@ static int ftdi_open(struct tty_struct *tty,
1558} /* ftdi_open */ 1580} /* ftdi_open */
1559 1581
1560 1582
1583static void ftdi_dtr_rts(struct usb_serial_port *port, int on)
1584{
1585 struct ftdi_private *priv = usb_get_serial_port_data(port);
1586 char buf[1];
1587
1588 mutex_lock(&port->serial->disc_mutex);
1589 if (!port->serial->disconnected) {
1590 /* Disable flow control */
1591 if (!on && usb_control_msg(port->serial->dev,
1592 usb_sndctrlpipe(port->serial->dev, 0),
1593 FTDI_SIO_SET_FLOW_CTRL_REQUEST,
1594 FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE,
1595 0, priv->interface, buf, 0,
1596 WDR_TIMEOUT) < 0) {
1597 dev_err(&port->dev, "error from flowcontrol urb\n");
1598 }
1599 /* drop RTS and DTR */
1600 if (on)
1601 set_mctrl(port, TIOCM_DTR | TIOCM_RTS);
1602 else
1603 clear_mctrl(port, TIOCM_DTR | TIOCM_RTS);
1604 }
1605 mutex_unlock(&port->serial->disc_mutex);
1606}
1561 1607
1562/* 1608/*
1563 * usbserial:__serial_close only calls ftdi_close if the point is open 1609 * usbserial:__serial_close only calls ftdi_close if the point is open
@@ -1567,31 +1613,12 @@ static int ftdi_open(struct tty_struct *tty,
1567 * 1613 *
1568 */ 1614 */
1569 1615
1570static void ftdi_close(struct tty_struct *tty, 1616static void ftdi_close(struct usb_serial_port *port)
1571 struct usb_serial_port *port, struct file *filp)
1572{ /* ftdi_close */ 1617{ /* ftdi_close */
1573 unsigned int c_cflag = tty->termios->c_cflag;
1574 struct ftdi_private *priv = usb_get_serial_port_data(port); 1618 struct ftdi_private *priv = usb_get_serial_port_data(port);
1575 char buf[1];
1576 1619
1577 dbg("%s", __func__); 1620 dbg("%s", __func__);
1578 1621
1579 mutex_lock(&port->serial->disc_mutex);
1580 if (c_cflag & HUPCL && !port->serial->disconnected) {
1581 /* Disable flow control */
1582 if (usb_control_msg(port->serial->dev,
1583 usb_sndctrlpipe(port->serial->dev, 0),
1584 FTDI_SIO_SET_FLOW_CTRL_REQUEST,
1585 FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE,
1586 0, priv->interface, buf, 0,
1587 WDR_TIMEOUT) < 0) {
1588 dev_err(&port->dev, "error from flowcontrol urb\n");
1589 }
1590
1591 /* drop RTS and DTR */
1592 clear_mctrl(port, TIOCM_DTR | TIOCM_RTS);
1593 } /* Note change no line if hupcl is off */
1594 mutex_unlock(&port->serial->disc_mutex);
1595 1622
1596 /* cancel any scheduled reading */ 1623 /* cancel any scheduled reading */
1597 cancel_delayed_work_sync(&priv->rx_work); 1624 cancel_delayed_work_sync(&priv->rx_work);
diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c
index 586d30ff450b..ee25a3fe3b09 100644
--- a/drivers/usb/serial/garmin_gps.c
+++ b/drivers/usb/serial/garmin_gps.c
@@ -993,8 +993,7 @@ static int garmin_open(struct tty_struct *tty,
993} 993}
994 994
995 995
996static void garmin_close(struct tty_struct *tty, 996static void garmin_close(struct usb_serial_port *port)
997 struct usb_serial_port *port, struct file *filp)
998{ 997{
999 struct usb_serial *serial = port->serial; 998 struct usb_serial *serial = port->serial;
1000 struct garmin_data *garmin_data_p = usb_get_serial_port_data(port); 999 struct garmin_data *garmin_data_p = usb_get_serial_port_data(port);
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index 4cec9906ccf3..be82ea956720 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -184,8 +184,7 @@ int usb_serial_generic_resume(struct usb_serial *serial)
184} 184}
185EXPORT_SYMBOL_GPL(usb_serial_generic_resume); 185EXPORT_SYMBOL_GPL(usb_serial_generic_resume);
186 186
187void usb_serial_generic_close(struct tty_struct *tty, 187void usb_serial_generic_close(struct usb_serial_port *port)
188 struct usb_serial_port *port, struct file *filp)
189{ 188{
190 dbg("%s - port %d", __func__, port->number); 189 dbg("%s - port %d", __func__, port->number);
191 generic_cleanup(port); 190 generic_cleanup(port);
diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c
index fb4a73d090f6..53ef5996e33d 100644
--- a/drivers/usb/serial/io_edgeport.c
+++ b/drivers/usb/serial/io_edgeport.c
@@ -207,8 +207,7 @@ static void edge_bulk_out_cmd_callback(struct urb *urb);
207/* function prototypes for the usbserial callbacks */ 207/* function prototypes for the usbserial callbacks */
208static int edge_open(struct tty_struct *tty, struct usb_serial_port *port, 208static int edge_open(struct tty_struct *tty, struct usb_serial_port *port,
209 struct file *filp); 209 struct file *filp);
210static void edge_close(struct tty_struct *tty, struct usb_serial_port *port, 210static void edge_close(struct usb_serial_port *port);
211 struct file *filp);
212static int edge_write(struct tty_struct *tty, struct usb_serial_port *port, 211static int edge_write(struct tty_struct *tty, struct usb_serial_port *port,
213 const unsigned char *buf, int count); 212 const unsigned char *buf, int count);
214static int edge_write_room(struct tty_struct *tty); 213static int edge_write_room(struct tty_struct *tty);
@@ -965,7 +964,7 @@ static int edge_open(struct tty_struct *tty,
965 964
966 if (!edge_port->txfifo.fifo) { 965 if (!edge_port->txfifo.fifo) {
967 dbg("%s - no memory", __func__); 966 dbg("%s - no memory", __func__);
968 edge_close(tty, port, filp); 967 edge_close(port);
969 return -ENOMEM; 968 return -ENOMEM;
970 } 969 }
971 970
@@ -975,7 +974,7 @@ static int edge_open(struct tty_struct *tty,
975 974
976 if (!edge_port->write_urb) { 975 if (!edge_port->write_urb) {
977 dbg("%s - no memory", __func__); 976 dbg("%s - no memory", __func__);
978 edge_close(tty, port, filp); 977 edge_close(port);
979 return -ENOMEM; 978 return -ENOMEM;
980 } 979 }
981 980
@@ -1099,8 +1098,7 @@ static void block_until_tx_empty(struct edgeport_port *edge_port)
1099 * edge_close 1098 * edge_close
1100 * this function is called by the tty driver when a port is closed 1099 * this function is called by the tty driver when a port is closed
1101 *****************************************************************************/ 1100 *****************************************************************************/
1102static void edge_close(struct tty_struct *tty, 1101static void edge_close(struct usb_serial_port *port)
1103 struct usb_serial_port *port, struct file *filp)
1104{ 1102{
1105 struct edgeport_serial *edge_serial; 1103 struct edgeport_serial *edge_serial;
1106 struct edgeport_port *edge_port; 1104 struct edgeport_port *edge_port;
diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index 513b25e044c1..eabf20eeb370 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -2009,8 +2009,7 @@ release_es_lock:
2009 return status; 2009 return status;
2010} 2010}
2011 2011
2012static void edge_close(struct tty_struct *tty, 2012static void edge_close(struct usb_serial_port *port)
2013 struct usb_serial_port *port, struct file *filp)
2014{ 2013{
2015 struct edgeport_serial *edge_serial; 2014 struct edgeport_serial *edge_serial;
2016 struct edgeport_port *edge_port; 2015 struct edgeport_port *edge_port;
diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c
index cd62825a9ac3..c610a99fa477 100644
--- a/drivers/usb/serial/ipaq.c
+++ b/drivers/usb/serial/ipaq.c
@@ -76,8 +76,7 @@ static int initial_wait;
76/* Function prototypes for an ipaq */ 76/* Function prototypes for an ipaq */
77static int ipaq_open(struct tty_struct *tty, 77static int ipaq_open(struct tty_struct *tty,
78 struct usb_serial_port *port, struct file *filp); 78 struct usb_serial_port *port, struct file *filp);
79static void ipaq_close(struct tty_struct *tty, 79static void ipaq_close(struct usb_serial_port *port);
80 struct usb_serial_port *port, struct file *filp);
81static int ipaq_calc_num_ports(struct usb_serial *serial); 80static int ipaq_calc_num_ports(struct usb_serial *serial);
82static int ipaq_startup(struct usb_serial *serial); 81static int ipaq_startup(struct usb_serial *serial);
83static void ipaq_shutdown(struct usb_serial *serial); 82static void ipaq_shutdown(struct usb_serial *serial);
@@ -714,8 +713,7 @@ error:
714} 713}
715 714
716 715
717static void ipaq_close(struct tty_struct *tty, 716static void ipaq_close(struct usb_serial_port *port)
718 struct usb_serial_port *port, struct file *filp)
719{ 717{
720 struct ipaq_private *priv = usb_get_serial_port_data(port); 718 struct ipaq_private *priv = usb_get_serial_port_data(port);
721 719
diff --git a/drivers/usb/serial/ipw.c b/drivers/usb/serial/ipw.c
index da2a2b46644a..29ad038b9c8d 100644
--- a/drivers/usb/serial/ipw.c
+++ b/drivers/usb/serial/ipw.c
@@ -302,23 +302,17 @@ static int ipw_open(struct tty_struct *tty,
302 return 0; 302 return 0;
303} 303}
304 304
305static void ipw_close(struct tty_struct *tty, 305static void ipw_dtr_rts(struct usb_serial_port *port, int on)
306 struct usb_serial_port *port, struct file *filp)
307{ 306{
308 struct usb_device *dev = port->serial->dev; 307 struct usb_device *dev = port->serial->dev;
309 int result; 308 int result;
310 309
311 if (tty_hung_up_p(filp)) {
312 dbg("%s: tty_hung_up_p ...", __func__);
313 return;
314 }
315
316 /*--1: drop the dtr */ 310 /*--1: drop the dtr */
317 dbg("%s:dropping dtr", __func__); 311 dbg("%s:dropping dtr", __func__);
318 result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 312 result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
319 IPW_SIO_SET_PIN, 313 IPW_SIO_SET_PIN,
320 USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT, 314 USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT,
321 IPW_PIN_CLRDTR, 315 on ? IPW_PIN_SETDTR : IPW_PIN_CLRDTR,
322 0, 316 0,
323 NULL, 317 NULL,
324 0, 318 0,
@@ -332,7 +326,7 @@ static void ipw_close(struct tty_struct *tty,
332 result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 326 result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
333 IPW_SIO_SET_PIN, USB_TYPE_VENDOR | 327 IPW_SIO_SET_PIN, USB_TYPE_VENDOR |
334 USB_RECIP_INTERFACE | USB_DIR_OUT, 328 USB_RECIP_INTERFACE | USB_DIR_OUT,
335 IPW_PIN_CLRRTS, 329 on ? IPW_PIN_SETRTS : IPW_PIN_CLRRTS,
336 0, 330 0,
337 NULL, 331 NULL,
338 0, 332 0,
@@ -340,7 +334,12 @@ static void ipw_close(struct tty_struct *tty,
340 if (result < 0) 334 if (result < 0)
341 dev_err(&port->dev, 335 dev_err(&port->dev,
342 "dropping rts failed (error = %d)\n", result); 336 "dropping rts failed (error = %d)\n", result);
337}
343 338
339static void ipw_close(struct usb_serial_port *port)
340{
341 struct usb_device *dev = port->serial->dev;
342 int result;
344 343
345 /*--3: purge */ 344 /*--3: purge */
346 dbg("%s:sending purge", __func__); 345 dbg("%s:sending purge", __func__);
@@ -461,6 +460,7 @@ static struct usb_serial_driver ipw_device = {
461 .num_ports = 1, 460 .num_ports = 1,
462 .open = ipw_open, 461 .open = ipw_open,
463 .close = ipw_close, 462 .close = ipw_close,
463 .dtr_rts = ipw_dtr_rts,
464 .port_probe = ipw_probe, 464 .port_probe = ipw_probe,
465 .port_remove = ipw_disconnect, 465 .port_remove = ipw_disconnect,
466 .write = ipw_write, 466 .write = ipw_write,
diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c
index 4e2cda93da59..66009b6b763a 100644
--- a/drivers/usb/serial/ir-usb.c
+++ b/drivers/usb/serial/ir-usb.c
@@ -88,8 +88,7 @@ static int xbof = -1;
88static int ir_startup (struct usb_serial *serial); 88static int ir_startup (struct usb_serial *serial);
89static int ir_open(struct tty_struct *tty, struct usb_serial_port *port, 89static int ir_open(struct tty_struct *tty, struct usb_serial_port *port,
90 struct file *filep); 90 struct file *filep);
91static void ir_close(struct tty_struct *tty, struct usb_serial_port *port, 91static void ir_close(struct usb_serial_port *port);
92 struct file *filep);
93static int ir_write(struct tty_struct *tty, struct usb_serial_port *port, 92static int ir_write(struct tty_struct *tty, struct usb_serial_port *port,
94 const unsigned char *buf, int count); 93 const unsigned char *buf, int count);
95static void ir_write_bulk_callback (struct urb *urb); 94static void ir_write_bulk_callback (struct urb *urb);
@@ -346,8 +345,7 @@ static int ir_open(struct tty_struct *tty,
346 return result; 345 return result;
347} 346}
348 347
349static void ir_close(struct tty_struct *tty, 348static void ir_close(struct usb_serial_port *port)
350 struct usb_serial_port *port, struct file * filp)
351{ 349{
352 dbg("%s - port %d", __func__, port->number); 350 dbg("%s - port %d", __func__, port->number);
353 351
diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index 4473d442b2aa..76a3cc327bb9 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -40,7 +40,7 @@ static int debug;
40/* 40/*
41 * Version Information 41 * Version Information
42 */ 42 */
43#define DRIVER_VERSION "v0.5" 43#define DRIVER_VERSION "v0.10"
44#define DRIVER_DESC "Infinity USB Unlimited Phoenix driver" 44#define DRIVER_DESC "Infinity USB Unlimited Phoenix driver"
45 45
46static struct usb_device_id id_table[] = { 46static struct usb_device_id id_table[] = {
@@ -70,7 +70,6 @@ static void read_rxcmd_callback(struct urb *urb);
70struct iuu_private { 70struct iuu_private {
71 spinlock_t lock; /* store irq state */ 71 spinlock_t lock; /* store irq state */
72 wait_queue_head_t delta_msr_wait; 72 wait_queue_head_t delta_msr_wait;
73 u8 line_control;
74 u8 line_status; 73 u8 line_status;
75 u8 termios_initialized; 74 u8 termios_initialized;
76 int tiostatus; /* store IUART SIGNAL for tiocmget call */ 75 int tiostatus; /* store IUART SIGNAL for tiocmget call */
@@ -651,32 +650,33 @@ static int iuu_bulk_write(struct usb_serial_port *port)
651 unsigned long flags; 650 unsigned long flags;
652 int result; 651 int result;
653 int i; 652 int i;
653 int buf_len;
654 char *buf_ptr = port->write_urb->transfer_buffer; 654 char *buf_ptr = port->write_urb->transfer_buffer;
655 dbg("%s - enter", __func__); 655 dbg("%s - enter", __func__);
656 656
657 spin_lock_irqsave(&priv->lock, flags);
657 *buf_ptr++ = IUU_UART_ESC; 658 *buf_ptr++ = IUU_UART_ESC;
658 *buf_ptr++ = IUU_UART_TX; 659 *buf_ptr++ = IUU_UART_TX;
659 *buf_ptr++ = priv->writelen; 660 *buf_ptr++ = priv->writelen;
660 661
661 memcpy(buf_ptr, priv->writebuf, 662 memcpy(buf_ptr, priv->writebuf, priv->writelen);
662 priv->writelen); 663 buf_len = priv->writelen;
664 priv->writelen = 0;
665 spin_unlock_irqrestore(&priv->lock, flags);
663 if (debug == 1) { 666 if (debug == 1) {
664 for (i = 0; i < priv->writelen; i++) 667 for (i = 0; i < buf_len; i++)
665 sprintf(priv->dbgbuf + i*2 , 668 sprintf(priv->dbgbuf + i*2 ,
666 "%02X", priv->writebuf[i]); 669 "%02X", priv->writebuf[i]);
667 priv->dbgbuf[priv->writelen+i*2] = 0; 670 priv->dbgbuf[buf_len+i*2] = 0;
668 dbg("%s - writing %i chars : %s", __func__, 671 dbg("%s - writing %i chars : %s", __func__,
669 priv->writelen, priv->dbgbuf); 672 buf_len, priv->dbgbuf);
670 } 673 }
671 usb_fill_bulk_urb(port->write_urb, port->serial->dev, 674 usb_fill_bulk_urb(port->write_urb, port->serial->dev,
672 usb_sndbulkpipe(port->serial->dev, 675 usb_sndbulkpipe(port->serial->dev,
673 port->bulk_out_endpointAddress), 676 port->bulk_out_endpointAddress),
674 port->write_urb->transfer_buffer, priv->writelen + 3, 677 port->write_urb->transfer_buffer, buf_len + 3,
675 iuu_rxcmd, port); 678 iuu_rxcmd, port);
676 result = usb_submit_urb(port->write_urb, GFP_ATOMIC); 679 result = usb_submit_urb(port->write_urb, GFP_ATOMIC);
677 spin_lock_irqsave(&priv->lock, flags);
678 priv->writelen = 0;
679 spin_unlock_irqrestore(&priv->lock, flags);
680 usb_serial_port_softint(port); 680 usb_serial_port_softint(port);
681 return result; 681 return result;
682} 682}
@@ -770,14 +770,10 @@ static int iuu_uart_write(struct tty_struct *tty, struct usb_serial_port *port,
770 return -ENOMEM; 770 return -ENOMEM;
771 771
772 spin_lock_irqsave(&priv->lock, flags); 772 spin_lock_irqsave(&priv->lock, flags);
773 if (priv->writelen > 0) { 773
774 /* buffer already filled but not commited */
775 spin_unlock_irqrestore(&priv->lock, flags);
776 return 0;
777 }
778 /* fill the buffer */ 774 /* fill the buffer */
779 memcpy(priv->writebuf, buf, count); 775 memcpy(priv->writebuf + priv->writelen, buf, count);
780 priv->writelen = count; 776 priv->writelen += count;
781 spin_unlock_irqrestore(&priv->lock, flags); 777 spin_unlock_irqrestore(&priv->lock, flags);
782 778
783 return count; 779 return count;
@@ -819,7 +815,7 @@ static int iuu_uart_on(struct usb_serial_port *port)
819 buf[0] = IUU_UART_ENABLE; 815 buf[0] = IUU_UART_ENABLE;
820 buf[1] = (u8) ((IUU_BAUD_9600 >> 8) & 0x00FF); 816 buf[1] = (u8) ((IUU_BAUD_9600 >> 8) & 0x00FF);
821 buf[2] = (u8) (0x00FF & IUU_BAUD_9600); 817 buf[2] = (u8) (0x00FF & IUU_BAUD_9600);
822 buf[3] = (u8) (0x0F0 & IUU_TWO_STOP_BITS) | (0x07 & IUU_PARITY_EVEN); 818 buf[3] = (u8) (0x0F0 & IUU_ONE_STOP_BIT) | (0x07 & IUU_PARITY_EVEN);
823 819
824 status = bulk_immediate(port, buf, 4); 820 status = bulk_immediate(port, buf, 4);
825 if (status != IUU_OPERATION_OK) { 821 if (status != IUU_OPERATION_OK) {
@@ -946,19 +942,59 @@ static int iuu_uart_baud(struct usb_serial_port *port, u32 baud,
946 return status; 942 return status;
947} 943}
948 944
949static int set_control_lines(struct usb_device *dev, u8 value) 945static void iuu_set_termios(struct tty_struct *tty,
946 struct usb_serial_port *port, struct ktermios *old_termios)
950{ 947{
951 return 0; 948 const u32 supported_mask = CMSPAR|PARENB|PARODD;
949
950 unsigned int cflag = tty->termios->c_cflag;
951 int status;
952 u32 actual;
953 u32 parity;
954 int csize = CS7;
955 int baud = 9600; /* Fixed for the moment */
956 u32 newval = cflag & supported_mask;
957
958 /* compute the parity parameter */
959 parity = 0;
960 if (cflag & CMSPAR) { /* Using mark space */
961 if (cflag & PARODD)
962 parity |= IUU_PARITY_SPACE;
963 else
964 parity |= IUU_PARITY_MARK;
965 } else if (!(cflag & PARENB)) {
966 parity |= IUU_PARITY_NONE;
967 csize = CS8;
968 } else if (cflag & PARODD)
969 parity |= IUU_PARITY_ODD;
970 else
971 parity |= IUU_PARITY_EVEN;
972
973 parity |= (cflag & CSTOPB ? IUU_TWO_STOP_BITS : IUU_ONE_STOP_BIT);
974
975 /* set it */
976 status = iuu_uart_baud(port,
977 (clockmode == 2) ? 16457 : 9600 * boost / 100,
978 &actual, parity);
979
980 /* set the termios value to the real one, so the user now what has
981 * changed. We support few fields so its easies to copy the old hw
982 * settings back over and then adjust them
983 */
984 if (old_termios)
985 tty_termios_copy_hw(tty->termios, old_termios);
986 if (status != 0) /* Set failed - return old bits */
987 return;
988 /* Re-encode speed, parity and csize */
989 tty_encode_baud_rate(tty, baud, baud);
990 tty->termios->c_cflag &= ~(supported_mask|CSIZE);
991 tty->termios->c_cflag |= newval | csize;
952} 992}
953 993
954static void iuu_close(struct tty_struct *tty, 994static void iuu_close(struct usb_serial_port *port)
955 struct usb_serial_port *port, struct file *filp)
956{ 995{
957 /* iuu_led (port,255,0,0,0); */ 996 /* iuu_led (port,255,0,0,0); */
958 struct usb_serial *serial; 997 struct usb_serial *serial;
959 struct iuu_private *priv = usb_get_serial_port_data(port);
960 unsigned long flags;
961 unsigned int c_cflag;
962 998
963 serial = port->serial; 999 serial = port->serial;
964 if (!serial) 1000 if (!serial)
@@ -968,17 +1004,6 @@ static void iuu_close(struct tty_struct *tty,
968 1004
969 iuu_uart_off(port); 1005 iuu_uart_off(port);
970 if (serial->dev) { 1006 if (serial->dev) {
971 if (tty) {
972 c_cflag = tty->termios->c_cflag;
973 if (c_cflag & HUPCL) {
974 /* drop DTR and RTS */
975 priv = usb_get_serial_port_data(port);
976 spin_lock_irqsave(&priv->lock, flags);
977 priv->line_control = 0;
978 spin_unlock_irqrestore(&priv->lock, flags);
979 set_control_lines(port->serial->dev, 0);
980 }
981 }
982 /* free writebuf */ 1007 /* free writebuf */
983 /* shutdown our urbs */ 1008 /* shutdown our urbs */
984 dbg("%s - shutting down urbs", __func__); 1009 dbg("%s - shutting down urbs", __func__);
@@ -1154,7 +1179,7 @@ static int iuu_open(struct tty_struct *tty,
1154 if (result) { 1179 if (result) {
1155 dev_err(&port->dev, "%s - failed submitting read urb," 1180 dev_err(&port->dev, "%s - failed submitting read urb,"
1156 " error %d\n", __func__, result); 1181 " error %d\n", __func__, result);
1157 iuu_close(tty, port, NULL); 1182 iuu_close(port);
1158 return -EPROTO; 1183 return -EPROTO;
1159 } else { 1184 } else {
1160 dbg("%s - rxcmd OK", __func__); 1185 dbg("%s - rxcmd OK", __func__);
@@ -1175,6 +1200,7 @@ static struct usb_serial_driver iuu_device = {
1175 .read_bulk_callback = iuu_uart_read_callback, 1200 .read_bulk_callback = iuu_uart_read_callback,
1176 .tiocmget = iuu_tiocmget, 1201 .tiocmget = iuu_tiocmget,
1177 .tiocmset = iuu_tiocmset, 1202 .tiocmset = iuu_tiocmset,
1203 .set_termios = iuu_set_termios,
1178 .attach = iuu_startup, 1204 .attach = iuu_startup,
1179 .shutdown = iuu_shutdown, 1205 .shutdown = iuu_shutdown,
1180}; 1206};
diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c
index 00daa8f7759a..f1195a98f316 100644
--- a/drivers/usb/serial/keyspan.c
+++ b/drivers/usb/serial/keyspan.c
@@ -1298,8 +1298,16 @@ static inline void stop_urb(struct urb *urb)
1298 usb_kill_urb(urb); 1298 usb_kill_urb(urb);
1299} 1299}
1300 1300
1301static void keyspan_close(struct tty_struct *tty, 1301static void keyspan_dtr_rts(struct usb_serial_port *port, int on)
1302 struct usb_serial_port *port, struct file *filp) 1302{
1303 struct keyspan_port_private *p_priv = usb_get_serial_port_data(port);
1304
1305 p_priv->rts_state = on;
1306 p_priv->dtr_state = on;
1307 keyspan_send_setup(port, 0);
1308}
1309
1310static void keyspan_close(struct usb_serial_port *port)
1303{ 1311{
1304 int i; 1312 int i;
1305 struct usb_serial *serial = port->serial; 1313 struct usb_serial *serial = port->serial;
@@ -1336,7 +1344,6 @@ static void keyspan_close(struct tty_struct *tty,
1336 stop_urb(p_priv->out_urbs[i]); 1344 stop_urb(p_priv->out_urbs[i]);
1337 } 1345 }
1338 } 1346 }
1339 tty_port_tty_set(&port->port, NULL);
1340} 1347}
1341 1348
1342/* download the firmware to a pre-renumeration device */ 1349/* download the firmware to a pre-renumeration device */
diff --git a/drivers/usb/serial/keyspan.h b/drivers/usb/serial/keyspan.h
index 38b4582e0734..0d4569b60768 100644
--- a/drivers/usb/serial/keyspan.h
+++ b/drivers/usb/serial/keyspan.h
@@ -38,9 +38,8 @@
38static int keyspan_open (struct tty_struct *tty, 38static int keyspan_open (struct tty_struct *tty,
39 struct usb_serial_port *port, 39 struct usb_serial_port *port,
40 struct file *filp); 40 struct file *filp);
41static void keyspan_close (struct tty_struct *tty, 41static void keyspan_close (struct usb_serial_port *port);
42 struct usb_serial_port *port, 42static void keyspan_dtr_rts (struct usb_serial_port *port, int on);
43 struct file *filp);
44static int keyspan_startup (struct usb_serial *serial); 43static int keyspan_startup (struct usb_serial *serial);
45static void keyspan_shutdown (struct usb_serial *serial); 44static void keyspan_shutdown (struct usb_serial *serial);
46static int keyspan_write_room (struct tty_struct *tty); 45static int keyspan_write_room (struct tty_struct *tty);
@@ -562,6 +561,7 @@ static struct usb_serial_driver keyspan_1port_device = {
562 .num_ports = 1, 561 .num_ports = 1,
563 .open = keyspan_open, 562 .open = keyspan_open,
564 .close = keyspan_close, 563 .close = keyspan_close,
564 .dtr_rts = keyspan_dtr_rts,
565 .write = keyspan_write, 565 .write = keyspan_write,
566 .write_room = keyspan_write_room, 566 .write_room = keyspan_write_room,
567 .set_termios = keyspan_set_termios, 567 .set_termios = keyspan_set_termios,
@@ -582,6 +582,7 @@ static struct usb_serial_driver keyspan_2port_device = {
582 .num_ports = 2, 582 .num_ports = 2,
583 .open = keyspan_open, 583 .open = keyspan_open,
584 .close = keyspan_close, 584 .close = keyspan_close,
585 .dtr_rts = keyspan_dtr_rts,
585 .write = keyspan_write, 586 .write = keyspan_write,
586 .write_room = keyspan_write_room, 587 .write_room = keyspan_write_room,
587 .set_termios = keyspan_set_termios, 588 .set_termios = keyspan_set_termios,
@@ -602,6 +603,7 @@ static struct usb_serial_driver keyspan_4port_device = {
602 .num_ports = 4, 603 .num_ports = 4,
603 .open = keyspan_open, 604 .open = keyspan_open,
604 .close = keyspan_close, 605 .close = keyspan_close,
606 .dtr_rts = keyspan_dtr_rts,
605 .write = keyspan_write, 607 .write = keyspan_write,
606 .write_room = keyspan_write_room, 608 .write_room = keyspan_write_room,
607 .set_termios = keyspan_set_termios, 609 .set_termios = keyspan_set_termios,
diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c
index bf1ae247da66..ab769dbea1b3 100644
--- a/drivers/usb/serial/keyspan_pda.c
+++ b/drivers/usb/serial/keyspan_pda.c
@@ -651,6 +651,35 @@ static int keyspan_pda_chars_in_buffer(struct tty_struct *tty)
651} 651}
652 652
653 653
654static void keyspan_pda_dtr_rts(struct usb_serial_port *port, int on)
655{
656 struct usb_serial *serial = port->serial;
657
658 if (serial->dev) {
659 if (on)
660 keyspan_pda_set_modem_info(serial, (1<<7) | (1<< 2));
661 else
662 keyspan_pda_set_modem_info(serial, 0);
663 }
664}
665
666static int keyspan_pda_carrier_raised(struct usb_serial_port *port)
667{
668 struct usb_serial *serial = port->serial;
669 unsigned char modembits;
670
671 /* If we can read the modem status and the DCD is low then
672 carrier is not raised yet */
673 if (keyspan_pda_get_modem_info(serial, &modembits) >= 0) {
674 if (!(modembits & (1>>6)))
675 return 0;
676 }
677 /* Carrier raised, or we failed (eg disconnected) so
678 progress accordingly */
679 return 1;
680}
681
682
654static int keyspan_pda_open(struct tty_struct *tty, 683static int keyspan_pda_open(struct tty_struct *tty,
655 struct usb_serial_port *port, struct file *filp) 684 struct usb_serial_port *port, struct file *filp)
656{ 685{
@@ -682,13 +711,6 @@ static int keyspan_pda_open(struct tty_struct *tty,
682 priv->tx_room = room; 711 priv->tx_room = room;
683 priv->tx_throttled = room ? 0 : 1; 712 priv->tx_throttled = room ? 0 : 1;
684 713
685 /* the normal serial device seems to always turn on DTR and RTS here,
686 so do the same */
687 if (tty && (tty->termios->c_cflag & CBAUD))
688 keyspan_pda_set_modem_info(serial, (1<<7) | (1<<2));
689 else
690 keyspan_pda_set_modem_info(serial, 0);
691
692 /*Start reading from the device*/ 714 /*Start reading from the device*/
693 port->interrupt_in_urb->dev = serial->dev; 715 port->interrupt_in_urb->dev = serial->dev;
694 rc = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); 716 rc = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL);
@@ -700,19 +722,11 @@ static int keyspan_pda_open(struct tty_struct *tty,
700error: 722error:
701 return rc; 723 return rc;
702} 724}
703 725static void keyspan_pda_close(struct usb_serial_port *port)
704
705static void keyspan_pda_close(struct tty_struct *tty,
706 struct usb_serial_port *port, struct file *filp)
707{ 726{
708 struct usb_serial *serial = port->serial; 727 struct usb_serial *serial = port->serial;
709 728
710 if (serial->dev) { 729 if (serial->dev) {
711 /* the normal serial device seems to always shut
712 off DTR and RTS now */
713 if (tty->termios->c_cflag & HUPCL)
714 keyspan_pda_set_modem_info(serial, 0);
715
716 /* shutdown our bulk reads and writes */ 730 /* shutdown our bulk reads and writes */
717 usb_kill_urb(port->write_urb); 731 usb_kill_urb(port->write_urb);
718 usb_kill_urb(port->interrupt_in_urb); 732 usb_kill_urb(port->interrupt_in_urb);
@@ -839,6 +853,8 @@ static struct usb_serial_driver keyspan_pda_device = {
839 .usb_driver = &keyspan_pda_driver, 853 .usb_driver = &keyspan_pda_driver,
840 .id_table = id_table_std, 854 .id_table = id_table_std,
841 .num_ports = 1, 855 .num_ports = 1,
856 .dtr_rts = keyspan_pda_dtr_rts,
857 .carrier_raised = keyspan_pda_carrier_raised,
842 .open = keyspan_pda_open, 858 .open = keyspan_pda_open,
843 .close = keyspan_pda_close, 859 .close = keyspan_pda_close,
844 .write = keyspan_pda_write, 860 .write = keyspan_pda_write,
diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c
index fcd9082f3e7f..fa817c66b3e8 100644
--- a/drivers/usb/serial/kl5kusb105.c
+++ b/drivers/usb/serial/kl5kusb105.c
@@ -76,8 +76,7 @@ static int klsi_105_startup(struct usb_serial *serial);
76static void klsi_105_shutdown(struct usb_serial *serial); 76static void klsi_105_shutdown(struct usb_serial *serial);
77static int klsi_105_open(struct tty_struct *tty, 77static int klsi_105_open(struct tty_struct *tty,
78 struct usb_serial_port *port, struct file *filp); 78 struct usb_serial_port *port, struct file *filp);
79static void klsi_105_close(struct tty_struct *tty, 79static void klsi_105_close(struct usb_serial_port *port);
80 struct usb_serial_port *port, struct file *filp);
81static int klsi_105_write(struct tty_struct *tty, 80static int klsi_105_write(struct tty_struct *tty,
82 struct usb_serial_port *port, const unsigned char *buf, int count); 81 struct usb_serial_port *port, const unsigned char *buf, int count);
83static void klsi_105_write_bulk_callback(struct urb *urb); 82static void klsi_105_write_bulk_callback(struct urb *urb);
@@ -447,8 +446,7 @@ exit:
447} /* klsi_105_open */ 446} /* klsi_105_open */
448 447
449 448
450static void klsi_105_close(struct tty_struct *tty, 449static void klsi_105_close(struct usb_serial_port *port)
451 struct usb_serial_port *port, struct file *filp)
452{ 450{
453 struct klsi_105_private *priv = usb_get_serial_port_data(port); 451 struct klsi_105_private *priv = usb_get_serial_port_data(port);
454 int rc; 452 int rc;
diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c
index c148544953b3..6b570498287f 100644
--- a/drivers/usb/serial/kobil_sct.c
+++ b/drivers/usb/serial/kobil_sct.c
@@ -72,8 +72,7 @@ static int kobil_startup(struct usb_serial *serial);
72static void kobil_shutdown(struct usb_serial *serial); 72static void kobil_shutdown(struct usb_serial *serial);
73static int kobil_open(struct tty_struct *tty, 73static int kobil_open(struct tty_struct *tty,
74 struct usb_serial_port *port, struct file *filp); 74 struct usb_serial_port *port, struct file *filp);
75static void kobil_close(struct tty_struct *tty, struct usb_serial_port *port, 75static void kobil_close(struct usb_serial_port *port);
76 struct file *filp);
77static int kobil_write(struct tty_struct *tty, struct usb_serial_port *port, 76static int kobil_write(struct tty_struct *tty, struct usb_serial_port *port,
78 const unsigned char *buf, int count); 77 const unsigned char *buf, int count);
79static int kobil_write_room(struct tty_struct *tty); 78static int kobil_write_room(struct tty_struct *tty);
@@ -209,7 +208,7 @@ static void kobil_shutdown(struct usb_serial *serial)
209 208
210 for (i = 0; i < serial->num_ports; ++i) { 209 for (i = 0; i < serial->num_ports; ++i) {
211 while (serial->port[i]->port.count > 0) 210 while (serial->port[i]->port.count > 0)
212 kobil_close(NULL, serial->port[i], NULL); 211 kobil_close(serial->port[i]);
213 kfree(usb_get_serial_port_data(serial->port[i])); 212 kfree(usb_get_serial_port_data(serial->port[i]));
214 usb_set_serial_port_data(serial->port[i], NULL); 213 usb_set_serial_port_data(serial->port[i], NULL);
215 } 214 }
@@ -346,11 +345,11 @@ static int kobil_open(struct tty_struct *tty,
346} 345}
347 346
348 347
349static void kobil_close(struct tty_struct *tty, 348static void kobil_close(struct usb_serial_port *port)
350 struct usb_serial_port *port, struct file *filp)
351{ 349{
352 dbg("%s - port %d", __func__, port->number); 350 dbg("%s - port %d", __func__, port->number);
353 351
352 /* FIXME: Add rts/dtr methods */
354 if (port->write_urb) { 353 if (port->write_urb) {
355 usb_kill_urb(port->write_urb); 354 usb_kill_urb(port->write_urb);
356 usb_free_urb(port->write_urb); 355 usb_free_urb(port->write_urb);
diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c
index 82930a7d5093..873795548fc0 100644
--- a/drivers/usb/serial/mct_u232.c
+++ b/drivers/usb/serial/mct_u232.c
@@ -95,8 +95,8 @@ static int mct_u232_startup(struct usb_serial *serial);
95static void mct_u232_shutdown(struct usb_serial *serial); 95static void mct_u232_shutdown(struct usb_serial *serial);
96static int mct_u232_open(struct tty_struct *tty, 96static int mct_u232_open(struct tty_struct *tty,
97 struct usb_serial_port *port, struct file *filp); 97 struct usb_serial_port *port, struct file *filp);
98static void mct_u232_close(struct tty_struct *tty, 98static void mct_u232_close(struct usb_serial_port *port);
99 struct usb_serial_port *port, struct file *filp); 99static void mct_u232_dtr_rts(struct usb_serial_port *port, int on);
100static void mct_u232_read_int_callback(struct urb *urb); 100static void mct_u232_read_int_callback(struct urb *urb);
101static void mct_u232_set_termios(struct tty_struct *tty, 101static void mct_u232_set_termios(struct tty_struct *tty,
102 struct usb_serial_port *port, struct ktermios *old); 102 struct usb_serial_port *port, struct ktermios *old);
@@ -140,6 +140,7 @@ static struct usb_serial_driver mct_u232_device = {
140 .num_ports = 1, 140 .num_ports = 1,
141 .open = mct_u232_open, 141 .open = mct_u232_open,
142 .close = mct_u232_close, 142 .close = mct_u232_close,
143 .dtr_rts = mct_u232_dtr_rts,
143 .throttle = mct_u232_throttle, 144 .throttle = mct_u232_throttle,
144 .unthrottle = mct_u232_unthrottle, 145 .unthrottle = mct_u232_unthrottle,
145 .read_int_callback = mct_u232_read_int_callback, 146 .read_int_callback = mct_u232_read_int_callback,
@@ -496,29 +497,29 @@ error:
496 return retval; 497 return retval;
497} /* mct_u232_open */ 498} /* mct_u232_open */
498 499
499 500static void mct_u232_dtr_rts(struct usb_serial_port *port, int on)
500static void mct_u232_close(struct tty_struct *tty,
501 struct usb_serial_port *port, struct file *filp)
502{ 501{
503 unsigned int c_cflag;
504 unsigned int control_state; 502 unsigned int control_state;
505 struct mct_u232_private *priv = usb_get_serial_port_data(port); 503 struct mct_u232_private *priv = usb_get_serial_port_data(port);
506 dbg("%s port %d", __func__, port->number);
507 504
508 if (tty) { 505 mutex_lock(&port->serial->disc_mutex);
509 c_cflag = tty->termios->c_cflag; 506 if (!port->serial->disconnected) {
510 mutex_lock(&port->serial->disc_mutex); 507 /* drop DTR and RTS */
511 if (c_cflag & HUPCL && !port->serial->disconnected) { 508 spin_lock_irq(&priv->lock);
512 /* drop DTR and RTS */ 509 if (on)
513 spin_lock_irq(&priv->lock); 510 priv->control_state |= TIOCM_DTR | TIOCM_RTS;
511 else
514 priv->control_state &= ~(TIOCM_DTR | TIOCM_RTS); 512 priv->control_state &= ~(TIOCM_DTR | TIOCM_RTS);
515 control_state = priv->control_state; 513 control_state = priv->control_state;
516 spin_unlock_irq(&priv->lock); 514 spin_unlock_irq(&priv->lock);
517 mct_u232_set_modem_ctrl(port->serial, control_state); 515 mct_u232_set_modem_ctrl(port->serial, control_state);
518 }
519 mutex_unlock(&port->serial->disc_mutex);
520 } 516 }
517 mutex_unlock(&port->serial->disc_mutex);
518}
521 519
520static void mct_u232_close(struct usb_serial_port *port)
521{
522 dbg("%s port %d", __func__, port->number);
522 523
523 if (port->serial->dev) { 524 if (port->serial->dev) {
524 /* shutdown our urbs */ 525 /* shutdown our urbs */
diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c
index 24e3b5d4b4d4..9e1a013ee7f6 100644
--- a/drivers/usb/serial/mos7720.c
+++ b/drivers/usb/serial/mos7720.c
@@ -533,8 +533,7 @@ static int mos7720_chars_in_buffer(struct tty_struct *tty)
533 return chars; 533 return chars;
534} 534}
535 535
536static void mos7720_close(struct tty_struct *tty, 536static void mos7720_close(struct usb_serial_port *port)
537 struct usb_serial_port *port, struct file *filp)
538{ 537{
539 struct usb_serial *serial; 538 struct usb_serial *serial;
540 struct moschip_port *mos7720_port; 539 struct moschip_port *mos7720_port;
diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
index 84fb1dcd30dc..10b78a37214f 100644
--- a/drivers/usb/serial/mos7840.c
+++ b/drivers/usb/serial/mos7840.c
@@ -1135,54 +1135,12 @@ static int mos7840_chars_in_buffer(struct tty_struct *tty)
1135 1135
1136} 1136}
1137 1137
1138/************************************************************************
1139 *
1140 * mos7840_block_until_tx_empty
1141 *
1142 * This function will block the close until one of the following:
1143 * 1. TX count are 0
1144 * 2. The mos7840 has stopped
1145 * 3. A timeout of 3 seconds without activity has expired
1146 *
1147 ************************************************************************/
1148static void mos7840_block_until_tx_empty(struct tty_struct *tty,
1149 struct moschip_port *mos7840_port)
1150{
1151 int timeout = HZ / 10;
1152 int wait = 30;
1153 int count;
1154
1155 while (1) {
1156
1157 count = mos7840_chars_in_buffer(tty);
1158
1159 /* Check for Buffer status */
1160 if (count <= 0)
1161 return;
1162
1163 /* Block the thread for a while */
1164 interruptible_sleep_on_timeout(&mos7840_port->wait_chase,
1165 timeout);
1166
1167 /* No activity.. count down section */
1168 wait--;
1169 if (wait == 0) {
1170 dbg("%s - TIMEOUT", __func__);
1171 return;
1172 } else {
1173 /* Reset timeout value back to seconds */
1174 wait = 30;
1175 }
1176 }
1177}
1178
1179/***************************************************************************** 1138/*****************************************************************************
1180 * mos7840_close 1139 * mos7840_close
1181 * this function is called by the tty driver when a port is closed 1140 * this function is called by the tty driver when a port is closed
1182 *****************************************************************************/ 1141 *****************************************************************************/
1183 1142
1184static void mos7840_close(struct tty_struct *tty, 1143static void mos7840_close(struct usb_serial_port *port)
1185 struct usb_serial_port *port, struct file *filp)
1186{ 1144{
1187 struct usb_serial *serial; 1145 struct usb_serial *serial;
1188 struct moschip_port *mos7840_port; 1146 struct moschip_port *mos7840_port;
@@ -1223,10 +1181,6 @@ static void mos7840_close(struct tty_struct *tty,
1223 } 1181 }
1224 } 1182 }
1225 1183
1226 if (serial->dev)
1227 /* flush and block until tx is empty */
1228 mos7840_block_until_tx_empty(tty, mos7840_port);
1229
1230 /* While closing port, shutdown all bulk read, write * 1184 /* While closing port, shutdown all bulk read, write *
1231 * and interrupt read if they exists */ 1185 * and interrupt read if they exists */
1232 if (serial->dev) { 1186 if (serial->dev) {
diff --git a/drivers/usb/serial/navman.c b/drivers/usb/serial/navman.c
index bcdcbb822705..f5f3751a888c 100644
--- a/drivers/usb/serial/navman.c
+++ b/drivers/usb/serial/navman.c
@@ -98,8 +98,7 @@ static int navman_open(struct tty_struct *tty,
98 return result; 98 return result;
99} 99}
100 100
101static void navman_close(struct tty_struct *tty, 101static void navman_close(struct usb_serial_port *port)
102 struct usb_serial_port *port, struct file *filp)
103{ 102{
104 dbg("%s - port %d", __func__, port->number); 103 dbg("%s - port %d", __func__, port->number);
105 104
diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c
index df6539712726..1104617334f5 100644
--- a/drivers/usb/serial/omninet.c
+++ b/drivers/usb/serial/omninet.c
@@ -66,8 +66,7 @@ static int debug;
66/* function prototypes */ 66/* function prototypes */
67static int omninet_open(struct tty_struct *tty, struct usb_serial_port *port, 67static int omninet_open(struct tty_struct *tty, struct usb_serial_port *port,
68 struct file *filp); 68 struct file *filp);
69static void omninet_close(struct tty_struct *tty, struct usb_serial_port *port, 69static void omninet_close(struct usb_serial_port *port);
70 struct file *filp);
71static void omninet_read_bulk_callback(struct urb *urb); 70static void omninet_read_bulk_callback(struct urb *urb);
72static void omninet_write_bulk_callback(struct urb *urb); 71static void omninet_write_bulk_callback(struct urb *urb);
73static int omninet_write(struct tty_struct *tty, struct usb_serial_port *port, 72static int omninet_write(struct tty_struct *tty, struct usb_serial_port *port,
@@ -189,8 +188,7 @@ static int omninet_open(struct tty_struct *tty,
189 return result; 188 return result;
190} 189}
191 190
192static void omninet_close(struct tty_struct *tty, 191static void omninet_close(struct usb_serial_port *port)
193 struct usb_serial_port *port, struct file *filp)
194{ 192{
195 dbg("%s - port %d", __func__, port->number); 193 dbg("%s - port %d", __func__, port->number);
196 usb_kill_urb(port->read_urb); 194 usb_kill_urb(port->read_urb);
diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c
index b500ad10b758..c20480aa9755 100644
--- a/drivers/usb/serial/opticon.c
+++ b/drivers/usb/serial/opticon.c
@@ -173,8 +173,7 @@ static int opticon_open(struct tty_struct *tty, struct usb_serial_port *port,
173 return result; 173 return result;
174} 174}
175 175
176static void opticon_close(struct tty_struct *tty, struct usb_serial_port *port, 176static void opticon_close(struct usb_serial_port *port)
177 struct file *filp)
178{ 177{
179 struct opticon_private *priv = usb_get_serial_data(port->serial); 178 struct opticon_private *priv = usb_get_serial_data(port->serial);
180 179
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 7817b82889ca..a16d69fadba1 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -45,8 +45,9 @@
45/* Function prototypes */ 45/* Function prototypes */
46static int option_open(struct tty_struct *tty, struct usb_serial_port *port, 46static int option_open(struct tty_struct *tty, struct usb_serial_port *port,
47 struct file *filp); 47 struct file *filp);
48static void option_close(struct tty_struct *tty, struct usb_serial_port *port, 48static void option_close(struct usb_serial_port *port);
49 struct file *filp); 49static void option_dtr_rts(struct usb_serial_port *port, int on);
50
50static int option_startup(struct usb_serial *serial); 51static int option_startup(struct usb_serial *serial);
51static void option_shutdown(struct usb_serial *serial); 52static void option_shutdown(struct usb_serial *serial);
52static int option_write_room(struct tty_struct *tty); 53static int option_write_room(struct tty_struct *tty);
@@ -61,7 +62,7 @@ static void option_set_termios(struct tty_struct *tty,
61static int option_tiocmget(struct tty_struct *tty, struct file *file); 62static int option_tiocmget(struct tty_struct *tty, struct file *file);
62static int option_tiocmset(struct tty_struct *tty, struct file *file, 63static int option_tiocmset(struct tty_struct *tty, struct file *file,
63 unsigned int set, unsigned int clear); 64 unsigned int set, unsigned int clear);
64static int option_send_setup(struct tty_struct *tty, struct usb_serial_port *port); 65static int option_send_setup(struct usb_serial_port *port);
65static int option_suspend(struct usb_serial *serial, pm_message_t message); 66static int option_suspend(struct usb_serial *serial, pm_message_t message);
66static int option_resume(struct usb_serial *serial); 67static int option_resume(struct usb_serial *serial);
67 68
@@ -551,6 +552,7 @@ static struct usb_serial_driver option_1port_device = {
551 .num_ports = 1, 552 .num_ports = 1,
552 .open = option_open, 553 .open = option_open,
553 .close = option_close, 554 .close = option_close,
555 .dtr_rts = option_dtr_rts,
554 .write = option_write, 556 .write = option_write,
555 .write_room = option_write_room, 557 .write_room = option_write_room,
556 .chars_in_buffer = option_chars_in_buffer, 558 .chars_in_buffer = option_chars_in_buffer,
@@ -630,7 +632,7 @@ static void option_set_termios(struct tty_struct *tty,
630 dbg("%s", __func__); 632 dbg("%s", __func__);
631 /* Doesn't support option setting */ 633 /* Doesn't support option setting */
632 tty_termios_copy_hw(tty->termios, old_termios); 634 tty_termios_copy_hw(tty->termios, old_termios);
633 option_send_setup(tty, port); 635 option_send_setup(port);
634} 636}
635 637
636static int option_tiocmget(struct tty_struct *tty, struct file *file) 638static int option_tiocmget(struct tty_struct *tty, struct file *file)
@@ -669,7 +671,7 @@ static int option_tiocmset(struct tty_struct *tty, struct file *file,
669 portdata->rts_state = 0; 671 portdata->rts_state = 0;
670 if (clear & TIOCM_DTR) 672 if (clear & TIOCM_DTR)
671 portdata->dtr_state = 0; 673 portdata->dtr_state = 0;
672 return option_send_setup(tty, port); 674 return option_send_setup(port);
673} 675}
674 676
675/* Write */ 677/* Write */
@@ -897,10 +899,6 @@ static int option_open(struct tty_struct *tty,
897 899
898 dbg("%s", __func__); 900 dbg("%s", __func__);
899 901
900 /* Set some sane defaults */
901 portdata->rts_state = 1;
902 portdata->dtr_state = 1;
903
904 /* Reset low level data toggle and start reading from endpoints */ 902 /* Reset low level data toggle and start reading from endpoints */
905 for (i = 0; i < N_IN_URB; i++) { 903 for (i = 0; i < N_IN_URB; i++) {
906 urb = portdata->in_urbs[i]; 904 urb = portdata->in_urbs[i];
@@ -936,37 +934,43 @@ static int option_open(struct tty_struct *tty,
936 usb_pipeout(urb->pipe), 0); */ 934 usb_pipeout(urb->pipe), 0); */
937 } 935 }
938 936
939 option_send_setup(tty, port); 937 option_send_setup(port);
940 938
941 return 0; 939 return 0;
942} 940}
943 941
944static void option_close(struct tty_struct *tty, 942static void option_dtr_rts(struct usb_serial_port *port, int on)
945 struct usb_serial_port *port, struct file *filp)
946{ 943{
947 int i;
948 struct usb_serial *serial = port->serial; 944 struct usb_serial *serial = port->serial;
949 struct option_port_private *portdata; 945 struct option_port_private *portdata;
950 946
951 dbg("%s", __func__); 947 dbg("%s", __func__);
952 portdata = usb_get_serial_port_data(port); 948 portdata = usb_get_serial_port_data(port);
949 mutex_lock(&serial->disc_mutex);
950 portdata->rts_state = on;
951 portdata->dtr_state = on;
952 if (serial->dev)
953 option_send_setup(port);
954 mutex_unlock(&serial->disc_mutex);
955}
953 956
954 portdata->rts_state = 0;
955 portdata->dtr_state = 0;
956 957
957 if (serial->dev) { 958static void option_close(struct usb_serial_port *port)
958 mutex_lock(&serial->disc_mutex); 959{
959 if (!serial->disconnected) 960 int i;
960 option_send_setup(tty, port); 961 struct usb_serial *serial = port->serial;
961 mutex_unlock(&serial->disc_mutex); 962 struct option_port_private *portdata;
963
964 dbg("%s", __func__);
965 portdata = usb_get_serial_port_data(port);
962 966
967 if (serial->dev) {
963 /* Stop reading/writing urbs */ 968 /* Stop reading/writing urbs */
964 for (i = 0; i < N_IN_URB; i++) 969 for (i = 0; i < N_IN_URB; i++)
965 usb_kill_urb(portdata->in_urbs[i]); 970 usb_kill_urb(portdata->in_urbs[i]);
966 for (i = 0; i < N_OUT_URB; i++) 971 for (i = 0; i < N_OUT_URB; i++)
967 usb_kill_urb(portdata->out_urbs[i]); 972 usb_kill_urb(portdata->out_urbs[i]);
968 } 973 }
969 tty_port_tty_set(&port->port, NULL);
970} 974}
971 975
972/* Helper functions used by option_setup_urbs */ 976/* Helper functions used by option_setup_urbs */
@@ -1032,28 +1036,24 @@ static void option_setup_urbs(struct usb_serial *serial)
1032 * This is exactly the same as SET_CONTROL_LINE_STATE from the PSTN 1036 * This is exactly the same as SET_CONTROL_LINE_STATE from the PSTN
1033 * CDC. 1037 * CDC.
1034*/ 1038*/
1035static int option_send_setup(struct tty_struct *tty, 1039static int option_send_setup(struct usb_serial_port *port)
1036 struct usb_serial_port *port)
1037{ 1040{
1038 struct usb_serial *serial = port->serial; 1041 struct usb_serial *serial = port->serial;
1039 struct option_port_private *portdata; 1042 struct option_port_private *portdata;
1040 int ifNum = serial->interface->cur_altsetting->desc.bInterfaceNumber; 1043 int ifNum = serial->interface->cur_altsetting->desc.bInterfaceNumber;
1044 int val = 0;
1041 dbg("%s", __func__); 1045 dbg("%s", __func__);
1042 1046
1043 portdata = usb_get_serial_port_data(port); 1047 portdata = usb_get_serial_port_data(port);
1044 1048
1045 if (tty) { 1049 if (portdata->dtr_state)
1046 int val = 0; 1050 val |= 0x01;
1047 if (portdata->dtr_state) 1051 if (portdata->rts_state)
1048 val |= 0x01; 1052 val |= 0x02;
1049 if (portdata->rts_state)
1050 val |= 0x02;
1051 1053
1052 return usb_control_msg(serial->dev, 1054 return usb_control_msg(serial->dev,
1053 usb_rcvctrlpipe(serial->dev, 0), 1055 usb_rcvctrlpipe(serial->dev, 0),
1054 0x22, 0x21, val, ifNum, NULL, 0, USB_CTRL_SET_TIMEOUT); 1056 0x22, 0x21, val, ifNum, NULL, 0, USB_CTRL_SET_TIMEOUT);
1055 }
1056 return 0;
1057} 1057}
1058 1058
1059static int option_startup(struct usb_serial *serial) 1059static int option_startup(struct usb_serial *serial)
diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c
index ba551f00f16f..7de54781fe61 100644
--- a/drivers/usb/serial/oti6858.c
+++ b/drivers/usb/serial/oti6858.c
@@ -143,8 +143,7 @@ struct oti6858_control_pkt {
143/* function prototypes */ 143/* function prototypes */
144static int oti6858_open(struct tty_struct *tty, 144static int oti6858_open(struct tty_struct *tty,
145 struct usb_serial_port *port, struct file *filp); 145 struct usb_serial_port *port, struct file *filp);
146static void oti6858_close(struct tty_struct *tty, 146static void oti6858_close(struct usb_serial_port *port);
147 struct usb_serial_port *port, struct file *filp);
148static void oti6858_set_termios(struct tty_struct *tty, 147static void oti6858_set_termios(struct tty_struct *tty,
149 struct usb_serial_port *port, struct ktermios *old); 148 struct usb_serial_port *port, struct ktermios *old);
150static int oti6858_ioctl(struct tty_struct *tty, struct file *file, 149static int oti6858_ioctl(struct tty_struct *tty, struct file *file,
@@ -622,67 +621,30 @@ static int oti6858_open(struct tty_struct *tty,
622 if (result != 0) { 621 if (result != 0) {
623 dev_err(&port->dev, "%s(): usb_submit_urb() failed" 622 dev_err(&port->dev, "%s(): usb_submit_urb() failed"
624 " with error %d\n", __func__, result); 623 " with error %d\n", __func__, result);
625 oti6858_close(tty, port, NULL); 624 oti6858_close(port);
626 return -EPROTO; 625 return -EPROTO;
627 } 626 }
628 627
629 /* setup termios */ 628 /* setup termios */
630 if (tty) 629 if (tty)
631 oti6858_set_termios(tty, port, &tmp_termios); 630 oti6858_set_termios(tty, port, &tmp_termios);
632 631 port->port.drain_delay = 256; /* FIXME: check the FIFO length */
633 return 0; 632 return 0;
634} 633}
635 634
636static void oti6858_close(struct tty_struct *tty, 635static void oti6858_close(struct usb_serial_port *port)
637 struct usb_serial_port *port, struct file *filp)
638{ 636{
639 struct oti6858_private *priv = usb_get_serial_port_data(port); 637 struct oti6858_private *priv = usb_get_serial_port_data(port);
640 unsigned long flags; 638 unsigned long flags;
641 long timeout;
642 wait_queue_t wait;
643 639
644 dbg("%s(port = %d)", __func__, port->number); 640 dbg("%s(port = %d)", __func__, port->number);
645 641
646 /* wait for data to drain from the buffer */
647 spin_lock_irqsave(&priv->lock, flags); 642 spin_lock_irqsave(&priv->lock, flags);
648 timeout = 30 * HZ; /* PL2303_CLOSING_WAIT */
649 init_waitqueue_entry(&wait, current);
650 add_wait_queue(&tty->write_wait, &wait);
651 dbg("%s(): entering wait loop", __func__);
652 for (;;) {
653 set_current_state(TASK_INTERRUPTIBLE);
654 if (oti6858_buf_data_avail(priv->buf) == 0
655 || timeout == 0 || signal_pending(current)
656 || port->serial->disconnected)
657 break;
658 spin_unlock_irqrestore(&priv->lock, flags);
659 timeout = schedule_timeout(timeout);
660 spin_lock_irqsave(&priv->lock, flags);
661 }
662 set_current_state(TASK_RUNNING);
663 remove_wait_queue(&tty->write_wait, &wait);
664 dbg("%s(): after wait loop", __func__);
665
666 /* clear out any remaining data in the buffer */ 643 /* clear out any remaining data in the buffer */
667 oti6858_buf_clear(priv->buf); 644 oti6858_buf_clear(priv->buf);
668 spin_unlock_irqrestore(&priv->lock, flags); 645 spin_unlock_irqrestore(&priv->lock, flags);
669 646
670 /* wait for characters to drain from the device */ 647 dbg("%s(): after buf_clear()", __func__);
671 /* (this is long enough for the entire 256 byte */
672 /* pl2303 hardware buffer to drain with no flow */
673 /* control for data rates of 1200 bps or more, */
674 /* for lower rates we should really know how much */
675 /* data is in the buffer to compute a delay */
676 /* that is not unnecessarily long) */
677 /* FIXME
678 bps = tty_get_baud_rate(tty);
679 if (bps > 1200)
680 timeout = max((HZ*2560)/bps,HZ/10);
681 else
682 */
683 timeout = 2*HZ;
684 schedule_timeout_interruptible(timeout);
685 dbg("%s(): after schedule_timeout_interruptible()", __func__);
686 648
687 /* cancel scheduled setup */ 649 /* cancel scheduled setup */
688 cancel_delayed_work(&priv->delayed_setup_work); 650 cancel_delayed_work(&priv->delayed_setup_work);
@@ -694,15 +656,6 @@ static void oti6858_close(struct tty_struct *tty,
694 usb_kill_urb(port->write_urb); 656 usb_kill_urb(port->write_urb);
695 usb_kill_urb(port->read_urb); 657 usb_kill_urb(port->read_urb);
696 usb_kill_urb(port->interrupt_in_urb); 658 usb_kill_urb(port->interrupt_in_urb);
697
698 /*
699 if (tty && (tty->termios->c_cflag) & HUPCL) {
700 // drop DTR and RTS
701 spin_lock_irqsave(&priv->lock, flags);
702 priv->pending_setup.control &= ~CONTROL_MASK;
703 spin_unlock_irqrestore(&priv->lock, flags);
704 }
705 */
706} 659}
707 660
708static int oti6858_tiocmset(struct tty_struct *tty, struct file *file, 661static int oti6858_tiocmset(struct tty_struct *tty, struct file *file,
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 751a533a4347..e02dc3d643c7 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -652,69 +652,41 @@ static void pl2303_set_termios(struct tty_struct *tty,
652 kfree(buf); 652 kfree(buf);
653} 653}
654 654
655static void pl2303_close(struct tty_struct *tty, 655static void pl2303_dtr_rts(struct usb_serial_port *port, int on)
656 struct usb_serial_port *port, struct file *filp) 656{
657 struct pl2303_private *priv = usb_get_serial_port_data(port);
658 unsigned long flags;
659 u8 control;
660
661 spin_lock_irqsave(&priv->lock, flags);
662 /* Change DTR and RTS */
663 if (on)
664 priv->line_control |= (CONTROL_DTR | CONTROL_RTS);
665 else
666 priv->line_control &= ~(CONTROL_DTR | CONTROL_RTS);
667 control = priv->line_control;
668 spin_unlock_irqrestore(&priv->lock, flags);
669 set_control_lines(port->serial->dev, control);
670}
671
672static void pl2303_close(struct usb_serial_port *port)
657{ 673{
658 struct pl2303_private *priv = usb_get_serial_port_data(port); 674 struct pl2303_private *priv = usb_get_serial_port_data(port);
659 unsigned long flags; 675 unsigned long flags;
660 unsigned int c_cflag;
661 int bps;
662 long timeout;
663 wait_queue_t wait;
664 676
665 dbg("%s - port %d", __func__, port->number); 677 dbg("%s - port %d", __func__, port->number);
666 678
667 /* wait for data to drain from the buffer */
668 spin_lock_irqsave(&priv->lock, flags); 679 spin_lock_irqsave(&priv->lock, flags);
669 timeout = PL2303_CLOSING_WAIT;
670 init_waitqueue_entry(&wait, current);
671 add_wait_queue(&tty->write_wait, &wait);
672 for (;;) {
673 set_current_state(TASK_INTERRUPTIBLE);
674 if (pl2303_buf_data_avail(priv->buf) == 0 ||
675 timeout == 0 || signal_pending(current) ||
676 port->serial->disconnected)
677 break;
678 spin_unlock_irqrestore(&priv->lock, flags);
679 timeout = schedule_timeout(timeout);
680 spin_lock_irqsave(&priv->lock, flags);
681 }
682 set_current_state(TASK_RUNNING);
683 remove_wait_queue(&tty->write_wait, &wait);
684 /* clear out any remaining data in the buffer */ 680 /* clear out any remaining data in the buffer */
685 pl2303_buf_clear(priv->buf); 681 pl2303_buf_clear(priv->buf);
686 spin_unlock_irqrestore(&priv->lock, flags); 682 spin_unlock_irqrestore(&priv->lock, flags);
687 683
688 /* wait for characters to drain from the device */
689 /* (this is long enough for the entire 256 byte */
690 /* pl2303 hardware buffer to drain with no flow */
691 /* control for data rates of 1200 bps or more, */
692 /* for lower rates we should really know how much */
693 /* data is in the buffer to compute a delay */
694 /* that is not unnecessarily long) */
695 bps = tty_get_baud_rate(tty);
696 if (bps > 1200)
697 timeout = max((HZ*2560)/bps, HZ/10);
698 else
699 timeout = 2*HZ;
700 schedule_timeout_interruptible(timeout);
701
702 /* shutdown our urbs */ 684 /* shutdown our urbs */
703 dbg("%s - shutting down urbs", __func__); 685 dbg("%s - shutting down urbs", __func__);
704 usb_kill_urb(port->write_urb); 686 usb_kill_urb(port->write_urb);
705 usb_kill_urb(port->read_urb); 687 usb_kill_urb(port->read_urb);
706 usb_kill_urb(port->interrupt_in_urb); 688 usb_kill_urb(port->interrupt_in_urb);
707 689
708 if (tty) {
709 c_cflag = tty->termios->c_cflag;
710 if (c_cflag & HUPCL) {
711 /* drop DTR and RTS */
712 spin_lock_irqsave(&priv->lock, flags);
713 priv->line_control = 0;
714 spin_unlock_irqrestore(&priv->lock, flags);
715 set_control_lines(port->serial->dev, 0);
716 }
717 }
718} 690}
719 691
720static int pl2303_open(struct tty_struct *tty, 692static int pl2303_open(struct tty_struct *tty,
@@ -748,7 +720,7 @@ static int pl2303_open(struct tty_struct *tty,
748 if (result) { 720 if (result) {
749 dev_err(&port->dev, "%s - failed submitting read urb," 721 dev_err(&port->dev, "%s - failed submitting read urb,"
750 " error %d\n", __func__, result); 722 " error %d\n", __func__, result);
751 pl2303_close(tty, port, NULL); 723 pl2303_close(port);
752 return -EPROTO; 724 return -EPROTO;
753 } 725 }
754 726
@@ -758,9 +730,10 @@ static int pl2303_open(struct tty_struct *tty,
758 if (result) { 730 if (result) {
759 dev_err(&port->dev, "%s - failed submitting interrupt urb," 731 dev_err(&port->dev, "%s - failed submitting interrupt urb,"
760 " error %d\n", __func__, result); 732 " error %d\n", __func__, result);
761 pl2303_close(tty, port, NULL); 733 pl2303_close(port);
762 return -EPROTO; 734 return -EPROTO;
763 } 735 }
736 port->port.drain_delay = 256;
764 return 0; 737 return 0;
765} 738}
766 739
@@ -821,6 +794,14 @@ static int pl2303_tiocmget(struct tty_struct *tty, struct file *file)
821 return result; 794 return result;
822} 795}
823 796
797static int pl2303_carrier_raised(struct usb_serial_port *port)
798{
799 struct pl2303_private *priv = usb_get_serial_port_data(port);
800 if (priv->line_status & UART_DCD)
801 return 1;
802 return 0;
803}
804
824static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) 805static int wait_modem_info(struct usb_serial_port *port, unsigned int arg)
825{ 806{
826 struct pl2303_private *priv = usb_get_serial_port_data(port); 807 struct pl2303_private *priv = usb_get_serial_port_data(port);
@@ -1125,6 +1106,8 @@ static struct usb_serial_driver pl2303_device = {
1125 .num_ports = 1, 1106 .num_ports = 1,
1126 .open = pl2303_open, 1107 .open = pl2303_open,
1127 .close = pl2303_close, 1108 .close = pl2303_close,
1109 .dtr_rts = pl2303_dtr_rts,
1110 .carrier_raised = pl2303_carrier_raised,
1128 .write = pl2303_write, 1111 .write = pl2303_write,
1129 .ioctl = pl2303_ioctl, 1112 .ioctl = pl2303_ioctl,
1130 .break_ctl = pl2303_break_ctl, 1113 .break_ctl = pl2303_break_ctl,
diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 913225c61610..17ac34f4d668 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -26,12 +26,10 @@
26#include <linux/module.h> 26#include <linux/module.h>
27#include <linux/usb.h> 27#include <linux/usb.h>
28#include <linux/usb/serial.h> 28#include <linux/usb/serial.h>
29#include <linux/usb/ch9.h>
30 29
31#define SWIMS_USB_REQUEST_SetPower 0x00 30#define SWIMS_USB_REQUEST_SetPower 0x00
32#define SWIMS_USB_REQUEST_SetNmea 0x07 31#define SWIMS_USB_REQUEST_SetNmea 0x07
33 32
34/* per port private data */
35#define N_IN_URB 4 33#define N_IN_URB 4
36#define N_OUT_URB 4 34#define N_OUT_URB 4
37#define IN_BUFLEN 4096 35#define IN_BUFLEN 4096
@@ -39,6 +37,12 @@
39static int debug; 37static int debug;
40static int nmea; 38static int nmea;
41 39
40/* Used in interface blacklisting */
41struct sierra_iface_info {
42 const u32 infolen; /* number of interface numbers on blacklist */
43 const u8 *ifaceinfo; /* pointer to the array holding the numbers */
44};
45
42static int sierra_set_power_state(struct usb_device *udev, __u16 swiState) 46static int sierra_set_power_state(struct usb_device *udev, __u16 swiState)
43{ 47{
44 int result; 48 int result;
@@ -85,6 +89,23 @@ static int sierra_calc_num_ports(struct usb_serial *serial)
85 return result; 89 return result;
86} 90}
87 91
92static int is_blacklisted(const u8 ifnum,
93 const struct sierra_iface_info *blacklist)
94{
95 const u8 *info;
96 int i;
97
98 if (blacklist) {
99 info = blacklist->ifaceinfo;
100
101 for (i = 0; i < blacklist->infolen; i++) {
102 if (info[i] == ifnum)
103 return 1;
104 }
105 }
106 return 0;
107}
108
88static int sierra_calc_interface(struct usb_serial *serial) 109static int sierra_calc_interface(struct usb_serial *serial)
89{ 110{
90 int interface; 111 int interface;
@@ -153,9 +174,25 @@ static int sierra_probe(struct usb_serial *serial,
153 */ 174 */
154 usb_set_serial_data(serial, (void *)num_ports); 175 usb_set_serial_data(serial, (void *)num_ports);
155 176
177 /* ifnum could have changed - by calling usb_set_interface */
178 ifnum = sierra_calc_interface(serial);
179
180 if (is_blacklisted(ifnum,
181 (struct sierra_iface_info *)id->driver_info)) {
182 dev_dbg(&serial->dev->dev,
183 "Ignoring blacklisted interface #%d\n", ifnum);
184 return -ENODEV;
185 }
186
156 return result; 187 return result;
157} 188}
158 189
190static const u8 direct_ip_non_serial_ifaces[] = { 7, 8, 9, 10, 11 };
191static const struct sierra_iface_info direct_ip_interface_blacklist = {
192 .infolen = ARRAY_SIZE(direct_ip_non_serial_ifaces),
193 .ifaceinfo = direct_ip_non_serial_ifaces,
194};
195
159static struct usb_device_id id_table [] = { 196static struct usb_device_id id_table [] = {
160 { USB_DEVICE(0x1199, 0x0017) }, /* Sierra Wireless EM5625 */ 197 { USB_DEVICE(0x1199, 0x0017) }, /* Sierra Wireless EM5625 */
161 { USB_DEVICE(0x1199, 0x0018) }, /* Sierra Wireless MC5720 */ 198 { USB_DEVICE(0x1199, 0x0018) }, /* Sierra Wireless MC5720 */
@@ -188,9 +225,11 @@ static struct usb_device_id id_table [] = {
188 { USB_DEVICE(0x1199, 0x6833) }, /* Sierra Wireless MC8781 */ 225 { USB_DEVICE(0x1199, 0x6833) }, /* Sierra Wireless MC8781 */
189 { USB_DEVICE(0x1199, 0x683A) }, /* Sierra Wireless MC8785 */ 226 { USB_DEVICE(0x1199, 0x683A) }, /* Sierra Wireless MC8785 */
190 { USB_DEVICE(0x1199, 0x683B) }, /* Sierra Wireless MC8785 Composite */ 227 { USB_DEVICE(0x1199, 0x683B) }, /* Sierra Wireless MC8785 Composite */
191 { USB_DEVICE(0x1199, 0x683C) }, /* Sierra Wireless MC8790 */ 228 /* Sierra Wireless MC8790, MC8791, MC8792 Composite */
192 { USB_DEVICE(0x1199, 0x683D) }, /* Sierra Wireless MC8790 */ 229 { USB_DEVICE(0x1199, 0x683C) },
193 { USB_DEVICE(0x1199, 0x683E) }, /* Sierra Wireless MC8790 */ 230 { USB_DEVICE(0x1199, 0x683D) }, /* Sierra Wireless MC8791 Composite */
231 /* Sierra Wireless MC8790, MC8791, MC8792 */
232 { USB_DEVICE(0x1199, 0x683E) },
194 { USB_DEVICE(0x1199, 0x6850) }, /* Sierra Wireless AirCard 880 */ 233 { USB_DEVICE(0x1199, 0x6850) }, /* Sierra Wireless AirCard 880 */
195 { USB_DEVICE(0x1199, 0x6851) }, /* Sierra Wireless AirCard 881 */ 234 { USB_DEVICE(0x1199, 0x6851) }, /* Sierra Wireless AirCard 881 */
196 { USB_DEVICE(0x1199, 0x6852) }, /* Sierra Wireless AirCard 880 E */ 235 { USB_DEVICE(0x1199, 0x6852) }, /* Sierra Wireless AirCard 880 E */
@@ -211,6 +250,10 @@ static struct usb_device_id id_table [] = {
211 { USB_DEVICE(0x1199, 0x0112) }, /* Sierra Wireless AirCard 580 */ 250 { USB_DEVICE(0x1199, 0x0112) }, /* Sierra Wireless AirCard 580 */
212 { USB_DEVICE(0x0F3D, 0x0112) }, /* Airprime/Sierra PC 5220 */ 251 { USB_DEVICE(0x0F3D, 0x0112) }, /* Airprime/Sierra PC 5220 */
213 252
253 { USB_DEVICE(0x1199, 0x68A3), /* Sierra Wireless Direct IP modems */
254 .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist
255 },
256
214 { } 257 { }
215}; 258};
216MODULE_DEVICE_TABLE(usb, id_table); 259MODULE_DEVICE_TABLE(usb, id_table);
@@ -229,7 +272,6 @@ struct sierra_port_private {
229 272
230 /* Input endpoints and buffers for this port */ 273 /* Input endpoints and buffers for this port */
231 struct urb *in_urbs[N_IN_URB]; 274 struct urb *in_urbs[N_IN_URB];
232 char *in_buffer[N_IN_URB];
233 275
234 /* Settings for the port */ 276 /* Settings for the port */
235 int rts_state; /* Handshaking pins (outputs) */ 277 int rts_state; /* Handshaking pins (outputs) */
@@ -240,57 +282,50 @@ struct sierra_port_private {
240 int ri_state; 282 int ri_state;
241}; 283};
242 284
243static int sierra_send_setup(struct tty_struct *tty, 285static int sierra_send_setup(struct usb_serial_port *port)
244 struct usb_serial_port *port)
245{ 286{
246 struct usb_serial *serial = port->serial; 287 struct usb_serial *serial = port->serial;
247 struct sierra_port_private *portdata; 288 struct sierra_port_private *portdata;
248 __u16 interface = 0; 289 __u16 interface = 0;
290 int val = 0;
249 291
250 dev_dbg(&port->dev, "%s", __func__); 292 dev_dbg(&port->dev, "%s", __func__);
251 293
252 portdata = usb_get_serial_port_data(port); 294 portdata = usb_get_serial_port_data(port);
253 295
254 if (tty) { 296 if (portdata->dtr_state)
255 int val = 0; 297 val |= 0x01;
256 if (portdata->dtr_state) 298 if (portdata->rts_state)
257 val |= 0x01; 299 val |= 0x02;
258 if (portdata->rts_state)
259 val |= 0x02;
260
261 /* If composite device then properly report interface */
262 if (serial->num_ports == 1) {
263 interface = sierra_calc_interface(serial);
264
265 /* Control message is sent only to interfaces with
266 * interrupt_in endpoints
267 */
268 if (port->interrupt_in_urb) {
269 /* send control message */
270 return usb_control_msg(serial->dev,
271 usb_rcvctrlpipe(serial->dev, 0),
272 0x22, 0x21, val, interface,
273 NULL, 0, USB_CTRL_SET_TIMEOUT);
274 }
275 }
276
277 /* Otherwise the need to do non-composite mapping */
278 else {
279 if (port->bulk_out_endpointAddress == 2)
280 interface = 0;
281 else if (port->bulk_out_endpointAddress == 4)
282 interface = 1;
283 else if (port->bulk_out_endpointAddress == 5)
284 interface = 2;
285 300
301 /* If composite device then properly report interface */
302 if (serial->num_ports == 1) {
303 interface = sierra_calc_interface(serial);
304 /* Control message is sent only to interfaces with
305 * interrupt_in endpoints
306 */
307 if (port->interrupt_in_urb) {
308 /* send control message */
286 return usb_control_msg(serial->dev, 309 return usb_control_msg(serial->dev,
287 usb_rcvctrlpipe(serial->dev, 0), 310 usb_rcvctrlpipe(serial->dev, 0),
288 0x22, 0x21, val, interface, 311 0x22, 0x21, val, interface,
289 NULL, 0, USB_CTRL_SET_TIMEOUT); 312 NULL, 0, USB_CTRL_SET_TIMEOUT);
290
291 } 313 }
292 } 314 }
293 315
316 /* Otherwise the need to do non-composite mapping */
317 else {
318 if (port->bulk_out_endpointAddress == 2)
319 interface = 0;
320 else if (port->bulk_out_endpointAddress == 4)
321 interface = 1;
322 else if (port->bulk_out_endpointAddress == 5)
323 interface = 2;
324 return usb_control_msg(serial->dev,
325 usb_rcvctrlpipe(serial->dev, 0),
326 0x22, 0x21, val, interface,
327 NULL, 0, USB_CTRL_SET_TIMEOUT);
328 }
294 return 0; 329 return 0;
295} 330}
296 331
@@ -299,7 +334,7 @@ static void sierra_set_termios(struct tty_struct *tty,
299{ 334{
300 dev_dbg(&port->dev, "%s", __func__); 335 dev_dbg(&port->dev, "%s", __func__);
301 tty_termios_copy_hw(tty->termios, old_termios); 336 tty_termios_copy_hw(tty->termios, old_termios);
302 sierra_send_setup(tty, port); 337 sierra_send_setup(port);
303} 338}
304 339
305static int sierra_tiocmget(struct tty_struct *tty, struct file *file) 340static int sierra_tiocmget(struct tty_struct *tty, struct file *file)
@@ -338,7 +373,18 @@ static int sierra_tiocmset(struct tty_struct *tty, struct file *file,
338 portdata->rts_state = 0; 373 portdata->rts_state = 0;
339 if (clear & TIOCM_DTR) 374 if (clear & TIOCM_DTR)
340 portdata->dtr_state = 0; 375 portdata->dtr_state = 0;
341 return sierra_send_setup(tty, port); 376 return sierra_send_setup(port);
377}
378
379static void sierra_release_urb(struct urb *urb)
380{
381 struct usb_serial_port *port;
382 if (urb) {
383 port = urb->context;
384 dev_dbg(&port->dev, "%s: %p\n", __func__, urb);
385 kfree(urb->transfer_buffer);
386 usb_free_urb(urb);
387 }
342} 388}
343 389
344static void sierra_outdat_callback(struct urb *urb) 390static void sierra_outdat_callback(struct urb *urb)
@@ -465,7 +511,7 @@ static void sierra_indat_callback(struct urb *urb)
465 " received", __func__); 511 " received", __func__);
466 512
467 /* Resubmit urb so we continue receiving */ 513 /* Resubmit urb so we continue receiving */
468 if (port->port.count && status != -ESHUTDOWN) { 514 if (port->port.count && status != -ESHUTDOWN && status != -EPERM) {
469 err = usb_submit_urb(urb, GFP_ATOMIC); 515 err = usb_submit_urb(urb, GFP_ATOMIC);
470 if (err) 516 if (err)
471 dev_err(&port->dev, "resubmit read urb failed." 517 dev_err(&port->dev, "resubmit read urb failed."
@@ -557,67 +603,99 @@ static int sierra_write_room(struct tty_struct *tty)
557 return 2048; 603 return 2048;
558} 604}
559 605
560static int sierra_open(struct tty_struct *tty, 606static void sierra_stop_rx_urbs(struct usb_serial_port *port)
561 struct usb_serial_port *port, struct file *filp)
562{ 607{
563 struct sierra_port_private *portdata;
564 struct usb_serial *serial = port->serial;
565 int i; 608 int i;
566 struct urb *urb; 609 struct sierra_port_private *portdata = usb_get_serial_port_data(port);
567 int result;
568 610
569 portdata = usb_get_serial_port_data(port); 611 for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++)
612 usb_kill_urb(portdata->in_urbs[i]);
570 613
571 dev_dbg(&port->dev, "%s", __func__); 614 usb_kill_urb(port->interrupt_in_urb);
615}
572 616
573 /* Set some sane defaults */ 617static int sierra_submit_rx_urbs(struct usb_serial_port *port, gfp_t mem_flags)
574 portdata->rts_state = 1; 618{
575 portdata->dtr_state = 1; 619 int ok_cnt;
620 int err = -EINVAL;
621 int i;
622 struct urb *urb;
623 struct sierra_port_private *portdata = usb_get_serial_port_data(port);
576 624
577 /* Reset low level data toggle and start reading from endpoints */ 625 ok_cnt = 0;
578 for (i = 0; i < N_IN_URB; i++) { 626 for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++) {
579 urb = portdata->in_urbs[i]; 627 urb = portdata->in_urbs[i];
580 if (!urb) 628 if (!urb)
581 continue; 629 continue;
582 if (urb->dev != serial->dev) { 630 err = usb_submit_urb(urb, mem_flags);
583 dev_dbg(&port->dev, "%s: dev %p != %p", 631 if (err) {
584 __func__, urb->dev, serial->dev); 632 dev_err(&port->dev, "%s: submit urb failed: %d\n",
585 continue; 633 __func__, err);
634 } else {
635 ok_cnt++;
586 } 636 }
637 }
587 638
588 /* 639 if (ok_cnt && port->interrupt_in_urb) {
589 * make sure endpoint data toggle is synchronized with the 640 err = usb_submit_urb(port->interrupt_in_urb, mem_flags);
590 * device 641 if (err) {
591 */ 642 dev_err(&port->dev, "%s: submit intr urb failed: %d\n",
592 usb_clear_halt(urb->dev, urb->pipe); 643 __func__, err);
593
594 result = usb_submit_urb(urb, GFP_KERNEL);
595 if (result) {
596 dev_err(&port->dev, "submit urb %d failed (%d) %d\n",
597 i, result, urb->transfer_buffer_length);
598 } 644 }
599 } 645 }
600 646
601 sierra_send_setup(tty, port); 647 if (ok_cnt > 0) /* at least one rx urb submitted */
648 return 0;
649 else
650 return err;
651}
652
653static struct urb *sierra_setup_urb(struct usb_serial *serial, int endpoint,
654 int dir, void *ctx, int len,
655 gfp_t mem_flags,
656 usb_complete_t callback)
657{
658 struct urb *urb;
659 u8 *buf;
660
661 if (endpoint == -1)
662 return NULL;
602 663
603 /* start up the interrupt endpoint if we have one */ 664 urb = usb_alloc_urb(0, mem_flags);
604 if (port->interrupt_in_urb) { 665 if (urb == NULL) {
605 result = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); 666 dev_dbg(&serial->dev->dev, "%s: alloc for endpoint %d failed\n",
606 if (result) 667 __func__, endpoint);
607 dev_err(&port->dev, "submit irq_in urb failed %d\n", 668 return NULL;
608 result);
609 } 669 }
610 return 0; 670
671 buf = kmalloc(len, mem_flags);
672 if (buf) {
673 /* Fill URB using supplied data */
674 usb_fill_bulk_urb(urb, serial->dev,
675 usb_sndbulkpipe(serial->dev, endpoint) | dir,
676 buf, len, callback, ctx);
677
678 /* debug */
679 dev_dbg(&serial->dev->dev, "%s %c u : %p d:%p\n", __func__,
680 dir == USB_DIR_IN ? 'i' : 'o', urb, buf);
681 } else {
682 dev_dbg(&serial->dev->dev, "%s %c u:%p d:%p\n", __func__,
683 dir == USB_DIR_IN ? 'i' : 'o', urb, buf);
684
685 sierra_release_urb(urb);
686 urb = NULL;
687 }
688
689 return urb;
611} 690}
612 691
613static void sierra_close(struct tty_struct *tty, 692static void sierra_close(struct usb_serial_port *port)
614 struct usb_serial_port *port, struct file *filp)
615{ 693{
616 int i; 694 int i;
617 struct usb_serial *serial = port->serial; 695 struct usb_serial *serial = port->serial;
618 struct sierra_port_private *portdata; 696 struct sierra_port_private *portdata;
619 697
620 dev_dbg(&port->dev, "%s", __func__); 698 dev_dbg(&port->dev, "%s\n", __func__);
621 portdata = usb_get_serial_port_data(port); 699 portdata = usb_get_serial_port_data(port);
622 700
623 portdata->rts_state = 0; 701 portdata->rts_state = 0;
@@ -626,25 +704,83 @@ static void sierra_close(struct tty_struct *tty,
626 if (serial->dev) { 704 if (serial->dev) {
627 mutex_lock(&serial->disc_mutex); 705 mutex_lock(&serial->disc_mutex);
628 if (!serial->disconnected) 706 if (!serial->disconnected)
629 sierra_send_setup(tty, port); 707 sierra_send_setup(port);
630 mutex_unlock(&serial->disc_mutex); 708 mutex_unlock(&serial->disc_mutex);
631 709
632 /* Stop reading/writing urbs */ 710 /* Stop reading urbs */
633 for (i = 0; i < N_IN_URB; i++) 711 sierra_stop_rx_urbs(port);
634 usb_kill_urb(portdata->in_urbs[i]); 712 /* .. and release them */
713 for (i = 0; i < N_IN_URB; i++) {
714 sierra_release_urb(portdata->in_urbs[i]);
715 portdata->in_urbs[i] = NULL;
716 }
635 } 717 }
718}
636 719
637 usb_kill_urb(port->interrupt_in_urb); 720static int sierra_open(struct tty_struct *tty,
638 tty_port_tty_set(&port->port, NULL); 721 struct usb_serial_port *port, struct file *filp)
722{
723 struct sierra_port_private *portdata;
724 struct usb_serial *serial = port->serial;
725 int i;
726 int err;
727 int endpoint;
728 struct urb *urb;
729
730 portdata = usb_get_serial_port_data(port);
731
732 dev_dbg(&port->dev, "%s", __func__);
733
734 /* Set some sane defaults */
735 portdata->rts_state = 1;
736 portdata->dtr_state = 1;
737
738
739 endpoint = port->bulk_in_endpointAddress;
740 for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++) {
741 urb = sierra_setup_urb(serial, endpoint, USB_DIR_IN, port,
742 IN_BUFLEN, GFP_KERNEL,
743 sierra_indat_callback);
744 portdata->in_urbs[i] = urb;
745 }
746 /* clear halt condition */
747 usb_clear_halt(serial->dev,
748 usb_sndbulkpipe(serial->dev, endpoint) | USB_DIR_IN);
749
750 err = sierra_submit_rx_urbs(port, GFP_KERNEL);
751 if (err) {
752 /* get rid of everything as in close */
753 sierra_close(port);
754 return err;
755 }
756 sierra_send_setup(port);
757
758 return 0;
759}
760
761
762static void sierra_dtr_rts(struct usb_serial_port *port, int on)
763{
764 struct usb_serial *serial = port->serial;
765 struct sierra_port_private *portdata;
766
767 portdata = usb_get_serial_port_data(port);
768 portdata->rts_state = on;
769 portdata->dtr_state = on;
770
771 if (serial->dev) {
772 mutex_lock(&serial->disc_mutex);
773 if (!serial->disconnected)
774 sierra_send_setup(port);
775 mutex_unlock(&serial->disc_mutex);
776 }
639} 777}
640 778
641static int sierra_startup(struct usb_serial *serial) 779static int sierra_startup(struct usb_serial *serial)
642{ 780{
643 struct usb_serial_port *port; 781 struct usb_serial_port *port;
644 struct sierra_port_private *portdata; 782 struct sierra_port_private *portdata;
645 struct urb *urb;
646 int i; 783 int i;
647 int j;
648 784
649 dev_dbg(&serial->dev->dev, "%s", __func__); 785 dev_dbg(&serial->dev->dev, "%s", __func__);
650 786
@@ -666,34 +802,8 @@ static int sierra_startup(struct usb_serial *serial)
666 return -ENOMEM; 802 return -ENOMEM;
667 } 803 }
668 spin_lock_init(&portdata->lock); 804 spin_lock_init(&portdata->lock);
669 for (j = 0; j < N_IN_URB; j++) { 805 /* Set the port private data pointer */
670 portdata->in_buffer[j] = kmalloc(IN_BUFLEN, GFP_KERNEL);
671 if (!portdata->in_buffer[j]) {
672 for (--j; j >= 0; j--)
673 kfree(portdata->in_buffer[j]);
674 kfree(portdata);
675 return -ENOMEM;
676 }
677 }
678
679 usb_set_serial_port_data(port, portdata); 806 usb_set_serial_port_data(port, portdata);
680
681 /* initialize the in urbs */
682 for (j = 0; j < N_IN_URB; ++j) {
683 urb = usb_alloc_urb(0, GFP_KERNEL);
684 if (urb == NULL) {
685 dev_dbg(&port->dev, "%s: alloc for in "
686 "port failed.", __func__);
687 continue;
688 }
689 /* Fill URB using supplied data. */
690 usb_fill_bulk_urb(urb, serial->dev,
691 usb_rcvbulkpipe(serial->dev,
692 port->bulk_in_endpointAddress),
693 portdata->in_buffer[j], IN_BUFLEN,
694 sierra_indat_callback, port);
695 portdata->in_urbs[j] = urb;
696 }
697 } 807 }
698 808
699 return 0; 809 return 0;
@@ -701,7 +811,7 @@ static int sierra_startup(struct usb_serial *serial)
701 811
702static void sierra_shutdown(struct usb_serial *serial) 812static void sierra_shutdown(struct usb_serial *serial)
703{ 813{
704 int i, j; 814 int i;
705 struct usb_serial_port *port; 815 struct usb_serial_port *port;
706 struct sierra_port_private *portdata; 816 struct sierra_port_private *portdata;
707 817
@@ -714,12 +824,6 @@ static void sierra_shutdown(struct usb_serial *serial)
714 portdata = usb_get_serial_port_data(port); 824 portdata = usb_get_serial_port_data(port);
715 if (!portdata) 825 if (!portdata)
716 continue; 826 continue;
717
718 for (j = 0; j < N_IN_URB; j++) {
719 usb_kill_urb(portdata->in_urbs[j]);
720 usb_free_urb(portdata->in_urbs[j]);
721 kfree(portdata->in_buffer[j]);
722 }
723 kfree(portdata); 827 kfree(portdata);
724 usb_set_serial_port_data(port, NULL); 828 usb_set_serial_port_data(port, NULL);
725 } 829 }
@@ -737,6 +841,7 @@ static struct usb_serial_driver sierra_device = {
737 .probe = sierra_probe, 841 .probe = sierra_probe,
738 .open = sierra_open, 842 .open = sierra_open,
739 .close = sierra_close, 843 .close = sierra_close,
844 .dtr_rts = sierra_dtr_rts,
740 .write = sierra_write, 845 .write = sierra_write,
741 .write_room = sierra_write_room, 846 .write_room = sierra_write_room,
742 .set_termios = sierra_set_termios, 847 .set_termios = sierra_set_termios,
diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c
index 5e7528cc81a8..8f7ed8f13996 100644
--- a/drivers/usb/serial/spcp8x5.c
+++ b/drivers/usb/serial/spcp8x5.c
@@ -446,66 +446,47 @@ static void spcp8x5_set_workMode(struct usb_device *dev, u16 value,
446 "RTSCTS usb_control_msg(enable flowctrl) = %d\n", ret); 446 "RTSCTS usb_control_msg(enable flowctrl) = %d\n", ret);
447} 447}
448 448
449static int spcp8x5_carrier_raised(struct usb_serial_port *port)
450{
451 struct spcp8x5_private *priv = usb_get_serial_port_data(port);
452 if (priv->line_status & MSR_STATUS_LINE_DCD)
453 return 1;
454 return 0;
455}
456
457static void spcp8x5_dtr_rts(struct usb_serial_port *port, int on)
458{
459 struct spcp8x5_private *priv = usb_get_serial_port_data(port);
460 unsigned long flags;
461 u8 control;
462
463 spin_lock_irqsave(&priv->lock, flags);
464 if (on)
465 priv->line_control = MCR_CONTROL_LINE_DTR
466 | MCR_CONTROL_LINE_RTS;
467 else
468 priv->line_control &= ~ (MCR_CONTROL_LINE_DTR
469 | MCR_CONTROL_LINE_RTS);
470 control = priv->line_control;
471 spin_unlock_irqrestore(&priv->lock, flags);
472 spcp8x5_set_ctrlLine(port->serial->dev, control , priv->type);
473}
474
449/* close the serial port. We should wait for data sending to device 1st and 475/* close the serial port. We should wait for data sending to device 1st and
450 * then kill all urb. */ 476 * then kill all urb. */
451static void spcp8x5_close(struct tty_struct *tty, 477static void spcp8x5_close(struct usb_serial_port *port)
452 struct usb_serial_port *port, struct file *filp)
453{ 478{
454 struct spcp8x5_private *priv = usb_get_serial_port_data(port); 479 struct spcp8x5_private *priv = usb_get_serial_port_data(port);
455 unsigned long flags; 480 unsigned long flags;
456 unsigned int c_cflag;
457 int bps;
458 long timeout;
459 wait_queue_t wait;
460 int result; 481 int result;
461 482
462 dbg("%s - port %d", __func__, port->number); 483 dbg("%s - port %d", __func__, port->number);
463 484
464 /* wait for data to drain from the buffer */
465 spin_lock_irqsave(&priv->lock, flags); 485 spin_lock_irqsave(&priv->lock, flags);
466 timeout = SPCP8x5_CLOSING_WAIT;
467 init_waitqueue_entry(&wait, current);
468 add_wait_queue(&tty->write_wait, &wait);
469 for (;;) {
470 set_current_state(TASK_INTERRUPTIBLE);
471 if (ringbuf_avail_data(priv->buf) == 0 ||
472 timeout == 0 || signal_pending(current))
473 break;
474 spin_unlock_irqrestore(&priv->lock, flags);
475 timeout = schedule_timeout(timeout);
476 spin_lock_irqsave(&priv->lock, flags);
477 }
478 set_current_state(TASK_RUNNING);
479 remove_wait_queue(&tty->write_wait, &wait);
480
481 /* clear out any remaining data in the buffer */ 486 /* clear out any remaining data in the buffer */
482 clear_ringbuf(priv->buf); 487 clear_ringbuf(priv->buf);
483 spin_unlock_irqrestore(&priv->lock, flags); 488 spin_unlock_irqrestore(&priv->lock, flags);
484 489
485 /* wait for characters to drain from the device (this is long enough
486 * for the entire all byte spcp8x5 hardware buffer to drain with no
487 * flow control for data rates of 1200 bps or more, for lower rates we
488 * should really know how much data is in the buffer to compute a delay
489 * that is not unnecessarily long) */
490 bps = tty_get_baud_rate(tty);
491 if (bps > 1200)
492 timeout = max((HZ*2560) / bps, HZ/10);
493 else
494 timeout = 2*HZ;
495 set_current_state(TASK_INTERRUPTIBLE);
496 schedule_timeout(timeout);
497
498 /* clear control lines */
499 if (tty) {
500 c_cflag = tty->termios->c_cflag;
501 if (c_cflag & HUPCL) {
502 spin_lock_irqsave(&priv->lock, flags);
503 priv->line_control = 0;
504 spin_unlock_irqrestore(&priv->lock, flags);
505 spcp8x5_set_ctrlLine(port->serial->dev, 0 , priv->type);
506 }
507 }
508
509 /* kill urb */ 490 /* kill urb */
510 if (port->write_urb != NULL) { 491 if (port->write_urb != NULL) {
511 result = usb_unlink_urb(port->write_urb); 492 result = usb_unlink_urb(port->write_urb);
@@ -665,13 +646,6 @@ static int spcp8x5_open(struct tty_struct *tty,
665 if (ret) 646 if (ret)
666 return ret; 647 return ret;
667 648
668 spin_lock_irqsave(&priv->lock, flags);
669 if (tty && (tty->termios->c_cflag & CBAUD))
670 priv->line_control = MCR_DTR | MCR_RTS;
671 else
672 priv->line_control = 0;
673 spin_unlock_irqrestore(&priv->lock, flags);
674
675 spcp8x5_set_ctrlLine(serial->dev, priv->line_control , priv->type); 649 spcp8x5_set_ctrlLine(serial->dev, priv->line_control , priv->type);
676 650
677 /* Setup termios */ 651 /* Setup termios */
@@ -691,9 +665,10 @@ static int spcp8x5_open(struct tty_struct *tty,
691 port->read_urb->dev = serial->dev; 665 port->read_urb->dev = serial->dev;
692 ret = usb_submit_urb(port->read_urb, GFP_KERNEL); 666 ret = usb_submit_urb(port->read_urb, GFP_KERNEL);
693 if (ret) { 667 if (ret) {
694 spcp8x5_close(tty, port, NULL); 668 spcp8x5_close(port);
695 return -EPROTO; 669 return -EPROTO;
696 } 670 }
671 port->port.drain_delay = 256;
697 return 0; 672 return 0;
698} 673}
699 674
@@ -1033,6 +1008,8 @@ static struct usb_serial_driver spcp8x5_device = {
1033 .num_ports = 1, 1008 .num_ports = 1,
1034 .open = spcp8x5_open, 1009 .open = spcp8x5_open,
1035 .close = spcp8x5_close, 1010 .close = spcp8x5_close,
1011 .dtr_rts = spcp8x5_dtr_rts,
1012 .carrier_raised = spcp8x5_carrier_raised,
1036 .write = spcp8x5_write, 1013 .write = spcp8x5_write,
1037 .set_termios = spcp8x5_set_termios, 1014 .set_termios = spcp8x5_set_termios,
1038 .ioctl = spcp8x5_ioctl, 1015 .ioctl = spcp8x5_ioctl,
diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c
index 69879e437940..8b07ebc6baeb 100644
--- a/drivers/usb/serial/symbolserial.c
+++ b/drivers/usb/serial/symbolserial.c
@@ -152,8 +152,7 @@ static int symbol_open(struct tty_struct *tty, struct usb_serial_port *port,
152 return result; 152 return result;
153} 153}
154 154
155static void symbol_close(struct tty_struct *tty, struct usb_serial_port *port, 155static void symbol_close(struct usb_serial_port *port)
156 struct file *filp)
157{ 156{
158 struct symbol_private *priv = usb_get_serial_data(port->serial); 157 struct symbol_private *priv = usb_get_serial_data(port->serial);
159 158
diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index 0a64bac306ee..42cb04c403be 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -100,8 +100,7 @@ static int ti_startup(struct usb_serial *serial);
100static void ti_shutdown(struct usb_serial *serial); 100static void ti_shutdown(struct usb_serial *serial);
101static int ti_open(struct tty_struct *tty, struct usb_serial_port *port, 101static int ti_open(struct tty_struct *tty, struct usb_serial_port *port,
102 struct file *file); 102 struct file *file);
103static void ti_close(struct tty_struct *tty, struct usb_serial_port *port, 103static void ti_close(struct usb_serial_port *port);
104 struct file *file);
105static int ti_write(struct tty_struct *tty, struct usb_serial_port *port, 104static int ti_write(struct tty_struct *tty, struct usb_serial_port *port,
106 const unsigned char *data, int count); 105 const unsigned char *data, int count);
107static int ti_write_room(struct tty_struct *tty); 106static int ti_write_room(struct tty_struct *tty);
@@ -647,8 +646,7 @@ release_lock:
647} 646}
648 647
649 648
650static void ti_close(struct tty_struct *tty, struct usb_serial_port *port, 649static void ti_close(struct usb_serial_port *port)
651 struct file *file)
652{ 650{
653 struct ti_device *tdev; 651 struct ti_device *tdev;
654 struct ti_port *tport; 652 struct ti_port *tport;
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index f331e2bde88a..1967a7edc10c 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -238,9 +238,11 @@ static int serial_open (struct tty_struct *tty, struct file *filp)
238 goto bailout_interface_put; 238 goto bailout_interface_put;
239 mutex_unlock(&serial->disc_mutex); 239 mutex_unlock(&serial->disc_mutex);
240 } 240 }
241
242 mutex_unlock(&port->mutex); 241 mutex_unlock(&port->mutex);
243 return 0; 242 /* Now do the correct tty layer semantics */
243 retval = tty_port_block_til_ready(&port->port, tty, filp);
244 if (retval == 0)
245 return 0;
244 246
245bailout_interface_put: 247bailout_interface_put:
246 usb_autopm_put_interface(serial->interface); 248 usb_autopm_put_interface(serial->interface);
@@ -259,64 +261,89 @@ bailout_serial_put:
259 return retval; 261 return retval;
260} 262}
261 263
262static void serial_close(struct tty_struct *tty, struct file *filp) 264/**
265 * serial_do_down - shut down hardware
266 * @port: port to shut down
267 *
268 * Shut down a USB port unless it is the console. We never shut down the
269 * console hardware as it will always be in use.
270 *
271 * Don't free any resources at this point
272 */
273static void serial_do_down(struct usb_serial_port *port)
263{ 274{
264 struct usb_serial_port *port = tty->driver_data; 275 struct usb_serial_driver *drv = port->serial->type;
265 struct usb_serial *serial; 276 struct usb_serial *serial;
266 struct module *owner; 277 struct module *owner;
267 int count;
268 278
269 if (!port) 279 /* The console is magical, do not hang up the console hardware
280 or there will be tears */
281 if (port->console)
270 return; 282 return;
271 283
272 dbg("%s - port %d", __func__, port->number);
273
274 mutex_lock(&port->mutex); 284 mutex_lock(&port->mutex);
275 serial = port->serial; 285 serial = port->serial;
276 owner = serial->type->driver.owner; 286 owner = serial->type->driver.owner;
277 287
278 if (port->port.count == 0) { 288 if (drv->close)
279 mutex_unlock(&port->mutex); 289 drv->close(port);
280 return;
281 }
282
283 if (port->port.count == 1)
284 /* only call the device specific close if this
285 * port is being closed by the last owner. Ensure we do
286 * this before we drop the port count. The call is protected
287 * by the port mutex
288 */
289 serial->type->close(tty, port, filp);
290
291 if (port->port.count == (port->console ? 2 : 1)) {
292 struct tty_struct *tty = tty_port_tty_get(&port->port);
293 if (tty) {
294 /* We must do this before we drop the port count to
295 zero. */
296 if (tty->driver_data)
297 tty->driver_data = NULL;
298 tty_port_tty_set(&port->port, NULL);
299 tty_kref_put(tty);
300 }
301 }
302 290
303 --port->port.count;
304 count = port->port.count;
305 mutex_unlock(&port->mutex); 291 mutex_unlock(&port->mutex);
306 put_device(&port->dev); 292}
293
294/**
295 * serial_do_free - free resources post close/hangup
296 * @port: port to free up
297 *
298 * Do the resource freeing and refcount dropping for the port. We must
299 * be careful about ordering and we must avoid freeing up the console.
300 */
307 301
302static void serial_do_free(struct usb_serial_port *port)
303{
304 struct usb_serial *serial;
305 struct module *owner;
306
307 /* The console is magical, do not hang up the console hardware
308 or there will be tears */
309 if (port->console)
310 return;
311
312 serial = port->serial;
313 owner = serial->type->driver.owner;
314 put_device(&port->dev);
308 /* Mustn't dereference port any more */ 315 /* Mustn't dereference port any more */
309 if (count == 0) { 316 mutex_lock(&serial->disc_mutex);
310 mutex_lock(&serial->disc_mutex); 317 if (!serial->disconnected)
311 if (!serial->disconnected) 318 usb_autopm_put_interface(serial->interface);
312 usb_autopm_put_interface(serial->interface); 319 mutex_unlock(&serial->disc_mutex);
313 mutex_unlock(&serial->disc_mutex);
314 }
315 usb_serial_put(serial); 320 usb_serial_put(serial);
316
317 /* Mustn't dereference serial any more */ 321 /* Mustn't dereference serial any more */
318 if (count == 0) 322 module_put(owner);
319 module_put(owner); 323}
324
325static void serial_close(struct tty_struct *tty, struct file *filp)
326{
327 struct usb_serial_port *port = tty->driver_data;
328
329 dbg("%s - port %d", __func__, port->number);
330
331
332 if (tty_port_close_start(&port->port, tty, filp) == 0)
333 return;
334
335 serial_do_down(port);
336 tty_port_close_end(&port->port, tty);
337 tty_port_tty_set(&port->port, NULL);
338 serial_do_free(port);
339}
340
341static void serial_hangup(struct tty_struct *tty)
342{
343 struct usb_serial_port *port = tty->driver_data;
344 serial_do_down(port);
345 tty_port_hangup(&port->port);
346 serial_do_free(port);
320} 347}
321 348
322static int serial_write(struct tty_struct *tty, const unsigned char *buf, 349static int serial_write(struct tty_struct *tty, const unsigned char *buf,
@@ -648,6 +675,29 @@ static struct usb_serial_driver *search_serial_device(
648 return NULL; 675 return NULL;
649} 676}
650 677
678static int serial_carrier_raised(struct tty_port *port)
679{
680 struct usb_serial_port *p = container_of(port, struct usb_serial_port, port);
681 struct usb_serial_driver *drv = p->serial->type;
682 if (drv->carrier_raised)
683 return drv->carrier_raised(p);
684 /* No carrier control - don't block */
685 return 1;
686}
687
688static void serial_dtr_rts(struct tty_port *port, int on)
689{
690 struct usb_serial_port *p = container_of(port, struct usb_serial_port, port);
691 struct usb_serial_driver *drv = p->serial->type;
692 if (drv->dtr_rts)
693 drv->dtr_rts(p, on);
694}
695
696static const struct tty_port_operations serial_port_ops = {
697 .carrier_raised = serial_carrier_raised,
698 .dtr_rts = serial_dtr_rts,
699};
700
651int usb_serial_probe(struct usb_interface *interface, 701int usb_serial_probe(struct usb_interface *interface,
652 const struct usb_device_id *id) 702 const struct usb_device_id *id)
653{ 703{
@@ -841,6 +891,7 @@ int usb_serial_probe(struct usb_interface *interface,
841 if (!port) 891 if (!port)
842 goto probe_error; 892 goto probe_error;
843 tty_port_init(&port->port); 893 tty_port_init(&port->port);
894 port->port.ops = &serial_port_ops;
844 port->serial = serial; 895 port->serial = serial;
845 spin_lock_init(&port->lock); 896 spin_lock_init(&port->lock);
846 mutex_init(&port->mutex); 897 mutex_init(&port->mutex);
@@ -1071,6 +1122,9 @@ void usb_serial_disconnect(struct usb_interface *interface)
1071 if (port) { 1122 if (port) {
1072 struct tty_struct *tty = tty_port_tty_get(&port->port); 1123 struct tty_struct *tty = tty_port_tty_get(&port->port);
1073 if (tty) { 1124 if (tty) {
1125 /* The hangup will occur asynchronously but
1126 the object refcounts will sort out all the
1127 cleanup */
1074 tty_hangup(tty); 1128 tty_hangup(tty);
1075 tty_kref_put(tty); 1129 tty_kref_put(tty);
1076 } 1130 }
@@ -1135,6 +1189,7 @@ static const struct tty_operations serial_ops = {
1135 .open = serial_open, 1189 .open = serial_open,
1136 .close = serial_close, 1190 .close = serial_close,
1137 .write = serial_write, 1191 .write = serial_write,
1192 .hangup = serial_hangup,
1138 .write_room = serial_write_room, 1193 .write_room = serial_write_room,
1139 .ioctl = serial_ioctl, 1194 .ioctl = serial_ioctl,
1140 .set_termios = serial_set_termios, 1195 .set_termios = serial_set_termios,
@@ -1147,6 +1202,7 @@ static const struct tty_operations serial_ops = {
1147 .proc_fops = &serial_proc_fops, 1202 .proc_fops = &serial_proc_fops,
1148}; 1203};
1149 1204
1205
1150struct tty_driver *usb_serial_tty_driver; 1206struct tty_driver *usb_serial_tty_driver;
1151 1207
1152static int __init usb_serial_init(void) 1208static int __init usb_serial_init(void)
diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c
index 5ac414bda718..b15f1c0e1d4a 100644
--- a/drivers/usb/serial/visor.c
+++ b/drivers/usb/serial/visor.c
@@ -38,8 +38,7 @@
38/* function prototypes for a handspring visor */ 38/* function prototypes for a handspring visor */
39static int visor_open(struct tty_struct *tty, struct usb_serial_port *port, 39static int visor_open(struct tty_struct *tty, struct usb_serial_port *port,
40 struct file *filp); 40 struct file *filp);
41static void visor_close(struct tty_struct *tty, struct usb_serial_port *port, 41static void visor_close(struct usb_serial_port *port);
42 struct file *filp);
43static int visor_write(struct tty_struct *tty, struct usb_serial_port *port, 42static int visor_write(struct tty_struct *tty, struct usb_serial_port *port,
44 const unsigned char *buf, int count); 43 const unsigned char *buf, int count);
45static int visor_write_room(struct tty_struct *tty); 44static int visor_write_room(struct tty_struct *tty);
@@ -324,8 +323,7 @@ exit:
324} 323}
325 324
326 325
327static void visor_close(struct tty_struct *tty, 326static void visor_close(struct usb_serial_port *port)
328 struct usb_serial_port *port, struct file *filp)
329{ 327{
330 struct visor_private *priv = usb_get_serial_port_data(port); 328 struct visor_private *priv = usb_get_serial_port_data(port);
331 unsigned char *transfer_buffer; 329 unsigned char *transfer_buffer;
diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c
index 5335d3211c07..7c7295d09f34 100644
--- a/drivers/usb/serial/whiteheat.c
+++ b/drivers/usb/serial/whiteheat.c
@@ -147,8 +147,7 @@ static int whiteheat_attach(struct usb_serial *serial);
147static void whiteheat_shutdown(struct usb_serial *serial); 147static void whiteheat_shutdown(struct usb_serial *serial);
148static int whiteheat_open(struct tty_struct *tty, 148static int whiteheat_open(struct tty_struct *tty,
149 struct usb_serial_port *port, struct file *filp); 149 struct usb_serial_port *port, struct file *filp);
150static void whiteheat_close(struct tty_struct *tty, 150static void whiteheat_close(struct usb_serial_port *port);
151 struct usb_serial_port *port, struct file *filp);
152static int whiteheat_write(struct tty_struct *tty, 151static int whiteheat_write(struct tty_struct *tty,
153 struct usb_serial_port *port, 152 struct usb_serial_port *port,
154 const unsigned char *buf, int count); 153 const unsigned char *buf, int count);
@@ -712,8 +711,7 @@ exit:
712} 711}
713 712
714 713
715static void whiteheat_close(struct tty_struct *tty, 714static void whiteheat_close(struct usb_serial_port *port)
716 struct usb_serial_port *port, struct file *filp)
717{ 715{
718 struct whiteheat_private *info = usb_get_serial_port_data(port); 716 struct whiteheat_private *info = usb_get_serial_port_data(port);
719 struct whiteheat_urb_wrap *wrap; 717 struct whiteheat_urb_wrap *wrap;
@@ -723,31 +721,7 @@ static void whiteheat_close(struct tty_struct *tty,
723 721
724 dbg("%s - port %d", __func__, port->number); 722 dbg("%s - port %d", __func__, port->number);
725 723
726 mutex_lock(&port->serial->disc_mutex);
727 /* filp is NULL when called from usb_serial_disconnect */
728 if ((filp && (tty_hung_up_p(filp))) || port->serial->disconnected) {
729 mutex_unlock(&port->serial->disc_mutex);
730 return;
731 }
732 mutex_unlock(&port->serial->disc_mutex);
733
734 tty->closing = 1;
735
736/*
737 * Not currently in use; tty_wait_until_sent() calls
738 * serial_chars_in_buffer() which deadlocks on the second semaphore
739 * acquisition. This should be fixed at some point. Greg's been
740 * notified.
741 if ((filp->f_flags & (O_NDELAY | O_NONBLOCK)) == 0) {
742 tty_wait_until_sent(tty, CLOSING_DELAY);
743 }
744*/
745
746 tty_driver_flush_buffer(tty);
747 tty_ldisc_flush(tty);
748
749 firm_report_tx_done(port); 724 firm_report_tx_done(port);
750
751 firm_close(port); 725 firm_close(port);
752 726
753 /* shutdown our bulk reads and writes */ 727 /* shutdown our bulk reads and writes */
@@ -775,10 +749,7 @@ static void whiteheat_close(struct tty_struct *tty,
775 } 749 }
776 spin_unlock_irq(&info->lock); 750 spin_unlock_irq(&info->lock);
777 mutex_unlock(&info->deathwarrant); 751 mutex_unlock(&info->deathwarrant);
778
779 stop_command_port(port->serial); 752 stop_command_port(port->serial);
780
781 tty->closing = 0;
782} 753}
783 754
784 755
diff --git a/fs/buffer.c b/fs/buffer.c
index 49106127a4aa..1864d0b63088 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2935,6 +2935,8 @@ int submit_bh(int rw, struct buffer_head * bh)
2935 BUG_ON(!buffer_locked(bh)); 2935 BUG_ON(!buffer_locked(bh));
2936 BUG_ON(!buffer_mapped(bh)); 2936 BUG_ON(!buffer_mapped(bh));
2937 BUG_ON(!bh->b_end_io); 2937 BUG_ON(!bh->b_end_io);
2938 BUG_ON(buffer_delay(bh));
2939 BUG_ON(buffer_unwritten(bh));
2938 2940
2939 /* 2941 /*
2940 * Mask in barrier bit for a write (could be either a WRITE or a 2942 * Mask in barrier bit for a write (could be either a WRITE or a
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index f20c4069c220..b48689839428 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,12 @@
1Version 1.59
2------------
3Client uses server inode numbers (which are persistent) rather than
4client generated ones by default (mount option "serverino" turned
5on by default if server supports it). Add forceuid and forcegid
6mount options (so that when negotiating unix extensions specifying
7which uid mounted does not immediately force the server's reported
8uids to be overridden).
9
1Version 1.58 10Version 1.58
2------------ 11------------
3Guard against buffer overruns in various UCS-2 to UTF-8 string conversions 12Guard against buffer overruns in various UCS-2 to UTF-8 string conversions
@@ -10,6 +19,8 @@ we converted from). Fix endianness of the vcnum field used during
10session setup to distinguish multiple mounts to same server from different 19session setup to distinguish multiple mounts to same server from different
11userids. Raw NTLMSSP fixed (it requires /proc/fs/cifs/experimental 20userids. Raw NTLMSSP fixed (it requires /proc/fs/cifs/experimental
12flag to be set to 2, and mount must enable krb5 to turn on extended security). 21flag to be set to 2, and mount must enable krb5 to turn on extended security).
22Performance of file create to Samba improved (posix create on lookup
23removes 1 of 2 network requests sent on file create)
13 24
14Version 1.57 25Version 1.57
15------------ 26------------
diff --git a/fs/cifs/README b/fs/cifs/README
index db208ddb9899..ad92921dbde4 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -262,7 +262,8 @@ A partial list of the supported mount options follows:
262 mount. 262 mount.
263 domain Set the SMB/CIFS workgroup name prepended to the 263 domain Set the SMB/CIFS workgroup name prepended to the
264 username during CIFS session establishment 264 username during CIFS session establishment
265 uid Set the default uid for inodes. For mounts to servers 265 forceuid Set the default uid for inodes based on the uid
266 passed in. For mounts to servers
266 which do support the CIFS Unix extensions, such as a 267 which do support the CIFS Unix extensions, such as a
267 properly configured Samba server, the server provides 268 properly configured Samba server, the server provides
268 the uid, gid and mode so this parameter should not be 269 the uid, gid and mode so this parameter should not be
@@ -292,6 +293,12 @@ A partial list of the supported mount options follows:
292 the client. Note that the mount.cifs helper must be 293 the client. Note that the mount.cifs helper must be
293 at version 1.10 or higher to support specifying the uid 294 at version 1.10 or higher to support specifying the uid
294 (or gid) in non-numeric form. 295 (or gid) in non-numeric form.
296 forcegid (similar to above but for the groupid instead of uid)
297 uid Set the default uid for inodes, and indicate to the
298 cifs kernel driver which local user mounted . If the server
299 supports the unix extensions the default uid is
300 not used to fill in the owner fields of inodes (files)
301 unless the "forceuid" parameter is specified.
295 gid Set the default gid for inodes (similar to above). 302 gid Set the default gid for inodes (similar to above).
296 file_mode If CIFS Unix extensions are not supported by the server 303 file_mode If CIFS Unix extensions are not supported by the server
297 this overrides the default mode for file inodes. 304 this overrides the default mode for file inodes.
@@ -388,8 +395,13 @@ A partial list of the supported mount options follows:
388 or the CIFS Unix Extensions equivalent and for those 395 or the CIFS Unix Extensions equivalent and for those
389 this mount option will have no effect. Exporting cifs mounts 396 this mount option will have no effect. Exporting cifs mounts
390 under nfsd requires this mount option on the cifs mount. 397 under nfsd requires this mount option on the cifs mount.
398 This is now the default if server supports the
399 required network operation.
391 noserverino Client generates inode numbers (rather than using the actual one 400 noserverino Client generates inode numbers (rather than using the actual one
392 from the server) by default. 401 from the server). These inode numbers will vary after
402 unmount or reboot which can confuse some applications,
403 but not all server filesystems support unique inode
404 numbers.
393 setuids If the CIFS Unix extensions are negotiated with the server 405 setuids If the CIFS Unix extensions are negotiated with the server
394 the client will attempt to set the effective uid and gid of 406 the client will attempt to set the effective uid and gid of
395 the local process on newly created files, directories, and 407 the local process on newly created files, directories, and
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 67bf93a40d2e..4a4581cb2b5e 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -23,6 +23,7 @@
23#include <linux/string.h> 23#include <linux/string.h>
24#include <keys/user-type.h> 24#include <keys/user-type.h>
25#include <linux/key-type.h> 25#include <linux/key-type.h>
26#include <linux/inet.h>
26#include "cifsglob.h" 27#include "cifsglob.h"
27#include "cifs_spnego.h" 28#include "cifs_spnego.h"
28#include "cifs_debug.h" 29#include "cifs_debug.h"
@@ -73,9 +74,6 @@ struct key_type cifs_spnego_key_type = {
73 * strlen(";sec=ntlmsspi") */ 74 * strlen(";sec=ntlmsspi") */
74#define MAX_MECH_STR_LEN 13 75#define MAX_MECH_STR_LEN 13
75 76
76/* max possible addr len eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/128 */
77#define MAX_IPV6_ADDR_LEN 43
78
79/* strlen of "host=" */ 77/* strlen of "host=" */
80#define HOST_KEY_LEN 5 78#define HOST_KEY_LEN 5
81 79
@@ -102,7 +100,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
102 host=hostname sec=mechanism uid=0xFF user=username */ 100 host=hostname sec=mechanism uid=0xFF user=username */
103 desc_len = MAX_VER_STR_LEN + 101 desc_len = MAX_VER_STR_LEN +
104 HOST_KEY_LEN + strlen(hostname) + 102 HOST_KEY_LEN + strlen(hostname) +
105 IP_KEY_LEN + MAX_IPV6_ADDR_LEN + 103 IP_KEY_LEN + INET6_ADDRSTRLEN +
106 MAX_MECH_STR_LEN + 104 MAX_MECH_STR_LEN +
107 UID_KEY_LEN + (sizeof(uid_t) * 2) + 105 UID_KEY_LEN + (sizeof(uid_t) * 2) +
108 USER_KEY_LEN + strlen(sesInfo->userName) + 1; 106 USER_KEY_LEN + strlen(sesInfo->userName) + 1;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 57ecdc83c26f..1403b5d86a73 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -552,130 +552,138 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
552 return rc; 552 return rc;
553} 553}
554 554
555 555static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb,
556/* Retrieve an ACL from the server */ 556 __u16 fid, u32 *pacllen)
557static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode,
558 const char *path, const __u16 *pfid)
559{ 557{
560 struct cifsFileInfo *open_file = NULL;
561 bool unlock_file = false;
562 int xid;
563 int rc = -EIO;
564 __u16 fid;
565 struct super_block *sb;
566 struct cifs_sb_info *cifs_sb;
567 struct cifs_ntsd *pntsd = NULL; 558 struct cifs_ntsd *pntsd = NULL;
559 int xid, rc;
560
561 xid = GetXid();
562 rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen);
563 FreeXid(xid);
568 564
569 cFYI(1, ("get mode from ACL for %s", path));
570 565
571 if (inode == NULL) 566 cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen));
572 return NULL; 567 return pntsd;
568}
569
570static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
571 const char *path, u32 *pacllen)
572{
573 struct cifs_ntsd *pntsd = NULL;
574 int oplock = 0;
575 int xid, rc;
576 __u16 fid;
573 577
574 xid = GetXid(); 578 xid = GetXid();
575 if (pfid == NULL)
576 open_file = find_readable_file(CIFS_I(inode));
577 else
578 fid = *pfid;
579 579
580 sb = inode->i_sb; 580 rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, READ_CONTROL, 0,
581 if (sb == NULL) { 581 &fid, &oplock, NULL, cifs_sb->local_nls,
582 FreeXid(xid); 582 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
583 return NULL; 583 if (rc) {
584 } 584 cERROR(1, ("Unable to open file to get ACL"));
585 cifs_sb = CIFS_SB(sb); 585 goto out;
586
587 if (open_file) {
588 unlock_file = true;
589 fid = open_file->netfid;
590 } else if (pfid == NULL) {
591 int oplock = 0;
592 /* open file */
593 rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN,
594 READ_CONTROL, 0, &fid, &oplock, NULL,
595 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
596 CIFS_MOUNT_MAP_SPECIAL_CHR);
597 if (rc != 0) {
598 cERROR(1, ("Unable to open file to get ACL"));
599 FreeXid(xid);
600 return NULL;
601 }
602 } 586 }
603 587
604 rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen); 588 rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen);
605 cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen)); 589 cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen));
606 if (unlock_file == true) /* find_readable_file increments ref count */
607 atomic_dec(&open_file->wrtPending);
608 else if (pfid == NULL) /* if opened above we have to close the handle */
609 CIFSSMBClose(xid, cifs_sb->tcon, fid);
610 /* else handle was passed in by caller */
611 590
591 CIFSSMBClose(xid, cifs_sb->tcon, fid);
592 out:
612 FreeXid(xid); 593 FreeXid(xid);
613 return pntsd; 594 return pntsd;
614} 595}
615 596
616/* Set an ACL on the server */ 597/* Retrieve an ACL from the server */
617static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, 598static struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
618 struct inode *inode, const char *path) 599 struct inode *inode, const char *path,
600 u32 *pacllen)
619{ 601{
620 struct cifsFileInfo *open_file; 602 struct cifs_ntsd *pntsd = NULL;
621 bool unlock_file = false; 603 struct cifsFileInfo *open_file = NULL;
622 int xid;
623 int rc = -EIO;
624 __u16 fid;
625 struct super_block *sb;
626 struct cifs_sb_info *cifs_sb;
627 604
628 cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode)); 605 if (inode)
606 open_file = find_readable_file(CIFS_I(inode));
607 if (!open_file)
608 return get_cifs_acl_by_path(cifs_sb, path, pacllen);
629 609
630 if (!inode) 610 pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->netfid, pacllen);
631 return rc; 611 atomic_dec(&open_file->wrtPending);
612 return pntsd;
613}
632 614
633 sb = inode->i_sb; 615static int set_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, __u16 fid,
634 if (sb == NULL) 616 struct cifs_ntsd *pnntsd, u32 acllen)
635 return rc; 617{
618 int xid, rc;
636 619
637 cifs_sb = CIFS_SB(sb);
638 xid = GetXid(); 620 xid = GetXid();
621 rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen);
622 FreeXid(xid);
639 623
640 open_file = find_readable_file(CIFS_I(inode)); 624 cFYI(DBG2, ("SetCIFSACL rc = %d", rc));
641 if (open_file) { 625 return rc;
642 unlock_file = true; 626}
643 fid = open_file->netfid; 627
644 } else { 628static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path,
645 int oplock = 0; 629 struct cifs_ntsd *pnntsd, u32 acllen)
646 /* open file */ 630{
647 rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, 631 int oplock = 0;
648 WRITE_DAC, 0, &fid, &oplock, NULL, 632 int xid, rc;
649 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 633 __u16 fid;
650 CIFS_MOUNT_MAP_SPECIAL_CHR); 634
651 if (rc != 0) { 635 xid = GetXid();
652 cERROR(1, ("Unable to open file to set ACL")); 636
653 FreeXid(xid); 637 rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, WRITE_DAC, 0,
654 return rc; 638 &fid, &oplock, NULL, cifs_sb->local_nls,
655 } 639 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
640 if (rc) {
641 cERROR(1, ("Unable to open file to set ACL"));
642 goto out;
656 } 643 }
657 644
658 rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen); 645 rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen);
659 cFYI(DBG2, ("SetCIFSACL rc = %d", rc)); 646 cFYI(DBG2, ("SetCIFSACL rc = %d", rc));
660 if (unlock_file)
661 atomic_dec(&open_file->wrtPending);
662 else
663 CIFSSMBClose(xid, cifs_sb->tcon, fid);
664 647
648 CIFSSMBClose(xid, cifs_sb->tcon, fid);
649 out:
665 FreeXid(xid); 650 FreeXid(xid);
651 return rc;
652}
666 653
654/* Set an ACL on the server */
655static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
656 struct inode *inode, const char *path)
657{
658 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
659 struct cifsFileInfo *open_file;
660 int rc;
661
662 cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode));
663
664 open_file = find_readable_file(CIFS_I(inode));
665 if (!open_file)
666 return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen);
667
668 rc = set_cifs_acl_by_fid(cifs_sb, open_file->netfid, pnntsd, acllen);
669 atomic_dec(&open_file->wrtPending);
667 return rc; 670 return rc;
668} 671}
669 672
670/* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */ 673/* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */
671void acl_to_uid_mode(struct inode *inode, const char *path, const __u16 *pfid) 674void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode,
675 const char *path, const __u16 *pfid)
672{ 676{
673 struct cifs_ntsd *pntsd = NULL; 677 struct cifs_ntsd *pntsd = NULL;
674 u32 acllen = 0; 678 u32 acllen = 0;
675 int rc = 0; 679 int rc = 0;
676 680
677 cFYI(DBG2, ("converting ACL to mode for %s", path)); 681 cFYI(DBG2, ("converting ACL to mode for %s", path));
678 pntsd = get_cifs_acl(&acllen, inode, path, pfid); 682
683 if (pfid)
684 pntsd = get_cifs_acl_by_fid(cifs_sb, *pfid, &acllen);
685 else
686 pntsd = get_cifs_acl(cifs_sb, inode, path, &acllen);
679 687
680 /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */ 688 /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */
681 if (pntsd) 689 if (pntsd)
@@ -698,7 +706,7 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
698 cFYI(DBG2, ("set ACL from mode for %s", path)); 706 cFYI(DBG2, ("set ACL from mode for %s", path));
699 707
700 /* Get the security descriptor */ 708 /* Get the security descriptor */
701 pntsd = get_cifs_acl(&secdesclen, inode, path, NULL); 709 pntsd = get_cifs_acl(CIFS_SB(inode->i_sb), inode, path, &secdesclen);
702 710
703 /* Add three ACEs for owner, group, everyone getting rid of 711 /* Add three ACEs for owner, group, everyone getting rid of
704 other ACEs as chmod disables ACEs and set the security descriptor */ 712 other ACEs as chmod disables ACEs and set the security descriptor */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5e6d35804d73..0a10a59b6392 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -146,7 +146,7 @@ cifs_read_super(struct super_block *sb, void *data,
146#endif 146#endif
147 sb->s_blocksize = CIFS_MAX_MSGSIZE; 147 sb->s_blocksize = CIFS_MAX_MSGSIZE;
148 sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */ 148 sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */
149 inode = cifs_iget(sb, ROOT_I); 149 inode = cifs_root_iget(sb, ROOT_I);
150 150
151 if (IS_ERR(inode)) { 151 if (IS_ERR(inode)) {
152 rc = PTR_ERR(inode); 152 rc = PTR_ERR(inode);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 051b71cfdea9..9570a0e8023f 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -36,7 +36,7 @@ extern void cifs_read_inode(struct inode *);
36 36
37/* Functions related to inodes */ 37/* Functions related to inodes */
38extern const struct inode_operations cifs_dir_inode_ops; 38extern const struct inode_operations cifs_dir_inode_ops;
39extern struct inode *cifs_iget(struct super_block *, unsigned long); 39extern struct inode *cifs_root_iget(struct super_block *, unsigned long);
40extern int cifs_create(struct inode *, struct dentry *, int, 40extern int cifs_create(struct inode *, struct dentry *, int,
41 struct nameidata *); 41 struct nameidata *);
42extern struct dentry *cifs_lookup(struct inode *, struct dentry *, 42extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
@@ -100,5 +100,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
100extern const struct export_operations cifs_export_ops; 100extern const struct export_operations cifs_export_ops;
101#endif /* EXPERIMENTAL */ 101#endif /* EXPERIMENTAL */
102 102
103#define CIFS_VERSION "1.58" 103#define CIFS_VERSION "1.59"
104#endif /* _CIFSFS_H */ 104#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index fae083930eee..f9452329bcce 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -90,10 +90,10 @@ extern struct oplock_q_entry *AllocOplockQEntry(struct inode *, u16,
90 struct cifsTconInfo *); 90 struct cifsTconInfo *);
91extern void DeleteOplockQEntry(struct oplock_q_entry *); 91extern void DeleteOplockQEntry(struct oplock_q_entry *);
92extern void DeleteTconOplockQEntries(struct cifsTconInfo *); 92extern void DeleteTconOplockQEntries(struct cifsTconInfo *);
93extern struct timespec cifs_NTtimeToUnix(u64 utc_nanoseconds_since_1601); 93extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
94extern u64 cifs_UnixTimeToNT(struct timespec); 94extern u64 cifs_UnixTimeToNT(struct timespec);
95extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time); 95extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
96extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time); 96 int offset);
97 97
98extern int cifs_posix_open(char *full_path, struct inode **pinode, 98extern int cifs_posix_open(char *full_path, struct inode **pinode,
99 struct super_block *sb, int mode, int oflags, 99 struct super_block *sb, int mode, int oflags,
@@ -108,8 +108,8 @@ extern int cifs_get_inode_info(struct inode **pinode,
108extern int cifs_get_inode_info_unix(struct inode **pinode, 108extern int cifs_get_inode_info_unix(struct inode **pinode,
109 const unsigned char *search_path, 109 const unsigned char *search_path,
110 struct super_block *sb, int xid); 110 struct super_block *sb, int xid);
111extern void acl_to_uid_mode(struct inode *inode, const char *path, 111extern void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode,
112 const __u16 *pfid); 112 const char *path, const __u16 *pfid);
113extern int mode_to_acl(struct inode *inode, const char *path, __u64); 113extern int mode_to_acl(struct inode *inode, const char *path, __u64);
114 114
115extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, 115extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index d06260251c30..b84c61d5bca4 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -524,8 +524,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
524 int val, seconds, remain, result; 524 int val, seconds, remain, result;
525 struct timespec ts, utc; 525 struct timespec ts, utc;
526 utc = CURRENT_TIME; 526 utc = CURRENT_TIME;
527 ts = cnvrtDosUnixTm(le16_to_cpu(rsp->SrvTime.Date), 527 ts = cnvrtDosUnixTm(rsp->SrvTime.Date,
528 le16_to_cpu(rsp->SrvTime.Time)); 528 rsp->SrvTime.Time, 0);
529 cFYI(1, ("SrvTime %d sec since 1970 (utc: %d) diff: %d", 529 cFYI(1, ("SrvTime %d sec since 1970 (utc: %d) diff: %d",
530 (int)ts.tv_sec, (int)utc.tv_sec, 530 (int)ts.tv_sec, (int)utc.tv_sec,
531 (int)(utc.tv_sec - ts.tv_sec))); 531 (int)(utc.tv_sec - ts.tv_sec)));
@@ -2427,8 +2427,7 @@ querySymLinkRetry:
2427 params = 2 /* level */ + 4 /* rsrvd */ + name_len /* incl null */ ; 2427 params = 2 /* level */ + 4 /* rsrvd */ + name_len /* incl null */ ;
2428 pSMB->TotalDataCount = 0; 2428 pSMB->TotalDataCount = 0;
2429 pSMB->MaxParameterCount = cpu_to_le16(2); 2429 pSMB->MaxParameterCount = cpu_to_le16(2);
2430 /* BB find exact max data count below from sess structure BB */ 2430 pSMB->MaxDataCount = cpu_to_le16(CIFSMaxBufSize);
2431 pSMB->MaxDataCount = cpu_to_le16(4000);
2432 pSMB->MaxSetupCount = 0; 2431 pSMB->MaxSetupCount = 0;
2433 pSMB->Reserved = 0; 2432 pSMB->Reserved = 0;
2434 pSMB->Flags = 0; 2433 pSMB->Flags = 0;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4aa81a507b74..97f4311b9a8e 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -35,6 +35,7 @@
35#include <linux/namei.h> 35#include <linux/namei.h>
36#include <asm/uaccess.h> 36#include <asm/uaccess.h>
37#include <asm/processor.h> 37#include <asm/processor.h>
38#include <linux/inet.h>
38#include <net/ipv6.h> 39#include <net/ipv6.h>
39#include "cifspdu.h" 40#include "cifspdu.h"
40#include "cifsglob.h" 41#include "cifsglob.h"
@@ -61,7 +62,6 @@ struct smb_vol {
61 char *domainname; 62 char *domainname;
62 char *UNC; 63 char *UNC;
63 char *UNCip; 64 char *UNCip;
64 char *in6_addr; /* ipv6 address as human readable form of in6_addr */
65 char *iocharset; /* local code page for mapping to and from Unicode */ 65 char *iocharset; /* local code page for mapping to and from Unicode */
66 char source_rfc1001_name[16]; /* netbios name of client */ 66 char source_rfc1001_name[16]; /* netbios name of client */
67 char target_rfc1001_name[16]; /* netbios name of server for Win9x/ME */ 67 char target_rfc1001_name[16]; /* netbios name of server for Win9x/ME */
@@ -827,14 +827,16 @@ cifs_parse_mount_options(char *options, const char *devname,
827 vol->target_rfc1001_name[0] = 0; 827 vol->target_rfc1001_name[0] = 0;
828 vol->linux_uid = current_uid(); /* use current_euid() instead? */ 828 vol->linux_uid = current_uid(); /* use current_euid() instead? */
829 vol->linux_gid = current_gid(); 829 vol->linux_gid = current_gid();
830 vol->dir_mode = S_IRWXUGO; 830
831 /* 2767 perms indicate mandatory locking support */ 831 /* default to only allowing write access to owner of the mount */
832 vol->file_mode = (S_IRWXUGO | S_ISGID) & (~S_IXGRP); 832 vol->dir_mode = vol->file_mode = S_IRUGO | S_IXUGO | S_IWUSR;
833 833
834 /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */ 834 /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */
835 vol->rw = true; 835 vol->rw = true;
836 /* default is always to request posix paths. */ 836 /* default is always to request posix paths. */
837 vol->posix_paths = 1; 837 vol->posix_paths = 1;
838 /* default to using server inode numbers where available */
839 vol->server_ino = 1;
838 840
839 if (!options) 841 if (!options)
840 return 1; 842 return 1;
@@ -955,10 +957,12 @@ cifs_parse_mount_options(char *options, const char *devname,
955 } 957 }
956 strcpy(vol->password, value); 958 strcpy(vol->password, value);
957 } 959 }
958 } else if (strnicmp(data, "ip", 2) == 0) { 960 } else if (!strnicmp(data, "ip", 2) ||
961 !strnicmp(data, "addr", 4)) {
959 if (!value || !*value) { 962 if (!value || !*value) {
960 vol->UNCip = NULL; 963 vol->UNCip = NULL;
961 } else if (strnlen(value, 35) < 35) { 964 } else if (strnlen(value, INET6_ADDRSTRLEN) <
965 INET6_ADDRSTRLEN) {
962 vol->UNCip = value; 966 vol->UNCip = value;
963 } else { 967 } else {
964 printk(KERN_WARNING "CIFS: ip address " 968 printk(KERN_WARNING "CIFS: ip address "
@@ -1092,17 +1096,17 @@ cifs_parse_mount_options(char *options, const char *devname,
1092 return 1; 1096 return 1;
1093 } 1097 }
1094 } else if (strnicmp(data, "uid", 3) == 0) { 1098 } else if (strnicmp(data, "uid", 3) == 0) {
1095 if (value && *value) { 1099 if (value && *value)
1096 vol->linux_uid = 1100 vol->linux_uid =
1097 simple_strtoul(value, &value, 0); 1101 simple_strtoul(value, &value, 0);
1102 } else if (strnicmp(data, "forceuid", 8) == 0) {
1098 vol->override_uid = 1; 1103 vol->override_uid = 1;
1099 }
1100 } else if (strnicmp(data, "gid", 3) == 0) { 1104 } else if (strnicmp(data, "gid", 3) == 0) {
1101 if (value && *value) { 1105 if (value && *value)
1102 vol->linux_gid = 1106 vol->linux_gid =
1103 simple_strtoul(value, &value, 0); 1107 simple_strtoul(value, &value, 0);
1108 } else if (strnicmp(data, "forcegid", 8) == 0) {
1104 vol->override_gid = 1; 1109 vol->override_gid = 1;
1105 }
1106 } else if (strnicmp(data, "file_mode", 4) == 0) { 1110 } else if (strnicmp(data, "file_mode", 4) == 0) {
1107 if (value && *value) { 1111 if (value && *value) {
1108 vol->file_mode = 1112 vol->file_mode =
@@ -1315,16 +1319,6 @@ cifs_parse_mount_options(char *options, const char *devname,
1315 vol->direct_io = 1; 1319 vol->direct_io = 1;
1316 } else if (strnicmp(data, "forcedirectio", 13) == 0) { 1320 } else if (strnicmp(data, "forcedirectio", 13) == 0) {
1317 vol->direct_io = 1; 1321 vol->direct_io = 1;
1318 } else if (strnicmp(data, "in6_addr", 8) == 0) {
1319 if (!value || !*value) {
1320 vol->in6_addr = NULL;
1321 } else if (strnlen(value, 49) == 48) {
1322 vol->in6_addr = value;
1323 } else {
1324 printk(KERN_WARNING "CIFS: ip v6 address not "
1325 "48 characters long\n");
1326 return 1;
1327 }
1328 } else if (strnicmp(data, "noac", 4) == 0) { 1322 } else if (strnicmp(data, "noac", 4) == 0) {
1329 printk(KERN_WARNING "CIFS: Mount option noac not " 1323 printk(KERN_WARNING "CIFS: Mount option noac not "
1330 "supported. Instead set " 1324 "supported. Instead set "
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 302ea15f02e6..06866841b97f 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -241,7 +241,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
241 /* BB need same check in cifs_create too? */ 241 /* BB need same check in cifs_create too? */
242 /* if not oplocked, invalidate inode pages if mtime or file 242 /* if not oplocked, invalidate inode pages if mtime or file
243 size changed */ 243 size changed */
244 temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime)); 244 temp = cifs_NTtimeToUnix(buf->LastWriteTime);
245 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) && 245 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
246 (file->f_path.dentry->d_inode->i_size == 246 (file->f_path.dentry->d_inode->i_size ==
247 (loff_t)le64_to_cpu(buf->EndOfFile))) { 247 (loff_t)le64_to_cpu(buf->EndOfFile))) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 9c869a6dcba1..fad882b075ba 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -85,10 +85,10 @@ static void cifs_unix_info_to_inode(struct inode *inode,
85 __u64 num_of_bytes = le64_to_cpu(info->NumOfBytes); 85 __u64 num_of_bytes = le64_to_cpu(info->NumOfBytes);
86 __u64 end_of_file = le64_to_cpu(info->EndOfFile); 86 __u64 end_of_file = le64_to_cpu(info->EndOfFile);
87 87
88 inode->i_atime = cifs_NTtimeToUnix(le64_to_cpu(info->LastAccessTime)); 88 inode->i_atime = cifs_NTtimeToUnix(info->LastAccessTime);
89 inode->i_mtime = 89 inode->i_mtime =
90 cifs_NTtimeToUnix(le64_to_cpu(info->LastModificationTime)); 90 cifs_NTtimeToUnix(info->LastModificationTime);
91 inode->i_ctime = cifs_NTtimeToUnix(le64_to_cpu(info->LastStatusChange)); 91 inode->i_ctime = cifs_NTtimeToUnix(info->LastStatusChange);
92 inode->i_mode = le64_to_cpu(info->Permissions); 92 inode->i_mode = le64_to_cpu(info->Permissions);
93 93
94 /* 94 /*
@@ -554,14 +554,11 @@ int cifs_get_inode_info(struct inode **pinode,
554 554
555 /* Linux can not store file creation time so ignore it */ 555 /* Linux can not store file creation time so ignore it */
556 if (pfindData->LastAccessTime) 556 if (pfindData->LastAccessTime)
557 inode->i_atime = cifs_NTtimeToUnix 557 inode->i_atime = cifs_NTtimeToUnix(pfindData->LastAccessTime);
558 (le64_to_cpu(pfindData->LastAccessTime));
559 else /* do not need to use current_fs_time - time not stored */ 558 else /* do not need to use current_fs_time - time not stored */
560 inode->i_atime = CURRENT_TIME; 559 inode->i_atime = CURRENT_TIME;
561 inode->i_mtime = 560 inode->i_mtime = cifs_NTtimeToUnix(pfindData->LastWriteTime);
562 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime)); 561 inode->i_ctime = cifs_NTtimeToUnix(pfindData->ChangeTime);
563 inode->i_ctime =
564 cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime));
565 cFYI(DBG2, ("Attributes came in as 0x%x", attr)); 562 cFYI(DBG2, ("Attributes came in as 0x%x", attr));
566 if (adjustTZ && (pTcon->ses) && (pTcon->ses->server)) { 563 if (adjustTZ && (pTcon->ses) && (pTcon->ses->server)) {
567 inode->i_ctime.tv_sec += pTcon->ses->server->timeAdj; 564 inode->i_ctime.tv_sec += pTcon->ses->server->timeAdj;
@@ -629,7 +626,7 @@ int cifs_get_inode_info(struct inode **pinode,
629 /* fill in 0777 bits from ACL */ 626 /* fill in 0777 bits from ACL */
630 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 627 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
631 cFYI(1, ("Getting mode bits from ACL")); 628 cFYI(1, ("Getting mode bits from ACL"));
632 acl_to_uid_mode(inode, full_path, pfid); 629 acl_to_uid_mode(cifs_sb, inode, full_path, pfid);
633 } 630 }
634#endif 631#endif
635 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { 632 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
@@ -699,7 +696,7 @@ char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb)
699} 696}
700 697
701/* gets root inode */ 698/* gets root inode */
702struct inode *cifs_iget(struct super_block *sb, unsigned long ino) 699struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
703{ 700{
704 int xid; 701 int xid;
705 struct cifs_sb_info *cifs_sb; 702 struct cifs_sb_info *cifs_sb;
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index e2fe998989a3..32d6baa0a54f 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -853,12 +853,12 @@ smbCalcSize_LE(struct smb_hdr *ptr)
853 853
854#define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000) 854#define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000)
855 855
856 /* 856/*
857 * Convert the NT UTC (based 1601-01-01, in hundred nanosecond units) 857 * Convert the NT UTC (based 1601-01-01, in hundred nanosecond units)
858 * into Unix UTC (based 1970-01-01, in seconds). 858 * into Unix UTC (based 1970-01-01, in seconds).
859 */ 859 */
860struct timespec 860struct timespec
861cifs_NTtimeToUnix(u64 ntutc) 861cifs_NTtimeToUnix(__le64 ntutc)
862{ 862{
863 struct timespec ts; 863 struct timespec ts;
864 /* BB what about the timezone? BB */ 864 /* BB what about the timezone? BB */
@@ -866,7 +866,7 @@ cifs_NTtimeToUnix(u64 ntutc)
866 /* Subtract the NTFS time offset, then convert to 1s intervals. */ 866 /* Subtract the NTFS time offset, then convert to 1s intervals. */
867 u64 t; 867 u64 t;
868 868
869 t = ntutc - NTFS_TIME_OFFSET; 869 t = le64_to_cpu(ntutc) - NTFS_TIME_OFFSET;
870 ts.tv_nsec = do_div(t, 10000000) * 100; 870 ts.tv_nsec = do_div(t, 10000000) * 100;
871 ts.tv_sec = t; 871 ts.tv_sec = t;
872 return ts; 872 return ts;
@@ -883,16 +883,12 @@ cifs_UnixTimeToNT(struct timespec t)
883static int total_days_of_prev_months[] = 883static int total_days_of_prev_months[] =
884{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334}; 884{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334};
885 885
886 886struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset)
887__le64 cnvrtDosCifsTm(__u16 date, __u16 time)
888{
889 return cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm(date, time)));
890}
891
892struct timespec cnvrtDosUnixTm(__u16 date, __u16 time)
893{ 887{
894 struct timespec ts; 888 struct timespec ts;
895 int sec, min, days, month, year; 889 int sec, min, days, month, year;
890 u16 date = le16_to_cpu(le_date);
891 u16 time = le16_to_cpu(le_time);
896 SMB_TIME *st = (SMB_TIME *)&time; 892 SMB_TIME *st = (SMB_TIME *)&time;
897 SMB_DATE *sd = (SMB_DATE *)&date; 893 SMB_DATE *sd = (SMB_DATE *)&date;
898 894
@@ -933,7 +929,7 @@ struct timespec cnvrtDosUnixTm(__u16 date, __u16 time)
933 days -= ((year & 0x03) == 0) && (month < 2 ? 1 : 0); 929 days -= ((year & 0x03) == 0) && (month < 2 ? 1 : 0);
934 sec += 24 * 60 * 60 * days; 930 sec += 24 * 60 * 60 * days;
935 931
936 ts.tv_sec = sec; 932 ts.tv_sec = sec + offset;
937 933
938 /* cFYI(1,("sec after cnvrt dos to unix time %d",sec)); */ 934 /* cFYI(1,("sec after cnvrt dos to unix time %d",sec)); */
939 935
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 964e097c8203..86d0055dc529 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -115,17 +115,6 @@ construct_dentry(struct qstr *qstring, struct file *file,
115 return rc; 115 return rc;
116} 116}
117 117
118static void AdjustForTZ(struct cifsTconInfo *tcon, struct inode *inode)
119{
120 if ((tcon) && (tcon->ses) && (tcon->ses->server)) {
121 inode->i_ctime.tv_sec += tcon->ses->server->timeAdj;
122 inode->i_mtime.tv_sec += tcon->ses->server->timeAdj;
123 inode->i_atime.tv_sec += tcon->ses->server->timeAdj;
124 }
125 return;
126}
127
128
129static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, 118static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
130 char *buf, unsigned int *pobject_type, int isNewInode) 119 char *buf, unsigned int *pobject_type, int isNewInode)
131{ 120{
@@ -150,26 +139,25 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
150 allocation_size = le64_to_cpu(pfindData->AllocationSize); 139 allocation_size = le64_to_cpu(pfindData->AllocationSize);
151 end_of_file = le64_to_cpu(pfindData->EndOfFile); 140 end_of_file = le64_to_cpu(pfindData->EndOfFile);
152 tmp_inode->i_atime = 141 tmp_inode->i_atime =
153 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime)); 142 cifs_NTtimeToUnix(pfindData->LastAccessTime);
154 tmp_inode->i_mtime = 143 tmp_inode->i_mtime =
155 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime)); 144 cifs_NTtimeToUnix(pfindData->LastWriteTime);
156 tmp_inode->i_ctime = 145 tmp_inode->i_ctime =
157 cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime)); 146 cifs_NTtimeToUnix(pfindData->ChangeTime);
158 } else { /* legacy, OS2 and DOS style */ 147 } else { /* legacy, OS2 and DOS style */
159/* struct timespec ts;*/ 148 int offset = cifs_sb->tcon->ses->server->timeAdj;
160 FIND_FILE_STANDARD_INFO *pfindData = 149 FIND_FILE_STANDARD_INFO *pfindData =
161 (FIND_FILE_STANDARD_INFO *)buf; 150 (FIND_FILE_STANDARD_INFO *)buf;
162 151
163 tmp_inode->i_mtime = cnvrtDosUnixTm( 152 tmp_inode->i_mtime = cnvrtDosUnixTm(pfindData->LastWriteDate,
164 le16_to_cpu(pfindData->LastWriteDate), 153 pfindData->LastWriteTime,
165 le16_to_cpu(pfindData->LastWriteTime)); 154 offset);
166 tmp_inode->i_atime = cnvrtDosUnixTm( 155 tmp_inode->i_atime = cnvrtDosUnixTm(pfindData->LastAccessDate,
167 le16_to_cpu(pfindData->LastAccessDate), 156 pfindData->LastAccessTime,
168 le16_to_cpu(pfindData->LastAccessTime)); 157 offset);
169 tmp_inode->i_ctime = cnvrtDosUnixTm( 158 tmp_inode->i_ctime = cnvrtDosUnixTm(pfindData->LastWriteDate,
170 le16_to_cpu(pfindData->LastWriteDate), 159 pfindData->LastWriteTime,
171 le16_to_cpu(pfindData->LastWriteTime)); 160 offset);
172 AdjustForTZ(cifs_sb->tcon, tmp_inode);
173 attr = le16_to_cpu(pfindData->Attributes); 161 attr = le16_to_cpu(pfindData->Attributes);
174 allocation_size = le32_to_cpu(pfindData->AllocationSize); 162 allocation_size = le32_to_cpu(pfindData->AllocationSize);
175 end_of_file = le32_to_cpu(pfindData->DataSize); 163 end_of_file = le32_to_cpu(pfindData->DataSize);
@@ -331,11 +319,11 @@ static void unix_fill_in_inode(struct inode *tmp_inode,
331 local_size = tmp_inode->i_size; 319 local_size = tmp_inode->i_size;
332 320
333 tmp_inode->i_atime = 321 tmp_inode->i_atime =
334 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime)); 322 cifs_NTtimeToUnix(pfindData->LastAccessTime);
335 tmp_inode->i_mtime = 323 tmp_inode->i_mtime =
336 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastModificationTime)); 324 cifs_NTtimeToUnix(pfindData->LastModificationTime);
337 tmp_inode->i_ctime = 325 tmp_inode->i_ctime =
338 cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastStatusChange)); 326 cifs_NTtimeToUnix(pfindData->LastStatusChange);
339 327
340 tmp_inode->i_mode = le64_to_cpu(pfindData->Permissions); 328 tmp_inode->i_mode = le64_to_cpu(pfindData->Permissions);
341 /* since we set the inode type below we need to mask off type 329 /* since we set the inode type below we need to mask off type
diff --git a/fs/compat.c b/fs/compat.c
index 681ed81e6be0..bb2a9b2e8173 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1488,7 +1488,7 @@ int compat_do_execve(char * filename,
1488 if (!bprm) 1488 if (!bprm)
1489 goto out_files; 1489 goto out_files;
1490 1490
1491 retval = mutex_lock_interruptible(&current->cred_exec_mutex); 1491 retval = mutex_lock_interruptible(&current->cred_guard_mutex);
1492 if (retval < 0) 1492 if (retval < 0)
1493 goto out_free; 1493 goto out_free;
1494 current->in_execve = 1; 1494 current->in_execve = 1;
@@ -1550,7 +1550,7 @@ int compat_do_execve(char * filename,
1550 /* execve succeeded */ 1550 /* execve succeeded */
1551 current->fs->in_exec = 0; 1551 current->fs->in_exec = 0;
1552 current->in_execve = 0; 1552 current->in_execve = 0;
1553 mutex_unlock(&current->cred_exec_mutex); 1553 mutex_unlock(&current->cred_guard_mutex);
1554 acct_update_integrals(current); 1554 acct_update_integrals(current);
1555 free_bprm(bprm); 1555 free_bprm(bprm);
1556 if (displaced) 1556 if (displaced)
@@ -1573,7 +1573,7 @@ out_unmark:
1573 1573
1574out_unlock: 1574out_unlock:
1575 current->in_execve = 0; 1575 current->in_execve = 0;
1576 mutex_unlock(&current->cred_exec_mutex); 1576 mutex_unlock(&current->cred_guard_mutex);
1577 1577
1578out_free: 1578out_free:
1579 free_bprm(bprm); 1579 free_bprm(bprm);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index c68edb969441..9b1d285f9fe6 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -557,8 +557,10 @@ static int __init init_devpts_fs(void)
557 int err = register_filesystem(&devpts_fs_type); 557 int err = register_filesystem(&devpts_fs_type);
558 if (!err) { 558 if (!err) {
559 devpts_mnt = kern_mount(&devpts_fs_type); 559 devpts_mnt = kern_mount(&devpts_fs_type);
560 if (IS_ERR(devpts_mnt)) 560 if (IS_ERR(devpts_mnt)) {
561 err = PTR_ERR(devpts_mnt); 561 err = PTR_ERR(devpts_mnt);
562 unregister_filesystem(&devpts_fs_type);
563 }
562 } 564 }
563 return err; 565 return err;
564} 566}
diff --git a/fs/exec.c b/fs/exec.c
index 895823d0149d..a7fcd975c6b2 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1016,7 +1016,7 @@ void install_exec_creds(struct linux_binprm *bprm)
1016 commit_creds(bprm->cred); 1016 commit_creds(bprm->cred);
1017 bprm->cred = NULL; 1017 bprm->cred = NULL;
1018 1018
1019 /* cred_exec_mutex must be held at least to this point to prevent 1019 /* cred_guard_mutex must be held at least to this point to prevent
1020 * ptrace_attach() from altering our determination of the task's 1020 * ptrace_attach() from altering our determination of the task's
1021 * credentials; any time after this it may be unlocked */ 1021 * credentials; any time after this it may be unlocked */
1022 1022
@@ -1026,7 +1026,7 @@ EXPORT_SYMBOL(install_exec_creds);
1026 1026
1027/* 1027/*
1028 * determine how safe it is to execute the proposed program 1028 * determine how safe it is to execute the proposed program
1029 * - the caller must hold current->cred_exec_mutex to protect against 1029 * - the caller must hold current->cred_guard_mutex to protect against
1030 * PTRACE_ATTACH 1030 * PTRACE_ATTACH
1031 */ 1031 */
1032int check_unsafe_exec(struct linux_binprm *bprm) 1032int check_unsafe_exec(struct linux_binprm *bprm)
@@ -1268,7 +1268,7 @@ int do_execve(char * filename,
1268 if (!bprm) 1268 if (!bprm)
1269 goto out_files; 1269 goto out_files;
1270 1270
1271 retval = mutex_lock_interruptible(&current->cred_exec_mutex); 1271 retval = mutex_lock_interruptible(&current->cred_guard_mutex);
1272 if (retval < 0) 1272 if (retval < 0)
1273 goto out_free; 1273 goto out_free;
1274 current->in_execve = 1; 1274 current->in_execve = 1;
@@ -1331,7 +1331,7 @@ int do_execve(char * filename,
1331 /* execve succeeded */ 1331 /* execve succeeded */
1332 current->fs->in_exec = 0; 1332 current->fs->in_exec = 0;
1333 current->in_execve = 0; 1333 current->in_execve = 0;
1334 mutex_unlock(&current->cred_exec_mutex); 1334 mutex_unlock(&current->cred_guard_mutex);
1335 acct_update_integrals(current); 1335 acct_update_integrals(current);
1336 free_bprm(bprm); 1336 free_bprm(bprm);
1337 if (displaced) 1337 if (displaced)
@@ -1354,7 +1354,7 @@ out_unmark:
1354 1354
1355out_unlock: 1355out_unlock:
1356 current->in_execve = 0; 1356 current->in_execve = 0;
1357 mutex_unlock(&current->cred_exec_mutex); 1357 mutex_unlock(&current->cred_guard_mutex);
1358 1358
1359out_free: 1359out_free:
1360 free_bprm(bprm); 1360 free_bprm(bprm);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 5c4afe652245..e3c748faf2db 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1093,6 +1093,7 @@ failed_mount:
1093 brelse(bh); 1093 brelse(bh);
1094failed_sbi: 1094failed_sbi:
1095 sb->s_fs_info = NULL; 1095 sb->s_fs_info = NULL;
1096 kfree(sbi->s_blockgroup_lock);
1096 kfree(sbi); 1097 kfree(sbi);
1097 return ret; 1098 return ret;
1098} 1099}
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 599dbfe504c3..d8b73d4abe3e 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2021,6 +2021,7 @@ failed_mount:
2021 brelse(bh); 2021 brelse(bh);
2022out_fail: 2022out_fail:
2023 sb->s_fs_info = NULL; 2023 sb->s_fs_info = NULL;
2024 kfree(sbi->s_blockgroup_lock);
2024 kfree(sbi); 2025 kfree(sbi);
2025 lock_kernel(); 2026 lock_kernel();
2026 return ret; 2027 return ret;
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index a8ff003a00f7..8a34710ecf40 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -5,8 +5,8 @@
5obj-$(CONFIG_EXT4_FS) += ext4.o 5obj-$(CONFIG_EXT4_FS) += ext4.o
6 6
7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ 7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ 8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
9 ext4_jbd2.o migrate.o mballoc.o 9 ext4_jbd2.o migrate.o mballoc.o block_validity.o
10 10
11ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o 11ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
12ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o 12ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 53c72ad85877..e2126d70dff5 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -19,7 +19,6 @@
19#include <linux/buffer_head.h> 19#include <linux/buffer_head.h>
20#include "ext4.h" 20#include "ext4.h"
21#include "ext4_jbd2.h" 21#include "ext4_jbd2.h"
22#include "group.h"
23#include "mballoc.h" 22#include "mballoc.h"
24 23
25/* 24/*
@@ -88,6 +87,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
88 ext4_group_t block_group, struct ext4_group_desc *gdp) 87 ext4_group_t block_group, struct ext4_group_desc *gdp)
89{ 88{
90 int bit, bit_max; 89 int bit, bit_max;
90 ext4_group_t ngroups = ext4_get_groups_count(sb);
91 unsigned free_blocks, group_blocks; 91 unsigned free_blocks, group_blocks;
92 struct ext4_sb_info *sbi = EXT4_SB(sb); 92 struct ext4_sb_info *sbi = EXT4_SB(sb);
93 93
@@ -123,7 +123,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
123 bit_max += ext4_bg_num_gdb(sb, block_group); 123 bit_max += ext4_bg_num_gdb(sb, block_group);
124 } 124 }
125 125
126 if (block_group == sbi->s_groups_count - 1) { 126 if (block_group == ngroups - 1) {
127 /* 127 /*
128 * Even though mke2fs always initialize first and last group 128 * Even though mke2fs always initialize first and last group
129 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need 129 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need
@@ -131,7 +131,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
131 */ 131 */
132 group_blocks = ext4_blocks_count(sbi->s_es) - 132 group_blocks = ext4_blocks_count(sbi->s_es) -
133 le32_to_cpu(sbi->s_es->s_first_data_block) - 133 le32_to_cpu(sbi->s_es->s_first_data_block) -
134 (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1)); 134 (EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1));
135 } else { 135 } else {
136 group_blocks = EXT4_BLOCKS_PER_GROUP(sb); 136 group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
137 } 137 }
@@ -205,18 +205,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
205{ 205{
206 unsigned int group_desc; 206 unsigned int group_desc;
207 unsigned int offset; 207 unsigned int offset;
208 ext4_group_t ngroups = ext4_get_groups_count(sb);
208 struct ext4_group_desc *desc; 209 struct ext4_group_desc *desc;
209 struct ext4_sb_info *sbi = EXT4_SB(sb); 210 struct ext4_sb_info *sbi = EXT4_SB(sb);
210 211
211 if (block_group >= sbi->s_groups_count) { 212 if (block_group >= ngroups) {
212 ext4_error(sb, "ext4_get_group_desc", 213 ext4_error(sb, "ext4_get_group_desc",
213 "block_group >= groups_count - " 214 "block_group >= groups_count - "
214 "block_group = %u, groups_count = %u", 215 "block_group = %u, groups_count = %u",
215 block_group, sbi->s_groups_count); 216 block_group, ngroups);
216 217
217 return NULL; 218 return NULL;
218 } 219 }
219 smp_rmb();
220 220
221 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); 221 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
222 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); 222 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
@@ -326,16 +326,16 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
326 unlock_buffer(bh); 326 unlock_buffer(bh);
327 return bh; 327 return bh;
328 } 328 }
329 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); 329 ext4_lock_group(sb, block_group);
330 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 330 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
331 ext4_init_block_bitmap(sb, bh, block_group, desc); 331 ext4_init_block_bitmap(sb, bh, block_group, desc);
332 set_bitmap_uptodate(bh); 332 set_bitmap_uptodate(bh);
333 set_buffer_uptodate(bh); 333 set_buffer_uptodate(bh);
334 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); 334 ext4_unlock_group(sb, block_group);
335 unlock_buffer(bh); 335 unlock_buffer(bh);
336 return bh; 336 return bh;
337 } 337 }
338 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); 338 ext4_unlock_group(sb, block_group);
339 if (buffer_uptodate(bh)) { 339 if (buffer_uptodate(bh)) {
340 /* 340 /*
341 * if not uninit if bh is uptodate, 341 * if not uninit if bh is uptodate,
@@ -451,7 +451,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
451 down_write(&grp->alloc_sem); 451 down_write(&grp->alloc_sem);
452 for (i = 0, blocks_freed = 0; i < count; i++) { 452 for (i = 0, blocks_freed = 0; i < count; i++) {
453 BUFFER_TRACE(bitmap_bh, "clear bit"); 453 BUFFER_TRACE(bitmap_bh, "clear bit");
454 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 454 if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
455 bit + i, bitmap_bh->b_data)) { 455 bit + i, bitmap_bh->b_data)) {
456 ext4_error(sb, __func__, 456 ext4_error(sb, __func__,
457 "bit already cleared for block %llu", 457 "bit already cleared for block %llu",
@@ -461,11 +461,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
461 blocks_freed++; 461 blocks_freed++;
462 } 462 }
463 } 463 }
464 spin_lock(sb_bgl_lock(sbi, block_group)); 464 ext4_lock_group(sb, block_group);
465 blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); 465 blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
466 ext4_free_blks_set(sb, desc, blk_free_count); 466 ext4_free_blks_set(sb, desc, blk_free_count);
467 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); 467 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
468 spin_unlock(sb_bgl_lock(sbi, block_group)); 468 ext4_unlock_group(sb, block_group);
469 percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); 469 percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
470 470
471 if (sbi->s_log_groups_per_flex) { 471 if (sbi->s_log_groups_per_flex) {
@@ -665,7 +665,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
665 ext4_fsblk_t desc_count; 665 ext4_fsblk_t desc_count;
666 struct ext4_group_desc *gdp; 666 struct ext4_group_desc *gdp;
667 ext4_group_t i; 667 ext4_group_t i;
668 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 668 ext4_group_t ngroups = ext4_get_groups_count(sb);
669#ifdef EXT4FS_DEBUG 669#ifdef EXT4FS_DEBUG
670 struct ext4_super_block *es; 670 struct ext4_super_block *es;
671 ext4_fsblk_t bitmap_count; 671 ext4_fsblk_t bitmap_count;
@@ -677,7 +677,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
677 bitmap_count = 0; 677 bitmap_count = 0;
678 gdp = NULL; 678 gdp = NULL;
679 679
680 smp_rmb();
681 for (i = 0; i < ngroups; i++) { 680 for (i = 0; i < ngroups; i++) {
682 gdp = ext4_get_group_desc(sb, i, NULL); 681 gdp = ext4_get_group_desc(sb, i, NULL);
683 if (!gdp) 682 if (!gdp)
@@ -700,7 +699,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
700 return bitmap_count; 699 return bitmap_count;
701#else 700#else
702 desc_count = 0; 701 desc_count = 0;
703 smp_rmb();
704 for (i = 0; i < ngroups; i++) { 702 for (i = 0; i < ngroups; i++) {
705 gdp = ext4_get_group_desc(sb, i, NULL); 703 gdp = ext4_get_group_desc(sb, i, NULL);
706 if (!gdp) 704 if (!gdp)
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
new file mode 100644
index 000000000000..50784ef07563
--- /dev/null
+++ b/fs/ext4/block_validity.c
@@ -0,0 +1,244 @@
1/*
2 * linux/fs/ext4/block_validity.c
3 *
4 * Copyright (C) 2009
5 * Theodore Ts'o (tytso@mit.edu)
6 *
7 * Track which blocks in the filesystem are metadata blocks that
8 * should never be used as data blocks by files or directories.
9 */
10
11#include <linux/time.h>
12#include <linux/fs.h>
13#include <linux/namei.h>
14#include <linux/quotaops.h>
15#include <linux/buffer_head.h>
16#include <linux/module.h>
17#include <linux/swap.h>
18#include <linux/pagemap.h>
19#include <linux/version.h>
20#include <linux/blkdev.h>
21#include <linux/mutex.h>
22#include "ext4.h"
23
24struct ext4_system_zone {
25 struct rb_node node;
26 ext4_fsblk_t start_blk;
27 unsigned int count;
28};
29
30static struct kmem_cache *ext4_system_zone_cachep;
31
32int __init init_ext4_system_zone(void)
33{
34 ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone,
35 SLAB_RECLAIM_ACCOUNT);
36 if (ext4_system_zone_cachep == NULL)
37 return -ENOMEM;
38 return 0;
39}
40
41void exit_ext4_system_zone(void)
42{
43 kmem_cache_destroy(ext4_system_zone_cachep);
44}
45
46static inline int can_merge(struct ext4_system_zone *entry1,
47 struct ext4_system_zone *entry2)
48{
49 if ((entry1->start_blk + entry1->count) == entry2->start_blk)
50 return 1;
51 return 0;
52}
53
54/*
55 * Mark a range of blocks as belonging to the "system zone" --- that
56 * is, filesystem metadata blocks which should never be used by
57 * inodes.
58 */
59static int add_system_zone(struct ext4_sb_info *sbi,
60 ext4_fsblk_t start_blk,
61 unsigned int count)
62{
63 struct ext4_system_zone *new_entry = NULL, *entry;
64 struct rb_node **n = &sbi->system_blks.rb_node, *node;
65 struct rb_node *parent = NULL, *new_node = NULL;
66
67 while (*n) {
68 parent = *n;
69 entry = rb_entry(parent, struct ext4_system_zone, node);
70 if (start_blk < entry->start_blk)
71 n = &(*n)->rb_left;
72 else if (start_blk >= (entry->start_blk + entry->count))
73 n = &(*n)->rb_right;
74 else {
75 if (start_blk + count > (entry->start_blk +
76 entry->count))
77 entry->count = (start_blk + count -
78 entry->start_blk);
79 new_node = *n;
80 new_entry = rb_entry(new_node, struct ext4_system_zone,
81 node);
82 break;
83 }
84 }
85
86 if (!new_entry) {
87 new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
88 GFP_KERNEL);
89 if (!new_entry)
90 return -ENOMEM;
91 new_entry->start_blk = start_blk;
92 new_entry->count = count;
93 new_node = &new_entry->node;
94
95 rb_link_node(new_node, parent, n);
96 rb_insert_color(new_node, &sbi->system_blks);
97 }
98
99 /* Can we merge to the left? */
100 node = rb_prev(new_node);
101 if (node) {
102 entry = rb_entry(node, struct ext4_system_zone, node);
103 if (can_merge(entry, new_entry)) {
104 new_entry->start_blk = entry->start_blk;
105 new_entry->count += entry->count;
106 rb_erase(node, &sbi->system_blks);
107 kmem_cache_free(ext4_system_zone_cachep, entry);
108 }
109 }
110
111 /* Can we merge to the right? */
112 node = rb_next(new_node);
113 if (node) {
114 entry = rb_entry(node, struct ext4_system_zone, node);
115 if (can_merge(new_entry, entry)) {
116 new_entry->count += entry->count;
117 rb_erase(node, &sbi->system_blks);
118 kmem_cache_free(ext4_system_zone_cachep, entry);
119 }
120 }
121 return 0;
122}
123
124static void debug_print_tree(struct ext4_sb_info *sbi)
125{
126 struct rb_node *node;
127 struct ext4_system_zone *entry;
128 int first = 1;
129
130 printk(KERN_INFO "System zones: ");
131 node = rb_first(&sbi->system_blks);
132 while (node) {
133 entry = rb_entry(node, struct ext4_system_zone, node);
134 printk("%s%llu-%llu", first ? "" : ", ",
135 entry->start_blk, entry->start_blk + entry->count - 1);
136 first = 0;
137 node = rb_next(node);
138 }
139 printk("\n");
140}
141
142int ext4_setup_system_zone(struct super_block *sb)
143{
144 ext4_group_t ngroups = ext4_get_groups_count(sb);
145 struct ext4_sb_info *sbi = EXT4_SB(sb);
146 struct ext4_group_desc *gdp;
147 ext4_group_t i;
148 int flex_size = ext4_flex_bg_size(sbi);
149 int ret;
150
151 if (!test_opt(sb, BLOCK_VALIDITY)) {
152 if (EXT4_SB(sb)->system_blks.rb_node)
153 ext4_release_system_zone(sb);
154 return 0;
155 }
156 if (EXT4_SB(sb)->system_blks.rb_node)
157 return 0;
158
159 for (i=0; i < ngroups; i++) {
160 if (ext4_bg_has_super(sb, i) &&
161 ((i < 5) || ((i % flex_size) == 0)))
162 add_system_zone(sbi, ext4_group_first_block_no(sb, i),
163 sbi->s_gdb_count + 1);
164 gdp = ext4_get_group_desc(sb, i, NULL);
165 ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
166 if (ret)
167 return ret;
168 ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1);
169 if (ret)
170 return ret;
171 ret = add_system_zone(sbi, ext4_inode_table(sb, gdp),
172 sbi->s_itb_per_group);
173 if (ret)
174 return ret;
175 }
176
177 if (test_opt(sb, DEBUG))
178 debug_print_tree(EXT4_SB(sb));
179 return 0;
180}
181
182/* Called when the filesystem is unmounted */
183void ext4_release_system_zone(struct super_block *sb)
184{
185 struct rb_node *n = EXT4_SB(sb)->system_blks.rb_node;
186 struct rb_node *parent;
187 struct ext4_system_zone *entry;
188
189 while (n) {
190 /* Do the node's children first */
191 if (n->rb_left) {
192 n = n->rb_left;
193 continue;
194 }
195 if (n->rb_right) {
196 n = n->rb_right;
197 continue;
198 }
199 /*
200 * The node has no children; free it, and then zero
201 * out parent's link to it. Finally go to the
202 * beginning of the loop and try to free the parent
203 * node.
204 */
205 parent = rb_parent(n);
206 entry = rb_entry(n, struct ext4_system_zone, node);
207 kmem_cache_free(ext4_system_zone_cachep, entry);
208 if (!parent)
209 EXT4_SB(sb)->system_blks.rb_node = NULL;
210 else if (parent->rb_left == n)
211 parent->rb_left = NULL;
212 else if (parent->rb_right == n)
213 parent->rb_right = NULL;
214 n = parent;
215 }
216 EXT4_SB(sb)->system_blks.rb_node = NULL;
217}
218
219/*
220 * Returns 1 if the passed-in block region (start_blk,
221 * start_blk+count) is valid; 0 if some part of the block region
222 * overlaps with filesystem metadata blocks.
223 */
224int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
225 unsigned int count)
226{
227 struct ext4_system_zone *entry;
228 struct rb_node *n = sbi->system_blks.rb_node;
229
230 if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
231 (start_blk + count > ext4_blocks_count(sbi->s_es)))
232 return 0;
233 while (n) {
234 entry = rb_entry(n, struct ext4_system_zone, node);
235 if (start_blk + count - 1 < entry->start_blk)
236 n = n->rb_left;
237 else if (start_blk >= (entry->start_blk + entry->count))
238 n = n->rb_right;
239 else
240 return 0;
241 }
242 return 1;
243}
244
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index b64789929a65..9dc93168e262 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -131,8 +131,7 @@ static int ext4_readdir(struct file *filp,
131 struct buffer_head *bh = NULL; 131 struct buffer_head *bh = NULL;
132 132
133 map_bh.b_state = 0; 133 map_bh.b_state = 0;
134 err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 134 err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0);
135 0, 0, 0);
136 if (err > 0) { 135 if (err > 0) {
137 pgoff_t index = map_bh.b_blocknr >> 136 pgoff_t index = map_bh.b_blocknr >>
138 (PAGE_CACHE_SHIFT - inode->i_blkbits); 137 (PAGE_CACHE_SHIFT - inode->i_blkbits);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d0f15ef56de1..cc7d5edc38c9 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -21,7 +21,14 @@
21#include <linux/magic.h> 21#include <linux/magic.h>
22#include <linux/jbd2.h> 22#include <linux/jbd2.h>
23#include <linux/quota.h> 23#include <linux/quota.h>
24#include "ext4_i.h" 24#include <linux/rwsem.h>
25#include <linux/rbtree.h>
26#include <linux/seqlock.h>
27#include <linux/mutex.h>
28#include <linux/timer.h>
29#include <linux/wait.h>
30#include <linux/blockgroup_lock.h>
31#include <linux/percpu_counter.h>
25 32
26/* 33/*
27 * The fourth extended filesystem constants/structures 34 * The fourth extended filesystem constants/structures
@@ -46,6 +53,19 @@
46#define ext4_debug(f, a...) do {} while (0) 53#define ext4_debug(f, a...) do {} while (0)
47#endif 54#endif
48 55
56/* data type for block offset of block group */
57typedef int ext4_grpblk_t;
58
59/* data type for filesystem-wide blocks number */
60typedef unsigned long long ext4_fsblk_t;
61
62/* data type for file logical block number */
63typedef __u32 ext4_lblk_t;
64
65/* data type for block group number */
66typedef unsigned int ext4_group_t;
67
68
49/* prefer goal again. length */ 69/* prefer goal again. length */
50#define EXT4_MB_HINT_MERGE 1 70#define EXT4_MB_HINT_MERGE 1
51/* blocks already reserved */ 71/* blocks already reserved */
@@ -179,9 +199,6 @@ struct flex_groups {
179#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ 199#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */
180#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ 200#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
181 201
182#ifdef __KERNEL__
183#include "ext4_sb.h"
184#endif
185/* 202/*
186 * Macro-instructions used to manage group descriptors 203 * Macro-instructions used to manage group descriptors
187 */ 204 */
@@ -297,10 +314,23 @@ struct ext4_new_group_data {
297}; 314};
298 315
299/* 316/*
300 * Following is used by preallocation code to tell get_blocks() that we 317 * Flags used by ext4_get_blocks()
301 * want uninitialzed extents.
302 */ 318 */
303#define EXT4_CREATE_UNINITIALIZED_EXT 2 319 /* Allocate any needed blocks and/or convert an unitialized
320 extent to be an initialized ext4 */
321#define EXT4_GET_BLOCKS_CREATE 0x0001
322 /* Request the creation of an unitialized extent */
323#define EXT4_GET_BLOCKS_UNINIT_EXT 0x0002
324#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\
325 EXT4_GET_BLOCKS_CREATE)
326 /* Caller is from the delayed allocation writeout path,
327 so set the magic i_delalloc_reserve_flag after taking the
328 inode allocation semaphore for */
329#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004
330 /* Call ext4_da_update_reserve_space() after successfully
331 allocating the blocks */
332#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008
333
304 334
305/* 335/*
306 * ioctl commands 336 * ioctl commands
@@ -516,6 +546,110 @@ do { \
516#endif /* defined(__KERNEL__) || defined(__linux__) */ 546#endif /* defined(__KERNEL__) || defined(__linux__) */
517 547
518/* 548/*
549 * storage for cached extent
550 */
551struct ext4_ext_cache {
552 ext4_fsblk_t ec_start;
553 ext4_lblk_t ec_block;
554 __u32 ec_len; /* must be 32bit to return holes */
555 __u32 ec_type;
556};
557
558/*
559 * fourth extended file system inode data in memory
560 */
561struct ext4_inode_info {
562 __le32 i_data[15]; /* unconverted */
563 __u32 i_flags;
564 ext4_fsblk_t i_file_acl;
565 __u32 i_dtime;
566
567 /*
568 * i_block_group is the number of the block group which contains
569 * this file's inode. Constant across the lifetime of the inode,
570 * it is ued for making block allocation decisions - we try to
571 * place a file's data blocks near its inode block, and new inodes
572 * near to their parent directory's inode.
573 */
574 ext4_group_t i_block_group;
575 __u32 i_state; /* Dynamic state flags for ext4 */
576
577 ext4_lblk_t i_dir_start_lookup;
578#ifdef CONFIG_EXT4_FS_XATTR
579 /*
580 * Extended attributes can be read independently of the main file
581 * data. Taking i_mutex even when reading would cause contention
582 * between readers of EAs and writers of regular file data, so
583 * instead we synchronize on xattr_sem when reading or changing
584 * EAs.
585 */
586 struct rw_semaphore xattr_sem;
587#endif
588#ifdef CONFIG_EXT4_FS_POSIX_ACL
589 struct posix_acl *i_acl;
590 struct posix_acl *i_default_acl;
591#endif
592
593 struct list_head i_orphan; /* unlinked but open inodes */
594
595 /*
596 * i_disksize keeps track of what the inode size is ON DISK, not
597 * in memory. During truncate, i_size is set to the new size by
598 * the VFS prior to calling ext4_truncate(), but the filesystem won't
599 * set i_disksize to 0 until the truncate is actually under way.
600 *
601 * The intent is that i_disksize always represents the blocks which
602 * are used by this file. This allows recovery to restart truncate
603 * on orphans if we crash during truncate. We actually write i_disksize
604 * into the on-disk inode when writing inodes out, instead of i_size.
605 *
606 * The only time when i_disksize and i_size may be different is when
607 * a truncate is in progress. The only things which change i_disksize
608 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
609 */
610 loff_t i_disksize;
611
612 /*
613 * i_data_sem is for serialising ext4_truncate() against
614 * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
615 * data tree are chopped off during truncate. We can't do that in
616 * ext4 because whenever we perform intermediate commits during
617 * truncate, the inode and all the metadata blocks *must* be in a
618 * consistent state which allows truncation of the orphans to restart
619 * during recovery. Hence we must fix the get_block-vs-truncate race
620 * by other means, so we have i_data_sem.
621 */
622 struct rw_semaphore i_data_sem;
623 struct inode vfs_inode;
624 struct jbd2_inode jinode;
625
626 struct ext4_ext_cache i_cached_extent;
627 /*
628 * File creation time. Its function is same as that of
629 * struct timespec i_{a,c,m}time in the generic inode.
630 */
631 struct timespec i_crtime;
632
633 /* mballoc */
634 struct list_head i_prealloc_list;
635 spinlock_t i_prealloc_lock;
636
637 /* ialloc */
638 ext4_group_t i_last_alloc_group;
639
640 /* allocation reservation info for delalloc */
641 unsigned int i_reserved_data_blocks;
642 unsigned int i_reserved_meta_blocks;
643 unsigned int i_allocated_meta_blocks;
644 unsigned short i_delalloc_reserved_flag;
645
646 /* on-disk additional length */
647 __u16 i_extra_isize;
648
649 spinlock_t i_block_reservation_lock;
650};
651
652/*
519 * File system states 653 * File system states
520 */ 654 */
521#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ 655#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */
@@ -560,6 +694,7 @@ do { \
560#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ 694#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
561#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ 695#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
562#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ 696#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
697#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
563 698
564/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ 699/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
565#ifndef _LINUX_EXT2_FS_H 700#ifndef _LINUX_EXT2_FS_H
@@ -689,6 +824,137 @@ struct ext4_super_block {
689}; 824};
690 825
691#ifdef __KERNEL__ 826#ifdef __KERNEL__
827/*
828 * fourth extended-fs super-block data in memory
829 */
830struct ext4_sb_info {
831 unsigned long s_desc_size; /* Size of a group descriptor in bytes */
832 unsigned long s_inodes_per_block;/* Number of inodes per block */
833 unsigned long s_blocks_per_group;/* Number of blocks in a group */
834 unsigned long s_inodes_per_group;/* Number of inodes in a group */
835 unsigned long s_itb_per_group; /* Number of inode table blocks per group */
836 unsigned long s_gdb_count; /* Number of group descriptor blocks */
837 unsigned long s_desc_per_block; /* Number of group descriptors per block */
838 ext4_group_t s_groups_count; /* Number of groups in the fs */
839 unsigned long s_overhead_last; /* Last calculated overhead */
840 unsigned long s_blocks_last; /* Last seen block count */
841 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
842 struct buffer_head * s_sbh; /* Buffer containing the super block */
843 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
844 struct buffer_head **s_group_desc;
845 unsigned long s_mount_opt;
846 ext4_fsblk_t s_sb_block;
847 uid_t s_resuid;
848 gid_t s_resgid;
849 unsigned short s_mount_state;
850 unsigned short s_pad;
851 int s_addr_per_block_bits;
852 int s_desc_per_block_bits;
853 int s_inode_size;
854 int s_first_ino;
855 unsigned int s_inode_readahead_blks;
856 spinlock_t s_next_gen_lock;
857 u32 s_next_generation;
858 u32 s_hash_seed[4];
859 int s_def_hash_version;
860 int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
861 struct percpu_counter s_freeblocks_counter;
862 struct percpu_counter s_freeinodes_counter;
863 struct percpu_counter s_dirs_counter;
864 struct percpu_counter s_dirtyblocks_counter;
865 struct blockgroup_lock *s_blockgroup_lock;
866 struct proc_dir_entry *s_proc;
867 struct kobject s_kobj;
868 struct completion s_kobj_unregister;
869
870 /* Journaling */
871 struct inode *s_journal_inode;
872 struct journal_s *s_journal;
873 struct list_head s_orphan;
874 struct mutex s_orphan_lock;
875 struct mutex s_resize_lock;
876 unsigned long s_commit_interval;
877 u32 s_max_batch_time;
878 u32 s_min_batch_time;
879 struct block_device *journal_bdev;
880#ifdef CONFIG_JBD2_DEBUG
881 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
882 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
883#endif
884#ifdef CONFIG_QUOTA
885 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
886 int s_jquota_fmt; /* Format of quota to use */
887#endif
888 unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
889 struct rb_root system_blks;
890
891#ifdef EXTENTS_STATS
892 /* ext4 extents stats */
893 unsigned long s_ext_min;
894 unsigned long s_ext_max;
895 unsigned long s_depth_max;
896 spinlock_t s_ext_stats_lock;
897 unsigned long s_ext_blocks;
898 unsigned long s_ext_extents;
899#endif
900
901 /* for buddy allocator */
902 struct ext4_group_info ***s_group_info;
903 struct inode *s_buddy_cache;
904 long s_blocks_reserved;
905 spinlock_t s_reserve_lock;
906 spinlock_t s_md_lock;
907 tid_t s_last_transaction;
908 unsigned short *s_mb_offsets;
909 unsigned int *s_mb_maxs;
910
911 /* tunables */
912 unsigned long s_stripe;
913 unsigned int s_mb_stream_request;
914 unsigned int s_mb_max_to_scan;
915 unsigned int s_mb_min_to_scan;
916 unsigned int s_mb_stats;
917 unsigned int s_mb_order2_reqs;
918 unsigned int s_mb_group_prealloc;
919 /* where last allocation was done - for stream allocation */
920 unsigned long s_mb_last_group;
921 unsigned long s_mb_last_start;
922
923 /* history to debug policy */
924 struct ext4_mb_history *s_mb_history;
925 int s_mb_history_cur;
926 int s_mb_history_max;
927 int s_mb_history_num;
928 spinlock_t s_mb_history_lock;
929 int s_mb_history_filter;
930
931 /* stats for buddy allocator */
932 spinlock_t s_mb_pa_lock;
933 atomic_t s_bal_reqs; /* number of reqs with len > 1 */
934 atomic_t s_bal_success; /* we found long enough chunks */
935 atomic_t s_bal_allocated; /* in blocks */
936 atomic_t s_bal_ex_scanned; /* total extents scanned */
937 atomic_t s_bal_goals; /* goal hits */
938 atomic_t s_bal_breaks; /* too long searches */
939 atomic_t s_bal_2orders; /* 2^order hits */
940 spinlock_t s_bal_lock;
941 unsigned long s_mb_buddies_generated;
942 unsigned long long s_mb_generation_time;
943 atomic_t s_mb_lost_chunks;
944 atomic_t s_mb_preallocated;
945 atomic_t s_mb_discarded;
946
947 /* locality groups */
948 struct ext4_locality_group *s_locality_groups;
949
950 /* for write statistics */
951 unsigned long s_sectors_written_start;
952 u64 s_kbytes_written;
953
954 unsigned int s_log_groups_per_flex;
955 struct flex_groups *s_flex_groups;
956};
957
692static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) 958static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
693{ 959{
694 return sb->s_fs_info; 960 return sb->s_fs_info;
@@ -704,7 +970,6 @@ static inline struct timespec ext4_current_time(struct inode *inode)
704 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; 970 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
705} 971}
706 972
707
708static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) 973static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
709{ 974{
710 return ino == EXT4_ROOT_INO || 975 return ino == EXT4_ROOT_INO ||
@@ -1014,6 +1279,14 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
1014 ext4_group_t block_group, 1279 ext4_group_t block_group,
1015 struct buffer_head ** bh); 1280 struct buffer_head ** bh);
1016extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); 1281extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
1282struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
1283 ext4_group_t block_group);
1284extern unsigned ext4_init_block_bitmap(struct super_block *sb,
1285 struct buffer_head *bh,
1286 ext4_group_t group,
1287 struct ext4_group_desc *desc);
1288#define ext4_free_blocks_after_init(sb, group, desc) \
1289 ext4_init_block_bitmap(sb, NULL, group, desc)
1017 1290
1018/* dir.c */ 1291/* dir.c */
1019extern int ext4_check_dir_entry(const char *, struct inode *, 1292extern int ext4_check_dir_entry(const char *, struct inode *,
@@ -1038,6 +1311,11 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
1038extern unsigned long ext4_count_free_inodes(struct super_block *); 1311extern unsigned long ext4_count_free_inodes(struct super_block *);
1039extern unsigned long ext4_count_dirs(struct super_block *); 1312extern unsigned long ext4_count_dirs(struct super_block *);
1040extern void ext4_check_inodes_bitmap(struct super_block *); 1313extern void ext4_check_inodes_bitmap(struct super_block *);
1314extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
1315 struct buffer_head *bh,
1316 ext4_group_t group,
1317 struct ext4_group_desc *desc);
1318extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
1041 1319
1042/* mballoc.c */ 1320/* mballoc.c */
1043extern long ext4_mb_stats; 1321extern long ext4_mb_stats;
@@ -1123,6 +1401,8 @@ extern void ext4_abort(struct super_block *, const char *, const char *, ...)
1123 __attribute__ ((format (printf, 3, 4))); 1401 __attribute__ ((format (printf, 3, 4)));
1124extern void ext4_warning(struct super_block *, const char *, const char *, ...) 1402extern void ext4_warning(struct super_block *, const char *, const char *, ...)
1125 __attribute__ ((format (printf, 3, 4))); 1403 __attribute__ ((format (printf, 3, 4)));
1404extern void ext4_msg(struct super_block *, const char *, const char *, ...)
1405 __attribute__ ((format (printf, 3, 4)));
1126extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, 1406extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
1127 const char *, const char *, ...) 1407 const char *, const char *, ...)
1128 __attribute__ ((format (printf, 4, 5))); 1408 __attribute__ ((format (printf, 4, 5)));
@@ -1161,6 +1441,10 @@ extern void ext4_used_dirs_set(struct super_block *sb,
1161 struct ext4_group_desc *bg, __u32 count); 1441 struct ext4_group_desc *bg, __u32 count);
1162extern void ext4_itable_unused_set(struct super_block *sb, 1442extern void ext4_itable_unused_set(struct super_block *sb,
1163 struct ext4_group_desc *bg, __u32 count); 1443 struct ext4_group_desc *bg, __u32 count);
1444extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
1445 struct ext4_group_desc *gdp);
1446extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
1447 struct ext4_group_desc *gdp);
1164 1448
1165static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) 1449static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
1166{ 1450{
@@ -1228,6 +1512,18 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
1228 return grp_info[indexv][indexh]; 1512 return grp_info[indexv][indexh];
1229} 1513}
1230 1514
1515/*
1516 * Reading s_groups_count requires using smp_rmb() afterwards. See
1517 * the locking protocol documented in the comments of ext4_group_add()
1518 * in resize.c
1519 */
1520static inline ext4_group_t ext4_get_groups_count(struct super_block *sb)
1521{
1522 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
1523
1524 smp_rmb();
1525 return ngroups;
1526}
1231 1527
1232static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi, 1528static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi,
1233 ext4_group_t block_group) 1529 ext4_group_t block_group)
@@ -1283,33 +1579,25 @@ struct ext4_group_info {
1283}; 1579};
1284 1580
1285#define EXT4_GROUP_INFO_NEED_INIT_BIT 0 1581#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
1286#define EXT4_GROUP_INFO_LOCKED_BIT 1
1287 1582
1288#define EXT4_MB_GRP_NEED_INIT(grp) \ 1583#define EXT4_MB_GRP_NEED_INIT(grp) \
1289 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) 1584 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
1290 1585
1291static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) 1586static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb,
1587 ext4_group_t group)
1292{ 1588{
1293 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); 1589 return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group);
1294
1295 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
1296} 1590}
1297 1591
1298static inline void ext4_unlock_group(struct super_block *sb, 1592static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
1299 ext4_group_t group)
1300{ 1593{
1301 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); 1594 spin_lock(ext4_group_lock_ptr(sb, group));
1302
1303 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
1304} 1595}
1305 1596
1306static inline int ext4_is_group_locked(struct super_block *sb, 1597static inline void ext4_unlock_group(struct super_block *sb,
1307 ext4_group_t group) 1598 ext4_group_t group)
1308{ 1599{
1309 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); 1600 spin_unlock(ext4_group_lock_ptr(sb, group));
1310
1311 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
1312 &(grinfo->bb_state));
1313} 1601}
1314 1602
1315/* 1603/*
@@ -1326,11 +1614,21 @@ extern const struct file_operations ext4_file_operations;
1326/* namei.c */ 1614/* namei.c */
1327extern const struct inode_operations ext4_dir_inode_operations; 1615extern const struct inode_operations ext4_dir_inode_operations;
1328extern const struct inode_operations ext4_special_inode_operations; 1616extern const struct inode_operations ext4_special_inode_operations;
1617extern struct dentry *ext4_get_parent(struct dentry *child);
1329 1618
1330/* symlink.c */ 1619/* symlink.c */
1331extern const struct inode_operations ext4_symlink_inode_operations; 1620extern const struct inode_operations ext4_symlink_inode_operations;
1332extern const struct inode_operations ext4_fast_symlink_inode_operations; 1621extern const struct inode_operations ext4_fast_symlink_inode_operations;
1333 1622
1623/* block_validity */
1624extern void ext4_release_system_zone(struct super_block *sb);
1625extern int ext4_setup_system_zone(struct super_block *sb);
1626extern int __init init_ext4_system_zone(void);
1627extern void exit_ext4_system_zone(void);
1628extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
1629 ext4_fsblk_t start_blk,
1630 unsigned int count);
1631
1334/* extents.c */ 1632/* extents.c */
1335extern int ext4_ext_tree_init(handle_t *handle, struct inode *); 1633extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
1336extern int ext4_ext_writepage_trans_blocks(struct inode *, int); 1634extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
@@ -1338,17 +1636,15 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
1338 int chunk); 1636 int chunk);
1339extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, 1637extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
1340 ext4_lblk_t iblock, unsigned int max_blocks, 1638 ext4_lblk_t iblock, unsigned int max_blocks,
1341 struct buffer_head *bh_result, 1639 struct buffer_head *bh_result, int flags);
1342 int create, int extend_disksize);
1343extern void ext4_ext_truncate(struct inode *); 1640extern void ext4_ext_truncate(struct inode *);
1344extern void ext4_ext_init(struct super_block *); 1641extern void ext4_ext_init(struct super_block *);
1345extern void ext4_ext_release(struct super_block *); 1642extern void ext4_ext_release(struct super_block *);
1346extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, 1643extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
1347 loff_t len); 1644 loff_t len);
1348extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, 1645extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
1349 sector_t block, unsigned int max_blocks, 1646 sector_t block, unsigned int max_blocks,
1350 struct buffer_head *bh, int create, 1647 struct buffer_head *bh, int flags);
1351 int extend_disksize, int flag);
1352extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 1648extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1353 __u64 start, __u64 len); 1649 __u64 start, __u64 len);
1354 1650
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
deleted file mode 100644
index 4ce2187123aa..000000000000
--- a/fs/ext4/ext4_i.h
+++ /dev/null
@@ -1,140 +0,0 @@
1/*
2 * ext4_i.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs_i.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_I
17#define _EXT4_I
18
19#include <linux/rwsem.h>
20#include <linux/rbtree.h>
21#include <linux/seqlock.h>
22#include <linux/mutex.h>
23
24/* data type for block offset of block group */
25typedef int ext4_grpblk_t;
26
27/* data type for filesystem-wide blocks number */
28typedef unsigned long long ext4_fsblk_t;
29
30/* data type for file logical block number */
31typedef __u32 ext4_lblk_t;
32
33/* data type for block group number */
34typedef unsigned int ext4_group_t;
35
36/*
37 * storage for cached extent
38 */
39struct ext4_ext_cache {
40 ext4_fsblk_t ec_start;
41 ext4_lblk_t ec_block;
42 __u32 ec_len; /* must be 32bit to return holes */
43 __u32 ec_type;
44};
45
46/*
47 * fourth extended file system inode data in memory
48 */
49struct ext4_inode_info {
50 __le32 i_data[15]; /* unconverted */
51 __u32 i_flags;
52 ext4_fsblk_t i_file_acl;
53 __u32 i_dtime;
54
55 /*
56 * i_block_group is the number of the block group which contains
57 * this file's inode. Constant across the lifetime of the inode,
58 * it is ued for making block allocation decisions - we try to
59 * place a file's data blocks near its inode block, and new inodes
60 * near to their parent directory's inode.
61 */
62 ext4_group_t i_block_group;
63 __u32 i_state; /* Dynamic state flags for ext4 */
64
65 ext4_lblk_t i_dir_start_lookup;
66#ifdef CONFIG_EXT4_FS_XATTR
67 /*
68 * Extended attributes can be read independently of the main file
69 * data. Taking i_mutex even when reading would cause contention
70 * between readers of EAs and writers of regular file data, so
71 * instead we synchronize on xattr_sem when reading or changing
72 * EAs.
73 */
74 struct rw_semaphore xattr_sem;
75#endif
76#ifdef CONFIG_EXT4_FS_POSIX_ACL
77 struct posix_acl *i_acl;
78 struct posix_acl *i_default_acl;
79#endif
80
81 struct list_head i_orphan; /* unlinked but open inodes */
82
83 /*
84 * i_disksize keeps track of what the inode size is ON DISK, not
85 * in memory. During truncate, i_size is set to the new size by
86 * the VFS prior to calling ext4_truncate(), but the filesystem won't
87 * set i_disksize to 0 until the truncate is actually under way.
88 *
89 * The intent is that i_disksize always represents the blocks which
90 * are used by this file. This allows recovery to restart truncate
91 * on orphans if we crash during truncate. We actually write i_disksize
92 * into the on-disk inode when writing inodes out, instead of i_size.
93 *
94 * The only time when i_disksize and i_size may be different is when
95 * a truncate is in progress. The only things which change i_disksize
96 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
97 */
98 loff_t i_disksize;
99
100 /*
101 * i_data_sem is for serialising ext4_truncate() against
102 * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
103 * data tree are chopped off during truncate. We can't do that in
104 * ext4 because whenever we perform intermediate commits during
105 * truncate, the inode and all the metadata blocks *must* be in a
106 * consistent state which allows truncation of the orphans to restart
107 * during recovery. Hence we must fix the get_block-vs-truncate race
108 * by other means, so we have i_data_sem.
109 */
110 struct rw_semaphore i_data_sem;
111 struct inode vfs_inode;
112 struct jbd2_inode jinode;
113
114 struct ext4_ext_cache i_cached_extent;
115 /*
116 * File creation time. Its function is same as that of
117 * struct timespec i_{a,c,m}time in the generic inode.
118 */
119 struct timespec i_crtime;
120
121 /* mballoc */
122 struct list_head i_prealloc_list;
123 spinlock_t i_prealloc_lock;
124
125 /* ialloc */
126 ext4_group_t i_last_alloc_group;
127
128 /* allocation reservation info for delalloc */
129 unsigned int i_reserved_data_blocks;
130 unsigned int i_reserved_meta_blocks;
131 unsigned int i_allocated_meta_blocks;
132 unsigned short i_delalloc_reserved_flag;
133
134 /* on-disk additional length */
135 __u16 i_extra_isize;
136
137 spinlock_t i_block_reservation_lock;
138};
139
140#endif /* _EXT4_I */
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
deleted file mode 100644
index 57b71fefbccf..000000000000
--- a/fs/ext4/ext4_sb.h
+++ /dev/null
@@ -1,161 +0,0 @@
1/*
2 * ext4_sb.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs_sb.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_SB
17#define _EXT4_SB
18
19#ifdef __KERNEL__
20#include <linux/timer.h>
21#include <linux/wait.h>
22#include <linux/blockgroup_lock.h>
23#include <linux/percpu_counter.h>
24#endif
25#include <linux/rbtree.h>
26
27/*
28 * fourth extended-fs super-block data in memory
29 */
30struct ext4_sb_info {
31 unsigned long s_desc_size; /* Size of a group descriptor in bytes */
32 unsigned long s_inodes_per_block;/* Number of inodes per block */
33 unsigned long s_blocks_per_group;/* Number of blocks in a group */
34 unsigned long s_inodes_per_group;/* Number of inodes in a group */
35 unsigned long s_itb_per_group; /* Number of inode table blocks per group */
36 unsigned long s_gdb_count; /* Number of group descriptor blocks */
37 unsigned long s_desc_per_block; /* Number of group descriptors per block */
38 ext4_group_t s_groups_count; /* Number of groups in the fs */
39 unsigned long s_overhead_last; /* Last calculated overhead */
40 unsigned long s_blocks_last; /* Last seen block count */
41 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
42 struct buffer_head * s_sbh; /* Buffer containing the super block */
43 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
44 struct buffer_head **s_group_desc;
45 unsigned long s_mount_opt;
46 ext4_fsblk_t s_sb_block;
47 uid_t s_resuid;
48 gid_t s_resgid;
49 unsigned short s_mount_state;
50 unsigned short s_pad;
51 int s_addr_per_block_bits;
52 int s_desc_per_block_bits;
53 int s_inode_size;
54 int s_first_ino;
55 unsigned int s_inode_readahead_blks;
56 spinlock_t s_next_gen_lock;
57 u32 s_next_generation;
58 u32 s_hash_seed[4];
59 int s_def_hash_version;
60 int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
61 struct percpu_counter s_freeblocks_counter;
62 struct percpu_counter s_freeinodes_counter;
63 struct percpu_counter s_dirs_counter;
64 struct percpu_counter s_dirtyblocks_counter;
65 struct blockgroup_lock *s_blockgroup_lock;
66 struct proc_dir_entry *s_proc;
67 struct kobject s_kobj;
68 struct completion s_kobj_unregister;
69
70 /* Journaling */
71 struct inode *s_journal_inode;
72 struct journal_s *s_journal;
73 struct list_head s_orphan;
74 unsigned long s_commit_interval;
75 u32 s_max_batch_time;
76 u32 s_min_batch_time;
77 struct block_device *journal_bdev;
78#ifdef CONFIG_JBD2_DEBUG
79 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
80 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
81#endif
82#ifdef CONFIG_QUOTA
83 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
84 int s_jquota_fmt; /* Format of quota to use */
85#endif
86 unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
87
88#ifdef EXTENTS_STATS
89 /* ext4 extents stats */
90 unsigned long s_ext_min;
91 unsigned long s_ext_max;
92 unsigned long s_depth_max;
93 spinlock_t s_ext_stats_lock;
94 unsigned long s_ext_blocks;
95 unsigned long s_ext_extents;
96#endif
97
98 /* for buddy allocator */
99 struct ext4_group_info ***s_group_info;
100 struct inode *s_buddy_cache;
101 long s_blocks_reserved;
102 spinlock_t s_reserve_lock;
103 spinlock_t s_md_lock;
104 tid_t s_last_transaction;
105 unsigned short *s_mb_offsets;
106 unsigned int *s_mb_maxs;
107
108 /* tunables */
109 unsigned long s_stripe;
110 unsigned int s_mb_stream_request;
111 unsigned int s_mb_max_to_scan;
112 unsigned int s_mb_min_to_scan;
113 unsigned int s_mb_stats;
114 unsigned int s_mb_order2_reqs;
115 unsigned int s_mb_group_prealloc;
116 /* where last allocation was done - for stream allocation */
117 unsigned long s_mb_last_group;
118 unsigned long s_mb_last_start;
119
120 /* history to debug policy */
121 struct ext4_mb_history *s_mb_history;
122 int s_mb_history_cur;
123 int s_mb_history_max;
124 int s_mb_history_num;
125 spinlock_t s_mb_history_lock;
126 int s_mb_history_filter;
127
128 /* stats for buddy allocator */
129 spinlock_t s_mb_pa_lock;
130 atomic_t s_bal_reqs; /* number of reqs with len > 1 */
131 atomic_t s_bal_success; /* we found long enough chunks */
132 atomic_t s_bal_allocated; /* in blocks */
133 atomic_t s_bal_ex_scanned; /* total extents scanned */
134 atomic_t s_bal_goals; /* goal hits */
135 atomic_t s_bal_breaks; /* too long searches */
136 atomic_t s_bal_2orders; /* 2^order hits */
137 spinlock_t s_bal_lock;
138 unsigned long s_mb_buddies_generated;
139 unsigned long long s_mb_generation_time;
140 atomic_t s_mb_lost_chunks;
141 atomic_t s_mb_preallocated;
142 atomic_t s_mb_discarded;
143
144 /* locality groups */
145 struct ext4_locality_group *s_locality_groups;
146
147 /* for write statistics */
148 unsigned long s_sectors_written_start;
149 u64 s_kbytes_written;
150
151 unsigned int s_log_groups_per_flex;
152 struct flex_groups *s_flex_groups;
153};
154
155static inline spinlock_t *
156sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group)
157{
158 return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
159}
160
161#endif /* _EXT4_SB */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e3a55eb8b26a..2593f748c3a4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -326,32 +326,18 @@ ext4_ext_max_entries(struct inode *inode, int depth)
326 326
327static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) 327static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
328{ 328{
329 ext4_fsblk_t block = ext_pblock(ext), valid_block; 329 ext4_fsblk_t block = ext_pblock(ext);
330 int len = ext4_ext_get_actual_len(ext); 330 int len = ext4_ext_get_actual_len(ext);
331 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
332 331
333 valid_block = le32_to_cpu(es->s_first_data_block) + 332 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
334 EXT4_SB(inode->i_sb)->s_gdb_count;
335 if (unlikely(block <= valid_block ||
336 ((block + len) > ext4_blocks_count(es))))
337 return 0;
338 else
339 return 1;
340} 333}
341 334
342static int ext4_valid_extent_idx(struct inode *inode, 335static int ext4_valid_extent_idx(struct inode *inode,
343 struct ext4_extent_idx *ext_idx) 336 struct ext4_extent_idx *ext_idx)
344{ 337{
345 ext4_fsblk_t block = idx_pblock(ext_idx), valid_block; 338 ext4_fsblk_t block = idx_pblock(ext_idx);
346 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
347 339
348 valid_block = le32_to_cpu(es->s_first_data_block) + 340 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
349 EXT4_SB(inode->i_sb)->s_gdb_count;
350 if (unlikely(block <= valid_block ||
351 (block >= ext4_blocks_count(es))))
352 return 0;
353 else
354 return 1;
355} 341}
356 342
357static int ext4_valid_extent_entries(struct inode *inode, 343static int ext4_valid_extent_entries(struct inode *inode,
@@ -2097,12 +2083,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2097 ex = EXT_LAST_EXTENT(eh); 2083 ex = EXT_LAST_EXTENT(eh);
2098 2084
2099 ex_ee_block = le32_to_cpu(ex->ee_block); 2085 ex_ee_block = le32_to_cpu(ex->ee_block);
2100 if (ext4_ext_is_uninitialized(ex))
2101 uninitialized = 1;
2102 ex_ee_len = ext4_ext_get_actual_len(ex); 2086 ex_ee_len = ext4_ext_get_actual_len(ex);
2103 2087
2104 while (ex >= EXT_FIRST_EXTENT(eh) && 2088 while (ex >= EXT_FIRST_EXTENT(eh) &&
2105 ex_ee_block + ex_ee_len > start) { 2089 ex_ee_block + ex_ee_len > start) {
2090
2091 if (ext4_ext_is_uninitialized(ex))
2092 uninitialized = 1;
2093 else
2094 uninitialized = 0;
2095
2106 ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); 2096 ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len);
2107 path[depth].p_ext = ex; 2097 path[depth].p_ext = ex;
2108 2098
@@ -2784,7 +2774,7 @@ fix_extent_len:
2784int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, 2774int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2785 ext4_lblk_t iblock, 2775 ext4_lblk_t iblock,
2786 unsigned int max_blocks, struct buffer_head *bh_result, 2776 unsigned int max_blocks, struct buffer_head *bh_result,
2787 int create, int extend_disksize) 2777 int flags)
2788{ 2778{
2789 struct ext4_ext_path *path = NULL; 2779 struct ext4_ext_path *path = NULL;
2790 struct ext4_extent_header *eh; 2780 struct ext4_extent_header *eh;
@@ -2793,7 +2783,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2793 int err = 0, depth, ret, cache_type; 2783 int err = 0, depth, ret, cache_type;
2794 unsigned int allocated = 0; 2784 unsigned int allocated = 0;
2795 struct ext4_allocation_request ar; 2785 struct ext4_allocation_request ar;
2796 loff_t disksize;
2797 2786
2798 __clear_bit(BH_New, &bh_result->b_state); 2787 __clear_bit(BH_New, &bh_result->b_state);
2799 ext_debug("blocks %u/%u requested for inode %u\n", 2788 ext_debug("blocks %u/%u requested for inode %u\n",
@@ -2803,7 +2792,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2803 cache_type = ext4_ext_in_cache(inode, iblock, &newex); 2792 cache_type = ext4_ext_in_cache(inode, iblock, &newex);
2804 if (cache_type) { 2793 if (cache_type) {
2805 if (cache_type == EXT4_EXT_CACHE_GAP) { 2794 if (cache_type == EXT4_EXT_CACHE_GAP) {
2806 if (!create) { 2795 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
2807 /* 2796 /*
2808 * block isn't allocated yet and 2797 * block isn't allocated yet and
2809 * user doesn't want to allocate it 2798 * user doesn't want to allocate it
@@ -2869,9 +2858,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2869 EXT4_EXT_CACHE_EXTENT); 2858 EXT4_EXT_CACHE_EXTENT);
2870 goto out; 2859 goto out;
2871 } 2860 }
2872 if (create == EXT4_CREATE_UNINITIALIZED_EXT) 2861 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)
2873 goto out; 2862 goto out;
2874 if (!create) { 2863 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
2864 if (allocated > max_blocks)
2865 allocated = max_blocks;
2875 /* 2866 /*
2876 * We have blocks reserved already. We 2867 * We have blocks reserved already. We
2877 * return allocated blocks so that delalloc 2868 * return allocated blocks so that delalloc
@@ -2879,8 +2870,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2879 * the buffer head will be unmapped so that 2870 * the buffer head will be unmapped so that
2880 * a read from the block returns 0s. 2871 * a read from the block returns 0s.
2881 */ 2872 */
2882 if (allocated > max_blocks)
2883 allocated = max_blocks;
2884 set_buffer_unwritten(bh_result); 2873 set_buffer_unwritten(bh_result);
2885 bh_result->b_bdev = inode->i_sb->s_bdev; 2874 bh_result->b_bdev = inode->i_sb->s_bdev;
2886 bh_result->b_blocknr = newblock; 2875 bh_result->b_blocknr = newblock;
@@ -2903,7 +2892,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2903 * requested block isn't allocated yet; 2892 * requested block isn't allocated yet;
2904 * we couldn't try to create block if create flag is zero 2893 * we couldn't try to create block if create flag is zero
2905 */ 2894 */
2906 if (!create) { 2895 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
2907 /* 2896 /*
2908 * put just found gap into cache to speed up 2897 * put just found gap into cache to speed up
2909 * subsequent requests 2898 * subsequent requests
@@ -2932,10 +2921,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2932 * EXT_UNINIT_MAX_LEN. 2921 * EXT_UNINIT_MAX_LEN.
2933 */ 2922 */
2934 if (max_blocks > EXT_INIT_MAX_LEN && 2923 if (max_blocks > EXT_INIT_MAX_LEN &&
2935 create != EXT4_CREATE_UNINITIALIZED_EXT) 2924 !(flags & EXT4_GET_BLOCKS_UNINIT_EXT))
2936 max_blocks = EXT_INIT_MAX_LEN; 2925 max_blocks = EXT_INIT_MAX_LEN;
2937 else if (max_blocks > EXT_UNINIT_MAX_LEN && 2926 else if (max_blocks > EXT_UNINIT_MAX_LEN &&
2938 create == EXT4_CREATE_UNINITIALIZED_EXT) 2927 (flags & EXT4_GET_BLOCKS_UNINIT_EXT))
2939 max_blocks = EXT_UNINIT_MAX_LEN; 2928 max_blocks = EXT_UNINIT_MAX_LEN;
2940 2929
2941 /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ 2930 /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
@@ -2966,7 +2955,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2966 /* try to insert new extent into found leaf and return */ 2955 /* try to insert new extent into found leaf and return */
2967 ext4_ext_store_pblock(&newex, newblock); 2956 ext4_ext_store_pblock(&newex, newblock);
2968 newex.ee_len = cpu_to_le16(ar.len); 2957 newex.ee_len = cpu_to_le16(ar.len);
2969 if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */ 2958 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) /* Mark uninitialized */
2970 ext4_ext_mark_uninitialized(&newex); 2959 ext4_ext_mark_uninitialized(&newex);
2971 err = ext4_ext_insert_extent(handle, inode, path, &newex); 2960 err = ext4_ext_insert_extent(handle, inode, path, &newex);
2972 if (err) { 2961 if (err) {
@@ -2983,18 +2972,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2983 newblock = ext_pblock(&newex); 2972 newblock = ext_pblock(&newex);
2984 allocated = ext4_ext_get_actual_len(&newex); 2973 allocated = ext4_ext_get_actual_len(&newex);
2985outnew: 2974outnew:
2986 if (extend_disksize) {
2987 disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits;
2988 if (disksize > i_size_read(inode))
2989 disksize = i_size_read(inode);
2990 if (disksize > EXT4_I(inode)->i_disksize)
2991 EXT4_I(inode)->i_disksize = disksize;
2992 }
2993
2994 set_buffer_new(bh_result); 2975 set_buffer_new(bh_result);
2995 2976
2996 /* Cache only when it is _not_ an uninitialized extent */ 2977 /* Cache only when it is _not_ an uninitialized extent */
2997 if (create != EXT4_CREATE_UNINITIALIZED_EXT) 2978 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
2998 ext4_ext_put_in_cache(inode, iblock, allocated, newblock, 2979 ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
2999 EXT4_EXT_CACHE_EXTENT); 2980 EXT4_EXT_CACHE_EXTENT);
3000out: 2981out:
@@ -3150,9 +3131,10 @@ retry:
3150 ret = PTR_ERR(handle); 3131 ret = PTR_ERR(handle);
3151 break; 3132 break;
3152 } 3133 }
3153 ret = ext4_get_blocks_wrap(handle, inode, block, 3134 map_bh.b_state = 0;
3154 max_blocks, &map_bh, 3135 ret = ext4_get_blocks(handle, inode, block,
3155 EXT4_CREATE_UNINITIALIZED_EXT, 0, 0); 3136 max_blocks, &map_bh,
3137 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT);
3156 if (ret <= 0) { 3138 if (ret <= 0) {
3157#ifdef EXT4FS_DEBUG 3139#ifdef EXT4FS_DEBUG
3158 WARN_ON(ret <= 0); 3140 WARN_ON(ret <= 0);
@@ -3195,7 +3177,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3195 void *data) 3177 void *data)
3196{ 3178{
3197 struct fiemap_extent_info *fieinfo = data; 3179 struct fiemap_extent_info *fieinfo = data;
3198 unsigned long blksize_bits = inode->i_sb->s_blocksize_bits; 3180 unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
3199 __u64 logical; 3181 __u64 logical;
3200 __u64 physical; 3182 __u64 physical;
3201 __u64 length; 3183 __u64 length;
@@ -3242,9 +3224,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3242 * 3224 *
3243 * XXX this might miss a single-block extent at EXT_MAX_BLOCK 3225 * XXX this might miss a single-block extent at EXT_MAX_BLOCK
3244 */ 3226 */
3245 if (logical + length - 1 == EXT_MAX_BLOCK || 3227 if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
3246 ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK) 3228 newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
3229 loff_t size = i_size_read(inode);
3230 loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
3231
3247 flags |= FIEMAP_EXTENT_LAST; 3232 flags |= FIEMAP_EXTENT_LAST;
3233 if ((flags & FIEMAP_EXTENT_DELALLOC) &&
3234 logical+length > size)
3235 length = (size - logical + bs - 1) & ~(bs-1);
3236 }
3248 3237
3249 error = fiemap_fill_next_extent(fieinfo, logical, physical, 3238 error = fiemap_fill_next_extent(fieinfo, logical, physical,
3250 length, flags); 3239 length, flags);
@@ -3318,10 +3307,10 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3318 * Walk the extent tree gathering extent information. 3307 * Walk the extent tree gathering extent information.
3319 * ext4_ext_fiemap_cb will push extents back to user. 3308 * ext4_ext_fiemap_cb will push extents back to user.
3320 */ 3309 */
3321 down_write(&EXT4_I(inode)->i_data_sem); 3310 down_read(&EXT4_I(inode)->i_data_sem);
3322 error = ext4_ext_walk_space(inode, start_blk, len_blks, 3311 error = ext4_ext_walk_space(inode, start_blk, len_blks,
3323 ext4_ext_fiemap_cb, fieinfo); 3312 ext4_ext_fiemap_cb, fieinfo);
3324 up_write(&EXT4_I(inode)->i_data_sem); 3313 up_read(&EXT4_I(inode)->i_data_sem);
3325 } 3314 }
3326 3315
3327 return error; 3316 return error;
diff --git a/fs/ext4/group.h b/fs/ext4/group.h
deleted file mode 100644
index c2c0a8d06d0e..000000000000
--- a/fs/ext4/group.h
+++ /dev/null
@@ -1,29 +0,0 @@
1/*
2 * linux/fs/ext4/group.h
3 *
4 * Copyright (C) 2007 Cluster File Systems, Inc
5 *
6 * Author: Andreas Dilger <adilger@clusterfs.com>
7 */
8
9#ifndef _LINUX_EXT4_GROUP_H
10#define _LINUX_EXT4_GROUP_H
11
12extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
13 struct ext4_group_desc *gdp);
14extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
15 struct ext4_group_desc *gdp);
16struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
17 ext4_group_t block_group);
18extern unsigned ext4_init_block_bitmap(struct super_block *sb,
19 struct buffer_head *bh,
20 ext4_group_t group,
21 struct ext4_group_desc *desc);
22#define ext4_free_blocks_after_init(sb, group, desc) \
23 ext4_init_block_bitmap(sb, NULL, group, desc)
24extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
25 struct buffer_head *bh,
26 ext4_group_t group,
27 struct ext4_group_desc *desc);
28extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
29#endif /* _LINUX_EXT4_GROUP_H */
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f18e0a08a6b5..3743bd849bce 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -27,7 +27,6 @@
27#include "ext4_jbd2.h" 27#include "ext4_jbd2.h"
28#include "xattr.h" 28#include "xattr.h"
29#include "acl.h" 29#include "acl.h"
30#include "group.h"
31 30
32/* 31/*
33 * ialloc.c contains the inodes allocation and deallocation routines 32 * ialloc.c contains the inodes allocation and deallocation routines
@@ -123,16 +122,16 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
123 unlock_buffer(bh); 122 unlock_buffer(bh);
124 return bh; 123 return bh;
125 } 124 }
126 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); 125 ext4_lock_group(sb, block_group);
127 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { 126 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
128 ext4_init_inode_bitmap(sb, bh, block_group, desc); 127 ext4_init_inode_bitmap(sb, bh, block_group, desc);
129 set_bitmap_uptodate(bh); 128 set_bitmap_uptodate(bh);
130 set_buffer_uptodate(bh); 129 set_buffer_uptodate(bh);
131 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); 130 ext4_unlock_group(sb, block_group);
132 unlock_buffer(bh); 131 unlock_buffer(bh);
133 return bh; 132 return bh;
134 } 133 }
135 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); 134 ext4_unlock_group(sb, block_group);
136 if (buffer_uptodate(bh)) { 135 if (buffer_uptodate(bh)) {
137 /* 136 /*
138 * if not uninit if bh is uptodate, 137 * if not uninit if bh is uptodate,
@@ -247,9 +246,8 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
247 goto error_return; 246 goto error_return;
248 247
249 /* Ok, now we can actually update the inode bitmaps.. */ 248 /* Ok, now we can actually update the inode bitmaps.. */
250 spin_lock(sb_bgl_lock(sbi, block_group)); 249 cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
251 cleared = ext4_clear_bit(bit, bitmap_bh->b_data); 250 bit, bitmap_bh->b_data);
252 spin_unlock(sb_bgl_lock(sbi, block_group));
253 if (!cleared) 251 if (!cleared)
254 ext4_error(sb, "ext4_free_inode", 252 ext4_error(sb, "ext4_free_inode",
255 "bit already cleared for inode %lu", ino); 253 "bit already cleared for inode %lu", ino);
@@ -261,7 +259,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
261 if (fatal) goto error_return; 259 if (fatal) goto error_return;
262 260
263 if (gdp) { 261 if (gdp) {
264 spin_lock(sb_bgl_lock(sbi, block_group)); 262 ext4_lock_group(sb, block_group);
265 count = ext4_free_inodes_count(sb, gdp) + 1; 263 count = ext4_free_inodes_count(sb, gdp) + 1;
266 ext4_free_inodes_set(sb, gdp, count); 264 ext4_free_inodes_set(sb, gdp, count);
267 if (is_directory) { 265 if (is_directory) {
@@ -277,7 +275,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
277 } 275 }
278 gdp->bg_checksum = ext4_group_desc_csum(sbi, 276 gdp->bg_checksum = ext4_group_desc_csum(sbi,
279 block_group, gdp); 277 block_group, gdp);
280 spin_unlock(sb_bgl_lock(sbi, block_group)); 278 ext4_unlock_group(sb, block_group);
281 percpu_counter_inc(&sbi->s_freeinodes_counter); 279 percpu_counter_inc(&sbi->s_freeinodes_counter);
282 if (is_directory) 280 if (is_directory)
283 percpu_counter_dec(&sbi->s_dirs_counter); 281 percpu_counter_dec(&sbi->s_dirs_counter);
@@ -316,7 +314,7 @@ error_return:
316static int find_group_dir(struct super_block *sb, struct inode *parent, 314static int find_group_dir(struct super_block *sb, struct inode *parent,
317 ext4_group_t *best_group) 315 ext4_group_t *best_group)
318{ 316{
319 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 317 ext4_group_t ngroups = ext4_get_groups_count(sb);
320 unsigned int freei, avefreei; 318 unsigned int freei, avefreei;
321 struct ext4_group_desc *desc, *best_desc = NULL; 319 struct ext4_group_desc *desc, *best_desc = NULL;
322 ext4_group_t group; 320 ext4_group_t group;
@@ -349,11 +347,10 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
349{ 347{
350 struct ext4_sb_info *sbi = EXT4_SB(sb); 348 struct ext4_sb_info *sbi = EXT4_SB(sb);
351 struct ext4_group_desc *desc; 349 struct ext4_group_desc *desc;
352 struct buffer_head *bh;
353 struct flex_groups *flex_group = sbi->s_flex_groups; 350 struct flex_groups *flex_group = sbi->s_flex_groups;
354 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 351 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
355 ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); 352 ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
356 ext4_group_t ngroups = sbi->s_groups_count; 353 ext4_group_t ngroups = ext4_get_groups_count(sb);
357 int flex_size = ext4_flex_bg_size(sbi); 354 int flex_size = ext4_flex_bg_size(sbi);
358 ext4_group_t best_flex = parent_fbg_group; 355 ext4_group_t best_flex = parent_fbg_group;
359 int blocks_per_flex = sbi->s_blocks_per_group * flex_size; 356 int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
@@ -362,7 +359,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
362 ext4_group_t n_fbg_groups; 359 ext4_group_t n_fbg_groups;
363 ext4_group_t i; 360 ext4_group_t i;
364 361
365 n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >> 362 n_fbg_groups = (ngroups + flex_size - 1) >>
366 sbi->s_log_groups_per_flex; 363 sbi->s_log_groups_per_flex;
367 364
368find_close_to_parent: 365find_close_to_parent:
@@ -404,7 +401,7 @@ find_close_to_parent:
404found_flexbg: 401found_flexbg:
405 for (i = best_flex * flex_size; i < ngroups && 402 for (i = best_flex * flex_size; i < ngroups &&
406 i < (best_flex + 1) * flex_size; i++) { 403 i < (best_flex + 1) * flex_size; i++) {
407 desc = ext4_get_group_desc(sb, i, &bh); 404 desc = ext4_get_group_desc(sb, i, NULL);
408 if (ext4_free_inodes_count(sb, desc)) { 405 if (ext4_free_inodes_count(sb, desc)) {
409 *best_group = i; 406 *best_group = i;
410 goto out; 407 goto out;
@@ -478,20 +475,21 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
478{ 475{
479 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 476 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
480 struct ext4_sb_info *sbi = EXT4_SB(sb); 477 struct ext4_sb_info *sbi = EXT4_SB(sb);
481 ext4_group_t ngroups = sbi->s_groups_count; 478 ext4_group_t real_ngroups = ext4_get_groups_count(sb);
482 int inodes_per_group = EXT4_INODES_PER_GROUP(sb); 479 int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
483 unsigned int freei, avefreei; 480 unsigned int freei, avefreei;
484 ext4_fsblk_t freeb, avefreeb; 481 ext4_fsblk_t freeb, avefreeb;
485 unsigned int ndirs; 482 unsigned int ndirs;
486 int max_dirs, min_inodes; 483 int max_dirs, min_inodes;
487 ext4_grpblk_t min_blocks; 484 ext4_grpblk_t min_blocks;
488 ext4_group_t i, grp, g; 485 ext4_group_t i, grp, g, ngroups;
489 struct ext4_group_desc *desc; 486 struct ext4_group_desc *desc;
490 struct orlov_stats stats; 487 struct orlov_stats stats;
491 int flex_size = ext4_flex_bg_size(sbi); 488 int flex_size = ext4_flex_bg_size(sbi);
492 489
490 ngroups = real_ngroups;
493 if (flex_size > 1) { 491 if (flex_size > 1) {
494 ngroups = (ngroups + flex_size - 1) >> 492 ngroups = (real_ngroups + flex_size - 1) >>
495 sbi->s_log_groups_per_flex; 493 sbi->s_log_groups_per_flex;
496 parent_group >>= sbi->s_log_groups_per_flex; 494 parent_group >>= sbi->s_log_groups_per_flex;
497 } 495 }
@@ -543,7 +541,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
543 */ 541 */
544 grp *= flex_size; 542 grp *= flex_size;
545 for (i = 0; i < flex_size; i++) { 543 for (i = 0; i < flex_size; i++) {
546 if (grp+i >= sbi->s_groups_count) 544 if (grp+i >= real_ngroups)
547 break; 545 break;
548 desc = ext4_get_group_desc(sb, grp+i, NULL); 546 desc = ext4_get_group_desc(sb, grp+i, NULL);
549 if (desc && ext4_free_inodes_count(sb, desc)) { 547 if (desc && ext4_free_inodes_count(sb, desc)) {
@@ -583,7 +581,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
583 } 581 }
584 582
585fallback: 583fallback:
586 ngroups = sbi->s_groups_count; 584 ngroups = real_ngroups;
587 avefreei = freei / ngroups; 585 avefreei = freei / ngroups;
588fallback_retry: 586fallback_retry:
589 parent_group = EXT4_I(parent)->i_block_group; 587 parent_group = EXT4_I(parent)->i_block_group;
@@ -613,9 +611,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
613 ext4_group_t *group, int mode) 611 ext4_group_t *group, int mode)
614{ 612{
615 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 613 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
616 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 614 ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
617 struct ext4_group_desc *desc; 615 struct ext4_group_desc *desc;
618 ext4_group_t i, last;
619 int flex_size = ext4_flex_bg_size(EXT4_SB(sb)); 616 int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
620 617
621 /* 618 /*
@@ -708,10 +705,10 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
708 705
709/* 706/*
710 * claim the inode from the inode bitmap. If the group 707 * claim the inode from the inode bitmap. If the group
711 * is uninit we need to take the groups's sb_bgl_lock 708 * is uninit we need to take the groups's ext4_group_lock
712 * and clear the uninit flag. The inode bitmap update 709 * and clear the uninit flag. The inode bitmap update
713 * and group desc uninit flag clear should be done 710 * and group desc uninit flag clear should be done
714 * after holding sb_bgl_lock so that ext4_read_inode_bitmap 711 * after holding ext4_group_lock so that ext4_read_inode_bitmap
715 * doesn't race with the ext4_claim_inode 712 * doesn't race with the ext4_claim_inode
716 */ 713 */
717static int ext4_claim_inode(struct super_block *sb, 714static int ext4_claim_inode(struct super_block *sb,
@@ -722,7 +719,7 @@ static int ext4_claim_inode(struct super_block *sb,
722 struct ext4_sb_info *sbi = EXT4_SB(sb); 719 struct ext4_sb_info *sbi = EXT4_SB(sb);
723 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); 720 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
724 721
725 spin_lock(sb_bgl_lock(sbi, group)); 722 ext4_lock_group(sb, group);
726 if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { 723 if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
727 /* not a free inode */ 724 /* not a free inode */
728 retval = 1; 725 retval = 1;
@@ -731,7 +728,7 @@ static int ext4_claim_inode(struct super_block *sb,
731 ino++; 728 ino++;
732 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || 729 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
733 ino > EXT4_INODES_PER_GROUP(sb)) { 730 ino > EXT4_INODES_PER_GROUP(sb)) {
734 spin_unlock(sb_bgl_lock(sbi, group)); 731 ext4_unlock_group(sb, group);
735 ext4_error(sb, __func__, 732 ext4_error(sb, __func__,
736 "reserved inode or inode > inodes count - " 733 "reserved inode or inode > inodes count - "
737 "block_group = %u, inode=%lu", group, 734 "block_group = %u, inode=%lu", group,
@@ -780,7 +777,7 @@ static int ext4_claim_inode(struct super_block *sb,
780 } 777 }
781 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 778 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
782err_ret: 779err_ret:
783 spin_unlock(sb_bgl_lock(sbi, group)); 780 ext4_unlock_group(sb, group);
784 return retval; 781 return retval;
785} 782}
786 783
@@ -799,11 +796,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
799 struct super_block *sb; 796 struct super_block *sb;
800 struct buffer_head *inode_bitmap_bh = NULL; 797 struct buffer_head *inode_bitmap_bh = NULL;
801 struct buffer_head *group_desc_bh; 798 struct buffer_head *group_desc_bh;
802 ext4_group_t group = 0; 799 ext4_group_t ngroups, group = 0;
803 unsigned long ino = 0; 800 unsigned long ino = 0;
804 struct inode *inode; 801 struct inode *inode;
805 struct ext4_group_desc *gdp = NULL; 802 struct ext4_group_desc *gdp = NULL;
806 struct ext4_super_block *es;
807 struct ext4_inode_info *ei; 803 struct ext4_inode_info *ei;
808 struct ext4_sb_info *sbi; 804 struct ext4_sb_info *sbi;
809 int ret2, err = 0; 805 int ret2, err = 0;
@@ -818,15 +814,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
818 return ERR_PTR(-EPERM); 814 return ERR_PTR(-EPERM);
819 815
820 sb = dir->i_sb; 816 sb = dir->i_sb;
817 ngroups = ext4_get_groups_count(sb);
821 trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id, 818 trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
822 dir->i_ino, mode); 819 dir->i_ino, mode);
823 inode = new_inode(sb); 820 inode = new_inode(sb);
824 if (!inode) 821 if (!inode)
825 return ERR_PTR(-ENOMEM); 822 return ERR_PTR(-ENOMEM);
826 ei = EXT4_I(inode); 823 ei = EXT4_I(inode);
827
828 sbi = EXT4_SB(sb); 824 sbi = EXT4_SB(sb);
829 es = sbi->s_es;
830 825
831 if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { 826 if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
832 ret2 = find_group_flex(sb, dir, &group); 827 ret2 = find_group_flex(sb, dir, &group);
@@ -856,7 +851,7 @@ got_group:
856 if (ret2 == -1) 851 if (ret2 == -1)
857 goto out; 852 goto out;
858 853
859 for (i = 0; i < sbi->s_groups_count; i++) { 854 for (i = 0; i < ngroups; i++) {
860 err = -EIO; 855 err = -EIO;
861 856
862 gdp = ext4_get_group_desc(sb, group, &group_desc_bh); 857 gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
@@ -917,7 +912,7 @@ repeat_in_this_group:
917 * group descriptor metadata has not yet been updated. 912 * group descriptor metadata has not yet been updated.
918 * So we just go onto the next blockgroup. 913 * So we just go onto the next blockgroup.
919 */ 914 */
920 if (++group == sbi->s_groups_count) 915 if (++group == ngroups)
921 group = 0; 916 group = 0;
922 } 917 }
923 err = -ENOSPC; 918 err = -ENOSPC;
@@ -938,7 +933,7 @@ got:
938 } 933 }
939 934
940 free = 0; 935 free = 0;
941 spin_lock(sb_bgl_lock(sbi, group)); 936 ext4_lock_group(sb, group);
942 /* recheck and clear flag under lock if we still need to */ 937 /* recheck and clear flag under lock if we still need to */
943 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 938 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
944 free = ext4_free_blocks_after_init(sb, group, gdp); 939 free = ext4_free_blocks_after_init(sb, group, gdp);
@@ -947,7 +942,7 @@ got:
947 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, 942 gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
948 gdp); 943 gdp);
949 } 944 }
950 spin_unlock(sb_bgl_lock(sbi, group)); 945 ext4_unlock_group(sb, group);
951 946
952 /* Don't need to dirty bitmap block if we didn't change it */ 947 /* Don't need to dirty bitmap block if we didn't change it */
953 if (free) { 948 if (free) {
@@ -1158,7 +1153,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
1158{ 1153{
1159 unsigned long desc_count; 1154 unsigned long desc_count;
1160 struct ext4_group_desc *gdp; 1155 struct ext4_group_desc *gdp;
1161 ext4_group_t i; 1156 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
1162#ifdef EXT4FS_DEBUG 1157#ifdef EXT4FS_DEBUG
1163 struct ext4_super_block *es; 1158 struct ext4_super_block *es;
1164 unsigned long bitmap_count, x; 1159 unsigned long bitmap_count, x;
@@ -1168,7 +1163,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
1168 desc_count = 0; 1163 desc_count = 0;
1169 bitmap_count = 0; 1164 bitmap_count = 0;
1170 gdp = NULL; 1165 gdp = NULL;
1171 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 1166 for (i = 0; i < ngroups; i++) {
1172 gdp = ext4_get_group_desc(sb, i, NULL); 1167 gdp = ext4_get_group_desc(sb, i, NULL);
1173 if (!gdp) 1168 if (!gdp)
1174 continue; 1169 continue;
@@ -1190,7 +1185,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
1190 return desc_count; 1185 return desc_count;
1191#else 1186#else
1192 desc_count = 0; 1187 desc_count = 0;
1193 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 1188 for (i = 0; i < ngroups; i++) {
1194 gdp = ext4_get_group_desc(sb, i, NULL); 1189 gdp = ext4_get_group_desc(sb, i, NULL);
1195 if (!gdp) 1190 if (!gdp)
1196 continue; 1191 continue;
@@ -1205,9 +1200,9 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
1205unsigned long ext4_count_dirs(struct super_block * sb) 1200unsigned long ext4_count_dirs(struct super_block * sb)
1206{ 1201{
1207 unsigned long count = 0; 1202 unsigned long count = 0;
1208 ext4_group_t i; 1203 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
1209 1204
1210 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 1205 for (i = 0; i < ngroups; i++) {
1211 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 1206 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
1212 if (!gdp) 1207 if (!gdp)
1213 continue; 1208 continue;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2a9ffd528dd1..875db944b22f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -372,20 +372,21 @@ static int ext4_block_to_path(struct inode *inode,
372} 372}
373 373
374static int __ext4_check_blockref(const char *function, struct inode *inode, 374static int __ext4_check_blockref(const char *function, struct inode *inode,
375 __le32 *p, unsigned int max) { 375 __le32 *p, unsigned int max)
376 376{
377 unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es);
378 __le32 *bref = p; 377 __le32 *bref = p;
378 unsigned int blk;
379
379 while (bref < p+max) { 380 while (bref < p+max) {
380 if (unlikely(le32_to_cpu(*bref) >= maxblocks)) { 381 blk = le32_to_cpu(*bref++);
382 if (blk &&
383 unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
384 blk, 1))) {
381 ext4_error(inode->i_sb, function, 385 ext4_error(inode->i_sb, function,
382 "block reference %u >= max (%u) " 386 "invalid block reference %u "
383 "in inode #%lu, offset=%d", 387 "in inode #%lu", blk, inode->i_ino);
384 le32_to_cpu(*bref), maxblocks,
385 inode->i_ino, (int)(bref-p));
386 return -EIO; 388 return -EIO;
387 } 389 }
388 bref++;
389 } 390 }
390 return 0; 391 return 0;
391} 392}
@@ -892,6 +893,10 @@ err_out:
892} 893}
893 894
894/* 895/*
896 * The ext4_ind_get_blocks() function handles non-extents inodes
897 * (i.e., using the traditional indirect/double-indirect i_blocks
898 * scheme) for ext4_get_blocks().
899 *
895 * Allocation strategy is simple: if we have to allocate something, we will 900 * Allocation strategy is simple: if we have to allocate something, we will
896 * have to go the whole way to leaf. So let's do it before attaching anything 901 * have to go the whole way to leaf. So let's do it before attaching anything
897 * to tree, set linkage between the newborn blocks, write them if sync is 902 * to tree, set linkage between the newborn blocks, write them if sync is
@@ -909,15 +914,16 @@ err_out:
909 * return = 0, if plain lookup failed. 914 * return = 0, if plain lookup failed.
910 * return < 0, error case. 915 * return < 0, error case.
911 * 916 *
912 * 917 * The ext4_ind_get_blocks() function should be called with
913 * Need to be called with 918 * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem
914 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block 919 * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or
915 * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) 920 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
921 * blocks.
916 */ 922 */
917static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, 923static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
918 ext4_lblk_t iblock, unsigned int maxblocks, 924 ext4_lblk_t iblock, unsigned int maxblocks,
919 struct buffer_head *bh_result, 925 struct buffer_head *bh_result,
920 int create, int extend_disksize) 926 int flags)
921{ 927{
922 int err = -EIO; 928 int err = -EIO;
923 ext4_lblk_t offsets[4]; 929 ext4_lblk_t offsets[4];
@@ -927,14 +933,11 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
927 int indirect_blks; 933 int indirect_blks;
928 int blocks_to_boundary = 0; 934 int blocks_to_boundary = 0;
929 int depth; 935 int depth;
930 struct ext4_inode_info *ei = EXT4_I(inode);
931 int count = 0; 936 int count = 0;
932 ext4_fsblk_t first_block = 0; 937 ext4_fsblk_t first_block = 0;
933 loff_t disksize;
934
935 938
936 J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); 939 J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
937 J_ASSERT(handle != NULL || create == 0); 940 J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
938 depth = ext4_block_to_path(inode, iblock, offsets, 941 depth = ext4_block_to_path(inode, iblock, offsets,
939 &blocks_to_boundary); 942 &blocks_to_boundary);
940 943
@@ -963,7 +966,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
963 } 966 }
964 967
965 /* Next simple case - plain lookup or failed read of indirect block */ 968 /* Next simple case - plain lookup or failed read of indirect block */
966 if (!create || err == -EIO) 969 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
967 goto cleanup; 970 goto cleanup;
968 971
969 /* 972 /*
@@ -997,19 +1000,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
997 if (!err) 1000 if (!err)
998 err = ext4_splice_branch(handle, inode, iblock, 1001 err = ext4_splice_branch(handle, inode, iblock,
999 partial, indirect_blks, count); 1002 partial, indirect_blks, count);
1000 /* 1003 else
1001 * i_disksize growing is protected by i_data_sem. Don't forget to
1002 * protect it if you're about to implement concurrent
1003 * ext4_get_block() -bzzz
1004 */
1005 if (!err && extend_disksize) {
1006 disksize = ((loff_t) iblock + count) << inode->i_blkbits;
1007 if (disksize > i_size_read(inode))
1008 disksize = i_size_read(inode);
1009 if (disksize > ei->i_disksize)
1010 ei->i_disksize = disksize;
1011 }
1012 if (err)
1013 goto cleanup; 1004 goto cleanup;
1014 1005
1015 set_buffer_new(bh_result); 1006 set_buffer_new(bh_result);
@@ -1120,8 +1111,23 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1120 ext4_discard_preallocations(inode); 1111 ext4_discard_preallocations(inode);
1121} 1112}
1122 1113
1114static int check_block_validity(struct inode *inode, sector_t logical,
1115 sector_t phys, int len)
1116{
1117 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
1118 ext4_error(inode->i_sb, "check_block_validity",
1119 "inode #%lu logical block %llu mapped to %llu "
1120 "(size %d)", inode->i_ino,
1121 (unsigned long long) logical,
1122 (unsigned long long) phys, len);
1123 WARN_ON(1);
1124 return -EIO;
1125 }
1126 return 0;
1127}
1128
1123/* 1129/*
1124 * The ext4_get_blocks_wrap() function try to look up the requested blocks, 1130 * The ext4_get_blocks() function tries to look up the requested blocks,
1125 * and returns if the blocks are already mapped. 1131 * and returns if the blocks are already mapped.
1126 * 1132 *
1127 * Otherwise it takes the write lock of the i_data_sem and allocate blocks 1133 * Otherwise it takes the write lock of the i_data_sem and allocate blocks
@@ -1129,7 +1135,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1129 * mapped. 1135 * mapped.
1130 * 1136 *
1131 * If file type is extents based, it will call ext4_ext_get_blocks(), 1137 * If file type is extents based, it will call ext4_ext_get_blocks(),
1132 * Otherwise, call with ext4_get_blocks_handle() to handle indirect mapping 1138 * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping
1133 * based files 1139 * based files
1134 * 1140 *
1135 * On success, it returns the number of blocks being mapped or allocate. 1141 * On success, it returns the number of blocks being mapped or allocate.
@@ -1142,9 +1148,9 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1142 * 1148 *
1143 * It returns the error in case of allocation failure. 1149 * It returns the error in case of allocation failure.
1144 */ 1150 */
1145int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, 1151int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
1146 unsigned int max_blocks, struct buffer_head *bh, 1152 unsigned int max_blocks, struct buffer_head *bh,
1147 int create, int extend_disksize, int flag) 1153 int flags)
1148{ 1154{
1149 int retval; 1155 int retval;
1150 1156
@@ -1152,21 +1158,28 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1152 clear_buffer_unwritten(bh); 1158 clear_buffer_unwritten(bh);
1153 1159
1154 /* 1160 /*
1155 * Try to see if we can get the block without requesting 1161 * Try to see if we can get the block without requesting a new
1156 * for new file system block. 1162 * file system block.
1157 */ 1163 */
1158 down_read((&EXT4_I(inode)->i_data_sem)); 1164 down_read((&EXT4_I(inode)->i_data_sem));
1159 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 1165 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
1160 retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, 1166 retval = ext4_ext_get_blocks(handle, inode, block, max_blocks,
1161 bh, 0, 0); 1167 bh, 0);
1162 } else { 1168 } else {
1163 retval = ext4_get_blocks_handle(handle, 1169 retval = ext4_ind_get_blocks(handle, inode, block, max_blocks,
1164 inode, block, max_blocks, bh, 0, 0); 1170 bh, 0);
1165 } 1171 }
1166 up_read((&EXT4_I(inode)->i_data_sem)); 1172 up_read((&EXT4_I(inode)->i_data_sem));
1167 1173
1174 if (retval > 0 && buffer_mapped(bh)) {
1175 int ret = check_block_validity(inode, block,
1176 bh->b_blocknr, retval);
1177 if (ret != 0)
1178 return ret;
1179 }
1180
1168 /* If it is only a block(s) look up */ 1181 /* If it is only a block(s) look up */
1169 if (!create) 1182 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0)
1170 return retval; 1183 return retval;
1171 1184
1172 /* 1185 /*
@@ -1205,7 +1218,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1205 * let the underlying get_block() function know to 1218 * let the underlying get_block() function know to
1206 * avoid double accounting 1219 * avoid double accounting
1207 */ 1220 */
1208 if (flag) 1221 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1209 EXT4_I(inode)->i_delalloc_reserved_flag = 1; 1222 EXT4_I(inode)->i_delalloc_reserved_flag = 1;
1210 /* 1223 /*
1211 * We need to check for EXT4 here because migrate 1224 * We need to check for EXT4 here because migrate
@@ -1213,10 +1226,10 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1213 */ 1226 */
1214 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 1227 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
1215 retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, 1228 retval = ext4_ext_get_blocks(handle, inode, block, max_blocks,
1216 bh, create, extend_disksize); 1229 bh, flags);
1217 } else { 1230 } else {
1218 retval = ext4_get_blocks_handle(handle, inode, block, 1231 retval = ext4_ind_get_blocks(handle, inode, block,
1219 max_blocks, bh, create, extend_disksize); 1232 max_blocks, bh, flags);
1220 1233
1221 if (retval > 0 && buffer_new(bh)) { 1234 if (retval > 0 && buffer_new(bh)) {
1222 /* 1235 /*
@@ -1229,18 +1242,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1229 } 1242 }
1230 } 1243 }
1231 1244
1232 if (flag) { 1245 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1233 EXT4_I(inode)->i_delalloc_reserved_flag = 0; 1246 EXT4_I(inode)->i_delalloc_reserved_flag = 0;
1234 /* 1247
1235 * Update reserved blocks/metadata blocks 1248 /*
1236 * after successful block allocation 1249 * Update reserved blocks/metadata blocks after successful
1237 * which were deferred till now 1250 * block allocation which had been deferred till now.
1238 */ 1251 */
1239 if ((retval > 0) && buffer_delay(bh)) 1252 if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
1240 ext4_da_update_reserve_space(inode, retval); 1253 ext4_da_update_reserve_space(inode, retval);
1241 }
1242 1254
1243 up_write((&EXT4_I(inode)->i_data_sem)); 1255 up_write((&EXT4_I(inode)->i_data_sem));
1256 if (retval > 0 && buffer_mapped(bh)) {
1257 int ret = check_block_validity(inode, block,
1258 bh->b_blocknr, retval);
1259 if (ret != 0)
1260 return ret;
1261 }
1244 return retval; 1262 return retval;
1245} 1263}
1246 1264
@@ -1268,8 +1286,8 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
1268 started = 1; 1286 started = 1;
1269 } 1287 }
1270 1288
1271 ret = ext4_get_blocks_wrap(handle, inode, iblock, 1289 ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
1272 max_blocks, bh_result, create, 0, 0); 1290 create ? EXT4_GET_BLOCKS_CREATE : 0);
1273 if (ret > 0) { 1291 if (ret > 0) {
1274 bh_result->b_size = (ret << inode->i_blkbits); 1292 bh_result->b_size = (ret << inode->i_blkbits);
1275 ret = 0; 1293 ret = 0;
@@ -1288,17 +1306,19 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
1288{ 1306{
1289 struct buffer_head dummy; 1307 struct buffer_head dummy;
1290 int fatal = 0, err; 1308 int fatal = 0, err;
1309 int flags = 0;
1291 1310
1292 J_ASSERT(handle != NULL || create == 0); 1311 J_ASSERT(handle != NULL || create == 0);
1293 1312
1294 dummy.b_state = 0; 1313 dummy.b_state = 0;
1295 dummy.b_blocknr = -1000; 1314 dummy.b_blocknr = -1000;
1296 buffer_trace_init(&dummy.b_history); 1315 buffer_trace_init(&dummy.b_history);
1297 err = ext4_get_blocks_wrap(handle, inode, block, 1, 1316 if (create)
1298 &dummy, create, 1, 0); 1317 flags |= EXT4_GET_BLOCKS_CREATE;
1318 err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags);
1299 /* 1319 /*
1300 * ext4_get_blocks_handle() returns number of blocks 1320 * ext4_get_blocks() returns number of blocks mapped. 0 in
1301 * mapped. 0 in case of a HOLE. 1321 * case of a HOLE.
1302 */ 1322 */
1303 if (err > 0) { 1323 if (err > 0) {
1304 if (err > 1) 1324 if (err > 1)
@@ -1439,7 +1459,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
1439 struct page **pagep, void **fsdata) 1459 struct page **pagep, void **fsdata)
1440{ 1460{
1441 struct inode *inode = mapping->host; 1461 struct inode *inode = mapping->host;
1442 int ret, needed_blocks = ext4_writepage_trans_blocks(inode); 1462 int ret, needed_blocks;
1443 handle_t *handle; 1463 handle_t *handle;
1444 int retries = 0; 1464 int retries = 0;
1445 struct page *page; 1465 struct page *page;
@@ -1450,6 +1470,11 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
1450 "dev %s ino %lu pos %llu len %u flags %u", 1470 "dev %s ino %lu pos %llu len %u flags %u",
1451 inode->i_sb->s_id, inode->i_ino, 1471 inode->i_sb->s_id, inode->i_ino,
1452 (unsigned long long) pos, len, flags); 1472 (unsigned long long) pos, len, flags);
1473 /*
1474 * Reserve one block more for addition to orphan list in case
1475 * we allocate blocks but write fails for some reason
1476 */
1477 needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
1453 index = pos >> PAGE_CACHE_SHIFT; 1478 index = pos >> PAGE_CACHE_SHIFT;
1454 from = pos & (PAGE_CACHE_SIZE - 1); 1479 from = pos & (PAGE_CACHE_SIZE - 1);
1455 to = from + len; 1480 to = from + len;
@@ -1483,15 +1508,30 @@ retry:
1483 1508
1484 if (ret) { 1509 if (ret) {
1485 unlock_page(page); 1510 unlock_page(page);
1486 ext4_journal_stop(handle);
1487 page_cache_release(page); 1511 page_cache_release(page);
1488 /* 1512 /*
1489 * block_write_begin may have instantiated a few blocks 1513 * block_write_begin may have instantiated a few blocks
1490 * outside i_size. Trim these off again. Don't need 1514 * outside i_size. Trim these off again. Don't need
1491 * i_size_read because we hold i_mutex. 1515 * i_size_read because we hold i_mutex.
1516 *
1517 * Add inode to orphan list in case we crash before
1518 * truncate finishes
1492 */ 1519 */
1493 if (pos + len > inode->i_size) 1520 if (pos + len > inode->i_size)
1521 ext4_orphan_add(handle, inode);
1522
1523 ext4_journal_stop(handle);
1524 if (pos + len > inode->i_size) {
1494 vmtruncate(inode, inode->i_size); 1525 vmtruncate(inode, inode->i_size);
1526 /*
1527 * If vmtruncate failed early the inode might
1528 * still be on the orphan list; we need to
1529 * make sure the inode is removed from the
1530 * orphan list in that case.
1531 */
1532 if (inode->i_nlink)
1533 ext4_orphan_del(NULL, inode);
1534 }
1495 } 1535 }
1496 1536
1497 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 1537 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -1509,6 +1549,52 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
1509 return ext4_handle_dirty_metadata(handle, NULL, bh); 1549 return ext4_handle_dirty_metadata(handle, NULL, bh);
1510} 1550}
1511 1551
1552static int ext4_generic_write_end(struct file *file,
1553 struct address_space *mapping,
1554 loff_t pos, unsigned len, unsigned copied,
1555 struct page *page, void *fsdata)
1556{
1557 int i_size_changed = 0;
1558 struct inode *inode = mapping->host;
1559 handle_t *handle = ext4_journal_current_handle();
1560
1561 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
1562
1563 /*
1564 * No need to use i_size_read() here, the i_size
1565 * cannot change under us because we hold i_mutex.
1566 *
1567 * But it's important to update i_size while still holding page lock:
1568 * page writeout could otherwise come in and zero beyond i_size.
1569 */
1570 if (pos + copied > inode->i_size) {
1571 i_size_write(inode, pos + copied);
1572 i_size_changed = 1;
1573 }
1574
1575 if (pos + copied > EXT4_I(inode)->i_disksize) {
1576 /* We need to mark inode dirty even if
1577 * new_i_size is less that inode->i_size
1578 * bu greater than i_disksize.(hint delalloc)
1579 */
1580 ext4_update_i_disksize(inode, (pos + copied));
1581 i_size_changed = 1;
1582 }
1583 unlock_page(page);
1584 page_cache_release(page);
1585
1586 /*
1587 * Don't mark the inode dirty under page lock. First, it unnecessarily
1588 * makes the holding time of page lock longer. Second, it forces lock
1589 * ordering of page lock and transaction start for journaling
1590 * filesystems.
1591 */
1592 if (i_size_changed)
1593 ext4_mark_inode_dirty(handle, inode);
1594
1595 return copied;
1596}
1597
1512/* 1598/*
1513 * We need to pick up the new inode size which generic_commit_write gave us 1599 * We need to pick up the new inode size which generic_commit_write gave us
1514 * `file' can be NULL - eg, when called from page_symlink(). 1600 * `file' can be NULL - eg, when called from page_symlink().
@@ -1532,21 +1618,15 @@ static int ext4_ordered_write_end(struct file *file,
1532 ret = ext4_jbd2_file_inode(handle, inode); 1618 ret = ext4_jbd2_file_inode(handle, inode);
1533 1619
1534 if (ret == 0) { 1620 if (ret == 0) {
1535 loff_t new_i_size; 1621 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1536
1537 new_i_size = pos + copied;
1538 if (new_i_size > EXT4_I(inode)->i_disksize) {
1539 ext4_update_i_disksize(inode, new_i_size);
1540 /* We need to mark inode dirty even if
1541 * new_i_size is less that inode->i_size
1542 * bu greater than i_disksize.(hint delalloc)
1543 */
1544 ext4_mark_inode_dirty(handle, inode);
1545 }
1546
1547 ret2 = generic_write_end(file, mapping, pos, len, copied,
1548 page, fsdata); 1622 page, fsdata);
1549 copied = ret2; 1623 copied = ret2;
1624 if (pos + len > inode->i_size)
1625 /* if we have allocated more blocks and copied
1626 * less. We will have blocks allocated outside
1627 * inode->i_size. So truncate them
1628 */
1629 ext4_orphan_add(handle, inode);
1550 if (ret2 < 0) 1630 if (ret2 < 0)
1551 ret = ret2; 1631 ret = ret2;
1552 } 1632 }
@@ -1554,6 +1634,18 @@ static int ext4_ordered_write_end(struct file *file,
1554 if (!ret) 1634 if (!ret)
1555 ret = ret2; 1635 ret = ret2;
1556 1636
1637 if (pos + len > inode->i_size) {
1638 vmtruncate(inode, inode->i_size);
1639 /*
1640 * If vmtruncate failed early the inode might still be
1641 * on the orphan list; we need to make sure the inode
1642 * is removed from the orphan list in that case.
1643 */
1644 if (inode->i_nlink)
1645 ext4_orphan_del(NULL, inode);
1646 }
1647
1648
1557 return ret ? ret : copied; 1649 return ret ? ret : copied;
1558} 1650}
1559 1651
@@ -1565,25 +1657,21 @@ static int ext4_writeback_write_end(struct file *file,
1565 handle_t *handle = ext4_journal_current_handle(); 1657 handle_t *handle = ext4_journal_current_handle();
1566 struct inode *inode = mapping->host; 1658 struct inode *inode = mapping->host;
1567 int ret = 0, ret2; 1659 int ret = 0, ret2;
1568 loff_t new_i_size;
1569 1660
1570 trace_mark(ext4_writeback_write_end, 1661 trace_mark(ext4_writeback_write_end,
1571 "dev %s ino %lu pos %llu len %u copied %u", 1662 "dev %s ino %lu pos %llu len %u copied %u",
1572 inode->i_sb->s_id, inode->i_ino, 1663 inode->i_sb->s_id, inode->i_ino,
1573 (unsigned long long) pos, len, copied); 1664 (unsigned long long) pos, len, copied);
1574 new_i_size = pos + copied; 1665 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1575 if (new_i_size > EXT4_I(inode)->i_disksize) {
1576 ext4_update_i_disksize(inode, new_i_size);
1577 /* We need to mark inode dirty even if
1578 * new_i_size is less that inode->i_size
1579 * bu greater than i_disksize.(hint delalloc)
1580 */
1581 ext4_mark_inode_dirty(handle, inode);
1582 }
1583
1584 ret2 = generic_write_end(file, mapping, pos, len, copied,
1585 page, fsdata); 1666 page, fsdata);
1586 copied = ret2; 1667 copied = ret2;
1668 if (pos + len > inode->i_size)
1669 /* if we have allocated more blocks and copied
1670 * less. We will have blocks allocated outside
1671 * inode->i_size. So truncate them
1672 */
1673 ext4_orphan_add(handle, inode);
1674
1587 if (ret2 < 0) 1675 if (ret2 < 0)
1588 ret = ret2; 1676 ret = ret2;
1589 1677
@@ -1591,6 +1679,17 @@ static int ext4_writeback_write_end(struct file *file,
1591 if (!ret) 1679 if (!ret)
1592 ret = ret2; 1680 ret = ret2;
1593 1681
1682 if (pos + len > inode->i_size) {
1683 vmtruncate(inode, inode->i_size);
1684 /*
1685 * If vmtruncate failed early the inode might still be
1686 * on the orphan list; we need to make sure the inode
1687 * is removed from the orphan list in that case.
1688 */
1689 if (inode->i_nlink)
1690 ext4_orphan_del(NULL, inode);
1691 }
1692
1594 return ret ? ret : copied; 1693 return ret ? ret : copied;
1595} 1694}
1596 1695
@@ -1635,10 +1734,27 @@ static int ext4_journalled_write_end(struct file *file,
1635 } 1734 }
1636 1735
1637 unlock_page(page); 1736 unlock_page(page);
1737 page_cache_release(page);
1738 if (pos + len > inode->i_size)
1739 /* if we have allocated more blocks and copied
1740 * less. We will have blocks allocated outside
1741 * inode->i_size. So truncate them
1742 */
1743 ext4_orphan_add(handle, inode);
1744
1638 ret2 = ext4_journal_stop(handle); 1745 ret2 = ext4_journal_stop(handle);
1639 if (!ret) 1746 if (!ret)
1640 ret = ret2; 1747 ret = ret2;
1641 page_cache_release(page); 1748 if (pos + len > inode->i_size) {
1749 vmtruncate(inode, inode->i_size);
1750 /*
1751 * If vmtruncate failed early the inode might still be
1752 * on the orphan list; we need to make sure the inode
1753 * is removed from the orphan list in that case.
1754 */
1755 if (inode->i_nlink)
1756 ext4_orphan_del(NULL, inode);
1757 }
1642 1758
1643 return ret ? ret : copied; 1759 return ret ? ret : copied;
1644} 1760}
@@ -1852,7 +1968,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1852 * @logical - first logical block to start assignment with 1968 * @logical - first logical block to start assignment with
1853 * 1969 *
1854 * the function goes through all passed space and put actual disk 1970 * the function goes through all passed space and put actual disk
1855 * block numbers into buffer heads, dropping BH_Delay 1971 * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
1856 */ 1972 */
1857static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, 1973static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
1858 struct buffer_head *exbh) 1974 struct buffer_head *exbh)
@@ -1902,16 +2018,24 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
1902 do { 2018 do {
1903 if (cur_logical >= logical + blocks) 2019 if (cur_logical >= logical + blocks)
1904 break; 2020 break;
1905 if (buffer_delay(bh)) { 2021
1906 bh->b_blocknr = pblock; 2022 if (buffer_delay(bh) ||
1907 clear_buffer_delay(bh); 2023 buffer_unwritten(bh)) {
1908 bh->b_bdev = inode->i_sb->s_bdev; 2024
1909 } else if (buffer_unwritten(bh)) { 2025 BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
1910 bh->b_blocknr = pblock; 2026
1911 clear_buffer_unwritten(bh); 2027 if (buffer_delay(bh)) {
1912 set_buffer_mapped(bh); 2028 clear_buffer_delay(bh);
1913 set_buffer_new(bh); 2029 bh->b_blocknr = pblock;
1914 bh->b_bdev = inode->i_sb->s_bdev; 2030 } else {
2031 /*
2032 * unwritten already should have
2033 * blocknr assigned. Verify that
2034 */
2035 clear_buffer_unwritten(bh);
2036 BUG_ON(bh->b_blocknr != pblock);
2037 }
2038
1915 } else if (buffer_mapped(bh)) 2039 } else if (buffer_mapped(bh))
1916 BUG_ON(bh->b_blocknr != pblock); 2040 BUG_ON(bh->b_blocknr != pblock);
1917 2041
@@ -1990,51 +2114,6 @@ static void ext4_print_free_blocks(struct inode *inode)
1990 return; 2114 return;
1991} 2115}
1992 2116
1993#define EXT4_DELALLOC_RSVED 1
1994static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
1995 struct buffer_head *bh_result, int create)
1996{
1997 int ret;
1998 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
1999 loff_t disksize = EXT4_I(inode)->i_disksize;
2000 handle_t *handle = NULL;
2001
2002 handle = ext4_journal_current_handle();
2003 BUG_ON(!handle);
2004 ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
2005 bh_result, create, 0, EXT4_DELALLOC_RSVED);
2006 if (ret <= 0)
2007 return ret;
2008
2009 bh_result->b_size = (ret << inode->i_blkbits);
2010
2011 if (ext4_should_order_data(inode)) {
2012 int retval;
2013 retval = ext4_jbd2_file_inode(handle, inode);
2014 if (retval)
2015 /*
2016 * Failed to add inode for ordered mode. Don't
2017 * update file size
2018 */
2019 return retval;
2020 }
2021
2022 /*
2023 * Update on-disk size along with block allocation we don't
2024 * use 'extend_disksize' as size may change within already
2025 * allocated block -bzzz
2026 */
2027 disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
2028 if (disksize > i_size_read(inode))
2029 disksize = i_size_read(inode);
2030 if (disksize > EXT4_I(inode)->i_disksize) {
2031 ext4_update_i_disksize(inode, disksize);
2032 ret = ext4_mark_inode_dirty(handle, inode);
2033 return ret;
2034 }
2035 return 0;
2036}
2037
2038/* 2117/*
2039 * mpage_da_map_blocks - go through given space 2118 * mpage_da_map_blocks - go through given space
2040 * 2119 *
@@ -2045,29 +2124,57 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
2045 */ 2124 */
2046static int mpage_da_map_blocks(struct mpage_da_data *mpd) 2125static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2047{ 2126{
2048 int err = 0; 2127 int err, blks, get_blocks_flags;
2049 struct buffer_head new; 2128 struct buffer_head new;
2050 sector_t next; 2129 sector_t next = mpd->b_blocknr;
2130 unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
2131 loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
2132 handle_t *handle = NULL;
2051 2133
2052 /* 2134 /*
2053 * We consider only non-mapped and non-allocated blocks 2135 * We consider only non-mapped and non-allocated blocks
2054 */ 2136 */
2055 if ((mpd->b_state & (1 << BH_Mapped)) && 2137 if ((mpd->b_state & (1 << BH_Mapped)) &&
2056 !(mpd->b_state & (1 << BH_Delay))) 2138 !(mpd->b_state & (1 << BH_Delay)) &&
2139 !(mpd->b_state & (1 << BH_Unwritten)))
2057 return 0; 2140 return 0;
2058 new.b_state = mpd->b_state; 2141
2059 new.b_blocknr = 0;
2060 new.b_size = mpd->b_size;
2061 next = mpd->b_blocknr;
2062 /* 2142 /*
2063 * If we didn't accumulate anything 2143 * If we didn't accumulate anything to write simply return
2064 * to write simply return
2065 */ 2144 */
2066 if (!new.b_size) 2145 if (!mpd->b_size)
2067 return 0; 2146 return 0;
2068 2147
2069 err = ext4_da_get_block_write(mpd->inode, next, &new, 1); 2148 handle = ext4_journal_current_handle();
2070 if (err) { 2149 BUG_ON(!handle);
2150
2151 /*
2152 * Call ext4_get_blocks() to allocate any delayed allocation
2153 * blocks, or to convert an uninitialized extent to be
2154 * initialized (in the case where we have written into
2155 * one or more preallocated blocks).
2156 *
2157 * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to
2158 * indicate that we are on the delayed allocation path. This
2159 * affects functions in many different parts of the allocation
2160 * call path. This flag exists primarily because we don't
2161 * want to change *many* call functions, so ext4_get_blocks()
2162 * will set the magic i_delalloc_reserved_flag once the
2163 * inode's allocation semaphore is taken.
2164 *
2165 * If the blocks in questions were delalloc blocks, set
2166 * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting
2167 * variables are updated after the blocks have been allocated.
2168 */
2169 new.b_state = 0;
2170 get_blocks_flags = (EXT4_GET_BLOCKS_CREATE |
2171 EXT4_GET_BLOCKS_DELALLOC_RESERVE);
2172 if (mpd->b_state & (1 << BH_Delay))
2173 get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE;
2174 blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
2175 &new, get_blocks_flags);
2176 if (blks < 0) {
2177 err = blks;
2071 /* 2178 /*
2072 * If get block returns with error we simply 2179 * If get block returns with error we simply
2073 * return. Later writepage will redirty the page and 2180 * return. Later writepage will redirty the page and
@@ -2100,12 +2207,14 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2100 if (err == -ENOSPC) { 2207 if (err == -ENOSPC) {
2101 ext4_print_free_blocks(mpd->inode); 2208 ext4_print_free_blocks(mpd->inode);
2102 } 2209 }
2103 /* invlaidate all the pages */ 2210 /* invalidate all the pages */
2104 ext4_da_block_invalidatepages(mpd, next, 2211 ext4_da_block_invalidatepages(mpd, next,
2105 mpd->b_size >> mpd->inode->i_blkbits); 2212 mpd->b_size >> mpd->inode->i_blkbits);
2106 return err; 2213 return err;
2107 } 2214 }
2108 BUG_ON(new.b_size == 0); 2215 BUG_ON(blks == 0);
2216
2217 new.b_size = (blks << mpd->inode->i_blkbits);
2109 2218
2110 if (buffer_new(&new)) 2219 if (buffer_new(&new))
2111 __unmap_underlying_blocks(mpd->inode, &new); 2220 __unmap_underlying_blocks(mpd->inode, &new);
@@ -2118,6 +2227,23 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2118 (mpd->b_state & (1 << BH_Unwritten))) 2227 (mpd->b_state & (1 << BH_Unwritten)))
2119 mpage_put_bnr_to_bhs(mpd, next, &new); 2228 mpage_put_bnr_to_bhs(mpd, next, &new);
2120 2229
2230 if (ext4_should_order_data(mpd->inode)) {
2231 err = ext4_jbd2_file_inode(handle, mpd->inode);
2232 if (err)
2233 return err;
2234 }
2235
2236 /*
2237 * Update on-disk size along with block allocation.
2238 */
2239 disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits;
2240 if (disksize > i_size_read(mpd->inode))
2241 disksize = i_size_read(mpd->inode);
2242 if (disksize > EXT4_I(mpd->inode)->i_disksize) {
2243 ext4_update_i_disksize(mpd->inode, disksize);
2244 return ext4_mark_inode_dirty(handle, mpd->inode);
2245 }
2246
2121 return 0; 2247 return 0;
2122} 2248}
2123 2249
@@ -2192,6 +2318,17 @@ flush_it:
2192 return; 2318 return;
2193} 2319}
2194 2320
2321static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
2322{
2323 /*
2324 * unmapped buffer is possible for holes.
2325 * delay buffer is possible with delayed allocation.
2326 * We also need to consider unwritten buffer as unmapped.
2327 */
2328 return (!buffer_mapped(bh) || buffer_delay(bh) ||
2329 buffer_unwritten(bh)) && buffer_dirty(bh);
2330}
2331
2195/* 2332/*
2196 * __mpage_da_writepage - finds extent of pages and blocks 2333 * __mpage_da_writepage - finds extent of pages and blocks
2197 * 2334 *
@@ -2276,8 +2413,7 @@ static int __mpage_da_writepage(struct page *page,
2276 * Otherwise we won't make progress 2413 * Otherwise we won't make progress
2277 * with the page in ext4_da_writepage 2414 * with the page in ext4_da_writepage
2278 */ 2415 */
2279 if (buffer_dirty(bh) && 2416 if (ext4_bh_unmapped_or_delay(NULL, bh)) {
2280 (!buffer_mapped(bh) || buffer_delay(bh))) {
2281 mpage_add_bh_to_extent(mpd, logical, 2417 mpage_add_bh_to_extent(mpd, logical,
2282 bh->b_size, 2418 bh->b_size,
2283 bh->b_state); 2419 bh->b_state);
@@ -2303,8 +2439,16 @@ static int __mpage_da_writepage(struct page *page,
2303} 2439}
2304 2440
2305/* 2441/*
2306 * this is a special callback for ->write_begin() only 2442 * This is a special get_blocks_t callback which is used by
2307 * it's intention is to return mapped block or reserve space 2443 * ext4_da_write_begin(). It will either return mapped block or
2444 * reserve space for a single block.
2445 *
2446 * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set.
2447 * We also have b_blocknr = -1 and b_bdev initialized properly
2448 *
2449 * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set.
2450 * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev
2451 * initialized properly.
2308 */ 2452 */
2309static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, 2453static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2310 struct buffer_head *bh_result, int create) 2454 struct buffer_head *bh_result, int create)
@@ -2323,7 +2467,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2323 * preallocated blocks are unmapped but should treated 2467 * preallocated blocks are unmapped but should treated
2324 * the same as allocated blocks. 2468 * the same as allocated blocks.
2325 */ 2469 */
2326 ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0, 0); 2470 ret = ext4_get_blocks(NULL, inode, iblock, 1, bh_result, 0);
2327 if ((ret == 0) && !buffer_delay(bh_result)) { 2471 if ((ret == 0) && !buffer_delay(bh_result)) {
2328 /* the block isn't (pre)allocated yet, let's reserve space */ 2472 /* the block isn't (pre)allocated yet, let's reserve space */
2329 /* 2473 /*
@@ -2340,40 +2484,53 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2340 set_buffer_delay(bh_result); 2484 set_buffer_delay(bh_result);
2341 } else if (ret > 0) { 2485 } else if (ret > 0) {
2342 bh_result->b_size = (ret << inode->i_blkbits); 2486 bh_result->b_size = (ret << inode->i_blkbits);
2343 /* 2487 if (buffer_unwritten(bh_result)) {
2344 * With sub-block writes into unwritten extents 2488 /* A delayed write to unwritten bh should
2345 * we also need to mark the buffer as new so that 2489 * be marked new and mapped. Mapped ensures
2346 * the unwritten parts of the buffer gets correctly zeroed. 2490 * that we don't do get_block multiple times
2347 */ 2491 * when we write to the same offset and new
2348 if (buffer_unwritten(bh_result)) 2492 * ensures that we do proper zero out for
2493 * partial write.
2494 */
2349 set_buffer_new(bh_result); 2495 set_buffer_new(bh_result);
2496 set_buffer_mapped(bh_result);
2497 }
2350 ret = 0; 2498 ret = 0;
2351 } 2499 }
2352 2500
2353 return ret; 2501 return ret;
2354} 2502}
2355 2503
2356static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) 2504/*
2357{ 2505 * This function is used as a standard get_block_t calback function
2358 /* 2506 * when there is no desire to allocate any blocks. It is used as a
2359 * unmapped buffer is possible for holes. 2507 * callback function for block_prepare_write(), nobh_writepage(), and
2360 * delay buffer is possible with delayed allocation 2508 * block_write_full_page(). These functions should only try to map a
2361 */ 2509 * single block at a time.
2362 return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh)); 2510 *
2363} 2511 * Since this function doesn't do block allocations even if the caller
2364 2512 * requests it by passing in create=1, it is critically important that
2365static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, 2513 * any caller checks to make sure that any buffer heads are returned
2514 * by this function are either all already mapped or marked for
2515 * delayed allocation before calling nobh_writepage() or
2516 * block_write_full_page(). Otherwise, b_blocknr could be left
2517 * unitialized, and the page write functions will be taken by
2518 * surprise.
2519 */
2520static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
2366 struct buffer_head *bh_result, int create) 2521 struct buffer_head *bh_result, int create)
2367{ 2522{
2368 int ret = 0; 2523 int ret = 0;
2369 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 2524 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
2370 2525
2526 BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
2527
2371 /* 2528 /*
2372 * we don't want to do block allocation in writepage 2529 * we don't want to do block allocation in writepage
2373 * so call get_block_wrap with create = 0 2530 * so call get_block_wrap with create = 0
2374 */ 2531 */
2375 ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks, 2532 ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0);
2376 bh_result, 0, 0, 0); 2533 BUG_ON(create && ret == 0);
2377 if (ret > 0) { 2534 if (ret > 0) {
2378 bh_result->b_size = (ret << inode->i_blkbits); 2535 bh_result->b_size = (ret << inode->i_blkbits);
2379 ret = 0; 2536 ret = 0;
@@ -2382,10 +2539,11 @@ static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
2382} 2539}
2383 2540
2384/* 2541/*
2385 * get called vi ext4_da_writepages after taking page lock (have journal handle) 2542 * This function can get called via...
2386 * get called via journal_submit_inode_data_buffers (no journal handle) 2543 * - ext4_da_writepages after taking page lock (have journal handle)
2387 * get called via shrink_page_list via pdflush (no journal handle) 2544 * - journal_submit_inode_data_buffers (no journal handle)
2388 * or grab_page_cache when doing write_begin (have journal handle) 2545 * - shrink_page_list via pdflush (no journal handle)
2546 * - grab_page_cache when doing write_begin (have journal handle)
2389 */ 2547 */
2390static int ext4_da_writepage(struct page *page, 2548static int ext4_da_writepage(struct page *page,
2391 struct writeback_control *wbc) 2549 struct writeback_control *wbc)
@@ -2436,7 +2594,7 @@ static int ext4_da_writepage(struct page *page,
2436 * do block allocation here. 2594 * do block allocation here.
2437 */ 2595 */
2438 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, 2596 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
2439 ext4_normal_get_block_write); 2597 noalloc_get_block_write);
2440 if (!ret) { 2598 if (!ret) {
2441 page_bufs = page_buffers(page); 2599 page_bufs = page_buffers(page);
2442 /* check whether all are mapped and non delay */ 2600 /* check whether all are mapped and non delay */
@@ -2461,11 +2619,10 @@ static int ext4_da_writepage(struct page *page,
2461 } 2619 }
2462 2620
2463 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 2621 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
2464 ret = nobh_writepage(page, ext4_normal_get_block_write, wbc); 2622 ret = nobh_writepage(page, noalloc_get_block_write, wbc);
2465 else 2623 else
2466 ret = block_write_full_page(page, 2624 ret = block_write_full_page(page, noalloc_get_block_write,
2467 ext4_normal_get_block_write, 2625 wbc);
2468 wbc);
2469 2626
2470 return ret; 2627 return ret;
2471} 2628}
@@ -2777,7 +2934,7 @@ retry:
2777 *pagep = page; 2934 *pagep = page;
2778 2935
2779 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 2936 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
2780 ext4_da_get_block_prep); 2937 ext4_da_get_block_prep);
2781 if (ret < 0) { 2938 if (ret < 0) {
2782 unlock_page(page); 2939 unlock_page(page);
2783 ext4_journal_stop(handle); 2940 ext4_journal_stop(handle);
@@ -2815,7 +2972,7 @@ static int ext4_da_should_update_i_disksize(struct page *page,
2815 for (i = 0; i < idx; i++) 2972 for (i = 0; i < idx; i++)
2816 bh = bh->b_this_page; 2973 bh = bh->b_this_page;
2817 2974
2818 if (!buffer_mapped(bh) || (buffer_delay(bh))) 2975 if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh))
2819 return 0; 2976 return 0;
2820 return 1; 2977 return 1;
2821} 2978}
@@ -3085,12 +3242,10 @@ static int __ext4_normal_writepage(struct page *page,
3085 struct inode *inode = page->mapping->host; 3242 struct inode *inode = page->mapping->host;
3086 3243
3087 if (test_opt(inode->i_sb, NOBH)) 3244 if (test_opt(inode->i_sb, NOBH))
3088 return nobh_writepage(page, 3245 return nobh_writepage(page, noalloc_get_block_write, wbc);
3089 ext4_normal_get_block_write, wbc);
3090 else 3246 else
3091 return block_write_full_page(page, 3247 return block_write_full_page(page, noalloc_get_block_write,
3092 ext4_normal_get_block_write, 3248 wbc);
3093 wbc);
3094} 3249}
3095 3250
3096static int ext4_normal_writepage(struct page *page, 3251static int ext4_normal_writepage(struct page *page,
@@ -3142,7 +3297,7 @@ static int __ext4_journalled_writepage(struct page *page,
3142 int err; 3297 int err;
3143 3298
3144 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, 3299 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
3145 ext4_normal_get_block_write); 3300 noalloc_get_block_write);
3146 if (ret != 0) 3301 if (ret != 0)
3147 goto out_unlock; 3302 goto out_unlock;
3148 3303
@@ -3227,9 +3382,8 @@ static int ext4_journalled_writepage(struct page *page,
3227 * really know unless we go poke around in the buffer_heads. 3382 * really know unless we go poke around in the buffer_heads.
3228 * But block_write_full_page will do the right thing. 3383 * But block_write_full_page will do the right thing.
3229 */ 3384 */
3230 return block_write_full_page(page, 3385 return block_write_full_page(page, noalloc_get_block_write,
3231 ext4_normal_get_block_write, 3386 wbc);
3232 wbc);
3233 } 3387 }
3234no_write: 3388no_write:
3235 redirty_page_for_writepage(wbc, page); 3389 redirty_page_for_writepage(wbc, page);
@@ -3973,7 +4127,8 @@ void ext4_truncate(struct inode *inode)
3973 if (!ext4_can_truncate(inode)) 4127 if (!ext4_can_truncate(inode))
3974 return; 4128 return;
3975 4129
3976 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) 4130 if (ei->i_disksize && inode->i_size == 0 &&
4131 !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
3977 ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; 4132 ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
3978 4133
3979 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 4134 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
@@ -4715,25 +4870,6 @@ int ext4_write_inode(struct inode *inode, int wait)
4715 return ext4_force_commit(inode->i_sb); 4870 return ext4_force_commit(inode->i_sb);
4716} 4871}
4717 4872
4718int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh)
4719{
4720 int err = 0;
4721
4722 mark_buffer_dirty(bh);
4723 if (inode && inode_needs_sync(inode)) {
4724 sync_dirty_buffer(bh);
4725 if (buffer_req(bh) && !buffer_uptodate(bh)) {
4726 ext4_error(inode->i_sb, __func__,
4727 "IO error syncing inode, "
4728 "inode=%lu, block=%llu",
4729 inode->i_ino,
4730 (unsigned long long)bh->b_blocknr);
4731 err = -EIO;
4732 }
4733 }
4734 return err;
4735}
4736
4737/* 4873/*
4738 * ext4_setattr() 4874 * ext4_setattr()
4739 * 4875 *
@@ -4930,7 +5066,8 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4930 */ 5066 */
4931int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) 5067int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4932{ 5068{
4933 int groups, gdpblocks; 5069 ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
5070 int gdpblocks;
4934 int idxblocks; 5071 int idxblocks;
4935 int ret = 0; 5072 int ret = 0;
4936 5073
@@ -4957,8 +5094,8 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4957 groups += nrblocks; 5094 groups += nrblocks;
4958 5095
4959 gdpblocks = groups; 5096 gdpblocks = groups;
4960 if (groups > EXT4_SB(inode->i_sb)->s_groups_count) 5097 if (groups > ngroups)
4961 groups = EXT4_SB(inode->i_sb)->s_groups_count; 5098 groups = ngroups;
4962 if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) 5099 if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
4963 gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; 5100 gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
4964 5101
@@ -4998,7 +5135,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
4998 * Calculate the journal credits for a chunk of data modification. 5135 * Calculate the journal credits for a chunk of data modification.
4999 * 5136 *
5000 * This is called from DIO, fallocate or whoever calling 5137 * This is called from DIO, fallocate or whoever calling
5001 * ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks. 5138 * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks.
5002 * 5139 *
5003 * journal buffers for data blocks are not included here, as DIO 5140 * journal buffers for data blocks are not included here, as DIO
5004 * and fallocate do no need to journal data buffers. 5141 * and fallocate do no need to journal data buffers.
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f871677a7984..ed8482e22c0e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -372,24 +372,12 @@ static inline void mb_set_bit(int bit, void *addr)
372 ext4_set_bit(bit, addr); 372 ext4_set_bit(bit, addr);
373} 373}
374 374
375static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr)
376{
377 addr = mb_correct_addr_and_bit(&bit, addr);
378 ext4_set_bit_atomic(lock, bit, addr);
379}
380
381static inline void mb_clear_bit(int bit, void *addr) 375static inline void mb_clear_bit(int bit, void *addr)
382{ 376{
383 addr = mb_correct_addr_and_bit(&bit, addr); 377 addr = mb_correct_addr_and_bit(&bit, addr);
384 ext4_clear_bit(bit, addr); 378 ext4_clear_bit(bit, addr);
385} 379}
386 380
387static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
388{
389 addr = mb_correct_addr_and_bit(&bit, addr);
390 ext4_clear_bit_atomic(lock, bit, addr);
391}
392
393static inline int mb_find_next_zero_bit(void *addr, int max, int start) 381static inline int mb_find_next_zero_bit(void *addr, int max, int start)
394{ 382{
395 int fix = 0, ret, tmpmax; 383 int fix = 0, ret, tmpmax;
@@ -448,7 +436,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
448 436
449 if (unlikely(e4b->bd_info->bb_bitmap == NULL)) 437 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
450 return; 438 return;
451 BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group)); 439 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
452 for (i = 0; i < count; i++) { 440 for (i = 0; i < count; i++) {
453 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { 441 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
454 ext4_fsblk_t blocknr; 442 ext4_fsblk_t blocknr;
@@ -472,7 +460,7 @@ static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
472 460
473 if (unlikely(e4b->bd_info->bb_bitmap == NULL)) 461 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
474 return; 462 return;
475 BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); 463 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
476 for (i = 0; i < count; i++) { 464 for (i = 0; i < count; i++) {
477 BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap)); 465 BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
478 mb_set_bit(first + i, e4b->bd_info->bb_bitmap); 466 mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
@@ -739,6 +727,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
739 727
740static int ext4_mb_init_cache(struct page *page, char *incore) 728static int ext4_mb_init_cache(struct page *page, char *incore)
741{ 729{
730 ext4_group_t ngroups;
742 int blocksize; 731 int blocksize;
743 int blocks_per_page; 732 int blocks_per_page;
744 int groups_per_page; 733 int groups_per_page;
@@ -757,6 +746,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
757 746
758 inode = page->mapping->host; 747 inode = page->mapping->host;
759 sb = inode->i_sb; 748 sb = inode->i_sb;
749 ngroups = ext4_get_groups_count(sb);
760 blocksize = 1 << inode->i_blkbits; 750 blocksize = 1 << inode->i_blkbits;
761 blocks_per_page = PAGE_CACHE_SIZE / blocksize; 751 blocks_per_page = PAGE_CACHE_SIZE / blocksize;
762 752
@@ -780,7 +770,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
780 for (i = 0; i < groups_per_page; i++) { 770 for (i = 0; i < groups_per_page; i++) {
781 struct ext4_group_desc *desc; 771 struct ext4_group_desc *desc;
782 772
783 if (first_group + i >= EXT4_SB(sb)->s_groups_count) 773 if (first_group + i >= ngroups)
784 break; 774 break;
785 775
786 err = -EIO; 776 err = -EIO;
@@ -801,17 +791,17 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
801 unlock_buffer(bh[i]); 791 unlock_buffer(bh[i]);
802 continue; 792 continue;
803 } 793 }
804 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); 794 ext4_lock_group(sb, first_group + i);
805 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 795 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
806 ext4_init_block_bitmap(sb, bh[i], 796 ext4_init_block_bitmap(sb, bh[i],
807 first_group + i, desc); 797 first_group + i, desc);
808 set_bitmap_uptodate(bh[i]); 798 set_bitmap_uptodate(bh[i]);
809 set_buffer_uptodate(bh[i]); 799 set_buffer_uptodate(bh[i]);
810 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); 800 ext4_unlock_group(sb, first_group + i);
811 unlock_buffer(bh[i]); 801 unlock_buffer(bh[i]);
812 continue; 802 continue;
813 } 803 }
814 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); 804 ext4_unlock_group(sb, first_group + i);
815 if (buffer_uptodate(bh[i])) { 805 if (buffer_uptodate(bh[i])) {
816 /* 806 /*
817 * if not uninit if bh is uptodate, 807 * if not uninit if bh is uptodate,
@@ -852,7 +842,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
852 struct ext4_group_info *grinfo; 842 struct ext4_group_info *grinfo;
853 843
854 group = (first_block + i) >> 1; 844 group = (first_block + i) >> 1;
855 if (group >= EXT4_SB(sb)->s_groups_count) 845 if (group >= ngroups)
856 break; 846 break;
857 847
858 /* 848 /*
@@ -1078,7 +1068,7 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
1078 return 0; 1068 return 0;
1079} 1069}
1080 1070
1081static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len) 1071static void mb_clear_bits(void *bm, int cur, int len)
1082{ 1072{
1083 __u32 *addr; 1073 __u32 *addr;
1084 1074
@@ -1091,15 +1081,12 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
1091 cur += 32; 1081 cur += 32;
1092 continue; 1082 continue;
1093 } 1083 }
1094 if (lock) 1084 mb_clear_bit(cur, bm);
1095 mb_clear_bit_atomic(lock, cur, bm);
1096 else
1097 mb_clear_bit(cur, bm);
1098 cur++; 1085 cur++;
1099 } 1086 }
1100} 1087}
1101 1088
1102static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) 1089static void mb_set_bits(void *bm, int cur, int len)
1103{ 1090{
1104 __u32 *addr; 1091 __u32 *addr;
1105 1092
@@ -1112,10 +1099,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
1112 cur += 32; 1099 cur += 32;
1113 continue; 1100 continue;
1114 } 1101 }
1115 if (lock) 1102 mb_set_bit(cur, bm);
1116 mb_set_bit_atomic(lock, cur, bm);
1117 else
1118 mb_set_bit(cur, bm);
1119 cur++; 1103 cur++;
1120 } 1104 }
1121} 1105}
@@ -1131,7 +1115,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1131 struct super_block *sb = e4b->bd_sb; 1115 struct super_block *sb = e4b->bd_sb;
1132 1116
1133 BUG_ON(first + count > (sb->s_blocksize << 3)); 1117 BUG_ON(first + count > (sb->s_blocksize << 3));
1134 BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group)); 1118 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
1135 mb_check_buddy(e4b); 1119 mb_check_buddy(e4b);
1136 mb_free_blocks_double(inode, e4b, first, count); 1120 mb_free_blocks_double(inode, e4b, first, count);
1137 1121
@@ -1212,7 +1196,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
1212 int ord; 1196 int ord;
1213 void *buddy; 1197 void *buddy;
1214 1198
1215 BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); 1199 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1216 BUG_ON(ex == NULL); 1200 BUG_ON(ex == NULL);
1217 1201
1218 buddy = mb_find_buddy(e4b, order, &max); 1202 buddy = mb_find_buddy(e4b, order, &max);
@@ -1276,7 +1260,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1276 1260
1277 BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3)); 1261 BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
1278 BUG_ON(e4b->bd_group != ex->fe_group); 1262 BUG_ON(e4b->bd_group != ex->fe_group);
1279 BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); 1263 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1280 mb_check_buddy(e4b); 1264 mb_check_buddy(e4b);
1281 mb_mark_used_double(e4b, start, len); 1265 mb_mark_used_double(e4b, start, len);
1282 1266
@@ -1330,8 +1314,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1330 e4b->bd_info->bb_counters[ord]++; 1314 e4b->bd_info->bb_counters[ord]++;
1331 } 1315 }
1332 1316
1333 mb_set_bits(sb_bgl_lock(EXT4_SB(e4b->bd_sb), ex->fe_group), 1317 mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
1334 EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
1335 mb_check_buddy(e4b); 1318 mb_check_buddy(e4b);
1336 1319
1337 return ret; 1320 return ret;
@@ -1726,7 +1709,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1726 unsigned free, fragments; 1709 unsigned free, fragments;
1727 unsigned i, bits; 1710 unsigned i, bits;
1728 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); 1711 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
1729 struct ext4_group_desc *desc;
1730 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); 1712 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1731 1713
1732 BUG_ON(cr < 0 || cr >= 4); 1714 BUG_ON(cr < 0 || cr >= 4);
@@ -1742,10 +1724,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1742 switch (cr) { 1724 switch (cr) {
1743 case 0: 1725 case 0:
1744 BUG_ON(ac->ac_2order == 0); 1726 BUG_ON(ac->ac_2order == 0);
1745 /* If this group is uninitialized, skip it initially */
1746 desc = ext4_get_group_desc(ac->ac_sb, group, NULL);
1747 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
1748 return 0;
1749 1727
1750 /* Avoid using the first bg of a flexgroup for data files */ 1728 /* Avoid using the first bg of a flexgroup for data files */
1751 if ((ac->ac_flags & EXT4_MB_HINT_DATA) && 1729 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
@@ -1788,6 +1766,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
1788 int block, pnum; 1766 int block, pnum;
1789 int blocks_per_page; 1767 int blocks_per_page;
1790 int groups_per_page; 1768 int groups_per_page;
1769 ext4_group_t ngroups = ext4_get_groups_count(sb);
1791 ext4_group_t first_group; 1770 ext4_group_t first_group;
1792 struct ext4_group_info *grp; 1771 struct ext4_group_info *grp;
1793 1772
@@ -1807,7 +1786,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
1807 /* read all groups the page covers into the cache */ 1786 /* read all groups the page covers into the cache */
1808 for (i = 0; i < groups_per_page; i++) { 1787 for (i = 0; i < groups_per_page; i++) {
1809 1788
1810 if ((first_group + i) >= EXT4_SB(sb)->s_groups_count) 1789 if ((first_group + i) >= ngroups)
1811 break; 1790 break;
1812 grp = ext4_get_group_info(sb, first_group + i); 1791 grp = ext4_get_group_info(sb, first_group + i);
1813 /* take all groups write allocation 1792 /* take all groups write allocation
@@ -1945,8 +1924,7 @@ err:
1945static noinline_for_stack int 1924static noinline_for_stack int
1946ext4_mb_regular_allocator(struct ext4_allocation_context *ac) 1925ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1947{ 1926{
1948 ext4_group_t group; 1927 ext4_group_t ngroups, group, i;
1949 ext4_group_t i;
1950 int cr; 1928 int cr;
1951 int err = 0; 1929 int err = 0;
1952 int bsbits; 1930 int bsbits;
@@ -1957,6 +1935,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1957 1935
1958 sb = ac->ac_sb; 1936 sb = ac->ac_sb;
1959 sbi = EXT4_SB(sb); 1937 sbi = EXT4_SB(sb);
1938 ngroups = ext4_get_groups_count(sb);
1960 BUG_ON(ac->ac_status == AC_STATUS_FOUND); 1939 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
1961 1940
1962 /* first, try the goal */ 1941 /* first, try the goal */
@@ -2017,11 +1996,11 @@ repeat:
2017 */ 1996 */
2018 group = ac->ac_g_ex.fe_group; 1997 group = ac->ac_g_ex.fe_group;
2019 1998
2020 for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) { 1999 for (i = 0; i < ngroups; group++, i++) {
2021 struct ext4_group_info *grp; 2000 struct ext4_group_info *grp;
2022 struct ext4_group_desc *desc; 2001 struct ext4_group_desc *desc;
2023 2002
2024 if (group == EXT4_SB(sb)->s_groups_count) 2003 if (group == ngroups)
2025 group = 0; 2004 group = 0;
2026 2005
2027 /* quick check to skip empty groups */ 2006 /* quick check to skip empty groups */
@@ -2064,9 +2043,7 @@ repeat:
2064 2043
2065 ac->ac_groups_scanned++; 2044 ac->ac_groups_scanned++;
2066 desc = ext4_get_group_desc(sb, group, NULL); 2045 desc = ext4_get_group_desc(sb, group, NULL);
2067 if (cr == 0 || (desc->bg_flags & 2046 if (cr == 0)
2068 cpu_to_le16(EXT4_BG_BLOCK_UNINIT) &&
2069 ac->ac_2order != 0))
2070 ext4_mb_simple_scan_group(ac, &e4b); 2047 ext4_mb_simple_scan_group(ac, &e4b);
2071 else if (cr == 1 && 2048 else if (cr == 1 &&
2072 ac->ac_g_ex.fe_len == sbi->s_stripe) 2049 ac->ac_g_ex.fe_len == sbi->s_stripe)
@@ -2315,12 +2292,10 @@ static struct file_operations ext4_mb_seq_history_fops = {
2315static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) 2292static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2316{ 2293{
2317 struct super_block *sb = seq->private; 2294 struct super_block *sb = seq->private;
2318 struct ext4_sb_info *sbi = EXT4_SB(sb);
2319 ext4_group_t group; 2295 ext4_group_t group;
2320 2296
2321 if (*pos < 0 || *pos >= sbi->s_groups_count) 2297 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2322 return NULL; 2298 return NULL;
2323
2324 group = *pos + 1; 2299 group = *pos + 1;
2325 return (void *) ((unsigned long) group); 2300 return (void *) ((unsigned long) group);
2326} 2301}
@@ -2328,11 +2303,10 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2328static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) 2303static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
2329{ 2304{
2330 struct super_block *sb = seq->private; 2305 struct super_block *sb = seq->private;
2331 struct ext4_sb_info *sbi = EXT4_SB(sb);
2332 ext4_group_t group; 2306 ext4_group_t group;
2333 2307
2334 ++*pos; 2308 ++*pos;
2335 if (*pos < 0 || *pos >= sbi->s_groups_count) 2309 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2336 return NULL; 2310 return NULL;
2337 group = *pos + 1; 2311 group = *pos + 1;
2338 return (void *) ((unsigned long) group); 2312 return (void *) ((unsigned long) group);
@@ -2420,7 +2394,8 @@ static void ext4_mb_history_release(struct super_block *sb)
2420 2394
2421 if (sbi->s_proc != NULL) { 2395 if (sbi->s_proc != NULL) {
2422 remove_proc_entry("mb_groups", sbi->s_proc); 2396 remove_proc_entry("mb_groups", sbi->s_proc);
2423 remove_proc_entry("mb_history", sbi->s_proc); 2397 if (sbi->s_mb_history_max)
2398 remove_proc_entry("mb_history", sbi->s_proc);
2424 } 2399 }
2425 kfree(sbi->s_mb_history); 2400 kfree(sbi->s_mb_history);
2426} 2401}
@@ -2431,17 +2406,17 @@ static void ext4_mb_history_init(struct super_block *sb)
2431 int i; 2406 int i;
2432 2407
2433 if (sbi->s_proc != NULL) { 2408 if (sbi->s_proc != NULL) {
2434 proc_create_data("mb_history", S_IRUGO, sbi->s_proc, 2409 if (sbi->s_mb_history_max)
2435 &ext4_mb_seq_history_fops, sb); 2410 proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
2411 &ext4_mb_seq_history_fops, sb);
2436 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, 2412 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
2437 &ext4_mb_seq_groups_fops, sb); 2413 &ext4_mb_seq_groups_fops, sb);
2438 } 2414 }
2439 2415
2440 sbi->s_mb_history_max = 1000;
2441 sbi->s_mb_history_cur = 0; 2416 sbi->s_mb_history_cur = 0;
2442 spin_lock_init(&sbi->s_mb_history_lock); 2417 spin_lock_init(&sbi->s_mb_history_lock);
2443 i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); 2418 i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
2444 sbi->s_mb_history = kzalloc(i, GFP_KERNEL); 2419 sbi->s_mb_history = i ? kzalloc(i, GFP_KERNEL) : NULL;
2445 /* if we can't allocate history, then we simple won't use it */ 2420 /* if we can't allocate history, then we simple won't use it */
2446} 2421}
2447 2422
@@ -2451,7 +2426,7 @@ ext4_mb_store_history(struct ext4_allocation_context *ac)
2451 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 2426 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2452 struct ext4_mb_history h; 2427 struct ext4_mb_history h;
2453 2428
2454 if (unlikely(sbi->s_mb_history == NULL)) 2429 if (sbi->s_mb_history == NULL)
2455 return; 2430 return;
2456 2431
2457 if (!(ac->ac_op & sbi->s_mb_history_filter)) 2432 if (!(ac->ac_op & sbi->s_mb_history_filter))
@@ -2587,6 +2562,7 @@ void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add)
2587 2562
2588static int ext4_mb_init_backend(struct super_block *sb) 2563static int ext4_mb_init_backend(struct super_block *sb)
2589{ 2564{
2565 ext4_group_t ngroups = ext4_get_groups_count(sb);
2590 ext4_group_t i; 2566 ext4_group_t i;
2591 int metalen; 2567 int metalen;
2592 struct ext4_sb_info *sbi = EXT4_SB(sb); 2568 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2598,7 +2574,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2598 struct ext4_group_desc *desc; 2574 struct ext4_group_desc *desc;
2599 2575
2600 /* This is the number of blocks used by GDT */ 2576 /* This is the number of blocks used by GDT */
2601 num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 2577 num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
2602 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); 2578 1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
2603 2579
2604 /* 2580 /*
@@ -2644,7 +2620,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2644 for (i = 0; i < num_meta_group_infos; i++) { 2620 for (i = 0; i < num_meta_group_infos; i++) {
2645 if ((i + 1) == num_meta_group_infos) 2621 if ((i + 1) == num_meta_group_infos)
2646 metalen = sizeof(*meta_group_info) * 2622 metalen = sizeof(*meta_group_info) *
2647 (sbi->s_groups_count - 2623 (ngroups -
2648 (i << EXT4_DESC_PER_BLOCK_BITS(sb))); 2624 (i << EXT4_DESC_PER_BLOCK_BITS(sb)));
2649 meta_group_info = kmalloc(metalen, GFP_KERNEL); 2625 meta_group_info = kmalloc(metalen, GFP_KERNEL);
2650 if (meta_group_info == NULL) { 2626 if (meta_group_info == NULL) {
@@ -2655,7 +2631,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2655 sbi->s_group_info[i] = meta_group_info; 2631 sbi->s_group_info[i] = meta_group_info;
2656 } 2632 }
2657 2633
2658 for (i = 0; i < sbi->s_groups_count; i++) { 2634 for (i = 0; i < ngroups; i++) {
2659 desc = ext4_get_group_desc(sb, i, NULL); 2635 desc = ext4_get_group_desc(sb, i, NULL);
2660 if (desc == NULL) { 2636 if (desc == NULL) {
2661 printk(KERN_ERR 2637 printk(KERN_ERR
@@ -2761,7 +2737,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2761 return 0; 2737 return 0;
2762} 2738}
2763 2739
2764/* need to called with ext4 group lock (ext4_lock_group) */ 2740/* need to called with the ext4 group lock held */
2765static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) 2741static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2766{ 2742{
2767 struct ext4_prealloc_space *pa; 2743 struct ext4_prealloc_space *pa;
@@ -2781,13 +2757,14 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2781 2757
2782int ext4_mb_release(struct super_block *sb) 2758int ext4_mb_release(struct super_block *sb)
2783{ 2759{
2760 ext4_group_t ngroups = ext4_get_groups_count(sb);
2784 ext4_group_t i; 2761 ext4_group_t i;
2785 int num_meta_group_infos; 2762 int num_meta_group_infos;
2786 struct ext4_group_info *grinfo; 2763 struct ext4_group_info *grinfo;
2787 struct ext4_sb_info *sbi = EXT4_SB(sb); 2764 struct ext4_sb_info *sbi = EXT4_SB(sb);
2788 2765
2789 if (sbi->s_group_info) { 2766 if (sbi->s_group_info) {
2790 for (i = 0; i < sbi->s_groups_count; i++) { 2767 for (i = 0; i < ngroups; i++) {
2791 grinfo = ext4_get_group_info(sb, i); 2768 grinfo = ext4_get_group_info(sb, i);
2792#ifdef DOUBLE_CHECK 2769#ifdef DOUBLE_CHECK
2793 kfree(grinfo->bb_bitmap); 2770 kfree(grinfo->bb_bitmap);
@@ -2797,7 +2774,7 @@ int ext4_mb_release(struct super_block *sb)
2797 ext4_unlock_group(sb, i); 2774 ext4_unlock_group(sb, i);
2798 kfree(grinfo); 2775 kfree(grinfo);
2799 } 2776 }
2800 num_meta_group_infos = (sbi->s_groups_count + 2777 num_meta_group_infos = (ngroups +
2801 EXT4_DESC_PER_BLOCK(sb) - 1) >> 2778 EXT4_DESC_PER_BLOCK(sb) - 1) >>
2802 EXT4_DESC_PER_BLOCK_BITS(sb); 2779 EXT4_DESC_PER_BLOCK_BITS(sb);
2803 for (i = 0; i < num_meta_group_infos; i++) 2780 for (i = 0; i < num_meta_group_infos; i++)
@@ -2984,27 +2961,25 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2984 + le32_to_cpu(es->s_first_data_block); 2961 + le32_to_cpu(es->s_first_data_block);
2985 2962
2986 len = ac->ac_b_ex.fe_len; 2963 len = ac->ac_b_ex.fe_len;
2987 if (in_range(ext4_block_bitmap(sb, gdp), block, len) || 2964 if (!ext4_data_block_valid(sbi, block, len)) {
2988 in_range(ext4_inode_bitmap(sb, gdp), block, len) ||
2989 in_range(block, ext4_inode_table(sb, gdp),
2990 EXT4_SB(sb)->s_itb_per_group) ||
2991 in_range(block + len - 1, ext4_inode_table(sb, gdp),
2992 EXT4_SB(sb)->s_itb_per_group)) {
2993 ext4_error(sb, __func__, 2965 ext4_error(sb, __func__,
2994 "Allocating block %llu in system zone of %d group\n", 2966 "Allocating blocks %llu-%llu which overlap "
2995 block, ac->ac_b_ex.fe_group); 2967 "fs metadata\n", block, block+len);
2996 /* File system mounted not to panic on error 2968 /* File system mounted not to panic on error
2997 * Fix the bitmap and repeat the block allocation 2969 * Fix the bitmap and repeat the block allocation
2998 * We leak some of the blocks here. 2970 * We leak some of the blocks here.
2999 */ 2971 */
3000 mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), 2972 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3001 bitmap_bh->b_data, ac->ac_b_ex.fe_start, 2973 mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
3002 ac->ac_b_ex.fe_len); 2974 ac->ac_b_ex.fe_len);
2975 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3003 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 2976 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
3004 if (!err) 2977 if (!err)
3005 err = -EAGAIN; 2978 err = -EAGAIN;
3006 goto out_err; 2979 goto out_err;
3007 } 2980 }
2981
2982 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3008#ifdef AGGRESSIVE_CHECK 2983#ifdef AGGRESSIVE_CHECK
3009 { 2984 {
3010 int i; 2985 int i;
@@ -3014,9 +2989,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3014 } 2989 }
3015 } 2990 }
3016#endif 2991#endif
3017 spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); 2992 mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len);
3018 mb_set_bits(NULL, bitmap_bh->b_data,
3019 ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
3020 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 2993 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
3021 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 2994 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3022 ext4_free_blks_set(sb, gdp, 2995 ext4_free_blks_set(sb, gdp,
@@ -3026,7 +2999,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3026 len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len; 2999 len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
3027 ext4_free_blks_set(sb, gdp, len); 3000 ext4_free_blks_set(sb, gdp, len);
3028 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); 3001 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
3029 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); 3002
3003 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3030 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); 3004 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
3031 /* 3005 /*
3032 * Now reduce the dirty block count also. Should not go negative 3006 * Now reduce the dirty block count also. Should not go negative
@@ -3459,7 +3433,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3459 * the function goes through all block freed in the group 3433 * the function goes through all block freed in the group
3460 * but not yet committed and marks them used in in-core bitmap. 3434 * but not yet committed and marks them used in in-core bitmap.
3461 * buddy must be generated from this bitmap 3435 * buddy must be generated from this bitmap
3462 * Need to be called with ext4 group lock (ext4_lock_group) 3436 * Need to be called with the ext4 group lock held
3463 */ 3437 */
3464static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 3438static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3465 ext4_group_t group) 3439 ext4_group_t group)
@@ -3473,9 +3447,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3473 3447
3474 while (n) { 3448 while (n) {
3475 entry = rb_entry(n, struct ext4_free_data, node); 3449 entry = rb_entry(n, struct ext4_free_data, node);
3476 mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), 3450 mb_set_bits(bitmap, entry->start_blk, entry->count);
3477 bitmap, entry->start_blk,
3478 entry->count);
3479 n = rb_next(n); 3451 n = rb_next(n);
3480 } 3452 }
3481 return; 3453 return;
@@ -3484,7 +3456,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3484/* 3456/*
3485 * the function goes through all preallocation in this group and marks them 3457 * the function goes through all preallocation in this group and marks them
3486 * used in in-core bitmap. buddy must be generated from this bitmap 3458 * used in in-core bitmap. buddy must be generated from this bitmap
3487 * Need to be called with ext4 group lock (ext4_lock_group) 3459 * Need to be called with ext4 group lock held
3488 */ 3460 */
3489static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, 3461static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3490 ext4_group_t group) 3462 ext4_group_t group)
@@ -3516,8 +3488,7 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3516 if (unlikely(len == 0)) 3488 if (unlikely(len == 0))
3517 continue; 3489 continue;
3518 BUG_ON(groupnr != group); 3490 BUG_ON(groupnr != group);
3519 mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), 3491 mb_set_bits(bitmap, start, len);
3520 bitmap, start, len);
3521 preallocated += len; 3492 preallocated += len;
3522 count++; 3493 count++;
3523 } 3494 }
@@ -4121,7 +4092,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode,
4121static void ext4_mb_show_ac(struct ext4_allocation_context *ac) 4092static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4122{ 4093{
4123 struct super_block *sb = ac->ac_sb; 4094 struct super_block *sb = ac->ac_sb;
4124 ext4_group_t i; 4095 ext4_group_t ngroups, i;
4125 4096
4126 printk(KERN_ERR "EXT4-fs: Can't allocate:" 4097 printk(KERN_ERR "EXT4-fs: Can't allocate:"
4127 " Allocation context details:\n"); 4098 " Allocation context details:\n");
@@ -4145,7 +4116,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4145 printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned, 4116 printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
4146 ac->ac_found); 4117 ac->ac_found);
4147 printk(KERN_ERR "EXT4-fs: groups: \n"); 4118 printk(KERN_ERR "EXT4-fs: groups: \n");
4148 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 4119 ngroups = ext4_get_groups_count(sb);
4120 for (i = 0; i < ngroups; i++) {
4149 struct ext4_group_info *grp = ext4_get_group_info(sb, i); 4121 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
4150 struct ext4_prealloc_space *pa; 4122 struct ext4_prealloc_space *pa;
4151 ext4_grpblk_t start; 4123 ext4_grpblk_t start;
@@ -4469,13 +4441,13 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4469 4441
4470static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) 4442static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4471{ 4443{
4472 ext4_group_t i; 4444 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4473 int ret; 4445 int ret;
4474 int freed = 0; 4446 int freed = 0;
4475 4447
4476 trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", 4448 trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d",
4477 sb->s_id, needed); 4449 sb->s_id, needed);
4478 for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) { 4450 for (i = 0; i < ngroups && needed > 0; i++) {
4479 ret = ext4_mb_discard_group_preallocations(sb, i, needed); 4451 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4480 freed += ret; 4452 freed += ret;
4481 needed -= ret; 4453 needed -= ret;
@@ -4859,29 +4831,25 @@ do_more:
4859 new_entry->group = block_group; 4831 new_entry->group = block_group;
4860 new_entry->count = count; 4832 new_entry->count = count;
4861 new_entry->t_tid = handle->h_transaction->t_tid; 4833 new_entry->t_tid = handle->h_transaction->t_tid;
4834
4862 ext4_lock_group(sb, block_group); 4835 ext4_lock_group(sb, block_group);
4863 mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, 4836 mb_clear_bits(bitmap_bh->b_data, bit, count);
4864 bit, count);
4865 ext4_mb_free_metadata(handle, &e4b, new_entry); 4837 ext4_mb_free_metadata(handle, &e4b, new_entry);
4866 ext4_unlock_group(sb, block_group);
4867 } else { 4838 } else {
4868 ext4_lock_group(sb, block_group);
4869 /* need to update group_info->bb_free and bitmap 4839 /* need to update group_info->bb_free and bitmap
4870 * with group lock held. generate_buddy look at 4840 * with group lock held. generate_buddy look at
4871 * them with group lock_held 4841 * them with group lock_held
4872 */ 4842 */
4873 mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, 4843 ext4_lock_group(sb, block_group);
4874 bit, count); 4844 mb_clear_bits(bitmap_bh->b_data, bit, count);
4875 mb_free_blocks(inode, &e4b, bit, count); 4845 mb_free_blocks(inode, &e4b, bit, count);
4876 ext4_mb_return_to_preallocation(inode, &e4b, block, count); 4846 ext4_mb_return_to_preallocation(inode, &e4b, block, count);
4877 ext4_unlock_group(sb, block_group);
4878 } 4847 }
4879 4848
4880 spin_lock(sb_bgl_lock(sbi, block_group));
4881 ret = ext4_free_blks_count(sb, gdp) + count; 4849 ret = ext4_free_blks_count(sb, gdp) + count;
4882 ext4_free_blks_set(sb, gdp, ret); 4850 ext4_free_blks_set(sb, gdp, ret);
4883 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); 4851 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
4884 spin_unlock(sb_bgl_lock(sbi, block_group)); 4852 ext4_unlock_group(sb, block_group);
4885 percpu_counter_add(&sbi->s_freeblocks_counter, count); 4853 percpu_counter_add(&sbi->s_freeblocks_counter, count);
4886 4854
4887 if (sbi->s_log_groups_per_flex) { 4855 if (sbi->s_log_groups_per_flex) {
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index dd9e6cd5f6cf..75e34f69215b 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -23,7 +23,6 @@
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include "ext4_jbd2.h" 24#include "ext4_jbd2.h"
25#include "ext4.h" 25#include "ext4.h"
26#include "group.h"
27 26
28/* 27/*
29 * with AGGRESSIVE_CHECK allocator runs consistency checks over 28 * with AGGRESSIVE_CHECK allocator runs consistency checks over
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 22098e1cd085..07eb6649e4fa 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -37,7 +37,6 @@
37#include "ext4.h" 37#include "ext4.h"
38#include "ext4_jbd2.h" 38#include "ext4_jbd2.h"
39 39
40#include "namei.h"
41#include "xattr.h" 40#include "xattr.h"
42#include "acl.h" 41#include "acl.h"
43 42
@@ -750,7 +749,7 @@ static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
750 ext4fs_dirhash(de->name, de->name_len, &h); 749 ext4fs_dirhash(de->name, de->name_len, &h);
751 map_tail--; 750 map_tail--;
752 map_tail->hash = h.hash; 751 map_tail->hash = h.hash;
753 map_tail->offs = (u16) ((char *) de - base); 752 map_tail->offs = ((char *) de - base)>>2;
754 map_tail->size = le16_to_cpu(de->rec_len); 753 map_tail->size = le16_to_cpu(de->rec_len);
755 count++; 754 count++;
756 cond_resched(); 755 cond_resched();
@@ -1148,7 +1147,8 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
1148 unsigned rec_len = 0; 1147 unsigned rec_len = 0;
1149 1148
1150 while (count--) { 1149 while (count--) {
1151 struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs); 1150 struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
1151 (from + (map->offs<<2));
1152 rec_len = EXT4_DIR_REC_LEN(de->name_len); 1152 rec_len = EXT4_DIR_REC_LEN(de->name_len);
1153 memcpy (to, de, rec_len); 1153 memcpy (to, de, rec_len);
1154 ((struct ext4_dir_entry_2 *) to)->rec_len = 1154 ((struct ext4_dir_entry_2 *) to)->rec_len =
@@ -1997,7 +1997,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
1997 if (!ext4_handle_valid(handle)) 1997 if (!ext4_handle_valid(handle))
1998 return 0; 1998 return 0;
1999 1999
2000 lock_super(sb); 2000 mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
2001 if (!list_empty(&EXT4_I(inode)->i_orphan)) 2001 if (!list_empty(&EXT4_I(inode)->i_orphan))
2002 goto out_unlock; 2002 goto out_unlock;
2003 2003
@@ -2006,9 +2006,13 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
2006 2006
2007 /* @@@ FIXME: Observation from aviro: 2007 /* @@@ FIXME: Observation from aviro:
2008 * I think I can trigger J_ASSERT in ext4_orphan_add(). We block 2008 * I think I can trigger J_ASSERT in ext4_orphan_add(). We block
2009 * here (on lock_super()), so race with ext4_link() which might bump 2009 * here (on s_orphan_lock), so race with ext4_link() which might bump
2010 * ->i_nlink. For, say it, character device. Not a regular file, 2010 * ->i_nlink. For, say it, character device. Not a regular file,
2011 * not a directory, not a symlink and ->i_nlink > 0. 2011 * not a directory, not a symlink and ->i_nlink > 0.
2012 *
2013 * tytso, 4/25/2009: I'm not sure how that could happen;
2014 * shouldn't the fs core protect us from these sort of
2015 * unlink()/link() races?
2012 */ 2016 */
2013 J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 2017 J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
2014 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); 2018 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
@@ -2045,7 +2049,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
2045 jbd_debug(4, "orphan inode %lu will point to %d\n", 2049 jbd_debug(4, "orphan inode %lu will point to %d\n",
2046 inode->i_ino, NEXT_ORPHAN(inode)); 2050 inode->i_ino, NEXT_ORPHAN(inode));
2047out_unlock: 2051out_unlock:
2048 unlock_super(sb); 2052 mutex_unlock(&EXT4_SB(sb)->s_orphan_lock);
2049 ext4_std_error(inode->i_sb, err); 2053 ext4_std_error(inode->i_sb, err);
2050 return err; 2054 return err;
2051} 2055}
@@ -2066,11 +2070,9 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
2066 if (!ext4_handle_valid(handle)) 2070 if (!ext4_handle_valid(handle))
2067 return 0; 2071 return 0;
2068 2072
2069 lock_super(inode->i_sb); 2073 mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
2070 if (list_empty(&ei->i_orphan)) { 2074 if (list_empty(&ei->i_orphan))
2071 unlock_super(inode->i_sb); 2075 goto out;
2072 return 0;
2073 }
2074 2076
2075 ino_next = NEXT_ORPHAN(inode); 2077 ino_next = NEXT_ORPHAN(inode);
2076 prev = ei->i_orphan.prev; 2078 prev = ei->i_orphan.prev;
@@ -2120,7 +2122,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
2120out_err: 2122out_err:
2121 ext4_std_error(inode->i_sb, err); 2123 ext4_std_error(inode->i_sb, err);
2122out: 2124out:
2123 unlock_super(inode->i_sb); 2125 mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
2124 return err; 2126 return err;
2125 2127
2126out_brelse: 2128out_brelse:
@@ -2533,6 +2535,7 @@ const struct inode_operations ext4_dir_inode_operations = {
2533 .removexattr = generic_removexattr, 2535 .removexattr = generic_removexattr,
2534#endif 2536#endif
2535 .permission = ext4_permission, 2537 .permission = ext4_permission,
2538 .fiemap = ext4_fiemap,
2536}; 2539};
2537 2540
2538const struct inode_operations ext4_special_inode_operations = { 2541const struct inode_operations ext4_special_inode_operations = {
diff --git a/fs/ext4/namei.h b/fs/ext4/namei.h
deleted file mode 100644
index 5e4dfff36a00..000000000000
--- a/fs/ext4/namei.h
+++ /dev/null
@@ -1,8 +0,0 @@
1/* linux/fs/ext4/namei.h
2 *
3 * Copyright (C) 2005 Simtec Electronics
4 * Ben Dooks <ben@simtec.co.uk>
5 *
6*/
7
8extern struct dentry *ext4_get_parent(struct dentry *child);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 546c7dd869e1..27eb289eea37 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -15,7 +15,6 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16 16
17#include "ext4_jbd2.h" 17#include "ext4_jbd2.h"
18#include "group.h"
19 18
20#define outside(b, first, last) ((b) < (first) || (b) >= (last)) 19#define outside(b, first, last) ((b) < (first) || (b) >= (last))
21#define inside(b, first, last) ((b) >= (first) && (b) < (last)) 20#define inside(b, first, last) ((b) >= (first) && (b) < (last))
@@ -193,7 +192,7 @@ static int setup_new_group_blocks(struct super_block *sb,
193 if (IS_ERR(handle)) 192 if (IS_ERR(handle))
194 return PTR_ERR(handle); 193 return PTR_ERR(handle);
195 194
196 lock_super(sb); 195 mutex_lock(&sbi->s_resize_lock);
197 if (input->group != sbi->s_groups_count) { 196 if (input->group != sbi->s_groups_count) {
198 err = -EBUSY; 197 err = -EBUSY;
199 goto exit_journal; 198 goto exit_journal;
@@ -302,7 +301,7 @@ exit_bh:
302 brelse(bh); 301 brelse(bh);
303 302
304exit_journal: 303exit_journal:
305 unlock_super(sb); 304 mutex_unlock(&sbi->s_resize_lock);
306 if ((err2 = ext4_journal_stop(handle)) && !err) 305 if ((err2 = ext4_journal_stop(handle)) && !err)
307 err = err2; 306 err = err2;
308 307
@@ -643,11 +642,12 @@ exit_free:
643 * important part is that the new block and inode counts are in the backup 642 * important part is that the new block and inode counts are in the backup
644 * superblocks, and the location of the new group metadata in the GDT backups. 643 * superblocks, and the location of the new group metadata in the GDT backups.
645 * 644 *
646 * We do not need lock_super() for this, because these blocks are not 645 * We do not need take the s_resize_lock for this, because these
647 * otherwise touched by the filesystem code when it is mounted. We don't 646 * blocks are not otherwise touched by the filesystem code when it is
648 * need to worry about last changing from sbi->s_groups_count, because the 647 * mounted. We don't need to worry about last changing from
649 * worst that can happen is that we do not copy the full number of backups 648 * sbi->s_groups_count, because the worst that can happen is that we
650 * at this time. The resize which changed s_groups_count will backup again. 649 * do not copy the full number of backups at this time. The resize
650 * which changed s_groups_count will backup again.
651 */ 651 */
652static void update_backups(struct super_block *sb, 652static void update_backups(struct super_block *sb,
653 int blk_off, char *data, int size) 653 int blk_off, char *data, int size)
@@ -809,7 +809,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
809 goto exit_put; 809 goto exit_put;
810 } 810 }
811 811
812 lock_super(sb); 812 mutex_lock(&sbi->s_resize_lock);
813 if (input->group != sbi->s_groups_count) { 813 if (input->group != sbi->s_groups_count) {
814 ext4_warning(sb, __func__, 814 ext4_warning(sb, __func__,
815 "multiple resizers run on filesystem!"); 815 "multiple resizers run on filesystem!");
@@ -840,7 +840,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
840 /* 840 /*
841 * OK, now we've set up the new group. Time to make it active. 841 * OK, now we've set up the new group. Time to make it active.
842 * 842 *
843 * Current kernels don't lock all allocations via lock_super(), 843 * We do not lock all allocations via s_resize_lock
844 * so we have to be safe wrt. concurrent accesses the group 844 * so we have to be safe wrt. concurrent accesses the group
845 * data. So we need to be careful to set all of the relevant 845 * data. So we need to be careful to set all of the relevant
846 * group descriptor data etc. *before* we enable the group. 846 * group descriptor data etc. *before* we enable the group.
@@ -900,12 +900,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
900 * 900 *
901 * The precise rules we use are: 901 * The precise rules we use are:
902 * 902 *
903 * * Writers of s_groups_count *must* hold lock_super 903 * * Writers of s_groups_count *must* hold s_resize_lock
904 * AND 904 * AND
905 * * Writers must perform a smp_wmb() after updating all dependent 905 * * Writers must perform a smp_wmb() after updating all dependent
906 * data and before modifying the groups count 906 * data and before modifying the groups count
907 * 907 *
908 * * Readers must hold lock_super() over the access 908 * * Readers must hold s_resize_lock over the access
909 * OR 909 * OR
910 * * Readers must perform an smp_rmb() after reading the groups count 910 * * Readers must perform an smp_rmb() after reading the groups count
911 * and before reading any dependent data. 911 * and before reading any dependent data.
@@ -948,7 +948,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
948 sb->s_dirt = 1; 948 sb->s_dirt = 1;
949 949
950exit_journal: 950exit_journal:
951 unlock_super(sb); 951 mutex_unlock(&sbi->s_resize_lock);
952 if ((err2 = ext4_journal_stop(handle)) && !err) 952 if ((err2 = ext4_journal_stop(handle)) && !err)
953 err = err2; 953 err = err2;
954 if (!err) { 954 if (!err) {
@@ -986,7 +986,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
986 986
987 /* We don't need to worry about locking wrt other resizers just 987 /* We don't need to worry about locking wrt other resizers just
988 * yet: we're going to revalidate es->s_blocks_count after 988 * yet: we're going to revalidate es->s_blocks_count after
989 * taking lock_super() below. */ 989 * taking the s_resize_lock below. */
990 o_blocks_count = ext4_blocks_count(es); 990 o_blocks_count = ext4_blocks_count(es);
991 o_groups_count = EXT4_SB(sb)->s_groups_count; 991 o_groups_count = EXT4_SB(sb)->s_groups_count;
992 992
@@ -1056,11 +1056,11 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1056 goto exit_put; 1056 goto exit_put;
1057 } 1057 }
1058 1058
1059 lock_super(sb); 1059 mutex_lock(&EXT4_SB(sb)->s_resize_lock);
1060 if (o_blocks_count != ext4_blocks_count(es)) { 1060 if (o_blocks_count != ext4_blocks_count(es)) {
1061 ext4_warning(sb, __func__, 1061 ext4_warning(sb, __func__,
1062 "multiple resizers run on filesystem!"); 1062 "multiple resizers run on filesystem!");
1063 unlock_super(sb); 1063 mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1064 ext4_journal_stop(handle); 1064 ext4_journal_stop(handle);
1065 err = -EBUSY; 1065 err = -EBUSY;
1066 goto exit_put; 1066 goto exit_put;
@@ -1070,14 +1070,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1070 EXT4_SB(sb)->s_sbh))) { 1070 EXT4_SB(sb)->s_sbh))) {
1071 ext4_warning(sb, __func__, 1071 ext4_warning(sb, __func__,
1072 "error %d on journal write access", err); 1072 "error %d on journal write access", err);
1073 unlock_super(sb); 1073 mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1074 ext4_journal_stop(handle); 1074 ext4_journal_stop(handle);
1075 goto exit_put; 1075 goto exit_put;
1076 } 1076 }
1077 ext4_blocks_count_set(es, o_blocks_count + add); 1077 ext4_blocks_count_set(es, o_blocks_count + add);
1078 ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); 1078 ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
1079 sb->s_dirt = 1; 1079 sb->s_dirt = 1;
1080 unlock_super(sb); 1080 mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1081 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, 1081 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
1082 o_blocks_count + add); 1082 o_blocks_count + add);
1083 /* We add the blocks to the bitmap and set the group need init bit */ 1083 /* We add the blocks to the bitmap and set the group need init bit */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2958f4e6f222..c191d0f65fed 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -20,6 +20,7 @@
20#include <linux/string.h> 20#include <linux/string.h>
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/time.h> 22#include <linux/time.h>
23#include <linux/vmalloc.h>
23#include <linux/jbd2.h> 24#include <linux/jbd2.h>
24#include <linux/slab.h> 25#include <linux/slab.h>
25#include <linux/init.h> 26#include <linux/init.h>
@@ -45,16 +46,20 @@
45#include "ext4_jbd2.h" 46#include "ext4_jbd2.h"
46#include "xattr.h" 47#include "xattr.h"
47#include "acl.h" 48#include "acl.h"
48#include "namei.h" 49
49#include "group.h" 50static int default_mb_history_length = 1000;
51
52module_param_named(default_mb_history_length, default_mb_history_length,
53 int, 0644);
54MODULE_PARM_DESC(default_mb_history_length,
55 "Default number of entries saved for mb_history");
50 56
51struct proc_dir_entry *ext4_proc_root; 57struct proc_dir_entry *ext4_proc_root;
52static struct kset *ext4_kset; 58static struct kset *ext4_kset;
53 59
54static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 60static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
55 unsigned long journal_devnum); 61 unsigned long journal_devnum);
56static int ext4_commit_super(struct super_block *sb, 62static int ext4_commit_super(struct super_block *sb, int sync);
57 struct ext4_super_block *es, int sync);
58static void ext4_mark_recovery_complete(struct super_block *sb, 63static void ext4_mark_recovery_complete(struct super_block *sb,
59 struct ext4_super_block *es); 64 struct ext4_super_block *es);
60static void ext4_clear_journal_err(struct super_block *sb, 65static void ext4_clear_journal_err(struct super_block *sb,
@@ -74,7 +79,7 @@ ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
74{ 79{
75 return le32_to_cpu(bg->bg_block_bitmap_lo) | 80 return le32_to_cpu(bg->bg_block_bitmap_lo) |
76 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 81 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
77 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); 82 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
78} 83}
79 84
80ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, 85ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
@@ -82,7 +87,7 @@ ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
82{ 87{
83 return le32_to_cpu(bg->bg_inode_bitmap_lo) | 88 return le32_to_cpu(bg->bg_inode_bitmap_lo) |
84 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 89 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
85 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); 90 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
86} 91}
87 92
88ext4_fsblk_t ext4_inode_table(struct super_block *sb, 93ext4_fsblk_t ext4_inode_table(struct super_block *sb,
@@ -90,7 +95,7 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb,
90{ 95{
91 return le32_to_cpu(bg->bg_inode_table_lo) | 96 return le32_to_cpu(bg->bg_inode_table_lo) |
92 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 97 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
93 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 98 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
94} 99}
95 100
96__u32 ext4_free_blks_count(struct super_block *sb, 101__u32 ext4_free_blks_count(struct super_block *sb,
@@ -98,7 +103,7 @@ __u32 ext4_free_blks_count(struct super_block *sb,
98{ 103{
99 return le16_to_cpu(bg->bg_free_blocks_count_lo) | 104 return le16_to_cpu(bg->bg_free_blocks_count_lo) |
100 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 105 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
101 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); 106 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
102} 107}
103 108
104__u32 ext4_free_inodes_count(struct super_block *sb, 109__u32 ext4_free_inodes_count(struct super_block *sb,
@@ -106,7 +111,7 @@ __u32 ext4_free_inodes_count(struct super_block *sb,
106{ 111{
107 return le16_to_cpu(bg->bg_free_inodes_count_lo) | 112 return le16_to_cpu(bg->bg_free_inodes_count_lo) |
108 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 113 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
109 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); 114 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
110} 115}
111 116
112__u32 ext4_used_dirs_count(struct super_block *sb, 117__u32 ext4_used_dirs_count(struct super_block *sb,
@@ -114,7 +119,7 @@ __u32 ext4_used_dirs_count(struct super_block *sb,
114{ 119{
115 return le16_to_cpu(bg->bg_used_dirs_count_lo) | 120 return le16_to_cpu(bg->bg_used_dirs_count_lo) |
116 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 121 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
117 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); 122 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
118} 123}
119 124
120__u32 ext4_itable_unused_count(struct super_block *sb, 125__u32 ext4_itable_unused_count(struct super_block *sb,
@@ -122,7 +127,7 @@ __u32 ext4_itable_unused_count(struct super_block *sb,
122{ 127{
123 return le16_to_cpu(bg->bg_itable_unused_lo) | 128 return le16_to_cpu(bg->bg_itable_unused_lo) |
124 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 129 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
125 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); 130 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
126} 131}
127 132
128void ext4_block_bitmap_set(struct super_block *sb, 133void ext4_block_bitmap_set(struct super_block *sb,
@@ -202,8 +207,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
202 journal = EXT4_SB(sb)->s_journal; 207 journal = EXT4_SB(sb)->s_journal;
203 if (journal) { 208 if (journal) {
204 if (is_journal_aborted(journal)) { 209 if (is_journal_aborted(journal)) {
205 ext4_abort(sb, __func__, 210 ext4_abort(sb, __func__, "Detected aborted journal");
206 "Detected aborted journal");
207 return ERR_PTR(-EROFS); 211 return ERR_PTR(-EROFS);
208 } 212 }
209 return jbd2_journal_start(journal, nblocks); 213 return jbd2_journal_start(journal, nblocks);
@@ -302,10 +306,10 @@ static void ext4_handle_error(struct super_block *sb)
302 jbd2_journal_abort(journal, -EIO); 306 jbd2_journal_abort(journal, -EIO);
303 } 307 }
304 if (test_opt(sb, ERRORS_RO)) { 308 if (test_opt(sb, ERRORS_RO)) {
305 printk(KERN_CRIT "Remounting filesystem read-only\n"); 309 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
306 sb->s_flags |= MS_RDONLY; 310 sb->s_flags |= MS_RDONLY;
307 } 311 }
308 ext4_commit_super(sb, es, 1); 312 ext4_commit_super(sb, 1);
309 if (test_opt(sb, ERRORS_PANIC)) 313 if (test_opt(sb, ERRORS_PANIC))
310 panic("EXT4-fs (device %s): panic forced after error\n", 314 panic("EXT4-fs (device %s): panic forced after error\n",
311 sb->s_id); 315 sb->s_id);
@@ -395,8 +399,6 @@ void ext4_abort(struct super_block *sb, const char *function,
395{ 399{
396 va_list args; 400 va_list args;
397 401
398 printk(KERN_CRIT "ext4_abort called.\n");
399
400 va_start(args, fmt); 402 va_start(args, fmt);
401 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 403 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
402 vprintk(fmt, args); 404 vprintk(fmt, args);
@@ -409,7 +411,7 @@ void ext4_abort(struct super_block *sb, const char *function,
409 if (sb->s_flags & MS_RDONLY) 411 if (sb->s_flags & MS_RDONLY)
410 return; 412 return;
411 413
412 printk(KERN_CRIT "Remounting filesystem read-only\n"); 414 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
413 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 415 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
414 sb->s_flags |= MS_RDONLY; 416 sb->s_flags |= MS_RDONLY;
415 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 417 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
@@ -417,6 +419,18 @@ void ext4_abort(struct super_block *sb, const char *function,
417 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 419 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
418} 420}
419 421
422void ext4_msg (struct super_block * sb, const char *prefix,
423 const char *fmt, ...)
424{
425 va_list args;
426
427 va_start(args, fmt);
428 printk("%sEXT4-fs (%s): ", prefix, sb->s_id);
429 vprintk(fmt, args);
430 printk("\n");
431 va_end(args);
432}
433
420void ext4_warning(struct super_block *sb, const char *function, 434void ext4_warning(struct super_block *sb, const char *function,
421 const char *fmt, ...) 435 const char *fmt, ...)
422{ 436{
@@ -431,7 +445,7 @@ void ext4_warning(struct super_block *sb, const char *function,
431} 445}
432 446
433void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, 447void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
434 const char *function, const char *fmt, ...) 448 const char *function, const char *fmt, ...)
435__releases(bitlock) 449__releases(bitlock)
436__acquires(bitlock) 450__acquires(bitlock)
437{ 451{
@@ -447,7 +461,7 @@ __acquires(bitlock)
447 if (test_opt(sb, ERRORS_CONT)) { 461 if (test_opt(sb, ERRORS_CONT)) {
448 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 462 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
449 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 463 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
450 ext4_commit_super(sb, es, 0); 464 ext4_commit_super(sb, 0);
451 return; 465 return;
452 } 466 }
453 ext4_unlock_group(sb, grp); 467 ext4_unlock_group(sb, grp);
@@ -467,7 +481,6 @@ __acquires(bitlock)
467 return; 481 return;
468} 482}
469 483
470
471void ext4_update_dynamic_rev(struct super_block *sb) 484void ext4_update_dynamic_rev(struct super_block *sb)
472{ 485{
473 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 486 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
@@ -496,7 +509,7 @@ void ext4_update_dynamic_rev(struct super_block *sb)
496/* 509/*
497 * Open the external journal device 510 * Open the external journal device
498 */ 511 */
499static struct block_device *ext4_blkdev_get(dev_t dev) 512static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
500{ 513{
501 struct block_device *bdev; 514 struct block_device *bdev;
502 char b[BDEVNAME_SIZE]; 515 char b[BDEVNAME_SIZE];
@@ -507,7 +520,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev)
507 return bdev; 520 return bdev;
508 521
509fail: 522fail:
510 printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n", 523 ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
511 __bdevname(dev, b), PTR_ERR(bdev)); 524 __bdevname(dev, b), PTR_ERR(bdev));
512 return NULL; 525 return NULL;
513} 526}
@@ -543,8 +556,8 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
543{ 556{
544 struct list_head *l; 557 struct list_head *l;
545 558
546 printk(KERN_ERR "sb orphan head is %d\n", 559 ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
547 le32_to_cpu(sbi->s_es->s_last_orphan)); 560 le32_to_cpu(sbi->s_es->s_last_orphan));
548 561
549 printk(KERN_ERR "sb_info orphan list:\n"); 562 printk(KERN_ERR "sb_info orphan list:\n");
550 list_for_each(l, &sbi->s_orphan) { 563 list_for_each(l, &sbi->s_orphan) {
@@ -563,6 +576,7 @@ static void ext4_put_super(struct super_block *sb)
563 struct ext4_super_block *es = sbi->s_es; 576 struct ext4_super_block *es = sbi->s_es;
564 int i, err; 577 int i, err;
565 578
579 ext4_release_system_zone(sb);
566 ext4_mb_release(sb); 580 ext4_mb_release(sb);
567 ext4_ext_release(sb); 581 ext4_ext_release(sb);
568 ext4_xattr_put_super(sb); 582 ext4_xattr_put_super(sb);
@@ -576,7 +590,7 @@ static void ext4_put_super(struct super_block *sb)
576 if (!(sb->s_flags & MS_RDONLY)) { 590 if (!(sb->s_flags & MS_RDONLY)) {
577 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 591 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
578 es->s_state = cpu_to_le16(sbi->s_mount_state); 592 es->s_state = cpu_to_le16(sbi->s_mount_state);
579 ext4_commit_super(sb, es, 1); 593 ext4_commit_super(sb, 1);
580 } 594 }
581 if (sbi->s_proc) { 595 if (sbi->s_proc) {
582 remove_proc_entry(sb->s_id, ext4_proc_root); 596 remove_proc_entry(sb->s_id, ext4_proc_root);
@@ -586,7 +600,10 @@ static void ext4_put_super(struct super_block *sb)
586 for (i = 0; i < sbi->s_gdb_count; i++) 600 for (i = 0; i < sbi->s_gdb_count; i++)
587 brelse(sbi->s_group_desc[i]); 601 brelse(sbi->s_group_desc[i]);
588 kfree(sbi->s_group_desc); 602 kfree(sbi->s_group_desc);
589 kfree(sbi->s_flex_groups); 603 if (is_vmalloc_addr(sbi->s_flex_groups))
604 vfree(sbi->s_flex_groups);
605 else
606 kfree(sbi->s_flex_groups);
590 percpu_counter_destroy(&sbi->s_freeblocks_counter); 607 percpu_counter_destroy(&sbi->s_freeblocks_counter);
591 percpu_counter_destroy(&sbi->s_freeinodes_counter); 608 percpu_counter_destroy(&sbi->s_freeinodes_counter);
592 percpu_counter_destroy(&sbi->s_dirs_counter); 609 percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -629,7 +646,6 @@ static void ext4_put_super(struct super_block *sb)
629 lock_kernel(); 646 lock_kernel();
630 kfree(sbi->s_blockgroup_lock); 647 kfree(sbi->s_blockgroup_lock);
631 kfree(sbi); 648 kfree(sbi);
632 return;
633} 649}
634 650
635static struct kmem_cache *ext4_inode_cachep; 651static struct kmem_cache *ext4_inode_cachep;
@@ -644,6 +660,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
644 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); 660 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
645 if (!ei) 661 if (!ei)
646 return NULL; 662 return NULL;
663
647#ifdef CONFIG_EXT4_FS_POSIX_ACL 664#ifdef CONFIG_EXT4_FS_POSIX_ACL
648 ei->i_acl = EXT4_ACL_NOT_CACHED; 665 ei->i_acl = EXT4_ACL_NOT_CACHED;
649 ei->i_default_acl = EXT4_ACL_NOT_CACHED; 666 ei->i_default_acl = EXT4_ACL_NOT_CACHED;
@@ -664,14 +681,16 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
664 ei->i_allocated_meta_blocks = 0; 681 ei->i_allocated_meta_blocks = 0;
665 ei->i_delalloc_reserved_flag = 0; 682 ei->i_delalloc_reserved_flag = 0;
666 spin_lock_init(&(ei->i_block_reservation_lock)); 683 spin_lock_init(&(ei->i_block_reservation_lock));
684
667 return &ei->vfs_inode; 685 return &ei->vfs_inode;
668} 686}
669 687
670static void ext4_destroy_inode(struct inode *inode) 688static void ext4_destroy_inode(struct inode *inode)
671{ 689{
672 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 690 if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
673 printk("EXT4 Inode %p: orphan list check failed!\n", 691 ext4_msg(inode->i_sb, KERN_ERR,
674 EXT4_I(inode)); 692 "Inode %lu (%p): orphan list check failed!",
693 inode->i_ino, EXT4_I(inode));
675 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, 694 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
676 EXT4_I(inode), sizeof(struct ext4_inode_info), 695 EXT4_I(inode), sizeof(struct ext4_inode_info),
677 true); 696 true);
@@ -870,12 +889,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
870 seq_puts(seq, ",noauto_da_alloc"); 889 seq_puts(seq, ",noauto_da_alloc");
871 890
872 ext4_show_quota_options(seq, sb); 891 ext4_show_quota_options(seq, sb);
892
873 return 0; 893 return 0;
874} 894}
875 895
876
877static struct inode *ext4_nfs_get_inode(struct super_block *sb, 896static struct inode *ext4_nfs_get_inode(struct super_block *sb,
878 u64 ino, u32 generation) 897 u64 ino, u32 generation)
879{ 898{
880 struct inode *inode; 899 struct inode *inode;
881 900
@@ -904,14 +923,14 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
904} 923}
905 924
906static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, 925static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
907 int fh_len, int fh_type) 926 int fh_len, int fh_type)
908{ 927{
909 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 928 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
910 ext4_nfs_get_inode); 929 ext4_nfs_get_inode);
911} 930}
912 931
913static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, 932static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
914 int fh_len, int fh_type) 933 int fh_len, int fh_type)
915{ 934{
916 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 935 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
917 ext4_nfs_get_inode); 936 ext4_nfs_get_inode);
@@ -923,7 +942,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
923 * which would prevent try_to_free_buffers() from freeing them, we must use 942 * which would prevent try_to_free_buffers() from freeing them, we must use
924 * jbd2 layer's try_to_free_buffers() function to release them. 943 * jbd2 layer's try_to_free_buffers() function to release them.
925 */ 944 */
926static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait) 945static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
946 gfp_t wait)
927{ 947{
928 journal_t *journal = EXT4_SB(sb)->s_journal; 948 journal_t *journal = EXT4_SB(sb)->s_journal;
929 949
@@ -992,7 +1012,6 @@ static const struct super_operations ext4_sops = {
992 .dirty_inode = ext4_dirty_inode, 1012 .dirty_inode = ext4_dirty_inode,
993 .delete_inode = ext4_delete_inode, 1013 .delete_inode = ext4_delete_inode,
994 .put_super = ext4_put_super, 1014 .put_super = ext4_put_super,
995 .write_super = ext4_write_super,
996 .sync_fs = ext4_sync_fs, 1015 .sync_fs = ext4_sync_fs,
997 .freeze_fs = ext4_freeze, 1016 .freeze_fs = ext4_freeze,
998 .unfreeze_fs = ext4_unfreeze, 1017 .unfreeze_fs = ext4_unfreeze,
@@ -1007,6 +1026,25 @@ static const struct super_operations ext4_sops = {
1007 .bdev_try_to_free_page = bdev_try_to_free_page, 1026 .bdev_try_to_free_page = bdev_try_to_free_page,
1008}; 1027};
1009 1028
1029static const struct super_operations ext4_nojournal_sops = {
1030 .alloc_inode = ext4_alloc_inode,
1031 .destroy_inode = ext4_destroy_inode,
1032 .write_inode = ext4_write_inode,
1033 .dirty_inode = ext4_dirty_inode,
1034 .delete_inode = ext4_delete_inode,
1035 .write_super = ext4_write_super,
1036 .put_super = ext4_put_super,
1037 .statfs = ext4_statfs,
1038 .remount_fs = ext4_remount,
1039 .clear_inode = ext4_clear_inode,
1040 .show_options = ext4_show_options,
1041#ifdef CONFIG_QUOTA
1042 .quota_read = ext4_quota_read,
1043 .quota_write = ext4_quota_write,
1044#endif
1045 .bdev_try_to_free_page = bdev_try_to_free_page,
1046};
1047
1010static const struct export_operations ext4_export_ops = { 1048static const struct export_operations ext4_export_ops = {
1011 .fh_to_dentry = ext4_fh_to_dentry, 1049 .fh_to_dentry = ext4_fh_to_dentry,
1012 .fh_to_parent = ext4_fh_to_parent, 1050 .fh_to_parent = ext4_fh_to_parent,
@@ -1023,12 +1061,13 @@ enum {
1023 Opt_journal_update, Opt_journal_dev, 1061 Opt_journal_update, Opt_journal_dev,
1024 Opt_journal_checksum, Opt_journal_async_commit, 1062 Opt_journal_checksum, Opt_journal_async_commit,
1025 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1063 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1026 Opt_data_err_abort, Opt_data_err_ignore, 1064 Opt_data_err_abort, Opt_data_err_ignore, Opt_mb_history_length,
1027 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1065 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1028 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 1066 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
1029 Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, 1067 Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize,
1030 Opt_usrquota, Opt_grpquota, Opt_i_version, 1068 Opt_usrquota, Opt_grpquota, Opt_i_version,
1031 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1069 Opt_stripe, Opt_delalloc, Opt_nodelalloc,
1070 Opt_block_validity, Opt_noblock_validity,
1032 Opt_inode_readahead_blks, Opt_journal_ioprio 1071 Opt_inode_readahead_blks, Opt_journal_ioprio
1033}; 1072};
1034 1073
@@ -1069,6 +1108,7 @@ static const match_table_t tokens = {
1069 {Opt_data_writeback, "data=writeback"}, 1108 {Opt_data_writeback, "data=writeback"},
1070 {Opt_data_err_abort, "data_err=abort"}, 1109 {Opt_data_err_abort, "data_err=abort"},
1071 {Opt_data_err_ignore, "data_err=ignore"}, 1110 {Opt_data_err_ignore, "data_err=ignore"},
1111 {Opt_mb_history_length, "mb_history_length=%u"},
1072 {Opt_offusrjquota, "usrjquota="}, 1112 {Opt_offusrjquota, "usrjquota="},
1073 {Opt_usrjquota, "usrjquota=%s"}, 1113 {Opt_usrjquota, "usrjquota=%s"},
1074 {Opt_offgrpjquota, "grpjquota="}, 1114 {Opt_offgrpjquota, "grpjquota="},
@@ -1087,6 +1127,8 @@ static const match_table_t tokens = {
1087 {Opt_resize, "resize"}, 1127 {Opt_resize, "resize"},
1088 {Opt_delalloc, "delalloc"}, 1128 {Opt_delalloc, "delalloc"},
1089 {Opt_nodelalloc, "nodelalloc"}, 1129 {Opt_nodelalloc, "nodelalloc"},
1130 {Opt_block_validity, "block_validity"},
1131 {Opt_noblock_validity, "noblock_validity"},
1090 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1132 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
1091 {Opt_journal_ioprio, "journal_ioprio=%u"}, 1133 {Opt_journal_ioprio, "journal_ioprio=%u"},
1092 {Opt_auto_da_alloc, "auto_da_alloc=%u"}, 1134 {Opt_auto_da_alloc, "auto_da_alloc=%u"},
@@ -1102,8 +1144,9 @@ static ext4_fsblk_t get_sb_block(void **data)
1102 1144
1103 if (!options || strncmp(options, "sb=", 3) != 0) 1145 if (!options || strncmp(options, "sb=", 3) != 0)
1104 return 1; /* Default location */ 1146 return 1; /* Default location */
1147
1105 options += 3; 1148 options += 3;
1106 /*todo: use simple_strtoll with >32bit ext4 */ 1149 /* TODO: use simple_strtoll with >32bit ext4 */
1107 sb_block = simple_strtoul(options, &options, 0); 1150 sb_block = simple_strtoul(options, &options, 0);
1108 if (*options && *options != ',') { 1151 if (*options && *options != ',') {
1109 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", 1152 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
@@ -1113,6 +1156,7 @@ static ext4_fsblk_t get_sb_block(void **data)
1113 if (*options == ',') 1156 if (*options == ',')
1114 options++; 1157 options++;
1115 *data = (void *) options; 1158 *data = (void *) options;
1159
1116 return sb_block; 1160 return sb_block;
1117} 1161}
1118 1162
@@ -1206,8 +1250,7 @@ static int parse_options(char *options, struct super_block *sb,
1206#else 1250#else
1207 case Opt_user_xattr: 1251 case Opt_user_xattr:
1208 case Opt_nouser_xattr: 1252 case Opt_nouser_xattr:
1209 printk(KERN_ERR "EXT4 (no)user_xattr options " 1253 ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported");
1210 "not supported\n");
1211 break; 1254 break;
1212#endif 1255#endif
1213#ifdef CONFIG_EXT4_FS_POSIX_ACL 1256#ifdef CONFIG_EXT4_FS_POSIX_ACL
@@ -1220,8 +1263,7 @@ static int parse_options(char *options, struct super_block *sb,
1220#else 1263#else
1221 case Opt_acl: 1264 case Opt_acl:
1222 case Opt_noacl: 1265 case Opt_noacl:
1223 printk(KERN_ERR "EXT4 (no)acl options " 1266 ext4_msg(sb, KERN_ERR, "(no)acl options not supported");
1224 "not supported\n");
1225 break; 1267 break;
1226#endif 1268#endif
1227 case Opt_journal_update: 1269 case Opt_journal_update:
@@ -1231,16 +1273,16 @@ static int parse_options(char *options, struct super_block *sb,
1231 user to specify an existing inode to be the 1273 user to specify an existing inode to be the
1232 journal file. */ 1274 journal file. */
1233 if (is_remount) { 1275 if (is_remount) {
1234 printk(KERN_ERR "EXT4-fs: cannot specify " 1276 ext4_msg(sb, KERN_ERR,
1235 "journal on remount\n"); 1277 "Cannot specify journal on remount");
1236 return 0; 1278 return 0;
1237 } 1279 }
1238 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); 1280 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
1239 break; 1281 break;
1240 case Opt_journal_dev: 1282 case Opt_journal_dev:
1241 if (is_remount) { 1283 if (is_remount) {
1242 printk(KERN_ERR "EXT4-fs: cannot specify " 1284 ext4_msg(sb, KERN_ERR,
1243 "journal on remount\n"); 1285 "Cannot specify journal on remount");
1244 return 0; 1286 return 0;
1245 } 1287 }
1246 if (match_int(&args[0], &option)) 1288 if (match_int(&args[0], &option))
@@ -1294,9 +1336,8 @@ static int parse_options(char *options, struct super_block *sb,
1294 if (is_remount) { 1336 if (is_remount) {
1295 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) 1337 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS)
1296 != data_opt) { 1338 != data_opt) {
1297 printk(KERN_ERR 1339 ext4_msg(sb, KERN_ERR,
1298 "EXT4-fs: cannot change data " 1340 "Cannot change data mode on remount");
1299 "mode on remount\n");
1300 return 0; 1341 return 0;
1301 } 1342 }
1302 } else { 1343 } else {
@@ -1310,6 +1351,13 @@ static int parse_options(char *options, struct super_block *sb,
1310 case Opt_data_err_ignore: 1351 case Opt_data_err_ignore:
1311 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1352 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1312 break; 1353 break;
1354 case Opt_mb_history_length:
1355 if (match_int(&args[0], &option))
1356 return 0;
1357 if (option < 0)
1358 return 0;
1359 sbi->s_mb_history_max = option;
1360 break;
1313#ifdef CONFIG_QUOTA 1361#ifdef CONFIG_QUOTA
1314 case Opt_usrjquota: 1362 case Opt_usrjquota:
1315 qtype = USRQUOTA; 1363 qtype = USRQUOTA;
@@ -1319,31 +1367,31 @@ static int parse_options(char *options, struct super_block *sb,
1319set_qf_name: 1367set_qf_name:
1320 if (sb_any_quota_loaded(sb) && 1368 if (sb_any_quota_loaded(sb) &&
1321 !sbi->s_qf_names[qtype]) { 1369 !sbi->s_qf_names[qtype]) {
1322 printk(KERN_ERR 1370 ext4_msg(sb, KERN_ERR,
1323 "EXT4-fs: Cannot change journaled " 1371 "Cannot change journaled "
1324 "quota options when quota turned on.\n"); 1372 "quota options when quota turned on");
1325 return 0; 1373 return 0;
1326 } 1374 }
1327 qname = match_strdup(&args[0]); 1375 qname = match_strdup(&args[0]);
1328 if (!qname) { 1376 if (!qname) {
1329 printk(KERN_ERR 1377 ext4_msg(sb, KERN_ERR,
1330 "EXT4-fs: not enough memory for " 1378 "Not enough memory for "
1331 "storing quotafile name.\n"); 1379 "storing quotafile name");
1332 return 0; 1380 return 0;
1333 } 1381 }
1334 if (sbi->s_qf_names[qtype] && 1382 if (sbi->s_qf_names[qtype] &&
1335 strcmp(sbi->s_qf_names[qtype], qname)) { 1383 strcmp(sbi->s_qf_names[qtype], qname)) {
1336 printk(KERN_ERR 1384 ext4_msg(sb, KERN_ERR,
1337 "EXT4-fs: %s quota file already " 1385 "%s quota file already "
1338 "specified.\n", QTYPE2NAME(qtype)); 1386 "specified", QTYPE2NAME(qtype));
1339 kfree(qname); 1387 kfree(qname);
1340 return 0; 1388 return 0;
1341 } 1389 }
1342 sbi->s_qf_names[qtype] = qname; 1390 sbi->s_qf_names[qtype] = qname;
1343 if (strchr(sbi->s_qf_names[qtype], '/')) { 1391 if (strchr(sbi->s_qf_names[qtype], '/')) {
1344 printk(KERN_ERR 1392 ext4_msg(sb, KERN_ERR,
1345 "EXT4-fs: quotafile must be on " 1393 "quotafile must be on "
1346 "filesystem root.\n"); 1394 "filesystem root");
1347 kfree(sbi->s_qf_names[qtype]); 1395 kfree(sbi->s_qf_names[qtype]);
1348 sbi->s_qf_names[qtype] = NULL; 1396 sbi->s_qf_names[qtype] = NULL;
1349 return 0; 1397 return 0;
@@ -1358,9 +1406,9 @@ set_qf_name:
1358clear_qf_name: 1406clear_qf_name:
1359 if (sb_any_quota_loaded(sb) && 1407 if (sb_any_quota_loaded(sb) &&
1360 sbi->s_qf_names[qtype]) { 1408 sbi->s_qf_names[qtype]) {
1361 printk(KERN_ERR "EXT4-fs: Cannot change " 1409 ext4_msg(sb, KERN_ERR, "Cannot change "
1362 "journaled quota options when " 1410 "journaled quota options when "
1363 "quota turned on.\n"); 1411 "quota turned on");
1364 return 0; 1412 return 0;
1365 } 1413 }
1366 /* 1414 /*
@@ -1377,9 +1425,9 @@ clear_qf_name:
1377set_qf_format: 1425set_qf_format:
1378 if (sb_any_quota_loaded(sb) && 1426 if (sb_any_quota_loaded(sb) &&
1379 sbi->s_jquota_fmt != qfmt) { 1427 sbi->s_jquota_fmt != qfmt) {
1380 printk(KERN_ERR "EXT4-fs: Cannot change " 1428 ext4_msg(sb, KERN_ERR, "Cannot change "
1381 "journaled quota options when " 1429 "journaled quota options when "
1382 "quota turned on.\n"); 1430 "quota turned on");
1383 return 0; 1431 return 0;
1384 } 1432 }
1385 sbi->s_jquota_fmt = qfmt; 1433 sbi->s_jquota_fmt = qfmt;
@@ -1395,8 +1443,8 @@ set_qf_format:
1395 break; 1443 break;
1396 case Opt_noquota: 1444 case Opt_noquota:
1397 if (sb_any_quota_loaded(sb)) { 1445 if (sb_any_quota_loaded(sb)) {
1398 printk(KERN_ERR "EXT4-fs: Cannot change quota " 1446 ext4_msg(sb, KERN_ERR, "Cannot change quota "
1399 "options when quota turned on.\n"); 1447 "options when quota turned on");
1400 return 0; 1448 return 0;
1401 } 1449 }
1402 clear_opt(sbi->s_mount_opt, QUOTA); 1450 clear_opt(sbi->s_mount_opt, QUOTA);
@@ -1407,8 +1455,8 @@ set_qf_format:
1407 case Opt_quota: 1455 case Opt_quota:
1408 case Opt_usrquota: 1456 case Opt_usrquota:
1409 case Opt_grpquota: 1457 case Opt_grpquota:
1410 printk(KERN_ERR 1458 ext4_msg(sb, KERN_ERR,
1411 "EXT4-fs: quota options not supported.\n"); 1459 "quota options not supported");
1412 break; 1460 break;
1413 case Opt_usrjquota: 1461 case Opt_usrjquota:
1414 case Opt_grpjquota: 1462 case Opt_grpjquota:
@@ -1416,9 +1464,8 @@ set_qf_format:
1416 case Opt_offgrpjquota: 1464 case Opt_offgrpjquota:
1417 case Opt_jqfmt_vfsold: 1465 case Opt_jqfmt_vfsold:
1418 case Opt_jqfmt_vfsv0: 1466 case Opt_jqfmt_vfsv0:
1419 printk(KERN_ERR 1467 ext4_msg(sb, KERN_ERR,
1420 "EXT4-fs: journaled quota options not " 1468 "journaled quota options not supported");
1421 "supported.\n");
1422 break; 1469 break;
1423 case Opt_noquota: 1470 case Opt_noquota:
1424 break; 1471 break;
@@ -1443,8 +1490,9 @@ set_qf_format:
1443 break; 1490 break;
1444 case Opt_resize: 1491 case Opt_resize:
1445 if (!is_remount) { 1492 if (!is_remount) {
1446 printk("EXT4-fs: resize option only available " 1493 ext4_msg(sb, KERN_ERR,
1447 "for remount\n"); 1494 "resize option only available "
1495 "for remount");
1448 return 0; 1496 return 0;
1449 } 1497 }
1450 if (match_int(&args[0], &option) != 0) 1498 if (match_int(&args[0], &option) != 0)
@@ -1474,14 +1522,21 @@ set_qf_format:
1474 case Opt_delalloc: 1522 case Opt_delalloc:
1475 set_opt(sbi->s_mount_opt, DELALLOC); 1523 set_opt(sbi->s_mount_opt, DELALLOC);
1476 break; 1524 break;
1525 case Opt_block_validity:
1526 set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
1527 break;
1528 case Opt_noblock_validity:
1529 clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
1530 break;
1477 case Opt_inode_readahead_blks: 1531 case Opt_inode_readahead_blks:
1478 if (match_int(&args[0], &option)) 1532 if (match_int(&args[0], &option))
1479 return 0; 1533 return 0;
1480 if (option < 0 || option > (1 << 30)) 1534 if (option < 0 || option > (1 << 30))
1481 return 0; 1535 return 0;
1482 if (option & (option - 1)) { 1536 if (!is_power_of_2(option)) {
1483 printk(KERN_ERR "EXT4-fs: inode_readahead_blks" 1537 ext4_msg(sb, KERN_ERR,
1484 " must be a power of 2\n"); 1538 "EXT4-fs: inode_readahead_blks"
1539 " must be a power of 2");
1485 return 0; 1540 return 0;
1486 } 1541 }
1487 sbi->s_inode_readahead_blks = option; 1542 sbi->s_inode_readahead_blks = option;
@@ -1508,9 +1563,9 @@ set_qf_format:
1508 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1563 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1509 break; 1564 break;
1510 default: 1565 default:
1511 printk(KERN_ERR 1566 ext4_msg(sb, KERN_ERR,
1512 "EXT4-fs: Unrecognized mount option \"%s\" " 1567 "Unrecognized mount option \"%s\" "
1513 "or missing value\n", p); 1568 "or missing value", p);
1514 return 0; 1569 return 0;
1515 } 1570 }
1516 } 1571 }
@@ -1528,21 +1583,21 @@ set_qf_format:
1528 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || 1583 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
1529 (sbi->s_qf_names[GRPQUOTA] && 1584 (sbi->s_qf_names[GRPQUOTA] &&
1530 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { 1585 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
1531 printk(KERN_ERR "EXT4-fs: old and new quota " 1586 ext4_msg(sb, KERN_ERR, "old and new quota "
1532 "format mixing.\n"); 1587 "format mixing");
1533 return 0; 1588 return 0;
1534 } 1589 }
1535 1590
1536 if (!sbi->s_jquota_fmt) { 1591 if (!sbi->s_jquota_fmt) {
1537 printk(KERN_ERR "EXT4-fs: journaled quota format " 1592 ext4_msg(sb, KERN_ERR, "journaled quota format "
1538 "not specified.\n"); 1593 "not specified");
1539 return 0; 1594 return 0;
1540 } 1595 }
1541 } else { 1596 } else {
1542 if (sbi->s_jquota_fmt) { 1597 if (sbi->s_jquota_fmt) {
1543 printk(KERN_ERR "EXT4-fs: journaled quota format " 1598 ext4_msg(sb, KERN_ERR, "journaled quota format "
1544 "specified with no journaling " 1599 "specified with no journaling "
1545 "enabled.\n"); 1600 "enabled");
1546 return 0; 1601 return 0;
1547 } 1602 }
1548 } 1603 }
@@ -1557,32 +1612,32 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1557 int res = 0; 1612 int res = 0;
1558 1613
1559 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1614 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1560 printk(KERN_ERR "EXT4-fs warning: revision level too high, " 1615 ext4_msg(sb, KERN_ERR, "revision level too high, "
1561 "forcing read-only mode\n"); 1616 "forcing read-only mode");
1562 res = MS_RDONLY; 1617 res = MS_RDONLY;
1563 } 1618 }
1564 if (read_only) 1619 if (read_only)
1565 return res; 1620 return res;
1566 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1621 if (!(sbi->s_mount_state & EXT4_VALID_FS))
1567 printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " 1622 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
1568 "running e2fsck is recommended\n"); 1623 "running e2fsck is recommended");
1569 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1624 else if ((sbi->s_mount_state & EXT4_ERROR_FS))
1570 printk(KERN_WARNING 1625 ext4_msg(sb, KERN_WARNING,
1571 "EXT4-fs warning: mounting fs with errors, " 1626 "warning: mounting fs with errors, "
1572 "running e2fsck is recommended\n"); 1627 "running e2fsck is recommended");
1573 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1628 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
1574 le16_to_cpu(es->s_mnt_count) >= 1629 le16_to_cpu(es->s_mnt_count) >=
1575 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1630 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1576 printk(KERN_WARNING 1631 ext4_msg(sb, KERN_WARNING,
1577 "EXT4-fs warning: maximal mount count reached, " 1632 "warning: maximal mount count reached, "
1578 "running e2fsck is recommended\n"); 1633 "running e2fsck is recommended");
1579 else if (le32_to_cpu(es->s_checkinterval) && 1634 else if (le32_to_cpu(es->s_checkinterval) &&
1580 (le32_to_cpu(es->s_lastcheck) + 1635 (le32_to_cpu(es->s_lastcheck) +
1581 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1636 le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1582 printk(KERN_WARNING 1637 ext4_msg(sb, KERN_WARNING,
1583 "EXT4-fs warning: checktime reached, " 1638 "warning: checktime reached, "
1584 "running e2fsck is recommended\n"); 1639 "running e2fsck is recommended");
1585 if (!sbi->s_journal) 1640 if (!sbi->s_journal)
1586 es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 1641 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1587 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1642 if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1588 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1643 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
@@ -1592,7 +1647,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1592 if (sbi->s_journal) 1647 if (sbi->s_journal)
1593 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1648 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1594 1649
1595 ext4_commit_super(sb, es, 1); 1650 ext4_commit_super(sb, 1);
1596 if (test_opt(sb, DEBUG)) 1651 if (test_opt(sb, DEBUG))
1597 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1652 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1598 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 1653 "bpg=%lu, ipg=%lu, mo=%04lx]\n",
@@ -1603,11 +1658,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1603 sbi->s_mount_opt); 1658 sbi->s_mount_opt);
1604 1659
1605 if (EXT4_SB(sb)->s_journal) { 1660 if (EXT4_SB(sb)->s_journal) {
1606 printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", 1661 ext4_msg(sb, KERN_INFO, "%s journal on %s",
1607 sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : 1662 EXT4_SB(sb)->s_journal->j_inode ? "internal" :
1608 "external", EXT4_SB(sb)->s_journal->j_devname); 1663 "external", EXT4_SB(sb)->s_journal->j_devname);
1609 } else { 1664 } else {
1610 printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id); 1665 ext4_msg(sb, KERN_INFO, "no journal");
1611 } 1666 }
1612 return res; 1667 return res;
1613} 1668}
@@ -1616,10 +1671,10 @@ static int ext4_fill_flex_info(struct super_block *sb)
1616{ 1671{
1617 struct ext4_sb_info *sbi = EXT4_SB(sb); 1672 struct ext4_sb_info *sbi = EXT4_SB(sb);
1618 struct ext4_group_desc *gdp = NULL; 1673 struct ext4_group_desc *gdp = NULL;
1619 struct buffer_head *bh;
1620 ext4_group_t flex_group_count; 1674 ext4_group_t flex_group_count;
1621 ext4_group_t flex_group; 1675 ext4_group_t flex_group;
1622 int groups_per_flex = 0; 1676 int groups_per_flex = 0;
1677 size_t size;
1623 int i; 1678 int i;
1624 1679
1625 if (!sbi->s_es->s_log_groups_per_flex) { 1680 if (!sbi->s_es->s_log_groups_per_flex) {
@@ -1634,16 +1689,21 @@ static int ext4_fill_flex_info(struct super_block *sb)
1634 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1689 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1635 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << 1690 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
1636 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; 1691 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
1637 sbi->s_flex_groups = kzalloc(flex_group_count * 1692 size = flex_group_count * sizeof(struct flex_groups);
1638 sizeof(struct flex_groups), GFP_KERNEL); 1693 sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
1694 if (sbi->s_flex_groups == NULL) {
1695 sbi->s_flex_groups = vmalloc(size);
1696 if (sbi->s_flex_groups)
1697 memset(sbi->s_flex_groups, 0, size);
1698 }
1639 if (sbi->s_flex_groups == NULL) { 1699 if (sbi->s_flex_groups == NULL) {
1640 printk(KERN_ERR "EXT4-fs: not enough memory for " 1700 ext4_msg(sb, KERN_ERR, "not enough memory for "
1641 "%u flex groups\n", flex_group_count); 1701 "%u flex groups", flex_group_count);
1642 goto failed; 1702 goto failed;
1643 } 1703 }
1644 1704
1645 for (i = 0; i < sbi->s_groups_count; i++) { 1705 for (i = 0; i < sbi->s_groups_count; i++) {
1646 gdp = ext4_get_group_desc(sb, i, &bh); 1706 gdp = ext4_get_group_desc(sb, i, NULL);
1647 1707
1648 flex_group = ext4_flex_group(sbi, i); 1708 flex_group = ext4_flex_group(sbi, i);
1649 atomic_set(&sbi->s_flex_groups[flex_group].free_inodes, 1709 atomic_set(&sbi->s_flex_groups[flex_group].free_inodes,
@@ -1724,44 +1784,44 @@ static int ext4_check_descriptors(struct super_block *sb)
1724 1784
1725 block_bitmap = ext4_block_bitmap(sb, gdp); 1785 block_bitmap = ext4_block_bitmap(sb, gdp);
1726 if (block_bitmap < first_block || block_bitmap > last_block) { 1786 if (block_bitmap < first_block || block_bitmap > last_block) {
1727 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1787 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1728 "Block bitmap for group %u not in group " 1788 "Block bitmap for group %u not in group "
1729 "(block %llu)!\n", i, block_bitmap); 1789 "(block %llu)!", i, block_bitmap);
1730 return 0; 1790 return 0;
1731 } 1791 }
1732 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1792 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1733 if (inode_bitmap < first_block || inode_bitmap > last_block) { 1793 if (inode_bitmap < first_block || inode_bitmap > last_block) {
1734 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1794 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1735 "Inode bitmap for group %u not in group " 1795 "Inode bitmap for group %u not in group "
1736 "(block %llu)!\n", i, inode_bitmap); 1796 "(block %llu)!", i, inode_bitmap);
1737 return 0; 1797 return 0;
1738 } 1798 }
1739 inode_table = ext4_inode_table(sb, gdp); 1799 inode_table = ext4_inode_table(sb, gdp);
1740 if (inode_table < first_block || 1800 if (inode_table < first_block ||
1741 inode_table + sbi->s_itb_per_group - 1 > last_block) { 1801 inode_table + sbi->s_itb_per_group - 1 > last_block) {
1742 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1802 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1743 "Inode table for group %u not in group " 1803 "Inode table for group %u not in group "
1744 "(block %llu)!\n", i, inode_table); 1804 "(block %llu)!", i, inode_table);
1745 return 0; 1805 return 0;
1746 } 1806 }
1747 spin_lock(sb_bgl_lock(sbi, i)); 1807 ext4_lock_group(sb, i);
1748 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1808 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1749 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1809 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1750 "Checksum for group %u failed (%u!=%u)\n", 1810 "Checksum for group %u failed (%u!=%u)",
1751 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1811 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1752 gdp)), le16_to_cpu(gdp->bg_checksum)); 1812 gdp)), le16_to_cpu(gdp->bg_checksum));
1753 if (!(sb->s_flags & MS_RDONLY)) { 1813 if (!(sb->s_flags & MS_RDONLY)) {
1754 spin_unlock(sb_bgl_lock(sbi, i)); 1814 ext4_unlock_group(sb, i);
1755 return 0; 1815 return 0;
1756 } 1816 }
1757 } 1817 }
1758 spin_unlock(sb_bgl_lock(sbi, i)); 1818 ext4_unlock_group(sb, i);
1759 if (!flexbg_flag) 1819 if (!flexbg_flag)
1760 first_block += EXT4_BLOCKS_PER_GROUP(sb); 1820 first_block += EXT4_BLOCKS_PER_GROUP(sb);
1761 } 1821 }
1762 1822
1763 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 1823 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
1764 sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); 1824 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
1765 return 1; 1825 return 1;
1766} 1826}
1767 1827
@@ -1796,8 +1856,8 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1796 } 1856 }
1797 1857
1798 if (bdev_read_only(sb->s_bdev)) { 1858 if (bdev_read_only(sb->s_bdev)) {
1799 printk(KERN_ERR "EXT4-fs: write access " 1859 ext4_msg(sb, KERN_ERR, "write access "
1800 "unavailable, skipping orphan cleanup.\n"); 1860 "unavailable, skipping orphan cleanup");
1801 return; 1861 return;
1802 } 1862 }
1803 1863
@@ -1811,8 +1871,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1811 } 1871 }
1812 1872
1813 if (s_flags & MS_RDONLY) { 1873 if (s_flags & MS_RDONLY) {
1814 printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n", 1874 ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
1815 sb->s_id);
1816 sb->s_flags &= ~MS_RDONLY; 1875 sb->s_flags &= ~MS_RDONLY;
1817 } 1876 }
1818#ifdef CONFIG_QUOTA 1877#ifdef CONFIG_QUOTA
@@ -1823,9 +1882,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1823 if (EXT4_SB(sb)->s_qf_names[i]) { 1882 if (EXT4_SB(sb)->s_qf_names[i]) {
1824 int ret = ext4_quota_on_mount(sb, i); 1883 int ret = ext4_quota_on_mount(sb, i);
1825 if (ret < 0) 1884 if (ret < 0)
1826 printk(KERN_ERR 1885 ext4_msg(sb, KERN_ERR,
1827 "EXT4-fs: Cannot turn on journaled " 1886 "Cannot turn on journaled "
1828 "quota: error %d\n", ret); 1887 "quota: error %d", ret);
1829 } 1888 }
1830 } 1889 }
1831#endif 1890#endif
@@ -1842,16 +1901,16 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1842 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 1901 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
1843 vfs_dq_init(inode); 1902 vfs_dq_init(inode);
1844 if (inode->i_nlink) { 1903 if (inode->i_nlink) {
1845 printk(KERN_DEBUG 1904 ext4_msg(sb, KERN_DEBUG,
1846 "%s: truncating inode %lu to %lld bytes\n", 1905 "%s: truncating inode %lu to %lld bytes",
1847 __func__, inode->i_ino, inode->i_size); 1906 __func__, inode->i_ino, inode->i_size);
1848 jbd_debug(2, "truncating inode %lu to %lld bytes\n", 1907 jbd_debug(2, "truncating inode %lu to %lld bytes\n",
1849 inode->i_ino, inode->i_size); 1908 inode->i_ino, inode->i_size);
1850 ext4_truncate(inode); 1909 ext4_truncate(inode);
1851 nr_truncates++; 1910 nr_truncates++;
1852 } else { 1911 } else {
1853 printk(KERN_DEBUG 1912 ext4_msg(sb, KERN_DEBUG,
1854 "%s: deleting unreferenced inode %lu\n", 1913 "%s: deleting unreferenced inode %lu",
1855 __func__, inode->i_ino); 1914 __func__, inode->i_ino);
1856 jbd_debug(2, "deleting unreferenced inode %lu\n", 1915 jbd_debug(2, "deleting unreferenced inode %lu\n",
1857 inode->i_ino); 1916 inode->i_ino);
@@ -1863,11 +1922,11 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1863#define PLURAL(x) (x), ((x) == 1) ? "" : "s" 1922#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
1864 1923
1865 if (nr_orphans) 1924 if (nr_orphans)
1866 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", 1925 ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
1867 sb->s_id, PLURAL(nr_orphans)); 1926 PLURAL(nr_orphans));
1868 if (nr_truncates) 1927 if (nr_truncates)
1869 printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n", 1928 ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
1870 sb->s_id, PLURAL(nr_truncates)); 1929 PLURAL(nr_truncates));
1871#ifdef CONFIG_QUOTA 1930#ifdef CONFIG_QUOTA
1872 /* Turn quotas off */ 1931 /* Turn quotas off */
1873 for (i = 0; i < MAXQUOTAS; i++) { 1932 for (i = 0; i < MAXQUOTAS; i++) {
@@ -1877,6 +1936,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1877#endif 1936#endif
1878 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1937 sb->s_flags = s_flags; /* Restore MS_RDONLY status */
1879} 1938}
1939
1880/* 1940/*
1881 * Maximal extent format file size. 1941 * Maximal extent format file size.
1882 * Resulting logical blkno at s_maxbytes must fit in our on-disk 1942 * Resulting logical blkno at s_maxbytes must fit in our on-disk
@@ -1927,19 +1987,19 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
1927 loff_t res = EXT4_NDIR_BLOCKS; 1987 loff_t res = EXT4_NDIR_BLOCKS;
1928 int meta_blocks; 1988 int meta_blocks;
1929 loff_t upper_limit; 1989 loff_t upper_limit;
1930 /* This is calculated to be the largest file size for a 1990 /* This is calculated to be the largest file size for a dense, block
1931 * dense, bitmapped file such that the total number of 1991 * mapped file such that the file's total number of 512-byte sectors,
1932 * sectors in the file, including data and all indirect blocks, 1992 * including data and all indirect blocks, does not exceed (2^48 - 1).
1933 * does not exceed 2^48 -1 1993 *
1934 * __u32 i_blocks_lo and _u16 i_blocks_high representing the 1994 * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
1935 * total number of 512 bytes blocks of the file 1995 * number of 512-byte sectors of the file.
1936 */ 1996 */
1937 1997
1938 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1998 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1939 /* 1999 /*
1940 * !has_huge_files or CONFIG_LBD is not enabled 2000 * !has_huge_files or CONFIG_LBD not enabled implies that
1941 * implies the inode i_block represent total blocks in 2001 * the inode i_block field represents total file blocks in
1942 * 512 bytes 32 == size of vfs inode i_blocks * 8 2002 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
1943 */ 2003 */
1944 upper_limit = (1LL << 32) - 1; 2004 upper_limit = (1LL << 32) - 1;
1945 2005
@@ -1981,7 +2041,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
1981} 2041}
1982 2042
1983static ext4_fsblk_t descriptor_loc(struct super_block *sb, 2043static ext4_fsblk_t descriptor_loc(struct super_block *sb,
1984 ext4_fsblk_t logical_sb_block, int nr) 2044 ext4_fsblk_t logical_sb_block, int nr)
1985{ 2045{
1986 struct ext4_sb_info *sbi = EXT4_SB(sb); 2046 struct ext4_sb_info *sbi = EXT4_SB(sb);
1987 ext4_group_t bg, first_meta_bg; 2047 ext4_group_t bg, first_meta_bg;
@@ -1995,6 +2055,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,
1995 bg = sbi->s_desc_per_block * nr; 2055 bg = sbi->s_desc_per_block * nr;
1996 if (ext4_bg_has_super(sb, bg)) 2056 if (ext4_bg_has_super(sb, bg))
1997 has_super = 1; 2057 has_super = 1;
2058
1998 return (has_super + ext4_group_first_block_no(sb, bg)); 2059 return (has_super + ext4_group_first_block_no(sb, bg));
1999} 2060}
2000 2061
@@ -2091,8 +2152,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2091 if (parse_strtoul(buf, 0x40000000, &t)) 2152 if (parse_strtoul(buf, 0x40000000, &t))
2092 return -EINVAL; 2153 return -EINVAL;
2093 2154
2094 /* inode_readahead_blks must be a power of 2 */ 2155 if (!is_power_of_2(t))
2095 if (t & (t-1))
2096 return -EINVAL; 2156 return -EINVAL;
2097 2157
2098 sbi->s_inode_readahead_blks = t; 2158 sbi->s_inode_readahead_blks = t;
@@ -2100,7 +2160,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2100} 2160}
2101 2161
2102static ssize_t sbi_ui_show(struct ext4_attr *a, 2162static ssize_t sbi_ui_show(struct ext4_attr *a,
2103 struct ext4_sb_info *sbi, char *buf) 2163 struct ext4_sb_info *sbi, char *buf)
2104{ 2164{
2105 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); 2165 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2106 2166
@@ -2205,7 +2265,6 @@ static struct kobj_type ext4_ktype = {
2205static int ext4_fill_super(struct super_block *sb, void *data, int silent) 2265static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2206 __releases(kernel_lock) 2266 __releases(kernel_lock)
2207 __acquires(kernel_lock) 2267 __acquires(kernel_lock)
2208
2209{ 2268{
2210 struct buffer_head *bh; 2269 struct buffer_head *bh;
2211 struct ext4_super_block *es = NULL; 2270 struct ext4_super_block *es = NULL;
@@ -2256,7 +2315,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2256 2315
2257 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 2316 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
2258 if (!blocksize) { 2317 if (!blocksize) {
2259 printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); 2318 ext4_msg(sb, KERN_ERR, "unable to set blocksize");
2260 goto out_fail; 2319 goto out_fail;
2261 } 2320 }
2262 2321
@@ -2272,7 +2331,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2272 } 2331 }
2273 2332
2274 if (!(bh = sb_bread(sb, logical_sb_block))) { 2333 if (!(bh = sb_bread(sb, logical_sb_block))) {
2275 printk(KERN_ERR "EXT4-fs: unable to read superblock\n"); 2334 ext4_msg(sb, KERN_ERR, "unable to read superblock");
2276 goto out_fail; 2335 goto out_fail;
2277 } 2336 }
2278 /* 2337 /*
@@ -2321,6 +2380,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2321 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; 2380 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
2322 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 2381 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
2323 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 2382 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
2383 sbi->s_mb_history_max = default_mb_history_length;
2324 2384
2325 set_opt(sbi->s_mount_opt, BARRIER); 2385 set_opt(sbi->s_mount_opt, BARRIER);
2326 2386
@@ -2330,7 +2390,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2330 */ 2390 */
2331 set_opt(sbi->s_mount_opt, DELALLOC); 2391 set_opt(sbi->s_mount_opt, DELALLOC);
2332 2392
2333
2334 if (!parse_options((char *) data, sb, &journal_devnum, 2393 if (!parse_options((char *) data, sb, &journal_devnum,
2335 &journal_ioprio, NULL, 0)) 2394 &journal_ioprio, NULL, 0))
2336 goto failed_mount; 2395 goto failed_mount;
@@ -2342,9 +2401,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2342 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 2401 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
2343 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 2402 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
2344 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) 2403 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
2345 printk(KERN_WARNING 2404 ext4_msg(sb, KERN_WARNING,
2346 "EXT4-fs warning: feature flags set on rev 0 fs, " 2405 "feature flags set on rev 0 fs, "
2347 "running e2fsck is recommended\n"); 2406 "running e2fsck is recommended");
2348 2407
2349 /* 2408 /*
2350 * Check feature flags regardless of the revision level, since we 2409 * Check feature flags regardless of the revision level, since we
@@ -2353,16 +2412,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2353 */ 2412 */
2354 features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); 2413 features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
2355 if (features) { 2414 if (features) {
2356 printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of " 2415 ext4_msg(sb, KERN_ERR,
2357 "unsupported optional features (%x).\n", sb->s_id, 2416 "Couldn't mount because of "
2417 "unsupported optional features (%x)",
2358 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & 2418 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2359 ~EXT4_FEATURE_INCOMPAT_SUPP)); 2419 ~EXT4_FEATURE_INCOMPAT_SUPP));
2360 goto failed_mount; 2420 goto failed_mount;
2361 } 2421 }
2362 features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); 2422 features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
2363 if (!(sb->s_flags & MS_RDONLY) && features) { 2423 if (!(sb->s_flags & MS_RDONLY) && features) {
2364 printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of " 2424 ext4_msg(sb, KERN_ERR,
2365 "unsupported optional features (%x).\n", sb->s_id, 2425 "Couldn't mount RDWR because of "
2426 "unsupported optional features (%x)",
2366 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & 2427 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2367 ~EXT4_FEATURE_RO_COMPAT_SUPP)); 2428 ~EXT4_FEATURE_RO_COMPAT_SUPP));
2368 goto failed_mount; 2429 goto failed_mount;
@@ -2376,9 +2437,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2376 */ 2437 */
2377 if (sizeof(root->i_blocks) < sizeof(u64) && 2438 if (sizeof(root->i_blocks) < sizeof(u64) &&
2378 !(sb->s_flags & MS_RDONLY)) { 2439 !(sb->s_flags & MS_RDONLY)) {
2379 printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge " 2440 ext4_msg(sb, KERN_ERR, "Filesystem with huge "
2380 "files cannot be mounted read-write " 2441 "files cannot be mounted read-write "
2381 "without CONFIG_LBD.\n", sb->s_id); 2442 "without CONFIG_LBD");
2382 goto failed_mount; 2443 goto failed_mount;
2383 } 2444 }
2384 } 2445 }
@@ -2386,17 +2447,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2386 2447
2387 if (blocksize < EXT4_MIN_BLOCK_SIZE || 2448 if (blocksize < EXT4_MIN_BLOCK_SIZE ||
2388 blocksize > EXT4_MAX_BLOCK_SIZE) { 2449 blocksize > EXT4_MAX_BLOCK_SIZE) {
2389 printk(KERN_ERR 2450 ext4_msg(sb, KERN_ERR,
2390 "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n", 2451 "Unsupported filesystem blocksize %d", blocksize);
2391 blocksize, sb->s_id);
2392 goto failed_mount; 2452 goto failed_mount;
2393 } 2453 }
2394 2454
2395 if (sb->s_blocksize != blocksize) { 2455 if (sb->s_blocksize != blocksize) {
2396
2397 /* Validate the filesystem blocksize */ 2456 /* Validate the filesystem blocksize */
2398 if (!sb_set_blocksize(sb, blocksize)) { 2457 if (!sb_set_blocksize(sb, blocksize)) {
2399 printk(KERN_ERR "EXT4-fs: bad block size %d.\n", 2458 ext4_msg(sb, KERN_ERR, "bad block size %d",
2400 blocksize); 2459 blocksize);
2401 goto failed_mount; 2460 goto failed_mount;
2402 } 2461 }
@@ -2406,15 +2465,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2406 offset = do_div(logical_sb_block, blocksize); 2465 offset = do_div(logical_sb_block, blocksize);
2407 bh = sb_bread(sb, logical_sb_block); 2466 bh = sb_bread(sb, logical_sb_block);
2408 if (!bh) { 2467 if (!bh) {
2409 printk(KERN_ERR 2468 ext4_msg(sb, KERN_ERR,
2410 "EXT4-fs: Can't read superblock on 2nd try.\n"); 2469 "Can't read superblock on 2nd try");
2411 goto failed_mount; 2470 goto failed_mount;
2412 } 2471 }
2413 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); 2472 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
2414 sbi->s_es = es; 2473 sbi->s_es = es;
2415 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 2474 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
2416 printk(KERN_ERR 2475 ext4_msg(sb, KERN_ERR,
2417 "EXT4-fs: Magic mismatch, very weird !\n"); 2476 "Magic mismatch, very weird!");
2418 goto failed_mount; 2477 goto failed_mount;
2419 } 2478 }
2420 } 2479 }
@@ -2432,30 +2491,33 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2432 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 2491 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
2433 (!is_power_of_2(sbi->s_inode_size)) || 2492 (!is_power_of_2(sbi->s_inode_size)) ||
2434 (sbi->s_inode_size > blocksize)) { 2493 (sbi->s_inode_size > blocksize)) {
2435 printk(KERN_ERR 2494 ext4_msg(sb, KERN_ERR,
2436 "EXT4-fs: unsupported inode size: %d\n", 2495 "unsupported inode size: %d",
2437 sbi->s_inode_size); 2496 sbi->s_inode_size);
2438 goto failed_mount; 2497 goto failed_mount;
2439 } 2498 }
2440 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) 2499 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
2441 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); 2500 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
2442 } 2501 }
2502
2443 sbi->s_desc_size = le16_to_cpu(es->s_desc_size); 2503 sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
2444 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { 2504 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
2445 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || 2505 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
2446 sbi->s_desc_size > EXT4_MAX_DESC_SIZE || 2506 sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
2447 !is_power_of_2(sbi->s_desc_size)) { 2507 !is_power_of_2(sbi->s_desc_size)) {
2448 printk(KERN_ERR 2508 ext4_msg(sb, KERN_ERR,
2449 "EXT4-fs: unsupported descriptor size %lu\n", 2509 "unsupported descriptor size %lu",
2450 sbi->s_desc_size); 2510 sbi->s_desc_size);
2451 goto failed_mount; 2511 goto failed_mount;
2452 } 2512 }
2453 } else 2513 } else
2454 sbi->s_desc_size = EXT4_MIN_DESC_SIZE; 2514 sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
2515
2455 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 2516 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
2456 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 2517 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
2457 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) 2518 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
2458 goto cantfind_ext4; 2519 goto cantfind_ext4;
2520
2459 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); 2521 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
2460 if (sbi->s_inodes_per_block == 0) 2522 if (sbi->s_inodes_per_block == 0)
2461 goto cantfind_ext4; 2523 goto cantfind_ext4;
@@ -2466,6 +2528,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2466 sbi->s_mount_state = le16_to_cpu(es->s_state); 2528 sbi->s_mount_state = le16_to_cpu(es->s_state);
2467 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 2529 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
2468 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 2530 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
2531
2469 for (i = 0; i < 4; i++) 2532 for (i = 0; i < 4; i++)
2470 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 2533 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
2471 sbi->s_def_hash_version = es->s_def_hash_version; 2534 sbi->s_def_hash_version = es->s_def_hash_version;
@@ -2483,25 +2546,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2483 } 2546 }
2484 2547
2485 if (sbi->s_blocks_per_group > blocksize * 8) { 2548 if (sbi->s_blocks_per_group > blocksize * 8) {
2486 printk(KERN_ERR 2549 ext4_msg(sb, KERN_ERR,
2487 "EXT4-fs: #blocks per group too big: %lu\n", 2550 "#blocks per group too big: %lu",
2488 sbi->s_blocks_per_group); 2551 sbi->s_blocks_per_group);
2489 goto failed_mount; 2552 goto failed_mount;
2490 } 2553 }
2491 if (sbi->s_inodes_per_group > blocksize * 8) { 2554 if (sbi->s_inodes_per_group > blocksize * 8) {
2492 printk(KERN_ERR 2555 ext4_msg(sb, KERN_ERR,
2493 "EXT4-fs: #inodes per group too big: %lu\n", 2556 "#inodes per group too big: %lu",
2494 sbi->s_inodes_per_group); 2557 sbi->s_inodes_per_group);
2495 goto failed_mount; 2558 goto failed_mount;
2496 } 2559 }
2497 2560
2498 if (ext4_blocks_count(es) > 2561 if (ext4_blocks_count(es) >
2499 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 2562 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
2500 printk(KERN_ERR "EXT4-fs: filesystem on %s:" 2563 ext4_msg(sb, KERN_ERR, "filesystem"
2501 " too large to mount safely\n", sb->s_id); 2564 " too large to mount safely");
2502 if (sizeof(sector_t) < 8) 2565 if (sizeof(sector_t) < 8)
2503 printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not " 2566 ext4_msg(sb, KERN_WARNING, "CONFIG_LBD not enabled");
2504 "enabled\n");
2505 goto failed_mount; 2567 goto failed_mount;
2506 } 2568 }
2507 2569
@@ -2511,21 +2573,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2511 /* check blocks count against device size */ 2573 /* check blocks count against device size */
2512 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; 2574 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2513 if (blocks_count && ext4_blocks_count(es) > blocks_count) { 2575 if (blocks_count && ext4_blocks_count(es) > blocks_count) {
2514 printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu " 2576 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
2515 "exceeds size of device (%llu blocks)\n", 2577 "exceeds size of device (%llu blocks)",
2516 ext4_blocks_count(es), blocks_count); 2578 ext4_blocks_count(es), blocks_count);
2517 goto failed_mount; 2579 goto failed_mount;
2518 } 2580 }
2519 2581
2520 /* 2582 /*
2521 * It makes no sense for the first data block to be beyond the end 2583 * It makes no sense for the first data block to be beyond the end
2522 * of the filesystem. 2584 * of the filesystem.
2523 */ 2585 */
2524 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 2586 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
2525 printk(KERN_WARNING "EXT4-fs: bad geometry: first data" 2587 ext4_msg(sb, KERN_WARNING, "bad geometry: first data"
2526 "block %u is beyond end of filesystem (%llu)\n", 2588 "block %u is beyond end of filesystem (%llu)",
2527 le32_to_cpu(es->s_first_data_block), 2589 le32_to_cpu(es->s_first_data_block),
2528 ext4_blocks_count(es)); 2590 ext4_blocks_count(es));
2529 goto failed_mount; 2591 goto failed_mount;
2530 } 2592 }
2531 blocks_count = (ext4_blocks_count(es) - 2593 blocks_count = (ext4_blocks_count(es) -
@@ -2533,9 +2595,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2533 EXT4_BLOCKS_PER_GROUP(sb) - 1); 2595 EXT4_BLOCKS_PER_GROUP(sb) - 1);
2534 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 2596 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
2535 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { 2597 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
2536 printk(KERN_WARNING "EXT4-fs: groups count too large: %u " 2598 ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
2537 "(block count %llu, first data block %u, " 2599 "(block count %llu, first data block %u, "
2538 "blocks per group %lu)\n", sbi->s_groups_count, 2600 "blocks per group %lu)", sbi->s_groups_count,
2539 ext4_blocks_count(es), 2601 ext4_blocks_count(es),
2540 le32_to_cpu(es->s_first_data_block), 2602 le32_to_cpu(es->s_first_data_block),
2541 EXT4_BLOCKS_PER_GROUP(sb)); 2603 EXT4_BLOCKS_PER_GROUP(sb));
@@ -2547,7 +2609,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2547 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), 2609 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
2548 GFP_KERNEL); 2610 GFP_KERNEL);
2549 if (sbi->s_group_desc == NULL) { 2611 if (sbi->s_group_desc == NULL) {
2550 printk(KERN_ERR "EXT4-fs: not enough memory\n"); 2612 ext4_msg(sb, KERN_ERR, "not enough memory");
2551 goto failed_mount; 2613 goto failed_mount;
2552 } 2614 }
2553 2615
@@ -2562,21 +2624,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2562 block = descriptor_loc(sb, logical_sb_block, i); 2624 block = descriptor_loc(sb, logical_sb_block, i);
2563 sbi->s_group_desc[i] = sb_bread(sb, block); 2625 sbi->s_group_desc[i] = sb_bread(sb, block);
2564 if (!sbi->s_group_desc[i]) { 2626 if (!sbi->s_group_desc[i]) {
2565 printk(KERN_ERR "EXT4-fs: " 2627 ext4_msg(sb, KERN_ERR,
2566 "can't read group descriptor %d\n", i); 2628 "can't read group descriptor %d", i);
2567 db_count = i; 2629 db_count = i;
2568 goto failed_mount2; 2630 goto failed_mount2;
2569 } 2631 }
2570 } 2632 }
2571 if (!ext4_check_descriptors(sb)) { 2633 if (!ext4_check_descriptors(sb)) {
2572 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); 2634 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
2573 goto failed_mount2; 2635 goto failed_mount2;
2574 } 2636 }
2575 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 2637 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
2576 if (!ext4_fill_flex_info(sb)) { 2638 if (!ext4_fill_flex_info(sb)) {
2577 printk(KERN_ERR 2639 ext4_msg(sb, KERN_ERR,
2578 "EXT4-fs: unable to initialize " 2640 "unable to initialize "
2579 "flex_bg meta info!\n"); 2641 "flex_bg meta info!");
2580 goto failed_mount2; 2642 goto failed_mount2;
2581 } 2643 }
2582 2644
@@ -2598,7 +2660,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2598 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 2660 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
2599 } 2661 }
2600 if (err) { 2662 if (err) {
2601 printk(KERN_ERR "EXT4-fs: insufficient memory\n"); 2663 ext4_msg(sb, KERN_ERR, "insufficient memory");
2602 goto failed_mount3; 2664 goto failed_mount3;
2603 } 2665 }
2604 2666
@@ -2607,7 +2669,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2607 /* 2669 /*
2608 * set up enough so that it can read an inode 2670 * set up enough so that it can read an inode
2609 */ 2671 */
2610 sb->s_op = &ext4_sops; 2672 if (!test_opt(sb, NOLOAD) &&
2673 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
2674 sb->s_op = &ext4_sops;
2675 else
2676 sb->s_op = &ext4_nojournal_sops;
2611 sb->s_export_op = &ext4_export_ops; 2677 sb->s_export_op = &ext4_export_ops;
2612 sb->s_xattr = ext4_xattr_handlers; 2678 sb->s_xattr = ext4_xattr_handlers;
2613#ifdef CONFIG_QUOTA 2679#ifdef CONFIG_QUOTA
@@ -2615,6 +2681,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2615 sb->dq_op = &ext4_quota_operations; 2681 sb->dq_op = &ext4_quota_operations;
2616#endif 2682#endif
2617 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 2683 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
2684 mutex_init(&sbi->s_orphan_lock);
2685 mutex_init(&sbi->s_resize_lock);
2618 2686
2619 sb->s_root = NULL; 2687 sb->s_root = NULL;
2620 2688
@@ -2632,13 +2700,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2632 goto failed_mount3; 2700 goto failed_mount3;
2633 if (!(sb->s_flags & MS_RDONLY) && 2701 if (!(sb->s_flags & MS_RDONLY) &&
2634 EXT4_SB(sb)->s_journal->j_failed_commit) { 2702 EXT4_SB(sb)->s_journal->j_failed_commit) {
2635 printk(KERN_CRIT "EXT4-fs error (device %s): " 2703 ext4_msg(sb, KERN_CRIT, "error: "
2636 "ext4_fill_super: Journal transaction " 2704 "ext4_fill_super: Journal transaction "
2637 "%u is corrupt\n", sb->s_id, 2705 "%u is corrupt",
2638 EXT4_SB(sb)->s_journal->j_failed_commit); 2706 EXT4_SB(sb)->s_journal->j_failed_commit);
2639 if (test_opt(sb, ERRORS_RO)) { 2707 if (test_opt(sb, ERRORS_RO)) {
2640 printk(KERN_CRIT 2708 ext4_msg(sb, KERN_CRIT,
2641 "Mounting filesystem read-only\n"); 2709 "Mounting filesystem read-only");
2642 sb->s_flags |= MS_RDONLY; 2710 sb->s_flags |= MS_RDONLY;
2643 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2711 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2644 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2712 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
@@ -2646,14 +2714,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2646 if (test_opt(sb, ERRORS_PANIC)) { 2714 if (test_opt(sb, ERRORS_PANIC)) {
2647 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2715 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2648 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2716 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2649 ext4_commit_super(sb, es, 1); 2717 ext4_commit_super(sb, 1);
2650 goto failed_mount4; 2718 goto failed_mount4;
2651 } 2719 }
2652 } 2720 }
2653 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 2721 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
2654 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 2722 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
2655 printk(KERN_ERR "EXT4-fs: required journal recovery " 2723 ext4_msg(sb, KERN_ERR, "required journal recovery "
2656 "suppressed and not mounted read-only\n"); 2724 "suppressed and not mounted read-only");
2657 goto failed_mount4; 2725 goto failed_mount4;
2658 } else { 2726 } else {
2659 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 2727 clear_opt(sbi->s_mount_opt, DATA_FLAGS);
@@ -2666,7 +2734,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2666 if (ext4_blocks_count(es) > 0xffffffffULL && 2734 if (ext4_blocks_count(es) > 0xffffffffULL &&
2667 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 2735 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
2668 JBD2_FEATURE_INCOMPAT_64BIT)) { 2736 JBD2_FEATURE_INCOMPAT_64BIT)) {
2669 printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n"); 2737 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
2670 goto failed_mount4; 2738 goto failed_mount4;
2671 } 2739 }
2672 2740
@@ -2704,8 +2772,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2704 case EXT4_MOUNT_WRITEBACK_DATA: 2772 case EXT4_MOUNT_WRITEBACK_DATA:
2705 if (!jbd2_journal_check_available_features 2773 if (!jbd2_journal_check_available_features
2706 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 2774 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
2707 printk(KERN_ERR "EXT4-fs: Journal does not support " 2775 ext4_msg(sb, KERN_ERR, "Journal does not support "
2708 "requested data journaling mode\n"); 2776 "requested data journaling mode");
2709 goto failed_mount4; 2777 goto failed_mount4;
2710 } 2778 }
2711 default: 2779 default:
@@ -2717,8 +2785,8 @@ no_journal:
2717 2785
2718 if (test_opt(sb, NOBH)) { 2786 if (test_opt(sb, NOBH)) {
2719 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { 2787 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
2720 printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - " 2788 ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
2721 "its supported only with writeback mode\n"); 2789 "its supported only with writeback mode");
2722 clear_opt(sbi->s_mount_opt, NOBH); 2790 clear_opt(sbi->s_mount_opt, NOBH);
2723 } 2791 }
2724 } 2792 }
@@ -2729,18 +2797,18 @@ no_journal:
2729 2797
2730 root = ext4_iget(sb, EXT4_ROOT_INO); 2798 root = ext4_iget(sb, EXT4_ROOT_INO);
2731 if (IS_ERR(root)) { 2799 if (IS_ERR(root)) {
2732 printk(KERN_ERR "EXT4-fs: get root inode failed\n"); 2800 ext4_msg(sb, KERN_ERR, "get root inode failed");
2733 ret = PTR_ERR(root); 2801 ret = PTR_ERR(root);
2734 goto failed_mount4; 2802 goto failed_mount4;
2735 } 2803 }
2736 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 2804 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2737 iput(root); 2805 iput(root);
2738 printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n"); 2806 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
2739 goto failed_mount4; 2807 goto failed_mount4;
2740 } 2808 }
2741 sb->s_root = d_alloc_root(root); 2809 sb->s_root = d_alloc_root(root);
2742 if (!sb->s_root) { 2810 if (!sb->s_root) {
2743 printk(KERN_ERR "EXT4-fs: get root dentry failed\n"); 2811 ext4_msg(sb, KERN_ERR, "get root dentry failed");
2744 iput(root); 2812 iput(root);
2745 ret = -ENOMEM; 2813 ret = -ENOMEM;
2746 goto failed_mount4; 2814 goto failed_mount4;
@@ -2769,22 +2837,29 @@ no_journal:
2769 sbi->s_inode_size) { 2837 sbi->s_inode_size) {
2770 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 2838 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
2771 EXT4_GOOD_OLD_INODE_SIZE; 2839 EXT4_GOOD_OLD_INODE_SIZE;
2772 printk(KERN_INFO "EXT4-fs: required extra inode space not" 2840 ext4_msg(sb, KERN_INFO, "required extra inode space not"
2773 "available.\n"); 2841 "available");
2774 } 2842 }
2775 2843
2776 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 2844 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
2777 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " 2845 ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
2778 "requested data journaling mode\n"); 2846 "requested data journaling mode");
2779 clear_opt(sbi->s_mount_opt, DELALLOC); 2847 clear_opt(sbi->s_mount_opt, DELALLOC);
2780 } else if (test_opt(sb, DELALLOC)) 2848 } else if (test_opt(sb, DELALLOC))
2781 printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); 2849 ext4_msg(sb, KERN_INFO, "delayed allocation enabled");
2850
2851 err = ext4_setup_system_zone(sb);
2852 if (err) {
2853 ext4_msg(sb, KERN_ERR, "failed to initialize system "
2854 "zone (%d)\n", err);
2855 goto failed_mount4;
2856 }
2782 2857
2783 ext4_ext_init(sb); 2858 ext4_ext_init(sb);
2784 err = ext4_mb_init(sb, needs_recovery); 2859 err = ext4_mb_init(sb, needs_recovery);
2785 if (err) { 2860 if (err) {
2786 printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", 2861 ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)",
2787 err); 2862 err);
2788 goto failed_mount4; 2863 goto failed_mount4;
2789 } 2864 }
2790 2865
@@ -2798,19 +2873,11 @@ no_journal:
2798 goto failed_mount4; 2873 goto failed_mount4;
2799 }; 2874 };
2800 2875
2801 /*
2802 * akpm: core read_super() calls in here with the superblock locked.
2803 * That deadlocks, because orphan cleanup needs to lock the superblock
2804 * in numerous places. Here we just pop the lock - it's relatively
2805 * harmless, because we are now ready to accept write_super() requests,
2806 * and aviro says that's the only reason for hanging onto the
2807 * superblock lock.
2808 */
2809 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 2876 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
2810 ext4_orphan_cleanup(sb, es); 2877 ext4_orphan_cleanup(sb, es);
2811 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 2878 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
2812 if (needs_recovery) { 2879 if (needs_recovery) {
2813 printk(KERN_INFO "EXT4-fs: recovery complete.\n"); 2880 ext4_msg(sb, KERN_INFO, "recovery complete");
2814 ext4_mark_recovery_complete(sb, es); 2881 ext4_mark_recovery_complete(sb, es);
2815 } 2882 }
2816 if (EXT4_SB(sb)->s_journal) { 2883 if (EXT4_SB(sb)->s_journal) {
@@ -2823,25 +2890,30 @@ no_journal:
2823 } else 2890 } else
2824 descr = "out journal"; 2891 descr = "out journal";
2825 2892
2826 printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n", 2893 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr);
2827 sb->s_id, descr);
2828 2894
2829 lock_kernel(); 2895 lock_kernel();
2830 return 0; 2896 return 0;
2831 2897
2832cantfind_ext4: 2898cantfind_ext4:
2833 if (!silent) 2899 if (!silent)
2834 printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n", 2900 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
2835 sb->s_id);
2836 goto failed_mount; 2901 goto failed_mount;
2837 2902
2838failed_mount4: 2903failed_mount4:
2839 printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id); 2904 ext4_msg(sb, KERN_ERR, "mount failed");
2905 ext4_release_system_zone(sb);
2840 if (sbi->s_journal) { 2906 if (sbi->s_journal) {
2841 jbd2_journal_destroy(sbi->s_journal); 2907 jbd2_journal_destroy(sbi->s_journal);
2842 sbi->s_journal = NULL; 2908 sbi->s_journal = NULL;
2843 } 2909 }
2844failed_mount3: 2910failed_mount3:
2911 if (sbi->s_flex_groups) {
2912 if (is_vmalloc_addr(sbi->s_flex_groups))
2913 vfree(sbi->s_flex_groups);
2914 else
2915 kfree(sbi->s_flex_groups);
2916 }
2845 percpu_counter_destroy(&sbi->s_freeblocks_counter); 2917 percpu_counter_destroy(&sbi->s_freeblocks_counter);
2846 percpu_counter_destroy(&sbi->s_freeinodes_counter); 2918 percpu_counter_destroy(&sbi->s_freeinodes_counter);
2847 percpu_counter_destroy(&sbi->s_dirs_counter); 2919 percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -2862,6 +2934,7 @@ failed_mount:
2862 brelse(bh); 2934 brelse(bh);
2863out_fail: 2935out_fail:
2864 sb->s_fs_info = NULL; 2936 sb->s_fs_info = NULL;
2937 kfree(sbi->s_blockgroup_lock);
2865 kfree(sbi); 2938 kfree(sbi);
2866 lock_kernel(); 2939 lock_kernel();
2867 return ret; 2940 return ret;
@@ -2906,27 +2979,27 @@ static journal_t *ext4_get_journal(struct super_block *sb,
2906 2979
2907 journal_inode = ext4_iget(sb, journal_inum); 2980 journal_inode = ext4_iget(sb, journal_inum);
2908 if (IS_ERR(journal_inode)) { 2981 if (IS_ERR(journal_inode)) {
2909 printk(KERN_ERR "EXT4-fs: no journal found.\n"); 2982 ext4_msg(sb, KERN_ERR, "no journal found");
2910 return NULL; 2983 return NULL;
2911 } 2984 }
2912 if (!journal_inode->i_nlink) { 2985 if (!journal_inode->i_nlink) {
2913 make_bad_inode(journal_inode); 2986 make_bad_inode(journal_inode);
2914 iput(journal_inode); 2987 iput(journal_inode);
2915 printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n"); 2988 ext4_msg(sb, KERN_ERR, "journal inode is deleted");
2916 return NULL; 2989 return NULL;
2917 } 2990 }
2918 2991
2919 jbd_debug(2, "Journal inode found at %p: %lld bytes\n", 2992 jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
2920 journal_inode, journal_inode->i_size); 2993 journal_inode, journal_inode->i_size);
2921 if (!S_ISREG(journal_inode->i_mode)) { 2994 if (!S_ISREG(journal_inode->i_mode)) {
2922 printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); 2995 ext4_msg(sb, KERN_ERR, "invalid journal inode");
2923 iput(journal_inode); 2996 iput(journal_inode);
2924 return NULL; 2997 return NULL;
2925 } 2998 }
2926 2999
2927 journal = jbd2_journal_init_inode(journal_inode); 3000 journal = jbd2_journal_init_inode(journal_inode);
2928 if (!journal) { 3001 if (!journal) {
2929 printk(KERN_ERR "EXT4-fs: Could not load journal inode\n"); 3002 ext4_msg(sb, KERN_ERR, "Could not load journal inode");
2930 iput(journal_inode); 3003 iput(journal_inode);
2931 return NULL; 3004 return NULL;
2932 } 3005 }
@@ -2950,13 +3023,13 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
2950 3023
2951 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3024 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2952 3025
2953 bdev = ext4_blkdev_get(j_dev); 3026 bdev = ext4_blkdev_get(j_dev, sb);
2954 if (bdev == NULL) 3027 if (bdev == NULL)
2955 return NULL; 3028 return NULL;
2956 3029
2957 if (bd_claim(bdev, sb)) { 3030 if (bd_claim(bdev, sb)) {
2958 printk(KERN_ERR 3031 ext4_msg(sb, KERN_ERR,
2959 "EXT4-fs: failed to claim external journal device.\n"); 3032 "failed to claim external journal device");
2960 blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 3033 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
2961 return NULL; 3034 return NULL;
2962 } 3035 }
@@ -2964,8 +3037,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
2964 blocksize = sb->s_blocksize; 3037 blocksize = sb->s_blocksize;
2965 hblock = bdev_hardsect_size(bdev); 3038 hblock = bdev_hardsect_size(bdev);
2966 if (blocksize < hblock) { 3039 if (blocksize < hblock) {
2967 printk(KERN_ERR 3040 ext4_msg(sb, KERN_ERR,
2968 "EXT4-fs: blocksize too small for journal device.\n"); 3041 "blocksize too small for journal device");
2969 goto out_bdev; 3042 goto out_bdev;
2970 } 3043 }
2971 3044
@@ -2973,8 +3046,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
2973 offset = EXT4_MIN_BLOCK_SIZE % blocksize; 3046 offset = EXT4_MIN_BLOCK_SIZE % blocksize;
2974 set_blocksize(bdev, blocksize); 3047 set_blocksize(bdev, blocksize);
2975 if (!(bh = __bread(bdev, sb_block, blocksize))) { 3048 if (!(bh = __bread(bdev, sb_block, blocksize))) {
2976 printk(KERN_ERR "EXT4-fs: couldn't read superblock of " 3049 ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
2977 "external journal\n"); 3050 "external journal");
2978 goto out_bdev; 3051 goto out_bdev;
2979 } 3052 }
2980 3053
@@ -2982,14 +3055,14 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
2982 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || 3055 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
2983 !(le32_to_cpu(es->s_feature_incompat) & 3056 !(le32_to_cpu(es->s_feature_incompat) &
2984 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { 3057 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
2985 printk(KERN_ERR "EXT4-fs: external journal has " 3058 ext4_msg(sb, KERN_ERR, "external journal has "
2986 "bad superblock\n"); 3059 "bad superblock");
2987 brelse(bh); 3060 brelse(bh);
2988 goto out_bdev; 3061 goto out_bdev;
2989 } 3062 }
2990 3063
2991 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 3064 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
2992 printk(KERN_ERR "EXT4-fs: journal UUID does not match\n"); 3065 ext4_msg(sb, KERN_ERR, "journal UUID does not match");
2993 brelse(bh); 3066 brelse(bh);
2994 goto out_bdev; 3067 goto out_bdev;
2995 } 3068 }
@@ -3001,25 +3074,26 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
3001 journal = jbd2_journal_init_dev(bdev, sb->s_bdev, 3074 journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
3002 start, len, blocksize); 3075 start, len, blocksize);
3003 if (!journal) { 3076 if (!journal) {
3004 printk(KERN_ERR "EXT4-fs: failed to create device journal\n"); 3077 ext4_msg(sb, KERN_ERR, "failed to create device journal");
3005 goto out_bdev; 3078 goto out_bdev;
3006 } 3079 }
3007 journal->j_private = sb; 3080 journal->j_private = sb;
3008 ll_rw_block(READ, 1, &journal->j_sb_buffer); 3081 ll_rw_block(READ, 1, &journal->j_sb_buffer);
3009 wait_on_buffer(journal->j_sb_buffer); 3082 wait_on_buffer(journal->j_sb_buffer);
3010 if (!buffer_uptodate(journal->j_sb_buffer)) { 3083 if (!buffer_uptodate(journal->j_sb_buffer)) {
3011 printk(KERN_ERR "EXT4-fs: I/O error on journal device\n"); 3084 ext4_msg(sb, KERN_ERR, "I/O error on journal device");
3012 goto out_journal; 3085 goto out_journal;
3013 } 3086 }
3014 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 3087 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
3015 printk(KERN_ERR "EXT4-fs: External journal has more than one " 3088 ext4_msg(sb, KERN_ERR, "External journal has more than one "
3016 "user (unsupported) - %d\n", 3089 "user (unsupported) - %d",
3017 be32_to_cpu(journal->j_superblock->s_nr_users)); 3090 be32_to_cpu(journal->j_superblock->s_nr_users));
3018 goto out_journal; 3091 goto out_journal;
3019 } 3092 }
3020 EXT4_SB(sb)->journal_bdev = bdev; 3093 EXT4_SB(sb)->journal_bdev = bdev;
3021 ext4_init_journal_params(sb, journal); 3094 ext4_init_journal_params(sb, journal);
3022 return journal; 3095 return journal;
3096
3023out_journal: 3097out_journal:
3024 jbd2_journal_destroy(journal); 3098 jbd2_journal_destroy(journal);
3025out_bdev: 3099out_bdev:
@@ -3041,8 +3115,8 @@ static int ext4_load_journal(struct super_block *sb,
3041 3115
3042 if (journal_devnum && 3116 if (journal_devnum &&
3043 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3117 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3044 printk(KERN_INFO "EXT4-fs: external journal device major/minor " 3118 ext4_msg(sb, KERN_INFO, "external journal device major/minor "
3045 "numbers have changed\n"); 3119 "numbers have changed");
3046 journal_dev = new_decode_dev(journal_devnum); 3120 journal_dev = new_decode_dev(journal_devnum);
3047 } else 3121 } else
3048 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 3122 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
@@ -3054,24 +3128,23 @@ static int ext4_load_journal(struct super_block *sb,
3054 * crash? For recovery, we need to check in advance whether we 3128 * crash? For recovery, we need to check in advance whether we
3055 * can get read-write access to the device. 3129 * can get read-write access to the device.
3056 */ 3130 */
3057
3058 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 3131 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
3059 if (sb->s_flags & MS_RDONLY) { 3132 if (sb->s_flags & MS_RDONLY) {
3060 printk(KERN_INFO "EXT4-fs: INFO: recovery " 3133 ext4_msg(sb, KERN_INFO, "INFO: recovery "
3061 "required on readonly filesystem.\n"); 3134 "required on readonly filesystem");
3062 if (really_read_only) { 3135 if (really_read_only) {
3063 printk(KERN_ERR "EXT4-fs: write access " 3136 ext4_msg(sb, KERN_ERR, "write access "
3064 "unavailable, cannot proceed.\n"); 3137 "unavailable, cannot proceed");
3065 return -EROFS; 3138 return -EROFS;
3066 } 3139 }
3067 printk(KERN_INFO "EXT4-fs: write access will " 3140 ext4_msg(sb, KERN_INFO, "write access will "
3068 "be enabled during recovery.\n"); 3141 "be enabled during recovery");
3069 } 3142 }
3070 } 3143 }
3071 3144
3072 if (journal_inum && journal_dev) { 3145 if (journal_inum && journal_dev) {
3073 printk(KERN_ERR "EXT4-fs: filesystem has both journal " 3146 ext4_msg(sb, KERN_ERR, "filesystem has both journal "
3074 "and inode journals!\n"); 3147 "and inode journals!");
3075 return -EINVAL; 3148 return -EINVAL;
3076 } 3149 }
3077 3150
@@ -3084,14 +3157,14 @@ static int ext4_load_journal(struct super_block *sb,
3084 } 3157 }
3085 3158
3086 if (journal->j_flags & JBD2_BARRIER) 3159 if (journal->j_flags & JBD2_BARRIER)
3087 printk(KERN_INFO "EXT4-fs: barriers enabled\n"); 3160 ext4_msg(sb, KERN_INFO, "barriers enabled");
3088 else 3161 else
3089 printk(KERN_INFO "EXT4-fs: barriers disabled\n"); 3162 ext4_msg(sb, KERN_INFO, "barriers disabled");
3090 3163
3091 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 3164 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
3092 err = jbd2_journal_update_format(journal); 3165 err = jbd2_journal_update_format(journal);
3093 if (err) { 3166 if (err) {
3094 printk(KERN_ERR "EXT4-fs: error updating journal.\n"); 3167 ext4_msg(sb, KERN_ERR, "error updating journal");
3095 jbd2_journal_destroy(journal); 3168 jbd2_journal_destroy(journal);
3096 return err; 3169 return err;
3097 } 3170 }
@@ -3103,7 +3176,7 @@ static int ext4_load_journal(struct super_block *sb,
3103 err = jbd2_journal_load(journal); 3176 err = jbd2_journal_load(journal);
3104 3177
3105 if (err) { 3178 if (err) {
3106 printk(KERN_ERR "EXT4-fs: error loading journal.\n"); 3179 ext4_msg(sb, KERN_ERR, "error loading journal");
3107 jbd2_journal_destroy(journal); 3180 jbd2_journal_destroy(journal);
3108 return err; 3181 return err;
3109 } 3182 }
@@ -3114,18 +3187,17 @@ static int ext4_load_journal(struct super_block *sb,
3114 if (journal_devnum && 3187 if (journal_devnum &&
3115 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3188 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3116 es->s_journal_dev = cpu_to_le32(journal_devnum); 3189 es->s_journal_dev = cpu_to_le32(journal_devnum);
3117 sb->s_dirt = 1;
3118 3190
3119 /* Make sure we flush the recovery flag to disk. */ 3191 /* Make sure we flush the recovery flag to disk. */
3120 ext4_commit_super(sb, es, 1); 3192 ext4_commit_super(sb, 1);
3121 } 3193 }
3122 3194
3123 return 0; 3195 return 0;
3124} 3196}
3125 3197
3126static int ext4_commit_super(struct super_block *sb, 3198static int ext4_commit_super(struct super_block *sb, int sync)
3127 struct ext4_super_block *es, int sync)
3128{ 3199{
3200 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
3129 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 3201 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
3130 int error = 0; 3202 int error = 0;
3131 3203
@@ -3140,8 +3212,8 @@ static int ext4_commit_super(struct super_block *sb,
3140 * be remapped. Nothing we can do but to retry the 3212 * be remapped. Nothing we can do but to retry the
3141 * write and hope for the best. 3213 * write and hope for the best.
3142 */ 3214 */
3143 printk(KERN_ERR "EXT4-fs: previous I/O error to " 3215 ext4_msg(sb, KERN_ERR, "previous I/O error to "
3144 "superblock detected for %s.\n", sb->s_id); 3216 "superblock detected");
3145 clear_buffer_write_io_error(sbh); 3217 clear_buffer_write_io_error(sbh);
3146 set_buffer_uptodate(sbh); 3218 set_buffer_uptodate(sbh);
3147 } 3219 }
@@ -3154,7 +3226,7 @@ static int ext4_commit_super(struct super_block *sb,
3154 &EXT4_SB(sb)->s_freeblocks_counter)); 3226 &EXT4_SB(sb)->s_freeblocks_counter));
3155 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( 3227 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
3156 &EXT4_SB(sb)->s_freeinodes_counter)); 3228 &EXT4_SB(sb)->s_freeinodes_counter));
3157 3229 sb->s_dirt = 0;
3158 BUFFER_TRACE(sbh, "marking dirty"); 3230 BUFFER_TRACE(sbh, "marking dirty");
3159 mark_buffer_dirty(sbh); 3231 mark_buffer_dirty(sbh);
3160 if (sync) { 3232 if (sync) {
@@ -3164,8 +3236,8 @@ static int ext4_commit_super(struct super_block *sb,
3164 3236
3165 error = buffer_write_io_error(sbh); 3237 error = buffer_write_io_error(sbh);
3166 if (error) { 3238 if (error) {
3167 printk(KERN_ERR "EXT4-fs: I/O error while writing " 3239 ext4_msg(sb, KERN_ERR, "I/O error while writing "
3168 "superblock for %s.\n", sb->s_id); 3240 "superblock");
3169 clear_buffer_write_io_error(sbh); 3241 clear_buffer_write_io_error(sbh);
3170 set_buffer_uptodate(sbh); 3242 set_buffer_uptodate(sbh);
3171 } 3243 }
@@ -3173,7 +3245,6 @@ static int ext4_commit_super(struct super_block *sb,
3173 return error; 3245 return error;
3174} 3246}
3175 3247
3176
3177/* 3248/*
3178 * Have we just finished recovery? If so, and if we are mounting (or 3249 * Have we just finished recovery? If so, and if we are mounting (or
3179 * remounting) the filesystem readonly, then we will end up with a 3250 * remounting) the filesystem readonly, then we will end up with a
@@ -3192,14 +3263,11 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
3192 if (jbd2_journal_flush(journal) < 0) 3263 if (jbd2_journal_flush(journal) < 0)
3193 goto out; 3264 goto out;
3194 3265
3195 lock_super(sb);
3196 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 3266 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
3197 sb->s_flags & MS_RDONLY) { 3267 sb->s_flags & MS_RDONLY) {
3198 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3268 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3199 sb->s_dirt = 0; 3269 ext4_commit_super(sb, 1);
3200 ext4_commit_super(sb, es, 1);
3201 } 3270 }
3202 unlock_super(sb);
3203 3271
3204out: 3272out:
3205 jbd2_journal_unlock_updates(journal); 3273 jbd2_journal_unlock_updates(journal);
@@ -3238,7 +3306,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
3238 3306
3239 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 3307 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
3240 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 3308 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
3241 ext4_commit_super(sb, es, 1); 3309 ext4_commit_super(sb, 1);
3242 3310
3243 jbd2_journal_clear_err(journal); 3311 jbd2_journal_clear_err(journal);
3244 } 3312 }
@@ -3257,29 +3325,15 @@ int ext4_force_commit(struct super_block *sb)
3257 return 0; 3325 return 0;
3258 3326
3259 journal = EXT4_SB(sb)->s_journal; 3327 journal = EXT4_SB(sb)->s_journal;
3260 if (journal) { 3328 if (journal)
3261 sb->s_dirt = 0;
3262 ret = ext4_journal_force_commit(journal); 3329 ret = ext4_journal_force_commit(journal);
3263 }
3264 3330
3265 return ret; 3331 return ret;
3266} 3332}
3267 3333
3268/*
3269 * Ext4 always journals updates to the superblock itself, so we don't
3270 * have to propagate any other updates to the superblock on disk at this
3271 * point. (We can probably nuke this function altogether, and remove
3272 * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...)
3273 */
3274static void ext4_write_super(struct super_block *sb) 3334static void ext4_write_super(struct super_block *sb)
3275{ 3335{
3276 if (EXT4_SB(sb)->s_journal) { 3336 ext4_commit_super(sb, 1);
3277 if (mutex_trylock(&sb->s_lock) != 0)
3278 BUG();
3279 sb->s_dirt = 0;
3280 } else {
3281 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
3282 }
3283} 3337}
3284 3338
3285static int ext4_sync_fs(struct super_block *sb, int wait) 3339static int ext4_sync_fs(struct super_block *sb, int wait)
@@ -3288,16 +3342,9 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
3288 tid_t target; 3342 tid_t target;
3289 3343
3290 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); 3344 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
3291 sb->s_dirt = 0; 3345 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
3292 if (EXT4_SB(sb)->s_journal) { 3346 if (wait)
3293 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, 3347 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
3294 &target)) {
3295 if (wait)
3296 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal,
3297 target);
3298 }
3299 } else {
3300 ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
3301 } 3348 }
3302 return ret; 3349 return ret;
3303} 3350}
@@ -3310,34 +3357,32 @@ static int ext4_freeze(struct super_block *sb)
3310{ 3357{
3311 int error = 0; 3358 int error = 0;
3312 journal_t *journal; 3359 journal_t *journal;
3313 sb->s_dirt = 0;
3314 3360
3315 if (!(sb->s_flags & MS_RDONLY)) { 3361 if (sb->s_flags & MS_RDONLY)
3316 journal = EXT4_SB(sb)->s_journal; 3362 return 0;
3317 3363
3318 if (journal) { 3364 journal = EXT4_SB(sb)->s_journal;
3319 /* Now we set up the journal barrier. */
3320 jbd2_journal_lock_updates(journal);
3321 3365
3322 /* 3366 /* Now we set up the journal barrier. */
3323 * We don't want to clear needs_recovery flag when we 3367 jbd2_journal_lock_updates(journal);
3324 * failed to flush the journal.
3325 */
3326 error = jbd2_journal_flush(journal);
3327 if (error < 0)
3328 goto out;
3329 }
3330 3368
3331 /* Journal blocked and flushed, clear needs_recovery flag. */ 3369 /*
3332 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3370 * Don't clear the needs_recovery flag if we failed to flush
3333 error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 3371 * the journal.
3334 if (error) 3372 */
3335 goto out; 3373 error = jbd2_journal_flush(journal);
3374 if (error < 0) {
3375 out:
3376 jbd2_journal_unlock_updates(journal);
3377 return error;
3336 } 3378 }
3379
3380 /* Journal blocked and flushed, clear needs_recovery flag. */
3381 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3382 error = ext4_commit_super(sb, 1);
3383 if (error)
3384 goto out;
3337 return 0; 3385 return 0;
3338out:
3339 jbd2_journal_unlock_updates(journal);
3340 return error;
3341} 3386}
3342 3387
3343/* 3388/*
@@ -3346,14 +3391,15 @@ out:
3346 */ 3391 */
3347static int ext4_unfreeze(struct super_block *sb) 3392static int ext4_unfreeze(struct super_block *sb)
3348{ 3393{
3349 if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) { 3394 if (sb->s_flags & MS_RDONLY)
3350 lock_super(sb); 3395 return 0;
3351 /* Reser the needs_recovery flag before the fs is unlocked. */ 3396
3352 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3397 lock_super(sb);
3353 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 3398 /* Reset the needs_recovery flag before the fs is unlocked. */
3354 unlock_super(sb); 3399 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3355 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3400 ext4_commit_super(sb, 1);
3356 } 3401 unlock_super(sb);
3402 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3357 return 0; 3403 return 0;
3358} 3404}
3359 3405
@@ -3432,22 +3478,15 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3432 (sbi->s_mount_state & EXT4_VALID_FS)) 3478 (sbi->s_mount_state & EXT4_VALID_FS))
3433 es->s_state = cpu_to_le16(sbi->s_mount_state); 3479 es->s_state = cpu_to_le16(sbi->s_mount_state);
3434 3480
3435 /* 3481 if (sbi->s_journal)
3436 * We have to unlock super so that we can wait for
3437 * transactions.
3438 */
3439 if (sbi->s_journal) {
3440 unlock_super(sb);
3441 ext4_mark_recovery_complete(sb, es); 3482 ext4_mark_recovery_complete(sb, es);
3442 lock_super(sb);
3443 }
3444 } else { 3483 } else {
3445 int ret; 3484 int ret;
3446 if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, 3485 if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3447 ~EXT4_FEATURE_RO_COMPAT_SUPP))) { 3486 ~EXT4_FEATURE_RO_COMPAT_SUPP))) {
3448 printk(KERN_WARNING "EXT4-fs: %s: couldn't " 3487 ext4_msg(sb, KERN_WARNING, "couldn't "
3449 "remount RDWR because of unsupported " 3488 "remount RDWR because of unsupported "
3450 "optional features (%x).\n", sb->s_id, 3489 "optional features (%x)",
3451 (le32_to_cpu(sbi->s_es->s_feature_ro_compat) & 3490 (le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
3452 ~EXT4_FEATURE_RO_COMPAT_SUPP)); 3491 ~EXT4_FEATURE_RO_COMPAT_SUPP));
3453 err = -EROFS; 3492 err = -EROFS;
@@ -3456,17 +3495,15 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3456 3495
3457 /* 3496 /*
3458 * Make sure the group descriptor checksums 3497 * Make sure the group descriptor checksums
3459 * are sane. If they aren't, refuse to 3498 * are sane. If they aren't, refuse to remount r/w.
3460 * remount r/w.
3461 */ 3499 */
3462 for (g = 0; g < sbi->s_groups_count; g++) { 3500 for (g = 0; g < sbi->s_groups_count; g++) {
3463 struct ext4_group_desc *gdp = 3501 struct ext4_group_desc *gdp =
3464 ext4_get_group_desc(sb, g, NULL); 3502 ext4_get_group_desc(sb, g, NULL);
3465 3503
3466 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { 3504 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
3467 printk(KERN_ERR 3505 ext4_msg(sb, KERN_ERR,
3468 "EXT4-fs: ext4_remount: " 3506 "ext4_remount: Checksum for group %u failed (%u!=%u)",
3469 "Checksum for group %u failed (%u!=%u)\n",
3470 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), 3507 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
3471 le16_to_cpu(gdp->bg_checksum)); 3508 le16_to_cpu(gdp->bg_checksum));
3472 err = -EINVAL; 3509 err = -EINVAL;
@@ -3480,11 +3517,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3480 * require a full umount/remount for now. 3517 * require a full umount/remount for now.
3481 */ 3518 */
3482 if (es->s_last_orphan) { 3519 if (es->s_last_orphan) {
3483 printk(KERN_WARNING "EXT4-fs: %s: couldn't " 3520 ext4_msg(sb, KERN_WARNING, "Couldn't "
3484 "remount RDWR because of unprocessed " 3521 "remount RDWR because of unprocessed "
3485 "orphan inode list. Please " 3522 "orphan inode list. Please "
3486 "umount/remount instead.\n", 3523 "umount/remount instead");
3487 sb->s_id);
3488 err = -EINVAL; 3524 err = -EINVAL;
3489 goto restore_opts; 3525 goto restore_opts;
3490 } 3526 }
@@ -3504,8 +3540,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3504 sb->s_flags &= ~MS_RDONLY; 3540 sb->s_flags &= ~MS_RDONLY;
3505 } 3541 }
3506 } 3542 }
3543 ext4_setup_system_zone(sb);
3507 if (sbi->s_journal == NULL) 3544 if (sbi->s_journal == NULL)
3508 ext4_commit_super(sb, es, 1); 3545 ext4_commit_super(sb, 1);
3509 3546
3510#ifdef CONFIG_QUOTA 3547#ifdef CONFIG_QUOTA
3511 /* Release old quota file names */ 3548 /* Release old quota file names */
@@ -3515,6 +3552,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3515 kfree(old_opts.s_qf_names[i]); 3552 kfree(old_opts.s_qf_names[i]);
3516#endif 3553#endif
3517 return 0; 3554 return 0;
3555
3518restore_opts: 3556restore_opts:
3519 sb->s_flags = old_sb_flags; 3557 sb->s_flags = old_sb_flags;
3520 sbi->s_mount_opt = old_opts.s_mount_opt; 3558 sbi->s_mount_opt = old_opts.s_mount_opt;
@@ -3545,9 +3583,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3545 if (test_opt(sb, MINIX_DF)) { 3583 if (test_opt(sb, MINIX_DF)) {
3546 sbi->s_overhead_last = 0; 3584 sbi->s_overhead_last = 0;
3547 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { 3585 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
3548 ext4_group_t ngroups = sbi->s_groups_count, i; 3586 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3549 ext4_fsblk_t overhead = 0; 3587 ext4_fsblk_t overhead = 0;
3550 smp_rmb();
3551 3588
3552 /* 3589 /*
3553 * Compute the overhead (FS structures). This is constant 3590 * Compute the overhead (FS structures). This is constant
@@ -3599,11 +3636,12 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3599 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 3636 le64_to_cpup((void *)es->s_uuid + sizeof(u64));
3600 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 3637 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
3601 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 3638 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
3639
3602 return 0; 3640 return 0;
3603} 3641}
3604 3642
3605/* Helper function for writing quotas on sync - we need to start transaction before quota file 3643/* Helper function for writing quotas on sync - we need to start transaction
3606 * is locked for write. Otherwise the are possible deadlocks: 3644 * before quota file is locked for write. Otherwise the are possible deadlocks:
3607 * Process 1 Process 2 3645 * Process 1 Process 2
3608 * ext4_create() quota_sync() 3646 * ext4_create() quota_sync()
3609 * jbd2_journal_start() write_dquot() 3647 * jbd2_journal_start() write_dquot()
@@ -3627,7 +3665,7 @@ static int ext4_write_dquot(struct dquot *dquot)
3627 3665
3628 inode = dquot_to_inode(dquot); 3666 inode = dquot_to_inode(dquot);
3629 handle = ext4_journal_start(inode, 3667 handle = ext4_journal_start(inode,
3630 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 3668 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
3631 if (IS_ERR(handle)) 3669 if (IS_ERR(handle))
3632 return PTR_ERR(handle); 3670 return PTR_ERR(handle);
3633 ret = dquot_commit(dquot); 3671 ret = dquot_commit(dquot);
@@ -3643,7 +3681,7 @@ static int ext4_acquire_dquot(struct dquot *dquot)
3643 handle_t *handle; 3681 handle_t *handle;
3644 3682
3645 handle = ext4_journal_start(dquot_to_inode(dquot), 3683 handle = ext4_journal_start(dquot_to_inode(dquot),
3646 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 3684 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
3647 if (IS_ERR(handle)) 3685 if (IS_ERR(handle))
3648 return PTR_ERR(handle); 3686 return PTR_ERR(handle);
3649 ret = dquot_acquire(dquot); 3687 ret = dquot_acquire(dquot);
@@ -3659,7 +3697,7 @@ static int ext4_release_dquot(struct dquot *dquot)
3659 handle_t *handle; 3697 handle_t *handle;
3660 3698
3661 handle = ext4_journal_start(dquot_to_inode(dquot), 3699 handle = ext4_journal_start(dquot_to_inode(dquot),
3662 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 3700 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
3663 if (IS_ERR(handle)) { 3701 if (IS_ERR(handle)) {
3664 /* Release dquot anyway to avoid endless cycle in dqput() */ 3702 /* Release dquot anyway to avoid endless cycle in dqput() */
3665 dquot_release(dquot); 3703 dquot_release(dquot);
@@ -3707,7 +3745,7 @@ static int ext4_write_info(struct super_block *sb, int type)
3707static int ext4_quota_on_mount(struct super_block *sb, int type) 3745static int ext4_quota_on_mount(struct super_block *sb, int type)
3708{ 3746{
3709 return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], 3747 return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
3710 EXT4_SB(sb)->s_jquota_fmt, type); 3748 EXT4_SB(sb)->s_jquota_fmt, type);
3711} 3749}
3712 3750
3713/* 3751/*
@@ -3738,9 +3776,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3738 if (EXT4_SB(sb)->s_qf_names[type]) { 3776 if (EXT4_SB(sb)->s_qf_names[type]) {
3739 /* Quotafile not in fs root? */ 3777 /* Quotafile not in fs root? */
3740 if (path.dentry->d_parent != sb->s_root) 3778 if (path.dentry->d_parent != sb->s_root)
3741 printk(KERN_WARNING 3779 ext4_msg(sb, KERN_WARNING,
3742 "EXT4-fs: Quota file not on filesystem root. " 3780 "Quota file not on filesystem root. "
3743 "Journaled quota will not work.\n"); 3781 "Journaled quota will not work");
3744 } 3782 }
3745 3783
3746 /* 3784 /*
@@ -3823,8 +3861,8 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
3823 handle_t *handle = journal_current_handle(); 3861 handle_t *handle = journal_current_handle();
3824 3862
3825 if (EXT4_SB(sb)->s_journal && !handle) { 3863 if (EXT4_SB(sb)->s_journal && !handle) {
3826 printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" 3864 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
3827 " cancelled because transaction is not started.\n", 3865 " cancelled because transaction is not started",
3828 (unsigned long long)off, (unsigned long long)len); 3866 (unsigned long long)off, (unsigned long long)len);
3829 return -EIO; 3867 return -EIO;
3830 } 3868 }
@@ -3878,10 +3916,10 @@ out:
3878 3916
3879#endif 3917#endif
3880 3918
3881static int ext4_get_sb(struct file_system_type *fs_type, 3919static int ext4_get_sb(struct file_system_type *fs_type, int flags,
3882 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 3920 const char *dev_name, void *data, struct vfsmount *mnt)
3883{ 3921{
3884 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); 3922 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
3885} 3923}
3886 3924
3887static struct file_system_type ext4_fs_type = { 3925static struct file_system_type ext4_fs_type = {
@@ -3893,14 +3931,14 @@ static struct file_system_type ext4_fs_type = {
3893}; 3931};
3894 3932
3895#ifdef CONFIG_EXT4DEV_COMPAT 3933#ifdef CONFIG_EXT4DEV_COMPAT
3896static int ext4dev_get_sb(struct file_system_type *fs_type, 3934static int ext4dev_get_sb(struct file_system_type *fs_type, int flags,
3897 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 3935 const char *dev_name, void *data,struct vfsmount *mnt)
3898{ 3936{
3899 printk(KERN_WARNING "EXT4-fs: Update your userspace programs " 3937 printk(KERN_WARNING "EXT4-fs (%s): Update your userspace programs "
3900 "to mount using ext4\n"); 3938 "to mount using ext4\n", dev_name);
3901 printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility " 3939 printk(KERN_WARNING "EXT4-fs (%s): ext4dev backwards compatibility "
3902 "will go away by 2.6.31\n"); 3940 "will go away by 2.6.31\n", dev_name);
3903 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); 3941 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
3904} 3942}
3905 3943
3906static struct file_system_type ext4dev_fs_type = { 3944static struct file_system_type ext4dev_fs_type = {
@@ -3917,13 +3955,16 @@ static int __init init_ext4_fs(void)
3917{ 3955{
3918 int err; 3956 int err;
3919 3957
3958 err = init_ext4_system_zone();
3959 if (err)
3960 return err;
3920 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 3961 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
3921 if (!ext4_kset) 3962 if (!ext4_kset)
3922 return -ENOMEM; 3963 goto out4;
3923 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 3964 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
3924 err = init_ext4_mballoc(); 3965 err = init_ext4_mballoc();
3925 if (err) 3966 if (err)
3926 return err; 3967 goto out3;
3927 3968
3928 err = init_ext4_xattr(); 3969 err = init_ext4_xattr();
3929 if (err) 3970 if (err)
@@ -3948,6 +3989,11 @@ out1:
3948 exit_ext4_xattr(); 3989 exit_ext4_xattr();
3949out2: 3990out2:
3950 exit_ext4_mballoc(); 3991 exit_ext4_mballoc();
3992out3:
3993 remove_proc_entry("fs/ext4", NULL);
3994 kset_unregister(ext4_kset);
3995out4:
3996 exit_ext4_system_zone();
3951 return err; 3997 return err;
3952} 3998}
3953 3999
@@ -3962,6 +4008,7 @@ static void __exit exit_ext4_fs(void)
3962 exit_ext4_mballoc(); 4008 exit_ext4_mballoc();
3963 remove_proc_entry("fs/ext4", NULL); 4009 remove_proc_entry("fs/ext4", NULL);
3964 kset_unregister(ext4_kset); 4010 kset_unregister(ext4_kset);
4011 exit_ext4_system_zone();
3965} 4012}
3966 4013
3967MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 4014MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c1462d43e721..941c8425c10b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -30,6 +30,7 @@
30#include <linux/dnotify.h> 30#include <linux/dnotify.h>
31#include <linux/statfs.h> 31#include <linux/statfs.h>
32#include <linux/security.h> 32#include <linux/security.h>
33#include <linux/ima.h>
33 34
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
35 36
@@ -986,6 +987,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag)
986 &hugetlbfs_file_operations); 987 &hugetlbfs_file_operations);
987 if (!file) 988 if (!file)
988 goto out_dentry; /* inode is already attached */ 989 goto out_dentry; /* inode is already attached */
990 ima_counts_get(file);
989 991
990 return file; 992 return file;
991 993
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 82d9c42b8bac..286f38dfc6c0 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -414,10 +414,6 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
414 switch (cmd) { 414 switch (cmd) {
415 case FIBMAP: 415 case FIBMAP:
416 return ioctl_fibmap(filp, p); 416 return ioctl_fibmap(filp, p);
417 case FS_IOC_FIEMAP:
418 return ioctl_fiemap(filp, arg);
419 case FIGETBSZ:
420 return put_user(inode->i_sb->s_blocksize, p);
421 case FIONREAD: 417 case FIONREAD:
422 return put_user(i_size_read(inode) - filp->f_pos, p); 418 return put_user(i_size_read(inode) - filp->f_pos, p);
423 } 419 }
@@ -557,6 +553,16 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
557 error = ioctl_fsthaw(filp); 553 error = ioctl_fsthaw(filp);
558 break; 554 break;
559 555
556 case FS_IOC_FIEMAP:
557 return ioctl_fiemap(filp, arg);
558
559 case FIGETBSZ:
560 {
561 struct inode *inode = filp->f_path.dentry->d_inode;
562 int __user *p = (int __user *)arg;
563 return put_user(inode->i_sb->s_blocksize, p);
564 }
565
560 default: 566 default:
561 if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) 567 if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
562 error = file_ioctl(filp, cmd, arg); 568 error = file_ioctl(filp, cmd, arg);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 58144102bf25..62be7d294ec2 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1781,7 +1781,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
1781 * Journal abort has very specific semantics, which we describe 1781 * Journal abort has very specific semantics, which we describe
1782 * for journal abort. 1782 * for journal abort.
1783 * 1783 *
1784 * Two internal function, which provide abort to te jbd layer 1784 * Two internal functions, which provide abort to the jbd layer
1785 * itself are here. 1785 * itself are here.
1786 */ 1786 */
1787 1787
@@ -1879,7 +1879,7 @@ void jbd2_journal_abort(journal_t *journal, int errno)
1879 * int jbd2_journal_errno () - returns the journal's error state. 1879 * int jbd2_journal_errno () - returns the journal's error state.
1880 * @journal: journal to examine. 1880 * @journal: journal to examine.
1881 * 1881 *
1882 * This is the errno numbet set with jbd2_journal_abort(), the last 1882 * This is the errno number set with jbd2_journal_abort(), the last
1883 * time the journal was mounted - if the journal was stopped 1883 * time the journal was mounted - if the journal was stopped
1884 * without calling abort this will be 0. 1884 * without calling abort this will be 0.
1885 * 1885 *
@@ -1903,7 +1903,7 @@ int jbd2_journal_errno(journal_t *journal)
1903 * int jbd2_journal_clear_err () - clears the journal's error state 1903 * int jbd2_journal_clear_err () - clears the journal's error state
1904 * @journal: journal to act on. 1904 * @journal: journal to act on.
1905 * 1905 *
1906 * An error must be cleared or Acked to take a FS out of readonly 1906 * An error must be cleared or acked to take a FS out of readonly
1907 * mode. 1907 * mode.
1908 */ 1908 */
1909int jbd2_journal_clear_err(journal_t *journal) 1909int jbd2_journal_clear_err(journal_t *journal)
@@ -1923,7 +1923,7 @@ int jbd2_journal_clear_err(journal_t *journal)
1923 * void jbd2_journal_ack_err() - Ack journal err. 1923 * void jbd2_journal_ack_err() - Ack journal err.
1924 * @journal: journal to act on. 1924 * @journal: journal to act on.
1925 * 1925 *
1926 * An error must be cleared or Acked to take a FS out of readonly 1926 * An error must be cleared or acked to take a FS out of readonly
1927 * mode. 1927 * mode.
1928 */ 1928 */
1929void jbd2_journal_ack_err(journal_t *journal) 1929void jbd2_journal_ack_err(journal_t *journal)
diff --git a/fs/mpage.c b/fs/mpage.c
index 680ba60863ff..42381bd6543b 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -379,7 +379,8 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
379 struct buffer_head map_bh; 379 struct buffer_head map_bh;
380 unsigned long first_logical_block = 0; 380 unsigned long first_logical_block = 0;
381 381
382 clear_buffer_mapped(&map_bh); 382 map_bh.b_state = 0;
383 map_bh.b_size = 0;
383 for (page_idx = 0; page_idx < nr_pages; page_idx++) { 384 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
384 struct page *page = list_entry(pages->prev, struct page, lru); 385 struct page *page = list_entry(pages->prev, struct page, lru);
385 386
@@ -412,7 +413,8 @@ int mpage_readpage(struct page *page, get_block_t get_block)
412 struct buffer_head map_bh; 413 struct buffer_head map_bh;
413 unsigned long first_logical_block = 0; 414 unsigned long first_logical_block = 0;
414 415
415 clear_buffer_mapped(&map_bh); 416 map_bh.b_state = 0;
417 map_bh.b_size = 0;
416 bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio, 418 bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
417 &map_bh, &first_logical_block, get_block); 419 &map_bh, &first_logical_block, get_block);
418 if (bio) 420 if (bio)
diff --git a/fs/namei.c b/fs/namei.c
index 967c3db92724..c82805d088e1 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -853,7 +853,8 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
853 err = inode_permission(nd->path.dentry->d_inode, 853 err = inode_permission(nd->path.dentry->d_inode,
854 MAY_EXEC); 854 MAY_EXEC);
855 if (!err) 855 if (!err)
856 err = ima_path_check(&nd->path, MAY_EXEC); 856 err = ima_path_check(&nd->path, MAY_EXEC,
857 IMA_COUNT_UPDATE);
857 if (err) 858 if (err)
858 break; 859 break;
859 860
@@ -1515,7 +1516,8 @@ int may_open(struct path *path, int acc_mode, int flag)
1515 return error; 1516 return error;
1516 1517
1517 error = ima_path_check(path, 1518 error = ima_path_check(path,
1518 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC)); 1519 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC),
1520 IMA_COUNT_UPDATE);
1519 if (error) 1521 if (error)
1520 return error; 1522 return error;
1521 /* 1523 /*
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b660435978d2..bd584bcf1d9f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -55,6 +55,7 @@
55#include <linux/security.h> 55#include <linux/security.h>
56#endif /* CONFIG_NFSD_V4 */ 56#endif /* CONFIG_NFSD_V4 */
57#include <linux/jhash.h> 57#include <linux/jhash.h>
58#include <linux/ima.h>
58 59
59#include <asm/uaccess.h> 60#include <asm/uaccess.h>
60 61
@@ -735,6 +736,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
735 flags, cred); 736 flags, cred);
736 if (IS_ERR(*filp)) 737 if (IS_ERR(*filp))
737 host_err = PTR_ERR(*filp); 738 host_err = PTR_ERR(*filp);
739 else
740 ima_counts_get(*filp);
738out_nfserr: 741out_nfserr:
739 err = nfserrno(host_err); 742 err = nfserrno(host_err);
740out: 743out:
@@ -2024,6 +2027,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
2024 struct dentry *dentry, int acc) 2027 struct dentry *dentry, int acc)
2025{ 2028{
2026 struct inode *inode = dentry->d_inode; 2029 struct inode *inode = dentry->d_inode;
2030 struct path path;
2027 int err; 2031 int err;
2028 2032
2029 if (acc == NFSD_MAY_NOP) 2033 if (acc == NFSD_MAY_NOP)
@@ -2096,7 +2100,17 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
2096 if (err == -EACCES && S_ISREG(inode->i_mode) && 2100 if (err == -EACCES && S_ISREG(inode->i_mode) &&
2097 acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE)) 2101 acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
2098 err = inode_permission(inode, MAY_EXEC); 2102 err = inode_permission(inode, MAY_EXEC);
2103 if (err)
2104 goto nfsd_out;
2099 2105
2106 /* Do integrity (permission) checking now, but defer incrementing
2107 * IMA counts to the actual file open.
2108 */
2109 path.mnt = exp->ex_path.mnt;
2110 path.dentry = dentry;
2111 err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC),
2112 IMA_COUNT_LEAVE);
2113nfsd_out:
2100 return err? nfserrno(err) : 0; 2114 return err? nfserrno(err) : 0;
2101} 2115}
2102 2116
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3326bbf9ab95..1539e630c47d 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2128,9 +2128,15 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
2128 if (copy_from_user(page, buf, count)) 2128 if (copy_from_user(page, buf, count))
2129 goto out_free; 2129 goto out_free;
2130 2130
2131 /* Guard against adverse ptrace interaction */
2132 length = mutex_lock_interruptible(&task->cred_guard_mutex);
2133 if (length < 0)
2134 goto out_free;
2135
2131 length = security_setprocattr(task, 2136 length = security_setprocattr(task,
2132 (char*)file->f_path.dentry->d_name.name, 2137 (char*)file->f_path.dentry->d_name.name,
2133 (void*)page, count); 2138 (void*)page, count);
2139 mutex_unlock(&task->cred_guard_mutex);
2134out_free: 2140out_free:
2135 free_page((unsigned long) page); 2141 free_page((unsigned long) page);
2136out: 2142out:
diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h
index 788850ba4e75..1fbdea4f08eb 100644
--- a/include/linux/cyclades.h
+++ b/include/linux/cyclades.h
@@ -142,19 +142,6 @@ struct CYZ_BOOT_CTRL {
142 142
143 143
144#ifndef DP_WINDOW_SIZE 144#ifndef DP_WINDOW_SIZE
145/* #include "cyclomz.h" */
146/****************** ****************** *******************/
147/*
148 * The data types defined below are used in all ZFIRM interface
149 * data structures. They accomodate differences between HW
150 * architectures and compilers.
151 */
152
153typedef __u64 ucdouble; /* 64 bits, unsigned */
154typedef __u32 uclong; /* 32 bits, unsigned */
155typedef __u16 ucshort; /* 16 bits, unsigned */
156typedef __u8 ucchar; /* 8 bits, unsigned */
157
158/* 145/*
159 * Memory Window Sizes 146 * Memory Window Sizes
160 */ 147 */
@@ -507,16 +494,20 @@ struct ZFW_CTRL {
507 494
508/* Per card data structure */ 495/* Per card data structure */
509struct cyclades_card { 496struct cyclades_card {
510 void __iomem *base_addr; 497 void __iomem *base_addr;
511 void __iomem *ctl_addr; 498 union {
512 int irq; 499 void __iomem *p9050;
513 unsigned int num_chips; /* 0 if card absent, -1 if Z/PCI, else Y */ 500 struct RUNTIME_9060 __iomem *p9060;
514 unsigned int first_line; /* minor number of first channel on card */ 501 } ctl_addr;
515 unsigned int nports; /* Number of ports in the card */ 502 int irq;
516 int bus_index; /* address shift - 0 for ISA, 1 for PCI */ 503 unsigned int num_chips; /* 0 if card absent, -1 if Z/PCI, else Y */
517 int intr_enabled; /* FW Interrupt flag - 0 disabled, 1 enabled */ 504 unsigned int first_line; /* minor number of first channel on card */
518 spinlock_t card_lock; 505 unsigned int nports; /* Number of ports in the card */
519 struct cyclades_port *ports; 506 int bus_index; /* address shift - 0 for ISA, 1 for PCI */
507 int intr_enabled; /* FW Interrupt flag - 0 disabled, 1 enabled */
508 u32 hw_ver;
509 spinlock_t card_lock;
510 struct cyclades_port *ports;
520}; 511};
521 512
522/*************************************** 513/***************************************
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 9fed365a598b..867cb68d8461 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -26,6 +26,9 @@
26#include <asm/io.h> 26#include <asm/io.h>
27#include <asm/mutex.h> 27#include <asm/mutex.h>
28 28
29/* for request_sense */
30#include <linux/cdrom.h>
31
29#if defined(CONFIG_CRIS) || defined(CONFIG_FRV) || defined(CONFIG_MN10300) 32#if defined(CONFIG_CRIS) || defined(CONFIG_FRV) || defined(CONFIG_MN10300)
30# define SUPPORT_VLB_SYNC 0 33# define SUPPORT_VLB_SYNC 0
31#else 34#else
@@ -324,7 +327,6 @@ struct ide_cmd {
324 unsigned int cursg_ofs; 327 unsigned int cursg_ofs;
325 328
326 struct request *rq; /* copy of request */ 329 struct request *rq; /* copy of request */
327 void *special; /* valid_t generally */
328}; 330};
329 331
330/* ATAPI packet command flags */ 332/* ATAPI packet command flags */
@@ -360,11 +362,7 @@ struct ide_atapi_pc {
360 362
361 /* data buffer */ 363 /* data buffer */
362 u8 *buf; 364 u8 *buf;
363 /* current buffer position */
364 u8 *cur_pos;
365 int buf_size; 365 int buf_size;
366 /* missing/available data on the current buffer */
367 int b_count;
368 366
369 /* the corresponding request */ 367 /* the corresponding request */
370 struct request *rq; 368 struct request *rq;
@@ -377,10 +375,6 @@ struct ide_atapi_pc {
377 */ 375 */
378 u8 pc_buf[IDE_PC_BUFFER_SIZE]; 376 u8 pc_buf[IDE_PC_BUFFER_SIZE];
379 377
380 /* idetape only */
381 struct idetape_bh *bh;
382 char *b_data;
383
384 unsigned long timeout; 378 unsigned long timeout;
385}; 379};
386 380
@@ -593,16 +587,16 @@ struct ide_drive_s {
593 /* callback for packet commands */ 587 /* callback for packet commands */
594 int (*pc_callback)(struct ide_drive_s *, int); 588 int (*pc_callback)(struct ide_drive_s *, int);
595 589
596 void (*pc_update_buffers)(struct ide_drive_s *, struct ide_atapi_pc *);
597 int (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *,
598 unsigned int, int);
599
600 ide_startstop_t (*irq_handler)(struct ide_drive_s *); 590 ide_startstop_t (*irq_handler)(struct ide_drive_s *);
601 591
602 unsigned long atapi_flags; 592 unsigned long atapi_flags;
603 593
604 struct ide_atapi_pc request_sense_pc; 594 struct ide_atapi_pc request_sense_pc;
605 struct request request_sense_rq; 595
596 /* current sense rq and buffer */
597 bool sense_rq_armed;
598 struct request sense_rq;
599 struct request_sense sense_data;
606}; 600};
607 601
608typedef struct ide_drive_s ide_drive_t; 602typedef struct ide_drive_s ide_drive_t;
@@ -1174,7 +1168,10 @@ int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *);
1174int ide_do_start_stop(ide_drive_t *, struct gendisk *, int); 1168int ide_do_start_stop(ide_drive_t *, struct gendisk *, int);
1175int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); 1169int ide_set_media_lock(ide_drive_t *, struct gendisk *, int);
1176void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); 1170void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
1177void ide_retry_pc(ide_drive_t *, struct gendisk *); 1171void ide_retry_pc(ide_drive_t *drive);
1172
1173void ide_prep_sense(ide_drive_t *drive, struct request *rq);
1174int ide_queue_sense_rq(ide_drive_t *drive, void *special);
1178 1175
1179int ide_cd_expiry(ide_drive_t *); 1176int ide_cd_expiry(ide_drive_t *);
1180 1177
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 0e2aa45cb0ce..b1b827d091a9 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -13,14 +13,17 @@
13#include <linux/fs.h> 13#include <linux/fs.h>
14struct linux_binprm; 14struct linux_binprm;
15 15
16#define IMA_COUNT_UPDATE 1
17#define IMA_COUNT_LEAVE 0
18
16#ifdef CONFIG_IMA 19#ifdef CONFIG_IMA
17extern int ima_bprm_check(struct linux_binprm *bprm); 20extern int ima_bprm_check(struct linux_binprm *bprm);
18extern int ima_inode_alloc(struct inode *inode); 21extern int ima_inode_alloc(struct inode *inode);
19extern void ima_inode_free(struct inode *inode); 22extern void ima_inode_free(struct inode *inode);
20extern int ima_path_check(struct path *path, int mask); 23extern int ima_path_check(struct path *path, int mask, int update_counts);
21extern void ima_file_free(struct file *file); 24extern void ima_file_free(struct file *file);
22extern int ima_file_mmap(struct file *file, unsigned long prot); 25extern int ima_file_mmap(struct file *file, unsigned long prot);
23extern void ima_shm_check(struct file *file); 26extern void ima_counts_get(struct file *file);
24 27
25#else 28#else
26static inline int ima_bprm_check(struct linux_binprm *bprm) 29static inline int ima_bprm_check(struct linux_binprm *bprm)
@@ -38,7 +41,7 @@ static inline void ima_inode_free(struct inode *inode)
38 return; 41 return;
39} 42}
40 43
41static inline int ima_path_check(struct path *path, int mask) 44static inline int ima_path_check(struct path *path, int mask, int update_counts)
42{ 45{
43 return 0; 46 return 0;
44} 47}
@@ -53,7 +56,7 @@ static inline int ima_file_mmap(struct file *file, unsigned long prot)
53 return 0; 56 return 0;
54} 57}
55 58
56static inline void ima_shm_check(struct file *file) 59static inline void ima_counts_get(struct file *file)
57{ 60{
58 return; 61 return;
59} 62}
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 889bf99eca6d..6646bfc7b892 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -145,8 +145,8 @@ extern struct cred init_cred;
145 .group_leader = &tsk, \ 145 .group_leader = &tsk, \
146 .real_cred = &init_cred, \ 146 .real_cred = &init_cred, \
147 .cred = &init_cred, \ 147 .cred = &init_cred, \
148 .cred_exec_mutex = \ 148 .cred_guard_mutex = \
149 __MUTEX_INITIALIZER(tsk.cred_exec_mutex), \ 149 __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \
150 .comm = "swapper", \ 150 .comm = "swapper", \
151 .thread = INIT_THREAD, \ 151 .thread = INIT_THREAD, \
152 .fs = &init_fs, \ 152 .fs = &init_fs, \
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 8cc137911b34..3db5d8d37485 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -119,7 +119,7 @@ struct kvm_run {
119 __u32 error_code; 119 __u32 error_code;
120 } ex; 120 } ex;
121 /* KVM_EXIT_IO */ 121 /* KVM_EXIT_IO */
122 struct kvm_io { 122 struct {
123#define KVM_EXIT_IO_IN 0 123#define KVM_EXIT_IO_IN 0
124#define KVM_EXIT_IO_OUT 1 124#define KVM_EXIT_IO_OUT 1
125 __u8 direction; 125 __u8 direction;
@@ -224,10 +224,10 @@ struct kvm_interrupt {
224/* for KVM_GET_DIRTY_LOG */ 224/* for KVM_GET_DIRTY_LOG */
225struct kvm_dirty_log { 225struct kvm_dirty_log {
226 __u32 slot; 226 __u32 slot;
227 __u32 padding; 227 __u32 padding1;
228 union { 228 union {
229 void __user *dirty_bitmap; /* one bit per page */ 229 void __user *dirty_bitmap; /* one bit per page */
230 __u64 padding; 230 __u64 padding2;
231 }; 231 };
232}; 232};
233 233
@@ -409,6 +409,10 @@ struct kvm_trace_rec {
409#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT 409#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
410#define KVM_CAP_DEVICE_DEASSIGNMENT 27 410#define KVM_CAP_DEVICE_DEASSIGNMENT 27
411#endif 411#endif
412#ifdef __KVM_HAVE_MSIX
413#define KVM_CAP_DEVICE_MSIX 28
414#endif
415#define KVM_CAP_ASSIGN_DEV_IRQ 29
412/* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ 416/* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
413#define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 417#define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
414 418
@@ -482,11 +486,18 @@ struct kvm_irq_routing {
482#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ 486#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
483 struct kvm_assigned_pci_dev) 487 struct kvm_assigned_pci_dev)
484#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) 488#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
489/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */
485#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ 490#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
486 struct kvm_assigned_irq) 491 struct kvm_assigned_irq)
492#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq)
487#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) 493#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71)
488#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ 494#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
489 struct kvm_assigned_pci_dev) 495 struct kvm_assigned_pci_dev)
496#define KVM_ASSIGN_SET_MSIX_NR \
497 _IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr)
498#define KVM_ASSIGN_SET_MSIX_ENTRY \
499 _IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
500#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
490 501
491/* 502/*
492 * ioctls for vcpu fds 503 * ioctls for vcpu fds
@@ -577,6 +588,8 @@ struct kvm_debug_guest {
577#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) 588#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18)
578#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) 589#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19)
579 590
591#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
592
580struct kvm_assigned_pci_dev { 593struct kvm_assigned_pci_dev {
581 __u32 assigned_dev_id; 594 __u32 assigned_dev_id;
582 __u32 busnr; 595 __u32 busnr;
@@ -587,6 +600,17 @@ struct kvm_assigned_pci_dev {
587 }; 600 };
588}; 601};
589 602
603#define KVM_DEV_IRQ_HOST_INTX (1 << 0)
604#define KVM_DEV_IRQ_HOST_MSI (1 << 1)
605#define KVM_DEV_IRQ_HOST_MSIX (1 << 2)
606
607#define KVM_DEV_IRQ_GUEST_INTX (1 << 8)
608#define KVM_DEV_IRQ_GUEST_MSI (1 << 9)
609#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10)
610
611#define KVM_DEV_IRQ_HOST_MASK 0x00ff
612#define KVM_DEV_IRQ_GUEST_MASK 0xff00
613
590struct kvm_assigned_irq { 614struct kvm_assigned_irq {
591 __u32 assigned_dev_id; 615 __u32 assigned_dev_id;
592 __u32 host_irq; 616 __u32 host_irq;
@@ -602,9 +626,19 @@ struct kvm_assigned_irq {
602 }; 626 };
603}; 627};
604 628
605#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
606 629
607#define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI 630struct kvm_assigned_msix_nr {
608#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) 631 __u32 assigned_dev_id;
632 __u16 entry_nr;
633 __u16 padding;
634};
635
636#define KVM_MAX_MSIX_PER_DEV 512
637struct kvm_assigned_msix_entry {
638 __u32 assigned_dev_id;
639 __u32 gsi;
640 __u16 entry; /* The index of entry in the MSI-X table */
641 __u16 padding[3];
642};
609 643
610#endif 644#endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 894a56e365e8..aacc5449f586 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -38,6 +38,7 @@
38#define KVM_REQ_UNHALT 6 38#define KVM_REQ_UNHALT 6
39#define KVM_REQ_MMU_SYNC 7 39#define KVM_REQ_MMU_SYNC 7
40#define KVM_REQ_KVMCLOCK_UPDATE 8 40#define KVM_REQ_KVMCLOCK_UPDATE 8
41#define KVM_REQ_KICK 9
41 42
42#define KVM_USERSPACE_IRQ_SOURCE_ID 0 43#define KVM_USERSPACE_IRQ_SOURCE_ID 0
43 44
@@ -72,7 +73,6 @@ struct kvm_vcpu {
72 struct mutex mutex; 73 struct mutex mutex;
73 int cpu; 74 int cpu;
74 struct kvm_run *run; 75 struct kvm_run *run;
75 int guest_mode;
76 unsigned long requests; 76 unsigned long requests;
77 unsigned long guest_debug; 77 unsigned long guest_debug;
78 int fpu_active; 78 int fpu_active;
@@ -298,6 +298,7 @@ int kvm_arch_hardware_setup(void);
298void kvm_arch_hardware_unsetup(void); 298void kvm_arch_hardware_unsetup(void);
299void kvm_arch_check_processor_compat(void *rtn); 299void kvm_arch_check_processor_compat(void *rtn);
300int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); 300int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
301int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
301 302
302void kvm_free_physmem(struct kvm *kvm); 303void kvm_free_physmem(struct kvm *kvm);
303 304
@@ -319,6 +320,13 @@ struct kvm_irq_ack_notifier {
319 void (*irq_acked)(struct kvm_irq_ack_notifier *kian); 320 void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
320}; 321};
321 322
323#define KVM_ASSIGNED_MSIX_PENDING 0x1
324struct kvm_guest_msix_entry {
325 u32 vector;
326 u16 entry;
327 u16 flags;
328};
329
322struct kvm_assigned_dev_kernel { 330struct kvm_assigned_dev_kernel {
323 struct kvm_irq_ack_notifier ack_notifier; 331 struct kvm_irq_ack_notifier ack_notifier;
324 struct work_struct interrupt_work; 332 struct work_struct interrupt_work;
@@ -326,18 +334,18 @@ struct kvm_assigned_dev_kernel {
326 int assigned_dev_id; 334 int assigned_dev_id;
327 int host_busnr; 335 int host_busnr;
328 int host_devfn; 336 int host_devfn;
337 unsigned int entries_nr;
329 int host_irq; 338 int host_irq;
330 bool host_irq_disabled; 339 bool host_irq_disabled;
340 struct msix_entry *host_msix_entries;
331 int guest_irq; 341 int guest_irq;
332#define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) 342 struct kvm_guest_msix_entry *guest_msix_entries;
333#define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1)
334#define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8)
335#define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9)
336 unsigned long irq_requested_type; 343 unsigned long irq_requested_type;
337 int irq_source_id; 344 int irq_source_id;
338 int flags; 345 int flags;
339 struct pci_dev *dev; 346 struct pci_dev *dev;
340 struct kvm *kvm; 347 struct kvm *kvm;
348 spinlock_t assigned_dev_lock;
341}; 349};
342 350
343struct kvm_irq_mask_notifier { 351struct kvm_irq_mask_notifier {
@@ -360,6 +368,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
360int kvm_request_irq_source_id(struct kvm *kvm); 368int kvm_request_irq_source_id(struct kvm *kvm);
361void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); 369void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
362 370
371/* For vcpu->arch.iommu_flags */
372#define KVM_IOMMU_CACHE_COHERENCY 0x1
373
363#ifdef CONFIG_IOMMU_API 374#ifdef CONFIG_IOMMU_API
364int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, 375int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
365 unsigned long npages); 376 unsigned long npages);
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 2b8318c83e53..fb46efbeabec 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -40,4 +40,31 @@ typedef unsigned long hfn_t;
40 40
41typedef hfn_t pfn_t; 41typedef hfn_t pfn_t;
42 42
43union kvm_ioapic_redirect_entry {
44 u64 bits;
45 struct {
46 u8 vector;
47 u8 delivery_mode:3;
48 u8 dest_mode:1;
49 u8 delivery_status:1;
50 u8 polarity:1;
51 u8 remote_irr:1;
52 u8 trig_mode:1;
53 u8 mask:1;
54 u8 reserve:7;
55 u8 reserved[4];
56 u8 dest_id;
57 } fields;
58};
59
60struct kvm_lapic_irq {
61 u32 vector;
62 u32 delivery_mode;
63 u32 dest_mode;
64 u32 level;
65 u32 trig_mode;
66 u32 shorthand;
67 u32 dest_id;
68};
69
43#endif /* __KVM_TYPES_H__ */ 70#endif /* __KVM_TYPES_H__ */
diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
new file mode 100644
index 000000000000..e461b2c3d711
--- /dev/null
+++ b/include/linux/lsm_audit.h
@@ -0,0 +1,111 @@
1/*
2 * Common LSM logging functions
3 * Heavily borrowed from selinux/avc.h
4 *
5 * Author : Etienne BASSET <etienne.basset@ensta.org>
6 *
7 * All credits to : Stephen Smalley, <sds@epoch.ncsc.mil>
8 * All BUGS to : Etienne BASSET <etienne.basset@ensta.org>
9 */
10#ifndef _LSM_COMMON_LOGGING_
11#define _LSM_COMMON_LOGGING_
12
13#include <linux/stddef.h>
14#include <linux/errno.h>
15#include <linux/kernel.h>
16#include <linux/kdev_t.h>
17#include <linux/spinlock.h>
18#include <linux/init.h>
19#include <linux/audit.h>
20#include <linux/in6.h>
21#include <linux/path.h>
22#include <linux/key.h>
23#include <linux/skbuff.h>
24#include <asm/system.h>
25
26
27/* Auxiliary data to use in generating the audit record. */
28struct common_audit_data {
29 char type;
30#define LSM_AUDIT_DATA_FS 1
31#define LSM_AUDIT_DATA_NET 2
32#define LSM_AUDIT_DATA_CAP 3
33#define LSM_AUDIT_DATA_IPC 4
34#define LSM_AUDIT_DATA_TASK 5
35#define LSM_AUDIT_DATA_KEY 6
36 struct task_struct *tsk;
37 union {
38 struct {
39 struct path path;
40 struct inode *inode;
41 } fs;
42 struct {
43 int netif;
44 struct sock *sk;
45 u16 family;
46 __be16 dport;
47 __be16 sport;
48 union {
49 struct {
50 __be32 daddr;
51 __be32 saddr;
52 } v4;
53 struct {
54 struct in6_addr daddr;
55 struct in6_addr saddr;
56 } v6;
57 } fam;
58 } net;
59 int cap;
60 int ipc_id;
61 struct task_struct *tsk;
62#ifdef CONFIG_KEYS
63 struct {
64 key_serial_t key;
65 char *key_desc;
66 } key_struct;
67#endif
68 } u;
69 const char *function;
70 /* this union contains LSM specific data */
71 union {
72 /* SMACK data */
73 struct smack_audit_data {
74 char *subject;
75 char *object;
76 char *request;
77 int result;
78 } smack_audit_data;
79 /* SELinux data */
80 struct {
81 u32 ssid;
82 u32 tsid;
83 u16 tclass;
84 u32 requested;
85 u32 audited;
86 struct av_decision *avd;
87 int result;
88 } selinux_audit_data;
89 } lsm_priv;
90 /* these callback will be implemented by a specific LSM */
91 void (*lsm_pre_audit)(struct audit_buffer *, void *);
92 void (*lsm_post_audit)(struct audit_buffer *, void *);
93};
94
95#define v4info fam.v4
96#define v6info fam.v6
97
98int ipv4_skb_to_auditdata(struct sk_buff *skb,
99 struct common_audit_data *ad, u8 *proto);
100
101int ipv6_skb_to_auditdata(struct sk_buff *skb,
102 struct common_audit_data *ad, u8 *proto);
103
104/* Initialize an LSM audit data structure. */
105#define COMMON_AUDIT_DATA_INIT(_d, _t) \
106 { memset((_d), 0, sizeof(struct common_audit_data)); \
107 (_d)->type = LSM_AUDIT_DATA_##_t; (_d)->function = __func__; }
108
109void common_lsm_audit(struct common_audit_data *a);
110
111#endif
diff --git a/include/linux/magic.h b/include/linux/magic.h
index 5b4e28bcb788..927138cf3050 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -9,6 +9,7 @@
9#define DEBUGFS_MAGIC 0x64626720 9#define DEBUGFS_MAGIC 0x64626720
10#define SYSFS_MAGIC 0x62656572 10#define SYSFS_MAGIC 0x62656572
11#define SECURITYFS_MAGIC 0x73636673 11#define SECURITYFS_MAGIC 0x73636673
12#define SELINUX_MAGIC 0xf97cff8c
12#define TMPFS_MAGIC 0x01021994 13#define TMPFS_MAGIC 0x01021994
13#define SQUASHFS_MAGIC 0x73717368 14#define SQUASHFS_MAGIC 0x73717368
14#define EFS_SUPER_MAGIC 0x414A53 15#define EFS_SUPER_MAGIC 0x414A53
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9772d6cbfc82..ad613ed66ab0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -581,12 +581,10 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
581 */ 581 */
582static inline unsigned long round_hint_to_min(unsigned long hint) 582static inline unsigned long round_hint_to_min(unsigned long hint)
583{ 583{
584#ifdef CONFIG_SECURITY
585 hint &= PAGE_MASK; 584 hint &= PAGE_MASK;
586 if (((void *)hint != NULL) && 585 if (((void *)hint != NULL) &&
587 (hint < mmap_min_addr)) 586 (hint < mmap_min_addr))
588 return PAGE_ALIGN(mmap_min_addr); 587 return PAGE_ALIGN(mmap_min_addr);
589#endif
590 return hint; 588 return hint;
591} 589}
592 590
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0f71812d67d3..d7d1c41a0b17 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1996,10 +1996,12 @@
1996#define PCI_DEVICE_ID_OXSEMI_PCIe952_1_U 0xC118 1996#define PCI_DEVICE_ID_OXSEMI_PCIe952_1_U 0xC118
1997#define PCI_DEVICE_ID_OXSEMI_PCIe952_1_GU 0xC11C 1997#define PCI_DEVICE_ID_OXSEMI_PCIe952_1_GU 0xC11C
1998#define PCI_DEVICE_ID_OXSEMI_16PCI954 0x9501 1998#define PCI_DEVICE_ID_OXSEMI_16PCI954 0x9501
1999#define PCI_DEVICE_ID_OXSEMI_C950 0x950B
1999#define PCI_DEVICE_ID_OXSEMI_16PCI95N 0x9511 2000#define PCI_DEVICE_ID_OXSEMI_16PCI95N 0x9511
2000#define PCI_DEVICE_ID_OXSEMI_16PCI954PP 0x9513 2001#define PCI_DEVICE_ID_OXSEMI_16PCI954PP 0x9513
2001#define PCI_DEVICE_ID_OXSEMI_16PCI952 0x9521 2002#define PCI_DEVICE_ID_OXSEMI_16PCI952 0x9521
2002#define PCI_DEVICE_ID_OXSEMI_16PCI952PP 0x9523 2003#define PCI_DEVICE_ID_OXSEMI_16PCI952PP 0x9523
2004#define PCI_SUBDEVICE_ID_OXSEMI_C950 0x0001
2003 2005
2004#define PCI_VENDOR_ID_CHELSIO 0x1425 2006#define PCI_VENDOR_ID_CHELSIO 0x1425
2005 2007
diff --git a/include/linux/rational.h b/include/linux/rational.h
new file mode 100644
index 000000000000..4f532fcd9eea
--- /dev/null
+++ b/include/linux/rational.h
@@ -0,0 +1,19 @@
1/*
2 * rational fractions
3 *
4 * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com>
5 *
6 * helper functions when coping with rational numbers,
7 * e.g. when calculating optimum numerator/denominator pairs for
8 * pll configuration taking into account restricted register size
9 */
10
11#ifndef _LINUX_RATIONAL_H
12#define _LINUX_RATIONAL_H
13
14void rational_best_approximation(
15 unsigned long given_numerator, unsigned long given_denominator,
16 unsigned long max_numerator, unsigned long max_denominator,
17 unsigned long *best_numerator, unsigned long *best_denominator);
18
19#endif /* _LINUX_RATIONAL_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d1399660b776..42bf2766111e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1261,7 +1261,9 @@ struct task_struct {
1261 * credentials (COW) */ 1261 * credentials (COW) */
1262 const struct cred *cred; /* effective (overridable) subjective task 1262 const struct cred *cred; /* effective (overridable) subjective task
1263 * credentials (COW) */ 1263 * credentials (COW) */
1264 struct mutex cred_exec_mutex; /* execve vs ptrace cred calculation mutex */ 1264 struct mutex cred_guard_mutex; /* guard against foreign influences on
1265 * credential calculations
 1266 * (notably, ptrace) */
1265 1267
1266 char comm[TASK_COMM_LEN]; /* executable name excluding path 1268 char comm[TASK_COMM_LEN]; /* executable name excluding path
1267 - access with [gs]et_task_comm (which lock 1269 - access with [gs]et_task_comm (which lock
@@ -1901,6 +1903,7 @@ extern void sched_dead(struct task_struct *p);
1901 1903
1902extern void proc_caches_init(void); 1904extern void proc_caches_init(void);
1903extern void flush_signals(struct task_struct *); 1905extern void flush_signals(struct task_struct *);
1906extern void __flush_signals(struct task_struct *);
1904extern void ignore_signals(struct task_struct *); 1907extern void ignore_signals(struct task_struct *);
1905extern void flush_signal_handlers(struct task_struct *, int force_default); 1908extern void flush_signal_handlers(struct task_struct *, int force_default);
1906extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); 1909extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
diff --git a/include/linux/security.h b/include/linux/security.h
index d5fd6163606f..5eff459b3833 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -2197,6 +2197,8 @@ static inline int security_file_mmap(struct file *file, unsigned long reqprot,
2197 unsigned long addr, 2197 unsigned long addr,
2198 unsigned long addr_only) 2198 unsigned long addr_only)
2199{ 2199{
2200 if ((addr < mmap_min_addr) && !capable(CAP_SYS_RAWIO))
2201 return -EACCES;
2200 return 0; 2202 return 0;
2201} 2203}
2202 2204
diff --git a/include/linux/serial.h b/include/linux/serial.h
index 9136cc5608c3..e5bb75a63802 100644
--- a/include/linux/serial.h
+++ b/include/linux/serial.h
@@ -96,54 +96,76 @@ struct serial_uart_config {
96 96
97/* 97/*
98 * Definitions for async_struct (and serial_struct) flags field 98 * Definitions for async_struct (and serial_struct) flags field
99 *
100 * Define ASYNCB_* for convenient use with {test,set,clear}_bit.
99 */ 101 */
100#define ASYNC_HUP_NOTIFY 0x0001 /* Notify getty on hangups and closes 102#define ASYNCB_HUP_NOTIFY 0 /* Notify getty on hangups and closes
101 on the callout port */ 103 * on the callout port */
102#define ASYNC_FOURPORT 0x0002 /* Set OU1, OUT2 per AST Fourport settings */ 104#define ASYNCB_FOURPORT 1 /* Set OU1, OUT2 per AST Fourport settings */
103#define ASYNC_SAK 0x0004 /* Secure Attention Key (Orange book) */ 105#define ASYNCB_SAK 2 /* Secure Attention Key (Orange book) */
104#define ASYNC_SPLIT_TERMIOS 0x0008 /* Separate termios for dialin/callout */ 106#define ASYNCB_SPLIT_TERMIOS 3 /* Separate termios for dialin/callout */
105 107#define ASYNCB_SPD_HI 4 /* Use 56000 instead of 38400 bps */
106#define ASYNC_SPD_MASK 0x1030 108#define ASYNCB_SPD_VHI 5 /* Use 115200 instead of 38400 bps */
107#define ASYNC_SPD_HI 0x0010 /* Use 56000 instead of 38400 bps */ 109#define ASYNCB_SKIP_TEST 6 /* Skip UART test during autoconfiguration */
108 110#define ASYNCB_AUTO_IRQ 7 /* Do automatic IRQ during
109#define ASYNC_SPD_VHI 0x0020 /* Use 115200 instead of 38400 bps */ 111 * autoconfiguration */
110#define ASYNC_SPD_CUST 0x0030 /* Use user-specified divisor */ 112#define ASYNCB_SESSION_LOCKOUT 8 /* Lock out cua opens based on session */
111 113#define ASYNCB_PGRP_LOCKOUT 9 /* Lock out cua opens based on pgrp */
112#define ASYNC_SKIP_TEST 0x0040 /* Skip UART test during autoconfiguration */ 114#define ASYNCB_CALLOUT_NOHUP 10 /* Don't do hangups for cua device */
113#define ASYNC_AUTO_IRQ 0x0080 /* Do automatic IRQ during autoconfiguration */ 115#define ASYNCB_HARDPPS_CD 11 /* Call hardpps when CD goes high */
114#define ASYNC_SESSION_LOCKOUT 0x0100 /* Lock out cua opens based on session */ 116#define ASYNCB_SPD_SHI 12 /* Use 230400 instead of 38400 bps */
115#define ASYNC_PGRP_LOCKOUT 0x0200 /* Lock out cua opens based on pgrp */ 117#define ASYNCB_LOW_LATENCY 13 /* Request low latency behaviour */
116#define ASYNC_CALLOUT_NOHUP 0x0400 /* Don't do hangups for cua device */ 118#define ASYNCB_BUGGY_UART 14 /* This is a buggy UART, skip some safety
117 119 * checks. Note: can be dangerous! */
118#define ASYNC_HARDPPS_CD 0x0800 /* Call hardpps when CD goes high */ 120#define ASYNCB_AUTOPROBE 15 /* Port was autoprobed by PCI or PNP code */
119 121#define ASYNCB_LAST_USER 15
120#define ASYNC_SPD_SHI 0x1000 /* Use 230400 instead of 38400 bps */ 122
121#define ASYNC_SPD_WARP 0x1010 /* Use 460800 instead of 38400 bps */ 123/* Internal flags used only by kernel */
122 124#define ASYNCB_INITIALIZED 31 /* Serial port was initialized */
123#define ASYNC_LOW_LATENCY 0x2000 /* Request low latency behaviour */ 125#define ASYNCB_NORMAL_ACTIVE 29 /* Normal device is active */
124 126#define ASYNCB_BOOT_AUTOCONF 28 /* Autoconfigure port on bootup */
125#define ASYNC_BUGGY_UART 0x4000 /* This is a buggy UART, skip some safety 127#define ASYNCB_CLOSING 27 /* Serial port is closing */
126 * checks. Note: can be dangerous! */ 128#define ASYNCB_CTS_FLOW 26 /* Do CTS flow control */
127 129#define ASYNCB_CHECK_CD 25 /* i.e., CLOCAL */
128#define ASYNC_AUTOPROBE 0x8000 /* Port was autoprobed by PCI or PNP code */ 130#define ASYNCB_SHARE_IRQ 24 /* for multifunction cards, no longer used */
129 131#define ASYNCB_CONS_FLOW 23 /* flow control for console */
130#define ASYNC_FLAGS 0x7FFF /* Possible legal async flags */ 132#define ASYNCB_BOOT_ONLYMCA 22 /* Probe only if MCA bus */
131#define ASYNC_USR_MASK 0x3430 /* Legal flags that non-privileged 133#define ASYNCB_FIRST_KERNEL 22
132 * users can set or reset */ 134
133 135#define ASYNC_HUP_NOTIFY (1U << ASYNCB_HUP_NOTIFY)
134/* Internal flags used only by kernel/chr_drv/serial.c */ 136#define ASYNC_FOURPORT (1U << ASYNCB_FOURPORT)
135#define ASYNC_INITIALIZED 0x80000000 /* Serial port was initialized */ 137#define ASYNC_SAK (1U << ASYNCB_SAK)
136#define ASYNC_NORMAL_ACTIVE 0x20000000 /* Normal device is active */ 138#define ASYNC_SPLIT_TERMIOS (1U << ASYNCB_SPLIT_TERMIOS)
137#define ASYNC_BOOT_AUTOCONF 0x10000000 /* Autoconfigure port on bootup */ 139#define ASYNC_SPD_HI (1U << ASYNCB_SPD_HI)
138#define ASYNC_CLOSING 0x08000000 /* Serial port is closing */ 140#define ASYNC_SPD_VHI (1U << ASYNCB_SPD_VHI)
139#define ASYNC_CTS_FLOW 0x04000000 /* Do CTS flow control */ 141#define ASYNC_SKIP_TEST (1U << ASYNCB_SKIP_TEST)
140#define ASYNC_CHECK_CD 0x02000000 /* i.e., CLOCAL */ 142#define ASYNC_AUTO_IRQ (1U << ASYNCB_AUTO_IRQ)
141#define ASYNC_SHARE_IRQ 0x01000000 /* for multifunction cards 143#define ASYNC_SESSION_LOCKOUT (1U << ASYNCB_SESSION_LOCKOUT)
142 --- no longer used */ 144#define ASYNC_PGRP_LOCKOUT (1U << ASYNCB_PGRP_LOCKOUT)
143#define ASYNC_CONS_FLOW 0x00800000 /* flow control for console */ 145#define ASYNC_CALLOUT_NOHUP (1U << ASYNCB_CALLOUT_NOHUP)
144 146#define ASYNC_HARDPPS_CD (1U << ASYNCB_HARDPPS_CD)
145#define ASYNC_BOOT_ONLYMCA 0x00400000 /* Probe only if MCA bus */ 147#define ASYNC_SPD_SHI (1U << ASYNCB_SPD_SHI)
146#define ASYNC_INTERNAL_FLAGS 0xFFC00000 /* Internal flags */ 148#define ASYNC_LOW_LATENCY (1U << ASYNCB_LOW_LATENCY)
149#define ASYNC_BUGGY_UART (1U << ASYNCB_BUGGY_UART)
150#define ASYNC_AUTOPROBE (1U << ASYNCB_AUTOPROBE)
151
152#define ASYNC_FLAGS ((1U << ASYNCB_LAST_USER) - 1)
153#define ASYNC_USR_MASK (ASYNC_SPD_HI|ASYNC_SPD_VHI| \
154 ASYNC_CALLOUT_NOHUP|ASYNC_SPD_SHI|ASYNC_LOW_LATENCY)
155#define ASYNC_SPD_CUST (ASYNC_SPD_HI|ASYNC_SPD_VHI)
156#define ASYNC_SPD_WARP (ASYNC_SPD_HI|ASYNC_SPD_SHI)
157#define ASYNC_SPD_MASK (ASYNC_SPD_HI|ASYNC_SPD_VHI|ASYNC_SPD_SHI)
158
159#define ASYNC_INITIALIZED (1U << ASYNCB_INITIALIZED)
160#define ASYNC_NORMAL_ACTIVE (1U << ASYNCB_NORMAL_ACTIVE)
161#define ASYNC_BOOT_AUTOCONF (1U << ASYNCB_BOOT_AUTOCONF)
162#define ASYNC_CLOSING (1U << ASYNCB_CLOSING)
163#define ASYNC_CTS_FLOW (1U << ASYNCB_CTS_FLOW)
164#define ASYNC_CHECK_CD (1U << ASYNCB_CHECK_CD)
165#define ASYNC_SHARE_IRQ (1U << ASYNCB_SHARE_IRQ)
166#define ASYNC_CONS_FLOW (1U << ASYNCB_CONS_FLOW)
167#define ASYNC_BOOT_ONLYMCA (1U << ASYNCB_BOOT_ONLYMCA)
168#define ASYNC_INTERNAL_FLAGS (~((1U << ASYNCB_FIRST_KERNEL) - 1))
147 169
148/* 170/*
149 * Multiport serial configuration structure --- external structure 171 * Multiport serial configuration structure --- external structure
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 57a97e52e58d..6fd80c4243f1 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -41,7 +41,8 @@
41#define PORT_XSCALE 15 41#define PORT_XSCALE 15
42#define PORT_RM9000 16 /* PMC-Sierra RM9xxx internal UART */ 42#define PORT_RM9000 16 /* PMC-Sierra RM9xxx internal UART */
43#define PORT_OCTEON 17 /* Cavium OCTEON internal UART */ 43#define PORT_OCTEON 17 /* Cavium OCTEON internal UART */
44#define PORT_MAX_8250 17 /* max port ID */ 44#define PORT_AR7 18 /* Texas Instruments AR7 internal UART */
45#define PORT_MAX_8250 18 /* max port ID */
45 46
46/* 47/*
47 * ARM specific type numbers. These are not currently guaranteed 48 * ARM specific type numbers. These are not currently guaranteed
@@ -167,6 +168,9 @@
167/* MAX3100 */ 168/* MAX3100 */
168#define PORT_MAX3100 86 169#define PORT_MAX3100 86
169 170
171/* Timberdale UART */
172#define PORT_TIMBUART 87
173
170#ifdef __KERNEL__ 174#ifdef __KERNEL__
171 175
172#include <linux/compiler.h> 176#include <linux/compiler.h>
diff --git a/include/linux/tty.h b/include/linux/tty.h
index fc39db95499f..1488d8c81aac 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -185,7 +185,7 @@ struct tty_port;
185struct tty_port_operations { 185struct tty_port_operations {
186 /* Return 1 if the carrier is raised */ 186 /* Return 1 if the carrier is raised */
187 int (*carrier_raised)(struct tty_port *port); 187 int (*carrier_raised)(struct tty_port *port);
188 void (*raise_dtr_rts)(struct tty_port *port); 188 void (*dtr_rts)(struct tty_port *port, int raise);
189}; 189};
190 190
191struct tty_port { 191struct tty_port {
@@ -201,6 +201,9 @@ struct tty_port {
201 unsigned char *xmit_buf; /* Optional buffer */ 201 unsigned char *xmit_buf; /* Optional buffer */
202 int close_delay; /* Close port delay */ 202 int close_delay; /* Close port delay */
203 int closing_wait; /* Delay for output */ 203 int closing_wait; /* Delay for output */
204 int drain_delay; /* Set to zero if no pure time
205 based drain is needed else
206 set to size of fifo */
204}; 207};
205 208
206/* 209/*
@@ -223,8 +226,11 @@ struct tty_struct {
223 struct tty_driver *driver; 226 struct tty_driver *driver;
224 const struct tty_operations *ops; 227 const struct tty_operations *ops;
225 int index; 228 int index;
226 /* The ldisc objects are protected by tty_ldisc_lock at the moment */ 229
227 struct tty_ldisc ldisc; 230 /* Protects ldisc changes: Lock tty not pty */
231 struct mutex ldisc_mutex;
232 struct tty_ldisc *ldisc;
233
228 struct mutex termios_mutex; 234 struct mutex termios_mutex;
229 spinlock_t ctrl_lock; 235 spinlock_t ctrl_lock;
230 /* Termios values are protected by the termios mutex */ 236 /* Termios values are protected by the termios mutex */
@@ -311,6 +317,7 @@ struct tty_struct {
311#define TTY_CLOSING 7 /* ->close() in progress */ 317#define TTY_CLOSING 7 /* ->close() in progress */
312#define TTY_LDISC 9 /* Line discipline attached */ 318#define TTY_LDISC 9 /* Line discipline attached */
313#define TTY_LDISC_CHANGING 10 /* Line discipline changing */ 319#define TTY_LDISC_CHANGING 10 /* Line discipline changing */
320#define TTY_LDISC_OPEN 11 /* Line discipline is open */
314#define TTY_HW_COOK_OUT 14 /* Hardware can do output cooking */ 321#define TTY_HW_COOK_OUT 14 /* Hardware can do output cooking */
315#define TTY_HW_COOK_IN 15 /* Hardware can do input cooking */ 322#define TTY_HW_COOK_IN 15 /* Hardware can do input cooking */
316#define TTY_PTY_LOCK 16 /* pty private */ 323#define TTY_PTY_LOCK 16 /* pty private */
@@ -403,6 +410,7 @@ extern int tty_termios_hw_change(struct ktermios *a, struct ktermios *b);
403extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *); 410extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *);
404extern void tty_ldisc_deref(struct tty_ldisc *); 411extern void tty_ldisc_deref(struct tty_ldisc *);
405extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *); 412extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *);
413extern void tty_ldisc_hangup(struct tty_struct *tty);
406extern const struct file_operations tty_ldiscs_proc_fops; 414extern const struct file_operations tty_ldiscs_proc_fops;
407 415
408extern void tty_wakeup(struct tty_struct *tty); 416extern void tty_wakeup(struct tty_struct *tty);
@@ -425,6 +433,9 @@ extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx,
425extern void tty_release_dev(struct file *filp); 433extern void tty_release_dev(struct file *filp);
426extern int tty_init_termios(struct tty_struct *tty); 434extern int tty_init_termios(struct tty_struct *tty);
427 435
436extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty);
437extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty);
438
428extern struct mutex tty_mutex; 439extern struct mutex tty_mutex;
429 440
430extern void tty_write_unlock(struct tty_struct *tty); 441extern void tty_write_unlock(struct tty_struct *tty);
@@ -438,6 +449,7 @@ extern struct tty_struct *tty_port_tty_get(struct tty_port *port);
438extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); 449extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty);
439extern int tty_port_carrier_raised(struct tty_port *port); 450extern int tty_port_carrier_raised(struct tty_port *port);
440extern void tty_port_raise_dtr_rts(struct tty_port *port); 451extern void tty_port_raise_dtr_rts(struct tty_port *port);
452extern void tty_port_lower_dtr_rts(struct tty_port *port);
441extern void tty_port_hangup(struct tty_port *port); 453extern void tty_port_hangup(struct tty_port *port);
442extern int tty_port_block_til_ready(struct tty_port *port, 454extern int tty_port_block_til_ready(struct tty_port *port,
443 struct tty_struct *tty, struct file *filp); 455 struct tty_struct *tty, struct file *filp);
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index bcba84ea2d86..3566129384a4 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -127,7 +127,8 @@
127 * the line discipline are close to full, and it should somehow 127 * the line discipline are close to full, and it should somehow
128 * signal that no more characters should be sent to the tty. 128 * signal that no more characters should be sent to the tty.
129 * 129 *
130 * Optional: Always invoke via tty_throttle(); 130 * Optional: Always invoke via tty_throttle(), called under the
131 * termios lock.
131 * 132 *
132 * void (*unthrottle)(struct tty_struct * tty); 133 * void (*unthrottle)(struct tty_struct * tty);
133 * 134 *
@@ -135,7 +136,8 @@
135 * that characters can now be sent to the tty without fear of 136 * that characters can now be sent to the tty without fear of
136 * overrunning the input buffers of the line disciplines. 137 * overrunning the input buffers of the line disciplines.
137 * 138 *
138 * Optional: Always invoke via tty_unthrottle(); 139 * Optional: Always invoke via tty_unthrottle(), called under the
140 * termios lock.
139 * 141 *
140 * void (*stop)(struct tty_struct *tty); 142 * void (*stop)(struct tty_struct *tty);
141 * 143 *
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 625e9e4639c6..8cdfed738fe4 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -224,8 +224,7 @@ struct usb_serial_driver {
224 /* Called by console with tty = NULL and by tty */ 224 /* Called by console with tty = NULL and by tty */
225 int (*open)(struct tty_struct *tty, 225 int (*open)(struct tty_struct *tty,
226 struct usb_serial_port *port, struct file *filp); 226 struct usb_serial_port *port, struct file *filp);
227 void (*close)(struct tty_struct *tty, 227 void (*close)(struct usb_serial_port *port);
228 struct usb_serial_port *port, struct file *filp);
229 int (*write)(struct tty_struct *tty, struct usb_serial_port *port, 228 int (*write)(struct tty_struct *tty, struct usb_serial_port *port,
230 const unsigned char *buf, int count); 229 const unsigned char *buf, int count);
231 /* Called only by the tty layer */ 230 /* Called only by the tty layer */
@@ -241,6 +240,10 @@ struct usb_serial_driver {
241 int (*tiocmget)(struct tty_struct *tty, struct file *file); 240 int (*tiocmget)(struct tty_struct *tty, struct file *file);
242 int (*tiocmset)(struct tty_struct *tty, struct file *file, 241 int (*tiocmset)(struct tty_struct *tty, struct file *file,
243 unsigned int set, unsigned int clear); 242 unsigned int set, unsigned int clear);
243 /* Called by the tty layer for port level work. There may or may not
244 be an attached tty at this point */
245 void (*dtr_rts)(struct usb_serial_port *port, int on);
246 int (*carrier_raised)(struct usb_serial_port *port);
244 /* USB events */ 247 /* USB events */
245 void (*read_int_callback)(struct urb *urb); 248 void (*read_int_callback)(struct urb *urb);
246 void (*write_int_callback)(struct urb *urb); 249 void (*write_int_callback)(struct urb *urb);
@@ -283,8 +286,7 @@ extern int usb_serial_generic_open(struct tty_struct *tty,
283 struct usb_serial_port *port, struct file *filp); 286 struct usb_serial_port *port, struct file *filp);
284extern int usb_serial_generic_write(struct tty_struct *tty, 287extern int usb_serial_generic_write(struct tty_struct *tty,
285 struct usb_serial_port *port, const unsigned char *buf, int count); 288 struct usb_serial_port *port, const unsigned char *buf, int count);
286extern void usb_serial_generic_close(struct tty_struct *tty, 289extern void usb_serial_generic_close(struct usb_serial_port *port);
287 struct usb_serial_port *port, struct file *filp);
288extern int usb_serial_generic_resume(struct usb_serial *serial); 290extern int usb_serial_generic_resume(struct usb_serial *serial);
289extern int usb_serial_generic_write_room(struct tty_struct *tty); 291extern int usb_serial_generic_write_room(struct tty_struct *tty);
290extern int usb_serial_generic_chars_in_buffer(struct tty_struct *tty); 292extern int usb_serial_generic_chars_in_buffer(struct tty_struct *tty);
diff --git a/ipc/shm.c b/ipc/shm.c
index 425971600485..15dd238e5338 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -384,7 +384,6 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
384 error = PTR_ERR(file); 384 error = PTR_ERR(file);
385 if (IS_ERR(file)) 385 if (IS_ERR(file))
386 goto no_file; 386 goto no_file;
387 ima_shm_check(file);
388 387
389 id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); 388 id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
390 if (id < 0) { 389 if (id < 0) {
@@ -891,7 +890,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
891 file = alloc_file(path.mnt, path.dentry, f_mode, &shm_file_operations); 890 file = alloc_file(path.mnt, path.dentry, f_mode, &shm_file_operations);
892 if (!file) 891 if (!file)
893 goto out_free; 892 goto out_free;
894 ima_shm_check(file); 893 ima_counts_get(file);
895 894
896 file->private_data = sfd; 895 file->private_data = sfd;
897 file->f_mapping = shp->shm_file->f_mapping; 896 file->f_mapping = shp->shm_file->f_mapping;
diff --git a/kernel/cred.c b/kernel/cred.c
index 3a039189d707..1bb4d7e5d616 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -167,7 +167,7 @@ EXPORT_SYMBOL(prepare_creds);
167 167
168/* 168/*
169 * Prepare credentials for current to perform an execve() 169 * Prepare credentials for current to perform an execve()
170 * - The caller must hold current->cred_exec_mutex 170 * - The caller must hold current->cred_guard_mutex
171 */ 171 */
172struct cred *prepare_exec_creds(void) 172struct cred *prepare_exec_creds(void)
173{ 173{
@@ -276,7 +276,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
276 struct cred *new; 276 struct cred *new;
277 int ret; 277 int ret;
278 278
279 mutex_init(&p->cred_exec_mutex); 279 mutex_init(&p->cred_guard_mutex);
280 280
281 if ( 281 if (
282#ifdef CONFIG_KEYS 282#ifdef CONFIG_KEYS
diff --git a/kernel/exit.c b/kernel/exit.c
index cab535c427b8..51d1fe3fb7ad 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1472,6 +1472,7 @@ static int wait_consider_task(struct task_struct *parent, int ptrace,
1472 */ 1472 */
1473 if (*notask_error) 1473 if (*notask_error)
1474 *notask_error = ret; 1474 *notask_error = ret;
1475 return 0;
1475 } 1476 }
1476 1477
1477 if (likely(!ptrace) && unlikely(p->ptrace)) { 1478 if (likely(!ptrace) && unlikely(p->ptrace)) {
diff --git a/kernel/module.c b/kernel/module.c
index 2383e60fcf3f..278e9b6762bb 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -73,6 +73,9 @@ DEFINE_MUTEX(module_mutex);
73EXPORT_SYMBOL_GPL(module_mutex); 73EXPORT_SYMBOL_GPL(module_mutex);
74static LIST_HEAD(modules); 74static LIST_HEAD(modules);
75 75
76/* Block module loading/unloading? */
77int modules_disabled = 0;
78
76/* Waiting for a module to finish initializing? */ 79/* Waiting for a module to finish initializing? */
77static DECLARE_WAIT_QUEUE_HEAD(module_wq); 80static DECLARE_WAIT_QUEUE_HEAD(module_wq);
78 81
@@ -778,7 +781,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
778 char name[MODULE_NAME_LEN]; 781 char name[MODULE_NAME_LEN];
779 int ret, forced = 0; 782 int ret, forced = 0;
780 783
781 if (!capable(CAP_SYS_MODULE)) 784 if (!capable(CAP_SYS_MODULE) || modules_disabled)
782 return -EPERM; 785 return -EPERM;
783 786
784 if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0) 787 if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0)
@@ -2338,7 +2341,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
2338 int ret = 0; 2341 int ret = 0;
2339 2342
2340 /* Must have permission */ 2343 /* Must have permission */
2341 if (!capable(CAP_SYS_MODULE)) 2344 if (!capable(CAP_SYS_MODULE) || modules_disabled)
2342 return -EPERM; 2345 return -EPERM;
2343 2346
2344 /* Only one module load at a time, please */ 2347 /* Only one module load at a time, please */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 2442d140bd9a..f6d8b8cb5e34 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -175,10 +175,11 @@ int ptrace_attach(struct task_struct *task)
175 if (same_thread_group(task, current)) 175 if (same_thread_group(task, current))
176 goto out; 176 goto out;
177 177
178 /* Protect exec's credential calculations against our interference; 178 /* Protect the target's credential calculations against our
179 * SUID, SGID and LSM creds get determined differently under ptrace. 179 * interference; SUID, SGID and LSM creds get determined differently
180 * under ptrace.
180 */ 181 */
181 retval = mutex_lock_interruptible(&task->cred_exec_mutex); 182 retval = mutex_lock_interruptible(&task->cred_guard_mutex);
182 if (retval < 0) 183 if (retval < 0)
183 goto out; 184 goto out;
184 185
@@ -222,7 +223,7 @@ repeat:
222bad: 223bad:
223 write_unlock_irqrestore(&tasklist_lock, flags); 224 write_unlock_irqrestore(&tasklist_lock, flags);
224 task_unlock(task); 225 task_unlock(task);
225 mutex_unlock(&task->cred_exec_mutex); 226 mutex_unlock(&task->cred_guard_mutex);
226out: 227out:
227 return retval; 228 return retval;
228} 229}
diff --git a/kernel/signal.c b/kernel/signal.c
index dba6ae99978a..809a228019ad 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -247,14 +247,19 @@ void flush_sigqueue(struct sigpending *queue)
247/* 247/*
248 * Flush all pending signals for a task. 248 * Flush all pending signals for a task.
249 */ 249 */
250void __flush_signals(struct task_struct *t)
251{
252 clear_tsk_thread_flag(t, TIF_SIGPENDING);
253 flush_sigqueue(&t->pending);
254 flush_sigqueue(&t->signal->shared_pending);
255}
256
250void flush_signals(struct task_struct *t) 257void flush_signals(struct task_struct *t)
251{ 258{
252 unsigned long flags; 259 unsigned long flags;
253 260
254 spin_lock_irqsave(&t->sighand->siglock, flags); 261 spin_lock_irqsave(&t->sighand->siglock, flags);
255 clear_tsk_thread_flag(t, TIF_SIGPENDING); 262 __flush_signals(t);
256 flush_sigqueue(&t->pending);
257 flush_sigqueue(&t->signal->shared_pending);
258 spin_unlock_irqrestore(&t->sighand->siglock, flags); 263 spin_unlock_irqrestore(&t->sighand->siglock, flags);
259} 264}
260 265
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6a463716ecbf..944ba03cae19 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -114,6 +114,7 @@ static int ngroups_max = NGROUPS_MAX;
114 114
115#ifdef CONFIG_MODULES 115#ifdef CONFIG_MODULES
116extern char modprobe_path[]; 116extern char modprobe_path[];
117extern int modules_disabled;
117#endif 118#endif
118#ifdef CONFIG_CHR_DEV_SG 119#ifdef CONFIG_CHR_DEV_SG
119extern int sg_big_buff; 120extern int sg_big_buff;
@@ -534,6 +535,17 @@ static struct ctl_table kern_table[] = {
534 .proc_handler = &proc_dostring, 535 .proc_handler = &proc_dostring,
535 .strategy = &sysctl_string, 536 .strategy = &sysctl_string,
536 }, 537 },
538 {
539 .ctl_name = CTL_UNNUMBERED,
540 .procname = "modules_disabled",
541 .data = &modules_disabled,
542 .maxlen = sizeof(int),
543 .mode = 0644,
544 /* only handle a transition from default "0" to "1" */
545 .proc_handler = &proc_dointvec_minmax,
546 .extra1 = &one,
547 .extra2 = &one,
548 },
537#endif 549#endif
538#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET) 550#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
539 { 551 {
@@ -1233,7 +1245,6 @@ static struct ctl_table vm_table[] = {
1233 .strategy = &sysctl_jiffies, 1245 .strategy = &sysctl_jiffies,
1234 }, 1246 },
1235#endif 1247#endif
1236#ifdef CONFIG_SECURITY
1237 { 1248 {
1238 .ctl_name = CTL_UNNUMBERED, 1249 .ctl_name = CTL_UNNUMBERED,
1239 .procname = "mmap_min_addr", 1250 .procname = "mmap_min_addr",
@@ -1242,7 +1253,6 @@ static struct ctl_table vm_table[] = {
1242 .mode = 0644, 1253 .mode = 0644,
1243 .proc_handler = &proc_doulongvec_minmax, 1254 .proc_handler = &proc_doulongvec_minmax,
1244 }, 1255 },
1245#endif
1246#ifdef CONFIG_NUMA 1256#ifdef CONFIG_NUMA
1247 { 1257 {
1248 .ctl_name = CTL_UNNUMBERED, 1258 .ctl_name = CTL_UNNUMBERED,
diff --git a/lib/Kconfig b/lib/Kconfig
index 8ade0a7a91e0..9960be04cbbe 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -10,6 +10,9 @@ menu "Library routines"
10config BITREVERSE 10config BITREVERSE
11 tristate 11 tristate
12 12
13config RATIONAL
14 boolean
15
13config GENERIC_FIND_FIRST_BIT 16config GENERIC_FIND_FIRST_BIT
14 bool 17 bool
15 18
diff --git a/lib/Makefile b/lib/Makefile
index 33a40e40e3ee..1f6edefebffe 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -50,6 +50,7 @@ ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
50endif 50endif
51 51
52obj-$(CONFIG_BITREVERSE) += bitrev.o 52obj-$(CONFIG_BITREVERSE) += bitrev.o
53obj-$(CONFIG_RATIONAL) += rational.o
53obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o 54obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o
54obj-$(CONFIG_CRC16) += crc16.o 55obj-$(CONFIG_CRC16) += crc16.o
55obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o 56obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o
diff --git a/lib/rational.c b/lib/rational.c
new file mode 100644
index 000000000000..b3c099b5478e
--- /dev/null
+++ b/lib/rational.c
@@ -0,0 +1,62 @@
1/*
2 * rational fractions
3 *
4 * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com>
5 *
6 * helper functions when coping with rational numbers
7 */
8
9#include <linux/rational.h>
10
11/*
12 * calculate best rational approximation for a given fraction
13 * taking into account restricted register size, e.g. to find
14 * appropriate values for a pll with 5 bit denominator and
15 * 8 bit numerator register fields, trying to set up with a
16 * frequency ratio of 3.1415, one would say:
17 *
18 * rational_best_approximation(31415, 10000,
19 * (1 << 8) - 1, (1 << 5) - 1, &n, &d);
20 *
21 * you may look at given_numerator as a fixed point number,
22 * with the fractional part size described in given_denominator.
23 *
24 * for theoretical background, see:
25 * http://en.wikipedia.org/wiki/Continued_fraction
26 */
27
28void rational_best_approximation(
29 unsigned long given_numerator, unsigned long given_denominator,
30 unsigned long max_numerator, unsigned long max_denominator,
31 unsigned long *best_numerator, unsigned long *best_denominator)
32{
33 unsigned long n, d, n0, d0, n1, d1;
34 n = given_numerator;
35 d = given_denominator;
36 n0 = d1 = 0;
37 n1 = d0 = 1;
38 for (;;) {
39 unsigned long t, a;
40 if ((n1 > max_numerator) || (d1 > max_denominator)) {
41 n1 = n0;
42 d1 = d0;
43 break;
44 }
45 if (d == 0)
46 break;
47 t = d;
48 a = n / d;
49 d = n % d;
50 n = t;
51 t = n0 + a * n1;
52 n0 = n1;
53 n1 = t;
54 t = d0 + a * d1;
55 d0 = d1;
56 d1 = t;
57 }
58 *best_numerator = n1;
59 *best_denominator = d1;
60}
61
62EXPORT_SYMBOL(rational_best_approximation);
diff --git a/mm/Kconfig b/mm/Kconfig
index c2b57d81e153..71830ba7b986 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -226,6 +226,25 @@ config HAVE_MLOCKED_PAGE_BIT
226config MMU_NOTIFIER 226config MMU_NOTIFIER
227 bool 227 bool
228 228
229config DEFAULT_MMAP_MIN_ADDR
230 int "Low address space to protect from user allocation"
231 default 4096
232 help
233 This is the portion of low virtual memory which should be protected
234 from userspace allocation. Keeping a user from writing to low pages
235 can help reduce the impact of kernel NULL pointer bugs.
236
237 For most ia64, ppc64 and x86 users with lots of address space
238 a value of 65536 is reasonable and should cause no problems.
239 On arm and other archs it should not be higher than 32768.
 240	  Programs which use vm86 functionality would need additional
 241	  permissions from either the LSM or the capabilities module, or have
 242	  this protection disabled.
243
244 This value can be changed after boot using the
245 /proc/sys/vm/mmap_min_addr tunable.
246
247
229config NOMMU_INITIAL_TRIM_EXCESS 248config NOMMU_INITIAL_TRIM_EXCESS
230 int "Turn on mmap() excess space trimming before booting" 249 int "Turn on mmap() excess space trimming before booting"
231 depends on !MMU 250 depends on !MMU
diff --git a/mm/mmap.c b/mm/mmap.c
index 6b7b1a95944b..2b43fa1aa3c8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -87,6 +87,9 @@ int sysctl_overcommit_ratio = 50; /* default is 50% */
87int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; 87int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
88struct percpu_counter vm_committed_as; 88struct percpu_counter vm_committed_as;
89 89
90/* amount of vm to protect from userspace access */
91unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
92
90/* 93/*
91 * Check that a process has enough memory to allocate a new virtual 94 * Check that a process has enough memory to allocate a new virtual
92 * mapping. 0 means there is enough memory for the allocation to 95 * mapping. 0 means there is enough memory for the allocation to
diff --git a/mm/nommu.c b/mm/nommu.c
index b571ef707428..2fd2ad5da98e 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -69,6 +69,9 @@ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
69int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; 69int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
70int heap_stack_gap = 0; 70int heap_stack_gap = 0;
71 71
72/* amount of vm to protect from userspace access */
73unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
74
72atomic_long_t mmap_pages_allocated; 75atomic_long_t mmap_pages_allocated;
73 76
74EXPORT_SYMBOL(mem_map); 77EXPORT_SYMBOL(mem_map);
diff --git a/mm/shmem.c b/mm/shmem.c
index b25f95ce3db7..0132fbd45a23 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2659,6 +2659,7 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
2659 if (error) 2659 if (error)
2660 goto close_file; 2660 goto close_file;
2661#endif 2661#endif
2662 ima_counts_get(file);
2662 return file; 2663 return file;
2663 2664
2664close_file: 2665close_file:
@@ -2684,7 +2685,6 @@ int shmem_zero_setup(struct vm_area_struct *vma)
2684 if (IS_ERR(file)) 2685 if (IS_ERR(file))
2685 return PTR_ERR(file); 2686 return PTR_ERR(file);
2686 2687
2687 ima_shm_check(file);
2688 if (vma->vm_file) 2688 if (vma->vm_file)
2689 fput(vma->vm_file); 2689 fput(vma->vm_file);
2690 vma->vm_file = file; 2690 vma->vm_file = file;
diff --git a/security/Kconfig b/security/Kconfig
index bb244774e9d7..d23c839038f0 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -110,28 +110,8 @@ config SECURITY_ROOTPLUG
110 110
111 See <http://www.linuxjournal.com/article.php?sid=6279> for 111 See <http://www.linuxjournal.com/article.php?sid=6279> for
112 more information about this module. 112 more information about this module.
113
114 If you are unsure how to answer this question, answer N.
115
116config SECURITY_DEFAULT_MMAP_MIN_ADDR
117 int "Low address space to protect from user allocation"
118 depends on SECURITY
119 default 0
120 help
121 This is the portion of low virtual memory which should be protected
122 from userspace allocation. Keeping a user from writing to low pages
123 can help reduce the impact of kernel NULL pointer bugs.
124
125 For most ia64, ppc64 and x86 users with lots of address space
126 a value of 65536 is reasonable and should cause no problems.
127 On arm and other archs it should not be higher than 32768.
128 Programs which use vm86 functionality would either need additional
129 permissions from either the LSM or the capabilities module or have
130 this protection disabled.
131
132 This value can be changed after boot using the
133 /proc/sys/vm/mmap_min_addr tunable.
134 113
114 If you are unsure how to answer this question, answer N.
135 115
136source security/selinux/Kconfig 116source security/selinux/Kconfig
137source security/smack/Kconfig 117source security/smack/Kconfig
diff --git a/security/Makefile b/security/Makefile
index fa77021d9778..c67557cdaa85 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -16,6 +16,9 @@ obj-$(CONFIG_SECURITYFS) += inode.o
16# Must precede capability.o in order to stack properly. 16# Must precede capability.o in order to stack properly.
17obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o 17obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o
18obj-$(CONFIG_SECURITY_SMACK) += smack/built-in.o 18obj-$(CONFIG_SECURITY_SMACK) += smack/built-in.o
19ifeq ($(CONFIG_AUDIT),y)
20obj-$(CONFIG_SECURITY_SMACK) += lsm_audit.o
21endif
19obj-$(CONFIG_SECURITY_TOMOYO) += tomoyo/built-in.o 22obj-$(CONFIG_SECURITY_TOMOYO) += tomoyo/built-in.o
20obj-$(CONFIG_SECURITY_ROOTPLUG) += root_plug.o 23obj-$(CONFIG_SECURITY_ROOTPLUG) += root_plug.o
21obj-$(CONFIG_CGROUP_DEVICE) += device_cgroup.o 24obj-$(CONFIG_CGROUP_DEVICE) += device_cgroup.o
diff --git a/security/commoncap.c b/security/commoncap.c
index beac0258c2a8..48b7e0228fa3 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -28,6 +28,28 @@
28#include <linux/prctl.h> 28#include <linux/prctl.h>
29#include <linux/securebits.h> 29#include <linux/securebits.h>
30 30
31/*
32 * If a non-root user executes a setuid-root binary in
33 * !secure(SECURE_NOROOT) mode, then we raise capabilities.
34 * However if fE is also set, then the intent is for only
35 * the file capabilities to be applied, and the setuid-root
36 * bit is left on either to change the uid (plausible) or
37 * to get full privilege on a kernel without file capabilities
38 * support. So in that case we do not raise capabilities.
39 *
40 * Warn if that happens, once per boot.
41 */
42static void warn_setuid_and_fcaps_mixed(char *fname)
43{
44 static int warned;
45 if (!warned) {
46 printk(KERN_INFO "warning: `%s' has both setuid-root and"
47 " effective capabilities. Therefore not raising all"
48 " capabilities.\n", fname);
49 warned = 1;
50 }
51}
52
31int cap_netlink_send(struct sock *sk, struct sk_buff *skb) 53int cap_netlink_send(struct sock *sk, struct sk_buff *skb)
32{ 54{
33 NETLINK_CB(skb).eff_cap = current_cap(); 55 NETLINK_CB(skb).eff_cap = current_cap();
@@ -464,6 +486,15 @@ int cap_bprm_set_creds(struct linux_binprm *bprm)
464 486
465 if (!issecure(SECURE_NOROOT)) { 487 if (!issecure(SECURE_NOROOT)) {
466 /* 488 /*
489 * If the legacy file capability is set, then don't set privs
490 * for a setuid root binary run by a non-root user. Do set it
491 * for a root user just to cause least surprise to an admin.
492 */
493 if (effective && new->uid != 0 && new->euid == 0) {
494 warn_setuid_and_fcaps_mixed(bprm->filename);
495 goto skip;
496 }
497 /*
467 * To support inheritance of root-permissions and suid-root 498 * To support inheritance of root-permissions and suid-root
468 * executables under compatibility mode, we override the 499 * executables under compatibility mode, we override the
469 * capability sets for the file. 500 * capability sets for the file.
@@ -478,6 +509,7 @@ int cap_bprm_set_creds(struct linux_binprm *bprm)
478 if (new->euid == 0) 509 if (new->euid == 0)
479 effective = true; 510 effective = true;
480 } 511 }
512skip:
481 513
482 /* Don't let someone trace a set[ug]id/setpcap binary with the revised 514 /* Don't let someone trace a set[ug]id/setpcap binary with the revised
483 * credentials unless they have the appropriate permit 515 * credentials unless they have the appropriate permit
diff --git a/security/inode.c b/security/inode.c
index f3b91bfbe4cb..f7496c6a022b 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -287,7 +287,7 @@ void securityfs_remove(struct dentry *dentry)
287{ 287{
288 struct dentry *parent; 288 struct dentry *parent;
289 289
290 if (!dentry) 290 if (!dentry || IS_ERR(dentry))
291 return; 291 return;
292 292
293 parent = dentry->d_parent; 293 parent = dentry->d_parent;
diff --git a/security/integrity/ima/ima_audit.c b/security/integrity/ima/ima_audit.c
index 1e082bb987be..ff513ff737f5 100644
--- a/security/integrity/ima/ima_audit.c
+++ b/security/integrity/ima/ima_audit.c
@@ -22,18 +22,9 @@ static int ima_audit;
22static int __init ima_audit_setup(char *str) 22static int __init ima_audit_setup(char *str)
23{ 23{
24 unsigned long audit; 24 unsigned long audit;
25 int rc, result = 0;
26 char *op = "ima_audit";
27 char *cause;
28 25
29 rc = strict_strtoul(str, 0, &audit); 26 if (!strict_strtoul(str, 0, &audit))
30 if (rc || audit > 1) 27 ima_audit = audit ? 1 : 0;
31 result = 1;
32 else
33 ima_audit = audit;
34 cause = ima_audit ? "enabled" : "not_enabled";
35 integrity_audit_msg(AUDIT_INTEGRITY_STATUS, NULL, NULL,
36 op, cause, result, 0);
37 return 1; 28 return 1;
38} 29}
39__setup("ima_audit=", ima_audit_setup); 30__setup("ima_audit=", ima_audit_setup);
@@ -50,23 +41,14 @@ void integrity_audit_msg(int audit_msgno, struct inode *inode,
50 41
51 ab = audit_log_start(current->audit_context, GFP_KERNEL, audit_msgno); 42 ab = audit_log_start(current->audit_context, GFP_KERNEL, audit_msgno);
52 audit_log_format(ab, "integrity: pid=%d uid=%u auid=%u ses=%u", 43 audit_log_format(ab, "integrity: pid=%d uid=%u auid=%u ses=%u",
53 current->pid, current->cred->uid, 44 current->pid, current_cred()->uid,
54 audit_get_loginuid(current), 45 audit_get_loginuid(current),
55 audit_get_sessionid(current)); 46 audit_get_sessionid(current));
56 audit_log_task_context(ab); 47 audit_log_task_context(ab);
57 switch (audit_msgno) { 48 audit_log_format(ab, " op=");
58 case AUDIT_INTEGRITY_DATA: 49 audit_log_string(ab, op);
59 case AUDIT_INTEGRITY_METADATA: 50 audit_log_format(ab, " cause=");
60 case AUDIT_INTEGRITY_PCR: 51 audit_log_string(ab, cause);
61 case AUDIT_INTEGRITY_STATUS:
62 audit_log_format(ab, " op=%s cause=%s", op, cause);
63 break;
64 case AUDIT_INTEGRITY_HASH:
65 audit_log_format(ab, " op=%s hash=%s", op, cause);
66 break;
67 default:
68 audit_log_format(ab, " op=%s", op);
69 }
70 audit_log_format(ab, " comm="); 52 audit_log_format(ab, " comm=");
71 audit_log_untrustedstring(ab, current->comm); 53 audit_log_untrustedstring(ab, current->comm);
72 if (fname) { 54 if (fname) {
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index 50d572b74caf..63003a63aaee 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -103,7 +103,7 @@ int ima_calc_template_hash(int template_len, void *template, char *digest)
103 return rc; 103 return rc;
104} 104}
105 105
106static void ima_pcrread(int idx, u8 *pcr) 106static void __init ima_pcrread(int idx, u8 *pcr)
107{ 107{
108 if (!ima_used_chip) 108 if (!ima_used_chip)
109 return; 109 return;
@@ -115,7 +115,7 @@ static void ima_pcrread(int idx, u8 *pcr)
115/* 115/*
116 * Calculate the boot aggregate hash 116 * Calculate the boot aggregate hash
117 */ 117 */
118int ima_calc_boot_aggregate(char *digest) 118int __init ima_calc_boot_aggregate(char *digest)
119{ 119{
120 struct hash_desc desc; 120 struct hash_desc desc;
121 struct scatterlist sg; 121 struct scatterlist sg;
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index 510186f0b72e..6bfc7eaebfda 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -15,6 +15,7 @@
15 * implemenents security file system for reporting 15 * implemenents security file system for reporting
16 * current measurement list and IMA statistics 16 * current measurement list and IMA statistics
17 */ 17 */
18#include <linux/fcntl.h>
18#include <linux/module.h> 19#include <linux/module.h>
19#include <linux/seq_file.h> 20#include <linux/seq_file.h>
20#include <linux/rculist.h> 21#include <linux/rculist.h>
@@ -283,6 +284,9 @@ static atomic_t policy_opencount = ATOMIC_INIT(1);
283 */ 284 */
284int ima_open_policy(struct inode * inode, struct file * filp) 285int ima_open_policy(struct inode * inode, struct file * filp)
285{ 286{
287 /* No point in being allowed to open it if you aren't going to write */
288 if (!(filp->f_flags & O_WRONLY))
289 return -EACCES;
286 if (atomic_dec_and_test(&policy_opencount)) 290 if (atomic_dec_and_test(&policy_opencount))
287 return 0; 291 return 0;
288 return -EBUSY; 292 return -EBUSY;
@@ -315,7 +319,7 @@ static struct file_operations ima_measure_policy_ops = {
315 .release = ima_release_policy 319 .release = ima_release_policy
316}; 320};
317 321
318int ima_fs_init(void) 322int __init ima_fs_init(void)
319{ 323{
320 ima_dir = securityfs_create_dir("ima", NULL); 324 ima_dir = securityfs_create_dir("ima", NULL);
321 if (IS_ERR(ima_dir)) 325 if (IS_ERR(ima_dir))
@@ -349,7 +353,7 @@ int ima_fs_init(void)
349 goto out; 353 goto out;
350 354
351 ima_policy = securityfs_create_file("policy", 355 ima_policy = securityfs_create_file("policy",
352 S_IRUSR | S_IRGRP | S_IWUSR, 356 S_IWUSR,
353 ima_dir, NULL, 357 ima_dir, NULL,
354 &ima_measure_policy_ops); 358 &ima_measure_policy_ops);
355 if (IS_ERR(ima_policy)) 359 if (IS_ERR(ima_policy))
diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c
index ec79f1ee992c..b8dd693f8790 100644
--- a/security/integrity/ima/ima_iint.c
+++ b/security/integrity/ima/ima_iint.c
@@ -196,7 +196,7 @@ static void init_once(void *foo)
196 kref_set(&iint->refcount, 1); 196 kref_set(&iint->refcount, 1);
197} 197}
198 198
199void ima_iintcache_init(void) 199void __init ima_iintcache_init(void)
200{ 200{
201 iint_cache = 201 iint_cache =
202 kmem_cache_create("iint_cache", sizeof(struct ima_iint_cache), 0, 202 kmem_cache_create("iint_cache", sizeof(struct ima_iint_cache), 0,
diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c
index 0b0bb8c978cc..a40da7ae5900 100644
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -38,7 +38,7 @@ int ima_used_chip;
38 * a different value.) Violations add a zero entry to the measurement 38 * a different value.) Violations add a zero entry to the measurement
39 * list and extend the aggregate PCR value with ff...ff's. 39 * list and extend the aggregate PCR value with ff...ff's.
40 */ 40 */
41static void ima_add_boot_aggregate(void) 41static void __init ima_add_boot_aggregate(void)
42{ 42{
43 struct ima_template_entry *entry; 43 struct ima_template_entry *entry;
44 const char *op = "add_boot_aggregate"; 44 const char *op = "add_boot_aggregate";
@@ -71,7 +71,7 @@ err_out:
71 audit_cause, result, 0); 71 audit_cause, result, 0);
72} 72}
73 73
74int ima_init(void) 74int __init ima_init(void)
75{ 75{
76 u8 pcr_i[IMA_DIGEST_SIZE]; 76 u8 pcr_i[IMA_DIGEST_SIZE];
77 int rc; 77 int rc;
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index f4e7266f5aee..6f611874d10e 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -29,20 +29,8 @@ int ima_initialized;
29char *ima_hash = "sha1"; 29char *ima_hash = "sha1";
30static int __init hash_setup(char *str) 30static int __init hash_setup(char *str)
31{ 31{
32 const char *op = "hash_setup"; 32 if (strncmp(str, "md5", 3) == 0)
33 const char *hash = "sha1"; 33 ima_hash = "md5";
34 int result = 0;
35 int audit_info = 0;
36
37 if (strncmp(str, "md5", 3) == 0) {
38 hash = "md5";
39 ima_hash = str;
40 } else if (strncmp(str, "sha1", 4) != 0) {
41 hash = "invalid_hash_type";
42 result = 1;
43 }
44 integrity_audit_msg(AUDIT_INTEGRITY_HASH, NULL, NULL, op, hash,
45 result, audit_info);
46 return 1; 34 return 1;
47} 35}
48__setup("ima_hash=", hash_setup); 36__setup("ima_hash=", hash_setup);
@@ -128,10 +116,6 @@ static int get_path_measurement(struct ima_iint_cache *iint, struct file *file,
128{ 116{
129 int rc = 0; 117 int rc = 0;
130 118
131 if (IS_ERR(file)) {
132 pr_info("%s dentry_open failed\n", filename);
133 return rc;
134 }
135 iint->opencount++; 119 iint->opencount++;
136 iint->readcount++; 120 iint->readcount++;
137 121
@@ -141,6 +125,15 @@ static int get_path_measurement(struct ima_iint_cache *iint, struct file *file,
141 return rc; 125 return rc;
142} 126}
143 127
128static void ima_update_counts(struct ima_iint_cache *iint, int mask)
129{
130 iint->opencount++;
131 if ((mask & MAY_WRITE) || (mask == 0))
132 iint->writecount++;
133 else if (mask & (MAY_READ | MAY_EXEC))
134 iint->readcount++;
135}
136
144/** 137/**
145 * ima_path_check - based on policy, collect/store measurement. 138 * ima_path_check - based on policy, collect/store measurement.
146 * @path: contains a pointer to the path to be measured 139 * @path: contains a pointer to the path to be measured
@@ -156,10 +149,10 @@ static int get_path_measurement(struct ima_iint_cache *iint, struct file *file,
156 * - Opening a file for read when already open for write, 149 * - Opening a file for read when already open for write,
157 * could result in a file measurement error. 150 * could result in a file measurement error.
158 * 151 *
159 * Return 0 on success, an error code on failure. 152 * Always return 0 and audit dentry_open failures.
160 * (Based on the results of appraise_measurement().) 153 * (Return code will be based upon measurement appraisal.)
161 */ 154 */
162int ima_path_check(struct path *path, int mask) 155int ima_path_check(struct path *path, int mask, int update_counts)
163{ 156{
164 struct inode *inode = path->dentry->d_inode; 157 struct inode *inode = path->dentry->d_inode;
165 struct ima_iint_cache *iint; 158 struct ima_iint_cache *iint;
@@ -173,11 +166,8 @@ int ima_path_check(struct path *path, int mask)
173 return 0; 166 return 0;
174 167
175 mutex_lock(&iint->mutex); 168 mutex_lock(&iint->mutex);
176 iint->opencount++; 169 if (update_counts)
177 if ((mask & MAY_WRITE) || (mask == 0)) 170 ima_update_counts(iint, mask);
178 iint->writecount++;
179 else if (mask & (MAY_READ | MAY_EXEC))
180 iint->readcount++;
181 171
182 rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK); 172 rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK);
183 if (rc < 0) 173 if (rc < 0)
@@ -196,7 +186,19 @@ int ima_path_check(struct path *path, int mask)
196 struct dentry *dentry = dget(path->dentry); 186 struct dentry *dentry = dget(path->dentry);
197 struct vfsmount *mnt = mntget(path->mnt); 187 struct vfsmount *mnt = mntget(path->mnt);
198 188
199 file = dentry_open(dentry, mnt, O_RDONLY, current->cred); 189 file = dentry_open(dentry, mnt, O_RDONLY | O_LARGEFILE,
190 current_cred());
191 if (IS_ERR(file)) {
192 int audit_info = 0;
193
194 integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode,
195 dentry->d_name.name,
196 "add_measurement",
197 "dentry_open failed",
198 1, audit_info);
199 file = NULL;
200 goto out;
201 }
200 rc = get_path_measurement(iint, file, dentry->d_name.name); 202 rc = get_path_measurement(iint, file, dentry->d_name.name);
201 } 203 }
202out: 204out:
@@ -206,6 +208,7 @@ out:
206 kref_put(&iint->refcount, iint_free); 208 kref_put(&iint->refcount, iint_free);
207 return 0; 209 return 0;
208} 210}
211EXPORT_SYMBOL_GPL(ima_path_check);
209 212
210static int process_measurement(struct file *file, const unsigned char *filename, 213static int process_measurement(struct file *file, const unsigned char *filename,
211 int mask, int function) 214 int mask, int function)
@@ -234,7 +237,16 @@ out:
234 return rc; 237 return rc;
235} 238}
236 239
237static void opencount_get(struct file *file) 240/*
241 * ima_opens_get - increment file counts
242 *
243 * - for IPC shm and shmat file.
244 * - for nfsd exported files.
245 *
246 * Increment the counts for these files to prevent unnecessary
247 * imbalance messages.
248 */
249void ima_counts_get(struct file *file)
238{ 250{
239 struct inode *inode = file->f_dentry->d_inode; 251 struct inode *inode = file->f_dentry->d_inode;
240 struct ima_iint_cache *iint; 252 struct ima_iint_cache *iint;
@@ -246,8 +258,14 @@ static void opencount_get(struct file *file)
246 return; 258 return;
247 mutex_lock(&iint->mutex); 259 mutex_lock(&iint->mutex);
248 iint->opencount++; 260 iint->opencount++;
261 if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
262 iint->readcount++;
263
264 if (file->f_mode & FMODE_WRITE)
265 iint->writecount++;
249 mutex_unlock(&iint->mutex); 266 mutex_unlock(&iint->mutex);
250} 267}
268EXPORT_SYMBOL_GPL(ima_counts_get);
251 269
252/** 270/**
253 * ima_file_mmap - based on policy, collect/store measurement. 271 * ima_file_mmap - based on policy, collect/store measurement.
@@ -272,18 +290,6 @@ int ima_file_mmap(struct file *file, unsigned long prot)
272 return 0; 290 return 0;
273} 291}
274 292
275/*
276 * ima_shm_check - IPC shm and shmat create/fput a file
277 *
278 * Maintain the opencount for these files to prevent unnecessary
279 * imbalance messages.
280 */
281void ima_shm_check(struct file *file)
282{
283 opencount_get(file);
284 return;
285}
286
287/** 293/**
288 * ima_bprm_check - based on policy, collect/store measurement. 294 * ima_bprm_check - based on policy, collect/store measurement.
289 * @bprm: contains the linux_binprm structure 295 * @bprm: contains the linux_binprm structure
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index b5291ad5ef56..e1278399b345 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -45,24 +45,30 @@ struct ima_measure_rule_entry {
45 } lsm[MAX_LSM_RULES]; 45 } lsm[MAX_LSM_RULES];
46}; 46};
47 47
48/* Without LSM specific knowledge, the default policy can only be 48/*
49 * Without LSM specific knowledge, the default policy can only be
49 * written in terms of .action, .func, .mask, .fsmagic, and .uid 50 * written in terms of .action, .func, .mask, .fsmagic, and .uid
50 */ 51 */
52
53/*
54 * The minimum rule set to allow for full TCB coverage. Measures all files
55 * opened or mmap for exec and everything read by root. Dangerous because
56 * normal users can easily run the machine out of memory simply building
57 * and running executables.
58 */
51static struct ima_measure_rule_entry default_rules[] = { 59static struct ima_measure_rule_entry default_rules[] = {
52 {.action = DONT_MEASURE,.fsmagic = PROC_SUPER_MAGIC, 60 {.action = DONT_MEASURE,.fsmagic = PROC_SUPER_MAGIC,.flags = IMA_FSMAGIC},
53 .flags = IMA_FSMAGIC},
54 {.action = DONT_MEASURE,.fsmagic = SYSFS_MAGIC,.flags = IMA_FSMAGIC}, 61 {.action = DONT_MEASURE,.fsmagic = SYSFS_MAGIC,.flags = IMA_FSMAGIC},
55 {.action = DONT_MEASURE,.fsmagic = DEBUGFS_MAGIC,.flags = IMA_FSMAGIC}, 62 {.action = DONT_MEASURE,.fsmagic = DEBUGFS_MAGIC,.flags = IMA_FSMAGIC},
56 {.action = DONT_MEASURE,.fsmagic = TMPFS_MAGIC,.flags = IMA_FSMAGIC}, 63 {.action = DONT_MEASURE,.fsmagic = TMPFS_MAGIC,.flags = IMA_FSMAGIC},
57 {.action = DONT_MEASURE,.fsmagic = SECURITYFS_MAGIC, 64 {.action = DONT_MEASURE,.fsmagic = SECURITYFS_MAGIC,.flags = IMA_FSMAGIC},
58 .flags = IMA_FSMAGIC}, 65 {.action = DONT_MEASURE,.fsmagic = SELINUX_MAGIC,.flags = IMA_FSMAGIC},
59 {.action = DONT_MEASURE,.fsmagic = 0xF97CFF8C,.flags = IMA_FSMAGIC},
60 {.action = MEASURE,.func = FILE_MMAP,.mask = MAY_EXEC, 66 {.action = MEASURE,.func = FILE_MMAP,.mask = MAY_EXEC,
61 .flags = IMA_FUNC | IMA_MASK}, 67 .flags = IMA_FUNC | IMA_MASK},
62 {.action = MEASURE,.func = BPRM_CHECK,.mask = MAY_EXEC, 68 {.action = MEASURE,.func = BPRM_CHECK,.mask = MAY_EXEC,
63 .flags = IMA_FUNC | IMA_MASK}, 69 .flags = IMA_FUNC | IMA_MASK},
64 {.action = MEASURE,.func = PATH_CHECK,.mask = MAY_READ,.uid = 0, 70 {.action = MEASURE,.func = PATH_CHECK,.mask = MAY_READ,.uid = 0,
65 .flags = IMA_FUNC | IMA_MASK | IMA_UID} 71 .flags = IMA_FUNC | IMA_MASK | IMA_UID},
66}; 72};
67 73
68static LIST_HEAD(measure_default_rules); 74static LIST_HEAD(measure_default_rules);
@@ -71,6 +77,14 @@ static struct list_head *ima_measure;
71 77
72static DEFINE_MUTEX(ima_measure_mutex); 78static DEFINE_MUTEX(ima_measure_mutex);
73 79
80static bool ima_use_tcb __initdata;
81static int __init default_policy_setup(char *str)
82{
83 ima_use_tcb = 1;
84 return 1;
85}
86__setup("ima_tcb", default_policy_setup);
87
74/** 88/**
75 * ima_match_rules - determine whether an inode matches the measure rule. 89 * ima_match_rules - determine whether an inode matches the measure rule.
76 * @rule: a pointer to a rule 90 * @rule: a pointer to a rule
@@ -96,7 +110,7 @@ static bool ima_match_rules(struct ima_measure_rule_entry *rule,
96 if ((rule->flags & IMA_UID) && rule->uid != tsk->cred->uid) 110 if ((rule->flags & IMA_UID) && rule->uid != tsk->cred->uid)
97 return false; 111 return false;
98 for (i = 0; i < MAX_LSM_RULES; i++) { 112 for (i = 0; i < MAX_LSM_RULES; i++) {
99 int rc; 113 int rc = 0;
100 u32 osid, sid; 114 u32 osid, sid;
101 115
102 if (!rule->lsm[i].rule) 116 if (!rule->lsm[i].rule)
@@ -109,7 +123,7 @@ static bool ima_match_rules(struct ima_measure_rule_entry *rule,
109 security_inode_getsecid(inode, &osid); 123 security_inode_getsecid(inode, &osid);
110 rc = security_filter_rule_match(osid, 124 rc = security_filter_rule_match(osid,
111 rule->lsm[i].type, 125 rule->lsm[i].type,
112 AUDIT_EQUAL, 126 Audit_equal,
113 rule->lsm[i].rule, 127 rule->lsm[i].rule,
114 NULL); 128 NULL);
115 break; 129 break;
@@ -119,7 +133,7 @@ static bool ima_match_rules(struct ima_measure_rule_entry *rule,
119 security_task_getsecid(tsk, &sid); 133 security_task_getsecid(tsk, &sid);
120 rc = security_filter_rule_match(sid, 134 rc = security_filter_rule_match(sid,
121 rule->lsm[i].type, 135 rule->lsm[i].type,
122 AUDIT_EQUAL, 136 Audit_equal,
123 rule->lsm[i].rule, 137 rule->lsm[i].rule,
124 NULL); 138 NULL);
125 default: 139 default:
@@ -164,11 +178,17 @@ int ima_match_policy(struct inode *inode, enum ima_hooks func, int mask)
164 * ima_measure points to either the measure_default_rules or the 178 * ima_measure points to either the measure_default_rules or the
165 * the new measure_policy_rules. 179 * the new measure_policy_rules.
166 */ 180 */
167void ima_init_policy(void) 181void __init ima_init_policy(void)
168{ 182{
169 int i; 183 int i, entries;
184
185 /* if !ima_use_tcb set entries = 0 so we load NO default rules */
186 if (ima_use_tcb)
187 entries = ARRAY_SIZE(default_rules);
188 else
189 entries = 0;
170 190
171 for (i = 0; i < ARRAY_SIZE(default_rules); i++) 191 for (i = 0; i < entries; i++)
172 list_add_tail(&default_rules[i].list, &measure_default_rules); 192 list_add_tail(&default_rules[i].list, &measure_default_rules);
173 ima_measure = &measure_default_rules; 193 ima_measure = &measure_default_rules;
174} 194}
@@ -227,7 +247,7 @@ static int ima_lsm_rule_init(struct ima_measure_rule_entry *entry,
227 247
228 entry->lsm[lsm_rule].type = audit_type; 248 entry->lsm[lsm_rule].type = audit_type;
229 result = security_filter_rule_init(entry->lsm[lsm_rule].type, 249 result = security_filter_rule_init(entry->lsm[lsm_rule].type,
230 AUDIT_EQUAL, args, 250 Audit_equal, args,
231 &entry->lsm[lsm_rule].rule); 251 &entry->lsm[lsm_rule].rule);
232 return result; 252 return result;
233} 253}
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
new file mode 100644
index 000000000000..94b868494b31
--- /dev/null
+++ b/security/lsm_audit.c
@@ -0,0 +1,386 @@
1/*
2 * common LSM auditing functions
3 *
4 * Based on code written for SELinux by :
5 * Stephen Smalley, <sds@epoch.ncsc.mil>
6 * James Morris <jmorris@redhat.com>
7 * Author : Etienne Basset, <etienne.basset@ensta.org>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2,
11 * as published by the Free Software Foundation.
12 */
13
14#include <linux/types.h>
15#include <linux/stddef.h>
16#include <linux/kernel.h>
17#include <linux/fs.h>
18#include <linux/init.h>
19#include <net/sock.h>
20#include <linux/un.h>
21#include <net/af_unix.h>
22#include <linux/audit.h>
23#include <linux/ipv6.h>
24#include <linux/ip.h>
25#include <net/ip.h>
26#include <net/ipv6.h>
27#include <linux/tcp.h>
28#include <linux/udp.h>
29#include <linux/dccp.h>
30#include <linux/sctp.h>
31#include <linux/lsm_audit.h>
32
33/**
34 * ipv4_skb_to_auditdata : fill auditdata from skb
35 * @skb : the skb
36 * @ad : the audit data to fill
37 * @proto : the layer 4 protocol
38 *
39 * return 0 on success
40 */
41int ipv4_skb_to_auditdata(struct sk_buff *skb,
42 struct common_audit_data *ad, u8 *proto)
43{
44 int ret = 0;
45 struct iphdr *ih;
46
47 ih = ip_hdr(skb);
48 if (ih == NULL)
49 return -EINVAL;
50
51 ad->u.net.v4info.saddr = ih->saddr;
52 ad->u.net.v4info.daddr = ih->daddr;
53
54 if (proto)
55 *proto = ih->protocol;
56 /* non initial fragment */
57 if (ntohs(ih->frag_off) & IP_OFFSET)
58 return 0;
59
60 switch (ih->protocol) {
61 case IPPROTO_TCP: {
62 struct tcphdr *th = tcp_hdr(skb);
63 if (th == NULL)
64 break;
65
66 ad->u.net.sport = th->source;
67 ad->u.net.dport = th->dest;
68 break;
69 }
70 case IPPROTO_UDP: {
71 struct udphdr *uh = udp_hdr(skb);
72 if (uh == NULL)
73 break;
74
75 ad->u.net.sport = uh->source;
76 ad->u.net.dport = uh->dest;
77 break;
78 }
79 case IPPROTO_DCCP: {
80 struct dccp_hdr *dh = dccp_hdr(skb);
81 if (dh == NULL)
82 break;
83
84 ad->u.net.sport = dh->dccph_sport;
85 ad->u.net.dport = dh->dccph_dport;
86 break;
87 }
88 case IPPROTO_SCTP: {
89 struct sctphdr *sh = sctp_hdr(skb);
90 if (sh == NULL)
91 break;
92 ad->u.net.sport = sh->source;
93 ad->u.net.dport = sh->dest;
94 break;
95 }
96 default:
97 ret = -EINVAL;
98 }
99 return ret;
100}
101#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
102/**
103 * ipv6_skb_to_auditdata : fill auditdata from skb
104 * @skb : the skb
105 * @ad : the audit data to fill
106 * @proto : the layer 4 protocol
107 *
108 * return 0 on success
109 */
110int ipv6_skb_to_auditdata(struct sk_buff *skb,
111 struct common_audit_data *ad, u8 *proto)
112{
113 int offset, ret = 0;
114 struct ipv6hdr *ip6;
115 u8 nexthdr;
116
117 ip6 = ipv6_hdr(skb);
118 if (ip6 == NULL)
119 return -EINVAL;
120 ipv6_addr_copy(&ad->u.net.v6info.saddr, &ip6->saddr);
121 ipv6_addr_copy(&ad->u.net.v6info.daddr, &ip6->daddr);
122 ret = 0;
123 /* IPv6 can have several extension header before the Transport header
124 * skip them */
125 offset = skb_network_offset(skb);
126 offset += sizeof(*ip6);
127 nexthdr = ip6->nexthdr;
128 offset = ipv6_skip_exthdr(skb, offset, &nexthdr);
129 if (offset < 0)
130 return 0;
131 if (proto)
132 *proto = nexthdr;
133 switch (nexthdr) {
134 case IPPROTO_TCP: {
135 struct tcphdr _tcph, *th;
136
137 th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
138 if (th == NULL)
139 break;
140
141 ad->u.net.sport = th->source;
142 ad->u.net.dport = th->dest;
143 break;
144 }
145 case IPPROTO_UDP: {
146 struct udphdr _udph, *uh;
147
148 uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
149 if (uh == NULL)
150 break;
151
152 ad->u.net.sport = uh->source;
153 ad->u.net.dport = uh->dest;
154 break;
155 }
156 case IPPROTO_DCCP: {
157 struct dccp_hdr _dccph, *dh;
158
159 dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph);
160 if (dh == NULL)
161 break;
162
163 ad->u.net.sport = dh->dccph_sport;
164 ad->u.net.dport = dh->dccph_dport;
165 break;
166 }
167 case IPPROTO_SCTP: {
168 struct sctphdr _sctph, *sh;
169
170 sh = skb_header_pointer(skb, offset, sizeof(_sctph), &_sctph);
171 if (sh == NULL)
172 break;
173 ad->u.net.sport = sh->source;
174 ad->u.net.dport = sh->dest;
175 break;
176 }
177 default:
178 ret = -EINVAL;
179 }
180 return ret;
181}
182#endif
183
184
185static inline void print_ipv6_addr(struct audit_buffer *ab,
186 struct in6_addr *addr, __be16 port,
187 char *name1, char *name2)
188{
189 if (!ipv6_addr_any(addr))
190 audit_log_format(ab, " %s=%pI6", name1, addr);
191 if (port)
192 audit_log_format(ab, " %s=%d", name2, ntohs(port));
193}
194
195static inline void print_ipv4_addr(struct audit_buffer *ab, __be32 addr,
196 __be16 port, char *name1, char *name2)
197{
198 if (addr)
199 audit_log_format(ab, " %s=%pI4", name1, &addr);
200 if (port)
201 audit_log_format(ab, " %s=%d", name2, ntohs(port));
202}
203
204/**
205 * dump_common_audit_data - helper to dump common audit data
206 * @a : common audit data
207 *
208 */
209static void dump_common_audit_data(struct audit_buffer *ab,
210 struct common_audit_data *a)
211{
212 struct inode *inode = NULL;
213 struct task_struct *tsk = current;
214
215 if (a->tsk)
216 tsk = a->tsk;
217 if (tsk && tsk->pid) {
218 audit_log_format(ab, " pid=%d comm=", tsk->pid);
219 audit_log_untrustedstring(ab, tsk->comm);
220 }
221
222 switch (a->type) {
223 case LSM_AUDIT_DATA_IPC:
224 audit_log_format(ab, " key=%d ", a->u.ipc_id);
225 break;
226 case LSM_AUDIT_DATA_CAP:
227 audit_log_format(ab, " capability=%d ", a->u.cap);
228 break;
229 case LSM_AUDIT_DATA_FS:
230 if (a->u.fs.path.dentry) {
231 struct dentry *dentry = a->u.fs.path.dentry;
232 if (a->u.fs.path.mnt) {
233 audit_log_d_path(ab, "path=", &a->u.fs.path);
234 } else {
235 audit_log_format(ab, " name=");
236 audit_log_untrustedstring(ab,
237 dentry->d_name.name);
238 }
239 inode = dentry->d_inode;
240 } else if (a->u.fs.inode) {
241 struct dentry *dentry;
242 inode = a->u.fs.inode;
243 dentry = d_find_alias(inode);
244 if (dentry) {
245 audit_log_format(ab, " name=");
246 audit_log_untrustedstring(ab,
247 dentry->d_name.name);
248 dput(dentry);
249 }
250 }
251 if (inode)
252 audit_log_format(ab, " dev=%s ino=%lu",
253 inode->i_sb->s_id,
254 inode->i_ino);
255 break;
256 case LSM_AUDIT_DATA_TASK:
257 tsk = a->u.tsk;
258 if (tsk && tsk->pid) {
259 audit_log_format(ab, " pid=%d comm=", tsk->pid);
260 audit_log_untrustedstring(ab, tsk->comm);
261 }
262 break;
263 case LSM_AUDIT_DATA_NET:
264 if (a->u.net.sk) {
265 struct sock *sk = a->u.net.sk;
266 struct unix_sock *u;
267 int len = 0;
268 char *p = NULL;
269
270 switch (sk->sk_family) {
271 case AF_INET: {
272 struct inet_sock *inet = inet_sk(sk);
273
274 print_ipv4_addr(ab, inet->rcv_saddr,
275 inet->sport,
276 "laddr", "lport");
277 print_ipv4_addr(ab, inet->daddr,
278 inet->dport,
279 "faddr", "fport");
280 break;
281 }
282 case AF_INET6: {
283 struct inet_sock *inet = inet_sk(sk);
284 struct ipv6_pinfo *inet6 = inet6_sk(sk);
285
286 print_ipv6_addr(ab, &inet6->rcv_saddr,
287 inet->sport,
288 "laddr", "lport");
289 print_ipv6_addr(ab, &inet6->daddr,
290 inet->dport,
291 "faddr", "fport");
292 break;
293 }
294 case AF_UNIX:
295 u = unix_sk(sk);
296 if (u->dentry) {
297 struct path path = {
298 .dentry = u->dentry,
299 .mnt = u->mnt
300 };
301 audit_log_d_path(ab, "path=", &path);
302 break;
303 }
304 if (!u->addr)
305 break;
306 len = u->addr->len-sizeof(short);
307 p = &u->addr->name->sun_path[0];
308 audit_log_format(ab, " path=");
309 if (*p)
310 audit_log_untrustedstring(ab, p);
311 else
312 audit_log_n_hex(ab, p, len);
313 break;
314 }
315 }
316
317 switch (a->u.net.family) {
318 case AF_INET:
319 print_ipv4_addr(ab, a->u.net.v4info.saddr,
320 a->u.net.sport,
321 "saddr", "src");
322 print_ipv4_addr(ab, a->u.net.v4info.daddr,
323 a->u.net.dport,
324 "daddr", "dest");
325 break;
326 case AF_INET6:
327 print_ipv6_addr(ab, &a->u.net.v6info.saddr,
328 a->u.net.sport,
329 "saddr", "src");
330 print_ipv6_addr(ab, &a->u.net.v6info.daddr,
331 a->u.net.dport,
332 "daddr", "dest");
333 break;
334 }
335 if (a->u.net.netif > 0) {
336 struct net_device *dev;
337
338 /* NOTE: we always use init's namespace */
339 dev = dev_get_by_index(&init_net, a->u.net.netif);
340 if (dev) {
341 audit_log_format(ab, " netif=%s", dev->name);
342 dev_put(dev);
343 }
344 }
345 break;
346#ifdef CONFIG_KEYS
347 case LSM_AUDIT_DATA_KEY:
348 audit_log_format(ab, " key_serial=%u", a->u.key_struct.key);
349 if (a->u.key_struct.key_desc) {
350 audit_log_format(ab, " key_desc=");
351 audit_log_untrustedstring(ab, a->u.key_struct.key_desc);
352 }
353 break;
354#endif
355 } /* switch (a->type) */
356}
357
358/**
359 * common_lsm_audit - generic LSM auditing function
360 * @a: auxiliary audit data
361 *
362 * setup the audit buffer for common security information
363 * uses callback to print LSM specific information
364 */
365void common_lsm_audit(struct common_audit_data *a)
366{
367 struct audit_buffer *ab;
368
369 if (a == NULL)
370 return;
371 /* we use GFP_ATOMIC so we won't sleep */
372 ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_AVC);
373
374 if (ab == NULL)
375 return;
376
377 if (a->lsm_pre_audit)
378 a->lsm_pre_audit(ab, a);
379
380 dump_common_audit_data(ab, a);
381
382 if (a->lsm_post_audit)
383 a->lsm_post_audit(ab, a);
384
385 audit_log_end(ab);
386}
diff --git a/security/root_plug.c b/security/root_plug.c
index 40fb4f15e27b..2f7ffa67c4d2 100644
--- a/security/root_plug.c
+++ b/security/root_plug.c
@@ -71,18 +71,6 @@ static int rootplug_bprm_check_security (struct linux_binprm *bprm)
71} 71}
72 72
73static struct security_operations rootplug_security_ops = { 73static struct security_operations rootplug_security_ops = {
74 /* Use the capability functions for some of the hooks */
75 .ptrace_may_access = cap_ptrace_may_access,
76 .ptrace_traceme = cap_ptrace_traceme,
77 .capget = cap_capget,
78 .capset = cap_capset,
79 .capable = cap_capable,
80
81 .bprm_set_creds = cap_bprm_set_creds,
82
83 .task_fix_setuid = cap_task_fix_setuid,
84 .task_prctl = cap_task_prctl,
85
86 .bprm_check_security = rootplug_bprm_check_security, 74 .bprm_check_security = rootplug_bprm_check_security,
87}; 75};
88 76
diff --git a/security/security.c b/security/security.c
index 5284255c5cdf..dc7674fbfc7a 100644
--- a/security/security.c
+++ b/security/security.c
@@ -26,9 +26,6 @@ extern void security_fixup_ops(struct security_operations *ops);
26 26
27struct security_operations *security_ops; /* Initialized to NULL */ 27struct security_operations *security_ops; /* Initialized to NULL */
28 28
29/* amount of vm to protect from userspace access */
30unsigned long mmap_min_addr = CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR;
31
32static inline int verify(struct security_operations *ops) 29static inline int verify(struct security_operations *ops)
33{ 30{
34 /* verify the security_operations structure exists */ 31 /* verify the security_operations structure exists */
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index 7f9b5fac8779..b2ab60859832 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -927,7 +927,7 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid,
927 if (denied) { 927 if (denied) {
928 if (flags & AVC_STRICT) 928 if (flags & AVC_STRICT)
929 rc = -EACCES; 929 rc = -EACCES;
930 else if (!selinux_enforcing || security_permissive_sid(ssid)) 930 else if (!selinux_enforcing || (avd->flags & AVD_FLAGS_PERMISSIVE))
931 avc_update_node(AVC_CALLBACK_GRANT, requested, ssid, 931 avc_update_node(AVC_CALLBACK_GRANT, requested, ssid,
932 tsid, tclass, avd->seqno); 932 tsid, tclass, avd->seqno);
933 else 933 else
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 2fcad7c33eaf..195906bce266 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1980,10 +1980,6 @@ static int selinux_sysctl(ctl_table *table, int op)
1980 u32 tsid, sid; 1980 u32 tsid, sid;
1981 int rc; 1981 int rc;
1982 1982
1983 rc = secondary_ops->sysctl(table, op);
1984 if (rc)
1985 return rc;
1986
1987 sid = current_sid(); 1983 sid = current_sid();
1988 1984
1989 rc = selinux_sysctl_get_sid(table, (op == 0001) ? 1985 rc = selinux_sysctl_get_sid(table, (op == 0001) ?
@@ -2375,10 +2371,8 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm)
2375{ 2371{
2376 const struct task_security_struct *tsec = current_security(); 2372 const struct task_security_struct *tsec = current_security();
2377 struct itimerval itimer; 2373 struct itimerval itimer;
2378 struct sighand_struct *psig;
2379 u32 osid, sid; 2374 u32 osid, sid;
2380 int rc, i; 2375 int rc, i;
2381 unsigned long flags;
2382 2376
2383 osid = tsec->osid; 2377 osid = tsec->osid;
2384 sid = tsec->sid; 2378 sid = tsec->sid;
@@ -2398,22 +2392,20 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm)
2398 memset(&itimer, 0, sizeof itimer); 2392 memset(&itimer, 0, sizeof itimer);
2399 for (i = 0; i < 3; i++) 2393 for (i = 0; i < 3; i++)
2400 do_setitimer(i, &itimer, NULL); 2394 do_setitimer(i, &itimer, NULL);
2401 flush_signals(current);
2402 spin_lock_irq(&current->sighand->siglock); 2395 spin_lock_irq(&current->sighand->siglock);
2403 flush_signal_handlers(current, 1); 2396 if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) {
2404 sigemptyset(&current->blocked); 2397 __flush_signals(current);
2405 recalc_sigpending(); 2398 flush_signal_handlers(current, 1);
2399 sigemptyset(&current->blocked);
2400 }
2406 spin_unlock_irq(&current->sighand->siglock); 2401 spin_unlock_irq(&current->sighand->siglock);
2407 } 2402 }
2408 2403
2409 /* Wake up the parent if it is waiting so that it can recheck 2404 /* Wake up the parent if it is waiting so that it can recheck
2410 * wait permission to the new task SID. */ 2405 * wait permission to the new task SID. */
2411 read_lock_irq(&tasklist_lock); 2406 read_lock(&tasklist_lock);
2412 psig = current->parent->sighand; 2407 wake_up_interruptible(&current->real_parent->signal->wait_chldexit);
2413 spin_lock_irqsave(&psig->siglock, flags); 2408 read_unlock(&tasklist_lock);
2414 wake_up_interruptible(&current->parent->signal->wait_chldexit);
2415 spin_unlock_irqrestore(&psig->siglock, flags);
2416 read_unlock_irq(&tasklist_lock);
2417} 2409}
2418 2410
2419/* superblock security operations */ 2411/* superblock security operations */
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 5c3434f7626f..ca835795a8b3 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -8,14 +8,13 @@
8#ifndef _SELINUX_SECURITY_H_ 8#ifndef _SELINUX_SECURITY_H_
9#define _SELINUX_SECURITY_H_ 9#define _SELINUX_SECURITY_H_
10 10
11#include <linux/magic.h>
11#include "flask.h" 12#include "flask.h"
12 13
13#define SECSID_NULL 0x00000000 /* unspecified SID */ 14#define SECSID_NULL 0x00000000 /* unspecified SID */
14#define SECSID_WILD 0xffffffff /* wildcard SID */ 15#define SECSID_WILD 0xffffffff /* wildcard SID */
15#define SECCLASS_NULL 0x0000 /* no class */ 16#define SECCLASS_NULL 0x0000 /* no class */
16 17
17#define SELINUX_MAGIC 0xf97cff8c
18
19/* Identify specific policy version changes */ 18/* Identify specific policy version changes */
20#define POLICYDB_VERSION_BASE 15 19#define POLICYDB_VERSION_BASE 15
21#define POLICYDB_VERSION_BOOL 16 20#define POLICYDB_VERSION_BOOL 16
@@ -91,9 +90,11 @@ struct av_decision {
91 u32 auditallow; 90 u32 auditallow;
92 u32 auditdeny; 91 u32 auditdeny;
93 u32 seqno; 92 u32 seqno;
93 u32 flags;
94}; 94};
95 95
96int security_permissive_sid(u32 sid); 96/* definitions of av_decision.flags */
97#define AVD_FLAGS_PERMISSIVE 0x0001
97 98
98int security_compute_av(u32 ssid, u32 tsid, 99int security_compute_av(u32 ssid, u32 tsid,
99 u16 tclass, u32 requested, 100 u16 tclass, u32 requested,
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index c6875fd3b9d6..dd7cc6de77f9 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -112,6 +112,8 @@ static struct nlmsg_perm nlmsg_audit_perms[] =
112 { AUDIT_DEL_RULE, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, 112 { AUDIT_DEL_RULE, NETLINK_AUDIT_SOCKET__NLMSG_WRITE },
113 { AUDIT_USER, NETLINK_AUDIT_SOCKET__NLMSG_RELAY }, 113 { AUDIT_USER, NETLINK_AUDIT_SOCKET__NLMSG_RELAY },
114 { AUDIT_SIGNAL_INFO, NETLINK_AUDIT_SOCKET__NLMSG_READ }, 114 { AUDIT_SIGNAL_INFO, NETLINK_AUDIT_SOCKET__NLMSG_READ },
115 { AUDIT_TRIM, NETLINK_AUDIT_SOCKET__NLMSG_WRITE },
116 { AUDIT_MAKE_EQUIV, NETLINK_AUDIT_SOCKET__NLMSG_WRITE },
115 { AUDIT_TTY_GET, NETLINK_AUDIT_SOCKET__NLMSG_READ }, 117 { AUDIT_TTY_GET, NETLINK_AUDIT_SOCKET__NLMSG_READ },
116 { AUDIT_TTY_SET, NETLINK_AUDIT_SOCKET__NLMSG_TTY_AUDIT }, 118 { AUDIT_TTY_SET, NETLINK_AUDIT_SOCKET__NLMSG_TTY_AUDIT },
117}; 119};
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 2d5136ec3d54..b4fc506e7a87 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -527,10 +527,10 @@ static ssize_t sel_write_access(struct file *file, char *buf, size_t size)
527 goto out2; 527 goto out2;
528 528
529 length = scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, 529 length = scnprintf(buf, SIMPLE_TRANSACTION_LIMIT,
530 "%x %x %x %x %u", 530 "%x %x %x %x %u %x",
531 avd.allowed, 0xffffffff, 531 avd.allowed, 0xffffffff,
532 avd.auditallow, avd.auditdeny, 532 avd.auditallow, avd.auditdeny,
533 avd.seqno); 533 avd.seqno, avd.flags);
534out2: 534out2:
535 kfree(tcon); 535 kfree(tcon);
536out: 536out:
@@ -803,10 +803,6 @@ static ssize_t sel_read_bool(struct file *filep, char __user *buf,
803 goto out; 803 goto out;
804 } 804 }
805 805
806 if (count > PAGE_SIZE) {
807 ret = -EINVAL;
808 goto out;
809 }
810 page = (char *)get_zeroed_page(GFP_KERNEL); 806 page = (char *)get_zeroed_page(GFP_KERNEL);
811 if (!page) { 807 if (!page) {
812 ret = -ENOMEM; 808 ret = -ENOMEM;
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index deeec6c013ae..500e6f78e115 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -410,6 +410,7 @@ static int context_struct_compute_av(struct context *scontext,
410 avd->auditallow = 0; 410 avd->auditallow = 0;
411 avd->auditdeny = 0xffffffff; 411 avd->auditdeny = 0xffffffff;
412 avd->seqno = latest_granting; 412 avd->seqno = latest_granting;
413 avd->flags = 0;
413 414
414 /* 415 /*
415 * Check for all the invalid cases. 416 * Check for all the invalid cases.
@@ -528,31 +529,6 @@ inval_class:
528 return 0; 529 return 0;
529} 530}
530 531
531/*
532 * Given a sid find if the type has the permissive flag set
533 */
534int security_permissive_sid(u32 sid)
535{
536 struct context *context;
537 u32 type;
538 int rc;
539
540 read_lock(&policy_rwlock);
541
542 context = sidtab_search(&sidtab, sid);
543 BUG_ON(!context);
544
545 type = context->type;
546 /*
547 * we are intentionally using type here, not type-1, the 0th bit may
548 * someday indicate that we are globally setting permissive in policy.
549 */
550 rc = ebitmap_get_bit(&policydb.permissive_map, type);
551
552 read_unlock(&policy_rwlock);
553 return rc;
554}
555
556static int security_validtrans_handle_fail(struct context *ocontext, 532static int security_validtrans_handle_fail(struct context *ocontext,
557 struct context *ncontext, 533 struct context *ncontext,
558 struct context *tcontext, 534 struct context *tcontext,
@@ -767,6 +743,10 @@ int security_compute_av(u32 ssid,
767 743
768 rc = context_struct_compute_av(scontext, tcontext, tclass, 744 rc = context_struct_compute_av(scontext, tcontext, tclass,
769 requested, avd); 745 requested, avd);
746
747 /* permissive domain? */
748 if (ebitmap_get_bit(&policydb.permissive_map, scontext->type))
749 avd->flags |= AVD_FLAGS_PERMISSIVE;
770out: 750out:
771 read_unlock(&policy_rwlock); 751 read_unlock(&policy_rwlock);
772 return rc; 752 return rc;
diff --git a/security/smack/smack.h b/security/smack/smack.h
index 42ef313f9856..243bec175be0 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -20,6 +20,7 @@
20#include <net/netlabel.h> 20#include <net/netlabel.h>
21#include <linux/list.h> 21#include <linux/list.h>
22#include <linux/rculist.h> 22#include <linux/rculist.h>
23#include <linux/lsm_audit.h>
23 24
24/* 25/*
25 * Why 23? CIPSO is constrained to 30, so a 32 byte buffer is 26 * Why 23? CIPSO is constrained to 30, so a 32 byte buffer is
@@ -179,6 +180,20 @@ struct smack_known {
179#define MAY_NOT 0 180#define MAY_NOT 0
180 181
181/* 182/*
183 * Number of access types used by Smack (rwxa)
184 */
185#define SMK_NUM_ACCESS_TYPE 4
186
187/*
188 * Smack audit data; is empty if CONFIG_AUDIT not set
189 * to save some stack
190 */
191struct smk_audit_info {
192#ifdef CONFIG_AUDIT
193 struct common_audit_data a;
194#endif
195};
196/*
182 * These functions are in smack_lsm.c 197 * These functions are in smack_lsm.c
183 */ 198 */
184struct inode_smack *new_inode_smack(char *); 199struct inode_smack *new_inode_smack(char *);
@@ -186,8 +201,8 @@ struct inode_smack *new_inode_smack(char *);
186/* 201/*
187 * These functions are in smack_access.c 202 * These functions are in smack_access.c
188 */ 203 */
189int smk_access(char *, char *, int); 204int smk_access(char *, char *, int, struct smk_audit_info *);
190int smk_curacc(char *, u32); 205int smk_curacc(char *, u32, struct smk_audit_info *);
191int smack_to_cipso(const char *, struct smack_cipso *); 206int smack_to_cipso(const char *, struct smack_cipso *);
192void smack_from_cipso(u32, char *, char *); 207void smack_from_cipso(u32, char *, char *);
193char *smack_from_secid(const u32); 208char *smack_from_secid(const u32);
@@ -237,4 +252,93 @@ static inline char *smk_of_inode(const struct inode *isp)
237 return sip->smk_inode; 252 return sip->smk_inode;
238} 253}
239 254
255/*
256 * logging functions
257 */
258#define SMACK_AUDIT_DENIED 0x1
259#define SMACK_AUDIT_ACCEPT 0x2
260extern int log_policy;
261
262void smack_log(char *subject_label, char *object_label,
263 int request,
264 int result, struct smk_audit_info *auditdata);
265
266#ifdef CONFIG_AUDIT
267
268/*
269 * some inline functions to set up audit data
270 * they do nothing if CONFIG_AUDIT is not set
271 *
272 */
273static inline void smk_ad_init(struct smk_audit_info *a, const char *func,
274 char type)
275{
276 memset(a, 0, sizeof(*a));
277 a->a.type = type;
278 a->a.function = func;
279}
280
281static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a,
282 struct task_struct *t)
283{
284 a->a.u.tsk = t;
285}
286static inline void smk_ad_setfield_u_fs_path_dentry(struct smk_audit_info *a,
287 struct dentry *d)
288{
289 a->a.u.fs.path.dentry = d;
290}
291static inline void smk_ad_setfield_u_fs_path_mnt(struct smk_audit_info *a,
292 struct vfsmount *m)
293{
294 a->a.u.fs.path.mnt = m;
295}
296static inline void smk_ad_setfield_u_fs_inode(struct smk_audit_info *a,
297 struct inode *i)
298{
299 a->a.u.fs.inode = i;
300}
301static inline void smk_ad_setfield_u_fs_path(struct smk_audit_info *a,
302 struct path p)
303{
304 a->a.u.fs.path = p;
305}
306static inline void smk_ad_setfield_u_net_sk(struct smk_audit_info *a,
307 struct sock *sk)
308{
309 a->a.u.net.sk = sk;
310}
311
312#else /* no AUDIT */
313
314static inline void smk_ad_init(struct smk_audit_info *a, const char *func,
315 char type)
316{
317}
318static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a,
319 struct task_struct *t)
320{
321}
322static inline void smk_ad_setfield_u_fs_path_dentry(struct smk_audit_info *a,
323 struct dentry *d)
324{
325}
326static inline void smk_ad_setfield_u_fs_path_mnt(struct smk_audit_info *a,
327 struct vfsmount *m)
328{
329}
330static inline void smk_ad_setfield_u_fs_inode(struct smk_audit_info *a,
331 struct inode *i)
332{
333}
334static inline void smk_ad_setfield_u_fs_path(struct smk_audit_info *a,
335 struct path p)
336{
337}
338static inline void smk_ad_setfield_u_net_sk(struct smk_audit_info *a,
339 struct sock *sk)
340{
341}
342#endif
343
240#endif /* _SECURITY_SMACK_H */ 344#endif /* _SECURITY_SMACK_H */
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index ac0a2707f6d4..513dc1aa16dd 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -59,11 +59,18 @@ LIST_HEAD(smack_known_list);
59 */ 59 */
60static u32 smack_next_secid = 10; 60static u32 smack_next_secid = 10;
61 61
62/*
63 * what events do we log
64 * can be overwritten at run-time by /smack/logging
65 */
66int log_policy = SMACK_AUDIT_DENIED;
67
62/** 68/**
63 * smk_access - determine if a subject has a specific access to an object 69 * smk_access - determine if a subject has a specific access to an object
64 * @subject_label: a pointer to the subject's Smack label 70 * @subject_label: a pointer to the subject's Smack label
65 * @object_label: a pointer to the object's Smack label 71 * @object_label: a pointer to the object's Smack label
66 * @request: the access requested, in "MAY" format 72 * @request: the access requested, in "MAY" format
73 * @a : a pointer to the audit data
67 * 74 *
68 * This function looks up the subject/object pair in the 75 * This function looks up the subject/object pair in the
69 * access rule list and returns 0 if the access is permitted, 76 * access rule list and returns 0 if the access is permitted,
@@ -78,10 +85,12 @@ static u32 smack_next_secid = 10;
78 * will be on the list, so checking the pointers may be a worthwhile 85 * will be on the list, so checking the pointers may be a worthwhile
79 * optimization. 86 * optimization.
80 */ 87 */
81int smk_access(char *subject_label, char *object_label, int request) 88int smk_access(char *subject_label, char *object_label, int request,
89 struct smk_audit_info *a)
82{ 90{
83 u32 may = MAY_NOT; 91 u32 may = MAY_NOT;
84 struct smack_rule *srp; 92 struct smack_rule *srp;
93 int rc = 0;
85 94
86 /* 95 /*
87 * Hardcoded comparisons. 96 * Hardcoded comparisons.
@@ -89,8 +98,10 @@ int smk_access(char *subject_label, char *object_label, int request)
89 * A star subject can't access any object. 98 * A star subject can't access any object.
90 */ 99 */
91 if (subject_label == smack_known_star.smk_known || 100 if (subject_label == smack_known_star.smk_known ||
92 strcmp(subject_label, smack_known_star.smk_known) == 0) 101 strcmp(subject_label, smack_known_star.smk_known) == 0) {
93 return -EACCES; 102 rc = -EACCES;
103 goto out_audit;
104 }
94 /* 105 /*
95 * An internet object can be accessed by any subject. 106 * An internet object can be accessed by any subject.
96 * Tasks cannot be assigned the internet label. 107 * Tasks cannot be assigned the internet label.
@@ -100,20 +111,20 @@ int smk_access(char *subject_label, char *object_label, int request)
100 subject_label == smack_known_web.smk_known || 111 subject_label == smack_known_web.smk_known ||
101 strcmp(object_label, smack_known_web.smk_known) == 0 || 112 strcmp(object_label, smack_known_web.smk_known) == 0 ||
102 strcmp(subject_label, smack_known_web.smk_known) == 0) 113 strcmp(subject_label, smack_known_web.smk_known) == 0)
103 return 0; 114 goto out_audit;
104 /* 115 /*
105 * A star object can be accessed by any subject. 116 * A star object can be accessed by any subject.
106 */ 117 */
107 if (object_label == smack_known_star.smk_known || 118 if (object_label == smack_known_star.smk_known ||
108 strcmp(object_label, smack_known_star.smk_known) == 0) 119 strcmp(object_label, smack_known_star.smk_known) == 0)
109 return 0; 120 goto out_audit;
110 /* 121 /*
111 * An object can be accessed in any way by a subject 122 * An object can be accessed in any way by a subject
112 * with the same label. 123 * with the same label.
113 */ 124 */
114 if (subject_label == object_label || 125 if (subject_label == object_label ||
115 strcmp(subject_label, object_label) == 0) 126 strcmp(subject_label, object_label) == 0)
116 return 0; 127 goto out_audit;
117 /* 128 /*
118 * A hat subject can read any object. 129 * A hat subject can read any object.
119 * A floor object can be read by any subject. 130 * A floor object can be read by any subject.
@@ -121,10 +132,10 @@ int smk_access(char *subject_label, char *object_label, int request)
121 if ((request & MAY_ANYREAD) == request) { 132 if ((request & MAY_ANYREAD) == request) {
122 if (object_label == smack_known_floor.smk_known || 133 if (object_label == smack_known_floor.smk_known ||
123 strcmp(object_label, smack_known_floor.smk_known) == 0) 134 strcmp(object_label, smack_known_floor.smk_known) == 0)
124 return 0; 135 goto out_audit;
125 if (subject_label == smack_known_hat.smk_known || 136 if (subject_label == smack_known_hat.smk_known ||
126 strcmp(subject_label, smack_known_hat.smk_known) == 0) 137 strcmp(subject_label, smack_known_hat.smk_known) == 0)
127 return 0; 138 goto out_audit;
128 } 139 }
129 /* 140 /*
130 * Beyond here an explicit relationship is required. 141 * Beyond here an explicit relationship is required.
@@ -148,28 +159,36 @@ int smk_access(char *subject_label, char *object_label, int request)
148 * This is a bit map operation. 159 * This is a bit map operation.
149 */ 160 */
150 if ((request & may) == request) 161 if ((request & may) == request)
151 return 0; 162 goto out_audit;
152 163
153 return -EACCES; 164 rc = -EACCES;
165out_audit:
166#ifdef CONFIG_AUDIT
167 if (a)
168 smack_log(subject_label, object_label, request, rc, a);
169#endif
170 return rc;
154} 171}
155 172
156/** 173/**
157 * smk_curacc - determine if current has a specific access to an object 174 * smk_curacc - determine if current has a specific access to an object
158 * @obj_label: a pointer to the object's Smack label 175 * @obj_label: a pointer to the object's Smack label
159 * @mode: the access requested, in "MAY" format 176 * @mode: the access requested, in "MAY" format
177 * @a : common audit data
160 * 178 *
161 * This function checks the current subject label/object label pair 179 * This function checks the current subject label/object label pair
162 * in the access rule list and returns 0 if the access is permitted, 180 * in the access rule list and returns 0 if the access is permitted,
163 * non zero otherwise. It allows that current may have the capability 181 * non zero otherwise. It allows that current may have the capability
164 * to override the rules. 182 * to override the rules.
165 */ 183 */
166int smk_curacc(char *obj_label, u32 mode) 184int smk_curacc(char *obj_label, u32 mode, struct smk_audit_info *a)
167{ 185{
168 int rc; 186 int rc;
187 char *sp = current_security();
169 188
170 rc = smk_access(current_security(), obj_label, mode); 189 rc = smk_access(sp, obj_label, mode, NULL);
171 if (rc == 0) 190 if (rc == 0)
172 return 0; 191 goto out_audit;
173 192
174 /* 193 /*
175 * Return if a specific label has been designated as the 194 * Return if a specific label has been designated as the
@@ -177,14 +196,105 @@ int smk_curacc(char *obj_label, u32 mode)
177 * have that label. 196 * have that label.
178 */ 197 */
179 if (smack_onlycap != NULL && smack_onlycap != current->cred->security) 198 if (smack_onlycap != NULL && smack_onlycap != current->cred->security)
180 return rc; 199 goto out_audit;
181 200
182 if (capable(CAP_MAC_OVERRIDE)) 201 if (capable(CAP_MAC_OVERRIDE))
183 return 0; 202 return 0;
184 203
204out_audit:
205#ifdef CONFIG_AUDIT
206 if (a)
207 smack_log(sp, obj_label, mode, rc, a);
208#endif
185 return rc; 209 return rc;
186} 210}
187 211
212#ifdef CONFIG_AUDIT
213/**
214 * smack_str_from_perm : helper to transalate an int to a
215 * readable string
216 * @string : the string to fill
217 * @access : the int
218 *
219 */
220static inline void smack_str_from_perm(char *string, int access)
221{
222 int i = 0;
223 if (access & MAY_READ)
224 string[i++] = 'r';
225 if (access & MAY_WRITE)
226 string[i++] = 'w';
227 if (access & MAY_EXEC)
228 string[i++] = 'x';
229 if (access & MAY_APPEND)
230 string[i++] = 'a';
231 string[i] = '\0';
232}
233/**
234 * smack_log_callback - SMACK specific information
235 * will be called by generic audit code
236 * @ab : the audit_buffer
237 * @a : audit_data
238 *
239 */
240static void smack_log_callback(struct audit_buffer *ab, void *a)
241{
242 struct common_audit_data *ad = a;
243 struct smack_audit_data *sad = &ad->lsm_priv.smack_audit_data;
244 audit_log_format(ab, "lsm=SMACK fn=%s action=%s", ad->function,
245 sad->result ? "denied" : "granted");
246 audit_log_format(ab, " subject=");
247 audit_log_untrustedstring(ab, sad->subject);
248 audit_log_format(ab, " object=");
249 audit_log_untrustedstring(ab, sad->object);
250 audit_log_format(ab, " requested=%s", sad->request);
251}
252
253/**
254 * smack_log - Audit the granting or denial of permissions.
255 * @subject_label : smack label of the requester
256 * @object_label : smack label of the object being accessed
257 * @request: requested permissions
258 * @result: result from smk_access
259 * @a: auxiliary audit data
260 *
261 * Audit the granting or denial of permissions in accordance
262 * with the policy.
263 */
264void smack_log(char *subject_label, char *object_label, int request,
265 int result, struct smk_audit_info *ad)
266{
267 char request_buffer[SMK_NUM_ACCESS_TYPE + 1];
268 struct smack_audit_data *sad;
269 struct common_audit_data *a = &ad->a;
270
271 /* check if we have to log the current event */
272 if (result != 0 && (log_policy & SMACK_AUDIT_DENIED) == 0)
273 return;
274 if (result == 0 && (log_policy & SMACK_AUDIT_ACCEPT) == 0)
275 return;
276
277 if (a->function == NULL)
278 a->function = "unknown";
279
280 /* end preparing the audit data */
281 sad = &a->lsm_priv.smack_audit_data;
282 smack_str_from_perm(request_buffer, request);
283 sad->subject = subject_label;
284 sad->object = object_label;
285 sad->request = request_buffer;
286 sad->result = result;
287 a->lsm_pre_audit = smack_log_callback;
288
289 common_lsm_audit(a);
290}
291#else /* #ifdef CONFIG_AUDIT */
292void smack_log(char *subject_label, char *object_label, int request,
293 int result, struct smk_audit_info *ad)
294{
295}
296#endif
297
188static DEFINE_MUTEX(smack_known_lock); 298static DEFINE_MUTEX(smack_known_lock);
189 299
190/** 300/**
@@ -209,7 +319,8 @@ struct smack_known *smk_import_entry(const char *string, int len)
209 if (found) 319 if (found)
210 smack[i] = '\0'; 320 smack[i] = '\0';
211 else if (i >= len || string[i] > '~' || string[i] <= ' ' || 321 else if (i >= len || string[i] > '~' || string[i] <= ' ' ||
212 string[i] == '/') { 322 string[i] == '/' || string[i] == '"' ||
323 string[i] == '\\' || string[i] == '\'') {
213 smack[i] = '\0'; 324 smack[i] = '\0';
214 found = 1; 325 found = 1;
215 } else 326 } else
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 98b3195347ab..0023182078c7 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -30,7 +30,6 @@
30#include <net/netlabel.h> 30#include <net/netlabel.h>
31#include <net/cipso_ipv4.h> 31#include <net/cipso_ipv4.h>
32#include <linux/audit.h> 32#include <linux/audit.h>
33
34#include "smack.h" 33#include "smack.h"
35 34
36#define task_security(task) (task_cred_xxx((task), security)) 35#define task_security(task) (task_cred_xxx((task), security))
@@ -103,14 +102,24 @@ struct inode_smack *new_inode_smack(char *smack)
103static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode) 102static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode)
104{ 103{
105 int rc; 104 int rc;
105 struct smk_audit_info ad;
106 char *sp, *tsp;
106 107
107 rc = cap_ptrace_may_access(ctp, mode); 108 rc = cap_ptrace_may_access(ctp, mode);
108 if (rc != 0) 109 if (rc != 0)
109 return rc; 110 return rc;
110 111
111 rc = smk_access(current_security(), task_security(ctp), MAY_READWRITE); 112 sp = current_security();
113 tsp = task_security(ctp);
114 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
115 smk_ad_setfield_u_tsk(&ad, ctp);
116
117 /* we won't log here, because rc can be overriden */
118 rc = smk_access(sp, tsp, MAY_READWRITE, NULL);
112 if (rc != 0 && capable(CAP_MAC_OVERRIDE)) 119 if (rc != 0 && capable(CAP_MAC_OVERRIDE))
113 return 0; 120 rc = 0;
121
122 smack_log(sp, tsp, MAY_READWRITE, rc, &ad);
114 return rc; 123 return rc;
115} 124}
116 125
@@ -125,14 +134,24 @@ static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode)
125static int smack_ptrace_traceme(struct task_struct *ptp) 134static int smack_ptrace_traceme(struct task_struct *ptp)
126{ 135{
127 int rc; 136 int rc;
137 struct smk_audit_info ad;
138 char *sp, *tsp;
128 139
129 rc = cap_ptrace_traceme(ptp); 140 rc = cap_ptrace_traceme(ptp);
130 if (rc != 0) 141 if (rc != 0)
131 return rc; 142 return rc;
132 143
133 rc = smk_access(task_security(ptp), current_security(), MAY_READWRITE); 144 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
145 smk_ad_setfield_u_tsk(&ad, ptp);
146
147 sp = current_security();
148 tsp = task_security(ptp);
149 /* we won't log here, because rc can be overriden */
150 rc = smk_access(tsp, sp, MAY_READWRITE, NULL);
134 if (rc != 0 && has_capability(ptp, CAP_MAC_OVERRIDE)) 151 if (rc != 0 && has_capability(ptp, CAP_MAC_OVERRIDE))
135 return 0; 152 rc = 0;
153
154 smack_log(tsp, sp, MAY_READWRITE, rc, &ad);
136 return rc; 155 return rc;
137} 156}
138 157
@@ -327,8 +346,14 @@ static int smack_sb_kern_mount(struct super_block *sb, int flags, void *data)
327static int smack_sb_statfs(struct dentry *dentry) 346static int smack_sb_statfs(struct dentry *dentry)
328{ 347{
329 struct superblock_smack *sbp = dentry->d_sb->s_security; 348 struct superblock_smack *sbp = dentry->d_sb->s_security;
349 int rc;
350 struct smk_audit_info ad;
351
352 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
353 smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
330 354
331 return smk_curacc(sbp->smk_floor, MAY_READ); 355 rc = smk_curacc(sbp->smk_floor, MAY_READ, &ad);
356 return rc;
332} 357}
333 358
334/** 359/**
@@ -346,8 +371,12 @@ static int smack_sb_mount(char *dev_name, struct path *path,
346 char *type, unsigned long flags, void *data) 371 char *type, unsigned long flags, void *data)
347{ 372{
348 struct superblock_smack *sbp = path->mnt->mnt_sb->s_security; 373 struct superblock_smack *sbp = path->mnt->mnt_sb->s_security;
374 struct smk_audit_info ad;
375
376 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
377 smk_ad_setfield_u_fs_path(&ad, *path);
349 378
350 return smk_curacc(sbp->smk_floor, MAY_WRITE); 379 return smk_curacc(sbp->smk_floor, MAY_WRITE, &ad);
351} 380}
352 381
353/** 382/**
@@ -361,10 +390,14 @@ static int smack_sb_mount(char *dev_name, struct path *path,
361static int smack_sb_umount(struct vfsmount *mnt, int flags) 390static int smack_sb_umount(struct vfsmount *mnt, int flags)
362{ 391{
363 struct superblock_smack *sbp; 392 struct superblock_smack *sbp;
393 struct smk_audit_info ad;
364 394
365 sbp = mnt->mnt_sb->s_security; 395 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
396 smk_ad_setfield_u_fs_path_dentry(&ad, mnt->mnt_mountpoint);
397 smk_ad_setfield_u_fs_path_mnt(&ad, mnt);
366 398
367 return smk_curacc(sbp->smk_floor, MAY_WRITE); 399 sbp = mnt->mnt_sb->s_security;
400 return smk_curacc(sbp->smk_floor, MAY_WRITE, &ad);
368} 401}
369 402
370/* 403/*
@@ -441,15 +474,20 @@ static int smack_inode_init_security(struct inode *inode, struct inode *dir,
441static int smack_inode_link(struct dentry *old_dentry, struct inode *dir, 474static int smack_inode_link(struct dentry *old_dentry, struct inode *dir,
442 struct dentry *new_dentry) 475 struct dentry *new_dentry)
443{ 476{
444 int rc;
445 char *isp; 477 char *isp;
478 struct smk_audit_info ad;
479 int rc;
480
481 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
482 smk_ad_setfield_u_fs_path_dentry(&ad, old_dentry);
446 483
447 isp = smk_of_inode(old_dentry->d_inode); 484 isp = smk_of_inode(old_dentry->d_inode);
448 rc = smk_curacc(isp, MAY_WRITE); 485 rc = smk_curacc(isp, MAY_WRITE, &ad);
449 486
450 if (rc == 0 && new_dentry->d_inode != NULL) { 487 if (rc == 0 && new_dentry->d_inode != NULL) {
451 isp = smk_of_inode(new_dentry->d_inode); 488 isp = smk_of_inode(new_dentry->d_inode);
452 rc = smk_curacc(isp, MAY_WRITE); 489 smk_ad_setfield_u_fs_path_dentry(&ad, new_dentry);
490 rc = smk_curacc(isp, MAY_WRITE, &ad);
453 } 491 }
454 492
455 return rc; 493 return rc;
@@ -466,18 +504,24 @@ static int smack_inode_link(struct dentry *old_dentry, struct inode *dir,
466static int smack_inode_unlink(struct inode *dir, struct dentry *dentry) 504static int smack_inode_unlink(struct inode *dir, struct dentry *dentry)
467{ 505{
468 struct inode *ip = dentry->d_inode; 506 struct inode *ip = dentry->d_inode;
507 struct smk_audit_info ad;
469 int rc; 508 int rc;
470 509
510 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
511 smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
512
471 /* 513 /*
472 * You need write access to the thing you're unlinking 514 * You need write access to the thing you're unlinking
473 */ 515 */
474 rc = smk_curacc(smk_of_inode(ip), MAY_WRITE); 516 rc = smk_curacc(smk_of_inode(ip), MAY_WRITE, &ad);
475 if (rc == 0) 517 if (rc == 0) {
476 /* 518 /*
477 * You also need write access to the containing directory 519 * You also need write access to the containing directory
478 */ 520 */
479 rc = smk_curacc(smk_of_inode(dir), MAY_WRITE); 521 smk_ad_setfield_u_fs_path_dentry(&ad, NULL);
480 522 smk_ad_setfield_u_fs_inode(&ad, dir);
523 rc = smk_curacc(smk_of_inode(dir), MAY_WRITE, &ad);
524 }
481 return rc; 525 return rc;
482} 526}
483 527
@@ -491,17 +535,24 @@ static int smack_inode_unlink(struct inode *dir, struct dentry *dentry)
491 */ 535 */
492static int smack_inode_rmdir(struct inode *dir, struct dentry *dentry) 536static int smack_inode_rmdir(struct inode *dir, struct dentry *dentry)
493{ 537{
538 struct smk_audit_info ad;
494 int rc; 539 int rc;
495 540
541 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
542 smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
543
496 /* 544 /*
497 * You need write access to the thing you're removing 545 * You need write access to the thing you're removing
498 */ 546 */
499 rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); 547 rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
500 if (rc == 0) 548 if (rc == 0) {
501 /* 549 /*
502 * You also need write access to the containing directory 550 * You also need write access to the containing directory
503 */ 551 */
504 rc = smk_curacc(smk_of_inode(dir), MAY_WRITE); 552 smk_ad_setfield_u_fs_path_dentry(&ad, NULL);
553 smk_ad_setfield_u_fs_inode(&ad, dir);
554 rc = smk_curacc(smk_of_inode(dir), MAY_WRITE, &ad);
555 }
505 556
506 return rc; 557 return rc;
507} 558}
@@ -525,15 +576,19 @@ static int smack_inode_rename(struct inode *old_inode,
525{ 576{
526 int rc; 577 int rc;
527 char *isp; 578 char *isp;
579 struct smk_audit_info ad;
580
581 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
582 smk_ad_setfield_u_fs_path_dentry(&ad, old_dentry);
528 583
529 isp = smk_of_inode(old_dentry->d_inode); 584 isp = smk_of_inode(old_dentry->d_inode);
530 rc = smk_curacc(isp, MAY_READWRITE); 585 rc = smk_curacc(isp, MAY_READWRITE, &ad);
531 586
532 if (rc == 0 && new_dentry->d_inode != NULL) { 587 if (rc == 0 && new_dentry->d_inode != NULL) {
533 isp = smk_of_inode(new_dentry->d_inode); 588 isp = smk_of_inode(new_dentry->d_inode);
534 rc = smk_curacc(isp, MAY_READWRITE); 589 smk_ad_setfield_u_fs_path_dentry(&ad, new_dentry);
590 rc = smk_curacc(isp, MAY_READWRITE, &ad);
535 } 591 }
536
537 return rc; 592 return rc;
538} 593}
539 594
@@ -548,13 +603,15 @@ static int smack_inode_rename(struct inode *old_inode,
548 */ 603 */
549static int smack_inode_permission(struct inode *inode, int mask) 604static int smack_inode_permission(struct inode *inode, int mask)
550{ 605{
606 struct smk_audit_info ad;
551 /* 607 /*
552 * No permission to check. Existence test. Yup, it's there. 608 * No permission to check. Existence test. Yup, it's there.
553 */ 609 */
554 if (mask == 0) 610 if (mask == 0)
555 return 0; 611 return 0;
556 612 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
557 return smk_curacc(smk_of_inode(inode), mask); 613 smk_ad_setfield_u_fs_inode(&ad, inode);
614 return smk_curacc(smk_of_inode(inode), mask, &ad);
558} 615}
559 616
560/** 617/**
@@ -566,13 +623,16 @@ static int smack_inode_permission(struct inode *inode, int mask)
566 */ 623 */
567static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr) 624static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr)
568{ 625{
626 struct smk_audit_info ad;
569 /* 627 /*
570 * Need to allow for clearing the setuid bit. 628 * Need to allow for clearing the setuid bit.
571 */ 629 */
572 if (iattr->ia_valid & ATTR_FORCE) 630 if (iattr->ia_valid & ATTR_FORCE)
573 return 0; 631 return 0;
632 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
633 smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
574 634
575 return smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); 635 return smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
576} 636}
577 637
578/** 638/**
@@ -584,7 +644,12 @@ static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr)
584 */ 644 */
585static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) 645static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
586{ 646{
587 return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ); 647 struct smk_audit_info ad;
648
649 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
650 smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
651 smk_ad_setfield_u_fs_path_mnt(&ad, mnt);
652 return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad);
588} 653}
589 654
590/** 655/**
@@ -602,6 +667,7 @@ static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
602static int smack_inode_setxattr(struct dentry *dentry, const char *name, 667static int smack_inode_setxattr(struct dentry *dentry, const char *name,
603 const void *value, size_t size, int flags) 668 const void *value, size_t size, int flags)
604{ 669{
670 struct smk_audit_info ad;
605 int rc = 0; 671 int rc = 0;
606 672
607 if (strcmp(name, XATTR_NAME_SMACK) == 0 || 673 if (strcmp(name, XATTR_NAME_SMACK) == 0 ||
@@ -619,8 +685,11 @@ static int smack_inode_setxattr(struct dentry *dentry, const char *name,
619 } else 685 } else
620 rc = cap_inode_setxattr(dentry, name, value, size, flags); 686 rc = cap_inode_setxattr(dentry, name, value, size, flags);
621 687
688 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
689 smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
690
622 if (rc == 0) 691 if (rc == 0)
623 rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); 692 rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
624 693
625 return rc; 694 return rc;
626} 695}
@@ -672,7 +741,12 @@ static void smack_inode_post_setxattr(struct dentry *dentry, const char *name,
672 */ 741 */
673static int smack_inode_getxattr(struct dentry *dentry, const char *name) 742static int smack_inode_getxattr(struct dentry *dentry, const char *name)
674{ 743{
675 return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ); 744 struct smk_audit_info ad;
745
746 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
747 smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
748
749 return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad);
676} 750}
677 751
678/* 752/*
@@ -686,6 +760,7 @@ static int smack_inode_getxattr(struct dentry *dentry, const char *name)
686 */ 760 */
687static int smack_inode_removexattr(struct dentry *dentry, const char *name) 761static int smack_inode_removexattr(struct dentry *dentry, const char *name)
688{ 762{
763 struct smk_audit_info ad;
689 int rc = 0; 764 int rc = 0;
690 765
691 if (strcmp(name, XATTR_NAME_SMACK) == 0 || 766 if (strcmp(name, XATTR_NAME_SMACK) == 0 ||
@@ -696,8 +771,10 @@ static int smack_inode_removexattr(struct dentry *dentry, const char *name)
696 } else 771 } else
697 rc = cap_inode_removexattr(dentry, name); 772 rc = cap_inode_removexattr(dentry, name);
698 773
774 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
775 smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
699 if (rc == 0) 776 if (rc == 0)
700 rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); 777 rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
701 778
702 return rc; 779 return rc;
703} 780}
@@ -856,12 +933,16 @@ static int smack_file_ioctl(struct file *file, unsigned int cmd,
856 unsigned long arg) 933 unsigned long arg)
857{ 934{
858 int rc = 0; 935 int rc = 0;
936 struct smk_audit_info ad;
937
938 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
939 smk_ad_setfield_u_fs_path(&ad, file->f_path);
859 940
860 if (_IOC_DIR(cmd) & _IOC_WRITE) 941 if (_IOC_DIR(cmd) & _IOC_WRITE)
861 rc = smk_curacc(file->f_security, MAY_WRITE); 942 rc = smk_curacc(file->f_security, MAY_WRITE, &ad);
862 943
863 if (rc == 0 && (_IOC_DIR(cmd) & _IOC_READ)) 944 if (rc == 0 && (_IOC_DIR(cmd) & _IOC_READ))
864 rc = smk_curacc(file->f_security, MAY_READ); 945 rc = smk_curacc(file->f_security, MAY_READ, &ad);
865 946
866 return rc; 947 return rc;
867} 948}
@@ -875,7 +956,11 @@ static int smack_file_ioctl(struct file *file, unsigned int cmd,
875 */ 956 */
876static int smack_file_lock(struct file *file, unsigned int cmd) 957static int smack_file_lock(struct file *file, unsigned int cmd)
877{ 958{
878 return smk_curacc(file->f_security, MAY_WRITE); 959 struct smk_audit_info ad;
960
961 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
962 smk_ad_setfield_u_fs_path_dentry(&ad, file->f_path.dentry);
963 return smk_curacc(file->f_security, MAY_WRITE, &ad);
879} 964}
880 965
881/** 966/**
@@ -889,8 +974,12 @@ static int smack_file_lock(struct file *file, unsigned int cmd)
889static int smack_file_fcntl(struct file *file, unsigned int cmd, 974static int smack_file_fcntl(struct file *file, unsigned int cmd,
890 unsigned long arg) 975 unsigned long arg)
891{ 976{
977 struct smk_audit_info ad;
892 int rc; 978 int rc;
893 979
980 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
981 smk_ad_setfield_u_fs_path(&ad, file->f_path);
982
894 switch (cmd) { 983 switch (cmd) {
895 case F_DUPFD: 984 case F_DUPFD:
896 case F_GETFD: 985 case F_GETFD:
@@ -898,7 +987,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd,
898 case F_GETLK: 987 case F_GETLK:
899 case F_GETOWN: 988 case F_GETOWN:
900 case F_GETSIG: 989 case F_GETSIG:
901 rc = smk_curacc(file->f_security, MAY_READ); 990 rc = smk_curacc(file->f_security, MAY_READ, &ad);
902 break; 991 break;
903 case F_SETFD: 992 case F_SETFD:
904 case F_SETFL: 993 case F_SETFL:
@@ -906,10 +995,10 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd,
906 case F_SETLKW: 995 case F_SETLKW:
907 case F_SETOWN: 996 case F_SETOWN:
908 case F_SETSIG: 997 case F_SETSIG:
909 rc = smk_curacc(file->f_security, MAY_WRITE); 998 rc = smk_curacc(file->f_security, MAY_WRITE, &ad);
910 break; 999 break;
911 default: 1000 default:
912 rc = smk_curacc(file->f_security, MAY_READWRITE); 1001 rc = smk_curacc(file->f_security, MAY_READWRITE, &ad);
913 } 1002 }
914 1003
915 return rc; 1004 return rc;
@@ -944,14 +1033,21 @@ static int smack_file_send_sigiotask(struct task_struct *tsk,
944{ 1033{
945 struct file *file; 1034 struct file *file;
946 int rc; 1035 int rc;
1036 char *tsp = tsk->cred->security;
1037 struct smk_audit_info ad;
947 1038
948 /* 1039 /*
949 * struct fown_struct is never outside the context of a struct file 1040 * struct fown_struct is never outside the context of a struct file
950 */ 1041 */
951 file = container_of(fown, struct file, f_owner); 1042 file = container_of(fown, struct file, f_owner);
952 rc = smk_access(file->f_security, tsk->cred->security, MAY_WRITE); 1043 /* we don't log here as rc can be overriden */
1044 rc = smk_access(file->f_security, tsp, MAY_WRITE, NULL);
953 if (rc != 0 && has_capability(tsk, CAP_MAC_OVERRIDE)) 1045 if (rc != 0 && has_capability(tsk, CAP_MAC_OVERRIDE))
954 return 0; 1046 rc = 0;
1047
1048 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
1049 smk_ad_setfield_u_tsk(&ad, tsk);
1050 smack_log(file->f_security, tsp, MAY_WRITE, rc, &ad);
955 return rc; 1051 return rc;
956} 1052}
957 1053
@@ -964,7 +1060,10 @@ static int smack_file_send_sigiotask(struct task_struct *tsk,
964static int smack_file_receive(struct file *file) 1060static int smack_file_receive(struct file *file)
965{ 1061{
966 int may = 0; 1062 int may = 0;
1063 struct smk_audit_info ad;
967 1064
1065 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
1066 smk_ad_setfield_u_fs_path(&ad, file->f_path);
968 /* 1067 /*
969 * This code relies on bitmasks. 1068 * This code relies on bitmasks.
970 */ 1069 */
@@ -973,7 +1072,7 @@ static int smack_file_receive(struct file *file)
973 if (file->f_mode & FMODE_WRITE) 1072 if (file->f_mode & FMODE_WRITE)
974 may |= MAY_WRITE; 1073 may |= MAY_WRITE;
975 1074
976 return smk_curacc(file->f_security, may); 1075 return smk_curacc(file->f_security, may, &ad);
977} 1076}
978 1077
979/* 1078/*
@@ -1053,6 +1152,22 @@ static int smack_kernel_create_files_as(struct cred *new,
1053} 1152}
1054 1153
1055/** 1154/**
1155 * smk_curacc_on_task - helper to log task related access
1156 * @p: the task object
1157 * @access : the access requested
1158 *
1159 * Return 0 if access is permitted
1160 */
1161static int smk_curacc_on_task(struct task_struct *p, int access)
1162{
1163 struct smk_audit_info ad;
1164
1165 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
1166 smk_ad_setfield_u_tsk(&ad, p);
1167 return smk_curacc(task_security(p), access, &ad);
1168}
1169
1170/**
1056 * smack_task_setpgid - Smack check on setting pgid 1171 * smack_task_setpgid - Smack check on setting pgid
1057 * @p: the task object 1172 * @p: the task object
1058 * @pgid: unused 1173 * @pgid: unused
@@ -1061,7 +1176,7 @@ static int smack_kernel_create_files_as(struct cred *new,
1061 */ 1176 */
1062static int smack_task_setpgid(struct task_struct *p, pid_t pgid) 1177static int smack_task_setpgid(struct task_struct *p, pid_t pgid)
1063{ 1178{
1064 return smk_curacc(task_security(p), MAY_WRITE); 1179 return smk_curacc_on_task(p, MAY_WRITE);
1065} 1180}
1066 1181
1067/** 1182/**
@@ -1072,7 +1187,7 @@ static int smack_task_setpgid(struct task_struct *p, pid_t pgid)
1072 */ 1187 */
1073static int smack_task_getpgid(struct task_struct *p) 1188static int smack_task_getpgid(struct task_struct *p)
1074{ 1189{
1075 return smk_curacc(task_security(p), MAY_READ); 1190 return smk_curacc_on_task(p, MAY_READ);
1076} 1191}
1077 1192
1078/** 1193/**
@@ -1083,7 +1198,7 @@ static int smack_task_getpgid(struct task_struct *p)
1083 */ 1198 */
1084static int smack_task_getsid(struct task_struct *p) 1199static int smack_task_getsid(struct task_struct *p)
1085{ 1200{
1086 return smk_curacc(task_security(p), MAY_READ); 1201 return smk_curacc_on_task(p, MAY_READ);
1087} 1202}
1088 1203
1089/** 1204/**
@@ -1111,7 +1226,7 @@ static int smack_task_setnice(struct task_struct *p, int nice)
1111 1226
1112 rc = cap_task_setnice(p, nice); 1227 rc = cap_task_setnice(p, nice);
1113 if (rc == 0) 1228 if (rc == 0)
1114 rc = smk_curacc(task_security(p), MAY_WRITE); 1229 rc = smk_curacc_on_task(p, MAY_WRITE);
1115 return rc; 1230 return rc;
1116} 1231}
1117 1232
@@ -1128,7 +1243,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio)
1128 1243
1129 rc = cap_task_setioprio(p, ioprio); 1244 rc = cap_task_setioprio(p, ioprio);
1130 if (rc == 0) 1245 if (rc == 0)
1131 rc = smk_curacc(task_security(p), MAY_WRITE); 1246 rc = smk_curacc_on_task(p, MAY_WRITE);
1132 return rc; 1247 return rc;
1133} 1248}
1134 1249
@@ -1140,7 +1255,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio)
1140 */ 1255 */
1141static int smack_task_getioprio(struct task_struct *p) 1256static int smack_task_getioprio(struct task_struct *p)
1142{ 1257{
1143 return smk_curacc(task_security(p), MAY_READ); 1258 return smk_curacc_on_task(p, MAY_READ);
1144} 1259}
1145 1260
1146/** 1261/**
@@ -1158,7 +1273,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy,
1158 1273
1159 rc = cap_task_setscheduler(p, policy, lp); 1274 rc = cap_task_setscheduler(p, policy, lp);
1160 if (rc == 0) 1275 if (rc == 0)
1161 rc = smk_curacc(task_security(p), MAY_WRITE); 1276 rc = smk_curacc_on_task(p, MAY_WRITE);
1162 return rc; 1277 return rc;
1163} 1278}
1164 1279
@@ -1170,7 +1285,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy,
1170 */ 1285 */
1171static int smack_task_getscheduler(struct task_struct *p) 1286static int smack_task_getscheduler(struct task_struct *p)
1172{ 1287{
1173 return smk_curacc(task_security(p), MAY_READ); 1288 return smk_curacc_on_task(p, MAY_READ);
1174} 1289}
1175 1290
1176/** 1291/**
@@ -1181,7 +1296,7 @@ static int smack_task_getscheduler(struct task_struct *p)
1181 */ 1296 */
1182static int smack_task_movememory(struct task_struct *p) 1297static int smack_task_movememory(struct task_struct *p)
1183{ 1298{
1184 return smk_curacc(task_security(p), MAY_WRITE); 1299 return smk_curacc_on_task(p, MAY_WRITE);
1185} 1300}
1186 1301
1187/** 1302/**
@@ -1199,18 +1314,23 @@ static int smack_task_movememory(struct task_struct *p)
1199static int smack_task_kill(struct task_struct *p, struct siginfo *info, 1314static int smack_task_kill(struct task_struct *p, struct siginfo *info,
1200 int sig, u32 secid) 1315 int sig, u32 secid)
1201{ 1316{
1317 struct smk_audit_info ad;
1318
1319 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
1320 smk_ad_setfield_u_tsk(&ad, p);
1202 /* 1321 /*
1203 * Sending a signal requires that the sender 1322 * Sending a signal requires that the sender
1204 * can write the receiver. 1323 * can write the receiver.
1205 */ 1324 */
1206 if (secid == 0) 1325 if (secid == 0)
1207 return smk_curacc(task_security(p), MAY_WRITE); 1326 return smk_curacc(task_security(p), MAY_WRITE, &ad);
1208 /* 1327 /*
1209 * If the secid isn't 0 we're dealing with some USB IO 1328 * If the secid isn't 0 we're dealing with some USB IO
1210 * specific behavior. This is not clean. For one thing 1329 * specific behavior. This is not clean. For one thing
1211 * we can't take privilege into account. 1330 * we can't take privilege into account.
1212 */ 1331 */
1213 return smk_access(smack_from_secid(secid), task_security(p), MAY_WRITE); 1332 return smk_access(smack_from_secid(secid), task_security(p),
1333 MAY_WRITE, &ad);
1214} 1334}
1215 1335
1216/** 1336/**
@@ -1221,11 +1341,15 @@ static int smack_task_kill(struct task_struct *p, struct siginfo *info,
1221 */ 1341 */
1222static int smack_task_wait(struct task_struct *p) 1342static int smack_task_wait(struct task_struct *p)
1223{ 1343{
1344 struct smk_audit_info ad;
1345 char *sp = current_security();
1346 char *tsp = task_security(p);
1224 int rc; 1347 int rc;
1225 1348
1226 rc = smk_access(current_security(), task_security(p), MAY_WRITE); 1349 /* we don't log here, we can be overriden */
1350 rc = smk_access(sp, tsp, MAY_WRITE, NULL);
1227 if (rc == 0) 1351 if (rc == 0)
1228 return 0; 1352 goto out_log;
1229 1353
1230 /* 1354 /*
1231 * Allow the operation to succeed if either task 1355 * Allow the operation to succeed if either task
@@ -1239,8 +1363,12 @@ static int smack_task_wait(struct task_struct *p)
1239 * the smack value. 1363 * the smack value.
1240 */ 1364 */
1241 if (capable(CAP_MAC_OVERRIDE) || has_capability(p, CAP_MAC_OVERRIDE)) 1365 if (capable(CAP_MAC_OVERRIDE) || has_capability(p, CAP_MAC_OVERRIDE))
1242 return 0; 1366 rc = 0;
1243 1367 /* we log only if we didn't get overriden */
1368 out_log:
1369 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
1370 smk_ad_setfield_u_tsk(&ad, p);
1371 smack_log(sp, tsp, MAY_WRITE, rc, &ad);
1244 return rc; 1372 return rc;
1245} 1373}
1246 1374
@@ -1456,12 +1584,19 @@ static int smack_netlabel_send(struct sock *sk, struct sockaddr_in *sap)
1456 int sk_lbl; 1584 int sk_lbl;
1457 char *hostsp; 1585 char *hostsp;
1458 struct socket_smack *ssp = sk->sk_security; 1586 struct socket_smack *ssp = sk->sk_security;
1587 struct smk_audit_info ad;
1459 1588
1460 rcu_read_lock(); 1589 rcu_read_lock();
1461 hostsp = smack_host_label(sap); 1590 hostsp = smack_host_label(sap);
1462 if (hostsp != NULL) { 1591 if (hostsp != NULL) {
1463 sk_lbl = SMACK_UNLABELED_SOCKET; 1592 sk_lbl = SMACK_UNLABELED_SOCKET;
1464 rc = smk_access(ssp->smk_out, hostsp, MAY_WRITE); 1593#ifdef CONFIG_AUDIT
1594 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET);
1595 ad.a.u.net.family = sap->sin_family;
1596 ad.a.u.net.dport = sap->sin_port;
1597 ad.a.u.net.v4info.daddr = sap->sin_addr.s_addr;
1598#endif
1599 rc = smk_access(ssp->smk_out, hostsp, MAY_WRITE, &ad);
1465 } else { 1600 } else {
1466 sk_lbl = SMACK_CIPSO_SOCKET; 1601 sk_lbl = SMACK_CIPSO_SOCKET;
1467 rc = 0; 1602 rc = 0;
@@ -1657,6 +1792,25 @@ static void smack_shm_free_security(struct shmid_kernel *shp)
1657} 1792}
1658 1793
1659/** 1794/**
1795 * smk_curacc_shm : check if current has access on shm
1796 * @shp : the object
1797 * @access : access requested
1798 *
1799 * Returns 0 if current has the requested access, error code otherwise
1800 */
1801static int smk_curacc_shm(struct shmid_kernel *shp, int access)
1802{
1803 char *ssp = smack_of_shm(shp);
1804 struct smk_audit_info ad;
1805
1806#ifdef CONFIG_AUDIT
1807 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC);
1808 ad.a.u.ipc_id = shp->shm_perm.id;
1809#endif
1810 return smk_curacc(ssp, access, &ad);
1811}
1812
1813/**
1660 * smack_shm_associate - Smack access check for shm 1814 * smack_shm_associate - Smack access check for shm
1661 * @shp: the object 1815 * @shp: the object
1662 * @shmflg: access requested 1816 * @shmflg: access requested
@@ -1665,11 +1819,10 @@ static void smack_shm_free_security(struct shmid_kernel *shp)
1665 */ 1819 */
1666static int smack_shm_associate(struct shmid_kernel *shp, int shmflg) 1820static int smack_shm_associate(struct shmid_kernel *shp, int shmflg)
1667{ 1821{
1668 char *ssp = smack_of_shm(shp);
1669 int may; 1822 int may;
1670 1823
1671 may = smack_flags_to_may(shmflg); 1824 may = smack_flags_to_may(shmflg);
1672 return smk_curacc(ssp, may); 1825 return smk_curacc_shm(shp, may);
1673} 1826}
1674 1827
1675/** 1828/**
@@ -1681,7 +1834,6 @@ static int smack_shm_associate(struct shmid_kernel *shp, int shmflg)
1681 */ 1834 */
1682static int smack_shm_shmctl(struct shmid_kernel *shp, int cmd) 1835static int smack_shm_shmctl(struct shmid_kernel *shp, int cmd)
1683{ 1836{
1684 char *ssp;
1685 int may; 1837 int may;
1686 1838
1687 switch (cmd) { 1839 switch (cmd) {
@@ -1704,9 +1856,7 @@ static int smack_shm_shmctl(struct shmid_kernel *shp, int cmd)
1704 default: 1856 default:
1705 return -EINVAL; 1857 return -EINVAL;
1706 } 1858 }
1707 1859 return smk_curacc_shm(shp, may);
1708 ssp = smack_of_shm(shp);
1709 return smk_curacc(ssp, may);
1710} 1860}
1711 1861
1712/** 1862/**
@@ -1720,11 +1870,10 @@ static int smack_shm_shmctl(struct shmid_kernel *shp, int cmd)
1720static int smack_shm_shmat(struct shmid_kernel *shp, char __user *shmaddr, 1870static int smack_shm_shmat(struct shmid_kernel *shp, char __user *shmaddr,
1721 int shmflg) 1871 int shmflg)
1722{ 1872{
1723 char *ssp = smack_of_shm(shp);
1724 int may; 1873 int may;
1725 1874
1726 may = smack_flags_to_may(shmflg); 1875 may = smack_flags_to_may(shmflg);
1727 return smk_curacc(ssp, may); 1876 return smk_curacc_shm(shp, may);
1728} 1877}
1729 1878
1730/** 1879/**
@@ -1766,6 +1915,25 @@ static void smack_sem_free_security(struct sem_array *sma)
1766} 1915}
1767 1916
1768/** 1917/**
1918 * smk_curacc_sem : check if current has access on sem
1919 * @sma : the object
1920 * @access : access requested
1921 *
1922 * Returns 0 if current has the requested access, error code otherwise
1923 */
1924static int smk_curacc_sem(struct sem_array *sma, int access)
1925{
1926 char *ssp = smack_of_sem(sma);
1927 struct smk_audit_info ad;
1928
1929#ifdef CONFIG_AUDIT
1930 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC);
1931 ad.a.u.ipc_id = sma->sem_perm.id;
1932#endif
1933 return smk_curacc(ssp, access, &ad);
1934}
1935
1936/**
1769 * smack_sem_associate - Smack access check for sem 1937 * smack_sem_associate - Smack access check for sem
1770 * @sma: the object 1938 * @sma: the object
1771 * @semflg: access requested 1939 * @semflg: access requested
@@ -1774,11 +1942,10 @@ static void smack_sem_free_security(struct sem_array *sma)
1774 */ 1942 */
1775static int smack_sem_associate(struct sem_array *sma, int semflg) 1943static int smack_sem_associate(struct sem_array *sma, int semflg)
1776{ 1944{
1777 char *ssp = smack_of_sem(sma);
1778 int may; 1945 int may;
1779 1946
1780 may = smack_flags_to_may(semflg); 1947 may = smack_flags_to_may(semflg);
1781 return smk_curacc(ssp, may); 1948 return smk_curacc_sem(sma, may);
1782} 1949}
1783 1950
1784/** 1951/**
@@ -1790,7 +1957,6 @@ static int smack_sem_associate(struct sem_array *sma, int semflg)
1790 */ 1957 */
1791static int smack_sem_semctl(struct sem_array *sma, int cmd) 1958static int smack_sem_semctl(struct sem_array *sma, int cmd)
1792{ 1959{
1793 char *ssp;
1794 int may; 1960 int may;
1795 1961
1796 switch (cmd) { 1962 switch (cmd) {
@@ -1819,8 +1985,7 @@ static int smack_sem_semctl(struct sem_array *sma, int cmd)
1819 return -EINVAL; 1985 return -EINVAL;
1820 } 1986 }
1821 1987
1822 ssp = smack_of_sem(sma); 1988 return smk_curacc_sem(sma, may);
1823 return smk_curacc(ssp, may);
1824} 1989}
1825 1990
1826/** 1991/**
@@ -1837,9 +2002,7 @@ static int smack_sem_semctl(struct sem_array *sma, int cmd)
1837static int smack_sem_semop(struct sem_array *sma, struct sembuf *sops, 2002static int smack_sem_semop(struct sem_array *sma, struct sembuf *sops,
1838 unsigned nsops, int alter) 2003 unsigned nsops, int alter)
1839{ 2004{
1840 char *ssp = smack_of_sem(sma); 2005 return smk_curacc_sem(sma, MAY_READWRITE);
1841
1842 return smk_curacc(ssp, MAY_READWRITE);
1843} 2006}
1844 2007
1845/** 2008/**
@@ -1881,6 +2044,25 @@ static char *smack_of_msq(struct msg_queue *msq)
1881} 2044}
1882 2045
1883/** 2046/**
2047 * smk_curacc_msq : helper to check if current has access on msq
2048 * @msq : the msq
2049 * @access : access requested
2050 *
2051 * return 0 if current has access, error otherwise
2052 */
2053static int smk_curacc_msq(struct msg_queue *msq, int access)
2054{
2055 char *msp = smack_of_msq(msq);
2056 struct smk_audit_info ad;
2057
2058#ifdef CONFIG_AUDIT
2059 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC);
2060 ad.a.u.ipc_id = msq->q_perm.id;
2061#endif
2062 return smk_curacc(msp, access, &ad);
2063}
2064
2065/**
1884 * smack_msg_queue_associate - Smack access check for msg_queue 2066 * smack_msg_queue_associate - Smack access check for msg_queue
1885 * @msq: the object 2067 * @msq: the object
1886 * @msqflg: access requested 2068 * @msqflg: access requested
@@ -1889,11 +2071,10 @@ static char *smack_of_msq(struct msg_queue *msq)
1889 */ 2071 */
1890static int smack_msg_queue_associate(struct msg_queue *msq, int msqflg) 2072static int smack_msg_queue_associate(struct msg_queue *msq, int msqflg)
1891{ 2073{
1892 char *msp = smack_of_msq(msq);
1893 int may; 2074 int may;
1894 2075
1895 may = smack_flags_to_may(msqflg); 2076 may = smack_flags_to_may(msqflg);
1896 return smk_curacc(msp, may); 2077 return smk_curacc_msq(msq, may);
1897} 2078}
1898 2079
1899/** 2080/**
@@ -1905,7 +2086,6 @@ static int smack_msg_queue_associate(struct msg_queue *msq, int msqflg)
1905 */ 2086 */
1906static int smack_msg_queue_msgctl(struct msg_queue *msq, int cmd) 2087static int smack_msg_queue_msgctl(struct msg_queue *msq, int cmd)
1907{ 2088{
1908 char *msp;
1909 int may; 2089 int may;
1910 2090
1911 switch (cmd) { 2091 switch (cmd) {
@@ -1927,8 +2107,7 @@ static int smack_msg_queue_msgctl(struct msg_queue *msq, int cmd)
1927 return -EINVAL; 2107 return -EINVAL;
1928 } 2108 }
1929 2109
1930 msp = smack_of_msq(msq); 2110 return smk_curacc_msq(msq, may);
1931 return smk_curacc(msp, may);
1932} 2111}
1933 2112
1934/** 2113/**
@@ -1942,11 +2121,10 @@ static int smack_msg_queue_msgctl(struct msg_queue *msq, int cmd)
1942static int smack_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, 2121static int smack_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg,
1943 int msqflg) 2122 int msqflg)
1944{ 2123{
1945 char *msp = smack_of_msq(msq); 2124 int may;
1946 int rc;
1947 2125
1948 rc = smack_flags_to_may(msqflg); 2126 may = smack_flags_to_may(msqflg);
1949 return smk_curacc(msp, rc); 2127 return smk_curacc_msq(msq, may);
1950} 2128}
1951 2129
1952/** 2130/**
@@ -1962,9 +2140,7 @@ static int smack_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg,
1962static int smack_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, 2140static int smack_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
1963 struct task_struct *target, long type, int mode) 2141 struct task_struct *target, long type, int mode)
1964{ 2142{
1965 char *msp = smack_of_msq(msq); 2143 return smk_curacc_msq(msq, MAY_READWRITE);
1966
1967 return smk_curacc(msp, MAY_READWRITE);
1968} 2144}
1969 2145
1970/** 2146/**
@@ -1977,10 +2153,14 @@ static int smack_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
1977static int smack_ipc_permission(struct kern_ipc_perm *ipp, short flag) 2153static int smack_ipc_permission(struct kern_ipc_perm *ipp, short flag)
1978{ 2154{
1979 char *isp = ipp->security; 2155 char *isp = ipp->security;
1980 int may; 2156 int may = smack_flags_to_may(flag);
2157 struct smk_audit_info ad;
1981 2158
1982 may = smack_flags_to_may(flag); 2159#ifdef CONFIG_AUDIT
1983 return smk_curacc(isp, may); 2160 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC);
2161 ad.a.u.ipc_id = ipp->id;
2162#endif
2163 return smk_curacc(isp, may, &ad);
1984} 2164}
1985 2165
1986/** 2166/**
@@ -2239,8 +2419,12 @@ static int smack_unix_stream_connect(struct socket *sock,
2239{ 2419{
2240 struct inode *sp = SOCK_INODE(sock); 2420 struct inode *sp = SOCK_INODE(sock);
2241 struct inode *op = SOCK_INODE(other); 2421 struct inode *op = SOCK_INODE(other);
2422 struct smk_audit_info ad;
2242 2423
2243 return smk_access(smk_of_inode(sp), smk_of_inode(op), MAY_READWRITE); 2424 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET);
2425 smk_ad_setfield_u_net_sk(&ad, other->sk);
2426 return smk_access(smk_of_inode(sp), smk_of_inode(op),
2427 MAY_READWRITE, &ad);
2244} 2428}
2245 2429
2246/** 2430/**
@@ -2255,8 +2439,11 @@ static int smack_unix_may_send(struct socket *sock, struct socket *other)
2255{ 2439{
2256 struct inode *sp = SOCK_INODE(sock); 2440 struct inode *sp = SOCK_INODE(sock);
2257 struct inode *op = SOCK_INODE(other); 2441 struct inode *op = SOCK_INODE(other);
2442 struct smk_audit_info ad;
2258 2443
2259 return smk_access(smk_of_inode(sp), smk_of_inode(op), MAY_WRITE); 2444 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET);
2445 smk_ad_setfield_u_net_sk(&ad, other->sk);
2446 return smk_access(smk_of_inode(sp), smk_of_inode(op), MAY_WRITE, &ad);
2260} 2447}
2261 2448
2262/** 2449/**
@@ -2371,7 +2558,7 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
2371 char smack[SMK_LABELLEN]; 2558 char smack[SMK_LABELLEN];
2372 char *csp; 2559 char *csp;
2373 int rc; 2560 int rc;
2374 2561 struct smk_audit_info ad;
2375 if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6) 2562 if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
2376 return 0; 2563 return 0;
2377 2564
@@ -2389,13 +2576,19 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
2389 2576
2390 netlbl_secattr_destroy(&secattr); 2577 netlbl_secattr_destroy(&secattr);
2391 2578
2579#ifdef CONFIG_AUDIT
2580 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET);
2581 ad.a.u.net.family = sk->sk_family;
2582 ad.a.u.net.netif = skb->iif;
2583 ipv4_skb_to_auditdata(skb, &ad.a, NULL);
2584#endif
2392 /* 2585 /*
2393 * Receiving a packet requires that the other end 2586 * Receiving a packet requires that the other end
2394 * be able to write here. Read access is not required. 2587 * be able to write here. Read access is not required.
2395 * This is the simplist possible security model 2588 * This is the simplist possible security model
2396 * for networking. 2589 * for networking.
2397 */ 2590 */
2398 rc = smk_access(csp, ssp->smk_in, MAY_WRITE); 2591 rc = smk_access(csp, ssp->smk_in, MAY_WRITE, &ad);
2399 if (rc != 0) 2592 if (rc != 0)
2400 netlbl_skbuff_err(skb, rc, 0); 2593 netlbl_skbuff_err(skb, rc, 0);
2401 return rc; 2594 return rc;
@@ -2524,6 +2717,7 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb,
2524 struct iphdr *hdr; 2717 struct iphdr *hdr;
2525 char smack[SMK_LABELLEN]; 2718 char smack[SMK_LABELLEN];
2526 int rc; 2719 int rc;
2720 struct smk_audit_info ad;
2527 2721
2528 /* handle mapped IPv4 packets arriving via IPv6 sockets */ 2722 /* handle mapped IPv4 packets arriving via IPv6 sockets */
2529 if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) 2723 if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP))
@@ -2537,11 +2731,17 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb,
2537 strncpy(smack, smack_known_huh.smk_known, SMK_MAXLEN); 2731 strncpy(smack, smack_known_huh.smk_known, SMK_MAXLEN);
2538 netlbl_secattr_destroy(&secattr); 2732 netlbl_secattr_destroy(&secattr);
2539 2733
2734#ifdef CONFIG_AUDIT
2735 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET);
2736 ad.a.u.net.family = family;
2737 ad.a.u.net.netif = skb->iif;
2738 ipv4_skb_to_auditdata(skb, &ad.a, NULL);
2739#endif
2540 /* 2740 /*
2541 * Receiving a packet requires that the other end be able to write 2741 * Receiving a packet requires that the other end be able to write
2542 * here. Read access is not required. 2742 * here. Read access is not required.
2543 */ 2743 */
2544 rc = smk_access(smack, ssp->smk_in, MAY_WRITE); 2744 rc = smk_access(smack, ssp->smk_in, MAY_WRITE, &ad);
2545 if (rc != 0) 2745 if (rc != 0)
2546 return rc; 2746 return rc;
2547 2747
@@ -2643,6 +2843,7 @@ static int smack_key_permission(key_ref_t key_ref,
2643 const struct cred *cred, key_perm_t perm) 2843 const struct cred *cred, key_perm_t perm)
2644{ 2844{
2645 struct key *keyp; 2845 struct key *keyp;
2846 struct smk_audit_info ad;
2646 2847
2647 keyp = key_ref_to_ptr(key_ref); 2848 keyp = key_ref_to_ptr(key_ref);
2648 if (keyp == NULL) 2849 if (keyp == NULL)
@@ -2658,8 +2859,13 @@ static int smack_key_permission(key_ref_t key_ref,
2658 */ 2859 */
2659 if (cred->security == NULL) 2860 if (cred->security == NULL)
2660 return -EACCES; 2861 return -EACCES;
2661 2862#ifdef CONFIG_AUDIT
2662 return smk_access(cred->security, keyp->security, MAY_READWRITE); 2863 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_KEY);
2864 ad.a.u.key_struct.key = keyp->serial;
2865 ad.a.u.key_struct.key_desc = keyp->description;
2866#endif
2867 return smk_access(cred->security, keyp->security,
2868 MAY_READWRITE, &ad);
2663} 2869}
2664#endif /* CONFIG_KEYS */ 2870#endif /* CONFIG_KEYS */
2665 2871
@@ -2828,15 +3034,7 @@ struct security_operations smack_ops = {
2828 3034
2829 .ptrace_may_access = smack_ptrace_may_access, 3035 .ptrace_may_access = smack_ptrace_may_access,
2830 .ptrace_traceme = smack_ptrace_traceme, 3036 .ptrace_traceme = smack_ptrace_traceme,
2831 .capget = cap_capget,
2832 .capset = cap_capset,
2833 .capable = cap_capable,
2834 .syslog = smack_syslog, 3037 .syslog = smack_syslog,
2835 .settime = cap_settime,
2836 .vm_enough_memory = cap_vm_enough_memory,
2837
2838 .bprm_set_creds = cap_bprm_set_creds,
2839 .bprm_secureexec = cap_bprm_secureexec,
2840 3038
2841 .sb_alloc_security = smack_sb_alloc_security, 3039 .sb_alloc_security = smack_sb_alloc_security,
2842 .sb_free_security = smack_sb_free_security, 3040 .sb_free_security = smack_sb_free_security,
@@ -2860,8 +3058,6 @@ struct security_operations smack_ops = {
2860 .inode_post_setxattr = smack_inode_post_setxattr, 3058 .inode_post_setxattr = smack_inode_post_setxattr,
2861 .inode_getxattr = smack_inode_getxattr, 3059 .inode_getxattr = smack_inode_getxattr,
2862 .inode_removexattr = smack_inode_removexattr, 3060 .inode_removexattr = smack_inode_removexattr,
2863 .inode_need_killpriv = cap_inode_need_killpriv,
2864 .inode_killpriv = cap_inode_killpriv,
2865 .inode_getsecurity = smack_inode_getsecurity, 3061 .inode_getsecurity = smack_inode_getsecurity,
2866 .inode_setsecurity = smack_inode_setsecurity, 3062 .inode_setsecurity = smack_inode_setsecurity,
2867 .inode_listsecurity = smack_inode_listsecurity, 3063 .inode_listsecurity = smack_inode_listsecurity,
@@ -2882,7 +3078,6 @@ struct security_operations smack_ops = {
2882 .cred_commit = smack_cred_commit, 3078 .cred_commit = smack_cred_commit,
2883 .kernel_act_as = smack_kernel_act_as, 3079 .kernel_act_as = smack_kernel_act_as,
2884 .kernel_create_files_as = smack_kernel_create_files_as, 3080 .kernel_create_files_as = smack_kernel_create_files_as,
2885 .task_fix_setuid = cap_task_fix_setuid,
2886 .task_setpgid = smack_task_setpgid, 3081 .task_setpgid = smack_task_setpgid,
2887 .task_getpgid = smack_task_getpgid, 3082 .task_getpgid = smack_task_getpgid,
2888 .task_getsid = smack_task_getsid, 3083 .task_getsid = smack_task_getsid,
@@ -2896,7 +3091,6 @@ struct security_operations smack_ops = {
2896 .task_kill = smack_task_kill, 3091 .task_kill = smack_task_kill,
2897 .task_wait = smack_task_wait, 3092 .task_wait = smack_task_wait,
2898 .task_to_inode = smack_task_to_inode, 3093 .task_to_inode = smack_task_to_inode,
2899 .task_prctl = cap_task_prctl,
2900 3094
2901 .ipc_permission = smack_ipc_permission, 3095 .ipc_permission = smack_ipc_permission,
2902 .ipc_getsecid = smack_ipc_getsecid, 3096 .ipc_getsecid = smack_ipc_getsecid,
@@ -2923,9 +3117,6 @@ struct security_operations smack_ops = {
2923 .sem_semctl = smack_sem_semctl, 3117 .sem_semctl = smack_sem_semctl,
2924 .sem_semop = smack_sem_semop, 3118 .sem_semop = smack_sem_semop,
2925 3119
2926 .netlink_send = cap_netlink_send,
2927 .netlink_recv = cap_netlink_recv,
2928
2929 .d_instantiate = smack_d_instantiate, 3120 .d_instantiate = smack_d_instantiate,
2930 3121
2931 .getprocattr = smack_getprocattr, 3122 .getprocattr = smack_getprocattr,
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index 11d2cb19d7a6..f83a80980726 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -41,6 +41,7 @@ enum smk_inos {
41 SMK_AMBIENT = 7, /* internet ambient label */ 41 SMK_AMBIENT = 7, /* internet ambient label */
42 SMK_NETLBLADDR = 8, /* single label hosts */ 42 SMK_NETLBLADDR = 8, /* single label hosts */
43 SMK_ONLYCAP = 9, /* the only "capable" label */ 43 SMK_ONLYCAP = 9, /* the only "capable" label */
44 SMK_LOGGING = 10, /* logging */
44}; 45};
45 46
46/* 47/*
@@ -775,7 +776,7 @@ static ssize_t smk_write_netlbladdr(struct file *file, const char __user *buf,
775 struct sockaddr_in newname; 776 struct sockaddr_in newname;
776 char smack[SMK_LABELLEN]; 777 char smack[SMK_LABELLEN];
777 char *sp; 778 char *sp;
778 char data[SMK_NETLBLADDRMAX]; 779 char data[SMK_NETLBLADDRMAX + 1];
779 char *host = (char *)&newname.sin_addr.s_addr; 780 char *host = (char *)&newname.sin_addr.s_addr;
780 int rc; 781 int rc;
781 struct netlbl_audit audit_info; 782 struct netlbl_audit audit_info;
@@ -1192,6 +1193,69 @@ static const struct file_operations smk_onlycap_ops = {
1192}; 1193};
1193 1194
1194/** 1195/**
1196 * smk_read_logging - read() for /smack/logging
1197 * @filp: file pointer, not actually used
1198 * @buf: where to put the result
1199 * @cn: maximum to send along
1200 * @ppos: where to start
1201 *
1202 * Returns number of bytes read or error code, as appropriate
1203 */
1204static ssize_t smk_read_logging(struct file *filp, char __user *buf,
1205 size_t count, loff_t *ppos)
1206{
1207 char temp[32];
1208 ssize_t rc;
1209
1210 if (*ppos != 0)
1211 return 0;
1212
1213 sprintf(temp, "%d\n", log_policy);
1214 rc = simple_read_from_buffer(buf, count, ppos, temp, strlen(temp));
1215 return rc;
1216}
1217
1218/**
1219 * smk_write_logging - write() for /smack/logging
1220 * @file: file pointer, not actually used
1221 * @buf: where to get the data from
1222 * @count: bytes sent
1223 * @ppos: where to start
1224 *
1225 * Returns number of bytes written or error code, as appropriate
1226 */
1227static ssize_t smk_write_logging(struct file *file, const char __user *buf,
1228 size_t count, loff_t *ppos)
1229{
1230 char temp[32];
1231 int i;
1232
1233 if (!capable(CAP_MAC_ADMIN))
1234 return -EPERM;
1235
1236 if (count >= sizeof(temp) || count == 0)
1237 return -EINVAL;
1238
1239 if (copy_from_user(temp, buf, count) != 0)
1240 return -EFAULT;
1241
1242 temp[count] = '\0';
1243
1244 if (sscanf(temp, "%d", &i) != 1)
1245 return -EINVAL;
1246 if (i < 0 || i > 3)
1247 return -EINVAL;
1248 log_policy = i;
1249 return count;
1250}
1251
1252
1253
1254static const struct file_operations smk_logging_ops = {
1255 .read = smk_read_logging,
1256 .write = smk_write_logging,
1257};
1258/**
1195 * smk_fill_super - fill the /smackfs superblock 1259 * smk_fill_super - fill the /smackfs superblock
1196 * @sb: the empty superblock 1260 * @sb: the empty superblock
1197 * @data: unused 1261 * @data: unused
@@ -1221,6 +1285,8 @@ static int smk_fill_super(struct super_block *sb, void *data, int silent)
1221 {"netlabel", &smk_netlbladdr_ops, S_IRUGO|S_IWUSR}, 1285 {"netlabel", &smk_netlbladdr_ops, S_IRUGO|S_IWUSR},
1222 [SMK_ONLYCAP] = 1286 [SMK_ONLYCAP] =
1223 {"onlycap", &smk_onlycap_ops, S_IRUGO|S_IWUSR}, 1287 {"onlycap", &smk_onlycap_ops, S_IRUGO|S_IWUSR},
1288 [SMK_LOGGING] =
1289 {"logging", &smk_logging_ops, S_IRUGO|S_IWUSR},
1224 /* last one */ {""} 1290 /* last one */ {""}
1225 }; 1291 };
1226 1292
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index ddfb9cccf468..fdd1f4b8c448 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -28,7 +28,13 @@ static const char *tomoyo_mode_2[4] = {
28 "disabled", "enabled", "enabled", "enabled" 28 "disabled", "enabled", "enabled", "enabled"
29}; 29};
30 30
31/* Table for profile. */ 31/*
32 * tomoyo_control_array is a static data which contains
33 *
34 * (1) functionality name used by /sys/kernel/security/tomoyo/profile .
35 * (2) initial values for "struct tomoyo_profile".
36 * (3) max values for "struct tomoyo_profile".
37 */
32static struct { 38static struct {
33 const char *keyword; 39 const char *keyword;
34 unsigned int current_value; 40 unsigned int current_value;
@@ -39,7 +45,13 @@ static struct {
39 [TOMOYO_VERBOSE] = { "TOMOYO_VERBOSE", 1, 1 }, 45 [TOMOYO_VERBOSE] = { "TOMOYO_VERBOSE", 1, 1 },
40}; 46};
41 47
42/* Profile table. Memory is allocated as needed. */ 48/*
49 * tomoyo_profile is a structure which is used for holding the mode of access
50 * controls. TOMOYO has 4 modes: disabled, learning, permissive, enforcing.
51 * An administrator can define up to 256 profiles.
52 * The ->profile of "struct tomoyo_domain_info" is used for remembering
53 * the profile's number (0 - 255) assigned to that domain.
54 */
43static struct tomoyo_profile { 55static struct tomoyo_profile {
44 unsigned int value[TOMOYO_MAX_CONTROL_INDEX]; 56 unsigned int value[TOMOYO_MAX_CONTROL_INDEX];
45 const struct tomoyo_path_info *comment; 57 const struct tomoyo_path_info *comment;
@@ -428,7 +440,6 @@ void tomoyo_fill_path_info(struct tomoyo_path_info *ptr)
428 const char *name = ptr->name; 440 const char *name = ptr->name;
429 const int len = strlen(name); 441 const int len = strlen(name);
430 442
431 ptr->total_len = len;
432 ptr->const_len = tomoyo_const_part_length(name); 443 ptr->const_len = tomoyo_const_part_length(name);
433 ptr->is_dir = len && (name[len - 1] == '/'); 444 ptr->is_dir = len && (name[len - 1] == '/');
434 ptr->is_patterned = (ptr->const_len < len); 445 ptr->is_patterned = (ptr->const_len < len);
@@ -866,7 +877,6 @@ static struct tomoyo_profile *tomoyo_find_or_assign_new_profile(const unsigned
866 877
867 if (profile >= TOMOYO_MAX_PROFILES) 878 if (profile >= TOMOYO_MAX_PROFILES)
868 return NULL; 879 return NULL;
869 /***** EXCLUSIVE SECTION START *****/
870 mutex_lock(&lock); 880 mutex_lock(&lock);
871 ptr = tomoyo_profile_ptr[profile]; 881 ptr = tomoyo_profile_ptr[profile];
872 if (ptr) 882 if (ptr)
@@ -880,7 +890,6 @@ static struct tomoyo_profile *tomoyo_find_or_assign_new_profile(const unsigned
880 tomoyo_profile_ptr[profile] = ptr; 890 tomoyo_profile_ptr[profile] = ptr;
881 ok: 891 ok:
882 mutex_unlock(&lock); 892 mutex_unlock(&lock);
883 /***** EXCLUSIVE SECTION END *****/
884 return ptr; 893 return ptr;
885} 894}
886 895
@@ -1009,7 +1018,19 @@ static int tomoyo_read_profile(struct tomoyo_io_buffer *head)
1009 return 0; 1018 return 0;
1010} 1019}
1011 1020
1012/* Structure for policy manager. */ 1021/*
1022 * tomoyo_policy_manager_entry is a structure which is used for holding list of
1023 * domainnames or programs which are permitted to modify configuration via
1024 * /sys/kernel/security/tomoyo/ interface.
1025 * It has following fields.
1026 *
1027 * (1) "list" which is linked to tomoyo_policy_manager_list .
1028 * (2) "manager" is a domainname or a program's pathname.
1029 * (3) "is_domain" is a bool which is true if "manager" is a domainname, false
1030 * otherwise.
1031 * (4) "is_deleted" is a bool which is true if marked as deleted, false
1032 * otherwise.
1033 */
1013struct tomoyo_policy_manager_entry { 1034struct tomoyo_policy_manager_entry {
1014 struct list_head list; 1035 struct list_head list;
1015 /* A path to program or a domainname. */ 1036 /* A path to program or a domainname. */
@@ -1018,7 +1039,36 @@ struct tomoyo_policy_manager_entry {
1018 bool is_deleted; /* True if this entry is deleted. */ 1039 bool is_deleted; /* True if this entry is deleted. */
1019}; 1040};
1020 1041
1021/* The list for "struct tomoyo_policy_manager_entry". */ 1042/*
1043 * tomoyo_policy_manager_list is used for holding list of domainnames or
1044 * programs which are permitted to modify configuration via
1045 * /sys/kernel/security/tomoyo/ interface.
1046 *
1047 * An entry is added by
1048 *
1049 * # echo '<kernel> /sbin/mingetty /bin/login /bin/bash' > \
1050 * /sys/kernel/security/tomoyo/manager
1051 * (if you want to specify by a domainname)
1052 *
1053 * or
1054 *
1055 * # echo '/usr/lib/ccs/editpolicy' > /sys/kernel/security/tomoyo/manager
1056 * (if you want to specify by a program's location)
1057 *
1058 * and is deleted by
1059 *
1060 * # echo 'delete <kernel> /sbin/mingetty /bin/login /bin/bash' > \
1061 * /sys/kernel/security/tomoyo/manager
1062 *
1063 * or
1064 *
1065 * # echo 'delete /usr/lib/ccs/editpolicy' > \
1066 * /sys/kernel/security/tomoyo/manager
1067 *
1068 * and all entries are retrieved by
1069 *
1070 * # cat /sys/kernel/security/tomoyo/manager
1071 */
1022static LIST_HEAD(tomoyo_policy_manager_list); 1072static LIST_HEAD(tomoyo_policy_manager_list);
1023static DECLARE_RWSEM(tomoyo_policy_manager_list_lock); 1073static DECLARE_RWSEM(tomoyo_policy_manager_list_lock);
1024 1074
@@ -1050,7 +1100,6 @@ static int tomoyo_update_manager_entry(const char *manager,
1050 saved_manager = tomoyo_save_name(manager); 1100 saved_manager = tomoyo_save_name(manager);
1051 if (!saved_manager) 1101 if (!saved_manager)
1052 return -ENOMEM; 1102 return -ENOMEM;
1053 /***** EXCLUSIVE SECTION START *****/
1054 down_write(&tomoyo_policy_manager_list_lock); 1103 down_write(&tomoyo_policy_manager_list_lock);
1055 list_for_each_entry(ptr, &tomoyo_policy_manager_list, list) { 1104 list_for_each_entry(ptr, &tomoyo_policy_manager_list, list) {
1056 if (ptr->manager != saved_manager) 1105 if (ptr->manager != saved_manager)
@@ -1072,7 +1121,6 @@ static int tomoyo_update_manager_entry(const char *manager,
1072 error = 0; 1121 error = 0;
1073 out: 1122 out:
1074 up_write(&tomoyo_policy_manager_list_lock); 1123 up_write(&tomoyo_policy_manager_list_lock);
1075 /***** EXCLUSIVE SECTION END *****/
1076 return error; 1124 return error;
1077} 1125}
1078 1126
@@ -1117,10 +1165,9 @@ static int tomoyo_read_manager_policy(struct tomoyo_io_buffer *head)
1117 list); 1165 list);
1118 if (ptr->is_deleted) 1166 if (ptr->is_deleted)
1119 continue; 1167 continue;
1120 if (!tomoyo_io_printf(head, "%s\n", ptr->manager->name)) { 1168 done = tomoyo_io_printf(head, "%s\n", ptr->manager->name);
1121 done = false; 1169 if (!done)
1122 break; 1170 break;
1123 }
1124 } 1171 }
1125 up_read(&tomoyo_policy_manager_list_lock); 1172 up_read(&tomoyo_policy_manager_list_lock);
1126 head->read_eof = done; 1173 head->read_eof = done;
@@ -1197,13 +1244,11 @@ static bool tomoyo_is_select_one(struct tomoyo_io_buffer *head,
1197 1244
1198 if (sscanf(data, "pid=%u", &pid) == 1) { 1245 if (sscanf(data, "pid=%u", &pid) == 1) {
1199 struct task_struct *p; 1246 struct task_struct *p;
1200 /***** CRITICAL SECTION START *****/
1201 read_lock(&tasklist_lock); 1247 read_lock(&tasklist_lock);
1202 p = find_task_by_vpid(pid); 1248 p = find_task_by_vpid(pid);
1203 if (p) 1249 if (p)
1204 domain = tomoyo_real_domain(p); 1250 domain = tomoyo_real_domain(p);
1205 read_unlock(&tasklist_lock); 1251 read_unlock(&tasklist_lock);
1206 /***** CRITICAL SECTION END *****/
1207 } else if (!strncmp(data, "domain=", 7)) { 1252 } else if (!strncmp(data, "domain=", 7)) {
1208 if (tomoyo_is_domain_def(data + 7)) { 1253 if (tomoyo_is_domain_def(data + 7)) {
1209 down_read(&tomoyo_domain_list_lock); 1254 down_read(&tomoyo_domain_list_lock);
@@ -1447,15 +1492,14 @@ static int tomoyo_read_domain_policy(struct tomoyo_io_buffer *head)
1447 TOMOYO_DOMAIN_FLAGS_IGNORE_GLOBAL_ALLOW_READ) 1492 TOMOYO_DOMAIN_FLAGS_IGNORE_GLOBAL_ALLOW_READ)
1448 ignore_global_allow_read 1493 ignore_global_allow_read
1449 = TOMOYO_KEYWORD_IGNORE_GLOBAL_ALLOW_READ "\n"; 1494 = TOMOYO_KEYWORD_IGNORE_GLOBAL_ALLOW_READ "\n";
1450 if (!tomoyo_io_printf(head, 1495 done = tomoyo_io_printf(head, "%s\n" TOMOYO_KEYWORD_USE_PROFILE
1451 "%s\n" TOMOYO_KEYWORD_USE_PROFILE "%u\n" 1496 "%u\n%s%s%s\n",
1452 "%s%s%s\n", domain->domainname->name, 1497 domain->domainname->name,
1453 domain->profile, quota_exceeded, 1498 domain->profile, quota_exceeded,
1454 transition_failed, 1499 transition_failed,
1455 ignore_global_allow_read)) { 1500 ignore_global_allow_read);
1456 done = false; 1501 if (!done)
1457 break; 1502 break;
1458 }
1459 head->read_step = 2; 1503 head->read_step = 2;
1460acl_loop: 1504acl_loop:
1461 if (head->read_step == 3) 1505 if (head->read_step == 3)
@@ -1463,24 +1507,22 @@ acl_loop:
1463 /* Print ACL entries in the domain. */ 1507 /* Print ACL entries in the domain. */
1464 down_read(&tomoyo_domain_acl_info_list_lock); 1508 down_read(&tomoyo_domain_acl_info_list_lock);
1465 list_for_each_cookie(apos, head->read_var2, 1509 list_for_each_cookie(apos, head->read_var2,
1466 &domain->acl_info_list) { 1510 &domain->acl_info_list) {
1467 struct tomoyo_acl_info *ptr 1511 struct tomoyo_acl_info *ptr
1468 = list_entry(apos, struct tomoyo_acl_info, 1512 = list_entry(apos, struct tomoyo_acl_info,
1469 list); 1513 list);
1470 if (!tomoyo_print_entry(head, ptr)) { 1514 done = tomoyo_print_entry(head, ptr);
1471 done = false; 1515 if (!done)
1472 break; 1516 break;
1473 }
1474 } 1517 }
1475 up_read(&tomoyo_domain_acl_info_list_lock); 1518 up_read(&tomoyo_domain_acl_info_list_lock);
1476 if (!done) 1519 if (!done)
1477 break; 1520 break;
1478 head->read_step = 3; 1521 head->read_step = 3;
1479tail_mark: 1522tail_mark:
1480 if (!tomoyo_io_printf(head, "\n")) { 1523 done = tomoyo_io_printf(head, "\n");
1481 done = false; 1524 if (!done)
1482 break; 1525 break;
1483 }
1484 head->read_step = 1; 1526 head->read_step = 1;
1485 if (head->read_single_domain) 1527 if (head->read_single_domain)
1486 break; 1528 break;
@@ -1550,11 +1592,10 @@ static int tomoyo_read_domain_profile(struct tomoyo_io_buffer *head)
1550 domain = list_entry(pos, struct tomoyo_domain_info, list); 1592 domain = list_entry(pos, struct tomoyo_domain_info, list);
1551 if (domain->is_deleted) 1593 if (domain->is_deleted)
1552 continue; 1594 continue;
1553 if (!tomoyo_io_printf(head, "%u %s\n", domain->profile, 1595 done = tomoyo_io_printf(head, "%u %s\n", domain->profile,
1554 domain->domainname->name)) { 1596 domain->domainname->name);
1555 done = false; 1597 if (!done)
1556 break; 1598 break;
1557 }
1558 } 1599 }
1559 up_read(&tomoyo_domain_list_lock); 1600 up_read(&tomoyo_domain_list_lock);
1560 head->read_eof = done; 1601 head->read_eof = done;
@@ -1594,13 +1635,11 @@ static int tomoyo_read_pid(struct tomoyo_io_buffer *head)
1594 const int pid = head->read_step; 1635 const int pid = head->read_step;
1595 struct task_struct *p; 1636 struct task_struct *p;
1596 struct tomoyo_domain_info *domain = NULL; 1637 struct tomoyo_domain_info *domain = NULL;
1597 /***** CRITICAL SECTION START *****/
1598 read_lock(&tasklist_lock); 1638 read_lock(&tasklist_lock);
1599 p = find_task_by_vpid(pid); 1639 p = find_task_by_vpid(pid);
1600 if (p) 1640 if (p)
1601 domain = tomoyo_real_domain(p); 1641 domain = tomoyo_real_domain(p);
1602 read_unlock(&tasklist_lock); 1642 read_unlock(&tasklist_lock);
1603 /***** CRITICAL SECTION END *****/
1604 if (domain) 1643 if (domain)
1605 tomoyo_io_printf(head, "%d %u %s", pid, domain->profile, 1644 tomoyo_io_printf(head, "%d %u %s", pid, domain->profile,
1606 domain->domainname->name); 1645 domain->domainname->name);
@@ -2138,7 +2177,13 @@ static ssize_t tomoyo_write(struct file *file, const char __user *buf,
2138 return tomoyo_write_control(file, buf, count); 2177 return tomoyo_write_control(file, buf, count);
2139} 2178}
2140 2179
2141/* Operations for /sys/kernel/security/tomoyo/ interface. */ 2180/*
2181 * tomoyo_operations is a "struct file_operations" which is used for handling
2182 * /sys/kernel/security/tomoyo/ interface.
2183 *
2184 * Some files under /sys/kernel/security/tomoyo/ directory accept open(O_RDWR).
2185 * See tomoyo_io_buffer for internals.
2186 */
2142static const struct file_operations tomoyo_operations = { 2187static const struct file_operations tomoyo_operations = {
2143 .open = tomoyo_open, 2188 .open = tomoyo_open,
2144 .release = tomoyo_release, 2189 .release = tomoyo_release,
diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h
index 678f4ff16aa4..6d6ba09af457 100644
--- a/security/tomoyo/common.h
+++ b/security/tomoyo/common.h
@@ -26,16 +26,43 @@
26struct dentry; 26struct dentry;
27struct vfsmount; 27struct vfsmount;
28 28
29/* Temporary buffer for holding pathnames. */ 29/*
30 * tomoyo_page_buffer is a structure which is used for holding a pathname
31 * obtained from "struct dentry" and "struct vfsmount" pair.
32 * As of now, it is 4096 bytes. If users complain that 4096 bytes is too small
33 * (because TOMOYO escapes non ASCII printable characters using \ooo format),
34 * we will make the buffer larger.
35 */
30struct tomoyo_page_buffer { 36struct tomoyo_page_buffer {
31 char buffer[4096]; 37 char buffer[4096];
32}; 38};
33 39
34/* Structure for holding a token. */ 40/*
41 * tomoyo_path_info is a structure which is used for holding a string data
42 * used by TOMOYO.
43 * This structure has several fields for supporting pattern matching.
44 *
45 * (1) "name" is the '\0' terminated string data.
46 * (2) "hash" is full_name_hash(name, strlen(name)).
47 * This allows tomoyo_pathcmp() to compare by hash before actually compare
48 * using strcmp().
49 * (3) "const_len" is the length of the initial segment of "name" which
50 * consists entirely of non wildcard characters. In other words, the length
51 * which we can compare two strings using strncmp().
52 * (4) "is_dir" is a bool which is true if "name" ends with "/",
53 * false otherwise.
54 * TOMOYO distinguishes directory and non-directory. A directory ends with
55 * "/" and non-directory does not end with "/".
56 * (5) "is_patterned" is a bool which is true if "name" contains wildcard
57 * characters, false otherwise. This allows TOMOYO to use "hash" and
58 * strcmp() for string comparison if "is_patterned" is false.
59 * (6) "depth" is calculated using the number of "/" characters in "name".
60 * This allows TOMOYO to avoid comparing two pathnames which never match
61 * (e.g. whether "/var/www/html/index.html" matches "/tmp/sh-thd-\$").
62 */
35struct tomoyo_path_info { 63struct tomoyo_path_info {
36 const char *name; 64 const char *name;
37 u32 hash; /* = full_name_hash(name, strlen(name)) */ 65 u32 hash; /* = full_name_hash(name, strlen(name)) */
38 u16 total_len; /* = strlen(name) */
39 u16 const_len; /* = tomoyo_const_part_length(name) */ 66 u16 const_len; /* = tomoyo_const_part_length(name) */
40 bool is_dir; /* = tomoyo_strendswith(name, "/") */ 67 bool is_dir; /* = tomoyo_strendswith(name, "/") */
41 bool is_patterned; /* = tomoyo_path_contains_pattern(name) */ 68 bool is_patterned; /* = tomoyo_path_contains_pattern(name) */
@@ -51,7 +78,20 @@ struct tomoyo_path_info {
51 */ 78 */
52#define TOMOYO_MAX_PATHNAME_LEN 4000 79#define TOMOYO_MAX_PATHNAME_LEN 4000
53 80
54/* Structure for holding requested pathname. */ 81/*
82 * tomoyo_path_info_with_data is a structure which is used for holding a
83 * pathname obtained from "struct dentry" and "struct vfsmount" pair.
84 *
85 * "struct tomoyo_path_info_with_data" consists of "struct tomoyo_path_info"
86 * and buffer for the pathname, while "struct tomoyo_page_buffer" consists of
87 * buffer for the pathname only.
88 *
89 * "struct tomoyo_path_info_with_data" is intended to allow TOMOYO to release
90 * both "struct tomoyo_path_info" and buffer for the pathname by single kfree()
91 * so that we don't need to return two pointers to the caller. If the caller
92 * puts "struct tomoyo_path_info" on stack memory, we will be able to remove
93 * "struct tomoyo_path_info_with_data".
94 */
55struct tomoyo_path_info_with_data { 95struct tomoyo_path_info_with_data {
56 /* Keep "head" first, for this pointer is passed to tomoyo_free(). */ 96 /* Keep "head" first, for this pointer is passed to tomoyo_free(). */
57 struct tomoyo_path_info head; 97 struct tomoyo_path_info head;
@@ -61,7 +101,15 @@ struct tomoyo_path_info_with_data {
61}; 101};
62 102
63/* 103/*
64 * Common header for holding ACL entries. 104 * tomoyo_acl_info is a structure which is used for holding
105 *
106 * (1) "list" which is linked to the ->acl_info_list of
107 * "struct tomoyo_domain_info"
108 * (2) "type" which tells
109 * (a) type & 0x7F : type of the entry (either
110 * "struct tomoyo_single_path_acl_record" or
111 * "struct tomoyo_double_path_acl_record")
112 * (b) type & 0x80 : whether the entry is marked as "deleted".
65 * 113 *
66 * Packing "struct tomoyo_acl_info" allows 114 * Packing "struct tomoyo_acl_info" allows
67 * "struct tomoyo_single_path_acl_record" to embed "u16" and 115 * "struct tomoyo_single_path_acl_record" to embed "u16" and
@@ -81,7 +129,28 @@ struct tomoyo_acl_info {
81/* This ACL entry is deleted. */ 129/* This ACL entry is deleted. */
82#define TOMOYO_ACL_DELETED 0x80 130#define TOMOYO_ACL_DELETED 0x80
83 131
84/* Structure for domain information. */ 132/*
133 * tomoyo_domain_info is a structure which is used for holding permissions
134 * (e.g. "allow_read /lib/libc-2.5.so") given to each domain.
135 * It has following fields.
136 *
137 * (1) "list" which is linked to tomoyo_domain_list .
138 * (2) "acl_info_list" which is linked to "struct tomoyo_acl_info".
139 * (3) "domainname" which holds the name of the domain.
140 * (4) "profile" which remembers profile number assigned to this domain.
141 * (5) "is_deleted" is a bool which is true if this domain is marked as
142 * "deleted", false otherwise.
143 * (6) "quota_warned" is a bool which is used for suppressing warning message
144 * when learning mode learned too much entries.
145 * (7) "flags" which remembers this domain's attributes.
146 *
147 * A domain's lifecycle is an analogy of files on / directory.
148 * Multiple domains with the same domainname cannot be created (as with
149 * creating files with the same filename fails with -EEXIST).
150 * If a process reached a domain, that process can reside in that domain after
151 * that domain is marked as "deleted" (as with a process can access an already
152 * open()ed file after that file was unlink()ed).
153 */
85struct tomoyo_domain_info { 154struct tomoyo_domain_info {
86 struct list_head list; 155 struct list_head list;
87 struct list_head acl_info_list; 156 struct list_head acl_info_list;
@@ -108,10 +177,18 @@ struct tomoyo_domain_info {
108#define TOMOYO_DOMAIN_FLAGS_TRANSITION_FAILED 2 177#define TOMOYO_DOMAIN_FLAGS_TRANSITION_FAILED 2
109 178
110/* 179/*
111 * Structure for "allow_read/write", "allow_execute", "allow_read", 180 * tomoyo_single_path_acl_record is a structure which is used for holding an
112 * "allow_write", "allow_create", "allow_unlink", "allow_mkdir", "allow_rmdir", 181 * entry with one pathname operation (e.g. open(), mkdir()).
113 * "allow_mkfifo", "allow_mksock", "allow_mkblock", "allow_mkchar", 182 * It has following fields.
114 * "allow_truncate", "allow_symlink" and "allow_rewrite" directive. 183 *
184 * (1) "head" which is a "struct tomoyo_acl_info".
185 * (2) "perm" which is a bitmask of permitted operations.
186 * (3) "filename" is the pathname.
187 *
188 * Directives held by this structure are "allow_read/write", "allow_execute",
189 * "allow_read", "allow_write", "allow_create", "allow_unlink", "allow_mkdir",
190 * "allow_rmdir", "allow_mkfifo", "allow_mksock", "allow_mkblock",
191 * "allow_mkchar", "allow_truncate", "allow_symlink" and "allow_rewrite".
115 */ 192 */
116struct tomoyo_single_path_acl_record { 193struct tomoyo_single_path_acl_record {
117 struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_SINGLE_PATH_ACL */ 194 struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_SINGLE_PATH_ACL */
@@ -120,7 +197,18 @@ struct tomoyo_single_path_acl_record {
120 const struct tomoyo_path_info *filename; 197 const struct tomoyo_path_info *filename;
121}; 198};
122 199
123/* Structure for "allow_rename" and "allow_link" directive. */ 200/*
201 * tomoyo_double_path_acl_record is a structure which is used for holding an
202 * entry with two pathnames operation (i.e. link() and rename()).
203 * It has following fields.
204 *
205 * (1) "head" which is a "struct tomoyo_acl_info".
206 * (2) "perm" which is a bitmask of permitted operations.
207 * (3) "filename1" is the source/old pathname.
208 * (4) "filename2" is the destination/new pathname.
209 *
210 * Directives held by this structure are "allow_rename" and "allow_link".
211 */
124struct tomoyo_double_path_acl_record { 212struct tomoyo_double_path_acl_record {
125 struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_DOUBLE_PATH_ACL */ 213 struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_DOUBLE_PATH_ACL */
126 u8 perm; 214 u8 perm;
@@ -153,7 +241,29 @@ struct tomoyo_double_path_acl_record {
153#define TOMOYO_VERBOSE 2 241#define TOMOYO_VERBOSE 2
154#define TOMOYO_MAX_CONTROL_INDEX 3 242#define TOMOYO_MAX_CONTROL_INDEX 3
155 243
156/* Structure for reading/writing policy via securityfs interfaces. */ 244/*
245 * tomoyo_io_buffer is a structure which is used for reading and modifying
246 * configuration via /sys/kernel/security/tomoyo/ interface.
247 * It has many fields. ->read_var1 , ->read_var2 , ->write_var1 are used as
248 * cursors.
249 *
250 * Since the content of /sys/kernel/security/tomoyo/domain_policy is a list of
251 * "struct tomoyo_domain_info" entries and each "struct tomoyo_domain_info"
252 * entry has a list of "struct tomoyo_acl_info", we need two cursors when
253 * reading (one is for traversing tomoyo_domain_list and the other is for
254 * traversing "struct tomoyo_acl_info"->acl_info_list ).
255 *
256 * If a line written to /sys/kernel/security/tomoyo/domain_policy starts with
257 * "select ", TOMOYO seeks the cursor ->read_var1 and ->write_var1 to the
258 * domain with the domainname specified by the rest of that line (NULL is set
259 * if seek failed).
260 * If a line written to /sys/kernel/security/tomoyo/domain_policy starts with
261 * "delete ", TOMOYO deletes an entry or a domain specified by the rest of that
262 * line (->write_var1 is set to NULL if a domain was deleted).
263 * If a line written to /sys/kernel/security/tomoyo/domain_policy starts with
264 * neither "select " nor "delete ", an entry or a domain specified by that line
265 * is appended.
266 */
157struct tomoyo_io_buffer { 267struct tomoyo_io_buffer {
158 int (*read) (struct tomoyo_io_buffer *); 268 int (*read) (struct tomoyo_io_buffer *);
159 int (*write) (struct tomoyo_io_buffer *); 269 int (*write) (struct tomoyo_io_buffer *);
diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index 2d6748741a26..1d8b16960576 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -19,11 +19,63 @@
19/* The initial domain. */ 19/* The initial domain. */
20struct tomoyo_domain_info tomoyo_kernel_domain; 20struct tomoyo_domain_info tomoyo_kernel_domain;
21 21
22/* The list for "struct tomoyo_domain_info". */ 22/*
23 * tomoyo_domain_list is used for holding list of domains.
24 * The ->acl_info_list of "struct tomoyo_domain_info" is used for holding
25 * permissions (e.g. "allow_read /lib/libc-2.5.so") given to each domain.
26 *
27 * An entry is added by
28 *
29 * # ( echo "<kernel>"; echo "allow_execute /sbin/init" ) > \
30 * /sys/kernel/security/tomoyo/domain_policy
31 *
32 * and is deleted by
33 *
34 * # ( echo "<kernel>"; echo "delete allow_execute /sbin/init" ) > \
35 * /sys/kernel/security/tomoyo/domain_policy
36 *
37 * and all entries are retrieved by
38 *
39 * # cat /sys/kernel/security/tomoyo/domain_policy
40 *
41 * A domain is added by
42 *
43 * # echo "<kernel>" > /sys/kernel/security/tomoyo/domain_policy
44 *
45 * and is deleted by
46 *
47 * # echo "delete <kernel>" > /sys/kernel/security/tomoyo/domain_policy
48 *
49 * and all domains are retrieved by
50 *
51 * # grep '^<kernel>' /sys/kernel/security/tomoyo/domain_policy
52 *
53 * Normally, a domainname is monotonically getting longer because a domainname
54 * which the process will belong to if an execve() operation succeeds is
55 * defined as a concatenation of "current domainname" + "pathname passed to
56 * execve()".
57 * See tomoyo_domain_initializer_list and tomoyo_domain_keeper_list for
58 * exceptions.
59 */
23LIST_HEAD(tomoyo_domain_list); 60LIST_HEAD(tomoyo_domain_list);
24DECLARE_RWSEM(tomoyo_domain_list_lock); 61DECLARE_RWSEM(tomoyo_domain_list_lock);
25 62
26/* Structure for "initialize_domain" and "no_initialize_domain" keyword. */ 63/*
64 * tomoyo_domain_initializer_entry is a structure which is used for holding
65 * "initialize_domain" and "no_initialize_domain" entries.
66 * It has following fields.
67 *
68 * (1) "list" which is linked to tomoyo_domain_initializer_list .
69 * (2) "domainname" which is "a domainname" or "the last component of a
70 * domainname". This field is NULL if "from" clause is not specified.
71 * (3) "program" which is a program's pathname.
72 * (4) "is_deleted" is a bool which is true if marked as deleted, false
73 * otherwise.
74 * (5) "is_not" is a bool which is true if "no_initialize_domain", false
75 * otherwise.
76 * (6) "is_last_name" is a bool which is true if "domainname" is "the last
77 * component of a domainname", false otherwise.
78 */
27struct tomoyo_domain_initializer_entry { 79struct tomoyo_domain_initializer_entry {
28 struct list_head list; 80 struct list_head list;
29 const struct tomoyo_path_info *domainname; /* This may be NULL */ 81 const struct tomoyo_path_info *domainname; /* This may be NULL */
@@ -34,7 +86,23 @@ struct tomoyo_domain_initializer_entry {
34 bool is_last_name; 86 bool is_last_name;
35}; 87};
36 88
37/* Structure for "keep_domain" and "no_keep_domain" keyword. */ 89/*
90 * tomoyo_domain_keeper_entry is a structure which is used for holding
91 * "keep_domain" and "no_keep_domain" entries.
 92 * It has the following fields.
93 *
94 * (1) "list" which is linked to tomoyo_domain_keeper_list .
95 * (2) "domainname" which is "a domainname" or "the last component of a
96 * domainname".
97 * (3) "program" which is a program's pathname.
98 * This field is NULL if "from" clause is not specified.
99 * (4) "is_deleted" is a bool which is true if marked as deleted, false
100 * otherwise.
 101 * (5) "is_not" is a bool which is true if "no_keep_domain", false
102 * otherwise.
103 * (6) "is_last_name" is a bool which is true if "domainname" is "the last
104 * component of a domainname", false otherwise.
105 */
38struct tomoyo_domain_keeper_entry { 106struct tomoyo_domain_keeper_entry {
39 struct list_head list; 107 struct list_head list;
40 const struct tomoyo_path_info *domainname; 108 const struct tomoyo_path_info *domainname;
@@ -45,7 +113,16 @@ struct tomoyo_domain_keeper_entry {
45 bool is_last_name; 113 bool is_last_name;
46}; 114};
47 115
48/* Structure for "alias" keyword. */ 116/*
117 * tomoyo_alias_entry is a structure which is used for holding "alias" entries.
 118 * It has the following fields.
119 *
120 * (1) "list" which is linked to tomoyo_alias_list .
121 * (2) "original_name" which is a dereferenced pathname.
122 * (3) "aliased_name" which is a symlink's pathname.
123 * (4) "is_deleted" is a bool which is true if marked as deleted, false
124 * otherwise.
125 */
49struct tomoyo_alias_entry { 126struct tomoyo_alias_entry {
50 struct list_head list; 127 struct list_head list;
51 const struct tomoyo_path_info *original_name; 128 const struct tomoyo_path_info *original_name;
@@ -67,14 +144,12 @@ void tomoyo_set_domain_flag(struct tomoyo_domain_info *domain,
67{ 144{
68 /* We need to serialize because this is bitfield operation. */ 145 /* We need to serialize because this is bitfield operation. */
69 static DEFINE_SPINLOCK(lock); 146 static DEFINE_SPINLOCK(lock);
70 /***** CRITICAL SECTION START *****/
71 spin_lock(&lock); 147 spin_lock(&lock);
72 if (!is_delete) 148 if (!is_delete)
73 domain->flags |= flags; 149 domain->flags |= flags;
74 else 150 else
75 domain->flags &= ~flags; 151 domain->flags &= ~flags;
76 spin_unlock(&lock); 152 spin_unlock(&lock);
77 /***** CRITICAL SECTION END *****/
78} 153}
79 154
80/** 155/**
@@ -94,7 +169,42 @@ const char *tomoyo_get_last_name(const struct tomoyo_domain_info *domain)
94 return cp0; 169 return cp0;
95} 170}
96 171
97/* The list for "struct tomoyo_domain_initializer_entry". */ 172/*
173 * tomoyo_domain_initializer_list is used for holding list of programs which
174 * triggers reinitialization of domainname. Normally, a domainname is
175 * monotonically getting longer. But sometimes, we restart daemon programs.
176 * It would be convenient for us that "a daemon started upon system boot" and
177 * "the daemon restarted from console" belong to the same domain. Thus, TOMOYO
178 * provides a way to shorten domainnames.
179 *
180 * An entry is added by
181 *
182 * # echo 'initialize_domain /usr/sbin/httpd' > \
183 * /sys/kernel/security/tomoyo/exception_policy
184 *
185 * and is deleted by
186 *
187 * # echo 'delete initialize_domain /usr/sbin/httpd' > \
188 * /sys/kernel/security/tomoyo/exception_policy
189 *
190 * and all entries are retrieved by
191 *
192 * # grep ^initialize_domain /sys/kernel/security/tomoyo/exception_policy
193 *
194 * In the example above, /usr/sbin/httpd will belong to
195 * "<kernel> /usr/sbin/httpd" domain.
196 *
197 * You may specify a domainname using "from" keyword.
198 * "initialize_domain /usr/sbin/httpd from <kernel> /etc/rc.d/init.d/httpd"
199 * will cause "/usr/sbin/httpd" executed from "<kernel> /etc/rc.d/init.d/httpd"
200 * domain to belong to "<kernel> /usr/sbin/httpd" domain.
201 *
202 * You may add "no_" prefix to "initialize_domain".
203 * "initialize_domain /usr/sbin/httpd" and
204 * "no_initialize_domain /usr/sbin/httpd from <kernel> /etc/rc.d/init.d/httpd"
205 * will cause "/usr/sbin/httpd" to belong to "<kernel> /usr/sbin/httpd" domain
206 * unless executed from "<kernel> /etc/rc.d/init.d/httpd" domain.
207 */
98static LIST_HEAD(tomoyo_domain_initializer_list); 208static LIST_HEAD(tomoyo_domain_initializer_list);
99static DECLARE_RWSEM(tomoyo_domain_initializer_list_lock); 209static DECLARE_RWSEM(tomoyo_domain_initializer_list_lock);
100 210
@@ -135,7 +245,6 @@ static int tomoyo_update_domain_initializer_entry(const char *domainname,
135 saved_program = tomoyo_save_name(program); 245 saved_program = tomoyo_save_name(program);
136 if (!saved_program) 246 if (!saved_program)
137 return -ENOMEM; 247 return -ENOMEM;
138 /***** EXCLUSIVE SECTION START *****/
139 down_write(&tomoyo_domain_initializer_list_lock); 248 down_write(&tomoyo_domain_initializer_list_lock);
140 list_for_each_entry(ptr, &tomoyo_domain_initializer_list, list) { 249 list_for_each_entry(ptr, &tomoyo_domain_initializer_list, list) {
141 if (ptr->is_not != is_not || 250 if (ptr->is_not != is_not ||
@@ -161,7 +270,6 @@ static int tomoyo_update_domain_initializer_entry(const char *domainname,
161 error = 0; 270 error = 0;
162 out: 271 out:
163 up_write(&tomoyo_domain_initializer_list_lock); 272 up_write(&tomoyo_domain_initializer_list_lock);
164 /***** EXCLUSIVE SECTION END *****/
165 return error; 273 return error;
166} 274}
167 275
@@ -193,13 +301,12 @@ bool tomoyo_read_domain_initializer_policy(struct tomoyo_io_buffer *head)
193 from = " from "; 301 from = " from ";
194 domain = ptr->domainname->name; 302 domain = ptr->domainname->name;
195 } 303 }
196 if (!tomoyo_io_printf(head, 304 done = tomoyo_io_printf(head,
197 "%s" TOMOYO_KEYWORD_INITIALIZE_DOMAIN 305 "%s" TOMOYO_KEYWORD_INITIALIZE_DOMAIN
198 "%s%s%s\n", no, ptr->program->name, from, 306 "%s%s%s\n", no, ptr->program->name,
199 domain)) { 307 from, domain);
200 done = false; 308 if (!done)
201 break; 309 break;
202 }
203 } 310 }
204 up_read(&tomoyo_domain_initializer_list_lock); 311 up_read(&tomoyo_domain_initializer_list_lock);
205 return done; 312 return done;
@@ -273,7 +380,44 @@ static bool tomoyo_is_domain_initializer(const struct tomoyo_path_info *
273 return flag; 380 return flag;
274} 381}
275 382
276/* The list for "struct tomoyo_domain_keeper_entry". */ 383/*
384 * tomoyo_domain_keeper_list is used for holding list of domainnames which
385 * suppresses domain transition. Normally, a domainname is monotonically
386 * getting longer. But sometimes, we want to suppress domain transition.
387 * It would be convenient for us that programs executed from a login session
388 * belong to the same domain. Thus, TOMOYO provides a way to suppress domain
389 * transition.
390 *
391 * An entry is added by
392 *
393 * # echo 'keep_domain <kernel> /usr/sbin/sshd /bin/bash' > \
394 * /sys/kernel/security/tomoyo/exception_policy
395 *
396 * and is deleted by
397 *
398 * # echo 'delete keep_domain <kernel> /usr/sbin/sshd /bin/bash' > \
399 * /sys/kernel/security/tomoyo/exception_policy
400 *
401 * and all entries are retrieved by
402 *
403 * # grep ^keep_domain /sys/kernel/security/tomoyo/exception_policy
404 *
405 * In the example above, any process which belongs to
406 * "<kernel> /usr/sbin/sshd /bin/bash" domain will remain in that domain,
407 * unless explicitly specified by "initialize_domain" or "no_keep_domain".
408 *
409 * You may specify a program using "from" keyword.
410 * "keep_domain /bin/pwd from <kernel> /usr/sbin/sshd /bin/bash"
411 * will cause "/bin/pwd" executed from "<kernel> /usr/sbin/sshd /bin/bash"
412 * domain to remain in "<kernel> /usr/sbin/sshd /bin/bash" domain.
413 *
414 * You may add "no_" prefix to "keep_domain".
415 * "keep_domain <kernel> /usr/sbin/sshd /bin/bash" and
416 * "no_keep_domain /usr/bin/passwd from <kernel> /usr/sbin/sshd /bin/bash" will
417 * cause "/usr/bin/passwd" to belong to
418 * "<kernel> /usr/sbin/sshd /bin/bash /usr/bin/passwd" domain, unless
419 * explicitly specified by "initialize_domain".
420 */
277static LIST_HEAD(tomoyo_domain_keeper_list); 421static LIST_HEAD(tomoyo_domain_keeper_list);
278static DECLARE_RWSEM(tomoyo_domain_keeper_list_lock); 422static DECLARE_RWSEM(tomoyo_domain_keeper_list_lock);
279 423
@@ -296,7 +440,6 @@ static int tomoyo_update_domain_keeper_entry(const char *domainname,
296 struct tomoyo_domain_keeper_entry *ptr; 440 struct tomoyo_domain_keeper_entry *ptr;
297 const struct tomoyo_path_info *saved_domainname; 441 const struct tomoyo_path_info *saved_domainname;
298 const struct tomoyo_path_info *saved_program = NULL; 442 const struct tomoyo_path_info *saved_program = NULL;
299 static DEFINE_MUTEX(lock);
300 int error = -ENOMEM; 443 int error = -ENOMEM;
301 bool is_last_name = false; 444 bool is_last_name = false;
302 445
@@ -315,7 +458,6 @@ static int tomoyo_update_domain_keeper_entry(const char *domainname,
315 saved_domainname = tomoyo_save_name(domainname); 458 saved_domainname = tomoyo_save_name(domainname);
316 if (!saved_domainname) 459 if (!saved_domainname)
317 return -ENOMEM; 460 return -ENOMEM;
318 /***** EXCLUSIVE SECTION START *****/
319 down_write(&tomoyo_domain_keeper_list_lock); 461 down_write(&tomoyo_domain_keeper_list_lock);
320 list_for_each_entry(ptr, &tomoyo_domain_keeper_list, list) { 462 list_for_each_entry(ptr, &tomoyo_domain_keeper_list, list) {
321 if (ptr->is_not != is_not || 463 if (ptr->is_not != is_not ||
@@ -341,7 +483,6 @@ static int tomoyo_update_domain_keeper_entry(const char *domainname,
341 error = 0; 483 error = 0;
342 out: 484 out:
343 up_write(&tomoyo_domain_keeper_list_lock); 485 up_write(&tomoyo_domain_keeper_list_lock);
344 /***** EXCLUSIVE SECTION END *****/
345 return error; 486 return error;
346} 487}
347 488
@@ -394,13 +535,12 @@ bool tomoyo_read_domain_keeper_policy(struct tomoyo_io_buffer *head)
394 from = " from "; 535 from = " from ";
395 program = ptr->program->name; 536 program = ptr->program->name;
396 } 537 }
397 if (!tomoyo_io_printf(head, 538 done = tomoyo_io_printf(head,
398 "%s" TOMOYO_KEYWORD_KEEP_DOMAIN 539 "%s" TOMOYO_KEYWORD_KEEP_DOMAIN
399 "%s%s%s\n", no, program, from, 540 "%s%s%s\n", no, program, from,
400 ptr->domainname->name)) { 541 ptr->domainname->name);
401 done = false; 542 if (!done)
402 break; 543 break;
403 }
404 } 544 }
405 up_read(&tomoyo_domain_keeper_list_lock); 545 up_read(&tomoyo_domain_keeper_list_lock);
406 return done; 546 return done;
@@ -446,7 +586,36 @@ static bool tomoyo_is_domain_keeper(const struct tomoyo_path_info *domainname,
446 return flag; 586 return flag;
447} 587}
448 588
449/* The list for "struct tomoyo_alias_entry". */ 589/*
590 * tomoyo_alias_list is used for holding list of symlink's pathnames which are
591 * allowed to be passed to an execve() request. Normally, the domainname which
592 * the current process will belong to after execve() succeeds is calculated
593 * using dereferenced pathnames. But some programs behave differently depending
594 * on the name passed to argv[0]. For busybox, calculating domainname using
595 * dereferenced pathnames will cause all programs in the busybox to belong to
596 * the same domain. Thus, TOMOYO provides a way to allow use of symlink's
597 * pathname for checking execve()'s permission and calculating domainname which
598 * the current process will belong to after execve() succeeds.
599 *
600 * An entry is added by
601 *
602 * # echo 'alias /bin/busybox /bin/cat' > \
603 * /sys/kernel/security/tomoyo/exception_policy
604 *
605 * and is deleted by
606 *
607 * # echo 'delete alias /bin/busybox /bin/cat' > \
608 * /sys/kernel/security/tomoyo/exception_policy
609 *
610 * and all entries are retrieved by
611 *
612 * # grep ^alias /sys/kernel/security/tomoyo/exception_policy
613 *
614 * In the example above, if /bin/cat is a symlink to /bin/busybox and execution
615 * of /bin/cat is requested, permission is checked for /bin/cat rather than
616 * /bin/busybox and domainname which the current process will belong to after
617 * execve() succeeds is calculated using /bin/cat rather than /bin/busybox .
618 */
450static LIST_HEAD(tomoyo_alias_list); 619static LIST_HEAD(tomoyo_alias_list);
451static DECLARE_RWSEM(tomoyo_alias_list_lock); 620static DECLARE_RWSEM(tomoyo_alias_list_lock);
452 621
@@ -476,7 +645,6 @@ static int tomoyo_update_alias_entry(const char *original_name,
476 saved_aliased_name = tomoyo_save_name(aliased_name); 645 saved_aliased_name = tomoyo_save_name(aliased_name);
477 if (!saved_original_name || !saved_aliased_name) 646 if (!saved_original_name || !saved_aliased_name)
478 return -ENOMEM; 647 return -ENOMEM;
479 /***** EXCLUSIVE SECTION START *****/
480 down_write(&tomoyo_alias_list_lock); 648 down_write(&tomoyo_alias_list_lock);
481 list_for_each_entry(ptr, &tomoyo_alias_list, list) { 649 list_for_each_entry(ptr, &tomoyo_alias_list, list) {
482 if (ptr->original_name != saved_original_name || 650 if (ptr->original_name != saved_original_name ||
@@ -499,7 +667,6 @@ static int tomoyo_update_alias_entry(const char *original_name,
499 error = 0; 667 error = 0;
500 out: 668 out:
501 up_write(&tomoyo_alias_list_lock); 669 up_write(&tomoyo_alias_list_lock);
502 /***** EXCLUSIVE SECTION END *****/
503 return error; 670 return error;
504} 671}
505 672
@@ -522,12 +689,11 @@ bool tomoyo_read_alias_policy(struct tomoyo_io_buffer *head)
522 ptr = list_entry(pos, struct tomoyo_alias_entry, list); 689 ptr = list_entry(pos, struct tomoyo_alias_entry, list);
523 if (ptr->is_deleted) 690 if (ptr->is_deleted)
524 continue; 691 continue;
525 if (!tomoyo_io_printf(head, TOMOYO_KEYWORD_ALIAS "%s %s\n", 692 done = tomoyo_io_printf(head, TOMOYO_KEYWORD_ALIAS "%s %s\n",
526 ptr->original_name->name, 693 ptr->original_name->name,
527 ptr->aliased_name->name)) { 694 ptr->aliased_name->name);
528 done = false; 695 if (!done)
529 break; 696 break;
530 }
531 } 697 }
532 up_read(&tomoyo_alias_list_lock); 698 up_read(&tomoyo_alias_list_lock);
533 return done; 699 return done;
@@ -567,7 +733,6 @@ int tomoyo_delete_domain(char *domainname)
567 733
568 name.name = domainname; 734 name.name = domainname;
569 tomoyo_fill_path_info(&name); 735 tomoyo_fill_path_info(&name);
570 /***** EXCLUSIVE SECTION START *****/
571 down_write(&tomoyo_domain_list_lock); 736 down_write(&tomoyo_domain_list_lock);
572 /* Is there an active domain? */ 737 /* Is there an active domain? */
573 list_for_each_entry(domain, &tomoyo_domain_list, list) { 738 list_for_each_entry(domain, &tomoyo_domain_list, list) {
@@ -581,7 +746,6 @@ int tomoyo_delete_domain(char *domainname)
581 break; 746 break;
582 } 747 }
583 up_write(&tomoyo_domain_list_lock); 748 up_write(&tomoyo_domain_list_lock);
584 /***** EXCLUSIVE SECTION END *****/
585 return 0; 749 return 0;
586} 750}
587 751
@@ -600,7 +764,6 @@ struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char *
600 struct tomoyo_domain_info *domain = NULL; 764 struct tomoyo_domain_info *domain = NULL;
601 const struct tomoyo_path_info *saved_domainname; 765 const struct tomoyo_path_info *saved_domainname;
602 766
603 /***** EXCLUSIVE SECTION START *****/
604 down_write(&tomoyo_domain_list_lock); 767 down_write(&tomoyo_domain_list_lock);
605 domain = tomoyo_find_domain(domainname); 768 domain = tomoyo_find_domain(domainname);
606 if (domain) 769 if (domain)
@@ -619,7 +782,6 @@ struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char *
619 domain->domainname != saved_domainname) 782 domain->domainname != saved_domainname)
620 continue; 783 continue;
621 flag = false; 784 flag = false;
622 /***** CRITICAL SECTION START *****/
623 read_lock(&tasklist_lock); 785 read_lock(&tasklist_lock);
624 for_each_process(p) { 786 for_each_process(p) {
625 if (tomoyo_real_domain(p) != domain) 787 if (tomoyo_real_domain(p) != domain)
@@ -628,7 +790,6 @@ struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char *
628 break; 790 break;
629 } 791 }
630 read_unlock(&tasklist_lock); 792 read_unlock(&tasklist_lock);
631 /***** CRITICAL SECTION END *****/
632 if (flag) 793 if (flag)
633 continue; 794 continue;
634 list_for_each_entry(ptr, &domain->acl_info_list, list) { 795 list_for_each_entry(ptr, &domain->acl_info_list, list) {
@@ -651,7 +812,6 @@ struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char *
651 } 812 }
652 out: 813 out:
653 up_write(&tomoyo_domain_list_lock); 814 up_write(&tomoyo_domain_list_lock);
654 /***** EXCLUSIVE SECTION END *****/
655 return domain; 815 return domain;
656} 816}
657 817
@@ -739,7 +899,7 @@ int tomoyo_find_next_domain(struct linux_binprm *bprm,
739 } 899 }
740 900
741 /* Check execute permission. */ 901 /* Check execute permission. */
742 retval = tomoyo_check_exec_perm(old_domain, &r, tmp); 902 retval = tomoyo_check_exec_perm(old_domain, &r);
743 if (retval < 0) 903 if (retval < 0)
744 goto out; 904 goto out;
745 905
diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c
index 2316da8ec5bc..5ae3a571559f 100644
--- a/security/tomoyo/file.c
+++ b/security/tomoyo/file.c
@@ -14,21 +14,50 @@
14#include "realpath.h" 14#include "realpath.h"
15#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) 15#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
16 16
17/* Structure for "allow_read" keyword. */ 17/*
18 * tomoyo_globally_readable_file_entry is a structure which is used for holding
19 * "allow_read" entries.
 20 * It has the following fields.
21 *
22 * (1) "list" which is linked to tomoyo_globally_readable_list .
23 * (2) "filename" is a pathname which is allowed to open(O_RDONLY).
24 * (3) "is_deleted" is a bool which is true if marked as deleted, false
25 * otherwise.
26 */
18struct tomoyo_globally_readable_file_entry { 27struct tomoyo_globally_readable_file_entry {
19 struct list_head list; 28 struct list_head list;
20 const struct tomoyo_path_info *filename; 29 const struct tomoyo_path_info *filename;
21 bool is_deleted; 30 bool is_deleted;
22}; 31};
23 32
24/* Structure for "file_pattern" keyword. */ 33/*
34 * tomoyo_pattern_entry is a structure which is used for holding
35 * "tomoyo_pattern_list" entries.
 36 * It has the following fields.
37 *
38 * (1) "list" which is linked to tomoyo_pattern_list .
39 * (2) "pattern" is a pathname pattern which is used for converting pathnames
40 * to pathname patterns during learning mode.
41 * (3) "is_deleted" is a bool which is true if marked as deleted, false
42 * otherwise.
43 */
25struct tomoyo_pattern_entry { 44struct tomoyo_pattern_entry {
26 struct list_head list; 45 struct list_head list;
27 const struct tomoyo_path_info *pattern; 46 const struct tomoyo_path_info *pattern;
28 bool is_deleted; 47 bool is_deleted;
29}; 48};
30 49
31/* Structure for "deny_rewrite" keyword. */ 50/*
51 * tomoyo_no_rewrite_entry is a structure which is used for holding
52 * "deny_rewrite" entries.
 53 * It has the following fields.
54 *
55 * (1) "list" which is linked to tomoyo_no_rewrite_list .
56 * (2) "pattern" is a pathname which is by default not permitted to modify
57 * already existing content.
58 * (3) "is_deleted" is a bool which is true if marked as deleted, false
59 * otherwise.
60 */
32struct tomoyo_no_rewrite_entry { 61struct tomoyo_no_rewrite_entry {
33 struct list_head list; 62 struct list_head list;
34 const struct tomoyo_path_info *pattern; 63 const struct tomoyo_path_info *pattern;
@@ -141,7 +170,31 @@ static int tomoyo_update_single_path_acl(const u8 type, const char *filename,
141 struct tomoyo_domain_info * 170 struct tomoyo_domain_info *
142 const domain, const bool is_delete); 171 const domain, const bool is_delete);
143 172
144/* The list for "struct tomoyo_globally_readable_file_entry". */ 173/*
174 * tomoyo_globally_readable_list is used for holding list of pathnames which
175 * are by default allowed to be open()ed for reading by any process.
176 *
177 * An entry is added by
178 *
179 * # echo 'allow_read /lib/libc-2.5.so' > \
180 * /sys/kernel/security/tomoyo/exception_policy
181 *
182 * and is deleted by
183 *
184 * # echo 'delete allow_read /lib/libc-2.5.so' > \
185 * /sys/kernel/security/tomoyo/exception_policy
186 *
187 * and all entries are retrieved by
188 *
189 * # grep ^allow_read /sys/kernel/security/tomoyo/exception_policy
190 *
191 * In the example above, any process is allowed to
192 * open("/lib/libc-2.5.so", O_RDONLY).
193 * One exception is, if the domain which current process belongs to is marked
194 * as "ignore_global_allow_read", current process can't do so unless explicitly
195 * given "allow_read /lib/libc-2.5.so" to the domain which current process
196 * belongs to.
197 */
145static LIST_HEAD(tomoyo_globally_readable_list); 198static LIST_HEAD(tomoyo_globally_readable_list);
146static DECLARE_RWSEM(tomoyo_globally_readable_list_lock); 199static DECLARE_RWSEM(tomoyo_globally_readable_list_lock);
147 200
@@ -166,7 +219,6 @@ static int tomoyo_update_globally_readable_entry(const char *filename,
166 saved_filename = tomoyo_save_name(filename); 219 saved_filename = tomoyo_save_name(filename);
167 if (!saved_filename) 220 if (!saved_filename)
168 return -ENOMEM; 221 return -ENOMEM;
169 /***** EXCLUSIVE SECTION START *****/
170 down_write(&tomoyo_globally_readable_list_lock); 222 down_write(&tomoyo_globally_readable_list_lock);
171 list_for_each_entry(ptr, &tomoyo_globally_readable_list, list) { 223 list_for_each_entry(ptr, &tomoyo_globally_readable_list, list) {
172 if (ptr->filename != saved_filename) 224 if (ptr->filename != saved_filename)
@@ -187,7 +239,6 @@ static int tomoyo_update_globally_readable_entry(const char *filename,
187 error = 0; 239 error = 0;
188 out: 240 out:
189 up_write(&tomoyo_globally_readable_list_lock); 241 up_write(&tomoyo_globally_readable_list_lock);
190 /***** EXCLUSIVE SECTION END *****/
191 return error; 242 return error;
192} 243}
193 244
@@ -249,17 +300,44 @@ bool tomoyo_read_globally_readable_policy(struct tomoyo_io_buffer *head)
249 list); 300 list);
250 if (ptr->is_deleted) 301 if (ptr->is_deleted)
251 continue; 302 continue;
252 if (!tomoyo_io_printf(head, TOMOYO_KEYWORD_ALLOW_READ "%s\n", 303 done = tomoyo_io_printf(head, TOMOYO_KEYWORD_ALLOW_READ "%s\n",
253 ptr->filename->name)) { 304 ptr->filename->name);
254 done = false; 305 if (!done)
255 break; 306 break;
256 }
257 } 307 }
258 up_read(&tomoyo_globally_readable_list_lock); 308 up_read(&tomoyo_globally_readable_list_lock);
259 return done; 309 return done;
260} 310}
261 311
262/* The list for "struct tomoyo_pattern_entry". */ 312/* tomoyo_pattern_list is used for holding list of pathnames which are used for
313 * converting pathnames to pathname patterns during learning mode.
314 *
315 * An entry is added by
316 *
317 * # echo 'file_pattern /proc/\$/mounts' > \
318 * /sys/kernel/security/tomoyo/exception_policy
319 *
320 * and is deleted by
321 *
322 * # echo 'delete file_pattern /proc/\$/mounts' > \
323 * /sys/kernel/security/tomoyo/exception_policy
324 *
325 * and all entries are retrieved by
326 *
327 * # grep ^file_pattern /sys/kernel/security/tomoyo/exception_policy
328 *
329 * In the example above, if a process which belongs to a domain which is in
330 * learning mode requested open("/proc/1/mounts", O_RDONLY),
331 * "allow_read /proc/\$/mounts" is automatically added to the domain which that
332 * process belongs to.
333 *
334 * It is not a desirable behavior that we have to use /proc/\$/ instead of
335 * /proc/self/ when current process needs to access only current process's
336 * information. As of now, LSM version of TOMOYO is using __d_path() for
337 * calculating pathname. Non LSM version of TOMOYO is using its own function
338 * which pretends as if /proc/self/ is not a symlink; so that we can forbid
339 * current process from accessing other process's information.
340 */
263static LIST_HEAD(tomoyo_pattern_list); 341static LIST_HEAD(tomoyo_pattern_list);
264static DECLARE_RWSEM(tomoyo_pattern_list_lock); 342static DECLARE_RWSEM(tomoyo_pattern_list_lock);
265 343
@@ -284,7 +362,6 @@ static int tomoyo_update_file_pattern_entry(const char *pattern,
284 saved_pattern = tomoyo_save_name(pattern); 362 saved_pattern = tomoyo_save_name(pattern);
285 if (!saved_pattern) 363 if (!saved_pattern)
286 return -ENOMEM; 364 return -ENOMEM;
287 /***** EXCLUSIVE SECTION START *****/
288 down_write(&tomoyo_pattern_list_lock); 365 down_write(&tomoyo_pattern_list_lock);
289 list_for_each_entry(ptr, &tomoyo_pattern_list, list) { 366 list_for_each_entry(ptr, &tomoyo_pattern_list, list) {
290 if (saved_pattern != ptr->pattern) 367 if (saved_pattern != ptr->pattern)
@@ -305,7 +382,6 @@ static int tomoyo_update_file_pattern_entry(const char *pattern,
305 error = 0; 382 error = 0;
306 out: 383 out:
307 up_write(&tomoyo_pattern_list_lock); 384 up_write(&tomoyo_pattern_list_lock);
308 /***** EXCLUSIVE SECTION END *****/
309 return error; 385 return error;
310} 386}
311 387
@@ -373,17 +449,44 @@ bool tomoyo_read_file_pattern(struct tomoyo_io_buffer *head)
373 ptr = list_entry(pos, struct tomoyo_pattern_entry, list); 449 ptr = list_entry(pos, struct tomoyo_pattern_entry, list);
374 if (ptr->is_deleted) 450 if (ptr->is_deleted)
375 continue; 451 continue;
376 if (!tomoyo_io_printf(head, TOMOYO_KEYWORD_FILE_PATTERN "%s\n", 452 done = tomoyo_io_printf(head, TOMOYO_KEYWORD_FILE_PATTERN
377 ptr->pattern->name)) { 453 "%s\n", ptr->pattern->name);
378 done = false; 454 if (!done)
379 break; 455 break;
380 }
381 } 456 }
382 up_read(&tomoyo_pattern_list_lock); 457 up_read(&tomoyo_pattern_list_lock);
383 return done; 458 return done;
384} 459}
385 460
386/* The list for "struct tomoyo_no_rewrite_entry". */ 461/*
462 * tomoyo_no_rewrite_list is used for holding list of pathnames which are by
463 * default forbidden to modify already written content of a file.
464 *
465 * An entry is added by
466 *
467 * # echo 'deny_rewrite /var/log/messages' > \
468 * /sys/kernel/security/tomoyo/exception_policy
469 *
470 * and is deleted by
471 *
472 * # echo 'delete deny_rewrite /var/log/messages' > \
473 * /sys/kernel/security/tomoyo/exception_policy
474 *
475 * and all entries are retrieved by
476 *
477 * # grep ^deny_rewrite /sys/kernel/security/tomoyo/exception_policy
478 *
479 * In the example above, if a process requested to rewrite /var/log/messages ,
480 * the process can't rewrite unless the domain which that process belongs to
481 * has "allow_rewrite /var/log/messages" entry.
482 *
483 * It is not a desirable behavior that we have to add "\040(deleted)" suffix
484 * when we want to allow rewriting already unlink()ed file. As of now,
485 * LSM version of TOMOYO is using __d_path() for calculating pathname.
486 * Non LSM version of TOMOYO is using its own function which doesn't append
487 * " (deleted)" suffix if the file is already unlink()ed; so that we don't
488 * need to worry whether the file is already unlink()ed or not.
489 */
387static LIST_HEAD(tomoyo_no_rewrite_list); 490static LIST_HEAD(tomoyo_no_rewrite_list);
388static DECLARE_RWSEM(tomoyo_no_rewrite_list_lock); 491static DECLARE_RWSEM(tomoyo_no_rewrite_list_lock);
389 492
@@ -407,7 +510,6 @@ static int tomoyo_update_no_rewrite_entry(const char *pattern,
407 saved_pattern = tomoyo_save_name(pattern); 510 saved_pattern = tomoyo_save_name(pattern);
408 if (!saved_pattern) 511 if (!saved_pattern)
409 return -ENOMEM; 512 return -ENOMEM;
410 /***** EXCLUSIVE SECTION START *****/
411 down_write(&tomoyo_no_rewrite_list_lock); 513 down_write(&tomoyo_no_rewrite_list_lock);
412 list_for_each_entry(ptr, &tomoyo_no_rewrite_list, list) { 514 list_for_each_entry(ptr, &tomoyo_no_rewrite_list, list) {
413 if (ptr->pattern != saved_pattern) 515 if (ptr->pattern != saved_pattern)
@@ -428,7 +530,6 @@ static int tomoyo_update_no_rewrite_entry(const char *pattern,
428 error = 0; 530 error = 0;
429 out: 531 out:
430 up_write(&tomoyo_no_rewrite_list_lock); 532 up_write(&tomoyo_no_rewrite_list_lock);
431 /***** EXCLUSIVE SECTION END *****/
432 return error; 533 return error;
433} 534}
434 535
@@ -489,11 +590,10 @@ bool tomoyo_read_no_rewrite_policy(struct tomoyo_io_buffer *head)
489 ptr = list_entry(pos, struct tomoyo_no_rewrite_entry, list); 590 ptr = list_entry(pos, struct tomoyo_no_rewrite_entry, list);
490 if (ptr->is_deleted) 591 if (ptr->is_deleted)
491 continue; 592 continue;
492 if (!tomoyo_io_printf(head, TOMOYO_KEYWORD_DENY_REWRITE "%s\n", 593 done = tomoyo_io_printf(head, TOMOYO_KEYWORD_DENY_REWRITE
493 ptr->pattern->name)) { 594 "%s\n", ptr->pattern->name);
494 done = false; 595 if (!done)
495 break; 596 break;
496 }
497 } 597 }
498 up_read(&tomoyo_no_rewrite_list_lock); 598 up_read(&tomoyo_no_rewrite_list_lock);
499 return done; 599 return done;
@@ -745,7 +845,6 @@ static int tomoyo_update_single_path_acl(const u8 type, const char *filename,
745 saved_filename = tomoyo_save_name(filename); 845 saved_filename = tomoyo_save_name(filename);
746 if (!saved_filename) 846 if (!saved_filename)
747 return -ENOMEM; 847 return -ENOMEM;
748 /***** EXCLUSIVE SECTION START *****/
749 down_write(&tomoyo_domain_acl_info_list_lock); 848 down_write(&tomoyo_domain_acl_info_list_lock);
750 if (is_delete) 849 if (is_delete)
751 goto delete; 850 goto delete;
@@ -800,7 +899,6 @@ static int tomoyo_update_single_path_acl(const u8 type, const char *filename,
800 } 899 }
801 out: 900 out:
802 up_write(&tomoyo_domain_acl_info_list_lock); 901 up_write(&tomoyo_domain_acl_info_list_lock);
803 /***** EXCLUSIVE SECTION END *****/
804 return error; 902 return error;
805} 903}
806 904
@@ -836,7 +934,6 @@ static int tomoyo_update_double_path_acl(const u8 type, const char *filename1,
836 saved_filename2 = tomoyo_save_name(filename2); 934 saved_filename2 = tomoyo_save_name(filename2);
837 if (!saved_filename1 || !saved_filename2) 935 if (!saved_filename1 || !saved_filename2)
838 return -ENOMEM; 936 return -ENOMEM;
839 /***** EXCLUSIVE SECTION START *****/
840 down_write(&tomoyo_domain_acl_info_list_lock); 937 down_write(&tomoyo_domain_acl_info_list_lock);
841 if (is_delete) 938 if (is_delete)
842 goto delete; 939 goto delete;
@@ -884,7 +981,6 @@ static int tomoyo_update_double_path_acl(const u8 type, const char *filename1,
884 } 981 }
885 out: 982 out:
886 up_write(&tomoyo_domain_acl_info_list_lock); 983 up_write(&tomoyo_domain_acl_info_list_lock);
887 /***** EXCLUSIVE SECTION END *****/
888 return error; 984 return error;
889} 985}
890 986
@@ -1025,13 +1121,11 @@ int tomoyo_check_file_perm(struct tomoyo_domain_info *domain,
1025 * 1121 *
1026 * @domain: Pointer to "struct tomoyo_domain_info". 1122 * @domain: Pointer to "struct tomoyo_domain_info".
1027 * @filename: Check permission for "execute". 1123 * @filename: Check permission for "execute".
1028 * @tmp: Buffer for temporary use.
1029 * 1124 *
1030 * Returns 0 on success, negative value otherwise. 1125 * Returns 0 on success, negative value otherwise.
1031 */ 1126 */
1032int tomoyo_check_exec_perm(struct tomoyo_domain_info *domain, 1127int tomoyo_check_exec_perm(struct tomoyo_domain_info *domain,
1033 const struct tomoyo_path_info *filename, 1128 const struct tomoyo_path_info *filename)
1034 struct tomoyo_page_buffer *tmp)
1035{ 1129{
1036 const u8 mode = tomoyo_check_flags(domain, TOMOYO_MAC_FOR_FILE); 1130 const u8 mode = tomoyo_check_flags(domain, TOMOYO_MAC_FOR_FILE);
1037 1131
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c
index 40927a84cb6e..5f2e33263371 100644
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -220,7 +220,6 @@ void *tomoyo_alloc_element(const unsigned int size)
220 = roundup(size, max(sizeof(void *), sizeof(long))); 220 = roundup(size, max(sizeof(void *), sizeof(long)));
221 if (word_aligned_size > PATH_MAX) 221 if (word_aligned_size > PATH_MAX)
222 return NULL; 222 return NULL;
223 /***** EXCLUSIVE SECTION START *****/
224 mutex_lock(&lock); 223 mutex_lock(&lock);
225 if (buf_used_len + word_aligned_size > PATH_MAX) { 224 if (buf_used_len + word_aligned_size > PATH_MAX) {
226 if (!tomoyo_quota_for_elements || 225 if (!tomoyo_quota_for_elements ||
@@ -251,7 +250,6 @@ void *tomoyo_alloc_element(const unsigned int size)
251 } 250 }
252 } 251 }
253 mutex_unlock(&lock); 252 mutex_unlock(&lock);
254 /***** EXCLUSIVE SECTION END *****/
255 return ptr; 253 return ptr;
256} 254}
257 255
@@ -267,7 +265,16 @@ static unsigned int tomoyo_quota_for_savename;
267 */ 265 */
268#define TOMOYO_MAX_HASH 256 266#define TOMOYO_MAX_HASH 256
269 267
270/* Structure for string data. */ 268/*
269 * tomoyo_name_entry is a structure which is used for linking
270 * "struct tomoyo_path_info" into tomoyo_name_list .
271 *
272 * Since tomoyo_name_list manages a list of strings which are shared by
273 * multiple processes (whereas "struct tomoyo_path_info" inside
274 * "struct tomoyo_path_info_with_data" is not shared), a reference counter will
275 * be added to "struct tomoyo_name_entry" rather than "struct tomoyo_path_info"
276 * when TOMOYO starts supporting garbage collector.
277 */
271struct tomoyo_name_entry { 278struct tomoyo_name_entry {
272 struct list_head list; 279 struct list_head list;
273 struct tomoyo_path_info entry; 280 struct tomoyo_path_info entry;
@@ -281,10 +288,10 @@ struct tomoyo_free_memory_block_list {
281}; 288};
282 289
283/* 290/*
284 * The list for "struct tomoyo_name_entry". 291 * tomoyo_name_list is used for holding string data used by TOMOYO.
285 * 292 * Since same string data is likely used for multiple times (e.g.
286 * This list is updated only inside tomoyo_save_name(), thus 293 * "/lib/libc-2.5.so"), TOMOYO shares string data in the form of
287 * no global mutex exists. 294 * "const struct tomoyo_path_info *".
288 */ 295 */
289static struct list_head tomoyo_name_list[TOMOYO_MAX_HASH]; 296static struct list_head tomoyo_name_list[TOMOYO_MAX_HASH];
290 297
@@ -318,7 +325,6 @@ const struct tomoyo_path_info *tomoyo_save_name(const char *name)
318 return NULL; 325 return NULL;
319 } 326 }
320 hash = full_name_hash((const unsigned char *) name, len - 1); 327 hash = full_name_hash((const unsigned char *) name, len - 1);
321 /***** EXCLUSIVE SECTION START *****/
322 mutex_lock(&lock); 328 mutex_lock(&lock);
323 list_for_each_entry(ptr, &tomoyo_name_list[hash % TOMOYO_MAX_HASH], 329 list_for_each_entry(ptr, &tomoyo_name_list[hash % TOMOYO_MAX_HASH],
324 list) { 330 list) {
@@ -366,7 +372,6 @@ const struct tomoyo_path_info *tomoyo_save_name(const char *name)
366 } 372 }
367 out: 373 out:
368 mutex_unlock(&lock); 374 mutex_unlock(&lock);
369 /***** EXCLUSIVE SECTION END *****/
370 return ptr ? &ptr->entry : NULL; 375 return ptr ? &ptr->entry : NULL;
371} 376}
372 377
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index e42be5c4f055..3194d09fe0f4 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -262,6 +262,10 @@ static int tomoyo_dentry_open(struct file *f, const struct cred *cred)
262 return tomoyo_check_open_permission(tomoyo_domain(), &f->f_path, flags); 262 return tomoyo_check_open_permission(tomoyo_domain(), &f->f_path, flags);
263} 263}
264 264
265/*
266 * tomoyo_security_ops is a "struct security_operations" which is used for
267 * registering TOMOYO.
268 */
265static struct security_operations tomoyo_security_ops = { 269static struct security_operations tomoyo_security_ops = {
266 .name = "tomoyo", 270 .name = "tomoyo",
267 .cred_prepare = tomoyo_cred_prepare, 271 .cred_prepare = tomoyo_cred_prepare,
diff --git a/security/tomoyo/tomoyo.h b/security/tomoyo/tomoyo.h
index 41c6ebafb9c5..0fd588a629cf 100644
--- a/security/tomoyo/tomoyo.h
+++ b/security/tomoyo/tomoyo.h
@@ -17,13 +17,11 @@ struct path;
17struct inode; 17struct inode;
18struct linux_binprm; 18struct linux_binprm;
19struct pt_regs; 19struct pt_regs;
20struct tomoyo_page_buffer;
21 20
22int tomoyo_check_file_perm(struct tomoyo_domain_info *domain, 21int tomoyo_check_file_perm(struct tomoyo_domain_info *domain,
23 const char *filename, const u8 perm); 22 const char *filename, const u8 perm);
24int tomoyo_check_exec_perm(struct tomoyo_domain_info *domain, 23int tomoyo_check_exec_perm(struct tomoyo_domain_info *domain,
25 const struct tomoyo_path_info *filename, 24 const struct tomoyo_path_info *filename);
26 struct tomoyo_page_buffer *buf);
27int tomoyo_check_open_permission(struct tomoyo_domain_info *domain, 25int tomoyo_check_open_permission(struct tomoyo_domain_info *domain,
28 struct path *path, const int flag); 26 struct path *path, const int flag);
29int tomoyo_check_1path_perm(struct tomoyo_domain_info *domain, 27int tomoyo_check_1path_perm(struct tomoyo_domain_info *domain,
@@ -90,17 +88,10 @@ static inline struct tomoyo_domain_info *tomoyo_domain(void)
90 return current_cred()->security; 88 return current_cred()->security;
91} 89}
92 90
93/* Caller holds tasklist_lock spinlock. */
94static inline struct tomoyo_domain_info *tomoyo_real_domain(struct task_struct 91static inline struct tomoyo_domain_info *tomoyo_real_domain(struct task_struct
95 *task) 92 *task)
96{ 93{
97 /***** CRITICAL SECTION START *****/ 94 return task_cred_xxx(task, security);
98 const struct cred *cred = get_task_cred(task);
99 struct tomoyo_domain_info *domain = cred->security;
100
101 put_cred(cred);
102 return domain;
103 /***** CRITICAL SECTION END *****/
104} 95}
105 96
106#endif /* !defined(_SECURITY_TOMOYO_TOMOYO_H) */ 97#endif /* !defined(_SECURITY_TOMOYO_TOMOYO_H) */
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index c3b99def9cbc..1eddae94bab3 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -85,7 +85,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
85 85
86static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) 86static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
87{ 87{
88 union ioapic_redir_entry *pent; 88 union kvm_ioapic_redirect_entry *pent;
89 int injected = -1; 89 int injected = -1;
90 90
91 pent = &ioapic->redirtbl[idx]; 91 pent = &ioapic->redirtbl[idx];
@@ -142,149 +142,40 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
142 } 142 }
143} 143}
144 144
145static int ioapic_inj_irq(struct kvm_ioapic *ioapic,
146 struct kvm_vcpu *vcpu,
147 u8 vector, u8 trig_mode, u8 delivery_mode)
148{
149 ioapic_debug("irq %d trig %d deliv %d\n", vector, trig_mode,
150 delivery_mode);
151
152 ASSERT((delivery_mode == IOAPIC_FIXED) ||
153 (delivery_mode == IOAPIC_LOWEST_PRIORITY));
154
155 return kvm_apic_set_irq(vcpu, vector, trig_mode);
156}
157
158static void ioapic_inj_nmi(struct kvm_vcpu *vcpu)
159{
160 kvm_inject_nmi(vcpu);
161 kvm_vcpu_kick(vcpu);
162}
163
164u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
165 u8 dest_mode)
166{
167 u32 mask = 0;
168 int i;
169 struct kvm *kvm = ioapic->kvm;
170 struct kvm_vcpu *vcpu;
171
172 ioapic_debug("dest %d dest_mode %d\n", dest, dest_mode);
173
174 if (dest_mode == 0) { /* Physical mode. */
175 if (dest == 0xFF) { /* Broadcast. */
176 for (i = 0; i < KVM_MAX_VCPUS; ++i)
177 if (kvm->vcpus[i] && kvm->vcpus[i]->arch.apic)
178 mask |= 1 << i;
179 return mask;
180 }
181 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
182 vcpu = kvm->vcpus[i];
183 if (!vcpu)
184 continue;
185 if (kvm_apic_match_physical_addr(vcpu->arch.apic, dest)) {
186 if (vcpu->arch.apic)
187 mask = 1 << i;
188 break;
189 }
190 }
191 } else if (dest != 0) /* Logical mode, MDA non-zero. */
192 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
193 vcpu = kvm->vcpus[i];
194 if (!vcpu)
195 continue;
196 if (vcpu->arch.apic &&
197 kvm_apic_match_logical_addr(vcpu->arch.apic, dest))
198 mask |= 1 << vcpu->vcpu_id;
199 }
200 ioapic_debug("mask %x\n", mask);
201 return mask;
202}
203
204static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) 145static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
205{ 146{
206 u8 dest = ioapic->redirtbl[irq].fields.dest_id; 147 union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
207 u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode; 148 struct kvm_lapic_irq irqe;
208 u8 delivery_mode = ioapic->redirtbl[irq].fields.delivery_mode;
209 u8 vector = ioapic->redirtbl[irq].fields.vector;
210 u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
211 u32 deliver_bitmask;
212 struct kvm_vcpu *vcpu;
213 int vcpu_id, r = -1;
214 149
215 ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " 150 ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
216 "vector=%x trig_mode=%x\n", 151 "vector=%x trig_mode=%x\n",
217 dest, dest_mode, delivery_mode, vector, trig_mode); 152 entry->fields.dest, entry->fields.dest_mode,
218 153 entry->fields.delivery_mode, entry->fields.vector,
219 deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, dest, 154 entry->fields.trig_mode);
220 dest_mode); 155
221 if (!deliver_bitmask) { 156 irqe.dest_id = entry->fields.dest_id;
222 ioapic_debug("no target on destination\n"); 157 irqe.vector = entry->fields.vector;
223 return 0; 158 irqe.dest_mode = entry->fields.dest_mode;
224 } 159 irqe.trig_mode = entry->fields.trig_mode;
160 irqe.delivery_mode = entry->fields.delivery_mode << 8;
161 irqe.level = 1;
162 irqe.shorthand = 0;
225 163
226 switch (delivery_mode) {
227 case IOAPIC_LOWEST_PRIORITY:
228 vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
229 deliver_bitmask);
230#ifdef CONFIG_X86 164#ifdef CONFIG_X86
231 if (irq == 0) 165 /* Always delivery PIT interrupt to vcpu 0 */
232 vcpu = ioapic->kvm->vcpus[0]; 166 if (irq == 0) {
233#endif 167 irqe.dest_mode = 0; /* Physical mode. */
234 if (vcpu != NULL) 168 irqe.dest_id = ioapic->kvm->vcpus[0]->vcpu_id;
235 r = ioapic_inj_irq(ioapic, vcpu, vector,
236 trig_mode, delivery_mode);
237 else
238 ioapic_debug("null lowest prio vcpu: "
239 "mask=%x vector=%x delivery_mode=%x\n",
240 deliver_bitmask, vector, IOAPIC_LOWEST_PRIORITY);
241 break;
242 case IOAPIC_FIXED:
243#ifdef CONFIG_X86
244 if (irq == 0)
245 deliver_bitmask = 1;
246#endif
247 for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
248 if (!(deliver_bitmask & (1 << vcpu_id)))
249 continue;
250 deliver_bitmask &= ~(1 << vcpu_id);
251 vcpu = ioapic->kvm->vcpus[vcpu_id];
252 if (vcpu) {
253 if (r < 0)
254 r = 0;
255 r += ioapic_inj_irq(ioapic, vcpu, vector,
256 trig_mode, delivery_mode);
257 }
258 }
259 break;
260 case IOAPIC_NMI:
261 for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
262 if (!(deliver_bitmask & (1 << vcpu_id)))
263 continue;
264 deliver_bitmask &= ~(1 << vcpu_id);
265 vcpu = ioapic->kvm->vcpus[vcpu_id];
266 if (vcpu) {
267 ioapic_inj_nmi(vcpu);
268 r = 1;
269 }
270 else
271 ioapic_debug("NMI to vcpu %d failed\n",
272 vcpu->vcpu_id);
273 }
274 break;
275 default:
276 printk(KERN_WARNING "Unsupported delivery mode %d\n",
277 delivery_mode);
278 break;
279 } 169 }
280 return r; 170#endif
171 return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
281} 172}
282 173
283int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) 174int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
284{ 175{
285 u32 old_irr = ioapic->irr; 176 u32 old_irr = ioapic->irr;
286 u32 mask = 1 << irq; 177 u32 mask = 1 << irq;
287 union ioapic_redir_entry entry; 178 union kvm_ioapic_redirect_entry entry;
288 int ret = 1; 179 int ret = 1;
289 180
290 if (irq >= 0 && irq < IOAPIC_NUM_PINS) { 181 if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
@@ -305,7 +196,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
305static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin, 196static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
306 int trigger_mode) 197 int trigger_mode)
307{ 198{
308 union ioapic_redir_entry *ent; 199 union kvm_ioapic_redirect_entry *ent;
309 200
310 ent = &ioapic->redirtbl[pin]; 201 ent = &ioapic->redirtbl[pin];
311 202
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index a34bd5e6436b..7080b713c160 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -40,22 +40,7 @@ struct kvm_ioapic {
40 u32 id; 40 u32 id;
41 u32 irr; 41 u32 irr;
42 u32 pad; 42 u32 pad;
43 union ioapic_redir_entry { 43 union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
44 u64 bits;
45 struct {
46 u8 vector;
47 u8 delivery_mode:3;
48 u8 dest_mode:1;
49 u8 delivery_status:1;
50 u8 polarity:1;
51 u8 remote_irr:1;
52 u8 trig_mode:1;
53 u8 mask:1;
54 u8 reserve:7;
55 u8 reserved[4];
56 u8 dest_id;
57 } fields;
58 } redirtbl[IOAPIC_NUM_PINS];
59 struct kvm_io_device dev; 44 struct kvm_io_device dev;
60 struct kvm *kvm; 45 struct kvm *kvm;
61 void (*ack_notifier)(void *opaque, int irq); 46 void (*ack_notifier)(void *opaque, int irq);
@@ -79,13 +64,13 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
79 return kvm->arch.vioapic; 64 return kvm->arch.vioapic;
80} 65}
81 66
82struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, 67int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
83 unsigned long bitmap); 68 int short_hand, int dest, int dest_mode);
69int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
84void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); 70void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
85int kvm_ioapic_init(struct kvm *kvm); 71int kvm_ioapic_init(struct kvm *kvm);
86int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); 72int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
87void kvm_ioapic_reset(struct kvm_ioapic *ioapic); 73void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
88u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, 74int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
89 u8 dest_mode); 75 struct kvm_lapic_irq *irq);
90
91#endif 76#endif
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 4c4037503600..15147583abd1 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -39,11 +39,16 @@ int kvm_iommu_map_pages(struct kvm *kvm,
39 pfn_t pfn; 39 pfn_t pfn;
40 int i, r = 0; 40 int i, r = 0;
41 struct iommu_domain *domain = kvm->arch.iommu_domain; 41 struct iommu_domain *domain = kvm->arch.iommu_domain;
42 int flags;
42 43
43 /* check if iommu exists and in use */ 44 /* check if iommu exists and in use */
44 if (!domain) 45 if (!domain)
45 return 0; 46 return 0;
46 47
48 flags = IOMMU_READ | IOMMU_WRITE;
49 if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
50 flags |= IOMMU_CACHE;
51
47 for (i = 0; i < npages; i++) { 52 for (i = 0; i < npages; i++) {
48 /* check if already mapped */ 53 /* check if already mapped */
49 if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) 54 if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
@@ -53,8 +58,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
53 r = iommu_map_range(domain, 58 r = iommu_map_range(domain,
54 gfn_to_gpa(gfn), 59 gfn_to_gpa(gfn),
55 pfn_to_hpa(pfn), 60 pfn_to_hpa(pfn),
56 PAGE_SIZE, 61 PAGE_SIZE, flags);
57 IOMMU_READ | IOMMU_WRITE);
58 if (r) { 62 if (r) {
59 printk(KERN_ERR "kvm_iommu_map_address:" 63 printk(KERN_ERR "kvm_iommu_map_address:"
60 "iommu failed to map pfn=%lx\n", pfn); 64 "iommu failed to map pfn=%lx\n", pfn);
@@ -88,7 +92,7 @@ int kvm_assign_device(struct kvm *kvm,
88{ 92{
89 struct pci_dev *pdev = NULL; 93 struct pci_dev *pdev = NULL;
90 struct iommu_domain *domain = kvm->arch.iommu_domain; 94 struct iommu_domain *domain = kvm->arch.iommu_domain;
91 int r; 95 int r, last_flags;
92 96
93 /* check if iommu exists and in use */ 97 /* check if iommu exists and in use */
94 if (!domain) 98 if (!domain)
@@ -107,12 +111,29 @@ int kvm_assign_device(struct kvm *kvm,
107 return r; 111 return r;
108 } 112 }
109 113
114 last_flags = kvm->arch.iommu_flags;
115 if (iommu_domain_has_cap(kvm->arch.iommu_domain,
116 IOMMU_CAP_CACHE_COHERENCY))
117 kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY;
118
119 /* Check if need to update IOMMU page table for guest memory */
120 if ((last_flags ^ kvm->arch.iommu_flags) ==
121 KVM_IOMMU_CACHE_COHERENCY) {
122 kvm_iommu_unmap_memslots(kvm);
123 r = kvm_iommu_map_memslots(kvm);
124 if (r)
125 goto out_unmap;
126 }
127
110 printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", 128 printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
111 assigned_dev->host_busnr, 129 assigned_dev->host_busnr,
112 PCI_SLOT(assigned_dev->host_devfn), 130 PCI_SLOT(assigned_dev->host_devfn),
113 PCI_FUNC(assigned_dev->host_devfn)); 131 PCI_FUNC(assigned_dev->host_devfn));
114 132
115 return 0; 133 return 0;
134out_unmap:
135 kvm_iommu_unmap_memslots(kvm);
136 return r;
116} 137}
117 138
118int kvm_deassign_device(struct kvm *kvm, 139int kvm_deassign_device(struct kvm *kvm,
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 864ac5483baa..a8bd466d00cc 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -22,6 +22,9 @@
22#include <linux/kvm_host.h> 22#include <linux/kvm_host.h>
23 23
24#include <asm/msidef.h> 24#include <asm/msidef.h>
25#ifdef CONFIG_IA64
26#include <asm/iosapic.h>
27#endif
25 28
26#include "irq.h" 29#include "irq.h"
27 30
@@ -43,57 +46,73 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
43 return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); 46 return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
44} 47}
45 48
46static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, 49inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
47 struct kvm *kvm, int level)
48{ 50{
49 int vcpu_id, r = -1; 51#ifdef CONFIG_IA64
50 struct kvm_vcpu *vcpu; 52 return irq->delivery_mode ==
51 struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); 53 (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
52 int dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK) 54#else
53 >> MSI_ADDR_DEST_ID_SHIFT; 55 return irq->delivery_mode == APIC_DM_LOWEST;
54 int vector = (e->msi.data & MSI_DATA_VECTOR_MASK) 56#endif
55 >> MSI_DATA_VECTOR_SHIFT; 57}
56 int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT, 58
57 (unsigned long *)&e->msi.address_lo); 59int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
58 int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT, 60 struct kvm_lapic_irq *irq)
59 (unsigned long *)&e->msi.data); 61{
60 int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT, 62 int i, r = -1;
61 (unsigned long *)&e->msi.data); 63 struct kvm_vcpu *vcpu, *lowest = NULL;
62 u32 deliver_bitmask; 64
63 65 if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
64 BUG_ON(!ioapic); 66 kvm_is_dm_lowest_prio(irq))
65 67 printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
66 deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, 68
67 dest_id, dest_mode); 69 for (i = 0; i < KVM_MAX_VCPUS; i++) {
68 /* IOAPIC delivery mode value is the same as MSI here */ 70 vcpu = kvm->vcpus[i];
69 switch (delivery_mode) { 71
70 case IOAPIC_LOWEST_PRIORITY: 72 if (!vcpu || !kvm_apic_present(vcpu))
71 vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector, 73 continue;
72 deliver_bitmask); 74
73 if (vcpu != NULL) 75 if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
74 r = kvm_apic_set_irq(vcpu, vector, trig_mode); 76 irq->dest_id, irq->dest_mode))
75 else 77 continue;
76 printk(KERN_INFO "kvm: null lowest priority vcpu!\n"); 78
77 break; 79 if (!kvm_is_dm_lowest_prio(irq)) {
78 case IOAPIC_FIXED: 80 if (r < 0)
79 for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { 81 r = 0;
80 if (!(deliver_bitmask & (1 << vcpu_id))) 82 r += kvm_apic_set_irq(vcpu, irq);
81 continue; 83 } else {
82 deliver_bitmask &= ~(1 << vcpu_id); 84 if (!lowest)
83 vcpu = ioapic->kvm->vcpus[vcpu_id]; 85 lowest = vcpu;
84 if (vcpu) { 86 else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
85 if (r < 0) 87 lowest = vcpu;
86 r = 0;
87 r += kvm_apic_set_irq(vcpu, vector, trig_mode);
88 }
89 } 88 }
90 break;
91 default:
92 break;
93 } 89 }
90
91 if (lowest)
92 r = kvm_apic_set_irq(lowest, irq);
93
94 return r; 94 return r;
95} 95}
96 96
97static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
98 struct kvm *kvm, int level)
99{
100 struct kvm_lapic_irq irq;
101
102 irq.dest_id = (e->msi.address_lo &
103 MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
104 irq.vector = (e->msi.data &
105 MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
106 irq.dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
107 irq.trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
108 irq.delivery_mode = e->msi.data & 0x700;
109 irq.level = 1;
110 irq.shorthand = 0;
111
112 /* TODO Deal with RH bit of MSI message address */
113 return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
114}
115
97/* This should be called with the kvm->lock mutex held 116/* This should be called with the kvm->lock mutex held
98 * Return value: 117 * Return value:
99 * < 0 Interrupt was ignored (masked or not delivered for other reasons) 118 * < 0 Interrupt was ignored (masked or not delivered for other reasons)
@@ -252,7 +271,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
252 delta = 8; 271 delta = 8;
253 break; 272 break;
254 case KVM_IRQCHIP_IOAPIC: 273 case KVM_IRQCHIP_IOAPIC:
255 e->set = kvm_set_ioapic_irq; 274 e->set = kvm_set_ioapic_irq;
256 break; 275 break;
257 default: 276 default:
258 goto out; 277 goto out;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4d0dd390aa50..e21194566b71 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,8 @@
41#include <linux/pagemap.h> 41#include <linux/pagemap.h>
42#include <linux/mman.h> 42#include <linux/mman.h>
43#include <linux/swap.h> 43#include <linux/swap.h>
44#include <linux/bitops.h>
45#include <linux/spinlock.h>
44 46
45#include <asm/processor.h> 47#include <asm/processor.h>
46#include <asm/io.h> 48#include <asm/io.h>
@@ -60,9 +62,6 @@
60MODULE_AUTHOR("Qumranet"); 62MODULE_AUTHOR("Qumranet");
61MODULE_LICENSE("GPL"); 63MODULE_LICENSE("GPL");
62 64
63static int msi2intx = 1;
64module_param(msi2intx, bool, 0);
65
66DEFINE_SPINLOCK(kvm_lock); 65DEFINE_SPINLOCK(kvm_lock);
67LIST_HEAD(vm_list); 66LIST_HEAD(vm_list);
68 67
@@ -95,38 +94,96 @@ static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *h
95 return NULL; 94 return NULL;
96} 95}
97 96
97static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
98 *assigned_dev, int irq)
99{
100 int i, index;
101 struct msix_entry *host_msix_entries;
102
103 host_msix_entries = assigned_dev->host_msix_entries;
104
105 index = -1;
106 for (i = 0; i < assigned_dev->entries_nr; i++)
107 if (irq == host_msix_entries[i].vector) {
108 index = i;
109 break;
110 }
111 if (index < 0) {
112 printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
113 return 0;
114 }
115
116 return index;
117}
118
98static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) 119static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
99{ 120{
100 struct kvm_assigned_dev_kernel *assigned_dev; 121 struct kvm_assigned_dev_kernel *assigned_dev;
122 struct kvm *kvm;
123 int irq, i;
101 124
102 assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, 125 assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
103 interrupt_work); 126 interrupt_work);
127 kvm = assigned_dev->kvm;
104 128
105 /* This is taken to safely inject irq inside the guest. When 129 /* This is taken to safely inject irq inside the guest. When
106 * the interrupt injection (or the ioapic code) uses a 130 * the interrupt injection (or the ioapic code) uses a
107 * finer-grained lock, update this 131 * finer-grained lock, update this
108 */ 132 */
109 mutex_lock(&assigned_dev->kvm->lock); 133 mutex_lock(&kvm->lock);
110 kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, 134 spin_lock_irq(&assigned_dev->assigned_dev_lock);
111 assigned_dev->guest_irq, 1); 135 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
112 136 struct kvm_guest_msix_entry *guest_entries =
113 if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) { 137 assigned_dev->guest_msix_entries;
114 enable_irq(assigned_dev->host_irq); 138 for (i = 0; i < assigned_dev->entries_nr; i++) {
115 assigned_dev->host_irq_disabled = false; 139 if (!(guest_entries[i].flags &
140 KVM_ASSIGNED_MSIX_PENDING))
141 continue;
142 guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING;
143 kvm_set_irq(assigned_dev->kvm,
144 assigned_dev->irq_source_id,
145 guest_entries[i].vector, 1);
146 irq = assigned_dev->host_msix_entries[i].vector;
147 if (irq != 0)
148 enable_irq(irq);
149 assigned_dev->host_irq_disabled = false;
150 }
151 } else {
152 kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
153 assigned_dev->guest_irq, 1);
154 if (assigned_dev->irq_requested_type &
155 KVM_DEV_IRQ_GUEST_MSI) {
156 enable_irq(assigned_dev->host_irq);
157 assigned_dev->host_irq_disabled = false;
158 }
116 } 159 }
160
161 spin_unlock_irq(&assigned_dev->assigned_dev_lock);
117 mutex_unlock(&assigned_dev->kvm->lock); 162 mutex_unlock(&assigned_dev->kvm->lock);
118} 163}
119 164
120static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) 165static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
121{ 166{
167 unsigned long flags;
122 struct kvm_assigned_dev_kernel *assigned_dev = 168 struct kvm_assigned_dev_kernel *assigned_dev =
123 (struct kvm_assigned_dev_kernel *) dev_id; 169 (struct kvm_assigned_dev_kernel *) dev_id;
124 170
171 spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags);
172 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
173 int index = find_index_from_host_irq(assigned_dev, irq);
174 if (index < 0)
175 goto out;
176 assigned_dev->guest_msix_entries[index].flags |=
177 KVM_ASSIGNED_MSIX_PENDING;
178 }
179
125 schedule_work(&assigned_dev->interrupt_work); 180 schedule_work(&assigned_dev->interrupt_work);
126 181
127 disable_irq_nosync(irq); 182 disable_irq_nosync(irq);
128 assigned_dev->host_irq_disabled = true; 183 assigned_dev->host_irq_disabled = true;
129 184
185out:
186 spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
130 return IRQ_HANDLED; 187 return IRQ_HANDLED;
131} 188}
132 189
@@ -134,6 +191,7 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
134static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) 191static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
135{ 192{
136 struct kvm_assigned_dev_kernel *dev; 193 struct kvm_assigned_dev_kernel *dev;
194 unsigned long flags;
137 195
138 if (kian->gsi == -1) 196 if (kian->gsi == -1)
139 return; 197 return;
@@ -146,28 +204,30 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
146 /* The guest irq may be shared so this ack may be 204 /* The guest irq may be shared so this ack may be
147 * from another device. 205 * from another device.
148 */ 206 */
207 spin_lock_irqsave(&dev->assigned_dev_lock, flags);
149 if (dev->host_irq_disabled) { 208 if (dev->host_irq_disabled) {
150 enable_irq(dev->host_irq); 209 enable_irq(dev->host_irq);
151 dev->host_irq_disabled = false; 210 dev->host_irq_disabled = false;
152 } 211 }
212 spin_unlock_irqrestore(&dev->assigned_dev_lock, flags);
153} 213}
154 214
155/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ 215static void deassign_guest_irq(struct kvm *kvm,
156static void kvm_free_assigned_irq(struct kvm *kvm, 216 struct kvm_assigned_dev_kernel *assigned_dev)
157 struct kvm_assigned_dev_kernel *assigned_dev)
158{ 217{
159 if (!irqchip_in_kernel(kvm))
160 return;
161
162 kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier); 218 kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
219 assigned_dev->ack_notifier.gsi = -1;
163 220
164 if (assigned_dev->irq_source_id != -1) 221 if (assigned_dev->irq_source_id != -1)
165 kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); 222 kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
166 assigned_dev->irq_source_id = -1; 223 assigned_dev->irq_source_id = -1;
224 assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
225}
167 226
168 if (!assigned_dev->irq_requested_type) 227/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
169 return; 228static void deassign_host_irq(struct kvm *kvm,
170 229 struct kvm_assigned_dev_kernel *assigned_dev)
230{
171 /* 231 /*
172 * In kvm_free_device_irq, cancel_work_sync return true if: 232 * In kvm_free_device_irq, cancel_work_sync return true if:
173 * 1. work is scheduled, and then cancelled. 233 * 1. work is scheduled, and then cancelled.
@@ -184,17 +244,64 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
184 * now, the kvm state is still legal for probably we also have to wait 244 * now, the kvm state is still legal for probably we also have to wait
185 * interrupt_work done. 245 * interrupt_work done.
186 */ 246 */
187 disable_irq_nosync(assigned_dev->host_irq); 247 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
188 cancel_work_sync(&assigned_dev->interrupt_work); 248 int i;
249 for (i = 0; i < assigned_dev->entries_nr; i++)
250 disable_irq_nosync(assigned_dev->
251 host_msix_entries[i].vector);
252
253 cancel_work_sync(&assigned_dev->interrupt_work);
254
255 for (i = 0; i < assigned_dev->entries_nr; i++)
256 free_irq(assigned_dev->host_msix_entries[i].vector,
257 (void *)assigned_dev);
258
259 assigned_dev->entries_nr = 0;
260 kfree(assigned_dev->host_msix_entries);
261 kfree(assigned_dev->guest_msix_entries);
262 pci_disable_msix(assigned_dev->dev);
263 } else {
264 /* Deal with MSI and INTx */
265 disable_irq_nosync(assigned_dev->host_irq);
266 cancel_work_sync(&assigned_dev->interrupt_work);
189 267
190 free_irq(assigned_dev->host_irq, (void *)assigned_dev); 268 free_irq(assigned_dev->host_irq, (void *)assigned_dev);
191 269
192 if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) 270 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
193 pci_disable_msi(assigned_dev->dev); 271 pci_disable_msi(assigned_dev->dev);
272 }
194 273
195 assigned_dev->irq_requested_type = 0; 274 assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
196} 275}
197 276
277static int kvm_deassign_irq(struct kvm *kvm,
278 struct kvm_assigned_dev_kernel *assigned_dev,
279 unsigned long irq_requested_type)
280{
281 unsigned long guest_irq_type, host_irq_type;
282
283 if (!irqchip_in_kernel(kvm))
284 return -EINVAL;
285 /* no irq assignment to deassign */
286 if (!assigned_dev->irq_requested_type)
287 return -ENXIO;
288
289 host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
290 guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
291
292 if (host_irq_type)
293 deassign_host_irq(kvm, assigned_dev);
294 if (guest_irq_type)
295 deassign_guest_irq(kvm, assigned_dev);
296
297 return 0;
298}
299
300static void kvm_free_assigned_irq(struct kvm *kvm,
301 struct kvm_assigned_dev_kernel *assigned_dev)
302{
303 kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
304}
198 305
199static void kvm_free_assigned_device(struct kvm *kvm, 306static void kvm_free_assigned_device(struct kvm *kvm,
200 struct kvm_assigned_dev_kernel 307 struct kvm_assigned_dev_kernel
@@ -226,190 +333,244 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
226 } 333 }
227} 334}
228 335
229static int assigned_device_update_intx(struct kvm *kvm, 336static int assigned_device_enable_host_intx(struct kvm *kvm,
230 struct kvm_assigned_dev_kernel *adev, 337 struct kvm_assigned_dev_kernel *dev)
231 struct kvm_assigned_irq *airq)
232{ 338{
233 adev->guest_irq = airq->guest_irq; 339 dev->host_irq = dev->dev->irq;
234 adev->ack_notifier.gsi = airq->guest_irq; 340 /* Even though this is PCI, we don't want to use shared
341 * interrupts. Sharing host devices with guest-assigned devices
342 * on the same interrupt line is not a happy situation: there
343 * are going to be long delays in accepting, acking, etc.
344 */
345 if (request_irq(dev->host_irq, kvm_assigned_dev_intr,
346 0, "kvm_assigned_intx_device", (void *)dev))
347 return -EIO;
348 return 0;
349}
235 350
236 if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX) 351#ifdef __KVM_HAVE_MSI
237 return 0; 352static int assigned_device_enable_host_msi(struct kvm *kvm,
353 struct kvm_assigned_dev_kernel *dev)
354{
355 int r;
238 356
239 if (irqchip_in_kernel(kvm)) { 357 if (!dev->dev->msi_enabled) {
240 if (!msi2intx && 358 r = pci_enable_msi(dev->dev);
241 (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) { 359 if (r)
242 free_irq(adev->host_irq, (void *)adev); 360 return r;
243 pci_disable_msi(adev->dev); 361 }
244 }
245 362
246 if (!capable(CAP_SYS_RAWIO)) 363 dev->host_irq = dev->dev->irq;
247 return -EPERM; 364 if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0,
365 "kvm_assigned_msi_device", (void *)dev)) {
366 pci_disable_msi(dev->dev);
367 return -EIO;
368 }
248 369
249 if (airq->host_irq) 370 return 0;
250 adev->host_irq = airq->host_irq; 371}
251 else 372#endif
252 adev->host_irq = adev->dev->irq;
253 373
254 /* Even though this is PCI, we don't want to use shared 374#ifdef __KVM_HAVE_MSIX
255 * interrupts. Sharing host devices with guest-assigned devices 375static int assigned_device_enable_host_msix(struct kvm *kvm,
256 * on the same interrupt line is not a happy situation: there 376 struct kvm_assigned_dev_kernel *dev)
257 * are going to be long delays in accepting, acking, etc. 377{
258 */ 378 int i, r = -EINVAL;
259 if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 379
260 0, "kvm_assigned_intx_device", (void *)adev)) 380 /* host_msix_entries and guest_msix_entries should have been
261 return -EIO; 381 * initialized */
382 if (dev->entries_nr == 0)
383 return r;
384
385 r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
386 if (r)
387 return r;
388
389 for (i = 0; i < dev->entries_nr; i++) {
390 r = request_irq(dev->host_msix_entries[i].vector,
391 kvm_assigned_dev_intr, 0,
392 "kvm_assigned_msix_device",
393 (void *)dev);
394 /* FIXME: free requested_irq's on failure */
395 if (r)
396 return r;
262 } 397 }
263 398
264 adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX |
265 KVM_ASSIGNED_DEV_HOST_INTX;
266 return 0; 399 return 0;
267} 400}
268 401
269#ifdef CONFIG_X86 402#endif
270static int assigned_device_update_msi(struct kvm *kvm, 403
271 struct kvm_assigned_dev_kernel *adev, 404static int assigned_device_enable_guest_intx(struct kvm *kvm,
272 struct kvm_assigned_irq *airq) 405 struct kvm_assigned_dev_kernel *dev,
406 struct kvm_assigned_irq *irq)
273{ 407{
274 int r; 408 dev->guest_irq = irq->guest_irq;
409 dev->ack_notifier.gsi = irq->guest_irq;
410 return 0;
411}
275 412
276 adev->guest_irq = airq->guest_irq; 413#ifdef __KVM_HAVE_MSI
277 if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) { 414static int assigned_device_enable_guest_msi(struct kvm *kvm,
278 /* x86 don't care upper address of guest msi message addr */ 415 struct kvm_assigned_dev_kernel *dev,
279 adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI; 416 struct kvm_assigned_irq *irq)
280 adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX; 417{
281 adev->ack_notifier.gsi = -1; 418 dev->guest_irq = irq->guest_irq;
282 } else if (msi2intx) { 419 dev->ack_notifier.gsi = -1;
283 adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX; 420 return 0;
284 adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI; 421}
285 adev->ack_notifier.gsi = airq->guest_irq; 422#endif
286 } else { 423#ifdef __KVM_HAVE_MSIX
287 /* 424static int assigned_device_enable_guest_msix(struct kvm *kvm,
288 * Guest require to disable device MSI, we disable MSI and 425 struct kvm_assigned_dev_kernel *dev,
289 * re-enable INTx by default again. Notice it's only for 426 struct kvm_assigned_irq *irq)
290 * non-msi2intx. 427{
291 */ 428 dev->guest_irq = irq->guest_irq;
292 assigned_device_update_intx(kvm, adev, airq); 429 dev->ack_notifier.gsi = -1;
293 return 0; 430 return 0;
431}
432#endif
433
434static int assign_host_irq(struct kvm *kvm,
435 struct kvm_assigned_dev_kernel *dev,
436 __u32 host_irq_type)
437{
438 int r = -EEXIST;
439
440 if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
441 return r;
442
443 switch (host_irq_type) {
444 case KVM_DEV_IRQ_HOST_INTX:
445 r = assigned_device_enable_host_intx(kvm, dev);
446 break;
447#ifdef __KVM_HAVE_MSI
448 case KVM_DEV_IRQ_HOST_MSI:
449 r = assigned_device_enable_host_msi(kvm, dev);
450 break;
451#endif
452#ifdef __KVM_HAVE_MSIX
453 case KVM_DEV_IRQ_HOST_MSIX:
454 r = assigned_device_enable_host_msix(kvm, dev);
455 break;
456#endif
457 default:
458 r = -EINVAL;
294 } 459 }
295 460
296 if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) 461 if (!r)
297 return 0; 462 dev->irq_requested_type |= host_irq_type;
298 463
299 if (irqchip_in_kernel(kvm)) { 464 return r;
300 if (!msi2intx) { 465}
301 if (adev->irq_requested_type &
302 KVM_ASSIGNED_DEV_HOST_INTX)
303 free_irq(adev->host_irq, (void *)adev);
304 466
305 r = pci_enable_msi(adev->dev); 467static int assign_guest_irq(struct kvm *kvm,
306 if (r) 468 struct kvm_assigned_dev_kernel *dev,
307 return r; 469 struct kvm_assigned_irq *irq,
308 } 470 unsigned long guest_irq_type)
471{
472 int id;
473 int r = -EEXIST;
474
475 if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
476 return r;
309 477
310 adev->host_irq = adev->dev->irq; 478 id = kvm_request_irq_source_id(kvm);
311 if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0, 479 if (id < 0)
312 "kvm_assigned_msi_device", (void *)adev)) 480 return id;
313 return -EIO; 481
482 dev->irq_source_id = id;
483
484 switch (guest_irq_type) {
485 case KVM_DEV_IRQ_GUEST_INTX:
486 r = assigned_device_enable_guest_intx(kvm, dev, irq);
487 break;
488#ifdef __KVM_HAVE_MSI
489 case KVM_DEV_IRQ_GUEST_MSI:
490 r = assigned_device_enable_guest_msi(kvm, dev, irq);
491 break;
492#endif
493#ifdef __KVM_HAVE_MSIX
494 case KVM_DEV_IRQ_GUEST_MSIX:
495 r = assigned_device_enable_guest_msix(kvm, dev, irq);
496 break;
497#endif
498 default:
499 r = -EINVAL;
314 } 500 }
315 501
316 if (!msi2intx) 502 if (!r) {
317 adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI; 503 dev->irq_requested_type |= guest_irq_type;
504 kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
505 } else
506 kvm_free_irq_source_id(kvm, dev->irq_source_id);
318 507
319 adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI; 508 return r;
320 return 0;
321} 509}
322#endif
323 510
511/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
324static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, 512static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
325 struct kvm_assigned_irq 513 struct kvm_assigned_irq *assigned_irq)
326 *assigned_irq)
327{ 514{
328 int r = 0; 515 int r = -EINVAL;
329 struct kvm_assigned_dev_kernel *match; 516 struct kvm_assigned_dev_kernel *match;
330 u32 current_flags = 0, changed_flags; 517 unsigned long host_irq_type, guest_irq_type;
331 518
332 mutex_lock(&kvm->lock); 519 if (!capable(CAP_SYS_RAWIO))
520 return -EPERM;
333 521
522 if (!irqchip_in_kernel(kvm))
523 return r;
524
525 mutex_lock(&kvm->lock);
526 r = -ENODEV;
334 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, 527 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
335 assigned_irq->assigned_dev_id); 528 assigned_irq->assigned_dev_id);
336 if (!match) { 529 if (!match)
337 mutex_unlock(&kvm->lock); 530 goto out;
338 return -EINVAL;
339 }
340
341 if (!match->irq_requested_type) {
342 INIT_WORK(&match->interrupt_work,
343 kvm_assigned_dev_interrupt_work_handler);
344 if (irqchip_in_kernel(kvm)) {
345 /* Register ack nofitier */
346 match->ack_notifier.gsi = -1;
347 match->ack_notifier.irq_acked =
348 kvm_assigned_dev_ack_irq;
349 kvm_register_irq_ack_notifier(kvm,
350 &match->ack_notifier);
351
352 /* Request IRQ source ID */
353 r = kvm_request_irq_source_id(kvm);
354 if (r < 0)
355 goto out_release;
356 else
357 match->irq_source_id = r;
358
359#ifdef CONFIG_X86
360 /* Determine host device irq type, we can know the
361 * result from dev->msi_enabled */
362 if (msi2intx)
363 pci_enable_msi(match->dev);
364#endif
365 }
366 }
367 531
368 if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) && 532 host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
369 (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI)) 533 guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
370 current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
371 534
372 changed_flags = assigned_irq->flags ^ current_flags; 535 r = -EINVAL;
536 /* can only assign one type at a time */
537 if (hweight_long(host_irq_type) > 1)
538 goto out;
539 if (hweight_long(guest_irq_type) > 1)
540 goto out;
541 if (host_irq_type == 0 && guest_irq_type == 0)
542 goto out;
373 543
374 if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) || 544 r = 0;
375 (msi2intx && match->dev->msi_enabled)) { 545 if (host_irq_type)
376#ifdef CONFIG_X86 546 r = assign_host_irq(kvm, match, host_irq_type);
377 r = assigned_device_update_msi(kvm, match, assigned_irq); 547 if (r)
378 if (r) { 548 goto out;
379 printk(KERN_WARNING "kvm: failed to enable "
380 "MSI device!\n");
381 goto out_release;
382 }
383#else
384 r = -ENOTTY;
385#endif
386 } else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) {
387 /* Host device IRQ 0 means don't support INTx */
388 if (!msi2intx) {
389 printk(KERN_WARNING
390 "kvm: wait device to enable MSI!\n");
391 r = 0;
392 } else {
393 printk(KERN_WARNING
394 "kvm: failed to enable MSI device!\n");
395 r = -ENOTTY;
396 goto out_release;
397 }
398 } else {
399 /* Non-sharing INTx mode */
400 r = assigned_device_update_intx(kvm, match, assigned_irq);
401 if (r) {
402 printk(KERN_WARNING "kvm: failed to enable "
403 "INTx device!\n");
404 goto out_release;
405 }
406 }
407 549
550 if (guest_irq_type)
551 r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
552out:
408 mutex_unlock(&kvm->lock); 553 mutex_unlock(&kvm->lock);
409 return r; 554 return r;
410out_release: 555}
556
557static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
558 struct kvm_assigned_irq
559 *assigned_irq)
560{
561 int r = -ENODEV;
562 struct kvm_assigned_dev_kernel *match;
563
564 mutex_lock(&kvm->lock);
565
566 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
567 assigned_irq->assigned_dev_id);
568 if (!match)
569 goto out;
570
571 r = kvm_deassign_irq(kvm, match, assigned_irq->flags);
572out:
411 mutex_unlock(&kvm->lock); 573 mutex_unlock(&kvm->lock);
412 kvm_free_assigned_device(kvm, match);
413 return r; 574 return r;
414} 575}
415 576
@@ -427,7 +588,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
427 assigned_dev->assigned_dev_id); 588 assigned_dev->assigned_dev_id);
428 if (match) { 589 if (match) {
429 /* device already assigned */ 590 /* device already assigned */
430 r = -EINVAL; 591 r = -EEXIST;
431 goto out; 592 goto out;
432 } 593 }
433 594
@@ -464,8 +625,12 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
464 match->host_devfn = assigned_dev->devfn; 625 match->host_devfn = assigned_dev->devfn;
465 match->flags = assigned_dev->flags; 626 match->flags = assigned_dev->flags;
466 match->dev = dev; 627 match->dev = dev;
628 spin_lock_init(&match->assigned_dev_lock);
467 match->irq_source_id = -1; 629 match->irq_source_id = -1;
468 match->kvm = kvm; 630 match->kvm = kvm;
631 match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
632 INIT_WORK(&match->interrupt_work,
633 kvm_assigned_dev_interrupt_work_handler);
469 634
470 list_add(&match->list, &kvm->arch.assigned_dev_head); 635 list_add(&match->list, &kvm->arch.assigned_dev_head);
471 636
@@ -878,6 +1043,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
878#endif 1043#endif
879#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) 1044#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
880 mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); 1045 mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
1046#else
1047 kvm_arch_flush_shadow(kvm);
881#endif 1048#endif
882 kvm_arch_destroy_vm(kvm); 1049 kvm_arch_destroy_vm(kvm);
883 mmdrop(mm); 1050 mmdrop(mm);
@@ -919,9 +1086,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
919{ 1086{
920 int r; 1087 int r;
921 gfn_t base_gfn; 1088 gfn_t base_gfn;
922 unsigned long npages; 1089 unsigned long npages, ugfn;
923 int largepages; 1090 unsigned long largepages, i;
924 unsigned long i;
925 struct kvm_memory_slot *memslot; 1091 struct kvm_memory_slot *memslot;
926 struct kvm_memory_slot old, new; 1092 struct kvm_memory_slot old, new;
927 1093
@@ -1010,6 +1176,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
1010 new.lpage_info[0].write_count = 1; 1176 new.lpage_info[0].write_count = 1;
1011 if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE) 1177 if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE)
1012 new.lpage_info[largepages-1].write_count = 1; 1178 new.lpage_info[largepages-1].write_count = 1;
1179 ugfn = new.userspace_addr >> PAGE_SHIFT;
1180 /*
1181 * If the gfn and userspace address are not aligned wrt each
1182 * other, disable large page support for this slot
1183 */
1184 if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE - 1))
1185 for (i = 0; i < largepages; ++i)
1186 new.lpage_info[i].write_count = 1;
1013 } 1187 }
1014 1188
1015 /* Allocate page dirty bitmap if needed */ 1189 /* Allocate page dirty bitmap if needed */
@@ -1043,8 +1217,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
1043 1217
1044 kvm_free_physmem_slot(&old, npages ? &new : NULL); 1218 kvm_free_physmem_slot(&old, npages ? &new : NULL);
1045 /* Slot deletion case: we have to update the current slot */ 1219 /* Slot deletion case: we have to update the current slot */
1220 spin_lock(&kvm->mmu_lock);
1046 if (!npages) 1221 if (!npages)
1047 *memslot = old; 1222 *memslot = old;
1223 spin_unlock(&kvm->mmu_lock);
1048#ifdef CONFIG_DMAR 1224#ifdef CONFIG_DMAR
1049 /* map the pages in iommu page table */ 1225 /* map the pages in iommu page table */
1050 r = kvm_iommu_map_pages(kvm, base_gfn, npages); 1226 r = kvm_iommu_map_pages(kvm, base_gfn, npages);
@@ -1454,12 +1630,14 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
1454 for (;;) { 1630 for (;;) {
1455 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); 1631 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
1456 1632
1457 if (kvm_cpu_has_interrupt(vcpu) || 1633 if ((kvm_arch_interrupt_allowed(vcpu) &&
1458 kvm_cpu_has_pending_timer(vcpu) || 1634 kvm_cpu_has_interrupt(vcpu)) ||
1459 kvm_arch_vcpu_runnable(vcpu)) { 1635 kvm_arch_vcpu_runnable(vcpu)) {
1460 set_bit(KVM_REQ_UNHALT, &vcpu->requests); 1636 set_bit(KVM_REQ_UNHALT, &vcpu->requests);
1461 break; 1637 break;
1462 } 1638 }
1639 if (kvm_cpu_has_pending_timer(vcpu))
1640 break;
1463 if (signal_pending(current)) 1641 if (signal_pending(current))
1464 break; 1642 break;
1465 1643
@@ -1593,6 +1771,88 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
1593 return 0; 1771 return 0;
1594} 1772}
1595 1773
1774#ifdef __KVM_HAVE_MSIX
1775static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
1776 struct kvm_assigned_msix_nr *entry_nr)
1777{
1778 int r = 0;
1779 struct kvm_assigned_dev_kernel *adev;
1780
1781 mutex_lock(&kvm->lock);
1782
1783 adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
1784 entry_nr->assigned_dev_id);
1785 if (!adev) {
1786 r = -EINVAL;
1787 goto msix_nr_out;
1788 }
1789
1790 if (adev->entries_nr == 0) {
1791 adev->entries_nr = entry_nr->entry_nr;
1792 if (adev->entries_nr == 0 ||
1793 adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) {
1794 r = -EINVAL;
1795 goto msix_nr_out;
1796 }
1797
1798 adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
1799 entry_nr->entry_nr,
1800 GFP_KERNEL);
1801 if (!adev->host_msix_entries) {
1802 r = -ENOMEM;
1803 goto msix_nr_out;
1804 }
1805 adev->guest_msix_entries = kzalloc(
1806 sizeof(struct kvm_guest_msix_entry) *
1807 entry_nr->entry_nr, GFP_KERNEL);
1808 if (!adev->guest_msix_entries) {
1809 kfree(adev->host_msix_entries);
1810 r = -ENOMEM;
1811 goto msix_nr_out;
1812 }
1813 } else /* Not allowed set MSI-X number twice */
1814 r = -EINVAL;
1815msix_nr_out:
1816 mutex_unlock(&kvm->lock);
1817 return r;
1818}
1819
1820static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
1821 struct kvm_assigned_msix_entry *entry)
1822{
1823 int r = 0, i;
1824 struct kvm_assigned_dev_kernel *adev;
1825
1826 mutex_lock(&kvm->lock);
1827
1828 adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
1829 entry->assigned_dev_id);
1830
1831 if (!adev) {
1832 r = -EINVAL;
1833 goto msix_entry_out;
1834 }
1835
1836 for (i = 0; i < adev->entries_nr; i++)
1837 if (adev->guest_msix_entries[i].vector == 0 ||
1838 adev->guest_msix_entries[i].entry == entry->entry) {
1839 adev->guest_msix_entries[i].entry = entry->entry;
1840 adev->guest_msix_entries[i].vector = entry->gsi;
1841 adev->host_msix_entries[i].entry = entry->entry;
1842 break;
1843 }
1844 if (i == adev->entries_nr) {
1845 r = -ENOSPC;
1846 goto msix_entry_out;
1847 }
1848
1849msix_entry_out:
1850 mutex_unlock(&kvm->lock);
1851
1852 return r;
1853}
1854#endif
1855
1596static long kvm_vcpu_ioctl(struct file *filp, 1856static long kvm_vcpu_ioctl(struct file *filp,
1597 unsigned int ioctl, unsigned long arg) 1857 unsigned int ioctl, unsigned long arg)
1598{ 1858{
@@ -1864,6 +2124,11 @@ static long kvm_vm_ioctl(struct file *filp,
1864 break; 2124 break;
1865 } 2125 }
1866 case KVM_ASSIGN_IRQ: { 2126 case KVM_ASSIGN_IRQ: {
2127 r = -EOPNOTSUPP;
2128 break;
2129 }
2130#ifdef KVM_CAP_ASSIGN_DEV_IRQ
2131 case KVM_ASSIGN_DEV_IRQ: {
1867 struct kvm_assigned_irq assigned_irq; 2132 struct kvm_assigned_irq assigned_irq;
1868 2133
1869 r = -EFAULT; 2134 r = -EFAULT;
@@ -1874,6 +2139,18 @@ static long kvm_vm_ioctl(struct file *filp,
1874 goto out; 2139 goto out;
1875 break; 2140 break;
1876 } 2141 }
2142 case KVM_DEASSIGN_DEV_IRQ: {
2143 struct kvm_assigned_irq assigned_irq;
2144
2145 r = -EFAULT;
2146 if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
2147 goto out;
2148 r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
2149 if (r)
2150 goto out;
2151 break;
2152 }
2153#endif
1877#endif 2154#endif
1878#ifdef KVM_CAP_DEVICE_DEASSIGNMENT 2155#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
1879 case KVM_DEASSIGN_PCI_DEVICE: { 2156 case KVM_DEASSIGN_PCI_DEVICE: {
@@ -1917,7 +2194,29 @@ static long kvm_vm_ioctl(struct file *filp,
1917 vfree(entries); 2194 vfree(entries);
1918 break; 2195 break;
1919 } 2196 }
2197#ifdef __KVM_HAVE_MSIX
2198 case KVM_ASSIGN_SET_MSIX_NR: {
2199 struct kvm_assigned_msix_nr entry_nr;
2200 r = -EFAULT;
2201 if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
2202 goto out;
2203 r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
2204 if (r)
2205 goto out;
2206 break;
2207 }
2208 case KVM_ASSIGN_SET_MSIX_ENTRY: {
2209 struct kvm_assigned_msix_entry entry;
2210 r = -EFAULT;
2211 if (copy_from_user(&entry, argp, sizeof entry))
2212 goto out;
2213 r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
2214 if (r)
2215 goto out;
2216 break;
2217 }
1920#endif 2218#endif
2219#endif /* KVM_CAP_IRQ_ROUTING */
1921 default: 2220 default:
1922 r = kvm_arch_vm_ioctl(filp, ioctl, arg); 2221 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
1923 } 2222 }
@@ -2112,15 +2411,15 @@ EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot);
2112static int kvm_reboot(struct notifier_block *notifier, unsigned long val, 2411static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
2113 void *v) 2412 void *v)
2114{ 2413{
2115 if (val == SYS_RESTART) { 2414 /*
2116 /* 2415 * Some (well, at least mine) BIOSes hang on reboot if
2117 * Some (well, at least mine) BIOSes hang on reboot if 2416 * in vmx root mode.
2118 * in vmx root mode. 2417 *
2119 */ 2418 * And Intel TXT required VMX off for all cpu when system shutdown.
2120 printk(KERN_INFO "kvm: exiting hardware virtualization\n"); 2419 */
2121 kvm_rebooting = true; 2420 printk(KERN_INFO "kvm: exiting hardware virtualization\n");
2122 on_each_cpu(hardware_disable, NULL, 1); 2421 kvm_rebooting = true;
2123 } 2422 on_each_cpu(hardware_disable, NULL, 1);
2124 return NOTIFY_OK; 2423 return NOTIFY_OK;
2125} 2424}
2126 2425
@@ -2354,9 +2653,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
2354 2653
2355 kvm_preempt_ops.sched_in = kvm_sched_in; 2654 kvm_preempt_ops.sched_in = kvm_sched_in;
2356 kvm_preempt_ops.sched_out = kvm_sched_out; 2655 kvm_preempt_ops.sched_out = kvm_sched_out;
2357#ifndef CONFIG_X86
2358 msi2intx = 0;
2359#endif
2360 2656
2361 return 0; 2657 return 0;
2362 2658