aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/virtio/mmio.txt17
-rw-r--r--Documentation/filesystems/Locking1
-rw-r--r--Documentation/filesystems/ext3.txt8
-rw-r--r--Documentation/filesystems/ext4.txt41
-rw-r--r--Documentation/virtual/uml/UserModeLinux-HOWTO.txt532
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/s390/hypfs/inode.c6
-rw-r--r--arch/um/Kconfig.char114
-rw-r--r--arch/um/Kconfig.rest23
-rw-r--r--arch/um/Kconfig.um6
-rw-r--r--arch/um/Makefile49
-rw-r--r--arch/um/Makefile-x86_6426
-rw-r--r--arch/um/drivers/chan.h (renamed from arch/um/include/shared/chan_kern.h)6
-rw-r--r--arch/um/drivers/chan_kern.c12
-rw-r--r--arch/um/drivers/chan_user.c9
-rw-r--r--arch/um/drivers/chan_user.h (renamed from arch/um/include/shared/chan_user.h)0
-rw-r--r--arch/um/drivers/cow_sys.h1
-rw-r--r--arch/um/drivers/daemon_user.c1
-rw-r--r--arch/um/drivers/fd.c2
-rw-r--r--arch/um/drivers/harddog_user.c1
-rw-r--r--arch/um/drivers/line.c2
-rw-r--r--arch/um/drivers/line.h (renamed from arch/um/include/shared/line.h)0
-rw-r--r--arch/um/drivers/mconsole.h (renamed from arch/um/include/shared/mconsole.h)0
-rw-r--r--arch/um/drivers/mconsole_kern.h (renamed from arch/um/include/shared/mconsole_kern.h)0
-rw-r--r--arch/um/drivers/mconsole_user.c2
-rw-r--r--arch/um/drivers/net_user.c2
-rw-r--r--arch/um/drivers/pcap_user.c2
-rw-r--r--arch/um/drivers/port_user.c2
-rw-r--r--arch/um/drivers/pty.c2
-rw-r--r--arch/um/drivers/slip_user.c2
-rw-r--r--arch/um/drivers/slirp_user.c2
-rw-r--r--arch/um/drivers/ssl.c4
-rw-r--r--arch/um/drivers/stdio_console.c3
-rw-r--r--arch/um/drivers/tty.c2
-rw-r--r--arch/um/drivers/ubd_kern.c33
-rw-r--r--arch/um/drivers/ubd_user.c1
-rw-r--r--arch/um/drivers/ubd_user.h (renamed from arch/um/include/shared/ubd_user.h)0
-rw-r--r--arch/um/drivers/umcast_user.c2
-rw-r--r--arch/um/drivers/vde_user.c2
-rw-r--r--arch/um/drivers/xterm.c2
-rw-r--r--arch/um/include/asm/Kbuild3
-rw-r--r--arch/um/include/asm/bug.h6
-rw-r--r--arch/um/include/asm/checksum.h6
-rw-r--r--arch/um/include/asm/cputime.h6
-rw-r--r--arch/um/include/asm/device.h7
-rw-r--r--arch/um/include/asm/emergency-restart.h6
-rw-r--r--arch/um/include/asm/ftrace.h1
-rw-r--r--arch/um/include/asm/futex.h6
-rw-r--r--arch/um/include/asm/hardirq.h1
-rw-r--r--arch/um/include/asm/hw_irq.h7
-rw-r--r--arch/um/include/asm/irq_regs.h1
-rw-r--r--arch/um/include/asm/irqflags.h38
-rw-r--r--arch/um/include/asm/kdebug.h1
-rw-r--r--arch/um/include/asm/mmu.h22
-rw-r--r--arch/um/include/asm/mmu_context.h7
-rw-r--r--arch/um/include/asm/page.h2
-rw-r--r--arch/um/include/asm/page_offset.h1
-rw-r--r--arch/um/include/asm/pda.h21
-rw-r--r--arch/um/include/asm/percpu.h6
-rw-r--r--arch/um/include/asm/ptrace-generic.h7
-rw-r--r--arch/um/include/asm/sections.h7
-rw-r--r--arch/um/include/asm/system.h47
-rw-r--r--arch/um/include/asm/topology.h6
-rw-r--r--arch/um/include/asm/uaccess.h89
-rw-r--r--arch/um/include/asm/xor.h6
-rw-r--r--arch/um/include/shared/as-layout.h2
-rw-r--r--arch/um/include/shared/common-offsets.h1
-rw-r--r--arch/um/include/shared/initrd.h12
-rw-r--r--arch/um/include/shared/kern.h18
-rw-r--r--arch/um/include/shared/kern_util.h1
-rw-r--r--arch/um/include/shared/ldt.h37
-rw-r--r--arch/um/include/shared/mem_kern.h20
-rw-r--r--arch/um/include/shared/os.h13
-rw-r--r--arch/um/include/shared/process.h17
-rw-r--r--arch/um/include/shared/ptrace_user.h3
-rw-r--r--arch/um/include/shared/skas_ptregs.h6
-rw-r--r--arch/um/include/shared/syscall.h12
-rw-r--r--arch/um/include/shared/task.h9
-rw-r--r--arch/um/include/shared/tlb.h15
-rw-r--r--arch/um/include/shared/um_malloc.h2
-rw-r--r--arch/um/include/shared/um_mmu.h24
-rw-r--r--arch/um/include/shared/um_uaccess.h97
-rw-r--r--arch/um/include/shared/user.h7
-rw-r--r--arch/um/kernel/Makefile2
-rw-r--r--arch/um/kernel/exec.c18
-rw-r--r--arch/um/kernel/gmon_syms.c15
-rw-r--r--arch/um/kernel/initrd.c4
-rw-r--r--arch/um/kernel/irq.c1
-rw-r--r--arch/um/kernel/ksyms.c38
-rw-r--r--arch/um/kernel/mem.c2
-rw-r--r--arch/um/kernel/physmem.c21
-rw-r--r--arch/um/kernel/process.c4
-rw-r--r--arch/um/kernel/signal.c1
-rw-r--r--arch/um/kernel/skas/clone.c1
-rw-r--r--arch/um/kernel/skas/uaccess.c6
-rw-r--r--arch/um/kernel/tlb.c5
-rw-r--r--arch/um/kernel/trap.c3
-rw-r--r--arch/um/kernel/uaccess.c33
-rw-r--r--arch/um/kernel/um_arch.c2
-rw-r--r--arch/um/os-Linux/Makefile6
-rw-r--r--arch/um/os-Linux/aio.c2
-rw-r--r--arch/um/os-Linux/drivers/ethertap_user.c2
-rw-r--r--arch/um/os-Linux/drivers/tuntap_user.c2
-rw-r--r--arch/um/os-Linux/elf_aux.c1
-rw-r--r--arch/um/os-Linux/file.c2
-rw-r--r--arch/um/os-Linux/helper.c2
-rw-r--r--arch/um/os-Linux/internal.h1
-rw-r--r--arch/um/os-Linux/irq.c3
-rw-r--r--arch/um/os-Linux/main.c1
-rw-r--r--arch/um/os-Linux/mem.c2
-rw-r--r--arch/um/os-Linux/process.c23
-rw-r--r--arch/um/os-Linux/sigio.c3
-rw-r--r--arch/um/os-Linux/signal.c72
-rw-r--r--arch/um/os-Linux/skas/mem.c2
-rw-r--r--arch/um/os-Linux/skas/process.c21
-rw-r--r--arch/um/os-Linux/start_up.c10
-rw-r--r--arch/um/os-Linux/sys-i386/signal.c13
-rw-r--r--arch/um/os-Linux/sys-x86_64/Makefile10
-rw-r--r--arch/um/os-Linux/sys-x86_64/registers.c52
-rw-r--r--arch/um/os-Linux/sys-x86_64/signal.c16
-rw-r--r--arch/um/os-Linux/sys-x86_64/task_size.c5
-rw-r--r--arch/um/os-Linux/time.c4
-rw-r--r--arch/um/os-Linux/tls.c35
-rw-r--r--arch/um/os-Linux/tty.c2
-rw-r--r--arch/um/os-Linux/uaccess.c32
-rw-r--r--arch/um/os-Linux/umid.c2
-rw-r--r--arch/um/os-Linux/util.c2
-rw-r--r--arch/um/scripts/Makefile.rules7
-rw-r--r--arch/um/sys-i386/Makefile24
-rw-r--r--arch/um/sys-i386/asm/elf.h125
-rw-r--r--arch/um/sys-i386/asm/module.h13
-rw-r--r--arch/um/sys-i386/atomic64_cx8_32.S225
-rw-r--r--arch/um/sys-i386/bug.c21
-rw-r--r--arch/um/sys-i386/ksyms.c5
-rw-r--r--arch/um/sys-i386/shared/sysdep/barrier.h9
-rw-r--r--arch/um/sys-i386/shared/sysdep/host_ldt.h34
-rw-r--r--arch/um/sys-i386/shared/sysdep/ptrace_user.h50
-rw-r--r--arch/um/sys-i386/shared/sysdep/sc.h44
-rw-r--r--arch/um/sys-i386/shared/sysdep/sigcontext.h26
-rw-r--r--arch/um/sys-i386/shared/sysdep/vm-flags.h14
-rw-r--r--arch/um/sys-i386/stub_segv.c17
-rw-r--r--arch/um/sys-i386/user-offsets.c53
-rw-r--r--arch/um/sys-x86_64/Makefile27
-rw-r--r--arch/um/sys-x86_64/asm/archparam.h16
-rw-r--r--arch/um/sys-x86_64/asm/module.h20
-rw-r--r--arch/um/sys-x86_64/delay.c60
-rw-r--r--arch/um/sys-x86_64/fault.c28
-rw-r--r--arch/um/sys-x86_64/ptrace_user.c22
-rw-r--r--arch/um/sys-x86_64/shared/sysdep/barrier.h7
-rw-r--r--arch/um/sys-x86_64/shared/sysdep/host_ldt.h38
-rw-r--r--arch/um/sys-x86_64/shared/sysdep/kernel-offsets.h23
-rw-r--r--arch/um/sys-x86_64/shared/sysdep/ptrace_user.h77
-rw-r--r--arch/um/sys-x86_64/shared/sysdep/sc.h45
-rw-r--r--arch/um/sys-x86_64/shared/sysdep/sigcontext.h27
-rw-r--r--arch/um/sys-x86_64/shared/sysdep/skas_ptrace.h22
-rw-r--r--arch/um/sys-x86_64/shared/sysdep/system.h132
-rw-r--r--arch/um/sys-x86_64/shared/sysdep/tls.h29
-rw-r--r--arch/um/sys-x86_64/signal.c290
-rw-r--r--arch/x86/Makefile.um (renamed from arch/um/Makefile-i386)37
-rw-r--r--arch/x86/um/Kconfig (renamed from arch/um/Kconfig.x86)0
-rw-r--r--arch/x86/um/Makefile45
-rw-r--r--arch/x86/um/asm/apic.h (renamed from arch/um/include/asm/apic.h)0
-rw-r--r--arch/x86/um/asm/arch_hweight.h (renamed from arch/um/include/asm/arch_hweight.h)0
-rw-r--r--arch/x86/um/asm/archparam.h (renamed from arch/um/sys-i386/asm/archparam.h)8
-rw-r--r--arch/x86/um/asm/checksum.h10
-rw-r--r--arch/x86/um/asm/checksum_32.h (renamed from arch/um/sys-i386/shared/sysdep/checksum.h)2
-rw-r--r--arch/x86/um/asm/checksum_64.h (renamed from arch/um/sys-x86_64/shared/sysdep/checksum.h)0
-rw-r--r--arch/x86/um/asm/desc.h (renamed from arch/um/include/asm/desc.h)0
-rw-r--r--arch/x86/um/asm/elf.h (renamed from arch/um/sys-x86_64/asm/elf.h)145
-rw-r--r--arch/x86/um/asm/irq_vectors.h (renamed from arch/um/include/asm/irq_vectors.h)0
-rw-r--r--arch/x86/um/asm/mm_context.h72
-rw-r--r--arch/x86/um/asm/module.h23
-rw-r--r--arch/x86/um/asm/processor.h22
-rw-r--r--arch/x86/um/asm/processor_32.h (renamed from arch/um/sys-i386/asm/processor.h)18
-rw-r--r--arch/x86/um/asm/processor_64.h (renamed from arch/um/sys-x86_64/asm/processor.h)11
-rw-r--r--arch/x86/um/asm/ptrace.h5
-rw-r--r--arch/x86/um/asm/ptrace_32.h (renamed from arch/um/sys-i386/asm/ptrace.h)0
-rw-r--r--arch/x86/um/asm/ptrace_64.h (renamed from arch/um/sys-x86_64/asm/ptrace.h)2
-rw-r--r--arch/x86/um/asm/required-features.h (renamed from arch/um/include/asm/required-features.h)0
-rw-r--r--arch/x86/um/asm/segment.h (renamed from arch/um/include/asm/segment.h)0
-rw-r--r--arch/x86/um/asm/system.h (renamed from arch/um/sys-i386/shared/sysdep/system.h)3
-rw-r--r--arch/x86/um/asm/vm-flags.h (renamed from arch/um/sys-x86_64/shared/sysdep/vm-flags.h)14
-rw-r--r--arch/x86/um/bug.c (renamed from arch/um/sys-x86_64/bug.c)0
-rw-r--r--arch/x86/um/bugs_32.c (renamed from arch/um/sys-i386/bugs.c)6
-rw-r--r--arch/x86/um/bugs_64.c (renamed from arch/um/sys-x86_64/bugs.c)0
-rw-r--r--arch/x86/um/checksum_32.S (renamed from arch/um/sys-i386/checksum.S)0
-rw-r--r--arch/x86/um/delay.c (renamed from arch/um/sys-i386/delay.c)0
-rw-r--r--arch/x86/um/elfcore.c (renamed from arch/um/sys-i386/elfcore.c)0
-rw-r--r--arch/x86/um/fault.c (renamed from arch/um/sys-i386/fault.c)0
-rw-r--r--arch/x86/um/ksyms.c (renamed from arch/um/sys-x86_64/ksyms.c)2
-rw-r--r--arch/x86/um/ldt.c (renamed from arch/um/sys-i386/ldt.c)36
-rw-r--r--arch/x86/um/mem_32.c (renamed from arch/um/sys-i386/mem.c)0
-rw-r--r--arch/x86/um/mem_64.c (renamed from arch/um/sys-x86_64/mem.c)0
-rw-r--r--arch/x86/um/os-Linux/Makefile (renamed from arch/um/os-Linux/sys-i386/Makefile)5
-rw-r--r--arch/x86/um/os-Linux/mcontext.c31
-rw-r--r--arch/x86/um/os-Linux/prctl.c (renamed from arch/um/os-Linux/sys-x86_64/prctl.c)0
-rw-r--r--arch/x86/um/os-Linux/registers.c (renamed from arch/um/os-Linux/sys-i386/registers.c)62
-rw-r--r--arch/x86/um/os-Linux/task_size.c (renamed from arch/um/os-Linux/sys-i386/task_size.c)13
-rw-r--r--arch/x86/um/os-Linux/tls.c (renamed from arch/um/os-Linux/sys-i386/tls.c)35
-rw-r--r--arch/x86/um/ptrace_32.c (renamed from arch/um/sys-i386/ptrace.c)69
-rw-r--r--arch/x86/um/ptrace_64.c (renamed from arch/um/sys-x86_64/ptrace.c)103
-rw-r--r--arch/x86/um/ptrace_user.c (renamed from arch/um/sys-i386/ptrace_user.c)2
-rw-r--r--arch/x86/um/setjmp_32.S (renamed from arch/um/sys-i386/setjmp.S)0
-rw-r--r--arch/x86/um/setjmp_64.S (renamed from arch/um/sys-x86_64/setjmp.S)0
-rw-r--r--arch/x86/um/shared/sysdep/archsetjmp.h5
-rw-r--r--arch/x86/um/shared/sysdep/archsetjmp_32.h (renamed from arch/um/sys-i386/shared/sysdep/archsetjmp.h)0
-rw-r--r--arch/x86/um/shared/sysdep/archsetjmp_64.h (renamed from arch/um/sys-x86_64/shared/sysdep/archsetjmp.h)0
-rw-r--r--arch/x86/um/shared/sysdep/faultinfo.h5
-rw-r--r--arch/x86/um/shared/sysdep/faultinfo_32.h (renamed from arch/um/sys-i386/shared/sysdep/faultinfo.h)6
-rw-r--r--arch/x86/um/shared/sysdep/faultinfo_64.h (renamed from arch/um/sys-x86_64/shared/sysdep/faultinfo.h)6
-rw-r--r--arch/x86/um/shared/sysdep/kernel-offsets.h (renamed from arch/um/sys-i386/shared/sysdep/kernel-offsets.h)0
-rw-r--r--arch/x86/um/shared/sysdep/mcontext.h31
-rw-r--r--arch/x86/um/shared/sysdep/ptrace.h5
-rw-r--r--arch/x86/um/shared/sysdep/ptrace_32.h (renamed from arch/um/sys-i386/shared/sysdep/ptrace.h)75
-rw-r--r--arch/x86/um/shared/sysdep/ptrace_64.h (renamed from arch/um/sys-x86_64/shared/sysdep/ptrace.h)103
-rw-r--r--arch/x86/um/shared/sysdep/ptrace_user.h27
-rw-r--r--arch/x86/um/shared/sysdep/skas_ptrace.h (renamed from arch/um/sys-i386/shared/sysdep/skas_ptrace.h)4
-rw-r--r--arch/x86/um/shared/sysdep/stub.h14
-rw-r--r--arch/x86/um/shared/sysdep/stub_32.h (renamed from arch/um/sys-i386/shared/sysdep/stub.h)8
-rw-r--r--arch/x86/um/shared/sysdep/stub_64.h (renamed from arch/um/sys-x86_64/shared/sysdep/stub.h)8
-rw-r--r--arch/x86/um/shared/sysdep/syscalls.h5
-rw-r--r--arch/x86/um/shared/sysdep/syscalls_32.h (renamed from arch/um/sys-i386/shared/sysdep/syscalls.h)0
-rw-r--r--arch/x86/um/shared/sysdep/syscalls_64.h (renamed from arch/um/sys-x86_64/shared/sysdep/syscalls.h)1
-rw-r--r--arch/x86/um/shared/sysdep/tls.h (renamed from arch/um/sys-i386/shared/sysdep/tls.h)9
-rw-r--r--arch/x86/um/signal.c (renamed from arch/um/sys-i386/signal.c)304
-rw-r--r--arch/x86/um/stub_32.S (renamed from arch/um/sys-i386/stub.S)0
-rw-r--r--arch/x86/um/stub_64.S (renamed from arch/um/sys-x86_64/stub.S)0
-rw-r--r--arch/x86/um/stub_segv.c (renamed from arch/um/sys-x86_64/stub_segv.c)11
-rw-r--r--arch/x86/um/sys_call_table_32.S (renamed from arch/um/sys-i386/sys_call_table.S)4
-rw-r--r--arch/x86/um/sys_call_table_64.c (renamed from arch/um/sys-x86_64/syscall_table.c)3
-rw-r--r--arch/x86/um/syscalls_32.c (renamed from arch/um/sys-i386/syscalls.c)0
-rw-r--r--arch/x86/um/syscalls_64.c (renamed from arch/um/sys-x86_64/syscalls.c)0
-rw-r--r--arch/x86/um/sysrq_32.c (renamed from arch/um/sys-i386/sysrq.c)0
-rw-r--r--arch/x86/um/sysrq_64.c (renamed from arch/um/sys-x86_64/sysrq.c)2
-rw-r--r--arch/x86/um/tls_32.c (renamed from arch/um/sys-i386/tls.c)0
-rw-r--r--arch/x86/um/tls_64.c (renamed from arch/um/sys-x86_64/tls.c)0
-rw-r--r--arch/x86/um/user-offsets.c (renamed from arch/um/sys-x86_64/user-offsets.c)51
-rw-r--r--arch/x86/um/vdso/Makefile (renamed from arch/um/sys-x86_64/vdso/Makefile)4
-rw-r--r--arch/x86/um/vdso/checkundef.sh (renamed from arch/um/sys-x86_64/vdso/checkundef.sh)0
-rw-r--r--arch/x86/um/vdso/um_vdso.c (renamed from arch/um/sys-x86_64/vdso/um_vdso.c)0
-rw-r--r--arch/x86/um/vdso/vdso-layout.lds.S (renamed from arch/um/sys-x86_64/vdso/vdso-layout.lds.S)0
-rw-r--r--arch/x86/um/vdso/vdso-note.S (renamed from arch/um/sys-x86_64/vdso/vdso-note.S)0
-rw-r--r--arch/x86/um/vdso/vdso.S (renamed from arch/um/sys-x86_64/vdso/vdso.S)2
-rw-r--r--arch/x86/um/vdso/vdso.lds.S (renamed from arch/um/sys-x86_64/vdso/vdso.lds.S)0
-rw-r--r--arch/x86/um/vdso/vma.c (renamed from arch/um/sys-x86_64/vdso/vma.c)2
-rw-r--r--drivers/block/virtio_blk.c30
-rw-r--r--drivers/char/Kconfig6
-rw-r--r--drivers/char/hw_random/Kconfig15
-rw-r--r--drivers/char/ttyprintk.c2
-rw-r--r--drivers/char/virtio_console.c120
-rw-r--r--drivers/input/Kconfig2
-rw-r--r--drivers/isdn/Kconfig2
-rw-r--r--drivers/mtd/mtdchar.c2
-rw-r--r--drivers/net/virtio_net.c14
-rw-r--r--drivers/net/wireless/ath/Kconfig2
-rw-r--r--drivers/net/wireless/rtlwifi/Kconfig4
-rw-r--r--drivers/power/Kconfig1
-rw-r--r--drivers/rtc/Kconfig2
-rw-r--r--drivers/staging/pohmelfs/inode.c2
-rw-r--r--drivers/tty/Kconfig2
-rw-r--r--drivers/virtio/Kconfig11
-rw-r--r--drivers/virtio/Makefile1
-rw-r--r--drivers/virtio/virtio_mmio.c479
-rw-r--r--drivers/virtio/virtio_pci.c10
-rw-r--r--drivers/watchdog/Kconfig6
-rw-r--r--fs/9p/vfs_inode.c4
-rw-r--r--fs/9p/vfs_inode_dotl.c4
-rw-r--r--fs/adfs/inode.c2
-rw-r--r--fs/affs/amigaffs.c4
-rw-r--r--fs/affs/inode.c8
-rw-r--r--fs/affs/namei.c6
-rw-r--r--fs/afs/fsclient.c2
-rw-r--r--fs/afs/inode.c4
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/befs/linuxvfs.c2
-rw-r--r--fs/bfs/dir.c2
-rw-r--r--fs/bfs/inode.c2
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/btrfs/delayed-inode.c2
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/ceph/caps.c2
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/cifs/inode.c6
-rw-r--r--fs/cifs/link.c2
-rw-r--r--fs/coda/coda_linux.c2
-rw-r--r--fs/coda/dir.c2
-rw-r--r--fs/dcache.c40
-rw-r--r--fs/devpts/inode.c4
-rw-r--r--fs/ecryptfs/inode.c12
-rw-r--r--fs/efs/inode.c2
-rw-r--r--fs/exofs/inode.c2
-rw-r--r--fs/ext2/balloc.c2
-rw-r--r--fs/ext2/ialloc.c2
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext2/super.c8
-rw-r--r--fs/ext3/balloc.c17
-rw-r--r--fs/ext3/fsync.c10
-rw-r--r--fs/ext3/ialloc.c47
-rw-r--r--fs/ext3/inode.c2
-rw-r--r--fs/ext3/ioctl.c24
-rw-r--r--fs/ext3/namei.c6
-rw-r--r--fs/ext3/super.c12
-rw-r--r--fs/ext4/balloc.c345
-rw-r--r--fs/ext4/ext4.h141
-rw-r--r--fs/ext4/ext4_extents.h2
-rw-r--r--fs/ext4/ext4_jbd2.c8
-rw-r--r--fs/ext4/extents.c1168
-rw-r--r--fs/ext4/file.c4
-rw-r--r--fs/ext4/fsync.c10
-rw-r--r--fs/ext4/ialloc.c206
-rw-r--r--fs/ext4/indirect.c20
-rw-r--r--fs/ext4/inode.c514
-rw-r--r--fs/ext4/ioctl.c65
-rw-r--r--fs/ext4/mballoc.c331
-rw-r--r--fs/ext4/mballoc.h11
-rw-r--r--fs/ext4/migrate.c111
-rw-r--r--fs/ext4/mmp.c10
-rw-r--r--fs/ext4/move_extent.c1
-rw-r--r--fs/ext4/namei.c29
-rw-r--r--fs/ext4/page-io.c66
-rw-r--r--fs/ext4/resize.c10
-rw-r--r--fs/ext4/super.c263
-rw-r--r--fs/ext4/xattr.c12
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/fat/namei_msdos.c2
-rw-r--r--fs/fat/namei_vfat.c2
-rw-r--r--fs/freevxfs/vxfs_inode.c2
-rw-r--r--fs/fuse/control.c2
-rw-r--r--fs/fuse/inode.c2
-rw-r--r--fs/gfs2/glops.c2
-rw-r--r--fs/hfs/dir.c4
-rw-r--r--fs/hfs/inode.c4
-rw-r--r--fs/hfsplus/dir.c4
-rw-r--r--fs/hfsplus/inode.c10
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/hostfs/hostfs_user.c1
-rw-r--r--fs/hpfs/dir.c2
-rw-r--r--fs/hpfs/inode.c10
-rw-r--r--fs/hpfs/namei.c8
-rw-r--r--fs/hppfs/hppfs.c2
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/inode.c2
-rw-r--r--fs/isofs/inode.c4
-rw-r--r--fs/isofs/rock.c4
-rw-r--r--fs/jbd/journal.c8
-rw-r--r--fs/jbd2/commit.c26
-rw-r--r--fs/jbd2/journal.c44
-rw-r--r--fs/jbd2/recovery.c28
-rw-r--r--fs/jbd2/transaction.c68
-rw-r--r--fs/jffs2/dir.c6
-rw-r--r--fs/jffs2/fs.c6
-rw-r--r--fs/jfs/jfs_imap.c6
-rw-r--r--fs/jfs/jfs_inode.c2
-rw-r--r--fs/jfs/namei.c12
-rw-r--r--fs/jfs/super.c1
-rw-r--r--fs/libfs.c6
-rw-r--r--fs/logfs/dir.c8
-rw-r--r--fs/logfs/inode.c3
-rw-r--r--fs/logfs/readwrite.c2
-rw-r--r--fs/minix/inode.c4
-rw-r--r--fs/namei.c18
-rw-r--r--fs/ncpfs/inode.c2
-rw-r--r--fs/nfs/inode.c6
-rw-r--r--fs/nilfs2/inode.c4
-rw-r--r--fs/nilfs2/namei.c2
-rw-r--r--fs/ntfs/inode.c8
-rw-r--r--fs/ocfs2/dir.c4
-rw-r--r--fs/ocfs2/dlmglue.c2
-rw-r--r--fs/ocfs2/inode.c4
-rw-r--r--fs/ocfs2/namei.c18
-rw-r--r--fs/openpromfs/inode.c4
-rw-r--r--fs/proc/base.c12
-rw-r--r--fs/proc/generic.c2
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/proc_sysctl.c2
-rw-r--r--fs/qnx4/inode.c2
-rw-r--r--fs/quota/quota.c7
-rw-r--r--fs/reiserfs/inode.c10
-rw-r--r--fs/reiserfs/namei.c16
-rw-r--r--fs/romfs/super.c2
-rw-r--r--fs/squashfs/inode.c18
-rw-r--r--fs/stack.c2
-rw-r--r--fs/stat.c5
-rw-r--r--fs/super.c9
-rw-r--r--fs/sysfs/inode.c2
-rw-r--r--fs/sysv/inode.c2
-rw-r--r--fs/ubifs/super.c2
-rw-r--r--fs/ubifs/xattr.c4
-rw-r--r--fs/udf/balloc.c14
-rw-r--r--fs/udf/directory.c8
-rw-r--r--fs/udf/inode.c56
-rw-r--r--fs/udf/lowlevel.c2
-rw-r--r--fs/udf/misc.c19
-rw-r--r--fs/udf/namei.c20
-rw-r--r--fs/udf/partition.c19
-rw-r--r--fs/udf/super.c280
-rw-r--r--fs/udf/truncate.c22
-rw-r--r--fs/udf/udf_sb.h5
-rw-r--r--fs/udf/udfdecl.h35
-rw-r--r--fs/udf/udftime.c3
-rw-r--r--fs/udf/unicode.c6
-rw-r--r--fs/ufs/ialloc.c2
-rw-r--r--fs/ufs/inode.c4
-rw-r--r--fs/xfs/xfs_iops.c2
-rw-r--r--include/linux/dcache.h8
-rw-r--r--include/linux/ext2_fs.h4
-rw-r--r--include/linux/ext3_fs.h6
-rw-r--r--include/linux/ext3_fs_sb.h4
-rw-r--r--include/linux/fs.h41
-rw-r--r--include/linux/jbd.h64
-rw-r--r--include/linux/jbd2.h69
-rw-r--r--include/linux/jbd_common.h68
-rw-r--r--include/linux/namei.h1
-rw-r--r--include/linux/virtio.h4
-rw-r--r--include/linux/virtio_config.h3
-rw-r--r--include/linux/virtio_mmio.h111
-rw-r--r--include/linux/virtio_ring.h6
-rw-r--r--include/trace/events/ext4.h480
-rw-r--r--mm/shmem.c2
-rw-r--r--security/integrity/ima/Kconfig2
-rw-r--r--sound/Kconfig2
423 files changed, 5866 insertions, 5301 deletions
diff --git a/Documentation/devicetree/bindings/virtio/mmio.txt b/Documentation/devicetree/bindings/virtio/mmio.txt
new file mode 100644
index 000000000000..5069c1b8e193
--- /dev/null
+++ b/Documentation/devicetree/bindings/virtio/mmio.txt
@@ -0,0 +1,17 @@
1* virtio memory mapped device
2
3See http://ozlabs.org/~rusty/virtio-spec/ for more details.
4
5Required properties:
6
7- compatible: "virtio,mmio" compatibility string
8- reg: control registers base address and size including configuration space
9- interrupts: interrupt generated by the device
10
11Example:
12
13 virtio_block@3000 {
14 compatible = "virtio,mmio";
15 reg = <0x3000 0x100>;
16 interrupts = <41>;
17 }
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 653380793a6c..d819ba16a0c7 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -29,6 +29,7 @@ d_hash no no no maybe
29d_compare: yes no no maybe 29d_compare: yes no no maybe
30d_delete: no yes no no 30d_delete: no yes no no
31d_release: no no yes no 31d_release: no no yes no
32d_prune: no yes no no
32d_iput: no no yes no 33d_iput: no no yes no
33d_dname: no no no no 34d_dname: no no no no
34d_automount: no no yes no 35d_automount: no no yes no
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt
index 22f3a0eda1d2..b100adc38adb 100644
--- a/Documentation/filesystems/ext3.txt
+++ b/Documentation/filesystems/ext3.txt
@@ -73,14 +73,6 @@ nobarrier (*) This also requires an IO stack which can support
73 also be used to enable or disable barriers, for 73 also be used to enable or disable barriers, for
74 consistency with other ext3 mount options. 74 consistency with other ext3 mount options.
75 75
76orlov (*) This enables the new Orlov block allocator. It is
77 enabled by default.
78
79oldalloc This disables the Orlov block allocator and enables
80 the old block allocator. Orlov should have better
81 performance - we'd like to get some feedback if it's
82 the contrary for you.
83
84user_xattr Enables Extended User Attributes. Additionally, you 76user_xattr Enables Extended User Attributes. Additionally, you
85 need to have extended attribute support enabled in the 77 need to have extended attribute support enabled in the
86 kernel configuration (CONFIG_EXT3_FS_XATTR). See the 78 kernel configuration (CONFIG_EXT3_FS_XATTR). See the
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 232a575a0c48..4917cf24a5e0 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -160,7 +160,9 @@ noload if the filesystem was not unmounted cleanly,
160 lead to any number of problems. 160 lead to any number of problems.
161 161
162data=journal All data are committed into the journal prior to being 162data=journal All data are committed into the journal prior to being
163 written into the main file system. 163 written into the main file system. Enabling
164 this mode will disable delayed allocation and
165 O_DIRECT support.
164 166
165data=ordered (*) All data are forced directly out to the main file 167data=ordered (*) All data are forced directly out to the main file
166 system prior to its metadata being committed to the 168 system prior to its metadata being committed to the
@@ -201,30 +203,19 @@ inode_readahead_blks=n This tuning parameter controls the maximum
201 table readahead algorithm will pre-read into 203 table readahead algorithm will pre-read into
202 the buffer cache. The default value is 32 blocks. 204 the buffer cache. The default value is 32 blocks.
203 205
204orlov (*) This enables the new Orlov block allocator. It is 206nouser_xattr Disables Extended User Attributes. If you have extended
205 enabled by default. 207 attribute support enabled in the kernel configuration
206 208 (CONFIG_EXT4_FS_XATTR), extended attribute support
207oldalloc This disables the Orlov block allocator and enables 209 is enabled by default on mount. See the attr(5) manual
208 the old block allocator. Orlov should have better 210 page and http://acl.bestbits.at/ for more information
209 performance - we'd like to get some feedback if it's 211 about extended attributes.
210 the contrary for you.
211
212user_xattr Enables Extended User Attributes. Additionally, you
213 need to have extended attribute support enabled in the
214 kernel configuration (CONFIG_EXT4_FS_XATTR). See the
215 attr(5) manual page and http://acl.bestbits.at/ to
216 learn more about extended attributes.
217
218nouser_xattr Disables Extended User Attributes.
219
220acl Enables POSIX Access Control Lists support.
221 Additionally, you need to have ACL support enabled in
222 the kernel configuration (CONFIG_EXT4_FS_POSIX_ACL).
223 See the acl(5) manual page and http://acl.bestbits.at/
224 for more information.
225 212
226noacl This option disables POSIX Access Control List 213noacl This option disables POSIX Access Control List
227 support. 214 support. If ACL support is enabled in the kernel
215 configuration (CONFIG_EXT4_FS_POSIX_ACL), ACL is
216 enabled by default on mount. See the acl(5) manual
217 page and http://acl.bestbits.at/ for more information
218 about acl.
228 219
229bsddf (*) Make 'df' act like BSD. 220bsddf (*) Make 'df' act like BSD.
230minixdf Make 'df' act like Minix. 221minixdf Make 'df' act like Minix.
@@ -419,8 +410,8 @@ written to the journal first, and then to its final location.
419In the event of a crash, the journal can be replayed, bringing both data and 410In the event of a crash, the journal can be replayed, bringing both data and
420metadata into a consistent state. This mode is the slowest except when data 411metadata into a consistent state. This mode is the slowest except when data
421needs to be read from and written to disk at the same time where it 412needs to be read from and written to disk at the same time where it
422outperforms all others modes. Currently ext4 does not have delayed 413outperforms all others modes. Enabling this mode will disable delayed
423allocation support if this data journalling mode is selected. 414allocation and O_DIRECT support.
424 415
425/proc entries 416/proc entries
426============= 417=============
diff --git a/Documentation/virtual/uml/UserModeLinux-HOWTO.txt b/Documentation/virtual/uml/UserModeLinux-HOWTO.txt
index 5d0fc8bfcdb9..77dfecf4e2d6 100644
--- a/Documentation/virtual/uml/UserModeLinux-HOWTO.txt
+++ b/Documentation/virtual/uml/UserModeLinux-HOWTO.txt
@@ -134,13 +134,13 @@
134 134
135 ______________________________________________________________________ 135 ______________________________________________________________________
136 136
137 11.. IInnttrroodduuccttiioonn 137 1. Introduction
138 138
139 Welcome to User Mode Linux. It's going to be fun. 139 Welcome to User Mode Linux. It's going to be fun.
140 140
141 141
142 142
143 11..11.. HHooww iiss UUsseerr MMooddee LLiinnuuxx DDiiffffeerreenntt?? 143 1.1. How is User Mode Linux Different?
144 144
145 Normally, the Linux Kernel talks straight to your hardware (video 145 Normally, the Linux Kernel talks straight to your hardware (video
146 card, keyboard, hard drives, etc), and any programs which run ask the 146 card, keyboard, hard drives, etc), and any programs which run ask the
@@ -181,7 +181,7 @@
181 181
182 182
183 183
184 11..22.. WWhhyy WWoouulldd II WWaanntt UUsseerr MMooddee LLiinnuuxx?? 184 1.2. Why Would I Want User Mode Linux?
185 185
186 186
187 1. If User Mode Linux crashes, your host kernel is still fine. 187 1. If User Mode Linux crashes, your host kernel is still fine.
@@ -206,12 +206,12 @@
206 206
207 207
208 208
209 22.. CCoommppiilliinngg tthhee kkeerrnneell aanndd mmoodduulleess 209 2. Compiling the kernel and modules
210 210
211 211
212 212
213 213
214 22..11.. CCoommppiilliinngg tthhee kkeerrnneell 214 2.1. Compiling the kernel
215 215
216 216
217 Compiling the user mode kernel is just like compiling any other 217 Compiling the user mode kernel is just like compiling any other
@@ -322,7 +322,7 @@
322 bug fixes and enhancements that have gone into subsequent releases. 322 bug fixes and enhancements that have gone into subsequent releases.
323 323
324 324
325 22..22.. CCoommppiilliinngg aanndd iinnssttaalllliinngg kkeerrnneell mmoodduulleess 325 2.2. Compiling and installing kernel modules
326 326
327 UML modules are built in the same way as the native kernel (with the 327 UML modules are built in the same way as the native kernel (with the
328 exception of the 'ARCH=um' that you always need for UML): 328 exception of the 'ARCH=um' that you always need for UML):
@@ -386,19 +386,19 @@
386 386
387 387
388 388
389 22..33.. CCoommppiilliinngg aanndd iinnssttaalllliinngg uummll__uuttiilliittiieess 389 2.3. Compiling and installing uml_utilities
390 390
391 Many features of the UML kernel require a user-space helper program, 391 Many features of the UML kernel require a user-space helper program,
392 so a uml_utilities package is distributed separately from the kernel 392 so a uml_utilities package is distributed separately from the kernel
393 patch which provides these helpers. Included within this is: 393 patch which provides these helpers. Included within this is:
394 394
395 +o port-helper - Used by consoles which connect to xterms or ports 395 o port-helper - Used by consoles which connect to xterms or ports
396 396
397 +o tunctl - Configuration tool to create and delete tap devices 397 o tunctl - Configuration tool to create and delete tap devices
398 398
399 +o uml_net - Setuid binary for automatic tap device configuration 399 o uml_net - Setuid binary for automatic tap device configuration
400 400
401 +o uml_switch - User-space virtual switch required for daemon 401 o uml_switch - User-space virtual switch required for daemon
402 transport 402 transport
403 403
404 The uml_utilities tree is compiled with: 404 The uml_utilities tree is compiled with:
@@ -423,11 +423,11 @@
423 423
424 424
425 425
426 33.. RRuunnnniinngg UUMMLL aanndd llooggggiinngg iinn 426 3. Running UML and logging in
427 427
428 428
429 429
430 33..11.. RRuunnnniinngg UUMMLL 430 3.1. Running UML
431 431
432 It runs on 2.2.15 or later, and all 2.4 kernels. 432 It runs on 2.2.15 or later, and all 2.4 kernels.
433 433
@@ -454,7 +454,7 @@
454 454
455 455
456 456
457 33..22.. LLooggggiinngg iinn 457 3.2. Logging in
458 458
459 459
460 460
@@ -468,7 +468,7 @@
468 468
469 There are a couple of other ways to log in: 469 There are a couple of other ways to log in:
470 470
471 +o On a virtual console 471 o On a virtual console
472 472
473 473
474 474
@@ -480,7 +480,7 @@
480 480
481 481
482 482
483 +o Over the serial line 483 o Over the serial line
484 484
485 485
486 In the boot output, find a line that looks like: 486 In the boot output, find a line that looks like:
@@ -503,7 +503,7 @@
503 503
504 504
505 505
506 +o Over the net 506 o Over the net
507 507
508 508
509 If the network is running, then you can telnet to the virtual 509 If the network is running, then you can telnet to the virtual
@@ -514,13 +514,13 @@
514 down and the process will exit. 514 down and the process will exit.
515 515
516 516
517 33..33.. EExxaammpplleess 517 3.3. Examples
518 518
519 Here are some examples of UML in action: 519 Here are some examples of UML in action:
520 520
521 +o A login session <http://user-mode-linux.sourceforge.net/login.html> 521 o A login session <http://user-mode-linux.sourceforge.net/login.html>
522 522
523 +o A virtual network <http://user-mode-linux.sourceforge.net/net.html> 523 o A virtual network <http://user-mode-linux.sourceforge.net/net.html>
524 524
525 525
526 526
@@ -528,12 +528,12 @@
528 528
529 529
530 530
531 44.. UUMMLL oonn 22GG//22GG hhoossttss 531 4. UML on 2G/2G hosts
532 532
533 533
534 534
535 535
536 44..11.. IInnttrroodduuccttiioonn 536 4.1. Introduction
537 537
538 538
539 Most Linux machines are configured so that the kernel occupies the 539 Most Linux machines are configured so that the kernel occupies the
@@ -546,7 +546,7 @@
546 546
547 547
548 548
549 44..22.. TThhee pprroobblleemm 549 4.2. The problem
550 550
551 551
552 The prebuilt UML binaries on this site will not run on 2G/2G hosts 552 The prebuilt UML binaries on this site will not run on 2G/2G hosts
@@ -558,7 +558,7 @@
558 558
559 559
560 560
561 44..33.. TThhee ssoolluuttiioonn 561 4.3. The solution
562 562
563 563
564 The fix for this is to rebuild UML from source after enabling 564 The fix for this is to rebuild UML from source after enabling
@@ -576,7 +576,7 @@
576 576
577 577
578 578
579 55.. SSeettttiinngg uupp sseerriiaall lliinneess aanndd ccoonnssoolleess 579 5. Setting up serial lines and consoles
580 580
581 581
582 It is possible to attach UML serial lines and consoles to many types 582 It is possible to attach UML serial lines and consoles to many types
@@ -586,12 +586,12 @@
586 You can attach them to host ptys, ttys, file descriptors, and ports. 586 You can attach them to host ptys, ttys, file descriptors, and ports.
587 This allows you to do things like 587 This allows you to do things like
588 588
589 +o have a UML console appear on an unused host console, 589 o have a UML console appear on an unused host console,
590 590
591 +o hook two virtual machines together by having one attach to a pty 591 o hook two virtual machines together by having one attach to a pty
592 and having the other attach to the corresponding tty 592 and having the other attach to the corresponding tty
593 593
594 +o make a virtual machine accessible from the net by attaching a 594 o make a virtual machine accessible from the net by attaching a
595 console to a port on the host. 595 console to a port on the host.
596 596
597 597
@@ -599,7 +599,7 @@
599 599
600 600
601 601
602 55..11.. SSppeecciiffyyiinngg tthhee ddeevviiccee 602 5.1. Specifying the device
603 603
604 Devices are specified with "con" or "ssl" (console or serial line, 604 Devices are specified with "con" or "ssl" (console or serial line,
605 respectively), optionally with a device number if you are talking 605 respectively), optionally with a device number if you are talking
@@ -626,13 +626,13 @@
626 626
627 627
628 628
629 55..22.. SSppeecciiffyyiinngg tthhee cchhaannnneell 629 5.2. Specifying the channel
630 630
631 There are a number of different types of channels to attach a UML 631 There are a number of different types of channels to attach a UML
632 device to, each with a different way of specifying exactly what to 632 device to, each with a different way of specifying exactly what to
633 attach to. 633 attach to.
634 634
635 +o pseudo-terminals - device=pty pts terminals - device=pts 635 o pseudo-terminals - device=pty pts terminals - device=pts
636 636
637 637
638 This will cause UML to allocate a free host pseudo-terminal for the 638 This will cause UML to allocate a free host pseudo-terminal for the
@@ -640,20 +640,20 @@
640 log. You access it by attaching a terminal program to the 640 log. You access it by attaching a terminal program to the
641 corresponding tty: 641 corresponding tty:
642 642
643 +o screen /dev/pts/n 643 o screen /dev/pts/n
644 644
645 +o screen /dev/ttyxx 645 o screen /dev/ttyxx
646 646
647 +o minicom -o -p /dev/ttyxx - minicom seems not able to handle pts 647 o minicom -o -p /dev/ttyxx - minicom seems not able to handle pts
648 devices 648 devices
649 649
650 +o kermit - start it up, 'open' the device, then 'connect' 650 o kermit - start it up, 'open' the device, then 'connect'
651 651
652 652
653 653
654 654
655 655
656 +o terminals - device=tty:tty device file 656 o terminals - device=tty:tty device file
657 657
658 658
659 This will make UML attach the device to the specified tty (i.e 659 This will make UML attach the device to the specified tty (i.e
@@ -672,7 +672,7 @@
672 672
673 673
674 674
675 +o xterms - device=xterm 675 o xterms - device=xterm
676 676
677 677
678 UML will run an xterm and the device will be attached to it. 678 UML will run an xterm and the device will be attached to it.
@@ -681,7 +681,7 @@
681 681
682 682
683 683
684 +o Port - device=port:port number 684 o Port - device=port:port number
685 685
686 686
687 This will attach the UML devices to the specified host port. 687 This will attach the UML devices to the specified host port.
@@ -725,7 +725,7 @@
725 725
726 726
727 727
728 +o already-existing file descriptors - device=file descriptor 728 o already-existing file descriptors - device=file descriptor
729 729
730 730
731 If you set up a file descriptor on the UML command line, you can 731 If you set up a file descriptor on the UML command line, you can
@@ -743,7 +743,7 @@
743 743
744 744
745 745
746 +o Nothing - device=null 746 o Nothing - device=null
747 747
748 748
749 This allows the device to be opened, in contrast to 'none', but 749 This allows the device to be opened, in contrast to 'none', but
@@ -754,7 +754,7 @@
754 754
755 755
756 756
757 +o None - device=none 757 o None - device=none
758 758
759 759
760 This causes the device to disappear. 760 This causes the device to disappear.
@@ -770,7 +770,7 @@
770 770
771 771
772 772
773 will cause serial line 3 to accept input on the host's /dev/tty3 and 773 will cause serial line 3 to accept input on the host's /dev/tty2 and
774 display output on an xterm. That's a silly example - the most common 774 display output on an xterm. That's a silly example - the most common
775 use of this syntax is to reattach the main console to stdin and stdout 775 use of this syntax is to reattach the main console to stdin and stdout
776 as shown above. 776 as shown above.
@@ -785,7 +785,7 @@
785 785
786 786
787 787
788 55..33.. EExxaammpplleess 788 5.3. Examples
789 789
790 There are a number of interesting things you can do with this 790 There are a number of interesting things you can do with this
791 capability. 791 capability.
@@ -838,7 +838,7 @@
838 prompt of the other virtual machine. 838 prompt of the other virtual machine.
839 839
840 840
841 66.. SSeettttiinngg uupp tthhee nneettwwoorrkk 841 6. Setting up the network
842 842
843 843
844 844
@@ -858,19 +858,19 @@
858 There are currently five transport types available for a UML virtual 858 There are currently five transport types available for a UML virtual
859 machine to exchange packets with other hosts: 859 machine to exchange packets with other hosts:
860 860
861 +o ethertap 861 o ethertap
862 862
863 +o TUN/TAP 863 o TUN/TAP
864 864
865 +o Multicast 865 o Multicast
866 866
867 +o a switch daemon 867 o a switch daemon
868 868
869 +o slip 869 o slip
870 870
871 +o slirp 871 o slirp
872 872
873 +o pcap 873 o pcap
874 874
875 The TUN/TAP, ethertap, slip, and slirp transports allow a UML 875 The TUN/TAP, ethertap, slip, and slirp transports allow a UML
876 instance to exchange packets with the host. They may be directed 876 instance to exchange packets with the host. They may be directed
@@ -893,28 +893,28 @@
893 With so many host transports, which one should you use? Here's when 893 With so many host transports, which one should you use? Here's when
894 you should use each one: 894 you should use each one:
895 895
896 +o ethertap - if you want access to the host networking and it is 896 o ethertap - if you want access to the host networking and it is
897 running 2.2 897 running 2.2
898 898
899 +o TUN/TAP - if you want access to the host networking and it is 899 o TUN/TAP - if you want access to the host networking and it is
900 running 2.4. Also, the TUN/TAP transport is able to use a 900 running 2.4. Also, the TUN/TAP transport is able to use a
901 preconfigured device, allowing it to avoid using the setuid uml_net 901 preconfigured device, allowing it to avoid using the setuid uml_net
902 helper, which is a security advantage. 902 helper, which is a security advantage.
903 903
904 +o Multicast - if you want a purely virtual network and you don't want 904 o Multicast - if you want a purely virtual network and you don't want
905 to set up anything but the UML 905 to set up anything but the UML
906 906
907 +o a switch daemon - if you want a purely virtual network and you 907 o a switch daemon - if you want a purely virtual network and you
908 don't mind running the daemon in order to get somewhat better 908 don't mind running the daemon in order to get somewhat better
909 performance 909 performance
910 910
911 +o slip - there is no particular reason to run the slip backend unless 911 o slip - there is no particular reason to run the slip backend unless
912 ethertap and TUN/TAP are just not available for some reason 912 ethertap and TUN/TAP are just not available for some reason
913 913
914 +o slirp - if you don't have root access on the host to setup 914 o slirp - if you don't have root access on the host to setup
915 networking, or if you don't want to allocate an IP to your UML 915 networking, or if you don't want to allocate an IP to your UML
916 916
917 +o pcap - not much use for actual network connectivity, but great for 917 o pcap - not much use for actual network connectivity, but great for
918 monitoring traffic on the host 918 monitoring traffic on the host
919 919
920 Ethertap is available on 2.4 and works fine. TUN/TAP is preferred 920 Ethertap is available on 2.4 and works fine. TUN/TAP is preferred
@@ -926,7 +926,7 @@
926 exploit the helper's root privileges. 926 exploit the helper's root privileges.
927 927
928 928
929 66..11.. GGeenneerraall sseettuupp 929 6.1. General setup
930 930
931 First, you must have the virtual network enabled in your UML. If are 931 First, you must have the virtual network enabled in your UML. If are
932 running a prebuilt kernel from this site, everything is already 932 running a prebuilt kernel from this site, everything is already
@@ -995,7 +995,7 @@
995 995
996 996
997 997
998 66..22.. UUsseerrssppaaccee ddaaeemmoonnss 998 6.2. Userspace daemons
999 999
1000 You will likely need the setuid helper, or the switch daemon, or both. 1000 You will likely need the setuid helper, or the switch daemon, or both.
1001 They are both installed with the RPM and deb, so if you've installed 1001 They are both installed with the RPM and deb, so if you've installed
@@ -1011,7 +1011,7 @@
1011 1011
1012 1012
1013 1013
1014 66..33.. SSppeecciiffyyiinngg eetthheerrnneett aaddddrreesssseess 1014 6.3. Specifying ethernet addresses
1015 1015
1016 Below, you will see that the TUN/TAP, ethertap, and daemon interfaces 1016 Below, you will see that the TUN/TAP, ethertap, and daemon interfaces
1017 allow you to specify hardware addresses for the virtual ethernet 1017 allow you to specify hardware addresses for the virtual ethernet
@@ -1023,11 +1023,11 @@
1023 sufficient to guarantee a unique hardware address for the device. A 1023 sufficient to guarantee a unique hardware address for the device. A
1024 couple of exceptions are: 1024 couple of exceptions are:
1025 1025
1026 +o Another set of virtual ethernet devices are on the same network and 1026 o Another set of virtual ethernet devices are on the same network and
1027 they are assigned hardware addresses using a different scheme which 1027 they are assigned hardware addresses using a different scheme which
1028 may conflict with the UML IP address-based scheme 1028 may conflict with the UML IP address-based scheme
1029 1029
1030 +o You aren't going to use the device for IP networking, so you don't 1030 o You aren't going to use the device for IP networking, so you don't
1031 assign the device an IP address 1031 assign the device an IP address
1032 1032
1033 If you let the driver provide the hardware address, you should make 1033 If you let the driver provide the hardware address, you should make
@@ -1049,7 +1049,7 @@
1049 1049
1050 1050
1051 1051
1052 66..44.. UUMMLL iinntteerrffaaccee sseettuupp 1052 6.4. UML interface setup
1053 1053
1054 Once the network devices have been described on the command line, you 1054 Once the network devices have been described on the command line, you
1055 should boot UML and log in. 1055 should boot UML and log in.
@@ -1131,7 +1131,7 @@
1131 1131
1132 1132
1133 1133
1134 66..55.. MMuullttiiccaasstt 1134 6.5. Multicast
1135 1135
1136 The simplest way to set up a virtual network between multiple UMLs is 1136 The simplest way to set up a virtual network between multiple UMLs is
1137 to use the mcast transport. This was written by Harald Welte and is 1137 to use the mcast transport. This was written by Harald Welte and is
@@ -1194,7 +1194,7 @@
1194 1194
1195 1195
1196 1196
1197 66..66.. TTUUNN//TTAAPP wwiitthh tthhee uummll__nneett hheellppeerr 1197 6.6. TUN/TAP with the uml_net helper
1198 1198
1199 TUN/TAP is the preferred mechanism on 2.4 to exchange packets with the 1199 TUN/TAP is the preferred mechanism on 2.4 to exchange packets with the
1200 host. The TUN/TAP backend has been in UML since 2.4.9-3um. 1200 host. The TUN/TAP backend has been in UML since 2.4.9-3um.
@@ -1247,10 +1247,10 @@
1247 There are a couple potential problems with running the TUN/TAP 1247 There are a couple potential problems with running the TUN/TAP
1248 transport on a 2.4 host kernel 1248 transport on a 2.4 host kernel
1249 1249
1250 +o TUN/TAP seems not to work on 2.4.3 and earlier. Upgrade the host 1250 o TUN/TAP seems not to work on 2.4.3 and earlier. Upgrade the host
1251 kernel or use the ethertap transport. 1251 kernel or use the ethertap transport.
1252 1252
1253 +o With an upgraded kernel, TUN/TAP may fail with 1253 o With an upgraded kernel, TUN/TAP may fail with
1254 1254
1255 1255
1256 File descriptor in bad state 1256 File descriptor in bad state
@@ -1269,7 +1269,7 @@
1269 1269
1270 1270
1271 1271
1272 66..77.. TTUUNN//TTAAPP wwiitthh aa pprreeccoonnffiigguurreedd ttaapp ddeevviiccee 1272 6.7. TUN/TAP with a preconfigured tap device
1273 1273
1274 If you prefer not to have UML use uml_net (which is somewhat 1274 If you prefer not to have UML use uml_net (which is somewhat
1275 insecure), with UML 2.4.17-11, you can set up a TUN/TAP device 1275 insecure), with UML 2.4.17-11, you can set up a TUN/TAP device
@@ -1277,7 +1277,7 @@
1277 there is no need for root assistance. Setting up the device is done 1277 there is no need for root assistance. Setting up the device is done
1278 as follows: 1278 as follows:
1279 1279
1280 +o Create the device with tunctl (available from the UML utilities 1280 o Create the device with tunctl (available from the UML utilities
1281 tarball) 1281 tarball)
1282 1282
1283 1283
@@ -1291,7 +1291,7 @@
1291 where uid is the user id or username that UML will be run as. This 1291 where uid is the user id or username that UML will be run as. This
1292 will tell you what device was created. 1292 will tell you what device was created.
1293 1293
1294 +o Configure the device IP (change IP addresses and device name to 1294 o Configure the device IP (change IP addresses and device name to
1295 suit) 1295 suit)
1296 1296
1297 1297
@@ -1303,7 +1303,7 @@
1303 1303
1304 1304
1305 1305
1306 +o Set up routing and arping if desired - this is my recipe, there are 1306 o Set up routing and arping if desired - this is my recipe, there are
1307 other ways of doing the same thing 1307 other ways of doing the same thing
1308 1308
1309 1309
@@ -1338,7 +1338,7 @@
1338 utility which reads the information from a config file and sets up 1338 utility which reads the information from a config file and sets up
1339 devices at boot time. 1339 devices at boot time.
1340 1340
1341 +o Rather than using up two IPs and ARPing for one of them, you can 1341 o Rather than using up two IPs and ARPing for one of them, you can
1342 also provide direct access to your LAN by the UML by using a 1342 also provide direct access to your LAN by the UML by using a
1343 bridge. 1343 bridge.
1344 1344
@@ -1417,7 +1417,7 @@
1417 Note that 'br0' should be setup using ifconfig with the existing IP 1417 Note that 'br0' should be setup using ifconfig with the existing IP
1418 address of eth0, as eth0 no longer has its own IP. 1418 address of eth0, as eth0 no longer has its own IP.
1419 1419
1420 +o 1420 o
1421 1421
1422 1422
1423 Also, the /dev/net/tun device must be writable by the user running 1423 Also, the /dev/net/tun device must be writable by the user running
@@ -1438,11 +1438,11 @@
1438 devices and chgrp /dev/net/tun to that group with mode 664 or 660. 1438 devices and chgrp /dev/net/tun to that group with mode 664 or 660.
1439 1439
1440 1440
1441 +o Once the device is set up, run UML with 'eth0=tuntap,device name' 1441 o Once the device is set up, run UML with 'eth0=tuntap,device name'
1442 (i.e. 'eth0=tuntap,tap0') on the command line (or do it with the 1442 (i.e. 'eth0=tuntap,tap0') on the command line (or do it with the
1443 mconsole config command). 1443 mconsole config command).
1444 1444
1445 +o Bring the eth device up in UML and you're in business. 1445 o Bring the eth device up in UML and you're in business.
1446 1446
1447 If you don't want that tap device any more, you can make it non- 1447 If you don't want that tap device any more, you can make it non-
1448 persistent with 1448 persistent with
@@ -1465,7 +1465,7 @@
1465 1465
1466 1466
1467 1467
1468 66..88.. EEtthheerrttaapp 1468 6.8. Ethertap
1469 1469
1470 Ethertap is the general mechanism on 2.2 for userspace processes to 1470 Ethertap is the general mechanism on 2.2 for userspace processes to
1471 exchange packets with the kernel. 1471 exchange packets with the kernel.
@@ -1561,9 +1561,9 @@
1561 1561
1562 1562
1563 1563
1564 66..99.. TThhee sswwiittcchh ddaaeemmoonn 1564 6.9. The switch daemon
1565 1565
1566 NNoottee: This is the daemon formerly known as uml_router, but which was 1566 Note: This is the daemon formerly known as uml_router, but which was
1567 renamed so the network weenies of the world would stop growling at me. 1567 renamed so the network weenies of the world would stop growling at me.
1568 1568
1569 1569
@@ -1649,7 +1649,7 @@
1649 1649
1650 1650
1651 1651
1652 66..1100.. SSlliipp 1652 6.10. Slip
1653 1653
1654 Slip is another, less general, mechanism for a process to communicate 1654 Slip is another, less general, mechanism for a process to communicate
1655 with the host networking. In contrast to the ethertap interface, 1655 with the host networking. In contrast to the ethertap interface,
@@ -1681,7 +1681,7 @@
1681 1681
1682 1682
1683 1683
1684 66..1111.. SSlliirrpp 1684 6.11. Slirp
1685 1685
1686 slirp uses an external program, usually /usr/bin/slirp, to provide IP 1686 slirp uses an external program, usually /usr/bin/slirp, to provide IP
1687 only networking connectivity through the host. This is similar to IP 1687 only networking connectivity through the host. This is similar to IP
@@ -1737,7 +1737,7 @@
1737 1737
1738 1738
1739 1739
1740 66..1122.. ppccaapp 1740 6.12. pcap
1741 1741
1742 The pcap transport is attached to a UML ethernet device on the command 1742 The pcap transport is attached to a UML ethernet device on the command
1743 line or with uml_mconsole with the following syntax: 1743 line or with uml_mconsole with the following syntax:
@@ -1777,7 +1777,7 @@
1777 1777
1778 1778
1779 1779
1780 66..1133.. SSeettttiinngg uupp tthhee hhoosstt yyoouurrsseellff 1780 6.13. Setting up the host yourself
1781 1781
1782 If you don't specify an address for the host side of the ethertap or 1782 If you don't specify an address for the host side of the ethertap or
1783 slip device, UML won't do any setup on the host. So this is what is 1783 slip device, UML won't do any setup on the host. So this is what is
@@ -1785,7 +1785,7 @@
1785 192.168.0.251 and a UML-side IP of 192.168.0.250 - adjust to suit your 1785 192.168.0.251 and a UML-side IP of 192.168.0.250 - adjust to suit your
1786 own network): 1786 own network):
1787 1787
1788 +o The device needs to be configured with its IP address. Tap devices 1788 o The device needs to be configured with its IP address. Tap devices
1789 are also configured with an mtu of 1484. Slip devices are 1789 are also configured with an mtu of 1484. Slip devices are
1790 configured with a point-to-point address pointing at the UML ip 1790 configured with a point-to-point address pointing at the UML ip
1791 address. 1791 address.
@@ -1805,7 +1805,7 @@
1805 1805
1806 1806
1807 1807
1808 +o If a tap device is being set up, a route is set to the UML IP. 1808 o If a tap device is being set up, a route is set to the UML IP.
1809 1809
1810 1810
1811 UML# route add -host 192.168.0.250 gw 192.168.0.251 1811 UML# route add -host 192.168.0.250 gw 192.168.0.251
@@ -1814,7 +1814,7 @@
1814 1814
1815 1815
1816 1816
1817 +o To allow other hosts on your network to see the virtual machine, 1817 o To allow other hosts on your network to see the virtual machine,
1818 proxy arp is set up for it. 1818 proxy arp is set up for it.
1819 1819
1820 1820
@@ -1824,7 +1824,7 @@
1824 1824
1825 1825
1826 1826
1827 +o Finally, the host is set up to route packets. 1827 o Finally, the host is set up to route packets.
1828 1828
1829 1829
1830 host# echo 1 > /proc/sys/net/ipv4/ip_forward 1830 host# echo 1 > /proc/sys/net/ipv4/ip_forward
@@ -1838,12 +1838,12 @@
1838 1838
1839 1839
1840 1840
1841 77.. SShhaarriinngg FFiilleessyysstteemmss bbeettwweeeenn VViirrttuuaall MMaacchhiinneess 1841 7. Sharing Filesystems between Virtual Machines
1842 1842
1843 1843
1844 1844
1845 1845
1846 77..11.. AA wwaarrnniinngg 1846 7.1. A warning
1847 1847
1848 Don't attempt to share filesystems simply by booting two UMLs from the 1848 Don't attempt to share filesystems simply by booting two UMLs from the
1849 same file. That's the same thing as booting two physical machines 1849 same file. That's the same thing as booting two physical machines
@@ -1851,7 +1851,7 @@
1851 1851
1852 1852
1853 1853
1854 77..22.. UUssiinngg llaayyeerreedd bblloocckk ddeevviicceess 1854 7.2. Using layered block devices
1855 1855
1856 The way to share a filesystem between two virtual machines is to use 1856 The way to share a filesystem between two virtual machines is to use
1857 the copy-on-write (COW) layering capability of the ubd block driver. 1857 the copy-on-write (COW) layering capability of the ubd block driver.
@@ -1896,7 +1896,7 @@
1896 1896
1897 1897
1898 1898
1899 77..33.. NNoottee!! 1899 7.3. Note!
1900 1900
1901 When checking the size of the COW file in order to see the gobs of 1901 When checking the size of the COW file in order to see the gobs of
1902 space that you're saving, make sure you use 'ls -ls' to see the actual 1902 space that you're saving, make sure you use 'ls -ls' to see the actual
@@ -1926,7 +1926,7 @@
1926 1926
1927 1927
1928 1928
1929 77..44.. AAnnootthheerr wwaarrnniinngg 1929 7.4. Another warning
1930 1930
1931 Once a filesystem is being used as a readonly backing file for a COW 1931 Once a filesystem is being used as a readonly backing file for a COW
1932 file, do not boot directly from it or modify it in any way. Doing so 1932 file, do not boot directly from it or modify it in any way. Doing so
@@ -1952,7 +1952,7 @@
1952 1952
1953 1953
1954 1954
1955 77..55.. uummll__mmoooo :: MMeerrggiinngg aa CCOOWW ffiillee wwiitthh iittss bbaacckkiinngg ffiillee 1955 7.5. uml_moo : Merging a COW file with its backing file
1956 1956
1957 Depending on how you use UML and COW devices, it may be advisable to 1957 Depending on how you use UML and COW devices, it may be advisable to
1958 merge the changes in the COW file into the backing file every once in 1958 merge the changes in the COW file into the backing file every once in
@@ -2001,7 +2001,7 @@
2001 2001
2002 2002
2003 2003
2004 88.. CCrreeaattiinngg ffiilleessyysstteemmss 2004 8. Creating filesystems
2005 2005
2006 2006
2007 You may want to create and mount new UML filesystems, either because 2007 You may want to create and mount new UML filesystems, either because
@@ -2015,7 +2015,7 @@
2015 should be easy to translate to the filesystem of your choice. 2015 should be easy to translate to the filesystem of your choice.
2016 2016
2017 2017
2018 88..11.. CCrreeaattee tthhee ffiilleessyysstteemm ffiillee 2018 8.1. Create the filesystem file
2019 2019
2020 dd is your friend. All you need to do is tell dd to create an empty 2020 dd is your friend. All you need to do is tell dd to create an empty
2021 file of the appropriate size. I usually make it sparse to save time 2021 file of the appropriate size. I usually make it sparse to save time
@@ -2032,7 +2032,7 @@
2032 2032
2033 2033
2034 2034
2035 88..22.. AAssssiiggnn tthhee ffiillee ttoo aa UUMMLL ddeevviiccee 2035 8.2. Assign the file to a UML device
2036 2036
2037 Add an argument like the following to the UML command line: 2037 Add an argument like the following to the UML command line:
2038 2038
@@ -2045,7 +2045,7 @@
2045 2045
2046 2046
2047 2047
2048 88..33.. CCrreeaattiinngg aanndd mmoouunnttiinngg tthhee ffiilleessyysstteemm 2048 8.3. Creating and mounting the filesystem
2049 2049
2050 Make sure that the filesystem is available, either by being built into 2050 Make sure that the filesystem is available, either by being built into
2051 the kernel, or available as a module, then boot up UML and log in. If 2051 the kernel, or available as a module, then boot up UML and log in. If
@@ -2096,7 +2096,7 @@
2096 2096
2097 2097
2098 2098
2099 99.. HHoosstt ffiillee aacccceessss 2099 9. Host file access
2100 2100
2101 2101
2102 If you want to access files on the host machine from inside UML, you 2102 If you want to access files on the host machine from inside UML, you
@@ -2112,7 +2112,7 @@
2112 files contained in it just as you would on the host. 2112 files contained in it just as you would on the host.
2113 2113
2114 2114
2115 99..11.. UUssiinngg hhoossttffss 2115 9.1. Using hostfs
2116 2116
2117 To begin with, make sure that hostfs is available inside the virtual 2117 To begin with, make sure that hostfs is available inside the virtual
2118 machine with 2118 machine with
@@ -2151,7 +2151,7 @@
2151 2151
2152 2152
2153 2153
2154 99..22.. hhoossttffss aass tthhee rroooott ffiilleessyysstteemm 2154 9.2. hostfs as the root filesystem
2155 2155
2156 It's possible to boot from a directory hierarchy on the host using 2156 It's possible to boot from a directory hierarchy on the host using
2157 hostfs rather than using the standard filesystem in a file. 2157 hostfs rather than using the standard filesystem in a file.
@@ -2194,20 +2194,20 @@
2194 UML should then boot as it does normally. 2194 UML should then boot as it does normally.
2195 2195
2196 2196
2197 99..33.. BBuuiillddiinngg hhoossttffss 2197 9.3. Building hostfs
2198 2198
2199 If you need to build hostfs because it's not in your kernel, you have 2199 If you need to build hostfs because it's not in your kernel, you have
2200 two choices: 2200 two choices:
2201 2201
2202 2202
2203 2203
2204 +o Compiling hostfs into the kernel: 2204 o Compiling hostfs into the kernel:
2205 2205
2206 2206
2207 Reconfigure the kernel and set the 'Host filesystem' option under 2207 Reconfigure the kernel and set the 'Host filesystem' option under
2208 2208
2209 2209
2210 +o Compiling hostfs as a module: 2210 o Compiling hostfs as a module:
2211 2211
2212 2212
2213 Reconfigure the kernel and set the 'Host filesystem' option under 2213 Reconfigure the kernel and set the 'Host filesystem' option under
@@ -2228,7 +2228,7 @@
2228 2228
2229 2229
2230 2230
2231 1100.. TThhee MMaannaaggeemmeenntt CCoonnssoollee 2231 10. The Management Console
2232 2232
2233 2233
2234 2234
@@ -2240,15 +2240,15 @@
2240 2240
2241 There are a number of things you can do with the mconsole interface: 2241 There are a number of things you can do with the mconsole interface:
2242 2242
2243 +o get the kernel version 2243 o get the kernel version
2244 2244
2245 +o add and remove devices 2245 o add and remove devices
2246 2246
2247 +o halt or reboot the machine 2247 o halt or reboot the machine
2248 2248
2249 +o Send SysRq commands 2249 o Send SysRq commands
2250 2250
2251 +o Pause and resume the UML 2251 o Pause and resume the UML
2252 2252
2253 2253
2254 You need the mconsole client (uml_mconsole) which is present in CVS 2254 You need the mconsole client (uml_mconsole) which is present in CVS
@@ -2300,28 +2300,28 @@
2300 2300
2301 You'll get a prompt, at which you can run one of these commands: 2301 You'll get a prompt, at which you can run one of these commands:
2302 2302
2303 +o version 2303 o version
2304 2304
2305 +o halt 2305 o halt
2306 2306
2307 +o reboot 2307 o reboot
2308 2308
2309 +o config 2309 o config
2310 2310
2311 +o remove 2311 o remove
2312 2312
2313 +o sysrq 2313 o sysrq
2314 2314
2315 +o help 2315 o help
2316 2316
2317 +o cad 2317 o cad
2318 2318
2319 +o stop 2319 o stop
2320 2320
2321 +o go 2321 o go
2322 2322
2323 2323
2324 1100..11.. vveerrssiioonn 2324 10.1. version
2325 2325
2326 This takes no arguments. It prints the UML version. 2326 This takes no arguments. It prints the UML version.
2327 2327
@@ -2342,7 +2342,7 @@
2342 2342
2343 2343
2344 2344
2345 1100..22.. hhaalltt aanndd rreebboooott 2345 10.2. halt and reboot
2346 2346
2347 These take no arguments. They shut the machine down immediately, with 2347 These take no arguments. They shut the machine down immediately, with
2348 no syncing of disks and no clean shutdown of userspace. So, they are 2348 no syncing of disks and no clean shutdown of userspace. So, they are
@@ -2357,7 +2357,7 @@
2357 2357
2358 2358
2359 2359
2360 1100..33.. ccoonnffiigg 2360 10.3. config
2361 2361
2362 "config" adds a new device to the virtual machine. Currently the ubd 2362 "config" adds a new device to the virtual machine. Currently the ubd
2363 and network drivers support this. It takes one argument, which is the 2363 and network drivers support this. It takes one argument, which is the
@@ -2378,7 +2378,7 @@
2378 2378
2379 2379
2380 2380
2381 1100..44.. rreemmoovvee 2381 10.4. remove
2382 2382
2383 "remove" deletes a device from the system. Its argument is just the 2383 "remove" deletes a device from the system. Its argument is just the
2384 name of the device to be removed. The device must be idle in whatever 2384 name of the device to be removed. The device must be idle in whatever
@@ -2397,7 +2397,7 @@
2397 2397
2398 2398
2399 2399
2400 1100..55.. ssyyssrrqq 2400 10.5. sysrq
2401 2401
2402 This takes one argument, which is a single letter. It calls the 2402 This takes one argument, which is a single letter. It calls the
2403 generic kernel's SysRq driver, which does whatever is called for by 2403 generic kernel's SysRq driver, which does whatever is called for by
@@ -2407,14 +2407,14 @@
2407 2407
2408 2408
2409 2409
2410 1100..66.. hheellpp 2410 10.6. help
2411 2411
2412 "help" returns a string listing the valid commands and what each one 2412 "help" returns a string listing the valid commands and what each one
2413 does. 2413 does.
2414 2414
2415 2415
2416 2416
2417 1100..77.. ccaadd 2417 10.7. cad
2418 2418
2419 This invokes the Ctl-Alt-Del action on init. What exactly this ends 2419 This invokes the Ctl-Alt-Del action on init. What exactly this ends
2420 up doing is up to /etc/inittab. Normally, it reboots the machine. 2420 up doing is up to /etc/inittab. Normally, it reboots the machine.
@@ -2432,7 +2432,7 @@
2432 2432
2433 2433
2434 2434
2435 1100..88.. ssttoopp 2435 10.8. stop
2436 2436
2437 This puts the UML in a loop reading mconsole requests until a 'go' 2437 This puts the UML in a loop reading mconsole requests until a 'go'
2438 mconsole command is received. This is very useful for making backups 2438 mconsole command is received. This is very useful for making backups
@@ -2448,7 +2448,7 @@
2448 2448
2449 2449
2450 2450
2451 1100..99.. ggoo 2451 10.9. go
2452 2452
2453 This resumes a UML after being paused by a 'stop' command. Note that 2453 This resumes a UML after being paused by a 'stop' command. Note that
2454 when the UML has resumed, TCP connections may have timed out and if 2454 when the UML has resumed, TCP connections may have timed out and if
@@ -2462,10 +2462,10 @@
2462 2462
2463 2463
2464 2464
2465 1111.. KKeerrnneell ddeebbuuggggiinngg 2465 11. Kernel debugging
2466 2466
2467 2467
2468 NNoottee:: The interface that makes debugging, as described here, possible 2468 Note: The interface that makes debugging, as described here, possible
2469 is present in 2.4.0-test6 kernels and later. 2469 is present in 2.4.0-test6 kernels and later.
2470 2470
2471 2471
@@ -2485,7 +2485,7 @@
2485 2485
2486 2486
2487 2487
2488 1111..11.. SSttaarrttiinngg tthhee kkeerrnneell uunnddeerr ggddbb 2488 11.1. Starting the kernel under gdb
2489 2489
2490 You can have the kernel running under the control of gdb from the 2490 You can have the kernel running under the control of gdb from the
2491 beginning by putting 'debug' on the command line. You will get an 2491 beginning by putting 'debug' on the command line. You will get an
@@ -2498,7 +2498,7 @@
2498 There is a transcript of a debugging session here <debug- 2498 There is a transcript of a debugging session here <debug-
2499 session.html> , with breakpoints being set in the scheduler and in an 2499 session.html> , with breakpoints being set in the scheduler and in an
2500 interrupt handler. 2500 interrupt handler.
2501 1111..22.. EExxaammiinniinngg sslleeeeppiinngg pprroocceesssseess 2501 11.2. Examining sleeping processes
2502 2502
2503 Not every bug is evident in the currently running process. Sometimes, 2503 Not every bug is evident in the currently running process. Sometimes,
2504 processes hang in the kernel when they shouldn't because they've 2504 processes hang in the kernel when they shouldn't because they've
@@ -2516,7 +2516,7 @@
2516 2516
2517 Now what you do is this: 2517 Now what you do is this:
2518 2518
2519 +o detach from the current thread 2519 o detach from the current thread
2520 2520
2521 2521
2522 (UML gdb) det 2522 (UML gdb) det
@@ -2525,7 +2525,7 @@
2525 2525
2526 2526
2527 2527
2528 +o attach to the thread you are interested in 2528 o attach to the thread you are interested in
2529 2529
2530 2530
2531 (UML gdb) att <host pid> 2531 (UML gdb) att <host pid>
@@ -2534,7 +2534,7 @@
2534 2534
2535 2535
2536 2536
2537 +o look at its stack and anything else of interest 2537 o look at its stack and anything else of interest
2538 2538
2539 2539
2540 (UML gdb) bt 2540 (UML gdb) bt
@@ -2545,7 +2545,7 @@
2545 Note that you can't do anything at this point that requires that a 2545 Note that you can't do anything at this point that requires that a
2546 process execute, e.g. calling a function 2546 process execute, e.g. calling a function
2547 2547
2548 +o when you're done looking at that process, reattach to the current 2548 o when you're done looking at that process, reattach to the current
2549 thread and continue it 2549 thread and continue it
2550 2550
2551 2551
@@ -2569,12 +2569,12 @@
2569 2569
2570 2570
2571 2571
2572 1111..33.. RRuunnnniinngg dddddd oonn UUMMLL 2572 11.3. Running ddd on UML
2573 2573
2574 ddd works on UML, but requires a special kludge. The process goes 2574 ddd works on UML, but requires a special kludge. The process goes
2575 like this: 2575 like this:
2576 2576
2577 +o Start ddd 2577 o Start ddd
2578 2578
2579 2579
2580 host% ddd linux 2580 host% ddd linux
@@ -2583,14 +2583,14 @@
2583 2583
2584 2584
2585 2585
2586 +o With ps, get the pid of the gdb that ddd started. You can ask the 2586 o With ps, get the pid of the gdb that ddd started. You can ask the
2587 gdb to tell you, but for some reason that confuses things and 2587 gdb to tell you, but for some reason that confuses things and
2588 causes a hang. 2588 causes a hang.
2589 2589
2590 +o run UML with 'debug=parent gdb-pid=<pid>' added to the command line 2590 o run UML with 'debug=parent gdb-pid=<pid>' added to the command line
2591 - it will just sit there after you hit return 2591 - it will just sit there after you hit return
2592 2592
2593 +o type 'att 1' to the ddd gdb and you will see something like 2593 o type 'att 1' to the ddd gdb and you will see something like
2594 2594
2595 2595
2596 0xa013dc51 in __kill () 2596 0xa013dc51 in __kill ()
@@ -2602,12 +2602,12 @@
2602 2602
2603 2603
2604 2604
2605 +o At this point, type 'c', UML will boot up, and you can use ddd just 2605 o At this point, type 'c', UML will boot up, and you can use ddd just
2606 as you do on any other process. 2606 as you do on any other process.
2607 2607
2608 2608
2609 2609
2610 1111..44.. DDeebbuuggggiinngg mmoodduulleess 2610 11.4. Debugging modules
2611 2611
2612 gdb has support for debugging code which is dynamically loaded into 2612 gdb has support for debugging code which is dynamically loaded into
2613 the process. This support is what is needed to debug kernel modules 2613 the process. This support is what is needed to debug kernel modules
@@ -2823,7 +2823,7 @@
2823 2823
2824 2824
2825 2825
2826 1111..55.. AAttttaacchhiinngg ggddbb ttoo tthhee kkeerrnneell 2826 11.5. Attaching gdb to the kernel
2827 2827
2828 If you don't have the kernel running under gdb, you can attach gdb to 2828 If you don't have the kernel running under gdb, you can attach gdb to
2829 it later by sending the tracing thread a SIGUSR1. The first line of 2829 it later by sending the tracing thread a SIGUSR1. The first line of
@@ -2857,7 +2857,7 @@
2857 2857
2858 2858
2859 2859
2860 1111..66.. UUssiinngg aalltteerrnnaattee ddeebbuuggggeerrss 2860 11.6. Using alternate debuggers
2861 2861
2862 UML has support for attaching to an already running debugger rather 2862 UML has support for attaching to an already running debugger rather
2863 than starting gdb itself. This is present in CVS as of 17 Apr 2001. 2863 than starting gdb itself. This is present in CVS as of 17 Apr 2001.
@@ -2886,7 +2886,7 @@
2886 An example of an alternate debugger is strace. You can strace the 2886 An example of an alternate debugger is strace. You can strace the
2887 actual kernel as follows: 2887 actual kernel as follows:
2888 2888
2889 +o Run the following in a shell 2889 o Run the following in a shell
2890 2890
2891 2891
2892 host% 2892 host%
@@ -2894,10 +2894,10 @@
2894 2894
2895 2895
2896 2896
2897 +o Run UML with 'debug' and 'gdb-pid=<pid>' with the pid printed out 2897 o Run UML with 'debug' and 'gdb-pid=<pid>' with the pid printed out
2898 by the previous command 2898 by the previous command
2899 2899
2900 +o Hit return in the shell, and UML will start running, and strace 2900 o Hit return in the shell, and UML will start running, and strace
2901 output will start accumulating in the output file. 2901 output will start accumulating in the output file.
2902 2902
2903 Note that this is different from running 2903 Note that this is different from running
@@ -2917,9 +2917,9 @@
2917 2917
2918 2918
2919 2919
2920 1122.. KKeerrnneell ddeebbuuggggiinngg eexxaammpplleess 2920 12. Kernel debugging examples
2921 2921
2922 1122..11.. TThhee ccaassee ooff tthhee hhuunngg ffsscckk 2922 12.1. The case of the hung fsck
2923 2923
2924 When booting up the kernel, fsck failed, and dropped me into a shell 2924 When booting up the kernel, fsck failed, and dropped me into a shell
2925 to fix things up. I ran fsck -y, which hung: 2925 to fix things up. I ran fsck -y, which hung:
@@ -3154,9 +3154,9 @@
3154 3154
3155 The interesting things here are : 3155 The interesting things here are :
3156 3156
3157 +o There are two segfaults on this stack (frames 9 and 14) 3157 o There are two segfaults on this stack (frames 9 and 14)
3158 3158
3159 +o The first faulting address (frame 11) is 0x50000800 3159 o The first faulting address (frame 11) is 0x50000800
3160 3160
3161 (gdb) p (void *)1342179328 3161 (gdb) p (void *)1342179328
3162 $16 = (void *) 0x50000800 3162 $16 = (void *) 0x50000800
@@ -3399,7 +3399,7 @@
3399 on will be somewhat clearer. 3399 on will be somewhat clearer.
3400 3400
3401 3401
3402 1122..22.. EEppiissooddee 22:: TThhee ccaassee ooff tthhee hhuunngg ffsscckk 3402 12.2. Episode 2: The case of the hung fsck
3403 3403
3404 After setting a trap in the SEGV handler for accesses to the signal 3404 After setting a trap in the SEGV handler for accesses to the signal
3405 thread's stack, I reran the kernel. 3405 thread's stack, I reran the kernel.
@@ -3788,12 +3788,12 @@
3788 3788
3789 3789
3790 3790
3791 1133.. WWhhaatt ttoo ddoo wwhheenn UUMMLL ddooeessnn''tt wwoorrkk 3791 13. What to do when UML doesn't work
3792 3792
3793 3793
3794 3794
3795 3795
3796 1133..11.. SSttrraannggee ccoommppiillaattiioonn eerrrroorrss wwhheenn yyoouu bbuuiilldd ffrroomm ssoouurrccee 3796 13.1. Strange compilation errors when you build from source
3797 3797
3798 As of test11, it is necessary to have "ARCH=um" in the environment or 3798 As of test11, it is necessary to have "ARCH=um" in the environment or
3799 on the make command line for all steps in building UML, including 3799 on the make command line for all steps in building UML, including
@@ -3824,8 +3824,8 @@
3824 3824
3825 3825
3826 3826
3827 1133..33.. AA vvaarriieettyy ooff ppaanniiccss aanndd hhaannggss wwiitthh //ttmmpp oonn aa rreeiisseerrffss ffiilleessyyss-- 3827 13.3. A variety of panics and hangs with /tmp on a reiserfs filesys-
3828 tteemm 3828 tem
3829 3829
3830 I saw this on reiserfs 3.5.21 and it seems to be fixed in 3.5.27. 3830 I saw this on reiserfs 3.5.21 and it seems to be fixed in 3.5.27.
3831 Panics preceded by 3831 Panics preceded by
@@ -3842,8 +3842,8 @@
3842 3842
3843 3843
3844 3844
3845 1133..44.. TThhee ccoommppiillee ffaaiillss wwiitthh eerrrroorrss aabboouutt ccoonnfflliiccttiinngg ttyyppeess ffoorr 3845 13.4. The compile fails with errors about conflicting types for
3846 ''ooppeenn'',, ''dduupp'',, aanndd ''wwaaiittppiidd'' 3846 'open', 'dup', and 'waitpid'
3847 3847
3848 This happens when you build in /usr/src/linux. The UML build makes 3848 This happens when you build in /usr/src/linux. The UML build makes
3849 the include/asm link point to include/asm-um. /usr/include/asm points 3849 the include/asm link point to include/asm-um. /usr/include/asm points
@@ -3854,14 +3854,14 @@
3854 3854
3855 3855
3856 3856
3857 1133..55.. UUMMLL ddooeessnn''tt wwoorrkk wwhheenn //ttmmpp iiss aann NNFFSS ffiilleessyysstteemm 3857 13.5. UML doesn't work when /tmp is an NFS filesystem
3858 3858
3859 This seems to be a similar situation with the ReiserFS problem above. 3859 This seems to be a similar situation with the ReiserFS problem above.
3860 Some versions of NFS seems not to handle mmap correctly, which UML 3860 Some versions of NFS seems not to handle mmap correctly, which UML
3861 depends on. The workaround is have /tmp be a non-NFS directory. 3861 depends on. The workaround is have /tmp be a non-NFS directory.
3862 3862
3863 3863
3864 1133..66.. UUMMLL hhaannggss oonn bboooott wwhheenn ccoommppiilleedd wwiitthh ggpprrooff ssuuppppoorrtt 3864 13.6. UML hangs on boot when compiled with gprof support
3865 3865
3866 If you build UML with gprof support and, early in the boot, it does 3866 If you build UML with gprof support and, early in the boot, it does
3867 this 3867 this
@@ -3878,7 +3878,7 @@
3878 3878
3879 3879
3880 3880
3881 1133..77.. ssyyssllooggdd ddiieess wwiitthh aa SSIIGGTTEERRMM oonn ssttaarrttuupp 3881 13.7. syslogd dies with a SIGTERM on startup
3882 3882
3883 The exact boot error depends on the distribution that you're booting, 3883 The exact boot error depends on the distribution that you're booting,
3884 but Debian produces this: 3884 but Debian produces this:
@@ -3897,17 +3897,17 @@
3897 3897
3898 3898
3899 3899
3900 1133..88.. TTUUNN//TTAAPP nneettwwoorrkkiinngg ddooeessnn''tt wwoorrkk oonn aa 22..44 hhoosstt 3900 13.8. TUN/TAP networking doesn't work on a 2.4 host
3901 3901
3902 There are a couple of problems which were 3902 There are a couple of problems which were
3903 <http://www.geocrawler.com/lists/3/SourceForge/597/0/> name="pointed 3903 <http://www.geocrawler.com/lists/3/SourceForge/597/0/> name="pointed
3904 out"> by Tim Robinson <timro at trkr dot net> 3904 out"> by Tim Robinson <timro at trkr dot net>
3905 3905
3906 +o It doesn't work on hosts running 2.4.7 (or thereabouts) or earlier. 3906 o It doesn't work on hosts running 2.4.7 (or thereabouts) or earlier.
3907 The fix is to upgrade to something more recent and then read the 3907 The fix is to upgrade to something more recent and then read the
3908 next item. 3908 next item.
3909 3909
3910 +o If you see 3910 o If you see
3911 3911
3912 3912
3913 File descriptor in bad state 3913 File descriptor in bad state
@@ -3921,8 +3921,8 @@
3921 3921
3922 3922
3923 3923
3924 1133..99.. YYoouu ccaann nneettwwoorrkk ttoo tthhee hhoosstt bbuutt nnoott ttoo ootthheerr mmaacchhiinneess oonn tthhee 3924 13.9. You can network to the host but not to other machines on the
3925 nneett 3925 net
3926 3926
3927 If you can connect to the host, and the host can connect to UML, but 3927 If you can connect to the host, and the host can connect to UML, but
3928 you cannot connect to any other machines, then you may need to enable 3928 you cannot connect to any other machines, then you may need to enable
@@ -3972,7 +3972,7 @@
3972 3972
3973 3973
3974 3974
3975 1133..1100.. II hhaavvee nnoo rroooott aanndd II wwaanntt ttoo ssccrreeaamm 3975 13.10. I have no root and I want to scream
3976 3976
3977 Thanks to Birgit Wahlich for telling me about this strange one. It 3977 Thanks to Birgit Wahlich for telling me about this strange one. It
3978 turns out that there's a limit of six environment variables on the 3978 turns out that there's a limit of six environment variables on the
@@ -3987,7 +3987,7 @@
3987 3987
3988 3988
3989 3989
3990 1133..1111.. UUMMLL bbuuiilldd ccoonnfflliicctt bbeettwweeeenn ppttrraaccee..hh aanndd uuccoonntteexxtt..hh 3990 13.11. UML build conflict between ptrace.h and ucontext.h
3991 3991
3992 On some older systems, /usr/include/asm/ptrace.h and 3992 On some older systems, /usr/include/asm/ptrace.h and
3993 /usr/include/sys/ucontext.h define the same names. So, when they're 3993 /usr/include/sys/ucontext.h define the same names. So, when they're
@@ -4007,7 +4007,7 @@
4007 4007
4008 4008
4009 4009
4010 1133..1122.. TThhee UUMMLL BBooggooMMiippss iiss eexxaaccttllyy hhaallff tthhee hhoosstt''ss BBooggooMMiippss 4010 13.12. The UML BogoMips is exactly half the host's BogoMips
4011 4011
4012 On i386 kernels, there are two ways of running the loop that is used 4012 On i386 kernels, there are two ways of running the loop that is used
4013 to calculate the BogoMips rating, using the TSC if it's there or using 4013 to calculate the BogoMips rating, using the TSC if it's there or using
@@ -4019,7 +4019,7 @@
4019 4019
4020 4020
4021 4021
4022 1133..1133.. WWhheenn yyoouu rruunn UUMMLL,, iitt iimmmmeeddiiaatteellyy sseeggffaauullttss 4022 13.13. When you run UML, it immediately segfaults
4023 4023
4024 If the host is configured with the 2G/2G address space split, that's 4024 If the host is configured with the 2G/2G address space split, that's
4025 why. See ``UML on 2G/2G hosts'' for the details on getting UML to 4025 why. See ``UML on 2G/2G hosts'' for the details on getting UML to
@@ -4027,7 +4027,7 @@
4027 4027
4028 4028
4029 4029
4030 1133..1144.. xxtteerrmmss aappppeeaarr,, tthheenn iimmmmeeddiiaatteellyy ddiissaappppeeaarr 4030 13.14. xterms appear, then immediately disappear
4031 4031
4032 If you're running an up to date kernel with an old release of 4032 If you're running an up to date kernel with an old release of
4033 uml_utilities, the port-helper program will not work properly, so 4033 uml_utilities, the port-helper program will not work properly, so
@@ -4039,7 +4039,7 @@
4039 4039
4040 4040
4041 4041
4042 1133..1155.. AAnnyy ootthheerr ppaanniicc,, hhaanngg,, oorr ssttrraannggee bbeehhaavviioorr 4042 13.15. Any other panic, hang, or strange behavior
4043 4043
4044 If you're seeing truly strange behavior, such as hangs or panics that 4044 If you're seeing truly strange behavior, such as hangs or panics that
4045 happen in random places, or you try running the debugger to see what's 4045 happen in random places, or you try running the debugger to see what's
@@ -4059,7 +4059,7 @@
4059 4059
4060 If you want to be super-helpful, read ``Diagnosing Problems'' and 4060 If you want to be super-helpful, read ``Diagnosing Problems'' and
4061 follow the instructions contained therein. 4061 follow the instructions contained therein.
4062 1144.. DDiiaaggnnoossiinngg PPrroobblleemmss 4062 14. Diagnosing Problems
4063 4063
4064 4064
4065 If you get UML to crash, hang, or otherwise misbehave, you should 4065 If you get UML to crash, hang, or otherwise misbehave, you should
@@ -4078,7 +4078,7 @@
4078 ``Kernel debugging'' UML first. 4078 ``Kernel debugging'' UML first.
4079 4079
4080 4080
4081 1144..11.. CCaassee 11 :: NNoorrmmaall kkeerrnneell ppaanniiccss 4081 14.1. Case 1 : Normal kernel panics
4082 4082
4083 The most common case is for a normal thread to panic. To debug this, 4083 The most common case is for a normal thread to panic. To debug this,
4084 you will need to run it under the debugger (add 'debug' to the command 4084 you will need to run it under the debugger (add 'debug' to the command
@@ -4128,7 +4128,7 @@
4128 to get that information from the faulting ip. 4128 to get that information from the faulting ip.
4129 4129
4130 4130
4131 1144..22.. CCaassee 22 :: TTrraacciinngg tthhrreeaadd ppaanniiccss 4131 14.2. Case 2 : Tracing thread panics
4132 4132
4133 The less common and more painful case is when the tracing thread 4133 The less common and more painful case is when the tracing thread
4134 panics. In this case, the kernel debugger will be useless because it 4134 panics. In this case, the kernel debugger will be useless because it
@@ -4161,7 +4161,7 @@
4161 backtrace in and wait for our crack debugging team to fix the problem. 4161 backtrace in and wait for our crack debugging team to fix the problem.
4162 4162
4163 4163
4164 1144..33.. CCaassee 33 :: TTrraacciinngg tthhrreeaadd ppaanniiccss ccaauusseedd bbyy ootthheerr tthhrreeaaddss 4164 14.3. Case 3 : Tracing thread panics caused by other threads
4165 4165
4166 However, there are cases where the misbehavior of another thread 4166 However, there are cases where the misbehavior of another thread
4167 caused the problem. The most common panic of this type is: 4167 caused the problem. The most common panic of this type is:
@@ -4227,7 +4227,7 @@
4227 4227
4228 4228
4229 4229
4230 1144..44.. CCaassee 44 :: HHaannggss 4230 14.4. Case 4 : Hangs
4231 4231
4232 Hangs seem to be fairly rare, but they sometimes happen. When a hang 4232 Hangs seem to be fairly rare, but they sometimes happen. When a hang
4233 happens, we need a backtrace from the offending process. Run the 4233 happens, we need a backtrace from the offending process. Run the
@@ -4257,7 +4257,7 @@
4257 4257
4258 4258
4259 4259
4260 1155.. TThhaannkkss 4260 15. Thanks
4261 4261
4262 4262
4263 A number of people have helped this project in various ways, and this 4263 A number of people have helped this project in various ways, and this
@@ -4274,20 +4274,20 @@
4274 bookkeeping lapses and I forget about contributions. 4274 bookkeeping lapses and I forget about contributions.
4275 4275
4276 4276
4277 1155..11.. CCooddee aanndd DDooccuummeennttaattiioonn 4277 15.1. Code and Documentation
4278 4278
4279 Rusty Russell <rusty at linuxcare.com.au> - 4279 Rusty Russell <rusty at linuxcare.com.au> -
4280 4280
4281 +o wrote the HOWTO <http://user-mode- 4281 o wrote the HOWTO <http://user-mode-
4282 linux.sourceforge.net/UserModeLinux-HOWTO.html> 4282 linux.sourceforge.net/UserModeLinux-HOWTO.html>
4283 4283
4284 +o prodded me into making this project official and putting it on 4284 o prodded me into making this project official and putting it on
4285 SourceForge 4285 SourceForge
4286 4286
4287 +o came up with the way cool UML logo <http://user-mode- 4287 o came up with the way cool UML logo <http://user-mode-
4288 linux.sourceforge.net/uml-small.png> 4288 linux.sourceforge.net/uml-small.png>
4289 4289
4290 +o redid the config process 4290 o redid the config process
4291 4291
4292 4292
4293 Peter Moulder <reiter at netspace.net.au> - Fixed my config and build 4293 Peter Moulder <reiter at netspace.net.au> - Fixed my config and build
@@ -4296,18 +4296,18 @@
4296 4296
4297 Bill Stearns <wstearns at pobox.com> - 4297 Bill Stearns <wstearns at pobox.com> -
4298 4298
4299 +o HOWTO updates 4299 o HOWTO updates
4300 4300
4301 +o lots of bug reports 4301 o lots of bug reports
4302 4302
4303 +o lots of testing 4303 o lots of testing
4304 4304
4305 +o dedicated a box (uml.ists.dartmouth.edu) to support UML development 4305 o dedicated a box (uml.ists.dartmouth.edu) to support UML development
4306 4306
4307 +o wrote the mkrootfs script, which allows bootable filesystems of 4307 o wrote the mkrootfs script, which allows bootable filesystems of
4308 RPM-based distributions to be cranked out 4308 RPM-based distributions to be cranked out
4309 4309
4310 +o cranked out a large number of filesystems with said script 4310 o cranked out a large number of filesystems with said script
4311 4311
4312 4312
4313 Jim Leu <jleu at mindspring.com> - Wrote the virtual ethernet driver 4313 Jim Leu <jleu at mindspring.com> - Wrote the virtual ethernet driver
@@ -4375,176 +4375,176 @@
4375 4375
4376 David Coulson <http://davidcoulson.net> - 4376 David Coulson <http://davidcoulson.net> -
4377 4377
4378 +o Set up the usermodelinux.org <http://usermodelinux.org> site, 4378 o Set up the usermodelinux.org <http://usermodelinux.org> site,
4379 which is a great way of keeping the UML user community on top of 4379 which is a great way of keeping the UML user community on top of
4380 UML goings-on. 4380 UML goings-on.
4381 4381
4382 +o Site documentation and updates 4382 o Site documentation and updates
4383 4383
4384 +o Nifty little UML management daemon UMLd 4384 o Nifty little UML management daemon UMLd
4385 <http://uml.openconsultancy.com/umld/> 4385 <http://uml.openconsultancy.com/umld/>
4386 4386
4387 +o Lots of testing and bug reports 4387 o Lots of testing and bug reports
4388 4388
4389 4389
4390 4390
4391 4391
4392 1155..22.. FFlluusshhiinngg oouutt bbuuggss 4392 15.2. Flushing out bugs
4393 4393
4394 4394
4395 4395
4396 +o Yuri Pudgorodsky 4396 o Yuri Pudgorodsky
4397 4397
4398 +o Gerald Britton 4398 o Gerald Britton
4399 4399
4400 +o Ian Wehrman 4400 o Ian Wehrman
4401 4401
4402 +o Gord Lamb 4402 o Gord Lamb
4403 4403
4404 +o Eugene Koontz 4404 o Eugene Koontz
4405 4405
4406 +o John H. Hartman 4406 o John H. Hartman
4407 4407
4408 +o Anders Karlsson 4408 o Anders Karlsson
4409 4409
4410 +o Daniel Phillips 4410 o Daniel Phillips
4411 4411
4412 +o John Fremlin 4412 o John Fremlin
4413 4413
4414 +o Rainer Burgstaller 4414 o Rainer Burgstaller
4415 4415
4416 +o James Stevenson 4416 o James Stevenson
4417 4417
4418 +o Matt Clay 4418 o Matt Clay
4419 4419
4420 +o Cliff Jefferies 4420 o Cliff Jefferies
4421 4421
4422 +o Geoff Hoff 4422 o Geoff Hoff
4423 4423
4424 +o Lennert Buytenhek 4424 o Lennert Buytenhek
4425 4425
4426 +o Al Viro 4426 o Al Viro
4427 4427
4428 +o Frank Klingenhoefer 4428 o Frank Klingenhoefer
4429 4429
4430 +o Livio Baldini Soares 4430 o Livio Baldini Soares
4431 4431
4432 +o Jon Burgess 4432 o Jon Burgess
4433 4433
4434 +o Petru Paler 4434 o Petru Paler
4435 4435
4436 +o Paul 4436 o Paul
4437 4437
4438 +o Chris Reahard 4438 o Chris Reahard
4439 4439
4440 +o Sverker Nilsson 4440 o Sverker Nilsson
4441 4441
4442 +o Gong Su 4442 o Gong Su
4443 4443
4444 +o johan verrept 4444 o johan verrept
4445 4445
4446 +o Bjorn Eriksson 4446 o Bjorn Eriksson
4447 4447
4448 +o Lorenzo Allegrucci 4448 o Lorenzo Allegrucci
4449 4449
4450 +o Muli Ben-Yehuda 4450 o Muli Ben-Yehuda
4451 4451
4452 +o David Mansfield 4452 o David Mansfield
4453 4453
4454 +o Howard Goff 4454 o Howard Goff
4455 4455
4456 +o Mike Anderson 4456 o Mike Anderson
4457 4457
4458 +o John Byrne 4458 o John Byrne
4459 4459
4460 +o Sapan J. Batia 4460 o Sapan J. Batia
4461 4461
4462 +o Iris Huang 4462 o Iris Huang
4463 4463
4464 +o Jan Hudec 4464 o Jan Hudec
4465 4465
4466 +o Voluspa 4466 o Voluspa
4467 4467
4468 4468
4469 4469
4470 4470
4471 1155..33.. BBuugglleettss aanndd cclleeaann--uuppss 4471 15.3. Buglets and clean-ups
4472 4472
4473 4473
4474 4474
4475 +o Dave Zarzycki 4475 o Dave Zarzycki
4476 4476
4477 +o Adam Lazur 4477 o Adam Lazur
4478 4478
4479 +o Boria Feigin 4479 o Boria Feigin
4480 4480
4481 +o Brian J. Murrell 4481 o Brian J. Murrell
4482 4482
4483 +o JS 4483 o JS
4484 4484
4485 +o Roman Zippel 4485 o Roman Zippel
4486 4486
4487 +o Wil Cooley 4487 o Wil Cooley
4488 4488
4489 +o Ayelet Shemesh 4489 o Ayelet Shemesh
4490 4490
4491 +o Will Dyson 4491 o Will Dyson
4492 4492
4493 +o Sverker Nilsson 4493 o Sverker Nilsson
4494 4494
4495 +o dvorak 4495 o dvorak
4496 4496
4497 +o v.naga srinivas 4497 o v.naga srinivas
4498 4498
4499 +o Shlomi Fish 4499 o Shlomi Fish
4500 4500
4501 +o Roger Binns 4501 o Roger Binns
4502 4502
4503 +o johan verrept 4503 o johan verrept
4504 4504
4505 +o MrChuoi 4505 o MrChuoi
4506 4506
4507 +o Peter Cleve 4507 o Peter Cleve
4508 4508
4509 +o Vincent Guffens 4509 o Vincent Guffens
4510 4510
4511 +o Nathan Scott 4511 o Nathan Scott
4512 4512
4513 +o Patrick Caulfield 4513 o Patrick Caulfield
4514 4514
4515 +o jbearce 4515 o jbearce
4516 4516
4517 +o Catalin Marinas 4517 o Catalin Marinas
4518 4518
4519 +o Shane Spencer 4519 o Shane Spencer
4520 4520
4521 +o Zou Min 4521 o Zou Min
4522 4522
4523 4523
4524 +o Ryan Boder 4524 o Ryan Boder
4525 4525
4526 +o Lorenzo Colitti 4526 o Lorenzo Colitti
4527 4527
4528 +o Gwendal Grignou 4528 o Gwendal Grignou
4529 4529
4530 +o Andre' Breiler 4530 o Andre' Breiler
4531 4531
4532 +o Tsutomu Yasuda 4532 o Tsutomu Yasuda
4533 4533
4534 4534
4535 4535
4536 1155..44.. CCaassee SSttuuddiieess 4536 15.4. Case Studies
4537 4537
4538 4538
4539 +o Jon Wright 4539 o Jon Wright
4540 4540
4541 +o William McEwan 4541 o William McEwan
4542 4542
4543 +o Michael Richardson 4543 o Michael Richardson
4544 4544
4545 4545
4546 4546
4547 1155..55.. OOtthheerr ccoonnttrriibbuuttiioonnss 4547 15.5. Other contributions
4548 4548
4549 4549
4550 Bill Carr <Bill.Carr at compaq.com> made the Red Hat mkrootfs script 4550 Bill Carr <Bill.Carr at compaq.com> made the Red Hat mkrootfs script
diff --git a/MAINTAINERS b/MAINTAINERS
index c406f9ba1923..479beaa39131 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6683,7 +6683,6 @@ F: drivers/net/ethernet/8390/ne-h8300.c
6683 6683
6684UDF FILESYSTEM 6684UDF FILESYSTEM
6685M: Jan Kara <jack@suse.cz> 6685M: Jan Kara <jack@suse.cz>
6686W: http://linux-udf.sourceforge.net
6687S: Maintained 6686S: Maintained
6688F: Documentation/filesystems/udf.txt 6687F: Documentation/filesystems/udf.txt
6689F: fs/udf/ 6688F: fs/udf/
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 6fe874fc5f8e..481f4f76f664 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -108,9 +108,7 @@ static struct inode *hypfs_make_inode(struct super_block *sb, int mode)
108 ret->i_gid = hypfs_info->gid; 108 ret->i_gid = hypfs_info->gid;
109 ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; 109 ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
110 if (mode & S_IFDIR) 110 if (mode & S_IFDIR)
111 ret->i_nlink = 2; 111 set_nlink(ret, 2);
112 else
113 ret->i_nlink = 1;
114 } 112 }
115 return ret; 113 return ret;
116} 114}
@@ -361,7 +359,7 @@ static struct dentry *hypfs_create_file(struct super_block *sb,
361 } else if (mode & S_IFDIR) { 359 } else if (mode & S_IFDIR) {
362 inode->i_op = &simple_dir_inode_operations; 360 inode->i_op = &simple_dir_inode_operations;
363 inode->i_fop = &simple_dir_operations; 361 inode->i_fop = &simple_dir_operations;
364 parent->d_inode->i_nlink++; 362 inc_nlink(parent->d_inode);
365 } else 363 } else
366 BUG(); 364 BUG();
367 inode->i_private = data; 365 inode->i_private = data;
diff --git a/arch/um/Kconfig.char b/arch/um/Kconfig.char
index 70dabd1e0652..b9d7c4276682 100644
--- a/arch/um/Kconfig.char
+++ b/arch/um/Kconfig.char
@@ -1,5 +1,4 @@
1 1menu "UML Character Devices"
2menu "Character Devices"
3 2
4config STDERR_CONSOLE 3config STDERR_CONSOLE
5 bool "stderr console" 4 bool "stderr console"
@@ -105,92 +104,6 @@ config SSL_CHAN
105 this if you expect the UML that you build to be run in environments 104 this if you expect the UML that you build to be run in environments
106 which don't have a set of /dev/pty* devices. 105 which don't have a set of /dev/pty* devices.
107 106
108config UNIX98_PTYS
109 bool "Unix98 PTY support"
110 help
111 A pseudo terminal (PTY) is a software device consisting of two
112 halves: a master and a slave. The slave device behaves identical to
113 a physical terminal; the master device is used by a process to
114 read data from and write data to the slave, thereby emulating a
115 terminal. Typical programs for the master side are telnet servers
116 and xterms.
117
118 Linux has traditionally used the BSD-like names /dev/ptyxx for
119 masters and /dev/ttyxx for slaves of pseudo terminals. This scheme
120 has a number of problems. The GNU C library glibc 2.1 and later,
121 however, supports the Unix98 naming standard: in order to acquire a
122 pseudo terminal, a process opens /dev/ptmx; the number of the pseudo
123 terminal is then made available to the process and the pseudo
124 terminal slave can be accessed as /dev/pts/<number>. What was
125 traditionally /dev/ttyp2 will then be /dev/pts/2, for example.
126
127 All modern Linux systems use the Unix98 ptys. Say Y unless
128 you're on an embedded system and want to conserve memory.
129
130config LEGACY_PTYS
131 bool "Legacy (BSD) PTY support"
132 default y
133 help
134 A pseudo terminal (PTY) is a software device consisting of two
135 halves: a master and a slave. The slave device behaves identical to
136 a physical terminal; the master device is used by a process to
137 read data from and write data to the slave, thereby emulating a
138 terminal. Typical programs for the master side are telnet servers
139 and xterms.
140
141 Linux has traditionally used the BSD-like names /dev/ptyxx
142 for masters and /dev/ttyxx for slaves of pseudo
143 terminals. This scheme has a number of problems, including
144 security. This option enables these legacy devices; on most
145 systems, it is safe to say N.
146
147config RAW_DRIVER
148 tristate "RAW driver (/dev/raw/rawN)"
149 depends on BLOCK
150 help
151 The raw driver permits block devices to be bound to /dev/raw/rawN.
152 Once bound, I/O against /dev/raw/rawN uses efficient zero-copy I/O.
153 See the raw(8) manpage for more details.
154
155 Applications should preferably open the device (eg /dev/hda1)
156 with the O_DIRECT flag.
157
158config MAX_RAW_DEVS
159 int "Maximum number of RAW devices to support (1-8192)"
160 depends on RAW_DRIVER
161 default "256"
162 help
163 The maximum number of RAW devices that are supported.
164 Default is 256. Increase this number in case you need lots of
165 raw devices.
166
167config LEGACY_PTY_COUNT
168 int "Maximum number of legacy PTY in use"
169 depends on LEGACY_PTYS
170 default "256"
171 help
172 The maximum number of legacy PTYs that can be used at any one time.
173 The default is 256, and should be more than enough. Embedded
174 systems may want to reduce this to save memory.
175
176 When not in use, each legacy PTY occupies 12 bytes on 32-bit
177 architectures and 24 bytes on 64-bit architectures.
178
179config WATCHDOG
180 bool "Watchdog Timer Support"
181
182config WATCHDOG_NOWAYOUT
183 bool "Disable watchdog shutdown on close"
184 depends on WATCHDOG
185
186config SOFT_WATCHDOG
187 tristate "Software Watchdog"
188 depends on WATCHDOG
189
190config UML_WATCHDOG
191 tristate "UML watchdog"
192 depends on WATCHDOG
193
194config UML_SOUND 107config UML_SOUND
195 tristate "Sound support" 108 tristate "Sound support"
196 help 109 help
@@ -211,29 +124,4 @@ config HOSTAUDIO
211 tristate 124 tristate
212 default UML_SOUND 125 default UML_SOUND
213 126
214#It is selected elsewhere, so kconfig would warn without this.
215config HW_RANDOM
216 tristate
217 default n
218
219config UML_RANDOM
220 tristate "Hardware random number generator"
221 help
222 This option enables UML's "hardware" random number generator. It
223 attaches itself to the host's /dev/random, supplying as much entropy
224 as the host has, rather than the small amount the UML gets from its
225 own drivers. It registers itself as a standard hardware random number
226 generator, major 10, minor 183, and the canonical device name is
227 /dev/hwrng.
228 The way to make use of this is to install the rng-tools package
229 (check your distro, or download from
230 http://sourceforge.net/projects/gkernel/). rngd periodically reads
231 /dev/hwrng and injects the entropy into /dev/random.
232
233config MMAPPER
234 tristate "iomem emulation driver"
235 help
236 This driver allows a host file to be used as emulated IO memory inside
237 UML.
238
239endmenu 127endmenu
diff --git a/arch/um/Kconfig.rest b/arch/um/Kconfig.rest
index 0ccad0ff6d6e..567eb5fc21df 100644
--- a/arch/um/Kconfig.rest
+++ b/arch/um/Kconfig.rest
@@ -2,20 +2,14 @@ source "init/Kconfig"
2 2
3source "kernel/Kconfig.freezer" 3source "kernel/Kconfig.freezer"
4 4
5source "drivers/block/Kconfig"
6
7source "arch/um/Kconfig.char" 5source "arch/um/Kconfig.char"
8 6
9source "drivers/base/Kconfig" 7source "drivers/Kconfig"
10 8
11source "net/Kconfig" 9source "net/Kconfig"
12 10
13source "arch/um/Kconfig.net" 11source "arch/um/Kconfig.net"
14 12
15source "drivers/net/Kconfig"
16
17source "drivers/connector/Kconfig"
18
19source "fs/Kconfig" 13source "fs/Kconfig"
20 14
21source "security/Kconfig" 15source "security/Kconfig"
@@ -24,19 +18,4 @@ source "crypto/Kconfig"
24 18
25source "lib/Kconfig" 19source "lib/Kconfig"
26 20
27source "drivers/scsi/Kconfig"
28
29source "drivers/md/Kconfig"
30
31if BROKEN
32 source "drivers/mtd/Kconfig"
33endif
34
35source "drivers/leds/Kconfig"
36
37#This is just to shut up some Kconfig warnings, so no prompt.
38config INPUT
39 tristate
40 default n
41
42source "arch/um/Kconfig.debug" 21source "arch/um/Kconfig.debug"
diff --git a/arch/um/Kconfig.um b/arch/um/Kconfig.um
index b5e675e370c6..70fd690964e4 100644
--- a/arch/um/Kconfig.um
+++ b/arch/um/Kconfig.um
@@ -148,5 +148,11 @@ config KERNEL_STACK_ORDER
148 be 1 << order pages. The default is OK unless you're running Valgrind 148 be 1 << order pages. The default is OK unless you're running Valgrind
149 on UML, in which case, set this to 3. 149 on UML, in which case, set this to 3.
150 150
151config MMAPPER
152 tristate "iomem emulation driver"
153 help
154 This driver allows a host file to be used as emulated IO memory inside
155 UML.
156
151config NO_DMA 157config NO_DMA
152 def_bool y 158 def_bool y
diff --git a/arch/um/Makefile b/arch/um/Makefile
index c0f712cc7c5f..7730af6ec13f 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -20,15 +20,27 @@ core-y += $(ARCH_DIR)/kernel/ \
20 20
21MODE_INCLUDE += -I$(srctree)/$(ARCH_DIR)/include/shared/skas 21MODE_INCLUDE += -I$(srctree)/$(ARCH_DIR)/include/shared/skas
22 22
23HEADER_ARCH := $(SUBARCH)
24
25# Additional ARCH settings for x86
26ifeq ($(SUBARCH),i386)
27 HEADER_ARCH := x86
28endif
29ifeq ($(SUBARCH),x86_64)
30 HEADER_ARCH := x86
31endif
32
33HOST_DIR := arch/$(HEADER_ARCH)
34
23include $(srctree)/$(ARCH_DIR)/Makefile-skas 35include $(srctree)/$(ARCH_DIR)/Makefile-skas
36include $(srctree)/$(HOST_DIR)/Makefile.um
37
38core-y += $(HOST_DIR)/um/
24 39
25SHARED_HEADERS := $(ARCH_DIR)/include/shared 40SHARED_HEADERS := $(ARCH_DIR)/include/shared
26ARCH_INCLUDE := -I$(srctree)/$(SHARED_HEADERS) 41ARCH_INCLUDE := -I$(srctree)/$(SHARED_HEADERS)
27ARCH_INCLUDE += -I$(srctree)/$(ARCH_DIR)/sys-$(SUBARCH)/shared 42ARCH_INCLUDE += -I$(srctree)/$(HOST_DIR)/um/shared
28ifneq ($(KBUILD_SRC),) 43KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/um
29ARCH_INCLUDE += -I$(SHARED_HEADERS)
30endif
31KBUILD_CPPFLAGS += -I$(srctree)/$(ARCH_DIR)/sys-$(SUBARCH)
32 44
33# -Dvmap=kernel_vmap prevents anything from referencing the libpcap.o symbol so 45# -Dvmap=kernel_vmap prevents anything from referencing the libpcap.o symbol so
34# named - it's a common symbol in libpcap, so we get a binary which crashes. 46# named - it's a common symbol in libpcap, so we get a binary which crashes.
@@ -47,14 +59,12 @@ KBUILD_AFLAGS += $(ARCH_INCLUDE)
47 59
48USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -D__KERNEL__,,\ 60USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -D__KERNEL__,,\
49 $(patsubst -I%,,$(KBUILD_CFLAGS)))) $(ARCH_INCLUDE) $(MODE_INCLUDE) \ 61 $(patsubst -I%,,$(KBUILD_CFLAGS)))) $(ARCH_INCLUDE) $(MODE_INCLUDE) \
50 $(filter -I%,$(CFLAGS)) -D_FILE_OFFSET_BITS=64 62 $(filter -I%,$(CFLAGS)) -D_FILE_OFFSET_BITS=64 -idirafter include
51
52include $(srctree)/$(ARCH_DIR)/Makefile-$(SUBARCH)
53 63
54#This will adjust *FLAGS accordingly to the platform. 64#This will adjust *FLAGS accordingly to the platform.
55include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS) 65include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS)
56 66
57KBUILD_CPPFLAGS += -I$(srctree)/arch/$(HEADER_ARCH)/include 67KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/include
58 68
59# -Derrno=kernel_errno - This turns all kernel references to errno into 69# -Derrno=kernel_errno - This turns all kernel references to errno into
60# kernel_errno to separate them from the libc errno. This allows -fno-common 70# kernel_errno to separate them from the libc errno. This allows -fno-common
@@ -84,10 +94,9 @@ define archhelp
84 echo ' find in the kernel root.' 94 echo ' find in the kernel root.'
85endef 95endef
86 96
87KBUILD_KCONFIG := arch/um/Kconfig.$(HEADER_ARCH) 97KBUILD_KCONFIG := $(HOST_DIR)/um/Kconfig
88 98
89archprepare: $(SHARED_HEADERS)/user_constants.h 99archprepare: include/generated/user_constants.h
90archprepare: $(SHARED_HEADERS)/kern_constants.h
91 100
92LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static 101LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
93LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib 102LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib
@@ -118,9 +127,7 @@ endef
118 127
119# When cleaning we don't include .config, so we don't include 128# When cleaning we don't include .config, so we don't include
120# TT or skas makefiles and don't clean skas_ptregs.h. 129# TT or skas makefiles and don't clean skas_ptregs.h.
121CLEAN_FILES += linux x.i gmon.out \ 130CLEAN_FILES += linux x.i gmon.out
122 $(SHARED_HEADERS)/user_constants.h \
123 $(SHARED_HEADERS)/kern_constants.h
124 131
125archclean: 132archclean:
126 @find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ 133 @find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \
@@ -128,8 +135,8 @@ archclean:
128 135
129# Generated files 136# Generated files
130 137
131$(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s: FORCE 138$(HOST_DIR)/um/user-offsets.s: FORCE
132 $(Q)$(MAKE) $(build)=$(ARCH_DIR)/sys-$(SUBARCH) $@ 139 $(Q)$(MAKE) $(build)=$(HOST_DIR)/um $@
133 140
134define filechk_gen-asm-offsets 141define filechk_gen-asm-offsets
135 (set -e; \ 142 (set -e; \
@@ -144,11 +151,7 @@ define filechk_gen-asm-offsets
144 echo ""; ) 151 echo ""; )
145endef 152endef
146 153
147$(SHARED_HEADERS)/user_constants.h: $(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s 154include/generated/user_constants.h: $(HOST_DIR)/um/user-offsets.s
148 $(call filechk,gen-asm-offsets) 155 $(call filechk,gen-asm-offsets)
149 156
150$(SHARED_HEADERS)/kern_constants.h: 157export SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING OS DEV_NULL_PATH
151 $(Q)mkdir -p $(dir $@)
152 $(Q)echo '#include "../../../../include/generated/asm-offsets.h"' >$@
153
154export SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING OS HEADER_ARCH DEV_NULL_PATH
diff --git a/arch/um/Makefile-x86_64 b/arch/um/Makefile-x86_64
deleted file mode 100644
index a9cd7e77a7ab..000000000000
--- a/arch/um/Makefile-x86_64
+++ /dev/null
@@ -1,26 +0,0 @@
1# Copyright 2003 - 2004 Pathscale, Inc
2# Released under the GPL
3
4core-y += arch/um/sys-x86_64/ arch/x86/crypto/
5START := 0x60000000
6
7_extra_flags_ = -fno-builtin -m64
8
9KBUILD_CFLAGS += $(_extra_flags_)
10
11CHECKFLAGS += -m64 -D__x86_64__
12KBUILD_AFLAGS += -m64
13LDFLAGS += -m elf_x86_64
14KBUILD_CPPFLAGS += -m64
15
16ELF_ARCH := i386:x86-64
17ELF_FORMAT := elf64-x86-64
18HEADER_ARCH := x86
19
20# Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example.
21
22LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64
23LINK-y += -m64
24
25# Do unit-at-a-time unconditionally on x86_64, following the host
26KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
diff --git a/arch/um/include/shared/chan_kern.h b/arch/um/drivers/chan.h
index 1e651457e049..8df0fd9024dc 100644
--- a/arch/um/include/shared/chan_kern.h
+++ b/arch/um/drivers/chan.h
@@ -6,9 +6,9 @@
6#ifndef __CHAN_KERN_H__ 6#ifndef __CHAN_KERN_H__
7#define __CHAN_KERN_H__ 7#define __CHAN_KERN_H__
8 8
9#include "linux/tty.h" 9#include <linux/tty.h>
10#include "linux/list.h" 10#include <linux/list.h>
11#include "linux/console.h" 11#include <linux/console.h>
12#include "chan_user.h" 12#include "chan_user.h"
13#include "line.h" 13#include "line.h"
14 14
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index d4191fe1cede..420e2c800799 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -6,7 +6,7 @@
6#include <linux/slab.h> 6#include <linux/slab.h>
7#include <linux/tty.h> 7#include <linux/tty.h>
8#include <linux/tty_flip.h> 8#include <linux/tty_flip.h>
9#include "chan_kern.h" 9#include "chan.h"
10#include "os.h" 10#include "os.h"
11 11
12#ifdef CONFIG_NOCONFIG_CHAN 12#ifdef CONFIG_NOCONFIG_CHAN
@@ -358,11 +358,11 @@ int chan_window_size(struct list_head *chans, unsigned short *rows_out,
358 return 0; 358 return 0;
359} 359}
360 360
361static void free_one_chan(struct chan *chan, int delay_free_irq) 361static void free_one_chan(struct chan *chan)
362{ 362{
363 list_del(&chan->list); 363 list_del(&chan->list);
364 364
365 close_one_chan(chan, delay_free_irq); 365 close_one_chan(chan, 0);
366 366
367 if (chan->ops->free != NULL) 367 if (chan->ops->free != NULL)
368 (*chan->ops->free)(chan->data); 368 (*chan->ops->free)(chan->data);
@@ -372,14 +372,14 @@ static void free_one_chan(struct chan *chan, int delay_free_irq)
372 kfree(chan); 372 kfree(chan);
373} 373}
374 374
375static void free_chan(struct list_head *chans, int delay_free_irq) 375static void free_chan(struct list_head *chans)
376{ 376{
377 struct list_head *ele, *next; 377 struct list_head *ele, *next;
378 struct chan *chan; 378 struct chan *chan;
379 379
380 list_for_each_safe(ele, next, chans) { 380 list_for_each_safe(ele, next, chans) {
381 chan = list_entry(ele, struct chan, list); 381 chan = list_entry(ele, struct chan, list);
382 free_one_chan(chan, delay_free_irq); 382 free_one_chan(chan);
383 } 383 }
384} 384}
385 385
@@ -547,7 +547,7 @@ int parse_chan_pair(char *str, struct line *line, int device,
547 char *in, *out; 547 char *in, *out;
548 548
549 if (!list_empty(chans)) { 549 if (!list_empty(chans)) {
550 free_chan(chans, 0); 550 free_chan(chans);
551 INIT_LIST_HEAD(chans); 551 INIT_LIST_HEAD(chans);
552 } 552 }
553 553
diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c
index cfeb3f4a44af..f180813ce2c7 100644
--- a/arch/um/drivers/chan_user.c
+++ b/arch/um/drivers/chan_user.c
@@ -11,10 +11,8 @@
11#include <termios.h> 11#include <termios.h>
12#include <sys/ioctl.h> 12#include <sys/ioctl.h>
13#include "chan_user.h" 13#include "chan_user.h"
14#include "kern_constants.h"
15#include "os.h" 14#include "os.h"
16#include "um_malloc.h" 15#include "um_malloc.h"
17#include "user.h"
18 16
19void generic_close(int fd, void *unused) 17void generic_close(int fd, void *unused)
20{ 18{
@@ -283,7 +281,12 @@ void register_winch(int fd, struct tty_struct *tty)
283 return; 281 return;
284 282
285 pid = tcgetpgrp(fd); 283 pid = tcgetpgrp(fd);
286 if (!is_skas_winch(pid, fd, tty) && (pid == -1)) { 284 if (is_skas_winch(pid, fd, tty)) {
285 register_winch_irq(-1, fd, -1, tty, 0);
286 return;
287 }
288
289 if (pid == -1) {
287 thread = winch_tramp(fd, tty, &thread_fd, &stack); 290 thread = winch_tramp(fd, tty, &thread_fd, &stack);
288 if (thread < 0) 291 if (thread < 0)
289 return; 292 return;
diff --git a/arch/um/include/shared/chan_user.h b/arch/um/drivers/chan_user.h
index 9b9ced85b703..9b9ced85b703 100644
--- a/arch/um/include/shared/chan_user.h
+++ b/arch/um/drivers/chan_user.h
diff --git a/arch/um/drivers/cow_sys.h b/arch/um/drivers/cow_sys.h
index f5701fd2ef90..7f2ed0b8824a 100644
--- a/arch/um/drivers/cow_sys.h
+++ b/arch/um/drivers/cow_sys.h
@@ -3,7 +3,6 @@
3 3
4#include "kern_util.h" 4#include "kern_util.h"
5#include "os.h" 5#include "os.h"
6#include "user.h"
7#include "um_malloc.h" 6#include "um_malloc.h"
8 7
9static inline void *cow_malloc(int size) 8static inline void *cow_malloc(int size)
diff --git a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c
index f8e85e0bdace..a4fd7bc14af7 100644
--- a/arch/um/drivers/daemon_user.c
+++ b/arch/um/drivers/daemon_user.c
@@ -17,7 +17,6 @@
17#include "net_user.h" 17#include "net_user.h"
18#include "os.h" 18#include "os.h"
19#include "um_malloc.h" 19#include "um_malloc.h"
20#include "user.h"
21 20
22enum request_type { REQ_NEW_CONTROL }; 21enum request_type { REQ_NEW_CONTROL };
23 22
diff --git a/arch/um/drivers/fd.c b/arch/um/drivers/fd.c
index f5a981a16240..5b81d2574415 100644
--- a/arch/um/drivers/fd.c
+++ b/arch/um/drivers/fd.c
@@ -9,10 +9,8 @@
9#include <errno.h> 9#include <errno.h>
10#include <termios.h> 10#include <termios.h>
11#include "chan_user.h" 11#include "chan_user.h"
12#include "kern_constants.h"
13#include "os.h" 12#include "os.h"
14#include "um_malloc.h" 13#include "um_malloc.h"
15#include "user.h"
16 14
17struct fd_chan { 15struct fd_chan {
18 int fd; 16 int fd;
diff --git a/arch/um/drivers/harddog_user.c b/arch/um/drivers/harddog_user.c
index 84dce3fc590c..0345d6206d40 100644
--- a/arch/um/drivers/harddog_user.c
+++ b/arch/um/drivers/harddog_user.c
@@ -7,7 +7,6 @@
7#include <unistd.h> 7#include <unistd.h>
8#include <errno.h> 8#include <errno.h>
9#include "os.h" 9#include "os.h"
10#include "user.h"
11 10
12struct dog_data { 11struct dog_data {
13 int stdin; 12 int stdin;
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 364c8a15c4c3..c1cf2206b84b 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -7,7 +7,7 @@
7#include "linux/kd.h" 7#include "linux/kd.h"
8#include "linux/sched.h" 8#include "linux/sched.h"
9#include "linux/slab.h" 9#include "linux/slab.h"
10#include "chan_kern.h" 10#include "chan.h"
11#include "irq_kern.h" 11#include "irq_kern.h"
12#include "irq_user.h" 12#include "irq_user.h"
13#include "kern_util.h" 13#include "kern_util.h"
diff --git a/arch/um/include/shared/line.h b/arch/um/drivers/line.h
index 63df3ca02ac2..63df3ca02ac2 100644
--- a/arch/um/include/shared/line.h
+++ b/arch/um/drivers/line.h
diff --git a/arch/um/include/shared/mconsole.h b/arch/um/drivers/mconsole.h
index c139ae1d6826..c139ae1d6826 100644
--- a/arch/um/include/shared/mconsole.h
+++ b/arch/um/drivers/mconsole.h
diff --git a/arch/um/include/shared/mconsole_kern.h b/arch/um/drivers/mconsole_kern.h
index d2fe07e78958..d2fe07e78958 100644
--- a/arch/um/include/shared/mconsole_kern.h
+++ b/arch/um/drivers/mconsole_kern.h
diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c
index f8cf4c8bedef..99209826adb1 100644
--- a/arch/um/drivers/mconsole_user.c
+++ b/arch/um/drivers/mconsole_user.c
@@ -10,9 +10,7 @@
10#include <sys/socket.h> 10#include <sys/socket.h>
11#include <sys/uio.h> 11#include <sys/uio.h>
12#include <sys/un.h> 12#include <sys/un.h>
13#include "kern_constants.h"
14#include "mconsole.h" 13#include "mconsole.h"
15#include "user.h"
16 14
17static struct mconsole_command commands[] = { 15static struct mconsole_command commands[] = {
18 /* 16 /*
diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c
index 520118888f16..05090c37fa84 100644
--- a/arch/um/drivers/net_user.c
+++ b/arch/um/drivers/net_user.c
@@ -12,10 +12,8 @@
12#include <sys/socket.h> 12#include <sys/socket.h>
13#include <sys/wait.h> 13#include <sys/wait.h>
14#include "net_user.h" 14#include "net_user.h"
15#include "kern_constants.h"
16#include "os.h" 15#include "os.h"
17#include "um_malloc.h" 16#include "um_malloc.h"
18#include "user.h"
19 17
20int tap_open_common(void *dev, char *gate_addr) 18int tap_open_common(void *dev, char *gate_addr)
21{ 19{
diff --git a/arch/um/drivers/pcap_user.c b/arch/um/drivers/pcap_user.c
index 5f903587d69e..702a75b190ee 100644
--- a/arch/um/drivers/pcap_user.c
+++ b/arch/um/drivers/pcap_user.c
@@ -9,9 +9,7 @@
9#include <asm/types.h> 9#include <asm/types.h>
10#include "net_user.h" 10#include "net_user.h"
11#include "pcap_user.h" 11#include "pcap_user.h"
12#include "kern_constants.h"
13#include "um_malloc.h" 12#include "um_malloc.h"
14#include "user.h"
15 13
16#define PCAP_FD(p) (*(int *)(p)) 14#define PCAP_FD(p) (*(int *)(p))
17 15
diff --git a/arch/um/drivers/port_user.c b/arch/um/drivers/port_user.c
index b49bf56a56aa..7b010b76ddf0 100644
--- a/arch/um/drivers/port_user.c
+++ b/arch/um/drivers/port_user.c
@@ -10,11 +10,9 @@
10#include <unistd.h> 10#include <unistd.h>
11#include <netinet/in.h> 11#include <netinet/in.h>
12#include "chan_user.h" 12#include "chan_user.h"
13#include "kern_constants.h"
14#include "os.h" 13#include "os.h"
15#include "port.h" 14#include "port.h"
16#include "um_malloc.h" 15#include "um_malloc.h"
17#include "user.h"
18 16
19struct port_chan { 17struct port_chan {
20 int raw; 18 int raw;
diff --git a/arch/um/drivers/pty.c b/arch/um/drivers/pty.c
index 1113911dcb2b..cff2b75d31fd 100644
--- a/arch/um/drivers/pty.c
+++ b/arch/um/drivers/pty.c
@@ -12,10 +12,8 @@
12#include <termios.h> 12#include <termios.h>
13#include <sys/stat.h> 13#include <sys/stat.h>
14#include "chan_user.h" 14#include "chan_user.h"
15#include "kern_constants.h"
16#include "os.h" 15#include "os.h"
17#include "um_malloc.h" 16#include "um_malloc.h"
18#include "user.h"
19 17
20struct pty_chan { 18struct pty_chan {
21 void (*announce)(char *dev_name, int dev); 19 void (*announce)(char *dev_name, int dev);
diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c
index cbacfc4e63e6..932b4d69bec2 100644
--- a/arch/um/drivers/slip_user.c
+++ b/arch/um/drivers/slip_user.c
@@ -11,12 +11,10 @@
11#include <string.h> 11#include <string.h>
12#include <sys/termios.h> 12#include <sys/termios.h>
13#include <sys/wait.h> 13#include <sys/wait.h>
14#include "kern_constants.h"
15#include "net_user.h" 14#include "net_user.h"
16#include "os.h" 15#include "os.h"
17#include "slip.h" 16#include "slip.h"
18#include "um_malloc.h" 17#include "um_malloc.h"
19#include "user.h"
20 18
21static int slip_user_init(void *data, void *dev) 19static int slip_user_init(void *data, void *dev)
22{ 20{
diff --git a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c
index a0ada8fec72a..db4adb639ff8 100644
--- a/arch/um/drivers/slirp_user.c
+++ b/arch/um/drivers/slirp_user.c
@@ -7,11 +7,9 @@
7#include <errno.h> 7#include <errno.h>
8#include <string.h> 8#include <string.h>
9#include <sys/wait.h> 9#include <sys/wait.h>
10#include "kern_constants.h"
11#include "net_user.h" 10#include "net_user.h"
12#include "os.h" 11#include "os.h"
13#include "slirp.h" 12#include "slirp.h"
14#include "user.h"
15 13
16static int slirp_user_init(void *data, void *dev) 14static int slirp_user_init(void *data, void *dev)
17{ 15{
diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c
index f1786e64607f..9d8c20af6f80 100644
--- a/arch/um/drivers/ssl.c
+++ b/arch/um/drivers/ssl.c
@@ -12,10 +12,8 @@
12#include "linux/console.h" 12#include "linux/console.h"
13#include "asm/termbits.h" 13#include "asm/termbits.h"
14#include "asm/irq.h" 14#include "asm/irq.h"
15#include "line.h"
16#include "ssl.h" 15#include "ssl.h"
17#include "chan_kern.h" 16#include "chan.h"
18#include "kern.h"
19#include "init.h" 17#include "init.h"
20#include "irq_user.h" 18#include "irq_user.h"
21#include "mconsole_kern.h" 19#include "mconsole_kern.h"
diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c
index 49266f6108c4..088776f01908 100644
--- a/arch/um/drivers/stdio_console.c
+++ b/arch/um/drivers/stdio_console.c
@@ -20,8 +20,7 @@
20#include "asm/current.h" 20#include "asm/current.h"
21#include "asm/irq.h" 21#include "asm/irq.h"
22#include "stdio_console.h" 22#include "stdio_console.h"
23#include "line.h" 23#include "chan.h"
24#include "chan_kern.h"
25#include "irq_user.h" 24#include "irq_user.h"
26#include "mconsole_kern.h" 25#include "mconsole_kern.h"
27#include "init.h" 26#include "init.h"
diff --git a/arch/um/drivers/tty.c b/arch/um/drivers/tty.c
index 495858a090e4..a97391f9ec54 100644
--- a/arch/um/drivers/tty.c
+++ b/arch/um/drivers/tty.c
@@ -7,10 +7,8 @@
7#include <fcntl.h> 7#include <fcntl.h>
8#include <termios.h> 8#include <termios.h>
9#include "chan_user.h" 9#include "chan_user.h"
10#include "kern_constants.h"
11#include "os.h" 10#include "os.h"
12#include "um_malloc.h" 11#include "um_malloc.h"
13#include "user.h"
14 12
15struct tty_chan { 13struct tty_chan {
16 char *dev; 14 char *dev;
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 620f5b70957d..944453a3ec99 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -46,7 +46,6 @@
46#include "asm/tlbflush.h" 46#include "asm/tlbflush.h"
47#include "mem_user.h" 47#include "mem_user.h"
48#include "kern_util.h" 48#include "kern_util.h"
49#include "kern.h"
50#include "mconsole_kern.h" 49#include "mconsole_kern.h"
51#include "init.h" 50#include "init.h"
52#include "irq_user.h" 51#include "irq_user.h"
@@ -54,7 +53,6 @@
54#include "ubd_user.h" 53#include "ubd_user.h"
55#include "os.h" 54#include "os.h"
56#include "mem.h" 55#include "mem.h"
57#include "mem_kern.h"
58#include "cow.h" 56#include "cow.h"
59 57
60enum ubd_req { UBD_READ, UBD_WRITE }; 58enum ubd_req { UBD_READ, UBD_WRITE };
@@ -513,8 +511,37 @@ __uml_exitcall(kill_io_thread);
513static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out) 511static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
514{ 512{
515 char *file; 513 char *file;
514 int fd;
515 int err;
516
517 __u32 version;
518 __u32 align;
519 char *backing_file;
520 time_t mtime;
521 unsigned long long size;
522 int sector_size;
523 int bitmap_offset;
524
525 if (ubd_dev->file && ubd_dev->cow.file) {
526 file = ubd_dev->cow.file;
527
528 goto out;
529 }
516 530
517 file = ubd_dev->cow.file ? ubd_dev->cow.file : ubd_dev->file; 531 fd = os_open_file(ubd_dev->file, global_openflags, 0);
532 if (fd < 0)
533 return fd;
534
535 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
536 &mtime, &size, &sector_size, &align, &bitmap_offset);
537 os_close_file(fd);
538
539 if(err == -EINVAL)
540 file = ubd_dev->file;
541 else
542 file = backing_file;
543
544out:
518 return os_file_size(file, size_out); 545 return os_file_size(file, size_out);
519} 546}
520 547
diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c
index b591bb9c41dd..007b94d97726 100644
--- a/arch/um/drivers/ubd_user.c
+++ b/arch/um/drivers/ubd_user.c
@@ -16,7 +16,6 @@
16#include <sys/mman.h> 16#include <sys/mman.h>
17#include <sys/param.h> 17#include <sys/param.h>
18#include "asm/types.h" 18#include "asm/types.h"
19#include "user.h"
20#include "ubd_user.h" 19#include "ubd_user.h"
21#include "os.h" 20#include "os.h"
22#include "cow.h" 21#include "cow.h"
diff --git a/arch/um/include/shared/ubd_user.h b/arch/um/drivers/ubd_user.h
index 3845051f1b10..3845051f1b10 100644
--- a/arch/um/include/shared/ubd_user.h
+++ b/arch/um/drivers/ubd_user.h
diff --git a/arch/um/drivers/umcast_user.c b/arch/um/drivers/umcast_user.c
index 59c56fd6f52a..010fa2d849ec 100644
--- a/arch/um/drivers/umcast_user.c
+++ b/arch/um/drivers/umcast_user.c
@@ -15,11 +15,9 @@
15#include <unistd.h> 15#include <unistd.h>
16#include <errno.h> 16#include <errno.h>
17#include <netinet/in.h> 17#include <netinet/in.h>
18#include "kern_constants.h"
19#include "umcast.h" 18#include "umcast.h"
20#include "net_user.h" 19#include "net_user.h"
21#include "um_malloc.h" 20#include "um_malloc.h"
22#include "user.h"
23 21
24static struct sockaddr_in *new_addr(char *addr, unsigned short port) 22static struct sockaddr_in *new_addr(char *addr, unsigned short port)
25{ 23{
diff --git a/arch/um/drivers/vde_user.c b/arch/um/drivers/vde_user.c
index c5c43253e6ce..b8c286748d3d 100644
--- a/arch/um/drivers/vde_user.c
+++ b/arch/um/drivers/vde_user.c
@@ -6,10 +6,8 @@
6#include <stddef.h> 6#include <stddef.h>
7#include <errno.h> 7#include <errno.h>
8#include <libvdeplug.h> 8#include <libvdeplug.h>
9#include "kern_constants.h"
10#include "net_user.h" 9#include "net_user.h"
11#include "um_malloc.h" 10#include "um_malloc.h"
12#include "user.h"
13#include "vde.h" 11#include "vde.h"
14 12
15static int vde_user_init(void *data, void *dev) 13static int vde_user_init(void *data, void *dev)
diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c
index 2e1de5728604..969110e56487 100644
--- a/arch/um/drivers/xterm.c
+++ b/arch/um/drivers/xterm.c
@@ -11,10 +11,8 @@
11#include <string.h> 11#include <string.h>
12#include <termios.h> 12#include <termios.h>
13#include "chan_user.h" 13#include "chan_user.h"
14#include "kern_constants.h"
15#include "os.h" 14#include "os.h"
16#include "um_malloc.h" 15#include "um_malloc.h"
17#include "user.h"
18#include "xterm.h" 16#include "xterm.h"
19 17
20struct xterm_chan { 18struct xterm_chan {
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
new file mode 100644
index 000000000000..451f4517b334
--- /dev/null
+++ b/arch/um/include/asm/Kbuild
@@ -0,0 +1,3 @@
1generic-y += bug.h cputime.h device.h emergency-restart.h futex.h hardirq.h
2generic-y += hw_irq.h irq_regs.h kdebug.h percpu.h sections.h topology.h xor.h
3generic-y += ftrace.h
diff --git a/arch/um/include/asm/bug.h b/arch/um/include/asm/bug.h
deleted file mode 100644
index 9e33b864c359..000000000000
--- a/arch/um/include/asm/bug.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef __UM_BUG_H
2#define __UM_BUG_H
3
4#include <asm-generic/bug.h>
5
6#endif
diff --git a/arch/um/include/asm/checksum.h b/arch/um/include/asm/checksum.h
deleted file mode 100644
index 5b501361e361..000000000000
--- a/arch/um/include/asm/checksum.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef __UM_CHECKSUM_H
2#define __UM_CHECKSUM_H
3
4#include "sysdep/checksum.h"
5
6#endif
diff --git a/arch/um/include/asm/cputime.h b/arch/um/include/asm/cputime.h
deleted file mode 100644
index c84acbadfa2f..000000000000
--- a/arch/um/include/asm/cputime.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef __UM_CPUTIME_H
2#define __UM_CPUTIME_H
3
4#include <asm-generic/cputime.h>
5
6#endif /* __UM_CPUTIME_H */
diff --git a/arch/um/include/asm/device.h b/arch/um/include/asm/device.h
deleted file mode 100644
index d8f9872b0e2d..000000000000
--- a/arch/um/include/asm/device.h
+++ /dev/null
@@ -1,7 +0,0 @@
1/*
2 * Arch specific extensions to struct device
3 *
4 * This file is released under the GPLv2
5 */
6#include <asm-generic/device.h>
7
diff --git a/arch/um/include/asm/emergency-restart.h b/arch/um/include/asm/emergency-restart.h
deleted file mode 100644
index 108d8c48e42e..000000000000
--- a/arch/um/include/asm/emergency-restart.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _ASM_EMERGENCY_RESTART_H
2#define _ASM_EMERGENCY_RESTART_H
3
4#include <asm-generic/emergency-restart.h>
5
6#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/arch/um/include/asm/ftrace.h b/arch/um/include/asm/ftrace.h
deleted file mode 100644
index 40a8c178f10d..000000000000
--- a/arch/um/include/asm/ftrace.h
+++ /dev/null
@@ -1 +0,0 @@
1/* empty */
diff --git a/arch/um/include/asm/futex.h b/arch/um/include/asm/futex.h
deleted file mode 100644
index 6a332a9f099c..000000000000
--- a/arch/um/include/asm/futex.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _ASM_FUTEX_H
2#define _ASM_FUTEX_H
3
4#include <asm-generic/futex.h>
5
6#endif
diff --git a/arch/um/include/asm/hardirq.h b/arch/um/include/asm/hardirq.h
deleted file mode 100644
index fb3c05a0cbbf..000000000000
--- a/arch/um/include/asm/hardirq.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/hardirq.h>
diff --git a/arch/um/include/asm/hw_irq.h b/arch/um/include/asm/hw_irq.h
deleted file mode 100644
index 1cf84cf5f21a..000000000000
--- a/arch/um/include/asm/hw_irq.h
+++ /dev/null
@@ -1,7 +0,0 @@
1#ifndef _ASM_UM_HW_IRQ_H
2#define _ASM_UM_HW_IRQ_H
3
4#include "asm/irq.h"
5#include "asm/archparam.h"
6
7#endif
diff --git a/arch/um/include/asm/irq_regs.h b/arch/um/include/asm/irq_regs.h
deleted file mode 100644
index 3dd9c0b70270..000000000000
--- a/arch/um/include/asm/irq_regs.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/irq_regs.h>
diff --git a/arch/um/include/asm/irqflags.h b/arch/um/include/asm/irqflags.h
index 659b9abdfdba..c780d8a16773 100644
--- a/arch/um/include/asm/irqflags.h
+++ b/arch/um/include/asm/irqflags.h
@@ -1,6 +1,42 @@
1#ifndef __UM_IRQFLAGS_H 1#ifndef __UM_IRQFLAGS_H
2#define __UM_IRQFLAGS_H 2#define __UM_IRQFLAGS_H
3 3
4/* Empty for now */ 4extern int get_signals(void);
5extern int set_signals(int enable);
6extern void block_signals(void);
7extern void unblock_signals(void);
8
9static inline unsigned long arch_local_save_flags(void)
10{
11 return get_signals();
12}
13
14static inline void arch_local_irq_restore(unsigned long flags)
15{
16 set_signals(flags);
17}
18
19static inline void arch_local_irq_enable(void)
20{
21 unblock_signals();
22}
23
24static inline void arch_local_irq_disable(void)
25{
26 block_signals();
27}
28
29static inline unsigned long arch_local_irq_save(void)
30{
31 unsigned long flags;
32 flags = arch_local_save_flags();
33 arch_local_irq_disable();
34 return flags;
35}
36
37static inline bool arch_irqs_disabled(void)
38{
39 return arch_local_save_flags() == 0;
40}
5 41
6#endif 42#endif
diff --git a/arch/um/include/asm/kdebug.h b/arch/um/include/asm/kdebug.h
deleted file mode 100644
index 6ece1b037665..000000000000
--- a/arch/um/include/asm/kdebug.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/kdebug.h>
diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h
index cf259de51531..30509b9f37fd 100644
--- a/arch/um/include/asm/mmu.h
+++ b/arch/um/include/asm/mmu.h
@@ -1,12 +1,24 @@
1/* 1/*
2 * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) 2 * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL 3 * Licensed under the GPL
4 */ 4 */
5 5
6#ifndef __MMU_H 6#ifndef __ARCH_UM_MMU_H
7#define __MMU_H 7#define __ARCH_UM_MMU_H
8 8
9#include "um_mmu.h" 9#include "mm_id.h"
10#include <asm/mm_context.h>
10 11
11#endif 12typedef struct mm_context {
13 struct mm_id id;
14 struct uml_arch_mm_context arch;
15 struct page **stub_pages;
16} mm_context_t;
17
18extern void __switch_mm(struct mm_id * mm_idp);
12 19
20/* Avoid tangled inclusion with asm/ldt.h */
21extern long init_new_ldt(struct mm_context *to_mm, struct mm_context *from_mm);
22extern void free_ldt(struct mm_context *mm);
23
24#endif
diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
index 34d813011b7a..591b3d8d7614 100644
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -6,15 +6,12 @@
6#ifndef __UM_MMU_CONTEXT_H 6#ifndef __UM_MMU_CONTEXT_H
7#define __UM_MMU_CONTEXT_H 7#define __UM_MMU_CONTEXT_H
8 8
9#include "linux/sched.h" 9#include <linux/sched.h>
10#include "um_mmu.h" 10#include <asm/mmu.h>
11 11
12extern void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); 12extern void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
13extern void arch_exit_mmap(struct mm_struct *mm); 13extern void arch_exit_mmap(struct mm_struct *mm);
14 14
15#define get_mmu_context(task) do ; while(0)
16#define activate_context(tsk) do ; while(0)
17
18#define deactivate_mm(tsk,mm) do { } while (0) 15#define deactivate_mm(tsk,mm) do { } while (0)
19 16
20extern void force_flush_all(void); 17extern void force_flush_all(void);
diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h
index 4cc9b6cf480a..7cfc3cedce84 100644
--- a/arch/um/include/asm/page.h
+++ b/arch/um/include/asm/page.h
@@ -19,7 +19,7 @@
19struct page; 19struct page;
20 20
21#include <linux/types.h> 21#include <linux/types.h>
22#include <sysdep/vm-flags.h> 22#include <asm/vm-flags.h>
23 23
24/* 24/*
25 * These are used to make use of C type-checking.. 25 * These are used to make use of C type-checking..
diff --git a/arch/um/include/asm/page_offset.h b/arch/um/include/asm/page_offset.h
deleted file mode 100644
index 1c168dfbf359..000000000000
--- a/arch/um/include/asm/page_offset.h
+++ /dev/null
@@ -1 +0,0 @@
1#define PAGE_OFFSET_RAW (uml_physmem)
diff --git a/arch/um/include/asm/pda.h b/arch/um/include/asm/pda.h
deleted file mode 100644
index ddcd774fc2a0..000000000000
--- a/arch/um/include/asm/pda.h
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * Copyright 2003 PathScale, Inc.
3 *
4 * Licensed under the GPL
5 */
6
7#ifndef __UM_PDA_X86_64_H
8#define __UM_PDA_X86_64_H
9
10/* XXX */
11struct foo {
12 unsigned int __softirq_pending;
13 unsigned int __nmi_count;
14};
15
16extern struct foo me;
17
18#define read_pda(me) (&me)
19
20#endif
21
diff --git a/arch/um/include/asm/percpu.h b/arch/um/include/asm/percpu.h
deleted file mode 100644
index efe7508d8abd..000000000000
--- a/arch/um/include/asm/percpu.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef __UM_PERCPU_H
2#define __UM_PERCPU_H
3
4#include <asm-generic/percpu.h>
5
6#endif /* __UM_PERCPU_H */
diff --git a/arch/um/include/asm/ptrace-generic.h b/arch/um/include/asm/ptrace-generic.h
index 1a7d2757fe05..f605d3c4844c 100644
--- a/arch/um/include/asm/ptrace-generic.h
+++ b/arch/um/include/asm/ptrace-generic.h
@@ -23,17 +23,10 @@ struct pt_regs {
23#define PT_REGS_IP(r) UPT_IP(&(r)->regs) 23#define PT_REGS_IP(r) UPT_IP(&(r)->regs)
24#define PT_REGS_SP(r) UPT_SP(&(r)->regs) 24#define PT_REGS_SP(r) UPT_SP(&(r)->regs)
25 25
26#define PT_REG(r, reg) UPT_REG(&(r)->regs, reg)
27#define PT_REGS_SET(r, reg, val) UPT_SET(&(r)->regs, reg, val)
28
29#define PT_REGS_SET_SYSCALL_RETURN(r, res) \
30 UPT_SET_SYSCALL_RETURN(&(r)->regs, res)
31#define PT_REGS_RESTART_SYSCALL(r) UPT_RESTART_SYSCALL(&(r)->regs) 26#define PT_REGS_RESTART_SYSCALL(r) UPT_RESTART_SYSCALL(&(r)->regs)
32 27
33#define PT_REGS_SYSCALL_NR(r) UPT_SYSCALL_NR(&(r)->regs) 28#define PT_REGS_SYSCALL_NR(r) UPT_SYSCALL_NR(&(r)->regs)
34 29
35#define PT_REGS_SC(r) UPT_SC(&(r)->regs)
36
37#define instruction_pointer(regs) PT_REGS_IP(regs) 30#define instruction_pointer(regs) PT_REGS_IP(regs)
38 31
39struct task_struct; 32struct task_struct;
diff --git a/arch/um/include/asm/sections.h b/arch/um/include/asm/sections.h
deleted file mode 100644
index 6b0231eefea8..000000000000
--- a/arch/um/include/asm/sections.h
+++ /dev/null
@@ -1,7 +0,0 @@
1#ifndef _UM_SECTIONS_H
2#define _UM_SECTIONS_H
3
4/* nothing to see, move along */
5#include <asm-generic/sections.h>
6
7#endif
diff --git a/arch/um/include/asm/system.h b/arch/um/include/asm/system.h
deleted file mode 100644
index 68a90ecd1450..000000000000
--- a/arch/um/include/asm/system.h
+++ /dev/null
@@ -1,47 +0,0 @@
1#ifndef __UM_SYSTEM_GENERIC_H
2#define __UM_SYSTEM_GENERIC_H
3
4#include "sysdep/system.h"
5
6extern int get_signals(void);
7extern int set_signals(int enable);
8extern void block_signals(void);
9extern void unblock_signals(void);
10
11static inline unsigned long arch_local_save_flags(void)
12{
13 return get_signals();
14}
15
16static inline void arch_local_irq_restore(unsigned long flags)
17{
18 set_signals(flags);
19}
20
21static inline void arch_local_irq_enable(void)
22{
23 unblock_signals();
24}
25
26static inline void arch_local_irq_disable(void)
27{
28 block_signals();
29}
30
31static inline unsigned long arch_local_irq_save(void)
32{
33 unsigned long flags;
34 flags = arch_local_save_flags();
35 arch_local_irq_disable();
36 return flags;
37}
38
39static inline bool arch_irqs_disabled(void)
40{
41 return arch_local_save_flags() == 0;
42}
43
44extern void *_switch_to(void *prev, void *next, void *last);
45#define switch_to(prev, next, last) prev = _switch_to(prev, next, last)
46
47#endif
diff --git a/arch/um/include/asm/topology.h b/arch/um/include/asm/topology.h
deleted file mode 100644
index 0905e4f21d42..000000000000
--- a/arch/um/include/asm/topology.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _ASM_UM_TOPOLOGY_H
2#define _ASM_UM_TOPOLOGY_H
3
4#include <asm-generic/topology.h>
5
6#endif
diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h
index b9a895d6fa1d..3f22fbf7ca1d 100644
--- a/arch/um/include/asm/uaccess.h
+++ b/arch/um/include/asm/uaccess.h
@@ -6,15 +6,15 @@
6#ifndef __UM_UACCESS_H 6#ifndef __UM_UACCESS_H
7#define __UM_UACCESS_H 7#define __UM_UACCESS_H
8 8
9#include <asm/errno.h>
10#include <asm/processor.h>
11
12/* thread_info has a mm_segment_t in it, so put the definition up here */ 9/* thread_info has a mm_segment_t in it, so put the definition up here */
13typedef struct { 10typedef struct {
14 unsigned long seg; 11 unsigned long seg;
15} mm_segment_t; 12} mm_segment_t;
16 13
17#include "linux/thread_info.h" 14#include <linux/thread_info.h>
15#include <linux/errno.h>
16#include <asm/processor.h>
17#include <asm/elf.h>
18 18
19#define VERIFY_READ 0 19#define VERIFY_READ 0
20#define VERIFY_WRITE 1 20#define VERIFY_WRITE 1
@@ -38,7 +38,86 @@ typedef struct {
38 38
39#define segment_eq(a, b) ((a).seg == (b).seg) 39#define segment_eq(a, b) ((a).seg == (b).seg)
40 40
41#include "um_uaccess.h" 41#define __under_task_size(addr, size) \
42 (((unsigned long) (addr) < TASK_SIZE) && \
43 (((unsigned long) (addr) + (size)) < TASK_SIZE))
44
45#define __access_ok_vsyscall(type, addr, size) \
46 ((type == VERIFY_READ) && \
47 ((unsigned long) (addr) >= FIXADDR_USER_START) && \
48 ((unsigned long) (addr) + (size) <= FIXADDR_USER_END) && \
49 ((unsigned long) (addr) + (size) >= (unsigned long)(addr)))
50
51#define __addr_range_nowrap(addr, size) \
52 ((unsigned long) (addr) <= ((unsigned long) (addr) + (size)))
53
54#define access_ok(type, addr, size) \
55 (__addr_range_nowrap(addr, size) && \
56 (__under_task_size(addr, size) || \
57 __access_ok_vsyscall(type, addr, size) || \
58 segment_eq(get_fs(), KERNEL_DS)))
59
60extern int copy_from_user(void *to, const void __user *from, int n);
61extern int copy_to_user(void __user *to, const void *from, int n);
62
63/*
64 * strncpy_from_user: - Copy a NUL terminated string from userspace.
65 * @dst: Destination address, in kernel space. This buffer must be at
66 * least @count bytes long.
67 * @src: Source address, in user space.
68 * @count: Maximum number of bytes to copy, including the trailing NUL.
69 *
70 * Copies a NUL-terminated string from userspace to kernel space.
71 *
72 * On success, returns the length of the string (not including the trailing
73 * NUL).
74 *
75 * If access to userspace fails, returns -EFAULT (some data may have been
76 * copied).
77 *
78 * If @count is smaller than the length of the string, copies @count bytes
79 * and returns @count.
80 */
81
82extern int strncpy_from_user(char *dst, const char __user *src, int count);
83
84/*
85 * __clear_user: - Zero a block of memory in user space, with less checking.
86 * @to: Destination address, in user space.
87 * @n: Number of bytes to zero.
88 *
89 * Zero a block of memory in user space. Caller must check
90 * the specified block with access_ok() before calling this function.
91 *
92 * Returns number of bytes that could not be cleared.
93 * On success, this will be zero.
94 */
95extern int __clear_user(void __user *mem, int len);
96
97/*
98 * clear_user: - Zero a block of memory in user space.
99 * @to: Destination address, in user space.
100 * @n: Number of bytes to zero.
101 *
102 * Zero a block of memory in user space.
103 *
104 * Returns number of bytes that could not be cleared.
105 * On success, this will be zero.
106 */
107extern int clear_user(void __user *mem, int len);
108
109/*
110 * strlen_user: - Get the size of a string in user space.
111 * @str: The string to measure.
112 * @n: The maximum valid length
113 *
114 * Get the size of a NUL-terminated string in user space.
115 *
116 * Returns the size of the string INCLUDING the terminating NUL.
117 * On exception, returns 0.
118 * If the string is too long, returns a value greater than @n.
119 */
120extern int strnlen_user(const void __user *str, int len);
42 121
43#define __copy_from_user(to, from, n) copy_from_user(to, from, n) 122#define __copy_from_user(to, from, n) copy_from_user(to, from, n)
44 123
diff --git a/arch/um/include/asm/xor.h b/arch/um/include/asm/xor.h
deleted file mode 100644
index a19db3e17241..000000000000
--- a/arch/um/include/asm/xor.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef __UM_XOR_H
2#define __UM_XOR_H
3
4#include "asm-generic/xor.h"
5
6#endif
diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h
index a92b678503cf..896e16602176 100644
--- a/arch/um/include/shared/as-layout.h
+++ b/arch/um/include/shared/as-layout.h
@@ -6,7 +6,7 @@
6#ifndef __START_H__ 6#ifndef __START_H__
7#define __START_H__ 7#define __START_H__
8 8
9#include "kern_constants.h" 9#include <generated/asm-offsets.h>
10 10
11/* 11/*
12 * Stolen from linux/const.h, which can't be directly included since 12 * Stolen from linux/const.h, which can't be directly included since
diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h
index 72009c7e3210..d7fe563aa7e7 100644
--- a/arch/um/include/shared/common-offsets.h
+++ b/arch/um/include/shared/common-offsets.h
@@ -2,7 +2,6 @@
2 2
3DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); 3DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE);
4 4
5OFFSET(HOST_TASK_REGS, task_struct, thread.regs);
6OFFSET(HOST_TASK_PID, task_struct, pid); 5OFFSET(HOST_TASK_PID, task_struct, pid);
7 6
8DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE); 7DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE);
diff --git a/arch/um/include/shared/initrd.h b/arch/um/include/shared/initrd.h
deleted file mode 100644
index 22673bcc273d..000000000000
--- a/arch/um/include/shared/initrd.h
+++ /dev/null
@@ -1,12 +0,0 @@
1/*
2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6#ifndef __INITRD_USER_H__
7#define __INITRD_USER_H__
8
9extern int load_initrd(char *filename, void *buf, int size);
10
11#endif
12
diff --git a/arch/um/include/shared/kern.h b/arch/um/include/shared/kern.h
index 4ce3fc650e57..6cd01240bbf0 100644
--- a/arch/um/include/shared/kern.h
+++ b/arch/um/include/shared/kern.h
@@ -13,28 +13,10 @@
13 * includes. 13 * includes.
14 */ 14 */
15 15
16extern int errno;
17
18extern int clone(int (*proc)(void *), void *sp, int flags, void *data);
19extern int sleep(int);
20extern int printf(const char *fmt, ...); 16extern int printf(const char *fmt, ...);
21extern char *strerror(int errnum);
22extern char *ptsname(int __fd);
23extern int munmap(void *, int);
24extern void *sbrk(int increment); 17extern void *sbrk(int increment);
25extern void *malloc(int size);
26extern void perror(char *err);
27extern int kill(int pid, int sig);
28extern int getuid(void);
29extern int getgid(void);
30extern int pause(void); 18extern int pause(void);
31extern int write(int, const void *, int);
32extern void exit(int); 19extern void exit(int);
33extern int close(int);
34extern int read(unsigned int, char *, int);
35extern int pipe(int *);
36extern int sched_yield(void);
37extern int ptrace(int op, int pid, long addr, long data);
38 20
39#endif 21#endif
40 22
diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h
index 3c341222d252..0f1483852460 100644
--- a/arch/um/include/shared/kern_util.h
+++ b/arch/um/include/shared/kern_util.h
@@ -21,7 +21,6 @@ extern unsigned long alloc_stack(int order, int atomic);
21extern void free_stack(unsigned long stack, int order); 21extern void free_stack(unsigned long stack, int order);
22 22
23extern int do_signal(void); 23extern int do_signal(void);
24extern void copy_sc(struct uml_pt_regs *regs, void *from);
25extern void interrupt_end(void); 24extern void interrupt_end(void);
26extern void relay_signal(int sig, struct uml_pt_regs *regs); 25extern void relay_signal(int sig, struct uml_pt_regs *regs);
27 26
diff --git a/arch/um/include/shared/ldt.h b/arch/um/include/shared/ldt.h
deleted file mode 100644
index a7f999a58774..000000000000
--- a/arch/um/include/shared/ldt.h
+++ /dev/null
@@ -1,37 +0,0 @@
1/*
2 * Copyright (C) 2004 Fujitsu Siemens Computers GmbH
3 * Licensed under the GPL
4 *
5 * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
6 */
7
8#ifndef __ASM_LDT_H
9#define __ASM_LDT_H
10
11#include <linux/mutex.h>
12#include <sysdep/host_ldt.h>
13
14extern void ldt_host_info(void);
15
16#define LDT_PAGES_MAX \
17 ((LDT_ENTRIES * LDT_ENTRY_SIZE)/PAGE_SIZE)
18#define LDT_ENTRIES_PER_PAGE \
19 (PAGE_SIZE/LDT_ENTRY_SIZE)
20#define LDT_DIRECT_ENTRIES \
21 ((LDT_PAGES_MAX*sizeof(void *))/LDT_ENTRY_SIZE)
22
23struct ldt_entry {
24 __u32 a;
25 __u32 b;
26};
27
28typedef struct uml_ldt {
29 int entry_count;
30 struct mutex lock;
31 union {
32 struct ldt_entry * pages[LDT_PAGES_MAX];
33 struct ldt_entry entries[LDT_DIRECT_ENTRIES];
34 } u;
35} uml_ldt_t;
36
37#endif
diff --git a/arch/um/include/shared/mem_kern.h b/arch/um/include/shared/mem_kern.h
deleted file mode 100644
index 69be0fd0ce4b..000000000000
--- a/arch/um/include/shared/mem_kern.h
+++ /dev/null
@@ -1,20 +0,0 @@
1/*
2 * Copyright (C) 2003 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#ifndef __MEM_KERN_H__
7#define __MEM_KERN_H__
8
9#include "linux/list.h"
10#include "linux/types.h"
11
12struct remapper {
13 struct list_head list;
14 int (*proc)(int, unsigned long, int, __u64);
15};
16
17extern void register_remapper(struct remapper *info);
18
19#endif
20
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 83c7c2ecd614..89b686c1a3ea 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -10,7 +10,6 @@
10#include "irq_user.h" 10#include "irq_user.h"
11#include "longjmp.h" 11#include "longjmp.h"
12#include "mm_id.h" 12#include "mm_id.h"
13#include "sysdep/tls.h"
14 13
15#define CATCH_EINTR(expr) while ((errno = 0, ((expr) < 0)) && (errno == EINTR)) 14#define CATCH_EINTR(expr) while ((errno = 0, ((expr) < 0)) && (errno == EINTR))
16 15
@@ -203,12 +202,6 @@ extern int os_drop_memory(void *addr, int length);
203extern int can_drop_memory(void); 202extern int can_drop_memory(void);
204extern void os_flush_stdout(void); 203extern void os_flush_stdout(void);
205 204
206/* uaccess.c */
207extern unsigned long __do_user_copy(void *to, const void *from, int n,
208 void **fault_addr, jmp_buf **fault_catcher,
209 void (*op)(void *to, const void *from,
210 int n), int *faulted_out);
211
212/* execvp.c */ 205/* execvp.c */
213extern int execvp_noalloc(char *buf, const char *file, char *const argv[]); 206extern int execvp_noalloc(char *buf, const char *file, char *const argv[]);
214/* helper.c */ 207/* helper.c */
@@ -218,10 +211,6 @@ extern int run_helper_thread(int (*proc)(void *), void *arg,
218extern int helper_wait(int pid); 211extern int helper_wait(int pid);
219 212
220 213
221/* tls.c */
222extern int os_set_thread_area(user_desc_t *info, int pid);
223extern int os_get_thread_area(user_desc_t *info, int pid);
224
225/* umid.c */ 214/* umid.c */
226extern int umid_file_name(char *name, char *buf, int len); 215extern int umid_file_name(char *name, char *buf, int len);
227extern int set_umid(char *name); 216extern int set_umid(char *name);
@@ -231,7 +220,7 @@ extern char *get_umid(void);
231extern void timer_init(void); 220extern void timer_init(void);
232extern void set_sigstack(void *sig_stack, int size); 221extern void set_sigstack(void *sig_stack, int size);
233extern void remove_sigstack(void); 222extern void remove_sigstack(void);
234extern void set_handler(int sig, void (*handler)(int), int flags, ...); 223extern void set_handler(int sig);
235extern int change_sig(int signal, int on); 224extern int change_sig(int signal, int on);
236extern void block_signals(void); 225extern void block_signals(void);
237extern void unblock_signals(void); 226extern void unblock_signals(void);
diff --git a/arch/um/include/shared/process.h b/arch/um/include/shared/process.h
deleted file mode 100644
index bb873a51262e..000000000000
--- a/arch/um/include/shared/process.h
+++ /dev/null
@@ -1,17 +0,0 @@
1/*
2 * Copyright (C) 2000 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL
4 */
5
6#ifndef __PROCESS_H__
7#define __PROCESS_H__
8
9#include <signal.h>
10
11/* Copied from linux/compiler-gcc.h since we can't include it directly */
12#define barrier() __asm__ __volatile__("": : :"memory")
13
14extern void sig_handler(int sig, struct sigcontext *sc);
15extern void alarm_handler(int sig, struct sigcontext *sc);
16
17#endif
diff --git a/arch/um/include/shared/ptrace_user.h b/arch/um/include/shared/ptrace_user.h
index 7fd8539bc19a..56b2f284b108 100644
--- a/arch/um/include/shared/ptrace_user.h
+++ b/arch/um/include/shared/ptrace_user.h
@@ -6,7 +6,8 @@
6#ifndef __PTRACE_USER_H__ 6#ifndef __PTRACE_USER_H__
7#define __PTRACE_USER_H__ 7#define __PTRACE_USER_H__
8 8
9#include "sysdep/ptrace_user.h" 9#include <sys/ptrace.h>
10#include <sysdep/ptrace_user.h>
10 11
11extern int ptrace_getregs(long pid, unsigned long *regs_out); 12extern int ptrace_getregs(long pid, unsigned long *regs_out);
12extern int ptrace_setregs(long pid, unsigned long *regs_in); 13extern int ptrace_setregs(long pid, unsigned long *regs_in);
diff --git a/arch/um/include/shared/skas_ptregs.h b/arch/um/include/shared/skas_ptregs.h
deleted file mode 100644
index 73db19e9c077..000000000000
--- a/arch/um/include/shared/skas_ptregs.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef __SKAS_PT_REGS_
2#define __SKAS_PT_REGS_
3
4#include <user_constants.h>
5
6#endif
diff --git a/arch/um/include/shared/syscall.h b/arch/um/include/shared/syscall.h
deleted file mode 100644
index dda1df901a08..000000000000
--- a/arch/um/include/shared/syscall.h
+++ /dev/null
@@ -1,12 +0,0 @@
1/*
2 * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6#ifndef __SYSCALL_USER_H
7#define __SYSCALL_USER_H
8
9extern int record_syscall_start(int syscall);
10extern void record_syscall_end(int index, long result);
11
12#endif
diff --git a/arch/um/include/shared/task.h b/arch/um/include/shared/task.h
deleted file mode 100644
index 3fe726b3cf48..000000000000
--- a/arch/um/include/shared/task.h
+++ /dev/null
@@ -1,9 +0,0 @@
1#ifndef __TASK_H
2#define __TASK_H
3
4#include <kern_constants.h>
5
6#define TASK_REGS(task) ((struct uml_pt_regs *) &(((char *) (task))[HOST_TASK_REGS]))
7#define TASK_PID(task) *((int *) &(((char *) (task))[HOST_TASK_PID]))
8
9#endif
diff --git a/arch/um/include/shared/tlb.h b/arch/um/include/shared/tlb.h
deleted file mode 100644
index ecd2265b301b..000000000000
--- a/arch/um/include/shared/tlb.h
+++ /dev/null
@@ -1,15 +0,0 @@
1/*
2 * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6#ifndef __TLB_H__
7#define __TLB_H__
8
9#include "um_mmu.h"
10
11extern void force_flush_all(void);
12extern int flush_tlb_kernel_range_common(unsigned long start,
13 unsigned long end);
14
15#endif
diff --git a/arch/um/include/shared/um_malloc.h b/arch/um/include/shared/um_malloc.h
index c554d706d106..6395fef6b69b 100644
--- a/arch/um/include/shared/um_malloc.h
+++ b/arch/um/include/shared/um_malloc.h
@@ -6,7 +6,7 @@
6#ifndef __UM_MALLOC_H__ 6#ifndef __UM_MALLOC_H__
7#define __UM_MALLOC_H__ 7#define __UM_MALLOC_H__
8 8
9#include "kern_constants.h" 9#include <generated/asm-offsets.h>
10 10
11extern void *uml_kmalloc(int size, int flags); 11extern void *uml_kmalloc(int size, int flags);
12extern void kfree(const void *ptr); 12extern void kfree(const void *ptr);
diff --git a/arch/um/include/shared/um_mmu.h b/arch/um/include/shared/um_mmu.h
deleted file mode 100644
index b1a7e47d1027..000000000000
--- a/arch/um/include/shared/um_mmu.h
+++ /dev/null
@@ -1,24 +0,0 @@
1/*
2 * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL
4 */
5
6#ifndef __ARCH_UM_MMU_H
7#define __ARCH_UM_MMU_H
8
9#include "mm_id.h"
10#include "ldt.h"
11
12typedef struct mm_context {
13 struct mm_id id;
14 struct uml_ldt ldt;
15 struct page **stub_pages;
16} mm_context_t;
17
18extern void __switch_mm(struct mm_id * mm_idp);
19
20/* Avoid tangled inclusion with asm/ldt.h */
21extern long init_new_ldt(struct mm_context *to_mm, struct mm_context *from_mm);
22extern void free_ldt(struct mm_context *mm);
23
24#endif
diff --git a/arch/um/include/shared/um_uaccess.h b/arch/um/include/shared/um_uaccess.h
deleted file mode 100644
index 45c04999d670..000000000000
--- a/arch/um/include/shared/um_uaccess.h
+++ /dev/null
@@ -1,97 +0,0 @@
1/*
2 * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL
4 */
5
6#ifndef __ARCH_UM_UACCESS_H
7#define __ARCH_UM_UACCESS_H
8
9#include <asm/elf.h>
10#include <asm/fixmap.h>
11#include "sysdep/archsetjmp.h"
12
13#define __under_task_size(addr, size) \
14 (((unsigned long) (addr) < TASK_SIZE) && \
15 (((unsigned long) (addr) + (size)) < TASK_SIZE))
16
17#define __access_ok_vsyscall(type, addr, size) \
18 ((type == VERIFY_READ) && \
19 ((unsigned long) (addr) >= FIXADDR_USER_START) && \
20 ((unsigned long) (addr) + (size) <= FIXADDR_USER_END) && \
21 ((unsigned long) (addr) + (size) >= (unsigned long)(addr)))
22
23#define __addr_range_nowrap(addr, size) \
24 ((unsigned long) (addr) <= ((unsigned long) (addr) + (size)))
25
26#define access_ok(type, addr, size) \
27 (__addr_range_nowrap(addr, size) && \
28 (__under_task_size(addr, size) || \
29 __access_ok_vsyscall(type, addr, size) || \
30 segment_eq(get_fs(), KERNEL_DS)))
31
32extern int copy_from_user(void *to, const void __user *from, int n);
33extern int copy_to_user(void __user *to, const void *from, int n);
34
35extern int __do_copy_to_user(void *to, const void *from, int n,
36 void **fault_addr, jmp_buf **fault_catcher);
37
38/*
39 * strncpy_from_user: - Copy a NUL terminated string from userspace.
40 * @dst: Destination address, in kernel space. This buffer must be at
41 * least @count bytes long.
42 * @src: Source address, in user space.
43 * @count: Maximum number of bytes to copy, including the trailing NUL.
44 *
45 * Copies a NUL-terminated string from userspace to kernel space.
46 *
47 * On success, returns the length of the string (not including the trailing
48 * NUL).
49 *
50 * If access to userspace fails, returns -EFAULT (some data may have been
51 * copied).
52 *
53 * If @count is smaller than the length of the string, copies @count bytes
54 * and returns @count.
55 */
56
57extern int strncpy_from_user(char *dst, const char __user *src, int count);
58
59/*
60 * __clear_user: - Zero a block of memory in user space, with less checking.
61 * @to: Destination address, in user space.
62 * @n: Number of bytes to zero.
63 *
64 * Zero a block of memory in user space. Caller must check
65 * the specified block with access_ok() before calling this function.
66 *
67 * Returns number of bytes that could not be cleared.
68 * On success, this will be zero.
69 */
70extern int __clear_user(void __user *mem, int len);
71
72/*
73 * clear_user: - Zero a block of memory in user space.
74 * @to: Destination address, in user space.
75 * @n: Number of bytes to zero.
76 *
77 * Zero a block of memory in user space.
78 *
79 * Returns number of bytes that could not be cleared.
80 * On success, this will be zero.
81 */
82extern int clear_user(void __user *mem, int len);
83
84/*
85 * strlen_user: - Get the size of a string in user space.
86 * @str: The string to measure.
87 * @n: The maximum valid length
88 *
89 * Get the size of a NUL-terminated string in user space.
90 *
91 * Returns the size of the string INCLUDING the terminating NUL.
92 * On exception, returns 0.
93 * If the string is too long, returns a value greater than @n.
94 */
95extern int strnlen_user(const void __user *str, int len);
96
97#endif
diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h
index 293f7c794faa..4fa82c055aab 100644
--- a/arch/um/include/shared/user.h
+++ b/arch/um/include/shared/user.h
@@ -6,7 +6,7 @@
6#ifndef __USER_H__ 6#ifndef __USER_H__
7#define __USER_H__ 7#define __USER_H__
8 8
9#include "kern_constants.h" 9#include <generated/asm-offsets.h>
10 10
11/* 11/*
12 * The usual definition - copied here because the kernel provides its own, 12 * The usual definition - copied here because the kernel provides its own,
@@ -36,10 +36,11 @@ static inline int printk(const char *fmt, ...)
36} 36}
37#endif 37#endif
38 38
39extern void schedule(void);
40extern int in_aton(char *str); 39extern int in_aton(char *str);
41extern int open_gdb_chan(void);
42extern size_t strlcpy(char *, const char *, size_t); 40extern size_t strlcpy(char *, const char *, size_t);
43extern size_t strlcat(char *, const char *, size_t); 41extern size_t strlcat(char *, const char *, size_t);
44 42
43/* Copied from linux/compiler-gcc.h since we can't include it directly */
44#define barrier() __asm__ __volatile__("": : :"memory")
45
45#endif 46#endif
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index c4491c15afb2..bc494741b1f3 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -11,7 +11,7 @@ clean-files :=
11 11
12obj-y = config.o exec.o exitcode.o init_task.o irq.o ksyms.o mem.o \ 12obj-y = config.o exec.o exitcode.o init_task.o irq.o ksyms.o mem.o \
13 physmem.o process.o ptrace.o reboot.o sigio.o \ 13 physmem.o process.o ptrace.o reboot.o sigio.o \
14 signal.o smp.o syscall.o sysrq.o time.o tlb.o trap.o uaccess.o \ 14 signal.o smp.o syscall.o sysrq.o time.o tlb.o trap.o \
15 um_arch.o umid.o skas/ 15 um_arch.o umid.o skas/
16 16
17obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o 17obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 939a4a67f0fd..6cade9366364 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -3,14 +3,15 @@
3 * Licensed under the GPL 3 * Licensed under the GPL
4 */ 4 */
5 5
6#include "linux/stddef.h" 6#include <linux/stddef.h>
7#include "linux/fs.h" 7#include <linux/module.h>
8#include "linux/ptrace.h" 8#include <linux/fs.h>
9#include "linux/sched.h" 9#include <linux/ptrace.h>
10#include "linux/slab.h" 10#include <linux/sched.h>
11#include "asm/current.h" 11#include <linux/slab.h>
12#include "asm/processor.h" 12#include <asm/current.h>
13#include "asm/uaccess.h" 13#include <asm/processor.h>
14#include <asm/uaccess.h>
14#include "as-layout.h" 15#include "as-layout.h"
15#include "mem_user.h" 16#include "mem_user.h"
16#include "skas.h" 17#include "skas.h"
@@ -41,6 +42,7 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
41 PT_REGS_IP(regs) = eip; 42 PT_REGS_IP(regs) = eip;
42 PT_REGS_SP(regs) = esp; 43 PT_REGS_SP(regs) = esp;
43} 44}
45EXPORT_SYMBOL(start_thread);
44 46
45static long execve1(const char *file, 47static long execve1(const char *file,
46 const char __user *const __user *argv, 48 const char __user *const __user *argv,
diff --git a/arch/um/kernel/gmon_syms.c b/arch/um/kernel/gmon_syms.c
index 72eccd2a4113..e9bcf247bcee 100644
--- a/arch/um/kernel/gmon_syms.c
+++ b/arch/um/kernel/gmon_syms.c
@@ -7,18 +7,3 @@
7 7
8extern void __bb_init_func(void *) __attribute__((weak)); 8extern void __bb_init_func(void *) __attribute__((weak));
9EXPORT_SYMBOL(__bb_init_func); 9EXPORT_SYMBOL(__bb_init_func);
10
11/*
12 * This is defined (and referred to in profiling stub code) only by some GCC
13 * versions in libgcov.
14 *
15 * Since SuSE backported the fix, we cannot handle it depending on GCC version.
16 * So, unconditionally export it. But also give it a weak declaration, which
17 * will be overridden by any other one.
18 */
19
20extern void __gcov_init(void *) __attribute__((weak));
21EXPORT_SYMBOL(__gcov_init);
22
23extern void __gcov_merge_add(void *) __attribute__((weak));
24EXPORT_SYMBOL(__gcov_merge_add);
diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c
index d386c75c88eb..10cc18f729fd 100644
--- a/arch/um/kernel/initrd.c
+++ b/arch/um/kernel/initrd.c
@@ -7,12 +7,12 @@
7#include "linux/bootmem.h" 7#include "linux/bootmem.h"
8#include "linux/initrd.h" 8#include "linux/initrd.h"
9#include "asm/types.h" 9#include "asm/types.h"
10#include "initrd.h"
11#include "init.h" 10#include "init.h"
12#include "os.h" 11#include "os.h"
13 12
14/* Changed by uml_initrd_setup, which is a setup */ 13/* Changed by uml_initrd_setup, which is a setup */
15static char *initrd __initdata = NULL; 14static char *initrd __initdata = NULL;
15static int load_initrd(char *filename, void *buf, int size);
16 16
17static int __init read_initrd(void) 17static int __init read_initrd(void)
18{ 18{
@@ -62,7 +62,7 @@ __uml_setup("initrd=", uml_initrd_setup,
62" name of the file containing the image.\n\n" 62" name of the file containing the image.\n\n"
63); 63);
64 64
65int load_initrd(char *filename, void *buf, int size) 65static int load_initrd(char *filename, void *buf, int size)
66{ 66{
67 int fd, n; 67 int fd, n;
68 68
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 9e485c770308..71b8c947e5ef 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -258,6 +258,7 @@ void deactivate_fd(int fd, int irqnum)
258 258
259 ignore_sigio_fd(fd); 259 ignore_sigio_fd(fd);
260} 260}
261EXPORT_SYMBOL(deactivate_fd);
261 262
262/* 263/*
263 * Called just before shutdown in order to provide a clean exec 264 * Called just before shutdown in order to provide a clean exec
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index 0ae0dfcfbffb..e17bea0b22e1 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -3,33 +3,11 @@
3 * Licensed under the GPL 3 * Licensed under the GPL
4 */ 4 */
5 5
6#include "linux/module.h" 6#include <linux/module.h>
7#include "linux/syscalls.h"
8#include "asm/tlbflush.h"
9#include "asm/uaccess.h"
10#include "as-layout.h"
11#include "kern_util.h"
12#include "mem_user.h"
13#include "os.h" 7#include "os.h"
14 8
15EXPORT_SYMBOL(uml_physmem);
16EXPORT_SYMBOL(set_signals); 9EXPORT_SYMBOL(set_signals);
17EXPORT_SYMBOL(get_signals); 10EXPORT_SYMBOL(get_signals);
18EXPORT_SYMBOL(kernel_thread);
19EXPORT_SYMBOL(sys_waitpid);
20EXPORT_SYMBOL(flush_tlb_range);
21
22EXPORT_SYMBOL(high_physmem);
23EXPORT_SYMBOL(empty_zero_page);
24EXPORT_SYMBOL(handle_page_fault);
25EXPORT_SYMBOL(find_iomem);
26
27EXPORT_SYMBOL(strnlen_user);
28EXPORT_SYMBOL(strncpy_from_user);
29EXPORT_SYMBOL(copy_to_user);
30EXPORT_SYMBOL(copy_from_user);
31EXPORT_SYMBOL(clear_user);
32EXPORT_SYMBOL(uml_strdup);
33 11
34EXPORT_SYMBOL(os_stat_fd); 12EXPORT_SYMBOL(os_stat_fd);
35EXPORT_SYMBOL(os_stat_file); 13EXPORT_SYMBOL(os_stat_file);
@@ -57,24 +35,10 @@ EXPORT_SYMBOL(os_connect_socket);
57EXPORT_SYMBOL(os_accept_connection); 35EXPORT_SYMBOL(os_accept_connection);
58EXPORT_SYMBOL(os_rcv_fd); 36EXPORT_SYMBOL(os_rcv_fd);
59EXPORT_SYMBOL(run_helper); 37EXPORT_SYMBOL(run_helper);
60EXPORT_SYMBOL(start_thread);
61EXPORT_SYMBOL(os_major); 38EXPORT_SYMBOL(os_major);
62EXPORT_SYMBOL(os_minor); 39EXPORT_SYMBOL(os_minor);
63EXPORT_SYMBOL(os_makedev); 40EXPORT_SYMBOL(os_makedev);
64 41
65EXPORT_SYMBOL(add_sigio_fd); 42EXPORT_SYMBOL(add_sigio_fd);
66EXPORT_SYMBOL(ignore_sigio_fd); 43EXPORT_SYMBOL(ignore_sigio_fd);
67EXPORT_SYMBOL(deactivate_fd);
68EXPORT_SYMBOL(sigio_broken); 44EXPORT_SYMBOL(sigio_broken);
69
70#ifdef CONFIG_SMP
71
72/* required for SMP */
73
74extern void __write_lock_failed(rwlock_t *rw);
75EXPORT_SYMBOL(__write_lock_failed);
76
77extern void __read_lock_failed(rwlock_t *rw);
78EXPORT_SYMBOL(__read_lock_failed);
79
80#endif
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 8137ccc9635b..ebb86b218445 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -4,6 +4,7 @@
4 */ 4 */
5 5
6#include <linux/stddef.h> 6#include <linux/stddef.h>
7#include <linux/module.h>
7#include <linux/bootmem.h> 8#include <linux/bootmem.h>
8#include <linux/highmem.h> 9#include <linux/highmem.h>
9#include <linux/mm.h> 10#include <linux/mm.h>
@@ -20,6 +21,7 @@
20 21
21/* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */ 22/* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */
22unsigned long *empty_zero_page = NULL; 23unsigned long *empty_zero_page = NULL;
24EXPORT_SYMBOL(empty_zero_page);
23/* allocated in paging_init and unchanged thereafter */ 25/* allocated in paging_init and unchanged thereafter */
24static unsigned long *empty_bad_page = NULL; 26static unsigned long *empty_bad_page = NULL;
25 27
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index a1a9090254c2..f116db15d402 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -3,20 +3,22 @@
3 * Licensed under the GPL 3 * Licensed under the GPL
4 */ 4 */
5 5
6#include "linux/bootmem.h" 6#include <linux/module.h>
7#include "linux/mm.h" 7#include <linux/bootmem.h>
8#include "linux/pfn.h" 8#include <linux/mm.h>
9#include "asm/page.h" 9#include <linux/pfn.h>
10#include "as-layout.h" 10#include <asm/page.h>
11#include "init.h" 11#include <as-layout.h>
12#include "kern.h" 12#include <init.h>
13#include "mem_user.h" 13#include <kern.h>
14#include "os.h" 14#include <mem_user.h>
15#include <os.h>
15 16
16static int physmem_fd = -1; 17static int physmem_fd = -1;
17 18
18/* Changed during early boot */ 19/* Changed during early boot */
19unsigned long high_physmem; 20unsigned long high_physmem;
21EXPORT_SYMBOL(high_physmem);
20 22
21extern unsigned long long physmem_size; 23extern unsigned long long physmem_size;
22 24
@@ -184,6 +186,7 @@ unsigned long find_iomem(char *driver, unsigned long *len_out)
184 186
185 return 0; 187 return 0;
186} 188}
189EXPORT_SYMBOL(find_iomem);
187 190
188static int setup_iomem(void) 191static int setup_iomem(void)
189{ 192{
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 21c1ae7c3d75..c5338351aecd 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -20,12 +20,12 @@
20#include <linux/threads.h> 20#include <linux/threads.h>
21#include <asm/current.h> 21#include <asm/current.h>
22#include <asm/pgtable.h> 22#include <asm/pgtable.h>
23#include <asm/mmu_context.h>
23#include <asm/uaccess.h> 24#include <asm/uaccess.h>
24#include "as-layout.h" 25#include "as-layout.h"
25#include "kern_util.h" 26#include "kern_util.h"
26#include "os.h" 27#include "os.h"
27#include "skas.h" 28#include "skas.h"
28#include "tlb.h"
29 29
30/* 30/*
31 * This is a per-cpu array. A processor only modifies its entry and it only 31 * This is a per-cpu array. A processor only modifies its entry and it only
@@ -78,6 +78,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
78 &current->thread.regs, 0, NULL, NULL); 78 &current->thread.regs, 0, NULL, NULL);
79 return pid; 79 return pid;
80} 80}
81EXPORT_SYMBOL(kernel_thread);
81 82
82static inline void set_current(struct task_struct *task) 83static inline void set_current(struct task_struct *task)
83{ 84{
@@ -286,6 +287,7 @@ char *uml_strdup(const char *string)
286{ 287{
287 return kstrdup(string, GFP_KERNEL); 288 return kstrdup(string, GFP_KERNEL);
288} 289}
290EXPORT_SYMBOL(uml_strdup);
289 291
290int copy_to_user_proc(void __user *to, void *from, int size) 292int copy_to_user_proc(void __user *to, void *from, int size)
291{ 293{
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index b5c094c4ade4..e8b889d3bce7 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -11,7 +11,6 @@
11#include <asm/unistd.h> 11#include <asm/unistd.h>
12#include "frame_kern.h" 12#include "frame_kern.h"
13#include "kern_util.h" 13#include "kern_util.h"
14#include <sysdep/sigcontext.h>
15 14
16EXPORT_SYMBOL(block_signals); 15EXPORT_SYMBOL(block_signals);
17EXPORT_SYMBOL(unblock_signals); 16EXPORT_SYMBOL(unblock_signals);
diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c
index 2c8583c1a344..e1fd066a3525 100644
--- a/arch/um/kernel/skas/clone.c
+++ b/arch/um/kernel/skas/clone.c
@@ -8,7 +8,6 @@
8#include <asm/unistd.h> 8#include <asm/unistd.h>
9#include <sys/time.h> 9#include <sys/time.h>
10#include "as-layout.h" 10#include "as-layout.h"
11#include "kern_constants.h"
12#include "ptrace_user.h" 11#include "ptrace_user.h"
13#include "stub-data.h" 12#include "stub-data.h"
14#include "sysdep/stub.h" 13#include "sysdep/stub.h"
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index 696634214dc6..9fefd924fb49 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -6,6 +6,7 @@
6#include <linux/err.h> 6#include <linux/err.h>
7#include <linux/highmem.h> 7#include <linux/highmem.h>
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/module.h>
9#include <linux/sched.h> 10#include <linux/sched.h>
10#include <asm/current.h> 11#include <asm/current.h>
11#include <asm/page.h> 12#include <asm/page.h>
@@ -149,6 +150,7 @@ int copy_from_user(void *to, const void __user *from, int n)
149 buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to): 150 buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to):
150 n; 151 n;
151} 152}
153EXPORT_SYMBOL(copy_from_user);
152 154
153static int copy_chunk_to_user(unsigned long to, int len, void *arg) 155static int copy_chunk_to_user(unsigned long to, int len, void *arg)
154{ 156{
@@ -170,6 +172,7 @@ int copy_to_user(void __user *to, const void *from, int n)
170 buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) : 172 buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) :
171 n; 173 n;
172} 174}
175EXPORT_SYMBOL(copy_to_user);
173 176
174static int strncpy_chunk_from_user(unsigned long from, int len, void *arg) 177static int strncpy_chunk_from_user(unsigned long from, int len, void *arg)
175{ 178{
@@ -204,6 +207,7 @@ int strncpy_from_user(char *dst, const char __user *src, int count)
204 return -EFAULT; 207 return -EFAULT;
205 return strnlen(dst, count); 208 return strnlen(dst, count);
206} 209}
210EXPORT_SYMBOL(strncpy_from_user);
207 211
208static int clear_chunk(unsigned long addr, int len, void *unused) 212static int clear_chunk(unsigned long addr, int len, void *unused)
209{ 213{
@@ -226,6 +230,7 @@ int clear_user(void __user *mem, int len)
226 return access_ok(VERIFY_WRITE, mem, len) ? 230 return access_ok(VERIFY_WRITE, mem, len) ?
227 buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len; 231 buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len;
228} 232}
233EXPORT_SYMBOL(clear_user);
229 234
230static int strnlen_chunk(unsigned long str, int len, void *arg) 235static int strnlen_chunk(unsigned long str, int len, void *arg)
231{ 236{
@@ -251,3 +256,4 @@ int strnlen_user(const void __user *str, int len)
251 return count + 1; 256 return count + 1;
252 return -EFAULT; 257 return -EFAULT;
253} 258}
259EXPORT_SYMBOL(strnlen_user);
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index d175d0566af0..7f3d4d86431a 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -4,6 +4,7 @@
4 */ 4 */
5 5
6#include <linux/mm.h> 6#include <linux/mm.h>
7#include <linux/module.h>
7#include <linux/sched.h> 8#include <linux/sched.h>
8#include <asm/pgtable.h> 9#include <asm/pgtable.h>
9#include <asm/tlbflush.h> 10#include <asm/tlbflush.h>
@@ -11,7 +12,6 @@
11#include "mem_user.h" 12#include "mem_user.h"
12#include "os.h" 13#include "os.h"
13#include "skas.h" 14#include "skas.h"
14#include "tlb.h"
15 15
16struct host_vm_change { 16struct host_vm_change {
17 struct host_vm_op { 17 struct host_vm_op {
@@ -287,7 +287,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
287 } 287 }
288} 288}
289 289
290int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) 290static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
291{ 291{
292 struct mm_struct *mm; 292 struct mm_struct *mm;
293 pgd_t *pgd; 293 pgd_t *pgd;
@@ -499,6 +499,7 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
499 flush_tlb_kernel_range_common(start, end); 499 flush_tlb_kernel_range_common(start, end);
500 else fix_range(vma->vm_mm, start, end, 0); 500 else fix_range(vma->vm_mm, start, end, 0);
501} 501}
502EXPORT_SYMBOL(flush_tlb_range);
502 503
503void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, 504void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
504 unsigned long end) 505 unsigned long end)
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 8c7b8823d1f0..dafc94715950 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -6,6 +6,7 @@
6#include <linux/mm.h> 6#include <linux/mm.h>
7#include <linux/sched.h> 7#include <linux/sched.h>
8#include <linux/hardirq.h> 8#include <linux/hardirq.h>
9#include <linux/module.h>
9#include <asm/current.h> 10#include <asm/current.h>
10#include <asm/pgtable.h> 11#include <asm/pgtable.h>
11#include <asm/tlbflush.h> 12#include <asm/tlbflush.h>
@@ -14,7 +15,6 @@
14#include "kern_util.h" 15#include "kern_util.h"
15#include "os.h" 16#include "os.h"
16#include "skas.h" 17#include "skas.h"
17#include "sysdep/sigcontext.h"
18 18
19/* 19/*
20 * Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by 20 * Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by
@@ -112,6 +112,7 @@ out_of_memory:
112 pagefault_out_of_memory(); 112 pagefault_out_of_memory();
113 return 0; 113 return 0;
114} 114}
115EXPORT_SYMBOL(handle_page_fault);
115 116
116static void show_segv_info(struct uml_pt_regs *regs) 117static void show_segv_info(struct uml_pt_regs *regs)
117{ 118{
diff --git a/arch/um/kernel/uaccess.c b/arch/um/kernel/uaccess.c
deleted file mode 100644
index dd33f040c526..000000000000
--- a/arch/um/kernel/uaccess.c
+++ /dev/null
@@ -1,33 +0,0 @@
1/*
2 * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk)
3 * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
4 * Licensed under the GPL
5 */
6
7/*
8 * These are here rather than tt/uaccess.c because skas mode needs them in
9 * order to do SIGBUS recovery when a tmpfs mount runs out of room.
10 */
11
12#include <linux/string.h>
13#include "os.h"
14
15static void __do_copy(void *to, const void *from, int n)
16{
17 memcpy(to, from, n);
18}
19
20
21int __do_copy_to_user(void *to, const void *from, int n,
22 void **fault_addr, jmp_buf **fault_catcher)
23{
24 unsigned long fault;
25 int faulted;
26
27 fault = __do_user_copy(to, from, n, fault_addr, fault_catcher,
28 __do_copy, &faulted);
29 if (!faulted)
30 return 0;
31 else
32 return n - (fault - (unsigned long) to);
33}
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 8d84250324b3..ba00eae45aad 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -102,6 +102,8 @@ const struct seq_operations cpuinfo_op = {
102 102
103/* Set in linux_main */ 103/* Set in linux_main */
104unsigned long uml_physmem; 104unsigned long uml_physmem;
105EXPORT_SYMBOL(uml_physmem);
106
105unsigned long uml_reserved; /* Also modified in mem_init */ 107unsigned long uml_reserved; /* Also modified in mem_init */
106unsigned long start_vm; 108unsigned long start_vm;
107unsigned long end_vm; 109unsigned long end_vm;
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index b33f4dfe7ae5..dd764101e488 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -4,14 +4,14 @@
4# 4#
5 5
6obj-y = aio.o execvp.o file.o helper.o irq.o main.o mem.o process.o \ 6obj-y = aio.o execvp.o file.o helper.o irq.o main.o mem.o process.o \
7 registers.o sigio.o signal.o start_up.o time.o tty.o uaccess.o \ 7 registers.o sigio.o signal.o start_up.o time.o tty.o \
8 umid.o tls.o user_syms.o util.o drivers/ sys-$(SUBARCH)/ skas/ 8 umid.o user_syms.o util.o drivers/ skas/
9 9
10obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o 10obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o
11 11
12USER_OBJS := $(user-objs-y) aio.o elf_aux.o execvp.o file.o helper.o irq.o \ 12USER_OBJS := $(user-objs-y) aio.o elf_aux.o execvp.o file.o helper.o irq.o \
13 main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \ 13 main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \
14 tty.o tls.o uaccess.o umid.o util.o 14 tty.o umid.o util.o
15 15
16CFLAGS_user_syms.o += -DSUBARCH_$(SUBARCH) 16CFLAGS_user_syms.o += -DSUBARCH_$(SUBARCH)
17 17
diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c
index 57e3d46c989c..c5d039e1ff3b 100644
--- a/arch/um/os-Linux/aio.c
+++ b/arch/um/os-Linux/aio.c
@@ -11,10 +11,8 @@
11#include <asm/unistd.h> 11#include <asm/unistd.h>
12#include "aio.h" 12#include "aio.h"
13#include "init.h" 13#include "init.h"
14#include "kern_constants.h"
15#include "kern_util.h" 14#include "kern_util.h"
16#include "os.h" 15#include "os.h"
17#include "user.h"
18 16
19struct aio_thread_req { 17struct aio_thread_req {
20 enum aio_type type; 18 enum aio_type type;
diff --git a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c
index cc72cb2c1af6..db3d6481375a 100644
--- a/arch/um/os-Linux/drivers/ethertap_user.c
+++ b/arch/um/os-Linux/drivers/ethertap_user.c
@@ -13,11 +13,9 @@
13#include <sys/socket.h> 13#include <sys/socket.h>
14#include <sys/wait.h> 14#include <sys/wait.h>
15#include "etap.h" 15#include "etap.h"
16#include "kern_constants.h"
17#include "os.h" 16#include "os.h"
18#include "net_user.h" 17#include "net_user.h"
19#include "um_malloc.h" 18#include "um_malloc.h"
20#include "user.h"
21 19
22#define MAX_PACKET ETH_MAX_PACKET 20#define MAX_PACKET ETH_MAX_PACKET
23 21
diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c
index 2448be03fd7a..a2aacffdd907 100644
--- a/arch/um/os-Linux/drivers/tuntap_user.c
+++ b/arch/um/os-Linux/drivers/tuntap_user.c
@@ -13,11 +13,9 @@
13#include <sys/socket.h> 13#include <sys/socket.h>
14#include <sys/wait.h> 14#include <sys/wait.h>
15#include <sys/uio.h> 15#include <sys/uio.h>
16#include "kern_constants.h"
17#include "kern_util.h" 16#include "kern_util.h"
18#include "os.h" 17#include "os.h"
19#include "tuntap.h" 18#include "tuntap.h"
20#include "user.h"
21 19
22static int tuntap_user_init(void *data, void *dev) 20static int tuntap_user_init(void *data, void *dev)
23{ 21{
diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c
index 953323799381..d895271ad6f7 100644
--- a/arch/um/os-Linux/elf_aux.c
+++ b/arch/um/os-Linux/elf_aux.c
@@ -12,7 +12,6 @@
12#include "init.h" 12#include "init.h"
13#include "elf_user.h" 13#include "elf_user.h"
14#include "mem_user.h" 14#include "mem_user.h"
15#include <kern_constants.h>
16 15
17typedef Elf32_auxv_t elf_auxv_t; 16typedef Elf32_auxv_t elf_auxv_t;
18 17
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 140e587bc0ad..b049a63bb74b 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -13,9 +13,7 @@
13#include <sys/socket.h> 13#include <sys/socket.h>
14#include <sys/stat.h> 14#include <sys/stat.h>
15#include <sys/un.h> 15#include <sys/un.h>
16#include "kern_constants.h"
17#include "os.h" 16#include "os.h"
18#include "user.h"
19 17
20static void copy_stat(struct uml_stat *dst, const struct stat64 *src) 18static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
21{ 19{
diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c
index feff22d64672..cf26c4a9a43a 100644
--- a/arch/um/os-Linux/helper.c
+++ b/arch/um/os-Linux/helper.c
@@ -10,11 +10,9 @@
10#include <linux/limits.h> 10#include <linux/limits.h>
11#include <sys/socket.h> 11#include <sys/socket.h>
12#include <sys/wait.h> 12#include <sys/wait.h>
13#include "kern_constants.h"
14#include "kern_util.h" 13#include "kern_util.h"
15#include "os.h" 14#include "os.h"
16#include "um_malloc.h" 15#include "um_malloc.h"
17#include "user.h"
18 16
19struct helper_data { 17struct helper_data {
20 void (*pre_exec)(void*); 18 void (*pre_exec)(void*);
diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h
new file mode 100644
index 000000000000..2c3c3ecd8c01
--- /dev/null
+++ b/arch/um/os-Linux/internal.h
@@ -0,0 +1 @@
void alarm_handler(int, mcontext_t *);
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
index 0348b975e81c..9a49908b576c 100644
--- a/arch/um/os-Linux/irq.c
+++ b/arch/um/os-Linux/irq.c
@@ -9,11 +9,8 @@
9#include <signal.h> 9#include <signal.h>
10#include <string.h> 10#include <string.h>
11#include "irq_user.h" 11#include "irq_user.h"
12#include "kern_constants.h"
13#include "os.h" 12#include "os.h"
14#include "process.h"
15#include "um_malloc.h" 13#include "um_malloc.h"
16#include "user.h"
17 14
18/* 15/*
19 * Locked by irq_lock in arch/um/kernel/irq.c. Changed by os_create_pollfd 16 * Locked by irq_lock in arch/um/kernel/irq.c. Changed by os_create_pollfd
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 8471b817d94f..7a86dd516eb1 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -12,7 +12,6 @@
12#include <sys/resource.h> 12#include <sys/resource.h>
13#include "as-layout.h" 13#include "as-layout.h"
14#include "init.h" 14#include "init.h"
15#include "kern_constants.h"
16#include "kern_util.h" 15#include "kern_util.h"
17#include "os.h" 16#include "os.h"
18#include "um_malloc.h" 17#include "um_malloc.h"
diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c
index 62878cf1d33f..8e421e1d6d36 100644
--- a/arch/um/os-Linux/mem.c
+++ b/arch/um/os-Linux/mem.c
@@ -14,9 +14,7 @@
14#include <sys/mman.h> 14#include <sys/mman.h>
15#include <sys/param.h> 15#include <sys/param.h>
16#include "init.h" 16#include "init.h"
17#include "kern_constants.h"
18#include "os.h" 17#include "os.h"
19#include "user.h"
20 18
21/* Modified by which_tmpdir, which is called during early boot */ 19/* Modified by which_tmpdir, which is called during early boot */
22static char *default_tmpdir = "/tmp"; 20static char *default_tmpdir = "/tmp";
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index 0c45dc8efb05..307f173e7f82 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -13,12 +13,9 @@
13#include <sys/wait.h> 13#include <sys/wait.h>
14#include <asm/unistd.h> 14#include <asm/unistd.h>
15#include "init.h" 15#include "init.h"
16#include "kern_constants.h"
17#include "longjmp.h" 16#include "longjmp.h"
18#include "os.h" 17#include "os.h"
19#include "process.h"
20#include "skas_ptrace.h" 18#include "skas_ptrace.h"
21#include "user.h"
22 19
23#define ARBITRARY_ADDR -1 20#define ARBITRARY_ADDR -1
24#define FAILURE_PID -1 21#define FAILURE_PID -1
@@ -237,21 +234,13 @@ out:
237 234
238void init_new_thread_signals(void) 235void init_new_thread_signals(void)
239{ 236{
240 set_handler(SIGSEGV, (__sighandler_t) sig_handler, SA_ONSTACK, 237 set_handler(SIGSEGV);
241 SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1); 238 set_handler(SIGTRAP);
242 set_handler(SIGTRAP, (__sighandler_t) sig_handler, SA_ONSTACK, 239 set_handler(SIGFPE);
243 SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1); 240 set_handler(SIGILL);
244 set_handler(SIGFPE, (__sighandler_t) sig_handler, SA_ONSTACK, 241 set_handler(SIGBUS);
245 SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
246 set_handler(SIGILL, (__sighandler_t) sig_handler, SA_ONSTACK,
247 SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
248 set_handler(SIGBUS, (__sighandler_t) sig_handler, SA_ONSTACK,
249 SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
250 signal(SIGHUP, SIG_IGN); 242 signal(SIGHUP, SIG_IGN);
251 243 set_handler(SIGIO);
252 set_handler(SIGIO, (__sighandler_t) sig_handler,
253 SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGALRM,
254 SIGVTALRM, -1);
255 signal(SIGWINCH, SIG_IGN); 244 signal(SIGWINCH, SIG_IGN);
256 signal(SIGTERM, SIG_DFL); 245 signal(SIGTERM, SIG_DFL);
257} 246}
diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
index 63d299df152b..3c161218c671 100644
--- a/arch/um/os-Linux/sigio.c
+++ b/arch/um/os-Linux/sigio.c
@@ -11,14 +11,11 @@
11#include <sched.h> 11#include <sched.h>
12#include <signal.h> 12#include <signal.h>
13#include <string.h> 13#include <string.h>
14#include "kern_constants.h"
15#include "kern_util.h" 14#include "kern_util.h"
16#include "init.h" 15#include "init.h"
17#include "os.h" 16#include "os.h"
18#include "process.h"
19#include "sigio.h" 17#include "sigio.h"
20#include "um_malloc.h" 18#include "um_malloc.h"
21#include "user.h"
22 19
23/* 20/*
24 * Protected by sigio_lock(), also used by sigio_cleanup, which is an 21 * Protected by sigio_lock(), also used by sigio_cleanup, which is an
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 6ae180703a63..2d22f1fcd8e2 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -12,13 +12,7 @@
12#include "as-layout.h" 12#include "as-layout.h"
13#include "kern_util.h" 13#include "kern_util.h"
14#include "os.h" 14#include "os.h"
15#include "process.h" 15#include "sysdep/mcontext.h"
16#include "sysdep/barrier.h"
17#include "sysdep/sigcontext.h"
18#include "user.h"
19
20/* Copied from linux/compiler-gcc.h since we can't include it directly */
21#define barrier() __asm__ __volatile__("": : :"memory")
22 16
23void (*sig_info[NSIG])(int, struct uml_pt_regs *) = { 17void (*sig_info[NSIG])(int, struct uml_pt_regs *) = {
24 [SIGTRAP] = relay_signal, 18 [SIGTRAP] = relay_signal,
@@ -30,7 +24,7 @@ void (*sig_info[NSIG])(int, struct uml_pt_regs *) = {
30 [SIGIO] = sigio_handler, 24 [SIGIO] = sigio_handler,
31 [SIGVTALRM] = timer_handler }; 25 [SIGVTALRM] = timer_handler };
32 26
33static void sig_handler_common(int sig, struct sigcontext *sc) 27static void sig_handler_common(int sig, mcontext_t *mc)
34{ 28{
35 struct uml_pt_regs r; 29 struct uml_pt_regs r;
36 int save_errno = errno; 30 int save_errno = errno;
@@ -38,8 +32,8 @@ static void sig_handler_common(int sig, struct sigcontext *sc)
38 r.is_user = 0; 32 r.is_user = 0;
39 if (sig == SIGSEGV) { 33 if (sig == SIGSEGV) {
40 /* For segfaults, we want the data from the sigcontext. */ 34 /* For segfaults, we want the data from the sigcontext. */
41 copy_sc(&r, sc); 35 get_regs_from_mc(&r, mc);
42 GET_FAULTINFO_FROM_SC(r.faultinfo, sc); 36 GET_FAULTINFO_FROM_MC(r.faultinfo, mc);
43 } 37 }
44 38
45 /* enable signals if sig isn't IRQ signal */ 39 /* enable signals if sig isn't IRQ signal */
@@ -66,7 +60,7 @@ static void sig_handler_common(int sig, struct sigcontext *sc)
66static int signals_enabled; 60static int signals_enabled;
67static unsigned int signals_pending; 61static unsigned int signals_pending;
68 62
69void sig_handler(int sig, struct sigcontext *sc) 63void sig_handler(int sig, mcontext_t *mc)
70{ 64{
71 int enabled; 65 int enabled;
72 66
@@ -78,23 +72,23 @@ void sig_handler(int sig, struct sigcontext *sc)
78 72
79 block_signals(); 73 block_signals();
80 74
81 sig_handler_common(sig, sc); 75 sig_handler_common(sig, mc);
82 76
83 set_signals(enabled); 77 set_signals(enabled);
84} 78}
85 79
86static void real_alarm_handler(struct sigcontext *sc) 80static void real_alarm_handler(mcontext_t *mc)
87{ 81{
88 struct uml_pt_regs regs; 82 struct uml_pt_regs regs;
89 83
90 if (sc != NULL) 84 if (mc != NULL)
91 copy_sc(&regs, sc); 85 get_regs_from_mc(&regs, mc);
92 regs.is_user = 0; 86 regs.is_user = 0;
93 unblock_signals(); 87 unblock_signals();
94 timer_handler(SIGVTALRM, &regs); 88 timer_handler(SIGVTALRM, &regs);
95} 89}
96 90
97void alarm_handler(int sig, struct sigcontext *sc) 91void alarm_handler(int sig, mcontext_t *mc)
98{ 92{
99 int enabled; 93 int enabled;
100 94
@@ -106,14 +100,13 @@ void alarm_handler(int sig, struct sigcontext *sc)
106 100
107 block_signals(); 101 block_signals();
108 102
109 real_alarm_handler(sc); 103 real_alarm_handler(mc);
110 set_signals(enabled); 104 set_signals(enabled);
111} 105}
112 106
113void timer_init(void) 107void timer_init(void)
114{ 108{
115 set_handler(SIGVTALRM, (__sighandler_t) alarm_handler, 109 set_handler(SIGVTALRM);
116 SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, -1);
117} 110}
118 111
119void set_sigstack(void *sig_stack, int size) 112void set_sigstack(void *sig_stack, int size)
@@ -126,10 +119,23 @@ void set_sigstack(void *sig_stack, int size)
126 panic("enabling signal stack failed, errno = %d\n", errno); 119 panic("enabling signal stack failed, errno = %d\n", errno);
127} 120}
128 121
129static void (*handlers[_NSIG])(int sig, struct sigcontext *sc); 122static void (*handlers[_NSIG])(int sig, mcontext_t *mc) = {
123 [SIGSEGV] = sig_handler,
124 [SIGBUS] = sig_handler,
125 [SIGILL] = sig_handler,
126 [SIGFPE] = sig_handler,
127 [SIGTRAP] = sig_handler,
128
129 [SIGIO] = sig_handler,
130 [SIGWINCH] = sig_handler,
131 [SIGVTALRM] = alarm_handler
132};
133
130 134
131void handle_signal(int sig, struct sigcontext *sc) 135static void hard_handler(int sig, siginfo_t *info, void *p)
132{ 136{
137 struct ucontext *uc = p;
138 mcontext_t *mc = &uc->uc_mcontext;
133 unsigned long pending = 1UL << sig; 139 unsigned long pending = 1UL << sig;
134 140
135 do { 141 do {
@@ -155,7 +161,7 @@ void handle_signal(int sig, struct sigcontext *sc)
155 while ((sig = ffs(pending)) != 0){ 161 while ((sig = ffs(pending)) != 0){
156 sig--; 162 sig--;
157 pending &= ~(1 << sig); 163 pending &= ~(1 << sig);
158 (*handlers[sig])(sig, sc); 164 (*handlers[sig])(sig, mc);
159 } 165 }
160 166
161 /* 167 /*
@@ -169,28 +175,26 @@ void handle_signal(int sig, struct sigcontext *sc)
169 } while (pending); 175 } while (pending);
170} 176}
171 177
172extern void hard_handler(int sig); 178void set_handler(int sig)
173
174void set_handler(int sig, void (*handler)(int), int flags, ...)
175{ 179{
176 struct sigaction action; 180 struct sigaction action;
177 va_list ap; 181 int flags = SA_SIGINFO | SA_ONSTACK;
178 sigset_t sig_mask; 182 sigset_t sig_mask;
179 int mask;
180 183
181 handlers[sig] = (void (*)(int, struct sigcontext *)) handler; 184 action.sa_sigaction = hard_handler;
182 action.sa_handler = hard_handler;
183 185
186 /* block irq ones */
184 sigemptyset(&action.sa_mask); 187 sigemptyset(&action.sa_mask);
185 188 sigaddset(&action.sa_mask, SIGVTALRM);
186 va_start(ap, flags); 189 sigaddset(&action.sa_mask, SIGIO);
187 while ((mask = va_arg(ap, int)) != -1) 190 sigaddset(&action.sa_mask, SIGWINCH);
188 sigaddset(&action.sa_mask, mask);
189 va_end(ap);
190 191
191 if (sig == SIGSEGV) 192 if (sig == SIGSEGV)
192 flags |= SA_NODEFER; 193 flags |= SA_NODEFER;
193 194
195 if (sigismember(&action.sa_mask, sig))
196 flags |= SA_RESTART; /* if it's an irq signal */
197
194 action.sa_flags = flags; 198 action.sa_flags = flags;
195 action.sa_restorer = NULL; 199 action.sa_restorer = NULL;
196 if (sigaction(sig, &action, NULL) < 0) 200 if (sigaction(sig, &action, NULL) < 0)
diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
index e771398be5f3..c0afff7af4bd 100644
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -9,7 +9,6 @@
9#include <string.h> 9#include <string.h>
10#include <sys/mman.h> 10#include <sys/mman.h>
11#include "init.h" 11#include "init.h"
12#include "kern_constants.h"
13#include "as-layout.h" 12#include "as-layout.h"
14#include "mm_id.h" 13#include "mm_id.h"
15#include "os.h" 14#include "os.h"
@@ -17,7 +16,6 @@
17#include "ptrace_user.h" 16#include "ptrace_user.h"
18#include "registers.h" 17#include "registers.h"
19#include "skas.h" 18#include "skas.h"
20#include "user.h"
21#include "sysdep/ptrace.h" 19#include "sysdep/ptrace.h"
22#include "sysdep/stub.h" 20#include "sysdep/stub.h"
23 21
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index dee0e8cf8ad0..cd65727854eb 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -9,31 +9,23 @@
9#include <errno.h> 9#include <errno.h>
10#include <string.h> 10#include <string.h>
11#include <sys/mman.h> 11#include <sys/mman.h>
12#include <sys/ptrace.h>
13#include <sys/wait.h> 12#include <sys/wait.h>
14#include <asm/unistd.h> 13#include <asm/unistd.h>
15#include "as-layout.h" 14#include "as-layout.h"
16#include "chan_user.h" 15#include "init.h"
17#include "kern_constants.h"
18#include "kern_util.h" 16#include "kern_util.h"
19#include "mem.h" 17#include "mem.h"
20#include "os.h" 18#include "os.h"
21#include "process.h"
22#include "proc_mm.h" 19#include "proc_mm.h"
23#include "ptrace_user.h" 20#include "ptrace_user.h"
24#include "registers.h" 21#include "registers.h"
25#include "skas.h" 22#include "skas.h"
26#include "skas_ptrace.h" 23#include "skas_ptrace.h"
27#include "user.h"
28#include "sysdep/stub.h" 24#include "sysdep/stub.h"
29 25
30int is_skas_winch(int pid, int fd, void *data) 26int is_skas_winch(int pid, int fd, void *data)
31{ 27{
32 if (pid != getpgrp()) 28 return pid == getpgrp();
33 return 0;
34
35 register_winch_irq(-1, fd, -1, data, 0);
36 return 1;
37} 29}
38 30
39static int ptrace_dump_regs(int pid) 31static int ptrace_dump_regs(int pid)
@@ -169,7 +161,7 @@ static void handle_trap(int pid, struct uml_pt_regs *regs,
169 161
170 if (!local_using_sysemu) 162 if (!local_using_sysemu)
171 { 163 {
172 err = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET, 164 err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET,
173 __NR_getpid); 165 __NR_getpid);
174 if (err < 0) { 166 if (err < 0) {
175 printk(UM_KERN_ERR "handle_trap - nullifying syscall " 167 printk(UM_KERN_ERR "handle_trap - nullifying syscall "
@@ -257,8 +249,8 @@ static int userspace_tramp(void *stack)
257 249
258 set_sigstack((void *) STUB_DATA, UM_KERN_PAGE_SIZE); 250 set_sigstack((void *) STUB_DATA, UM_KERN_PAGE_SIZE);
259 sigemptyset(&sa.sa_mask); 251 sigemptyset(&sa.sa_mask);
260 sa.sa_flags = SA_ONSTACK | SA_NODEFER; 252 sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
261 sa.sa_handler = (void *) v; 253 sa.sa_sigaction = (void *) v;
262 sa.sa_restorer = NULL; 254 sa.sa_restorer = NULL;
263 if (sigaction(SIGSEGV, &sa, NULL) < 0) { 255 if (sigaction(SIGSEGV, &sa, NULL) < 0) {
264 printk(UM_KERN_ERR "userspace_tramp - setting SIGSEGV " 256 printk(UM_KERN_ERR "userspace_tramp - setting SIGSEGV "
@@ -661,8 +653,7 @@ int start_idle_thread(void *stack, jmp_buf *switch_buf)
661{ 653{
662 int n; 654 int n;
663 655
664 set_handler(SIGWINCH, (__sighandler_t) sig_handler, 656 set_handler(SIGWINCH);
665 SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGVTALRM, -1);
666 657
667 /* 658 /*
668 * Can't use UML_SETJMP or UML_LONGJMP here because they save 659 * Can't use UML_SETJMP or UML_LONGJMP here because they save
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 02ee9adff54a..425162e22af5 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -13,12 +13,10 @@
13#include <signal.h> 13#include <signal.h>
14#include <string.h> 14#include <string.h>
15#include <sys/mman.h> 15#include <sys/mman.h>
16#include <sys/ptrace.h>
17#include <sys/stat.h> 16#include <sys/stat.h>
18#include <sys/wait.h> 17#include <sys/wait.h>
19#include <asm/unistd.h> 18#include <asm/unistd.h>
20#include "init.h" 19#include "init.h"
21#include "kern_constants.h"
22#include "os.h" 20#include "os.h"
23#include "mem_user.h" 21#include "mem_user.h"
24#include "ptrace_user.h" 22#include "ptrace_user.h"
@@ -225,7 +223,7 @@ static void __init check_sysemu(void)
225 goto fail; 223 goto fail;
226 } 224 }
227 225
228 n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, os_getpid()); 226 n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET, os_getpid());
229 if (n < 0) { 227 if (n < 0) {
230 non_fatal("check_sysemu : failed to modify system call " 228 non_fatal("check_sysemu : failed to modify system call "
231 "return"); 229 "return");
@@ -261,7 +259,7 @@ static void __init check_sysemu(void)
261 "doesn't singlestep"); 259 "doesn't singlestep");
262 goto fail; 260 goto fail;
263 } 261 }
264 n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, 262 n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET,
265 os_getpid()); 263 os_getpid());
266 if (n < 0) 264 if (n < 0)
267 fatal_perror("check_sysemu : failed to modify " 265 fatal_perror("check_sysemu : failed to modify "
@@ -317,10 +315,10 @@ static void __init check_ptrace(void)
317 fatal("check_ptrace : expected (SIGTRAP|0x80), " 315 fatal("check_ptrace : expected (SIGTRAP|0x80), "
318 "got status = %d", status); 316 "got status = %d", status);
319 317
320 syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET, 318 syscall = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_NR_OFFSET,
321 0); 319 0);
322 if (syscall == __NR_getpid) { 320 if (syscall == __NR_getpid) {
323 n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET, 321 n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET,
324 __NR_getppid); 322 __NR_getppid);
325 if (n < 0) 323 if (n < 0)
326 fatal_perror("check_ptrace : failed to modify " 324 fatal_perror("check_ptrace : failed to modify "
diff --git a/arch/um/os-Linux/sys-i386/signal.c b/arch/um/os-Linux/sys-i386/signal.c
deleted file mode 100644
index f311609f93da..000000000000
--- a/arch/um/os-Linux/sys-i386/signal.c
+++ /dev/null
@@ -1,13 +0,0 @@
1/*
2 * Copyright (C) 2006 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL
4 */
5
6#include <signal.h>
7
8extern void handle_signal(int sig, struct sigcontext *sc);
9
10void hard_handler(int sig)
11{
12 handle_signal(sig, (struct sigcontext *) (&sig + 1));
13}
diff --git a/arch/um/os-Linux/sys-x86_64/Makefile b/arch/um/os-Linux/sys-x86_64/Makefile
deleted file mode 100644
index a44a47f8f57b..000000000000
--- a/arch/um/os-Linux/sys-x86_64/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
1#
2# Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3# Licensed under the GPL
4#
5
6obj-y = registers.o prctl.o signal.o task_size.o
7
8USER_OBJS := $(obj-y)
9
10include arch/um/scripts/Makefile.rules
diff --git a/arch/um/os-Linux/sys-x86_64/registers.c b/arch/um/os-Linux/sys-x86_64/registers.c
deleted file mode 100644
index 594d97ad02b3..000000000000
--- a/arch/um/os-Linux/sys-x86_64/registers.c
+++ /dev/null
@@ -1,52 +0,0 @@
1/*
2 * Copyright (C) 2006 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL
4 */
5
6#include <errno.h>
7#include <sys/ptrace.h>
8#define __FRAME_OFFSETS
9#include <asm/ptrace.h>
10#include "kern_constants.h"
11#include "longjmp.h"
12#include "user.h"
13
14int save_fp_registers(int pid, unsigned long *fp_regs)
15{
16 if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0)
17 return -errno;
18 return 0;
19}
20
21int restore_fp_registers(int pid, unsigned long *fp_regs)
22{
23 if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0)
24 return -errno;
25 return 0;
26}
27
28unsigned long get_thread_reg(int reg, jmp_buf *buf)
29{
30 switch (reg) {
31 case RIP:
32 return buf[0]->__rip;
33 case RSP:
34 return buf[0]->__rsp;
35 case RBP:
36 return buf[0]->__rbp;
37 default:
38 printk(UM_KERN_ERR "get_thread_regs - unknown register %d\n",
39 reg);
40 return 0;
41 }
42}
43
44int get_fp_registers(int pid, unsigned long *regs)
45{
46 return save_fp_registers(pid, regs);
47}
48
49int put_fp_registers(int pid, unsigned long *regs)
50{
51 return restore_fp_registers(pid, regs);
52}
diff --git a/arch/um/os-Linux/sys-x86_64/signal.c b/arch/um/os-Linux/sys-x86_64/signal.c
deleted file mode 100644
index 82a388822cd3..000000000000
--- a/arch/um/os-Linux/sys-x86_64/signal.c
+++ /dev/null
@@ -1,16 +0,0 @@
1/*
2 * Copyright (C) 2006 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL
4 */
5
6#include <signal.h>
7
8extern void handle_signal(int sig, struct sigcontext *sc);
9
10void hard_handler(int sig)
11{
12 struct ucontext *uc;
13 asm("movq %%rdx, %0" : "=r" (uc));
14
15 handle_signal(sig, (struct sigcontext *) &uc->uc_mcontext);
16}
diff --git a/arch/um/os-Linux/sys-x86_64/task_size.c b/arch/um/os-Linux/sys-x86_64/task_size.c
deleted file mode 100644
index 26a0dd1f349c..000000000000
--- a/arch/um/os-Linux/sys-x86_64/task_size.c
+++ /dev/null
@@ -1,5 +0,0 @@
1unsigned long os_get_top_address(unsigned long shift)
2{
3 /* The old value of CONFIG_TOP_ADDR */
4 return 0x7fc0000000;
5}
diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c
index 6e3359d6a839..910499d76a67 100644
--- a/arch/um/os-Linux/time.c
+++ b/arch/um/os-Linux/time.c
@@ -8,11 +8,9 @@
8#include <signal.h> 8#include <signal.h>
9#include <time.h> 9#include <time.h>
10#include <sys/time.h> 10#include <sys/time.h>
11#include "kern_constants.h"
12#include "kern_util.h" 11#include "kern_util.h"
13#include "os.h" 12#include "os.h"
14#include "process.h" 13#include "internal.h"
15#include "user.h"
16 14
17int set_interval(void) 15int set_interval(void)
18{ 16{
diff --git a/arch/um/os-Linux/tls.c b/arch/um/os-Linux/tls.c
deleted file mode 100644
index 73277801ef14..000000000000
--- a/arch/um/os-Linux/tls.c
+++ /dev/null
@@ -1,35 +0,0 @@
1#include <errno.h>
2#include <sys/ptrace.h>
3#include "sysdep/tls.h"
4
5/* TLS support - we basically rely on the host's one.*/
6
7#ifndef PTRACE_GET_THREAD_AREA
8#define PTRACE_GET_THREAD_AREA 25
9#endif
10
11#ifndef PTRACE_SET_THREAD_AREA
12#define PTRACE_SET_THREAD_AREA 26
13#endif
14
15int os_set_thread_area(user_desc_t *info, int pid)
16{
17 int ret;
18
19 ret = ptrace(PTRACE_SET_THREAD_AREA, pid, info->entry_number,
20 (unsigned long) info);
21 if (ret < 0)
22 ret = -errno;
23 return ret;
24}
25
26int os_get_thread_area(user_desc_t *info, int pid)
27{
28 int ret;
29
30 ret = ptrace(PTRACE_GET_THREAD_AREA, pid, info->entry_number,
31 (unsigned long) info);
32 if (ret < 0)
33 ret = -errno;
34 return ret;
35}
diff --git a/arch/um/os-Linux/tty.c b/arch/um/os-Linux/tty.c
index b09ff66a77ee..dd12b99dcb59 100644
--- a/arch/um/os-Linux/tty.c
+++ b/arch/um/os-Linux/tty.c
@@ -7,10 +7,8 @@
7#include <unistd.h> 7#include <unistd.h>
8#include <errno.h> 8#include <errno.h>
9#include <fcntl.h> 9#include <fcntl.h>
10#include "kern_constants.h"
11#include "kern_util.h" 10#include "kern_util.h"
12#include "os.h" 11#include "os.h"
13#include "user.h"
14 12
15struct grantpt_info { 13struct grantpt_info {
16 int fd; 14 int fd;
diff --git a/arch/um/os-Linux/uaccess.c b/arch/um/os-Linux/uaccess.c
deleted file mode 100644
index 087ed74ffca5..000000000000
--- a/arch/um/os-Linux/uaccess.c
+++ /dev/null
@@ -1,32 +0,0 @@
1/*
2 * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk)
3 * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
4 * Licensed under the GPL
5 */
6
7#include <stddef.h>
8#include "longjmp.h"
9
10unsigned long __do_user_copy(void *to, const void *from, int n,
11 void **fault_addr, jmp_buf **fault_catcher,
12 void (*op)(void *to, const void *from,
13 int n), int *faulted_out)
14{
15 unsigned long *faddrp = (unsigned long *) fault_addr, ret;
16
17 jmp_buf jbuf;
18 *fault_catcher = &jbuf;
19 if (UML_SETJMP(&jbuf) == 0) {
20 (*op)(to, from, n);
21 ret = 0;
22 *faulted_out = 0;
23 }
24 else {
25 ret = *faddrp;
26 *faulted_out = 1;
27 }
28 *fault_addr = NULL;
29 *fault_catcher = NULL;
30 return ret;
31}
32
diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c
index a27defb81884..4832eb519f8d 100644
--- a/arch/um/os-Linux/umid.c
+++ b/arch/um/os-Linux/umid.c
@@ -13,9 +13,7 @@
13#include <unistd.h> 13#include <unistd.h>
14#include <sys/stat.h> 14#include <sys/stat.h>
15#include "init.h" 15#include "init.h"
16#include "kern_constants.h"
17#include "os.h" 16#include "os.h"
18#include "user.h"
19 17
20#define UML_DIR "~/.uml/" 18#define UML_DIR "~/.uml/"
21 19
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 5803b1887672..9e3b43bb84c9 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -13,9 +13,7 @@
13#include <wait.h> 13#include <wait.h>
14#include <sys/mman.h> 14#include <sys/mman.h>
15#include <sys/utsname.h> 15#include <sys/utsname.h>
16#include "kern_constants.h"
17#include "os.h" 16#include "os.h"
18#include "user.h"
19 17
20void stack_protections(unsigned long address) 18void stack_protections(unsigned long address)
21{ 19{
diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules
index 61107b68e05b..2eb2843b0634 100644
--- a/arch/um/scripts/Makefile.rules
+++ b/arch/um/scripts/Makefile.rules
@@ -8,7 +8,7 @@ USER_OBJS += $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS))
8USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) 8USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
9 9
10$(USER_OBJS:.o=.%): \ 10$(USER_OBJS:.o=.%): \
11 c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) $(CFLAGS_$(basetarget).o) 11 c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include user.h $(CFLAGS_$(basetarget).o)
12$(USER_OBJS) : CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ \ 12$(USER_OBJS) : CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ \
13 -Dunix -D__unix__ -D__$(SUBARCH)__ $(CF) 13 -Dunix -D__unix__ -D__$(SUBARCH)__ $(CF)
14 14
@@ -25,8 +25,3 @@ $(UNPROFILE_OBJS) : CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ \
25define unprofile 25define unprofile
26 $(patsubst -pg,,$(patsubst -fprofile-arcs -ftest-coverage,,$(1))) 26 $(patsubst -pg,,$(patsubst -fprofile-arcs -ftest-coverage,,$(1)))
27endef 27endef
28
29ifdef subarch-obj-y
30obj-y += subarch.o
31subarch-y = $(addprefix ../../$(HEADER_ARCH)/,$(subarch-obj-y))
32endif
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
deleted file mode 100644
index 3923cfb87649..000000000000
--- a/arch/um/sys-i386/Makefile
+++ /dev/null
@@ -1,24 +0,0 @@
1#
2# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3#
4
5obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
6 ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \
7 sys_call_table.o tls.o atomic64_cx8_32.o mem.o
8
9obj-$(CONFIG_BINFMT_ELF) += elfcore.o
10
11subarch-obj-y = lib/string_32.o
12subarch-obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += lib/rwsem.o
13subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o
14subarch-obj-$(CONFIG_MODULES) += kernel/module.o
15
16USER_OBJS := bugs.o ptrace_user.o fault.o
17
18USER_OBJS += user-offsets.s
19extra-y += user-offsets.s
20
21UNPROFILE_OBJS := stub_segv.o
22CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING)
23
24include arch/um/scripts/Makefile.rules
diff --git a/arch/um/sys-i386/asm/elf.h b/arch/um/sys-i386/asm/elf.h
deleted file mode 100644
index 42305551d204..000000000000
--- a/arch/um/sys-i386/asm/elf.h
+++ /dev/null
@@ -1,125 +0,0 @@
1/*
2 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL
4 */
5#ifndef __UM_ELF_I386_H
6#define __UM_ELF_I386_H
7
8#include <asm/user.h>
9#include "skas.h"
10
11#define R_386_NONE 0
12#define R_386_32 1
13#define R_386_PC32 2
14#define R_386_GOT32 3
15#define R_386_PLT32 4
16#define R_386_COPY 5
17#define R_386_GLOB_DAT 6
18#define R_386_JMP_SLOT 7
19#define R_386_RELATIVE 8
20#define R_386_GOTOFF 9
21#define R_386_GOTPC 10
22#define R_386_NUM 11
23
24typedef unsigned long elf_greg_t;
25
26#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t))
27typedef elf_greg_t elf_gregset_t[ELF_NGREG];
28
29typedef struct user_i387_struct elf_fpregset_t;
30
31/*
32 * This is used to ensure we don't load something for the wrong architecture.
33 */
34#define elf_check_arch(x) \
35 (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486))
36
37#define ELF_CLASS ELFCLASS32
38#define ELF_DATA ELFDATA2LSB
39#define ELF_ARCH EM_386
40
41#define ELF_PLAT_INIT(regs, load_addr) do { \
42 PT_REGS_EBX(regs) = 0; \
43 PT_REGS_ECX(regs) = 0; \
44 PT_REGS_EDX(regs) = 0; \
45 PT_REGS_ESI(regs) = 0; \
46 PT_REGS_EDI(regs) = 0; \
47 PT_REGS_EBP(regs) = 0; \
48 PT_REGS_EAX(regs) = 0; \
49} while (0)
50
51#define ELF_EXEC_PAGESIZE 4096
52
53#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3)
54
55/* Shamelessly stolen from include/asm-i386/elf.h */
56
57#define ELF_CORE_COPY_REGS(pr_reg, regs) do { \
58 pr_reg[0] = PT_REGS_EBX(regs); \
59 pr_reg[1] = PT_REGS_ECX(regs); \
60 pr_reg[2] = PT_REGS_EDX(regs); \
61 pr_reg[3] = PT_REGS_ESI(regs); \
62 pr_reg[4] = PT_REGS_EDI(regs); \
63 pr_reg[5] = PT_REGS_EBP(regs); \
64 pr_reg[6] = PT_REGS_EAX(regs); \
65 pr_reg[7] = PT_REGS_DS(regs); \
66 pr_reg[8] = PT_REGS_ES(regs); \
67 /* fake once used fs and gs selectors? */ \
68 pr_reg[9] = PT_REGS_DS(regs); \
69 pr_reg[10] = PT_REGS_DS(regs); \
70 pr_reg[11] = PT_REGS_SYSCALL_NR(regs); \
71 pr_reg[12] = PT_REGS_IP(regs); \
72 pr_reg[13] = PT_REGS_CS(regs); \
73 pr_reg[14] = PT_REGS_EFLAGS(regs); \
74 pr_reg[15] = PT_REGS_SP(regs); \
75 pr_reg[16] = PT_REGS_SS(regs); \
76} while (0);
77
78#define task_pt_regs(t) (&(t)->thread.regs)
79
80struct task_struct;
81
82extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu);
83
84#define ELF_CORE_COPY_FPREGS(t, fpu) elf_core_copy_fpregs(t, fpu)
85
86extern long elf_aux_hwcap;
87#define ELF_HWCAP (elf_aux_hwcap)
88
89extern char * elf_aux_platform;
90#define ELF_PLATFORM (elf_aux_platform)
91
92#define SET_PERSONALITY(ex) do { } while (0)
93
94extern unsigned long vsyscall_ehdr;
95extern unsigned long vsyscall_end;
96extern unsigned long __kernel_vsyscall;
97
98#define VSYSCALL_BASE vsyscall_ehdr
99#define VSYSCALL_END vsyscall_end
100
101/*
102 * This is the range that is readable by user mode, and things
103 * acting like user mode such as get_user_pages.
104 */
105#define FIXADDR_USER_START VSYSCALL_BASE
106#define FIXADDR_USER_END VSYSCALL_END
107
108#define __HAVE_ARCH_GATE_AREA 1
109
110/*
111 * Architecture-neutral AT_ values in 0-17, leave some room
112 * for more of them, start the x86-specific ones at 32.
113 */
114#define AT_SYSINFO 32
115#define AT_SYSINFO_EHDR 33
116
117#define ARCH_DLINFO \
118do { \
119 if ( vsyscall_ehdr ) { \
120 NEW_AUX_ENT(AT_SYSINFO, __kernel_vsyscall); \
121 NEW_AUX_ENT(AT_SYSINFO_EHDR, vsyscall_ehdr); \
122 } \
123} while (0)
124
125#endif
diff --git a/arch/um/sys-i386/asm/module.h b/arch/um/sys-i386/asm/module.h
deleted file mode 100644
index 5ead4a0b2e35..000000000000
--- a/arch/um/sys-i386/asm/module.h
+++ /dev/null
@@ -1,13 +0,0 @@
1#ifndef __UM_MODULE_I386_H
2#define __UM_MODULE_I386_H
3
4/* UML is simple */
5struct mod_arch_specific
6{
7};
8
9#define Elf_Shdr Elf32_Shdr
10#define Elf_Sym Elf32_Sym
11#define Elf_Ehdr Elf32_Ehdr
12
13#endif
diff --git a/arch/um/sys-i386/atomic64_cx8_32.S b/arch/um/sys-i386/atomic64_cx8_32.S
deleted file mode 100644
index 1e901d3d4a95..000000000000
--- a/arch/um/sys-i386/atomic64_cx8_32.S
+++ /dev/null
@@ -1,225 +0,0 @@
1/*
2 * atomic64_t for 586+
3 *
4 * Copied from arch/x86/lib/atomic64_cx8_32.S
5 *
6 * Copyright © 2010 Luca Barbieri
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 */
14
15#include <linux/linkage.h>
16#include <asm/alternative-asm.h>
17#include <asm/dwarf2.h>
18
19.macro SAVE reg
20 pushl_cfi %\reg
21 CFI_REL_OFFSET \reg, 0
22.endm
23
24.macro RESTORE reg
25 popl_cfi %\reg
26 CFI_RESTORE \reg
27.endm
28
29.macro read64 reg
30 movl %ebx, %eax
31 movl %ecx, %edx
32/* we need LOCK_PREFIX since otherwise cmpxchg8b always does the write */
33 LOCK_PREFIX
34 cmpxchg8b (\reg)
35.endm
36
37ENTRY(atomic64_read_cx8)
38 CFI_STARTPROC
39
40 read64 %ecx
41 ret
42 CFI_ENDPROC
43ENDPROC(atomic64_read_cx8)
44
45ENTRY(atomic64_set_cx8)
46 CFI_STARTPROC
47
481:
49/* we don't need LOCK_PREFIX since aligned 64-bit writes
50 * are atomic on 586 and newer */
51 cmpxchg8b (%esi)
52 jne 1b
53
54 ret
55 CFI_ENDPROC
56ENDPROC(atomic64_set_cx8)
57
58ENTRY(atomic64_xchg_cx8)
59 CFI_STARTPROC
60
61 movl %ebx, %eax
62 movl %ecx, %edx
631:
64 LOCK_PREFIX
65 cmpxchg8b (%esi)
66 jne 1b
67
68 ret
69 CFI_ENDPROC
70ENDPROC(atomic64_xchg_cx8)
71
72.macro addsub_return func ins insc
73ENTRY(atomic64_\func\()_return_cx8)
74 CFI_STARTPROC
75 SAVE ebp
76 SAVE ebx
77 SAVE esi
78 SAVE edi
79
80 movl %eax, %esi
81 movl %edx, %edi
82 movl %ecx, %ebp
83
84 read64 %ebp
851:
86 movl %eax, %ebx
87 movl %edx, %ecx
88 \ins\()l %esi, %ebx
89 \insc\()l %edi, %ecx
90 LOCK_PREFIX
91 cmpxchg8b (%ebp)
92 jne 1b
93
9410:
95 movl %ebx, %eax
96 movl %ecx, %edx
97 RESTORE edi
98 RESTORE esi
99 RESTORE ebx
100 RESTORE ebp
101 ret
102 CFI_ENDPROC
103ENDPROC(atomic64_\func\()_return_cx8)
104.endm
105
106addsub_return add add adc
107addsub_return sub sub sbb
108
109.macro incdec_return func ins insc
110ENTRY(atomic64_\func\()_return_cx8)
111 CFI_STARTPROC
112 SAVE ebx
113
114 read64 %esi
1151:
116 movl %eax, %ebx
117 movl %edx, %ecx
118 \ins\()l $1, %ebx
119 \insc\()l $0, %ecx
120 LOCK_PREFIX
121 cmpxchg8b (%esi)
122 jne 1b
123
12410:
125 movl %ebx, %eax
126 movl %ecx, %edx
127 RESTORE ebx
128 ret
129 CFI_ENDPROC
130ENDPROC(atomic64_\func\()_return_cx8)
131.endm
132
133incdec_return inc add adc
134incdec_return dec sub sbb
135
136ENTRY(atomic64_dec_if_positive_cx8)
137 CFI_STARTPROC
138 SAVE ebx
139
140 read64 %esi
1411:
142 movl %eax, %ebx
143 movl %edx, %ecx
144 subl $1, %ebx
145 sbb $0, %ecx
146 js 2f
147 LOCK_PREFIX
148 cmpxchg8b (%esi)
149 jne 1b
150
1512:
152 movl %ebx, %eax
153 movl %ecx, %edx
154 RESTORE ebx
155 ret
156 CFI_ENDPROC
157ENDPROC(atomic64_dec_if_positive_cx8)
158
159ENTRY(atomic64_add_unless_cx8)
160 CFI_STARTPROC
161 SAVE ebp
162 SAVE ebx
163/* these just push these two parameters on the stack */
164 SAVE edi
165 SAVE esi
166
167 movl %ecx, %ebp
168 movl %eax, %esi
169 movl %edx, %edi
170
171 read64 %ebp
1721:
173 cmpl %eax, 0(%esp)
174 je 4f
1752:
176 movl %eax, %ebx
177 movl %edx, %ecx
178 addl %esi, %ebx
179 adcl %edi, %ecx
180 LOCK_PREFIX
181 cmpxchg8b (%ebp)
182 jne 1b
183
184 movl $1, %eax
1853:
186 addl $8, %esp
187 CFI_ADJUST_CFA_OFFSET -8
188 RESTORE ebx
189 RESTORE ebp
190 ret
1914:
192 cmpl %edx, 4(%esp)
193 jne 2b
194 xorl %eax, %eax
195 jmp 3b
196 CFI_ENDPROC
197ENDPROC(atomic64_add_unless_cx8)
198
199ENTRY(atomic64_inc_not_zero_cx8)
200 CFI_STARTPROC
201 SAVE ebx
202
203 read64 %esi
2041:
205 testl %eax, %eax
206 je 4f
2072:
208 movl %eax, %ebx
209 movl %edx, %ecx
210 addl $1, %ebx
211 adcl $0, %ecx
212 LOCK_PREFIX
213 cmpxchg8b (%esi)
214 jne 1b
215
216 movl $1, %eax
2173:
218 RESTORE ebx
219 ret
2204:
221 testl %edx, %edx
222 jne 2b
223 jmp 3b
224 CFI_ENDPROC
225ENDPROC(atomic64_inc_not_zero_cx8)
diff --git a/arch/um/sys-i386/bug.c b/arch/um/sys-i386/bug.c
deleted file mode 100644
index 8d4f273f1219..000000000000
--- a/arch/um/sys-i386/bug.c
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * Copyright (C) 2006 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL V2
4 */
5
6#include <linux/uaccess.h>
7#include <asm/errno.h>
8
9/* Mostly copied from i386/x86_86 - eliminated the eip < PAGE_OFFSET because
10 * that's not relevant in skas mode.
11 */
12
13int is_valid_bugaddr(unsigned long eip)
14{
15 unsigned short ud2;
16
17 if (probe_kernel_address((unsigned short __user *)eip, ud2))
18 return 0;
19
20 return ud2 == 0x0b0f;
21}
diff --git a/arch/um/sys-i386/ksyms.c b/arch/um/sys-i386/ksyms.c
deleted file mode 100644
index bfbefd30db8f..000000000000
--- a/arch/um/sys-i386/ksyms.c
+++ /dev/null
@@ -1,5 +0,0 @@
1#include "linux/module.h"
2#include "asm/checksum.h"
3
4/* Networking helper routines. */
5EXPORT_SYMBOL(csum_partial);
diff --git a/arch/um/sys-i386/shared/sysdep/barrier.h b/arch/um/sys-i386/shared/sysdep/barrier.h
deleted file mode 100644
index b58d52c5b2f4..000000000000
--- a/arch/um/sys-i386/shared/sysdep/barrier.h
+++ /dev/null
@@ -1,9 +0,0 @@
1#ifndef __SYSDEP_I386_BARRIER_H
2#define __SYSDEP_I386_BARRIER_H
3
4/* Copied from include/asm-i386 for use by userspace. i386 has the option
5 * of using mfence, but I'm just using this, which works everywhere, for now.
6 */
7#define mb() asm volatile("lock; addl $0,0(%esp)")
8
9#endif
diff --git a/arch/um/sys-i386/shared/sysdep/host_ldt.h b/arch/um/sys-i386/shared/sysdep/host_ldt.h
deleted file mode 100644
index 0953cc4df652..000000000000
--- a/arch/um/sys-i386/shared/sysdep/host_ldt.h
+++ /dev/null
@@ -1,34 +0,0 @@
1#ifndef __ASM_HOST_LDT_I386_H
2#define __ASM_HOST_LDT_I386_H
3
4#include <asm/ldt.h>
5
6/*
7 * macros stolen from include/asm-i386/desc.h
8 */
9#define LDT_entry_a(info) \
10 ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
11
12#define LDT_entry_b(info) \
13 (((info)->base_addr & 0xff000000) | \
14 (((info)->base_addr & 0x00ff0000) >> 16) | \
15 ((info)->limit & 0xf0000) | \
16 (((info)->read_exec_only ^ 1) << 9) | \
17 ((info)->contents << 10) | \
18 (((info)->seg_not_present ^ 1) << 15) | \
19 ((info)->seg_32bit << 22) | \
20 ((info)->limit_in_pages << 23) | \
21 ((info)->useable << 20) | \
22 0x7000)
23
24#define LDT_empty(info) (\
25 (info)->base_addr == 0 && \
26 (info)->limit == 0 && \
27 (info)->contents == 0 && \
28 (info)->read_exec_only == 1 && \
29 (info)->seg_32bit == 0 && \
30 (info)->limit_in_pages == 0 && \
31 (info)->seg_not_present == 1 && \
32 (info)->useable == 0 )
33
34#endif
diff --git a/arch/um/sys-i386/shared/sysdep/ptrace_user.h b/arch/um/sys-i386/shared/sysdep/ptrace_user.h
deleted file mode 100644
index ef56247e4143..000000000000
--- a/arch/um/sys-i386/shared/sysdep/ptrace_user.h
+++ /dev/null
@@ -1,50 +0,0 @@
1/*
2 * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6#ifndef __SYSDEP_I386_PTRACE_USER_H__
7#define __SYSDEP_I386_PTRACE_USER_H__
8
9#include <sys/ptrace.h>
10#include <linux/ptrace.h>
11#include <asm/ptrace.h>
12#include "user_constants.h"
13
14#define PT_OFFSET(r) ((r) * sizeof(long))
15
16#define PT_SYSCALL_NR(regs) ((regs)[ORIG_EAX])
17#define PT_SYSCALL_NR_OFFSET PT_OFFSET(ORIG_EAX)
18
19#define PT_SYSCALL_ARG1_OFFSET PT_OFFSET(EBX)
20#define PT_SYSCALL_ARG2_OFFSET PT_OFFSET(ECX)
21#define PT_SYSCALL_ARG3_OFFSET PT_OFFSET(EDX)
22#define PT_SYSCALL_ARG4_OFFSET PT_OFFSET(ESI)
23#define PT_SYSCALL_ARG5_OFFSET PT_OFFSET(EDI)
24#define PT_SYSCALL_ARG6_OFFSET PT_OFFSET(EBP)
25
26#define PT_SYSCALL_RET_OFFSET PT_OFFSET(EAX)
27
28#define REGS_SYSCALL_NR EAX /* This is used before a system call */
29#define REGS_SYSCALL_ARG1 EBX
30#define REGS_SYSCALL_ARG2 ECX
31#define REGS_SYSCALL_ARG3 EDX
32#define REGS_SYSCALL_ARG4 ESI
33#define REGS_SYSCALL_ARG5 EDI
34#define REGS_SYSCALL_ARG6 EBP
35
36#define REGS_IP_INDEX EIP
37#define REGS_SP_INDEX UESP
38
39#define PT_IP_OFFSET PT_OFFSET(EIP)
40#define PT_IP(regs) ((regs)[EIP])
41#define PT_SP_OFFSET PT_OFFSET(UESP)
42#define PT_SP(regs) ((regs)[UESP])
43
44#define FP_SIZE ((HOST_FPX_SIZE > HOST_FP_SIZE) ? HOST_FPX_SIZE : HOST_FP_SIZE)
45
46#ifndef FRAME_SIZE
47#define FRAME_SIZE (17)
48#endif
49
50#endif
diff --git a/arch/um/sys-i386/shared/sysdep/sc.h b/arch/um/sys-i386/shared/sysdep/sc.h
deleted file mode 100644
index c57d1780ad37..000000000000
--- a/arch/um/sys-i386/shared/sysdep/sc.h
+++ /dev/null
@@ -1,44 +0,0 @@
1#ifndef __SYSDEP_I386_SC_H
2#define __SYSDEP_I386_SC_H
3
4#include <user_constants.h>
5
6#define SC_OFFSET(sc, field) \
7 *((unsigned long *) &(((char *) (sc))[HOST_##field]))
8#define SC_FP_OFFSET(sc, field) \
9 *((unsigned long *) &(((char *) (SC_FPSTATE(sc)))[HOST_##field]))
10#define SC_FP_OFFSET_PTR(sc, field, type) \
11 ((type *) &(((char *) (SC_FPSTATE(sc)))[HOST_##field]))
12
13#define SC_IP(sc) SC_OFFSET(sc, SC_IP)
14#define SC_SP(sc) SC_OFFSET(sc, SC_SP)
15#define SC_FS(sc) SC_OFFSET(sc, SC_FS)
16#define SC_GS(sc) SC_OFFSET(sc, SC_GS)
17#define SC_DS(sc) SC_OFFSET(sc, SC_DS)
18#define SC_ES(sc) SC_OFFSET(sc, SC_ES)
19#define SC_SS(sc) SC_OFFSET(sc, SC_SS)
20#define SC_CS(sc) SC_OFFSET(sc, SC_CS)
21#define SC_EFLAGS(sc) SC_OFFSET(sc, SC_EFLAGS)
22#define SC_EAX(sc) SC_OFFSET(sc, SC_EAX)
23#define SC_EBX(sc) SC_OFFSET(sc, SC_EBX)
24#define SC_ECX(sc) SC_OFFSET(sc, SC_ECX)
25#define SC_EDX(sc) SC_OFFSET(sc, SC_EDX)
26#define SC_EDI(sc) SC_OFFSET(sc, SC_EDI)
27#define SC_ESI(sc) SC_OFFSET(sc, SC_ESI)
28#define SC_EBP(sc) SC_OFFSET(sc, SC_EBP)
29#define SC_TRAPNO(sc) SC_OFFSET(sc, SC_TRAPNO)
30#define SC_ERR(sc) SC_OFFSET(sc, SC_ERR)
31#define SC_CR2(sc) SC_OFFSET(sc, SC_CR2)
32#define SC_FPSTATE(sc) SC_OFFSET(sc, SC_FPSTATE)
33#define SC_SIGMASK(sc) SC_OFFSET(sc, SC_SIGMASK)
34#define SC_FP_CW(sc) SC_FP_OFFSET(sc, SC_FP_CW)
35#define SC_FP_SW(sc) SC_FP_OFFSET(sc, SC_FP_SW)
36#define SC_FP_TAG(sc) SC_FP_OFFSET(sc, SC_FP_TAG)
37#define SC_FP_IPOFF(sc) SC_FP_OFFSET(sc, SC_FP_IPOFF)
38#define SC_FP_CSSEL(sc) SC_FP_OFFSET(sc, SC_FP_CSSEL)
39#define SC_FP_DATAOFF(sc) SC_FP_OFFSET(sc, SC_FP_DATAOFF)
40#define SC_FP_DATASEL(sc) SC_FP_OFFSET(sc, SC_FP_DATASEL)
41#define SC_FP_ST(sc) SC_FP_OFFSET_PTR(sc, SC_FP_ST, struct _fpstate)
42#define SC_FXSR_ENV(sc) SC_FP_OFFSET_PTR(sc, SC_FXSR_ENV, void)
43
44#endif
diff --git a/arch/um/sys-i386/shared/sysdep/sigcontext.h b/arch/um/sys-i386/shared/sysdep/sigcontext.h
deleted file mode 100644
index f583c87111a0..000000000000
--- a/arch/um/sys-i386/shared/sysdep/sigcontext.h
+++ /dev/null
@@ -1,26 +0,0 @@
1/*
2 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL
4 */
5
6#ifndef __SYS_SIGCONTEXT_I386_H
7#define __SYS_SIGCONTEXT_I386_H
8
9#include "sysdep/sc.h"
10
11#define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
12
13#define GET_FAULTINFO_FROM_SC(fi, sc) \
14 { \
15 (fi).cr2 = SC_CR2(sc); \
16 (fi).error_code = SC_ERR(sc); \
17 (fi).trap_no = SC_TRAPNO(sc); \
18 }
19
20/* This is Page Fault */
21#define SEGV_IS_FIXABLE(fi) ((fi)->trap_no == 14)
22
23/* SKAS3 has no trap_no on i386, but get_skas_faultinfo() sets it to 0. */
24#define SEGV_MAYBE_FIXABLE(fi) ((fi)->trap_no == 0 && ptrace_faultinfo)
25
26#endif
diff --git a/arch/um/sys-i386/shared/sysdep/vm-flags.h b/arch/um/sys-i386/shared/sysdep/vm-flags.h
deleted file mode 100644
index e0d24c568dbc..000000000000
--- a/arch/um/sys-i386/shared/sysdep/vm-flags.h
+++ /dev/null
@@ -1,14 +0,0 @@
1/*
2 * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#ifndef __VM_FLAGS_I386_H
7#define __VM_FLAGS_I386_H
8
9#define VM_DATA_DEFAULT_FLAGS \
10 (VM_READ | VM_WRITE | \
11 ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
12 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
13
14#endif
diff --git a/arch/um/sys-i386/stub_segv.c b/arch/um/sys-i386/stub_segv.c
deleted file mode 100644
index 28ccf737a79f..000000000000
--- a/arch/um/sys-i386/stub_segv.c
+++ /dev/null
@@ -1,17 +0,0 @@
1/*
2 * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL
4 */
5
6#include "sysdep/stub.h"
7#include "sysdep/sigcontext.h"
8
9void __attribute__ ((__section__ (".__syscall_stub")))
10stub_segv_handler(int sig)
11{
12 struct sigcontext *sc = (struct sigcontext *) (&sig + 1);
13
14 GET_FAULTINFO_FROM_SC(*((struct faultinfo *) STUB_DATA), sc);
15
16 trap_myself();
17}
diff --git a/arch/um/sys-i386/user-offsets.c b/arch/um/sys-i386/user-offsets.c
deleted file mode 100644
index 5f883bfe773f..000000000000
--- a/arch/um/sys-i386/user-offsets.c
+++ /dev/null
@@ -1,53 +0,0 @@
1#include <stdio.h>
2#include <stddef.h>
3#include <signal.h>
4#include <sys/poll.h>
5#include <sys/user.h>
6#include <sys/mman.h>
7#include <asm/ptrace.h>
8
9#define DEFINE(sym, val) \
10 asm volatile("\n->" #sym " %0 " #val : : "i" (val))
11
12#define DEFINE_LONGS(sym, val) \
13 asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long)))
14
15#define OFFSET(sym, str, mem) \
16 DEFINE(sym, offsetof(struct str, mem));
17
18void foo(void)
19{
20 OFFSET(HOST_SC_TRAPNO, sigcontext, trapno);
21 OFFSET(HOST_SC_ERR, sigcontext, err);
22 OFFSET(HOST_SC_CR2, sigcontext, cr2);
23
24 DEFINE_LONGS(HOST_FP_SIZE, sizeof(struct user_fpregs_struct));
25 DEFINE_LONGS(HOST_FPX_SIZE, sizeof(struct user_fpxregs_struct));
26
27 DEFINE(HOST_IP, EIP);
28 DEFINE(HOST_SP, UESP);
29 DEFINE(HOST_EFLAGS, EFL);
30 DEFINE(HOST_EAX, EAX);
31 DEFINE(HOST_EBX, EBX);
32 DEFINE(HOST_ECX, ECX);
33 DEFINE(HOST_EDX, EDX);
34 DEFINE(HOST_ESI, ESI);
35 DEFINE(HOST_EDI, EDI);
36 DEFINE(HOST_EBP, EBP);
37 DEFINE(HOST_CS, CS);
38 DEFINE(HOST_SS, SS);
39 DEFINE(HOST_DS, DS);
40 DEFINE(HOST_FS, FS);
41 DEFINE(HOST_ES, ES);
42 DEFINE(HOST_GS, GS);
43 DEFINE(UM_FRAME_SIZE, sizeof(struct user_regs_struct));
44
45 /* XXX Duplicated between i386 and x86_64 */
46 DEFINE(UM_POLLIN, POLLIN);
47 DEFINE(UM_POLLPRI, POLLPRI);
48 DEFINE(UM_POLLOUT, POLLOUT);
49
50 DEFINE(UM_PROT_READ, PROT_READ);
51 DEFINE(UM_PROT_WRITE, PROT_WRITE);
52 DEFINE(UM_PROT_EXEC, PROT_EXEC);
53}
diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile
deleted file mode 100644
index bd4d1d3ba919..000000000000
--- a/arch/um/sys-x86_64/Makefile
+++ /dev/null
@@ -1,27 +0,0 @@
1#
2# Copyright 2003 PathScale, Inc.
3#
4# Licensed under the GPL
5#
6
7obj-y = bug.o bugs.o delay.o fault.o ldt.o ptrace.o ptrace_user.o mem.o \
8 setjmp.o signal.o stub.o stub_segv.o syscalls.o syscall_table.o \
9 sysrq.o ksyms.o tls.o
10
11obj-y += vdso/
12
13subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o \
14 lib/rwsem.o
15subarch-obj-$(CONFIG_MODULES) += kernel/module.o
16
17ldt-y = ../sys-i386/ldt.o
18
19USER_OBJS := ptrace_user.o
20
21USER_OBJS += user-offsets.s
22extra-y += user-offsets.s
23
24UNPROFILE_OBJS := stub_segv.o
25CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING)
26
27include arch/um/scripts/Makefile.rules
diff --git a/arch/um/sys-x86_64/asm/archparam.h b/arch/um/sys-x86_64/asm/archparam.h
deleted file mode 100644
index 6c083663b8d9..000000000000
--- a/arch/um/sys-x86_64/asm/archparam.h
+++ /dev/null
@@ -1,16 +0,0 @@
1/*
2 * Copyright 2003 PathScale, Inc.
3 *
4 * Licensed under the GPL
5 */
6
7#ifndef __UM_ARCHPARAM_X86_64_H
8#define __UM_ARCHPARAM_X86_64_H
9
10
11/* No user-accessible fixmap addresses, i.e. vsyscall */
12#define FIXADDR_USER_START 0
13#define FIXADDR_USER_END 0
14
15#endif
16
diff --git a/arch/um/sys-x86_64/asm/module.h b/arch/um/sys-x86_64/asm/module.h
deleted file mode 100644
index 8eb79c2d07d5..000000000000
--- a/arch/um/sys-x86_64/asm/module.h
+++ /dev/null
@@ -1,20 +0,0 @@
1/*
2 * Copyright 2003 PathScale, Inc.
3 *
4 * Licensed under the GPL
5 */
6
7#ifndef __UM_MODULE_X86_64_H
8#define __UM_MODULE_X86_64_H
9
10/* UML is simple */
11struct mod_arch_specific
12{
13};
14
15#define Elf_Shdr Elf64_Shdr
16#define Elf_Sym Elf64_Sym
17#define Elf_Ehdr Elf64_Ehdr
18
19#endif
20
diff --git a/arch/um/sys-x86_64/delay.c b/arch/um/sys-x86_64/delay.c
deleted file mode 100644
index f3fe1a688f7e..000000000000
--- a/arch/um/sys-x86_64/delay.c
+++ /dev/null
@@ -1,60 +0,0 @@
1/*
2 * Copyright (C) 2011 Richard Weinberger <richrd@nod.at>
3 * Mostly copied from arch/x86/lib/delay.c
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/module.h>
11#include <linux/kernel.h>
12#include <linux/delay.h>
13#include <asm/param.h>
14
15void __delay(unsigned long loops)
16{
17 asm volatile(
18 "test %0,%0\n"
19 "jz 3f\n"
20 "jmp 1f\n"
21
22 ".align 16\n"
23 "1: jmp 2f\n"
24
25 ".align 16\n"
26 "2: dec %0\n"
27 " jnz 2b\n"
28 "3: dec %0\n"
29
30 : /* we don't need output */
31 : "a" (loops)
32 );
33}
34EXPORT_SYMBOL(__delay);
35
36inline void __const_udelay(unsigned long xloops)
37{
38 int d0;
39
40 xloops *= 4;
41 asm("mull %%edx"
42 : "=d" (xloops), "=&a" (d0)
43 : "1" (xloops), "0"
44 (loops_per_jiffy * (HZ/4)));
45
46 __delay(++xloops);
47}
48EXPORT_SYMBOL(__const_udelay);
49
50void __udelay(unsigned long usecs)
51{
52 __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
53}
54EXPORT_SYMBOL(__udelay);
55
56void __ndelay(unsigned long nsecs)
57{
58 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
59}
60EXPORT_SYMBOL(__ndelay);
diff --git a/arch/um/sys-x86_64/fault.c b/arch/um/sys-x86_64/fault.c
deleted file mode 100644
index ce85117fc64e..000000000000
--- a/arch/um/sys-x86_64/fault.c
+++ /dev/null
@@ -1,28 +0,0 @@
1/*
2 * Copyright 2003 PathScale, Inc.
3 *
4 * Licensed under the GPL
5 */
6
7#include "sysdep/ptrace.h"
8
9/* These two are from asm-um/uaccess.h and linux/module.h, check them. */
10struct exception_table_entry
11{
12 unsigned long insn;
13 unsigned long fixup;
14};
15
16const struct exception_table_entry *search_exception_tables(unsigned long add);
17
18int arch_fixup(unsigned long address, struct uml_pt_regs *regs)
19{
20 const struct exception_table_entry *fixup;
21
22 fixup = search_exception_tables(address);
23 if (fixup != 0) {
24 UPT_IP(regs) = fixup->fixup;
25 return 1;
26 }
27 return 0;
28}
diff --git a/arch/um/sys-x86_64/ptrace_user.c b/arch/um/sys-x86_64/ptrace_user.c
deleted file mode 100644
index c57a496d3f5b..000000000000
--- a/arch/um/sys-x86_64/ptrace_user.c
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright 2003 PathScale, Inc.
3 *
4 * Licensed under the GPL
5 */
6
7#include <errno.h>
8#include "ptrace_user.h"
9
10int ptrace_getregs(long pid, unsigned long *regs_out)
11{
12 if (ptrace(PTRACE_GETREGS, pid, 0, regs_out) < 0)
13 return -errno;
14 return(0);
15}
16
17int ptrace_setregs(long pid, unsigned long *regs_out)
18{
19 if (ptrace(PTRACE_SETREGS, pid, 0, regs_out) < 0)
20 return -errno;
21 return(0);
22}
diff --git a/arch/um/sys-x86_64/shared/sysdep/barrier.h b/arch/um/sys-x86_64/shared/sysdep/barrier.h
deleted file mode 100644
index 7b610befdc8f..000000000000
--- a/arch/um/sys-x86_64/shared/sysdep/barrier.h
+++ /dev/null
@@ -1,7 +0,0 @@
1#ifndef __SYSDEP_X86_64_BARRIER_H
2#define __SYSDEP_X86_64_BARRIER_H
3
4/* Copied from include/asm-x86_64 for use by userspace. */
5#define mb() asm volatile("mfence":::"memory")
6
7#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/host_ldt.h b/arch/um/sys-x86_64/shared/sysdep/host_ldt.h
deleted file mode 100644
index e8b1be1e154f..000000000000
--- a/arch/um/sys-x86_64/shared/sysdep/host_ldt.h
+++ /dev/null
@@ -1,38 +0,0 @@
1#ifndef __ASM_HOST_LDT_X86_64_H
2#define __ASM_HOST_LDT_X86_64_H
3
4#include <asm/ldt.h>
5
6/*
7 * macros stolen from include/asm-x86_64/desc.h
8 */
9#define LDT_entry_a(info) \
10 ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
11
12/* Don't allow setting of the lm bit. It is useless anyways because
13 * 64bit system calls require __USER_CS. */
14#define LDT_entry_b(info) \
15 (((info)->base_addr & 0xff000000) | \
16 (((info)->base_addr & 0x00ff0000) >> 16) | \
17 ((info)->limit & 0xf0000) | \
18 (((info)->read_exec_only ^ 1) << 9) | \
19 ((info)->contents << 10) | \
20 (((info)->seg_not_present ^ 1) << 15) | \
21 ((info)->seg_32bit << 22) | \
22 ((info)->limit_in_pages << 23) | \
23 ((info)->useable << 20) | \
24 /* ((info)->lm << 21) | */ \
25 0x7000)
26
27#define LDT_empty(info) (\
28 (info)->base_addr == 0 && \
29 (info)->limit == 0 && \
30 (info)->contents == 0 && \
31 (info)->read_exec_only == 1 && \
32 (info)->seg_32bit == 0 && \
33 (info)->limit_in_pages == 0 && \
34 (info)->seg_not_present == 1 && \
35 (info)->useable == 0 && \
36 (info)->lm == 0)
37
38#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/kernel-offsets.h b/arch/um/sys-x86_64/shared/sysdep/kernel-offsets.h
deleted file mode 100644
index a307237b7964..000000000000
--- a/arch/um/sys-x86_64/shared/sysdep/kernel-offsets.h
+++ /dev/null
@@ -1,23 +0,0 @@
1#include <linux/stddef.h>
2#include <linux/sched.h>
3#include <linux/time.h>
4#include <linux/elf.h>
5#include <linux/crypto.h>
6#include <asm/page.h>
7#include <asm/mman.h>
8
9#define DEFINE(sym, val) \
10 asm volatile("\n->" #sym " %0 " #val : : "i" (val))
11
12#define DEFINE_STR1(x) #x
13#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " DEFINE_STR1(val) " " #val: : )
14
15#define BLANK() asm volatile("\n->" : : )
16
17#define OFFSET(sym, str, mem) \
18 DEFINE(sym, offsetof(struct str, mem));
19
20void foo(void)
21{
22#include <common-offsets.h>
23}
diff --git a/arch/um/sys-x86_64/shared/sysdep/ptrace_user.h b/arch/um/sys-x86_64/shared/sysdep/ptrace_user.h
deleted file mode 100644
index 4dbccdb58f48..000000000000
--- a/arch/um/sys-x86_64/shared/sysdep/ptrace_user.h
+++ /dev/null
@@ -1,77 +0,0 @@
1/*
2 * Copyright 2003 PathScale, Inc.
3 *
4 * Licensed under the GPL
5 */
6
7#ifndef __SYSDEP_X86_64_PTRACE_USER_H__
8#define __SYSDEP_X86_64_PTRACE_USER_H__
9
10#define __FRAME_OFFSETS
11#include <sys/ptrace.h>
12#include <linux/ptrace.h>
13#include <asm/ptrace.h>
14#undef __FRAME_OFFSETS
15#include "user_constants.h"
16
17#define PT_INDEX(off) ((off) / sizeof(unsigned long))
18
19#define PT_SYSCALL_NR(regs) ((regs)[PT_INDEX(ORIG_RAX)])
20#define PT_SYSCALL_NR_OFFSET (ORIG_RAX)
21
22#define PT_SYSCALL_ARG1(regs) (((unsigned long *) (regs))[PT_INDEX(RDI)])
23#define PT_SYSCALL_ARG1_OFFSET (RDI)
24
25#define PT_SYSCALL_ARG2(regs) (((unsigned long *) (regs))[PT_INDEX(RSI)])
26#define PT_SYSCALL_ARG2_OFFSET (RSI)
27
28#define PT_SYSCALL_ARG3(regs) (((unsigned long *) (regs))[PT_INDEX(RDX)])
29#define PT_SYSCALL_ARG3_OFFSET (RDX)
30
31#define PT_SYSCALL_ARG4(regs) (((unsigned long *) (regs))[PT_INDEX(RCX)])
32#define PT_SYSCALL_ARG4_OFFSET (RCX)
33
34#define PT_SYSCALL_ARG5(regs) (((unsigned long *) (regs))[PT_INDEX(R8)])
35#define PT_SYSCALL_ARG5_OFFSET (R8)
36
37#define PT_SYSCALL_ARG6(regs) (((unsigned long *) (regs))[PT_INDEX(R9)])
38#define PT_SYSCALL_ARG6_OFFSET (R9)
39
40#define PT_SYSCALL_RET_OFFSET (RAX)
41
42#define PT_IP_OFFSET (RIP)
43#define PT_IP(regs) ((regs)[PT_INDEX(RIP)])
44
45#define PT_SP_OFFSET (RSP)
46#define PT_SP(regs) ((regs)[PT_INDEX(RSP)])
47
48#define PT_ORIG_RAX_OFFSET (ORIG_RAX)
49#define PT_ORIG_RAX(regs) ((regs)[PT_INDEX(ORIG_RAX)])
50
51/*
52 * x86_64 FC3 doesn't define this in /usr/include/linux/ptrace.h even though
53 * it's defined in the kernel's include/linux/ptrace.h. Additionally, use the
54 * 2.4 name and value for 2.4 host compatibility.
55 */
56#ifndef PTRACE_OLDSETOPTIONS
57#define PTRACE_OLDSETOPTIONS 21
58#endif
59
60/*
61 * These are before the system call, so the system call number is RAX
62 * rather than ORIG_RAX, and arg4 is R10 rather than RCX
63 */
64#define REGS_SYSCALL_NR PT_INDEX(RAX)
65#define REGS_SYSCALL_ARG1 PT_INDEX(RDI)
66#define REGS_SYSCALL_ARG2 PT_INDEX(RSI)
67#define REGS_SYSCALL_ARG3 PT_INDEX(RDX)
68#define REGS_SYSCALL_ARG4 PT_INDEX(R10)
69#define REGS_SYSCALL_ARG5 PT_INDEX(R8)
70#define REGS_SYSCALL_ARG6 PT_INDEX(R9)
71
72#define REGS_IP_INDEX PT_INDEX(RIP)
73#define REGS_SP_INDEX PT_INDEX(RSP)
74
75#define FP_SIZE (HOST_FP_SIZE)
76
77#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/sc.h b/arch/um/sys-x86_64/shared/sysdep/sc.h
deleted file mode 100644
index 8aee45b07434..000000000000
--- a/arch/um/sys-x86_64/shared/sysdep/sc.h
+++ /dev/null
@@ -1,45 +0,0 @@
1#ifndef __SYSDEP_X86_64_SC_H
2#define __SYSDEP_X86_64_SC_H
3
4/* Copyright (C) 2003 - 2004 PathScale, Inc
5 * Released under the GPL
6 */
7
8#include <user_constants.h>
9
10#define SC_OFFSET(sc, field) \
11 *((unsigned long *) &(((char *) (sc))[HOST_##field]))
12
13#define SC_RBX(sc) SC_OFFSET(sc, SC_RBX)
14#define SC_RCX(sc) SC_OFFSET(sc, SC_RCX)
15#define SC_RDX(sc) SC_OFFSET(sc, SC_RDX)
16#define SC_RSI(sc) SC_OFFSET(sc, SC_RSI)
17#define SC_RDI(sc) SC_OFFSET(sc, SC_RDI)
18#define SC_RBP(sc) SC_OFFSET(sc, SC_RBP)
19#define SC_RAX(sc) SC_OFFSET(sc, SC_RAX)
20#define SC_R8(sc) SC_OFFSET(sc, SC_R8)
21#define SC_R9(sc) SC_OFFSET(sc, SC_R9)
22#define SC_R10(sc) SC_OFFSET(sc, SC_R10)
23#define SC_R11(sc) SC_OFFSET(sc, SC_R11)
24#define SC_R12(sc) SC_OFFSET(sc, SC_R12)
25#define SC_R13(sc) SC_OFFSET(sc, SC_R13)
26#define SC_R14(sc) SC_OFFSET(sc, SC_R14)
27#define SC_R15(sc) SC_OFFSET(sc, SC_R15)
28#define SC_IP(sc) SC_OFFSET(sc, SC_IP)
29#define SC_SP(sc) SC_OFFSET(sc, SC_SP)
30#define SC_CR2(sc) SC_OFFSET(sc, SC_CR2)
31#define SC_ERR(sc) SC_OFFSET(sc, SC_ERR)
32#define SC_TRAPNO(sc) SC_OFFSET(sc, SC_TRAPNO)
33#define SC_CS(sc) SC_OFFSET(sc, SC_CS)
34#define SC_FS(sc) SC_OFFSET(sc, SC_FS)
35#define SC_GS(sc) SC_OFFSET(sc, SC_GS)
36#define SC_EFLAGS(sc) SC_OFFSET(sc, SC_EFLAGS)
37#define SC_SIGMASK(sc) SC_OFFSET(sc, SC_SIGMASK)
38#define SC_SS(sc) SC_OFFSET(sc, SC_SS)
39#if 0
40#define SC_ORIG_RAX(sc) SC_OFFSET(sc, SC_ORIG_RAX)
41#define SC_DS(sc) SC_OFFSET(sc, SC_DS)
42#define SC_ES(sc) SC_OFFSET(sc, SC_ES)
43#endif
44
45#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/sigcontext.h b/arch/um/sys-x86_64/shared/sysdep/sigcontext.h
deleted file mode 100644
index 0155133b1458..000000000000
--- a/arch/um/sys-x86_64/shared/sysdep/sigcontext.h
+++ /dev/null
@@ -1,27 +0,0 @@
1/*
2 * Copyright 2003 PathScale, Inc.
3 *
4 * Licensed under the GPL
5 */
6
7#ifndef __SYSDEP_X86_64_SIGCONTEXT_H
8#define __SYSDEP_X86_64_SIGCONTEXT_H
9
10#include <sysdep/sc.h>
11
12#define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
13
14#define GET_FAULTINFO_FROM_SC(fi, sc) \
15 { \
16 (fi).cr2 = SC_CR2(sc); \
17 (fi).error_code = SC_ERR(sc); \
18 (fi).trap_no = SC_TRAPNO(sc); \
19 }
20
21/* This is Page Fault */
22#define SEGV_IS_FIXABLE(fi) ((fi)->trap_no == 14)
23
24/* No broken SKAS API, which doesn't pass trap_no, here. */
25#define SEGV_MAYBE_FIXABLE(fi) 0
26
27#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/skas_ptrace.h b/arch/um/sys-x86_64/shared/sysdep/skas_ptrace.h
deleted file mode 100644
index 95db4be786e4..000000000000
--- a/arch/um/sys-x86_64/shared/sysdep/skas_ptrace.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6#ifndef __SYSDEP_X86_64_SKAS_PTRACE_H
7#define __SYSDEP_X86_64_SKAS_PTRACE_H
8
9struct ptrace_faultinfo {
10 int is_write;
11 unsigned long addr;
12};
13
14struct ptrace_ldt {
15 int func;
16 void *ptr;
17 unsigned long bytecount;
18};
19
20#define PTRACE_LDT 54
21
22#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/system.h b/arch/um/sys-x86_64/shared/sysdep/system.h
deleted file mode 100644
index d1b93c436200..000000000000
--- a/arch/um/sys-x86_64/shared/sysdep/system.h
+++ /dev/null
@@ -1,132 +0,0 @@
1#ifndef _ASM_X86_SYSTEM_H_
2#define _ASM_X86_SYSTEM_H_
3
4#include <asm/asm.h>
5#include <asm/segment.h>
6#include <asm/cpufeature.h>
7#include <asm/cmpxchg.h>
8#include <asm/nops.h>
9
10#include <linux/kernel.h>
11#include <linux/irqflags.h>
12
13/* entries in ARCH_DLINFO: */
14#ifdef CONFIG_IA32_EMULATION
15# define AT_VECTOR_SIZE_ARCH 2
16#else
17# define AT_VECTOR_SIZE_ARCH 1
18#endif
19
20extern unsigned long arch_align_stack(unsigned long sp);
21
22void default_idle(void);
23
24/*
25 * Force strict CPU ordering.
26 * And yes, this is required on UP too when we're talking
27 * to devices.
28 */
29#ifdef CONFIG_X86_32
30/*
31 * Some non-Intel clones support out of order store. wmb() ceases to be a
32 * nop for these.
33 */
34#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
35#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
36#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
37#else
38#define mb() asm volatile("mfence":::"memory")
39#define rmb() asm volatile("lfence":::"memory")
40#define wmb() asm volatile("sfence" ::: "memory")
41#endif
42
43/**
44 * read_barrier_depends - Flush all pending reads that subsequents reads
45 * depend on.
46 *
47 * No data-dependent reads from memory-like regions are ever reordered
48 * over this barrier. All reads preceding this primitive are guaranteed
49 * to access memory (but not necessarily other CPUs' caches) before any
50 * reads following this primitive that depend on the data return by
51 * any of the preceding reads. This primitive is much lighter weight than
52 * rmb() on most CPUs, and is never heavier weight than is
53 * rmb().
54 *
55 * These ordering constraints are respected by both the local CPU
56 * and the compiler.
57 *
58 * Ordering is not guaranteed by anything other than these primitives,
59 * not even by data dependencies. See the documentation for
60 * memory_barrier() for examples and URLs to more information.
61 *
62 * For example, the following code would force ordering (the initial
63 * value of "a" is zero, "b" is one, and "p" is "&a"):
64 *
65 * <programlisting>
66 * CPU 0 CPU 1
67 *
68 * b = 2;
69 * memory_barrier();
70 * p = &b; q = p;
71 * read_barrier_depends();
72 * d = *q;
73 * </programlisting>
74 *
75 * because the read of "*q" depends on the read of "p" and these
76 * two reads are separated by a read_barrier_depends(). However,
77 * the following code, with the same initial values for "a" and "b":
78 *
79 * <programlisting>
80 * CPU 0 CPU 1
81 *
82 * a = 2;
83 * memory_barrier();
84 * b = 3; y = b;
85 * read_barrier_depends();
86 * x = a;
87 * </programlisting>
88 *
89 * does not enforce ordering, since there is no data dependency between
90 * the read of "a" and the read of "b". Therefore, on some CPUs, such
91 * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
92 * in cases like this where there are no data dependencies.
93 **/
94
95#define read_barrier_depends() do { } while (0)
96
97#ifdef CONFIG_SMP
98#define smp_mb() mb()
99#ifdef CONFIG_X86_PPRO_FENCE
100# define smp_rmb() rmb()
101#else
102# define smp_rmb() barrier()
103#endif
104#ifdef CONFIG_X86_OOSTORE
105# define smp_wmb() wmb()
106#else
107# define smp_wmb() barrier()
108#endif
109#define smp_read_barrier_depends() read_barrier_depends()
110#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
111#else
112#define smp_mb() barrier()
113#define smp_rmb() barrier()
114#define smp_wmb() barrier()
115#define smp_read_barrier_depends() do { } while (0)
116#define set_mb(var, value) do { var = value; barrier(); } while (0)
117#endif
118
119/*
120 * Stop RDTSC speculation. This is needed when you need to use RDTSC
121 * (or get_cycles or vread that possibly accesses the TSC) in a defined
122 * code region.
123 *
124 * (Could use an alternative three way for this if there was one.)
125 */
126static inline void rdtsc_barrier(void)
127{
128 alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
129 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
130}
131
132#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/tls.h b/arch/um/sys-x86_64/shared/sysdep/tls.h
deleted file mode 100644
index 18c000d0357a..000000000000
--- a/arch/um/sys-x86_64/shared/sysdep/tls.h
+++ /dev/null
@@ -1,29 +0,0 @@
1#ifndef _SYSDEP_TLS_H
2#define _SYSDEP_TLS_H
3
4# ifndef __KERNEL__
5
6/* Change name to avoid conflicts with the original one from <asm/ldt.h>, which
7 * may be named user_desc (but in 2.4 and in header matching its API was named
8 * modify_ldt_ldt_s). */
9
10typedef struct um_dup_user_desc {
11 unsigned int entry_number;
12 unsigned int base_addr;
13 unsigned int limit;
14 unsigned int seg_32bit:1;
15 unsigned int contents:2;
16 unsigned int read_exec_only:1;
17 unsigned int limit_in_pages:1;
18 unsigned int seg_not_present:1;
19 unsigned int useable:1;
20 unsigned int lm:1;
21} user_desc_t;
22
23# else /* __KERNEL__ */
24
25# include <ldt.h>
26typedef struct user_desc user_desc_t;
27
28# endif /* __KERNEL__ */
29#endif /* _SYSDEP_TLS_H */
diff --git a/arch/um/sys-x86_64/signal.c b/arch/um/sys-x86_64/signal.c
deleted file mode 100644
index b6b65c7c7a7d..000000000000
--- a/arch/um/sys-x86_64/signal.c
+++ /dev/null
@@ -1,290 +0,0 @@
1/*
2 * Copyright (C) 2003 PathScale, Inc.
3 * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
4 * Licensed under the GPL
5 */
6
7#include <linux/personality.h>
8#include <linux/ptrace.h>
9#include <linux/kernel.h>
10#include <asm/unistd.h>
11#include <asm/uaccess.h>
12#include <asm/ucontext.h>
13#include "frame_kern.h"
14#include "skas.h"
15
16void copy_sc(struct uml_pt_regs *regs, void *from)
17{
18 struct sigcontext *sc = from;
19
20#define GETREG(regs, regno, sc, regname) \
21 (regs)->gp[(regno) / sizeof(unsigned long)] = (sc)->regname
22
23 GETREG(regs, R8, sc, r8);
24 GETREG(regs, R9, sc, r9);
25 GETREG(regs, R10, sc, r10);
26 GETREG(regs, R11, sc, r11);
27 GETREG(regs, R12, sc, r12);
28 GETREG(regs, R13, sc, r13);
29 GETREG(regs, R14, sc, r14);
30 GETREG(regs, R15, sc, r15);
31 GETREG(regs, RDI, sc, di);
32 GETREG(regs, RSI, sc, si);
33 GETREG(regs, RBP, sc, bp);
34 GETREG(regs, RBX, sc, bx);
35 GETREG(regs, RDX, sc, dx);
36 GETREG(regs, RAX, sc, ax);
37 GETREG(regs, RCX, sc, cx);
38 GETREG(regs, RSP, sc, sp);
39 GETREG(regs, RIP, sc, ip);
40 GETREG(regs, EFLAGS, sc, flags);
41 GETREG(regs, CS, sc, cs);
42
43#undef GETREG
44}
45
46static int copy_sc_from_user(struct pt_regs *regs,
47 struct sigcontext __user *from,
48 struct _fpstate __user *fpp)
49{
50 struct user_i387_struct fp;
51 int err = 0;
52
53#define GETREG(regs, regno, sc, regname) \
54 __get_user((regs)->regs.gp[(regno) / sizeof(unsigned long)], \
55 &(sc)->regname)
56
57 err |= GETREG(regs, R8, from, r8);
58 err |= GETREG(regs, R9, from, r9);
59 err |= GETREG(regs, R10, from, r10);
60 err |= GETREG(regs, R11, from, r11);
61 err |= GETREG(regs, R12, from, r12);
62 err |= GETREG(regs, R13, from, r13);
63 err |= GETREG(regs, R14, from, r14);
64 err |= GETREG(regs, R15, from, r15);
65 err |= GETREG(regs, RDI, from, di);
66 err |= GETREG(regs, RSI, from, si);
67 err |= GETREG(regs, RBP, from, bp);
68 err |= GETREG(regs, RBX, from, bx);
69 err |= GETREG(regs, RDX, from, dx);
70 err |= GETREG(regs, RAX, from, ax);
71 err |= GETREG(regs, RCX, from, cx);
72 err |= GETREG(regs, RSP, from, sp);
73 err |= GETREG(regs, RIP, from, ip);
74 err |= GETREG(regs, EFLAGS, from, flags);
75 err |= GETREG(regs, CS, from, cs);
76 if (err)
77 return 1;
78
79#undef GETREG
80
81 err = copy_from_user(&fp, fpp, sizeof(struct user_i387_struct));
82 if (err)
83 return 1;
84
85 err = restore_fp_registers(userspace_pid[current_thread_info()->cpu],
86 (unsigned long *) &fp);
87 if (err < 0) {
88 printk(KERN_ERR "copy_sc_from_user - "
89 "restore_fp_registers failed, errno = %d\n",
90 -err);
91 return 1;
92 }
93
94 return 0;
95}
96
97static int copy_sc_to_user(struct sigcontext __user *to,
98 struct _fpstate __user *to_fp, struct pt_regs *regs,
99 unsigned long mask, unsigned long sp)
100{
101 struct faultinfo * fi = &current->thread.arch.faultinfo;
102 struct user_i387_struct fp;
103 int err = 0;
104
105 err |= __put_user(0, &to->gs);
106 err |= __put_user(0, &to->fs);
107
108#define PUTREG(regs, regno, sc, regname) \
109 __put_user((regs)->regs.gp[(regno) / sizeof(unsigned long)], \
110 &(sc)->regname)
111
112 err |= PUTREG(regs, RDI, to, di);
113 err |= PUTREG(regs, RSI, to, si);
114 err |= PUTREG(regs, RBP, to, bp);
115 /*
116 * Must use original RSP, which is passed in, rather than what's in
117 * the pt_regs, because that's already been updated to point at the
118 * signal frame.
119 */
120 err |= __put_user(sp, &to->sp);
121 err |= PUTREG(regs, RBX, to, bx);
122 err |= PUTREG(regs, RDX, to, dx);
123 err |= PUTREG(regs, RCX, to, cx);
124 err |= PUTREG(regs, RAX, to, ax);
125 err |= PUTREG(regs, R8, to, r8);
126 err |= PUTREG(regs, R9, to, r9);
127 err |= PUTREG(regs, R10, to, r10);
128 err |= PUTREG(regs, R11, to, r11);
129 err |= PUTREG(regs, R12, to, r12);
130 err |= PUTREG(regs, R13, to, r13);
131 err |= PUTREG(regs, R14, to, r14);
132 err |= PUTREG(regs, R15, to, r15);
133 err |= PUTREG(regs, CS, to, cs); /* XXX x86_64 doesn't do this */
134
135 err |= __put_user(fi->cr2, &to->cr2);
136 err |= __put_user(fi->error_code, &to->err);
137 err |= __put_user(fi->trap_no, &to->trapno);
138
139 err |= PUTREG(regs, RIP, to, ip);
140 err |= PUTREG(regs, EFLAGS, to, flags);
141#undef PUTREG
142
143 err |= __put_user(mask, &to->oldmask);
144 if (err)
145 return 1;
146
147 err = save_fp_registers(userspace_pid[current_thread_info()->cpu],
148 (unsigned long *) &fp);
149 if (err < 0) {
150 printk(KERN_ERR "copy_sc_from_user - restore_fp_registers "
151 "failed, errno = %d\n", -err);
152 return 1;
153 }
154
155 if (copy_to_user(to_fp, &fp, sizeof(struct user_i387_struct)))
156 return 1;
157
158 return err;
159}
160
161struct rt_sigframe
162{
163 char __user *pretcode;
164 struct ucontext uc;
165 struct siginfo info;
166 struct _fpstate fpstate;
167};
168
169int setup_signal_stack_si(unsigned long stack_top, int sig,
170 struct k_sigaction *ka, struct pt_regs * regs,
171 siginfo_t *info, sigset_t *set)
172{
173 struct rt_sigframe __user *frame;
174 unsigned long save_sp = PT_REGS_RSP(regs);
175 int err = 0;
176 struct task_struct *me = current;
177
178 frame = (struct rt_sigframe __user *)
179 round_down(stack_top - sizeof(struct rt_sigframe), 16);
180 /* Subtract 128 for a red zone and 8 for proper alignment */
181 frame = (struct rt_sigframe __user *) ((unsigned long) frame - 128 - 8);
182
183 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
184 goto out;
185
186 if (ka->sa.sa_flags & SA_SIGINFO) {
187 err |= copy_siginfo_to_user(&frame->info, info);
188 if (err)
189 goto out;
190 }
191
192 /*
193 * Update SP now because the page fault handler refuses to extend
194 * the stack if the faulting address is too far below the current
195 * SP, which frame now certainly is. If there's an error, the original
196 * value is restored on the way out.
197 * When writing the sigcontext to the stack, we have to write the
198 * original value, so that's passed to copy_sc_to_user, which does
199 * the right thing with it.
200 */
201 PT_REGS_RSP(regs) = (unsigned long) frame;
202
203 /* Create the ucontext. */
204 err |= __put_user(0, &frame->uc.uc_flags);
205 err |= __put_user(0, &frame->uc.uc_link);
206 err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
207 err |= __put_user(sas_ss_flags(save_sp),
208 &frame->uc.uc_stack.ss_flags);
209 err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
210 err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs,
211 set->sig[0], save_sp);
212 err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate);
213 if (sizeof(*set) == 16) {
214 __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
215 __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
216 }
217 else
218 err |= __copy_to_user(&frame->uc.uc_sigmask, set,
219 sizeof(*set));
220
221 /*
222 * Set up to return from userspace. If provided, use a stub
223 * already in userspace.
224 */
225 /* x86-64 should always use SA_RESTORER. */
226 if (ka->sa.sa_flags & SA_RESTORER)
227 err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
228 else
229 /* could use a vstub here */
230 goto restore_sp;
231
232 if (err)
233 goto restore_sp;
234
235 /* Set up registers for signal handler */
236 {
237 struct exec_domain *ed = current_thread_info()->exec_domain;
238 if (unlikely(ed && ed->signal_invmap && sig < 32))
239 sig = ed->signal_invmap[sig];
240 }
241
242 PT_REGS_RDI(regs) = sig;
243 /* In case the signal handler was declared without prototypes */
244 PT_REGS_RAX(regs) = 0;
245
246 /*
247 * This also works for non SA_SIGINFO handlers because they expect the
248 * next argument after the signal number on the stack.
249 */
250 PT_REGS_RSI(regs) = (unsigned long) &frame->info;
251 PT_REGS_RDX(regs) = (unsigned long) &frame->uc;
252 PT_REGS_RIP(regs) = (unsigned long) ka->sa.sa_handler;
253 out:
254 return err;
255
256restore_sp:
257 PT_REGS_RSP(regs) = save_sp;
258 return err;
259}
260
261long sys_rt_sigreturn(struct pt_regs *regs)
262{
263 unsigned long sp = PT_REGS_SP(&current->thread.regs);
264 struct rt_sigframe __user *frame =
265 (struct rt_sigframe __user *)(sp - 8);
266 struct ucontext __user *uc = &frame->uc;
267 sigset_t set;
268
269 if (copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
270 goto segfault;
271
272 sigdelsetmask(&set, ~_BLOCKABLE);
273
274 spin_lock_irq(&current->sighand->siglock);
275 current->blocked = set;
276 recalc_sigpending();
277 spin_unlock_irq(&current->sighand->siglock);
278
279 if (copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext,
280 &frame->fpstate))
281 goto segfault;
282
283 /* Avoid ERESTART handling */
284 PT_REGS_SYSCALL_NR(&current->thread.regs) = -1;
285 return PT_REGS_SYSCALL_RET(&current->thread.regs);
286
287 segfault:
288 force_sig(SIGSEGV, current);
289 return 0;
290}
diff --git a/arch/um/Makefile-i386 b/arch/x86/Makefile.um
index 302cbe504543..36ddec6a41c9 100644
--- a/arch/um/Makefile-i386
+++ b/arch/x86/Makefile.um
@@ -1,14 +1,11 @@
1core-y += arch/um/sys-i386/ arch/x86/crypto/ 1core-y += arch/x86/crypto/
2
3TOP_ADDR := $(CONFIG_TOP_ADDR)
4 2
3ifeq ($(CONFIG_X86_32),y)
5START := 0x8048000 4START := 0x8048000
6 5
7LDFLAGS += -m elf_i386 6LDFLAGS += -m elf_i386
8ELF_ARCH := $(SUBARCH) 7ELF_ARCH := i386
9ELF_FORMAT := elf32-$(SUBARCH) 8ELF_FORMAT := elf32-i386
10OBJCOPYFLAGS := -O binary -R .note -R .comment -S
11HEADER_ARCH := x86
12CHECKFLAGS += -D__i386__ 9CHECKFLAGS += -D__i386__
13 10
14ifeq ("$(origin SUBARCH)", "command line") 11ifeq ("$(origin SUBARCH)", "command line")
@@ -16,9 +13,8 @@ ifneq ("$(shell uname -m | sed -e s/i.86/i386/)", "$(SUBARCH)")
16KBUILD_CFLAGS += $(call cc-option,-m32) 13KBUILD_CFLAGS += $(call cc-option,-m32)
17KBUILD_AFLAGS += $(call cc-option,-m32) 14KBUILD_AFLAGS += $(call cc-option,-m32)
18LINK-y += $(call cc-option,-m32) 15LINK-y += $(call cc-option,-m32)
19UML_OBJCOPYFLAGS += -F $(ELF_FORMAT)
20 16
21export LDFLAGS HOSTCFLAGS HOSTLDFLAGS UML_OBJCOPYFLAGS 17export LDFLAGS
22endif 18endif
23endif 19endif
24 20
@@ -40,3 +36,26 @@ KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \
40 else echo $(call cc-option,-funit-at-a-time); fi ;) 36 else echo $(call cc-option,-funit-at-a-time); fi ;)
41 37
42KBUILD_CFLAGS += $(cflags-y) 38KBUILD_CFLAGS += $(cflags-y)
39
40else
41
42START := 0x60000000
43
44KBUILD_CFLAGS += -fno-builtin -m64
45
46CHECKFLAGS += -m64 -D__x86_64__
47KBUILD_AFLAGS += -m64
48LDFLAGS += -m elf_x86_64
49KBUILD_CPPFLAGS += -m64
50
51ELF_ARCH := i386:x86-64
52ELF_FORMAT := elf64-x86-64
53
54# Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example.
55
56LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64
57LINK-y += -m64
58
59# Do unit-at-a-time unconditionally on x86_64, following the host
60KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
61endif
diff --git a/arch/um/Kconfig.x86 b/arch/x86/um/Kconfig
index 21bebe63df66..21bebe63df66 100644
--- a/arch/um/Kconfig.x86
+++ b/arch/x86/um/Kconfig
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
new file mode 100644
index 000000000000..8fb58400e415
--- /dev/null
+++ b/arch/x86/um/Makefile
@@ -0,0 +1,45 @@
1#
2# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3#
4
5ifeq ($(CONFIG_X86_32),y)
6 BITS := 32
7else
8 BITS := 64
9endif
10
11obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \
12 ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \
13 stub_$(BITS).o stub_segv.o syscalls_$(BITS).o \
14 sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \
15 mem_$(BITS).o subarch.o os-$(OS)/
16
17ifeq ($(CONFIG_X86_32),y)
18
19obj-y += checksum_32.o
20obj-$(CONFIG_BINFMT_ELF) += elfcore.o
21
22subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
23subarch-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += ../lib/rwsem.o
24subarch-$(CONFIG_HIGHMEM) += ../mm/highmem_32.o
25
26else
27
28obj-y += vdso/
29
30subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../lib/thunk_64.o \
31 ../lib/rwsem.o
32
33endif
34
35subarch-$(CONFIG_MODULES) += ../kernel/module.o
36
37USER_OBJS := bugs_$(BITS).o ptrace_user.o fault.o
38
39extra-y += user-offsets.s
40$(obj)/user-offsets.s: c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS)
41
42UNPROFILE_OBJS := stub_segv.o
43CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING)
44
45include arch/um/scripts/Makefile.rules
diff --git a/arch/um/include/asm/apic.h b/arch/x86/um/asm/apic.h
index 876dee84ab11..876dee84ab11 100644
--- a/arch/um/include/asm/apic.h
+++ b/arch/x86/um/asm/apic.h
diff --git a/arch/um/include/asm/arch_hweight.h b/arch/x86/um/asm/arch_hweight.h
index c656cf443f4a..c656cf443f4a 100644
--- a/arch/um/include/asm/arch_hweight.h
+++ b/arch/x86/um/asm/arch_hweight.h
diff --git a/arch/um/sys-i386/asm/archparam.h b/arch/x86/um/asm/archparam.h
index 2a18a884ca1b..c17cf68dda0f 100644
--- a/arch/um/sys-i386/asm/archparam.h
+++ b/arch/x86/um/asm/archparam.h
@@ -1,10 +1,13 @@
1/* 1/*
2 * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) 2 * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
3 * Copyright 2003 PathScale, Inc.
3 * Licensed under the GPL 4 * Licensed under the GPL
4 */ 5 */
5 6
6#ifndef __UM_ARCHPARAM_I386_H 7#ifndef __UM_ARCHPARAM_H
7#define __UM_ARCHPARAM_I386_H 8#define __UM_ARCHPARAM_H
9
10#ifdef CONFIG_X86_32
8 11
9#ifdef CONFIG_X86_PAE 12#ifdef CONFIG_X86_PAE
10#define LAST_PKMAP 512 13#define LAST_PKMAP 512
@@ -14,3 +17,4 @@
14 17
15#endif 18#endif
16 19
20#endif
diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h
new file mode 100644
index 000000000000..b6efe2381b5d
--- /dev/null
+++ b/arch/x86/um/asm/checksum.h
@@ -0,0 +1,10 @@
1#ifndef __UM_CHECKSUM_H
2#define __UM_CHECKSUM_H
3
4#ifdef CONFIG_X86_32
5# include "checksum_32.h"
6#else
7# include "checksum_64.h"
8#endif
9
10#endif
diff --git a/arch/um/sys-i386/shared/sysdep/checksum.h b/arch/x86/um/asm/checksum_32.h
index ed47445f3905..caab74252e27 100644
--- a/arch/um/sys-i386/shared/sysdep/checksum.h
+++ b/arch/x86/um/asm/checksum_32.h
@@ -1,4 +1,4 @@
1/* 1/*
2 * Licensed under the GPL 2 * Licensed under the GPL
3 */ 3 */
4 4
diff --git a/arch/um/sys-x86_64/shared/sysdep/checksum.h b/arch/x86/um/asm/checksum_64.h
index a5be9031ea85..a5be9031ea85 100644
--- a/arch/um/sys-x86_64/shared/sysdep/checksum.h
+++ b/arch/x86/um/asm/checksum_64.h
diff --git a/arch/um/include/asm/desc.h b/arch/x86/um/asm/desc.h
index 4ec34a51b62c..4ec34a51b62c 100644
--- a/arch/um/include/asm/desc.h
+++ b/arch/x86/um/asm/desc.h
diff --git a/arch/um/sys-x86_64/asm/elf.h b/arch/x86/um/asm/elf.h
index 11a2bfb38859..f3b0633b69a1 100644
--- a/arch/um/sys-x86_64/asm/elf.h
+++ b/arch/x86/um/asm/elf.h
@@ -1,15 +1,103 @@
1/* 1/*
2 * Copyright 2003 PathScale, Inc. 2 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
4 *
5 * Licensed under the GPL 3 * Licensed under the GPL
6 */ 4 */
7#ifndef __UM_ELF_X86_64_H 5#ifndef __UM_ELF_X86_H
8#define __UM_ELF_X86_64_H 6#define __UM_ELF_X86_H
9 7
10#include <asm/user.h> 8#include <asm/user.h>
11#include "skas.h" 9#include "skas.h"
12 10
11#ifdef CONFIG_X86_32
12
13#define R_386_NONE 0
14#define R_386_32 1
15#define R_386_PC32 2
16#define R_386_GOT32 3
17#define R_386_PLT32 4
18#define R_386_COPY 5
19#define R_386_GLOB_DAT 6
20#define R_386_JMP_SLOT 7
21#define R_386_RELATIVE 8
22#define R_386_GOTOFF 9
23#define R_386_GOTPC 10
24#define R_386_NUM 11
25
26/*
27 * This is used to ensure we don't load something for the wrong architecture.
28 */
29#define elf_check_arch(x) \
30 (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486))
31
32#define ELF_CLASS ELFCLASS32
33#define ELF_DATA ELFDATA2LSB
34#define ELF_ARCH EM_386
35
36#define ELF_PLAT_INIT(regs, load_addr) do { \
37 PT_REGS_EBX(regs) = 0; \
38 PT_REGS_ECX(regs) = 0; \
39 PT_REGS_EDX(regs) = 0; \
40 PT_REGS_ESI(regs) = 0; \
41 PT_REGS_EDI(regs) = 0; \
42 PT_REGS_EBP(regs) = 0; \
43 PT_REGS_EAX(regs) = 0; \
44} while (0)
45
46/* Shamelessly stolen from include/asm-i386/elf.h */
47
48#define ELF_CORE_COPY_REGS(pr_reg, regs) do { \
49 pr_reg[0] = PT_REGS_EBX(regs); \
50 pr_reg[1] = PT_REGS_ECX(regs); \
51 pr_reg[2] = PT_REGS_EDX(regs); \
52 pr_reg[3] = PT_REGS_ESI(regs); \
53 pr_reg[4] = PT_REGS_EDI(regs); \
54 pr_reg[5] = PT_REGS_EBP(regs); \
55 pr_reg[6] = PT_REGS_EAX(regs); \
56 pr_reg[7] = PT_REGS_DS(regs); \
57 pr_reg[8] = PT_REGS_ES(regs); \
58 /* fake once used fs and gs selectors? */ \
59 pr_reg[9] = PT_REGS_DS(regs); \
60 pr_reg[10] = PT_REGS_DS(regs); \
61 pr_reg[11] = PT_REGS_SYSCALL_NR(regs); \
62 pr_reg[12] = PT_REGS_IP(regs); \
63 pr_reg[13] = PT_REGS_CS(regs); \
64 pr_reg[14] = PT_REGS_EFLAGS(regs); \
65 pr_reg[15] = PT_REGS_SP(regs); \
66 pr_reg[16] = PT_REGS_SS(regs); \
67} while (0);
68
69extern char * elf_aux_platform;
70#define ELF_PLATFORM (elf_aux_platform)
71
72extern unsigned long vsyscall_ehdr;
73extern unsigned long vsyscall_end;
74extern unsigned long __kernel_vsyscall;
75
76/*
77 * This is the range that is readable by user mode, and things
78 * acting like user mode such as get_user_pages.
79 */
80#define FIXADDR_USER_START vsyscall_ehdr
81#define FIXADDR_USER_END vsyscall_end
82
83
84/*
85 * Architecture-neutral AT_ values in 0-17, leave some room
86 * for more of them, start the x86-specific ones at 32.
87 */
88#define AT_SYSINFO 32
89#define AT_SYSINFO_EHDR 33
90
91#define ARCH_DLINFO \
92do { \
93 if ( vsyscall_ehdr ) { \
94 NEW_AUX_ENT(AT_SYSINFO, __kernel_vsyscall); \
95 NEW_AUX_ENT(AT_SYSINFO_EHDR, vsyscall_ehdr); \
96 } \
97} while (0)
98
99#else
100
13/* x86-64 relocation types, taken from asm-x86_64/elf.h */ 101/* x86-64 relocation types, taken from asm-x86_64/elf.h */
14#define R_X86_64_NONE 0 /* No reloc */ 102#define R_X86_64_NONE 0 /* No reloc */
15#define R_X86_64_64 1 /* Direct 64 bit */ 103#define R_X86_64_64 1 /* Direct 64 bit */
@@ -31,13 +119,6 @@
31 119
32#define R_X86_64_NUM 16 120#define R_X86_64_NUM 16
33 121
34typedef unsigned long elf_greg_t;
35
36#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t))
37typedef elf_greg_t elf_gregset_t[ELF_NGREG];
38
39typedef struct user_i387_struct elf_fpregset_t;
40
41/* 122/*
42 * This is used to ensure we don't load something for the wrong architecture. 123 * This is used to ensure we don't load something for the wrong architecture.
43 */ 124 */
@@ -95,6 +176,30 @@ typedef struct user_i387_struct elf_fpregset_t;
95 (pr_reg)[25] = 0; \ 176 (pr_reg)[25] = 0; \
96 (pr_reg)[26] = 0; 177 (pr_reg)[26] = 0;
97 178
179#define ELF_PLATFORM "x86_64"
180
181/* No user-accessible fixmap addresses, i.e. vsyscall */
182#define FIXADDR_USER_START 0
183#define FIXADDR_USER_END 0
184
185#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
186struct linux_binprm;
187extern int arch_setup_additional_pages(struct linux_binprm *bprm,
188 int uses_interp);
189
190extern unsigned long um_vdso_addr;
191#define AT_SYSINFO_EHDR 33
192#define ARCH_DLINFO NEW_AUX_ENT(AT_SYSINFO_EHDR, um_vdso_addr)
193
194#endif
195
196typedef unsigned long elf_greg_t;
197
198#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t))
199typedef elf_greg_t elf_gregset_t[ELF_NGREG];
200
201typedef struct user_i387_struct elf_fpregset_t;
202
98#define task_pt_regs(t) (&(t)->thread.regs) 203#define task_pt_regs(t) (&(t)->thread.regs)
99 204
100struct task_struct; 205struct task_struct;
@@ -103,11 +208,6 @@ extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu);
103 208
104#define ELF_CORE_COPY_FPREGS(t, fpu) elf_core_copy_fpregs(t, fpu) 209#define ELF_CORE_COPY_FPREGS(t, fpu) elf_core_copy_fpregs(t, fpu)
105 210
106#ifdef TIF_IA32 /* XXX */
107#error XXX, indeed
108 clear_thread_flag(TIF_IA32);
109#endif
110
111#define ELF_EXEC_PAGESIZE 4096 211#define ELF_EXEC_PAGESIZE 4096
112 212
113#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) 213#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3)
@@ -115,18 +215,7 @@ extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu);
115extern long elf_aux_hwcap; 215extern long elf_aux_hwcap;
116#define ELF_HWCAP (elf_aux_hwcap) 216#define ELF_HWCAP (elf_aux_hwcap)
117 217
118#define ELF_PLATFORM "x86_64"
119
120#define SET_PERSONALITY(ex) do ; while(0) 218#define SET_PERSONALITY(ex) do ; while(0)
121
122#define __HAVE_ARCH_GATE_AREA 1 219#define __HAVE_ARCH_GATE_AREA 1
123#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
124struct linux_binprm;
125extern int arch_setup_additional_pages(struct linux_binprm *bprm,
126 int uses_interp);
127
128extern unsigned long um_vdso_addr;
129#define AT_SYSINFO_EHDR 33
130#define ARCH_DLINFO NEW_AUX_ENT(AT_SYSINFO_EHDR, um_vdso_addr)
131 220
132#endif 221#endif
diff --git a/arch/um/include/asm/irq_vectors.h b/arch/x86/um/asm/irq_vectors.h
index 272a81e0ce14..272a81e0ce14 100644
--- a/arch/um/include/asm/irq_vectors.h
+++ b/arch/x86/um/asm/irq_vectors.h
diff --git a/arch/x86/um/asm/mm_context.h b/arch/x86/um/asm/mm_context.h
new file mode 100644
index 000000000000..4a73d63e4760
--- /dev/null
+++ b/arch/x86/um/asm/mm_context.h
@@ -0,0 +1,72 @@
1/*
2 * Copyright (C) 2004 Fujitsu Siemens Computers GmbH
3 * Licensed under the GPL
4 *
5 * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
6 */
7
8#ifndef __ASM_LDT_H
9#define __ASM_LDT_H
10
11#include <linux/mutex.h>
12#include <asm/ldt.h>
13
14extern void ldt_host_info(void);
15
16#define LDT_PAGES_MAX \
17 ((LDT_ENTRIES * LDT_ENTRY_SIZE)/PAGE_SIZE)
18#define LDT_ENTRIES_PER_PAGE \
19 (PAGE_SIZE/LDT_ENTRY_SIZE)
20#define LDT_DIRECT_ENTRIES \
21 ((LDT_PAGES_MAX*sizeof(void *))/LDT_ENTRY_SIZE)
22
23struct ldt_entry {
24 __u32 a;
25 __u32 b;
26};
27
28typedef struct uml_ldt {
29 int entry_count;
30 struct mutex lock;
31 union {
32 struct ldt_entry * pages[LDT_PAGES_MAX];
33 struct ldt_entry entries[LDT_DIRECT_ENTRIES];
34 } u;
35} uml_ldt_t;
36
37#define LDT_entry_a(info) \
38 ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
39
40#define LDT_entry_b(info) \
41 (((info)->base_addr & 0xff000000) | \
42 (((info)->base_addr & 0x00ff0000) >> 16) | \
43 ((info)->limit & 0xf0000) | \
44 (((info)->read_exec_only ^ 1) << 9) | \
45 ((info)->contents << 10) | \
46 (((info)->seg_not_present ^ 1) << 15) | \
47 ((info)->seg_32bit << 22) | \
48 ((info)->limit_in_pages << 23) | \
49 ((info)->useable << 20) | \
50 0x7000)
51
52#define _LDT_empty(info) (\
53 (info)->base_addr == 0 && \
54 (info)->limit == 0 && \
55 (info)->contents == 0 && \
56 (info)->read_exec_only == 1 && \
57 (info)->seg_32bit == 0 && \
58 (info)->limit_in_pages == 0 && \
59 (info)->seg_not_present == 1 && \
60 (info)->useable == 0 )
61
62#ifdef CONFIG_X86_64
63#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
64#else
65#define LDT_empty(info) (_LDT_empty(info))
66#endif
67
68struct uml_arch_mm_context {
69 uml_ldt_t ldt;
70};
71
72#endif
diff --git a/arch/x86/um/asm/module.h b/arch/x86/um/asm/module.h
new file mode 100644
index 000000000000..61af80e932eb
--- /dev/null
+++ b/arch/x86/um/asm/module.h
@@ -0,0 +1,23 @@
1#ifndef __UM_MODULE_H
2#define __UM_MODULE_H
3
4/* UML is simple */
5struct mod_arch_specific
6{
7};
8
9#ifdef CONFIG_X86_32
10
11#define Elf_Shdr Elf32_Shdr
12#define Elf_Sym Elf32_Sym
13#define Elf_Ehdr Elf32_Ehdr
14
15#else
16
17#define Elf_Shdr Elf64_Shdr
18#define Elf_Sym Elf64_Sym
19#define Elf_Ehdr Elf64_Ehdr
20
21#endif
22
23#endif
diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h
new file mode 100644
index 000000000000..118c143a9cb4
--- /dev/null
+++ b/arch/x86/um/asm/processor.h
@@ -0,0 +1,22 @@
1#ifndef __UM_PROCESSOR_H
2#define __UM_PROCESSOR_H
3
4/* include faultinfo structure */
5#include <sysdep/faultinfo.h>
6
7#ifdef CONFIG_X86_32
8# include "processor_32.h"
9#else
10# include "processor_64.h"
11#endif
12
13#define KSTK_EIP(tsk) KSTK_REG(tsk, HOST_IP)
14#define KSTK_ESP(tsk) KSTK_REG(tsk, HOST_IP)
15#define KSTK_EBP(tsk) KSTK_REG(tsk, HOST_BP)
16
17#define ARCH_IS_STACKGROW(address) \
18 (address + 65536 + 32 * sizeof(unsigned long) >= UPT_SP(&current->thread.regs.regs))
19
20#include <asm/processor-generic.h>
21
22#endif
diff --git a/arch/um/sys-i386/asm/processor.h b/arch/x86/um/asm/processor_32.h
index 82a9061ab5be..018f732704dd 100644
--- a/arch/um/sys-i386/asm/processor.h
+++ b/arch/x86/um/asm/processor_32.h
@@ -6,15 +6,12 @@
6#ifndef __UM_PROCESSOR_I386_H 6#ifndef __UM_PROCESSOR_I386_H
7#define __UM_PROCESSOR_I386_H 7#define __UM_PROCESSOR_I386_H
8 8
9#include "linux/string.h" 9#include <linux/string.h>
10#include <sysdep/host_ldt.h> 10#include <asm/segment.h>
11#include "asm/segment.h" 11#include <asm/ldt.h>
12 12
13extern int host_has_cmov; 13extern int host_has_cmov;
14 14
15/* include faultinfo structure */
16#include "sysdep/faultinfo.h"
17
18struct uml_tls_struct { 15struct uml_tls_struct {
19 struct user_desc tls; 16 struct user_desc tls;
20 unsigned flushed:1; 17 unsigned flushed:1;
@@ -66,13 +63,4 @@ static inline void rep_nop(void)
66#define current_text_addr() \ 63#define current_text_addr() \
67 ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; }) 64 ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; })
68 65
69#define ARCH_IS_STACKGROW(address) \
70 (address + 32 >= UPT_SP(&current->thread.regs.regs))
71
72#define KSTK_EIP(tsk) KSTK_REG(tsk, EIP)
73#define KSTK_ESP(tsk) KSTK_REG(tsk, UESP)
74#define KSTK_EBP(tsk) KSTK_REG(tsk, EBP)
75
76#include "asm/processor-generic.h"
77
78#endif 66#endif
diff --git a/arch/um/sys-x86_64/asm/processor.h b/arch/x86/um/asm/processor_64.h
index 875a26a62614..61de92d916c3 100644
--- a/arch/um/sys-x86_64/asm/processor.h
+++ b/arch/x86/um/asm/processor_64.h
@@ -7,9 +7,6 @@
7#ifndef __UM_PROCESSOR_X86_64_H 7#ifndef __UM_PROCESSOR_X86_64_H
8#define __UM_PROCESSOR_X86_64_H 8#define __UM_PROCESSOR_X86_64_H
9 9
10/* include faultinfo structure */
11#include "sysdep/faultinfo.h"
12
13struct arch_thread { 10struct arch_thread {
14 unsigned long debugregs[8]; 11 unsigned long debugregs[8];
15 int debugregs_seq; 12 int debugregs_seq;
@@ -45,12 +42,4 @@ static inline void arch_copy_thread(struct arch_thread *from,
45#define current_text_addr() \ 42#define current_text_addr() \
46 ({ void *pc; __asm__("movq $1f,%0\n1:":"=g" (pc)); pc; }) 43 ({ void *pc; __asm__("movq $1f,%0\n1:":"=g" (pc)); pc; })
47 44
48#define ARCH_IS_STACKGROW(address) \
49 (address + 128 >= UPT_SP(&current->thread.regs.regs))
50
51#define KSTK_EIP(tsk) KSTK_REG(tsk, RIP)
52#define KSTK_ESP(tsk) KSTK_REG(tsk, RSP)
53
54#include "asm/processor-generic.h"
55
56#endif 45#endif
diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h
new file mode 100644
index 000000000000..c8aca8c501b0
--- /dev/null
+++ b/arch/x86/um/asm/ptrace.h
@@ -0,0 +1,5 @@
1#ifdef CONFIG_X86_32
2# include "ptrace_32.h"
3#else
4# include "ptrace_64.h"
5#endif
diff --git a/arch/um/sys-i386/asm/ptrace.h b/arch/x86/um/asm/ptrace_32.h
index 5d2a59112537..5d2a59112537 100644
--- a/arch/um/sys-i386/asm/ptrace.h
+++ b/arch/x86/um/asm/ptrace_32.h
diff --git a/arch/um/sys-x86_64/asm/ptrace.h b/arch/x86/um/asm/ptrace_64.h
index 83d8c473b905..706a0d80545c 100644
--- a/arch/um/sys-x86_64/asm/ptrace.h
+++ b/arch/x86/um/asm/ptrace_64.h
@@ -40,7 +40,7 @@
40 40
41#define PT_REGS_ORIG_RAX(r) UPT_ORIG_RAX(&(r)->regs) 41#define PT_REGS_ORIG_RAX(r) UPT_ORIG_RAX(&(r)->regs)
42#define PT_REGS_RIP(r) UPT_IP(&(r)->regs) 42#define PT_REGS_RIP(r) UPT_IP(&(r)->regs)
43#define PT_REGS_RSP(r) UPT_SP(&(r)->regs) 43#define PT_REGS_SP(r) UPT_SP(&(r)->regs)
44 44
45#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs) 45#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs)
46 46
diff --git a/arch/um/include/asm/required-features.h b/arch/x86/um/asm/required-features.h
index dfb967b2d2f3..dfb967b2d2f3 100644
--- a/arch/um/include/asm/required-features.h
+++ b/arch/x86/um/asm/required-features.h
diff --git a/arch/um/include/asm/segment.h b/arch/x86/um/asm/segment.h
index 45183fcd10b6..45183fcd10b6 100644
--- a/arch/um/include/asm/segment.h
+++ b/arch/x86/um/asm/segment.h
diff --git a/arch/um/sys-i386/shared/sysdep/system.h b/arch/x86/um/asm/system.h
index d1b93c436200..a459fd9b7598 100644
--- a/arch/um/sys-i386/shared/sysdep/system.h
+++ b/arch/x86/um/asm/system.h
@@ -129,4 +129,7 @@ static inline void rdtsc_barrier(void)
129 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); 129 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
130} 130}
131 131
132extern void *_switch_to(void *prev, void *next, void *last);
133#define switch_to(prev, next, last) prev = _switch_to(prev, next, last)
134
132#endif 135#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/vm-flags.h b/arch/x86/um/asm/vm-flags.h
index 3978e55132d2..7c297e9e2413 100644
--- a/arch/um/sys-x86_64/shared/sysdep/vm-flags.h
+++ b/arch/x86/um/asm/vm-flags.h
@@ -4,8 +4,17 @@
4 * Licensed under the GPL 4 * Licensed under the GPL
5 */ 5 */
6 6
7#ifndef __VM_FLAGS_X86_64_H 7#ifndef __VM_FLAGS_X86_H
8#define __VM_FLAGS_X86_64_H 8#define __VM_FLAGS_X86_H
9
10#ifdef CONFIG_X86_32
11
12#define VM_DATA_DEFAULT_FLAGS \
13 (VM_READ | VM_WRITE | \
14 ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
15 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
16
17#else
9 18
10#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ 19#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
11 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) 20 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
@@ -13,3 +22,4 @@
13 VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) 22 VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
14 23
15#endif 24#endif
25#endif
diff --git a/arch/um/sys-x86_64/bug.c b/arch/x86/um/bug.c
index e8034e363d83..e8034e363d83 100644
--- a/arch/um/sys-x86_64/bug.c
+++ b/arch/x86/um/bug.c
diff --git a/arch/um/sys-i386/bugs.c b/arch/x86/um/bugs_32.c
index 2c6d0d731c12..a1fba5fb9dbe 100644
--- a/arch/um/sys-i386/bugs.c
+++ b/arch/x86/um/bugs_32.c
@@ -4,17 +4,17 @@
4 */ 4 */
5 5
6#include <signal.h> 6#include <signal.h>
7#include "kern_constants.h"
8#include "kern_util.h" 7#include "kern_util.h"
9#include "longjmp.h" 8#include "longjmp.h"
10#include "task.h"
11#include "user.h"
12#include "sysdep/ptrace.h" 9#include "sysdep/ptrace.h"
10#include <generated/asm-offsets.h>
13 11
14/* Set during early boot */ 12/* Set during early boot */
15static int host_has_cmov = 1; 13static int host_has_cmov = 1;
16static jmp_buf cmov_test_return; 14static jmp_buf cmov_test_return;
17 15
16#define TASK_PID(task) *((int *) &(((char *) (task))[HOST_TASK_PID]))
17
18static void cmov_sigill_test_handler(int sig) 18static void cmov_sigill_test_handler(int sig)
19{ 19{
20 host_has_cmov = 0; 20 host_has_cmov = 0;
diff --git a/arch/um/sys-x86_64/bugs.c b/arch/x86/um/bugs_64.c
index 44e02ba2a265..44e02ba2a265 100644
--- a/arch/um/sys-x86_64/bugs.c
+++ b/arch/x86/um/bugs_64.c
diff --git a/arch/um/sys-i386/checksum.S b/arch/x86/um/checksum_32.S
index f058d2f82e18..f058d2f82e18 100644
--- a/arch/um/sys-i386/checksum.S
+++ b/arch/x86/um/checksum_32.S
diff --git a/arch/um/sys-i386/delay.c b/arch/x86/um/delay.c
index f3fe1a688f7e..f3fe1a688f7e 100644
--- a/arch/um/sys-i386/delay.c
+++ b/arch/x86/um/delay.c
diff --git a/arch/um/sys-i386/elfcore.c b/arch/x86/um/elfcore.c
index 6bb49b687c97..6bb49b687c97 100644
--- a/arch/um/sys-i386/elfcore.c
+++ b/arch/x86/um/elfcore.c
diff --git a/arch/um/sys-i386/fault.c b/arch/x86/um/fault.c
index d670f68532f4..d670f68532f4 100644
--- a/arch/um/sys-i386/fault.c
+++ b/arch/x86/um/fault.c
diff --git a/arch/um/sys-x86_64/ksyms.c b/arch/x86/um/ksyms.c
index 1db2fce00948..2e8f43ec6214 100644
--- a/arch/um/sys-x86_64/ksyms.c
+++ b/arch/x86/um/ksyms.c
@@ -2,10 +2,12 @@
2#include <asm/string.h> 2#include <asm/string.h>
3#include <asm/checksum.h> 3#include <asm/checksum.h>
4 4
5#ifndef CONFIG_X86_32
5/*XXX: we need them because they would be exported by x86_64 */ 6/*XXX: we need them because they would be exported by x86_64 */
6#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4 7#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4
7EXPORT_SYMBOL(memcpy); 8EXPORT_SYMBOL(memcpy);
8#else 9#else
9EXPORT_SYMBOL(__memcpy); 10EXPORT_SYMBOL(__memcpy);
10#endif 11#endif
12#endif
11EXPORT_SYMBOL(csum_partial); 13EXPORT_SYMBOL(csum_partial);
diff --git a/arch/um/sys-i386/ldt.c b/arch/x86/um/ldt.c
index 3f2bf208d884..26b0e39d2ce9 100644
--- a/arch/um/sys-i386/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -137,7 +137,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount)
137{ 137{
138 int i, err = 0; 138 int i, err = 0;
139 unsigned long size; 139 unsigned long size;
140 uml_ldt_t * ldt = &current->mm->context.ldt; 140 uml_ldt_t *ldt = &current->mm->context.arch.ldt;
141 141
142 if (!ldt->entry_count) 142 if (!ldt->entry_count)
143 goto out; 143 goto out;
@@ -205,7 +205,7 @@ static int read_default_ldt(void __user * ptr, unsigned long bytecount)
205 205
206static int write_ldt(void __user * ptr, unsigned long bytecount, int func) 206static int write_ldt(void __user * ptr, unsigned long bytecount, int func)
207{ 207{
208 uml_ldt_t * ldt = &current->mm->context.ldt; 208 uml_ldt_t *ldt = &current->mm->context.arch.ldt;
209 struct mm_id * mm_idp = &current->mm->context.id; 209 struct mm_id * mm_idp = &current->mm->context.id;
210 int i, err; 210 int i, err;
211 struct user_desc ldt_info; 211 struct user_desc ldt_info;
@@ -397,7 +397,7 @@ long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm)
397 397
398 398
399 if (!ptrace_ldt) 399 if (!ptrace_ldt)
400 mutex_init(&new_mm->ldt.lock); 400 mutex_init(&new_mm->arch.ldt.lock);
401 401
402 if (!from_mm) { 402 if (!from_mm) {
403 memset(&desc, 0, sizeof(desc)); 403 memset(&desc, 0, sizeof(desc));
@@ -429,7 +429,7 @@ long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm)
429 break; 429 break;
430 } 430 }
431 } 431 }
432 new_mm->ldt.entry_count = 0; 432 new_mm->arch.ldt.entry_count = 0;
433 433
434 goto out; 434 goto out;
435 } 435 }
@@ -457,26 +457,26 @@ long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm)
457 * i.e., we have to use the stub for modify_ldt, which 457 * i.e., we have to use the stub for modify_ldt, which
458 * can't handle the big read buffer of up to 64kB. 458 * can't handle the big read buffer of up to 64kB.
459 */ 459 */
460 mutex_lock(&from_mm->ldt.lock); 460 mutex_lock(&from_mm->arch.ldt.lock);
461 if (from_mm->ldt.entry_count <= LDT_DIRECT_ENTRIES) 461 if (from_mm->arch.ldt.entry_count <= LDT_DIRECT_ENTRIES)
462 memcpy(new_mm->ldt.u.entries, from_mm->ldt.u.entries, 462 memcpy(new_mm->arch.ldt.u.entries, from_mm->arch.ldt.u.entries,
463 sizeof(new_mm->ldt.u.entries)); 463 sizeof(new_mm->arch.ldt.u.entries));
464 else { 464 else {
465 i = from_mm->ldt.entry_count / LDT_ENTRIES_PER_PAGE; 465 i = from_mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE;
466 while (i-->0) { 466 while (i-->0) {
467 page = __get_free_page(GFP_KERNEL|__GFP_ZERO); 467 page = __get_free_page(GFP_KERNEL|__GFP_ZERO);
468 if (!page) { 468 if (!page) {
469 err = -ENOMEM; 469 err = -ENOMEM;
470 break; 470 break;
471 } 471 }
472 new_mm->ldt.u.pages[i] = 472 new_mm->arch.ldt.u.pages[i] =
473 (struct ldt_entry *) page; 473 (struct ldt_entry *) page;
474 memcpy(new_mm->ldt.u.pages[i], 474 memcpy(new_mm->arch.ldt.u.pages[i],
475 from_mm->ldt.u.pages[i], PAGE_SIZE); 475 from_mm->arch.ldt.u.pages[i], PAGE_SIZE);
476 } 476 }
477 } 477 }
478 new_mm->ldt.entry_count = from_mm->ldt.entry_count; 478 new_mm->arch.ldt.entry_count = from_mm->arch.ldt.entry_count;
479 mutex_unlock(&from_mm->ldt.lock); 479 mutex_unlock(&from_mm->arch.ldt.lock);
480 } 480 }
481 481
482 out: 482 out:
@@ -488,12 +488,12 @@ void free_ldt(struct mm_context *mm)
488{ 488{
489 int i; 489 int i;
490 490
491 if (!ptrace_ldt && mm->ldt.entry_count > LDT_DIRECT_ENTRIES) { 491 if (!ptrace_ldt && mm->arch.ldt.entry_count > LDT_DIRECT_ENTRIES) {
492 i = mm->ldt.entry_count / LDT_ENTRIES_PER_PAGE; 492 i = mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE;
493 while (i-- > 0) 493 while (i-- > 0)
494 free_page((long) mm->ldt.u.pages[i]); 494 free_page((long) mm->arch.ldt.u.pages[i]);
495 } 495 }
496 mm->ldt.entry_count = 0; 496 mm->arch.ldt.entry_count = 0;
497} 497}
498 498
499int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) 499int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
diff --git a/arch/um/sys-i386/mem.c b/arch/x86/um/mem_32.c
index 639900a6fde9..639900a6fde9 100644
--- a/arch/um/sys-i386/mem.c
+++ b/arch/x86/um/mem_32.c
diff --git a/arch/um/sys-x86_64/mem.c b/arch/x86/um/mem_64.c
index 546518727a73..546518727a73 100644
--- a/arch/um/sys-x86_64/mem.c
+++ b/arch/x86/um/mem_64.c
diff --git a/arch/um/os-Linux/sys-i386/Makefile b/arch/x86/um/os-Linux/Makefile
index b4bc6ac4f30b..253bfb8cb702 100644
--- a/arch/um/os-Linux/sys-i386/Makefile
+++ b/arch/x86/um/os-Linux/Makefile
@@ -3,7 +3,10 @@
3# Licensed under the GPL 3# Licensed under the GPL
4# 4#
5 5
6obj-y = registers.o signal.o task_size.o tls.o 6obj-y = registers.o task_size.o mcontext.o
7
8obj-$(CONFIG_X86_32) += tls.o
9obj-$(CONFIG_64BIT) += prctl.o
7 10
8USER_OBJS := $(obj-y) 11USER_OBJS := $(obj-y)
9 12
diff --git a/arch/x86/um/os-Linux/mcontext.c b/arch/x86/um/os-Linux/mcontext.c
new file mode 100644
index 000000000000..1d33d72c6284
--- /dev/null
+++ b/arch/x86/um/os-Linux/mcontext.c
@@ -0,0 +1,31 @@
1#include <sys/ucontext.h>
2#define __FRAME_OFFSETS
3#include <asm/ptrace.h>
4#include <sysdep/ptrace.h>
5
6void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc)
7{
8#ifdef __i386__
9#define COPY2(X,Y) regs->gp[X] = mc->gregs[REG_##Y]
10#define COPY(X) regs->gp[X] = mc->gregs[REG_##X]
11#define COPY_SEG(X) regs->gp[X] = mc->gregs[REG_##X] & 0xffff;
12#define COPY_SEG_CPL3(X) regs->gp[X] = (mc->gregs[REG_##X] & 0xffff) | 3;
13 COPY_SEG(GS); COPY_SEG(FS); COPY_SEG(ES); COPY_SEG(DS);
14 COPY(EDI); COPY(ESI); COPY(EBP);
15 COPY2(UESP, ESP); /* sic */
16 COPY(EBX); COPY(EDX); COPY(ECX); COPY(EAX);
17 COPY(EIP); COPY_SEG_CPL3(CS); COPY(EFL); COPY_SEG_CPL3(SS);
18#else
19#define COPY2(X,Y) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##Y]
20#define COPY(X) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##X]
21 COPY(R8); COPY(R9); COPY(R10); COPY(R11);
22 COPY(R12); COPY(R13); COPY(R14); COPY(R15);
23 COPY(RDI); COPY(RSI); COPY(RBP); COPY(RBX);
24 COPY(RDX); COPY(RAX); COPY(RCX); COPY(RSP);
25 COPY(RIP);
26 COPY2(EFLAGS, EFL);
27 COPY2(CS, CSGSFS);
28 regs->gp[CS / sizeof(unsigned long)] &= 0xffff;
29 regs->gp[CS / sizeof(unsigned long)] |= 3;
30#endif
31}
diff --git a/arch/um/os-Linux/sys-x86_64/prctl.c b/arch/x86/um/os-Linux/prctl.c
index 9d34eddb517f..9d34eddb517f 100644
--- a/arch/um/os-Linux/sys-x86_64/prctl.c
+++ b/arch/x86/um/os-Linux/prctl.c
diff --git a/arch/um/os-Linux/sys-i386/registers.c b/arch/x86/um/os-Linux/registers.c
index 229f7a53d8da..0cdbb86b012b 100644
--- a/arch/um/os-Linux/sys-i386/registers.c
+++ b/arch/x86/um/os-Linux/registers.c
@@ -6,10 +6,10 @@
6 6
7#include <errno.h> 7#include <errno.h>
8#include <sys/ptrace.h> 8#include <sys/ptrace.h>
9#ifdef __i386__
9#include <sys/user.h> 10#include <sys/user.h>
10#include "kern_constants.h" 11#endif
11#include "longjmp.h" 12#include "longjmp.h"
12#include "user.h"
13#include "sysdep/ptrace_user.h" 13#include "sysdep/ptrace_user.h"
14 14
15int save_fp_registers(int pid, unsigned long *fp_regs) 15int save_fp_registers(int pid, unsigned long *fp_regs)
@@ -26,6 +26,8 @@ int restore_fp_registers(int pid, unsigned long *fp_regs)
26 return 0; 26 return 0;
27} 27}
28 28
29#ifdef __i386__
30int have_fpx_regs = 1;
29int save_fpx_registers(int pid, unsigned long *fp_regs) 31int save_fpx_registers(int pid, unsigned long *fp_regs)
30{ 32{
31 if (ptrace(PTRACE_GETFPXREGS, pid, 0, fp_regs) < 0) 33 if (ptrace(PTRACE_GETFPXREGS, pid, 0, fp_regs) < 0)
@@ -40,24 +42,6 @@ int restore_fpx_registers(int pid, unsigned long *fp_regs)
40 return 0; 42 return 0;
41} 43}
42 44
43unsigned long get_thread_reg(int reg, jmp_buf *buf)
44{
45 switch (reg) {
46 case EIP:
47 return buf[0]->__eip;
48 case UESP:
49 return buf[0]->__esp;
50 case EBP:
51 return buf[0]->__ebp;
52 default:
53 printk(UM_KERN_ERR "get_thread_regs - unknown register %d\n",
54 reg);
55 return 0;
56 }
57}
58
59int have_fpx_regs = 1;
60
61int get_fp_registers(int pid, unsigned long *regs) 45int get_fp_registers(int pid, unsigned long *regs)
62{ 46{
63 if (have_fpx_regs) 47 if (have_fpx_regs)
@@ -89,3 +73,41 @@ void arch_init_registers(int pid)
89 73
90 have_fpx_regs = 0; 74 have_fpx_regs = 0;
91} 75}
76#else
77
78int get_fp_registers(int pid, unsigned long *regs)
79{
80 return save_fp_registers(pid, regs);
81}
82
83int put_fp_registers(int pid, unsigned long *regs)
84{
85 return restore_fp_registers(pid, regs);
86}
87
88#endif
89
90unsigned long get_thread_reg(int reg, jmp_buf *buf)
91{
92 switch (reg) {
93#ifdef __i386__
94 case HOST_IP:
95 return buf[0]->__eip;
96 case HOST_SP:
97 return buf[0]->__esp;
98 case HOST_BP:
99 return buf[0]->__ebp;
100#else
101 case HOST_IP:
102 return buf[0]->__rip;
103 case HOST_SP:
104 return buf[0]->__rsp;
105 case HOST_BP:
106 return buf[0]->__rbp;
107#endif
108 default:
109 printk(UM_KERN_ERR "get_thread_regs - unknown register %d\n",
110 reg);
111 return 0;
112 }
113}
diff --git a/arch/um/os-Linux/sys-i386/task_size.c b/arch/x86/um/os-Linux/task_size.c
index be04c1e183bf..efb16c5c9bcf 100644
--- a/arch/um/os-Linux/sys-i386/task_size.c
+++ b/arch/x86/um/os-Linux/task_size.c
@@ -3,7 +3,8 @@
3#include <signal.h> 3#include <signal.h>
4#include <sys/mman.h> 4#include <sys/mman.h>
5#include "longjmp.h" 5#include "longjmp.h"
6#include "kern_constants.h" 6
7#ifdef __i386__
7 8
8static jmp_buf buf; 9static jmp_buf buf;
9 10
@@ -137,3 +138,13 @@ out:
137 138
138 return top; 139 return top;
139} 140}
141
142#else
143
144unsigned long os_get_top_address(void)
145{
146 /* The old value of CONFIG_TOP_ADDR */
147 return 0x7fc0000000;
148}
149
150#endif
diff --git a/arch/um/os-Linux/sys-i386/tls.c b/arch/x86/um/os-Linux/tls.c
index 32ed41ec1a3d..82276b6071af 100644
--- a/arch/um/os-Linux/sys-i386/tls.c
+++ b/arch/x86/um/os-Linux/tls.c
@@ -1,16 +1,25 @@
1#include <errno.h> 1#include <errno.h>
2#include <linux/unistd.h> 2#include <linux/unistd.h>
3 3
4#include <sys/ptrace.h>
4#include <sys/syscall.h> 5#include <sys/syscall.h>
5#include <unistd.h> 6#include <unistd.h>
6 7
7#include "sysdep/tls.h" 8#include "sysdep/tls.h"
8#include "user.h" 9
10#ifndef PTRACE_GET_THREAD_AREA
11#define PTRACE_GET_THREAD_AREA 25
12#endif
13
14#ifndef PTRACE_SET_THREAD_AREA
15#define PTRACE_SET_THREAD_AREA 26
16#endif
9 17
10/* Checks whether host supports TLS, and sets *tls_min according to the value 18/* Checks whether host supports TLS, and sets *tls_min according to the value
11 * valid on the host. 19 * valid on the host.
12 * i386 host have it == 6; x86_64 host have it == 12, for i386 emulation. */ 20 * i386 host have it == 6; x86_64 host have it == 12, for i386 emulation. */
13void check_host_supports_tls(int *supports_tls, int *tls_min) { 21void check_host_supports_tls(int *supports_tls, int *tls_min)
22{
14 /* Values for x86 and x86_64.*/ 23 /* Values for x86 and x86_64.*/
15 int val[] = {GDT_ENTRY_TLS_MIN_I386, GDT_ENTRY_TLS_MIN_X86_64}; 24 int val[] = {GDT_ENTRY_TLS_MIN_I386, GDT_ENTRY_TLS_MIN_X86_64};
16 int i; 25 int i;
@@ -34,3 +43,25 @@ void check_host_supports_tls(int *supports_tls, int *tls_min) {
34 43
35 *supports_tls = 0; 44 *supports_tls = 0;
36} 45}
46
47int os_set_thread_area(user_desc_t *info, int pid)
48{
49 int ret;
50
51 ret = ptrace(PTRACE_SET_THREAD_AREA, pid, info->entry_number,
52 (unsigned long) info);
53 if (ret < 0)
54 ret = -errno;
55 return ret;
56}
57
58int os_get_thread_area(user_desc_t *info, int pid)
59{
60 int ret;
61
62 ret = ptrace(PTRACE_GET_THREAD_AREA, pid, info->entry_number,
63 (unsigned long) info);
64 if (ret < 0)
65 ret = -errno;
66 return ret;
67}
diff --git a/arch/um/sys-i386/ptrace.c b/arch/x86/um/ptrace_32.c
index 3375c2717851..3b949daa095c 100644
--- a/arch/um/sys-i386/ptrace.c
+++ b/arch/x86/um/ptrace_32.c
@@ -50,20 +50,47 @@ int is_syscall(unsigned long addr)
50/* 1 = access 0 = no access */ 50/* 1 = access 0 = no access */
51#define FLAG_MASK 0x00044dd5 51#define FLAG_MASK 0x00044dd5
52 52
53static const int reg_offsets[] = {
54 [EBX] = HOST_BX,
55 [ECX] = HOST_CX,
56 [EDX] = HOST_DX,
57 [ESI] = HOST_SI,
58 [EDI] = HOST_DI,
59 [EBP] = HOST_BP,
60 [EAX] = HOST_AX,
61 [DS] = HOST_DS,
62 [ES] = HOST_ES,
63 [FS] = HOST_FS,
64 [GS] = HOST_GS,
65 [EIP] = HOST_IP,
66 [CS] = HOST_CS,
67 [EFL] = HOST_EFLAGS,
68 [UESP] = HOST_SP,
69 [SS] = HOST_SS,
70};
71
53int putreg(struct task_struct *child, int regno, unsigned long value) 72int putreg(struct task_struct *child, int regno, unsigned long value)
54{ 73{
55 regno >>= 2; 74 regno >>= 2;
56 switch (regno) { 75 switch (regno) {
76 case EBX:
77 case ECX:
78 case EDX:
79 case ESI:
80 case EDI:
81 case EBP:
82 case EAX:
83 case EIP:
84 case UESP:
85 break;
57 case FS: 86 case FS:
58 if (value && (value & 3) != 3) 87 if (value && (value & 3) != 3)
59 return -EIO; 88 return -EIO;
60 PT_REGS_FS(&child->thread.regs) = value; 89 break;
61 return 0;
62 case GS: 90 case GS:
63 if (value && (value & 3) != 3) 91 if (value && (value & 3) != 3)
64 return -EIO; 92 return -EIO;
65 PT_REGS_GS(&child->thread.regs) = value; 93 break;
66 return 0;
67 case DS: 94 case DS:
68 case ES: 95 case ES:
69 if (value && (value & 3) != 3) 96 if (value && (value & 3) != 3)
@@ -78,10 +105,15 @@ int putreg(struct task_struct *child, int regno, unsigned long value)
78 break; 105 break;
79 case EFL: 106 case EFL:
80 value &= FLAG_MASK; 107 value &= FLAG_MASK;
81 value |= PT_REGS_EFLAGS(&child->thread.regs); 108 child->thread.regs.regs.gp[HOST_EFLAGS] |= value;
82 break; 109 return 0;
110 case ORIG_EAX:
111 child->thread.regs.regs.syscall = value;
112 return 0;
113 default :
114 panic("Bad register in putreg() : %d\n", regno);
83 } 115 }
84 PT_REGS_SET(&child->thread.regs, regno, value); 116 child->thread.regs.regs.gp[reg_offsets[regno]] = value;
85 return 0; 117 return 0;
86} 118}
87 119
@@ -106,22 +138,35 @@ int poke_user(struct task_struct *child, long addr, long data)
106 138
107unsigned long getreg(struct task_struct *child, int regno) 139unsigned long getreg(struct task_struct *child, int regno)
108{ 140{
109 unsigned long retval = ~0UL; 141 unsigned long mask = ~0UL;
110 142
111 regno >>= 2; 143 regno >>= 2;
112 switch (regno) { 144 switch (regno) {
145 case ORIG_EAX:
146 return child->thread.regs.regs.syscall;
113 case FS: 147 case FS:
114 case GS: 148 case GS:
115 case DS: 149 case DS:
116 case ES: 150 case ES:
117 case SS: 151 case SS:
118 case CS: 152 case CS:
119 retval = 0xffff; 153 mask = 0xffff;
120 /* fall through */ 154 break;
155 case EIP:
156 case UESP:
157 case EAX:
158 case EBX:
159 case ECX:
160 case EDX:
161 case ESI:
162 case EDI:
163 case EBP:
164 case EFL:
165 break;
121 default: 166 default:
122 retval &= PT_REG(&child->thread.regs, regno); 167 panic("Bad register in getreg() : %d\n", regno);
123 } 168 }
124 return retval; 169 return mask & child->thread.regs.regs.gp[reg_offsets[regno]];
125} 170}
126 171
127/* read the word at location addr in the USER area. */ 172/* read the word at location addr in the USER area. */
diff --git a/arch/um/sys-x86_64/ptrace.c b/arch/x86/um/ptrace_64.c
index 4005506834fd..3b52bf0b418a 100644
--- a/arch/um/sys-x86_64/ptrace.c
+++ b/arch/x86/um/ptrace_64.c
@@ -18,10 +18,39 @@
18 */ 18 */
19#define FLAG_MASK 0x44dd5UL 19#define FLAG_MASK 0x44dd5UL
20 20
21int putreg(struct task_struct *child, int regno, unsigned long value) 21static const int reg_offsets[] =
22{ 22{
23 unsigned long tmp; 23 [R8 >> 3] = HOST_R8,
24 [R9 >> 3] = HOST_R9,
25 [R10 >> 3] = HOST_R10,
26 [R11 >> 3] = HOST_R11,
27 [R12 >> 3] = HOST_R12,
28 [R13 >> 3] = HOST_R13,
29 [R14 >> 3] = HOST_R14,
30 [R15 >> 3] = HOST_R15,
31 [RIP >> 3] = HOST_IP,
32 [RSP >> 3] = HOST_SP,
33 [RAX >> 3] = HOST_AX,
34 [RBX >> 3] = HOST_BX,
35 [RCX >> 3] = HOST_CX,
36 [RDX >> 3] = HOST_DX,
37 [RSI >> 3] = HOST_SI,
38 [RDI >> 3] = HOST_DI,
39 [RBP >> 3] = HOST_BP,
40 [CS >> 3] = HOST_CS,
41 [SS >> 3] = HOST_SS,
42 [FS_BASE >> 3] = HOST_FS_BASE,
43 [GS_BASE >> 3] = HOST_GS_BASE,
44 [DS >> 3] = HOST_DS,
45 [ES >> 3] = HOST_ES,
46 [FS >> 3] = HOST_FS,
47 [GS >> 3] = HOST_GS,
48 [EFLAGS >> 3] = HOST_EFLAGS,
49 [ORIG_RAX >> 3] = HOST_ORIG_AX,
50};
24 51
52int putreg(struct task_struct *child, int regno, unsigned long value)
53{
25#ifdef TIF_IA32 54#ifdef TIF_IA32
26 /* 55 /*
27 * Some code in the 64bit emulation may not be 64bit clean. 56 * Some code in the 64bit emulation may not be 64bit clean.
@@ -31,6 +60,26 @@ int putreg(struct task_struct *child, int regno, unsigned long value)
31 value &= 0xffffffff; 60 value &= 0xffffffff;
32#endif 61#endif
33 switch (regno) { 62 switch (regno) {
63 case R8:
64 case R9:
65 case R10:
66 case R11:
67 case R12:
68 case R13:
69 case R14:
70 case R15:
71 case RIP:
72 case RSP:
73 case RAX:
74 case RBX:
75 case RCX:
76 case RDX:
77 case RSI:
78 case RDI:
79 case RBP:
80 case ORIG_RAX:
81 break;
82
34 case FS: 83 case FS:
35 case GS: 84 case GS:
36 case DS: 85 case DS:
@@ -50,12 +99,14 @@ int putreg(struct task_struct *child, int regno, unsigned long value)
50 99
51 case EFLAGS: 100 case EFLAGS:
52 value &= FLAG_MASK; 101 value &= FLAG_MASK;
53 tmp = PT_REGS_EFLAGS(&child->thread.regs) & ~FLAG_MASK; 102 child->thread.regs.regs.gp[HOST_EFLAGS] |= value;
54 value |= tmp; 103 return 0;
55 break; 104
105 default:
106 panic("Bad register in putreg(): %d\n", regno);
56 } 107 }
57 108
58 PT_REGS_SET(&child->thread.regs, regno, value); 109 child->thread.regs.regs.gp[reg_offsets[regno >> 3]] = value;
59 return 0; 110 return 0;
60} 111}
61 112
@@ -80,24 +131,46 @@ int poke_user(struct task_struct *child, long addr, long data)
80 131
81unsigned long getreg(struct task_struct *child, int regno) 132unsigned long getreg(struct task_struct *child, int regno)
82{ 133{
83 unsigned long retval = ~0UL; 134 unsigned long mask = ~0UL;
135#ifdef TIF_IA32
136 if (test_tsk_thread_flag(child, TIF_IA32))
137 mask = 0xffffffff;
138#endif
84 switch (regno) { 139 switch (regno) {
140 case R8:
141 case R9:
142 case R10:
143 case R11:
144 case R12:
145 case R13:
146 case R14:
147 case R15:
148 case RIP:
149 case RSP:
150 case RAX:
151 case RBX:
152 case RCX:
153 case RDX:
154 case RSI:
155 case RDI:
156 case RBP:
157 case ORIG_RAX:
158 case EFLAGS:
159 case FS_BASE:
160 case GS_BASE:
161 break;
85 case FS: 162 case FS:
86 case GS: 163 case GS:
87 case DS: 164 case DS:
88 case ES: 165 case ES:
89 case SS: 166 case SS:
90 case CS: 167 case CS:
91 retval = 0xffff; 168 mask = 0xffff;
92 /* fall through */ 169 break;
93 default: 170 default:
94 retval &= PT_REG(&child->thread.regs, regno); 171 panic("Bad register in getreg: %d\n", regno);
95#ifdef TIF_IA32
96 if (test_tsk_thread_flag(child, TIF_IA32))
97 retval &= 0xffffffff;
98#endif
99 } 172 }
100 return retval; 173 return mask & child->thread.regs.regs.gp[reg_offsets[regno >> 3]];
101} 174}
102 175
103int peek_user(struct task_struct *child, long addr, long data) 176int peek_user(struct task_struct *child, long addr, long data)
diff --git a/arch/um/sys-i386/ptrace_user.c b/arch/x86/um/ptrace_user.c
index 0b10c3e74028..3960ca1dd35a 100644
--- a/arch/um/sys-i386/ptrace_user.c
+++ b/arch/x86/um/ptrace_user.c
@@ -4,7 +4,7 @@
4 */ 4 */
5 5
6#include <errno.h> 6#include <errno.h>
7#include <sys/ptrace.h> 7#include "ptrace_user.h"
8 8
9int ptrace_getregs(long pid, unsigned long *regs_out) 9int ptrace_getregs(long pid, unsigned long *regs_out)
10{ 10{
diff --git a/arch/um/sys-i386/setjmp.S b/arch/x86/um/setjmp_32.S
index b766792c9933..b766792c9933 100644
--- a/arch/um/sys-i386/setjmp.S
+++ b/arch/x86/um/setjmp_32.S
diff --git a/arch/um/sys-x86_64/setjmp.S b/arch/x86/um/setjmp_64.S
index 45f547b4043e..45f547b4043e 100644
--- a/arch/um/sys-x86_64/setjmp.S
+++ b/arch/x86/um/setjmp_64.S
diff --git a/arch/x86/um/shared/sysdep/archsetjmp.h b/arch/x86/um/shared/sysdep/archsetjmp.h
new file mode 100644
index 000000000000..ff7766d28226
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/archsetjmp.h
@@ -0,0 +1,5 @@
1#ifdef __i386__
2#include "archsetjmp_32.h"
3#else
4#include "archsetjmp_64.h"
5#endif
diff --git a/arch/um/sys-i386/shared/sysdep/archsetjmp.h b/arch/x86/um/shared/sysdep/archsetjmp_32.h
index 0f312085ce1d..0f312085ce1d 100644
--- a/arch/um/sys-i386/shared/sysdep/archsetjmp.h
+++ b/arch/x86/um/shared/sysdep/archsetjmp_32.h
diff --git a/arch/um/sys-x86_64/shared/sysdep/archsetjmp.h b/arch/x86/um/shared/sysdep/archsetjmp_64.h
index 2af8f12ca161..2af8f12ca161 100644
--- a/arch/um/sys-x86_64/shared/sysdep/archsetjmp.h
+++ b/arch/x86/um/shared/sysdep/archsetjmp_64.h
diff --git a/arch/x86/um/shared/sysdep/faultinfo.h b/arch/x86/um/shared/sysdep/faultinfo.h
new file mode 100644
index 000000000000..862ecb1c7781
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/faultinfo.h
@@ -0,0 +1,5 @@
1#ifdef __i386__
2#include "faultinfo_32.h"
3#else
4#include "faultinfo_64.h"
5#endif
diff --git a/arch/um/sys-i386/shared/sysdep/faultinfo.h b/arch/x86/um/shared/sysdep/faultinfo_32.h
index db437cc373bc..a26086b8a800 100644
--- a/arch/um/sys-i386/shared/sysdep/faultinfo.h
+++ b/arch/x86/um/shared/sysdep/faultinfo_32.h
@@ -24,6 +24,12 @@ struct faultinfo {
24#define FAULT_WRITE(fi) ((fi).error_code & 2) 24#define FAULT_WRITE(fi) ((fi).error_code & 2)
25#define FAULT_ADDRESS(fi) ((fi).cr2) 25#define FAULT_ADDRESS(fi) ((fi).cr2)
26 26
27/* This is Page Fault */
28#define SEGV_IS_FIXABLE(fi) ((fi)->trap_no == 14)
29
30/* SKAS3 has no trap_no on i386, but get_skas_faultinfo() sets it to 0. */
31#define SEGV_MAYBE_FIXABLE(fi) ((fi)->trap_no == 0 && ptrace_faultinfo)
32
27#define PTRACE_FULL_FAULTINFO 0 33#define PTRACE_FULL_FAULTINFO 0
28 34
29#endif 35#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/faultinfo.h b/arch/x86/um/shared/sysdep/faultinfo_64.h
index cb917b0d5660..f811cbe15d62 100644
--- a/arch/um/sys-x86_64/shared/sysdep/faultinfo.h
+++ b/arch/x86/um/shared/sysdep/faultinfo_64.h
@@ -24,6 +24,12 @@ struct faultinfo {
24#define FAULT_WRITE(fi) ((fi).error_code & 2) 24#define FAULT_WRITE(fi) ((fi).error_code & 2)
25#define FAULT_ADDRESS(fi) ((fi).cr2) 25#define FAULT_ADDRESS(fi) ((fi).cr2)
26 26
27/* This is Page Fault */
28#define SEGV_IS_FIXABLE(fi) ((fi)->trap_no == 14)
29
30/* No broken SKAS API, which doesn't pass trap_no, here. */
31#define SEGV_MAYBE_FIXABLE(fi) 0
32
27#define PTRACE_FULL_FAULTINFO 1 33#define PTRACE_FULL_FAULTINFO 1
28 34
29#endif 35#endif
diff --git a/arch/um/sys-i386/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h
index 5868526b5eef..5868526b5eef 100644
--- a/arch/um/sys-i386/shared/sysdep/kernel-offsets.h
+++ b/arch/x86/um/shared/sysdep/kernel-offsets.h
diff --git a/arch/x86/um/shared/sysdep/mcontext.h b/arch/x86/um/shared/sysdep/mcontext.h
new file mode 100644
index 000000000000..b724c54da316
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/mcontext.h
@@ -0,0 +1,31 @@
1/*
2 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL
4 */
5
6#ifndef __SYS_SIGCONTEXT_X86_H
7#define __SYS_SIGCONTEXT_X86_H
8
9extern void get_regs_from_mc(struct uml_pt_regs *, mcontext_t *);
10
11#ifdef __i386__
12
13#define GET_FAULTINFO_FROM_MC(fi, mc) \
14 { \
15 (fi).cr2 = (mc)->cr2; \
16 (fi).error_code = (mc)->gregs[REG_ERR]; \
17 (fi).trap_no = (mc)->gregs[REG_TRAPNO]; \
18 }
19
20#else
21
22#define GET_FAULTINFO_FROM_MC(fi, mc) \
23 { \
24 (fi).cr2 = (mc)->gregs[REG_CR2]; \
25 (fi).error_code = (mc)->gregs[REG_ERR]; \
26 (fi).trap_no = (mc)->gregs[REG_TRAPNO]; \
27 }
28
29#endif
30
31#endif
diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h
new file mode 100644
index 000000000000..711b1621747f
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/ptrace.h
@@ -0,0 +1,5 @@
1#ifdef __i386__
2#include "ptrace_32.h"
3#else
4#include "ptrace_64.h"
5#endif
diff --git a/arch/um/sys-i386/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace_32.h
index c398a5076111..befd1df32ed0 100644
--- a/arch/um/sys-i386/shared/sysdep/ptrace.h
+++ b/arch/x86/um/shared/sysdep/ptrace_32.h
@@ -6,7 +6,7 @@
6#ifndef __SYSDEP_I386_PTRACE_H 6#ifndef __SYSDEP_I386_PTRACE_H
7#define __SYSDEP_I386_PTRACE_H 7#define __SYSDEP_I386_PTRACE_H
8 8
9#include "user_constants.h" 9#include <generated/user_constants.h>
10#include "sysdep/faultinfo.h" 10#include "sysdep/faultinfo.h"
11 11
12#define MAX_REG_NR (UM_FRAME_SIZE / sizeof(unsigned long)) 12#define MAX_REG_NR (UM_FRAME_SIZE / sizeof(unsigned long))
@@ -24,18 +24,16 @@ void set_using_sysemu(int value);
24int get_using_sysemu(void); 24int get_using_sysemu(void);
25extern int sysemu_supported; 25extern int sysemu_supported;
26 26
27#include "skas_ptregs.h"
28
29#define REGS_IP(r) ((r)[HOST_IP]) 27#define REGS_IP(r) ((r)[HOST_IP])
30#define REGS_SP(r) ((r)[HOST_SP]) 28#define REGS_SP(r) ((r)[HOST_SP])
31#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS]) 29#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS])
32#define REGS_EAX(r) ((r)[HOST_EAX]) 30#define REGS_EAX(r) ((r)[HOST_AX])
33#define REGS_EBX(r) ((r)[HOST_EBX]) 31#define REGS_EBX(r) ((r)[HOST_BX])
34#define REGS_ECX(r) ((r)[HOST_ECX]) 32#define REGS_ECX(r) ((r)[HOST_CX])
35#define REGS_EDX(r) ((r)[HOST_EDX]) 33#define REGS_EDX(r) ((r)[HOST_DX])
36#define REGS_ESI(r) ((r)[HOST_ESI]) 34#define REGS_ESI(r) ((r)[HOST_SI])
37#define REGS_EDI(r) ((r)[HOST_EDI]) 35#define REGS_EDI(r) ((r)[HOST_DI])
38#define REGS_EBP(r) ((r)[HOST_EBP]) 36#define REGS_EBP(r) ((r)[HOST_BP])
39#define REGS_CS(r) ((r)[HOST_CS]) 37#define REGS_CS(r) ((r)[HOST_CS])
40#define REGS_SS(r) ((r)[HOST_SS]) 38#define REGS_SS(r) ((r)[HOST_SS])
41#define REGS_DS(r) ((r)[HOST_DS]) 39#define REGS_DS(r) ((r)[HOST_DS])
@@ -45,6 +43,7 @@ extern int sysemu_supported;
45 43
46#define REGS_SET_SYSCALL_RETURN(r, res) REGS_EAX(r) = (res) 44#define REGS_SET_SYSCALL_RETURN(r, res) REGS_EAX(r) = (res)
47 45
46#define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
48#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r)) 47#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r))
49 48
50#ifndef PTRACE_SYSEMU_SINGLESTEP 49#ifndef PTRACE_SYSEMU_SINGLESTEP
@@ -102,62 +101,6 @@ struct syscall_args {
102 UPT_SYSCALL_ARG5(r), \ 101 UPT_SYSCALL_ARG5(r), \
103 UPT_SYSCALL_ARG6(r) } } ) 102 UPT_SYSCALL_ARG6(r) } } )
104 103
105#define UPT_REG(regs, reg) \
106 ({ unsigned long val; \
107 switch(reg){ \
108 case EIP: val = UPT_IP(regs); break; \
109 case UESP: val = UPT_SP(regs); break; \
110 case EAX: val = UPT_EAX(regs); break; \
111 case EBX: val = UPT_EBX(regs); break; \
112 case ECX: val = UPT_ECX(regs); break; \
113 case EDX: val = UPT_EDX(regs); break; \
114 case ESI: val = UPT_ESI(regs); break; \
115 case EDI: val = UPT_EDI(regs); break; \
116 case EBP: val = UPT_EBP(regs); break; \
117 case ORIG_EAX: val = UPT_ORIG_EAX(regs); break; \
118 case CS: val = UPT_CS(regs); break; \
119 case SS: val = UPT_SS(regs); break; \
120 case DS: val = UPT_DS(regs); break; \
121 case ES: val = UPT_ES(regs); break; \
122 case FS: val = UPT_FS(regs); break; \
123 case GS: val = UPT_GS(regs); break; \
124 case EFL: val = UPT_EFLAGS(regs); break; \
125 default : \
126 panic("Bad register in UPT_REG : %d\n", reg); \
127 val = -1; \
128 } \
129 val; \
130 })
131
132#define UPT_SET(regs, reg, val) \
133 do { \
134 switch(reg){ \
135 case EIP: UPT_IP(regs) = val; break; \
136 case UESP: UPT_SP(regs) = val; break; \
137 case EAX: UPT_EAX(regs) = val; break; \
138 case EBX: UPT_EBX(regs) = val; break; \
139 case ECX: UPT_ECX(regs) = val; break; \
140 case EDX: UPT_EDX(regs) = val; break; \
141 case ESI: UPT_ESI(regs) = val; break; \
142 case EDI: UPT_EDI(regs) = val; break; \
143 case EBP: UPT_EBP(regs) = val; break; \
144 case ORIG_EAX: UPT_ORIG_EAX(regs) = val; break; \
145 case CS: UPT_CS(regs) = val; break; \
146 case SS: UPT_SS(regs) = val; break; \
147 case DS: UPT_DS(regs) = val; break; \
148 case ES: UPT_ES(regs) = val; break; \
149 case FS: UPT_FS(regs) = val; break; \
150 case GS: UPT_GS(regs) = val; break; \
151 case EFL: UPT_EFLAGS(regs) = val; break; \
152 default : \
153 panic("Bad register in UPT_SET : %d\n", reg); \
154 break; \
155 } \
156 } while (0)
157
158#define UPT_SET_SYSCALL_RETURN(r, res) \
159 REGS_SET_SYSCALL_RETURN((r)->regs, (res))
160
161#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp) 104#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp)
162 105
163#define UPT_ORIG_SYSCALL(r) UPT_EAX(r) 106#define UPT_ORIG_SYSCALL(r) UPT_EAX(r)
diff --git a/arch/um/sys-x86_64/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace_64.h
index 8ee8f8e12af1..031edc53ac57 100644
--- a/arch/um/sys-x86_64/shared/sysdep/ptrace.h
+++ b/arch/x86/um/shared/sysdep/ptrace_64.h
@@ -8,24 +8,22 @@
8#ifndef __SYSDEP_X86_64_PTRACE_H 8#ifndef __SYSDEP_X86_64_PTRACE_H
9#define __SYSDEP_X86_64_PTRACE_H 9#define __SYSDEP_X86_64_PTRACE_H
10 10
11#include "user_constants.h" 11#include <generated/user_constants.h>
12#include "sysdep/faultinfo.h" 12#include "sysdep/faultinfo.h"
13 13
14#define MAX_REG_OFFSET (UM_FRAME_SIZE) 14#define MAX_REG_OFFSET (UM_FRAME_SIZE)
15#define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long)) 15#define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long))
16 16
17#include "skas_ptregs.h"
18
19#define REGS_IP(r) ((r)[HOST_IP]) 17#define REGS_IP(r) ((r)[HOST_IP])
20#define REGS_SP(r) ((r)[HOST_SP]) 18#define REGS_SP(r) ((r)[HOST_SP])
21 19
22#define REGS_RBX(r) ((r)[HOST_RBX]) 20#define REGS_RBX(r) ((r)[HOST_BX])
23#define REGS_RCX(r) ((r)[HOST_RCX]) 21#define REGS_RCX(r) ((r)[HOST_CX])
24#define REGS_RDX(r) ((r)[HOST_RDX]) 22#define REGS_RDX(r) ((r)[HOST_DX])
25#define REGS_RSI(r) ((r)[HOST_RSI]) 23#define REGS_RSI(r) ((r)[HOST_SI])
26#define REGS_RDI(r) ((r)[HOST_RDI]) 24#define REGS_RDI(r) ((r)[HOST_DI])
27#define REGS_RBP(r) ((r)[HOST_RBP]) 25#define REGS_RBP(r) ((r)[HOST_BP])
28#define REGS_RAX(r) ((r)[HOST_RAX]) 26#define REGS_RAX(r) ((r)[HOST_AX])
29#define REGS_R8(r) ((r)[HOST_R8]) 27#define REGS_R8(r) ((r)[HOST_R8])
30#define REGS_R9(r) ((r)[HOST_R9]) 28#define REGS_R9(r) ((r)[HOST_R9])
31#define REGS_R10(r) ((r)[HOST_R10]) 29#define REGS_R10(r) ((r)[HOST_R10])
@@ -67,14 +65,13 @@
67#define REGS_FS(r) ((r)[HOST_FS]) 65#define REGS_FS(r) ((r)[HOST_FS])
68#define REGS_GS(r) ((r)[HOST_GS]) 66#define REGS_GS(r) ((r)[HOST_GS])
69 67
70#define REGS_ORIG_RAX(r) ((r)[HOST_ORIG_RAX]) 68#define REGS_ORIG_RAX(r) ((r)[HOST_ORIG_AX])
71 69
72#define REGS_SET_SYSCALL_RETURN(r, res) REGS_RAX(r) = (res) 70#define REGS_SET_SYSCALL_RETURN(r, res) REGS_RAX(r) = (res)
73 71
72#define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
74#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r)) 73#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r))
75 74
76#define REGS_SEGV_IS_FIXABLE(r) SEGV_IS_FIXABLE((r)->trap_type)
77
78#define REGS_FAULT_ADDR(r) ((r)->fault_addr) 75#define REGS_FAULT_ADDR(r) ((r)->fault_addr)
79 76
80#define REGS_FAULT_WRITE(r) FAULT_WRITE((r)->fault_type) 77#define REGS_FAULT_WRITE(r) FAULT_WRITE((r)->fault_type)
@@ -149,88 +146,8 @@ struct syscall_args {
149 UPT_SYSCALL_ARG5(r), \ 146 UPT_SYSCALL_ARG5(r), \
150 UPT_SYSCALL_ARG6(r) } } ) 147 UPT_SYSCALL_ARG6(r) } } )
151 148
152#define UPT_REG(regs, reg) \
153 ({ unsigned long val; \
154 switch(reg){ \
155 case R8: val = UPT_R8(regs); break; \
156 case R9: val = UPT_R9(regs); break; \
157 case R10: val = UPT_R10(regs); break; \
158 case R11: val = UPT_R11(regs); break; \
159 case R12: val = UPT_R12(regs); break; \
160 case R13: val = UPT_R13(regs); break; \
161 case R14: val = UPT_R14(regs); break; \
162 case R15: val = UPT_R15(regs); break; \
163 case RIP: val = UPT_IP(regs); break; \
164 case RSP: val = UPT_SP(regs); break; \
165 case RAX: val = UPT_RAX(regs); break; \
166 case RBX: val = UPT_RBX(regs); break; \
167 case RCX: val = UPT_RCX(regs); break; \
168 case RDX: val = UPT_RDX(regs); break; \
169 case RSI: val = UPT_RSI(regs); break; \
170 case RDI: val = UPT_RDI(regs); break; \
171 case RBP: val = UPT_RBP(regs); break; \
172 case ORIG_RAX: val = UPT_ORIG_RAX(regs); break; \
173 case CS: val = UPT_CS(regs); break; \
174 case SS: val = UPT_SS(regs); break; \
175 case FS_BASE: val = UPT_FS_BASE(regs); break; \
176 case GS_BASE: val = UPT_GS_BASE(regs); break; \
177 case DS: val = UPT_DS(regs); break; \
178 case ES: val = UPT_ES(regs); break; \
179 case FS : val = UPT_FS (regs); break; \
180 case GS: val = UPT_GS(regs); break; \
181 case EFLAGS: val = UPT_EFLAGS(regs); break; \
182 default : \
183 panic("Bad register in UPT_REG : %d\n", reg); \
184 val = -1; \
185 } \
186 val; \
187 })
188
189
190#define UPT_SET(regs, reg, val) \
191 ({ unsigned long __upt_val = val; \
192 switch(reg){ \
193 case R8: UPT_R8(regs) = __upt_val; break; \
194 case R9: UPT_R9(regs) = __upt_val; break; \
195 case R10: UPT_R10(regs) = __upt_val; break; \
196 case R11: UPT_R11(regs) = __upt_val; break; \
197 case R12: UPT_R12(regs) = __upt_val; break; \
198 case R13: UPT_R13(regs) = __upt_val; break; \
199 case R14: UPT_R14(regs) = __upt_val; break; \
200 case R15: UPT_R15(regs) = __upt_val; break; \
201 case RIP: UPT_IP(regs) = __upt_val; break; \
202 case RSP: UPT_SP(regs) = __upt_val; break; \
203 case RAX: UPT_RAX(regs) = __upt_val; break; \
204 case RBX: UPT_RBX(regs) = __upt_val; break; \
205 case RCX: UPT_RCX(regs) = __upt_val; break; \
206 case RDX: UPT_RDX(regs) = __upt_val; break; \
207 case RSI: UPT_RSI(regs) = __upt_val; break; \
208 case RDI: UPT_RDI(regs) = __upt_val; break; \
209 case RBP: UPT_RBP(regs) = __upt_val; break; \
210 case ORIG_RAX: UPT_ORIG_RAX(regs) = __upt_val; break; \
211 case CS: UPT_CS(regs) = __upt_val; break; \
212 case SS: UPT_SS(regs) = __upt_val; break; \
213 case FS_BASE: UPT_FS_BASE(regs) = __upt_val; break; \
214 case GS_BASE: UPT_GS_BASE(regs) = __upt_val; break; \
215 case DS: UPT_DS(regs) = __upt_val; break; \
216 case ES: UPT_ES(regs) = __upt_val; break; \
217 case FS: UPT_FS(regs) = __upt_val; break; \
218 case GS: UPT_GS(regs) = __upt_val; break; \
219 case EFLAGS: UPT_EFLAGS(regs) = __upt_val; break; \
220 default : \
221 panic("Bad register in UPT_SET : %d\n", reg); \
222 break; \
223 } \
224 __upt_val; \
225 })
226
227#define UPT_SET_SYSCALL_RETURN(r, res) \
228 REGS_SET_SYSCALL_RETURN((r)->regs, (res))
229
230#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp) 149#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp)
231 150
232#define UPT_SEGV_IS_FIXABLE(r) REGS_SEGV_IS_FIXABLE(&r->skas)
233
234#define UPT_FAULTINFO(r) (&(r)->faultinfo) 151#define UPT_FAULTINFO(r) (&(r)->faultinfo)
235 152
236static inline void arch_init_registers(int pid) 153static inline void arch_init_registers(int pid)
diff --git a/arch/x86/um/shared/sysdep/ptrace_user.h b/arch/x86/um/shared/sysdep/ptrace_user.h
new file mode 100644
index 000000000000..16cd6b5e71f7
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/ptrace_user.h
@@ -0,0 +1,27 @@
1#include <generated/user_constants.h>
2
3#define PT_OFFSET(r) ((r) * sizeof(long))
4
5#define PT_SYSCALL_NR(regs) ((regs)[HOST_ORIG_AX])
6#define PT_SYSCALL_NR_OFFSET PT_OFFSET(HOST_ORIG_AX)
7
8#define PT_SYSCALL_RET_OFFSET PT_OFFSET(HOST_AX)
9
10#define REGS_IP_INDEX HOST_IP
11#define REGS_SP_INDEX HOST_SP
12
13#ifdef __i386__
14#define FP_SIZE ((HOST_FPX_SIZE > HOST_FP_SIZE) ? HOST_FPX_SIZE : HOST_FP_SIZE)
15#else
16#define FP_SIZE HOST_FP_SIZE
17
18/*
19 * x86_64 FC3 doesn't define this in /usr/include/linux/ptrace.h even though
20 * it's defined in the kernel's include/linux/ptrace.h. Additionally, use the
21 * 2.4 name and value for 2.4 host compatibility.
22 */
23#ifndef PTRACE_OLDSETOPTIONS
24#define PTRACE_OLDSETOPTIONS 21
25#endif
26
27#endif
diff --git a/arch/um/sys-i386/shared/sysdep/skas_ptrace.h b/arch/x86/um/shared/sysdep/skas_ptrace.h
index e27b8a791773..453febe98993 100644
--- a/arch/um/sys-i386/shared/sysdep/skas_ptrace.h
+++ b/arch/x86/um/shared/sysdep/skas_ptrace.h
@@ -3,8 +3,8 @@
3 * Licensed under the GPL 3 * Licensed under the GPL
4 */ 4 */
5 5
6#ifndef __SYSDEP_I386_SKAS_PTRACE_H 6#ifndef __SYSDEP_X86_SKAS_PTRACE_H
7#define __SYSDEP_I386_SKAS_PTRACE_H 7#define __SYSDEP_X86_SKAS_PTRACE_H
8 8
9struct ptrace_faultinfo { 9struct ptrace_faultinfo {
10 int is_write; 10 int is_write;
diff --git a/arch/x86/um/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub.h
new file mode 100644
index 000000000000..bd161e300102
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/stub.h
@@ -0,0 +1,14 @@
1#include <asm/unistd.h>
2#include <sys/mman.h>
3#include <signal.h>
4#include "as-layout.h"
5#include "stub-data.h"
6
7#ifdef __i386__
8#include "stub_32.h"
9#else
10#include "stub_64.h"
11#endif
12
13extern void stub_segv_handler(int, siginfo_t *, void *);
14extern void stub_clone_handler(void);
diff --git a/arch/um/sys-i386/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub_32.h
index 977dedd9221b..51fd256c75f0 100644
--- a/arch/um/sys-i386/shared/sysdep/stub.h
+++ b/arch/x86/um/shared/sysdep/stub_32.h
@@ -6,15 +6,7 @@
6#ifndef __SYSDEP_STUB_H 6#ifndef __SYSDEP_STUB_H
7#define __SYSDEP_STUB_H 7#define __SYSDEP_STUB_H
8 8
9#include <sys/mman.h>
10#include <asm/ptrace.h> 9#include <asm/ptrace.h>
11#include <asm/unistd.h>
12#include "as-layout.h"
13#include "stub-data.h"
14#include "kern_constants.h"
15
16extern void stub_segv_handler(int sig);
17extern void stub_clone_handler(void);
18 10
19#define STUB_SYSCALL_RET EAX 11#define STUB_SYSCALL_RET EAX
20#define STUB_MMAP_NR __NR_mmap2 12#define STUB_MMAP_NR __NR_mmap2
diff --git a/arch/um/sys-x86_64/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub_64.h
index 3432aa249970..994df93c5ed3 100644
--- a/arch/um/sys-x86_64/shared/sysdep/stub.h
+++ b/arch/x86/um/shared/sysdep/stub_64.h
@@ -6,15 +6,7 @@
6#ifndef __SYSDEP_STUB_H 6#ifndef __SYSDEP_STUB_H
7#define __SYSDEP_STUB_H 7#define __SYSDEP_STUB_H
8 8
9#include <sys/mman.h>
10#include <asm/unistd.h>
11#include <sysdep/ptrace_user.h> 9#include <sysdep/ptrace_user.h>
12#include "as-layout.h"
13#include "stub-data.h"
14#include "kern_constants.h"
15
16extern void stub_segv_handler(int sig);
17extern void stub_clone_handler(void);
18 10
19#define STUB_SYSCALL_RET PT_INDEX(RAX) 11#define STUB_SYSCALL_RET PT_INDEX(RAX)
20#define STUB_MMAP_NR __NR_mmap 12#define STUB_MMAP_NR __NR_mmap
diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h
new file mode 100644
index 000000000000..bd9a89b67e41
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/syscalls.h
@@ -0,0 +1,5 @@
1#ifdef __i386__
2#include "syscalls_32.h"
3#else
4#include "syscalls_64.h"
5#endif
diff --git a/arch/um/sys-i386/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls_32.h
index 05cb796aecb5..05cb796aecb5 100644
--- a/arch/um/sys-i386/shared/sysdep/syscalls.h
+++ b/arch/x86/um/shared/sysdep/syscalls_32.h
diff --git a/arch/um/sys-x86_64/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls_64.h
index 7cfb0b085655..8a7d5e1da98e 100644
--- a/arch/um/sys-x86_64/shared/sysdep/syscalls.h
+++ b/arch/x86/um/shared/sysdep/syscalls_64.h
@@ -9,7 +9,6 @@
9 9
10#include <linux/msg.h> 10#include <linux/msg.h>
11#include <linux/shm.h> 11#include <linux/shm.h>
12#include <kern_constants.h>
13 12
14typedef long syscall_handler_t(void); 13typedef long syscall_handler_t(void);
15 14
diff --git a/arch/um/sys-i386/shared/sysdep/tls.h b/arch/x86/um/shared/sysdep/tls.h
index 34550755b2a1..27cce00c6b30 100644
--- a/arch/um/sys-i386/shared/sysdep/tls.h
+++ b/arch/x86/um/shared/sysdep/tls.h
@@ -17,16 +17,23 @@ typedef struct um_dup_user_desc {
17 unsigned int limit_in_pages:1; 17 unsigned int limit_in_pages:1;
18 unsigned int seg_not_present:1; 18 unsigned int seg_not_present:1;
19 unsigned int useable:1; 19 unsigned int useable:1;
20#ifdef __x86_64__
21 unsigned int lm:1;
22#endif
20} user_desc_t; 23} user_desc_t;
21 24
22# else /* __KERNEL__ */ 25# else /* __KERNEL__ */
23 26
24# include <ldt.h>
25typedef struct user_desc user_desc_t; 27typedef struct user_desc user_desc_t;
26 28
27# endif /* __KERNEL__ */ 29# endif /* __KERNEL__ */
28 30
31extern int os_set_thread_area(user_desc_t *info, int pid);
32extern int os_get_thread_area(user_desc_t *info, int pid);
33
34#ifdef __i386__
29#define GDT_ENTRY_TLS_MIN_I386 6 35#define GDT_ENTRY_TLS_MIN_I386 6
30#define GDT_ENTRY_TLS_MIN_X86_64 12 36#define GDT_ENTRY_TLS_MIN_X86_64 12
37#endif
31 38
32#endif /* _SYSDEP_TLS_H */ 39#endif /* _SYSDEP_TLS_H */
diff --git a/arch/um/sys-i386/signal.c b/arch/x86/um/signal.c
index 89a46626bfd8..4883b9546016 100644
--- a/arch/um/sys-i386/signal.c
+++ b/arch/x86/um/signal.c
@@ -1,36 +1,20 @@
1/* 1/*
2 * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 2 * Copyright (C) 2003 PathScale, Inc.
3 * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL 4 * Licensed under the GPL
4 */ 5 */
5 6
7
8#include <linux/personality.h>
6#include <linux/ptrace.h> 9#include <linux/ptrace.h>
10#include <linux/kernel.h>
7#include <asm/unistd.h> 11#include <asm/unistd.h>
8#include <asm/uaccess.h> 12#include <asm/uaccess.h>
9#include <asm/ucontext.h> 13#include <asm/ucontext.h>
10#include "frame_kern.h" 14#include "frame_kern.h"
11#include "skas.h" 15#include "skas.h"
12 16
13void copy_sc(struct uml_pt_regs *regs, void *from) 17#ifdef CONFIG_X86_32
14{
15 struct sigcontext *sc = from;
16
17 REGS_GS(regs->gp) = sc->gs;
18 REGS_FS(regs->gp) = sc->fs;
19 REGS_ES(regs->gp) = sc->es;
20 REGS_DS(regs->gp) = sc->ds;
21 REGS_EDI(regs->gp) = sc->di;
22 REGS_ESI(regs->gp) = sc->si;
23 REGS_EBP(regs->gp) = sc->bp;
24 REGS_SP(regs->gp) = sc->sp;
25 REGS_EBX(regs->gp) = sc->bx;
26 REGS_EDX(regs->gp) = sc->dx;
27 REGS_ECX(regs->gp) = sc->cx;
28 REGS_EAX(regs->gp) = sc->ax;
29 REGS_IP(regs->gp) = sc->ip;
30 REGS_CS(regs->gp) = sc->cs;
31 REGS_EFLAGS(regs->gp) = sc->flags;
32 REGS_SS(regs->gp) = sc->ss;
33}
34 18
35/* 19/*
36 * FPU tag word conversions. 20 * FPU tag word conversions.
@@ -164,6 +148,8 @@ static int convert_fxsr_from_user(struct user_fxsr_struct *fxsave,
164 148
165extern int have_fpx_regs; 149extern int have_fpx_regs;
166 150
151#endif
152
167static int copy_sc_from_user(struct pt_regs *regs, 153static int copy_sc_from_user(struct pt_regs *regs,
168 struct sigcontext __user *from) 154 struct sigcontext __user *from)
169{ 155{
@@ -174,8 +160,45 @@ static int copy_sc_from_user(struct pt_regs *regs,
174 if (err) 160 if (err)
175 return err; 161 return err;
176 162
163#define GETREG(regno, regname) regs->regs.gp[HOST_##regno] = sc.regname
164
165#ifdef CONFIG_X86_32
166 GETREG(GS, gs);
167 GETREG(FS, fs);
168 GETREG(ES, es);
169 GETREG(DS, ds);
170#endif
171 GETREG(DI, di);
172 GETREG(SI, si);
173 GETREG(BP, bp);
174 GETREG(SP, sp);
175 GETREG(BX, bx);
176 GETREG(DX, dx);
177 GETREG(CX, cx);
178 GETREG(AX, ax);
179 GETREG(IP, ip);
180
181#ifdef CONFIG_X86_64
182 GETREG(R8, r8);
183 GETREG(R9, r9);
184 GETREG(R10, r10);
185 GETREG(R11, r11);
186 GETREG(R12, r12);
187 GETREG(R13, r13);
188 GETREG(R14, r14);
189 GETREG(R15, r15);
190#endif
191
192 GETREG(CS, cs);
193 GETREG(EFLAGS, flags);
194#ifdef CONFIG_X86_32
195 GETREG(SS, ss);
196#endif
197
198#undef GETREG
199
177 pid = userspace_pid[current_thread_info()->cpu]; 200 pid = userspace_pid[current_thread_info()->cpu];
178 copy_sc(&regs->regs, &sc); 201#ifdef CONFIG_X86_32
179 if (have_fpx_regs) { 202 if (have_fpx_regs) {
180 struct user_fxsr_struct fpx; 203 struct user_fxsr_struct fpx;
181 204
@@ -196,8 +219,9 @@ static int copy_sc_from_user(struct pt_regs *regs,
196 -err); 219 -err);
197 return 1; 220 return 1;
198 } 221 }
199 } 222 } else
200 else { 223#endif
224 {
201 struct user_i387_struct fp; 225 struct user_i387_struct fp;
202 226
203 err = copy_from_user(&fp, sc.fpstate, 227 err = copy_from_user(&fp, sc.fpstate,
@@ -213,43 +237,66 @@ static int copy_sc_from_user(struct pt_regs *regs,
213 return 1; 237 return 1;
214 } 238 }
215 } 239 }
216
217 return 0; 240 return 0;
218} 241}
219 242
220static int copy_sc_to_user(struct sigcontext __user *to, 243static int copy_sc_to_user(struct sigcontext __user *to,
221 struct _fpstate __user *to_fp, struct pt_regs *regs, 244 struct _fpstate __user *to_fp, struct pt_regs *regs,
222 unsigned long sp) 245 unsigned long mask)
223{ 246{
224 struct sigcontext sc; 247 struct sigcontext sc;
225 struct faultinfo * fi = &current->thread.arch.faultinfo; 248 struct faultinfo * fi = &current->thread.arch.faultinfo;
226 int err, pid; 249 int err, pid;
250 memset(&sc, 0, sizeof(struct sigcontext));
251
252#define PUTREG(regno, regname) sc.regname = regs->regs.gp[HOST_##regno]
253
254#ifdef CONFIG_X86_32
255 PUTREG(GS, gs);
256 PUTREG(FS, fs);
257 PUTREG(ES, es);
258 PUTREG(DS, ds);
259#endif
260 PUTREG(DI, di);
261 PUTREG(SI, si);
262 PUTREG(BP, bp);
263 PUTREG(SP, sp);
264 PUTREG(BX, bx);
265 PUTREG(DX, dx);
266 PUTREG(CX, cx);
267 PUTREG(AX, ax);
268#ifdef CONFIG_X86_64
269 PUTREG(R8, r8);
270 PUTREG(R9, r9);
271 PUTREG(R10, r10);
272 PUTREG(R11, r11);
273 PUTREG(R12, r12);
274 PUTREG(R13, r13);
275 PUTREG(R14, r14);
276 PUTREG(R15, r15);
277#endif
227 278
228 sc.gs = REGS_GS(regs->regs.gp);
229 sc.fs = REGS_FS(regs->regs.gp);
230 sc.es = REGS_ES(regs->regs.gp);
231 sc.ds = REGS_DS(regs->regs.gp);
232 sc.di = REGS_EDI(regs->regs.gp);
233 sc.si = REGS_ESI(regs->regs.gp);
234 sc.bp = REGS_EBP(regs->regs.gp);
235 sc.sp = sp;
236 sc.bx = REGS_EBX(regs->regs.gp);
237 sc.dx = REGS_EDX(regs->regs.gp);
238 sc.cx = REGS_ECX(regs->regs.gp);
239 sc.ax = REGS_EAX(regs->regs.gp);
240 sc.ip = REGS_IP(regs->regs.gp);
241 sc.cs = REGS_CS(regs->regs.gp);
242 sc.flags = REGS_EFLAGS(regs->regs.gp);
243 sc.sp_at_signal = regs->regs.gp[UESP];
244 sc.ss = regs->regs.gp[SS];
245 sc.cr2 = fi->cr2; 279 sc.cr2 = fi->cr2;
246 sc.err = fi->error_code; 280 sc.err = fi->error_code;
247 sc.trapno = fi->trap_no; 281 sc.trapno = fi->trap_no;
248 282 PUTREG(IP, ip);
249 to_fp = (to_fp ? to_fp : (struct _fpstate __user *) (to + 1)); 283 PUTREG(CS, cs);
284 PUTREG(EFLAGS, flags);
285#ifdef CONFIG_X86_32
286 PUTREG(SP, sp_at_signal);
287 PUTREG(SS, ss);
288#endif
289#undef PUTREG
290 sc.oldmask = mask;
250 sc.fpstate = to_fp; 291 sc.fpstate = to_fp;
251 292
293 err = copy_to_user(to, &sc, sizeof(struct sigcontext));
294 if (err)
295 return 1;
296
252 pid = userspace_pid[current_thread_info()->cpu]; 297 pid = userspace_pid[current_thread_info()->cpu];
298
299#ifdef CONFIG_X86_32
253 if (have_fpx_regs) { 300 if (have_fpx_regs) {
254 struct user_fxsr_struct fpx; 301 struct user_fxsr_struct fpx;
255 302
@@ -272,8 +319,9 @@ static int copy_sc_to_user(struct sigcontext __user *to,
272 if (copy_to_user(&to_fp->_fxsr_env[0], &fpx, 319 if (copy_to_user(&to_fp->_fxsr_env[0], &fpx,
273 sizeof(struct user_fxsr_struct))) 320 sizeof(struct user_fxsr_struct)))
274 return 1; 321 return 1;
275 } 322 } else
276 else { 323#endif
324 {
277 struct user_i387_struct fp; 325 struct user_i387_struct fp;
278 326
279 err = save_fp_registers(pid, (unsigned long *) &fp); 327 err = save_fp_registers(pid, (unsigned long *) &fp);
@@ -281,9 +329,10 @@ static int copy_sc_to_user(struct sigcontext __user *to,
281 return 1; 329 return 1;
282 } 330 }
283 331
284 return copy_to_user(to, &sc, sizeof(sc)); 332 return 0;
285} 333}
286 334
335#ifdef CONFIG_X86_32
287static int copy_ucontext_to_user(struct ucontext __user *uc, 336static int copy_ucontext_to_user(struct ucontext __user *uc,
288 struct _fpstate __user *fp, sigset_t *set, 337 struct _fpstate __user *fp, sigset_t *set,
289 unsigned long sp) 338 unsigned long sp)
@@ -293,7 +342,7 @@ static int copy_ucontext_to_user(struct ucontext __user *uc,
293 err |= put_user(current->sas_ss_sp, &uc->uc_stack.ss_sp); 342 err |= put_user(current->sas_ss_sp, &uc->uc_stack.ss_sp);
294 err |= put_user(sas_ss_flags(sp), &uc->uc_stack.ss_flags); 343 err |= put_user(sas_ss_flags(sp), &uc->uc_stack.ss_flags);
295 err |= put_user(current->sas_ss_size, &uc->uc_stack.ss_size); 344 err |= put_user(current->sas_ss_size, &uc->uc_stack.ss_size);
296 err |= copy_sc_to_user(&uc->uc_mcontext, fp, &current->thread.regs, sp); 345 err |= copy_sc_to_user(&uc->uc_mcontext, fp, &current->thread.regs, 0);
297 err |= copy_to_user(&uc->uc_sigmask, set, sizeof(*set)); 346 err |= copy_to_user(&uc->uc_sigmask, set, sizeof(*set));
298 return err; 347 return err;
299} 348}
@@ -326,7 +375,6 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
326{ 375{
327 struct sigframe __user *frame; 376 struct sigframe __user *frame;
328 void __user *restorer; 377 void __user *restorer;
329 unsigned long save_sp = PT_REGS_SP(regs);
330 int err = 0; 378 int err = 0;
331 379
332 /* This is the same calculation as i386 - ((sp + 4) & 15) == 0 */ 380 /* This is the same calculation as i386 - ((sp + 4) & 15) == 0 */
@@ -339,20 +387,9 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
339 if (ka->sa.sa_flags & SA_RESTORER) 387 if (ka->sa.sa_flags & SA_RESTORER)
340 restorer = ka->sa.sa_restorer; 388 restorer = ka->sa.sa_restorer;
341 389
342 /* Update SP now because the page fault handler refuses to extend
343 * the stack if the faulting address is too far below the current
344 * SP, which frame now certainly is. If there's an error, the original
345 * value is restored on the way out.
346 * When writing the sigcontext to the stack, we have to write the
347 * original value, so that's passed to copy_sc_to_user, which does
348 * the right thing with it.
349 */
350 PT_REGS_SP(regs) = (unsigned long) frame;
351
352 err |= __put_user(restorer, &frame->pretcode); 390 err |= __put_user(restorer, &frame->pretcode);
353 err |= __put_user(sig, &frame->sig); 391 err |= __put_user(sig, &frame->sig);
354 err |= copy_sc_to_user(&frame->sc, NULL, regs, save_sp); 392 err |= copy_sc_to_user(&frame->sc, &frame->fpstate, regs, mask->sig[0]);
355 err |= __put_user(mask->sig[0], &frame->sc.oldmask);
356 if (_NSIG_WORDS > 1) 393 if (_NSIG_WORDS > 1)
357 err |= __copy_to_user(&frame->extramask, &mask->sig[1], 394 err |= __copy_to_user(&frame->extramask, &mask->sig[1],
358 sizeof(frame->extramask)); 395 sizeof(frame->extramask));
@@ -369,7 +406,7 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
369 err |= __put_user(0x80cd, (short __user *)(frame->retcode+6)); 406 err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
370 407
371 if (err) 408 if (err)
372 goto err; 409 return err;
373 410
374 PT_REGS_SP(regs) = (unsigned long) frame; 411 PT_REGS_SP(regs) = (unsigned long) frame;
375 PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler; 412 PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
@@ -380,10 +417,6 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
380 if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) 417 if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
381 ptrace_notify(SIGTRAP); 418 ptrace_notify(SIGTRAP);
382 return 0; 419 return 0;
383
384err:
385 PT_REGS_SP(regs) = save_sp;
386 return err;
387} 420}
388 421
389int setup_signal_stack_si(unsigned long stack_top, int sig, 422int setup_signal_stack_si(unsigned long stack_top, int sig,
@@ -392,7 +425,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
392{ 425{
393 struct rt_sigframe __user *frame; 426 struct rt_sigframe __user *frame;
394 void __user *restorer; 427 void __user *restorer;
395 unsigned long save_sp = PT_REGS_SP(regs);
396 int err = 0; 428 int err = 0;
397 429
398 stack_top &= -8UL; 430 stack_top &= -8UL;
@@ -404,16 +436,13 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
404 if (ka->sa.sa_flags & SA_RESTORER) 436 if (ka->sa.sa_flags & SA_RESTORER)
405 restorer = ka->sa.sa_restorer; 437 restorer = ka->sa.sa_restorer;
406 438
407 /* See comment above about why this is here */
408 PT_REGS_SP(regs) = (unsigned long) frame;
409
410 err |= __put_user(restorer, &frame->pretcode); 439 err |= __put_user(restorer, &frame->pretcode);
411 err |= __put_user(sig, &frame->sig); 440 err |= __put_user(sig, &frame->sig);
412 err |= __put_user(&frame->info, &frame->pinfo); 441 err |= __put_user(&frame->info, &frame->pinfo);
413 err |= __put_user(&frame->uc, &frame->puc); 442 err |= __put_user(&frame->uc, &frame->puc);
414 err |= copy_siginfo_to_user(&frame->info, info); 443 err |= copy_siginfo_to_user(&frame->info, info);
415 err |= copy_ucontext_to_user(&frame->uc, &frame->fpstate, mask, 444 err |= copy_ucontext_to_user(&frame->uc, &frame->fpstate, mask,
416 save_sp); 445 PT_REGS_SP(regs));
417 446
418 /* 447 /*
419 * This is movl $,%eax ; int $0x80 448 * This is movl $,%eax ; int $0x80
@@ -427,8 +456,9 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
427 err |= __put_user(0x80cd, (short __user *)(frame->retcode+5)); 456 err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
428 457
429 if (err) 458 if (err)
430 goto err; 459 return err;
431 460
461 PT_REGS_SP(regs) = (unsigned long) frame;
432 PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler; 462 PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
433 PT_REGS_EAX(regs) = (unsigned long) sig; 463 PT_REGS_EAX(regs) = (unsigned long) sig;
434 PT_REGS_EDX(regs) = (unsigned long) &frame->info; 464 PT_REGS_EDX(regs) = (unsigned long) &frame->info;
@@ -437,13 +467,9 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
437 if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) 467 if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
438 ptrace_notify(SIGTRAP); 468 ptrace_notify(SIGTRAP);
439 return 0; 469 return 0;
440
441err:
442 PT_REGS_SP(regs) = save_sp;
443 return err;
444} 470}
445 471
446long sys_sigreturn(struct pt_regs regs) 472long sys_sigreturn(struct pt_regs *regs)
447{ 473{
448 unsigned long sp = PT_REGS_SP(&current->thread.regs); 474 unsigned long sp = PT_REGS_SP(&current->thread.regs);
449 struct sigframe __user *frame = (struct sigframe __user *)(sp - 8); 475 struct sigframe __user *frame = (struct sigframe __user *)(sp - 8);
@@ -458,11 +484,7 @@ long sys_sigreturn(struct pt_regs regs)
458 goto segfault; 484 goto segfault;
459 485
460 sigdelsetmask(&set, ~_BLOCKABLE); 486 sigdelsetmask(&set, ~_BLOCKABLE);
461 487 set_current_blocked(&set);
462 spin_lock_irq(&current->sighand->siglock);
463 current->blocked = set;
464 recalc_sigpending();
465 spin_unlock_irq(&current->sighand->siglock);
466 488
467 if (copy_sc_from_user(&current->thread.regs, sc)) 489 if (copy_sc_from_user(&current->thread.regs, sc))
468 goto segfault; 490 goto segfault;
@@ -476,24 +498,107 @@ long sys_sigreturn(struct pt_regs regs)
476 return 0; 498 return 0;
477} 499}
478 500
479long sys_rt_sigreturn(struct pt_regs regs) 501#else
502
503struct rt_sigframe
504{
505 char __user *pretcode;
506 struct ucontext uc;
507 struct siginfo info;
508 struct _fpstate fpstate;
509};
510
511int setup_signal_stack_si(unsigned long stack_top, int sig,
512 struct k_sigaction *ka, struct pt_regs * regs,
513 siginfo_t *info, sigset_t *set)
514{
515 struct rt_sigframe __user *frame;
516 int err = 0;
517 struct task_struct *me = current;
518
519 frame = (struct rt_sigframe __user *)
520 round_down(stack_top - sizeof(struct rt_sigframe), 16);
521 /* Subtract 128 for a red zone and 8 for proper alignment */
522 frame = (struct rt_sigframe __user *) ((unsigned long) frame - 128 - 8);
523
524 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
525 goto out;
526
527 if (ka->sa.sa_flags & SA_SIGINFO) {
528 err |= copy_siginfo_to_user(&frame->info, info);
529 if (err)
530 goto out;
531 }
532
533 /* Create the ucontext. */
534 err |= __put_user(0, &frame->uc.uc_flags);
535 err |= __put_user(0, &frame->uc.uc_link);
536 err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
537 err |= __put_user(sas_ss_flags(PT_REGS_SP(regs)),
538 &frame->uc.uc_stack.ss_flags);
539 err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
540 err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs,
541 set->sig[0]);
542 err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate);
543 if (sizeof(*set) == 16) {
544 __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
545 __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
546 }
547 else
548 err |= __copy_to_user(&frame->uc.uc_sigmask, set,
549 sizeof(*set));
550
551 /*
552 * Set up to return from userspace. If provided, use a stub
553 * already in userspace.
554 */
555 /* x86-64 should always use SA_RESTORER. */
556 if (ka->sa.sa_flags & SA_RESTORER)
557 err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
558 else
559 /* could use a vstub here */
560 return err;
561
562 if (err)
563 return err;
564
565 /* Set up registers for signal handler */
566 {
567 struct exec_domain *ed = current_thread_info()->exec_domain;
568 if (unlikely(ed && ed->signal_invmap && sig < 32))
569 sig = ed->signal_invmap[sig];
570 }
571
572 PT_REGS_SP(regs) = (unsigned long) frame;
573 PT_REGS_RDI(regs) = sig;
574 /* In case the signal handler was declared without prototypes */
575 PT_REGS_RAX(regs) = 0;
576
577 /*
578 * This also works for non SA_SIGINFO handlers because they expect the
579 * next argument after the signal number on the stack.
580 */
581 PT_REGS_RSI(regs) = (unsigned long) &frame->info;
582 PT_REGS_RDX(regs) = (unsigned long) &frame->uc;
583 PT_REGS_RIP(regs) = (unsigned long) ka->sa.sa_handler;
584 out:
585 return err;
586}
587#endif
588
589long sys_rt_sigreturn(struct pt_regs *regs)
480{ 590{
481 unsigned long sp = PT_REGS_SP(&current->thread.regs); 591 unsigned long sp = PT_REGS_SP(&current->thread.regs);
482 struct rt_sigframe __user *frame = 592 struct rt_sigframe __user *frame =
483 (struct rt_sigframe __user *) (sp - 4); 593 (struct rt_sigframe __user *)(sp - sizeof(long));
484 sigset_t set;
485 struct ucontext __user *uc = &frame->uc; 594 struct ucontext __user *uc = &frame->uc;
486 int sig_size = _NSIG_WORDS * sizeof(unsigned long); 595 sigset_t set;
487 596
488 if (copy_from_user(&set, &uc->uc_sigmask, sig_size)) 597 if (copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
489 goto segfault; 598 goto segfault;
490 599
491 sigdelsetmask(&set, ~_BLOCKABLE); 600 sigdelsetmask(&set, ~_BLOCKABLE);
492 601 set_current_blocked(&set);
493 spin_lock_irq(&current->sighand->siglock);
494 current->blocked = set;
495 recalc_sigpending();
496 spin_unlock_irq(&current->sighand->siglock);
497 602
498 if (copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext)) 603 if (copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext))
499 goto segfault; 604 goto segfault;
@@ -506,3 +611,14 @@ long sys_rt_sigreturn(struct pt_regs regs)
506 force_sig(SIGSEGV, current); 611 force_sig(SIGSEGV, current);
507 return 0; 612 return 0;
508} 613}
614
615#ifdef CONFIG_X86_32
616long ptregs_sigreturn(void)
617{
618 return sys_sigreturn(NULL);
619}
620long ptregs_rt_sigreturn(void)
621{
622 return sys_rt_sigreturn(NULL);
623}
624#endif
diff --git a/arch/um/sys-i386/stub.S b/arch/x86/um/stub_32.S
index 54a36ec20cb7..54a36ec20cb7 100644
--- a/arch/um/sys-i386/stub.S
+++ b/arch/x86/um/stub_32.S
diff --git a/arch/um/sys-x86_64/stub.S b/arch/x86/um/stub_64.S
index 20e4a96a6dcb..20e4a96a6dcb 100644
--- a/arch/um/sys-x86_64/stub.S
+++ b/arch/x86/um/stub_64.S
diff --git a/arch/um/sys-x86_64/stub_segv.c b/arch/x86/um/stub_segv.c
index ced051afc705..b7450bd22e7d 100644
--- a/arch/um/sys-x86_64/stub_segv.c
+++ b/arch/x86/um/stub_segv.c
@@ -3,19 +3,16 @@
3 * Licensed under the GPL 3 * Licensed under the GPL
4 */ 4 */
5 5
6#include <signal.h>
7#include "as-layout.h"
8#include "sysdep/stub.h" 6#include "sysdep/stub.h"
9#include "sysdep/faultinfo.h" 7#include "sysdep/faultinfo.h"
10#include "sysdep/sigcontext.h" 8#include "sysdep/mcontext.h"
11 9
12void __attribute__ ((__section__ (".__syscall_stub"))) 10void __attribute__ ((__section__ (".__syscall_stub")))
13stub_segv_handler(int sig) 11stub_segv_handler(int sig, siginfo_t *info, void *p)
14{ 12{
15 struct ucontext *uc; 13 struct ucontext *uc = p;
16 14
17 __asm__ __volatile__("movq %%rdx, %0" : "=g" (uc) :); 15 GET_FAULTINFO_FROM_MC(*((struct faultinfo *) STUB_DATA),
18 GET_FAULTINFO_FROM_SC(*((struct faultinfo *) STUB_DATA),
19 &uc->uc_mcontext); 16 &uc->uc_mcontext);
20 trap_myself(); 17 trap_myself();
21} 18}
diff --git a/arch/um/sys-i386/sys_call_table.S b/arch/x86/um/sys_call_table_32.S
index de274071455d..a7ca80d2dceb 100644
--- a/arch/um/sys-i386/sys_call_table.S
+++ b/arch/x86/um/sys_call_table_32.S
@@ -13,16 +13,14 @@
13#define ptregs_execve sys_execve 13#define ptregs_execve sys_execve
14#define ptregs_iopl sys_iopl 14#define ptregs_iopl sys_iopl
15#define ptregs_vm86old sys_vm86old 15#define ptregs_vm86old sys_vm86old
16#define ptregs_sigreturn sys_sigreturn
17#define ptregs_clone sys_clone 16#define ptregs_clone sys_clone
18#define ptregs_vm86 sys_vm86 17#define ptregs_vm86 sys_vm86
19#define ptregs_rt_sigreturn sys_rt_sigreturn
20#define ptregs_sigaltstack sys_sigaltstack 18#define ptregs_sigaltstack sys_sigaltstack
21#define ptregs_vfork sys_vfork 19#define ptregs_vfork sys_vfork
22 20
23.section .rodata,"a" 21.section .rodata,"a"
24 22
25#include "../../x86/kernel/syscall_table_32.S" 23#include "../kernel/syscall_table_32.S"
26 24
27ENTRY(syscall_table_size) 25ENTRY(syscall_table_size)
28.long .-sys_call_table 26.long .-sys_call_table
diff --git a/arch/um/sys-x86_64/syscall_table.c b/arch/x86/um/sys_call_table_64.c
index 47d469e7e7ce..99522f78b162 100644
--- a/arch/um/sys-x86_64/syscall_table.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -6,7 +6,6 @@
6#include <linux/linkage.h> 6#include <linux/linkage.h>
7#include <linux/sys.h> 7#include <linux/sys.h>
8#include <linux/cache.h> 8#include <linux/cache.h>
9#include <kern_constants.h>
10 9
11#define __NO_STUBS 10#define __NO_STUBS
12 11
@@ -59,7 +58,7 @@ extern void sys_ni_syscall(void);
59 */ 58 */
60 59
61sys_call_ptr_t sys_call_table[] __cacheline_aligned = { 60sys_call_ptr_t sys_call_table[] __cacheline_aligned = {
62#include "../../x86/include/asm/unistd_64.h" 61#include <asm/unistd_64.h>
63}; 62};
64 63
65int syscall_table_size = sizeof(sys_call_table); 64int syscall_table_size = sizeof(sys_call_table);
diff --git a/arch/um/sys-i386/syscalls.c b/arch/x86/um/syscalls_32.c
index 70ca357393b8..70ca357393b8 100644
--- a/arch/um/sys-i386/syscalls.c
+++ b/arch/x86/um/syscalls_32.c
diff --git a/arch/um/sys-x86_64/syscalls.c b/arch/x86/um/syscalls_64.c
index f3d82bb6e15a..f3d82bb6e15a 100644
--- a/arch/um/sys-x86_64/syscalls.c
+++ b/arch/x86/um/syscalls_64.c
diff --git a/arch/um/sys-i386/sysrq.c b/arch/x86/um/sysrq_32.c
index 171b3e9dc867..171b3e9dc867 100644
--- a/arch/um/sys-i386/sysrq.c
+++ b/arch/x86/um/sysrq_32.c
diff --git a/arch/um/sys-x86_64/sysrq.c b/arch/x86/um/sysrq_64.c
index f4f82beb3508..e8913436d7dc 100644
--- a/arch/um/sys-x86_64/sysrq.c
+++ b/arch/x86/um/sysrq_64.c
@@ -20,7 +20,7 @@ void __show_regs(struct pt_regs *regs)
20 current->comm, print_tainted(), init_utsname()->release); 20 current->comm, print_tainted(), init_utsname()->release);
21 printk(KERN_INFO "RIP: %04lx:[<%016lx>]\n", PT_REGS_CS(regs) & 0xffff, 21 printk(KERN_INFO "RIP: %04lx:[<%016lx>]\n", PT_REGS_CS(regs) & 0xffff,
22 PT_REGS_RIP(regs)); 22 PT_REGS_RIP(regs));
23 printk(KERN_INFO "RSP: %016lx EFLAGS: %08lx\n", PT_REGS_RSP(regs), 23 printk(KERN_INFO "RSP: %016lx EFLAGS: %08lx\n", PT_REGS_SP(regs),
24 PT_REGS_EFLAGS(regs)); 24 PT_REGS_EFLAGS(regs));
25 printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", 25 printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
26 PT_REGS_RAX(regs), PT_REGS_RBX(regs), PT_REGS_RCX(regs)); 26 PT_REGS_RAX(regs), PT_REGS_RBX(regs), PT_REGS_RCX(regs));
diff --git a/arch/um/sys-i386/tls.c b/arch/x86/um/tls_32.c
index c6c7131e563b..c6c7131e563b 100644
--- a/arch/um/sys-i386/tls.c
+++ b/arch/x86/um/tls_32.c
diff --git a/arch/um/sys-x86_64/tls.c b/arch/x86/um/tls_64.c
index f7ba46200ecd..f7ba46200ecd 100644
--- a/arch/um/sys-x86_64/tls.c
+++ b/arch/x86/um/tls_64.c
diff --git a/arch/um/sys-x86_64/user-offsets.c b/arch/x86/um/user-offsets.c
index 973585414a66..ca49be8ddd0c 100644
--- a/arch/um/sys-x86_64/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -9,28 +9,43 @@
9#include <asm/types.h> 9#include <asm/types.h>
10 10
11#define DEFINE(sym, val) \ 11#define DEFINE(sym, val) \
12 asm volatile("\n->" #sym " %0 " #val : : "i" (val)) 12 asm volatile("\n->" #sym " %0 " #val : : "i" (val))
13 13
14#define DEFINE_LONGS(sym, val) \ 14#define DEFINE_LONGS(sym, val) \
15 asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long))) 15 asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long)))
16
17#define OFFSET(sym, str, mem) \
18 DEFINE(sym, offsetof(struct str, mem));
19 16
20void foo(void) 17void foo(void)
21{ 18{
22 OFFSET(HOST_SC_CR2, sigcontext, cr2); 19#ifdef __i386__
23 OFFSET(HOST_SC_ERR, sigcontext, err); 20 DEFINE_LONGS(HOST_FP_SIZE, sizeof(struct user_fpregs_struct));
24 OFFSET(HOST_SC_TRAPNO, sigcontext, trapno); 21 DEFINE_LONGS(HOST_FPX_SIZE, sizeof(struct user_fpxregs_struct));
25 22
23 DEFINE(HOST_IP, EIP);
24 DEFINE(HOST_SP, UESP);
25 DEFINE(HOST_EFLAGS, EFL);
26 DEFINE(HOST_AX, EAX);
27 DEFINE(HOST_BX, EBX);
28 DEFINE(HOST_CX, ECX);
29 DEFINE(HOST_DX, EDX);
30 DEFINE(HOST_SI, ESI);
31 DEFINE(HOST_DI, EDI);
32 DEFINE(HOST_BP, EBP);
33 DEFINE(HOST_CS, CS);
34 DEFINE(HOST_SS, SS);
35 DEFINE(HOST_DS, DS);
36 DEFINE(HOST_FS, FS);
37 DEFINE(HOST_ES, ES);
38 DEFINE(HOST_GS, GS);
39 DEFINE(HOST_ORIG_AX, ORIG_EAX);
40#else
26 DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long)); 41 DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long));
27 DEFINE_LONGS(HOST_RBX, RBX); 42 DEFINE_LONGS(HOST_BX, RBX);
28 DEFINE_LONGS(HOST_RCX, RCX); 43 DEFINE_LONGS(HOST_CX, RCX);
29 DEFINE_LONGS(HOST_RDI, RDI); 44 DEFINE_LONGS(HOST_DI, RDI);
30 DEFINE_LONGS(HOST_RSI, RSI); 45 DEFINE_LONGS(HOST_SI, RSI);
31 DEFINE_LONGS(HOST_RDX, RDX); 46 DEFINE_LONGS(HOST_DX, RDX);
32 DEFINE_LONGS(HOST_RBP, RBP); 47 DEFINE_LONGS(HOST_BP, RBP);
33 DEFINE_LONGS(HOST_RAX, RAX); 48 DEFINE_LONGS(HOST_AX, RAX);
34 DEFINE_LONGS(HOST_R8, R8); 49 DEFINE_LONGS(HOST_R8, R8);
35 DEFINE_LONGS(HOST_R9, R9); 50 DEFINE_LONGS(HOST_R9, R9);
36 DEFINE_LONGS(HOST_R10, R10); 51 DEFINE_LONGS(HOST_R10, R10);
@@ -39,7 +54,7 @@ void foo(void)
39 DEFINE_LONGS(HOST_R13, R13); 54 DEFINE_LONGS(HOST_R13, R13);
40 DEFINE_LONGS(HOST_R14, R14); 55 DEFINE_LONGS(HOST_R14, R14);
41 DEFINE_LONGS(HOST_R15, R15); 56 DEFINE_LONGS(HOST_R15, R15);
42 DEFINE_LONGS(HOST_ORIG_RAX, ORIG_RAX); 57 DEFINE_LONGS(HOST_ORIG_AX, ORIG_RAX);
43 DEFINE_LONGS(HOST_CS, CS); 58 DEFINE_LONGS(HOST_CS, CS);
44 DEFINE_LONGS(HOST_SS, SS); 59 DEFINE_LONGS(HOST_SS, SS);
45 DEFINE_LONGS(HOST_EFLAGS, EFLAGS); 60 DEFINE_LONGS(HOST_EFLAGS, EFLAGS);
@@ -52,9 +67,9 @@ void foo(void)
52 67
53 DEFINE_LONGS(HOST_IP, RIP); 68 DEFINE_LONGS(HOST_IP, RIP);
54 DEFINE_LONGS(HOST_SP, RSP); 69 DEFINE_LONGS(HOST_SP, RSP);
55 DEFINE(UM_FRAME_SIZE, sizeof(struct user_regs_struct)); 70#endif
56 71
57 /* XXX Duplicated between i386 and x86_64 */ 72 DEFINE(UM_FRAME_SIZE, sizeof(struct user_regs_struct));
58 DEFINE(UM_POLLIN, POLLIN); 73 DEFINE(UM_POLLIN, POLLIN);
59 DEFINE(UM_POLLPRI, POLLPRI); 74 DEFINE(UM_POLLPRI, POLLPRI);
60 DEFINE(UM_POLLOUT, POLLOUT); 75 DEFINE(UM_POLLOUT, POLLOUT);
diff --git a/arch/um/sys-x86_64/vdso/Makefile b/arch/x86/um/vdso/Makefile
index 5dffe6d46686..6c803ca49b5d 100644
--- a/arch/um/sys-x86_64/vdso/Makefile
+++ b/arch/x86/um/vdso/Makefile
@@ -46,8 +46,8 @@ $(vobjs): KBUILD_CFLAGS += $(CFL)
46# 46#
47# vDSO code runs in userspace and -pg doesn't help with profiling anyway. 47# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
48# 48#
49CFLAGS_REMOVE_vdso-note.o = -pg 49CFLAGS_REMOVE_vdso-note.o = -pg -fprofile-arcs -ftest-coverage
50CFLAGS_REMOVE_um_vdso.o = -pg 50CFLAGS_REMOVE_um_vdso.o = -pg -fprofile-arcs -ftest-coverage
51 51
52targets += vdso-syms.lds 52targets += vdso-syms.lds
53obj-$(VDSO64-y) += vdso-syms.lds 53obj-$(VDSO64-y) += vdso-syms.lds
diff --git a/arch/um/sys-x86_64/vdso/checkundef.sh b/arch/x86/um/vdso/checkundef.sh
index 7ee90a9b549d..7ee90a9b549d 100644
--- a/arch/um/sys-x86_64/vdso/checkundef.sh
+++ b/arch/x86/um/vdso/checkundef.sh
diff --git a/arch/um/sys-x86_64/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c
index 7c441b59d375..7c441b59d375 100644
--- a/arch/um/sys-x86_64/vdso/um_vdso.c
+++ b/arch/x86/um/vdso/um_vdso.c
diff --git a/arch/um/sys-x86_64/vdso/vdso-layout.lds.S b/arch/x86/um/vdso/vdso-layout.lds.S
index 634a2cf62046..634a2cf62046 100644
--- a/arch/um/sys-x86_64/vdso/vdso-layout.lds.S
+++ b/arch/x86/um/vdso/vdso-layout.lds.S
diff --git a/arch/um/sys-x86_64/vdso/vdso-note.S b/arch/x86/um/vdso/vdso-note.S
index 79a071e4357e..79a071e4357e 100644
--- a/arch/um/sys-x86_64/vdso/vdso-note.S
+++ b/arch/x86/um/vdso/vdso-note.S
diff --git a/arch/um/sys-x86_64/vdso/vdso.S b/arch/x86/um/vdso/vdso.S
index ec82c1686bd6..1cb468adacbb 100644
--- a/arch/um/sys-x86_64/vdso/vdso.S
+++ b/arch/x86/um/vdso/vdso.S
@@ -4,7 +4,7 @@ __INITDATA
4 4
5 .globl vdso_start, vdso_end 5 .globl vdso_start, vdso_end
6vdso_start: 6vdso_start:
7 .incbin "arch/um/sys-x86_64/vdso/vdso.so" 7 .incbin "arch/x86/um/vdso/vdso.so"
8vdso_end: 8vdso_end:
9 9
10__FINIT 10__FINIT
diff --git a/arch/um/sys-x86_64/vdso/vdso.lds.S b/arch/x86/um/vdso/vdso.lds.S
index b96b2677cad8..b96b2677cad8 100644
--- a/arch/um/sys-x86_64/vdso/vdso.lds.S
+++ b/arch/x86/um/vdso/vdso.lds.S
diff --git a/arch/um/sys-x86_64/vdso/vma.c b/arch/x86/um/vdso/vma.c
index 9495c8d0ce37..91f4ec9a0a56 100644
--- a/arch/um/sys-x86_64/vdso/vma.c
+++ b/arch/x86/um/vdso/vma.c
@@ -28,7 +28,7 @@ static int __init init_vdso(void)
28 28
29 um_vdso_addr = task_size - PAGE_SIZE; 29 um_vdso_addr = task_size - PAGE_SIZE;
30 30
31 vdsop = kmalloc(GFP_KERNEL, sizeof(struct page *)); 31 vdsop = kmalloc(sizeof(struct page *), GFP_KERNEL);
32 if (!vdsop) 32 if (!vdsop)
33 goto oom; 33 goto oom;
34 34
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 079c08808d8a..e7a5750a93d9 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -8,10 +8,13 @@
8#include <linux/scatterlist.h> 8#include <linux/scatterlist.h>
9#include <linux/string_helpers.h> 9#include <linux/string_helpers.h>
10#include <scsi/scsi_cmnd.h> 10#include <scsi/scsi_cmnd.h>
11#include <linux/idr.h>
11 12
12#define PART_BITS 4 13#define PART_BITS 4
13 14
14static int major, index; 15static int major;
16static DEFINE_IDA(vd_index_ida);
17
15struct workqueue_struct *virtblk_wq; 18struct workqueue_struct *virtblk_wq;
16 19
17struct virtio_blk 20struct virtio_blk
@@ -35,6 +38,9 @@ struct virtio_blk
35 /* What host tells us, plus 2 for header & tailer. */ 38 /* What host tells us, plus 2 for header & tailer. */
36 unsigned int sg_elems; 39 unsigned int sg_elems;
37 40
41 /* Ida index - used to track minor number allocations. */
42 int index;
43
38 /* Scatterlist: can be too big for stack. */ 44 /* Scatterlist: can be too big for stack. */
39 struct scatterlist sg[/*sg_elems*/]; 45 struct scatterlist sg[/*sg_elems*/];
40}; 46};
@@ -276,6 +282,11 @@ static int index_to_minor(int index)
276 return index << PART_BITS; 282 return index << PART_BITS;
277} 283}
278 284
285static int minor_to_index(int minor)
286{
287 return minor >> PART_BITS;
288}
289
279static ssize_t virtblk_serial_show(struct device *dev, 290static ssize_t virtblk_serial_show(struct device *dev,
280 struct device_attribute *attr, char *buf) 291 struct device_attribute *attr, char *buf)
281{ 292{
@@ -341,14 +352,17 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
341{ 352{
342 struct virtio_blk *vblk; 353 struct virtio_blk *vblk;
343 struct request_queue *q; 354 struct request_queue *q;
344 int err; 355 int err, index;
345 u64 cap; 356 u64 cap;
346 u32 v, blk_size, sg_elems, opt_io_size; 357 u32 v, blk_size, sg_elems, opt_io_size;
347 u16 min_io_size; 358 u16 min_io_size;
348 u8 physical_block_exp, alignment_offset; 359 u8 physical_block_exp, alignment_offset;
349 360
350 if (index_to_minor(index) >= 1 << MINORBITS) 361 err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
351 return -ENOSPC; 362 GFP_KERNEL);
363 if (err < 0)
364 goto out;
365 index = err;
352 366
353 /* We need to know how many segments before we allocate. */ 367 /* We need to know how many segments before we allocate. */
354 err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX, 368 err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
@@ -365,7 +379,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
365 sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL); 379 sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
366 if (!vblk) { 380 if (!vblk) {
367 err = -ENOMEM; 381 err = -ENOMEM;
368 goto out; 382 goto out_free_index;
369 } 383 }
370 384
371 INIT_LIST_HEAD(&vblk->reqs); 385 INIT_LIST_HEAD(&vblk->reqs);
@@ -421,7 +435,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
421 vblk->disk->private_data = vblk; 435 vblk->disk->private_data = vblk;
422 vblk->disk->fops = &virtblk_fops; 436 vblk->disk->fops = &virtblk_fops;
423 vblk->disk->driverfs_dev = &vdev->dev; 437 vblk->disk->driverfs_dev = &vdev->dev;
424 index++; 438 vblk->index = index;
425 439
426 /* configure queue flush support */ 440 /* configure queue flush support */
427 if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) 441 if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
@@ -516,6 +530,8 @@ out_free_vq:
516 vdev->config->del_vqs(vdev); 530 vdev->config->del_vqs(vdev);
517out_free_vblk: 531out_free_vblk:
518 kfree(vblk); 532 kfree(vblk);
533out_free_index:
534 ida_simple_remove(&vd_index_ida, index);
519out: 535out:
520 return err; 536 return err;
521} 537}
@@ -523,6 +539,7 @@ out:
523static void __devexit virtblk_remove(struct virtio_device *vdev) 539static void __devexit virtblk_remove(struct virtio_device *vdev)
524{ 540{
525 struct virtio_blk *vblk = vdev->priv; 541 struct virtio_blk *vblk = vdev->priv;
542 int index = vblk->index;
526 543
527 flush_work(&vblk->config_work); 544 flush_work(&vblk->config_work);
528 545
@@ -538,6 +555,7 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
538 mempool_destroy(vblk->pool); 555 mempool_destroy(vblk->pool);
539 vdev->config->del_vqs(vdev); 556 vdev->config->del_vqs(vdev);
540 kfree(vblk); 557 kfree(vblk);
558 ida_simple_remove(&vd_index_ida, index);
541} 559}
542 560
543static const struct virtio_device_id id_table[] = { 561static const struct virtio_device_id id_table[] = {
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 423fd56bf612..43643033a3ae 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -298,7 +298,7 @@ if RTC_LIB=n
298config RTC 298config RTC
299 tristate "Enhanced Real Time Clock Support (legacy PC RTC driver)" 299 tristate "Enhanced Real Time Clock Support (legacy PC RTC driver)"
300 depends on !PPC && !PARISC && !IA64 && !M68K && !SPARC && !FRV \ 300 depends on !PPC && !PARISC && !IA64 && !M68K && !SPARC && !FRV \
301 && !ARM && !SUPERH && !S390 && !AVR32 && !BLACKFIN 301 && !ARM && !SUPERH && !S390 && !AVR32 && !BLACKFIN && !UML
302 ---help--- 302 ---help---
303 If you say Y here and create a character special file /dev/rtc with 303 If you say Y here and create a character special file /dev/rtc with
304 major number 10 and minor number 135 using mknod ("man mknod"), you 304 major number 10 and minor number 135 using mknod ("man mknod"), you
@@ -346,7 +346,7 @@ config JS_RTC
346 346
347config GEN_RTC 347config GEN_RTC
348 tristate "Generic /dev/rtc emulation" 348 tristate "Generic /dev/rtc emulation"
349 depends on RTC!=y && !IA64 && !ARM && !M32R && !MIPS && !SPARC && !FRV && !S390 && !SUPERH && !AVR32 && !BLACKFIN 349 depends on RTC!=y && !IA64 && !ARM && !M32R && !MIPS && !SPARC && !FRV && !S390 && !SUPERH && !AVR32 && !BLACKFIN && !UML
350 ---help--- 350 ---help---
351 If you say Y here and create a character special file /dev/rtc with 351 If you say Y here and create a character special file /dev/rtc with
352 major number 10 and minor number 135 using mknod ("man mknod"), you 352 major number 10 and minor number 135 using mknod ("man mknod"), you
@@ -490,7 +490,7 @@ config SCx200_GPIO
490 490
491config PC8736x_GPIO 491config PC8736x_GPIO
492 tristate "NatSemi PC8736x GPIO Support" 492 tristate "NatSemi PC8736x GPIO Support"
493 depends on X86_32 493 depends on X86_32 && !UML
494 default SCx200_GPIO # mostly N 494 default SCx200_GPIO # mostly N
495 select NSC_GPIO # needed for support routines 495 select NSC_GPIO # needed for support routines
496 help 496 help
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index e0135873ba9d..0689bf6b0183 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -235,3 +235,18 @@ config HW_RANDOM_PPC4XX
235 module will be called ppc4xx-rng. 235 module will be called ppc4xx-rng.
236 236
237 If unsure, say N. 237 If unsure, say N.
238
239config UML_RANDOM
240 depends on UML
241 tristate "Hardware random number generator"
242 help
243 This option enables UML's "hardware" random number generator. It
244 attaches itself to the host's /dev/random, supplying as much entropy
245 as the host has, rather than the small amount the UML gets from its
246 own drivers. It registers itself as a standard hardware random number
247 generator, major 10, minor 183, and the canonical device name is
248 /dev/hwrng.
249 The way to make use of this is to install the rng-tools package
250 (check your distro, or download from
251 http://sourceforge.net/projects/gkernel/). rngd periodically reads
252 /dev/hwrng and injects the entropy into /dev/random.
diff --git a/drivers/char/ttyprintk.c b/drivers/char/ttyprintk.c
index a1f68af4ccf4..f22861511909 100644
--- a/drivers/char/ttyprintk.c
+++ b/drivers/char/ttyprintk.c
@@ -170,7 +170,7 @@ static const struct tty_operations ttyprintk_ops = {
170 .ioctl = tpk_ioctl, 170 .ioctl = tpk_ioctl,
171}; 171};
172 172
173struct tty_port_operations null_ops = { }; 173static struct tty_port_operations null_ops = { };
174 174
175static struct tty_driver *ttyprintk_driver; 175static struct tty_driver *ttyprintk_driver;
176 176
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index fb68b1295373..4ca181f1378b 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -19,8 +19,10 @@
19 */ 19 */
20#include <linux/cdev.h> 20#include <linux/cdev.h>
21#include <linux/debugfs.h> 21#include <linux/debugfs.h>
22#include <linux/completion.h>
22#include <linux/device.h> 23#include <linux/device.h>
23#include <linux/err.h> 24#include <linux/err.h>
25#include <linux/freezer.h>
24#include <linux/fs.h> 26#include <linux/fs.h>
25#include <linux/init.h> 27#include <linux/init.h>
26#include <linux/list.h> 28#include <linux/list.h>
@@ -73,6 +75,7 @@ struct ports_driver_data {
73static struct ports_driver_data pdrvdata; 75static struct ports_driver_data pdrvdata;
74 76
75DEFINE_SPINLOCK(pdrvdata_lock); 77DEFINE_SPINLOCK(pdrvdata_lock);
78DECLARE_COMPLETION(early_console_added);
76 79
77/* This struct holds information that's relevant only for console ports */ 80/* This struct holds information that's relevant only for console ports */
78struct console { 81struct console {
@@ -151,6 +154,10 @@ struct ports_device {
151 int chr_major; 154 int chr_major;
152}; 155};
153 156
157struct port_stats {
158 unsigned long bytes_sent, bytes_received, bytes_discarded;
159};
160
154/* This struct holds the per-port data */ 161/* This struct holds the per-port data */
155struct port { 162struct port {
156 /* Next port in the list, head is in the ports_device */ 163 /* Next port in the list, head is in the ports_device */
@@ -179,6 +186,13 @@ struct port {
179 struct dentry *debugfs_file; 186 struct dentry *debugfs_file;
180 187
181 /* 188 /*
189 * Keep count of the bytes sent, received and discarded for
190 * this port for accounting and debugging purposes. These
191 * counts are not reset across port open / close events.
192 */
193 struct port_stats stats;
194
195 /*
182 * The entries in this struct will be valid if this port is 196 * The entries in this struct will be valid if this port is
183 * hooked up to an hvc console 197 * hooked up to an hvc console
184 */ 198 */
@@ -347,17 +361,19 @@ fail:
347} 361}
348 362
349/* Callers should take appropriate locks */ 363/* Callers should take appropriate locks */
350static void *get_inbuf(struct port *port) 364static struct port_buffer *get_inbuf(struct port *port)
351{ 365{
352 struct port_buffer *buf; 366 struct port_buffer *buf;
353 struct virtqueue *vq;
354 unsigned int len; 367 unsigned int len;
355 368
356 vq = port->in_vq; 369 if (port->inbuf)
357 buf = virtqueue_get_buf(vq, &len); 370 return port->inbuf;
371
372 buf = virtqueue_get_buf(port->in_vq, &len);
358 if (buf) { 373 if (buf) {
359 buf->len = len; 374 buf->len = len;
360 buf->offset = 0; 375 buf->offset = 0;
376 port->stats.bytes_received += len;
361 } 377 }
362 return buf; 378 return buf;
363} 379}
@@ -384,32 +400,27 @@ static int add_inbuf(struct virtqueue *vq, struct port_buffer *buf)
384static void discard_port_data(struct port *port) 400static void discard_port_data(struct port *port)
385{ 401{
386 struct port_buffer *buf; 402 struct port_buffer *buf;
387 struct virtqueue *vq; 403 unsigned int err;
388 unsigned int len;
389 int ret;
390 404
391 if (!port->portdev) { 405 if (!port->portdev) {
392 /* Device has been unplugged. vqs are already gone. */ 406 /* Device has been unplugged. vqs are already gone. */
393 return; 407 return;
394 } 408 }
395 vq = port->in_vq; 409 buf = get_inbuf(port);
396 if (port->inbuf)
397 buf = port->inbuf;
398 else
399 buf = virtqueue_get_buf(vq, &len);
400 410
401 ret = 0; 411 err = 0;
402 while (buf) { 412 while (buf) {
403 if (add_inbuf(vq, buf) < 0) { 413 port->stats.bytes_discarded += buf->len - buf->offset;
404 ret++; 414 if (add_inbuf(port->in_vq, buf) < 0) {
415 err++;
405 free_buf(buf); 416 free_buf(buf);
406 } 417 }
407 buf = virtqueue_get_buf(vq, &len); 418 port->inbuf = NULL;
419 buf = get_inbuf(port);
408 } 420 }
409 port->inbuf = NULL; 421 if (err)
410 if (ret)
411 dev_warn(port->dev, "Errors adding %d buffers back to vq\n", 422 dev_warn(port->dev, "Errors adding %d buffers back to vq\n",
412 ret); 423 err);
413} 424}
414 425
415static bool port_has_data(struct port *port) 426static bool port_has_data(struct port *port)
@@ -417,18 +428,12 @@ static bool port_has_data(struct port *port)
417 unsigned long flags; 428 unsigned long flags;
418 bool ret; 429 bool ret;
419 430
431 ret = false;
420 spin_lock_irqsave(&port->inbuf_lock, flags); 432 spin_lock_irqsave(&port->inbuf_lock, flags);
421 if (port->inbuf) {
422 ret = true;
423 goto out;
424 }
425 port->inbuf = get_inbuf(port); 433 port->inbuf = get_inbuf(port);
426 if (port->inbuf) { 434 if (port->inbuf)
427 ret = true; 435 ret = true;
428 goto out; 436
429 }
430 ret = false;
431out:
432 spin_unlock_irqrestore(&port->inbuf_lock, flags); 437 spin_unlock_irqrestore(&port->inbuf_lock, flags);
433 return ret; 438 return ret;
434} 439}
@@ -529,6 +534,8 @@ static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
529 cpu_relax(); 534 cpu_relax();
530done: 535done:
531 spin_unlock_irqrestore(&port->outvq_lock, flags); 536 spin_unlock_irqrestore(&port->outvq_lock, flags);
537
538 port->stats.bytes_sent += in_count;
532 /* 539 /*
533 * We're expected to return the amount of data we wrote -- all 540 * We're expected to return the amount of data we wrote -- all
534 * of it 541 * of it
@@ -633,8 +640,8 @@ static ssize_t port_fops_read(struct file *filp, char __user *ubuf,
633 if (filp->f_flags & O_NONBLOCK) 640 if (filp->f_flags & O_NONBLOCK)
634 return -EAGAIN; 641 return -EAGAIN;
635 642
636 ret = wait_event_interruptible(port->waitqueue, 643 ret = wait_event_freezable(port->waitqueue,
637 !will_read_block(port)); 644 !will_read_block(port));
638 if (ret < 0) 645 if (ret < 0)
639 return ret; 646 return ret;
640 } 647 }
@@ -677,8 +684,8 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf,
677 if (nonblock) 684 if (nonblock)
678 return -EAGAIN; 685 return -EAGAIN;
679 686
680 ret = wait_event_interruptible(port->waitqueue, 687 ret = wait_event_freezable(port->waitqueue,
681 !will_write_block(port)); 688 !will_write_block(port));
682 if (ret < 0) 689 if (ret < 0)
683 return ret; 690 return ret;
684 } 691 }
@@ -1059,6 +1066,14 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf,
1059 out_offset += snprintf(buf + out_offset, out_count - out_offset, 1066 out_offset += snprintf(buf + out_offset, out_count - out_offset,
1060 "outvq_full: %d\n", port->outvq_full); 1067 "outvq_full: %d\n", port->outvq_full);
1061 out_offset += snprintf(buf + out_offset, out_count - out_offset, 1068 out_offset += snprintf(buf + out_offset, out_count - out_offset,
1069 "bytes_sent: %lu\n", port->stats.bytes_sent);
1070 out_offset += snprintf(buf + out_offset, out_count - out_offset,
1071 "bytes_received: %lu\n",
1072 port->stats.bytes_received);
1073 out_offset += snprintf(buf + out_offset, out_count - out_offset,
1074 "bytes_discarded: %lu\n",
1075 port->stats.bytes_discarded);
1076 out_offset += snprintf(buf + out_offset, out_count - out_offset,
1062 "is_console: %s\n", 1077 "is_console: %s\n",
1063 is_console_port(port) ? "yes" : "no"); 1078 is_console_port(port) ? "yes" : "no");
1064 out_offset += snprintf(buf + out_offset, out_count - out_offset, 1079 out_offset += snprintf(buf + out_offset, out_count - out_offset,
@@ -1143,6 +1158,7 @@ static int add_port(struct ports_device *portdev, u32 id)
1143 port->cons.ws.ws_row = port->cons.ws.ws_col = 0; 1158 port->cons.ws.ws_row = port->cons.ws.ws_col = 0;
1144 1159
1145 port->host_connected = port->guest_connected = false; 1160 port->host_connected = port->guest_connected = false;
1161 port->stats = (struct port_stats) { 0 };
1146 1162
1147 port->outvq_full = false; 1163 port->outvq_full = false;
1148 1164
@@ -1352,6 +1368,7 @@ static void handle_control_message(struct ports_device *portdev,
1352 break; 1368 break;
1353 1369
1354 init_port_console(port); 1370 init_port_console(port);
1371 complete(&early_console_added);
1355 /* 1372 /*
1356 * Could remove the port here in case init fails - but 1373 * Could remove the port here in case init fails - but
1357 * have to notify the host first. 1374 * have to notify the host first.
@@ -1394,6 +1411,13 @@ static void handle_control_message(struct ports_device *portdev,
1394 break; 1411 break;
1395 case VIRTIO_CONSOLE_PORT_NAME: 1412 case VIRTIO_CONSOLE_PORT_NAME:
1396 /* 1413 /*
1414 * If we woke up after hibernation, we can get this
1415 * again. Skip it in that case.
1416 */
1417 if (port->name)
1418 break;
1419
1420 /*
1397 * Skip the size of the header and the cpkt to get the size 1421 * Skip the size of the header and the cpkt to get the size
1398 * of the name that was sent 1422 * of the name that was sent
1399 */ 1423 */
@@ -1481,8 +1505,7 @@ static void in_intr(struct virtqueue *vq)
1481 return; 1505 return;
1482 1506
1483 spin_lock_irqsave(&port->inbuf_lock, flags); 1507 spin_lock_irqsave(&port->inbuf_lock, flags);
1484 if (!port->inbuf) 1508 port->inbuf = get_inbuf(port);
1485 port->inbuf = get_inbuf(port);
1486 1509
1487 /* 1510 /*
1488 * Don't queue up data when port is closed. This condition 1511 * Don't queue up data when port is closed. This condition
@@ -1563,7 +1586,7 @@ static int init_vqs(struct ports_device *portdev)
1563 portdev->out_vqs = kmalloc(nr_ports * sizeof(struct virtqueue *), 1586 portdev->out_vqs = kmalloc(nr_ports * sizeof(struct virtqueue *),
1564 GFP_KERNEL); 1587 GFP_KERNEL);
1565 if (!vqs || !io_callbacks || !io_names || !portdev->in_vqs || 1588 if (!vqs || !io_callbacks || !io_names || !portdev->in_vqs ||
1566 !portdev->out_vqs) { 1589 !portdev->out_vqs) {
1567 err = -ENOMEM; 1590 err = -ENOMEM;
1568 goto free; 1591 goto free;
1569 } 1592 }
@@ -1648,6 +1671,10 @@ static int __devinit virtcons_probe(struct virtio_device *vdev)
1648 struct ports_device *portdev; 1671 struct ports_device *portdev;
1649 int err; 1672 int err;
1650 bool multiport; 1673 bool multiport;
1674 bool early = early_put_chars != NULL;
1675
1676 /* Ensure to read early_put_chars now */
1677 barrier();
1651 1678
1652 portdev = kmalloc(sizeof(*portdev), GFP_KERNEL); 1679 portdev = kmalloc(sizeof(*portdev), GFP_KERNEL);
1653 if (!portdev) { 1680 if (!portdev) {
@@ -1675,13 +1702,11 @@ static int __devinit virtcons_probe(struct virtio_device *vdev)
1675 1702
1676 multiport = false; 1703 multiport = false;
1677 portdev->config.max_nr_ports = 1; 1704 portdev->config.max_nr_ports = 1;
1678 if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT)) { 1705 if (virtio_config_val(vdev, VIRTIO_CONSOLE_F_MULTIPORT,
1706 offsetof(struct virtio_console_config,
1707 max_nr_ports),
1708 &portdev->config.max_nr_ports) == 0)
1679 multiport = true; 1709 multiport = true;
1680 vdev->config->get(vdev, offsetof(struct virtio_console_config,
1681 max_nr_ports),
1682 &portdev->config.max_nr_ports,
1683 sizeof(portdev->config.max_nr_ports));
1684 }
1685 1710
1686 err = init_vqs(portdev); 1711 err = init_vqs(portdev);
1687 if (err < 0) { 1712 if (err < 0) {
@@ -1719,6 +1744,19 @@ static int __devinit virtcons_probe(struct virtio_device *vdev)
1719 1744
1720 __send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID, 1745 __send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID,
1721 VIRTIO_CONSOLE_DEVICE_READY, 1); 1746 VIRTIO_CONSOLE_DEVICE_READY, 1);
1747
1748 /*
1749 * If there was an early virtio console, assume that there are no
1750 * other consoles. We need to wait until the hvc_alloc matches the
1751 * hvc_instantiate, otherwise tty_open will complain, resulting in
1752 * a "Warning: unable to open an initial console" boot failure.
1753 * Without multiport this is done in add_port above. With multiport
1754 * this might take some host<->guest communication - thus we have to
1755 * wait.
1756 */
1757 if (multiport && early)
1758 wait_for_completion(&early_console_added);
1759
1722 return 0; 1760 return 0;
1723 1761
1724free_vqs: 1762free_vqs:
diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig
index 23e82e46656d..001b147c7f95 100644
--- a/drivers/input/Kconfig
+++ b/drivers/input/Kconfig
@@ -3,7 +3,7 @@
3# 3#
4 4
5menu "Input device support" 5menu "Input device support"
6 depends on !S390 6 depends on !S390 && !UML
7 7
8config INPUT 8config INPUT
9 tristate "Generic input layer (needed for keyboard, mouse, ...)" if EXPERT 9 tristate "Generic input layer (needed for keyboard, mouse, ...)" if EXPERT
diff --git a/drivers/isdn/Kconfig b/drivers/isdn/Kconfig
index 4fb601670de3..a233ed53913a 100644
--- a/drivers/isdn/Kconfig
+++ b/drivers/isdn/Kconfig
@@ -5,7 +5,7 @@
5menuconfig ISDN 5menuconfig ISDN
6 bool "ISDN support" 6 bool "ISDN support"
7 depends on NET 7 depends on NET
8 depends on !S390 8 depends on !S390 && !UML
9 ---help--- 9 ---help---
10 ISDN ("Integrated Services Digital Network", called RNIS in France) 10 ISDN ("Integrated Services Digital Network", called RNIS in France)
11 is a fully digital telephone service that can be used for voice and 11 is a fully digital telephone service that can be used for voice and
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index f1af2228a1b1..61086ea3cc6b 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -1144,7 +1144,7 @@ static void mtdchar_notify_remove(struct mtd_info *mtd)
1144 1144
1145 if (mtd_ino) { 1145 if (mtd_ino) {
1146 /* Destroy the inode if it exists */ 1146 /* Destroy the inode if it exists */
1147 mtd_ino->i_nlink = 0; 1147 clear_nlink(mtd_ino);
1148 iput(mtd_ino); 1148 iput(mtd_ino);
1149 } 1149 }
1150} 1150}
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 91039ab16728..6ee8410443c4 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -925,12 +925,10 @@ static void virtnet_update_status(struct virtnet_info *vi)
925{ 925{
926 u16 v; 926 u16 v;
927 927
928 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) 928 if (virtio_config_val(vi->vdev, VIRTIO_NET_F_STATUS,
929 return;
930
931 vi->vdev->config->get(vi->vdev,
932 offsetof(struct virtio_net_config, status), 929 offsetof(struct virtio_net_config, status),
933 &v, sizeof(v)); 930 &v) < 0)
931 return;
934 932
935 /* Ignore unknown (future) status bits */ 933 /* Ignore unknown (future) status bits */
936 v &= VIRTIO_NET_S_LINK_UP; 934 v &= VIRTIO_NET_S_LINK_UP;
@@ -1006,11 +1004,9 @@ static int virtnet_probe(struct virtio_device *vdev)
1006 } 1004 }
1007 1005
1008 /* Configuration may specify what MAC to use. Otherwise random. */ 1006 /* Configuration may specify what MAC to use. Otherwise random. */
1009 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 1007 if (virtio_config_val_len(vdev, VIRTIO_NET_F_MAC,
1010 vdev->config->get(vdev,
1011 offsetof(struct virtio_net_config, mac), 1008 offsetof(struct virtio_net_config, mac),
1012 dev->dev_addr, dev->addr_len); 1009 dev->dev_addr, dev->addr_len) < 0)
1013 } else
1014 random_ether_addr(dev->dev_addr); 1010 random_ether_addr(dev->dev_addr);
1015 1011
1016 /* Set up our device-specific information */ 1012 /* Set up our device-specific information */
diff --git a/drivers/net/wireless/ath/Kconfig b/drivers/net/wireless/ath/Kconfig
index 073548836413..09602241901b 100644
--- a/drivers/net/wireless/ath/Kconfig
+++ b/drivers/net/wireless/ath/Kconfig
@@ -1,6 +1,6 @@
1menuconfig ATH_COMMON 1menuconfig ATH_COMMON
2 tristate "Atheros Wireless Cards" 2 tristate "Atheros Wireless Cards"
3 depends on CFG80211 3 depends on CFG80211 && (!UML || BROKEN)
4 ---help--- 4 ---help---
5 This will enable the support for the Atheros wireless drivers. 5 This will enable the support for the Atheros wireless drivers.
6 ath5k, ath9k, ath9k_htc and ar9170 drivers share some common code, this option 6 ath5k, ath9k, ath9k_htc and ar9170 drivers share some common code, this option
diff --git a/drivers/net/wireless/rtlwifi/Kconfig b/drivers/net/wireless/rtlwifi/Kconfig
index 45e14760c16e..d6c42e69bdbd 100644
--- a/drivers/net/wireless/rtlwifi/Kconfig
+++ b/drivers/net/wireless/rtlwifi/Kconfig
@@ -12,7 +12,7 @@ config RTL8192CE
12 12
13config RTL8192SE 13config RTL8192SE
14 tristate "Realtek RTL8192SE/RTL8191SE PCIe Wireless Network Adapter" 14 tristate "Realtek RTL8192SE/RTL8191SE PCIe Wireless Network Adapter"
15 depends on MAC80211 && EXPERIMENTAL 15 depends on MAC80211 && EXPERIMENTAL && PCI
16 select FW_LOADER 16 select FW_LOADER
17 select RTLWIFI 17 select RTLWIFI
18 ---help--- 18 ---help---
@@ -23,7 +23,7 @@ config RTL8192SE
23 23
24config RTL8192DE 24config RTL8192DE
25 tristate "Realtek RTL8192DE/RTL8188DE PCIe Wireless Network Adapter" 25 tristate "Realtek RTL8192DE/RTL8188DE PCIe Wireless Network Adapter"
26 depends on MAC80211 && EXPERIMENTAL 26 depends on MAC80211 && EXPERIMENTAL && PCI
27 select FW_LOADER 27 select FW_LOADER
28 select RTLWIFI 28 select RTLWIFI
29 ---help--- 29 ---help---
diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 57de051a74b3..9f88641e67f9 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -70,6 +70,7 @@ config BATTERY_DS2760
70 70
71config BATTERY_DS2780 71config BATTERY_DS2780
72 tristate "DS2780 battery driver" 72 tristate "DS2780 battery driver"
73 depends on HAS_IOMEM
73 select W1 74 select W1
74 select W1_SLAVE_DS2780 75 select W1_SLAVE_DS2780
75 help 76 help
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 5a538fc1cc85..53eb4e55b289 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -8,7 +8,7 @@ config RTC_LIB
8menuconfig RTC_CLASS 8menuconfig RTC_CLASS
9 bool "Real Time Clock" 9 bool "Real Time Clock"
10 default n 10 default n
11 depends on !S390 11 depends on !S390 && !UML
12 select RTC_LIB 12 select RTC_LIB
13 help 13 help
14 Generic RTC class support. If you say yes here, you will 14 Generic RTC class support. If you say yes here, you will
diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c
index f3c6060c96b8..7a1955583b7d 100644
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@@ -1197,7 +1197,7 @@ const struct inode_operations pohmelfs_file_inode_operations = {
1197void pohmelfs_fill_inode(struct inode *inode, struct netfs_inode_info *info) 1197void pohmelfs_fill_inode(struct inode *inode, struct netfs_inode_info *info)
1198{ 1198{
1199 inode->i_mode = info->mode; 1199 inode->i_mode = info->mode;
1200 inode->i_nlink = info->nlink; 1200 set_nlink(inode, info->nlink);
1201 inode->i_uid = info->uid; 1201 inode->i_uid = info->uid;
1202 inode->i_gid = info->gid; 1202 inode->i_gid = info->gid;
1203 inode->i_blocks = info->blocks; 1203 inode->i_blocks = info->blocks;
diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig
index 8816f53e004d..b3d17416d86a 100644
--- a/drivers/tty/Kconfig
+++ b/drivers/tty/Kconfig
@@ -1,6 +1,6 @@
1config VT 1config VT
2 bool "Virtual terminal" if EXPERT 2 bool "Virtual terminal" if EXPERT
3 depends on !S390 3 depends on !S390 && !UML
4 select INPUT 4 select INPUT
5 default y 5 default y
6 ---help--- 6 ---help---
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 57e493b1bd20..816ed08e7cf3 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -35,4 +35,15 @@ config VIRTIO_BALLOON
35 35
36 If unsure, say M. 36 If unsure, say M.
37 37
38 config VIRTIO_MMIO
39 tristate "Platform bus driver for memory mapped virtio devices (EXPERIMENTAL)"
40 depends on EXPERIMENTAL
41 select VIRTIO
42 select VIRTIO_RING
43 ---help---
44 This drivers provides support for memory mapped virtio
45 platform device driver.
46
47 If unsure, say N.
48
38endmenu 49endmenu
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index 6738c446c199..5a4c63cfd380 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -1,4 +1,5 @@
1obj-$(CONFIG_VIRTIO) += virtio.o 1obj-$(CONFIG_VIRTIO) += virtio.o
2obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o 2obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
3obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
3obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o 4obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
4obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o 5obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
new file mode 100644
index 000000000000..acc5e43c373e
--- /dev/null
+++ b/drivers/virtio/virtio_mmio.c
@@ -0,0 +1,479 @@
1/*
2 * Virtio memory mapped device driver
3 *
4 * Copyright 2011, ARM Ltd.
5 *
6 * This module allows virtio devices to be used over a virtual, memory mapped
7 * platform device.
8 *
9 * Registers layout (all 32-bit wide):
10 *
11 * offset d. name description
12 * ------ -- ---------------- -----------------
13 *
14 * 0x000 R MagicValue Magic value "virt"
15 * 0x004 R Version Device version (current max. 1)
16 * 0x008 R DeviceID Virtio device ID
17 * 0x00c R VendorID Virtio vendor ID
18 *
19 * 0x010 R HostFeatures Features supported by the host
20 * 0x014 W HostFeaturesSel Set of host features to access via HostFeatures
21 *
22 * 0x020 W GuestFeatures Features activated by the guest
23 * 0x024 W GuestFeaturesSel Set of activated features to set via GuestFeatures
24 * 0x028 W GuestPageSize Size of guest's memory page in bytes
25 *
26 * 0x030 W QueueSel Queue selector
27 * 0x034 R QueueNumMax Maximum size of the currently selected queue
28 * 0x038 W QueueNum Queue size for the currently selected queue
29 * 0x03c W QueueAlign Used Ring alignment for the current queue
30 * 0x040 RW QueuePFN PFN for the currently selected queue
31 *
32 * 0x050 W QueueNotify Queue notifier
33 * 0x060 R InterruptStatus Interrupt status register
34 * 0x060 W InterruptACK Interrupt acknowledge register
35 * 0x070 RW Status Device status register
36 *
37 * 0x100+ RW Device-specific configuration space
38 *
39 * Based on Virtio PCI driver by Anthony Liguori, copyright IBM Corp. 2007
40 *
41 * This work is licensed under the terms of the GNU GPL, version 2 or later.
42 * See the COPYING file in the top-level directory.
43 */
44
45#include <linux/highmem.h>
46#include <linux/interrupt.h>
47#include <linux/io.h>
48#include <linux/list.h>
49#include <linux/module.h>
50#include <linux/platform_device.h>
51#include <linux/slab.h>
52#include <linux/spinlock.h>
53#include <linux/virtio.h>
54#include <linux/virtio_config.h>
55#include <linux/virtio_mmio.h>
56#include <linux/virtio_ring.h>
57
58
59
60/* The alignment to use between consumer and producer parts of vring.
61 * Currently hardcoded to the page size. */
62#define VIRTIO_MMIO_VRING_ALIGN PAGE_SIZE
63
64
65
66#define to_virtio_mmio_device(_plat_dev) \
67 container_of(_plat_dev, struct virtio_mmio_device, vdev)
68
69struct virtio_mmio_device {
70 struct virtio_device vdev;
71 struct platform_device *pdev;
72
73 void __iomem *base;
74 unsigned long version;
75
76 /* a list of queues so we can dispatch IRQs */
77 spinlock_t lock;
78 struct list_head virtqueues;
79};
80
81struct virtio_mmio_vq_info {
82 /* the actual virtqueue */
83 struct virtqueue *vq;
84
85 /* the number of entries in the queue */
86 unsigned int num;
87
88 /* the index of the queue */
89 int queue_index;
90
91 /* the virtual address of the ring queue */
92 void *queue;
93
94 /* the list node for the virtqueues list */
95 struct list_head node;
96};
97
98
99
100/* Configuration interface */
101
102static u32 vm_get_features(struct virtio_device *vdev)
103{
104 struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
105
106 /* TODO: Features > 32 bits */
107 writel(0, vm_dev->base + VIRTIO_MMIO_HOST_FEATURES_SEL);
108
109 return readl(vm_dev->base + VIRTIO_MMIO_HOST_FEATURES);
110}
111
112static void vm_finalize_features(struct virtio_device *vdev)
113{
114 struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
115 int i;
116
117 /* Give virtio_ring a chance to accept features. */
118 vring_transport_features(vdev);
119
120 for (i = 0; i < ARRAY_SIZE(vdev->features); i++) {
121 writel(i, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SET);
122 writel(vdev->features[i],
123 vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES);
124 }
125}
126
127static void vm_get(struct virtio_device *vdev, unsigned offset,
128 void *buf, unsigned len)
129{
130 struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
131 u8 *ptr = buf;
132 int i;
133
134 for (i = 0; i < len; i++)
135 ptr[i] = readb(vm_dev->base + VIRTIO_MMIO_CONFIG + offset + i);
136}
137
138static void vm_set(struct virtio_device *vdev, unsigned offset,
139 const void *buf, unsigned len)
140{
141 struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
142 const u8 *ptr = buf;
143 int i;
144
145 for (i = 0; i < len; i++)
146 writeb(ptr[i], vm_dev->base + VIRTIO_MMIO_CONFIG + offset + i);
147}
148
149static u8 vm_get_status(struct virtio_device *vdev)
150{
151 struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
152
153 return readl(vm_dev->base + VIRTIO_MMIO_STATUS) & 0xff;
154}
155
156static void vm_set_status(struct virtio_device *vdev, u8 status)
157{
158 struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
159
160 /* We should never be setting status to 0. */
161 BUG_ON(status == 0);
162
163 writel(status, vm_dev->base + VIRTIO_MMIO_STATUS);
164}
165
166static void vm_reset(struct virtio_device *vdev)
167{
168 struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
169
170 /* 0 status means a reset. */
171 writel(0, vm_dev->base + VIRTIO_MMIO_STATUS);
172}
173
174
175
176/* Transport interface */
177
178/* the notify function used when creating a virt queue */
179static void vm_notify(struct virtqueue *vq)
180{
181 struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev);
182 struct virtio_mmio_vq_info *info = vq->priv;
183
184 /* We write the queue's selector into the notification register to
185 * signal the other end */
186 writel(info->queue_index, vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY);
187}
188
189/* Notify all virtqueues on an interrupt. */
190static irqreturn_t vm_interrupt(int irq, void *opaque)
191{
192 struct virtio_mmio_device *vm_dev = opaque;
193 struct virtio_mmio_vq_info *info;
194 struct virtio_driver *vdrv = container_of(vm_dev->vdev.dev.driver,
195 struct virtio_driver, driver);
196 unsigned long status;
197 unsigned long flags;
198 irqreturn_t ret = IRQ_NONE;
199
200 /* Read and acknowledge interrupts */
201 status = readl(vm_dev->base + VIRTIO_MMIO_INTERRUPT_STATUS);
202 writel(status, vm_dev->base + VIRTIO_MMIO_INTERRUPT_ACK);
203
204 if (unlikely(status & VIRTIO_MMIO_INT_CONFIG)
205 && vdrv && vdrv->config_changed) {
206 vdrv->config_changed(&vm_dev->vdev);
207 ret = IRQ_HANDLED;
208 }
209
210 if (likely(status & VIRTIO_MMIO_INT_VRING)) {
211 spin_lock_irqsave(&vm_dev->lock, flags);
212 list_for_each_entry(info, &vm_dev->virtqueues, node)
213 ret |= vring_interrupt(irq, info->vq);
214 spin_unlock_irqrestore(&vm_dev->lock, flags);
215 }
216
217 return ret;
218}
219
220
221
222static void vm_del_vq(struct virtqueue *vq)
223{
224 struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev);
225 struct virtio_mmio_vq_info *info = vq->priv;
226 unsigned long flags, size;
227
228 spin_lock_irqsave(&vm_dev->lock, flags);
229 list_del(&info->node);
230 spin_unlock_irqrestore(&vm_dev->lock, flags);
231
232 vring_del_virtqueue(vq);
233
234 /* Select and deactivate the queue */
235 writel(info->queue_index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);
236 writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
237
238 size = PAGE_ALIGN(vring_size(info->num, VIRTIO_MMIO_VRING_ALIGN));
239 free_pages_exact(info->queue, size);
240 kfree(info);
241}
242
243static void vm_del_vqs(struct virtio_device *vdev)
244{
245 struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
246 struct virtqueue *vq, *n;
247
248 list_for_each_entry_safe(vq, n, &vdev->vqs, list)
249 vm_del_vq(vq);
250
251 free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
252}
253
254
255
256static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
257 void (*callback)(struct virtqueue *vq),
258 const char *name)
259{
260 struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
261 struct virtio_mmio_vq_info *info;
262 struct virtqueue *vq;
263 unsigned long flags, size;
264 int err;
265
266 /* Select the queue we're interested in */
267 writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);
268
269 /* Queue shouldn't already be set up. */
270 if (readl(vm_dev->base + VIRTIO_MMIO_QUEUE_PFN)) {
271 err = -ENOENT;
272 goto error_available;
273 }
274
275 /* Allocate and fill out our active queue description */
276 info = kmalloc(sizeof(*info), GFP_KERNEL);
277 if (!info) {
278 err = -ENOMEM;
279 goto error_kmalloc;
280 }
281 info->queue_index = index;
282
283 /* Allocate pages for the queue - start with a queue as big as
284 * possible (limited by maximum size allowed by device), drop down
285 * to a minimal size, just big enough to fit descriptor table
286 * and two rings (which makes it "alignment_size * 2")
287 */
288 info->num = readl(vm_dev->base + VIRTIO_MMIO_QUEUE_NUM_MAX);
289 while (1) {
290 size = PAGE_ALIGN(vring_size(info->num,
291 VIRTIO_MMIO_VRING_ALIGN));
292 /* Already smallest possible allocation? */
293 if (size <= VIRTIO_MMIO_VRING_ALIGN * 2) {
294 err = -ENOMEM;
295 goto error_alloc_pages;
296 }
297
298 info->queue = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
299 if (info->queue)
300 break;
301
302 info->num /= 2;
303 }
304
305 /* Activate the queue */
306 writel(info->num, vm_dev->base + VIRTIO_MMIO_QUEUE_NUM);
307 writel(VIRTIO_MMIO_VRING_ALIGN,
308 vm_dev->base + VIRTIO_MMIO_QUEUE_ALIGN);
309 writel(virt_to_phys(info->queue) >> PAGE_SHIFT,
310 vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
311
312 /* Create the vring */
313 vq = vring_new_virtqueue(info->num, VIRTIO_MMIO_VRING_ALIGN,
314 vdev, info->queue, vm_notify, callback, name);
315 if (!vq) {
316 err = -ENOMEM;
317 goto error_new_virtqueue;
318 }
319
320 vq->priv = info;
321 info->vq = vq;
322
323 spin_lock_irqsave(&vm_dev->lock, flags);
324 list_add(&info->node, &vm_dev->virtqueues);
325 spin_unlock_irqrestore(&vm_dev->lock, flags);
326
327 return vq;
328
329error_new_virtqueue:
330 writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
331 free_pages_exact(info->queue, size);
332error_alloc_pages:
333 kfree(info);
334error_kmalloc:
335error_available:
336 return ERR_PTR(err);
337}
338
339static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
340 struct virtqueue *vqs[],
341 vq_callback_t *callbacks[],
342 const char *names[])
343{
344 struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
345 unsigned int irq = platform_get_irq(vm_dev->pdev, 0);
346 int i, err;
347
348 err = request_irq(irq, vm_interrupt, IRQF_SHARED,
349 dev_name(&vdev->dev), vm_dev);
350 if (err)
351 return err;
352
353 for (i = 0; i < nvqs; ++i) {
354 vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i]);
355 if (IS_ERR(vqs[i])) {
356 vm_del_vqs(vdev);
357 return PTR_ERR(vqs[i]);
358 }
359 }
360
361 return 0;
362}
363
364
365
366static struct virtio_config_ops virtio_mmio_config_ops = {
367 .get = vm_get,
368 .set = vm_set,
369 .get_status = vm_get_status,
370 .set_status = vm_set_status,
371 .reset = vm_reset,
372 .find_vqs = vm_find_vqs,
373 .del_vqs = vm_del_vqs,
374 .get_features = vm_get_features,
375 .finalize_features = vm_finalize_features,
376};
377
378
379
380/* Platform device */
381
382static int __devinit virtio_mmio_probe(struct platform_device *pdev)
383{
384 struct virtio_mmio_device *vm_dev;
385 struct resource *mem;
386 unsigned long magic;
387
388 mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
389 if (!mem)
390 return -EINVAL;
391
392 if (!devm_request_mem_region(&pdev->dev, mem->start,
393 resource_size(mem), pdev->name))
394 return -EBUSY;
395
396 vm_dev = devm_kzalloc(&pdev->dev, sizeof(*vm_dev), GFP_KERNEL);
397 if (!vm_dev)
398 return -ENOMEM;
399
400 vm_dev->vdev.dev.parent = &pdev->dev;
401 vm_dev->vdev.config = &virtio_mmio_config_ops;
402 vm_dev->pdev = pdev;
403 INIT_LIST_HEAD(&vm_dev->virtqueues);
404 spin_lock_init(&vm_dev->lock);
405
406 vm_dev->base = devm_ioremap(&pdev->dev, mem->start, resource_size(mem));
407 if (vm_dev->base == NULL)
408 return -EFAULT;
409
410 /* Check magic value */
411 magic = readl(vm_dev->base + VIRTIO_MMIO_MAGIC_VALUE);
412 if (memcmp(&magic, "virt", 4) != 0) {
413 dev_warn(&pdev->dev, "Wrong magic value 0x%08lx!\n", magic);
414 return -ENODEV;
415 }
416
417 /* Check device version */
418 vm_dev->version = readl(vm_dev->base + VIRTIO_MMIO_VERSION);
419 if (vm_dev->version != 1) {
420 dev_err(&pdev->dev, "Version %ld not supported!\n",
421 vm_dev->version);
422 return -ENXIO;
423 }
424
425 vm_dev->vdev.id.device = readl(vm_dev->base + VIRTIO_MMIO_DEVICE_ID);
426 vm_dev->vdev.id.vendor = readl(vm_dev->base + VIRTIO_MMIO_VENDOR_ID);
427
428 writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE);
429
430 platform_set_drvdata(pdev, vm_dev);
431
432 return register_virtio_device(&vm_dev->vdev);
433}
434
435static int __devexit virtio_mmio_remove(struct platform_device *pdev)
436{
437 struct virtio_mmio_device *vm_dev = platform_get_drvdata(pdev);
438
439 unregister_virtio_device(&vm_dev->vdev);
440
441 return 0;
442}
443
444
445
446/* Platform driver */
447
448static struct of_device_id virtio_mmio_match[] = {
449 { .compatible = "virtio,mmio", },
450 {},
451};
452MODULE_DEVICE_TABLE(of, virtio_mmio_match);
453
454static struct platform_driver virtio_mmio_driver = {
455 .probe = virtio_mmio_probe,
456 .remove = __devexit_p(virtio_mmio_remove),
457 .driver = {
458 .name = "virtio-mmio",
459 .owner = THIS_MODULE,
460 .of_match_table = virtio_mmio_match,
461 },
462};
463
464static int __init virtio_mmio_init(void)
465{
466 return platform_driver_register(&virtio_mmio_driver);
467}
468
469static void __exit virtio_mmio_exit(void)
470{
471 platform_driver_unregister(&virtio_mmio_driver);
472}
473
474module_init(virtio_mmio_init);
475module_exit(virtio_mmio_exit);
476
477MODULE_AUTHOR("Pawel Moll <pawel.moll@arm.com>");
478MODULE_DESCRIPTION("Platform bus driver for memory mapped virtio devices");
479MODULE_LICENSE("GPL");
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 4bcc8b82640b..79a31e5b4b68 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -415,9 +415,13 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
415 } 415 }
416 } 416 }
417 417
418 spin_lock_irqsave(&vp_dev->lock, flags); 418 if (callback) {
419 list_add(&info->node, &vp_dev->virtqueues); 419 spin_lock_irqsave(&vp_dev->lock, flags);
420 spin_unlock_irqrestore(&vp_dev->lock, flags); 420 list_add(&info->node, &vp_dev->virtqueues);
421 spin_unlock_irqrestore(&vp_dev->lock, flags);
422 } else {
423 INIT_LIST_HEAD(&info->node);
424 }
421 425
422 return vq; 426 return vq;
423 427
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 86b0735e6aa0..64c6752ea2c6 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -726,7 +726,7 @@ config SBC8360_WDT
726 726
727config SBC7240_WDT 727config SBC7240_WDT
728 tristate "SBC Nano 7240 Watchdog Timer" 728 tristate "SBC Nano 7240 Watchdog Timer"
729 depends on X86_32 729 depends on X86_32 && !UML
730 ---help--- 730 ---help---
731 This is the driver for the hardware watchdog found on the IEI 731 This is the driver for the hardware watchdog found on the IEI
732 single board computers EPIC Nano 7240 (and likely others). This 732 single board computers EPIC Nano 7240 (and likely others). This
@@ -1174,6 +1174,10 @@ config XEN_WDT
1174 by Xen 4.0 and newer. The watchdog timeout period is normally one 1174 by Xen 4.0 and newer. The watchdog timeout period is normally one
1175 minute but can be changed with a boot-time parameter. 1175 minute but can be changed with a boot-time parameter.
1176 1176
1177config UML_WATCHDOG
1178 tristate "UML watchdog"
1179 depends on UML
1180
1177# 1181#
1178# ISA-based Watchdog Cards 1182# ISA-based Watchdog Cards
1179# 1183#
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index b5a1076aaa6c..879ed8851737 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1138,7 +1138,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
1138 struct v9fs_session_info *v9ses = sb->s_fs_info; 1138 struct v9fs_session_info *v9ses = sb->s_fs_info;
1139 struct v9fs_inode *v9inode = V9FS_I(inode); 1139 struct v9fs_inode *v9inode = V9FS_I(inode);
1140 1140
1141 inode->i_nlink = 1; 1141 set_nlink(inode, 1);
1142 1142
1143 inode->i_atime.tv_sec = stat->atime; 1143 inode->i_atime.tv_sec = stat->atime;
1144 inode->i_mtime.tv_sec = stat->mtime; 1144 inode->i_mtime.tv_sec = stat->mtime;
@@ -1164,7 +1164,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
1164 /* HARDLINKCOUNT %u */ 1164 /* HARDLINKCOUNT %u */
1165 sscanf(ext, "%13s %u", tag_name, &i_nlink); 1165 sscanf(ext, "%13s %u", tag_name, &i_nlink);
1166 if (!strncmp(tag_name, "HARDLINKCOUNT", 13)) 1166 if (!strncmp(tag_name, "HARDLINKCOUNT", 13))
1167 inode->i_nlink = i_nlink; 1167 set_nlink(inode, i_nlink);
1168 } 1168 }
1169 } 1169 }
1170 mode = stat->mode & S_IALLUGO; 1170 mode = stat->mode & S_IALLUGO;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index aded79fcd5cf..0b5745e21946 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -606,7 +606,7 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
606 inode->i_ctime.tv_nsec = stat->st_ctime_nsec; 606 inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
607 inode->i_uid = stat->st_uid; 607 inode->i_uid = stat->st_uid;
608 inode->i_gid = stat->st_gid; 608 inode->i_gid = stat->st_gid;
609 inode->i_nlink = stat->st_nlink; 609 set_nlink(inode, stat->st_nlink);
610 610
611 mode = stat->st_mode & S_IALLUGO; 611 mode = stat->st_mode & S_IALLUGO;
612 mode |= inode->i_mode & ~S_IALLUGO; 612 mode |= inode->i_mode & ~S_IALLUGO;
@@ -632,7 +632,7 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
632 if (stat->st_result_mask & P9_STATS_GID) 632 if (stat->st_result_mask & P9_STATS_GID)
633 inode->i_gid = stat->st_gid; 633 inode->i_gid = stat->st_gid;
634 if (stat->st_result_mask & P9_STATS_NLINK) 634 if (stat->st_result_mask & P9_STATS_NLINK)
635 inode->i_nlink = stat->st_nlink; 635 set_nlink(inode, stat->st_nlink);
636 if (stat->st_result_mask & P9_STATS_MODE) { 636 if (stat->st_result_mask & P9_STATS_MODE) {
637 inode->i_mode = stat->st_mode; 637 inode->i_mode = stat->st_mode;
638 if ((S_ISBLK(inode->i_mode)) || 638 if ((S_ISBLK(inode->i_mode)) ||
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index d5250c5aae21..1dab6a174d6a 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -247,7 +247,7 @@ adfs_iget(struct super_block *sb, struct object_info *obj)
247 inode->i_gid = ADFS_SB(sb)->s_gid; 247 inode->i_gid = ADFS_SB(sb)->s_gid;
248 inode->i_ino = obj->file_id; 248 inode->i_ino = obj->file_id;
249 inode->i_size = obj->size; 249 inode->i_size = obj->size;
250 inode->i_nlink = 2; 250 set_nlink(inode, 2);
251 inode->i_blocks = (inode->i_size + sb->s_blocksize - 1) >> 251 inode->i_blocks = (inode->i_size + sb->s_blocksize - 1) >>
252 sb->s_blocksize_bits; 252 sb->s_blocksize_bits;
253 253
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 3a4557e8325c..de37ec842340 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -215,7 +215,7 @@ affs_remove_link(struct dentry *dentry)
215 break; 215 break;
216 default: 216 default:
217 if (!AFFS_TAIL(sb, bh)->link_chain) 217 if (!AFFS_TAIL(sb, bh)->link_chain)
218 inode->i_nlink = 1; 218 set_nlink(inode, 1);
219 } 219 }
220 affs_free_block(sb, link_ino); 220 affs_free_block(sb, link_ino);
221 goto done; 221 goto done;
@@ -316,7 +316,7 @@ affs_remove_header(struct dentry *dentry)
316 if (inode->i_nlink > 1) 316 if (inode->i_nlink > 1)
317 retval = affs_remove_link(dentry); 317 retval = affs_remove_link(dentry);
318 else 318 else
319 inode->i_nlink = 0; 319 clear_nlink(inode);
320 affs_unlock_link(inode); 320 affs_unlock_link(inode);
321 inode->i_ctime = CURRENT_TIME_SEC; 321 inode->i_ctime = CURRENT_TIME_SEC;
322 mark_inode_dirty(inode); 322 mark_inode_dirty(inode);
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 5d828903ac69..88a4b0b50058 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -54,7 +54,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
54 prot = be32_to_cpu(tail->protect); 54 prot = be32_to_cpu(tail->protect);
55 55
56 inode->i_size = 0; 56 inode->i_size = 0;
57 inode->i_nlink = 1; 57 set_nlink(inode, 1);
58 inode->i_mode = 0; 58 inode->i_mode = 0;
59 AFFS_I(inode)->i_extcnt = 1; 59 AFFS_I(inode)->i_extcnt = 1;
60 AFFS_I(inode)->i_ext_last = ~1; 60 AFFS_I(inode)->i_ext_last = ~1;
@@ -137,7 +137,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
137 sbi->s_hashsize + 1; 137 sbi->s_hashsize + 1;
138 } 138 }
139 if (tail->link_chain) 139 if (tail->link_chain)
140 inode->i_nlink = 2; 140 set_nlink(inode, 2);
141 inode->i_mapping->a_ops = (sbi->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops; 141 inode->i_mapping->a_ops = (sbi->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops;
142 inode->i_op = &affs_file_inode_operations; 142 inode->i_op = &affs_file_inode_operations;
143 inode->i_fop = &affs_file_operations; 143 inode->i_fop = &affs_file_operations;
@@ -304,7 +304,7 @@ affs_new_inode(struct inode *dir)
304 inode->i_uid = current_fsuid(); 304 inode->i_uid = current_fsuid();
305 inode->i_gid = current_fsgid(); 305 inode->i_gid = current_fsgid();
306 inode->i_ino = block; 306 inode->i_ino = block;
307 inode->i_nlink = 1; 307 set_nlink(inode, 1);
308 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 308 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
309 atomic_set(&AFFS_I(inode)->i_opencnt, 0); 309 atomic_set(&AFFS_I(inode)->i_opencnt, 0);
310 AFFS_I(inode)->i_blkcnt = 0; 310 AFFS_I(inode)->i_blkcnt = 0;
@@ -387,7 +387,7 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
387 AFFS_TAIL(sb, inode_bh)->link_chain = cpu_to_be32(block); 387 AFFS_TAIL(sb, inode_bh)->link_chain = cpu_to_be32(block);
388 affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain)); 388 affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain));
389 mark_buffer_dirty_inode(inode_bh, inode); 389 mark_buffer_dirty_inode(inode_bh, inode);
390 inode->i_nlink = 2; 390 set_nlink(inode, 2);
391 ihold(inode); 391 ihold(inode);
392 } 392 }
393 affs_fix_checksum(sb, bh); 393 affs_fix_checksum(sb, bh);
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index e3e9efc1fdd8..780a11dc6318 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -277,7 +277,7 @@ affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata
277 inode->i_mapping->a_ops = (AFFS_SB(sb)->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops; 277 inode->i_mapping->a_ops = (AFFS_SB(sb)->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops;
278 error = affs_add_entry(dir, inode, dentry, ST_FILE); 278 error = affs_add_entry(dir, inode, dentry, ST_FILE);
279 if (error) { 279 if (error) {
280 inode->i_nlink = 0; 280 clear_nlink(inode);
281 iput(inode); 281 iput(inode);
282 return error; 282 return error;
283 } 283 }
@@ -305,7 +305,7 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
305 305
306 error = affs_add_entry(dir, inode, dentry, ST_USERDIR); 306 error = affs_add_entry(dir, inode, dentry, ST_USERDIR);
307 if (error) { 307 if (error) {
308 inode->i_nlink = 0; 308 clear_nlink(inode);
309 mark_inode_dirty(inode); 309 mark_inode_dirty(inode);
310 iput(inode); 310 iput(inode);
311 return error; 311 return error;
@@ -392,7 +392,7 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
392 return 0; 392 return 0;
393 393
394err: 394err:
395 inode->i_nlink = 0; 395 clear_nlink(inode);
396 mark_inode_dirty(inode); 396 mark_inode_dirty(inode);
397 iput(inode); 397 iput(inode);
398 return error; 398 return error;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 346e3289abd7..2f213d109c21 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -90,7 +90,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
90 vnode->vfs_inode.i_uid = status->owner; 90 vnode->vfs_inode.i_uid = status->owner;
91 vnode->vfs_inode.i_gid = status->group; 91 vnode->vfs_inode.i_gid = status->group;
92 vnode->vfs_inode.i_generation = vnode->fid.unique; 92 vnode->vfs_inode.i_generation = vnode->fid.unique;
93 vnode->vfs_inode.i_nlink = status->nlink; 93 set_nlink(&vnode->vfs_inode, status->nlink);
94 94
95 mode = vnode->vfs_inode.i_mode; 95 mode = vnode->vfs_inode.i_mode;
96 mode &= ~S_IALLUGO; 96 mode &= ~S_IALLUGO;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 0fdab6e03d87..d890ae3b2ce6 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -67,7 +67,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
67 fscache_attr_changed(vnode->cache); 67 fscache_attr_changed(vnode->cache);
68#endif 68#endif
69 69
70 inode->i_nlink = vnode->status.nlink; 70 set_nlink(inode, vnode->status.nlink);
71 inode->i_uid = vnode->status.owner; 71 inode->i_uid = vnode->status.owner;
72 inode->i_gid = 0; 72 inode->i_gid = 0;
73 inode->i_size = vnode->status.size; 73 inode->i_size = vnode->status.size;
@@ -174,7 +174,7 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
174 inode->i_size = 0; 174 inode->i_size = 0;
175 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 175 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
176 inode->i_op = &afs_autocell_inode_operations; 176 inode->i_op = &afs_autocell_inode_operations;
177 inode->i_nlink = 2; 177 set_nlink(inode, 2);
178 inode->i_uid = 0; 178 inode->i_uid = 0;
179 inode->i_gid = 0; 179 inode->i_gid = 0;
180 inode->i_ctime.tv_sec = get_seconds(); 180 inode->i_ctime.tv_sec = get_seconds();
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 180fa2425e49..8179f1ab8175 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -342,7 +342,7 @@ struct inode *autofs4_get_inode(struct super_block *sb, mode_t mode)
342 inode->i_ino = get_next_ino(); 342 inode->i_ino = get_next_ino();
343 343
344 if (S_ISDIR(mode)) { 344 if (S_ISDIR(mode)) {
345 inode->i_nlink = 2; 345 set_nlink(inode, 2);
346 inode->i_op = &autofs4_dir_inode_operations; 346 inode->i_op = &autofs4_dir_inode_operations;
347 inode->i_fop = &autofs4_dir_operations; 347 inode->i_fop = &autofs4_dir_operations;
348 } else if (S_ISLNK(mode)) { 348 } else if (S_ISLNK(mode)) {
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 720d885e8dca..8342ca67abcd 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -357,7 +357,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
357 inode->i_gid = befs_sb->mount_opts.use_gid ? 357 inode->i_gid = befs_sb->mount_opts.use_gid ?
358 befs_sb->mount_opts.gid : (gid_t) fs32_to_cpu(sb, raw_inode->gid); 358 befs_sb->mount_opts.gid : (gid_t) fs32_to_cpu(sb, raw_inode->gid);
359 359
360 inode->i_nlink = 1; 360 set_nlink(inode, 1);
361 361
362 /* 362 /*
363 * BEFS's time is 64 bits, but current VFS is 32 bits... 363 * BEFS's time is 64 bits, but current VFS is 32 bits...
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index b14cebfd9047..9cc074019479 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -199,7 +199,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
199 printf("unlinking non-existent file %s:%lu (nlink=%d)\n", 199 printf("unlinking non-existent file %s:%lu (nlink=%d)\n",
200 inode->i_sb->s_id, inode->i_ino, 200 inode->i_sb->s_id, inode->i_ino,
201 inode->i_nlink); 201 inode->i_nlink);
202 inode->i_nlink = 1; 202 set_nlink(inode, 1);
203 } 203 }
204 de->ino = 0; 204 de->ino = 0;
205 mark_buffer_dirty_inode(bh, dir); 205 mark_buffer_dirty_inode(bh, dir);
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index a8e37f81d097..697af5bf70b3 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -78,7 +78,7 @@ struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
78 BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); 78 BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino);
79 inode->i_uid = le32_to_cpu(di->i_uid); 79 inode->i_uid = le32_to_cpu(di->i_uid);
80 inode->i_gid = le32_to_cpu(di->i_gid); 80 inode->i_gid = le32_to_cpu(di->i_gid);
81 inode->i_nlink = le32_to_cpu(di->i_nlink); 81 set_nlink(inode, le32_to_cpu(di->i_nlink));
82 inode->i_size = BFS_FILESIZE(di); 82 inode->i_size = BFS_FILESIZE(di);
83 inode->i_blocks = BFS_FILEBLOCKS(di); 83 inode->i_blocks = BFS_FILEBLOCKS(di);
84 inode->i_atime.tv_sec = le32_to_cpu(di->i_atime); 84 inode->i_atime.tv_sec = le32_to_cpu(di->i_atime);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index ba1a1ae4a18a..1e9edbdeda7e 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -521,7 +521,7 @@ static void kill_node(Node *e)
521 write_unlock(&entries_lock); 521 write_unlock(&entries_lock);
522 522
523 if (dentry) { 523 if (dentry) {
524 dentry->d_inode->i_nlink--; 524 drop_nlink(dentry->d_inode);
525 d_drop(dentry); 525 d_drop(dentry);
526 dput(dentry); 526 dput(dentry);
527 simple_release_fs(&bm_mnt, &entry_count); 527 simple_release_fs(&bm_mnt, &entry_count);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index b52c672f4c18..ae4d9cd10961 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1641,7 +1641,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1641 inode->i_gid = btrfs_stack_inode_gid(inode_item); 1641 inode->i_gid = btrfs_stack_inode_gid(inode_item);
1642 btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); 1642 btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
1643 inode->i_mode = btrfs_stack_inode_mode(inode_item); 1643 inode->i_mode = btrfs_stack_inode_mode(inode_item);
1644 inode->i_nlink = btrfs_stack_inode_nlink(inode_item); 1644 set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1645 inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); 1645 inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1646 BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); 1646 BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1647 BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item); 1647 BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 07b3ac662e19..07ea91879a91 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1705,7 +1705,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1705 sb->s_bdi = &fs_info->bdi; 1705 sb->s_bdi = &fs_info->bdi;
1706 1706
1707 fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; 1707 fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
1708 fs_info->btree_inode->i_nlink = 1; 1708 set_nlink(fs_info->btree_inode, 1);
1709 /* 1709 /*
1710 * we set the i_size on the btree inode to the max possible int. 1710 * we set the i_size on the btree inode to the max possible int.
1711 * the real end of the address space is determined by all of 1711 * the real end of the address space is determined by all of
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b2d004ad66a0..75686a61bd45 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2534,7 +2534,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
2534 inode_item = btrfs_item_ptr(leaf, path->slots[0], 2534 inode_item = btrfs_item_ptr(leaf, path->slots[0],
2535 struct btrfs_inode_item); 2535 struct btrfs_inode_item);
2536 inode->i_mode = btrfs_inode_mode(leaf, inode_item); 2536 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
2537 inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); 2537 set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
2538 inode->i_uid = btrfs_inode_uid(leaf, inode_item); 2538 inode->i_uid = btrfs_inode_uid(leaf, inode_item);
2539 inode->i_gid = btrfs_inode_gid(leaf, inode_item); 2539 inode->i_gid = btrfs_inode_gid(leaf, inode_item);
2540 btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); 2540 btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
@@ -6728,7 +6728,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
6728 inode->i_op = &btrfs_dir_inode_operations; 6728 inode->i_op = &btrfs_dir_inode_operations;
6729 inode->i_fop = &btrfs_dir_file_operations; 6729 inode->i_fop = &btrfs_dir_file_operations;
6730 6730
6731 inode->i_nlink = 1; 6731 set_nlink(inode, 1);
6732 btrfs_i_size_write(inode, 0); 6732 btrfs_i_size_write(inode, 0);
6733 6733
6734 err = btrfs_update_inode(trans, new_root, inode); 6734 err = btrfs_update_inode(trans, new_root, inode);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 786639fca067..0618aa39740b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1030,7 +1030,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1030 } 1030 }
1031 btrfs_release_path(path); 1031 btrfs_release_path(path);
1032 if (nlink != inode->i_nlink) { 1032 if (nlink != inode->i_nlink) {
1033 inode->i_nlink = nlink; 1033 set_nlink(inode, nlink);
1034 btrfs_update_inode(trans, root, inode); 1034 btrfs_update_inode(trans, root, inode);
1035 } 1035 }
1036 BTRFS_I(inode)->index_cnt = (u64)-1; 1036 BTRFS_I(inode)->index_cnt = (u64)-1;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index b8731bf3ef1f..15b21e35078a 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2363,7 +2363,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2363 } 2363 }
2364 2364
2365 if ((issued & CEPH_CAP_LINK_EXCL) == 0) 2365 if ((issued & CEPH_CAP_LINK_EXCL) == 0)
2366 inode->i_nlink = le32_to_cpu(grant->nlink); 2366 set_nlink(inode, le32_to_cpu(grant->nlink));
2367 2367
2368 if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) { 2368 if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) {
2369 int len = le32_to_cpu(grant->xattr_len); 2369 int len = le32_to_cpu(grant->xattr_len);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5dde7d51dc11..1616a0d37cbd 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -618,7 +618,7 @@ static int fill_inode(struct inode *inode,
618 } 618 }
619 619
620 if ((issued & CEPH_CAP_LINK_EXCL) == 0) 620 if ((issued & CEPH_CAP_LINK_EXCL) == 0)
621 inode->i_nlink = le32_to_cpu(info->nlink); 621 set_nlink(inode, le32_to_cpu(info->nlink));
622 622
623 /* be careful with mtime, atime, size */ 623 /* be careful with mtime, atime, size */
624 ceph_decode_timespec(&atime, &info->atime); 624 ceph_decode_timespec(&atime, &info->atime);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 2c50bd2f65d1..e851d5b8931e 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -132,7 +132,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
132 inode->i_mtime = fattr->cf_mtime; 132 inode->i_mtime = fattr->cf_mtime;
133 inode->i_ctime = fattr->cf_ctime; 133 inode->i_ctime = fattr->cf_ctime;
134 inode->i_rdev = fattr->cf_rdev; 134 inode->i_rdev = fattr->cf_rdev;
135 inode->i_nlink = fattr->cf_nlink; 135 set_nlink(inode, fattr->cf_nlink);
136 inode->i_uid = fattr->cf_uid; 136 inode->i_uid = fattr->cf_uid;
137 inode->i_gid = fattr->cf_gid; 137 inode->i_gid = fattr->cf_gid;
138 138
@@ -905,7 +905,7 @@ struct inode *cifs_root_iget(struct super_block *sb)
905 if (rc && tcon->ipc) { 905 if (rc && tcon->ipc) {
906 cFYI(1, "ipc connection - fake read inode"); 906 cFYI(1, "ipc connection - fake read inode");
907 inode->i_mode |= S_IFDIR; 907 inode->i_mode |= S_IFDIR;
908 inode->i_nlink = 2; 908 set_nlink(inode, 2);
909 inode->i_op = &cifs_ipc_inode_ops; 909 inode->i_op = &cifs_ipc_inode_ops;
910 inode->i_fop = &simple_dir_operations; 910 inode->i_fop = &simple_dir_operations;
911 inode->i_uid = cifs_sb->mnt_uid; 911 inode->i_uid = cifs_sb->mnt_uid;
@@ -1367,7 +1367,7 @@ mkdir_get_info:
1367 /* setting nlink not necessary except in cases where we 1367 /* setting nlink not necessary except in cases where we
1368 * failed to get it from the server or was set bogus */ 1368 * failed to get it from the server or was set bogus */
1369 if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) 1369 if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2))
1370 direntry->d_inode->i_nlink = 2; 1370 set_nlink(direntry->d_inode, 2);
1371 1371
1372 mode &= ~current_umask(); 1372 mode &= ~current_umask();
1373 /* must turn on setgid bit if parent dir has it */ 1373 /* must turn on setgid bit if parent dir has it */
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 8693b5d0e180..6b0e06434391 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -433,7 +433,7 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
433 if (old_file->d_inode) { 433 if (old_file->d_inode) {
434 cifsInode = CIFS_I(old_file->d_inode); 434 cifsInode = CIFS_I(old_file->d_inode);
435 if (rc == 0) { 435 if (rc == 0) {
436 old_file->d_inode->i_nlink++; 436 inc_nlink(old_file->d_inode);
437/* BB should we make this contingent on superblock flag NOATIME? */ 437/* BB should we make this contingent on superblock flag NOATIME? */
438/* old_file->d_inode->i_ctime = CURRENT_TIME;*/ 438/* old_file->d_inode->i_ctime = CURRENT_TIME;*/
439 /* parent dir timestamps will update from srv 439 /* parent dir timestamps will update from srv
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 2bdbcc11b373..854ace712685 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -104,7 +104,7 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr)
104 if (attr->va_gid != -1) 104 if (attr->va_gid != -1)
105 inode->i_gid = (gid_t) attr->va_gid; 105 inode->i_gid = (gid_t) attr->va_gid;
106 if (attr->va_nlink != -1) 106 if (attr->va_nlink != -1)
107 inode->i_nlink = attr->va_nlink; 107 set_nlink(inode, attr->va_nlink);
108 if (attr->va_size != -1) 108 if (attr->va_size != -1)
109 inode->i_size = attr->va_size; 109 inode->i_size = attr->va_size;
110 if (attr->va_size != -1) 110 if (attr->va_size != -1)
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 0239433f50cb..28e7e135cfab 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -340,7 +340,7 @@ static int coda_rmdir(struct inode *dir, struct dentry *de)
340 if (!error) { 340 if (!error) {
341 /* VFS may delete the child */ 341 /* VFS may delete the child */
342 if (de->d_inode) 342 if (de->d_inode)
343 de->d_inode->i_nlink = 0; 343 clear_nlink(de->d_inode);
344 344
345 /* fix the link count of the parent */ 345 /* fix the link count of the parent */
346 coda_dir_drop_nlink(dir); 346 coda_dir_drop_nlink(dir);
diff --git a/fs/dcache.c b/fs/dcache.c
index a88948b8bd17..274f13e2f094 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -225,7 +225,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
225} 225}
226 226
227/* 227/*
228 * dentry_lru_(add|del|move_tail) must be called with d_lock held. 228 * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held.
229 */ 229 */
230static void dentry_lru_add(struct dentry *dentry) 230static void dentry_lru_add(struct dentry *dentry)
231{ 231{
@@ -245,6 +245,9 @@ static void __dentry_lru_del(struct dentry *dentry)
245 dentry_stat.nr_unused--; 245 dentry_stat.nr_unused--;
246} 246}
247 247
248/*
249 * Remove a dentry with references from the LRU.
250 */
248static void dentry_lru_del(struct dentry *dentry) 251static void dentry_lru_del(struct dentry *dentry)
249{ 252{
250 if (!list_empty(&dentry->d_lru)) { 253 if (!list_empty(&dentry->d_lru)) {
@@ -254,6 +257,23 @@ static void dentry_lru_del(struct dentry *dentry)
254 } 257 }
255} 258}
256 259
260/*
261 * Remove a dentry that is unreferenced and about to be pruned
262 * (unhashed and destroyed) from the LRU, and inform the file system.
263 * This wrapper should be called _prior_ to unhashing a victim dentry.
264 */
265static void dentry_lru_prune(struct dentry *dentry)
266{
267 if (!list_empty(&dentry->d_lru)) {
268 if (dentry->d_flags & DCACHE_OP_PRUNE)
269 dentry->d_op->d_prune(dentry);
270
271 spin_lock(&dcache_lru_lock);
272 __dentry_lru_del(dentry);
273 spin_unlock(&dcache_lru_lock);
274 }
275}
276
257static void dentry_lru_move_tail(struct dentry *dentry) 277static void dentry_lru_move_tail(struct dentry *dentry)
258{ 278{
259 spin_lock(&dcache_lru_lock); 279 spin_lock(&dcache_lru_lock);
@@ -403,8 +423,12 @@ relock:
403 423
404 if (ref) 424 if (ref)
405 dentry->d_count--; 425 dentry->d_count--;
406 /* if dentry was on the d_lru list delete it from there */ 426 /*
407 dentry_lru_del(dentry); 427 * if dentry was on the d_lru list delete it from there.
428 * inform the fs via d_prune that this dentry is about to be
429 * unhashed and destroyed.
430 */
431 dentry_lru_prune(dentry);
408 /* if it was on the hash then remove it */ 432 /* if it was on the hash then remove it */
409 __d_drop(dentry); 433 __d_drop(dentry);
410 return d_kill(dentry, parent); 434 return d_kill(dentry, parent);
@@ -854,8 +878,12 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
854 do { 878 do {
855 struct inode *inode; 879 struct inode *inode;
856 880
857 /* detach from the system */ 881 /*
858 dentry_lru_del(dentry); 882 * remove the dentry from the lru, and inform
883 * the fs that this dentry is about to be
884 * unhashed and destroyed.
885 */
886 dentry_lru_prune(dentry);
859 __d_shrink(dentry); 887 __d_shrink(dentry);
860 888
861 if (dentry->d_count != 0) { 889 if (dentry->d_count != 0) {
@@ -1283,6 +1311,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1283 dentry->d_flags |= DCACHE_OP_REVALIDATE; 1311 dentry->d_flags |= DCACHE_OP_REVALIDATE;
1284 if (op->d_delete) 1312 if (op->d_delete)
1285 dentry->d_flags |= DCACHE_OP_DELETE; 1313 dentry->d_flags |= DCACHE_OP_DELETE;
1314 if (op->d_prune)
1315 dentry->d_flags |= DCACHE_OP_PRUNE;
1286 1316
1287} 1317}
1288EXPORT_SYMBOL(d_set_d_op); 1318EXPORT_SYMBOL(d_set_d_op);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 2f27e578d466..d5d5297efe97 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -307,7 +307,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
307 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; 307 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
308 inode->i_op = &simple_dir_inode_operations; 308 inode->i_op = &simple_dir_inode_operations;
309 inode->i_fop = &simple_dir_operations; 309 inode->i_fop = &simple_dir_operations;
310 inode->i_nlink = 2; 310 set_nlink(inode, 2);
311 311
312 s->s_root = d_alloc_root(inode); 312 s->s_root = d_alloc_root(inode);
313 if (s->s_root) 313 if (s->s_root)
@@ -549,7 +549,7 @@ void devpts_pty_kill(struct tty_struct *tty)
549 549
550 dentry = d_find_alias(inode); 550 dentry = d_find_alias(inode);
551 551
552 inode->i_nlink--; 552 drop_nlink(inode);
553 d_delete(dentry); 553 d_delete(dentry);
554 dput(dentry); /* d_alloc_name() in devpts_pty_new() */ 554 dput(dentry); /* d_alloc_name() in devpts_pty_new() */
555 dput(dentry); /* d_find_alias above */ 555 dput(dentry); /* d_find_alias above */
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 11f8582d7218..a36d327f1521 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -474,8 +474,8 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
474 goto out_lock; 474 goto out_lock;
475 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); 475 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
476 fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode); 476 fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
477 old_dentry->d_inode->i_nlink = 477 set_nlink(old_dentry->d_inode,
478 ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink; 478 ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink);
479 i_size_write(new_dentry->d_inode, file_size_save); 479 i_size_write(new_dentry->d_inode, file_size_save);
480out_lock: 480out_lock:
481 unlock_dir(lower_dir_dentry); 481 unlock_dir(lower_dir_dentry);
@@ -499,8 +499,8 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
499 goto out_unlock; 499 goto out_unlock;
500 } 500 }
501 fsstack_copy_attr_times(dir, lower_dir_inode); 501 fsstack_copy_attr_times(dir, lower_dir_inode);
502 dentry->d_inode->i_nlink = 502 set_nlink(dentry->d_inode,
503 ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink; 503 ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink);
504 dentry->d_inode->i_ctime = dir->i_ctime; 504 dentry->d_inode->i_ctime = dir->i_ctime;
505 d_drop(dentry); 505 d_drop(dentry);
506out_unlock: 506out_unlock:
@@ -565,7 +565,7 @@ static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
565 goto out; 565 goto out;
566 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); 566 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
567 fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode); 567 fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
568 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; 568 set_nlink(dir, lower_dir_dentry->d_inode->i_nlink);
569out: 569out:
570 unlock_dir(lower_dir_dentry); 570 unlock_dir(lower_dir_dentry);
571 if (!dentry->d_inode) 571 if (!dentry->d_inode)
@@ -588,7 +588,7 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
588 if (!rc && dentry->d_inode) 588 if (!rc && dentry->d_inode)
589 clear_nlink(dentry->d_inode); 589 clear_nlink(dentry->d_inode);
590 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); 590 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
591 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; 591 set_nlink(dir, lower_dir_dentry->d_inode->i_nlink);
592 unlock_dir(lower_dir_dentry); 592 unlock_dir(lower_dir_dentry);
593 if (!rc) 593 if (!rc)
594 d_drop(dentry); 594 d_drop(dentry);
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index 9c13412e6c99..bc84f365d75c 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -96,7 +96,7 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino)
96 efs_inode = (struct efs_dinode *) (bh->b_data + offset); 96 efs_inode = (struct efs_dinode *) (bh->b_data + offset);
97 97
98 inode->i_mode = be16_to_cpu(efs_inode->di_mode); 98 inode->i_mode = be16_to_cpu(efs_inode->di_mode);
99 inode->i_nlink = be16_to_cpu(efs_inode->di_nlink); 99 set_nlink(inode, be16_to_cpu(efs_inode->di_nlink));
100 inode->i_uid = (uid_t)be16_to_cpu(efs_inode->di_uid); 100 inode->i_uid = (uid_t)be16_to_cpu(efs_inode->di_uid);
101 inode->i_gid = (gid_t)be16_to_cpu(efs_inode->di_gid); 101 inode->i_gid = (gid_t)be16_to_cpu(efs_inode->di_gid);
102 inode->i_size = be32_to_cpu(efs_inode->di_size); 102 inode->i_size = be32_to_cpu(efs_inode->di_size);
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 3e5f3a6be90a..f6dbf7768ce6 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1165,7 +1165,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
1165 inode->i_mode = le16_to_cpu(fcb.i_mode); 1165 inode->i_mode = le16_to_cpu(fcb.i_mode);
1166 inode->i_uid = le32_to_cpu(fcb.i_uid); 1166 inode->i_uid = le32_to_cpu(fcb.i_uid);
1167 inode->i_gid = le32_to_cpu(fcb.i_gid); 1167 inode->i_gid = le32_to_cpu(fcb.i_gid);
1168 inode->i_nlink = le16_to_cpu(fcb.i_links_count); 1168 set_nlink(inode, le16_to_cpu(fcb.i_links_count));
1169 inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime); 1169 inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime);
1170 inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime); 1170 inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime);
1171 inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime); 1171 inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime);
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 8f44cef1b3ef..a8cbe1bc6ad4 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -421,7 +421,7 @@ static inline int rsv_is_empty(struct ext2_reserve_window *rsv)
421void ext2_init_block_alloc_info(struct inode *inode) 421void ext2_init_block_alloc_info(struct inode *inode)
422{ 422{
423 struct ext2_inode_info *ei = EXT2_I(inode); 423 struct ext2_inode_info *ei = EXT2_I(inode);
424 struct ext2_block_alloc_info *block_i = ei->i_block_alloc_info; 424 struct ext2_block_alloc_info *block_i;
425 struct super_block *sb = inode->i_sb; 425 struct super_block *sb = inode->i_sb;
426 426
427 block_i = kmalloc(sizeof(*block_i), GFP_NOFS); 427 block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index ee9ed31948e1..c4e81dfb74ba 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -601,7 +601,7 @@ fail_free_drop:
601fail_drop: 601fail_drop:
602 dquot_drop(inode); 602 dquot_drop(inode);
603 inode->i_flags |= S_NOQUOTA; 603 inode->i_flags |= S_NOQUOTA;
604 inode->i_nlink = 0; 604 clear_nlink(inode);
605 unlock_new_inode(inode); 605 unlock_new_inode(inode);
606 iput(inode); 606 iput(inode);
607 return ERR_PTR(err); 607 return ERR_PTR(err);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index a8a58f63f07c..91a6945af6d8 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1321,7 +1321,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
1321 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; 1321 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
1322 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; 1322 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
1323 } 1323 }
1324 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 1324 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
1325 inode->i_size = le32_to_cpu(raw_inode->i_size); 1325 inode->i_size = le32_to_cpu(raw_inode->i_size);
1326 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); 1326 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
1327 inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); 1327 inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 1dd62ed35b85..bd8ac164a3bf 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -327,10 +327,10 @@ static struct inode *ext2_nfs_get_inode(struct super_block *sb,
327 if (ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count)) 327 if (ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count))
328 return ERR_PTR(-ESTALE); 328 return ERR_PTR(-ESTALE);
329 329
330 /* iget isn't really right if the inode is currently unallocated!! 330 /*
331 * ext2_read_inode currently does appropriate checks, but 331 * ext2_iget isn't quite right if the inode is currently unallocated!
332 * it might be "neater" to call ext2_get_inode first and check 332 * However ext2_iget currently does appropriate checks to handle stale
333 * if the inode is valid..... 333 * inodes so everything is OK.
334 */ 334 */
335 inode = ext2_iget(sb, ino); 335 inode = ext2_iget(sb, ino);
336 if (IS_ERR(inode)) 336 if (IS_ERR(inode))
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 6386d76f44a7..a2038928f9a3 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -427,7 +427,7 @@ static inline int rsv_is_empty(struct ext3_reserve_window *rsv)
427void ext3_init_block_alloc_info(struct inode *inode) 427void ext3_init_block_alloc_info(struct inode *inode)
428{ 428{
429 struct ext3_inode_info *ei = EXT3_I(inode); 429 struct ext3_inode_info *ei = EXT3_I(inode);
430 struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info; 430 struct ext3_block_alloc_info *block_i;
431 struct super_block *sb = inode->i_sb; 431 struct super_block *sb = inode->i_sb;
432 432
433 block_i = kmalloc(sizeof(*block_i), GFP_NOFS); 433 block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
@@ -1440,14 +1440,14 @@ out:
1440 * 1440 *
1441 * Check if filesystem has at least 1 free block available for allocation. 1441 * Check if filesystem has at least 1 free block available for allocation.
1442 */ 1442 */
1443static int ext3_has_free_blocks(struct ext3_sb_info *sbi) 1443static int ext3_has_free_blocks(struct ext3_sb_info *sbi, int use_reservation)
1444{ 1444{
1445 ext3_fsblk_t free_blocks, root_blocks; 1445 ext3_fsblk_t free_blocks, root_blocks;
1446 1446
1447 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 1447 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
1448 root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); 1448 root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
1449 if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && 1449 if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
1450 sbi->s_resuid != current_fsuid() && 1450 !use_reservation && sbi->s_resuid != current_fsuid() &&
1451 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { 1451 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
1452 return 0; 1452 return 0;
1453 } 1453 }
@@ -1468,7 +1468,7 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
1468 */ 1468 */
1469int ext3_should_retry_alloc(struct super_block *sb, int *retries) 1469int ext3_should_retry_alloc(struct super_block *sb, int *retries)
1470{ 1470{
1471 if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3) 1471 if (!ext3_has_free_blocks(EXT3_SB(sb), 0) || (*retries)++ > 3)
1472 return 0; 1472 return 0;
1473 1473
1474 jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); 1474 jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
@@ -1546,7 +1546,7 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
1546 if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) 1546 if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
1547 my_rsv = &block_i->rsv_window_node; 1547 my_rsv = &block_i->rsv_window_node;
1548 1548
1549 if (!ext3_has_free_blocks(sbi)) { 1549 if (!ext3_has_free_blocks(sbi, IS_NOQUOTA(inode))) {
1550 *errp = -ENOSPC; 1550 *errp = -ENOSPC;
1551 goto out; 1551 goto out;
1552 } 1552 }
@@ -1924,9 +1924,10 @@ unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
1924 * reaches any used block. Then issue a TRIM command on this extent and free 1924 * reaches any used block. Then issue a TRIM command on this extent and free
1925 * the extent in the block bitmap. This is done until whole group is scanned. 1925 * the extent in the block bitmap. This is done until whole group is scanned.
1926 */ 1926 */
1927ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group, 1927static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb,
1928 ext3_grpblk_t start, ext3_grpblk_t max, 1928 unsigned int group,
1929 ext3_grpblk_t minblocks) 1929 ext3_grpblk_t start, ext3_grpblk_t max,
1930 ext3_grpblk_t minblocks)
1930{ 1931{
1931 handle_t *handle; 1932 handle_t *handle;
1932 ext3_grpblk_t next, free_blocks, bit, freed, count = 0; 1933 ext3_grpblk_t next, free_blocks, bit, freed, count = 0;
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index d494c554c6e6..1860ed356323 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -61,13 +61,6 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
61 if (ret) 61 if (ret)
62 goto out; 62 goto out;
63 63
64 /*
65 * Taking the mutex here just to keep consistent with how fsync was
66 * called previously, however it looks like we don't need to take
67 * i_mutex at all.
68 */
69 mutex_lock(&inode->i_mutex);
70
71 J_ASSERT(ext3_journal_current_handle() == NULL); 64 J_ASSERT(ext3_journal_current_handle() == NULL);
72 65
73 /* 66 /*
@@ -85,7 +78,6 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
85 * safe in-journal, which is all fsync() needs to ensure. 78 * safe in-journal, which is all fsync() needs to ensure.
86 */ 79 */
87 if (ext3_should_journal_data(inode)) { 80 if (ext3_should_journal_data(inode)) {
88 mutex_unlock(&inode->i_mutex);
89 ret = ext3_force_commit(inode->i_sb); 81 ret = ext3_force_commit(inode->i_sb);
90 goto out; 82 goto out;
91 } 83 }
@@ -108,8 +100,6 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
108 */ 100 */
109 if (needs_barrier) 101 if (needs_barrier)
110 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 102 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
111
112 mutex_unlock(&inode->i_mutex);
113out: 103out:
114 trace_ext3_sync_file_exit(inode, ret); 104 trace_ext3_sync_file_exit(inode, ret);
115 return ret; 105 return ret;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index bf09cbf938cc..5c866e06e7ab 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -178,42 +178,6 @@ error_return:
178} 178}
179 179
180/* 180/*
181 * There are two policies for allocating an inode. If the new inode is
182 * a directory, then a forward search is made for a block group with both
183 * free space and a low directory-to-inode ratio; if that fails, then of
184 * the groups with above-average free space, that group with the fewest
185 * directories already is chosen.
186 *
187 * For other inodes, search forward from the parent directory\'s block
188 * group to find a free inode.
189 */
190static int find_group_dir(struct super_block *sb, struct inode *parent)
191{
192 int ngroups = EXT3_SB(sb)->s_groups_count;
193 unsigned int freei, avefreei;
194 struct ext3_group_desc *desc, *best_desc = NULL;
195 int group, best_group = -1;
196
197 freei = percpu_counter_read_positive(&EXT3_SB(sb)->s_freeinodes_counter);
198 avefreei = freei / ngroups;
199
200 for (group = 0; group < ngroups; group++) {
201 desc = ext3_get_group_desc (sb, group, NULL);
202 if (!desc || !desc->bg_free_inodes_count)
203 continue;
204 if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
205 continue;
206 if (!best_desc ||
207 (le16_to_cpu(desc->bg_free_blocks_count) >
208 le16_to_cpu(best_desc->bg_free_blocks_count))) {
209 best_group = group;
210 best_desc = desc;
211 }
212 }
213 return best_group;
214}
215
216/*
217 * Orlov's allocator for directories. 181 * Orlov's allocator for directories.
218 * 182 *
219 * We always try to spread first-level directories. 183 * We always try to spread first-level directories.
@@ -436,12 +400,9 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
436 400
437 sbi = EXT3_SB(sb); 401 sbi = EXT3_SB(sb);
438 es = sbi->s_es; 402 es = sbi->s_es;
439 if (S_ISDIR(mode)) { 403 if (S_ISDIR(mode))
440 if (test_opt (sb, OLDALLOC)) 404 group = find_group_orlov(sb, dir);
441 group = find_group_dir(sb, dir); 405 else
442 else
443 group = find_group_orlov(sb, dir);
444 } else
445 group = find_group_other(sb, dir); 406 group = find_group_other(sb, dir);
446 407
447 err = -ENOSPC; 408 err = -ENOSPC;
@@ -621,7 +582,7 @@ fail_free_drop:
621fail_drop: 582fail_drop:
622 dquot_drop(inode); 583 dquot_drop(inode);
623 inode->i_flags |= S_NOQUOTA; 584 inode->i_flags |= S_NOQUOTA;
624 inode->i_nlink = 0; 585 clear_nlink(inode);
625 unlock_new_inode(inode); 586 unlock_new_inode(inode);
626 iput(inode); 587 iput(inode);
627 brelse(bitmap_bh); 588 brelse(bitmap_bh);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 12661e1deedd..85fe655fe3e0 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2899,7 +2899,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
2899 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; 2899 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2900 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; 2900 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2901 } 2901 }
2902 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 2902 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
2903 inode->i_size = le32_to_cpu(raw_inode->i_size); 2903 inode->i_size = le32_to_cpu(raw_inode->i_size);
2904 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); 2904 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
2905 inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); 2905 inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index c7f43944f160..ba1b54e23cae 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -150,30 +150,6 @@ setversion_out:
150 mnt_drop_write(filp->f_path.mnt); 150 mnt_drop_write(filp->f_path.mnt);
151 return err; 151 return err;
152 } 152 }
153#ifdef CONFIG_JBD_DEBUG
154 case EXT3_IOC_WAIT_FOR_READONLY:
155 /*
156 * This is racy - by the time we're woken up and running,
157 * the superblock could be released. And the module could
158 * have been unloaded. So sue me.
159 *
160 * Returns 1 if it slept, else zero.
161 */
162 {
163 struct super_block *sb = inode->i_sb;
164 DECLARE_WAITQUEUE(wait, current);
165 int ret = 0;
166
167 set_current_state(TASK_INTERRUPTIBLE);
168 add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
169 if (timer_pending(&EXT3_SB(sb)->turn_ro_timer)) {
170 schedule();
171 ret = 1;
172 }
173 remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
174 return ret;
175 }
176#endif
177 case EXT3_IOC_GETRSVSZ: 153 case EXT3_IOC_GETRSVSZ:
178 if (test_opt(inode->i_sb, RESERVATION) 154 if (test_opt(inode->i_sb, RESERVATION)
179 && S_ISREG(inode->i_mode) 155 && S_ISREG(inode->i_mode)
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 0629e09f6511..642dc6d66dfd 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1821,7 +1821,7 @@ retry:
1821 de->name_len = 2; 1821 de->name_len = 2;
1822 strcpy (de->name, ".."); 1822 strcpy (de->name, "..");
1823 ext3_set_de_type(dir->i_sb, de, S_IFDIR); 1823 ext3_set_de_type(dir->i_sb, de, S_IFDIR);
1824 inode->i_nlink = 2; 1824 set_nlink(inode, 2);
1825 BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); 1825 BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
1826 err = ext3_journal_dirty_metadata(handle, dir_block); 1826 err = ext3_journal_dirty_metadata(handle, dir_block);
1827 if (err) 1827 if (err)
@@ -1833,7 +1833,7 @@ retry:
1833 1833
1834 if (err) { 1834 if (err) {
1835out_clear_inode: 1835out_clear_inode:
1836 inode->i_nlink = 0; 1836 clear_nlink(inode);
1837 unlock_new_inode(inode); 1837 unlock_new_inode(inode);
1838 ext3_mark_inode_dirty(handle, inode); 1838 ext3_mark_inode_dirty(handle, inode);
1839 iput (inode); 1839 iput (inode);
@@ -2170,7 +2170,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
2170 ext3_warning (inode->i_sb, "ext3_unlink", 2170 ext3_warning (inode->i_sb, "ext3_unlink",
2171 "Deleting nonexistent file (%lu), %d", 2171 "Deleting nonexistent file (%lu), %d",
2172 inode->i_ino, inode->i_nlink); 2172 inode->i_ino, inode->i_nlink);
2173 inode->i_nlink = 1; 2173 set_nlink(inode, 1);
2174 } 2174 }
2175 retval = ext3_delete_entry(handle, dir, de, bh); 2175 retval = ext3_delete_entry(handle, dir, de, bh);
2176 if (retval) 2176 if (retval)
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 7beb69ae0015..922d289aeeb3 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -652,8 +652,6 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
652 seq_puts(seq, ",nouid32"); 652 seq_puts(seq, ",nouid32");
653 if (test_opt(sb, DEBUG)) 653 if (test_opt(sb, DEBUG))
654 seq_puts(seq, ",debug"); 654 seq_puts(seq, ",debug");
655 if (test_opt(sb, OLDALLOC))
656 seq_puts(seq, ",oldalloc");
657#ifdef CONFIG_EXT3_FS_XATTR 655#ifdef CONFIG_EXT3_FS_XATTR
658 if (test_opt(sb, XATTR_USER)) 656 if (test_opt(sb, XATTR_USER))
659 seq_puts(seq, ",user_xattr"); 657 seq_puts(seq, ",user_xattr");
@@ -1049,10 +1047,12 @@ static int parse_options (char *options, struct super_block *sb,
1049 set_opt (sbi->s_mount_opt, DEBUG); 1047 set_opt (sbi->s_mount_opt, DEBUG);
1050 break; 1048 break;
1051 case Opt_oldalloc: 1049 case Opt_oldalloc:
1052 set_opt (sbi->s_mount_opt, OLDALLOC); 1050 ext3_msg(sb, KERN_WARNING,
1051 "Ignoring deprecated oldalloc option");
1053 break; 1052 break;
1054 case Opt_orlov: 1053 case Opt_orlov:
1055 clear_opt (sbi->s_mount_opt, OLDALLOC); 1054 ext3_msg(sb, KERN_WARNING,
1055 "Ignoring deprecated orlov option");
1056 break; 1056 break;
1057#ifdef CONFIG_EXT3_FS_XATTR 1057#ifdef CONFIG_EXT3_FS_XATTR
1058 case Opt_user_xattr: 1058 case Opt_user_xattr:
@@ -2669,13 +2669,13 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2669 /* 2669 /*
2670 * If we have an unprocessed orphan list hanging 2670 * If we have an unprocessed orphan list hanging
2671 * around from a previously readonly bdev mount, 2671 * around from a previously readonly bdev mount,
2672 * require a full umount/remount for now. 2672 * require a full umount & mount for now.
2673 */ 2673 */
2674 if (es->s_last_orphan) { 2674 if (es->s_last_orphan) {
2675 ext3_msg(sb, KERN_WARNING, "warning: couldn't " 2675 ext3_msg(sb, KERN_WARNING, "warning: couldn't "
2676 "remount RDWR because of unprocessed " 2676 "remount RDWR because of unprocessed "
2677 "orphan inode list. Please " 2677 "orphan inode list. Please "
2678 "umount/remount instead."); 2678 "umount & mount instead.");
2679 err = -EINVAL; 2679 err = -EINVAL;
2680 goto restore_opts; 2680 goto restore_opts;
2681 } 2681 }
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index f8224adf496e..f6dba4505f1c 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -28,7 +28,8 @@
28 */ 28 */
29 29
30/* 30/*
31 * Calculate the block group number and offset, given a block number 31 * Calculate the block group number and offset into the block/cluster
32 * allocation bitmap, given a block number
32 */ 33 */
33void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, 34void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
34 ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp) 35 ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp)
@@ -37,7 +38,8 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
37 ext4_grpblk_t offset; 38 ext4_grpblk_t offset;
38 39
39 blocknr = blocknr - le32_to_cpu(es->s_first_data_block); 40 blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
40 offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)); 41 offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)) >>
42 EXT4_SB(sb)->s_cluster_bits;
41 if (offsetp) 43 if (offsetp)
42 *offsetp = offset; 44 *offsetp = offset;
43 if (blockgrpp) 45 if (blockgrpp)
@@ -55,130 +57,169 @@ static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
55 return 0; 57 return 0;
56} 58}
57 59
58static int ext4_group_used_meta_blocks(struct super_block *sb, 60/* Return the number of clusters used for file system metadata; this
59 ext4_group_t block_group, 61 * represents the overhead needed by the file system.
60 struct ext4_group_desc *gdp) 62 */
63unsigned ext4_num_overhead_clusters(struct super_block *sb,
64 ext4_group_t block_group,
65 struct ext4_group_desc *gdp)
61{ 66{
62 ext4_fsblk_t tmp; 67 unsigned num_clusters;
68 int block_cluster = -1, inode_cluster = -1, itbl_cluster = -1, i, c;
69 ext4_fsblk_t start = ext4_group_first_block_no(sb, block_group);
70 ext4_fsblk_t itbl_blk;
63 struct ext4_sb_info *sbi = EXT4_SB(sb); 71 struct ext4_sb_info *sbi = EXT4_SB(sb);
64 /* block bitmap, inode bitmap, and inode table blocks */
65 int used_blocks = sbi->s_itb_per_group + 2;
66 72
67 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { 73 /* This is the number of clusters used by the superblock,
68 if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), 74 * block group descriptors, and reserved block group
69 block_group)) 75 * descriptor blocks */
70 used_blocks--; 76 num_clusters = ext4_num_base_meta_clusters(sb, block_group);
71 77
72 if (!ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), 78 /*
73 block_group)) 79 * For the allocation bitmaps and inode table, we first need
74 used_blocks--; 80 * to check to see if the block is in the block group. If it
75 81 * is, then check to see if the cluster is already accounted
76 tmp = ext4_inode_table(sb, gdp); 82 * for in the clusters used for the base metadata cluster, or
77 for (; tmp < ext4_inode_table(sb, gdp) + 83 * if we can increment the base metadata cluster to include
78 sbi->s_itb_per_group; tmp++) { 84 * that block. Otherwise, we will have to track the cluster
79 if (!ext4_block_in_group(sb, tmp, block_group)) 85 * used for the allocation bitmap or inode table explicitly.
80 used_blocks -= 1; 86 * Normally all of these blocks are contiguous, so the special
87 * case handling shouldn't be necessary except for *very*
88 * unusual file system layouts.
89 */
90 if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) {
91 block_cluster = EXT4_B2C(sbi, (start -
92 ext4_block_bitmap(sb, gdp)));
93 if (block_cluster < num_clusters)
94 block_cluster = -1;
95 else if (block_cluster == num_clusters) {
96 num_clusters++;
97 block_cluster = -1;
81 } 98 }
82 } 99 }
83 return used_blocks;
84}
85 100
86/* Initializes an uninitialized block bitmap if given, and returns the 101 if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) {
87 * number of blocks free in the group. */ 102 inode_cluster = EXT4_B2C(sbi,
88unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, 103 start - ext4_inode_bitmap(sb, gdp));
89 ext4_group_t block_group, struct ext4_group_desc *gdp) 104 if (inode_cluster < num_clusters)
90{ 105 inode_cluster = -1;
91 int bit, bit_max; 106 else if (inode_cluster == num_clusters) {
92 ext4_group_t ngroups = ext4_get_groups_count(sb); 107 num_clusters++;
93 unsigned free_blocks, group_blocks; 108 inode_cluster = -1;
94 struct ext4_sb_info *sbi = EXT4_SB(sb);
95
96 if (bh) {
97 J_ASSERT_BH(bh, buffer_locked(bh));
98
99 /* If checksum is bad mark all blocks used to prevent allocation
100 * essentially implementing a per-group read-only flag. */
101 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
102 ext4_error(sb, "Checksum bad for group %u",
103 block_group);
104 ext4_free_blks_set(sb, gdp, 0);
105 ext4_free_inodes_set(sb, gdp, 0);
106 ext4_itable_unused_set(sb, gdp, 0);
107 memset(bh->b_data, 0xff, sb->s_blocksize);
108 return 0;
109 } 109 }
110 memset(bh->b_data, 0, sb->s_blocksize);
111 } 110 }
112 111
113 /* Check for superblock and gdt backups in this group */ 112 itbl_blk = ext4_inode_table(sb, gdp);
114 bit_max = ext4_bg_has_super(sb, block_group); 113 for (i = 0; i < sbi->s_itb_per_group; i++) {
115 114 if (ext4_block_in_group(sb, itbl_blk + i, block_group)) {
116 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || 115 c = EXT4_B2C(sbi, start - itbl_blk + i);
117 block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) * 116 if ((c < num_clusters) || (c == inode_cluster) ||
118 sbi->s_desc_per_block) { 117 (c == block_cluster) || (c == itbl_cluster))
119 if (bit_max) { 118 continue;
120 bit_max += ext4_bg_num_gdb(sb, block_group); 119 if (c == num_clusters) {
121 bit_max += 120 num_clusters++;
122 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); 121 continue;
122 }
123 num_clusters++;
124 itbl_cluster = c;
123 } 125 }
124 } else { /* For META_BG_BLOCK_GROUPS */
125 bit_max += ext4_bg_num_gdb(sb, block_group);
126 } 126 }
127 127
128 if (block_group == ngroups - 1) { 128 if (block_cluster != -1)
129 num_clusters++;
130 if (inode_cluster != -1)
131 num_clusters++;
132
133 return num_clusters;
134}
135
136static unsigned int num_clusters_in_group(struct super_block *sb,
137 ext4_group_t block_group)
138{
139 unsigned int blocks;
140
141 if (block_group == ext4_get_groups_count(sb) - 1) {
129 /* 142 /*
130 * Even though mke2fs always initialize first and last group 143 * Even though mke2fs always initializes the first and
131 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need 144 * last group, just in case some other tool was used,
132 * to make sure we calculate the right free blocks 145 * we need to make sure we calculate the right free
146 * blocks.
133 */ 147 */
134 group_blocks = ext4_blocks_count(sbi->s_es) - 148 blocks = ext4_blocks_count(EXT4_SB(sb)->s_es) -
135 ext4_group_first_block_no(sb, ngroups - 1); 149 ext4_group_first_block_no(sb, block_group);
136 } else { 150 } else
137 group_blocks = EXT4_BLOCKS_PER_GROUP(sb); 151 blocks = EXT4_BLOCKS_PER_GROUP(sb);
138 } 152 return EXT4_NUM_B2C(EXT4_SB(sb), blocks);
153}
139 154
140 free_blocks = group_blocks - bit_max; 155/* Initializes an uninitialized block bitmap */
156void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
157 ext4_group_t block_group,
158 struct ext4_group_desc *gdp)
159{
160 unsigned int bit, bit_max;
161 struct ext4_sb_info *sbi = EXT4_SB(sb);
162 ext4_fsblk_t start, tmp;
163 int flex_bg = 0;
164
165 J_ASSERT_BH(bh, buffer_locked(bh));
166
167 /* If checksum is bad mark all blocks used to prevent allocation
168 * essentially implementing a per-group read-only flag. */
169 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
170 ext4_error(sb, "Checksum bad for group %u", block_group);
171 ext4_free_group_clusters_set(sb, gdp, 0);
172 ext4_free_inodes_set(sb, gdp, 0);
173 ext4_itable_unused_set(sb, gdp, 0);
174 memset(bh->b_data, 0xff, sb->s_blocksize);
175 return;
176 }
177 memset(bh->b_data, 0, sb->s_blocksize);
141 178
142 if (bh) { 179 bit_max = ext4_num_base_meta_clusters(sb, block_group);
143 ext4_fsblk_t start, tmp; 180 for (bit = 0; bit < bit_max; bit++)
144 int flex_bg = 0; 181 ext4_set_bit(bit, bh->b_data);
145 182
146 for (bit = 0; bit < bit_max; bit++) 183 start = ext4_group_first_block_no(sb, block_group);
147 ext4_set_bit(bit, bh->b_data);
148 184
149 start = ext4_group_first_block_no(sb, block_group); 185 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
186 flex_bg = 1;
150 187
151 if (EXT4_HAS_INCOMPAT_FEATURE(sb, 188 /* Set bits for block and inode bitmaps, and inode table */
152 EXT4_FEATURE_INCOMPAT_FLEX_BG)) 189 tmp = ext4_block_bitmap(sb, gdp);
153 flex_bg = 1; 190 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
191 ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
154 192
155 /* Set bits for block and inode bitmaps, and inode table */ 193 tmp = ext4_inode_bitmap(sb, gdp);
156 tmp = ext4_block_bitmap(sb, gdp); 194 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
157 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) 195 ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
158 ext4_set_bit(tmp - start, bh->b_data);
159 196
160 tmp = ext4_inode_bitmap(sb, gdp); 197 tmp = ext4_inode_table(sb, gdp);
198 for (; tmp < ext4_inode_table(sb, gdp) +
199 sbi->s_itb_per_group; tmp++) {
161 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) 200 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
162 ext4_set_bit(tmp - start, bh->b_data); 201 ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
163
164 tmp = ext4_inode_table(sb, gdp);
165 for (; tmp < ext4_inode_table(sb, gdp) +
166 sbi->s_itb_per_group; tmp++) {
167 if (!flex_bg ||
168 ext4_block_in_group(sb, tmp, block_group))
169 ext4_set_bit(tmp - start, bh->b_data);
170 }
171 /*
172 * Also if the number of blocks within the group is
173 * less than the blocksize * 8 ( which is the size
174 * of bitmap ), set rest of the block bitmap to 1
175 */
176 ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8,
177 bh->b_data);
178 } 202 }
179 return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp); 203
204 /*
205 * Also if the number of blocks within the group is less than
206 * the blocksize * 8 ( which is the size of bitmap ), set rest
207 * of the block bitmap to 1
208 */
209 ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
210 sb->s_blocksize * 8, bh->b_data);
180} 211}
181 212
213/* Return the number of free blocks in a block group. It is used when
214 * the block bitmap is uninitialized, so we can't just count the bits
215 * in the bitmap. */
216unsigned ext4_free_clusters_after_init(struct super_block *sb,
217 ext4_group_t block_group,
218 struct ext4_group_desc *gdp)
219{
220 return num_clusters_in_group(sb, block_group) -
221 ext4_num_overhead_clusters(sb, block_group, gdp);
222}
182 223
183/* 224/*
184 * The free blocks are managed by bitmaps. A file system contains several 225 * The free blocks are managed by bitmaps. A file system contains several
@@ -362,53 +403,54 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
362} 403}
363 404
364/** 405/**
365 * ext4_has_free_blocks() 406 * ext4_has_free_clusters()
366 * @sbi: in-core super block structure. 407 * @sbi: in-core super block structure.
367 * @nblocks: number of needed blocks 408 * @nclusters: number of needed blocks
409 * @flags: flags from ext4_mb_new_blocks()
368 * 410 *
369 * Check if filesystem has nblocks free & available for allocation. 411 * Check if filesystem has nclusters free & available for allocation.
370 * On success return 1, return 0 on failure. 412 * On success return 1, return 0 on failure.
371 */ 413 */
372static int ext4_has_free_blocks(struct ext4_sb_info *sbi, 414static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
373 s64 nblocks, unsigned int flags) 415 s64 nclusters, unsigned int flags)
374{ 416{
375 s64 free_blocks, dirty_blocks, root_blocks; 417 s64 free_clusters, dirty_clusters, root_clusters;
376 struct percpu_counter *fbc = &sbi->s_freeblocks_counter; 418 struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
377 struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; 419 struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;
378 420
379 free_blocks = percpu_counter_read_positive(fbc); 421 free_clusters = percpu_counter_read_positive(fcc);
380 dirty_blocks = percpu_counter_read_positive(dbc); 422 dirty_clusters = percpu_counter_read_positive(dcc);
381 root_blocks = ext4_r_blocks_count(sbi->s_es); 423 root_clusters = EXT4_B2C(sbi, ext4_r_blocks_count(sbi->s_es));
382 424
383 if (free_blocks - (nblocks + root_blocks + dirty_blocks) < 425 if (free_clusters - (nclusters + root_clusters + dirty_clusters) <
384 EXT4_FREEBLOCKS_WATERMARK) { 426 EXT4_FREECLUSTERS_WATERMARK) {
385 free_blocks = percpu_counter_sum_positive(fbc); 427 free_clusters = EXT4_C2B(sbi, percpu_counter_sum_positive(fcc));
386 dirty_blocks = percpu_counter_sum_positive(dbc); 428 dirty_clusters = percpu_counter_sum_positive(dcc);
387 } 429 }
388 /* Check whether we have space after 430 /* Check whether we have space after accounting for current
389 * accounting for current dirty blocks & root reserved blocks. 431 * dirty clusters & root reserved clusters.
390 */ 432 */
391 if (free_blocks >= ((root_blocks + nblocks) + dirty_blocks)) 433 if (free_clusters >= ((root_clusters + nclusters) + dirty_clusters))
392 return 1; 434 return 1;
393 435
394 /* Hm, nope. Are (enough) root reserved blocks available? */ 436 /* Hm, nope. Are (enough) root reserved clusters available? */
395 if (sbi->s_resuid == current_fsuid() || 437 if (sbi->s_resuid == current_fsuid() ||
396 ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) || 438 ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
397 capable(CAP_SYS_RESOURCE) || 439 capable(CAP_SYS_RESOURCE) ||
398 (flags & EXT4_MB_USE_ROOT_BLOCKS)) { 440 (flags & EXT4_MB_USE_ROOT_BLOCKS)) {
399 441
400 if (free_blocks >= (nblocks + dirty_blocks)) 442 if (free_clusters >= (nclusters + dirty_clusters))
401 return 1; 443 return 1;
402 } 444 }
403 445
404 return 0; 446 return 0;
405} 447}
406 448
407int ext4_claim_free_blocks(struct ext4_sb_info *sbi, 449int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
408 s64 nblocks, unsigned int flags) 450 s64 nclusters, unsigned int flags)
409{ 451{
410 if (ext4_has_free_blocks(sbi, nblocks, flags)) { 452 if (ext4_has_free_clusters(sbi, nclusters, flags)) {
411 percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks); 453 percpu_counter_add(&sbi->s_dirtyclusters_counter, nclusters);
412 return 0; 454 return 0;
413 } else 455 } else
414 return -ENOSPC; 456 return -ENOSPC;
@@ -428,7 +470,7 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
428 */ 470 */
429int ext4_should_retry_alloc(struct super_block *sb, int *retries) 471int ext4_should_retry_alloc(struct super_block *sb, int *retries)
430{ 472{
431 if (!ext4_has_free_blocks(EXT4_SB(sb), 1, 0) || 473 if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) ||
432 (*retries)++ > 3 || 474 (*retries)++ > 3 ||
433 !EXT4_SB(sb)->s_journal) 475 !EXT4_SB(sb)->s_journal)
434 return 0; 476 return 0;
@@ -444,7 +486,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
444 * @handle: handle to this transaction 486 * @handle: handle to this transaction
445 * @inode: file inode 487 * @inode: file inode
446 * @goal: given target block(filesystem wide) 488 * @goal: given target block(filesystem wide)
447 * @count: pointer to total number of blocks needed 489 * @count: pointer to total number of clusters needed
448 * @errp: error code 490 * @errp: error code
449 * 491 *
450 * Return 1st allocated block number on success, *count stores total account 492 * Return 1st allocated block number on success, *count stores total account
@@ -476,18 +518,19 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
476 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 518 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
477 EXT4_I(inode)->i_allocated_meta_blocks += ar.len; 519 EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
478 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 520 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
479 dquot_alloc_block_nofail(inode, ar.len); 521 dquot_alloc_block_nofail(inode,
522 EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
480 } 523 }
481 return ret; 524 return ret;
482} 525}
483 526
484/** 527/**
485 * ext4_count_free_blocks() -- count filesystem free blocks 528 * ext4_count_free_clusters() -- count filesystem free clusters
486 * @sb: superblock 529 * @sb: superblock
487 * 530 *
488 * Adds up the number of free blocks from each block group. 531 * Adds up the number of free clusters from each block group.
489 */ 532 */
490ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) 533ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
491{ 534{
492 ext4_fsblk_t desc_count; 535 ext4_fsblk_t desc_count;
493 struct ext4_group_desc *gdp; 536 struct ext4_group_desc *gdp;
@@ -508,7 +551,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
508 gdp = ext4_get_group_desc(sb, i, NULL); 551 gdp = ext4_get_group_desc(sb, i, NULL);
509 if (!gdp) 552 if (!gdp)
510 continue; 553 continue;
511 desc_count += ext4_free_blks_count(sb, gdp); 554 desc_count += ext4_free_group_clusters(sb, gdp);
512 brelse(bitmap_bh); 555 brelse(bitmap_bh);
513 bitmap_bh = ext4_read_block_bitmap(sb, i); 556 bitmap_bh = ext4_read_block_bitmap(sb, i);
514 if (bitmap_bh == NULL) 557 if (bitmap_bh == NULL)
@@ -516,12 +559,13 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
516 559
517 x = ext4_count_free(bitmap_bh, sb->s_blocksize); 560 x = ext4_count_free(bitmap_bh, sb->s_blocksize);
518 printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n", 561 printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
519 i, ext4_free_blks_count(sb, gdp), x); 562 i, ext4_free_group_clusters(sb, gdp), x);
520 bitmap_count += x; 563 bitmap_count += x;
521 } 564 }
522 brelse(bitmap_bh); 565 brelse(bitmap_bh);
523 printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu" 566 printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu"
524 ", computed = %llu, %llu\n", ext4_free_blocks_count(es), 567 ", computed = %llu, %llu\n",
568 EXT4_B2C(sbi, ext4_free_blocks_count(es)),
525 desc_count, bitmap_count); 569 desc_count, bitmap_count);
526 return bitmap_count; 570 return bitmap_count;
527#else 571#else
@@ -530,7 +574,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
530 gdp = ext4_get_group_desc(sb, i, NULL); 574 gdp = ext4_get_group_desc(sb, i, NULL);
531 if (!gdp) 575 if (!gdp)
532 continue; 576 continue;
533 desc_count += ext4_free_blks_count(sb, gdp); 577 desc_count += ext4_free_group_clusters(sb, gdp);
534 } 578 }
535 579
536 return desc_count; 580 return desc_count;
@@ -620,6 +664,31 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
620 664
621} 665}
622 666
667/*
668 * This function returns the number of file system metadata clusters at
669 * the beginning of a block group, including the reserved gdt blocks.
670 */
671unsigned ext4_num_base_meta_clusters(struct super_block *sb,
672 ext4_group_t block_group)
673{
674 struct ext4_sb_info *sbi = EXT4_SB(sb);
675 unsigned num;
676
677 /* Check for superblock and gdt backups in this group */
678 num = ext4_bg_has_super(sb, block_group);
679
680 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
681 block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
682 sbi->s_desc_per_block) {
683 if (num) {
684 num += ext4_bg_num_gdb(sb, block_group);
685 num += le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
686 }
687 } else { /* For META_BG_BLOCK_GROUPS */
688 num += ext4_bg_num_gdb(sb, block_group);
689 }
690 return EXT4_NUM_B2C(sbi, num);
691}
623/** 692/**
624 * ext4_inode_to_goal_block - return a hint for block allocation 693 * ext4_inode_to_goal_block - return a hint for block allocation
625 * @inode: inode for block allocation 694 * @inode: inode for block allocation
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index cec3145e532c..5b0e26a1272d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -144,9 +144,17 @@ struct ext4_allocation_request {
144#define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten) 144#define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten)
145#define EXT4_MAP_BOUNDARY (1 << BH_Boundary) 145#define EXT4_MAP_BOUNDARY (1 << BH_Boundary)
146#define EXT4_MAP_UNINIT (1 << BH_Uninit) 146#define EXT4_MAP_UNINIT (1 << BH_Uninit)
147/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
148 * ext4_map_blocks wants to know whether or not the underlying cluster has
149 * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
150 * the requested mapping was from previously mapped (or delayed allocated)
151 * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
152 * should never appear on buffer_head's state flags.
153 */
154#define EXT4_MAP_FROM_CLUSTER (1 << BH_AllocFromCluster)
147#define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ 155#define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
148 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\ 156 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
149 EXT4_MAP_UNINIT) 157 EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
150 158
151struct ext4_map_blocks { 159struct ext4_map_blocks {
152 ext4_fsblk_t m_pblk; 160 ext4_fsblk_t m_pblk;
@@ -239,8 +247,11 @@ struct ext4_io_submit {
239# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) 247# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
240#endif 248#endif
241#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32)) 249#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32))
250#define EXT4_CLUSTER_SIZE(s) (EXT4_BLOCK_SIZE(s) << \
251 EXT4_SB(s)->s_cluster_bits)
242#ifdef __KERNEL__ 252#ifdef __KERNEL__
243# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) 253# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
254# define EXT4_CLUSTER_BITS(s) (EXT4_SB(s)->s_cluster_bits)
244#else 255#else
245# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) 256# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10)
246#endif 257#endif
@@ -258,6 +269,14 @@ struct ext4_io_submit {
258#endif 269#endif
259#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits))) 270#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
260 271
272/* Translate a block number to a cluster number */
273#define EXT4_B2C(sbi, blk) ((blk) >> (sbi)->s_cluster_bits)
274/* Translate a cluster number to a block number */
275#define EXT4_C2B(sbi, cluster) ((cluster) << (sbi)->s_cluster_bits)
276/* Translate # of blks to # of clusters */
277#define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \
278 (sbi)->s_cluster_bits)
279
261/* 280/*
262 * Structure of a blocks group descriptor 281 * Structure of a blocks group descriptor
263 */ 282 */
@@ -289,7 +308,7 @@ struct ext4_group_desc
289 308
290struct flex_groups { 309struct flex_groups {
291 atomic_t free_inodes; 310 atomic_t free_inodes;
292 atomic_t free_blocks; 311 atomic_t free_clusters;
293 atomic_t used_dirs; 312 atomic_t used_dirs;
294}; 313};
295 314
@@ -306,6 +325,7 @@ struct flex_groups {
306#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size) 325#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size)
307#ifdef __KERNEL__ 326#ifdef __KERNEL__
308# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group) 327# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group)
328# define EXT4_CLUSTERS_PER_GROUP(s) (EXT4_SB(s)->s_clusters_per_group)
309# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block) 329# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block)
310# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group) 330# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group)
311# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits) 331# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits)
@@ -358,8 +378,7 @@ struct flex_groups {
358 378
359/* Flags that should be inherited by new inodes from their parent. */ 379/* Flags that should be inherited by new inodes from their parent. */
360#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ 380#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
361 EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\ 381 EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
362 EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
363 EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\ 382 EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
364 EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL) 383 EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
365 384
@@ -520,6 +539,8 @@ struct ext4_new_group_data {
520#define EXT4_GET_BLOCKS_PUNCH_OUT_EXT 0x0020 539#define EXT4_GET_BLOCKS_PUNCH_OUT_EXT 0x0020
521 /* Don't normalize allocation size (used for fallocate) */ 540 /* Don't normalize allocation size (used for fallocate) */
522#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 541#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040
542 /* Request will not result in inode size update (user for fallocate) */
543#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
523 544
524/* 545/*
525 * Flags used by ext4_free_blocks 546 * Flags used by ext4_free_blocks
@@ -528,6 +549,13 @@ struct ext4_new_group_data {
528#define EXT4_FREE_BLOCKS_FORGET 0x0002 549#define EXT4_FREE_BLOCKS_FORGET 0x0002
529#define EXT4_FREE_BLOCKS_VALIDATED 0x0004 550#define EXT4_FREE_BLOCKS_VALIDATED 0x0004
530#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 551#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
552#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
553#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
554
555/*
556 * Flags used by ext4_discard_partial_page_buffers
557 */
558#define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED 0x0001
531 559
532/* 560/*
533 * ioctl commands 561 * ioctl commands
@@ -538,9 +566,6 @@ struct ext4_new_group_data {
538#define EXT4_IOC_SETVERSION _IOW('f', 4, long) 566#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
539#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION 567#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
540#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION 568#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
541#ifdef CONFIG_JBD2_DEBUG
542#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
543#endif
544#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) 569#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
545#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) 570#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
546#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) 571#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
@@ -563,9 +588,6 @@ struct ext4_new_group_data {
563#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) 588#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int)
564#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) 589#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
565#define EXT4_IOC32_GROUP_ADD _IOW('f', 8, struct compat_ext4_new_group_input) 590#define EXT4_IOC32_GROUP_ADD _IOW('f', 8, struct compat_ext4_new_group_input)
566#ifdef CONFIG_JBD2_DEBUG
567#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
568#endif
569#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION 591#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
570#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION 592#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
571#endif 593#endif
@@ -837,6 +859,7 @@ struct ext4_inode_info {
837 ext4_group_t i_last_alloc_group; 859 ext4_group_t i_last_alloc_group;
838 860
839 /* allocation reservation info for delalloc */ 861 /* allocation reservation info for delalloc */
862 /* In case of bigalloc, these refer to clusters rather than blocks */
840 unsigned int i_reserved_data_blocks; 863 unsigned int i_reserved_data_blocks;
841 unsigned int i_reserved_meta_blocks; 864 unsigned int i_reserved_meta_blocks;
842 unsigned int i_allocated_meta_blocks; 865 unsigned int i_allocated_meta_blocks;
@@ -886,7 +909,6 @@ struct ext4_inode_info {
886/* 909/*
887 * Mount flags 910 * Mount flags
888 */ 911 */
889#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */
890#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ 912#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
891#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ 913#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
892#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ 914#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
@@ -918,6 +940,9 @@ struct ext4_inode_info {
918#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ 940#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
919#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */ 941#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
920 942
943#define EXT4_MOUNT2_EXPLICIT_DELALLOC 0x00000001 /* User explicitly
944 specified delalloc */
945
921#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ 946#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
922 ~EXT4_MOUNT_##opt 947 ~EXT4_MOUNT_##opt
923#define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \ 948#define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \
@@ -968,9 +993,9 @@ struct ext4_super_block {
968/*10*/ __le32 s_free_inodes_count; /* Free inodes count */ 993/*10*/ __le32 s_free_inodes_count; /* Free inodes count */
969 __le32 s_first_data_block; /* First Data Block */ 994 __le32 s_first_data_block; /* First Data Block */
970 __le32 s_log_block_size; /* Block size */ 995 __le32 s_log_block_size; /* Block size */
971 __le32 s_obso_log_frag_size; /* Obsoleted fragment size */ 996 __le32 s_log_cluster_size; /* Allocation cluster size */
972/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */ 997/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
973 __le32 s_obso_frags_per_group; /* Obsoleted fragments per group */ 998 __le32 s_clusters_per_group; /* # Clusters per group */
974 __le32 s_inodes_per_group; /* # Inodes per group */ 999 __le32 s_inodes_per_group; /* # Inodes per group */
975 __le32 s_mtime; /* Mount time */ 1000 __le32 s_mtime; /* Mount time */
976/*30*/ __le32 s_wtime; /* Write time */ 1001/*30*/ __le32 s_wtime; /* Write time */
@@ -1066,7 +1091,10 @@ struct ext4_super_block {
1066 __u8 s_last_error_func[32]; /* function where the error happened */ 1091 __u8 s_last_error_func[32]; /* function where the error happened */
1067#define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts) 1092#define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
1068 __u8 s_mount_opts[64]; 1093 __u8 s_mount_opts[64];
1069 __le32 s_reserved[112]; /* Padding to the end of the block */ 1094 __le32 s_usr_quota_inum; /* inode for tracking user quota */
1095 __le32 s_grp_quota_inum; /* inode for tracking group quota */
1096 __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */
1097 __le32 s_reserved[109]; /* Padding to the end of the block */
1070}; 1098};
1071 1099
1072#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START) 1100#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
@@ -1086,6 +1114,7 @@ struct ext4_sb_info {
1086 unsigned long s_desc_size; /* Size of a group descriptor in bytes */ 1114 unsigned long s_desc_size; /* Size of a group descriptor in bytes */
1087 unsigned long s_inodes_per_block;/* Number of inodes per block */ 1115 unsigned long s_inodes_per_block;/* Number of inodes per block */
1088 unsigned long s_blocks_per_group;/* Number of blocks in a group */ 1116 unsigned long s_blocks_per_group;/* Number of blocks in a group */
1117 unsigned long s_clusters_per_group; /* Number of clusters in a group */
1089 unsigned long s_inodes_per_group;/* Number of inodes in a group */ 1118 unsigned long s_inodes_per_group;/* Number of inodes in a group */
1090 unsigned long s_itb_per_group; /* Number of inode table blocks per group */ 1119 unsigned long s_itb_per_group; /* Number of inode table blocks per group */
1091 unsigned long s_gdb_count; /* Number of group descriptor blocks */ 1120 unsigned long s_gdb_count; /* Number of group descriptor blocks */
@@ -1094,6 +1123,8 @@ struct ext4_sb_info {
1094 ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */ 1123 ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
1095 unsigned long s_overhead_last; /* Last calculated overhead */ 1124 unsigned long s_overhead_last; /* Last calculated overhead */
1096 unsigned long s_blocks_last; /* Last seen block count */ 1125 unsigned long s_blocks_last; /* Last seen block count */
1126 unsigned int s_cluster_ratio; /* Number of blocks per cluster */
1127 unsigned int s_cluster_bits; /* log2 of s_cluster_ratio */
1097 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ 1128 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
1098 struct buffer_head * s_sbh; /* Buffer containing the super block */ 1129 struct buffer_head * s_sbh; /* Buffer containing the super block */
1099 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ 1130 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
@@ -1117,10 +1148,10 @@ struct ext4_sb_info {
1117 u32 s_hash_seed[4]; 1148 u32 s_hash_seed[4];
1118 int s_def_hash_version; 1149 int s_def_hash_version;
1119 int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */ 1150 int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
1120 struct percpu_counter s_freeblocks_counter; 1151 struct percpu_counter s_freeclusters_counter;
1121 struct percpu_counter s_freeinodes_counter; 1152 struct percpu_counter s_freeinodes_counter;
1122 struct percpu_counter s_dirs_counter; 1153 struct percpu_counter s_dirs_counter;
1123 struct percpu_counter s_dirtyblocks_counter; 1154 struct percpu_counter s_dirtyclusters_counter;
1124 struct blockgroup_lock *s_blockgroup_lock; 1155 struct blockgroup_lock *s_blockgroup_lock;
1125 struct proc_dir_entry *s_proc; 1156 struct proc_dir_entry *s_proc;
1126 struct kobject s_kobj; 1157 struct kobject s_kobj;
@@ -1136,10 +1167,6 @@ struct ext4_sb_info {
1136 u32 s_max_batch_time; 1167 u32 s_max_batch_time;
1137 u32 s_min_batch_time; 1168 u32 s_min_batch_time;
1138 struct block_device *journal_bdev; 1169 struct block_device *journal_bdev;
1139#ifdef CONFIG_JBD2_DEBUG
1140 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
1141 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
1142#endif
1143#ifdef CONFIG_QUOTA 1170#ifdef CONFIG_QUOTA
1144 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ 1171 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
1145 int s_jquota_fmt; /* Format of quota to use */ 1172 int s_jquota_fmt; /* Format of quota to use */
@@ -1248,6 +1275,15 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
1248 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); 1275 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
1249} 1276}
1250 1277
1278static inline void ext4_set_io_unwritten_flag(struct inode *inode,
1279 struct ext4_io_end *io_end)
1280{
1281 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
1282 io_end->flag |= EXT4_IO_END_UNWRITTEN;
1283 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
1284 }
1285}
1286
1251/* 1287/*
1252 * Inode dynamic state flags 1288 * Inode dynamic state flags
1253 */ 1289 */
@@ -1360,6 +1396,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1360#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 1396#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
1361#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 1397#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
1362#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 1398#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100
1399#define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200
1363 1400
1364#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 1401#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
1365#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 1402#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
@@ -1402,7 +1439,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1402 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ 1439 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
1403 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ 1440 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
1404 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ 1441 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
1405 EXT4_FEATURE_RO_COMPAT_HUGE_FILE) 1442 EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
1443 EXT4_FEATURE_RO_COMPAT_BIGALLOC)
1406 1444
1407/* 1445/*
1408 * Default values for user and/or group using reserved blocks 1446 * Default values for user and/or group using reserved blocks
@@ -1735,9 +1773,9 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
1735 unsigned int flags, 1773 unsigned int flags,
1736 unsigned long *count, 1774 unsigned long *count,
1737 int *errp); 1775 int *errp);
1738extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, 1776extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
1739 s64 nblocks, unsigned int flags); 1777 s64 nclusters, unsigned int flags);
1740extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); 1778extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *);
1741extern void ext4_check_blocks_bitmap(struct super_block *); 1779extern void ext4_check_blocks_bitmap(struct super_block *);
1742extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, 1780extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
1743 ext4_group_t block_group, 1781 ext4_group_t block_group,
@@ -1745,12 +1783,18 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
1745extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); 1783extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
1746struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, 1784struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
1747 ext4_group_t block_group); 1785 ext4_group_t block_group);
1748extern unsigned ext4_init_block_bitmap(struct super_block *sb, 1786extern void ext4_init_block_bitmap(struct super_block *sb,
1749 struct buffer_head *bh, 1787 struct buffer_head *bh,
1750 ext4_group_t group, 1788 ext4_group_t group,
1751 struct ext4_group_desc *desc); 1789 struct ext4_group_desc *desc);
1752#define ext4_free_blocks_after_init(sb, group, desc) \ 1790extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
1753 ext4_init_block_bitmap(sb, NULL, group, desc) 1791 ext4_group_t block_group,
1792 struct ext4_group_desc *gdp);
1793extern unsigned ext4_num_base_meta_clusters(struct super_block *sb,
1794 ext4_group_t block_group);
1795extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
1796 ext4_group_t block_group,
1797 struct ext4_group_desc *gdp);
1754ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); 1798ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
1755 1799
1756/* dir.c */ 1800/* dir.c */
@@ -1776,7 +1820,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct
1776 1820
1777/* ialloc.c */ 1821/* ialloc.c */
1778extern struct inode *ext4_new_inode(handle_t *, struct inode *, int, 1822extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
1779 const struct qstr *qstr, __u32 goal); 1823 const struct qstr *qstr, __u32 goal,
1824 uid_t *owner);
1780extern void ext4_free_inode(handle_t *, struct inode *); 1825extern void ext4_free_inode(handle_t *, struct inode *);
1781extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); 1826extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
1782extern unsigned long ext4_count_free_inodes(struct super_block *); 1827extern unsigned long ext4_count_free_inodes(struct super_block *);
@@ -1839,6 +1884,12 @@ extern int ext4_block_truncate_page(handle_t *handle,
1839 struct address_space *mapping, loff_t from); 1884 struct address_space *mapping, loff_t from);
1840extern int ext4_block_zero_page_range(handle_t *handle, 1885extern int ext4_block_zero_page_range(handle_t *handle,
1841 struct address_space *mapping, loff_t from, loff_t length); 1886 struct address_space *mapping, loff_t from, loff_t length);
1887extern int ext4_discard_partial_page_buffers(handle_t *handle,
1888 struct address_space *mapping, loff_t from,
1889 loff_t length, int flags);
1890extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
1891 struct inode *inode, struct page *page, loff_t from,
1892 loff_t length, int flags);
1842extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1893extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1843extern qsize_t *ext4_get_reserved_space(struct inode *inode); 1894extern qsize_t *ext4_get_reserved_space(struct inode *inode);
1844extern void ext4_da_update_reserve_space(struct inode *inode, 1895extern void ext4_da_update_reserve_space(struct inode *inode,
@@ -1927,8 +1978,8 @@ extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
1927 struct ext4_group_desc *bg); 1978 struct ext4_group_desc *bg);
1928extern ext4_fsblk_t ext4_inode_table(struct super_block *sb, 1979extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
1929 struct ext4_group_desc *bg); 1980 struct ext4_group_desc *bg);
1930extern __u32 ext4_free_blks_count(struct super_block *sb, 1981extern __u32 ext4_free_group_clusters(struct super_block *sb,
1931 struct ext4_group_desc *bg); 1982 struct ext4_group_desc *bg);
1932extern __u32 ext4_free_inodes_count(struct super_block *sb, 1983extern __u32 ext4_free_inodes_count(struct super_block *sb,
1933 struct ext4_group_desc *bg); 1984 struct ext4_group_desc *bg);
1934extern __u32 ext4_used_dirs_count(struct super_block *sb, 1985extern __u32 ext4_used_dirs_count(struct super_block *sb,
@@ -1941,8 +1992,9 @@ extern void ext4_inode_bitmap_set(struct super_block *sb,
1941 struct ext4_group_desc *bg, ext4_fsblk_t blk); 1992 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1942extern void ext4_inode_table_set(struct super_block *sb, 1993extern void ext4_inode_table_set(struct super_block *sb,
1943 struct ext4_group_desc *bg, ext4_fsblk_t blk); 1994 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1944extern void ext4_free_blks_set(struct super_block *sb, 1995extern void ext4_free_group_clusters_set(struct super_block *sb,
1945 struct ext4_group_desc *bg, __u32 count); 1996 struct ext4_group_desc *bg,
1997 __u32 count);
1946extern void ext4_free_inodes_set(struct super_block *sb, 1998extern void ext4_free_inodes_set(struct super_block *sb,
1947 struct ext4_group_desc *bg, __u32 count); 1999 struct ext4_group_desc *bg, __u32 count);
1948extern void ext4_used_dirs_set(struct super_block *sb, 2000extern void ext4_used_dirs_set(struct super_block *sb,
@@ -2051,13 +2103,13 @@ do { \
2051} while (0) 2103} while (0)
2052 2104
2053#ifdef CONFIG_SMP 2105#ifdef CONFIG_SMP
2054/* Each CPU can accumulate percpu_counter_batch blocks in their local 2106/* Each CPU can accumulate percpu_counter_batch clusters in their local
2055 * counters. So we need to make sure we have free blocks more 2107 * counters. So we need to make sure we have free clusters more
2056 * than percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times. 2108 * than percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times.
2057 */ 2109 */
2058#define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids)) 2110#define EXT4_FREECLUSTERS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
2059#else 2111#else
2060#define EXT4_FREEBLOCKS_WATERMARK 0 2112#define EXT4_FREECLUSTERS_WATERMARK 0
2061#endif 2113#endif
2062 2114
2063static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) 2115static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
@@ -2243,10 +2295,19 @@ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
2243enum ext4_state_bits { 2295enum ext4_state_bits {
2244 BH_Uninit /* blocks are allocated but uninitialized on disk */ 2296 BH_Uninit /* blocks are allocated but uninitialized on disk */
2245 = BH_JBDPrivateStart, 2297 = BH_JBDPrivateStart,
2298 BH_AllocFromCluster, /* allocated blocks were part of already
2299 * allocated cluster. Note that this flag will
2300 * never, ever appear in a buffer_head's state
2301 * flag. See EXT4_MAP_FROM_CLUSTER to see where
2302 * this is used. */
2303 BH_Da_Mapped, /* Delayed allocated block that now has a mapping. This
2304 * flag is set when ext4_map_blocks is called on a
2305 * delayed allocated block to get its real mapping. */
2246}; 2306};
2247 2307
2248BUFFER_FNS(Uninit, uninit) 2308BUFFER_FNS(Uninit, uninit)
2249TAS_BUFFER_FNS(Uninit, uninit) 2309TAS_BUFFER_FNS(Uninit, uninit)
2310BUFFER_FNS(Da_Mapped, da_mapped)
2250 2311
2251/* 2312/*
2252 * Add new method to test wether block and inode bitmaps are properly 2313 * Add new method to test wether block and inode bitmaps are properly
@@ -2282,4 +2343,6 @@ extern void ext4_resize_end(struct super_block *sb);
2282 2343
2283#endif /* __KERNEL__ */ 2344#endif /* __KERNEL__ */
2284 2345
2346#include "ext4_extents.h"
2347
2285#endif /* _EXT4_H */ 2348#endif /* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 095c36f3b612..a52db3a69a30 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -290,5 +290,7 @@ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
290 struct ext4_ext_path *); 290 struct ext4_ext_path *);
291extern void ext4_ext_drop_refs(struct ext4_ext_path *); 291extern void ext4_ext_drop_refs(struct ext4_ext_path *);
292extern int ext4_ext_check_inode(struct inode *inode); 292extern int ext4_ext_check_inode(struct inode *inode);
293extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
294 int search_hint_reverse);
293#endif /* _EXT4_EXTENTS */ 295#endif /* _EXT4_EXTENTS */
294 296
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index f5240aa15601..aca179017582 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -109,9 +109,11 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
109 109
110 if (ext4_handle_valid(handle)) { 110 if (ext4_handle_valid(handle)) {
111 err = jbd2_journal_dirty_metadata(handle, bh); 111 err = jbd2_journal_dirty_metadata(handle, bh);
112 if (err) 112 if (err) {
113 ext4_journal_abort_handle(where, line, __func__, 113 /* Errors can only happen if there is a bug */
114 bh, handle, err); 114 handle->h_err = err;
115 __ext4_journal_stop(where, line, handle);
116 }
115 } else { 117 } else {
116 if (inode) 118 if (inode)
117 mark_buffer_dirty_inode(bh, inode); 119 mark_buffer_dirty_inode(bh, inode);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 57cf568a98ab..61fa9e1614af 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -42,7 +42,6 @@
42#include <asm/uaccess.h> 42#include <asm/uaccess.h>
43#include <linux/fiemap.h> 43#include <linux/fiemap.h>
44#include "ext4_jbd2.h" 44#include "ext4_jbd2.h"
45#include "ext4_extents.h"
46 45
47#include <trace/events/ext4.h> 46#include <trace/events/ext4.h>
48 47
@@ -96,13 +95,17 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
96 * - ENOMEM 95 * - ENOMEM
97 * - EIO 96 * - EIO
98 */ 97 */
99static int ext4_ext_dirty(handle_t *handle, struct inode *inode, 98#define ext4_ext_dirty(handle, inode, path) \
100 struct ext4_ext_path *path) 99 __ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
100static int __ext4_ext_dirty(const char *where, unsigned int line,
101 handle_t *handle, struct inode *inode,
102 struct ext4_ext_path *path)
101{ 103{
102 int err; 104 int err;
103 if (path->p_bh) { 105 if (path->p_bh) {
104 /* path points to block */ 106 /* path points to block */
105 err = ext4_handle_dirty_metadata(handle, inode, path->p_bh); 107 err = __ext4_handle_dirty_metadata(where, line, handle,
108 inode, path->p_bh);
106 } else { 109 } else {
107 /* path points to leaf/index in inode body */ 110 /* path points to leaf/index in inode body */
108 err = ext4_mark_inode_dirty(handle, inode); 111 err = ext4_mark_inode_dirty(handle, inode);
@@ -114,11 +117,9 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
114 struct ext4_ext_path *path, 117 struct ext4_ext_path *path,
115 ext4_lblk_t block) 118 ext4_lblk_t block)
116{ 119{
117 int depth;
118
119 if (path) { 120 if (path) {
121 int depth = path->p_depth;
120 struct ext4_extent *ex; 122 struct ext4_extent *ex;
121 depth = path->p_depth;
122 123
123 /* 124 /*
124 * Try to predict block placement assuming that we are 125 * Try to predict block placement assuming that we are
@@ -180,12 +181,10 @@ static inline int ext4_ext_space_block(struct inode *inode, int check)
180 181
181 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 182 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
182 / sizeof(struct ext4_extent); 183 / sizeof(struct ext4_extent);
183 if (!check) {
184#ifdef AGGRESSIVE_TEST 184#ifdef AGGRESSIVE_TEST
185 if (size > 6) 185 if (!check && size > 6)
186 size = 6; 186 size = 6;
187#endif 187#endif
188 }
189 return size; 188 return size;
190} 189}
191 190
@@ -195,12 +194,10 @@ static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
195 194
196 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 195 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
197 / sizeof(struct ext4_extent_idx); 196 / sizeof(struct ext4_extent_idx);
198 if (!check) {
199#ifdef AGGRESSIVE_TEST 197#ifdef AGGRESSIVE_TEST
200 if (size > 5) 198 if (!check && size > 5)
201 size = 5; 199 size = 5;
202#endif 200#endif
203 }
204 return size; 201 return size;
205} 202}
206 203
@@ -211,12 +208,10 @@ static inline int ext4_ext_space_root(struct inode *inode, int check)
211 size = sizeof(EXT4_I(inode)->i_data); 208 size = sizeof(EXT4_I(inode)->i_data);
212 size -= sizeof(struct ext4_extent_header); 209 size -= sizeof(struct ext4_extent_header);
213 size /= sizeof(struct ext4_extent); 210 size /= sizeof(struct ext4_extent);
214 if (!check) {
215#ifdef AGGRESSIVE_TEST 211#ifdef AGGRESSIVE_TEST
216 if (size > 3) 212 if (!check && size > 3)
217 size = 3; 213 size = 3;
218#endif 214#endif
219 }
220 return size; 215 return size;
221} 216}
222 217
@@ -227,12 +222,10 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
227 size = sizeof(EXT4_I(inode)->i_data); 222 size = sizeof(EXT4_I(inode)->i_data);
228 size -= sizeof(struct ext4_extent_header); 223 size -= sizeof(struct ext4_extent_header);
229 size /= sizeof(struct ext4_extent_idx); 224 size /= sizeof(struct ext4_extent_idx);
230 if (!check) {
231#ifdef AGGRESSIVE_TEST 225#ifdef AGGRESSIVE_TEST
232 if (size > 4) 226 if (!check && size > 4)
233 size = 4; 227 size = 4;
234#endif 228#endif
235 }
236 return size; 229 return size;
237} 230}
238 231
@@ -244,7 +237,7 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
244int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) 237int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
245{ 238{
246 struct ext4_inode_info *ei = EXT4_I(inode); 239 struct ext4_inode_info *ei = EXT4_I(inode);
247 int idxs, num = 0; 240 int idxs;
248 241
249 idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 242 idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
250 / sizeof(struct ext4_extent_idx)); 243 / sizeof(struct ext4_extent_idx));
@@ -259,6 +252,8 @@ int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
259 */ 252 */
260 if (ei->i_da_metadata_calc_len && 253 if (ei->i_da_metadata_calc_len &&
261 ei->i_da_metadata_calc_last_lblock+1 == lblock) { 254 ei->i_da_metadata_calc_last_lblock+1 == lblock) {
255 int num = 0;
256
262 if ((ei->i_da_metadata_calc_len % idxs) == 0) 257 if ((ei->i_da_metadata_calc_len % idxs) == 0)
263 num++; 258 num++;
264 if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0) 259 if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
@@ -321,8 +316,6 @@ static int ext4_valid_extent_entries(struct inode *inode,
321 struct ext4_extent_header *eh, 316 struct ext4_extent_header *eh,
322 int depth) 317 int depth)
323{ 318{
324 struct ext4_extent *ext;
325 struct ext4_extent_idx *ext_idx;
326 unsigned short entries; 319 unsigned short entries;
327 if (eh->eh_entries == 0) 320 if (eh->eh_entries == 0)
328 return 1; 321 return 1;
@@ -331,7 +324,7 @@ static int ext4_valid_extent_entries(struct inode *inode,
331 324
332 if (depth == 0) { 325 if (depth == 0) {
333 /* leaf entries */ 326 /* leaf entries */
334 ext = EXT_FIRST_EXTENT(eh); 327 struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
335 while (entries) { 328 while (entries) {
336 if (!ext4_valid_extent(inode, ext)) 329 if (!ext4_valid_extent(inode, ext))
337 return 0; 330 return 0;
@@ -339,7 +332,7 @@ static int ext4_valid_extent_entries(struct inode *inode,
339 entries--; 332 entries--;
340 } 333 }
341 } else { 334 } else {
342 ext_idx = EXT_FIRST_INDEX(eh); 335 struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
343 while (entries) { 336 while (entries) {
344 if (!ext4_valid_extent_idx(inode, ext_idx)) 337 if (!ext4_valid_extent_idx(inode, ext_idx))
345 return 0; 338 return 0;
@@ -751,31 +744,30 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
751 return -EIO; 744 return -EIO;
752 } 745 }
753 746
754 len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
755 if (logical > le32_to_cpu(curp->p_idx->ei_block)) { 747 if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
756 /* insert after */ 748 /* insert after */
757 if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { 749 ext_debug("insert new index %d after: %llu\n", logical, ptr);
758 len = (len - 1) * sizeof(struct ext4_extent_idx);
759 len = len < 0 ? 0 : len;
760 ext_debug("insert new index %d after: %llu. "
761 "move %d from 0x%p to 0x%p\n",
762 logical, ptr, len,
763 (curp->p_idx + 1), (curp->p_idx + 2));
764 memmove(curp->p_idx + 2, curp->p_idx + 1, len);
765 }
766 ix = curp->p_idx + 1; 750 ix = curp->p_idx + 1;
767 } else { 751 } else {
768 /* insert before */ 752 /* insert before */
769 len = len * sizeof(struct ext4_extent_idx); 753 ext_debug("insert new index %d before: %llu\n", logical, ptr);
770 len = len < 0 ? 0 : len;
771 ext_debug("insert new index %d before: %llu. "
772 "move %d from 0x%p to 0x%p\n",
773 logical, ptr, len,
774 curp->p_idx, (curp->p_idx + 1));
775 memmove(curp->p_idx + 1, curp->p_idx, len);
776 ix = curp->p_idx; 754 ix = curp->p_idx;
777 } 755 }
778 756
757 len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
758 BUG_ON(len < 0);
759 if (len > 0) {
760 ext_debug("insert new index %d: "
761 "move %d indices from 0x%p to 0x%p\n",
762 logical, len, ix, ix + 1);
763 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
764 }
765
766 if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
767 EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
768 return -EIO;
769 }
770
779 ix->ei_block = cpu_to_le32(logical); 771 ix->ei_block = cpu_to_le32(logical);
780 ext4_idx_store_pblock(ix, ptr); 772 ext4_idx_store_pblock(ix, ptr);
781 le16_add_cpu(&curp->p_hdr->eh_entries, 1); 773 le16_add_cpu(&curp->p_hdr->eh_entries, 1);
@@ -1042,16 +1034,14 @@ cleanup:
1042 */ 1034 */
1043static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, 1035static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1044 unsigned int flags, 1036 unsigned int flags,
1045 struct ext4_ext_path *path,
1046 struct ext4_extent *newext) 1037 struct ext4_extent *newext)
1047{ 1038{
1048 struct ext4_ext_path *curp = path;
1049 struct ext4_extent_header *neh; 1039 struct ext4_extent_header *neh;
1050 struct buffer_head *bh; 1040 struct buffer_head *bh;
1051 ext4_fsblk_t newblock; 1041 ext4_fsblk_t newblock;
1052 int err = 0; 1042 int err = 0;
1053 1043
1054 newblock = ext4_ext_new_meta_block(handle, inode, path, 1044 newblock = ext4_ext_new_meta_block(handle, inode, NULL,
1055 newext, &err, flags); 1045 newext, &err, flags);
1056 if (newblock == 0) 1046 if (newblock == 0)
1057 return err; 1047 return err;
@@ -1071,7 +1061,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1071 } 1061 }
1072 1062
1073 /* move top-level index/leaf into new block */ 1063 /* move top-level index/leaf into new block */
1074 memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data)); 1064 memmove(bh->b_data, EXT4_I(inode)->i_data,
1065 sizeof(EXT4_I(inode)->i_data));
1075 1066
1076 /* set size of new block */ 1067 /* set size of new block */
1077 neh = ext_block_hdr(bh); 1068 neh = ext_block_hdr(bh);
@@ -1089,32 +1080,23 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1089 if (err) 1080 if (err)
1090 goto out; 1081 goto out;
1091 1082
1092 /* create index in new top-level index: num,max,pointer */ 1083 /* Update top-level index: num,max,pointer */
1093 err = ext4_ext_get_access(handle, inode, curp);
1094 if (err)
1095 goto out;
1096
1097 curp->p_hdr->eh_magic = EXT4_EXT_MAGIC;
1098 curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
1099 curp->p_hdr->eh_entries = cpu_to_le16(1);
1100 curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
1101
1102 if (path[0].p_hdr->eh_depth)
1103 curp->p_idx->ei_block =
1104 EXT_FIRST_INDEX(path[0].p_hdr)->ei_block;
1105 else
1106 curp->p_idx->ei_block =
1107 EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
1108 ext4_idx_store_pblock(curp->p_idx, newblock);
1109
1110 neh = ext_inode_hdr(inode); 1084 neh = ext_inode_hdr(inode);
1085 neh->eh_entries = cpu_to_le16(1);
1086 ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1087 if (neh->eh_depth == 0) {
1088 /* Root extent block becomes index block */
1089 neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
1090 EXT_FIRST_INDEX(neh)->ei_block =
1091 EXT_FIRST_EXTENT(neh)->ee_block;
1092 }
1111 ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", 1093 ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
1112 le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), 1094 le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
1113 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), 1095 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
1114 ext4_idx_pblock(EXT_FIRST_INDEX(neh))); 1096 ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
1115 1097
1116 neh->eh_depth = cpu_to_le16(path->p_depth + 1); 1098 neh->eh_depth = cpu_to_le16(neh->eh_depth + 1);
1117 err = ext4_ext_dirty(handle, inode, curp); 1099 ext4_mark_inode_dirty(handle, inode);
1118out: 1100out:
1119 brelse(bh); 1101 brelse(bh);
1120 1102
@@ -1162,8 +1144,7 @@ repeat:
1162 err = PTR_ERR(path); 1144 err = PTR_ERR(path);
1163 } else { 1145 } else {
1164 /* tree is full, time to grow in depth */ 1146 /* tree is full, time to grow in depth */
1165 err = ext4_ext_grow_indepth(handle, inode, flags, 1147 err = ext4_ext_grow_indepth(handle, inode, flags, newext);
1166 path, newext);
1167 if (err) 1148 if (err)
1168 goto out; 1149 goto out;
1169 1150
@@ -1235,9 +1216,9 @@ static int ext4_ext_search_left(struct inode *inode,
1235 if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { 1216 if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
1236 EXT4_ERROR_INODE(inode, 1217 EXT4_ERROR_INODE(inode,
1237 "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!", 1218 "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
1238 ix != NULL ? ix->ei_block : 0, 1219 ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
1239 EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? 1220 EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
1240 EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0, 1221 le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0,
1241 depth); 1222 depth);
1242 return -EIO; 1223 return -EIO;
1243 } 1224 }
@@ -1260,13 +1241,14 @@ static int ext4_ext_search_left(struct inode *inode,
1260/* 1241/*
1261 * search the closest allocated block to the right for *logical 1242 * search the closest allocated block to the right for *logical
1262 * and returns it at @logical + it's physical address at @phys 1243 * and returns it at @logical + it's physical address at @phys
1263 * if *logical is the smallest allocated block, the function 1244 * if *logical is the largest allocated block, the function
1264 * returns 0 at @phys 1245 * returns 0 at @phys
1265 * return value contains 0 (success) or error code 1246 * return value contains 0 (success) or error code
1266 */ 1247 */
1267static int ext4_ext_search_right(struct inode *inode, 1248static int ext4_ext_search_right(struct inode *inode,
1268 struct ext4_ext_path *path, 1249 struct ext4_ext_path *path,
1269 ext4_lblk_t *logical, ext4_fsblk_t *phys) 1250 ext4_lblk_t *logical, ext4_fsblk_t *phys,
1251 struct ext4_extent **ret_ex)
1270{ 1252{
1271 struct buffer_head *bh = NULL; 1253 struct buffer_head *bh = NULL;
1272 struct ext4_extent_header *eh; 1254 struct ext4_extent_header *eh;
@@ -1308,9 +1290,7 @@ static int ext4_ext_search_right(struct inode *inode,
1308 return -EIO; 1290 return -EIO;
1309 } 1291 }
1310 } 1292 }
1311 *logical = le32_to_cpu(ex->ee_block); 1293 goto found_extent;
1312 *phys = ext4_ext_pblock(ex);
1313 return 0;
1314 } 1294 }
1315 1295
1316 if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { 1296 if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
@@ -1323,9 +1303,7 @@ static int ext4_ext_search_right(struct inode *inode,
1323 if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { 1303 if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
1324 /* next allocated block in this leaf */ 1304 /* next allocated block in this leaf */
1325 ex++; 1305 ex++;
1326 *logical = le32_to_cpu(ex->ee_block); 1306 goto found_extent;
1327 *phys = ext4_ext_pblock(ex);
1328 return 0;
1329 } 1307 }
1330 1308
1331 /* go up and search for index to the right */ 1309 /* go up and search for index to the right */
@@ -1368,9 +1346,12 @@ got_index:
1368 return -EIO; 1346 return -EIO;
1369 } 1347 }
1370 ex = EXT_FIRST_EXTENT(eh); 1348 ex = EXT_FIRST_EXTENT(eh);
1349found_extent:
1371 *logical = le32_to_cpu(ex->ee_block); 1350 *logical = le32_to_cpu(ex->ee_block);
1372 *phys = ext4_ext_pblock(ex); 1351 *phys = ext4_ext_pblock(ex);
1373 put_bh(bh); 1352 *ret_ex = ex;
1353 if (bh)
1354 put_bh(bh);
1374 return 0; 1355 return 0;
1375} 1356}
1376 1357
@@ -1395,7 +1376,8 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
1395 while (depth >= 0) { 1376 while (depth >= 0) {
1396 if (depth == path->p_depth) { 1377 if (depth == path->p_depth) {
1397 /* leaf */ 1378 /* leaf */
1398 if (path[depth].p_ext != 1379 if (path[depth].p_ext &&
1380 path[depth].p_ext !=
1399 EXT_LAST_EXTENT(path[depth].p_hdr)) 1381 EXT_LAST_EXTENT(path[depth].p_hdr))
1400 return le32_to_cpu(path[depth].p_ext[1].ee_block); 1382 return le32_to_cpu(path[depth].p_ext[1].ee_block);
1401 } else { 1383 } else {
@@ -1623,7 +1605,8 @@ static int ext4_ext_try_to_merge(struct inode *inode,
1623 * such that there will be no overlap, and then returns 1. 1605 * such that there will be no overlap, and then returns 1.
1624 * If there is no overlap found, it returns 0. 1606 * If there is no overlap found, it returns 0.
1625 */ 1607 */
1626static unsigned int ext4_ext_check_overlap(struct inode *inode, 1608static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
1609 struct inode *inode,
1627 struct ext4_extent *newext, 1610 struct ext4_extent *newext,
1628 struct ext4_ext_path *path) 1611 struct ext4_ext_path *path)
1629{ 1612{
@@ -1637,6 +1620,7 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode,
1637 if (!path[depth].p_ext) 1620 if (!path[depth].p_ext)
1638 goto out; 1621 goto out;
1639 b2 = le32_to_cpu(path[depth].p_ext->ee_block); 1622 b2 = le32_to_cpu(path[depth].p_ext->ee_block);
1623 b2 &= ~(sbi->s_cluster_ratio - 1);
1640 1624
1641 /* 1625 /*
1642 * get the next allocated block if the extent in the path 1626 * get the next allocated block if the extent in the path
@@ -1646,6 +1630,7 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode,
1646 b2 = ext4_ext_next_allocated_block(path); 1630 b2 = ext4_ext_next_allocated_block(path);
1647 if (b2 == EXT_MAX_BLOCKS) 1631 if (b2 == EXT_MAX_BLOCKS)
1648 goto out; 1632 goto out;
1633 b2 &= ~(sbi->s_cluster_ratio - 1);
1649 } 1634 }
1650 1635
1651 /* check for wrap through zero on extent logical start block*/ 1636 /* check for wrap through zero on extent logical start block*/
@@ -1697,7 +1682,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1697 /* try to insert block into found extent and return */ 1682 /* try to insert block into found extent and return */
1698 if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) 1683 if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
1699 && ext4_can_extents_be_merged(inode, ex, newext)) { 1684 && ext4_can_extents_be_merged(inode, ex, newext)) {
1700 ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", 1685 ext_debug("append [%d]%d block to %u:[%d]%d (from %llu)\n",
1701 ext4_ext_is_uninitialized(newext), 1686 ext4_ext_is_uninitialized(newext),
1702 ext4_ext_get_actual_len(newext), 1687 ext4_ext_get_actual_len(newext),
1703 le32_to_cpu(ex->ee_block), 1688 le32_to_cpu(ex->ee_block),
@@ -1735,7 +1720,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1735 if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) 1720 if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
1736 next = ext4_ext_next_leaf_block(path); 1721 next = ext4_ext_next_leaf_block(path);
1737 if (next != EXT_MAX_BLOCKS) { 1722 if (next != EXT_MAX_BLOCKS) {
1738 ext_debug("next leaf block - %d\n", next); 1723 ext_debug("next leaf block - %u\n", next);
1739 BUG_ON(npath != NULL); 1724 BUG_ON(npath != NULL);
1740 npath = ext4_ext_find_extent(inode, next, NULL); 1725 npath = ext4_ext_find_extent(inode, next, NULL);
1741 if (IS_ERR(npath)) 1726 if (IS_ERR(npath))
@@ -1773,46 +1758,51 @@ has_space:
1773 1758
1774 if (!nearex) { 1759 if (!nearex) {
1775 /* there is no extent in this leaf, create first one */ 1760 /* there is no extent in this leaf, create first one */
1776 ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", 1761 ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n",
1777 le32_to_cpu(newext->ee_block), 1762 le32_to_cpu(newext->ee_block),
1778 ext4_ext_pblock(newext), 1763 ext4_ext_pblock(newext),
1779 ext4_ext_is_uninitialized(newext), 1764 ext4_ext_is_uninitialized(newext),
1780 ext4_ext_get_actual_len(newext)); 1765 ext4_ext_get_actual_len(newext));
1781 path[depth].p_ext = EXT_FIRST_EXTENT(eh); 1766 nearex = EXT_FIRST_EXTENT(eh);
1782 } else if (le32_to_cpu(newext->ee_block) 1767 } else {
1768 if (le32_to_cpu(newext->ee_block)
1783 > le32_to_cpu(nearex->ee_block)) { 1769 > le32_to_cpu(nearex->ee_block)) {
1784/* BUG_ON(newext->ee_block == nearex->ee_block); */ 1770 /* Insert after */
1785 if (nearex != EXT_LAST_EXTENT(eh)) { 1771 ext_debug("insert %u:%llu:[%d]%d before: "
1786 len = EXT_MAX_EXTENT(eh) - nearex; 1772 "nearest %p\n",
1787 len = (len - 1) * sizeof(struct ext4_extent);
1788 len = len < 0 ? 0 : len;
1789 ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
1790 "move %d from 0x%p to 0x%p\n",
1791 le32_to_cpu(newext->ee_block), 1773 le32_to_cpu(newext->ee_block),
1792 ext4_ext_pblock(newext), 1774 ext4_ext_pblock(newext),
1793 ext4_ext_is_uninitialized(newext), 1775 ext4_ext_is_uninitialized(newext),
1794 ext4_ext_get_actual_len(newext), 1776 ext4_ext_get_actual_len(newext),
1795 nearex, len, nearex + 1, nearex + 2); 1777 nearex);
1796 memmove(nearex + 2, nearex + 1, len); 1778 nearex++;
1779 } else {
1780 /* Insert before */
1781 BUG_ON(newext->ee_block == nearex->ee_block);
1782 ext_debug("insert %u:%llu:[%d]%d after: "
1783 "nearest %p\n",
1784 le32_to_cpu(newext->ee_block),
1785 ext4_ext_pblock(newext),
1786 ext4_ext_is_uninitialized(newext),
1787 ext4_ext_get_actual_len(newext),
1788 nearex);
1789 }
1790 len = EXT_LAST_EXTENT(eh) - nearex + 1;
1791 if (len > 0) {
1792 ext_debug("insert %u:%llu:[%d]%d: "
1793 "move %d extents from 0x%p to 0x%p\n",
1794 le32_to_cpu(newext->ee_block),
1795 ext4_ext_pblock(newext),
1796 ext4_ext_is_uninitialized(newext),
1797 ext4_ext_get_actual_len(newext),
1798 len, nearex, nearex + 1);
1799 memmove(nearex + 1, nearex,
1800 len * sizeof(struct ext4_extent));
1797 } 1801 }
1798 path[depth].p_ext = nearex + 1;
1799 } else {
1800 BUG_ON(newext->ee_block == nearex->ee_block);
1801 len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
1802 len = len < 0 ? 0 : len;
1803 ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
1804 "move %d from 0x%p to 0x%p\n",
1805 le32_to_cpu(newext->ee_block),
1806 ext4_ext_pblock(newext),
1807 ext4_ext_is_uninitialized(newext),
1808 ext4_ext_get_actual_len(newext),
1809 nearex, len, nearex, nearex + 1);
1810 memmove(nearex + 1, nearex, len);
1811 path[depth].p_ext = nearex;
1812 } 1802 }
1813 1803
1814 le16_add_cpu(&eh->eh_entries, 1); 1804 le16_add_cpu(&eh->eh_entries, 1);
1815 nearex = path[depth].p_ext; 1805 path[depth].p_ext = nearex;
1816 nearex->ee_block = newext->ee_block; 1806 nearex->ee_block = newext->ee_block;
1817 ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext)); 1807 ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
1818 nearex->ee_len = newext->ee_len; 1808 nearex->ee_len = newext->ee_len;
@@ -1962,6 +1952,7 @@ ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
1962 struct ext4_ext_cache *cex; 1952 struct ext4_ext_cache *cex;
1963 BUG_ON(len == 0); 1953 BUG_ON(len == 0);
1964 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1954 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1955 trace_ext4_ext_put_in_cache(inode, block, len, start);
1965 cex = &EXT4_I(inode)->i_cached_extent; 1956 cex = &EXT4_I(inode)->i_cached_extent;
1966 cex->ec_block = block; 1957 cex->ec_block = block;
1967 cex->ec_len = len; 1958 cex->ec_len = len;
@@ -2063,6 +2054,7 @@ errout:
2063 sbi->extent_cache_misses++; 2054 sbi->extent_cache_misses++;
2064 else 2055 else
2065 sbi->extent_cache_hits++; 2056 sbi->extent_cache_hits++;
2057 trace_ext4_ext_in_cache(inode, block, ret);
2066 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 2058 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
2067 return ret; 2059 return ret;
2068} 2060}
@@ -2130,6 +2122,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
2130 if (err) 2122 if (err)
2131 return err; 2123 return err;
2132 ext_debug("index is empty, remove it, free block %llu\n", leaf); 2124 ext_debug("index is empty, remove it, free block %llu\n", leaf);
2125 trace_ext4_ext_rm_idx(inode, leaf);
2126
2133 ext4_free_blocks(handle, inode, NULL, leaf, 1, 2127 ext4_free_blocks(handle, inode, NULL, leaf, 1,
2134 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); 2128 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
2135 return err; 2129 return err;
@@ -2158,7 +2152,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
2158 * need to account for leaf block credit 2152 * need to account for leaf block credit
2159 * 2153 *
2160 * bitmaps and block group descriptor blocks 2154 * bitmaps and block group descriptor blocks
2161 * and other metadat blocks still need to be 2155 * and other metadata blocks still need to be
2162 * accounted. 2156 * accounted.
2163 */ 2157 */
2164 /* 1 bitmap, 1 block group descriptor */ 2158 /* 1 bitmap, 1 block group descriptor */
@@ -2195,14 +2189,40 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
2195} 2189}
2196 2190
2197static int ext4_remove_blocks(handle_t *handle, struct inode *inode, 2191static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2198 struct ext4_extent *ex, 2192 struct ext4_extent *ex,
2199 ext4_lblk_t from, ext4_lblk_t to) 2193 ext4_fsblk_t *partial_cluster,
2194 ext4_lblk_t from, ext4_lblk_t to)
2200{ 2195{
2196 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2201 unsigned short ee_len = ext4_ext_get_actual_len(ex); 2197 unsigned short ee_len = ext4_ext_get_actual_len(ex);
2198 ext4_fsblk_t pblk;
2202 int flags = EXT4_FREE_BLOCKS_FORGET; 2199 int flags = EXT4_FREE_BLOCKS_FORGET;
2203 2200
2204 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 2201 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
2205 flags |= EXT4_FREE_BLOCKS_METADATA; 2202 flags |= EXT4_FREE_BLOCKS_METADATA;
2203 /*
2204 * For bigalloc file systems, we never free a partial cluster
2205 * at the beginning of the extent. Instead, we make a note
2206 * that we tried freeing the cluster, and check to see if we
2207 * need to free it on a subsequent call to ext4_remove_blocks,
2208 * or at the end of the ext4_truncate() operation.
2209 */
2210 flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
2211
2212 trace_ext4_remove_blocks(inode, ex, from, to, *partial_cluster);
2213 /*
2214 * If we have a partial cluster, and it's different from the
2215 * cluster of the last block, we need to explicitly free the
2216 * partial cluster here.
2217 */
2218 pblk = ext4_ext_pblock(ex) + ee_len - 1;
2219 if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
2220 ext4_free_blocks(handle, inode, NULL,
2221 EXT4_C2B(sbi, *partial_cluster),
2222 sbi->s_cluster_ratio, flags);
2223 *partial_cluster = 0;
2224 }
2225
2206#ifdef EXTENTS_STATS 2226#ifdef EXTENTS_STATS
2207 { 2227 {
2208 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2228 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2222,12 +2242,24 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2222 && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { 2242 && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
2223 /* tail removal */ 2243 /* tail removal */
2224 ext4_lblk_t num; 2244 ext4_lblk_t num;
2225 ext4_fsblk_t start;
2226 2245
2227 num = le32_to_cpu(ex->ee_block) + ee_len - from; 2246 num = le32_to_cpu(ex->ee_block) + ee_len - from;
2228 start = ext4_ext_pblock(ex) + ee_len - num; 2247 pblk = ext4_ext_pblock(ex) + ee_len - num;
2229 ext_debug("free last %u blocks starting %llu\n", num, start); 2248 ext_debug("free last %u blocks starting %llu\n", num, pblk);
2230 ext4_free_blocks(handle, inode, NULL, start, num, flags); 2249 ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
2250 /*
2251 * If the block range to be freed didn't start at the
2252 * beginning of a cluster, and we removed the entire
2253 * extent, save the partial cluster here, since we
2254 * might need to delete if we determine that the
2255 * truncate operation has removed all of the blocks in
2256 * the cluster.
2257 */
2258 if (pblk & (sbi->s_cluster_ratio - 1) &&
2259 (ee_len == num))
2260 *partial_cluster = EXT4_B2C(sbi, pblk);
2261 else
2262 *partial_cluster = 0;
2231 } else if (from == le32_to_cpu(ex->ee_block) 2263 } else if (from == le32_to_cpu(ex->ee_block)
2232 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { 2264 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
2233 /* head removal */ 2265 /* head removal */
@@ -2238,7 +2270,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2238 start = ext4_ext_pblock(ex); 2270 start = ext4_ext_pblock(ex);
2239 2271
2240 ext_debug("free first %u blocks starting %llu\n", num, start); 2272 ext_debug("free first %u blocks starting %llu\n", num, start);
2241 ext4_free_blocks(handle, inode, 0, start, num, flags); 2273 ext4_free_blocks(handle, inode, NULL, start, num, flags);
2242 2274
2243 } else { 2275 } else {
2244 printk(KERN_INFO "strange request: removal(2) " 2276 printk(KERN_INFO "strange request: removal(2) "
@@ -2262,19 +2294,19 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2262 */ 2294 */
2263static int 2295static int
2264ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, 2296ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2265 struct ext4_ext_path *path, ext4_lblk_t start, 2297 struct ext4_ext_path *path, ext4_fsblk_t *partial_cluster,
2266 ext4_lblk_t end) 2298 ext4_lblk_t start, ext4_lblk_t end)
2267{ 2299{
2300 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2268 int err = 0, correct_index = 0; 2301 int err = 0, correct_index = 0;
2269 int depth = ext_depth(inode), credits; 2302 int depth = ext_depth(inode), credits;
2270 struct ext4_extent_header *eh; 2303 struct ext4_extent_header *eh;
2271 ext4_lblk_t a, b, block; 2304 ext4_lblk_t a, b;
2272 unsigned num; 2305 unsigned num;
2273 ext4_lblk_t ex_ee_block; 2306 ext4_lblk_t ex_ee_block;
2274 unsigned short ex_ee_len; 2307 unsigned short ex_ee_len;
2275 unsigned uninitialized = 0; 2308 unsigned uninitialized = 0;
2276 struct ext4_extent *ex; 2309 struct ext4_extent *ex;
2277 struct ext4_map_blocks map;
2278 2310
2279 /* the header must be checked already in ext4_ext_remove_space() */ 2311 /* the header must be checked already in ext4_ext_remove_space() */
2280 ext_debug("truncate since %u in leaf\n", start); 2312 ext_debug("truncate since %u in leaf\n", start);
@@ -2291,6 +2323,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2291 ex_ee_block = le32_to_cpu(ex->ee_block); 2323 ex_ee_block = le32_to_cpu(ex->ee_block);
2292 ex_ee_len = ext4_ext_get_actual_len(ex); 2324 ex_ee_len = ext4_ext_get_actual_len(ex);
2293 2325
2326 trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
2327
2294 while (ex >= EXT_FIRST_EXTENT(eh) && 2328 while (ex >= EXT_FIRST_EXTENT(eh) &&
2295 ex_ee_block + ex_ee_len > start) { 2329 ex_ee_block + ex_ee_len > start) {
2296 2330
@@ -2315,86 +2349,18 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2315 ex_ee_block = le32_to_cpu(ex->ee_block); 2349 ex_ee_block = le32_to_cpu(ex->ee_block);
2316 ex_ee_len = ext4_ext_get_actual_len(ex); 2350 ex_ee_len = ext4_ext_get_actual_len(ex);
2317 continue; 2351 continue;
2318 } else if (a != ex_ee_block && 2352 } else if (b != ex_ee_block + ex_ee_len - 1) {
2319 b != ex_ee_block + ex_ee_len - 1) { 2353 EXT4_ERROR_INODE(inode," bad truncate %u:%u\n",
2320 /* 2354 start, end);
2321 * If this is a truncate, then this condition should 2355 err = -EIO;
2322 * never happen because at least one of the end points 2356 goto out;
2323 * needs to be on the edge of the extent.
2324 */
2325 if (end == EXT_MAX_BLOCKS - 1) {
2326 ext_debug(" bad truncate %u:%u\n",
2327 start, end);
2328 block = 0;
2329 num = 0;
2330 err = -EIO;
2331 goto out;
2332 }
2333 /*
2334 * else this is a hole punch, so the extent needs to
2335 * be split since neither edge of the hole is on the
2336 * extent edge
2337 */
2338 else{
2339 map.m_pblk = ext4_ext_pblock(ex);
2340 map.m_lblk = ex_ee_block;
2341 map.m_len = b - ex_ee_block;
2342
2343 err = ext4_split_extent(handle,
2344 inode, path, &map, 0,
2345 EXT4_GET_BLOCKS_PUNCH_OUT_EXT |
2346 EXT4_GET_BLOCKS_PRE_IO);
2347
2348 if (err < 0)
2349 goto out;
2350
2351 ex_ee_len = ext4_ext_get_actual_len(ex);
2352
2353 b = ex_ee_block+ex_ee_len - 1 < end ?
2354 ex_ee_block+ex_ee_len - 1 : end;
2355
2356 /* Then remove tail of this extent */
2357 block = ex_ee_block;
2358 num = a - block;
2359 }
2360 } else if (a != ex_ee_block) { 2357 } else if (a != ex_ee_block) {
2361 /* remove tail of the extent */ 2358 /* remove tail of the extent */
2362 block = ex_ee_block; 2359 num = a - ex_ee_block;
2363 num = a - block;
2364 } else if (b != ex_ee_block + ex_ee_len - 1) {
2365 /* remove head of the extent */
2366 block = b;
2367 num = ex_ee_block + ex_ee_len - b;
2368
2369 /*
2370 * If this is a truncate, this condition
2371 * should never happen
2372 */
2373 if (end == EXT_MAX_BLOCKS - 1) {
2374 ext_debug(" bad truncate %u:%u\n",
2375 start, end);
2376 err = -EIO;
2377 goto out;
2378 }
2379 } else { 2360 } else {
2380 /* remove whole extent: excellent! */ 2361 /* remove whole extent: excellent! */
2381 block = ex_ee_block;
2382 num = 0; 2362 num = 0;
2383 if (a != ex_ee_block) {
2384 ext_debug(" bad truncate %u:%u\n",
2385 start, end);
2386 err = -EIO;
2387 goto out;
2388 }
2389
2390 if (b != ex_ee_block + ex_ee_len - 1) {
2391 ext_debug(" bad truncate %u:%u\n",
2392 start, end);
2393 err = -EIO;
2394 goto out;
2395 }
2396 } 2363 }
2397
2398 /* 2364 /*
2399 * 3 for leaf, sb, and inode plus 2 (bmap and group 2365 * 3 for leaf, sb, and inode plus 2 (bmap and group
2400 * descriptor) for each block group; assume two block 2366 * descriptor) for each block group; assume two block
@@ -2416,23 +2382,15 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2416 if (err) 2382 if (err)
2417 goto out; 2383 goto out;
2418 2384
2419 err = ext4_remove_blocks(handle, inode, ex, a, b); 2385 err = ext4_remove_blocks(handle, inode, ex, partial_cluster,
2386 a, b);
2420 if (err) 2387 if (err)
2421 goto out; 2388 goto out;
2422 2389
2423 if (num == 0) { 2390 if (num == 0)
2424 /* this extent is removed; mark slot entirely unused */ 2391 /* this extent is removed; mark slot entirely unused */
2425 ext4_ext_store_pblock(ex, 0); 2392 ext4_ext_store_pblock(ex, 0);
2426 } else if (block != ex_ee_block) {
2427 /*
2428 * If this was a head removal, then we need to update
2429 * the physical block since it is now at a different
2430 * location
2431 */
2432 ext4_ext_store_pblock(ex, ext4_ext_pblock(ex) + (b-a));
2433 }
2434 2393
2435 ex->ee_block = cpu_to_le32(block);
2436 ex->ee_len = cpu_to_le16(num); 2394 ex->ee_len = cpu_to_le16(num);
2437 /* 2395 /*
2438 * Do not mark uninitialized if all the blocks in the 2396 * Do not mark uninitialized if all the blocks in the
@@ -2440,11 +2398,6 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2440 */ 2398 */
2441 if (uninitialized && num) 2399 if (uninitialized && num)
2442 ext4_ext_mark_uninitialized(ex); 2400 ext4_ext_mark_uninitialized(ex);
2443
2444 err = ext4_ext_dirty(handle, inode, path + depth);
2445 if (err)
2446 goto out;
2447
2448 /* 2401 /*
2449 * If the extent was completely released, 2402 * If the extent was completely released,
2450 * we need to remove it from the leaf 2403 * we need to remove it from the leaf
@@ -2464,9 +2417,14 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2464 sizeof(struct ext4_extent)); 2417 sizeof(struct ext4_extent));
2465 } 2418 }
2466 le16_add_cpu(&eh->eh_entries, -1); 2419 le16_add_cpu(&eh->eh_entries, -1);
2467 } 2420 } else
2421 *partial_cluster = 0;
2468 2422
2469 ext_debug("new extent: %u:%u:%llu\n", block, num, 2423 err = ext4_ext_dirty(handle, inode, path + depth);
2424 if (err)
2425 goto out;
2426
2427 ext_debug("new extent: %u:%u:%llu\n", ex_ee_block, num,
2470 ext4_ext_pblock(ex)); 2428 ext4_ext_pblock(ex));
2471 ex--; 2429 ex--;
2472 ex_ee_block = le32_to_cpu(ex->ee_block); 2430 ex_ee_block = le32_to_cpu(ex->ee_block);
@@ -2476,6 +2434,25 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2476 if (correct_index && eh->eh_entries) 2434 if (correct_index && eh->eh_entries)
2477 err = ext4_ext_correct_indexes(handle, inode, path); 2435 err = ext4_ext_correct_indexes(handle, inode, path);
2478 2436
2437 /*
2438 * If there is still a entry in the leaf node, check to see if
2439 * it references the partial cluster. This is the only place
2440 * where it could; if it doesn't, we can free the cluster.
2441 */
2442 if (*partial_cluster && ex >= EXT_FIRST_EXTENT(eh) &&
2443 (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
2444 *partial_cluster)) {
2445 int flags = EXT4_FREE_BLOCKS_FORGET;
2446
2447 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
2448 flags |= EXT4_FREE_BLOCKS_METADATA;
2449
2450 ext4_free_blocks(handle, inode, NULL,
2451 EXT4_C2B(sbi, *partial_cluster),
2452 sbi->s_cluster_ratio, flags);
2453 *partial_cluster = 0;
2454 }
2455
2479 /* if this leaf is free, then we should 2456 /* if this leaf is free, then we should
2480 * remove it from index block above */ 2457 * remove it from index block above */
2481 if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) 2458 if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
@@ -2511,6 +2488,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
2511 struct super_block *sb = inode->i_sb; 2488 struct super_block *sb = inode->i_sb;
2512 int depth = ext_depth(inode); 2489 int depth = ext_depth(inode);
2513 struct ext4_ext_path *path; 2490 struct ext4_ext_path *path;
2491 ext4_fsblk_t partial_cluster = 0;
2514 handle_t *handle; 2492 handle_t *handle;
2515 int i, err; 2493 int i, err;
2516 2494
@@ -2524,6 +2502,8 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
2524again: 2502again:
2525 ext4_ext_invalidate_cache(inode); 2503 ext4_ext_invalidate_cache(inode);
2526 2504
2505 trace_ext4_ext_remove_space(inode, start, depth);
2506
2527 /* 2507 /*
2528 * We start scanning from right side, freeing all the blocks 2508 * We start scanning from right side, freeing all the blocks
2529 * after i_size and walking into the tree depth-wise. 2509 * after i_size and walking into the tree depth-wise.
@@ -2546,7 +2526,8 @@ again:
2546 if (i == depth) { 2526 if (i == depth) {
2547 /* this is leaf block */ 2527 /* this is leaf block */
2548 err = ext4_ext_rm_leaf(handle, inode, path, 2528 err = ext4_ext_rm_leaf(handle, inode, path,
2549 start, EXT_MAX_BLOCKS - 1); 2529 &partial_cluster, start,
2530 EXT_MAX_BLOCKS - 1);
2550 /* root level has p_bh == NULL, brelse() eats this */ 2531 /* root level has p_bh == NULL, brelse() eats this */
2551 brelse(path[i].p_bh); 2532 brelse(path[i].p_bh);
2552 path[i].p_bh = NULL; 2533 path[i].p_bh = NULL;
@@ -2618,6 +2599,24 @@ again:
2618 } 2599 }
2619 } 2600 }
2620 2601
2602 trace_ext4_ext_remove_space_done(inode, start, depth, partial_cluster,
2603 path->p_hdr->eh_entries);
2604
2605 /* If we still have something in the partial cluster and we have removed
2606 * even the first extent, then we should free the blocks in the partial
2607 * cluster as well. */
2608 if (partial_cluster && path->p_hdr->eh_entries == 0) {
2609 int flags = EXT4_FREE_BLOCKS_FORGET;
2610
2611 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
2612 flags |= EXT4_FREE_BLOCKS_METADATA;
2613
2614 ext4_free_blocks(handle, inode, NULL,
2615 EXT4_C2B(EXT4_SB(sb), partial_cluster),
2616 EXT4_SB(sb)->s_cluster_ratio, flags);
2617 partial_cluster = 0;
2618 }
2619
2621 /* TODO: flexible tree reduction should be here */ 2620 /* TODO: flexible tree reduction should be here */
2622 if (path->p_hdr->eh_entries == 0) { 2621 if (path->p_hdr->eh_entries == 0) {
2623 /* 2622 /*
@@ -2909,17 +2908,29 @@ out:
2909 * a> There is no split required: Entire extent should be initialized 2908 * a> There is no split required: Entire extent should be initialized
2910 * b> Splits in two extents: Write is happening at either end of the extent 2909 * b> Splits in two extents: Write is happening at either end of the extent
2911 * c> Splits in three extents: Somone is writing in middle of the extent 2910 * c> Splits in three extents: Somone is writing in middle of the extent
2911 *
2912 * Pre-conditions:
2913 * - The extent pointed to by 'path' is uninitialized.
2914 * - The extent pointed to by 'path' contains a superset
2915 * of the logical span [map->m_lblk, map->m_lblk + map->m_len).
2916 *
2917 * Post-conditions on success:
2918 * - the returned value is the number of blocks beyond map->l_lblk
2919 * that are allocated and initialized.
2920 * It is guaranteed to be >= map->m_len.
2912 */ 2921 */
2913static int ext4_ext_convert_to_initialized(handle_t *handle, 2922static int ext4_ext_convert_to_initialized(handle_t *handle,
2914 struct inode *inode, 2923 struct inode *inode,
2915 struct ext4_map_blocks *map, 2924 struct ext4_map_blocks *map,
2916 struct ext4_ext_path *path) 2925 struct ext4_ext_path *path)
2917{ 2926{
2927 struct ext4_extent_header *eh;
2918 struct ext4_map_blocks split_map; 2928 struct ext4_map_blocks split_map;
2919 struct ext4_extent zero_ex; 2929 struct ext4_extent zero_ex;
2920 struct ext4_extent *ex; 2930 struct ext4_extent *ex;
2921 ext4_lblk_t ee_block, eof_block; 2931 ext4_lblk_t ee_block, eof_block;
2922 unsigned int allocated, ee_len, depth; 2932 unsigned int ee_len, depth;
2933 int allocated;
2923 int err = 0; 2934 int err = 0;
2924 int split_flag = 0; 2935 int split_flag = 0;
2925 2936
@@ -2933,11 +2944,93 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2933 eof_block = map->m_lblk + map->m_len; 2944 eof_block = map->m_lblk + map->m_len;
2934 2945
2935 depth = ext_depth(inode); 2946 depth = ext_depth(inode);
2947 eh = path[depth].p_hdr;
2936 ex = path[depth].p_ext; 2948 ex = path[depth].p_ext;
2937 ee_block = le32_to_cpu(ex->ee_block); 2949 ee_block = le32_to_cpu(ex->ee_block);
2938 ee_len = ext4_ext_get_actual_len(ex); 2950 ee_len = ext4_ext_get_actual_len(ex);
2939 allocated = ee_len - (map->m_lblk - ee_block); 2951 allocated = ee_len - (map->m_lblk - ee_block);
2940 2952
2953 trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
2954
2955 /* Pre-conditions */
2956 BUG_ON(!ext4_ext_is_uninitialized(ex));
2957 BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));
2958 BUG_ON(map->m_lblk + map->m_len > ee_block + ee_len);
2959
2960 /*
2961 * Attempt to transfer newly initialized blocks from the currently
2962 * uninitialized extent to its left neighbor. This is much cheaper
2963 * than an insertion followed by a merge as those involve costly
2964 * memmove() calls. This is the common case in steady state for
2965 * workloads doing fallocate(FALLOC_FL_KEEP_SIZE) followed by append
2966 * writes.
2967 *
2968 * Limitations of the current logic:
2969 * - L1: we only deal with writes at the start of the extent.
2970 * The approach could be extended to writes at the end
2971 * of the extent but this scenario was deemed less common.
2972 * - L2: we do not deal with writes covering the whole extent.
2973 * This would require removing the extent if the transfer
2974 * is possible.
2975 * - L3: we only attempt to merge with an extent stored in the
2976 * same extent tree node.
2977 */
2978 if ((map->m_lblk == ee_block) && /*L1*/
2979 (map->m_len < ee_len) && /*L2*/
2980 (ex > EXT_FIRST_EXTENT(eh))) { /*L3*/
2981 struct ext4_extent *prev_ex;
2982 ext4_lblk_t prev_lblk;
2983 ext4_fsblk_t prev_pblk, ee_pblk;
2984 unsigned int prev_len, write_len;
2985
2986 prev_ex = ex - 1;
2987 prev_lblk = le32_to_cpu(prev_ex->ee_block);
2988 prev_len = ext4_ext_get_actual_len(prev_ex);
2989 prev_pblk = ext4_ext_pblock(prev_ex);
2990 ee_pblk = ext4_ext_pblock(ex);
2991 write_len = map->m_len;
2992
2993 /*
2994 * A transfer of blocks from 'ex' to 'prev_ex' is allowed
2995 * upon those conditions:
2996 * - C1: prev_ex is initialized,
2997 * - C2: prev_ex is logically abutting ex,
2998 * - C3: prev_ex is physically abutting ex,
2999 * - C4: prev_ex can receive the additional blocks without
3000 * overflowing the (initialized) length limit.
3001 */
3002 if ((!ext4_ext_is_uninitialized(prev_ex)) && /*C1*/
3003 ((prev_lblk + prev_len) == ee_block) && /*C2*/
3004 ((prev_pblk + prev_len) == ee_pblk) && /*C3*/
3005 (prev_len < (EXT_INIT_MAX_LEN - write_len))) { /*C4*/
3006 err = ext4_ext_get_access(handle, inode, path + depth);
3007 if (err)
3008 goto out;
3009
3010 trace_ext4_ext_convert_to_initialized_fastpath(inode,
3011 map, ex, prev_ex);
3012
3013 /* Shift the start of ex by 'write_len' blocks */
3014 ex->ee_block = cpu_to_le32(ee_block + write_len);
3015 ext4_ext_store_pblock(ex, ee_pblk + write_len);
3016 ex->ee_len = cpu_to_le16(ee_len - write_len);
3017 ext4_ext_mark_uninitialized(ex); /* Restore the flag */
3018
3019 /* Extend prev_ex by 'write_len' blocks */
3020 prev_ex->ee_len = cpu_to_le16(prev_len + write_len);
3021
3022 /* Mark the block containing both extents as dirty */
3023 ext4_ext_dirty(handle, inode, path + depth);
3024
3025 /* Update path to point to the right extent */
3026 path[depth].p_ext = prev_ex;
3027
3028 /* Result: number of initialized blocks past m_lblk */
3029 allocated = write_len;
3030 goto out;
3031 }
3032 }
3033
2941 WARN_ON(map->m_lblk < ee_block); 3034 WARN_ON(map->m_lblk < ee_block);
2942 /* 3035 /*
2943 * It is safe to convert extent to initialized via explicit 3036 * It is safe to convert extent to initialized via explicit
@@ -3165,6 +3258,192 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
3165 return ext4_mark_inode_dirty(handle, inode); 3258 return ext4_mark_inode_dirty(handle, inode);
3166} 3259}
3167 3260
3261/**
3262 * ext4_find_delalloc_range: find delayed allocated block in the given range.
3263 *
3264 * Goes through the buffer heads in the range [lblk_start, lblk_end] and returns
3265 * whether there are any buffers marked for delayed allocation. It returns '1'
3266 * on the first delalloc'ed buffer head found. If no buffer head in the given
3267 * range is marked for delalloc, it returns 0.
3268 * lblk_start should always be <= lblk_end.
3269 * search_hint_reverse is to indicate that searching in reverse from lblk_end to
3270 * lblk_start might be more efficient (i.e., we will likely hit the delalloc'ed
3271 * block sooner). This is useful when blocks are truncated sequentially from
3272 * lblk_start towards lblk_end.
3273 */
3274static int ext4_find_delalloc_range(struct inode *inode,
3275 ext4_lblk_t lblk_start,
3276 ext4_lblk_t lblk_end,
3277 int search_hint_reverse)
3278{
3279 struct address_space *mapping = inode->i_mapping;
3280 struct buffer_head *head, *bh = NULL;
3281 struct page *page;
3282 ext4_lblk_t i, pg_lblk;
3283 pgoff_t index;
3284
3285 /* reverse search wont work if fs block size is less than page size */
3286 if (inode->i_blkbits < PAGE_CACHE_SHIFT)
3287 search_hint_reverse = 0;
3288
3289 if (search_hint_reverse)
3290 i = lblk_end;
3291 else
3292 i = lblk_start;
3293
3294 index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
3295
3296 while ((i >= lblk_start) && (i <= lblk_end)) {
3297 page = find_get_page(mapping, index);
3298 if (!page)
3299 goto nextpage;
3300
3301 if (!page_has_buffers(page))
3302 goto nextpage;
3303
3304 head = page_buffers(page);
3305 if (!head)
3306 goto nextpage;
3307
3308 bh = head;
3309 pg_lblk = index << (PAGE_CACHE_SHIFT -
3310 inode->i_blkbits);
3311 do {
3312 if (unlikely(pg_lblk < lblk_start)) {
3313 /*
3314 * This is possible when fs block size is less
3315 * than page size and our cluster starts/ends in
3316 * middle of the page. So we need to skip the
3317 * initial few blocks till we reach the 'lblk'
3318 */
3319 pg_lblk++;
3320 continue;
3321 }
3322
3323 /* Check if the buffer is delayed allocated and that it
3324 * is not yet mapped. (when da-buffers are mapped during
3325 * their writeout, their da_mapped bit is set.)
3326 */
3327 if (buffer_delay(bh) && !buffer_da_mapped(bh)) {
3328 page_cache_release(page);
3329 trace_ext4_find_delalloc_range(inode,
3330 lblk_start, lblk_end,
3331 search_hint_reverse,
3332 1, i);
3333 return 1;
3334 }
3335 if (search_hint_reverse)
3336 i--;
3337 else
3338 i++;
3339 } while ((i >= lblk_start) && (i <= lblk_end) &&
3340 ((bh = bh->b_this_page) != head));
3341nextpage:
3342 if (page)
3343 page_cache_release(page);
3344 /*
3345 * Move to next page. 'i' will be the first lblk in the next
3346 * page.
3347 */
3348 if (search_hint_reverse)
3349 index--;
3350 else
3351 index++;
3352 i = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
3353 }
3354
3355 trace_ext4_find_delalloc_range(inode, lblk_start, lblk_end,
3356 search_hint_reverse, 0, 0);
3357 return 0;
3358}
3359
3360int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
3361 int search_hint_reverse)
3362{
3363 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
3364 ext4_lblk_t lblk_start, lblk_end;
3365 lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
3366 lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
3367
3368 return ext4_find_delalloc_range(inode, lblk_start, lblk_end,
3369 search_hint_reverse);
3370}
3371
3372/**
3373 * Determines how many complete clusters (out of those specified by the 'map')
3374 * are under delalloc and were reserved quota for.
3375 * This function is called when we are writing out the blocks that were
3376 * originally written with their allocation delayed, but then the space was
3377 * allocated using fallocate() before the delayed allocation could be resolved.
3378 * The cases to look for are:
3379 * ('=' indicated delayed allocated blocks
3380 * '-' indicates non-delayed allocated blocks)
3381 * (a) partial clusters towards beginning and/or end outside of allocated range
3382 * are not delalloc'ed.
3383 * Ex:
3384 * |----c---=|====c====|====c====|===-c----|
3385 * |++++++ allocated ++++++|
3386 * ==> 4 complete clusters in above example
3387 *
3388 * (b) partial cluster (outside of allocated range) towards either end is
3389 * marked for delayed allocation. In this case, we will exclude that
3390 * cluster.
3391 * Ex:
3392 * |----====c========|========c========|
3393 * |++++++ allocated ++++++|
3394 * ==> 1 complete clusters in above example
3395 *
3396 * Ex:
3397 * |================c================|
3398 * |++++++ allocated ++++++|
3399 * ==> 0 complete clusters in above example
3400 *
3401 * The ext4_da_update_reserve_space will be called only if we
3402 * determine here that there were some "entire" clusters that span
3403 * this 'allocated' range.
3404 * In the non-bigalloc case, this function will just end up returning num_blks
3405 * without ever calling ext4_find_delalloc_range.
3406 */
3407static unsigned int
3408get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
3409 unsigned int num_blks)
3410{
3411 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
3412 ext4_lblk_t alloc_cluster_start, alloc_cluster_end;
3413 ext4_lblk_t lblk_from, lblk_to, c_offset;
3414 unsigned int allocated_clusters = 0;
3415
3416 alloc_cluster_start = EXT4_B2C(sbi, lblk_start);
3417 alloc_cluster_end = EXT4_B2C(sbi, lblk_start + num_blks - 1);
3418
3419 /* max possible clusters for this allocation */
3420 allocated_clusters = alloc_cluster_end - alloc_cluster_start + 1;
3421
3422 trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks);
3423
3424 /* Check towards left side */
3425 c_offset = lblk_start & (sbi->s_cluster_ratio - 1);
3426 if (c_offset) {
3427 lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
3428 lblk_to = lblk_from + c_offset - 1;
3429
3430 if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
3431 allocated_clusters--;
3432 }
3433
3434 /* Now check towards right. */
3435 c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
3436 if (allocated_clusters && c_offset) {
3437 lblk_from = lblk_start + num_blks;
3438 lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
3439
3440 if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
3441 allocated_clusters--;
3442 }
3443
3444 return allocated_clusters;
3445}
3446
3168static int 3447static int
3169ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3448ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3170 struct ext4_map_blocks *map, 3449 struct ext4_map_blocks *map,
@@ -3181,6 +3460,9 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3181 flags, allocated); 3460 flags, allocated);
3182 ext4_ext_show_leaf(inode, path); 3461 ext4_ext_show_leaf(inode, path);
3183 3462
3463 trace_ext4_ext_handle_uninitialized_extents(inode, map, allocated,
3464 newblock);
3465
3184 /* get_block() before submit the IO, split the extent */ 3466 /* get_block() before submit the IO, split the extent */
3185 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3467 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3186 ret = ext4_split_unwritten_extents(handle, inode, map, 3468 ret = ext4_split_unwritten_extents(handle, inode, map,
@@ -3190,10 +3472,9 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3190 * that this IO needs to conversion to written when IO is 3472 * that this IO needs to conversion to written when IO is
3191 * completed 3473 * completed
3192 */ 3474 */
3193 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { 3475 if (io)
3194 io->flag = EXT4_IO_END_UNWRITTEN; 3476 ext4_set_io_unwritten_flag(inode, io);
3195 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); 3477 else
3196 } else
3197 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3478 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3198 if (ext4_should_dioread_nolock(inode)) 3479 if (ext4_should_dioread_nolock(inode))
3199 map->m_flags |= EXT4_MAP_UNINIT; 3480 map->m_flags |= EXT4_MAP_UNINIT;
@@ -3234,14 +3515,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3234 3515
3235 /* buffered write, writepage time, convert*/ 3516 /* buffered write, writepage time, convert*/
3236 ret = ext4_ext_convert_to_initialized(handle, inode, map, path); 3517 ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
3237 if (ret >= 0) { 3518 if (ret >= 0)
3238 ext4_update_inode_fsync_trans(handle, inode, 1); 3519 ext4_update_inode_fsync_trans(handle, inode, 1);
3239 err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
3240 map->m_len);
3241 if (err < 0)
3242 goto out2;
3243 }
3244
3245out: 3520out:
3246 if (ret <= 0) { 3521 if (ret <= 0) {
3247 err = ret; 3522 err = ret;
@@ -3270,11 +3545,24 @@ out:
3270 * But fallocate would have already updated quota and block 3545 * But fallocate would have already updated quota and block
3271 * count for this offset. So cancel these reservation 3546 * count for this offset. So cancel these reservation
3272 */ 3547 */
3273 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 3548 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
3274 ext4_da_update_reserve_space(inode, allocated, 0); 3549 unsigned int reserved_clusters;
3550 reserved_clusters = get_reserved_cluster_alloc(inode,
3551 map->m_lblk, map->m_len);
3552 if (reserved_clusters)
3553 ext4_da_update_reserve_space(inode,
3554 reserved_clusters,
3555 0);
3556 }
3275 3557
3276map_out: 3558map_out:
3277 map->m_flags |= EXT4_MAP_MAPPED; 3559 map->m_flags |= EXT4_MAP_MAPPED;
3560 if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0) {
3561 err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
3562 map->m_len);
3563 if (err < 0)
3564 goto out2;
3565 }
3278out1: 3566out1:
3279 if (allocated > map->m_len) 3567 if (allocated > map->m_len)
3280 allocated = map->m_len; 3568 allocated = map->m_len;
@@ -3290,6 +3578,111 @@ out2:
3290} 3578}
3291 3579
3292/* 3580/*
3581 * get_implied_cluster_alloc - check to see if the requested
3582 * allocation (in the map structure) overlaps with a cluster already
3583 * allocated in an extent.
3584 * @sb The filesystem superblock structure
3585 * @map The requested lblk->pblk mapping
3586 * @ex The extent structure which might contain an implied
3587 * cluster allocation
3588 *
3589 * This function is called by ext4_ext_map_blocks() after we failed to
3590 * find blocks that were already in the inode's extent tree. Hence,
3591 * we know that the beginning of the requested region cannot overlap
3592 * the extent from the inode's extent tree. There are three cases we
3593 * want to catch. The first is this case:
3594 *
3595 * |--- cluster # N--|
3596 * |--- extent ---| |---- requested region ---|
3597 * |==========|
3598 *
3599 * The second case that we need to test for is this one:
3600 *
3601 * |--------- cluster # N ----------------|
3602 * |--- requested region --| |------- extent ----|
3603 * |=======================|
3604 *
3605 * The third case is when the requested region lies between two extents
3606 * within the same cluster:
3607 * |------------- cluster # N-------------|
3608 * |----- ex -----| |---- ex_right ----|
3609 * |------ requested region ------|
3610 * |================|
3611 *
3612 * In each of the above cases, we need to set the map->m_pblk and
3613 * map->m_len so it corresponds to the return the extent labelled as
3614 * "|====|" from cluster #N, since it is already in use for data in
3615 * cluster EXT4_B2C(sbi, map->m_lblk). We will then return 1 to
3616 * signal to ext4_ext_map_blocks() that map->m_pblk should be treated
3617 * as a new "allocated" block region. Otherwise, we will return 0 and
3618 * ext4_ext_map_blocks() will then allocate one or more new clusters
3619 * by calling ext4_mb_new_blocks().
3620 */
3621static int get_implied_cluster_alloc(struct super_block *sb,
3622 struct ext4_map_blocks *map,
3623 struct ext4_extent *ex,
3624 struct ext4_ext_path *path)
3625{
3626 struct ext4_sb_info *sbi = EXT4_SB(sb);
3627 ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
3628 ext4_lblk_t ex_cluster_start, ex_cluster_end;
3629 ext4_lblk_t rr_cluster_start, rr_cluster_end;
3630 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
3631 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
3632 unsigned short ee_len = ext4_ext_get_actual_len(ex);
3633
3634 /* The extent passed in that we are trying to match */
3635 ex_cluster_start = EXT4_B2C(sbi, ee_block);
3636 ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1);
3637
3638 /* The requested region passed into ext4_map_blocks() */
3639 rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
3640 rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1);
3641
3642 if ((rr_cluster_start == ex_cluster_end) ||
3643 (rr_cluster_start == ex_cluster_start)) {
3644 if (rr_cluster_start == ex_cluster_end)
3645 ee_start += ee_len - 1;
3646 map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) +
3647 c_offset;
3648 map->m_len = min(map->m_len,
3649 (unsigned) sbi->s_cluster_ratio - c_offset);
3650 /*
3651 * Check for and handle this case:
3652 *
3653 * |--------- cluster # N-------------|
3654 * |------- extent ----|
3655 * |--- requested region ---|
3656 * |===========|
3657 */
3658
3659 if (map->m_lblk < ee_block)
3660 map->m_len = min(map->m_len, ee_block - map->m_lblk);
3661
3662 /*
3663 * Check for the case where there is already another allocated
3664 * block to the right of 'ex' but before the end of the cluster.
3665 *
3666 * |------------- cluster # N-------------|
3667 * |----- ex -----| |---- ex_right ----|
3668 * |------ requested region ------|
3669 * |================|
3670 */
3671 if (map->m_lblk > ee_block) {
3672 ext4_lblk_t next = ext4_ext_next_allocated_block(path);
3673 map->m_len = min(map->m_len, next - map->m_lblk);
3674 }
3675
3676 trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1);
3677 return 1;
3678 }
3679
3680 trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0);
3681 return 0;
3682}
3683
3684
3685/*
3293 * Block allocation/map/preallocation routine for extents based files 3686 * Block allocation/map/preallocation routine for extents based files
3294 * 3687 *
3295 * 3688 *
@@ -3311,15 +3704,17 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3311 struct ext4_map_blocks *map, int flags) 3704 struct ext4_map_blocks *map, int flags)
3312{ 3705{
3313 struct ext4_ext_path *path = NULL; 3706 struct ext4_ext_path *path = NULL;
3314 struct ext4_extent newex, *ex; 3707 struct ext4_extent newex, *ex, *ex2;
3708 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
3315 ext4_fsblk_t newblock = 0; 3709 ext4_fsblk_t newblock = 0;
3316 int err = 0, depth, ret; 3710 int free_on_err = 0, err = 0, depth, ret;
3317 unsigned int allocated = 0; 3711 unsigned int allocated = 0, offset = 0;
3712 unsigned int allocated_clusters = 0;
3318 unsigned int punched_out = 0; 3713 unsigned int punched_out = 0;
3319 unsigned int result = 0; 3714 unsigned int result = 0;
3320 struct ext4_allocation_request ar; 3715 struct ext4_allocation_request ar;
3321 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3716 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
3322 struct ext4_map_blocks punch_map; 3717 ext4_lblk_t cluster_offset;
3323 3718
3324 ext_debug("blocks %u/%u requested for inode %lu\n", 3719 ext_debug("blocks %u/%u requested for inode %lu\n",
3325 map->m_lblk, map->m_len, inode->i_ino); 3720 map->m_lblk, map->m_len, inode->i_ino);
@@ -3329,6 +3724,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3329 if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) && 3724 if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) &&
3330 ext4_ext_in_cache(inode, map->m_lblk, &newex)) { 3725 ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
3331 if (!newex.ee_start_lo && !newex.ee_start_hi) { 3726 if (!newex.ee_start_lo && !newex.ee_start_hi) {
3727 if ((sbi->s_cluster_ratio > 1) &&
3728 ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
3729 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3730
3332 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { 3731 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
3333 /* 3732 /*
3334 * block isn't allocated yet and 3733 * block isn't allocated yet and
@@ -3339,6 +3738,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3339 /* we should allocate requested block */ 3738 /* we should allocate requested block */
3340 } else { 3739 } else {
3341 /* block is already allocated */ 3740 /* block is already allocated */
3741 if (sbi->s_cluster_ratio > 1)
3742 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3342 newblock = map->m_lblk 3743 newblock = map->m_lblk
3343 - le32_to_cpu(newex.ee_block) 3744 - le32_to_cpu(newex.ee_block)
3344 + ext4_ext_pblock(&newex); 3745 + ext4_ext_pblock(&newex);
@@ -3384,8 +3785,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3384 * we split out initialized portions during a write. 3785 * we split out initialized portions during a write.
3385 */ 3786 */
3386 ee_len = ext4_ext_get_actual_len(ex); 3787 ee_len = ext4_ext_get_actual_len(ex);
3788
3789 trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len);
3790
3387 /* if found extent covers block, simply return it */ 3791 /* if found extent covers block, simply return it */
3388 if (in_range(map->m_lblk, ee_block, ee_len)) { 3792 if (in_range(map->m_lblk, ee_block, ee_len)) {
3793 struct ext4_map_blocks punch_map;
3794 ext4_fsblk_t partial_cluster = 0;
3795
3389 newblock = map->m_lblk - ee_block + ee_start; 3796 newblock = map->m_lblk - ee_block + ee_start;
3390 /* number of remaining blocks in the extent */ 3797 /* number of remaining blocks in the extent */
3391 allocated = ee_len - (map->m_lblk - ee_block); 3798 allocated = ee_len - (map->m_lblk - ee_block);
@@ -3469,7 +3876,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3469 ext4_ext_invalidate_cache(inode); 3876 ext4_ext_invalidate_cache(inode);
3470 3877
3471 err = ext4_ext_rm_leaf(handle, inode, path, 3878 err = ext4_ext_rm_leaf(handle, inode, path,
3472 map->m_lblk, map->m_lblk + punched_out); 3879 &partial_cluster, map->m_lblk,
3880 map->m_lblk + punched_out);
3473 3881
3474 if (!err && path->p_hdr->eh_entries == 0) { 3882 if (!err && path->p_hdr->eh_entries == 0) {
3475 /* 3883 /*
@@ -3492,6 +3900,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3492 } 3900 }
3493 } 3901 }
3494 3902
3903 if ((sbi->s_cluster_ratio > 1) &&
3904 ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
3905 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3906
3495 /* 3907 /*
3496 * requested block isn't allocated yet; 3908 * requested block isn't allocated yet;
3497 * we couldn't try to create block if create flag is zero 3909 * we couldn't try to create block if create flag is zero
@@ -3504,9 +3916,25 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3504 ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); 3916 ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
3505 goto out2; 3917 goto out2;
3506 } 3918 }
3919
3507 /* 3920 /*
3508 * Okay, we need to do block allocation. 3921 * Okay, we need to do block allocation.
3509 */ 3922 */
3923 map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
3924 newex.ee_block = cpu_to_le32(map->m_lblk);
3925 cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
3926
3927 /*
3928 * If we are doing bigalloc, check to see if the extent returned
3929 * by ext4_ext_find_extent() implies a cluster we can use.
3930 */
3931 if (cluster_offset && ex &&
3932 get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
3933 ar.len = allocated = map->m_len;
3934 newblock = map->m_pblk;
3935 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3936 goto got_allocated_blocks;
3937 }
3510 3938
3511 /* find neighbour allocated blocks */ 3939 /* find neighbour allocated blocks */
3512 ar.lleft = map->m_lblk; 3940 ar.lleft = map->m_lblk;
@@ -3514,10 +3942,21 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3514 if (err) 3942 if (err)
3515 goto out2; 3943 goto out2;
3516 ar.lright = map->m_lblk; 3944 ar.lright = map->m_lblk;
3517 err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright); 3945 ex2 = NULL;
3946 err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2);
3518 if (err) 3947 if (err)
3519 goto out2; 3948 goto out2;
3520 3949
3950 /* Check if the extent after searching to the right implies a
3951 * cluster we can use. */
3952 if ((sbi->s_cluster_ratio > 1) && ex2 &&
3953 get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) {
3954 ar.len = allocated = map->m_len;
3955 newblock = map->m_pblk;
3956 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3957 goto got_allocated_blocks;
3958 }
3959
3521 /* 3960 /*
3522 * See if request is beyond maximum number of blocks we can have in 3961 * See if request is beyond maximum number of blocks we can have in
3523 * a single extent. For an initialized extent this limit is 3962 * a single extent. For an initialized extent this limit is
@@ -3532,9 +3971,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3532 map->m_len = EXT_UNINIT_MAX_LEN; 3971 map->m_len = EXT_UNINIT_MAX_LEN;
3533 3972
3534 /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ 3973 /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
3535 newex.ee_block = cpu_to_le32(map->m_lblk);
3536 newex.ee_len = cpu_to_le16(map->m_len); 3974 newex.ee_len = cpu_to_le16(map->m_len);
3537 err = ext4_ext_check_overlap(inode, &newex, path); 3975 err = ext4_ext_check_overlap(sbi, inode, &newex, path);
3538 if (err) 3976 if (err)
3539 allocated = ext4_ext_get_actual_len(&newex); 3977 allocated = ext4_ext_get_actual_len(&newex);
3540 else 3978 else
@@ -3544,7 +3982,18 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3544 ar.inode = inode; 3982 ar.inode = inode;
3545 ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk); 3983 ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
3546 ar.logical = map->m_lblk; 3984 ar.logical = map->m_lblk;
3547 ar.len = allocated; 3985 /*
3986 * We calculate the offset from the beginning of the cluster
3987 * for the logical block number, since when we allocate a
3988 * physical cluster, the physical block should start at the
3989 * same offset from the beginning of the cluster. This is
3990 * needed so that future calls to get_implied_cluster_alloc()
3991 * work correctly.
3992 */
3993 offset = map->m_lblk & (sbi->s_cluster_ratio - 1);
3994 ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
3995 ar.goal -= offset;
3996 ar.logical -= offset;
3548 if (S_ISREG(inode->i_mode)) 3997 if (S_ISREG(inode->i_mode))
3549 ar.flags = EXT4_MB_HINT_DATA; 3998 ar.flags = EXT4_MB_HINT_DATA;
3550 else 3999 else
@@ -3557,9 +4006,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3557 goto out2; 4006 goto out2;
3558 ext_debug("allocate new block: goal %llu, found %llu/%u\n", 4007 ext_debug("allocate new block: goal %llu, found %llu/%u\n",
3559 ar.goal, newblock, allocated); 4008 ar.goal, newblock, allocated);
4009 free_on_err = 1;
4010 allocated_clusters = ar.len;
4011 ar.len = EXT4_C2B(sbi, ar.len) - offset;
4012 if (ar.len > allocated)
4013 ar.len = allocated;
3560 4014
4015got_allocated_blocks:
3561 /* try to insert new extent into found leaf and return */ 4016 /* try to insert new extent into found leaf and return */
3562 ext4_ext_store_pblock(&newex, newblock); 4017 ext4_ext_store_pblock(&newex, newblock + offset);
3563 newex.ee_len = cpu_to_le16(ar.len); 4018 newex.ee_len = cpu_to_le16(ar.len);
3564 /* Mark uninitialized */ 4019 /* Mark uninitialized */
3565 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ 4020 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
@@ -3572,10 +4027,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3572 * that we need to perform conversion when IO is done. 4027 * that we need to perform conversion when IO is done.
3573 */ 4028 */
3574 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 4029 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3575 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { 4030 if (io)
3576 io->flag = EXT4_IO_END_UNWRITTEN; 4031 ext4_set_io_unwritten_flag(inode, io);
3577 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); 4032 else
3578 } else
3579 ext4_set_inode_state(inode, 4033 ext4_set_inode_state(inode,
3580 EXT4_STATE_DIO_UNWRITTEN); 4034 EXT4_STATE_DIO_UNWRITTEN);
3581 } 4035 }
@@ -3583,11 +4037,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3583 map->m_flags |= EXT4_MAP_UNINIT; 4037 map->m_flags |= EXT4_MAP_UNINIT;
3584 } 4038 }
3585 4039
3586 err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len); 4040 err = 0;
4041 if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0)
4042 err = check_eofblocks_fl(handle, inode, map->m_lblk,
4043 path, ar.len);
3587 if (!err) 4044 if (!err)
3588 err = ext4_ext_insert_extent(handle, inode, path, 4045 err = ext4_ext_insert_extent(handle, inode, path,
3589 &newex, flags); 4046 &newex, flags);
3590 if (err) { 4047 if (err && free_on_err) {
3591 int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? 4048 int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
3592 EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; 4049 EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
3593 /* free data blocks we just allocated */ 4050 /* free data blocks we just allocated */
@@ -3610,8 +4067,82 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3610 * Update reserved blocks/metadata blocks after successful 4067 * Update reserved blocks/metadata blocks after successful
3611 * block allocation which had been deferred till now. 4068 * block allocation which had been deferred till now.
3612 */ 4069 */
3613 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 4070 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
3614 ext4_da_update_reserve_space(inode, allocated, 1); 4071 unsigned int reserved_clusters;
4072 /*
4073 * Check how many clusters we had reserved this allocated range
4074 */
4075 reserved_clusters = get_reserved_cluster_alloc(inode,
4076 map->m_lblk, allocated);
4077 if (map->m_flags & EXT4_MAP_FROM_CLUSTER) {
4078 if (reserved_clusters) {
4079 /*
4080 * We have clusters reserved for this range.
4081 * But since we are not doing actual allocation
4082 * and are simply using blocks from previously
4083 * allocated cluster, we should release the
4084 * reservation and not claim quota.
4085 */
4086 ext4_da_update_reserve_space(inode,
4087 reserved_clusters, 0);
4088 }
4089 } else {
4090 BUG_ON(allocated_clusters < reserved_clusters);
4091 /* We will claim quota for all newly allocated blocks.*/
4092 ext4_da_update_reserve_space(inode, allocated_clusters,
4093 1);
4094 if (reserved_clusters < allocated_clusters) {
4095 struct ext4_inode_info *ei = EXT4_I(inode);
4096 int reservation = allocated_clusters -
4097 reserved_clusters;
4098 /*
4099 * It seems we claimed few clusters outside of
4100 * the range of this allocation. We should give
4101 * it back to the reservation pool. This can
4102 * happen in the following case:
4103 *
4104 * * Suppose s_cluster_ratio is 4 (i.e., each
4105 * cluster has 4 blocks. Thus, the clusters
4106 * are [0-3],[4-7],[8-11]...
4107 * * First comes delayed allocation write for
4108 * logical blocks 10 & 11. Since there were no
4109 * previous delayed allocated blocks in the
4110 * range [8-11], we would reserve 1 cluster
4111 * for this write.
4112 * * Next comes write for logical blocks 3 to 8.
4113 * In this case, we will reserve 2 clusters
4114 * (for [0-3] and [4-7]; and not for [8-11] as
4115 * that range has a delayed allocated blocks.
4116 * Thus total reserved clusters now becomes 3.
4117 * * Now, during the delayed allocation writeout
4118 * time, we will first write blocks [3-8] and
4119 * allocate 3 clusters for writing these
4120 * blocks. Also, we would claim all these
4121 * three clusters above.
4122 * * Now when we come here to writeout the
4123 * blocks [10-11], we would expect to claim
4124 * the reservation of 1 cluster we had made
4125 * (and we would claim it since there are no
4126 * more delayed allocated blocks in the range
4127 * [8-11]. But our reserved cluster count had
4128 * already gone to 0.
4129 *
4130 * Thus, at the step 4 above when we determine
4131 * that there are still some unwritten delayed
4132 * allocated blocks outside of our current
4133 * block range, we should increment the
4134 * reserved clusters count so that when the
4135 * remaining blocks finally gets written, we
4136 * could claim them.
4137 */
4138 dquot_reserve_block(inode,
4139 EXT4_C2B(sbi, reservation));
4140 spin_lock(&ei->i_block_reservation_lock);
4141 ei->i_reserved_data_blocks += reservation;
4142 spin_unlock(&ei->i_block_reservation_lock);
4143 }
4144 }
4145 }
3615 4146
3616 /* 4147 /*
3617 * Cache the extent and update transaction to commit on fdatasync only 4148 * Cache the extent and update transaction to commit on fdatasync only
@@ -3634,12 +4165,12 @@ out2:
3634 ext4_ext_drop_refs(path); 4165 ext4_ext_drop_refs(path);
3635 kfree(path); 4166 kfree(path);
3636 } 4167 }
3637 trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
3638 newblock, map->m_len, err ? err : allocated);
3639
3640 result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ? 4168 result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ?
3641 punched_out : allocated; 4169 punched_out : allocated;
3642 4170
4171 trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
4172 newblock, map->m_len, err ? err : result);
4173
3643 return err ? err : result; 4174 return err ? err : result;
3644} 4175}
3645 4176
@@ -3649,6 +4180,7 @@ void ext4_ext_truncate(struct inode *inode)
3649 struct super_block *sb = inode->i_sb; 4180 struct super_block *sb = inode->i_sb;
3650 ext4_lblk_t last_block; 4181 ext4_lblk_t last_block;
3651 handle_t *handle; 4182 handle_t *handle;
4183 loff_t page_len;
3652 int err = 0; 4184 int err = 0;
3653 4185
3654 /* 4186 /*
@@ -3665,8 +4197,16 @@ void ext4_ext_truncate(struct inode *inode)
3665 if (IS_ERR(handle)) 4197 if (IS_ERR(handle))
3666 return; 4198 return;
3667 4199
3668 if (inode->i_size & (sb->s_blocksize - 1)) 4200 if (inode->i_size % PAGE_CACHE_SIZE != 0) {
3669 ext4_block_truncate_page(handle, mapping, inode->i_size); 4201 page_len = PAGE_CACHE_SIZE -
4202 (inode->i_size & (PAGE_CACHE_SIZE - 1));
4203
4204 err = ext4_discard_partial_page_buffers(handle,
4205 mapping, inode->i_size, page_len, 0);
4206
4207 if (err)
4208 goto out_stop;
4209 }
3670 4210
3671 if (ext4_orphan_add(handle, inode)) 4211 if (ext4_orphan_add(handle, inode))
3672 goto out_stop; 4212 goto out_stop;
@@ -3760,6 +4300,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
3760 int ret = 0; 4300 int ret = 0;
3761 int ret2 = 0; 4301 int ret2 = 0;
3762 int retries = 0; 4302 int retries = 0;
4303 int flags;
3763 struct ext4_map_blocks map; 4304 struct ext4_map_blocks map;
3764 unsigned int credits, blkbits = inode->i_blkbits; 4305 unsigned int credits, blkbits = inode->i_blkbits;
3765 4306
@@ -3796,6 +4337,16 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
3796 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); 4337 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
3797 return ret; 4338 return ret;
3798 } 4339 }
4340 flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT;
4341 if (mode & FALLOC_FL_KEEP_SIZE)
4342 flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
4343 /*
4344 * Don't normalize the request if it can fit in one extent so
4345 * that it doesn't get unnecessarily split into multiple
4346 * extents.
4347 */
4348 if (len <= EXT_UNINIT_MAX_LEN << blkbits)
4349 flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
3799retry: 4350retry:
3800 while (ret >= 0 && ret < max_blocks) { 4351 while (ret >= 0 && ret < max_blocks) {
3801 map.m_lblk = map.m_lblk + ret; 4352 map.m_lblk = map.m_lblk + ret;
@@ -3805,9 +4356,7 @@ retry:
3805 ret = PTR_ERR(handle); 4356 ret = PTR_ERR(handle);
3806 break; 4357 break;
3807 } 4358 }
3808 ret = ext4_map_blocks(handle, inode, &map, 4359 ret = ext4_map_blocks(handle, inode, &map, flags);
3809 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT |
3810 EXT4_GET_BLOCKS_NO_NORMALIZE);
3811 if (ret <= 0) { 4360 if (ret <= 0) {
3812#ifdef EXT4FS_DEBUG 4361#ifdef EXT4FS_DEBUG
3813 WARN_ON(ret <= 0); 4362 WARN_ON(ret <= 0);
@@ -4102,7 +4651,6 @@ found_delayed_extent:
4102 return EXT_BREAK; 4651 return EXT_BREAK;
4103 return EXT_CONTINUE; 4652 return EXT_CONTINUE;
4104} 4653}
4105
4106/* fiemap flags we can handle specified here */ 4654/* fiemap flags we can handle specified here */
4107#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) 4655#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
4108 4656
@@ -4162,17 +4710,28 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4162 struct address_space *mapping = inode->i_mapping; 4710 struct address_space *mapping = inode->i_mapping;
4163 struct ext4_map_blocks map; 4711 struct ext4_map_blocks map;
4164 handle_t *handle; 4712 handle_t *handle;
4165 loff_t first_block_offset, last_block_offset, block_len; 4713 loff_t first_page, last_page, page_len;
4166 loff_t first_page, last_page, first_page_offset, last_page_offset; 4714 loff_t first_page_offset, last_page_offset;
4167 int ret, credits, blocks_released, err = 0; 4715 int ret, credits, blocks_released, err = 0;
4168 4716
4717 /* No need to punch hole beyond i_size */
4718 if (offset >= inode->i_size)
4719 return 0;
4720
4721 /*
4722 * If the hole extends beyond i_size, set the hole
4723 * to end after the page that contains i_size
4724 */
4725 if (offset + length > inode->i_size) {
4726 length = inode->i_size +
4727 PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
4728 offset;
4729 }
4730
4169 first_block = (offset + sb->s_blocksize - 1) >> 4731 first_block = (offset + sb->s_blocksize - 1) >>
4170 EXT4_BLOCK_SIZE_BITS(sb); 4732 EXT4_BLOCK_SIZE_BITS(sb);
4171 last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); 4733 last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
4172 4734
4173 first_block_offset = first_block << EXT4_BLOCK_SIZE_BITS(sb);
4174 last_block_offset = last_block << EXT4_BLOCK_SIZE_BITS(sb);
4175
4176 first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 4735 first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
4177 last_page = (offset + length) >> PAGE_CACHE_SHIFT; 4736 last_page = (offset + length) >> PAGE_CACHE_SHIFT;
4178 4737
@@ -4185,11 +4744,10 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4185 */ 4744 */
4186 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 4745 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
4187 err = filemap_write_and_wait_range(mapping, 4746 err = filemap_write_and_wait_range(mapping,
4188 first_page_offset == 0 ? 0 : first_page_offset-1, 4747 offset, offset + length - 1);
4189 last_page_offset);
4190 4748
4191 if (err) 4749 if (err)
4192 return err; 4750 return err;
4193 } 4751 }
4194 4752
4195 /* Now release the pages */ 4753 /* Now release the pages */
@@ -4211,24 +4769,64 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4211 goto out; 4769 goto out;
4212 4770
4213 /* 4771 /*
4214 * Now we need to zero out the un block aligned data. 4772 * Now we need to zero out the non-page-aligned data in the
4215 * If the file is smaller than a block, just 4773 * pages at the start and tail of the hole, and unmap the buffer
4216 * zero out the middle 4774 * heads for the block aligned regions of the page that were
4775 * completely zeroed.
4217 */ 4776 */
4218 if (first_block > last_block) 4777 if (first_page > last_page) {
4219 ext4_block_zero_page_range(handle, mapping, offset, length); 4778 /*
4220 else { 4779 * If the file space being truncated is contained within a page
4221 /* zero out the head of the hole before the first block */ 4780 * just zero out and unmap the middle of that page
4222 block_len = first_block_offset - offset; 4781 */
4223 if (block_len > 0) 4782 err = ext4_discard_partial_page_buffers(handle,
4224 ext4_block_zero_page_range(handle, mapping, 4783 mapping, offset, length, 0);
4225 offset, block_len); 4784
4226 4785 if (err)
4227 /* zero out the tail of the hole after the last block */ 4786 goto out;
4228 block_len = offset + length - last_block_offset; 4787 } else {
4229 if (block_len > 0) { 4788 /*
4230 ext4_block_zero_page_range(handle, mapping, 4789 * zero out and unmap the partial page that contains
4231 last_block_offset, block_len); 4790 * the start of the hole
4791 */
4792 page_len = first_page_offset - offset;
4793 if (page_len > 0) {
4794 err = ext4_discard_partial_page_buffers(handle, mapping,
4795 offset, page_len, 0);
4796 if (err)
4797 goto out;
4798 }
4799
4800 /*
4801 * zero out and unmap the partial page that contains
4802 * the end of the hole
4803 */
4804 page_len = offset + length - last_page_offset;
4805 if (page_len > 0) {
4806 err = ext4_discard_partial_page_buffers(handle, mapping,
4807 last_page_offset, page_len, 0);
4808 if (err)
4809 goto out;
4810 }
4811 }
4812
4813
4814 /*
4815 * If i_size is contained in the last page, we need to
4816 * unmap and zero the partial page after i_size
4817 */
4818 if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
4819 inode->i_size % PAGE_CACHE_SIZE != 0) {
4820
4821 page_len = PAGE_CACHE_SIZE -
4822 (inode->i_size & (PAGE_CACHE_SIZE - 1));
4823
4824 if (page_len > 0) {
4825 err = ext4_discard_partial_page_buffers(handle,
4826 mapping, inode->i_size, page_len, 0);
4827
4828 if (err)
4829 goto out;
4232 } 4830 }
4233 } 4831 }
4234 4832
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index b9548f477bb8..cb70f1812a70 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -181,8 +181,8 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
181 path.dentry = mnt->mnt_root; 181 path.dentry = mnt->mnt_root;
182 cp = d_path(&path, buf, sizeof(buf)); 182 cp = d_path(&path, buf, sizeof(buf));
183 if (!IS_ERR(cp)) { 183 if (!IS_ERR(cp)) {
184 memcpy(sbi->s_es->s_last_mounted, cp, 184 strlcpy(sbi->s_es->s_last_mounted, cp,
185 sizeof(sbi->s_es->s_last_mounted)); 185 sizeof(sbi->s_es->s_last_mounted));
186 ext4_mark_super_dirty(sb); 186 ext4_mark_super_dirty(sb);
187 } 187 }
188 } 188 }
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 036f78f7a1ef..00a2cb753efd 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -75,7 +75,7 @@ static void dump_completed_IO(struct inode * inode)
75 * to written. 75 * to written.
76 * The function return the number of pending IOs on success. 76 * The function return the number of pending IOs on success.
77 */ 77 */
78extern int ext4_flush_completed_IO(struct inode *inode) 78int ext4_flush_completed_IO(struct inode *inode)
79{ 79{
80 ext4_io_end_t *io; 80 ext4_io_end_t *io;
81 struct ext4_inode_info *ei = EXT4_I(inode); 81 struct ext4_inode_info *ei = EXT4_I(inode);
@@ -83,14 +83,12 @@ extern int ext4_flush_completed_IO(struct inode *inode)
83 int ret = 0; 83 int ret = 0;
84 int ret2 = 0; 84 int ret2 = 0;
85 85
86 if (list_empty(&ei->i_completed_io_list))
87 return ret;
88
89 dump_completed_IO(inode); 86 dump_completed_IO(inode);
90 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 87 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
91 while (!list_empty(&ei->i_completed_io_list)){ 88 while (!list_empty(&ei->i_completed_io_list)){
92 io = list_entry(ei->i_completed_io_list.next, 89 io = list_entry(ei->i_completed_io_list.next,
93 ext4_io_end_t, list); 90 ext4_io_end_t, list);
91 list_del_init(&io->list);
94 /* 92 /*
95 * Calling ext4_end_io_nolock() to convert completed 93 * Calling ext4_end_io_nolock() to convert completed
96 * IO to written. 94 * IO to written.
@@ -107,11 +105,9 @@ extern int ext4_flush_completed_IO(struct inode *inode)
107 */ 105 */
108 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 106 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
109 ret = ext4_end_io_nolock(io); 107 ret = ext4_end_io_nolock(io);
110 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
111 if (ret < 0) 108 if (ret < 0)
112 ret2 = ret; 109 ret2 = ret;
113 else 110 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
114 list_del_init(&io->list);
115 } 111 }
116 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 112 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
117 return (ret2 < 0) ? ret2 : 0; 113 return (ret2 < 0) ? ret2 : 0;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 9c63f273b550..00beb4f9cc4f 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -78,7 +78,7 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
78 * allocation, essentially implementing a per-group read-only flag. */ 78 * allocation, essentially implementing a per-group read-only flag. */
79 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 79 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
80 ext4_error(sb, "Checksum bad for group %u", block_group); 80 ext4_error(sb, "Checksum bad for group %u", block_group);
81 ext4_free_blks_set(sb, gdp, 0); 81 ext4_free_group_clusters_set(sb, gdp, 0);
82 ext4_free_inodes_set(sb, gdp, 0); 82 ext4_free_inodes_set(sb, gdp, 0);
83 ext4_itable_unused_set(sb, gdp, 0); 83 ext4_itable_unused_set(sb, gdp, 0);
84 memset(bh->b_data, 0xff, sb->s_blocksize); 84 memset(bh->b_data, 0xff, sb->s_blocksize);
@@ -293,121 +293,9 @@ error_return:
293 ext4_std_error(sb, fatal); 293 ext4_std_error(sb, fatal);
294} 294}
295 295
296/*
297 * There are two policies for allocating an inode. If the new inode is
298 * a directory, then a forward search is made for a block group with both
299 * free space and a low directory-to-inode ratio; if that fails, then of
300 * the groups with above-average free space, that group with the fewest
301 * directories already is chosen.
302 *
303 * For other inodes, search forward from the parent directory\'s block
304 * group to find a free inode.
305 */
306static int find_group_dir(struct super_block *sb, struct inode *parent,
307 ext4_group_t *best_group)
308{
309 ext4_group_t ngroups = ext4_get_groups_count(sb);
310 unsigned int freei, avefreei;
311 struct ext4_group_desc *desc, *best_desc = NULL;
312 ext4_group_t group;
313 int ret = -1;
314
315 freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter);
316 avefreei = freei / ngroups;
317
318 for (group = 0; group < ngroups; group++) {
319 desc = ext4_get_group_desc(sb, group, NULL);
320 if (!desc || !ext4_free_inodes_count(sb, desc))
321 continue;
322 if (ext4_free_inodes_count(sb, desc) < avefreei)
323 continue;
324 if (!best_desc ||
325 (ext4_free_blks_count(sb, desc) >
326 ext4_free_blks_count(sb, best_desc))) {
327 *best_group = group;
328 best_desc = desc;
329 ret = 0;
330 }
331 }
332 return ret;
333}
334
335#define free_block_ratio 10
336
337static int find_group_flex(struct super_block *sb, struct inode *parent,
338 ext4_group_t *best_group)
339{
340 struct ext4_sb_info *sbi = EXT4_SB(sb);
341 struct ext4_group_desc *desc;
342 struct flex_groups *flex_group = sbi->s_flex_groups;
343 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
344 ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
345 ext4_group_t ngroups = ext4_get_groups_count(sb);
346 int flex_size = ext4_flex_bg_size(sbi);
347 ext4_group_t best_flex = parent_fbg_group;
348 int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
349 int flexbg_free_blocks;
350 int flex_freeb_ratio;
351 ext4_group_t n_fbg_groups;
352 ext4_group_t i;
353
354 n_fbg_groups = (ngroups + flex_size - 1) >>
355 sbi->s_log_groups_per_flex;
356
357find_close_to_parent:
358 flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks);
359 flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
360 if (atomic_read(&flex_group[best_flex].free_inodes) &&
361 flex_freeb_ratio > free_block_ratio)
362 goto found_flexbg;
363
364 if (best_flex && best_flex == parent_fbg_group) {
365 best_flex--;
366 goto find_close_to_parent;
367 }
368
369 for (i = 0; i < n_fbg_groups; i++) {
370 if (i == parent_fbg_group || i == parent_fbg_group - 1)
371 continue;
372
373 flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks);
374 flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
375
376 if (flex_freeb_ratio > free_block_ratio &&
377 (atomic_read(&flex_group[i].free_inodes))) {
378 best_flex = i;
379 goto found_flexbg;
380 }
381
382 if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) ||
383 ((atomic_read(&flex_group[i].free_blocks) >
384 atomic_read(&flex_group[best_flex].free_blocks)) &&
385 atomic_read(&flex_group[i].free_inodes)))
386 best_flex = i;
387 }
388
389 if (!atomic_read(&flex_group[best_flex].free_inodes) ||
390 !atomic_read(&flex_group[best_flex].free_blocks))
391 return -1;
392
393found_flexbg:
394 for (i = best_flex * flex_size; i < ngroups &&
395 i < (best_flex + 1) * flex_size; i++) {
396 desc = ext4_get_group_desc(sb, i, NULL);
397 if (ext4_free_inodes_count(sb, desc)) {
398 *best_group = i;
399 goto out;
400 }
401 }
402
403 return -1;
404out:
405 return 0;
406}
407
408struct orlov_stats { 296struct orlov_stats {
409 __u32 free_inodes; 297 __u32 free_inodes;
410 __u32 free_blocks; 298 __u32 free_clusters;
411 __u32 used_dirs; 299 __u32 used_dirs;
412}; 300};
413 301
@@ -424,7 +312,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
424 312
425 if (flex_size > 1) { 313 if (flex_size > 1) {
426 stats->free_inodes = atomic_read(&flex_group[g].free_inodes); 314 stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
427 stats->free_blocks = atomic_read(&flex_group[g].free_blocks); 315 stats->free_clusters = atomic_read(&flex_group[g].free_clusters);
428 stats->used_dirs = atomic_read(&flex_group[g].used_dirs); 316 stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
429 return; 317 return;
430 } 318 }
@@ -432,11 +320,11 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
432 desc = ext4_get_group_desc(sb, g, NULL); 320 desc = ext4_get_group_desc(sb, g, NULL);
433 if (desc) { 321 if (desc) {
434 stats->free_inodes = ext4_free_inodes_count(sb, desc); 322 stats->free_inodes = ext4_free_inodes_count(sb, desc);
435 stats->free_blocks = ext4_free_blks_count(sb, desc); 323 stats->free_clusters = ext4_free_group_clusters(sb, desc);
436 stats->used_dirs = ext4_used_dirs_count(sb, desc); 324 stats->used_dirs = ext4_used_dirs_count(sb, desc);
437 } else { 325 } else {
438 stats->free_inodes = 0; 326 stats->free_inodes = 0;
439 stats->free_blocks = 0; 327 stats->free_clusters = 0;
440 stats->used_dirs = 0; 328 stats->used_dirs = 0;
441 } 329 }
442} 330}
@@ -471,10 +359,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
471 ext4_group_t real_ngroups = ext4_get_groups_count(sb); 359 ext4_group_t real_ngroups = ext4_get_groups_count(sb);
472 int inodes_per_group = EXT4_INODES_PER_GROUP(sb); 360 int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
473 unsigned int freei, avefreei; 361 unsigned int freei, avefreei;
474 ext4_fsblk_t freeb, avefreeb; 362 ext4_fsblk_t freeb, avefreec;
475 unsigned int ndirs; 363 unsigned int ndirs;
476 int max_dirs, min_inodes; 364 int max_dirs, min_inodes;
477 ext4_grpblk_t min_blocks; 365 ext4_grpblk_t min_clusters;
478 ext4_group_t i, grp, g, ngroups; 366 ext4_group_t i, grp, g, ngroups;
479 struct ext4_group_desc *desc; 367 struct ext4_group_desc *desc;
480 struct orlov_stats stats; 368 struct orlov_stats stats;
@@ -490,9 +378,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
490 378
491 freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); 379 freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
492 avefreei = freei / ngroups; 380 avefreei = freei / ngroups;
493 freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 381 freeb = EXT4_C2B(sbi,
494 avefreeb = freeb; 382 percpu_counter_read_positive(&sbi->s_freeclusters_counter));
495 do_div(avefreeb, ngroups); 383 avefreec = freeb;
384 do_div(avefreec, ngroups);
496 ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); 385 ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
497 386
498 if (S_ISDIR(mode) && 387 if (S_ISDIR(mode) &&
@@ -518,7 +407,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
518 continue; 407 continue;
519 if (stats.free_inodes < avefreei) 408 if (stats.free_inodes < avefreei)
520 continue; 409 continue;
521 if (stats.free_blocks < avefreeb) 410 if (stats.free_clusters < avefreec)
522 continue; 411 continue;
523 grp = g; 412 grp = g;
524 ret = 0; 413 ret = 0;
@@ -556,7 +445,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
556 min_inodes = avefreei - inodes_per_group*flex_size / 4; 445 min_inodes = avefreei - inodes_per_group*flex_size / 4;
557 if (min_inodes < 1) 446 if (min_inodes < 1)
558 min_inodes = 1; 447 min_inodes = 1;
559 min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4; 448 min_clusters = avefreec - EXT4_CLUSTERS_PER_GROUP(sb)*flex_size / 4;
560 449
561 /* 450 /*
562 * Start looking in the flex group where we last allocated an 451 * Start looking in the flex group where we last allocated an
@@ -575,7 +464,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
575 continue; 464 continue;
576 if (stats.free_inodes < min_inodes) 465 if (stats.free_inodes < min_inodes)
577 continue; 466 continue;
578 if (stats.free_blocks < min_blocks) 467 if (stats.free_clusters < min_clusters)
579 continue; 468 continue;
580 goto found_flex_bg; 469 goto found_flex_bg;
581 } 470 }
@@ -659,7 +548,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
659 *group = parent_group; 548 *group = parent_group;
660 desc = ext4_get_group_desc(sb, *group, NULL); 549 desc = ext4_get_group_desc(sb, *group, NULL);
661 if (desc && ext4_free_inodes_count(sb, desc) && 550 if (desc && ext4_free_inodes_count(sb, desc) &&
662 ext4_free_blks_count(sb, desc)) 551 ext4_free_group_clusters(sb, desc))
663 return 0; 552 return 0;
664 553
665 /* 554 /*
@@ -683,7 +572,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
683 *group -= ngroups; 572 *group -= ngroups;
684 desc = ext4_get_group_desc(sb, *group, NULL); 573 desc = ext4_get_group_desc(sb, *group, NULL);
685 if (desc && ext4_free_inodes_count(sb, desc) && 574 if (desc && ext4_free_inodes_count(sb, desc) &&
686 ext4_free_blks_count(sb, desc)) 575 ext4_free_group_clusters(sb, desc))
687 return 0; 576 return 0;
688 } 577 }
689 578
@@ -802,7 +691,7 @@ err_ret:
802 * group to find a free inode. 691 * group to find a free inode.
803 */ 692 */
804struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, 693struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
805 const struct qstr *qstr, __u32 goal) 694 const struct qstr *qstr, __u32 goal, uid_t *owner)
806{ 695{
807 struct super_block *sb; 696 struct super_block *sb;
808 struct buffer_head *inode_bitmap_bh = NULL; 697 struct buffer_head *inode_bitmap_bh = NULL;
@@ -816,8 +705,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
816 int ret2, err = 0; 705 int ret2, err = 0;
817 struct inode *ret; 706 struct inode *ret;
818 ext4_group_t i; 707 ext4_group_t i;
819 int free = 0;
820 static int once = 1;
821 ext4_group_t flex_group; 708 ext4_group_t flex_group;
822 709
823 /* Cannot create files in a deleted directory */ 710 /* Cannot create files in a deleted directory */
@@ -843,26 +730,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
843 goto got_group; 730 goto got_group;
844 } 731 }
845 732
846 if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { 733 if (S_ISDIR(mode))
847 ret2 = find_group_flex(sb, dir, &group); 734 ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
848 if (ret2 == -1) { 735 else
849 ret2 = find_group_other(sb, dir, &group, mode);
850 if (ret2 == 0 && once) {
851 once = 0;
852 printk(KERN_NOTICE "ext4: find_group_flex "
853 "failed, fallback succeeded dir %lu\n",
854 dir->i_ino);
855 }
856 }
857 goto got_group;
858 }
859
860 if (S_ISDIR(mode)) {
861 if (test_opt(sb, OLDALLOC))
862 ret2 = find_group_dir(sb, dir, &group);
863 else
864 ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
865 } else
866 ret2 = find_group_other(sb, dir, &group, mode); 736 ret2 = find_group_other(sb, dir, &group, mode);
867 737
868got_group: 738got_group:
@@ -950,26 +820,21 @@ got:
950 goto fail; 820 goto fail;
951 } 821 }
952 822
953 free = 0; 823 BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
954 ext4_lock_group(sb, group); 824 err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh);
825 brelse(block_bitmap_bh);
826
955 /* recheck and clear flag under lock if we still need to */ 827 /* recheck and clear flag under lock if we still need to */
828 ext4_lock_group(sb, group);
956 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 829 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
957 free = ext4_free_blocks_after_init(sb, group, gdp);
958 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 830 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
959 ext4_free_blks_set(sb, gdp, free); 831 ext4_free_group_clusters_set(sb, gdp,
832 ext4_free_clusters_after_init(sb, group, gdp));
960 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, 833 gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
961 gdp); 834 gdp);
962 } 835 }
963 ext4_unlock_group(sb, group); 836 ext4_unlock_group(sb, group);
964 837
965 /* Don't need to dirty bitmap block if we didn't change it */
966 if (free) {
967 BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
968 err = ext4_handle_dirty_metadata(handle,
969 NULL, block_bitmap_bh);
970 }
971
972 brelse(block_bitmap_bh);
973 if (err) 838 if (err)
974 goto fail; 839 goto fail;
975 } 840 }
@@ -987,8 +852,11 @@ got:
987 flex_group = ext4_flex_group(sbi, group); 852 flex_group = ext4_flex_group(sbi, group);
988 atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); 853 atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
989 } 854 }
990 855 if (owner) {
991 if (test_opt(sb, GRPID)) { 856 inode->i_mode = mode;
857 inode->i_uid = owner[0];
858 inode->i_gid = owner[1];
859 } else if (test_opt(sb, GRPID)) {
992 inode->i_mode = mode; 860 inode->i_mode = mode;
993 inode->i_uid = current_fsuid(); 861 inode->i_uid = current_fsuid();
994 inode->i_gid = dir->i_gid; 862 inode->i_gid = dir->i_gid;
@@ -1005,11 +873,7 @@ got:
1005 ei->i_dir_start_lookup = 0; 873 ei->i_dir_start_lookup = 0;
1006 ei->i_disksize = 0; 874 ei->i_disksize = 0;
1007 875
1008 /* 876 /* Don't inherit extent flag from directory, amongst others. */
1009 * Don't inherit extent flag from directory, amongst others. We set
1010 * extent flag on newly created directory and file only if -o extent
1011 * mount option is specified
1012 */
1013 ei->i_flags = 877 ei->i_flags =
1014 ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED); 878 ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
1015 ei->i_file_acl = 0; 879 ei->i_file_acl = 0;
@@ -1084,7 +948,7 @@ fail_free_drop:
1084fail_drop: 948fail_drop:
1085 dquot_drop(inode); 949 dquot_drop(inode);
1086 inode->i_flags |= S_NOQUOTA; 950 inode->i_flags |= S_NOQUOTA;
1087 inode->i_nlink = 0; 951 clear_nlink(inode);
1088 unlock_new_inode(inode); 952 unlock_new_inode(inode);
1089 iput(inode); 953 iput(inode);
1090 brelse(inode_bitmap_bh); 954 brelse(inode_bitmap_bh);
@@ -1235,7 +1099,7 @@ unsigned long ext4_count_dirs(struct super_block * sb)
1235 * inode allocation from the current group, so we take alloc_sem lock, to 1099 * inode allocation from the current group, so we take alloc_sem lock, to
1236 * block ext4_claim_inode until we are finished. 1100 * block ext4_claim_inode until we are finished.
1237 */ 1101 */
1238extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, 1102int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
1239 int barrier) 1103 int barrier)
1240{ 1104{
1241 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 1105 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 0962642119c0..3cfc73fbca8e 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -699,6 +699,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
699 /* 699 /*
700 * Okay, we need to do block allocation. 700 * Okay, we need to do block allocation.
701 */ 701 */
702 if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
703 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
704 EXT4_ERROR_INODE(inode, "Can't allocate blocks for "
705 "non-extent mapped inodes with bigalloc");
706 return -ENOSPC;
707 }
708
702 goal = ext4_find_goal(inode, map->m_lblk, partial); 709 goal = ext4_find_goal(inode, map->m_lblk, partial);
703 710
704 /* the number of blocks need to allocate for [d,t]indirect blocks */ 711 /* the number of blocks need to allocate for [d,t]indirect blocks */
@@ -1343,7 +1350,9 @@ void ext4_ind_truncate(struct inode *inode)
1343 __le32 nr = 0; 1350 __le32 nr = 0;
1344 int n = 0; 1351 int n = 0;
1345 ext4_lblk_t last_block, max_block; 1352 ext4_lblk_t last_block, max_block;
1353 loff_t page_len;
1346 unsigned blocksize = inode->i_sb->s_blocksize; 1354 unsigned blocksize = inode->i_sb->s_blocksize;
1355 int err;
1347 1356
1348 handle = start_transaction(inode); 1357 handle = start_transaction(inode);
1349 if (IS_ERR(handle)) 1358 if (IS_ERR(handle))
@@ -1354,9 +1363,16 @@ void ext4_ind_truncate(struct inode *inode)
1354 max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) 1363 max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
1355 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); 1364 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
1356 1365
1357 if (inode->i_size & (blocksize - 1)) 1366 if (inode->i_size % PAGE_CACHE_SIZE != 0) {
1358 if (ext4_block_truncate_page(handle, mapping, inode->i_size)) 1367 page_len = PAGE_CACHE_SIZE -
1368 (inode->i_size & (PAGE_CACHE_SIZE - 1));
1369
1370 err = ext4_discard_partial_page_buffers(handle,
1371 mapping, inode->i_size, page_len, 0);
1372
1373 if (err)
1359 goto out_stop; 1374 goto out_stop;
1375 }
1360 1376
1361 if (last_block != max_block) { 1377 if (last_block != max_block) {
1362 n = ext4_block_to_path(inode, last_block, offsets, NULL); 1378 n = ext4_block_to_path(inode, last_block, offsets, NULL);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0defe0bfe019..cc5a6da030a1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -42,7 +42,6 @@
42#include "ext4_jbd2.h" 42#include "ext4_jbd2.h"
43#include "xattr.h" 43#include "xattr.h"
44#include "acl.h" 44#include "acl.h"
45#include "ext4_extents.h"
46#include "truncate.h" 45#include "truncate.h"
47 46
48#include <trace/events/ext4.h> 47#include <trace/events/ext4.h>
@@ -268,7 +267,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
268 struct ext4_inode_info *ei = EXT4_I(inode); 267 struct ext4_inode_info *ei = EXT4_I(inode);
269 268
270 spin_lock(&ei->i_block_reservation_lock); 269 spin_lock(&ei->i_block_reservation_lock);
271 trace_ext4_da_update_reserve_space(inode, used); 270 trace_ext4_da_update_reserve_space(inode, used, quota_claim);
272 if (unlikely(used > ei->i_reserved_data_blocks)) { 271 if (unlikely(used > ei->i_reserved_data_blocks)) {
273 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " 272 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
274 "with only %d reserved data blocks\n", 273 "with only %d reserved data blocks\n",
@@ -281,7 +280,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
281 /* Update per-inode reservations */ 280 /* Update per-inode reservations */
282 ei->i_reserved_data_blocks -= used; 281 ei->i_reserved_data_blocks -= used;
283 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; 282 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
284 percpu_counter_sub(&sbi->s_dirtyblocks_counter, 283 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
285 used + ei->i_allocated_meta_blocks); 284 used + ei->i_allocated_meta_blocks);
286 ei->i_allocated_meta_blocks = 0; 285 ei->i_allocated_meta_blocks = 0;
287 286
@@ -291,7 +290,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
291 * only when we have written all of the delayed 290 * only when we have written all of the delayed
292 * allocation blocks. 291 * allocation blocks.
293 */ 292 */
294 percpu_counter_sub(&sbi->s_dirtyblocks_counter, 293 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
295 ei->i_reserved_meta_blocks); 294 ei->i_reserved_meta_blocks);
296 ei->i_reserved_meta_blocks = 0; 295 ei->i_reserved_meta_blocks = 0;
297 ei->i_da_metadata_calc_len = 0; 296 ei->i_da_metadata_calc_len = 0;
@@ -300,14 +299,14 @@ void ext4_da_update_reserve_space(struct inode *inode,
300 299
301 /* Update quota subsystem for data blocks */ 300 /* Update quota subsystem for data blocks */
302 if (quota_claim) 301 if (quota_claim)
303 dquot_claim_block(inode, used); 302 dquot_claim_block(inode, EXT4_C2B(sbi, used));
304 else { 303 else {
305 /* 304 /*
306 * We did fallocate with an offset that is already delayed 305 * We did fallocate with an offset that is already delayed
307 * allocated. So on delayed allocated writeback we should 306 * allocated. So on delayed allocated writeback we should
308 * not re-claim the quota for fallocated blocks. 307 * not re-claim the quota for fallocated blocks.
309 */ 308 */
310 dquot_release_reservation_block(inode, used); 309 dquot_release_reservation_block(inode, EXT4_C2B(sbi, used));
311 } 310 }
312 311
313 /* 312 /*
@@ -399,6 +398,49 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
399} 398}
400 399
401/* 400/*
401 * Sets the BH_Da_Mapped bit on the buffer heads corresponding to the given map.
402 */
403static void set_buffers_da_mapped(struct inode *inode,
404 struct ext4_map_blocks *map)
405{
406 struct address_space *mapping = inode->i_mapping;
407 struct pagevec pvec;
408 int i, nr_pages;
409 pgoff_t index, end;
410
411 index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
412 end = (map->m_lblk + map->m_len - 1) >>
413 (PAGE_CACHE_SHIFT - inode->i_blkbits);
414
415 pagevec_init(&pvec, 0);
416 while (index <= end) {
417 nr_pages = pagevec_lookup(&pvec, mapping, index,
418 min(end - index + 1,
419 (pgoff_t)PAGEVEC_SIZE));
420 if (nr_pages == 0)
421 break;
422 for (i = 0; i < nr_pages; i++) {
423 struct page *page = pvec.pages[i];
424 struct buffer_head *bh, *head;
425
426 if (unlikely(page->mapping != mapping) ||
427 !PageDirty(page))
428 break;
429
430 if (page_has_buffers(page)) {
431 bh = head = page_buffers(page);
432 do {
433 set_buffer_da_mapped(bh);
434 bh = bh->b_this_page;
435 } while (bh != head);
436 }
437 index++;
438 }
439 pagevec_release(&pvec);
440 }
441}
442
443/*
402 * The ext4_map_blocks() function tries to look up the requested blocks, 444 * The ext4_map_blocks() function tries to look up the requested blocks,
403 * and returns if the blocks are already mapped. 445 * and returns if the blocks are already mapped.
404 * 446 *
@@ -416,7 +458,7 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
416 * the buffer head is mapped. 458 * the buffer head is mapped.
417 * 459 *
418 * It returns 0 if plain look up failed (blocks have not been allocated), in 460 * It returns 0 if plain look up failed (blocks have not been allocated), in
419 * that casem, buffer head is unmapped 461 * that case, buffer head is unmapped
420 * 462 *
421 * It returns the error in case of allocation failure. 463 * It returns the error in case of allocation failure.
422 */ 464 */
@@ -435,9 +477,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
435 */ 477 */
436 down_read((&EXT4_I(inode)->i_data_sem)); 478 down_read((&EXT4_I(inode)->i_data_sem));
437 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 479 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
438 retval = ext4_ext_map_blocks(handle, inode, map, 0); 480 retval = ext4_ext_map_blocks(handle, inode, map, flags &
481 EXT4_GET_BLOCKS_KEEP_SIZE);
439 } else { 482 } else {
440 retval = ext4_ind_map_blocks(handle, inode, map, 0); 483 retval = ext4_ind_map_blocks(handle, inode, map, flags &
484 EXT4_GET_BLOCKS_KEEP_SIZE);
441 } 485 }
442 up_read((&EXT4_I(inode)->i_data_sem)); 486 up_read((&EXT4_I(inode)->i_data_sem));
443 487
@@ -455,7 +499,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
455 * Returns if the blocks have already allocated 499 * Returns if the blocks have already allocated
456 * 500 *
457 * Note that if blocks have been preallocated 501 * Note that if blocks have been preallocated
458 * ext4_ext_get_block() returns th create = 0 502 * ext4_ext_get_block() returns the create = 0
459 * with buffer head unmapped. 503 * with buffer head unmapped.
460 */ 504 */
461 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) 505 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
@@ -517,9 +561,17 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
517 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) 561 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
518 ext4_da_update_reserve_space(inode, retval, 1); 562 ext4_da_update_reserve_space(inode, retval, 1);
519 } 563 }
520 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 564 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
521 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); 565 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
522 566
567 /* If we have successfully mapped the delayed allocated blocks,
568 * set the BH_Da_Mapped bit on them. Its important to do this
569 * under the protection of i_data_sem.
570 */
571 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
572 set_buffers_da_mapped(inode, map);
573 }
574
523 up_write((&EXT4_I(inode)->i_data_sem)); 575 up_write((&EXT4_I(inode)->i_data_sem));
524 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 576 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
525 int ret = check_block_validity(inode, map); 577 int ret = check_block_validity(inode, map);
@@ -909,7 +961,11 @@ static int ext4_ordered_write_end(struct file *file,
909 ext4_orphan_add(handle, inode); 961 ext4_orphan_add(handle, inode);
910 if (ret2 < 0) 962 if (ret2 < 0)
911 ret = ret2; 963 ret = ret2;
964 } else {
965 unlock_page(page);
966 page_cache_release(page);
912 } 967 }
968
913 ret2 = ext4_journal_stop(handle); 969 ret2 = ext4_journal_stop(handle);
914 if (!ret) 970 if (!ret)
915 ret = ret2; 971 ret = ret2;
@@ -1037,14 +1093,14 @@ static int ext4_journalled_write_end(struct file *file,
1037} 1093}
1038 1094
1039/* 1095/*
1040 * Reserve a single block located at lblock 1096 * Reserve a single cluster located at lblock
1041 */ 1097 */
1042static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) 1098static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1043{ 1099{
1044 int retries = 0; 1100 int retries = 0;
1045 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1101 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1046 struct ext4_inode_info *ei = EXT4_I(inode); 1102 struct ext4_inode_info *ei = EXT4_I(inode);
1047 unsigned long md_needed; 1103 unsigned int md_needed;
1048 int ret; 1104 int ret;
1049 1105
1050 /* 1106 /*
@@ -1054,7 +1110,8 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1054 */ 1110 */
1055repeat: 1111repeat:
1056 spin_lock(&ei->i_block_reservation_lock); 1112 spin_lock(&ei->i_block_reservation_lock);
1057 md_needed = ext4_calc_metadata_amount(inode, lblock); 1113 md_needed = EXT4_NUM_B2C(sbi,
1114 ext4_calc_metadata_amount(inode, lblock));
1058 trace_ext4_da_reserve_space(inode, md_needed); 1115 trace_ext4_da_reserve_space(inode, md_needed);
1059 spin_unlock(&ei->i_block_reservation_lock); 1116 spin_unlock(&ei->i_block_reservation_lock);
1060 1117
@@ -1063,15 +1120,15 @@ repeat:
1063 * us from metadata over-estimation, though we may go over by 1120 * us from metadata over-estimation, though we may go over by
1064 * a small amount in the end. Here we just reserve for data. 1121 * a small amount in the end. Here we just reserve for data.
1065 */ 1122 */
1066 ret = dquot_reserve_block(inode, 1); 1123 ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
1067 if (ret) 1124 if (ret)
1068 return ret; 1125 return ret;
1069 /* 1126 /*
1070 * We do still charge estimated metadata to the sb though; 1127 * We do still charge estimated metadata to the sb though;
1071 * we cannot afford to run out of free blocks. 1128 * we cannot afford to run out of free blocks.
1072 */ 1129 */
1073 if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) { 1130 if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
1074 dquot_release_reservation_block(inode, 1); 1131 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
1075 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1132 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1076 yield(); 1133 yield();
1077 goto repeat; 1134 goto repeat;
@@ -1118,19 +1175,21 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
1118 * We can release all of the reserved metadata blocks 1175 * We can release all of the reserved metadata blocks
1119 * only when we have written all of the delayed 1176 * only when we have written all of the delayed
1120 * allocation blocks. 1177 * allocation blocks.
1178 * Note that in case of bigalloc, i_reserved_meta_blocks,
1179 * i_reserved_data_blocks, etc. refer to number of clusters.
1121 */ 1180 */
1122 percpu_counter_sub(&sbi->s_dirtyblocks_counter, 1181 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
1123 ei->i_reserved_meta_blocks); 1182 ei->i_reserved_meta_blocks);
1124 ei->i_reserved_meta_blocks = 0; 1183 ei->i_reserved_meta_blocks = 0;
1125 ei->i_da_metadata_calc_len = 0; 1184 ei->i_da_metadata_calc_len = 0;
1126 } 1185 }
1127 1186
1128 /* update fs dirty data blocks counter */ 1187 /* update fs dirty data blocks counter */
1129 percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); 1188 percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
1130 1189
1131 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1190 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1132 1191
1133 dquot_release_reservation_block(inode, to_free); 1192 dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
1134} 1193}
1135 1194
1136static void ext4_da_page_release_reservation(struct page *page, 1195static void ext4_da_page_release_reservation(struct page *page,
@@ -1139,6 +1198,9 @@ static void ext4_da_page_release_reservation(struct page *page,
1139 int to_release = 0; 1198 int to_release = 0;
1140 struct buffer_head *head, *bh; 1199 struct buffer_head *head, *bh;
1141 unsigned int curr_off = 0; 1200 unsigned int curr_off = 0;
1201 struct inode *inode = page->mapping->host;
1202 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1203 int num_clusters;
1142 1204
1143 head = page_buffers(page); 1205 head = page_buffers(page);
1144 bh = head; 1206 bh = head;
@@ -1148,10 +1210,24 @@ static void ext4_da_page_release_reservation(struct page *page,
1148 if ((offset <= curr_off) && (buffer_delay(bh))) { 1210 if ((offset <= curr_off) && (buffer_delay(bh))) {
1149 to_release++; 1211 to_release++;
1150 clear_buffer_delay(bh); 1212 clear_buffer_delay(bh);
1213 clear_buffer_da_mapped(bh);
1151 } 1214 }
1152 curr_off = next_off; 1215 curr_off = next_off;
1153 } while ((bh = bh->b_this_page) != head); 1216 } while ((bh = bh->b_this_page) != head);
1154 ext4_da_release_space(page->mapping->host, to_release); 1217
1218 /* If we have released all the blocks belonging to a cluster, then we
1219 * need to release the reserved space for that cluster. */
1220 num_clusters = EXT4_NUM_B2C(sbi, to_release);
1221 while (num_clusters > 0) {
1222 ext4_fsblk_t lblk;
1223 lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
1224 ((num_clusters - 1) << sbi->s_cluster_bits);
1225 if (sbi->s_cluster_ratio == 1 ||
1226 !ext4_find_delalloc_cluster(inode, lblk, 1))
1227 ext4_da_release_space(inode, 1);
1228
1229 num_clusters--;
1230 }
1155} 1231}
1156 1232
1157/* 1233/*
@@ -1253,6 +1329,8 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1253 clear_buffer_delay(bh); 1329 clear_buffer_delay(bh);
1254 bh->b_blocknr = pblock; 1330 bh->b_blocknr = pblock;
1255 } 1331 }
1332 if (buffer_da_mapped(bh))
1333 clear_buffer_da_mapped(bh);
1256 if (buffer_unwritten(bh) || 1334 if (buffer_unwritten(bh) ||
1257 buffer_mapped(bh)) 1335 buffer_mapped(bh))
1258 BUG_ON(bh->b_blocknr != pblock); 1336 BUG_ON(bh->b_blocknr != pblock);
@@ -1346,12 +1424,15 @@ static void ext4_print_free_blocks(struct inode *inode)
1346{ 1424{
1347 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1425 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1348 printk(KERN_CRIT "Total free blocks count %lld\n", 1426 printk(KERN_CRIT "Total free blocks count %lld\n",
1349 ext4_count_free_blocks(inode->i_sb)); 1427 EXT4_C2B(EXT4_SB(inode->i_sb),
1428 ext4_count_free_clusters(inode->i_sb)));
1350 printk(KERN_CRIT "Free/Dirty block details\n"); 1429 printk(KERN_CRIT "Free/Dirty block details\n");
1351 printk(KERN_CRIT "free_blocks=%lld\n", 1430 printk(KERN_CRIT "free_blocks=%lld\n",
1352 (long long) percpu_counter_sum(&sbi->s_freeblocks_counter)); 1431 (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
1432 percpu_counter_sum(&sbi->s_freeclusters_counter)));
1353 printk(KERN_CRIT "dirty_blocks=%lld\n", 1433 printk(KERN_CRIT "dirty_blocks=%lld\n",
1354 (long long) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); 1434 (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
1435 percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
1355 printk(KERN_CRIT "Block reservation details\n"); 1436 printk(KERN_CRIT "Block reservation details\n");
1356 printk(KERN_CRIT "i_reserved_data_blocks=%u\n", 1437 printk(KERN_CRIT "i_reserved_data_blocks=%u\n",
1357 EXT4_I(inode)->i_reserved_data_blocks); 1438 EXT4_I(inode)->i_reserved_data_blocks);
@@ -1430,8 +1511,7 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
1430 if (err == -EAGAIN) 1511 if (err == -EAGAIN)
1431 goto submit_io; 1512 goto submit_io;
1432 1513
1433 if (err == -ENOSPC && 1514 if (err == -ENOSPC && ext4_count_free_clusters(sb)) {
1434 ext4_count_free_blocks(sb)) {
1435 mpd->retval = err; 1515 mpd->retval = err;
1436 goto submit_io; 1516 goto submit_io;
1437 } 1517 }
@@ -1471,13 +1551,15 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
1471 1551
1472 for (i = 0; i < map.m_len; i++) 1552 for (i = 0; i < map.m_len; i++)
1473 unmap_underlying_metadata(bdev, map.m_pblk + i); 1553 unmap_underlying_metadata(bdev, map.m_pblk + i);
1474 }
1475 1554
1476 if (ext4_should_order_data(mpd->inode)) { 1555 if (ext4_should_order_data(mpd->inode)) {
1477 err = ext4_jbd2_file_inode(handle, mpd->inode); 1556 err = ext4_jbd2_file_inode(handle, mpd->inode);
1478 if (err) 1557 if (err) {
1479 /* This only happens if the journal is aborted */ 1558 /* Only if the journal is aborted */
1480 return; 1559 mpd->retval = err;
1560 goto submit_io;
1561 }
1562 }
1481 } 1563 }
1482 1564
1483 /* 1565 /*
@@ -1584,6 +1666,66 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
1584} 1666}
1585 1667
1586/* 1668/*
1669 * This function is grabs code from the very beginning of
1670 * ext4_map_blocks, but assumes that the caller is from delayed write
1671 * time. This function looks up the requested blocks and sets the
1672 * buffer delay bit under the protection of i_data_sem.
1673 */
1674static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1675 struct ext4_map_blocks *map,
1676 struct buffer_head *bh)
1677{
1678 int retval;
1679 sector_t invalid_block = ~((sector_t) 0xffff);
1680
1681 if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
1682 invalid_block = ~0;
1683
1684 map->m_flags = 0;
1685 ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
1686 "logical block %lu\n", inode->i_ino, map->m_len,
1687 (unsigned long) map->m_lblk);
1688 /*
1689 * Try to see if we can get the block without requesting a new
1690 * file system block.
1691 */
1692 down_read((&EXT4_I(inode)->i_data_sem));
1693 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
1694 retval = ext4_ext_map_blocks(NULL, inode, map, 0);
1695 else
1696 retval = ext4_ind_map_blocks(NULL, inode, map, 0);
1697
1698 if (retval == 0) {
1699 /*
1700 * XXX: __block_prepare_write() unmaps passed block,
1701 * is it OK?
1702 */
1703 /* If the block was allocated from previously allocated cluster,
1704 * then we dont need to reserve it again. */
1705 if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
1706 retval = ext4_da_reserve_space(inode, iblock);
1707 if (retval)
1708 /* not enough space to reserve */
1709 goto out_unlock;
1710 }
1711
1712 /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
1713 * and it should not appear on the bh->b_state.
1714 */
1715 map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
1716
1717 map_bh(bh, inode->i_sb, invalid_block);
1718 set_buffer_new(bh);
1719 set_buffer_delay(bh);
1720 }
1721
1722out_unlock:
1723 up_read((&EXT4_I(inode)->i_data_sem));
1724
1725 return retval;
1726}
1727
1728/*
1587 * This is a special get_blocks_t callback which is used by 1729 * This is a special get_blocks_t callback which is used by
1588 * ext4_da_write_begin(). It will either return mapped block or 1730 * ext4_da_write_begin(). It will either return mapped block or
1589 * reserve space for a single block. 1731 * reserve space for a single block.
@@ -1600,10 +1742,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
1600{ 1742{
1601 struct ext4_map_blocks map; 1743 struct ext4_map_blocks map;
1602 int ret = 0; 1744 int ret = 0;
1603 sector_t invalid_block = ~((sector_t) 0xffff);
1604
1605 if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
1606 invalid_block = ~0;
1607 1745
1608 BUG_ON(create == 0); 1746 BUG_ON(create == 0);
1609 BUG_ON(bh->b_size != inode->i_sb->s_blocksize); 1747 BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
@@ -1616,25 +1754,9 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
1616 * preallocated blocks are unmapped but should treated 1754 * preallocated blocks are unmapped but should treated
1617 * the same as allocated blocks. 1755 * the same as allocated blocks.
1618 */ 1756 */
1619 ret = ext4_map_blocks(NULL, inode, &map, 0); 1757 ret = ext4_da_map_blocks(inode, iblock, &map, bh);
1620 if (ret < 0) 1758 if (ret <= 0)
1621 return ret; 1759 return ret;
1622 if (ret == 0) {
1623 if (buffer_delay(bh))
1624 return 0; /* Not sure this could or should happen */
1625 /*
1626 * XXX: __block_write_begin() unmaps passed block, is it OK?
1627 */
1628 ret = ext4_da_reserve_space(inode, iblock);
1629 if (ret)
1630 /* not enough space to reserve */
1631 return ret;
1632
1633 map_bh(bh, inode->i_sb, invalid_block);
1634 set_buffer_new(bh);
1635 set_buffer_delay(bh);
1636 return 0;
1637 }
1638 1760
1639 map_bh(bh, inode->i_sb, map.m_pblk); 1761 map_bh(bh, inode->i_sb, map.m_pblk);
1640 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; 1762 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
@@ -2050,6 +2172,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2050 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2172 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2051 pgoff_t done_index = 0; 2173 pgoff_t done_index = 0;
2052 pgoff_t end; 2174 pgoff_t end;
2175 struct blk_plug plug;
2053 2176
2054 trace_ext4_da_writepages(inode, wbc); 2177 trace_ext4_da_writepages(inode, wbc);
2055 2178
@@ -2128,6 +2251,7 @@ retry:
2128 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 2251 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2129 tag_pages_for_writeback(mapping, index, end); 2252 tag_pages_for_writeback(mapping, index, end);
2130 2253
2254 blk_start_plug(&plug);
2131 while (!ret && wbc->nr_to_write > 0) { 2255 while (!ret && wbc->nr_to_write > 0) {
2132 2256
2133 /* 2257 /*
@@ -2178,11 +2302,12 @@ retry:
2178 ret = 0; 2302 ret = 0;
2179 } else if (ret == MPAGE_DA_EXTENT_TAIL) { 2303 } else if (ret == MPAGE_DA_EXTENT_TAIL) {
2180 /* 2304 /*
2181 * got one extent now try with 2305 * Got one extent now try with rest of the pages.
2182 * rest of the pages 2306 * If mpd.retval is set -EIO, journal is aborted.
2307 * So we don't need to write any more.
2183 */ 2308 */
2184 pages_written += mpd.pages_written; 2309 pages_written += mpd.pages_written;
2185 ret = 0; 2310 ret = mpd.retval;
2186 io_done = 1; 2311 io_done = 1;
2187 } else if (wbc->nr_to_write) 2312 } else if (wbc->nr_to_write)
2188 /* 2313 /*
@@ -2192,6 +2317,7 @@ retry:
2192 */ 2317 */
2193 break; 2318 break;
2194 } 2319 }
2320 blk_finish_plug(&plug);
2195 if (!io_done && !cycled) { 2321 if (!io_done && !cycled) {
2196 cycled = 1; 2322 cycled = 1;
2197 index = 0; 2323 index = 0;
@@ -2230,10 +2356,11 @@ static int ext4_nonda_switch(struct super_block *sb)
2230 * Delalloc need an accurate free block accounting. So switch 2356 * Delalloc need an accurate free block accounting. So switch
2231 * to non delalloc when we are near to error range. 2357 * to non delalloc when we are near to error range.
2232 */ 2358 */
2233 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 2359 free_blocks = EXT4_C2B(sbi,
2234 dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter); 2360 percpu_counter_read_positive(&sbi->s_freeclusters_counter));
2361 dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
2235 if (2 * free_blocks < 3 * dirty_blocks || 2362 if (2 * free_blocks < 3 * dirty_blocks ||
2236 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { 2363 free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
2237 /* 2364 /*
2238 * free block count is less than 150% of dirty blocks 2365 * free block count is less than 150% of dirty blocks
2239 * or free blocks is less than watermark 2366 * or free blocks is less than watermark
@@ -2259,6 +2386,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2259 pgoff_t index; 2386 pgoff_t index;
2260 struct inode *inode = mapping->host; 2387 struct inode *inode = mapping->host;
2261 handle_t *handle; 2388 handle_t *handle;
2389 loff_t page_len;
2262 2390
2263 index = pos >> PAGE_CACHE_SHIFT; 2391 index = pos >> PAGE_CACHE_SHIFT;
2264 2392
@@ -2305,6 +2433,13 @@ retry:
2305 */ 2433 */
2306 if (pos + len > inode->i_size) 2434 if (pos + len > inode->i_size)
2307 ext4_truncate_failed_write(inode); 2435 ext4_truncate_failed_write(inode);
2436 } else {
2437 page_len = pos & (PAGE_CACHE_SIZE - 1);
2438 if (page_len > 0) {
2439 ret = ext4_discard_partial_page_buffers_no_lock(handle,
2440 inode, page, pos - page_len, page_len,
2441 EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
2442 }
2308 } 2443 }
2309 2444
2310 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 2445 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -2347,6 +2482,7 @@ static int ext4_da_write_end(struct file *file,
2347 loff_t new_i_size; 2482 loff_t new_i_size;
2348 unsigned long start, end; 2483 unsigned long start, end;
2349 int write_mode = (int)(unsigned long)fsdata; 2484 int write_mode = (int)(unsigned long)fsdata;
2485 loff_t page_len;
2350 2486
2351 if (write_mode == FALL_BACK_TO_NONDELALLOC) { 2487 if (write_mode == FALL_BACK_TO_NONDELALLOC) {
2352 if (ext4_should_order_data(inode)) { 2488 if (ext4_should_order_data(inode)) {
@@ -2395,6 +2531,16 @@ static int ext4_da_write_end(struct file *file,
2395 } 2531 }
2396 ret2 = generic_write_end(file, mapping, pos, len, copied, 2532 ret2 = generic_write_end(file, mapping, pos, len, copied,
2397 page, fsdata); 2533 page, fsdata);
2534
2535 page_len = PAGE_CACHE_SIZE -
2536 ((pos + copied - 1) & (PAGE_CACHE_SIZE - 1));
2537
2538 if (page_len > 0) {
2539 ret = ext4_discard_partial_page_buffers_no_lock(handle,
2540 inode, page, pos + copied - 1, page_len,
2541 EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
2542 }
2543
2398 copied = ret2; 2544 copied = ret2;
2399 if (ret2 < 0) 2545 if (ret2 < 0)
2400 ret = ret2; 2546 ret = ret2;
@@ -2689,10 +2835,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
2689 * but being more careful is always safe for the future change. 2835 * but being more careful is always safe for the future change.
2690 */ 2836 */
2691 inode = io_end->inode; 2837 inode = io_end->inode;
2692 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { 2838 ext4_set_io_unwritten_flag(inode, io_end);
2693 io_end->flag |= EXT4_IO_END_UNWRITTEN;
2694 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
2695 }
2696 2839
2697 /* Add the io_end to per-inode completed io list*/ 2840 /* Add the io_end to per-inode completed io list*/
2698 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); 2841 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
@@ -2858,6 +3001,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
2858 struct inode *inode = file->f_mapping->host; 3001 struct inode *inode = file->f_mapping->host;
2859 ssize_t ret; 3002 ssize_t ret;
2860 3003
3004 /*
3005 * If we are doing data journalling we don't support O_DIRECT
3006 */
3007 if (ext4_should_journal_data(inode))
3008 return 0;
3009
2861 trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); 3010 trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
2862 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3011 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
2863 ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); 3012 ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
@@ -2927,6 +3076,7 @@ static const struct address_space_operations ext4_journalled_aops = {
2927 .bmap = ext4_bmap, 3076 .bmap = ext4_bmap,
2928 .invalidatepage = ext4_invalidatepage, 3077 .invalidatepage = ext4_invalidatepage,
2929 .releasepage = ext4_releasepage, 3078 .releasepage = ext4_releasepage,
3079 .direct_IO = ext4_direct_IO,
2930 .is_partially_uptodate = block_is_partially_uptodate, 3080 .is_partially_uptodate = block_is_partially_uptodate,
2931 .error_remove_page = generic_error_remove_page, 3081 .error_remove_page = generic_error_remove_page,
2932}; 3082};
@@ -2963,6 +3113,227 @@ void ext4_set_aops(struct inode *inode)
2963 inode->i_mapping->a_ops = &ext4_journalled_aops; 3113 inode->i_mapping->a_ops = &ext4_journalled_aops;
2964} 3114}
2965 3115
3116
3117/*
3118 * ext4_discard_partial_page_buffers()
3119 * Wrapper function for ext4_discard_partial_page_buffers_no_lock.
3120 * This function finds and locks the page containing the offset
3121 * "from" and passes it to ext4_discard_partial_page_buffers_no_lock.
3122 * Calling functions that already have the page locked should call
3123 * ext4_discard_partial_page_buffers_no_lock directly.
3124 */
3125int ext4_discard_partial_page_buffers(handle_t *handle,
3126 struct address_space *mapping, loff_t from,
3127 loff_t length, int flags)
3128{
3129 struct inode *inode = mapping->host;
3130 struct page *page;
3131 int err = 0;
3132
3133 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
3134 mapping_gfp_mask(mapping) & ~__GFP_FS);
3135 if (!page)
3136 return -ENOMEM;
3137
3138 err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page,
3139 from, length, flags);
3140
3141 unlock_page(page);
3142 page_cache_release(page);
3143 return err;
3144}
3145
3146/*
3147 * ext4_discard_partial_page_buffers_no_lock()
3148 * Zeros a page range of length 'length' starting from offset 'from'.
3149 * Buffer heads that correspond to the block aligned regions of the
3150 * zeroed range will be unmapped. Unblock aligned regions
3151 * will have the corresponding buffer head mapped if needed so that
3152 * that region of the page can be updated with the partial zero out.
3153 *
3154 * This function assumes that the page has already been locked. The
3155 * The range to be discarded must be contained with in the given page.
3156 * If the specified range exceeds the end of the page it will be shortened
3157 * to the end of the page that corresponds to 'from'. This function is
3158 * appropriate for updating a page and it buffer heads to be unmapped and
3159 * zeroed for blocks that have been either released, or are going to be
3160 * released.
3161 *
3162 * handle: The journal handle
3163 * inode: The files inode
3164 * page: A locked page that contains the offset "from"
3165 * from: The starting byte offset (from the begining of the file)
3166 * to begin discarding
3167 * len: The length of bytes to discard
3168 * flags: Optional flags that may be used:
3169 *
3170 * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED
3171 * Only zero the regions of the page whose buffer heads
3172 * have already been unmapped. This flag is appropriate
3173 * for updateing the contents of a page whose blocks may
3174 * have already been released, and we only want to zero
3175 * out the regions that correspond to those released blocks.
3176 *
3177 * Returns zero on sucess or negative on failure.
3178 */
3179int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
3180 struct inode *inode, struct page *page, loff_t from,
3181 loff_t length, int flags)
3182{
3183 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
3184 unsigned int offset = from & (PAGE_CACHE_SIZE-1);
3185 unsigned int blocksize, max, pos;
3186 ext4_lblk_t iblock;
3187 struct buffer_head *bh;
3188 int err = 0;
3189
3190 blocksize = inode->i_sb->s_blocksize;
3191 max = PAGE_CACHE_SIZE - offset;
3192
3193 if (index != page->index)
3194 return -EINVAL;
3195
3196 /*
3197 * correct length if it does not fall between
3198 * 'from' and the end of the page
3199 */
3200 if (length > max || length < 0)
3201 length = max;
3202
3203 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
3204
3205 if (!page_has_buffers(page)) {
3206 /*
3207 * If the range to be discarded covers a partial block
3208 * we need to get the page buffers. This is because
3209 * partial blocks cannot be released and the page needs
3210 * to be updated with the contents of the block before
3211 * we write the zeros on top of it.
3212 */
3213 if ((from & (blocksize - 1)) ||
3214 ((from + length) & (blocksize - 1))) {
3215 create_empty_buffers(page, blocksize, 0);
3216 } else {
3217 /*
3218 * If there are no partial blocks,
3219 * there is nothing to update,
3220 * so we can return now
3221 */
3222 return 0;
3223 }
3224 }
3225
3226 /* Find the buffer that contains "offset" */
3227 bh = page_buffers(page);
3228 pos = blocksize;
3229 while (offset >= pos) {
3230 bh = bh->b_this_page;
3231 iblock++;
3232 pos += blocksize;
3233 }
3234
3235 pos = offset;
3236 while (pos < offset + length) {
3237 unsigned int end_of_block, range_to_discard;
3238
3239 err = 0;
3240
3241 /* The length of space left to zero and unmap */
3242 range_to_discard = offset + length - pos;
3243
3244 /* The length of space until the end of the block */
3245 end_of_block = blocksize - (pos & (blocksize-1));
3246
3247 /*
3248 * Do not unmap or zero past end of block
3249 * for this buffer head
3250 */
3251 if (range_to_discard > end_of_block)
3252 range_to_discard = end_of_block;
3253
3254
3255 /*
3256 * Skip this buffer head if we are only zeroing unampped
3257 * regions of the page
3258 */
3259 if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED &&
3260 buffer_mapped(bh))
3261 goto next;
3262
3263 /* If the range is block aligned, unmap */
3264 if (range_to_discard == blocksize) {
3265 clear_buffer_dirty(bh);
3266 bh->b_bdev = NULL;
3267 clear_buffer_mapped(bh);
3268 clear_buffer_req(bh);
3269 clear_buffer_new(bh);
3270 clear_buffer_delay(bh);
3271 clear_buffer_unwritten(bh);
3272 clear_buffer_uptodate(bh);
3273 zero_user(page, pos, range_to_discard);
3274 BUFFER_TRACE(bh, "Buffer discarded");
3275 goto next;
3276 }
3277
3278 /*
3279 * If this block is not completely contained in the range
3280 * to be discarded, then it is not going to be released. Because
3281 * we need to keep this block, we need to make sure this part
3282 * of the page is uptodate before we modify it by writeing
3283 * partial zeros on it.
3284 */
3285 if (!buffer_mapped(bh)) {
3286 /*
3287 * Buffer head must be mapped before we can read
3288 * from the block
3289 */
3290 BUFFER_TRACE(bh, "unmapped");
3291 ext4_get_block(inode, iblock, bh, 0);
3292 /* unmapped? It's a hole - nothing to do */
3293 if (!buffer_mapped(bh)) {
3294 BUFFER_TRACE(bh, "still unmapped");
3295 goto next;
3296 }
3297 }
3298
3299 /* Ok, it's mapped. Make sure it's up-to-date */
3300 if (PageUptodate(page))
3301 set_buffer_uptodate(bh);
3302
3303 if (!buffer_uptodate(bh)) {
3304 err = -EIO;
3305 ll_rw_block(READ, 1, &bh);
3306 wait_on_buffer(bh);
3307 /* Uhhuh. Read error. Complain and punt.*/
3308 if (!buffer_uptodate(bh))
3309 goto next;
3310 }
3311
3312 if (ext4_should_journal_data(inode)) {
3313 BUFFER_TRACE(bh, "get write access");
3314 err = ext4_journal_get_write_access(handle, bh);
3315 if (err)
3316 goto next;
3317 }
3318
3319 zero_user(page, pos, range_to_discard);
3320
3321 err = 0;
3322 if (ext4_should_journal_data(inode)) {
3323 err = ext4_handle_dirty_metadata(handle, inode, bh);
3324 } else
3325 mark_buffer_dirty(bh);
3326
3327 BUFFER_TRACE(bh, "Partial buffer zeroed");
3328next:
3329 bh = bh->b_this_page;
3330 iblock++;
3331 pos += range_to_discard;
3332 }
3333
3334 return err;
3335}
3336
2966/* 3337/*
2967 * ext4_block_truncate_page() zeroes out a mapping from file offset `from' 3338 * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
2968 * up to the end of the block which corresponds to `from'. 3339 * up to the end of the block which corresponds to `from'.
@@ -3005,7 +3376,7 @@ int ext4_block_zero_page_range(handle_t *handle,
3005 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, 3376 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
3006 mapping_gfp_mask(mapping) & ~__GFP_FS); 3377 mapping_gfp_mask(mapping) & ~__GFP_FS);
3007 if (!page) 3378 if (!page)
3008 return -EINVAL; 3379 return -ENOMEM;
3009 3380
3010 blocksize = inode->i_sb->s_blocksize; 3381 blocksize = inode->i_sb->s_blocksize;
3011 max = blocksize - (offset & (blocksize - 1)); 3382 max = blocksize - (offset & (blocksize - 1));
@@ -3074,11 +3445,8 @@ int ext4_block_zero_page_range(handle_t *handle,
3074 err = 0; 3445 err = 0;
3075 if (ext4_should_journal_data(inode)) { 3446 if (ext4_should_journal_data(inode)) {
3076 err = ext4_handle_dirty_metadata(handle, inode, bh); 3447 err = ext4_handle_dirty_metadata(handle, inode, bh);
3077 } else { 3448 } else
3078 if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode)
3079 err = ext4_jbd2_file_inode(handle, inode);
3080 mark_buffer_dirty(bh); 3449 mark_buffer_dirty(bh);
3081 }
3082 3450
3083unlock: 3451unlock:
3084 unlock_page(page); 3452 unlock_page(page);
@@ -3119,6 +3487,11 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
3119 return -ENOTSUPP; 3487 return -ENOTSUPP;
3120 } 3488 }
3121 3489
3490 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
3491 /* TODO: Add support for bigalloc file systems */
3492 return -ENOTSUPP;
3493 }
3494
3122 return ext4_ext_punch_hole(file, offset, length); 3495 return ext4_ext_punch_hole(file, offset, length);
3123} 3496}
3124 3497
@@ -3418,7 +3791,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
3418 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; 3791 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
3419 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; 3792 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
3420 } 3793 }
3421 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 3794 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
3422 3795
3423 ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ 3796 ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
3424 ei->i_dir_start_lookup = 0; 3797 ei->i_dir_start_lookup = 0;
@@ -4420,6 +4793,7 @@ retry_alloc:
4420 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { 4793 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
4421 unlock_page(page); 4794 unlock_page(page);
4422 ret = VM_FAULT_SIGBUS; 4795 ret = VM_FAULT_SIGBUS;
4796 ext4_journal_stop(handle);
4423 goto out; 4797 goto out;
4424 } 4798 }
4425 ext4_set_inode_state(inode, EXT4_STATE_JDATA); 4799 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index f18bfe37aff8..a56796814d6a 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -21,6 +21,7 @@
21long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 21long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
22{ 22{
23 struct inode *inode = filp->f_dentry->d_inode; 23 struct inode *inode = filp->f_dentry->d_inode;
24 struct super_block *sb = inode->i_sb;
24 struct ext4_inode_info *ei = EXT4_I(inode); 25 struct ext4_inode_info *ei = EXT4_I(inode);
25 unsigned int flags; 26 unsigned int flags;
26 27
@@ -173,33 +174,8 @@ setversion_out:
173 mnt_drop_write(filp->f_path.mnt); 174 mnt_drop_write(filp->f_path.mnt);
174 return err; 175 return err;
175 } 176 }
176#ifdef CONFIG_JBD2_DEBUG
177 case EXT4_IOC_WAIT_FOR_READONLY:
178 /*
179 * This is racy - by the time we're woken up and running,
180 * the superblock could be released. And the module could
181 * have been unloaded. So sue me.
182 *
183 * Returns 1 if it slept, else zero.
184 */
185 {
186 struct super_block *sb = inode->i_sb;
187 DECLARE_WAITQUEUE(wait, current);
188 int ret = 0;
189
190 set_current_state(TASK_INTERRUPTIBLE);
191 add_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait);
192 if (timer_pending(&EXT4_SB(sb)->turn_ro_timer)) {
193 schedule();
194 ret = 1;
195 }
196 remove_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait);
197 return ret;
198 }
199#endif
200 case EXT4_IOC_GROUP_EXTEND: { 177 case EXT4_IOC_GROUP_EXTEND: {
201 ext4_fsblk_t n_blocks_count; 178 ext4_fsblk_t n_blocks_count;
202 struct super_block *sb = inode->i_sb;
203 int err, err2=0; 179 int err, err2=0;
204 180
205 err = ext4_resize_begin(sb); 181 err = ext4_resize_begin(sb);
@@ -209,6 +185,13 @@ setversion_out:
209 if (get_user(n_blocks_count, (__u32 __user *)arg)) 185 if (get_user(n_blocks_count, (__u32 __user *)arg))
210 return -EFAULT; 186 return -EFAULT;
211 187
188 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
189 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
190 ext4_msg(sb, KERN_ERR,
191 "Online resizing not supported with bigalloc");
192 return -EOPNOTSUPP;
193 }
194
212 err = mnt_want_write(filp->f_path.mnt); 195 err = mnt_want_write(filp->f_path.mnt);
213 if (err) 196 if (err)
214 return err; 197 return err;
@@ -250,6 +233,13 @@ setversion_out:
250 goto mext_out; 233 goto mext_out;
251 } 234 }
252 235
236 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
237 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
238 ext4_msg(sb, KERN_ERR,
239 "Online defrag not supported with bigalloc");
240 return -EOPNOTSUPP;
241 }
242
253 err = mnt_want_write(filp->f_path.mnt); 243 err = mnt_want_write(filp->f_path.mnt);
254 if (err) 244 if (err)
255 goto mext_out; 245 goto mext_out;
@@ -270,7 +260,6 @@ mext_out:
270 260
271 case EXT4_IOC_GROUP_ADD: { 261 case EXT4_IOC_GROUP_ADD: {
272 struct ext4_new_group_data input; 262 struct ext4_new_group_data input;
273 struct super_block *sb = inode->i_sb;
274 int err, err2=0; 263 int err, err2=0;
275 264
276 err = ext4_resize_begin(sb); 265 err = ext4_resize_begin(sb);
@@ -281,6 +270,13 @@ mext_out:
281 sizeof(input))) 270 sizeof(input)))
282 return -EFAULT; 271 return -EFAULT;
283 272
273 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
274 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
275 ext4_msg(sb, KERN_ERR,
276 "Online resizing not supported with bigalloc");
277 return -EOPNOTSUPP;
278 }
279
284 err = mnt_want_write(filp->f_path.mnt); 280 err = mnt_want_write(filp->f_path.mnt);
285 if (err) 281 if (err)
286 return err; 282 return err;
@@ -337,7 +333,6 @@ mext_out:
337 333
338 case FITRIM: 334 case FITRIM:
339 { 335 {
340 struct super_block *sb = inode->i_sb;
341 struct request_queue *q = bdev_get_queue(sb->s_bdev); 336 struct request_queue *q = bdev_get_queue(sb->s_bdev);
342 struct fstrim_range range; 337 struct fstrim_range range;
343 int ret = 0; 338 int ret = 0;
@@ -348,7 +343,14 @@ mext_out:
348 if (!blk_queue_discard(q)) 343 if (!blk_queue_discard(q))
349 return -EOPNOTSUPP; 344 return -EOPNOTSUPP;
350 345
351 if (copy_from_user(&range, (struct fstrim_range *)arg, 346 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
347 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
348 ext4_msg(sb, KERN_ERR,
349 "FITRIM not supported with bigalloc");
350 return -EOPNOTSUPP;
351 }
352
353 if (copy_from_user(&range, (struct fstrim_range __user *)arg,
352 sizeof(range))) 354 sizeof(range)))
353 return -EFAULT; 355 return -EFAULT;
354 356
@@ -358,7 +360,7 @@ mext_out:
358 if (ret < 0) 360 if (ret < 0)
359 return ret; 361 return ret;
360 362
361 if (copy_to_user((struct fstrim_range *)arg, &range, 363 if (copy_to_user((struct fstrim_range __user *)arg, &range,
362 sizeof(range))) 364 sizeof(range)))
363 return -EFAULT; 365 return -EFAULT;
364 366
@@ -396,11 +398,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
396 case EXT4_IOC32_SETVERSION_OLD: 398 case EXT4_IOC32_SETVERSION_OLD:
397 cmd = EXT4_IOC_SETVERSION_OLD; 399 cmd = EXT4_IOC_SETVERSION_OLD;
398 break; 400 break;
399#ifdef CONFIG_JBD2_DEBUG
400 case EXT4_IOC32_WAIT_FOR_READONLY:
401 cmd = EXT4_IOC_WAIT_FOR_READONLY;
402 break;
403#endif
404 case EXT4_IOC32_GETRSVSZ: 401 case EXT4_IOC32_GETRSVSZ:
405 cmd = EXT4_IOC_GETRSVSZ; 402 cmd = EXT4_IOC_GETRSVSZ;
406 break; 403 break;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 17a5a57c415a..e2d8be8f28bf 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -70,8 +70,8 @@
70 * 70 *
71 * pa_lstart -> the logical start block for this prealloc space 71 * pa_lstart -> the logical start block for this prealloc space
72 * pa_pstart -> the physical start block for this prealloc space 72 * pa_pstart -> the physical start block for this prealloc space
73 * pa_len -> length for this prealloc space 73 * pa_len -> length for this prealloc space (in clusters)
74 * pa_free -> free space available in this prealloc space 74 * pa_free -> free space available in this prealloc space (in clusters)
75 * 75 *
76 * The inode preallocation space is used looking at the _logical_ start 76 * The inode preallocation space is used looking at the _logical_ start
77 * block. If only the logical file block falls within the range of prealloc 77 * block. If only the logical file block falls within the range of prealloc
@@ -126,7 +126,8 @@
126 * list. In case of inode preallocation we follow a list of heuristics 126 * list. In case of inode preallocation we follow a list of heuristics
127 * based on file size. This can be found in ext4_mb_normalize_request. If 127 * based on file size. This can be found in ext4_mb_normalize_request. If
128 * we are doing a group prealloc we try to normalize the request to 128 * we are doing a group prealloc we try to normalize the request to
129 * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is 129 * sbi->s_mb_group_prealloc. The default value of s_mb_group_prealloc is
130 * dependent on the cluster size; for non-bigalloc file systems, it is
130 * 512 blocks. This can be tuned via 131 * 512 blocks. This can be tuned via
131 * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in 132 * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
132 * terms of number of blocks. If we have mounted the file system with -O 133 * terms of number of blocks. If we have mounted the file system with -O
@@ -459,7 +460,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
459 ext4_fsblk_t blocknr; 460 ext4_fsblk_t blocknr;
460 461
461 blocknr = ext4_group_first_block_no(sb, e4b->bd_group); 462 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
462 blocknr += first + i; 463 blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
463 ext4_grp_locked_error(sb, e4b->bd_group, 464 ext4_grp_locked_error(sb, e4b->bd_group,
464 inode ? inode->i_ino : 0, 465 inode ? inode->i_ino : 0,
465 blocknr, 466 blocknr,
@@ -580,7 +581,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
580 continue; 581 continue;
581 } 582 }
582 583
583 /* both bits in buddy2 must be 0 */ 584 /* both bits in buddy2 must be 1 */
584 MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2)); 585 MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
585 MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2)); 586 MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
586 587
@@ -653,7 +654,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
653 ext4_grpblk_t chunk; 654 ext4_grpblk_t chunk;
654 unsigned short border; 655 unsigned short border;
655 656
656 BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb)); 657 BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
657 658
658 border = 2 << sb->s_blocksize_bits; 659 border = 2 << sb->s_blocksize_bits;
659 660
@@ -705,7 +706,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
705 void *buddy, void *bitmap, ext4_group_t group) 706 void *buddy, void *bitmap, ext4_group_t group)
706{ 707{
707 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 708 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
708 ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb); 709 ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
709 ext4_grpblk_t i = 0; 710 ext4_grpblk_t i = 0;
710 ext4_grpblk_t first; 711 ext4_grpblk_t first;
711 ext4_grpblk_t len; 712 ext4_grpblk_t len;
@@ -734,7 +735,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
734 735
735 if (free != grp->bb_free) { 736 if (free != grp->bb_free) {
736 ext4_grp_locked_error(sb, group, 0, 0, 737 ext4_grp_locked_error(sb, group, 0, 0,
737 "%u blocks in bitmap, %u in gd", 738 "%u clusters in bitmap, %u in gd",
738 free, grp->bb_free); 739 free, grp->bb_free);
739 /* 740 /*
740 * If we intent to continue, we consider group descritor 741 * If we intent to continue, we consider group descritor
@@ -1339,7 +1340,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1339 ext4_fsblk_t blocknr; 1340 ext4_fsblk_t blocknr;
1340 1341
1341 blocknr = ext4_group_first_block_no(sb, e4b->bd_group); 1342 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
1342 blocknr += block; 1343 blocknr += EXT4_C2B(EXT4_SB(sb), block);
1343 ext4_grp_locked_error(sb, e4b->bd_group, 1344 ext4_grp_locked_error(sb, e4b->bd_group,
1344 inode ? inode->i_ino : 0, 1345 inode ? inode->i_ino : 0,
1345 blocknr, 1346 blocknr,
@@ -1390,7 +1391,6 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
1390{ 1391{
1391 int next = block; 1392 int next = block;
1392 int max; 1393 int max;
1393 int ord;
1394 void *buddy; 1394 void *buddy;
1395 1395
1396 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); 1396 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
@@ -1432,9 +1432,8 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
1432 if (mb_test_bit(next, EXT4_MB_BITMAP(e4b))) 1432 if (mb_test_bit(next, EXT4_MB_BITMAP(e4b)))
1433 break; 1433 break;
1434 1434
1435 ord = mb_find_order_for_block(e4b, next); 1435 order = mb_find_order_for_block(e4b, next);
1436 1436
1437 order = ord;
1438 block = next >> order; 1437 block = next >> order;
1439 ex->fe_len += 1 << order; 1438 ex->fe_len += 1 << order;
1440 } 1439 }
@@ -1624,8 +1623,8 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
1624 struct ext4_free_extent *gex = &ac->ac_g_ex; 1623 struct ext4_free_extent *gex = &ac->ac_g_ex;
1625 1624
1626 BUG_ON(ex->fe_len <= 0); 1625 BUG_ON(ex->fe_len <= 0);
1627 BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); 1626 BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
1628 BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); 1627 BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
1629 BUG_ON(ac->ac_status != AC_STATUS_CONTINUE); 1628 BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
1630 1629
1631 ac->ac_found++; 1630 ac->ac_found++;
@@ -1823,15 +1822,15 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1823 1822
1824 while (free && ac->ac_status == AC_STATUS_CONTINUE) { 1823 while (free && ac->ac_status == AC_STATUS_CONTINUE) {
1825 i = mb_find_next_zero_bit(bitmap, 1824 i = mb_find_next_zero_bit(bitmap,
1826 EXT4_BLOCKS_PER_GROUP(sb), i); 1825 EXT4_CLUSTERS_PER_GROUP(sb), i);
1827 if (i >= EXT4_BLOCKS_PER_GROUP(sb)) { 1826 if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
1828 /* 1827 /*
1829 * IF we have corrupt bitmap, we won't find any 1828 * IF we have corrupt bitmap, we won't find any
1830 * free blocks even though group info says we 1829 * free blocks even though group info says we
1831 * we have free blocks 1830 * we have free blocks
1832 */ 1831 */
1833 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, 1832 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1834 "%d free blocks as per " 1833 "%d free clusters as per "
1835 "group info. But bitmap says 0", 1834 "group info. But bitmap says 0",
1836 free); 1835 free);
1837 break; 1836 break;
@@ -1841,7 +1840,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1841 BUG_ON(ex.fe_len <= 0); 1840 BUG_ON(ex.fe_len <= 0);
1842 if (free < ex.fe_len) { 1841 if (free < ex.fe_len) {
1843 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, 1842 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1844 "%d free blocks as per " 1843 "%d free clusters as per "
1845 "group info. But got %d blocks", 1844 "group info. But got %d blocks",
1846 free, ex.fe_len); 1845 free, ex.fe_len);
1847 /* 1846 /*
@@ -1887,7 +1886,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1887 do_div(a, sbi->s_stripe); 1886 do_div(a, sbi->s_stripe);
1888 i = (a * sbi->s_stripe) - first_group_block; 1887 i = (a * sbi->s_stripe) - first_group_block;
1889 1888
1890 while (i < EXT4_BLOCKS_PER_GROUP(sb)) { 1889 while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
1891 if (!mb_test_bit(i, bitmap)) { 1890 if (!mb_test_bit(i, bitmap)) {
1892 max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex); 1891 max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
1893 if (max >= sbi->s_stripe) { 1892 if (max >= sbi->s_stripe) {
@@ -2252,10 +2251,10 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2252 */ 2251 */
2253 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 2252 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2254 meta_group_info[i]->bb_free = 2253 meta_group_info[i]->bb_free =
2255 ext4_free_blocks_after_init(sb, group, desc); 2254 ext4_free_clusters_after_init(sb, group, desc);
2256 } else { 2255 } else {
2257 meta_group_info[i]->bb_free = 2256 meta_group_info[i]->bb_free =
2258 ext4_free_blks_count(sb, desc); 2257 ext4_free_group_clusters(sb, desc);
2259 } 2258 }
2260 2259
2261 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); 2260 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
@@ -2473,7 +2472,20 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2473 sbi->s_mb_stats = MB_DEFAULT_STATS; 2472 sbi->s_mb_stats = MB_DEFAULT_STATS;
2474 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; 2473 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
2475 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; 2474 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
2476 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; 2475 /*
2476 * The default group preallocation is 512, which for 4k block
2477 * sizes translates to 2 megabytes. However for bigalloc file
2478 * systems, this is probably too big (i.e, if the cluster size
2479 * is 1 megabyte, then group preallocation size becomes half a
2480 * gigabyte!). As a default, we will keep a two megabyte
2481 * group pralloc size for cluster sizes up to 64k, and after
2482 * that, we will force a minimum group preallocation size of
2483 * 32 clusters. This translates to 8 megs when the cluster
2484 * size is 256k, and 32 megs when the cluster size is 1 meg,
2485 * which seems reasonable as a default.
2486 */
2487 sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
2488 sbi->s_cluster_bits, 32);
2477 /* 2489 /*
2478 * If there is a s_stripe > 1, then we set the s_mb_group_prealloc 2490 * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
2479 * to the lowest multiple of s_stripe which is bigger than 2491 * to the lowest multiple of s_stripe which is bigger than
@@ -2490,7 +2502,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2490 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); 2502 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2491 if (sbi->s_locality_groups == NULL) { 2503 if (sbi->s_locality_groups == NULL) {
2492 ret = -ENOMEM; 2504 ret = -ENOMEM;
2493 goto out; 2505 goto out_free_groupinfo_slab;
2494 } 2506 }
2495 for_each_possible_cpu(i) { 2507 for_each_possible_cpu(i) {
2496 struct ext4_locality_group *lg; 2508 struct ext4_locality_group *lg;
@@ -2503,9 +2515,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2503 2515
2504 /* init file for buddy data */ 2516 /* init file for buddy data */
2505 ret = ext4_mb_init_backend(sb); 2517 ret = ext4_mb_init_backend(sb);
2506 if (ret != 0) { 2518 if (ret != 0)
2507 goto out; 2519 goto out_free_locality_groups;
2508 }
2509 2520
2510 if (sbi->s_proc) 2521 if (sbi->s_proc)
2511 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, 2522 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
@@ -2513,11 +2524,19 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2513 2524
2514 if (sbi->s_journal) 2525 if (sbi->s_journal)
2515 sbi->s_journal->j_commit_callback = release_blocks_on_commit; 2526 sbi->s_journal->j_commit_callback = release_blocks_on_commit;
2527
2528 return 0;
2529
2530out_free_locality_groups:
2531 free_percpu(sbi->s_locality_groups);
2532 sbi->s_locality_groups = NULL;
2533out_free_groupinfo_slab:
2534 ext4_groupinfo_destroy_slabs();
2516out: 2535out:
2517 if (ret) { 2536 kfree(sbi->s_mb_offsets);
2518 kfree(sbi->s_mb_offsets); 2537 sbi->s_mb_offsets = NULL;
2519 kfree(sbi->s_mb_maxs); 2538 kfree(sbi->s_mb_maxs);
2520 } 2539 sbi->s_mb_maxs = NULL;
2521 return ret; 2540 return ret;
2522} 2541}
2523 2542
@@ -2602,11 +2621,13 @@ int ext4_mb_release(struct super_block *sb)
2602} 2621}
2603 2622
2604static inline int ext4_issue_discard(struct super_block *sb, 2623static inline int ext4_issue_discard(struct super_block *sb,
2605 ext4_group_t block_group, ext4_grpblk_t block, int count) 2624 ext4_group_t block_group, ext4_grpblk_t cluster, int count)
2606{ 2625{
2607 ext4_fsblk_t discard_block; 2626 ext4_fsblk_t discard_block;
2608 2627
2609 discard_block = block + ext4_group_first_block_no(sb, block_group); 2628 discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
2629 ext4_group_first_block_no(sb, block_group));
2630 count = EXT4_C2B(EXT4_SB(sb), count);
2610 trace_ext4_discard_blocks(sb, 2631 trace_ext4_discard_blocks(sb,
2611 (unsigned long long) discard_block, count); 2632 (unsigned long long) discard_block, count);
2612 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); 2633 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
@@ -2633,7 +2654,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2633 2654
2634 if (test_opt(sb, DISCARD)) 2655 if (test_opt(sb, DISCARD))
2635 ext4_issue_discard(sb, entry->group, 2656 ext4_issue_discard(sb, entry->group,
2636 entry->start_blk, entry->count); 2657 entry->start_cluster, entry->count);
2637 2658
2638 err = ext4_mb_load_buddy(sb, entry->group, &e4b); 2659 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2639 /* we expect to find existing buddy because it's pinned */ 2660 /* we expect to find existing buddy because it's pinned */
@@ -2646,7 +2667,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2646 ext4_lock_group(sb, entry->group); 2667 ext4_lock_group(sb, entry->group);
2647 /* Take it out of per group rb tree */ 2668 /* Take it out of per group rb tree */
2648 rb_erase(&entry->node, &(db->bb_free_root)); 2669 rb_erase(&entry->node, &(db->bb_free_root));
2649 mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); 2670 mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count);
2650 2671
2651 /* 2672 /*
2652 * Clear the trimmed flag for the group so that the next 2673 * Clear the trimmed flag for the group so that the next
@@ -2752,7 +2773,7 @@ void ext4_exit_mballoc(void)
2752 */ 2773 */
2753static noinline_for_stack int 2774static noinline_for_stack int
2754ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, 2775ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2755 handle_t *handle, unsigned int reserv_blks) 2776 handle_t *handle, unsigned int reserv_clstrs)
2756{ 2777{
2757 struct buffer_head *bitmap_bh = NULL; 2778 struct buffer_head *bitmap_bh = NULL;
2758 struct ext4_group_desc *gdp; 2779 struct ext4_group_desc *gdp;
@@ -2783,7 +2804,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2783 goto out_err; 2804 goto out_err;
2784 2805
2785 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group, 2806 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
2786 ext4_free_blks_count(sb, gdp)); 2807 ext4_free_group_clusters(sb, gdp));
2787 2808
2788 err = ext4_journal_get_write_access(handle, gdp_bh); 2809 err = ext4_journal_get_write_access(handle, gdp_bh);
2789 if (err) 2810 if (err)
@@ -2791,7 +2812,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2791 2812
2792 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); 2813 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
2793 2814
2794 len = ac->ac_b_ex.fe_len; 2815 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
2795 if (!ext4_data_block_valid(sbi, block, len)) { 2816 if (!ext4_data_block_valid(sbi, block, len)) {
2796 ext4_error(sb, "Allocating blocks %llu-%llu which overlap " 2817 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
2797 "fs metadata\n", block, block+len); 2818 "fs metadata\n", block, block+len);
@@ -2823,28 +2844,29 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2823 ac->ac_b_ex.fe_len); 2844 ac->ac_b_ex.fe_len);
2824 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 2845 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2825 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 2846 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
2826 ext4_free_blks_set(sb, gdp, 2847 ext4_free_group_clusters_set(sb, gdp,
2827 ext4_free_blocks_after_init(sb, 2848 ext4_free_clusters_after_init(sb,
2828 ac->ac_b_ex.fe_group, gdp)); 2849 ac->ac_b_ex.fe_group, gdp));
2829 } 2850 }
2830 len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len; 2851 len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
2831 ext4_free_blks_set(sb, gdp, len); 2852 ext4_free_group_clusters_set(sb, gdp, len);
2832 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); 2853 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
2833 2854
2834 ext4_unlock_group(sb, ac->ac_b_ex.fe_group); 2855 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2835 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); 2856 percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
2836 /* 2857 /*
2837 * Now reduce the dirty block count also. Should not go negative 2858 * Now reduce the dirty block count also. Should not go negative
2838 */ 2859 */
2839 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) 2860 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
2840 /* release all the reserved blocks if non delalloc */ 2861 /* release all the reserved blocks if non delalloc */
2841 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); 2862 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
2863 reserv_clstrs);
2842 2864
2843 if (sbi->s_log_groups_per_flex) { 2865 if (sbi->s_log_groups_per_flex) {
2844 ext4_group_t flex_group = ext4_flex_group(sbi, 2866 ext4_group_t flex_group = ext4_flex_group(sbi,
2845 ac->ac_b_ex.fe_group); 2867 ac->ac_b_ex.fe_group);
2846 atomic_sub(ac->ac_b_ex.fe_len, 2868 atomic_sub(ac->ac_b_ex.fe_len,
2847 &sbi->s_flex_groups[flex_group].free_blocks); 2869 &sbi->s_flex_groups[flex_group].free_clusters);
2848 } 2870 }
2849 2871
2850 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 2872 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -2886,6 +2908,7 @@ static noinline_for_stack void
2886ext4_mb_normalize_request(struct ext4_allocation_context *ac, 2908ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2887 struct ext4_allocation_request *ar) 2909 struct ext4_allocation_request *ar)
2888{ 2910{
2911 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2889 int bsbits, max; 2912 int bsbits, max;
2890 ext4_lblk_t end; 2913 ext4_lblk_t end;
2891 loff_t size, orig_size, start_off; 2914 loff_t size, orig_size, start_off;
@@ -2916,7 +2939,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2916 2939
2917 /* first, let's learn actual file size 2940 /* first, let's learn actual file size
2918 * given current request is allocated */ 2941 * given current request is allocated */
2919 size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; 2942 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
2920 size = size << bsbits; 2943 size = size << bsbits;
2921 if (size < i_size_read(ac->ac_inode)) 2944 if (size < i_size_read(ac->ac_inode))
2922 size = i_size_read(ac->ac_inode); 2945 size = i_size_read(ac->ac_inode);
@@ -2988,7 +3011,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2988 continue; 3011 continue;
2989 } 3012 }
2990 3013
2991 pa_end = pa->pa_lstart + pa->pa_len; 3014 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3015 pa->pa_len);
2992 3016
2993 /* PA must not overlap original request */ 3017 /* PA must not overlap original request */
2994 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || 3018 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
@@ -3018,9 +3042,11 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3018 rcu_read_lock(); 3042 rcu_read_lock();
3019 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { 3043 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3020 ext4_lblk_t pa_end; 3044 ext4_lblk_t pa_end;
3045
3021 spin_lock(&pa->pa_lock); 3046 spin_lock(&pa->pa_lock);
3022 if (pa->pa_deleted == 0) { 3047 if (pa->pa_deleted == 0) {
3023 pa_end = pa->pa_lstart + pa->pa_len; 3048 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3049 pa->pa_len);
3024 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart)); 3050 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
3025 } 3051 }
3026 spin_unlock(&pa->pa_lock); 3052 spin_unlock(&pa->pa_lock);
@@ -3036,14 +3062,14 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3036 } 3062 }
3037 BUG_ON(start + size <= ac->ac_o_ex.fe_logical && 3063 BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
3038 start > ac->ac_o_ex.fe_logical); 3064 start > ac->ac_o_ex.fe_logical);
3039 BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); 3065 BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
3040 3066
3041 /* now prepare goal request */ 3067 /* now prepare goal request */
3042 3068
3043 /* XXX: is it better to align blocks WRT to logical 3069 /* XXX: is it better to align blocks WRT to logical
3044 * placement or satisfy big request as is */ 3070 * placement or satisfy big request as is */
3045 ac->ac_g_ex.fe_logical = start; 3071 ac->ac_g_ex.fe_logical = start;
3046 ac->ac_g_ex.fe_len = size; 3072 ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
3047 3073
3048 /* define goal start in order to merge */ 3074 /* define goal start in order to merge */
3049 if (ar->pright && (ar->lright == (start + size))) { 3075 if (ar->pright && (ar->lright == (start + size))) {
@@ -3112,14 +3138,16 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3112static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, 3138static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3113 struct ext4_prealloc_space *pa) 3139 struct ext4_prealloc_space *pa)
3114{ 3140{
3141 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3115 ext4_fsblk_t start; 3142 ext4_fsblk_t start;
3116 ext4_fsblk_t end; 3143 ext4_fsblk_t end;
3117 int len; 3144 int len;
3118 3145
3119 /* found preallocated blocks, use them */ 3146 /* found preallocated blocks, use them */
3120 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart); 3147 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
3121 end = min(pa->pa_pstart + pa->pa_len, start + ac->ac_o_ex.fe_len); 3148 end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
3122 len = end - start; 3149 start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
3150 len = EXT4_NUM_B2C(sbi, end - start);
3123 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group, 3151 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
3124 &ac->ac_b_ex.fe_start); 3152 &ac->ac_b_ex.fe_start);
3125 ac->ac_b_ex.fe_len = len; 3153 ac->ac_b_ex.fe_len = len;
@@ -3127,7 +3155,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3127 ac->ac_pa = pa; 3155 ac->ac_pa = pa;
3128 3156
3129 BUG_ON(start < pa->pa_pstart); 3157 BUG_ON(start < pa->pa_pstart);
3130 BUG_ON(start + len > pa->pa_pstart + pa->pa_len); 3158 BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
3131 BUG_ON(pa->pa_free < len); 3159 BUG_ON(pa->pa_free < len);
3132 pa->pa_free -= len; 3160 pa->pa_free -= len;
3133 3161
@@ -3193,6 +3221,7 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3193static noinline_for_stack int 3221static noinline_for_stack int
3194ext4_mb_use_preallocated(struct ext4_allocation_context *ac) 3222ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3195{ 3223{
3224 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3196 int order, i; 3225 int order, i;
3197 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 3226 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3198 struct ext4_locality_group *lg; 3227 struct ext4_locality_group *lg;
@@ -3210,12 +3239,14 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3210 /* all fields in this condition don't change, 3239 /* all fields in this condition don't change,
3211 * so we can skip locking for them */ 3240 * so we can skip locking for them */
3212 if (ac->ac_o_ex.fe_logical < pa->pa_lstart || 3241 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
3213 ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len) 3242 ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
3243 EXT4_C2B(sbi, pa->pa_len)))
3214 continue; 3244 continue;
3215 3245
3216 /* non-extent files can't have physical blocks past 2^32 */ 3246 /* non-extent files can't have physical blocks past 2^32 */
3217 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) && 3247 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
3218 pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS) 3248 (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
3249 EXT4_MAX_BLOCK_FILE_PHYS))
3219 continue; 3250 continue;
3220 3251
3221 /* found preallocated blocks, use them */ 3252 /* found preallocated blocks, use them */
@@ -3291,7 +3322,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3291 3322
3292 while (n) { 3323 while (n) {
3293 entry = rb_entry(n, struct ext4_free_data, node); 3324 entry = rb_entry(n, struct ext4_free_data, node);
3294 ext4_set_bits(bitmap, entry->start_blk, entry->count); 3325 ext4_set_bits(bitmap, entry->start_cluster, entry->count);
3295 n = rb_next(n); 3326 n = rb_next(n);
3296 } 3327 }
3297 return; 3328 return;
@@ -3312,7 +3343,6 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3312 ext4_group_t groupnr; 3343 ext4_group_t groupnr;
3313 ext4_grpblk_t start; 3344 ext4_grpblk_t start;
3314 int preallocated = 0; 3345 int preallocated = 0;
3315 int count = 0;
3316 int len; 3346 int len;
3317 3347
3318 /* all form of preallocation discards first load group, 3348 /* all form of preallocation discards first load group,
@@ -3335,7 +3365,6 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3335 BUG_ON(groupnr != group); 3365 BUG_ON(groupnr != group);
3336 ext4_set_bits(bitmap, start, len); 3366 ext4_set_bits(bitmap, start, len);
3337 preallocated += len; 3367 preallocated += len;
3338 count++;
3339 } 3368 }
3340 mb_debug(1, "prellocated %u for group %u\n", preallocated, group); 3369 mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
3341} 3370}
@@ -3412,6 +3441,7 @@ static noinline_for_stack int
3412ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) 3441ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3413{ 3442{
3414 struct super_block *sb = ac->ac_sb; 3443 struct super_block *sb = ac->ac_sb;
3444 struct ext4_sb_info *sbi = EXT4_SB(sb);
3415 struct ext4_prealloc_space *pa; 3445 struct ext4_prealloc_space *pa;
3416 struct ext4_group_info *grp; 3446 struct ext4_group_info *grp;
3417 struct ext4_inode_info *ei; 3447 struct ext4_inode_info *ei;
@@ -3443,16 +3473,18 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3443 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical; 3473 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
3444 3474
3445 /* also, we should cover whole original request */ 3475 /* also, we should cover whole original request */
3446 wins = ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len; 3476 wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
3447 3477
3448 /* the smallest one defines real window */ 3478 /* the smallest one defines real window */
3449 win = min(winl, wins); 3479 win = min(winl, wins);
3450 3480
3451 offs = ac->ac_o_ex.fe_logical % ac->ac_b_ex.fe_len; 3481 offs = ac->ac_o_ex.fe_logical %
3482 EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
3452 if (offs && offs < win) 3483 if (offs && offs < win)
3453 win = offs; 3484 win = offs;
3454 3485
3455 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - win; 3486 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
3487 EXT4_B2C(sbi, win);
3456 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); 3488 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
3457 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); 3489 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
3458 } 3490 }
@@ -3477,7 +3509,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3477 trace_ext4_mb_new_inode_pa(ac, pa); 3509 trace_ext4_mb_new_inode_pa(ac, pa);
3478 3510
3479 ext4_mb_use_inode_pa(ac, pa); 3511 ext4_mb_use_inode_pa(ac, pa);
3480 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); 3512 atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
3481 3513
3482 ei = EXT4_I(ac->ac_inode); 3514 ei = EXT4_I(ac->ac_inode);
3483 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); 3515 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
@@ -3592,7 +3624,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3592 3624
3593 BUG_ON(pa->pa_deleted == 0); 3625 BUG_ON(pa->pa_deleted == 0);
3594 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3626 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3595 grp_blk_start = pa->pa_pstart - bit; 3627 grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
3596 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3628 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3597 end = bit + pa->pa_len; 3629 end = bit + pa->pa_len;
3598 3630
@@ -3607,7 +3639,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3607 free += next - bit; 3639 free += next - bit;
3608 3640
3609 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); 3641 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3610 trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit, 3642 trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
3643 EXT4_C2B(sbi, bit)),
3611 next - bit); 3644 next - bit);
3612 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); 3645 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3613 bit = next + 1; 3646 bit = next + 1;
@@ -3690,7 +3723,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3690 } 3723 }
3691 3724
3692 if (needed == 0) 3725 if (needed == 0)
3693 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; 3726 needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
3694 3727
3695 INIT_LIST_HEAD(&list); 3728 INIT_LIST_HEAD(&list);
3696repeat: 3729repeat:
@@ -3958,7 +3991,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
3958 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) 3991 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
3959 return; 3992 return;
3960 3993
3961 size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; 3994 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
3962 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) 3995 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
3963 >> bsbits; 3996 >> bsbits;
3964 3997
@@ -3969,6 +4002,11 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
3969 return; 4002 return;
3970 } 4003 }
3971 4004
4005 if (sbi->s_mb_group_prealloc <= 0) {
4006 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4007 return;
4008 }
4009
3972 /* don't use group allocation for large files */ 4010 /* don't use group allocation for large files */
3973 size = max(size, isize); 4011 size = max(size, isize);
3974 if (size > sbi->s_mb_stream_request) { 4012 if (size > sbi->s_mb_stream_request) {
@@ -4007,8 +4045,8 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4007 len = ar->len; 4045 len = ar->len;
4008 4046
4009 /* just a dirty hack to filter too big requests */ 4047 /* just a dirty hack to filter too big requests */
4010 if (len >= EXT4_BLOCKS_PER_GROUP(sb) - 10) 4048 if (len >= EXT4_CLUSTERS_PER_GROUP(sb) - 10)
4011 len = EXT4_BLOCKS_PER_GROUP(sb) - 10; 4049 len = EXT4_CLUSTERS_PER_GROUP(sb) - 10;
4012 4050
4013 /* start searching from the goal */ 4051 /* start searching from the goal */
4014 goal = ar->goal; 4052 goal = ar->goal;
@@ -4019,18 +4057,15 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4019 4057
4020 /* set up allocation goals */ 4058 /* set up allocation goals */
4021 memset(ac, 0, sizeof(struct ext4_allocation_context)); 4059 memset(ac, 0, sizeof(struct ext4_allocation_context));
4022 ac->ac_b_ex.fe_logical = ar->logical; 4060 ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
4023 ac->ac_status = AC_STATUS_CONTINUE; 4061 ac->ac_status = AC_STATUS_CONTINUE;
4024 ac->ac_sb = sb; 4062 ac->ac_sb = sb;
4025 ac->ac_inode = ar->inode; 4063 ac->ac_inode = ar->inode;
4026 ac->ac_o_ex.fe_logical = ar->logical; 4064 ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
4027 ac->ac_o_ex.fe_group = group; 4065 ac->ac_o_ex.fe_group = group;
4028 ac->ac_o_ex.fe_start = block; 4066 ac->ac_o_ex.fe_start = block;
4029 ac->ac_o_ex.fe_len = len; 4067 ac->ac_o_ex.fe_len = len;
4030 ac->ac_g_ex.fe_logical = ar->logical; 4068 ac->ac_g_ex = ac->ac_o_ex;
4031 ac->ac_g_ex.fe_group = group;
4032 ac->ac_g_ex.fe_start = block;
4033 ac->ac_g_ex.fe_len = len;
4034 ac->ac_flags = ar->flags; 4069 ac->ac_flags = ar->flags;
4035 4070
4036 /* we have to define context: we'll we work with a file or 4071 /* we have to define context: we'll we work with a file or
@@ -4182,13 +4217,14 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4182 */ 4217 */
4183static int ext4_mb_release_context(struct ext4_allocation_context *ac) 4218static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4184{ 4219{
4220 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4185 struct ext4_prealloc_space *pa = ac->ac_pa; 4221 struct ext4_prealloc_space *pa = ac->ac_pa;
4186 if (pa) { 4222 if (pa) {
4187 if (pa->pa_type == MB_GROUP_PA) { 4223 if (pa->pa_type == MB_GROUP_PA) {
4188 /* see comment in ext4_mb_use_group_pa() */ 4224 /* see comment in ext4_mb_use_group_pa() */
4189 spin_lock(&pa->pa_lock); 4225 spin_lock(&pa->pa_lock);
4190 pa->pa_pstart += ac->ac_b_ex.fe_len; 4226 pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4191 pa->pa_lstart += ac->ac_b_ex.fe_len; 4227 pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4192 pa->pa_free -= ac->ac_b_ex.fe_len; 4228 pa->pa_free -= ac->ac_b_ex.fe_len;
4193 pa->pa_len -= ac->ac_b_ex.fe_len; 4229 pa->pa_len -= ac->ac_b_ex.fe_len;
4194 spin_unlock(&pa->pa_lock); 4230 spin_unlock(&pa->pa_lock);
@@ -4249,13 +4285,17 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4249 struct super_block *sb; 4285 struct super_block *sb;
4250 ext4_fsblk_t block = 0; 4286 ext4_fsblk_t block = 0;
4251 unsigned int inquota = 0; 4287 unsigned int inquota = 0;
4252 unsigned int reserv_blks = 0; 4288 unsigned int reserv_clstrs = 0;
4253 4289
4254 sb = ar->inode->i_sb; 4290 sb = ar->inode->i_sb;
4255 sbi = EXT4_SB(sb); 4291 sbi = EXT4_SB(sb);
4256 4292
4257 trace_ext4_request_blocks(ar); 4293 trace_ext4_request_blocks(ar);
4258 4294
4295 /* Allow to use superuser reservation for quota file */
4296 if (IS_NOQUOTA(ar->inode))
4297 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
4298
4259 /* 4299 /*
4260 * For delayed allocation, we could skip the ENOSPC and 4300 * For delayed allocation, we could skip the ENOSPC and
4261 * EDQUOT check, as blocks and quotas have been already 4301 * EDQUOT check, as blocks and quotas have been already
@@ -4269,7 +4309,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4269 * and verify allocation doesn't exceed the quota limits. 4309 * and verify allocation doesn't exceed the quota limits.
4270 */ 4310 */
4271 while (ar->len && 4311 while (ar->len &&
4272 ext4_claim_free_blocks(sbi, ar->len, ar->flags)) { 4312 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
4273 4313
4274 /* let others to free the space */ 4314 /* let others to free the space */
4275 yield(); 4315 yield();
@@ -4279,12 +4319,14 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4279 *errp = -ENOSPC; 4319 *errp = -ENOSPC;
4280 return 0; 4320 return 0;
4281 } 4321 }
4282 reserv_blks = ar->len; 4322 reserv_clstrs = ar->len;
4283 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) { 4323 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
4284 dquot_alloc_block_nofail(ar->inode, ar->len); 4324 dquot_alloc_block_nofail(ar->inode,
4325 EXT4_C2B(sbi, ar->len));
4285 } else { 4326 } else {
4286 while (ar->len && 4327 while (ar->len &&
4287 dquot_alloc_block(ar->inode, ar->len)) { 4328 dquot_alloc_block(ar->inode,
4329 EXT4_C2B(sbi, ar->len))) {
4288 4330
4289 ar->flags |= EXT4_MB_HINT_NOPREALLOC; 4331 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4290 ar->len--; 4332 ar->len--;
@@ -4328,7 +4370,7 @@ repeat:
4328 ext4_mb_new_preallocation(ac); 4370 ext4_mb_new_preallocation(ac);
4329 } 4371 }
4330 if (likely(ac->ac_status == AC_STATUS_FOUND)) { 4372 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4331 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); 4373 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
4332 if (*errp == -EAGAIN) { 4374 if (*errp == -EAGAIN) {
4333 /* 4375 /*
4334 * drop the reference that we took 4376 * drop the reference that we took
@@ -4364,13 +4406,13 @@ out:
4364 if (ac) 4406 if (ac)
4365 kmem_cache_free(ext4_ac_cachep, ac); 4407 kmem_cache_free(ext4_ac_cachep, ac);
4366 if (inquota && ar->len < inquota) 4408 if (inquota && ar->len < inquota)
4367 dquot_free_block(ar->inode, inquota - ar->len); 4409 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
4368 if (!ar->len) { 4410 if (!ar->len) {
4369 if (!ext4_test_inode_state(ar->inode, 4411 if (!ext4_test_inode_state(ar->inode,
4370 EXT4_STATE_DELALLOC_RESERVED)) 4412 EXT4_STATE_DELALLOC_RESERVED))
4371 /* release all the reserved blocks if non delalloc */ 4413 /* release all the reserved blocks if non delalloc */
4372 percpu_counter_sub(&sbi->s_dirtyblocks_counter, 4414 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
4373 reserv_blks); 4415 reserv_clstrs);
4374 } 4416 }
4375 4417
4376 trace_ext4_allocate_blocks(ar, (unsigned long long)block); 4418 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
@@ -4388,7 +4430,7 @@ static int can_merge(struct ext4_free_data *entry1,
4388{ 4430{
4389 if ((entry1->t_tid == entry2->t_tid) && 4431 if ((entry1->t_tid == entry2->t_tid) &&
4390 (entry1->group == entry2->group) && 4432 (entry1->group == entry2->group) &&
4391 ((entry1->start_blk + entry1->count) == entry2->start_blk)) 4433 ((entry1->start_cluster + entry1->count) == entry2->start_cluster))
4392 return 1; 4434 return 1;
4393 return 0; 4435 return 0;
4394} 4436}
@@ -4398,7 +4440,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4398 struct ext4_free_data *new_entry) 4440 struct ext4_free_data *new_entry)
4399{ 4441{
4400 ext4_group_t group = e4b->bd_group; 4442 ext4_group_t group = e4b->bd_group;
4401 ext4_grpblk_t block; 4443 ext4_grpblk_t cluster;
4402 struct ext4_free_data *entry; 4444 struct ext4_free_data *entry;
4403 struct ext4_group_info *db = e4b->bd_info; 4445 struct ext4_group_info *db = e4b->bd_info;
4404 struct super_block *sb = e4b->bd_sb; 4446 struct super_block *sb = e4b->bd_sb;
@@ -4411,7 +4453,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4411 BUG_ON(e4b->bd_buddy_page == NULL); 4453 BUG_ON(e4b->bd_buddy_page == NULL);
4412 4454
4413 new_node = &new_entry->node; 4455 new_node = &new_entry->node;
4414 block = new_entry->start_blk; 4456 cluster = new_entry->start_cluster;
4415 4457
4416 if (!*n) { 4458 if (!*n) {
4417 /* first free block exent. We need to 4459 /* first free block exent. We need to
@@ -4425,13 +4467,14 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4425 while (*n) { 4467 while (*n) {
4426 parent = *n; 4468 parent = *n;
4427 entry = rb_entry(parent, struct ext4_free_data, node); 4469 entry = rb_entry(parent, struct ext4_free_data, node);
4428 if (block < entry->start_blk) 4470 if (cluster < entry->start_cluster)
4429 n = &(*n)->rb_left; 4471 n = &(*n)->rb_left;
4430 else if (block >= (entry->start_blk + entry->count)) 4472 else if (cluster >= (entry->start_cluster + entry->count))
4431 n = &(*n)->rb_right; 4473 n = &(*n)->rb_right;
4432 else { 4474 else {
4433 ext4_grp_locked_error(sb, group, 0, 4475 ext4_grp_locked_error(sb, group, 0,
4434 ext4_group_first_block_no(sb, group) + block, 4476 ext4_group_first_block_no(sb, group) +
4477 EXT4_C2B(sbi, cluster),
4435 "Block already on to-be-freed list"); 4478 "Block already on to-be-freed list");
4436 return 0; 4479 return 0;
4437 } 4480 }
@@ -4445,7 +4488,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4445 if (node) { 4488 if (node) {
4446 entry = rb_entry(node, struct ext4_free_data, node); 4489 entry = rb_entry(node, struct ext4_free_data, node);
4447 if (can_merge(entry, new_entry)) { 4490 if (can_merge(entry, new_entry)) {
4448 new_entry->start_blk = entry->start_blk; 4491 new_entry->start_cluster = entry->start_cluster;
4449 new_entry->count += entry->count; 4492 new_entry->count += entry->count;
4450 rb_erase(node, &(db->bb_free_root)); 4493 rb_erase(node, &(db->bb_free_root));
4451 spin_lock(&sbi->s_md_lock); 4494 spin_lock(&sbi->s_md_lock);
@@ -4496,6 +4539,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4496 ext4_group_t block_group; 4539 ext4_group_t block_group;
4497 struct ext4_sb_info *sbi; 4540 struct ext4_sb_info *sbi;
4498 struct ext4_buddy e4b; 4541 struct ext4_buddy e4b;
4542 unsigned int count_clusters;
4499 int err = 0; 4543 int err = 0;
4500 int ret; 4544 int ret;
4501 4545
@@ -4544,6 +4588,38 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4544 if (!ext4_should_writeback_data(inode)) 4588 if (!ext4_should_writeback_data(inode))
4545 flags |= EXT4_FREE_BLOCKS_METADATA; 4589 flags |= EXT4_FREE_BLOCKS_METADATA;
4546 4590
4591 /*
4592 * If the extent to be freed does not begin on a cluster
4593 * boundary, we need to deal with partial clusters at the
4594 * beginning and end of the extent. Normally we will free
4595 * blocks at the beginning or the end unless we are explicitly
4596 * requested to avoid doing so.
4597 */
4598 overflow = block & (sbi->s_cluster_ratio - 1);
4599 if (overflow) {
4600 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
4601 overflow = sbi->s_cluster_ratio - overflow;
4602 block += overflow;
4603 if (count > overflow)
4604 count -= overflow;
4605 else
4606 return;
4607 } else {
4608 block -= overflow;
4609 count += overflow;
4610 }
4611 }
4612 overflow = count & (sbi->s_cluster_ratio - 1);
4613 if (overflow) {
4614 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
4615 if (count > overflow)
4616 count -= overflow;
4617 else
4618 return;
4619 } else
4620 count += sbi->s_cluster_ratio - overflow;
4621 }
4622
4547do_more: 4623do_more:
4548 overflow = 0; 4624 overflow = 0;
4549 ext4_get_group_no_and_offset(sb, block, &block_group, &bit); 4625 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -4552,10 +4628,12 @@ do_more:
4552 * Check to see if we are freeing blocks across a group 4628 * Check to see if we are freeing blocks across a group
4553 * boundary. 4629 * boundary.
4554 */ 4630 */
4555 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { 4631 if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4556 overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb); 4632 overflow = EXT4_C2B(sbi, bit) + count -
4633 EXT4_BLOCKS_PER_GROUP(sb);
4557 count -= overflow; 4634 count -= overflow;
4558 } 4635 }
4636 count_clusters = EXT4_B2C(sbi, count);
4559 bitmap_bh = ext4_read_block_bitmap(sb, block_group); 4637 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4560 if (!bitmap_bh) { 4638 if (!bitmap_bh) {
4561 err = -EIO; 4639 err = -EIO;
@@ -4570,9 +4648,9 @@ do_more:
4570 if (in_range(ext4_block_bitmap(sb, gdp), block, count) || 4648 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4571 in_range(ext4_inode_bitmap(sb, gdp), block, count) || 4649 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
4572 in_range(block, ext4_inode_table(sb, gdp), 4650 in_range(block, ext4_inode_table(sb, gdp),
4573 EXT4_SB(sb)->s_itb_per_group) || 4651 EXT4_SB(sb)->s_itb_per_group) ||
4574 in_range(block + count - 1, ext4_inode_table(sb, gdp), 4652 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4575 EXT4_SB(sb)->s_itb_per_group)) { 4653 EXT4_SB(sb)->s_itb_per_group)) {
4576 4654
4577 ext4_error(sb, "Freeing blocks in system zone - " 4655 ext4_error(sb, "Freeing blocks in system zone - "
4578 "Block = %llu, count = %lu", block, count); 4656 "Block = %llu, count = %lu", block, count);
@@ -4597,11 +4675,11 @@ do_more:
4597#ifdef AGGRESSIVE_CHECK 4675#ifdef AGGRESSIVE_CHECK
4598 { 4676 {
4599 int i; 4677 int i;
4600 for (i = 0; i < count; i++) 4678 for (i = 0; i < count_clusters; i++)
4601 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); 4679 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4602 } 4680 }
4603#endif 4681#endif
4604 trace_ext4_mballoc_free(sb, inode, block_group, bit, count); 4682 trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
4605 4683
4606 err = ext4_mb_load_buddy(sb, block_group, &e4b); 4684 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4607 if (err) 4685 if (err)
@@ -4618,13 +4696,13 @@ do_more:
4618 err = -ENOMEM; 4696 err = -ENOMEM;
4619 goto error_return; 4697 goto error_return;
4620 } 4698 }
4621 new_entry->start_blk = bit; 4699 new_entry->start_cluster = bit;
4622 new_entry->group = block_group; 4700 new_entry->group = block_group;
4623 new_entry->count = count; 4701 new_entry->count = count_clusters;
4624 new_entry->t_tid = handle->h_transaction->t_tid; 4702 new_entry->t_tid = handle->h_transaction->t_tid;
4625 4703
4626 ext4_lock_group(sb, block_group); 4704 ext4_lock_group(sb, block_group);
4627 mb_clear_bits(bitmap_bh->b_data, bit, count); 4705 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4628 ext4_mb_free_metadata(handle, &e4b, new_entry); 4706 ext4_mb_free_metadata(handle, &e4b, new_entry);
4629 } else { 4707 } else {
4630 /* need to update group_info->bb_free and bitmap 4708 /* need to update group_info->bb_free and bitmap
@@ -4632,25 +4710,29 @@ do_more:
4632 * them with group lock_held 4710 * them with group lock_held
4633 */ 4711 */
4634 ext4_lock_group(sb, block_group); 4712 ext4_lock_group(sb, block_group);
4635 mb_clear_bits(bitmap_bh->b_data, bit, count); 4713 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4636 mb_free_blocks(inode, &e4b, bit, count); 4714 mb_free_blocks(inode, &e4b, bit, count_clusters);
4637 } 4715 }
4638 4716
4639 ret = ext4_free_blks_count(sb, gdp) + count; 4717 ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
4640 ext4_free_blks_set(sb, gdp, ret); 4718 ext4_free_group_clusters_set(sb, gdp, ret);
4641 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); 4719 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
4642 ext4_unlock_group(sb, block_group); 4720 ext4_unlock_group(sb, block_group);
4643 percpu_counter_add(&sbi->s_freeblocks_counter, count); 4721 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
4644 4722
4645 if (sbi->s_log_groups_per_flex) { 4723 if (sbi->s_log_groups_per_flex) {
4646 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 4724 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4647 atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks); 4725 atomic_add(count_clusters,
4726 &sbi->s_flex_groups[flex_group].free_clusters);
4648 } 4727 }
4649 4728
4650 ext4_mb_unload_buddy(&e4b); 4729 ext4_mb_unload_buddy(&e4b);
4651 4730
4652 freed += count; 4731 freed += count;
4653 4732
4733 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4734 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4735
4654 /* We dirtied the bitmap block */ 4736 /* We dirtied the bitmap block */
4655 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 4737 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4656 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 4738 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -4669,8 +4751,6 @@ do_more:
4669 } 4751 }
4670 ext4_mark_super_dirty(sb); 4752 ext4_mark_super_dirty(sb);
4671error_return: 4753error_return:
4672 if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4673 dquot_free_block(inode, freed);
4674 brelse(bitmap_bh); 4754 brelse(bitmap_bh);
4675 ext4_std_error(sb, err); 4755 ext4_std_error(sb, err);
4676 return; 4756 return;
@@ -4778,16 +4858,17 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
4778 ext4_lock_group(sb, block_group); 4858 ext4_lock_group(sb, block_group);
4779 mb_clear_bits(bitmap_bh->b_data, bit, count); 4859 mb_clear_bits(bitmap_bh->b_data, bit, count);
4780 mb_free_blocks(NULL, &e4b, bit, count); 4860 mb_free_blocks(NULL, &e4b, bit, count);
4781 blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); 4861 blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
4782 ext4_free_blks_set(sb, desc, blk_free_count); 4862 ext4_free_group_clusters_set(sb, desc, blk_free_count);
4783 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); 4863 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
4784 ext4_unlock_group(sb, block_group); 4864 ext4_unlock_group(sb, block_group);
4785 percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); 4865 percpu_counter_add(&sbi->s_freeclusters_counter,
4866 EXT4_B2C(sbi, blocks_freed));
4786 4867
4787 if (sbi->s_log_groups_per_flex) { 4868 if (sbi->s_log_groups_per_flex) {
4788 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 4869 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4789 atomic_add(blocks_freed, 4870 atomic_add(EXT4_B2C(sbi, blocks_freed),
4790 &sbi->s_flex_groups[flex_group].free_blocks); 4871 &sbi->s_flex_groups[flex_group].free_clusters);
4791 } 4872 }
4792 4873
4793 ext4_mb_unload_buddy(&e4b); 4874 ext4_mb_unload_buddy(&e4b);
@@ -4948,7 +5029,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4948 struct ext4_group_info *grp; 5029 struct ext4_group_info *grp;
4949 ext4_group_t first_group, last_group; 5030 ext4_group_t first_group, last_group;
4950 ext4_group_t group, ngroups = ext4_get_groups_count(sb); 5031 ext4_group_t group, ngroups = ext4_get_groups_count(sb);
4951 ext4_grpblk_t cnt = 0, first_block, last_block; 5032 ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
4952 uint64_t start, len, minlen, trimmed = 0; 5033 uint64_t start, len, minlen, trimmed = 0;
4953 ext4_fsblk_t first_data_blk = 5034 ext4_fsblk_t first_data_blk =
4954 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 5035 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
@@ -4958,7 +5039,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4958 len = range->len >> sb->s_blocksize_bits; 5039 len = range->len >> sb->s_blocksize_bits;
4959 minlen = range->minlen >> sb->s_blocksize_bits; 5040 minlen = range->minlen >> sb->s_blocksize_bits;
4960 5041
4961 if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) 5042 if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)))
4962 return -EINVAL; 5043 return -EINVAL;
4963 if (start + len <= first_data_blk) 5044 if (start + len <= first_data_blk)
4964 goto out; 5045 goto out;
@@ -4969,11 +5050,11 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4969 5050
4970 /* Determine first and last group to examine based on start and len */ 5051 /* Determine first and last group to examine based on start and len */
4971 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, 5052 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
4972 &first_group, &first_block); 5053 &first_group, &first_cluster);
4973 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), 5054 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
4974 &last_group, &last_block); 5055 &last_group, &last_cluster);
4975 last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; 5056 last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
4976 last_block = EXT4_BLOCKS_PER_GROUP(sb); 5057 last_cluster = EXT4_CLUSTERS_PER_GROUP(sb);
4977 5058
4978 if (first_group > last_group) 5059 if (first_group > last_group)
4979 return -EINVAL; 5060 return -EINVAL;
@@ -4993,20 +5074,20 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4993 * change it for the last group in which case start + 5074 * change it for the last group in which case start +
4994 * len < EXT4_BLOCKS_PER_GROUP(sb). 5075 * len < EXT4_BLOCKS_PER_GROUP(sb).
4995 */ 5076 */
4996 if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb)) 5077 if (first_cluster + len < EXT4_CLUSTERS_PER_GROUP(sb))
4997 last_block = first_block + len; 5078 last_cluster = first_cluster + len;
4998 len -= last_block - first_block; 5079 len -= last_cluster - first_cluster;
4999 5080
5000 if (grp->bb_free >= minlen) { 5081 if (grp->bb_free >= minlen) {
5001 cnt = ext4_trim_all_free(sb, group, first_block, 5082 cnt = ext4_trim_all_free(sb, group, first_cluster,
5002 last_block, minlen); 5083 last_cluster, minlen);
5003 if (cnt < 0) { 5084 if (cnt < 0) {
5004 ret = cnt; 5085 ret = cnt;
5005 break; 5086 break;
5006 } 5087 }
5007 } 5088 }
5008 trimmed += cnt; 5089 trimmed += cnt;
5009 first_block = 0; 5090 first_cluster = 0;
5010 } 5091 }
5011 range->len = trimmed * sb->s_blocksize; 5092 range->len = trimmed * sb->s_blocksize;
5012 5093
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 9d4a636b546c..47705f3285e3 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -106,7 +106,7 @@ struct ext4_free_data {
106 ext4_group_t group; 106 ext4_group_t group;
107 107
108 /* free block extent */ 108 /* free block extent */
109 ext4_grpblk_t start_blk; 109 ext4_grpblk_t start_cluster;
110 ext4_grpblk_t count; 110 ext4_grpblk_t count;
111 111
112 /* transaction which freed this extent */ 112 /* transaction which freed this extent */
@@ -139,9 +139,9 @@ enum {
139 139
140struct ext4_free_extent { 140struct ext4_free_extent {
141 ext4_lblk_t fe_logical; 141 ext4_lblk_t fe_logical;
142 ext4_grpblk_t fe_start; 142 ext4_grpblk_t fe_start; /* In cluster units */
143 ext4_group_t fe_group; 143 ext4_group_t fe_group;
144 ext4_grpblk_t fe_len; 144 ext4_grpblk_t fe_len; /* In cluster units */
145}; 145};
146 146
147/* 147/*
@@ -175,7 +175,7 @@ struct ext4_allocation_context {
175 /* the best found extent */ 175 /* the best found extent */
176 struct ext4_free_extent ac_b_ex; 176 struct ext4_free_extent ac_b_ex;
177 177
178 /* copy of the bext found extent taken before preallocation efforts */ 178 /* copy of the best found extent taken before preallocation efforts */
179 struct ext4_free_extent ac_f_ex; 179 struct ext4_free_extent ac_f_ex;
180 180
181 /* number of iterations done. we have to track to limit searching */ 181 /* number of iterations done. we have to track to limit searching */
@@ -216,6 +216,7 @@ struct ext4_buddy {
216static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, 216static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
217 struct ext4_free_extent *fex) 217 struct ext4_free_extent *fex)
218{ 218{
219 return ext4_group_first_block_no(sb, fex->fe_group) + fex->fe_start; 219 return ext4_group_first_block_no(sb, fex->fe_group) +
220 (fex->fe_start << EXT4_SB(sb)->s_cluster_bits);
220} 221}
221#endif 222#endif
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b57b98fb44d1..16ac228dbec6 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -15,19 +15,18 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include "ext4_jbd2.h" 17#include "ext4_jbd2.h"
18#include "ext4_extents.h"
19 18
20/* 19/*
21 * The contiguous blocks details which can be 20 * The contiguous blocks details which can be
22 * represented by a single extent 21 * represented by a single extent
23 */ 22 */
24struct list_blocks_struct { 23struct migrate_struct {
25 ext4_lblk_t first_block, last_block; 24 ext4_lblk_t first_block, last_block, curr_block;
26 ext4_fsblk_t first_pblock, last_pblock; 25 ext4_fsblk_t first_pblock, last_pblock;
27}; 26};
28 27
29static int finish_range(handle_t *handle, struct inode *inode, 28static int finish_range(handle_t *handle, struct inode *inode,
30 struct list_blocks_struct *lb) 29 struct migrate_struct *lb)
31 30
32{ 31{
33 int retval = 0, needed; 32 int retval = 0, needed;
@@ -87,8 +86,7 @@ err_out:
87} 86}
88 87
89static int update_extent_range(handle_t *handle, struct inode *inode, 88static int update_extent_range(handle_t *handle, struct inode *inode,
90 ext4_fsblk_t pblock, ext4_lblk_t blk_num, 89 ext4_fsblk_t pblock, struct migrate_struct *lb)
91 struct list_blocks_struct *lb)
92{ 90{
93 int retval; 91 int retval;
94 /* 92 /*
@@ -96,9 +94,10 @@ static int update_extent_range(handle_t *handle, struct inode *inode,
96 */ 94 */
97 if (lb->first_pblock && 95 if (lb->first_pblock &&
98 (lb->last_pblock+1 == pblock) && 96 (lb->last_pblock+1 == pblock) &&
99 (lb->last_block+1 == blk_num)) { 97 (lb->last_block+1 == lb->curr_block)) {
100 lb->last_pblock = pblock; 98 lb->last_pblock = pblock;
101 lb->last_block = blk_num; 99 lb->last_block = lb->curr_block;
100 lb->curr_block++;
102 return 0; 101 return 0;
103 } 102 }
104 /* 103 /*
@@ -106,64 +105,49 @@ static int update_extent_range(handle_t *handle, struct inode *inode,
106 */ 105 */
107 retval = finish_range(handle, inode, lb); 106 retval = finish_range(handle, inode, lb);
108 lb->first_pblock = lb->last_pblock = pblock; 107 lb->first_pblock = lb->last_pblock = pblock;
109 lb->first_block = lb->last_block = blk_num; 108 lb->first_block = lb->last_block = lb->curr_block;
110 109 lb->curr_block++;
111 return retval; 110 return retval;
112} 111}
113 112
114static int update_ind_extent_range(handle_t *handle, struct inode *inode, 113static int update_ind_extent_range(handle_t *handle, struct inode *inode,
115 ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, 114 ext4_fsblk_t pblock,
116 struct list_blocks_struct *lb) 115 struct migrate_struct *lb)
117{ 116{
118 struct buffer_head *bh; 117 struct buffer_head *bh;
119 __le32 *i_data; 118 __le32 *i_data;
120 int i, retval = 0; 119 int i, retval = 0;
121 ext4_lblk_t blk_count = *blk_nump;
122 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 120 unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
123 121
124 if (!pblock) {
125 /* Only update the file block number */
126 *blk_nump += max_entries;
127 return 0;
128 }
129
130 bh = sb_bread(inode->i_sb, pblock); 122 bh = sb_bread(inode->i_sb, pblock);
131 if (!bh) 123 if (!bh)
132 return -EIO; 124 return -EIO;
133 125
134 i_data = (__le32 *)bh->b_data; 126 i_data = (__le32 *)bh->b_data;
135 for (i = 0; i < max_entries; i++, blk_count++) { 127 for (i = 0; i < max_entries; i++) {
136 if (i_data[i]) { 128 if (i_data[i]) {
137 retval = update_extent_range(handle, inode, 129 retval = update_extent_range(handle, inode,
138 le32_to_cpu(i_data[i]), 130 le32_to_cpu(i_data[i]), lb);
139 blk_count, lb);
140 if (retval) 131 if (retval)
141 break; 132 break;
133 } else {
134 lb->curr_block++;
142 } 135 }
143 } 136 }
144
145 /* Update the file block number */
146 *blk_nump = blk_count;
147 put_bh(bh); 137 put_bh(bh);
148 return retval; 138 return retval;
149 139
150} 140}
151 141
152static int update_dind_extent_range(handle_t *handle, struct inode *inode, 142static int update_dind_extent_range(handle_t *handle, struct inode *inode,
153 ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, 143 ext4_fsblk_t pblock,
154 struct list_blocks_struct *lb) 144 struct migrate_struct *lb)
155{ 145{
156 struct buffer_head *bh; 146 struct buffer_head *bh;
157 __le32 *i_data; 147 __le32 *i_data;
158 int i, retval = 0; 148 int i, retval = 0;
159 ext4_lblk_t blk_count = *blk_nump;
160 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 149 unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
161 150
162 if (!pblock) {
163 /* Only update the file block number */
164 *blk_nump += max_entries * max_entries;
165 return 0;
166 }
167 bh = sb_bread(inode->i_sb, pblock); 151 bh = sb_bread(inode->i_sb, pblock);
168 if (!bh) 152 if (!bh)
169 return -EIO; 153 return -EIO;
@@ -172,38 +156,28 @@ static int update_dind_extent_range(handle_t *handle, struct inode *inode,
172 for (i = 0; i < max_entries; i++) { 156 for (i = 0; i < max_entries; i++) {
173 if (i_data[i]) { 157 if (i_data[i]) {
174 retval = update_ind_extent_range(handle, inode, 158 retval = update_ind_extent_range(handle, inode,
175 le32_to_cpu(i_data[i]), 159 le32_to_cpu(i_data[i]), lb);
176 &blk_count, lb);
177 if (retval) 160 if (retval)
178 break; 161 break;
179 } else { 162 } else {
180 /* Only update the file block number */ 163 /* Only update the file block number */
181 blk_count += max_entries; 164 lb->curr_block += max_entries;
182 } 165 }
183 } 166 }
184
185 /* Update the file block number */
186 *blk_nump = blk_count;
187 put_bh(bh); 167 put_bh(bh);
188 return retval; 168 return retval;
189 169
190} 170}
191 171
192static int update_tind_extent_range(handle_t *handle, struct inode *inode, 172static int update_tind_extent_range(handle_t *handle, struct inode *inode,
193 ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, 173 ext4_fsblk_t pblock,
194 struct list_blocks_struct *lb) 174 struct migrate_struct *lb)
195{ 175{
196 struct buffer_head *bh; 176 struct buffer_head *bh;
197 __le32 *i_data; 177 __le32 *i_data;
198 int i, retval = 0; 178 int i, retval = 0;
199 ext4_lblk_t blk_count = *blk_nump;
200 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 179 unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
201 180
202 if (!pblock) {
203 /* Only update the file block number */
204 *blk_nump += max_entries * max_entries * max_entries;
205 return 0;
206 }
207 bh = sb_bread(inode->i_sb, pblock); 181 bh = sb_bread(inode->i_sb, pblock);
208 if (!bh) 182 if (!bh)
209 return -EIO; 183 return -EIO;
@@ -212,16 +186,14 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
212 for (i = 0; i < max_entries; i++) { 186 for (i = 0; i < max_entries; i++) {
213 if (i_data[i]) { 187 if (i_data[i]) {
214 retval = update_dind_extent_range(handle, inode, 188 retval = update_dind_extent_range(handle, inode,
215 le32_to_cpu(i_data[i]), 189 le32_to_cpu(i_data[i]), lb);
216 &blk_count, lb);
217 if (retval) 190 if (retval)
218 break; 191 break;
219 } else 192 } else {
220 /* Only update the file block number */ 193 /* Only update the file block number */
221 blk_count += max_entries * max_entries; 194 lb->curr_block += max_entries * max_entries;
195 }
222 } 196 }
223 /* Update the file block number */
224 *blk_nump = blk_count;
225 put_bh(bh); 197 put_bh(bh);
226 return retval; 198 return retval;
227 199
@@ -462,12 +434,12 @@ int ext4_ext_migrate(struct inode *inode)
462 handle_t *handle; 434 handle_t *handle;
463 int retval = 0, i; 435 int retval = 0, i;
464 __le32 *i_data; 436 __le32 *i_data;
465 ext4_lblk_t blk_count = 0;
466 struct ext4_inode_info *ei; 437 struct ext4_inode_info *ei;
467 struct inode *tmp_inode = NULL; 438 struct inode *tmp_inode = NULL;
468 struct list_blocks_struct lb; 439 struct migrate_struct lb;
469 unsigned long max_entries; 440 unsigned long max_entries;
470 __u32 goal; 441 __u32 goal;
442 uid_t owner[2];
471 443
472 /* 444 /*
473 * If the filesystem does not support extents, or the inode 445 * If the filesystem does not support extents, or the inode
@@ -495,10 +467,12 @@ int ext4_ext_migrate(struct inode *inode)
495 } 467 }
496 goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * 468 goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
497 EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; 469 EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
470 owner[0] = inode->i_uid;
471 owner[1] = inode->i_gid;
498 tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, 472 tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
499 S_IFREG, NULL, goal); 473 S_IFREG, NULL, goal, owner);
500 if (IS_ERR(tmp_inode)) { 474 if (IS_ERR(tmp_inode)) {
501 retval = -ENOMEM; 475 retval = PTR_ERR(inode);
502 ext4_journal_stop(handle); 476 ext4_journal_stop(handle);
503 return retval; 477 return retval;
504 } 478 }
@@ -507,7 +481,7 @@ int ext4_ext_migrate(struct inode *inode)
507 * Set the i_nlink to zero so it will be deleted later 481 * Set the i_nlink to zero so it will be deleted later
508 * when we drop inode reference. 482 * when we drop inode reference.
509 */ 483 */
510 tmp_inode->i_nlink = 0; 484 clear_nlink(tmp_inode);
511 485
512 ext4_ext_tree_init(handle, tmp_inode); 486 ext4_ext_tree_init(handle, tmp_inode);
513 ext4_orphan_add(handle, tmp_inode); 487 ext4_orphan_add(handle, tmp_inode);
@@ -551,35 +525,32 @@ int ext4_ext_migrate(struct inode *inode)
551 525
552 /* 32 bit block address 4 bytes */ 526 /* 32 bit block address 4 bytes */
553 max_entries = inode->i_sb->s_blocksize >> 2; 527 max_entries = inode->i_sb->s_blocksize >> 2;
554 for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) { 528 for (i = 0; i < EXT4_NDIR_BLOCKS; i++) {
555 if (i_data[i]) { 529 if (i_data[i]) {
556 retval = update_extent_range(handle, tmp_inode, 530 retval = update_extent_range(handle, tmp_inode,
557 le32_to_cpu(i_data[i]), 531 le32_to_cpu(i_data[i]), &lb);
558 blk_count, &lb);
559 if (retval) 532 if (retval)
560 goto err_out; 533 goto err_out;
561 } 534 } else
535 lb.curr_block++;
562 } 536 }
563 if (i_data[EXT4_IND_BLOCK]) { 537 if (i_data[EXT4_IND_BLOCK]) {
564 retval = update_ind_extent_range(handle, tmp_inode, 538 retval = update_ind_extent_range(handle, tmp_inode,
565 le32_to_cpu(i_data[EXT4_IND_BLOCK]), 539 le32_to_cpu(i_data[EXT4_IND_BLOCK]), &lb);
566 &blk_count, &lb);
567 if (retval) 540 if (retval)
568 goto err_out; 541 goto err_out;
569 } else 542 } else
570 blk_count += max_entries; 543 lb.curr_block += max_entries;
571 if (i_data[EXT4_DIND_BLOCK]) { 544 if (i_data[EXT4_DIND_BLOCK]) {
572 retval = update_dind_extent_range(handle, tmp_inode, 545 retval = update_dind_extent_range(handle, tmp_inode,
573 le32_to_cpu(i_data[EXT4_DIND_BLOCK]), 546 le32_to_cpu(i_data[EXT4_DIND_BLOCK]), &lb);
574 &blk_count, &lb);
575 if (retval) 547 if (retval)
576 goto err_out; 548 goto err_out;
577 } else 549 } else
578 blk_count += max_entries * max_entries; 550 lb.curr_block += max_entries * max_entries;
579 if (i_data[EXT4_TIND_BLOCK]) { 551 if (i_data[EXT4_TIND_BLOCK]) {
580 retval = update_tind_extent_range(handle, tmp_inode, 552 retval = update_tind_extent_range(handle, tmp_inode,
581 le32_to_cpu(i_data[EXT4_TIND_BLOCK]), 553 le32_to_cpu(i_data[EXT4_TIND_BLOCK]), &lb);
582 &blk_count, &lb);
583 if (retval) 554 if (retval)
584 goto err_out; 555 goto err_out;
585 } 556 }
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 9bdef3f537c5..7ea4ba4eff2a 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -109,7 +109,7 @@ static int kmmpd(void *data)
109 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); 109 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
110 bdevname(bh->b_bdev, mmp->mmp_bdevname); 110 bdevname(bh->b_bdev, mmp->mmp_bdevname);
111 111
112 memcpy(mmp->mmp_nodename, init_utsname()->sysname, 112 memcpy(mmp->mmp_nodename, init_utsname()->nodename,
113 sizeof(mmp->mmp_nodename)); 113 sizeof(mmp->mmp_nodename));
114 114
115 while (!kthread_should_stop()) { 115 while (!kthread_should_stop()) {
@@ -125,8 +125,9 @@ static int kmmpd(void *data)
125 * Don't spew too many error messages. Print one every 125 * Don't spew too many error messages. Print one every
126 * (s_mmp_update_interval * 60) seconds. 126 * (s_mmp_update_interval * 60) seconds.
127 */ 127 */
128 if (retval && (failed_writes % 60) == 0) { 128 if (retval) {
129 ext4_error(sb, "Error writing to MMP block"); 129 if ((failed_writes % 60) == 0)
130 ext4_error(sb, "Error writing to MMP block");
130 failed_writes++; 131 failed_writes++;
131 } 132 }
132 133
@@ -295,7 +296,8 @@ skip:
295 /* 296 /*
296 * write a new random sequence number. 297 * write a new random sequence number.
297 */ 298 */
298 mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq()); 299 seq = mmp_new_seq();
300 mmp->mmp_seq = cpu_to_le32(seq);
299 301
300 retval = write_mmp_block(bh); 302 retval = write_mmp_block(bh);
301 if (retval) 303 if (retval)
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index f57455a1b1b2..c5826c623e7a 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -17,7 +17,6 @@
17#include <linux/quotaops.h> 17#include <linux/quotaops.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include "ext4_jbd2.h" 19#include "ext4_jbd2.h"
20#include "ext4_extents.h"
21#include "ext4.h" 20#include "ext4.h"
22 21
23/** 22/**
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 1c924faeb6c8..aa4c782c9dd7 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1586,7 +1586,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1586 dxtrace(dx_show_index("node", frames[1].entries)); 1586 dxtrace(dx_show_index("node", frames[1].entries));
1587 dxtrace(dx_show_index("node", 1587 dxtrace(dx_show_index("node",
1588 ((struct dx_node *) bh2->b_data)->entries)); 1588 ((struct dx_node *) bh2->b_data)->entries));
1589 err = ext4_handle_dirty_metadata(handle, inode, bh2); 1589 err = ext4_handle_dirty_metadata(handle, dir, bh2);
1590 if (err) 1590 if (err)
1591 goto journal_error; 1591 goto journal_error;
1592 brelse (bh2); 1592 brelse (bh2);
@@ -1612,7 +1612,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1612 if (err) 1612 if (err)
1613 goto journal_error; 1613 goto journal_error;
1614 } 1614 }
1615 err = ext4_handle_dirty_metadata(handle, inode, frames[0].bh); 1615 err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh);
1616 if (err) { 1616 if (err) {
1617 ext4_std_error(inode->i_sb, err); 1617 ext4_std_error(inode->i_sb, err);
1618 goto cleanup; 1618 goto cleanup;
@@ -1694,7 +1694,7 @@ static void ext4_inc_count(handle_t *handle, struct inode *inode)
1694 if (is_dx(inode) && inode->i_nlink > 1) { 1694 if (is_dx(inode) && inode->i_nlink > 1) {
1695 /* limit is 16-bit i_links_count */ 1695 /* limit is 16-bit i_links_count */
1696 if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) { 1696 if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) {
1697 inode->i_nlink = 1; 1697 set_nlink(inode, 1);
1698 EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb, 1698 EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb,
1699 EXT4_FEATURE_RO_COMPAT_DIR_NLINK); 1699 EXT4_FEATURE_RO_COMPAT_DIR_NLINK);
1700 } 1700 }
@@ -1707,9 +1707,8 @@ static void ext4_inc_count(handle_t *handle, struct inode *inode)
1707 */ 1707 */
1708static void ext4_dec_count(handle_t *handle, struct inode *inode) 1708static void ext4_dec_count(handle_t *handle, struct inode *inode)
1709{ 1709{
1710 drop_nlink(inode); 1710 if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
1711 if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0) 1711 drop_nlink(inode);
1712 inc_nlink(inode);
1713} 1712}
1714 1713
1715 1714
@@ -1756,7 +1755,7 @@ retry:
1756 if (IS_DIRSYNC(dir)) 1755 if (IS_DIRSYNC(dir))
1757 ext4_handle_sync(handle); 1756 ext4_handle_sync(handle);
1758 1757
1759 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0); 1758 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
1760 err = PTR_ERR(inode); 1759 err = PTR_ERR(inode);
1761 if (!IS_ERR(inode)) { 1760 if (!IS_ERR(inode)) {
1762 inode->i_op = &ext4_file_inode_operations; 1761 inode->i_op = &ext4_file_inode_operations;
@@ -1792,7 +1791,7 @@ retry:
1792 if (IS_DIRSYNC(dir)) 1791 if (IS_DIRSYNC(dir))
1793 ext4_handle_sync(handle); 1792 ext4_handle_sync(handle);
1794 1793
1795 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0); 1794 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
1796 err = PTR_ERR(inode); 1795 err = PTR_ERR(inode);
1797 if (!IS_ERR(inode)) { 1796 if (!IS_ERR(inode)) {
1798 init_special_inode(inode, inode->i_mode, rdev); 1797 init_special_inode(inode, inode->i_mode, rdev);
@@ -1832,7 +1831,7 @@ retry:
1832 ext4_handle_sync(handle); 1831 ext4_handle_sync(handle);
1833 1832
1834 inode = ext4_new_inode(handle, dir, S_IFDIR | mode, 1833 inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
1835 &dentry->d_name, 0); 1834 &dentry->d_name, 0, NULL);
1836 err = PTR_ERR(inode); 1835 err = PTR_ERR(inode);
1837 if (IS_ERR(inode)) 1836 if (IS_ERR(inode))
1838 goto out_stop; 1837 goto out_stop;
@@ -1861,9 +1860,9 @@ retry:
1861 de->name_len = 2; 1860 de->name_len = 2;
1862 strcpy(de->name, ".."); 1861 strcpy(de->name, "..");
1863 ext4_set_de_type(dir->i_sb, de, S_IFDIR); 1862 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
1864 inode->i_nlink = 2; 1863 set_nlink(inode, 2);
1865 BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); 1864 BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
1866 err = ext4_handle_dirty_metadata(handle, dir, dir_block); 1865 err = ext4_handle_dirty_metadata(handle, inode, dir_block);
1867 if (err) 1866 if (err)
1868 goto out_clear_inode; 1867 goto out_clear_inode;
1869 err = ext4_mark_inode_dirty(handle, inode); 1868 err = ext4_mark_inode_dirty(handle, inode);
@@ -2214,7 +2213,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2214 ext4_warning(inode->i_sb, 2213 ext4_warning(inode->i_sb,
2215 "Deleting nonexistent file (%lu), %d", 2214 "Deleting nonexistent file (%lu), %d",
2216 inode->i_ino, inode->i_nlink); 2215 inode->i_ino, inode->i_nlink);
2217 inode->i_nlink = 1; 2216 set_nlink(inode, 1);
2218 } 2217 }
2219 retval = ext4_delete_entry(handle, dir, de, bh); 2218 retval = ext4_delete_entry(handle, dir, de, bh);
2220 if (retval) 2219 if (retval)
@@ -2279,7 +2278,7 @@ retry:
2279 ext4_handle_sync(handle); 2278 ext4_handle_sync(handle);
2280 2279
2281 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO, 2280 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
2282 &dentry->d_name, 0); 2281 &dentry->d_name, 0, NULL);
2283 err = PTR_ERR(inode); 2282 err = PTR_ERR(inode);
2284 if (IS_ERR(inode)) 2283 if (IS_ERR(inode))
2285 goto out_stop; 2284 goto out_stop;
@@ -2530,7 +2529,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2530 PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = 2529 PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
2531 cpu_to_le32(new_dir->i_ino); 2530 cpu_to_le32(new_dir->i_ino);
2532 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); 2531 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
2533 retval = ext4_handle_dirty_metadata(handle, old_dir, dir_bh); 2532 retval = ext4_handle_dirty_metadata(handle, old_inode, dir_bh);
2534 if (retval) { 2533 if (retval) {
2535 ext4_std_error(old_dir->i_sb, retval); 2534 ext4_std_error(old_dir->i_sb, retval);
2536 goto end_rename; 2535 goto end_rename;
@@ -2539,7 +2538,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2539 if (new_inode) { 2538 if (new_inode) {
2540 /* checked empty_dir above, can't have another parent, 2539 /* checked empty_dir above, can't have another parent,
2541 * ext4_dec_count() won't work for many-linked dirs */ 2540 * ext4_dec_count() won't work for many-linked dirs */
2542 new_inode->i_nlink = 0; 2541 clear_nlink(new_inode);
2543 } else { 2542 } else {
2544 ext4_inc_count(handle, new_dir); 2543 ext4_inc_count(handle, new_dir);
2545 ext4_update_dx_flag(new_dir); 2544 ext4_update_dx_flag(new_dir);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 92f38ee13f8a..7ce1d0b19c94 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -70,7 +70,6 @@ static void put_io_page(struct ext4_io_page *io_page)
70void ext4_free_io_end(ext4_io_end_t *io) 70void ext4_free_io_end(ext4_io_end_t *io)
71{ 71{
72 int i; 72 int i;
73 wait_queue_head_t *wq;
74 73
75 BUG_ON(!io); 74 BUG_ON(!io);
76 if (io->page) 75 if (io->page)
@@ -78,56 +77,43 @@ void ext4_free_io_end(ext4_io_end_t *io)
78 for (i = 0; i < io->num_io_pages; i++) 77 for (i = 0; i < io->num_io_pages; i++)
79 put_io_page(io->pages[i]); 78 put_io_page(io->pages[i]);
80 io->num_io_pages = 0; 79 io->num_io_pages = 0;
81 wq = ext4_ioend_wq(io->inode); 80 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
82 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && 81 wake_up_all(ext4_ioend_wq(io->inode));
83 waitqueue_active(wq))
84 wake_up_all(wq);
85 kmem_cache_free(io_end_cachep, io); 82 kmem_cache_free(io_end_cachep, io);
86} 83}
87 84
88/* 85/*
89 * check a range of space and convert unwritten extents to written. 86 * check a range of space and convert unwritten extents to written.
87 *
88 * Called with inode->i_mutex; we depend on this when we manipulate
89 * io->flag, since we could otherwise race with ext4_flush_completed_IO()
90 */ 90 */
91int ext4_end_io_nolock(ext4_io_end_t *io) 91int ext4_end_io_nolock(ext4_io_end_t *io)
92{ 92{
93 struct inode *inode = io->inode; 93 struct inode *inode = io->inode;
94 loff_t offset = io->offset; 94 loff_t offset = io->offset;
95 ssize_t size = io->size; 95 ssize_t size = io->size;
96 wait_queue_head_t *wq;
97 int ret = 0; 96 int ret = 0;
98 97
99 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," 98 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
100 "list->prev 0x%p\n", 99 "list->prev 0x%p\n",
101 io, inode->i_ino, io->list.next, io->list.prev); 100 io, inode->i_ino, io->list.next, io->list.prev);
102 101
103 if (list_empty(&io->list))
104 return ret;
105
106 if (!(io->flag & EXT4_IO_END_UNWRITTEN))
107 return ret;
108
109 ret = ext4_convert_unwritten_extents(inode, offset, size); 102 ret = ext4_convert_unwritten_extents(inode, offset, size);
110 if (ret < 0) { 103 if (ret < 0) {
111 printk(KERN_EMERG "%s: failed to convert unwritten " 104 ext4_msg(inode->i_sb, KERN_EMERG,
112 "extents to written extents, error is %d " 105 "failed to convert unwritten extents to written "
113 "io is still on inode %lu aio dio list\n", 106 "extents -- potential data loss! "
114 __func__, ret, inode->i_ino); 107 "(inode %lu, offset %llu, size %zd, error %d)",
115 return ret; 108 inode->i_ino, offset, size, ret);
116 } 109 }
117 110
118 if (io->iocb) 111 if (io->iocb)
119 aio_complete(io->iocb, io->result, 0); 112 aio_complete(io->iocb, io->result, 0);
120 /* clear the DIO AIO unwritten flag */
121 if (io->flag & EXT4_IO_END_UNWRITTEN) {
122 io->flag &= ~EXT4_IO_END_UNWRITTEN;
123 /* Wake up anyone waiting on unwritten extent conversion */
124 wq = ext4_ioend_wq(io->inode);
125 if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) &&
126 waitqueue_active(wq)) {
127 wake_up_all(wq);
128 }
129 }
130 113
114 /* Wake up anyone waiting on unwritten extent conversion */
115 if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten))
116 wake_up_all(ext4_ioend_wq(io->inode));
131 return ret; 117 return ret;
132} 118}
133 119
@@ -140,9 +126,15 @@ static void ext4_end_io_work(struct work_struct *work)
140 struct inode *inode = io->inode; 126 struct inode *inode = io->inode;
141 struct ext4_inode_info *ei = EXT4_I(inode); 127 struct ext4_inode_info *ei = EXT4_I(inode);
142 unsigned long flags; 128 unsigned long flags;
143 int ret; 129
130 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
131 if (list_empty(&io->list)) {
132 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
133 goto free;
134 }
144 135
145 if (!mutex_trylock(&inode->i_mutex)) { 136 if (!mutex_trylock(&inode->i_mutex)) {
137 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
146 /* 138 /*
147 * Requeue the work instead of waiting so that the work 139 * Requeue the work instead of waiting so that the work
148 * items queued after this can be processed. 140 * items queued after this can be processed.
@@ -159,17 +151,11 @@ static void ext4_end_io_work(struct work_struct *work)
159 io->flag |= EXT4_IO_END_QUEUED; 151 io->flag |= EXT4_IO_END_QUEUED;
160 return; 152 return;
161 } 153 }
162 ret = ext4_end_io_nolock(io); 154 list_del_init(&io->list);
163 if (ret < 0) {
164 mutex_unlock(&inode->i_mutex);
165 return;
166 }
167
168 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
169 if (!list_empty(&io->list))
170 list_del_init(&io->list);
171 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 155 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
156 (void) ext4_end_io_nolock(io);
172 mutex_unlock(&inode->i_mutex); 157 mutex_unlock(&inode->i_mutex);
158free:
173 ext4_free_io_end(io); 159 ext4_free_io_end(io);
174} 160}
175 161
@@ -350,10 +336,8 @@ submit_and_retry:
350 if ((io_end->num_io_pages >= MAX_IO_PAGES) && 336 if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
351 (io_end->pages[io_end->num_io_pages-1] != io_page)) 337 (io_end->pages[io_end->num_io_pages-1] != io_page))
352 goto submit_and_retry; 338 goto submit_and_retry;
353 if (buffer_uninit(bh) && !(io_end->flag & EXT4_IO_END_UNWRITTEN)) { 339 if (buffer_uninit(bh))
354 io_end->flag |= EXT4_IO_END_UNWRITTEN; 340 ext4_set_io_unwritten_flag(inode, io_end);
355 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
356 }
357 io->io_end->size += bh->b_size; 341 io->io_end->size += bh->b_size;
358 io->io_next_block++; 342 io->io_next_block++;
359 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); 343 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 707d3f16f7ce..996780ab4f4e 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -875,7 +875,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
875 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ 875 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
876 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ 876 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
877 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ 877 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
878 ext4_free_blks_set(sb, gdp, input->free_blocks_count); 878 ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count);
879 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); 879 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
880 gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED); 880 gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED);
881 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); 881 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
@@ -937,8 +937,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
937 input->reserved_blocks); 937 input->reserved_blocks);
938 938
939 /* Update the free space counts */ 939 /* Update the free space counts */
940 percpu_counter_add(&sbi->s_freeblocks_counter, 940 percpu_counter_add(&sbi->s_freeclusters_counter,
941 input->free_blocks_count); 941 EXT4_B2C(sbi, input->free_blocks_count));
942 percpu_counter_add(&sbi->s_freeinodes_counter, 942 percpu_counter_add(&sbi->s_freeinodes_counter,
943 EXT4_INODES_PER_GROUP(sb)); 943 EXT4_INODES_PER_GROUP(sb));
944 944
@@ -946,8 +946,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
946 sbi->s_log_groups_per_flex) { 946 sbi->s_log_groups_per_flex) {
947 ext4_group_t flex_group; 947 ext4_group_t flex_group;
948 flex_group = ext4_flex_group(sbi, input->group); 948 flex_group = ext4_flex_group(sbi, input->group);
949 atomic_add(input->free_blocks_count, 949 atomic_add(EXT4_B2C(sbi, input->free_blocks_count),
950 &sbi->s_flex_groups[flex_group].free_blocks); 950 &sbi->s_flex_groups[flex_group].free_clusters);
951 atomic_add(EXT4_INODES_PER_GROUP(sb), 951 atomic_add(EXT4_INODES_PER_GROUP(sb),
952 &sbi->s_flex_groups[flex_group].free_inodes); 952 &sbi->s_flex_groups[flex_group].free_inodes);
953 } 953 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 44d0c8db2239..9953d80145ad 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -45,6 +45,7 @@
45#include <linux/freezer.h> 45#include <linux/freezer.h>
46 46
47#include "ext4.h" 47#include "ext4.h"
48#include "ext4_extents.h"
48#include "ext4_jbd2.h" 49#include "ext4_jbd2.h"
49#include "xattr.h" 50#include "xattr.h"
50#include "acl.h" 51#include "acl.h"
@@ -163,8 +164,8 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb,
163 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 164 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
164} 165}
165 166
166__u32 ext4_free_blks_count(struct super_block *sb, 167__u32 ext4_free_group_clusters(struct super_block *sb,
167 struct ext4_group_desc *bg) 168 struct ext4_group_desc *bg)
168{ 169{
169 return le16_to_cpu(bg->bg_free_blocks_count_lo) | 170 return le16_to_cpu(bg->bg_free_blocks_count_lo) |
170 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 171 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
@@ -219,8 +220,8 @@ void ext4_inode_table_set(struct super_block *sb,
219 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 220 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
220} 221}
221 222
222void ext4_free_blks_set(struct super_block *sb, 223void ext4_free_group_clusters_set(struct super_block *sb,
223 struct ext4_group_desc *bg, __u32 count) 224 struct ext4_group_desc *bg, __u32 count)
224{ 225{
225 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); 226 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
226 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 227 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
@@ -414,6 +415,22 @@ static void save_error_info(struct super_block *sb, const char *func,
414 ext4_commit_super(sb, 1); 415 ext4_commit_super(sb, 1);
415} 416}
416 417
418/*
419 * The del_gendisk() function uninitializes the disk-specific data
420 * structures, including the bdi structure, without telling anyone
421 * else. Once this happens, any attempt to call mark_buffer_dirty()
422 * (for example, by ext4_commit_super), will cause a kernel OOPS.
423 * This is a kludge to prevent these oops until we can put in a proper
424 * hook in del_gendisk() to inform the VFS and file system layers.
425 */
426static int block_device_ejected(struct super_block *sb)
427{
428 struct inode *bd_inode = sb->s_bdev->bd_inode;
429 struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info;
430
431 return bdi->dev == NULL;
432}
433
417 434
418/* Deal with the reporting of failure conditions on a filesystem such as 435/* Deal with the reporting of failure conditions on a filesystem such as
419 * inconsistencies detected or read IO failures. 436 * inconsistencies detected or read IO failures.
@@ -821,10 +838,10 @@ static void ext4_put_super(struct super_block *sb)
821 brelse(sbi->s_group_desc[i]); 838 brelse(sbi->s_group_desc[i]);
822 ext4_kvfree(sbi->s_group_desc); 839 ext4_kvfree(sbi->s_group_desc);
823 ext4_kvfree(sbi->s_flex_groups); 840 ext4_kvfree(sbi->s_flex_groups);
824 percpu_counter_destroy(&sbi->s_freeblocks_counter); 841 percpu_counter_destroy(&sbi->s_freeclusters_counter);
825 percpu_counter_destroy(&sbi->s_freeinodes_counter); 842 percpu_counter_destroy(&sbi->s_freeinodes_counter);
826 percpu_counter_destroy(&sbi->s_dirs_counter); 843 percpu_counter_destroy(&sbi->s_dirs_counter);
827 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 844 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
828 brelse(sbi->s_sbh); 845 brelse(sbi->s_sbh);
829#ifdef CONFIG_QUOTA 846#ifdef CONFIG_QUOTA
830 for (i = 0; i < MAXQUOTAS; i++) 847 for (i = 0; i < MAXQUOTAS; i++)
@@ -1057,8 +1074,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
1057 seq_puts(seq, ",nouid32"); 1074 seq_puts(seq, ",nouid32");
1058 if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) 1075 if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
1059 seq_puts(seq, ",debug"); 1076 seq_puts(seq, ",debug");
1060 if (test_opt(sb, OLDALLOC))
1061 seq_puts(seq, ",oldalloc");
1062#ifdef CONFIG_EXT4_FS_XATTR 1077#ifdef CONFIG_EXT4_FS_XATTR
1063 if (test_opt(sb, XATTR_USER)) 1078 if (test_opt(sb, XATTR_USER))
1064 seq_puts(seq, ",user_xattr"); 1079 seq_puts(seq, ",user_xattr");
@@ -1567,10 +1582,12 @@ static int parse_options(char *options, struct super_block *sb,
1567 set_opt(sb, DEBUG); 1582 set_opt(sb, DEBUG);
1568 break; 1583 break;
1569 case Opt_oldalloc: 1584 case Opt_oldalloc:
1570 set_opt(sb, OLDALLOC); 1585 ext4_msg(sb, KERN_WARNING,
1586 "Ignoring deprecated oldalloc option");
1571 break; 1587 break;
1572 case Opt_orlov: 1588 case Opt_orlov:
1573 clear_opt(sb, OLDALLOC); 1589 ext4_msg(sb, KERN_WARNING,
1590 "Ignoring deprecated orlov option");
1574 break; 1591 break;
1575#ifdef CONFIG_EXT4_FS_XATTR 1592#ifdef CONFIG_EXT4_FS_XATTR
1576 case Opt_user_xattr: 1593 case Opt_user_xattr:
@@ -1801,6 +1818,7 @@ set_qf_format:
1801 break; 1818 break;
1802 case Opt_nodelalloc: 1819 case Opt_nodelalloc:
1803 clear_opt(sb, DELALLOC); 1820 clear_opt(sb, DELALLOC);
1821 clear_opt2(sb, EXPLICIT_DELALLOC);
1804 break; 1822 break;
1805 case Opt_mblk_io_submit: 1823 case Opt_mblk_io_submit:
1806 set_opt(sb, MBLK_IO_SUBMIT); 1824 set_opt(sb, MBLK_IO_SUBMIT);
@@ -1817,6 +1835,7 @@ set_qf_format:
1817 break; 1835 break;
1818 case Opt_delalloc: 1836 case Opt_delalloc:
1819 set_opt(sb, DELALLOC); 1837 set_opt(sb, DELALLOC);
1838 set_opt2(sb, EXPLICIT_DELALLOC);
1820 break; 1839 break;
1821 case Opt_block_validity: 1840 case Opt_block_validity:
1822 set_opt(sb, BLOCK_VALIDITY); 1841 set_opt(sb, BLOCK_VALIDITY);
@@ -1935,7 +1954,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1935 res = MS_RDONLY; 1954 res = MS_RDONLY;
1936 } 1955 }
1937 if (read_only) 1956 if (read_only)
1938 return res; 1957 goto done;
1939 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1958 if (!(sbi->s_mount_state & EXT4_VALID_FS))
1940 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " 1959 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
1941 "running e2fsck is recommended"); 1960 "running e2fsck is recommended");
@@ -1966,6 +1985,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1966 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1985 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1967 1986
1968 ext4_commit_super(sb, 1); 1987 ext4_commit_super(sb, 1);
1988done:
1969 if (test_opt(sb, DEBUG)) 1989 if (test_opt(sb, DEBUG))
1970 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1990 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1971 "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n", 1991 "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
@@ -2015,8 +2035,8 @@ static int ext4_fill_flex_info(struct super_block *sb)
2015 flex_group = ext4_flex_group(sbi, i); 2035 flex_group = ext4_flex_group(sbi, i);
2016 atomic_add(ext4_free_inodes_count(sb, gdp), 2036 atomic_add(ext4_free_inodes_count(sb, gdp),
2017 &sbi->s_flex_groups[flex_group].free_inodes); 2037 &sbi->s_flex_groups[flex_group].free_inodes);
2018 atomic_add(ext4_free_blks_count(sb, gdp), 2038 atomic_add(ext4_free_group_clusters(sb, gdp),
2019 &sbi->s_flex_groups[flex_group].free_blocks); 2039 &sbi->s_flex_groups[flex_group].free_clusters);
2020 atomic_add(ext4_used_dirs_count(sb, gdp), 2040 atomic_add(ext4_used_dirs_count(sb, gdp),
2021 &sbi->s_flex_groups[flex_group].used_dirs); 2041 &sbi->s_flex_groups[flex_group].used_dirs);
2022 } 2042 }
@@ -2134,7 +2154,8 @@ static int ext4_check_descriptors(struct super_block *sb,
2134 if (NULL != first_not_zeroed) 2154 if (NULL != first_not_zeroed)
2135 *first_not_zeroed = grp; 2155 *first_not_zeroed = grp;
2136 2156
2137 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 2157 ext4_free_blocks_count_set(sbi->s_es,
2158 EXT4_C2B(sbi, ext4_count_free_clusters(sb)));
2138 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); 2159 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
2139 return 1; 2160 return 1;
2140} 2161}
@@ -2454,7 +2475,8 @@ static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
2454 char *buf) 2475 char *buf)
2455{ 2476{
2456 return snprintf(buf, PAGE_SIZE, "%llu\n", 2477 return snprintf(buf, PAGE_SIZE, "%llu\n",
2457 (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); 2478 (s64) EXT4_C2B(sbi,
2479 percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
2458} 2480}
2459 2481
2460static ssize_t session_write_kbytes_show(struct ext4_attr *a, 2482static ssize_t session_write_kbytes_show(struct ext4_attr *a,
@@ -2682,6 +2704,13 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
2682 return 0; 2704 return 0;
2683 } 2705 }
2684 } 2706 }
2707 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) &&
2708 !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
2709 ext4_msg(sb, KERN_ERR,
2710 "Can't support bigalloc feature without "
2711 "extents feature\n");
2712 return 0;
2713 }
2685 return 1; 2714 return 1;
2686} 2715}
2687 2716
@@ -3087,10 +3116,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3087 char *cp; 3116 char *cp;
3088 const char *descr; 3117 const char *descr;
3089 int ret = -ENOMEM; 3118 int ret = -ENOMEM;
3090 int blocksize; 3119 int blocksize, clustersize;
3091 unsigned int db_count; 3120 unsigned int db_count;
3092 unsigned int i; 3121 unsigned int i;
3093 int needs_recovery, has_huge_files; 3122 int needs_recovery, has_huge_files, has_bigalloc;
3094 __u64 blocks_count; 3123 __u64 blocks_count;
3095 int err; 3124 int err;
3096 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 3125 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
@@ -3224,6 +3253,33 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3224 &journal_ioprio, NULL, 0)) 3253 &journal_ioprio, NULL, 0))
3225 goto failed_mount; 3254 goto failed_mount;
3226 3255
3256 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
3257 printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
3258 "with data=journal disables delayed "
3259 "allocation and O_DIRECT support!\n");
3260 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
3261 ext4_msg(sb, KERN_ERR, "can't mount with "
3262 "both data=journal and delalloc");
3263 goto failed_mount;
3264 }
3265 if (test_opt(sb, DIOREAD_NOLOCK)) {
3266 ext4_msg(sb, KERN_ERR, "can't mount with "
3267 "both data=journal and delalloc");
3268 goto failed_mount;
3269 }
3270 if (test_opt(sb, DELALLOC))
3271 clear_opt(sb, DELALLOC);
3272 }
3273
3274 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3275 if (test_opt(sb, DIOREAD_NOLOCK)) {
3276 if (blocksize < PAGE_SIZE) {
3277 ext4_msg(sb, KERN_ERR, "can't mount with "
3278 "dioread_nolock if block size != PAGE_SIZE");
3279 goto failed_mount;
3280 }
3281 }
3282
3227 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3283 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3228 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 3284 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
3229 3285
@@ -3265,8 +3321,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3265 if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) 3321 if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
3266 goto failed_mount; 3322 goto failed_mount;
3267 3323
3268 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3269
3270 if (blocksize < EXT4_MIN_BLOCK_SIZE || 3324 if (blocksize < EXT4_MIN_BLOCK_SIZE ||
3271 blocksize > EXT4_MAX_BLOCK_SIZE) { 3325 blocksize > EXT4_MAX_BLOCK_SIZE) {
3272 ext4_msg(sb, KERN_ERR, 3326 ext4_msg(sb, KERN_ERR,
@@ -3369,12 +3423,53 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3369 sb->s_dirt = 1; 3423 sb->s_dirt = 1;
3370 } 3424 }
3371 3425
3372 if (sbi->s_blocks_per_group > blocksize * 8) { 3426 /* Handle clustersize */
3373 ext4_msg(sb, KERN_ERR, 3427 clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
3374 "#blocks per group too big: %lu", 3428 has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3375 sbi->s_blocks_per_group); 3429 EXT4_FEATURE_RO_COMPAT_BIGALLOC);
3376 goto failed_mount; 3430 if (has_bigalloc) {
3431 if (clustersize < blocksize) {
3432 ext4_msg(sb, KERN_ERR,
3433 "cluster size (%d) smaller than "
3434 "block size (%d)", clustersize, blocksize);
3435 goto failed_mount;
3436 }
3437 sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
3438 le32_to_cpu(es->s_log_block_size);
3439 sbi->s_clusters_per_group =
3440 le32_to_cpu(es->s_clusters_per_group);
3441 if (sbi->s_clusters_per_group > blocksize * 8) {
3442 ext4_msg(sb, KERN_ERR,
3443 "#clusters per group too big: %lu",
3444 sbi->s_clusters_per_group);
3445 goto failed_mount;
3446 }
3447 if (sbi->s_blocks_per_group !=
3448 (sbi->s_clusters_per_group * (clustersize / blocksize))) {
3449 ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
3450 "clusters per group (%lu) inconsistent",
3451 sbi->s_blocks_per_group,
3452 sbi->s_clusters_per_group);
3453 goto failed_mount;
3454 }
3455 } else {
3456 if (clustersize != blocksize) {
3457 ext4_warning(sb, "fragment/cluster size (%d) != "
3458 "block size (%d)", clustersize,
3459 blocksize);
3460 clustersize = blocksize;
3461 }
3462 if (sbi->s_blocks_per_group > blocksize * 8) {
3463 ext4_msg(sb, KERN_ERR,
3464 "#blocks per group too big: %lu",
3465 sbi->s_blocks_per_group);
3466 goto failed_mount;
3467 }
3468 sbi->s_clusters_per_group = sbi->s_blocks_per_group;
3469 sbi->s_cluster_bits = 0;
3377 } 3470 }
3471 sbi->s_cluster_ratio = clustersize / blocksize;
3472
3378 if (sbi->s_inodes_per_group > blocksize * 8) { 3473 if (sbi->s_inodes_per_group > blocksize * 8) {
3379 ext4_msg(sb, KERN_ERR, 3474 ext4_msg(sb, KERN_ERR,
3380 "#inodes per group too big: %lu", 3475 "#inodes per group too big: %lu",
@@ -3446,10 +3541,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3446 goto failed_mount; 3541 goto failed_mount;
3447 } 3542 }
3448 3543
3449#ifdef CONFIG_PROC_FS
3450 if (ext4_proc_root) 3544 if (ext4_proc_root)
3451 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 3545 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
3452#endif
3453 3546
3454 bgl_lock_init(sbi->s_blockgroup_lock); 3547 bgl_lock_init(sbi->s_blockgroup_lock);
3455 3548
@@ -3483,8 +3576,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3483 sbi->s_err_report.function = print_daily_error_info; 3576 sbi->s_err_report.function = print_daily_error_info;
3484 sbi->s_err_report.data = (unsigned long) sb; 3577 sbi->s_err_report.data = (unsigned long) sb;
3485 3578
3486 err = percpu_counter_init(&sbi->s_freeblocks_counter, 3579 err = percpu_counter_init(&sbi->s_freeclusters_counter,
3487 ext4_count_free_blocks(sb)); 3580 ext4_count_free_clusters(sb));
3488 if (!err) { 3581 if (!err) {
3489 err = percpu_counter_init(&sbi->s_freeinodes_counter, 3582 err = percpu_counter_init(&sbi->s_freeinodes_counter,
3490 ext4_count_free_inodes(sb)); 3583 ext4_count_free_inodes(sb));
@@ -3494,7 +3587,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3494 ext4_count_dirs(sb)); 3587 ext4_count_dirs(sb));
3495 } 3588 }
3496 if (!err) { 3589 if (!err) {
3497 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 3590 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
3498 } 3591 }
3499 if (err) { 3592 if (err) {
3500 ext4_msg(sb, KERN_ERR, "insufficient memory"); 3593 ext4_msg(sb, KERN_ERR, "insufficient memory");
@@ -3609,13 +3702,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3609 * The journal may have updated the bg summary counts, so we 3702 * The journal may have updated the bg summary counts, so we
3610 * need to update the global counters. 3703 * need to update the global counters.
3611 */ 3704 */
3612 percpu_counter_set(&sbi->s_freeblocks_counter, 3705 percpu_counter_set(&sbi->s_freeclusters_counter,
3613 ext4_count_free_blocks(sb)); 3706 ext4_count_free_clusters(sb));
3614 percpu_counter_set(&sbi->s_freeinodes_counter, 3707 percpu_counter_set(&sbi->s_freeinodes_counter,
3615 ext4_count_free_inodes(sb)); 3708 ext4_count_free_inodes(sb));
3616 percpu_counter_set(&sbi->s_dirs_counter, 3709 percpu_counter_set(&sbi->s_dirs_counter,
3617 ext4_count_dirs(sb)); 3710 ext4_count_dirs(sb));
3618 percpu_counter_set(&sbi->s_dirtyblocks_counter, 0); 3711 percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
3619 3712
3620no_journal: 3713no_journal:
3621 /* 3714 /*
@@ -3679,25 +3772,6 @@ no_journal:
3679 "available"); 3772 "available");
3680 } 3773 }
3681 3774
3682 if (test_opt(sb, DELALLOC) &&
3683 (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
3684 ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
3685 "requested data journaling mode");
3686 clear_opt(sb, DELALLOC);
3687 }
3688 if (test_opt(sb, DIOREAD_NOLOCK)) {
3689 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
3690 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
3691 "option - requested data journaling mode");
3692 clear_opt(sb, DIOREAD_NOLOCK);
3693 }
3694 if (sb->s_blocksize < PAGE_SIZE) {
3695 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
3696 "option - block size is too small");
3697 clear_opt(sb, DIOREAD_NOLOCK);
3698 }
3699 }
3700
3701 err = ext4_setup_system_zone(sb); 3775 err = ext4_setup_system_zone(sb);
3702 if (err) { 3776 if (err) {
3703 ext4_msg(sb, KERN_ERR, "failed to initialize system " 3777 ext4_msg(sb, KERN_ERR, "failed to initialize system "
@@ -3710,22 +3784,19 @@ no_journal:
3710 if (err) { 3784 if (err) {
3711 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", 3785 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
3712 err); 3786 err);
3713 goto failed_mount4; 3787 goto failed_mount5;
3714 } 3788 }
3715 3789
3716 err = ext4_register_li_request(sb, first_not_zeroed); 3790 err = ext4_register_li_request(sb, first_not_zeroed);
3717 if (err) 3791 if (err)
3718 goto failed_mount4; 3792 goto failed_mount6;
3719 3793
3720 sbi->s_kobj.kset = ext4_kset; 3794 sbi->s_kobj.kset = ext4_kset;
3721 init_completion(&sbi->s_kobj_unregister); 3795 init_completion(&sbi->s_kobj_unregister);
3722 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, 3796 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
3723 "%s", sb->s_id); 3797 "%s", sb->s_id);
3724 if (err) { 3798 if (err)
3725 ext4_mb_release(sb); 3799 goto failed_mount7;
3726 ext4_ext_release(sb);
3727 goto failed_mount4;
3728 };
3729 3800
3730 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 3801 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
3731 ext4_orphan_cleanup(sb, es); 3802 ext4_orphan_cleanup(sb, es);
@@ -3759,13 +3830,19 @@ cantfind_ext4:
3759 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); 3830 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
3760 goto failed_mount; 3831 goto failed_mount;
3761 3832
3833failed_mount7:
3834 ext4_unregister_li_request(sb);
3835failed_mount6:
3836 ext4_ext_release(sb);
3837failed_mount5:
3838 ext4_mb_release(sb);
3839 ext4_release_system_zone(sb);
3762failed_mount4: 3840failed_mount4:
3763 iput(root); 3841 iput(root);
3764 sb->s_root = NULL; 3842 sb->s_root = NULL;
3765 ext4_msg(sb, KERN_ERR, "mount failed"); 3843 ext4_msg(sb, KERN_ERR, "mount failed");
3766 destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); 3844 destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
3767failed_mount_wq: 3845failed_mount_wq:
3768 ext4_release_system_zone(sb);
3769 if (sbi->s_journal) { 3846 if (sbi->s_journal) {
3770 jbd2_journal_destroy(sbi->s_journal); 3847 jbd2_journal_destroy(sbi->s_journal);
3771 sbi->s_journal = NULL; 3848 sbi->s_journal = NULL;
@@ -3774,10 +3851,10 @@ failed_mount3:
3774 del_timer(&sbi->s_err_report); 3851 del_timer(&sbi->s_err_report);
3775 if (sbi->s_flex_groups) 3852 if (sbi->s_flex_groups)
3776 ext4_kvfree(sbi->s_flex_groups); 3853 ext4_kvfree(sbi->s_flex_groups);
3777 percpu_counter_destroy(&sbi->s_freeblocks_counter); 3854 percpu_counter_destroy(&sbi->s_freeclusters_counter);
3778 percpu_counter_destroy(&sbi->s_freeinodes_counter); 3855 percpu_counter_destroy(&sbi->s_freeinodes_counter);
3779 percpu_counter_destroy(&sbi->s_dirs_counter); 3856 percpu_counter_destroy(&sbi->s_dirs_counter);
3780 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 3857 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
3781 if (sbi->s_mmp_tsk) 3858 if (sbi->s_mmp_tsk)
3782 kthread_stop(sbi->s_mmp_tsk); 3859 kthread_stop(sbi->s_mmp_tsk);
3783failed_mount2: 3860failed_mount2:
@@ -4064,7 +4141,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
4064 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 4141 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
4065 int error = 0; 4142 int error = 0;
4066 4143
4067 if (!sbh) 4144 if (!sbh || block_device_ejected(sb))
4068 return error; 4145 return error;
4069 if (buffer_write_io_error(sbh)) { 4146 if (buffer_write_io_error(sbh)) {
4070 /* 4147 /*
@@ -4100,8 +4177,9 @@ static int ext4_commit_super(struct super_block *sb, int sync)
4100 else 4177 else
4101 es->s_kbytes_written = 4178 es->s_kbytes_written =
4102 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); 4179 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
4103 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( 4180 ext4_free_blocks_count_set(es,
4104 &EXT4_SB(sb)->s_freeblocks_counter)); 4181 EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
4182 &EXT4_SB(sb)->s_freeclusters_counter)));
4105 es->s_free_inodes_count = 4183 es->s_free_inodes_count =
4106 cpu_to_le32(percpu_counter_sum_positive( 4184 cpu_to_le32(percpu_counter_sum_positive(
4107 &EXT4_SB(sb)->s_freeinodes_counter)); 4185 &EXT4_SB(sb)->s_freeinodes_counter));
@@ -4506,16 +4584,34 @@ restore_opts:
4506 return err; 4584 return err;
4507} 4585}
4508 4586
4587/*
4588 * Note: calculating the overhead so we can be compatible with
4589 * historical BSD practice is quite difficult in the face of
4590 * clusters/bigalloc. This is because multiple metadata blocks from
4591 * different block group can end up in the same allocation cluster.
4592 * Calculating the exact overhead in the face of clustered allocation
4593 * requires either O(all block bitmaps) in memory or O(number of block
4594 * groups**2) in time. We will still calculate the superblock for
4595 * older file systems --- and if we come across with a bigalloc file
4596 * system with zero in s_overhead_clusters the estimate will be close to
4597 * correct especially for very large cluster sizes --- but for newer
4598 * file systems, it's better to calculate this figure once at mkfs
4599 * time, and store it in the superblock. If the superblock value is
4600 * present (even for non-bigalloc file systems), we will use it.
4601 */
4509static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) 4602static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
4510{ 4603{
4511 struct super_block *sb = dentry->d_sb; 4604 struct super_block *sb = dentry->d_sb;
4512 struct ext4_sb_info *sbi = EXT4_SB(sb); 4605 struct ext4_sb_info *sbi = EXT4_SB(sb);
4513 struct ext4_super_block *es = sbi->s_es; 4606 struct ext4_super_block *es = sbi->s_es;
4607 struct ext4_group_desc *gdp;
4514 u64 fsid; 4608 u64 fsid;
4515 s64 bfree; 4609 s64 bfree;
4516 4610
4517 if (test_opt(sb, MINIX_DF)) { 4611 if (test_opt(sb, MINIX_DF)) {
4518 sbi->s_overhead_last = 0; 4612 sbi->s_overhead_last = 0;
4613 } else if (es->s_overhead_clusters) {
4614 sbi->s_overhead_last = le32_to_cpu(es->s_overhead_clusters);
4519 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { 4615 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
4520 ext4_group_t i, ngroups = ext4_get_groups_count(sb); 4616 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4521 ext4_fsblk_t overhead = 0; 4617 ext4_fsblk_t overhead = 0;
@@ -4530,24 +4626,16 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
4530 * All of the blocks before first_data_block are 4626 * All of the blocks before first_data_block are
4531 * overhead 4627 * overhead
4532 */ 4628 */
4533 overhead = le32_to_cpu(es->s_first_data_block); 4629 overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
4534 4630
4535 /* 4631 /*
4536 * Add the overhead attributed to the superblock and 4632 * Add the overhead found in each block group
4537 * block group descriptors. If the sparse superblocks
4538 * feature is turned on, then not all groups have this.
4539 */ 4633 */
4540 for (i = 0; i < ngroups; i++) { 4634 for (i = 0; i < ngroups; i++) {
4541 overhead += ext4_bg_has_super(sb, i) + 4635 gdp = ext4_get_group_desc(sb, i, NULL);
4542 ext4_bg_num_gdb(sb, i); 4636 overhead += ext4_num_overhead_clusters(sb, i, gdp);
4543 cond_resched(); 4637 cond_resched();
4544 } 4638 }
4545
4546 /*
4547 * Every block group has an inode bitmap, a block
4548 * bitmap, and an inode table.
4549 */
4550 overhead += ngroups * (2 + sbi->s_itb_per_group);
4551 sbi->s_overhead_last = overhead; 4639 sbi->s_overhead_last = overhead;
4552 smp_wmb(); 4640 smp_wmb();
4553 sbi->s_blocks_last = ext4_blocks_count(es); 4641 sbi->s_blocks_last = ext4_blocks_count(es);
@@ -4555,11 +4643,12 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
4555 4643
4556 buf->f_type = EXT4_SUPER_MAGIC; 4644 buf->f_type = EXT4_SUPER_MAGIC;
4557 buf->f_bsize = sb->s_blocksize; 4645 buf->f_bsize = sb->s_blocksize;
4558 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 4646 buf->f_blocks = (ext4_blocks_count(es) -
4559 bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - 4647 EXT4_C2B(sbi, sbi->s_overhead_last));
4560 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); 4648 bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
4649 percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
4561 /* prevent underflow in case that few free space is available */ 4650 /* prevent underflow in case that few free space is available */
4562 buf->f_bfree = max_t(s64, bfree, 0); 4651 buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
4563 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 4652 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
4564 if (buf->f_bfree < ext4_r_blocks_count(es)) 4653 if (buf->f_bfree < ext4_r_blocks_count(es))
4565 buf->f_bavail = 0; 4654 buf->f_bavail = 0;
@@ -4980,13 +5069,11 @@ static int __init ext4_init_fs(void)
4980 return err; 5069 return err;
4981 err = ext4_init_system_zone(); 5070 err = ext4_init_system_zone();
4982 if (err) 5071 if (err)
4983 goto out7; 5072 goto out6;
4984 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 5073 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
4985 if (!ext4_kset) 5074 if (!ext4_kset)
4986 goto out6;
4987 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
4988 if (!ext4_proc_root)
4989 goto out5; 5075 goto out5;
5076 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
4990 5077
4991 err = ext4_init_feat_adverts(); 5078 err = ext4_init_feat_adverts();
4992 if (err) 5079 if (err)
@@ -5022,12 +5109,12 @@ out2:
5022out3: 5109out3:
5023 ext4_exit_feat_adverts(); 5110 ext4_exit_feat_adverts();
5024out4: 5111out4:
5025 remove_proc_entry("fs/ext4", NULL); 5112 if (ext4_proc_root)
5026out5: 5113 remove_proc_entry("fs/ext4", NULL);
5027 kset_unregister(ext4_kset); 5114 kset_unregister(ext4_kset);
5028out6: 5115out5:
5029 ext4_exit_system_zone(); 5116 ext4_exit_system_zone();
5030out7: 5117out6:
5031 ext4_exit_pageio(); 5118 ext4_exit_pageio();
5032 return err; 5119 return err;
5033} 5120}
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index c757adc97250..93a00d89a220 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -820,8 +820,14 @@ inserted:
820 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 820 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
821 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; 821 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
822 822
823 /*
824 * take i_data_sem because we will test
825 * i_delalloc_reserved_flag in ext4_mb_new_blocks
826 */
827 down_read((&EXT4_I(inode)->i_data_sem));
823 block = ext4_new_meta_blocks(handle, inode, goal, 0, 828 block = ext4_new_meta_blocks(handle, inode, goal, 0,
824 NULL, &error); 829 NULL, &error);
830 up_read((&EXT4_I(inode)->i_data_sem));
825 if (error) 831 if (error)
826 goto cleanup; 832 goto cleanup;
827 833
@@ -985,11 +991,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
985 no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); 991 no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
986 ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); 992 ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
987 993
988 error = ext4_get_inode_loc(inode, &is.iloc); 994 error = ext4_reserve_inode_write(handle, inode, &is.iloc);
989 if (error)
990 goto cleanup;
991
992 error = ext4_journal_get_write_access(handle, is.iloc.bh);
993 if (error) 995 if (error)
994 goto cleanup; 996 goto cleanup;
995 997
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 1726d7303047..808cac7edcfb 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -379,7 +379,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
379 return error; 379 return error;
380 MSDOS_I(inode)->mmu_private = inode->i_size; 380 MSDOS_I(inode)->mmu_private = inode->i_size;
381 381
382 inode->i_nlink = fat_subdirs(inode); 382 set_nlink(inode, fat_subdirs(inode));
383 } else { /* not a directory */ 383 } else { /* not a directory */
384 inode->i_generation |= 1; 384 inode->i_generation |= 1;
385 inode->i_mode = fat_make_mode(sbi, de->attr, 385 inode->i_mode = fat_make_mode(sbi, de->attr,
@@ -1233,7 +1233,7 @@ static int fat_read_root(struct inode *inode)
1233 fat_save_attrs(inode, ATTR_DIR); 1233 fat_save_attrs(inode, ATTR_DIR);
1234 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0; 1234 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0;
1235 inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0; 1235 inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0;
1236 inode->i_nlink = fat_subdirs(inode)+2; 1236 set_nlink(inode, fat_subdirs(inode)+2);
1237 1237
1238 return 0; 1238 return 0;
1239} 1239}
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 66e83b845455..216b419f30e2 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -387,7 +387,7 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode)
387 /* the directory was completed, just return a error */ 387 /* the directory was completed, just return a error */
388 goto out; 388 goto out;
389 } 389 }
390 inode->i_nlink = 2; 390 set_nlink(inode, 2);
391 inode->i_mtime = inode->i_atime = inode->i_ctime = ts; 391 inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
392 /* timestamp is already written, so mark_inode_dirty() is unneeded. */ 392 /* timestamp is already written, so mark_inode_dirty() is unneeded. */
393 393
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index bb3f29c3557b..a87a65663c25 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -900,7 +900,7 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode)
900 goto out; 900 goto out;
901 } 901 }
902 inode->i_version++; 902 inode->i_version++;
903 inode->i_nlink = 2; 903 set_nlink(inode, 2);
904 inode->i_mtime = inode->i_atime = inode->i_ctime = ts; 904 inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
905 /* timestamp is already written, so mark_inode_dirty() is unneeded. */ 905 /* timestamp is already written, so mark_inode_dirty() is unneeded. */
906 906
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 1a4311437a8b..7b2af5abe2fa 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -227,7 +227,7 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip)
227 ip->i_uid = (uid_t)vip->vii_uid; 227 ip->i_uid = (uid_t)vip->vii_uid;
228 ip->i_gid = (gid_t)vip->vii_gid; 228 ip->i_gid = (gid_t)vip->vii_gid;
229 229
230 ip->i_nlink = vip->vii_nlink; 230 set_nlink(ip, vip->vii_nlink);
231 ip->i_size = vip->vii_size; 231 ip->i_size = vip->vii_size;
232 232
233 ip->i_atime.tv_sec = vip->vii_atime; 233 ip->i_atime.tv_sec = vip->vii_atime;
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 85542a7daf40..42593c587d48 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -231,7 +231,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
231 if (iop) 231 if (iop)
232 inode->i_op = iop; 232 inode->i_op = iop;
233 inode->i_fop = fop; 233 inode->i_fop = fop;
234 inode->i_nlink = nlink; 234 set_nlink(inode, nlink);
235 inode->i_private = fc; 235 inode->i_private = fc;
236 d_add(dentry, inode); 236 d_add(dentry, inode);
237 return dentry; 237 return dentry;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index add96f6ffda5..3e6d72756479 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -151,7 +151,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
151 151
152 inode->i_ino = attr->ino; 152 inode->i_ino = attr->ino;
153 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 153 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
154 inode->i_nlink = attr->nlink; 154 set_nlink(inode, attr->nlink);
155 inode->i_uid = attr->uid; 155 inode->i_uid = attr->uid;
156 inode->i_gid = attr->gid; 156 inode->i_gid = attr->gid;
157 inode->i_blocks = attr->blocks; 157 inode->i_blocks = attr->blocks;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 78418b4fa857..1656df7aacd2 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -299,7 +299,7 @@ static void gfs2_set_nlink(struct inode *inode, u32 nlink)
299 if (nlink == 0) 299 if (nlink == 0)
300 clear_nlink(inode); 300 clear_nlink(inode);
301 else 301 else
302 inode->i_nlink = nlink; 302 set_nlink(inode, nlink);
303 } 303 }
304} 304}
305 305
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index b4d70b13be92..bce4eef91a06 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -198,7 +198,7 @@ static int hfs_create(struct inode *dir, struct dentry *dentry, int mode,
198 198
199 res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode); 199 res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode);
200 if (res) { 200 if (res) {
201 inode->i_nlink = 0; 201 clear_nlink(inode);
202 hfs_delete_inode(inode); 202 hfs_delete_inode(inode);
203 iput(inode); 203 iput(inode);
204 return res; 204 return res;
@@ -227,7 +227,7 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
227 227
228 res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode); 228 res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode);
229 if (res) { 229 if (res) {
230 inode->i_nlink = 0; 230 clear_nlink(inode);
231 hfs_delete_inode(inode); 231 hfs_delete_inode(inode);
232 iput(inode); 232 iput(inode);
233 return res; 233 return res;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 96a1b625fc74..a1a9fdcd2a00 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -183,7 +183,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
183 inode->i_mode = mode; 183 inode->i_mode = mode;
184 inode->i_uid = current_fsuid(); 184 inode->i_uid = current_fsuid();
185 inode->i_gid = current_fsgid(); 185 inode->i_gid = current_fsgid();
186 inode->i_nlink = 1; 186 set_nlink(inode, 1);
187 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 187 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
188 HFS_I(inode)->flags = 0; 188 HFS_I(inode)->flags = 0;
189 HFS_I(inode)->rsrc_inode = NULL; 189 HFS_I(inode)->rsrc_inode = NULL;
@@ -313,7 +313,7 @@ static int hfs_read_inode(struct inode *inode, void *data)
313 /* Initialize the inode */ 313 /* Initialize the inode */
314 inode->i_uid = hsb->s_uid; 314 inode->i_uid = hsb->s_uid;
315 inode->i_gid = hsb->s_gid; 315 inode->i_gid = hsb->s_gid;
316 inode->i_nlink = 1; 316 set_nlink(inode, 1);
317 317
318 if (idata->key) 318 if (idata->key)
319 HFS_I(inode)->cat_key = *idata->key; 319 HFS_I(inode)->cat_key = *idata->key;
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 25b2443a004c..4536cd3f15ae 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -415,7 +415,7 @@ static int hfsplus_symlink(struct inode *dir, struct dentry *dentry,
415 goto out; 415 goto out;
416 416
417out_err: 417out_err:
418 inode->i_nlink = 0; 418 clear_nlink(inode);
419 hfsplus_delete_inode(inode); 419 hfsplus_delete_inode(inode);
420 iput(inode); 420 iput(inode);
421out: 421out:
@@ -440,7 +440,7 @@ static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
440 440
441 res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); 441 res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode);
442 if (res) { 442 if (res) {
443 inode->i_nlink = 0; 443 clear_nlink(inode);
444 hfsplus_delete_inode(inode); 444 hfsplus_delete_inode(inode);
445 iput(inode); 445 iput(inode);
446 goto out; 446 goto out;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 4cc1e3a36ec7..40e1413be4cf 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -391,7 +391,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
391 inode->i_mode = mode; 391 inode->i_mode = mode;
392 inode->i_uid = current_fsuid(); 392 inode->i_uid = current_fsuid();
393 inode->i_gid = current_fsgid(); 393 inode->i_gid = current_fsgid();
394 inode->i_nlink = 1; 394 set_nlink(inode, 1);
395 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 395 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
396 396
397 hip = HFSPLUS_I(inode); 397 hip = HFSPLUS_I(inode);
@@ -512,7 +512,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
512 hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, 512 hfs_bnode_read(fd->bnode, &entry, fd->entryoffset,
513 sizeof(struct hfsplus_cat_folder)); 513 sizeof(struct hfsplus_cat_folder));
514 hfsplus_get_perms(inode, &folder->permissions, 1); 514 hfsplus_get_perms(inode, &folder->permissions, 1);
515 inode->i_nlink = 1; 515 set_nlink(inode, 1);
516 inode->i_size = 2 + be32_to_cpu(folder->valence); 516 inode->i_size = 2 + be32_to_cpu(folder->valence);
517 inode->i_atime = hfsp_mt2ut(folder->access_date); 517 inode->i_atime = hfsp_mt2ut(folder->access_date);
518 inode->i_mtime = hfsp_mt2ut(folder->content_mod_date); 518 inode->i_mtime = hfsp_mt2ut(folder->content_mod_date);
@@ -532,11 +532,11 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
532 hfsplus_inode_read_fork(inode, HFSPLUS_IS_RSRC(inode) ? 532 hfsplus_inode_read_fork(inode, HFSPLUS_IS_RSRC(inode) ?
533 &file->rsrc_fork : &file->data_fork); 533 &file->rsrc_fork : &file->data_fork);
534 hfsplus_get_perms(inode, &file->permissions, 0); 534 hfsplus_get_perms(inode, &file->permissions, 0);
535 inode->i_nlink = 1; 535 set_nlink(inode, 1);
536 if (S_ISREG(inode->i_mode)) { 536 if (S_ISREG(inode->i_mode)) {
537 if (file->permissions.dev) 537 if (file->permissions.dev)
538 inode->i_nlink = 538 set_nlink(inode,
539 be32_to_cpu(file->permissions.dev); 539 be32_to_cpu(file->permissions.dev));
540 inode->i_op = &hfsplus_file_inode_operations; 540 inode->i_op = &hfsplus_file_inode_operations;
541 inode->i_fop = &hfsplus_file_operations; 541 inode->i_fop = &hfsplus_file_operations;
542 inode->i_mapping->a_ops = &hfsplus_aops; 542 inode->i_mapping->a_ops = &hfsplus_aops;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 0d22afdd4611..2f72da5ae686 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -541,7 +541,7 @@ static int read_name(struct inode *ino, char *name)
541 541
542 ino->i_ino = st.ino; 542 ino->i_ino = st.ino;
543 ino->i_mode = st.mode; 543 ino->i_mode = st.mode;
544 ino->i_nlink = st.nlink; 544 set_nlink(ino, st.nlink);
545 ino->i_uid = st.uid; 545 ino->i_uid = st.uid;
546 ino->i_gid = st.gid; 546 ino->i_gid = st.gid;
547 ino->i_atime = st.atime; 547 ino->i_atime = st.atime;
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index d51a98384bc0..dd7bc38a3825 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -16,7 +16,6 @@
16#include <sys/vfs.h> 16#include <sys/vfs.h>
17#include "hostfs.h" 17#include "hostfs.h"
18#include "os.h" 18#include "os.h"
19#include "user.h"
20#include <utime.h> 19#include <utime.h>
21 20
22static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p) 21static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p)
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 96a8ed91cedd..2fa0089a02a8 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -247,7 +247,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
247 result->i_mode &= ~0111; 247 result->i_mode &= ~0111;
248 result->i_op = &hpfs_file_iops; 248 result->i_op = &hpfs_file_iops;
249 result->i_fop = &hpfs_file_ops; 249 result->i_fop = &hpfs_file_ops;
250 result->i_nlink = 1; 250 set_nlink(result, 1);
251 } 251 }
252 unlock_new_inode(result); 252 unlock_new_inode(result);
253 } 253 }
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 338cd8368451..3b2cec29972b 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -53,7 +53,7 @@ void hpfs_read_inode(struct inode *i)
53 i->i_mode &= ~0111; 53 i->i_mode &= ~0111;
54 i->i_op = &hpfs_file_iops; 54 i->i_op = &hpfs_file_iops;
55 i->i_fop = &hpfs_file_ops; 55 i->i_fop = &hpfs_file_ops;
56 i->i_nlink = 0;*/ 56 clear_nlink(i);*/
57 make_bad_inode(i); 57 make_bad_inode(i);
58 return; 58 return;
59 } 59 }
@@ -77,7 +77,7 @@ void hpfs_read_inode(struct inode *i)
77 i->i_mode = S_IFLNK | 0777; 77 i->i_mode = S_IFLNK | 0777;
78 i->i_op = &page_symlink_inode_operations; 78 i->i_op = &page_symlink_inode_operations;
79 i->i_data.a_ops = &hpfs_symlink_aops; 79 i->i_data.a_ops = &hpfs_symlink_aops;
80 i->i_nlink = 1; 80 set_nlink(i, 1);
81 i->i_size = ea_size; 81 i->i_size = ea_size;
82 i->i_blocks = 1; 82 i->i_blocks = 1;
83 brelse(bh); 83 brelse(bh);
@@ -101,7 +101,7 @@ void hpfs_read_inode(struct inode *i)
101 } 101 }
102 if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) { 102 if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) {
103 brelse(bh); 103 brelse(bh);
104 i->i_nlink = 1; 104 set_nlink(i, 1);
105 i->i_size = 0; 105 i->i_size = 0;
106 i->i_blocks = 1; 106 i->i_blocks = 1;
107 init_special_inode(i, mode, 107 init_special_inode(i, mode,
@@ -125,13 +125,13 @@ void hpfs_read_inode(struct inode *i)
125 hpfs_count_dnodes(i->i_sb, hpfs_inode->i_dno, &n_dnodes, &n_subdirs, NULL); 125 hpfs_count_dnodes(i->i_sb, hpfs_inode->i_dno, &n_dnodes, &n_subdirs, NULL);
126 i->i_blocks = 4 * n_dnodes; 126 i->i_blocks = 4 * n_dnodes;
127 i->i_size = 2048 * n_dnodes; 127 i->i_size = 2048 * n_dnodes;
128 i->i_nlink = 2 + n_subdirs; 128 set_nlink(i, 2 + n_subdirs);
129 } else { 129 } else {
130 i->i_mode |= S_IFREG; 130 i->i_mode |= S_IFREG;
131 if (!hpfs_inode->i_ea_mode) i->i_mode &= ~0111; 131 if (!hpfs_inode->i_ea_mode) i->i_mode &= ~0111;
132 i->i_op = &hpfs_file_iops; 132 i->i_op = &hpfs_file_iops;
133 i->i_fop = &hpfs_file_ops; 133 i->i_fop = &hpfs_file_ops;
134 i->i_nlink = 1; 134 set_nlink(i, 1);
135 i->i_size = le32_to_cpu(fnode->file_size); 135 i->i_size = le32_to_cpu(fnode->file_size);
136 i->i_blocks = ((i->i_size + 511) >> 9) + 1; 136 i->i_blocks = ((i->i_size + 511) >> 9) + 1;
137 i->i_data.a_ops = &hpfs_aops; 137 i->i_data.a_ops = &hpfs_aops;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 2df69e2f07cf..ea91fcb0ef9b 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -56,7 +56,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
56 result->i_fop = &hpfs_dir_ops; 56 result->i_fop = &hpfs_dir_ops;
57 result->i_blocks = 4; 57 result->i_blocks = 4;
58 result->i_size = 2048; 58 result->i_size = 2048;
59 result->i_nlink = 2; 59 set_nlink(result, 2);
60 if (dee.read_only) 60 if (dee.read_only)
61 result->i_mode &= ~0222; 61 result->i_mode &= ~0222;
62 62
@@ -150,7 +150,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
150 result->i_mode &= ~0111; 150 result->i_mode &= ~0111;
151 result->i_op = &hpfs_file_iops; 151 result->i_op = &hpfs_file_iops;
152 result->i_fop = &hpfs_file_ops; 152 result->i_fop = &hpfs_file_ops;
153 result->i_nlink = 1; 153 set_nlink(result, 1);
154 hpfs_i(result)->i_parent_dir = dir->i_ino; 154 hpfs_i(result)->i_parent_dir = dir->i_ino;
155 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)); 155 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
156 result->i_ctime.tv_nsec = 0; 156 result->i_ctime.tv_nsec = 0;
@@ -242,7 +242,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
242 hpfs_i(result)->i_ea_size = 0; 242 hpfs_i(result)->i_ea_size = 0;
243 result->i_uid = current_fsuid(); 243 result->i_uid = current_fsuid();
244 result->i_gid = current_fsgid(); 244 result->i_gid = current_fsgid();
245 result->i_nlink = 1; 245 set_nlink(result, 1);
246 result->i_size = 0; 246 result->i_size = 0;
247 result->i_blocks = 1; 247 result->i_blocks = 1;
248 init_special_inode(result, mode, rdev); 248 init_special_inode(result, mode, rdev);
@@ -318,7 +318,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
318 result->i_uid = current_fsuid(); 318 result->i_uid = current_fsuid();
319 result->i_gid = current_fsgid(); 319 result->i_gid = current_fsgid();
320 result->i_blocks = 1; 320 result->i_blocks = 1;
321 result->i_nlink = 1; 321 set_nlink(result, 1);
322 result->i_size = strlen(symlink); 322 result->i_size = strlen(symlink);
323 result->i_op = &page_symlink_inode_operations; 323 result->i_op = &page_symlink_inode_operations;
324 result->i_data.a_ops = &hpfs_symlink_aops; 324 result->i_data.a_ops = &hpfs_symlink_aops;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 970ea987b3f6..f590b1160c6c 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -702,7 +702,7 @@ static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
702 inode->i_ctime = proc_ino->i_ctime; 702 inode->i_ctime = proc_ino->i_ctime;
703 inode->i_ino = proc_ino->i_ino; 703 inode->i_ino = proc_ino->i_ino;
704 inode->i_mode = proc_ino->i_mode; 704 inode->i_mode = proc_ino->i_mode;
705 inode->i_nlink = proc_ino->i_nlink; 705 set_nlink(inode, proc_ino->i_nlink);
706 inode->i_size = proc_ino->i_size; 706 inode->i_size = proc_ino->i_size;
707 inode->i_blocks = proc_ino->i_blocks; 707 inode->i_blocks = proc_ino->i_blocks;
708 708
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index ec889538e5a6..0be5a78598d0 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -970,7 +970,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
970 970
971 d_instantiate(path.dentry, inode); 971 d_instantiate(path.dentry, inode);
972 inode->i_size = size; 972 inode->i_size = size;
973 inode->i_nlink = 0; 973 clear_nlink(inode);
974 974
975 error = -ENFILE; 975 error = -ENFILE;
976 file = alloc_file(&path, FMODE_WRITE | FMODE_READ, 976 file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
diff --git a/fs/inode.c b/fs/inode.c
index ecbb68dc7e2a..ee4e66b998f4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -142,7 +142,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
142 atomic_set(&inode->i_count, 1); 142 atomic_set(&inode->i_count, 1);
143 inode->i_op = &empty_iops; 143 inode->i_op = &empty_iops;
144 inode->i_fop = &empty_fops; 144 inode->i_fop = &empty_fops;
145 inode->i_nlink = 1; 145 inode->__i_nlink = 1;
146 inode->i_opflags = 0; 146 inode->i_opflags = 0;
147 inode->i_uid = 0; 147 inode->i_uid = 0;
148 inode->i_gid = 0; 148 inode->i_gid = 0;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 46844ff39d61..f950059525fc 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1327,7 +1327,7 @@ static int isofs_read_inode(struct inode *inode)
1327 inode->i_mode = S_IFDIR | sbi->s_dmode; 1327 inode->i_mode = S_IFDIR | sbi->s_dmode;
1328 else 1328 else
1329 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 1329 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
1330 inode->i_nlink = 1; /* 1330 set_nlink(inode, 1); /*
1331 * Set to 1. We know there are 2, but 1331 * Set to 1. We know there are 2, but
1332 * the find utility tries to optimize 1332 * the find utility tries to optimize
1333 * if it is 2, and it screws up. It is 1333 * if it is 2, and it screws up. It is
@@ -1345,7 +1345,7 @@ static int isofs_read_inode(struct inode *inode)
1345 */ 1345 */
1346 inode->i_mode = S_IFREG | S_IRUGO | S_IXUGO; 1346 inode->i_mode = S_IFREG | S_IRUGO | S_IXUGO;
1347 } 1347 }
1348 inode->i_nlink = 1; 1348 set_nlink(inode, 1);
1349 } 1349 }
1350 inode->i_uid = sbi->s_uid; 1350 inode->i_uid = sbi->s_uid;
1351 inode->i_gid = sbi->s_gid; 1351 inode->i_gid = sbi->s_gid;
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 1fbc7de88f50..70e79d0c756a 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -363,7 +363,7 @@ repeat:
363 break; 363 break;
364 case SIG('P', 'X'): 364 case SIG('P', 'X'):
365 inode->i_mode = isonum_733(rr->u.PX.mode); 365 inode->i_mode = isonum_733(rr->u.PX.mode);
366 inode->i_nlink = isonum_733(rr->u.PX.n_links); 366 set_nlink(inode, isonum_733(rr->u.PX.n_links));
367 inode->i_uid = isonum_733(rr->u.PX.uid); 367 inode->i_uid = isonum_733(rr->u.PX.uid);
368 inode->i_gid = isonum_733(rr->u.PX.gid); 368 inode->i_gid = isonum_733(rr->u.PX.gid);
369 break; 369 break;
@@ -496,7 +496,7 @@ repeat:
496 goto out; 496 goto out;
497 } 497 }
498 inode->i_mode = reloc->i_mode; 498 inode->i_mode = reloc->i_mode;
499 inode->i_nlink = reloc->i_nlink; 499 set_nlink(inode, reloc->i_nlink);
500 inode->i_uid = reloc->i_uid; 500 inode->i_uid = reloc->i_uid;
501 inode->i_gid = reloc->i_gid; 501 inode->i_gid = reloc->i_gid;
502 inode->i_rdev = reloc->i_rdev; 502 inode->i_rdev = reloc->i_rdev;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 9fe061fb8779..fea8dd661d2b 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1135,6 +1135,14 @@ static int journal_get_superblock(journal_t *journal)
1135 goto out; 1135 goto out;
1136 } 1136 }
1137 1137
1138 if (be32_to_cpu(sb->s_first) == 0 ||
1139 be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
1140 printk(KERN_WARNING
1141 "JBD: Invalid start block of journal: %u\n",
1142 be32_to_cpu(sb->s_first));
1143 goto out;
1144 }
1145
1138 return 0; 1146 return 0;
1139 1147
1140out: 1148out:
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index eef6979821a4..68d704db787f 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -352,7 +352,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
352 J_ASSERT(commit_transaction->t_state == T_RUNNING); 352 J_ASSERT(commit_transaction->t_state == T_RUNNING);
353 353
354 trace_jbd2_start_commit(journal, commit_transaction); 354 trace_jbd2_start_commit(journal, commit_transaction);
355 jbd_debug(1, "JBD: starting commit of transaction %d\n", 355 jbd_debug(1, "JBD2: starting commit of transaction %d\n",
356 commit_transaction->t_tid); 356 commit_transaction->t_tid);
357 357
358 write_lock(&journal->j_state_lock); 358 write_lock(&journal->j_state_lock);
@@ -427,7 +427,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
427 __jbd2_journal_clean_checkpoint_list(journal); 427 __jbd2_journal_clean_checkpoint_list(journal);
428 spin_unlock(&journal->j_list_lock); 428 spin_unlock(&journal->j_list_lock);
429 429
430 jbd_debug (3, "JBD: commit phase 1\n"); 430 jbd_debug(3, "JBD2: commit phase 1\n");
431 431
432 /* 432 /*
433 * Switch to a new revoke table. 433 * Switch to a new revoke table.
@@ -447,7 +447,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
447 wake_up(&journal->j_wait_transaction_locked); 447 wake_up(&journal->j_wait_transaction_locked);
448 write_unlock(&journal->j_state_lock); 448 write_unlock(&journal->j_state_lock);
449 449
450 jbd_debug (3, "JBD: commit phase 2\n"); 450 jbd_debug(3, "JBD2: commit phase 2\n");
451 451
452 /* 452 /*
453 * Now start flushing things to disk, in the order they appear 453 * Now start flushing things to disk, in the order they appear
@@ -462,7 +462,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
462 WRITE_SYNC); 462 WRITE_SYNC);
463 blk_finish_plug(&plug); 463 blk_finish_plug(&plug);
464 464
465 jbd_debug(3, "JBD: commit phase 2\n"); 465 jbd_debug(3, "JBD2: commit phase 2\n");
466 466
467 /* 467 /*
468 * Way to go: we have now written out all of the data for a 468 * Way to go: we have now written out all of the data for a
@@ -522,7 +522,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
522 522
523 J_ASSERT (bufs == 0); 523 J_ASSERT (bufs == 0);
524 524
525 jbd_debug(4, "JBD: get descriptor\n"); 525 jbd_debug(4, "JBD2: get descriptor\n");
526 526
527 descriptor = jbd2_journal_get_descriptor_buffer(journal); 527 descriptor = jbd2_journal_get_descriptor_buffer(journal);
528 if (!descriptor) { 528 if (!descriptor) {
@@ -531,7 +531,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
531 } 531 }
532 532
533 bh = jh2bh(descriptor); 533 bh = jh2bh(descriptor);
534 jbd_debug(4, "JBD: got buffer %llu (%p)\n", 534 jbd_debug(4, "JBD2: got buffer %llu (%p)\n",
535 (unsigned long long)bh->b_blocknr, bh->b_data); 535 (unsigned long long)bh->b_blocknr, bh->b_data);
536 header = (journal_header_t *)&bh->b_data[0]; 536 header = (journal_header_t *)&bh->b_data[0];
537 header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); 537 header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
@@ -625,7 +625,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
625 commit_transaction->t_buffers == NULL || 625 commit_transaction->t_buffers == NULL ||
626 space_left < tag_bytes + 16) { 626 space_left < tag_bytes + 16) {
627 627
628 jbd_debug(4, "JBD: Submit %d IOs\n", bufs); 628 jbd_debug(4, "JBD2: Submit %d IOs\n", bufs);
629 629
630 /* Write an end-of-descriptor marker before 630 /* Write an end-of-descriptor marker before
631 submitting the IOs. "tag" still points to 631 submitting the IOs. "tag" still points to
@@ -707,7 +707,7 @@ start_journal_io:
707 so we incur less scheduling load. 707 so we incur less scheduling load.
708 */ 708 */
709 709
710 jbd_debug(3, "JBD: commit phase 3\n"); 710 jbd_debug(3, "JBD2: commit phase 3\n");
711 711
712 /* 712 /*
713 * akpm: these are BJ_IO, and j_list_lock is not needed. 713 * akpm: these are BJ_IO, and j_list_lock is not needed.
@@ -771,7 +771,7 @@ wait_for_iobuf:
771 771
772 J_ASSERT (commit_transaction->t_shadow_list == NULL); 772 J_ASSERT (commit_transaction->t_shadow_list == NULL);
773 773
774 jbd_debug(3, "JBD: commit phase 4\n"); 774 jbd_debug(3, "JBD2: commit phase 4\n");
775 775
776 /* Here we wait for the revoke record and descriptor record buffers */ 776 /* Here we wait for the revoke record and descriptor record buffers */
777 wait_for_ctlbuf: 777 wait_for_ctlbuf:
@@ -801,7 +801,7 @@ wait_for_iobuf:
801 if (err) 801 if (err)
802 jbd2_journal_abort(journal, err); 802 jbd2_journal_abort(journal, err);
803 803
804 jbd_debug(3, "JBD: commit phase 5\n"); 804 jbd_debug(3, "JBD2: commit phase 5\n");
805 write_lock(&journal->j_state_lock); 805 write_lock(&journal->j_state_lock);
806 J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH); 806 J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
807 commit_transaction->t_state = T_COMMIT_JFLUSH; 807 commit_transaction->t_state = T_COMMIT_JFLUSH;
@@ -830,7 +830,7 @@ wait_for_iobuf:
830 transaction can be removed from any checkpoint list it was on 830 transaction can be removed from any checkpoint list it was on
831 before. */ 831 before. */
832 832
833 jbd_debug(3, "JBD: commit phase 6\n"); 833 jbd_debug(3, "JBD2: commit phase 6\n");
834 834
835 J_ASSERT(list_empty(&commit_transaction->t_inode_list)); 835 J_ASSERT(list_empty(&commit_transaction->t_inode_list));
836 J_ASSERT(commit_transaction->t_buffers == NULL); 836 J_ASSERT(commit_transaction->t_buffers == NULL);
@@ -964,7 +964,7 @@ restart_loop:
964 964
965 /* Done with this transaction! */ 965 /* Done with this transaction! */
966 966
967 jbd_debug(3, "JBD: commit phase 7\n"); 967 jbd_debug(3, "JBD2: commit phase 7\n");
968 968
969 J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH); 969 J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
970 970
@@ -1039,7 +1039,7 @@ restart_loop:
1039 journal->j_commit_callback(journal, commit_transaction); 1039 journal->j_commit_callback(journal, commit_transaction);
1040 1040
1041 trace_jbd2_end_commit(journal, commit_transaction); 1041 trace_jbd2_end_commit(journal, commit_transaction);
1042 jbd_debug(1, "JBD: commit %d complete, head %d\n", 1042 jbd_debug(1, "JBD2: commit %d complete, head %d\n",
1043 journal->j_commit_sequence, journal->j_tail_sequence); 1043 journal->j_commit_sequence, journal->j_tail_sequence);
1044 if (to_free) 1044 if (to_free)
1045 kfree(commit_transaction); 1045 kfree(commit_transaction);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f24df13adc4e..0fa0123151d3 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -491,7 +491,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
491 */ 491 */
492 492
493 journal->j_commit_request = target; 493 journal->j_commit_request = target;
494 jbd_debug(1, "JBD: requesting commit %d/%d\n", 494 jbd_debug(1, "JBD2: requesting commit %d/%d\n",
495 journal->j_commit_request, 495 journal->j_commit_request,
496 journal->j_commit_sequence); 496 journal->j_commit_sequence);
497 wake_up(&journal->j_wait_commit); 497 wake_up(&journal->j_wait_commit);
@@ -500,7 +500,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
500 /* This should never happen, but if it does, preserve 500 /* This should never happen, but if it does, preserve
501 the evidence before kjournald goes into a loop and 501 the evidence before kjournald goes into a loop and
502 increments j_commit_sequence beyond all recognition. */ 502 increments j_commit_sequence beyond all recognition. */
503 WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n", 503 WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n",
504 journal->j_commit_request, 504 journal->j_commit_request,
505 journal->j_commit_sequence, 505 journal->j_commit_sequence,
506 target, journal->j_running_transaction ? 506 target, journal->j_running_transaction ?
@@ -645,7 +645,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
645 } 645 }
646#endif 646#endif
647 while (tid_gt(tid, journal->j_commit_sequence)) { 647 while (tid_gt(tid, journal->j_commit_sequence)) {
648 jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", 648 jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n",
649 tid, journal->j_commit_sequence); 649 tid, journal->j_commit_sequence);
650 wake_up(&journal->j_wait_commit); 650 wake_up(&journal->j_wait_commit);
651 read_unlock(&journal->j_state_lock); 651 read_unlock(&journal->j_state_lock);
@@ -1093,7 +1093,7 @@ static int journal_reset(journal_t *journal)
1093 first = be32_to_cpu(sb->s_first); 1093 first = be32_to_cpu(sb->s_first);
1094 last = be32_to_cpu(sb->s_maxlen); 1094 last = be32_to_cpu(sb->s_maxlen);
1095 if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) { 1095 if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) {
1096 printk(KERN_ERR "JBD: Journal too short (blocks %llu-%llu).\n", 1096 printk(KERN_ERR "JBD2: Journal too short (blocks %llu-%llu).\n",
1097 first, last); 1097 first, last);
1098 journal_fail_superblock(journal); 1098 journal_fail_superblock(journal);
1099 return -EINVAL; 1099 return -EINVAL;
@@ -1139,7 +1139,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
1139 */ 1139 */
1140 if (sb->s_start == 0 && journal->j_tail_sequence == 1140 if (sb->s_start == 0 && journal->j_tail_sequence ==
1141 journal->j_transaction_sequence) { 1141 journal->j_transaction_sequence) {
1142 jbd_debug(1,"JBD: Skipping superblock update on recovered sb " 1142 jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
1143 "(start %ld, seq %d, errno %d)\n", 1143 "(start %ld, seq %d, errno %d)\n",
1144 journal->j_tail, journal->j_tail_sequence, 1144 journal->j_tail, journal->j_tail_sequence,
1145 journal->j_errno); 1145 journal->j_errno);
@@ -1163,7 +1163,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
1163 } 1163 }
1164 1164
1165 read_lock(&journal->j_state_lock); 1165 read_lock(&journal->j_state_lock);
1166 jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", 1166 jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d, errno %d)\n",
1167 journal->j_tail, journal->j_tail_sequence, journal->j_errno); 1167 journal->j_tail, journal->j_tail_sequence, journal->j_errno);
1168 1168
1169 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); 1169 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
@@ -1216,8 +1216,8 @@ static int journal_get_superblock(journal_t *journal)
1216 ll_rw_block(READ, 1, &bh); 1216 ll_rw_block(READ, 1, &bh);
1217 wait_on_buffer(bh); 1217 wait_on_buffer(bh);
1218 if (!buffer_uptodate(bh)) { 1218 if (!buffer_uptodate(bh)) {
1219 printk (KERN_ERR 1219 printk(KERN_ERR
1220 "JBD: IO error reading journal superblock\n"); 1220 "JBD2: IO error reading journal superblock\n");
1221 goto out; 1221 goto out;
1222 } 1222 }
1223 } 1223 }
@@ -1228,7 +1228,7 @@ static int journal_get_superblock(journal_t *journal)
1228 1228
1229 if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) || 1229 if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) ||
1230 sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { 1230 sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
1231 printk(KERN_WARNING "JBD: no valid journal superblock found\n"); 1231 printk(KERN_WARNING "JBD2: no valid journal superblock found\n");
1232 goto out; 1232 goto out;
1233 } 1233 }
1234 1234
@@ -1240,14 +1240,22 @@ static int journal_get_superblock(journal_t *journal)
1240 journal->j_format_version = 2; 1240 journal->j_format_version = 2;
1241 break; 1241 break;
1242 default: 1242 default:
1243 printk(KERN_WARNING "JBD: unrecognised superblock format ID\n"); 1243 printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n");
1244 goto out; 1244 goto out;
1245 } 1245 }
1246 1246
1247 if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) 1247 if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen)
1248 journal->j_maxlen = be32_to_cpu(sb->s_maxlen); 1248 journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
1249 else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { 1249 else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
1250 printk (KERN_WARNING "JBD: journal file too short\n"); 1250 printk(KERN_WARNING "JBD2: journal file too short\n");
1251 goto out;
1252 }
1253
1254 if (be32_to_cpu(sb->s_first) == 0 ||
1255 be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
1256 printk(KERN_WARNING
1257 "JBD2: Invalid start block of journal: %u\n",
1258 be32_to_cpu(sb->s_first));
1251 goto out; 1259 goto out;
1252 } 1260 }
1253 1261
@@ -1310,8 +1318,8 @@ int jbd2_journal_load(journal_t *journal)
1310 ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) || 1318 ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) ||
1311 (sb->s_feature_incompat & 1319 (sb->s_feature_incompat &
1312 ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) { 1320 ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) {
1313 printk (KERN_WARNING 1321 printk(KERN_WARNING
1314 "JBD: Unrecognised features on journal\n"); 1322 "JBD2: Unrecognised features on journal\n");
1315 return -EINVAL; 1323 return -EINVAL;
1316 } 1324 }
1317 } 1325 }
@@ -1346,7 +1354,7 @@ int jbd2_journal_load(journal_t *journal)
1346 return 0; 1354 return 0;
1347 1355
1348recovery_error: 1356recovery_error:
1349 printk (KERN_WARNING "JBD: recovery failed\n"); 1357 printk(KERN_WARNING "JBD2: recovery failed\n");
1350 return -EIO; 1358 return -EIO;
1351} 1359}
1352 1360
@@ -1577,7 +1585,7 @@ static int journal_convert_superblock_v1(journal_t *journal,
1577 struct buffer_head *bh; 1585 struct buffer_head *bh;
1578 1586
1579 printk(KERN_WARNING 1587 printk(KERN_WARNING
1580 "JBD: Converting superblock from version 1 to 2.\n"); 1588 "JBD2: Converting superblock from version 1 to 2.\n");
1581 1589
1582 /* Pre-initialise new fields to zero */ 1590 /* Pre-initialise new fields to zero */
1583 offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); 1591 offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb);
@@ -1694,7 +1702,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
1694 if (!journal->j_tail) 1702 if (!journal->j_tail)
1695 goto no_recovery; 1703 goto no_recovery;
1696 1704
1697 printk (KERN_WARNING "JBD: %s recovery information on journal\n", 1705 printk(KERN_WARNING "JBD2: %s recovery information on journal\n",
1698 write ? "Clearing" : "Ignoring"); 1706 write ? "Clearing" : "Ignoring");
1699 1707
1700 err = jbd2_journal_skip_recovery(journal); 1708 err = jbd2_journal_skip_recovery(journal);
@@ -2020,7 +2028,7 @@ static int journal_init_jbd2_journal_head_cache(void)
2020 retval = 0; 2028 retval = 0;
2021 if (!jbd2_journal_head_cache) { 2029 if (!jbd2_journal_head_cache) {
2022 retval = -ENOMEM; 2030 retval = -ENOMEM;
2023 printk(KERN_EMERG "JBD: no memory for journal_head cache\n"); 2031 printk(KERN_EMERG "JBD2: no memory for journal_head cache\n");
2024 } 2032 }
2025 return retval; 2033 return retval;
2026} 2034}
@@ -2383,7 +2391,7 @@ static void __exit journal_exit(void)
2383#ifdef CONFIG_JBD2_DEBUG 2391#ifdef CONFIG_JBD2_DEBUG
2384 int n = atomic_read(&nr_journal_heads); 2392 int n = atomic_read(&nr_journal_heads);
2385 if (n) 2393 if (n)
2386 printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); 2394 printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n);
2387#endif 2395#endif
2388 jbd2_remove_debugfs_entry(); 2396 jbd2_remove_debugfs_entry();
2389 jbd2_remove_jbd_stats_proc_entry(); 2397 jbd2_remove_jbd_stats_proc_entry();
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 1cad869494f0..da6d7baf1390 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -89,7 +89,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
89 err = jbd2_journal_bmap(journal, next, &blocknr); 89 err = jbd2_journal_bmap(journal, next, &blocknr);
90 90
91 if (err) { 91 if (err) {
92 printk (KERN_ERR "JBD: bad block at offset %u\n", 92 printk(KERN_ERR "JBD2: bad block at offset %u\n",
93 next); 93 next);
94 goto failed; 94 goto failed;
95 } 95 }
@@ -138,14 +138,14 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
138 *bhp = NULL; 138 *bhp = NULL;
139 139
140 if (offset >= journal->j_maxlen) { 140 if (offset >= journal->j_maxlen) {
141 printk(KERN_ERR "JBD: corrupted journal superblock\n"); 141 printk(KERN_ERR "JBD2: corrupted journal superblock\n");
142 return -EIO; 142 return -EIO;
143 } 143 }
144 144
145 err = jbd2_journal_bmap(journal, offset, &blocknr); 145 err = jbd2_journal_bmap(journal, offset, &blocknr);
146 146
147 if (err) { 147 if (err) {
148 printk (KERN_ERR "JBD: bad block at offset %u\n", 148 printk(KERN_ERR "JBD2: bad block at offset %u\n",
149 offset); 149 offset);
150 return err; 150 return err;
151 } 151 }
@@ -163,7 +163,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
163 } 163 }
164 164
165 if (!buffer_uptodate(bh)) { 165 if (!buffer_uptodate(bh)) {
166 printk (KERN_ERR "JBD: Failed to read block at offset %u\n", 166 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
167 offset); 167 offset);
168 brelse(bh); 168 brelse(bh);
169 return -EIO; 169 return -EIO;
@@ -251,10 +251,10 @@ int jbd2_journal_recover(journal_t *journal)
251 if (!err) 251 if (!err)
252 err = do_one_pass(journal, &info, PASS_REPLAY); 252 err = do_one_pass(journal, &info, PASS_REPLAY);
253 253
254 jbd_debug(1, "JBD: recovery, exit status %d, " 254 jbd_debug(1, "JBD2: recovery, exit status %d, "
255 "recovered transactions %u to %u\n", 255 "recovered transactions %u to %u\n",
256 err, info.start_transaction, info.end_transaction); 256 err, info.start_transaction, info.end_transaction);
257 jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n", 257 jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
258 info.nr_replays, info.nr_revoke_hits, info.nr_revokes); 258 info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
259 259
260 /* Restart the log at the next transaction ID, thus invalidating 260 /* Restart the log at the next transaction ID, thus invalidating
@@ -293,14 +293,14 @@ int jbd2_journal_skip_recovery(journal_t *journal)
293 err = do_one_pass(journal, &info, PASS_SCAN); 293 err = do_one_pass(journal, &info, PASS_SCAN);
294 294
295 if (err) { 295 if (err) {
296 printk(KERN_ERR "JBD: error %d scanning journal\n", err); 296 printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
297 ++journal->j_transaction_sequence; 297 ++journal->j_transaction_sequence;
298 } else { 298 } else {
299#ifdef CONFIG_JBD2_DEBUG 299#ifdef CONFIG_JBD2_DEBUG
300 int dropped = info.end_transaction - 300 int dropped = info.end_transaction -
301 be32_to_cpu(journal->j_superblock->s_sequence); 301 be32_to_cpu(journal->j_superblock->s_sequence);
302 jbd_debug(1, 302 jbd_debug(1,
303 "JBD: ignoring %d transaction%s from the journal.\n", 303 "JBD2: ignoring %d transaction%s from the journal.\n",
304 dropped, (dropped == 1) ? "" : "s"); 304 dropped, (dropped == 1) ? "" : "s");
305#endif 305#endif
306 journal->j_transaction_sequence = ++info.end_transaction; 306 journal->j_transaction_sequence = ++info.end_transaction;
@@ -338,7 +338,7 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh,
338 wrap(journal, *next_log_block); 338 wrap(journal, *next_log_block);
339 err = jread(&obh, journal, io_block); 339 err = jread(&obh, journal, io_block);
340 if (err) { 340 if (err) {
341 printk(KERN_ERR "JBD: IO error %d recovering block " 341 printk(KERN_ERR "JBD2: IO error %d recovering block "
342 "%lu in log\n", err, io_block); 342 "%lu in log\n", err, io_block);
343 return 1; 343 return 1;
344 } else { 344 } else {
@@ -411,7 +411,7 @@ static int do_one_pass(journal_t *journal,
411 * either the next descriptor block or the final commit 411 * either the next descriptor block or the final commit
412 * record. */ 412 * record. */
413 413
414 jbd_debug(3, "JBD: checking block %ld\n", next_log_block); 414 jbd_debug(3, "JBD2: checking block %ld\n", next_log_block);
415 err = jread(&bh, journal, next_log_block); 415 err = jread(&bh, journal, next_log_block);
416 if (err) 416 if (err)
417 goto failed; 417 goto failed;
@@ -491,8 +491,8 @@ static int do_one_pass(journal_t *journal,
491 /* Recover what we can, but 491 /* Recover what we can, but
492 * report failure at the end. */ 492 * report failure at the end. */
493 success = err; 493 success = err;
494 printk (KERN_ERR 494 printk(KERN_ERR
495 "JBD: IO error %d recovering " 495 "JBD2: IO error %d recovering "
496 "block %ld in log\n", 496 "block %ld in log\n",
497 err, io_block); 497 err, io_block);
498 } else { 498 } else {
@@ -520,7 +520,7 @@ static int do_one_pass(journal_t *journal,
520 journal->j_blocksize); 520 journal->j_blocksize);
521 if (nbh == NULL) { 521 if (nbh == NULL) {
522 printk(KERN_ERR 522 printk(KERN_ERR
523 "JBD: Out of memory " 523 "JBD2: Out of memory "
524 "during recovery.\n"); 524 "during recovery.\n");
525 err = -ENOMEM; 525 err = -ENOMEM;
526 brelse(bh); 526 brelse(bh);
@@ -689,7 +689,7 @@ static int do_one_pass(journal_t *journal,
689 /* It's really bad news if different passes end up at 689 /* It's really bad news if different passes end up at
690 * different places (but possible due to IO errors). */ 690 * different places (but possible due to IO errors). */
691 if (info->end_transaction != next_commit_ID) { 691 if (info->end_transaction != next_commit_ID) {
692 printk (KERN_ERR "JBD: recovery pass %d ended at " 692 printk(KERN_ERR "JBD2: recovery pass %d ended at "
693 "transaction %u, expected %u\n", 693 "transaction %u, expected %u\n",
694 pass, next_commit_ID, info->end_transaction); 694 pass, next_commit_ID, info->end_transaction);
695 if (!success) 695 if (!success)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 2d7109414cdd..a0e41a4c080e 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -27,6 +27,7 @@
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/hrtimer.h> 28#include <linux/hrtimer.h>
29#include <linux/backing-dev.h> 29#include <linux/backing-dev.h>
30#include <linux/bug.h>
30#include <linux/module.h> 31#include <linux/module.h>
31 32
32static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); 33static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
@@ -115,7 +116,7 @@ static inline void update_t_max_wait(transaction_t *transaction,
115 */ 116 */
116 117
117static int start_this_handle(journal_t *journal, handle_t *handle, 118static int start_this_handle(journal_t *journal, handle_t *handle,
118 int gfp_mask) 119 gfp_t gfp_mask)
119{ 120{
120 transaction_t *transaction, *new_transaction = NULL; 121 transaction_t *transaction, *new_transaction = NULL;
121 tid_t tid; 122 tid_t tid;
@@ -124,7 +125,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
124 unsigned long ts = jiffies; 125 unsigned long ts = jiffies;
125 126
126 if (nblocks > journal->j_max_transaction_buffers) { 127 if (nblocks > journal->j_max_transaction_buffers) {
127 printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", 128 printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n",
128 current->comm, nblocks, 129 current->comm, nblocks,
129 journal->j_max_transaction_buffers); 130 journal->j_max_transaction_buffers);
130 return -ENOSPC; 131 return -ENOSPC;
@@ -320,7 +321,7 @@ static handle_t *new_handle(int nblocks)
320 * Return a pointer to a newly allocated handle, or an ERR_PTR() value 321 * Return a pointer to a newly allocated handle, or an ERR_PTR() value
321 * on failure. 322 * on failure.
322 */ 323 */
323handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask) 324handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask)
324{ 325{
325 handle_t *handle = journal_current_handle(); 326 handle_t *handle = journal_current_handle();
326 int err; 327 int err;
@@ -443,7 +444,7 @@ out:
443 * transaction capabable of guaranteeing the requested number of 444 * transaction capabable of guaranteeing the requested number of
444 * credits. 445 * credits.
445 */ 446 */
446int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) 447int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
447{ 448{
448 transaction_t *transaction = handle->h_transaction; 449 transaction_t *transaction = handle->h_transaction;
449 journal_t *journal = transaction->t_journal; 450 journal_t *journal = transaction->t_journal;
@@ -563,7 +564,7 @@ static void warn_dirty_buffer(struct buffer_head *bh)
563 char b[BDEVNAME_SIZE]; 564 char b[BDEVNAME_SIZE];
564 565
565 printk(KERN_WARNING 566 printk(KERN_WARNING
566 "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). " 567 "JBD2: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
567 "There's a risk of filesystem corruption in case of system " 568 "There's a risk of filesystem corruption in case of system "
568 "crash.\n", 569 "crash.\n",
569 bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); 570 bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
@@ -1049,6 +1050,10 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
1049 * mark dirty metadata which needs to be journaled as part of the current 1050 * mark dirty metadata which needs to be journaled as part of the current
1050 * transaction. 1051 * transaction.
1051 * 1052 *
1053 * The buffer must have previously had jbd2_journal_get_write_access()
1054 * called so that it has a valid journal_head attached to the buffer
1055 * head.
1056 *
1052 * The buffer is placed on the transaction's metadata list and is marked 1057 * The buffer is placed on the transaction's metadata list and is marked
1053 * as belonging to the transaction. 1058 * as belonging to the transaction.
1054 * 1059 *
@@ -1065,11 +1070,16 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1065 transaction_t *transaction = handle->h_transaction; 1070 transaction_t *transaction = handle->h_transaction;
1066 journal_t *journal = transaction->t_journal; 1071 journal_t *journal = transaction->t_journal;
1067 struct journal_head *jh = bh2jh(bh); 1072 struct journal_head *jh = bh2jh(bh);
1073 int ret = 0;
1068 1074
1069 jbd_debug(5, "journal_head %p\n", jh); 1075 jbd_debug(5, "journal_head %p\n", jh);
1070 JBUFFER_TRACE(jh, "entry"); 1076 JBUFFER_TRACE(jh, "entry");
1071 if (is_handle_aborted(handle)) 1077 if (is_handle_aborted(handle))
1072 goto out; 1078 goto out;
1079 if (!buffer_jbd(bh)) {
1080 ret = -EUCLEAN;
1081 goto out;
1082 }
1073 1083
1074 jbd_lock_bh_state(bh); 1084 jbd_lock_bh_state(bh);
1075 1085
@@ -1093,8 +1103,20 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1093 */ 1103 */
1094 if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) { 1104 if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
1095 JBUFFER_TRACE(jh, "fastpath"); 1105 JBUFFER_TRACE(jh, "fastpath");
1096 J_ASSERT_JH(jh, jh->b_transaction == 1106 if (unlikely(jh->b_transaction !=
1097 journal->j_running_transaction); 1107 journal->j_running_transaction)) {
1108 printk(KERN_EMERG "JBD: %s: "
1109 "jh->b_transaction (%llu, %p, %u) != "
1110 "journal->j_running_transaction (%p, %u)",
1111 journal->j_devname,
1112 (unsigned long long) bh->b_blocknr,
1113 jh->b_transaction,
1114 jh->b_transaction ? jh->b_transaction->t_tid : 0,
1115 journal->j_running_transaction,
1116 journal->j_running_transaction ?
1117 journal->j_running_transaction->t_tid : 0);
1118 ret = -EINVAL;
1119 }
1098 goto out_unlock_bh; 1120 goto out_unlock_bh;
1099 } 1121 }
1100 1122
@@ -1108,9 +1130,32 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1108 */ 1130 */
1109 if (jh->b_transaction != transaction) { 1131 if (jh->b_transaction != transaction) {
1110 JBUFFER_TRACE(jh, "already on other transaction"); 1132 JBUFFER_TRACE(jh, "already on other transaction");
1111 J_ASSERT_JH(jh, jh->b_transaction == 1133 if (unlikely(jh->b_transaction !=
1112 journal->j_committing_transaction); 1134 journal->j_committing_transaction)) {
1113 J_ASSERT_JH(jh, jh->b_next_transaction == transaction); 1135 printk(KERN_EMERG "JBD: %s: "
1136 "jh->b_transaction (%llu, %p, %u) != "
1137 "journal->j_committing_transaction (%p, %u)",
1138 journal->j_devname,
1139 (unsigned long long) bh->b_blocknr,
1140 jh->b_transaction,
1141 jh->b_transaction ? jh->b_transaction->t_tid : 0,
1142 journal->j_committing_transaction,
1143 journal->j_committing_transaction ?
1144 journal->j_committing_transaction->t_tid : 0);
1145 ret = -EINVAL;
1146 }
1147 if (unlikely(jh->b_next_transaction != transaction)) {
1148 printk(KERN_EMERG "JBD: %s: "
1149 "jh->b_next_transaction (%llu, %p, %u) != "
1150 "transaction (%p, %u)",
1151 journal->j_devname,
1152 (unsigned long long) bh->b_blocknr,
1153 jh->b_next_transaction,
1154 jh->b_next_transaction ?
1155 jh->b_next_transaction->t_tid : 0,
1156 transaction, transaction->t_tid);
1157 ret = -EINVAL;
1158 }
1114 /* And this case is illegal: we can't reuse another 1159 /* And this case is illegal: we can't reuse another
1115 * transaction's data buffer, ever. */ 1160 * transaction's data buffer, ever. */
1116 goto out_unlock_bh; 1161 goto out_unlock_bh;
@@ -1127,7 +1172,8 @@ out_unlock_bh:
1127 jbd_unlock_bh_state(bh); 1172 jbd_unlock_bh_state(bh);
1128out: 1173out:
1129 JBUFFER_TRACE(jh, "exit"); 1174 JBUFFER_TRACE(jh, "exit");
1130 return 0; 1175 WARN_ON(ret); /* All errors are bugs, so dump the stack */
1176 return ret;
1131} 1177}
1132 1178
1133/* 1179/*
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 9659b7c00468..be6169bd8acd 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -245,7 +245,7 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry)
245 ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name, 245 ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
246 dentry->d_name.len, dead_f, now); 246 dentry->d_name.len, dead_f, now);
247 if (dead_f->inocache) 247 if (dead_f->inocache)
248 dentry->d_inode->i_nlink = dead_f->inocache->pino_nlink; 248 set_nlink(dentry->d_inode, dead_f->inocache->pino_nlink);
249 if (!ret) 249 if (!ret)
250 dir_i->i_mtime = dir_i->i_ctime = ITIME(now); 250 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
251 return ret; 251 return ret;
@@ -278,7 +278,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
278 278
279 if (!ret) { 279 if (!ret) {
280 mutex_lock(&f->sem); 280 mutex_lock(&f->sem);
281 old_dentry->d_inode->i_nlink = ++f->inocache->pino_nlink; 281 set_nlink(old_dentry->d_inode, ++f->inocache->pino_nlink);
282 mutex_unlock(&f->sem); 282 mutex_unlock(&f->sem);
283 d_instantiate(dentry, old_dentry->d_inode); 283 d_instantiate(dentry, old_dentry->d_inode);
284 dir_i->i_mtime = dir_i->i_ctime = ITIME(now); 284 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
@@ -497,7 +497,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
497 f = JFFS2_INODE_INFO(inode); 497 f = JFFS2_INODE_INFO(inode);
498 498
499 /* Directories get nlink 2 at start */ 499 /* Directories get nlink 2 at start */
500 inode->i_nlink = 2; 500 set_nlink(inode, 2);
501 /* but ic->pino_nlink is the parent ino# */ 501 /* but ic->pino_nlink is the parent ino# */
502 f->inocache->pino_nlink = dir_i->i_ino; 502 f->inocache->pino_nlink = dir_i->i_ino;
503 503
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index bbcb9755dd2b..7286e44ac665 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -278,7 +278,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
278 inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime)); 278 inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime));
279 inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime)); 279 inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime));
280 280
281 inode->i_nlink = f->inocache->pino_nlink; 281 set_nlink(inode, f->inocache->pino_nlink);
282 282
283 inode->i_blocks = (inode->i_size + 511) >> 9; 283 inode->i_blocks = (inode->i_size + 511) >> 9;
284 284
@@ -291,7 +291,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
291 case S_IFDIR: 291 case S_IFDIR:
292 { 292 {
293 struct jffs2_full_dirent *fd; 293 struct jffs2_full_dirent *fd;
294 inode->i_nlink = 2; /* parent and '.' */ 294 set_nlink(inode, 2); /* parent and '.' */
295 295
296 for (fd=f->dents; fd; fd = fd->next) { 296 for (fd=f->dents; fd; fd = fd->next) {
297 if (fd->type == DT_DIR && fd->ino) 297 if (fd->type == DT_DIR && fd->ino)
@@ -453,7 +453,7 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
453 iput(inode); 453 iput(inode);
454 return ERR_PTR(ret); 454 return ERR_PTR(ret);
455 } 455 }
456 inode->i_nlink = 1; 456 set_nlink(inode, 1);
457 inode->i_ino = je32_to_cpu(ri->ino); 457 inode->i_ino = je32_to_cpu(ri->ino);
458 inode->i_mode = jemode_to_cpu(ri->mode); 458 inode->i_mode = jemode_to_cpu(ri->mode);
459 inode->i_gid = je16_to_cpu(ri->gid); 459 inode->i_gid = je16_to_cpu(ri->gid);
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index b78b2f978f04..1b6f15f191b3 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -457,7 +457,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
457 /* read the page of fixed disk inode (AIT) in raw mode */ 457 /* read the page of fixed disk inode (AIT) in raw mode */
458 mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); 458 mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
459 if (mp == NULL) { 459 if (mp == NULL) {
460 ip->i_nlink = 1; /* Don't want iput() deleting it */ 460 set_nlink(ip, 1); /* Don't want iput() deleting it */
461 iput(ip); 461 iput(ip);
462 return (NULL); 462 return (NULL);
463 } 463 }
@@ -469,7 +469,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
469 /* copy on-disk inode to in-memory inode */ 469 /* copy on-disk inode to in-memory inode */
470 if ((copy_from_dinode(dp, ip)) != 0) { 470 if ((copy_from_dinode(dp, ip)) != 0) {
471 /* handle bad return by returning NULL for ip */ 471 /* handle bad return by returning NULL for ip */
472 ip->i_nlink = 1; /* Don't want iput() deleting it */ 472 set_nlink(ip, 1); /* Don't want iput() deleting it */
473 iput(ip); 473 iput(ip);
474 /* release the page */ 474 /* release the page */
475 release_metapage(mp); 475 release_metapage(mp);
@@ -3076,7 +3076,7 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
3076 ip->i_mode |= 0001; 3076 ip->i_mode |= 0001;
3077 } 3077 }
3078 } 3078 }
3079 ip->i_nlink = le32_to_cpu(dip->di_nlink); 3079 set_nlink(ip, le32_to_cpu(dip->di_nlink));
3080 3080
3081 jfs_ip->saved_uid = le32_to_cpu(dip->di_uid); 3081 jfs_ip->saved_uid = le32_to_cpu(dip->di_uid);
3082 if (sbi->uid == -1) 3082 if (sbi->uid == -1)
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 2686531e235a..c1a3e603279c 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -157,7 +157,7 @@ fail_drop:
157 dquot_drop(inode); 157 dquot_drop(inode);
158 inode->i_flags |= S_NOQUOTA; 158 inode->i_flags |= S_NOQUOTA;
159fail_unlock: 159fail_unlock:
160 inode->i_nlink = 0; 160 clear_nlink(inode);
161 unlock_new_inode(inode); 161 unlock_new_inode(inode);
162fail_put: 162fail_put:
163 iput(inode); 163 iput(inode);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index e17545e15664..a112ad96e474 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -172,7 +172,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
172 mutex_unlock(&JFS_IP(dip)->commit_mutex); 172 mutex_unlock(&JFS_IP(dip)->commit_mutex);
173 if (rc) { 173 if (rc) {
174 free_ea_wmap(ip); 174 free_ea_wmap(ip);
175 ip->i_nlink = 0; 175 clear_nlink(ip);
176 unlock_new_inode(ip); 176 unlock_new_inode(ip);
177 iput(ip); 177 iput(ip);
178 } else { 178 } else {
@@ -292,7 +292,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
292 goto out3; 292 goto out3;
293 } 293 }
294 294
295 ip->i_nlink = 2; /* for '.' */ 295 set_nlink(ip, 2); /* for '.' */
296 ip->i_op = &jfs_dir_inode_operations; 296 ip->i_op = &jfs_dir_inode_operations;
297 ip->i_fop = &jfs_dir_operations; 297 ip->i_fop = &jfs_dir_operations;
298 298
@@ -311,7 +311,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
311 mutex_unlock(&JFS_IP(dip)->commit_mutex); 311 mutex_unlock(&JFS_IP(dip)->commit_mutex);
312 if (rc) { 312 if (rc) {
313 free_ea_wmap(ip); 313 free_ea_wmap(ip);
314 ip->i_nlink = 0; 314 clear_nlink(ip);
315 unlock_new_inode(ip); 315 unlock_new_inode(ip);
316 iput(ip); 316 iput(ip);
317 } else { 317 } else {
@@ -844,7 +844,7 @@ static int jfs_link(struct dentry *old_dentry,
844 rc = txCommit(tid, 2, &iplist[0], 0); 844 rc = txCommit(tid, 2, &iplist[0], 0);
845 845
846 if (rc) { 846 if (rc) {
847 ip->i_nlink--; /* never instantiated */ 847 drop_nlink(ip); /* never instantiated */
848 iput(ip); 848 iput(ip);
849 } else 849 } else
850 d_instantiate(dentry, ip); 850 d_instantiate(dentry, ip);
@@ -1048,7 +1048,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
1048 mutex_unlock(&JFS_IP(dip)->commit_mutex); 1048 mutex_unlock(&JFS_IP(dip)->commit_mutex);
1049 if (rc) { 1049 if (rc) {
1050 free_ea_wmap(ip); 1050 free_ea_wmap(ip);
1051 ip->i_nlink = 0; 1051 clear_nlink(ip);
1052 unlock_new_inode(ip); 1052 unlock_new_inode(ip);
1053 iput(ip); 1053 iput(ip);
1054 } else { 1054 } else {
@@ -1433,7 +1433,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
1433 mutex_unlock(&JFS_IP(dir)->commit_mutex); 1433 mutex_unlock(&JFS_IP(dir)->commit_mutex);
1434 if (rc) { 1434 if (rc) {
1435 free_ea_wmap(ip); 1435 free_ea_wmap(ip);
1436 ip->i_nlink = 0; 1436 clear_nlink(ip);
1437 unlock_new_inode(ip); 1437 unlock_new_inode(ip);
1438 iput(ip); 1438 iput(ip);
1439 } else { 1439 } else {
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 06c8a67cbe76..a44eff076c17 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -485,7 +485,6 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
485 goto out_unload; 485 goto out_unload;
486 } 486 }
487 inode->i_ino = 0; 487 inode->i_ino = 0;
488 inode->i_nlink = 1;
489 inode->i_size = sb->s_bdev->bd_inode->i_size; 488 inode->i_size = sb->s_bdev->bd_inode->i_size;
490 inode->i_mapping->a_ops = &jfs_metapage_aops; 489 inode->i_mapping->a_ops = &jfs_metapage_aops;
491 insert_inode_hash(inode); 490 insert_inode_hash(inode);
diff --git a/fs/libfs.c b/fs/libfs.c
index c18e9a1235b6..f6d411eef1e7 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -490,7 +490,7 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
490 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 490 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
491 inode->i_op = &simple_dir_inode_operations; 491 inode->i_op = &simple_dir_inode_operations;
492 inode->i_fop = &simple_dir_operations; 492 inode->i_fop = &simple_dir_operations;
493 inode->i_nlink = 2; 493 set_nlink(inode, 2);
494 root = d_alloc_root(inode); 494 root = d_alloc_root(inode);
495 if (!root) { 495 if (!root) {
496 iput(inode); 496 iput(inode);
@@ -510,8 +510,10 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
510 if (!dentry) 510 if (!dentry)
511 goto out; 511 goto out;
512 inode = new_inode(s); 512 inode = new_inode(s);
513 if (!inode) 513 if (!inode) {
514 dput(dentry);
514 goto out; 515 goto out;
516 }
515 inode->i_mode = S_IFREG | files->mode; 517 inode->i_mode = S_IFREG | files->mode;
516 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 518 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
517 inode->i_fop = files->ops; 519 inode->i_fop = files->ops;
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index b3ff3d894165..b7d7f67cee5a 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -197,7 +197,7 @@ static int logfs_remove_inode(struct inode *inode)
197{ 197{
198 int ret; 198 int ret;
199 199
200 inode->i_nlink--; 200 drop_nlink(inode);
201 ret = write_inode(inode); 201 ret = write_inode(inode);
202 LOGFS_BUG_ON(ret, inode->i_sb); 202 LOGFS_BUG_ON(ret, inode->i_sb);
203 return ret; 203 return ret;
@@ -433,7 +433,7 @@ static int __logfs_create(struct inode *dir, struct dentry *dentry,
433 433
434 ta = kzalloc(sizeof(*ta), GFP_KERNEL); 434 ta = kzalloc(sizeof(*ta), GFP_KERNEL);
435 if (!ta) { 435 if (!ta) {
436 inode->i_nlink--; 436 drop_nlink(inode);
437 iput(inode); 437 iput(inode);
438 return -ENOMEM; 438 return -ENOMEM;
439 } 439 }
@@ -456,7 +456,7 @@ static int __logfs_create(struct inode *dir, struct dentry *dentry,
456 abort_transaction(inode, ta); 456 abort_transaction(inode, ta);
457 li->li_flags |= LOGFS_IF_STILLBORN; 457 li->li_flags |= LOGFS_IF_STILLBORN;
458 /* FIXME: truncate symlink */ 458 /* FIXME: truncate symlink */
459 inode->i_nlink--; 459 drop_nlink(inode);
460 iput(inode); 460 iput(inode);
461 goto out; 461 goto out;
462 } 462 }
@@ -563,7 +563,7 @@ static int logfs_link(struct dentry *old_dentry, struct inode *dir,
563 563
564 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 564 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
565 ihold(inode); 565 ihold(inode);
566 inode->i_nlink++; 566 inc_nlink(inode);
567 mark_inode_dirty_sync(inode); 567 mark_inode_dirty_sync(inode);
568 568
569 return __logfs_create(dir, dentry, inode, NULL, 0); 569 return __logfs_create(dir, dentry, inode, NULL, 0);
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index edfea7a3a747..7e441ad5f792 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -93,7 +93,7 @@ static struct inode *__logfs_iget(struct super_block *sb, ino_t ino)
93 /* inode->i_nlink == 0 can be true when called from 93 /* inode->i_nlink == 0 can be true when called from
94 * block validator */ 94 * block validator */
95 /* set i_nlink to 0 to prevent caching */ 95 /* set i_nlink to 0 to prevent caching */
96 inode->i_nlink = 0; 96 clear_nlink(inode);
97 logfs_inode(inode)->li_flags |= LOGFS_IF_ZOMBIE; 97 logfs_inode(inode)->li_flags |= LOGFS_IF_ZOMBIE;
98 iget_failed(inode); 98 iget_failed(inode);
99 if (!err) 99 if (!err)
@@ -199,7 +199,6 @@ static void logfs_init_inode(struct super_block *sb, struct inode *inode)
199 inode->i_blocks = 0; 199 inode->i_blocks = 0;
200 inode->i_ctime = CURRENT_TIME; 200 inode->i_ctime = CURRENT_TIME;
201 inode->i_mtime = CURRENT_TIME; 201 inode->i_mtime = CURRENT_TIME;
202 inode->i_nlink = 1;
203 li->li_refcount = 1; 202 li->li_refcount = 1;
204 INIT_LIST_HEAD(&li->li_freeing_list); 203 INIT_LIST_HEAD(&li->li_freeing_list);
205 204
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index d8d09380c7de..2ac4217b7901 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -126,7 +126,7 @@ static void logfs_disk_to_inode(struct logfs_disk_inode *di, struct inode*inode)
126 inode->i_atime = be64_to_timespec(di->di_atime); 126 inode->i_atime = be64_to_timespec(di->di_atime);
127 inode->i_ctime = be64_to_timespec(di->di_ctime); 127 inode->i_ctime = be64_to_timespec(di->di_ctime);
128 inode->i_mtime = be64_to_timespec(di->di_mtime); 128 inode->i_mtime = be64_to_timespec(di->di_mtime);
129 inode->i_nlink = be32_to_cpu(di->di_refcount); 129 set_nlink(inode, be32_to_cpu(di->di_refcount));
130 inode->i_generation = be32_to_cpu(di->di_generation); 130 inode->i_generation = be32_to_cpu(di->di_generation);
131 131
132 switch (inode->i_mode & S_IFMT) { 132 switch (inode->i_mode & S_IFMT) {
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index e7d23e25bf1d..64cdcd662ffc 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -446,7 +446,7 @@ static struct inode *V1_minix_iget(struct inode *inode)
446 inode->i_mode = raw_inode->i_mode; 446 inode->i_mode = raw_inode->i_mode;
447 inode->i_uid = (uid_t)raw_inode->i_uid; 447 inode->i_uid = (uid_t)raw_inode->i_uid;
448 inode->i_gid = (gid_t)raw_inode->i_gid; 448 inode->i_gid = (gid_t)raw_inode->i_gid;
449 inode->i_nlink = raw_inode->i_nlinks; 449 set_nlink(inode, raw_inode->i_nlinks);
450 inode->i_size = raw_inode->i_size; 450 inode->i_size = raw_inode->i_size;
451 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = raw_inode->i_time; 451 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = raw_inode->i_time;
452 inode->i_mtime.tv_nsec = 0; 452 inode->i_mtime.tv_nsec = 0;
@@ -479,7 +479,7 @@ static struct inode *V2_minix_iget(struct inode *inode)
479 inode->i_mode = raw_inode->i_mode; 479 inode->i_mode = raw_inode->i_mode;
480 inode->i_uid = (uid_t)raw_inode->i_uid; 480 inode->i_uid = (uid_t)raw_inode->i_uid;
481 inode->i_gid = (gid_t)raw_inode->i_gid; 481 inode->i_gid = (gid_t)raw_inode->i_gid;
482 inode->i_nlink = raw_inode->i_nlinks; 482 set_nlink(inode, raw_inode->i_nlinks);
483 inode->i_size = raw_inode->i_size; 483 inode->i_size = raw_inode->i_size;
484 inode->i_mtime.tv_sec = raw_inode->i_mtime; 484 inode->i_mtime.tv_sec = raw_inode->i_mtime;
485 inode->i_atime.tv_sec = raw_inode->i_atime; 485 inode->i_atime.tv_sec = raw_inode->i_atime;
diff --git a/fs/namei.c b/fs/namei.c
index 7657be4352bf..ac6d214da827 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -137,7 +137,7 @@ static int do_getname(const char __user *filename, char *page)
137 return retval; 137 return retval;
138} 138}
139 139
140static char *getname_flags(const char __user * filename, int flags) 140static char *getname_flags(const char __user *filename, int flags, int *empty)
141{ 141{
142 char *tmp, *result; 142 char *tmp, *result;
143 143
@@ -148,6 +148,8 @@ static char *getname_flags(const char __user * filename, int flags)
148 148
149 result = tmp; 149 result = tmp;
150 if (retval < 0) { 150 if (retval < 0) {
151 if (retval == -ENOENT && empty)
152 *empty = 1;
151 if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) { 153 if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
152 __putname(tmp); 154 __putname(tmp);
153 result = ERR_PTR(retval); 155 result = ERR_PTR(retval);
@@ -160,7 +162,7 @@ static char *getname_flags(const char __user * filename, int flags)
160 162
161char *getname(const char __user * filename) 163char *getname(const char __user * filename)
162{ 164{
163 return getname_flags(filename, 0); 165 return getname_flags(filename, 0, 0);
164} 166}
165 167
166#ifdef CONFIG_AUDITSYSCALL 168#ifdef CONFIG_AUDITSYSCALL
@@ -1798,11 +1800,11 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1798 return __lookup_hash(&this, base, NULL); 1800 return __lookup_hash(&this, base, NULL);
1799} 1801}
1800 1802
1801int user_path_at(int dfd, const char __user *name, unsigned flags, 1803int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
1802 struct path *path) 1804 struct path *path, int *empty)
1803{ 1805{
1804 struct nameidata nd; 1806 struct nameidata nd;
1805 char *tmp = getname_flags(name, flags); 1807 char *tmp = getname_flags(name, flags, empty);
1806 int err = PTR_ERR(tmp); 1808 int err = PTR_ERR(tmp);
1807 if (!IS_ERR(tmp)) { 1809 if (!IS_ERR(tmp)) {
1808 1810
@@ -1816,6 +1818,12 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
1816 return err; 1818 return err;
1817} 1819}
1818 1820
1821int user_path_at(int dfd, const char __user *name, unsigned flags,
1822 struct path *path)
1823{
1824 return user_path_at_empty(dfd, name, flags, path, 0);
1825}
1826
1819static int user_path_parent(int dfd, const char __user *path, 1827static int user_path_parent(int dfd, const char __user *path,
1820 struct nameidata *nd, char **name) 1828 struct nameidata *nd, char **name)
1821{ 1829{
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 202f370526a7..5b5fa33b6b9d 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -228,7 +228,7 @@ static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo)
228 228
229 DDPRINTK("ncp_read_inode: inode->i_mode = %u\n", inode->i_mode); 229 DDPRINTK("ncp_read_inode: inode->i_mode = %u\n", inode->i_mode);
230 230
231 inode->i_nlink = 1; 231 set_nlink(inode, 1);
232 inode->i_uid = server->m.uid; 232 inode->i_uid = server->m.uid;
233 inode->i_gid = server->m.gid; 233 inode->i_gid = server->m.gid;
234 234
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 4dc6d078f108..c07a55aec838 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -320,7 +320,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
320 memset(&inode->i_ctime, 0, sizeof(inode->i_ctime)); 320 memset(&inode->i_ctime, 0, sizeof(inode->i_ctime));
321 inode->i_version = 0; 321 inode->i_version = 0;
322 inode->i_size = 0; 322 inode->i_size = 0;
323 inode->i_nlink = 0; 323 clear_nlink(inode);
324 inode->i_uid = -2; 324 inode->i_uid = -2;
325 inode->i_gid = -2; 325 inode->i_gid = -2;
326 inode->i_blocks = 0; 326 inode->i_blocks = 0;
@@ -355,7 +355,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
355 | NFS_INO_INVALID_DATA 355 | NFS_INO_INVALID_DATA
356 | NFS_INO_REVAL_PAGECACHE; 356 | NFS_INO_REVAL_PAGECACHE;
357 if (fattr->valid & NFS_ATTR_FATTR_NLINK) 357 if (fattr->valid & NFS_ATTR_FATTR_NLINK)
358 inode->i_nlink = fattr->nlink; 358 set_nlink(inode, fattr->nlink);
359 else if (nfs_server_capable(inode, NFS_CAP_NLINK)) 359 else if (nfs_server_capable(inode, NFS_CAP_NLINK))
360 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 360 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
361 if (fattr->valid & NFS_ATTR_FATTR_OWNER) 361 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
@@ -1361,7 +1361,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1361 invalid |= NFS_INO_INVALID_ATTR; 1361 invalid |= NFS_INO_INVALID_ATTR;
1362 if (S_ISDIR(inode->i_mode)) 1362 if (S_ISDIR(inode->i_mode))
1363 invalid |= NFS_INO_INVALID_DATA; 1363 invalid |= NFS_INO_INVALID_DATA;
1364 inode->i_nlink = fattr->nlink; 1364 set_nlink(inode, fattr->nlink);
1365 } 1365 }
1366 } else if (server->caps & NFS_CAP_NLINK) 1366 } else if (server->caps & NFS_CAP_NLINK)
1367 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR 1367 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 666628b395f1..b50ffb72e5b3 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -354,7 +354,7 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode)
354 354
355 failed_acl: 355 failed_acl:
356 failed_bmap: 356 failed_bmap:
357 inode->i_nlink = 0; 357 clear_nlink(inode);
358 iput(inode); /* raw_inode will be deleted through 358 iput(inode); /* raw_inode will be deleted through
359 generic_delete_inode() */ 359 generic_delete_inode() */
360 goto failed; 360 goto failed;
@@ -396,7 +396,7 @@ int nilfs_read_inode_common(struct inode *inode,
396 inode->i_mode = le16_to_cpu(raw_inode->i_mode); 396 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
397 inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid); 397 inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid);
398 inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid); 398 inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid);
399 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 399 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
400 inode->i_size = le64_to_cpu(raw_inode->i_size); 400 inode->i_size = le64_to_cpu(raw_inode->i_size);
401 inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); 401 inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
402 inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); 402 inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index a3141990061e..768982de10e4 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -289,7 +289,7 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry)
289 nilfs_warning(inode->i_sb, __func__, 289 nilfs_warning(inode->i_sb, __func__,
290 "deleting nonexistent file (%lu), %d\n", 290 "deleting nonexistent file (%lu), %d\n",
291 inode->i_ino, inode->i_nlink); 291 inode->i_ino, inode->i_nlink);
292 inode->i_nlink = 1; 292 set_nlink(inode, 1);
293 } 293 }
294 err = nilfs_delete_entry(de, page); 294 err = nilfs_delete_entry(de, page);
295 if (err) 295 if (err)
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 1371487da955..97e2dacbc867 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -612,7 +612,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
612 * might be tricky due to vfs interactions. Need to think about this 612 * might be tricky due to vfs interactions. Need to think about this
613 * some more when implementing the unlink command. 613 * some more when implementing the unlink command.
614 */ 614 */
615 vi->i_nlink = le16_to_cpu(m->link_count); 615 set_nlink(vi, le16_to_cpu(m->link_count));
616 /* 616 /*
617 * FIXME: Reparse points can have the directory bit set even though 617 * FIXME: Reparse points can have the directory bit set even though
618 * they would be S_IFLNK. Need to deal with this further below when we 618 * they would be S_IFLNK. Need to deal with this further below when we
@@ -634,7 +634,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
634 vi->i_mode &= ~vol->dmask; 634 vi->i_mode &= ~vol->dmask;
635 /* Things break without this kludge! */ 635 /* Things break without this kludge! */
636 if (vi->i_nlink > 1) 636 if (vi->i_nlink > 1)
637 vi->i_nlink = 1; 637 set_nlink(vi, 1);
638 } else { 638 } else {
639 vi->i_mode |= S_IFREG; 639 vi->i_mode |= S_IFREG;
640 /* Apply the file permissions mask set in the mount options. */ 640 /* Apply the file permissions mask set in the mount options. */
@@ -1242,7 +1242,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1242 vi->i_version = base_vi->i_version; 1242 vi->i_version = base_vi->i_version;
1243 vi->i_uid = base_vi->i_uid; 1243 vi->i_uid = base_vi->i_uid;
1244 vi->i_gid = base_vi->i_gid; 1244 vi->i_gid = base_vi->i_gid;
1245 vi->i_nlink = base_vi->i_nlink; 1245 set_nlink(vi, base_vi->i_nlink);
1246 vi->i_mtime = base_vi->i_mtime; 1246 vi->i_mtime = base_vi->i_mtime;
1247 vi->i_ctime = base_vi->i_ctime; 1247 vi->i_ctime = base_vi->i_ctime;
1248 vi->i_atime = base_vi->i_atime; 1248 vi->i_atime = base_vi->i_atime;
@@ -1508,7 +1508,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1508 vi->i_version = base_vi->i_version; 1508 vi->i_version = base_vi->i_version;
1509 vi->i_uid = base_vi->i_uid; 1509 vi->i_uid = base_vi->i_uid;
1510 vi->i_gid = base_vi->i_gid; 1510 vi->i_gid = base_vi->i_gid;
1511 vi->i_nlink = base_vi->i_nlink; 1511 set_nlink(vi, base_vi->i_nlink);
1512 vi->i_mtime = base_vi->i_mtime; 1512 vi->i_mtime = base_vi->i_mtime;
1513 vi->i_ctime = base_vi->i_ctime; 1513 vi->i_ctime = base_vi->i_ctime;
1514 vi->i_atime = base_vi->i_atime; 1514 vi->i_atime = base_vi->i_atime;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 8582e3f4f120..e2878b5895fb 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2292,7 +2292,7 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
2292 ocfs2_journal_dirty(handle, di_bh); 2292 ocfs2_journal_dirty(handle, di_bh);
2293 2293
2294 i_size_write(inode, size); 2294 i_size_write(inode, size);
2295 inode->i_nlink = 2; 2295 set_nlink(inode, 2);
2296 inode->i_blocks = ocfs2_inode_sector_count(inode); 2296 inode->i_blocks = ocfs2_inode_sector_count(inode);
2297 2297
2298 ret = ocfs2_mark_inode_dirty(handle, inode, di_bh); 2298 ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
@@ -2354,7 +2354,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
2354 ocfs2_journal_dirty(handle, new_bh); 2354 ocfs2_journal_dirty(handle, new_bh);
2355 2355
2356 i_size_write(inode, inode->i_sb->s_blocksize); 2356 i_size_write(inode, inode->i_sb->s_blocksize);
2357 inode->i_nlink = 2; 2357 set_nlink(inode, 2);
2358 inode->i_blocks = ocfs2_inode_sector_count(inode); 2358 inode->i_blocks = ocfs2_inode_sector_count(inode);
2359 status = ocfs2_mark_inode_dirty(handle, inode, fe_bh); 2359 status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
2360 if (status < 0) { 2360 if (status < 0) {
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 7642d7ca73e5..e1ed5e502ff2 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2092,7 +2092,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
2092 inode->i_uid = be32_to_cpu(lvb->lvb_iuid); 2092 inode->i_uid = be32_to_cpu(lvb->lvb_iuid);
2093 inode->i_gid = be32_to_cpu(lvb->lvb_igid); 2093 inode->i_gid = be32_to_cpu(lvb->lvb_igid);
2094 inode->i_mode = be16_to_cpu(lvb->lvb_imode); 2094 inode->i_mode = be16_to_cpu(lvb->lvb_imode);
2095 inode->i_nlink = be16_to_cpu(lvb->lvb_inlink); 2095 set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
2096 ocfs2_unpack_timespec(&inode->i_atime, 2096 ocfs2_unpack_timespec(&inode->i_atime,
2097 be64_to_cpu(lvb->lvb_iatime_packed)); 2097 be64_to_cpu(lvb->lvb_iatime_packed));
2098 ocfs2_unpack_timespec(&inode->i_mtime, 2098 ocfs2_unpack_timespec(&inode->i_mtime,
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index b4c8bb6b8d28..a22d2c098890 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -291,7 +291,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
291 (unsigned long long)OCFS2_I(inode)->ip_blkno, 291 (unsigned long long)OCFS2_I(inode)->ip_blkno,
292 (unsigned long long)le64_to_cpu(fe->i_blkno)); 292 (unsigned long long)le64_to_cpu(fe->i_blkno));
293 293
294 inode->i_nlink = ocfs2_read_links_count(fe); 294 set_nlink(inode, ocfs2_read_links_count(fe));
295 295
296 trace_ocfs2_populate_inode(OCFS2_I(inode)->ip_blkno, 296 trace_ocfs2_populate_inode(OCFS2_I(inode)->ip_blkno,
297 le32_to_cpu(fe->i_flags)); 297 le32_to_cpu(fe->i_flags));
@@ -1290,7 +1290,7 @@ void ocfs2_refresh_inode(struct inode *inode,
1290 OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features); 1290 OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
1291 ocfs2_set_inode_flags(inode); 1291 ocfs2_set_inode_flags(inode);
1292 i_size_write(inode, le64_to_cpu(fe->i_size)); 1292 i_size_write(inode, le64_to_cpu(fe->i_size));
1293 inode->i_nlink = ocfs2_read_links_count(fe); 1293 set_nlink(inode, ocfs2_read_links_count(fe));
1294 inode->i_uid = le32_to_cpu(fe->i_uid); 1294 inode->i_uid = le32_to_cpu(fe->i_uid);
1295 inode->i_gid = le32_to_cpu(fe->i_gid); 1295 inode->i_gid = le32_to_cpu(fe->i_gid);
1296 inode->i_mode = le16_to_cpu(fe->i_mode); 1296 inode->i_mode = le16_to_cpu(fe->i_mode);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 53aa41ed7bf3..a8b2bfea574e 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -199,9 +199,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode)
199 * these are used by the support functions here and in 199 * these are used by the support functions here and in
200 * callers. */ 200 * callers. */
201 if (S_ISDIR(mode)) 201 if (S_ISDIR(mode))
202 inode->i_nlink = 2; 202 set_nlink(inode, 2);
203 else
204 inode->i_nlink = 1;
205 inode_init_owner(inode, dir, mode); 203 inode_init_owner(inode, dir, mode);
206 dquot_initialize(inode); 204 dquot_initialize(inode);
207 return inode; 205 return inode;
@@ -1379,7 +1377,7 @@ static int ocfs2_rename(struct inode *old_dir,
1379 } 1377 }
1380 1378
1381 if (new_inode) { 1379 if (new_inode) {
1382 new_inode->i_nlink--; 1380 drop_nlink(new_inode);
1383 new_inode->i_ctime = CURRENT_TIME; 1381 new_inode->i_ctime = CURRENT_TIME;
1384 } 1382 }
1385 old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; 1383 old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
@@ -1387,9 +1385,9 @@ static int ocfs2_rename(struct inode *old_dir,
1387 if (update_dot_dot) { 1385 if (update_dot_dot) {
1388 status = ocfs2_update_entry(old_inode, handle, 1386 status = ocfs2_update_entry(old_inode, handle,
1389 &old_inode_dot_dot_res, new_dir); 1387 &old_inode_dot_dot_res, new_dir);
1390 old_dir->i_nlink--; 1388 drop_nlink(old_dir);
1391 if (new_inode) { 1389 if (new_inode) {
1392 new_inode->i_nlink--; 1390 drop_nlink(new_inode);
1393 } else { 1391 } else {
1394 inc_nlink(new_dir); 1392 inc_nlink(new_dir);
1395 mark_inode_dirty(new_dir); 1393 mark_inode_dirty(new_dir);
@@ -2018,7 +2016,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
2018 orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; 2016 orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
2019 if (S_ISDIR(inode->i_mode)) 2017 if (S_ISDIR(inode->i_mode))
2020 ocfs2_add_links_count(orphan_fe, 1); 2018 ocfs2_add_links_count(orphan_fe, 1);
2021 orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe); 2019 set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe));
2022 ocfs2_journal_dirty(handle, orphan_dir_bh); 2020 ocfs2_journal_dirty(handle, orphan_dir_bh);
2023 2021
2024 status = __ocfs2_add_entry(handle, orphan_dir_inode, name, 2022 status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
@@ -2116,7 +2114,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
2116 orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; 2114 orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
2117 if (S_ISDIR(inode->i_mode)) 2115 if (S_ISDIR(inode->i_mode))
2118 ocfs2_add_links_count(orphan_fe, -1); 2116 ocfs2_add_links_count(orphan_fe, -1);
2119 orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe); 2117 set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe));
2120 ocfs2_journal_dirty(handle, orphan_dir_bh); 2118 ocfs2_journal_dirty(handle, orphan_dir_bh);
2121 2119
2122leave: 2120leave:
@@ -2282,7 +2280,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
2282 goto leave; 2280 goto leave;
2283 } 2281 }
2284 2282
2285 inode->i_nlink = 0; 2283 clear_nlink(inode);
2286 /* do the real work now. */ 2284 /* do the real work now. */
2287 status = __ocfs2_mknod_locked(dir, inode, 2285 status = __ocfs2_mknod_locked(dir, inode,
2288 0, &new_di_bh, parent_di_bh, handle, 2286 0, &new_di_bh, parent_di_bh, handle,
@@ -2437,7 +2435,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2437 di = (struct ocfs2_dinode *)di_bh->b_data; 2435 di = (struct ocfs2_dinode *)di_bh->b_data;
2438 le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL); 2436 le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL);
2439 di->i_orphaned_slot = 0; 2437 di->i_orphaned_slot = 0;
2440 inode->i_nlink = 1; 2438 set_nlink(inode, 1);
2441 ocfs2_set_links_count(di, inode->i_nlink); 2439 ocfs2_set_links_count(di, inode->i_nlink);
2442 ocfs2_journal_dirty(handle, di_bh); 2440 ocfs2_journal_dirty(handle, di_bh);
2443 2441
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index a2a5bff774e3..e4e0ff7962e2 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -242,7 +242,7 @@ found:
242 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 242 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
243 inode->i_op = &openprom_inode_operations; 243 inode->i_op = &openprom_inode_operations;
244 inode->i_fop = &openprom_operations; 244 inode->i_fop = &openprom_operations;
245 inode->i_nlink = 2; 245 set_nlink(inode, 2);
246 break; 246 break;
247 case op_inode_prop: 247 case op_inode_prop:
248 if (!strcmp(dp->name, "options") && (len == 17) && 248 if (!strcmp(dp->name, "options") && (len == 17) &&
@@ -251,7 +251,7 @@ found:
251 else 251 else
252 inode->i_mode = S_IFREG | S_IRUGO; 252 inode->i_mode = S_IFREG | S_IRUGO;
253 inode->i_fop = &openpromfs_prop_ops; 253 inode->i_fop = &openpromfs_prop_ops;
254 inode->i_nlink = 1; 254 set_nlink(inode, 1);
255 inode->i_size = ent_oi->u.prop->length; 255 inode->i_size = ent_oi->u.prop->length;
256 break; 256 break;
257 } 257 }
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d4f4913f00db..2db1bd3173b2 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2308,7 +2308,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
2308 ei = PROC_I(inode); 2308 ei = PROC_I(inode);
2309 inode->i_mode = p->mode; 2309 inode->i_mode = p->mode;
2310 if (S_ISDIR(inode->i_mode)) 2310 if (S_ISDIR(inode->i_mode))
2311 inode->i_nlink = 2; /* Use getattr to fix if necessary */ 2311 set_nlink(inode, 2); /* Use getattr to fix if necessary */
2312 if (p->iop) 2312 if (p->iop)
2313 inode->i_op = p->iop; 2313 inode->i_op = p->iop;
2314 if (p->fop) 2314 if (p->fop)
@@ -2702,7 +2702,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
2702 2702
2703 inode->i_mode = p->mode; 2703 inode->i_mode = p->mode;
2704 if (S_ISDIR(inode->i_mode)) 2704 if (S_ISDIR(inode->i_mode))
2705 inode->i_nlink = 2; 2705 set_nlink(inode, 2);
2706 if (S_ISLNK(inode->i_mode)) 2706 if (S_ISLNK(inode->i_mode))
2707 inode->i_size = 64; 2707 inode->i_size = 64;
2708 if (p->iop) 2708 if (p->iop)
@@ -3041,8 +3041,8 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
3041 inode->i_fop = &proc_tgid_base_operations; 3041 inode->i_fop = &proc_tgid_base_operations;
3042 inode->i_flags|=S_IMMUTABLE; 3042 inode->i_flags|=S_IMMUTABLE;
3043 3043
3044 inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, 3044 set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff,
3045 ARRAY_SIZE(tgid_base_stuff)); 3045 ARRAY_SIZE(tgid_base_stuff)));
3046 3046
3047 d_set_d_op(dentry, &pid_dentry_operations); 3047 d_set_d_op(dentry, &pid_dentry_operations);
3048 3048
@@ -3293,8 +3293,8 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
3293 inode->i_fop = &proc_tid_base_operations; 3293 inode->i_fop = &proc_tid_base_operations;
3294 inode->i_flags|=S_IMMUTABLE; 3294 inode->i_flags|=S_IMMUTABLE;
3295 3295
3296 inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, 3296 set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff,
3297 ARRAY_SIZE(tid_base_stuff)); 3297 ARRAY_SIZE(tid_base_stuff)));
3298 3298
3299 d_set_d_op(dentry, &pid_dentry_operations); 3299 d_set_d_op(dentry, &pid_dentry_operations);
3300 3300
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 9d99131d0d65..10090d9c7ad5 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -283,7 +283,7 @@ static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
283 struct inode *inode = dentry->d_inode; 283 struct inode *inode = dentry->d_inode;
284 struct proc_dir_entry *de = PROC_I(inode)->pde; 284 struct proc_dir_entry *de = PROC_I(inode)->pde;
285 if (de && de->nlink) 285 if (de && de->nlink)
286 inode->i_nlink = de->nlink; 286 set_nlink(inode, de->nlink);
287 287
288 generic_fillattr(inode, stat); 288 generic_fillattr(inode, stat);
289 return 0; 289 return 0;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 7ed72d6c1c6f..7737c5468a40 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -445,7 +445,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
445 if (de->size) 445 if (de->size)
446 inode->i_size = de->size; 446 inode->i_size = de->size;
447 if (de->nlink) 447 if (de->nlink)
448 inode->i_nlink = de->nlink; 448 set_nlink(inode, de->nlink);
449 if (de->proc_iops) 449 if (de->proc_iops)
450 inode->i_op = de->proc_iops; 450 inode->i_op = de->proc_iops;
451 if (de->proc_fops) { 451 if (de->proc_fops) {
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index df594803f45a..a6b62173d4c3 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -49,7 +49,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
49 inode->i_fop = &proc_sys_file_operations; 49 inode->i_fop = &proc_sys_file_operations;
50 } else { 50 } else {
51 inode->i_mode |= S_IFDIR; 51 inode->i_mode |= S_IFDIR;
52 inode->i_nlink = 0; 52 clear_nlink(inode);
53 inode->i_op = &proc_sys_dir_operations; 53 inode->i_op = &proc_sys_dir_operations;
54 inode->i_fop = &proc_sys_dir_file_operations; 54 inode->i_fop = &proc_sys_dir_file_operations;
55 } 55 }
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 2b0646613f5a..3bdd21418432 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -379,7 +379,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
379 inode->i_mode = le16_to_cpu(raw_inode->di_mode); 379 inode->i_mode = le16_to_cpu(raw_inode->di_mode);
380 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->di_uid); 380 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->di_uid);
381 inode->i_gid = (gid_t)le16_to_cpu(raw_inode->di_gid); 381 inode->i_gid = (gid_t)le16_to_cpu(raw_inode->di_gid);
382 inode->i_nlink = le16_to_cpu(raw_inode->di_nlink); 382 set_nlink(inode, le16_to_cpu(raw_inode->di_nlink));
383 inode->i_size = le32_to_cpu(raw_inode->di_size); 383 inode->i_size = le32_to_cpu(raw_inode->di_size);
384 inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->di_mtime); 384 inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->di_mtime);
385 inode->i_mtime.tv_nsec = 0; 385 inode->i_mtime.tv_nsec = 0;
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 10b6be3ca280..aae0edb95c6c 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -363,12 +363,15 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
363 } 363 }
364 364
365 sb = quotactl_block(special); 365 sb = quotactl_block(special);
366 if (IS_ERR(sb)) 366 if (IS_ERR(sb)) {
367 return PTR_ERR(sb); 367 ret = PTR_ERR(sb);
368 goto out;
369 }
368 370
369 ret = do_quotactl(sb, type, cmds, id, addr, pathp); 371 ret = do_quotactl(sb, type, cmds, id, addr, pathp);
370 372
371 drop_super(sb); 373 drop_super(sb);
374out:
372 if (pathp && !IS_ERR(pathp)) 375 if (pathp && !IS_ERR(pathp))
373 path_put(pathp); 376 path_put(pathp);
374 return ret; 377 return ret;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9b0d4b78b4fb..950f13af0951 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1154,7 +1154,7 @@ static void init_inode(struct inode *inode, struct treepath *path)
1154 set_inode_item_key_version(inode, KEY_FORMAT_3_5); 1154 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1155 set_inode_sd_version(inode, STAT_DATA_V1); 1155 set_inode_sd_version(inode, STAT_DATA_V1);
1156 inode->i_mode = sd_v1_mode(sd); 1156 inode->i_mode = sd_v1_mode(sd);
1157 inode->i_nlink = sd_v1_nlink(sd); 1157 set_nlink(inode, sd_v1_nlink(sd));
1158 inode->i_uid = sd_v1_uid(sd); 1158 inode->i_uid = sd_v1_uid(sd);
1159 inode->i_gid = sd_v1_gid(sd); 1159 inode->i_gid = sd_v1_gid(sd);
1160 inode->i_size = sd_v1_size(sd); 1160 inode->i_size = sd_v1_size(sd);
@@ -1199,7 +1199,7 @@ static void init_inode(struct inode *inode, struct treepath *path)
1199 struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih); 1199 struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);
1200 1200
1201 inode->i_mode = sd_v2_mode(sd); 1201 inode->i_mode = sd_v2_mode(sd);
1202 inode->i_nlink = sd_v2_nlink(sd); 1202 set_nlink(inode, sd_v2_nlink(sd));
1203 inode->i_uid = sd_v2_uid(sd); 1203 inode->i_uid = sd_v2_uid(sd);
1204 inode->i_size = sd_v2_size(sd); 1204 inode->i_size = sd_v2_size(sd);
1205 inode->i_gid = sd_v2_gid(sd); 1205 inode->i_gid = sd_v2_gid(sd);
@@ -1444,7 +1444,7 @@ void reiserfs_read_locked_inode(struct inode *inode,
1444 /* a stale NFS handle can trigger this without it being an error */ 1444 /* a stale NFS handle can trigger this without it being an error */
1445 pathrelse(&path_to_sd); 1445 pathrelse(&path_to_sd);
1446 reiserfs_make_bad_inode(inode); 1446 reiserfs_make_bad_inode(inode);
1447 inode->i_nlink = 0; 1447 clear_nlink(inode);
1448 return; 1448 return;
1449 } 1449 }
1450 1450
@@ -1832,7 +1832,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1832#endif 1832#endif
1833 1833
1834 /* fill stat data */ 1834 /* fill stat data */
1835 inode->i_nlink = (S_ISDIR(mode) ? 2 : 1); 1835 set_nlink(inode, (S_ISDIR(mode) ? 2 : 1));
1836 1836
1837 /* uid and gid must already be set by the caller for quota init */ 1837 /* uid and gid must already be set by the caller for quota init */
1838 1838
@@ -1987,7 +1987,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1987 make_bad_inode(inode); 1987 make_bad_inode(inode);
1988 1988
1989 out_inserted_sd: 1989 out_inserted_sd:
1990 inode->i_nlink = 0; 1990 clear_nlink(inode);
1991 th->t_trans_id = 0; /* so the caller can't use this handle later */ 1991 th->t_trans_id = 0; /* so the caller can't use this handle later */
1992 unlock_new_inode(inode); /* OK to do even if we hadn't locked it */ 1992 unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
1993 iput(inode); 1993 iput(inode);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index ef392324bbf1..80058e8ce361 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -19,7 +19,7 @@
19#include <linux/reiserfs_xattr.h> 19#include <linux/reiserfs_xattr.h>
20#include <linux/quotaops.h> 20#include <linux/quotaops.h>
21 21
22#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; } 22#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); }
23#define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i); 23#define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i);
24 24
25// directory item contains array of entry headers. This performs 25// directory item contains array of entry headers. This performs
@@ -622,7 +622,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
622 dentry->d_name.len, inode, 1 /*visible */ ); 622 dentry->d_name.len, inode, 1 /*visible */ );
623 if (retval) { 623 if (retval) {
624 int err; 624 int err;
625 inode->i_nlink--; 625 drop_nlink(inode);
626 reiserfs_update_sd(&th, inode); 626 reiserfs_update_sd(&th, inode);
627 err = journal_end(&th, dir->i_sb, jbegin_count); 627 err = journal_end(&th, dir->i_sb, jbegin_count);
628 if (err) 628 if (err)
@@ -702,7 +702,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
702 dentry->d_name.len, inode, 1 /*visible */ ); 702 dentry->d_name.len, inode, 1 /*visible */ );
703 if (retval) { 703 if (retval) {
704 int err; 704 int err;
705 inode->i_nlink--; 705 drop_nlink(inode);
706 reiserfs_update_sd(&th, inode); 706 reiserfs_update_sd(&th, inode);
707 err = journal_end(&th, dir->i_sb, jbegin_count); 707 err = journal_end(&th, dir->i_sb, jbegin_count);
708 if (err) 708 if (err)
@@ -787,7 +787,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
787 dentry->d_name.len, inode, 1 /*visible */ ); 787 dentry->d_name.len, inode, 1 /*visible */ );
788 if (retval) { 788 if (retval) {
789 int err; 789 int err;
790 inode->i_nlink = 0; 790 clear_nlink(inode);
791 DEC_DIR_INODE_NLINK(dir); 791 DEC_DIR_INODE_NLINK(dir);
792 reiserfs_update_sd(&th, inode); 792 reiserfs_update_sd(&th, inode);
793 err = journal_end(&th, dir->i_sb, jbegin_count); 793 err = journal_end(&th, dir->i_sb, jbegin_count);
@@ -964,7 +964,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
964 reiserfs_warning(inode->i_sb, "reiserfs-7042", 964 reiserfs_warning(inode->i_sb, "reiserfs-7042",
965 "deleting nonexistent file (%lu), %d", 965 "deleting nonexistent file (%lu), %d",
966 inode->i_ino, inode->i_nlink); 966 inode->i_ino, inode->i_nlink);
967 inode->i_nlink = 1; 967 set_nlink(inode, 1);
968 } 968 }
969 969
970 drop_nlink(inode); 970 drop_nlink(inode);
@@ -1086,7 +1086,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
1086 dentry->d_name.len, inode, 1 /*visible */ ); 1086 dentry->d_name.len, inode, 1 /*visible */ );
1087 if (retval) { 1087 if (retval) {
1088 int err; 1088 int err;
1089 inode->i_nlink--; 1089 drop_nlink(inode);
1090 reiserfs_update_sd(&th, inode); 1090 reiserfs_update_sd(&th, inode);
1091 err = journal_end(&th, parent_dir->i_sb, jbegin_count); 1091 err = journal_end(&th, parent_dir->i_sb, jbegin_count);
1092 if (err) 1092 if (err)
@@ -1129,7 +1129,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1129 1129
1130 retval = journal_begin(&th, dir->i_sb, jbegin_count); 1130 retval = journal_begin(&th, dir->i_sb, jbegin_count);
1131 if (retval) { 1131 if (retval) {
1132 inode->i_nlink--; 1132 drop_nlink(inode);
1133 reiserfs_write_unlock(dir->i_sb); 1133 reiserfs_write_unlock(dir->i_sb);
1134 return retval; 1134 return retval;
1135 } 1135 }
@@ -1144,7 +1144,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1144 1144
1145 if (retval) { 1145 if (retval) {
1146 int err; 1146 int err;
1147 inode->i_nlink--; 1147 drop_nlink(inode);
1148 err = journal_end(&th, dir->i_sb, jbegin_count); 1148 err = journal_end(&th, dir->i_sb, jbegin_count);
1149 reiserfs_write_unlock(dir->i_sb); 1149 reiserfs_write_unlock(dir->i_sb);
1150 return err ? err : retval; 1150 return err ? err : retval;
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 2305e3121cb1..8b4089f30408 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -337,7 +337,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
337 inode->i_metasize = (ROMFH_SIZE + nlen + 1 + ROMFH_PAD) & ROMFH_MASK; 337 inode->i_metasize = (ROMFH_SIZE + nlen + 1 + ROMFH_PAD) & ROMFH_MASK;
338 inode->i_dataoffset = pos + inode->i_metasize; 338 inode->i_dataoffset = pos + inode->i_metasize;
339 339
340 i->i_nlink = 1; /* Hard to decide.. */ 340 set_nlink(i, 1); /* Hard to decide.. */
341 i->i_size = be32_to_cpu(ri.size); 341 i->i_size = be32_to_cpu(ri.size);
342 i->i_mtime.tv_sec = i->i_atime.tv_sec = i->i_ctime.tv_sec = 0; 342 i->i_mtime.tv_sec = i->i_atime.tv_sec = i->i_ctime.tv_sec = 0;
343 i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0; 343 i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0;
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index 04bebcaa2373..fd7b3b3bda13 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -159,7 +159,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
159 frag_offset = 0; 159 frag_offset = 0;
160 } 160 }
161 161
162 inode->i_nlink = 1; 162 set_nlink(inode, 1);
163 inode->i_size = le32_to_cpu(sqsh_ino->file_size); 163 inode->i_size = le32_to_cpu(sqsh_ino->file_size);
164 inode->i_fop = &generic_ro_fops; 164 inode->i_fop = &generic_ro_fops;
165 inode->i_mode |= S_IFREG; 165 inode->i_mode |= S_IFREG;
@@ -203,7 +203,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
203 } 203 }
204 204
205 xattr_id = le32_to_cpu(sqsh_ino->xattr); 205 xattr_id = le32_to_cpu(sqsh_ino->xattr);
206 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 206 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
207 inode->i_size = le64_to_cpu(sqsh_ino->file_size); 207 inode->i_size = le64_to_cpu(sqsh_ino->file_size);
208 inode->i_op = &squashfs_inode_ops; 208 inode->i_op = &squashfs_inode_ops;
209 inode->i_fop = &generic_ro_fops; 209 inode->i_fop = &generic_ro_fops;
@@ -232,7 +232,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
232 if (err < 0) 232 if (err < 0)
233 goto failed_read; 233 goto failed_read;
234 234
235 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 235 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
236 inode->i_size = le16_to_cpu(sqsh_ino->file_size); 236 inode->i_size = le16_to_cpu(sqsh_ino->file_size);
237 inode->i_op = &squashfs_dir_inode_ops; 237 inode->i_op = &squashfs_dir_inode_ops;
238 inode->i_fop = &squashfs_dir_ops; 238 inode->i_fop = &squashfs_dir_ops;
@@ -257,7 +257,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
257 goto failed_read; 257 goto failed_read;
258 258
259 xattr_id = le32_to_cpu(sqsh_ino->xattr); 259 xattr_id = le32_to_cpu(sqsh_ino->xattr);
260 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 260 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
261 inode->i_size = le32_to_cpu(sqsh_ino->file_size); 261 inode->i_size = le32_to_cpu(sqsh_ino->file_size);
262 inode->i_op = &squashfs_dir_inode_ops; 262 inode->i_op = &squashfs_dir_inode_ops;
263 inode->i_fop = &squashfs_dir_ops; 263 inode->i_fop = &squashfs_dir_ops;
@@ -284,7 +284,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
284 if (err < 0) 284 if (err < 0)
285 goto failed_read; 285 goto failed_read;
286 286
287 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 287 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
288 inode->i_size = le32_to_cpu(sqsh_ino->symlink_size); 288 inode->i_size = le32_to_cpu(sqsh_ino->symlink_size);
289 inode->i_op = &squashfs_symlink_inode_ops; 289 inode->i_op = &squashfs_symlink_inode_ops;
290 inode->i_data.a_ops = &squashfs_symlink_aops; 290 inode->i_data.a_ops = &squashfs_symlink_aops;
@@ -325,7 +325,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
325 inode->i_mode |= S_IFCHR; 325 inode->i_mode |= S_IFCHR;
326 else 326 else
327 inode->i_mode |= S_IFBLK; 327 inode->i_mode |= S_IFBLK;
328 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 328 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
329 rdev = le32_to_cpu(sqsh_ino->rdev); 329 rdev = le32_to_cpu(sqsh_ino->rdev);
330 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev)); 330 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
331 331
@@ -349,7 +349,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
349 inode->i_mode |= S_IFBLK; 349 inode->i_mode |= S_IFBLK;
350 xattr_id = le32_to_cpu(sqsh_ino->xattr); 350 xattr_id = le32_to_cpu(sqsh_ino->xattr);
351 inode->i_op = &squashfs_inode_ops; 351 inode->i_op = &squashfs_inode_ops;
352 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 352 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
353 rdev = le32_to_cpu(sqsh_ino->rdev); 353 rdev = le32_to_cpu(sqsh_ino->rdev);
354 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev)); 354 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
355 355
@@ -370,7 +370,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
370 inode->i_mode |= S_IFIFO; 370 inode->i_mode |= S_IFIFO;
371 else 371 else
372 inode->i_mode |= S_IFSOCK; 372 inode->i_mode |= S_IFSOCK;
373 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 373 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
374 init_special_inode(inode, inode->i_mode, 0); 374 init_special_inode(inode, inode->i_mode, 0);
375 break; 375 break;
376 } 376 }
@@ -389,7 +389,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
389 inode->i_mode |= S_IFSOCK; 389 inode->i_mode |= S_IFSOCK;
390 xattr_id = le32_to_cpu(sqsh_ino->xattr); 390 xattr_id = le32_to_cpu(sqsh_ino->xattr);
391 inode->i_op = &squashfs_inode_ops; 391 inode->i_op = &squashfs_inode_ops;
392 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 392 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
393 init_special_inode(inode, inode->i_mode, 0); 393 init_special_inode(inode, inode->i_mode, 0);
394 break; 394 break;
395 } 395 }
diff --git a/fs/stack.c b/fs/stack.c
index b4f2ab48a61f..9c11519245a6 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -71,6 +71,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
71 dest->i_ctime = src->i_ctime; 71 dest->i_ctime = src->i_ctime;
72 dest->i_blkbits = src->i_blkbits; 72 dest->i_blkbits = src->i_blkbits;
73 dest->i_flags = src->i_flags; 73 dest->i_flags = src->i_flags;
74 dest->i_nlink = src->i_nlink; 74 set_nlink(dest, src->i_nlink);
75} 75}
76EXPORT_SYMBOL_GPL(fsstack_copy_attr_all); 76EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
diff --git a/fs/stat.c b/fs/stat.c
index 78a3aa83c7ea..8806b8997d2e 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -294,15 +294,16 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
294{ 294{
295 struct path path; 295 struct path path;
296 int error; 296 int error;
297 int empty = 0;
297 298
298 if (bufsiz <= 0) 299 if (bufsiz <= 0)
299 return -EINVAL; 300 return -EINVAL;
300 301
301 error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path); 302 error = user_path_at_empty(dfd, pathname, LOOKUP_EMPTY, &path, &empty);
302 if (!error) { 303 if (!error) {
303 struct inode *inode = path.dentry->d_inode; 304 struct inode *inode = path.dentry->d_inode;
304 305
305 error = -EINVAL; 306 error = empty ? -ENOENT : -EINVAL;
306 if (inode->i_op->readlink) { 307 if (inode->i_op->readlink) {
307 error = security_inode_readlink(path.dentry); 308 error = security_inode_readlink(path.dentry);
308 if (!error) { 309 if (!error) {
diff --git a/fs/super.c b/fs/super.c
index 32a81f3467e0..afd0f1ad45e0 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -727,8 +727,13 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
727 727
728 if (sb->s_op->remount_fs) { 728 if (sb->s_op->remount_fs) {
729 retval = sb->s_op->remount_fs(sb, &flags, data); 729 retval = sb->s_op->remount_fs(sb, &flags, data);
730 if (retval) 730 if (retval) {
731 return retval; 731 if (!force)
732 return retval;
733 /* If forced remount, go ahead despite any errors */
734 WARN(1, "forced remount of a %s fs returned %i\n",
735 sb->s_type->name, retval);
736 }
732 } 737 }
733 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); 738 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
734 739
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index e23f28894a3a..c81b22f3ace1 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -218,7 +218,7 @@ static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode)
218 } 218 }
219 219
220 if (sysfs_type(sd) == SYSFS_DIR) 220 if (sysfs_type(sd) == SYSFS_DIR)
221 inode->i_nlink = sd->s_dir.subdirs + 2; 221 set_nlink(inode, sd->s_dir.subdirs + 2);
222} 222}
223 223
224int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 224int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 0630eb969a28..25ffb3e9a3f8 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -219,7 +219,7 @@ struct inode *sysv_iget(struct super_block *sb, unsigned int ino)
219 inode->i_mode = fs16_to_cpu(sbi, raw_inode->i_mode); 219 inode->i_mode = fs16_to_cpu(sbi, raw_inode->i_mode);
220 inode->i_uid = (uid_t)fs16_to_cpu(sbi, raw_inode->i_uid); 220 inode->i_uid = (uid_t)fs16_to_cpu(sbi, raw_inode->i_uid);
221 inode->i_gid = (gid_t)fs16_to_cpu(sbi, raw_inode->i_gid); 221 inode->i_gid = (gid_t)fs16_to_cpu(sbi, raw_inode->i_gid);
222 inode->i_nlink = fs16_to_cpu(sbi, raw_inode->i_nlink); 222 set_nlink(inode, fs16_to_cpu(sbi, raw_inode->i_nlink));
223 inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size); 223 inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size);
224 inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_atime); 224 inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_atime);
225 inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_mtime); 225 inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_mtime);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index b28121278d46..20403dc5d437 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -129,7 +129,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
129 goto out_ino; 129 goto out_ino;
130 130
131 inode->i_flags |= (S_NOCMTIME | S_NOATIME); 131 inode->i_flags |= (S_NOCMTIME | S_NOATIME);
132 inode->i_nlink = le32_to_cpu(ino->nlink); 132 set_nlink(inode, le32_to_cpu(ino->nlink));
133 inode->i_uid = le32_to_cpu(ino->uid); 133 inode->i_uid = le32_to_cpu(ino->uid);
134 inode->i_gid = le32_to_cpu(ino->gid); 134 inode->i_gid = le32_to_cpu(ino->gid);
135 inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec); 135 inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 16f19f55e63f..bf18f7a04544 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -558,10 +558,10 @@ int ubifs_removexattr(struct dentry *dentry, const char *name)
558 } 558 }
559 559
560 ubifs_assert(inode->i_nlink == 1); 560 ubifs_assert(inode->i_nlink == 1);
561 inode->i_nlink = 0; 561 clear_nlink(inode);
562 err = remove_xattr(c, host, inode, &nm); 562 err = remove_xattr(c, host, inode, &nm);
563 if (err) 563 if (err)
564 inode->i_nlink = 1; 564 set_nlink(inode, 1);
565 565
566 /* If @i_nlink is 0, 'iput()' will delete the inode */ 566 /* If @i_nlink is 0, 'iput()' will delete the inode */
567 iput(inode); 567 iput(inode);
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 95518a9f589e..987585bb0a1d 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -59,8 +59,8 @@ static int __load_block_bitmap(struct super_block *sb,
59 int nr_groups = bitmap->s_nr_groups; 59 int nr_groups = bitmap->s_nr_groups;
60 60
61 if (block_group >= nr_groups) { 61 if (block_group >= nr_groups) {
62 udf_debug("block_group (%d) > nr_groups (%d)\n", block_group, 62 udf_debug("block_group (%d) > nr_groups (%d)\n",
63 nr_groups); 63 block_group, nr_groups);
64 } 64 }
65 65
66 if (bitmap->s_block_bitmap[block_group]) { 66 if (bitmap->s_block_bitmap[block_group]) {
@@ -126,8 +126,9 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
126 if (bloc->logicalBlockNum + count < count || 126 if (bloc->logicalBlockNum + count < count ||
127 (bloc->logicalBlockNum + count) > partmap->s_partition_len) { 127 (bloc->logicalBlockNum + count) > partmap->s_partition_len) {
128 udf_debug("%d < %d || %d + %d > %d\n", 128 udf_debug("%d < %d || %d + %d > %d\n",
129 bloc->logicalBlockNum, 0, bloc->logicalBlockNum, 129 bloc->logicalBlockNum, 0,
130 count, partmap->s_partition_len); 130 bloc->logicalBlockNum, count,
131 partmap->s_partition_len);
131 goto error_return; 132 goto error_return;
132 } 133 }
133 134
@@ -155,7 +156,7 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
155 if (udf_set_bit(bit + i, bh->b_data)) { 156 if (udf_set_bit(bit + i, bh->b_data)) {
156 udf_debug("bit %ld already set\n", bit + i); 157 udf_debug("bit %ld already set\n", bit + i);
157 udf_debug("byte=%2x\n", 158 udf_debug("byte=%2x\n",
158 ((char *)bh->b_data)[(bit + i) >> 3]); 159 ((char *)bh->b_data)[(bit + i) >> 3]);
159 } 160 }
160 } 161 }
161 udf_add_free_space(sb, sbi->s_partition, count); 162 udf_add_free_space(sb, sbi->s_partition, count);
@@ -369,7 +370,8 @@ static void udf_table_free_blocks(struct super_block *sb,
369 if (bloc->logicalBlockNum + count < count || 370 if (bloc->logicalBlockNum + count < count ||
370 (bloc->logicalBlockNum + count) > partmap->s_partition_len) { 371 (bloc->logicalBlockNum + count) > partmap->s_partition_len) {
371 udf_debug("%d < %d || %d + %d > %d\n", 372 udf_debug("%d < %d || %d + %d > %d\n",
372 bloc->logicalBlockNum, 0, bloc->logicalBlockNum, count, 373 bloc->logicalBlockNum, 0,
374 bloc->logicalBlockNum, count,
373 partmap->s_partition_len); 375 partmap->s_partition_len);
374 goto error_return; 376 goto error_return;
375 } 377 }
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 2ffdb6733af1..3e44f575fb9c 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -162,8 +162,8 @@ struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, int *offset)
162 int padlen; 162 int padlen;
163 163
164 if ((!buffer) || (!offset)) { 164 if ((!buffer) || (!offset)) {
165 udf_debug("invalidparms\n, buffer=%p, offset=%p\n", buffer, 165 udf_debug("invalidparms, buffer=%p, offset=%p\n",
166 offset); 166 buffer, offset);
167 return NULL; 167 return NULL;
168 } 168 }
169 169
@@ -201,7 +201,7 @@ struct short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offs
201 struct short_ad *sa; 201 struct short_ad *sa;
202 202
203 if ((!ptr) || (!offset)) { 203 if ((!ptr) || (!offset)) {
204 printk(KERN_ERR "udf: udf_get_fileshortad() invalidparms\n"); 204 pr_err("%s: invalidparms\n", __func__);
205 return NULL; 205 return NULL;
206 } 206 }
207 207
@@ -223,7 +223,7 @@ struct long_ad *udf_get_filelongad(uint8_t *ptr, int maxoffset, uint32_t *offset
223 struct long_ad *la; 223 struct long_ad *la;
224 224
225 if ((!ptr) || (!offset)) { 225 if ((!ptr) || (!offset)) {
226 printk(KERN_ERR "udf: udf_get_filelongad() invalidparms\n"); 226 pr_err("%s: invalidparms\n", __func__);
227 return NULL; 227 return NULL;
228 } 228 }
229 229
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 1d1358ed80c1..4fd1d809738c 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -37,6 +37,7 @@
37#include <linux/writeback.h> 37#include <linux/writeback.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/crc-itu-t.h> 39#include <linux/crc-itu-t.h>
40#include <linux/mpage.h>
40 41
41#include "udf_i.h" 42#include "udf_i.h"
42#include "udf_sb.h" 43#include "udf_sb.h"
@@ -83,12 +84,10 @@ void udf_evict_inode(struct inode *inode)
83 end_writeback(inode); 84 end_writeback(inode);
84 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && 85 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
85 inode->i_size != iinfo->i_lenExtents) { 86 inode->i_size != iinfo->i_lenExtents) {
86 printk(KERN_WARNING "UDF-fs (%s): Inode %lu (mode %o) has " 87 udf_warn(inode->i_sb, "Inode %lu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n",
87 "inode size %llu different from extent length %llu. " 88 inode->i_ino, inode->i_mode,
88 "Filesystem need not be standards compliant.\n", 89 (unsigned long long)inode->i_size,
89 inode->i_sb->s_id, inode->i_ino, inode->i_mode, 90 (unsigned long long)iinfo->i_lenExtents);
90 (unsigned long long)inode->i_size,
91 (unsigned long long)iinfo->i_lenExtents);
92 } 91 }
93 kfree(iinfo->i_ext.i_data); 92 kfree(iinfo->i_ext.i_data);
94 iinfo->i_ext.i_data = NULL; 93 iinfo->i_ext.i_data = NULL;
@@ -104,7 +103,13 @@ static int udf_writepage(struct page *page, struct writeback_control *wbc)
104 103
105static int udf_readpage(struct file *file, struct page *page) 104static int udf_readpage(struct file *file, struct page *page)
106{ 105{
107 return block_read_full_page(page, udf_get_block); 106 return mpage_readpage(page, udf_get_block);
107}
108
109static int udf_readpages(struct file *file, struct address_space *mapping,
110 struct list_head *pages, unsigned nr_pages)
111{
112 return mpage_readpages(mapping, pages, nr_pages, udf_get_block);
108} 113}
109 114
110static int udf_write_begin(struct file *file, struct address_space *mapping, 115static int udf_write_begin(struct file *file, struct address_space *mapping,
@@ -139,6 +144,7 @@ static sector_t udf_bmap(struct address_space *mapping, sector_t block)
139 144
140const struct address_space_operations udf_aops = { 145const struct address_space_operations udf_aops = {
141 .readpage = udf_readpage, 146 .readpage = udf_readpage,
147 .readpages = udf_readpages,
142 .writepage = udf_writepage, 148 .writepage = udf_writepage,
143 .write_begin = udf_write_begin, 149 .write_begin = udf_write_begin,
144 .write_end = generic_write_end, 150 .write_end = generic_write_end,
@@ -1169,16 +1175,15 @@ static void __udf_read_inode(struct inode *inode)
1169 */ 1175 */
1170 bh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 0, &ident); 1176 bh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 0, &ident);
1171 if (!bh) { 1177 if (!bh) {
1172 printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed !bh\n", 1178 udf_err(inode->i_sb, "(ino %ld) failed !bh\n", inode->i_ino);
1173 inode->i_ino);
1174 make_bad_inode(inode); 1179 make_bad_inode(inode);
1175 return; 1180 return;
1176 } 1181 }
1177 1182
1178 if (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE && 1183 if (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE &&
1179 ident != TAG_IDENT_USE) { 1184 ident != TAG_IDENT_USE) {
1180 printk(KERN_ERR "udf: udf_read_inode(ino %ld) " 1185 udf_err(inode->i_sb, "(ino %ld) failed ident=%d\n",
1181 "failed ident=%d\n", inode->i_ino, ident); 1186 inode->i_ino, ident);
1182 brelse(bh); 1187 brelse(bh);
1183 make_bad_inode(inode); 1188 make_bad_inode(inode);
1184 return; 1189 return;
@@ -1218,8 +1223,8 @@ static void __udf_read_inode(struct inode *inode)
1218 } 1223 }
1219 brelse(ibh); 1224 brelse(ibh);
1220 } else if (fe->icbTag.strategyType != cpu_to_le16(4)) { 1225 } else if (fe->icbTag.strategyType != cpu_to_le16(4)) {
1221 printk(KERN_ERR "udf: unsupported strategy type: %d\n", 1226 udf_err(inode->i_sb, "unsupported strategy type: %d\n",
1222 le16_to_cpu(fe->icbTag.strategyType)); 1227 le16_to_cpu(fe->icbTag.strategyType));
1223 brelse(bh); 1228 brelse(bh);
1224 make_bad_inode(inode); 1229 make_bad_inode(inode);
1225 return; 1230 return;
@@ -1236,6 +1241,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1236 int offset; 1241 int offset;
1237 struct udf_sb_info *sbi = UDF_SB(inode->i_sb); 1242 struct udf_sb_info *sbi = UDF_SB(inode->i_sb);
1238 struct udf_inode_info *iinfo = UDF_I(inode); 1243 struct udf_inode_info *iinfo = UDF_I(inode);
1244 unsigned int link_count;
1239 1245
1240 fe = (struct fileEntry *)bh->b_data; 1246 fe = (struct fileEntry *)bh->b_data;
1241 efe = (struct extendedFileEntry *)bh->b_data; 1247 efe = (struct extendedFileEntry *)bh->b_data;
@@ -1318,9 +1324,10 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1318 inode->i_mode &= ~sbi->s_umask; 1324 inode->i_mode &= ~sbi->s_umask;
1319 read_unlock(&sbi->s_cred_lock); 1325 read_unlock(&sbi->s_cred_lock);
1320 1326
1321 inode->i_nlink = le16_to_cpu(fe->fileLinkCount); 1327 link_count = le16_to_cpu(fe->fileLinkCount);
1322 if (!inode->i_nlink) 1328 if (!link_count)
1323 inode->i_nlink = 1; 1329 link_count = 1;
1330 set_nlink(inode, link_count);
1324 1331
1325 inode->i_size = le64_to_cpu(fe->informationLength); 1332 inode->i_size = le64_to_cpu(fe->informationLength);
1326 iinfo->i_lenExtents = inode->i_size; 1333 iinfo->i_lenExtents = inode->i_size;
@@ -1413,9 +1420,8 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1413 udf_debug("METADATA BITMAP FILE-----\n"); 1420 udf_debug("METADATA BITMAP FILE-----\n");
1414 break; 1421 break;
1415 default: 1422 default:
1416 printk(KERN_ERR "udf: udf_fill_inode(ino %ld) failed unknown " 1423 udf_err(inode->i_sb, "(ino %ld) failed unknown file type=%d\n",
1417 "file type=%d\n", inode->i_ino, 1424 inode->i_ino, fe->icbTag.fileType);
1418 fe->icbTag.fileType);
1419 make_bad_inode(inode); 1425 make_bad_inode(inode);
1420 return; 1426 return;
1421 } 1427 }
@@ -1438,8 +1444,8 @@ static int udf_alloc_i_data(struct inode *inode, size_t size)
1438 iinfo->i_ext.i_data = kmalloc(size, GFP_KERNEL); 1444 iinfo->i_ext.i_data = kmalloc(size, GFP_KERNEL);
1439 1445
1440 if (!iinfo->i_ext.i_data) { 1446 if (!iinfo->i_ext.i_data) {
1441 printk(KERN_ERR "udf:udf_alloc_i_data (ino %ld) " 1447 udf_err(inode->i_sb, "(ino %ld) no free memory\n",
1442 "no free memory\n", inode->i_ino); 1448 inode->i_ino);
1443 return -ENOMEM; 1449 return -ENOMEM;
1444 } 1450 }
1445 1451
@@ -1689,9 +1695,8 @@ out:
1689 if (do_sync) { 1695 if (do_sync) {
1690 sync_dirty_buffer(bh); 1696 sync_dirty_buffer(bh);
1691 if (buffer_write_io_error(bh)) { 1697 if (buffer_write_io_error(bh)) {
1692 printk(KERN_WARNING "IO error syncing udf inode " 1698 udf_warn(inode->i_sb, "IO error syncing udf inode [%08lx]\n",
1693 "[%s:%08lx]\n", inode->i_sb->s_id, 1699 inode->i_ino);
1694 inode->i_ino);
1695 err = -EIO; 1700 err = -EIO;
1696 } 1701 }
1697 } 1702 }
@@ -1982,8 +1987,7 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos,
1982 *elen = le32_to_cpu(lad->extLength) & UDF_EXTENT_LENGTH_MASK; 1987 *elen = le32_to_cpu(lad->extLength) & UDF_EXTENT_LENGTH_MASK;
1983 break; 1988 break;
1984 default: 1989 default:
1985 udf_debug("alloc_type = %d unsupported\n", 1990 udf_debug("alloc_type = %d unsupported\n", iinfo->i_alloc_type);
1986 iinfo->i_alloc_type);
1987 return -1; 1991 return -1;
1988 } 1992 }
1989 1993
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 43e24a3b8e10..6583fe9b0645 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -38,7 +38,7 @@ unsigned int udf_get_last_session(struct super_block *sb)
38 38
39 if (i == 0) { 39 if (i == 0) {
40 udf_debug("XA disk: %s, vol_desc_start=%d\n", 40 udf_debug("XA disk: %s, vol_desc_start=%d\n",
41 (ms_info.xa_flag ? "yes" : "no"), ms_info.addr.lba); 41 ms_info.xa_flag ? "yes" : "no", ms_info.addr.lba);
42 if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */ 42 if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */
43 vol_desc_start = ms_info.addr.lba; 43 vol_desc_start = ms_info.addr.lba;
44 } else { 44 } else {
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index 9215700c00a4..c175b4dabc14 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -204,6 +204,7 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
204{ 204{
205 struct tag *tag_p; 205 struct tag *tag_p;
206 struct buffer_head *bh = NULL; 206 struct buffer_head *bh = NULL;
207 u8 checksum;
207 208
208 /* Read the block */ 209 /* Read the block */
209 if (block == 0xFFFFFFFF) 210 if (block == 0xFFFFFFFF)
@@ -211,8 +212,8 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
211 212
212 bh = udf_tread(sb, block); 213 bh = udf_tread(sb, block);
213 if (!bh) { 214 if (!bh) {
214 udf_debug("block=%d, location=%d: read failed\n", 215 udf_err(sb, "read failed, block=%u, location=%d\n",
215 block, location); 216 block, location);
216 return NULL; 217 return NULL;
217 } 218 }
218 219
@@ -227,16 +228,18 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
227 } 228 }
228 229
229 /* Verify the tag checksum */ 230 /* Verify the tag checksum */
230 if (udf_tag_checksum(tag_p) != tag_p->tagChecksum) { 231 checksum = udf_tag_checksum(tag_p);
231 printk(KERN_ERR "udf: tag checksum failed block %d\n", block); 232 if (checksum != tag_p->tagChecksum) {
233 udf_err(sb, "tag checksum failed, block %u: 0x%02x != 0x%02x\n",
234 block, checksum, tag_p->tagChecksum);
232 goto error_out; 235 goto error_out;
233 } 236 }
234 237
235 /* Verify the tag version */ 238 /* Verify the tag version */
236 if (tag_p->descVersion != cpu_to_le16(0x0002U) && 239 if (tag_p->descVersion != cpu_to_le16(0x0002U) &&
237 tag_p->descVersion != cpu_to_le16(0x0003U)) { 240 tag_p->descVersion != cpu_to_le16(0x0003U)) {
238 udf_debug("tag version 0x%04x != 0x0002 || 0x0003 block %d\n", 241 udf_err(sb, "tag version 0x%04x != 0x0002 || 0x0003, block %u\n",
239 le16_to_cpu(tag_p->descVersion), block); 242 le16_to_cpu(tag_p->descVersion), block);
240 goto error_out; 243 goto error_out;
241 } 244 }
242 245
@@ -248,8 +251,8 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
248 return bh; 251 return bh;
249 252
250 udf_debug("Crc failure block %d: crc = %d, crclen = %d\n", block, 253 udf_debug("Crc failure block %d: crc = %d, crclen = %d\n", block,
251 le16_to_cpu(tag_p->descCRC), le16_to_cpu(tag_p->descCRCLength)); 254 le16_to_cpu(tag_p->descCRC),
252 255 le16_to_cpu(tag_p->descCRCLength));
253error_out: 256error_out:
254 brelse(bh); 257 brelse(bh);
255 return NULL; 258 return NULL;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index f1dce848ef96..4639e137222f 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -577,8 +577,7 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
577 577
578 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 578 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
579 if (!fi) { 579 if (!fi) {
580 inode->i_nlink--; 580 inode_dec_link_count(inode);
581 mark_inode_dirty(inode);
582 iput(inode); 581 iput(inode);
583 return err; 582 return err;
584 } 583 }
@@ -618,8 +617,7 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
618 init_special_inode(inode, mode, rdev); 617 init_special_inode(inode, mode, rdev);
619 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 618 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
620 if (!fi) { 619 if (!fi) {
621 inode->i_nlink--; 620 inode_dec_link_count(inode);
622 mark_inode_dirty(inode);
623 iput(inode); 621 iput(inode);
624 return err; 622 return err;
625 } 623 }
@@ -665,12 +663,11 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
665 inode->i_fop = &udf_dir_operations; 663 inode->i_fop = &udf_dir_operations;
666 fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err); 664 fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err);
667 if (!fi) { 665 if (!fi) {
668 inode->i_nlink--; 666 inode_dec_link_count(inode);
669 mark_inode_dirty(inode);
670 iput(inode); 667 iput(inode);
671 goto out; 668 goto out;
672 } 669 }
673 inode->i_nlink = 2; 670 set_nlink(inode, 2);
674 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); 671 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
675 cfi.icb.extLocation = cpu_to_lelb(dinfo->i_location); 672 cfi.icb.extLocation = cpu_to_lelb(dinfo->i_location);
676 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = 673 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
@@ -683,7 +680,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
683 680
684 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 681 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
685 if (!fi) { 682 if (!fi) {
686 inode->i_nlink = 0; 683 clear_nlink(inode);
687 mark_inode_dirty(inode); 684 mark_inode_dirty(inode);
688 iput(inode); 685 iput(inode);
689 goto out; 686 goto out;
@@ -799,9 +796,8 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
799 if (retval) 796 if (retval)
800 goto end_rmdir; 797 goto end_rmdir;
801 if (inode->i_nlink != 2) 798 if (inode->i_nlink != 2)
802 udf_warning(inode->i_sb, "udf_rmdir", 799 udf_warn(inode->i_sb, "empty directory has nlink != 2 (%d)\n",
803 "empty directory has nlink != 2 (%d)", 800 inode->i_nlink);
804 inode->i_nlink);
805 clear_nlink(inode); 801 clear_nlink(inode);
806 inode->i_size = 0; 802 inode->i_size = 0;
807 inode_dec_link_count(dir); 803 inode_dec_link_count(dir);
@@ -840,7 +836,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
840 if (!inode->i_nlink) { 836 if (!inode->i_nlink) {
841 udf_debug("Deleting nonexistent file (%lu), %d\n", 837 udf_debug("Deleting nonexistent file (%lu), %d\n",
842 inode->i_ino, inode->i_nlink); 838 inode->i_ino, inode->i_nlink);
843 inode->i_nlink = 1; 839 set_nlink(inode, 1);
844 } 840 }
845 retval = udf_delete_entry(dir, fi, &fibh, &cfi); 841 retval = udf_delete_entry(dir, fi, &fibh, &cfi);
846 if (retval) 842 if (retval)
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index a71090ea0e07..d6caf01a2097 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -33,8 +33,8 @@ uint32_t udf_get_pblock(struct super_block *sb, uint32_t block,
33 struct udf_sb_info *sbi = UDF_SB(sb); 33 struct udf_sb_info *sbi = UDF_SB(sb);
34 struct udf_part_map *map; 34 struct udf_part_map *map;
35 if (partition >= sbi->s_partitions) { 35 if (partition >= sbi->s_partitions) {
36 udf_debug("block=%d, partition=%d, offset=%d: " 36 udf_debug("block=%d, partition=%d, offset=%d: invalid partition\n",
37 "invalid partition\n", block, partition, offset); 37 block, partition, offset);
38 return 0xFFFFFFFF; 38 return 0xFFFFFFFF;
39 } 39 }
40 map = &sbi->s_partmaps[partition]; 40 map = &sbi->s_partmaps[partition];
@@ -60,8 +60,8 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block,
60 vdata = &map->s_type_specific.s_virtual; 60 vdata = &map->s_type_specific.s_virtual;
61 61
62 if (block > vdata->s_num_entries) { 62 if (block > vdata->s_num_entries) {
63 udf_debug("Trying to access block beyond end of VAT " 63 udf_debug("Trying to access block beyond end of VAT (%d max %d)\n",
64 "(%d max %d)\n", block, vdata->s_num_entries); 64 block, vdata->s_num_entries);
65 return 0xFFFFFFFF; 65 return 0xFFFFFFFF;
66 } 66 }
67 67
@@ -321,9 +321,14 @@ uint32_t udf_get_pblock_meta25(struct super_block *sb, uint32_t block,
321 /* We shouldn't mount such media... */ 321 /* We shouldn't mount such media... */
322 BUG_ON(!inode); 322 BUG_ON(!inode);
323 retblk = udf_try_read_meta(inode, block, partition, offset); 323 retblk = udf_try_read_meta(inode, block, partition, offset);
324 if (retblk == 0xFFFFFFFF) { 324 if (retblk == 0xFFFFFFFF && mdata->s_metadata_fe) {
325 udf_warning(sb, __func__, "error reading from METADATA, " 325 udf_warn(sb, "error reading from METADATA, trying to read from MIRROR\n");
326 "trying to read from MIRROR"); 326 if (!(mdata->s_flags & MF_MIRROR_FE_LOADED)) {
327 mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb,
328 mdata->s_mirror_file_loc, map->s_partition_num);
329 mdata->s_flags |= MF_MIRROR_FE_LOADED;
330 }
331
327 inode = mdata->s_mirror_fe; 332 inode = mdata->s_mirror_fe;
328 if (!inode) 333 if (!inode)
329 return 0xFFFFFFFF; 334 return 0xFFFFFFFF;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 7b27b063ff6d..e185253470df 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -75,8 +75,6 @@
75 75
76#define UDF_DEFAULT_BLOCKSIZE 2048 76#define UDF_DEFAULT_BLOCKSIZE 2048
77 77
78static char error_buf[1024];
79
80/* These are the "meat" - everything else is stuffing */ 78/* These are the "meat" - everything else is stuffing */
81static int udf_fill_super(struct super_block *, void *, int); 79static int udf_fill_super(struct super_block *, void *, int);
82static void udf_put_super(struct super_block *); 80static void udf_put_super(struct super_block *);
@@ -92,8 +90,6 @@ static void udf_close_lvid(struct super_block *);
92static unsigned int udf_count_free(struct super_block *); 90static unsigned int udf_count_free(struct super_block *);
93static int udf_statfs(struct dentry *, struct kstatfs *); 91static int udf_statfs(struct dentry *, struct kstatfs *);
94static int udf_show_options(struct seq_file *, struct vfsmount *); 92static int udf_show_options(struct seq_file *, struct vfsmount *);
95static void udf_error(struct super_block *sb, const char *function,
96 const char *fmt, ...);
97 93
98struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi) 94struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
99{ 95{
@@ -244,9 +240,8 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count)
244 sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map), 240 sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map),
245 GFP_KERNEL); 241 GFP_KERNEL);
246 if (!sbi->s_partmaps) { 242 if (!sbi->s_partmaps) {
247 udf_error(sb, __func__, 243 udf_err(sb, "Unable to allocate space for %d partition maps\n",
248 "Unable to allocate space for %d partition maps", 244 count);
249 count);
250 sbi->s_partitions = 0; 245 sbi->s_partitions = 0;
251 return -ENOMEM; 246 return -ENOMEM;
252 } 247 }
@@ -550,8 +545,7 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
550 uopt->dmode = option & 0777; 545 uopt->dmode = option & 0777;
551 break; 546 break;
552 default: 547 default:
553 printk(KERN_ERR "udf: bad mount option \"%s\" " 548 pr_err("bad mount option \"%s\" or missing value\n", p);
554 "or missing value\n", p);
555 return 0; 549 return 0;
556 } 550 }
557 } 551 }
@@ -645,20 +639,16 @@ static loff_t udf_check_vsd(struct super_block *sb)
645 udf_debug("ISO9660 Boot Record found\n"); 639 udf_debug("ISO9660 Boot Record found\n");
646 break; 640 break;
647 case 1: 641 case 1:
648 udf_debug("ISO9660 Primary Volume Descriptor " 642 udf_debug("ISO9660 Primary Volume Descriptor found\n");
649 "found\n");
650 break; 643 break;
651 case 2: 644 case 2:
652 udf_debug("ISO9660 Supplementary Volume " 645 udf_debug("ISO9660 Supplementary Volume Descriptor found\n");
653 "Descriptor found\n");
654 break; 646 break;
655 case 3: 647 case 3:
656 udf_debug("ISO9660 Volume Partition Descriptor " 648 udf_debug("ISO9660 Volume Partition Descriptor found\n");
657 "found\n");
658 break; 649 break;
659 case 255: 650 case 255:
660 udf_debug("ISO9660 Volume Descriptor Set " 651 udf_debug("ISO9660 Volume Descriptor Set Terminator found\n");
661 "Terminator found\n");
662 break; 652 break;
663 default: 653 default:
664 udf_debug("ISO9660 VRS (%u) found\n", 654 udf_debug("ISO9660 VRS (%u) found\n",
@@ -809,8 +799,7 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
809 pvoldesc->recordingDateAndTime)) { 799 pvoldesc->recordingDateAndTime)) {
810#ifdef UDFFS_DEBUG 800#ifdef UDFFS_DEBUG
811 struct timestamp *ts = &pvoldesc->recordingDateAndTime; 801 struct timestamp *ts = &pvoldesc->recordingDateAndTime;
812 udf_debug("recording time %04u/%02u/%02u" 802 udf_debug("recording time %04u/%02u/%02u %02u:%02u (%x)\n",
813 " %02u:%02u (%x)\n",
814 le16_to_cpu(ts->year), ts->month, ts->day, ts->hour, 803 le16_to_cpu(ts->year), ts->month, ts->day, ts->hour,
815 ts->minute, le16_to_cpu(ts->typeAndTimezone)); 804 ts->minute, le16_to_cpu(ts->typeAndTimezone));
816#endif 805#endif
@@ -821,7 +810,7 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
821 strncpy(UDF_SB(sb)->s_volume_ident, outstr->u_name, 810 strncpy(UDF_SB(sb)->s_volume_ident, outstr->u_name,
822 outstr->u_len > 31 ? 31 : outstr->u_len); 811 outstr->u_len > 31 ? 31 : outstr->u_len);
823 udf_debug("volIdent[] = '%s'\n", 812 udf_debug("volIdent[] = '%s'\n",
824 UDF_SB(sb)->s_volume_ident); 813 UDF_SB(sb)->s_volume_ident);
825 } 814 }
826 815
827 if (!udf_build_ustr(instr, pvoldesc->volSetIdent, 128)) 816 if (!udf_build_ustr(instr, pvoldesc->volSetIdent, 128))
@@ -837,64 +826,57 @@ out1:
837 return ret; 826 return ret;
838} 827}
839 828
829struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
830 u32 meta_file_loc, u32 partition_num)
831{
832 struct kernel_lb_addr addr;
833 struct inode *metadata_fe;
834
835 addr.logicalBlockNum = meta_file_loc;
836 addr.partitionReferenceNum = partition_num;
837
838 metadata_fe = udf_iget(sb, &addr);
839
840 if (metadata_fe == NULL)
841 udf_warn(sb, "metadata inode efe not found\n");
842 else if (UDF_I(metadata_fe)->i_alloc_type != ICBTAG_FLAG_AD_SHORT) {
843 udf_warn(sb, "metadata inode efe does not have short allocation descriptors!\n");
844 iput(metadata_fe);
845 metadata_fe = NULL;
846 }
847
848 return metadata_fe;
849}
850
840static int udf_load_metadata_files(struct super_block *sb, int partition) 851static int udf_load_metadata_files(struct super_block *sb, int partition)
841{ 852{
842 struct udf_sb_info *sbi = UDF_SB(sb); 853 struct udf_sb_info *sbi = UDF_SB(sb);
843 struct udf_part_map *map; 854 struct udf_part_map *map;
844 struct udf_meta_data *mdata; 855 struct udf_meta_data *mdata;
845 struct kernel_lb_addr addr; 856 struct kernel_lb_addr addr;
846 int fe_error = 0;
847 857
848 map = &sbi->s_partmaps[partition]; 858 map = &sbi->s_partmaps[partition];
849 mdata = &map->s_type_specific.s_metadata; 859 mdata = &map->s_type_specific.s_metadata;
850 860
851 /* metadata address */ 861 /* metadata address */
852 addr.logicalBlockNum = mdata->s_meta_file_loc;
853 addr.partitionReferenceNum = map->s_partition_num;
854
855 udf_debug("Metadata file location: block = %d part = %d\n", 862 udf_debug("Metadata file location: block = %d part = %d\n",
856 addr.logicalBlockNum, addr.partitionReferenceNum); 863 mdata->s_meta_file_loc, map->s_partition_num);
857 864
858 mdata->s_metadata_fe = udf_iget(sb, &addr); 865 mdata->s_metadata_fe = udf_find_metadata_inode_efe(sb,
866 mdata->s_meta_file_loc, map->s_partition_num);
859 867
860 if (mdata->s_metadata_fe == NULL) { 868 if (mdata->s_metadata_fe == NULL) {
861 udf_warning(sb, __func__, "metadata inode efe not found, " 869 /* mirror file entry */
862 "will try mirror inode."); 870 udf_debug("Mirror metadata file location: block = %d part = %d\n",
863 fe_error = 1; 871 mdata->s_mirror_file_loc, map->s_partition_num);
864 } else if (UDF_I(mdata->s_metadata_fe)->i_alloc_type !=
865 ICBTAG_FLAG_AD_SHORT) {
866 udf_warning(sb, __func__, "metadata inode efe does not have "
867 "short allocation descriptors!");
868 fe_error = 1;
869 iput(mdata->s_metadata_fe);
870 mdata->s_metadata_fe = NULL;
871 }
872 872
873 /* mirror file entry */ 873 mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb,
874 addr.logicalBlockNum = mdata->s_mirror_file_loc; 874 mdata->s_mirror_file_loc, map->s_partition_num);
875 addr.partitionReferenceNum = map->s_partition_num;
876
877 udf_debug("Mirror metadata file location: block = %d part = %d\n",
878 addr.logicalBlockNum, addr.partitionReferenceNum);
879 875
880 mdata->s_mirror_fe = udf_iget(sb, &addr); 876 if (mdata->s_mirror_fe == NULL) {
881 877 udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n");
882 if (mdata->s_mirror_fe == NULL) {
883 if (fe_error) {
884 udf_error(sb, __func__, "mirror inode efe not found "
885 "and metadata inode is missing too, exiting...");
886 goto error_exit;
887 } else
888 udf_warning(sb, __func__, "mirror inode efe not found,"
889 " but metadata inode is OK");
890 } else if (UDF_I(mdata->s_mirror_fe)->i_alloc_type !=
891 ICBTAG_FLAG_AD_SHORT) {
892 udf_warning(sb, __func__, "mirror inode efe does not have "
893 "short allocation descriptors!");
894 iput(mdata->s_mirror_fe);
895 mdata->s_mirror_fe = NULL;
896 if (fe_error)
897 goto error_exit; 878 goto error_exit;
879 }
898 } 880 }
899 881
900 /* 882 /*
@@ -907,18 +889,15 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
907 addr.partitionReferenceNum = map->s_partition_num; 889 addr.partitionReferenceNum = map->s_partition_num;
908 890
909 udf_debug("Bitmap file location: block = %d part = %d\n", 891 udf_debug("Bitmap file location: block = %d part = %d\n",
910 addr.logicalBlockNum, addr.partitionReferenceNum); 892 addr.logicalBlockNum, addr.partitionReferenceNum);
911 893
912 mdata->s_bitmap_fe = udf_iget(sb, &addr); 894 mdata->s_bitmap_fe = udf_iget(sb, &addr);
913 895
914 if (mdata->s_bitmap_fe == NULL) { 896 if (mdata->s_bitmap_fe == NULL) {
915 if (sb->s_flags & MS_RDONLY) 897 if (sb->s_flags & MS_RDONLY)
916 udf_warning(sb, __func__, "bitmap inode efe " 898 udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n");
917 "not found but it's ok since the disc"
918 " is mounted read-only");
919 else { 899 else {
920 udf_error(sb, __func__, "bitmap inode efe not " 900 udf_err(sb, "bitmap inode efe not found and attempted read-write mount\n");
921 "found and attempted read-write mount");
922 goto error_exit; 901 goto error_exit;
923 } 902 }
924 } 903 }
@@ -971,9 +950,8 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
971 bitmap = vzalloc(size); /* TODO: get rid of vzalloc */ 950 bitmap = vzalloc(size); /* TODO: get rid of vzalloc */
972 951
973 if (bitmap == NULL) { 952 if (bitmap == NULL) {
974 udf_error(sb, __func__, 953 udf_err(sb, "Unable to allocate space for bitmap and %d buffer_head pointers\n",
975 "Unable to allocate space for bitmap " 954 nr_groups);
976 "and %d buffer_head pointers", nr_groups);
977 return NULL; 955 return NULL;
978 } 956 }
979 957
@@ -1003,10 +981,9 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1003 if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_OVERWRITABLE)) 981 if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_OVERWRITABLE))
1004 map->s_partition_flags |= UDF_PART_FLAG_OVERWRITABLE; 982 map->s_partition_flags |= UDF_PART_FLAG_OVERWRITABLE;
1005 983
1006 udf_debug("Partition (%d type %x) starts at physical %d, " 984 udf_debug("Partition (%d type %x) starts at physical %d, block length %d\n",
1007 "block length %d\n", p_index, 985 p_index, map->s_partition_type,
1008 map->s_partition_type, map->s_partition_root, 986 map->s_partition_root, map->s_partition_len);
1009 map->s_partition_len);
1010 987
1011 if (strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR02) && 988 if (strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR02) &&
1012 strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR03)) 989 strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR03))
@@ -1023,12 +1000,12 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1023 map->s_uspace.s_table = udf_iget(sb, &loc); 1000 map->s_uspace.s_table = udf_iget(sb, &loc);
1024 if (!map->s_uspace.s_table) { 1001 if (!map->s_uspace.s_table) {
1025 udf_debug("cannot load unallocSpaceTable (part %d)\n", 1002 udf_debug("cannot load unallocSpaceTable (part %d)\n",
1026 p_index); 1003 p_index);
1027 return 1; 1004 return 1;
1028 } 1005 }
1029 map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE; 1006 map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE;
1030 udf_debug("unallocSpaceTable (part %d) @ %ld\n", 1007 udf_debug("unallocSpaceTable (part %d) @ %ld\n",
1031 p_index, map->s_uspace.s_table->i_ino); 1008 p_index, map->s_uspace.s_table->i_ino);
1032 } 1009 }
1033 1010
1034 if (phd->unallocSpaceBitmap.extLength) { 1011 if (phd->unallocSpaceBitmap.extLength) {
@@ -1041,8 +1018,8 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1041 bitmap->s_extPosition = le32_to_cpu( 1018 bitmap->s_extPosition = le32_to_cpu(
1042 phd->unallocSpaceBitmap.extPosition); 1019 phd->unallocSpaceBitmap.extPosition);
1043 map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP; 1020 map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP;
1044 udf_debug("unallocSpaceBitmap (part %d) @ %d\n", p_index, 1021 udf_debug("unallocSpaceBitmap (part %d) @ %d\n",
1045 bitmap->s_extPosition); 1022 p_index, bitmap->s_extPosition);
1046 } 1023 }
1047 1024
1048 if (phd->partitionIntegrityTable.extLength) 1025 if (phd->partitionIntegrityTable.extLength)
@@ -1058,13 +1035,13 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1058 map->s_fspace.s_table = udf_iget(sb, &loc); 1035 map->s_fspace.s_table = udf_iget(sb, &loc);
1059 if (!map->s_fspace.s_table) { 1036 if (!map->s_fspace.s_table) {
1060 udf_debug("cannot load freedSpaceTable (part %d)\n", 1037 udf_debug("cannot load freedSpaceTable (part %d)\n",
1061 p_index); 1038 p_index);
1062 return 1; 1039 return 1;
1063 } 1040 }
1064 1041
1065 map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE; 1042 map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE;
1066 udf_debug("freedSpaceTable (part %d) @ %ld\n", 1043 udf_debug("freedSpaceTable (part %d) @ %ld\n",
1067 p_index, map->s_fspace.s_table->i_ino); 1044 p_index, map->s_fspace.s_table->i_ino);
1068 } 1045 }
1069 1046
1070 if (phd->freedSpaceBitmap.extLength) { 1047 if (phd->freedSpaceBitmap.extLength) {
@@ -1077,8 +1054,8 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1077 bitmap->s_extPosition = le32_to_cpu( 1054 bitmap->s_extPosition = le32_to_cpu(
1078 phd->freedSpaceBitmap.extPosition); 1055 phd->freedSpaceBitmap.extPosition);
1079 map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP; 1056 map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP;
1080 udf_debug("freedSpaceBitmap (part %d) @ %d\n", p_index, 1057 udf_debug("freedSpaceBitmap (part %d) @ %d\n",
1081 bitmap->s_extPosition); 1058 p_index, bitmap->s_extPosition);
1082 } 1059 }
1083 return 0; 1060 return 0;
1084} 1061}
@@ -1118,11 +1095,9 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
1118 udf_find_vat_block(sb, p_index, type1_index, sbi->s_last_block); 1095 udf_find_vat_block(sb, p_index, type1_index, sbi->s_last_block);
1119 if (!sbi->s_vat_inode && 1096 if (!sbi->s_vat_inode &&
1120 sbi->s_last_block != blocks - 1) { 1097 sbi->s_last_block != blocks - 1) {
1121 printk(KERN_NOTICE "UDF-fs: Failed to read VAT inode from the" 1098 pr_notice("Failed to read VAT inode from the last recorded block (%lu), retrying with the last block of the device (%lu).\n",
1122 " last recorded block (%lu), retrying with the last " 1099 (unsigned long)sbi->s_last_block,
1123 "block of the device (%lu).\n", 1100 (unsigned long)blocks - 1);
1124 (unsigned long)sbi->s_last_block,
1125 (unsigned long)blocks - 1);
1126 udf_find_vat_block(sb, p_index, type1_index, blocks - 1); 1101 udf_find_vat_block(sb, p_index, type1_index, blocks - 1);
1127 } 1102 }
1128 if (!sbi->s_vat_inode) 1103 if (!sbi->s_vat_inode)
@@ -1220,8 +1195,8 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
1220 if (map->s_partition_type == UDF_METADATA_MAP25) { 1195 if (map->s_partition_type == UDF_METADATA_MAP25) {
1221 ret = udf_load_metadata_files(sb, i); 1196 ret = udf_load_metadata_files(sb, i);
1222 if (ret) { 1197 if (ret) {
1223 printk(KERN_ERR "UDF-fs: error loading MetaData " 1198 udf_err(sb, "error loading MetaData partition map %d\n",
1224 "partition map %d\n", i); 1199 i);
1225 goto out_bh; 1200 goto out_bh;
1226 } 1201 }
1227 } else { 1202 } else {
@@ -1234,9 +1209,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
1234 * overwrite blocks instead of relocating them). 1209 * overwrite blocks instead of relocating them).
1235 */ 1210 */
1236 sb->s_flags |= MS_RDONLY; 1211 sb->s_flags |= MS_RDONLY;
1237 printk(KERN_NOTICE "UDF-fs: Filesystem marked read-only " 1212 pr_notice("Filesystem marked read-only because writing to pseudooverwrite partition is not implemented\n");
1238 "because writing to pseudooverwrite partition is "
1239 "not implemented.\n");
1240 } 1213 }
1241out_bh: 1214out_bh:
1242 /* In case loading failed, we handle cleanup in udf_fill_super */ 1215 /* In case loading failed, we handle cleanup in udf_fill_super */
@@ -1344,9 +1317,8 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1344 struct metadataPartitionMap *mdm = 1317 struct metadataPartitionMap *mdm =
1345 (struct metadataPartitionMap *) 1318 (struct metadataPartitionMap *)
1346 &(lvd->partitionMaps[offset]); 1319 &(lvd->partitionMaps[offset]);
1347 udf_debug("Parsing Logical vol part %d " 1320 udf_debug("Parsing Logical vol part %d type %d id=%s\n",
1348 "type %d id=%s\n", i, type, 1321 i, type, UDF_ID_METADATA);
1349 UDF_ID_METADATA);
1350 1322
1351 map->s_partition_type = UDF_METADATA_MAP25; 1323 map->s_partition_type = UDF_METADATA_MAP25;
1352 map->s_partition_func = udf_get_pblock_meta25; 1324 map->s_partition_func = udf_get_pblock_meta25;
@@ -1361,25 +1333,24 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1361 le32_to_cpu(mdm->allocUnitSize); 1333 le32_to_cpu(mdm->allocUnitSize);
1362 mdata->s_align_unit_size = 1334 mdata->s_align_unit_size =
1363 le16_to_cpu(mdm->alignUnitSize); 1335 le16_to_cpu(mdm->alignUnitSize);
1364 mdata->s_dup_md_flag = 1336 if (mdm->flags & 0x01)
1365 mdm->flags & 0x01; 1337 mdata->s_flags |= MF_DUPLICATE_MD;
1366 1338
1367 udf_debug("Metadata Ident suffix=0x%x\n", 1339 udf_debug("Metadata Ident suffix=0x%x\n",
1368 (le16_to_cpu( 1340 le16_to_cpu(*(__le16 *)
1369 ((__le16 *) 1341 mdm->partIdent.identSuffix));
1370 mdm->partIdent.identSuffix)[0])));
1371 udf_debug("Metadata part num=%d\n", 1342 udf_debug("Metadata part num=%d\n",
1372 le16_to_cpu(mdm->partitionNum)); 1343 le16_to_cpu(mdm->partitionNum));
1373 udf_debug("Metadata part alloc unit size=%d\n", 1344 udf_debug("Metadata part alloc unit size=%d\n",
1374 le32_to_cpu(mdm->allocUnitSize)); 1345 le32_to_cpu(mdm->allocUnitSize));
1375 udf_debug("Metadata file loc=%d\n", 1346 udf_debug("Metadata file loc=%d\n",
1376 le32_to_cpu(mdm->metadataFileLoc)); 1347 le32_to_cpu(mdm->metadataFileLoc));
1377 udf_debug("Mirror file loc=%d\n", 1348 udf_debug("Mirror file loc=%d\n",
1378 le32_to_cpu(mdm->metadataMirrorFileLoc)); 1349 le32_to_cpu(mdm->metadataMirrorFileLoc));
1379 udf_debug("Bitmap file loc=%d\n", 1350 udf_debug("Bitmap file loc=%d\n",
1380 le32_to_cpu(mdm->metadataBitmapFileLoc)); 1351 le32_to_cpu(mdm->metadataBitmapFileLoc));
1381 udf_debug("Duplicate Flag: %d %d\n", 1352 udf_debug("Flags: %d %d\n",
1382 mdata->s_dup_md_flag, mdm->flags); 1353 mdata->s_flags, mdm->flags);
1383 } else { 1354 } else {
1384 udf_debug("Unknown ident: %s\n", 1355 udf_debug("Unknown ident: %s\n",
1385 upm2->partIdent.ident); 1356 upm2->partIdent.ident);
@@ -1389,16 +1360,15 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1389 map->s_partition_num = le16_to_cpu(upm2->partitionNum); 1360 map->s_partition_num = le16_to_cpu(upm2->partitionNum);
1390 } 1361 }
1391 udf_debug("Partition (%d:%d) type %d on volume %d\n", 1362 udf_debug("Partition (%d:%d) type %d on volume %d\n",
1392 i, map->s_partition_num, type, 1363 i, map->s_partition_num, type, map->s_volumeseqnum);
1393 map->s_volumeseqnum);
1394 } 1364 }
1395 1365
1396 if (fileset) { 1366 if (fileset) {
1397 struct long_ad *la = (struct long_ad *)&(lvd->logicalVolContentsUse[0]); 1367 struct long_ad *la = (struct long_ad *)&(lvd->logicalVolContentsUse[0]);
1398 1368
1399 *fileset = lelb_to_cpu(la->extLocation); 1369 *fileset = lelb_to_cpu(la->extLocation);
1400 udf_debug("FileSet found in LogicalVolDesc at block=%d, " 1370 udf_debug("FileSet found in LogicalVolDesc at block=%d, partition=%d\n",
1401 "partition=%d\n", fileset->logicalBlockNum, 1371 fileset->logicalBlockNum,
1402 fileset->partitionReferenceNum); 1372 fileset->partitionReferenceNum);
1403 } 1373 }
1404 if (lvd->integritySeqExt.extLength) 1374 if (lvd->integritySeqExt.extLength)
@@ -1478,9 +1448,9 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
1478 1448
1479 bh = udf_read_tagged(sb, block, block, &ident); 1449 bh = udf_read_tagged(sb, block, block, &ident);
1480 if (!bh) { 1450 if (!bh) {
1481 printk(KERN_ERR "udf: Block %Lu of volume descriptor " 1451 udf_err(sb,
1482 "sequence is corrupted or we could not read " 1452 "Block %llu of volume descriptor sequence is corrupted or we could not read it\n",
1483 "it.\n", (unsigned long long)block); 1453 (unsigned long long)block);
1484 return 1; 1454 return 1;
1485 } 1455 }
1486 1456
@@ -1553,7 +1523,7 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
1553 * in a suitable order 1523 * in a suitable order
1554 */ 1524 */
1555 if (!vds[VDS_POS_PRIMARY_VOL_DESC].block) { 1525 if (!vds[VDS_POS_PRIMARY_VOL_DESC].block) {
1556 printk(KERN_ERR "udf: Primary Volume Descriptor not found!\n"); 1526 udf_err(sb, "Primary Volume Descriptor not found!\n");
1557 return 1; 1527 return 1;
1558 } 1528 }
1559 if (udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block)) 1529 if (udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block))
@@ -1740,7 +1710,7 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
1740 1710
1741 if (!sb_set_blocksize(sb, uopt->blocksize)) { 1711 if (!sb_set_blocksize(sb, uopt->blocksize)) {
1742 if (!silent) 1712 if (!silent)
1743 printk(KERN_WARNING "UDF-fs: Bad block size\n"); 1713 udf_warn(sb, "Bad block size\n");
1744 return 0; 1714 return 0;
1745 } 1715 }
1746 sbi->s_last_block = uopt->lastblock; 1716 sbi->s_last_block = uopt->lastblock;
@@ -1749,12 +1719,11 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
1749 nsr_off = udf_check_vsd(sb); 1719 nsr_off = udf_check_vsd(sb);
1750 if (!nsr_off) { 1720 if (!nsr_off) {
1751 if (!silent) 1721 if (!silent)
1752 printk(KERN_WARNING "UDF-fs: No VRS found\n"); 1722 udf_warn(sb, "No VRS found\n");
1753 return 0; 1723 return 0;
1754 } 1724 }
1755 if (nsr_off == -1) 1725 if (nsr_off == -1)
1756 udf_debug("Failed to read byte 32768. Assuming open " 1726 udf_debug("Failed to read byte 32768. Assuming open disc. Skipping validity check\n");
1757 "disc. Skipping validity check\n");
1758 if (!sbi->s_last_block) 1727 if (!sbi->s_last_block)
1759 sbi->s_last_block = udf_get_last_block(sb); 1728 sbi->s_last_block = udf_get_last_block(sb);
1760 } else { 1729 } else {
@@ -1765,7 +1734,7 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
1765 sbi->s_anchor = uopt->anchor; 1734 sbi->s_anchor = uopt->anchor;
1766 if (!udf_find_anchor(sb, fileset)) { 1735 if (!udf_find_anchor(sb, fileset)) {
1767 if (!silent) 1736 if (!silent)
1768 printk(KERN_WARNING "UDF-fs: No anchor found\n"); 1737 udf_warn(sb, "No anchor found\n");
1769 return 0; 1738 return 0;
1770 } 1739 }
1771 return 1; 1740 return 1;
@@ -1937,8 +1906,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1937 1906
1938 if (uopt.flags & (1 << UDF_FLAG_UTF8) && 1907 if (uopt.flags & (1 << UDF_FLAG_UTF8) &&
1939 uopt.flags & (1 << UDF_FLAG_NLS_MAP)) { 1908 uopt.flags & (1 << UDF_FLAG_NLS_MAP)) {
1940 udf_error(sb, "udf_read_super", 1909 udf_err(sb, "utf8 cannot be combined with iocharset\n");
1941 "utf8 cannot be combined with iocharset\n");
1942 goto error_out; 1910 goto error_out;
1943 } 1911 }
1944#ifdef CONFIG_UDF_NLS 1912#ifdef CONFIG_UDF_NLS
@@ -1987,15 +1955,14 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1987 ret = udf_load_vrs(sb, &uopt, silent, &fileset); 1955 ret = udf_load_vrs(sb, &uopt, silent, &fileset);
1988 if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) { 1956 if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) {
1989 if (!silent) 1957 if (!silent)
1990 printk(KERN_NOTICE 1958 pr_notice("Rescanning with blocksize %d\n",
1991 "UDF-fs: Rescanning with blocksize " 1959 UDF_DEFAULT_BLOCKSIZE);
1992 "%d\n", UDF_DEFAULT_BLOCKSIZE);
1993 uopt.blocksize = UDF_DEFAULT_BLOCKSIZE; 1960 uopt.blocksize = UDF_DEFAULT_BLOCKSIZE;
1994 ret = udf_load_vrs(sb, &uopt, silent, &fileset); 1961 ret = udf_load_vrs(sb, &uopt, silent, &fileset);
1995 } 1962 }
1996 } 1963 }
1997 if (!ret) { 1964 if (!ret) {
1998 printk(KERN_WARNING "UDF-fs: No partition found (1)\n"); 1965 udf_warn(sb, "No partition found (1)\n");
1999 goto error_out; 1966 goto error_out;
2000 } 1967 }
2001 1968
@@ -2010,10 +1977,9 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2010 le16_to_cpu(lvidiu->maxUDFWriteRev); */ 1977 le16_to_cpu(lvidiu->maxUDFWriteRev); */
2011 1978
2012 if (minUDFReadRev > UDF_MAX_READ_VERSION) { 1979 if (minUDFReadRev > UDF_MAX_READ_VERSION) {
2013 printk(KERN_ERR "UDF-fs: minUDFReadRev=%x " 1980 udf_err(sb, "minUDFReadRev=%x (max is %x)\n",
2014 "(max is %x)\n", 1981 le16_to_cpu(lvidiu->minUDFReadRev),
2015 le16_to_cpu(lvidiu->minUDFReadRev), 1982 UDF_MAX_READ_VERSION);
2016 UDF_MAX_READ_VERSION);
2017 goto error_out; 1983 goto error_out;
2018 } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION) 1984 } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION)
2019 sb->s_flags |= MS_RDONLY; 1985 sb->s_flags |= MS_RDONLY;
@@ -2027,28 +1993,27 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2027 } 1993 }
2028 1994
2029 if (!sbi->s_partitions) { 1995 if (!sbi->s_partitions) {
2030 printk(KERN_WARNING "UDF-fs: No partition found (2)\n"); 1996 udf_warn(sb, "No partition found (2)\n");
2031 goto error_out; 1997 goto error_out;
2032 } 1998 }
2033 1999
2034 if (sbi->s_partmaps[sbi->s_partition].s_partition_flags & 2000 if (sbi->s_partmaps[sbi->s_partition].s_partition_flags &
2035 UDF_PART_FLAG_READ_ONLY) { 2001 UDF_PART_FLAG_READ_ONLY) {
2036 printk(KERN_NOTICE "UDF-fs: Partition marked readonly; " 2002 pr_notice("Partition marked readonly; forcing readonly mount\n");
2037 "forcing readonly mount\n");
2038 sb->s_flags |= MS_RDONLY; 2003 sb->s_flags |= MS_RDONLY;
2039 } 2004 }
2040 2005
2041 if (udf_find_fileset(sb, &fileset, &rootdir)) { 2006 if (udf_find_fileset(sb, &fileset, &rootdir)) {
2042 printk(KERN_WARNING "UDF-fs: No fileset found\n"); 2007 udf_warn(sb, "No fileset found\n");
2043 goto error_out; 2008 goto error_out;
2044 } 2009 }
2045 2010
2046 if (!silent) { 2011 if (!silent) {
2047 struct timestamp ts; 2012 struct timestamp ts;
2048 udf_time_to_disk_stamp(&ts, sbi->s_record_time); 2013 udf_time_to_disk_stamp(&ts, sbi->s_record_time);
2049 udf_info("UDF: Mounting volume '%s', " 2014 udf_info("Mounting volume '%s', timestamp %04u/%02u/%02u %02u:%02u (%x)\n",
2050 "timestamp %04u/%02u/%02u %02u:%02u (%x)\n", 2015 sbi->s_volume_ident,
2051 sbi->s_volume_ident, le16_to_cpu(ts.year), ts.month, ts.day, 2016 le16_to_cpu(ts.year), ts.month, ts.day,
2052 ts.hour, ts.minute, le16_to_cpu(ts.typeAndTimezone)); 2017 ts.hour, ts.minute, le16_to_cpu(ts.typeAndTimezone));
2053 } 2018 }
2054 if (!(sb->s_flags & MS_RDONLY)) 2019 if (!(sb->s_flags & MS_RDONLY))
@@ -2059,8 +2024,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2059 /* perhaps it's not extensible enough, but for now ... */ 2024 /* perhaps it's not extensible enough, but for now ... */
2060 inode = udf_iget(sb, &rootdir); 2025 inode = udf_iget(sb, &rootdir);
2061 if (!inode) { 2026 if (!inode) {
2062 printk(KERN_ERR "UDF-fs: Error in udf_iget, block=%d, " 2027 udf_err(sb, "Error in udf_iget, block=%d, partition=%d\n",
2063 "partition=%d\n",
2064 rootdir.logicalBlockNum, rootdir.partitionReferenceNum); 2028 rootdir.logicalBlockNum, rootdir.partitionReferenceNum);
2065 goto error_out; 2029 goto error_out;
2066 } 2030 }
@@ -2068,7 +2032,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2068 /* Allocate a dentry for the root inode */ 2032 /* Allocate a dentry for the root inode */
2069 sb->s_root = d_alloc_root(inode); 2033 sb->s_root = d_alloc_root(inode);
2070 if (!sb->s_root) { 2034 if (!sb->s_root) {
2071 printk(KERN_ERR "UDF-fs: Couldn't allocate root dentry\n"); 2035 udf_err(sb, "Couldn't allocate root dentry\n");
2072 iput(inode); 2036 iput(inode);
2073 goto error_out; 2037 goto error_out;
2074 } 2038 }
@@ -2096,32 +2060,40 @@ error_out:
2096 return -EINVAL; 2060 return -EINVAL;
2097} 2061}
2098 2062
2099static void udf_error(struct super_block *sb, const char *function, 2063void _udf_err(struct super_block *sb, const char *function,
2100 const char *fmt, ...) 2064 const char *fmt, ...)
2101{ 2065{
2066 struct va_format vaf;
2102 va_list args; 2067 va_list args;
2103 2068
2104 if (!(sb->s_flags & MS_RDONLY)) { 2069 /* mark sb error */
2105 /* mark sb error */ 2070 if (!(sb->s_flags & MS_RDONLY))
2106 sb->s_dirt = 1; 2071 sb->s_dirt = 1;
2107 } 2072
2108 va_start(args, fmt); 2073 va_start(args, fmt);
2109 vsnprintf(error_buf, sizeof(error_buf), fmt, args); 2074
2075 vaf.fmt = fmt;
2076 vaf.va = &args;
2077
2078 pr_err("error (device %s): %s: %pV", sb->s_id, function, &vaf);
2079
2110 va_end(args); 2080 va_end(args);
2111 printk(KERN_CRIT "UDF-fs error (device %s): %s: %s\n",
2112 sb->s_id, function, error_buf);
2113} 2081}
2114 2082
2115void udf_warning(struct super_block *sb, const char *function, 2083void _udf_warn(struct super_block *sb, const char *function,
2116 const char *fmt, ...) 2084 const char *fmt, ...)
2117{ 2085{
2086 struct va_format vaf;
2118 va_list args; 2087 va_list args;
2119 2088
2120 va_start(args, fmt); 2089 va_start(args, fmt);
2121 vsnprintf(error_buf, sizeof(error_buf), fmt, args); 2090
2091 vaf.fmt = fmt;
2092 vaf.va = &args;
2093
2094 pr_warn("warning (device %s): %s: %pV", sb->s_id, function, &vaf);
2095
2122 va_end(args); 2096 va_end(args);
2123 printk(KERN_WARNING "UDF-fs warning (device %s): %s: %s\n",
2124 sb->s_id, function, error_buf);
2125} 2097}
2126 2098
2127static void udf_put_super(struct super_block *sb) 2099static void udf_put_super(struct super_block *sb)
@@ -2213,11 +2185,11 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
2213 bh = udf_read_ptagged(sb, &loc, 0, &ident); 2185 bh = udf_read_ptagged(sb, &loc, 0, &ident);
2214 2186
2215 if (!bh) { 2187 if (!bh) {
2216 printk(KERN_ERR "udf: udf_count_free failed\n"); 2188 udf_err(sb, "udf_count_free failed\n");
2217 goto out; 2189 goto out;
2218 } else if (ident != TAG_IDENT_SBD) { 2190 } else if (ident != TAG_IDENT_SBD) {
2219 brelse(bh); 2191 brelse(bh);
2220 printk(KERN_ERR "udf: udf_count_free failed\n"); 2192 udf_err(sb, "udf_count_free failed\n");
2221 goto out; 2193 goto out;
2222 } 2194 }
2223 2195
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 8424308db4b4..4b98fee8e161 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -95,23 +95,21 @@ void udf_truncate_tail_extent(struct inode *inode)
95 lbcount += elen; 95 lbcount += elen;
96 if (lbcount > inode->i_size) { 96 if (lbcount > inode->i_size) {
97 if (lbcount - inode->i_size >= inode->i_sb->s_blocksize) 97 if (lbcount - inode->i_size >= inode->i_sb->s_blocksize)
98 printk(KERN_WARNING 98 udf_warn(inode->i_sb,
99 "udf_truncate_tail_extent(): Too long " 99 "Too long extent after EOF in inode %u: i_size: %lld lbcount: %lld extent %u+%u\n",
100 "extent after EOF in inode %u: i_size: " 100 (unsigned)inode->i_ino,
101 "%Ld lbcount: %Ld extent %u+%u\n", 101 (long long)inode->i_size,
102 (unsigned)inode->i_ino, 102 (long long)lbcount,
103 (long long)inode->i_size, 103 (unsigned)eloc.logicalBlockNum,
104 (long long)lbcount, 104 (unsigned)elen);
105 (unsigned)eloc.logicalBlockNum,
106 (unsigned)elen);
107 nelen = elen - (lbcount - inode->i_size); 105 nelen = elen - (lbcount - inode->i_size);
108 epos.offset -= adsize; 106 epos.offset -= adsize;
109 extent_trunc(inode, &epos, &eloc, etype, elen, nelen); 107 extent_trunc(inode, &epos, &eloc, etype, elen, nelen);
110 epos.offset += adsize; 108 epos.offset += adsize;
111 if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1) 109 if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1)
112 printk(KERN_ERR "udf_truncate_tail_extent(): " 110 udf_err(inode->i_sb,
113 "Extent after EOF in inode %u.\n", 111 "Extent after EOF in inode %u\n",
114 (unsigned)inode->i_ino); 112 (unsigned)inode->i_ino);
115 break; 113 break;
116 } 114 }
117 } 115 }
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 4858c191242b..5142a82e3276 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -54,13 +54,16 @@
54 54
55#pragma pack(1) /* XXX(hch): Why? This file just defines in-core structures */ 55#pragma pack(1) /* XXX(hch): Why? This file just defines in-core structures */
56 56
57#define MF_DUPLICATE_MD 0x01
58#define MF_MIRROR_FE_LOADED 0x02
59
57struct udf_meta_data { 60struct udf_meta_data {
58 __u32 s_meta_file_loc; 61 __u32 s_meta_file_loc;
59 __u32 s_mirror_file_loc; 62 __u32 s_mirror_file_loc;
60 __u32 s_bitmap_file_loc; 63 __u32 s_bitmap_file_loc;
61 __u32 s_alloc_unit_size; 64 __u32 s_alloc_unit_size;
62 __u16 s_align_unit_size; 65 __u16 s_align_unit_size;
63 __u8 s_dup_md_flag; 66 int s_flags;
64 struct inode *s_metadata_fe; 67 struct inode *s_metadata_fe;
65 struct inode *s_mirror_fe; 68 struct inode *s_mirror_fe;
66 struct inode *s_bitmap_fe; 69 struct inode *s_bitmap_fe;
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index dc8a8dcc5ae1..f34e6fc0cdaa 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -1,6 +1,8 @@
1#ifndef __UDF_DECL_H 1#ifndef __UDF_DECL_H
2#define __UDF_DECL_H 2#define __UDF_DECL_H
3 3
4#define pr_fmt(fmt) "UDF-fs: " fmt
5
4#include "ecma_167.h" 6#include "ecma_167.h"
5#include "osta_udf.h" 7#include "osta_udf.h"
6 8
@@ -16,23 +18,30 @@
16#define UDF_PREALLOCATE 18#define UDF_PREALLOCATE
17#define UDF_DEFAULT_PREALLOC_BLOCKS 8 19#define UDF_DEFAULT_PREALLOC_BLOCKS 8
18 20
21extern __printf(3, 4) void _udf_err(struct super_block *sb,
22 const char *function, const char *fmt, ...);
23#define udf_err(sb, fmt, ...) \
24 _udf_err(sb, __func__, fmt, ##__VA_ARGS__)
25
26extern __printf(3, 4) void _udf_warn(struct super_block *sb,
27 const char *function, const char *fmt, ...);
28#define udf_warn(sb, fmt, ...) \
29 _udf_warn(sb, __func__, fmt, ##__VA_ARGS__)
30
31#define udf_info(fmt, ...) \
32 pr_info("INFO " fmt, ##__VA_ARGS__)
33
19#undef UDFFS_DEBUG 34#undef UDFFS_DEBUG
20 35
21#ifdef UDFFS_DEBUG 36#ifdef UDFFS_DEBUG
22#define udf_debug(f, a...) \ 37#define udf_debug(fmt, ...) \
23do { \ 38 printk(KERN_DEBUG pr_fmt("%s:%d:%s: " fmt), \
24 printk(KERN_DEBUG "UDF-fs DEBUG %s:%d:%s: ", \ 39 __FILE__, __LINE__, __func__, ##__VA_ARGS__)
25 __FILE__, __LINE__, __func__); \
26 printk(f, ##a); \
27} while (0)
28#else 40#else
29#define udf_debug(f, a...) /**/ 41#define udf_debug(fmt, ...) \
42 no_printk(fmt, ##__VA_ARGS__)
30#endif 43#endif
31 44
32#define udf_info(f, a...) \
33 printk(KERN_INFO "UDF-fs INFO " f, ##a);
34
35
36#define udf_fixed_to_variable(x) ( ( ( (x) >> 5 ) * 39 ) + ( (x) & 0x0000001F ) ) 45#define udf_fixed_to_variable(x) ( ( ( (x) >> 5 ) * 39 ) + ( (x) & 0x0000001F ) )
37#define udf_variable_to_fixed(x) ( ( ( (x) / 39 ) << 5 ) + ( (x) % 39 ) ) 46#define udf_variable_to_fixed(x) ( ( ( (x) / 39 ) << 5 ) + ( (x) % 39 ) )
38 47
@@ -112,8 +121,6 @@ struct extent_position {
112 121
113/* super.c */ 122/* super.c */
114 123
115extern __printf(3, 4) void udf_warning(struct super_block *, const char *,
116 const char *, ...);
117static inline void udf_updated_lvid(struct super_block *sb) 124static inline void udf_updated_lvid(struct super_block *sb)
118{ 125{
119 struct buffer_head *bh = UDF_SB(sb)->s_lvid_bh; 126 struct buffer_head *bh = UDF_SB(sb)->s_lvid_bh;
@@ -126,6 +133,8 @@ static inline void udf_updated_lvid(struct super_block *sb)
126 UDF_SB(sb)->s_lvid_dirty = 1; 133 UDF_SB(sb)->s_lvid_dirty = 1;
127} 134}
128extern u64 lvid_get_unique_id(struct super_block *sb); 135extern u64 lvid_get_unique_id(struct super_block *sb);
136struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
137 u32 meta_file_loc, u32 partition_num);
129 138
130/* namei.c */ 139/* namei.c */
131extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *, 140extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c
index b8c828c4d200..1f11483eba6a 100644
--- a/fs/udf/udftime.c
+++ b/fs/udf/udftime.c
@@ -34,9 +34,10 @@
34 * http://www.boulder.nist.gov/timefreq/pubs/bulletin/leapsecond.htm 34 * http://www.boulder.nist.gov/timefreq/pubs/bulletin/leapsecond.htm
35 */ 35 */
36 36
37#include "udfdecl.h"
38
37#include <linux/types.h> 39#include <linux/types.h>
38#include <linux/kernel.h> 40#include <linux/kernel.h>
39#include "udfdecl.h"
40 41
41#define EPOCH_YEAR 1970 42#define EPOCH_YEAR 1970
42 43
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index d03a90b6ad69..44b815e57f94 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -114,7 +114,7 @@ int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
114 cmp_id = ocu_i->u_cmpID; 114 cmp_id = ocu_i->u_cmpID;
115 if (cmp_id != 8 && cmp_id != 16) { 115 if (cmp_id != 8 && cmp_id != 16) {
116 memset(utf_o, 0, sizeof(struct ustr)); 116 memset(utf_o, 0, sizeof(struct ustr));
117 printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", 117 pr_err("unknown compression code (%d) stri=%s\n",
118 cmp_id, ocu_i->u_name); 118 cmp_id, ocu_i->u_name);
119 return 0; 119 return 0;
120 } 120 }
@@ -242,7 +242,7 @@ try_again:
242 if (utf_cnt) { 242 if (utf_cnt) {
243error_out: 243error_out:
244 ocu[++u_len] = '?'; 244 ocu[++u_len] = '?';
245 printk(KERN_DEBUG "udf: bad UTF-8 character\n"); 245 printk(KERN_DEBUG pr_fmt("bad UTF-8 character\n"));
246 } 246 }
247 247
248 ocu[length - 1] = (uint8_t)u_len + 1; 248 ocu[length - 1] = (uint8_t)u_len + 1;
@@ -267,7 +267,7 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
267 cmp_id = ocu_i->u_cmpID; 267 cmp_id = ocu_i->u_cmpID;
268 if (cmp_id != 8 && cmp_id != 16) { 268 if (cmp_id != 8 && cmp_id != 16) {
269 memset(utf_o, 0, sizeof(struct ustr)); 269 memset(utf_o, 0, sizeof(struct ustr));
270 printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", 270 pr_err("unknown compression code (%d) stri=%s\n",
271 cmp_id, ocu_i->u_name); 271 cmp_id, ocu_i->u_name);
272 return 0; 272 return 0;
273 } 273 }
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 2eabf04af3de..78a4c70d46b5 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -341,7 +341,7 @@ cg_found:
341 341
342fail_remove_inode: 342fail_remove_inode:
343 unlock_super(sb); 343 unlock_super(sb);
344 inode->i_nlink = 0; 344 clear_nlink(inode);
345 iput(inode); 345 iput(inode);
346 UFSD("EXIT (FAILED): err %d\n", err); 346 UFSD("EXIT (FAILED): err %d\n", err);
347 return ERR_PTR(err); 347 return ERR_PTR(err);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index b4d791a83207..879b13436fa4 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -589,7 +589,7 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
589 * Copy data to the in-core inode. 589 * Copy data to the in-core inode.
590 */ 590 */
591 inode->i_mode = mode = fs16_to_cpu(sb, ufs_inode->ui_mode); 591 inode->i_mode = mode = fs16_to_cpu(sb, ufs_inode->ui_mode);
592 inode->i_nlink = fs16_to_cpu(sb, ufs_inode->ui_nlink); 592 set_nlink(inode, fs16_to_cpu(sb, ufs_inode->ui_nlink));
593 if (inode->i_nlink == 0) { 593 if (inode->i_nlink == 0) {
594 ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino); 594 ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
595 return -1; 595 return -1;
@@ -637,7 +637,7 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
637 * Copy data to the in-core inode. 637 * Copy data to the in-core inode.
638 */ 638 */
639 inode->i_mode = mode = fs16_to_cpu(sb, ufs2_inode->ui_mode); 639 inode->i_mode = mode = fs16_to_cpu(sb, ufs2_inode->ui_mode);
640 inode->i_nlink = fs16_to_cpu(sb, ufs2_inode->ui_nlink); 640 set_nlink(inode, fs16_to_cpu(sb, ufs2_inode->ui_nlink));
641 if (inode->i_nlink == 0) { 641 if (inode->i_nlink == 0) {
642 ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino); 642 ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
643 return -1; 643 return -1;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 9ba2a07b7343..23ce927973a4 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1153,7 +1153,7 @@ xfs_setup_inode(
1153 hlist_add_fake(&inode->i_hash); 1153 hlist_add_fake(&inode->i_hash);
1154 1154
1155 inode->i_mode = ip->i_d.di_mode; 1155 inode->i_mode = ip->i_d.di_mode;
1156 inode->i_nlink = ip->i_d.di_nlink; 1156 set_nlink(inode, ip->i_d.di_nlink);
1157 inode->i_uid = ip->i_d.di_uid; 1157 inode->i_uid = ip->i_d.di_uid;
1158 inode->i_gid = ip->i_d.di_gid; 1158 inode->i_gid = ip->i_d.di_gid;
1159 1159
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 62157c03caf7..4df926199369 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -165,6 +165,7 @@ struct dentry_operations {
165 unsigned int, const char *, const struct qstr *); 165 unsigned int, const char *, const struct qstr *);
166 int (*d_delete)(const struct dentry *); 166 int (*d_delete)(const struct dentry *);
167 void (*d_release)(struct dentry *); 167 void (*d_release)(struct dentry *);
168 void (*d_prune)(struct dentry *);
168 void (*d_iput)(struct dentry *, struct inode *); 169 void (*d_iput)(struct dentry *, struct inode *);
169 char *(*d_dname)(struct dentry *, char *, int); 170 char *(*d_dname)(struct dentry *, char *, int);
170 struct vfsmount *(*d_automount)(struct path *); 171 struct vfsmount *(*d_automount)(struct path *);
@@ -184,8 +185,9 @@ struct dentry_operations {
184#define DCACHE_OP_COMPARE 0x0002 185#define DCACHE_OP_COMPARE 0x0002
185#define DCACHE_OP_REVALIDATE 0x0004 186#define DCACHE_OP_REVALIDATE 0x0004
186#define DCACHE_OP_DELETE 0x0008 187#define DCACHE_OP_DELETE 0x0008
188#define DCACHE_OP_PRUNE 0x0010
187 189
188#define DCACHE_DISCONNECTED 0x0010 190#define DCACHE_DISCONNECTED 0x0020
189 /* This dentry is possibly not currently connected to the dcache tree, in 191 /* This dentry is possibly not currently connected to the dcache tree, in
190 * which case its parent will either be itself, or will have this flag as 192 * which case its parent will either be itself, or will have this flag as
191 * well. nfsd will not use a dentry with this bit set, but will first 193 * well. nfsd will not use a dentry with this bit set, but will first
@@ -196,8 +198,8 @@ struct dentry_operations {
196 * dentry into place and return that dentry rather than the passed one, 198 * dentry into place and return that dentry rather than the passed one,
197 * typically using d_splice_alias. */ 199 * typically using d_splice_alias. */
198 200
199#define DCACHE_REFERENCED 0x0020 /* Recently used, don't discard. */ 201#define DCACHE_REFERENCED 0x0040 /* Recently used, don't discard. */
200#define DCACHE_RCUACCESS 0x0040 /* Entry has ever been RCU-visible */ 202#define DCACHE_RCUACCESS 0x0080 /* Entry has ever been RCU-visible */
201 203
202#define DCACHE_CANT_MOUNT 0x0100 204#define DCACHE_CANT_MOUNT 0x0100
203#define DCACHE_GENOCIDE 0x0200 205#define DCACHE_GENOCIDE 0x0200
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 53792bf36c71..ce1b719e8bd4 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -197,8 +197,8 @@ struct ext2_group_desc
197 197
198/* Flags that should be inherited by new inodes from their parent. */ 198/* Flags that should be inherited by new inodes from their parent. */
199#define EXT2_FL_INHERITED (EXT2_SECRM_FL | EXT2_UNRM_FL | EXT2_COMPR_FL |\ 199#define EXT2_FL_INHERITED (EXT2_SECRM_FL | EXT2_UNRM_FL | EXT2_COMPR_FL |\
200 EXT2_SYNC_FL | EXT2_IMMUTABLE_FL | EXT2_APPEND_FL |\ 200 EXT2_SYNC_FL | EXT2_NODUMP_FL |\
201 EXT2_NODUMP_FL | EXT2_NOATIME_FL | EXT2_COMPRBLK_FL|\ 201 EXT2_NOATIME_FL | EXT2_COMPRBLK_FL |\
202 EXT2_NOCOMP_FL | EXT2_JOURNAL_DATA_FL |\ 202 EXT2_NOCOMP_FL | EXT2_JOURNAL_DATA_FL |\
203 EXT2_NOTAIL_FL | EXT2_DIRSYNC_FL) 203 EXT2_NOTAIL_FL | EXT2_DIRSYNC_FL)
204 204
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 81965cce6bfa..dec99116a0e4 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -180,8 +180,8 @@ struct ext3_group_desc
180 180
181/* Flags that should be inherited by new inodes from their parent. */ 181/* Flags that should be inherited by new inodes from their parent. */
182#define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\ 182#define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\
183 EXT3_SYNC_FL | EXT3_IMMUTABLE_FL | EXT3_APPEND_FL |\ 183 EXT3_SYNC_FL | EXT3_NODUMP_FL |\
184 EXT3_NODUMP_FL | EXT3_NOATIME_FL | EXT3_COMPRBLK_FL|\ 184 EXT3_NOATIME_FL | EXT3_COMPRBLK_FL |\
185 EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\ 185 EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\
186 EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL) 186 EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL)
187 187
@@ -381,7 +381,7 @@ struct ext3_inode {
381 * Mount flags 381 * Mount flags
382 */ 382 */
383#define EXT3_MOUNT_CHECK 0x00001 /* Do mount-time checks */ 383#define EXT3_MOUNT_CHECK 0x00001 /* Do mount-time checks */
384#define EXT3_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ 384/* EXT3_MOUNT_OLDALLOC was there */
385#define EXT3_MOUNT_GRPID 0x00004 /* Create files with directory's group */ 385#define EXT3_MOUNT_GRPID 0x00004 /* Create files with directory's group */
386#define EXT3_MOUNT_DEBUG 0x00008 /* Some debugging messages */ 386#define EXT3_MOUNT_DEBUG 0x00008 /* Some debugging messages */
387#define EXT3_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ 387#define EXT3_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h
index 258088ab3c6b..64365252f1b0 100644
--- a/include/linux/ext3_fs_sb.h
+++ b/include/linux/ext3_fs_sb.h
@@ -76,10 +76,6 @@ struct ext3_sb_info {
76 struct mutex s_resize_lock; 76 struct mutex s_resize_lock;
77 unsigned long s_commit_interval; 77 unsigned long s_commit_interval;
78 struct block_device *journal_bdev; 78 struct block_device *journal_bdev;
79#ifdef CONFIG_JBD_DEBUG
80 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
81 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
82#endif
83#ifdef CONFIG_QUOTA 79#ifdef CONFIG_QUOTA
84 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ 80 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
85 int s_jquota_fmt; /* Format of quota to use */ 81 int s_jquota_fmt; /* Format of quota to use */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7a049fd2aa4c..0c4df261af7e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -768,14 +768,25 @@ struct inode {
768 768
769 /* Stat data, not accessed from path walking */ 769 /* Stat data, not accessed from path walking */
770 unsigned long i_ino; 770 unsigned long i_ino;
771 unsigned int i_nlink; 771 /*
772 * Filesystems may only read i_nlink directly. They shall use the
773 * following functions for modification:
774 *
775 * (set|clear|inc|drop)_nlink
776 * inode_(inc|dec)_link_count
777 */
778 union {
779 const unsigned int i_nlink;
780 unsigned int __i_nlink;
781 };
772 dev_t i_rdev; 782 dev_t i_rdev;
773 loff_t i_size;
774 struct timespec i_atime; 783 struct timespec i_atime;
775 struct timespec i_mtime; 784 struct timespec i_mtime;
776 struct timespec i_ctime; 785 struct timespec i_ctime;
777 unsigned int i_blkbits; 786 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
787 unsigned short i_bytes;
778 blkcnt_t i_blocks; 788 blkcnt_t i_blocks;
789 loff_t i_size;
779 790
780#ifdef __NEED_I_SIZE_ORDERED 791#ifdef __NEED_I_SIZE_ORDERED
781 seqcount_t i_size_seqcount; 792 seqcount_t i_size_seqcount;
@@ -783,7 +794,6 @@ struct inode {
783 794
784 /* Misc */ 795 /* Misc */
785 unsigned long i_state; 796 unsigned long i_state;
786 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
787 struct mutex i_mutex; 797 struct mutex i_mutex;
788 798
789 unsigned long dirtied_when; /* jiffies of first dirtying */ 799 unsigned long dirtied_when; /* jiffies of first dirtying */
@@ -797,9 +807,10 @@ struct inode {
797 struct rcu_head i_rcu; 807 struct rcu_head i_rcu;
798 }; 808 };
799 atomic_t i_count; 809 atomic_t i_count;
810 unsigned int i_blkbits;
800 u64 i_version; 811 u64 i_version;
801 unsigned short i_bytes;
802 atomic_t i_dio_count; 812 atomic_t i_dio_count;
813 atomic_t i_writecount;
803 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ 814 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
804 struct file_lock *i_flock; 815 struct file_lock *i_flock;
805 struct address_space i_data; 816 struct address_space i_data;
@@ -823,7 +834,6 @@ struct inode {
823#ifdef CONFIG_IMA 834#ifdef CONFIG_IMA
824 atomic_t i_readcount; /* struct files open RO */ 835 atomic_t i_readcount; /* struct files open RO */
825#endif 836#endif
826 atomic_t i_writecount;
827 void *i_private; /* fs or device private pointer */ 837 void *i_private; /* fs or device private pointer */
828}; 838};
829 839
@@ -1755,6 +1765,19 @@ static inline void mark_inode_dirty_sync(struct inode *inode)
1755} 1765}
1756 1766
1757/** 1767/**
1768 * set_nlink - directly set an inode's link count
1769 * @inode: inode
1770 * @nlink: new nlink (should be non-zero)
1771 *
1772 * This is a low-level filesystem helper to replace any
1773 * direct filesystem manipulation of i_nlink.
1774 */
1775static inline void set_nlink(struct inode *inode, unsigned int nlink)
1776{
1777 inode->__i_nlink = nlink;
1778}
1779
1780/**
1758 * inc_nlink - directly increment an inode's link count 1781 * inc_nlink - directly increment an inode's link count
1759 * @inode: inode 1782 * @inode: inode
1760 * 1783 *
@@ -1764,7 +1787,7 @@ static inline void mark_inode_dirty_sync(struct inode *inode)
1764 */ 1787 */
1765static inline void inc_nlink(struct inode *inode) 1788static inline void inc_nlink(struct inode *inode)
1766{ 1789{
1767 inode->i_nlink++; 1790 inode->__i_nlink++;
1768} 1791}
1769 1792
1770static inline void inode_inc_link_count(struct inode *inode) 1793static inline void inode_inc_link_count(struct inode *inode)
@@ -1786,7 +1809,7 @@ static inline void inode_inc_link_count(struct inode *inode)
1786 */ 1809 */
1787static inline void drop_nlink(struct inode *inode) 1810static inline void drop_nlink(struct inode *inode)
1788{ 1811{
1789 inode->i_nlink--; 1812 inode->__i_nlink--;
1790} 1813}
1791 1814
1792/** 1815/**
@@ -1799,7 +1822,7 @@ static inline void drop_nlink(struct inode *inode)
1799 */ 1822 */
1800static inline void clear_nlink(struct inode *inode) 1823static inline void clear_nlink(struct inode *inode)
1801{ 1824{
1802 inode->i_nlink = 0; 1825 inode->__i_nlink = 0;
1803} 1826}
1804 1827
1805static inline void inode_dec_link_count(struct inode *inode) 1828static inline void inode_dec_link_count(struct inode *inode)
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index e6a5e34bed4f..c7acdde3243d 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -244,6 +244,7 @@ typedef struct journal_superblock_s
244 244
245#include <linux/fs.h> 245#include <linux/fs.h>
246#include <linux/sched.h> 246#include <linux/sched.h>
247#include <linux/jbd_common.h>
247 248
248#define J_ASSERT(assert) BUG_ON(!(assert)) 249#define J_ASSERT(assert) BUG_ON(!(assert))
249 250
@@ -270,69 +271,6 @@ typedef struct journal_superblock_s
270#define J_EXPECT_JH(jh, expr, why...) __journal_expect(expr, ## why) 271#define J_EXPECT_JH(jh, expr, why...) __journal_expect(expr, ## why)
271#endif 272#endif
272 273
273enum jbd_state_bits {
274 BH_JBD /* Has an attached ext3 journal_head */
275 = BH_PrivateStart,
276 BH_JWrite, /* Being written to log (@@@ DEBUGGING) */
277 BH_Freed, /* Has been freed (truncated) */
278 BH_Revoked, /* Has been revoked from the log */
279 BH_RevokeValid, /* Revoked flag is valid */
280 BH_JBDDirty, /* Is dirty but journaled */
281 BH_State, /* Pins most journal_head state */
282 BH_JournalHead, /* Pins bh->b_private and jh->b_bh */
283 BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */
284};
285
286BUFFER_FNS(JBD, jbd)
287BUFFER_FNS(JWrite, jwrite)
288BUFFER_FNS(JBDDirty, jbddirty)
289TAS_BUFFER_FNS(JBDDirty, jbddirty)
290BUFFER_FNS(Revoked, revoked)
291TAS_BUFFER_FNS(Revoked, revoked)
292BUFFER_FNS(RevokeValid, revokevalid)
293TAS_BUFFER_FNS(RevokeValid, revokevalid)
294BUFFER_FNS(Freed, freed)
295
296static inline struct buffer_head *jh2bh(struct journal_head *jh)
297{
298 return jh->b_bh;
299}
300
301static inline struct journal_head *bh2jh(struct buffer_head *bh)
302{
303 return bh->b_private;
304}
305
306static inline void jbd_lock_bh_state(struct buffer_head *bh)
307{
308 bit_spin_lock(BH_State, &bh->b_state);
309}
310
311static inline int jbd_trylock_bh_state(struct buffer_head *bh)
312{
313 return bit_spin_trylock(BH_State, &bh->b_state);
314}
315
316static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
317{
318 return bit_spin_is_locked(BH_State, &bh->b_state);
319}
320
321static inline void jbd_unlock_bh_state(struct buffer_head *bh)
322{
323 bit_spin_unlock(BH_State, &bh->b_state);
324}
325
326static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
327{
328 bit_spin_lock(BH_JournalHead, &bh->b_state);
329}
330
331static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
332{
333 bit_spin_unlock(BH_JournalHead, &bh->b_state);
334}
335
336struct jbd_revoke_table_s; 274struct jbd_revoke_table_s;
337 275
338/** 276/**
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 38f307b8c334..2092ea21e469 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -275,6 +275,7 @@ typedef struct journal_superblock_s
275 275
276#include <linux/fs.h> 276#include <linux/fs.h>
277#include <linux/sched.h> 277#include <linux/sched.h>
278#include <linux/jbd_common.h>
278 279
279#define J_ASSERT(assert) BUG_ON(!(assert)) 280#define J_ASSERT(assert) BUG_ON(!(assert))
280 281
@@ -302,70 +303,6 @@ typedef struct journal_superblock_s
302#define J_EXPECT_JH(jh, expr, why...) __journal_expect(expr, ## why) 303#define J_EXPECT_JH(jh, expr, why...) __journal_expect(expr, ## why)
303#endif 304#endif
304 305
305enum jbd_state_bits {
306 BH_JBD /* Has an attached ext3 journal_head */
307 = BH_PrivateStart,
308 BH_JWrite, /* Being written to log (@@@ DEBUGGING) */
309 BH_Freed, /* Has been freed (truncated) */
310 BH_Revoked, /* Has been revoked from the log */
311 BH_RevokeValid, /* Revoked flag is valid */
312 BH_JBDDirty, /* Is dirty but journaled */
313 BH_State, /* Pins most journal_head state */
314 BH_JournalHead, /* Pins bh->b_private and jh->b_bh */
315 BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */
316 BH_JBDPrivateStart, /* First bit available for private use by FS */
317};
318
319BUFFER_FNS(JBD, jbd)
320BUFFER_FNS(JWrite, jwrite)
321BUFFER_FNS(JBDDirty, jbddirty)
322TAS_BUFFER_FNS(JBDDirty, jbddirty)
323BUFFER_FNS(Revoked, revoked)
324TAS_BUFFER_FNS(Revoked, revoked)
325BUFFER_FNS(RevokeValid, revokevalid)
326TAS_BUFFER_FNS(RevokeValid, revokevalid)
327BUFFER_FNS(Freed, freed)
328
329static inline struct buffer_head *jh2bh(struct journal_head *jh)
330{
331 return jh->b_bh;
332}
333
334static inline struct journal_head *bh2jh(struct buffer_head *bh)
335{
336 return bh->b_private;
337}
338
339static inline void jbd_lock_bh_state(struct buffer_head *bh)
340{
341 bit_spin_lock(BH_State, &bh->b_state);
342}
343
344static inline int jbd_trylock_bh_state(struct buffer_head *bh)
345{
346 return bit_spin_trylock(BH_State, &bh->b_state);
347}
348
349static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
350{
351 return bit_spin_is_locked(BH_State, &bh->b_state);
352}
353
354static inline void jbd_unlock_bh_state(struct buffer_head *bh)
355{
356 bit_spin_unlock(BH_State, &bh->b_state);
357}
358
359static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
360{
361 bit_spin_lock(BH_JournalHead, &bh->b_state);
362}
363
364static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
365{
366 bit_spin_unlock(BH_JournalHead, &bh->b_state);
367}
368
369/* Flags in jbd_inode->i_flags */ 306/* Flags in jbd_inode->i_flags */
370#define __JI_COMMIT_RUNNING 0 307#define __JI_COMMIT_RUNNING 0
371/* Commit of the inode data in progress. We use this flag to protect us from 308/* Commit of the inode data in progress. We use this flag to protect us from
@@ -1106,9 +1043,9 @@ static inline handle_t *journal_current_handle(void)
1106 */ 1043 */
1107 1044
1108extern handle_t *jbd2_journal_start(journal_t *, int nblocks); 1045extern handle_t *jbd2_journal_start(journal_t *, int nblocks);
1109extern handle_t *jbd2__journal_start(journal_t *, int nblocks, int gfp_mask); 1046extern handle_t *jbd2__journal_start(journal_t *, int nblocks, gfp_t gfp_mask);
1110extern int jbd2_journal_restart(handle_t *, int nblocks); 1047extern int jbd2_journal_restart(handle_t *, int nblocks);
1111extern int jbd2__journal_restart(handle_t *, int nblocks, int gfp_mask); 1048extern int jbd2__journal_restart(handle_t *, int nblocks, gfp_t gfp_mask);
1112extern int jbd2_journal_extend (handle_t *, int nblocks); 1049extern int jbd2_journal_extend (handle_t *, int nblocks);
1113extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *); 1050extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
1114extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *); 1051extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
diff --git a/include/linux/jbd_common.h b/include/linux/jbd_common.h
new file mode 100644
index 000000000000..6230f8556a4e
--- /dev/null
+++ b/include/linux/jbd_common.h
@@ -0,0 +1,68 @@
1#ifndef _LINUX_JBD_STATE_H
2#define _LINUX_JBD_STATE_H
3
4enum jbd_state_bits {
5 BH_JBD /* Has an attached ext3 journal_head */
6 = BH_PrivateStart,
7 BH_JWrite, /* Being written to log (@@@ DEBUGGING) */
8 BH_Freed, /* Has been freed (truncated) */
9 BH_Revoked, /* Has been revoked from the log */
10 BH_RevokeValid, /* Revoked flag is valid */
11 BH_JBDDirty, /* Is dirty but journaled */
12 BH_State, /* Pins most journal_head state */
13 BH_JournalHead, /* Pins bh->b_private and jh->b_bh */
14 BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */
15 BH_JBDPrivateStart, /* First bit available for private use by FS */
16};
17
18BUFFER_FNS(JBD, jbd)
19BUFFER_FNS(JWrite, jwrite)
20BUFFER_FNS(JBDDirty, jbddirty)
21TAS_BUFFER_FNS(JBDDirty, jbddirty)
22BUFFER_FNS(Revoked, revoked)
23TAS_BUFFER_FNS(Revoked, revoked)
24BUFFER_FNS(RevokeValid, revokevalid)
25TAS_BUFFER_FNS(RevokeValid, revokevalid)
26BUFFER_FNS(Freed, freed)
27
28static inline struct buffer_head *jh2bh(struct journal_head *jh)
29{
30 return jh->b_bh;
31}
32
33static inline struct journal_head *bh2jh(struct buffer_head *bh)
34{
35 return bh->b_private;
36}
37
38static inline void jbd_lock_bh_state(struct buffer_head *bh)
39{
40 bit_spin_lock(BH_State, &bh->b_state);
41}
42
43static inline int jbd_trylock_bh_state(struct buffer_head *bh)
44{
45 return bit_spin_trylock(BH_State, &bh->b_state);
46}
47
48static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
49{
50 return bit_spin_is_locked(BH_State, &bh->b_state);
51}
52
53static inline void jbd_unlock_bh_state(struct buffer_head *bh)
54{
55 bit_spin_unlock(BH_State, &bh->b_state);
56}
57
58static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
59{
60 bit_spin_lock(BH_JournalHead, &bh->b_state);
61}
62
63static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
64{
65 bit_spin_unlock(BH_JournalHead, &bh->b_state);
66}
67
68#endif
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 409328d1cbbb..ffc02135c483 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -67,6 +67,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
67#define LOOKUP_EMPTY 0x4000 67#define LOOKUP_EMPTY 0x4000
68 68
69extern int user_path_at(int, const char __user *, unsigned, struct path *); 69extern int user_path_at(int, const char __user *, unsigned, struct path *);
70extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty);
70 71
71#define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path) 72#define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path)
72#define user_lpath(name, path) user_path_at(AT_FDCWD, name, 0, path) 73#define user_lpath(name, path) user_path_at(AT_FDCWD, name, 0, path)
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 851ebf1a4476..4c069d8bd740 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -131,10 +131,10 @@ void unregister_virtio_device(struct virtio_device *dev);
131 * virtio_driver - operations for a virtio I/O driver 131 * virtio_driver - operations for a virtio I/O driver
132 * @driver: underlying device driver (populate name and owner). 132 * @driver: underlying device driver (populate name and owner).
133 * @id_table: the ids serviced by this driver. 133 * @id_table: the ids serviced by this driver.
134 * @feature_table: an array of feature numbers supported by this device. 134 * @feature_table: an array of feature numbers supported by this driver.
135 * @feature_table_size: number of entries in the feature table array. 135 * @feature_table_size: number of entries in the feature table array.
136 * @probe: the function to call when a device is found. Returns 0 or -errno. 136 * @probe: the function to call when a device is found. Returns 0 or -errno.
137 * @remove: the function when a device is removed. 137 * @remove: the function to call when a device is removed.
138 * @config_changed: optional function to call when the device configuration 138 * @config_changed: optional function to call when the device configuration
139 * changes; may be called in interrupt context. 139 * changes; may be called in interrupt context.
140 */ 140 */
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 39c88c5ad19d..add4790b21fe 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -155,6 +155,9 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
155#define virtio_config_val(vdev, fbit, offset, v) \ 155#define virtio_config_val(vdev, fbit, offset, v) \
156 virtio_config_buf((vdev), (fbit), (offset), (v), sizeof(*v)) 156 virtio_config_buf((vdev), (fbit), (offset), (v), sizeof(*v))
157 157
158#define virtio_config_val_len(vdev, fbit, offset, v, len) \
159 virtio_config_buf((vdev), (fbit), (offset), (v), (len))
160
158static inline int virtio_config_buf(struct virtio_device *vdev, 161static inline int virtio_config_buf(struct virtio_device *vdev,
159 unsigned int fbit, 162 unsigned int fbit,
160 unsigned int offset, 163 unsigned int offset,
diff --git a/include/linux/virtio_mmio.h b/include/linux/virtio_mmio.h
new file mode 100644
index 000000000000..27c7edefbc86
--- /dev/null
+++ b/include/linux/virtio_mmio.h
@@ -0,0 +1,111 @@
1/*
2 * Virtio platform device driver
3 *
4 * Copyright 2011, ARM Ltd.
5 *
6 * Based on Virtio PCI driver by Anthony Liguori, copyright IBM Corp. 2007
7 *
8 * This header is BSD licensed so anyone can use the definitions to implement
9 * compatible drivers/servers.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of IBM nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#ifndef _LINUX_VIRTIO_MMIO_H
36#define _LINUX_VIRTIO_MMIO_H
37
38/*
39 * Control registers
40 */
41
42/* Magic value ("virt" string) - Read Only */
43#define VIRTIO_MMIO_MAGIC_VALUE 0x000
44
45/* Virtio device version - Read Only */
46#define VIRTIO_MMIO_VERSION 0x004
47
48/* Virtio device ID - Read Only */
49#define VIRTIO_MMIO_DEVICE_ID 0x008
50
51/* Virtio vendor ID - Read Only */
52#define VIRTIO_MMIO_VENDOR_ID 0x00c
53
54/* Bitmask of the features supported by the host
55 * (32 bits per set) - Read Only */
56#define VIRTIO_MMIO_HOST_FEATURES 0x010
57
58/* Host features set selector - Write Only */
59#define VIRTIO_MMIO_HOST_FEATURES_SEL 0x014
60
61/* Bitmask of features activated by the guest
62 * (32 bits per set) - Write Only */
63#define VIRTIO_MMIO_GUEST_FEATURES 0x020
64
65/* Activated features set selector - Write Only */
66#define VIRTIO_MMIO_GUEST_FEATURES_SET 0x024
67
68/* Guest's memory page size in bytes - Write Only */
69#define VIRTIO_MMIO_GUEST_PAGE_SIZE 0x028
70
71/* Queue selector - Write Only */
72#define VIRTIO_MMIO_QUEUE_SEL 0x030
73
74/* Maximum size of the currently selected queue - Read Only */
75#define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034
76
77/* Queue size for the currently selected queue - Write Only */
78#define VIRTIO_MMIO_QUEUE_NUM 0x038
79
80/* Used Ring alignment for the currently selected queue - Write Only */
81#define VIRTIO_MMIO_QUEUE_ALIGN 0x03c
82
83/* Guest's PFN for the currently selected queue - Read Write */
84#define VIRTIO_MMIO_QUEUE_PFN 0x040
85
86/* Queue notifier - Write Only */
87#define VIRTIO_MMIO_QUEUE_NOTIFY 0x050
88
89/* Interrupt status - Read Only */
90#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060
91
92/* Interrupt acknowledge - Write Only */
93#define VIRTIO_MMIO_INTERRUPT_ACK 0x064
94
95/* Device status register - Read Write */
96#define VIRTIO_MMIO_STATUS 0x070
97
98/* The config space is defined by each driver as
99 * the per-driver configuration space - Read Write */
100#define VIRTIO_MMIO_CONFIG 0x100
101
102
103
104/*
105 * Interrupt flags (re: interrupt status & acknowledge registers)
106 */
107
108#define VIRTIO_MMIO_INT_VRING (1 << 0)
109#define VIRTIO_MMIO_INT_CONFIG (1 << 1)
110
111#endif
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index 4a32cb6da425..36be0f6e18a9 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -135,13 +135,13 @@ static inline void vring_init(struct vring *vr, unsigned int num, void *p,
135 vr->num = num; 135 vr->num = num;
136 vr->desc = p; 136 vr->desc = p;
137 vr->avail = p + num*sizeof(struct vring_desc); 137 vr->avail = p + num*sizeof(struct vring_desc);
138 vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + align-1) 138 vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + sizeof(__u16)
139 & ~(align - 1)); 139 + align-1) & ~(align - 1));
140} 140}
141 141
142static inline unsigned vring_size(unsigned int num, unsigned long align) 142static inline unsigned vring_size(unsigned int num, unsigned long align)
143{ 143{
144 return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num) 144 return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num)
145 + align - 1) & ~(align - 1)) 145 + align - 1) & ~(align - 1))
146 + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num; 146 + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num;
147} 147}
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index b50a54736242..748ff7cbe555 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -9,9 +9,12 @@
9 9
10struct ext4_allocation_context; 10struct ext4_allocation_context;
11struct ext4_allocation_request; 11struct ext4_allocation_request;
12struct ext4_extent;
12struct ext4_prealloc_space; 13struct ext4_prealloc_space;
13struct ext4_inode_info; 14struct ext4_inode_info;
14struct mpage_da_data; 15struct mpage_da_data;
16struct ext4_map_blocks;
17struct ext4_extent;
15 18
16#define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) 19#define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode))
17 20
@@ -1032,9 +1035,9 @@ TRACE_EVENT(ext4_forget,
1032); 1035);
1033 1036
1034TRACE_EVENT(ext4_da_update_reserve_space, 1037TRACE_EVENT(ext4_da_update_reserve_space,
1035 TP_PROTO(struct inode *inode, int used_blocks), 1038 TP_PROTO(struct inode *inode, int used_blocks, int quota_claim),
1036 1039
1037 TP_ARGS(inode, used_blocks), 1040 TP_ARGS(inode, used_blocks, quota_claim),
1038 1041
1039 TP_STRUCT__entry( 1042 TP_STRUCT__entry(
1040 __field( dev_t, dev ) 1043 __field( dev_t, dev )
@@ -1045,6 +1048,7 @@ TRACE_EVENT(ext4_da_update_reserve_space,
1045 __field( int, reserved_data_blocks ) 1048 __field( int, reserved_data_blocks )
1046 __field( int, reserved_meta_blocks ) 1049 __field( int, reserved_meta_blocks )
1047 __field( int, allocated_meta_blocks ) 1050 __field( int, allocated_meta_blocks )
1051 __field( int, quota_claim )
1048 ), 1052 ),
1049 1053
1050 TP_fast_assign( 1054 TP_fast_assign(
@@ -1053,19 +1057,24 @@ TRACE_EVENT(ext4_da_update_reserve_space,
1053 __entry->mode = inode->i_mode; 1057 __entry->mode = inode->i_mode;
1054 __entry->i_blocks = inode->i_blocks; 1058 __entry->i_blocks = inode->i_blocks;
1055 __entry->used_blocks = used_blocks; 1059 __entry->used_blocks = used_blocks;
1056 __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; 1060 __entry->reserved_data_blocks =
1057 __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; 1061 EXT4_I(inode)->i_reserved_data_blocks;
1058 __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks; 1062 __entry->reserved_meta_blocks =
1063 EXT4_I(inode)->i_reserved_meta_blocks;
1064 __entry->allocated_meta_blocks =
1065 EXT4_I(inode)->i_allocated_meta_blocks;
1066 __entry->quota_claim = quota_claim;
1059 ), 1067 ),
1060 1068
1061 TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d " 1069 TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d "
1062 "reserved_data_blocks %d reserved_meta_blocks %d " 1070 "reserved_data_blocks %d reserved_meta_blocks %d "
1063 "allocated_meta_blocks %d", 1071 "allocated_meta_blocks %d quota_claim %d",
1064 MAJOR(__entry->dev), MINOR(__entry->dev), 1072 MAJOR(__entry->dev), MINOR(__entry->dev),
1065 (unsigned long) __entry->ino, 1073 (unsigned long) __entry->ino,
1066 __entry->mode, __entry->i_blocks, 1074 __entry->mode, __entry->i_blocks,
1067 __entry->used_blocks, __entry->reserved_data_blocks, 1075 __entry->used_blocks, __entry->reserved_data_blocks,
1068 __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) 1076 __entry->reserved_meta_blocks, __entry->allocated_meta_blocks,
1077 __entry->quota_claim)
1069); 1078);
1070 1079
1071TRACE_EVENT(ext4_da_reserve_space, 1080TRACE_EVENT(ext4_da_reserve_space,
@@ -1386,6 +1395,87 @@ DEFINE_EVENT(ext4__truncate, ext4_truncate_exit,
1386 TP_ARGS(inode) 1395 TP_ARGS(inode)
1387); 1396);
1388 1397
1398/* 'ux' is the uninitialized extent. */
1399TRACE_EVENT(ext4_ext_convert_to_initialized_enter,
1400 TP_PROTO(struct inode *inode, struct ext4_map_blocks *map,
1401 struct ext4_extent *ux),
1402
1403 TP_ARGS(inode, map, ux),
1404
1405 TP_STRUCT__entry(
1406 __field( ino_t, ino )
1407 __field( dev_t, dev )
1408 __field( ext4_lblk_t, m_lblk )
1409 __field( unsigned, m_len )
1410 __field( ext4_lblk_t, u_lblk )
1411 __field( unsigned, u_len )
1412 __field( ext4_fsblk_t, u_pblk )
1413 ),
1414
1415 TP_fast_assign(
1416 __entry->ino = inode->i_ino;
1417 __entry->dev = inode->i_sb->s_dev;
1418 __entry->m_lblk = map->m_lblk;
1419 __entry->m_len = map->m_len;
1420 __entry->u_lblk = le32_to_cpu(ux->ee_block);
1421 __entry->u_len = ext4_ext_get_actual_len(ux);
1422 __entry->u_pblk = ext4_ext_pblock(ux);
1423 ),
1424
1425 TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u u_lblk %u u_len %u "
1426 "u_pblk %llu",
1427 MAJOR(__entry->dev), MINOR(__entry->dev),
1428 (unsigned long) __entry->ino,
1429 __entry->m_lblk, __entry->m_len,
1430 __entry->u_lblk, __entry->u_len, __entry->u_pblk)
1431);
1432
1433/*
1434 * 'ux' is the uninitialized extent.
1435 * 'ix' is the initialized extent to which blocks are transferred.
1436 */
1437TRACE_EVENT(ext4_ext_convert_to_initialized_fastpath,
1438 TP_PROTO(struct inode *inode, struct ext4_map_blocks *map,
1439 struct ext4_extent *ux, struct ext4_extent *ix),
1440
1441 TP_ARGS(inode, map, ux, ix),
1442
1443 TP_STRUCT__entry(
1444 __field( ino_t, ino )
1445 __field( dev_t, dev )
1446 __field( ext4_lblk_t, m_lblk )
1447 __field( unsigned, m_len )
1448 __field( ext4_lblk_t, u_lblk )
1449 __field( unsigned, u_len )
1450 __field( ext4_fsblk_t, u_pblk )
1451 __field( ext4_lblk_t, i_lblk )
1452 __field( unsigned, i_len )
1453 __field( ext4_fsblk_t, i_pblk )
1454 ),
1455
1456 TP_fast_assign(
1457 __entry->ino = inode->i_ino;
1458 __entry->dev = inode->i_sb->s_dev;
1459 __entry->m_lblk = map->m_lblk;
1460 __entry->m_len = map->m_len;
1461 __entry->u_lblk = le32_to_cpu(ux->ee_block);
1462 __entry->u_len = ext4_ext_get_actual_len(ux);
1463 __entry->u_pblk = ext4_ext_pblock(ux);
1464 __entry->i_lblk = le32_to_cpu(ix->ee_block);
1465 __entry->i_len = ext4_ext_get_actual_len(ix);
1466 __entry->i_pblk = ext4_ext_pblock(ix);
1467 ),
1468
1469 TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u "
1470 "u_lblk %u u_len %u u_pblk %llu "
1471 "i_lblk %u i_len %u i_pblk %llu ",
1472 MAJOR(__entry->dev), MINOR(__entry->dev),
1473 (unsigned long) __entry->ino,
1474 __entry->m_lblk, __entry->m_len,
1475 __entry->u_lblk, __entry->u_len, __entry->u_pblk,
1476 __entry->i_lblk, __entry->i_len, __entry->i_pblk)
1477);
1478
1389DECLARE_EVENT_CLASS(ext4__map_blocks_enter, 1479DECLARE_EVENT_CLASS(ext4__map_blocks_enter,
1390 TP_PROTO(struct inode *inode, ext4_lblk_t lblk, 1480 TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
1391 unsigned int len, unsigned int flags), 1481 unsigned int len, unsigned int flags),
@@ -1589,6 +1679,382 @@ DEFINE_EVENT(ext4__trim, ext4_trim_all_free,
1589 TP_ARGS(sb, group, start, len) 1679 TP_ARGS(sb, group, start, len)
1590); 1680);
1591 1681
1682TRACE_EVENT(ext4_ext_handle_uninitialized_extents,
1683 TP_PROTO(struct inode *inode, struct ext4_map_blocks *map,
1684 unsigned int allocated, ext4_fsblk_t newblock),
1685
1686 TP_ARGS(inode, map, allocated, newblock),
1687
1688 TP_STRUCT__entry(
1689 __field( ino_t, ino )
1690 __field( dev_t, dev )
1691 __field( ext4_lblk_t, lblk )
1692 __field( ext4_fsblk_t, pblk )
1693 __field( unsigned int, len )
1694 __field( int, flags )
1695 __field( unsigned int, allocated )
1696 __field( ext4_fsblk_t, newblk )
1697 ),
1698
1699 TP_fast_assign(
1700 __entry->ino = inode->i_ino;
1701 __entry->dev = inode->i_sb->s_dev;
1702 __entry->lblk = map->m_lblk;
1703 __entry->pblk = map->m_pblk;
1704 __entry->len = map->m_len;
1705 __entry->flags = map->m_flags;
1706 __entry->allocated = allocated;
1707 __entry->newblk = newblock;
1708 ),
1709
1710 TP_printk("dev %d,%d ino %lu m_lblk %u m_pblk %llu m_len %u flags %d"
1711 "allocated %d newblock %llu",
1712 MAJOR(__entry->dev), MINOR(__entry->dev),
1713 (unsigned long) __entry->ino,
1714 (unsigned) __entry->lblk, (unsigned long long) __entry->pblk,
1715 __entry->len, __entry->flags,
1716 (unsigned int) __entry->allocated,
1717 (unsigned long long) __entry->newblk)
1718);
1719
1720TRACE_EVENT(ext4_get_implied_cluster_alloc_exit,
1721 TP_PROTO(struct super_block *sb, struct ext4_map_blocks *map, int ret),
1722
1723 TP_ARGS(sb, map, ret),
1724
1725 TP_STRUCT__entry(
1726 __field( dev_t, dev )
1727 __field( ext4_lblk_t, lblk )
1728 __field( ext4_fsblk_t, pblk )
1729 __field( unsigned int, len )
1730 __field( unsigned int, flags )
1731 __field( int, ret )
1732 ),
1733
1734 TP_fast_assign(
1735 __entry->dev = sb->s_dev;
1736 __entry->lblk = map->m_lblk;
1737 __entry->pblk = map->m_pblk;
1738 __entry->len = map->m_len;
1739 __entry->flags = map->m_flags;
1740 __entry->ret = ret;
1741 ),
1742
1743 TP_printk("dev %d,%d m_lblk %u m_pblk %llu m_len %u m_flags %u ret %d",
1744 MAJOR(__entry->dev), MINOR(__entry->dev),
1745 __entry->lblk, (unsigned long long) __entry->pblk,
1746 __entry->len, __entry->flags, __entry->ret)
1747);
1748
1749TRACE_EVENT(ext4_ext_put_in_cache,
1750 TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len,
1751 ext4_fsblk_t start),
1752
1753 TP_ARGS(inode, lblk, len, start),
1754
1755 TP_STRUCT__entry(
1756 __field( ino_t, ino )
1757 __field( dev_t, dev )
1758 __field( ext4_lblk_t, lblk )
1759 __field( unsigned int, len )
1760 __field( ext4_fsblk_t, start )
1761 ),
1762
1763 TP_fast_assign(
1764 __entry->ino = inode->i_ino;
1765 __entry->dev = inode->i_sb->s_dev;
1766 __entry->lblk = lblk;
1767 __entry->len = len;
1768 __entry->start = start;
1769 ),
1770
1771 TP_printk("dev %d,%d ino %lu lblk %u len %u start %llu",
1772 MAJOR(__entry->dev), MINOR(__entry->dev),
1773 (unsigned long) __entry->ino,
1774 (unsigned) __entry->lblk,
1775 __entry->len,
1776 (unsigned long long) __entry->start)
1777);
1778
1779TRACE_EVENT(ext4_ext_in_cache,
1780 TP_PROTO(struct inode *inode, ext4_lblk_t lblk, int ret),
1781
1782 TP_ARGS(inode, lblk, ret),
1783
1784 TP_STRUCT__entry(
1785 __field( ino_t, ino )
1786 __field( dev_t, dev )
1787 __field( ext4_lblk_t, lblk )
1788 __field( int, ret )
1789 ),
1790
1791 TP_fast_assign(
1792 __entry->ino = inode->i_ino;
1793 __entry->dev = inode->i_sb->s_dev;
1794 __entry->lblk = lblk;
1795 __entry->ret = ret;
1796 ),
1797
1798 TP_printk("dev %d,%d ino %lu lblk %u ret %d",
1799 MAJOR(__entry->dev), MINOR(__entry->dev),
1800 (unsigned long) __entry->ino,
1801 (unsigned) __entry->lblk,
1802 __entry->ret)
1803
1804);
1805
1806TRACE_EVENT(ext4_find_delalloc_range,
1807 TP_PROTO(struct inode *inode, ext4_lblk_t from, ext4_lblk_t to,
1808 int reverse, int found, ext4_lblk_t found_blk),
1809
1810 TP_ARGS(inode, from, to, reverse, found, found_blk),
1811
1812 TP_STRUCT__entry(
1813 __field( ino_t, ino )
1814 __field( dev_t, dev )
1815 __field( ext4_lblk_t, from )
1816 __field( ext4_lblk_t, to )
1817 __field( int, reverse )
1818 __field( int, found )
1819 __field( ext4_lblk_t, found_blk )
1820 ),
1821
1822 TP_fast_assign(
1823 __entry->ino = inode->i_ino;
1824 __entry->dev = inode->i_sb->s_dev;
1825 __entry->from = from;
1826 __entry->to = to;
1827 __entry->reverse = reverse;
1828 __entry->found = found;
1829 __entry->found_blk = found_blk;
1830 ),
1831
1832 TP_printk("dev %d,%d ino %lu from %u to %u reverse %d found %d "
1833 "(blk = %u)",
1834 MAJOR(__entry->dev), MINOR(__entry->dev),
1835 (unsigned long) __entry->ino,
1836 (unsigned) __entry->from, (unsigned) __entry->to,
1837 __entry->reverse, __entry->found,
1838 (unsigned) __entry->found_blk)
1839);
1840
1841TRACE_EVENT(ext4_get_reserved_cluster_alloc,
1842 TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len),
1843
1844 TP_ARGS(inode, lblk, len),
1845
1846 TP_STRUCT__entry(
1847 __field( ino_t, ino )
1848 __field( dev_t, dev )
1849 __field( ext4_lblk_t, lblk )
1850 __field( unsigned int, len )
1851 ),
1852
1853 TP_fast_assign(
1854 __entry->ino = inode->i_ino;
1855 __entry->dev = inode->i_sb->s_dev;
1856 __entry->lblk = lblk;
1857 __entry->len = len;
1858 ),
1859
1860 TP_printk("dev %d,%d ino %lu lblk %u len %u",
1861 MAJOR(__entry->dev), MINOR(__entry->dev),
1862 (unsigned long) __entry->ino,
1863 (unsigned) __entry->lblk,
1864 __entry->len)
1865);
1866
1867TRACE_EVENT(ext4_ext_show_extent,
1868 TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
1869 unsigned short len),
1870
1871 TP_ARGS(inode, lblk, pblk, len),
1872
1873 TP_STRUCT__entry(
1874 __field( ino_t, ino )
1875 __field( dev_t, dev )
1876 __field( ext4_lblk_t, lblk )
1877 __field( ext4_fsblk_t, pblk )
1878 __field( unsigned short, len )
1879 ),
1880
1881 TP_fast_assign(
1882 __entry->ino = inode->i_ino;
1883 __entry->dev = inode->i_sb->s_dev;
1884 __entry->lblk = lblk;
1885 __entry->pblk = pblk;
1886 __entry->len = len;
1887 ),
1888
1889 TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u",
1890 MAJOR(__entry->dev), MINOR(__entry->dev),
1891 (unsigned long) __entry->ino,
1892 (unsigned) __entry->lblk,
1893 (unsigned long long) __entry->pblk,
1894 (unsigned short) __entry->len)
1895);
1896
1897TRACE_EVENT(ext4_remove_blocks,
1898 TP_PROTO(struct inode *inode, struct ext4_extent *ex,
1899 ext4_lblk_t from, ext4_fsblk_t to,
1900 ext4_fsblk_t partial_cluster),
1901
1902 TP_ARGS(inode, ex, from, to, partial_cluster),
1903
1904 TP_STRUCT__entry(
1905 __field( ino_t, ino )
1906 __field( dev_t, dev )
1907 __field( ext4_lblk_t, ee_lblk )
1908 __field( ext4_fsblk_t, ee_pblk )
1909 __field( unsigned short, ee_len )
1910 __field( ext4_lblk_t, from )
1911 __field( ext4_lblk_t, to )
1912 __field( ext4_fsblk_t, partial )
1913 ),
1914
1915 TP_fast_assign(
1916 __entry->ino = inode->i_ino;
1917 __entry->dev = inode->i_sb->s_dev;
1918 __entry->ee_lblk = cpu_to_le32(ex->ee_block);
1919 __entry->ee_pblk = ext4_ext_pblock(ex);
1920 __entry->ee_len = ext4_ext_get_actual_len(ex);
1921 __entry->from = from;
1922 __entry->to = to;
1923 __entry->partial = partial_cluster;
1924 ),
1925
1926 TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]"
1927 "from %u to %u partial_cluster %u",
1928 MAJOR(__entry->dev), MINOR(__entry->dev),
1929 (unsigned long) __entry->ino,
1930 (unsigned) __entry->ee_lblk,
1931 (unsigned long long) __entry->ee_pblk,
1932 (unsigned short) __entry->ee_len,
1933 (unsigned) __entry->from,
1934 (unsigned) __entry->to,
1935 (unsigned) __entry->partial)
1936);
1937
1938TRACE_EVENT(ext4_ext_rm_leaf,
1939 TP_PROTO(struct inode *inode, ext4_lblk_t start,
1940 struct ext4_extent *ex, ext4_fsblk_t partial_cluster),
1941
1942 TP_ARGS(inode, start, ex, partial_cluster),
1943
1944 TP_STRUCT__entry(
1945 __field( ino_t, ino )
1946 __field( dev_t, dev )
1947 __field( ext4_lblk_t, start )
1948 __field( ext4_lblk_t, ee_lblk )
1949 __field( ext4_fsblk_t, ee_pblk )
1950 __field( short, ee_len )
1951 __field( ext4_fsblk_t, partial )
1952 ),
1953
1954 TP_fast_assign(
1955 __entry->ino = inode->i_ino;
1956 __entry->dev = inode->i_sb->s_dev;
1957 __entry->start = start;
1958 __entry->ee_lblk = le32_to_cpu(ex->ee_block);
1959 __entry->ee_pblk = ext4_ext_pblock(ex);
1960 __entry->ee_len = ext4_ext_get_actual_len(ex);
1961 __entry->partial = partial_cluster;
1962 ),
1963
1964 TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]"
1965 "partial_cluster %u",
1966 MAJOR(__entry->dev), MINOR(__entry->dev),
1967 (unsigned long) __entry->ino,
1968 (unsigned) __entry->start,
1969 (unsigned) __entry->ee_lblk,
1970 (unsigned long long) __entry->ee_pblk,
1971 (unsigned short) __entry->ee_len,
1972 (unsigned) __entry->partial)
1973);
1974
1975TRACE_EVENT(ext4_ext_rm_idx,
1976 TP_PROTO(struct inode *inode, ext4_fsblk_t pblk),
1977
1978 TP_ARGS(inode, pblk),
1979
1980 TP_STRUCT__entry(
1981 __field( ino_t, ino )
1982 __field( dev_t, dev )
1983 __field( ext4_fsblk_t, pblk )
1984 ),
1985
1986 TP_fast_assign(
1987 __entry->ino = inode->i_ino;
1988 __entry->dev = inode->i_sb->s_dev;
1989 __entry->pblk = pblk;
1990 ),
1991
1992 TP_printk("dev %d,%d ino %lu index_pblk %llu",
1993 MAJOR(__entry->dev), MINOR(__entry->dev),
1994 (unsigned long) __entry->ino,
1995 (unsigned long long) __entry->pblk)
1996);
1997
1998TRACE_EVENT(ext4_ext_remove_space,
1999 TP_PROTO(struct inode *inode, ext4_lblk_t start, int depth),
2000
2001 TP_ARGS(inode, start, depth),
2002
2003 TP_STRUCT__entry(
2004 __field( ino_t, ino )
2005 __field( dev_t, dev )
2006 __field( ext4_lblk_t, start )
2007 __field( int, depth )
2008 ),
2009
2010 TP_fast_assign(
2011 __entry->ino = inode->i_ino;
2012 __entry->dev = inode->i_sb->s_dev;
2013 __entry->start = start;
2014 __entry->depth = depth;
2015 ),
2016
2017 TP_printk("dev %d,%d ino %lu since %u depth %d",
2018 MAJOR(__entry->dev), MINOR(__entry->dev),
2019 (unsigned long) __entry->ino,
2020 (unsigned) __entry->start,
2021 __entry->depth)
2022);
2023
2024TRACE_EVENT(ext4_ext_remove_space_done,
2025 TP_PROTO(struct inode *inode, ext4_lblk_t start, int depth,
2026 ext4_lblk_t partial, unsigned short eh_entries),
2027
2028 TP_ARGS(inode, start, depth, partial, eh_entries),
2029
2030 TP_STRUCT__entry(
2031 __field( ino_t, ino )
2032 __field( dev_t, dev )
2033 __field( ext4_lblk_t, start )
2034 __field( int, depth )
2035 __field( ext4_lblk_t, partial )
2036 __field( unsigned short, eh_entries )
2037 ),
2038
2039 TP_fast_assign(
2040 __entry->ino = inode->i_ino;
2041 __entry->dev = inode->i_sb->s_dev;
2042 __entry->start = start;
2043 __entry->depth = depth;
2044 __entry->partial = partial;
2045 __entry->eh_entries = eh_entries;
2046 ),
2047
2048 TP_printk("dev %d,%d ino %lu since %u depth %d partial %u "
2049 "remaining_entries %u",
2050 MAJOR(__entry->dev), MINOR(__entry->dev),
2051 (unsigned long) __entry->ino,
2052 (unsigned) __entry->start,
2053 __entry->depth,
2054 (unsigned) __entry->partial,
2055 (unsigned short) __entry->eh_entries)
2056);
2057
1592#endif /* _TRACE_EXT4_H */ 2058#endif /* _TRACE_EXT4_H */
1593 2059
1594/* This part must be outside protection */ 2060/* This part must be outside protection */
diff --git a/mm/shmem.c b/mm/shmem.c
index fa4fa6ce13bc..45b9acb575f9 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2503,7 +2503,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
2503 2503
2504 d_instantiate(path.dentry, inode); 2504 d_instantiate(path.dentry, inode);
2505 inode->i_size = size; 2505 inode->i_size = size;
2506 inode->i_nlink = 0; /* It is unlinked */ 2506 clear_nlink(inode); /* It is unlinked */
2507#ifndef CONFIG_MMU 2507#ifndef CONFIG_MMU
2508 error = ramfs_nommu_expand_for_mapping(inode, size); 2508 error = ramfs_nommu_expand_for_mapping(inode, size);
2509 if (error) 2509 if (error)
diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index 19c053b82303..4f554f20dc97 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -9,7 +9,7 @@ config IMA
9 select CRYPTO_HMAC 9 select CRYPTO_HMAC
10 select CRYPTO_MD5 10 select CRYPTO_MD5
11 select CRYPTO_SHA1 11 select CRYPTO_SHA1
12 select TCG_TPM if !S390 12 select TCG_TPM if !S390 && !UML
13 select TCG_TIS if TCG_TPM 13 select TCG_TIS if TCG_TPM
14 help 14 help
15 The Trusted Computing Group(TCG) runtime Integrity 15 The Trusted Computing Group(TCG) runtime Integrity
diff --git a/sound/Kconfig b/sound/Kconfig
index 1fef141ef8e7..261a03c8a209 100644
--- a/sound/Kconfig
+++ b/sound/Kconfig
@@ -59,7 +59,7 @@ config SOUND_OSS_CORE_PRECLAIM
59 59
60source "sound/oss/dmasound/Kconfig" 60source "sound/oss/dmasound/Kconfig"
61 61
62if !M68K 62if !M68K && !UML
63 63
64menuconfig SND 64menuconfig SND
65 tristate "Advanced Linux Sound Architecture" 65 tristate "Advanced Linux Sound Architecture"