-rw-r--r--Documentation/arm/tcm.txt145
-rw-r--r--Documentation/auxdisplay/cfag12864b-example.c1
-rw-r--r--Documentation/cgroups/cgroups.txt32
-rw-r--r--Documentation/cgroups/memory.txt41
-rw-r--r--Documentation/crypto/async-tx-api.txt75
-rw-r--r--Documentation/filesystems/sharedsubtree.txt220
-rw-r--r--Documentation/filesystems/vfs.txt7
-rw-r--r--Documentation/ioctl/ioctl-number.txt1
-rw-r--r--Documentation/sysctl/fs.txt17
-rw-r--r--Documentation/sysctl/kernel.txt22
-rw-r--r--Documentation/sysctl/vm.txt41
-rw-r--r--Documentation/vm/.gitignore1
-rw-r--r--Documentation/vm/locking2
-rw-r--r--Documentation/vm/page-types.c200
-rw-r--r--MAINTAINERS33
-rw-r--r--arch/alpha/include/asm/fcntl.h2
-rw-r--r--arch/alpha/kernel/core_marvel.c2
-rw-r--r--arch/alpha/kernel/core_titan.c2
-rw-r--r--arch/alpha/kernel/init_task.c5
-rw-r--r--arch/alpha/kernel/pci_impl.h2
-rw-r--r--arch/alpha/kernel/pci_iommu.c4
-rw-r--r--arch/alpha/kernel/vmlinux.lds.S95
-rw-r--r--arch/arm/Kconfig5
-rw-r--r--arch/arm/common/locomo.c17
-rw-r--r--arch/arm/common/vic.c1
-rw-r--r--arch/arm/configs/littleton_defconfig783
-rw-r--r--arch/arm/configs/pxa3xx_defconfig1332
-rw-r--r--arch/arm/configs/xcep_defconfig1129
-rw-r--r--arch/arm/configs/zylonite_defconfig736
-rw-r--r--arch/arm/include/asm/atomic.h26
-rw-r--r--arch/arm/include/asm/cache.h2
-rw-r--r--arch/arm/include/asm/cputype.h10
-rw-r--r--arch/arm/include/asm/hardware/iop3xx-adma.h81
-rw-r--r--arch/arm/include/asm/hardware/iop_adma.h3
-rw-r--r--arch/arm/include/asm/tcm.h31
-rw-r--r--arch/arm/include/asm/unified.h4
-rw-r--r--arch/arm/kernel/Makefile1
-rw-r--r--arch/arm/kernel/entry-armv.S19
-rw-r--r--arch/arm/kernel/entry-header.S14
-rw-r--r--arch/arm/kernel/kprobes.c19
-rw-r--r--arch/arm/kernel/setup.c2
-rw-r--r--arch/arm/kernel/tcm.c246
-rw-r--r--arch/arm/kernel/tcm.h17
-rw-r--r--arch/arm/kernel/vmlinux.lds.S57
-rw-r--r--arch/arm/lib/copy_page.S16
-rw-r--r--arch/arm/mach-at91/at91cap9_devices.c10
-rw-r--r--arch/arm/mach-at91/at91sam9g45_devices.c104
-rw-r--r--arch/arm/mach-at91/at91sam9rl_devices.c102
-rw-r--r--arch/arm/mach-at91/board-cap9adk.c2
-rw-r--r--arch/arm/mach-at91/board-neocore926.c2
-rw-r--r--arch/arm/mach-at91/board-sam9m10g45ek.c10
-rw-r--r--arch/arm/mach-at91/board-sam9rlek.c10
-rw-r--r--arch/arm/mach-integrator/integrator_cp.c4
-rw-r--r--arch/arm/mach-iop13xx/include/mach/adma.h119
-rw-r--r--arch/arm/mach-iop13xx/setup.c17
-rw-r--r--arch/arm/mach-pxa/Kconfig36
-rw-r--r--arch/arm/mach-pxa/Makefile4
-rw-r--r--arch/arm/mach-pxa/balloon3.c361
-rw-r--r--arch/arm/mach-pxa/clock.h1
-rw-r--r--arch/arm/mach-pxa/cm-x270.c146
-rw-r--r--arch/arm/mach-pxa/cm-x300.c71
-rw-r--r--arch/arm/mach-pxa/colibri-pxa300.c1
-rw-r--r--arch/arm/mach-pxa/colibri-pxa320.c33
-rw-r--r--arch/arm/mach-pxa/colibri-pxa3xx.c52
-rw-r--r--arch/arm/mach-pxa/corgi.c233
-rw-r--r--arch/arm/mach-pxa/csb726.c54
-rw-r--r--arch/arm/mach-pxa/devices.c27
-rw-r--r--arch/arm/mach-pxa/devices.h2
-rw-r--r--arch/arm/mach-pxa/e740.c1
-rw-r--r--arch/arm/mach-pxa/e750.c1
-rw-r--r--arch/arm/mach-pxa/em-x270.c45
-rw-r--r--arch/arm/mach-pxa/eseries.c39
-rw-r--r--arch/arm/mach-pxa/gumstix.c5
-rw-r--r--arch/arm/mach-pxa/hx4700.c65
-rw-r--r--arch/arm/mach-pxa/idp.c5
-rw-r--r--arch/arm/mach-pxa/imote2.c3
-rw-r--r--arch/arm/mach-pxa/include/mach/balloon3.h134
-rw-r--r--arch/arm/mach-pxa/include/mach/colibri.h6
-rw-r--r--arch/arm/mach-pxa/include/mach/entry-macro.S25
-rw-r--r--arch/arm/mach-pxa/include/mach/hardware.h17
-rw-r--r--arch/arm/mach-pxa/include/mach/irda.h2
-rw-r--r--arch/arm/mach-pxa/include/mach/irqs.h42
-rw-r--r--arch/arm/mach-pxa/include/mach/mfp.h301
-rw-r--r--arch/arm/mach-pxa/include/mach/mmc.h5
-rw-r--r--arch/arm/mach-pxa/include/mach/palmtc.h86
-rw-r--r--arch/arm/mach-pxa/include/mach/palmtx.h5
-rw-r--r--arch/arm/mach-pxa/include/mach/pxa3xx-regs.h4
-rw-r--r--arch/arm/mach-pxa/include/mach/pxafb.h3
-rw-r--r--arch/arm/mach-pxa/include/mach/regs-intc.h11
-rw-r--r--arch/arm/mach-pxa/include/mach/uncompress.h2
-rw-r--r--arch/arm/mach-pxa/irq.c8
-rw-r--r--arch/arm/mach-pxa/littleton.c43
-rw-r--r--arch/arm/mach-pxa/lubbock.c18
-rw-r--r--arch/arm/mach-pxa/magician.c59
-rw-r--r--arch/arm/mach-pxa/mainstone.c16
-rw-r--r--arch/arm/mach-pxa/mioa701.c84
-rw-r--r--arch/arm/mach-pxa/palmld.c143
-rw-r--r--arch/arm/mach-pxa/palmt5.c111
-rw-r--r--arch/arm/mach-pxa/palmtc.c436
-rw-r--r--arch/arm/mach-pxa/palmte2.c110
-rw-r--r--arch/arm/mach-pxa/palmtx.c225
-rw-r--r--arch/arm/mach-pxa/palmz72.c116
-rw-r--r--arch/arm/mach-pxa/pcm990-baseboard.c67
-rw-r--r--arch/arm/mach-pxa/poodle.c80
-rw-r--r--arch/arm/mach-pxa/pxa2xx.c1
-rw-r--r--arch/arm/mach-pxa/pxa300.c2
-rw-r--r--arch/arm/mach-pxa/pxa320.c2
-rw-r--r--arch/arm/mach-pxa/pxa930.c19
-rw-r--r--arch/arm/mach-pxa/spitz.c238
-rw-r--r--arch/arm/mach-pxa/tosa.c91
-rw-r--r--arch/arm/mach-pxa/treo680.c159
-rw-r--r--arch/arm/mach-pxa/trizeps4.c4
-rw-r--r--arch/arm/mach-pxa/xcep.c187
-rw-r--r--arch/arm/mach-pxa/zylonite.c3
-rw-r--r--arch/arm/mach-realview/core.c24
-rw-r--r--arch/arm/mach-realview/core.h4
-rw-r--r--arch/arm/mach-realview/realview_eb.c40
-rw-r--r--arch/arm/mach-realview/realview_pb1176.c40
-rw-r--r--arch/arm/mach-realview/realview_pb11mp.c40
-rw-r--r--arch/arm/mach-realview/realview_pba8.c40
-rw-r--r--arch/arm/mach-realview/realview_pbx.c40
-rw-r--r--arch/arm/mach-s3c2410/Kconfig5
-rw-r--r--arch/arm/mach-s3c2412/Kconfig3
-rw-r--r--arch/arm/mach-s3c2440/Kconfig6
-rw-r--r--arch/arm/mach-s3c6400/Kconfig1
-rw-r--r--arch/arm/mach-s3c6410/Kconfig1
-rw-r--r--arch/arm/mach-sa1100/dma.c2
-rw-r--r--arch/arm/mach-u300/Kconfig12
-rw-r--r--arch/arm/mach-u300/Makefile3
-rw-r--r--arch/arm/mach-u300/core.c14
-rw-r--r--arch/arm/mach-u300/dummyspichip.c290
-rw-r--r--arch/arm/mach-u300/gpio.c13
-rw-r--r--arch/arm/mach-u300/i2c.c43
-rw-r--r--arch/arm/mach-u300/i2c.h23
-rw-r--r--arch/arm/mach-u300/include/mach/memory.h8
-rw-r--r--arch/arm/mach-u300/include/mach/syscon.h120
-rw-r--r--arch/arm/mach-u300/mmc.c22
-rw-r--r--arch/arm/mach-u300/padmux.c395
-rw-r--r--arch/arm/mach-u300/padmux.h28
-rw-r--r--arch/arm/mach-u300/spi.c124
-rw-r--r--arch/arm/mach-u300/spi.h26
-rw-r--r--arch/arm/mach-u300/timer.c15
-rw-r--r--arch/arm/mach-versatile/core.c4
-rw-r--r--arch/arm/mach-versatile/versatile_pb.c4
-rw-r--r--arch/arm/mm/Kconfig5
-rw-r--r--arch/arm/mm/fault.c110
-rw-r--r--arch/arm/mm/init.c8
-rw-r--r--arch/arm/plat-iop/adma.c4
-rw-r--r--arch/arm/plat-pxa/dma.c281
-rw-r--r--arch/arm/plat-pxa/include/plat/mfp.h73
-rw-r--r--arch/arm/plat-pxa/mfp.c3
-rw-r--r--arch/arm/plat-s3c/gpio.c2
-rw-r--r--arch/arm/plat-s3c64xx/dma.c6
-rw-r--r--arch/arm/plat-s3c64xx/include/plat/dma-plat.h2
-rw-r--r--arch/arm/plat-s3c64xx/include/plat/irqs.h10
-rw-r--r--arch/arm/plat-s3c64xx/s3c6400-clock.c8
-rw-r--r--arch/arm/plat-stmp3xxx/dma.c2
-rw-r--r--arch/arm/tools/mach-types20
-rw-r--r--arch/blackfin/kernel/vmlinux.lds.S56
-rw-r--r--arch/cris/arch-v10/kernel/time.c1
-rw-r--r--arch/cris/arch-v32/kernel/smp.c2
-rw-r--r--arch/cris/arch-v32/kernel/time.c1
-rw-r--r--arch/cris/arch-v32/mach-a3/io.c2
-rw-r--r--arch/cris/arch-v32/mach-fs/io.c2
-rw-r--r--arch/cris/include/arch-v10/arch/mmu.h9
-rw-r--r--arch/cris/include/arch-v32/arch/mmu.h10
-rw-r--r--arch/cris/include/asm/hardirq.h12
-rw-r--r--arch/cris/include/asm/pgtable.h2
-rw-r--r--arch/cris/kernel/irq.c5
-rw-r--r--arch/cris/kernel/vmlinux.lds.S37
-rw-r--r--arch/frv/kernel/pm.c14
-rw-r--r--arch/frv/mb93090-mb00/Makefile2
-rw-r--r--arch/frv/mb93090-mb00/flash.c90
-rw-r--r--arch/h8300/kernel/vmlinux.lds.S25
-rw-r--r--arch/m32r/include/asm/page.h4
-rw-r--r--arch/m32r/include/asm/processor.h2
-rw-r--r--arch/m32r/include/asm/thread_info.h15
-rw-r--r--arch/m32r/kernel/entry.S7
-rw-r--r--arch/m32r/kernel/head.S4
-rw-r--r--arch/m32r/kernel/vmlinux.lds.S78
-rw-r--r--arch/microblaze/Kconfig1
-rw-r--r--arch/microblaze/Makefile31
-rw-r--r--arch/microblaze/boot/Makefile41
l---------arch/microblaze/boot/dts/system.dts1
-rw-r--r--arch/microblaze/boot/linked_dtb.S3
-rw-r--r--arch/microblaze/configs/mmu_defconfig30
-rw-r--r--arch/microblaze/configs/nommu_defconfig35
-rw-r--r--arch/microblaze/include/asm/asm-compat.h17
-rw-r--r--arch/microblaze/include/asm/io.h3
-rw-r--r--arch/microblaze/include/asm/ipc.h1
-rw-r--r--arch/microblaze/include/asm/page.h3
-rw-r--r--arch/microblaze/include/asm/setup.h2
-rw-r--r--arch/microblaze/include/asm/syscall.h99
-rw-r--r--arch/microblaze/kernel/cpu/cpuinfo.c3
-rw-r--r--arch/microblaze/kernel/entry.S72
-rw-r--r--arch/microblaze/kernel/exceptions.c33
-rw-r--r--arch/microblaze/kernel/head.S14
-rw-r--r--arch/microblaze/kernel/hw_exception_handler.S10
-rw-r--r--arch/microblaze/kernel/process.c1
-rw-r--r--arch/microblaze/kernel/ptrace.c62
-rw-r--r--arch/microblaze/kernel/setup.c12
-rw-r--r--arch/microblaze/kernel/vmlinux.lds.S72
-rw-r--r--arch/microblaze/mm/init.c3
-rw-r--r--arch/mips/lasat/sysctl.c18
-rw-r--r--arch/mn10300/kernel/vmlinux.lds.S40
-rw-r--r--arch/parisc/include/asm/fcntl.h2
-rw-r--r--arch/parisc/kernel/vmlinux.lds.S104
-rw-r--r--arch/powerpc/Kconfig6
-rw-r--r--arch/powerpc/Makefile11
-rw-r--r--arch/powerpc/include/asm/device.h11
-rw-r--r--arch/powerpc/include/asm/dma-mapping.h27
-rw-r--r--arch/powerpc/include/asm/fsldma.h136
-rw-r--r--arch/powerpc/include/asm/iommu.h10
-rw-r--r--arch/powerpc/include/asm/pmc.h2
-rw-r--r--arch/powerpc/include/asm/pte-40x.h1
-rw-r--r--arch/powerpc/include/asm/pte-8xx.h1
-rw-r--r--arch/powerpc/include/asm/pte-common.h5
-rw-r--r--arch/powerpc/kernel/dma-iommu.c16
-rw-r--r--arch/powerpc/kernel/dma.c15
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S1
-rw-r--r--arch/powerpc/kernel/pci-common.c2
-rw-r--r--arch/powerpc/kernel/process.c17
-rw-r--r--arch/powerpc/kernel/prom_init.c3
-rw-r--r--arch/powerpc/kernel/vdso.c14
-rw-r--r--arch/powerpc/kernel/vio.c4
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S69
-rw-r--r--arch/powerpc/mm/pgtable.c19
-rw-r--r--arch/powerpc/mm/tlb_low_64e.S1
-rw-r--r--arch/powerpc/platforms/cell/beat_iommu.c2
-rw-r--r--arch/powerpc/platforms/cell/iommu.c9
-rw-r--r--arch/powerpc/platforms/iseries/iommu.c2
-rw-r--r--arch/powerpc/platforms/pasemi/iommu.c2
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c8
-rwxr-xr-xarch/powerpc/relocs_check.pl56
-rw-r--r--arch/powerpc/sysdev/dart_iommu.c2
-rw-r--r--arch/powerpc/xmon/xmon.c16
-rw-r--r--arch/s390/appldata/appldata_base.c9
-rw-r--r--arch/s390/kernel/debug.c4
-rw-r--r--arch/s390/mm/cmm.c4
-rw-r--r--arch/sh/drivers/dma/Kconfig12
-rw-r--r--arch/sh/drivers/dma/Makefile3
-rw-r--r--arch/sh/include/asm/dma-sh.h13
-rw-r--r--arch/um/include/asm/common.lds.S29
-rw-r--r--arch/um/kernel/dyn.lds.S9
-rw-r--r--arch/um/kernel/uml.lds.S26
-rw-r--r--arch/x86/include/asm/nmi.h3
-rw-r--r--arch/x86/include/asm/pci.h6
-rw-r--r--arch/x86/kernel/apic/nmi.c4
-rw-r--r--arch/x86/kernel/vsyscall_64.c10
-rw-r--r--arch/x86/mm/fault.c19
-rw-r--r--arch/x86/mm/pageattr.c1
-rw-r--r--arch/x86/pci/common.c2
-rw-r--r--arch/xtensa/kernel/vmlinux.lds.S75
-rw-r--r--crypto/async_tx/Kconfig9
-rw-r--r--crypto/async_tx/Makefile3
-rw-r--r--crypto/async_tx/async_memcpy.c44
-rw-r--r--crypto/async_tx/async_memset.c43
-rw-r--r--crypto/async_tx/async_pq.c395
-rw-r--r--crypto/async_tx/async_raid6_recov.c468
-rw-r--r--crypto/async_tx/async_tx.c87
-rw-r--r--crypto/async_tx/async_xor.c207
-rw-r--r--crypto/async_tx/raid6test.c240
-rw-r--r--drivers/acpi/button.c45
-rw-r--r--drivers/cdrom/cdrom.c8
-rw-r--r--drivers/char/Kconfig8
-rw-r--r--drivers/char/Makefile1
-rw-r--r--drivers/char/agp/intel-agp.c37
-rw-r--r--drivers/char/bfin-otp.c173
-rw-r--r--drivers/char/hpet.c21
-rw-r--r--drivers/char/hvc_console.c6
-rw-r--r--drivers/char/hvc_console.h12
-rw-r--r--drivers/char/hvc_iucv.c4
-rw-r--r--drivers/char/mem.c2
-rw-r--r--drivers/char/mwave/mwavedd.c22
-rw-r--r--drivers/char/random.c4
-rw-r--r--drivers/char/rio/rioctrl.c2
-rw-r--r--drivers/char/uv_mmtimer.c216
-rw-r--r--drivers/dca/dca-core.c124
-rw-r--r--drivers/dma/Kconfig14
-rw-r--r--drivers/dma/Makefile4
-rw-r--r--drivers/dma/at_hdmac.c60
-rw-r--r--drivers/dma/at_hdmac_regs.h1
-rw-r--r--drivers/dma/dmaengine.c94
-rw-r--r--drivers/dma/dmatest.c40
-rw-r--r--drivers/dma/dw_dmac.c50
-rw-r--r--drivers/dma/dw_dmac_regs.h1
-rw-r--r--drivers/dma/fsldma.c288
-rw-r--r--drivers/dma/fsldma.h4
-rw-r--r--drivers/dma/ioat.c202
-rw-r--r--drivers/dma/ioat/Makefile2
-rw-r--r--drivers/dma/ioat/dca.c (renamed from drivers/dma/ioat_dca.c)13
-rw-r--r--drivers/dma/ioat/dma.c1238
-rw-r--r--drivers/dma/ioat/dma.h337
-rw-r--r--drivers/dma/ioat/dma_v2.c871
-rw-r--r--drivers/dma/ioat/dma_v2.h190
-rw-r--r--drivers/dma/ioat/dma_v3.c1223
-rw-r--r--drivers/dma/ioat/hw.h215
-rw-r--r--drivers/dma/ioat/pci.c210
-rw-r--r--drivers/dma/ioat/registers.h (renamed from drivers/dma/ioatdma_registers.h)54
-rw-r--r--drivers/dma/ioat_dma.c1741
-rw-r--r--drivers/dma/ioatdma.h165
-rw-r--r--drivers/dma/ioatdma_hw.h70
-rw-r--r--drivers/dma/iop-adma.c491
-rw-r--r--drivers/dma/iovlock.c10
-rw-r--r--drivers/dma/mv_xor.c7
-rw-r--r--drivers/dma/mv_xor.h4
-rw-r--r--drivers/dma/shdma.c786
-rw-r--r--drivers/dma/shdma.h64
-rw-r--r--drivers/dma/txx9dmac.c24
-rw-r--r--drivers/dma/txx9dmac.h1
-rw-r--r--drivers/edac/Kconfig13
-rw-r--r--drivers/edac/Makefile2
-rw-r--r--drivers/edac/cpc925_edac.c6
-rw-r--r--drivers/edac/edac_device.c5
-rw-r--r--drivers/edac/edac_mc.c4
-rw-r--r--drivers/edac/edac_pci.c4
-rw-r--r--drivers/edac/i3200_edac.c527
-rw-r--r--drivers/edac/mpc85xx_edac.c30
-rw-r--r--drivers/edac/mv64x60_edac.c22
-rw-r--r--drivers/gpu/drm/Kconfig1
-rw-r--r--drivers/gpu/drm/drm_gem.c13
-rw-r--r--drivers/gpu/drm/i915/Makefile1
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c6
-rw-r--r--drivers/gpu/drm/i915/i915_dma.c194
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c133
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h77
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c876
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c92
-rw-r--r--drivers/gpu/drm/i915/i915_opregion.c22
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h34
-rw-r--r--drivers/gpu/drm/i915/i915_suspend.c170
-rw-r--r--drivers/gpu/drm/i915/i915_trace.h315
-rw-r--r--drivers/gpu/drm/i915/i915_trace_points.c11
-rw-r--r--drivers/gpu/drm/i915/intel_bios.c3
-rw-r--r--drivers/gpu/drm/i915/intel_crt.c9
-rw-r--r--drivers/gpu/drm/i915/intel_display.c616
-rw-r--r--drivers/gpu/drm/i915/intel_drv.h5
-rw-r--r--drivers/gpu/drm/i915/intel_lvds.c63
-rw-r--r--drivers/gpu/drm/i915/intel_sdvo.c502
-rw-r--r--drivers/i2c/busses/Kconfig4
-rw-r--r--drivers/idle/i7300_idle.c20
-rw-r--r--drivers/infiniband/core/mad_rmpp.c17
-rw-r--r--drivers/infiniband/hw/mthca/mthca_catas.c11
-rw-r--r--drivers/infiniband/hw/nes/nes_nic.c1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c7
-rw-r--r--drivers/input/misc/Kconfig1
-rw-r--r--drivers/md/Kconfig26
-rw-r--r--drivers/md/bitmap.c5
-rw-r--r--drivers/md/linear.c3
-rw-r--r--drivers/md/md.c25
-rw-r--r--drivers/md/md.h1
-rw-r--r--drivers/md/multipath.c6
-rw-r--r--drivers/md/raid0.c8
-rw-r--r--drivers/md/raid1.c15
-rw-r--r--drivers/md/raid10.c12
-rw-r--r--drivers/md/raid5.c1493
-rw-r--r--drivers/md/raid5.h28
-rw-r--r--drivers/media/dvb/dvb-core/dvbdev.h5
-rw-r--r--drivers/media/dvb/dvb-usb/Kconfig2
-rw-r--r--drivers/media/video/saa7164/saa7164-api.c8
-rw-r--r--drivers/media/video/saa7164/saa7164-cmd.c2
-rw-r--r--drivers/media/video/saa7164/saa7164-core.c6
-rw-r--r--drivers/media/video/saa7164/saa7164.h4
-rw-r--r--drivers/memstick/core/memstick.c2
-rw-r--r--drivers/misc/sgi-gru/grukservices.c2
-rw-r--r--drivers/misc/sgi-gru/gruprocfs.c3
-rw-r--r--drivers/mmc/host/atmel-mci.c9
-rw-r--r--drivers/mmc/host/mmci.c109
-rw-r--r--drivers/mmc/host/mmci.h3
-rw-r--r--drivers/mmc/host/pxamci.c104
-rw-r--r--drivers/mtd/Kconfig2
-rw-r--r--drivers/mtd/maps/Kconfig2
-rw-r--r--drivers/net/cris/eth_v10.c20
-rw-r--r--drivers/net/irda/pxaficp_ir.c47
-rw-r--r--drivers/net/mlx4/fw.c5
-rw-r--r--drivers/net/wireless/arlan-proc.c28
-rw-r--r--drivers/parport/procfs.c12
-rw-r--r--drivers/pci/hotplug/pciehp.h107
-rw-r--r--drivers/pci/hotplug/pciehp_acpi.c17
-rw-r--r--drivers/pci/hotplug/pciehp_core.c136
-rw-r--r--drivers/pci/hotplug/pciehp_ctrl.c109
-rw-r--r--drivers/pci/hotplug/pciehp_hpc.c109
-rw-r--r--drivers/pci/hotplug/pciehp_pci.c23
-rw-r--r--drivers/pci/pcie/aer/aerdrv.c2
-rw-r--r--drivers/pci/pcie/aspm.c3
-rw-r--r--drivers/pcmcia/Makefile1
-rw-r--r--drivers/pcmcia/pxa2xx_base.c18
-rw-r--r--drivers/pcmcia/pxa2xx_palmtc.c230
-rw-r--r--drivers/rtc/rtc-pxa.c27
-rw-r--r--drivers/rtc/rtc-sa1100.c23
-rw-r--r--drivers/serial/cpm_uart/cpm_uart_core.c2
-rw-r--r--drivers/serial/crisv10.c1
-rw-r--r--drivers/serial/pxa.c20
-rw-r--r--drivers/spi/amba-pl022.c8
-rw-r--r--drivers/spi/pxa2xx_spi.c30
-rw-r--r--drivers/staging/go7007/Makefile5
-rw-r--r--drivers/usb/Kconfig1
-rw-r--r--drivers/usb/host/ohci-pxa27x.c30
-rw-r--r--drivers/usb/serial/sierra.c5
-rw-r--r--drivers/video/backlight/da903x_bl.c20
-rw-r--r--drivers/video/pxafb.c32
-rw-r--r--drivers/vlynq/vlynq.c2
-rw-r--r--fs/adfs/inode.c7
-rw-r--r--fs/attr.c46
-rw-r--r--fs/befs/linuxvfs.c7
-rw-r--r--fs/binfmt_elf.c52
-rw-r--r--fs/binfmt_elf_fdpic.c17
-rw-r--r--fs/binfmt_flat.c22
-rw-r--r--fs/block_dev.c140
-rw-r--r--fs/btrfs/async-thread.c254
-rw-r--r--fs/btrfs/async-thread.h12
-rw-r--r--fs/btrfs/btrfs_inode.h1
-rw-r--r--fs/btrfs/compression.c8
-rw-r--r--fs/btrfs/ctree.c6
-rw-r--r--fs/btrfs/ctree.h78
-rw-r--r--fs/btrfs/dir-item.c47
-rw-r--r--fs/btrfs/disk-io.c235
-rw-r--r--fs/btrfs/export.c133
-rw-r--r--fs/btrfs/extent-tree.c1662
-rw-r--r--fs/btrfs/extent_io.c330
-rw-r--r--fs/btrfs/extent_io.h16
-rw-r--r--fs/btrfs/extent_map.c103
-rw-r--r--fs/btrfs/extent_map.h5
-rw-r--r--fs/btrfs/file.c35
-rw-r--r--fs/btrfs/free-space-cache.c36
-rw-r--r--fs/btrfs/inode-item.c4
-rw-r--r--fs/btrfs/inode-map.c93
-rw-r--r--fs/btrfs/inode.c664
-rw-r--r--fs/btrfs/ioctl.c339
-rw-r--r--fs/btrfs/ioctl.h3
-rw-r--r--fs/btrfs/ordered-data.c33
-rw-r--r--fs/btrfs/ordered-data.h3
-rw-r--r--fs/btrfs/orphan.c20
-rw-r--r--fs/btrfs/relocation.c280
-rw-r--r--fs/btrfs/root-tree.c138
-rw-r--r--fs/btrfs/super.c1
-rw-r--r--fs/btrfs/transaction.c38
-rw-r--r--fs/btrfs/tree-log.c25
-rw-r--r--fs/btrfs/volumes.c117
-rw-r--r--fs/btrfs/volumes.h3
-rw-r--r--fs/buffer.c10
-rw-r--r--fs/char_dev.c3
-rw-r--r--fs/cifs/cifsfs.c3
-rw-r--r--fs/cifs/inode.c53
-rw-r--r--fs/coda/coda_int.h1
-rw-r--r--fs/compat.c24
-rw-r--r--fs/drop_caches.c4
-rw-r--r--fs/ecryptfs/Kconfig4
-rw-r--r--fs/ecryptfs/crypto.c39
-rw-r--r--fs/ecryptfs/inode.c2
-rw-r--r--fs/ecryptfs/keystore.c39
-rw-r--r--fs/ecryptfs/kthread.c24
-rw-r--r--fs/ecryptfs/main.c3
-rw-r--r--fs/ecryptfs/mmap.c4
-rw-r--r--fs/ecryptfs/read_write.c32
-rw-r--r--fs/ecryptfs/super.c2
-rw-r--r--fs/exec.c114
-rw-r--r--fs/exofs/super.c6
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext3/inode.c3
-rw-r--r--fs/ext4/inode.c4
-rw-r--r--fs/fat/inode.c16
-rw-r--r--fs/fcntl.c108
-rw-r--r--fs/file_table.c6
-rw-r--r--fs/fuse/dir.c14
-rw-r--r--fs/fuse/fuse_i.h2
-rw-r--r--fs/fuse/inode.c11
-rw-r--r--fs/gfs2/aops.c3
-rw-r--r--fs/hfs/mdb.c6
-rw-r--r--fs/hfsplus/super.c6
-rw-r--r--fs/hugetlbfs/inode.c45
-rw-r--r--fs/inode.c89
-rw-r--r--fs/internal.h1
-rw-r--r--fs/ioctl.c9
-rw-r--r--fs/isofs/inode.c8
-rw-r--r--fs/jfs/super.c9
-rw-r--r--fs/libfs.c13
-rw-r--r--fs/namespace.c77
-rw-r--r--fs/ncpfs/inode.c12
-rw-r--r--fs/ncpfs/ioctl.c6
-rw-r--r--fs/nfs/file.c1
-rw-r--r--fs/nfs/inode.c54
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nls/nls_base.c3
-rw-r--r--fs/ntfs/aops.c2
-rw-r--r--fs/ntfs/super.c10
-rw-r--r--fs/ocfs2/aops.c1
-rw-r--r--fs/proc/array.c7
-rw-r--r--fs/proc/meminfo.c9
-rw-r--r--fs/proc/proc_sysctl.c2
-rw-r--r--fs/proc/uptime.c7
-rw-r--r--fs/ramfs/file-nommu.c18
-rw-r--r--fs/read_write.c3
-rw-r--r--fs/romfs/super.c2
-rw-r--r--fs/seq_file.c74
-rw-r--r--fs/smbfs/inode.c10
-rw-r--r--fs/super.c67
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c3
-rw-r--r--include/acpi/button.h25
-rw-r--r--include/asm-generic/fcntl.h13
-rw-r--r--include/asm-generic/mman-common.h1
-rw-r--r--include/asm-generic/siginfo.h8
-rw-r--r--include/asm-generic/vmlinux.lds.h4
-rw-r--r--include/drm/drm_pciids.h1
-rw-r--r--include/drm/i915_drm.h19
-rw-r--r--include/linux/amba/mmci.h (renamed from arch/arm/include/asm/mach/mmc.h)9
-rw-r--r--include/linux/amba/pl022.h8
-rw-r--r--include/linux/async_tx.h129
-rw-r--r--include/linux/binfmts.h2
-rw-r--r--include/linux/cgroup.h53
-rw-r--r--include/linux/configfs.h4
-rw-r--r--include/linux/dca.h11
-rw-r--r--include/linux/debugfs.h2
-rw-r--r--include/linux/dmaengine.h179
-rw-r--r--include/linux/fb.h1
-rw-r--r--include/linux/fs.h11
-rw-r--r--include/linux/ftrace.h4
-rw-r--r--include/linux/futex.h10
-rw-r--r--include/linux/hugetlb.h18
-rw-r--r--include/linux/memcontrol.h10
-rw-r--r--include/linux/mm.h24
-rw-r--r--include/linux/mm_types.h7
-rw-r--r--include/linux/mmzone.h13
-rw-r--r--include/linux/module.h17
-rw-r--r--include/linux/page-flags.h17
-rw-r--r--include/linux/page_cgroup.h13
-rw-r--r--include/linux/pci_ids.h10
-rw-r--r--include/linux/prctl.h2
-rw-r--r--include/linux/relay.h2
-rw-r--r--include/linux/res_counter.h64
-rw-r--r--include/linux/rmap.h21
-rw-r--r--include/linux/sched.h17
-rw-r--r--include/linux/security.h2
-rw-r--r--include/linux/seq_file.h38
-rw-r--r--include/linux/signal.h2
-rw-r--r--include/linux/swap.h41
-rw-r--r--include/linux/swapops.h38
-rw-r--r--include/linux/sysctl.h19
-rw-r--r--include/linux/time.h28
-rw-r--r--include/linux/tracehook.h34
-rw-r--r--include/linux/tracepoint.h2
-rw-r--r--include/linux/unaligned/be_byteshift.h2
-rw-r--r--include/linux/unaligned/le_byteshift.h2
-rw-r--r--include/linux/vgaarb.h3
-rw-r--r--include/linux/writeback.h11
-rw-r--r--include/net/ip.h2
-rw-r--r--include/net/ndisc.h2
-rw-r--r--ipc/ipc_sysctl.c16
-rw-r--r--ipc/mq_sysctl.c8
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/audit.c18
-rw-r--r--kernel/audit_watch.c2
-rw-r--r--kernel/auditsc.c6
-rw-r--r--kernel/cgroup.c1107
-rw-r--r--kernel/cgroup_debug.c105
-rw-r--r--kernel/cgroup_freezer.c15
-rw-r--r--kernel/cpuset.c66
-rw-r--r--kernel/exit.c146
-rw-r--r--kernel/fork.c34
-rw-r--r--kernel/gcov/Kconfig2
-rw-r--r--kernel/hung_task.c4
-rw-r--r--kernel/module.c159
-rw-r--r--kernel/ns_cgroup.c16
-rw-r--r--kernel/params.c7
-rw-r--r--kernel/pid_namespace.c2
-rw-r--r--kernel/ptrace.c11
-rw-r--r--kernel/res_counter.c21
-rw-r--r--kernel/sched.c39
-rw-r--r--kernel/sched_fair.c4
-rw-r--r--kernel/signal.c168
-rw-r--r--kernel/slow-work.c12
-rw-r--r--kernel/softlockup.c4
-rw-r--r--kernel/sys.c22
-rw-r--r--kernel/sysctl.c112
-rw-r--r--kernel/time/Makefile2
-rw-r--r--kernel/time/timeconv.c127
-rw-r--r--kernel/trace/ftrace.c4
-rw-r--r--kernel/trace/trace_stack.c4
-rw-r--r--kernel/utsname_sysctl.c4
-rw-r--r--lib/decompress_inflate.c8
-rw-r--r--lib/decompress_unlzma.c10
-rw-r--r--mm/Kconfig14
-rw-r--r--mm/Makefile6
-rw-r--r--mm/filemap.c6
-rw-r--r--mm/hugetlb.c12
-rw-r--r--mm/hwpoison-inject.c41
-rw-r--r--mm/ksm.c14
-rw-r--r--mm/madvise.c30
-rw-r--r--mm/memcontrol.c737
-rw-r--r--mm/memory-failure.c832
-rw-r--r--mm/memory.c86
-rw-r--r--mm/migrate.c2
-rw-r--r--mm/mremap.c4
-rw-r--r--mm/nommu.c82
-rw-r--r--mm/page-writeback.c27
-rw-r--r--mm/page_alloc.c44
-rw-r--r--mm/rmap.c60
-rw-r--r--mm/shmem.c5
-rw-r--r--mm/swapfile.c4
-rw-r--r--mm/truncate.c136
-rw-r--r--mm/vmscan.c51
-rw-r--r--net/bridge/br_netfilter.c4
-rw-r--r--net/decnet/dn_dev.c5
-rw-r--r--net/decnet/sysctl_net_decnet.c2
-rw-r--r--net/ipv4/devinet.c12
-rw-r--r--net/ipv4/route.c7
-rw-r--r--net/ipv4/sysctl_net_ipv4.c16
-rw-r--r--net/ipv6/addrconf.c8
-rw-r--r--net/ipv6/ndisc.c8
-rw-r--r--net/ipv6/route.c4
-rw-r--r--net/irda/irsysctl.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c8
-rw-r--r--net/netfilter/nf_log.c4
-rw-r--r--net/phonet/sysctl.c4
-rw-r--r--net/sunrpc/clnt.c5
-rw-r--r--net/sunrpc/sysctl.c4
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c2
-rw-r--r--security/device_cgroup.c3
-rw-r--r--security/lsm_audit.c2
-rw-r--r--security/min_addr.c4
-rw-r--r--security/selinux/hooks.c2
-rw-r--r--sound/arm/pxa2xx-ac97.c20
623 files changed, 29885 insertions, 12842 deletions
diff --git a/Documentation/arm/tcm.txt b/Documentation/arm/tcm.txt
new file mode 100644
index 000000000000..074f4be6667f
--- /dev/null
+++ b/Documentation/arm/tcm.txt
@@ -0,0 +1,145 @@
+ARM TCM (Tightly-Coupled Memory) handling in Linux
+----
+Written by Linus Walleij <linus.walleij@stericsson.com>
+
+Some ARM SoCs have a so-called TCM (Tightly-Coupled Memory).
+This is usually just a few (4-64) KiB of RAM inside the ARM
+processor.
+
+Due to being embedded inside the CPU, the TCM has a
+Harvard architecture, so there is an ITCM (instruction TCM)
+and a DTCM (data TCM). The DTCM cannot contain any
+instructions, but the ITCM can actually contain data.
+The minimum size of DTCM or ITCM is 4 KiB, so the typical
+minimum configuration is 4 KiB ITCM and 4 KiB DTCM.
+
+ARM CPUs have special registers to read out status, physical
+location and size of TCM memories. arch/arm/include/asm/cputype.h
+defines a CPUID_TCM register that you can read out from the
+system control coprocessor. Documentation from ARM can be found
+at http://infocenter.arm.com; search for "TCM Status Register"
+to see documents for all CPUs. Reading this register you can
+determine if ITCM (bit 0) and/or DTCM (bit 16) is present in the
+machine.
+
+There is further a TCM region register (search for "TCM Region
+Registers" at the ARM site) that can report and modify the location
+and size of TCM memories at runtime. This is used to read out and modify
+TCM location and size. Notice that this is not an MMU table: you
+actually move the physical location of the TCM around. At the
+place you put it, it will mask any underlying RAM from the
+CPU, so it is usually wise not to overlap any physical RAM with
+the TCM. The TCM memory exists totally outside the MMU and will
+override any MMU mappings.
+
+Code executing inside the ITCM does not "see" any MMU mappings
+and e.g. register accesses must be made to physical addresses.
+
+TCM is used for a few things:
+
+- FIQ and other interrupt handlers that need deterministic
+  timing and cannot wait for cache misses.
+
+- Idle loops where all external RAM is set to self-refresh
+  retention mode, so only on-chip RAM is accessible by
+  the CPU and then we hang inside ITCM waiting for an
+  interrupt.
+
+- Other operations which imply shutting off or reconfiguring
+  the external RAM controller.
+
+There is an interface for using TCM on the ARM architecture
+in <asm/tcm.h>. Using this interface it is possible to:
+
+- Define the physical address and size of ITCM and DTCM.
+
+- Tag functions to be compiled into ITCM.
+
+- Tag data and constants to be allocated to DTCM and ITCM.
+
+- Have the remaining TCM RAM added to a special
+  allocation pool with gen_pool_create() and gen_pool_add()
+  and provide tcm_alloc() and tcm_free() for this
+  memory. Such a heap is great for things like saving
+  device state when shutting off device power domains.
+
+A machine that has TCM memory shall select HAVE_TCM in
+arch/arm/Kconfig for itself, and then the
+rest of the functionality will depend on the physical
+location and size of ITCM and DTCM to be defined in
+mach/memory.h for the machine. Code that needs to use
+TCM shall #include <asm/tcm.h>. If the TCM is not located
+at the place given in memory.h it will be moved using
+the TCM Region registers.
+
+Functions to go into ITCM can be tagged like this:
+int __tcmfunc foo(int bar);
+
+Variables to go into DTCM can be tagged like this:
+int __tcmdata foo;
+
+Constants can be tagged like this:
+int __tcmconst foo;
+
+To put assembler into TCM just use
+.section ".tcm.text" or .section ".tcm.data"
+respectively.
+
+Example code:
+
+#include <asm/tcm.h>
+
+/* Uninitialized data */
+static u32 __tcmdata tcmvar;
+/* Initialized data */
+static u32 __tcmdata tcmassigned = 0x2BADBABEU;
+/* Constant */
+static const u32 __tcmconst tcmconst = 0xCAFEBABEU;
+
+static void __tcmlocalfunc tcm_to_tcm(void)
+{
+	int i;
+	for (i = 0; i < 100; i++)
+		tcmvar++;
+}
+
+static void __tcmfunc hello_tcm(void)
+{
+	/* Some abstract code that runs in ITCM */
+	int i;
+	for (i = 0; i < 100; i++) {
+		tcmvar++;
+	}
+	tcm_to_tcm();
+}
+
+static void __init test_tcm(void)
+{
+	u32 *tcmem;
+	int i;
+
+	hello_tcm();
+	printk("Hello TCM executed from ITCM RAM\n");
+
+	printk("TCM variable from testrun: %u @ %p\n", tcmvar, &tcmvar);
+	tcmvar = 0xDEADBEEFU;
+	printk("TCM variable: 0x%x @ %p\n", tcmvar, &tcmvar);
+
+	printk("TCM assigned variable: 0x%x @ %p\n", tcmassigned, &tcmassigned);
+
+	printk("TCM constant: 0x%x @ %p\n", tcmconst, &tcmconst);
+
+	/* Allocate some TCM memory from the pool */
+	tcmem = tcm_alloc(20);
+	if (tcmem) {
+		printk("TCM Allocated 20 bytes of TCM @ %p\n", tcmem);
+		tcmem[0] = 0xDEADBEEFU;
+		tcmem[1] = 0x2BADBABEU;
+		tcmem[2] = 0xCAFEBABEU;
+		tcmem[3] = 0xDEADBEEFU;
+		tcmem[4] = 0x2BADBABEU;
+		for (i = 0; i < 5; i++)
+			printk("TCM tcmem[%d] = %08x\n", i, tcmem[i]);
+		tcm_free(tcmem, 20);
+	}
+}
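
Not part of the patch: the CPUID_TCM probe described above (ITCM presence
in bit 0 and DTCM presence in bit 16 of the CP15 TCM Status Register)
comes down to a single coprocessor read. A minimal sketch; the helper
names here are invented for illustration, not kernel API:

#include <linux/types.h>

static inline u32 read_tcm_status(void)
{
	u32 tcm_status;

	/* MRC p15, 0, <Rd>, c0, c0, 2: read the TCM Status Register */
	asm("mrc	p15, 0, %0, c0, c0, 2" : "=r" (tcm_status));
	return tcm_status;
}

static inline bool have_itcm(void)
{
	return read_tcm_status() & (1 << 0);	/* bit 0: ITCM present */
}

static inline bool have_dtcm(void)
{
	return read_tcm_status() & (1 << 16);	/* bit 16: DTCM present */
}
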
diff --git a/Documentation/auxdisplay/cfag12864b-example.c b/Documentation/auxdisplay/cfag12864b-example.c
index 1d2c010bae12..e7823ffb1ca0 100644
--- a/Documentation/auxdisplay/cfag12864b-example.c
+++ b/Documentation/auxdisplay/cfag12864b-example.c
@@ -194,7 +194,6 @@ static void cfag12864b_blit(void)
  */
 
 #include <stdio.h>
-#include <string.h>
 
 #define EXAMPLES 6
 
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 6eb1a97e88ce..455d4e6d346d 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -408,6 +408,26 @@ You can attach the current shell task by echoing 0:
 
 # echo 0 > tasks
 
+2.3 Mounting hierarchies by name
+--------------------------------
+
+Passing the name=<x> option when mounting a cgroups hierarchy
+associates the given name with the hierarchy. This can be used when
+mounting a pre-existing hierarchy, in order to refer to it by name
+rather than by its set of active subsystems. Each hierarchy is either
+nameless, or has a unique name.
+
+The name should match [\w.-]+
+
+When passing a name=<x> option for a new hierarchy, you need to
+specify subsystems manually; the legacy behaviour of mounting all
+subsystems when none are explicitly specified is not supported when
+you give a subsystem a name.
+
+The name of the subsystem appears as part of the hierarchy description
+in /proc/mounts and /proc/<pid>/cgroups.
+
+
 3. Kernel API
 =============
 
@@ -501,7 +521,7 @@ rmdir() will fail with it. From this behavior, pre_destroy() can be
 called multiple times against a cgroup.
 
 int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-	       struct task_struct *task)
+	       struct task_struct *task, bool threadgroup)
 (cgroup_mutex held by caller)
 
 Called prior to moving a task into a cgroup; if the subsystem
@@ -509,14 +529,20 @@ returns an error, this will abort the attach operation. If a NULL
 task is passed, then a successful result indicates that *any*
 unspecified task can be moved into the cgroup. Note that this isn't
 called on a fork. If this method returns 0 (success) then this should
-remain valid while the caller holds cgroup_mutex.
+remain valid while the caller holds cgroup_mutex. If threadgroup is
+true, then a successful result indicates that all threads in the given
+thread's threadgroup can be moved together.
 
 void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-	    struct cgroup *old_cgrp, struct task_struct *task)
+	    struct cgroup *old_cgrp, struct task_struct *task,
+	    bool threadgroup)
 (cgroup_mutex held by caller)
 
 Called after the task has been attached to the cgroup, to allow any
 post-attachment activity that requires memory allocations or blocking.
+If threadgroup is true, the subsystem should take care of all threads
+in the specified thread's threadgroup. Currently does not support any
+subsystem that might need the old_cgrp for every thread in the group.
 
 void fork(struct cgroup_subsy *ss, struct task_struct *task)
 
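
Not part of the patch: a minimal userspace sketch of the name= mount
option documented in the hunk above, in the spirit of the smount helper
found elsewhere in these docs. The mountpoint /cgroups/mytree and the
choice of the cpuset subsystem are illustrative assumptions:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* Mount a new hierarchy named "mytree" with only the cpuset
	 * subsystem bound; subsystems must be listed explicitly when
	 * a name= option is passed for a new hierarchy. Equivalent to
	 *   mount -t cgroup -o name=mytree,cpuset none /cgroups/mytree
	 */
	if (mount("none", "/cgroups/mytree", "cgroup", 0,
		  "name=mytree,cpuset") == -1) {
		perror("mount");
		return 1;
	}
	return 0;
}
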
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 23d1262c0775..b871f2552b45 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -179,6 +179,9 @@ The reclaim algorithm has not been modified for cgroups, except that
 pages that are selected for reclaiming come from the per cgroup LRU
 list.
 
+NOTE: Reclaim does not work for the root cgroup, since we cannot set any
+limits on the root cgroup.
+
 2. Locking
 
 The memory controller uses the following hierarchy
@@ -210,6 +213,7 @@ We can alter the memory limit:
 NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
 mega or gigabytes.
 NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited).
+NOTE: We cannot set limits on the root cgroup any more.
 
 # cat /cgroups/0/memory.limit_in_bytes
 4194304
@@ -375,7 +379,42 @@ cgroups created below it.
 
 NOTE2: This feature can be enabled/disabled per subtree.
 
-7. TODO
+7. Soft limits
+
+Soft limits allow for greater sharing of memory. The idea behind soft limits
+is to allow control groups to use as much of the memory as needed, provided
+
+a. There is no memory contention
+b. They do not exceed their hard limit
+
+When the system detects memory contention or low memory, control groups
+are pushed back to their soft limits. If the soft limit of each control
+group is very high, they are pushed back as much as possible to make
+sure that one control group does not starve the others of memory.
+
+Please note that soft limits are a best-effort feature; they come with
+no guarantees, but the system does its best to make sure that when memory is
+heavily contended for, memory is allocated based on the soft limit
+hints/setup. Currently soft-limit-based reclaim is set up such that
+it gets invoked from balance_pgdat (kswapd).
+
+7.1 Interface
+
+Soft limits can be set up by using the following commands (in this example we
+assume a soft limit of 256 megabytes):
+
+# echo 256M > memory.soft_limit_in_bytes
+
+If we want to change this to 1G, we can at any time use:
+
+# echo 1G > memory.soft_limit_in_bytes
+
+NOTE1: Soft limits take effect over a long period of time, since they involve
+       reclaiming memory for balancing between memory cgroups
+NOTE2: It is recommended to always set the soft limit below the hard limit,
+       otherwise the hard limit will take precedence.
+
+8. TODO
 
 1. Add support for accounting huge pages (as a separate controller)
 2. Make per-cgroup scanner reclaim not-shared pages first
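
Not part of the patch: the soft-limit interface above is a plain cgroup
file, so it can also be driven programmatically. A sketch assuming the
/cgroups/0 layout used in this document's examples, doing exactly what
"echo 256M > memory.soft_limit_in_bytes" does from the shell:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *val = "256M";	/* k/K/m/M/g/G suffixes accepted */
	int fd = open("/cgroups/0/memory.soft_limit_in_bytes", O_WRONLY);

	if (fd < 0 || write(fd, val, strlen(val)) < 0) {
		perror("memory.soft_limit_in_bytes");
		return 1;
	}
	close(fd);
	return 0;
}
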
diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt
index 9f59fcbf5d82..ba046b8fa92f 100644
--- a/Documentation/crypto/async-tx-api.txt
+++ b/Documentation/crypto/async-tx-api.txt
@@ -54,20 +54,23 @@ features surfaced as a result:
 
 3.1 General format of the API:
 struct dma_async_tx_descriptor *
-async_<operation>(<op specific parameters>,
-		  enum async_tx_flags flags,
-		  struct dma_async_tx_descriptor *dependency,
-		  dma_async_tx_callback callback_routine,
-		  void *callback_parameter);
+async_<operation>(<op specific parameters>, struct async_submit_ctl *submit)
 
 3.2 Supported operations:
 memcpy  - memory copy between a source and a destination buffer
 memset  - fill a destination buffer with a byte value
 xor     - xor a series of source buffers and write the result to a
           destination buffer
-xor_zero_sum - xor a series of source buffers and set a flag if the
+xor_val - xor a series of source buffers and set a flag if the
           result is zero. The implementation attempts to prevent
           writes to memory
+pq      - generate the p+q (raid6 syndrome) from a series of source buffers
+pq_val  - validate that a p and or q buffer are in sync with a given series of
+          sources
+datap   - (raid6_datap_recov) recover a raid6 data block and the p block
+          from the given sources
+2data   - (raid6_2data_recov) recover 2 raid6 data blocks from the given
+          sources
 
 3.3 Descriptor management:
 The return value is non-NULL and points to a 'descriptor' when the operation
@@ -80,8 +83,8 @@ acknowledged by the application before the offload engine driver is allowed to
 recycle (or free) the descriptor. A descriptor can be acked by one of the
 following methods:
 1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted
-2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent
-   descriptor of a new operation.
+2/ submitting an unacknowledged descriptor as a dependency to another
+   async_tx call will implicitly set the acknowledged state.
 3/ calling async_tx_ack() on the descriptor.
 
 3.4 When does the operation execute?
@@ -119,30 +122,42 @@ of an operation.
 Perform a xor->copy->xor operation where each operation depends on the
 result from the previous operation:
 
-void complete_xor_copy_xor(void *param)
+void callback(void *param)
 {
-	printk("complete\n");
+	struct completion *cmp = param;
+
+	complete(cmp);
 }
 
-int run_xor_copy_xor(struct page **xor_srcs,
+void run_xor_copy_xor(struct page **xor_srcs,
 		      int xor_src_cnt,
 		      struct page *xor_dest,
 		      size_t xor_len,
 		      struct page *copy_src,
 		      struct page *copy_dest,
 		      size_t copy_len)
 {
 	struct dma_async_tx_descriptor *tx;
+	addr_conv_t addr_conv[xor_src_cnt];
+	struct async_submit_ctl submit;
+	struct completion cmp;
+
+	init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL,
+			  addr_conv);
+	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
 
-	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
-		       ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL);
-	tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len,
-			  ASYNC_TX_DEP_ACK, tx, NULL, NULL);
-	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
-		       ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK,
-		       tx, complete_xor_copy_xor, NULL);
+	submit.depend_tx = tx;
+	tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit);
+
+	init_completion(&cmp);
+	init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx,
+			  callback, &cmp, addr_conv);
+	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
 
 	async_tx_issue_pending_all();
+
+	wait_for_completion(&cmp);
 }
 
 See include/linux/async_tx.h for more information on the flags. See the
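
Not part of the patch: for contrast with the chained example above, a
minimal single-operation use of the new submit-control API. The page
arguments are assumed to be caller-owned; since nothing depends on the
copy, the descriptor is acked up front with ASYNC_TX_ACK:

static void copy_one_page(struct page *dest, struct page *src)
{
	struct async_submit_ctl submit;
	struct dma_async_tx_descriptor *tx;

	/* no dependency, no callback, no scribble area needed */
	init_async_submit(&submit, ASYNC_TX_ACK, NULL, NULL, NULL, NULL);
	tx = async_memcpy(dest, src, 0, 0, PAGE_SIZE, &submit);
	/* tx could seed a dependency chain; unused in this sketch */

	async_tx_issue_pending_all();
}
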
diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt
index 736540045dc7..23a181074f94 100644
--- a/Documentation/filesystems/sharedsubtree.txt
+++ b/Documentation/filesystems/sharedsubtree.txt
@@ -4,7 +4,7 @@ Shared Subtrees
 Contents:
 	1) Overview
 	2) Features
-	3) smount command
+	3) Setting mount states
 	4) Use-case
 	5) Detailed semantics
 	6) Quiz
@@ -41,14 +41,14 @@ replicas continue to be exactly same.
 
 	Here is an example:
 
-	Lets say /mnt has a mount that is shared.
+	Let's say /mnt has a mount that is shared.
 	mount --make-shared /mnt
 
-	note: mount command does not yet support the --make-shared flag.
-	I have included a small C program which does the same by executing
-	'smount /mnt shared'
+	Note: mount(8) command now supports the --make-shared flag,
+	so the sample 'smount' program is no longer needed and has been
+	removed.
 
-	#mount --bind /mnt /tmp
+	# mount --bind /mnt /tmp
 	The above command replicates the mount at /mnt to the mountpoint /tmp
 	and the contents of both the mounts remain identical.
 
@@ -58,8 +58,8 @@ replicas continue to be exactly same.
 	#ls /tmp
 	a b c
 
-	Now lets say we mount a device at /tmp/a
-	#mount /dev/sd0 /tmp/a
+	Now let's say we mount a device at /tmp/a
+	# mount /dev/sd0 /tmp/a
 
 	#ls /tmp/a
 	t1 t2 t2
@@ -80,21 +80,20 @@ replicas continue to be exactly same.
 
 	Here is an example:
 
-	Lets say /mnt has a mount which is shared.
-	#mount --make-shared /mnt
+	Let's say /mnt has a mount which is shared.
+	# mount --make-shared /mnt
 
-	Lets bind mount /mnt to /tmp
-	#mount --bind /mnt /tmp
+	Let's bind mount /mnt to /tmp
+	# mount --bind /mnt /tmp
 
 	the new mount at /tmp becomes a shared mount and it is a replica of
 	the mount at /mnt.
 
-	Now lets make the mount at /tmp; a slave of /mnt
-	#mount --make-slave /tmp
-	[or smount /tmp slave]
+	Now let's make the mount at /tmp; a slave of /mnt
+	# mount --make-slave /tmp
 
-	lets mount /dev/sd0 on /mnt/a
-	#mount /dev/sd0 /mnt/a
+	let's mount /dev/sd0 on /mnt/a
+	# mount /dev/sd0 /mnt/a
 
 	#ls /mnt/a
 	t1 t2 t3
@@ -104,9 +103,9 @@ replicas continue to be exactly same.
 
 	Note the mount event has propagated to the mount at /tmp
 
-	However lets see what happens if we mount something on the mount at /tmp
+	However let's see what happens if we mount something on the mount at /tmp
 
-	#mount /dev/sd1 /tmp/b
+	# mount /dev/sd1 /tmp/b
 
 	#ls /tmp/b
 	s1 s2 s3
@@ -124,12 +123,11 @@ replicas continue to be exactly same.
 
 2d) A unbindable mount is a unbindable private mount
 
-	lets say we have a mount at /mnt and we make is unbindable
+	let's say we have a mount at /mnt and we make it unbindable
 
-	#mount --make-unbindable /mnt
-	[ smount /mnt unbindable ]
+	# mount --make-unbindable /mnt
 
-	Lets try to bind mount this mount somewhere else.
+	Let's try to bind mount this mount somewhere else.
 	# mount --bind /mnt /tmp
 	mount: wrong fs type, bad option, bad superblock on /mnt,
 	       or too many mounted file systems
@@ -137,149 +135,15 @@ replicas continue to be exactly same.
 	Binding a unbindable mount is a invalid operation.
 
 
-3) smount command
+3) Setting mount states
 
-	Currently the mount command is not aware of shared subtree features.
-	Work is in progress to add the support in mount ( util-linux package ).
-	Till then use the following program.
+	The mount command (util-linux package) can be used to set mount
+	states:
 
-	------------------------------------------------------------------------
-	//
-	//this code was developed my Miklos Szeredi <miklos@szeredi.hu>
-	//and modified by Ram Pai <linuxram@us.ibm.com>
+	mount --make-shared mountpoint
+	mount --make-slave mountpoint
+	mount --make-private mountpoint
+	mount --make-unbindable mountpoint
-	// sample usage:
-	// smount /tmp shared
-	//
-	#include <stdio.h>
-	#include <stdlib.h>
-	#include <unistd.h>
-	#include <string.h>
-	#include <sys/mount.h>
-	#include <sys/fsuid.h>
-
-	#ifndef MS_REC
-	#define MS_REC		0x4000	/* 16384: Recursive loopback */
-	#endif
-
-	#ifndef MS_SHARED
-	#define MS_SHARED	1<<20	/* Shared */
-	#endif
-
-	#ifndef MS_PRIVATE
-	#define MS_PRIVATE	1<<18	/* Private */
-	#endif
-
-	#ifndef MS_SLAVE
-	#define MS_SLAVE	1<<19	/* Slave */
-	#endif
-
-	#ifndef MS_UNBINDABLE
-	#define MS_UNBINDABLE	1<<17	/* Unbindable */
-	#endif
-
-	int main(int argc, char *argv[])
-	{
-		int type;
-		if(argc != 3) {
-			fprintf(stderr, "usage: %s dir "
-				"<rshared|rslave|rprivate|runbindable|shared|slave"
-				"|private|unbindable>\n" , argv[0]);
-			return 1;
-		}
-
-		fprintf(stdout, "%s %s %s\n", argv[0], argv[1], argv[2]);
-
-		if (strcmp(argv[2],"rshared")==0)
-			type=(MS_SHARED|MS_REC);
-		else if (strcmp(argv[2],"rslave")==0)
-			type=(MS_SLAVE|MS_REC);
-		else if (strcmp(argv[2],"rprivate")==0)
-			type=(MS_PRIVATE|MS_REC);
-		else if (strcmp(argv[2],"runbindable")==0)
-			type=(MS_UNBINDABLE|MS_REC);
-		else if (strcmp(argv[2],"shared")==0)
-			type=MS_SHARED;
-		else if (strcmp(argv[2],"slave")==0)
-			type=MS_SLAVE;
-		else if (strcmp(argv[2],"private")==0)
-			type=MS_PRIVATE;
-		else if (strcmp(argv[2],"unbindable")==0)
-			type=MS_UNBINDABLE;
-		else {
-			fprintf(stderr, "invalid operation: %s\n", argv[2]);
-			return 1;
-		}
-		setfsuid(getuid());
-
-		if(mount("", argv[1], "dontcare", type, "") == -1) {
-			perror("mount");
-			return 1;
-		}
-		return 0;
-	}
-	-----------------------------------------------------------------------
-
-	Copy the above code snippet into smount.c
-	gcc -o smount smount.c
-
-
-	(i) To mark all the mounts under /mnt as shared execute the following
-	command:
-
-		smount /mnt rshared
-		the corresponding syntax planned for mount command is
-		mount --make-rshared /mnt
-
-	just to mark a mount /mnt as shared, execute the following
-	command:
-		smount /mnt shared
-		the corresponding syntax planned for mount command is
-		mount --make-shared /mnt
-
-	(ii) To mark all the shared mounts under /mnt as slave execute the
-	following
-
-	command:
-		smount /mnt rslave
-		the corresponding syntax planned for mount command is
-		mount --make-rslave /mnt
-
-	just to mark a mount /mnt as slave, execute the following
-	command:
-		smount /mnt slave
-		the corresponding syntax planned for mount command is
-		mount --make-slave /mnt
-
-	(iii) To mark all the mounts under /mnt as private execute the
-	following command:
-
-		smount /mnt rprivate
-		the corresponding syntax planned for mount command is
-		mount --make-rprivate /mnt
-
-	just to mark a mount /mnt as private, execute the following
-	command:
-		smount /mnt private
-		the corresponding syntax planned for mount command is
-		mount --make-private /mnt
-
-	NOTE: by default all the mounts are created as private. But if
-	you want to change some shared/slave/unbindable mount as
-	private at a later point in time, this command can help.
-
-	(iv) To mark all the mounts under /mnt as unbindable execute the
-	following
-
-	command:
-		smount /mnt runbindable
-		the corresponding syntax planned for mount command is
-		mount --make-runbindable /mnt
-
-	just to mark a mount /mnt as unbindable, execute the following
-	command:
-		smount /mnt unbindable
-		the corresponding syntax planned for mount command is
-		mount --make-unbindable /mnt
 
 
 4) Use cases
@@ -350,7 +214,7 @@ replicas continue to be exactly same.
 	mount --rbind / /view/v3
 	mount --rbind / /view/v4
 
-	and if /usr has a versioning filesystem mounted, than that
+	and if /usr has a versioning filesystem mounted, then that
 	mount appears at /view/v1/usr, /view/v2/usr, /view/v3/usr and
 	/view/v4/usr too
 
@@ -390,7 +254,7 @@ replicas continue to be exactly same.
 
 	For example:
 		mount --make-shared /mnt
-		mount --bin /mnt /tmp
+		mount --bind /mnt /tmp
 
 	The mount at /mnt and that at /tmp are both shared and belong
 	to the same peer group. Anything mounted or unmounted under
@@ -558,7 +422,7 @@ replicas continue to be exactly same.
 	then the subtree under the unbindable mount is pruned in the new
 	location.
 
-	eg: lets say we have the following mount tree.
+	eg: let's say we have the following mount tree.
 
 		A
 	       /  \
@@ -566,7 +430,7 @@ replicas continue to be exactly same.
 	     / \  / \
 	    D   E F  G
 
-	Lets say all the mount except the mount C in the tree are
+	Let's say all the mount except the mount C in the tree are
 	of a type other than unbindable.
 
 	If this tree is rbound to say Z
@@ -683,13 +547,13 @@ replicas continue to be exactly same.
 	'b' on mounts that receive propagation from mount 'B' and does not have
 	sub-mounts within them are unmounted.
 
-	Example: Lets say 'B1', 'B2', 'B3' are shared mounts that propagate to
+	Example: Let's say 'B1', 'B2', 'B3' are shared mounts that propagate to
 	each other.
 
-	lets say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount
+	let's say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount
 	'B1', 'B2' and 'B3' respectively.
 
-	lets say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on
+	let's say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on
 	mount 'B1', 'B2' and 'B3' respectively.
 
 	if 'C1' is unmounted, all the mounts that are most-recently-mounted on
@@ -710,7 +574,7 @@ replicas continue to be exactly same.
 	A cloned namespace contains all the mounts as that of the parent
 	namespace.
 
-	Lets say 'A' and 'B' are the corresponding mounts in the parent and the
+	Let's say 'A' and 'B' are the corresponding mounts in the parent and the
 	child namespace.
 
 	If 'A' is shared, then 'B' is also shared and 'A' and 'B' propagate to
@@ -759,11 +623,11 @@ replicas continue to be exactly same.
 	mount --make-slave /mnt
 
 	At this point we have the first mount at /tmp and
-	its root dentry is 1. Lets call this mount 'A'
+	its root dentry is 1. Let's call this mount 'A'
 	And then we have a second mount at /tmp1 with root
-	dentry 2. Lets call this mount 'B'
+	dentry 2. Let's call this mount 'B'
 	Next we have a third mount at /mnt with root dentry
-	mnt. Lets call this mount 'C'
+	mnt. Let's call this mount 'C'
 
 	'B' is the slave of 'A' and 'C' is a slave of 'B'
 	A -> B -> C
@@ -794,7 +658,7 @@ replicas continue to be exactly same.
 
 	Q3 Why is unbindable mount needed?
 
-	Lets say we want to replicate the mount tree at multiple
+	Let's say we want to replicate the mount tree at multiple
 	locations within the same subtree.
 
 	if one rbind mounts a tree within the same subtree 'n' times
@@ -803,7 +667,7 @@ replicas continue to be exactly same.
 	mounts. Here is a example.
 
 	step 1:
-	   lets say the root tree has just two directories with
+	   let's say the root tree has just two directories with
 	   one vfsmount.
 		root
 	       /    \
@@ -875,7 +739,7 @@ replicas continue to be exactly same.
 	Unclonable mounts come in handy here.
 
 	step 1:
-	   lets say the root tree has just two directories with
+	   let's say the root tree has just two directories with
 	   one vfsmount.
 		root
 	       /    \
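
Not part of the patch: each --make-* invocation above reduces to a
single mount(2) call carrying a propagation flag, which is all the
removed smount helper did. A condensed sketch (older libcs may need the
#ifndef MS_* fallbacks shown in the removed helper):

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* Equivalent of "mount --make-shared /mnt": only the target
	 * and the propagation flag matter; source, fstype and data
	 * are ignored for this operation. Use MS_SLAVE, MS_PRIVATE
	 * or MS_UNBINDABLE for the other states, or'ing in MS_REC
	 * for the recursive --make-r* variants. */
	if (mount("none", "/mnt", NULL, MS_SHARED, NULL) == -1) {
		perror("mount --make-shared /mnt");
		return 1;
	}
	return 0;
}
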
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index f49eecf2e573..623f094c9d8d 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -536,6 +536,7 @@ struct address_space_operations {
 	/* migrate the contents of a page to the specified target */
 	int (*migratepage) (struct page *, struct page *);
 	int (*launder_page) (struct page *);
+	int (*error_remove_page) (struct mapping *mapping, struct page *page);
 };
 
 	writepage: called by the VM to write a dirty page to backing store.
@@ -694,6 +695,12 @@ struct address_space_operations {
 	prevent redirtying the page, it is kept locked during the whole
 	operation.
 
+  error_remove_page: normally set to generic_error_remove_page if truncation
+	is ok for this address space. Used for memory failure handling.
+	Setting this implies you deal with pages going away under you,
+	unless you have them locked or reference counts increased.
+
+
 The File Object
 ===============
 
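
Not part of the patch: wiring up the new method is one line in a
filesystem's address_space_operations. A sketch where foo_aops,
foo_readpage and foo_writepage are hypothetical placeholders:

/* A filesystem for which truncating a page is acceptable points
 * error_remove_page at the generic helper, so memory-failure handling
 * can drop the affected page. */
static const struct address_space_operations foo_aops = {
	.readpage		= foo_readpage,
	.writepage		= foo_writepage,
	.error_remove_page	= generic_error_remove_page,
};
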
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index aafca0a8f66a..947374977ca5 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -135,6 +135,7 @@ Code Seq# Include File Comments
135 <http://mikonos.dia.unisa.it/tcfs> 135 <http://mikonos.dia.unisa.it/tcfs>
136'l' 40-7F linux/udf_fs_i.h in development: 136'l' 40-7F linux/udf_fs_i.h in development:
137 <http://sourceforge.net/projects/linux-udf/> 137 <http://sourceforge.net/projects/linux-udf/>
138'm' 00-09 linux/mmtimer.h
138'm' all linux/mtio.h conflict! 139'm' all linux/mtio.h conflict!
139'm' all linux/soundcard.h conflict! 140'm' all linux/soundcard.h conflict!
140'm' all linux/synclink.h conflict! 141'm' all linux/synclink.h conflict!
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 1458448436cc..62682500878a 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -96,13 +96,16 @@ handles that the Linux kernel will allocate. When you get lots
96of error messages about running out of file handles, you might 96of error messages about running out of file handles, you might
97want to increase this limit. 97want to increase this limit.
98 98
99The three values in file-nr denote the number of allocated 99Historically, the three values in file-nr denoted the number of
100file handles, the number of unused file handles and the maximum 100allocated file handles, the number of allocated but unused file
101number of file handles. When the allocated file handles come 101handles, and the maximum number of file handles. Linux 2.6 always
102close to the maximum, but the number of unused file handles is 102reports 0 as the number of free file handles -- this is not an
103significantly greater than 0, you've encountered a peak in your 103error, it just means that the number of allocated file handles
104usage of file handles and you don't need to increase the maximum. 104exactly matches the number of used file handles.
105 105
106Attempts to allocate more file descriptors than file-max are
107reported with printk; look for "VFS: file-max limit <number>
108reached".
106============================================================== 109==============================================================
107 110
108nr_open: 111nr_open:
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index b3d8b4922740..a028b92001ed 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -22,6 +22,7 @@ show up in /proc/sys/kernel:
22- callhome [ S390 only ] 22- callhome [ S390 only ]
23- auto_msgmni 23- auto_msgmni
24- core_pattern 24- core_pattern
25- core_pipe_limit
25- core_uses_pid 26- core_uses_pid
26- ctrl-alt-del 27- ctrl-alt-del
27- dentry-state 28- dentry-state
@@ -135,6 +136,27 @@ core_pattern is used to specify a core dumpfile pattern name.
135 136
136============================================================== 137==============================================================
137 138
139core_pipe_limit:
140
141This sysctl is only applicable when core_pattern is configured to pipe core
142files to a user space helper (when the first character of core_pattern is a '|',
143see above). When collecting cores via a pipe to an application, it is
144occasionally useful for the collecting application to gather data about the
145crashing process from its /proc/pid directory. In order to do this safely, the
146kernel must wait for the collecting process to exit, so as not to remove the
147crashing process's proc files prematurely. This in turn creates the possibility
148that a misbehaving userspace collecting process can block the reaping of a
149crashed process simply by never exiting. This sysctl defends against that. It
150defines how many concurrent crashing processes may be piped to user space
151applications in parallel. If this value is exceeded, then those crashing
152processes above that value are noted via the kernel log and their cores are
153skipped. 0 is a special value, indicating that unlimited processes may be
154captured in parallel, but that no waiting will take place (i.e. the collecting
155process is not guaranteed access to /proc/<crashing pid>/). This value defaults
156to 0.
157
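A hedged sketch of tuning this from C rather than a shell (the sysctl path follows directly from the name above; the limit of 4 is an arbitrary example value):

#include <stdio.h>

/* Sketch: allow at most 4 concurrent piped core dumps, so one stuck
 * collecting helper cannot pin the proc entries of every crasher.
 * Equivalent to: echo 4 > /proc/sys/kernel/core_pipe_limit */
static int set_core_pipe_limit(int limit)
{
	FILE *f = fopen("/proc/sys/kernel/core_pipe_limit", "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", limit);
	return fclose(f);
}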
158==============================================================
159
138core_uses_pid: 160core_uses_pid:
139 161
140The default coredump filename is "core". By setting 162The default coredump filename is "core". By setting
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index e6fb1ec2744b..a6e360d2055c 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -32,6 +32,8 @@ Currently, these files are in /proc/sys/vm:
32- legacy_va_layout 32- legacy_va_layout
33- lowmem_reserve_ratio 33- lowmem_reserve_ratio
34- max_map_count 34- max_map_count
35- memory_failure_early_kill
36- memory_failure_recovery
35- min_free_kbytes 37- min_free_kbytes
36- min_slab_ratio 38- min_slab_ratio
37- min_unmapped_ratio 39- min_unmapped_ratio
@@ -53,7 +55,6 @@ Currently, these files are in /proc/sys/vm:
53- vfs_cache_pressure 55- vfs_cache_pressure
54- zone_reclaim_mode 56- zone_reclaim_mode
55 57
56
57============================================================== 58==============================================================
58 59
59block_dump 60block_dump
@@ -275,6 +276,44 @@ e.g., up to one or two maps per allocation.
275 276
276The default value is 65536. 277The default value is 65536.
277 278
279=============================================================
280
281memory_failure_early_kill:
282
283Control how to kill processes when an uncorrected memory error (typically
284a 2-bit error in a memory module) that cannot be handled by the kernel
285is detected in the background by hardware. In some cases (like the page
286still having a valid copy on disk) the kernel will handle the failure
287transparently without affecting any applications. But if there is
288no other up-to-date copy of the data it will kill processes to prevent
289any data corruption from propagating.
290
2911: Kill all processes that have the corrupted, non-reloadable page mapped
292as soon as the corruption is detected. Note this is not supported
293for a few types of pages, like kernel internally allocated data or
294the swap cache, but works for the majority of user pages.
295
2960: Only unmap the corrupted page from all processes and only kill a process
297that tries to access it.
298
299The kill is done using a catchable SIGBUS with BUS_MCEERR_AO, so processes can
300handle this if they want to.
301
302This is only active on architectures/platforms with advanced machine
303check handling and depends on the hardware capabilities.
304
305Applications can override this setting individually with the PR_MCE_KILL prctl.
306
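For illustration, a hedged userspace sketch of both sides of this interface: catching the advisory SIGBUS described above and opting the process in to early kill via PR_MCE_KILL (constants assume 2.6.32-era headers; the handler body is a placeholder):

#include <signal.h>
#include <string.h>
#include <sys/prctl.h>

static void mce_handler(int sig, siginfo_t *si, void *ctx)
{
	(void)sig; (void)ctx;
	if (si->si_code == BUS_MCEERR_AO) {
		/* si->si_addr points into the poisoned page; the process
		 * should stop using that memory (placeholder recovery). */
	}
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = mce_handler;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGBUS, &sa, NULL);

	/* Per-process override of memory_failure_early_kill. */
	prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
	/* ... */
	return 0;
}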
307==============================================================
308
309memory_failure_recovery
310
311Enable memory failure recovery (when supported by the platform).
312
3131: Attempt recovery.
314
3150: Always panic on a memory failure.
316
278============================================================== 317==============================================================
279 318
280min_free_kbytes: 319min_free_kbytes:
diff --git a/Documentation/vm/.gitignore b/Documentation/vm/.gitignore
index 33e8a023df02..09b164a5700f 100644
--- a/Documentation/vm/.gitignore
+++ b/Documentation/vm/.gitignore
@@ -1 +1,2 @@
1page-types
1slabinfo 2slabinfo
diff --git a/Documentation/vm/locking b/Documentation/vm/locking
index f366fa956179..25fadb448760 100644
--- a/Documentation/vm/locking
+++ b/Documentation/vm/locking
@@ -80,7 +80,7 @@ Note: PTL can also be used to guarantee that no new clones using the
80mm start up ... this is a loose form of stability on mm_users. For 80mm start up ... this is a loose form of stability on mm_users. For
81example, it is used in copy_mm to protect against a racing tlb_gather_mmu 81example, it is used in copy_mm to protect against a racing tlb_gather_mmu
82single address space optimization, so that the zap_page_range (from 82single address space optimization, so that the zap_page_range (from
83vmtruncate) does not lose sending ipi's to cloned threads that might 83truncate) does not lose sending ipi's to cloned threads that might
84be spawned underneath it and go to user mode to drag in pte's into tlbs. 84be spawned underneath it and go to user mode to drag in pte's into tlbs.
85 85
86swap_lock 86swap_lock
diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c
index 3eda8ea00852..fa1a30d9e9d5 100644
--- a/Documentation/vm/page-types.c
+++ b/Documentation/vm/page-types.c
@@ -5,6 +5,7 @@
5 * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com> 5 * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com>
6 */ 6 */
7 7
8#define _LARGEFILE64_SOURCE
8#include <stdio.h> 9#include <stdio.h>
9#include <stdlib.h> 10#include <stdlib.h>
10#include <unistd.h> 11#include <unistd.h>
@@ -13,12 +14,33 @@
13#include <string.h> 14#include <string.h>
14#include <getopt.h> 15#include <getopt.h>
15#include <limits.h> 16#include <limits.h>
17#include <assert.h>
16#include <sys/types.h> 18#include <sys/types.h>
17#include <sys/errno.h> 19#include <sys/errno.h>
18#include <sys/fcntl.h> 20#include <sys/fcntl.h>
19 21
20 22
21/* 23/*
24 * pagemap kernel ABI bits
25 */
26
27#define PM_ENTRY_BYTES sizeof(uint64_t)
28#define PM_STATUS_BITS 3
29#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
30#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
31#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
32#define PM_PSHIFT_BITS 6
33#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
34#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
35#define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
36#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1)
37#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
38
39#define PM_PRESENT PM_STATUS(4LL)
40#define PM_SWAP PM_STATUS(2LL)
41
42
43/*
22 * kernel page flags 44 * kernel page flags
23 */ 45 */
24 46
@@ -126,6 +148,14 @@ static int nr_addr_ranges;
126static unsigned long opt_offset[MAX_ADDR_RANGES]; 148static unsigned long opt_offset[MAX_ADDR_RANGES];
127static unsigned long opt_size[MAX_ADDR_RANGES]; 149static unsigned long opt_size[MAX_ADDR_RANGES];
128 150
151#define MAX_VMAS 10240
152static int nr_vmas;
153static unsigned long pg_start[MAX_VMAS];
154static unsigned long pg_end[MAX_VMAS];
155static unsigned long voffset;
156
157static int pagemap_fd;
158
129#define MAX_BIT_FILTERS 64 159#define MAX_BIT_FILTERS 64
130static int nr_bit_filters; 160static int nr_bit_filters;
131static uint64_t opt_mask[MAX_BIT_FILTERS]; 161static uint64_t opt_mask[MAX_BIT_FILTERS];
@@ -135,7 +165,6 @@ static int page_size;
135 165
136#define PAGES_BATCH (64 << 10) /* 64k pages */ 166#define PAGES_BATCH (64 << 10) /* 64k pages */
137static int kpageflags_fd; 167static int kpageflags_fd;
138static uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH];
139 168
140#define HASH_SHIFT 13 169#define HASH_SHIFT 13
141#define HASH_SIZE (1 << HASH_SHIFT) 170#define HASH_SIZE (1 << HASH_SHIFT)
@@ -158,6 +187,11 @@ static uint64_t page_flags[HASH_SIZE];
158 type __min2 = (y); \ 187 type __min2 = (y); \
159 __min1 < __min2 ? __min1 : __min2; }) 188 __min1 < __min2 ? __min1 : __min2; })
160 189
190#define max_t(type, x, y) ({ \
191 type __max1 = (x); \
192 type __max2 = (y); \
193 __max1 > __max2 ? __max1 : __max2; })
194
161static unsigned long pages2mb(unsigned long pages) 195static unsigned long pages2mb(unsigned long pages)
162{ 196{
163 return (pages * page_size) >> 20; 197 return (pages * page_size) >> 20;
@@ -224,26 +258,34 @@ static char *page_flag_longname(uint64_t flags)
224static void show_page_range(unsigned long offset, uint64_t flags) 258static void show_page_range(unsigned long offset, uint64_t flags)
225{ 259{
226 static uint64_t flags0; 260 static uint64_t flags0;
261 static unsigned long voff;
227 static unsigned long index; 262 static unsigned long index;
228 static unsigned long count; 263 static unsigned long count;
229 264
230 if (flags == flags0 && offset == index + count) { 265 if (flags == flags0 && offset == index + count &&
266 (!opt_pid || voffset == voff + count)) {
231 count++; 267 count++;
232 return; 268 return;
233 } 269 }
234 270
235 if (count) 271 if (count) {
236 printf("%lu\t%lu\t%s\n", 272 if (opt_pid)
273 printf("%lx\t", voff);
274 printf("%lx\t%lx\t%s\n",
237 index, count, page_flag_name(flags0)); 275 index, count, page_flag_name(flags0));
276 }
238 277
239 flags0 = flags; 278 flags0 = flags;
240 index = offset; 279 index = offset;
280 voff = voffset;
241 count = 1; 281 count = 1;
242} 282}
243 283
244static void show_page(unsigned long offset, uint64_t flags) 284static void show_page(unsigned long offset, uint64_t flags)
245{ 285{
246 printf("%lu\t%s\n", offset, page_flag_name(flags)); 286 if (opt_pid)
287 printf("%lx\t", voffset);
288 printf("%lx\t%s\n", offset, page_flag_name(flags));
247} 289}
248 290
249static void show_summary(void) 291static void show_summary(void)
@@ -383,6 +425,8 @@ static void walk_pfn(unsigned long index, unsigned long count)
383 lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET); 425 lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET);
384 426
385 while (count) { 427 while (count) {
428 uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH];
429
386 batch = min_t(unsigned long, count, PAGES_BATCH); 430 batch = min_t(unsigned long, count, PAGES_BATCH);
387 n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES); 431 n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES);
388 if (n == 0) 432 if (n == 0)
@@ -404,6 +448,81 @@ static void walk_pfn(unsigned long index, unsigned long count)
404 } 448 }
405} 449}
406 450
451
452#define PAGEMAP_BATCH 4096
453static unsigned long task_pfn(unsigned long pgoff)
454{
455 static uint64_t buf[PAGEMAP_BATCH];
456 static unsigned long start;
457 static long count;
458 uint64_t pfn;
459
460 if (pgoff < start || pgoff >= start + count) {
461 if (lseek64(pagemap_fd,
462 (uint64_t)pgoff * PM_ENTRY_BYTES,
463 SEEK_SET) < 0) {
464 perror("pagemap seek");
465 exit(EXIT_FAILURE);
466 }
467 count = read(pagemap_fd, buf, sizeof(buf));
468 if (count == 0)
469 return 0;
470 if (count < 0) {
471 perror("pagemap read");
472 exit(EXIT_FAILURE);
473 }
474 if (count % PM_ENTRY_BYTES) {
475 fatal("pagemap read not aligned.\n");
476 exit(EXIT_FAILURE);
477 }
478 count /= PM_ENTRY_BYTES;
479 start = pgoff;
480 }
481
482 pfn = buf[pgoff - start];
483 if (pfn & PM_PRESENT)
484 pfn = PM_PFRAME(pfn);
485 else
486 pfn = 0;
487
488 return pfn;
489}
490
491static void walk_task(unsigned long index, unsigned long count)
492{
493 int i = 0;
494 const unsigned long end = index + count;
495
496 while (index < end) {
497
498 while (pg_end[i] <= index)
499 if (++i >= nr_vmas)
500 return;
501 if (pg_start[i] >= end)
502 return;
503
504 voffset = max_t(unsigned long, pg_start[i], index);
505 index = min_t(unsigned long, pg_end[i], end);
506
507 assert(voffset < index);
508 for (; voffset < index; voffset++) {
509 unsigned long pfn = task_pfn(voffset);
510 if (pfn)
511 walk_pfn(pfn, 1);
512 }
513 }
514}
515
516static void add_addr_range(unsigned long offset, unsigned long size)
517{
518 if (nr_addr_ranges >= MAX_ADDR_RANGES)
519 fatal("too many addr ranges\n");
520
521 opt_offset[nr_addr_ranges] = offset;
522 opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset);
523 nr_addr_ranges++;
524}
525
407static void walk_addr_ranges(void) 526static void walk_addr_ranges(void)
408{ 527{
409 int i; 528 int i;
@@ -415,10 +534,13 @@ static void walk_addr_ranges(void)
415 } 534 }
416 535
417 if (!nr_addr_ranges) 536 if (!nr_addr_ranges)
418 walk_pfn(0, ULONG_MAX); 537 add_addr_range(0, ULONG_MAX);
419 538
420 for (i = 0; i < nr_addr_ranges; i++) 539 for (i = 0; i < nr_addr_ranges; i++)
421 walk_pfn(opt_offset[i], opt_size[i]); 540 if (!opt_pid)
541 walk_pfn(opt_offset[i], opt_size[i]);
542 else
543 walk_task(opt_offset[i], opt_size[i]);
422 544
423 close(kpageflags_fd); 545 close(kpageflags_fd);
424} 546}
@@ -446,8 +568,8 @@ static void usage(void)
446" -r|--raw Raw mode, for kernel developers\n" 568" -r|--raw Raw mode, for kernel developers\n"
447" -a|--addr addr-spec Walk a range of pages\n" 569" -a|--addr addr-spec Walk a range of pages\n"
448" -b|--bits bits-spec Walk pages with specified bits\n" 570" -b|--bits bits-spec Walk pages with specified bits\n"
449#if 0 /* planned features */
450" -p|--pid pid Walk process address space\n" 571" -p|--pid pid Walk process address space\n"
572#if 0 /* planned features */
451" -f|--file filename Walk file address space\n" 573" -f|--file filename Walk file address space\n"
452#endif 574#endif
453" -l|--list Show page details in ranges\n" 575" -l|--list Show page details in ranges\n"
@@ -459,7 +581,7 @@ static void usage(void)
459" N+M pages range from N to N+M-1\n" 581" N+M pages range from N to N+M-1\n"
460" N,M pages range from N to M-1\n" 582" N,M pages range from N to M-1\n"
461" N, pages range from N to end\n" 583" N, pages range from N to end\n"
462" ,M pages range from 0 to M\n" 584" ,M pages range from 0 to M-1\n"
463"bits-spec:\n" 585"bits-spec:\n"
464" bit1,bit2 (flags & (bit1|bit2)) != 0\n" 586" bit1,bit2 (flags & (bit1|bit2)) != 0\n"
465" bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n" 587" bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n"
@@ -496,21 +618,57 @@ static unsigned long long parse_number(const char *str)
496 618
497static void parse_pid(const char *str) 619static void parse_pid(const char *str)
498{ 620{
621 FILE *file;
622 char buf[5000];
623
499 opt_pid = parse_number(str); 624 opt_pid = parse_number(str);
500}
501 625
502static void parse_file(const char *name) 626 sprintf(buf, "/proc/%d/pagemap", opt_pid);
503{ 627 pagemap_fd = open(buf, O_RDONLY);
628 if (pagemap_fd < 0) {
629 perror(buf);
630 exit(EXIT_FAILURE);
631 }
632
633 sprintf(buf, "/proc/%d/maps", opt_pid);
634 file = fopen(buf, "r");
635 if (!file) {
636 perror(buf);
637 exit(EXIT_FAILURE);
638 }
639
640 while (fgets(buf, sizeof(buf), file) != NULL) {
641 unsigned long vm_start;
642 unsigned long vm_end;
643 unsigned long long pgoff;
644 int major, minor;
645 char r, w, x, s;
646 unsigned long ino;
647 int n;
648
649 n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu",
650 &vm_start,
651 &vm_end,
652 &r, &w, &x, &s,
653 &pgoff,
654 &major, &minor,
655 &ino);
656 if (n < 10) {
657 fprintf(stderr, "unexpected line: %s\n", buf);
658 continue;
659 }
660 pg_start[nr_vmas] = vm_start / page_size;
661 pg_end[nr_vmas] = vm_end / page_size;
662 if (++nr_vmas >= MAX_VMAS) {
663 fprintf(stderr, "too many VMAs\n");
664 break;
665 }
666 }
667 fclose(file);
504} 668}
505 669
506static void add_addr_range(unsigned long offset, unsigned long size) 670static void parse_file(const char *name)
507{ 671{
508 if (nr_addr_ranges >= MAX_ADDR_RANGES)
509 fatal("too much addr ranges\n");
510
511 opt_offset[nr_addr_ranges] = offset;
512 opt_size[nr_addr_ranges] = size;
513 nr_addr_ranges++;
514} 672}
515 673
516static void parse_addr_range(const char *optarg) 674static void parse_addr_range(const char *optarg)
@@ -676,8 +834,10 @@ int main(int argc, char *argv[])
676 } 834 }
677 } 835 }
678 836
837 if (opt_list && opt_pid)
838 printf("voffset\t");
679 if (opt_list == 1) 839 if (opt_list == 1)
680 printf("offset\tcount\tflags\n"); 840 printf("offset\tlen\tflags\n");
681 if (opt_list == 2) 841 if (opt_list == 2)
682 printf("offset\tflags\n"); 842 printf("offset\tflags\n");
683 843
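The PM_* constants added above define the pagemap bit layout this tool now depends on. As a hedged standalone sketch of the same decoding that task_pfn() performs (reading entry 0 of our own pagemap is arbitrary and that page will often be absent):

#include <stdint.h>
#include <stdio.h>

#define PM_PFRAME_MASK	((UINT64_C(1) << 55) - 1)  /* bits 0-54: PFN */
#define PM_PRESENT	(UINT64_C(4) << 61)        /* status bits 61-63 */

/* Same rule as task_pfn(): the PFN field is only meaningful when the
 * present status bit is set. */
static uint64_t entry_to_pfn(uint64_t entry)
{
	return (entry & PM_PRESENT) ? (entry & PM_PFRAME_MASK) : 0;
}

int main(void)
{
	uint64_t entry;
	FILE *f = fopen("/proc/self/pagemap", "rb");

	if (!f || fread(&entry, sizeof(entry), 1, f) != 1)
		return 1;
	printf("pfn=0x%llx\n", (unsigned long long)entry_to_pfn(entry));
	fclose(f);
	return 0;
}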
diff --git a/MAINTAINERS b/MAINTAINERS
index 7c1c0b05b298..c450f3abb8c9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -257,12 +257,6 @@ W: http://www.lesswatts.org/projects/acpi/
257S: Supported 257S: Supported
258F: drivers/acpi/fan.c 258F: drivers/acpi/fan.c
259 259
260ACPI PCI HOTPLUG DRIVER
261M: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
262L: linux-pci@vger.kernel.org
263S: Supported
264F: drivers/pci/hotplug/acpi*
265
266ACPI THERMAL DRIVER 260ACPI THERMAL DRIVER
267M: Zhang Rui <rui.zhang@intel.com> 261M: Zhang Rui <rui.zhang@intel.com>
268L: linux-acpi@vger.kernel.org 262L: linux-acpi@vger.kernel.org
@@ -689,7 +683,7 @@ S: Maintained
689ARM/INTEL IXP4XX ARM ARCHITECTURE 683ARM/INTEL IXP4XX ARM ARCHITECTURE
690M: Imre Kaloz <kaloz@openwrt.org> 684M: Imre Kaloz <kaloz@openwrt.org>
691M: Krzysztof Halasa <khc@pm.waw.pl> 685M: Krzysztof Halasa <khc@pm.waw.pl>
692L: linux-arm-kernel@lists.infradead.org 686L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
693S: Maintained 687S: Maintained
694F: arch/arm/mach-ixp4xx/ 688F: arch/arm/mach-ixp4xx/
695 689
@@ -746,18 +740,22 @@ M: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
746M: Dirk Opfer <dirk@opfer-online.de> 740M: Dirk Opfer <dirk@opfer-online.de>
747S: Maintained 741S: Maintained
748 742
749ARM/PALMTX,PALMT5,PALMLD,PALMTE2 SUPPORT 743ARM/PALMTX,PALMT5,PALMLD,PALMTE2,PALMTC SUPPORT
750M: Marek Vasut <marek.vasut@gmail.com> 744P: Marek Vasut
745M: marek.vasut@gmail.com
746L: linux-arm-kernel@lists.infradead.org
751W: http://hackndev.com 747W: http://hackndev.com
752S: Maintained 748S: Maintained
753 749
754ARM/PALM TREO 680 SUPPORT 750ARM/PALM TREO 680 SUPPORT
755M: Tomas Cech <sleep_walker@suse.cz> 751M: Tomas Cech <sleep_walker@suse.cz>
752L: linux-arm-kernel@lists.infradead.org
756W: http://hackndev.com 753W: http://hackndev.com
757S: Maintained 754S: Maintained
758 755
759ARM/PALMZ72 SUPPORT 756ARM/PALMZ72 SUPPORT
760M: Sergey Lapin <slapin@ossfans.org> 757M: Sergey Lapin <slapin@ossfans.org>
758L: linux-arm-kernel@lists.infradead.org
761W: http://hackndev.com 759W: http://hackndev.com
762S: Maintained 760S: Maintained
763 761
@@ -2331,7 +2329,9 @@ S: Orphan
2331F: drivers/hwmon/ 2329F: drivers/hwmon/
2332 2330
2333HARDWARE RANDOM NUMBER GENERATOR CORE 2331HARDWARE RANDOM NUMBER GENERATOR CORE
2334S: Orphan 2332M: Matt Mackall <mpm@selenic.com>
2333M: Herbert Xu <herbert@gondor.apana.org.au>
2334S: Odd fixes
2335F: Documentation/hw_random.txt 2335F: Documentation/hw_random.txt
2336F: drivers/char/hw_random/ 2336F: drivers/char/hw_random/
2337F: include/linux/hw_random.h 2337F: include/linux/hw_random.h
@@ -4003,11 +4003,11 @@ F: Documentation/PCI/
4003F: drivers/pci/ 4003F: drivers/pci/
4004F: include/linux/pci* 4004F: include/linux/pci*
4005 4005
4006PCIE HOTPLUG DRIVER 4006PCI HOTPLUG
4007M: Kristen Carlson Accardi <kristen.c.accardi@intel.com> 4007M: Jesse Barnes <jbarnes@virtuousgeek.org>
4008L: linux-pci@vger.kernel.org 4008L: linux-pci@vger.kernel.org
4009S: Supported 4009S: Supported
4010F: drivers/pci/pcie/ 4010F: drivers/pci/hotplug
4011 4011
4012PCMCIA SUBSYSTEM 4012PCMCIA SUBSYSTEM
4013P: Linux PCMCIA Team 4013P: Linux PCMCIA Team
@@ -4670,12 +4670,6 @@ F: drivers/serial/serial_lh7a40x.c
4670F: drivers/usb/gadget/lh7a40* 4670F: drivers/usb/gadget/lh7a40*
4671F: drivers/usb/host/ohci-lh7a40* 4671F: drivers/usb/host/ohci-lh7a40*
4672 4672
4673SHPC HOTPLUG DRIVER
4674M: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
4675L: linux-pci@vger.kernel.org
4676S: Supported
4677F: drivers/pci/hotplug/shpchp*
4678
4679SIMPLE FIRMWARE INTERFACE (SFI) 4673SIMPLE FIRMWARE INTERFACE (SFI)
4680P: Len Brown 4674P: Len Brown
4681M: lenb@kernel.org 4675M: lenb@kernel.org
@@ -4687,7 +4681,6 @@ F: arch/x86/kernel/*sfi*
4687F: drivers/sfi/ 4681F: drivers/sfi/
4688F: include/linux/sfi*.h 4682F: include/linux/sfi*.h
4689 4683
4690
4691SIMTEC EB110ATX (Chalice CATS) 4684SIMTEC EB110ATX (Chalice CATS)
4692P: Ben Dooks 4685P: Ben Dooks
4693M: Vincent Sanders <support@simtec.co.uk> 4686M: Vincent Sanders <support@simtec.co.uk>
diff --git a/arch/alpha/include/asm/fcntl.h b/arch/alpha/include/asm/fcntl.h
index 25da0017ec87..e42823e954aa 100644
--- a/arch/alpha/include/asm/fcntl.h
+++ b/arch/alpha/include/asm/fcntl.h
@@ -26,6 +26,8 @@
26#define F_GETOWN 6 /* for sockets. */ 26#define F_GETOWN 6 /* for sockets. */
27#define F_SETSIG 10 /* for sockets. */ 27#define F_SETSIG 10 /* for sockets. */
28#define F_GETSIG 11 /* for sockets. */ 28#define F_GETSIG 11 /* for sockets. */
29#define F_SETOWN_EX 12
30#define F_GETOWN_EX 13
29 31
30/* for posix fcntl() and lockf() */ 32/* for posix fcntl() and lockf() */
31#define F_RDLCK 1 33#define F_RDLCK 1
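For context, F_SETOWN_EX/F_GETOWN_EX let a program direct SIGIO at a single thread rather than the whole process. A hedged userspace sketch (struct f_owner_ex and F_OWNER_TID assumed from the generic fcntl ABI this hunk mirrors; gettid is wrapped via syscall for older libcs):

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>
#include <sys/syscall.h>

/* Sketch: make only the calling thread the owner of fd's I/O
 * signals, instead of the whole process as with plain F_SETOWN. */
static int own_fd_by_this_thread(int fd)
{
	struct f_owner_ex owner = {
		.type = F_OWNER_TID,
		.pid  = (pid_t)syscall(SYS_gettid),
	};

	return fcntl(fd, F_SETOWN_EX, &owner);
}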
diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c
index e302daecbe56..8e059e58b0ac 100644
--- a/arch/alpha/kernel/core_marvel.c
+++ b/arch/alpha/kernel/core_marvel.c
@@ -1016,7 +1016,7 @@ marvel_agp_bind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *m
1016{ 1016{
1017 struct marvel_agp_aperture *aper = agp->aperture.sysdata; 1017 struct marvel_agp_aperture *aper = agp->aperture.sysdata;
1018 return iommu_bind(aper->arena, aper->pg_start + pg_start, 1018 return iommu_bind(aper->arena, aper->pg_start + pg_start,
1019 mem->page_count, mem->memory); 1019 mem->page_count, mem->pages);
1020} 1020}
1021 1021
1022static int 1022static int
diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c
index 319fcb74611e..76686497b1e2 100644
--- a/arch/alpha/kernel/core_titan.c
+++ b/arch/alpha/kernel/core_titan.c
@@ -680,7 +680,7 @@ titan_agp_bind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *me
680{ 680{
681 struct titan_agp_aperture *aper = agp->aperture.sysdata; 681 struct titan_agp_aperture *aper = agp->aperture.sysdata;
682 return iommu_bind(aper->arena, aper->pg_start + pg_start, 682 return iommu_bind(aper->arena, aper->pg_start + pg_start,
683 mem->page_count, mem->memory); 683 mem->page_count, mem->pages);
684} 684}
685 685
686static int 686static int
diff --git a/arch/alpha/kernel/init_task.c b/arch/alpha/kernel/init_task.c
index 19b86328ffd7..6f80ca4f9766 100644
--- a/arch/alpha/kernel/init_task.c
+++ b/arch/alpha/kernel/init_task.c
@@ -13,6 +13,5 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
13struct task_struct init_task = INIT_TASK(init_task); 13struct task_struct init_task = INIT_TASK(init_task);
14EXPORT_SYMBOL(init_task); 14EXPORT_SYMBOL(init_task);
15 15
16union thread_union init_thread_union 16union thread_union init_thread_union __init_task_data =
17 __attribute__((section(".data.init_thread"))) 17 { INIT_THREAD_INFO(init_task) };
18 = { INIT_THREAD_INFO(init_task) };
diff --git a/arch/alpha/kernel/pci_impl.h b/arch/alpha/kernel/pci_impl.h
index 00edd04b585e..85457b2d4516 100644
--- a/arch/alpha/kernel/pci_impl.h
+++ b/arch/alpha/kernel/pci_impl.h
@@ -198,7 +198,7 @@ extern unsigned long size_for_memory(unsigned long max);
198 198
199extern int iommu_reserve(struct pci_iommu_arena *, long, long); 199extern int iommu_reserve(struct pci_iommu_arena *, long, long);
200extern int iommu_release(struct pci_iommu_arena *, long, long); 200extern int iommu_release(struct pci_iommu_arena *, long, long);
201extern int iommu_bind(struct pci_iommu_arena *, long, long, unsigned long *); 201extern int iommu_bind(struct pci_iommu_arena *, long, long, struct page **);
202extern int iommu_unbind(struct pci_iommu_arena *, long, long); 202extern int iommu_unbind(struct pci_iommu_arena *, long, long);
203 203
204 204
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index d15aedfe6066..8449504f5e0b 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -876,7 +876,7 @@ iommu_release(struct pci_iommu_arena *arena, long pg_start, long pg_count)
876 876
877int 877int
878iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count, 878iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count,
879 unsigned long *physaddrs) 879 struct page **pages)
880{ 880{
881 unsigned long flags; 881 unsigned long flags;
882 unsigned long *ptes; 882 unsigned long *ptes;
@@ -896,7 +896,7 @@ iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count,
896 } 896 }
897 897
898 for(i = 0, j = pg_start; i < pg_count; i++, j++) 898 for(i = 0, j = pg_start; i < pg_count; i++, j++)
899 ptes[j] = mk_iommu_pte(physaddrs[i]); 899 ptes[j] = mk_iommu_pte(page_to_phys(pages[i]));
900 900
901 spin_unlock_irqrestore(&arena->lock, flags); 901 spin_unlock_irqrestore(&arena->lock, flags);
902 902
diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S
index 6dc03c35caa0..2906665b1c10 100644
--- a/arch/alpha/kernel/vmlinux.lds.S
+++ b/arch/alpha/kernel/vmlinux.lds.S
@@ -1,5 +1,6 @@
1#include <asm-generic/vmlinux.lds.h> 1#include <asm-generic/vmlinux.lds.h>
2#include <asm/page.h> 2#include <asm/page.h>
3#include <asm/thread_info.h>
3 4
4OUTPUT_FORMAT("elf64-alpha") 5OUTPUT_FORMAT("elf64-alpha")
5OUTPUT_ARCH(alpha) 6OUTPUT_ARCH(alpha)
@@ -31,88 +32,21 @@ SECTIONS
31 } :kernel 32 } :kernel
32 33
33 RODATA 34 RODATA
34 35 EXCEPTION_TABLE(16)
35 /* Exception table */
36 . = ALIGN(16);
37 __ex_table : {
38 __start___ex_table = .;
39 *(__ex_table)
40 __stop___ex_table = .;
41 }
42 36
43 /* Will be freed after init */ 37 /* Will be freed after init */
44 . = ALIGN(PAGE_SIZE); 38 __init_begin = ALIGN(PAGE_SIZE);
45 /* Init code and data */ 39 INIT_TEXT_SECTION(PAGE_SIZE)
46 __init_begin = .; 40 INIT_DATA_SECTION(16)
47 .init.text : {
48 _sinittext = .;
49 INIT_TEXT
50 _einittext = .;
51 }
52 .init.data : {
53 INIT_DATA
54 }
55
56 . = ALIGN(16);
57 .init.setup : {
58 __setup_start = .;
59 *(.init.setup)
60 __setup_end = .;
61 }
62
63 . = ALIGN(8);
64 .initcall.init : {
65 __initcall_start = .;
66 INITCALLS
67 __initcall_end = .;
68 }
69
70#ifdef CONFIG_BLK_DEV_INITRD
71 . = ALIGN(PAGE_SIZE);
72 .init.ramfs : {
73 __initramfs_start = .;
74 *(.init.ramfs)
75 __initramfs_end = .;
76 }
77#endif
78
79 . = ALIGN(8);
80 .con_initcall.init : {
81 __con_initcall_start = .;
82 *(.con_initcall.init)
83 __con_initcall_end = .;
84 }
85
86 . = ALIGN(8);
87 SECURITY_INIT
88
89 PERCPU(PAGE_SIZE) 41 PERCPU(PAGE_SIZE)
90 42 /* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page
91 . = ALIGN(2 * PAGE_SIZE); 43 needed for the THREAD_SIZE aligned init_task gets freed after init */
44 . = ALIGN(THREAD_SIZE);
92 __init_end = .; 45 __init_end = .;
93 /* Freed after init ends here */ 46 /* Freed after init ends here */
94 47
95 /* Note 2 page alignment above. */
96 .data.init_thread : {
97 *(.data.init_thread)
98 }
99
100 . = ALIGN(PAGE_SIZE);
101 .data.page_aligned : {
102 *(.data.page_aligned)
103 }
104
105 . = ALIGN(64);
106 .data.cacheline_aligned : {
107 *(.data.cacheline_aligned)
108 }
109
110 _data = .; 48 _data = .;
111 /* Data */ 49 RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE)
112 .data : {
113 DATA_DATA
114 CONSTRUCTORS
115 }
116 50
117 .got : { 51 .got : {
118 *(.got) 52 *(.got)
@@ -122,16 +56,7 @@ SECTIONS
122 } 56 }
123 _edata = .; /* End of data section */ 57 _edata = .; /* End of data section */
124 58
125 __bss_start = .; 59 BSS_SECTION(0, 0, 0)
126 .sbss : {
127 *(.sbss)
128 *(.scommon)
129 }
130 .bss : {
131 *(.bss)
132 *(COMMON)
133 }
134 __bss_stop = .;
135 _end = .; 60 _end = .;
136 61
137 .mdebug 0 : { 62 .mdebug 0 : {
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index d778a699f577..1c4119c60040 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -46,6 +46,10 @@ config GENERIC_CLOCKEVENTS_BROADCAST
46 depends on GENERIC_CLOCKEVENTS 46 depends on GENERIC_CLOCKEVENTS
47 default y if SMP && !LOCAL_TIMERS 47 default y if SMP && !LOCAL_TIMERS
48 48
49config HAVE_TCM
50 bool
51 select GENERIC_ALLOCATOR
52
49config NO_IOPORT 53config NO_IOPORT
50 bool 54 bool
51 55
@@ -649,6 +653,7 @@ config ARCH_U300
649 bool "ST-Ericsson U300 Series" 653 bool "ST-Ericsson U300 Series"
650 depends on MMU 654 depends on MMU
651 select CPU_ARM926T 655 select CPU_ARM926T
656 select HAVE_TCM
652 select ARM_AMBA 657 select ARM_AMBA
653 select ARM_VIC 658 select ARM_VIC
654 select GENERIC_TIME 659 select GENERIC_TIME
diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c
index 2293f0ce061e..bd36c778c819 100644
--- a/arch/arm/common/locomo.c
+++ b/arch/arm/common/locomo.c
@@ -865,6 +865,7 @@ void locomo_gpio_set_dir(struct device *dev, unsigned int bits, unsigned int dir
865 865
866 spin_unlock_irqrestore(&lchip->lock, flags); 866 spin_unlock_irqrestore(&lchip->lock, flags);
867} 867}
868EXPORT_SYMBOL(locomo_gpio_set_dir);
868 869
869int locomo_gpio_read_level(struct device *dev, unsigned int bits) 870int locomo_gpio_read_level(struct device *dev, unsigned int bits)
870{ 871{
@@ -882,6 +883,7 @@ int locomo_gpio_read_level(struct device *dev, unsigned int bits)
882 ret &= bits; 883 ret &= bits;
883 return ret; 884 return ret;
884} 885}
886EXPORT_SYMBOL(locomo_gpio_read_level);
885 887
886int locomo_gpio_read_output(struct device *dev, unsigned int bits) 888int locomo_gpio_read_output(struct device *dev, unsigned int bits)
887{ 889{
@@ -899,6 +901,7 @@ int locomo_gpio_read_output(struct device *dev, unsigned int bits)
899 ret &= bits; 901 ret &= bits;
900 return ret; 902 return ret;
901} 903}
904EXPORT_SYMBOL(locomo_gpio_read_output);
902 905
903void locomo_gpio_write(struct device *dev, unsigned int bits, unsigned int set) 906void locomo_gpio_write(struct device *dev, unsigned int bits, unsigned int set)
904{ 907{
@@ -920,6 +923,7 @@ void locomo_gpio_write(struct device *dev, unsigned int bits, unsigned int set)
920 923
921 spin_unlock_irqrestore(&lchip->lock, flags); 924 spin_unlock_irqrestore(&lchip->lock, flags);
922} 925}
926EXPORT_SYMBOL(locomo_gpio_write);
923 927
924static void locomo_m62332_sendbit(void *mapbase, int bit) 928static void locomo_m62332_sendbit(void *mapbase, int bit)
925{ 929{
@@ -1084,13 +1088,12 @@ void locomo_m62332_senddata(struct locomo_dev *ldev, unsigned int dac_data, int
1084 1088
1085 spin_unlock_irqrestore(&lchip->lock, flags); 1089 spin_unlock_irqrestore(&lchip->lock, flags);
1086} 1090}
1091EXPORT_SYMBOL(locomo_m62332_senddata);
1087 1092
1088/* 1093/*
1089 * Frontlight control 1094 * Frontlight control
1090 */ 1095 */
1091 1096
1092static struct locomo *locomo_chip_driver(struct locomo_dev *ldev);
1093
1094void locomo_frontlight_set(struct locomo_dev *dev, int duty, int vr, int bpwf) 1097void locomo_frontlight_set(struct locomo_dev *dev, int duty, int vr, int bpwf)
1095{ 1098{
1096 unsigned long flags; 1099 unsigned long flags;
@@ -1182,11 +1185,13 @@ int locomo_driver_register(struct locomo_driver *driver)
1182 driver->drv.bus = &locomo_bus_type; 1185 driver->drv.bus = &locomo_bus_type;
1183 return driver_register(&driver->drv); 1186 return driver_register(&driver->drv);
1184} 1187}
1188EXPORT_SYMBOL(locomo_driver_register);
1185 1189
1186void locomo_driver_unregister(struct locomo_driver *driver) 1190void locomo_driver_unregister(struct locomo_driver *driver)
1187{ 1191{
1188 driver_unregister(&driver->drv); 1192 driver_unregister(&driver->drv);
1189} 1193}
1194EXPORT_SYMBOL(locomo_driver_unregister);
1190 1195
1191static int __init locomo_init(void) 1196static int __init locomo_init(void)
1192{ 1197{
@@ -1208,11 +1213,3 @@ module_exit(locomo_exit);
1208MODULE_DESCRIPTION("Sharp LoCoMo core driver"); 1213MODULE_DESCRIPTION("Sharp LoCoMo core driver");
1209MODULE_LICENSE("GPL"); 1214MODULE_LICENSE("GPL");
1210MODULE_AUTHOR("John Lenz <lenz@cs.wisc.edu>"); 1215MODULE_AUTHOR("John Lenz <lenz@cs.wisc.edu>");
1211
1212EXPORT_SYMBOL(locomo_driver_register);
1213EXPORT_SYMBOL(locomo_driver_unregister);
1214EXPORT_SYMBOL(locomo_gpio_set_dir);
1215EXPORT_SYMBOL(locomo_gpio_read_level);
1216EXPORT_SYMBOL(locomo_gpio_read_output);
1217EXPORT_SYMBOL(locomo_gpio_write);
1218EXPORT_SYMBOL(locomo_m62332_senddata);
diff --git a/arch/arm/common/vic.c b/arch/arm/common/vic.c
index 920ced0b73c5..f232941de8ab 100644
--- a/arch/arm/common/vic.c
+++ b/arch/arm/common/vic.c
@@ -22,6 +22,7 @@
22#include <linux/list.h> 22#include <linux/list.h>
23#include <linux/io.h> 23#include <linux/io.h>
24#include <linux/sysdev.h> 24#include <linux/sysdev.h>
25#include <linux/device.h>
25#include <linux/amba/bus.h> 26#include <linux/amba/bus.h>
26 27
27#include <asm/mach/irq.h> 28#include <asm/mach/irq.h>
diff --git a/arch/arm/configs/littleton_defconfig b/arch/arm/configs/littleton_defconfig
deleted file mode 100644
index 1db496908052..000000000000
--- a/arch/arm/configs/littleton_defconfig
+++ /dev/null
@@ -1,783 +0,0 @@
1#
2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.24-rc5
4# Fri Dec 21 11:06:19 2007
5#
6CONFIG_ARM=y
7CONFIG_SYS_SUPPORTS_APM_EMULATION=y
8CONFIG_GENERIC_GPIO=y
9CONFIG_GENERIC_TIME=y
10CONFIG_GENERIC_CLOCKEVENTS=y
11CONFIG_MMU=y
12# CONFIG_NO_IOPORT is not set
13CONFIG_GENERIC_HARDIRQS=y
14CONFIG_STACKTRACE_SUPPORT=y
15CONFIG_LOCKDEP_SUPPORT=y
16CONFIG_TRACE_IRQFLAGS_SUPPORT=y
17CONFIG_HARDIRQS_SW_RESEND=y
18CONFIG_GENERIC_IRQ_PROBE=y
19CONFIG_RWSEM_GENERIC_SPINLOCK=y
20# CONFIG_ARCH_HAS_ILOG2_U32 is not set
21# CONFIG_ARCH_HAS_ILOG2_U64 is not set
22CONFIG_GENERIC_HWEIGHT=y
23CONFIG_GENERIC_CALIBRATE_DELAY=y
24CONFIG_ZONE_DMA=y
25CONFIG_ARCH_MTD_XIP=y
26CONFIG_VECTORS_BASE=0xffff0000
27CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
28
29#
30# General setup
31#
32CONFIG_EXPERIMENTAL=y
33CONFIG_BROKEN_ON_SMP=y
34CONFIG_LOCK_KERNEL=y
35CONFIG_INIT_ENV_ARG_LIMIT=32
36CONFIG_LOCALVERSION=""
37CONFIG_LOCALVERSION_AUTO=y
38CONFIG_SWAP=y
39CONFIG_SYSVIPC=y
40CONFIG_SYSVIPC_SYSCTL=y
41# CONFIG_POSIX_MQUEUE is not set
42# CONFIG_BSD_PROCESS_ACCT is not set
43# CONFIG_TASKSTATS is not set
44# CONFIG_USER_NS is not set
45# CONFIG_PID_NS is not set
46# CONFIG_AUDIT is not set
47# CONFIG_IKCONFIG is not set
48CONFIG_LOG_BUF_SHIFT=14
49# CONFIG_CGROUPS is not set
50CONFIG_FAIR_GROUP_SCHED=y
51CONFIG_FAIR_USER_SCHED=y
52# CONFIG_FAIR_CGROUP_SCHED is not set
53CONFIG_SYSFS_DEPRECATED=y
54# CONFIG_RELAY is not set
55CONFIG_BLK_DEV_INITRD=y
56CONFIG_INITRAMFS_SOURCE=""
57CONFIG_CC_OPTIMIZE_FOR_SIZE=y
58CONFIG_SYSCTL=y
59# CONFIG_EMBEDDED is not set
60CONFIG_UID16=y
61CONFIG_SYSCTL_SYSCALL=y
62CONFIG_KALLSYMS=y
63# CONFIG_KALLSYMS_ALL is not set
64# CONFIG_KALLSYMS_EXTRA_PASS is not set
65CONFIG_HOTPLUG=y
66CONFIG_PRINTK=y
67CONFIG_BUG=y
68CONFIG_ELF_CORE=y
69CONFIG_BASE_FULL=y
70CONFIG_FUTEX=y
71CONFIG_ANON_INODES=y
72CONFIG_EPOLL=y
73CONFIG_SIGNALFD=y
74CONFIG_EVENTFD=y
75CONFIG_SHMEM=y
76CONFIG_VM_EVENT_COUNTERS=y
77CONFIG_SLAB=y
78# CONFIG_SLUB is not set
79# CONFIG_SLOB is not set
80CONFIG_RT_MUTEXES=y
81# CONFIG_TINY_SHMEM is not set
82CONFIG_BASE_SMALL=0
83CONFIG_MODULES=y
84CONFIG_MODULE_UNLOAD=y
85CONFIG_MODULE_FORCE_UNLOAD=y
86# CONFIG_MODVERSIONS is not set
87# CONFIG_MODULE_SRCVERSION_ALL is not set
88# CONFIG_KMOD is not set
89CONFIG_BLOCK=y
90# CONFIG_LBD is not set
91# CONFIG_BLK_DEV_IO_TRACE is not set
92# CONFIG_LSF is not set
93# CONFIG_BLK_DEV_BSG is not set
94
95#
96# IO Schedulers
97#
98CONFIG_IOSCHED_NOOP=y
99CONFIG_IOSCHED_AS=y
100CONFIG_IOSCHED_DEADLINE=y
101CONFIG_IOSCHED_CFQ=y
102# CONFIG_DEFAULT_AS is not set
103# CONFIG_DEFAULT_DEADLINE is not set
104CONFIG_DEFAULT_CFQ=y
105# CONFIG_DEFAULT_NOOP is not set
106CONFIG_DEFAULT_IOSCHED="cfq"
107
108#
109# System Type
110#
111# CONFIG_ARCH_AAEC2000 is not set
112# CONFIG_ARCH_INTEGRATOR is not set
113# CONFIG_ARCH_REALVIEW is not set
114# CONFIG_ARCH_VERSATILE is not set
115# CONFIG_ARCH_AT91 is not set
116# CONFIG_ARCH_CLPS7500 is not set
117# CONFIG_ARCH_CLPS711X is not set
118# CONFIG_ARCH_CO285 is not set
119# CONFIG_ARCH_EBSA110 is not set
120# CONFIG_ARCH_EP93XX is not set
121# CONFIG_ARCH_FOOTBRIDGE is not set
122# CONFIG_ARCH_NETX is not set
123# CONFIG_ARCH_H720X is not set
124# CONFIG_ARCH_IMX is not set
125# CONFIG_ARCH_IOP13XX is not set
126# CONFIG_ARCH_IOP32X is not set
127# CONFIG_ARCH_IOP33X is not set
128# CONFIG_ARCH_IXP23XX is not set
129# CONFIG_ARCH_IXP2000 is not set
130# CONFIG_ARCH_IXP4XX is not set
131# CONFIG_ARCH_L7200 is not set
132# CONFIG_ARCH_KS8695 is not set
133# CONFIG_ARCH_NS9XXX is not set
134# CONFIG_ARCH_MXC is not set
135# CONFIG_ARCH_PNX4008 is not set
136CONFIG_ARCH_PXA=y
137# CONFIG_ARCH_RPC is not set
138# CONFIG_ARCH_SA1100 is not set
139# CONFIG_ARCH_S3C2410 is not set
140# CONFIG_ARCH_SHARK is not set
141# CONFIG_ARCH_LH7A40X is not set
142# CONFIG_ARCH_DAVINCI is not set
143# CONFIG_ARCH_OMAP is not set
144
145#
146# Intel PXA2xx/PXA3xx Implementations
147#
148
149#
150# Supported PXA3xx Processor Variants
151#
152CONFIG_CPU_PXA300=y
153CONFIG_CPU_PXA310=y
154# CONFIG_CPU_PXA320 is not set
155# CONFIG_ARCH_LUBBOCK is not set
156# CONFIG_MACH_LOGICPD_PXA270 is not set
157# CONFIG_MACH_MAINSTONE is not set
158# CONFIG_ARCH_PXA_IDP is not set
159# CONFIG_PXA_SHARPSL is not set
160# CONFIG_MACH_TRIZEPS4 is not set
161# CONFIG_MACH_EM_X270 is not set
162# CONFIG_MACH_ZYLONITE is not set
163CONFIG_MACH_LITTLETON=y
164# CONFIG_MACH_ARMCORE is not set
165CONFIG_PXA3xx=y
166CONFIG_PXA_SSP=y
167
168#
169# Boot options
170#
171
172#
173# Power management
174#
175
176#
177# Processor Type
178#
179CONFIG_CPU_32=y
180CONFIG_CPU_XSC3=y
181CONFIG_CPU_32v5=y
182CONFIG_CPU_ABRT_EV5T=y
183CONFIG_CPU_CACHE_VIVT=y
184CONFIG_CPU_TLB_V4WBI=y
185CONFIG_CPU_CP15=y
186CONFIG_CPU_CP15_MMU=y
187CONFIG_IO_36=y
188
189#
190# Processor Features
191#
192# CONFIG_ARM_THUMB is not set
193# CONFIG_CPU_DCACHE_DISABLE is not set
194# CONFIG_CPU_BPREDICT_DISABLE is not set
195# CONFIG_OUTER_CACHE is not set
196CONFIG_IWMMXT=y
197
198#
199# Bus support
200#
201# CONFIG_PCI_SYSCALL is not set
202# CONFIG_ARCH_SUPPORTS_MSI is not set
203# CONFIG_PCCARD is not set
204
205#
206# Kernel Features
207#
208CONFIG_TICK_ONESHOT=y
209# CONFIG_NO_HZ is not set
210# CONFIG_HIGH_RES_TIMERS is not set
211CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
212CONFIG_PREEMPT=y
213CONFIG_HZ=100
214CONFIG_AEABI=y
215CONFIG_OABI_COMPAT=y
216# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set
217CONFIG_SELECT_MEMORY_MODEL=y
218CONFIG_FLATMEM_MANUAL=y
219# CONFIG_DISCONTIGMEM_MANUAL is not set
220# CONFIG_SPARSEMEM_MANUAL is not set
221CONFIG_FLATMEM=y
222CONFIG_FLAT_NODE_MEM_MAP=y
223# CONFIG_SPARSEMEM_STATIC is not set
224# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
225CONFIG_SPLIT_PTLOCK_CPUS=4096
226# CONFIG_RESOURCES_64BIT is not set
227CONFIG_ZONE_DMA_FLAG=1
228CONFIG_BOUNCE=y
229CONFIG_VIRT_TO_BUS=y
230CONFIG_ALIGNMENT_TRAP=y
231
232#
233# Boot options
234#
235CONFIG_ZBOOT_ROM_TEXT=0x0
236CONFIG_ZBOOT_ROM_BSS=0x0
237CONFIG_CMDLINE="root=/dev/nfs rootfstype=nfs nfsroot=192.168.1.100:/nfsroot/ ip=192.168.1.101:192.168.1.100::255.255.255.0::eth0:on console=ttyS2,38400 mem=64M"
238# CONFIG_XIP_KERNEL is not set
239# CONFIG_KEXEC is not set
240
241#
242# CPU Frequency scaling
243#
244# CONFIG_CPU_FREQ is not set
245
246#
247# Floating point emulation
248#
249
250#
251# At least one emulation must be selected
252#
253CONFIG_FPE_NWFPE=y
254# CONFIG_FPE_NWFPE_XP is not set
255# CONFIG_FPE_FASTFPE is not set
256
257#
258# Userspace binary formats
259#
260CONFIG_BINFMT_ELF=y
261# CONFIG_BINFMT_AOUT is not set
262# CONFIG_BINFMT_MISC is not set
263
264#
265# Power management options
266#
267# CONFIG_PM is not set
268CONFIG_SUSPEND_UP_POSSIBLE=y
269
270#
271# Networking
272#
273CONFIG_NET=y
274
275#
276# Networking options
277#
278CONFIG_PACKET=y
279# CONFIG_PACKET_MMAP is not set
280CONFIG_UNIX=y
281CONFIG_XFRM=y
282# CONFIG_XFRM_USER is not set
283# CONFIG_XFRM_SUB_POLICY is not set
284# CONFIG_XFRM_MIGRATE is not set
285# CONFIG_NET_KEY is not set
286CONFIG_INET=y
287# CONFIG_IP_MULTICAST is not set
288# CONFIG_IP_ADVANCED_ROUTER is not set
289CONFIG_IP_FIB_HASH=y
290CONFIG_IP_PNP=y
291# CONFIG_IP_PNP_DHCP is not set
292# CONFIG_IP_PNP_BOOTP is not set
293# CONFIG_IP_PNP_RARP is not set
294# CONFIG_NET_IPIP is not set
295# CONFIG_NET_IPGRE is not set
296# CONFIG_ARPD is not set
297# CONFIG_SYN_COOKIES is not set
298# CONFIG_INET_AH is not set
299# CONFIG_INET_ESP is not set
300# CONFIG_INET_IPCOMP is not set
301# CONFIG_INET_XFRM_TUNNEL is not set
302# CONFIG_INET_TUNNEL is not set
303CONFIG_INET_XFRM_MODE_TRANSPORT=y
304CONFIG_INET_XFRM_MODE_TUNNEL=y
305CONFIG_INET_XFRM_MODE_BEET=y
306# CONFIG_INET_LRO is not set
307CONFIG_INET_DIAG=y
308CONFIG_INET_TCP_DIAG=y
309# CONFIG_TCP_CONG_ADVANCED is not set
310CONFIG_TCP_CONG_CUBIC=y
311CONFIG_DEFAULT_TCP_CONG="cubic"
312# CONFIG_TCP_MD5SIG is not set
313# CONFIG_IPV6 is not set
314# CONFIG_INET6_XFRM_TUNNEL is not set
315# CONFIG_INET6_TUNNEL is not set
316# CONFIG_NETWORK_SECMARK is not set
317# CONFIG_NETFILTER is not set
318# CONFIG_IP_DCCP is not set
319# CONFIG_IP_SCTP is not set
320# CONFIG_TIPC is not set
321# CONFIG_ATM is not set
322# CONFIG_BRIDGE is not set
323# CONFIG_VLAN_8021Q is not set
324# CONFIG_DECNET is not set
325# CONFIG_LLC2 is not set
326# CONFIG_IPX is not set
327# CONFIG_ATALK is not set
328# CONFIG_X25 is not set
329# CONFIG_LAPB is not set
330# CONFIG_ECONET is not set
331# CONFIG_WAN_ROUTER is not set
332# CONFIG_NET_SCHED is not set
333
334#
335# Network testing
336#
337# CONFIG_NET_PKTGEN is not set
338# CONFIG_HAMRADIO is not set
339# CONFIG_IRDA is not set
340# CONFIG_BT is not set
341# CONFIG_AF_RXRPC is not set
342
343#
344# Wireless
345#
346# CONFIG_CFG80211 is not set
347# CONFIG_WIRELESS_EXT is not set
348# CONFIG_MAC80211 is not set
349# CONFIG_IEEE80211 is not set
350# CONFIG_RFKILL is not set
351# CONFIG_NET_9P is not set
352
353#
354# Device Drivers
355#
356
357#
358# Generic Driver Options
359#
360CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
361# CONFIG_STANDALONE is not set
362# CONFIG_PREVENT_FIRMWARE_BUILD is not set
363CONFIG_FW_LOADER=y
364# CONFIG_DEBUG_DRIVER is not set
365# CONFIG_DEBUG_DEVRES is not set
366# CONFIG_SYS_HYPERVISOR is not set
367# CONFIG_CONNECTOR is not set
368# CONFIG_MTD is not set
369# CONFIG_PARPORT is not set
370# CONFIG_BLK_DEV is not set
371# CONFIG_MISC_DEVICES is not set
372# CONFIG_IDE is not set
373
374#
375# SCSI device support
376#
377# CONFIG_RAID_ATTRS is not set
378# CONFIG_SCSI is not set
379# CONFIG_SCSI_DMA is not set
380# CONFIG_SCSI_NETLINK is not set
381# CONFIG_ATA is not set
382# CONFIG_MD is not set
383CONFIG_NETDEVICES=y
384# CONFIG_NETDEVICES_MULTIQUEUE is not set
385# CONFIG_DUMMY is not set
386# CONFIG_BONDING is not set
387# CONFIG_MACVLAN is not set
388# CONFIG_EQUALIZER is not set
389# CONFIG_TUN is not set
390# CONFIG_VETH is not set
391# CONFIG_PHYLIB is not set
392CONFIG_NET_ETHERNET=y
393CONFIG_MII=y
394# CONFIG_AX88796 is not set
395CONFIG_SMC91X=y
396# CONFIG_DM9000 is not set
397# CONFIG_SMC911X is not set
398# CONFIG_IBM_NEW_EMAC_ZMII is not set
399# CONFIG_IBM_NEW_EMAC_RGMII is not set
400# CONFIG_IBM_NEW_EMAC_TAH is not set
401# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
402# CONFIG_B44 is not set
403# CONFIG_NETDEV_1000 is not set
404# CONFIG_NETDEV_10000 is not set
405
406#
407# Wireless LAN
408#
409# CONFIG_WLAN_PRE80211 is not set
410# CONFIG_WLAN_80211 is not set
411# CONFIG_WAN is not set
412# CONFIG_PPP is not set
413# CONFIG_SLIP is not set
414# CONFIG_SHAPER is not set
415# CONFIG_NETCONSOLE is not set
416# CONFIG_NETPOLL is not set
417# CONFIG_NET_POLL_CONTROLLER is not set
418# CONFIG_ISDN is not set
419
420#
421# Input device support
422#
423CONFIG_INPUT=y
424# CONFIG_INPUT_FF_MEMLESS is not set
425# CONFIG_INPUT_POLLDEV is not set
426
427#
428# Userland interfaces
429#
430CONFIG_INPUT_MOUSEDEV=y
431# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
432CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
433CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
434# CONFIG_INPUT_JOYDEV is not set
435# CONFIG_INPUT_EVDEV is not set
436# CONFIG_INPUT_EVBUG is not set
437
438#
439# Input Device Drivers
440#
441# CONFIG_INPUT_KEYBOARD is not set
442# CONFIG_INPUT_MOUSE is not set
443# CONFIG_INPUT_JOYSTICK is not set
444# CONFIG_INPUT_TABLET is not set
445# CONFIG_INPUT_TOUCHSCREEN is not set
446# CONFIG_INPUT_MISC is not set
447
448#
449# Hardware I/O ports
450#
451# CONFIG_SERIO is not set
452# CONFIG_GAMEPORT is not set
453
454#
455# Character devices
456#
457CONFIG_VT=y
458CONFIG_VT_CONSOLE=y
459CONFIG_HW_CONSOLE=y
460# CONFIG_VT_HW_CONSOLE_BINDING is not set
461# CONFIG_SERIAL_NONSTANDARD is not set
462
463#
464# Serial drivers
465#
466# CONFIG_SERIAL_8250 is not set
467
468#
469# Non-8250 serial port support
470#
471CONFIG_SERIAL_PXA=y
472CONFIG_SERIAL_PXA_CONSOLE=y
473CONFIG_SERIAL_CORE=y
474CONFIG_SERIAL_CORE_CONSOLE=y
475CONFIG_UNIX98_PTYS=y
476# CONFIG_LEGACY_PTYS is not set
477# CONFIG_IPMI_HANDLER is not set
478# CONFIG_HW_RANDOM is not set
479# CONFIG_NVRAM is not set
480# CONFIG_R3964 is not set
481# CONFIG_RAW_DRIVER is not set
482# CONFIG_TCG_TPM is not set
483# CONFIG_I2C is not set
484
485#
486# SPI support
487#
488# CONFIG_SPI is not set
489# CONFIG_SPI_MASTER is not set
490# CONFIG_W1 is not set
491# CONFIG_POWER_SUPPLY is not set
492# CONFIG_HWMON is not set
493# CONFIG_WATCHDOG is not set
494
495#
496# Sonics Silicon Backplane
497#
498CONFIG_SSB_POSSIBLE=y
499# CONFIG_SSB is not set
500
501#
502# Multifunction device drivers
503#
504# CONFIG_MFD_SM501 is not set
505
506#
507# Multimedia devices
508#
509# CONFIG_VIDEO_DEV is not set
510# CONFIG_DVB_CORE is not set
511# CONFIG_DAB is not set
512
513#
514# Graphics support
515#
516# CONFIG_VGASTATE is not set
517# CONFIG_VIDEO_OUTPUT_CONTROL is not set
518CONFIG_FB=y
519# CONFIG_FIRMWARE_EDID is not set
520# CONFIG_FB_DDC is not set
521CONFIG_FB_CFB_FILLRECT=y
522CONFIG_FB_CFB_COPYAREA=y
523CONFIG_FB_CFB_IMAGEBLIT=y
524# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
525# CONFIG_FB_SYS_FILLRECT is not set
526# CONFIG_FB_SYS_COPYAREA is not set
527# CONFIG_FB_SYS_IMAGEBLIT is not set
528# CONFIG_FB_SYS_FOPS is not set
529CONFIG_FB_DEFERRED_IO=y
530# CONFIG_FB_SVGALIB is not set
531# CONFIG_FB_MACMODES is not set
532# CONFIG_FB_BACKLIGHT is not set
533# CONFIG_FB_MODE_HELPERS is not set
534# CONFIG_FB_TILEBLITTING is not set
535
536#
537# Frame buffer hardware drivers
538#
539# CONFIG_FB_S1D13XXX is not set
540CONFIG_FB_PXA=y
541# CONFIG_FB_PXA_PARAMETERS is not set
542# CONFIG_FB_MBX is not set
543# CONFIG_FB_VIRTUAL is not set
544# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
545
546#
547# Display device support
548#
549# CONFIG_DISPLAY_SUPPORT is not set
550
551#
552# Console display driver support
553#
554# CONFIG_VGA_CONSOLE is not set
555CONFIG_DUMMY_CONSOLE=y
556CONFIG_FRAMEBUFFER_CONSOLE=y
557# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set
558# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
559CONFIG_FONTS=y
560# CONFIG_FONT_8x8 is not set
561CONFIG_FONT_8x16=y
562# CONFIG_FONT_6x11 is not set
563# CONFIG_FONT_7x14 is not set
564# CONFIG_FONT_PEARL_8x8 is not set
565# CONFIG_FONT_ACORN_8x8 is not set
566# CONFIG_FONT_MINI_4x6 is not set
567# CONFIG_FONT_SUN8x16 is not set
568# CONFIG_FONT_SUN12x22 is not set
569# CONFIG_FONT_10x18 is not set
570CONFIG_LOGO=y
571CONFIG_LOGO_LINUX_MONO=y
572CONFIG_LOGO_LINUX_VGA16=y
573CONFIG_LOGO_LINUX_CLUT224=y
574
575#
576# Sound
577#
578# CONFIG_SOUND is not set
579# CONFIG_HID_SUPPORT is not set
580# CONFIG_USB_SUPPORT is not set
581# CONFIG_MMC is not set
582# CONFIG_NEW_LEDS is not set
583CONFIG_RTC_LIB=y
584# CONFIG_RTC_CLASS is not set
585
586#
587# File systems
588#
589# CONFIG_EXT2_FS is not set
590# CONFIG_EXT3_FS is not set
591# CONFIG_EXT4DEV_FS is not set
592# CONFIG_REISERFS_FS is not set
593# CONFIG_JFS_FS is not set
594CONFIG_FS_POSIX_ACL=y
595# CONFIG_XFS_FS is not set
596# CONFIG_GFS2_FS is not set
597# CONFIG_OCFS2_FS is not set
598# CONFIG_MINIX_FS is not set
599# CONFIG_ROMFS_FS is not set
600# CONFIG_INOTIFY is not set
601# CONFIG_QUOTA is not set
602# CONFIG_DNOTIFY is not set
603# CONFIG_AUTOFS_FS is not set
604# CONFIG_AUTOFS4_FS is not set
605# CONFIG_FUSE_FS is not set
606
607#
608# CD-ROM/DVD Filesystems
609#
610# CONFIG_ISO9660_FS is not set
611# CONFIG_UDF_FS is not set
612
613#
614# DOS/FAT/NT Filesystems
615#
616# CONFIG_MSDOS_FS is not set
617# CONFIG_VFAT_FS is not set
618# CONFIG_NTFS_FS is not set
619
620#
621# Pseudo filesystems
622#
623CONFIG_PROC_FS=y
624CONFIG_PROC_SYSCTL=y
625CONFIG_SYSFS=y
626# CONFIG_TMPFS is not set
627# CONFIG_HUGETLB_PAGE is not set
628# CONFIG_CONFIGFS_FS is not set
629
630#
631# Miscellaneous filesystems
632#
633# CONFIG_ADFS_FS is not set
634# CONFIG_AFFS_FS is not set
635# CONFIG_HFS_FS is not set
636# CONFIG_HFSPLUS_FS is not set
637# CONFIG_BEFS_FS is not set
638# CONFIG_BFS_FS is not set
639# CONFIG_EFS_FS is not set
640# CONFIG_CRAMFS is not set
641# CONFIG_VXFS_FS is not set
642# CONFIG_HPFS_FS is not set
643# CONFIG_QNX4FS_FS is not set
644# CONFIG_SYSV_FS is not set
645# CONFIG_UFS_FS is not set
646CONFIG_NETWORK_FILESYSTEMS=y
647CONFIG_NFS_FS=y
648CONFIG_NFS_V3=y
649CONFIG_NFS_V3_ACL=y
650CONFIG_NFS_V4=y
651CONFIG_NFS_DIRECTIO=y
652# CONFIG_NFSD is not set
653CONFIG_ROOT_NFS=y
654CONFIG_LOCKD=y
655CONFIG_LOCKD_V4=y
656CONFIG_NFS_ACL_SUPPORT=y
657CONFIG_NFS_COMMON=y
658CONFIG_SUNRPC=y
659CONFIG_SUNRPC_GSS=y
660# CONFIG_SUNRPC_BIND34 is not set
661CONFIG_RPCSEC_GSS_KRB5=y
662# CONFIG_RPCSEC_GSS_SPKM3 is not set
663# CONFIG_SMB_FS is not set
664# CONFIG_CIFS is not set
665# CONFIG_NCP_FS is not set
666# CONFIG_CODA_FS is not set
667# CONFIG_AFS_FS is not set
668
669#
670# Partition Types
671#
672# CONFIG_PARTITION_ADVANCED is not set
673CONFIG_MSDOS_PARTITION=y
674# CONFIG_NLS is not set
675# CONFIG_DLM is not set
676# CONFIG_INSTRUMENTATION is not set
677
678#
679# Kernel hacking
680#
681CONFIG_PRINTK_TIME=y
682CONFIG_ENABLE_WARN_DEPRECATED=y
683CONFIG_ENABLE_MUST_CHECK=y
684CONFIG_MAGIC_SYSRQ=y
685# CONFIG_UNUSED_SYMBOLS is not set
686# CONFIG_DEBUG_FS is not set
687# CONFIG_HEADERS_CHECK is not set
688CONFIG_DEBUG_KERNEL=y
689# CONFIG_DEBUG_SHIRQ is not set
690CONFIG_DETECT_SOFTLOCKUP=y
691CONFIG_SCHED_DEBUG=y
692# CONFIG_SCHEDSTATS is not set
693# CONFIG_TIMER_STATS is not set
694# CONFIG_DEBUG_SLAB is not set
695# CONFIG_DEBUG_PREEMPT is not set
696# CONFIG_DEBUG_RT_MUTEXES is not set
697# CONFIG_RT_MUTEX_TESTER is not set
698# CONFIG_DEBUG_SPINLOCK is not set
699# CONFIG_DEBUG_MUTEXES is not set
700# CONFIG_DEBUG_LOCK_ALLOC is not set
701# CONFIG_PROVE_LOCKING is not set
702# CONFIG_LOCK_STAT is not set
703# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
704# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
705# CONFIG_DEBUG_KOBJECT is not set
706CONFIG_DEBUG_BUGVERBOSE=y
707CONFIG_DEBUG_INFO=y
708# CONFIG_DEBUG_VM is not set
709# CONFIG_DEBUG_LIST is not set
710# CONFIG_DEBUG_SG is not set
711CONFIG_FRAME_POINTER=y
712CONFIG_FORCED_INLINING=y
713# CONFIG_BOOT_PRINTK_DELAY is not set
714# CONFIG_RCU_TORTURE_TEST is not set
715# CONFIG_FAULT_INJECTION is not set
716# CONFIG_SAMPLES is not set
717CONFIG_DEBUG_USER=y
718CONFIG_DEBUG_ERRORS=y
719CONFIG_DEBUG_LL=y
720# CONFIG_DEBUG_ICEDCC is not set
721
722#
723# Security options
724#
725# CONFIG_KEYS is not set
726# CONFIG_SECURITY is not set
727# CONFIG_SECURITY_FILE_CAPABILITIES is not set
728CONFIG_CRYPTO=y
729CONFIG_CRYPTO_ALGAPI=y
730CONFIG_CRYPTO_BLKCIPHER=y
731CONFIG_CRYPTO_MANAGER=y
732# CONFIG_CRYPTO_HMAC is not set
733# CONFIG_CRYPTO_XCBC is not set
734# CONFIG_CRYPTO_NULL is not set
735# CONFIG_CRYPTO_MD4 is not set
736CONFIG_CRYPTO_MD5=y
737# CONFIG_CRYPTO_SHA1 is not set
738# CONFIG_CRYPTO_SHA256 is not set
739# CONFIG_CRYPTO_SHA512 is not set
740# CONFIG_CRYPTO_WP512 is not set
741# CONFIG_CRYPTO_TGR192 is not set
742# CONFIG_CRYPTO_GF128MUL is not set
743# CONFIG_CRYPTO_ECB is not set
744CONFIG_CRYPTO_CBC=y
745# CONFIG_CRYPTO_PCBC is not set
746# CONFIG_CRYPTO_LRW is not set
747# CONFIG_CRYPTO_XTS is not set
748# CONFIG_CRYPTO_CRYPTD is not set
749CONFIG_CRYPTO_DES=y
750# CONFIG_CRYPTO_FCRYPT is not set
751# CONFIG_CRYPTO_BLOWFISH is not set
752# CONFIG_CRYPTO_TWOFISH is not set
753# CONFIG_CRYPTO_SERPENT is not set
754# CONFIG_CRYPTO_AES is not set
755# CONFIG_CRYPTO_CAST5 is not set
756# CONFIG_CRYPTO_CAST6 is not set
757# CONFIG_CRYPTO_TEA is not set
758# CONFIG_CRYPTO_ARC4 is not set
759# CONFIG_CRYPTO_KHAZAD is not set
760# CONFIG_CRYPTO_ANUBIS is not set
761# CONFIG_CRYPTO_SEED is not set
762# CONFIG_CRYPTO_DEFLATE is not set
763# CONFIG_CRYPTO_MICHAEL_MIC is not set
764# CONFIG_CRYPTO_CRC32C is not set
765# CONFIG_CRYPTO_CAMELLIA is not set
766# CONFIG_CRYPTO_TEST is not set
767# CONFIG_CRYPTO_AUTHENC is not set
768CONFIG_CRYPTO_HW=y
769
770#
771# Library routines
772#
773CONFIG_BITREVERSE=y
774CONFIG_CRC_CCITT=y
775# CONFIG_CRC16 is not set
776# CONFIG_CRC_ITU_T is not set
777CONFIG_CRC32=y
778# CONFIG_CRC7 is not set
779# CONFIG_LIBCRC32C is not set
780CONFIG_PLIST=y
781CONFIG_HAS_IOMEM=y
782CONFIG_HAS_IOPORT=y
783CONFIG_HAS_DMA=y
diff --git a/arch/arm/configs/pxa3xx_defconfig b/arch/arm/configs/pxa3xx_defconfig
new file mode 100644
index 000000000000..733b851e5b7e
--- /dev/null
+++ b/arch/arm/configs/pxa3xx_defconfig
@@ -0,0 +1,1332 @@
1#
2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.31-rc1
4# Mon Jul 13 22:48:49 2009
5#
6CONFIG_ARM=y
7CONFIG_HAVE_PWM=y
8CONFIG_SYS_SUPPORTS_APM_EMULATION=y
9CONFIG_GENERIC_GPIO=y
10CONFIG_GENERIC_TIME=y
11CONFIG_GENERIC_CLOCKEVENTS=y
12CONFIG_MMU=y
13CONFIG_GENERIC_HARDIRQS=y
14CONFIG_STACKTRACE_SUPPORT=y
15CONFIG_HAVE_LATENCYTOP_SUPPORT=y
16CONFIG_LOCKDEP_SUPPORT=y
17CONFIG_TRACE_IRQFLAGS_SUPPORT=y
18CONFIG_HARDIRQS_SW_RESEND=y
19CONFIG_GENERIC_IRQ_PROBE=y
20CONFIG_RWSEM_GENERIC_SPINLOCK=y
21CONFIG_GENERIC_HWEIGHT=y
22CONFIG_GENERIC_CALIBRATE_DELAY=y
23CONFIG_ARCH_MTD_XIP=y
24CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
25CONFIG_VECTORS_BASE=0xffff0000
26CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
27CONFIG_CONSTRUCTORS=y
28
29#
30# General setup
31#
32CONFIG_EXPERIMENTAL=y
33CONFIG_BROKEN_ON_SMP=y
34CONFIG_LOCK_KERNEL=y
35CONFIG_INIT_ENV_ARG_LIMIT=32
36CONFIG_LOCALVERSION=""
37CONFIG_LOCALVERSION_AUTO=y
38CONFIG_SWAP=y
39CONFIG_SYSVIPC=y
40CONFIG_SYSVIPC_SYSCTL=y
41# CONFIG_POSIX_MQUEUE is not set
42# CONFIG_BSD_PROCESS_ACCT is not set
43# CONFIG_TASKSTATS is not set
44# CONFIG_AUDIT is not set
45
46#
47# RCU Subsystem
48#
49# CONFIG_CLASSIC_RCU is not set
50CONFIG_TREE_RCU=y
51# CONFIG_PREEMPT_RCU is not set
52# CONFIG_RCU_TRACE is not set
53CONFIG_RCU_FANOUT=32
54# CONFIG_RCU_FANOUT_EXACT is not set
55# CONFIG_TREE_RCU_TRACE is not set
56# CONFIG_PREEMPT_RCU_TRACE is not set
57# CONFIG_IKCONFIG is not set
58CONFIG_LOG_BUF_SHIFT=18
59CONFIG_GROUP_SCHED=y
60CONFIG_FAIR_GROUP_SCHED=y
61# CONFIG_RT_GROUP_SCHED is not set
62CONFIG_USER_SCHED=y
63# CONFIG_CGROUP_SCHED is not set
64# CONFIG_CGROUPS is not set
65CONFIG_SYSFS_DEPRECATED=y
66CONFIG_SYSFS_DEPRECATED_V2=y
67# CONFIG_RELAY is not set
68CONFIG_NAMESPACES=y
69# CONFIG_UTS_NS is not set
70# CONFIG_IPC_NS is not set
71# CONFIG_USER_NS is not set
72# CONFIG_PID_NS is not set
73# CONFIG_NET_NS is not set
74# CONFIG_BLK_DEV_INITRD is not set
75# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
76CONFIG_SYSCTL=y
77CONFIG_ANON_INODES=y
78# CONFIG_EMBEDDED is not set
79CONFIG_UID16=y
80CONFIG_SYSCTL_SYSCALL=y
81CONFIG_KALLSYMS=y
82CONFIG_KALLSYMS_ALL=y
83# CONFIG_KALLSYMS_EXTRA_PASS is not set
84CONFIG_HOTPLUG=y
85CONFIG_PRINTK=y
86CONFIG_BUG=y
87CONFIG_ELF_CORE=y
88CONFIG_BASE_FULL=y
89CONFIG_FUTEX=y
90CONFIG_EPOLL=y
91CONFIG_SIGNALFD=y
92CONFIG_TIMERFD=y
93CONFIG_EVENTFD=y
94CONFIG_SHMEM=y
95CONFIG_AIO=y
96
97#
98# Performance Counters
99#
100CONFIG_VM_EVENT_COUNTERS=y
101# CONFIG_STRIP_ASM_SYMS is not set
102CONFIG_COMPAT_BRK=y
103CONFIG_SLAB=y
104# CONFIG_SLUB is not set
105# CONFIG_SLOB is not set
106# CONFIG_PROFILING is not set
107# CONFIG_MARKERS is not set
108CONFIG_HAVE_OPROFILE=y
109# CONFIG_KPROBES is not set
110CONFIG_HAVE_KPROBES=y
111CONFIG_HAVE_KRETPROBES=y
112CONFIG_HAVE_CLK=y
113
114#
115# GCOV-based kernel profiling
116#
117# CONFIG_SLOW_WORK is not set
118CONFIG_HAVE_GENERIC_DMA_COHERENT=y
119CONFIG_SLABINFO=y
120CONFIG_RT_MUTEXES=y
121CONFIG_BASE_SMALL=0
122CONFIG_MODULES=y
123# CONFIG_MODULE_FORCE_LOAD is not set
124# CONFIG_MODULE_UNLOAD is not set
125# CONFIG_MODVERSIONS is not set
126# CONFIG_MODULE_SRCVERSION_ALL is not set
127CONFIG_BLOCK=y
128CONFIG_LBDAF=y
129# CONFIG_BLK_DEV_BSG is not set
130# CONFIG_BLK_DEV_INTEGRITY is not set
131
132#
133# IO Schedulers
134#
135CONFIG_IOSCHED_NOOP=y
136CONFIG_IOSCHED_AS=y
137CONFIG_IOSCHED_DEADLINE=y
138CONFIG_IOSCHED_CFQ=y
139# CONFIG_DEFAULT_AS is not set
140# CONFIG_DEFAULT_DEADLINE is not set
141CONFIG_DEFAULT_CFQ=y
142# CONFIG_DEFAULT_NOOP is not set
143CONFIG_DEFAULT_IOSCHED="cfq"
144# CONFIG_FREEZER is not set
145
146#
147# System Type
148#
149# CONFIG_ARCH_AAEC2000 is not set
150# CONFIG_ARCH_INTEGRATOR is not set
151# CONFIG_ARCH_REALVIEW is not set
152# CONFIG_ARCH_VERSATILE is not set
153# CONFIG_ARCH_AT91 is not set
154# CONFIG_ARCH_CLPS711X is not set
155# CONFIG_ARCH_GEMINI is not set
156# CONFIG_ARCH_EBSA110 is not set
157# CONFIG_ARCH_EP93XX is not set
158# CONFIG_ARCH_FOOTBRIDGE is not set
159# CONFIG_ARCH_MXC is not set
160# CONFIG_ARCH_STMP3XXX is not set
161# CONFIG_ARCH_NETX is not set
162# CONFIG_ARCH_H720X is not set
163# CONFIG_ARCH_IOP13XX is not set
164# CONFIG_ARCH_IOP32X is not set
165# CONFIG_ARCH_IOP33X is not set
166# CONFIG_ARCH_IXP23XX is not set
167# CONFIG_ARCH_IXP2000 is not set
168# CONFIG_ARCH_IXP4XX is not set
169# CONFIG_ARCH_L7200 is not set
170# CONFIG_ARCH_KIRKWOOD is not set
171# CONFIG_ARCH_LOKI is not set
172# CONFIG_ARCH_MV78XX0 is not set
173# CONFIG_ARCH_ORION5X is not set
174# CONFIG_ARCH_MMP is not set
175# CONFIG_ARCH_KS8695 is not set
176# CONFIG_ARCH_NS9XXX is not set
177# CONFIG_ARCH_W90X900 is not set
178# CONFIG_ARCH_PNX4008 is not set
179CONFIG_ARCH_PXA=y
180# CONFIG_ARCH_MSM is not set
181# CONFIG_ARCH_RPC is not set
182# CONFIG_ARCH_SA1100 is not set
183# CONFIG_ARCH_S3C2410 is not set
184# CONFIG_ARCH_S3C64XX is not set
185# CONFIG_ARCH_SHARK is not set
186# CONFIG_ARCH_LH7A40X is not set
187# CONFIG_ARCH_U300 is not set
188# CONFIG_ARCH_DAVINCI is not set
189# CONFIG_ARCH_OMAP is not set
190
191#
192# Intel PXA2xx/PXA3xx Implementations
193#
194
195#
196# Supported PXA3xx Processor Variants
197#
198CONFIG_CPU_PXA300=y
199CONFIG_CPU_PXA310=y
200CONFIG_CPU_PXA320=y
201CONFIG_CPU_PXA930=y
202CONFIG_CPU_PXA935=y
203# CONFIG_ARCH_GUMSTIX is not set
204# CONFIG_MACH_INTELMOTE2 is not set
205# CONFIG_MACH_STARGATE2 is not set
206# CONFIG_ARCH_LUBBOCK is not set
207# CONFIG_MACH_LOGICPD_PXA270 is not set
208# CONFIG_MACH_MAINSTONE is not set
209# CONFIG_MACH_MP900C is not set
210# CONFIG_ARCH_PXA_IDP is not set
211# CONFIG_PXA_SHARPSL is not set
212# CONFIG_ARCH_VIPER is not set
213# CONFIG_ARCH_PXA_ESERIES is not set
214# CONFIG_TRIZEPS_PXA is not set
215# CONFIG_MACH_H5000 is not set
216# CONFIG_MACH_EM_X270 is not set
217# CONFIG_MACH_EXEDA is not set
218# CONFIG_MACH_COLIBRI is not set
219# CONFIG_MACH_COLIBRI300 is not set
220# CONFIG_MACH_COLIBRI320 is not set
221CONFIG_MACH_ZYLONITE=y
222CONFIG_MACH_LITTLETON=y
223CONFIG_MACH_TAVOREVB=y
224CONFIG_MACH_SAAR=y
225# CONFIG_MACH_ARMCORE is not set
226# CONFIG_MACH_CM_X300 is not set
227# CONFIG_MACH_H4700 is not set
228# CONFIG_MACH_MAGICIAN is not set
229# CONFIG_MACH_HIMALAYA is not set
230# CONFIG_MACH_MIOA701 is not set
231# CONFIG_MACH_PCM027 is not set
232# CONFIG_ARCH_PXA_PALM is not set
233# CONFIG_MACH_CSB726 is not set
234# CONFIG_PXA_EZX is not set
235CONFIG_PXA3xx=y
236CONFIG_PXA_SSP=y
237CONFIG_PXA_HAVE_BOARD_IRQS=y
238CONFIG_PLAT_PXA=y
239
240#
241# Processor Type
242#
243CONFIG_CPU_32=y
244CONFIG_CPU_XSC3=y
245CONFIG_CPU_32v5=y
246CONFIG_CPU_ABRT_EV5T=y
247CONFIG_CPU_PABRT_NOIFAR=y
248CONFIG_CPU_CACHE_VIVT=y
249CONFIG_CPU_TLB_V4WBI=y
250CONFIG_CPU_CP15=y
251CONFIG_CPU_CP15_MMU=y
252CONFIG_IO_36=y
253
254#
255# Processor Features
256#
257CONFIG_ARM_THUMB=y
258# CONFIG_CPU_DCACHE_DISABLE is not set
259# CONFIG_CPU_BPREDICT_DISABLE is not set
260CONFIG_OUTER_CACHE=y
261CONFIG_CACHE_XSC3L2=y
262CONFIG_IWMMXT=y
263CONFIG_COMMON_CLKDEV=y
264
265#
266# Bus support
267#
268# CONFIG_PCI_SYSCALL is not set
269# CONFIG_ARCH_SUPPORTS_MSI is not set
270# CONFIG_PCCARD is not set
271
272#
273# Kernel Features
274#
275CONFIG_TICK_ONESHOT=y
276# CONFIG_NO_HZ is not set
277# CONFIG_HIGH_RES_TIMERS is not set
278CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
279CONFIG_VMSPLIT_3G=y
280# CONFIG_VMSPLIT_2G is not set
281# CONFIG_VMSPLIT_1G is not set
282CONFIG_PAGE_OFFSET=0xC0000000
283CONFIG_PREEMPT=y
284CONFIG_HZ=100
285CONFIG_AEABI=y
286CONFIG_OABI_COMPAT=y
287# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set
288# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set
289# CONFIG_HIGHMEM is not set
290CONFIG_SELECT_MEMORY_MODEL=y
291CONFIG_FLATMEM_MANUAL=y
292# CONFIG_DISCONTIGMEM_MANUAL is not set
293# CONFIG_SPARSEMEM_MANUAL is not set
294CONFIG_FLATMEM=y
295CONFIG_FLAT_NODE_MEM_MAP=y
296CONFIG_PAGEFLAGS_EXTENDED=y
297CONFIG_SPLIT_PTLOCK_CPUS=4096
298# CONFIG_PHYS_ADDR_T_64BIT is not set
299CONFIG_ZONE_DMA_FLAG=0
300CONFIG_VIRT_TO_BUS=y
301CONFIG_HAVE_MLOCK=y
302CONFIG_HAVE_MLOCKED_PAGE_BIT=y
303CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
304CONFIG_ALIGNMENT_TRAP=y
305# CONFIG_UACCESS_WITH_MEMCPY is not set
306
307#
308# Boot options
309#
310CONFIG_ZBOOT_ROM_TEXT=0x0
311CONFIG_ZBOOT_ROM_BSS=0x0
312CONFIG_CMDLINE="root=/dev/nfs rootfstype=nfs nfsroot=192.168.1.100:/nfsroot/ ip=192.168.1.101:192.168.1.100::255.255.255.0::eth0:on console=ttyS0,115200 mem=64M debug"
313# CONFIG_XIP_KERNEL is not set
314# CONFIG_KEXEC is not set
315
316#
317# CPU Power Management
318#
319# CONFIG_CPU_FREQ is not set
320# CONFIG_CPU_IDLE is not set
321
322#
323# Floating point emulation
324#
325
326#
327# At least one emulation must be selected
328#
329CONFIG_FPE_NWFPE=y
330# CONFIG_FPE_NWFPE_XP is not set
331# CONFIG_FPE_FASTFPE is not set
332
333#
334# Userspace binary formats
335#
336CONFIG_BINFMT_ELF=y
337# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
338CONFIG_HAVE_AOUT=y
339# CONFIG_BINFMT_AOUT is not set
340# CONFIG_BINFMT_MISC is not set
341
342#
343# Power management options
344#
345# CONFIG_PM is not set
346CONFIG_ARCH_SUSPEND_POSSIBLE=y
347CONFIG_NET=y
348
349#
350# Networking options
351#
352CONFIG_PACKET=y
353# CONFIG_PACKET_MMAP is not set
354CONFIG_UNIX=y
355# CONFIG_NET_KEY is not set
356CONFIG_INET=y
357# CONFIG_IP_MULTICAST is not set
358# CONFIG_IP_ADVANCED_ROUTER is not set
359CONFIG_IP_FIB_HASH=y
360CONFIG_IP_PNP=y
361# CONFIG_IP_PNP_DHCP is not set
362# CONFIG_IP_PNP_BOOTP is not set
363# CONFIG_IP_PNP_RARP is not set
364# CONFIG_NET_IPIP is not set
365# CONFIG_NET_IPGRE is not set
366# CONFIG_ARPD is not set
367# CONFIG_SYN_COOKIES is not set
368# CONFIG_INET_AH is not set
369# CONFIG_INET_ESP is not set
370# CONFIG_INET_IPCOMP is not set
371# CONFIG_INET_XFRM_TUNNEL is not set
372# CONFIG_INET_TUNNEL is not set
373# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
374# CONFIG_INET_XFRM_MODE_TUNNEL is not set
375# CONFIG_INET_XFRM_MODE_BEET is not set
376# CONFIG_INET_LRO is not set
377# CONFIG_INET_DIAG is not set
378# CONFIG_TCP_CONG_ADVANCED is not set
379CONFIG_TCP_CONG_CUBIC=y
380CONFIG_DEFAULT_TCP_CONG="cubic"
381# CONFIG_TCP_MD5SIG is not set
382# CONFIG_IPV6 is not set
383# CONFIG_NETWORK_SECMARK is not set
384# CONFIG_NETFILTER is not set
385# CONFIG_IP_DCCP is not set
386# CONFIG_IP_SCTP is not set
387# CONFIG_TIPC is not set
388# CONFIG_ATM is not set
389# CONFIG_BRIDGE is not set
390# CONFIG_NET_DSA is not set
391# CONFIG_VLAN_8021Q is not set
392# CONFIG_DECNET is not set
393# CONFIG_LLC2 is not set
394# CONFIG_IPX is not set
395# CONFIG_ATALK is not set
396# CONFIG_X25 is not set
397# CONFIG_LAPB is not set
398# CONFIG_ECONET is not set
399# CONFIG_WAN_ROUTER is not set
400# CONFIG_PHONET is not set
401# CONFIG_IEEE802154 is not set
402# CONFIG_NET_SCHED is not set
403# CONFIG_DCB is not set
404
405#
406# Network testing
407#
408# CONFIG_NET_PKTGEN is not set
409# CONFIG_HAMRADIO is not set
410# CONFIG_CAN is not set
411# CONFIG_IRDA is not set
412# CONFIG_BT is not set
413# CONFIG_AF_RXRPC is not set
414# CONFIG_WIRELESS is not set
415# CONFIG_WIMAX is not set
416# CONFIG_RFKILL is not set
417# CONFIG_NET_9P is not set
418
419#
420# Device Drivers
421#
422
423#
424# Generic Driver Options
425#
426CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
427# CONFIG_STANDALONE is not set
428# CONFIG_PREVENT_FIRMWARE_BUILD is not set
429CONFIG_FW_LOADER=y
430CONFIG_FIRMWARE_IN_KERNEL=y
431CONFIG_EXTRA_FIRMWARE=""
432# CONFIG_DEBUG_DRIVER is not set
433# CONFIG_DEBUG_DEVRES is not set
434# CONFIG_SYS_HYPERVISOR is not set
435# CONFIG_CONNECTOR is not set
436CONFIG_MTD=y
437# CONFIG_MTD_DEBUG is not set
438CONFIG_MTD_CONCAT=y
439CONFIG_MTD_PARTITIONS=y
440# CONFIG_MTD_TESTS is not set
441# CONFIG_MTD_REDBOOT_PARTS is not set
442# CONFIG_MTD_CMDLINE_PARTS is not set
443# CONFIG_MTD_AFS_PARTS is not set
444# CONFIG_MTD_AR7_PARTS is not set
445
446#
447# User Modules And Translation Layers
448#
449CONFIG_MTD_CHAR=y
450CONFIG_MTD_BLKDEVS=y
451CONFIG_MTD_BLOCK=y
452# CONFIG_FTL is not set
453# CONFIG_NFTL is not set
454# CONFIG_INFTL is not set
455# CONFIG_RFD_FTL is not set
456# CONFIG_SSFDC is not set
457# CONFIG_MTD_OOPS is not set
458
459#
460# RAM/ROM/Flash chip drivers
461#
462# CONFIG_MTD_CFI is not set
463# CONFIG_MTD_JEDECPROBE is not set
464CONFIG_MTD_MAP_BANK_WIDTH_1=y
465CONFIG_MTD_MAP_BANK_WIDTH_2=y
466CONFIG_MTD_MAP_BANK_WIDTH_4=y
467# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
468# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
469# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
470CONFIG_MTD_CFI_I1=y
471CONFIG_MTD_CFI_I2=y
472# CONFIG_MTD_CFI_I4 is not set
473# CONFIG_MTD_CFI_I8 is not set
474# CONFIG_MTD_RAM is not set
475# CONFIG_MTD_ROM is not set
476# CONFIG_MTD_ABSENT is not set
477
478#
479# Mapping drivers for chip access
480#
481# CONFIG_MTD_COMPLEX_MAPPINGS is not set
482# CONFIG_MTD_PLATRAM is not set
483
484#
485# Self-contained MTD device drivers
486#
487# CONFIG_MTD_DATAFLASH is not set
488# CONFIG_MTD_M25P80 is not set
489# CONFIG_MTD_SLRAM is not set
490# CONFIG_MTD_PHRAM is not set
491# CONFIG_MTD_MTDRAM is not set
492# CONFIG_MTD_BLOCK2MTD is not set
493
494#
495# Disk-On-Chip Device Drivers
496#
497# CONFIG_MTD_DOC2000 is not set
498# CONFIG_MTD_DOC2001 is not set
499# CONFIG_MTD_DOC2001PLUS is not set
500CONFIG_MTD_NAND=y
501CONFIG_MTD_NAND_VERIFY_WRITE=y
502# CONFIG_MTD_NAND_ECC_SMC is not set
503# CONFIG_MTD_NAND_MUSEUM_IDS is not set
504# CONFIG_MTD_NAND_H1900 is not set
505# CONFIG_MTD_NAND_GPIO is not set
506CONFIG_MTD_NAND_IDS=y
507# CONFIG_MTD_NAND_DISKONCHIP is not set
508# CONFIG_MTD_NAND_SHARPSL is not set
509CONFIG_MTD_NAND_PXA3xx=y
510CONFIG_MTD_NAND_PXA3xx_BUILTIN=y
511# CONFIG_MTD_NAND_NANDSIM is not set
512# CONFIG_MTD_NAND_PLATFORM is not set
513CONFIG_MTD_ONENAND=y
514CONFIG_MTD_ONENAND_VERIFY_WRITE=y
515CONFIG_MTD_ONENAND_GENERIC=y
516# CONFIG_MTD_ONENAND_OTP is not set
517# CONFIG_MTD_ONENAND_2X_PROGRAM is not set
518# CONFIG_MTD_ONENAND_SIM is not set
519
520#
521# LPDDR flash memory drivers
522#
523# CONFIG_MTD_LPDDR is not set
524
525#
526# UBI - Unsorted block images
527#
528# CONFIG_MTD_UBI is not set
529# CONFIG_PARPORT is not set
530CONFIG_BLK_DEV=y
531# CONFIG_BLK_DEV_COW_COMMON is not set
532CONFIG_BLK_DEV_LOOP=y
533# CONFIG_BLK_DEV_CRYPTOLOOP is not set
534# CONFIG_BLK_DEV_NBD is not set
535CONFIG_BLK_DEV_RAM=y
536CONFIG_BLK_DEV_RAM_COUNT=16
537CONFIG_BLK_DEV_RAM_SIZE=4096
538# CONFIG_BLK_DEV_XIP is not set
539# CONFIG_CDROM_PKTCDVD is not set
540# CONFIG_ATA_OVER_ETH is not set
541# CONFIG_MG_DISK is not set
542# CONFIG_MISC_DEVICES is not set
543CONFIG_HAVE_IDE=y
544# CONFIG_IDE is not set
545
546#
547# SCSI device support
548#
549# CONFIG_RAID_ATTRS is not set
550# CONFIG_SCSI is not set
551# CONFIG_SCSI_DMA is not set
552# CONFIG_SCSI_NETLINK is not set
553# CONFIG_ATA is not set
554# CONFIG_MD is not set
555CONFIG_NETDEVICES=y
556# CONFIG_DUMMY is not set
557# CONFIG_BONDING is not set
558# CONFIG_MACVLAN is not set
559# CONFIG_EQUALIZER is not set
560# CONFIG_TUN is not set
561# CONFIG_VETH is not set
562# CONFIG_PHYLIB is not set
563CONFIG_NET_ETHERNET=y
564CONFIG_MII=y
565# CONFIG_AX88796 is not set
566CONFIG_SMC91X=y
567# CONFIG_DM9000 is not set
568# CONFIG_ENC28J60 is not set
569# CONFIG_ETHOC is not set
570# CONFIG_SMC911X is not set
571# CONFIG_SMSC911X is not set
572# CONFIG_DNET is not set
573# CONFIG_IBM_NEW_EMAC_ZMII is not set
574# CONFIG_IBM_NEW_EMAC_RGMII is not set
575# CONFIG_IBM_NEW_EMAC_TAH is not set
576# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
577# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
578# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
579# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
580# CONFIG_B44 is not set
581# CONFIG_KS8842 is not set
582# CONFIG_NETDEV_1000 is not set
583# CONFIG_NETDEV_10000 is not set
584
585#
586# Wireless LAN
587#
588# CONFIG_WLAN_PRE80211 is not set
589# CONFIG_WLAN_80211 is not set
590
591#
592# Enable WiMAX (Networking options) to see the WiMAX drivers
593#
594# CONFIG_WAN is not set
595# CONFIG_PPP is not set
596# CONFIG_SLIP is not set
597# CONFIG_NETCONSOLE is not set
598# CONFIG_NETPOLL is not set
599# CONFIG_NET_POLL_CONTROLLER is not set
600# CONFIG_ISDN is not set
601
602#
603# Input device support
604#
605CONFIG_INPUT=y
606# CONFIG_INPUT_FF_MEMLESS is not set
607# CONFIG_INPUT_POLLDEV is not set
608
609#
610# Userland interfaces
611#
612CONFIG_INPUT_MOUSEDEV=y
613# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
614CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
615CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
616# CONFIG_INPUT_JOYDEV is not set
617# CONFIG_INPUT_EVDEV is not set
618# CONFIG_INPUT_EVBUG is not set
619
620#
621# Input Device Drivers
622#
623CONFIG_INPUT_KEYBOARD=y
624# CONFIG_KEYBOARD_ATKBD is not set
625# CONFIG_KEYBOARD_SUNKBD is not set
626# CONFIG_KEYBOARD_LKKBD is not set
627# CONFIG_KEYBOARD_XTKBD is not set
628# CONFIG_KEYBOARD_NEWTON is not set
629# CONFIG_KEYBOARD_STOWAWAY is not set
630# CONFIG_KEYBOARD_LM8323 is not set
631CONFIG_KEYBOARD_PXA27x=y
632CONFIG_KEYBOARD_PXA930_ROTARY=y
633CONFIG_KEYBOARD_GPIO=y
634CONFIG_INPUT_MOUSE=y
635CONFIG_MOUSE_PS2=y
636CONFIG_MOUSE_PS2_ALPS=y
637CONFIG_MOUSE_PS2_LOGIPS2PP=y
638CONFIG_MOUSE_PS2_SYNAPTICS=y
639CONFIG_MOUSE_PS2_TRACKPOINT=y
640# CONFIG_MOUSE_PS2_ELANTECH is not set
641# CONFIG_MOUSE_PS2_TOUCHKIT is not set
642# CONFIG_MOUSE_SERIAL is not set
643# CONFIG_MOUSE_VSXXXAA is not set
644# CONFIG_MOUSE_GPIO is not set
645CONFIG_MOUSE_PXA930_TRKBALL=y
646# CONFIG_MOUSE_SYNAPTICS_I2C is not set
647# CONFIG_INPUT_JOYSTICK is not set
648# CONFIG_INPUT_TABLET is not set
649CONFIG_INPUT_TOUCHSCREEN=y
650# CONFIG_TOUCHSCREEN_ADS7846 is not set
651# CONFIG_TOUCHSCREEN_AD7877 is not set
652# CONFIG_TOUCHSCREEN_AD7879_I2C is not set
653# CONFIG_TOUCHSCREEN_AD7879_SPI is not set
654# CONFIG_TOUCHSCREEN_AD7879 is not set
655CONFIG_TOUCHSCREEN_DA9034=y
656# CONFIG_TOUCHSCREEN_EETI is not set
657# CONFIG_TOUCHSCREEN_FUJITSU is not set
658# CONFIG_TOUCHSCREEN_GUNZE is not set
659# CONFIG_TOUCHSCREEN_ELO is not set
660# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set
661# CONFIG_TOUCHSCREEN_MTOUCH is not set
662# CONFIG_TOUCHSCREEN_INEXIO is not set
663# CONFIG_TOUCHSCREEN_MK712 is not set
664# CONFIG_TOUCHSCREEN_PENMOUNT is not set
665# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
666# CONFIG_TOUCHSCREEN_TOUCHWIN is not set
667# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set
668# CONFIG_TOUCHSCREEN_TSC2007 is not set
669# CONFIG_TOUCHSCREEN_W90X900 is not set
670# CONFIG_INPUT_MISC is not set
671
672#
673# Hardware I/O ports
674#
675CONFIG_SERIO=y
676CONFIG_SERIO_SERPORT=y
677CONFIG_SERIO_LIBPS2=y
678# CONFIG_SERIO_RAW is not set
679# CONFIG_GAMEPORT is not set
680
681#
682# Character devices
683#
684CONFIG_VT=y
685CONFIG_CONSOLE_TRANSLATIONS=y
686CONFIG_VT_CONSOLE=y
687CONFIG_HW_CONSOLE=y
688# CONFIG_VT_HW_CONSOLE_BINDING is not set
689CONFIG_DEVKMEM=y
690# CONFIG_SERIAL_NONSTANDARD is not set
691
692#
693# Serial drivers
694#
695# CONFIG_SERIAL_8250 is not set
696
697#
698# Non-8250 serial port support
699#
700# CONFIG_SERIAL_MAX3100 is not set
701CONFIG_SERIAL_PXA=y
702CONFIG_SERIAL_PXA_CONSOLE=y
703CONFIG_SERIAL_CORE=y
704CONFIG_SERIAL_CORE_CONSOLE=y
705CONFIG_UNIX98_PTYS=y
706# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
707# CONFIG_LEGACY_PTYS is not set
708# CONFIG_IPMI_HANDLER is not set
709# CONFIG_HW_RANDOM is not set
710# CONFIG_R3964 is not set
711# CONFIG_RAW_DRIVER is not set
712# CONFIG_TCG_TPM is not set
713CONFIG_I2C=y
714CONFIG_I2C_BOARDINFO=y
715# CONFIG_I2C_CHARDEV is not set
716# CONFIG_I2C_HELPER_AUTO is not set
717
718#
719# I2C Algorithms
720#
721# CONFIG_I2C_ALGOBIT is not set
722# CONFIG_I2C_ALGOPCF is not set
723# CONFIG_I2C_ALGOPCA is not set
724
725#
726# I2C Hardware Bus support
727#
728
729#
730# I2C system bus drivers (mostly embedded / system-on-chip)
731#
732# CONFIG_I2C_DESIGNWARE is not set
733# CONFIG_I2C_GPIO is not set
734# CONFIG_I2C_OCORES is not set
735CONFIG_I2C_PXA=y
736# CONFIG_I2C_PXA_SLAVE is not set
737# CONFIG_I2C_SIMTEC is not set
738
739#
740# External I2C/SMBus adapter drivers
741#
742# CONFIG_I2C_PARPORT_LIGHT is not set
743# CONFIG_I2C_TAOS_EVM is not set
744
745#
746# Other I2C/SMBus bus drivers
747#
748# CONFIG_I2C_PCA_PLATFORM is not set
749# CONFIG_I2C_STUB is not set
750
751#
752# Miscellaneous I2C Chip support
753#
754# CONFIG_DS1682 is not set
755# CONFIG_SENSORS_TSL2550 is not set
756# CONFIG_I2C_DEBUG_CORE is not set
757# CONFIG_I2C_DEBUG_ALGO is not set
758# CONFIG_I2C_DEBUG_BUS is not set
759# CONFIG_I2C_DEBUG_CHIP is not set
760CONFIG_SPI=y
761# CONFIG_SPI_DEBUG is not set
762CONFIG_SPI_MASTER=y
763
764#
765# SPI Master Controller Drivers
766#
767# CONFIG_SPI_BITBANG is not set
768# CONFIG_SPI_GPIO is not set
769CONFIG_SPI_PXA2XX=y
770
771#
772# SPI Protocol Masters
773#
774# CONFIG_SPI_SPIDEV is not set
775# CONFIG_SPI_TLE62X0 is not set
776CONFIG_ARCH_REQUIRE_GPIOLIB=y
777CONFIG_GPIOLIB=y
778# CONFIG_DEBUG_GPIO is not set
779# CONFIG_GPIO_SYSFS is not set
780
781#
782# Memory mapped GPIO expanders:
783#
784
785#
786# I2C GPIO expanders:
787#
788CONFIG_GPIO_MAX732X=y
789CONFIG_GPIO_PCA953X=y
790CONFIG_GPIO_PCF857X=y
791
792#
793# PCI GPIO expanders:
794#
795
796#
797# SPI GPIO expanders:
798#
799CONFIG_GPIO_MAX7301=y
800# CONFIG_GPIO_MCP23S08 is not set
801# CONFIG_W1 is not set
802CONFIG_POWER_SUPPLY=y
803CONFIG_POWER_SUPPLY_DEBUG=y
804CONFIG_PDA_POWER=y
805# CONFIG_BATTERY_DS2760 is not set
806# CONFIG_BATTERY_BQ27x00 is not set
807CONFIG_BATTERY_DA9030=y
808# CONFIG_BATTERY_MAX17040 is not set
809# CONFIG_HWMON is not set
810# CONFIG_THERMAL is not set
811# CONFIG_THERMAL_HWMON is not set
812# CONFIG_WATCHDOG is not set
813CONFIG_SSB_POSSIBLE=y
814
815#
816# Sonics Silicon Backplane
817#
818# CONFIG_SSB is not set
819
820#
821# Multifunction device drivers
822#
823# CONFIG_MFD_CORE is not set
824# CONFIG_MFD_SM501 is not set
825# CONFIG_MFD_ASIC3 is not set
826# CONFIG_HTC_EGPIO is not set
827# CONFIG_HTC_PASIC3 is not set
828# CONFIG_TPS65010 is not set
829# CONFIG_TWL4030_CORE is not set
830# CONFIG_MFD_TMIO is not set
831# CONFIG_MFD_T7L66XB is not set
832# CONFIG_MFD_TC6387XB is not set
833# CONFIG_MFD_TC6393XB is not set
834CONFIG_PMIC_DA903X=y
835# CONFIG_MFD_WM8400 is not set
836# CONFIG_MFD_WM8350_I2C is not set
837# CONFIG_MFD_PCF50633 is not set
838# CONFIG_AB3100_CORE is not set
839# CONFIG_EZX_PCAP is not set
840# CONFIG_MEDIA_SUPPORT is not set
841
842#
843# Graphics support
844#
845# CONFIG_VGASTATE is not set
846# CONFIG_VIDEO_OUTPUT_CONTROL is not set
847CONFIG_FB=y
848# CONFIG_FIRMWARE_EDID is not set
849# CONFIG_FB_DDC is not set
850# CONFIG_FB_BOOT_VESA_SUPPORT is not set
851CONFIG_FB_CFB_FILLRECT=y
852CONFIG_FB_CFB_COPYAREA=y
853CONFIG_FB_CFB_IMAGEBLIT=y
854# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
855# CONFIG_FB_SYS_FILLRECT is not set
856# CONFIG_FB_SYS_COPYAREA is not set
857# CONFIG_FB_SYS_IMAGEBLIT is not set
858# CONFIG_FB_FOREIGN_ENDIAN is not set
859# CONFIG_FB_SYS_FOPS is not set
860# CONFIG_FB_SVGALIB is not set
861# CONFIG_FB_MACMODES is not set
862# CONFIG_FB_BACKLIGHT is not set
863# CONFIG_FB_MODE_HELPERS is not set
864# CONFIG_FB_TILEBLITTING is not set
865
866#
867# Frame buffer hardware drivers
868#
869# CONFIG_FB_S1D13XXX is not set
870CONFIG_FB_PXA=y
871# CONFIG_FB_PXA_OVERLAY is not set
872# CONFIG_FB_PXA_SMARTPANEL is not set
873# CONFIG_FB_PXA_PARAMETERS is not set
874# CONFIG_FB_MBX is not set
875# CONFIG_FB_W100 is not set
876# CONFIG_FB_VIRTUAL is not set
877# CONFIG_FB_METRONOME is not set
878# CONFIG_FB_MB862XX is not set
879# CONFIG_FB_BROADSHEET is not set
880CONFIG_BACKLIGHT_LCD_SUPPORT=y
881CONFIG_LCD_CLASS_DEVICE=y
882# CONFIG_LCD_LTV350QV is not set
883# CONFIG_LCD_ILI9320 is not set
884CONFIG_LCD_TDO24M=y
885# CONFIG_LCD_VGG2432A4 is not set
886# CONFIG_LCD_PLATFORM is not set
887CONFIG_BACKLIGHT_CLASS_DEVICE=y
888# CONFIG_BACKLIGHT_GENERIC is not set
889CONFIG_BACKLIGHT_PWM=y
890CONFIG_BACKLIGHT_DA903X=y
891
892#
893# Display device support
894#
895# CONFIG_DISPLAY_SUPPORT is not set
896
897#
898# Console display driver support
899#
900# CONFIG_VGA_CONSOLE is not set
901CONFIG_DUMMY_CONSOLE=y
902CONFIG_FRAMEBUFFER_CONSOLE=y
903CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
904# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
905CONFIG_FONTS=y
906# CONFIG_FONT_8x8 is not set
907# CONFIG_FONT_8x16 is not set
908CONFIG_FONT_6x11=y
909# CONFIG_FONT_7x14 is not set
910# CONFIG_FONT_PEARL_8x8 is not set
911# CONFIG_FONT_ACORN_8x8 is not set
912# CONFIG_FONT_MINI_4x6 is not set
913# CONFIG_FONT_SUN8x16 is not set
914# CONFIG_FONT_SUN12x22 is not set
915# CONFIG_FONT_10x18 is not set
916CONFIG_LOGO=y
917CONFIG_LOGO_LINUX_MONO=y
918CONFIG_LOGO_LINUX_VGA16=y
919CONFIG_LOGO_LINUX_CLUT224=y
920# CONFIG_SOUND is not set
921# CONFIG_HID_SUPPORT is not set
922# CONFIG_USB_SUPPORT is not set
923CONFIG_MMC=y
924# CONFIG_MMC_DEBUG is not set
925# CONFIG_MMC_UNSAFE_RESUME is not set
926
927#
928# MMC/SD/SDIO Card Drivers
929#
930CONFIG_MMC_BLOCK=y
931CONFIG_MMC_BLOCK_BOUNCE=y
932# CONFIG_SDIO_UART is not set
933# CONFIG_MMC_TEST is not set
934
935#
936# MMC/SD/SDIO Host Controller Drivers
937#
938CONFIG_MMC_PXA=y
939# CONFIG_MMC_SDHCI is not set
940# CONFIG_MMC_SPI is not set
941# CONFIG_MEMSTICK is not set
942# CONFIG_ACCESSIBILITY is not set
943CONFIG_NEW_LEDS=y
944CONFIG_LEDS_CLASS=m
945
946#
947# LED drivers
948#
949# CONFIG_LEDS_PCA9532 is not set
950CONFIG_LEDS_GPIO=m
951CONFIG_LEDS_GPIO_PLATFORM=y
952# CONFIG_LEDS_LP5521 is not set
953# CONFIG_LEDS_PCA955X is not set
954CONFIG_LEDS_DA903X=m
955# CONFIG_LEDS_DAC124S085 is not set
956# CONFIG_LEDS_PWM is not set
957# CONFIG_LEDS_BD2802 is not set
958
959#
960# LED Triggers
961#
962CONFIG_LEDS_TRIGGERS=y
963CONFIG_LEDS_TRIGGER_TIMER=m
964CONFIG_LEDS_TRIGGER_HEARTBEAT=m
965CONFIG_LEDS_TRIGGER_BACKLIGHT=m
966CONFIG_LEDS_TRIGGER_GPIO=m
967CONFIG_LEDS_TRIGGER_DEFAULT_ON=m
968
969#
970# iptables trigger is under Netfilter config (LED target)
971#
972CONFIG_RTC_LIB=y
973# CONFIG_RTC_CLASS is not set
974# CONFIG_DMADEVICES is not set
975# CONFIG_AUXDISPLAY is not set
976CONFIG_REGULATOR=y
977CONFIG_REGULATOR_DEBUG=y
978# CONFIG_REGULATOR_FIXED_VOLTAGE is not set
979CONFIG_REGULATOR_VIRTUAL_CONSUMER=y
980# CONFIG_REGULATOR_USERSPACE_CONSUMER is not set
981# CONFIG_REGULATOR_BQ24022 is not set
982# CONFIG_REGULATOR_MAX1586 is not set
983CONFIG_REGULATOR_DA903X=y
984# CONFIG_REGULATOR_LP3971 is not set
985# CONFIG_UIO is not set
986# CONFIG_STAGING is not set
987
988#
989# File systems
990#
991# CONFIG_EXT2_FS is not set
992# CONFIG_EXT3_FS is not set
993# CONFIG_EXT4_FS is not set
994# CONFIG_REISERFS_FS is not set
995# CONFIG_JFS_FS is not set
996CONFIG_FS_POSIX_ACL=y
997# CONFIG_XFS_FS is not set
998# CONFIG_GFS2_FS is not set
999# CONFIG_OCFS2_FS is not set
1000# CONFIG_BTRFS_FS is not set
1001CONFIG_FILE_LOCKING=y
1002CONFIG_FSNOTIFY=y
1003CONFIG_DNOTIFY=y
1004# CONFIG_INOTIFY is not set
1005CONFIG_INOTIFY_USER=y
1006# CONFIG_QUOTA is not set
1007# CONFIG_AUTOFS_FS is not set
1008# CONFIG_AUTOFS4_FS is not set
1009# CONFIG_FUSE_FS is not set
1010
1011#
1012# Caches
1013#
1014# CONFIG_FSCACHE is not set
1015
1016#
1017# CD-ROM/DVD Filesystems
1018#
1019# CONFIG_ISO9660_FS is not set
1020# CONFIG_UDF_FS is not set
1021
1022#
1023# DOS/FAT/NT Filesystems
1024#
1025# CONFIG_MSDOS_FS is not set
1026# CONFIG_VFAT_FS is not set
1027# CONFIG_NTFS_FS is not set
1028
1029#
1030# Pseudo filesystems
1031#
1032CONFIG_PROC_FS=y
1033CONFIG_PROC_SYSCTL=y
1034CONFIG_PROC_PAGE_MONITOR=y
1035CONFIG_SYSFS=y
1036# CONFIG_TMPFS is not set
1037# CONFIG_HUGETLB_PAGE is not set
1038# CONFIG_CONFIGFS_FS is not set
1039CONFIG_MISC_FILESYSTEMS=y
1040# CONFIG_ADFS_FS is not set
1041# CONFIG_AFFS_FS is not set
1042# CONFIG_HFS_FS is not set
1043# CONFIG_HFSPLUS_FS is not set
1044# CONFIG_BEFS_FS is not set
1045# CONFIG_BFS_FS is not set
1046# CONFIG_EFS_FS is not set
1047CONFIG_JFFS2_FS=y
1048CONFIG_JFFS2_FS_DEBUG=0
1049CONFIG_JFFS2_FS_WRITEBUFFER=y
1050CONFIG_JFFS2_FS_WBUF_VERIFY=y
1051# CONFIG_JFFS2_SUMMARY is not set
1052# CONFIG_JFFS2_FS_XATTR is not set
1053CONFIG_JFFS2_COMPRESSION_OPTIONS=y
1054CONFIG_JFFS2_ZLIB=y
1055CONFIG_JFFS2_LZO=y
1056CONFIG_JFFS2_RTIME=y
1057CONFIG_JFFS2_RUBIN=y
1058# CONFIG_JFFS2_CMODE_NONE is not set
1059CONFIG_JFFS2_CMODE_PRIORITY=y
1060# CONFIG_JFFS2_CMODE_SIZE is not set
1061# CONFIG_JFFS2_CMODE_FAVOURLZO is not set
1062# CONFIG_CRAMFS is not set
1063# CONFIG_SQUASHFS is not set
1064# CONFIG_VXFS_FS is not set
1065# CONFIG_MINIX_FS is not set
1066# CONFIG_OMFS_FS is not set
1067# CONFIG_HPFS_FS is not set
1068# CONFIG_QNX4FS_FS is not set
1069# CONFIG_ROMFS_FS is not set
1070# CONFIG_SYSV_FS is not set
1071# CONFIG_UFS_FS is not set
1072# CONFIG_NILFS2_FS is not set
1073CONFIG_NETWORK_FILESYSTEMS=y
1074CONFIG_NFS_FS=y
1075CONFIG_NFS_V3=y
1076CONFIG_NFS_V3_ACL=y
1077CONFIG_NFS_V4=y
1078# CONFIG_NFS_V4_1 is not set
1079CONFIG_ROOT_NFS=y
1080# CONFIG_NFSD is not set
1081CONFIG_LOCKD=y
1082CONFIG_LOCKD_V4=y
1083CONFIG_NFS_ACL_SUPPORT=y
1084CONFIG_NFS_COMMON=y
1085CONFIG_SUNRPC=y
1086CONFIG_SUNRPC_GSS=y
1087CONFIG_RPCSEC_GSS_KRB5=y
1088# CONFIG_RPCSEC_GSS_SPKM3 is not set
1089# CONFIG_SMB_FS is not set
1090# CONFIG_CIFS is not set
1091# CONFIG_NCP_FS is not set
1092# CONFIG_CODA_FS is not set
1093# CONFIG_AFS_FS is not set
1094
1095#
1096# Partition Types
1097#
1098# CONFIG_PARTITION_ADVANCED is not set
1099CONFIG_MSDOS_PARTITION=y
1100CONFIG_NLS=y
1101CONFIG_NLS_DEFAULT="iso8859-1"
1102# CONFIG_NLS_CODEPAGE_437 is not set
1103# CONFIG_NLS_CODEPAGE_737 is not set
1104# CONFIG_NLS_CODEPAGE_775 is not set
1105# CONFIG_NLS_CODEPAGE_850 is not set
1106# CONFIG_NLS_CODEPAGE_852 is not set
1107# CONFIG_NLS_CODEPAGE_855 is not set
1108# CONFIG_NLS_CODEPAGE_857 is not set
1109# CONFIG_NLS_CODEPAGE_860 is not set
1110# CONFIG_NLS_CODEPAGE_861 is not set
1111# CONFIG_NLS_CODEPAGE_862 is not set
1112# CONFIG_NLS_CODEPAGE_863 is not set
1113# CONFIG_NLS_CODEPAGE_864 is not set
1114# CONFIG_NLS_CODEPAGE_865 is not set
1115# CONFIG_NLS_CODEPAGE_866 is not set
1116# CONFIG_NLS_CODEPAGE_869 is not set
1117# CONFIG_NLS_CODEPAGE_936 is not set
1118# CONFIG_NLS_CODEPAGE_950 is not set
1119# CONFIG_NLS_CODEPAGE_932 is not set
1120# CONFIG_NLS_CODEPAGE_949 is not set
1121# CONFIG_NLS_CODEPAGE_874 is not set
1122# CONFIG_NLS_ISO8859_8 is not set
1123# CONFIG_NLS_CODEPAGE_1250 is not set
1124# CONFIG_NLS_CODEPAGE_1251 is not set
1125# CONFIG_NLS_ASCII is not set
1126# CONFIG_NLS_ISO8859_1 is not set
1127# CONFIG_NLS_ISO8859_2 is not set
1128# CONFIG_NLS_ISO8859_3 is not set
1129# CONFIG_NLS_ISO8859_4 is not set
1130# CONFIG_NLS_ISO8859_5 is not set
1131# CONFIG_NLS_ISO8859_6 is not set
1132# CONFIG_NLS_ISO8859_7 is not set
1133# CONFIG_NLS_ISO8859_9 is not set
1134# CONFIG_NLS_ISO8859_13 is not set
1135# CONFIG_NLS_ISO8859_14 is not set
1136# CONFIG_NLS_ISO8859_15 is not set
1137# CONFIG_NLS_KOI8_R is not set
1138# CONFIG_NLS_KOI8_U is not set
1139# CONFIG_NLS_UTF8 is not set
1140# CONFIG_DLM is not set
1141
1142#
1143# Kernel hacking
1144#
1145CONFIG_PRINTK_TIME=y
1146CONFIG_ENABLE_WARN_DEPRECATED=y
1147CONFIG_ENABLE_MUST_CHECK=y
1148CONFIG_FRAME_WARN=1024
1149CONFIG_MAGIC_SYSRQ=y
1150# CONFIG_UNUSED_SYMBOLS is not set
1151# CONFIG_DEBUG_FS is not set
1152# CONFIG_HEADERS_CHECK is not set
1153CONFIG_DEBUG_KERNEL=y
1154CONFIG_DEBUG_SHIRQ=y
1155CONFIG_DETECT_SOFTLOCKUP=y
1156CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
1157CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=1
1158CONFIG_DETECT_HUNG_TASK=y
1159# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
1160CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
1161# CONFIG_SCHED_DEBUG is not set
1162# CONFIG_SCHEDSTATS is not set
1163# CONFIG_TIMER_STATS is not set
1164# CONFIG_DEBUG_OBJECTS is not set
1165# CONFIG_DEBUG_SLAB is not set
1166# CONFIG_DEBUG_KMEMLEAK is not set
1167CONFIG_DEBUG_PREEMPT=y
1168# CONFIG_DEBUG_RT_MUTEXES is not set
1169# CONFIG_RT_MUTEX_TESTER is not set
1170CONFIG_DEBUG_SPINLOCK=y
1171# CONFIG_DEBUG_MUTEXES is not set
1172# CONFIG_DEBUG_LOCK_ALLOC is not set
1173# CONFIG_PROVE_LOCKING is not set
1174# CONFIG_LOCK_STAT is not set
1175CONFIG_DEBUG_SPINLOCK_SLEEP=y
1176# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
1177# CONFIG_DEBUG_KOBJECT is not set
1178CONFIG_DEBUG_BUGVERBOSE=y
1179# CONFIG_DEBUG_INFO is not set
1180# CONFIG_DEBUG_VM is not set
1181# CONFIG_DEBUG_WRITECOUNT is not set
1182CONFIG_DEBUG_MEMORY_INIT=y
1183# CONFIG_DEBUG_LIST is not set
1184# CONFIG_DEBUG_SG is not set
1185# CONFIG_DEBUG_NOTIFIERS is not set
1186# CONFIG_BOOT_PRINTK_DELAY is not set
1187# CONFIG_RCU_TORTURE_TEST is not set
1188# CONFIG_RCU_CPU_STALL_DETECTOR is not set
1189# CONFIG_BACKTRACE_SELF_TEST is not set
1190# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
1191# CONFIG_FAULT_INJECTION is not set
1192# CONFIG_LATENCYTOP is not set
1193# CONFIG_SYSCTL_SYSCALL_CHECK is not set
1194# CONFIG_PAGE_POISONING is not set
1195CONFIG_HAVE_FUNCTION_TRACER=y
1196CONFIG_TRACING_SUPPORT=y
1197# CONFIG_FTRACE is not set
1198# CONFIG_SAMPLES is not set
1199CONFIG_HAVE_ARCH_KGDB=y
1200# CONFIG_KGDB is not set
1201# CONFIG_KMEMCHECK is not set
1202CONFIG_ARM_UNWIND=y
1203CONFIG_DEBUG_USER=y
1204# CONFIG_DEBUG_ERRORS is not set
1205# CONFIG_DEBUG_STACK_USAGE is not set
1206# CONFIG_DEBUG_LL is not set
1207
1208#
1209# Security options
1210#
1211# CONFIG_KEYS is not set
1212# CONFIG_SECURITY is not set
1213# CONFIG_SECURITYFS is not set
1214# CONFIG_SECURITY_FILE_CAPABILITIES is not set
1215CONFIG_CRYPTO=y
1216
1217#
1218# Crypto core or helper
1219#
1220# CONFIG_CRYPTO_FIPS is not set
1221CONFIG_CRYPTO_ALGAPI=y
1222CONFIG_CRYPTO_ALGAPI2=y
1223CONFIG_CRYPTO_AEAD2=y
1224CONFIG_CRYPTO_BLKCIPHER=y
1225CONFIG_CRYPTO_BLKCIPHER2=y
1226CONFIG_CRYPTO_HASH=y
1227CONFIG_CRYPTO_HASH2=y
1228CONFIG_CRYPTO_RNG2=y
1229CONFIG_CRYPTO_PCOMP=y
1230CONFIG_CRYPTO_MANAGER=y
1231CONFIG_CRYPTO_MANAGER2=y
1232# CONFIG_CRYPTO_GF128MUL is not set
1233# CONFIG_CRYPTO_NULL is not set
1234CONFIG_CRYPTO_WORKQUEUE=y
1235# CONFIG_CRYPTO_CRYPTD is not set
1236# CONFIG_CRYPTO_AUTHENC is not set
1237# CONFIG_CRYPTO_TEST is not set
1238
1239#
1240# Authenticated Encryption with Associated Data
1241#
1242# CONFIG_CRYPTO_CCM is not set
1243# CONFIG_CRYPTO_GCM is not set
1244# CONFIG_CRYPTO_SEQIV is not set
1245
1246#
1247# Block modes
1248#
1249CONFIG_CRYPTO_CBC=y
1250# CONFIG_CRYPTO_CTR is not set
1251# CONFIG_CRYPTO_CTS is not set
1252# CONFIG_CRYPTO_ECB is not set
1253# CONFIG_CRYPTO_LRW is not set
1254# CONFIG_CRYPTO_PCBC is not set
1255# CONFIG_CRYPTO_XTS is not set
1256
1257#
1258# Hash modes
1259#
1260# CONFIG_CRYPTO_HMAC is not set
1261# CONFIG_CRYPTO_XCBC is not set
1262
1263#
1264# Digest
1265#
1266# CONFIG_CRYPTO_CRC32C is not set
1267# CONFIG_CRYPTO_MD4 is not set
1268CONFIG_CRYPTO_MD5=y
1269# CONFIG_CRYPTO_MICHAEL_MIC is not set
1270# CONFIG_CRYPTO_RMD128 is not set
1271# CONFIG_CRYPTO_RMD160 is not set
1272# CONFIG_CRYPTO_RMD256 is not set
1273# CONFIG_CRYPTO_RMD320 is not set
1274# CONFIG_CRYPTO_SHA1 is not set
1275# CONFIG_CRYPTO_SHA256 is not set
1276# CONFIG_CRYPTO_SHA512 is not set
1277# CONFIG_CRYPTO_TGR192 is not set
1278# CONFIG_CRYPTO_WP512 is not set
1279
1280#
1281# Ciphers
1282#
1283# CONFIG_CRYPTO_AES is not set
1284# CONFIG_CRYPTO_ANUBIS is not set
1285# CONFIG_CRYPTO_ARC4 is not set
1286# CONFIG_CRYPTO_BLOWFISH is not set
1287# CONFIG_CRYPTO_CAMELLIA is not set
1288# CONFIG_CRYPTO_CAST5 is not set
1289# CONFIG_CRYPTO_CAST6 is not set
1290CONFIG_CRYPTO_DES=y
1291# CONFIG_CRYPTO_FCRYPT is not set
1292# CONFIG_CRYPTO_KHAZAD is not set
1293# CONFIG_CRYPTO_SALSA20 is not set
1294# CONFIG_CRYPTO_SEED is not set
1295# CONFIG_CRYPTO_SERPENT is not set
1296# CONFIG_CRYPTO_TEA is not set
1297# CONFIG_CRYPTO_TWOFISH is not set
1298
1299#
1300# Compression
1301#
1302# CONFIG_CRYPTO_DEFLATE is not set
1303# CONFIG_CRYPTO_ZLIB is not set
1304# CONFIG_CRYPTO_LZO is not set
1305
1306#
1307# Random Number Generation
1308#
1309# CONFIG_CRYPTO_ANSI_CPRNG is not set
1310# CONFIG_CRYPTO_HW is not set
1311# CONFIG_BINARY_PRINTF is not set
1312
1313#
1314# Library routines
1315#
1316CONFIG_BITREVERSE=y
1317CONFIG_GENERIC_FIND_LAST_BIT=y
1318# CONFIG_CRC_CCITT is not set
1319# CONFIG_CRC16 is not set
1320# CONFIG_CRC_T10DIF is not set
1321# CONFIG_CRC_ITU_T is not set
1322CONFIG_CRC32=y
1323# CONFIG_CRC7 is not set
1324# CONFIG_LIBCRC32C is not set
1325CONFIG_ZLIB_INFLATE=y
1326CONFIG_ZLIB_DEFLATE=y
1327CONFIG_LZO_COMPRESS=y
1328CONFIG_LZO_DECOMPRESS=y
1329CONFIG_HAS_IOMEM=y
1330CONFIG_HAS_IOPORT=y
1331CONFIG_HAS_DMA=y
1332CONFIG_NLATTR=y
diff --git a/arch/arm/configs/xcep_defconfig b/arch/arm/configs/xcep_defconfig
new file mode 100644
index 000000000000..33bb7250946b
--- /dev/null
+++ b/arch/arm/configs/xcep_defconfig
@@ -0,0 +1,1129 @@
1#
2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.31-rc6
4# Thu Aug 20 09:02:37 2009
5#
6CONFIG_ARM=y
7CONFIG_SYS_SUPPORTS_APM_EMULATION=y
8CONFIG_GENERIC_GPIO=y
9CONFIG_GENERIC_TIME=y
10CONFIG_GENERIC_CLOCKEVENTS=y
11CONFIG_MMU=y
12CONFIG_GENERIC_HARDIRQS=y
13CONFIG_STACKTRACE_SUPPORT=y
14CONFIG_HAVE_LATENCYTOP_SUPPORT=y
15CONFIG_LOCKDEP_SUPPORT=y
16CONFIG_TRACE_IRQFLAGS_SUPPORT=y
17CONFIG_HARDIRQS_SW_RESEND=y
18CONFIG_GENERIC_IRQ_PROBE=y
19CONFIG_RWSEM_GENERIC_SPINLOCK=y
20CONFIG_GENERIC_HWEIGHT=y
21CONFIG_GENERIC_CALIBRATE_DELAY=y
22CONFIG_ARCH_MTD_XIP=y
23CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
24CONFIG_VECTORS_BASE=0xffff0000
25CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
26CONFIG_CONSTRUCTORS=y
27
28#
29# General setup
30#
31CONFIG_EXPERIMENTAL=y
32CONFIG_BROKEN_ON_SMP=y
33CONFIG_INIT_ENV_ARG_LIMIT=32
34CONFIG_LOCALVERSION=".xcep-itech"
35# CONFIG_LOCALVERSION_AUTO is not set
36CONFIG_SYSVIPC=y
37CONFIG_SYSVIPC_SYSCTL=y
38# CONFIG_POSIX_MQUEUE is not set
39CONFIG_BSD_PROCESS_ACCT=y
40# CONFIG_BSD_PROCESS_ACCT_V3 is not set
41# CONFIG_TASKSTATS is not set
42# CONFIG_AUDIT is not set
43
44#
45# RCU Subsystem
46#
47CONFIG_CLASSIC_RCU=y
48# CONFIG_TREE_RCU is not set
49# CONFIG_PREEMPT_RCU is not set
50# CONFIG_TREE_RCU_TRACE is not set
51# CONFIG_PREEMPT_RCU_TRACE is not set
52CONFIG_IKCONFIG=y
53CONFIG_IKCONFIG_PROC=y
54CONFIG_LOG_BUF_SHIFT=16
55# CONFIG_GROUP_SCHED is not set
56# CONFIG_CGROUPS is not set
57CONFIG_SYSFS_DEPRECATED=y
58CONFIG_SYSFS_DEPRECATED_V2=y
59# CONFIG_RELAY is not set
60# CONFIG_NAMESPACES is not set
61CONFIG_BLK_DEV_INITRD=y
62CONFIG_INITRAMFS_SOURCE=""
63CONFIG_RD_GZIP=y
64# CONFIG_RD_BZIP2 is not set
65# CONFIG_RD_LZMA is not set
66CONFIG_CC_OPTIMIZE_FOR_SIZE=y
67CONFIG_SYSCTL=y
68CONFIG_ANON_INODES=y
69CONFIG_EMBEDDED=y
70# CONFIG_UID16 is not set
71CONFIG_SYSCTL_SYSCALL=y
72CONFIG_KALLSYMS=y
73# CONFIG_KALLSYMS_ALL is not set
74# CONFIG_KALLSYMS_EXTRA_PASS is not set
75CONFIG_HOTPLUG=y
76CONFIG_PRINTK=y
77CONFIG_BUG=y
78CONFIG_ELF_CORE=y
79CONFIG_BASE_FULL=y
80CONFIG_FUTEX=y
81CONFIG_EPOLL=y
82CONFIG_SIGNALFD=y
83CONFIG_TIMERFD=y
84CONFIG_EVENTFD=y
85# CONFIG_SHMEM is not set
86CONFIG_AIO=y
87
88#
89# Performance Counters
90#
91# CONFIG_VM_EVENT_COUNTERS is not set
92CONFIG_STRIP_ASM_SYMS=y
93# CONFIG_COMPAT_BRK is not set
94# CONFIG_SLAB is not set
95# CONFIG_SLUB is not set
96CONFIG_SLOB=y
97# CONFIG_PROFILING is not set
98CONFIG_TRACEPOINTS=y
99CONFIG_MARKERS=y
100CONFIG_HAVE_OPROFILE=y
101CONFIG_KPROBES=y
102CONFIG_KRETPROBES=y
103CONFIG_HAVE_KPROBES=y
104CONFIG_HAVE_KRETPROBES=y
105CONFIG_HAVE_CLK=y
106
107#
108# GCOV-based kernel profiling
109#
110# CONFIG_SLOW_WORK is not set
111CONFIG_HAVE_GENERIC_DMA_COHERENT=y
112CONFIG_RT_MUTEXES=y
113CONFIG_BASE_SMALL=0
114CONFIG_MODULES=y
115# CONFIG_MODULE_FORCE_LOAD is not set
116CONFIG_MODULE_UNLOAD=y
117# CONFIG_MODULE_FORCE_UNLOAD is not set
118CONFIG_MODVERSIONS=y
119CONFIG_MODULE_SRCVERSION_ALL=y
120# CONFIG_BLOCK is not set
121# CONFIG_FREEZER is not set
122
123#
124# System Type
125#
126# CONFIG_ARCH_AAEC2000 is not set
127# CONFIG_ARCH_INTEGRATOR is not set
128# CONFIG_ARCH_REALVIEW is not set
129# CONFIG_ARCH_VERSATILE is not set
130# CONFIG_ARCH_AT91 is not set
131# CONFIG_ARCH_CLPS711X is not set
132# CONFIG_ARCH_GEMINI is not set
133# CONFIG_ARCH_EBSA110 is not set
134# CONFIG_ARCH_EP93XX is not set
135# CONFIG_ARCH_FOOTBRIDGE is not set
136# CONFIG_ARCH_MXC is not set
137# CONFIG_ARCH_STMP3XXX is not set
138# CONFIG_ARCH_NETX is not set
139# CONFIG_ARCH_H720X is not set
140# CONFIG_ARCH_IOP13XX is not set
141# CONFIG_ARCH_IOP32X is not set
142# CONFIG_ARCH_IOP33X is not set
143# CONFIG_ARCH_IXP23XX is not set
144# CONFIG_ARCH_IXP2000 is not set
145# CONFIG_ARCH_IXP4XX is not set
146# CONFIG_ARCH_L7200 is not set
147# CONFIG_ARCH_KIRKWOOD is not set
148# CONFIG_ARCH_LOKI is not set
149# CONFIG_ARCH_MV78XX0 is not set
150# CONFIG_ARCH_ORION5X is not set
151# CONFIG_ARCH_MMP is not set
152# CONFIG_ARCH_KS8695 is not set
153# CONFIG_ARCH_NS9XXX is not set
154# CONFIG_ARCH_W90X900 is not set
155# CONFIG_ARCH_PNX4008 is not set
156CONFIG_ARCH_PXA=y
157# CONFIG_ARCH_MSM is not set
158# CONFIG_ARCH_RPC is not set
159# CONFIG_ARCH_SA1100 is not set
160# CONFIG_ARCH_S3C2410 is not set
161# CONFIG_ARCH_S3C64XX is not set
162# CONFIG_ARCH_SHARK is not set
163# CONFIG_ARCH_LH7A40X is not set
164# CONFIG_ARCH_U300 is not set
165# CONFIG_ARCH_DAVINCI is not set
166# CONFIG_ARCH_OMAP is not set
167
168#
169# Intel PXA2xx/PXA3xx Implementations
170#
171# CONFIG_ARCH_GUMSTIX is not set
172# CONFIG_MACH_INTELMOTE2 is not set
173# CONFIG_MACH_STARGATE2 is not set
174# CONFIG_ARCH_LUBBOCK is not set
175# CONFIG_MACH_LOGICPD_PXA270 is not set
176# CONFIG_MACH_MAINSTONE is not set
177# CONFIG_MACH_MP900C is not set
178# CONFIG_ARCH_PXA_IDP is not set
179# CONFIG_PXA_SHARPSL is not set
180# CONFIG_ARCH_VIPER is not set
181# CONFIG_ARCH_PXA_ESERIES is not set
182# CONFIG_TRIZEPS_PXA is not set
183# CONFIG_MACH_H5000 is not set
184# CONFIG_MACH_EM_X270 is not set
185# CONFIG_MACH_EXEDA is not set
186# CONFIG_MACH_COLIBRI is not set
187# CONFIG_MACH_COLIBRI300 is not set
188# CONFIG_MACH_COLIBRI320 is not set
189# CONFIG_MACH_ZYLONITE is not set
190# CONFIG_MACH_LITTLETON is not set
191# CONFIG_MACH_TAVOREVB is not set
192# CONFIG_MACH_SAAR is not set
193# CONFIG_MACH_ARMCORE is not set
194# CONFIG_MACH_CM_X300 is not set
195# CONFIG_MACH_H4700 is not set
196# CONFIG_MACH_MAGICIAN is not set
197# CONFIG_MACH_HIMALAYA is not set
198# CONFIG_MACH_MIOA701 is not set
199# CONFIG_MACH_PCM027 is not set
200# CONFIG_ARCH_PXA_PALM is not set
201# CONFIG_MACH_CSB726 is not set
202# CONFIG_PXA_EZX is not set
203CONFIG_MACH_XCEP=y
204CONFIG_PXA25x=y
205CONFIG_PXA_SSP=y
206CONFIG_PLAT_PXA=y
207
208#
209# Processor Type
210#
211CONFIG_CPU_32=y
212CONFIG_CPU_XSCALE=y
213CONFIG_CPU_32v5=y
214CONFIG_CPU_ABRT_EV5T=y
215CONFIG_CPU_PABRT_NOIFAR=y
216CONFIG_CPU_CACHE_VIVT=y
217CONFIG_CPU_TLB_V4WBI=y
218CONFIG_CPU_CP15=y
219CONFIG_CPU_CP15_MMU=y
220
221#
222# Processor Features
223#
224CONFIG_ARM_THUMB=y
225# CONFIG_CPU_DCACHE_DISABLE is not set
226CONFIG_IWMMXT=y
227CONFIG_XSCALE_PMU=y
228CONFIG_COMMON_CLKDEV=y
229
230#
231# Bus support
232#
233# CONFIG_PCI_SYSCALL is not set
234# CONFIG_ARCH_SUPPORTS_MSI is not set
235# CONFIG_PCCARD is not set
236
237#
238# Kernel Features
239#
240CONFIG_TICK_ONESHOT=y
241CONFIG_NO_HZ=y
242CONFIG_HIGH_RES_TIMERS=y
243CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
244CONFIG_VMSPLIT_3G=y
245# CONFIG_VMSPLIT_2G is not set
246# CONFIG_VMSPLIT_1G is not set
247CONFIG_PAGE_OFFSET=0xC0000000
248# CONFIG_PREEMPT is not set
249CONFIG_HZ=100
250CONFIG_AEABI=y
251CONFIG_OABI_COMPAT=y
252# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set
253# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set
254# CONFIG_HIGHMEM is not set
255CONFIG_SELECT_MEMORY_MODEL=y
256CONFIG_FLATMEM_MANUAL=y
257# CONFIG_DISCONTIGMEM_MANUAL is not set
258# CONFIG_SPARSEMEM_MANUAL is not set
259CONFIG_FLATMEM=y
260CONFIG_FLAT_NODE_MEM_MAP=y
261CONFIG_PAGEFLAGS_EXTENDED=y
262CONFIG_SPLIT_PTLOCK_CPUS=4096
263# CONFIG_PHYS_ADDR_T_64BIT is not set
264CONFIG_ZONE_DMA_FLAG=0
265CONFIG_VIRT_TO_BUS=y
266CONFIG_HAVE_MLOCK=y
267CONFIG_HAVE_MLOCKED_PAGE_BIT=y
268CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
269CONFIG_ALIGNMENT_TRAP=y
270# CONFIG_UACCESS_WITH_MEMCPY is not set
271
272#
273# Boot options
274#
275CONFIG_ZBOOT_ROM_TEXT=0x0
276CONFIG_ZBOOT_ROM_BSS=0x0
277CONFIG_CMDLINE="root=mtd4 rootfstype=jffs2 ro console=ttyS0,115200"
278# CONFIG_XIP_KERNEL is not set
279# CONFIG_KEXEC is not set
280
281#
282# CPU Power Management
283#
284# CONFIG_CPU_FREQ is not set
285# CONFIG_CPU_IDLE is not set
286
287#
288# Floating point emulation
289#
290
291#
292# At least one emulation must be selected
293#
294CONFIG_FPE_NWFPE=y
295# CONFIG_FPE_NWFPE_XP is not set
296# CONFIG_FPE_FASTFPE is not set
297
298#
299# Userspace binary formats
300#
301CONFIG_BINFMT_ELF=y
302# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
303CONFIG_HAVE_AOUT=y
304# CONFIG_BINFMT_AOUT is not set
305# CONFIG_BINFMT_MISC is not set
306
307#
308# Power management options
309#
310# CONFIG_PM is not set
311CONFIG_ARCH_SUSPEND_POSSIBLE=y
312CONFIG_NET=y
313
314#
315# Networking options
316#
317CONFIG_PACKET=m
318CONFIG_PACKET_MMAP=y
319CONFIG_UNIX=y
320CONFIG_XFRM=y
321# CONFIG_XFRM_USER is not set
322# CONFIG_XFRM_SUB_POLICY is not set
323# CONFIG_XFRM_MIGRATE is not set
324# CONFIG_XFRM_STATISTICS is not set
325CONFIG_NET_KEY=y
326# CONFIG_NET_KEY_MIGRATE is not set
327CONFIG_INET=y
328CONFIG_IP_MULTICAST=y
329# CONFIG_IP_ADVANCED_ROUTER is not set
330CONFIG_IP_FIB_HASH=y
331CONFIG_IP_PNP=y
332CONFIG_IP_PNP_DHCP=y
333CONFIG_IP_PNP_BOOTP=y
334# CONFIG_IP_PNP_RARP is not set
335# CONFIG_NET_IPIP is not set
336# CONFIG_NET_IPGRE is not set
337# CONFIG_IP_MROUTE is not set
338# CONFIG_ARPD is not set
339# CONFIG_SYN_COOKIES is not set
340# CONFIG_INET_AH is not set
341# CONFIG_INET_ESP is not set
342# CONFIG_INET_IPCOMP is not set
343# CONFIG_INET_XFRM_TUNNEL is not set
344# CONFIG_INET_TUNNEL is not set
345# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
346# CONFIG_INET_XFRM_MODE_TUNNEL is not set
347# CONFIG_INET_XFRM_MODE_BEET is not set
348CONFIG_INET_LRO=y
349# CONFIG_INET_DIAG is not set
350# CONFIG_TCP_CONG_ADVANCED is not set
351CONFIG_TCP_CONG_CUBIC=y
352CONFIG_DEFAULT_TCP_CONG="cubic"
353# CONFIG_TCP_MD5SIG is not set
354# CONFIG_IPV6 is not set
355# CONFIG_NETWORK_SECMARK is not set
356# CONFIG_NETFILTER is not set
357# CONFIG_IP_DCCP is not set
358# CONFIG_IP_SCTP is not set
359# CONFIG_TIPC is not set
360# CONFIG_ATM is not set
361# CONFIG_BRIDGE is not set
362# CONFIG_NET_DSA is not set
363# CONFIG_VLAN_8021Q is not set
364# CONFIG_DECNET is not set
365# CONFIG_LLC2 is not set
366# CONFIG_IPX is not set
367# CONFIG_ATALK is not set
368# CONFIG_X25 is not set
369# CONFIG_LAPB is not set
370# CONFIG_ECONET is not set
371# CONFIG_WAN_ROUTER is not set
372# CONFIG_PHONET is not set
373# CONFIG_IEEE802154 is not set
374# CONFIG_NET_SCHED is not set
375# CONFIG_DCB is not set
376
377#
378# Network testing
379#
380# CONFIG_NET_PKTGEN is not set
381# CONFIG_NET_TCPPROBE is not set
382# CONFIG_NET_DROP_MONITOR is not set
383# CONFIG_HAMRADIO is not set
384# CONFIG_CAN is not set
385# CONFIG_IRDA is not set
386# CONFIG_BT is not set
387# CONFIG_AF_RXRPC is not set
388# CONFIG_WIRELESS is not set
389# CONFIG_WIMAX is not set
390# CONFIG_RFKILL is not set
391# CONFIG_NET_9P is not set
392
393#
394# Device Drivers
395#
396
397#
398# Generic Driver Options
399#
400CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
401CONFIG_STANDALONE=y
402# CONFIG_PREVENT_FIRMWARE_BUILD is not set
403# CONFIG_FW_LOADER is not set
404# CONFIG_DEBUG_DRIVER is not set
405# CONFIG_DEBUG_DEVRES is not set
406# CONFIG_SYS_HYPERVISOR is not set
407# CONFIG_CONNECTOR is not set
408CONFIG_MTD=y
409# CONFIG_MTD_DEBUG is not set
410CONFIG_MTD_CONCAT=y
411CONFIG_MTD_PARTITIONS=y
412# CONFIG_MTD_TESTS is not set
413# CONFIG_MTD_REDBOOT_PARTS is not set
414# CONFIG_MTD_CMDLINE_PARTS is not set
415# CONFIG_MTD_AFS_PARTS is not set
416# CONFIG_MTD_AR7_PARTS is not set
417
418#
419# User Modules And Translation Layers
420#
421CONFIG_MTD_CHAR=y
422# CONFIG_MTD_OOPS is not set
423
424#
425# RAM/ROM/Flash chip drivers
426#
427CONFIG_MTD_CFI=y
428# CONFIG_MTD_JEDECPROBE is not set
429CONFIG_MTD_GEN_PROBE=y
430# CONFIG_MTD_CFI_ADV_OPTIONS is not set
431CONFIG_MTD_MAP_BANK_WIDTH_1=y
432CONFIG_MTD_MAP_BANK_WIDTH_2=y
433CONFIG_MTD_MAP_BANK_WIDTH_4=y
434# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
435# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
436# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
437CONFIG_MTD_CFI_I1=y
438CONFIG_MTD_CFI_I2=y
439# CONFIG_MTD_CFI_I4 is not set
440# CONFIG_MTD_CFI_I8 is not set
441CONFIG_MTD_CFI_INTELEXT=y
442# CONFIG_MTD_CFI_AMDSTD is not set
443# CONFIG_MTD_CFI_STAA is not set
444CONFIG_MTD_CFI_UTIL=y
445# CONFIG_MTD_RAM is not set
446# CONFIG_MTD_ROM is not set
447# CONFIG_MTD_ABSENT is not set
448# CONFIG_MTD_XIP is not set
449
450#
451# Mapping drivers for chip access
452#
453CONFIG_MTD_COMPLEX_MAPPINGS=y
454CONFIG_MTD_PHYSMAP=y
455# CONFIG_MTD_PHYSMAP_COMPAT is not set
456CONFIG_MTD_PXA2XX=y
457# CONFIG_MTD_ARM_INTEGRATOR is not set
458# CONFIG_MTD_PLATRAM is not set
459
460#
461# Self-contained MTD device drivers
462#
463# CONFIG_MTD_SLRAM is not set
464# CONFIG_MTD_PHRAM is not set
465# CONFIG_MTD_MTDRAM is not set
466
467#
468# Disk-On-Chip Device Drivers
469#
470# CONFIG_MTD_DOC2000 is not set
471# CONFIG_MTD_DOC2001 is not set
472# CONFIG_MTD_DOC2001PLUS is not set
473# CONFIG_MTD_NAND is not set
474# CONFIG_MTD_ONENAND is not set
475
476#
477# LPDDR flash memory drivers
478#
479# CONFIG_MTD_LPDDR is not set
480
481#
482# UBI - Unsorted block images
483#
484# CONFIG_MTD_UBI is not set
485# CONFIG_PARPORT is not set
486# CONFIG_MISC_DEVICES is not set
487CONFIG_HAVE_IDE=y
488
489#
490# SCSI device support
491#
492# CONFIG_SCSI_DMA is not set
493# CONFIG_SCSI_NETLINK is not set
494CONFIG_NETDEVICES=y
495# CONFIG_DUMMY is not set
496# CONFIG_BONDING is not set
497# CONFIG_MACVLAN is not set
498# CONFIG_EQUALIZER is not set
499# CONFIG_TUN is not set
500# CONFIG_VETH is not set
501# CONFIG_PHYLIB is not set
502CONFIG_NET_ETHERNET=y
503CONFIG_MII=y
504# CONFIG_AX88796 is not set
505CONFIG_SMC91X=y
506# CONFIG_DM9000 is not set
507# CONFIG_ETHOC is not set
508# CONFIG_SMC911X is not set
509# CONFIG_SMSC911X is not set
510# CONFIG_DNET is not set
511# CONFIG_IBM_NEW_EMAC_ZMII is not set
512# CONFIG_IBM_NEW_EMAC_RGMII is not set
513# CONFIG_IBM_NEW_EMAC_TAH is not set
514# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
515# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
516# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
517# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
518# CONFIG_B44 is not set
519# CONFIG_KS8842 is not set
520# CONFIG_NETDEV_1000 is not set
521# CONFIG_NETDEV_10000 is not set
522
523#
524# Wireless LAN
525#
526# CONFIG_WLAN_PRE80211 is not set
527# CONFIG_WLAN_80211 is not set
528
529#
530# Enable WiMAX (Networking options) to see the WiMAX drivers
531#
532# CONFIG_WAN is not set
533# CONFIG_PPP is not set
534# CONFIG_SLIP is not set
535# CONFIG_NETCONSOLE is not set
536# CONFIG_NETPOLL is not set
537# CONFIG_NET_POLL_CONTROLLER is not set
538# CONFIG_ISDN is not set
539
540#
541# Input device support
542#
543CONFIG_INPUT=y
544# CONFIG_INPUT_FF_MEMLESS is not set
545# CONFIG_INPUT_POLLDEV is not set
546
547#
548# Userland interfaces
549#
550# CONFIG_INPUT_MOUSEDEV is not set
551# CONFIG_INPUT_JOYDEV is not set
552# CONFIG_INPUT_EVDEV is not set
553# CONFIG_INPUT_EVBUG is not set
554
555#
556# Input Device Drivers
557#
558# CONFIG_INPUT_KEYBOARD is not set
559# CONFIG_INPUT_MOUSE is not set
560# CONFIG_INPUT_JOYSTICK is not set
561# CONFIG_INPUT_TABLET is not set
562# CONFIG_INPUT_TOUCHSCREEN is not set
563# CONFIG_INPUT_MISC is not set
564
565#
566# Hardware I/O ports
567#
568# CONFIG_SERIO is not set
569# CONFIG_GAMEPORT is not set
570
571#
572# Character devices
573#
574CONFIG_VT=y
575CONFIG_CONSOLE_TRANSLATIONS=y
576CONFIG_VT_CONSOLE=y
577CONFIG_HW_CONSOLE=y
578# CONFIG_VT_HW_CONSOLE_BINDING is not set
579# CONFIG_DEVKMEM is not set
580# CONFIG_SERIAL_NONSTANDARD is not set
581
582#
583# Serial drivers
584#
585# CONFIG_SERIAL_8250 is not set
586
587#
588# Non-8250 serial port support
589#
590CONFIG_SERIAL_PXA=y
591CONFIG_SERIAL_PXA_CONSOLE=y
592CONFIG_SERIAL_CORE=y
593CONFIG_SERIAL_CORE_CONSOLE=y
594CONFIG_UNIX98_PTYS=y
595# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
596# CONFIG_LEGACY_PTYS is not set
597# CONFIG_IPMI_HANDLER is not set
598# CONFIG_HW_RANDOM is not set
599# CONFIG_R3964 is not set
600# CONFIG_TCG_TPM is not set
601CONFIG_I2C=m
602CONFIG_I2C_BOARDINFO=y
603CONFIG_I2C_CHARDEV=m
604CONFIG_I2C_HELPER_AUTO=y
605
606#
607# I2C Hardware Bus support
608#
609
610#
611# I2C system bus drivers (mostly embedded / system-on-chip)
612#
613# CONFIG_I2C_DESIGNWARE is not set
614# CONFIG_I2C_GPIO is not set
615# CONFIG_I2C_OCORES is not set
616CONFIG_I2C_PXA=m
617# CONFIG_I2C_PXA_SLAVE is not set
618# CONFIG_I2C_SIMTEC is not set
619
620#
621# External I2C/SMBus adapter drivers
622#
623# CONFIG_I2C_PARPORT_LIGHT is not set
624# CONFIG_I2C_TAOS_EVM is not set
625
626#
627# Other I2C/SMBus bus drivers
628#
629# CONFIG_I2C_PCA_PLATFORM is not set
630# CONFIG_I2C_STUB is not set
631
632#
633# Miscellaneous I2C Chip support
634#
635# CONFIG_DS1682 is not set
636# CONFIG_SENSORS_PCF8574 is not set
637# CONFIG_PCF8575 is not set
638# CONFIG_SENSORS_PCA9539 is not set
639# CONFIG_SENSORS_TSL2550 is not set
640# CONFIG_I2C_DEBUG_CORE is not set
641# CONFIG_I2C_DEBUG_ALGO is not set
642# CONFIG_I2C_DEBUG_BUS is not set
643# CONFIG_I2C_DEBUG_CHIP is not set
644# CONFIG_SPI is not set
645CONFIG_ARCH_REQUIRE_GPIOLIB=y
646CONFIG_GPIOLIB=y
647# CONFIG_DEBUG_GPIO is not set
648# CONFIG_GPIO_SYSFS is not set
649
650#
651# Memory mapped GPIO expanders:
652#
653
654#
655# I2C GPIO expanders:
656#
657# CONFIG_GPIO_MAX732X is not set
658# CONFIG_GPIO_PCA953X is not set
659# CONFIG_GPIO_PCF857X is not set
660
661#
662# PCI GPIO expanders:
663#
664
665#
666# SPI GPIO expanders:
667#
668# CONFIG_W1 is not set
669# CONFIG_POWER_SUPPLY is not set
670CONFIG_HWMON=m
671# CONFIG_HWMON_VID is not set
672# CONFIG_SENSORS_AD7414 is not set
673# CONFIG_SENSORS_AD7418 is not set
674CONFIG_SENSORS_ADM1021=m
675# CONFIG_SENSORS_ADM1025 is not set
676# CONFIG_SENSORS_ADM1026 is not set
677# CONFIG_SENSORS_ADM1029 is not set
678# CONFIG_SENSORS_ADM1031 is not set
679# CONFIG_SENSORS_ADM9240 is not set
680# CONFIG_SENSORS_ADT7462 is not set
681# CONFIG_SENSORS_ADT7470 is not set
682# CONFIG_SENSORS_ADT7473 is not set
683# CONFIG_SENSORS_ADT7475 is not set
684# CONFIG_SENSORS_ATXP1 is not set
685# CONFIG_SENSORS_DS1621 is not set
686# CONFIG_SENSORS_F71805F is not set
687# CONFIG_SENSORS_F71882FG is not set
688# CONFIG_SENSORS_F75375S is not set
689# CONFIG_SENSORS_G760A is not set
690# CONFIG_SENSORS_GL518SM is not set
691# CONFIG_SENSORS_GL520SM is not set
692# CONFIG_SENSORS_IT87 is not set
693# CONFIG_SENSORS_LM63 is not set
694# CONFIG_SENSORS_LM75 is not set
695# CONFIG_SENSORS_LM77 is not set
696# CONFIG_SENSORS_LM78 is not set
697# CONFIG_SENSORS_LM80 is not set
698# CONFIG_SENSORS_LM83 is not set
699# CONFIG_SENSORS_LM85 is not set
700# CONFIG_SENSORS_LM87 is not set
701# CONFIG_SENSORS_LM90 is not set
702# CONFIG_SENSORS_LM92 is not set
703# CONFIG_SENSORS_LM93 is not set
704# CONFIG_SENSORS_LTC4215 is not set
705# CONFIG_SENSORS_LTC4245 is not set
706# CONFIG_SENSORS_LM95241 is not set
707# CONFIG_SENSORS_MAX1619 is not set
708CONFIG_SENSORS_MAX6650=m
709# CONFIG_SENSORS_PC87360 is not set
710# CONFIG_SENSORS_PC87427 is not set
711# CONFIG_SENSORS_PCF8591 is not set
712# CONFIG_SENSORS_SHT15 is not set
713# CONFIG_SENSORS_DME1737 is not set
714# CONFIG_SENSORS_SMSC47M1 is not set
715# CONFIG_SENSORS_SMSC47M192 is not set
716# CONFIG_SENSORS_SMSC47B397 is not set
717# CONFIG_SENSORS_ADS7828 is not set
718# CONFIG_SENSORS_THMC50 is not set
719# CONFIG_SENSORS_TMP401 is not set
720# CONFIG_SENSORS_VT1211 is not set
721# CONFIG_SENSORS_W83781D is not set
722# CONFIG_SENSORS_W83791D is not set
723# CONFIG_SENSORS_W83792D is not set
724# CONFIG_SENSORS_W83793 is not set
725# CONFIG_SENSORS_W83L785TS is not set
726# CONFIG_SENSORS_W83L786NG is not set
727# CONFIG_SENSORS_W83627HF is not set
728# CONFIG_SENSORS_W83627EHF is not set
729# CONFIG_HWMON_DEBUG_CHIP is not set
730# CONFIG_THERMAL is not set
731# CONFIG_WATCHDOG is not set
732CONFIG_SSB_POSSIBLE=y
733
734#
735# Sonics Silicon Backplane
736#
737# CONFIG_SSB is not set
738
739#
740# Multifunction device drivers
741#
742# CONFIG_MFD_CORE is not set
743# CONFIG_MFD_SM501 is not set
744# CONFIG_MFD_ASIC3 is not set
745# CONFIG_HTC_EGPIO is not set
746# CONFIG_HTC_PASIC3 is not set
747# CONFIG_TPS65010 is not set
748# CONFIG_MFD_TMIO is not set
749# CONFIG_MFD_T7L66XB is not set
750# CONFIG_MFD_TC6387XB is not set
751# CONFIG_MFD_TC6393XB is not set
752# CONFIG_MFD_WM8400 is not set
753# CONFIG_MFD_WM8350_I2C is not set
754# CONFIG_MFD_PCF50633 is not set
755# CONFIG_AB3100_CORE is not set
756# CONFIG_MEDIA_SUPPORT is not set
757
758#
759# Graphics support
760#
761# CONFIG_VGASTATE is not set
762# CONFIG_VIDEO_OUTPUT_CONTROL is not set
763# CONFIG_FB is not set
764# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
765
766#
767# Display device support
768#
769# CONFIG_DISPLAY_SUPPORT is not set
770
771#
772# Console display driver support
773#
774# CONFIG_VGA_CONSOLE is not set
775CONFIG_DUMMY_CONSOLE=y
776# CONFIG_SOUND is not set
777# CONFIG_HID_SUPPORT is not set
778# CONFIG_USB_SUPPORT is not set
779# CONFIG_MMC is not set
780# CONFIG_MEMSTICK is not set
781# CONFIG_ACCESSIBILITY is not set
782# CONFIG_NEW_LEDS is not set
783CONFIG_RTC_LIB=y
784CONFIG_RTC_CLASS=m
785
786#
787# RTC interfaces
788#
789CONFIG_RTC_INTF_SYSFS=y
790CONFIG_RTC_INTF_PROC=y
791CONFIG_RTC_INTF_DEV=y
792# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
793# CONFIG_RTC_DRV_TEST is not set
794
795#
796# I2C RTC drivers
797#
798# CONFIG_RTC_DRV_DS1307 is not set
799# CONFIG_RTC_DRV_DS1374 is not set
800# CONFIG_RTC_DRV_DS1672 is not set
801# CONFIG_RTC_DRV_MAX6900 is not set
802# CONFIG_RTC_DRV_RS5C372 is not set
803# CONFIG_RTC_DRV_ISL1208 is not set
804# CONFIG_RTC_DRV_X1205 is not set
805# CONFIG_RTC_DRV_PCF8563 is not set
806# CONFIG_RTC_DRV_PCF8583 is not set
807# CONFIG_RTC_DRV_M41T80 is not set
808# CONFIG_RTC_DRV_S35390A is not set
809# CONFIG_RTC_DRV_FM3130 is not set
810# CONFIG_RTC_DRV_RX8581 is not set
811# CONFIG_RTC_DRV_RX8025 is not set
812
813#
814# SPI RTC drivers
815#
816
817#
818# Platform RTC drivers
819#
820# CONFIG_RTC_DRV_CMOS is not set
821# CONFIG_RTC_DRV_DS1286 is not set
822# CONFIG_RTC_DRV_DS1511 is not set
823# CONFIG_RTC_DRV_DS1553 is not set
824# CONFIG_RTC_DRV_DS1742 is not set
825# CONFIG_RTC_DRV_STK17TA8 is not set
826# CONFIG_RTC_DRV_M48T86 is not set
827# CONFIG_RTC_DRV_M48T35 is not set
828# CONFIG_RTC_DRV_M48T59 is not set
829# CONFIG_RTC_DRV_BQ4802 is not set
830# CONFIG_RTC_DRV_V3020 is not set
831
832#
833# on-CPU RTC drivers
834#
835CONFIG_RTC_DRV_SA1100=m
836# CONFIG_RTC_DRV_PXA is not set
837CONFIG_DMADEVICES=y
838
839#
840# DMA Devices
841#
842# CONFIG_AUXDISPLAY is not set
843# CONFIG_REGULATOR is not set
844# CONFIG_UIO is not set
845# CONFIG_STAGING is not set
846
847#
848# File systems
849#
850CONFIG_FILE_LOCKING=y
851# CONFIG_FSNOTIFY is not set
852# CONFIG_DNOTIFY is not set
853# CONFIG_INOTIFY is not set
854# CONFIG_INOTIFY_USER is not set
855# CONFIG_QUOTA is not set
856# CONFIG_AUTOFS_FS is not set
857# CONFIG_AUTOFS4_FS is not set
858# CONFIG_FUSE_FS is not set
859
860#
861# Caches
862#
863# CONFIG_FSCACHE is not set
864
865#
866# Pseudo filesystems
867#
868CONFIG_PROC_FS=y
869CONFIG_PROC_SYSCTL=y
870CONFIG_PROC_PAGE_MONITOR=y
871CONFIG_SYSFS=y
872CONFIG_TMPFS=y
873# CONFIG_TMPFS_POSIX_ACL is not set
874# CONFIG_HUGETLB_PAGE is not set
875# CONFIG_CONFIGFS_FS is not set
876CONFIG_MISC_FILESYSTEMS=y
877CONFIG_JFFS2_FS=y
878CONFIG_JFFS2_FS_DEBUG=0
879CONFIG_JFFS2_FS_WRITEBUFFER=y
880CONFIG_JFFS2_FS_WBUF_VERIFY=y
881# CONFIG_JFFS2_SUMMARY is not set
882# CONFIG_JFFS2_FS_XATTR is not set
883# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
884CONFIG_JFFS2_ZLIB=y
885# CONFIG_JFFS2_LZO is not set
886CONFIG_JFFS2_RTIME=y
887# CONFIG_JFFS2_RUBIN is not set
888# CONFIG_ROMFS_FS is not set
889CONFIG_NETWORK_FILESYSTEMS=y
890CONFIG_NFS_FS=m
891CONFIG_NFS_V3=y
892# CONFIG_NFS_V3_ACL is not set
893# CONFIG_NFS_V4 is not set
894# CONFIG_NFSD is not set
895CONFIG_LOCKD=m
896CONFIG_LOCKD_V4=y
897CONFIG_NFS_COMMON=y
898CONFIG_SUNRPC=m
899# CONFIG_RPCSEC_GSS_KRB5 is not set
900# CONFIG_RPCSEC_GSS_SPKM3 is not set
901# CONFIG_SMB_FS is not set
902# CONFIG_CIFS is not set
903# CONFIG_NCP_FS is not set
904# CONFIG_CODA_FS is not set
905# CONFIG_AFS_FS is not set
906CONFIG_NLS=m
907CONFIG_NLS_DEFAULT="utf8"
908# CONFIG_NLS_CODEPAGE_437 is not set
909# CONFIG_NLS_CODEPAGE_737 is not set
910# CONFIG_NLS_CODEPAGE_775 is not set
911# CONFIG_NLS_CODEPAGE_850 is not set
912# CONFIG_NLS_CODEPAGE_852 is not set
913# CONFIG_NLS_CODEPAGE_855 is not set
914# CONFIG_NLS_CODEPAGE_857 is not set
915# CONFIG_NLS_CODEPAGE_860 is not set
916# CONFIG_NLS_CODEPAGE_861 is not set
917# CONFIG_NLS_CODEPAGE_862 is not set
918# CONFIG_NLS_CODEPAGE_863 is not set
919# CONFIG_NLS_CODEPAGE_864 is not set
920# CONFIG_NLS_CODEPAGE_865 is not set
921# CONFIG_NLS_CODEPAGE_866 is not set
922# CONFIG_NLS_CODEPAGE_869 is not set
923# CONFIG_NLS_CODEPAGE_936 is not set
924# CONFIG_NLS_CODEPAGE_950 is not set
925# CONFIG_NLS_CODEPAGE_932 is not set
926# CONFIG_NLS_CODEPAGE_949 is not set
927# CONFIG_NLS_CODEPAGE_874 is not set
928# CONFIG_NLS_ISO8859_8 is not set
929# CONFIG_NLS_CODEPAGE_1250 is not set
930# CONFIG_NLS_CODEPAGE_1251 is not set
931# CONFIG_NLS_ASCII is not set
932# CONFIG_NLS_ISO8859_1 is not set
933# CONFIG_NLS_ISO8859_2 is not set
934# CONFIG_NLS_ISO8859_3 is not set
935# CONFIG_NLS_ISO8859_4 is not set
936# CONFIG_NLS_ISO8859_5 is not set
937# CONFIG_NLS_ISO8859_6 is not set
938# CONFIG_NLS_ISO8859_7 is not set
939# CONFIG_NLS_ISO8859_9 is not set
940# CONFIG_NLS_ISO8859_13 is not set
941# CONFIG_NLS_ISO8859_14 is not set
942# CONFIG_NLS_ISO8859_15 is not set
943# CONFIG_NLS_KOI8_R is not set
944# CONFIG_NLS_KOI8_U is not set
945CONFIG_NLS_UTF8=m
946# CONFIG_DLM is not set
947
948#
949# Kernel hacking
950#
951CONFIG_PRINTK_TIME=y
952CONFIG_ENABLE_WARN_DEPRECATED=y
953CONFIG_ENABLE_MUST_CHECK=y
954CONFIG_FRAME_WARN=1024
955# CONFIG_MAGIC_SYSRQ is not set
956# CONFIG_UNUSED_SYMBOLS is not set
957# CONFIG_DEBUG_FS is not set
958# CONFIG_HEADERS_CHECK is not set
959CONFIG_DEBUG_KERNEL=y
960# CONFIG_DEBUG_SHIRQ is not set
961CONFIG_DETECT_SOFTLOCKUP=y
962# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
963CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
964CONFIG_DETECT_HUNG_TASK=y
965# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
966CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
967# CONFIG_SCHED_DEBUG is not set
968# CONFIG_SCHEDSTATS is not set
969# CONFIG_TIMER_STATS is not set
970# CONFIG_DEBUG_OBJECTS is not set
971# CONFIG_DEBUG_KMEMLEAK is not set
972# CONFIG_DEBUG_RT_MUTEXES is not set
973# CONFIG_RT_MUTEX_TESTER is not set
974# CONFIG_DEBUG_SPINLOCK is not set
975# CONFIG_DEBUG_MUTEXES is not set
976# CONFIG_DEBUG_LOCK_ALLOC is not set
977# CONFIG_PROVE_LOCKING is not set
978# CONFIG_LOCK_STAT is not set
979# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
980# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
981# CONFIG_DEBUG_KOBJECT is not set
982# CONFIG_DEBUG_BUGVERBOSE is not set
983# CONFIG_DEBUG_INFO is not set
984# CONFIG_DEBUG_VM is not set
985# CONFIG_DEBUG_WRITECOUNT is not set
986# CONFIG_DEBUG_MEMORY_INIT is not set
987# CONFIG_DEBUG_LIST is not set
988# CONFIG_DEBUG_SG is not set
989# CONFIG_DEBUG_NOTIFIERS is not set
990CONFIG_FRAME_POINTER=y
991# CONFIG_BOOT_PRINTK_DELAY is not set
992# CONFIG_RCU_TORTURE_TEST is not set
993# CONFIG_RCU_CPU_STALL_DETECTOR is not set
994# CONFIG_KPROBES_SANITY_TEST is not set
995# CONFIG_BACKTRACE_SELF_TEST is not set
996# CONFIG_FAULT_INJECTION is not set
997# CONFIG_LATENCYTOP is not set
998CONFIG_SYSCTL_SYSCALL_CHECK=y
999# CONFIG_PAGE_POISONING is not set
1000CONFIG_HAVE_FUNCTION_TRACER=y
1001CONFIG_TRACING_SUPPORT=y
1002# CONFIG_FTRACE is not set
1003# CONFIG_SAMPLES is not set
1004CONFIG_HAVE_ARCH_KGDB=y
1005# CONFIG_KGDB is not set
1006# CONFIG_ARM_UNWIND is not set
1007# CONFIG_DEBUG_USER is not set
1008# CONFIG_DEBUG_ERRORS is not set
1009# CONFIG_DEBUG_STACK_USAGE is not set
1010# CONFIG_DEBUG_LL is not set
1011
1012#
1013# Security options
1014#
1015# CONFIG_KEYS is not set
1016# CONFIG_SECURITY is not set
1017# CONFIG_SECURITYFS is not set
1018# CONFIG_SECURITY_FILE_CAPABILITIES is not set
1019CONFIG_CRYPTO=y
1020
1021#
1022# Crypto core or helper
1023#
1024# CONFIG_CRYPTO_FIPS is not set
1025CONFIG_CRYPTO_ALGAPI=m
1026CONFIG_CRYPTO_ALGAPI2=m
1027CONFIG_CRYPTO_HASH=m
1028CONFIG_CRYPTO_HASH2=m
1029# CONFIG_CRYPTO_MANAGER is not set
1030# CONFIG_CRYPTO_MANAGER2 is not set
1031# CONFIG_CRYPTO_GF128MUL is not set
1032# CONFIG_CRYPTO_NULL is not set
1033# CONFIG_CRYPTO_CRYPTD is not set
1034# CONFIG_CRYPTO_AUTHENC is not set
1035# CONFIG_CRYPTO_TEST is not set
1036
1037#
1038# Authenticated Encryption with Associated Data
1039#
1040# CONFIG_CRYPTO_CCM is not set
1041# CONFIG_CRYPTO_GCM is not set
1042# CONFIG_CRYPTO_SEQIV is not set
1043
1044#
1045# Block modes
1046#
1047# CONFIG_CRYPTO_CBC is not set
1048# CONFIG_CRYPTO_CTR is not set
1049# CONFIG_CRYPTO_CTS is not set
1050# CONFIG_CRYPTO_ECB is not set
1051# CONFIG_CRYPTO_LRW is not set
1052# CONFIG_CRYPTO_PCBC is not set
1053# CONFIG_CRYPTO_XTS is not set
1054
1055#
1056# Hash modes
1057#
1058# CONFIG_CRYPTO_HMAC is not set
1059# CONFIG_CRYPTO_XCBC is not set
1060
1061#
1062# Digest
1063#
1064CONFIG_CRYPTO_CRC32C=m
1065# CONFIG_CRYPTO_MD4 is not set
1066# CONFIG_CRYPTO_MD5 is not set
1067# CONFIG_CRYPTO_MICHAEL_MIC is not set
1068# CONFIG_CRYPTO_RMD128 is not set
1069# CONFIG_CRYPTO_RMD160 is not set
1070# CONFIG_CRYPTO_RMD256 is not set
1071# CONFIG_CRYPTO_RMD320 is not set
1072# CONFIG_CRYPTO_SHA1 is not set
1073# CONFIG_CRYPTO_SHA256 is not set
1074# CONFIG_CRYPTO_SHA512 is not set
1075# CONFIG_CRYPTO_TGR192 is not set
1076# CONFIG_CRYPTO_WP512 is not set
1077
1078#
1079# Ciphers
1080#
1081# CONFIG_CRYPTO_AES is not set
1082# CONFIG_CRYPTO_ANUBIS is not set
1083# CONFIG_CRYPTO_ARC4 is not set
1084# CONFIG_CRYPTO_BLOWFISH is not set
1085# CONFIG_CRYPTO_CAMELLIA is not set
1086# CONFIG_CRYPTO_CAST5 is not set
1087# CONFIG_CRYPTO_CAST6 is not set
1088# CONFIG_CRYPTO_DES is not set
1089# CONFIG_CRYPTO_FCRYPT is not set
1090# CONFIG_CRYPTO_KHAZAD is not set
1091# CONFIG_CRYPTO_SALSA20 is not set
1092# CONFIG_CRYPTO_SEED is not set
1093# CONFIG_CRYPTO_SERPENT is not set
1094# CONFIG_CRYPTO_TEA is not set
1095# CONFIG_CRYPTO_TWOFISH is not set
1096
1097#
1098# Compression
1099#
1100# CONFIG_CRYPTO_DEFLATE is not set
1101# CONFIG_CRYPTO_ZLIB is not set
1102# CONFIG_CRYPTO_LZO is not set
1103
1104#
1105# Random Number Generation
1106#
1107# CONFIG_CRYPTO_ANSI_CPRNG is not set
1108# CONFIG_CRYPTO_HW is not set
1109# CONFIG_BINARY_PRINTF is not set
1110
1111#
1112# Library routines
1113#
1114CONFIG_BITREVERSE=y
1115CONFIG_GENERIC_FIND_LAST_BIT=y
1116# CONFIG_CRC_CCITT is not set
1117# CONFIG_CRC16 is not set
1118# CONFIG_CRC_T10DIF is not set
1119# CONFIG_CRC_ITU_T is not set
1120CONFIG_CRC32=y
1121# CONFIG_CRC7 is not set
1122CONFIG_LIBCRC32C=m
1123CONFIG_ZLIB_INFLATE=y
1124CONFIG_ZLIB_DEFLATE=y
1125CONFIG_DECOMPRESS_GZIP=y
1126CONFIG_HAS_IOMEM=y
1127CONFIG_HAS_IOPORT=y
1128CONFIG_HAS_DMA=y
1129CONFIG_NLATTR=y
diff --git a/arch/arm/configs/zylonite_defconfig b/arch/arm/configs/zylonite_defconfig
deleted file mode 100644
index 7949d04a3602..000000000000
--- a/arch/arm/configs/zylonite_defconfig
+++ /dev/null
@@ -1,736 +0,0 @@
1#
2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.23
4# Tue Oct 23 13:33:20 2007
5#
6CONFIG_ARM=y
7CONFIG_SYS_SUPPORTS_APM_EMULATION=y
8CONFIG_GENERIC_GPIO=y
9CONFIG_GENERIC_TIME=y
10CONFIG_GENERIC_CLOCKEVENTS=y
11CONFIG_MMU=y
12# CONFIG_NO_IOPORT is not set
13CONFIG_GENERIC_HARDIRQS=y
14CONFIG_STACKTRACE_SUPPORT=y
15CONFIG_LOCKDEP_SUPPORT=y
16CONFIG_TRACE_IRQFLAGS_SUPPORT=y
17CONFIG_HARDIRQS_SW_RESEND=y
18CONFIG_GENERIC_IRQ_PROBE=y
19CONFIG_RWSEM_GENERIC_SPINLOCK=y
20# CONFIG_ARCH_HAS_ILOG2_U32 is not set
21# CONFIG_ARCH_HAS_ILOG2_U64 is not set
22CONFIG_GENERIC_HWEIGHT=y
23CONFIG_GENERIC_CALIBRATE_DELAY=y
24CONFIG_ZONE_DMA=y
25CONFIG_ARCH_MTD_XIP=y
26CONFIG_VECTORS_BASE=0xffff0000
27CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
28
29#
30# General setup
31#
32CONFIG_EXPERIMENTAL=y
33CONFIG_BROKEN_ON_SMP=y
34CONFIG_INIT_ENV_ARG_LIMIT=32
35CONFIG_LOCALVERSION=""
36CONFIG_LOCALVERSION_AUTO=y
37CONFIG_SWAP=y
38CONFIG_SYSVIPC=y
39CONFIG_SYSVIPC_SYSCTL=y
40# CONFIG_POSIX_MQUEUE is not set
41# CONFIG_BSD_PROCESS_ACCT is not set
42# CONFIG_TASKSTATS is not set
43# CONFIG_USER_NS is not set
44# CONFIG_AUDIT is not set
45# CONFIG_IKCONFIG is not set
46CONFIG_LOG_BUF_SHIFT=18
47# CONFIG_CGROUPS is not set
48CONFIG_FAIR_GROUP_SCHED=y
49CONFIG_FAIR_USER_SCHED=y
50# CONFIG_FAIR_CGROUP_SCHED is not set
51CONFIG_SYSFS_DEPRECATED=y
52# CONFIG_RELAY is not set
53# CONFIG_BLK_DEV_INITRD is not set
54# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
55CONFIG_SYSCTL=y
56# CONFIG_EMBEDDED is not set
57CONFIG_UID16=y
58CONFIG_SYSCTL_SYSCALL=y
59CONFIG_KALLSYMS=y
60# CONFIG_KALLSYMS_EXTRA_PASS is not set
61CONFIG_HOTPLUG=y
62CONFIG_PRINTK=y
63CONFIG_BUG=y
64CONFIG_ELF_CORE=y
65CONFIG_BASE_FULL=y
66CONFIG_FUTEX=y
67CONFIG_ANON_INODES=y
68CONFIG_EPOLL=y
69CONFIG_SIGNALFD=y
70CONFIG_EVENTFD=y
71CONFIG_SHMEM=y
72CONFIG_VM_EVENT_COUNTERS=y
73CONFIG_SLUB_DEBUG=y
74# CONFIG_SLAB is not set
75CONFIG_SLUB=y
76# CONFIG_SLOB is not set
77CONFIG_RT_MUTEXES=y
78# CONFIG_TINY_SHMEM is not set
79CONFIG_BASE_SMALL=0
80CONFIG_MODULES=y
81# CONFIG_MODULE_UNLOAD is not set
82# CONFIG_MODVERSIONS is not set
83# CONFIG_MODULE_SRCVERSION_ALL is not set
84# CONFIG_KMOD is not set
85CONFIG_BLOCK=y
86# CONFIG_LBD is not set
87# CONFIG_BLK_DEV_IO_TRACE is not set
88# CONFIG_LSF is not set
89# CONFIG_BLK_DEV_BSG is not set
90
91#
92# IO Schedulers
93#
94CONFIG_IOSCHED_NOOP=y
95CONFIG_IOSCHED_AS=y
96CONFIG_IOSCHED_DEADLINE=y
97CONFIG_IOSCHED_CFQ=y
98# CONFIG_DEFAULT_AS is not set
99# CONFIG_DEFAULT_DEADLINE is not set
100CONFIG_DEFAULT_CFQ=y
101# CONFIG_DEFAULT_NOOP is not set
102CONFIG_DEFAULT_IOSCHED="cfq"
103
104#
105# System Type
106#
107# CONFIG_ARCH_AAEC2000 is not set
108# CONFIG_ARCH_INTEGRATOR is not set
109# CONFIG_ARCH_REALVIEW is not set
110# CONFIG_ARCH_VERSATILE is not set
111# CONFIG_ARCH_AT91 is not set
112# CONFIG_ARCH_CLPS7500 is not set
113# CONFIG_ARCH_CLPS711X is not set
114# CONFIG_ARCH_CO285 is not set
115# CONFIG_ARCH_EBSA110 is not set
116# CONFIG_ARCH_EP93XX is not set
117# CONFIG_ARCH_FOOTBRIDGE is not set
118# CONFIG_ARCH_NETX is not set
119# CONFIG_ARCH_H720X is not set
120# CONFIG_ARCH_IMX is not set
121# CONFIG_ARCH_IOP13XX is not set
122# CONFIG_ARCH_IOP32X is not set
123# CONFIG_ARCH_IOP33X is not set
124# CONFIG_ARCH_IXP23XX is not set
125# CONFIG_ARCH_IXP2000 is not set
126# CONFIG_ARCH_IXP4XX is not set
127# CONFIG_ARCH_L7200 is not set
128# CONFIG_ARCH_KS8695 is not set
129# CONFIG_ARCH_NS9XXX is not set
130# CONFIG_ARCH_MXC is not set
131# CONFIG_ARCH_PNX4008 is not set
132CONFIG_ARCH_PXA=y
133# CONFIG_ARCH_RPC is not set
134# CONFIG_ARCH_SA1100 is not set
135# CONFIG_ARCH_S3C2410 is not set
136# CONFIG_ARCH_SHARK is not set
137# CONFIG_ARCH_LH7A40X is not set
138# CONFIG_ARCH_DAVINCI is not set
139# CONFIG_ARCH_OMAP is not set
140
141#
142# Intel PXA2xx/PXA3xx Implementations
143#
144
145#
146# Supported PXA3xx Processor Variants
147#
148CONFIG_CPU_PXA300=y
149CONFIG_CPU_PXA310=y
150CONFIG_CPU_PXA320=y
151# CONFIG_ARCH_LUBBOCK is not set
152# CONFIG_MACH_LOGICPD_PXA270 is not set
153# CONFIG_MACH_MAINSTONE is not set
154# CONFIG_ARCH_PXA_IDP is not set
155# CONFIG_PXA_SHARPSL is not set
156# CONFIG_MACH_TRIZEPS4 is not set
157# CONFIG_MACH_EM_X270 is not set
158CONFIG_MACH_ZYLONITE=y
159# CONFIG_MACH_ARMCORE is not set
160CONFIG_PXA3xx=y
161
162#
163# Boot options
164#
165
166#
167# Power management
168#
169
170#
171# Processor Type
172#
173CONFIG_CPU_32=y
174CONFIG_CPU_XSC3=y
175CONFIG_CPU_32v5=y
176CONFIG_CPU_ABRT_EV5T=y
177CONFIG_CPU_CACHE_VIVT=y
178CONFIG_CPU_TLB_V4WBI=y
179CONFIG_CPU_CP15=y
180CONFIG_CPU_CP15_MMU=y
181CONFIG_IO_36=y
182
183#
184# Processor Features
185#
186# CONFIG_ARM_THUMB is not set
187# CONFIG_CPU_DCACHE_DISABLE is not set
188# CONFIG_CPU_BPREDICT_DISABLE is not set
189# CONFIG_OUTER_CACHE is not set
190CONFIG_IWMMXT=y
191
192#
193# Bus support
194#
195# CONFIG_PCI_SYSCALL is not set
196# CONFIG_ARCH_SUPPORTS_MSI is not set
197# CONFIG_PCCARD is not set
198
199#
200# Kernel Features
201#
202# CONFIG_TICK_ONESHOT is not set
203# CONFIG_NO_HZ is not set
204# CONFIG_HIGH_RES_TIMERS is not set
205CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
206# CONFIG_PREEMPT is not set
207CONFIG_HZ=100
208CONFIG_AEABI=y
209CONFIG_OABI_COMPAT=y
210# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set
211CONFIG_SELECT_MEMORY_MODEL=y
212CONFIG_FLATMEM_MANUAL=y
213# CONFIG_DISCONTIGMEM_MANUAL is not set
214# CONFIG_SPARSEMEM_MANUAL is not set
215CONFIG_FLATMEM=y
216CONFIG_FLAT_NODE_MEM_MAP=y
217# CONFIG_SPARSEMEM_STATIC is not set
218# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
219CONFIG_SPLIT_PTLOCK_CPUS=4096
220# CONFIG_RESOURCES_64BIT is not set
221CONFIG_ZONE_DMA_FLAG=1
222CONFIG_BOUNCE=y
223CONFIG_VIRT_TO_BUS=y
224CONFIG_ALIGNMENT_TRAP=y
225
226#
227# Boot options
228#
229CONFIG_ZBOOT_ROM_TEXT=0x0
230CONFIG_ZBOOT_ROM_BSS=0x0
231CONFIG_CMDLINE="root=/dev/nfs rootfstype=nfs nfsroot=192.168.1.100:/nfs/rootfs/ ip=192.168.1.101:192.168.1.100::255.255.255.0::eth0:on console=ttyS0,38400 mem=64M debug"
232# CONFIG_XIP_KERNEL is not set
233# CONFIG_KEXEC is not set
234
235#
236# Floating point emulation
237#
238
239#
240# At least one emulation must be selected
241#
242CONFIG_FPE_NWFPE=y
243# CONFIG_FPE_NWFPE_XP is not set
244# CONFIG_FPE_FASTFPE is not set
245
246#
247# Userspace binary formats
248#
249CONFIG_BINFMT_ELF=y
250# CONFIG_BINFMT_AOUT is not set
251# CONFIG_BINFMT_MISC is not set
252
253#
254# Power management options
255#
256# CONFIG_PM is not set
257CONFIG_SUSPEND_UP_POSSIBLE=y
258
259#
260# Networking
261#
262CONFIG_NET=y
263
264#
265# Networking options
266#
267CONFIG_PACKET=y
268# CONFIG_PACKET_MMAP is not set
269CONFIG_UNIX=y
270# CONFIG_NET_KEY is not set
271CONFIG_INET=y
272# CONFIG_IP_MULTICAST is not set
273# CONFIG_IP_ADVANCED_ROUTER is not set
274CONFIG_IP_FIB_HASH=y
275CONFIG_IP_PNP=y
276CONFIG_IP_PNP_DHCP=y
277CONFIG_IP_PNP_BOOTP=y
278CONFIG_IP_PNP_RARP=y
279# CONFIG_NET_IPIP is not set
280# CONFIG_NET_IPGRE is not set
281# CONFIG_ARPD is not set
282# CONFIG_SYN_COOKIES is not set
283# CONFIG_INET_AH is not set
284# CONFIG_INET_ESP is not set
285# CONFIG_INET_IPCOMP is not set
286# CONFIG_INET_XFRM_TUNNEL is not set
287# CONFIG_INET_TUNNEL is not set
288# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
289# CONFIG_INET_XFRM_MODE_TUNNEL is not set
290# CONFIG_INET_XFRM_MODE_BEET is not set
291# CONFIG_INET_LRO is not set
292# CONFIG_INET_DIAG is not set
293# CONFIG_TCP_CONG_ADVANCED is not set
294CONFIG_TCP_CONG_CUBIC=y
295CONFIG_DEFAULT_TCP_CONG="cubic"
296# CONFIG_TCP_MD5SIG is not set
297# CONFIG_IPV6 is not set
298# CONFIG_INET6_XFRM_TUNNEL is not set
299# CONFIG_INET6_TUNNEL is not set
300# CONFIG_NETWORK_SECMARK is not set
301# CONFIG_NETFILTER is not set
302# CONFIG_IP_DCCP is not set
303# CONFIG_IP_SCTP is not set
304# CONFIG_TIPC is not set
305# CONFIG_ATM is not set
306# CONFIG_BRIDGE is not set
307# CONFIG_VLAN_8021Q is not set
308# CONFIG_DECNET is not set
309# CONFIG_LLC2 is not set
310# CONFIG_IPX is not set
311# CONFIG_ATALK is not set
312# CONFIG_X25 is not set
313# CONFIG_LAPB is not set
314# CONFIG_ECONET is not set
315# CONFIG_WAN_ROUTER is not set
316# CONFIG_NET_SCHED is not set
317
318#
319# Network testing
320#
321# CONFIG_NET_PKTGEN is not set
322# CONFIG_HAMRADIO is not set
323# CONFIG_IRDA is not set
324# CONFIG_BT is not set
325# CONFIG_AF_RXRPC is not set
326
327#
328# Wireless
329#
330# CONFIG_CFG80211 is not set
331# CONFIG_WIRELESS_EXT is not set
332# CONFIG_MAC80211 is not set
333# CONFIG_IEEE80211 is not set
334# CONFIG_RFKILL is not set
335# CONFIG_NET_9P is not set
336
337#
338# Device Drivers
339#
340
341#
342# Generic Driver Options
343#
344CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
345CONFIG_STANDALONE=y
346CONFIG_PREVENT_FIRMWARE_BUILD=y
347CONFIG_FW_LOADER=y
348# CONFIG_SYS_HYPERVISOR is not set
349# CONFIG_CONNECTOR is not set
350# CONFIG_MTD is not set
351# CONFIG_PARPORT is not set
352# CONFIG_BLK_DEV is not set
353# CONFIG_MISC_DEVICES is not set
354# CONFIG_IDE is not set
355
356#
357# SCSI device support
358#
359# CONFIG_RAID_ATTRS is not set
360# CONFIG_SCSI is not set
361# CONFIG_SCSI_DMA is not set
362# CONFIG_SCSI_NETLINK is not set
363# CONFIG_ATA is not set
364# CONFIG_MD is not set
365CONFIG_NETDEVICES=y
366# CONFIG_NETDEVICES_MULTIQUEUE is not set
367# CONFIG_DUMMY is not set
368# CONFIG_BONDING is not set
369# CONFIG_MACVLAN is not set
370# CONFIG_EQUALIZER is not set
371# CONFIG_TUN is not set
372# CONFIG_VETH is not set
373# CONFIG_PHYLIB is not set
374CONFIG_NET_ETHERNET=y
375CONFIG_MII=y
376# CONFIG_AX88796 is not set
377CONFIG_SMC91X=y
378# CONFIG_DM9000 is not set
379# CONFIG_SMC911X is not set
380# CONFIG_IBM_NEW_EMAC_ZMII is not set
381# CONFIG_IBM_NEW_EMAC_RGMII is not set
382# CONFIG_IBM_NEW_EMAC_TAH is not set
383# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
384# CONFIG_B44 is not set
385# CONFIG_NETDEV_1000 is not set
386# CONFIG_NETDEV_10000 is not set
387
388#
389# Wireless LAN
390#
391# CONFIG_WLAN_PRE80211 is not set
392# CONFIG_WLAN_80211 is not set
393# CONFIG_WAN is not set
394# CONFIG_PPP is not set
395# CONFIG_SLIP is not set
396# CONFIG_SHAPER is not set
397# CONFIG_NETCONSOLE is not set
398# CONFIG_NETPOLL is not set
399# CONFIG_NET_POLL_CONTROLLER is not set
400# CONFIG_ISDN is not set
401
402#
403# Input device support
404#
405CONFIG_INPUT=y
406# CONFIG_INPUT_FF_MEMLESS is not set
407# CONFIG_INPUT_POLLDEV is not set
408
409#
410# Userland interfaces
411#
412CONFIG_INPUT_MOUSEDEV=y
413# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
414CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
415CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
416# CONFIG_INPUT_JOYDEV is not set
417# CONFIG_INPUT_EVDEV is not set
418# CONFIG_INPUT_EVBUG is not set
419
420#
421# Input Device Drivers
422#
423# CONFIG_INPUT_KEYBOARD is not set
424# CONFIG_INPUT_MOUSE is not set
425# CONFIG_INPUT_JOYSTICK is not set
426# CONFIG_INPUT_TABLET is not set
427# CONFIG_INPUT_TOUCHSCREEN is not set
428# CONFIG_INPUT_MISC is not set
429
430#
431# Hardware I/O ports
432#
433# CONFIG_SERIO is not set
434# CONFIG_GAMEPORT is not set
435
436#
437# Character devices
438#
439CONFIG_VT=y
440CONFIG_VT_CONSOLE=y
441CONFIG_HW_CONSOLE=y
442# CONFIG_VT_HW_CONSOLE_BINDING is not set
443# CONFIG_SERIAL_NONSTANDARD is not set
444
445#
446# Serial drivers
447#
448# CONFIG_SERIAL_8250 is not set
449
450#
451# Non-8250 serial port support
452#
453CONFIG_SERIAL_PXA=y
454CONFIG_SERIAL_PXA_CONSOLE=y
455CONFIG_SERIAL_CORE=y
456CONFIG_SERIAL_CORE_CONSOLE=y
457CONFIG_UNIX98_PTYS=y
458# CONFIG_LEGACY_PTYS is not set
459# CONFIG_IPMI_HANDLER is not set
460# CONFIG_HW_RANDOM is not set
461# CONFIG_NVRAM is not set
462# CONFIG_R3964 is not set
463# CONFIG_RAW_DRIVER is not set
464# CONFIG_TCG_TPM is not set
465# CONFIG_I2C is not set
466
467#
468# SPI support
469#
470# CONFIG_SPI is not set
471# CONFIG_SPI_MASTER is not set
472# CONFIG_W1 is not set
473# CONFIG_POWER_SUPPLY is not set
474# CONFIG_HWMON is not set
475
476#
477# Sonics Silicon Backplane
478#
479CONFIG_SSB_POSSIBLE=y
480# CONFIG_SSB is not set
481
482#
483# Multifunction device drivers
484#
485# CONFIG_MFD_SM501 is not set
486
487#
488# Multimedia devices
489#
490# CONFIG_VIDEO_DEV is not set
491# CONFIG_DVB_CORE is not set
492# CONFIG_DAB is not set
493
494#
495# Graphics support
496#
497# CONFIG_VGASTATE is not set
498# CONFIG_VIDEO_OUTPUT_CONTROL is not set
499CONFIG_FB=y
500# CONFIG_FIRMWARE_EDID is not set
501# CONFIG_FB_DDC is not set
502CONFIG_FB_CFB_FILLRECT=y
503CONFIG_FB_CFB_COPYAREA=y
504CONFIG_FB_CFB_IMAGEBLIT=y
505# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
506# CONFIG_FB_SYS_FILLRECT is not set
507# CONFIG_FB_SYS_COPYAREA is not set
508# CONFIG_FB_SYS_IMAGEBLIT is not set
509# CONFIG_FB_SYS_FOPS is not set
510CONFIG_FB_DEFERRED_IO=y
511# CONFIG_FB_SVGALIB is not set
512# CONFIG_FB_MACMODES is not set
513# CONFIG_FB_BACKLIGHT is not set
514# CONFIG_FB_MODE_HELPERS is not set
515# CONFIG_FB_TILEBLITTING is not set
516
517#
518# Frame buffer hardware drivers
519#
520# CONFIG_FB_S1D13XXX is not set
521CONFIG_FB_PXA=y
522# CONFIG_FB_PXA_PARAMETERS is not set
523# CONFIG_FB_MBX is not set
524# CONFIG_FB_VIRTUAL is not set
525# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
526
527#
528# Display device support
529#
530# CONFIG_DISPLAY_SUPPORT is not set
531
532#
533# Console display driver support
534#
535# CONFIG_VGA_CONSOLE is not set
536CONFIG_DUMMY_CONSOLE=y
537CONFIG_FRAMEBUFFER_CONSOLE=y
538CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
539# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
540CONFIG_FONTS=y
541# CONFIG_FONT_8x8 is not set
542# CONFIG_FONT_8x16 is not set
543CONFIG_FONT_6x11=y
544# CONFIG_FONT_7x14 is not set
545# CONFIG_FONT_PEARL_8x8 is not set
546# CONFIG_FONT_ACORN_8x8 is not set
547# CONFIG_FONT_MINI_4x6 is not set
548# CONFIG_FONT_SUN8x16 is not set
549# CONFIG_FONT_SUN12x22 is not set
550# CONFIG_FONT_10x18 is not set
551CONFIG_LOGO=y
552CONFIG_LOGO_LINUX_MONO=y
553CONFIG_LOGO_LINUX_VGA16=y
554CONFIG_LOGO_LINUX_CLUT224=y
555
556#
557# Sound
558#
559# CONFIG_SOUND is not set
560# CONFIG_HID_SUPPORT is not set
561# CONFIG_USB_SUPPORT is not set
562# CONFIG_MMC is not set
563# CONFIG_NEW_LEDS is not set
564CONFIG_RTC_LIB=y
565# CONFIG_RTC_CLASS is not set
566
567#
568# File systems
569#
570# CONFIG_EXT2_FS is not set
571# CONFIG_EXT3_FS is not set
572# CONFIG_EXT4DEV_FS is not set
573# CONFIG_REISERFS_FS is not set
574# CONFIG_JFS_FS is not set
575CONFIG_FS_POSIX_ACL=y
576# CONFIG_XFS_FS is not set
577# CONFIG_GFS2_FS is not set
578# CONFIG_OCFS2_FS is not set
579# CONFIG_MINIX_FS is not set
580# CONFIG_ROMFS_FS is not set
581# CONFIG_INOTIFY is not set
582# CONFIG_QUOTA is not set
583CONFIG_DNOTIFY=y
584# CONFIG_AUTOFS_FS is not set
585# CONFIG_AUTOFS4_FS is not set
586# CONFIG_FUSE_FS is not set
587
588#
589# CD-ROM/DVD Filesystems
590#
591# CONFIG_ISO9660_FS is not set
592# CONFIG_UDF_FS is not set
593
594#
595# DOS/FAT/NT Filesystems
596#
597# CONFIG_MSDOS_FS is not set
598# CONFIG_VFAT_FS is not set
599# CONFIG_NTFS_FS is not set
600
601#
602# Pseudo filesystems
603#
604CONFIG_PROC_FS=y
605CONFIG_PROC_SYSCTL=y
606CONFIG_SYSFS=y
607# CONFIG_TMPFS is not set
608# CONFIG_HUGETLB_PAGE is not set
609# CONFIG_CONFIGFS_FS is not set
610
611#
612# Miscellaneous filesystems
613#
614# CONFIG_ADFS_FS is not set
615# CONFIG_AFFS_FS is not set
616# CONFIG_HFS_FS is not set
617# CONFIG_HFSPLUS_FS is not set
618# CONFIG_BEFS_FS is not set
619# CONFIG_BFS_FS is not set
620# CONFIG_EFS_FS is not set
621# CONFIG_CRAMFS is not set
622# CONFIG_VXFS_FS is not set
623# CONFIG_HPFS_FS is not set
624# CONFIG_QNX4FS_FS is not set
625# CONFIG_SYSV_FS is not set
626# CONFIG_UFS_FS is not set
627CONFIG_NETWORK_FILESYSTEMS=y
628CONFIG_NFS_FS=y
629CONFIG_NFS_V3=y
630CONFIG_NFS_V3_ACL=y
631CONFIG_NFS_V4=y
632CONFIG_NFS_DIRECTIO=y
633# CONFIG_NFSD is not set
634CONFIG_ROOT_NFS=y
635CONFIG_LOCKD=y
636CONFIG_LOCKD_V4=y
637CONFIG_NFS_ACL_SUPPORT=y
638CONFIG_NFS_COMMON=y
639CONFIG_SUNRPC=y
640CONFIG_SUNRPC_GSS=y
641# CONFIG_SUNRPC_BIND34 is not set
642CONFIG_RPCSEC_GSS_KRB5=y
643# CONFIG_RPCSEC_GSS_SPKM3 is not set
644# CONFIG_SMB_FS is not set
645# CONFIG_CIFS is not set
646# CONFIG_NCP_FS is not set
647# CONFIG_CODA_FS is not set
648# CONFIG_AFS_FS is not set
649
650#
651# Partition Types
652#
653# CONFIG_PARTITION_ADVANCED is not set
654CONFIG_MSDOS_PARTITION=y
655# CONFIG_NLS is not set
656# CONFIG_DLM is not set
657# CONFIG_INSTRUMENTATION is not set
658
659#
660# Kernel hacking
661#
662# CONFIG_PRINTK_TIME is not set
663CONFIG_ENABLE_MUST_CHECK=y
664# CONFIG_MAGIC_SYSRQ is not set
665# CONFIG_UNUSED_SYMBOLS is not set
666# CONFIG_DEBUG_FS is not set
667# CONFIG_HEADERS_CHECK is not set
668# CONFIG_DEBUG_KERNEL is not set
669# CONFIG_SLUB_DEBUG_ON is not set
670CONFIG_DEBUG_BUGVERBOSE=y
671CONFIG_FRAME_POINTER=y
672# CONFIG_SAMPLES is not set
673CONFIG_DEBUG_USER=y
674
675#
676# Security options
677#
678# CONFIG_KEYS is not set
679# CONFIG_SECURITY is not set
680# CONFIG_SECURITY_FILE_CAPABILITIES is not set
681CONFIG_CRYPTO=y
682CONFIG_CRYPTO_ALGAPI=y
683CONFIG_CRYPTO_BLKCIPHER=y
684CONFIG_CRYPTO_MANAGER=y
685# CONFIG_CRYPTO_HMAC is not set
686# CONFIG_CRYPTO_XCBC is not set
687# CONFIG_CRYPTO_NULL is not set
688# CONFIG_CRYPTO_MD4 is not set
689CONFIG_CRYPTO_MD5=y
690# CONFIG_CRYPTO_SHA1 is not set
691# CONFIG_CRYPTO_SHA256 is not set
692# CONFIG_CRYPTO_SHA512 is not set
693# CONFIG_CRYPTO_WP512 is not set
694# CONFIG_CRYPTO_TGR192 is not set
695# CONFIG_CRYPTO_GF128MUL is not set
696# CONFIG_CRYPTO_ECB is not set
697CONFIG_CRYPTO_CBC=y
698# CONFIG_CRYPTO_PCBC is not set
699# CONFIG_CRYPTO_LRW is not set
700# CONFIG_CRYPTO_XTS is not set
701# CONFIG_CRYPTO_CRYPTD is not set
702CONFIG_CRYPTO_DES=y
703# CONFIG_CRYPTO_FCRYPT is not set
704# CONFIG_CRYPTO_BLOWFISH is not set
705# CONFIG_CRYPTO_TWOFISH is not set
706# CONFIG_CRYPTO_SERPENT is not set
707# CONFIG_CRYPTO_AES is not set
708# CONFIG_CRYPTO_CAST5 is not set
709# CONFIG_CRYPTO_CAST6 is not set
710# CONFIG_CRYPTO_TEA is not set
711# CONFIG_CRYPTO_ARC4 is not set
712# CONFIG_CRYPTO_KHAZAD is not set
713# CONFIG_CRYPTO_ANUBIS is not set
714# CONFIG_CRYPTO_SEED is not set
715# CONFIG_CRYPTO_DEFLATE is not set
716# CONFIG_CRYPTO_MICHAEL_MIC is not set
717# CONFIG_CRYPTO_CRC32C is not set
718# CONFIG_CRYPTO_CAMELLIA is not set
719# CONFIG_CRYPTO_TEST is not set
720# CONFIG_CRYPTO_AUTHENC is not set
721# CONFIG_CRYPTO_HW is not set
722
723#
724# Library routines
725#
726CONFIG_BITREVERSE=y
727# CONFIG_CRC_CCITT is not set
728# CONFIG_CRC16 is not set
729# CONFIG_CRC_ITU_T is not set
730CONFIG_CRC32=y
731# CONFIG_CRC7 is not set
732# CONFIG_LIBCRC32C is not set
733CONFIG_PLIST=y
734CONFIG_HAS_IOMEM=y
735CONFIG_HAS_IOPORT=y
736CONFIG_HAS_DMA=y
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 9ed2377fe8e5..d0daeab2234e 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -19,31 +19,21 @@
 
 #ifdef __KERNEL__
 
+/*
+ * On ARM, ordinary assignment (str instruction) doesn't clear the local
+ * strex/ldrex monitor on some implementations. The reason we can use it for
+ * atomic_set() is the clrex or dummy strex done on every exception return.
+ */
 #define atomic_read(v)	((v)->counter)
+#define atomic_set(v,i)	(((v)->counter) = (i))
 
 #if __LINUX_ARM_ARCH__ >= 6
 
 /*
  * ARMv6 UP and SMP safe atomic ops.  We use load exclusive and
  * store exclusive to ensure that these are atomic.  We may loop
- * to ensure that the update happens.  Writing to 'v->counter'
- * without using the following operations WILL break the atomic
- * nature of these ops.
+ * to ensure that the update happens.
  */
-static inline void atomic_set(atomic_t *v, int i)
-{
-	unsigned long tmp;
-
-	__asm__ __volatile__("@ atomic_set\n"
-"1:	ldrex	%0, [%1]\n"
-"	strex	%0, %2, [%1]\n"
-"	teq	%0, #0\n"
-"	bne	1b"
-	: "=&r" (tmp)
-	: "r" (&v->counter), "r" (i)
-	: "cc");
-}
-
 static inline void atomic_add(int i, atomic_t *v)
 {
 	unsigned long tmp;
@@ -163,8 +153,6 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 #error SMP not supported on pre-ARMv6 CPUs
 #endif
 
-#define atomic_set(v,i)	(((v)->counter) = (i))
-
 static inline int atomic_add_return(int i, atomic_t *v)
 {
 	unsigned long flags;
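The hunk above turns atomic_set() into a plain store and documents why that is safe: any ldrex/strex exclusive monitor left set is cleared by the clrex (or dummy strex) that the entry-header.S change later in this commit performs on every exception return. A minimal usage sketch in kernel-style C, using only the API shown in the hunk:

#include <asm/atomic.h>

static atomic_t pending = ATOMIC_INIT(0);

static void producer(void)
{
	atomic_set(&pending, 0);	/* now a plain store of v->counter */
	atomic_add(5, &pending);	/* ldrex/strex loop on ARMv6+ */
}

static int consumer(void)
{
	return atomic_read(&pending);	/* plain load */
}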
diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h
index feaa75f0013e..66c160b8547f 100644
--- a/arch/arm/include/asm/cache.h
+++ b/arch/arm/include/asm/cache.h
@@ -4,7 +4,7 @@
 #ifndef __ASMARM_CACHE_H
 #define __ASMARM_CACHE_H
 
-#define L1_CACHE_SHIFT		5
+#define L1_CACHE_SHIFT		CONFIG_ARM_L1_CACHE_SHIFT
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
 
 /*
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index b3e656c6fb78..20ae96cc0020 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -63,6 +63,11 @@ static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
 	return read_cpuid(CPUID_CACHETYPE);
 }
 
+static inline unsigned int __attribute_const__ read_cpuid_tcmstatus(void)
+{
+	return read_cpuid(CPUID_TCM);
+}
+
 /*
  * Intel's XScale3 core supports some v6 features (supersections, L2)
  * but advertises itself as v5 as it does not support the v6 ISA.  For
@@ -73,7 +78,10 @@ static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
 #else
 static inline int cpu_is_xsc3(void)
 {
-	if ((read_cpuid_id() & 0xffffe000) == 0x69056000)
+	unsigned int id;
+	id = read_cpuid_id() & 0xffffe000;
+	/* It covers both Intel ID and Marvell ID */
+	if ((id == 0x69056000) || (id == 0x56056000))
 		return 1;
 
 	return 0;
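For reference, the 0xffffe000 mask keeps the implementer and part-number fields of the main ID register while dropping revision bits, so both the Intel and the Marvell-branded XScale3 IDs from the hunk now match. A standalone sketch of the same check (the sample ID values in the comment are illustrative, not taken from real silicon):

static int is_xsc3_id(unsigned int main_id)
{
	unsigned int id = main_id & 0xffffe000;	/* drop revision bits */

	return (id == 0x69056000) ||		/* Intel XScale3 */
	       (id == 0x56056000);		/* Marvell XScale3 */
}

/* e.g. is_xsc3_id(0x69056417) == 1, is_xsc3_id(0x56056123) == 1,
 * while an unrelated main ID returns 0. */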
diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h
index 83e6ba338e2c..1a8c7279a28b 100644
--- a/arch/arm/include/asm/hardware/iop3xx-adma.h
+++ b/arch/arm/include/asm/hardware/iop3xx-adma.h
@@ -187,11 +187,74 @@ union iop3xx_desc {
 	void *ptr;
 };
 
+/* No support for p+q operations */
+static inline int
+iop_chan_pq_slot_count(size_t len, int src_cnt, int *slots_per_op)
+{
+	BUG();
+	return 0;
+}
+
+static inline void
+iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
+		 unsigned long flags)
+{
+	BUG();
+}
+
+static inline void
+iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr)
+{
+	BUG();
+}
+
+static inline void
+iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
+			 dma_addr_t addr, unsigned char coef)
+{
+	BUG();
+}
+
+static inline int
+iop_chan_pq_zero_sum_slot_count(size_t len, int src_cnt, int *slots_per_op)
+{
+	BUG();
+	return 0;
+}
+
+static inline void
+iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
+			  unsigned long flags)
+{
+	BUG();
+}
+
+static inline void
+iop_desc_set_pq_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
+{
+	BUG();
+}
+
+#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr
+
+static inline void
+iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx,
+			      dma_addr_t *src)
+{
+	BUG();
+}
+
 static inline int iop_adma_get_max_xor(void)
 {
 	return 32;
 }
 
+static inline int iop_adma_get_max_pq(void)
+{
+	BUG();
+	return 0;
+}
+
 static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
 {
 	int id = chan->device->id;
@@ -332,6 +395,11 @@ static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt,
 	return slot_cnt;
 }
 
+static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
+{
+	return 0;
+}
+
 static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
 					struct iop_adma_chan *chan)
 {
@@ -349,6 +417,14 @@ static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
 	return 0;
 }
 
+
+static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
+					  struct iop_adma_chan *chan)
+{
+	BUG();
+	return 0;
+}
+
 static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
 					struct iop_adma_chan *chan)
 {
@@ -756,13 +832,14 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
 	hw_desc->src[0] = val;
 }
 
-static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
+static inline enum sum_check_flags
+iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
 {
 	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
 	struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
 
 	iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en));
-	return desc_ctrl.zero_result_err;
+	return desc_ctrl.zero_result_err << SUM_CHECK_P;
 }
 
 static inline void iop_chan_append(struct iop_adma_chan *chan)
diff --git a/arch/arm/include/asm/hardware/iop_adma.h b/arch/arm/include/asm/hardware/iop_adma.h
index 385c6e8cbbd2..59b8c3892f76 100644
--- a/arch/arm/include/asm/hardware/iop_adma.h
+++ b/arch/arm/include/asm/hardware/iop_adma.h
@@ -86,6 +86,7 @@ struct iop_adma_chan {
  * @idx: pool index
  * @unmap_src_cnt: number of xor sources
  * @unmap_len: transaction bytecount
+ * @tx_list: list of descriptors that are associated with one operation
  * @async_tx: support for the async_tx api
  * @group_list: list of slots that make up a multi-descriptor transaction
  *	for example transfer lengths larger than the supported hw max
@@ -102,10 +103,12 @@ struct iop_adma_desc_slot {
 	u16 idx;
 	u16 unmap_src_cnt;
 	size_t unmap_len;
+	struct list_head tx_list;
 	struct dma_async_tx_descriptor async_tx;
 	union {
 		u32 *xor_check_result;
 		u32 *crc32_result;
+		u32 *pq_check_result;
 	};
 };
 
diff --git a/arch/arm/include/asm/tcm.h b/arch/arm/include/asm/tcm.h
new file mode 100644
index 000000000000..5929ef5d927a
--- /dev/null
+++ b/arch/arm/include/asm/tcm.h
@@ -0,0 +1,31 @@
1/*
2 *
3 * Copyright (C) 2008-2009 ST-Ericsson AB
4 * License terms: GNU General Public License (GPL) version 2
5 *
6 * Author: Rickard Andersson <rickard.andersson@stericsson.com>
7 * Author: Linus Walleij <linus.walleij@stericsson.com>
8 *
9 */
10#ifndef __ASMARM_TCM_H
11#define __ASMARM_TCM_H
12
13#ifndef CONFIG_HAVE_TCM
14#error "You should not be including tcm.h unless you have a TCM!"
15#endif
16
17#include <linux/compiler.h>
18
19/* Tag variables with this */
20#define __tcmdata __section(.tcm.data)
21/* Tag constants with this */
22#define __tcmconst __section(.tcm.rodata)
23/* Tag functions inside TCM called from outside TCM with this */
24#define __tcmfunc __attribute__((long_call)) __section(.tcm.text) noinline
25/* Tag function inside TCM called from inside TCM with this */
26#define __tcmlocalfunc __section(.tcm.text)
27
28void *tcm_alloc(size_t len);
29void tcm_free(void *addr, size_t len);
30
31#endif
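A sketch of how a driver could use these annotations, assuming CONFIG_HAVE_TCM is set and the platform defines the TCM offsets; the function and variable names are invented for illustration:

#include <asm/tcm.h>

static int hit_count __tcmdata;			/* lands in .tcm.data (DTCM) */

static void __tcmfunc fast_handler(void)	/* lands in .tcm.text (ITCM) */
{
	hit_count++;
}

static void demo(void)
{
	void *buf = tcm_alloc(64);	/* carve 64 bytes out of the TCM pool */

	if (buf) {
		fast_handler();		/* long_call lets RAM code reach ITCM */
		tcm_free(buf, 64);
	}
}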
diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h
index 073e85b9b961..bc631161e9c6 100644
--- a/arch/arm/include/asm/unified.h
+++ b/arch/arm/include/asm/unified.h
@@ -35,7 +35,9 @@
 
 #define ARM(x...)
 #define THUMB(x...)	x
+#ifdef __ASSEMBLY__
 #define W(instr)	instr.w
+#endif
 #define BSYM(sym)	sym + 1
 
 #else	/* !CONFIG_THUMB2_KERNEL */
@@ -45,7 +47,9 @@
 
 #define ARM(x...)	x
 #define THUMB(x...)
+#ifdef __ASSEMBLY__
 #define W(instr)	instr
+#endif
 #define BSYM(sym)	sym
 
 #endif	/* CONFIG_THUMB2_KERNEL */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index c446aeff7b89..79087dd6d869 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -35,6 +35,7 @@ obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o
 obj-$(CONFIG_ARM_THUMBEE)	+= thumbee.o
 obj-$(CONFIG_KGDB)		+= kgdb.o
 obj-$(CONFIG_ARM_UNWIND)	+= unwind.o
+obj-$(CONFIG_HAVE_TCM)		+= tcm.o
 
 obj-$(CONFIG_CRUNCH)		+= crunch.o crunch-bits.o
 AFLAGS_crunch-bits.o		:= -Wa,-mcpu=ep9312
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 3d727a8a23bc..0a2ba51cf35d 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -272,7 +272,15 @@ __und_svc:
 	@
 	@  r0 - instruction
 	@
+#ifndef	CONFIG_THUMB2_KERNEL
 	ldr	r0, [r2, #-4]
+#else
+	ldrh	r0, [r2, #-2]			@ Thumb instruction at LR - 2
+	and	r9, r0, #0xf800
+	cmp	r9, #0xe800			@ 32-bit instruction if xx >= 0
+	ldrhhs	r9, [r2]			@ bottom 16 bits
+	orrhs	r0, r9, r0, lsl #16
+#endif
 	adr	r9, BSYM(1f)
 	bl	call_fpe
 
@@ -678,7 +686,9 @@ ENTRY(fp_enter)
 	.word	no_fp
 	.previous
 
-no_fp:	mov	pc, lr
+ENTRY(no_fp)
+	mov	pc, lr
+ENDPROC(no_fp)
 
 __und_usr_unknown:
 	enable_irq
@@ -734,13 +744,6 @@ ENTRY(__switch_to)
 #ifdef CONFIG_MMU
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
-#if __LINUX_ARM_ARCH__ >= 6
-#ifdef CONFIG_CPU_32v6K
-	clrex
-#else
-	strex	r5, r4, [ip]			@ Clear exclusive monitor
-#endif
-#endif
 #if defined(CONFIG_HAS_TLS_REG)
 	mcr	p15, 0, r3, c13, c0, 3		@ set TLS register
 #elif !defined(CONFIG_TLS_REG_EMUL)
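The new Thumb-2 path above reads the 16-bit halfword at LR - 2 and, when its top bits indicate a 32-bit encoding (first halfword at or above 0xe800), appends the following halfword. The same decode, sketched in plain C purely for clarity:

#include <stdint.h>

static uint32_t fetch_thumb_insn(const uint16_t *pc)
{
	uint32_t insn = pc[0];			/* halfword at LR - 2 */

	if ((insn & 0xf800) >= 0xe800)		/* 32-bit Thumb-2 encoding? */
		insn = (insn << 16) | pc[1];	/* merge bottom 16 bits */
	return insn;
}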
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index a4eaf4f920c5..e17e3c30d957 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -76,13 +76,25 @@
 #ifndef CONFIG_THUMB2_KERNEL
 	.macro	svc_exit, rpsr
 	msr	spsr_cxsf, \rpsr
+#if defined(CONFIG_CPU_32v6K)
+	clrex					@ clear the exclusive monitor
 	ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
+#elif defined (CONFIG_CPU_V6)
+	ldr	r0, [sp]
+	strex	r1, r2, [sp]			@ clear the exclusive monitor
+	ldmib	sp, {r1 - pc}^			@ load r1 - pc, cpsr
+#endif
 	.endm
 
 	.macro	restore_user_regs, fast = 0, offset = 0
 	ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr
 	ldr	lr, [sp, #\offset + S_PC]!	@ get pc
 	msr	spsr_cxsf, r1			@ save in spsr_svc
+#if defined(CONFIG_CPU_32v6K)
+	clrex					@ clear the exclusive monitor
+#elif defined (CONFIG_CPU_V6)
+	strex	r1, r2, [sp]			@ clear the exclusive monitor
+#endif
 	.if	\fast
 	ldmdb	sp, {r1 - lr}^			@ get calling r1 - lr
 	.else
@@ -98,6 +110,7 @@
 	.endm
 #else	/* CONFIG_THUMB2_KERNEL */
 	.macro	svc_exit, rpsr
+	clrex					@ clear the exclusive monitor
 	ldr	r0, [sp, #S_SP]			@ top of the stack
 	ldr	r1, [sp, #S_PC]			@ return address
 	tst	r0, #4				@ orig stack 8-byte aligned?
@@ -110,6 +123,7 @@
 	.endm
 
 	.macro	restore_user_regs, fast = 0, offset = 0
+	clrex					@ clear the exclusive monitor
 	mov	r2, sp
 	load_user_sp_lr r2, r3, \offset + S_SP	@ calling sp, lr
 	ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index f692efddd449..60c62c377fa9 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -22,6 +22,7 @@
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
 #include <linux/module.h>
+#include <linux/stop_machine.h>
 #include <linux/stringify.h>
 #include <asm/traps.h>
 #include <asm/cacheflush.h>
@@ -83,10 +84,24 @@ void __kprobes arch_arm_kprobe(struct kprobe *p)
 	flush_insns(p->addr, 1);
 }
 
+/*
+ * The actual disarming is done here on each CPU and synchronized using
+ * stop_machine. This synchronization is necessary on SMP to avoid removing
+ * a probe between the moment the 'Undefined Instruction' exception is raised
+ * and the moment the exception handler reads the faulting instruction from
+ * memory.
+ */
+int __kprobes __arch_disarm_kprobe(void *p)
+{
+	struct kprobe *kp = p;
+	*kp->addr = kp->opcode;
+	flush_insns(kp->addr, 1);
+	return 0;
+}
+
 void __kprobes arch_disarm_kprobe(struct kprobe *p)
 {
-	*p->addr = p->opcode;
-	flush_insns(p->addr, 1);
+	stop_machine(__arch_disarm_kprobe, p, &cpu_online_map);
 }
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
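stop_machine() holds every other online CPU in a known-safe spin with interrupts disabled while the callback runs, so no CPU can sit between raising the 'Undefined Instruction' exception and re-reading the probed slot. A generic sketch of the pattern with this era's API; the callback, the NOP value, and the function names are illustrative (a real patcher would also flush the I-cache, as arch_disarm_kprobe does via flush_insns):

#include <linux/stop_machine.h>

static int patch_insn_cb(void *data)
{
	unsigned int *addr = data;

	*addr = 0xe1a00000;	/* ARM NOP (mov r0, r0), just as an example */
	return 0;
}

static void patch_insn(unsigned int *addr)
{
	/* NULL cpumask: any one CPU runs the callback, the rest are parked */
	stop_machine(patch_insn_cb, addr, NULL);
}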
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index d4d4f77c91b2..c6c57b640b6b 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -45,6 +45,7 @@
 
 #include "compat.h"
 #include "atags.h"
+#include "tcm.h"
 
 #ifndef MEM_SIZE
 #define MEM_SIZE	(16*1024*1024)
@@ -749,6 +750,7 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	cpu_init();
+	tcm_init();
 
 	/*
 	 * Set up various architecture-specific pointers
diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c
new file mode 100644
index 000000000000..e50303868f1b
--- /dev/null
+++ b/arch/arm/kernel/tcm.c
@@ -0,0 +1,246 @@
1/*
2 * Copyright (C) 2008-2009 ST-Ericsson AB
3 * License terms: GNU General Public License (GPL) version 2
4 * TCM memory handling for ARM systems
5 *
6 * Author: Linus Walleij <linus.walleij@stericsson.com>
7 * Author: Rickard Andersson <rickard.andersson@stericsson.com>
8 */
9#include <linux/init.h>
10#include <linux/kernel.h>
11#include <linux/module.h>
12#include <linux/stddef.h>
13#include <linux/ioport.h>
14#include <linux/genalloc.h>
15#include <linux/string.h> /* memcpy */
16#include <asm/page.h> /* PAGE_SHIFT */
17#include <asm/cputype.h>
18#include <asm/mach/map.h>
19#include <mach/memory.h>
20#include "tcm.h"
21
22/* Scream and warn about misuse */
23#if !defined(ITCM_OFFSET) || !defined(ITCM_END) || \
24 !defined(DTCM_OFFSET) || !defined(DTCM_END)
25#error "TCM support selected but offsets not defined!"
26#endif
27
28static struct gen_pool *tcm_pool;
29
30/* TCM section definitions from the linker */
31extern char __itcm_start, __sitcm_text, __eitcm_text;
32extern char __dtcm_start, __sdtcm_data, __edtcm_data;
33
34/*
35 * TCM memory resources
36 */
37static struct resource dtcm_res = {
38 .name = "DTCM RAM",
39 .start = DTCM_OFFSET,
40 .end = DTCM_END,
41 .flags = IORESOURCE_MEM
42};
43
44static struct resource itcm_res = {
45 .name = "ITCM RAM",
46 .start = ITCM_OFFSET,
47 .end = ITCM_END,
48 .flags = IORESOURCE_MEM
49};
50
51static struct map_desc dtcm_iomap[] __initdata = {
52 {
53 .virtual = DTCM_OFFSET,
54 .pfn = __phys_to_pfn(DTCM_OFFSET),
55 .length = (DTCM_END - DTCM_OFFSET + 1),
56 .type = MT_UNCACHED
57 }
58};
59
60static struct map_desc itcm_iomap[] __initdata = {
61 {
62 .virtual = ITCM_OFFSET,
63 .pfn = __phys_to_pfn(ITCM_OFFSET),
64 .length = (ITCM_END - ITCM_OFFSET + 1),
65 .type = MT_UNCACHED
66 }
67};
68
69/*
70 * Allocate a chunk of TCM memory
71 */
72void *tcm_alloc(size_t len)
73{
74 unsigned long vaddr;
75
76 if (!tcm_pool)
77 return NULL;
78
79 vaddr = gen_pool_alloc(tcm_pool, len);
80 if (!vaddr)
81 return NULL;
82
83 return (void *) vaddr;
84}
85EXPORT_SYMBOL(tcm_alloc);
86
87/*
88 * Free a chunk of TCM memory
89 */
90void tcm_free(void *addr, size_t len)
91{
92 gen_pool_free(tcm_pool, (unsigned long) addr, len);
93}
94EXPORT_SYMBOL(tcm_free);
95
96
97static void __init setup_tcm_bank(u8 type, u32 offset, u32 expected_size)
98{
99 const int tcm_sizes[16] = { 0, -1, -1, 4, 8, 16, 32, 64, 128,
100 256, 512, 1024, -1, -1, -1, -1 };
101 u32 tcm_region;
102 int tcm_size;
103
104 /* Read the special TCM region register c9, 0 */
105 if (!type)
106 asm("mrc p15, 0, %0, c9, c1, 0"
107 : "=r" (tcm_region));
108 else
109 asm("mrc p15, 0, %0, c9, c1, 1"
110 : "=r" (tcm_region));
111
112 tcm_size = tcm_sizes[(tcm_region >> 2) & 0x0f];
113 if (tcm_size < 0) {
114 pr_err("CPU: %sTCM of unknown size!\n",
115 type ? "I" : "D");
116 } else {
117 pr_info("CPU: found %sTCM %dk @ %08x, %senabled\n",
118 type ? "I" : "D",
119 tcm_size,
120 (tcm_region & 0xfffff000U),
121 (tcm_region & 1) ? "" : "not ");
122 }
123
124 if (tcm_size != expected_size) {
125 pr_crit("CPU: %sTCM was detected %dk but expected %dk!\n",
126 type ? "I" : "D",
127 tcm_size,
128 expected_size);
129 /* Adjust to the expected size? what can we do... */
130 }
131
132 /* Force move the TCM bank to where we want it, enable */
133 tcm_region = offset | (tcm_region & 0x00000ffeU) | 1;
134
135 if (!type)
136 asm("mcr p15, 0, %0, c9, c1, 0"
137 : /* No output operands */
138 : "r" (tcm_region));
139 else
140 asm("mcr p15, 0, %0, c9, c1, 1"
141 : /* No output operands */
142 : "r" (tcm_region));
143
144 pr_debug("CPU: moved %sTCM %dk to %08x, enabled\n",
145 type ? "I" : "D",
146 tcm_size,
147 (tcm_region & 0xfffff000U));
148}
149
150/*
151 * This initializes the TCM memory
152 */
153void __init tcm_init(void)
154{
155 u32 tcm_status = read_cpuid_tcmstatus();
156 char *start;
157 char *end;
158 char *ram;
159
160 /* Setup DTCM if present */
161 if (tcm_status & (1 << 16)) {
162 setup_tcm_bank(0, DTCM_OFFSET,
163 (DTCM_END - DTCM_OFFSET + 1) >> 10);
164 request_resource(&iomem_resource, &dtcm_res);
165 iotable_init(dtcm_iomap, 1);
166 /* Copy data from RAM to DTCM */
167 start = &__sdtcm_data;
168 end = &__edtcm_data;
169 ram = &__dtcm_start;
170 memcpy(start, ram, (end-start));
171 pr_debug("CPU DTCM: copied data from %p - %p\n", start, end);
172 }
173
174 /* Setup ITCM if present */
175 if (tcm_status & 1) {
176 setup_tcm_bank(1, ITCM_OFFSET,
177 (ITCM_END - ITCM_OFFSET + 1) >> 10);
178 request_resource(&iomem_resource, &itcm_res);
179 iotable_init(itcm_iomap, 1);
180 /* Copy code from RAM to ITCM */
181 start = &__sitcm_text;
182 end = &__eitcm_text;
183 ram = &__itcm_start;
184 memcpy(start, ram, (end-start));
185 pr_debug("CPU ITCM: copied code from %p - %p\n", start, end);
186 }
187}
188
189/*
190 * This creates the TCM memory pool and has to be done later,
191 * during the core_initicalls, since the allocator is not yet
192 * up and running when the first initialization runs.
193 */
194static int __init setup_tcm_pool(void)
195{
196 u32 tcm_status = read_cpuid_tcmstatus();
197 u32 dtcm_pool_start = (u32) &__edtcm_data;
198 u32 itcm_pool_start = (u32) &__eitcm_text;
199 int ret;
200
201 /*
202 * Set up malloc pool, 2^2 = 4 bytes granularity since
203 * the TCM is sometimes just 4 KiB. NB: pages and cache
204 * line alignments do not matter in TCM!
205 */
206 tcm_pool = gen_pool_create(2, -1);
207
208 pr_debug("Setting up TCM memory pool\n");
209
210 /* Add the rest of DTCM to the TCM pool */
211 if (tcm_status & (1 << 16)) {
212 if (dtcm_pool_start < DTCM_END) {
213 ret = gen_pool_add(tcm_pool, dtcm_pool_start,
214 DTCM_END - dtcm_pool_start + 1, -1);
215 if (ret) {
216 pr_err("CPU DTCM: could not add DTCM " \
217 "remainder to pool!\n");
218 return ret;
219 }
220 pr_debug("CPU DTCM: Added %08x bytes @ %08x to " \
221 "the TCM memory pool\n",
222 DTCM_END - dtcm_pool_start + 1,
223 dtcm_pool_start);
224 }
225 }
226
227 /* Add the rest of ITCM to the TCM pool */
228 if (tcm_status & 1) {
229 if (itcm_pool_start < ITCM_END) {
230 ret = gen_pool_add(tcm_pool, itcm_pool_start,
231 ITCM_END - itcm_pool_start + 1, -1);
232 if (ret) {
233 pr_err("CPU ITCM: could not add ITCM " \
234 "remainder to pool!\n");
235 return ret;
236 }
237 pr_debug("CPU ITCM: Added %08x bytes @ %08x to " \
238 "the TCM memory pool\n",
239 ITCM_END - itcm_pool_start + 1,
240 itcm_pool_start);
241 }
242 }
243 return 0;
244}
245
246core_initcall(setup_tcm_pool);
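tcm.c leans on the genalloc API throughout: gen_pool_create() fixes the allocation granularity as a power of two, gen_pool_add() seeds the pool with an address range, and gen_pool_alloc()/gen_pool_free() then hand out chunks. The pattern, condensed into a standalone sketch (the seed address and size below are made up; the real code seeds the pool with the leftover TCM ranges as shown above):

#include <linux/genalloc.h>

static struct gen_pool *demo_pool;

static int __init demo_pool_init(void)
{
	/* 2^2 = 4 byte granularity, no NUMA node affinity */
	demo_pool = gen_pool_create(2, -1);
	if (!demo_pool)
		return -ENOMEM;

	/* hand the pool 1 KiB starting at a (hypothetical) mapped vaddr */
	return gen_pool_add(demo_pool, 0xfffe8000UL, 1024, -1);
}

core_initcall(demo_pool_init);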
diff --git a/arch/arm/kernel/tcm.h b/arch/arm/kernel/tcm.h
new file mode 100644
index 000000000000..8015ad434a40
--- /dev/null
+++ b/arch/arm/kernel/tcm.h
@@ -0,0 +1,17 @@
1/*
2 * Copyright (C) 2008-2009 ST-Ericsson AB
3 * License terms: GNU General Public License (GPL) version 2
4 * TCM memory handling for ARM systems
5 *
6 * Author: Linus Walleij <linus.walleij@stericsson.com>
7 * Author: Rickard Andersson <rickard.andersson@stericsson.com>
8 */
9
10#ifdef CONFIG_HAVE_TCM
11void __init tcm_init(void);
12#else
13/* No TCM support, just blank inlines to be optimized out */
14inline void tcm_init(void)
15{
16}
17#endif
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 5cc4812c9763..aecf87dfbaec 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -199,6 +199,63 @@ SECTIONS
 	}
 	_edata_loc = __data_loc + SIZEOF(.data);
 
+#ifdef CONFIG_HAVE_TCM
+	/*
+	 * We align everything to a page boundary so we can
+	 * free it after init has commenced and TCM contents have
+	 * been copied to its destination.
+	 */
+	.tcm_start : {
+		. = ALIGN(PAGE_SIZE);
+		__tcm_start = .;
+		__itcm_start = .;
+	}
+
+	/*
+	 * Link these to the ITCM RAM
+	 * Put VMA to the TCM address and LMA to the common RAM
+	 * and we'll upload the contents from RAM to TCM and free
+	 * the used RAM after that.
+	 */
+	.text_itcm ITCM_OFFSET : AT(__itcm_start)
+	{
+		__sitcm_text = .;
+		*(.tcm.text)
+		*(.tcm.rodata)
+		. = ALIGN(4);
+		__eitcm_text = .;
+	}
+
+	/*
+	 * Reset the dot pointer, this is needed to create the
+	 * relative __dtcm_start below (to be used as extern in code).
+	 */
+	. = ADDR(.tcm_start) + SIZEOF(.tcm_start) + SIZEOF(.text_itcm);
+
+	.dtcm_start : {
+		__dtcm_start = .;
+	}
+
+	/* TODO: add remainder of ITCM as well, that can be used for data! */
+	.data_dtcm DTCM_OFFSET : AT(__dtcm_start)
+	{
+		. = ALIGN(4);
+		__sdtcm_data = .;
+		*(.tcm.data)
+		. = ALIGN(4);
+		__edtcm_data = .;
+	}
+
+	/* Reset the dot pointer or the linker gets confused */
+	. = ADDR(.dtcm_start) + SIZEOF(.data_dtcm);
+
+	/* End marker for freeing TCM copy in linked object */
+	.tcm_end : AT(ADDR(.dtcm_start) + SIZEOF(.data_dtcm)){
+		. = ALIGN(PAGE_SIZE);
+		__tcm_end = .;
+	}
+#endif
+
 	.bss : {
 		__bss_start = .;	/* BSS */
 		*(.bss)
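These TCM output sections have their VMA at the TCM address and their LMA in ordinary RAM, so something at boot must copy each section from its load address to its run address; that is exactly what tcm_init() earlier in this commit does. The generic idiom, sketched with the linker symbols defined above:

#include <linux/string.h>

extern char __sitcm_text, __eitcm_text;	/* VMA range inside ITCM */
extern char __itcm_start;		/* LMA: the copy kept in RAM */

static void relocate_itcm(void)
{
	memcpy(&__sitcm_text, &__itcm_start,
	       &__eitcm_text - &__sitcm_text);
}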
diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S
index 6ae04db1ca4f..6ee2f6706f86 100644
--- a/arch/arm/lib/copy_page.S
+++ b/arch/arm/lib/copy_page.S
@@ -12,8 +12,9 @@
12#include <linux/linkage.h> 12#include <linux/linkage.h>
13#include <asm/assembler.h> 13#include <asm/assembler.h>
14#include <asm/asm-offsets.h> 14#include <asm/asm-offsets.h>
15#include <asm/cache.h>
15 16
16#define COPY_COUNT (PAGE_SZ/64 PLD( -1 )) 17#define COPY_COUNT (PAGE_SZ / (2 * L1_CACHE_BYTES) PLD( -1 ))
17 18
18 .text 19 .text
19 .align 5 20 .align 5
@@ -26,17 +27,16 @@
 ENTRY(copy_page)
 	stmfd	sp!, {r4, lr}			@	2
 	PLD(	pld	[r1, #0]		)
-	PLD(	pld	[r1, #32]		)
+	PLD(	pld	[r1, #L1_CACHE_BYTES]	)
 	mov	r2, #COPY_COUNT			@	1
 	ldmia	r1!, {r3, r4, ip, lr}		@	4+1
-1:	PLD(	pld	[r1, #64]		)
-	PLD(	pld	[r1, #96]		)
-2:	stmia	r0!, {r3, r4, ip, lr}		@	4
-	ldmia	r1!, {r3, r4, ip, lr}		@	4+1
-	stmia	r0!, {r3, r4, ip, lr}		@	4
-	ldmia	r1!, {r3, r4, ip, lr}		@	4+1
+1:	PLD(	pld	[r1, #2 * L1_CACHE_BYTES])
+	PLD(	pld	[r1, #3 * L1_CACHE_BYTES])
+2:
+	.rept	(2 * L1_CACHE_BYTES / 16 - 1)
 	stmia	r0!, {r3, r4, ip, lr}		@	4
 	ldmia	r1!, {r3, r4, ip, lr}		@	4
+	.endr
 	subs	r2, r2, #1			@	1
 	stmia	r0!, {r3, r4, ip, lr}		@	4
 	ldmgtia	r1!, {r3, r4, ip, lr}		@	4
diff --git a/arch/arm/mach-at91/at91cap9_devices.c b/arch/arm/mach-at91/at91cap9_devices.c
index 412aa49ad2fb..d1f775e86353 100644
--- a/arch/arm/mach-at91/at91cap9_devices.c
+++ b/arch/arm/mach-at91/at91cap9_devices.c
@@ -771,9 +771,9 @@ void __init at91_add_device_pwm(u32 mask) {}
  *  AC97
  * -------------------------------------------------------------------- */

-#if defined(CONFIG_SND_AT91_AC97) || defined(CONFIG_SND_AT91_AC97_MODULE)
+#if defined(CONFIG_SND_ATMEL_AC97C) || defined(CONFIG_SND_ATMEL_AC97C_MODULE)
 static u64 ac97_dmamask = DMA_BIT_MASK(32);
-static struct atmel_ac97_data ac97_data;
+static struct ac97c_platform_data ac97_data;

 static struct resource ac97_resources[] = {
 	[0] = {
@@ -789,7 +789,7 @@ static struct resource ac97_resources[] = {
 };

 static struct platform_device at91cap9_ac97_device = {
-	.name		= "ac97c",
+	.name		= "atmel_ac97c",
 	.id		= 1,
 	.dev		= {
 				.dma_mask		= &ac97_dmamask,
@@ -800,7 +800,7 @@ static struct platform_device at91cap9_ac97_device = {
 	.num_resources	= ARRAY_SIZE(ac97_resources),
 };

-void __init at91_add_device_ac97(struct atmel_ac97_data *data)
+void __init at91_add_device_ac97(struct ac97c_platform_data *data)
 {
 	if (!data)
 		return;
@@ -818,7 +818,7 @@ void __init at91_add_device_ac97(struct atmel_ac97_data *data)
 	platform_device_register(&at91cap9_ac97_device);
 }
 #else
-void __init at91_add_device_ac97(struct atmel_ac97_data *data) {}
+void __init at91_add_device_ac97(struct ac97c_platform_data *data) {}
 #endif


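Board files hand the renamed platform data to the registration helper, as the board diffs later in this series do; a minimal board-side sketch (the reset line value is illustrative, taken from the Neocore 926 conversion below):

	static struct ac97c_platform_data board_ac97_data = {
		.reset_pin = AT91_PIN_PA13,	/* leave unset if NRST is not wired */
	};

	static void __init board_init(void)
	{
		at91_add_device_ac97(&board_ac97_data);
	}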
diff --git a/arch/arm/mach-at91/at91sam9g45_devices.c b/arch/arm/mach-at91/at91sam9g45_devices.c
index d746e8621bc2..d581cff80c4c 100644
--- a/arch/arm/mach-at91/at91sam9g45_devices.c
+++ b/arch/arm/mach-at91/at91sam9g45_devices.c
@@ -24,11 +24,59 @@
 #include <mach/at91sam9g45.h>
 #include <mach/at91sam9g45_matrix.h>
 #include <mach/at91sam9_smc.h>
+#include <mach/at_hdmac.h>

 #include "generic.h"


 /* --------------------------------------------------------------------
+ *  HDMAC - AHB DMA Controller
+ * -------------------------------------------------------------------- */
+
+#if defined(CONFIG_AT_HDMAC) || defined(CONFIG_AT_HDMAC_MODULE)
+static u64 hdmac_dmamask = DMA_BIT_MASK(32);
+
+static struct at_dma_platform_data atdma_pdata = {
+	.nr_channels	= 8,
+};
+
+static struct resource hdmac_resources[] = {
+	[0] = {
+		.start	= AT91_BASE_SYS + AT91_DMA,
+		.end	= AT91_BASE_SYS + AT91_DMA + SZ_512 - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[2] = {
+		.start	= AT91SAM9G45_ID_DMA,
+		.end	= AT91SAM9G45_ID_DMA,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device at_hdmac_device = {
+	.name		= "at_hdmac",
+	.id		= -1,
+	.dev		= {
+				.dma_mask		= &hdmac_dmamask,
+				.coherent_dma_mask	= DMA_BIT_MASK(32),
+				.platform_data		= &atdma_pdata,
+	},
+	.resource	= hdmac_resources,
+	.num_resources	= ARRAY_SIZE(hdmac_resources),
+};
+
+void __init at91_add_device_hdmac(void)
+{
+	dma_cap_set(DMA_MEMCPY, atdma_pdata.cap_mask);
+	dma_cap_set(DMA_SLAVE, atdma_pdata.cap_mask);
+	platform_device_register(&at_hdmac_device);
+}
+#else
+void __init at91_add_device_hdmac(void) {}
+#endif
+
+
+/* --------------------------------------------------------------------
  *  USB Host (OHCI)
  * -------------------------------------------------------------------- */

@@ -550,6 +598,61 @@ void __init at91_add_device_spi(struct spi_board_info *devices, int nr_devices)


 /* --------------------------------------------------------------------
+ *  AC97
+ * -------------------------------------------------------------------- */
+
+#if defined(CONFIG_SND_ATMEL_AC97C) || defined(CONFIG_SND_ATMEL_AC97C_MODULE)
+static u64 ac97_dmamask = DMA_BIT_MASK(32);
+static struct ac97c_platform_data ac97_data;
+
+static struct resource ac97_resources[] = {
+	[0] = {
+		.start	= AT91SAM9G45_BASE_AC97C,
+		.end	= AT91SAM9G45_BASE_AC97C + SZ_16K - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= AT91SAM9G45_ID_AC97C,
+		.end	= AT91SAM9G45_ID_AC97C,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device at91sam9g45_ac97_device = {
+	.name		= "atmel_ac97c",
+	.id		= 0,
+	.dev		= {
+				.dma_mask		= &ac97_dmamask,
+				.coherent_dma_mask	= DMA_BIT_MASK(32),
+				.platform_data		= &ac97_data,
+	},
+	.resource	= ac97_resources,
+	.num_resources	= ARRAY_SIZE(ac97_resources),
+};
+
+void __init at91_add_device_ac97(struct ac97c_platform_data *data)
+{
+	if (!data)
+		return;
+
+	at91_set_A_periph(AT91_PIN_PD8, 0);	/* AC97FS */
+	at91_set_A_periph(AT91_PIN_PD9, 0);	/* AC97CK */
+	at91_set_A_periph(AT91_PIN_PD7, 0);	/* AC97TX */
+	at91_set_A_periph(AT91_PIN_PD6, 0);	/* AC97RX */
+
+	/* reset */
+	if (data->reset_pin)
+		at91_set_gpio_output(data->reset_pin, 0);
+
+	ac97_data = *data;
+	platform_device_register(&at91sam9g45_ac97_device);
+}
+#else
+void __init at91_add_device_ac97(struct ac97c_platform_data *data) {}
+#endif
+
+
+/* --------------------------------------------------------------------
  *  LCD Controller
  * -------------------------------------------------------------------- */

@@ -1220,6 +1323,7 @@ void __init at91_add_device_serial(void) {}
  */
 static int __init at91_add_standard_devices(void)
 {
+	at91_add_device_hdmac();
 	at91_add_device_rtc();
 	at91_add_device_rtt();
 	at91_add_device_watchdog();
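With DMA_MEMCPY (and, on the 9g45, DMA_SLAVE) advertised in atdma_pdata.cap_mask, clients can obtain a channel through the generic dmaengine API once the at_hdmac driver has bound; a hedged sketch with an illustrative helper name:

	#include <linux/dmaengine.h>

	static struct dma_chan *get_memcpy_chan(void)	/* illustrative */
	{
		dma_cap_mask_t mask;

		dma_cap_zero(mask);
		dma_cap_set(DMA_MEMCPY, mask);
		/* no filter: any memcpy-capable channel, e.g. at_hdmac's */
		return dma_request_channel(mask, NULL, NULL);
	}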
diff --git a/arch/arm/mach-at91/at91sam9rl_devices.c b/arch/arm/mach-at91/at91sam9rl_devices.c
index 728186515cdf..d345f5453dbe 100644
--- a/arch/arm/mach-at91/at91sam9rl_devices.c
+++ b/arch/arm/mach-at91/at91sam9rl_devices.c
@@ -21,11 +21,57 @@
 #include <mach/at91sam9rl.h>
 #include <mach/at91sam9rl_matrix.h>
 #include <mach/at91sam9_smc.h>
+#include <mach/at_hdmac.h>

 #include "generic.h"


 /* --------------------------------------------------------------------
+ *  HDMAC - AHB DMA Controller
+ * -------------------------------------------------------------------- */
+
+#if defined(CONFIG_AT_HDMAC) || defined(CONFIG_AT_HDMAC_MODULE)
+static u64 hdmac_dmamask = DMA_BIT_MASK(32);
+
+static struct at_dma_platform_data atdma_pdata = {
+	.nr_channels	= 2,
+};
+
+static struct resource hdmac_resources[] = {
+	[0] = {
+		.start	= AT91_BASE_SYS + AT91_DMA,
+		.end	= AT91_BASE_SYS + AT91_DMA + SZ_512 - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[2] = {
+		.start	= AT91SAM9RL_ID_DMA,
+		.end	= AT91SAM9RL_ID_DMA,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device at_hdmac_device = {
+	.name		= "at_hdmac",
+	.id		= -1,
+	.dev		= {
+				.dma_mask		= &hdmac_dmamask,
+				.coherent_dma_mask	= DMA_BIT_MASK(32),
+				.platform_data		= &atdma_pdata,
+	},
+	.resource	= hdmac_resources,
+	.num_resources	= ARRAY_SIZE(hdmac_resources),
+};
+
+void __init at91_add_device_hdmac(void)
+{
+	dma_cap_set(DMA_MEMCPY, atdma_pdata.cap_mask);
+	platform_device_register(&at_hdmac_device);
+}
+#else
+void __init at91_add_device_hdmac(void) {}
+#endif
+
+/* --------------------------------------------------------------------
  *  USB HS Device (Gadget)
  * -------------------------------------------------------------------- */

@@ -398,6 +444,61 @@ void __init at91_add_device_spi(struct spi_board_info *devices, int nr_devices)


 /* --------------------------------------------------------------------
+ *  AC97
+ * -------------------------------------------------------------------- */
+
+#if defined(CONFIG_SND_ATMEL_AC97C) || defined(CONFIG_SND_ATMEL_AC97C_MODULE)
+static u64 ac97_dmamask = DMA_BIT_MASK(32);
+static struct ac97c_platform_data ac97_data;
+
+static struct resource ac97_resources[] = {
+	[0] = {
+		.start	= AT91SAM9RL_BASE_AC97C,
+		.end	= AT91SAM9RL_BASE_AC97C + SZ_16K - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= AT91SAM9RL_ID_AC97C,
+		.end	= AT91SAM9RL_ID_AC97C,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device at91sam9rl_ac97_device = {
+	.name		= "atmel_ac97c",
+	.id		= 0,
+	.dev		= {
+				.dma_mask		= &ac97_dmamask,
+				.coherent_dma_mask	= DMA_BIT_MASK(32),
+				.platform_data		= &ac97_data,
+	},
+	.resource	= ac97_resources,
+	.num_resources	= ARRAY_SIZE(ac97_resources),
+};
+
+void __init at91_add_device_ac97(struct ac97c_platform_data *data)
+{
+	if (!data)
+		return;
+
+	at91_set_A_periph(AT91_PIN_PD1, 0);	/* AC97FS */
+	at91_set_A_periph(AT91_PIN_PD2, 0);	/* AC97CK */
+	at91_set_A_periph(AT91_PIN_PD3, 0);	/* AC97TX */
+	at91_set_A_periph(AT91_PIN_PD4, 0);	/* AC97RX */
+
+	/* reset */
+	if (data->reset_pin)
+		at91_set_gpio_output(data->reset_pin, 0);
+
+	ac97_data = *data;
+	platform_device_register(&at91sam9rl_ac97_device);
+}
+#else
+void __init at91_add_device_ac97(struct ac97c_platform_data *data) {}
+#endif
+
+
+/* --------------------------------------------------------------------
  *  LCD Controller
  * -------------------------------------------------------------------- */

@@ -1103,6 +1204,7 @@ void __init at91_add_device_serial(void) {}
  */
 static int __init at91_add_standard_devices(void)
 {
+	at91_add_device_hdmac();
 	at91_add_device_rtc();
 	at91_add_device_rtt();
 	at91_add_device_watchdog();
diff --git a/arch/arm/mach-at91/board-cap9adk.c b/arch/arm/mach-at91/board-cap9adk.c
index 83a1a0fef47b..d6940870e403 100644
--- a/arch/arm/mach-at91/board-cap9adk.c
+++ b/arch/arm/mach-at91/board-cap9adk.c
@@ -364,7 +364,7 @@ static struct atmel_lcdfb_info __initdata cap9adk_lcdc_data;
 /*
  * AC97
  */
-static struct atmel_ac97_data cap9adk_ac97_data = {
+static struct ac97c_platform_data cap9adk_ac97_data = {
 //	.reset_pin	= ... not connected
 };

diff --git a/arch/arm/mach-at91/board-neocore926.c b/arch/arm/mach-at91/board-neocore926.c
index 8c0b71c95be4..7c1e382330fb 100644
--- a/arch/arm/mach-at91/board-neocore926.c
+++ b/arch/arm/mach-at91/board-neocore926.c
@@ -340,7 +340,7 @@ static void __init neocore926_add_device_buttons(void) {}
 /*
  * AC97
  */
-static struct atmel_ac97_data neocore926_ac97_data = {
+static struct ac97c_platform_data neocore926_ac97_data = {
 	.reset_pin	= AT91_PIN_PA13,
 };

diff --git a/arch/arm/mach-at91/board-sam9m10g45ek.c b/arch/arm/mach-at91/board-sam9m10g45ek.c
index b8558eae5229..64c3843f323d 100644
--- a/arch/arm/mach-at91/board-sam9m10g45ek.c
+++ b/arch/arm/mach-at91/board-sam9m10g45ek.c
@@ -311,6 +311,14 @@ static void __init ek_add_device_buttons(void) {}


 /*
+ * AC97
+ * reset_pin is not connected: NRST
+ */
+static struct ac97c_platform_data ek_ac97_data = {
+};
+
+
+/*
  * LEDs ... these could all be PWM-driven, for variable brightness
  */
 static struct gpio_led ek_leds[] = {
@@ -372,6 +380,8 @@ static void __init ek_board_init(void)
 	at91_add_device_lcdc(&ek_lcdc_data);
 	/* Push Buttons */
 	ek_add_device_buttons();
+	/* AC97 */
+	at91_add_device_ac97(&ek_ac97_data);
 	/* LEDs */
 	at91_gpio_leds(ek_leds, ARRAY_SIZE(ek_leds));
 	at91_pwm_leds(ek_pwm_led, ARRAY_SIZE(ek_pwm_led));
diff --git a/arch/arm/mach-at91/board-sam9rlek.c b/arch/arm/mach-at91/board-sam9rlek.c
index 94ffb5c103b9..bd28e989e54e 100644
--- a/arch/arm/mach-at91/board-sam9rlek.c
+++ b/arch/arm/mach-at91/board-sam9rlek.c
@@ -211,6 +211,14 @@ static struct atmel_lcdfb_info __initdata ek_lcdc_data;


 /*
+ * AC97
+ * reset_pin is not connected: NRST
+ */
+static struct ac97c_platform_data ek_ac97_data = {
+};
+
+
+/*
  * LEDs
  */
 static struct gpio_led ek_leds[] = {
@@ -299,6 +307,8 @@ static void __init ek_board_init(void)
 	at91_add_device_mmc(0, &ek_mmc_data);
 	/* LCD Controller */
 	at91_add_device_lcdc(&ek_lcdc_data);
+	/* AC97 */
+	at91_add_device_ac97(&ek_ac97_data);
 	/* Touch Screen Controller */
 	at91_add_device_tsadcc();
 	/* LEDs */
diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c
index 2a318eba1b07..3f35293d457a 100644
--- a/arch/arm/mach-integrator/integrator_cp.c
+++ b/arch/arm/mach-integrator/integrator_cp.c
@@ -19,6 +19,7 @@
 #include <linux/amba/bus.h>
 #include <linux/amba/kmi.h>
 #include <linux/amba/clcd.h>
+#include <linux/amba/mmci.h>
 #include <linux/io.h>

 #include <asm/clkdev.h>
@@ -35,7 +36,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/irq.h>
-#include <asm/mach/mmc.h>
 #include <asm/mach/map.h>
 #include <asm/mach/time.h>

@@ -400,7 +400,7 @@ static unsigned int mmc_status(struct device *dev)
 	return status & 8;
 }

-static struct mmc_platform_data mmc_data = {
+static struct mmci_platform_data mmc_data = {
 	.ocr_mask	= MMC_VDD_32_33|MMC_VDD_33_34,
 	.status		= mmc_status,
 	.gpio_wp	= -1,
diff --git a/arch/arm/mach-iop13xx/include/mach/adma.h b/arch/arm/mach-iop13xx/include/mach/adma.h
index 5722e86f2174..6d3782d85a9f 100644
--- a/arch/arm/mach-iop13xx/include/mach/adma.h
+++ b/arch/arm/mach-iop13xx/include/mach/adma.h
@@ -150,6 +150,8 @@ static inline int iop_adma_get_max_xor(void)
 	return 16;
 }

+#define iop_adma_get_max_pq iop_adma_get_max_xor
+
 static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
 {
 	return __raw_readl(ADMA_ADAR(chan));
@@ -211,7 +213,10 @@ iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op)
 #define IOP_ADMA_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
 #define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
 #define IOP_ADMA_XOR_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
+#define IOP_ADMA_PQ_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
 #define iop_chan_zero_sum_slot_count(l, s, o) iop_chan_xor_slot_count(l, s, o)
+#define iop_chan_pq_slot_count iop_chan_xor_slot_count
+#define iop_chan_pq_zero_sum_slot_count iop_chan_xor_slot_count

 static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
 					 struct iop_adma_chan *chan)
@@ -220,6 +225,13 @@ static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
 	return hw_desc->dest_addr;
 }

+static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
+					  struct iop_adma_chan *chan)
+{
+	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+	return hw_desc->q_dest_addr;
+}
+
 static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
 					  struct iop_adma_chan *chan)
 {
@@ -319,6 +331,58 @@ iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
 	return 1;
 }

+static inline void
+iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
+		 unsigned long flags)
+{
+	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+	union {
+		u32 value;
+		struct iop13xx_adma_desc_ctrl field;
+	} u_desc_ctrl;
+
+	u_desc_ctrl.value = 0;
+	u_desc_ctrl.field.src_select = src_cnt - 1;
+	u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
+	u_desc_ctrl.field.pq_xfer_en = 1;
+	u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P);
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+	hw_desc->desc_ctrl = u_desc_ctrl.value;
+}
+
+static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
+{
+	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+	union {
+		u32 value;
+		struct iop13xx_adma_desc_ctrl field;
+	} u_desc_ctrl;
+
+	u_desc_ctrl.value = hw_desc->desc_ctrl;
+	return u_desc_ctrl.field.pq_xfer_en;
+}
+
+static inline void
+iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
+			  unsigned long flags)
+{
+	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+	union {
+		u32 value;
+		struct iop13xx_adma_desc_ctrl field;
+	} u_desc_ctrl;
+
+	u_desc_ctrl.value = 0;
+	u_desc_ctrl.field.src_select = src_cnt - 1;
+	u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
+	u_desc_ctrl.field.zero_result = 1;
+	u_desc_ctrl.field.status_write_back_en = 1;
+	u_desc_ctrl.field.pq_xfer_en = 1;
+	u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P);
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+	hw_desc->desc_ctrl = u_desc_ctrl.value;
+}
+
 static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc,
 					   struct iop_adma_chan *chan,
 					   u32 byte_count)
@@ -351,6 +415,7 @@ iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
 	}
 }

+#define iop_desc_set_pq_zero_sum_byte_count iop_desc_set_zero_sum_byte_count

 static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
 					  struct iop_adma_chan *chan,
@@ -361,6 +426,16 @@ static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
 	hw_desc->upper_dest_addr = 0;
 }

+static inline void
+iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr)
+{
+	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+
+	hw_desc->dest_addr = addr[0];
+	hw_desc->q_dest_addr = addr[1];
+	hw_desc->upper_dest_addr = 0;
+}
+
 static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc,
 						dma_addr_t addr)
 {
@@ -389,6 +464,29 @@ static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc,
 }

 static inline void
+iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
+			 dma_addr_t addr, unsigned char coef)
+{
+	int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
+	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter;
+	struct iop13xx_adma_src *src;
+	int i = 0;
+
+	do {
+		iter = iop_hw_desc_slot_idx(hw_desc, i);
+		src = &iter->src[src_idx];
+		src->src_addr = addr;
+		src->pq_upper_src_addr = 0;
+		src->pq_dmlt = coef;
+		slot_cnt -= slots_per_op;
+		if (slot_cnt) {
+			i += slots_per_op;
+			addr += IOP_ADMA_PQ_MAX_BYTE_COUNT;
+		}
+	} while (slot_cnt);
+}
+
+static inline void
 iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
 			struct iop_adma_chan *chan)
 {
@@ -399,6 +497,15 @@ iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
 }

 #define iop_desc_set_zero_sum_src_addr iop_desc_set_xor_src_addr
+#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr
+
+static inline void
+iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx,
+			      dma_addr_t *src)
+{
+	iop_desc_set_xor_src_addr(desc, pq_idx, src[pq_idx]);
+	iop_desc_set_xor_src_addr(desc, pq_idx+1, src[pq_idx+1]);
+}

 static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc,
 					  u32 next_desc_addr)
404 u32 next_desc_addr) 511 u32 next_desc_addr)
@@ -428,18 +535,20 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
 	hw_desc->block_fill_data = val;
 }

-static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
+static inline enum sum_check_flags
+iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
 {
 	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
 	struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
 	struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field;
+	enum sum_check_flags flags;

 	BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result));

-	if (desc_ctrl.pq_xfer_en)
-		return byte_count.zero_result_err_q;
-	else
-		return byte_count.zero_result_err;
+	flags = byte_count.zero_result_err_q << SUM_CHECK_Q;
+	flags |= byte_count.zero_result_err << SUM_CHECK_P;
+
+	return flags;
 }

 static inline void iop_chan_append(struct iop_adma_chan *chan)
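Both validation results are now reported in one pass instead of P or Q exclusively. A caller-side sketch of decoding the combined result, assuming the SUM_CHECK_P_RESULT/SUM_CHECK_Q_RESULT masks that the async_tx PQ rework in this series defines alongside enum sum_check_flags:

	#include <linux/kernel.h>
	#include <linux/dmaengine.h>

	static void report_zero_sum(enum sum_check_flags res)	/* illustrative */
	{
		if (res & SUM_CHECK_P_RESULT)
			pr_err("P (XOR parity) mismatch\n");
		if (res & SUM_CHECK_Q_RESULT)
			pr_err("Q (RAID-6 syndrome) mismatch\n");
	}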
diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c
index bee42c609df6..5c147fb66a01 100644
--- a/arch/arm/mach-iop13xx/setup.c
+++ b/arch/arm/mach-iop13xx/setup.c
@@ -477,10 +477,8 @@ void __init iop13xx_platform_init(void)
 		plat_data = &iop13xx_adma_0_data;
 		dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
 		dma_cap_set(DMA_XOR, plat_data->cap_mask);
-		dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
-		dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+		dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
 		dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
-		dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
 		dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
 		break;
 	case IOP13XX_INIT_ADMA_1:
@@ -489,10 +487,8 @@ void __init iop13xx_platform_init(void)
 		plat_data = &iop13xx_adma_1_data;
 		dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
 		dma_cap_set(DMA_XOR, plat_data->cap_mask);
-		dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
-		dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+		dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
 		dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
-		dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
 		dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
 		break;
 	case IOP13XX_INIT_ADMA_2:
@@ -501,14 +497,11 @@ void __init iop13xx_platform_init(void)
 		plat_data = &iop13xx_adma_2_data;
 		dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
 		dma_cap_set(DMA_XOR, plat_data->cap_mask);
-		dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
-		dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+		dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
 		dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
-		dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
 		dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
-		dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask);
-		dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask);
-		dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask);
+		dma_cap_set(DMA_PQ, plat_data->cap_mask);
+		dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask);
 		break;
 	}
 }
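The iop13xx-private capability names (DMA_PQ_XOR, DMA_PQ_UPDATE, DMA_PQ_ZERO_SUM, DMA_MEMCPY_CRC32C) are thus folded into the generic DMA_PQ/DMA_PQ_VAL/DMA_XOR_VAL. Clients test the advertised mask with the standard helper, e.g. (sketch):

	if (dma_has_cap(DMA_PQ, plat_data->cap_mask))
		pr_info("iop13xx ADMA2: P+Q (RAID-6) offload available\n");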
diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig
index 89c992b8f75b..a6f8eab14ba5 100644
--- a/arch/arm/mach-pxa/Kconfig
+++ b/arch/arm/mach-pxa/Kconfig
@@ -21,6 +21,11 @@ config CPU_PXA930

 config CPU_PXA935
 	bool "PXA935 (codename Tavor-P65)"
+	select CPU_PXA930
+
+config CPU_PXA950
+	bool "PXA950 (codename Tavor-PV2)"
+	select CPU_PXA930

 endmenu

26 31
@@ -79,6 +84,12 @@ config MACH_MP900C
 	bool "Nec Mobilepro 900/c"
 	select PXA25x

+config MACH_BALLOON3
+	bool "Balloon 3 board"
+	select PXA27x
+	select IWMMXT
+	select PXA_HAVE_BOARD_IRQS
+
 config ARCH_PXA_IDP
 	bool "Accelent Xscale IDP"
 	select PXA25x
@@ -371,6 +382,15 @@ config MACH_PALMTE2
 	  Say Y here if you intend to run this kernel on a Palm Tungsten|E2
 	  handheld computer.

+config MACH_PALMTC
+	bool "Palm Tungsten|C"
+	default y
+	depends on ARCH_PXA_PALM
+	select PXA25x
+	help
+	  Say Y here if you intend to run this kernel on a Palm Tungsten|C
+	  handheld computer.
+
 config MACH_PALMT5
 	bool "Palm Tungsten|T5"
 	default y
@@ -458,6 +478,7 @@ config PXA_EZX
 	select PXA27x
 	select IWMMXT
 	select HAVE_PWM
+	select PXA_HAVE_BOARD_IRQS

 config MACH_EZX_A780
 	bool "Motorola EZX A780"
@@ -489,6 +510,21 @@ config MACH_EZX_E2
 	default y
 	depends on PXA_EZX

+config MACH_XCEP
+	bool "Iskratel Electronics XCEP"
+	select PXA25x
+	select MTD
+	select MTD_PARTITIONS
+	select MTD_PHYSMAP
+	select MTD_CFI_INTELEXT
+	select MTD_CFI
+	select MTD_CHAR
+	select SMC91X
+	select PXA_SSP
+	help
+	  PXA255 based Single Board Computer with SMC 91C111 ethernet chip and 64 MB of flash.
+	  Tuned for usage in Libera instruments for particle accelerators.
+
 endmenu

 config PXA25x
diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile
index d4c6122a342f..f10e152bfc27 100644
--- a/arch/arm/mach-pxa/Makefile
+++ b/arch/arm/mach-pxa/Makefile
@@ -31,6 +31,7 @@ obj-$(CONFIG_GUMSTIX_AM300EPD) += am300epd.o
 obj-$(CONFIG_ARCH_LUBBOCK)	+= lubbock.o
 obj-$(CONFIG_MACH_LOGICPD_PXA270) += lpd270.o
 obj-$(CONFIG_MACH_MAINSTONE)	+= mainstone.o
+obj-$(CONFIG_MACH_BALLOON3)	+= balloon3.o
 obj-$(CONFIG_MACH_MP900C)	+= mp900.o
 obj-$(CONFIG_ARCH_PXA_IDP)	+= idp.o
 obj-$(CONFIG_MACH_TRIZEPS4)	+= trizeps4.o
@@ -58,6 +59,7 @@ obj-$(CONFIG_MACH_E750) += e750.o
 obj-$(CONFIG_MACH_E400)		+= e400.o
 obj-$(CONFIG_MACH_E800)		+= e800.o
 obj-$(CONFIG_MACH_PALMTE2)	+= palmte2.o
+obj-$(CONFIG_MACH_PALMTC)	+= palmtc.o
 obj-$(CONFIG_MACH_PALMT5)	+= palmt5.o
 obj-$(CONFIG_MACH_PALMTX)	+= palmtx.o
 obj-$(CONFIG_MACH_PALMLD)	+= palmld.o
@@ -78,6 +80,8 @@ obj-$(CONFIG_MACH_ARMCORE) += cm-x2xx.o cm-x255.o cm-x270.o
 obj-$(CONFIG_MACH_CM_X300)	+= cm-x300.o
 obj-$(CONFIG_PXA_EZX)		+= ezx.o

+obj-$(CONFIG_MACH_XCEP)		+= xcep.o
+
 obj-$(CONFIG_MACH_INTELMOTE2)	+= imote2.o
 obj-$(CONFIG_MACH_STARGATE2)	+= stargate2.o
 obj-$(CONFIG_MACH_CSB726)	+= csb726.o
diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c
new file mode 100644
index 000000000000..f23138b8fca3
--- /dev/null
+++ b/arch/arm/mach-pxa/balloon3.c
@@ -0,0 +1,361 @@
+/*
+ *  linux/arch/arm/mach-pxa/balloon3.c
+ *
+ *  Support for Balloonboard.org Balloon3 board.
+ *
+ *  Author:	Nick Bane, Wookey, Jonathan McDowell
+ *  Created:	June, 2006
+ *  Copyright:	Toby Churchill Ltd
+ *  Derived from mainstone.c, by Nico Pitre
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/sysdev.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/bitops.h>
+#include <linux/fb.h>
+#include <linux/gpio.h>
+#include <linux/ioport.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/types.h>
+
+#include <asm/setup.h>
+#include <asm/mach-types.h>
+#include <asm/irq.h>
+#include <asm/sizes.h>
+
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+#include <asm/mach/irq.h>
+#include <asm/mach/flash.h>
+
+#include <mach/pxa27x.h>
+#include <mach/balloon3.h>
+#include <mach/audio.h>
+#include <mach/pxafb.h>
+#include <mach/mmc.h>
+#include <mach/udc.h>
+#include <mach/pxa27x-udc.h>
+#include <mach/irda.h>
+#include <mach/ohci.h>
+
+#include <plat/i2c.h>
+
+#include "generic.h"
+#include "devices.h"
+
+static unsigned long balloon3_irq_enabled;
+
+static unsigned long balloon3_features_present =
+		(1 << BALLOON3_FEATURE_OHCI) | (1 << BALLOON3_FEATURE_CF) |
+		(1 << BALLOON3_FEATURE_AUDIO) |
+		(1 << BALLOON3_FEATURE_TOPPOLY);
+
+int balloon3_has(enum balloon3_features feature)
+{
+	return (balloon3_features_present & (1 << feature)) ? 1 : 0;
+}
+EXPORT_SYMBOL_GPL(balloon3_has);
+
+int __init parse_balloon3_features(char *arg)
+{
+	if (!arg)
+		return 0;
+
+	return strict_strtoul(arg, 0, &balloon3_features_present);
+}
+early_param("balloon3_features", parse_balloon3_features);
+
+static void balloon3_mask_irq(unsigned int irq)
+{
+	int balloon3_irq = (irq - BALLOON3_IRQ(0));
+	balloon3_irq_enabled &= ~(1 << balloon3_irq);
+	__raw_writel(~balloon3_irq_enabled, BALLOON3_INT_CONTROL_REG);
+}
+
+static void balloon3_unmask_irq(unsigned int irq)
+{
+	int balloon3_irq = (irq - BALLOON3_IRQ(0));
+	balloon3_irq_enabled |= (1 << balloon3_irq);
+	__raw_writel(~balloon3_irq_enabled, BALLOON3_INT_CONTROL_REG);
+}
+
+static struct irq_chip balloon3_irq_chip = {
+	.name		= "FPGA",
+	.ack		= balloon3_mask_irq,
+	.mask		= balloon3_mask_irq,
+	.unmask		= balloon3_unmask_irq,
+};
+
+static void balloon3_irq_handler(unsigned int irq, struct irq_desc *desc)
+{
+	unsigned long pending = __raw_readl(BALLOON3_INT_CONTROL_REG) &
+					balloon3_irq_enabled;
+
+	do {
+		/* clear useless edge notification */
+		if (desc->chip->ack)
+			desc->chip->ack(BALLOON3_AUX_NIRQ);
+		while (pending) {
+			irq = BALLOON3_IRQ(0) + __ffs(pending);
+			generic_handle_irq(irq);
+			pending &= pending - 1;
+		}
+		pending = __raw_readl(BALLOON3_INT_CONTROL_REG) &
+				balloon3_irq_enabled;
+	} while (pending);
+}
+
+static void __init balloon3_init_irq(void)
+{
+	int irq;
+
+	pxa27x_init_irq();
+	/* setup extra Balloon3 irqs */
+	for (irq = BALLOON3_IRQ(0); irq <= BALLOON3_IRQ(7); irq++) {
+		set_irq_chip(irq, &balloon3_irq_chip);
+		set_irq_handler(irq, handle_level_irq);
+		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
+	}
+
+	set_irq_chained_handler(BALLOON3_AUX_NIRQ, balloon3_irq_handler);
+	set_irq_type(BALLOON3_AUX_NIRQ, IRQ_TYPE_EDGE_FALLING);
+
+	pr_debug("%s: chained handler installed - irq %d automatically "
+		"enabled\n", __func__, BALLOON3_AUX_NIRQ);
+}
+
+static void balloon3_backlight_power(int on)
+{
+	pr_debug("%s: power is %s\n", __func__, on ? "on" : "off");
+	gpio_set_value(BALLOON3_GPIO_RUN_BACKLIGHT, on);
+}
+
+static unsigned long balloon3_lcd_pin_config[] = {
+	/* LCD - 16bpp Active TFT */
+	GPIO58_LCD_LDD_0,
+	GPIO59_LCD_LDD_1,
+	GPIO60_LCD_LDD_2,
+	GPIO61_LCD_LDD_3,
+	GPIO62_LCD_LDD_4,
+	GPIO63_LCD_LDD_5,
+	GPIO64_LCD_LDD_6,
+	GPIO65_LCD_LDD_7,
+	GPIO66_LCD_LDD_8,
+	GPIO67_LCD_LDD_9,
+	GPIO68_LCD_LDD_10,
+	GPIO69_LCD_LDD_11,
+	GPIO70_LCD_LDD_12,
+	GPIO71_LCD_LDD_13,
+	GPIO72_LCD_LDD_14,
+	GPIO73_LCD_LDD_15,
+	GPIO74_LCD_FCLK,
+	GPIO75_LCD_LCLK,
+	GPIO76_LCD_PCLK,
+	GPIO77_LCD_BIAS,
+
+	GPIO99_GPIO,		/* Backlight */
+};
+
+static struct pxafb_mode_info balloon3_lcd_modes[] = {
+	{
+		.pixclock	= 38000,
+		.xres		= 480,
+		.yres		= 640,
+		.bpp		= 16,
+		.hsync_len	= 8,
+		.left_margin	= 8,
+		.right_margin	= 8,
+		.vsync_len	= 2,
+		.upper_margin	= 4,
+		.lower_margin	= 5,
+		.sync		= 0,
+	},
+};
+
+static struct pxafb_mach_info balloon3_pxafb_info = {
+	.modes			= balloon3_lcd_modes,
+	.num_modes		= ARRAY_SIZE(balloon3_lcd_modes),
+	.lcd_conn		= LCD_COLOR_TFT_16BPP | LCD_PCLK_EDGE_FALL,
+	.pxafb_backlight_power	= balloon3_backlight_power,
+};
+
+static unsigned long balloon3_mmc_pin_config[] = {
+	GPIO32_MMC_CLK,
+	GPIO92_MMC_DAT_0,
+	GPIO109_MMC_DAT_1,
+	GPIO110_MMC_DAT_2,
+	GPIO111_MMC_DAT_3,
+	GPIO112_MMC_CMD,
+};
+
+static void balloon3_mci_setpower(struct device *dev, unsigned int vdd)
+{
+	struct pxamci_platform_data *p_d = dev->platform_data;
+
+	if ((1 << vdd) & p_d->ocr_mask) {
+		pr_debug("%s: on\n", __func__);
+		/* FIXME something to prod here? */
+	} else {
+		pr_debug("%s: off\n", __func__);
+		/* FIXME something to prod here? */
+	}
+}
+
+static struct pxamci_platform_data balloon3_mci_platform_data = {
+	.ocr_mask	= MMC_VDD_32_33|MMC_VDD_33_34,
+	.setpower	= balloon3_mci_setpower,
+};
+
+static int balloon3_udc_is_connected(void)
+{
+	pr_debug("%s: udc connected\n", __func__);
+	return 1;
+}
+
+static void balloon3_udc_command(int cmd)
+{
+	switch (cmd) {
+	case PXA2XX_UDC_CMD_CONNECT:
+		UP2OCR |= (UP2OCR_DPPUE + UP2OCR_DPPUBE);
+		pr_debug("%s: connect\n", __func__);
+		break;
+	case PXA2XX_UDC_CMD_DISCONNECT:
+		UP2OCR &= ~UP2OCR_DPPUE;
+		pr_debug("%s: disconnect\n", __func__);
+		break;
+	}
+}
+
+static struct pxa2xx_udc_mach_info balloon3_udc_info = {
+	.udc_is_connected	= balloon3_udc_is_connected,
+	.udc_command		= balloon3_udc_command,
+};
+
+static struct pxaficp_platform_data balloon3_ficp_platform_data = {
+	.transceiver_cap	= IR_SIRMODE | IR_FIRMODE | IR_OFF,
+};
+
+static unsigned long balloon3_ohci_pin_config[] = {
+	GPIO88_USBH1_PWR,
+	GPIO89_USBH1_PEN,
+};
+
+static struct pxaohci_platform_data balloon3_ohci_platform_data = {
+	.port_mode	= PMM_PERPORT_MODE,
+	.flags		= ENABLE_PORT_ALL | POWER_CONTROL_LOW | POWER_SENSE_LOW,
+};
+
+static unsigned long balloon3_pin_config[] __initdata = {
+	/* Select BTUART 'COM1/ttyS0' as IO option for pins 42/43/44/45 */
+	GPIO42_BTUART_RXD,
+	GPIO43_BTUART_TXD,
+	GPIO44_BTUART_CTS,
+	GPIO45_BTUART_RTS,
+
+	/* Wakeup GPIO */
+	GPIO1_GPIO | WAKEUP_ON_EDGE_BOTH,
+
+	/* NAND & IDLE LED GPIOs */
+	GPIO9_GPIO,
+	GPIO10_GPIO,
+};
+
+static struct gpio_led balloon3_gpio_leds[] = {
+	{
+		.name			= "balloon3:green:idle",
+		.default_trigger	= "heartbeat",
+		.gpio			= BALLOON3_GPIO_LED_IDLE,
+		.active_low		= 1,
+	},
+	{
+		.name			= "balloon3:green:nand",
+		.default_trigger	= "nand-disk",
+		.gpio			= BALLOON3_GPIO_LED_NAND,
+		.active_low		= 1,
+	},
+};
+
+static struct gpio_led_platform_data balloon3_gpio_leds_platform_data = {
+	.leds		= balloon3_gpio_leds,
+	.num_leds	= ARRAY_SIZE(balloon3_gpio_leds),
+};
+
+static struct platform_device balloon3led_device = {
+	.name	= "leds-gpio",
+	.id	= -1,
+	.dev	= {
+		.platform_data	= &balloon3_gpio_leds_platform_data,
+	},
+};
+
+static void __init balloon3_init(void)
+{
+	pr_info("Initialising Balloon3\n");
+
+	/* system bus arbiter setting
+	 * - Core_Park
+	 * - LCD_wt:DMA_wt:CORE_Wt = 2:3:4
+	 */
+	ARB_CNTRL = ARB_CORE_PARK | 0x234;
+
+	pxa_set_i2c_info(NULL);
+	if (balloon3_has(BALLOON3_FEATURE_AUDIO))
+		pxa_set_ac97_info(NULL);
+
+	if (balloon3_has(BALLOON3_FEATURE_TOPPOLY)) {
+		pxa2xx_mfp_config(ARRAY_AND_SIZE(balloon3_lcd_pin_config));
+		gpio_request(BALLOON3_GPIO_RUN_BACKLIGHT,
+			"LCD Backlight Power");
+		gpio_direction_output(BALLOON3_GPIO_RUN_BACKLIGHT, 1);
+		set_pxa_fb_info(&balloon3_pxafb_info);
+	}
+
+	if (balloon3_has(BALLOON3_FEATURE_MMC)) {
+		pxa2xx_mfp_config(ARRAY_AND_SIZE(balloon3_mmc_pin_config));
+		pxa_set_mci_info(&balloon3_mci_platform_data);
+	}
+	pxa_set_ficp_info(&balloon3_ficp_platform_data);
+	if (balloon3_has(BALLOON3_FEATURE_OHCI)) {
+		pxa2xx_mfp_config(ARRAY_AND_SIZE(balloon3_ohci_pin_config));
+		pxa_set_ohci_info(&balloon3_ohci_platform_data);
+	}
+	pxa_set_udc_info(&balloon3_udc_info);
+
+	pxa2xx_mfp_config(ARRAY_AND_SIZE(balloon3_pin_config));
+
+	platform_device_register(&balloon3led_device);
+}
+
+static struct map_desc balloon3_io_desc[] __initdata = {
+	{	/* CPLD/FPGA */
+		.virtual	= BALLOON3_FPGA_VIRT,
+		.pfn		= __phys_to_pfn(BALLOON3_FPGA_PHYS),
+		.length		= BALLOON3_FPGA_LENGTH,
+		.type		= MT_DEVICE,
+	},
+};
+
+static void __init balloon3_map_io(void)
+{
+	pxa_map_io();
+	iotable_init(balloon3_io_desc, ARRAY_SIZE(balloon3_io_desc));
+}
+
+MACHINE_START(BALLOON3, "Balloon3")
+	/* Maintainer: Nick Bane. */
+	.phys_io	= 0x40000000,
+	.io_pg_offst	= (io_p2v(0x40000000) >> 18) & 0xfffc,
+	.map_io		= balloon3_map_io,
+	.init_irq	= balloon3_init_irq,
+	.timer		= &pxa_timer,
+	.init_machine	= balloon3_init,
+	.boot_params	= PHYS_OFFSET + 0x100,
+MACHINE_END
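The feature mask above can be overridden from the kernel command line through the early_param() hook; an illustrative note (bit positions are assumed from enum balloon3_features in mach/balloon3.h, which this file relies on):

	/*
	 * Example command lines (values illustrative):
	 *   balloon3_features=0x0	no FPGA-backed features at all
	 *   balloon3_features=0xf	OHCI + CF + AUDIO + TOPPOLY, assuming
	 *				those are bits 0..3 as the default
	 *				mask above suggests
	 *
	 * Board code then gates setup on the mask, as balloon3_init() does:
	 *   if (balloon3_has(BALLOON3_FEATURE_MMC))
	 *           pxa_set_mci_info(&balloon3_mci_platform_data);
	 */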
diff --git a/arch/arm/mach-pxa/clock.h b/arch/arm/mach-pxa/clock.h
index 5599bceff738..978a3667e90d 100644
--- a/arch/arm/mach-pxa/clock.h
+++ b/arch/arm/mach-pxa/clock.h
@@ -12,7 +12,6 @@ struct clk {
 	unsigned int	cken;
 	unsigned int	delay;
 	unsigned int	enabled;
-	struct clk	*other;
 };

 #define INIT_CLKREG(_clk,_devname,_conname)	\
diff --git a/arch/arm/mach-pxa/cm-x270.c b/arch/arm/mach-pxa/cm-x270.c
index 1d2cec25391d..eea78b6c2bc5 100644
--- a/arch/arm/mach-pxa/cm-x270.c
+++ b/arch/arm/mach-pxa/cm-x270.c
@@ -13,13 +13,18 @@
 #include <linux/sysdev.h>
 #include <linux/irq.h>
 #include <linux/gpio.h>
+#include <linux/delay.h>

 #include <linux/rtc-v3020.h>
 #include <video/mbxfb.h>

+#include <linux/spi/spi.h>
+#include <linux/spi/libertas_spi.h>
+
 #include <mach/pxa27x.h>
 #include <mach/ohci.h>
 #include <mach/mmc.h>
+#include <mach/pxa2xx_spi.h>

 #include "generic.h"

@@ -34,6 +39,10 @@
 /* MMC power enable */
 #define GPIO105_MMC_POWER	(105)

+/* WLAN GPIOS */
+#define GPIO19_WLAN_STRAP	(19)
+#define GPIO102_WLAN_RST	(102)
+
 static unsigned long cmx270_pin_config[] = {
 	/* AC'97 */
 	GPIO28_AC97_BITCLK,
@@ -94,8 +103,8 @@ static unsigned long cmx270_pin_config[] = {
 	GPIO26_SSP1_RXD,

 	/* SSP2 */
-	GPIO19_SSP2_SCLK,
-	GPIO14_SSP2_SFRM,
+	GPIO19_GPIO,	/* SSP2 clock is used as GPIO for Libertas pin-strap */
+	GPIO14_GPIO,
 	GPIO87_SSP2_TXD,
 	GPIO88_SSP2_RXD,

@@ -123,6 +132,7 @@ static unsigned long cmx270_pin_config[] = {
 	GPIO0_GPIO	| WAKEUP_ON_EDGE_BOTH,
 	GPIO105_GPIO | MFP_LPM_DRIVE_HIGH,	/* MMC/SD power */
 	GPIO53_GPIO,				/* PC card reset */
+	GPIO102_GPIO,				/* WLAN reset */

 	/* NAND controls */
 	GPIO11_GPIO | MFP_LPM_DRIVE_HIGH,	/* NAND CE# */
@@ -131,6 +141,7 @@ static unsigned long cmx270_pin_config[] = {
 	/* interrupts */
 	GPIO10_GPIO,	/* DM9000 interrupt */
 	GPIO83_GPIO,	/* MMC card detect */
+	GPIO95_GPIO,	/* WLAN interrupt */
 };

 /* V3020 RTC */
@@ -271,64 +282,114 @@ static inline void cmx270_init_ohci(void) {}
 #endif

 #if defined(CONFIG_MMC) || defined(CONFIG_MMC_MODULE)
-static int cmx270_mci_init(struct device *dev,
-			   irq_handler_t cmx270_detect_int,
-			   void *data)
-{
-	int err;
-
-	err = gpio_request(GPIO105_MMC_POWER, "MMC/SD power");
-	if (err) {
-		dev_warn(dev, "power gpio unavailable\n");
-		return err;
-	}
-
-	gpio_direction_output(GPIO105_MMC_POWER, 0);
-
-	err = request_irq(CMX270_MMC_IRQ, cmx270_detect_int,
-			  IRQF_DISABLED | IRQF_TRIGGER_FALLING,
-			  "MMC card detect", data);
-	if (err) {
-		gpio_free(GPIO105_MMC_POWER);
-		dev_err(dev, "cmx270_mci_init: MMC/SD: can't"
-			" request MMC card detect IRQ\n");
-	}
-
-	return err;
-}
-
-static void cmx270_mci_setpower(struct device *dev, unsigned int vdd)
-{
-	struct pxamci_platform_data *p_d = dev->platform_data;
-
-	if ((1 << vdd) & p_d->ocr_mask) {
-		dev_dbg(dev, "power on\n");
-		gpio_set_value(GPIO105_MMC_POWER, 0);
-	} else {
-		gpio_set_value(GPIO105_MMC_POWER, 1);
-		dev_dbg(dev, "power off\n");
-	}
-}
-
-static void cmx270_mci_exit(struct device *dev, void *data)
-{
-	free_irq(CMX270_MMC_IRQ, data);
-	gpio_free(GPIO105_MMC_POWER);
-}
-
 static struct pxamci_platform_data cmx270_mci_platform_data = {
 	.ocr_mask	= MMC_VDD_32_33|MMC_VDD_33_34,
-	.init		= cmx270_mci_init,
-	.setpower	= cmx270_mci_setpower,
-	.exit		= cmx270_mci_exit,
+	.gpio_card_detect = GPIO83_MMC_IRQ,
+	.gpio_card_ro	= -1,
+	.gpio_power	= GPIO105_MMC_POWER,
+	.gpio_power_invert = 1,
 };

 static void __init cmx270_init_mmc(void)
 {
 	pxa_set_mci_info(&cmx270_mci_platform_data);
 }
 #else
 static inline void cmx270_init_mmc(void) {}
 #endif

+#if defined(CONFIG_SPI_PXA2XX) || defined(CONFIG_SPI_PXA2XX_MODULE)
+static struct pxa2xx_spi_master cm_x270_spi_info = {
+	.num_chipselect	= 1,
+	.enable_dma	= 1,
+};
+
+static struct pxa2xx_spi_chip cm_x270_libertas_chip = {
+	.rx_threshold	= 1,
+	.tx_threshold	= 1,
+	.timeout	= 1000,
+	.gpio_cs	= 14,
+};
+
+static unsigned long cm_x270_libertas_pin_config[] = {
+	/* SSP2 */
+	GPIO19_SSP2_SCLK,
+	GPIO14_GPIO,
+	GPIO87_SSP2_TXD,
+	GPIO88_SSP2_RXD,
+
+};
+
+static int cm_x270_libertas_setup(struct spi_device *spi)
+{
+	int err = gpio_request(GPIO19_WLAN_STRAP, "WLAN STRAP");
+	if (err)
+		return err;
+
+	err = gpio_request(GPIO102_WLAN_RST, "WLAN RST");
+	if (err)
+		goto err_free_strap;
+
+	err = gpio_direction_output(GPIO102_WLAN_RST, 0);
+	if (err)
+		goto err_free_strap;
+	msleep(100);
+
+	err = gpio_direction_output(GPIO19_WLAN_STRAP, 1);
+	if (err)
+		goto err_free_strap;
+	msleep(100);
+
+	pxa2xx_mfp_config(ARRAY_AND_SIZE(cm_x270_libertas_pin_config));
+
+	gpio_set_value(GPIO102_WLAN_RST, 1);
+	msleep(100);
+
+	spi->bits_per_word = 16;
+	spi_setup(spi);
+
+	return 0;
+
+err_free_strap:
+	gpio_free(GPIO19_WLAN_STRAP);
+
+	return err;
+}
+
+static int cm_x270_libertas_teardown(struct spi_device *spi)
+{
+	gpio_set_value(GPIO102_WLAN_RST, 0);
+	gpio_free(GPIO102_WLAN_RST);
+	gpio_free(GPIO19_WLAN_STRAP);
+
+	return 0;
+}
+
+struct libertas_spi_platform_data cm_x270_libertas_pdata = {
+	.use_dummy_writes	= 1,
+	.setup			= cm_x270_libertas_setup,
+	.teardown		= cm_x270_libertas_teardown,
+};
+
+static struct spi_board_info cm_x270_spi_devices[] __initdata = {
+	{
+		.modalias		= "libertas_spi",
+		.max_speed_hz		= 13000000,
+		.bus_num		= 2,
+		.irq			= gpio_to_irq(95),
+		.chip_select		= 0,
+		.controller_data	= &cm_x270_libertas_chip,
+		.platform_data		= &cm_x270_libertas_pdata,
+	},
+};
+
+static void __init cmx270_init_spi(void)
+{
+	pxa2xx_set_spi_info(2, &cm_x270_spi_info);
+	spi_register_board_info(ARRAY_AND_SIZE(cm_x270_spi_devices));
+}
+#else
+static inline void cmx270_init_spi(void) {}
+#endif
+
 void __init cmx270_init(void)
@@ -343,4 +404,5 @@ void __init cmx270_init(void)
 	cmx270_init_mmc();
 	cmx270_init_ohci();
 	cmx270_init_2700G();
+	cmx270_init_spi();
 }
diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c
index 465da26591bd..aac2cda60e09 100644
--- a/arch/arm/mach-pxa/cm-x300.c
+++ b/arch/arm/mach-pxa/cm-x300.c
@@ -306,68 +306,21 @@ static void cm_x300_mci_exit(struct device *dev, void *data)
 }

 static struct pxamci_platform_data cm_x300_mci_platform_data = {
 	.detect_delay		= 20,
 	.ocr_mask		= MMC_VDD_32_33|MMC_VDD_33_34,
 	.init 			= cm_x300_mci_init,
 	.exit			= cm_x300_mci_exit,
+	.gpio_card_detect	= -1,
+	.gpio_card_ro		= -1,
+	.gpio_power		= -1,
 };

-static int cm_x300_mci2_ro(struct device *dev)
-{
-	return gpio_get_value(GPIO85_MMC2_WP);
-}
-
-static int cm_x300_mci2_init(struct device *dev,
-			     irq_handler_t cm_x300_detect_int,
-			     void *data)
-{
-	int err;
-
-	/*
-	 * setup GPIO for CM-X300 MMC controller
-	 */
-	err = gpio_request(GPIO82_MMC2_IRQ, "mmc card detect");
-	if (err)
-		goto err_request_cd;
-	gpio_direction_input(GPIO82_MMC2_IRQ);
-
-	err = gpio_request(GPIO85_MMC2_WP, "mmc write protect");
-	if (err)
-		goto err_request_wp;
-	gpio_direction_input(GPIO85_MMC2_WP);
-
-	err = request_irq(CM_X300_MMC2_IRQ, cm_x300_detect_int,
-			  IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-			  "MMC card detect", data);
-	if (err) {
-		printk(KERN_ERR "%s: MMC/SD/SDIO: "
-				"can't request card detect IRQ\n", __func__);
-		goto err_request_irq;
-	}
-
-	return 0;
-
-err_request_irq:
-	gpio_free(GPIO85_MMC2_WP);
-err_request_wp:
-	gpio_free(GPIO82_MMC2_IRQ);
-err_request_cd:
-	return err;
-}
-
-static void cm_x300_mci2_exit(struct device *dev, void *data)
-{
-	free_irq(CM_X300_MMC2_IRQ, data);
-	gpio_free(GPIO82_MMC2_IRQ);
-	gpio_free(GPIO85_MMC2_WP);
-}
-
 static struct pxamci_platform_data cm_x300_mci2_platform_data = {
 	.detect_delay		= 20,
 	.ocr_mask		= MMC_VDD_32_33|MMC_VDD_33_34,
-	.init 			= cm_x300_mci2_init,
-	.exit			= cm_x300_mci2_exit,
-	.get_ro			= cm_x300_mci2_ro,
+	.gpio_card_detect	= GPIO82_MMC2_IRQ,
+	.gpio_card_ro		= GPIO85_MMC2_WP,
+	.gpio_power		= -1,
 };

 static void __init cm_x300_init_mmc(void)
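These board conversions (CM-X270, CM-X300, Colibri below) depend on the pxamci driver now claiming the card-detect, write-protect and power GPIOs itself when the new fields are filled in; -1 marks a line as not wired. A minimal board-side sketch under that assumption:

	static struct pxamci_platform_data board_mci_data = {	/* illustrative */
		.ocr_mask	  = MMC_VDD_32_33 | MMC_VDD_33_34,
		.gpio_card_detect = -1,	/* no CD line wired */
		.gpio_card_ro	  = -1,	/* no WP switch */
		.gpio_power	  = -1,	/* supply not software-switched */
	};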
diff --git a/arch/arm/mach-pxa/colibri-pxa300.c b/arch/arm/mach-pxa/colibri-pxa300.c
index 7c9c34c19ae2..37c239c56568 100644
--- a/arch/arm/mach-pxa/colibri-pxa300.c
+++ b/arch/arm/mach-pxa/colibri-pxa300.c
@@ -172,6 +172,7 @@ void __init colibri_pxa300_init(void)
 {
 	colibri_pxa300_init_eth();
 	colibri_pxa300_init_ohci();
+	colibri_pxa3xx_init_nand();
 	colibri_pxa300_init_lcd();
 	colibri_pxa3xx_init_lcd(mfp_to_gpio(GPIO39_GPIO));
 	colibri_pxa310_init_ac97();
diff --git a/arch/arm/mach-pxa/colibri-pxa320.c b/arch/arm/mach-pxa/colibri-pxa320.c
index a18d37b3c5e6..494572825c7d 100644
--- a/arch/arm/mach-pxa/colibri-pxa320.c
+++ b/arch/arm/mach-pxa/colibri-pxa320.c
 static inline void colibri_pxa320_init_ac97(void) {}
 #endif

+/*
+ * The following configuration is verified to work with the Toradex Orchid
+ * carrier board
+ */
+static mfp_cfg_t colibri_pxa320_uart_pin_config[] __initdata = {
+	/* UART 1 configuration (may be set by bootloader) */
+	GPIO99_UART1_CTS,
+	GPIO104_UART1_RTS,
+	GPIO97_UART1_RXD,
+	GPIO98_UART1_TXD,
+	GPIO101_UART1_DTR,
+	GPIO103_UART1_DSR,
+	GPIO100_UART1_DCD,
+	GPIO102_UART1_RI,
+
+	/* UART 2 configuration */
+	GPIO109_UART2_CTS,
+	GPIO112_UART2_RTS,
+	GPIO110_UART2_RXD,
+	GPIO111_UART2_TXD,
+
+	/* UART 3 configuration */
+	GPIO30_UART3_RXD,
+	GPIO31_UART3_TXD,
+};
+
+static void __init colibri_pxa320_init_uart(void)
+{
+	pxa3xx_mfp_config(ARRAY_AND_SIZE(colibri_pxa320_uart_pin_config));
+}
+
 void __init colibri_pxa320_init(void)
 {
 	colibri_pxa320_init_eth();
 	colibri_pxa320_init_ohci();
+	colibri_pxa3xx_init_nand();
 	colibri_pxa320_init_lcd();
 	colibri_pxa3xx_init_lcd(mfp_to_gpio(GPIO49_GPIO));
 	colibri_pxa320_init_ac97();
 	colibri_pxa3xx_init_mmc(ARRAY_AND_SIZE(colibri_pxa320_mmc_pin_config),
 				mfp_to_gpio(MFP_PIN_GPIO28));
+	colibri_pxa320_init_uart();
 }

 MACHINE_START(COLIBRI320, "Toradex Colibri PXA320")
diff --git a/arch/arm/mach-pxa/colibri-pxa3xx.c b/arch/arm/mach-pxa/colibri-pxa3xx.c
index ea34e34f8cd8..efebaf4d734d 100644
--- a/arch/arm/mach-pxa/colibri-pxa3xx.c
+++ b/arch/arm/mach-pxa/colibri-pxa3xx.c
@@ -25,6 +25,7 @@
 #include <mach/colibri.h>
 #include <mach/mmc.h>
 #include <mach/pxafb.h>
+#include <mach/pxa3xx_nand.h>

 #include "generic.h"
 #include "devices.h"
@@ -95,10 +96,13 @@ static void colibri_pxa3xx_mci_exit(struct device *dev, void *data)
 }

 static struct pxamci_platform_data colibri_pxa3xx_mci_platform_data = {
 	.detect_delay	= 20,
 	.ocr_mask	= MMC_VDD_32_33 | MMC_VDD_33_34,
 	.init		= colibri_pxa3xx_mci_init,
 	.exit		= colibri_pxa3xx_mci_exit,
+	.gpio_card_detect = -1,
+	.gpio_card_ro	= -1,
+	.gpio_power	= -1,
 };

 void __init colibri_pxa3xx_init_mmc(mfp_cfg_t *pins, int len, int detect_pin)
@@ -154,3 +158,43 @@ void __init colibri_pxa3xx_init_lcd(int bl_pin)
154} 158}
155#endif 159#endif
156 160
161#if defined(CONFIG_MTD_NAND_PXA3xx) || defined(CONFIG_MTD_NAND_PXA3xx_MODULE)
162static struct mtd_partition colibri_nand_partitions[] = {
163 {
164 .name = "bootloader",
165 .offset = 0,
166 .size = SZ_512K,
167 .mask_flags = MTD_WRITEABLE, /* force read-only */
168 },
169 {
170 .name = "kernel",
171 .offset = MTDPART_OFS_APPEND,
172 .size = SZ_4M,
173 .mask_flags = MTD_WRITEABLE, /* force read-only */
174 },
175 {
176 .name = "reserved",
177 .offset = MTDPART_OFS_APPEND,
178 .size = SZ_1M,
179 .mask_flags = MTD_WRITEABLE, /* force read-only */
180 },
181 {
182 .name = "fs",
183 .offset = MTDPART_OFS_APPEND,
184 .size = MTDPART_SIZ_FULL,
185 },
186};
187
188static struct pxa3xx_nand_platform_data colibri_nand_info = {
189 .enable_arbiter = 1,
190 .keep_config = 1,
191 .parts = colibri_nand_partitions,
192 .nr_parts = ARRAY_SIZE(colibri_nand_partitions),
193};
194
195void __init colibri_pxa3xx_init_nand(void)
196{
197 pxa3xx_set_nand_info(&colibri_nand_info);
198}
199#endif
200
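
The partition table above relies on the MTD core to resolve MTDPART_OFS_APPEND and MTDPART_SIZ_FULL when the table is parsed. A standalone sketch of that resolution, assuming a hypothetical 128 MiB chip and the stock sentinel values from linux/mtd/partitions.h:

#include <stdio.h>

#define SZ_512K            (512 * 1024)
#define SZ_1M              (1024 * 1024)
#define SZ_4M              (4 * SZ_1M)
#define MTDPART_OFS_APPEND (-1LL)	/* "start right after the previous partition" */
#define MTDPART_SIZ_FULL   (0LL)	/* "use all remaining space" */

struct part { const char *name; long long offset, size; };

int main(void)
{
	long long chip = 128LL * SZ_1M;	/* assumed chip size, for illustration */
	struct part parts[] = {
		{ "bootloader", 0,                  SZ_512K },
		{ "kernel",     MTDPART_OFS_APPEND, SZ_4M },
		{ "reserved",   MTDPART_OFS_APPEND, SZ_1M },
		{ "fs",         MTDPART_OFS_APPEND, MTDPART_SIZ_FULL },
	};
	long long cur = 0;

	for (int i = 0; i < 4; i++) {
		long long off  = (parts[i].offset == MTDPART_OFS_APPEND)
				 ? cur : parts[i].offset;
		long long size = (parts[i].size == MTDPART_SIZ_FULL)
				 ? chip - off : parts[i].size;
		printf("%-10s  0x%08llx - 0x%08llx\n",
		       parts[i].name, off, off + size);
		cur = off + size;
	}
	return 0;
}
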
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index 5363e1aea3fb..b536b5a5a10d 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -29,6 +29,7 @@
29#include <linux/spi/ads7846.h> 29#include <linux/spi/ads7846.h>
30#include <linux/spi/corgi_lcd.h> 30#include <linux/spi/corgi_lcd.h>
31#include <linux/mtd/sharpsl.h> 31#include <linux/mtd/sharpsl.h>
32#include <linux/input/matrix_keypad.h>
32#include <video/w100fb.h> 33#include <video/w100fb.h>
33 34
34#include <asm/setup.h> 35#include <asm/setup.h>
@@ -104,6 +105,28 @@ static unsigned long corgi_pin_config[] __initdata = {
104 GPIO6_MMC_CLK, 105 GPIO6_MMC_CLK,
105 GPIO8_MMC_CS0, 106 GPIO8_MMC_CS0,
106 107
108 /* GPIO Matrix Keypad */
109 GPIO66_GPIO, /* column 0 */
110 GPIO67_GPIO, /* column 1 */
111 GPIO68_GPIO, /* column 2 */
112 GPIO69_GPIO, /* column 3 */
113 GPIO70_GPIO, /* column 4 */
114 GPIO71_GPIO, /* column 5 */
115 GPIO72_GPIO, /* column 6 */
116 GPIO73_GPIO, /* column 7 */
117 GPIO74_GPIO, /* column 8 */
118 GPIO75_GPIO, /* column 9 */
119 GPIO76_GPIO, /* column 10 */
120 GPIO77_GPIO, /* column 11 */
121 GPIO58_GPIO, /* row 0 */
122 GPIO59_GPIO, /* row 1 */
123 GPIO60_GPIO, /* row 2 */
124 GPIO61_GPIO, /* row 3 */
125 GPIO62_GPIO, /* row 4 */
126 GPIO63_GPIO, /* row 5 */
127 GPIO64_GPIO, /* row 6 */
128 GPIO65_GPIO, /* row 7 */
129
107 /* GPIO */ 130 /* GPIO */
108 GPIO9_GPIO, /* CORGI_GPIO_nSD_DETECT */ 131 GPIO9_GPIO, /* CORGI_GPIO_nSD_DETECT */
109 GPIO7_GPIO, /* CORGI_GPIO_nSD_WP */ 132 GPIO7_GPIO, /* CORGI_GPIO_nSD_WP */
@@ -267,9 +290,115 @@ static struct platform_device corgifb_device = {
267/* 290/*
268 * Corgi Keyboard Device 291 * Corgi Keyboard Device
269 */ 292 */
293#define CORGI_KEY_CALENDER KEY_F1
294#define CORGI_KEY_ADDRESS KEY_F2
295#define CORGI_KEY_FN KEY_F3
296#define CORGI_KEY_CANCEL KEY_F4
297#define CORGI_KEY_OFF KEY_SUSPEND
298#define CORGI_KEY_EXOK KEY_F5
299#define CORGI_KEY_EXCANCEL KEY_F6
300#define CORGI_KEY_EXJOGDOWN KEY_F7
301#define CORGI_KEY_EXJOGUP KEY_F8
302#define CORGI_KEY_JAP1 KEY_LEFTCTRL
303#define CORGI_KEY_JAP2 KEY_LEFTALT
304#define CORGI_KEY_MAIL KEY_F10
305#define CORGI_KEY_OK KEY_F11
306#define CORGI_KEY_MENU KEY_F12
307
308static const uint32_t corgikbd_keymap[] = {
309 KEY(0, 1, KEY_1),
310 KEY(0, 2, KEY_3),
311 KEY(0, 3, KEY_5),
312 KEY(0, 4, KEY_6),
313 KEY(0, 5, KEY_7),
314 KEY(0, 6, KEY_9),
315 KEY(0, 7, KEY_0),
316 KEY(0, 8, KEY_BACKSPACE),
317 KEY(1, 1, KEY_2),
318 KEY(1, 2, KEY_4),
319 KEY(1, 3, KEY_R),
320 KEY(1, 4, KEY_Y),
321 KEY(1, 5, KEY_8),
322 KEY(1, 6, KEY_I),
323 KEY(1, 7, KEY_O),
324 KEY(1, 8, KEY_P),
325 KEY(2, 0, KEY_TAB),
326 KEY(2, 1, KEY_Q),
327 KEY(2, 2, KEY_E),
328 KEY(2, 3, KEY_T),
329 KEY(2, 4, KEY_G),
330 KEY(2, 5, KEY_U),
331 KEY(2, 6, KEY_J),
332 KEY(2, 7, KEY_K),
333 KEY(3, 0, CORGI_KEY_CALENDER),
334 KEY(3, 1, KEY_W),
335 KEY(3, 2, KEY_S),
336 KEY(3, 3, KEY_F),
337 KEY(3, 4, KEY_V),
338 KEY(3, 5, KEY_H),
339 KEY(3, 6, KEY_M),
340 KEY(3, 7, KEY_L),
341 KEY(3, 9, KEY_RIGHTSHIFT),
342 KEY(4, 0, CORGI_KEY_ADDRESS),
343 KEY(4, 1, KEY_A),
344 KEY(4, 2, KEY_D),
345 KEY(4, 3, KEY_C),
346 KEY(4, 4, KEY_B),
347 KEY(4, 5, KEY_N),
348 KEY(4, 6, KEY_DOT),
349 KEY(4, 8, KEY_ENTER),
350 KEY(4, 10, KEY_LEFTSHIFT),
351 KEY(5, 0, CORGI_KEY_MAIL),
352 KEY(5, 1, KEY_Z),
353 KEY(5, 2, KEY_X),
354 KEY(5, 3, KEY_MINUS),
355 KEY(5, 4, KEY_SPACE),
356 KEY(5, 5, KEY_COMMA),
357 KEY(5, 7, KEY_UP),
358 KEY(5, 11, CORGI_KEY_FN),
359 KEY(6, 0, KEY_SYSRQ),
360 KEY(6, 1, CORGI_KEY_JAP1),
361 KEY(6, 2, CORGI_KEY_JAP2),
362 KEY(6, 3, CORGI_KEY_CANCEL),
363 KEY(6, 4, CORGI_KEY_OK),
364 KEY(6, 5, CORGI_KEY_MENU),
365 KEY(6, 6, KEY_LEFT),
366 KEY(6, 7, KEY_DOWN),
367 KEY(6, 8, KEY_RIGHT),
368 KEY(7, 0, CORGI_KEY_OFF),
369 KEY(7, 1, CORGI_KEY_EXOK),
370 KEY(7, 2, CORGI_KEY_EXCANCEL),
371 KEY(7, 3, CORGI_KEY_EXJOGDOWN),
372 KEY(7, 4, CORGI_KEY_EXJOGUP),
373};
374
375static struct matrix_keymap_data corgikbd_keymap_data = {
376 .keymap = corgikbd_keymap,
377 .keymap_size = ARRAY_SIZE(corgikbd_keymap),
378};
379
380static const int corgikbd_row_gpios[] =
381 { 58, 59, 60, 61, 62, 63, 64, 65 };
382static const int corgikbd_col_gpios[] =
383 { 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77 };
384
385static struct matrix_keypad_platform_data corgikbd_pdata = {
386 .keymap_data = &corgikbd_keymap_data,
387 .row_gpios = corgikbd_row_gpios,
388 .col_gpios = corgikbd_col_gpios,
389 .num_row_gpios = ARRAY_SIZE(corgikbd_row_gpios),
390 .num_col_gpios = ARRAY_SIZE(corgikbd_col_gpios),
391 .col_scan_delay_us = 10,
392 .debounce_ms = 10,
393 .wakeup = 1,
394};
395
270static struct platform_device corgikbd_device = { 396static struct platform_device corgikbd_device = {
271 .name = "corgi-keyboard", 397 .name = "matrix-keypad",
272 .id = -1, 398 .id = -1,
399 .dev = {
400 .platform_data = &corgikbd_pdata,
401 },
273}; 402};
274 403
275/* 404/*
@@ -307,111 +436,20 @@ static struct platform_device corgiled_device = {
307 * The card detect interrupt isn't debounced so we delay it by 250ms 436 * The card detect interrupt isn't debounced so we delay it by 250ms
308 * to give the card a chance to fully insert/eject. 437 * to give the card a chance to fully insert/eject.
309 */ 438 */
310static struct pxamci_platform_data corgi_mci_platform_data;
311
312static int corgi_mci_init(struct device *dev, irq_handler_t corgi_detect_int, void *data)
313{
314 int err;
315
316 err = gpio_request(CORGI_GPIO_nSD_DETECT, "nSD_DETECT");
317 if (err)
318 goto err_out;
319
320 err = gpio_request(CORGI_GPIO_nSD_WP, "nSD_WP");
321 if (err)
322 goto err_free_1;
323
324 err = gpio_request(CORGI_GPIO_SD_PWR, "SD_PWR");
325 if (err)
326 goto err_free_2;
327
328 gpio_direction_input(CORGI_GPIO_nSD_DETECT);
329 gpio_direction_input(CORGI_GPIO_nSD_WP);
330 gpio_direction_output(CORGI_GPIO_SD_PWR, 0);
331
332 corgi_mci_platform_data.detect_delay = msecs_to_jiffies(250);
333
334 err = request_irq(CORGI_IRQ_GPIO_nSD_DETECT, corgi_detect_int,
335 IRQF_DISABLED | IRQF_TRIGGER_RISING |
336 IRQF_TRIGGER_FALLING,
337 "MMC card detect", data);
338 if (err) {
339 pr_err("%s: MMC/SD: can't request MMC card detect IRQ\n",
340 __func__);
341 goto err_free_3;
342 }
343 return 0;
344
345err_free_3:
346 gpio_free(CORGI_GPIO_SD_PWR);
347err_free_2:
348 gpio_free(CORGI_GPIO_nSD_WP);
349err_free_1:
350 gpio_free(CORGI_GPIO_nSD_DETECT);
351err_out:
352 return err;
353}
354
355static void corgi_mci_setpower(struct device *dev, unsigned int vdd)
356{
357 struct pxamci_platform_data* p_d = dev->platform_data;
358
359 gpio_set_value(CORGI_GPIO_SD_PWR, ((1 << vdd) & p_d->ocr_mask));
360}
361
362static int corgi_mci_get_ro(struct device *dev)
363{
364 return gpio_get_value(CORGI_GPIO_nSD_WP);
365}
366
367static void corgi_mci_exit(struct device *dev, void *data)
368{
369 free_irq(CORGI_IRQ_GPIO_nSD_DETECT, data);
370 gpio_free(CORGI_GPIO_SD_PWR);
371 gpio_free(CORGI_GPIO_nSD_WP);
372 gpio_free(CORGI_GPIO_nSD_DETECT);
373}
374
375static struct pxamci_platform_data corgi_mci_platform_data = { 439static struct pxamci_platform_data corgi_mci_platform_data = {
376 .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, 440 .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
377 .init = corgi_mci_init, 441 .gpio_card_detect = -1,
378 .get_ro = corgi_mci_get_ro, 442 .gpio_card_ro = CORGI_GPIO_nSD_WP,
379 .setpower = corgi_mci_setpower, 443 .gpio_power = CORGI_GPIO_SD_PWR,
380 .exit = corgi_mci_exit,
381}; 444};
382 445
383 446
384/* 447/*
385 * Irda 448 * Irda
386 */ 449 */
387static void corgi_irda_transceiver_mode(struct device *dev, int mode)
388{
389 gpio_set_value(CORGI_GPIO_IR_ON, mode & IR_OFF);
390 pxa2xx_transceiver_mode(dev, mode);
391}
392
393static int corgi_irda_startup(struct device *dev)
394{
395 int err;
396
397 err = gpio_request(CORGI_GPIO_IR_ON, "IR_ON");
398 if (err)
399 return err;
400
401 gpio_direction_output(CORGI_GPIO_IR_ON, 1);
402 return 0;
403}
404
405static void corgi_irda_shutdown(struct device *dev)
406{
407 gpio_free(CORGI_GPIO_IR_ON);
408}
409
410static struct pxaficp_platform_data corgi_ficp_platform_data = { 450static struct pxaficp_platform_data corgi_ficp_platform_data = {
451 .gpio_pwdown = CORGI_GPIO_IR_ON,
411 .transceiver_cap = IR_SIRMODE | IR_OFF, 452 .transceiver_cap = IR_SIRMODE | IR_OFF,
412 .transceiver_mode = corgi_irda_transceiver_mode,
413 .startup = corgi_irda_startup,
414 .shutdown = corgi_irda_shutdown,
415}; 453};
416 454
417 455
@@ -636,6 +674,7 @@ static void __init corgi_init(void)
636 corgi_init_spi(); 674 corgi_init_spi();
637 675
638 pxa_set_udc_info(&udc_info); 676 pxa_set_udc_info(&udc_info);
677 corgi_mci_platform_data.detect_delay = msecs_to_jiffies(250);
639 pxa_set_mci_info(&corgi_mci_platform_data); 678 pxa_set_mci_info(&corgi_mci_platform_data);
640 pxa_set_ficp_info(&corgi_ficp_platform_data); 679 pxa_set_ficp_info(&corgi_ficp_platform_data);
641 pxa_set_i2c_info(NULL); 680 pxa_set_i2c_info(NULL);
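
The corgi keymap above packs row, column and keycode into one 32-bit entry per key. A small sketch of that packing, assuming KEY() lays an entry out as row<<24 | col<<16 | keycode as in include/linux/input/matrix_keypad.h (the masking of out-of-range rows and columns is omitted here):

#include <stdio.h>
#include <stdint.h>

#define KEY(row, col, val) (((row) << 24) | ((col) << 16) | (val))
#define KEY_ROW(k)  (((k) >> 24) & 0xff)
#define KEY_COL(k)  (((k) >> 16) & 0xff)
#define KEY_CODE(k) ((k) & 0xffff)

#define KEY_1 2	/* from linux/input.h */

int main(void)
{
	uint32_t k = KEY(0, 1, KEY_1);	/* first corgi keymap entry */

	/* the matrix-keypad core rebuilds a row x col table from these */
	printf("row=%u col=%u code=%u\n", KEY_ROW(k), KEY_COL(k), KEY_CODE(k));
	return 0;
}
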
diff --git a/arch/arm/mach-pxa/csb726.c b/arch/arm/mach-pxa/csb726.c
index 7d3e1b46e550..79141f862728 100644
--- a/arch/arm/mach-pxa/csb726.c
+++ b/arch/arm/mach-pxa/csb726.c
@@ -130,61 +130,17 @@ static struct pxamci_platform_data csb726_mci_data;
130static int csb726_mci_init(struct device *dev, 130static int csb726_mci_init(struct device *dev,
131 irq_handler_t detect, void *data) 131 irq_handler_t detect, void *data)
132{ 132{
133 int err;
134
135 csb726_mci_data.detect_delay = msecs_to_jiffies(500); 133 csb726_mci_data.detect_delay = msecs_to_jiffies(500);
136
137 err = gpio_request(CSB726_GPIO_MMC_DETECT, "MMC detect");
138 if (err)
139 goto err_det_req;
140
141 err = gpio_direction_input(CSB726_GPIO_MMC_DETECT);
142 if (err)
143 goto err_det_dir;
144
145 err = gpio_request(CSB726_GPIO_MMC_RO, "MMC ro");
146 if (err)
147 goto err_ro_req;
148
149 err = gpio_direction_input(CSB726_GPIO_MMC_RO);
150 if (err)
151 goto err_ro_dir;
152
153 err = request_irq(gpio_to_irq(CSB726_GPIO_MMC_DETECT), detect,
154 IRQF_DISABLED, "MMC card detect", data);
155 if (err)
156 goto err_irq;
157
158 return 0; 134 return 0;
159
160err_irq:
161err_ro_dir:
162 gpio_free(CSB726_GPIO_MMC_RO);
163err_ro_req:
164err_det_dir:
165 gpio_free(CSB726_GPIO_MMC_DETECT);
166err_det_req:
167 return err;
168}
169
170static int csb726_mci_get_ro(struct device *dev)
171{
172 return gpio_get_value(CSB726_GPIO_MMC_RO);
173}
174
175static void csb726_mci_exit(struct device *dev, void *data)
176{
177 free_irq(gpio_to_irq(CSB726_GPIO_MMC_DETECT), data);
178 gpio_free(CSB726_GPIO_MMC_RO);
179 gpio_free(CSB726_GPIO_MMC_DETECT);
180} 135}
181 136
182static struct pxamci_platform_data csb726_mci = { 137static struct pxamci_platform_data csb726_mci = {
183 .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, 138 .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
184 .init = csb726_mci_init, 139 .init = csb726_mci_init,
185 .get_ro = csb726_mci_get_ro,
186 /* FIXME setpower */ 140 /* FIXME setpower */
187 .exit = csb726_mci_exit, 141 .gpio_card_detect = CSB726_GPIO_MMC_DETECT,
142 .gpio_card_ro = CSB726_GPIO_MMC_RO,
143 .gpio_power = -1,
188}; 144};
189 145
190static struct pxaohci_platform_data csb726_ohci_platform_data = { 146static struct pxaohci_platform_data csb726_ohci_platform_data = {
diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c
index ecc08f360b68..46fabe1cca11 100644
--- a/arch/arm/mach-pxa/devices.c
+++ b/arch/arm/mach-pxa/devices.c
@@ -935,6 +935,33 @@ void __init pxa3xx_set_nand_info(struct pxa3xx_nand_platform_data *info)
935{ 935{
936 pxa_register_device(&pxa3xx_device_nand, info); 936 pxa_register_device(&pxa3xx_device_nand, info);
937} 937}
938
939static struct resource pxa3xx_resources_gcu[] = {
940 {
941 .start = 0x54000000,
942 .end = 0x54000fff,
943 .flags = IORESOURCE_MEM,
944 },
945 {
946 .start = IRQ_GCU,
947 .end = IRQ_GCU,
948 .flags = IORESOURCE_IRQ,
949 },
950};
951
952static u64 pxa3xx_gcu_dmamask = DMA_BIT_MASK(32);
953
954struct platform_device pxa3xx_device_gcu = {
955 .name = "pxa3xx-gcu",
956 .id = -1,
957 .num_resources = ARRAY_SIZE(pxa3xx_resources_gcu),
958 .resource = pxa3xx_resources_gcu,
959 .dev = {
960 .dma_mask = &pxa3xx_gcu_dmamask,
961 .coherent_dma_mask = 0xffffffff,
962 },
963};
964
938#endif /* CONFIG_PXA3xx */ 965#endif /* CONFIG_PXA3xx */
939 966
940/* pxa2xx-spi platform-device ID equals respective SSP platform-device ID + 1. 967/* pxa2xx-spi platform-device ID equals respective SSP platform-device ID + 1.
diff --git a/arch/arm/mach-pxa/devices.h b/arch/arm/mach-pxa/devices.h
index ecc24a4dca6d..93817d99761e 100644
--- a/arch/arm/mach-pxa/devices.h
+++ b/arch/arm/mach-pxa/devices.h
@@ -35,4 +35,6 @@ extern struct platform_device pxa27x_device_pwm1;
35extern struct platform_device pxa3xx_device_nand; 35extern struct platform_device pxa3xx_device_nand;
36extern struct platform_device pxa3xx_device_i2c_power; 36extern struct platform_device pxa3xx_device_i2c_power;
37 37
38extern struct platform_device pxa3xx_device_gcu;
39
38void __init pxa_register_device(struct platform_device *dev, void *data); 40void __init pxa_register_device(struct platform_device *dev, void *data);
diff --git a/arch/arm/mach-pxa/e740.c b/arch/arm/mach-pxa/e740.c
index a36fc17f671d..49acdfa6650d 100644
--- a/arch/arm/mach-pxa/e740.c
+++ b/arch/arm/mach-pxa/e740.c
@@ -199,7 +199,6 @@ static void __init e740_init(void)
199 platform_add_devices(devices, ARRAY_SIZE(devices)); 199 platform_add_devices(devices, ARRAY_SIZE(devices));
200 pxa_set_udc_info(&e7xx_udc_mach_info); 200 pxa_set_udc_info(&e7xx_udc_mach_info);
201 pxa_set_ac97_info(NULL); 201 pxa_set_ac97_info(NULL);
202 e7xx_irda_init();
203 pxa_set_ficp_info(&e7xx_ficp_platform_data); 202 pxa_set_ficp_info(&e7xx_ficp_platform_data);
204} 203}
205 204
diff --git a/arch/arm/mach-pxa/e750.c b/arch/arm/mach-pxa/e750.c
index 1d00110590e5..4052ece3ef49 100644
--- a/arch/arm/mach-pxa/e750.c
+++ b/arch/arm/mach-pxa/e750.c
@@ -200,7 +200,6 @@ static void __init e750_init(void)
200 platform_add_devices(devices, ARRAY_SIZE(devices)); 200 platform_add_devices(devices, ARRAY_SIZE(devices));
201 pxa_set_udc_info(&e7xx_udc_mach_info); 201 pxa_set_udc_info(&e7xx_udc_mach_info);
202 pxa_set_ac97_info(NULL); 202 pxa_set_ac97_info(NULL);
203 e7xx_irda_init();
204 pxa_set_ficp_info(&e7xx_ficp_platform_data); 203 pxa_set_ficp_info(&e7xx_ficp_platform_data);
205} 204}
206 205
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 9cd09465a0e8..aec7f4214b14 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -646,13 +646,16 @@ static int em_x270_mci_get_ro(struct device *dev)
646} 646}
647 647
648static struct pxamci_platform_data em_x270_mci_platform_data = { 648static struct pxamci_platform_data em_x270_mci_platform_data = {
649 .ocr_mask = MMC_VDD_20_21|MMC_VDD_21_22|MMC_VDD_22_23| 649 .ocr_mask = MMC_VDD_20_21|MMC_VDD_21_22|MMC_VDD_22_23|
650 MMC_VDD_24_25|MMC_VDD_25_26|MMC_VDD_26_27| 650 MMC_VDD_24_25|MMC_VDD_25_26|MMC_VDD_26_27|
651 MMC_VDD_27_28|MMC_VDD_28_29|MMC_VDD_29_30| 651 MMC_VDD_27_28|MMC_VDD_28_29|MMC_VDD_29_30|
652 MMC_VDD_30_31|MMC_VDD_31_32, 652 MMC_VDD_30_31|MMC_VDD_31_32,
653 .init = em_x270_mci_init, 653 .init = em_x270_mci_init,
654 .setpower = em_x270_mci_setpower, 654 .setpower = em_x270_mci_setpower,
655 .exit = em_x270_mci_exit, 655 .exit = em_x270_mci_exit,
656 .gpio_card_detect = -1,
657 .gpio_card_ro = -1,
658 .gpio_power = -1,
656}; 659};
657 660
658static void __init em_x270_init_mmc(void) 661static void __init em_x270_init_mmc(void)
@@ -1022,22 +1025,32 @@ static int em_x270_sensor_power(struct device *dev, int on)
1022 return 0; 1025 return 0;
1023} 1026}
1024 1027
1025static struct soc_camera_link iclink = {
1026 .bus_id = 0,
1027 .power = em_x270_sensor_power,
1028};
1029
1030static struct i2c_board_info em_x270_i2c_cam_info[] = { 1028static struct i2c_board_info em_x270_i2c_cam_info[] = {
1031 { 1029 {
1032 I2C_BOARD_INFO("mt9m111", 0x48), 1030 I2C_BOARD_INFO("mt9m111", 0x48),
1031 },
1032};
1033
1034static struct soc_camera_link iclink = {
1035 .bus_id = 0,
1036 .power = em_x270_sensor_power,
1037 .board_info = &em_x270_i2c_cam_info[0],
1038 .i2c_adapter_id = 0,
1039 .module_name = "mt9m111",
1040};
1041
1042static struct platform_device em_x270_camera = {
1043 .name = "soc-camera-pdrv",
1044 .id = -1,
1045 .dev = {
1033 .platform_data = &iclink, 1046 .platform_data = &iclink,
1034 }, 1047 },
1035}; 1048};
1036 1049
1037static void __init em_x270_init_camera(void) 1050static void __init em_x270_init_camera(void)
1038{ 1051{
1039 i2c_register_board_info(0, ARRAY_AND_SIZE(em_x270_i2c_cam_info));
1040 pxa_set_camera_info(&em_x270_camera_platform_data); 1052 pxa_set_camera_info(&em_x270_camera_platform_data);
1053 platform_device_register(&em_x270_camera);
1041} 1054}
1042#else 1055#else
1043static inline void em_x270_init_camera(void) {} 1056static inline void em_x270_init_camera(void) {}
@@ -1103,6 +1116,7 @@ REGULATOR_CONSUMER(ldo5, NULL, "vcc cam");
1103REGULATOR_CONSUMER(ldo10, &pxa_device_mci.dev, "vcc sdio"); 1116REGULATOR_CONSUMER(ldo10, &pxa_device_mci.dev, "vcc sdio");
1104REGULATOR_CONSUMER(ldo12, NULL, "vcc usb"); 1117REGULATOR_CONSUMER(ldo12, NULL, "vcc usb");
1105REGULATOR_CONSUMER(ldo19, &em_x270_gprs_userspace_consumer.dev, "vcc gprs"); 1118REGULATOR_CONSUMER(ldo19, &em_x270_gprs_userspace_consumer.dev, "vcc gprs");
1119REGULATOR_CONSUMER(buck2, NULL, "vcc_core");
1106 1120
1107#define REGULATOR_INIT(_ldo, _min_uV, _max_uV, _ops_mask) \ 1121#define REGULATOR_INIT(_ldo, _min_uV, _max_uV, _ops_mask) \
1108 static struct regulator_init_data _ldo##_data = { \ 1122 static struct regulator_init_data _ldo##_data = { \
@@ -1125,6 +1139,7 @@ REGULATOR_INIT(ldo10, 2000000, 3200000,
1125 REGULATOR_CHANGE_STATUS | REGULATOR_CHANGE_VOLTAGE); 1139 REGULATOR_CHANGE_STATUS | REGULATOR_CHANGE_VOLTAGE);
1126REGULATOR_INIT(ldo12, 3000000, 3000000, REGULATOR_CHANGE_STATUS); 1140REGULATOR_INIT(ldo12, 3000000, 3000000, REGULATOR_CHANGE_STATUS);
1127REGULATOR_INIT(ldo19, 3200000, 3200000, REGULATOR_CHANGE_STATUS); 1141REGULATOR_INIT(ldo19, 3200000, 3200000, REGULATOR_CHANGE_STATUS);
1142REGULATOR_INIT(buck2, 1000000, 1650000, REGULATOR_CHANGE_VOLTAGE);
1128 1143
1129struct led_info em_x270_led_info = { 1144struct led_info em_x270_led_info = {
1130 .name = "em-x270:orange", 1145 .name = "em-x270:orange",
@@ -1194,6 +1209,8 @@ struct da903x_subdev_info em_x270_da9030_subdevs[] = {
1194 DA9030_LDO(12), 1209 DA9030_LDO(12),
1195 DA9030_LDO(19), 1210 DA9030_LDO(19),
1196 1211
1212 DA9030_SUBDEV(regulator, BUCK2, &buck2_data),
1213
1197 DA9030_SUBDEV(led, LED_PC, &em_x270_led_info), 1214 DA9030_SUBDEV(led, LED_PC, &em_x270_led_info),
1198 DA9030_SUBDEV(backlight, WLED, &em_x270_led_info), 1215 DA9030_SUBDEV(backlight, WLED, &em_x270_led_info),
1199 DA9030_SUBDEV(battery, BAT, &em_x270_batterty_info), 1216 DA9030_SUBDEV(battery, BAT, &em_x270_batterty_info),
@@ -1245,7 +1262,6 @@ static void __init em_x270_init_i2c(void)
1245 1262
1246static void __init em_x270_module_init(void) 1263static void __init em_x270_module_init(void)
1247{ 1264{
1248 pr_info("%s\n", __func__);
1249 pxa2xx_mfp_config(ARRAY_AND_SIZE(em_x270_pin_config)); 1265 pxa2xx_mfp_config(ARRAY_AND_SIZE(em_x270_pin_config));
1250 1266
1251 mmc_cd = GPIO13_MMC_CD; 1267 mmc_cd = GPIO13_MMC_CD;
@@ -1257,7 +1273,6 @@ static void __init em_x270_module_init(void)
1257 1273
1258static void __init em_x270_exeda_init(void) 1274static void __init em_x270_exeda_init(void)
1259{ 1275{
1260 pr_info("%s\n", __func__);
1261 pxa2xx_mfp_config(ARRAY_AND_SIZE(exeda_pin_config)); 1276 pxa2xx_mfp_config(ARRAY_AND_SIZE(exeda_pin_config));
1262 1277
1263 mmc_cd = GPIO114_MMC_CD; 1278 mmc_cd = GPIO114_MMC_CD;
diff --git a/arch/arm/mach-pxa/eseries.c b/arch/arm/mach-pxa/eseries.c
index c60dadf847a6..91417f035069 100644
--- a/arch/arm/mach-pxa/eseries.c
+++ b/arch/arm/mach-pxa/eseries.c
@@ -47,44 +47,9 @@ struct pxa2xx_udc_mach_info e7xx_udc_mach_info = {
47 .gpio_pullup_inverted = 1 47 .gpio_pullup_inverted = 1
48}; 48};
49 49
50static void e7xx_irda_transceiver_mode(struct device *dev, int mode)
51{
52 if (mode & IR_OFF) {
53 gpio_set_value(GPIO_E7XX_IR_OFF, 1);
54 pxa2xx_transceiver_mode(dev, mode);
55 } else {
56 pxa2xx_transceiver_mode(dev, mode);
57 gpio_set_value(GPIO_E7XX_IR_OFF, 0);
58 }
59}
60
61int e7xx_irda_init(void)
62{
63 int ret;
64
65 ret = gpio_request(GPIO_E7XX_IR_OFF, "IrDA power");
66 if (ret)
67 goto out;
68
69 ret = gpio_direction_output(GPIO_E7XX_IR_OFF, 0);
70 if (ret)
71 goto out;
72
73 e7xx_irda_transceiver_mode(NULL, IR_SIRMODE | IR_OFF);
74out:
75 return ret;
76}
77
78static void e7xx_irda_shutdown(struct device *dev)
79{
80 e7xx_irda_transceiver_mode(dev, IR_SIRMODE | IR_OFF);
81 gpio_free(GPIO_E7XX_IR_OFF);
82}
83
84struct pxaficp_platform_data e7xx_ficp_platform_data = { 50struct pxaficp_platform_data e7xx_ficp_platform_data = {
85 .transceiver_cap = IR_SIRMODE | IR_OFF, 51 .gpio_pwdown = GPIO_E7XX_IR_OFF,
86 .transceiver_mode = e7xx_irda_transceiver_mode, 52 .transceiver_cap = IR_SIRMODE | IR_OFF,
87 .shutdown = e7xx_irda_shutdown,
88}; 53};
89 54
90int eseries_tmio_enable(struct platform_device *dev) 55int eseries_tmio_enable(struct platform_device *dev)
diff --git a/arch/arm/mach-pxa/gumstix.c b/arch/arm/mach-pxa/gumstix.c
index ca9912ea78d9..1708c0109844 100644
--- a/arch/arm/mach-pxa/gumstix.c
+++ b/arch/arm/mach-pxa/gumstix.c
@@ -88,7 +88,10 @@ static struct platform_device *devices[] __initdata = {
88 88
89#ifdef CONFIG_MMC_PXA 89#ifdef CONFIG_MMC_PXA
90static struct pxamci_platform_data gumstix_mci_platform_data = { 90static struct pxamci_platform_data gumstix_mci_platform_data = {
91 .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, 91 .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
92 .gpio_card_detect = -1,
93 .gpio_card_ro = -1,
94 .gpio_power = -1,
92}; 95};
93 96
94static void __init gumstix_mmc_init(void) 97static void __init gumstix_mmc_init(void)
diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c
index 81359d574f88..abff9e132749 100644
--- a/arch/arm/mach-pxa/hx4700.c
+++ b/arch/arm/mach-pxa/hx4700.c
@@ -45,6 +45,7 @@
45#include <mach/irda.h> 45#include <mach/irda.h>
46#include <mach/pxa2xx_spi.h> 46#include <mach/pxa2xx_spi.h>
47 47
48#include <video/platform_lcd.h>
48#include <video/w100fb.h> 49#include <video/w100fb.h>
49 50
50#include "devices.h" 51#include "devices.h"
@@ -174,14 +175,9 @@ static int hx4700_gpio_request(struct gpio_ress *gpios, int size)
174 * IRDA 175 * IRDA
175 */ 176 */
176 177
177static void irda_transceiver_mode(struct device *dev, int mode)
178{
179 gpio_set_value(GPIO105_HX4700_nIR_ON, mode & IR_OFF);
180}
181
182static struct pxaficp_platform_data ficp_info = { 178static struct pxaficp_platform_data ficp_info = {
183 .transceiver_cap = IR_SIRMODE | IR_OFF, 179 .gpio_pwdown = GPIO105_HX4700_nIR_ON,
184 .transceiver_mode = irda_transceiver_mode, 180 .transceiver_cap = IR_SIRMODE | IR_OFF,
185}; 181};
186 182
187/* 183/*
@@ -368,8 +364,6 @@ static struct platform_device egpio = {
368 * LCD - Sony display connected to ATI Imageon w3220 364 * LCD - Sony display connected to ATI Imageon w3220
369 */ 365 */
370 366
371static int lcd_power;
372
373static void sony_lcd_init(void) 367static void sony_lcd_init(void)
374{ 368{
375 gpio_set_value(GPIO84_HX4700_LCD_SQN, 1); 369 gpio_set_value(GPIO84_HX4700_LCD_SQN, 1);
@@ -410,35 +404,6 @@ static void sony_lcd_off(void)
410 gpio_set_value(GPIO110_HX4700_LCD_LVDD_3V3_ON, 0); 404 gpio_set_value(GPIO110_HX4700_LCD_LVDD_3V3_ON, 0);
411} 405}
412 406
413static int hx4700_lcd_set_power(struct lcd_device *ldev, int level)
414{
415 switch (level) {
416 case FB_BLANK_UNBLANK:
417 sony_lcd_init();
418 break;
419 case FB_BLANK_NORMAL:
420 case FB_BLANK_VSYNC_SUSPEND:
421 case FB_BLANK_HSYNC_SUSPEND:
422 case FB_BLANK_POWERDOWN:
423 sony_lcd_off();
424 break;
425 }
426 lcd_power = level;
427 return 0;
428}
429
430static int hx4700_lcd_get_power(struct lcd_device *lm)
431{
432 return lcd_power;
433}
434
435static struct lcd_ops hx4700_lcd_ops = {
436 .get_power = hx4700_lcd_get_power,
437 .set_power = hx4700_lcd_set_power,
438};
439
440static struct lcd_device *hx4700_lcd_device;
441
442#ifdef CONFIG_PM 407#ifdef CONFIG_PM
443static void w3220_lcd_suspend(struct w100fb_par *wfb) 408static void w3220_lcd_suspend(struct w100fb_par *wfb)
444{ 409{
@@ -573,6 +538,27 @@ static struct platform_device w3220 = {
573 .resource = w3220_resources, 538 .resource = w3220_resources,
574}; 539};
575 540
541static void hx4700_lcd_set_power(struct plat_lcd_data *pd, unsigned int power)
542{
543 if (power)
544 sony_lcd_init();
545 else
546 sony_lcd_off();
547}
548
549static struct plat_lcd_data hx4700_lcd_data = {
550 .set_power = hx4700_lcd_set_power,
551};
552
553static struct platform_device hx4700_lcd = {
554 .name = "platform-lcd",
555 .id = -1,
556 .dev = {
557 .platform_data = &hx4700_lcd_data,
558 .parent = &w3220.dev,
559 },
560};
561
576/* 562/*
577 * Backlight 563 * Backlight
578 */ 564 */
@@ -872,9 +858,6 @@ static void __init hx4700_init(void)
872 pxa2xx_set_spi_info(2, &pxa_ssp2_master_info); 858 pxa2xx_set_spi_info(2, &pxa_ssp2_master_info);
873 spi_register_board_info(ARRAY_AND_SIZE(tsc2046_board_info)); 859 spi_register_board_info(ARRAY_AND_SIZE(tsc2046_board_info));
874 860
875 hx4700_lcd_device = lcd_device_register("w100fb", NULL,
876 (void *)&w3220_info, &hx4700_lcd_ops);
877
878 gpio_set_value(GPIO71_HX4700_ASIC3_nRESET, 0); 861 gpio_set_value(GPIO71_HX4700_ASIC3_nRESET, 0);
879 mdelay(10); 862 mdelay(10);
880 gpio_set_value(GPIO71_HX4700_ASIC3_nRESET, 1); 863 gpio_set_value(GPIO71_HX4700_ASIC3_nRESET, 1);
diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c
index b6243b59d9be..b6486ef20b17 100644
--- a/arch/arm/mach-pxa/idp.c
+++ b/arch/arm/mach-pxa/idp.c
@@ -168,7 +168,10 @@ static struct pxafb_mach_info sharp_lm8v31 = {
168}; 168};
169 169
170static struct pxamci_platform_data idp_mci_platform_data = { 170static struct pxamci_platform_data idp_mci_platform_data = {
171 .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, 171 .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
172 .gpio_card_detect = -1,
173 .gpio_card_ro = -1,
174 .gpio_power = -1,
172}; 175};
173 176
174static void __init idp_init(void) 177static void __init idp_init(void)
diff --git a/arch/arm/mach-pxa/imote2.c b/arch/arm/mach-pxa/imote2.c
index 961807dc6467..2a4945db31c5 100644
--- a/arch/arm/mach-pxa/imote2.c
+++ b/arch/arm/mach-pxa/imote2.c
@@ -389,6 +389,9 @@ static int imote2_mci_get_ro(struct device *dev)
389static struct pxamci_platform_data imote2_mci_platform_data = { 389static struct pxamci_platform_data imote2_mci_platform_data = {
390 .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, /* default anyway */ 390 .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, /* default anyway */
391 .get_ro = imote2_mci_get_ro, 391 .get_ro = imote2_mci_get_ro,
392 .gpio_card_detect = -1,
393 .gpio_card_ro = -1,
394 .gpio_power = -1,
392}; 395};
393 396
394static struct mtd_partition imote2flash_partitions[] = { 397static struct mtd_partition imote2flash_partitions[] = {
diff --git a/arch/arm/mach-pxa/include/mach/balloon3.h b/arch/arm/mach-pxa/include/mach/balloon3.h
new file mode 100644
index 000000000000..bfec09b1814b
--- /dev/null
+++ b/arch/arm/mach-pxa/include/mach/balloon3.h
@@ -0,0 +1,134 @@
1/*
2 * linux/include/asm-arm/arch-pxa/balloon3.h
3 *
4 * Authors: Nick Bane and Wookey
5 * Created: Oct, 2005
6 * Copyright: Toby Churchill Ltd
7 * Cribbed from mainstone.c, by Nicholas Pitre
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#ifndef ASM_ARCH_BALLOON3_H
15#define ASM_ARCH_BALLOON3_H
16
17enum balloon3_features {
18 BALLOON3_FEATURE_OHCI,
19 BALLOON3_FEATURE_MMC,
20 BALLOON3_FEATURE_CF,
21 BALLOON3_FEATURE_AUDIO,
22 BALLOON3_FEATURE_TOPPOLY,
23};
24
25#define BALLOON3_FPGA_PHYS PXA_CS4_PHYS
26#define BALLOON3_FPGA_VIRT (0xf1000000) /* as per balloon2 */
27#define BALLOON3_FPGA_LENGTH 0x01000000
28
29/* FPGA/CPLD registers */
30#define BALLOON3_PCMCIA0_REG (BALLOON3_FPGA_VIRT + 0x00e00008)
31/* fixme - same for now */
32#define BALLOON3_PCMCIA1_REG (BALLOON3_FPGA_VIRT + 0x00e00008)
33#define BALLOON3_NANDIO_IO_REG (BALLOON3_FPGA_VIRT + 0x00e00000)
34/* fpga/cpld interrupt control register */
35#define BALLOON3_INT_CONTROL_REG (BALLOON3_FPGA_VIRT + 0x00e0000C)
36#define BALLOON3_NANDIO_CTL2_REG (BALLOON3_FPGA_VIRT + 0x00e00010)
37#define BALLOON3_NANDIO_CTL_REG (BALLOON3_FPGA_VIRT + 0x00e00014)
38#define BALLOON3_VERSION_REG (BALLOON3_FPGA_VIRT + 0x00e0001c)
39
40#define BALLOON3_SAMOSA_ADDR_REG (BALLOON3_FPGA_VIRT + 0x00c00000)
41#define BALLOON3_SAMOSA_DATA_REG (BALLOON3_FPGA_VIRT + 0x00c00004)
42#define BALLOON3_SAMOSA_STATUS_REG (BALLOON3_FPGA_VIRT + 0x00c0001c)
43
44/* GPIOs for irqs */
45#define BALLOON3_GPIO_AUX_NIRQ (94)
46#define BALLOON3_GPIO_CODEC_IRQ (95)
47
48/* Timer and Idle LED locations */
49#define BALLOON3_GPIO_LED_NAND (9)
50#define BALLOON3_GPIO_LED_IDLE (10)
51
52/* backlight control */
53#define BALLOON3_GPIO_RUN_BACKLIGHT (99)
54
55#define BALLOON3_GPIO_S0_CD (105)
56
57/* FPGA Interrupt Mask/Acknowledge Register */
58#define BALLOON3_INT_S0_IRQ (1 << 0) /* PCMCIA 0 IRQ */
59#define BALLOON3_INT_S0_STSCHG (1 << 1) /* PCMCIA 0 status changed */
60
61/* CF Status Register */
62#define BALLOON3_PCMCIA_nIRQ (1 << 0) /* IRQ / ready signal */
63#define BALLOON3_PCMCIA_nSTSCHG_BVD1 (1 << 1)
64 /* VDD sense / card status changed */
65
66/* CF control register (write) */
67#define BALLOON3_PCMCIA_RESET (1 << 0) /* Card reset signal */
68#define BALLOON3_PCMCIA_ENABLE (1 << 1)
69#define BALLOON3_PCMCIA_ADD_ENABLE (1 << 2)
70
71/* CPLD (and FPGA) interface definitions */
72#define CPLD_LCD0_DATA_SET 0x00
73#define CPLD_LCD0_DATA_CLR 0x10
74#define CPLD_LCD0_COMMAND_SET 0x01
75#define CPLD_LCD0_COMMAND_CLR 0x11
76#define CPLD_LCD1_DATA_SET 0x02
77#define CPLD_LCD1_DATA_CLR 0x12
78#define CPLD_LCD1_COMMAND_SET 0x03
79#define CPLD_LCD1_COMMAND_CLR 0x13
80
81#define CPLD_MISC_SET 0x07
82#define CPLD_MISC_CLR 0x17
83#define CPLD_MISC_LOON_NRESET_BIT 0
84#define CPLD_MISC_LOON_UNSUSP_BIT 1
85#define CPLD_MISC_RUN_5V_BIT 2
86#define CPLD_MISC_CHG_D0_BIT 3
87#define CPLD_MISC_CHG_D1_BIT 4
88#define CPLD_MISC_DAC_NCS_BIT 5
89
90#define CPLD_LCD_SET 0x08
91#define CPLD_LCD_CLR 0x18
92#define CPLD_LCD_BACKLIGHT_EN_0_BIT 0
93#define CPLD_LCD_BACKLIGHT_EN_1_BIT 1
94#define CPLD_LCD_LED_RED_BIT 4
95#define CPLD_LCD_LED_GREEN_BIT 5
96#define CPLD_LCD_NRESET_BIT 7
97
98#define CPLD_LCD_RO_SET 0x09
99#define CPLD_LCD_RO_CLR 0x19
100#define CPLD_LCD_RO_LCD0_nWAIT_BIT 0
101#define CPLD_LCD_RO_LCD1_nWAIT_BIT 1
102
103#define CPLD_SERIAL_SET 0x0a
104#define CPLD_SERIAL_CLR 0x1a
105#define CPLD_SERIAL_GSM_RI_BIT 0
106#define CPLD_SERIAL_GSM_CTS_BIT 1
107#define CPLD_SERIAL_GSM_DTR_BIT 2
108#define CPLD_SERIAL_LPR_CTS_BIT 3
109#define CPLD_SERIAL_TC232_CTS_BIT 4
110#define CPLD_SERIAL_TC232_DSR_BIT 5
111
112#define CPLD_SROUTING_SET 0x0b
113#define CPLD_SROUTING_CLR 0x1b
114#define CPLD_SROUTING_MSP430_LPR 0
115#define CPLD_SROUTING_MSP430_TC232 1
116#define CPLD_SROUTING_MSP430_GSM 2
117#define CPLD_SROUTING_LOON_LPR (0 << 4)
118#define CPLD_SROUTING_LOON_TC232 (1 << 4)
119#define CPLD_SROUTING_LOON_GSM (2 << 4)
120
121#define CPLD_AROUTING_SET 0x0c
122#define CPLD_AROUTING_CLR 0x1c
123#define CPLD_AROUTING_MIC2PHONE_BIT 0
124#define CPLD_AROUTING_PHONE2INT_BIT 1
125#define CPLD_AROUTING_PHONE2EXT_BIT 2
126#define CPLD_AROUTING_LOONL2INT_BIT 3
127#define CPLD_AROUTING_LOONL2EXT_BIT 4
128#define CPLD_AROUTING_LOONR2PHONE_BIT 5
129#define CPLD_AROUTING_LOONR2INT_BIT 6
130#define CPLD_AROUTING_LOONR2EXT_BIT 7
131
132extern int balloon3_has(enum balloon3_features feature);
133
134#endif
diff --git a/arch/arm/mach-pxa/include/mach/colibri.h b/arch/arm/mach-pxa/include/mach/colibri.h
index a88d7caff0d1..811743c56147 100644
--- a/arch/arm/mach-pxa/include/mach/colibri.h
+++ b/arch/arm/mach-pxa/include/mach/colibri.h
@@ -23,6 +23,12 @@ static inline void colibri_pxa3xx_init_lcd(int bl_pin) {}
23extern void colibri_pxa3xx_init_eth(struct ax_plat_data *plat_data); 23extern void colibri_pxa3xx_init_eth(struct ax_plat_data *plat_data);
24#endif 24#endif
25 25
26#if defined(CONFIG_MTD_NAND_PXA3xx) || defined(CONFIG_MTD_NAND_PXA3xx_MODULE)
27extern void colibri_pxa3xx_init_nand(void);
28#else
29static inline void colibri_pxa3xx_init_nand(void) {}
30#endif
31
26/* physical memory regions */ 32/* physical memory regions */
27#define COLIBRI_SDRAM_BASE 0xa0000000 /* SDRAM region */ 33#define COLIBRI_SDRAM_BASE 0xa0000000 /* SDRAM region */
28 34
diff --git a/arch/arm/mach-pxa/include/mach/entry-macro.S b/arch/arm/mach-pxa/include/mach/entry-macro.S
index f6b4bf3e73d2..241880608ac6 100644
--- a/arch/arm/mach-pxa/include/mach/entry-macro.S
+++ b/arch/arm/mach-pxa/include/mach/entry-macro.S
@@ -24,34 +24,27 @@
24 mov \tmp, \tmp, lsr #13 24 mov \tmp, \tmp, lsr #13
25 and \tmp, \tmp, #0x7 @ Core G 25 and \tmp, \tmp, #0x7 @ Core G
26 cmp \tmp, #1 26 cmp \tmp, #1
27 bhi 1004f 27 bhi 1002f
28 28
29 @ Core Generation 1 (PXA25x)
29 mov \base, #io_p2v(0x40000000) @ IIR Ctl = 0x40d00000 30 mov \base, #io_p2v(0x40000000) @ IIR Ctl = 0x40d00000
30 add \base, \base, #0x00d00000 31 add \base, \base, #0x00d00000
31 ldr \irqstat, [\base, #0] @ ICIP 32 ldr \irqstat, [\base, #0] @ ICIP
32 ldr \irqnr, [\base, #4] @ ICMR 33 ldr \irqnr, [\base, #4] @ ICMR
33 b 1002f
34 34
351004:
36 mrc p6, 0, \irqstat, c6, c0, 0 @ ICIP2
37 mrc p6, 0, \irqnr, c7, c0, 0 @ ICMR2
38 ands \irqnr, \irqstat, \irqnr 35 ands \irqnr, \irqstat, \irqnr
39 beq 1003f 36 beq 1001f
40 rsb \irqstat, \irqnr, #0 37 rsb \irqstat, \irqnr, #0
41 and \irqstat, \irqstat, \irqnr 38 and \irqstat, \irqstat, \irqnr
42 clz \irqnr, \irqstat 39 clz \irqnr, \irqstat
43 rsb \irqnr, \irqnr, #31 40 rsb \irqnr, \irqnr, #(31 + PXA_IRQ(0))
44 add \irqnr, \irqnr, #(32 + PXA_IRQ(0))
45 b 1001f 41 b 1001f
461003:
47 mrc p6, 0, \irqstat, c0, c0, 0 @ ICIP
48 mrc p6, 0, \irqnr, c1, c0, 0 @ ICMR
491002: 421002:
50 ands \irqnr, \irqstat, \irqnr 43 @ Core Generation 2 (PXA27x) or Core Generation 3 (PXA3xx)
44 mrc p6, 0, \irqstat, c5, c0, 0 @ ICHP
45 tst \irqstat, #0x80000000
51 beq 1001f 46 beq 1001f
52 rsb \irqstat, \irqnr, #0 47 bic \irqstat, \irqstat, #0x80000000
53 and \irqstat, \irqstat, \irqnr 48 mov \irqnr, \irqstat, lsr #16
54 clz \irqnr, \irqstat
55 rsb \irqnr, \irqnr, #(31 + PXA_IRQ(0))
561001: 491001:
57 .endm 50 .endm
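
The rewritten second-generation path reads the ICHP (highest-priority) register once instead of walking ICIP/ICMR pairs: per the assembler above, bit 31 flags a valid pending interrupt and the IRQ number sits in the upper halfword. A C rendering of the same decode:

#include <stdio.h>
#include <stdint.h>

static int decode_ichp(uint32_t ichp)
{
	if (!(ichp & 0x80000000u))
		return -1;				/* nothing pending */
	return (int)((ichp & ~0x80000000u) >> 16);	/* highest-priority IRQ */
}

int main(void)
{
	printf("%d\n", decode_ichp(0x80230000u));	/* -> IRQ 35 */
	printf("%d\n", decode_ichp(0x00000000u));	/* -> -1 */
	return 0;
}
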
diff --git a/arch/arm/mach-pxa/include/mach/hardware.h b/arch/arm/mach-pxa/include/mach/hardware.h
index 16ab79547dae..aa3d9f70a08a 100644
--- a/arch/arm/mach-pxa/include/mach/hardware.h
+++ b/arch/arm/mach-pxa/include/mach/hardware.h
@@ -197,6 +197,16 @@
197#define __cpu_is_pxa935(id) (0) 197#define __cpu_is_pxa935(id) (0)
198#endif 198#endif
199 199
200#ifdef CONFIG_CPU_PXA950
201#define __cpu_is_pxa950(id) \
202 ({ \
203 unsigned int _id = (id) >> 4 & 0xfff; \
 204 _id == 0x697; \
205 })
206#else
207#define __cpu_is_pxa950(id) (0)
208#endif
209
200#define cpu_is_pxa210() \ 210#define cpu_is_pxa210() \
201 ({ \ 211 ({ \
202 __cpu_is_pxa210(read_cpuid_id()); \ 212 __cpu_is_pxa210(read_cpuid_id()); \
@@ -249,6 +259,13 @@
249 __cpu_is_pxa935(id); \ 259 __cpu_is_pxa935(id); \
250 }) 260 })
251 261
262#define cpu_is_pxa950() \
263 ({ \
264 unsigned int id = read_cpuid(CPUID_ID); \
265 __cpu_is_pxa950(id); \
266 })
267
268
252/* 269/*
253 * CPUID Core Generation Bit 270 * CPUID Core Generation Bit
254 * <= 0x2 for pxa21x/pxa25x/pxa26x/pxa27x 271 * <= 0x2 for pxa21x/pxa25x/pxa26x/pxa27x
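
The new __cpu_is_pxa950() test keys on bits [15:4] of the CPUID. A quick standalone check of the arithmetic, using hypothetical CPUID values (0x697 as the PXA950 part number is taken from the macro itself):

#include <stdio.h>

static int is_pxa950(unsigned int id)
{
	unsigned int part = (id >> 4) & 0xfff;	/* part number field */

	return part == 0x697;
}

int main(void)
{
	printf("%d\n", is_pxa950(0x56056971));	/* hypothetical CPUID -> 1 */
	printf("%d\n", is_pxa950(0x56056931));	/* different part     -> 0 */
	return 0;
}
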
diff --git a/arch/arm/mach-pxa/include/mach/irda.h b/arch/arm/mach-pxa/include/mach/irda.h
index 0a50c3c763df..3cd41f77dda4 100644
--- a/arch/arm/mach-pxa/include/mach/irda.h
+++ b/arch/arm/mach-pxa/include/mach/irda.h
@@ -12,6 +12,8 @@ struct pxaficp_platform_data {
12 void (*transceiver_mode)(struct device *dev, int mode); 12 void (*transceiver_mode)(struct device *dev, int mode);
13 int (*startup)(struct device *dev); 13 int (*startup)(struct device *dev);
14 void (*shutdown)(struct device *dev); 14 void (*shutdown)(struct device *dev);
15 int gpio_pwdown; /* powerdown GPIO for the IrDA chip */
16 bool gpio_pwdown_inverted; /* gpio_pwdown is inverted */
15}; 17};
16 18
17extern void pxa_set_ficp_info(struct pxaficp_platform_data *info); 19extern void pxa_set_ficp_info(struct pxaficp_platform_data *info);
diff --git a/arch/arm/mach-pxa/include/mach/irqs.h b/arch/arm/mach-pxa/include/mach/irqs.h
index 6a1d95993342..3677a9af9c87 100644
--- a/arch/arm/mach-pxa/include/mach/irqs.h
+++ b/arch/arm/mach-pxa/include/mach/irqs.h
@@ -68,9 +68,10 @@
68#ifdef CONFIG_PXA3xx 68#ifdef CONFIG_PXA3xx
69#define IRQ_SSP4 PXA_IRQ(13) /* SSP4 service request */ 69#define IRQ_SSP4 PXA_IRQ(13) /* SSP4 service request */
70#define IRQ_CIR PXA_IRQ(34) /* Consumer IR */ 70#define IRQ_CIR PXA_IRQ(34) /* Consumer IR */
71#define IRQ_COMM_WDT PXA_IRQ(35) /* Comm WDT interrupt */
71#define IRQ_TSI PXA_IRQ(36) /* Touch Screen Interface (PXA320) */ 72#define IRQ_TSI PXA_IRQ(36) /* Touch Screen Interface (PXA320) */
72#define IRQ_USIM2 PXA_IRQ(38) /* USIM2 Controller */ 73#define IRQ_USIM2 PXA_IRQ(38) /* USIM2 Controller */
73#define IRQ_GRPHICS PXA_IRQ(39) /* Graphics Controller */ 74#define IRQ_GCU PXA_IRQ(39) /* Graphics Controller */
74#define IRQ_MMC2 PXA_IRQ(41) /* MMC2 Controller */ 75#define IRQ_MMC2 PXA_IRQ(41) /* MMC2 Controller */
75#define IRQ_1WIRE PXA_IRQ(44) /* 1-Wire Controller */ 76#define IRQ_1WIRE PXA_IRQ(44) /* 1-Wire Controller */
76#define IRQ_NAND PXA_IRQ(45) /* NAND Controller */ 77#define IRQ_NAND PXA_IRQ(45) /* NAND Controller */
@@ -81,8 +82,31 @@
81#define IRQ_MMC3 PXA_IRQ(55) /* MMC3 Controller (PXA310) */ 82#define IRQ_MMC3 PXA_IRQ(55) /* MMC3 Controller (PXA310) */
82#endif 83#endif
83 84
84#define PXA_GPIO_IRQ_BASE PXA_IRQ(64) 85#ifdef CONFIG_CPU_PXA935
85#define PXA_GPIO_IRQ_NUM (128) 86#define IRQ_U2O PXA_IRQ(64) /* USB OTG 2.0 Controller (PXA935) */
87#define IRQ_U2H PXA_IRQ(65) /* USB Host 2.0 Controller (PXA935) */
88
89#define IRQ_MMC3_PXA935 PXA_IRQ(72) /* MMC3 Controller (PXA935) */
90#define IRQ_MMC4_PXA935 PXA_IRQ(73) /* MMC4 Controller (PXA935) */
91#define IRQ_MMC5_PXA935 PXA_IRQ(74) /* MMC5 Controller (PXA935) */
92
93#define IRQ_U2P PXA_IRQ(93) /* USB PHY D+/D- Lines (PXA935) */
94#endif
95
96#ifdef CONFIG_CPU_PXA930
97#define IRQ_ENHROT PXA_IRQ(37) /* Enhanced Rotary (PXA930) */
98#define IRQ_ACIPC0 PXA_IRQ(5)
99#define IRQ_ACIPC1 PXA_IRQ(40)
100#define IRQ_ACIPC2 PXA_IRQ(19)
101#define IRQ_TRKBALL PXA_IRQ(43) /* Track Ball */
102#endif
103
104#ifdef CONFIG_CPU_PXA950
105#define IRQ_GC500 PXA_IRQ(70) /* Graphics Controller (PXA950) */
106#endif
107
108#define PXA_GPIO_IRQ_BASE PXA_IRQ(96)
109#define PXA_GPIO_IRQ_NUM (192)
86 110
87#define GPIO_2_x_TO_IRQ(x) (PXA_GPIO_IRQ_BASE + (x)) 111#define GPIO_2_x_TO_IRQ(x) (PXA_GPIO_IRQ_BASE + (x))
88#define IRQ_GPIO(x) (((x) < 2) ? (IRQ_GPIO0 + (x)) : GPIO_2_x_TO_IRQ(x)) 112#define IRQ_GPIO(x) (((x) < 2) ? (IRQ_GPIO0 + (x)) : GPIO_2_x_TO_IRQ(x))
@@ -105,6 +129,8 @@
105#define IRQ_BOARD_END (IRQ_BOARD_START + 70) 129#define IRQ_BOARD_END (IRQ_BOARD_START + 70)
106#elif defined(CONFIG_MACH_ZYLONITE) 130#elif defined(CONFIG_MACH_ZYLONITE)
107#define IRQ_BOARD_END (IRQ_BOARD_START + 32) 131#define IRQ_BOARD_END (IRQ_BOARD_START + 32)
132#elif defined(CONFIG_PXA_EZX)
133#define IRQ_BOARD_END (IRQ_BOARD_START + 23)
108#else 134#else
109#define IRQ_BOARD_END (IRQ_BOARD_START + 16) 135#define IRQ_BOARD_END (IRQ_BOARD_START + 16)
110#endif 136#endif
@@ -237,6 +263,16 @@
237#define MAINSTONE_S1_STSCHG_IRQ MAINSTONE_IRQ(14) 263#define MAINSTONE_S1_STSCHG_IRQ MAINSTONE_IRQ(14)
238#define MAINSTONE_S1_IRQ MAINSTONE_IRQ(15) 264#define MAINSTONE_S1_IRQ MAINSTONE_IRQ(15)
239 265
266/* Balloon3 Interrupts */
267#define BALLOON3_IRQ(x) (IRQ_BOARD_START + (x))
268
269#define BALLOON3_BP_CF_NRDY_IRQ BALLOON3_IRQ(0)
270#define BALLOON3_BP_NSTSCHG_IRQ BALLOON3_IRQ(1)
271
272#define BALLOON3_AUX_NIRQ IRQ_GPIO(BALLOON3_GPIO_AUX_NIRQ)
273#define BALLOON3_CODEC_IRQ IRQ_GPIO(BALLOON3_GPIO_CODEC_IRQ)
274#define BALLOON3_S0_CD_IRQ IRQ_GPIO(BALLOON3_GPIO_S0_CD)
275
240/* LoCoMo Interrupts (CONFIG_SHARP_LOCOMO) */ 276/* LoCoMo Interrupts (CONFIG_SHARP_LOCOMO) */
241#define IRQ_LOCOMO_KEY_BASE (IRQ_BOARD_START + 0) 277#define IRQ_LOCOMO_KEY_BASE (IRQ_BOARD_START + 0)
242#define IRQ_LOCOMO_GPIO_BASE (IRQ_BOARD_START + 1) 278#define IRQ_LOCOMO_GPIO_BASE (IRQ_BOARD_START + 1)
diff --git a/arch/arm/mach-pxa/include/mach/mfp.h b/arch/arm/mach-pxa/include/mach/mfp.h
index 482185053a92..271e249ae34f 100644
--- a/arch/arm/mach-pxa/include/mach/mfp.h
+++ b/arch/arm/mach-pxa/include/mach/mfp.h
@@ -16,305 +16,6 @@
16#ifndef __ASM_ARCH_MFP_H 16#ifndef __ASM_ARCH_MFP_H
17#define __ASM_ARCH_MFP_H 17#define __ASM_ARCH_MFP_H
18 18
19#define mfp_to_gpio(m) ((m) % 128) 19#include <plat/mfp.h>
20
21/* list of all the configurable MFP pins */
22enum {
23 MFP_PIN_INVALID = -1,
24
25 MFP_PIN_GPIO0 = 0,
26 MFP_PIN_GPIO1,
27 MFP_PIN_GPIO2,
28 MFP_PIN_GPIO3,
29 MFP_PIN_GPIO4,
30 MFP_PIN_GPIO5,
31 MFP_PIN_GPIO6,
32 MFP_PIN_GPIO7,
33 MFP_PIN_GPIO8,
34 MFP_PIN_GPIO9,
35 MFP_PIN_GPIO10,
36 MFP_PIN_GPIO11,
37 MFP_PIN_GPIO12,
38 MFP_PIN_GPIO13,
39 MFP_PIN_GPIO14,
40 MFP_PIN_GPIO15,
41 MFP_PIN_GPIO16,
42 MFP_PIN_GPIO17,
43 MFP_PIN_GPIO18,
44 MFP_PIN_GPIO19,
45 MFP_PIN_GPIO20,
46 MFP_PIN_GPIO21,
47 MFP_PIN_GPIO22,
48 MFP_PIN_GPIO23,
49 MFP_PIN_GPIO24,
50 MFP_PIN_GPIO25,
51 MFP_PIN_GPIO26,
52 MFP_PIN_GPIO27,
53 MFP_PIN_GPIO28,
54 MFP_PIN_GPIO29,
55 MFP_PIN_GPIO30,
56 MFP_PIN_GPIO31,
57 MFP_PIN_GPIO32,
58 MFP_PIN_GPIO33,
59 MFP_PIN_GPIO34,
60 MFP_PIN_GPIO35,
61 MFP_PIN_GPIO36,
62 MFP_PIN_GPIO37,
63 MFP_PIN_GPIO38,
64 MFP_PIN_GPIO39,
65 MFP_PIN_GPIO40,
66 MFP_PIN_GPIO41,
67 MFP_PIN_GPIO42,
68 MFP_PIN_GPIO43,
69 MFP_PIN_GPIO44,
70 MFP_PIN_GPIO45,
71 MFP_PIN_GPIO46,
72 MFP_PIN_GPIO47,
73 MFP_PIN_GPIO48,
74 MFP_PIN_GPIO49,
75 MFP_PIN_GPIO50,
76 MFP_PIN_GPIO51,
77 MFP_PIN_GPIO52,
78 MFP_PIN_GPIO53,
79 MFP_PIN_GPIO54,
80 MFP_PIN_GPIO55,
81 MFP_PIN_GPIO56,
82 MFP_PIN_GPIO57,
83 MFP_PIN_GPIO58,
84 MFP_PIN_GPIO59,
85 MFP_PIN_GPIO60,
86 MFP_PIN_GPIO61,
87 MFP_PIN_GPIO62,
88 MFP_PIN_GPIO63,
89 MFP_PIN_GPIO64,
90 MFP_PIN_GPIO65,
91 MFP_PIN_GPIO66,
92 MFP_PIN_GPIO67,
93 MFP_PIN_GPIO68,
94 MFP_PIN_GPIO69,
95 MFP_PIN_GPIO70,
96 MFP_PIN_GPIO71,
97 MFP_PIN_GPIO72,
98 MFP_PIN_GPIO73,
99 MFP_PIN_GPIO74,
100 MFP_PIN_GPIO75,
101 MFP_PIN_GPIO76,
102 MFP_PIN_GPIO77,
103 MFP_PIN_GPIO78,
104 MFP_PIN_GPIO79,
105 MFP_PIN_GPIO80,
106 MFP_PIN_GPIO81,
107 MFP_PIN_GPIO82,
108 MFP_PIN_GPIO83,
109 MFP_PIN_GPIO84,
110 MFP_PIN_GPIO85,
111 MFP_PIN_GPIO86,
112 MFP_PIN_GPIO87,
113 MFP_PIN_GPIO88,
114 MFP_PIN_GPIO89,
115 MFP_PIN_GPIO90,
116 MFP_PIN_GPIO91,
117 MFP_PIN_GPIO92,
118 MFP_PIN_GPIO93,
119 MFP_PIN_GPIO94,
120 MFP_PIN_GPIO95,
121 MFP_PIN_GPIO96,
122 MFP_PIN_GPIO97,
123 MFP_PIN_GPIO98,
124 MFP_PIN_GPIO99,
125 MFP_PIN_GPIO100,
126 MFP_PIN_GPIO101,
127 MFP_PIN_GPIO102,
128 MFP_PIN_GPIO103,
129 MFP_PIN_GPIO104,
130 MFP_PIN_GPIO105,
131 MFP_PIN_GPIO106,
132 MFP_PIN_GPIO107,
133 MFP_PIN_GPIO108,
134 MFP_PIN_GPIO109,
135 MFP_PIN_GPIO110,
136 MFP_PIN_GPIO111,
137 MFP_PIN_GPIO112,
138 MFP_PIN_GPIO113,
139 MFP_PIN_GPIO114,
140 MFP_PIN_GPIO115,
141 MFP_PIN_GPIO116,
142 MFP_PIN_GPIO117,
143 MFP_PIN_GPIO118,
144 MFP_PIN_GPIO119,
145 MFP_PIN_GPIO120,
146 MFP_PIN_GPIO121,
147 MFP_PIN_GPIO122,
148 MFP_PIN_GPIO123,
149 MFP_PIN_GPIO124,
150 MFP_PIN_GPIO125,
151 MFP_PIN_GPIO126,
152 MFP_PIN_GPIO127,
153 MFP_PIN_GPIO0_2,
154 MFP_PIN_GPIO1_2,
155 MFP_PIN_GPIO2_2,
156 MFP_PIN_GPIO3_2,
157 MFP_PIN_GPIO4_2,
158 MFP_PIN_GPIO5_2,
159 MFP_PIN_GPIO6_2,
160 MFP_PIN_GPIO7_2,
161 MFP_PIN_GPIO8_2,
162 MFP_PIN_GPIO9_2,
163 MFP_PIN_GPIO10_2,
164 MFP_PIN_GPIO11_2,
165 MFP_PIN_GPIO12_2,
166 MFP_PIN_GPIO13_2,
167 MFP_PIN_GPIO14_2,
168 MFP_PIN_GPIO15_2,
169 MFP_PIN_GPIO16_2,
170 MFP_PIN_GPIO17_2,
171
172 MFP_PIN_ULPI_STP,
173 MFP_PIN_ULPI_NXT,
174 MFP_PIN_ULPI_DIR,
175
176 MFP_PIN_nXCVREN,
177 MFP_PIN_DF_CLE_nOE,
178 MFP_PIN_DF_nADV1_ALE,
179 MFP_PIN_DF_SCLK_E,
180 MFP_PIN_DF_SCLK_S,
181 MFP_PIN_nBE0,
182 MFP_PIN_nBE1,
183 MFP_PIN_DF_nADV2_ALE,
184 MFP_PIN_DF_INT_RnB,
185 MFP_PIN_DF_nCS0,
186 MFP_PIN_DF_nCS1,
187 MFP_PIN_nLUA,
188 MFP_PIN_nLLA,
189 MFP_PIN_DF_nWE,
190 MFP_PIN_DF_ALE_nWE,
191 MFP_PIN_DF_nRE_nOE,
192 MFP_PIN_DF_ADDR0,
193 MFP_PIN_DF_ADDR1,
194 MFP_PIN_DF_ADDR2,
195 MFP_PIN_DF_ADDR3,
196 MFP_PIN_DF_IO0,
197 MFP_PIN_DF_IO1,
198 MFP_PIN_DF_IO2,
199 MFP_PIN_DF_IO3,
200 MFP_PIN_DF_IO4,
201 MFP_PIN_DF_IO5,
202 MFP_PIN_DF_IO6,
203 MFP_PIN_DF_IO7,
204 MFP_PIN_DF_IO8,
205 MFP_PIN_DF_IO9,
206 MFP_PIN_DF_IO10,
207 MFP_PIN_DF_IO11,
208 MFP_PIN_DF_IO12,
209 MFP_PIN_DF_IO13,
210 MFP_PIN_DF_IO14,
211 MFP_PIN_DF_IO15,
212
213 /* additional pins on PXA930 */
214 MFP_PIN_GSIM_UIO,
215 MFP_PIN_GSIM_UCLK,
216 MFP_PIN_GSIM_UDET,
217 MFP_PIN_GSIM_nURST,
218 MFP_PIN_PMIC_INT,
219 MFP_PIN_RDY,
220
221 MFP_PIN_MAX,
222};
223
224/*
225 * a possible MFP configuration is represented by a 32-bit integer
226 *
227 * bit 0.. 9 - MFP Pin Number (1024 Pins Maximum)
228 * bit 10..12 - Alternate Function Selection
229 * bit 13..15 - Drive Strength
230 * bit 16..18 - Low Power Mode State
231 * bit 19..20 - Low Power Mode Edge Detection
232 * bit 21..22 - Run Mode Pull State
233 *
234 * to facilitate the definition, the following macros are provided
235 *
236 * MFP_CFG_DEFAULT - default MFP configuration value, with
237 * alternate function = 0,
238 * drive strength = fast 3mA (MFP_DS03X)
239 * low power mode = default
240 * edge detection = none
241 *
242 * MFP_CFG - default MFPR value with alternate function
243 * MFP_CFG_DRV - default MFPR value with alternate function and
244 * pin drive strength
245 * MFP_CFG_LPM - default MFPR value with alternate function and
246 * low power mode
247 * MFP_CFG_X - default MFPR value with alternate function,
248 * pin drive strength and low power mode
249 */
250
251typedef unsigned long mfp_cfg_t;
252
253#define MFP_PIN(x) ((x) & 0x3ff)
254
255#define MFP_AF0 (0x0 << 10)
256#define MFP_AF1 (0x1 << 10)
257#define MFP_AF2 (0x2 << 10)
258#define MFP_AF3 (0x3 << 10)
259#define MFP_AF4 (0x4 << 10)
260#define MFP_AF5 (0x5 << 10)
261#define MFP_AF6 (0x6 << 10)
262#define MFP_AF7 (0x7 << 10)
263#define MFP_AF_MASK (0x7 << 10)
264#define MFP_AF(x) (((x) >> 10) & 0x7)
265
266#define MFP_DS01X (0x0 << 13)
267#define MFP_DS02X (0x1 << 13)
268#define MFP_DS03X (0x2 << 13)
269#define MFP_DS04X (0x3 << 13)
270#define MFP_DS06X (0x4 << 13)
271#define MFP_DS08X (0x5 << 13)
272#define MFP_DS10X (0x6 << 13)
273#define MFP_DS13X (0x7 << 13)
274#define MFP_DS_MASK (0x7 << 13)
275#define MFP_DS(x) (((x) >> 13) & 0x7)
276
277#define MFP_LPM_DEFAULT (0x0 << 16)
278#define MFP_LPM_DRIVE_LOW (0x1 << 16)
279#define MFP_LPM_DRIVE_HIGH (0x2 << 16)
280#define MFP_LPM_PULL_LOW (0x3 << 16)
281#define MFP_LPM_PULL_HIGH (0x4 << 16)
282#define MFP_LPM_FLOAT (0x5 << 16)
283#define MFP_LPM_INPUT (0x6 << 16)
284#define MFP_LPM_STATE_MASK (0x7 << 16)
285#define MFP_LPM_STATE(x) (((x) >> 16) & 0x7)
286
287#define MFP_LPM_EDGE_NONE (0x0 << 19)
288#define MFP_LPM_EDGE_RISE (0x1 << 19)
289#define MFP_LPM_EDGE_FALL (0x2 << 19)
290#define MFP_LPM_EDGE_BOTH (0x3 << 19)
291#define MFP_LPM_EDGE_MASK (0x3 << 19)
292#define MFP_LPM_EDGE(x) (((x) >> 19) & 0x3)
293
294#define MFP_PULL_NONE (0x0 << 21)
295#define MFP_PULL_LOW (0x1 << 21)
296#define MFP_PULL_HIGH (0x2 << 21)
297#define MFP_PULL_BOTH (0x3 << 21)
298#define MFP_PULL_MASK (0x3 << 21)
299#define MFP_PULL(x) (((x) >> 21) & 0x3)
300
301#define MFP_CFG_DEFAULT (MFP_AF0 | MFP_DS03X | MFP_LPM_DEFAULT |\
302 MFP_LPM_EDGE_NONE | MFP_PULL_NONE)
303
304#define MFP_CFG(pin, af) \
305 ((MFP_CFG_DEFAULT & ~MFP_AF_MASK) |\
306 (MFP_PIN(MFP_PIN_##pin) | MFP_##af))
307
308#define MFP_CFG_DRV(pin, af, drv) \
309 ((MFP_CFG_DEFAULT & ~(MFP_AF_MASK | MFP_DS_MASK)) |\
310 (MFP_PIN(MFP_PIN_##pin) | MFP_##af | MFP_##drv))
311
312#define MFP_CFG_LPM(pin, af, lpm) \
313 ((MFP_CFG_DEFAULT & ~(MFP_AF_MASK | MFP_LPM_STATE_MASK)) |\
314 (MFP_PIN(MFP_PIN_##pin) | MFP_##af | MFP_LPM_##lpm))
315
316#define MFP_CFG_X(pin, af, drv, lpm) \
317 ((MFP_CFG_DEFAULT & ~(MFP_AF_MASK | MFP_DS_MASK | MFP_LPM_STATE_MASK)) |\
318 (MFP_PIN(MFP_PIN_##pin) | MFP_##af | MFP_##drv | MFP_LPM_##lpm))
319 20
320#endif /* __ASM_ARCH_MFP_H */ 21#endif /* __ASM_ARCH_MFP_H */
diff --git a/arch/arm/mach-pxa/include/mach/mmc.h b/arch/arm/mach-pxa/include/mach/mmc.h
index 6d1304c9270f..02a69dc2ee63 100644
--- a/arch/arm/mach-pxa/include/mach/mmc.h
+++ b/arch/arm/mach-pxa/include/mach/mmc.h
@@ -14,6 +14,11 @@ struct pxamci_platform_data {
14 int (*get_ro)(struct device *); 14 int (*get_ro)(struct device *);
15 void (*setpower)(struct device *, unsigned int); 15 void (*setpower)(struct device *, unsigned int);
16 void (*exit)(struct device *, void *); 16 void (*exit)(struct device *, void *);
17 int gpio_card_detect; /* gpio detecting card insertion */
18 int gpio_card_ro; /* gpio detecting read only toggle */
19 bool gpio_card_ro_invert; /* gpio ro is inverted */
20 int gpio_power; /* gpio powering up MMC bus */
21 bool gpio_power_invert; /* gpio power is inverted */
17}; 22};
18 23
19extern void pxa_set_mci_info(struct pxamci_platform_data *info); 24extern void pxa_set_mci_info(struct pxamci_platform_data *info);
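
All converted boards follow the same convention for the new fields: a negative GPIO number means the line is not wired. An illustrative sketch of how a consumer might act on that convention (this is not the actual pxamci probe code):

#include <stdio.h>

struct pxamci_platform_data_sketch {
	int gpio_card_detect;
	int gpio_card_ro;
	int gpio_power;
};

static void mci_probe_sketch(const struct pxamci_platform_data_sketch *pd)
{
	if (pd->gpio_power >= 0)
		printf("request GPIO%d for bus power\n", pd->gpio_power);
	if (pd->gpio_card_detect >= 0)
		printf("card-detect IRQ from GPIO%d\n", pd->gpio_card_detect);
	else
		printf("no card-detect line: card assumed present\n");
	if (pd->gpio_card_ro < 0)
		printf("no write-protect line: media treated as writable\n");
}

int main(void)
{
	struct pxamci_platform_data_sketch idp = { -1, -1, -1 };	/* as on idp */

	mci_probe_sketch(&idp);
	return 0;
}
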
diff --git a/arch/arm/mach-pxa/include/mach/palmtc.h b/arch/arm/mach-pxa/include/mach/palmtc.h
new file mode 100644
index 000000000000..3dc9b074ab46
--- /dev/null
+++ b/arch/arm/mach-pxa/include/mach/palmtc.h
@@ -0,0 +1,86 @@
1/*
2 * linux/include/asm-arm/arch-pxa/palmtc-gpio.h
3 *
4 * GPIOs and interrupts for Palm Tungsten|C Handheld Computer
5 *
6 * Authors: Alex Osborne <bobofdoom@gmail.com>
7 * Marek Vasut <marek.vasut@gmail.com>
8 * Holger Bocklet <bitz.email@gmx.net>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 *
14 */
15
16#ifndef _INCLUDE_PALMTC_H_
17#define _INCLUDE_PALMTC_H_
18
19/** HERE ARE GPIOs **/
20
21/* GPIOs */
22#define GPIO_NR_PALMTC_EARPHONE_DETECT 2
23#define GPIO_NR_PALMTC_CRADLE_DETECT 5
24#define GPIO_NR_PALMTC_HOTSYNC_BUTTON 7
25
26/* SD/MMC */
27#define GPIO_NR_PALMTC_SD_DETECT_N 12
28#define GPIO_NR_PALMTC_SD_POWER 32
29#define GPIO_NR_PALMTC_SD_READONLY 54
30
31/* WLAN */
32#define GPIO_NR_PALMTC_PCMCIA_READY 13
33#define GPIO_NR_PALMTC_PCMCIA_PWRREADY 14
34#define GPIO_NR_PALMTC_PCMCIA_POWER1 15
35#define GPIO_NR_PALMTC_PCMCIA_POWER2 33
36#define GPIO_NR_PALMTC_PCMCIA_POWER3 55
37#define GPIO_NR_PALMTC_PCMCIA_RESET 78
38
39/* UDC */
40#define GPIO_NR_PALMTC_USB_DETECT_N 4
41#define GPIO_NR_PALMTC_USB_POWER 36
42
43/* LCD/BACKLIGHT */
44#define GPIO_NR_PALMTC_BL_POWER 16
45#define GPIO_NR_PALMTC_LCD_POWER 44
46#define GPIO_NR_PALMTC_LCD_BLANK 38
47
48/* UART */
49#define GPIO_NR_PALMTC_RS232_POWER 37
50
51/* IRDA */
52#define GPIO_NR_PALMTC_IR_DISABLE 45
53
54/* IRQs */
55#define IRQ_GPIO_PALMTC_SD_DETECT_N IRQ_GPIO(GPIO_NR_PALMTC_SD_DETECT_N)
56#define IRQ_GPIO_PALMTC_WLAN_READY IRQ_GPIO(GPIO_NR_PALMTC_WLAN_READY)
57
58/* UCB1400 GPIOs */
59#define GPIO_NR_PALMTC_POWER_DETECT (0x80 | 0x00)
60#define GPIO_NR_PALMTC_HEADPHONE_DETECT (0x80 | 0x01)
61#define GPIO_NR_PALMTC_SPEAKER_ENABLE (0x80 | 0x03)
62#define GPIO_NR_PALMTC_VIBRA_POWER (0x80 | 0x05)
63#define GPIO_NR_PALMTC_LED_POWER (0x80 | 0x07)
64
65/** HERE ARE INIT VALUES **/
66#define PALMTC_UCB1400_GPIO_OFFSET 0x80
67
68/* BATTERY */
69#define PALMTC_BAT_MAX_VOLTAGE 4000 /* 4.00V maximum voltage */
70#define PALMTC_BAT_MIN_VOLTAGE 3550 /* 3.55V critical voltage */
 71#define PALMTC_BAT_MAX_CURRENT 0 /* unknown */
72#define PALMTC_BAT_MIN_CURRENT 0 /* unknown */
73#define PALMTC_BAT_MAX_CHARGE 1 /* unknown */
74#define PALMTC_BAT_MIN_CHARGE 1 /* unknown */
75#define PALMTC_MAX_LIFE_MINS 240 /* on-life in minutes */
76
77#define PALMTC_BAT_MEASURE_DELAY (HZ * 1)
78
79/* BACKLIGHT */
80#define PALMTC_MAX_INTENSITY 0xFE
81#define PALMTC_DEFAULT_INTENSITY 0x7E
82#define PALMTC_LIMIT_MASK 0x7F
83#define PALMTC_PRESCALER 0x3F
84#define PALMTC_PERIOD_NS 3500
85
86#endif
diff --git a/arch/arm/mach-pxa/include/mach/palmtx.h b/arch/arm/mach-pxa/include/mach/palmtx.h
index e74082c872e1..1be0db6ed55e 100644
--- a/arch/arm/mach-pxa/include/mach/palmtx.h
+++ b/arch/arm/mach-pxa/include/mach/palmtx.h
@@ -82,6 +82,11 @@
82#define PALMTX_PHYS_FLASH_START PXA_CS0_PHYS /* ChipSelect 0 */ 82#define PALMTX_PHYS_FLASH_START PXA_CS0_PHYS /* ChipSelect 0 */
83#define PALMTX_PHYS_NAND_START PXA_CS1_PHYS /* ChipSelect 1 */ 83#define PALMTX_PHYS_NAND_START PXA_CS1_PHYS /* ChipSelect 1 */
84 84
85#define PALMTX_NAND_ALE_PHYS (PALMTX_PHYS_NAND_START | (1 << 24))
86#define PALMTX_NAND_CLE_PHYS (PALMTX_PHYS_NAND_START | (1 << 25))
87#define PALMTX_NAND_ALE_VIRT 0xff100000
88#define PALMTX_NAND_CLE_VIRT 0xff200000
89
85/* TOUCHSCREEN */ 90/* TOUCHSCREEN */
86#define AC97_LINK_FRAME 21 91#define AC97_LINK_FRAME 21
87 92
diff --git a/arch/arm/mach-pxa/include/mach/pxa3xx-regs.h b/arch/arm/mach-pxa/include/mach/pxa3xx-regs.h
index 7d1a059b3d43..e91d63cfe811 100644
--- a/arch/arm/mach-pxa/include/mach/pxa3xx-regs.h
+++ b/arch/arm/mach-pxa/include/mach/pxa3xx-regs.h
@@ -208,7 +208,7 @@
208#define CKEN_MVED 43 /* < MVED clock enable */ 208#define CKEN_MVED 43 /* < MVED clock enable */
209 209
210/* Note: GCU clock enable bit differs on PXA300/PXA310 and PXA320 */ 210/* Note: GCU clock enable bit differs on PXA300/PXA310 and PXA320 */
211#define PXA300_CKEN_GRAPHICS 42 /* Graphics controller clock enable */ 211#define CKEN_PXA300_GCU 42 /* Graphics controller clock enable */
212#define PXA320_CKEN_GRAPHICS 7 /* Graphics controller clock enable */ 212#define CKEN_PXA320_GCU 7 /* Graphics controller clock enable */
213 213
214#endif /* __ASM_ARCH_PXA3XX_REGS_H */ 214#endif /* __ASM_ARCH_PXA3XX_REGS_H */
diff --git a/arch/arm/mach-pxa/include/mach/pxafb.h b/arch/arm/mach-pxa/include/mach/pxafb.h
index 6932720ba04e..f73061c90b5e 100644
--- a/arch/arm/mach-pxa/include/mach/pxafb.h
+++ b/arch/arm/mach-pxa/include/mach/pxafb.h
@@ -118,7 +118,8 @@ struct pxafb_mach_info {
118 u_int fixed_modes:1, 118 u_int fixed_modes:1,
119 cmap_inverse:1, 119 cmap_inverse:1,
120 cmap_static:1, 120 cmap_static:1,
121 unused:29; 121 acceleration_enabled:1,
122 unused:28;
122 123
123 /* The following should be defined in LCCR0 124 /* The following should be defined in LCCR0
124 * LCCR0_Act or LCCR0_Pas Active or Passive 125 * LCCR0_Act or LCCR0_Pas Active or Passive
diff --git a/arch/arm/mach-pxa/include/mach/regs-intc.h b/arch/arm/mach-pxa/include/mach/regs-intc.h
index ad23e74b762f..68464ce1c1ea 100644
--- a/arch/arm/mach-pxa/include/mach/regs-intc.h
+++ b/arch/arm/mach-pxa/include/mach/regs-intc.h
@@ -13,6 +13,7 @@
13#define ICFP __REG(0x40D0000C) /* Interrupt Controller FIQ Pending Register */ 13#define ICFP __REG(0x40D0000C) /* Interrupt Controller FIQ Pending Register */
14#define ICPR __REG(0x40D00010) /* Interrupt Controller Pending Register */ 14#define ICPR __REG(0x40D00010) /* Interrupt Controller Pending Register */
15#define ICCR __REG(0x40D00014) /* Interrupt Controller Control Register */ 15#define ICCR __REG(0x40D00014) /* Interrupt Controller Control Register */
16#define ICHP __REG(0x40D00018) /* Interrupt Controller Highest Priority Register */
16 17
17#define ICIP2 __REG(0x40D0009C) /* Interrupt Controller IRQ Pending Register 2 */ 18#define ICIP2 __REG(0x40D0009C) /* Interrupt Controller IRQ Pending Register 2 */
18#define ICMR2 __REG(0x40D000A0) /* Interrupt Controller Mask Register 2 */ 19#define ICMR2 __REG(0x40D000A0) /* Interrupt Controller Mask Register 2 */
@@ -20,4 +21,14 @@
 #define ICFP2		__REG(0x40D000A8)  /* Interrupt Controller FIQ Pending Register 2 */
 #define ICPR2		__REG(0x40D000AC)  /* Interrupt Controller Pending Register 2 */
 
+#define ICIP3		__REG(0x40D00130)  /* Interrupt Controller IRQ Pending Register 3 */
+#define ICMR3		__REG(0x40D00134)  /* Interrupt Controller Mask Register 3 */
+#define ICLR3		__REG(0x40D00138)  /* Interrupt Controller Level Register 3 */
+#define ICFP3		__REG(0x40D0013C)  /* Interrupt Controller FIQ Pending Register 3 */
+#define ICPR3		__REG(0x40D00140)  /* Interrupt Controller Pending Register 3 */
+
+#define IPR(x)		__REG(0x40D0001C + (x < 32 ? (x << 2)		\
+				: (x < 64 ? (0x94 + ((x - 32) << 2))	\
+				: (0x128 + ((x - 64) << 2)))))
+
 #endif /* __ASM_MACH_REGS_INTC_H */
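The new IPR(x) macro folds the three discontiguous banks of interrupt priority registers into one lookup: IRQs 0-31 start at offset 0x1C from the controller base, IRQs 32-63 sit 0x94 above that, and IRQs 64 and up another 0x128 above it. Worked examples of the address math (illustrative only):

/* IPR(3)  -> 0x40D0001C + (3 << 2)         = 0x40D00028 */
/* IPR(35) -> 0x40D0001C + 0x94 + (3 << 2)  = 0x40D000BC */
/* IPR(66) -> 0x40D0001C + 0x128 + (2 << 2) = 0x40D0014C */
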
diff --git a/arch/arm/mach-pxa/include/mach/uncompress.h b/arch/arm/mach-pxa/include/mach/uncompress.h
index b54749413e96..237734b5b1be 100644
--- a/arch/arm/mach-pxa/include/mach/uncompress.h
+++ b/arch/arm/mach-pxa/include/mach/uncompress.h
@@ -37,7 +37,7 @@ static inline void arch_decomp_setup(void)
 {
 	if (machine_is_littleton() || machine_is_intelmote2()
 	    || machine_is_csb726() || machine_is_stargate2()
-	    || machine_is_cm_x300())
+	    || machine_is_cm_x300() || machine_is_balloon3())
 		UART = STUART;
 }
 
diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c
index f6e0300e4f64..d694ce289668 100644
--- a/arch/arm/mach-pxa/irq.c
+++ b/arch/arm/mach-pxa/irq.c
@@ -120,7 +120,7 @@ static void __init pxa_init_low_gpio_irq(set_wake_t fn)
 
 void __init pxa_init_irq(int irq_nr, set_wake_t fn)
 {
-	int irq;
+	int irq, i;
 
 	pxa_internal_irq_nr = irq_nr;
 
@@ -129,6 +129,12 @@ void __init pxa_init_irq(int irq_nr, set_wake_t fn)
 		_ICLR(irq) = 0;	/* all IRQs are IRQ, not FIQ */
 	}
 
+	/* initialize interrupt priority */
+	if (cpu_is_pxa27x() || cpu_is_pxa3xx()) {
+		for (i = 0; i < irq_nr; i++)
+			IPR(i) = i | (1 << 31);
+	}
+
 	/* only unmasked interrupts kick us out of idle */
 	ICCR = 1;
 
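The loop programs an identity mapping: IRQ n is assigned priority value n, with bit 31 set. A hedged sketch of how such a slot could be read back, assuming (per the PXA27x/PXA3xx developer manuals) that the low bits carry the peripheral ID and bit 31 marks the entry valid -- not code from this patch:

/* Decode one priority slot as programmed above (assumption: bits 5:0
 * hold the peripheral ID, bit 31 is the "valid" flag). */
static inline int ipr_peripheral_id(unsigned long ipr)
{
	return (ipr & (1UL << 31)) ? (int)(ipr & 0x3f) : -1;
}
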
diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c
index 55b3788fd1ae..13848955d133 100644
--- a/arch/arm/mach-pxa/littleton.c
+++ b/arch/arm/mach-pxa/littleton.c
@@ -265,45 +265,12 @@ static inline void littleton_init_keypad(void) {}
 #endif
 
 #if defined(CONFIG_MMC_PXA) || defined(CONFIG_MMC_PXA_MODULE)
-static int littleton_mci_init(struct device *dev,
-			      irq_handler_t littleton_detect_int, void *data)
-{
-	int err, gpio_cd = GPIO_MMC1_CARD_DETECT;
-
-	err = gpio_request(gpio_cd, "mmc card detect");
-	if (err)
-		goto err_request_cd;
-
-	gpio_direction_input(gpio_cd);
-
-	err = request_irq(gpio_to_irq(gpio_cd), littleton_detect_int,
-			  IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-			  "mmc card detect", data);
-	if (err) {
-		dev_err(dev, "failed to request card detect IRQ\n");
-		goto err_request_irq;
-	}
-	return 0;
-
-err_request_irq:
-	gpio_free(gpio_cd);
-err_request_cd:
-	return err;
-}
-
-static void littleton_mci_exit(struct device *dev, void *data)
-{
-	int gpio_cd = GPIO_MMC1_CARD_DETECT;
-
-	free_irq(gpio_to_irq(gpio_cd), data);
-	gpio_free(gpio_cd);
-}
-
 static struct pxamci_platform_data littleton_mci_platform_data = {
 	.detect_delay		= 20,
 	.ocr_mask		= MMC_VDD_32_33 | MMC_VDD_33_34,
-	.init			= littleton_mci_init,
-	.exit			= littleton_mci_exit,
+	.gpio_card_detect	= GPIO_MMC1_CARD_DETECT,
+	.gpio_card_ro		= -1,
+	.gpio_power		= -1,
 };
 
 static void __init littleton_init_mmc(void)
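This littleton change is the template for the board conversions that follow: the hand-rolled init/exit callbacks whose only job was requesting a card-detect GPIO and its IRQ are deleted, and the GPIO numbers move into new pxamci platform-data fields (-1 meaning "not wired"). Roughly what the pxamci driver now does once, centrally -- a sketch under that assumption, not the actual mmc/host/pxamci.c code:

/* Sketch: centralized card-detect setup from platform data
 * (error handling trimmed; pdata is the board's pxamci_platform_data). */
if (gpio_is_valid(pdata->gpio_card_detect)) {
	gpio_request(pdata->gpio_card_detect, "mmc card detect");
	gpio_direction_input(pdata->gpio_card_detect);
	request_irq(gpio_to_irq(pdata->gpio_card_detect), detect_handler,
		    IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
		    "mmc card detect", host);
}
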
diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c
index f04c8333dff7..c6a94d3fdd61 100644
--- a/arch/arm/mach-pxa/lubbock.c
+++ b/arch/arm/mach-pxa/lubbock.c
@@ -482,11 +482,14 @@ static void lubbock_mci_exit(struct device *dev, void *data)
 }
 
 static struct pxamci_platform_data lubbock_mci_platform_data = {
 	.ocr_mask		= MMC_VDD_32_33|MMC_VDD_33_34,
 	.detect_delay		= 1,
 	.init			= lubbock_mci_init,
 	.get_ro			= lubbock_mci_get_ro,
 	.exit			= lubbock_mci_exit,
+	.gpio_card_detect	= -1,
+	.gpio_card_ro		= -1,
+	.gpio_power		= -1,
 };
 
 static void lubbock_irda_transceiver_mode(struct device *dev, int mode)
@@ -504,8 +507,9 @@ static void lubbock_irda_transceiver_mode(struct device *dev, int mode)
 }
 
 static struct pxaficp_platform_data lubbock_ficp_platform_data = {
-	.transceiver_cap	= IR_SIRMODE | IR_FIRMODE,
-	.transceiver_mode	= lubbock_irda_transceiver_mode,
+	.gpio_pwdown		= -1,
+	.transceiver_cap	= IR_SIRMODE | IR_FIRMODE,
+	.transceiver_mode	= lubbock_irda_transceiver_mode,
 };
 
 static void __init lubbock_init(void)
diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
index ca39669cffc5..5360c07f5138 100644
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -140,15 +140,9 @@ static unsigned long magician_pin_config[] __initdata = {
  * IRDA
  */
 
-static void magician_irda_transceiver_mode(struct device *dev, int mode)
-{
-	gpio_set_value(GPIO83_MAGICIAN_nIR_EN, mode & IR_OFF);
-	pxa2xx_transceiver_mode(dev, mode);
-}
-
 static struct pxaficp_platform_data magician_ficp_info = {
-	.transceiver_cap	= IR_SIRMODE | IR_OFF,
-	.transceiver_mode	= magician_irda_transceiver_mode,
+	.gpio_pwdown		= GPIO83_MAGICIAN_nIR_EN,
+	.transceiver_cap	= IR_SIRMODE | IR_OFF,
 };
 
 /*
@@ -651,55 +645,24 @@ static struct platform_device bq24022 = {
 static int magician_mci_init(struct device *dev,
 			irq_handler_t detect_irq, void *data)
 {
-	int err;
-
-	err = request_irq(IRQ_MAGICIAN_SD, detect_irq,
+	return request_irq(IRQ_MAGICIAN_SD, detect_irq,
 			IRQF_DISABLED | IRQF_SAMPLE_RANDOM,
-			"MMC card detect", data);
-	if (err)
-		goto err_request_irq;
-	err = gpio_request(EGPIO_MAGICIAN_SD_POWER, "SD_POWER");
-	if (err)
-		goto err_request_power;
-	err = gpio_request(EGPIO_MAGICIAN_nSD_READONLY, "nSD_READONLY");
-	if (err)
-		goto err_request_readonly;
-
-	return 0;
-
-err_request_readonly:
-	gpio_free(EGPIO_MAGICIAN_SD_POWER);
-err_request_power:
-	free_irq(IRQ_MAGICIAN_SD, data);
-err_request_irq:
-	return err;
-}
-
-static void magician_mci_setpower(struct device *dev, unsigned int vdd)
-{
-	struct pxamci_platform_data *pdata = dev->platform_data;
-
-	gpio_set_value(EGPIO_MAGICIAN_SD_POWER, (1 << vdd) & pdata->ocr_mask);
-}
-
-static int magician_mci_get_ro(struct device *dev)
-{
-	return (!gpio_get_value(EGPIO_MAGICIAN_nSD_READONLY));
+			"mmc card detect", data);
 }
 
 static void magician_mci_exit(struct device *dev, void *data)
 {
-	gpio_free(EGPIO_MAGICIAN_nSD_READONLY);
-	gpio_free(EGPIO_MAGICIAN_SD_POWER);
 	free_irq(IRQ_MAGICIAN_SD, data);
 }
 
 static struct pxamci_platform_data magician_mci_info = {
 	.ocr_mask		= MMC_VDD_32_33|MMC_VDD_33_34,
 	.init			= magician_mci_init,
-	.get_ro			= magician_mci_get_ro,
-	.setpower		= magician_mci_setpower,
-	.exit			= magician_mci_exit,
+	.exit			= magician_mci_exit,
+	.gpio_card_detect	= -1,
+	.gpio_card_ro		= EGPIO_MAGICIAN_nSD_READONLY,
+	.gpio_card_ro_invert	= 1,
+	.gpio_power		= EGPIO_MAGICIAN_SD_POWER,
 };
 
 
diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c
index f4dabf0273ca..a4eeae345e64 100644
--- a/arch/arm/mach-pxa/mainstone.c
+++ b/arch/arm/mach-pxa/mainstone.c
@@ -450,10 +450,13 @@ static void mainstone_mci_exit(struct device *dev, void *data)
 }
 
 static struct pxamci_platform_data mainstone_mci_platform_data = {
 	.ocr_mask		= MMC_VDD_32_33|MMC_VDD_33_34,
 	.init			= mainstone_mci_init,
 	.setpower		= mainstone_mci_setpower,
 	.exit			= mainstone_mci_exit,
+	.gpio_card_detect	= -1,
+	.gpio_card_ro		= -1,
+	.gpio_power		= -1,
 };
 
 static void mainstone_irda_transceiver_mode(struct device *dev, int mode)
@@ -476,8 +479,9 @@ static void mainstone_irda_transceiver_mode(struct device *dev, int mode)
 }
 
 static struct pxaficp_platform_data mainstone_ficp_platform_data = {
-	.transceiver_cap	= IR_SIRMODE | IR_FIRMODE | IR_OFF,
-	.transceiver_mode	= mainstone_irda_transceiver_mode,
+	.gpio_pwdown		= -1,
+	.transceiver_cap	= IR_SIRMODE | IR_FIRMODE | IR_OFF,
+	.transceiver_mode	= mainstone_irda_transceiver_mode,
 };
 
 static struct gpio_keys_button gpio_keys_button[] = {
diff --git a/arch/arm/mach-pxa/mioa701.c b/arch/arm/mach-pxa/mioa701.c
index 2d28132c725b..3cab452e5567 100644
--- a/arch/arm/mach-pxa/mioa701.c
+++ b/arch/arm/mach-pxa/mioa701.c
@@ -434,72 +434,15 @@ struct gpio_vbus_mach_info gpio_vbus_data = {
 /*
  * SDIO/MMC Card controller
  */
-static void mci_setpower(struct device *dev, unsigned int vdd)
-{
-	struct pxamci_platform_data *p_d = dev->platform_data;
-
-	if ((1 << vdd) & p_d->ocr_mask)
-		gpio_set_value(GPIO91_SDIO_EN, 1);	/* enable SDIO power */
-	else
-		gpio_set_value(GPIO91_SDIO_EN, 0);	/* disable SDIO power */
-}
-
-static int mci_get_ro(struct device *dev)
-{
-	return gpio_get_value(GPIO78_SDIO_RO);
-}
-
-struct gpio_ress mci_gpios[] = {
-	MIO_GPIO_IN(GPIO78_SDIO_RO, "SDIO readonly detect"),
-	MIO_GPIO_IN(GPIO15_SDIO_INSERT, "SDIO insertion detect"),
-	MIO_GPIO_OUT(GPIO91_SDIO_EN, 0, "SDIO power enable")
-};
-
-static void mci_exit(struct device *dev, void *data)
-{
-	mio_gpio_free(ARRAY_AND_SIZE(mci_gpios));
-	free_irq(gpio_to_irq(GPIO15_SDIO_INSERT), data);
-}
-
-static struct pxamci_platform_data mioa701_mci_info;
-
 /**
  * The card detect interrupt isn't debounced so we delay it by 250ms
  * to give the card a chance to fully insert/eject.
  */
-static int mci_init(struct device *dev, irq_handler_t detect_int, void *data)
-{
-	int rc;
-	int irq = gpio_to_irq(GPIO15_SDIO_INSERT);
-
-	rc = mio_gpio_request(ARRAY_AND_SIZE(mci_gpios));
-	if (rc)
-		goto err_gpio;
-	/* enable RE/FE interrupt on card insertion and removal */
-	rc = request_irq(irq, detect_int,
-			 IRQF_DISABLED | IRQF_TRIGGER_RISING |
-			 IRQF_TRIGGER_FALLING,
-			 "MMC card detect", data);
-	if (rc)
-		goto err_irq;
-
-	mioa701_mci_info.detect_delay = msecs_to_jiffies(250);
-	return 0;
-
-err_irq:
-	dev_err(dev, "mioa701_mci_init: MMC/SD:"
-		" can't request MMC card detect IRQ\n");
-	mio_gpio_free(ARRAY_AND_SIZE(mci_gpios));
-err_gpio:
-	return rc;
-}
-
 static struct pxamci_platform_data mioa701_mci_info = {
 	.ocr_mask		= MMC_VDD_32_33 | MMC_VDD_33_34,
-	.init			= mci_init,
-	.get_ro			= mci_get_ro,
-	.setpower		= mci_setpower,
-	.exit			= mci_exit,
+	.gpio_card_detect	= GPIO15_SDIO_INSERT,
+	.gpio_card_ro		= GPIO78_SDIO_RO,
+	.gpio_power		= GPIO91_SDIO_EN,
 };
 
 /* FlashRAM */
@@ -765,19 +708,20 @@ static struct i2c_board_info __initdata mioa701_pi2c_devices[] = {
 	},
 };
 
-static struct soc_camera_link iclink = {
-	.bus_id	= 0, /* Must match id in pxa27x_device_camera in device.c */
-};
-
 /* Board I2C devices. */
 static struct i2c_board_info __initdata mioa701_i2c_devices[] = {
 	{
-		/* Must initialize before the camera(s) */
 		I2C_BOARD_INFO("mt9m111", 0x5d),
-		.platform_data = &iclink,
 	},
 };
 
+static struct soc_camera_link iclink = {
+	.bus_id		= 0, /* Match id in pxa27x_device_camera in device.c */
+	.board_info	= &mioa701_i2c_devices[0],
+	.i2c_adapter_id	= 0,
+	.module_name	= "mt9m111",
+};
+
 struct i2c_pxa_platform_data i2c_pdata = {
 	.fast_mode = 1,
 };
@@ -811,6 +755,7 @@ MIO_SIMPLE_DEV(pxa2xx_pcm, "pxa2xx-pcm", NULL)
 MIO_SIMPLE_DEV(mioa701_sound,	"mioa701-wm9713", NULL)
 MIO_SIMPLE_DEV(mioa701_board,	"mioa701-board",  NULL)
 MIO_SIMPLE_DEV(gpio_vbus,	"gpio-vbus",	  &gpio_vbus_data);
+MIO_SIMPLE_DEV(mioa701_camera,	"soc-camera-pdrv", &iclink);
 
 static struct platform_device *devices[] __initdata = {
 	&mioa701_gpio_keys,
@@ -821,6 +766,7 @@ static struct platform_device *devices[] __initdata = {
 	&power_dev,
 	&strataflash,
 	&gpio_vbus,
+	&mioa701_camera,
 	&mioa701_board,
 };
 
@@ -841,7 +787,7 @@ static void mioa701_restart(char c, const char *cmd)
 static struct gpio_ress global_gpios[] = {
 	MIO_GPIO_OUT(GPIO9_CHARGE_EN, 1, "Charger enable"),
 	MIO_GPIO_OUT(GPIO18_POWEROFF, 0, "Power Off"),
-	MIO_GPIO_OUT(GPIO87_LCD_POWER, 0, "LCD Power")
+	MIO_GPIO_OUT(GPIO87_LCD_POWER, 0, "LCD Power"),
 };
 
 static void __init mioa701_machine_init(void)
@@ -855,6 +801,7 @@ static void __init mioa701_machine_init(void)
 	mio_gpio_request(ARRAY_AND_SIZE(global_gpios));
 	bootstrap_init();
 	set_pxa_fb_info(&mioa701_pxafb_info);
+	mioa701_mci_info.detect_delay = msecs_to_jiffies(250);
 	pxa_set_mci_info(&mioa701_mci_info);
 	pxa_set_keypad_info(&mioa701_keypad_info);
 	wm97xx_bat_set_pdata(&mioa701_battery_data);
@@ -869,7 +816,6 @@ static void __init mioa701_machine_init(void)
 	pxa_set_i2c_info(&i2c_pdata);
 	pxa27x_set_i2c_power_info(NULL);
 	pxa_set_camera_info(&mioa701_pxacamera_platform_data);
-	i2c_register_board_info(0, ARRAY_AND_SIZE(mioa701_i2c_devices));
 }
 
 static void mioa701_machine_exit(void)
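One unit-related detail in the hunk above: pxamci's detect_delay field is measured in jiffies, so mioa701 computes msecs_to_jiffies(250) at machine-init time rather than hard-coding a tick count, while boards such as littleton that write a bare ".detect_delay = 20" are specifying ticks directly. Illustrative comparison only:

/* detect_delay is in jiffies: msecs_to_jiffies(250) == 25 at HZ=100,
 * while a literal 20 means 20 ticks, i.e. 200 ms at HZ=100. */
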
diff --git a/arch/arm/mach-pxa/palmld.c b/arch/arm/mach-pxa/palmld.c
index 169fcc18154e..1ad029dd4438 100644
--- a/arch/arm/mach-pxa/palmld.c
+++ b/arch/arm/mach-pxa/palmld.c
@@ -25,6 +25,9 @@
 #include <linux/wm97xx_batt.h>
 #include <linux/power_supply.h>
 #include <linux/sysdev.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/physmap.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -141,85 +144,50 @@ static unsigned long palmld_pin_config[] __initdata = {
 };
 
 /******************************************************************************
- * SD/MMC card controller
+ * NOR Flash
  ******************************************************************************/
-static int palmld_mci_init(struct device *dev, irq_handler_t palmld_detect_int,
-				void *data)
-{
-	int err = 0;
-
-	/* Setup an interrupt for detecting card insert/remove events */
-	err = gpio_request(GPIO_NR_PALMLD_SD_DETECT_N, "SD IRQ");
-	if (err)
-		goto err;
-	err = gpio_direction_input(GPIO_NR_PALMLD_SD_DETECT_N);
-	if (err)
-		goto err2;
-	err = request_irq(gpio_to_irq(GPIO_NR_PALMLD_SD_DETECT_N),
-			palmld_detect_int, IRQF_DISABLED | IRQF_SAMPLE_RANDOM |
-			IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
-			"SD/MMC card detect", data);
-	if (err) {
-		printk(KERN_ERR "%s: cannot request SD/MMC card detect IRQ\n",
-				__func__);
-		goto err2;
-	}
-
-	err = gpio_request(GPIO_NR_PALMLD_SD_POWER, "SD_POWER");
-	if (err)
-		goto err3;
-	err = gpio_direction_output(GPIO_NR_PALMLD_SD_POWER, 0);
-	if (err)
-		goto err4;
-
-	err = gpio_request(GPIO_NR_PALMLD_SD_READONLY, "SD_READONLY");
-	if (err)
-		goto err4;
-	err = gpio_direction_input(GPIO_NR_PALMLD_SD_READONLY);
-	if (err)
-		goto err5;
-
-	printk(KERN_DEBUG "%s: irq registered\n", __func__);
-
-	return 0;
-
-err5:
-	gpio_free(GPIO_NR_PALMLD_SD_READONLY);
-err4:
-	gpio_free(GPIO_NR_PALMLD_SD_POWER);
-err3:
-	free_irq(gpio_to_irq(GPIO_NR_PALMLD_SD_DETECT_N), data);
-err2:
-	gpio_free(GPIO_NR_PALMLD_SD_DETECT_N);
-err:
-	return err;
-}
-
-static void palmld_mci_exit(struct device *dev, void *data)
-{
-	gpio_free(GPIO_NR_PALMLD_SD_READONLY);
-	gpio_free(GPIO_NR_PALMLD_SD_POWER);
-	free_irq(gpio_to_irq(GPIO_NR_PALMLD_SD_DETECT_N), data);
-	gpio_free(GPIO_NR_PALMLD_SD_DETECT_N);
-}
-
-static void palmld_mci_power(struct device *dev, unsigned int vdd)
-{
-	struct pxamci_platform_data *p_d = dev->platform_data;
-	gpio_set_value(GPIO_NR_PALMLD_SD_POWER, p_d->ocr_mask & (1 << vdd));
-}
-
-static int palmld_mci_get_ro(struct device *dev)
-{
-	return gpio_get_value(GPIO_NR_PALMLD_SD_READONLY);
-}
+static struct mtd_partition palmld_partitions[] = {
+	{
+		.name		= "Flash",
+		.offset		= 0x00000000,
+		.size		= MTDPART_SIZ_FULL,
+		.mask_flags	= 0
+	}
+};
+
+static struct physmap_flash_data palmld_flash_data[] = {
+	{
+		.width		= 2,			/* bankwidth in bytes */
+		.parts		= palmld_partitions,
+		.nr_parts	= ARRAY_SIZE(palmld_partitions)
+	}
+};
+
+static struct resource palmld_flash_resource = {
+	.start	= PXA_CS0_PHYS,
+	.end	= PXA_CS0_PHYS + SZ_4M - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+static struct platform_device palmld_flash = {
+	.name		= "physmap-flash",
+	.id		= 0,
+	.resource	= &palmld_flash_resource,
+	.num_resources	= 1,
+	.dev = {
+		.platform_data = palmld_flash_data,
+	},
+};
 
+/******************************************************************************
+ * SD/MMC card controller
+ ******************************************************************************/
 static struct pxamci_platform_data palmld_mci_platform_data = {
 	.ocr_mask		= MMC_VDD_32_33 | MMC_VDD_33_34,
-	.setpower		= palmld_mci_power,
-	.get_ro			= palmld_mci_get_ro,
-	.init			= palmld_mci_init,
-	.exit			= palmld_mci_exit,
+	.gpio_card_detect	= GPIO_NR_PALMLD_SD_DETECT_N,
+	.gpio_card_ro		= GPIO_NR_PALMLD_SD_READONLY,
+	.gpio_power		= GPIO_NR_PALMLD_SD_POWER,
+	.detect_delay		= 20,
 };
 
 /******************************************************************************
@@ -336,35 +304,9 @@ static struct platform_device palmld_backlight = {
 /******************************************************************************
  * IrDA
  ******************************************************************************/
-static int palmld_irda_startup(struct device *dev)
-{
-	int err;
-	err = gpio_request(GPIO_NR_PALMLD_IR_DISABLE, "IR DISABLE");
-	if (err)
-		goto err;
-	err = gpio_direction_output(GPIO_NR_PALMLD_IR_DISABLE, 1);
-	if (err)
-		gpio_free(GPIO_NR_PALMLD_IR_DISABLE);
-err:
-	return err;
-}
-
-static void palmld_irda_shutdown(struct device *dev)
-{
-	gpio_free(GPIO_NR_PALMLD_IR_DISABLE);
-}
-
-static void palmld_irda_transceiver_mode(struct device *dev, int mode)
-{
-	gpio_set_value(GPIO_NR_PALMLD_IR_DISABLE, mode & IR_OFF);
-	pxa2xx_transceiver_mode(dev, mode);
-}
-
 static struct pxaficp_platform_data palmld_ficp_platform_data = {
-	.startup		= palmld_irda_startup,
-	.shutdown		= palmld_irda_shutdown,
-	.transceiver_cap	= IR_SIRMODE | IR_FIRMODE | IR_OFF,
-	.transceiver_mode	= palmld_irda_transceiver_mode,
+	.gpio_pwdown		= GPIO_NR_PALMLD_IR_DISABLE,
+	.transceiver_cap	= IR_SIRMODE | IR_OFF,
 };
 
 /******************************************************************************
@@ -560,6 +502,7 @@ static struct platform_device *devices[] __initdata = {
 	&power_supply,
 	&palmld_asoc,
 	&palmld_hdd,
+	&palmld_flash,
 };
 
 static struct map_desc palmld_io_desc[] __initdata = {
diff --git a/arch/arm/mach-pxa/palmt5.c b/arch/arm/mach-pxa/palmt5.c
index 33f726ff55e5..2dd7ce28556b 100644
--- a/arch/arm/mach-pxa/palmt5.c
+++ b/arch/arm/mach-pxa/palmt5.c
@@ -124,83 +124,12 @@ static unsigned long palmt5_pin_config[] __initdata = {
 /******************************************************************************
  * SD/MMC card controller
  ******************************************************************************/
-static int palmt5_mci_init(struct device *dev, irq_handler_t palmt5_detect_int,
-				void *data)
-{
-	int err = 0;
-
-	/* Setup an interrupt for detecting card insert/remove events */
-	err = gpio_request(GPIO_NR_PALMT5_SD_DETECT_N, "SD IRQ");
-	if (err)
-		goto err;
-	err = gpio_direction_input(GPIO_NR_PALMT5_SD_DETECT_N);
-	if (err)
-		goto err2;
-	err = request_irq(gpio_to_irq(GPIO_NR_PALMT5_SD_DETECT_N),
-			palmt5_detect_int, IRQF_DISABLED | IRQF_SAMPLE_RANDOM |
-			IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
-			"SD/MMC card detect", data);
-	if (err) {
-		printk(KERN_ERR "%s: cannot request SD/MMC card detect IRQ\n",
-				__func__);
-		goto err2;
-	}
-
-	err = gpio_request(GPIO_NR_PALMT5_SD_POWER, "SD_POWER");
-	if (err)
-		goto err3;
-	err = gpio_direction_output(GPIO_NR_PALMT5_SD_POWER, 0);
-	if (err)
-		goto err4;
-
-	err = gpio_request(GPIO_NR_PALMT5_SD_READONLY, "SD_READONLY");
-	if (err)
-		goto err4;
-	err = gpio_direction_input(GPIO_NR_PALMT5_SD_READONLY);
-	if (err)
-		goto err5;
-
-	printk(KERN_DEBUG "%s: irq registered\n", __func__);
-
-	return 0;
-
-err5:
-	gpio_free(GPIO_NR_PALMT5_SD_READONLY);
-err4:
-	gpio_free(GPIO_NR_PALMT5_SD_POWER);
-err3:
-	free_irq(gpio_to_irq(GPIO_NR_PALMT5_SD_DETECT_N), data);
-err2:
-	gpio_free(GPIO_NR_PALMT5_SD_DETECT_N);
-err:
-	return err;
-}
-
-static void palmt5_mci_exit(struct device *dev, void *data)
-{
-	gpio_free(GPIO_NR_PALMT5_SD_READONLY);
-	gpio_free(GPIO_NR_PALMT5_SD_POWER);
-	free_irq(IRQ_GPIO_PALMT5_SD_DETECT_N, data);
-	gpio_free(GPIO_NR_PALMT5_SD_DETECT_N);
-}
-
-static void palmt5_mci_power(struct device *dev, unsigned int vdd)
-{
-	struct pxamci_platform_data *p_d = dev->platform_data;
-	gpio_set_value(GPIO_NR_PALMT5_SD_POWER, p_d->ocr_mask & (1 << vdd));
-}
-
-static int palmt5_mci_get_ro(struct device *dev)
-{
-	return gpio_get_value(GPIO_NR_PALMT5_SD_READONLY);
-}
-
 static struct pxamci_platform_data palmt5_mci_platform_data = {
 	.ocr_mask		= MMC_VDD_32_33 | MMC_VDD_33_34,
-	.setpower		= palmt5_mci_power,
-	.get_ro			= palmt5_mci_get_ro,
-	.init			= palmt5_mci_init,
-	.exit			= palmt5_mci_exit,
+	.gpio_card_detect	= GPIO_NR_PALMT5_SD_DETECT_N,
+	.gpio_card_ro		= GPIO_NR_PALMT5_SD_READONLY,
+	.gpio_power		= GPIO_NR_PALMT5_SD_POWER,
+	.detect_delay		= 20,
 };
 
 /******************************************************************************
@@ -314,35 +243,9 @@ static struct platform_device palmt5_backlight = {
 /******************************************************************************
  * IrDA
  ******************************************************************************/
-static int palmt5_irda_startup(struct device *dev)
-{
-	int err;
-	err = gpio_request(GPIO_NR_PALMT5_IR_DISABLE, "IR DISABLE");
-	if (err)
-		goto err;
-	err = gpio_direction_output(GPIO_NR_PALMT5_IR_DISABLE, 1);
-	if (err)
-		gpio_free(GPIO_NR_PALMT5_IR_DISABLE);
-err:
-	return err;
-}
-
-static void palmt5_irda_shutdown(struct device *dev)
-{
-	gpio_free(GPIO_NR_PALMT5_IR_DISABLE);
-}
-
-static void palmt5_irda_transceiver_mode(struct device *dev, int mode)
-{
-	gpio_set_value(GPIO_NR_PALMT5_IR_DISABLE, mode & IR_OFF);
-	pxa2xx_transceiver_mode(dev, mode);
-}
-
 static struct pxaficp_platform_data palmt5_ficp_platform_data = {
-	.startup		= palmt5_irda_startup,
-	.shutdown		= palmt5_irda_shutdown,
-	.transceiver_cap	= IR_SIRMODE | IR_FIRMODE | IR_OFF,
-	.transceiver_mode	= palmt5_irda_transceiver_mode,
+	.gpio_pwdown		= GPIO_NR_PALMT5_IR_DISABLE,
+	.transceiver_cap	= IR_SIRMODE | IR_OFF,
 };
 
 /******************************************************************************
diff --git a/arch/arm/mach-pxa/palmtc.c b/arch/arm/mach-pxa/palmtc.c
new file mode 100644
index 000000000000..bb2cc0dd44ec
--- /dev/null
+++ b/arch/arm/mach-pxa/palmtc.c
@@ -0,0 +1,436 @@
+/*
+ * linux/arch/arm/mach-pxa/palmtc.c
+ *
+ * Support for the Palm Tungsten|C
+ *
+ * Author:	Marek Vasut <marek.vasut@gmail.com>
+ *
+ * Based on work of:
+ *		Petr Blaha <p3t3@centrum.cz>
+ *		Chetan S. Kumar <shivakumar.chetan@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/input.h>
+#include <linux/pwm_backlight.h>
+#include <linux/gpio.h>
+#include <linux/input/matrix_keypad.h>
+#include <linux/ucb1400.h>
+#include <linux/power_supply.h>
+#include <linux/gpio_keys.h>
+#include <linux/mtd/physmap.h>
+
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+
+#include <mach/audio.h>
+#include <mach/palmtc.h>
+#include <mach/mmc.h>
+#include <mach/pxafb.h>
+#include <mach/mfp-pxa25x.h>
+#include <mach/irda.h>
+#include <mach/udc.h>
+#include <mach/pxa2xx-regs.h>
+
+#include "generic.h"
+#include "devices.h"
+
+/******************************************************************************
+ * Pin configuration
+ ******************************************************************************/
+static unsigned long palmtc_pin_config[] __initdata = {
+	/* MMC */
+	GPIO6_MMC_CLK,
+	GPIO8_MMC_CS0,
+	GPIO12_GPIO,	/* detect */
+	GPIO32_GPIO,	/* power */
+	GPIO54_GPIO,	/* r/o switch */
+
+	/* PCMCIA */
+	GPIO52_nPCE_1,
+	GPIO53_nPCE_2,
+	GPIO50_nPIOR,
+	GPIO51_nPIOW,
+	GPIO49_nPWE,
+	GPIO48_nPOE,
+	GPIO52_nPCE_1,
+	GPIO53_nPCE_2,
+	GPIO57_nIOIS16,
+	GPIO56_nPWAIT,
+
+	/* AC97 */
+	GPIO28_AC97_BITCLK,
+	GPIO29_AC97_SDATA_IN_0,
+	GPIO30_AC97_SDATA_OUT,
+	GPIO31_AC97_SYNC,
+
+	/* IrDA */
+	GPIO45_GPIO,	/* ir disable */
+	GPIO46_FICP_RXD,
+	GPIO47_FICP_TXD,
+
+	/* PWM */
+	GPIO17_PWM1_OUT,
+
+	/* USB */
+	GPIO4_GPIO,	/* detect */
+	GPIO36_GPIO,	/* pullup */
+
+	/* LCD */
+	GPIO58_LCD_LDD_0,
+	GPIO59_LCD_LDD_1,
+	GPIO60_LCD_LDD_2,
+	GPIO61_LCD_LDD_3,
+	GPIO62_LCD_LDD_4,
+	GPIO63_LCD_LDD_5,
+	GPIO64_LCD_LDD_6,
+	GPIO65_LCD_LDD_7,
+	GPIO66_LCD_LDD_8,
+	GPIO67_LCD_LDD_9,
+	GPIO68_LCD_LDD_10,
+	GPIO69_LCD_LDD_11,
+	GPIO70_LCD_LDD_12,
+	GPIO71_LCD_LDD_13,
+	GPIO72_LCD_LDD_14,
+	GPIO73_LCD_LDD_15,
+	GPIO74_LCD_FCLK,
+	GPIO75_LCD_LCLK,
+	GPIO76_LCD_PCLK,
+	GPIO77_LCD_BIAS,
+
+	/* MATRIX KEYPAD */
+	GPIO0_GPIO | WAKEUP_ON_EDGE_BOTH,	/* in 0 */
+	GPIO9_GPIO | WAKEUP_ON_EDGE_BOTH,	/* in 1 */
+	GPIO10_GPIO | WAKEUP_ON_EDGE_BOTH,	/* in 2 */
+	GPIO11_GPIO | WAKEUP_ON_EDGE_BOTH,	/* in 3 */
+	GPIO18_GPIO | MFP_LPM_DRIVE_LOW,	/* out 0 */
+	GPIO19_GPIO | MFP_LPM_DRIVE_LOW,	/* out 1 */
+	GPIO20_GPIO | MFP_LPM_DRIVE_LOW,	/* out 2 */
+	GPIO21_GPIO | MFP_LPM_DRIVE_LOW,	/* out 3 */
+	GPIO22_GPIO | MFP_LPM_DRIVE_LOW,	/* out 4 */
+	GPIO23_GPIO | MFP_LPM_DRIVE_LOW,	/* out 5 */
+	GPIO24_GPIO | MFP_LPM_DRIVE_LOW,	/* out 6 */
+	GPIO25_GPIO | MFP_LPM_DRIVE_LOW,	/* out 7 */
+	GPIO26_GPIO | MFP_LPM_DRIVE_LOW,	/* out 8 */
+	GPIO27_GPIO | MFP_LPM_DRIVE_LOW,	/* out 9 */
+	GPIO79_GPIO | MFP_LPM_DRIVE_LOW,	/* out 10 */
+	GPIO80_GPIO | MFP_LPM_DRIVE_LOW,	/* out 11 */
+
+	/* PXA GPIO KEYS */
+	GPIO7_GPIO | WAKEUP_ON_EDGE_BOTH,	/* hotsync button on cradle */
+
+	/* MISC */
+	GPIO1_RST,	/* reset */
+	GPIO2_GPIO,	/* earphone detect */
+	GPIO16_GPIO,	/* backlight switch */
+};
+
+/******************************************************************************
+ * SD/MMC card controller
+ ******************************************************************************/
+static struct pxamci_platform_data palmtc_mci_platform_data = {
+	.ocr_mask		= MMC_VDD_32_33 | MMC_VDD_33_34,
+	.gpio_power		= GPIO_NR_PALMTC_SD_POWER,
+	.gpio_card_ro		= GPIO_NR_PALMTC_SD_READONLY,
+	.gpio_card_detect	= GPIO_NR_PALMTC_SD_DETECT_N,
+	.detect_delay		= 20,
+};
+
+/******************************************************************************
+ * GPIO keys
+ ******************************************************************************/
+static struct gpio_keys_button palmtc_pxa_buttons[] = {
+	{KEY_F8, GPIO_NR_PALMTC_HOTSYNC_BUTTON, 1, "HotSync Button", EV_KEY, 1},
+};
+
+static struct gpio_keys_platform_data palmtc_pxa_keys_data = {
+	.buttons	= palmtc_pxa_buttons,
+	.nbuttons	= ARRAY_SIZE(palmtc_pxa_buttons),
+};
+
+static struct platform_device palmtc_pxa_keys = {
+	.name	= "gpio-keys",
+	.id	= -1,
+	.dev	= {
+		.platform_data = &palmtc_pxa_keys_data,
+	},
+};
+
+/******************************************************************************
+ * Backlight
+ ******************************************************************************/
+static int palmtc_backlight_init(struct device *dev)
+{
+	int ret;
+
+	ret = gpio_request(GPIO_NR_PALMTC_BL_POWER, "BL POWER");
+	if (ret)
+		goto err;
+	ret = gpio_direction_output(GPIO_NR_PALMTC_BL_POWER, 1);
+	if (ret)
+		goto err2;
+
+	return 0;
+
+err2:
+	gpio_free(GPIO_NR_PALMTC_BL_POWER);
+err:
+	return ret;
+}
+
+static int palmtc_backlight_notify(int brightness)
+{
+	/* backlight is on when GPIO16 AF0 is high */
+	gpio_set_value(GPIO_NR_PALMTC_BL_POWER, brightness);
+	return brightness;
+}
+
+static void palmtc_backlight_exit(struct device *dev)
+{
+	gpio_free(GPIO_NR_PALMTC_BL_POWER);
+}
+
+static struct platform_pwm_backlight_data palmtc_backlight_data = {
+	.pwm_id		= 1,
+	.max_brightness	= PALMTC_MAX_INTENSITY,
+	.dft_brightness	= PALMTC_MAX_INTENSITY,
+	.pwm_period_ns	= PALMTC_PERIOD_NS,
+	.init		= palmtc_backlight_init,
+	.notify		= palmtc_backlight_notify,
+	.exit		= palmtc_backlight_exit,
+};
+
+static struct platform_device palmtc_backlight = {
+	.name	= "pwm-backlight",
+	.dev	= {
+		.parent		= &pxa25x_device_pwm1.dev,
+		.platform_data	= &palmtc_backlight_data,
+	},
+};
+
+/******************************************************************************
+ * IrDA
+ ******************************************************************************/
+static struct pxaficp_platform_data palmtc_ficp_platform_data = {
+	.gpio_pwdown		= GPIO_NR_PALMTC_IR_DISABLE,
+	.transceiver_cap	= IR_SIRMODE | IR_OFF,
+};
+
+/******************************************************************************
+ * Keyboard
+ ******************************************************************************/
+static const uint32_t palmtc_matrix_keys[] = {
+	KEY(0, 0, KEY_F1),
+	KEY(0, 1, KEY_X),
+	KEY(0, 2, KEY_POWER),
+	KEY(0, 3, KEY_TAB),
+	KEY(0, 4, KEY_A),
+	KEY(0, 5, KEY_Q),
+	KEY(0, 6, KEY_LEFTSHIFT),
+	KEY(0, 7, KEY_Z),
+	KEY(0, 8, KEY_S),
+	KEY(0, 9, KEY_W),
+	KEY(0, 10, KEY_E),
+	KEY(0, 11, KEY_UP),
+
+	KEY(1, 0, KEY_F2),
+	KEY(1, 1, KEY_DOWN),
+	KEY(1, 3, KEY_D),
+	KEY(1, 4, KEY_C),
+	KEY(1, 5, KEY_F),
+	KEY(1, 6, KEY_R),
+	KEY(1, 7, KEY_SPACE),
+	KEY(1, 8, KEY_V),
+	KEY(1, 9, KEY_G),
+	KEY(1, 10, KEY_T),
+	KEY(1, 11, KEY_LEFT),
+
+	KEY(2, 0, KEY_F3),
+	KEY(2, 1, KEY_LEFTCTRL),
+	KEY(2, 3, KEY_H),
+	KEY(2, 4, KEY_Y),
+	KEY(2, 5, KEY_N),
+	KEY(2, 6, KEY_J),
+	KEY(2, 7, KEY_U),
+	KEY(2, 8, KEY_M),
+	KEY(2, 9, KEY_K),
+	KEY(2, 10, KEY_I),
+	KEY(2, 11, KEY_RIGHT),
+
+	KEY(3, 0, KEY_F4),
+	KEY(3, 1, KEY_ENTER),
+	KEY(3, 3, KEY_DOT),
+	KEY(3, 4, KEY_L),
+	KEY(3, 5, KEY_O),
+	KEY(3, 6, KEY_LEFTALT),
+	KEY(3, 7, KEY_ENTER),
+	KEY(3, 8, KEY_BACKSPACE),
+	KEY(3, 9, KEY_P),
+	KEY(3, 10, KEY_B),
+	KEY(3, 11, KEY_FN),
+};
+
+const struct matrix_keymap_data palmtc_keymap_data = {
+	.keymap		= palmtc_matrix_keys,
+	.keymap_size	= ARRAY_SIZE(palmtc_matrix_keys),
+};
+
+static const unsigned int palmtc_keypad_row_gpios[] = {
+	0, 9, 10, 11
+};
+
+static const unsigned int palmtc_keypad_col_gpios[] = {
+	18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 79, 80
+};
+
+static struct matrix_keypad_platform_data palmtc_keypad_platform_data = {
+	.keymap_data	= &palmtc_keymap_data,
+	.row_gpios	= palmtc_keypad_row_gpios,
+	.num_row_gpios	= ARRAY_SIZE(palmtc_keypad_row_gpios),
+	.col_gpios	= palmtc_keypad_col_gpios,
+	.num_col_gpios	= ARRAY_SIZE(palmtc_keypad_col_gpios),
+	.active_low	= 1,
+
+	.debounce_ms		= 20,
+	.col_scan_delay_us	= 5,
+};
+
+static struct platform_device palmtc_keyboard = {
+	.name	= "matrix-keypad",
+	.id	= -1,
+	.dev	= {
+		.platform_data = &palmtc_keypad_platform_data,
+	},
+};
+
+/******************************************************************************
+ * UDC
+ ******************************************************************************/
+static struct pxa2xx_udc_mach_info palmtc_udc_info __initdata = {
+	.gpio_vbus		= GPIO_NR_PALMTC_USB_DETECT_N,
+	.gpio_vbus_inverted	= 1,
+	.gpio_pullup		= GPIO_NR_PALMTC_USB_POWER,
+};
+
+/******************************************************************************
+ * Touchscreen / Battery / GPIO-extender
+ ******************************************************************************/
+static struct platform_device palmtc_ucb1400_core = {
+	.name	= "ucb1400_core",
+	.id	= -1,
+};
+
+/******************************************************************************
+ * NOR Flash
+ ******************************************************************************/
+static struct resource palmtc_flash_resource = {
+	.start	= PXA_CS0_PHYS,
+	.end	= PXA_CS0_PHYS + SZ_16M - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+static struct mtd_partition palmtc_flash_parts[] = {
+	{
+		.name	= "U-Boot Bootloader",
+		.offset	= 0x0,
+		.size	= 0x40000,
+	},
+	{
+		.name	= "Linux Kernel",
+		.offset	= 0x40000,
+		.size	= 0x2c0000,
+	},
+	{
+		.name	= "Filesystem",
+		.offset	= 0x300000,
+		.size	= 0xcc0000,
+	},
+	{
+		.name	= "U-Boot Environment",
+		.offset	= 0xfc0000,
+		.size	= MTDPART_SIZ_FULL,
+	},
+};
+
+static struct physmap_flash_data palmtc_flash_data = {
+	.width		= 4,
+	.parts		= palmtc_flash_parts,
+	.nr_parts	= ARRAY_SIZE(palmtc_flash_parts),
+};
+
+static struct platform_device palmtc_flash = {
+	.name		= "physmap-flash",
+	.id		= -1,
+	.resource	= &palmtc_flash_resource,
+	.num_resources	= 1,
+	.dev	= {
+		.platform_data	= &palmtc_flash_data,
+	},
+};
+
+/******************************************************************************
+ * Framebuffer
+ ******************************************************************************/
+static struct pxafb_mode_info palmtc_lcd_modes[] = {
+{
+	.pixclock	= 115384,
+	.xres		= 320,
+	.yres		= 320,
+	.bpp		= 16,
+
+	.left_margin	= 27,
+	.right_margin	= 7,
+	.upper_margin	= 7,
+	.lower_margin	= 8,
+
+	.hsync_len	= 6,
+	.vsync_len	= 1,
+},
+};
+
+static struct pxafb_mach_info palmtc_lcd_screen = {
+	.modes		= palmtc_lcd_modes,
+	.num_modes	= ARRAY_SIZE(palmtc_lcd_modes),
+	.lcd_conn	= LCD_COLOR_TFT_16BPP | LCD_PCLK_EDGE_FALL,
+};
+
+/******************************************************************************
+ * Machine init
+ ******************************************************************************/
+static struct platform_device *devices[] __initdata = {
+	&palmtc_backlight,
+	&palmtc_ucb1400_core,
+	&palmtc_keyboard,
+	&palmtc_pxa_keys,
+	&palmtc_flash,
+};
+
+static void __init palmtc_init(void)
+{
+	pxa2xx_mfp_config(ARRAY_AND_SIZE(palmtc_pin_config));
+
+	set_pxa_fb_info(&palmtc_lcd_screen);
+	pxa_set_mci_info(&palmtc_mci_platform_data);
+	pxa_set_udc_info(&palmtc_udc_info);
+	pxa_set_ac97_info(NULL);
+	pxa_set_ficp_info(&palmtc_ficp_platform_data);
+
+	platform_add_devices(devices, ARRAY_SIZE(devices));
+}
+
+MACHINE_START(PALMTC, "Palm Tungsten|C")
+	.phys_io	= 0x40000000,
+	.boot_params	= 0xa0000100,
+	.io_pg_offst	= (io_p2v(0x40000000) >> 18) & 0xfffc,
+	.map_io		= pxa_map_io,
+	.init_irq	= pxa25x_init_irq,
+	.timer		= &pxa_timer,
+	.init_machine	= palmtc_init
+MACHINE_END
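In the keypad block above, the four pins marked "in" in the pin config (GPIOs 0, 9, 10, 11) are the rows the driver samples and the twelve "out" pins are the columns it drives; with active_low = 1 a pressed key reads back 0. A sketch of one scan step under those assumptions (the real loop lives in drivers/input/keyboard/matrix_keypad.c):

/* Sketch: activate one column, sample one row (active-low). */
gpio_set_value(col_gpio, 0);		/* drive the column active */
udelay(5);				/* col_scan_delay_us */
pressed = !gpio_get_value(row_gpio);	/* 0 on the row = key down */
gpio_set_value(col_gpio, 1);		/* release the column */
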
diff --git a/arch/arm/mach-pxa/palmte2.c b/arch/arm/mach-pxa/palmte2.c
index d823b09801df..277c4062e3c6 100644
--- a/arch/arm/mach-pxa/palmte2.c
+++ b/arch/arm/mach-pxa/palmte2.c
@@ -117,83 +117,11 @@ static unsigned long palmte2_pin_config[] __initdata = {
 /******************************************************************************
  * SD/MMC card controller
  ******************************************************************************/
-static int palmte2_mci_init(struct device *dev,
-		irq_handler_t palmte2_detect_int, void *data)
-{
-	int err = 0;
-
-	/* Setup an interrupt for detecting card insert/remove events */
-	err = gpio_request(GPIO_NR_PALMTE2_SD_DETECT_N, "SD IRQ");
-	if (err)
-		goto err;
-	err = gpio_direction_input(GPIO_NR_PALMTE2_SD_DETECT_N);
-	if (err)
-		goto err2;
-	err = request_irq(gpio_to_irq(GPIO_NR_PALMTE2_SD_DETECT_N),
-			palmte2_detect_int, IRQF_DISABLED | IRQF_SAMPLE_RANDOM |
-			IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
-			"SD/MMC card detect", data);
-	if (err) {
-		printk(KERN_ERR "%s: cannot request SD/MMC card detect IRQ\n",
-				__func__);
-		goto err2;
-	}
-
-	err = gpio_request(GPIO_NR_PALMTE2_SD_POWER, "SD_POWER");
-	if (err)
-		goto err3;
-	err = gpio_direction_output(GPIO_NR_PALMTE2_SD_POWER, 0);
-	if (err)
-		goto err4;
-
-	err = gpio_request(GPIO_NR_PALMTE2_SD_READONLY, "SD_READONLY");
-	if (err)
-		goto err4;
-	err = gpio_direction_input(GPIO_NR_PALMTE2_SD_READONLY);
-	if (err)
-		goto err5;
-
-	printk(KERN_DEBUG "%s: irq registered\n", __func__);
-
-	return 0;
-
-err5:
-	gpio_free(GPIO_NR_PALMTE2_SD_READONLY);
-err4:
-	gpio_free(GPIO_NR_PALMTE2_SD_POWER);
-err3:
-	free_irq(gpio_to_irq(GPIO_NR_PALMTE2_SD_DETECT_N), data);
-err2:
-	gpio_free(GPIO_NR_PALMTE2_SD_DETECT_N);
-err:
-	return err;
-}
-
-static void palmte2_mci_exit(struct device *dev, void *data)
-{
-	gpio_free(GPIO_NR_PALMTE2_SD_READONLY);
-	gpio_free(GPIO_NR_PALMTE2_SD_POWER);
-	free_irq(gpio_to_irq(GPIO_NR_PALMTE2_SD_DETECT_N), data);
-	gpio_free(GPIO_NR_PALMTE2_SD_DETECT_N);
-}
-
-static void palmte2_mci_power(struct device *dev, unsigned int vdd)
-{
-	struct pxamci_platform_data *p_d = dev->platform_data;
-	gpio_set_value(GPIO_NR_PALMTE2_SD_POWER, p_d->ocr_mask & (1 << vdd));
-}
-
-static int palmte2_mci_get_ro(struct device *dev)
-{
-	return gpio_get_value(GPIO_NR_PALMTE2_SD_READONLY);
-}
-
 static struct pxamci_platform_data palmte2_mci_platform_data = {
 	.ocr_mask		= MMC_VDD_32_33 | MMC_VDD_33_34,
-	.setpower		= palmte2_mci_power,
-	.get_ro			= palmte2_mci_get_ro,
-	.init			= palmte2_mci_init,
-	.exit			= palmte2_mci_exit,
+	.gpio_card_detect	= GPIO_NR_PALMTE2_SD_DETECT_N,
+	.gpio_card_ro		= GPIO_NR_PALMTE2_SD_READONLY,
+	.gpio_power		= GPIO_NR_PALMTE2_SD_POWER,
 };
 
 /******************************************************************************
@@ -287,35 +215,9 @@ static struct platform_device palmte2_backlight = {
 /******************************************************************************
  * IrDA
  ******************************************************************************/
-static int palmte2_irda_startup(struct device *dev)
-{
-	int err;
-	err = gpio_request(GPIO_NR_PALMTE2_IR_DISABLE, "IR DISABLE");
-	if (err)
-		goto err;
-	err = gpio_direction_output(GPIO_NR_PALMTE2_IR_DISABLE, 1);
-	if (err)
-		gpio_free(GPIO_NR_PALMTE2_IR_DISABLE);
-err:
-	return err;
-}
-
-static void palmte2_irda_shutdown(struct device *dev)
-{
-	gpio_free(GPIO_NR_PALMTE2_IR_DISABLE);
-}
-
-static void palmte2_irda_transceiver_mode(struct device *dev, int mode)
-{
-	gpio_set_value(GPIO_NR_PALMTE2_IR_DISABLE, mode & IR_OFF);
-	pxa2xx_transceiver_mode(dev, mode);
-}
-
 static struct pxaficp_platform_data palmte2_ficp_platform_data = {
-	.startup		= palmte2_irda_startup,
-	.shutdown		= palmte2_irda_shutdown,
-	.transceiver_cap	= IR_SIRMODE | IR_FIRMODE | IR_OFF,
-	.transceiver_mode	= palmte2_irda_transceiver_mode,
+	.gpio_pwdown		= GPIO_NR_PALMTE2_IR_DISABLE,
+	.transceiver_cap	= IR_SIRMODE | IR_OFF,
 };
 
 /******************************************************************************
diff --git a/arch/arm/mach-pxa/palmtx.c b/arch/arm/mach-pxa/palmtx.c
index 83d020879581..76a2b37eaf30 100644
--- a/arch/arm/mach-pxa/palmtx.c
+++ b/arch/arm/mach-pxa/palmtx.c
@@ -28,6 +28,10 @@
 #include <linux/wm97xx_batt.h>
 #include <linux/power_supply.h>
 #include <linux/usb/gpio_vbus.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/physmap.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -131,6 +135,10 @@ static unsigned long palmtx_pin_config[] __initdata = {
 	GPIO34_FFUART_RXD,
 	GPIO39_FFUART_TXD,
 
+	/* NAND */
+	GPIO15_nCS_1,
+	GPIO18_RDY,
+
 	/* MISC. */
 	GPIO10_GPIO,	/* hotsync button */
 	GPIO12_GPIO,	/* power detect */
@@ -138,85 +146,50 @@ static unsigned long palmtx_pin_config[] __initdata = {
 };
 
 /******************************************************************************
- * SD/MMC card controller
+ * NOR Flash
  ******************************************************************************/
-static int palmtx_mci_init(struct device *dev, irq_handler_t palmtx_detect_int,
-				void *data)
-{
-	int err = 0;
-
-	/* Setup an interrupt for detecting card insert/remove events */
-	err = gpio_request(GPIO_NR_PALMTX_SD_DETECT_N, "SD IRQ");
-	if (err)
-		goto err;
-	err = gpio_direction_input(GPIO_NR_PALMTX_SD_DETECT_N);
-	if (err)
-		goto err2;
-	err = request_irq(gpio_to_irq(GPIO_NR_PALMTX_SD_DETECT_N),
-			palmtx_detect_int, IRQF_DISABLED | IRQF_SAMPLE_RANDOM |
-			IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
-			"SD/MMC card detect", data);
-	if (err) {
-		printk(KERN_ERR "%s: cannot request SD/MMC card detect IRQ\n",
-				__func__);
-		goto err2;
-	}
-
-	err = gpio_request(GPIO_NR_PALMTX_SD_POWER, "SD_POWER");
-	if (err)
-		goto err3;
-	err = gpio_direction_output(GPIO_NR_PALMTX_SD_POWER, 0);
-	if (err)
-		goto err4;
-
-	err = gpio_request(GPIO_NR_PALMTX_SD_READONLY, "SD_READONLY");
-	if (err)
-		goto err4;
-	err = gpio_direction_input(GPIO_NR_PALMTX_SD_READONLY);
-	if (err)
-		goto err5;
-
-	printk(KERN_DEBUG "%s: irq registered\n", __func__);
-
-	return 0;
-
-err5:
-	gpio_free(GPIO_NR_PALMTX_SD_READONLY);
-err4:
-	gpio_free(GPIO_NR_PALMTX_SD_POWER);
-err3:
-	free_irq(gpio_to_irq(GPIO_NR_PALMTX_SD_DETECT_N), data);
-err2:
-	gpio_free(GPIO_NR_PALMTX_SD_DETECT_N);
-err:
-	return err;
-}
-
-static void palmtx_mci_exit(struct device *dev, void *data)
-{
-	gpio_free(GPIO_NR_PALMTX_SD_READONLY);
-	gpio_free(GPIO_NR_PALMTX_SD_POWER);
-	free_irq(gpio_to_irq(GPIO_NR_PALMTX_SD_DETECT_N), data);
-	gpio_free(GPIO_NR_PALMTX_SD_DETECT_N);
-}
-
-static void palmtx_mci_power(struct device *dev, unsigned int vdd)
-{
-	struct pxamci_platform_data *p_d = dev->platform_data;
-	gpio_set_value(GPIO_NR_PALMTX_SD_POWER, p_d->ocr_mask & (1 << vdd));
-}
-
-static int palmtx_mci_get_ro(struct device *dev)
-{
-	return gpio_get_value(GPIO_NR_PALMTX_SD_READONLY);
-}
+static struct mtd_partition palmtx_partitions[] = {
+	{
+		.name		= "Flash",
+		.offset		= 0x00000000,
+		.size		= MTDPART_SIZ_FULL,
+		.mask_flags	= 0
+	}
+};
+
+static struct physmap_flash_data palmtx_flash_data[] = {
+	{
+		.width		= 2,			/* bankwidth in bytes */
+		.parts		= palmtx_partitions,
+		.nr_parts	= ARRAY_SIZE(palmtx_partitions)
+	}
+};
+
+static struct resource palmtx_flash_resource = {
+	.start	= PXA_CS0_PHYS,
+	.end	= PXA_CS0_PHYS + SZ_8M - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+static struct platform_device palmtx_flash = {
+	.name		= "physmap-flash",
+	.id		= 0,
+	.resource	= &palmtx_flash_resource,
+	.num_resources	= 1,
+	.dev = {
+		.platform_data = palmtx_flash_data,
+	},
+};
 
+/******************************************************************************
+ * SD/MMC card controller
+ ******************************************************************************/
 static struct pxamci_platform_data palmtx_mci_platform_data = {
 	.ocr_mask		= MMC_VDD_32_33 | MMC_VDD_33_34,
-	.setpower		= palmtx_mci_power,
-	.get_ro			= palmtx_mci_get_ro,
-	.init			= palmtx_mci_init,
-	.exit			= palmtx_mci_exit,
+	.gpio_card_detect	= GPIO_NR_PALMTX_SD_DETECT_N,
+	.gpio_card_ro		= GPIO_NR_PALMTX_SD_READONLY,
+	.gpio_power		= GPIO_NR_PALMTX_SD_POWER,
+	.detect_delay		= 20,
 };
 
 /******************************************************************************
@@ -330,35 +303,9 @@ static struct platform_device palmtx_backlight = {
 /******************************************************************************
  * IrDA
  ******************************************************************************/
-static int palmtx_irda_startup(struct device *dev)
-{
-	int err;
-	err = gpio_request(GPIO_NR_PALMTX_IR_DISABLE, "IR DISABLE");
-	if (err)
-		goto err;
-	err = gpio_direction_output(GPIO_NR_PALMTX_IR_DISABLE, 1);
-	if (err)
-		gpio_free(GPIO_NR_PALMTX_IR_DISABLE);
-err:
-	return err;
-}
-
-static void palmtx_irda_shutdown(struct device *dev)
-{
-	gpio_free(GPIO_NR_PALMTX_IR_DISABLE);
-}
-
-static void palmtx_irda_transceiver_mode(struct device *dev, int mode)
-{
-	gpio_set_value(GPIO_NR_PALMTX_IR_DISABLE, mode & IR_OFF);
-	pxa2xx_transceiver_mode(dev, mode);
-}
-
 static struct pxaficp_platform_data palmtx_ficp_platform_data = {
-	.startup		= palmtx_irda_startup,
-	.shutdown		= palmtx_irda_shutdown,
-	.transceiver_cap	= IR_SIRMODE | IR_FIRMODE | IR_OFF,
-	.transceiver_mode	= palmtx_irda_transceiver_mode,
+	.gpio_pwdown		= GPIO_NR_PALMTX_IR_DISABLE,
+	.transceiver_cap	= IR_SIRMODE | IR_OFF,
 };
 
 /******************************************************************************
@@ -493,6 +440,68 @@ static struct pxafb_mach_info palmtx_lcd_screen = {
 };
 
 /******************************************************************************
+ * NAND Flash
+ ******************************************************************************/
+static void palmtx_nand_cmd_ctl(struct mtd_info *mtd, int cmd,
+				 unsigned int ctrl)
+{
+	struct nand_chip *this = mtd->priv;
+	unsigned long nandaddr = (unsigned long)this->IO_ADDR_W;
+
+	if (cmd == NAND_CMD_NONE)
+		return;
+
+	if (ctrl & NAND_CLE)
+		writeb(cmd, PALMTX_NAND_CLE_VIRT);
+	else if (ctrl & NAND_ALE)
+		writeb(cmd, PALMTX_NAND_ALE_VIRT);
+	else
+		writeb(cmd, nandaddr);
+}
+
+static struct mtd_partition palmtx_partition_info[] = {
+	[0] = {
+		.name	= "palmtx-0",
+		.offset	= 0,
+		.size	= MTDPART_SIZ_FULL
+	},
+};
+
+static const char *palmtx_part_probes[] = { "cmdlinepart", NULL };
+
+struct platform_nand_data palmtx_nand_platdata = {
+	.chip	= {
+		.nr_chips		= 1,
+		.chip_offset		= 0,
+		.nr_partitions		= ARRAY_SIZE(palmtx_partition_info),
+		.partitions		= palmtx_partition_info,
+		.chip_delay		= 20,
+		.part_probe_types	= palmtx_part_probes,
+	},
+	.ctrl	= {
+		.cmd_ctrl	= palmtx_nand_cmd_ctl,
+	},
+};
+
+static struct resource palmtx_nand_resource[] = {
+	[0] = {
+		.start	= PXA_CS1_PHYS,
+		.end	= PXA_CS1_PHYS + SZ_1M - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device palmtx_nand = {
+	.name		= "gen_nand",
+	.num_resources	= ARRAY_SIZE(palmtx_nand_resource),
+	.resource	= palmtx_nand_resource,
+	.id		= -1,
+	.dev		= {
+		.platform_data	= &palmtx_nand_platdata,
+	}
+};
+
+/******************************************************************************
  * Power management - standby
  ******************************************************************************/
 static void __init palmtx_pm_init(void)
@@ -518,6 +527,8 @@ static struct platform_device *devices[] __initdata = {
 	&power_supply,
 	&palmtx_asoc,
 	&palmtx_gpio_vbus,
+	&palmtx_flash,
+	&palmtx_nand,
 };
 
 static struct map_desc palmtx_io_desc[] __initdata = {
@@ -525,8 +536,18 @@ static struct map_desc palmtx_io_desc[] __initdata = {
 	.virtual	= PALMTX_PCMCIA_VIRT,
 	.pfn		= __phys_to_pfn(PALMTX_PCMCIA_PHYS),
 	.length		= PALMTX_PCMCIA_SIZE,
-	.type		= MT_DEVICE
-},
+	.type		= MT_DEVICE,
+}, {
+	.virtual	= PALMTX_NAND_ALE_VIRT,
+	.pfn		= __phys_to_pfn(PALMTX_NAND_ALE_PHYS),
+	.length		= SZ_1M,
+	.type		= MT_DEVICE,
+}, {
+	.virtual	= PALMTX_NAND_CLE_VIRT,
+	.pfn		= __phys_to_pfn(PALMTX_NAND_CLE_PHYS),
+	.length		= SZ_1M,
+	.type		= MT_DEVICE,
+}
 };
 
 static void __init palmtx_map_io(void)
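The two extra 1 MB mappings are what let palmtx_nand_cmd_ctl() above steer NAND command and address cycles purely by the address it writes to: the CLE and ALE latch lines are presumably wired to address bits of chip-select 1, so the gen_nand driver needs three distinct windows (data, ALE, CLE) onto the same chip. Illustrative use, under that wiring assumption:

/* A command byte goes through the CLE window, an address byte
 * through the ALE window, raw data through the plain data window. */
writeb(NAND_CMD_READ0, PALMTX_NAND_CLE_VIRT);	/* command cycle */
writeb(0x00, PALMTX_NAND_ALE_VIRT);		/* address cycle */
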
diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c
index c3645aa3fa3d..c2bf493c5f53 100644
--- a/arch/arm/mach-pxa/palmz72.c
+++ b/arch/arm/mach-pxa/palmz72.c
@@ -129,88 +129,14 @@ static unsigned long palmz72_pin_config[] __initdata = {
 /******************************************************************************
  * SD/MMC card controller
  ******************************************************************************/
-static int palmz72_mci_init(struct device *dev,
-				irq_handler_t palmz72_detect_int, void *data)
-{
-	int err = 0;
-
-	/* Setup an interrupt for detecting card insert/remove events */
-	err = gpio_request(GPIO_NR_PALMZ72_SD_DETECT_N, "SD IRQ");
-	if (err)
-		goto err;
-	err = gpio_direction_input(GPIO_NR_PALMZ72_SD_DETECT_N);
-	if (err)
-		goto err2;
-	err = request_irq(gpio_to_irq(GPIO_NR_PALMZ72_SD_DETECT_N),
-			palmz72_detect_int, IRQF_DISABLED | IRQF_SAMPLE_RANDOM |
-			IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
-			"SD/MMC card detect", data);
-	if (err) {
-		printk(KERN_ERR "%s: cannot request SD/MMC card detect IRQ\n",
-				__func__);
-		goto err2;
-	}
-
-	/* SD_POWER is not actually power, but it is more like chip
-	 * select, i.e. it is inverted */
-
-	err = gpio_request(GPIO_NR_PALMZ72_SD_POWER_N, "SD_POWER");
-	if (err)
-		goto err3;
-	err = gpio_direction_output(GPIO_NR_PALMZ72_SD_POWER_N, 0);
-	if (err)
-		goto err4;
-	err = gpio_request(GPIO_NR_PALMZ72_SD_RO, "SD_RO");
-	if (err)
-		goto err4;
-	err = gpio_direction_input(GPIO_NR_PALMZ72_SD_RO);
-	if (err)
-		goto err5;
-
-	printk(KERN_DEBUG "%s: irq registered\n", __func__);
-
-	return 0;
-
-err5:
-	gpio_free(GPIO_NR_PALMZ72_SD_RO);
-err4:
-	gpio_free(GPIO_NR_PALMZ72_SD_POWER_N);
-err3:
-	free_irq(gpio_to_irq(GPIO_NR_PALMZ72_SD_DETECT_N), data);
-err2:
-	gpio_free(GPIO_NR_PALMZ72_SD_DETECT_N);
-err:
-	return err;
-}
-
-static void palmz72_mci_exit(struct device *dev, void *data)
-{
-	gpio_free(GPIO_NR_PALMZ72_SD_POWER_N);
-	free_irq(gpio_to_irq(GPIO_NR_PALMZ72_SD_DETECT_N), data);
-	gpio_free(GPIO_NR_PALMZ72_SD_DETECT_N);
-	gpio_free(GPIO_NR_PALMZ72_SD_RO);
-}
-
-static void palmz72_mci_power(struct device *dev, unsigned int vdd)
-{
-	struct pxamci_platform_data *p_d = dev->platform_data;
-	if (p_d->ocr_mask & (1 << vdd))
-		gpio_set_value(GPIO_NR_PALMZ72_SD_POWER_N, 0);
-	else
-		gpio_set_value(GPIO_NR_PALMZ72_SD_POWER_N, 1);
-}
-
-static int palmz72_mci_ro(struct device *dev)
-{
-	return gpio_get_value(GPIO_NR_PALMZ72_SD_RO);
-}
-
+/* SD_POWER is not actually power, but it is more like chip
+ * select, i.e. it is inverted */
 static struct pxamci_platform_data palmz72_mci_platform_data = {
 	.ocr_mask		= MMC_VDD_32_33 | MMC_VDD_33_34,
-	.setpower		= palmz72_mci_power,
-	.get_ro			= palmz72_mci_ro,
-	.init			= palmz72_mci_init,
-	.exit			= palmz72_mci_exit,
+	.gpio_card_detect	= GPIO_NR_PALMZ72_SD_DETECT_N,
+	.gpio_card_ro		= GPIO_NR_PALMZ72_SD_RO,
+	.gpio_power		= GPIO_NR_PALMZ72_SD_POWER_N,
+	.gpio_power_invert	= 1,
 };
 
 /******************************************************************************
@@ -304,35 +230,9 @@ static struct platform_device palmz72_backlight = {
304/****************************************************************************** 230/******************************************************************************
305 * IrDA 231 * IrDA
306 ******************************************************************************/ 232 ******************************************************************************/
307static int palmz72_irda_startup(struct device *dev)
308{
309 int err;
310 err = gpio_request(GPIO_NR_PALMZ72_IR_DISABLE, "IR DISABLE");
311 if (err)
312 goto err;
313 err = gpio_direction_output(GPIO_NR_PALMZ72_IR_DISABLE, 1);
314 if (err)
315 gpio_free(GPIO_NR_PALMZ72_IR_DISABLE);
316err:
317 return err;
318}
319
320static void palmz72_irda_shutdown(struct device *dev)
321{
322 gpio_free(GPIO_NR_PALMZ72_IR_DISABLE);
323}
324
325static void palmz72_irda_transceiver_mode(struct device *dev, int mode)
326{
327 gpio_set_value(GPIO_NR_PALMZ72_IR_DISABLE, mode & IR_OFF);
328 pxa2xx_transceiver_mode(dev, mode);
329}
330
331static struct pxaficp_platform_data palmz72_ficp_platform_data = { 233static struct pxaficp_platform_data palmz72_ficp_platform_data = {
332 .startup = palmz72_irda_startup, 234 .gpio_pwdown = GPIO_NR_PALMZ72_IR_DISABLE,
333 .shutdown = palmz72_irda_shutdown,
334 .transceiver_cap = IR_SIRMODE | IR_OFF, 235 .transceiver_cap = IR_SIRMODE | IR_OFF,
335 .transceiver_mode = palmz72_irda_transceiver_mode,
336}; 236};
337 237
338/****************************************************************************** 238/******************************************************************************
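
palmz72 is typical of the pxamci conversion running through this series: the hand-rolled .init/.exit/.get_ro/.setpower hooks collapse into three GPIO numbers in pxamci_platform_data, with .gpio_power_invert encoding the active-low "chip select" behaviour the surviving comment describes. A rough sketch of the driver-side handling this implies (assumed behaviour, not a hunk from this commit):

#include <linux/gpio.h>
#include <linux/interrupt.h>
#include <mach/mmc.h>

/* Assumed pxamci core logic: claim the board GPIOs centrally so each
 * machine file no longer repeats the same boilerplate. Boards without
 * a given line pass -1, which fails gpio_is_valid(). */
static int pxamci_request_board_gpios(struct pxamci_platform_data *pdata,
				      irq_handler_t detect_irq, void *data)
{
	int ret = 0;

	if (gpio_is_valid(pdata->gpio_card_detect)) {
		ret = gpio_request(pdata->gpio_card_detect, "mmc card detect");
		if (ret)
			return ret;
		gpio_direction_input(pdata->gpio_card_detect);
		ret = request_irq(gpio_to_irq(pdata->gpio_card_detect),
				  detect_irq,
				  IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
				  "mmc card detect", data);
		if (ret)
			gpio_free(pdata->gpio_card_detect);
	}
	/* gpio_card_ro is requested as an input the same way; gpio_power
	 * becomes an output, driven low for "on" when gpio_power_invert
	 * is set (as on palmz72 above). */
	return ret;
}
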
diff --git a/arch/arm/mach-pxa/pcm990-baseboard.c b/arch/arm/mach-pxa/pcm990-baseboard.c
index 01791d74e08e..bbda57078e0f 100644
--- a/arch/arm/mach-pxa/pcm990-baseboard.c
+++ b/arch/arm/mach-pxa/pcm990-baseboard.c
@@ -321,11 +321,14 @@ static void pcm990_mci_exit(struct device *dev, void *data)
321#define MSECS_PER_JIFFY (1000/HZ) 321#define MSECS_PER_JIFFY (1000/HZ)
322 322
323static struct pxamci_platform_data pcm990_mci_platform_data = { 323static struct pxamci_platform_data pcm990_mci_platform_data = {
324 .detect_delay = 250 / MSECS_PER_JIFFY, 324 .detect_delay = 250 / MSECS_PER_JIFFY,
325 .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, 325 .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34,
326 .init = pcm990_mci_init, 326 .init = pcm990_mci_init,
327 .setpower = pcm990_mci_setpower, 327 .setpower = pcm990_mci_setpower,
328 .exit = pcm990_mci_exit, 328 .exit = pcm990_mci_exit,
329 .gpio_card_detect = -1,
330 .gpio_card_ro = -1,
331 .gpio_power = -1,
329}; 332};
330 333
331static struct pxaohci_platform_data pcm990_ohci_platform_data = { 334static struct pxaohci_platform_data pcm990_ohci_platform_data = {
@@ -427,25 +430,56 @@ static void pcm990_camera_free_bus(struct soc_camera_link *link)
427 gpio_bus_switch = -EINVAL; 430 gpio_bus_switch = -EINVAL;
428} 431}
429 432
430static struct soc_camera_link iclink = {
431 .bus_id = 0, /* Must match with the camera ID above */
432 .query_bus_param = pcm990_camera_query_bus_param,
433 .set_bus_param = pcm990_camera_set_bus_param,
434 .free_bus = pcm990_camera_free_bus,
435};
436
437/* Board I2C devices. */ 433/* Board I2C devices. */
438static struct i2c_board_info __initdata pcm990_i2c_devices[] = { 434static struct i2c_board_info __initdata pcm990_i2c_devices[] = {
439 { 435 {
440 /* Must initialize before the camera(s) */ 436 /* Must initialize before the camera(s) */
441 I2C_BOARD_INFO("pca9536", 0x41), 437 I2C_BOARD_INFO("pca9536", 0x41),
442 .platform_data = &pca9536_data, 438 .platform_data = &pca9536_data,
443 }, { 439 },
440};
441
442static struct i2c_board_info pcm990_camera_i2c[] = {
443 {
444 I2C_BOARD_INFO("mt9v022", 0x48), 444 I2C_BOARD_INFO("mt9v022", 0x48),
445 .platform_data = &iclink, /* With extender */
446 }, { 445 }, {
447 I2C_BOARD_INFO("mt9m001", 0x5d), 446 I2C_BOARD_INFO("mt9m001", 0x5d),
448 .platform_data = &iclink, /* With extender */ 447 },
448};
449
450static struct soc_camera_link iclink[] = {
451 {
452 .bus_id = 0, /* Must match with the camera ID */
453 .board_info = &pcm990_camera_i2c[0],
454 .i2c_adapter_id = 0,
455 .query_bus_param = pcm990_camera_query_bus_param,
456 .set_bus_param = pcm990_camera_set_bus_param,
457 .free_bus = pcm990_camera_free_bus,
458 .module_name = "mt9v022",
459 }, {
460 .bus_id = 0, /* Must match with the camera ID */
461 .board_info = &pcm990_camera_i2c[1],
462 .i2c_adapter_id = 0,
463 .query_bus_param = pcm990_camera_query_bus_param,
464 .set_bus_param = pcm990_camera_set_bus_param,
465 .free_bus = pcm990_camera_free_bus,
466 .module_name = "mt9m001",
467 },
468};
469
470static struct platform_device pcm990_camera[] = {
471 {
472 .name = "soc-camera-pdrv",
473 .id = 0,
474 .dev = {
475 .platform_data = &iclink[0],
476 },
477 }, {
478 .name = "soc-camera-pdrv",
479 .id = 1,
480 .dev = {
481 .platform_data = &iclink[1],
482 },
449 }, 483 },
450}; 484};
451#endif /* CONFIG_VIDEO_PXA27x ||CONFIG_VIDEO_PXA27x_MODULE */ 485#endif /* CONFIG_VIDEO_PXA27x ||CONFIG_VIDEO_PXA27x_MODULE */
@@ -501,6 +535,9 @@ void __init pcm990_baseboard_init(void)
501 pxa_set_camera_info(&pcm990_pxacamera_platform_data); 535 pxa_set_camera_info(&pcm990_pxacamera_platform_data);
502 536
503 i2c_register_board_info(0, ARRAY_AND_SIZE(pcm990_i2c_devices)); 537 i2c_register_board_info(0, ARRAY_AND_SIZE(pcm990_i2c_devices));
538
539 platform_device_register(&pcm990_camera[0]);
540 platform_device_register(&pcm990_camera[1]);
504#endif 541#endif
505 542
506 printk(KERN_INFO "PCM-990 Evaluation baseboard initialized\n"); 543 printk(KERN_INFO "PCM-990 Evaluation baseboard initialized\n");
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index 9352d4a34837..a186994f77fb 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -245,20 +245,10 @@ static inline void poodle_init_spi(void) {}
245 * The card detect interrupt isn't debounced so we delay it by 250ms 245 * The card detect interrupt isn't debounced so we delay it by 250ms
246 * to give the card a chance to fully insert/eject. 246 * to give the card a chance to fully insert/eject.
247 */ 247 */
248static struct pxamci_platform_data poodle_mci_platform_data;
249
250static int poodle_mci_init(struct device *dev, irq_handler_t poodle_detect_int, void *data) 248static int poodle_mci_init(struct device *dev, irq_handler_t poodle_detect_int, void *data)
251{ 249{
252 int err; 250 int err;
253 251
254 err = gpio_request(POODLE_GPIO_nSD_DETECT, "nSD_DETECT");
255 if (err)
256 goto err_out;
257
258 err = gpio_request(POODLE_GPIO_nSD_WP, "nSD_WP");
259 if (err)
260 goto err_free_1;
261
262 err = gpio_request(POODLE_GPIO_SD_PWR, "SD_PWR"); 252 err = gpio_request(POODLE_GPIO_SD_PWR, "SD_PWR");
263 if (err) 253 if (err)
264 goto err_free_2; 254 goto err_free_2;
@@ -267,34 +257,14 @@ static int poodle_mci_init(struct device *dev, irq_handler_t poodle_detect_int,
267 if (err) 257 if (err)
268 goto err_free_3; 258 goto err_free_3;
269 259
270 gpio_direction_input(POODLE_GPIO_nSD_DETECT);
271 gpio_direction_input(POODLE_GPIO_nSD_WP);
272
273 gpio_direction_output(POODLE_GPIO_SD_PWR, 0); 260 gpio_direction_output(POODLE_GPIO_SD_PWR, 0);
274 gpio_direction_output(POODLE_GPIO_SD_PWR1, 0); 261 gpio_direction_output(POODLE_GPIO_SD_PWR1, 0);
275 262
276 poodle_mci_platform_data.detect_delay = msecs_to_jiffies(250);
277
278 err = request_irq(POODLE_IRQ_GPIO_nSD_DETECT, poodle_detect_int,
279 IRQF_DISABLED | IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
280 "MMC card detect", data);
281 if (err) {
282 pr_err("%s: MMC/SD: can't request MMC card detect IRQ\n",
283 __func__);
284 goto err_free_4;
285 }
286
287 return 0; 263 return 0;
288 264
289err_free_4:
290 gpio_free(POODLE_GPIO_SD_PWR1);
291err_free_3: 265err_free_3:
292 gpio_free(POODLE_GPIO_SD_PWR); 266 gpio_free(POODLE_GPIO_SD_PWR);
293err_free_2: 267err_free_2:
294 gpio_free(POODLE_GPIO_nSD_WP);
295err_free_1:
296 gpio_free(POODLE_GPIO_nSD_DETECT);
297err_out:
298 return err; 268 return err;
299} 269}
300 270
@@ -312,62 +282,29 @@ static void poodle_mci_setpower(struct device *dev, unsigned int vdd)
312 } 282 }
313} 283}
314 284
315static int poodle_mci_get_ro(struct device *dev)
316{
317 return !!gpio_get_value(POODLE_GPIO_nSD_WP);
318 return GPLR(POODLE_GPIO_nSD_WP) & GPIO_bit(POODLE_GPIO_nSD_WP);
319}
320
321
322static void poodle_mci_exit(struct device *dev, void *data) 285static void poodle_mci_exit(struct device *dev, void *data)
323{ 286{
324 free_irq(POODLE_IRQ_GPIO_nSD_DETECT, data);
325 gpio_free(POODLE_GPIO_SD_PWR1); 287 gpio_free(POODLE_GPIO_SD_PWR1);
326 gpio_free(POODLE_GPIO_SD_PWR); 288 gpio_free(POODLE_GPIO_SD_PWR);
327 gpio_free(POODLE_GPIO_nSD_WP);
328 gpio_free(POODLE_GPIO_nSD_DETECT);
329} 289}
330 290
331static struct pxamci_platform_data poodle_mci_platform_data = { 291static struct pxamci_platform_data poodle_mci_platform_data = {
332 .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, 292 .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
333 .init = poodle_mci_init, 293 .init = poodle_mci_init,
334 .get_ro = poodle_mci_get_ro, 294 .setpower = poodle_mci_setpower,
335 .setpower = poodle_mci_setpower, 295 .exit = poodle_mci_exit,
336 .exit = poodle_mci_exit, 296 .gpio_card_detect = POODLE_IRQ_GPIO_nSD_DETECT,
297 .gpio_card_ro = POODLE_GPIO_nSD_WP,
298 .gpio_power = -1,
337}; 299};
338 300
339 301
340/* 302/*
341 * Irda 303 * Irda
342 */ 304 */
343static void poodle_irda_transceiver_mode(struct device *dev, int mode)
344{
345 gpio_set_value(POODLE_GPIO_IR_ON, mode & IR_OFF);
346 pxa2xx_transceiver_mode(dev, mode);
347}
348
349static int poodle_irda_startup(struct device *dev)
350{
351 int err;
352
353 err = gpio_request(POODLE_GPIO_IR_ON, "IR_ON");
354 if (err)
355 return err;
356
357 gpio_direction_output(POODLE_GPIO_IR_ON, 1);
358 return 0;
359}
360
361static void poodle_irda_shutdown(struct device *dev)
362{
363 gpio_free(POODLE_GPIO_IR_ON);
364}
365
366static struct pxaficp_platform_data poodle_ficp_platform_data = { 305static struct pxaficp_platform_data poodle_ficp_platform_data = {
306 .gpio_pwdown = POODLE_GPIO_IR_ON,
367 .transceiver_cap = IR_SIRMODE | IR_OFF, 307 .transceiver_cap = IR_SIRMODE | IR_OFF,
368 .transceiver_mode = poodle_irda_transceiver_mode,
369 .startup = poodle_irda_startup,
370 .shutdown = poodle_irda_shutdown,
371}; 308};
372 309
373 310
@@ -521,6 +458,7 @@ static void __init poodle_init(void)
521 set_pxa_fb_parent(&poodle_locomo_device.dev); 458 set_pxa_fb_parent(&poodle_locomo_device.dev);
522 set_pxa_fb_info(&poodle_fb_info); 459 set_pxa_fb_info(&poodle_fb_info);
523 pxa_set_udc_info(&udc_info); 460 pxa_set_udc_info(&udc_info);
461 poodle_mci_platform_data.detect_delay = msecs_to_jiffies(250);
524 pxa_set_mci_info(&poodle_mci_platform_data); 462 pxa_set_mci_info(&poodle_mci_platform_data);
525 pxa_set_ficp_info(&poodle_ficp_platform_data); 463 pxa_set_ficp_info(&poodle_ficp_platform_data);
526 pxa_set_i2c_info(NULL); 464 pxa_set_i2c_info(NULL);
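
poodle's IrDA hooks shrink the same way as its MMC hooks: three board callbacks that only ever toggled POODLE_GPIO_IR_ON are replaced by naming that GPIO in .gpio_pwdown. Roughly what the pxaficp_ir driver is then expected to do with it (an assumption about the driver side, not part of this diff; note the EXPORT_SYMBOL_GPL added in the next hunk, which keeps pxa2xx_transceiver_mode() callable from a modular driver):

#include <linux/device.h>
#include <linux/gpio.h>
#include <mach/irda.h>

/* Assumed driver-side use of gpio_pwdown: power the transceiver down
 * whenever IR_OFF is requested, then let the SoC helper switch the
 * SIR/FIR pin functions, as the removed board hooks used to do. */
static void pxa_irda_set_mode_sketch(struct device *dev, int mode)
{
	struct pxaficp_platform_data *pdata = dev->platform_data;

	if (gpio_is_valid(pdata->gpio_pwdown))
		gpio_set_value(pdata->gpio_pwdown, !!(mode & IR_OFF));

	pxa2xx_transceiver_mode(dev, mode);
}
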
diff --git a/arch/arm/mach-pxa/pxa2xx.c b/arch/arm/mach-pxa/pxa2xx.c
index 2f3394f85917..868270421b8c 100644
--- a/arch/arm/mach-pxa/pxa2xx.c
+++ b/arch/arm/mach-pxa/pxa2xx.c
@@ -52,3 +52,4 @@ void pxa2xx_transceiver_mode(struct device *dev, int mode)
52 } else 52 } else
53 BUG(); 53 BUG();
54} 54}
55EXPORT_SYMBOL_GPL(pxa2xx_transceiver_mode);
diff --git a/arch/arm/mach-pxa/pxa300.c b/arch/arm/mach-pxa/pxa300.c
index 4ba6d21f851c..f4af6e2bef89 100644
--- a/arch/arm/mach-pxa/pxa300.c
+++ b/arch/arm/mach-pxa/pxa300.c
@@ -84,9 +84,11 @@ static struct mfp_addr_map pxa310_mfp_addr_map[] __initdata = {
84}; 84};
85 85
86static DEFINE_PXA3_CKEN(common_nand, NAND, 156000000, 0); 86static DEFINE_PXA3_CKEN(common_nand, NAND, 156000000, 0);
87static DEFINE_PXA3_CKEN(gcu, PXA300_GCU, 0, 0);
87 88
88static struct clk_lookup common_clkregs[] = { 89static struct clk_lookup common_clkregs[] = {
89 INIT_CLKREG(&clk_common_nand, "pxa3xx-nand", NULL), 90 INIT_CLKREG(&clk_common_nand, "pxa3xx-nand", NULL),
91 INIT_CLKREG(&clk_gcu, "pxa3xx-gcu", NULL),
90}; 92};
91 93
92static DEFINE_PXA3_CKEN(pxa310_mmc3, MMC3, 19500000, 0); 94static DEFINE_PXA3_CKEN(pxa310_mmc3, MMC3, 19500000, 0);
diff --git a/arch/arm/mach-pxa/pxa320.c b/arch/arm/mach-pxa/pxa320.c
index 8b3d97efadab..c7373e74a109 100644
--- a/arch/arm/mach-pxa/pxa320.c
+++ b/arch/arm/mach-pxa/pxa320.c
@@ -78,9 +78,11 @@ static struct mfp_addr_map pxa320_mfp_addr_map[] __initdata = {
78}; 78};
79 79
80static DEFINE_PXA3_CKEN(pxa320_nand, NAND, 104000000, 0); 80static DEFINE_PXA3_CKEN(pxa320_nand, NAND, 104000000, 0);
81static DEFINE_PXA3_CKEN(gcu, PXA320_GCU, 0, 0);
81 82
82static struct clk_lookup pxa320_clkregs[] = { 83static struct clk_lookup pxa320_clkregs[] = {
83 INIT_CLKREG(&clk_pxa320_nand, "pxa3xx-nand", NULL), 84 INIT_CLKREG(&clk_pxa320_nand, "pxa3xx-nand", NULL),
85 INIT_CLKREG(&clk_gcu, "pxa3xx-gcu", NULL),
84}; 86};
85 87
86static int __init pxa320_init(void) 88static int __init pxa320_init(void)
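
pxa300 and pxa320 both gain a graphics controller (GCU) clock here, registered under the "pxa3xx-gcu" device name via INIT_CLKREG. A minimal consumer sketch, assuming a platform device whose name matches that dev_id:

#include <linux/clk.h>
#include <linux/err.h>
#include <linux/platform_device.h>

/* Sketch: clk_get() with a NULL con_id resolves against the dev_id in
 * the clk_lookup tables above, so a "pxa3xx-gcu" device finds clk_gcu. */
static int pxa3xx_gcu_clk_demo(struct platform_device *pdev)
{
	struct clk *clk = clk_get(&pdev->dev, NULL);

	if (IS_ERR(clk))
		return PTR_ERR(clk);

	clk_enable(clk);	/* gates the GCU CKEN bit on */
	return 0;
}
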
diff --git a/arch/arm/mach-pxa/pxa930.c b/arch/arm/mach-pxa/pxa930.c
index 71131742fffd..064292008288 100644
--- a/arch/arm/mach-pxa/pxa930.c
+++ b/arch/arm/mach-pxa/pxa930.c
@@ -176,13 +176,30 @@ static struct mfp_addr_map pxa930_mfp_addr_map[] __initdata = {
176 MFP_ADDR_END, 176 MFP_ADDR_END,
177}; 177};
178 178
179static struct mfp_addr_map pxa935_mfp_addr_map[] __initdata = {
180 MFP_ADDR(GPIO159, 0x0524),
181 MFP_ADDR(GPIO163, 0x0534),
182 MFP_ADDR(GPIO167, 0x0544),
183 MFP_ADDR(GPIO168, 0x0548),
184 MFP_ADDR(GPIO169, 0x054c),
185 MFP_ADDR(GPIO170, 0x0550),
186 MFP_ADDR(GPIO171, 0x0554),
187 MFP_ADDR(GPIO172, 0x0558),
188 MFP_ADDR(GPIO173, 0x055c),
189
190 MFP_ADDR_END,
191};
192
179static int __init pxa930_init(void) 193static int __init pxa930_init(void)
180{ 194{
181 if (cpu_is_pxa930()) { 195 if (cpu_is_pxa930() || cpu_is_pxa935()) {
182 mfp_init_base(io_p2v(MFPR_BASE)); 196 mfp_init_base(io_p2v(MFPR_BASE));
183 mfp_init_addr(pxa930_mfp_addr_map); 197 mfp_init_addr(pxa930_mfp_addr_map);
184 } 198 }
185 199
200 if (cpu_is_pxa935())
201 mfp_init_addr(pxa935_mfp_addr_map);
202
186 return 0; 203 return 0;
187} 204}
188 205
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index dda310fe71c8..ee8d6038ce82 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -24,6 +24,7 @@
24#include <linux/spi/ads7846.h> 24#include <linux/spi/ads7846.h>
25#include <linux/spi/corgi_lcd.h> 25#include <linux/spi/corgi_lcd.h>
26#include <linux/mtd/sharpsl.h> 26#include <linux/mtd/sharpsl.h>
27#include <linux/input/matrix_keypad.h>
27 28
28#include <asm/setup.h> 29#include <asm/setup.h>
29#include <asm/mach-types.h> 30#include <asm/mach-types.h>
@@ -111,6 +112,26 @@ static unsigned long spitz_pin_config[] __initdata = {
111 GPIO105_GPIO, /* SPITZ_GPIO_CF_IRQ */ 112 GPIO105_GPIO, /* SPITZ_GPIO_CF_IRQ */
112 GPIO106_GPIO, /* SPITZ_GPIO_CF2_IRQ */ 113 GPIO106_GPIO, /* SPITZ_GPIO_CF2_IRQ */
113 114
115 /* GPIO matrix keypad */
116 GPIO88_GPIO, /* column 0 */
117 GPIO23_GPIO, /* column 1 */
118 GPIO24_GPIO, /* column 2 */
119 GPIO25_GPIO, /* column 3 */
120 GPIO26_GPIO, /* column 4 */
121 GPIO27_GPIO, /* column 5 */
122 GPIO52_GPIO, /* column 6 */
123 GPIO103_GPIO, /* column 7 */
124 GPIO107_GPIO, /* column 8 */
125 GPIO108_GPIO, /* column 9 */
126 GPIO114_GPIO, /* column 10 */
127 GPIO12_GPIO, /* row 0 */
128 GPIO17_GPIO, /* row 1 */
129 GPIO91_GPIO, /* row 2 */
130 GPIO34_GPIO, /* row 3 */
131 GPIO36_GPIO, /* row 4 */
132 GPIO38_GPIO, /* row 5 */
133 GPIO39_GPIO, /* row 6 */
134
114 /* I2C */ 135 /* I2C */
115 GPIO117_I2C_SCL, 136 GPIO117_I2C_SCL,
116 GPIO118_I2C_SDA, 137 GPIO118_I2C_SDA,
@@ -242,9 +263,115 @@ EXPORT_SYMBOL(spitzscoop2_device);
242/* 263/*
243 * Spitz Keyboard Device 264 * Spitz Keyboard Device
244 */ 265 */
266#define SPITZ_KEY_CALENDAR KEY_F1
267#define SPITZ_KEY_ADDRESS KEY_F2
268#define SPITZ_KEY_FN KEY_F3
269#define SPITZ_KEY_CANCEL KEY_F4
270#define SPITZ_KEY_EXOK KEY_F5
271#define SPITZ_KEY_EXCANCEL KEY_F6
272#define SPITZ_KEY_EXJOGDOWN KEY_F7
273#define SPITZ_KEY_EXJOGUP KEY_F8
274#define SPITZ_KEY_JAP1 KEY_LEFTALT
275#define SPITZ_KEY_JAP2 KEY_RIGHTCTRL
276#define SPITZ_KEY_SYNC KEY_F9
277#define SPITZ_KEY_MAIL KEY_F10
278#define SPITZ_KEY_OK KEY_F11
279#define SPITZ_KEY_MENU KEY_F12
280
281static const uint32_t spitzkbd_keymap[] = {
282 KEY(0, 0, KEY_LEFTCTRL),
283 KEY(0, 1, KEY_1),
284 KEY(0, 2, KEY_3),
285 KEY(0, 3, KEY_5),
286 KEY(0, 4, KEY_6),
287 KEY(0, 5, KEY_7),
288 KEY(0, 6, KEY_9),
289 KEY(0, 7, KEY_0),
290 KEY(0, 8, KEY_BACKSPACE),
291 KEY(0, 9, SPITZ_KEY_EXOK), /* EXOK */
292 KEY(0, 10, SPITZ_KEY_EXCANCEL), /* EXCANCEL */
293 KEY(1, 1, KEY_2),
294 KEY(1, 2, KEY_4),
295 KEY(1, 3, KEY_R),
296 KEY(1, 4, KEY_Y),
297 KEY(1, 5, KEY_8),
298 KEY(1, 6, KEY_I),
299 KEY(1, 7, KEY_O),
300 KEY(1, 8, KEY_P),
301 KEY(1, 9, SPITZ_KEY_EXJOGDOWN), /* EXJOGDOWN */
302 KEY(1, 10, SPITZ_KEY_EXJOGUP), /* EXJOGUP */
303 KEY(2, 0, KEY_TAB),
304 KEY(2, 1, KEY_Q),
305 KEY(2, 2, KEY_E),
306 KEY(2, 3, KEY_T),
307 KEY(2, 4, KEY_G),
308 KEY(2, 5, KEY_U),
309 KEY(2, 6, KEY_J),
310 KEY(2, 7, KEY_K),
311 KEY(3, 0, SPITZ_KEY_ADDRESS), /* ADDRESS */
312 KEY(3, 1, KEY_W),
313 KEY(3, 2, KEY_S),
314 KEY(3, 3, KEY_F),
315 KEY(3, 4, KEY_V),
316 KEY(3, 5, KEY_H),
317 KEY(3, 6, KEY_M),
318 KEY(3, 7, KEY_L),
319 KEY(3, 9, KEY_RIGHTSHIFT),
320 KEY(4, 0, SPITZ_KEY_CALENDAR), /* CALENDAR */
321 KEY(4, 1, KEY_A),
322 KEY(4, 2, KEY_D),
323 KEY(4, 3, KEY_C),
324 KEY(4, 4, KEY_B),
325 KEY(4, 5, KEY_N),
326 KEY(4, 6, KEY_DOT),
327 KEY(4, 8, KEY_ENTER),
328 KEY(4, 9, KEY_LEFTSHIFT),
329 KEY(5, 0, SPITZ_KEY_MAIL), /* MAIL */
330 KEY(5, 1, KEY_Z),
331 KEY(5, 2, KEY_X),
332 KEY(5, 3, KEY_MINUS),
333 KEY(5, 4, KEY_SPACE),
334 KEY(5, 5, KEY_COMMA),
335 KEY(5, 7, KEY_UP),
336 KEY(5, 10, SPITZ_KEY_FN), /* FN */
337 KEY(6, 0, KEY_SYSRQ),
338 KEY(6, 1, SPITZ_KEY_JAP1), /* JAP1 */
339 KEY(6, 2, SPITZ_KEY_JAP2), /* JAP2 */
340 KEY(6, 3, SPITZ_KEY_CANCEL), /* CANCEL */
341 KEY(6, 4, SPITZ_KEY_OK), /* OK */
342 KEY(6, 5, SPITZ_KEY_MENU), /* MENU */
343 KEY(6, 6, KEY_LEFT),
344 KEY(6, 7, KEY_DOWN),
345 KEY(6, 8, KEY_RIGHT),
346};
347
348static const struct matrix_keymap_data spitzkbd_keymap_data = {
349 .keymap = spitzkbd_keymap,
350 .keymap_size = ARRAY_SIZE(spitzkbd_keymap),
351};
352
353static const uint32_t spitzkbd_row_gpios[] =
354 { 12, 17, 91, 34, 36, 38, 39 };
355static const uint32_t spitzkbd_col_gpios[] =
356 { 88, 23, 24, 25, 26, 27, 52, 103, 107, 108, 114 };
357
358static struct matrix_keypad_platform_data spitzkbd_pdata = {
359 .keymap_data = &spitzkbd_keymap_data,
360 .row_gpios = spitzkbd_row_gpios,
361 .col_gpios = spitzkbd_col_gpios,
362 .num_row_gpios = ARRAY_SIZE(spitzkbd_row_gpios),
363 .num_col_gpios = ARRAY_SIZE(spitzkbd_col_gpios),
364 .col_scan_delay_us = 10,
365 .debounce_ms = 10,
366 .wakeup = 1,
367};
368
245static struct platform_device spitzkbd_device = { 369static struct platform_device spitzkbd_device = {
246 .name = "spitz-keyboard", 370 .name = "matrix-keypad",
247 .id = -1, 371 .id = -1,
372 .dev = {
373 .platform_data = &spitzkbd_pdata,
374 },
248}; 375};
249 376
250 377
@@ -296,6 +423,7 @@ static struct ads7846_platform_data spitz_ads7846_info = {
296 .vref_delay_usecs = 100, 423 .vref_delay_usecs = 100,
297 .x_plate_ohms = 419, 424 .x_plate_ohms = 419,
298 .y_plate_ohms = 486, 425 .y_plate_ohms = 486,
426 .pressure_max = 1024,
299 .gpio_pendown = SPITZ_GPIO_TP_INT, 427 .gpio_pendown = SPITZ_GPIO_TP_INT,
300 .wait_for_sync = spitz_wait_for_hsync, 428 .wait_for_sync = spitz_wait_for_hsync,
301}; 429};
@@ -378,45 +506,6 @@ static inline void spitz_init_spi(void) {}
378 * The card detect interrupt isn't debounced so we delay it by 250ms 506 * The card detect interrupt isn't debounced so we delay it by 250ms
379 * to give the card a chance to fully insert/eject. 507 * to give the card a chance to fully insert/eject.
380 */ 508 */
381
382static struct pxamci_platform_data spitz_mci_platform_data;
383
384static int spitz_mci_init(struct device *dev, irq_handler_t spitz_detect_int, void *data)
385{
386 int err;
387
388 err = gpio_request(SPITZ_GPIO_nSD_DETECT, "nSD_DETECT");
389 if (err)
390 goto err_out;
391
392 err = gpio_request(SPITZ_GPIO_nSD_WP, "nSD_WP");
393 if (err)
394 goto err_free_1;
395
396 gpio_direction_input(SPITZ_GPIO_nSD_DETECT);
397 gpio_direction_input(SPITZ_GPIO_nSD_WP);
398
399 spitz_mci_platform_data.detect_delay = msecs_to_jiffies(250);
400
401 err = request_irq(SPITZ_IRQ_GPIO_nSD_DETECT, spitz_detect_int,
402 IRQF_DISABLED | IRQF_TRIGGER_RISING |
403 IRQF_TRIGGER_FALLING,
404 "MMC card detect", data);
405 if (err) {
406 pr_err("%s: MMC/SD: can't request MMC card detect IRQ\n",
407 __func__);
408 goto err_free_2;
409 }
410 return 0;
411
412err_free_2:
413 gpio_free(SPITZ_GPIO_nSD_WP);
414err_free_1:
415 gpio_free(SPITZ_GPIO_nSD_DETECT);
416err_out:
417 return err;
418}
419
420static void spitz_mci_setpower(struct device *dev, unsigned int vdd) 509static void spitz_mci_setpower(struct device *dev, unsigned int vdd)
421{ 510{
422 struct pxamci_platform_data* p_d = dev->platform_data; 511 struct pxamci_platform_data* p_d = dev->platform_data;
@@ -427,24 +516,12 @@ static void spitz_mci_setpower(struct device *dev, unsigned int vdd)
427 spitz_card_pwr_ctrl(SPITZ_PWR_SD, 0x0000); 516 spitz_card_pwr_ctrl(SPITZ_PWR_SD, 0x0000);
428} 517}
429 518
430static int spitz_mci_get_ro(struct device *dev)
431{
432 return gpio_get_value(SPITZ_GPIO_nSD_WP);
433}
434
435static void spitz_mci_exit(struct device *dev, void *data)
436{
437 free_irq(SPITZ_IRQ_GPIO_nSD_DETECT, data);
438 gpio_free(SPITZ_GPIO_nSD_WP);
439 gpio_free(SPITZ_GPIO_nSD_DETECT);
440}
441
442static struct pxamci_platform_data spitz_mci_platform_data = { 519static struct pxamci_platform_data spitz_mci_platform_data = {
443 .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, 520 .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
444 .init = spitz_mci_init, 521 .setpower = spitz_mci_setpower,
445 .get_ro = spitz_mci_get_ro, 522 .gpio_card_detect = SPITZ_GPIO_nSD_DETECT,
446 .setpower = spitz_mci_setpower, 523 .gpio_card_ro = SPITZ_GPIO_nSD_WP,
447 .exit = spitz_mci_exit, 524 .gpio_power = -1,
448}; 525};
449 526
450 527
@@ -484,50 +561,10 @@ static struct pxaohci_platform_data spitz_ohci_platform_data = {
484/* 561/*
485 * Irda 562 * Irda
486 */ 563 */
487static int spitz_irda_startup(struct device *dev)
488{
489 int rc;
490
491 rc = gpio_request(SPITZ_GPIO_IR_ON, "IrDA on");
492 if (rc)
493 goto err;
494
495 rc = gpio_direction_output(SPITZ_GPIO_IR_ON, 1);
496 if (rc)
497 goto err_dir;
498
499 return 0;
500
501err_dir:
502 gpio_free(SPITZ_GPIO_IR_ON);
503err:
504 return rc;
505}
506
507static void spitz_irda_shutdown(struct device *dev)
508{
509 gpio_free(SPITZ_GPIO_IR_ON);
510}
511
512static void spitz_irda_transceiver_mode(struct device *dev, int mode)
513{
514 gpio_set_value(SPITZ_GPIO_IR_ON, mode & IR_OFF);
515 pxa2xx_transceiver_mode(dev, mode);
516}
517
518#ifdef CONFIG_MACH_AKITA
519static void akita_irda_transceiver_mode(struct device *dev, int mode)
520{
521 gpio_set_value(AKITA_GPIO_IR_ON, mode & IR_OFF);
522 pxa2xx_transceiver_mode(dev, mode);
523}
524#endif
525 564
526static struct pxaficp_platform_data spitz_ficp_platform_data = { 565static struct pxaficp_platform_data spitz_ficp_platform_data = {
566/* .gpio_pwdown is set in spitz_init() and akita_init() accordingly */
527 .transceiver_cap = IR_SIRMODE | IR_OFF, 567 .transceiver_cap = IR_SIRMODE | IR_OFF,
528 .transceiver_mode = spitz_irda_transceiver_mode,
529 .startup = spitz_irda_startup,
530 .shutdown = spitz_irda_shutdown,
531}; 568};
532 569
533 570
@@ -695,6 +732,7 @@ static void __init common_init(void)
695 spitz_init_spi(); 732 spitz_init_spi();
696 733
697 platform_add_devices(devices, ARRAY_SIZE(devices)); 734 platform_add_devices(devices, ARRAY_SIZE(devices));
735 spitz_mci_platform_data.detect_delay = msecs_to_jiffies(250);
698 pxa_set_mci_info(&spitz_mci_platform_data); 736 pxa_set_mci_info(&spitz_mci_platform_data);
699 pxa_set_ohci_info(&spitz_ohci_platform_data); 737 pxa_set_ohci_info(&spitz_ohci_platform_data);
700 pxa_set_ficp_info(&spitz_ficp_platform_data); 738 pxa_set_ficp_info(&spitz_ficp_platform_data);
@@ -705,6 +743,8 @@ static void __init common_init(void)
705#if defined(CONFIG_MACH_SPITZ) || defined(CONFIG_MACH_BORZOI) 743#if defined(CONFIG_MACH_SPITZ) || defined(CONFIG_MACH_BORZOI)
706static void __init spitz_init(void) 744static void __init spitz_init(void)
707{ 745{
746 spitz_ficp_platform_data.gpio_pwdown = SPITZ_GPIO_IR_ON;
747
708 platform_scoop_config = &spitz_pcmcia_config; 748 platform_scoop_config = &spitz_pcmcia_config;
709 749
710 common_init(); 750 common_init();
@@ -747,7 +787,7 @@ static struct nand_ecclayout akita_oobinfo = {
747 787
748static void __init akita_init(void) 788static void __init akita_init(void)
749{ 789{
750 spitz_ficp_platform_data.transceiver_mode = akita_irda_transceiver_mode; 790 spitz_ficp_platform_data.gpio_pwdown = AKITA_GPIO_IR_ON;
751 791
752 sharpsl_nand_platform_data.badblock_pattern = &sharpsl_akita_bbt; 792 sharpsl_nand_platform_data.badblock_pattern = &sharpsl_akita_bbt;
753 sharpsl_nand_platform_data.ecc_layout = &akita_oobinfo; 793 sharpsl_nand_platform_data.ecc_layout = &akita_oobinfo;
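
The spitz keyboard hunks above replace the bespoke "spitz-keyboard" driver with the generic matrix-keypad one: the rows and columns claimed in the pin-config table are listed as plain GPIO numbers, and each KEY(row, col, code) entry packs a key's position and keycode into one u32. A sketch of how such a keymap is unpacked, using the decoding macros that ship with <linux/input/matrix_keypad.h>:

#include <linux/types.h>
#include <linux/input/matrix_keypad.h>

/* Sketch: expand a packed keymap into a scan-code-indexed keycode
 * table, as the matrix-keypad core does; row_shift is the number of
 * bits needed to hold the column index. */
static void matrix_keymap_expand(const uint32_t *keymap, size_t entries,
				 unsigned short *keycodes,
				 unsigned int row_shift)
{
	size_t i;

	for (i = 0; i < entries; i++) {
		unsigned int row = KEY_ROW(keymap[i]);
		unsigned int col = KEY_COL(keymap[i]);

		keycodes[MATRIX_SCAN_CODE(row, col, row_shift)] =
			KEY_VAL(keymap[i]);
	}
}
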
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index 117ad5920e53..e81a52673d49 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -247,49 +247,10 @@ static struct pxa2xx_udc_mach_info udc_info __initdata = {
247/* 247/*
248 * MMC/SD Device 248 * MMC/SD Device
249 */ 249 */
250static struct pxamci_platform_data tosa_mci_platform_data;
251
252static int tosa_mci_init(struct device *dev, irq_handler_t tosa_detect_int, void *data) 250static int tosa_mci_init(struct device *dev, irq_handler_t tosa_detect_int, void *data)
253{ 251{
254 int err; 252 int err;
255 253
256 tosa_mci_platform_data.detect_delay = msecs_to_jiffies(250);
257
258 err = gpio_request(TOSA_GPIO_nSD_DETECT, "MMC/SD card detect");
259 if (err) {
260 printk(KERN_ERR "tosa_mci_init: can't request nSD_DETECT gpio\n");
261 goto err_gpio_detect;
262 }
263 err = gpio_direction_input(TOSA_GPIO_nSD_DETECT);
264 if (err)
265 goto err_gpio_detect_dir;
266
267 err = request_irq(TOSA_IRQ_GPIO_nSD_DETECT, tosa_detect_int,
268 IRQF_DISABLED | IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
269 "MMC/SD card detect", data);
270 if (err) {
271 printk(KERN_ERR "tosa_mci_init: MMC/SD: can't request MMC card detect IRQ\n");
272 goto err_irq;
273 }
274
275 err = gpio_request(TOSA_GPIO_SD_WP, "SD Write Protect");
276 if (err) {
277 printk(KERN_ERR "tosa_mci_init: can't request SD_WP gpio\n");
278 goto err_gpio_wp;
279 }
280 err = gpio_direction_input(TOSA_GPIO_SD_WP);
281 if (err)
282 goto err_gpio_wp_dir;
283
284 err = gpio_request(TOSA_GPIO_PWR_ON, "SD Power");
285 if (err) {
286 printk(KERN_ERR "tosa_mci_init: can't request SD_PWR gpio\n");
287 goto err_gpio_pwr;
288 }
289 err = gpio_direction_output(TOSA_GPIO_PWR_ON, 0);
290 if (err)
291 goto err_gpio_pwr_dir;
292
293 err = gpio_request(TOSA_GPIO_nSD_INT, "SD Int"); 254 err = gpio_request(TOSA_GPIO_nSD_INT, "SD Int");
294 if (err) { 255 if (err) {
 295 printk(KERN_ERR "tosa_mci_init: can't request SD Int gpio\n"); 256 printk(KERN_ERR "tosa_mci_init: can't request SD Int gpio\n");
@@ -304,51 +265,21 @@ static int tosa_mci_init(struct device *dev, irq_handler_t tosa_detect_int, void
304err_gpio_int_dir: 265err_gpio_int_dir:
305 gpio_free(TOSA_GPIO_nSD_INT); 266 gpio_free(TOSA_GPIO_nSD_INT);
306err_gpio_int: 267err_gpio_int:
307err_gpio_pwr_dir:
308 gpio_free(TOSA_GPIO_PWR_ON);
309err_gpio_pwr:
310err_gpio_wp_dir:
311 gpio_free(TOSA_GPIO_SD_WP);
312err_gpio_wp:
313 free_irq(TOSA_IRQ_GPIO_nSD_DETECT, data);
314err_irq:
315err_gpio_detect_dir:
316 gpio_free(TOSA_GPIO_nSD_DETECT);
317err_gpio_detect:
318 return err; 268 return err;
319} 269}
320 270
321static void tosa_mci_setpower(struct device *dev, unsigned int vdd)
322{
323 struct pxamci_platform_data* p_d = dev->platform_data;
324
325 if (( 1 << vdd) & p_d->ocr_mask) {
326 gpio_set_value(TOSA_GPIO_PWR_ON, 1);
327 } else {
328 gpio_set_value(TOSA_GPIO_PWR_ON, 0);
329 }
330}
331
332static int tosa_mci_get_ro(struct device *dev)
333{
334 return gpio_get_value(TOSA_GPIO_SD_WP);
335}
336
337static void tosa_mci_exit(struct device *dev, void *data) 271static void tosa_mci_exit(struct device *dev, void *data)
338{ 272{
339 gpio_free(TOSA_GPIO_nSD_INT); 273 gpio_free(TOSA_GPIO_nSD_INT);
340 gpio_free(TOSA_GPIO_PWR_ON);
341 gpio_free(TOSA_GPIO_SD_WP);
342 free_irq(TOSA_IRQ_GPIO_nSD_DETECT, data);
343 gpio_free(TOSA_GPIO_nSD_DETECT);
344} 274}
345 275
346static struct pxamci_platform_data tosa_mci_platform_data = { 276static struct pxamci_platform_data tosa_mci_platform_data = {
347 .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, 277 .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
348 .init = tosa_mci_init, 278 .init = tosa_mci_init,
349 .get_ro = tosa_mci_get_ro, 279 .exit = tosa_mci_exit,
350 .setpower = tosa_mci_setpower, 280 .gpio_card_detect = TOSA_GPIO_nSD_DETECT,
351 .exit = tosa_mci_exit, 281 .gpio_card_ro = TOSA_GPIO_SD_WP,
282 .gpio_power = TOSA_GPIO_PWR_ON,
352}; 283};
353 284
354/* 285/*
@@ -406,10 +337,11 @@ static void tosa_irda_shutdown(struct device *dev)
406} 337}
407 338
408static struct pxaficp_platform_data tosa_ficp_platform_data = { 339static struct pxaficp_platform_data tosa_ficp_platform_data = {
409 .transceiver_cap = IR_SIRMODE | IR_OFF, 340 .gpio_pwdown = -1,
410 .transceiver_mode = tosa_irda_transceiver_mode, 341 .transceiver_cap = IR_SIRMODE | IR_OFF,
411 .startup = tosa_irda_startup, 342 .transceiver_mode = tosa_irda_transceiver_mode,
412 .shutdown = tosa_irda_shutdown, 343 .startup = tosa_irda_startup,
344 .shutdown = tosa_irda_shutdown,
413}; 345};
414 346
415/* 347/*
@@ -910,6 +842,7 @@ static void __init tosa_init(void)
910 dummy = gpiochip_reserve(TOSA_SCOOP_JC_GPIO_BASE, 12); 842 dummy = gpiochip_reserve(TOSA_SCOOP_JC_GPIO_BASE, 12);
911 dummy = gpiochip_reserve(TOSA_TC6393XB_GPIO_BASE, 16); 843 dummy = gpiochip_reserve(TOSA_TC6393XB_GPIO_BASE, 16);
912 844
845 tosa_mci_platform_data.detect_delay = msecs_to_jiffies(250);
913 pxa_set_mci_info(&tosa_mci_platform_data); 846 pxa_set_mci_info(&tosa_mci_platform_data);
914 pxa_set_udc_info(&udc_info); 847 pxa_set_udc_info(&udc_info);
915 pxa_set_ficp_info(&tosa_ficp_platform_data); 848 pxa_set_ficp_info(&tosa_ficp_platform_data);
diff --git a/arch/arm/mach-pxa/treo680.c b/arch/arm/mach-pxa/treo680.c
index 753ec4df17b9..fe085076fbf2 100644
--- a/arch/arm/mach-pxa/treo680.c
+++ b/arch/arm/mach-pxa/treo680.c
@@ -153,87 +153,11 @@ static unsigned long treo680_pin_config[] __initdata = {
153/****************************************************************************** 153/******************************************************************************
154 * SD/MMC card controller 154 * SD/MMC card controller
155 ******************************************************************************/ 155 ******************************************************************************/
156static int treo680_mci_init(struct device *dev,
157 irq_handler_t treo680_detect_int, void *data)
158{
159 int err = 0;
160
161 /* Setup an interrupt for detecting card insert/remove events */
162 err = gpio_request(GPIO_NR_TREO680_SD_DETECT_N, "SD IRQ");
163
164 if (err)
165 goto err;
166
167 err = gpio_direction_input(GPIO_NR_TREO680_SD_DETECT_N);
168 if (err)
169 goto err2;
170
171 err = request_irq(gpio_to_irq(GPIO_NR_TREO680_SD_DETECT_N),
172 treo680_detect_int, IRQF_DISABLED | IRQF_SAMPLE_RANDOM |
173 IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
174 "SD/MMC card detect", data);
175
176 if (err) {
177 dev_err(dev, "%s: cannot request SD/MMC card detect IRQ\n",
178 __func__);
179 goto err2;
180 }
181
182 err = gpio_request(GPIO_NR_TREO680_SD_POWER, "SD_POWER");
183 if (err)
184 goto err3;
185
186 err = gpio_direction_output(GPIO_NR_TREO680_SD_POWER, 1);
187 if (err)
188 goto err4;
189
190 err = gpio_request(GPIO_NR_TREO680_SD_READONLY, "SD_READONLY");
191 if (err)
192 goto err4;
193
194 err = gpio_direction_input(GPIO_NR_TREO680_SD_READONLY);
195 if (err)
196 goto err5;
197
198 return 0;
199
200err5:
201 gpio_free(GPIO_NR_TREO680_SD_READONLY);
202err4:
203 gpio_free(GPIO_NR_TREO680_SD_POWER);
204err3:
205 free_irq(gpio_to_irq(GPIO_NR_TREO680_SD_DETECT_N), data);
206err2:
207 gpio_free(GPIO_NR_TREO680_SD_DETECT_N);
208err:
209 return err;
210}
211
212static void treo680_mci_exit(struct device *dev, void *data)
213{
214 gpio_free(GPIO_NR_TREO680_SD_READONLY);
215 gpio_free(GPIO_NR_TREO680_SD_POWER);
216 free_irq(gpio_to_irq(GPIO_NR_TREO680_SD_DETECT_N), data);
217 gpio_free(GPIO_NR_TREO680_SD_DETECT_N);
218}
219
220static void treo680_mci_power(struct device *dev, unsigned int vdd)
221{
222 struct pxamci_platform_data *p_d = dev->platform_data;
223 gpio_set_value(GPIO_NR_TREO680_SD_POWER, p_d->ocr_mask & (1 << vdd));
224}
225
226static int treo680_mci_get_ro(struct device *dev)
227{
228 return gpio_get_value(GPIO_NR_TREO680_SD_READONLY);
229}
230
231static struct pxamci_platform_data treo680_mci_platform_data = { 156static struct pxamci_platform_data treo680_mci_platform_data = {
232 .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, 157 .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34,
233 .setpower = treo680_mci_power, 158 .gpio_card_detect = GPIO_NR_TREO680_SD_DETECT_N,
234 .get_ro = treo680_mci_get_ro, 159 .gpio_card_ro = GPIO_NR_TREO680_SD_READONLY,
235 .init = treo680_mci_init, 160 .gpio_power = GPIO_NR_TREO680_SD_POWER,
236 .exit = treo680_mci_exit,
237}; 161};
238 162
239/****************************************************************************** 163/******************************************************************************
@@ -330,16 +254,9 @@ static int treo680_backlight_init(struct device *dev)
330 ret = gpio_direction_output(GPIO_NR_TREO680_BL_POWER, 0); 254 ret = gpio_direction_output(GPIO_NR_TREO680_BL_POWER, 0);
331 if (ret) 255 if (ret)
332 goto err2; 256 goto err2;
333 ret = gpio_request(GPIO_NR_TREO680_LCD_POWER, "LCD POWER");
334 if (ret)
335 goto err2;
336 ret = gpio_direction_output(GPIO_NR_TREO680_LCD_POWER, 0);
337 if (ret)
338 goto err3;
339 257
340 return 0; 258 return 0;
341err3: 259
342 gpio_free(GPIO_NR_TREO680_LCD_POWER);
343err2: 260err2:
344 gpio_free(GPIO_NR_TREO680_BL_POWER); 261 gpio_free(GPIO_NR_TREO680_BL_POWER);
345err: 262err:
@@ -355,7 +272,6 @@ static int treo680_backlight_notify(int brightness)
355static void treo680_backlight_exit(struct device *dev) 272static void treo680_backlight_exit(struct device *dev)
356{ 273{
357 gpio_free(GPIO_NR_TREO680_BL_POWER); 274 gpio_free(GPIO_NR_TREO680_BL_POWER);
358 gpio_free(GPIO_NR_TREO680_LCD_POWER);
359} 275}
360 276
361static struct platform_pwm_backlight_data treo680_backlight_data = { 277static struct platform_pwm_backlight_data treo680_backlight_data = {
@@ -379,44 +295,9 @@ static struct platform_device treo680_backlight = {
379/****************************************************************************** 295/******************************************************************************
380 * IrDA 296 * IrDA
381 ******************************************************************************/ 297 ******************************************************************************/
382static void treo680_transceiver_mode(struct device *dev, int mode)
383{
384 gpio_set_value(GPIO_NR_TREO680_IR_EN, mode & IR_OFF);
385 pxa2xx_transceiver_mode(dev, mode);
386}
387
388static int treo680_irda_startup(struct device *dev)
389{
390 int err;
391
392 err = gpio_request(GPIO_NR_TREO680_IR_EN, "Ir port disable");
393 if (err)
394 goto err1;
395
396 err = gpio_direction_output(GPIO_NR_TREO680_IR_EN, 1);
397 if (err)
398 goto err2;
399
400 return 0;
401
402err2:
403 dev_err(dev, "treo680_irda: cannot change IR gpio direction\n");
404 gpio_free(GPIO_NR_TREO680_IR_EN);
405err1:
406 dev_err(dev, "treo680_irda: cannot allocate IR gpio\n");
407 return err;
408}
409
410static void treo680_irda_shutdown(struct device *dev)
411{
412 gpio_free(GPIO_NR_TREO680_IR_EN);
413}
414
415static struct pxaficp_platform_data treo680_ficp_info = { 298static struct pxaficp_platform_data treo680_ficp_info = {
416 .transceiver_cap = IR_FIRMODE | IR_SIRMODE | IR_OFF, 299 .gpio_pwdown = GPIO_NR_TREO680_IR_EN,
417 .startup = treo680_irda_startup, 300 .transceiver_cap = IR_SIRMODE | IR_OFF,
418 .shutdown = treo680_irda_shutdown,
419 .transceiver_mode = treo680_transceiver_mode,
420}; 301};
421 302
422/****************************************************************************** 303/******************************************************************************
@@ -546,6 +427,11 @@ static struct pxafb_mode_info treo680_lcd_modes[] = {
546}, 427},
547}; 428};
548 429
430static void treo680_lcd_power(int on, struct fb_var_screeninfo *info)
431{
432 gpio_set_value(GPIO_NR_TREO680_BL_POWER, on);
433}
434
549static struct pxafb_mach_info treo680_lcd_screen = { 435static struct pxafb_mach_info treo680_lcd_screen = {
550 .modes = treo680_lcd_modes, 436 .modes = treo680_lcd_modes,
551 .num_modes = ARRAY_SIZE(treo680_lcd_modes), 437 .num_modes = ARRAY_SIZE(treo680_lcd_modes),
@@ -585,11 +471,32 @@ static void __init treo680_udc_init(void)
585 } 471 }
586} 472}
587 473
474static void __init treo680_lcd_power_init(void)
475{
476 int ret;
477
478 ret = gpio_request(GPIO_NR_TREO680_LCD_POWER, "LCD POWER");
479 if (ret) {
480 pr_err("Treo680: LCD power GPIO request failed!\n");
481 return;
482 }
483
484 ret = gpio_direction_output(GPIO_NR_TREO680_LCD_POWER, 0);
485 if (ret) {
486 pr_err("Treo680: setting LCD power GPIO direction failed!\n");
487 gpio_free(GPIO_NR_TREO680_LCD_POWER);
488 return;
489 }
490
491 treo680_lcd_screen.pxafb_lcd_power = treo680_lcd_power;
492}
493
588static void __init treo680_init(void) 494static void __init treo680_init(void)
589{ 495{
590 treo680_pm_init(); 496 treo680_pm_init();
591 pxa2xx_mfp_config(ARRAY_AND_SIZE(treo680_pin_config)); 497 pxa2xx_mfp_config(ARRAY_AND_SIZE(treo680_pin_config));
592 pxa_set_keypad_info(&treo680_keypad_platform_data); 498 pxa_set_keypad_info(&treo680_keypad_platform_data);
499 treo680_lcd_power_init();
593 set_pxa_fb_info(&treo680_lcd_screen); 500 set_pxa_fb_info(&treo680_lcd_screen);
594 pxa_set_mci_info(&treo680_mci_platform_data); 501 pxa_set_mci_info(&treo680_mci_platform_data);
595 treo680_udc_init(); 502 treo680_udc_init();
diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c
index 825f540176d2..3981e0356d12 100644
--- a/arch/arm/mach-pxa/trizeps4.c
+++ b/arch/arm/mach-pxa/trizeps4.c
@@ -367,6 +367,9 @@ static struct pxamci_platform_data trizeps4_mci_platform_data = {
367 .exit = trizeps4_mci_exit, 367 .exit = trizeps4_mci_exit,
368 .get_ro = NULL, /* write-protection not supported */ 368 .get_ro = NULL, /* write-protection not supported */
369 .setpower = NULL, /* power-switching not supported */ 369 .setpower = NULL, /* power-switching not supported */
370 .gpio_card_detect = -1,
371 .gpio_card_ro = -1,
372 .gpio_power = -1,
370}; 373};
371 374
372/**************************************************************************** 375/****************************************************************************
@@ -412,6 +415,7 @@ static void trizeps4_irda_transceiver_mode(struct device *dev, int mode)
412} 415}
413 416
414static struct pxaficp_platform_data trizeps4_ficp_platform_data = { 417static struct pxaficp_platform_data trizeps4_ficp_platform_data = {
418 .gpio_pwdown = -1,
415 .transceiver_cap = IR_SIRMODE | IR_FIRMODE | IR_OFF, 419 .transceiver_cap = IR_SIRMODE | IR_FIRMODE | IR_OFF,
416 .transceiver_mode = trizeps4_irda_transceiver_mode, 420 .transceiver_mode = trizeps4_irda_transceiver_mode,
417 .startup = trizeps4_irda_startup, 421 .startup = trizeps4_irda_startup,
diff --git a/arch/arm/mach-pxa/xcep.c b/arch/arm/mach-pxa/xcep.c
new file mode 100644
index 000000000000..3fd79cbb36c8
--- /dev/null
+++ b/arch/arm/mach-pxa/xcep.c
@@ -0,0 +1,187 @@
1/* linux/arch/arm/mach-pxa/xcep.c
2 *
3 * Support for the Iskratel Electronics XCEP platform as used in
4 * the Libera instruments from Instrumentation Technologies.
5 *
6 * Author: Ales Bardorfer <ales@i-tech.si>
7 * Contributions by: Abbott, MG (Michael) <michael.abbott@diamond.ac.uk>
8 * Contributions by: Matej Kenda <matej.kenda@i-tech.si>
9 * Created: June 2006
10 * Copyright: (C) 2006-2009 Instrumentation Technologies
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License version 2 as
14 * published by the Free Software Foundation.
15 */
16
17#include <linux/platform_device.h>
18#include <linux/i2c.h>
19#include <linux/smc91x.h>
20#include <linux/mtd/mtd.h>
21#include <linux/mtd/partitions.h>
22#include <linux/mtd/physmap.h>
23
24#include <asm/mach-types.h>
25#include <asm/mach/arch.h>
26#include <asm/mach/irq.h>
27#include <asm/mach/map.h>
28
29#include <plat/i2c.h>
30
31#include <mach/hardware.h>
32#include <mach/pxa2xx-regs.h>
33#include <mach/mfp-pxa25x.h>
34
35#include "generic.h"
36
37#define XCEP_ETH_PHYS (PXA_CS3_PHYS + 0x00000300)
38#define XCEP_ETH_PHYS_END (PXA_CS3_PHYS + 0x000fffff)
39#define XCEP_ETH_ATTR (PXA_CS3_PHYS + 0x02000000)
40#define XCEP_ETH_ATTR_END (PXA_CS3_PHYS + 0x020fffff)
41#define XCEP_ETH_IRQ IRQ_GPIO0
42
43/* XCEP CPLD base */
44#define XCEP_CPLD_BASE 0xf0000000
45
46
47/* Flash partitions. */
48
49static struct mtd_partition xcep_partitions[] = {
50 {
51 .name = "Bootloader",
52 .size = 0x00040000,
53 .offset = 0,
54 .mask_flags = MTD_WRITEABLE
55 }, {
56 .name = "Bootloader ENV",
57 .size = 0x00040000,
58 .offset = 0x00040000,
59 .mask_flags = MTD_WRITEABLE
60 }, {
61 .name = "Kernel",
62 .size = 0x00100000,
63 .offset = 0x00080000,
64 }, {
65 .name = "Rescue fs",
66 .size = 0x00280000,
67 .offset = 0x00180000,
68 }, {
69 .name = "Filesystem",
70 .size = MTDPART_SIZ_FULL,
71 .offset = 0x00400000
72 }
73};
74
75static struct physmap_flash_data xcep_flash_data[] = {
76 {
77 .width = 4, /* bankwidth in bytes */
78 .parts = xcep_partitions,
79 .nr_parts = ARRAY_SIZE(xcep_partitions)
80 }
81};
82
83static struct resource flash_resource = {
84 .start = PXA_CS0_PHYS,
85 .end = PXA_CS0_PHYS + SZ_32M - 1,
86 .flags = IORESOURCE_MEM,
87};
88
89static struct platform_device flash_device = {
90 .name = "physmap-flash",
91 .id = 0,
92 .dev = {
93 .platform_data = xcep_flash_data,
94 },
95 .resource = &flash_resource,
96 .num_resources = 1,
97};
98
99
100
101/* SMC LAN91C111 network controller. */
102
103static struct resource smc91x_resources[] = {
104 [0] = {
105 .name = "smc91x-regs",
106 .start = XCEP_ETH_PHYS,
107 .end = XCEP_ETH_PHYS_END,
108 .flags = IORESOURCE_MEM,
109 },
110 [1] = {
111 .start = XCEP_ETH_IRQ,
112 .end = XCEP_ETH_IRQ,
113 .flags = IORESOURCE_IRQ,
114 },
115 [2] = {
116 .name = "smc91x-attrib",
117 .start = XCEP_ETH_ATTR,
118 .end = XCEP_ETH_ATTR_END,
119 .flags = IORESOURCE_MEM,
120 },
121};
122
123static struct smc91x_platdata xcep_smc91x_info = {
124 .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT | SMC91X_USE_DMA,
125};
126
127static struct platform_device smc91x_device = {
128 .name = "smc91x",
129 .id = -1,
130 .num_resources = ARRAY_SIZE(smc91x_resources),
131 .resource = smc91x_resources,
132 .dev = {
133 .platform_data = &xcep_smc91x_info,
134 },
135};
136
137
138static struct platform_device *devices[] __initdata = {
139 &flash_device,
140 &smc91x_device,
141};
142
143
144/* We have to state that there are HWMON devices on the I2C bus on XCEP.
145 * Drivers for HWMON verify capabilities of the adapter when loading and
146 * refuse to attach if the adapter doesn't support HWMON class of devices.
147 * See also Documentation/i2c/porting-clients. */
148static struct i2c_pxa_platform_data xcep_i2c_platform_data = {
149 .class = I2C_CLASS_HWMON
150};
151
152
153static mfp_cfg_t xcep_pin_config[] __initdata = {
154 GPIO79_nCS_3, /* SMC 91C111 chip select. */
155 GPIO80_nCS_4, /* CPLD chip select. */
156 /* SSP communication to MSP430 */
157 GPIO23_SSP1_SCLK,
158 GPIO24_SSP1_SFRM,
159 GPIO25_SSP1_TXD,
160 GPIO26_SSP1_RXD,
161 GPIO27_SSP1_EXTCLK
162};
163
164static void __init xcep_init(void)
165{
166 pxa2xx_mfp_config(ARRAY_AND_SIZE(xcep_pin_config));
167
168 /* See Intel XScale Developer's Guide for details */
169 /* Set RDF and RDN to appropriate values (chip select 3 (smc91x)) */
170 MSC1 = (MSC1 & 0xffff) | 0xD5540000;
171 /* Set RDF and RDN to appropriate values (chip select 5 (fpga)) */
172 MSC2 = (MSC2 & 0xffff) | 0x72A00000;
173
174 platform_add_devices(ARRAY_AND_SIZE(devices));
175 pxa_set_i2c_info(&xcep_i2c_platform_data);
176}
177
178MACHINE_START(XCEP, "Iskratel XCEP")
179 .phys_io = 0x40000000,
180 .io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc,
181 .boot_params = 0xa0000100,
182 .init_machine = xcep_init,
183 .map_io = pxa_map_io,
184 .init_irq = pxa25x_init_irq,
185 .timer = &pxa_timer,
186MACHINE_END
187
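
The MSC1/MSC2 writes in xcep_init() rely on the PXA25x static memory controller packing two chip selects per register, one per 16-bit half; the code rewrites only the upper halfword, which carries the RDF/RDN timing fields for nCS3 (the smc91x) in MSC1 and for nCS5 (the FPGA/CPLD) in MSC2. A sketch of that masking, with this register layout stated as an assumption:

#include <linux/types.h>

/* Assumed MSCx layout: bits 15:0 configure the even chip select of the
 * pair, bits 31:16 the odd one. */
static inline u32 msc_set_odd_cs_timings(u32 msc, u16 timings)
{
	return (msc & 0x0000ffff) | ((u32)timings << 16);
}

/* Usage matching xcep_init() above:
 *	MSC1 = msc_set_odd_cs_timings(MSC1, 0xD554);	(nCS3, smc91x)
 *	MSC2 = msc_set_odd_cs_timings(MSC2, 0x72A0);	(nCS5, fpga)
 */
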
diff --git a/arch/arm/mach-pxa/zylonite.c b/arch/arm/mach-pxa/zylonite.c
index 218d2001f1df..09784d3954e4 100644
--- a/arch/arm/mach-pxa/zylonite.c
+++ b/arch/arm/mach-pxa/zylonite.c
@@ -290,6 +290,9 @@ static struct pxamci_platform_data zylonite_mci_platform_data = {
290 .init = zylonite_mci_init, 290 .init = zylonite_mci_init,
291 .exit = zylonite_mci_exit, 291 .exit = zylonite_mci_exit,
292 .get_ro = zylonite_mci_ro, 292 .get_ro = zylonite_mci_ro,
293 .gpio_card_detect = -1,
294 .gpio_card_ro = -1,
295 .gpio_power = -1,
293}; 296};
294 297
295static struct pxamci_platform_data zylonite_mci2_platform_data = { 298static struct pxamci_platform_data zylonite_mci2_platform_data = {
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index dc3519c50ab2..a2083b60e3fb 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -30,6 +30,7 @@
30#include <linux/io.h> 30#include <linux/io.h>
31#include <linux/smsc911x.h> 31#include <linux/smsc911x.h>
32#include <linux/ata_platform.h> 32#include <linux/ata_platform.h>
33#include <linux/amba/mmci.h>
33 34
34#include <asm/clkdev.h> 35#include <asm/clkdev.h>
35#include <asm/system.h> 36#include <asm/system.h>
@@ -44,7 +45,6 @@
44#include <asm/mach/flash.h> 45#include <asm/mach/flash.h>
45#include <asm/mach/irq.h> 46#include <asm/mach/irq.h>
46#include <asm/mach/map.h> 47#include <asm/mach/map.h>
47#include <asm/mach/mmc.h>
48 48
49#include <asm/hardware/gic.h> 49#include <asm/hardware/gic.h>
50 50
@@ -237,14 +237,14 @@ static unsigned int realview_mmc_status(struct device *dev)
237 return readl(REALVIEW_SYSMCI) & mask; 237 return readl(REALVIEW_SYSMCI) & mask;
238} 238}
239 239
240struct mmc_platform_data realview_mmc0_plat_data = { 240struct mmci_platform_data realview_mmc0_plat_data = {
241 .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, 241 .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
242 .status = realview_mmc_status, 242 .status = realview_mmc_status,
243 .gpio_wp = 17, 243 .gpio_wp = 17,
244 .gpio_cd = 16, 244 .gpio_cd = 16,
245}; 245};
246 246
247struct mmc_platform_data realview_mmc1_plat_data = { 247struct mmci_platform_data realview_mmc1_plat_data = {
248 .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, 248 .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
249 .status = realview_mmc_status, 249 .status = realview_mmc_status,
250 .gpio_wp = 19, 250 .gpio_wp = 19,
@@ -296,31 +296,31 @@ static struct clk ref24_clk = {
296 296
297static struct clk_lookup lookups[] = { 297static struct clk_lookup lookups[] = {
298 { /* UART0 */ 298 { /* UART0 */
299 .dev_id = "dev:f1", 299 .dev_id = "dev:uart0",
300 .clk = &ref24_clk, 300 .clk = &ref24_clk,
301 }, { /* UART1 */ 301 }, { /* UART1 */
302 .dev_id = "dev:f2", 302 .dev_id = "dev:uart1",
303 .clk = &ref24_clk, 303 .clk = &ref24_clk,
304 }, { /* UART2 */ 304 }, { /* UART2 */
305 .dev_id = "dev:f3", 305 .dev_id = "dev:uart2",
306 .clk = &ref24_clk, 306 .clk = &ref24_clk,
307 }, { /* UART3 */ 307 }, { /* UART3 */
308 .dev_id = "fpga:09", 308 .dev_id = "fpga:uart3",
309 .clk = &ref24_clk, 309 .clk = &ref24_clk,
310 }, { /* KMI0 */ 310 }, { /* KMI0 */
311 .dev_id = "fpga:06", 311 .dev_id = "fpga:kmi0",
312 .clk = &ref24_clk, 312 .clk = &ref24_clk,
313 }, { /* KMI1 */ 313 }, { /* KMI1 */
314 .dev_id = "fpga:07", 314 .dev_id = "fpga:kmi1",
315 .clk = &ref24_clk, 315 .clk = &ref24_clk,
316 }, { /* MMC0 */ 316 }, { /* MMC0 */
317 .dev_id = "fpga:05", 317 .dev_id = "fpga:mmc0",
318 .clk = &ref24_clk, 318 .clk = &ref24_clk,
319 }, { /* EB:CLCD */ 319 }, { /* EB:CLCD */
320 .dev_id = "dev:20", 320 .dev_id = "dev:clcd",
321 .clk = &oscvco_clk, 321 .clk = &oscvco_clk,
322 }, { /* PB:CLCD */ 322 }, { /* PB:CLCD */
323 .dev_id = "issp:20", 323 .dev_id = "issp:clcd",
324 .clk = &oscvco_clk, 324 .clk = &oscvco_clk,
325 } 325 }
326}; 326};
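
The RealView rename from numeric AMBA names such as "fpga:05" to descriptive ones such as "fpga:mmc0" only holds together because the clk_lookup table above and the AMBA_DEVICE declarations below change in lockstep: clkdev matches a clk_get() caller against .dev_id purely by device name. A consumer-side sketch under that assumption:

#include <linux/clk.h>
#include <linux/amba/bus.h>

/* Sketch: an AMBA driver obtains its clock by device name alone, so
 * "dev:uart0" now resolves to ref24_clk through the table above. */
static struct clk *realview_get_bus_clock(struct amba_device *adev)
{
	return clk_get(&adev->dev, NULL);
}
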
diff --git a/arch/arm/mach-realview/core.h b/arch/arm/mach-realview/core.h
index 59a337ba4be7..46cd6acb4d40 100644
--- a/arch/arm/mach-realview/core.h
+++ b/arch/arm/mach-realview/core.h
@@ -47,8 +47,8 @@ static struct amba_device name##_device = { \
47extern struct platform_device realview_flash_device; 47extern struct platform_device realview_flash_device;
48extern struct platform_device realview_cf_device; 48extern struct platform_device realview_cf_device;
49extern struct platform_device realview_i2c_device; 49extern struct platform_device realview_i2c_device;
50extern struct mmc_platform_data realview_mmc0_plat_data; 50extern struct mmci_platform_data realview_mmc0_plat_data;
51extern struct mmc_platform_data realview_mmc1_plat_data; 51extern struct mmci_platform_data realview_mmc1_plat_data;
52extern struct clcd_board clcd_plat_data; 52extern struct clcd_board clcd_plat_data;
53extern void __iomem *gic_cpu_base_addr; 53extern void __iomem *gic_cpu_base_addr;
54extern void __iomem *timer0_va_base; 54extern void __iomem *timer0_va_base;
diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c
index abd13b448671..1d65e64ae571 100644
--- a/arch/arm/mach-realview/realview_eb.c
+++ b/arch/arm/mach-realview/realview_eb.c
@@ -24,6 +24,7 @@
24#include <linux/sysdev.h> 24#include <linux/sysdev.h>
25#include <linux/amba/bus.h> 25#include <linux/amba/bus.h>
26#include <linux/amba/pl061.h> 26#include <linux/amba/pl061.h>
27#include <linux/amba/mmci.h>
27#include <linux/io.h> 28#include <linux/io.h>
28 29
29#include <mach/hardware.h> 30#include <mach/hardware.h>
@@ -37,7 +38,6 @@
37 38
38#include <asm/mach/arch.h> 39#include <asm/mach/arch.h>
39#include <asm/mach/map.h> 40#include <asm/mach/map.h>
40#include <asm/mach/mmc.h>
41#include <asm/mach/time.h> 41#include <asm/mach/time.h>
42 42
43#include <mach/board-eb.h> 43#include <mach/board-eb.h>
@@ -193,27 +193,27 @@ static struct pl061_platform_data gpio2_plat_data = {
193#define EB_SSP_DMA { 9, 8 } 193#define EB_SSP_DMA { 9, 8 }
194 194
195/* FPGA Primecells */ 195/* FPGA Primecells */
196AMBA_DEVICE(aaci, "fpga:04", AACI, NULL); 196AMBA_DEVICE(aaci, "fpga:aaci", AACI, NULL);
197AMBA_DEVICE(mmc0, "fpga:05", MMCI0, &realview_mmc0_plat_data); 197AMBA_DEVICE(mmc0, "fpga:mmc0", MMCI0, &realview_mmc0_plat_data);
198AMBA_DEVICE(kmi0, "fpga:06", KMI0, NULL); 198AMBA_DEVICE(kmi0, "fpga:kmi0", KMI0, NULL);
199AMBA_DEVICE(kmi1, "fpga:07", KMI1, NULL); 199AMBA_DEVICE(kmi1, "fpga:kmi1", KMI1, NULL);
200AMBA_DEVICE(uart3, "fpga:09", EB_UART3, NULL); 200AMBA_DEVICE(uart3, "fpga:uart3", EB_UART3, NULL);
201 201
202/* DevChip Primecells */ 202/* DevChip Primecells */
203AMBA_DEVICE(smc, "dev:00", EB_SMC, NULL); 203AMBA_DEVICE(smc, "dev:smc", EB_SMC, NULL);
204AMBA_DEVICE(clcd, "dev:20", EB_CLCD, &clcd_plat_data); 204AMBA_DEVICE(clcd, "dev:clcd", EB_CLCD, &clcd_plat_data);
205AMBA_DEVICE(dmac, "dev:30", DMAC, NULL); 205AMBA_DEVICE(dmac, "dev:dmac", DMAC, NULL);
206AMBA_DEVICE(sctl, "dev:e0", SCTL, NULL); 206AMBA_DEVICE(sctl, "dev:sctl", SCTL, NULL);
207AMBA_DEVICE(wdog, "dev:e1", EB_WATCHDOG, NULL); 207AMBA_DEVICE(wdog, "dev:wdog", EB_WATCHDOG, NULL);
208AMBA_DEVICE(gpio0, "dev:e4", EB_GPIO0, &gpio0_plat_data); 208AMBA_DEVICE(gpio0, "dev:gpio0", EB_GPIO0, &gpio0_plat_data);
209AMBA_DEVICE(gpio1, "dev:e5", GPIO1, &gpio1_plat_data); 209AMBA_DEVICE(gpio1, "dev:gpio1", GPIO1, &gpio1_plat_data);
210AMBA_DEVICE(gpio2, "dev:e6", GPIO2, &gpio2_plat_data); 210AMBA_DEVICE(gpio2, "dev:gpio2", GPIO2, &gpio2_plat_data);
211AMBA_DEVICE(rtc, "dev:e8", EB_RTC, NULL); 211AMBA_DEVICE(rtc, "dev:rtc", EB_RTC, NULL);
212AMBA_DEVICE(sci0, "dev:f0", SCI, NULL); 212AMBA_DEVICE(sci0, "dev:sci0", SCI, NULL);
213AMBA_DEVICE(uart0, "dev:f1", EB_UART0, NULL); 213AMBA_DEVICE(uart0, "dev:uart0", EB_UART0, NULL);
214AMBA_DEVICE(uart1, "dev:f2", EB_UART1, NULL); 214AMBA_DEVICE(uart1, "dev:uart1", EB_UART1, NULL);
215AMBA_DEVICE(uart2, "dev:f3", EB_UART2, NULL); 215AMBA_DEVICE(uart2, "dev:uart2", EB_UART2, NULL);
216AMBA_DEVICE(ssp0, "dev:f4", EB_SSP, NULL); 216AMBA_DEVICE(ssp0, "dev:ssp0", EB_SSP, NULL);
217 217
218static struct amba_device *amba_devs[] __initdata = { 218static struct amba_device *amba_devs[] __initdata = {
219 &dmac_device, 219 &dmac_device,
diff --git a/arch/arm/mach-realview/realview_pb1176.c b/arch/arm/mach-realview/realview_pb1176.c
index 17fbb0e889b6..2817fe099319 100644
--- a/arch/arm/mach-realview/realview_pb1176.c
+++ b/arch/arm/mach-realview/realview_pb1176.c
@@ -24,6 +24,7 @@
24#include <linux/sysdev.h> 24#include <linux/sysdev.h>
25#include <linux/amba/bus.h> 25#include <linux/amba/bus.h>
26#include <linux/amba/pl061.h> 26#include <linux/amba/pl061.h>
27#include <linux/amba/mmci.h>
27#include <linux/io.h> 28#include <linux/io.h>
28 29
29#include <mach/hardware.h> 30#include <mach/hardware.h>
@@ -37,7 +38,6 @@
37#include <asm/mach/arch.h> 38#include <asm/mach/arch.h>
38#include <asm/mach/flash.h> 39#include <asm/mach/flash.h>
39#include <asm/mach/map.h> 40#include <asm/mach/map.h>
40#include <asm/mach/mmc.h>
41#include <asm/mach/time.h> 41#include <asm/mach/time.h>
42 42
43#include <mach/board-pb1176.h> 43#include <mach/board-pb1176.h>
@@ -170,29 +170,29 @@ static struct pl061_platform_data gpio2_plat_data = {
170#define PB1176_SSP_DMA { 9, 8 } 170#define PB1176_SSP_DMA { 9, 8 }
171 171
172/* FPGA Primecells */ 172/* FPGA Primecells */
173AMBA_DEVICE(aaci, "fpga:04", AACI, NULL); 173AMBA_DEVICE(aaci, "fpga:aaci", AACI, NULL);
174AMBA_DEVICE(mmc0, "fpga:05", MMCI0, &realview_mmc0_plat_data); 174AMBA_DEVICE(mmc0, "fpga:mmc0", MMCI0, &realview_mmc0_plat_data);
175AMBA_DEVICE(kmi0, "fpga:06", KMI0, NULL); 175AMBA_DEVICE(kmi0, "fpga:kmi0", KMI0, NULL);
176AMBA_DEVICE(kmi1, "fpga:07", KMI1, NULL); 176AMBA_DEVICE(kmi1, "fpga:kmi1", KMI1, NULL);
177AMBA_DEVICE(uart3, "fpga:09", PB1176_UART3, NULL); 177AMBA_DEVICE(uart3, "fpga:uart3", PB1176_UART3, NULL);
178 178
179/* DevChip Primecells */ 179/* DevChip Primecells */
180AMBA_DEVICE(smc, "dev:00", PB1176_SMC, NULL); 180AMBA_DEVICE(smc, "dev:smc", PB1176_SMC, NULL);
181AMBA_DEVICE(sctl, "dev:e0", SCTL, NULL); 181AMBA_DEVICE(sctl, "dev:sctl", SCTL, NULL);
182AMBA_DEVICE(wdog, "dev:e1", PB1176_WATCHDOG, NULL); 182AMBA_DEVICE(wdog, "dev:wdog", PB1176_WATCHDOG, NULL);
183AMBA_DEVICE(gpio0, "dev:e4", PB1176_GPIO0, &gpio0_plat_data); 183AMBA_DEVICE(gpio0, "dev:gpio0", PB1176_GPIO0, &gpio0_plat_data);
184AMBA_DEVICE(gpio1, "dev:e5", GPIO1, &gpio1_plat_data); 184AMBA_DEVICE(gpio1, "dev:gpio1", GPIO1, &gpio1_plat_data);
185AMBA_DEVICE(gpio2, "dev:e6", GPIO2, &gpio2_plat_data); 185AMBA_DEVICE(gpio2, "dev:gpio2", GPIO2, &gpio2_plat_data);
186AMBA_DEVICE(rtc, "dev:e8", PB1176_RTC, NULL); 186AMBA_DEVICE(rtc, "dev:rtc", PB1176_RTC, NULL);
187AMBA_DEVICE(sci0, "dev:f0", SCI, NULL); 187AMBA_DEVICE(sci0, "dev:sci0", SCI, NULL);
188AMBA_DEVICE(uart0, "dev:f1", PB1176_UART0, NULL); 188AMBA_DEVICE(uart0, "dev:uart0", PB1176_UART0, NULL);
189AMBA_DEVICE(uart1, "dev:f2", PB1176_UART1, NULL); 189AMBA_DEVICE(uart1, "dev:uart1", PB1176_UART1, NULL);
190AMBA_DEVICE(uart2, "dev:f3", PB1176_UART2, NULL); 190AMBA_DEVICE(uart2, "dev:uart2", PB1176_UART2, NULL);
191AMBA_DEVICE(ssp0, "dev:f4", PB1176_SSP, NULL); 191AMBA_DEVICE(ssp0, "dev:ssp0", PB1176_SSP, NULL);
192 192
193/* Primecells on the NEC ISSP chip */ 193/* Primecells on the NEC ISSP chip */
194AMBA_DEVICE(clcd, "issp:20", PB1176_CLCD, &clcd_plat_data); 194AMBA_DEVICE(clcd, "issp:clcd", PB1176_CLCD, &clcd_plat_data);
195//AMBA_DEVICE(dmac, "issp:30", PB1176_DMAC, NULL); 195//AMBA_DEVICE(dmac, "issp:dmac", PB1176_DMAC, NULL);
196 196
197static struct amba_device *amba_devs[] __initdata = { 197static struct amba_device *amba_devs[] __initdata = {
198// &dmac_device, 198// &dmac_device,
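
These RealView hunks (and the U300 ones below) also swap <asm/mach/mmc.h> for the relocated <linux/amba/mmci.h>, under which the platform data type is struct mmci_platform_data; the rename is visible in the mach-u300/mmc.c hunk later in this patch. A minimal board-side sketch under that assumption; the status hook and field values are illustrative:

        #include <linux/mmc/host.h>
        #include <linux/amba/mmci.h>

        /* sketch: board-provided card-detect poll; return 1 = card present */
        static unsigned int board_mmc_status(struct device *dev)
        {
                return 0;
        }

        static struct mmci_platform_data board_mmc0_plat_data = {
                .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34,
                .status   = board_mmc_status,
                .gpio_wp  = -1,         /* no write-protect GPIO */
                .gpio_cd  = -1,         /* no card-detect GPIO */
        };
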
diff --git a/arch/arm/mach-realview/realview_pb11mp.c b/arch/arm/mach-realview/realview_pb11mp.c
index fdd042b85f40..94680fcf726d 100644
--- a/arch/arm/mach-realview/realview_pb11mp.c
+++ b/arch/arm/mach-realview/realview_pb11mp.c
@@ -24,6 +24,7 @@
24#include <linux/sysdev.h> 24#include <linux/sysdev.h>
25#include <linux/amba/bus.h> 25#include <linux/amba/bus.h>
26#include <linux/amba/pl061.h> 26#include <linux/amba/pl061.h>
27#include <linux/amba/mmci.h>
27#include <linux/io.h> 28#include <linux/io.h>
28 29
29#include <mach/hardware.h> 30#include <mach/hardware.h>
@@ -38,7 +39,6 @@
38#include <asm/mach/arch.h> 39#include <asm/mach/arch.h>
39#include <asm/mach/flash.h> 40#include <asm/mach/flash.h>
40#include <asm/mach/map.h> 41#include <asm/mach/map.h>
41#include <asm/mach/mmc.h>
42#include <asm/mach/time.h> 42#include <asm/mach/time.h>
43 43
44#include <mach/board-pb11mp.h> 44#include <mach/board-pb11mp.h>
@@ -172,29 +172,29 @@ static struct pl061_platform_data gpio2_plat_data = {
172#define PB11MP_SSP_DMA { 9, 8 } 172#define PB11MP_SSP_DMA { 9, 8 }
173 173
174/* FPGA Primecells */ 174/* FPGA Primecells */
175AMBA_DEVICE(aaci, "fpga:04", AACI, NULL); 175AMBA_DEVICE(aaci, "fpga:aaci", AACI, NULL);
176AMBA_DEVICE(mmc0, "fpga:05", MMCI0, &realview_mmc0_plat_data); 176AMBA_DEVICE(mmc0, "fpga:mmc0", MMCI0, &realview_mmc0_plat_data);
177AMBA_DEVICE(kmi0, "fpga:06", KMI0, NULL); 177AMBA_DEVICE(kmi0, "fpga:kmi0", KMI0, NULL);
178AMBA_DEVICE(kmi1, "fpga:07", KMI1, NULL); 178AMBA_DEVICE(kmi1, "fpga:kmi1", KMI1, NULL);
179AMBA_DEVICE(uart3, "fpga:09", PB11MP_UART3, NULL); 179AMBA_DEVICE(uart3, "fpga:uart3", PB11MP_UART3, NULL);
180 180
181/* DevChip Primecells */ 181/* DevChip Primecells */
182AMBA_DEVICE(smc, "dev:00", PB11MP_SMC, NULL); 182AMBA_DEVICE(smc, "dev:smc", PB11MP_SMC, NULL);
183AMBA_DEVICE(sctl, "dev:e0", SCTL, NULL); 183AMBA_DEVICE(sctl, "dev:sctl", SCTL, NULL);
184AMBA_DEVICE(wdog, "dev:e1", PB11MP_WATCHDOG, NULL); 184AMBA_DEVICE(wdog, "dev:wdog", PB11MP_WATCHDOG, NULL);
185AMBA_DEVICE(gpio0, "dev:e4", PB11MP_GPIO0, &gpio0_plat_data); 185AMBA_DEVICE(gpio0, "dev:gpio0", PB11MP_GPIO0, &gpio0_plat_data);
186AMBA_DEVICE(gpio1, "dev:e5", GPIO1, &gpio1_plat_data); 186AMBA_DEVICE(gpio1, "dev:gpio1", GPIO1, &gpio1_plat_data);
187AMBA_DEVICE(gpio2, "dev:e6", GPIO2, &gpio2_plat_data); 187AMBA_DEVICE(gpio2, "dev:gpio2", GPIO2, &gpio2_plat_data);
188AMBA_DEVICE(rtc, "dev:e8", PB11MP_RTC, NULL); 188AMBA_DEVICE(rtc, "dev:rtc", PB11MP_RTC, NULL);
189AMBA_DEVICE(sci0, "dev:f0", SCI, NULL); 189AMBA_DEVICE(sci0, "dev:sci0", SCI, NULL);
190AMBA_DEVICE(uart0, "dev:f1", PB11MP_UART0, NULL); 190AMBA_DEVICE(uart0, "dev:uart0", PB11MP_UART0, NULL);
191AMBA_DEVICE(uart1, "dev:f2", PB11MP_UART1, NULL); 191AMBA_DEVICE(uart1, "dev:uart1", PB11MP_UART1, NULL);
192AMBA_DEVICE(uart2, "dev:f3", PB11MP_UART2, NULL); 192AMBA_DEVICE(uart2, "dev:uart2", PB11MP_UART2, NULL);
193AMBA_DEVICE(ssp0, "dev:f4", PB11MP_SSP, NULL); 193AMBA_DEVICE(ssp0, "dev:ssp0", PB11MP_SSP, NULL);
194 194
195/* Primecells on the NEC ISSP chip */ 195/* Primecells on the NEC ISSP chip */
196AMBA_DEVICE(clcd, "issp:20", PB11MP_CLCD, &clcd_plat_data); 196AMBA_DEVICE(clcd, "issp:clcd", PB11MP_CLCD, &clcd_plat_data);
197AMBA_DEVICE(dmac, "issp:30", DMAC, NULL); 197AMBA_DEVICE(dmac, "issp:dmac", DMAC, NULL);
198 198
199static struct amba_device *amba_devs[] __initdata = { 199static struct amba_device *amba_devs[] __initdata = {
200 &dmac_device, 200 &dmac_device,
diff --git a/arch/arm/mach-realview/realview_pba8.c b/arch/arm/mach-realview/realview_pba8.c
index 70bba9900d97..941beb2b9709 100644
--- a/arch/arm/mach-realview/realview_pba8.c
+++ b/arch/arm/mach-realview/realview_pba8.c
@@ -24,6 +24,7 @@
24#include <linux/sysdev.h> 24#include <linux/sysdev.h>
25#include <linux/amba/bus.h> 25#include <linux/amba/bus.h>
26#include <linux/amba/pl061.h> 26#include <linux/amba/pl061.h>
27#include <linux/amba/mmci.h>
27#include <linux/io.h> 28#include <linux/io.h>
28 29
29#include <asm/irq.h> 30#include <asm/irq.h>
@@ -34,7 +35,6 @@
34 35
35#include <asm/mach/arch.h> 36#include <asm/mach/arch.h>
36#include <asm/mach/map.h> 37#include <asm/mach/map.h>
37#include <asm/mach/mmc.h>
38#include <asm/mach/time.h> 38#include <asm/mach/time.h>
39 39
40#include <mach/hardware.h> 40#include <mach/hardware.h>
@@ -162,29 +162,29 @@ static struct pl061_platform_data gpio2_plat_data = {
162#define PBA8_SSP_DMA { 9, 8 } 162#define PBA8_SSP_DMA { 9, 8 }
163 163
164/* FPGA Primecells */ 164/* FPGA Primecells */
165AMBA_DEVICE(aaci, "fpga:04", AACI, NULL); 165AMBA_DEVICE(aaci, "fpga:aaci", AACI, NULL);
166AMBA_DEVICE(mmc0, "fpga:05", MMCI0, &realview_mmc0_plat_data); 166AMBA_DEVICE(mmc0, "fpga:mmc0", MMCI0, &realview_mmc0_plat_data);
167AMBA_DEVICE(kmi0, "fpga:06", KMI0, NULL); 167AMBA_DEVICE(kmi0, "fpga:kmi0", KMI0, NULL);
168AMBA_DEVICE(kmi1, "fpga:07", KMI1, NULL); 168AMBA_DEVICE(kmi1, "fpga:kmi1", KMI1, NULL);
169AMBA_DEVICE(uart3, "fpga:09", PBA8_UART3, NULL); 169AMBA_DEVICE(uart3, "fpga:uart3", PBA8_UART3, NULL);
170 170
171/* DevChip Primecells */ 171/* DevChip Primecells */
172AMBA_DEVICE(smc, "dev:00", PBA8_SMC, NULL); 172AMBA_DEVICE(smc, "dev:smc", PBA8_SMC, NULL);
173AMBA_DEVICE(sctl, "dev:e0", SCTL, NULL); 173AMBA_DEVICE(sctl, "dev:sctl", SCTL, NULL);
174AMBA_DEVICE(wdog, "dev:e1", PBA8_WATCHDOG, NULL); 174AMBA_DEVICE(wdog, "dev:wdog", PBA8_WATCHDOG, NULL);
175AMBA_DEVICE(gpio0, "dev:e4", PBA8_GPIO0, &gpio0_plat_data); 175AMBA_DEVICE(gpio0, "dev:gpio0", PBA8_GPIO0, &gpio0_plat_data);
176AMBA_DEVICE(gpio1, "dev:e5", GPIO1, &gpio1_plat_data); 176AMBA_DEVICE(gpio1, "dev:gpio1", GPIO1, &gpio1_plat_data);
177AMBA_DEVICE(gpio2, "dev:e6", GPIO2, &gpio2_plat_data); 177AMBA_DEVICE(gpio2, "dev:gpio2", GPIO2, &gpio2_plat_data);
178AMBA_DEVICE(rtc, "dev:e8", PBA8_RTC, NULL); 178AMBA_DEVICE(rtc, "dev:rtc", PBA8_RTC, NULL);
179AMBA_DEVICE(sci0, "dev:f0", SCI, NULL); 179AMBA_DEVICE(sci0, "dev:sci0", SCI, NULL);
180AMBA_DEVICE(uart0, "dev:f1", PBA8_UART0, NULL); 180AMBA_DEVICE(uart0, "dev:uart0", PBA8_UART0, NULL);
181AMBA_DEVICE(uart1, "dev:f2", PBA8_UART1, NULL); 181AMBA_DEVICE(uart1, "dev:uart1", PBA8_UART1, NULL);
182AMBA_DEVICE(uart2, "dev:f3", PBA8_UART2, NULL); 182AMBA_DEVICE(uart2, "dev:uart2", PBA8_UART2, NULL);
183AMBA_DEVICE(ssp0, "dev:f4", PBA8_SSP, NULL); 183AMBA_DEVICE(ssp0, "dev:ssp0", PBA8_SSP, NULL);
184 184
185/* Primecells on the NEC ISSP chip */ 185/* Primecells on the NEC ISSP chip */
186AMBA_DEVICE(clcd, "issp:20", PBA8_CLCD, &clcd_plat_data); 186AMBA_DEVICE(clcd, "issp:clcd", PBA8_CLCD, &clcd_plat_data);
187AMBA_DEVICE(dmac, "issp:30", DMAC, NULL); 187AMBA_DEVICE(dmac, "issp:dmac", DMAC, NULL);
188 188
189static struct amba_device *amba_devs[] __initdata = { 189static struct amba_device *amba_devs[] __initdata = {
190 &dmac_device, 190 &dmac_device,
diff --git a/arch/arm/mach-realview/realview_pbx.c b/arch/arm/mach-realview/realview_pbx.c
index ce6c5d25fbef..7e4bc6cdca52 100644
--- a/arch/arm/mach-realview/realview_pbx.c
+++ b/arch/arm/mach-realview/realview_pbx.c
@@ -23,6 +23,7 @@
23#include <linux/sysdev.h> 23#include <linux/sysdev.h>
24#include <linux/amba/bus.h> 24#include <linux/amba/bus.h>
25#include <linux/amba/pl061.h> 25#include <linux/amba/pl061.h>
26#include <linux/amba/mmci.h>
26#include <linux/io.h> 27#include <linux/io.h>
27 28
28#include <asm/irq.h> 29#include <asm/irq.h>
@@ -34,7 +35,6 @@
34 35
35#include <asm/mach/arch.h> 36#include <asm/mach/arch.h>
36#include <asm/mach/map.h> 37#include <asm/mach/map.h>
37#include <asm/mach/mmc.h>
38#include <asm/mach/time.h> 38#include <asm/mach/time.h>
39 39
40#include <mach/hardware.h> 40#include <mach/hardware.h>
@@ -182,29 +182,29 @@ static struct pl061_platform_data gpio2_plat_data = {
182#define PBX_SSP_DMA { 9, 8 } 182#define PBX_SSP_DMA { 9, 8 }
183 183
184/* FPGA Primecells */ 184/* FPGA Primecells */
185AMBA_DEVICE(aaci, "fpga:04", AACI, NULL); 185AMBA_DEVICE(aaci, "fpga:aaci", AACI, NULL);
186AMBA_DEVICE(mmc0, "fpga:05", MMCI0, &realview_mmc0_plat_data); 186AMBA_DEVICE(mmc0, "fpga:mmc0", MMCI0, &realview_mmc0_plat_data);
187AMBA_DEVICE(kmi0, "fpga:06", KMI0, NULL); 187AMBA_DEVICE(kmi0, "fpga:kmi0", KMI0, NULL);
188AMBA_DEVICE(kmi1, "fpga:07", KMI1, NULL); 188AMBA_DEVICE(kmi1, "fpga:kmi1", KMI1, NULL);
189AMBA_DEVICE(uart3, "fpga:09", PBX_UART3, NULL); 189AMBA_DEVICE(uart3, "fpga:uart3", PBX_UART3, NULL);
190 190
191/* DevChip Primecells */ 191/* DevChip Primecells */
192AMBA_DEVICE(smc, "dev:00", PBX_SMC, NULL); 192AMBA_DEVICE(smc, "dev:smc", PBX_SMC, NULL);
193AMBA_DEVICE(sctl, "dev:e0", SCTL, NULL); 193AMBA_DEVICE(sctl, "dev:sctl", SCTL, NULL);
194AMBA_DEVICE(wdog, "dev:e1", PBX_WATCHDOG, NULL); 194AMBA_DEVICE(wdog, "dev:wdog", PBX_WATCHDOG, NULL);
195AMBA_DEVICE(gpio0, "dev:e4", PBX_GPIO0, &gpio0_plat_data); 195AMBA_DEVICE(gpio0, "dev:gpio0", PBX_GPIO0, &gpio0_plat_data);
196AMBA_DEVICE(gpio1, "dev:e5", GPIO1, &gpio1_plat_data); 196AMBA_DEVICE(gpio1, "dev:gpio1", GPIO1, &gpio1_plat_data);
197AMBA_DEVICE(gpio2, "dev:e6", GPIO2, &gpio2_plat_data); 197AMBA_DEVICE(gpio2, "dev:gpio2", GPIO2, &gpio2_plat_data);
198AMBA_DEVICE(rtc, "dev:e8", PBX_RTC, NULL); 198AMBA_DEVICE(rtc, "dev:rtc", PBX_RTC, NULL);
199AMBA_DEVICE(sci0, "dev:f0", SCI, NULL); 199AMBA_DEVICE(sci0, "dev:sci0", SCI, NULL);
200AMBA_DEVICE(uart0, "dev:f1", PBX_UART0, NULL); 200AMBA_DEVICE(uart0, "dev:uart0", PBX_UART0, NULL);
201AMBA_DEVICE(uart1, "dev:f2", PBX_UART1, NULL); 201AMBA_DEVICE(uart1, "dev:uart1", PBX_UART1, NULL);
202AMBA_DEVICE(uart2, "dev:f3", PBX_UART2, NULL); 202AMBA_DEVICE(uart2, "dev:uart2", PBX_UART2, NULL);
203AMBA_DEVICE(ssp0, "dev:f4", PBX_SSP, NULL); 203AMBA_DEVICE(ssp0, "dev:ssp0", PBX_SSP, NULL);
204 204
205/* Primecells on the NEC ISSP chip */ 205/* Primecells on the NEC ISSP chip */
206AMBA_DEVICE(clcd, "issp:20", PBX_CLCD, &clcd_plat_data); 206AMBA_DEVICE(clcd, "issp:clcd", PBX_CLCD, &clcd_plat_data);
207AMBA_DEVICE(dmac, "issp:30", DMAC, NULL); 207AMBA_DEVICE(dmac, "issp:dmac", DMAC, NULL);
208 208
209static struct amba_device *amba_devs[] __initdata = { 209static struct amba_device *amba_devs[] __initdata = {
210 &dmac_device, 210 &dmac_device,
diff --git a/arch/arm/mach-s3c2410/Kconfig b/arch/arm/mach-s3c2410/Kconfig
index d8c023d4df30..3d4e9da3fa52 100644
--- a/arch/arm/mach-s3c2410/Kconfig
+++ b/arch/arm/mach-s3c2410/Kconfig
@@ -77,6 +77,7 @@ config ARCH_H1940
77 select CPU_S3C2410 77 select CPU_S3C2410
78 select PM_H1940 if PM 78 select PM_H1940 if PM
79 select S3C_DEV_USB_HOST 79 select S3C_DEV_USB_HOST
80 select S3C_DEV_NAND
80 help 81 help
 81 Say Y here if you are using the HP iPAQ H1940 82 Say Y here if you are using the HP iPAQ H1940
82 83
@@ -89,6 +90,7 @@ config MACH_N30
89 bool "Acer N30 family" 90 bool "Acer N30 family"
90 select CPU_S3C2410 91 select CPU_S3C2410
91 select S3C_DEV_USB_HOST 92 select S3C_DEV_USB_HOST
93 select S3C_DEV_NAND
92 help 94 help
 93 Say Y here if you want support for the Acer N30, Acer N35, 95 Say Y here if you want support for the Acer N30, Acer N35,
94 Navman PiN570, Yakumo AlphaX or Airis NC05 PDAs. 96 Navman PiN570, Yakumo AlphaX or Airis NC05 PDAs.
@@ -103,6 +105,7 @@ config ARCH_BAST
103 select S3C24XX_DCLK 105 select S3C24XX_DCLK
104 select ISA 106 select ISA
105 select S3C_DEV_USB_HOST 107 select S3C_DEV_USB_HOST
108 select S3C_DEV_NAND
106 help 109 help
107 Say Y here if you are using the Simtec Electronics EB2410ITX 110 Say Y here if you are using the Simtec Electronics EB2410ITX
108 development board (also known as BAST) 111 development board (also known as BAST)
@@ -111,6 +114,7 @@ config MACH_OTOM
111 bool "NexVision OTOM Board" 114 bool "NexVision OTOM Board"
112 select CPU_S3C2410 115 select CPU_S3C2410
113 select S3C_DEV_USB_HOST 116 select S3C_DEV_USB_HOST
117 select S3C_DEV_NAND
114 help 118 help
115 Say Y here if you are using the Nex Vision OTOM board 119 Say Y here if you are using the Nex Vision OTOM board
116 120
@@ -154,6 +158,7 @@ config MACH_QT2410
154 bool "QT2410" 158 bool "QT2410"
155 select CPU_S3C2410 159 select CPU_S3C2410
156 select S3C_DEV_USB_HOST 160 select S3C_DEV_USB_HOST
161 select S3C_DEV_NAND
157 help 162 help
158 Say Y here if you are using the Armzone QT2410 163 Say Y here if you are using the Armzone QT2410
159 164
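
The repeated "select S3C_DEV_NAND" additions in these Kconfig hunks put the shared NAND controller platform device behind one symbol instead of building it unconditionally. A sketch of what the symbol presumably gates, assuming the usual plat-s3c shared-device pattern (register resources omitted):

        /* sketch: shared device definition gated by CONFIG_S3C_DEV_NAND */
        struct platform_device s3c_device_nand = {
                .name = "s3c2410-nand",         /* matched by the S3C NAND driver */
                .id   = -1,
                /* .resource/.num_resources: controller registers, omitted */
        };

Boards that select the symbol then place &s3c_device_nand in their platform device arrays.
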
diff --git a/arch/arm/mach-s3c2412/Kconfig b/arch/arm/mach-s3c2412/Kconfig
index 35c1bde89cf2..c2bdc4635d12 100644
--- a/arch/arm/mach-s3c2412/Kconfig
+++ b/arch/arm/mach-s3c2412/Kconfig
@@ -48,6 +48,7 @@ config MACH_JIVE
48 bool "Logitech Jive" 48 bool "Logitech Jive"
49 select CPU_S3C2412 49 select CPU_S3C2412
50 select S3C_DEV_USB_HOST 50 select S3C_DEV_USB_HOST
51 select S3C_DEV_NAND
51 help 52 help
52 Say Y here if you are using the Logitech Jive. 53 Say Y here if you are using the Logitech Jive.
53 54
@@ -61,6 +62,7 @@ config MACH_SMDK2413
61 select MACH_S3C2413 62 select MACH_S3C2413
62 select MACH_SMDK 63 select MACH_SMDK
63 select S3C_DEV_USB_HOST 64 select S3C_DEV_USB_HOST
65 select S3C_DEV_NAND
64 help 66 help
65 Say Y here if you are using an SMDK2413 67 Say Y here if you are using an SMDK2413
66 68
@@ -84,6 +86,7 @@ config MACH_VSTMS
 84 bool "VSTMS" 86 bool "VSTMS"
85 select CPU_S3C2412 87 select CPU_S3C2412
86 select S3C_DEV_USB_HOST 88 select S3C_DEV_USB_HOST
89 select S3C_DEV_NAND
87 help 90 help
 88 Say Y here if you are using a VSTMS board 91 Say Y here if you are using a VSTMS board
89 92
diff --git a/arch/arm/mach-s3c2440/Kconfig b/arch/arm/mach-s3c2440/Kconfig
index 8ae1b288f7fa..d7bba919a77e 100644
--- a/arch/arm/mach-s3c2440/Kconfig
+++ b/arch/arm/mach-s3c2440/Kconfig
@@ -48,6 +48,7 @@ config MACH_OSIRIS
48 select S3C2440_XTAL_12000000 48 select S3C2440_XTAL_12000000
49 select S3C2410_IOTIMING if S3C2440_CPUFREQ 49 select S3C2410_IOTIMING if S3C2440_CPUFREQ
50 select S3C_DEV_USB_HOST 50 select S3C_DEV_USB_HOST
51 select S3C_DEV_NAND
51 help 52 help
52 Say Y here if you are using the Simtec IM2440D20 module, also 53 Say Y here if you are using the Simtec IM2440D20 module, also
53 known as the Osiris. 54 known as the Osiris.
@@ -57,6 +58,7 @@ config MACH_RX3715
57 select CPU_S3C2440 58 select CPU_S3C2440
58 select S3C2440_XTAL_16934400 59 select S3C2440_XTAL_16934400
59 select PM_H1940 if PM 60 select PM_H1940 if PM
61 select S3C_DEV_NAND
60 help 62 help
61 Say Y here if you are using the HP iPAQ rx3715. 63 Say Y here if you are using the HP iPAQ rx3715.
62 64
@@ -66,6 +68,7 @@ config ARCH_S3C2440
66 select S3C2440_XTAL_16934400 68 select S3C2440_XTAL_16934400
67 select MACH_SMDK 69 select MACH_SMDK
68 select S3C_DEV_USB_HOST 70 select S3C_DEV_USB_HOST
71 select S3C_DEV_NAND
69 help 72 help
70 Say Y here if you are using the SMDK2440. 73 Say Y here if you are using the SMDK2440.
71 74
@@ -74,6 +77,7 @@ config MACH_NEXCODER_2440
74 select CPU_S3C2440 77 select CPU_S3C2440
75 select S3C2440_XTAL_12000000 78 select S3C2440_XTAL_12000000
76 select S3C_DEV_USB_HOST 79 select S3C_DEV_USB_HOST
80 select S3C_DEV_NAND
77 help 81 help
78 Say Y here if you are using the Nex Vision NEXCODER 2440 Light Board 82 Say Y here if you are using the Nex Vision NEXCODER 2440 Light Board
79 83
@@ -88,6 +92,7 @@ config MACH_AT2440EVB
88 bool "Avantech AT2440EVB development board" 92 bool "Avantech AT2440EVB development board"
89 select CPU_S3C2440 93 select CPU_S3C2440
90 select S3C_DEV_USB_HOST 94 select S3C_DEV_USB_HOST
95 select S3C_DEV_NAND
91 help 96 help
92 Say Y here if you are using the AT2440EVB development board 97 Say Y here if you are using the AT2440EVB development board
93 98
@@ -97,6 +102,7 @@ config MACH_MINI2440
97 select EEPROM_AT24 102 select EEPROM_AT24
98 select LEDS_TRIGGER_BACKLIGHT 103 select LEDS_TRIGGER_BACKLIGHT
99 select SND_S3C24XX_SOC_S3C24XX_UDA134X 104 select SND_S3C24XX_SOC_S3C24XX_UDA134X
105 select S3C_DEV_NAND
100 help 106 help
 101 Say Y here to select support for the MINI2440. It is a 10cm x 10cm board 107 Say Y here to select support for the MINI2440. It is a 10cm x 10cm board
 102 available from various sources. It can come with a 3.5" or 7" touch LCD. 108 available from various sources. It can come with a 3.5" or 7" touch LCD.
diff --git a/arch/arm/mach-s3c6400/Kconfig b/arch/arm/mach-s3c6400/Kconfig
index f5af212066c3..770b72067e3d 100644
--- a/arch/arm/mach-s3c6400/Kconfig
+++ b/arch/arm/mach-s3c6400/Kconfig
@@ -26,6 +26,7 @@ config MACH_SMDK6400
26 bool "SMDK6400" 26 bool "SMDK6400"
27 select CPU_S3C6400 27 select CPU_S3C6400
28 select S3C_DEV_HSMMC 28 select S3C_DEV_HSMMC
29 select S3C_DEV_NAND
29 select S3C6400_SETUP_SDHCI 30 select S3C6400_SETUP_SDHCI
30 help 31 help
31 Machine support for the Samsung SMDK6400 32 Machine support for the Samsung SMDK6400
diff --git a/arch/arm/mach-s3c6410/Kconfig b/arch/arm/mach-s3c6410/Kconfig
index f9d0f09f9761..53fc3ff657f7 100644
--- a/arch/arm/mach-s3c6410/Kconfig
+++ b/arch/arm/mach-s3c6410/Kconfig
@@ -102,6 +102,7 @@ config MACH_HMT
102 bool "Airgoo HMT" 102 bool "Airgoo HMT"
103 select CPU_S3C6410 103 select CPU_S3C6410
104 select S3C_DEV_FB 104 select S3C_DEV_FB
105 select S3C_DEV_NAND
105 select S3C_DEV_USB_HOST 106 select S3C_DEV_USB_HOST
106 select S3C64XX_SETUP_FB_24BPP 107 select S3C64XX_SETUP_FB_24BPP
107 select HAVE_PWM 108 select HAVE_PWM
diff --git a/arch/arm/mach-sa1100/dma.c b/arch/arm/mach-sa1100/dma.c
index 95f9c5a6d6d5..cb4521a6f42d 100644
--- a/arch/arm/mach-sa1100/dma.c
+++ b/arch/arm/mach-sa1100/dma.c
@@ -39,7 +39,7 @@ typedef struct {
39 39
40static sa1100_dma_t dma_chan[SA1100_DMA_CHANNELS]; 40static sa1100_dma_t dma_chan[SA1100_DMA_CHANNELS];
41 41
42static spinlock_t dma_list_lock; 42static DEFINE_SPINLOCK(dma_list_lock);
43 43
44 44
45static irqreturn_t dma_irq_handler(int irq, void *dev_id) 45static irqreturn_t dma_irq_handler(int irq, void *dev_id)
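
The one-line dma.c change above matters because a bare "static spinlock_t" is only zero-initialized, which is not a valid unlocked state once spinlock debugging or lockdep is enabled. DEFINE_SPINLOCK() expands to a properly initialized lock; a minimal sketch of correct declaration and use:

        #include <linux/spinlock.h>

        /* These two forms are equivalent; the bare "static spinlock_t x;" is not. */
        static DEFINE_SPINLOCK(list_lock);
        /* static spinlock_t list_lock = __SPIN_LOCK_UNLOCKED(list_lock); */

        static void touch_list(void)
        {
                unsigned long flags;

                /* irqsave variant: the lock is also taken from the DMA IRQ handler */
                spin_lock_irqsave(&list_lock, flags);
                /* ... walk or modify the channel list ... */
                spin_unlock_irqrestore(&list_lock, flags);
        }
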
diff --git a/arch/arm/mach-u300/Kconfig b/arch/arm/mach-u300/Kconfig
index 337b9aabce49..801b21e7f677 100644
--- a/arch/arm/mach-u300/Kconfig
+++ b/arch/arm/mach-u300/Kconfig
@@ -81,6 +81,18 @@ config MACH_U300_SEMI_IS_SHARED
 81 Memory Interface) from both the access and application 81 Memory Interface) from both the access and application
82 side. 82 side.
83 83
84config MACH_U300_SPIDUMMY
85 bool "SSP/SPI dummy chip"
86 select SPI
87 select SPI_MASTER
88 select SPI_PL022
89 help
90 This creates a small kernel module that creates a dummy
91 SPI device to be used for loopback tests. Regularly used
92 to test reference designs. If you're not testing SPI,
93 you don't need it. Selecting this will activate the
94 SPI framework and ARM PL022 support.
95
84comment "All the settings below must match the bootloader's settings" 96comment "All the settings below must match the bootloader's settings"
85 97
86config MACH_U300_ACCESS_MEM_SIZE 98config MACH_U300_ACCESS_MEM_SIZE
diff --git a/arch/arm/mach-u300/Makefile b/arch/arm/mach-u300/Makefile
index 24950e0df4b4..885b5c027c1e 100644
--- a/arch/arm/mach-u300/Makefile
+++ b/arch/arm/mach-u300/Makefile
@@ -9,3 +9,6 @@ obj- :=
9 9
10obj-$(CONFIG_ARCH_U300) += u300.o 10obj-$(CONFIG_ARCH_U300) += u300.o
11obj-$(CONFIG_MMC) += mmc.o 11obj-$(CONFIG_MMC) += mmc.o
12obj-$(CONFIG_SPI_PL022) += spi.o
13obj-$(CONFIG_MACH_U300_SPIDUMMY) += dummyspichip.o
14obj-$(CONFIG_I2C_STU300) += i2c.o
diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c
index 2e9b8ccd8ec2..be60d6deee8b 100644
--- a/arch/arm/mach-u300/core.c
+++ b/arch/arm/mach-u300/core.c
@@ -32,6 +32,8 @@
32 32
33#include "clock.h" 33#include "clock.h"
34#include "mmc.h" 34#include "mmc.h"
35#include "spi.h"
36#include "i2c.h"
35 37
36/* 38/*
37 * Static I/O mappings that are needed for booting the U300 platforms. The 39 * Static I/O mappings that are needed for booting the U300 platforms. The
@@ -378,14 +380,14 @@ static struct platform_device wdog_device = {
378}; 380};
379 381
380static struct platform_device i2c0_device = { 382static struct platform_device i2c0_device = {
381 .name = "stddci2c", 383 .name = "stu300",
382 .id = 0, 384 .id = 0,
383 .num_resources = ARRAY_SIZE(i2c0_resources), 385 .num_resources = ARRAY_SIZE(i2c0_resources),
384 .resource = i2c0_resources, 386 .resource = i2c0_resources,
385}; 387};
386 388
387static struct platform_device i2c1_device = { 389static struct platform_device i2c1_device = {
388 .name = "stddci2c", 390 .name = "stu300",
389 .id = 1, 391 .id = 1,
390 .num_resources = ARRAY_SIZE(i2c1_resources), 392 .num_resources = ARRAY_SIZE(i2c1_resources),
391 .resource = i2c1_resources, 393 .resource = i2c1_resources,
@@ -611,6 +613,8 @@ void __init u300_init_devices(void)
611 /* Wait for the PLL208 to lock if not locked in yet */ 613 /* Wait for the PLL208 to lock if not locked in yet */
612 while (!(readw(U300_SYSCON_VBASE + U300_SYSCON_CSR) & 614 while (!(readw(U300_SYSCON_VBASE + U300_SYSCON_CSR) &
613 U300_SYSCON_CSR_PLL208_LOCK_IND)); 615 U300_SYSCON_CSR_PLL208_LOCK_IND));
616 /* Initialize SPI device with some board specifics */
617 u300_spi_init(&pl022_device);
614 618
615 /* Register the AMBA devices in the AMBA bus abstraction layer */ 619 /* Register the AMBA devices in the AMBA bus abstraction layer */
616 u300_clock_primecells(); 620 u300_clock_primecells();
@@ -622,6 +626,12 @@ void __init u300_init_devices(void)
622 626
623 u300_assign_physmem(); 627 u300_assign_physmem();
624 628
629 /* Register subdevices on the I2C buses */
630 u300_i2c_register_board_devices();
631
632 /* Register subdevices on the SPI bus */
633 u300_spi_register_board_devices();
634
625 /* Register the platform devices */ 635 /* Register the platform devices */
626 platform_add_devices(platform_devs, ARRAY_SIZE(platform_devs)); 636 platform_add_devices(platform_devs, ARRAY_SIZE(platform_devs));
627 637
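
u300_spi_init() and u300_spi_register_board_devices() are defined in the new spi.c, which is not part of this excerpt. A plausible sketch of the registration half, assuming it wires up the dummy loopback chip added below; the bus number, speed and chip select are illustrative:

        #include <linux/kernel.h>
        #include <linux/init.h>
        #include <linux/spi/spi.h>

        static struct spi_board_info u300_spi_devices[] __initdata = {
                {
                        .modalias     = "spi-dummy",    /* binds to pl022_dummy_driver below */
                        .max_speed_hz = 1000000,
                        .bus_num      = 0,
                        .chip_select  = 0,
                        /* .controller_data: a struct pl022_config_chip, omitted */
                },
        };

        void __init u300_spi_register_board_devices(void)
        {
                spi_register_board_info(u300_spi_devices,
                                        ARRAY_SIZE(u300_spi_devices));
        }
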
diff --git a/arch/arm/mach-u300/dummyspichip.c b/arch/arm/mach-u300/dummyspichip.c
new file mode 100644
index 000000000000..962f9de454de
--- /dev/null
+++ b/arch/arm/mach-u300/dummyspichip.c
@@ -0,0 +1,290 @@
1/*
2 * arch/arm/mach-u300/dummyspichip.c
3 *
4 * Copyright (C) 2007-2009 ST-Ericsson AB
5 * License terms: GNU General Public License (GPL) version 2
6 * This is a dummy loopback SPI "chip" used for testing SPI.
7 * Author: Linus Walleij <linus.walleij@stericsson.com>
8 */
9#include <linux/init.h>
10#include <linux/module.h>
11#include <linux/kernel.h>
12#include <linux/device.h>
13#include <linux/err.h>
14#include <linux/sysfs.h>
15#include <linux/mutex.h>
16#include <linux/spi/spi.h>
17#include <linux/dma-mapping.h>
18/*
19 * WARNING! Do not include this pl022-specific controller header
20 * for any generic driver. It is only done in this dummy chip
21 * because we alter the chip configuration in order to test some
22 * different settings on the loopback device. Normal chip configs
23 * shall be STATIC and not altered by the driver!
24 */
25#include <linux/amba/pl022.h>
26
27struct dummy {
28 struct device *dev;
29 struct mutex lock;
30};
31
32#define DMA_TEST_SIZE 2048
33
34/* When we cat /sys/bus/spi/devices/spi0.0/looptest this will be triggered */
35static ssize_t dummy_looptest(struct device *dev,
36 struct device_attribute *attr, char *buf)
37{
38 struct spi_device *spi = to_spi_device(dev);
39 struct dummy *p_dummy = dev_get_drvdata(&spi->dev);
40
41 /*
42 * WARNING! Do not dereference the chip-specific data in any normal
43 * driver for a chip. It is usually STATIC and shall not be read
44 * or written to. Your chip driver should NOT depend on fields in this
45 * struct, this is just used here to alter the behaviour of the chip
46 * in order to perform tests.
47 */
48 struct pl022_config_chip *chip_info = spi->controller_data;
49 int status;
50 u8 txbuf[14] = {0xDE, 0xAD, 0xBE, 0xEF, 0x2B, 0xAD,
51 0xCA, 0xFE, 0xBA, 0xBE, 0xB1, 0x05,
52 0xF0, 0x0D};
53 u8 rxbuf[14];
54 u8 *bigtxbuf_virtual;
55 u8 *bigrxbuf_virtual;
56
57 if (mutex_lock_interruptible(&p_dummy->lock))
58 return -ERESTARTSYS;
59
60 bigtxbuf_virtual = kmalloc(DMA_TEST_SIZE, GFP_KERNEL);
61 if (bigtxbuf_virtual == NULL) {
62 status = -ENOMEM;
63 goto out;
64 }
65 bigrxbuf_virtual = kmalloc(DMA_TEST_SIZE, GFP_KERNEL);
66
67 /* Fill TXBUF with some happy pattern */
68 memset(bigtxbuf_virtual, 0xAA, DMA_TEST_SIZE);
69
70 /*
71 * Force chip to 8 bit mode
72 * WARNING: NEVER DO THIS IN REAL DRIVER CODE, THIS SHOULD BE STATIC!
73 */
74 chip_info->data_size = SSP_DATA_BITS_8;
75 /* You should NOT DO THIS EITHER */
76 spi->master->setup(spi);
77
78 /* Now run the tests for 8bit mode */
79 pr_info("Simple test 1: write 0xAA byte, read back garbage byte "
80 "in 8bit mode\n");
81 status = spi_w8r8(spi, 0xAA);
82 if (status < 0)
83 pr_warning("Siple test 1: FAILURE: spi_write_then_read "
84 "failed with status %d\n", status);
85 else
86 pr_info("Simple test 1: SUCCESS!\n");
87
88 pr_info("Simple test 2: write 8 bytes, read back 8 bytes garbage "
89 "in 8bit mode (full FIFO)\n");
90 status = spi_write_then_read(spi, &txbuf[0], 8, &rxbuf[0], 8);
91 if (status < 0)
92 pr_warning("Simple test 2: FAILURE: spi_write_then_read() "
93 "failed with status %d\n", status);
94 else
95 pr_info("Simple test 2: SUCCESS!\n");
96
97 pr_info("Simple test 3: write 14 bytes, read back 14 bytes garbage "
98 "in 8bit mode (see if we overflow FIFO)\n");
99 status = spi_write_then_read(spi, &txbuf[0], 14, &rxbuf[0], 14);
100 if (status < 0)
101 pr_warning("Simple test 3: FAILURE: failed with status %d "
102 "(probably FIFO overrun)\n", status);
103 else
104 pr_info("Simple test 3: SUCCESS!\n");
105
106 pr_info("Simple test 4: write 8 bytes with spi_write(), read 8 "
107 "bytes garbage with spi_read() in 8bit mode\n");
108 status = spi_write(spi, &txbuf[0], 8);
109 if (status < 0)
110 pr_warning("Simple test 4 step 1: FAILURE: spi_write() "
111 "failed with status %d\n", status);
112 else
113 pr_info("Simple test 4 step 1: SUCCESS!\n");
114 status = spi_read(spi, &rxbuf[0], 8);
115 if (status < 0)
116 pr_warning("Simple test 4 step 2: FAILURE: spi_read() "
117 "failed with status %d\n", status);
118 else
119 pr_info("Simple test 4 step 2: SUCCESS!\n");
120
121 pr_info("Simple test 5: write 14 bytes with spi_write(), read "
122 "14 bytes garbage with spi_read() in 8bit mode\n");
123 status = spi_write(spi, &txbuf[0], 14);
124 if (status < 0)
125 pr_warning("Simple test 5 step 1: FAILURE: spi_write() "
126 "failed with status %d (probably FIFO overrun)\n",
127 status);
128 else
129 pr_info("Simple test 5 step 1: SUCCESS!\n");
130 status = spi_read(spi, &rxbuf[0], 14);
131 if (status < 0)
132 pr_warning("Simple test 5 step 2: FAILURE: spi_read() "
133 "failed with status %d (probably FIFO overrun)\n",
134 status);
135 else
136 pr_info("Simple test 5: SUCCESS!\n");
137
138 pr_info("Simple test 6: write %d bytes with spi_write(), "
139 "read %d bytes garbage with spi_read() in 8bit mode\n",
140 DMA_TEST_SIZE, DMA_TEST_SIZE);
141 status = spi_write(spi, &bigtxbuf_virtual[0], DMA_TEST_SIZE);
142 if (status < 0)
143 pr_warning("Simple test 6 step 1: FAILURE: spi_write() "
144 "failed with status %d (probably FIFO overrun)\n",
145 status);
146 else
147 pr_info("Simple test 6 step 1: SUCCESS!\n");
148 status = spi_read(spi, &bigrxbuf_virtual[0], DMA_TEST_SIZE);
149 if (status < 0)
150 pr_warning("Simple test 6 step 2: FAILURE: spi_read() "
151 "failed with status %d (probably FIFO overrun)\n",
152 status);
153 else
154 pr_info("Simple test 6: SUCCESS!\n");
155
156
157 /*
158 * Force chip to 16 bit mode
159 * WARNING: NEVER DO THIS IN REAL DRIVER CODE, THIS SHOULD BE STATIC!
160 */
161 chip_info->data_size = SSP_DATA_BITS_16;
162 /* You should NOT DO THIS EITHER */
163 spi->master->setup(spi);
164
165 pr_info("Simple test 7: write 0xAA byte, read back garbage byte "
166 "in 16bit bus mode\n");
167 status = spi_w8r8(spi, 0xAA);
168 if (status == -EIO)
169 pr_info("Simple test 7: SUCCESS! (expected failure with "
170 "status EIO)\n");
171 else if (status < 0)
172 pr_warning("Siple test 7: FAILURE: spi_write_then_read "
173 "failed with status %d\n", status);
174 else
175 pr_warning("Siple test 7: FAILURE: spi_write_then_read "
176 "succeeded but it was expected to fail!\n");
177
178 pr_info("Simple test 8: write 8 bytes, read back 8 bytes garbage "
179 "in 16bit mode (full FIFO)\n");
180 status = spi_write_then_read(spi, &txbuf[0], 8, &rxbuf[0], 8);
181 if (status < 0)
182 pr_warning("Simple test 8: FAILURE: spi_write_then_read() "
183 "failed with status %d\n", status);
184 else
185 pr_info("Simple test 8: SUCCESS!\n");
186
187 pr_info("Simple test 9: write 14 bytes, read back 14 bytes garbage "
188 "in 16bit mode (see if we overflow FIFO)\n");
189 status = spi_write_then_read(spi, &txbuf[0], 14, &rxbuf[0], 14);
190 if (status < 0)
191 pr_warning("Simple test 9: FAILURE: failed with status %d "
192 "(probably FIFO overrun)\n", status);
193 else
194 pr_info("Simple test 9: SUCCESS!\n");
195
196 pr_info("Simple test 10: write %d bytes with spi_write(), "
197 "read %d bytes garbage with spi_read() in 16bit mode\n",
198 DMA_TEST_SIZE, DMA_TEST_SIZE);
199 status = spi_write(spi, &bigtxbuf_virtual[0], DMA_TEST_SIZE);
200 if (status < 0)
201 pr_warning("Simple test 10 step 1: FAILURE: spi_write() "
202 "failed with status %d (probably FIFO overrun)\n",
203 status);
204 else
205 pr_info("Simple test 10 step 1: SUCCESS!\n");
206
207 status = spi_read(spi, &bigrxbuf_virtual[0], DMA_TEST_SIZE);
208 if (status < 0)
209 pr_warning("Simple test 10 step 2: FAILURE: spi_read() "
210 "failed with status %d (probably FIFO overrun)\n",
211 status);
212 else
213 pr_info("Simple test 10: SUCCESS!\n");
214
215 status = sprintf(buf, "loop test complete\n");
216 kfree(bigrxbuf_virtual);
217 kfree(bigtxbuf_virtual);
218 out:
219 mutex_unlock(&p_dummy->lock);
220 return status;
221}
222
223static DEVICE_ATTR(looptest, S_IRUGO, dummy_looptest, NULL);
224
225static int __devinit pl022_dummy_probe(struct spi_device *spi)
226{
227 struct dummy *p_dummy;
228 int status;
229
230 dev_info(&spi->dev, "probing dummy SPI device\n");
231
232 p_dummy = kzalloc(sizeof *p_dummy, GFP_KERNEL);
233 if (!p_dummy)
234 return -ENOMEM;
235
236 dev_set_drvdata(&spi->dev, p_dummy);
237 mutex_init(&p_dummy->lock);
238
239 /* sysfs hook */
240 status = device_create_file(&spi->dev, &dev_attr_looptest);
241 if (status) {
242 dev_dbg(&spi->dev, "device_create_file looptest failure.\n");
243 goto out_dev_create_looptest_failed;
244 }
245
246 return 0;
247
248out_dev_create_looptest_failed:
249 dev_set_drvdata(&spi->dev, NULL);
250 kfree(p_dummy);
251 return status;
252}
253
254static int __devexit pl022_dummy_remove(struct spi_device *spi)
255{
256 struct dummy *p_dummy = dev_get_drvdata(&spi->dev);
257
258 dev_info(&spi->dev, "removing dummy SPI device\n");
259 device_remove_file(&spi->dev, &dev_attr_looptest);
260 dev_set_drvdata(&spi->dev, NULL);
261 kfree(p_dummy);
262
263 return 0;
264}
265
266static struct spi_driver pl022_dummy_driver = {
267 .driver = {
268 .name = "spi-dummy",
269 .owner = THIS_MODULE,
270 },
271 .probe = pl022_dummy_probe,
272 .remove = __devexit_p(pl022_dummy_remove),
273};
274
275static int __init pl022_init_dummy(void)
276{
277 return spi_register_driver(&pl022_dummy_driver);
278}
279
280static void __exit pl022_exit_dummy(void)
281{
282 spi_unregister_driver(&pl022_dummy_driver);
283}
284
285module_init(pl022_init_dummy);
286module_exit(pl022_exit_dummy);
287
288MODULE_AUTHOR("Linus Walleij <linus.walleij@stericsson.com>");
289MODULE_DESCRIPTION("PL022 SSP/SPI DUMMY Linux driver");
290MODULE_LICENSE("GPL");
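
One robustness gap in dummy_looptest() above: the bigtxbuf_virtual allocation is checked, but the bigrxbuf_virtual allocation on line 65 of the new file is not. A minimal fix sketch:

                bigrxbuf_virtual = kmalloc(DMA_TEST_SIZE, GFP_KERNEL);
                if (bigrxbuf_virtual == NULL) {
                        status = -ENOMEM;
                        kfree(bigtxbuf_virtual);        /* don't leak the TX buffer */
                        goto out;
                }

The test itself is driven entirely from user space, e.g. "cat /sys/bus/spi/devices/spi0.0/looptest"; the spi0.0 name depends on the board's bus and chip-select numbering.
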
diff --git a/arch/arm/mach-u300/gpio.c b/arch/arm/mach-u300/gpio.c
index 308cdb197a92..63c8f27fb15a 100644
--- a/arch/arm/mach-u300/gpio.c
+++ b/arch/arm/mach-u300/gpio.c
@@ -25,11 +25,6 @@
25#include <linux/platform_device.h> 25#include <linux/platform_device.h>
26#include <linux/gpio.h> 26#include <linux/gpio.h>
27 27
28/* Need access to SYSCON registers for PADmuxing */
29#include <mach/syscon.h>
30
31#include "padmux.h"
32
33/* Reference to GPIO block clock */ 28/* Reference to GPIO block clock */
34static struct clk *clk; 29static struct clk *clk;
35 30
@@ -606,14 +601,6 @@ static int __init gpio_probe(struct platform_device *pdev)
606 writel(U300_GPIO_CR_BLOCK_CLKRQ_ENABLE, virtbase + U300_GPIO_CR); 601 writel(U300_GPIO_CR_BLOCK_CLKRQ_ENABLE, virtbase + U300_GPIO_CR);
607#endif 602#endif
608 603
609 /* Set up some padmuxing here */
610#ifdef CONFIG_MMC
611 pmx_set_mission_mode_mmc();
612#endif
613#ifdef CONFIG_SPI_PL022
614 pmx_set_mission_mode_spi();
615#endif
616
617 gpio_set_initial_values(); 604 gpio_set_initial_values();
618 605
619 for (num_irqs = 0 ; num_irqs < U300_GPIO_NUM_PORTS; num_irqs++) { 606 for (num_irqs = 0 ; num_irqs < U300_GPIO_NUM_PORTS; num_irqs++) {
diff --git a/arch/arm/mach-u300/i2c.c b/arch/arm/mach-u300/i2c.c
new file mode 100644
index 000000000000..10be1f888b27
--- /dev/null
+++ b/arch/arm/mach-u300/i2c.c
@@ -0,0 +1,43 @@
1/*
2 * arch/arm/mach-u300/i2c.c
3 *
4 * Copyright (C) 2009 ST-Ericsson AB
5 * License terms: GNU General Public License (GPL) version 2
6 *
7 * Register board i2c devices
8 * Author: Linus Walleij <linus.walleij@stericsson.com>
9 */
10#include <linux/kernel.h>
11#include <linux/i2c.h>
12#include <mach/irqs.h>
13
14static struct i2c_board_info __initdata bus0_i2c_board_info[] = {
15 {
16 .type = "ab3100",
17 .addr = 0x48,
18 .irq = IRQ_U300_IRQ0_EXT,
19 },
20};
21
22static struct i2c_board_info __initdata bus1_i2c_board_info[] = {
23#ifdef CONFIG_MACH_U300_BS335
24 {
25 .type = "fwcam",
26 .addr = 0x10,
27 },
28 {
29 .type = "fwcam",
30 .addr = 0x5d,
31 },
32#else
33 { },
34#endif
35};
36
37void __init u300_i2c_register_board_devices(void)
38{
39 i2c_register_board_info(0, bus0_i2c_board_info,
40 ARRAY_SIZE(bus0_i2c_board_info));
41 i2c_register_board_info(1, bus1_i2c_board_info,
42 ARRAY_SIZE(bus1_i2c_board_info));
43}
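
The .type strings registered above become the I2C client names that driver id tables bind against, and i2c_register_board_info() must run before the corresponding adapter is registered, which is why core.c calls it during early device init. A driver-side sketch of the matching table, shown for the ab3100 entry (standard I2C API):

        #include <linux/i2c.h>

        /* sketch: the "ab3100" board info above binds to a table like this */
        static const struct i2c_device_id ab3100_id[] = {
                { "ab3100", 0 },
                { }
        };
        MODULE_DEVICE_TABLE(i2c, ab3100_id);
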
diff --git a/arch/arm/mach-u300/i2c.h b/arch/arm/mach-u300/i2c.h
new file mode 100644
index 000000000000..485c02e5c06d
--- /dev/null
+++ b/arch/arm/mach-u300/i2c.h
@@ -0,0 +1,23 @@
1/*
2 * arch/arm/mach-u300/i2c.h
3 *
4 * Copyright (C) 2009 ST-Ericsson AB
5 * License terms: GNU General Public License (GPL) version 2
6 *
7 * Register board i2c devices
8 * Author: Linus Walleij <linus.walleij@stericsson.com>
9 */
10
11#ifndef MACH_U300_I2C_H
12#define MACH_U300_I2C_H
13
14#ifdef CONFIG_I2C_STU300
15void __init u300_i2c_register_board_devices(void);
16#else
17/* Compile out this stuff if no I2C adapter is available */
18static inline void __init u300_i2c_register_board_devices(void)
19{
20}
21#endif
22
23#endif
diff --git a/arch/arm/mach-u300/include/mach/memory.h b/arch/arm/mach-u300/include/mach/memory.h
index bf134bcc129d..ab000df7fc03 100644
--- a/arch/arm/mach-u300/include/mach/memory.h
+++ b/arch/arm/mach-u300/include/mach/memory.h
@@ -35,6 +35,14 @@
35#endif 35#endif
36 36
37/* 37/*
38 * TCM memory whereabouts
39 */
40#define ITCM_OFFSET 0xffff2000
41#define ITCM_END 0xffff3fff
42#define DTCM_OFFSET 0xffff4000
43#define DTCM_END 0xffff5fff
44
45/*
38 * We enable a real big DMA buffer if need be. 46 * We enable a real big DMA buffer if need be.
39 */ 47 */
40#define CONSISTENT_DMA_SIZE SZ_4M 48#define CONSISTENT_DMA_SIZE SZ_4M
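
Each TCM window above spans END - OFFSET + 1 = 0x2000 bytes, i.e. 8 KiB of ITCM and 8 KiB of DTCM. A compile-time sanity-check sketch (BUILD_BUG_ON from <linux/kernel.h>):

        #define ITCM_SIZE (ITCM_END - ITCM_OFFSET + 1) /* 0x2000 = 8 KiB */
        #define DTCM_SIZE (DTCM_END - DTCM_OFFSET + 1) /* 0x2000 = 8 KiB */

        static inline void u300_tcm_size_check(void)
        {
                BUILD_BUG_ON(ITCM_SIZE != SZ_8K);
                BUILD_BUG_ON(DTCM_SIZE != SZ_8K);
        }
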
diff --git a/arch/arm/mach-u300/include/mach/syscon.h b/arch/arm/mach-u300/include/mach/syscon.h
index 1c90d1b1ccb6..7444f5c7da97 100644
--- a/arch/arm/mach-u300/include/mach/syscon.h
+++ b/arch/arm/mach-u300/include/mach/syscon.h
@@ -240,8 +240,13 @@
240#define U300_SYSCON_PMC1LR_CDI_MASK (0xC000) 240#define U300_SYSCON_PMC1LR_CDI_MASK (0xC000)
241#define U300_SYSCON_PMC1LR_CDI_CDI (0x0000) 241#define U300_SYSCON_PMC1LR_CDI_CDI (0x0000)
242#define U300_SYSCON_PMC1LR_CDI_EMIF (0x4000) 242#define U300_SYSCON_PMC1LR_CDI_EMIF (0x4000)
243#ifdef CONFIG_MACH_U300_BS335
244#define U300_SYSCON_PMC1LR_CDI_CDI2 (0x8000)
245#define U300_SYSCON_PMC1LR_CDI_WCDMA_APP_GPIO (0xC000)
 246#elif defined(CONFIG_MACH_U300_BS365)
243#define U300_SYSCON_PMC1LR_CDI_GPIO (0x8000) 247#define U300_SYSCON_PMC1LR_CDI_GPIO (0x8000)
244#define U300_SYSCON_PMC1LR_CDI_WCDMA (0xC000) 248#define U300_SYSCON_PMC1LR_CDI_WCDMA (0xC000)
249#endif
245#define U300_SYSCON_PMC1LR_PDI_MASK (0x3000) 250#define U300_SYSCON_PMC1LR_PDI_MASK (0x3000)
246#define U300_SYSCON_PMC1LR_PDI_PDI (0x0000) 251#define U300_SYSCON_PMC1LR_PDI_PDI (0x0000)
247#define U300_SYSCON_PMC1LR_PDI_EGG (0x1000) 252#define U300_SYSCON_PMC1LR_PDI_EGG (0x1000)
@@ -345,19 +350,69 @@
345#define U300_SYSCON_MMCR_MASK (0x0003) 350#define U300_SYSCON_MMCR_MASK (0x0003)
346#define U300_SYSCON_MMCR_MMC_FB_CLK_SEL_ENABLE (0x0002) 351#define U300_SYSCON_MMCR_MMC_FB_CLK_SEL_ENABLE (0x0002)
347#define U300_SYSCON_MMCR_MSPRO_FREQSEL_ENABLE (0x0001) 352#define U300_SYSCON_MMCR_MSPRO_FREQSEL_ENABLE (0x0001)
348 353/* Pull up/down control (R/W) */
354#define U300_SYSCON_PUCR (0x104)
355#define U300_SYSCON_PUCR_EMIF_1_WAIT_N_PU_ENABLE (0x0200)
356#define U300_SYSCON_PUCR_EMIF_1_NFIF_READY_PU_ENABLE (0x0100)
357#define U300_SYSCON_PUCR_EMIF_1_16BIT_PU_ENABLE (0x0080)
358#define U300_SYSCON_PUCR_EMIF_1_8BIT_PU_ENABLE (0x0040)
359#define U300_SYSCON_PUCR_KEY_IN_PU_EN_MASK (0x003F)
360/* Padmux 2 control */
361#define U300_SYSCON_PMC2R (0x100)
362#define U300_SYSCON_PMC2R_APP_MISC_0_MASK (0x00C0)
363#define U300_SYSCON_PMC2R_APP_MISC_0_APP_GPIO (0x0000)
364#define U300_SYSCON_PMC2R_APP_MISC_0_EMIF_SDRAM (0x0040)
365#define U300_SYSCON_PMC2R_APP_MISC_0_MMC (0x0080)
366#define U300_SYSCON_PMC2R_APP_MISC_0_CDI2 (0x00C0)
367#define U300_SYSCON_PMC2R_APP_MISC_1_MASK (0x0300)
368#define U300_SYSCON_PMC2R_APP_MISC_1_APP_GPIO (0x0000)
369#define U300_SYSCON_PMC2R_APP_MISC_1_EMIF_SDRAM (0x0100)
370#define U300_SYSCON_PMC2R_APP_MISC_1_MMC (0x0200)
371#define U300_SYSCON_PMC2R_APP_MISC_1_CDI2 (0x0300)
372#define U300_SYSCON_PMC2R_APP_MISC_2_MASK (0x0C00)
373#define U300_SYSCON_PMC2R_APP_MISC_2_APP_GPIO (0x0000)
374#define U300_SYSCON_PMC2R_APP_MISC_2_EMIF_SDRAM (0x0400)
375#define U300_SYSCON_PMC2R_APP_MISC_2_MMC (0x0800)
376#define U300_SYSCON_PMC2R_APP_MISC_2_CDI2 (0x0C00)
377#define U300_SYSCON_PMC2R_APP_MISC_3_MASK (0x3000)
378#define U300_SYSCON_PMC2R_APP_MISC_3_APP_GPIO (0x0000)
379#define U300_SYSCON_PMC2R_APP_MISC_3_EMIF_SDRAM (0x1000)
380#define U300_SYSCON_PMC2R_APP_MISC_3_MMC (0x2000)
381#define U300_SYSCON_PMC2R_APP_MISC_3_CDI2 (0x3000)
382#define U300_SYSCON_PMC2R_APP_MISC_4_MASK (0xC000)
383#define U300_SYSCON_PMC2R_APP_MISC_4_APP_GPIO (0x0000)
384#define U300_SYSCON_PMC2R_APP_MISC_4_EMIF_SDRAM (0x4000)
385#define U300_SYSCON_PMC2R_APP_MISC_4_MMC (0x8000)
386#define U300_SYSCON_PMC2R_APP_MISC_4_ACC_GPIO (0xC000)
349/* TODO: More SYSCON registers missing */ 387/* TODO: More SYSCON registers missing */
350#define U300_SYSCON_PMC3R (0x10c) 388#define U300_SYSCON_PMC3R (0x10c)
351#define U300_SYSCON_PMC3R_APP_MISC_11_MASK (0xc000) 389#define U300_SYSCON_PMC3R_APP_MISC_11_MASK (0xc000)
352#define U300_SYSCON_PMC3R_APP_MISC_11_SPI (0x4000) 390#define U300_SYSCON_PMC3R_APP_MISC_11_SPI (0x4000)
353#define U300_SYSCON_PMC3R_APP_MISC_10_MASK (0x3000) 391#define U300_SYSCON_PMC3R_APP_MISC_10_MASK (0x3000)
354#define U300_SYSCON_PMC3R_APP_MISC_10_SPI (0x1000) 392#define U300_SYSCON_PMC3R_APP_MISC_10_SPI (0x1000)
355/* TODO: Missing other configs, I just added the SPI stuff */ 393/* TODO: Missing other configs */
356 394#define U300_SYSCON_PMC4R (0x168)
395#define U300_SYSCON_PMC4R_APP_MISC_12_MASK (0x0003)
396#define U300_SYSCON_PMC4R_APP_MISC_12_APP_GPIO (0x0000)
397#define U300_SYSCON_PMC4R_APP_MISC_13_MASK (0x000C)
398#define U300_SYSCON_PMC4R_APP_MISC_13_CDI (0x0000)
399#define U300_SYSCON_PMC4R_APP_MISC_13_SMIA (0x0004)
400#define U300_SYSCON_PMC4R_APP_MISC_13_SMIA2 (0x0008)
401#define U300_SYSCON_PMC4R_APP_MISC_13_APP_GPIO (0x000C)
402#define U300_SYSCON_PMC4R_APP_MISC_14_MASK (0x0030)
403#define U300_SYSCON_PMC4R_APP_MISC_14_CDI (0x0000)
404#define U300_SYSCON_PMC4R_APP_MISC_14_SMIA (0x0010)
405#define U300_SYSCON_PMC4R_APP_MISC_14_CDI2 (0x0020)
406#define U300_SYSCON_PMC4R_APP_MISC_14_APP_GPIO (0x0030)
407#define U300_SYSCON_PMC4R_APP_MISC_16_MASK (0x0300)
408#define U300_SYSCON_PMC4R_APP_MISC_16_APP_GPIO_13 (0x0000)
409#define U300_SYSCON_PMC4R_APP_MISC_16_APP_UART1_CTS (0x0100)
410#define U300_SYSCON_PMC4R_APP_MISC_16_EMIF_1_STATIC_CS5_N (0x0200)
357/* SYS_0_CLK_CONTROL first clock control 16bit (R/W) */ 411/* SYS_0_CLK_CONTROL first clock control 16bit (R/W) */
358#define U300_SYSCON_S0CCR (0x120) 412#define U300_SYSCON_S0CCR (0x120)
359#define U300_SYSCON_S0CCR_FIELD_MASK (0x43FF) 413#define U300_SYSCON_S0CCR_FIELD_MASK (0x43FF)
360#define U300_SYSCON_S0CCR_CLOCK_REQ (0x4000) 414#define U300_SYSCON_S0CCR_CLOCK_REQ (0x4000)
415#define U300_SYSCON_S0CCR_CLOCK_REQ_MONITOR (0x2000)
361#define U300_SYSCON_S0CCR_CLOCK_INV (0x0200) 416#define U300_SYSCON_S0CCR_CLOCK_INV (0x0200)
362#define U300_SYSCON_S0CCR_CLOCK_FREQ_MASK (0x01E0) 417#define U300_SYSCON_S0CCR_CLOCK_FREQ_MASK (0x01E0)
363#define U300_SYSCON_S0CCR_CLOCK_SELECT_MASK (0x001E) 418#define U300_SYSCON_S0CCR_CLOCK_SELECT_MASK (0x001E)
@@ -375,6 +430,7 @@
375#define U300_SYSCON_S1CCR (0x124) 430#define U300_SYSCON_S1CCR (0x124)
376#define U300_SYSCON_S1CCR_FIELD_MASK (0x43FF) 431#define U300_SYSCON_S1CCR_FIELD_MASK (0x43FF)
377#define U300_SYSCON_S1CCR_CLOCK_REQ (0x4000) 432#define U300_SYSCON_S1CCR_CLOCK_REQ (0x4000)
433#define U300_SYSCON_S1CCR_CLOCK_REQ_MONITOR (0x2000)
378#define U300_SYSCON_S1CCR_CLOCK_INV (0x0200) 434#define U300_SYSCON_S1CCR_CLOCK_INV (0x0200)
379#define U300_SYSCON_S1CCR_CLOCK_FREQ_MASK (0x01E0) 435#define U300_SYSCON_S1CCR_CLOCK_FREQ_MASK (0x01E0)
380#define U300_SYSCON_S1CCR_CLOCK_SELECT_MASK (0x001E) 436#define U300_SYSCON_S1CCR_CLOCK_SELECT_MASK (0x001E)
@@ -393,6 +449,7 @@
393#define U300_SYSCON_S2CCR_FIELD_MASK (0xC3FF) 449#define U300_SYSCON_S2CCR_FIELD_MASK (0xC3FF)
394#define U300_SYSCON_S2CCR_CLK_STEAL (0x8000) 450#define U300_SYSCON_S2CCR_CLK_STEAL (0x8000)
395#define U300_SYSCON_S2CCR_CLOCK_REQ (0x4000) 451#define U300_SYSCON_S2CCR_CLOCK_REQ (0x4000)
452#define U300_SYSCON_S2CCR_CLOCK_REQ_MONITOR (0x2000)
396#define U300_SYSCON_S2CCR_CLOCK_INV (0x0200) 453#define U300_SYSCON_S2CCR_CLOCK_INV (0x0200)
397#define U300_SYSCON_S2CCR_CLOCK_FREQ_MASK (0x01E0) 454#define U300_SYSCON_S2CCR_CLOCK_FREQ_MASK (0x01E0)
398#define U300_SYSCON_S2CCR_CLOCK_SELECT_MASK (0x001E) 455#define U300_SYSCON_S2CCR_CLOCK_SELECT_MASK (0x001E)
@@ -425,6 +482,44 @@
425#define U300_SYSCON_MCR_PMGEN_CR_0_EMIF_0_SDRAM (0x000C) 482#define U300_SYSCON_MCR_PMGEN_CR_0_EMIF_0_SDRAM (0x000C)
426#define U300_SYSCON_MCR_PM1G_MODE_ENABLE (0x0002) 483#define U300_SYSCON_MCR_PM1G_MODE_ENABLE (0x0002)
427#define U300_SYSCON_MCR_PMTG5_MODE_ENABLE (0x0001) 484#define U300_SYSCON_MCR_PMTG5_MODE_ENABLE (0x0001)
485/* SC_PLL_IRQ_CONTROL 16bit (R/W) */
486#define U300_SYSCON_PICR (0x0130)
487#define U300_SYSCON_PICR_MASK (0x00FF)
488#define U300_SYSCON_PICR_FORCE_PLL208_LOCK_LOW_ENABLE (0x0080)
489#define U300_SYSCON_PICR_FORCE_PLL208_LOCK_HIGH_ENABLE (0x0040)
490#define U300_SYSCON_PICR_FORCE_PLL13_LOCK_LOW_ENABLE (0x0020)
491#define U300_SYSCON_PICR_FORCE_PLL13_LOCK_HIGH_ENABLE (0x0010)
492#define U300_SYSCON_PICR_IRQMASK_PLL13_UNLOCK_ENABLE (0x0008)
493#define U300_SYSCON_PICR_IRQMASK_PLL13_LOCK_ENABLE (0x0004)
494#define U300_SYSCON_PICR_IRQMASK_PLL208_UNLOCK_ENABLE (0x0002)
495#define U300_SYSCON_PICR_IRQMASK_PLL208_LOCK_ENABLE (0x0001)
496/* SC_PLL_IRQ_STATUS 16 bit (R/-) */
497#define U300_SYSCON_PISR (0x0134)
498#define U300_SYSCON_PISR_MASK (0x000F)
499#define U300_SYSCON_PISR_PLL13_UNLOCK_IND (0x0008)
500#define U300_SYSCON_PISR_PLL13_LOCK_IND (0x0004)
501#define U300_SYSCON_PISR_PLL208_UNLOCK_IND (0x0002)
502#define U300_SYSCON_PISR_PLL208_LOCK_IND (0x0001)
503/* SC_PLL_IRQ_CLEAR 16 bit (-/W) */
504#define U300_SYSCON_PICLR (0x0138)
505#define U300_SYSCON_PICLR_MASK (0x000F)
506#define U300_SYSCON_PICLR_RWMASK (0x0000)
507#define U300_SYSCON_PICLR_PLL13_UNLOCK_SC (0x0008)
508#define U300_SYSCON_PICLR_PLL13_LOCK_SC (0x0004)
509#define U300_SYSCON_PICLR_PLL208_UNLOCK_SC (0x0002)
510#define U300_SYSCON_PICLR_PLL208_LOCK_SC (0x0001)
511/* CAMIF_CONTROL 16 bit (-/W) */
512#define U300_SYSCON_CICR (0x013C)
513#define U300_SYSCON_CICR_MASK (0x0FFF)
514#define U300_SYSCON_CICR_APP_SUBLVDS_TESTMODE_MASK (0x0F00)
515#define U300_SYSCON_CICR_APP_SUBLVDS_TESTMODE_PORT1 (0x0C00)
516#define U300_SYSCON_CICR_APP_SUBLVDS_TESTMODE_PORT0 (0x0300)
517#define U300_SYSCON_CICR_APP_SUBLVDS_RESCON_MASK (0x00F0)
518#define U300_SYSCON_CICR_APP_SUBLVDS_RESCON_PORT1 (0x00C0)
519#define U300_SYSCON_CICR_APP_SUBLVDS_RESCON_PORT0 (0x0030)
520#define U300_SYSCON_CICR_APP_SUBLVDS_PWR_DWN_N_MASK (0x000F)
521#define U300_SYSCON_CICR_APP_SUBLVDS_PWR_DWN_N_PORT1 (0x000C)
522#define U300_SYSCON_CICR_APP_SUBLVDS_PWR_DWN_N_PORT0 (0x0003)
428/* Clock activity observability register 0 */ 523/* Clock activity observability register 0 */
429#define U300_SYSCON_C0OAR (0x140) 524#define U300_SYSCON_C0OAR (0x140)
430#define U300_SYSCON_C0OAR_MASK (0xFFFF) 525#define U300_SYSCON_C0OAR_MASK (0xFFFF)
@@ -513,7 +608,7 @@
513/** 608/**
514 * CPU medium frequency in MHz 609 * CPU medium frequency in MHz
515 */ 610 */
516#define SYSCON_CPU_CLOCK_MEDIUM 104 611#define SYSCON_CPU_CLOCK_MEDIUM 52
517/** 612/**
518 * CPU low frequency in MHz 613 * CPU low frequency in MHz
519 */ 614 */
@@ -527,7 +622,7 @@
527/** 622/**
528 * EMIF medium frequency in MHz 623 * EMIF medium frequency in MHz
529 */ 624 */
530#define SYSCON_EMIF_CLOCK_MEDIUM 104 625#define SYSCON_EMIF_CLOCK_MEDIUM 52
531/** 626/**
532 * EMIF low frequency in MHz 627 * EMIF low frequency in MHz
533 */ 628 */
@@ -541,7 +636,7 @@
541/** 636/**
542 * AHB medium frequency in MHz 637 * AHB medium frequency in MHz
543 */ 638 */
544#define SYSCON_AHB_CLOCK_MEDIUM 52 639#define SYSCON_AHB_CLOCK_MEDIUM 26
545/** 640/**
546 * AHB low frequency in MHz 641 * AHB low frequency in MHz
547 */ 642 */
@@ -553,6 +648,15 @@ enum syscon_busmaster {
553 SYSCON_BM_VIDEO_ENC 648 SYSCON_BM_VIDEO_ENC
554}; 649};
555 650
 651/* Select a resistor or a set of resistors */
652enum syscon_pull_up_down {
653 SYSCON_PU_KEY_IN_EN,
654 SYSCON_PU_EMIF_1_8_BIT_EN,
655 SYSCON_PU_EMIF_1_16_BIT_EN,
656 SYSCON_PU_EMIF_1_NFIF_READY_EN,
657 SYSCON_PU_EMIF_1_NFIF_WAIT_N_EN,
658};
659
556/* 660/*
557 * Note that this array must match the order of the array "clk_reg" 661 * Note that this array must match the order of the array "clk_reg"
558 * in syscon.c 662 * in syscon.c
@@ -575,6 +679,7 @@ enum syscon_clk {
575 SYSCON_CLKCONTROL_SPI, 679 SYSCON_CLKCONTROL_SPI,
576 SYSCON_CLKCONTROL_I2S0_CORE, 680 SYSCON_CLKCONTROL_I2S0_CORE,
577 SYSCON_CLKCONTROL_I2S1_CORE, 681 SYSCON_CLKCONTROL_I2S1_CORE,
682 SYSCON_CLKCONTROL_UART1,
578 SYSCON_CLKCONTROL_AAIF, 683 SYSCON_CLKCONTROL_AAIF,
579 SYSCON_CLKCONTROL_AHB, 684 SYSCON_CLKCONTROL_AHB,
580 SYSCON_CLKCONTROL_APEX, 685 SYSCON_CLKCONTROL_APEX,
@@ -604,7 +709,8 @@ enum syscon_sysclk_mode {
604 709
605enum syscon_sysclk_req { 710enum syscon_sysclk_req {
606 SYSCON_SYSCLKREQ_DISABLED, 711 SYSCON_SYSCLKREQ_DISABLED,
607 SYSCON_SYSCLKREQ_ACTIVE_LOW 712 SYSCON_SYSCLKREQ_ACTIVE_LOW,
713 SYSCON_SYSCLKREQ_MONITOR
608}; 714};
609 715
610enum syscon_clk_mode { 716enum syscon_clk_mode {
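
The new SYSCON_SYSCLKREQ_MONITOR enum value presumably pairs with the *_CLOCK_REQ_MONITOR (0x2000) bits added to S0CCR/S1CCR/S2CCR above; the code that consumes it is not in this excerpt. A hypothetical helper showing the intended mapping (not from the patch):

        /* hypothetical: translate the request mode into S0CCR bits */
        static u16 syscon_sysclkreq_bits(enum syscon_sysclk_req req)
        {
                switch (req) {
                case SYSCON_SYSCLKREQ_ACTIVE_LOW:
                        return U300_SYSCON_S0CCR_CLOCK_REQ;
                case SYSCON_SYSCLKREQ_MONITOR:
                        return U300_SYSCON_S0CCR_CLOCK_REQ_MONITOR;
                case SYSCON_SYSCLKREQ_DISABLED:
                default:
                        return 0;
                }
        }
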
diff --git a/arch/arm/mach-u300/mmc.c b/arch/arm/mach-u300/mmc.c
index 585cc013639d..7b6b016786bb 100644
--- a/arch/arm/mach-u300/mmc.c
+++ b/arch/arm/mach-u300/mmc.c
@@ -19,15 +19,16 @@
19#include <linux/regulator/consumer.h> 19#include <linux/regulator/consumer.h>
20#include <linux/regulator/machine.h> 20#include <linux/regulator/machine.h>
21#include <linux/gpio.h> 21#include <linux/gpio.h>
22#include <linux/amba/mmci.h>
22 23
23#include <asm/mach/mmc.h>
24#include "mmc.h" 24#include "mmc.h"
25#include "padmux.h"
25 26
26struct mmci_card_event { 27struct mmci_card_event {
27 struct input_dev *mmc_input; 28 struct input_dev *mmc_input;
28 int mmc_inserted; 29 int mmc_inserted;
29 struct work_struct workq; 30 struct work_struct workq;
30 struct mmc_platform_data mmc0_plat_data; 31 struct mmci_platform_data mmc0_plat_data;
31}; 32};
32 33
33static unsigned int mmc_status(struct device *dev) 34static unsigned int mmc_status(struct device *dev)
@@ -146,6 +147,7 @@ int __devinit mmc_init(struct amba_device *adev)
146{ 147{
147 struct mmci_card_event *mmci_card; 148 struct mmci_card_event *mmci_card;
148 struct device *mmcsd_device = &adev->dev; 149 struct device *mmcsd_device = &adev->dev;
150 struct pmx *pmx;
149 int ret = 0; 151 int ret = 0;
150 152
151 mmci_card = kzalloc(sizeof(struct mmci_card_event), GFP_KERNEL); 153 mmci_card = kzalloc(sizeof(struct mmci_card_event), GFP_KERNEL);
@@ -158,6 +160,8 @@ int __devinit mmc_init(struct amba_device *adev)
158 mmci_card->mmc0_plat_data.status = mmc_status; 160 mmci_card->mmc0_plat_data.status = mmc_status;
159 mmci_card->mmc0_plat_data.gpio_wp = -1; 161 mmci_card->mmc0_plat_data.gpio_wp = -1;
160 mmci_card->mmc0_plat_data.gpio_cd = -1; 162 mmci_card->mmc0_plat_data.gpio_cd = -1;
163 mmci_card->mmc0_plat_data.capabilities = MMC_CAP_MMC_HIGHSPEED |
164 MMC_CAP_SD_HIGHSPEED | MMC_CAP_4_BIT_DATA;
161 165
162 mmcsd_device->platform_data = (void *) &mmci_card->mmc0_plat_data; 166 mmcsd_device->platform_data = (void *) &mmci_card->mmc0_plat_data;
163 167
@@ -207,6 +211,20 @@ int __devinit mmc_init(struct amba_device *adev)
207 211
208 input_set_drvdata(mmci_card->mmc_input, mmci_card); 212 input_set_drvdata(mmci_card->mmc_input, mmci_card);
209 213
214 /*
215 * Setup padmuxing for MMC. Since this must always be
216 * compiled into the kernel, pmx is never released.
217 */
218 pmx = pmx_get(mmcsd_device, U300_APP_PMX_MMC_SETTING);
219
220 if (IS_ERR(pmx))
221 pr_warning("Could not get padmux handle\n");
222 else {
223 ret = pmx_activate(mmcsd_device, pmx);
224 if (IS_ERR_VALUE(ret))
225 pr_warning("Could not activate padmuxing\n");
226 }
227
210 ret = gpio_register_callback(U300_GPIO_PIN_MMC_CD, mmci_callback, 228 ret = gpio_register_callback(U300_GPIO_PIN_MMC_CD, mmci_callback,
211 mmci_card); 229 mmci_card);
212 230
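
The padmux.c rewrite below replaces the two hard-coded mission-mode setters with a get/activate/deactivate/put consumer API. mmc_init() above shows only the acquire half (its comment notes the setting is never released); a full lifecycle from a consumer would look roughly like the sketch below. Note, from the code that follows, that pmx_deactivate() also drops the binding taken by pmx_get(), so it is not followed by pmx_put():

        #include <linux/err.h>
        #include "padmux.h"

        static int example_claim_mmc_pads(struct device *dev)
        {
                struct pmx *pmx;
                int ret;

                pmx = pmx_get(dev, U300_APP_PMX_MMC_SETTING);
                if (IS_ERR(pmx))
                        return PTR_ERR(pmx);

                ret = pmx_activate(dev, pmx);
                if (ret) {
                        pmx_put(dev, pmx);      /* release the unactivated handle */
                        return ret;
                }

                /* ... pads are routed; do the work ... */

                /* deactivation restores the registers and unbinds the handle */
                return pmx_deactivate(dev, pmx);
        }
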
diff --git a/arch/arm/mach-u300/padmux.c b/arch/arm/mach-u300/padmux.c
index f3664564f086..4c93c6cefd37 100644
--- a/arch/arm/mach-u300/padmux.c
+++ b/arch/arm/mach-u300/padmux.c
@@ -6,53 +6,362 @@
6 * Copyright (C) 2009 ST-Ericsson AB 6 * Copyright (C) 2009 ST-Ericsson AB
7 * License terms: GNU General Public License (GPL) version 2 7 * License terms: GNU General Public License (GPL) version 2
8 * U300 PADMUX functions 8 * U300 PADMUX functions
9 * Author: Linus Walleij <linus.walleij@stericsson.com> 9 * Author: Martin Persson <martin.persson@stericsson.com>
10 *
11 */ 10 */
12#include <linux/io.h> 11
12#include <linux/module.h>
13#include <linux/kernel.h>
14#include <linux/device.h>
13#include <linux/err.h> 15#include <linux/err.h>
16#include <linux/errno.h>
17#include <linux/io.h>
18#include <linux/mutex.h>
19#include <linux/string.h>
20#include <linux/bug.h>
21#include <linux/debugfs.h>
22#include <linux/seq_file.h>
14#include <mach/u300-regs.h> 23#include <mach/u300-regs.h>
15#include <mach/syscon.h> 24#include <mach/syscon.h>
16
17#include "padmux.h" 25#include "padmux.h"
18 26
19/* Set the PAD MUX to route the MMC reader correctly to GPIO0. */ 27static DEFINE_MUTEX(pmx_mutex);
20void pmx_set_mission_mode_mmc(void) 28
21{ 29const u32 pmx_registers[] = {
22 u16 val; 30 (U300_SYSCON_VBASE + U300_SYSCON_PMC1LR),
23 31 (U300_SYSCON_VBASE + U300_SYSCON_PMC1HR),
24 val = readw(U300_SYSCON_VBASE + U300_SYSCON_PMC1LR); 32 (U300_SYSCON_VBASE + U300_SYSCON_PMC2R),
25 val &= ~U300_SYSCON_PMC1LR_MMCSD_MASK; 33 (U300_SYSCON_VBASE + U300_SYSCON_PMC3R),
26 writew(val, U300_SYSCON_VBASE + U300_SYSCON_PMC1LR); 34 (U300_SYSCON_VBASE + U300_SYSCON_PMC4R)
27 val = readw(U300_SYSCON_VBASE + U300_SYSCON_PMC1HR); 35};
28 val &= ~U300_SYSCON_PMC1HR_APP_GPIO_1_MASK; 36
29 val |= U300_SYSCON_PMC1HR_APP_GPIO_1_MMC; 37/* High level functionality */
30 writew(val, U300_SYSCON_VBASE + U300_SYSCON_PMC1HR); 38
31} 39/* Lazy dog:
32 40 * onmask = {
33void pmx_set_mission_mode_spi(void) 41 * {"PMC1LR" mask, "PMC1LR" value},
34{ 42 * {"PMC1HR" mask, "PMC1HR" value},
35 u16 val; 43 * {"PMC2R" mask, "PMC2R" value},
36 44 * {"PMC3R" mask, "PMC3R" value},
37 /* Set up padmuxing so the SPI port and its chipselects are active */ 45 * {"PMC4R" mask, "PMC4R" value}
38 val = readw(U300_SYSCON_VBASE + U300_SYSCON_PMC1HR); 46 * }
39 /* 47 */
40 * Activate the SPI port (disable the use of these pins for generic 48static struct pmx mmc_setting = {
41 * GPIO, DSP, AAIF 49 .setting = U300_APP_PMX_MMC_SETTING,
42 */ 50 .default_on = false,
43 val &= ~U300_SYSCON_PMC1HR_APP_SPI_2_MASK; 51 .activated = false,
44 val |= U300_SYSCON_PMC1HR_APP_SPI_2_SPI; 52 .name = "MMC",
45 /* 53 .onmask = {
46 * Use GPIO pin SPI CS1 for CS1 actually (it can be used for other 54 {U300_SYSCON_PMC1LR_MMCSD_MASK,
47 * things also) 55 U300_SYSCON_PMC1LR_MMCSD_MMCSD},
48 */ 56 {0, 0},
49 val &= ~U300_SYSCON_PMC1HR_APP_SPI_CS_1_MASK; 57 {0, 0},
50 val |= U300_SYSCON_PMC1HR_APP_SPI_CS_1_SPI; 58 {0, 0},
51 /* 59 {U300_SYSCON_PMC4R_APP_MISC_12_MASK,
52 * Use GPIO pin SPI CS2 for CS2 actually (it can be used for other 60 U300_SYSCON_PMC4R_APP_MISC_12_APP_GPIO}
53 * things also) 61 },
54 */ 62};
55 val &= ~U300_SYSCON_PMC1HR_APP_SPI_CS_2_MASK; 63
56 val |= U300_SYSCON_PMC1HR_APP_SPI_CS_2_SPI; 64static struct pmx spi_setting = {
57 writew(val, U300_SYSCON_VBASE + U300_SYSCON_PMC1HR); 65 .setting = U300_APP_PMX_SPI_SETTING,
66 .default_on = false,
67 .activated = false,
68 .name = "SPI",
69 .onmask = {{0, 0},
70 {U300_SYSCON_PMC1HR_APP_SPI_2_MASK |
71 U300_SYSCON_PMC1HR_APP_SPI_CS_1_MASK |
72 U300_SYSCON_PMC1HR_APP_SPI_CS_2_MASK,
73 U300_SYSCON_PMC1HR_APP_SPI_2_SPI |
74 U300_SYSCON_PMC1HR_APP_SPI_CS_1_SPI |
75 U300_SYSCON_PMC1HR_APP_SPI_CS_2_SPI},
76 {0, 0},
77 {0, 0},
78 {0, 0}
79 },
80};
81
82/* Available padmux settings */
83static struct pmx *pmx_settings[] = {
84 &mmc_setting,
85 &spi_setting,
86};
87
88static void update_registers(struct pmx *pmx, bool activate)
89{
90 u16 regval, val, mask;
91 int i;
92
93 for (i = 0; i < ARRAY_SIZE(pmx_registers); i++) {
94 if (activate)
95 val = pmx->onmask[i].val;
96 else
97 val = 0;
98
99 mask = pmx->onmask[i].mask;
100 if (mask != 0) {
101 regval = readw(pmx_registers[i]);
102 regval &= ~mask;
103 regval |= val;
104 writew(regval, pmx_registers[i]);
105 }
106 }
107}
108
109struct pmx *pmx_get(struct device *dev, enum pmx_settings setting)
110{
111 int i;
112 struct pmx *pmx = ERR_PTR(-ENOENT);
113
114 if (dev == NULL)
115 return ERR_PTR(-EINVAL);
116
117 mutex_lock(&pmx_mutex);
118 for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) {
119
120 if (setting == pmx_settings[i]->setting) {
121
122 if (pmx_settings[i]->dev != NULL) {
123 WARN(1, "padmux: required setting "
124 "in use by another consumer\n");
125 } else {
126 pmx = pmx_settings[i];
127 pmx->dev = dev;
128 dev_dbg(dev, "padmux: setting nr %d is now "
129 "bound to %s and ready to use\n",
130 setting, dev_name(dev));
131 break;
132 }
133 }
134 }
135 mutex_unlock(&pmx_mutex);
136
137 return pmx;
138}
139EXPORT_SYMBOL(pmx_get);
140
141int pmx_put(struct device *dev, struct pmx *pmx)
142{
143 int i;
144 int ret = -ENOENT;
145
146 if (pmx == NULL || dev == NULL)
147 return -EINVAL;
148
149 mutex_lock(&pmx_mutex);
150 for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) {
151
152 if (pmx->setting == pmx_settings[i]->setting) {
153
154 if (dev != pmx->dev) {
155 WARN(1, "padmux: cannot release handle as "
156 "it is bound to another consumer\n");
157 ret = -EINVAL;
158 break;
159 } else {
160 pmx_settings[i]->dev = NULL;
161 ret = 0;
162 break;
163 }
164 }
165 }
166 mutex_unlock(&pmx_mutex);
167
168 return ret;
169}
170EXPORT_SYMBOL(pmx_put);
171
172int pmx_activate(struct device *dev, struct pmx *pmx)
173{
174 int i, j, ret;
175 ret = 0;
176
177 if (pmx == NULL || dev == NULL)
178 return -EINVAL;
179
180 mutex_lock(&pmx_mutex);
181
182 /* Make sure the required bits are not used */
183 for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) {
184
185 if (pmx_settings[i]->dev == NULL || pmx_settings[i] == pmx)
186 continue;
187
188 for (j = 0; j < ARRAY_SIZE(pmx_registers); j++) {
189
190 if (pmx_settings[i]->onmask[j].mask & pmx->
191 onmask[j].mask) {
192 /* More than one entry on the same bits */
193 WARN(1, "padmux: cannot activate "
194 "setting. Bit conflict with "
195 "an active setting\n");
196
197 ret = -EUSERS;
198 goto exit;
199 }
200 }
201 }
202 update_registers(pmx, true);
203 pmx->activated = true;
204 dev_dbg(dev, "padmux: setting nr %d is activated\n",
205 pmx->setting);
206
207exit:
208 mutex_unlock(&pmx_mutex);
209 return ret;
210}
211EXPORT_SYMBOL(pmx_activate);
212
213int pmx_deactivate(struct device *dev, struct pmx *pmx)
214{
215 int i;
216 int ret = -ENOENT;
217
218 if (pmx == NULL || dev == NULL)
219 return -EINVAL;
220
221 mutex_lock(&pmx_mutex);
222 for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) {
223
224 if (pmx_settings[i]->dev == NULL)
225 continue;
226
227 if (pmx->setting == pmx_settings[i]->setting) {
228
229 if (dev != pmx->dev) {
230 WARN(1, "padmux: cannot deactivate "
231 "pmx setting as it was activated "
232 "by another consumer\n");
233
234 ret = -EBUSY;
235 continue;
236 } else {
237 update_registers(pmx, false);
238 pmx_settings[i]->dev = NULL;
239 pmx->activated = false;
240 ret = 0;
241 dev_dbg(dev, "padmux: setting nr %d is deactivated",
242 pmx->setting);
243 break;
244 }
245 }
246 }
247 mutex_unlock(&pmx_mutex);
248
249 return ret;
250}
251EXPORT_SYMBOL(pmx_deactivate);
252
253/*
254 * For internal use only. If it is to be exported,
255 * it should be reentrant. Notice that pmx_activate
256 * (i.e. runtime settings) always override default settings.
257 */
258static int pmx_set_default(void)
259{
260 /* Used to identify several entries on the same bits */
261 u16 modbits[ARRAY_SIZE(pmx_registers)];
262
263 int i, j;
264
265 memset(modbits, 0, ARRAY_SIZE(pmx_registers) * sizeof(u16));
266
267 for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) {
268
269 if (!pmx_settings[i]->default_on)
270 continue;
271
272 for (j = 0; j < ARRAY_SIZE(pmx_registers); j++) {
273
274 /* Make sure there is only one entry on the same bits */
275 if (modbits[j] & pmx_settings[i]->onmask[j].mask) {
276 BUG();
277 return -EUSERS;
278 }
279 modbits[j] |= pmx_settings[i]->onmask[j].mask;
280 }
281 update_registers(pmx_settings[i], true);
282 }
283 return 0;
284}
285
286#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_U300_DEBUG))
287static int pmx_show(struct seq_file *s, void *data)
288{
289 int i;
290 seq_printf(s, "-------------------------------------------------\n");
291 seq_printf(s, "SETTING BOUND TO DEVICE STATE\n");
292 seq_printf(s, "-------------------------------------------------\n");
293 mutex_lock(&pmx_mutex);
294 for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) {
295 /* Format pmx and device name nicely */
296 char cdp[33];
297 int chars;
298
299 chars = snprintf(&cdp[0], 17, "%s", pmx_settings[i]->name);
300 while (chars < 16) {
301 cdp[chars] = ' ';
302 chars++;
303 }
304 chars = snprintf(&cdp[16], 17, "%s", pmx_settings[i]->dev ?
305 dev_name(pmx_settings[i]->dev) : "N/A");
306 while (chars < 16) {
307 cdp[chars+16] = ' ';
308 chars++;
309 }
310 cdp[32] = '\0';
311
312 seq_printf(s,
313 "%s\t%s\n",
314 &cdp[0],
315 pmx_settings[i]->activated ?
316 "ACTIVATED" : "DEACTIVATED"
317 );
318
319 }
320 mutex_unlock(&pmx_mutex);
321 return 0;
322}
323
324static int pmx_open(struct inode *inode, struct file *file)
325{
326 return single_open(file, pmx_show, NULL);
327}
328
329static const struct file_operations pmx_operations = {
330 .owner = THIS_MODULE,
331 .open = pmx_open,
332 .read = seq_read,
333 .llseek = seq_lseek,
334 .release = single_release,
335};
336
337static int __init init_pmx_read_debugfs(void)
338{
339 /* Expose a simple debugfs interface to view pmx settings */
340 (void) debugfs_create_file("padmux", S_IFREG | S_IRUGO,
341 NULL, NULL,
342 &pmx_operations);
343 return 0;
344}
345
346/*
347 * This needs to come in after the core_initcall(),
348 * because debugfs is not available until
349 * the subsystems come up.
350 */
351module_init(init_pmx_read_debugfs);
352#endif
353
354static int __init pmx_init(void)
355{
356 int ret;
357
358 ret = pmx_set_default();
359
360 if (IS_ERR_VALUE(ret))
361 pr_crit("padmux: default settings could not be set\n");
362
363 return 0;
364}
365
366/* Should be initialized before consumers */
367core_initcall(pmx_init);
diff --git a/arch/arm/mach-u300/padmux.h b/arch/arm/mach-u300/padmux.h
index 8c2099ac5046..6e8b86064097 100644
--- a/arch/arm/mach-u300/padmux.h
+++ b/arch/arm/mach-u300/padmux.h
@@ -6,14 +6,34 @@
  * Copyright (C) 2009 ST-Ericsson AB
  * License terms: GNU General Public License (GPL) version 2
  * U300 PADMUX API
- * Author: Linus Walleij <linus.walleij@stericsson.com>
- *
+ * Author: Martin Persson <martin.persson@stericsson.com>
  */
 
 #ifndef __MACH_U300_PADMUX_H
 #define __MACH_U300_PADMUX_H
 
-void pmx_set_mission_mode_mmc(void);
-void pmx_set_mission_mode_spi(void);
+enum pmx_settings {
+	U300_APP_PMX_MMC_SETTING,
+	U300_APP_PMX_SPI_SETTING
+};
+
+struct pmx_onmask {
+	u16 mask; /* Mask bits */
+	u16 val;  /* Value when active */
+};
+
+struct pmx {
+	struct device *dev;
+	enum pmx_settings setting;
+	char *name;
+	bool activated;
+	bool default_on;
+	struct pmx_onmask onmask[];
+};
+
+struct pmx *pmx_get(struct device *dev, enum pmx_settings setting);
+int pmx_put(struct device *dev, struct pmx *pmx);
+int pmx_activate(struct device *dev, struct pmx *pmx);
+int pmx_deactivate(struct device *dev, struct pmx *pmx);
 
 #endif
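The header boils down to a small claim/activate API: a consumer binds a setting with pmx_get(), switches the pins with pmx_activate()/pmx_deactivate(), and drops the binding with pmx_put(). A minimal consumer sketch (hypothetical driver code, not part of the patch above):

	static int foo_setup(struct device *dev)
	{
		struct pmx *pmx;
		int ret;

		pmx = pmx_get(dev, U300_APP_PMX_MMC_SETTING);
		if (IS_ERR(pmx))
			return PTR_ERR(pmx);	/* unknown setting or already bound */

		ret = pmx_activate(dev, pmx);
		if (ret < 0) {
			pmx_put(dev, pmx);	/* release the binding on failure */
			return ret;
		}
		/* pins are muxed; pmx_deactivate() + pmx_put() undo this later */
		return 0;
	}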
diff --git a/arch/arm/mach-u300/spi.c b/arch/arm/mach-u300/spi.c
new file mode 100644
index 000000000000..f0e887bea30e
--- /dev/null
+++ b/arch/arm/mach-u300/spi.c
@@ -0,0 +1,124 @@
1/*
2 * arch/arm/mach-u300/spi.c
3 *
4 * Copyright (C) 2009 ST-Ericsson AB
5 * License terms: GNU General Public License (GPL) version 2
6 *
7 * Author: Linus Walleij <linus.walleij@stericsson.com>
8 */
9#include <linux/device.h>
10#include <linux/amba/bus.h>
11#include <linux/spi/spi.h>
12#include <linux/amba/pl022.h>
13#include <linux/err.h>
14#include "padmux.h"
15
16/*
17 * The following is for the actual devices on the SSP/SPI bus
18 */
19#ifdef CONFIG_MACH_U300_SPIDUMMY
20static void select_dummy_chip(u32 chipselect)
21{
22 pr_debug("CORE: %s called with CS=0x%x (%s)\n",
23 __func__,
24 chipselect,
25 chipselect ? "unselect chip" : "select chip");
26 /*
27 * Here you would write the chip select value to the GPIO pins if
28 * this was a real chip (but this is a loopback dummy).
29 */
30}
31
32struct pl022_config_chip dummy_chip_info = {
33 /* Nominally this is LOOPBACK_DISABLED, but this is our dummy chip! */
34 .lbm = LOOPBACK_ENABLED,
35 /*
36 * available POLLING_TRANSFER and INTERRUPT_TRANSFER,
37 * DMA_TRANSFER does not work
38 */
39 .com_mode = INTERRUPT_TRANSFER,
40 .iface = SSP_INTERFACE_MOTOROLA_SPI,
41 /* We can only act as master but SSP_SLAVE is possible in theory */
42 .hierarchy = SSP_MASTER,
43 /* 0 = drive TX even as slave, 1 = do not drive TX as slave */
44 .slave_tx_disable = 0,
45 /* LSB first */
46 .endian_tx = SSP_TX_LSB,
47 .endian_rx = SSP_RX_LSB,
48 .data_size = SSP_DATA_BITS_8, /* used to be 12 in some default */
49 .rx_lev_trig = SSP_RX_1_OR_MORE_ELEM,
50 .tx_lev_trig = SSP_TX_1_OR_MORE_EMPTY_LOC,
51 .clk_phase = SSP_CLK_SECOND_EDGE,
52 .clk_pol = SSP_CLK_POL_IDLE_LOW,
53 .ctrl_len = SSP_BITS_12,
54 .wait_state = SSP_MWIRE_WAIT_ZERO,
55 .duplex = SSP_MICROWIRE_CHANNEL_FULL_DUPLEX,
56 /*
57 * This is where you insert a call to a function to enable CS
58 * (usually GPIO) for a certain chip.
59 */
60 .cs_control = select_dummy_chip,
61};
62#endif
63
64static struct spi_board_info u300_spi_devices[] = {
65#ifdef CONFIG_MACH_U300_SPIDUMMY
66 {
67 /* A dummy chip used for loopback tests */
68 .modalias = "spi-dummy",
69 /* Really dummy, pass in additional chip config here */
70 .platform_data = NULL,
71 /* This defines how the controller shall handle the device */
72 .controller_data = &dummy_chip_info,
73 /* .irq - no external IRQ routed from this device */
74 .max_speed_hz = 1000000,
75 .bus_num = 0, /* Only one bus on this chip */
76 .chip_select = 0,
77 /* Means SPI_CS_HIGH, change if e.g low CS */
78 .mode = 0,
79 },
80#endif
81};
82
83static struct pl022_ssp_controller ssp_platform_data = {
84 /* If you have several SPI buses this varies, we have only bus 0 */
85 .bus_id = 0,
86 /* Set this to 1 when we think we got DMA working */
87 .enable_dma = 0,
88 /*
89 * On the APP CPU GPIO 4, 5 and 6 are connected as generic
90 * chip selects for SPI. (Same on U330, U335 and U365.)
91 * TODO: make sure the GPIO driver can select these properly
92 * and do padmuxing accordingly too.
93 */
94 .num_chipselect = 3,
95};
96
97
98void __init u300_spi_init(struct amba_device *adev)
99{
100 struct pmx *pmx;
101
102 adev->dev.platform_data = &ssp_platform_data;
103 /*
104 * Setup padmuxing for SPI. Since this must always be
105 * compiled into the kernel, pmx is never released.
106 */
107 pmx = pmx_get(&adev->dev, U300_APP_PMX_SPI_SETTING);
108
109 if (IS_ERR(pmx))
110 dev_warn(&adev->dev, "Could not get padmux handle\n");
111 else {
112 int ret;
113
114 ret = pmx_activate(&adev->dev, pmx);
115 if (IS_ERR_VALUE(ret))
116 dev_warn(&adev->dev, "Could not activate padmuxing\n");
117 }
118
119}
120void __init u300_spi_register_board_devices(void)
121{
122 /* Register any SPI devices */
123 spi_register_board_info(u300_spi_devices, ARRAY_SIZE(u300_spi_devices));
124}
diff --git a/arch/arm/mach-u300/spi.h b/arch/arm/mach-u300/spi.h
new file mode 100644
index 000000000000..bd3d867e240f
--- /dev/null
+++ b/arch/arm/mach-u300/spi.h
@@ -0,0 +1,26 @@
1/*
2 * arch/arm/mach-u300/spi.h
3 *
4 * Copyright (C) 2009 ST-Ericsson AB
5 * License terms: GNU General Public License (GPL) version 2
6 *
7 * Author: Linus Walleij <linus.walleij@stericsson.com>
8 */
9#ifndef SPI_H
10#define SPI_H
11#include <linux/amba/bus.h>
12
13#ifdef CONFIG_SPI_PL022
14void __init u300_spi_init(struct amba_device *adev);
15void __init u300_spi_register_board_devices(void);
16#else
17/* Compile out SPI support if PL022 is not selected */
18static inline void __init u300_spi_init(struct amba_device *adev)
19{
20}
21static inline void __init u300_spi_register_board_devices(void)
22{
23}
24#endif
25
26#endif
diff --git a/arch/arm/mach-u300/timer.c b/arch/arm/mach-u300/timer.c
index cce53204880e..26d26f5100fe 100644
--- a/arch/arm/mach-u300/timer.c
+++ b/arch/arm/mach-u300/timer.c
@@ -346,6 +346,21 @@ static struct clocksource clocksource_u300_1mhz = {
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
+/*
+ * Override the global weak sched_clock symbol with this
+ * local implementation which uses the clocksource to get some
+ * better resolution when scheduling the kernel. We accept that
+ * this wraps around for now, since it is just a relative time
+ * stamp. (Inspired by OMAP implementation.)
+ */
+unsigned long long notrace sched_clock(void)
+{
+	return clocksource_cyc2ns(clocksource_u300_1mhz.read(
+				  &clocksource_u300_1mhz),
+				  clocksource_u300_1mhz.mult,
+				  clocksource_u300_1mhz.shift);
+}
+
 
 /*
  * This sets up the system timers, clock source and clock event.
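For reference, clocksource_cyc2ns() is a multiply-and-shift: ns = (cycles * mult) >> shift. A simplified sketch of the arithmetic (the kernel's version lives in include/linux/clocksource.h):

	/* With a 1 MHz clocksource, mult/shift are chosen so one cycle
	 * scales to roughly 1000 ns. Simplified sketch only. */
	static inline unsigned long long cyc2ns(unsigned long long cyc,
						unsigned int mult,
						unsigned int shift)
	{
		return (cyc * mult) >> shift;
	}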
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index 975eae41ee66..e13be7c444ca 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -27,6 +27,7 @@
 #include <linux/amba/bus.h>
 #include <linux/amba/clcd.h>
 #include <linux/amba/pl061.h>
+#include <linux/amba/mmci.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
 #include <linux/cnt32_to_63.h>
@@ -47,7 +48,6 @@
 #include <asm/mach/irq.h>
 #include <asm/mach/time.h>
 #include <asm/mach/map.h>
-#include <asm/mach/mmc.h>
 
 #include "core.h"
 #include "clock.h"
@@ -369,7 +369,7 @@ unsigned int mmc_status(struct device *dev)
 	return readl(VERSATILE_SYSMCI) & mask;
 }
 
-static struct mmc_platform_data mmc0_plat_data = {
+static struct mmci_platform_data mmc0_plat_data = {
 	.ocr_mask	= MMC_VDD_32_33|MMC_VDD_33_34,
 	.status		= mmc_status,
 	.gpio_wp	= -1,
diff --git a/arch/arm/mach-versatile/versatile_pb.c b/arch/arm/mach-versatile/versatile_pb.c
index 9af8d8154df5..239cd30fc4f5 100644
--- a/arch/arm/mach-versatile/versatile_pb.c
+++ b/arch/arm/mach-versatile/versatile_pb.c
@@ -24,6 +24,7 @@
 #include <linux/sysdev.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/pl061.h>
+#include <linux/amba/mmci.h>
 #include <linux/io.h>
 
 #include <mach/hardware.h>
@@ -31,7 +32,6 @@
 #include <asm/mach-types.h>
 
 #include <asm/mach/arch.h>
-#include <asm/mach/mmc.h>
 
 #include "core.h"
 
@@ -41,7 +41,7 @@
 #define IRQ_MMCI1A	IRQ_SIC_MMCI1A
 #endif
 
-static struct mmc_platform_data mmc1_plat_data = {
+static struct mmci_platform_data mmc1_plat_data = {
 	.ocr_mask	= MMC_VDD_32_33|MMC_VDD_33_34,
 	.status		= mmc_status,
 	.gpio_wp	= -1,
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 5fe595aeba69..8d43e58f9244 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -771,3 +771,8 @@ config CACHE_XSC3L2
 	select OUTER_CACHE
 	help
 	  This option enables the L2 cache on XScale3.
+
+config ARM_L1_CACHE_SHIFT
+	int
+	default 6 if ARCH_OMAP3
+	default 5
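The new symbol is a line-size shift, presumably consumed via the usual L1_CACHE_BYTES = 1 << L1_CACHE_SHIFT convention:

	/* 1 << 6 = 64-byte L1 lines on ARCH_OMAP3, 1 << 5 = 32 bytes elsewhere */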
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index cc8829d7e116..379f78556055 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -25,6 +25,19 @@
 
 #include "fault.h"
 
+/*
+ * Fault status register encodings. We steal bit 31 for our own purposes.
+ */
+#define FSR_LNX_PF	(1 << 31)
+#define FSR_WRITE	(1 << 11)
+#define FSR_FS4		(1 << 10)
+#define FSR_FS3_0	(15)
+
+static inline int fsr_fs(unsigned int fsr)
+{
+	return (fsr & FSR_FS3_0) | (fsr & FSR_FS4) >> 6;
+}
+
 #ifdef CONFIG_MMU
 
 #ifdef CONFIG_KPROBES
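A quick worked example of the decode above (value chosen for illustration):

	/* fsr = 0x405: FS[3:0] = 5 and FS[4] (bit 10) set.
	 * fsr_fs(0x405) = (0x405 & 15) | ((0x405 & 0x400) >> 6)
	 *              = 5 | 16 = 21, indexing fsr_info[21]. */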
@@ -182,18 +195,35 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 #define VM_FAULT_BADMAP		0x010000
 #define VM_FAULT_BADACCESS	0x020000
 
-static int
+/*
+ * Check that the permissions on the VMA allow for the fault which occurred.
+ * If we encountered a write fault, we must have write permission, otherwise
+ * we allow any permission.
+ */
+static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
+{
+	unsigned int mask = VM_READ | VM_WRITE | VM_EXEC;
+
+	if (fsr & FSR_WRITE)
+		mask = VM_WRITE;
+	if (fsr & FSR_LNX_PF)
+		mask = VM_EXEC;
+
+	return vma->vm_flags & mask ? false : true;
+}
+
+static int __kprobes
 __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
 		struct task_struct *tsk)
 {
 	struct vm_area_struct *vma;
-	int fault, mask;
+	int fault;
 
 	vma = find_vma(mm, addr);
 	fault = VM_FAULT_BADMAP;
-	if (!vma)
+	if (unlikely(!vma))
 		goto out;
-	if (vma->vm_start > addr)
+	if (unlikely(vma->vm_start > addr))
 		goto check_stack;
 
 	/*
@@ -201,47 +231,24 @@ __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
 	 * memory access, so we can handle it.
 	 */
 good_area:
-	if (fsr & (1 << 11)) /* write? */
-		mask = VM_WRITE;
-	else
-		mask = VM_READ|VM_EXEC|VM_WRITE;
-
-	fault = VM_FAULT_BADACCESS;
-	if (!(vma->vm_flags & mask))
+	if (access_error(fsr, vma)) {
+		fault = VM_FAULT_BADACCESS;
 		goto out;
+	}
 
 	/*
-	 * If for any reason at all we couldn't handle
-	 * the fault, make sure we exit gracefully rather
-	 * than endlessly redo the fault.
+	 * If for any reason at all we couldn't handle the fault, make
+	 * sure we exit gracefully rather than endlessly redo the fault.
 	 */
-survive:
-	fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, (fsr & (1 << 11)) ? FAULT_FLAG_WRITE : 0);
-	if (unlikely(fault & VM_FAULT_ERROR)) {
-		if (fault & VM_FAULT_OOM)
-			goto out_of_memory;
-		else if (fault & VM_FAULT_SIGBUS)
-			return fault;
-		BUG();
-	}
+	fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, (fsr & FSR_WRITE) ? FAULT_FLAG_WRITE : 0);
+	if (unlikely(fault & VM_FAULT_ERROR))
+		return fault;
 	if (fault & VM_FAULT_MAJOR)
 		tsk->maj_flt++;
 	else
 		tsk->min_flt++;
 	return fault;
 
-out_of_memory:
-	if (!is_global_init(tsk))
-		goto out;
-
-	/*
-	 * If we are out of memory for pid1, sleep for a while and retry
-	 */
-	up_read(&mm->mmap_sem);
-	yield();
-	down_read(&mm->mmap_sem);
-	goto survive;
-
 check_stack:
 	if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
 		goto good_area;
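To see access_error() in action (illustrative case): a write fault sets FSR_WRITE, so mask narrows to VM_WRITE; on a read-only mapping that bit is absent from vm_flags, the helper returns true, and the caller reports VM_FAULT_BADACCESS:

	/* write fault on a PROT_READ mapping:
	 * mask = VM_WRITE; vm_flags & VM_WRITE == 0 -> access_error() == true
	 * -> fault = VM_FAULT_BADACCESS, goto out */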
@@ -278,6 +285,13 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 		if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc))
 			goto no_context;
 		down_read(&mm->mmap_sem);
+	} else {
+		/*
+		 * The above down_read_trylock() might have succeeded in
+		 * which case, we'll have missed the might_sleep() from
+		 * down_read()
+		 */
+		might_sleep();
 	}
 
 	fault = __do_page_fault(mm, addr, fsr, tsk);
@@ -289,6 +303,16 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
 		return 0;
 
+	if (fault & VM_FAULT_OOM) {
+		/*
+		 * We ran out of memory, call the OOM killer, and return to
+		 * userspace (which will retry the fault, or kill us if we
+		 * got oom-killed)
+		 */
+		pagefault_out_of_memory();
+		return 0;
+	}
+
 	/*
 	 * If we are in kernel mode at this point, we
 	 * have no context to handle this fault with.
@@ -296,16 +320,6 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	if (!user_mode(regs))
 		goto no_context;
 
-	if (fault & VM_FAULT_OOM) {
-		/*
-		 * We ran out of memory, or some other thing
-		 * happened to us that made us unable to handle
-		 * the page fault gracefully.
-		 */
-		printk("VM: killing process %s\n", tsk->comm);
-		do_group_exit(SIGKILL);
-		return 0;
-	}
 	if (fault & VM_FAULT_SIGBUS) {
 		/*
 		 * We had some memory, but were unable to
@@ -489,10 +503,10 @@ hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *)
 asmlinkage void __exception
 do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 {
-	const struct fsr_info *inf = fsr_info + (fsr & 15) + ((fsr & (1 << 10)) >> 6);
+	const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
 	struct siginfo info;
 
-	if (!inf->fn(addr, fsr, regs))
+	if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
 		return;
 
 	printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n",
@@ -508,6 +522,6 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 asmlinkage void __exception
 do_PrefetchAbort(unsigned long addr, struct pt_regs *regs)
 {
-	do_translation_fault(addr, 0, regs);
+	do_translation_fault(addr, FSR_LNX_PF, regs);
 }
 
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index f982606d7bf9..877c492f8e10 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -613,6 +613,14 @@ void __init mem_init(void)
 
 void free_initmem(void)
 {
+#ifdef CONFIG_HAVE_TCM
+	extern char *__tcm_start, *__tcm_end;
+
+	totalram_pages += free_area(__phys_to_pfn(__pa(__tcm_start)),
+				    __phys_to_pfn(__pa(__tcm_end)),
+				    "TCM link");
+#endif
+
 	if (!machine_is_integrator() && !machine_is_cintegrator())
 		totalram_pages += free_area(__phys_to_pfn(__pa(__init_begin)),
 					    __phys_to_pfn(__pa(__init_end)),
diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c
index 3c127aabe214..1ff6a37e893c 100644
--- a/arch/arm/plat-iop/adma.c
+++ b/arch/arm/plat-iop/adma.c
@@ -179,7 +179,6 @@ static int __init iop3xx_adma_cap_init(void)
 	dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
 	#else
 	dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask);
-	dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_0_data.cap_mask);
 	dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
 	#endif
 
@@ -188,7 +187,6 @@ static int __init iop3xx_adma_cap_init(void)
 	dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
 	#else
 	dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask);
-	dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_1_data.cap_mask);
 	dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
 	#endif
 
@@ -198,7 +196,7 @@ static int __init iop3xx_adma_cap_init(void)
 	dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
 	#else
 	dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask);
-	dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask);
+	dma_cap_set(DMA_XOR_VAL, iop3xx_aau_data.cap_mask);
 	dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask);
 	dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
 	#endif
diff --git a/arch/arm/plat-pxa/dma.c b/arch/arm/plat-pxa/dma.c
index 70aeee407f7d..2975798d411f 100644
--- a/arch/arm/plat-pxa/dma.c
+++ b/arch/arm/plat-pxa/dma.c
@@ -17,22 +17,266 @@
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
 #include <linux/errno.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/system.h>
 #include <asm/irq.h>
+#include <asm/memory.h>
 #include <mach/hardware.h>
 #include <mach/dma.h>
 
+#define DMA_DEBUG_NAME		"pxa_dma"
+#define DMA_MAX_REQUESTERS	64
+
 struct dma_channel {
 	char *name;
 	pxa_dma_prio prio;
 	void (*irq_handler)(int, void *);
 	void *data;
+	spinlock_t lock;
 };
 
 static struct dma_channel *dma_channels;
 static int num_dma_channels;
 
42/*
43 * Debug fs
44 */
45#ifdef CONFIG_DEBUG_FS
46#include <linux/debugfs.h>
47#include <linux/uaccess.h>
48#include <linux/seq_file.h>
49
50static struct dentry *dbgfs_root, *dbgfs_state, **dbgfs_chan;
51
52static int dbg_show_requester_chan(struct seq_file *s, void *p)
53{
54 int pos = 0;
55 int chan = (int)s->private;
56 int i;
57 u32 drcmr;
58
59 pos += seq_printf(s, "DMA channel %d requesters list :\n", chan);
60 for (i = 0; i < DMA_MAX_REQUESTERS; i++) {
61 drcmr = DRCMR(i);
62 if ((drcmr & DRCMR_CHLNUM) == chan)
63 pos += seq_printf(s, "\tRequester %d (MAPVLD=%d)\n", i,
64 !!(drcmr & DRCMR_MAPVLD));
65 }
66 return pos;
67}
68
69static inline int dbg_burst_from_dcmd(u32 dcmd)
70{
71 int burst = (dcmd >> 16) & 0x3;
72
73 return burst ? 4 << burst : 0;
74}
75
76static int is_phys_valid(unsigned long addr)
77{
78 return pfn_valid(__phys_to_pfn(addr));
79}
80
81#define DCSR_STR(flag) (dcsr & DCSR_##flag ? #flag" " : "")
82#define DCMD_STR(flag) (dcmd & DCMD_##flag ? #flag" " : "")
83
84static int dbg_show_descriptors(struct seq_file *s, void *p)
85{
86 int pos = 0;
87 int chan = (int)s->private;
88 int i, max_show = 20, burst, width;
89 u32 dcmd;
90 unsigned long phys_desc;
91 struct pxa_dma_desc *desc;
92 unsigned long flags;
93
94 spin_lock_irqsave(&dma_channels[chan].lock, flags);
95 phys_desc = DDADR(chan);
96
97 pos += seq_printf(s, "DMA channel %d descriptors :\n", chan);
98 pos += seq_printf(s, "[%03d] First descriptor unknown\n", 0);
99 for (i = 1; i < max_show && is_phys_valid(phys_desc); i++) {
100 desc = phys_to_virt(phys_desc);
101 dcmd = desc->dcmd;
102 burst = dbg_burst_from_dcmd(dcmd);
103 width = (1 << ((dcmd >> 14) & 0x3)) >> 1;
104
105 pos += seq_printf(s, "[%03d] Desc at %08lx(virt %p)\n",
106 i, phys_desc, desc);
107 pos += seq_printf(s, "\tDDADR = %08x\n", desc->ddadr);
108 pos += seq_printf(s, "\tDSADR = %08x\n", desc->dsadr);
109 pos += seq_printf(s, "\tDTADR = %08x\n", desc->dtadr);
110 pos += seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d"
111 " width=%d len=%d)\n",
112 dcmd,
113 DCMD_STR(INCSRCADDR), DCMD_STR(INCTRGADDR),
114 DCMD_STR(FLOWSRC), DCMD_STR(FLOWTRG),
115 DCMD_STR(STARTIRQEN), DCMD_STR(ENDIRQEN),
116 DCMD_STR(ENDIAN), burst, width,
117 dcmd & DCMD_LENGTH);
118 phys_desc = desc->ddadr;
119 }
120 if (i == max_show)
121 pos += seq_printf(s, "[%03d] Desc at %08lx ... max display reached\n",
122 i, phys_desc);
123 else
124 pos += seq_printf(s, "[%03d] Desc at %08lx is %s\n",
125 i, phys_desc, phys_desc == DDADR_STOP ?
126 "DDADR_STOP" : "invalid");
127
128 spin_unlock_irqrestore(&dma_channels[chan].lock, flags);
129 return pos;
130}
131
132static int dbg_show_chan_state(struct seq_file *s, void *p)
133{
134 int pos = 0;
135 int chan = (int)s->private;
136 u32 dcsr, dcmd;
137 int burst, width;
138 static char *str_prio[] = { "high", "normal", "low" };
139
140 dcsr = DCSR(chan);
141 dcmd = DCMD(chan);
142 burst = dbg_burst_from_dcmd(dcmd);
143 width = (1 << ((dcmd >> 14) & 0x3)) >> 1;
144
145 pos += seq_printf(s, "DMA channel %d\n", chan);
146 pos += seq_printf(s, "\tPriority : %s\n",
147 str_prio[dma_channels[chan].prio]);
148 pos += seq_printf(s, "\tUnaligned transfer bit: %s\n",
149 DALGN & (1 << chan) ? "yes" : "no");
150 pos += seq_printf(s, "\tDCSR = %08x (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
151 dcsr, DCSR_STR(RUN), DCSR_STR(NODESC),
152 DCSR_STR(STOPIRQEN), DCSR_STR(EORIRQEN),
153 DCSR_STR(EORJMPEN), DCSR_STR(EORSTOPEN),
154 DCSR_STR(SETCMPST), DCSR_STR(CLRCMPST),
155 DCSR_STR(CMPST), DCSR_STR(EORINTR), DCSR_STR(REQPEND),
156 DCSR_STR(STOPSTATE), DCSR_STR(ENDINTR),
157 DCSR_STR(STARTINTR), DCSR_STR(BUSERR));
158
159 pos += seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d"
160 " len=%d)\n",
161 dcmd,
162 DCMD_STR(INCSRCADDR), DCMD_STR(INCTRGADDR),
163 DCMD_STR(FLOWSRC), DCMD_STR(FLOWTRG),
164 DCMD_STR(STARTIRQEN), DCMD_STR(ENDIRQEN),
165 DCMD_STR(ENDIAN), burst, width, dcmd & DCMD_LENGTH);
166 pos += seq_printf(s, "\tDSADR = %08x\n", DSADR(chan));
167 pos += seq_printf(s, "\tDTADR = %08x\n", DTADR(chan));
168 pos += seq_printf(s, "\tDDADR = %08x\n", DDADR(chan));
169 return pos;
170}
171
172static int dbg_show_state(struct seq_file *s, void *p)
173{
174 int pos = 0;
175
176 /* basic device status */
177 pos += seq_printf(s, "DMA engine status\n");
178 pos += seq_printf(s, "\tChannel number: %d\n", num_dma_channels);
179
180 return pos;
181}
182
183#define DBGFS_FUNC_DECL(name) \
184static int dbg_open_##name(struct inode *inode, struct file *file) \
185{ \
186 return single_open(file, dbg_show_##name, inode->i_private); \
187} \
188static const struct file_operations dbg_fops_##name = { \
189 .owner = THIS_MODULE, \
190 .open = dbg_open_##name, \
191 .llseek = seq_lseek, \
192 .read = seq_read, \
193 .release = single_release, \
194}
195
196DBGFS_FUNC_DECL(state);
197DBGFS_FUNC_DECL(chan_state);
198DBGFS_FUNC_DECL(descriptors);
199DBGFS_FUNC_DECL(requester_chan);
200
201static struct dentry *pxa_dma_dbg_alloc_chan(int ch, struct dentry *chandir)
202{
203 char chan_name[11];
204 struct dentry *chan, *chan_state = NULL, *chan_descr = NULL;
205 struct dentry *chan_reqs = NULL;
206 void *dt;
207
208 scnprintf(chan_name, sizeof(chan_name), "%d", ch);
209 chan = debugfs_create_dir(chan_name, chandir);
210 dt = (void *)ch;
211
212 if (chan)
213 chan_state = debugfs_create_file("state", 0400, chan, dt,
214 &dbg_fops_chan_state);
215 if (chan_state)
216 chan_descr = debugfs_create_file("descriptors", 0400, chan, dt,
217 &dbg_fops_descriptors);
218 if (chan_descr)
219 chan_reqs = debugfs_create_file("requesters", 0400, chan, dt,
220 &dbg_fops_requester_chan);
221 if (!chan_reqs)
222 goto err_state;
223
224 return chan;
225
226err_state:
227 debugfs_remove_recursive(chan);
228 return NULL;
229}
230
231static void pxa_dma_init_debugfs(void)
232{
233 int i;
234 struct dentry *chandir;
235
236 dbgfs_root = debugfs_create_dir(DMA_DEBUG_NAME, NULL);
237 if (IS_ERR(dbgfs_root) || !dbgfs_root)
238 goto err_root;
239
240 dbgfs_state = debugfs_create_file("state", 0400, dbgfs_root, NULL,
241 &dbg_fops_state);
242 if (!dbgfs_state)
243 goto err_state;
244
245	dbgfs_chan = kmalloc(sizeof(*dbgfs_chan) * num_dma_channels,
246			     GFP_KERNEL);
247	if (!dbgfs_chan)
248 goto err_alloc;
249
250 chandir = debugfs_create_dir("channels", dbgfs_root);
251 if (!chandir)
252 goto err_chandir;
253
254 for (i = 0; i < num_dma_channels; i++) {
255 dbgfs_chan[i] = pxa_dma_dbg_alloc_chan(i, chandir);
256 if (!dbgfs_chan[i])
257 goto err_chans;
258 }
259
260 return;
261err_chans:
262err_chandir:
263 kfree(dbgfs_chan);
264err_alloc:
265err_state:
266 debugfs_remove_recursive(dbgfs_root);
267err_root:
268 pr_err("pxa_dma: debugfs is not available\n");
269}
270
271static void __exit pxa_dma_cleanup_debugfs(void)
272{
273 debugfs_remove_recursive(dbgfs_root);
274}
275#else
276static inline void pxa_dma_init_debugfs(void) {}
277static inline void pxa_dma_cleanup_debugfs(void) {}
278#endif
279
 int pxa_request_dma (char *name, pxa_dma_prio prio,
 			 void (*irq_handler)(int, void *),
 			 void *data)
@@ -71,6 +315,7 @@ int pxa_request_dma (char *name, pxa_dma_prio prio,
 	local_irq_restore(flags);
 	return i;
 }
+EXPORT_SYMBOL(pxa_request_dma);
 
 void pxa_free_dma (int dma_ch)
 {
@@ -88,24 +333,26 @@ void pxa_free_dma (int dma_ch)
 	dma_channels[dma_ch].name = NULL;
 	local_irq_restore(flags);
 }
+EXPORT_SYMBOL(pxa_free_dma);
 
 static irqreturn_t dma_irq_handler(int irq, void *dev_id)
 {
 	int i, dint = DINT;
+	struct dma_channel *channel;
 
-	for (i = 0; i < num_dma_channels; i++) {
-		if (dint & (1 << i)) {
-			struct dma_channel *channel = &dma_channels[i];
-			if (channel->name && channel->irq_handler) {
-				channel->irq_handler(i, channel->data);
-			} else {
-				/*
-				 * IRQ for an unregistered DMA channel:
-				 * let's clear the interrupts and disable it.
-				 */
-				printk (KERN_WARNING "spurious IRQ for DMA channel %d\n", i);
-				DCSR(i) = DCSR_STARTINTR|DCSR_ENDINTR|DCSR_BUSERR;
-			}
-		}
+	while (dint) {
+		i = __ffs(dint);
+		dint &= (dint - 1);
+		channel = &dma_channels[i];
+		if (channel->name && channel->irq_handler) {
+			channel->irq_handler(i, channel->data);
+		} else {
+			/*
+			 * IRQ for an unregistered DMA channel:
+			 * let's clear the interrupts and disable it.
+			 */
+			printk (KERN_WARNING "spurious IRQ for DMA channel %d\n", i);
+			DCSR(i) = DCSR_STARTINTR|DCSR_ENDINTR|DCSR_BUSERR;
+		}
 	}
 	return IRQ_HANDLED;
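The rewritten handler visits only the set bits of DINT: __ffs() returns the index of the lowest set bit and dint &= (dint - 1) clears it, so the loop runs once per pending channel instead of once per possible channel. A standalone sketch of the idiom (not kernel code):

	unsigned int pending = 0x112;		/* bits 1, 4 and 8 set */

	while (pending) {
		int ch = __ffs(pending);	/* 1, then 4, then 8 */
		pending &= (pending - 1);	/* clear the lowest set bit */
		/* service channel ch */
	}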
@@ -127,6 +374,7 @@ int __init pxa_init_dma(int irq, int num_ch)
 	for (i = 0; i < num_ch; i++) {
 		DCSR(i) = 0;
 		dma_channels[i].prio = min((i & 0xf) >> 2, DMA_PRIO_LOW);
+		spin_lock_init(&dma_channels[i].lock);
 	}
 
 	ret = request_irq(irq, dma_irq_handler, IRQF_DISABLED, "DMA", NULL);
@@ -135,10 +383,9 @@ int __init pxa_init_dma(int irq, int num_ch)
 		kfree(dma_channels);
 		return ret;
 	}
-
 	num_dma_channels = num_ch;
+
+	pxa_dma_init_debugfs();
+
 	return 0;
 }
-
-EXPORT_SYMBOL(pxa_request_dma);
-EXPORT_SYMBOL(pxa_free_dma);
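For context, the exported pair gives drivers the usual request/free lifecycle; a hypothetical consumer (illustration only, not from this patch):

	static void my_dma_irq(int channel, void *data)
	{
		/* inspect/acknowledge DCSR(channel) here */
	}

	static int my_dma_setup(void)
	{
		int ch = pxa_request_dma("mydev", DMA_PRIO_LOW,
					 my_dma_irq, NULL);

		if (ch < 0)
			return ch;	/* no free channel of that priority */
		/* ... program descriptors; pxa_free_dma(ch) when done ... */
		return 0;
	}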
diff --git a/arch/arm/plat-pxa/include/plat/mfp.h b/arch/arm/plat-pxa/include/plat/mfp.h
index 64019464c8db..22086e696e8e 100644
--- a/arch/arm/plat-pxa/include/plat/mfp.h
+++ b/arch/arm/plat-pxa/include/plat/mfp.h
@@ -150,6 +150,74 @@ enum {
150	MFP_PIN_GPIO125,
151	MFP_PIN_GPIO126,
152	MFP_PIN_GPIO127,
153
154 MFP_PIN_GPIO128,
155 MFP_PIN_GPIO129,
156 MFP_PIN_GPIO130,
157 MFP_PIN_GPIO131,
158 MFP_PIN_GPIO132,
159 MFP_PIN_GPIO133,
160 MFP_PIN_GPIO134,
161 MFP_PIN_GPIO135,
162 MFP_PIN_GPIO136,
163 MFP_PIN_GPIO137,
164 MFP_PIN_GPIO138,
165 MFP_PIN_GPIO139,
166 MFP_PIN_GPIO140,
167 MFP_PIN_GPIO141,
168 MFP_PIN_GPIO142,
169 MFP_PIN_GPIO143,
170 MFP_PIN_GPIO144,
171 MFP_PIN_GPIO145,
172 MFP_PIN_GPIO146,
173 MFP_PIN_GPIO147,
174 MFP_PIN_GPIO148,
175 MFP_PIN_GPIO149,
176 MFP_PIN_GPIO150,
177 MFP_PIN_GPIO151,
178 MFP_PIN_GPIO152,
179 MFP_PIN_GPIO153,
180 MFP_PIN_GPIO154,
181 MFP_PIN_GPIO155,
182 MFP_PIN_GPIO156,
183 MFP_PIN_GPIO157,
184 MFP_PIN_GPIO158,
185 MFP_PIN_GPIO159,
186 MFP_PIN_GPIO160,
187 MFP_PIN_GPIO161,
188 MFP_PIN_GPIO162,
189 MFP_PIN_GPIO163,
190 MFP_PIN_GPIO164,
191 MFP_PIN_GPIO165,
192 MFP_PIN_GPIO166,
193 MFP_PIN_GPIO167,
194 MFP_PIN_GPIO168,
195 MFP_PIN_GPIO169,
196 MFP_PIN_GPIO170,
197 MFP_PIN_GPIO171,
198 MFP_PIN_GPIO172,
199 MFP_PIN_GPIO173,
200 MFP_PIN_GPIO174,
201 MFP_PIN_GPIO175,
202 MFP_PIN_GPIO176,
203 MFP_PIN_GPIO177,
204 MFP_PIN_GPIO178,
205 MFP_PIN_GPIO179,
206 MFP_PIN_GPIO180,
207 MFP_PIN_GPIO181,
208 MFP_PIN_GPIO182,
209 MFP_PIN_GPIO183,
210 MFP_PIN_GPIO184,
211 MFP_PIN_GPIO185,
212 MFP_PIN_GPIO186,
213 MFP_PIN_GPIO187,
214 MFP_PIN_GPIO188,
215 MFP_PIN_GPIO189,
216 MFP_PIN_GPIO190,
217 MFP_PIN_GPIO191,
218
219 MFP_PIN_GPIO255 = 255,
220
221	MFP_PIN_GPIO0_2,
222	MFP_PIN_GPIO1_2,
223	MFP_PIN_GPIO2_2,
@@ -325,8 +393,9 @@ typedef unsigned long mfp_cfg_t;
 #define MFP_PULL_LOW		(0x1 << 21)
 #define MFP_PULL_HIGH		(0x2 << 21)
 #define MFP_PULL_BOTH		(0x3 << 21)
-#define MFP_PULL_MASK		(0x3 << 21)
-#define MFP_PULL(x)		(((x) >> 21) & 0x3)
+#define MFP_PULL_FLOAT		(0x4 << 21)
+#define MFP_PULL_MASK		(0x7 << 21)
+#define MFP_PULL(x)		(((x) >> 21) & 0x7)
 
 #define MFP_CFG_DEFAULT		(MFP_AF0 | MFP_DS03X | MFP_LPM_DEFAULT |\
 				 MFP_LPM_EDGE_NONE | MFP_PULL_NONE)
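The pull field at bit 21 thus widens from two bits to three so that FLOAT (0x4) fits, and MFP_PULL() recovers it:

	/* MFP_PULL(MFP_PULL_FLOAT) = ((0x4 << 21) >> 21) & 0x7 = 0x4,
	 * a value the old two-bit mask (0x3) could not represent. */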
diff --git a/arch/arm/plat-pxa/mfp.c b/arch/arm/plat-pxa/mfp.c
index e716c622a17c..9405d0379c85 100644
--- a/arch/arm/plat-pxa/mfp.c
+++ b/arch/arm/plat-pxa/mfp.c
@@ -77,11 +77,13 @@
  *	MFPR_PULL_LOW		1	 0	  1
  *	MFPR_PULL_HIGH		1	 1	  0
  *	MFPR_PULL_BOTH		1	 1	  1
+ *	MFPR_PULL_FLOAT		1	 0	  0
  */
 #define MFPR_PULL_NONE		(0)
 #define MFPR_PULL_LOW		(MFPR_PULL_SEL | MFPR_PULLDOWN_EN)
 #define MFPR_PULL_BOTH		(MFPR_PULL_LOW | MFPR_PULLUP_EN)
 #define MFPR_PULL_HIGH		(MFPR_PULL_SEL | MFPR_PULLUP_EN)
+#define MFPR_PULL_FLOAT		(MFPR_PULL_SEL)
 
 /* mfp_spin_lock is used to ensure that MFP register configuration
  * (most likely a read-modify-write operation) is atomic, and that
@@ -116,6 +118,7 @@ static const unsigned long mfpr_pull[] = {
 	MFPR_PULL_LOW,
 	MFPR_PULL_HIGH,
 	MFPR_PULL_BOTH,
+	MFPR_PULL_FLOAT,
 };
 
 /* mapping of MFP_LPM_EDGE_* definitions to MFPR_EDGE_* register bits */
diff --git a/arch/arm/plat-s3c/gpio.c b/arch/arm/plat-s3c/gpio.c
index 260fdc6ad685..5ff24e0f9f89 100644
--- a/arch/arm/plat-s3c/gpio.c
+++ b/arch/arm/plat-s3c/gpio.c
@@ -28,7 +28,7 @@ static __init void s3c_gpiolib_track(struct s3c_gpio_chip *chip)
 
 	gpn = chip->chip.base;
 	for (i = 0; i < chip->chip.ngpio; i++, gpn++) {
-		BUG_ON(gpn > ARRAY_SIZE(s3c_gpios));
+		BUG_ON(gpn >= ARRAY_SIZE(s3c_gpios));
 		s3c_gpios[gpn] = chip;
 	}
 }
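The strengthened check matters exactly at the boundary:

	/* gpn == ARRAY_SIZE(s3c_gpios) indexes one slot past the end of
	 * s3c_gpios[]; the old "gpn > ARRAY_SIZE(...)" test let it through. */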
diff --git a/arch/arm/plat-s3c64xx/dma.c b/arch/arm/plat-s3c64xx/dma.c
index 67aa93dbb69e..266a10745a85 100644
--- a/arch/arm/plat-s3c64xx/dma.c
+++ b/arch/arm/plat-s3c64xx/dma.c
@@ -345,13 +345,13 @@ int s3c2410_dma_enqueue(unsigned int channel, void *id,
 	if (!chan)
 		return -EINVAL;
 
-	buff = kzalloc(sizeof(struct s3c64xx_dma_buff), GFP_KERNEL);
+	buff = kzalloc(sizeof(struct s3c64xx_dma_buff), GFP_ATOMIC);
 	if (!buff) {
 		printk(KERN_ERR "%s: no memory for buffer\n", __func__);
 		return -ENOMEM;
 	}
 
-	lli = dma_pool_alloc(dma_pool, GFP_KERNEL, &buff->lli_dma);
+	lli = dma_pool_alloc(dma_pool, GFP_ATOMIC, &buff->lli_dma);
 	if (!lli) {
 		printk(KERN_ERR "%s: no memory for lli\n", __func__);
 		ret = -ENOMEM;
@@ -697,7 +697,7 @@ static int __init s3c64xx_dma_init(void)
 
 	printk(KERN_INFO "%s: Registering DMA channels\n", __func__);
 
-	dma_pool = dma_pool_create("DMA-LLI", NULL, 32, 16, 0);
+	dma_pool = dma_pool_create("DMA-LLI", NULL, sizeof(struct pl080s_lli), 16, 0);
 	if (!dma_pool) {
 		printk(KERN_ERR "%s: failed to create pool\n", __func__);
 		return -ENOMEM;
diff --git a/arch/arm/plat-s3c64xx/include/plat/dma-plat.h b/arch/arm/plat-s3c64xx/include/plat/dma-plat.h
index 0c30dd986725..8f76a1e474d6 100644
--- a/arch/arm/plat-s3c64xx/include/plat/dma-plat.h
+++ b/arch/arm/plat-s3c64xx/include/plat/dma-plat.h
@@ -26,7 +26,7 @@ struct s3c64xx_dma_buff {
 	struct s3c64xx_dma_buff *next;
 
 	void *pw;
-	struct pl080_lli *lli;
+	struct pl080s_lli *lli;
 	dma_addr_t lli_dma;
 };
 
diff --git a/arch/arm/plat-s3c64xx/include/plat/irqs.h b/arch/arm/plat-s3c64xx/include/plat/irqs.h
index 743a70094d04..7956fd3bb194 100644
--- a/arch/arm/plat-s3c64xx/include/plat/irqs.h
+++ b/arch/arm/plat-s3c64xx/include/plat/irqs.h
@@ -194,9 +194,17 @@
 
 #define IRQ_EINT_GROUP(group, no)	(IRQ_EINT_GROUP##group##_BASE + (no))
 
+/* Define a group of interrupts for board-specific use (eg, for MFD
+ * interrupt controllers). */
+#define IRQ_BOARD_START (IRQ_EINT_GROUP9_BASE + IRQ_EINT_GROUP9_NR + 1)
+
+#define IRQ_BOARD_NR 16
+
+#define IRQ_BOARD_END (IRQ_BOARD_START + IRQ_BOARD_NR)
+
 /* Set the default NR_IRQS */
 
-#define NR_IRQS	(IRQ_EINT_GROUP9_BASE + IRQ_EINT_GROUP9_NR + 1)
+#define NR_IRQS	(IRQ_BOARD_END + 1)
 
 #endif /* __ASM_PLAT_S3C64XX_IRQS_H */
 
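With the board window in place, the default works out as:

	/* NR_IRQS = IRQ_BOARD_END + 1
	 *         = IRQ_BOARD_START + IRQ_BOARD_NR + 1
	 *         = (IRQ_EINT_GROUP9_BASE + IRQ_EINT_GROUP9_NR + 1) + 16 + 1 */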
diff --git a/arch/arm/plat-s3c64xx/s3c6400-clock.c b/arch/arm/plat-s3c64xx/s3c6400-clock.c
index febac1950d8e..9745852261e0 100644
--- a/arch/arm/plat-s3c64xx/s3c6400-clock.c
+++ b/arch/arm/plat-s3c64xx/s3c6400-clock.c
@@ -302,8 +302,8 @@ static int s3c64xx_setrate_clksrc(struct clk *clk, unsigned long rate)
 		return -EINVAL;
 
 	val = __raw_readl(reg);
-	val &= ~(0xf << sclk->shift);
-	val |= (div - 1) << sclk->shift;
+	val &= ~(0xf << sclk->divider_shift);
+	val |= (div - 1) << sclk->divider_shift;
 	__raw_writel(val, reg);
 
 	return 0;
@@ -328,6 +328,8 @@ static int s3c64xx_setparent_clksrc(struct clk *clk, struct clk *parent)
 		clksrc |= src_nr << sclk->shift;
 
 		__raw_writel(clksrc, S3C_CLK_SRC);
+
+		clk->parent = parent;
 		return 0;
 	}
 
@@ -343,7 +345,7 @@ static unsigned long s3c64xx_roundrate_clksrc(struct clk *clk,
 	if (rate > parent_rate)
 		rate = parent_rate;
 	else {
-		div = rate / parent_rate;
+		div = parent_rate / rate;
 
 		if (div == 0)
 			div = 1;
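The corrected direction is easy to sanity-check with round numbers (illustrative rates only):

	/* parent_rate = 266 MHz, rate = 66 MHz:
	 * new: div = 266/66 = 4 (truncating)
	 * old: div = 66/266 = 0, forced to 1 -> rounded rate == parent rate */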
diff --git a/arch/arm/plat-stmp3xxx/dma.c b/arch/arm/plat-stmp3xxx/dma.c
index d2f497764dce..ef88f25fb870 100644
--- a/arch/arm/plat-stmp3xxx/dma.c
+++ b/arch/arm/plat-stmp3xxx/dma.c
@@ -264,7 +264,7 @@ int stmp3xxx_dma_make_chain(int ch, struct stmp37xx_circ_dma_chain *chain,
 				stmp3xxx_dma_free_command(ch,
 							  &descriptors
 							  [i]);
-			} while (i-- >= 0);
+			} while (i-- > 0);
 		}
 		return err;
 	}
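The loop-exit fix is an off-by-one: entering the test with i == 0, the old condition still passed and the next iteration freed descriptors[-1].

	/* i == 0:  (i-- >= 0) -> true, next pass uses i == -1 (out of bounds);
	 *          (i-- >  0) -> false, loop ends after freeing index 0. */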
diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types
index c8c55b469342..94be7bb6cb9a 100644
--- a/arch/arm/tools/mach-types
+++ b/arch/arm/tools/mach-types
@@ -12,7 +12,7 @@
 #
 # http://www.arm.linux.org.uk/developer/machines/?action=new
 #
-# Last update: Sat Sep 12 12:00:16 2009
+# Last update: Fri Sep 18 21:42:00 2009
 #
 # machine_is_xxx	CONFIG_xxxx		MACH_TYPE_xxx		number
 #
@@ -1638,7 +1638,7 @@ mx35evb MACH_MX35EVB MX35EVB 1643
 aml_m8050		MACH_AML_M8050		AML_M8050		1644
 mx35_3ds		MACH_MX35_3DS		MX35_3DS		1645
 mars			MACH_MARS		MARS			1646
-ntosd_644xa		MACH_NTOSD_644XA	NTOSD_644XA		1647
+neuros_osd2		MACH_NEUROS_OSD2	NEUROS_OSD2		1647
 badger			MACH_BADGER		BADGER			1648
 trizeps4wl		MACH_TRIZEPS4WL		TRIZEPS4WL		1649
 trizeps5		MACH_TRIZEPS5		TRIZEPS5		1650
@@ -1654,7 +1654,7 @@ vf10xx MACH_VF10XX VF10XX 1659
 zoran43xx		MACH_ZORAN43XX		ZORAN43XX		1660
 sonix926		MACH_SONIX926		SONIX926		1661
 celestialsemi		MACH_CELESTIALSEMI	CELESTIALSEMI		1662
-cc9m2443		MACH_CC9M2443		CC9M2443		1663
+cc9m2443js		MACH_CC9M2443JS		CC9M2443JS		1663
 tw5334			MACH_TW5334		TW5334			1664
 omap_htcartemis		MACH_HTCARTEMIS		HTCARTEMIS		1665
 nal_hlite		MACH_NAL_HLITE		NAL_HLITE		1666
@@ -1802,7 +1802,7 @@ ccw9p9215js MACH_CCW9P9215JS CCW9P9215JS 1811
 rd88f5181l_ge		MACH_RD88F5181L_GE	RD88F5181L_GE		1812
 sifmain			MACH_SIFMAIN		SIFMAIN			1813
 sam9_l9261		MACH_SAM9_L9261		SAM9_L9261		1814
-cc9m2443js		MACH_CC9M2443JS		CC9M2443JS		1815
+cc9m2443		MACH_CC9M2443		CC9M2443		1815
 xaria300		MACH_XARIA300		XARIA300		1816
 it9200			MACH_IT9200		IT9200			1817
 rd88f5181l_fxo		MACH_RD88F5181L_FXO	RD88F5181L_FXO		1818
@@ -2409,3 +2409,15 @@ platypus MACH_PLATYPUS PLATYPUS 2422
 pss2			MACH_PSS2		PSS2			2423
 davinci_apm150		MACH_DAVINCI_APM150	DAVINCI_APM150		2424
 str9100			MACH_STR9100		STR9100			2425
+net5big			MACH_NET5BIG		NET5BIG			2426
+seabed9263		MACH_SEABED9263		SEABED9263		2427
+mx51_m2id		MACH_MX51_M2ID		MX51_M2ID		2428
+octvocplus_eb		MACH_OCTVOCPLUS_EB	OCTVOCPLUS_EB		2429
+klk_firefox		MACH_KLK_FIREFOX	KLK_FIREFOX		2430
+klk_wirma_module	MACH_KLK_WIRMA_MODULE	KLK_WIRMA_MODULE	2431
+klk_wirma_mmi		MACH_KLK_WIRMA_MMI	KLK_WIRMA_MMI		2432
+supersonic		MACH_SUPERSONIC		SUPERSONIC		2433
+liberty			MACH_LIBERTY		LIBERTY			2434
+mh355			MACH_MH355		MH355			2435
+pc7802			MACH_PC7802		PC7802			2436
+gnet_sgc		MACH_GNET_SGC		GNET_SGC		2437
diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S
index 21ac7c26079e..ffd90fbbc8f9 100644
--- a/arch/blackfin/kernel/vmlinux.lds.S
+++ b/arch/blackfin/kernel/vmlinux.lds.S
@@ -96,8 +96,7 @@ SECTIONS
 	{
 		__sdata = .;
 		/* This gets done first, so the glob doesn't suck it in */
-		. = ALIGN(32);
-		*(.data.cacheline_aligned)
+		CACHELINE_ALIGNED_DATA(32)
 
 #if !L1_DATA_A_LENGTH
 		. = ALIGN(32);
@@ -116,12 +115,7 @@ SECTIONS
 		DATA_DATA
 		CONSTRUCTORS
 
-		/* make sure the init_task is aligned to the
-		 * kernel thread size so we can locate the kernel
-		 * stack properly and quickly.
-		 */
-		. = ALIGN(THREAD_SIZE);
-		*(.init_task.data)
+		INIT_TASK_DATA(THREAD_SIZE)
 
 		__edata = .;
 	}
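INIT_TASK_DATA() comes from include/asm-generic/vmlinux.lds.h and expands to essentially what the hand-rolled lines produced (paraphrased from that header, not quoted verbatim):

	/* INIT_TASK_DATA(align):  . = ALIGN(align);  *(.data.init_task)
	 * i.e. align to THREAD_SIZE so the kernel stack can be located
	 * quickly, then pull in the init_task data. */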
@@ -134,39 +128,10 @@ SECTIONS
 	. = ALIGN(PAGE_SIZE);
 	___init_begin = .;
 
-	.init.text :
-	{
-		. = ALIGN(PAGE_SIZE);
-		__sinittext = .;
-		INIT_TEXT
-		__einittext = .;
-	}
-	.init.data :
-	{
-		. = ALIGN(16);
-		INIT_DATA
-	}
-	.init.setup :
-	{
-		. = ALIGN(16);
-		___setup_start = .;
-		*(.init.setup)
-		___setup_end = .;
-	}
-	.initcall.init :
-	{
-		___initcall_start = .;
-		INITCALLS
-		___initcall_end = .;
-	}
-	.con_initcall.init :
-	{
-		___con_initcall_start = .;
-		*(.con_initcall.init)
-		___con_initcall_end = .;
-	}
+	INIT_TEXT_SECTION(PAGE_SIZE)
+	. = ALIGN(16);
+	INIT_DATA_SECTION(16)
 	PERCPU(4)
-	SECURITY_INIT
 
 	/* we have to discard exit text and such at runtime, not link time, to
 	 * handle embedded cross-section references (alt instructions, bug
@@ -181,18 +146,9 @@ SECTIONS
 		EXIT_DATA
 	}
 
-	.init.ramfs :
-	{
-		. = ALIGN(4);
-		___initramfs_start = .;
-		*(.init.ramfs)
-		. = ALIGN(4);
-		___initramfs_end = .;
-	}
-
 	__l1_lma_start = .;
 
-	.text_l1 L1_CODE_START : AT(LOADADDR(.init.ramfs) + SIZEOF(.init.ramfs))
+	.text_l1 L1_CODE_START : AT(LOADADDR(.exit.data) + SIZEOF(.exit.data))
 	{
 		. = ALIGN(4);
 		__stext_l1 = .;
diff --git a/arch/cris/arch-v10/kernel/time.c b/arch/cris/arch-v10/kernel/time.c
index 2b73c7a5b649..31ca1418d5a7 100644
--- a/arch/cris/arch-v10/kernel/time.c
+++ b/arch/cris/arch-v10/kernel/time.c
@@ -28,7 +28,6 @@
 
 extern void update_xtime_from_cmos(void);
 extern int set_rtc_mmss(unsigned long nowtime);
-extern int setup_irq(int, struct irqaction *);
 extern int have_rtc;
 
 unsigned long get_ns_in_jiffie(void)
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index d2a3ff8c4d37..058adddf4e4b 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -52,8 +52,6 @@ static struct mm_struct* flush_mm;
 static struct vm_area_struct* flush_vma;
 static unsigned long flush_addr;
 
-extern int setup_irq(int, struct irqaction *);
-
 /* Mode registers */
 static unsigned long irq_regs[NR_CPUS] = {
 	regi_irq,
diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c
index 65633d0dab86..b1920d8de403 100644
--- a/arch/cris/arch-v32/kernel/time.c
+++ b/arch/cris/arch-v32/kernel/time.c
@@ -46,7 +46,6 @@ unsigned long timer_regs[NR_CPUS] =
 
 extern void update_xtime_from_cmos(void);
 extern int set_rtc_mmss(unsigned long nowtime);
-extern int setup_irq(int, struct irqaction *);
 extern int have_rtc;
 
 #ifdef CONFIG_CPU_FREQ
diff --git a/arch/cris/arch-v32/mach-a3/io.c b/arch/cris/arch-v32/mach-a3/io.c
index c22f67ecd9f3..090ceb99ef0b 100644
--- a/arch/cris/arch-v32/mach-a3/io.c
+++ b/arch/cris/arch-v32/mach-a3/io.c
@@ -36,7 +36,7 @@ struct crisv32_ioport crisv32_ioports[] = {
 	},
 };
 
-#define NBR_OF_PORTS sizeof(crisv32_ioports)/sizeof(struct crisv32_ioport)
+#define NBR_OF_PORTS ARRAY_SIZE(crisv32_ioports)
 
 struct crisv32_iopin crisv32_led_net0_green;
 struct crisv32_iopin crisv32_led_net0_red;
diff --git a/arch/cris/arch-v32/mach-fs/io.c b/arch/cris/arch-v32/mach-fs/io.c
index cb6327b1f8f8..a6958661fa8e 100644
--- a/arch/cris/arch-v32/mach-fs/io.c
+++ b/arch/cris/arch-v32/mach-fs/io.c
@@ -52,7 +52,7 @@ struct crisv32_ioport crisv32_ioports[] = {
 	}
 };
 
-#define NBR_OF_PORTS sizeof(crisv32_ioports)/sizeof(struct crisv32_ioport)
+#define NBR_OF_PORTS ARRAY_SIZE(crisv32_ioports)
 
 struct crisv32_iopin crisv32_led_net0_green;
 struct crisv32_iopin crisv32_led_net0_red;
diff --git a/arch/cris/include/arch-v10/arch/mmu.h b/arch/cris/include/arch-v10/arch/mmu.h
index df84f1716e6b..e829e5a37bbe 100644
--- a/arch/cris/include/arch-v10/arch/mmu.h
+++ b/arch/cris/include/arch-v10/arch/mmu.h
@@ -33,10 +33,10 @@ typedef struct
 
 /* CRIS PTE bits              (see R_TLB_LO in the register description)
  *
- *   Bit:  31-13 12-------4    3        2       1       0
- *         ________________________________________________
- *        | pfn  | reserved | global | valid | kernel | we  |
- *        |______|__________|________|_______|________|_____|
+ *   Bit:  31      30-13 12-------4    3        2       1       0
+ *         _______________________________________________________
+ *        | cache | pfn  | reserved | global | valid | kernel | we  |
+ *        |_______|______|__________|________|_______|________|_____|
  *
  * (pfn = physical frame number)
  */
@@ -53,6 +53,7 @@ typedef struct
 #define _PAGE_VALID          (1<<2) /* page is valid */
 #define _PAGE_SILENT_READ    (1<<2) /* synonym */
 #define _PAGE_GLOBAL         (1<<3) /* global page - context is ignored */
+#define _PAGE_NO_CACHE       (1<<31) /* part of the uncached memory map */
 
 /* Bits the HW doesn't care about but the kernel uses them in SW */
 
diff --git a/arch/cris/include/arch-v32/arch/mmu.h b/arch/cris/include/arch-v32/arch/mmu.h
index 6bcdc3fdf7dc..c1a13e05e963 100644
--- a/arch/cris/include/arch-v32/arch/mmu.h
+++ b/arch/cris/include/arch-v32/arch/mmu.h
@@ -28,10 +28,10 @@ typedef struct
 /*
  * CRISv32 PTE bits:
  *
- *   Bit:  31-13  12-5     4        3       2        1        0
- *        +-----+------+--------+-------+--------+-------+---------+
- *        | pfn | zero | global | valid | kernel | write | execute |
- *        +-----+------+--------+-------+--------+-------+---------+
+ *   Bit:  31      30-13  12-5     4        3       2        1        0
+ *        +-------+-----+------+--------+-------+--------+-------+---------+
+ *        | cache | pfn | zero | global | valid | kernel | write | execute |
+ *        +-------+-----+------+--------+-------+--------+-------+---------+
  */
 
 /*
@@ -45,6 +45,8 @@ typedef struct
 #define _PAGE_VALID         (1 << 3) /* Page is valid. */
 #define _PAGE_SILENT_READ   (1 << 3) /* Same as above. */
 #define _PAGE_GLOBAL        (1 << 4) /* Global page. */
+#define _PAGE_NO_CACHE      (1 << 31) /* part of the uncached memory map */
+
 
 /*
  * The hardware doesn't care about these bits, but the kernel uses them in
diff --git a/arch/cris/include/asm/hardirq.h b/arch/cris/include/asm/hardirq.h
index 74178adeb1cd..17bb12d760b2 100644
--- a/arch/cris/include/asm/hardirq.h
+++ b/arch/cris/include/asm/hardirq.h
@@ -2,16 +2,6 @@
2#define __ASM_HARDIRQ_H 2#define __ASM_HARDIRQ_H
3 3
4#include <asm/irq.h> 4#include <asm/irq.h>
5#include <linux/threads.h>
6#include <linux/cache.h>
7
8typedef struct {
9 unsigned int __softirq_pending;
10} ____cacheline_aligned irq_cpustat_t;
11
12#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
13
14void ack_bad_irq(unsigned int irq);
15 5
16#define HARDIRQ_BITS 8 6#define HARDIRQ_BITS 8
17 7
@@ -24,4 +14,6 @@ void ack_bad_irq(unsigned int irq);
24# error HARDIRQ_BITS is too low! 14# error HARDIRQ_BITS is too low!
25#endif 15#endif
26 16
17#include <asm-generic/hardirq.h>
18
27#endif /* __ASM_HARDIRQ_H */ 19#endif /* __ASM_HARDIRQ_H */
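The hardirq.h hunk above can delete the private irq_cpustat_t and the ack_bad_irq() prototype because asm-generic/hardirq.h of that era supplies both; roughly (a sketch, not the verbatim header):

    #include <linux/cache.h>
    #include <linux/threads.h>

    typedef struct {
            unsigned int __softirq_pending;
    } ____cacheline_aligned irq_cpustat_t;

    #include <linux/irq_cpustat.h>  /* standard irq_cpustat_t accessors */

    #ifndef ack_bad_irq
    static inline void ack_bad_irq(unsigned int irq)
    {
            printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
    }
    #endif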
diff --git a/arch/cris/include/asm/pgtable.h b/arch/cris/include/asm/pgtable.h
index 50aa974aa834..1fcce00f01f4 100644
--- a/arch/cris/include/asm/pgtable.h
+++ b/arch/cris/include/asm/pgtable.h
@@ -197,6 +197,8 @@ static inline pte_t __mk_pte(void * page, pgprot_t pgprot)
197static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) 197static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
198{ pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); return pte; } 198{ pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); return pte; }
199 199
200#define pgprot_noncached(prot) __pgprot((pgprot_val(prot) | _PAGE_NO_CACHE))
201
200 202
201/* pte_val refers to a page in the 0x4xxxxxxx physical DRAM interval 203/* pte_val refers to a page in the 0x4xxxxxxx physical DRAM interval
202 * __pte_page(pte_val) refers to the "virtual" DRAM interval 204 * __pte_page(pte_val) refers to the "virtual" DRAM interval
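With _PAGE_NO_CACHE occupying bit 31 and pgprot_noncached() defined as above, drivers get uncached mappings through the normal VM interfaces. A hedged usage sketch (the mmap handler itself is hypothetical; only the pgprot manipulation is the point):

    #include <linux/fs.h>
    #include <linux/mm.h>

    static int example_mmap(struct file *file, struct vm_area_struct *vma)
    {
            /* Set bit 31 in the page protection so every PTE created
             * for this range is part of the uncached memory map. */
            vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
            return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
                                   vma->vm_end - vma->vm_start,
                                   vma->vm_page_prot);
    }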
diff --git a/arch/cris/kernel/irq.c b/arch/cris/kernel/irq.c
index 7f642fcffbfc..0ca7d9892cc6 100644
--- a/arch/cris/kernel/irq.c
+++ b/arch/cris/kernel/irq.c
@@ -38,11 +38,6 @@
38 38
39#include <asm/io.h> 39#include <asm/io.h>
40 40
41void ack_bad_irq(unsigned int irq)
42{
43 printk("unexpected IRQ trap at vector %02x\n", irq);
44}
45
46int show_interrupts(struct seq_file *p, void *v) 41int show_interrupts(struct seq_file *p, void *v)
47{ 42{
48 int i = *(loff_t *) v, j; 43 int i = *(loff_t *) v, j;
diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S
index 6c81836b9229..bbfda67d2907 100644
--- a/arch/cris/kernel/vmlinux.lds.S
+++ b/arch/cris/kernel/vmlinux.lds.S
@@ -51,10 +51,7 @@ SECTIONS
51 _etext = . ; /* End of text section. */ 51 _etext = . ; /* End of text section. */
52 __etext = .; 52 __etext = .;
53 53
54 . = ALIGN(4); /* Exception table. */ 54 EXCEPTION_TABLE(4)
55 __start___ex_table = .;
56 __ex_table : { *(__ex_table) }
57 __stop___ex_table = .;
58 55
59 RODATA 56 RODATA
60 57
@@ -67,36 +64,24 @@ SECTIONS
67 __edata = . ; /* End of data section. */ 64 __edata = . ; /* End of data section. */
68 _edata = . ; 65 _edata = . ;
69 66
70 . = ALIGN(PAGE_SIZE); /* init_task and stack, must be aligned. */ 67 INIT_TASK_DATA_SECTION(PAGE_SIZE)
71 .data.init_task : { *(.data.init_task) }
72 68
73 . = ALIGN(PAGE_SIZE); /* Init code and data. */ 69 . = ALIGN(PAGE_SIZE); /* Init code and data. */
74 __init_begin = .; 70 __init_begin = .;
75 .init.text : { 71 INIT_TEXT_SECTION(PAGE_SIZE)
76 _sinittext = .;
77 INIT_TEXT
78 _einittext = .;
79 }
80 .init.data : { INIT_DATA } 72 .init.data : { INIT_DATA }
81 . = ALIGN(16); 73 .init.setup : { INIT_SETUP(16) }
82 __setup_start = .;
83 .init.setup : { *(.init.setup) }
84 __setup_end = .;
85#ifdef CONFIG_ETRAX_ARCH_V32 74#ifdef CONFIG_ETRAX_ARCH_V32
86 __start___param = .; 75 __start___param = .;
87 __param : { *(__param) } 76 __param : { *(__param) }
88 __stop___param = .; 77 __stop___param = .;
89#endif 78#endif
90 .initcall.init : { 79 .initcall.init : {
91 __initcall_start = .; 80 INIT_CALLS
92 INITCALLS
93 __initcall_end = .;
94 } 81 }
95 82
96 .con_initcall.init : { 83 .con_initcall.init : {
97 __con_initcall_start = .; 84 CON_INITCALL
98 *(.con_initcall.init)
99 __con_initcall_end = .;
100 } 85 }
101 SECURITY_INIT 86 SECURITY_INIT
102 87
@@ -114,9 +99,7 @@ SECTIONS
114 PERCPU(PAGE_SIZE) 99 PERCPU(PAGE_SIZE)
115 100
116 .init.ramfs : { 101 .init.ramfs : {
117 __initramfs_start = .; 102 INIT_RAM_FS
118 *(.init.ramfs)
119 __initramfs_end = .;
120 } 103 }
121#endif 104#endif
122 105
@@ -130,11 +113,7 @@ SECTIONS
130 __init_end = .; 113 __init_end = .;
131 114
132 __data_end = . ; /* Move to _edata ? */ 115 __data_end = . ; /* Move to _edata ? */
133 __bss_start = .; /* BSS. */ 116 BSS_SECTION(0, 0, 0)
134 .bss : {
135 *(COMMON)
136 *(.bss)
137 }
138 117
139 . = ALIGN (0x20); 118 . = ALIGN (0x20);
140 _end = .; 119 _end = .;
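The deletions in this linker script (and in the h8300 and m32r scripts below) all lean on the shared macros from include/asm-generic/vmlinux.lds.h. As one example, EXCEPTION_TABLE(align) expands to roughly the block it replaced (a sketch of the 2.6.32-era definition):

    #define EXCEPTION_TABLE(align)                                  \
            . = ALIGN(align);                                       \
            __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {       \
                    VMLINUX_SYMBOL(__start___ex_table) = .;         \
                    *(__ex_table)                                   \
                    VMLINUX_SYMBOL(__stop___ex_table) = .;          \
            }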
diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c
index be722fc1acff..0d4d3e3a4cfc 100644
--- a/arch/frv/kernel/pm.c
+++ b/arch/frv/kernel/pm.c
@@ -150,7 +150,7 @@ static int user_atoi(char __user *ubuf, size_t len)
150/* 150/*
151 * Send us to sleep. 151 * Send us to sleep.
152 */ 152 */
153static int sysctl_pm_do_suspend(ctl_table *ctl, int write, struct file *filp, 153static int sysctl_pm_do_suspend(ctl_table *ctl, int write,
154 void __user *buffer, size_t *lenp, loff_t *fpos) 154 void __user *buffer, size_t *lenp, loff_t *fpos)
155{ 155{
156 int retval, mode; 156 int retval, mode;
@@ -198,13 +198,13 @@ static int try_set_cmode(int new_cmode)
198} 198}
199 199
200 200
201static int cmode_procctl(ctl_table *ctl, int write, struct file *filp, 201static int cmode_procctl(ctl_table *ctl, int write,
202 void __user *buffer, size_t *lenp, loff_t *fpos) 202 void __user *buffer, size_t *lenp, loff_t *fpos)
203{ 203{
204 int new_cmode; 204 int new_cmode;
205 205
206 if (!write) 206 if (!write)
207 return proc_dointvec(ctl, write, filp, buffer, lenp, fpos); 207 return proc_dointvec(ctl, write, buffer, lenp, fpos);
208 208
209 new_cmode = user_atoi(buffer, *lenp); 209 new_cmode = user_atoi(buffer, *lenp);
210 210
@@ -301,13 +301,13 @@ static int try_set_cm(int new_cm)
301 return 0; 301 return 0;
302} 302}
303 303
304static int p0_procctl(ctl_table *ctl, int write, struct file *filp, 304static int p0_procctl(ctl_table *ctl, int write,
305 void __user *buffer, size_t *lenp, loff_t *fpos) 305 void __user *buffer, size_t *lenp, loff_t *fpos)
306{ 306{
307 int new_p0; 307 int new_p0;
308 308
309 if (!write) 309 if (!write)
310 return proc_dointvec(ctl, write, filp, buffer, lenp, fpos); 310 return proc_dointvec(ctl, write, buffer, lenp, fpos);
311 311
312 new_p0 = user_atoi(buffer, *lenp); 312 new_p0 = user_atoi(buffer, *lenp);
313 313
@@ -345,13 +345,13 @@ static int p0_sysctl(ctl_table *table,
345 return 1; 345 return 1;
346} 346}
347 347
348static int cm_procctl(ctl_table *ctl, int write, struct file *filp, 348static int cm_procctl(ctl_table *ctl, int write,
349 void __user *buffer, size_t *lenp, loff_t *fpos) 349 void __user *buffer, size_t *lenp, loff_t *fpos)
350{ 350{
351 int new_cm; 351 int new_cm;
352 352
353 if (!write) 353 if (!write)
354 return proc_dointvec(ctl, write, filp, buffer, lenp, fpos); 354 return proc_dointvec(ctl, write, buffer, lenp, fpos);
355 355
356 new_cm = user_atoi(buffer, *lenp); 356 new_cm = user_atoi(buffer, *lenp);
357 357
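The frv hunks above track the tree-wide sysctl cleanup that removed the unused struct file * argument from proc handlers. After the change the shared helpers have this shape (prototype as in include/linux/sysctl.h of that era):

    int proc_dointvec(struct ctl_table *table, int write,
                      void __user *buffer, size_t *lenp, loff_t *ppos);

so every caller, like cmode_procctl() above, simply drops filp from the argument list.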
diff --git a/arch/frv/mb93090-mb00/Makefile b/arch/frv/mb93090-mb00/Makefile
index 76595e870733..b73b542f8f48 100644
--- a/arch/frv/mb93090-mb00/Makefile
+++ b/arch/frv/mb93090-mb00/Makefile
@@ -11,3 +11,5 @@ else
11obj-y += pci-dma-nommu.o 11obj-y += pci-dma-nommu.o
12endif 12endif
13endif 13endif
14
15obj-$(CONFIG_MTD) += flash.o
diff --git a/arch/frv/mb93090-mb00/flash.c b/arch/frv/mb93090-mb00/flash.c
new file mode 100644
index 000000000000..c0e3707c2299
--- /dev/null
+++ b/arch/frv/mb93090-mb00/flash.c
@@ -0,0 +1,90 @@
1/* Flash mappings for the MB93090-MB00 motherboard
2 *
3 * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/init.h>
13#include <linux/platform_device.h>
14#include <linux/mtd/partitions.h>
15#include <linux/mtd/physmap.h>
16
17#define MB93090_BOOTROM_ADDR 0xFF000000 /* Boot ROM */
18#define MB93090_BOOTROM_SIZE (2 * 1024 * 1024)
19#define MB93090_USERROM_ADDR 0xFF200000 /* User ROM */
20#define MB93090_USERROM_SIZE (2 * 1024 * 1024)
21
22/*
23 * default MTD partition table for both main flash devices, expected to be
24 * overridden by RedBoot
25 */
26static struct mtd_partition mb93090_partitions[] = {
27 {
28 .name = "Filesystem",
29 .size = MTDPART_SIZ_FULL,
30 .offset = 0,
31 }
32};
33
34/*
35 * Definition of the MB93090 Boot ROM (on the CPU card)
36 */
37static struct physmap_flash_data mb93090_bootrom_data = {
38 .width = 2,
39 .nr_parts = 2,
40 .parts = mb93090_partitions,
41};
42
43static struct resource mb93090_bootrom_resource = {
44 .start = MB93090_BOOTROM_ADDR,
45 .end = MB93090_BOOTROM_ADDR + MB93090_BOOTROM_SIZE - 1,
46 .flags = IORESOURCE_MEM,
47};
48
49static struct platform_device mb93090_bootrom = {
50 .name = "physmap-flash",
51 .id = 0,
52 .dev.platform_data = &mb93090_bootrom_data,
53 .num_resources = 1,
54 .resource = &mb93090_bootrom_resource,
55};
56
57/*
58 * Definition of the MB93090 User ROM definition (on the motherboard)
59 */
60static struct physmap_flash_data mb93090_userrom_data = {
61 .width = 2,
62 .nr_parts = 2,
63 .parts = mb93090_partitions,
64};
65
66static struct resource mb93090_userrom_resource = {
67 .start = MB93090_USERROM_ADDR,
68 .end = MB93090_USERROM_ADDR + MB93090_USERROM_SIZE - 1,
69 .flags = IORESOURCE_MEM,
70};
71
72static struct platform_device mb93090_userrom = {
73 .name = "physmap-flash",
74 .id = 1,
75 .dev.platform_data = &mb93090_userrom_data,
76 .num_resources = 1,
77 .resource = &mb93090_userrom_resource,
78};
79
80/*
81 * register the MB93090 flashes
82 */
83static int __init mb93090_mtd_init(void)
84{
85 platform_device_register(&mb93090_bootrom);
86 platform_device_register(&mb93090_userrom);
87 return 0;
88}
89
90module_init(mb93090_mtd_init);
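One detail worth noting in the new file: both physmap_flash_data blocks set .nr_parts = 2 while mb93090_partitions declares only a single default entry. Deriving the count from the array would keep the two in sync; a defensive sketch of one of the blocks:

    static struct physmap_flash_data mb93090_bootrom_data = {
            .width    = 2,
            .nr_parts = ARRAY_SIZE(mb93090_partitions),
            .parts    = mb93090_partitions,
    };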
diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S
index 662b02ecb86e..b9e24907e6ea 100644
--- a/arch/h8300/kernel/vmlinux.lds.S
+++ b/arch/h8300/kernel/vmlinux.lds.S
@@ -1,5 +1,6 @@
1#define VMLINUX_SYMBOL(_sym_) _##_sym_ 1#define VMLINUX_SYMBOL(_sym_) _##_sym_
2#include <asm-generic/vmlinux.lds.h> 2#include <asm-generic/vmlinux.lds.h>
3#include <asm/page.h>
3 4
4/* target memory map */ 5/* target memory map */
5#ifdef CONFIG_H8300H_GENERIC 6#ifdef CONFIG_H8300H_GENERIC
@@ -79,11 +80,8 @@ SECTIONS
79 SCHED_TEXT 80 SCHED_TEXT
80 LOCK_TEXT 81 LOCK_TEXT
81 __etext = . ; 82 __etext = . ;
82 . = ALIGN(16); /* Exception table */
83 ___start___ex_table = .;
84 *(__ex_table)
85 ___stop___ex_table = .;
86 } 83 }
84 EXCEPTION_TABLE(16)
87 85
88 RODATA 86 RODATA
89#if defined(CONFIG_ROMKERNEL) 87#if defined(CONFIG_ROMKERNEL)
@@ -100,8 +98,7 @@ SECTIONS
100 __sdata = . ; 98 __sdata = . ;
101 ___data_start = . ; 99 ___data_start = . ;
102 100
103 . = ALIGN(0x2000) ; 101 INIT_TASK_DATA(0x2000)
104 *(.data.init_task)
105 . = ALIGN(0x4) ; 102 . = ALIGN(0x4) ;
106 DATA_DATA 103 DATA_DATA
107 . = ALIGN(0x4) ; 104 . = ALIGN(0x4) ;
@@ -114,24 +111,16 @@ SECTIONS
114 __einittext = .; 111 __einittext = .;
115 INIT_DATA 112 INIT_DATA
116 . = ALIGN(0x4) ; 113 . = ALIGN(0x4) ;
114 INIT_SETUP(0x4)
117 ___setup_start = .; 115 ___setup_start = .;
118 *(.init.setup) 116 *(.init.setup)
119 . = ALIGN(0x4) ; 117 . = ALIGN(0x4) ;
120 ___setup_end = .; 118 ___setup_end = .;
121 ___initcall_start = .; 119 INIT_CALLS
122 INITCALLS 120 CON_INITCALL
123 ___initcall_end = .;
124 ___con_initcall_start = .;
125 *(.con_initcall.init)
126 ___con_initcall_end = .;
127 EXIT_TEXT 121 EXIT_TEXT
128 EXIT_DATA 122 EXIT_DATA
129#if defined(CONFIG_BLK_DEV_INITRD) 123 INIT_RAM_FS
130 . = ALIGN(4);
131 ___initramfs_start = .;
132 *(.init.ramfs)
133 ___initramfs_end = .;
134#endif
135 . = ALIGN(0x4) ; 124 . = ALIGN(0x4) ;
136 ___init_end = .; 125 ___init_end = .;
137 __edata = . ; 126 __edata = . ;
diff --git a/arch/m32r/include/asm/page.h b/arch/m32r/include/asm/page.h
index 11777f7a5628..725ede8f2889 100644
--- a/arch/m32r/include/asm/page.h
+++ b/arch/m32r/include/asm/page.h
@@ -1,9 +1,11 @@
1#ifndef _ASM_M32R_PAGE_H 1#ifndef _ASM_M32R_PAGE_H
2#define _ASM_M32R_PAGE_H 2#define _ASM_M32R_PAGE_H
3 3
4#include <linux/const.h>
5
4/* PAGE_SHIFT determines the page size */ 6/* PAGE_SHIFT determines the page size */
5#define PAGE_SHIFT 12 7#define PAGE_SHIFT 12
6#define PAGE_SIZE (1UL << PAGE_SHIFT) 8#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
7#define PAGE_MASK (~(PAGE_SIZE-1)) 9#define PAGE_MASK (~(PAGE_SIZE-1))
8 10
9#ifndef __ASSEMBLY__ 11#ifndef __ASSEMBLY__
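_AC() exists so the same PAGE_SIZE definition can be shared between C and assembly, where a UL suffix would not assemble. linux/const.h defines it roughly as:

    #ifdef __ASSEMBLY__
    #define _AC(X, Y)   X
    #else
    #define __AC(X, Y)  (X##Y)
    #define _AC(X, Y)   __AC(X, Y)
    #endif

so _AC(1, UL) is 1UL in C code and a plain 1 in .S files.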
diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h
index 1a997fc148a2..8397c249989b 100644
--- a/arch/m32r/include/asm/processor.h
+++ b/arch/m32r/include/asm/processor.h
@@ -140,8 +140,6 @@ unsigned long get_wchan(struct task_struct *p);
140#define KSTK_EIP(tsk) ((tsk)->thread.lr) 140#define KSTK_EIP(tsk) ((tsk)->thread.lr)
141#define KSTK_ESP(tsk) ((tsk)->thread.sp) 141#define KSTK_ESP(tsk) ((tsk)->thread.sp)
142 142
143#define THREAD_SIZE (2*PAGE_SIZE)
144
145#define cpu_relax() barrier() 143#define cpu_relax() barrier()
146 144
147#endif /* _ASM_M32R_PROCESSOR_H */ 145#endif /* _ASM_M32R_PROCESSOR_H */
diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h
index 71578151a403..ed240b6e8e77 100644
--- a/arch/m32r/include/asm/thread_info.h
+++ b/arch/m32r/include/asm/thread_info.h
@@ -55,6 +55,8 @@ struct thread_info {
55 55
56#define PREEMPT_ACTIVE 0x10000000 56#define PREEMPT_ACTIVE 0x10000000
57 57
58#define THREAD_SIZE (PAGE_SIZE << 1)
59
58/* 60/*
59 * macros/functions for gaining access to the thread information structure 61 * macros/functions for gaining access to the thread information structure
60 */ 62 */
@@ -76,8 +78,6 @@ struct thread_info {
76#define init_thread_info (init_thread_union.thread_info) 78#define init_thread_info (init_thread_union.thread_info)
77#define init_stack (init_thread_union.stack) 79#define init_stack (init_thread_union.stack)
78 80
79#define THREAD_SIZE (2*PAGE_SIZE)
80
81/* how to get the thread information struct from C */ 81/* how to get the thread information struct from C */
82static inline struct thread_info *current_thread_info(void) 82static inline struct thread_info *current_thread_info(void)
83{ 83{
@@ -125,17 +125,6 @@ static inline unsigned int get_thread_fault_code(void)
125 return ti->flags >> TI_FLAG_FAULT_CODE_SHIFT; 125 return ti->flags >> TI_FLAG_FAULT_CODE_SHIFT;
126} 126}
127 127
128#else /* !__ASSEMBLY__ */
129
130#define THREAD_SIZE 8192
131
132/* how to get the thread information struct from ASM */
133#define GET_THREAD_INFO(reg) GET_THREAD_INFO reg
134 .macro GET_THREAD_INFO reg
135 ldi \reg, #-THREAD_SIZE
136 and \reg, sp
137 .endm
138
139#endif 128#endif
140 129
141/* 130/*
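Moving THREAD_SIZE next to struct thread_info works because thread_info sits at the base of the kernel stack: masking any in-stack address with ~(THREAD_SIZE - 1) locates it, which is exactly what the GET_THREAD_INFO macro relocated into entry.S below does. A standalone C illustration of the arithmetic (the address is made up):

    #include <stdint.h>
    #include <stdio.h>

    #define THREAD_SIZE (2 * 4096)  /* two pages, as on m32r */

    int main(void)
    {
            uintptr_t sp = 0xc0123abcu;  /* any address inside the stack */
            /* thread_info lives at the THREAD_SIZE-aligned stack base */
            uintptr_t ti = sp & ~((uintptr_t)THREAD_SIZE - 1);

            printf("thread_info at %#lx\n", (unsigned long)ti);
            return 0;
    }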
diff --git a/arch/m32r/kernel/entry.S b/arch/m32r/kernel/entry.S
index 612d35b082a6..403869833b98 100644
--- a/arch/m32r/kernel/entry.S
+++ b/arch/m32r/kernel/entry.S
@@ -118,6 +118,13 @@
118#define resume_kernel restore_all 118#define resume_kernel restore_all
119#endif 119#endif
120 120
121/* how to get the thread information struct from ASM */
122#define GET_THREAD_INFO(reg) GET_THREAD_INFO reg
123 .macro GET_THREAD_INFO reg
124 ldi \reg, #-THREAD_SIZE
125 and \reg, sp
126 .endm
127
121ENTRY(ret_from_fork) 128ENTRY(ret_from_fork)
122 pop r0 129 pop r0
123 bl schedule_tail 130 bl schedule_tail
diff --git a/arch/m32r/kernel/head.S b/arch/m32r/kernel/head.S
index 0a7194439eb1..a46652dd83e6 100644
--- a/arch/m32r/kernel/head.S
+++ b/arch/m32r/kernel/head.S
@@ -268,13 +268,13 @@ ENTRY(empty_zero_page)
268/*------------------------------------------------------------------------ 268/*------------------------------------------------------------------------
269 * Stack area 269 * Stack area
270 */ 270 */
271 .section .spi 271 .section .init.data, "aw"
272 ALIGN 272 ALIGN
273 .global spi_stack_top 273 .global spi_stack_top
274 .zero 1024 274 .zero 1024
275spi_stack_top: 275spi_stack_top:
276 276
277 .section .spu 277 .section .init.data, "aw"
278 ALIGN 278 ALIGN
279 .global spu_stack_top 279 .global spu_stack_top
280 .zero 1024 280 .zero 1024
diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S
index de5e21cca6a5..8ceb6181d805 100644
--- a/arch/m32r/kernel/vmlinux.lds.S
+++ b/arch/m32r/kernel/vmlinux.lds.S
@@ -4,6 +4,7 @@
4#include <asm-generic/vmlinux.lds.h> 4#include <asm-generic/vmlinux.lds.h>
5#include <asm/addrspace.h> 5#include <asm/addrspace.h>
6#include <asm/page.h> 6#include <asm/page.h>
7#include <asm/thread_info.h>
7 8
8OUTPUT_ARCH(m32r) 9OUTPUT_ARCH(m32r)
9#if defined(__LITTLE_ENDIAN__) 10#if defined(__LITTLE_ENDIAN__)
@@ -40,83 +41,22 @@ SECTIONS
40#endif 41#endif
41 _etext = .; /* End of text section */ 42 _etext = .; /* End of text section */
42 43
43 . = ALIGN(16); /* Exception table */ 44 EXCEPTION_TABLE(16)
44 __start___ex_table = .;
45 __ex_table : { *(__ex_table) }
46 __stop___ex_table = .;
47
48 RODATA 45 RODATA
49 46 RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE)
50 /* writeable */
51 .data : { /* Data */
52 *(.spu)
53 *(.spi)
54 DATA_DATA
55 CONSTRUCTORS
56 }
57
58 . = ALIGN(4096);
59 __nosave_begin = .;
60 .data_nosave : { *(.data.nosave) }
61 . = ALIGN(4096);
62 __nosave_end = .;
63
64 . = ALIGN(32);
65 .data.cacheline_aligned : { *(.data.cacheline_aligned) }
66
67 _edata = .; /* End of data section */ 47 _edata = .; /* End of data section */
68 48
69 . = ALIGN(8192); /* init_task */
70 .data.init_task : { *(.data.init_task) }
71
72 /* will be freed after init */ 49 /* will be freed after init */
73 . = ALIGN(4096); /* Init code and data */ 50 . = ALIGN(PAGE_SIZE); /* Init code and data */
74 __init_begin = .; 51 __init_begin = .;
75 .init.text : { 52 INIT_TEXT_SECTION(PAGE_SIZE)
76 _sinittext = .; 53 INIT_DATA_SECTION(16)
77 INIT_TEXT 54 PERCPU(PAGE_SIZE)
78 _einittext = .; 55 . = ALIGN(PAGE_SIZE);
79 }
80 .init.data : { INIT_DATA }
81 . = ALIGN(16);
82 __setup_start = .;
83 .init.setup : { *(.init.setup) }
84 __setup_end = .;
85 __initcall_start = .;
86 .initcall.init : {
87 INITCALLS
88 }
89 __initcall_end = .;
90 __con_initcall_start = .;
91 .con_initcall.init : { *(.con_initcall.init) }
92 __con_initcall_end = .;
93 SECURITY_INIT
94 . = ALIGN(4);
95 __alt_instructions = .;
96 .altinstructions : { *(.altinstructions) }
97 __alt_instructions_end = .;
98 .altinstr_replacement : { *(.altinstr_replacement) }
99 /* .exit.text is discard at runtime, not link time, to deal with references
100 from .altinstructions and .eh_frame */
101 .exit.text : { EXIT_TEXT }
102 .exit.data : { EXIT_DATA }
103
104#ifdef CONFIG_BLK_DEV_INITRD
105 . = ALIGN(4096);
106 __initramfs_start = .;
107 .init.ramfs : { *(.init.ramfs) }
108 __initramfs_end = .;
109#endif
110
111 PERCPU(4096)
112 . = ALIGN(4096);
113 __init_end = .; 56 __init_end = .;
114 /* freed after init ends here */ 57 /* freed after init ends here */
115 58
116 __bss_start = .; /* BSS */ 59 BSS_SECTION(0, 0, 4)
117 .bss : { *(.bss) }
118 . = ALIGN(4);
119 __bss_stop = .;
120 60
121 _end = . ; 61 _end = . ;
122 62
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 2db722d80d4d..bbd8327f1890 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -6,6 +6,7 @@ mainmenu "Linux/Microblaze Kernel Configuration"
6config MICROBLAZE 6config MICROBLAZE
7 def_bool y 7 def_bool y
8 select HAVE_LMB 8 select HAVE_LMB
9 select USB_ARCH_HAS_EHCI
9 select ARCH_WANT_OPTIONAL_GPIOLIB 10 select ARCH_WANT_OPTIONAL_GPIOLIB
10 11
11config SWAP 12config SWAP
diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile
index 8439598d4655..34187354304a 100644
--- a/arch/microblaze/Makefile
+++ b/arch/microblaze/Makefile
@@ -37,12 +37,12 @@ CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR) += -mxl-pattern-compare
37CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER)) 37CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER))
38 38
39# r31 holds current when in kernel mode 39# r31 holds current when in kernel mode
40KBUILD_KERNEL += -ffixed-r31 $(CPUFLAGS-1) $(CPUFLAGS-2) 40KBUILD_CFLAGS += -ffixed-r31 $(CPUFLAGS-1) $(CPUFLAGS-2)
41 41
42LDFLAGS := 42LDFLAGS :=
43LDFLAGS_vmlinux := 43LDFLAGS_vmlinux :=
44 44
45LIBGCC := $(shell $(CC) $(KBUILD_KERNEL) -print-libgcc-file-name) 45LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
46 46
47head-y := arch/microblaze/kernel/head.o 47head-y := arch/microblaze/kernel/head.o
48libs-y += arch/microblaze/lib/ 48libs-y += arch/microblaze/lib/
@@ -53,22 +53,41 @@ core-y += arch/microblaze/platform/
53 53
54boot := arch/microblaze/boot 54boot := arch/microblaze/boot
55 55
56# Are we making a simpleImage.<boardname> target? If so, crack out the boardname
57DTB:=$(subst simpleImage.,,$(filter simpleImage.%, $(MAKECMDGOALS)))
58
59ifneq ($(DTB),)
60 core-y += $(boot)/
61endif
62
56# defines filename extension depending memory management type 63# defines filename extension depending memory management type
57ifeq ($(CONFIG_MMU),) 64ifeq ($(CONFIG_MMU),)
58MMU := -nommu 65MMU := -nommu
59endif 66endif
60 67
61export MMU 68export MMU DTB
62 69
63all: linux.bin 70all: linux.bin
64 71
72BOOT_TARGETS = linux.bin linux.bin.gz simpleImage.%
73
65archclean: 74archclean:
66 $(Q)$(MAKE) $(clean)=$(boot) 75 $(Q)$(MAKE) $(clean)=$(boot)
67 76
68linux.bin linux.bin.gz: vmlinux 77$(BOOT_TARGETS): vmlinux
69 $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ 78 $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
70 79
71define archhelp 80define archhelp
72 echo '* linux.bin - Create raw binary' 81 echo '* linux.bin - Create raw binary'
73 echo ' linux.bin.gz - Create compressed raw binary' 82 echo ' linux.bin.gz - Create compressed raw binary'
83 echo ' simpleImage.<dt> - ELF image with $(arch)/boot/dts/<dt>.dts linked in'
84 echo ' - stripped ELF with fdt blob'
85 echo ' simpleImage.<dt>.unstrip - full ELF image with fdt blob'
86 echo ' *_defconfig - Select default config from arch/microblaze/configs'
87 echo ''
88 echo ' Targets with <dt> embed a device tree blob inside the image'
89 echo ' These targets support boards with firmware that does not'
90 echo ' support passing a device tree directly. Replace <dt> with the'
91 echo ' name of a dts file from the arch/microblaze/boot/dts/ directory'
92 echo ' (minus the .dts extension).'
74endef 93endef
diff --git a/arch/microblaze/boot/Makefile b/arch/microblaze/boot/Makefile
index c2bb043a029d..21f13322a4ca 100644
--- a/arch/microblaze/boot/Makefile
+++ b/arch/microblaze/boot/Makefile
@@ -2,10 +2,24 @@
2# arch/microblaze/boot/Makefile 2# arch/microblaze/boot/Makefile
3# 3#
4 4
5targets := linux.bin linux.bin.gz 5obj-y += linked_dtb.o
6
7targets := linux.bin linux.bin.gz simpleImage.%
6 8
7OBJCOPYFLAGS_linux.bin := -O binary 9OBJCOPYFLAGS_linux.bin := -O binary
8 10
11# Where the DTS files live
12dtstree := $(srctree)/$(src)/dts
13
14# Ensure system.dtb exists
15$(obj)/linked_dtb.o: $(obj)/system.dtb
16
17# Generate system.dtb from $(DTB).dtb
18ifneq ($(DTB),system)
19$(obj)/system.dtb: $(obj)/$(DTB).dtb
20 $(call if_changed,cp)
21endif
22
9$(obj)/linux.bin: vmlinux FORCE 23$(obj)/linux.bin: vmlinux FORCE
10 [ -n $(CONFIG_INITRAMFS_SOURCE) ] && [ ! -e $(CONFIG_INITRAMFS_SOURCE) ] && \ 24 [ -n $(CONFIG_INITRAMFS_SOURCE) ] && [ ! -e $(CONFIG_INITRAMFS_SOURCE) ] && \
11 touch $(CONFIG_INITRAMFS_SOURCE) || echo "No CPIO image" 25 touch $(CONFIG_INITRAMFS_SOURCE) || echo "No CPIO image"
@@ -16,4 +30,27 @@ $(obj)/linux.bin.gz: $(obj)/linux.bin FORCE
16 $(call if_changed,gzip) 30 $(call if_changed,gzip)
17 @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' 31 @echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
18 32
19clean-kernel += linux.bin linux.bin.gz 33quiet_cmd_cp = CP $< $@$2
34 cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false)
35
36quiet_cmd_strip = STRIP $@
37 cmd_strip = $(STRIP) -K _start -K _end -K __log_buf -K _fdt_start vmlinux -o $@
38
39$(obj)/simpleImage.%: vmlinux FORCE
40 $(call if_changed,cp,.unstrip)
41 $(call if_changed,strip)
42 @echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
43
44# Path to the device tree compiler
45DTC = $(objtree)/scripts/dtc/dtc
46
47# Rule to build device tree blobs
48quiet_cmd_dtc = DTC $@
49 cmd_dtc = $(DTC) -O dtb -o $(obj)/$*.dtb -b 0 -p 1024 $(dtstree)/$*.dts
50
51$(obj)/%.dtb: $(dtstree)/%.dts FORCE
52 $(call if_changed,dtc)
53
54clean-kernel += linux.bin linux.bin.gz simpleImage.*
55
56clean-files += *.dtb
diff --git a/arch/microblaze/boot/dts/system.dts b/arch/microblaze/boot/dts/system.dts
new file mode 120000
index 000000000000..7cb657892f21
--- /dev/null
+++ b/arch/microblaze/boot/dts/system.dts
@@ -0,0 +1 @@
../../platform/generic/system.dts \ No newline at end of file
diff --git a/arch/microblaze/boot/linked_dtb.S b/arch/microblaze/boot/linked_dtb.S
new file mode 100644
index 000000000000..cb2b537aebee
--- /dev/null
+++ b/arch/microblaze/boot/linked_dtb.S
@@ -0,0 +1,3 @@
1.section __fdt_blob,"a"
2.incbin "arch/microblaze/boot/system.dtb"
3
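linked_dtb.S simply glues the compiled system.dtb into a dedicated __fdt_blob section; the simpleImage strip rule above keeps the _fdt_start symbol precisely so boot code can locate the blob. A sketch of the consuming side (the helper name is made up; _fdt_start and early_init_devtree() are the symbols this port actually uses):

    extern char _fdt_start[];

    void __init example_probe_linked_dtb(void)  /* hypothetical helper */
    {
            /* Hand the linked-in blob to the flattened-device-tree parser */
            early_init_devtree(_fdt_start);
    }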
diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig
index 09c32962b66f..bb7c374713ad 100644
--- a/arch/microblaze/configs/mmu_defconfig
+++ b/arch/microblaze/configs/mmu_defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.31-rc6 3# Linux kernel version: 2.6.31
4# Tue Aug 18 11:00:02 2009 4# Thu Sep 24 10:28:50 2009
5# 5#
6CONFIG_MICROBLAZE=y 6CONFIG_MICROBLAZE=y
7# CONFIG_SWAP is not set 7# CONFIG_SWAP is not set
@@ -42,11 +42,12 @@ CONFIG_SYSVIPC_SYSCTL=y
42# 42#
43# RCU Subsystem 43# RCU Subsystem
44# 44#
45CONFIG_CLASSIC_RCU=y 45CONFIG_TREE_RCU=y
46# CONFIG_TREE_RCU is not set 46# CONFIG_TREE_PREEMPT_RCU is not set
47# CONFIG_PREEMPT_RCU is not set 47# CONFIG_RCU_TRACE is not set
48CONFIG_RCU_FANOUT=32
49# CONFIG_RCU_FANOUT_EXACT is not set
48# CONFIG_TREE_RCU_TRACE is not set 50# CONFIG_TREE_RCU_TRACE is not set
49# CONFIG_PREEMPT_RCU_TRACE is not set
50CONFIG_IKCONFIG=y 51CONFIG_IKCONFIG=y
51CONFIG_IKCONFIG_PROC=y 52CONFIG_IKCONFIG_PROC=y
52CONFIG_LOG_BUF_SHIFT=17 53CONFIG_LOG_BUF_SHIFT=17
@@ -260,6 +261,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
260# CONFIG_NETFILTER is not set 261# CONFIG_NETFILTER is not set
261# CONFIG_IP_DCCP is not set 262# CONFIG_IP_DCCP is not set
262# CONFIG_IP_SCTP is not set 263# CONFIG_IP_SCTP is not set
264# CONFIG_RDS is not set
263# CONFIG_TIPC is not set 265# CONFIG_TIPC is not set
264# CONFIG_ATM is not set 266# CONFIG_ATM is not set
265# CONFIG_BRIDGE is not set 267# CONFIG_BRIDGE is not set
@@ -357,12 +359,10 @@ CONFIG_NET_ETHERNET=y
357# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set 359# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
358# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set 360# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
359# CONFIG_KS8842 is not set 361# CONFIG_KS8842 is not set
362CONFIG_XILINX_EMACLITE=y
360CONFIG_NETDEV_1000=y 363CONFIG_NETDEV_1000=y
361CONFIG_NETDEV_10000=y 364CONFIG_NETDEV_10000=y
362 365CONFIG_WLAN=y
363#
364# Wireless LAN
365#
366# CONFIG_WLAN_PRE80211 is not set 366# CONFIG_WLAN_PRE80211 is not set
367# CONFIG_WLAN_80211 is not set 367# CONFIG_WLAN_80211 is not set
368 368
@@ -460,6 +460,7 @@ CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
460# CONFIG_DISPLAY_SUPPORT is not set 460# CONFIG_DISPLAY_SUPPORT is not set
461# CONFIG_SOUND is not set 461# CONFIG_SOUND is not set
462# CONFIG_USB_SUPPORT is not set 462# CONFIG_USB_SUPPORT is not set
463CONFIG_USB_ARCH_HAS_EHCI=y
463# CONFIG_MMC is not set 464# CONFIG_MMC is not set
464# CONFIG_MEMSTICK is not set 465# CONFIG_MEMSTICK is not set
465# CONFIG_NEW_LEDS is not set 466# CONFIG_NEW_LEDS is not set
@@ -488,6 +489,7 @@ CONFIG_EXT2_FS=y
488# CONFIG_GFS2_FS is not set 489# CONFIG_GFS2_FS is not set
489# CONFIG_OCFS2_FS is not set 490# CONFIG_OCFS2_FS is not set
490# CONFIG_BTRFS_FS is not set 491# CONFIG_BTRFS_FS is not set
492# CONFIG_NILFS2_FS is not set
491CONFIG_FILE_LOCKING=y 493CONFIG_FILE_LOCKING=y
492CONFIG_FSNOTIFY=y 494CONFIG_FSNOTIFY=y
493# CONFIG_DNOTIFY is not set 495# CONFIG_DNOTIFY is not set
@@ -546,7 +548,6 @@ CONFIG_MISC_FILESYSTEMS=y
546# CONFIG_ROMFS_FS is not set 548# CONFIG_ROMFS_FS is not set
547# CONFIG_SYSV_FS is not set 549# CONFIG_SYSV_FS is not set
548# CONFIG_UFS_FS is not set 550# CONFIG_UFS_FS is not set
549# CONFIG_NILFS2_FS is not set
550CONFIG_NETWORK_FILESYSTEMS=y 551CONFIG_NETWORK_FILESYSTEMS=y
551CONFIG_NFS_FS=y 552CONFIG_NFS_FS=y
552CONFIG_NFS_V3=y 553CONFIG_NFS_V3=y
@@ -671,18 +672,20 @@ CONFIG_DEBUG_INFO=y
671# CONFIG_DEBUG_LIST is not set 672# CONFIG_DEBUG_LIST is not set
672# CONFIG_DEBUG_SG is not set 673# CONFIG_DEBUG_SG is not set
673# CONFIG_DEBUG_NOTIFIERS is not set 674# CONFIG_DEBUG_NOTIFIERS is not set
675# CONFIG_DEBUG_CREDENTIALS is not set
674# CONFIG_BOOT_PRINTK_DELAY is not set 676# CONFIG_BOOT_PRINTK_DELAY is not set
675# CONFIG_RCU_TORTURE_TEST is not set 677# CONFIG_RCU_TORTURE_TEST is not set
676# CONFIG_RCU_CPU_STALL_DETECTOR is not set 678# CONFIG_RCU_CPU_STALL_DETECTOR is not set
677# CONFIG_BACKTRACE_SELF_TEST is not set 679# CONFIG_BACKTRACE_SELF_TEST is not set
678# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set 680# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
681# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
679# CONFIG_FAULT_INJECTION is not set 682# CONFIG_FAULT_INJECTION is not set
680# CONFIG_SYSCTL_SYSCALL_CHECK is not set 683# CONFIG_SYSCTL_SYSCALL_CHECK is not set
681# CONFIG_PAGE_POISONING is not set 684# CONFIG_PAGE_POISONING is not set
682# CONFIG_SAMPLES is not set 685# CONFIG_SAMPLES is not set
683# CONFIG_KMEMCHECK is not set 686# CONFIG_KMEMCHECK is not set
684CONFIG_EARLY_PRINTK=y 687CONFIG_EARLY_PRINTK=y
685CONFIG_HEART_BEAT=y 688# CONFIG_HEART_BEAT is not set
686CONFIG_DEBUG_BOOTMEM=y 689CONFIG_DEBUG_BOOTMEM=y
687 690
688# 691#
@@ -697,7 +700,6 @@ CONFIG_CRYPTO=y
697# 700#
698# Crypto core or helper 701# Crypto core or helper
699# 702#
700# CONFIG_CRYPTO_FIPS is not set
701# CONFIG_CRYPTO_MANAGER is not set 703# CONFIG_CRYPTO_MANAGER is not set
702# CONFIG_CRYPTO_MANAGER2 is not set 704# CONFIG_CRYPTO_MANAGER2 is not set
703# CONFIG_CRYPTO_GF128MUL is not set 705# CONFIG_CRYPTO_GF128MUL is not set
@@ -729,11 +731,13 @@ CONFIG_CRYPTO=y
729# 731#
730# CONFIG_CRYPTO_HMAC is not set 732# CONFIG_CRYPTO_HMAC is not set
731# CONFIG_CRYPTO_XCBC is not set 733# CONFIG_CRYPTO_XCBC is not set
734# CONFIG_CRYPTO_VMAC is not set
732 735
733# 736#
734# Digest 737# Digest
735# 738#
736# CONFIG_CRYPTO_CRC32C is not set 739# CONFIG_CRYPTO_CRC32C is not set
740# CONFIG_CRYPTO_GHASH is not set
737# CONFIG_CRYPTO_MD4 is not set 741# CONFIG_CRYPTO_MD4 is not set
738# CONFIG_CRYPTO_MD5 is not set 742# CONFIG_CRYPTO_MD5 is not set
739# CONFIG_CRYPTO_MICHAEL_MIC is not set 743# CONFIG_CRYPTO_MICHAEL_MIC is not set
diff --git a/arch/microblaze/configs/nommu_defconfig b/arch/microblaze/configs/nommu_defconfig
index 8b638615a972..adb839bab704 100644
--- a/arch/microblaze/configs/nommu_defconfig
+++ b/arch/microblaze/configs/nommu_defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.31-rc6 3# Linux kernel version: 2.6.31
4# Tue Aug 18 10:35:30 2009 4# Thu Sep 24 10:29:43 2009
5# 5#
6CONFIG_MICROBLAZE=y 6CONFIG_MICROBLAZE=y
7# CONFIG_SWAP is not set 7# CONFIG_SWAP is not set
@@ -44,11 +44,12 @@ CONFIG_BSD_PROCESS_ACCT_V3=y
44# 44#
45# RCU Subsystem 45# RCU Subsystem
46# 46#
47CONFIG_CLASSIC_RCU=y 47CONFIG_TREE_RCU=y
48# CONFIG_TREE_RCU is not set 48# CONFIG_TREE_PREEMPT_RCU is not set
49# CONFIG_PREEMPT_RCU is not set 49# CONFIG_RCU_TRACE is not set
50CONFIG_RCU_FANOUT=32
51# CONFIG_RCU_FANOUT_EXACT is not set
50# CONFIG_TREE_RCU_TRACE is not set 52# CONFIG_TREE_RCU_TRACE is not set
51# CONFIG_PREEMPT_RCU_TRACE is not set
52CONFIG_IKCONFIG=y 53CONFIG_IKCONFIG=y
53CONFIG_IKCONFIG_PROC=y 54CONFIG_IKCONFIG_PROC=y
54CONFIG_LOG_BUF_SHIFT=17 55CONFIG_LOG_BUF_SHIFT=17
@@ -243,6 +244,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
243# CONFIG_NETFILTER is not set 244# CONFIG_NETFILTER is not set
244# CONFIG_IP_DCCP is not set 245# CONFIG_IP_DCCP is not set
245# CONFIG_IP_SCTP is not set 246# CONFIG_IP_SCTP is not set
247# CONFIG_RDS is not set
246# CONFIG_TIPC is not set 248# CONFIG_TIPC is not set
247# CONFIG_ATM is not set 249# CONFIG_ATM is not set
248# CONFIG_BRIDGE is not set 250# CONFIG_BRIDGE is not set
@@ -272,6 +274,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
272# CONFIG_AF_RXRPC is not set 274# CONFIG_AF_RXRPC is not set
273CONFIG_WIRELESS=y 275CONFIG_WIRELESS=y
274# CONFIG_CFG80211 is not set 276# CONFIG_CFG80211 is not set
277CONFIG_CFG80211_DEFAULT_PS_VALUE=0
275CONFIG_WIRELESS_OLD_REGULATORY=y 278CONFIG_WIRELESS_OLD_REGULATORY=y
276# CONFIG_WIRELESS_EXT is not set 279# CONFIG_WIRELESS_EXT is not set
277# CONFIG_LIB80211 is not set 280# CONFIG_LIB80211 is not set
@@ -279,7 +282,6 @@ CONFIG_WIRELESS_OLD_REGULATORY=y
279# 282#
280# CFG80211 needs to be enabled for MAC80211 283# CFG80211 needs to be enabled for MAC80211
281# 284#
282CONFIG_MAC80211_DEFAULT_PS_VALUE=0
283# CONFIG_WIMAX is not set 285# CONFIG_WIMAX is not set
284# CONFIG_RFKILL is not set 286# CONFIG_RFKILL is not set
285# CONFIG_NET_9P is not set 287# CONFIG_NET_9P is not set
@@ -304,6 +306,7 @@ CONFIG_MTD_PARTITIONS=y
304# CONFIG_MTD_TESTS is not set 306# CONFIG_MTD_TESTS is not set
305# CONFIG_MTD_REDBOOT_PARTS is not set 307# CONFIG_MTD_REDBOOT_PARTS is not set
306CONFIG_MTD_CMDLINE_PARTS=y 308CONFIG_MTD_CMDLINE_PARTS=y
309# CONFIG_MTD_OF_PARTS is not set
307# CONFIG_MTD_AR7_PARTS is not set 310# CONFIG_MTD_AR7_PARTS is not set
308 311
309# 312#
@@ -349,6 +352,7 @@ CONFIG_MTD_RAM=y
349# 352#
350# CONFIG_MTD_COMPLEX_MAPPINGS is not set 353# CONFIG_MTD_COMPLEX_MAPPINGS is not set
351# CONFIG_MTD_PHYSMAP is not set 354# CONFIG_MTD_PHYSMAP is not set
355# CONFIG_MTD_PHYSMAP_OF is not set
352CONFIG_MTD_UCLINUX=y 356CONFIG_MTD_UCLINUX=y
353# CONFIG_MTD_PLATRAM is not set 357# CONFIG_MTD_PLATRAM is not set
354 358
@@ -429,12 +433,10 @@ CONFIG_NET_ETHERNET=y
429# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set 433# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
430# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set 434# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
431# CONFIG_KS8842 is not set 435# CONFIG_KS8842 is not set
436# CONFIG_XILINX_EMACLITE is not set
432CONFIG_NETDEV_1000=y 437CONFIG_NETDEV_1000=y
433CONFIG_NETDEV_10000=y 438CONFIG_NETDEV_10000=y
434 439CONFIG_WLAN=y
435#
436# Wireless LAN
437#
438# CONFIG_WLAN_PRE80211 is not set 440# CONFIG_WLAN_PRE80211 is not set
439# CONFIG_WLAN_80211 is not set 441# CONFIG_WLAN_80211 is not set
440 442
@@ -535,7 +537,7 @@ CONFIG_VIDEO_OUTPUT_CONTROL=y
535CONFIG_USB_SUPPORT=y 537CONFIG_USB_SUPPORT=y
536CONFIG_USB_ARCH_HAS_HCD=y 538CONFIG_USB_ARCH_HAS_HCD=y
537# CONFIG_USB_ARCH_HAS_OHCI is not set 539# CONFIG_USB_ARCH_HAS_OHCI is not set
538# CONFIG_USB_ARCH_HAS_EHCI is not set 540CONFIG_USB_ARCH_HAS_EHCI=y
539# CONFIG_USB is not set 541# CONFIG_USB is not set
540# CONFIG_USB_OTG_WHITELIST is not set 542# CONFIG_USB_OTG_WHITELIST is not set
541# CONFIG_USB_OTG_BLACKLIST_HUB is not set 543# CONFIG_USB_OTG_BLACKLIST_HUB is not set
@@ -579,6 +581,7 @@ CONFIG_FS_POSIX_ACL=y
579# CONFIG_GFS2_FS is not set 581# CONFIG_GFS2_FS is not set
580# CONFIG_OCFS2_FS is not set 582# CONFIG_OCFS2_FS is not set
581# CONFIG_BTRFS_FS is not set 583# CONFIG_BTRFS_FS is not set
584# CONFIG_NILFS2_FS is not set
582CONFIG_FILE_LOCKING=y 585CONFIG_FILE_LOCKING=y
583CONFIG_FSNOTIFY=y 586CONFIG_FSNOTIFY=y
584# CONFIG_DNOTIFY is not set 587# CONFIG_DNOTIFY is not set
@@ -639,7 +642,6 @@ CONFIG_ROMFS_BACKED_BY_BLOCK=y
639CONFIG_ROMFS_ON_BLOCK=y 642CONFIG_ROMFS_ON_BLOCK=y
640# CONFIG_SYSV_FS is not set 643# CONFIG_SYSV_FS is not set
641# CONFIG_UFS_FS is not set 644# CONFIG_UFS_FS is not set
642# CONFIG_NILFS2_FS is not set
643CONFIG_NETWORK_FILESYSTEMS=y 645CONFIG_NETWORK_FILESYSTEMS=y
644CONFIG_NFS_FS=y 646CONFIG_NFS_FS=y
645CONFIG_NFS_V3=y 647CONFIG_NFS_V3=y
@@ -710,18 +712,20 @@ CONFIG_DEBUG_INFO=y
710CONFIG_DEBUG_LIST=y 712CONFIG_DEBUG_LIST=y
711CONFIG_DEBUG_SG=y 713CONFIG_DEBUG_SG=y
712# CONFIG_DEBUG_NOTIFIERS is not set 714# CONFIG_DEBUG_NOTIFIERS is not set
715# CONFIG_DEBUG_CREDENTIALS is not set
713# CONFIG_BOOT_PRINTK_DELAY is not set 716# CONFIG_BOOT_PRINTK_DELAY is not set
714# CONFIG_RCU_TORTURE_TEST is not set 717# CONFIG_RCU_TORTURE_TEST is not set
715# CONFIG_RCU_CPU_STALL_DETECTOR is not set 718# CONFIG_RCU_CPU_STALL_DETECTOR is not set
716# CONFIG_BACKTRACE_SELF_TEST is not set 719# CONFIG_BACKTRACE_SELF_TEST is not set
717# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set 720# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
721# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
718# CONFIG_FAULT_INJECTION is not set 722# CONFIG_FAULT_INJECTION is not set
719CONFIG_SYSCTL_SYSCALL_CHECK=y 723CONFIG_SYSCTL_SYSCALL_CHECK=y
720# CONFIG_PAGE_POISONING is not set 724# CONFIG_PAGE_POISONING is not set
721# CONFIG_DYNAMIC_DEBUG is not set 725# CONFIG_DYNAMIC_DEBUG is not set
722# CONFIG_SAMPLES is not set 726# CONFIG_SAMPLES is not set
723CONFIG_EARLY_PRINTK=y 727CONFIG_EARLY_PRINTK=y
724CONFIG_HEART_BEAT=y 728# CONFIG_HEART_BEAT is not set
725# CONFIG_DEBUG_BOOTMEM is not set 729# CONFIG_DEBUG_BOOTMEM is not set
726 730
727# 731#
@@ -736,7 +740,6 @@ CONFIG_CRYPTO=y
736# 740#
737# Crypto core or helper 741# Crypto core or helper
738# 742#
739# CONFIG_CRYPTO_FIPS is not set
740# CONFIG_CRYPTO_MANAGER is not set 743# CONFIG_CRYPTO_MANAGER is not set
741# CONFIG_CRYPTO_MANAGER2 is not set 744# CONFIG_CRYPTO_MANAGER2 is not set
742# CONFIG_CRYPTO_GF128MUL is not set 745# CONFIG_CRYPTO_GF128MUL is not set
@@ -768,11 +771,13 @@ CONFIG_CRYPTO=y
768# 771#
769# CONFIG_CRYPTO_HMAC is not set 772# CONFIG_CRYPTO_HMAC is not set
770# CONFIG_CRYPTO_XCBC is not set 773# CONFIG_CRYPTO_XCBC is not set
774# CONFIG_CRYPTO_VMAC is not set
771 775
772# 776#
773# Digest 777# Digest
774# 778#
775# CONFIG_CRYPTO_CRC32C is not set 779# CONFIG_CRYPTO_CRC32C is not set
780# CONFIG_CRYPTO_GHASH is not set
776# CONFIG_CRYPTO_MD4 is not set 781# CONFIG_CRYPTO_MD4 is not set
777# CONFIG_CRYPTO_MD5 is not set 782# CONFIG_CRYPTO_MD5 is not set
778# CONFIG_CRYPTO_MICHAEL_MIC is not set 783# CONFIG_CRYPTO_MICHAEL_MIC is not set
diff --git a/arch/microblaze/include/asm/asm-compat.h b/arch/microblaze/include/asm/asm-compat.h
new file mode 100644
index 000000000000..e7bc9dc11b57
--- /dev/null
+++ b/arch/microblaze/include/asm/asm-compat.h
@@ -0,0 +1,17 @@
1#ifndef _ASM_MICROBLAZE_ASM_COMPAT_H
2#define _ASM_MICROBLAZE_ASM_COMPAT_H
3
4#include <asm/types.h>
5
6#ifdef __ASSEMBLY__
7# define stringify_in_c(...) __VA_ARGS__
8# define ASM_CONST(x) x
9#else
10/* This version of stringify will deal with commas... */
11# define __stringify_in_c(...) #__VA_ARGS__
12# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " "
13# define __ASM_CONST(x) x##UL
14# define ASM_CONST(x) __ASM_CONST(x)
15#endif
16
17#endif /* _ASM_MICROBLAZE_ASM_COMPAT_H */
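ASM_CONST() serves the same dual-use purpose as _AC() in linux/const.h: in C it pastes a UL suffix onto the constant, in assembly it leaves the bare number, so a definition in a shared header works in both worlds. A small illustration (the constant is hypothetical; the real user is the LOAD_OFFSET added to page.h below):

    #include <asm/asm-compat.h>

    /* 0xc0000000UL when compiled as C, plain 0xc0000000 in a .S file */
    #define EXAMPLE_KERNEL_BASE ASM_CONST(0xc0000000)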
diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h
index 7c3ec13b44d8..fc9997b73c09 100644
--- a/arch/microblaze/include/asm/io.h
+++ b/arch/microblaze/include/asm/io.h
@@ -210,6 +210,9 @@ static inline void __iomem *__ioremap(phys_addr_t address, unsigned long size,
210#define in_be32(a) __raw_readl((const void __iomem __force *)(a)) 210#define in_be32(a) __raw_readl((const void __iomem __force *)(a))
211#define in_be16(a) __raw_readw(a) 211#define in_be16(a) __raw_readw(a)
212 212
213#define writel_be(v, a) out_be32((__force unsigned *)a, v)
214#define readl_be(a) in_be32((__force unsigned *)a)
215
213/* 216/*
214 * Little endian 217 * Little endian
215 */ 218 */
diff --git a/arch/microblaze/include/asm/ipc.h b/arch/microblaze/include/asm/ipc.h
deleted file mode 100644
index a46e3d9c2a3f..000000000000
--- a/arch/microblaze/include/asm/ipc.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/ipc.h>
diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index 72aceae88680..880c988c2237 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -17,6 +17,7 @@
17 17
18#include <linux/pfn.h> 18#include <linux/pfn.h>
19#include <asm/setup.h> 19#include <asm/setup.h>
20#include <asm/asm-compat.h>
20#include <linux/const.h> 21#include <linux/const.h>
21 22
22#ifdef __KERNEL__ 23#ifdef __KERNEL__
@@ -26,6 +27,8 @@
26#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) 27#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
27#define PAGE_MASK (~(PAGE_SIZE-1)) 28#define PAGE_MASK (~(PAGE_SIZE-1))
28 29
30#define LOAD_OFFSET ASM_CONST((CONFIG_KERNEL_START-CONFIG_KERNEL_BASE_ADDR))
31
29#ifndef __ASSEMBLY__ 32#ifndef __ASSEMBLY__
30 33
31#define PAGE_UP(addr) (((addr)+((PAGE_SIZE)-1))&(~((PAGE_SIZE)-1))) 34#define PAGE_UP(addr) (((addr)+((PAGE_SIZE)-1))&(~((PAGE_SIZE)-1)))
diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h
index 27f8dafd8c34..ed67c9ed15b8 100644
--- a/arch/microblaze/include/asm/setup.h
+++ b/arch/microblaze/include/asm/setup.h
@@ -38,7 +38,7 @@ extern void early_console_reg_tlb_alloc(unsigned int addr);
38void time_init(void); 38void time_init(void);
39void init_IRQ(void); 39void init_IRQ(void);
40void machine_early_init(const char *cmdline, unsigned int ram, 40void machine_early_init(const char *cmdline, unsigned int ram,
41 unsigned int fdt); 41 unsigned int fdt, unsigned int msr);
42 42
43void machine_restart(char *cmd); 43void machine_restart(char *cmd);
44void machine_shutdown(void); 44void machine_shutdown(void);
diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h
new file mode 100644
index 000000000000..048dfcd8d89d
--- /dev/null
+++ b/arch/microblaze/include/asm/syscall.h
@@ -0,0 +1,99 @@
1#ifndef __ASM_MICROBLAZE_SYSCALL_H
2#define __ASM_MICROBLAZE_SYSCALL_H
3
4#include <linux/kernel.h>
5#include <linux/sched.h>
6#include <asm/ptrace.h>
7
8/* The system call number is given by the user in R12 */
9static inline long syscall_get_nr(struct task_struct *task,
10 struct pt_regs *regs)
11{
12 return regs->r12;
13}
14
15static inline void syscall_rollback(struct task_struct *task,
16 struct pt_regs *regs)
17{
18 /* TODO. */
19}
20
21static inline long syscall_get_error(struct task_struct *task,
22 struct pt_regs *regs)
23{
24 return IS_ERR_VALUE(regs->r3) ? regs->r3 : 0;
25}
26
27static inline long syscall_get_return_value(struct task_struct *task,
28 struct pt_regs *regs)
29{
30 return regs->r3;
31}
32
33static inline void syscall_set_return_value(struct task_struct *task,
34 struct pt_regs *regs,
35 int error, long val)
36{
37 if (error)
38 regs->r3 = -error;
39 else
40 regs->r3 = val;
41}
42
43static inline microblaze_reg_t microblaze_get_syscall_arg(struct pt_regs *regs,
44 unsigned int n)
45{
46 switch (n) {
47 case 5: return regs->r10;
48 case 4: return regs->r9;
49 case 3: return regs->r8;
50 case 2: return regs->r7;
51 case 1: return regs->r6;
52 case 0: return regs->r5;
53 default:
54 BUG();
55 }
56 return ~0;
57}
58
59static inline void microblaze_set_syscall_arg(struct pt_regs *regs,
60 unsigned int n,
61 unsigned long val)
62{
63 switch (n) {
64 case 5:
65 regs->r10 = val;
66 case 4:
67 regs->r9 = val;
68 case 3:
69 regs->r8 = val;
70 case 2:
71 regs->r7 = val;
72 case 1:
73 regs->r6 = val;
74 case 0:
75 regs->r5 = val;
76 default:
77 BUG();
78 }
79}
80
81static inline void syscall_get_arguments(struct task_struct *task,
82 struct pt_regs *regs,
83 unsigned int i, unsigned int n,
84 unsigned long *args)
85{
86 while (n--)
87 *args++ = microblaze_get_syscall_arg(regs, i++);
88}
89
90static inline void syscall_set_arguments(struct task_struct *task,
91 struct pt_regs *regs,
92 unsigned int i, unsigned int n,
93 const unsigned long *args)
94{
95 while (n--)
96 microblaze_set_syscall_arg(regs, i++, *args++);
97}
98
99#endif /* __ASM_MICROBLAZE_SYSCALL_H */
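As committed, microblaze_set_syscall_arg() falls straight through every case: asking it to set argument 5 also clobbers r9 through r5 and then lands in the default BUG(). A corrected sketch with the missing break statements:

    static inline void microblaze_set_syscall_arg(struct pt_regs *regs,
                                                  unsigned int n,
                                                  unsigned long val)
    {
            switch (n) {
            case 5: regs->r10 = val; break;
            case 4: regs->r9  = val; break;
            case 3: regs->r8  = val; break;
            case 2: regs->r7  = val; break;
            case 1: regs->r6  = val; break;
            case 0: regs->r5  = val; break;
            default:
                    BUG();
            }
    }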
diff --git a/arch/microblaze/kernel/cpu/cpuinfo.c b/arch/microblaze/kernel/cpu/cpuinfo.c
index c411c6757deb..3539babc1c18 100644
--- a/arch/microblaze/kernel/cpu/cpuinfo.c
+++ b/arch/microblaze/kernel/cpu/cpuinfo.c
@@ -28,6 +28,7 @@ const struct cpu_ver_key cpu_ver_lookup[] = {
28 {"7.10.d", 0x0b}, 28 {"7.10.d", 0x0b},
29 {"7.20.a", 0x0c}, 29 {"7.20.a", 0x0c},
30 {"7.20.b", 0x0d}, 30 {"7.20.b", 0x0d},
31 {"7.20.c", 0x0e},
31 /* FIXME There is no keycode defined in MBV for these versions */ 32 /* FIXME There is no keycode defined in MBV for these versions */
32 {"2.10.a", 0x10}, 33 {"2.10.a", 0x10},
33 {"3.00.a", 0x20}, 34 {"3.00.a", 0x20},
@@ -49,6 +50,8 @@ const struct family_string_key family_string_lookup[] = {
49 {"spartan3a", 0xa}, 50 {"spartan3a", 0xa},
50 {"spartan3an", 0xb}, 51 {"spartan3an", 0xb},
51 {"spartan3adsp", 0xc}, 52 {"spartan3adsp", 0xc},
53 {"spartan6", 0xd},
54 {"virtex6", 0xe},
52 /* FIXME There is no key code defined for spartan2 */ 55 /* FIXME There is no key code defined for spartan2 */
53 {"spartan2", 0xf0}, 56 {"spartan2", 0xf0},
54 {NULL, 0}, 57 {NULL, 0},
diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S
index c7353e79f4a2..acc1f05d1e2c 100644
--- a/arch/microblaze/kernel/entry.S
+++ b/arch/microblaze/kernel/entry.S
@@ -308,38 +308,69 @@ C_ENTRY(_user_exception):
308 swi r12, r1, PTO+PT_R0; 308 swi r12, r1, PTO+PT_R0;
309 tovirt(r1,r1) 309 tovirt(r1,r1)
310 310
311 la r15, r0, ret_from_trap-8
312/* where the trap should return need -8 to adjust for rtsd r15, 8*/ 311/* where the trap should return need -8 to adjust for rtsd r15, 8*/
313/* Jump to the appropriate function for the system call number in r12 312/* Jump to the appropriate function for the system call number in r12
314 * (r12 is not preserved), or return an error if r12 is not valid. The LP 313 * (r12 is not preserved), or return an error if r12 is not valid. The LP
315 * register should point to the location where 314 * register should point to the location where
316 * the called function should return. [note that MAKE_SYS_CALL uses label 1] */ 315 * the called function should return. [note that MAKE_SYS_CALL uses label 1] */
317 /* See if the system call number is valid. */ 316
317 # Step into virtual mode.
318 set_vms;
319 addik r11, r0, 3f
320 rtid r11, 0
321 nop
3223:
323 add r11, r0, CURRENT_TASK /* Get current task ptr into r11 */
324 lwi r11, r11, TS_THREAD_INFO /* get thread info */
325 lwi r11, r11, TI_FLAGS /* get flags in thread info */
326 andi r11, r11, _TIF_WORK_SYSCALL_MASK
327 beqi r11, 4f
328
329 addik r3, r0, -ENOSYS
330 swi r3, r1, PTO + PT_R3
331 brlid r15, do_syscall_trace_enter
332 addik r5, r1, PTO + PT_R0
333
334 # do_syscall_trace_enter returns the new syscall nr.
335 addk r12, r0, r3
336 lwi r5, r1, PTO+PT_R5;
337 lwi r6, r1, PTO+PT_R6;
338 lwi r7, r1, PTO+PT_R7;
339 lwi r8, r1, PTO+PT_R8;
340 lwi r9, r1, PTO+PT_R9;
341 lwi r10, r1, PTO+PT_R10;
3424:
343/* Jump to the appropriate function for the system call number in r12
344 * (r12 is not preserved), or return an error if r12 is not valid.
345 * The LP register should point to the location where the called function
346 * should return. [note that MAKE_SYS_CALL uses label 1] */
347 /* See if the system call number is valid */
318 addi r11, r12, -__NR_syscalls; 348 addi r11, r12, -__NR_syscalls;
319 bgei r11,1f; 349 bgei r11,5f;
320 /* Figure out which function to use for this system call. */ 350 /* Figure out which function to use for this system call. */
321 /* Note Microblaze barrel shift is optional, so don't rely on it */ 351 /* Note Microblaze barrel shift is optional, so don't rely on it */
322 add r12, r12, r12; /* convert num -> ptr */ 352 add r12, r12, r12; /* convert num -> ptr */
323 add r12, r12, r12; 353 add r12, r12, r12;
324 354
325 /* Track syscalls and store them in r0_ram */ 355 /* Track syscalls and store them in r0_ram */
326 lwi r3, r12, 0x400 + TOPHYS(r0_ram) 356 lwi r3, r12, 0x400 + r0_ram
327 addi r3, r3, 1 357 addi r3, r3, 1
328 swi r3, r12, 0x400 + TOPHYS(r0_ram) 358 swi r3, r12, 0x400 + r0_ram
359
360 # Find and jump into the syscall handler.
361 lwi r12, r12, sys_call_table
362 /* where the trap should return need -8 to adjust for rtsd r15, 8 */
363 la r15, r0, ret_from_trap-8
364 bra r12
329 365
330 lwi r12, r12, TOPHYS(sys_call_table); /* Function ptr */
331 /* Make the system call. to r12*/
332 set_vms;
333 rtid r12, 0;
334 nop;
335 /* The syscall number is invalid, return an error. */ 366 /* The syscall number is invalid, return an error. */
3361: VM_ON; /* RETURN() expects virtual mode*/ 3675:
337 addi r3, r0, -ENOSYS; 368 addi r3, r0, -ENOSYS;
338 rtsd r15,8; /* looks like a normal subroutine return */ 369 rtsd r15,8; /* looks like a normal subroutine return */
339 or r0, r0, r0 370 or r0, r0, r0
340 371
341 372
342/* Entry point used to return from a syscall/trap. */ 373/* Entry point used to return from a syscall/trap */
343/* We re-enable BIP bit before state restore */ 374/* We re-enable BIP bit before state restore */
344C_ENTRY(ret_from_trap): 375C_ENTRY(ret_from_trap):
345 set_bip; /* Ints masked for state restore*/ 376 set_bip; /* Ints masked for state restore*/
@@ -349,6 +380,23 @@ C_ENTRY(ret_from_trap):
349 380
350 /* We're returning to user mode, so check for various conditions that 381 /* We're returning to user mode, so check for various conditions that
351 * trigger rescheduling. */ 382 * trigger rescheduling. */
383 # FIXME: Restructure all these flag checks.
384 add r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */
385 lwi r11, r11, TS_THREAD_INFO; /* get thread info */
386 lwi r11, r11, TI_FLAGS; /* get flags in thread info */
387 andi r11, r11, _TIF_WORK_SYSCALL_MASK
388 beqi r11, 1f
389
390 swi r3, r1, PTO + PT_R3
391 swi r4, r1, PTO + PT_R4
392 brlid r15, do_syscall_trace_leave
393 addik r5, r1, PTO + PT_R0
394 lwi r3, r1, PTO + PT_R3
395 lwi r4, r1, PTO + PT_R4
3961:
397
398 /* We're returning to user mode, so check for various conditions that
399 * trigger rescheduling. */
352 /* Get current task ptr into r11 */ 400 /* Get current task ptr into r11 */
353 add r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */ 401 add r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */
354 lwi r11, r11, TS_THREAD_INFO; /* get thread info */ 402 lwi r11, r11, TS_THREAD_INFO; /* get thread info */
diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c
index 0cb64a31e89a..d9f70f83097f 100644
--- a/arch/microblaze/kernel/exceptions.c
+++ b/arch/microblaze/kernel/exceptions.c
@@ -72,7 +72,8 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
72#endif 72#endif
73 73
74#if 0 74#if 0
75 printk(KERN_WARNING "Exception %02x in %s mode, FSR=%08x PC=%08x ESR=%08x\n", 75 printk(KERN_WARNING "Exception %02x in %s mode, FSR=%08x PC=%08x " \
76 "ESR=%08x\n",
76 type, user_mode(regs) ? "user" : "kernel", fsr, 77 type, user_mode(regs) ? "user" : "kernel", fsr,
77 (unsigned int) regs->pc, (unsigned int) regs->esr); 78 (unsigned int) regs->pc, (unsigned int) regs->esr);
78#endif 79#endif
@@ -80,42 +81,50 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
80 switch (type & 0x1F) { 81 switch (type & 0x1F) {
81 case MICROBLAZE_ILL_OPCODE_EXCEPTION: 82 case MICROBLAZE_ILL_OPCODE_EXCEPTION:
82 if (user_mode(regs)) { 83 if (user_mode(regs)) {
83 printk(KERN_WARNING "Illegal opcode exception in user mode.\n"); 84 pr_debug(KERN_WARNING "Illegal opcode exception " \
85 "in user mode.\n");
84 _exception(SIGILL, regs, ILL_ILLOPC, addr); 86 _exception(SIGILL, regs, ILL_ILLOPC, addr);
85 return; 87 return;
86 } 88 }
87 printk(KERN_WARNING "Illegal opcode exception in kernel mode.\n"); 89 printk(KERN_WARNING "Illegal opcode exception " \
90 "in kernel mode.\n");
88 die("opcode exception", regs, SIGBUS); 91 die("opcode exception", regs, SIGBUS);
89 break; 92 break;
90 case MICROBLAZE_IBUS_EXCEPTION: 93 case MICROBLAZE_IBUS_EXCEPTION:
91 if (user_mode(regs)) { 94 if (user_mode(regs)) {
92 printk(KERN_WARNING "Instruction bus error exception in user mode.\n"); 95 pr_debug(KERN_WARNING "Instruction bus error " \
96 "exception in user mode.\n");
93 _exception(SIGBUS, regs, BUS_ADRERR, addr); 97 _exception(SIGBUS, regs, BUS_ADRERR, addr);
94 return; 98 return;
95 } 99 }
96 printk(KERN_WARNING "Instruction bus error exception in kernel mode.\n"); 100 printk(KERN_WARNING "Instruction bus error exception " \
101 "in kernel mode.\n");
97 die("bus exception", regs, SIGBUS); 102 die("bus exception", regs, SIGBUS);
98 break; 103 break;
99 case MICROBLAZE_DBUS_EXCEPTION: 104 case MICROBLAZE_DBUS_EXCEPTION:
100 if (user_mode(regs)) { 105 if (user_mode(regs)) {
101 printk(KERN_WARNING "Data bus error exception in user mode.\n"); 106 pr_debug(KERN_WARNING "Data bus error exception " \
107 "in user mode.\n");
102 _exception(SIGBUS, regs, BUS_ADRERR, addr); 108 _exception(SIGBUS, regs, BUS_ADRERR, addr);
103 return; 109 return;
104 } 110 }
105 printk(KERN_WARNING "Data bus error exception in kernel mode.\n"); 111 printk(KERN_WARNING "Data bus error exception " \
112 "in kernel mode.\n");
106 die("bus exception", regs, SIGBUS); 113 die("bus exception", regs, SIGBUS);
107 break; 114 break;
108 case MICROBLAZE_DIV_ZERO_EXCEPTION: 115 case MICROBLAZE_DIV_ZERO_EXCEPTION:
109 if (user_mode(regs)) { 116 if (user_mode(regs)) {
110 printk(KERN_WARNING "Divide by zero exception in user mode\n"); 117 pr_debug(KERN_WARNING "Divide by zero exception " \
111 _exception(SIGILL, regs, ILL_ILLOPC, addr); 118 "in user mode\n");
119 _exception(SIGILL, regs, FPE_INTDIV, addr);
112 return; 120 return;
113 } 121 }
114 printk(KERN_WARNING "Divide by zero exception in kernel mode.\n"); 122 printk(KERN_WARNING "Divide by zero exception " \
123 "in kernel mode.\n");
115 die("Divide by exception", regs, SIGBUS); 124 die("Divide by exception", regs, SIGBUS);
116 break; 125 break;
117 case MICROBLAZE_FPU_EXCEPTION: 126 case MICROBLAZE_FPU_EXCEPTION:
118 printk(KERN_WARNING "FPU exception\n"); 127 pr_debug(KERN_WARNING "FPU exception\n");
119 /* IEEE FP exception */ 128 /* IEEE FP exception */
120 /* I removed fsr variable and use code var for storing fsr */ 129 /* I removed fsr variable and use code var for storing fsr */
121 if (fsr & FSR_IO) 130 if (fsr & FSR_IO)
@@ -133,7 +142,7 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
133 142
134#ifdef CONFIG_MMU 143#ifdef CONFIG_MMU
135 case MICROBLAZE_PRIVILEGED_EXCEPTION: 144 case MICROBLAZE_PRIVILEGED_EXCEPTION:
136 printk(KERN_WARNING "Privileged exception\n"); 145 pr_debug(KERN_WARNING "Privileged exception\n");
137 /* "brk r0,r0" - used as debug breakpoint */ 146 /* "brk r0,r0" - used as debug breakpoint */
138 if (get_user(code, (unsigned long *)regs->pc) == 0 147 if (get_user(code, (unsigned long *)regs->pc) == 0
139 && code == 0x980c0000) { 148 && code == 0x980c0000) {
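A side note on the printk conversions above: pr_debug() already supplies its own KERN_DEBUG level, so passing KERN_WARNING into it just embeds the level marker in the message text. The idiomatic form would be:

    /* pr_debug() prefixes KERN_DEBUG itself - no explicit level */
    pr_debug("Illegal opcode exception in user mode\n");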
diff --git a/arch/microblaze/kernel/head.S b/arch/microblaze/kernel/head.S
index e41c6ce2a7be..697ce3007f30 100644
--- a/arch/microblaze/kernel/head.S
+++ b/arch/microblaze/kernel/head.S
@@ -54,6 +54,16 @@ ENTRY(_start)
54 mfs r1, rmsr 54 mfs r1, rmsr
55 andi r1, r1, ~2 55 andi r1, r1, ~2
56 mts rmsr, r1 56 mts rmsr, r1
57/*
58 * Checking mechanism to detect whether this MicroBlaze has MSR instructions:
59 * we load the MSR and compare it with the previous r1 value - if it is the
60 * same, MSR instructions work; if not, the CPU doesn't have them.
61 */
62 /* r8=0 - I have msr instr, 1 - I don't have them */
63 rsubi r0, r0, 1 /* set the carry bit */
64 msrclr r0, 0x4 /* try to clear it */
65 /* read the carry bit, r8 will be '0' if msrclr exists */
66 addik r8, r0, 0
57 67
58/* r7 may point to an FDT, or there may be one linked in. 68/* r7 may point to an FDT, or there may be one linked in.
59 if it's in r7, we've got to save it away ASAP. 69 if it's in r7, we've got to save it away ASAP.
@@ -209,8 +219,8 @@ start_here:
209 * Please see $(ARCH)/mach-$(SUBARCH)/setup.c for 219 * Please see $(ARCH)/mach-$(SUBARCH)/setup.c for
210 * the function. 220 * the function.
211 */ 221 */
212 la r8, r0, machine_early_init 222 la r9, r0, machine_early_init
213 brald r15, r8 223 brald r15, r9
214 nop 224 nop
215 225
216#ifndef CONFIG_MMU 226#ifndef CONFIG_MMU
diff --git a/arch/microblaze/kernel/hw_exception_handler.S b/arch/microblaze/kernel/hw_exception_handler.S
index 3288c9737671..6b0288ebccd6 100644
--- a/arch/microblaze/kernel/hw_exception_handler.S
+++ b/arch/microblaze/kernel/hw_exception_handler.S
@@ -84,9 +84,10 @@
84#define NUM_TO_REG(num) r ## num 84#define NUM_TO_REG(num) r ## num
85 85
86#ifdef CONFIG_MMU 86#ifdef CONFIG_MMU
87/* FIXME you can't change first load of MSR because there is
88 * hardcoded jump bri 4 */
89 #define RESTORE_STATE \ 87 #define RESTORE_STATE \
88 lwi r5, r1, 0; \
89 mts rmsr, r5; \
90 nop; \
90 lwi r3, r1, PT_R3; \ 91 lwi r3, r1, PT_R3; \
91 lwi r4, r1, PT_R4; \ 92 lwi r4, r1, PT_R4; \
92 lwi r5, r1, PT_R5; \ 93 lwi r5, r1, PT_R5; \
@@ -309,6 +310,9 @@ _hw_exception_handler:
309 lwi r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)) /* get saved current */ 310 lwi r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)) /* get saved current */
310#endif 311#endif
311 312
313 mfs r5, rmsr;
314 nop
315 swi r5, r1, 0;
312 mfs r3, resr 316 mfs r3, resr
313 nop 317 nop
314 mfs r4, rear; 318 mfs r4, rear;
@@ -380,6 +384,8 @@ handle_other_ex: /* Handle Other exceptions here */
380 addk r8, r17, r0; /* Load exception address */ 384 addk r8, r17, r0; /* Load exception address */
381 bralid r15, full_exception; /* Branch to the handler */ 385 bralid r15, full_exception; /* Branch to the handler */
382 nop; 386 nop;
 387        mts     rfsr, r0;       /* Clear sticky fsr */
388 nop
383 389
384 /* 390 /*
385 * Trigger execution of the signal handler by enabling 391 * Trigger execution of the signal handler by enabling
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 00b12c6d5326..4201c743cc9f 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -235,6 +235,7 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp)
235 regs->pc = pc; 235 regs->pc = pc;
236 regs->r1 = usp; 236 regs->r1 = usp;
237 regs->pt_mode = 0; 237 regs->pt_mode = 0;
238 regs->msr |= MSR_UMS;
238} 239}
239 240
240#ifdef CONFIG_MMU 241#ifdef CONFIG_MMU
diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c
index 53ff39af6a5c..4b3ac32754de 100644
--- a/arch/microblaze/kernel/ptrace.c
+++ b/arch/microblaze/kernel/ptrace.c
@@ -29,6 +29,10 @@
29#include <linux/sched.h> 29#include <linux/sched.h>
30#include <linux/ptrace.h> 30#include <linux/ptrace.h>
31#include <linux/signal.h> 31#include <linux/signal.h>
32#include <linux/elf.h>
33#include <linux/audit.h>
34#include <linux/seccomp.h>
35#include <linux/tracehook.h>
32 36
33#include <linux/errno.h> 37#include <linux/errno.h>
34#include <asm/processor.h> 38#include <asm/processor.h>
@@ -174,6 +178,64 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
174 return rval; 178 return rval;
175} 179}
176 180
181asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
182{
183 long ret = 0;
184
185 secure_computing(regs->r12);
186
187 if (test_thread_flag(TIF_SYSCALL_TRACE) &&
188 tracehook_report_syscall_entry(regs))
189 /*
190 * Tracing decided this syscall should not happen.
191 * We'll return a bogus call number to get an ENOSYS
192 * error, but leave the original number in regs->regs[0].
193 */
194 ret = -1L;
195
196 if (unlikely(current->audit_context))
197 audit_syscall_entry(EM_XILINX_MICROBLAZE, regs->r12,
198 regs->r5, regs->r6,
199 regs->r7, regs->r8);
200
201 return ret ?: regs->r12;
202}
203
204asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
205{
206 int step;
207
208 if (unlikely(current->audit_context))
209 audit_syscall_exit(AUDITSC_RESULT(regs->r3), regs->r3);
210
211 step = test_thread_flag(TIF_SINGLESTEP);
212 if (step || test_thread_flag(TIF_SYSCALL_TRACE))
213 tracehook_report_syscall_exit(regs, step);
214}
215
216#if 0
217static asmlinkage void syscall_trace(void)
218{
219 if (!test_thread_flag(TIF_SYSCALL_TRACE))
220 return;
221 if (!(current->ptrace & PT_PTRACED))
222 return;
223 /* The 0x80 provides a way for the tracing parent to distinguish
224 between a syscall stop and SIGTRAP delivery */
225 ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
226 ? 0x80 : 0));
227 /*
228 * this isn't the same as continuing with a signal, but it will do
229 * for normal use. strace only continues with a signal if the
230 * stopping signal is not SIGTRAP. -brl
231 */
232 if (current->exit_code) {
233 send_sig(current->exit_code, current, 1);
234 current->exit_code = 0;
235 }
236}
237#endif
238
177void ptrace_disable(struct task_struct *child) 239void ptrace_disable(struct task_struct *child)
178{ 240{
179 /* nothing to do */ 241 /* nothing to do */
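
As the audit_syscall_entry() call shows, regs->r12 holds the syscall number and r5-r8 the arguments; the entry assembly is expected to consume the two hooks roughly as in this C sketch (the real caller is assembly, and the dispatch shown is schematic):

	/* Sketch: expected use of the hooks by the syscall entry path */
	static void syscall_dispatch(struct pt_regs *regs)
	{
		long nr = do_syscall_trace_enter(regs);	/* -1L means "deny" */

		if (nr >= 0 && nr < __NR_syscalls)
			regs->r3 = sys_call_table[nr](regs->r5, regs->r6,
						      regs->r7, regs->r8);
		else
			regs->r3 = -ENOSYS;

		do_syscall_trace_leave(regs);
	}
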
diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index 2a97bf513b64..8c1e0f4dcf18 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -94,7 +94,7 @@ inline unsigned get_romfs_len(unsigned *addr)
94#endif /* CONFIG_MTD_UCLINUX_EBSS */ 94#endif /* CONFIG_MTD_UCLINUX_EBSS */
95 95
96void __init machine_early_init(const char *cmdline, unsigned int ram, 96void __init machine_early_init(const char *cmdline, unsigned int ram,
97 unsigned int fdt) 97 unsigned int fdt, unsigned int msr)
98{ 98{
99 unsigned long *src, *dst = (unsigned long *)0x0; 99 unsigned long *src, *dst = (unsigned long *)0x0;
100 100
@@ -157,6 +157,16 @@ void __init machine_early_init(const char *cmdline, unsigned int ram,
157 early_printk("New klimit: 0x%08x\n", (unsigned)klimit); 157 early_printk("New klimit: 0x%08x\n", (unsigned)klimit);
158#endif 158#endif
159 159
160#if CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR
161 if (msr)
162 early_printk("!!!Your kernel has setup MSR instruction but "
163 "CPU don't have it %d\n", msr);
164#else
165 if (!msr)
166 early_printk("!!!Your kernel not setup MSR instruction but "
167 "CPU have it %d\n", msr);
168#endif
169
160 for (src = __ivt_start; src < __ivt_end; src++, dst++) 170 for (src = __ivt_start; src < __ivt_end; src++, dst++)
161 *dst = *src; 171 *dst = *src;
162 172
diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S
index ec5fa91a48d8..e704188d7855 100644
--- a/arch/microblaze/kernel/vmlinux.lds.S
+++ b/arch/microblaze/kernel/vmlinux.lds.S
@@ -12,13 +12,16 @@ OUTPUT_FORMAT("elf32-microblaze", "elf32-microblaze", "elf32-microblaze")
12OUTPUT_ARCH(microblaze) 12OUTPUT_ARCH(microblaze)
13ENTRY(_start) 13ENTRY(_start)
14 14
15#include <asm/page.h>
15#include <asm-generic/vmlinux.lds.h> 16#include <asm-generic/vmlinux.lds.h>
17#include <asm/thread_info.h>
16 18
17jiffies = jiffies_64 + 4; 19jiffies = jiffies_64 + 4;
18 20
19SECTIONS { 21SECTIONS {
20 . = CONFIG_KERNEL_START; 22 . = CONFIG_KERNEL_START;
21 .text : { 23 _start = CONFIG_KERNEL_BASE_ADDR;
24 .text : AT(ADDR(.text) - LOAD_OFFSET) {
22 _text = . ; 25 _text = . ;
23 _stext = . ; 26 _stext = . ;
24 *(.text .text.*) 27 *(.text .text.*)
@@ -33,24 +36,22 @@ SECTIONS {
33 } 36 }
34 37
35 . = ALIGN (4) ; 38 . = ALIGN (4) ;
36 _fdt_start = . ; /* place for fdt blob */ 39 __fdt_blob : AT(ADDR(__fdt_blob) - LOAD_OFFSET) {
37 . = . + 0x4000; 40 _fdt_start = . ; /* place for fdt blob */
38 _fdt_end = . ; 41 *(__fdt_blob) ; /* Any link-placed DTB */
42 . = _fdt_start + 0x4000; /* Pad up to 16kbyte */
43 _fdt_end = . ;
44 }
39 45
40 . = ALIGN(16); 46 . = ALIGN(16);
41 RODATA 47 RODATA
42 . = ALIGN(16); 48 EXCEPTION_TABLE(16)
43 __ex_table : {
44 __start___ex_table = .;
45 *(__ex_table)
46 __stop___ex_table = .;
47 }
48 49
49 /* 50 /*
50 * sdata2 section can go anywhere, but must be word aligned 51 * sdata2 section can go anywhere, but must be word aligned
51 * and SDA2_BASE must point to the middle of it 52 * and SDA2_BASE must point to the middle of it
52 */ 53 */
53 .sdata2 : { 54 .sdata2 : AT(ADDR(.sdata2) - LOAD_OFFSET) {
54 _ssrw = .; 55 _ssrw = .;
55 . = ALIGN(4096); /* page aligned when MMU used - origin 0x8 */ 56 . = ALIGN(4096); /* page aligned when MMU used - origin 0x8 */
56 *(.sdata2) 57 *(.sdata2)
@@ -61,12 +62,7 @@ SECTIONS {
61 } 62 }
62 63
63 _sdata = . ; 64 _sdata = . ;
64 .data ALIGN (4096) : { /* page aligned when MMU used - origin 0x4 */ 65 RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE)
65 DATA_DATA
66 CONSTRUCTORS
67 }
68 . = ALIGN(32);
69 .data.cacheline_aligned : { *(.data.cacheline_aligned) }
70 _edata = . ; 66 _edata = . ;
71 67
72 /* Reserve some low RAM for r0 based memory references */ 68 /* Reserve some low RAM for r0 based memory references */
@@ -74,18 +70,14 @@ SECTIONS {
74 r0_ram = . ; 70 r0_ram = . ;
75 . = . + 4096; /* a page should be enough */ 71 . = . + 4096; /* a page should be enough */
76 72
77 /* The initial task */
78 . = ALIGN(8192);
79 .data.init_task : { *(.data.init_task) }
80
81 /* Under the microblaze ABI, .sdata and .sbss must be contiguous */ 73 /* Under the microblaze ABI, .sdata and .sbss must be contiguous */
82 . = ALIGN(8); 74 . = ALIGN(8);
83 .sdata : { 75 .sdata : AT(ADDR(.sdata) - LOAD_OFFSET) {
84 _ssro = .; 76 _ssro = .;
85 *(.sdata) 77 *(.sdata)
86 } 78 }
87 79
88 .sbss : { 80 .sbss : AT(ADDR(.sbss) - LOAD_OFFSET) {
89 _ssbss = .; 81 _ssbss = .;
90 *(.sbss) 82 *(.sbss)
91 _esbss = .; 83 _esbss = .;
@@ -96,47 +88,36 @@ SECTIONS {
96 88
97 __init_begin = .; 89 __init_begin = .;
98 90
99 . = ALIGN(4096); 91 INIT_TEXT_SECTION(PAGE_SIZE)
100 .init.text : {
101 _sinittext = . ;
102 INIT_TEXT
103 _einittext = .;
104 }
105 92
106 .init.data : { 93 .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
107 INIT_DATA 94 INIT_DATA
108 } 95 }
109 96
110 . = ALIGN(4); 97 . = ALIGN(4);
111 .init.ivt : { 98 .init.ivt : AT(ADDR(.init.ivt) - LOAD_OFFSET) {
112 __ivt_start = .; 99 __ivt_start = .;
113 *(.init.ivt) 100 *(.init.ivt)
114 __ivt_end = .; 101 __ivt_end = .;
115 } 102 }
116 103
117 .init.setup : { 104 .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
118 __setup_start = .; 105 INIT_SETUP(0)
119 *(.init.setup)
120 __setup_end = .;
121 } 106 }
122 107
123 .initcall.init : { 108 .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET ) {
124 __initcall_start = .; 109 INIT_CALLS
125 INITCALLS
126 __initcall_end = .;
127 } 110 }
128 111
129 .con_initcall.init : { 112 .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
130 __con_initcall_start = .; 113 CON_INITCALL
131 *(.con_initcall.init)
132 __con_initcall_end = .;
133 } 114 }
134 115
135 SECURITY_INIT 116 SECURITY_INIT
136 117
137 __init_end_before_initramfs = .; 118 __init_end_before_initramfs = .;
138 119
139 .init.ramfs ALIGN(4096) : { 120 .init.ramfs ALIGN(4096) : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
140 __initramfs_start = .; 121 __initramfs_start = .;
141 *(.init.ramfs) 122 *(.init.ramfs)
142 __initramfs_end = .; 123 __initramfs_end = .;
@@ -152,7 +133,8 @@ SECTIONS {
152 } 133 }
153 __init_end = .; 134 __init_end = .;
154 135
155 .bss ALIGN (4096) : { /* page aligned when MMU used */ 136 .bss ALIGN (4096) : AT(ADDR(.bss) - LOAD_OFFSET) {
137 /* page aligned when MMU used */
156 __bss_start = . ; 138 __bss_start = . ;
157 *(.bss*) 139 *(.bss*)
158 *(COMMON) 140 *(COMMON)
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 1110784eb3f7..a44892e7cd5b 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -180,7 +180,8 @@ void free_initrd_mem(unsigned long start, unsigned long end)
180 totalram_pages++; 180 totalram_pages++;
181 pages++; 181 pages++;
182 } 182 }
183 printk(KERN_NOTICE "Freeing initrd memory: %dk freed\n", pages); 183 printk(KERN_NOTICE "Freeing initrd memory: %dk freed\n",
184 (int)(pages * (PAGE_SIZE / 1024)));
184} 185}
185#endif 186#endif
186 187
diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c
index 3f04d4c406b7..b3deed8db619 100644
--- a/arch/mips/lasat/sysctl.c
+++ b/arch/mips/lasat/sysctl.c
@@ -56,12 +56,12 @@ int sysctl_lasatstring(ctl_table *table,
56 56
57 57
58/* And the same for proc */ 58/* And the same for proc */
59int proc_dolasatstring(ctl_table *table, int write, struct file *filp, 59int proc_dolasatstring(ctl_table *table, int write,
60 void *buffer, size_t *lenp, loff_t *ppos) 60 void *buffer, size_t *lenp, loff_t *ppos)
61{ 61{
62 int r; 62 int r;
63 63
64 r = proc_dostring(table, write, filp, buffer, lenp, ppos); 64 r = proc_dostring(table, write, buffer, lenp, ppos);
65 if ((!write) || r) 65 if ((!write) || r)
66 return r; 66 return r;
67 67
@@ -71,12 +71,12 @@ int proc_dolasatstring(ctl_table *table, int write, struct file *filp,
71} 71}
72 72
73/* proc function to write EEPROM after changing int entry */ 73/* proc function to write EEPROM after changing int entry */
74int proc_dolasatint(ctl_table *table, int write, struct file *filp, 74int proc_dolasatint(ctl_table *table, int write,
75 void *buffer, size_t *lenp, loff_t *ppos) 75 void *buffer, size_t *lenp, loff_t *ppos)
76{ 76{
77 int r; 77 int r;
78 78
79 r = proc_dointvec(table, write, filp, buffer, lenp, ppos); 79 r = proc_dointvec(table, write, buffer, lenp, ppos);
80 if ((!write) || r) 80 if ((!write) || r)
81 return r; 81 return r;
82 82
@@ -89,7 +89,7 @@ int proc_dolasatint(ctl_table *table, int write, struct file *filp,
89static int rtctmp; 89static int rtctmp;
90 90
91/* proc function to read/write RealTime Clock */ 91/* proc function to read/write RealTime Clock */
92int proc_dolasatrtc(ctl_table *table, int write, struct file *filp, 92int proc_dolasatrtc(ctl_table *table, int write,
93 void *buffer, size_t *lenp, loff_t *ppos) 93 void *buffer, size_t *lenp, loff_t *ppos)
94{ 94{
95 struct timespec ts; 95 struct timespec ts;
@@ -102,7 +102,7 @@ int proc_dolasatrtc(ctl_table *table, int write, struct file *filp,
102 if (rtctmp < 0) 102 if (rtctmp < 0)
103 rtctmp = 0; 103 rtctmp = 0;
104 } 104 }
105 r = proc_dointvec(table, write, filp, buffer, lenp, ppos); 105 r = proc_dointvec(table, write, buffer, lenp, ppos);
106 if (r) 106 if (r)
107 return r; 107 return r;
108 108
@@ -154,7 +154,7 @@ int sysctl_lasat_rtc(ctl_table *table,
154#endif 154#endif
155 155
156#ifdef CONFIG_INET 156#ifdef CONFIG_INET
157int proc_lasat_ip(ctl_table *table, int write, struct file *filp, 157int proc_lasat_ip(ctl_table *table, int write,
158 void *buffer, size_t *lenp, loff_t *ppos) 158 void *buffer, size_t *lenp, loff_t *ppos)
159{ 159{
160 unsigned int ip; 160 unsigned int ip;
@@ -231,12 +231,12 @@ static int sysctl_lasat_prid(ctl_table *table,
231 return 0; 231 return 0;
232} 232}
233 233
234int proc_lasat_prid(ctl_table *table, int write, struct file *filp, 234int proc_lasat_prid(ctl_table *table, int write,
235 void *buffer, size_t *lenp, loff_t *ppos) 235 void *buffer, size_t *lenp, loff_t *ppos)
236{ 236{
237 int r; 237 int r;
238 238
239 r = proc_dointvec(table, write, filp, buffer, lenp, ppos); 239 r = proc_dointvec(table, write, buffer, lenp, ppos);
240 if (r < 0) 240 if (r < 0)
241 return r; 241 return r;
242 if (write) { 242 if (write) {
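
Every hunk in this file follows the same tree-wide API change: sysctl proc handlers lose their struct file * argument while the buffer, lenp and ppos parameters keep their positions. A new-style handler that reacts to a write is sketched below; my_commit_value() is a hypothetical hook standing in for the EEPROM/RTC updates done here:

	/* Sketch: a proc handler in the new, filp-less form */
	static int proc_mydev_int(ctl_table *table, int write,
				  void *buffer, size_t *lenp, loff_t *ppos)
	{
		int r = proc_dointvec(table, write, buffer, lenp, ppos);

		if (!write || r)
			return r;

		my_commit_value();	/* hypothetical: push the value to hardware */
		return 0;
	}
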
diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S
index 76f41bdb79c4..10549dcfb610 100644
--- a/arch/mn10300/kernel/vmlinux.lds.S
+++ b/arch/mn10300/kernel/vmlinux.lds.S
@@ -44,24 +44,8 @@ SECTIONS
44 RO_DATA(PAGE_SIZE) 44 RO_DATA(PAGE_SIZE)
45 45
46 /* writeable */ 46 /* writeable */
47 .data : { /* Data */ 47 RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE)
48 DATA_DATA 48 _edata = .;
49 CONSTRUCTORS
50 }
51
52 .data_nosave : { NOSAVE_DATA; }
53
54 .data.page_aligned : { PAGE_ALIGNED_DATA(PAGE_SIZE); }
55 .data.cacheline_aligned : { CACHELINE_ALIGNED_DATA(32); }
56
57 /* rarely changed data like cpu maps */
58 . = ALIGN(32);
59 .data.read_mostly : AT(ADDR(.data.read_mostly)) {
60 READ_MOSTLY_DATA(32);
61 _edata = .; /* End of data section */
62 }
63
64 .data.init_task : { INIT_TASK_DATA(THREAD_SIZE); }
65 49
66 /* might get freed after init */ 50 /* might get freed after init */
67 . = ALIGN(PAGE_SIZE); 51 . = ALIGN(PAGE_SIZE);
@@ -74,22 +58,8 @@ SECTIONS
74 /* will be freed after init */ 58 /* will be freed after init */
75 . = ALIGN(PAGE_SIZE); /* Init code and data */ 59 . = ALIGN(PAGE_SIZE); /* Init code and data */
76 __init_begin = .; 60 __init_begin = .;
77 .init.text : { 61 INIT_TEXT_SECTION(PAGE_SIZE)
78 _sinittext = .; 62 INIT_DATA_SECTION(16)
79 INIT_TEXT;
80 _einittext = .;
81 }
82 .init.data : { INIT_DATA; }
83 .setup.init : { INIT_SETUP(16); }
84
85 __initcall_start = .;
86 .initcall.init : {
87 INITCALLS
88 }
89 __initcall_end = .;
90 .con_initcall.init : { CON_INITCALL; }
91
92 SECURITY_INIT
93 . = ALIGN(4); 63 . = ALIGN(4);
94 __alt_instructions = .; 64 __alt_instructions = .;
95 .altinstructions : { *(.altinstructions) } 65 .altinstructions : { *(.altinstructions) }
@@ -100,8 +70,6 @@ SECTIONS
100 .exit.text : { EXIT_TEXT; } 70 .exit.text : { EXIT_TEXT; }
101 .exit.data : { EXIT_DATA; } 71 .exit.data : { EXIT_DATA; }
102 72
103 .init.ramfs : { INIT_RAM_FS; }
104
105 PERCPU(32) 73 PERCPU(32)
106 . = ALIGN(PAGE_SIZE); 74 . = ALIGN(PAGE_SIZE);
107 __init_end = .; 75 __init_end = .;
diff --git a/arch/parisc/include/asm/fcntl.h b/arch/parisc/include/asm/fcntl.h
index 1e1c824764ee..5f39d5597ced 100644
--- a/arch/parisc/include/asm/fcntl.h
+++ b/arch/parisc/include/asm/fcntl.h
@@ -28,6 +28,8 @@
28#define F_SETOWN 12 /* for sockets. */ 28#define F_SETOWN 12 /* for sockets. */
29#define F_SETSIG 13 /* for sockets. */ 29#define F_SETSIG 13 /* for sockets. */
30#define F_GETSIG 14 /* for sockets. */ 30#define F_GETSIG 14 /* for sockets. */
31#define F_GETOWN_EX 15
32#define F_SETOWN_EX 16
31 33
32/* for posix fcntl() and lockf() */ 34/* for posix fcntl() and lockf() */
33#define F_RDLCK 01 35#define F_RDLCK 01
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index aea1784edbd1..775be2791bc2 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -77,13 +77,7 @@ SECTIONS
77 */ 77 */
78 . = ALIGN(PAGE_SIZE); 78 . = ALIGN(PAGE_SIZE);
79 data_start = .; 79 data_start = .;
80 . = ALIGN(16); 80 EXCEPTION_TABLE(16)
81 /* Exception table */
82 __ex_table : {
83 __start___ex_table = .;
84 *(__ex_table)
85 __stop___ex_table = .;
86 }
87 81
88 NOTES 82 NOTES
89 83
@@ -94,23 +88,8 @@ SECTIONS
94 __stop___unwind = .; 88 __stop___unwind = .;
95 } 89 }
96 90
97 /* rarely changed data like cpu maps */
98 . = ALIGN(16);
99 .data.read_mostly : {
100 *(.data.read_mostly)
101 }
102
103 . = ALIGN(L1_CACHE_BYTES);
104 /* Data */ 91 /* Data */
105 .data : { 92 RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
106 DATA_DATA
107 CONSTRUCTORS
108 }
109
110 . = ALIGN(L1_CACHE_BYTES);
111 .data.cacheline_aligned : {
112 *(.data.cacheline_aligned)
113 }
114 93
 115        /* PA-RISC locks require 16-byte alignment */ 94
116 . = ALIGN(16); 95 . = ALIGN(16);
@@ -118,17 +97,6 @@ SECTIONS
118 *(.data.lock_aligned) 97 *(.data.lock_aligned)
119 } 98 }
120 99
121 /* nosave data is really only used for software suspend...it's here
122 * just in case we ever implement it
123 */
124 . = ALIGN(PAGE_SIZE);
125 __nosave_begin = .;
126 .data_nosave : {
127 *(.data.nosave)
128 }
129 . = ALIGN(PAGE_SIZE);
130 __nosave_end = .;
131
132 /* End of data section */ 100 /* End of data section */
133 _edata = .; 101 _edata = .;
134 102
@@ -147,14 +115,6 @@ SECTIONS
147 } 115 }
148 __bss_stop = .; 116 __bss_stop = .;
149 117
150
151 /* assembler code expects init_task to be 16k aligned */
152 . = ALIGN(16384);
153 /* init_task */
154 .data.init_task : {
155 *(.data.init_task)
156 }
157
158#ifdef CONFIG_64BIT 118#ifdef CONFIG_64BIT
159 . = ALIGN(16); 119 . = ALIGN(16);
160 /* Linkage tables */ 120 /* Linkage tables */
@@ -172,64 +132,8 @@ SECTIONS
172 /* reserve space for interrupt stack by aligning __init* to 16k */ 132 /* reserve space for interrupt stack by aligning __init* to 16k */
173 . = ALIGN(16384); 133 . = ALIGN(16384);
174 __init_begin = .; 134 __init_begin = .;
175 .init.text : { 135 INIT_TEXT_SECTION(16384)
176 _sinittext = .; 136 INIT_DATA_SECTION(16)
177 INIT_TEXT
178 _einittext = .;
179 }
180 .init.data : {
181 INIT_DATA
182 }
183 . = ALIGN(16);
184 .init.setup : {
185 __setup_start = .;
186 *(.init.setup)
187 __setup_end = .;
188 }
189 .initcall.init : {
190 __initcall_start = .;
191 INITCALLS
192 __initcall_end = .;
193 }
194 .con_initcall.init : {
195 __con_initcall_start = .;
196 *(.con_initcall.init)
197 __con_initcall_end = .;
198 }
199 SECURITY_INIT
200
201 /* alternate instruction replacement. This is a mechanism x86 uses
202 * to detect the CPU type and replace generic instruction sequences
203 * with CPU specific ones. We don't currently do this in PA, but
204 * it seems like a good idea...
205 */
206 . = ALIGN(4);
207 .altinstructions : {
208 __alt_instructions = .;
209 *(.altinstructions)
210 __alt_instructions_end = .;
211 }
212 .altinstr_replacement : {
213 *(.altinstr_replacement)
214 }
215
 216        /* .exit.text is discarded at runtime, not link time, to deal with references
217 * from .altinstructions and .eh_frame
218 */
219 .exit.text : {
220 EXIT_TEXT
221 }
222 .exit.data : {
223 EXIT_DATA
224 }
225#ifdef CONFIG_BLK_DEV_INITRD
226 . = ALIGN(PAGE_SIZE);
227 .init.ramfs : {
228 __initramfs_start = .;
229 *(.init.ramfs)
230 __initramfs_end = .;
231 }
232#endif
233 137
234 PERCPU(PAGE_SIZE) 138 PERCPU(PAGE_SIZE)
235 . = ALIGN(PAGE_SIZE); 139 . = ALIGN(PAGE_SIZE);
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4fd479059d65..10a0a5488a44 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -385,9 +385,15 @@ config NUMA
385 385
386config NODES_SHIFT 386config NODES_SHIFT
387 int 387 int
388 default "8" if PPC64
388 default "4" 389 default "4"
389 depends on NEED_MULTIPLE_NODES 390 depends on NEED_MULTIPLE_NODES
390 391
392config MAX_ACTIVE_REGIONS
393 int
394 default "256" if PPC64
395 default "32"
396
391config ARCH_SELECT_MEMORY_MODEL 397config ARCH_SELECT_MEMORY_MODEL
392 def_bool y 398 def_bool y
393 depends on PPC64 399 depends on PPC64
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index aacf629c1a9f..1a54a3b3a3fa 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -164,6 +164,17 @@ PHONY += $(BOOT_TARGETS)
164 164
165boot := arch/$(ARCH)/boot 165boot := arch/$(ARCH)/boot
166 166
167ifeq ($(CONFIG_RELOCATABLE),y)
168quiet_cmd_relocs_check = CALL $<
169 cmd_relocs_check = perl $< "$(OBJDUMP)" "$(obj)/vmlinux"
170
171PHONY += relocs_check
172relocs_check: arch/powerpc/relocs_check.pl vmlinux
173 $(call cmd,relocs_check)
174
175zImage: relocs_check
176endif
177
167$(BOOT_TARGETS): vmlinux 178$(BOOT_TARGETS): vmlinux
168 $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@) 179 $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
169 180
diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h
index 9dade15d1ab4..6d94d27ed850 100644
--- a/arch/powerpc/include/asm/device.h
+++ b/arch/powerpc/include/asm/device.h
@@ -15,7 +15,16 @@ struct dev_archdata {
15 15
16 /* DMA operations on that device */ 16 /* DMA operations on that device */
17 struct dma_map_ops *dma_ops; 17 struct dma_map_ops *dma_ops;
18 void *dma_data; 18
19 /*
20 * When an iommu is in use, dma_data is used as a ptr to the base of the
21 * iommu_table. Otherwise, it is a simple numerical offset.
22 */
23 union {
24 dma_addr_t dma_offset;
25 void *iommu_table_base;
26 } dma_data;
27
19#ifdef CONFIG_SWIOTLB 28#ifdef CONFIG_SWIOTLB
20 dma_addr_t max_direct_dma_addr; 29 dma_addr_t max_direct_dma_addr;
21#endif 30#endif
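
The union makes the two addressing models mutually exclusive by construction, and the accessors added later in this series (set_dma_offset() in dma-mapping.h, set_iommu_table_base() in iommu.h) keep callers away from the wrong member. A platform setup hook picks exactly one, as in this sketch; dev_uses_iommu() and myplat_iommu_table are hypothetical:

	/* Sketch: a dev-setup hook chooses one member of dma_data */
	static void myplat_dma_dev_setup(struct device *dev)
	{
		if (dev_uses_iommu(dev))
			set_iommu_table_base(dev, &myplat_iommu_table);
		else
			set_dma_offset(dev, PCI_DRAM_OFFSET);
	}
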
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index cb2ca41dd526..e281daebddca 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -26,7 +26,6 @@ extern void *dma_direct_alloc_coherent(struct device *dev, size_t size,
26extern void dma_direct_free_coherent(struct device *dev, size_t size, 26extern void dma_direct_free_coherent(struct device *dev, size_t size,
27 void *vaddr, dma_addr_t dma_handle); 27 void *vaddr, dma_addr_t dma_handle);
28 28
29extern unsigned long get_dma_direct_offset(struct device *dev);
30 29
31#ifdef CONFIG_NOT_COHERENT_CACHE 30#ifdef CONFIG_NOT_COHERENT_CACHE
32/* 31/*
@@ -90,6 +89,28 @@ static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
90 dev->archdata.dma_ops = ops; 89 dev->archdata.dma_ops = ops;
91} 90}
92 91
92/*
93 * get_dma_offset()
94 *
95 * Get the dma offset on configurations where the dma address can be determined
96 * from the physical address by looking at a simple offset. Direct dma and
97 * swiotlb use this function, but it is typically not used by implementations
98 * with an iommu.
99 */
100static inline dma_addr_t get_dma_offset(struct device *dev)
101{
102 if (dev)
103 return dev->archdata.dma_data.dma_offset;
104
105 return PCI_DRAM_OFFSET;
106}
107
108static inline void set_dma_offset(struct device *dev, dma_addr_t off)
109{
110 if (dev)
111 dev->archdata.dma_data.dma_offset = off;
112}
113
93/* this will be removed soon */ 114/* this will be removed soon */
94#define flush_write_buffers() 115#define flush_write_buffers()
95 116
@@ -181,12 +202,12 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
181 202
182static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) 203static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
183{ 204{
184 return paddr + get_dma_direct_offset(dev); 205 return paddr + get_dma_offset(dev);
185} 206}
186 207
187static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) 208static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
188{ 209{
189 return daddr - get_dma_direct_offset(dev); 210 return daddr - get_dma_offset(dev);
190} 211}
191 212
192#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) 213#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
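
With the offset stored per device, direct-DMA translation is pure arithmetic. A minimal sketch of the round trip (in practice the set_dma_offset() call is issued once by bus setup code, as the pci-common.c hunk below shows):

	/* Sketch: direct DMA addressing via the per-device offset */
	static void direct_dma_round_trip(struct device *dev, phys_addr_t paddr)
	{
		dma_addr_t daddr;

		set_dma_offset(dev, PCI_DRAM_OFFSET);
		daddr = phys_to_dma(dev, paddr);	/* paddr + offset */
		WARN_ON(dma_to_phys(dev, daddr) != paddr);
	}
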
diff --git a/arch/powerpc/include/asm/fsldma.h b/arch/powerpc/include/asm/fsldma.h
new file mode 100644
index 000000000000..a67aeed17d40
--- /dev/null
+++ b/arch/powerpc/include/asm/fsldma.h
@@ -0,0 +1,136 @@
1/*
2 * Freescale MPC83XX / MPC85XX DMA Controller
3 *
4 * Copyright (c) 2009 Ira W. Snyder <iws@ovro.caltech.edu>
5 *
6 * This file is licensed under the terms of the GNU General Public License
7 * version 2. This program is licensed "as is" without any warranty of any
8 * kind, whether express or implied.
9 */
10
11#ifndef __ARCH_POWERPC_ASM_FSLDMA_H__
12#define __ARCH_POWERPC_ASM_FSLDMA_H__
13
14#include <linux/dmaengine.h>
15
16/*
 17 * Definitions for the Freescale DMA controller's DMA_SLAVE implementation
18 *
19 * The Freescale DMA_SLAVE implementation was designed to handle many-to-many
20 * transfers. An example usage would be an accelerated copy between two
21 * scatterlists. Another example use would be an accelerated copy from
22 * multiple non-contiguous device buffers into a single scatterlist.
23 *
24 * A DMA_SLAVE transaction is defined by a struct fsl_dma_slave. This
25 * structure contains a list of hardware addresses that should be copied
26 * to/from the scatterlist passed into device_prep_slave_sg(). The structure
27 * also has some fields to enable hardware-specific features.
28 */
29
30/**
31 * struct fsl_dma_hw_addr
32 * @entry: linked list entry
33 * @address: the hardware address
34 * @length: length to transfer
35 *
36 * Holds a single physical hardware address / length pair for use
37 * with the DMAEngine DMA_SLAVE API.
38 */
39struct fsl_dma_hw_addr {
40 struct list_head entry;
41
42 dma_addr_t address;
43 size_t length;
44};
45
46/**
47 * struct fsl_dma_slave
48 * @addresses: a linked list of struct fsl_dma_hw_addr structures
49 * @request_count: value for DMA request count
50 * @src_loop_size: setup and enable constant source-address DMA transfers
51 * @dst_loop_size: setup and enable constant destination address DMA transfers
52 * @external_start: enable externally started DMA transfers
53 * @external_pause: enable externally paused DMA transfers
54 *
55 * Holds a list of address / length pairs for use with the DMAEngine
56 * DMA_SLAVE API implementation for the Freescale DMA controller.
57 */
58struct fsl_dma_slave {
59
60 /* List of hardware address/length pairs */
61 struct list_head addresses;
62
63 /* Support for extra controller features */
64 unsigned int request_count;
65 unsigned int src_loop_size;
66 unsigned int dst_loop_size;
67 bool external_start;
68 bool external_pause;
69};
70
71/**
72 * fsl_dma_slave_append - add an address/length pair to a struct fsl_dma_slave
73 * @slave: the &struct fsl_dma_slave to add to
74 * @address: the hardware address to add
75 * @length: the length of bytes to transfer from @address
76 *
77 * Add a hardware address/length pair to a struct fsl_dma_slave. Returns 0 on
78 * success, -ERRNO otherwise.
79 */
80static inline int fsl_dma_slave_append(struct fsl_dma_slave *slave,
81 dma_addr_t address, size_t length)
82{
83 struct fsl_dma_hw_addr *addr;
84
85 addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
86 if (!addr)
87 return -ENOMEM;
88
89 INIT_LIST_HEAD(&addr->entry);
90 addr->address = address;
91 addr->length = length;
92
93 list_add_tail(&addr->entry, &slave->addresses);
94 return 0;
95}
96
97/**
98 * fsl_dma_slave_free - free a struct fsl_dma_slave
99 * @slave: the struct fsl_dma_slave to free
100 *
101 * Free a struct fsl_dma_slave and all associated address/length pairs
102 */
103static inline void fsl_dma_slave_free(struct fsl_dma_slave *slave)
104{
105 struct fsl_dma_hw_addr *addr, *tmp;
106
107 if (slave) {
108 list_for_each_entry_safe(addr, tmp, &slave->addresses, entry) {
109 list_del(&addr->entry);
110 kfree(addr);
111 }
112
113 kfree(slave);
114 }
115}
116
117/**
118 * fsl_dma_slave_alloc - allocate a struct fsl_dma_slave
119 * @gfp: the flags to pass to kmalloc when allocating this structure
120 *
121 * Allocate a struct fsl_dma_slave for use by the DMA_SLAVE API. Returns a new
122 * struct fsl_dma_slave on success, or NULL on failure.
123 */
124static inline struct fsl_dma_slave *fsl_dma_slave_alloc(gfp_t gfp)
125{
126 struct fsl_dma_slave *slave;
127
128 slave = kzalloc(sizeof(*slave), gfp);
129 if (!slave)
130 return NULL;
131
132 INIT_LIST_HEAD(&slave->addresses);
133 return slave;
134}
135
136#endif /* __ARCH_POWERPC_ASM_FSLDMA_H__ */
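
The helpers compose into a short build sequence; the sketch below is based only on this header - how a driver hands the finished struct to a channel (the dmaengine plumbing) is not defined here, and dev_addr/dev_len are placeholders:

	/* Sketch: build a one-entry hardware address list for DMA_SLAVE */
	static struct fsl_dma_slave *build_slave(dma_addr_t dev_addr,
						 size_t dev_len)
	{
		struct fsl_dma_slave *slave = fsl_dma_slave_alloc(GFP_KERNEL);

		if (!slave)
			return NULL;

		if (fsl_dma_slave_append(slave, dev_addr, dev_len)) {
			fsl_dma_slave_free(slave);	/* frees appended pairs too */
			return NULL;
		}

		slave->external_start = true;	/* optional controller feature */
		return slave;
	}
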
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 7464c0daddd1..edfc9803ec91 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -70,6 +70,16 @@ struct iommu_table {
70 70
71struct scatterlist; 71struct scatterlist;
72 72
73static inline void set_iommu_table_base(struct device *dev, void *base)
74{
75 dev->archdata.dma_data.iommu_table_base = base;
76}
77
78static inline void *get_iommu_table_base(struct device *dev)
79{
80 return dev->archdata.dma_data.iommu_table_base;
81}
82
73/* Frees table for an individual device node */ 83/* Frees table for an individual device node */
74extern void iommu_free_table(struct iommu_table *tbl, const char *node_name); 84extern void iommu_free_table(struct iommu_table *tbl, const char *node_name);
75 85
diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h
index ccc68b50d05d..5a9ede4962cb 100644
--- a/arch/powerpc/include/asm/pmc.h
+++ b/arch/powerpc/include/asm/pmc.h
@@ -29,7 +29,7 @@ int reserve_pmc_hardware(perf_irq_t new_perf_irq);
29void release_pmc_hardware(void); 29void release_pmc_hardware(void);
30void ppc_enable_pmcs(void); 30void ppc_enable_pmcs(void);
31 31
32#ifdef CONFIG_PPC64 32#ifdef CONFIG_PPC_BOOK3S_64
33#include <asm/lppaca.h> 33#include <asm/lppaca.h>
34 34
35static inline void ppc_set_pmu_inuse(int inuse) 35static inline void ppc_set_pmu_inuse(int inuse)
diff --git a/arch/powerpc/include/asm/pte-40x.h b/arch/powerpc/include/asm/pte-40x.h
index 6c3e1f4378d4..ec0b0b0d1df9 100644
--- a/arch/powerpc/include/asm/pte-40x.h
+++ b/arch/powerpc/include/asm/pte-40x.h
@@ -43,6 +43,7 @@
43#define _PAGE_NO_CACHE 0x004 /* I: caching is inhibited */ 43#define _PAGE_NO_CACHE 0x004 /* I: caching is inhibited */
44#define _PAGE_WRITETHRU 0x008 /* W: caching is write-through */ 44#define _PAGE_WRITETHRU 0x008 /* W: caching is write-through */
45#define _PAGE_USER 0x010 /* matches one of the zone permission bits */ 45#define _PAGE_USER 0x010 /* matches one of the zone permission bits */
46#define _PAGE_SPECIAL 0x020 /* software: Special page */
46#define _PAGE_RW 0x040 /* software: Writes permitted */ 47#define _PAGE_RW 0x040 /* software: Writes permitted */
47#define _PAGE_DIRTY 0x080 /* software: dirty page */ 48#define _PAGE_DIRTY 0x080 /* software: dirty page */
48#define _PAGE_HWWRITE 0x100 /* hardware: Dirty & RW, set in exception */ 49#define _PAGE_HWWRITE 0x100 /* hardware: Dirty & RW, set in exception */
diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h
index 94e979718dcf..dd5ea95fe61e 100644
--- a/arch/powerpc/include/asm/pte-8xx.h
+++ b/arch/powerpc/include/asm/pte-8xx.h
@@ -32,6 +32,7 @@
32#define _PAGE_FILE 0x0002 /* when !present: nonlinear file mapping */ 32#define _PAGE_FILE 0x0002 /* when !present: nonlinear file mapping */
33#define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */ 33#define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */
34#define _PAGE_SHARED 0x0004 /* No ASID (context) compare */ 34#define _PAGE_SHARED 0x0004 /* No ASID (context) compare */
35#define _PAGE_SPECIAL 0x0008 /* SW entry, forced to 0 by the TLB miss */
35 36
36/* These five software bits must be masked out when the entry is loaded 37/* These five software bits must be masked out when the entry is loaded
37 * into the TLB. 38 * into the TLB.
diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h
index c3b65076a263..f2b370180a09 100644
--- a/arch/powerpc/include/asm/pte-common.h
+++ b/arch/powerpc/include/asm/pte-common.h
@@ -25,9 +25,6 @@
25#ifndef _PAGE_WRITETHRU 25#ifndef _PAGE_WRITETHRU
26#define _PAGE_WRITETHRU 0 26#define _PAGE_WRITETHRU 0
27#endif 27#endif
28#ifndef _PAGE_SPECIAL
29#define _PAGE_SPECIAL 0
30#endif
31#ifndef _PAGE_4K_PFN 28#ifndef _PAGE_4K_PFN
32#define _PAGE_4K_PFN 0 29#define _PAGE_4K_PFN 0
33#endif 30#endif
@@ -179,7 +176,5 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void);
179#define HAVE_PAGE_AGP 176#define HAVE_PAGE_AGP
180 177
181/* Advertise support for _PAGE_SPECIAL */ 178/* Advertise support for _PAGE_SPECIAL */
182#ifdef _PAGE_SPECIAL
183#define __HAVE_ARCH_PTE_SPECIAL 179#define __HAVE_ARCH_PTE_SPECIAL
184#endif
185 180
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index 87ddb3fb948c..37771a518119 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -18,7 +18,7 @@
18static void *dma_iommu_alloc_coherent(struct device *dev, size_t size, 18static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
19 dma_addr_t *dma_handle, gfp_t flag) 19 dma_addr_t *dma_handle, gfp_t flag)
20{ 20{
21 return iommu_alloc_coherent(dev, dev->archdata.dma_data, size, 21 return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
22 dma_handle, device_to_mask(dev), flag, 22 dma_handle, device_to_mask(dev), flag,
23 dev_to_node(dev)); 23 dev_to_node(dev));
24} 24}
@@ -26,7 +26,7 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
26static void dma_iommu_free_coherent(struct device *dev, size_t size, 26static void dma_iommu_free_coherent(struct device *dev, size_t size,
27 void *vaddr, dma_addr_t dma_handle) 27 void *vaddr, dma_addr_t dma_handle)
28{ 28{
29 iommu_free_coherent(dev->archdata.dma_data, size, vaddr, dma_handle); 29 iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
30} 30}
31 31
32/* Creates TCEs for a user provided buffer. The user buffer must be 32/* Creates TCEs for a user provided buffer. The user buffer must be
@@ -39,8 +39,8 @@ static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page,
39 enum dma_data_direction direction, 39 enum dma_data_direction direction,
40 struct dma_attrs *attrs) 40 struct dma_attrs *attrs)
41{ 41{
42 return iommu_map_page(dev, dev->archdata.dma_data, page, offset, size, 42 return iommu_map_page(dev, get_iommu_table_base(dev), page, offset,
43 device_to_mask(dev), direction, attrs); 43 size, device_to_mask(dev), direction, attrs);
44} 44}
45 45
46 46
@@ -48,7 +48,7 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
48 size_t size, enum dma_data_direction direction, 48 size_t size, enum dma_data_direction direction,
49 struct dma_attrs *attrs) 49 struct dma_attrs *attrs)
50{ 50{
51 iommu_unmap_page(dev->archdata.dma_data, dma_handle, size, direction, 51 iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction,
52 attrs); 52 attrs);
53} 53}
54 54
@@ -57,7 +57,7 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
57 int nelems, enum dma_data_direction direction, 57 int nelems, enum dma_data_direction direction,
58 struct dma_attrs *attrs) 58 struct dma_attrs *attrs)
59{ 59{
60 return iommu_map_sg(dev, dev->archdata.dma_data, sglist, nelems, 60 return iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
61 device_to_mask(dev), direction, attrs); 61 device_to_mask(dev), direction, attrs);
62} 62}
63 63
@@ -65,14 +65,14 @@ static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
65 int nelems, enum dma_data_direction direction, 65 int nelems, enum dma_data_direction direction,
66 struct dma_attrs *attrs) 66 struct dma_attrs *attrs)
67{ 67{
68 iommu_unmap_sg(dev->archdata.dma_data, sglist, nelems, direction, 68 iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, direction,
69 attrs); 69 attrs);
70} 70}
71 71
72/* We support DMA to/from any memory page via the iommu */ 72/* We support DMA to/from any memory page via the iommu */
73static int dma_iommu_dma_supported(struct device *dev, u64 mask) 73static int dma_iommu_dma_supported(struct device *dev, u64 mask)
74{ 74{
75 struct iommu_table *tbl = dev->archdata.dma_data; 75 struct iommu_table *tbl = get_iommu_table_base(dev);
76 76
77 if (!tbl || tbl->it_offset > mask) { 77 if (!tbl || tbl->it_offset > mask) {
78 printk(KERN_INFO 78 printk(KERN_INFO
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 21b784d7e7d0..6215062caf8c 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -21,13 +21,6 @@
21 * default the offset is PCI_DRAM_OFFSET. 21 * default the offset is PCI_DRAM_OFFSET.
22 */ 22 */
23 23
24unsigned long get_dma_direct_offset(struct device *dev)
25{
26 if (dev)
27 return (unsigned long)dev->archdata.dma_data;
28
29 return PCI_DRAM_OFFSET;
30}
31 24
32void *dma_direct_alloc_coherent(struct device *dev, size_t size, 25void *dma_direct_alloc_coherent(struct device *dev, size_t size,
33 dma_addr_t *dma_handle, gfp_t flag) 26 dma_addr_t *dma_handle, gfp_t flag)
@@ -37,7 +30,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size,
37 ret = __dma_alloc_coherent(dev, size, dma_handle, flag); 30 ret = __dma_alloc_coherent(dev, size, dma_handle, flag);
38 if (ret == NULL) 31 if (ret == NULL)
39 return NULL; 32 return NULL;
40 *dma_handle += get_dma_direct_offset(dev); 33 *dma_handle += get_dma_offset(dev);
41 return ret; 34 return ret;
42#else 35#else
43 struct page *page; 36 struct page *page;
@@ -51,7 +44,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size,
51 return NULL; 44 return NULL;
52 ret = page_address(page); 45 ret = page_address(page);
53 memset(ret, 0, size); 46 memset(ret, 0, size);
54 *dma_handle = virt_to_abs(ret) + get_dma_direct_offset(dev); 47 *dma_handle = virt_to_abs(ret) + get_dma_offset(dev);
55 48
56 return ret; 49 return ret;
57#endif 50#endif
@@ -75,7 +68,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
75 int i; 68 int i;
76 69
77 for_each_sg(sgl, sg, nents, i) { 70 for_each_sg(sgl, sg, nents, i) {
78 sg->dma_address = sg_phys(sg) + get_dma_direct_offset(dev); 71 sg->dma_address = sg_phys(sg) + get_dma_offset(dev);
79 sg->dma_length = sg->length; 72 sg->dma_length = sg->length;
80 __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction); 73 __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
81 } 74 }
@@ -110,7 +103,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
110{ 103{
111 BUG_ON(dir == DMA_NONE); 104 BUG_ON(dir == DMA_NONE);
112 __dma_sync_page(page, offset, size, dir); 105 __dma_sync_page(page, offset, size, dir);
113 return page_to_phys(page) + offset + get_dma_direct_offset(dev); 106 return page_to_phys(page) + offset + get_dma_offset(dev);
114} 107}
115 108
116static inline void dma_direct_unmap_page(struct device *dev, 109static inline void dma_direct_unmap_page(struct device *dev,
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 9048f96237f6..24dcc0ecf246 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -17,7 +17,6 @@
17#include <asm/cputable.h> 17#include <asm/cputable.h>
18#include <asm/setup.h> 18#include <asm/setup.h>
19#include <asm/thread_info.h> 19#include <asm/thread_info.h>
20#include <asm/reg.h>
21#include <asm/exception-64e.h> 20#include <asm/exception-64e.h>
22#include <asm/bug.h> 21#include <asm/bug.h>
23#include <asm/irqflags.h> 22#include <asm/irqflags.h>
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index e9f4840096b3..bb8209e34931 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1117,7 +1117,7 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus)
1117 1117
1118 /* Hook up default DMA ops */ 1118 /* Hook up default DMA ops */
1119 sd->dma_ops = pci_dma_ops; 1119 sd->dma_ops = pci_dma_ops;
1120 sd->dma_data = (void *)PCI_DRAM_OFFSET; 1120 set_dma_offset(&dev->dev, PCI_DRAM_OFFSET);
1121 1121
1122 /* Additional platform DMA/iommu setup */ 1122 /* Additional platform DMA/iommu setup */
1123 if (ppc_md.pci_dma_dev_setup) 1123 if (ppc_md.pci_dma_dev_setup)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 0a3216433051..1168c5f440ab 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1165,7 +1165,22 @@ static inline unsigned long brk_rnd(void)
1165 1165
1166unsigned long arch_randomize_brk(struct mm_struct *mm) 1166unsigned long arch_randomize_brk(struct mm_struct *mm)
1167{ 1167{
1168 unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd()); 1168 unsigned long base = mm->brk;
1169 unsigned long ret;
1170
1171#ifdef CONFIG_PPC64
1172 /*
1173 * If we are using 1TB segments and we are allowed to randomise
1174 * the heap, we can put it above 1TB so it is backed by a 1TB
1175 * segment. Otherwise the heap will be in the bottom 1TB
1176 * which always uses 256MB segments and this may result in a
1177 * performance penalty.
1178 */
1179 if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
1180 base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T);
1181#endif
1182
1183 ret = PAGE_ALIGN(base + brk_rnd());
1169 1184
1170 if (ret < mm->brk) 1185 if (ret < mm->brk)
1171 return mm->brk; 1186 return mm->brk;
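
Concretely, a 64-bit task under 1 TB segments whose brk sits below 1 TB has its randomisation base lifted to 1UL << SID_SHIFT_1T, so the randomised heap lands in a single 1 TB segment instead of 256 MB ones. A sketch of the base selection:

	/* Sketch: base selection when mmu_highuser_ssize == MMU_SEGSIZE_1T */
	static unsigned long pick_brk_base(unsigned long brk)
	{
		/* e.g. a brk at 512 GB is lifted to 1 TB */
		return max_t(unsigned long, brk, 1UL << SID_SHIFT_1T);
	}
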
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 864334b337a3..bafac2e41ae1 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -800,7 +800,7 @@ static void __init prom_send_capabilities(void)
800 root = call_prom("open", 1, 1, ADDR("/")); 800 root = call_prom("open", 1, 1, ADDR("/"));
801 if (root != 0) { 801 if (root != 0) {
802 /* try calling the ibm,client-architecture-support method */ 802 /* try calling the ibm,client-architecture-support method */
803 prom_printf("Calling ibm,client-architecture..."); 803 prom_printf("Calling ibm,client-architecture-support...");
804 if (call_prom_ret("call-method", 3, 2, &ret, 804 if (call_prom_ret("call-method", 3, 2, &ret,
805 ADDR("ibm,client-architecture-support"), 805 ADDR("ibm,client-architecture-support"),
806 root, 806 root,
@@ -814,6 +814,7 @@ static void __init prom_send_capabilities(void)
814 return; 814 return;
815 } 815 }
816 call_prom("close", 1, 0, root); 816 call_prom("close", 1, 0, root);
817 prom_printf(" not implemented\n");
817 } 818 }
818 819
819 /* no ibm,client-architecture-support call, try the old way */ 820 /* no ibm,client-architecture-support call, try the old way */
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 3faaf29bdb29..94e2df3cae07 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -241,6 +241,13 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
241 } 241 }
242 242
243 /* 243 /*
244 * Put vDSO base into mm struct. We need to do this before calling
245 * install_special_mapping or the perf counter mmap tracking code
246 * will fail to recognise it as a vDSO (since arch_vma_name fails).
247 */
248 current->mm->context.vdso_base = vdso_base;
249
250 /*
244 * our vma flags don't have VM_WRITE so by default, the process isn't 251 * our vma flags don't have VM_WRITE so by default, the process isn't
245 * allowed to write those pages. 252 * allowed to write those pages.
246 * gdb can break that with ptrace interface, and thus trigger COW on 253 * gdb can break that with ptrace interface, and thus trigger COW on
@@ -260,11 +267,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
260 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 267 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
261 VM_ALWAYSDUMP, 268 VM_ALWAYSDUMP,
262 vdso_pagelist); 269 vdso_pagelist);
263 if (rc) 270 if (rc) {
271 current->mm->context.vdso_base = 0;
264 goto fail_mmapsem; 272 goto fail_mmapsem;
265 273 }
266 /* Put vDSO base into mm struct */
267 current->mm->context.vdso_base = vdso_base;
268 274
269 up_write(&mm->mmap_sem); 275 up_write(&mm->mmap_sem);
270 return 0; 276 return 0;
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index bc7b41edbdfc..77f64218abf3 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -1054,6 +1054,8 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
1054 return NULL; 1054 return NULL;
1055 1055
1056 tbl = kmalloc(sizeof(*tbl), GFP_KERNEL); 1056 tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
1057 if (tbl == NULL)
1058 return NULL;
1057 1059
1058 of_parse_dma_window(dev->dev.archdata.of_node, dma_window, 1060 of_parse_dma_window(dev->dev.archdata.of_node, dma_window,
1059 &tbl->it_index, &offset, &size); 1061 &tbl->it_index, &offset, &size);
@@ -1233,7 +1235,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
1233 vio_cmo_set_dma_ops(viodev); 1235 vio_cmo_set_dma_ops(viodev);
1234 else 1236 else
1235 viodev->dev.archdata.dma_ops = &dma_iommu_ops; 1237 viodev->dev.archdata.dma_ops = &dma_iommu_ops;
1236 viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev); 1238 set_iommu_table_base(&viodev->dev, vio_build_iommu_table(viodev));
1237 set_dev_node(&viodev->dev, of_node_to_nid(of_node)); 1239 set_dev_node(&viodev->dev, of_node_to_nid(of_node));
1238 1240
1239 /* init generic 'struct device' fields: */ 1241 /* init generic 'struct device' fields: */
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 58da4070723d..f56429362a12 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -6,6 +6,7 @@
6#include <asm/page.h> 6#include <asm/page.h>
7#include <asm-generic/vmlinux.lds.h> 7#include <asm-generic/vmlinux.lds.h>
8#include <asm/cache.h> 8#include <asm/cache.h>
9#include <asm/thread_info.h>
9 10
10ENTRY(_stext) 11ENTRY(_stext)
11 12
@@ -71,12 +72,7 @@ SECTIONS
71 /* Read-only data */ 72 /* Read-only data */
72 RODATA 73 RODATA
73 74
74 /* Exception & bug tables */ 75 EXCEPTION_TABLE(0)
75 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
76 __start___ex_table = .;
77 *(__ex_table)
78 __stop___ex_table = .;
79 }
80 76
81 NOTES :kernel :notes 77 NOTES :kernel :notes
82 78
@@ -93,12 +89,7 @@ SECTIONS
93 */ 89 */
94 . = ALIGN(PAGE_SIZE); 90 . = ALIGN(PAGE_SIZE);
95 __init_begin = .; 91 __init_begin = .;
96 92 INIT_TEXT_SECTION(PAGE_SIZE) :kernel
97 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
98 _sinittext = .;
99 INIT_TEXT
100 _einittext = .;
101 } :kernel
102 93
103 /* .exit.text is discarded at runtime, not link time, 94 /* .exit.text is discarded at runtime, not link time,
104 * to deal with references from __bug_table 95 * to deal with references from __bug_table
@@ -122,23 +113,16 @@ SECTIONS
122#endif 113#endif
123 } 114 }
124 115
125 . = ALIGN(16);
126 .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { 116 .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
127 __setup_start = .; 117 INIT_SETUP(16)
128 *(.init.setup)
129 __setup_end = .;
130 } 118 }
131 119
132 .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { 120 .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
133 __initcall_start = .; 121 INIT_CALLS
134 INITCALLS 122 }
135 __initcall_end = .;
136 }
137 123
138 .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { 124 .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
139 __con_initcall_start = .; 125 CON_INITCALL
140 *(.con_initcall.init)
141 __con_initcall_end = .;
142 } 126 }
143 127
144 SECURITY_INIT 128 SECURITY_INIT
@@ -169,14 +153,10 @@ SECTIONS
169 __stop___fw_ftr_fixup = .; 153 __stop___fw_ftr_fixup = .;
170 } 154 }
171#endif 155#endif
172#ifdef CONFIG_BLK_DEV_INITRD
173 . = ALIGN(PAGE_SIZE);
174 .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { 156 .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
175 __initramfs_start = .; 157 INIT_RAM_FS
176 *(.init.ramfs)
177 __initramfs_end = .;
178 } 158 }
179#endif 159
180 PERCPU(PAGE_SIZE) 160 PERCPU(PAGE_SIZE)
181 161
182 . = ALIGN(8); 162 . = ALIGN(8);
@@ -240,36 +220,24 @@ SECTIONS
240#endif 220#endif
241 221
242 /* The initial task and kernel stack */ 222 /* The initial task and kernel stack */
243#ifdef CONFIG_PPC32
244 . = ALIGN(8192);
245#else
246 . = ALIGN(16384);
247#endif
248 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { 223 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
249 *(.data.init_task) 224 INIT_TASK_DATA(THREAD_SIZE)
250 } 225 }
251 226
252 . = ALIGN(PAGE_SIZE);
253 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { 227 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
254 *(.data.page_aligned) 228 PAGE_ALIGNED_DATA(PAGE_SIZE)
255 } 229 }
256 230
257 . = ALIGN(L1_CACHE_BYTES);
258 .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { 231 .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
259 *(.data.cacheline_aligned) 232 CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
260 } 233 }
261 234
262 . = ALIGN(L1_CACHE_BYTES);
263 .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { 235 .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
264 *(.data.read_mostly) 236 READ_MOSTLY_DATA(L1_CACHE_BYTES)
265 } 237 }
266 238
267 . = ALIGN(PAGE_SIZE);
268 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { 239 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
269 __nosave_begin = .; 240 NOSAVE_DATA
270 *(.data.nosave)
271 . = ALIGN(PAGE_SIZE);
272 __nosave_end = .;
273 } 241 }
274 242
275 . = ALIGN(PAGE_SIZE); 243 . = ALIGN(PAGE_SIZE);
@@ -280,14 +248,7 @@ SECTIONS
280 * And finally the bss 248 * And finally the bss
281 */ 249 */
282 250
283 .bss : AT(ADDR(.bss) - LOAD_OFFSET) { 251 BSS_SECTION(0, 0, 0)
284 __bss_start = .;
285 *(.sbss) *(.scommon)
286 *(.dynbss)
287 *(.bss)
288 *(COMMON)
289 __bss_stop = .;
290 }
291 252
292 . = ALIGN(PAGE_SIZE); 253 . = ALIGN(PAGE_SIZE);
293 _end = . ; 254 _end = . ;
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 83f1551ec2c9..53040931de32 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -30,6 +30,8 @@
30#include <asm/tlbflush.h> 30#include <asm/tlbflush.h>
31#include <asm/tlb.h> 31#include <asm/tlb.h>
32 32
33#include "mmu_decl.h"
34
33DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 35DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
34 36
35#ifdef CONFIG_SMP 37#ifdef CONFIG_SMP
@@ -166,7 +168,7 @@ struct page * maybe_pte_to_page(pte_t pte)
166 * support falls into the same category. 168 * support falls into the same category.
167 */ 169 */
168 170
169static pte_t set_pte_filter(pte_t pte) 171static pte_t set_pte_filter(pte_t pte, unsigned long addr)
170{ 172{
171 pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); 173 pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
172 if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || 174 if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
@@ -175,6 +177,17 @@ static pte_t set_pte_filter(pte_t pte)
175 if (!pg) 177 if (!pg)
176 return pte; 178 return pte;
177 if (!test_bit(PG_arch_1, &pg->flags)) { 179 if (!test_bit(PG_arch_1, &pg->flags)) {
180#ifdef CONFIG_8xx
181 /* On 8xx, cache control instructions (particularly
182 * "dcbst" from flush_dcache_icache) fault as write
183 * operation if there is an unpopulated TLB entry
184 * for the address in question. To workaround that,
185 * we invalidate the TLB here, thus avoiding dcbst
186 * misbehaviour.
187 */
188 /* 8xx doesn't care about PID, size or ind args */
189 _tlbil_va(addr, 0, 0, 0);
190#endif /* CONFIG_8xx */
178 flush_dcache_icache_page(pg); 191 flush_dcache_icache_page(pg);
179 set_bit(PG_arch_1, &pg->flags); 192 set_bit(PG_arch_1, &pg->flags);
180 } 193 }
@@ -194,7 +207,7 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
194 * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so 207 * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
195 * instead we "filter out" the exec permission for non clean pages. 208 * instead we "filter out" the exec permission for non clean pages.
196 */ 209 */
197static pte_t set_pte_filter(pte_t pte) 210static pte_t set_pte_filter(pte_t pte, unsigned long addr)
198{ 211{
199 struct page *pg; 212 struct page *pg;
200 213
@@ -276,7 +289,7 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
276 * this context might not have been activated yet when this 289 * this context might not have been activated yet when this
277 * is called. 290 * is called.
278 */ 291 */
279 pte = set_pte_filter(pte); 292 pte = set_pte_filter(pte, addr);
280 293
281 /* Perform the setting of the PTE */ 294 /* Perform the setting of the PTE */
282 __set_pte_at(mm, addr, ptep, pte, 0); 295 __set_pte_at(mm, addr, ptep, pte, 0);
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index ef1cccf71173..f288279e679d 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -18,7 +18,6 @@
18#include <asm/asm-offsets.h> 18#include <asm/asm-offsets.h>
19#include <asm/cputable.h> 19#include <asm/cputable.h>
20#include <asm/pgtable.h> 20#include <asm/pgtable.h>
21#include <asm/reg.h>
22#include <asm/exception-64e.h> 21#include <asm/exception-64e.h>
23#include <asm/ppc-opcode.h> 22#include <asm/ppc-opcode.h>
24 23
diff --git a/arch/powerpc/platforms/cell/beat_iommu.c b/arch/powerpc/platforms/cell/beat_iommu.c
index 93b0efddd658..39d361c5c6d2 100644
--- a/arch/powerpc/platforms/cell/beat_iommu.c
+++ b/arch/powerpc/platforms/cell/beat_iommu.c
@@ -77,7 +77,7 @@ static void __init celleb_init_direct_mapping(void)
77static void celleb_dma_dev_setup(struct device *dev) 77static void celleb_dma_dev_setup(struct device *dev)
78{ 78{
79 dev->archdata.dma_ops = get_pci_dma_ops(); 79 dev->archdata.dma_ops = get_pci_dma_ops();
80 dev->archdata.dma_data = (void *)celleb_dma_direct_offset; 80 set_dma_offset(dev, celleb_dma_direct_offset);
81} 81}
82 82
83static void celleb_pci_dma_dev_setup(struct pci_dev *pdev) 83static void celleb_pci_dma_dev_setup(struct pci_dev *pdev)
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 416db17eb18f..ca5bfdfe47f2 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -657,15 +657,13 @@ static void cell_dma_dev_setup_fixed(struct device *dev);
657 657
658static void cell_dma_dev_setup(struct device *dev) 658static void cell_dma_dev_setup(struct device *dev)
659{ 659{
660 struct dev_archdata *archdata = &dev->archdata;
661
662 /* Order is important here, these are not mutually exclusive */ 660 /* Order is important here, these are not mutually exclusive */
663 if (get_dma_ops(dev) == &dma_iommu_fixed_ops) 661 if (get_dma_ops(dev) == &dma_iommu_fixed_ops)
664 cell_dma_dev_setup_fixed(dev); 662 cell_dma_dev_setup_fixed(dev);
665 else if (get_pci_dma_ops() == &dma_iommu_ops) 663 else if (get_pci_dma_ops() == &dma_iommu_ops)
666 archdata->dma_data = cell_get_iommu_table(dev); 664 set_iommu_table_base(dev, cell_get_iommu_table(dev));
667 else if (get_pci_dma_ops() == &dma_direct_ops) 665 else if (get_pci_dma_ops() == &dma_direct_ops)
668 archdata->dma_data = (void *)cell_dma_direct_offset; 666 set_dma_offset(dev, cell_dma_direct_offset);
669 else 667 else
670 BUG(); 668 BUG();
671} 669}
@@ -973,11 +971,10 @@ static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask)
973 971
974static void cell_dma_dev_setup_fixed(struct device *dev) 972static void cell_dma_dev_setup_fixed(struct device *dev)
975{ 973{
976 struct dev_archdata *archdata = &dev->archdata;
977 u64 addr; 974 u64 addr;
978 975
979 addr = cell_iommu_get_fixed_address(dev) + dma_iommu_fixed_base; 976 addr = cell_iommu_get_fixed_address(dev) + dma_iommu_fixed_base;
980 archdata->dma_data = (void *)addr; 977 set_dma_offset(dev, addr);
981 978
982 dev_dbg(dev, "iommu: fixed addr = %llx\n", addr); 979 dev_dbg(dev, "iommu: fixed addr = %llx\n", addr);
983} 980}
diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c
index 6c1e1011959e..9d53cb481a7c 100644
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -193,7 +193,7 @@ static void pci_dma_dev_setup_iseries(struct pci_dev *pdev)
193 pdn->iommu_table = iommu_init_table(tbl, -1); 193 pdn->iommu_table = iommu_init_table(tbl, -1);
194 else 194 else
195 kfree(tbl); 195 kfree(tbl);
196 pdev->dev.archdata.dma_data = pdn->iommu_table; 196 set_iommu_table_base(&pdev->dev, pdn->iommu_table);
197} 197}
198#else 198#else
199#define pci_dma_dev_setup_iseries NULL 199#define pci_dma_dev_setup_iseries NULL
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index a0ff03a3d8da..7b1d608ea3c8 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -189,7 +189,7 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev)
189 } 189 }
190#endif 190#endif
191 191
192 dev->dev.archdata.dma_data = &iommu_table_iobmap; 192 set_iommu_table_base(&dev->dev, &iommu_table_iobmap);
193} 193}
194 194
195static void pci_dma_bus_setup_null(struct pci_bus *b) { } 195static void pci_dma_bus_setup_null(struct pci_bus *b) { }
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 661c8e02bcba..1a0000a4b6d6 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -482,7 +482,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
482 phb->node); 482 phb->node);
483 iommu_table_setparms(phb, dn, tbl); 483 iommu_table_setparms(phb, dn, tbl);
484 PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); 484 PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node);
485 dev->dev.archdata.dma_data = PCI_DN(dn)->iommu_table; 485 set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table);
486 return; 486 return;
487 } 487 }
488 488
@@ -494,7 +494,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
494 dn = dn->parent; 494 dn = dn->parent;
495 495
496 if (dn && PCI_DN(dn)) 496 if (dn && PCI_DN(dn))
497 dev->dev.archdata.dma_data = PCI_DN(dn)->iommu_table; 497 set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table);
498 else 498 else
499 printk(KERN_WARNING "iommu: Device %s has no iommu table\n", 499 printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
500 pci_name(dev)); 500 pci_name(dev));
@@ -538,7 +538,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
538 */ 538 */
539 if (dma_window == NULL || pdn->parent == NULL) { 539 if (dma_window == NULL || pdn->parent == NULL) {
540 pr_debug(" no dma window for device, linking to parent\n"); 540 pr_debug(" no dma window for device, linking to parent\n");
541 dev->dev.archdata.dma_data = PCI_DN(pdn)->iommu_table; 541 set_iommu_table_base(&dev->dev, PCI_DN(pdn)->iommu_table);
542 return; 542 return;
543 } 543 }
544 544
@@ -554,7 +554,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
554 pr_debug(" found DMA window, table: %p\n", pci->iommu_table); 554 pr_debug(" found DMA window, table: %p\n", pci->iommu_table);
555 } 555 }
556 556
557 dev->dev.archdata.dma_data = pci->iommu_table; 557 set_iommu_table_base(&dev->dev, pci->iommu_table);
558} 558}
559#else /* CONFIG_PCI */ 559#else /* CONFIG_PCI */
560#define pci_dma_bus_setup_pSeries NULL 560#define pci_dma_bus_setup_pSeries NULL
diff --git a/arch/powerpc/relocs_check.pl b/arch/powerpc/relocs_check.pl
new file mode 100755
index 000000000000..d2571096c3e9
--- /dev/null
+++ b/arch/powerpc/relocs_check.pl
@@ -0,0 +1,56 @@
1#!/usr/bin/perl
2
3# Copyright © 2009 IBM Corporation
4
5# This program is free software; you can redistribute it and/or
6# modify it under the terms of the GNU General Public License
7# as published by the Free Software Foundation; either version
8# 2 of the License, or (at your option) any later version.
9
10# This script checks the relocations of a vmlinux for "suspicious"
11# relocations.
12
13use strict;
14use warnings;
15
16if ($#ARGV != 1) {
17 die "$0 [path to objdump] [path to vmlinux]\n";
18}
19
20# Have Kbuild supply the path to objdump so we handle cross compilation.
21my $objdump = shift;
22my $vmlinux = shift;
23my $bad_relocs_count = 0;
24my $bad_relocs = "";
25my $old_binutils = 0;
26
27open(FD, "$objdump -R $vmlinux|") or die;
28while (<FD>) {
29 study $_;
30
31 # Only look at relocation lines.
32 next if (!/\s+R_/);
33
34 # These relocations are okay
35 next if (/R_PPC64_RELATIVE/ or /R_PPC64_NONE/ or
36 /R_PPC64_ADDR64\s+mach_/);
37
38 # If we see this type of relocation it's an indication that
39 # we /may/ be using an old version of binutils.
40 if (/R_PPC64_UADDR64/) {
41 $old_binutils++;
42 }
43
44 $bad_relocs_count++;
45 $bad_relocs .= $_;
46}
47
48if ($bad_relocs_count) {
49 print "WARNING: $bad_relocs_count bad relocations\n";
50 print $bad_relocs;
51}
52
53if ($old_binutils) {
54 print "WARNING: You need binutils >= 2.19 to build a ".
55 "CONFIG_RELOCATABLE kernel\n";
56}
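As a usage note: Kbuild passes the cross objdump as the first argument, so an invocation looks like "arch/powerpc/relocs_check.pl $(CROSS_COMPILE)objdump vmlinux" (shown here as an illustration). The script then scans the "objdump -R" output, whose lines take the form "<offset> <type> <symbol>", and flags any relocation type outside the whitelisted set.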
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index 89639ecbf381..ae3c4db86fe8 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -297,7 +297,7 @@ static void pci_dma_dev_setup_dart(struct pci_dev *dev)
297 /* We only have one iommu table on the mac for now, which makes 297 /* We only have one iommu table on the mac for now, which makes
298 * things simple. Setup all PCI devices to point to this table 298 * things simple. Setup all PCI devices to point to this table
299 */ 299 */
300 dev->dev.archdata.dma_data = &iommu_table_dart; 300 set_iommu_table_base(&dev->dev, &iommu_table_dart);
301} 301}
302 302
303static void pci_dma_bus_setup_dart(struct pci_bus *bus) 303static void pci_dma_bus_setup_dart(struct pci_bus *bus)
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 0e09a45ac79a..c6f0a71b405e 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -335,6 +335,16 @@ int cpus_are_in_xmon(void)
335} 335}
336#endif 336#endif
337 337
338static inline int unrecoverable_excp(struct pt_regs *regs)
339{
340#ifdef CONFIG_4xx
341 /* We have no MSR_RI bit on 4xx, so we simply return false */
342 return 0;
343#else
344 return ((regs->msr & MSR_RI) == 0);
345#endif
346}
347
338static int xmon_core(struct pt_regs *regs, int fromipi) 348static int xmon_core(struct pt_regs *regs, int fromipi)
339{ 349{
340 int cmd = 0; 350 int cmd = 0;
@@ -388,7 +398,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
388 bp = NULL; 398 bp = NULL;
389 if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) 399 if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF))
390 bp = at_breakpoint(regs->nip); 400 bp = at_breakpoint(regs->nip);
391 if (bp || (regs->msr & MSR_RI) == 0) 401 if (bp || unrecoverable_excp(regs))
392 fromipi = 0; 402 fromipi = 0;
393 403
394 if (!fromipi) { 404 if (!fromipi) {
@@ -399,7 +409,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
399 cpu, BP_NUM(bp)); 409 cpu, BP_NUM(bp));
400 xmon_print_symbol(regs->nip, " ", ")\n"); 410 xmon_print_symbol(regs->nip, " ", ")\n");
401 } 411 }
402 if ((regs->msr & MSR_RI) == 0) 412 if (unrecoverable_excp(regs))
403 printf("WARNING: exception is not recoverable, " 413 printf("WARNING: exception is not recoverable, "
404 "can't continue\n"); 414 "can't continue\n");
405 release_output_lock(); 415 release_output_lock();
@@ -490,7 +500,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
490 printf("Stopped at breakpoint %x (", BP_NUM(bp)); 500 printf("Stopped at breakpoint %x (", BP_NUM(bp));
491 xmon_print_symbol(regs->nip, " ", ")\n"); 501 xmon_print_symbol(regs->nip, " ", ")\n");
492 } 502 }
493 if ((regs->msr & MSR_RI) == 0) 503 if (unrecoverable_excp(regs))
494 printf("WARNING: exception is not recoverable, " 504 printf("WARNING: exception is not recoverable, "
495 "can't continue\n"); 505 "can't continue\n");
496 remove_bpts(); 506 remove_bpts();
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index 264528e4f58d..b55fd7ed1c31 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -50,10 +50,9 @@ static struct platform_device *appldata_pdev;
50 * /proc entries (sysctl) 50 * /proc entries (sysctl)
51 */ 51 */
52static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata"; 52static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata";
53static int appldata_timer_handler(ctl_table *ctl, int write, struct file *filp, 53static int appldata_timer_handler(ctl_table *ctl, int write,
54 void __user *buffer, size_t *lenp, loff_t *ppos); 54 void __user *buffer, size_t *lenp, loff_t *ppos);
55static int appldata_interval_handler(ctl_table *ctl, int write, 55static int appldata_interval_handler(ctl_table *ctl, int write,
56 struct file *filp,
57 void __user *buffer, 56 void __user *buffer,
58 size_t *lenp, loff_t *ppos); 57 size_t *lenp, loff_t *ppos);
59 58
@@ -247,7 +246,7 @@ __appldata_vtimer_setup(int cmd)
247 * Start/Stop timer, show status of timer (0 = not active, 1 = active) 246 * Start/Stop timer, show status of timer (0 = not active, 1 = active)
248 */ 247 */
249static int 248static int
250appldata_timer_handler(ctl_table *ctl, int write, struct file *filp, 249appldata_timer_handler(ctl_table *ctl, int write,
251 void __user *buffer, size_t *lenp, loff_t *ppos) 250 void __user *buffer, size_t *lenp, loff_t *ppos)
252{ 251{
253 int len; 252 int len;
@@ -289,7 +288,7 @@ out:
289 * current timer interval. 288 * current timer interval.
290 */ 289 */
291static int 290static int
292appldata_interval_handler(ctl_table *ctl, int write, struct file *filp, 291appldata_interval_handler(ctl_table *ctl, int write,
293 void __user *buffer, size_t *lenp, loff_t *ppos) 292 void __user *buffer, size_t *lenp, loff_t *ppos)
294{ 293{
295 int len, interval; 294 int len, interval;
@@ -335,7 +334,7 @@ out:
335 * monitoring (0 = not in process, 1 = in process) 334 * monitoring (0 = not in process, 1 = in process)
336 */ 335 */
337static int 336static int
338appldata_generic_handler(ctl_table *ctl, int write, struct file *filp, 337appldata_generic_handler(ctl_table *ctl, int write,
339 void __user *buffer, size_t *lenp, loff_t *ppos) 338 void __user *buffer, size_t *lenp, loff_t *ppos)
340{ 339{
341 struct appldata_ops *ops = NULL, *tmp_ops; 340 struct appldata_ops *ops = NULL, *tmp_ops;
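All of the sysctl conversions in this series have the same shape: the struct file * parameter disappears from the handler prototype and from the proc_do*() helper it forwards to. A minimal post-conversion handler, as a sketch (the handler name is hypothetical):

	static int example_handler(ctl_table *ctl, int write,
				   void __user *buffer, size_t *lenp, loff_t *ppos)
	{
		/* same arguments as before, minus the now-unused struct file * */
		return proc_dointvec(ctl, write, buffer, lenp, ppos);
	}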
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 4c512561687d..20f282c911c2 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -881,11 +881,11 @@ static int debug_active=1;
881 * if debug_active is already off 881 * if debug_active is already off
882 */ 882 */
883static int 883static int
884s390dbf_procactive(ctl_table *table, int write, struct file *filp, 884s390dbf_procactive(ctl_table *table, int write,
885 void __user *buffer, size_t *lenp, loff_t *ppos) 885 void __user *buffer, size_t *lenp, loff_t *ppos)
886{ 886{
887 if (!write || debug_stoppable || !debug_active) 887 if (!write || debug_stoppable || !debug_active)
888 return proc_dointvec(table, write, filp, buffer, lenp, ppos); 888 return proc_dointvec(table, write, buffer, lenp, ppos);
889 else 889 else
890 return 0; 890 return 0;
891} 891}
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 413c240cbca7..b201135cc18c 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -262,7 +262,7 @@ cmm_skip_blanks(char *cp, char **endp)
262static struct ctl_table cmm_table[]; 262static struct ctl_table cmm_table[];
263 263
264static int 264static int
265cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, 265cmm_pages_handler(ctl_table *ctl, int write,
266 void __user *buffer, size_t *lenp, loff_t *ppos) 266 void __user *buffer, size_t *lenp, loff_t *ppos)
267{ 267{
268 char buf[16], *p; 268 char buf[16], *p;
@@ -303,7 +303,7 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp,
303} 303}
304 304
305static int 305static int
306cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, 306cmm_timeout_handler(ctl_table *ctl, int write,
307 void __user *buffer, size_t *lenp, loff_t *ppos) 307 void __user *buffer, size_t *lenp, loff_t *ppos)
308{ 308{
309 char buf[64], *p; 309 char buf[64], *p;
diff --git a/arch/sh/drivers/dma/Kconfig b/arch/sh/drivers/dma/Kconfig
index b91fa8dbf047..4d58eb0973d4 100644
--- a/arch/sh/drivers/dma/Kconfig
+++ b/arch/sh/drivers/dma/Kconfig
@@ -1,12 +1,9 @@
1menu "DMA support" 1menu "DMA support"
2 2
3config SH_DMA_API
4 bool
5 3
6config SH_DMA 4config SH_DMA
7 bool "SuperH on-chip DMA controller (DMAC) support" 5 bool "SuperH on-chip DMA controller (DMAC) support"
8 depends on CPU_SH3 || CPU_SH4 6 depends on CPU_SH3 || CPU_SH4
9 select SH_DMA_API
10 default n 7 default n
11 8
12config SH_DMA_IRQ_MULTI 9config SH_DMA_IRQ_MULTI
@@ -19,6 +16,15 @@ config SH_DMA_IRQ_MULTI
19 CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \ 16 CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \
20 CPU_SUBTYPE_SH7760 17 CPU_SUBTYPE_SH7760
21 18
19config SH_DMA_API
20 depends on SH_DMA
21 bool "SuperH DMA API support"
22 default n
23 help
24 This enables the SuperH DMA API for the on-chip DMAC.
25 If you want to use the DMA engine framework instead, do not enable
26 this; enable DMA_ENGINE and SH_DMAE.
27
22config NR_ONCHIP_DMA_CHANNELS 28config NR_ONCHIP_DMA_CHANNELS
23 int 29 int
24 depends on SH_DMA 30 depends on SH_DMA
diff --git a/arch/sh/drivers/dma/Makefile b/arch/sh/drivers/dma/Makefile
index c6068137b46f..d88c9484762c 100644
--- a/arch/sh/drivers/dma/Makefile
+++ b/arch/sh/drivers/dma/Makefile
@@ -2,8 +2,7 @@
2# Makefile for the SuperH DMA specific kernel interface routines under Linux. 2# Makefile for the SuperH DMA specific kernel interface routines under Linux.
3# 3#
4 4
5obj-$(CONFIG_SH_DMA_API) += dma-api.o dma-sysfs.o 5obj-$(CONFIG_SH_DMA_API) += dma-sh.o dma-api.o dma-sysfs.o
6obj-$(CONFIG_SH_DMA) += dma-sh.o
7obj-$(CONFIG_PVR2_DMA) += dma-pvr2.o 6obj-$(CONFIG_PVR2_DMA) += dma-pvr2.o
8obj-$(CONFIG_G2_DMA) += dma-g2.o 7obj-$(CONFIG_G2_DMA) += dma-g2.o
9obj-$(CONFIG_SH_DMABRG) += dmabrg.o 8obj-$(CONFIG_SH_DMABRG) += dmabrg.o
diff --git a/arch/sh/include/asm/dma-sh.h b/arch/sh/include/asm/dma-sh.h
index 68a5f4cb0343..78eed3e0bdf5 100644
--- a/arch/sh/include/asm/dma-sh.h
+++ b/arch/sh/include/asm/dma-sh.h
@@ -116,4 +116,17 @@ static u32 dma_base_addr[] __maybe_unused = {
116#define CHCR 0x0C 116#define CHCR 0x0C
117#define DMAOR 0x40 117#define DMAOR 0x40
118 118
119/*
120 * for dma engine
121 *
122 * SuperH DMA mode
123 */
124#define SHDMA_MIX_IRQ (1 << 1)
125#define SHDMA_DMAOR1 (1 << 2)
126#define SHDMA_DMAE1 (1 << 3)
127
128struct sh_dmae_pdata {
129 unsigned int mode;
130};
131
119#endif /* __DMA_SH_H */ 132#endif /* __DMA_SH_H */
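A board that wants the dmaengine driver would hand these new mode flags over through platform data; a hypothetical sketch (device registration omitted):

	static struct sh_dmae_pdata dmae_pdata = {
		/* route the DMAC error/transfer interrupts as one mixed IRQ */
		.mode = SHDMA_MIX_IRQ,
	};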
diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S
index 37ecc5577a9a..ac55b9efa1ce 100644
--- a/arch/um/include/asm/common.lds.S
+++ b/arch/um/include/asm/common.lds.S
@@ -16,11 +16,7 @@
16 16
17 . = ALIGN(4096); 17 . = ALIGN(4096);
18 .note : { *(.note.*) } 18 .note : { *(.note.*) }
19 __ex_table : { 19 EXCEPTION_TABLE(0)
20 __start___ex_table = .;
21 *(__ex_table)
22 __stop___ex_table = .;
23 }
24 20
25 BUG_TABLE 21 BUG_TABLE
26 22
@@ -43,28 +39,17 @@
43 } 39 }
44 40
45 .init.setup : { 41 .init.setup : {
46 __setup_start = .; 42 INIT_SETUP(0)
47 *(.init.setup)
48 __setup_end = .;
49 } 43 }
50 44
51 . = ALIGN(32); 45 PERCPU(32)
52 .data.percpu : {
53 __per_cpu_start = . ;
54 *(.data.percpu)
55 __per_cpu_end = . ;
56 }
57 46
58 .initcall.init : { 47 .initcall.init : {
59 __initcall_start = .; 48 INIT_CALLS
60 INITCALLS
61 __initcall_end = .;
62 } 49 }
63 50
64 .con_initcall.init : { 51 .con_initcall.init : {
65 __con_initcall_start = .; 52 CON_INITCALL
66 *(.con_initcall.init)
67 __con_initcall_end = .;
68 } 53 }
69 54
70 .uml.initcall.init : { 55 .uml.initcall.init : {
@@ -118,8 +103,6 @@
118 103
119 . = ALIGN(4096); 104 . = ALIGN(4096);
120 .init.ramfs : { 105 .init.ramfs : {
121 __initramfs_start = .; 106 INIT_RAM_FS
122 *(.init.ramfs)
123 __initramfs_end = .;
124 } 107 }
125 108
diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S
index 715a188c0472..7fcad58e216d 100644
--- a/arch/um/kernel/dyn.lds.S
+++ b/arch/um/kernel/dyn.lds.S
@@ -16,11 +16,7 @@ SECTIONS
16 _text = .; 16 _text = .;
17 _stext = .; 17 _stext = .;
18 __init_begin = .; 18 __init_begin = .;
19 .init.text : { 19 INIT_TEXT_SECTION(PAGE_SIZE)
20 _sinittext = .;
21 INIT_TEXT
22 _einittext = .;
23 }
24 20
25 . = ALIGN(PAGE_SIZE); 21 . = ALIGN(PAGE_SIZE);
26 22
@@ -96,8 +92,7 @@ SECTIONS
96 .init_array : { *(.init_array) } 92 .init_array : { *(.init_array) }
97 .fini_array : { *(.fini_array) } 93 .fini_array : { *(.fini_array) }
98 .data : { 94 .data : {
99 . = ALIGN(KERNEL_STACK_SIZE); /* init_task */ 95 INIT_TASK_DATA(KERNEL_STACK_SIZE)
100 *(.data.init_task)
101 . = ALIGN(KERNEL_STACK_SIZE); 96 . = ALIGN(KERNEL_STACK_SIZE);
102 *(.data.init_irqstack) 97 *(.data.init_irqstack)
103 DATA_DATA 98 DATA_DATA
diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S
index 2ebd39765db8..e7a6cca667aa 100644
--- a/arch/um/kernel/uml.lds.S
+++ b/arch/um/kernel/uml.lds.S
@@ -22,11 +22,7 @@ SECTIONS
22 _text = .; 22 _text = .;
23 _stext = .; 23 _stext = .;
24 __init_begin = .; 24 __init_begin = .;
25 .init.text : { 25 INIT_TEXT_SECTION(PAGE_SIZE)
26 _sinittext = .;
27 INIT_TEXT
28 _einittext = .;
29 }
30 . = ALIGN(PAGE_SIZE); 26 . = ALIGN(PAGE_SIZE);
31 27
32 .text : 28 .text :
@@ -52,8 +48,7 @@ SECTIONS
52 init.data : { INIT_DATA } 48 init.data : { INIT_DATA }
53 .data : 49 .data :
54 { 50 {
55 . = ALIGN(KERNEL_STACK_SIZE); /* init_task */ 51 INIT_TASK_DATA(KERNEL_STACK_SIZE)
56 *(.data.init_task)
57 . = ALIGN(KERNEL_STACK_SIZE); 52 . = ALIGN(KERNEL_STACK_SIZE);
58 *(.data.init_irqstack) 53 *(.data.init_irqstack)
59 DATA_DATA 54 DATA_DATA
@@ -81,19 +76,10 @@ SECTIONS
81 _edata = .; 76 _edata = .;
82 PROVIDE (edata = .); 77 PROVIDE (edata = .);
83 . = ALIGN(PAGE_SIZE); 78 . = ALIGN(PAGE_SIZE);
84 .sbss : 79 __bss_start = .;
85 { 80 PROVIDE(_bss_start = .);
86 __bss_start = .; 81 SBSS(0)
87 PROVIDE(_bss_start = .); 82 BSS(0)
88 *(.sbss)
89 *(.scommon)
90 }
91 .bss :
92 {
93 *(.dynbss)
94 *(.bss)
95 *(COMMON)
96 }
97 _end = .; 83 _end = .;
98 PROVIDE (end = .); 84 PROVIDE (end = .);
99 85
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index e63cf7d441e1..139d4c1a33a7 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -40,8 +40,7 @@ extern unsigned int nmi_watchdog;
40#define NMI_INVALID 3 40#define NMI_INVALID 3
41 41
42struct ctl_table; 42struct ctl_table;
43struct file; 43extern int proc_nmi_enabled(struct ctl_table *, int,
44extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
45 void __user *, size_t *, loff_t *); 44 void __user *, size_t *, loff_t *);
46extern int unknown_nmi_panic; 45extern int unknown_nmi_panic;
47 46
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index f76a162c082c..ada8c201d513 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -143,7 +143,11 @@ static inline int __pcibus_to_node(const struct pci_bus *bus)
143static inline const struct cpumask * 143static inline const struct cpumask *
144cpumask_of_pcibus(const struct pci_bus *bus) 144cpumask_of_pcibus(const struct pci_bus *bus)
145{ 145{
146 return cpumask_of_node(__pcibus_to_node(bus)); 146 int node;
147
148 node = __pcibus_to_node(bus);
149 return (node == -1) ? cpu_online_mask :
150 cpumask_of_node(node);
147} 151}
148#endif 152#endif
149 153
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index cb66a22d98ad..7ff61d6a188a 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -508,14 +508,14 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
508/* 508/*
509 * proc handler for /proc/sys/kernel/nmi 509 * proc handler for /proc/sys/kernel/nmi
510 */ 510 */
511int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, 511int proc_nmi_enabled(struct ctl_table *table, int write,
512 void __user *buffer, size_t *length, loff_t *ppos) 512 void __user *buffer, size_t *length, loff_t *ppos)
513{ 513{
514 int old_state; 514 int old_state;
515 515
516 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; 516 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
517 old_state = nmi_watchdog_enabled; 517 old_state = nmi_watchdog_enabled;
518 proc_dointvec(table, write, file, buffer, length, ppos); 518 proc_dointvec(table, write, buffer, length, ppos);
519 if (!!old_state == !!nmi_watchdog_enabled) 519 if (!!old_state == !!nmi_watchdog_enabled)
520 return 0; 520 return 0;
521 521
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index cf53a78e2dcf..8cb4974ff599 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -228,19 +228,11 @@ static long __vsyscall(3) venosys_1(void)
228} 228}
229 229
230#ifdef CONFIG_SYSCTL 230#ifdef CONFIG_SYSCTL
231
232static int
233vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
234 void __user *buffer, size_t *lenp, loff_t *ppos)
235{
236 return proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
237}
238
239static ctl_table kernel_table2[] = { 231static ctl_table kernel_table2[] = {
240 { .procname = "vsyscall64", 232 { .procname = "vsyscall64",
241 .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), 233 .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
242 .mode = 0644, 234 .mode = 0644,
243 .proc_handler = vsyscall_sysctl_change }, 235 .proc_handler = proc_dointvec },
244 {} 236 {}
245}; 237};
246 238
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 82728f2c6d55..f4cee9028cf0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -167,6 +167,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
167 info.si_errno = 0; 167 info.si_errno = 0;
168 info.si_code = si_code; 168 info.si_code = si_code;
169 info.si_addr = (void __user *)address; 169 info.si_addr = (void __user *)address;
170 info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
170 171
171 force_sig_info(si_signo, &info, tsk); 172 force_sig_info(si_signo, &info, tsk);
172} 173}
@@ -790,10 +791,12 @@ out_of_memory(struct pt_regs *regs, unsigned long error_code,
790} 791}
791 792
792static void 793static void
793do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) 794do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
795 unsigned int fault)
794{ 796{
795 struct task_struct *tsk = current; 797 struct task_struct *tsk = current;
796 struct mm_struct *mm = tsk->mm; 798 struct mm_struct *mm = tsk->mm;
799 int code = BUS_ADRERR;
797 800
798 up_read(&mm->mmap_sem); 801 up_read(&mm->mmap_sem);
799 802
@@ -809,7 +812,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
809 tsk->thread.error_code = error_code; 812 tsk->thread.error_code = error_code;
810 tsk->thread.trap_no = 14; 813 tsk->thread.trap_no = 14;
811 814
812 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); 815#ifdef CONFIG_MEMORY_FAILURE
816 if (fault & VM_FAULT_HWPOISON) {
817 printk(KERN_ERR
818 "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
819 tsk->comm, tsk->pid, address);
820 code = BUS_MCEERR_AR;
821 }
822#endif
823 force_sig_info_fault(SIGBUS, code, address, tsk);
813} 824}
814 825
815static noinline void 826static noinline void
@@ -819,8 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
819 if (fault & VM_FAULT_OOM) { 830 if (fault & VM_FAULT_OOM) {
820 out_of_memory(regs, error_code, address); 831 out_of_memory(regs, error_code, address);
821 } else { 832 } else {
822 if (fault & VM_FAULT_SIGBUS) 833 if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
823 do_sigbus(regs, error_code, address); 834 do_sigbus(regs, error_code, address, fault);
824 else 835 else
825 BUG(); 836 BUG();
826 } 837 }
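On the userspace side, a process that installs an SA_SIGINFO handler can tell an action-required memory failure apart from an ordinary bus error via the new si_code, with si_addr_lsb bounding the damaged range. A sketch, assuming the C library exposes si_addr_lsb (which is newer than many 2009-era headers):

	static void sigbus_handler(int sig, siginfo_t *si, void *uctx)
	{
		if (si->si_code == BUS_MCEERR_AR) {
			/* si_addr_lsb is the log2 of the poisoned granule;
			 * PAGE_SHIFT means the whole page at si_addr is gone */
			unsigned long len = 1UL << si->si_addr_lsb;
			/* must not touch si->si_addr .. si->si_addr + len */
		}
	}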
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 24952fdc7e40..dd38bfbefd1f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -144,6 +144,7 @@ void clflush_cache_range(void *vaddr, unsigned int size)
144 144
145 mb(); 145 mb();
146} 146}
147EXPORT_SYMBOL_GPL(clflush_cache_range);
147 148
148static void __cpa_flush_all(void *arg) 149static void __cpa_flush_all(void *arg)
149{ 150{
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 5db96d4304de..1331fcf26143 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -646,7 +646,7 @@ int get_mp_bus_to_node(int busnum)
646 646
647#else /* CONFIG_X86_32 */ 647#else /* CONFIG_X86_32 */
648 648
649static unsigned char mp_bus_to_node[BUS_NR] = { 649static int mp_bus_to_node[BUS_NR] = {
650 [0 ... BUS_NR - 1] = -1 650 [0 ... BUS_NR - 1] = -1
651}; 651};
652 652
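The element type change is the whole fix here: the array is initialized to -1, and -1 stored in an unsigned char reads back as 255, which then looks like a valid node number. In two lines:

	unsigned char node_uc = -1;	/* stored as 255 */
	/* (node_uc == -1) is false after integer promotion: 255 != -1 */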
diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S
index 921b6ff3b645..9b526154c9ba 100644
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -15,6 +15,8 @@
15 */ 15 */
16 16
17#include <asm-generic/vmlinux.lds.h> 17#include <asm-generic/vmlinux.lds.h>
18#include <asm/page.h>
19#include <asm/thread_info.h>
18 20
19#include <variant/core.h> 21#include <variant/core.h>
20#include <platform/hardware.h> 22#include <platform/hardware.h>
@@ -107,41 +109,18 @@ SECTIONS
107 109
108 .fixup : { *(.fixup) } 110 .fixup : { *(.fixup) }
109 111
110 . = ALIGN(16); 112 EXCEPTION_TABLE(16)
111
112 __ex_table : {
113 __start___ex_table = .;
114 *(__ex_table)
115 __stop___ex_table = .;
116 }
117
118 /* Data section */ 113 /* Data section */
119 114
120 . = ALIGN(XCHAL_ICACHE_LINESIZE);
121 _fdata = .; 115 _fdata = .;
122 .data : 116 RW_DATA_SECTION(XCHAL_ICACHE_LINESIZE, PAGE_SIZE, THREAD_SIZE)
123 {
124 DATA_DATA
125 CONSTRUCTORS
126 . = ALIGN(XCHAL_ICACHE_LINESIZE);
127 *(.data.cacheline_aligned)
128 }
129
130 _edata = .; 117 _edata = .;
131 118
132 /* The initial task */
133 . = ALIGN(8192);
134 .data.init_task : { *(.data.init_task) }
135
136 /* Initialization code and data: */ 119 /* Initialization code and data: */
137 120
138 . = ALIGN(1 << 12); 121 . = ALIGN(PAGE_SIZE);
139 __init_begin = .; 122 __init_begin = .;
140 .init.text : { 123 INIT_TEXT_SECTION(PAGE_SIZE)
141 _sinittext = .;
142 INIT_TEXT
143 _einittext = .;
144 }
145 124
146 .init.data : 125 .init.data :
147 { 126 {
@@ -168,36 +147,15 @@ SECTIONS
168 .DebugInterruptVector.text); 147 .DebugInterruptVector.text);
169 148
170 __boot_reloc_table_end = ABSOLUTE(.) ; 149 __boot_reloc_table_end = ABSOLUTE(.) ;
171 }
172 150
173 . = ALIGN(XCHAL_ICACHE_LINESIZE); 151 INIT_SETUP(XCHAL_ICACHE_LINESIZE)
174 152 INIT_CALLS
175 __setup_start = .; 153 CON_INITCALL
176 .init.setup : { *(.init.setup) } 154 SECURITY_INITCALL
177 __setup_end = .; 155 INIT_RAM_FS
178
179 __initcall_start = .;
180 .initcall.init : {
181 INITCALLS
182 } 156 }
183 __initcall_end = .;
184
185 __con_initcall_start = .;
186 .con_initcall.init : { *(.con_initcall.init) }
187 __con_initcall_end = .;
188
189 SECURITY_INIT
190
191
192#ifdef CONFIG_BLK_DEV_INITRD
193 . = ALIGN(4096);
194 __initramfs_start =.;
195 .init.ramfs : { *(.init.ramfs) }
196 __initramfs_end = .;
197#endif
198
199 PERCPU(4096)
200 157
158 PERCPU(PAGE_SIZE)
201 159
202 /* We need this dummy segment here */ 160 /* We need this dummy segment here */
203 161
@@ -252,16 +210,11 @@ SECTIONS
252 .DoubleExceptionVector.literal) 210 .DoubleExceptionVector.literal)
253 211
254 . = (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3; 212 . = (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3;
255 . = ALIGN(1 << 12); 213 . = ALIGN(PAGE_SIZE);
256 214
257 __init_end = .; 215 __init_end = .;
258 216
259 . = ALIGN(8192); 217 BSS_SECTION(0, 8192, 0)
260
261 /* BSS section */
262 _bss_start = .;
263 .bss : { *(.bss.page_aligned) *(.bss) }
264 _bss_end = .;
265 218
266 _end = .; 219 _end = .;
267 220
diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig
index d8fb39145986..e5aeb2b79e6f 100644
--- a/crypto/async_tx/Kconfig
+++ b/crypto/async_tx/Kconfig
@@ -14,3 +14,12 @@ config ASYNC_MEMSET
14 tristate 14 tristate
15 select ASYNC_CORE 15 select ASYNC_CORE
16 16
17config ASYNC_PQ
18 tristate
19 select ASYNC_CORE
20
21config ASYNC_RAID6_RECOV
22 tristate
23 select ASYNC_CORE
24 select ASYNC_PQ
25
diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile
index 27baa7d52fbc..d1e0e6f72bc1 100644
--- a/crypto/async_tx/Makefile
+++ b/crypto/async_tx/Makefile
@@ -2,3 +2,6 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o
2obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o 2obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o
3obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o 3obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o
4obj-$(CONFIG_ASYNC_XOR) += async_xor.o 4obj-$(CONFIG_ASYNC_XOR) += async_xor.o
5obj-$(CONFIG_ASYNC_PQ) += async_pq.o
6obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o
7obj-$(CONFIG_ASYNC_RAID6_TEST) += raid6test.o
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index ddccfb01c416..0ec1fb69d4ea 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -33,28 +33,31 @@
33 * async_memcpy - attempt to copy memory with a dma engine. 33 * async_memcpy - attempt to copy memory with a dma engine.
34 * @dest: destination page 34 * @dest: destination page
35 * @src: src page 35 * @src: src page
36 * @offset: offset in pages to start transaction 36 * @dest_offset: offset into 'dest' to start transaction
37 * @src_offset: offset into 'src' to start transaction
37 * @len: length in bytes 38 * @len: length in bytes
38 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, 39 * @submit: submission / completion modifiers
39 * @depend_tx: memcpy depends on the result of this transaction 40 *
40 * @cb_fn: function to call when the memcpy completes 41 * honored flags: ASYNC_TX_ACK
41 * @cb_param: parameter to pass to the callback routine
42 */ 42 */
43struct dma_async_tx_descriptor * 43struct dma_async_tx_descriptor *
44async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, 44async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
45 unsigned int src_offset, size_t len, enum async_tx_flags flags, 45 unsigned int src_offset, size_t len,
46 struct dma_async_tx_descriptor *depend_tx, 46 struct async_submit_ctl *submit)
47 dma_async_tx_callback cb_fn, void *cb_param)
48{ 47{
49 struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY, 48 struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY,
50 &dest, 1, &src, 1, len); 49 &dest, 1, &src, 1, len);
51 struct dma_device *device = chan ? chan->device : NULL; 50 struct dma_device *device = chan ? chan->device : NULL;
52 struct dma_async_tx_descriptor *tx = NULL; 51 struct dma_async_tx_descriptor *tx = NULL;
53 52
54 if (device) { 53 if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
55 dma_addr_t dma_dest, dma_src; 54 dma_addr_t dma_dest, dma_src;
56 unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; 55 unsigned long dma_prep_flags = 0;
57 56
57 if (submit->cb_fn)
58 dma_prep_flags |= DMA_PREP_INTERRUPT;
59 if (submit->flags & ASYNC_TX_FENCE)
60 dma_prep_flags |= DMA_PREP_FENCE;
58 dma_dest = dma_map_page(device->dev, dest, dest_offset, len, 61 dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
59 DMA_FROM_DEVICE); 62 DMA_FROM_DEVICE);
60 63
@@ -67,13 +70,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
67 70
68 if (tx) { 71 if (tx) {
69 pr_debug("%s: (async) len: %zu\n", __func__, len); 72 pr_debug("%s: (async) len: %zu\n", __func__, len);
70 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); 73 async_tx_submit(chan, tx, submit);
71 } else { 74 } else {
72 void *dest_buf, *src_buf; 75 void *dest_buf, *src_buf;
73 pr_debug("%s: (sync) len: %zu\n", __func__, len); 76 pr_debug("%s: (sync) len: %zu\n", __func__, len);
74 77
75 /* wait for any prerequisite operations */ 78 /* wait for any prerequisite operations */
76 async_tx_quiesce(&depend_tx); 79 async_tx_quiesce(&submit->depend_tx);
77 80
78 dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset; 81 dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
79 src_buf = kmap_atomic(src, KM_USER1) + src_offset; 82 src_buf = kmap_atomic(src, KM_USER1) + src_offset;
@@ -83,26 +86,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
83 kunmap_atomic(dest_buf, KM_USER0); 86 kunmap_atomic(dest_buf, KM_USER0);
84 kunmap_atomic(src_buf, KM_USER1); 87 kunmap_atomic(src_buf, KM_USER1);
85 88
86 async_tx_sync_epilog(cb_fn, cb_param); 89 async_tx_sync_epilog(submit);
87 } 90 }
88 91
89 return tx; 92 return tx;
90} 93}
91EXPORT_SYMBOL_GPL(async_memcpy); 94EXPORT_SYMBOL_GPL(async_memcpy);
92 95
93static int __init async_memcpy_init(void)
94{
95 return 0;
96}
97
98static void __exit async_memcpy_exit(void)
99{
100 do { } while (0);
101}
102
103module_init(async_memcpy_init);
104module_exit(async_memcpy_exit);
105
106MODULE_AUTHOR("Intel Corporation"); 96MODULE_AUTHOR("Intel Corporation");
107MODULE_DESCRIPTION("asynchronous memcpy api"); 97MODULE_DESCRIPTION("asynchronous memcpy api");
108MODULE_LICENSE("GPL"); 98MODULE_LICENSE("GPL");
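The API change above threads all submission state (dependency, flags, callback) through one struct async_submit_ctl instead of four trailing arguments. A caller-side sketch of the new convention (pages, callback, and context are hypothetical):

	struct async_submit_ctl submit;
	struct dma_async_tx_descriptor *tx;

	/* ack the descriptor on completion; no scribble buffer needed here */
	init_async_submit(&submit, ASYNC_TX_ACK, depend_tx,
			  copy_done_fn, copy_done_arg, NULL);
	tx = async_memcpy(dest_page, src_page, 0, 0, PAGE_SIZE, &submit);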
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
index 5b5eb99bb244..58e4a8752aee 100644
--- a/crypto/async_tx/async_memset.c
+++ b/crypto/async_tx/async_memset.c
@@ -35,26 +35,26 @@
35 * @val: fill value 35 * @val: fill value
36 * @offset: offset in pages to start transaction 36 * @offset: offset in pages to start transaction
37 * @len: length in bytes 37 * @len: length in bytes
38 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK 38 *
39 * @depend_tx: memset depends on the result of this transaction 39 * honored flags: ASYNC_TX_ACK
40 * @cb_fn: function to call when the memcpy completes
41 * @cb_param: parameter to pass to the callback routine
42 */ 40 */
43struct dma_async_tx_descriptor * 41struct dma_async_tx_descriptor *
44async_memset(struct page *dest, int val, unsigned int offset, 42async_memset(struct page *dest, int val, unsigned int offset, size_t len,
45 size_t len, enum async_tx_flags flags, 43 struct async_submit_ctl *submit)
46 struct dma_async_tx_descriptor *depend_tx,
47 dma_async_tx_callback cb_fn, void *cb_param)
48{ 44{
49 struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET, 45 struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET,
50 &dest, 1, NULL, 0, len); 46 &dest, 1, NULL, 0, len);
51 struct dma_device *device = chan ? chan->device : NULL; 47 struct dma_device *device = chan ? chan->device : NULL;
52 struct dma_async_tx_descriptor *tx = NULL; 48 struct dma_async_tx_descriptor *tx = NULL;
53 49
54 if (device) { 50 if (device && is_dma_fill_aligned(device, offset, 0, len)) {
55 dma_addr_t dma_dest; 51 dma_addr_t dma_dest;
56 unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; 52 unsigned long dma_prep_flags = 0;
57 53
54 if (submit->cb_fn)
55 dma_prep_flags |= DMA_PREP_INTERRUPT;
56 if (submit->flags & ASYNC_TX_FENCE)
57 dma_prep_flags |= DMA_PREP_FENCE;
58 dma_dest = dma_map_page(device->dev, dest, offset, len, 58 dma_dest = dma_map_page(device->dev, dest, offset, len,
59 DMA_FROM_DEVICE); 59 DMA_FROM_DEVICE);
60 60
@@ -64,38 +64,25 @@ async_memset(struct page *dest, int val, unsigned int offset,
64 64
65 if (tx) { 65 if (tx) {
66 pr_debug("%s: (async) len: %zu\n", __func__, len); 66 pr_debug("%s: (async) len: %zu\n", __func__, len);
67 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); 67 async_tx_submit(chan, tx, submit);
68 } else { /* run the memset synchronously */ 68 } else { /* run the memset synchronously */
69 void *dest_buf; 69 void *dest_buf;
70 pr_debug("%s: (sync) len: %zu\n", __func__, len); 70 pr_debug("%s: (sync) len: %zu\n", __func__, len);
71 71
72 dest_buf = (void *) (((char *) page_address(dest)) + offset); 72 dest_buf = page_address(dest) + offset;
73 73
74 /* wait for any prerequisite operations */ 74 /* wait for any prerequisite operations */
75 async_tx_quiesce(&depend_tx); 75 async_tx_quiesce(&submit->depend_tx);
76 76
77 memset(dest_buf, val, len); 77 memset(dest_buf, val, len);
78 78
79 async_tx_sync_epilog(cb_fn, cb_param); 79 async_tx_sync_epilog(submit);
80 } 80 }
81 81
82 return tx; 82 return tx;
83} 83}
84EXPORT_SYMBOL_GPL(async_memset); 84EXPORT_SYMBOL_GPL(async_memset);
85 85
86static int __init async_memset_init(void)
87{
88 return 0;
89}
90
91static void __exit async_memset_exit(void)
92{
93 do { } while (0);
94}
95
96module_init(async_memset_init);
97module_exit(async_memset_exit);
98
99MODULE_AUTHOR("Intel Corporation"); 86MODULE_AUTHOR("Intel Corporation");
100MODULE_DESCRIPTION("asynchronous memset api"); 87MODULE_DESCRIPTION("asynchronous memset api");
101MODULE_LICENSE("GPL"); 88MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
new file mode 100644
index 000000000000..b88db6d1dc65
--- /dev/null
+++ b/crypto/async_tx/async_pq.c
@@ -0,0 +1,395 @@
1/*
2 * Copyright(c) 2007 Yuri Tikhonov <yur@emcraft.com>
3 * Copyright(c) 2009 Intel Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free
7 * Software Foundation; either version 2 of the License, or (at your option)
8 * any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59
17 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 *
19 * The full GNU General Public License is included in this distribution in the
20 * file called COPYING.
21 */
22#include <linux/kernel.h>
23#include <linux/interrupt.h>
24#include <linux/dma-mapping.h>
25#include <linux/raid/pq.h>
26#include <linux/async_tx.h>
27
28/**
29 * scribble - space to hold throwaway P buffer for synchronous gen_syndrome
30 */
31static struct page *scribble;
32
33static bool is_raid6_zero_block(struct page *p)
34{
35 return p == (void *) raid6_empty_zero_page;
36}
37
38/* the struct page *blocks[] parameter passed to async_gen_syndrome()
39 * and async_syndrome_val() contains the 'P' destination address at
40 * blocks[disks-2] and the 'Q' destination address at blocks[disks-1]
41 *
42 * note: these are macros as they are used as lvalues
43 */
44#define P(b, d) (b[d-2])
45#define Q(b, d) (b[d-1])
46
47/**
48 * do_async_gen_syndrome - asynchronously calculate P and/or Q
49 */
50static __async_inline struct dma_async_tx_descriptor *
51do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
52 const unsigned char *scfs, unsigned int offset, int disks,
53 size_t len, dma_addr_t *dma_src,
54 struct async_submit_ctl *submit)
55{
56 struct dma_async_tx_descriptor *tx = NULL;
57 struct dma_device *dma = chan->device;
58 enum dma_ctrl_flags dma_flags = 0;
59 enum async_tx_flags flags_orig = submit->flags;
60 dma_async_tx_callback cb_fn_orig = submit->cb_fn;
61 dma_async_tx_callback cb_param_orig = submit->cb_param;
62 int src_cnt = disks - 2;
63 unsigned char coefs[src_cnt];
64 unsigned short pq_src_cnt;
65 dma_addr_t dma_dest[2];
66 int src_off = 0;
67 int idx;
68 int i;
69
70 /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */
71 if (P(blocks, disks))
72 dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset,
73 len, DMA_BIDIRECTIONAL);
74 else
75 dma_flags |= DMA_PREP_PQ_DISABLE_P;
76 if (Q(blocks, disks))
77 dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset,
78 len, DMA_BIDIRECTIONAL);
79 else
80 dma_flags |= DMA_PREP_PQ_DISABLE_Q;
81
82 /* convert source addresses being careful to collapse 'empty'
83 * sources and update the coefficients accordingly
84 */
85 for (i = 0, idx = 0; i < src_cnt; i++) {
86 if (is_raid6_zero_block(blocks[i]))
87 continue;
88 dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len,
89 DMA_TO_DEVICE);
90 coefs[idx] = scfs[i];
91 idx++;
92 }
93 src_cnt = idx;
94
95 while (src_cnt > 0) {
96 submit->flags = flags_orig;
97 pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags));
98 /* if we are submitting additional pqs, leave the chain open,
99 * clear the callback parameters, and leave the destination
100 * buffers mapped
101 */
102 if (src_cnt > pq_src_cnt) {
103 submit->flags &= ~ASYNC_TX_ACK;
104 submit->flags |= ASYNC_TX_FENCE;
105 dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
106 submit->cb_fn = NULL;
107 submit->cb_param = NULL;
108 } else {
109 dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP;
110 submit->cb_fn = cb_fn_orig;
111 submit->cb_param = cb_param_orig;
112 if (cb_fn_orig)
113 dma_flags |= DMA_PREP_INTERRUPT;
114 }
115 if (submit->flags & ASYNC_TX_FENCE)
116 dma_flags |= DMA_PREP_FENCE;
117
118 /* Since we have clobbered the src_list we are committed
119 * to doing this asynchronously. Drivers force forward
120 * progress in case they cannot provide a descriptor
121 */
122 for (;;) {
123 tx = dma->device_prep_dma_pq(chan, dma_dest,
124 &dma_src[src_off],
125 pq_src_cnt,
126 &coefs[src_off], len,
127 dma_flags);
128 if (likely(tx))
129 break;
130 async_tx_quiesce(&submit->depend_tx);
131 dma_async_issue_pending(chan);
132 }
133
134 async_tx_submit(chan, tx, submit);
135 submit->depend_tx = tx;
136
137 /* drop completed sources */
138 src_cnt -= pq_src_cnt;
139 src_off += pq_src_cnt;
140
141 dma_flags |= DMA_PREP_CONTINUE;
142 }
143
144 return tx;
145}
146
147/**
148 * do_sync_gen_syndrome - synchronously calculate a raid6 syndrome
149 */
150static void
151do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
152 size_t len, struct async_submit_ctl *submit)
153{
154 void **srcs;
155 int i;
156
157 if (submit->scribble)
158 srcs = submit->scribble;
159 else
160 srcs = (void **) blocks;
161
162 for (i = 0; i < disks; i++) {
163 if (is_raid6_zero_block(blocks[i])) {
164 BUG_ON(i > disks - 3); /* P or Q can't be zero */
165 srcs[i] = blocks[i];
166 } else
167 srcs[i] = page_address(blocks[i]) + offset;
168 }
169 raid6_call.gen_syndrome(disks, len, srcs);
170 async_tx_sync_epilog(submit);
171}
172
173/**
174 * async_gen_syndrome - asynchronously calculate a raid6 syndrome
175 * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
176 * @offset: common offset into each block (src and dest) to start transaction
177 * @disks: number of blocks (including missing P or Q, see below)
178 * @len: length of operation in bytes
179 * @submit: submission/completion modifiers
180 *
181 * General note: This routine assumes a field of GF(2^8) with a
182 * primitive polynomial of 0x11d and a generator of {02}.
183 *
184 * 'disks' note: callers can optionally omit either P or Q (but not
185 * both) from the calculation by setting blocks[disks-2] or
186 * blocks[disks-1] to NULL. When P or Q is omitted 'len' must be <=
187 * PAGE_SIZE as a temporary buffer of this size is used in the
188 * synchronous path. 'disks' always accounts for both destination
189 * buffers.
190 *
191 * 'blocks' note: if submit->scribble is NULL then the contents of
192 * 'blocks' may be overridden
193 */
194struct dma_async_tx_descriptor *
195async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
196 size_t len, struct async_submit_ctl *submit)
197{
198 int src_cnt = disks - 2;
199 struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
200 &P(blocks, disks), 2,
201 blocks, src_cnt, len);
202 struct dma_device *device = chan ? chan->device : NULL;
203 dma_addr_t *dma_src = NULL;
204
205 BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
206
207 if (submit->scribble)
208 dma_src = submit->scribble;
209 else if (sizeof(dma_addr_t) <= sizeof(struct page *))
210 dma_src = (dma_addr_t *) blocks;
211
212 if (dma_src && device &&
213 (src_cnt <= dma_maxpq(device, 0) ||
214 dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
215 is_dma_pq_aligned(device, offset, 0, len)) {
216 /* run the p+q asynchronously */
217 pr_debug("%s: (async) disks: %d len: %zu\n",
218 __func__, disks, len);
219 return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset,
220 disks, len, dma_src, submit);
221 }
222
223 /* run the pq synchronously */
224 pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);
225
226 /* wait for any prerequisite operations */
227 async_tx_quiesce(&submit->depend_tx);
228
229 if (!P(blocks, disks)) {
230 P(blocks, disks) = scribble;
231 BUG_ON(len + offset > PAGE_SIZE);
232 }
233 if (!Q(blocks, disks)) {
234 Q(blocks, disks) = scribble;
235 BUG_ON(len + offset > PAGE_SIZE);
236 }
237 do_sync_gen_syndrome(blocks, offset, disks, len, submit);
238
239 return NULL;
240}
241EXPORT_SYMBOL_GPL(async_gen_syndrome);
242
243/**
244 * async_syndrome_val - asynchronously validate a raid6 syndrome
245 * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
246 * @offset: common offset into each block (src and dest) to start transaction
247 * @disks: number of blocks (including missing P or Q, see below)
248 * @len: length of operation in bytes
249 * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set
250 * @spare: temporary result buffer for the synchronous case
251 * @submit: submission / completion modifiers
252 *
253 * The same notes from async_gen_syndrome apply to the 'blocks',
254 * and 'disks' parameters of this routine. The synchronous path
255 * requires a temporary result buffer and submit->scribble to be
256 * specified.
257 */
258struct dma_async_tx_descriptor *
259async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
260 size_t len, enum sum_check_flags *pqres, struct page *spare,
261 struct async_submit_ctl *submit)
262{
263 struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL,
264 NULL, 0, blocks, disks,
265 len);
266 struct dma_device *device = chan ? chan->device : NULL;
267 struct dma_async_tx_descriptor *tx;
268 enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
269 dma_addr_t *dma_src = NULL;
270
271 BUG_ON(disks < 4);
272
273 if (submit->scribble)
274 dma_src = submit->scribble;
275 else if (sizeof(dma_addr_t) <= sizeof(struct page *))
276 dma_src = (dma_addr_t *) blocks;
277
278 if (dma_src && device && disks <= dma_maxpq(device, 0) &&
279 is_dma_pq_aligned(device, offset, 0, len)) {
280 struct device *dev = device->dev;
281 dma_addr_t *pq = &dma_src[disks-2];
282 int i;
283
284 pr_debug("%s: (async) disks: %d len: %zu\n",
285 __func__, disks, len);
286 if (!P(blocks, disks))
287 dma_flags |= DMA_PREP_PQ_DISABLE_P;
288 if (!Q(blocks, disks))
289 dma_flags |= DMA_PREP_PQ_DISABLE_Q;
290 if (submit->flags & ASYNC_TX_FENCE)
291 dma_flags |= DMA_PREP_FENCE;
292 for (i = 0; i < disks; i++)
293 if (likely(blocks[i])) {
294 BUG_ON(is_raid6_zero_block(blocks[i]));
295 dma_src[i] = dma_map_page(dev, blocks[i],
296 offset, len,
297 DMA_TO_DEVICE);
298 }
299
300 for (;;) {
301 tx = device->device_prep_dma_pq_val(chan, pq, dma_src,
302 disks - 2,
303 raid6_gfexp,
304 len, pqres,
305 dma_flags);
306 if (likely(tx))
307 break;
308 async_tx_quiesce(&submit->depend_tx);
309 dma_async_issue_pending(chan);
310 }
311 async_tx_submit(chan, tx, submit);
312
313 return tx;
314 } else {
315 struct page *p_src = P(blocks, disks);
316 struct page *q_src = Q(blocks, disks);
317 enum async_tx_flags flags_orig = submit->flags;
318 dma_async_tx_callback cb_fn_orig = submit->cb_fn;
319 void *scribble = submit->scribble;
320 void *cb_param_orig = submit->cb_param;
321 void *p, *q, *s;
322
323 pr_debug("%s: (sync) disks: %d len: %zu\n",
324 __func__, disks, len);
325
326 /* caller must provide a temporary result buffer and
327 * allow the input parameters to be preserved
328 */
329 BUG_ON(!spare || !scribble);
330
331 /* wait for any prerequisite operations */
332 async_tx_quiesce(&submit->depend_tx);
333
334 /* recompute p and/or q into the temporary buffer and then
335 * check to see the result matches the current value
336 */
337 tx = NULL;
338 *pqres = 0;
339 if (p_src) {
340 init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL,
341 NULL, NULL, scribble);
342 tx = async_xor(spare, blocks, offset, disks-2, len, submit);
343 async_tx_quiesce(&tx);
344 p = page_address(p_src) + offset;
345 s = page_address(spare) + offset;
346 *pqres |= !!memcmp(p, s, len) << SUM_CHECK_P;
347 }
348
349 if (q_src) {
350 P(blocks, disks) = NULL;
351 Q(blocks, disks) = spare;
352 init_async_submit(submit, 0, NULL, NULL, NULL, scribble);
353 tx = async_gen_syndrome(blocks, offset, disks, len, submit);
354 async_tx_quiesce(&tx);
355 q = page_address(q_src) + offset;
356 s = page_address(spare) + offset;
357 *pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q;
358 }
359
360 /* restore P, Q and submit */
361 P(blocks, disks) = p_src;
362 Q(blocks, disks) = q_src;
363
364 submit->cb_fn = cb_fn_orig;
365 submit->cb_param = cb_param_orig;
366 submit->flags = flags_orig;
367 async_tx_sync_epilog(submit);
368
369 return NULL;
370 }
371}
372EXPORT_SYMBOL_GPL(async_syndrome_val);
373
374static int __init async_pq_init(void)
375{
376 scribble = alloc_page(GFP_KERNEL);
377
378 if (scribble)
379 return 0;
380
381 pr_err("%s: failed to allocate required spare page\n", __func__);
382
383 return -ENOMEM;
384}
385
386static void __exit async_pq_exit(void)
387{
388 put_page(scribble);
389}
390
391module_init(async_pq_init);
392module_exit(async_pq_exit);
393
394MODULE_DESCRIPTION("asynchronous raid6 syndrome generation/validation");
395MODULE_LICENSE("GPL");
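Tying the kernel-doc above to a concrete call: for a four-data-disk stripe, 'disks' is 6 and the P and Q destinations occupy the last two slots. A sketch (pages, callback, and scribble buffer are hypothetical):

	/* blocks[0..3] = data, blocks[4] = P, blocks[5] = Q */
	struct page *blocks[6] = { d0, d1, d2, d3, p, q };
	struct async_submit_ctl submit;

	init_async_submit(&submit, ASYNC_TX_ACK, NULL,
			  stripe_done_fn, stripe_ctx, scribble);
	async_gen_syndrome(blocks, 0, 6, PAGE_SIZE, &submit);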
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
new file mode 100644
index 000000000000..6d73dde4786d
--- /dev/null
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -0,0 +1,468 @@
1/*
2 * Asynchronous RAID-6 recovery calculations ASYNC_TX API.
3 * Copyright(c) 2009 Intel Corporation
4 *
5 * based on raid6recov.c:
6 * Copyright 2002 H. Peter Anvin
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the Free
10 * Software Foundation; either version 2 of the License, or (at your option)
11 * any later version.
12 *
13 * This program is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * more details.
17 *
18 * You should have received a copy of the GNU General Public License along with
19 * this program; if not, write to the Free Software Foundation, Inc., 51
20 * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 */
23#include <linux/kernel.h>
24#include <linux/interrupt.h>
25#include <linux/dma-mapping.h>
26#include <linux/raid/pq.h>
27#include <linux/async_tx.h>
28
29static struct dma_async_tx_descriptor *
30async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
31 size_t len, struct async_submit_ctl *submit)
32{
33 struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
34 &dest, 1, srcs, 2, len);
35 struct dma_device *dma = chan ? chan->device : NULL;
36 const u8 *amul, *bmul;
37 u8 ax, bx;
38 u8 *a, *b, *c;
39
40 if (dma) {
41 dma_addr_t dma_dest[2];
42 dma_addr_t dma_src[2];
43 struct device *dev = dma->dev;
44 struct dma_async_tx_descriptor *tx;
45 enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
46
47 if (submit->flags & ASYNC_TX_FENCE)
48 dma_flags |= DMA_PREP_FENCE;
49 dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
50 dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
51 dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
52 tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
53 len, dma_flags);
54 if (tx) {
55 async_tx_submit(chan, tx, submit);
56 return tx;
57 }
58
59 /* could not get a descriptor, unmap and fall through to
60 * the synchronous path
61 */
62 dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
63 dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
64 dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE);
65 }
66
67 /* run the operation synchronously */
68 async_tx_quiesce(&submit->depend_tx);
69 amul = raid6_gfmul[coef[0]];
70 bmul = raid6_gfmul[coef[1]];
71 a = page_address(srcs[0]);
72 b = page_address(srcs[1]);
73 c = page_address(dest);
74
75 while (len--) {
76 ax = amul[*a++];
77 bx = bmul[*b++];
78 *c++ = ax ^ bx;
79 }
80
81 return NULL;
82}
83
84static struct dma_async_tx_descriptor *
85async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
86 struct async_submit_ctl *submit)
87{
88 struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
89 &dest, 1, &src, 1, len);
90 struct dma_device *dma = chan ? chan->device : NULL;
91 const u8 *qmul; /* Q multiplier table */
92 u8 *d, *s;
93
94 if (dma) {
95 dma_addr_t dma_dest[2];
96 dma_addr_t dma_src[1];
97 struct device *dev = dma->dev;
98 struct dma_async_tx_descriptor *tx;
99 enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
100
101 if (submit->flags & ASYNC_TX_FENCE)
102 dma_flags |= DMA_PREP_FENCE;
103 dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
104 dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
105 tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
106 len, dma_flags);
107 if (tx) {
108 async_tx_submit(chan, tx, submit);
109 return tx;
110 }
111
112 /* could not get a descriptor, unmap and fall through to
113 * the synchronous path
114 */
115 dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
116 dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
117 }
118
119 /* no channel available, or failed to allocate a descriptor, so
120 * perform the operation synchronously
121 */
122 async_tx_quiesce(&submit->depend_tx);
123 qmul = raid6_gfmul[coef];
124 d = page_address(dest);
125 s = page_address(src);
126
127 while (len--)
128 *d++ = qmul[*s++];
129
130 return NULL;
131}
132
133static struct dma_async_tx_descriptor *
134__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
135 struct async_submit_ctl *submit)
136{
137 struct dma_async_tx_descriptor *tx = NULL;
138 struct page *p, *q, *a, *b;
139 struct page *srcs[2];
140 unsigned char coef[2];
141 enum async_tx_flags flags = submit->flags;
142 dma_async_tx_callback cb_fn = submit->cb_fn;
143 void *cb_param = submit->cb_param;
144 void *scribble = submit->scribble;
145
146 p = blocks[4-2];
147 q = blocks[4-1];
148
149 a = blocks[faila];
150 b = blocks[failb];
151
152 /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */
153 /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
154 srcs[0] = p;
155 srcs[1] = q;
156 coef[0] = raid6_gfexi[failb-faila];
157 coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
158 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
159 tx = async_sum_product(b, srcs, coef, bytes, submit);
160
161 /* Dy = P+Pxy+Dx */
162 srcs[0] = p;
163 srcs[1] = b;
164 init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn,
165 cb_param, scribble);
166 tx = async_xor(a, srcs, 0, 2, bytes, submit);
167
168 return tx;
169
170}
171
172static struct dma_async_tx_descriptor *
173__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
174 struct async_submit_ctl *submit)
175{
176 struct dma_async_tx_descriptor *tx = NULL;
177 struct page *p, *q, *g, *dp, *dq;
178 struct page *srcs[2];
179 unsigned char coef[2];
180 enum async_tx_flags flags = submit->flags;
181 dma_async_tx_callback cb_fn = submit->cb_fn;
182 void *cb_param = submit->cb_param;
183 void *scribble = submit->scribble;
184 int uninitialized_var(good);
185 int i;
186
187 for (i = 0; i < 3; i++) {
188 if (i == faila || i == failb)
189 continue;
190 else {
191 good = i;
192 break;
193 }
194 }
195 BUG_ON(i >= 3);
196
197 p = blocks[5-2];
198 q = blocks[5-1];
199 g = blocks[good];
200
201 /* Compute syndrome with zero for the missing data pages
202 * Use the dead data pages as temporary storage for delta p and
203 * delta q
204 */
205 dp = blocks[faila];
206 dq = blocks[failb];
207
208 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
209 tx = async_memcpy(dp, g, 0, 0, bytes, submit);
210 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
211 tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
212
213 /* compute P + Pxy */
214 srcs[0] = dp;
215 srcs[1] = p;
216 init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
217 NULL, NULL, scribble);
218 tx = async_xor(dp, srcs, 0, 2, bytes, submit);
219
220 /* compute Q + Qxy */
221 srcs[0] = dq;
222 srcs[1] = q;
223 init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
224 NULL, NULL, scribble);
225 tx = async_xor(dq, srcs, 0, 2, bytes, submit);
226
227 /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
228 srcs[0] = dp;
229 srcs[1] = dq;
230 coef[0] = raid6_gfexi[failb-faila];
231 coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
232 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
233 tx = async_sum_product(dq, srcs, coef, bytes, submit);
234
235 /* Dy = P+Pxy+Dx */
236 srcs[0] = dp;
237 srcs[1] = dq;
238 init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
239 cb_param, scribble);
240 tx = async_xor(dp, srcs, 0, 2, bytes, submit);
241
242 return tx;
243}
244
245static struct dma_async_tx_descriptor *
246__2data_recov_n(int disks, size_t bytes, int faila, int failb,
247 struct page **blocks, struct async_submit_ctl *submit)
248{
249 struct dma_async_tx_descriptor *tx = NULL;
250 struct page *p, *q, *dp, *dq;
251 struct page *srcs[2];
252 unsigned char coef[2];
253 enum async_tx_flags flags = submit->flags;
254 dma_async_tx_callback cb_fn = submit->cb_fn;
255 void *cb_param = submit->cb_param;
256 void *scribble = submit->scribble;
257
258 p = blocks[disks-2];
259 q = blocks[disks-1];
260
261 /* Compute syndrome with zero for the missing data pages
262 * Use the dead data pages as temporary storage for
263 * delta p and delta q
264 */
265 dp = blocks[faila];
266 blocks[faila] = (void *)raid6_empty_zero_page;
267 blocks[disks-2] = dp;
268 dq = blocks[failb];
269 blocks[failb] = (void *)raid6_empty_zero_page;
270 blocks[disks-1] = dq;
271
272 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
273 tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
274
275 /* Restore pointer table */
276 blocks[faila] = dp;
277 blocks[failb] = dq;
278 blocks[disks-2] = p;
279 blocks[disks-1] = q;
280
281 /* compute P + Pxy */
282 srcs[0] = dp;
283 srcs[1] = p;
284 init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
285 NULL, NULL, scribble);
286 tx = async_xor(dp, srcs, 0, 2, bytes, submit);
287
288 /* compute Q + Qxy */
289 srcs[0] = dq;
290 srcs[1] = q;
291 init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
292 NULL, NULL, scribble);
293 tx = async_xor(dq, srcs, 0, 2, bytes, submit);
294
295 /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
296 srcs[0] = dp;
297 srcs[1] = dq;
298 coef[0] = raid6_gfexi[failb-faila];
299 coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
300 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
301 tx = async_sum_product(dq, srcs, coef, bytes, submit);
302
303 /* Dy = P+Pxy+Dx */
304 srcs[0] = dp;
305 srcs[1] = dq;
306 init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
307 cb_param, scribble);
308 tx = async_xor(dp, srcs, 0, 2, bytes, submit);
309
310 return tx;
311}
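/*
 * Note on the pointer-table shuffle above: with raid6_empty_zero_page
 * standing in for the failed slots, async_gen_syndrome() computes the
 * syndrome of the surviving data only, leaving Pxy in dp and Qxy in dq
 * (the zero pages contribute nothing to either sum); the two xors that
 * follow then form P+Pxy and Q+Qxy in place.
 */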
312
313/**
314 * async_raid6_2data_recov - asynchronously calculate two missing data blocks
315 * @disks: number of disks in the RAID-6 array
316 * @bytes: block size
317 * @faila: first failed drive index
318 * @failb: second failed drive index
319 * @blocks: array of source pointers where the last two entries are p and q
320 * @submit: submission/completion modifiers
321 */
322struct dma_async_tx_descriptor *
323async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
324 struct page **blocks, struct async_submit_ctl *submit)
325{
326 BUG_ON(faila == failb);
327 if (failb < faila)
328 swap(faila, failb);
329
330 pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
331
332 /* we need to preserve the contents of 'blocks' for the async
333 * case, so punt to synchronous if a scribble buffer is not available
334 */
335 if (!submit->scribble) {
336 void **ptrs = (void **) blocks;
337 int i;
338
339 async_tx_quiesce(&submit->depend_tx);
340 for (i = 0; i < disks; i++)
341 ptrs[i] = page_address(blocks[i]);
342
343 raid6_2data_recov(disks, bytes, faila, failb, ptrs);
344
345 async_tx_sync_epilog(submit);
346
347 return NULL;
348 }
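	/*
	 * The in-place (void **) conversion above relies on struct page *
	 * and void * being the same size, so each slot of 'blocks' can be
	 * overwritten with its page's virtual address; clobbering the
	 * array is acceptable here precisely because this is the
	 * synchronous path.
	 */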
349
350 switch (disks) {
351 case 4:
352 /* dma devices do not uniformly understand a zero source pq
353 * operation (in contrast to the synchronous case), so
354 * explicitly handle the 4 disk special case
355 */
356 return __2data_recov_4(bytes, faila, failb, blocks, submit);
357 case 5:
358 /* dma devices do not uniformly understand a single
359 * source pq operation (in contrast to the synchronous
360 * case), so explicitly handle the 5 disk special case
361 */
362 return __2data_recov_5(bytes, faila, failb, blocks, submit);
363 default:
364 return __2data_recov_n(disks, bytes, faila, failb, blocks, submit);
365 }
366}
367EXPORT_SYMBOL_GPL(async_raid6_2data_recov);
368
369/**
370 * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block
371 * @disks: number of disks in the RAID-6 array
372 * @bytes: block size
373 * @faila: failed drive index
374 * @blocks: array of source pointers where the last two entries are p and q
375 * @submit: submission/completion modifiers
376 */
377struct dma_async_tx_descriptor *
378async_raid6_datap_recov(int disks, size_t bytes, int faila,
379 struct page **blocks, struct async_submit_ctl *submit)
380{
381 struct dma_async_tx_descriptor *tx = NULL;
382 struct page *p, *q, *dq;
383 u8 coef;
384 enum async_tx_flags flags = submit->flags;
385 dma_async_tx_callback cb_fn = submit->cb_fn;
386 void *cb_param = submit->cb_param;
387 void *scribble = submit->scribble;
388 struct page *srcs[2];
389
390 pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
391
392 /* we need to preserve the contents of 'blocks' for the async
393 * case, so punt to synchronous if a scribble buffer is not available
394 */
395 if (!scribble) {
396 void **ptrs = (void **) blocks;
397 int i;
398
399 async_tx_quiesce(&submit->depend_tx);
400 for (i = 0; i < disks; i++)
401 ptrs[i] = page_address(blocks[i]);
402
403 raid6_datap_recov(disks, bytes, faila, ptrs);
404
405 async_tx_sync_epilog(submit);
406
407 return NULL;
408 }
409
410 p = blocks[disks-2];
411 q = blocks[disks-1];
412
413 /* Compute syndrome with zero for the missing data page
414 * Use the dead data page as temporary storage for delta q
415 */
416 dq = blocks[faila];
417 blocks[faila] = (void *)raid6_empty_zero_page;
418 blocks[disks-1] = dq;
419
420 /* in the 4 disk case we only need to perform a single source
421 * multiplication
422 */
423 if (disks == 4) {
424 int good = faila == 0 ? 1 : 0;
425 struct page *g = blocks[good];
426
427 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
428 scribble);
429 tx = async_memcpy(p, g, 0, 0, bytes, submit);
430
431 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
432 scribble);
433 tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
434 } else {
435 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
436 scribble);
437 tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
438 }
439
440 /* Restore pointer table */
441 blocks[faila] = dq;
442 blocks[disks-1] = q;
443
444 /* calculate g^{-faila} */
445 coef = raid6_gfinv[raid6_gfexp[faila]];
446
447 srcs[0] = dq;
448 srcs[1] = q;
449 init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
450 NULL, NULL, scribble);
451 tx = async_xor(dq, srcs, 0, 2, bytes, submit);
452
453 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
454 tx = async_mult(dq, dq, coef, bytes, submit);
455
456 srcs[0] = p;
457 srcs[1] = dq;
458 init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
459 cb_param, scribble);
460 tx = async_xor(p, srcs, 0, 2, bytes, submit);
461
462 return tx;
463}
464EXPORT_SYMBOL_GPL(async_raid6_datap_recov);
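/*
 * Algebra behind async_raid6_datap_recov(): regenerating the syndrome
 * with the failed block zeroed leaves Q' = Q + g^a*Da in dq (a = faila),
 * so the xor with q isolates g^a*Da, the multiply by coef = g^-a
 * recovers Da, and the final xor folds Da into the partial P to rebuild
 * the parity block.
 */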
465
466MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
467MODULE_DESCRIPTION("asynchronous RAID-6 recovery api");
468MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index 06eb6cc09fef..f9cdf04fe7c0 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -42,16 +42,21 @@ static void __exit async_tx_exit(void)
42 async_dmaengine_put(); 42 async_dmaengine_put();
43} 43}
44 44
45module_init(async_tx_init);
46module_exit(async_tx_exit);
47
45/** 48/**
46 * __async_tx_find_channel - find a channel to carry out the operation or let 49 * __async_tx_find_channel - find a channel to carry out the operation or let
47 * the transaction execute synchronously 50 * the transaction execute synchronously
48 * @depend_tx: transaction dependency 51 * @submit: transaction dependency and submission modifiers
49 * @tx_type: transaction type 52 * @tx_type: transaction type
50 */ 53 */
51struct dma_chan * 54struct dma_chan *
52__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, 55__async_tx_find_channel(struct async_submit_ctl *submit,
53 enum dma_transaction_type tx_type) 56 enum dma_transaction_type tx_type)
54{ 57{
58 struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
59
55 /* see if we can keep the chain on one channel */ 60 /* see if we can keep the chain on one channel */
56 if (depend_tx && 61 if (depend_tx &&
57 dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) 62 dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
@@ -59,17 +64,6 @@ __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
59 return async_dma_find_channel(tx_type); 64 return async_dma_find_channel(tx_type);
60} 65}
61EXPORT_SYMBOL_GPL(__async_tx_find_channel); 66EXPORT_SYMBOL_GPL(__async_tx_find_channel);
62#else
63static int __init async_tx_init(void)
64{
65 printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
66 return 0;
67}
68
69static void __exit async_tx_exit(void)
70{
71 do { } while (0);
72}
73#endif 67#endif
74 68
75 69
@@ -83,10 +77,14 @@ static void
83async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, 77async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
84 struct dma_async_tx_descriptor *tx) 78 struct dma_async_tx_descriptor *tx)
85{ 79{
86 struct dma_chan *chan; 80 struct dma_chan *chan = depend_tx->chan;
87 struct dma_device *device; 81 struct dma_device *device = chan->device;
88 struct dma_async_tx_descriptor *intr_tx = (void *) ~0; 82 struct dma_async_tx_descriptor *intr_tx = (void *) ~0;
89 83
84 #ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH
85 BUG();
86 #endif
87
90 /* first check to see if we can still append to depend_tx */ 88 /* first check to see if we can still append to depend_tx */
91 spin_lock_bh(&depend_tx->lock); 89 spin_lock_bh(&depend_tx->lock);
92 if (depend_tx->parent && depend_tx->chan == tx->chan) { 90 if (depend_tx->parent && depend_tx->chan == tx->chan) {
@@ -96,11 +94,11 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
96 } 94 }
97 spin_unlock_bh(&depend_tx->lock); 95 spin_unlock_bh(&depend_tx->lock);
98 96
99 if (!intr_tx) 97 /* attached dependency, flush the parent channel */
98 if (!intr_tx) {
99 device->device_issue_pending(chan);
100 return; 100 return;
101 101 }
102 chan = depend_tx->chan;
103 device = chan->device;
104 102
105 /* see if we can schedule an interrupt 103 /* see if we can schedule an interrupt
106 * otherwise poll for completion 104 * otherwise poll for completion
@@ -134,6 +132,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
134 intr_tx->tx_submit(intr_tx); 132 intr_tx->tx_submit(intr_tx);
135 async_tx_ack(intr_tx); 133 async_tx_ack(intr_tx);
136 } 134 }
135 device->device_issue_pending(chan);
137 } else { 136 } else {
138 if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) 137 if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
139 panic("%s: DMA_ERROR waiting for depend_tx\n", 138 panic("%s: DMA_ERROR waiting for depend_tx\n",
@@ -144,13 +143,14 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
144 143
145 144
146/** 145/**
147 * submit_disposition - while holding depend_tx->lock we must avoid submitting 146 * submit_disposition - flags for routing an incoming operation
148 * new operations to prevent a circular locking dependency with
149 * drivers that already hold a channel lock when calling
150 * async_tx_run_dependencies.
151 * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock 147 * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock
152 * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch 148 * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch
153 * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly 149 * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly
150 *
151 * while holding depend_tx->lock we must avoid submitting new operations
152 * to prevent a circular locking dependency with drivers that already
153 * hold a channel lock when calling async_tx_run_dependencies.
154 */ 154 */
155enum submit_disposition { 155enum submit_disposition {
156 ASYNC_TX_SUBMITTED, 156 ASYNC_TX_SUBMITTED,
@@ -160,11 +160,12 @@ enum submit_disposition {
160 160
161void 161void
162async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, 162async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
163 enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, 163 struct async_submit_ctl *submit)
164 dma_async_tx_callback cb_fn, void *cb_param)
165{ 164{
166 tx->callback = cb_fn; 165 struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
167 tx->callback_param = cb_param; 166
167 tx->callback = submit->cb_fn;
168 tx->callback_param = submit->cb_param;
168 169
169 if (depend_tx) { 170 if (depend_tx) {
170 enum submit_disposition s; 171 enum submit_disposition s;
@@ -220,30 +221,29 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
220 tx->tx_submit(tx); 221 tx->tx_submit(tx);
221 } 222 }
222 223
223 if (flags & ASYNC_TX_ACK) 224 if (submit->flags & ASYNC_TX_ACK)
224 async_tx_ack(tx); 225 async_tx_ack(tx);
225 226
226 if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) 227 if (depend_tx)
227 async_tx_ack(depend_tx); 228 async_tx_ack(depend_tx);
228} 229}
229EXPORT_SYMBOL_GPL(async_tx_submit); 230EXPORT_SYMBOL_GPL(async_tx_submit);
230 231
231/** 232/**
232 * async_trigger_callback - schedules the callback function to be run after 233 * async_trigger_callback - schedules the callback function to be run
233 * any dependent operations have been completed. 234 * @submit: submission and completion parameters
234 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK 235 *
235 * @depend_tx: 'callback' requires the completion of this transaction 236 * honored flags: ASYNC_TX_ACK
236 * @cb_fn: function to call after depend_tx completes 237 *
237 * @cb_param: parameter to pass to the callback routine 238 * The callback is run after any dependent operations have completed.
238 */ 239 */
239struct dma_async_tx_descriptor * 240struct dma_async_tx_descriptor *
240async_trigger_callback(enum async_tx_flags flags, 241async_trigger_callback(struct async_submit_ctl *submit)
241 struct dma_async_tx_descriptor *depend_tx,
242 dma_async_tx_callback cb_fn, void *cb_param)
243{ 242{
244 struct dma_chan *chan; 243 struct dma_chan *chan;
245 struct dma_device *device; 244 struct dma_device *device;
246 struct dma_async_tx_descriptor *tx; 245 struct dma_async_tx_descriptor *tx;
246 struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
247 247
248 if (depend_tx) { 248 if (depend_tx) {
249 chan = depend_tx->chan; 249 chan = depend_tx->chan;
@@ -262,14 +262,14 @@ async_trigger_callback(enum async_tx_flags flags,
262 if (tx) { 262 if (tx) {
263 pr_debug("%s: (async)\n", __func__); 263 pr_debug("%s: (async)\n", __func__);
264 264
265 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); 265 async_tx_submit(chan, tx, submit);
266 } else { 266 } else {
267 pr_debug("%s: (sync)\n", __func__); 267 pr_debug("%s: (sync)\n", __func__);
268 268
269 /* wait for any prerequisite operations */ 269 /* wait for any prerequisite operations */
270 async_tx_quiesce(&depend_tx); 270 async_tx_quiesce(&submit->depend_tx);
271 271
272 async_tx_sync_epilog(cb_fn, cb_param); 272 async_tx_sync_epilog(submit);
273 } 273 }
274 274
275 return tx; 275 return tx;
@@ -295,9 +295,6 @@ void async_tx_quiesce(struct dma_async_tx_descriptor **tx)
295} 295}
296EXPORT_SYMBOL_GPL(async_tx_quiesce); 296EXPORT_SYMBOL_GPL(async_tx_quiesce);
297 297
298module_init(async_tx_init);
299module_exit(async_tx_exit);
300
301MODULE_AUTHOR("Intel Corporation"); 298MODULE_AUTHOR("Intel Corporation");
302MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API"); 299MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
303MODULE_LICENSE("GPL"); 300MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 90dd3f8bd283..b459a9034aac 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -33,19 +33,16 @@
33/* do_async_xor - dma map the pages and perform the xor with an engine */ 33/* do_async_xor - dma map the pages and perform the xor with an engine */
34static __async_inline struct dma_async_tx_descriptor * 34static __async_inline struct dma_async_tx_descriptor *
35do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, 35do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
36 unsigned int offset, int src_cnt, size_t len, 36 unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src,
37 enum async_tx_flags flags, 37 struct async_submit_ctl *submit)
38 struct dma_async_tx_descriptor *depend_tx,
39 dma_async_tx_callback cb_fn, void *cb_param)
40{ 38{
41 struct dma_device *dma = chan->device; 39 struct dma_device *dma = chan->device;
42 dma_addr_t *dma_src = (dma_addr_t *) src_list;
43 struct dma_async_tx_descriptor *tx = NULL; 40 struct dma_async_tx_descriptor *tx = NULL;
44 int src_off = 0; 41 int src_off = 0;
45 int i; 42 int i;
46 dma_async_tx_callback _cb_fn; 43 dma_async_tx_callback cb_fn_orig = submit->cb_fn;
47 void *_cb_param; 44 void *cb_param_orig = submit->cb_param;
48 enum async_tx_flags async_flags; 45 enum async_tx_flags flags_orig = submit->flags;
49 enum dma_ctrl_flags dma_flags; 46 enum dma_ctrl_flags dma_flags;
50 int xor_src_cnt; 47 int xor_src_cnt;
51 dma_addr_t dma_dest; 48 dma_addr_t dma_dest;
@@ -63,25 +60,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
63 } 60 }
64 61
65 while (src_cnt) { 62 while (src_cnt) {
66 async_flags = flags; 63 submit->flags = flags_orig;
67 dma_flags = 0; 64 dma_flags = 0;
68 xor_src_cnt = min(src_cnt, dma->max_xor); 65 xor_src_cnt = min(src_cnt, (int)dma->max_xor);
69 /* if we are submitting additional xors, leave the chain open, 66 /* if we are submitting additional xors, leave the chain open,
70 * clear the callback parameters, and leave the destination 67 * clear the callback parameters, and leave the destination
71 * buffer mapped 68 * buffer mapped
72 */ 69 */
73 if (src_cnt > xor_src_cnt) { 70 if (src_cnt > xor_src_cnt) {
74 async_flags &= ~ASYNC_TX_ACK; 71 submit->flags &= ~ASYNC_TX_ACK;
72 submit->flags |= ASYNC_TX_FENCE;
75 dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; 73 dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
76 _cb_fn = NULL; 74 submit->cb_fn = NULL;
77 _cb_param = NULL; 75 submit->cb_param = NULL;
78 } else { 76 } else {
79 _cb_fn = cb_fn; 77 submit->cb_fn = cb_fn_orig;
80 _cb_param = cb_param; 78 submit->cb_param = cb_param_orig;
81 } 79 }
82 if (_cb_fn) 80 if (submit->cb_fn)
83 dma_flags |= DMA_PREP_INTERRUPT; 81 dma_flags |= DMA_PREP_INTERRUPT;
84 82 if (submit->flags & ASYNC_TX_FENCE)
83 dma_flags |= DMA_PREP_FENCE;
85 /* Since we have clobbered the src_list we are committed 84 /* Since we have clobbered the src_list we are committed
86 * to doing this asynchronously. Drivers force forward progress 85 * to doing this asynchronously. Drivers force forward progress
87 * in case they can not provide a descriptor 86 * in case they can not provide a descriptor
@@ -90,7 +89,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
90 xor_src_cnt, len, dma_flags); 89 xor_src_cnt, len, dma_flags);
91 90
92 if (unlikely(!tx)) 91 if (unlikely(!tx))
93 async_tx_quiesce(&depend_tx); 92 async_tx_quiesce(&submit->depend_tx);
94 93
95 /* spin wait for the preceeding transactions to complete */ 94 /* spin wait for the preceeding transactions to complete */
96 while (unlikely(!tx)) { 95 while (unlikely(!tx)) {
@@ -101,11 +100,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
101 dma_flags); 100 dma_flags);
102 } 101 }
103 102
104 async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn, 103 async_tx_submit(chan, tx, submit);
105 _cb_param); 104 submit->depend_tx = tx;
106
107 depend_tx = tx;
108 flags |= ASYNC_TX_DEP_ACK;
109 105
110 if (src_cnt > xor_src_cnt) { 106 if (src_cnt > xor_src_cnt) {
111 /* drop completed sources */ 107 /* drop completed sources */
@@ -124,23 +120,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
124 120
125static void 121static void
126do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, 122do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
127 int src_cnt, size_t len, enum async_tx_flags flags, 123 int src_cnt, size_t len, struct async_submit_ctl *submit)
128 dma_async_tx_callback cb_fn, void *cb_param)
129{ 124{
130 int i; 125 int i;
131 int xor_src_cnt; 126 int xor_src_cnt;
132 int src_off = 0; 127 int src_off = 0;
133 void *dest_buf; 128 void *dest_buf;
134 void **srcs = (void **) src_list; 129 void **srcs;
130
131 if (submit->scribble)
132 srcs = submit->scribble;
133 else
134 srcs = (void **) src_list;
135 135
136 /* reuse the 'src_list' array to convert to buffer pointers */ 136 /* convert to buffer pointers */
137 for (i = 0; i < src_cnt; i++) 137 for (i = 0; i < src_cnt; i++)
138 srcs[i] = page_address(src_list[i]) + offset; 138 srcs[i] = page_address(src_list[i]) + offset;
139 139
140 /* set destination address */ 140 /* set destination address */
141 dest_buf = page_address(dest) + offset; 141 dest_buf = page_address(dest) + offset;
142 142
143 if (flags & ASYNC_TX_XOR_ZERO_DST) 143 if (submit->flags & ASYNC_TX_XOR_ZERO_DST)
144 memset(dest_buf, 0, len); 144 memset(dest_buf, 0, len);
145 145
146 while (src_cnt > 0) { 146 while (src_cnt > 0) {
@@ -153,61 +153,70 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
153 src_off += xor_src_cnt; 153 src_off += xor_src_cnt;
154 } 154 }
155 155
156 async_tx_sync_epilog(cb_fn, cb_param); 156 async_tx_sync_epilog(submit);
157} 157}
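/*
 * The synchronous path above is built on the kernel's xor_blocks()
 * helper (crypto/xor.c), which selects the fastest routine at boot.
 * A plain-C sketch of its effect:
 *
 *	static void xor_blocks_ref(unsigned int count, unsigned int bytes,
 *				   void *dest, void **srcs)
 *	{
 *		unsigned int i, j;
 *		u8 *d = dest;
 *
 *		for (j = 0; j < count; j++) {
 *			const u8 *s = srcs[j];
 *
 *			for (i = 0; i < bytes; i++)
 *				d[i] ^= s[i];
 *		}
 *	}
 */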
158 158
159/** 159/**
160 * async_xor - attempt to xor a set of blocks with a dma engine. 160 * async_xor - attempt to xor a set of blocks with a dma engine.
161 * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
162 * flag must be set to not include dest data in the calculation. The
163 * assumption with dma engines is that they only use the destination
164 * buffer as a source when it is explicitly specified in the source list.
165 * @dest: destination page 161 * @dest: destination page
166 * @src_list: array of source pages (if the dest is also a source it must be 162 * @src_list: array of source pages
167 * at index zero). The contents of this array may be overwritten. 163 * @offset: common src/dst offset to start transaction
168 * @offset: offset in pages to start transaction
169 * @src_cnt: number of source pages 164 * @src_cnt: number of source pages
170 * @len: length in bytes 165 * @len: length in bytes
171 * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, 166 * @submit: submission / completion modifiers
172 * ASYNC_TX_ACK, ASYNC_TX_DEP_ACK 167 *
173 * @depend_tx: xor depends on the result of this transaction. 168 * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST
174 * @cb_fn: function to call when the xor completes 169 *
175 * @cb_param: parameter to pass to the callback routine 170 * xor_blocks always uses the dest as a source so the
171 * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
172 * the calculation. The assumption with dma engines is that they only
173 * use the destination buffer as a source when it is explicitly specified
174 * in the source list.
175 *
176 * src_list note: if the dest is also a source it must be at index zero.
177 * The contents of this array will be overwritten if a scribble region
178 * is not specified.
176 */ 179 */
177struct dma_async_tx_descriptor * 180struct dma_async_tx_descriptor *
178async_xor(struct page *dest, struct page **src_list, unsigned int offset, 181async_xor(struct page *dest, struct page **src_list, unsigned int offset,
179 int src_cnt, size_t len, enum async_tx_flags flags, 182 int src_cnt, size_t len, struct async_submit_ctl *submit)
180 struct dma_async_tx_descriptor *depend_tx,
181 dma_async_tx_callback cb_fn, void *cb_param)
182{ 183{
183 struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR, 184 struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
184 &dest, 1, src_list, 185 &dest, 1, src_list,
185 src_cnt, len); 186 src_cnt, len);
187 dma_addr_t *dma_src = NULL;
188
186 BUG_ON(src_cnt <= 1); 189 BUG_ON(src_cnt <= 1);
187 190
188 if (chan) { 191 if (submit->scribble)
192 dma_src = submit->scribble;
193 else if (sizeof(dma_addr_t) <= sizeof(struct page *))
194 dma_src = (dma_addr_t *) src_list;
195
196 if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {
189 /* run the xor asynchronously */ 197 /* run the xor asynchronously */
190 pr_debug("%s (async): len: %zu\n", __func__, len); 198 pr_debug("%s (async): len: %zu\n", __func__, len);
191 199
192 return do_async_xor(chan, dest, src_list, offset, src_cnt, len, 200 return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
193 flags, depend_tx, cb_fn, cb_param); 201 dma_src, submit);
194 } else { 202 } else {
195 /* run the xor synchronously */ 203 /* run the xor synchronously */
196 pr_debug("%s (sync): len: %zu\n", __func__, len); 204 pr_debug("%s (sync): len: %zu\n", __func__, len);
205 WARN_ONCE(chan, "%s: no space for dma address conversion\n",
206 __func__);
197 207
198 /* in the sync case the dest is an implied source 208 /* in the sync case the dest is an implied source
199 * (assumes the dest is the first source) 209 * (assumes the dest is the first source)
200 */ 210 */
201 if (flags & ASYNC_TX_XOR_DROP_DST) { 211 if (submit->flags & ASYNC_TX_XOR_DROP_DST) {
202 src_cnt--; 212 src_cnt--;
203 src_list++; 213 src_list++;
204 } 214 }
205 215
206 /* wait for any prerequisite operations */ 216 /* wait for any prerequisite operations */
207 async_tx_quiesce(&depend_tx); 217 async_tx_quiesce(&submit->depend_tx);
208 218
209 do_sync_xor(dest, src_list, offset, src_cnt, len, 219 do_sync_xor(dest, src_list, offset, src_cnt, len, submit);
210 flags, cb_fn, cb_param);
211 220
212 return NULL; 221 return NULL;
213 } 222 }
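/*
 * The dma_src selection in this hunk is the new scribble convention in
 * a nutshell: prefer a caller-provided scratch region, fall back to
 * reusing src_list itself, and allow that reuse only when a dma_addr_t
 * fits in a pointer slot (false on e.g. a 32-bit kernel with a 64-bit
 * dma_addr_t, hence the NULL dma_src and the synchronous fallback).
 */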
@@ -222,104 +231,94 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len)
222} 231}
223 232
224/** 233/**
225 * async_xor_zero_sum - attempt a xor parity check with a dma engine. 234 * async_xor_val - attempt a xor parity check with a dma engine.
226 * @dest: destination page used if the xor is performed synchronously 235 * @dest: destination page used if the xor is performed synchronously
227 * @src_list: array of source pages. The dest page must be listed as a source 236 * @src_list: array of source pages
228 * at index zero. The contents of this array may be overwritten.
229 * @offset: offset in pages to start transaction 237 * @offset: offset in pages to start transaction
230 * @src_cnt: number of source pages 238 * @src_cnt: number of source pages
231 * @len: length in bytes 239 * @len: length in bytes
232 * @result: 0 if sum == 0 else non-zero 240 * @result: 0 if sum == 0 else non-zero
233 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK 241 * @submit: submission / completion modifiers
234 * @depend_tx: xor depends on the result of this transaction. 242 *
235 * @cb_fn: function to call when the xor completes 243 * honored flags: ASYNC_TX_ACK
236 * @cb_param: parameter to pass to the callback routine 244 *
245 * src_list note: if the dest is also a source it must be at index zero.
246 * The contents of this array will be overwritten if a scribble region
247 * is not specified.
237 */ 248 */
238struct dma_async_tx_descriptor * 249struct dma_async_tx_descriptor *
239async_xor_zero_sum(struct page *dest, struct page **src_list, 250async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
240 unsigned int offset, int src_cnt, size_t len, 251 int src_cnt, size_t len, enum sum_check_flags *result,
241 u32 *result, enum async_tx_flags flags, 252 struct async_submit_ctl *submit)
242 struct dma_async_tx_descriptor *depend_tx,
243 dma_async_tx_callback cb_fn, void *cb_param)
244{ 253{
245 struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM, 254 struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL,
246 &dest, 1, src_list, 255 &dest, 1, src_list,
247 src_cnt, len); 256 src_cnt, len);
248 struct dma_device *device = chan ? chan->device : NULL; 257 struct dma_device *device = chan ? chan->device : NULL;
249 struct dma_async_tx_descriptor *tx = NULL; 258 struct dma_async_tx_descriptor *tx = NULL;
259 dma_addr_t *dma_src = NULL;
250 260
251 BUG_ON(src_cnt <= 1); 261 BUG_ON(src_cnt <= 1);
252 262
253 if (device && src_cnt <= device->max_xor) { 263 if (submit->scribble)
254 dma_addr_t *dma_src = (dma_addr_t *) src_list; 264 dma_src = submit->scribble;
255 unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; 265 else if (sizeof(dma_addr_t) <= sizeof(struct page *))
266 dma_src = (dma_addr_t *) src_list;
267
268 if (dma_src && device && src_cnt <= device->max_xor &&
269 is_dma_xor_aligned(device, offset, 0, len)) {
270 unsigned long dma_prep_flags = 0;
256 int i; 271 int i;
257 272
258 pr_debug("%s: (async) len: %zu\n", __func__, len); 273 pr_debug("%s: (async) len: %zu\n", __func__, len);
259 274
275 if (submit->cb_fn)
276 dma_prep_flags |= DMA_PREP_INTERRUPT;
277 if (submit->flags & ASYNC_TX_FENCE)
278 dma_prep_flags |= DMA_PREP_FENCE;
260 for (i = 0; i < src_cnt; i++) 279 for (i = 0; i < src_cnt; i++)
261 dma_src[i] = dma_map_page(device->dev, src_list[i], 280 dma_src[i] = dma_map_page(device->dev, src_list[i],
262 offset, len, DMA_TO_DEVICE); 281 offset, len, DMA_TO_DEVICE);
263 282
264 tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt, 283 tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt,
265 len, result, 284 len, result,
266 dma_prep_flags); 285 dma_prep_flags);
267 if (unlikely(!tx)) { 286 if (unlikely(!tx)) {
268 async_tx_quiesce(&depend_tx); 287 async_tx_quiesce(&submit->depend_tx);
269 288
270 while (!tx) { 289 while (!tx) {
271 dma_async_issue_pending(chan); 290 dma_async_issue_pending(chan);
272 tx = device->device_prep_dma_zero_sum(chan, 291 tx = device->device_prep_dma_xor_val(chan,
273 dma_src, src_cnt, len, result, 292 dma_src, src_cnt, len, result,
274 dma_prep_flags); 293 dma_prep_flags);
275 } 294 }
276 } 295 }
277 296
278 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); 297 async_tx_submit(chan, tx, submit);
279 } else { 298 } else {
280 unsigned long xor_flags = flags; 299 enum async_tx_flags flags_orig = submit->flags;
281 300
282 pr_debug("%s: (sync) len: %zu\n", __func__, len); 301 pr_debug("%s: (sync) len: %zu\n", __func__, len);
302 WARN_ONCE(device && src_cnt <= device->max_xor,
303 "%s: no space for dma address conversion\n",
304 __func__);
283 305
284 xor_flags |= ASYNC_TX_XOR_DROP_DST; 306 submit->flags |= ASYNC_TX_XOR_DROP_DST;
285 xor_flags &= ~ASYNC_TX_ACK; 307 submit->flags &= ~ASYNC_TX_ACK;
286 308
287 tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags, 309 tx = async_xor(dest, src_list, offset, src_cnt, len, submit);
288 depend_tx, NULL, NULL);
289 310
290 async_tx_quiesce(&tx); 311 async_tx_quiesce(&tx);
291 312
292 *result = page_is_zero(dest, offset, len) ? 0 : 1; 313 *result = !page_is_zero(dest, offset, len) << SUM_CHECK_P;
293 314
294 async_tx_sync_epilog(cb_fn, cb_param); 315 async_tx_sync_epilog(submit);
316 submit->flags = flags_orig;
295 } 317 }
296 318
297 return tx; 319 return tx;
298} 320}
299EXPORT_SYMBOL_GPL(async_xor_zero_sum); 321EXPORT_SYMBOL_GPL(async_xor_val);
300
301static int __init async_xor_init(void)
302{
303 #ifdef CONFIG_ASYNC_TX_DMA
304 /* To conserve stack space the input src_list (array of page pointers)
305 * is reused to hold the array of dma addresses passed to the driver.
306 * This conversion is only possible when dma_addr_t is no larger than
307 * the size of a pointer. HIGHMEM64G is known to violate this
308 * assumption.
309 */
310 BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(struct page *));
311 #endif
312
313 return 0;
314}
315
316static void __exit async_xor_exit(void)
317{
318 do { } while (0);
319}
320
321module_init(async_xor_init);
322module_exit(async_xor_exit);
323 322
324MODULE_AUTHOR("Intel Corporation"); 323MODULE_AUTHOR("Intel Corporation");
325MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api"); 324MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api");
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c
new file mode 100644
index 000000000000..3ec27c7e62ea
--- /dev/null
+++ b/crypto/async_tx/raid6test.c
@@ -0,0 +1,240 @@
1/*
2 * asynchronous raid6 recovery self test
3 * Copyright (c) 2009, Intel Corporation.
4 *
5 * based on drivers/md/raid6test/test.c:
6 * Copyright 2002-2007 H. Peter Anvin
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 */
22#include <linux/async_tx.h>
23#include <linux/random.h>
24
25#undef pr
26#define pr(fmt, args...) pr_info("raid6test: " fmt, ##args)
27
28#define NDISKS 16 /* Including P and Q */
29
30static struct page *dataptrs[NDISKS];
31static addr_conv_t addr_conv[NDISKS];
32static struct page *data[NDISKS+3];
33static struct page *spare;
34static struct page *recovi;
35static struct page *recovj;
36
37static void callback(void *param)
38{
39 struct completion *cmp = param;
40
41 complete(cmp);
42}
43
44static void makedata(int disks)
45{
46 int i, j;
47
48 for (i = 0; i < disks; i++) {
49 for (j = 0; j < PAGE_SIZE; j += sizeof(u32)) {
50 u32 *p = page_address(data[i]) + j;
51
52 *p = random32();
53 }
54
55 dataptrs[i] = data[i];
56 }
57}
58
59static char disk_type(int d, int disks)
60{
61 if (d == disks - 2)
62 return 'P';
63 else if (d == disks - 1)
64 return 'Q';
65 else
66 return 'D';
67}
68
69/* Recover two failed blocks. */
70static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs)
71{
72 struct async_submit_ctl submit;
73 struct completion cmp;
74 struct dma_async_tx_descriptor *tx = NULL;
75 enum sum_check_flags result = ~0;
76
77 if (faila > failb)
78 swap(faila, failb);
79
80 if (failb == disks-1) {
81 if (faila == disks-2) {
82 /* P+Q failure. Just rebuild the syndrome. */
83 init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
84 tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
85 } else {
86 struct page *blocks[disks];
87 struct page *dest;
88 int count = 0;
89 int i;
90
91 /* data+Q failure. Reconstruct data from P,
92 * then rebuild syndrome
93 */
94 for (i = disks; i-- ; ) {
95 if (i == faila || i == failb)
96 continue;
97 blocks[count++] = ptrs[i];
98 }
99 dest = ptrs[faila];
100 init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
101 NULL, NULL, addr_conv);
102 tx = async_xor(dest, blocks, 0, count, bytes, &submit);
103
104 init_async_submit(&submit, 0, tx, NULL, NULL, addr_conv);
105 tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
106 }
107 } else {
108 if (failb == disks-2) {
109 /* data+P failure. */
110 init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
111 tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit);
112 } else {
113 /* data+data failure. */
114 init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
115 tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit);
116 }
117 }
118 init_completion(&cmp);
119 init_async_submit(&submit, ASYNC_TX_ACK, tx, callback, &cmp, addr_conv);
120 tx = async_syndrome_val(ptrs, 0, disks, bytes, &result, spare, &submit);
121 async_tx_issue_pending(tx);
122
123 if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0)
124 pr("%s: timeout! (faila: %d failb: %d disks: %d)\n",
125 __func__, faila, failb, disks);
126
127 if (result != 0)
128 pr("%s: validation failure! faila: %d failb: %d sum_check_flags: %x\n",
129 __func__, faila, failb, result);
130}
131
132static int test_disks(int i, int j, int disks)
133{
134 int erra, errb;
135
136 memset(page_address(recovi), 0xf0, PAGE_SIZE);
137 memset(page_address(recovj), 0xba, PAGE_SIZE);
138
139 dataptrs[i] = recovi;
140 dataptrs[j] = recovj;
141
142 raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs);
143
144 erra = memcmp(page_address(data[i]), page_address(recovi), PAGE_SIZE);
145 errb = memcmp(page_address(data[j]), page_address(recovj), PAGE_SIZE);
146
147 pr("%s(%d, %d): faila=%3d(%c) failb=%3d(%c) %s\n",
148 __func__, i, j, i, disk_type(i, disks), j, disk_type(j, disks),
149 (!erra && !errb) ? "OK" : !erra ? "ERRB" : !errb ? "ERRA" : "ERRAB");
150
151 dataptrs[i] = data[i];
152 dataptrs[j] = data[j];
153
154 return erra || errb;
155}
156
157static int test(int disks, int *tests)
158{
159 struct dma_async_tx_descriptor *tx;
160 struct async_submit_ctl submit;
161 struct completion cmp;
162 int err = 0;
163 int i, j;
164
165 recovi = data[disks];
166 recovj = data[disks+1];
167 spare = data[disks+2];
168
169 makedata(disks);
170
171 /* Nuke syndromes */
172 memset(page_address(data[disks-2]), 0xee, PAGE_SIZE);
173 memset(page_address(data[disks-1]), 0xee, PAGE_SIZE);
174
175 /* Generate assumed good syndrome */
176 init_completion(&cmp);
177 init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, addr_conv);
178 tx = async_gen_syndrome(dataptrs, 0, disks, PAGE_SIZE, &submit);
179 async_tx_issue_pending(tx);
180
181 if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) {
182 pr("error: initial gen_syndrome(%d) timed out\n", disks);
183 return 1;
184 }
185
186 pr("testing the %d-disk case...\n", disks);
187 for (i = 0; i < disks-1; i++)
188 for (j = i+1; j < disks; j++) {
189 (*tests)++;
190 err += test_disks(i, j, disks);
191 }
192
193 return err;
194}
195
196
197static int raid6_test(void)
198{
199 int err = 0;
200 int tests = 0;
201 int i;
202
203 for (i = 0; i < NDISKS+3; i++) {
204 data[i] = alloc_page(GFP_KERNEL);
205 if (!data[i]) {
206 while (i--)
207 put_page(data[i]);
208 return -ENOMEM;
209 }
210 }
211
212 /* the 4-disk and 5-disk cases are special for the recovery code */
213 if (NDISKS > 4)
214 err += test(4, &tests);
215 if (NDISKS > 5)
216 err += test(5, &tests);
217 err += test(NDISKS, &tests);
218
219 pr("\n");
220 pr("complete (%d tests, %d failure%s)\n",
221 tests, err, err == 1 ? "" : "s");
222
223 for (i = 0; i < NDISKS+3; i++)
224 put_page(data[i]);
225
226 return 0;
227}
228
229static void raid6_test_exit(void)
230{
231}
232
233/* when compiled-in, wait for drivers to load first (assumes dma drivers
234 * are also compiled-in)
235 */
236late_initcall(raid6_test);
237module_exit(raid6_test_exit);
238MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
239MODULE_DESCRIPTION("asynchronous RAID-6 recovery self tests");
240MODULE_LICENSE("GPL");
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index d295bdccc09c..9335b87c5174 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -115,6 +115,9 @@ static const struct file_operations acpi_button_state_fops = {
115 .release = single_release, 115 .release = single_release,
116}; 116};
117 117
118static BLOCKING_NOTIFIER_HEAD(acpi_lid_notifier);
119static struct acpi_device *lid_device;
120
118/* -------------------------------------------------------------------------- 121/* --------------------------------------------------------------------------
119 FS Interface (/proc) 122 FS Interface (/proc)
120 -------------------------------------------------------------------------- */ 123 -------------------------------------------------------------------------- */
@@ -231,11 +234,38 @@ static int acpi_button_remove_fs(struct acpi_device *device)
231/* -------------------------------------------------------------------------- 234/* --------------------------------------------------------------------------
232 Driver Interface 235 Driver Interface
233 -------------------------------------------------------------------------- */ 236 -------------------------------------------------------------------------- */
237int acpi_lid_notifier_register(struct notifier_block *nb)
238{
239 return blocking_notifier_chain_register(&acpi_lid_notifier, nb);
240}
241EXPORT_SYMBOL(acpi_lid_notifier_register);
242
243int acpi_lid_notifier_unregister(struct notifier_block *nb)
244{
245 return blocking_notifier_chain_unregister(&acpi_lid_notifier, nb);
246}
247EXPORT_SYMBOL(acpi_lid_notifier_unregister);
248
249int acpi_lid_open(void)
250{
251 acpi_status status;
252 unsigned long long state;
253
254 status = acpi_evaluate_integer(lid_device->handle, "_LID", NULL,
255 &state);
256 if (ACPI_FAILURE(status))
257 return -ENODEV;
258
259 return !!state;
260}
261EXPORT_SYMBOL(acpi_lid_open);
262
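/*
 * Hypothetical consumer sketch (my_lid_notify/my_lid_nb are illustrative
 * names only): a driver pairing the interfaces above might do roughly
 *
 *	static int my_lid_notify(struct notifier_block *nb,
 *				 unsigned long state, void *data)
 *	{
 *		pr_info("lid %s\n", state ? "opened" : "closed");
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_lid_nb = {
 *		.notifier_call = my_lid_notify,
 *	};
 *
 *	...
 *	acpi_lid_notifier_register(&my_lid_nb);
 *	if (acpi_lid_open() == 1)
 *		...	(lid is currently open)
 *
 * with a matching acpi_lid_notifier_unregister() on the teardown path.
 */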
234static int acpi_lid_send_state(struct acpi_device *device) 263static int acpi_lid_send_state(struct acpi_device *device)
235{ 264{
236 struct acpi_button *button = acpi_driver_data(device); 265 struct acpi_button *button = acpi_driver_data(device);
237 unsigned long long state; 266 unsigned long long state;
238 acpi_status status; 267 acpi_status status;
268 int ret;
239 269
240 status = acpi_evaluate_integer(device->handle, "_LID", NULL, &state); 270 status = acpi_evaluate_integer(device->handle, "_LID", NULL, &state);
241 if (ACPI_FAILURE(status)) 271 if (ACPI_FAILURE(status))
@@ -244,7 +274,12 @@ static int acpi_lid_send_state(struct acpi_device *device)
244 /* input layer checks if event is redundant */ 274 /* input layer checks if event is redundant */
245 input_report_switch(button->input, SW_LID, !state); 275 input_report_switch(button->input, SW_LID, !state);
246 input_sync(button->input); 276 input_sync(button->input);
247 return 0; 277
278 ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, device);
279 if (ret == NOTIFY_DONE)
280 ret = blocking_notifier_call_chain(&acpi_lid_notifier, state,
281 device);
282 return ret;
248} 283}
249 284
250static void acpi_button_notify(struct acpi_device *device, u32 event) 285static void acpi_button_notify(struct acpi_device *device, u32 event)
@@ -366,8 +401,14 @@ static int acpi_button_add(struct acpi_device *device)
366 error = input_register_device(input); 401 error = input_register_device(input);
367 if (error) 402 if (error)
368 goto err_remove_fs; 403 goto err_remove_fs;
369 if (button->type == ACPI_BUTTON_TYPE_LID) 404 if (button->type == ACPI_BUTTON_TYPE_LID) {
370 acpi_lid_send_state(device); 405 acpi_lid_send_state(device);
406 /*
407 * This assumes there's only one lid device, or if there are
408 * more we only care about the last one...
409 */
410 lid_device = device;
411 }
371 412
372 if (device->wakeup.flags.valid) { 413 if (device->wakeup.flags.valid) {
373 /* Button's GPE is run-wake GPE */ 414 /* Button's GPE is run-wake GPE */
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 71d1b9bab70b..614da5b8613a 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -3412,7 +3412,7 @@ static int cdrom_print_info(const char *header, int val, char *info,
3412 return 0; 3412 return 0;
3413} 3413}
3414 3414
3415static int cdrom_sysctl_info(ctl_table *ctl, int write, struct file * filp, 3415static int cdrom_sysctl_info(ctl_table *ctl, int write,
3416 void __user *buffer, size_t *lenp, loff_t *ppos) 3416 void __user *buffer, size_t *lenp, loff_t *ppos)
3417{ 3417{
3418 int pos; 3418 int pos;
@@ -3489,7 +3489,7 @@ static int cdrom_sysctl_info(ctl_table *ctl, int write, struct file * filp,
3489 goto done; 3489 goto done;
3490doit: 3490doit:
3491 mutex_unlock(&cdrom_mutex); 3491 mutex_unlock(&cdrom_mutex);
3492 return proc_dostring(ctl, write, filp, buffer, lenp, ppos); 3492 return proc_dostring(ctl, write, buffer, lenp, ppos);
3493done: 3493done:
3494 printk(KERN_INFO "cdrom: info buffer too small\n"); 3494 printk(KERN_INFO "cdrom: info buffer too small\n");
3495 goto doit; 3495 goto doit;
@@ -3525,12 +3525,12 @@ static void cdrom_update_settings(void)
3525 mutex_unlock(&cdrom_mutex); 3525 mutex_unlock(&cdrom_mutex);
3526} 3526}
3527 3527
3528static int cdrom_sysctl_handler(ctl_table *ctl, int write, struct file * filp, 3528static int cdrom_sysctl_handler(ctl_table *ctl, int write,
3529 void __user *buffer, size_t *lenp, loff_t *ppos) 3529 void __user *buffer, size_t *lenp, loff_t *ppos)
3530{ 3530{
3531 int ret; 3531 int ret;
3532 3532
3533 ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 3533 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3534 3534
3535 if (write) { 3535 if (write) {
3536 3536
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 6a06913b01d3..08a6f50ae791 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -1087,6 +1087,14 @@ config MMTIMER
1087 The mmtimer device allows direct userspace access to the 1087 The mmtimer device allows direct userspace access to the
1088 Altix system timer. 1088 Altix system timer.
1089 1089
1090config UV_MMTIMER
1091 tristate "UV_MMTIMER Memory mapped RTC for SGI UV"
1092 depends on X86_UV
1093 default m
1094 help
1095 The uv_mmtimer device allows direct userspace access to the
1096 UV system timer.
1097
1090source "drivers/char/tpm/Kconfig" 1098source "drivers/char/tpm/Kconfig"
1091 1099
1092config TELCLOCK 1100config TELCLOCK
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 66f779ad4f4c..19a79dd79eee 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -58,6 +58,7 @@ obj-$(CONFIG_RAW_DRIVER) += raw.o
58obj-$(CONFIG_SGI_SNSC) += snsc.o snsc_event.o 58obj-$(CONFIG_SGI_SNSC) += snsc.o snsc_event.o
59obj-$(CONFIG_MSPEC) += mspec.o 59obj-$(CONFIG_MSPEC) += mspec.o
60obj-$(CONFIG_MMTIMER) += mmtimer.o 60obj-$(CONFIG_MMTIMER) += mmtimer.o
61obj-$(CONFIG_UV_MMTIMER) += uv_mmtimer.o
61obj-$(CONFIG_VIOTAPE) += viotape.o 62obj-$(CONFIG_VIOTAPE) += viotape.o
62obj-$(CONFIG_HVCS) += hvcs.o 63obj-$(CONFIG_HVCS) += hvcs.o
63obj-$(CONFIG_IBM_BSR) += bsr.o 64obj-$(CONFIG_IBM_BSR) += bsr.o
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 1540e693d91e..4068467ce7b9 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -46,6 +46,8 @@
46#define PCI_DEVICE_ID_INTEL_Q35_IG 0x29B2 46#define PCI_DEVICE_ID_INTEL_Q35_IG 0x29B2
47#define PCI_DEVICE_ID_INTEL_Q33_HB 0x29D0 47#define PCI_DEVICE_ID_INTEL_Q33_HB 0x29D0
48#define PCI_DEVICE_ID_INTEL_Q33_IG 0x29D2 48#define PCI_DEVICE_ID_INTEL_Q33_IG 0x29D2
49#define PCI_DEVICE_ID_INTEL_B43_HB 0x2E40
50#define PCI_DEVICE_ID_INTEL_B43_IG 0x2E42
49#define PCI_DEVICE_ID_INTEL_GM45_HB 0x2A40 51#define PCI_DEVICE_ID_INTEL_GM45_HB 0x2A40
50#define PCI_DEVICE_ID_INTEL_GM45_IG 0x2A42 52#define PCI_DEVICE_ID_INTEL_GM45_IG 0x2A42
51#define PCI_DEVICE_ID_INTEL_IGD_E_HB 0x2E00 53#define PCI_DEVICE_ID_INTEL_IGD_E_HB 0x2E00
@@ -91,6 +93,7 @@
91 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G45_HB || \ 93 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G45_HB || \
92 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_GM45_HB || \ 94 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_GM45_HB || \
93 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G41_HB || \ 95 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G41_HB || \
96 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_B43_HB || \
94 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_D_HB || \ 97 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_D_HB || \
95 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_M_HB || \ 98 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_M_HB || \
96 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_MA_HB) 99 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_MA_HB)
@@ -804,23 +807,39 @@ static void intel_i830_setup_flush(void)
804 if (!intel_private.i8xx_page) 807 if (!intel_private.i8xx_page)
805 return; 808 return;
806 809
807 /* make page uncached */
808 map_page_into_agp(intel_private.i8xx_page);
809
810 intel_private.i8xx_flush_page = kmap(intel_private.i8xx_page); 810 intel_private.i8xx_flush_page = kmap(intel_private.i8xx_page);
811 if (!intel_private.i8xx_flush_page) 811 if (!intel_private.i8xx_flush_page)
812 intel_i830_fini_flush(); 812 intel_i830_fini_flush();
813} 813}
814 814
815static void
816do_wbinvd(void *null)
817{
818 wbinvd();
819}
820
821/* The chipset_flush interface needs to get data that has already been
822 * flushed out of the CPU all the way out to main memory, because the GPU
823 * doesn't snoop those buffers.
824 *
825 * The 8xx series doesn't have the same lovely interface for flushing the
826 * chipset write buffers that the later chips do. According to the 865
827 * specs, it's 64 octwords, or 1KB. So, to get those previous things in
828 * that buffer out, we just fill 1KB and clflush it out, on the assumption
829 * that it'll push whatever was in there out. It appears to work.
830 */
815static void intel_i830_chipset_flush(struct agp_bridge_data *bridge) 831static void intel_i830_chipset_flush(struct agp_bridge_data *bridge)
816{ 832{
817 unsigned int *pg = intel_private.i8xx_flush_page; 833 unsigned int *pg = intel_private.i8xx_flush_page;
818 int i;
819 834
820 for (i = 0; i < 256; i += 2) 835 memset(pg, 0, 1024);
821 *(pg + i) = i;
822 836
823 wmb(); 837 if (cpu_has_clflush) {
838 clflush_cache_range(pg, 1024);
839 } else {
840 if (on_each_cpu(do_wbinvd, NULL, 1) != 0)
841 printk(KERN_ERR "Timed out waiting for cache flush.\n");
842 }
824} 843}
825 844
826/* The intel i830 automatically initializes the agp aperture during POST. 845/* The intel i830 automatically initializes the agp aperture during POST.
@@ -1341,6 +1360,7 @@ static void intel_i965_get_gtt_range(int *gtt_offset, int *gtt_size)
1341 case PCI_DEVICE_ID_INTEL_Q45_HB: 1360 case PCI_DEVICE_ID_INTEL_Q45_HB:
1342 case PCI_DEVICE_ID_INTEL_G45_HB: 1361 case PCI_DEVICE_ID_INTEL_G45_HB:
1343 case PCI_DEVICE_ID_INTEL_G41_HB: 1362 case PCI_DEVICE_ID_INTEL_G41_HB:
1363 case PCI_DEVICE_ID_INTEL_B43_HB:
1344 case PCI_DEVICE_ID_INTEL_IGDNG_D_HB: 1364 case PCI_DEVICE_ID_INTEL_IGDNG_D_HB:
1345 case PCI_DEVICE_ID_INTEL_IGDNG_M_HB: 1365 case PCI_DEVICE_ID_INTEL_IGDNG_M_HB:
1346 case PCI_DEVICE_ID_INTEL_IGDNG_MA_HB: 1366 case PCI_DEVICE_ID_INTEL_IGDNG_MA_HB:
@@ -2335,6 +2355,8 @@ static const struct intel_driver_description {
2335 "Q45/Q43", NULL, &intel_i965_driver }, 2355 "Q45/Q43", NULL, &intel_i965_driver },
2336 { PCI_DEVICE_ID_INTEL_G45_HB, PCI_DEVICE_ID_INTEL_G45_IG, 0, 2356 { PCI_DEVICE_ID_INTEL_G45_HB, PCI_DEVICE_ID_INTEL_G45_IG, 0,
2337 "G45/G43", NULL, &intel_i965_driver }, 2357 "G45/G43", NULL, &intel_i965_driver },
2358 { PCI_DEVICE_ID_INTEL_B43_HB, PCI_DEVICE_ID_INTEL_B43_IG, 0,
2359 "B43", NULL, &intel_i965_driver },
2338 { PCI_DEVICE_ID_INTEL_G41_HB, PCI_DEVICE_ID_INTEL_G41_IG, 0, 2360 { PCI_DEVICE_ID_INTEL_G41_HB, PCI_DEVICE_ID_INTEL_G41_IG, 0,
2339 "G41", NULL, &intel_i965_driver }, 2361 "G41", NULL, &intel_i965_driver },
2340 { PCI_DEVICE_ID_INTEL_IGDNG_D_HB, PCI_DEVICE_ID_INTEL_IGDNG_D_IG, 0, 2362 { PCI_DEVICE_ID_INTEL_IGDNG_D_HB, PCI_DEVICE_ID_INTEL_IGDNG_D_IG, 0,
@@ -2535,6 +2557,7 @@ static struct pci_device_id agp_intel_pci_table[] = {
2535 ID(PCI_DEVICE_ID_INTEL_Q45_HB), 2557 ID(PCI_DEVICE_ID_INTEL_Q45_HB),
2536 ID(PCI_DEVICE_ID_INTEL_G45_HB), 2558 ID(PCI_DEVICE_ID_INTEL_G45_HB),
2537 ID(PCI_DEVICE_ID_INTEL_G41_HB), 2559 ID(PCI_DEVICE_ID_INTEL_G41_HB),
2560 ID(PCI_DEVICE_ID_INTEL_B43_HB),
2538 ID(PCI_DEVICE_ID_INTEL_IGDNG_D_HB), 2561 ID(PCI_DEVICE_ID_INTEL_IGDNG_D_HB),
2539 ID(PCI_DEVICE_ID_INTEL_IGDNG_M_HB), 2562 ID(PCI_DEVICE_ID_INTEL_IGDNG_M_HB),
2540 ID(PCI_DEVICE_ID_INTEL_IGDNG_MA_HB), 2563 ID(PCI_DEVICE_ID_INTEL_IGDNG_MA_HB),
diff --git a/drivers/char/bfin-otp.c b/drivers/char/bfin-otp.c
index 0a01329451e4..e3dd24bff514 100644
--- a/drivers/char/bfin-otp.c
+++ b/drivers/char/bfin-otp.c
@@ -1,8 +1,7 @@
 /*
  * Blackfin On-Chip OTP Memory Interface
- * Supports BF52x/BF54x
  *
- * Copyright 2007-2008 Analog Devices Inc.
+ * Copyright 2007-2009 Analog Devices Inc.
  *
  * Enter bugs at http://blackfin.uclinux.org/
  *
@@ -17,8 +16,10 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/types.h>
+#include <mtd/mtd-abi.h>
 
 #include <asm/blackfin.h>
+#include <asm/bfrom.h>
 #include <asm/uaccess.h>
 
 #define stamp(fmt, args...) pr_debug("%s:%i: " fmt "\n", __func__, __LINE__, ## args)
@@ -30,39 +31,6 @@
 
 static DEFINE_MUTEX(bfin_otp_lock);
 
-/* OTP Boot ROM functions */
-#define _BOOTROM_OTP_COMMAND 0xEF000018
-#define _BOOTROM_OTP_READ 0xEF00001A
-#define _BOOTROM_OTP_WRITE 0xEF00001C
-
-static u32 (* const otp_command)(u32 command, u32 value) = (void *)_BOOTROM_OTP_COMMAND;
-static u32 (* const otp_read)(u32 page, u32 flags, u64 *page_content) = (void *)_BOOTROM_OTP_READ;
-static u32 (* const otp_write)(u32 page, u32 flags, u64 *page_content) = (void *)_BOOTROM_OTP_WRITE;
-
-/* otp_command(): defines for "command" */
-#define OTP_INIT 0x00000001
-#define OTP_CLOSE 0x00000002
-
-/* otp_{read,write}(): defines for "flags" */
-#define OTP_LOWER_HALF 0x00000000 /* select upper/lower 64-bit half (bit 0) */
-#define OTP_UPPER_HALF 0x00000001
-#define OTP_NO_ECC 0x00000010 /* do not use ECC */
-#define OTP_LOCK 0x00000020 /* sets page protection bit for page */
-#define OTP_ACCESS_READ 0x00001000
-#define OTP_ACCESS_READWRITE 0x00002000
-
-/* Return values for all functions */
-#define OTP_SUCCESS 0x00000000
-#define OTP_MASTER_ERROR 0x001
-#define OTP_WRITE_ERROR 0x003
-#define OTP_READ_ERROR 0x005
-#define OTP_ACC_VIO_ERROR 0x009
-#define OTP_DATA_MULT_ERROR 0x011
-#define OTP_ECC_MULT_ERROR 0x021
-#define OTP_PREV_WR_ERROR 0x041
-#define OTP_DATA_SB_WARN 0x100
-#define OTP_ECC_SB_WARN 0x200
-
 /**
  * bfin_otp_read - Read OTP pages
  *
@@ -86,9 +54,11 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count,
 	page = *pos / (sizeof(u64) * 2);
 	while (bytes_done < count) {
 		flags = (*pos % (sizeof(u64) * 2) ? OTP_UPPER_HALF : OTP_LOWER_HALF);
-		stamp("processing page %i (%s)", page, (flags == OTP_UPPER_HALF ? "upper" : "lower"));
-		ret = otp_read(page, flags, &content);
+		stamp("processing page %i (0x%x:%s)", page, flags,
+			(flags & OTP_UPPER_HALF ? "upper" : "lower"));
+		ret = bfrom_OtpRead(page, flags, &content);
 		if (ret & OTP_MASTER_ERROR) {
+			stamp("error from otp: 0x%x", ret);
 			bytes_done = -EIO;
 			break;
 		}
@@ -96,7 +66,7 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count,
 			bytes_done = -EFAULT;
 			break;
 		}
-		if (flags == OTP_UPPER_HALF)
+		if (flags & OTP_UPPER_HALF)
 			++page;
 		bytes_done += sizeof(content);
 		*pos += sizeof(content);
@@ -108,14 +78,53 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count,
 }
 
 #ifdef CONFIG_BFIN_OTP_WRITE_ENABLE
+static bool allow_writes;
+
+/**
+ * bfin_otp_init_timing - setup OTP timing parameters
+ *
+ * Required before doing any write operation.  Algorithms from HRM.
+ */
+static u32 bfin_otp_init_timing(void)
+{
+	u32 tp1, tp2, tp3, timing;
+
+	tp1 = get_sclk() / 1000000;
+	tp2 = (2 * get_sclk() / 10000000) << 8;
+	tp3 = (0x1401) << 15;
+	timing = tp1 | tp2 | tp3;
+	if (bfrom_OtpCommand(OTP_INIT, timing))
+		return 0;
+
+	return timing;
+}
+
+/**
+ * bfin_otp_deinit_timing - set timings to only allow reads
+ *
+ * Should be called after all writes are done.
+ */
+static void bfin_otp_deinit_timing(u32 timing)
+{
+	/* mask bits [31:15] so that any attempts to write fail */
+	bfrom_OtpCommand(OTP_CLOSE, 0);
+	bfrom_OtpCommand(OTP_INIT, timing & ~(-1 << 15));
+	bfrom_OtpCommand(OTP_CLOSE, 0);
+}
+
 /**
- * bfin_otp_write - Write OTP pages
+ * bfin_otp_write - write OTP pages
  *
  * All writes must be in half page chunks (half page == 64 bits).
  */
 static ssize_t bfin_otp_write(struct file *filp, const char __user *buff, size_t count, loff_t *pos)
 {
-	stampit();
+	ssize_t bytes_done;
+	u32 timing, page, base_flags, flags, ret;
+	u64 content;
+
+	if (!allow_writes)
+		return -EACCES;
 
 	if (count % sizeof(u64))
 		return -EMSGSIZE;
@@ -123,20 +132,96 @@ static ssize_t bfin_otp_write(struct file *filp, const char __user *buff, size_t
 	if (mutex_lock_interruptible(&bfin_otp_lock))
 		return -ERESTARTSYS;
 
-	/* need otp_init() documentation before this can be implemented */
+	stampit();
+
+	timing = bfin_otp_init_timing();
+	if (timing == 0) {
+		mutex_unlock(&bfin_otp_lock);
+		return -EIO;
+	}
+
+	base_flags = OTP_CHECK_FOR_PREV_WRITE;
+
+	bytes_done = 0;
+	page = *pos / (sizeof(u64) * 2);
+	while (bytes_done < count) {
+		flags = base_flags | (*pos % (sizeof(u64) * 2) ? OTP_UPPER_HALF : OTP_LOWER_HALF);
+		stamp("processing page %i (0x%x:%s) from %p", page, flags,
+			(flags & OTP_UPPER_HALF ? "upper" : "lower"), buff + bytes_done);
+		if (copy_from_user(&content, buff + bytes_done, sizeof(content))) {
+			bytes_done = -EFAULT;
+			break;
+		}
+		ret = bfrom_OtpWrite(page, flags, &content);
+		if (ret & OTP_MASTER_ERROR) {
+			stamp("error from otp: 0x%x", ret);
+			bytes_done = -EIO;
+			break;
+		}
+		if (flags & OTP_UPPER_HALF)
+			++page;
+		bytes_done += sizeof(content);
+		*pos += sizeof(content);
+	}
+
+	bfin_otp_deinit_timing(timing);
 
 	mutex_unlock(&bfin_otp_lock);
 
+	return bytes_done;
+}
+
+static long bfin_otp_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
+{
+	stampit();
+
+	switch (cmd) {
+	case OTPLOCK: {
+		u32 timing;
+		int ret = -EIO;
+
+		if (!allow_writes)
+			return -EACCES;
+
+		if (mutex_lock_interruptible(&bfin_otp_lock))
+			return -ERESTARTSYS;
+
+		timing = bfin_otp_init_timing();
+		if (timing) {
+			u32 otp_result = bfrom_OtpWrite(arg, OTP_LOCK, NULL);
+			stamp("locking page %lu resulted in 0x%x", arg, otp_result);
+			if (!(otp_result & OTP_MASTER_ERROR))
+				ret = 0;
+
+			bfin_otp_deinit_timing(timing);
+		}
+
+		mutex_unlock(&bfin_otp_lock);
+
+		return ret;
+	}
+
+	case MEMLOCK:
+		allow_writes = false;
+		return 0;
+
+	case MEMUNLOCK:
+		allow_writes = true;
+		return 0;
+	}
+
 	return -EINVAL;
 }
 #else
 # define bfin_otp_write NULL
+# define bfin_otp_ioctl NULL
 #endif
 
 static struct file_operations bfin_otp_fops = {
 	.owner		= THIS_MODULE,
+	.unlocked_ioctl	= bfin_otp_ioctl,
 	.read		= bfin_otp_read,
 	.write		= bfin_otp_write,
 };
 
 static struct miscdevice bfin_otp_misc_device = {
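A quick side note on the addressing used by both bfin_otp_read() and the new bfin_otp_write(): each OTP page is 128 bits wide and is accessed as two 64-bit halves, so the page index and the upper/lower select fall straight out of the file offset. The standalone sketch below is illustration only (plain userspace C, not driver code) and reproduces that mapping:

/*
 * Illustration only: how a byte offset into /dev/bfin-otp maps onto
 * 128-bit OTP pages.  Each page holds two 64-bit halves, so bit 4 of
 * the offset selects the half, mirroring the driver's arithmetic.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t pos;

	for (pos = 0; pos < 64; pos += 8) {
		unsigned page = pos / (sizeof(uint64_t) * 2);
		unsigned upper = (pos % (sizeof(uint64_t) * 2)) != 0;

		printf("offset %2llu -> page %u, %s half\n",
		       (unsigned long long)pos, page,
		       upper ? "upper" : "lower");
	}
	return 0;
}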
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 4a9f3492b921..70a770ac0138 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -166,9 +166,8 @@ static irqreturn_t hpet_interrupt(int irq, void *data)
 		unsigned long m, t;
 
 		t = devp->hd_ireqfreq;
-		m = read_counter(&devp->hd_hpet->hpet_mc);
-		write_counter(t + m + devp->hd_hpets->hp_delta,
-			      &devp->hd_timer->hpet_compare);
+		m = read_counter(&devp->hd_timer->hpet_compare);
+		write_counter(t + m, &devp->hd_timer->hpet_compare);
 	}
 
 	if (devp->hd_flags & HPET_SHARED_IRQ)
@@ -504,21 +503,25 @@ static int hpet_ioctl_ieon(struct hpet_dev *devp)
 	g = v | Tn_32MODE_CNF_MASK | Tn_INT_ENB_CNF_MASK;
 
 	if (devp->hd_flags & HPET_PERIODIC) {
-		write_counter(t, &timer->hpet_compare);
 		g |= Tn_TYPE_CNF_MASK;
-		v |= Tn_TYPE_CNF_MASK;
-		writeq(v, &timer->hpet_config);
-		v |= Tn_VAL_SET_CNF_MASK;
+		v |= Tn_TYPE_CNF_MASK | Tn_VAL_SET_CNF_MASK;
 		writeq(v, &timer->hpet_config);
 		local_irq_save(flags);
 
-		/* NOTE: what we modify here is a hidden accumulator
+		/*
+		 * NOTE: First we modify the hidden accumulator
 		 * register supported by periodic-capable comparators.
 		 * We never want to modify the (single) counter; that
-		 * would affect all the comparators.
+		 * would affect all the comparators. The value written
+		 * is the counter value when the first interrupt is due.
 		 */
 		m = read_counter(&hpet->hpet_mc);
 		write_counter(t + m + hpetp->hp_delta, &timer->hpet_compare);
+		/*
+		 * Then we modify the comparator, indicating the period
+		 * for subsequent interrupt.
+		 */
+		write_counter(t, &timer->hpet_compare);
 	} else {
 		local_irq_save(flags);
 		m = read_counter(&hpet->hpet_mc);
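For readers tracing the periodic-mode hunk above: with Tn_VAL_SET_CNF armed, the first comparator write sets the absolute time of the first interrupt, and the second write lands in the hidden accumulator as the reload period. The toy model below is illustration only (plain userspace C; t, m and delta are made-up tick values, not values from the patch):

/*
 * Illustration only: a toy model of a periodic comparator.  cmp holds
 * the next event time, period the reload value; the two-write sequence
 * seeds them with (t + m + delta) and t respectively.
 */
#include <stdio.h>

int main(void)
{
	unsigned long t = 100;		/* period in counter ticks */
	unsigned long m = 5000;		/* counter value at setup time */
	unsigned long delta = 3;	/* hp_delta-style fudge factor */
	unsigned long cmp = t + m + delta;	/* first write: first event */
	unsigned long period = t;		/* second write: reload value */
	int i;

	for (i = 0; i < 3; i++) {
		printf("interrupt at counter value %lu\n", cmp);
		cmp += period;		/* hardware re-adds the accumulator */
	}
	return 0;
}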
diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c
index 25ce15bb1c08..a632f25f144a 100644
--- a/drivers/char/hvc_console.c
+++ b/drivers/char/hvc_console.c
@@ -678,7 +678,7 @@ int hvc_poll(struct hvc_struct *hp)
 EXPORT_SYMBOL_GPL(hvc_poll);
 
 /**
- * hvc_resize() - Update terminal window size information.
+ * __hvc_resize() - Update terminal window size information.
  * @hp:		HVC console pointer
  * @ws:		Terminal window size structure
  *
@@ -687,12 +687,12 @@ EXPORT_SYMBOL_GPL(hvc_poll);
  *
  * Locking:	Locking free; the function MUST be called holding hp->lock
  */
-void hvc_resize(struct hvc_struct *hp, struct winsize ws)
+void __hvc_resize(struct hvc_struct *hp, struct winsize ws)
 {
 	hp->ws = ws;
 	schedule_work(&hp->tty_resize);
 }
-EXPORT_SYMBOL_GPL(hvc_resize);
+EXPORT_SYMBOL_GPL(__hvc_resize);
 
 /*
  * This kthread is either polling or interrupt driven. This is determined by
diff --git a/drivers/char/hvc_console.h b/drivers/char/hvc_console.h
index 3c85d78c975c..10950ca706d8 100644
--- a/drivers/char/hvc_console.h
+++ b/drivers/char/hvc_console.h
@@ -28,6 +28,7 @@
 #define HVC_CONSOLE_H
 #include <linux/kref.h>
 #include <linux/tty.h>
+#include <linux/spinlock.h>
 
 /*
  * This is the max number of console adapters that can/will be found as
@@ -88,7 +89,16 @@ int hvc_poll(struct hvc_struct *hp);
 void hvc_kick(void);
 
 /* Resize hvc tty terminal window */
-extern void hvc_resize(struct hvc_struct *hp, struct winsize ws);
+extern void __hvc_resize(struct hvc_struct *hp, struct winsize ws);
+
+static inline void hvc_resize(struct hvc_struct *hp, struct winsize ws)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&hp->lock, flags);
+	__hvc_resize(hp, ws);
+	spin_unlock_irqrestore(&hp->lock, flags);
+}
 
 /* default notifier for irq based notification */
 extern int notifier_add_irq(struct hvc_struct *hp, int data);
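The hvc_resize()/__hvc_resize() split above is the common locked/unlocked pairing: the double-underscore variant requires its caller to hold hp->lock, while the inline wrapper takes the lock itself. A generic sketch of the pattern, illustration only (obj, __obj_update and the pthread mutex are stand-ins, not kernel API):

#include <pthread.h>
#include <stdio.h>

struct obj {
	pthread_mutex_t lock;
	int state;
};

/* __obj_update: caller must already hold o->lock (the __hvc_resize role) */
static void __obj_update(struct obj *o, int v)
{
	o->state = v;
}

/* obj_update: locking wrapper for everyone else (the hvc_resize role) */
static void obj_update(struct obj *o, int v)
{
	pthread_mutex_lock(&o->lock);
	__obj_update(o, v);
	pthread_mutex_unlock(&o->lock);
}

int main(void)
{
	struct obj o = { PTHREAD_MUTEX_INITIALIZER, 0 };

	obj_update(&o, 42);
	printf("%d\n", o.state);
	return 0;
}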
diff --git a/drivers/char/hvc_iucv.c b/drivers/char/hvc_iucv.c
index 0ecac7e532f6..b8a5d654d3d0 100644
--- a/drivers/char/hvc_iucv.c
+++ b/drivers/char/hvc_iucv.c
@@ -273,7 +273,9 @@ static int hvc_iucv_write(struct hvc_iucv_private *priv,
 	case MSG_TYPE_WINSIZE:
 		if (rb->mbuf->datalen != sizeof(struct winsize))
 			break;
-		hvc_resize(priv->hvc, *((struct winsize *) rb->mbuf->data));
+		/* The caller must ensure that the hvc is locked, which
+		 * is the case when called from hvc_iucv_get_chars() */
+		__hvc_resize(priv->hvc, *((struct winsize *) rb->mbuf->data));
 		break;
 
 	case MSG_TYPE_ERROR:	/* ignored ... */
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 0aede1d6a9ea..6c8b65d069e5 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -690,7 +690,7 @@ static ssize_t read_zero(struct file * file, char __user * buf,
 
 		if (chunk > PAGE_SIZE)
 			chunk = PAGE_SIZE;	/* Just for latency reasons */
-		unwritten = clear_user(buf, chunk);
+		unwritten = __clear_user(buf, chunk);
 		written += chunk - unwritten;
 		if (unwritten)
 			break;
diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c
index 94ad2c3bfc4a..a4ec50c95072 100644
--- a/drivers/char/mwave/mwavedd.c
+++ b/drivers/char/mwave/mwavedd.c
@@ -281,12 +281,6 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd,
 	case IOCTL_MW_REGISTER_IPC: {
 		unsigned int ipcnum = (unsigned int) ioarg;
 
-		PRINTK_3(TRACE_MWAVE,
-			"mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC"
-			" ipcnum %x entry usIntCount %x\n",
-			ipcnum,
-			pDrvData->IPCs[ipcnum].usIntCount);
-
 		if (ipcnum >= ARRAY_SIZE(pDrvData->IPCs)) {
 			PRINTK_ERROR(KERN_ERR_MWAVE
 					"mwavedd::mwave_ioctl:"
@@ -295,6 +289,12 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd,
 					ipcnum);
 			return -EINVAL;
 		}
+		PRINTK_3(TRACE_MWAVE,
+			"mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC"
+			" ipcnum %x entry usIntCount %x\n",
+			ipcnum,
+			pDrvData->IPCs[ipcnum].usIntCount);
+
 		lock_kernel();
 		pDrvData->IPCs[ipcnum].bIsHere = FALSE;
 		pDrvData->IPCs[ipcnum].bIsEnabled = TRUE;
@@ -310,11 +310,6 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd,
 	case IOCTL_MW_GET_IPC: {
 		unsigned int ipcnum = (unsigned int) ioarg;
 
-		PRINTK_3(TRACE_MWAVE,
-			"mwavedd::mwave_ioctl IOCTL_MW_GET_IPC"
-			" ipcnum %x, usIntCount %x\n",
-			ipcnum,
-			pDrvData->IPCs[ipcnum].usIntCount);
 		if (ipcnum >= ARRAY_SIZE(pDrvData->IPCs)) {
 			PRINTK_ERROR(KERN_ERR_MWAVE
 					"mwavedd::mwave_ioctl:"
@@ -322,6 +317,11 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd,
 					" Invalid ipcnum %x\n", ipcnum);
 			return -EINVAL;
 		}
+		PRINTK_3(TRACE_MWAVE,
+			"mwavedd::mwave_ioctl IOCTL_MW_GET_IPC"
+			" ipcnum %x, usIntCount %x\n",
+			ipcnum,
+			pDrvData->IPCs[ipcnum].usIntCount);
 
 		lock_kernel();
 		if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) {
diff --git a/drivers/char/random.c b/drivers/char/random.c
index d8a9255e1a3f..04b505e5a5e2 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1231,7 +1231,7 @@ static char sysctl_bootid[16];
  * as an ASCII string in the standard UUID format. If accesses via the
  * sysctl system call, it is returned as 16 bytes of binary data.
  */
-static int proc_do_uuid(ctl_table *table, int write, struct file *filp,
+static int proc_do_uuid(ctl_table *table, int write,
 			void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	ctl_table fake_table;
@@ -1254,7 +1254,7 @@ static int proc_do_uuid(ctl_table *table, int write, struct file *filp,
 	fake_table.data = buf;
 	fake_table.maxlen = sizeof(buf);
 
-	return proc_dostring(&fake_table, write, filp, buffer, lenp, ppos);
+	return proc_dostring(&fake_table, write, buffer, lenp, ppos);
 }
 
 static int uuid_strategy(ctl_table *table,
diff --git a/drivers/char/rio/rioctrl.c b/drivers/char/rio/rioctrl.c
index eecee0f576d2..74339559f0b9 100644
--- a/drivers/char/rio/rioctrl.c
+++ b/drivers/char/rio/rioctrl.c
@@ -873,7 +873,7 @@ int riocontrol(struct rio_info *p, dev_t dev, int cmd, unsigned long arg, int su
 		/*
 		** It is important that the product code is an unsigned object!
 		*/
-		if (DownLoad.ProductCode > MAX_PRODUCT) {
+		if (DownLoad.ProductCode >= MAX_PRODUCT) {
 			rio_dprintk(RIO_DEBUG_CTRL, "RIO_DOWNLOAD: Bad product code %d passed\n", DownLoad.ProductCode);
 			p->RIOError.Error = NO_SUCH_PRODUCT;
 			return -ENXIO;
diff --git a/drivers/char/uv_mmtimer.c b/drivers/char/uv_mmtimer.c
new file mode 100644
index 000000000000..867b67be9f0a
--- /dev/null
+++ b/drivers/char/uv_mmtimer.c
@@ -0,0 +1,216 @@
+/*
+ * Timer device implementation for SGI UV platform.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2009 Silicon Graphics, Inc.  All rights reserved.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/ioctl.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/mmtimer.h>
+#include <linux/miscdevice.h>
+#include <linux/posix-timers.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+#include <linux/smp_lock.h>
+
+#include <asm/genapic.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
+
+MODULE_AUTHOR("Dimitri Sivanich <sivanich@sgi.com>");
+MODULE_DESCRIPTION("SGI UV Memory Mapped RTC Timer");
+MODULE_LICENSE("GPL");
+
+/* name of the device, usually in /dev */
+#define UV_MMTIMER_NAME "mmtimer"
+#define UV_MMTIMER_DESC "SGI UV Memory Mapped RTC Timer"
+#define UV_MMTIMER_VERSION "1.0"
+
+static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd,
+						unsigned long arg);
+static int uv_mmtimer_mmap(struct file *file, struct vm_area_struct *vma);
+
+/*
+ * Period in femtoseconds (10^-15 s)
+ */
+static unsigned long uv_mmtimer_femtoperiod;
+
+static const struct file_operations uv_mmtimer_fops = {
+	.owner = THIS_MODULE,
+	.mmap =	uv_mmtimer_mmap,
+	.unlocked_ioctl = uv_mmtimer_ioctl,
+};
+
+/**
+ * uv_mmtimer_ioctl - ioctl interface for /dev/uv_mmtimer
+ * @file: file structure for the device
+ * @cmd: command to execute
+ * @arg: optional argument to command
+ *
+ * Executes the command specified by @cmd.  Returns 0 for success, < 0 for
+ * failure.
+ *
+ * Valid commands:
+ *
+ * %MMTIMER_GETOFFSET - Should return the offset (relative to the start
+ * of the page where the registers are mapped) for the counter in question.
+ *
+ * %MMTIMER_GETRES - Returns the resolution of the clock in femto (10^-15)
+ * seconds
+ *
+ * %MMTIMER_GETFREQ - Copies the frequency of the clock in Hz to the address
+ * specified by @arg
+ *
+ * %MMTIMER_GETBITS - Returns the number of bits in the clock's counter
+ *
+ * %MMTIMER_MMAPAVAIL - Returns 1 if registers can be mmap'd into userspace
+ *
+ * %MMTIMER_GETCOUNTER - Gets the current value in the counter and places it
+ * in the address specified by @arg.
+ */
+static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd,
+						unsigned long arg)
+{
+	int ret = 0;
+
+	switch (cmd) {
+	case MMTIMER_GETOFFSET:	/* offset of the counter */
+		/*
+		 * UV RTC register is on its own page
+		 */
+		if (PAGE_SIZE <= (1 << 16))
+			ret = ((UV_LOCAL_MMR_BASE | UVH_RTC) & (PAGE_SIZE-1))
+				/ 8;
+		else
+			ret = -ENOSYS;
+		break;
+
+	case MMTIMER_GETRES: /* resolution of the clock in 10^-15 s */
+		if (copy_to_user((unsigned long __user *)arg,
+				&uv_mmtimer_femtoperiod, sizeof(unsigned long)))
+			ret = -EFAULT;
+		break;
+
+	case MMTIMER_GETFREQ: /* frequency in Hz */
+		if (copy_to_user((unsigned long __user *)arg,
+				&sn_rtc_cycles_per_second,
+				sizeof(unsigned long)))
+			ret = -EFAULT;
+		break;
+
+	case MMTIMER_GETBITS: /* number of bits in the clock */
+		ret = hweight64(UVH_RTC_REAL_TIME_CLOCK_MASK);
+		break;
+
+	case MMTIMER_MMAPAVAIL: /* can we mmap the clock into userspace? */
+		ret = (PAGE_SIZE <= (1 << 16)) ? 1 : 0;
+		break;
+
+	case MMTIMER_GETCOUNTER:
+		if (copy_to_user((unsigned long __user *)arg,
+				(unsigned long *)uv_local_mmr_address(UVH_RTC),
+				sizeof(unsigned long)))
+			ret = -EFAULT;
+		break;
+	default:
+		ret = -ENOTTY;
+		break;
+	}
+	return ret;
+}
+
+/**
+ * uv_mmtimer_mmap - maps the clock's registers into userspace
+ * @file: file structure for the device
+ * @vma: VMA to map the registers into
+ *
+ * Calls remap_pfn_range() to map the clock's registers into
+ * the calling process' address space.
+ */
+static int uv_mmtimer_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	unsigned long uv_mmtimer_addr;
+
+	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+		return -EINVAL;
+
+	if (vma->vm_flags & VM_WRITE)
+		return -EPERM;
+
+	if (PAGE_SIZE > (1 << 16))
+		return -ENOSYS;
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	uv_mmtimer_addr = UV_LOCAL_MMR_BASE | UVH_RTC;
+	uv_mmtimer_addr &= ~(PAGE_SIZE - 1);
+	uv_mmtimer_addr &= 0xfffffffffffffffUL;
+
+	if (remap_pfn_range(vma, vma->vm_start, uv_mmtimer_addr >> PAGE_SHIFT,
+					PAGE_SIZE, vma->vm_page_prot)) {
+		printk(KERN_ERR "remap_pfn_range failed in uv_mmtimer_mmap\n");
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+static struct miscdevice uv_mmtimer_miscdev = {
+	MISC_DYNAMIC_MINOR,
+	UV_MMTIMER_NAME,
+	&uv_mmtimer_fops
+};
+
+
+/**
+ * uv_mmtimer_init - device initialization routine
+ *
+ * Does initial setup for the uv_mmtimer device.
+ */
+static int __init uv_mmtimer_init(void)
+{
+	if (!is_uv_system()) {
+		printk(KERN_ERR "%s: Hardware unsupported\n", UV_MMTIMER_NAME);
+		return -1;
+	}
+
+	/*
+	 * Sanity check the cycles/sec variable
+	 */
+	if (sn_rtc_cycles_per_second < 100000) {
+		printk(KERN_ERR "%s: unable to determine clock frequency\n",
+				UV_MMTIMER_NAME);
+		return -1;
+	}
+
+	uv_mmtimer_femtoperiod = ((unsigned long)1E15 +
+				sn_rtc_cycles_per_second / 2) /
+				sn_rtc_cycles_per_second;
+
+	if (misc_register(&uv_mmtimer_miscdev)) {
+		printk(KERN_ERR "%s: failed to register device\n",
+		       UV_MMTIMER_NAME);
+		return -1;
+	}
+
+	printk(KERN_INFO "%s: v%s, %ld MHz\n", UV_MMTIMER_DESC,
+	       UV_MMTIMER_VERSION,
+	       sn_rtc_cycles_per_second/(unsigned long)1E6);
+
+	return 0;
+}
+
+module_init(uv_mmtimer_init);
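The uv_mmtimer_femtoperiod assignment in uv_mmtimer_init() is integer division rounded to nearest: (10^15 + rate/2) / rate. Illustration only, with an assumed 100 MHz RTC rate (the real rate comes from sn_rtc_cycles_per_second):

/* Illustration only: the rounded division behind uv_mmtimer_femtoperiod. */
#include <stdio.h>

int main(void)
{
	unsigned long long rate = 100000000ULL;	/* assumed 100 MHz RTC */
	unsigned long long fs = (1000000000000000ULL + rate / 2) / rate;

	printf("%llu fs per tick\n", fs);	/* 10000000 fs == 10 ns */
	return 0;
}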
diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c
index 25b743abfb59..52e6bb70a490 100644
--- a/drivers/dca/dca-core.c
+++ b/drivers/dca/dca-core.c
@@ -28,7 +28,7 @@
 #include <linux/device.h>
 #include <linux/dca.h>
 
-#define DCA_VERSION "1.8"
+#define DCA_VERSION "1.12.1"
 
 MODULE_VERSION(DCA_VERSION);
 MODULE_LICENSE("GPL");
@@ -36,20 +36,92 @@ MODULE_AUTHOR("Intel Corporation");
 
 static DEFINE_SPINLOCK(dca_lock);
 
-static LIST_HEAD(dca_providers);
+static LIST_HEAD(dca_domains);
 
-static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
+static struct pci_bus *dca_pci_rc_from_dev(struct device *dev)
 {
-	struct dca_provider *dca, *ret = NULL;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_bus *bus = pdev->bus;
 
-	list_for_each_entry(dca, &dca_providers, node) {
-		if ((!dev) || (dca->ops->dev_managed(dca, dev))) {
-			ret = dca;
-			break;
-		}
+	while (bus->parent)
+		bus = bus->parent;
+
+	return bus;
+}
+
+static struct dca_domain *dca_allocate_domain(struct pci_bus *rc)
+{
+	struct dca_domain *domain;
+
+	domain = kzalloc(sizeof(*domain), GFP_NOWAIT);
+	if (!domain)
+		return NULL;
+
+	INIT_LIST_HEAD(&domain->dca_providers);
+	domain->pci_rc = rc;
+
+	return domain;
+}
+
+static void dca_free_domain(struct dca_domain *domain)
+{
+	list_del(&domain->node);
+	kfree(domain);
+}
+
+static struct dca_domain *dca_find_domain(struct pci_bus *rc)
+{
+	struct dca_domain *domain;
+
+	list_for_each_entry(domain, &dca_domains, node)
+		if (domain->pci_rc == rc)
+			return domain;
+
+	return NULL;
+}
+
+static struct dca_domain *dca_get_domain(struct device *dev)
+{
+	struct pci_bus *rc;
+	struct dca_domain *domain;
+
+	rc = dca_pci_rc_from_dev(dev);
+	domain = dca_find_domain(rc);
+
+	if (!domain) {
+		domain = dca_allocate_domain(rc);
+		if (domain)
+			list_add(&domain->node, &dca_domains);
+	}
+
+	return domain;
+}
+
+static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
+{
+	struct dca_provider *dca;
+	struct pci_bus *rc;
+	struct dca_domain *domain;
+
+	if (dev) {
+		rc = dca_pci_rc_from_dev(dev);
+		domain = dca_find_domain(rc);
+		if (!domain)
+			return NULL;
+	} else {
+		if (!list_empty(&dca_domains))
+			domain = list_first_entry(&dca_domains,
+						  struct dca_domain,
+						  node);
+		else
+			return NULL;
 	}
 
-	return ret;
+	list_for_each_entry(dca, &domain->dca_providers, node)
+		if ((!dev) || (dca->ops->dev_managed(dca, dev)))
+			return dca;
+
+	return NULL;
 }
 
 /**
@@ -61,6 +133,8 @@ int dca_add_requester(struct device *dev)
 	struct dca_provider *dca;
 	int err, slot = -ENODEV;
 	unsigned long flags;
+	struct pci_bus *pci_rc;
+	struct dca_domain *domain;
 
 	if (!dev)
 		return -EFAULT;
@@ -74,7 +148,14 @@ int dca_add_requester(struct device *dev)
 		return -EEXIST;
 	}
 
-	list_for_each_entry(dca, &dca_providers, node) {
+	pci_rc = dca_pci_rc_from_dev(dev);
+	domain = dca_find_domain(pci_rc);
+	if (!domain) {
+		spin_unlock_irqrestore(&dca_lock, flags);
+		return -ENODEV;
+	}
+
+	list_for_each_entry(dca, &domain->dca_providers, node) {
 		slot = dca->ops->add_requester(dca, dev);
 		if (slot >= 0)
 			break;
@@ -222,13 +303,19 @@ int register_dca_provider(struct dca_provider *dca, struct device *dev)
 {
 	int err;
 	unsigned long flags;
+	struct dca_domain *domain;
 
 	err = dca_sysfs_add_provider(dca, dev);
 	if (err)
 		return err;
 
 	spin_lock_irqsave(&dca_lock, flags);
-	list_add(&dca->node, &dca_providers);
+	domain = dca_get_domain(dev);
+	if (!domain) {
+		spin_unlock_irqrestore(&dca_lock, flags);
+		return -ENODEV;
+	}
+	list_add(&dca->node, &domain->dca_providers);
 	spin_unlock_irqrestore(&dca_lock, flags);
 
 	blocking_notifier_call_chain(&dca_provider_chain,
@@ -241,15 +328,24 @@ EXPORT_SYMBOL_GPL(register_dca_provider);
 * unregister_dca_provider - remove a dca provider
 * @dca - struct created by alloc_dca_provider()
 */
-void unregister_dca_provider(struct dca_provider *dca)
+void unregister_dca_provider(struct dca_provider *dca, struct device *dev)
 {
 	unsigned long flags;
+	struct pci_bus *pci_rc;
+	struct dca_domain *domain;
 
 	blocking_notifier_call_chain(&dca_provider_chain,
 				     DCA_PROVIDER_REMOVE, NULL);
 
 	spin_lock_irqsave(&dca_lock, flags);
+
 	list_del(&dca->node);
+
+	pci_rc = dca_pci_rc_from_dev(dev);
+	domain = dca_find_domain(pci_rc);
+	if (list_empty(&domain->dca_providers))
+		dca_free_domain(domain);
+
 	spin_unlock_irqrestore(&dca_lock, flags);
 
 	dca_sysfs_remove_provider(dca);
@@ -276,7 +372,7 @@ EXPORT_SYMBOL_GPL(dca_unregister_notify);
 
 static int __init dca_init(void)
 {
-	printk(KERN_ERR "dca service started, version %s\n", DCA_VERSION);
+	pr_info("dca service started, version %s\n", DCA_VERSION);
 	return dca_sysfs_init();
 }
 
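The new domain code keys DCA providers by PCI root complex, found by following pci_bus->parent up to the bus with no parent. A minimal sketch of that walk, illustration only (struct bus and root_of() are stand-ins for the kernel's struct pci_bus and dca_pci_rc_from_dev()):

#include <assert.h>
#include <stddef.h>

struct bus {
	struct bus *parent;
};

static struct bus *root_of(struct bus *bus)
{
	while (bus->parent)	/* climb until the bus with no parent */
		bus = bus->parent;
	return bus;
}

int main(void)
{
	struct bus root = { NULL };
	struct bus bridge = { &root };
	struct bus leaf = { &bridge };

	assert(root_of(&leaf) == &root);	/* leaf and bridge share a root */
	assert(root_of(&root) == &root);	/* a root is its own root */
	return 0;
}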
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 81e1020fb514..5903a88351bf 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -17,11 +17,15 @@ if DMADEVICES
 
 comment "DMA Devices"
 
+config ASYNC_TX_DISABLE_CHANNEL_SWITCH
+	bool
+
 config INTEL_IOATDMA
 	tristate "Intel I/OAT DMA support"
 	depends on PCI && X86
 	select DMA_ENGINE
 	select DCA
+	select ASYNC_TX_DISABLE_CHANNEL_SWITCH
 	help
 	  Enable support for the Intel(R) I/OAT DMA engine present
 	  in recent Intel Xeon chipsets.
@@ -97,6 +101,14 @@ config TXX9_DMAC
 	  Support the TXx9 SoC internal DMA controller.  This can be
 	  integrated in chips such as the Toshiba TX4927/38/39.
 
+config SH_DMAE
+	tristate "Renesas SuperH DMAC support"
+	depends on SUPERH && SH_DMA
+	depends on !SH_DMA_API
+	select DMA_ENGINE
+	help
+	  Enable support for the Renesas SuperH DMA controllers.
+
 config DMA_ENGINE
 	bool
 
@@ -116,7 +128,7 @@ config NET_DMA
 
 config ASYNC_TX_DMA
 	bool "Async_tx: Offload support for the async_tx api"
-	depends on DMA_ENGINE && !HIGHMEM64G
+	depends on DMA_ENGINE
 	help
 	  This allows the async_tx api to take advantage of offload engines for
 	  memcpy, memset, xor, and raid6 p+q operations. If your platform has
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 40e1e0083571..eca71ba78ae9 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -1,8 +1,7 @@
1obj-$(CONFIG_DMA_ENGINE) += dmaengine.o 1obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
2obj-$(CONFIG_NET_DMA) += iovlock.o 2obj-$(CONFIG_NET_DMA) += iovlock.o
3obj-$(CONFIG_DMATEST) += dmatest.o 3obj-$(CONFIG_DMATEST) += dmatest.o
4obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o 4obj-$(CONFIG_INTEL_IOATDMA) += ioat/
5ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o
6obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o 5obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
7obj-$(CONFIG_FSL_DMA) += fsldma.o 6obj-$(CONFIG_FSL_DMA) += fsldma.o
8obj-$(CONFIG_MV_XOR) += mv_xor.o 7obj-$(CONFIG_MV_XOR) += mv_xor.o
@@ -10,3 +9,4 @@ obj-$(CONFIG_DW_DMAC) += dw_dmac.o
10obj-$(CONFIG_AT_HDMAC) += at_hdmac.o 9obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
11obj-$(CONFIG_MX3_IPU) += ipu/ 10obj-$(CONFIG_MX3_IPU) += ipu/
12obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o 11obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
12obj-$(CONFIG_SH_DMAE) += shdma.o
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index c8522e6f1ad2..7585c4164bd5 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -87,6 +87,7 @@ static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan,
 	desc = dma_pool_alloc(atdma->dma_desc_pool, gfp_flags, &phys);
 	if (desc) {
 		memset(desc, 0, sizeof(struct at_desc));
+		INIT_LIST_HEAD(&desc->tx_list);
 		dma_async_tx_descriptor_init(&desc->txd, chan);
 		/* txd.flags will be overwritten in prep functions */
 		desc->txd.flags = DMA_CTRL_ACK;
@@ -150,11 +151,11 @@ static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc)
 		struct at_desc *child;
 
 		spin_lock_bh(&atchan->lock);
-		list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+		list_for_each_entry(child, &desc->tx_list, desc_node)
 			dev_vdbg(chan2dev(&atchan->chan_common),
 					"moving child desc %p to freelist\n",
 					child);
-		list_splice_init(&desc->txd.tx_list, &atchan->free_list);
+		list_splice_init(&desc->tx_list, &atchan->free_list);
 		dev_vdbg(chan2dev(&atchan->chan_common),
 			 "moving desc %p to freelist\n", desc);
 		list_add(&desc->desc_node, &atchan->free_list);
@@ -247,30 +248,33 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
 	param = txd->callback_param;
 
 	/* move children to free_list */
-	list_splice_init(&txd->tx_list, &atchan->free_list);
+	list_splice_init(&desc->tx_list, &atchan->free_list);
 	/* move myself to free_list */
 	list_move(&desc->desc_node, &atchan->free_list);
 
 	/* unmap dma addresses */
-	if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-		if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-			dma_unmap_single(chan2parent(&atchan->chan_common),
-					desc->lli.daddr,
-					desc->len, DMA_FROM_DEVICE);
-		else
-			dma_unmap_page(chan2parent(&atchan->chan_common),
-					desc->lli.daddr,
-					desc->len, DMA_FROM_DEVICE);
-	}
-	if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-		if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-			dma_unmap_single(chan2parent(&atchan->chan_common),
-					desc->lli.saddr,
-					desc->len, DMA_TO_DEVICE);
-		else
-			dma_unmap_page(chan2parent(&atchan->chan_common),
-					desc->lli.saddr,
-					desc->len, DMA_TO_DEVICE);
+	if (!atchan->chan_common.private) {
+		struct device *parent = chan2parent(&atchan->chan_common);
+		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+			if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+				dma_unmap_single(parent,
+						desc->lli.daddr,
+						desc->len, DMA_FROM_DEVICE);
+			else
+				dma_unmap_page(parent,
+						desc->lli.daddr,
+						desc->len, DMA_FROM_DEVICE);
+		}
+		if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+			if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+				dma_unmap_single(parent,
+						desc->lli.saddr,
+						desc->len, DMA_TO_DEVICE);
+			else
+				dma_unmap_page(parent,
+						desc->lli.saddr,
+						desc->len, DMA_TO_DEVICE);
+		}
 	}
 
 	/*
@@ -334,7 +338,7 @@ static void atc_cleanup_descriptors(struct at_dma_chan *atchan)
 		/* This one is currently in progress */
 		return;
 
-	list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+	list_for_each_entry(child, &desc->tx_list, desc_node)
 		if (!(child->lli.ctrla & ATC_DONE))
 			/* Currently in progress */
 			return;
@@ -407,7 +411,7 @@ static void atc_handle_error(struct at_dma_chan *atchan)
 	dev_crit(chan2dev(&atchan->chan_common),
 			"  cookie: %d\n", bad_desc->txd.cookie);
 	atc_dump_lli(atchan, &bad_desc->lli);
-	list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
+	list_for_each_entry(child, &bad_desc->tx_list, desc_node)
 		atc_dump_lli(atchan, &child->lli);
 
 	/* Pretend the descriptor completed successfully */
@@ -587,7 +591,7 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 			prev->lli.dscr = desc->txd.phys;
 			/* insert the link descriptor to the LD ring */
 			list_add_tail(&desc->desc_node,
-					&first->txd.tx_list);
+					&first->tx_list);
 		}
 		prev = desc;
 	}
@@ -646,8 +650,6 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
 	reg_width = atslave->reg_width;
 
-	sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction);
-
 	ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla;
 	ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN;
 
@@ -687,7 +689,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 				prev->lli.dscr = desc->txd.phys;
 				/* insert the link descriptor to the LD ring */
 				list_add_tail(&desc->desc_node,
-						&first->txd.tx_list);
+						&first->tx_list);
 			}
 			prev = desc;
 			total_len += len;
@@ -729,7 +731,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 				prev->lli.dscr = desc->txd.phys;
 				/* insert the link descriptor to the LD ring */
 				list_add_tail(&desc->desc_node,
-						&first->txd.tx_list);
+						&first->tx_list);
 			}
 			prev = desc;
 			total_len += len;
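Both completion paths above now skip unmapping entirely for slave channels (chan private data set) and otherwise pick dma_unmap_single() or dma_unmap_page() based on the DMA_COMPL_*_UNMAP_SINGLE flags. A minimal dispatch sketch, illustration only (the flag values and the printed call names are stand-ins for the kernel enum and the real unmap calls):

#include <stdio.h>

#define SKIP_SRC_UNMAP		0x1	/* stand-in flag values, not the kernel's */
#define SRC_UNMAP_SINGLE	0x2

static void unmap_src(unsigned long flags)
{
	if (flags & SKIP_SRC_UNMAP) {
		printf("skip: client keeps the mapping alive\n");
		return;
	}
	if (flags & SRC_UNMAP_SINGLE)
		printf("dma_unmap_single()\n");	/* buffer came from map_single */
	else
		printf("dma_unmap_page()\n");	/* buffer came from map_page */
}

int main(void)
{
	unmap_src(SRC_UNMAP_SINGLE);
	unmap_src(0);
	unmap_src(SKIP_SRC_UNMAP);
	return 0;
}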
diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h
index 4c972afc49ec..495457e3dc4b 100644
--- a/drivers/dma/at_hdmac_regs.h
+++ b/drivers/dma/at_hdmac_regs.h
@@ -165,6 +165,7 @@ struct at_desc {
 	struct at_lli			lli;
 
 	/* THEN values for driver housekeeping */
+	struct list_head		tx_list;
 	struct dma_async_tx_descriptor	txd;
 	struct list_head		desc_node;
 	size_t				len;
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 5a87384ea4ff..bd0b248de2cf 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -608,6 +608,40 @@ void dmaengine_put(void)
 }
 EXPORT_SYMBOL(dmaengine_put);
 
+static bool device_has_all_tx_types(struct dma_device *device)
+{
+	/* A device that satisfies this test has channels that will never cause
+	 * an async_tx channel switch event as all possible operation types can
+	 * be handled.
+	 */
+	#ifdef CONFIG_ASYNC_TX_DMA
+	if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask))
+		return false;
+	#endif
+
+	#if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE)
+	if (!dma_has_cap(DMA_MEMCPY, device->cap_mask))
+		return false;
+	#endif
+
+	#if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE)
+	if (!dma_has_cap(DMA_MEMSET, device->cap_mask))
+		return false;
+	#endif
+
+	#if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE)
+	if (!dma_has_cap(DMA_XOR, device->cap_mask))
+		return false;
+	#endif
+
+	#if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE)
+	if (!dma_has_cap(DMA_PQ, device->cap_mask))
+		return false;
+	#endif
+
+	return true;
+}
+
 static int get_dma_id(struct dma_device *device)
 {
 	int rc;
@@ -644,8 +678,12 @@ int dma_async_device_register(struct dma_device *device)
 		!device->device_prep_dma_memcpy);
 	BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
 		!device->device_prep_dma_xor);
-	BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) &&
-		!device->device_prep_dma_zero_sum);
+	BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) &&
+		!device->device_prep_dma_xor_val);
+	BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) &&
+		!device->device_prep_dma_pq);
+	BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) &&
+		!device->device_prep_dma_pq_val);
 	BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
 		!device->device_prep_dma_memset);
 	BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
@@ -661,6 +699,12 @@ int dma_async_device_register(struct dma_device *device)
 	BUG_ON(!device->device_issue_pending);
 	BUG_ON(!device->dev);
 
+	/* note: this only matters in the
+	 * CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y case
+	 */
+	if (device_has_all_tx_types(device))
+		dma_cap_set(DMA_ASYNC_TX, device->cap_mask);
+
 	idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
 	if (!idr_ref)
 		return -ENOMEM;
@@ -933,55 +977,29 @@ void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
 {
 	tx->chan = chan;
 	spin_lock_init(&tx->lock);
-	INIT_LIST_HEAD(&tx->tx_list);
 }
 EXPORT_SYMBOL(dma_async_tx_descriptor_init);
 
 /* dma_wait_for_async_tx - spin wait for a transaction to complete
  * @tx: in-flight transaction to wait on
- *
- * This routine assumes that tx was obtained from a call to async_memcpy,
- * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped
- * and submitted).  Walking the parent chain is only meant to cover for DMA
- * drivers that do not implement the DMA_INTERRUPT capability and may race with
- * the driver's descriptor cleanup routine.
  */
 enum dma_status
 dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
 {
-	enum dma_status status;
-	struct dma_async_tx_descriptor *iter;
-	struct dma_async_tx_descriptor *parent;
+	unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
 
 	if (!tx)
 		return DMA_SUCCESS;
 
-	WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for"
-		  " %s\n", __func__, dma_chan_name(tx->chan));
-
-	/* poll through the dependency chain, return when tx is complete */
-	do {
-		iter = tx;
-
-		/* find the root of the unsubmitted dependency chain */
-		do {
-			parent = iter->parent;
-			if (!parent)
-				break;
-			else
-				iter = parent;
-		} while (parent);
-
-		/* there is a small window for ->parent == NULL and
-		 * ->cookie == -EBUSY
-		 */
-		while (iter->cookie == -EBUSY)
-			cpu_relax();
-
-		status = dma_sync_wait(iter->chan, iter->cookie);
-	} while (status == DMA_IN_PROGRESS || (iter != tx));
-
-	return status;
+	while (tx->cookie == -EBUSY) {
+		if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
+			pr_err("%s timeout waiting for descriptor submission\n",
+				__func__);
+			return DMA_ERROR;
+		}
+		cpu_relax();
+	}
+	return dma_sync_wait(tx->chan, tx->cookie);
 }
 EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
 
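The rewritten dma_wait_for_async_tx() now gives up after five seconds using time_after_eq() on jiffies, which stays correct across counter wrap because it compares a signed difference. Illustration only; after_eq() below is a userspace stand-in for the kernel macro:

/*
 * Illustration only: the wrap-safe deadline test behind time_after_eq(),
 * reimplemented for a plain unsigned counter outside the kernel.
 */
#include <stdio.h>

static int after_eq(unsigned long a, unsigned long b)
{
	return (long)(a - b) >= 0;	/* true once a reaches b, even across wrap */
}

int main(void)
{
	unsigned long now = (unsigned long)-10;	/* counter about to wrap */
	unsigned long deadline = now + 20;	/* wraps past zero */

	printf("%d\n", after_eq(now, deadline));	/* 0: not expired yet */
	printf("%d\n", after_eq(deadline + 1, deadline));	/* 1: expired */
	return 0;
}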
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index d93017fc7872..a32a4cf7b1e0 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -48,6 +48,11 @@ module_param(xor_sources, uint, S_IRUGO);
 MODULE_PARM_DESC(xor_sources,
 		"Number of xor source buffers (default: 3)");
 
+static unsigned int pq_sources = 3;
+module_param(pq_sources, uint, S_IRUGO);
+MODULE_PARM_DESC(pq_sources,
+		"Number of p+q source buffers (default: 3)");
+
 /*
  * Initialization patterns. All bytes in the source buffer has bit 7
  * set, all bytes in the destination buffer has bit 7 cleared.
@@ -232,6 +237,7 @@ static int dmatest_func(void *data)
 	dma_cookie_t cookie;
 	enum dma_status status;
 	enum dma_ctrl_flags flags;
+	u8 pq_coefs[pq_sources];
 	int ret;
 	int src_cnt;
 	int dst_cnt;
@@ -248,6 +254,11 @@ static int dmatest_func(void *data)
 	else if (thread->type == DMA_XOR) {
 		src_cnt = xor_sources | 1; /* force odd to ensure dst = src */
 		dst_cnt = 1;
+	} else if (thread->type == DMA_PQ) {
+		src_cnt = pq_sources | 1; /* force odd to ensure dst = src */
+		dst_cnt = 2;
+		for (i = 0; i < pq_sources; i++)
+			pq_coefs[i] = 1;
 	} else
 		goto err_srcs;
 
@@ -283,6 +294,7 @@ static int dmatest_func(void *data)
 		dma_addr_t dma_dsts[dst_cnt];
 		struct completion cmp;
 		unsigned long tmo = msecs_to_jiffies(3000);
+		u8 align = 0;
 
 		total_tests++;
 
@@ -290,6 +302,18 @@ static int dmatest_func(void *data)
 		src_off = dmatest_random() % (test_buf_size - len + 1);
 		dst_off = dmatest_random() % (test_buf_size - len + 1);
 
+		/* honor alignment restrictions */
+		if (thread->type == DMA_MEMCPY)
+			align = dev->copy_align;
+		else if (thread->type == DMA_XOR)
+			align = dev->xor_align;
+		else if (thread->type == DMA_PQ)
+			align = dev->pq_align;
+
+		len = (len >> align) << align;
+		src_off = (src_off >> align) << align;
+		dst_off = (dst_off >> align) << align;
+
 		dmatest_init_srcs(thread->srcs, src_off, len);
 		dmatest_init_dsts(thread->dsts, dst_off, len);
 
@@ -306,6 +330,7 @@ static int dmatest_func(void *data)
 					     DMA_BIDIRECTIONAL);
 		}
 
+
 		if (thread->type == DMA_MEMCPY)
 			tx = dev->device_prep_dma_memcpy(chan,
 							 dma_dsts[0] + dst_off,
@@ -316,6 +341,15 @@ static int dmatest_func(void *data)
 							 dma_dsts[0] + dst_off,
 							 dma_srcs, xor_sources,
 							 len, flags);
+		else if (thread->type == DMA_PQ) {
+			dma_addr_t dma_pq[dst_cnt];
+
+			for (i = 0; i < dst_cnt; i++)
+				dma_pq[i] = dma_dsts[i] + dst_off;
+			tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
+						     pq_sources, pq_coefs,
+						     len, flags);
+		}
 
 		if (!tx) {
 			for (i = 0; i < src_cnt; i++)
@@ -459,6 +493,8 @@ static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_ty
 		op = "copy";
 	else if (type == DMA_XOR)
 		op = "xor";
+	else if (type == DMA_PQ)
+		op = "pq";
 	else
 		return -EINVAL;
 
@@ -514,6 +550,10 @@ static int dmatest_add_channel(struct dma_chan *chan)
 		cnt = dmatest_add_threads(dtc, DMA_XOR);
 		thread_count += cnt > 0 ? cnt : 0;
 	}
+	if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+		cnt = dmatest_add_threads(dtc, DMA_PQ);
+		thread_count += cnt > 0 ?: 0;
+	}
 
 	pr_info("dmatest: Started %u threads using %s\n",
 		thread_count, dma_chan_name(chan));
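The new alignment handling above rounds the random len, src_off and dst_off down to a multiple of 2^align with a shift down and back up (the align fields hold log2 of the required alignment). For example, with align = 3 (8-byte alignment), 29 becomes 24; illustration only:

/* Illustration only: the shift-based rounding used for the align fields. */
#include <stdio.h>

int main(void)
{
	unsigned int align = 3;		/* 2^3 == 8-byte alignment */
	unsigned int len = 29;

	len = (len >> align) << align;	/* round down to 24 */
	printf("%u\n", len);
	return 0;
}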
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 933c143b6a74..2eea823516a7 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -116,7 +116,7 @@ static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc)
 {
 	struct dw_desc *child;
 
-	list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+	list_for_each_entry(child, &desc->tx_list, desc_node)
 		dma_sync_single_for_cpu(chan2parent(&dwc->chan),
 				child->txd.phys, sizeof(child->lli),
 				DMA_TO_DEVICE);
@@ -137,11 +137,11 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
 		dwc_sync_desc_for_cpu(dwc, desc);
 
 		spin_lock_bh(&dwc->lock);
-		list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+		list_for_each_entry(child, &desc->tx_list, desc_node)
 			dev_vdbg(chan2dev(&dwc->chan),
 					"moving child desc %p to freelist\n",
 					child);
-		list_splice_init(&desc->txd.tx_list, &dwc->free_list);
+		list_splice_init(&desc->tx_list, &dwc->free_list);
 		dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc);
 		list_add(&desc->desc_node, &dwc->free_list);
 		spin_unlock_bh(&dwc->lock);
@@ -209,19 +209,28 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc)
 	param = txd->callback_param;
 
 	dwc_sync_desc_for_cpu(dwc, desc);
-	list_splice_init(&txd->tx_list, &dwc->free_list);
+	list_splice_init(&desc->tx_list, &dwc->free_list);
 	list_move(&desc->desc_node, &dwc->free_list);
 
-	/*
-	 * We use dma_unmap_page() regardless of how the buffers were
-	 * mapped before they were submitted...
-	 */
-	if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP))
-		dma_unmap_page(chan2parent(&dwc->chan), desc->lli.dar,
-			       desc->len, DMA_FROM_DEVICE);
-	if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP))
-		dma_unmap_page(chan2parent(&dwc->chan), desc->lli.sar,
-			       desc->len, DMA_TO_DEVICE);
+	if (!dwc->chan.private) {
+		struct device *parent = chan2parent(&dwc->chan);
+		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+			if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+				dma_unmap_single(parent, desc->lli.dar,
+						desc->len, DMA_FROM_DEVICE);
+			else
+				dma_unmap_page(parent, desc->lli.dar,
+						desc->len, DMA_FROM_DEVICE);
+		}
+		if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+			if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+				dma_unmap_single(parent, desc->lli.sar,
+						desc->len, DMA_TO_DEVICE);
+			else
+				dma_unmap_page(parent, desc->lli.sar,
+						desc->len, DMA_TO_DEVICE);
+		}
+	}
 
 	/*
 	 * The API requires that no submissions are done from a
@@ -289,7 +298,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
 			/* This one is currently in progress */
 			return;
 
-		list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+		list_for_each_entry(child, &desc->tx_list, desc_node)
 			if (child->lli.llp == llp)
 				/* Currently in progress */
 				return;
@@ -356,7 +365,7 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
 	dev_printk(KERN_CRIT, chan2dev(&dwc->chan),
 			"  cookie: %d\n", bad_desc->txd.cookie);
 	dwc_dump_lli(dwc, &bad_desc->lli);
-	list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
+	list_for_each_entry(child, &bad_desc->tx_list, desc_node)
 		dwc_dump_lli(dwc, &child->lli);
 
 	/* Pretend the descriptor completed successfully */
@@ -608,7 +617,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 					prev->txd.phys, sizeof(prev->lli),
 					DMA_TO_DEVICE);
 			list_add_tail(&desc->desc_node,
-					&first->txd.tx_list);
+					&first->tx_list);
 		}
 		prev = desc;
 	}
@@ -658,8 +667,6 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	reg_width = dws->reg_width;
 	prev = first = NULL;
 
-	sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction);
-
 	switch (direction) {
 	case DMA_TO_DEVICE:
 		ctllo = (DWC_DEFAULT_CTLLO
@@ -700,7 +707,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 						sizeof(prev->lli),
 						DMA_TO_DEVICE);
 				list_add_tail(&desc->desc_node,
-						&first->txd.tx_list);
+						&first->tx_list);
 			}
 			prev = desc;
 			total_len += len;
@@ -746,7 +753,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 						sizeof(prev->lli),
 						DMA_TO_DEVICE);
 				list_add_tail(&desc->desc_node,
-						&first->txd.tx_list);
+						&first->tx_list);
 			}
 			prev = desc;
 			total_len += len;
@@ -902,6 +909,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
 			break;
 		}
 
+		INIT_LIST_HEAD(&desc->tx_list);
 		dma_async_tx_descriptor_init(&desc->txd, chan);
 		desc->txd.tx_submit = dwc_tx_submit;
 		desc->txd.flags = DMA_CTRL_ACK;
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index 13a580767031..d9a939f67f46 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -217,6 +217,7 @@ struct dw_desc {
 
 	/* THEN values for driver housekeeping */
 	struct list_head	desc_node;
+	struct list_head	tx_list;
 	struct dma_async_tx_descriptor	txd;
 	size_t			len;
 };
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index ef87a8984145..296f9e747fac 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -34,6 +34,7 @@
34#include <linux/dmapool.h> 34#include <linux/dmapool.h>
35#include <linux/of_platform.h> 35#include <linux/of_platform.h>
36 36
37#include <asm/fsldma.h>
37#include "fsldma.h" 38#include "fsldma.h"
38 39
39static void dma_init(struct fsl_dma_chan *fsl_chan) 40static void dma_init(struct fsl_dma_chan *fsl_chan)
@@ -280,28 +281,40 @@ static void fsl_chan_set_dest_loop_size(struct fsl_dma_chan *fsl_chan, int size)
280} 281}
281 282
282/** 283/**
283 * fsl_chan_toggle_ext_pause - Toggle channel external pause status 284 * fsl_chan_set_request_count - Set DMA Request Count for external control
284 * @fsl_chan : Freescale DMA channel 285 * @fsl_chan : Freescale DMA channel
285 * @size : Pause control size, 0 for disable external pause control. 286 * @size : Number of bytes to transfer in a single request
286 * The maximum is 1024. 287 *
288 * The Freescale DMA channel can be controlled by the external signal DREQ#.
289 * The DMA request count is how many bytes are allowed to transfer before
290 * pausing the channel, after which a new assertion of DREQ# resumes channel
291 * operation.
287 * 292 *
288 * The Freescale DMA channel can be controlled by the external 293 * A size of 0 disables external pause control. The maximum size is 1024.
289 * signal DREQ#. The pause control size is how many bytes are allowed
290 * to transfer before pausing the channel, after which a new assertion
291 * of DREQ# resumes channel operation.
292 */ 294 */
293static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int size) 295static void fsl_chan_set_request_count(struct fsl_dma_chan *fsl_chan, int size)
294{ 296{
295 if (size > 1024) 297 BUG_ON(size > 1024);
296 return; 298 DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
299 DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
300 | ((__ilog2(size) << 24) & 0x0f000000),
301 32);
302}
297 303
298 if (size) { 304/**
299 DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, 305 * fsl_chan_toggle_ext_pause - Toggle channel external pause status
300 DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) 306 * @fsl_chan : Freescale DMA channel
301 | ((__ilog2(size) << 24) & 0x0f000000), 307 * @enable : 0 is disabled, 1 is enabled.
302 32); 308 *
309 * The Freescale DMA channel can be controlled by the external signal DREQ#.
310 * The DMA Request Count feature should be used in addition to this feature
311 * to set the number of bytes to transfer before pausing the channel.
312 */
313static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int enable)
314{
315 if (enable)
303 fsl_chan->feature |= FSL_DMA_CHAN_PAUSE_EXT; 316 fsl_chan->feature |= FSL_DMA_CHAN_PAUSE_EXT;
304 } else 317 else
305 fsl_chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT; 318 fsl_chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT;
306} 319}
307 320
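The request count is stored as a power of two in bits 27:24 of the channel mode register, which is why fsl_chan_set_request_count() runs the size through __ilog2() and why only power-of-two sizes up to 1024 encode exactly. A worked sketch of the encoding, using the DMA_IN/DMA_OUT accessors from the function above:

        /* sketch: program a 64-byte request count into the mode register */
        u32 mr = DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32);

        /* __ilog2(64) == 6, so bits 27:24 of MR become 0x6 */
        mr |= (__ilog2(64) << 24) & 0x0f000000;
        DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, mr, 32);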
@@ -326,7 +339,8 @@ static void fsl_chan_toggle_ext_start(struct fsl_dma_chan *fsl_chan, int enable)
326static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) 339static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
327{ 340{
328 struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan); 341 struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan);
329 struct fsl_desc_sw *desc; 342 struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
343 struct fsl_desc_sw *child;
330 unsigned long flags; 344 unsigned long flags;
331 dma_cookie_t cookie; 345 dma_cookie_t cookie;
332 346
@@ -334,7 +348,7 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
334 spin_lock_irqsave(&fsl_chan->desc_lock, flags); 348 spin_lock_irqsave(&fsl_chan->desc_lock, flags);
335 349
336 cookie = fsl_chan->common.cookie; 350 cookie = fsl_chan->common.cookie;
337 list_for_each_entry(desc, &tx->tx_list, node) { 351 list_for_each_entry(child, &desc->tx_list, node) {
338 cookie++; 352 cookie++;
339 if (cookie < 0) 353 if (cookie < 0)
340 cookie = 1; 354 cookie = 1;
@@ -343,8 +357,8 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
343 } 357 }
344 358
345 fsl_chan->common.cookie = cookie; 359 fsl_chan->common.cookie = cookie;
346 append_ld_queue(fsl_chan, tx_to_fsl_desc(tx)); 360 append_ld_queue(fsl_chan, desc);
347 list_splice_init(&tx->tx_list, fsl_chan->ld_queue.prev); 361 list_splice_init(&desc->tx_list, fsl_chan->ld_queue.prev);
348 362
349 spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); 363 spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
350 364
@@ -366,6 +380,7 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor(
366 desc_sw = dma_pool_alloc(fsl_chan->desc_pool, GFP_ATOMIC, &pdesc); 380 desc_sw = dma_pool_alloc(fsl_chan->desc_pool, GFP_ATOMIC, &pdesc);
367 if (desc_sw) { 381 if (desc_sw) {
368 memset(desc_sw, 0, sizeof(struct fsl_desc_sw)); 382 memset(desc_sw, 0, sizeof(struct fsl_desc_sw));
383 INIT_LIST_HEAD(&desc_sw->tx_list);
369 dma_async_tx_descriptor_init(&desc_sw->async_tx, 384 dma_async_tx_descriptor_init(&desc_sw->async_tx,
370 &fsl_chan->common); 385 &fsl_chan->common);
371 desc_sw->async_tx.tx_submit = fsl_dma_tx_submit; 386 desc_sw->async_tx.tx_submit = fsl_dma_tx_submit;
@@ -455,7 +470,7 @@ fsl_dma_prep_interrupt(struct dma_chan *chan, unsigned long flags)
455 new->async_tx.flags = flags; 470 new->async_tx.flags = flags;
456 471
457 /* Insert the link descriptor to the LD ring */ 472 /* Insert the link descriptor to the LD ring */
458 list_add_tail(&new->node, &new->async_tx.tx_list); 473 list_add_tail(&new->node, &new->tx_list);
459 474
460 /* Set End-of-link to the last link descriptor of new list*/ 475 /* Set End-of-link to the last link descriptor of new list*/
461 set_ld_eol(fsl_chan, new); 476 set_ld_eol(fsl_chan, new);
@@ -513,7 +528,7 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
513 dma_dest += copy; 528 dma_dest += copy;
514 529
515 /* Insert the link descriptor to the LD ring */ 530 /* Insert the link descriptor to the LD ring */
516 list_add_tail(&new->node, &first->async_tx.tx_list); 531 list_add_tail(&new->node, &first->tx_list);
517 } while (len); 532 } while (len);
518 533
519 new->async_tx.flags = flags; /* client is in control of this ack */ 534 new->async_tx.flags = flags; /* client is in control of this ack */
@@ -528,7 +543,7 @@ fail:
528 if (!first) 543 if (!first)
529 return NULL; 544 return NULL;
530 545
531 list = &first->async_tx.tx_list; 546 list = &first->tx_list;
532 list_for_each_entry_safe_reverse(new, prev, list, node) { 547 list_for_each_entry_safe_reverse(new, prev, list, node) {
533 list_del(&new->node); 548 list_del(&new->node);
534 dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys); 549 dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
@@ -538,6 +553,229 @@ fail:
538} 553}
539 554
540/** 555/**
556 * fsl_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
557 * @chan: DMA channel
558 * @sgl: scatterlist to transfer to/from
 559 * @sg_len: number of entries in @sgl
560 * @direction: DMA direction
561 * @flags: DMAEngine flags
562 *
563 * Prepare a set of descriptors for a DMA_SLAVE transaction. Following the
564 * DMA_SLAVE API, this gets the device-specific information from the
565 * chan->private variable.
566 */
567static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg(
568 struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
569 enum dma_data_direction direction, unsigned long flags)
570{
571 struct fsl_dma_chan *fsl_chan;
572 struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL;
573 struct fsl_dma_slave *slave;
574 struct list_head *tx_list;
575 size_t copy;
576
577 int i;
578 struct scatterlist *sg;
579 size_t sg_used;
580 size_t hw_used;
581 struct fsl_dma_hw_addr *hw;
582 dma_addr_t dma_dst, dma_src;
583
584 if (!chan)
585 return NULL;
586
587 if (!chan->private)
588 return NULL;
589
590 fsl_chan = to_fsl_chan(chan);
591 slave = chan->private;
592
593 if (list_empty(&slave->addresses))
594 return NULL;
595
596 hw = list_first_entry(&slave->addresses, struct fsl_dma_hw_addr, entry);
597 hw_used = 0;
598
599 /*
600 * Build the hardware transaction to copy from the scatterlist to
601 * the hardware, or from the hardware to the scatterlist
602 *
603 * If you are copying from the hardware to the scatterlist and it
604 * takes two hardware entries to fill an entire page, then both
605 * hardware entries will be coalesced into the same page
606 *
607 * If you are copying from the scatterlist to the hardware and a
608 * single page can fill two hardware entries, then the data will
609 * be read out of the page into the first hardware entry, and so on
610 */
611 for_each_sg(sgl, sg, sg_len, i) {
612 sg_used = 0;
613
614 /* Loop until the entire scatterlist entry is used */
615 while (sg_used < sg_dma_len(sg)) {
616
617 /*
618 * If we've used up the current hardware address/length
619 * pair, we need to load a new one
620 *
621 * This is done in a while loop so that descriptors with
622 * length == 0 will be skipped
623 */
624 while (hw_used >= hw->length) {
625
626 /*
627 * If the current hardware entry is the last
628 * entry in the list, we're finished
629 */
630 if (list_is_last(&hw->entry, &slave->addresses))
631 goto finished;
632
633 /* Get the next hardware address/length pair */
634 hw = list_entry(hw->entry.next,
635 struct fsl_dma_hw_addr, entry);
636 hw_used = 0;
637 }
638
639 /* Allocate the link descriptor from DMA pool */
640 new = fsl_dma_alloc_descriptor(fsl_chan);
641 if (!new) {
642 dev_err(fsl_chan->dev, "No free memory for "
643 "link descriptor\n");
644 goto fail;
645 }
646#ifdef FSL_DMA_LD_DEBUG
647 dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new);
648#endif
649
650 /*
651 * Calculate the maximum number of bytes to transfer,
652 * making sure it is less than the DMA controller limit
653 */
654 copy = min_t(size_t, sg_dma_len(sg) - sg_used,
655 hw->length - hw_used);
656 copy = min_t(size_t, copy, FSL_DMA_BCR_MAX_CNT);
657
658 /*
659 * DMA_FROM_DEVICE
660 * from the hardware to the scatterlist
661 *
662 * DMA_TO_DEVICE
663 * from the scatterlist to the hardware
664 */
665 if (direction == DMA_FROM_DEVICE) {
666 dma_src = hw->address + hw_used;
667 dma_dst = sg_dma_address(sg) + sg_used;
668 } else {
669 dma_src = sg_dma_address(sg) + sg_used;
670 dma_dst = hw->address + hw_used;
671 }
672
673 /* Fill in the descriptor */
674 set_desc_cnt(fsl_chan, &new->hw, copy);
675 set_desc_src(fsl_chan, &new->hw, dma_src);
676 set_desc_dest(fsl_chan, &new->hw, dma_dst);
677
678 /*
679 * If this is not the first descriptor, chain the
680 * current descriptor after the previous descriptor
681 */
682 if (!first) {
683 first = new;
684 } else {
685 set_desc_next(fsl_chan, &prev->hw,
686 new->async_tx.phys);
687 }
688
689 new->async_tx.cookie = 0;
690 async_tx_ack(&new->async_tx);
691
692 prev = new;
693 sg_used += copy;
694 hw_used += copy;
695
696 /* Insert the link descriptor into the LD ring */
697 list_add_tail(&new->node, &first->tx_list);
698 }
699 }
700
701finished:
702
703 /* All of the hardware address/length pairs had length == 0 */
704 if (!first || !new)
705 return NULL;
706
707 new->async_tx.flags = flags;
708 new->async_tx.cookie = -EBUSY;
709
710 /* Set End-of-link to the last link descriptor of new list */
711 set_ld_eol(fsl_chan, new);
712
713 /* Enable extra controller features */
714 if (fsl_chan->set_src_loop_size)
715 fsl_chan->set_src_loop_size(fsl_chan, slave->src_loop_size);
716
717 if (fsl_chan->set_dest_loop_size)
718 fsl_chan->set_dest_loop_size(fsl_chan, slave->dst_loop_size);
719
720 if (fsl_chan->toggle_ext_start)
721 fsl_chan->toggle_ext_start(fsl_chan, slave->external_start);
722
723 if (fsl_chan->toggle_ext_pause)
724 fsl_chan->toggle_ext_pause(fsl_chan, slave->external_pause);
725
726 if (fsl_chan->set_request_count)
727 fsl_chan->set_request_count(fsl_chan, slave->request_count);
728
729 return &first->async_tx;
730
731fail:
732 /* If first was not set, then we failed to allocate the very first
733 * descriptor, and we're done */
734 if (!first)
735 return NULL;
736
737 /*
738 * First is set, so all of the descriptors we allocated have been added
739 * to first->tx_list, INCLUDING "first" itself. Therefore we
740 * must traverse the list backwards freeing each descriptor in turn
741 *
 742 * Note that 'new' and 'prev' are re-used here as loop cursors
743 */
744 tx_list = &first->tx_list;
745 list_for_each_entry_safe_reverse(new, prev, tx_list, node) {
746 list_del_init(&new->node);
747 dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
748 }
749
750 return NULL;
751}
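As the kernel-doc above says, the DMA_SLAVE client passes its device-specific information through chan->private. A hedged sketch of what a caller might look like; fifo_phys, fifo_len, sgl and sg_len are placeholders, and the fsl_dma_slave/fsl_dma_hw_addr fields are inferred from their use above (the real definitions live in asm/fsldma.h, included at the top of this file):

        /* hypothetical client: one hardware FIFO window, externally started */
        struct fsl_dma_slave slave = { };
        struct fsl_dma_hw_addr hw;
        struct dma_async_tx_descriptor *tx;

        INIT_LIST_HEAD(&slave.addresses);
        hw.address = fifo_phys;                 /* device FIFO bus address */
        hw.length = fifo_len;
        list_add_tail(&hw.entry, &slave.addresses);
        slave.external_start = 1;
        slave.request_count = 64;

        chan->private = &slave;
        tx = chan->device->device_prep_slave_sg(chan, sgl, sg_len,
                                                DMA_TO_DEVICE, 0);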
752
753static void fsl_dma_device_terminate_all(struct dma_chan *chan)
754{
755 struct fsl_dma_chan *fsl_chan;
756 struct fsl_desc_sw *desc, *tmp;
757 unsigned long flags;
758
759 if (!chan)
760 return;
761
762 fsl_chan = to_fsl_chan(chan);
763
764 /* Halt the DMA engine */
765 dma_halt(fsl_chan);
766
767 spin_lock_irqsave(&fsl_chan->desc_lock, flags);
768
769 /* Remove and free all of the descriptors in the LD queue */
770 list_for_each_entry_safe(desc, tmp, &fsl_chan->ld_queue, node) {
771 list_del(&desc->node);
772 dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys);
773 }
774
775 spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
776}
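fsl_dma_device_terminate_all() halts the engine and frees every queued descriptor without invoking completion callbacks, so it is only safe once the client is done with the outstanding transactions. It is reached through the ops table registered in of_fsl_dma_probe() below; roughly:

        /* sketch: abort everything queued on a slave channel */
        if (chan->device->device_terminate_all)
                chan->device->device_terminate_all(chan);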
777
778/**
541 * fsl_dma_update_completed_cookie - Update the completed cookie. 779 * fsl_dma_update_completed_cookie - Update the completed cookie.
542 * @fsl_chan : Freescale DMA channel 780 * @fsl_chan : Freescale DMA channel
543 */ 781 */
@@ -883,6 +1121,7 @@ static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev,
883 new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start; 1121 new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start;
884 new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size; 1122 new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size;
885 new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size; 1123 new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size;
1124 new_fsl_chan->set_request_count = fsl_chan_set_request_count;
886 } 1125 }
887 1126
888 spin_lock_init(&new_fsl_chan->desc_lock); 1127 spin_lock_init(&new_fsl_chan->desc_lock);
@@ -962,12 +1201,15 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
962 1201
963 dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask); 1202 dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
964 dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask); 1203 dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
1204 dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
965 fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources; 1205 fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
966 fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources; 1206 fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
967 fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt; 1207 fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt;
968 fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy; 1208 fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
969 fdev->common.device_is_tx_complete = fsl_dma_is_complete; 1209 fdev->common.device_is_tx_complete = fsl_dma_is_complete;
970 fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending; 1210 fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
1211 fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg;
1212 fdev->common.device_terminate_all = fsl_dma_device_terminate_all;
971 fdev->common.dev = &dev->dev; 1213 fdev->common.dev = &dev->dev;
972 1214
973 fdev->irq = irq_of_parse_and_map(dev->node, 0); 1215 fdev->irq = irq_of_parse_and_map(dev->node, 0);
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index dc7f26865797..0df14cbb8ca3 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -90,6 +90,7 @@ struct fsl_dma_ld_hw {
90struct fsl_desc_sw { 90struct fsl_desc_sw {
91 struct fsl_dma_ld_hw hw; 91 struct fsl_dma_ld_hw hw;
92 struct list_head node; 92 struct list_head node;
93 struct list_head tx_list;
93 struct dma_async_tx_descriptor async_tx; 94 struct dma_async_tx_descriptor async_tx;
94 struct list_head *ld; 95 struct list_head *ld;
95 void *priv; 96 void *priv;
@@ -143,10 +144,11 @@ struct fsl_dma_chan {
143 struct tasklet_struct tasklet; 144 struct tasklet_struct tasklet;
144 u32 feature; 145 u32 feature;
145 146
146 void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int size); 147 void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int enable);
147 void (*toggle_ext_start)(struct fsl_dma_chan *fsl_chan, int enable); 148 void (*toggle_ext_start)(struct fsl_dma_chan *fsl_chan, int enable);
148 void (*set_src_loop_size)(struct fsl_dma_chan *fsl_chan, int size); 149 void (*set_src_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
149 void (*set_dest_loop_size)(struct fsl_dma_chan *fsl_chan, int size); 150 void (*set_dest_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
151 void (*set_request_count)(struct fsl_dma_chan *fsl_chan, int size);
150}; 152};
151 153
152#define to_fsl_chan(chan) container_of(chan, struct fsl_dma_chan, common) 154#define to_fsl_chan(chan) container_of(chan, struct fsl_dma_chan, common)
diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c
deleted file mode 100644
index 2225bb6ba3d1..000000000000
--- a/drivers/dma/ioat.c
+++ /dev/null
@@ -1,202 +0,0 @@
1/*
2 * Intel I/OAT DMA Linux driver
3 * Copyright(c) 2007 - 2009 Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * The full GNU General Public License is included in this distribution in
19 * the file called "COPYING".
20 *
21 */
22
23/*
24 * This driver supports an Intel I/OAT DMA engine, which does asynchronous
25 * copy operations.
26 */
27
28#include <linux/init.h>
29#include <linux/module.h>
30#include <linux/pci.h>
31#include <linux/interrupt.h>
32#include <linux/dca.h>
33#include "ioatdma.h"
34#include "ioatdma_registers.h"
35#include "ioatdma_hw.h"
36
37MODULE_VERSION(IOAT_DMA_VERSION);
38MODULE_LICENSE("GPL");
39MODULE_AUTHOR("Intel Corporation");
40
41static struct pci_device_id ioat_pci_tbl[] = {
42 /* I/OAT v1 platforms */
43 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
44 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) },
45 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
46 { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
47
48 /* I/OAT v2 platforms */
49 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
50
51 /* I/OAT v3 platforms */
52 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
53 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
54 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
55 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
56 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
57 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
58 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
59 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
60 { 0, }
61};
62
63struct ioat_device {
64 struct pci_dev *pdev;
65 void __iomem *iobase;
66 struct ioatdma_device *dma;
67 struct dca_provider *dca;
68};
69
70static int __devinit ioat_probe(struct pci_dev *pdev,
71 const struct pci_device_id *id);
72static void __devexit ioat_remove(struct pci_dev *pdev);
73
74static int ioat_dca_enabled = 1;
75module_param(ioat_dca_enabled, int, 0644);
76MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
77
78static struct pci_driver ioat_pci_driver = {
79 .name = "ioatdma",
80 .id_table = ioat_pci_tbl,
81 .probe = ioat_probe,
82 .remove = __devexit_p(ioat_remove),
83};
84
85static int __devinit ioat_probe(struct pci_dev *pdev,
86 const struct pci_device_id *id)
87{
88 void __iomem *iobase;
89 struct ioat_device *device;
90 unsigned long mmio_start, mmio_len;
91 int err;
92
93 err = pci_enable_device(pdev);
94 if (err)
95 goto err_enable_device;
96
97 err = pci_request_regions(pdev, ioat_pci_driver.name);
98 if (err)
99 goto err_request_regions;
100
101 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
102 if (err)
103 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
104 if (err)
105 goto err_set_dma_mask;
106
107 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
108 if (err)
109 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
110 if (err)
111 goto err_set_dma_mask;
112
113 mmio_start = pci_resource_start(pdev, 0);
114 mmio_len = pci_resource_len(pdev, 0);
115 iobase = ioremap(mmio_start, mmio_len);
116 if (!iobase) {
117 err = -ENOMEM;
118 goto err_ioremap;
119 }
120
121 device = kzalloc(sizeof(*device), GFP_KERNEL);
122 if (!device) {
123 err = -ENOMEM;
124 goto err_kzalloc;
125 }
126 device->pdev = pdev;
127 pci_set_drvdata(pdev, device);
128 device->iobase = iobase;
129
130 pci_set_master(pdev);
131
132 switch (readb(iobase + IOAT_VER_OFFSET)) {
133 case IOAT_VER_1_2:
134 device->dma = ioat_dma_probe(pdev, iobase);
135 if (device->dma && ioat_dca_enabled)
136 device->dca = ioat_dca_init(pdev, iobase);
137 break;
138 case IOAT_VER_2_0:
139 device->dma = ioat_dma_probe(pdev, iobase);
140 if (device->dma && ioat_dca_enabled)
141 device->dca = ioat2_dca_init(pdev, iobase);
142 break;
143 case IOAT_VER_3_0:
144 device->dma = ioat_dma_probe(pdev, iobase);
145 if (device->dma && ioat_dca_enabled)
146 device->dca = ioat3_dca_init(pdev, iobase);
147 break;
148 default:
149 err = -ENODEV;
150 break;
151 }
152 if (!device->dma)
153 err = -ENODEV;
154
155 if (err)
156 goto err_version;
157
158 return 0;
159
160err_version:
161 kfree(device);
162err_kzalloc:
163 iounmap(iobase);
164err_ioremap:
165err_set_dma_mask:
166 pci_release_regions(pdev);
167 pci_disable_device(pdev);
168err_request_regions:
169err_enable_device:
170 return err;
171}
172
173static void __devexit ioat_remove(struct pci_dev *pdev)
174{
175 struct ioat_device *device = pci_get_drvdata(pdev);
176
177 dev_err(&pdev->dev, "Removing dma and dca services\n");
178 if (device->dca) {
179 unregister_dca_provider(device->dca);
180 free_dca_provider(device->dca);
181 device->dca = NULL;
182 }
183
184 if (device->dma) {
185 ioat_dma_remove(device->dma);
186 device->dma = NULL;
187 }
188
189 kfree(device);
190}
191
192static int __init ioat_init_module(void)
193{
194 return pci_register_driver(&ioat_pci_driver);
195}
196module_init(ioat_init_module);
197
198static void __exit ioat_exit_module(void)
199{
200 pci_unregister_driver(&ioat_pci_driver);
201}
202module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile
new file mode 100644
index 000000000000..8997d3fb9051
--- /dev/null
+++ b/drivers/dma/ioat/Makefile
@@ -0,0 +1,2 @@
1obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
2ioatdma-objs := pci.o dma.o dma_v2.o dma_v3.o dca.o
diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat/dca.c
index c012a1e15043..69d02615c4d6 100644
--- a/drivers/dma/ioat_dca.c
+++ b/drivers/dma/ioat/dca.c
@@ -33,8 +33,8 @@
33#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24) 33#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24)
34#endif 34#endif
35 35
36#include "ioatdma.h" 36#include "dma.h"
37#include "ioatdma_registers.h" 37#include "registers.h"
38 38
39/* 39/*
40 * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6 40 * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
@@ -242,7 +242,8 @@ static struct dca_ops ioat_dca_ops = {
242}; 242};
243 243
244 244
245struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) 245struct dca_provider * __devinit
246ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
246{ 247{
247 struct dca_provider *dca; 248 struct dca_provider *dca;
248 struct ioat_dca_priv *ioatdca; 249 struct ioat_dca_priv *ioatdca;
@@ -407,7 +408,8 @@ static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
407 return slots; 408 return slots;
408} 409}
409 410
410struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) 411struct dca_provider * __devinit
412ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
411{ 413{
412 struct dca_provider *dca; 414 struct dca_provider *dca;
413 struct ioat_dca_priv *ioatdca; 415 struct ioat_dca_priv *ioatdca;
@@ -602,7 +604,8 @@ static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset)
602 return slots; 604 return slots;
603} 605}
604 606
605struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) 607struct dca_provider * __devinit
608ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
606{ 609{
607 struct dca_provider *dca; 610 struct dca_provider *dca;
608 struct ioat_dca_priv *ioatdca; 611 struct ioat_dca_priv *ioatdca;
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
new file mode 100644
index 000000000000..c524d36d3c2e
--- /dev/null
+++ b/drivers/dma/ioat/dma.c
@@ -0,0 +1,1238 @@
1/*
2 * Intel I/OAT DMA Linux driver
3 * Copyright(c) 2004 - 2009 Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * The full GNU General Public License is included in this distribution in
19 * the file called "COPYING".
20 *
21 */
22
23/*
24 * This driver supports an Intel I/OAT DMA engine, which does asynchronous
25 * copy operations.
26 */
27
28#include <linux/init.h>
29#include <linux/module.h>
30#include <linux/pci.h>
31#include <linux/interrupt.h>
32#include <linux/dmaengine.h>
33#include <linux/delay.h>
34#include <linux/dma-mapping.h>
35#include <linux/workqueue.h>
36#include <linux/i7300_idle.h>
37#include "dma.h"
38#include "registers.h"
39#include "hw.h"
40
41int ioat_pending_level = 4;
42module_param(ioat_pending_level, int, 0644);
43MODULE_PARM_DESC(ioat_pending_level,
44 "high-water mark for pushing ioat descriptors (default: 4)");
45
46/* internal functions */
47static void ioat1_cleanup(struct ioat_dma_chan *ioat);
48static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat);
49
50/**
51 * ioat_dma_do_interrupt - handler used for single vector interrupt mode
52 * @irq: interrupt id
53 * @data: interrupt data
54 */
55static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
56{
57 struct ioatdma_device *instance = data;
58 struct ioat_chan_common *chan;
59 unsigned long attnstatus;
60 int bit;
61 u8 intrctrl;
62
63 intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
64
65 if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
66 return IRQ_NONE;
67
68 if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
69 writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
70 return IRQ_NONE;
71 }
72
73 attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
74 for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
75 chan = ioat_chan_by_index(instance, bit);
76 tasklet_schedule(&chan->cleanup_task);
77 }
78
79 writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
80 return IRQ_HANDLED;
81}
82
83/**
84 * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
85 * @irq: interrupt id
86 * @data: interrupt data
87 */
88static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
89{
90 struct ioat_chan_common *chan = data;
91
92 tasklet_schedule(&chan->cleanup_task);
93
94 return IRQ_HANDLED;
95}
96
97static void ioat1_cleanup_tasklet(unsigned long data);
98
99/* common channel initialization */
100void ioat_init_channel(struct ioatdma_device *device,
101 struct ioat_chan_common *chan, int idx,
102 void (*timer_fn)(unsigned long),
103 void (*tasklet)(unsigned long),
104 unsigned long ioat)
105{
106 struct dma_device *dma = &device->common;
107
108 chan->device = device;
109 chan->reg_base = device->reg_base + (0x80 * (idx + 1));
110 spin_lock_init(&chan->cleanup_lock);
111 chan->common.device = dma;
112 list_add_tail(&chan->common.device_node, &dma->channels);
113 device->idx[idx] = chan;
114 init_timer(&chan->timer);
115 chan->timer.function = timer_fn;
116 chan->timer.data = ioat;
117 tasklet_init(&chan->cleanup_task, tasklet, ioat);
118 tasklet_disable(&chan->cleanup_task);
119}
120
121static void ioat1_timer_event(unsigned long data);
122
123/**
 124 * ioat1_enumerate_channels - find and initialize the device's channels
125 * @device: the device to be enumerated
126 */
127static int ioat1_enumerate_channels(struct ioatdma_device *device)
128{
129 u8 xfercap_scale;
130 u32 xfercap;
131 int i;
132 struct ioat_dma_chan *ioat;
133 struct device *dev = &device->pdev->dev;
134 struct dma_device *dma = &device->common;
135
136 INIT_LIST_HEAD(&dma->channels);
137 dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
138 dma->chancnt &= 0x1f; /* bits [4:0] valid */
139 if (dma->chancnt > ARRAY_SIZE(device->idx)) {
140 dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
141 dma->chancnt, ARRAY_SIZE(device->idx));
142 dma->chancnt = ARRAY_SIZE(device->idx);
143 }
144 xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
145 xfercap_scale &= 0x1f; /* bits [4:0] valid */
146 xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
147 dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap);
148
149#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
150 if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
151 dma->chancnt--;
152#endif
153 for (i = 0; i < dma->chancnt; i++) {
154 ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
155 if (!ioat)
156 break;
157
158 ioat_init_channel(device, &ioat->base, i,
159 ioat1_timer_event,
160 ioat1_cleanup_tasklet,
161 (unsigned long) ioat);
162 ioat->xfercap = xfercap;
163 spin_lock_init(&ioat->desc_lock);
164 INIT_LIST_HEAD(&ioat->free_desc);
165 INIT_LIST_HEAD(&ioat->used_desc);
166 }
167 dma->chancnt = i;
168 return i;
169}
170
171/**
 172 * __ioat1_dma_memcpy_issue_pending - push potentially unrecognized appended
 173 * descriptors to hw
 174 * @ioat: IOAT DMA channel handle
175 */
176static inline void
177__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat)
178{
179 void __iomem *reg_base = ioat->base.reg_base;
180
181 dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n",
182 __func__, ioat->pending);
183 ioat->pending = 0;
184 writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET);
185}
186
187static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
188{
189 struct ioat_dma_chan *ioat = to_ioat_chan(chan);
190
191 if (ioat->pending > 0) {
192 spin_lock_bh(&ioat->desc_lock);
193 __ioat1_dma_memcpy_issue_pending(ioat);
194 spin_unlock_bh(&ioat->desc_lock);
195 }
196}
197
198/**
199 * ioat1_reset_channel - restart a channel
200 * @ioat: IOAT DMA channel handle
201 */
202static void ioat1_reset_channel(struct ioat_dma_chan *ioat)
203{
204 struct ioat_chan_common *chan = &ioat->base;
205 void __iomem *reg_base = chan->reg_base;
206 u32 chansts, chanerr;
207
208 dev_warn(to_dev(chan), "reset\n");
209 chanerr = readl(reg_base + IOAT_CHANERR_OFFSET);
210 chansts = *chan->completion & IOAT_CHANSTS_STATUS;
211 if (chanerr) {
212 dev_err(to_dev(chan),
213 "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
214 chan_num(chan), chansts, chanerr);
215 writel(chanerr, reg_base + IOAT_CHANERR_OFFSET);
216 }
217
218 /*
 219 * Reset the channel and wait for things to settle out.
 220 * Force the pending count to a large negative value
 221 * to make sure nothing triggers an issue_pending
 222 * while we are waiting, then restart via the timer
 223 * handler.
224 */
225
226 ioat->pending = INT_MIN;
227 writeb(IOAT_CHANCMD_RESET,
228 reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
229 set_bit(IOAT_RESET_PENDING, &chan->state);
230 mod_timer(&chan->timer, jiffies + RESET_DELAY);
231}
232
233static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
234{
235 struct dma_chan *c = tx->chan;
236 struct ioat_dma_chan *ioat = to_ioat_chan(c);
237 struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
238 struct ioat_chan_common *chan = &ioat->base;
239 struct ioat_desc_sw *first;
240 struct ioat_desc_sw *chain_tail;
241 dma_cookie_t cookie;
242
243 spin_lock_bh(&ioat->desc_lock);
244 /* cookie incr and addition to used_list must be atomic */
245 cookie = c->cookie;
246 cookie++;
247 if (cookie < 0)
248 cookie = 1;
249 c->cookie = cookie;
250 tx->cookie = cookie;
251 dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
252
253 /* write address into NextDescriptor field of last desc in chain */
254 first = to_ioat_desc(desc->tx_list.next);
255 chain_tail = to_ioat_desc(ioat->used_desc.prev);
256 /* make descriptor updates globally visible before chaining */
257 wmb();
258 chain_tail->hw->next = first->txd.phys;
259 list_splice_tail_init(&desc->tx_list, &ioat->used_desc);
260 dump_desc_dbg(ioat, chain_tail);
261 dump_desc_dbg(ioat, first);
262
263 if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
264 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
265
266 ioat->active += desc->hw->tx_cnt;
267 ioat->pending += desc->hw->tx_cnt;
268 if (ioat->pending >= ioat_pending_level)
269 __ioat1_dma_memcpy_issue_pending(ioat);
270 spin_unlock_bh(&ioat->desc_lock);
271
272 return cookie;
273}
274
275/**
276 * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
277 * @ioat: the channel supplying the memory pool for the descriptors
278 * @flags: allocation flags
279 */
280static struct ioat_desc_sw *
281ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags)
282{
283 struct ioat_dma_descriptor *desc;
284 struct ioat_desc_sw *desc_sw;
285 struct ioatdma_device *ioatdma_device;
286 dma_addr_t phys;
287
288 ioatdma_device = ioat->base.device;
289 desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
290 if (unlikely(!desc))
291 return NULL;
292
293 desc_sw = kzalloc(sizeof(*desc_sw), flags);
294 if (unlikely(!desc_sw)) {
295 pci_pool_free(ioatdma_device->dma_pool, desc, phys);
296 return NULL;
297 }
298
299 memset(desc, 0, sizeof(*desc));
300
301 INIT_LIST_HEAD(&desc_sw->tx_list);
302 dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common);
303 desc_sw->txd.tx_submit = ioat1_tx_submit;
304 desc_sw->hw = desc;
305 desc_sw->txd.phys = phys;
306 set_desc_id(desc_sw, -1);
307
308 return desc_sw;
309}
310
311static int ioat_initial_desc_count = 256;
312module_param(ioat_initial_desc_count, int, 0644);
313MODULE_PARM_DESC(ioat_initial_desc_count,
314 "ioat1: initial descriptors per channel (default: 256)");
315/**
316 * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors
 317 * @c: the channel to be filled out
318 */
319static int ioat1_dma_alloc_chan_resources(struct dma_chan *c)
320{
321 struct ioat_dma_chan *ioat = to_ioat_chan(c);
322 struct ioat_chan_common *chan = &ioat->base;
323 struct ioat_desc_sw *desc;
324 u32 chanerr;
325 int i;
326 LIST_HEAD(tmp_list);
327
328 /* have we already been set up? */
329 if (!list_empty(&ioat->free_desc))
330 return ioat->desccount;
331
332 /* Setup register to interrupt and write completion status on error */
333 writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
334
335 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
336 if (chanerr) {
337 dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
338 writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
339 }
340
341 /* Allocate descriptors */
342 for (i = 0; i < ioat_initial_desc_count; i++) {
343 desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL);
344 if (!desc) {
345 dev_err(to_dev(chan), "Only %d initial descriptors\n", i);
346 break;
347 }
348 set_desc_id(desc, i);
349 list_add_tail(&desc->node, &tmp_list);
350 }
351 spin_lock_bh(&ioat->desc_lock);
352 ioat->desccount = i;
353 list_splice(&tmp_list, &ioat->free_desc);
354 spin_unlock_bh(&ioat->desc_lock);
355
356 /* allocate a completion writeback area */
 357 /* doing two 32-bit writes to MMIO since one 64-bit write doesn't work */
358 chan->completion = pci_pool_alloc(chan->device->completion_pool,
359 GFP_KERNEL, &chan->completion_dma);
360 memset(chan->completion, 0, sizeof(*chan->completion));
361 writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
362 chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
363 writel(((u64) chan->completion_dma) >> 32,
364 chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
365
366 tasklet_enable(&chan->cleanup_task);
367 ioat1_dma_start_null_desc(ioat); /* give chain to dma device */
368 dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
369 __func__, ioat->desccount);
370 return ioat->desccount;
371}
372
373/**
374 * ioat1_dma_free_chan_resources - release all the descriptors
 375 * @c: the channel to be cleaned
376 */
377static void ioat1_dma_free_chan_resources(struct dma_chan *c)
378{
379 struct ioat_dma_chan *ioat = to_ioat_chan(c);
380 struct ioat_chan_common *chan = &ioat->base;
381 struct ioatdma_device *ioatdma_device = chan->device;
382 struct ioat_desc_sw *desc, *_desc;
383 int in_use_descs = 0;
384
385 /* Before freeing channel resources first check
386 * if they have been previously allocated for this channel.
387 */
388 if (ioat->desccount == 0)
389 return;
390
391 tasklet_disable(&chan->cleanup_task);
392 del_timer_sync(&chan->timer);
393 ioat1_cleanup(ioat);
394
395 /* Delay 100ms after reset to allow internal DMA logic to quiesce
396 * before removing DMA descriptor resources.
397 */
398 writeb(IOAT_CHANCMD_RESET,
399 chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
400 mdelay(100);
401
402 spin_lock_bh(&ioat->desc_lock);
403 list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) {
404 dev_dbg(to_dev(chan), "%s: freeing %d from used list\n",
405 __func__, desc_id(desc));
406 dump_desc_dbg(ioat, desc);
407 in_use_descs++;
408 list_del(&desc->node);
409 pci_pool_free(ioatdma_device->dma_pool, desc->hw,
410 desc->txd.phys);
411 kfree(desc);
412 }
413 list_for_each_entry_safe(desc, _desc,
414 &ioat->free_desc, node) {
415 list_del(&desc->node);
416 pci_pool_free(ioatdma_device->dma_pool, desc->hw,
417 desc->txd.phys);
418 kfree(desc);
419 }
420 spin_unlock_bh(&ioat->desc_lock);
421
422 pci_pool_free(ioatdma_device->completion_pool,
423 chan->completion,
424 chan->completion_dma);
425
 426 /* one is expected, since the last descriptor is deliberately left in place */
427 if (in_use_descs > 1)
428 dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
429 in_use_descs - 1);
430
431 chan->last_completion = 0;
432 chan->completion_dma = 0;
433 ioat->pending = 0;
434 ioat->desccount = 0;
435}
436
437/**
438 * ioat1_dma_get_next_descriptor - return the next available descriptor
439 * @ioat: IOAT DMA channel handle
440 *
441 * Gets the next descriptor from the chain, and must be called with the
442 * channel's desc_lock held. Allocates more descriptors if the channel
443 * has run out.
444 */
445static struct ioat_desc_sw *
446ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat)
447{
448 struct ioat_desc_sw *new;
449
450 if (!list_empty(&ioat->free_desc)) {
451 new = to_ioat_desc(ioat->free_desc.next);
452 list_del(&new->node);
453 } else {
454 /* try to get another desc */
455 new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC);
456 if (!new) {
457 dev_err(to_dev(&ioat->base), "alloc failed\n");
458 return NULL;
459 }
460 }
461 dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n",
462 __func__, desc_id(new));
463 prefetch(new->hw);
464 return new;
465}
466
467static struct dma_async_tx_descriptor *
468ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest,
469 dma_addr_t dma_src, size_t len, unsigned long flags)
470{
471 struct ioat_dma_chan *ioat = to_ioat_chan(c);
472 struct ioat_desc_sw *desc;
473 size_t copy;
474 LIST_HEAD(chain);
475 dma_addr_t src = dma_src;
476 dma_addr_t dest = dma_dest;
477 size_t total_len = len;
478 struct ioat_dma_descriptor *hw = NULL;
479 int tx_cnt = 0;
480
481 spin_lock_bh(&ioat->desc_lock);
482 desc = ioat1_dma_get_next_descriptor(ioat);
483 do {
484 if (!desc)
485 break;
486
487 tx_cnt++;
488 copy = min_t(size_t, len, ioat->xfercap);
489
490 hw = desc->hw;
491 hw->size = copy;
492 hw->ctl = 0;
493 hw->src_addr = src;
494 hw->dst_addr = dest;
495
496 list_add_tail(&desc->node, &chain);
497
498 len -= copy;
499 dest += copy;
500 src += copy;
501 if (len) {
502 struct ioat_desc_sw *next;
503
504 async_tx_ack(&desc->txd);
505 next = ioat1_dma_get_next_descriptor(ioat);
506 hw->next = next ? next->txd.phys : 0;
507 dump_desc_dbg(ioat, desc);
508 desc = next;
509 } else
510 hw->next = 0;
511 } while (len);
512
513 if (!desc) {
514 struct ioat_chan_common *chan = &ioat->base;
515
516 dev_err(to_dev(chan),
517 "chan%d - get_next_desc failed\n", chan_num(chan));
518 list_splice(&chain, &ioat->free_desc);
519 spin_unlock_bh(&ioat->desc_lock);
520 return NULL;
521 }
522 spin_unlock_bh(&ioat->desc_lock);
523
524 desc->txd.flags = flags;
525 desc->len = total_len;
526 list_splice(&chain, &desc->tx_list);
527 hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
528 hw->ctl_f.compl_write = 1;
529 hw->tx_cnt = tx_cnt;
530 dump_desc_dbg(ioat, desc);
531
532 return &desc->txd;
533}
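A copy larger than ioat->xfercap is thus split into a chain of descriptors, with only the returned (last) descriptor carrying the caller's flags and completion write-back. The self-test at the end of this file exercises exactly this path; in miniature:

        /* sketch: one memcpy through the ioat1 prep/submit/issue flow */
        struct dma_async_tx_descriptor *tx;
        dma_cookie_t cookie;

        tx = dma->device_prep_dma_memcpy(chan, dma_dest, dma_src, len,
                                         DMA_PREP_INTERRUPT);
        if (tx) {
                cookie = tx->tx_submit(tx);     /* ioat1_tx_submit() */
                dma->device_issue_pending(chan);
        }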
534
535static void ioat1_cleanup_tasklet(unsigned long data)
536{
537 struct ioat_dma_chan *chan = (void *)data;
538
539 ioat1_cleanup(chan);
540 writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET);
541}
542
543void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
544 size_t len, struct ioat_dma_descriptor *hw)
545{
546 struct pci_dev *pdev = chan->device->pdev;
547 size_t offset = len - hw->size;
548
549 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
550 ioat_unmap(pdev, hw->dst_addr - offset, len,
551 PCI_DMA_FROMDEVICE, flags, 1);
552
553 if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP))
554 ioat_unmap(pdev, hw->src_addr - offset, len,
555 PCI_DMA_TODEVICE, flags, 0);
556}
557
558unsigned long ioat_get_current_completion(struct ioat_chan_common *chan)
559{
560 unsigned long phys_complete;
561 u64 completion;
562
563 completion = *chan->completion;
564 phys_complete = ioat_chansts_to_addr(completion);
565
566 dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__,
567 (unsigned long long) phys_complete);
568
569 if (is_ioat_halted(completion)) {
570 u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
571 dev_err(to_dev(chan), "Channel halted, chanerr = %x\n",
572 chanerr);
573
574 /* TODO do something to salvage the situation */
575 }
576
577 return phys_complete;
578}
579
580bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
581 unsigned long *phys_complete)
582{
583 *phys_complete = ioat_get_current_completion(chan);
584 if (*phys_complete == chan->last_completion)
585 return false;
586 clear_bit(IOAT_COMPLETION_ACK, &chan->state);
587 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
588
589 return true;
590}
591
592static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete)
593{
594 struct ioat_chan_common *chan = &ioat->base;
595 struct list_head *_desc, *n;
596 struct dma_async_tx_descriptor *tx;
597
598 dev_dbg(to_dev(chan), "%s: phys_complete: %lx\n",
599 __func__, phys_complete);
600 list_for_each_safe(_desc, n, &ioat->used_desc) {
601 struct ioat_desc_sw *desc;
602
603 prefetch(n);
604 desc = list_entry(_desc, typeof(*desc), node);
605 tx = &desc->txd;
606 /*
607 * Incoming DMA requests may use multiple descriptors,
608 * due to exceeding xfercap, perhaps. If so, only the
609 * last one will have a cookie, and require unmapping.
610 */
611 dump_desc_dbg(ioat, desc);
612 if (tx->cookie) {
613 chan->completed_cookie = tx->cookie;
614 tx->cookie = 0;
615 ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
616 ioat->active -= desc->hw->tx_cnt;
617 if (tx->callback) {
618 tx->callback(tx->callback_param);
619 tx->callback = NULL;
620 }
621 }
622
623 if (tx->phys != phys_complete) {
624 /*
625 * a completed entry, but not the last, so clean
626 * up if the client is done with the descriptor
627 */
628 if (async_tx_test_ack(tx))
629 list_move_tail(&desc->node, &ioat->free_desc);
630 } else {
631 /*
632 * last used desc. Do not remove, so we can
633 * append from it.
634 */
635
636 /* if nothing else is pending, cancel the
637 * completion timeout
638 */
639 if (n == &ioat->used_desc) {
640 dev_dbg(to_dev(chan),
641 "%s cancel completion timeout\n",
642 __func__);
643 clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
644 }
645
646 /* TODO check status bits? */
647 break;
648 }
649 }
650
651 chan->last_completion = phys_complete;
652}
653
654/**
 655 * ioat1_cleanup - clean up finished descriptors
 656 * @ioat: ioat channel to be cleaned up
657 *
 658 * To prevent lock contention, cleanup is deferred while the locks are
 659 * contended; a terminal timeout then forces cleanup and catches
 660 * completion notification errors.
661 */
662static void ioat1_cleanup(struct ioat_dma_chan *ioat)
663{
664 struct ioat_chan_common *chan = &ioat->base;
665 unsigned long phys_complete;
666
667 prefetch(chan->completion);
668
669 if (!spin_trylock_bh(&chan->cleanup_lock))
670 return;
671
672 if (!ioat_cleanup_preamble(chan, &phys_complete)) {
673 spin_unlock_bh(&chan->cleanup_lock);
674 return;
675 }
676
677 if (!spin_trylock_bh(&ioat->desc_lock)) {
678 spin_unlock_bh(&chan->cleanup_lock);
679 return;
680 }
681
682 __cleanup(ioat, phys_complete);
683
684 spin_unlock_bh(&ioat->desc_lock);
685 spin_unlock_bh(&chan->cleanup_lock);
686}
687
688static void ioat1_timer_event(unsigned long data)
689{
690 struct ioat_dma_chan *ioat = (void *) data;
691 struct ioat_chan_common *chan = &ioat->base;
692
693 dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state);
694
695 spin_lock_bh(&chan->cleanup_lock);
696 if (test_and_clear_bit(IOAT_RESET_PENDING, &chan->state)) {
697 struct ioat_desc_sw *desc;
698
699 spin_lock_bh(&ioat->desc_lock);
700
701 /* restart active descriptors */
702 desc = to_ioat_desc(ioat->used_desc.prev);
703 ioat_set_chainaddr(ioat, desc->txd.phys);
704 ioat_start(chan);
705
706 ioat->pending = 0;
707 set_bit(IOAT_COMPLETION_PENDING, &chan->state);
708 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
709 spin_unlock_bh(&ioat->desc_lock);
710 } else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
711 unsigned long phys_complete;
712
713 spin_lock_bh(&ioat->desc_lock);
714 /* if we haven't made progress and we have already
715 * acknowledged a pending completion once, then be more
716 * forceful with a restart
717 */
718 if (ioat_cleanup_preamble(chan, &phys_complete))
719 __cleanup(ioat, phys_complete);
720 else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
721 ioat1_reset_channel(ioat);
722 else {
723 u64 status = ioat_chansts(chan);
724
725 /* manually update the last completion address */
726 if (ioat_chansts_to_addr(status) != 0)
727 *chan->completion = status;
728
729 set_bit(IOAT_COMPLETION_ACK, &chan->state);
730 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
731 }
732 spin_unlock_bh(&ioat->desc_lock);
733 }
734 spin_unlock_bh(&chan->cleanup_lock);
735}
736
737static enum dma_status
738ioat1_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie,
739 dma_cookie_t *done, dma_cookie_t *used)
740{
741 struct ioat_dma_chan *ioat = to_ioat_chan(c);
742
743 if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
744 return DMA_SUCCESS;
745
746 ioat1_cleanup(ioat);
747
748 return ioat_is_complete(c, cookie, done, used);
749}
750
751static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat)
752{
753 struct ioat_chan_common *chan = &ioat->base;
754 struct ioat_desc_sw *desc;
755 struct ioat_dma_descriptor *hw;
756
757 spin_lock_bh(&ioat->desc_lock);
758
759 desc = ioat1_dma_get_next_descriptor(ioat);
760
761 if (!desc) {
762 dev_err(to_dev(chan),
763 "Unable to start null desc - get next desc failed\n");
764 spin_unlock_bh(&ioat->desc_lock);
765 return;
766 }
767
768 hw = desc->hw;
769 hw->ctl = 0;
770 hw->ctl_f.null = 1;
771 hw->ctl_f.int_en = 1;
772 hw->ctl_f.compl_write = 1;
773 /* set size to non-zero value (channel returns error when size is 0) */
774 hw->size = NULL_DESC_BUFFER_SIZE;
775 hw->src_addr = 0;
776 hw->dst_addr = 0;
777 async_tx_ack(&desc->txd);
778 hw->next = 0;
779 list_add_tail(&desc->node, &ioat->used_desc);
780 dump_desc_dbg(ioat, desc);
781
782 ioat_set_chainaddr(ioat, desc->txd.phys);
783 ioat_start(chan);
784 spin_unlock_bh(&ioat->desc_lock);
785}
786
787/*
 788 * Perform an IOAT transaction to verify the HW works.
789 */
790#define IOAT_TEST_SIZE 2000
791
792static void __devinit ioat_dma_test_callback(void *dma_async_param)
793{
794 struct completion *cmp = dma_async_param;
795
796 complete(cmp);
797}
798
799/**
 800 * ioat_dma_self_test - Perform an IOAT transaction to verify the HW works.
801 * @device: device to be tested
802 */
803int __devinit ioat_dma_self_test(struct ioatdma_device *device)
804{
805 int i;
806 u8 *src;
807 u8 *dest;
808 struct dma_device *dma = &device->common;
809 struct device *dev = &device->pdev->dev;
810 struct dma_chan *dma_chan;
811 struct dma_async_tx_descriptor *tx;
812 dma_addr_t dma_dest, dma_src;
813 dma_cookie_t cookie;
814 int err = 0;
815 struct completion cmp;
816 unsigned long tmo;
817 unsigned long flags;
818
819 src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
820 if (!src)
821 return -ENOMEM;
822 dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
823 if (!dest) {
824 kfree(src);
825 return -ENOMEM;
826 }
827
828 /* Fill in src buffer */
829 for (i = 0; i < IOAT_TEST_SIZE; i++)
830 src[i] = (u8)i;
831
832 /* Start copy, using first DMA channel */
833 dma_chan = container_of(dma->channels.next, struct dma_chan,
834 device_node);
835 if (dma->device_alloc_chan_resources(dma_chan) < 1) {
836 dev_err(dev, "selftest cannot allocate chan resource\n");
837 err = -ENODEV;
838 goto out;
839 }
840
841 dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
842 dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
843 flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE |
844 DMA_PREP_INTERRUPT;
845 tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
846 IOAT_TEST_SIZE, flags);
847 if (!tx) {
848 dev_err(dev, "Self-test prep failed, disabling\n");
849 err = -ENODEV;
850 goto free_resources;
851 }
852
853 async_tx_ack(tx);
854 init_completion(&cmp);
855 tx->callback = ioat_dma_test_callback;
856 tx->callback_param = &cmp;
857 cookie = tx->tx_submit(tx);
858 if (cookie < 0) {
859 dev_err(dev, "Self-test setup failed, disabling\n");
860 err = -ENODEV;
861 goto free_resources;
862 }
863 dma->device_issue_pending(dma_chan);
864
865 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
866
867 if (tmo == 0 ||
868 dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL)
869 != DMA_SUCCESS) {
870 dev_err(dev, "Self-test copy timed out, disabling\n");
871 err = -ENODEV;
872 goto free_resources;
873 }
874 if (memcmp(src, dest, IOAT_TEST_SIZE)) {
875 dev_err(dev, "Self-test copy failed compare, disabling\n");
876 err = -ENODEV;
877 goto free_resources;
878 }
879
880free_resources:
881 dma->device_free_chan_resources(dma_chan);
882out:
883 kfree(src);
884 kfree(dest);
885 return err;
886}
887
888static char ioat_interrupt_style[32] = "msix";
889module_param_string(ioat_interrupt_style, ioat_interrupt_style,
890 sizeof(ioat_interrupt_style), 0644);
891MODULE_PARM_DESC(ioat_interrupt_style,
892 "set ioat interrupt style: msix (default), "
893 "msix-single-vector, msi, intx)");
894
895/**
896 * ioat_dma_setup_interrupts - setup interrupt handler
897 * @device: ioat device
898 */
899static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
900{
901 struct ioat_chan_common *chan;
902 struct pci_dev *pdev = device->pdev;
903 struct device *dev = &pdev->dev;
904 struct msix_entry *msix;
905 int i, j, msixcnt;
906 int err = -EINVAL;
907 u8 intrctrl = 0;
908
909 if (!strcmp(ioat_interrupt_style, "msix"))
910 goto msix;
911 if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
912 goto msix_single_vector;
913 if (!strcmp(ioat_interrupt_style, "msi"))
914 goto msi;
915 if (!strcmp(ioat_interrupt_style, "intx"))
916 goto intx;
917 dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style);
918 goto err_no_irq;
919
920msix:
921 /* The number of MSI-X vectors should equal the number of channels */
922 msixcnt = device->common.chancnt;
923 for (i = 0; i < msixcnt; i++)
924 device->msix_entries[i].entry = i;
925
926 err = pci_enable_msix(pdev, device->msix_entries, msixcnt);
927 if (err < 0)
928 goto msi;
929 if (err > 0)
930 goto msix_single_vector;
931
932 for (i = 0; i < msixcnt; i++) {
933 msix = &device->msix_entries[i];
934 chan = ioat_chan_by_index(device, i);
935 err = devm_request_irq(dev, msix->vector,
936 ioat_dma_do_interrupt_msix, 0,
937 "ioat-msix", chan);
938 if (err) {
939 for (j = 0; j < i; j++) {
940 msix = &device->msix_entries[j];
941 chan = ioat_chan_by_index(device, j);
942 devm_free_irq(dev, msix->vector, chan);
943 }
944 goto msix_single_vector;
945 }
946 }
947 intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
948 goto done;
949
950msix_single_vector:
951 msix = &device->msix_entries[0];
952 msix->entry = 0;
953 err = pci_enable_msix(pdev, device->msix_entries, 1);
954 if (err)
955 goto msi;
956
957 err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0,
958 "ioat-msix", device);
959 if (err) {
960 pci_disable_msix(pdev);
961 goto msi;
962 }
963 goto done;
964
965msi:
966 err = pci_enable_msi(pdev);
967 if (err)
968 goto intx;
969
970 err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0,
971 "ioat-msi", device);
972 if (err) {
973 pci_disable_msi(pdev);
974 goto intx;
975 }
976 goto done;
977
978intx:
979 err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt,
980 IRQF_SHARED, "ioat-intx", device);
981 if (err)
982 goto err_no_irq;
983
984done:
985 if (device->intr_quirk)
986 device->intr_quirk(device);
987 intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
988 writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
989 return 0;
990
991err_no_irq:
992 /* Disable all interrupt generation */
993 writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
994 dev_err(dev, "no usable interrupts\n");
995 return err;
996}
997
998static void ioat_disable_interrupts(struct ioatdma_device *device)
999{
1000 /* Disable all interrupt generation */
1001 writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
1002}
1003
1004int __devinit ioat_probe(struct ioatdma_device *device)
1005{
1006 int err = -ENODEV;
1007 struct dma_device *dma = &device->common;
1008 struct pci_dev *pdev = device->pdev;
1009 struct device *dev = &pdev->dev;
1010
1011 /* DMA coherent memory pool for DMA descriptor allocations */
1012 device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
1013 sizeof(struct ioat_dma_descriptor),
1014 64, 0);
1015 if (!device->dma_pool) {
1016 err = -ENOMEM;
1017 goto err_dma_pool;
1018 }
1019
1020 device->completion_pool = pci_pool_create("completion_pool", pdev,
1021 sizeof(u64), SMP_CACHE_BYTES,
1022 SMP_CACHE_BYTES);
1023
1024 if (!device->completion_pool) {
1025 err = -ENOMEM;
1026 goto err_completion_pool;
1027 }
1028
1029 device->enumerate_channels(device);
1030
1031 dma_cap_set(DMA_MEMCPY, dma->cap_mask);
1032 dma->dev = &pdev->dev;
1033
1034 if (!dma->chancnt) {
1035 dev_err(dev, "zero channels detected\n");
1036 goto err_setup_interrupts;
1037 }
1038
1039 err = ioat_dma_setup_interrupts(device);
1040 if (err)
1041 goto err_setup_interrupts;
1042
1043 err = device->self_test(device);
1044 if (err)
1045 goto err_self_test;
1046
1047 return 0;
1048
1049err_self_test:
1050 ioat_disable_interrupts(device);
1051err_setup_interrupts:
1052 pci_pool_destroy(device->completion_pool);
1053err_completion_pool:
1054 pci_pool_destroy(device->dma_pool);
1055err_dma_pool:
1056 return err;
1057}
1058
1059int __devinit ioat_register(struct ioatdma_device *device)
1060{
1061 int err = dma_async_device_register(&device->common);
1062
1063 if (err) {
1064 ioat_disable_interrupts(device);
1065 pci_pool_destroy(device->completion_pool);
1066 pci_pool_destroy(device->dma_pool);
1067 }
1068
1069 return err;
1070}
1071
1072/* ioat1_intr_quirk - fix up dma ctrl register to enable / disable msi */
1073static void ioat1_intr_quirk(struct ioatdma_device *device)
1074{
1075 struct pci_dev *pdev = device->pdev;
1076 u32 dmactrl;
1077
1078 pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
1079 if (pdev->msi_enabled)
1080 dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
1081 else
1082 dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN;
1083 pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl);
1084}
1085
1086static ssize_t ring_size_show(struct dma_chan *c, char *page)
1087{
1088 struct ioat_dma_chan *ioat = to_ioat_chan(c);
1089
1090 return sprintf(page, "%d\n", ioat->desccount);
1091}
1092static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
1093
1094static ssize_t ring_active_show(struct dma_chan *c, char *page)
1095{
1096 struct ioat_dma_chan *ioat = to_ioat_chan(c);
1097
1098 return sprintf(page, "%d\n", ioat->active);
1099}
1100static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
1101
1102static ssize_t cap_show(struct dma_chan *c, char *page)
1103{
1104 struct dma_device *dma = c->device;
1105
1106 return sprintf(page, "copy%s%s%s%s%s%s\n",
1107 dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "",
1108 dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "",
1109 dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "",
1110 dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "",
1111 dma_has_cap(DMA_MEMSET, dma->cap_mask) ? " fill" : "",
1112 dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : "");
1113
1114}
1115struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap);
1116
1117static ssize_t version_show(struct dma_chan *c, char *page)
1118{
1119 struct dma_device *dma = c->device;
1120 struct ioatdma_device *device = to_ioatdma_device(dma);
1121
1122 return sprintf(page, "%d.%d\n",
1123 device->version >> 4, device->version & 0xf);
1124}
1125struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version);
1126
1127static struct attribute *ioat1_attrs[] = {
1128 &ring_size_attr.attr,
1129 &ring_active_attr.attr,
1130 &ioat_cap_attr.attr,
1131 &ioat_version_attr.attr,
1132 NULL,
1133};
1134
1135static ssize_t
1136ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
1137{
1138 struct ioat_sysfs_entry *entry;
1139 struct ioat_chan_common *chan;
1140
1141 entry = container_of(attr, struct ioat_sysfs_entry, attr);
1142 chan = container_of(kobj, struct ioat_chan_common, kobj);
1143
1144 if (!entry->show)
1145 return -EIO;
1146 return entry->show(&chan->common, page);
1147}
1148
1149struct sysfs_ops ioat_sysfs_ops = {
1150 .show = ioat_attr_show,
1151};
1152
1153static struct kobj_type ioat1_ktype = {
1154 .sysfs_ops = &ioat_sysfs_ops,
1155 .default_attrs = ioat1_attrs,
1156};
1157
1158void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type)
1159{
1160 struct dma_device *dma = &device->common;
1161 struct dma_chan *c;
1162
1163 list_for_each_entry(c, &dma->channels, device_node) {
1164 struct ioat_chan_common *chan = to_chan_common(c);
1165 struct kobject *parent = &c->dev->device.kobj;
1166 int err;
1167
1168 err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata");
1169 if (err) {
1170 dev_warn(to_dev(chan),
1171 "sysfs init error (%d), continuing...\n", err);
1172 kobject_put(&chan->kobj);
1173 set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state);
1174 }
1175 }
1176}
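Once this runs, each channel's dmaengine class device gains a "quickdata" sysfs directory whose reads go through ioat_attr_show() above; assuming default dmaengine naming, an attribute would be read as, e.g., cat /sys/class/dma/dma0chan0/quickdata/ring_size (path hypothetical, it depends on device/channel numbering).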
1177
1178void ioat_kobject_del(struct ioatdma_device *device)
1179{
1180 struct dma_device *dma = &device->common;
1181 struct dma_chan *c;
1182
1183 list_for_each_entry(c, &dma->channels, device_node) {
1184 struct ioat_chan_common *chan = to_chan_common(c);
1185
1186 if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) {
1187 kobject_del(&chan->kobj);
1188 kobject_put(&chan->kobj);
1189 }
1190 }
1191}
1192
1193int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca)
1194{
1195 struct pci_dev *pdev = device->pdev;
1196 struct dma_device *dma;
1197 int err;
1198
1199 device->intr_quirk = ioat1_intr_quirk;
1200 device->enumerate_channels = ioat1_enumerate_channels;
1201 device->self_test = ioat_dma_self_test;
1202 dma = &device->common;
1203 dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
1204 dma->device_issue_pending = ioat1_dma_memcpy_issue_pending;
1205 dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources;
1206 dma->device_free_chan_resources = ioat1_dma_free_chan_resources;
1207 dma->device_is_tx_complete = ioat1_dma_is_complete;
1208
1209 err = ioat_probe(device);
1210 if (err)
1211 return err;
1212 ioat_set_tcp_copy_break(4096);
1213 err = ioat_register(device);
1214 if (err)
1215 return err;
1216 ioat_kobject_add(device, &ioat1_ktype);
1217
1218 if (dca)
1219 device->dca = ioat_dca_init(pdev, device->reg_base);
1220
1221 return err;
1222}
1223
1224void __devexit ioat_dma_remove(struct ioatdma_device *device)
1225{
1226 struct dma_device *dma = &device->common;
1227
1228 ioat_disable_interrupts(device);
1229
1230 ioat_kobject_del(device);
1231
1232 dma_async_device_unregister(dma);
1233
1234 pci_pool_destroy(device->dma_pool);
1235 pci_pool_destroy(device->completion_pool);
1236
1237 INIT_LIST_HEAD(&dma->channels);
1238}
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
new file mode 100644
index 000000000000..c14fdfeb7f33
--- /dev/null
+++ b/drivers/dma/ioat/dma.h
@@ -0,0 +1,337 @@
1/*
2 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 2 of the License, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59
16 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * The full GNU General Public License is included in this distribution in the
19 * file called COPYING.
20 */
21#ifndef IOATDMA_H
22#define IOATDMA_H
23
24#include <linux/dmaengine.h>
25#include "hw.h"
26#include "registers.h"
27#include <linux/init.h>
28#include <linux/dmapool.h>
29#include <linux/cache.h>
30#include <linux/pci_ids.h>
31#include <net/tcp.h>
32
33#define IOAT_DMA_VERSION "4.00"
34
35#define IOAT_LOW_COMPLETION_MASK 0xffffffc0
36#define IOAT_DMA_DCA_ANY_CPU ~0
37
38#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
39#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
40#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd)
41#define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev)
42
43#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
44
45/*
46 * workaround for IOAT ver.3.0 null descriptor issue
47 * (channel returns error when size is 0)
48 */
49#define NULL_DESC_BUFFER_SIZE 1
50
51/**
52 * struct ioatdma_device - internal representation of an IOAT device
53 * @pdev: PCI-Express device
54 * @reg_base: MMIO register space base address
55 * @dma_pool: for allocating DMA descriptors
 * @completion_pool: for allocating completion writeback areas
56 * @common: embedded struct dma_device
57 * @version: version of ioatdma device
58 * @msix_entries: irq handlers
59 * @idx: per channel data
60 * @dca: direct cache access context
61 * @intr_quirk: interrupt setup quirk (for ioat_v1 devices)
62 * @enumerate_channels: hw version specific channel enumeration
63 * @cleanup_tasklet: select between the v2 and v3 cleanup routines
64 * @timer_fn: select between the v2 and v3 timer watchdog routines
65 * @self_test: hardware version specific self test for each supported op type
66 *
67 * Note: the v3 cleanup routine supports raid operations
68 */
69struct ioatdma_device {
70 struct pci_dev *pdev;
71 void __iomem *reg_base;
72 struct pci_pool *dma_pool;
73 struct pci_pool *completion_pool;
74 struct dma_device common;
75 u8 version;
76 struct msix_entry msix_entries[4];
77 struct ioat_chan_common *idx[4];
78 struct dca_provider *dca;
79 void (*intr_quirk)(struct ioatdma_device *device);
80 int (*enumerate_channels)(struct ioatdma_device *device);
81 void (*cleanup_tasklet)(unsigned long data);
82 void (*timer_fn)(unsigned long data);
83 int (*self_test)(struct ioatdma_device *device);
84};
85
86struct ioat_chan_common {
87 struct dma_chan common;
88 void __iomem *reg_base;
89 unsigned long last_completion;
90 spinlock_t cleanup_lock;
91 dma_cookie_t completed_cookie;
92 unsigned long state;
93 #define IOAT_COMPLETION_PENDING 0
94 #define IOAT_COMPLETION_ACK 1
95 #define IOAT_RESET_PENDING 2
96 #define IOAT_KOBJ_INIT_FAIL 3
97 struct timer_list timer;
98 #define COMPLETION_TIMEOUT msecs_to_jiffies(100)
99 #define IDLE_TIMEOUT msecs_to_jiffies(2000)
100 #define RESET_DELAY msecs_to_jiffies(100)
101 struct ioatdma_device *device;
102 dma_addr_t completion_dma;
103 u64 *completion;
104 struct tasklet_struct cleanup_task;
105 struct kobject kobj;
106};
107
108struct ioat_sysfs_entry {
109 struct attribute attr;
110 ssize_t (*show)(struct dma_chan *, char *);
111};
112
113/**
114 * struct ioat_dma_chan - internal representation of a DMA channel
115 */
116struct ioat_dma_chan {
117 struct ioat_chan_common base;
118
119 size_t xfercap; /* XFERCAP register value expanded out */
120
121 spinlock_t desc_lock;
122 struct list_head free_desc;
123 struct list_head used_desc;
124
125 int pending;
126 u16 desccount;
127 u16 active;
128};
129
130static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c)
131{
132 return container_of(c, struct ioat_chan_common, common);
133}
134
135static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c)
136{
137 struct ioat_chan_common *chan = to_chan_common(c);
138
139 return container_of(chan, struct ioat_dma_chan, base);
140}
141
142/**
143 * ioat_is_complete - poll the status of an ioat transaction
144 * @c: channel handle
145 * @cookie: transaction identifier
146 * @done: if set, updated with last completed transaction
147 * @used: if set, updated with last used transaction
148 */
149static inline enum dma_status
150ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie,
151 dma_cookie_t *done, dma_cookie_t *used)
152{
153 struct ioat_chan_common *chan = to_chan_common(c);
154 dma_cookie_t last_used;
155 dma_cookie_t last_complete;
156
157 last_used = c->cookie;
158 last_complete = chan->completed_cookie;
159
160 if (done)
161 *done = last_complete;
162 if (used)
163 *used = last_used;
164
165 return dma_async_is_complete(cookie, last_complete, last_used);
166}
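For context, a dmaengine client of this era polls such a cookie through the generic wrapper, which lands here via the device_is_tx_complete hook. A hedged busy-wait sketch (function name invented; the NULL done/used pointers are legal per the checks above):

	static void example_wait_for_cookie(struct dma_chan *chan, dma_cookie_t cookie)
	{
		/* spin until the engine reports anything but in-progress */
		while (dma_async_is_tx_complete(chan, cookie, NULL, NULL) ==
		       DMA_IN_PROGRESS)
			cpu_relax();
	}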
167
168/* wrapper around hardware descriptor format + additional software fields */
169
170/**
171 * struct ioat_desc_sw - wrapper around hardware descriptor
172 * @hw: hardware DMA descriptor (for memcpy)
173 * @node: this descriptor will either be on the free list,
174 * or attached to a transaction list (tx_list)
 * @len: total transaction length, kept for unmap at cleanup time
 * @tx_list: list of descriptors chained to form this transaction
175 * @txd: the generic software descriptor for all engines
176 * @id: identifier for debug
177 */
178struct ioat_desc_sw {
179 struct ioat_dma_descriptor *hw;
180 struct list_head node;
181 size_t len;
182 struct list_head tx_list;
183 struct dma_async_tx_descriptor txd;
184 #ifdef DEBUG
185 int id;
186 #endif
187};
188
189#ifdef DEBUG
190#define set_desc_id(desc, i) ((desc)->id = (i))
191#define desc_id(desc) ((desc)->id)
192#else
193#define set_desc_id(desc, i)
194#define desc_id(desc) (0)
195#endif
196
197static inline void
198__dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw,
199 struct dma_async_tx_descriptor *tx, int id)
200{
201 struct device *dev = to_dev(chan);
202
203 dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x"
204 " ctl: %#x (op: %d int_en: %d compl: %d)\n", id,
205 (unsigned long long) tx->phys,
206 (unsigned long long) hw->next, tx->cookie, tx->flags,
207 hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write);
208}
209
210#define dump_desc_dbg(c, d) \
211 ({ if (d) __dump_desc_dbg(&c->base, d->hw, &d->txd, desc_id(d)); 0; })
212
213static inline void ioat_set_tcp_copy_break(unsigned long copybreak)
214{
215 #ifdef CONFIG_NET_DMA
216 sysctl_tcp_dma_copybreak = copybreak;
217 #endif
218}
219
220static inline struct ioat_chan_common *
221ioat_chan_by_index(struct ioatdma_device *device, int index)
222{
223 return device->idx[index];
224}
225
226static inline u64 ioat_chansts(struct ioat_chan_common *chan)
227{
228 u8 ver = chan->device->version;
229 u64 status;
230 u32 status_lo;
231
232 /* We need to read the low address first as this causes the
233 * chipset to latch the upper bits for the subsequent read
234 */
235 status_lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver));
236 status = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver));
237 status <<= 32;
238 status |= status_lo;
239
240 return status;
241}
242
243static inline void ioat_start(struct ioat_chan_common *chan)
244{
245 u8 ver = chan->device->version;
246
247 writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
248}
249
250static inline u64 ioat_chansts_to_addr(u64 status)
251{
252 return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
253}
254
255static inline u32 ioat_chanerr(struct ioat_chan_common *chan)
256{
257 return readl(chan->reg_base + IOAT_CHANERR_OFFSET);
258}
259
260static inline void ioat_suspend(struct ioat_chan_common *chan)
261{
262 u8 ver = chan->device->version;
263
264 writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
265}
266
267static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr)
268{
269 struct ioat_chan_common *chan = &ioat->base;
270
271 writel(addr & 0x00000000FFFFFFFF,
272 chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
273 writel(addr >> 32,
274 chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
275}
276
277static inline bool is_ioat_active(unsigned long status)
278{
279 return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
280}
281
282static inline bool is_ioat_idle(unsigned long status)
283{
284 return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_DONE);
285}
286
287static inline bool is_ioat_halted(unsigned long status)
288{
289 return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
290}
291
292static inline bool is_ioat_suspended(unsigned long status)
293{
294 return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
295}
296
297/* channel was fatally programmed */
298static inline bool is_ioat_bug(unsigned long err)
299{
300 return !!(err & (IOAT_CHANERR_SRC_ADDR_ERR|IOAT_CHANERR_DEST_ADDR_ERR|
301 IOAT_CHANERR_NEXT_ADDR_ERR|IOAT_CHANERR_CONTROL_ERR|
302 IOAT_CHANERR_LENGTH_ERR));
303}
304
305static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
306 int direction, enum dma_ctrl_flags flags, bool dst)
307{
308 if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
309 (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
310 pci_unmap_single(pdev, addr, len, direction);
311 else
312 pci_unmap_page(pdev, addr, len, direction);
313}
314
315int __devinit ioat_probe(struct ioatdma_device *device);
316int __devinit ioat_register(struct ioatdma_device *device);
317int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca);
318int __devinit ioat_dma_self_test(struct ioatdma_device *device);
319void __devexit ioat_dma_remove(struct ioatdma_device *device);
320struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev,
321 void __iomem *iobase);
322unsigned long ioat_get_current_completion(struct ioat_chan_common *chan);
323void ioat_init_channel(struct ioatdma_device *device,
324 struct ioat_chan_common *chan, int idx,
325 void (*timer_fn)(unsigned long),
326 void (*tasklet)(unsigned long),
327 unsigned long ioat);
328void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
329 size_t len, struct ioat_dma_descriptor *hw);
330bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
331 unsigned long *phys_complete);
332void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
333void ioat_kobject_del(struct ioatdma_device *device);
334extern struct sysfs_ops ioat_sysfs_ops;
335extern struct ioat_sysfs_entry ioat_version_attr;
336extern struct ioat_sysfs_entry ioat_cap_attr;
337#endif /* IOATDMA_H */
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
new file mode 100644
index 000000000000..96ffab7d37a7
--- /dev/null
+++ b/drivers/dma/ioat/dma_v2.c
@@ -0,0 +1,871 @@
1/*
2 * Intel I/OAT DMA Linux driver
3 * Copyright(c) 2004 - 2009 Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * The full GNU General Public License is included in this distribution in
19 * the file called "COPYING".
20 *
21 */
22
23/*
24 * This driver supports an Intel I/OAT DMA engine (versions >= 2), which
25 * does asynchronous data movement and checksumming operations.
26 */
27
28#include <linux/init.h>
29#include <linux/module.h>
30#include <linux/pci.h>
31#include <linux/interrupt.h>
32#include <linux/dmaengine.h>
33#include <linux/delay.h>
34#include <linux/dma-mapping.h>
35#include <linux/workqueue.h>
36#include <linux/i7300_idle.h>
37#include "dma.h"
38#include "dma_v2.h"
39#include "registers.h"
40#include "hw.h"
41
42int ioat_ring_alloc_order = 8;
43module_param(ioat_ring_alloc_order, int, 0644);
44MODULE_PARM_DESC(ioat_ring_alloc_order,
45 "ioat2+: allocate 2^n descriptors per channel"
46 " (default: 8 max: 16)");
47static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER;
48module_param(ioat_ring_max_alloc_order, int, 0644);
49MODULE_PARM_DESC(ioat_ring_max_alloc_order,
50 "ioat2+: upper limit for ring size (default: 16)");
51
52void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
53{
54	void __iomem *reg_base = ioat->base.reg_base;
55
56 ioat->pending = 0;
57 ioat->dmacount += ioat2_ring_pending(ioat);
58 ioat->issued = ioat->head;
59 /* make descriptor updates globally visible before notifying channel */
60 wmb();
61 writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
62 dev_dbg(to_dev(&ioat->base),
63 "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
64 __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
65}
66
67void ioat2_issue_pending(struct dma_chan *chan)
68{
69 struct ioat2_dma_chan *ioat = to_ioat2_chan(chan);
70
71 spin_lock_bh(&ioat->ring_lock);
72 if (ioat->pending == 1)
73 __ioat2_issue_pending(ioat);
74 spin_unlock_bh(&ioat->ring_lock);
75}
76
77/**
78 * ioat2_update_pending - log pending descriptors
79 * @ioat: ioat2+ channel
80 *
81 * Set pending to '1' unless it is already '2'; pending == 2 indicates
82 * that submission is temporarily blocked due to an in-flight reset.
83 * If we are already above the ioat_pending_level threshold then just
84 * issue the pending descriptors immediately.
85 *
86 * called with ring_lock held
87 */
88static void ioat2_update_pending(struct ioat2_dma_chan *ioat)
89{
90 if (unlikely(ioat->pending == 2))
91 return;
92 else if (ioat2_ring_pending(ioat) > ioat_pending_level)
93 __ioat2_issue_pending(ioat);
94 else
95 ioat->pending = 1;
96}
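Read together with its kernel-doc, 'pending' is a three-state flag rather than a boolean. A hedged restatement as an enum (names invented here; the driver itself uses bare integers):

	enum example_pending_state {
		EXAMPLE_PENDING_NONE	= 0,	/* doorbell write is up to date */
		EXAMPLE_PENDING_STAGED	= 1,	/* work staged, doorbell deferred */
		EXAMPLE_PENDING_BLOCKED	= 2,	/* blocked by an in-flight reset */
	};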
97
98static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
99{
100 struct ioat_ring_ent *desc;
101 struct ioat_dma_descriptor *hw;
102 int idx;
103
104 if (ioat2_ring_space(ioat) < 1) {
105 dev_err(to_dev(&ioat->base),
106 "Unable to start null desc - ring full\n");
107 return;
108 }
109
110 dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n",
111 __func__, ioat->head, ioat->tail, ioat->issued);
112 idx = ioat2_desc_alloc(ioat, 1);
113 desc = ioat2_get_ring_ent(ioat, idx);
114
115 hw = desc->hw;
116 hw->ctl = 0;
117 hw->ctl_f.null = 1;
118 hw->ctl_f.int_en = 1;
119 hw->ctl_f.compl_write = 1;
120 /* set size to non-zero value (channel returns error when size is 0) */
121 hw->size = NULL_DESC_BUFFER_SIZE;
122 hw->src_addr = 0;
123 hw->dst_addr = 0;
124 async_tx_ack(&desc->txd);
125 ioat2_set_chainaddr(ioat, desc->txd.phys);
126 dump_desc_dbg(ioat, desc);
127 __ioat2_issue_pending(ioat);
128}
129
130static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
131{
132 spin_lock_bh(&ioat->ring_lock);
133 __ioat2_start_null_desc(ioat);
134 spin_unlock_bh(&ioat->ring_lock);
135}
136
137static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
138{
139 struct ioat_chan_common *chan = &ioat->base;
140 struct dma_async_tx_descriptor *tx;
141 struct ioat_ring_ent *desc;
142 bool seen_current = false;
143 u16 active;
144 int i;
145
146 dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
147 __func__, ioat->head, ioat->tail, ioat->issued);
148
149 active = ioat2_ring_active(ioat);
150 for (i = 0; i < active && !seen_current; i++) {
151 prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
152 desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
153 tx = &desc->txd;
154 dump_desc_dbg(ioat, desc);
155 if (tx->cookie) {
156 ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
157 chan->completed_cookie = tx->cookie;
158 tx->cookie = 0;
159 if (tx->callback) {
160 tx->callback(tx->callback_param);
161 tx->callback = NULL;
162 }
163 }
164
165 if (tx->phys == phys_complete)
166 seen_current = true;
167 }
168 ioat->tail += i;
169 BUG_ON(!seen_current); /* no active descs have written a completion? */
170
171 chan->last_completion = phys_complete;
172 if (ioat->head == ioat->tail) {
173 dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
174 __func__);
175 clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
176 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
177 }
178}
179
180/**
181 * ioat2_cleanup - clean finished descriptors (advance tail pointer)
182 * @ioat: ioat channel (ring) to be cleaned up
183 */
184static void ioat2_cleanup(struct ioat2_dma_chan *ioat)
185{
186 struct ioat_chan_common *chan = &ioat->base;
187 unsigned long phys_complete;
188
189 prefetch(chan->completion);
190
191 if (!spin_trylock_bh(&chan->cleanup_lock))
192 return;
193
194 if (!ioat_cleanup_preamble(chan, &phys_complete)) {
195 spin_unlock_bh(&chan->cleanup_lock);
196 return;
197 }
198
199 if (!spin_trylock_bh(&ioat->ring_lock)) {
200 spin_unlock_bh(&chan->cleanup_lock);
201 return;
202 }
203
204 __cleanup(ioat, phys_complete);
205
206 spin_unlock_bh(&ioat->ring_lock);
207 spin_unlock_bh(&chan->cleanup_lock);
208}
209
210void ioat2_cleanup_tasklet(unsigned long data)
211{
212 struct ioat2_dma_chan *ioat = (void *) data;
213
214 ioat2_cleanup(ioat);
215 writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
216}
217
218void __ioat2_restart_chan(struct ioat2_dma_chan *ioat)
219{
220 struct ioat_chan_common *chan = &ioat->base;
221
222 /* set the tail to be re-issued */
223 ioat->issued = ioat->tail;
224 ioat->dmacount = 0;
225 set_bit(IOAT_COMPLETION_PENDING, &chan->state);
226 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
227
228 dev_dbg(to_dev(chan),
229 "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
230 __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
231
232 if (ioat2_ring_pending(ioat)) {
233 struct ioat_ring_ent *desc;
234
235 desc = ioat2_get_ring_ent(ioat, ioat->tail);
236 ioat2_set_chainaddr(ioat, desc->txd.phys);
237 __ioat2_issue_pending(ioat);
238 } else
239 __ioat2_start_null_desc(ioat);
240}
241
242static void ioat2_restart_channel(struct ioat2_dma_chan *ioat)
243{
244 struct ioat_chan_common *chan = &ioat->base;
245 unsigned long phys_complete;
246	u64 status;	/* ioat_chansts() returns the full 64-bit register */
247
248 status = ioat_chansts(chan);
249 if (is_ioat_active(status) || is_ioat_idle(status))
250 ioat_suspend(chan);
251 while (is_ioat_active(status) || is_ioat_idle(status)) {
252 status = ioat_chansts(chan);
253 cpu_relax();
254 }
255
256 if (ioat_cleanup_preamble(chan, &phys_complete))
257 __cleanup(ioat, phys_complete);
258
259 __ioat2_restart_chan(ioat);
260}
261
262void ioat2_timer_event(unsigned long data)
263{
264 struct ioat2_dma_chan *ioat = (void *) data;
265 struct ioat_chan_common *chan = &ioat->base;
266
267 spin_lock_bh(&chan->cleanup_lock);
268 if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
269 unsigned long phys_complete;
270 u64 status;
271
272 spin_lock_bh(&ioat->ring_lock);
273 status = ioat_chansts(chan);
274
275 /* when halted due to errors check for channel
276 * programming errors before advancing the completion state
277 */
278 if (is_ioat_halted(status)) {
279 u32 chanerr;
280
281 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
282 BUG_ON(is_ioat_bug(chanerr));
283 }
284
285 /* if we haven't made progress and we have already
286 * acknowledged a pending completion once, then be more
287 * forceful with a restart
288 */
289 if (ioat_cleanup_preamble(chan, &phys_complete))
290 __cleanup(ioat, phys_complete);
291 else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
292 ioat2_restart_channel(ioat);
293 else {
294 set_bit(IOAT_COMPLETION_ACK, &chan->state);
295 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
296 }
297 spin_unlock_bh(&ioat->ring_lock);
298 } else {
299 u16 active;
300
301		/* if the ring is idle, empty, and oversized, try to
302		 * step down the size
303		 */
304 spin_lock_bh(&ioat->ring_lock);
305 active = ioat2_ring_active(ioat);
306 if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
307 reshape_ring(ioat, ioat->alloc_order-1);
308 spin_unlock_bh(&ioat->ring_lock);
309
310 /* keep shrinking until we get back to our minimum
311 * default size
312 */
313 if (ioat->alloc_order > ioat_get_alloc_order())
314 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
315 }
316 spin_unlock_bh(&chan->cleanup_lock);
317}
318
319/**
320 * ioat2_enumerate_channels - find and initialize the device's channels
321 * @device: the device to be enumerated
322 */
323int ioat2_enumerate_channels(struct ioatdma_device *device)
324{
325 struct ioat2_dma_chan *ioat;
326 struct device *dev = &device->pdev->dev;
327 struct dma_device *dma = &device->common;
328 u8 xfercap_log;
329 int i;
330
331 INIT_LIST_HEAD(&dma->channels);
332 dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
333 dma->chancnt &= 0x1f; /* bits [4:0] valid */
334 if (dma->chancnt > ARRAY_SIZE(device->idx)) {
335 dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
336 dma->chancnt, ARRAY_SIZE(device->idx));
337 dma->chancnt = ARRAY_SIZE(device->idx);
338 }
339 xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
340 xfercap_log &= 0x1f; /* bits [4:0] valid */
341 if (xfercap_log == 0)
342 return 0;
343 dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log);
344
345 /* FIXME which i/oat version is i7300? */
346#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
347 if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
348 dma->chancnt--;
349#endif
350 for (i = 0; i < dma->chancnt; i++) {
351 ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
352 if (!ioat)
353 break;
354
355 ioat_init_channel(device, &ioat->base, i,
356 device->timer_fn,
357 device->cleanup_tasklet,
358 (unsigned long) ioat);
359 ioat->xfercap_log = xfercap_log;
360 spin_lock_init(&ioat->ring_lock);
361 }
362 dma->chancnt = i;
363 return i;
364}
365
366static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
367{
368 struct dma_chan *c = tx->chan;
369 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
370 struct ioat_chan_common *chan = &ioat->base;
371 dma_cookie_t cookie = c->cookie;
372
373 cookie++;
374 if (cookie < 0)
375 cookie = 1;
376 tx->cookie = cookie;
377 c->cookie = cookie;
378 dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
379
380 if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
381 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
382 ioat2_update_pending(ioat);
383 spin_unlock_bh(&ioat->ring_lock);
384
385 return cookie;
386}
387
388static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
389{
390 struct ioat_dma_descriptor *hw;
391 struct ioat_ring_ent *desc;
392 struct ioatdma_device *dma;
393 dma_addr_t phys;
394
395 dma = to_ioatdma_device(chan->device);
396 hw = pci_pool_alloc(dma->dma_pool, flags, &phys);
397 if (!hw)
398 return NULL;
399 memset(hw, 0, sizeof(*hw));
400
401 desc = kmem_cache_alloc(ioat2_cache, flags);
402 if (!desc) {
403 pci_pool_free(dma->dma_pool, hw, phys);
404 return NULL;
405 }
406 memset(desc, 0, sizeof(*desc));
407
408 dma_async_tx_descriptor_init(&desc->txd, chan);
409 desc->txd.tx_submit = ioat2_tx_submit_unlock;
410 desc->hw = hw;
411 desc->txd.phys = phys;
412 return desc;
413}
414
415static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
416{
417 struct ioatdma_device *dma;
418
419 dma = to_ioatdma_device(chan->device);
420 pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys);
421 kmem_cache_free(ioat2_cache, desc);
422}
423
424static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
425{
426 struct ioat_ring_ent **ring;
427 int descs = 1 << order;
428 int i;
429
430 if (order > ioat_get_max_alloc_order())
431 return NULL;
432
433 /* allocate the array to hold the software ring */
434 ring = kcalloc(descs, sizeof(*ring), flags);
435 if (!ring)
436 return NULL;
437 for (i = 0; i < descs; i++) {
438 ring[i] = ioat2_alloc_ring_ent(c, flags);
439 if (!ring[i]) {
440 while (i--)
441 ioat2_free_ring_ent(ring[i], c);
442 kfree(ring);
443 return NULL;
444 }
445 set_desc_id(ring[i], i);
446 }
447
448 /* link descs */
449 for (i = 0; i < descs-1; i++) {
450 struct ioat_ring_ent *next = ring[i+1];
451 struct ioat_dma_descriptor *hw = ring[i]->hw;
452
453 hw->next = next->txd.phys;
454 }
455 ring[i]->hw->next = ring[0]->txd.phys;
456
457 return ring;
458}
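The allocation above leaves the hardware descriptors in a physically circular list, independent of the software array. A hedged self-check of that invariant (helper name invented; relies on the ioat_ring_ent layout from dma_v2.h):

	static bool example_ring_is_linked(struct ioat_ring_ent **ring, int descs)
	{
		int i;

		/* every hw 'next' must hold its successor's physical address */
		for (i = 0; i < descs; i++)
			if (ring[i]->hw->next != ring[(i + 1) % descs]->txd.phys)
				return false;
		return true;
	}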
459
460/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring
461 * @c: channel to be initialized
462 */
463int ioat2_alloc_chan_resources(struct dma_chan *c)
464{
465 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
466 struct ioat_chan_common *chan = &ioat->base;
467 struct ioat_ring_ent **ring;
468 u32 chanerr;
469 int order;
470
471 /* have we already been set up? */
472 if (ioat->ring)
473 return 1 << ioat->alloc_order;
474
475 /* Setup register to interrupt and write completion status on error */
476 writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
477
478 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
479 if (chanerr) {
480 dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
481 writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
482 }
483
484 /* allocate a completion writeback area */
485 /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
486 chan->completion = pci_pool_alloc(chan->device->completion_pool,
487 GFP_KERNEL, &chan->completion_dma);
488 if (!chan->completion)
489 return -ENOMEM;
490
491 memset(chan->completion, 0, sizeof(*chan->completion));
492 writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
493 chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
494 writel(((u64) chan->completion_dma) >> 32,
495 chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
496
497 order = ioat_get_alloc_order();
498 ring = ioat2_alloc_ring(c, order, GFP_KERNEL);
499 if (!ring)
500 return -ENOMEM;
501
502 spin_lock_bh(&ioat->ring_lock);
503 ioat->ring = ring;
504 ioat->head = 0;
505 ioat->issued = 0;
506 ioat->tail = 0;
507 ioat->pending = 0;
508 ioat->alloc_order = order;
509 spin_unlock_bh(&ioat->ring_lock);
510
511 tasklet_enable(&chan->cleanup_task);
512 ioat2_start_null_desc(ioat);
513
514 return 1 << ioat->alloc_order;
515}
516
517bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
518{
519 /* reshape differs from normal ring allocation in that we want
520 * to allocate a new software ring while only
521 * extending/truncating the hardware ring
522 */
523 struct ioat_chan_common *chan = &ioat->base;
524 struct dma_chan *c = &chan->common;
525 const u16 curr_size = ioat2_ring_mask(ioat) + 1;
526 const u16 active = ioat2_ring_active(ioat);
527 const u16 new_size = 1 << order;
528 struct ioat_ring_ent **ring;
529 u16 i;
530
531 if (order > ioat_get_max_alloc_order())
532 return false;
533
534 /* double check that we have at least 1 free descriptor */
535 if (active == curr_size)
536 return false;
537
538 /* when shrinking, verify that we can hold the current active
539 * set in the new ring
540 */
541 if (active >= new_size)
542 return false;
543
544 /* allocate the array to hold the software ring */
545 ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
546 if (!ring)
547 return false;
548
549 /* allocate/trim descriptors as needed */
550 if (new_size > curr_size) {
551 /* copy current descriptors to the new ring */
552 for (i = 0; i < curr_size; i++) {
553 u16 curr_idx = (ioat->tail+i) & (curr_size-1);
554 u16 new_idx = (ioat->tail+i) & (new_size-1);
555
556 ring[new_idx] = ioat->ring[curr_idx];
557 set_desc_id(ring[new_idx], new_idx);
558 }
559
560 /* add new descriptors to the ring */
561 for (i = curr_size; i < new_size; i++) {
562 u16 new_idx = (ioat->tail+i) & (new_size-1);
563
564 ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT);
565 if (!ring[new_idx]) {
566 while (i--) {
567 u16 new_idx = (ioat->tail+i) & (new_size-1);
568
569 ioat2_free_ring_ent(ring[new_idx], c);
570 }
571 kfree(ring);
572 return false;
573 }
574 set_desc_id(ring[new_idx], new_idx);
575 }
576
577 /* hw link new descriptors */
578 for (i = curr_size-1; i < new_size; i++) {
579 u16 new_idx = (ioat->tail+i) & (new_size-1);
580 struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)];
581 struct ioat_dma_descriptor *hw = ring[new_idx]->hw;
582
583 hw->next = next->txd.phys;
584 }
585 } else {
586 struct ioat_dma_descriptor *hw;
587 struct ioat_ring_ent *next;
588
589 /* copy current descriptors to the new ring, dropping the
590 * removed descriptors
591 */
592 for (i = 0; i < new_size; i++) {
593 u16 curr_idx = (ioat->tail+i) & (curr_size-1);
594 u16 new_idx = (ioat->tail+i) & (new_size-1);
595
596 ring[new_idx] = ioat->ring[curr_idx];
597 set_desc_id(ring[new_idx], new_idx);
598 }
599
600 /* free deleted descriptors */
601 for (i = new_size; i < curr_size; i++) {
602 struct ioat_ring_ent *ent;
603
604 ent = ioat2_get_ring_ent(ioat, ioat->tail+i);
605 ioat2_free_ring_ent(ent, c);
606 }
607
608 /* fix up hardware ring */
609 hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw;
610 next = ring[(ioat->tail+new_size) & (new_size-1)];
611 hw->next = next->txd.phys;
612 }
613
614 dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
615 __func__, new_size);
616
617 kfree(ioat->ring);
618 ioat->ring = ring;
619 ioat->alloc_order = order;
620
621 return true;
622}
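Both reshape directions rely on one invariant: an entry logically i slots past 'tail' keeps that offset across the resize; only its array slot changes because the index mask changes. As a hedged one-liner (helper invented):

	/* logical offset i from 'tail' in a ring of new_size (power-of-two) slots */
	static inline u16 example_remap_slot(u16 tail, u16 i, u16 new_size)
	{
		return (tail + i) & (new_size - 1);
	}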
623
624/**
625 * ioat2_alloc_and_lock - common descriptor alloc boilerplate for ioat2,3 ops
626 * @idx: gets starting descriptor index on successful allocation
627 * @ioat: ioat2,3 channel (ring) to operate on
628 * @num_descs: allocation length
629 */
630int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs)
631{
632 struct ioat_chan_common *chan = &ioat->base;
633
634 spin_lock_bh(&ioat->ring_lock);
635	/* never allow the last descriptor to be consumed; we need at
636	 * least one free slot at all times to allow for on-the-fly
637	 * ring resizing.
638	 */
639 while (unlikely(ioat2_ring_space(ioat) <= num_descs)) {
640 if (reshape_ring(ioat, ioat->alloc_order + 1) &&
641 ioat2_ring_space(ioat) > num_descs)
642 break;
643
644 if (printk_ratelimit())
645 dev_dbg(to_dev(chan),
646 "%s: ring full! num_descs: %d (%x:%x:%x)\n",
647 __func__, num_descs, ioat->head, ioat->tail,
648 ioat->issued);
649 spin_unlock_bh(&ioat->ring_lock);
650
651		/* progress reclaim in the allocation-failure case: we
652		 * may be called with bottom halves disabled, so we need
653		 * to trigger the timer event directly
654		 */
655 spin_lock_bh(&chan->cleanup_lock);
656 if (jiffies > chan->timer.expires &&
657 timer_pending(&chan->timer)) {
658 struct ioatdma_device *device = chan->device;
659
660 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
661 spin_unlock_bh(&chan->cleanup_lock);
662 device->timer_fn((unsigned long) ioat);
663 } else
664 spin_unlock_bh(&chan->cleanup_lock);
665 return -ENOMEM;
666 }
667
668 dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n",
669 __func__, num_descs, ioat->head, ioat->tail, ioat->issued);
670
671 *idx = ioat2_desc_alloc(ioat, num_descs);
672 return 0; /* with ioat->ring_lock held */
673}
674
675struct dma_async_tx_descriptor *
676ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
677 dma_addr_t dma_src, size_t len, unsigned long flags)
678{
679 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
680 struct ioat_dma_descriptor *hw;
681 struct ioat_ring_ent *desc;
682 dma_addr_t dst = dma_dest;
683 dma_addr_t src = dma_src;
684 size_t total_len = len;
685 int num_descs;
686 u16 idx;
687 int i;
688
689 num_descs = ioat2_xferlen_to_descs(ioat, len);
690 if (likely(num_descs) &&
691 ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
692 /* pass */;
693 else
694 return NULL;
695 i = 0;
696 do {
697 size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log);
698
699 desc = ioat2_get_ring_ent(ioat, idx + i);
700 hw = desc->hw;
701
702 hw->size = copy;
703 hw->ctl = 0;
704 hw->src_addr = src;
705 hw->dst_addr = dst;
706
707 len -= copy;
708 dst += copy;
709 src += copy;
710 dump_desc_dbg(ioat, desc);
711 } while (++i < num_descs);
712
713 desc->txd.flags = flags;
714 desc->len = total_len;
715 hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
716 hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
717 hw->ctl_f.compl_write = 1;
718 dump_desc_dbg(ioat, desc);
719	/* we leave the channel locked to ensure in-order submission */
720
721 return &desc->txd;
722}
723
724/**
725 * ioat2_free_chan_resources - release all the descriptors
726 * @chan: the channel to be cleaned
727 */
728void ioat2_free_chan_resources(struct dma_chan *c)
729{
730 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
731 struct ioat_chan_common *chan = &ioat->base;
732 struct ioatdma_device *device = chan->device;
733 struct ioat_ring_ent *desc;
734 const u16 total_descs = 1 << ioat->alloc_order;
735 int descs;
736 int i;
737
738	/* Before freeing channel resources, first check whether they
739	 * have actually been allocated for this channel.
740	 */
741 if (!ioat->ring)
742 return;
743
744 tasklet_disable(&chan->cleanup_task);
745 del_timer_sync(&chan->timer);
746 device->cleanup_tasklet((unsigned long) ioat);
747
748 /* Delay 100ms after reset to allow internal DMA logic to quiesce
749 * before removing DMA descriptor resources.
750 */
751 writeb(IOAT_CHANCMD_RESET,
752 chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
753 mdelay(100);
754
755 spin_lock_bh(&ioat->ring_lock);
756 descs = ioat2_ring_space(ioat);
757 dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs);
758 for (i = 0; i < descs; i++) {
759 desc = ioat2_get_ring_ent(ioat, ioat->head + i);
760 ioat2_free_ring_ent(desc, c);
761 }
762
763 if (descs < total_descs)
764 dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
765 total_descs - descs);
766
767 for (i = 0; i < total_descs - descs; i++) {
768 desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
769 dump_desc_dbg(ioat, desc);
770 ioat2_free_ring_ent(desc, c);
771 }
772
773 kfree(ioat->ring);
774 ioat->ring = NULL;
775 ioat->alloc_order = 0;
776 pci_pool_free(device->completion_pool, chan->completion,
777 chan->completion_dma);
778 spin_unlock_bh(&ioat->ring_lock);
779
780 chan->last_completion = 0;
781 chan->completion_dma = 0;
782 ioat->pending = 0;
783 ioat->dmacount = 0;
784}
785
786enum dma_status
787ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
788 dma_cookie_t *done, dma_cookie_t *used)
789{
790 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
791 struct ioatdma_device *device = ioat->base.device;
792
793 if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
794 return DMA_SUCCESS;
795
796 device->cleanup_tasklet((unsigned long) ioat);
797
798 return ioat_is_complete(c, cookie, done, used);
799}
800
801static ssize_t ring_size_show(struct dma_chan *c, char *page)
802{
803 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
804
805 return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1);
806}
807static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
808
809static ssize_t ring_active_show(struct dma_chan *c, char *page)
810{
811 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
812
813 /* ...taken outside the lock, no need to be precise */
814 return sprintf(page, "%d\n", ioat2_ring_active(ioat));
815}
816static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
817
818static struct attribute *ioat2_attrs[] = {
819 &ring_size_attr.attr,
820 &ring_active_attr.attr,
821 &ioat_cap_attr.attr,
822 &ioat_version_attr.attr,
823 NULL,
824};
825
826struct kobj_type ioat2_ktype = {
827 .sysfs_ops = &ioat_sysfs_ops,
828 .default_attrs = ioat2_attrs,
829};
830
831int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca)
832{
833 struct pci_dev *pdev = device->pdev;
834 struct dma_device *dma;
835 struct dma_chan *c;
836 struct ioat_chan_common *chan;
837 int err;
838
839 device->enumerate_channels = ioat2_enumerate_channels;
840 device->cleanup_tasklet = ioat2_cleanup_tasklet;
841 device->timer_fn = ioat2_timer_event;
842 device->self_test = ioat_dma_self_test;
843 dma = &device->common;
844 dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
845 dma->device_issue_pending = ioat2_issue_pending;
846 dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
847 dma->device_free_chan_resources = ioat2_free_chan_resources;
848 dma->device_is_tx_complete = ioat2_is_complete;
849
850 err = ioat_probe(device);
851 if (err)
852 return err;
853 ioat_set_tcp_copy_break(2048);
854
855 list_for_each_entry(c, &dma->channels, device_node) {
856 chan = to_chan_common(c);
857 writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU,
858 chan->reg_base + IOAT_DCACTRL_OFFSET);
859 }
860
861 err = ioat_register(device);
862 if (err)
863 return err;
864
865 ioat_kobject_add(device, &ioat2_ktype);
866
867 if (dca)
868 device->dca = ioat2_dca_init(pdev, device->reg_base);
869
870 return err;
871}
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h
new file mode 100644
index 000000000000..1d849ef74d5f
--- /dev/null
+++ b/drivers/dma/ioat/dma_v2.h
@@ -0,0 +1,190 @@
1/*
2 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 2 of the License, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59
16 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * The full GNU General Public License is included in this distribution in the
19 * file called COPYING.
20 */
21#ifndef IOATDMA_V2_H
22#define IOATDMA_V2_H
23
24#include <linux/dmaengine.h>
25#include "dma.h"
26#include "hw.h"
27
28
29extern int ioat_pending_level;
30extern int ioat_ring_alloc_order;
31
32/*
33 * workaround for IOAT ver.3.0 null descriptor issue
34 * (channel returns error when size is 0)
35 */
36#define NULL_DESC_BUFFER_SIZE 1
37
38#define IOAT_MAX_ORDER 16
39#define ioat_get_alloc_order() \
40 (min(ioat_ring_alloc_order, IOAT_MAX_ORDER))
41#define ioat_get_max_alloc_order() \
42 (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER))
43
44/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes
45 * @base: common ioat channel parameters
46 * @xfercap_log: log2 of channel max transfer length (for fast division)
47 * @head: allocated index
48 * @issued: hardware notification point
49 * @tail: cleanup index
50 * @pending: lock free indicator for issued != head
51 * @dmacount: identical to 'head' except for occasionally resetting to zero
52 * @alloc_order: log2 of the number of allocated descriptors
53 * @ring: software ring buffer implementation of hardware ring
54 * @ring_lock: protects ring attributes
55 */
56struct ioat2_dma_chan {
57 struct ioat_chan_common base;
58 size_t xfercap_log;
59 u16 head;
60 u16 issued;
61 u16 tail;
62 u16 dmacount;
63 u16 alloc_order;
64 int pending;
65 struct ioat_ring_ent **ring;
66 spinlock_t ring_lock;
67};
68
69static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c)
70{
71 struct ioat_chan_common *chan = to_chan_common(c);
72
73 return container_of(chan, struct ioat2_dma_chan, base);
74}
75
76static inline u16 ioat2_ring_mask(struct ioat2_dma_chan *ioat)
77{
78 return (1 << ioat->alloc_order) - 1;
79}
80
81/* count of descriptors in flight with the engine */
82static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat)
83{
84 return (ioat->head - ioat->tail) & ioat2_ring_mask(ioat);
85}
86
87/* count of descriptors pending submission to hardware */
88static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat)
89{
90 return (ioat->head - ioat->issued) & ioat2_ring_mask(ioat);
91}
92
93static inline u16 ioat2_ring_space(struct ioat2_dma_chan *ioat)
94{
95 u16 num_descs = ioat2_ring_mask(ioat) + 1;
96 u16 active = ioat2_ring_active(ioat);
97
98 BUG_ON(active > num_descs);
99
100 return num_descs - active;
101}
102
103/* assumes caller already checked space */
104static inline u16 ioat2_desc_alloc(struct ioat2_dma_chan *ioat, u16 len)
105{
106 ioat->head += len;
107 return ioat->head - len;
108}
109
110static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len)
111{
112 u16 num_descs = len >> ioat->xfercap_log;
113
114 num_descs += !!(len & ((1 << ioat->xfercap_log) - 1));
115 return num_descs;
116}
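A hedged worked example of the helpers above: in an order-3 ring (8 slots) with head == 10 and tail == 6, active is (10 - 6) & 7 == 4 and space is 8 - 4 == 4; the u16 subtraction stays correct even after head wraps. Likewise, with xfercap_log == 20 (1 MiB per descriptor), a 2.5 MiB transfer needs 2 + 1 == 3 descriptors.

	static void example_ring_math(void)
	{
		u16 head = 10, tail = 6, mask = 7;	/* order-3 ring: 8 slots */
		u16 active = (head - tail) & mask;	/* 4 descriptors in flight */
		u16 space = (mask + 1) - active;	/* 4 free slots */

		(void)space;
	}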
117
118/**
119 * struct ioat_ring_ent - wrapper around hardware descriptor
120 * @hw: hardware DMA descriptor (for memcpy)
121 * @fill: hardware fill descriptor
122 * @xor: hardware xor descriptor
123 * @xor_ex: hardware xor extension descriptor
124 * @pq: hardware pq descriptor
125 * @pq_ex: hardware pq extension descriptor
126 * @pqu: hardware pq update descriptor
127 * @raw: hardware raw (un-typed) descriptor
128 * @txd: the generic software descriptor for all engines
129 * @len: total transaction length for unmap
130 * @result: asynchronous result of validate operations
131 * @id: identifier for debug
132 */
133
134struct ioat_ring_ent {
135 union {
136 struct ioat_dma_descriptor *hw;
137 struct ioat_fill_descriptor *fill;
138 struct ioat_xor_descriptor *xor;
139 struct ioat_xor_ext_descriptor *xor_ex;
140 struct ioat_pq_descriptor *pq;
141 struct ioat_pq_ext_descriptor *pq_ex;
142 struct ioat_pq_update_descriptor *pqu;
143 struct ioat_raw_descriptor *raw;
144 };
145 size_t len;
146 struct dma_async_tx_descriptor txd;
147 enum sum_check_flags *result;
148 #ifdef DEBUG
149 int id;
150 #endif
151};
152
153static inline struct ioat_ring_ent *
154ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx)
155{
156 return ioat->ring[idx & ioat2_ring_mask(ioat)];
157}
158
159static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr)
160{
161 struct ioat_chan_common *chan = &ioat->base;
162
163 writel(addr & 0x00000000FFFFFFFF,
164 chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
165 writel(addr >> 32,
166 chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
167}
168
169int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca);
170int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca);
171struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
172struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
173int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs);
174int ioat2_enumerate_channels(struct ioatdma_device *device);
175struct dma_async_tx_descriptor *
176ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
177 dma_addr_t dma_src, size_t len, unsigned long flags);
178void ioat2_issue_pending(struct dma_chan *chan);
179int ioat2_alloc_chan_resources(struct dma_chan *c);
180void ioat2_free_chan_resources(struct dma_chan *c);
181enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
182 dma_cookie_t *done, dma_cookie_t *used);
183void __ioat2_restart_chan(struct ioat2_dma_chan *ioat);
184bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
185void __ioat2_issue_pending(struct ioat2_dma_chan *ioat);
186void ioat2_cleanup_tasklet(unsigned long data);
187void ioat2_timer_event(unsigned long data);
188extern struct kobj_type ioat2_ktype;
189extern struct kmem_cache *ioat2_cache;
190#endif /* IOATDMA_V2_H */
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
new file mode 100644
index 000000000000..35d1e33afd5b
--- /dev/null
+++ b/drivers/dma/ioat/dma_v3.c
@@ -0,0 +1,1223 @@
1/*
2 * This file is provided under a dual BSD/GPLv2 license. When using or
3 * redistributing this file, you may do so under either license.
4 *
5 * GPL LICENSE SUMMARY
6 *
7 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms and conditions of the GNU General Public License,
11 * version 2, as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * more details.
17 *
18 * You should have received a copy of the GNU General Public License along with
19 * this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 * The full GNU General Public License is included in this distribution in
23 * the file called "COPYING".
24 *
25 * BSD LICENSE
26 *
27 * Copyright(c) 2004-2009 Intel Corporation. All rights reserved.
28 *
29 * Redistribution and use in source and binary forms, with or without
30 * modification, are permitted provided that the following conditions are met:
31 *
32 * * Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 * * Redistributions in binary form must reproduce the above copyright
35 * notice, this list of conditions and the following disclaimer in
36 * the documentation and/or other materials provided with the
37 * distribution.
38 * * Neither the name of Intel Corporation nor the names of its
39 * contributors may be used to endorse or promote products derived
40 * from this software without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
43 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
46 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
47 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
48 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
49 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
50 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
51 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
52 * POSSIBILITY OF SUCH DAMAGE.
53 */
54
55/*
56 * Support routines for v3+ hardware
57 */
58
59#include <linux/pci.h>
60#include <linux/dmaengine.h>
61#include <linux/dma-mapping.h>
62#include "registers.h"
63#include "hw.h"
64#include "dma.h"
65#include "dma_v2.h"
66
67/* ioat hardware assumes at least two sources for raid operations */
68#define src_cnt_to_sw(x) ((x) + 2)
69#define src_cnt_to_hw(x) ((x) - 2)
70
71/* provide a lookup table for setting the source address in the base or
72 * extended descriptor of an xor or pq descriptor
73 */
74static const u8 xor_idx_to_desc __read_mostly = 0xd0;
75static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 };
76static const u8 pq_idx_to_desc __read_mostly = 0xf8;
77static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 };
78
79static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
80{
81 struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
82
83 return raw->field[xor_idx_to_field[idx]];
84}
85
86static void xor_set_src(struct ioat_raw_descriptor *descs[2],
87 dma_addr_t addr, u32 offset, int idx)
88{
89 struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
90
91 raw->field[xor_idx_to_field[idx]] = addr + offset;
92}
93
94static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
95{
96 struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
97
98 return raw->field[pq_idx_to_field[idx]];
99}
100
101static void pq_set_src(struct ioat_raw_descriptor *descs[2],
102 dma_addr_t addr, u32 offset, u8 coef, int idx)
103{
104 struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
105 struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
106
107 raw->field[pq_idx_to_field[idx]] = addr + offset;
108 pq->coef[idx] = coef;
109}
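The two const bitmaps above say, per source index, whether an address lives in the base descriptor (bit clear) or the extended descriptor (bit set), and the field arrays give its slot within that descriptor. A hedged worked example for xor source index 4, where 0xd0 has bit 4 set:

	static dma_addr_t example_xor_src4(struct ioat_raw_descriptor *descs[2])
	{
		int idx = 4;

		/* (0xd0 >> 4) & 1 == 1 -> descs[1]; xor_idx_to_field[4] == 7 */
		return descs[xor_idx_to_desc >> idx & 1]->field[xor_idx_to_field[idx]];
	}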
110
111static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
112 struct ioat_ring_ent *desc, int idx)
113{
114 struct ioat_chan_common *chan = &ioat->base;
115 struct pci_dev *pdev = chan->device->pdev;
116 size_t len = desc->len;
117 size_t offset = len - desc->hw->size;
118 struct dma_async_tx_descriptor *tx = &desc->txd;
119 enum dma_ctrl_flags flags = tx->flags;
120
121 switch (desc->hw->ctl_f.op) {
122 case IOAT_OP_COPY:
123 if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
124 ioat_dma_unmap(chan, flags, len, desc->hw);
125 break;
126 case IOAT_OP_FILL: {
127 struct ioat_fill_descriptor *hw = desc->fill;
128
129 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
130 ioat_unmap(pdev, hw->dst_addr - offset, len,
131 PCI_DMA_FROMDEVICE, flags, 1);
132 break;
133 }
134 case IOAT_OP_XOR_VAL:
135 case IOAT_OP_XOR: {
136 struct ioat_xor_descriptor *xor = desc->xor;
137 struct ioat_ring_ent *ext;
138 struct ioat_xor_ext_descriptor *xor_ex = NULL;
139 int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
140 struct ioat_raw_descriptor *descs[2];
141 int i;
142
143 if (src_cnt > 5) {
144 ext = ioat2_get_ring_ent(ioat, idx + 1);
145 xor_ex = ext->xor_ex;
146 }
147
148 if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
149 descs[0] = (struct ioat_raw_descriptor *) xor;
150 descs[1] = (struct ioat_raw_descriptor *) xor_ex;
151 for (i = 0; i < src_cnt; i++) {
152 dma_addr_t src = xor_get_src(descs, i);
153
154 ioat_unmap(pdev, src - offset, len,
155 PCI_DMA_TODEVICE, flags, 0);
156 }
157
158 /* dest is a source in xor validate operations */
159 if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
160 ioat_unmap(pdev, xor->dst_addr - offset, len,
161 PCI_DMA_TODEVICE, flags, 1);
162 break;
163 }
164 }
165
166 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
167 ioat_unmap(pdev, xor->dst_addr - offset, len,
168 PCI_DMA_FROMDEVICE, flags, 1);
169 break;
170 }
171 case IOAT_OP_PQ_VAL:
172 case IOAT_OP_PQ: {
173 struct ioat_pq_descriptor *pq = desc->pq;
174 struct ioat_ring_ent *ext;
175 struct ioat_pq_ext_descriptor *pq_ex = NULL;
176 int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
177 struct ioat_raw_descriptor *descs[2];
178 int i;
179
180 if (src_cnt > 3) {
181 ext = ioat2_get_ring_ent(ioat, idx + 1);
182 pq_ex = ext->pq_ex;
183 }
184
185 /* in the 'continue' case don't unmap the dests as sources */
186 if (dmaf_p_disabled_continue(flags))
187 src_cnt--;
188 else if (dmaf_continue(flags))
189 src_cnt -= 3;
190
191 if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
192 descs[0] = (struct ioat_raw_descriptor *) pq;
193 descs[1] = (struct ioat_raw_descriptor *) pq_ex;
194 for (i = 0; i < src_cnt; i++) {
195 dma_addr_t src = pq_get_src(descs, i);
196
197 ioat_unmap(pdev, src - offset, len,
198 PCI_DMA_TODEVICE, flags, 0);
199 }
200
201 /* the dests are sources in pq validate operations */
202 if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
203 if (!(flags & DMA_PREP_PQ_DISABLE_P))
204 ioat_unmap(pdev, pq->p_addr - offset,
205 len, PCI_DMA_TODEVICE, flags, 0);
206 if (!(flags & DMA_PREP_PQ_DISABLE_Q))
207 ioat_unmap(pdev, pq->q_addr - offset,
208 len, PCI_DMA_TODEVICE, flags, 0);
209 break;
210 }
211 }
212
213 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
214 if (!(flags & DMA_PREP_PQ_DISABLE_P))
215 ioat_unmap(pdev, pq->p_addr - offset, len,
216 PCI_DMA_BIDIRECTIONAL, flags, 1);
217 if (!(flags & DMA_PREP_PQ_DISABLE_Q))
218 ioat_unmap(pdev, pq->q_addr - offset, len,
219 PCI_DMA_BIDIRECTIONAL, flags, 1);
220 }
221 break;
222 }
223 default:
224 dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
225 __func__, desc->hw->ctl_f.op);
226 }
227}
228
229static bool desc_has_ext(struct ioat_ring_ent *desc)
230{
231 struct ioat_dma_descriptor *hw = desc->hw;
232
233 if (hw->ctl_f.op == IOAT_OP_XOR ||
234 hw->ctl_f.op == IOAT_OP_XOR_VAL) {
235 struct ioat_xor_descriptor *xor = desc->xor;
236
237 if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
238 return true;
239 } else if (hw->ctl_f.op == IOAT_OP_PQ ||
240 hw->ctl_f.op == IOAT_OP_PQ_VAL) {
241 struct ioat_pq_descriptor *pq = desc->pq;
242
243 if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
244 return true;
245 }
246
247 return false;
248}
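/*
 * Example: an 8-source xor (> 5) or a 5-source pq (> 3) spills its
 * extra source addresses into a second, extended descriptor, so the
 * operation occupies two ring slots; __cleanup() below skips the
 * extra slot via desc_has_ext().
 */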
249
250/**
251 * __cleanup - reclaim used descriptors
252 * @ioat: channel (ring) to clean
253 *
254 * The difference from the dma_v2.c __cleanup() is that this routine
255 * handles extended descriptors and dma-unmapping raid operations.
256 */
257static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
258{
259 struct ioat_chan_common *chan = &ioat->base;
260 struct ioat_ring_ent *desc;
261 bool seen_current = false;
262 u16 active;
263 int i;
264
265 dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
266 __func__, ioat->head, ioat->tail, ioat->issued);
267
268 active = ioat2_ring_active(ioat);
269 for (i = 0; i < active && !seen_current; i++) {
270 struct dma_async_tx_descriptor *tx;
271
272 prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
273 desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
274 dump_desc_dbg(ioat, desc);
275 tx = &desc->txd;
276 if (tx->cookie) {
277 chan->completed_cookie = tx->cookie;
278 ioat3_dma_unmap(ioat, desc, ioat->tail + i);
279 tx->cookie = 0;
280 if (tx->callback) {
281 tx->callback(tx->callback_param);
282 tx->callback = NULL;
283 }
284 }
285
286 if (tx->phys == phys_complete)
287 seen_current = true;
288
289 /* skip extended descriptors */
290 if (desc_has_ext(desc)) {
291 BUG_ON(i + 1 >= active);
292 i++;
293 }
294 }
295 ioat->tail += i;
296 BUG_ON(!seen_current); /* no active descs have written a completion? */
297 chan->last_completion = phys_complete;
298 if (ioat->head == ioat->tail) {
299 dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
300 __func__);
301 clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
302 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
303 }
304}
305
306static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
307{
308 struct ioat_chan_common *chan = &ioat->base;
309 unsigned long phys_complete;
310
311 prefetch(chan->completion);
312
313 if (!spin_trylock_bh(&chan->cleanup_lock))
314 return;
315
316 if (!ioat_cleanup_preamble(chan, &phys_complete)) {
317 spin_unlock_bh(&chan->cleanup_lock);
318 return;
319 }
320
321 if (!spin_trylock_bh(&ioat->ring_lock)) {
322 spin_unlock_bh(&chan->cleanup_lock);
323 return;
324 }
325
326 __cleanup(ioat, phys_complete);
327
328 spin_unlock_bh(&ioat->ring_lock);
329 spin_unlock_bh(&chan->cleanup_lock);
330}
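/* both locks are taken with trylock so the cleanup path backs off
 * rather than contending with the submission path; cleanup is simply
 * retried on the next tasklet or timer invocation
 */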
331
332static void ioat3_cleanup_tasklet(unsigned long data)
333{
334 struct ioat2_dma_chan *ioat = (void *) data;
335
336 ioat3_cleanup(ioat);
337 writew(IOAT_CHANCTRL_RUN | IOAT3_CHANCTRL_COMPL_DCA_EN,
338 ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
339}
340
341static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
342{
343 struct ioat_chan_common *chan = &ioat->base;
344 unsigned long phys_complete;
345 u32 status;
346
347 status = ioat_chansts(chan);
348 if (is_ioat_active(status) || is_ioat_idle(status))
349 ioat_suspend(chan);
350 while (is_ioat_active(status) || is_ioat_idle(status)) {
351 status = ioat_chansts(chan);
352 cpu_relax();
353 }
354
355 if (ioat_cleanup_preamble(chan, &phys_complete))
356 __cleanup(ioat, phys_complete);
357
358 __ioat2_restart_chan(ioat);
359}
360
361static void ioat3_timer_event(unsigned long data)
362{
363 struct ioat2_dma_chan *ioat = (void *) data;
364 struct ioat_chan_common *chan = &ioat->base;
365
366 spin_lock_bh(&chan->cleanup_lock);
367 if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
368 unsigned long phys_complete;
369 u64 status;
370
371 spin_lock_bh(&ioat->ring_lock);
372 status = ioat_chansts(chan);
373
374 /* when halted due to errors, check for channel
375 * programming errors before advancing the completion state
376 */
377 if (is_ioat_halted(status)) {
378 u32 chanerr;
379
380 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
381 BUG_ON(is_ioat_bug(chanerr));
382 }
383
384 /* if we haven't made progress and we have already
385 * acknowledged a pending completion once, then be more
386 * forceful with a restart
387 */
388 if (ioat_cleanup_preamble(chan, &phys_complete))
389 __cleanup(ioat, phys_complete);
390 else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
391 ioat3_restart_channel(ioat);
392 else {
393 set_bit(IOAT_COMPLETION_ACK, &chan->state);
394 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
395 }
396 spin_unlock_bh(&ioat->ring_lock);
397 } else {
398 u16 active;
399
400 /* if the ring is idle, empty, and oversized, try to step
401 * down the size
402 */
403 spin_lock_bh(&ioat->ring_lock);
404 active = ioat2_ring_active(ioat);
405 if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
406 reshape_ring(ioat, ioat->alloc_order-1);
407 spin_unlock_bh(&ioat->ring_lock);
408
409 /* keep shrinking until we get back to our minimum
410 * default size
411 */
412 if (ioat->alloc_order > ioat_get_alloc_order())
413 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
414 }
415 spin_unlock_bh(&chan->cleanup_lock);
416}
417
418static enum dma_status
419ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie,
420 dma_cookie_t *done, dma_cookie_t *used)
421{
422 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
423
424 if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
425 return DMA_SUCCESS;
426
427 ioat3_cleanup(ioat);
428
429 return ioat_is_complete(c, cookie, done, used);
430}
431
432static struct dma_async_tx_descriptor *
433ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
434 size_t len, unsigned long flags)
435{
436 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
437 struct ioat_ring_ent *desc;
438 size_t total_len = len;
439 struct ioat_fill_descriptor *fill;
440 int num_descs;
441 u64 src_data = (0x0101010101010101ULL) * (value & 0xff);
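/* the multiply replicates the fill byte across all eight lanes,
 * e.g. value 0xab yields src_data 0xababababababababULL */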
442 u16 idx;
443 int i;
444
445 num_descs = ioat2_xferlen_to_descs(ioat, len);
446 if (likely(num_descs) &&
447 ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
448 /* pass */;
449 else
450 return NULL;
451 i = 0;
452 do {
453 size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
454
455 desc = ioat2_get_ring_ent(ioat, idx + i);
456 fill = desc->fill;
457
458 fill->size = xfer_size;
459 fill->src_data = src_data;
460 fill->dst_addr = dest;
461 fill->ctl = 0;
462 fill->ctl_f.op = IOAT_OP_FILL;
463
464 len -= xfer_size;
465 dest += xfer_size;
466 dump_desc_dbg(ioat, desc);
467 } while (++i < num_descs);
468
469 desc->txd.flags = flags;
470 desc->len = total_len;
471 fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
472 fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
473 fill->ctl_f.compl_write = 1;
474 dump_desc_dbg(ioat, desc);
475
476 /* we leave the channel locked to ensure in order submission */
477 return &desc->txd;
478}
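/*
 * Illustrative only: a minimal client-side sketch of driving the
 * memset operation above through the dmaengine API, modeled on
 * ioat_xor_val_self_test() further down in this file; 'dma',
 * 'dma_chan', 'dma_addr', 'tx', 'cookie' and 'cmp' are assumed to be
 * set up as in that test.
 */
#if 0
	tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
					 DMA_PREP_INTERRUPT);
	if (!tx)
		return -ENODEV;		/* prep failed (e.g. no ring space) */
	async_tx_ack(tx);		/* no dependency chaining needed */
	init_completion(&cmp);
	tx->callback = ioat3_dma_test_callback;
	tx->callback_param = &cmp;
	cookie = tx->tx_submit(tx);	/* also unlocks the channel */
	dma->device_issue_pending(dma_chan);
	wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
#endif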
479
480static struct dma_async_tx_descriptor *
481__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
482 dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
483 size_t len, unsigned long flags)
484{
485 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
486 struct ioat_ring_ent *compl_desc;
487 struct ioat_ring_ent *desc;
488 struct ioat_ring_ent *ext;
489 size_t total_len = len;
490 struct ioat_xor_descriptor *xor;
491 struct ioat_xor_ext_descriptor *xor_ex = NULL;
492 struct ioat_dma_descriptor *hw;
493 u32 offset = 0;
494 int num_descs;
495 int with_ext;
496 int i;
497 u16 idx;
498 u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
499
500 BUG_ON(src_cnt < 2);
501
502 num_descs = ioat2_xferlen_to_descs(ioat, len);
503 /* we need 2x the number of descriptors when there are more
504 * than 5 sources
505 */
506 if (src_cnt > 5) {
507 with_ext = 1;
508 num_descs *= 2;
509 } else
510 with_ext = 0;
511
512 /* completion writes from the raid engine may pass completion
513 * writes from the legacy engine, so we need one extra null
514 * (legacy) descriptor to ensure all completion writes arrive in
515 * order.
516 */
517 if (likely(num_descs) &&
518 ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
519 /* pass */;
520 else
521 return NULL;
522 i = 0;
523 do {
524 struct ioat_raw_descriptor *descs[2];
525 size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
526 int s;
527
528 desc = ioat2_get_ring_ent(ioat, idx + i);
529 xor = desc->xor;
530
531 /* save a branch by unconditionally retrieving the
532 * extended descriptor; xor_set_src() knows not to write
533 * to it in the single descriptor case
534 */
535 ext = ioat2_get_ring_ent(ioat, idx + i + 1);
536 xor_ex = ext->xor_ex;
537
538 descs[0] = (struct ioat_raw_descriptor *) xor;
539 descs[1] = (struct ioat_raw_descriptor *) xor_ex;
540 for (s = 0; s < src_cnt; s++)
541 xor_set_src(descs, src[s], offset, s);
542 xor->size = xfer_size;
543 xor->dst_addr = dest + offset;
544 xor->ctl = 0;
545 xor->ctl_f.op = op;
546 xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);
547
548 len -= xfer_size;
549 offset += xfer_size;
550 dump_desc_dbg(ioat, desc);
551 } while ((i += 1 + with_ext) < num_descs);
552
553 /* last xor descriptor carries the unmap parameters and fence bit */
554 desc->txd.flags = flags;
555 desc->len = total_len;
556 if (result)
557 desc->result = result;
558 xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
559
560 /* completion descriptor carries interrupt bit */
561 compl_desc = ioat2_get_ring_ent(ioat, idx + i);
562 compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
563 hw = compl_desc->hw;
564 hw->ctl = 0;
565 hw->ctl_f.null = 1;
566 hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
567 hw->ctl_f.compl_write = 1;
568 hw->size = NULL_DESC_BUFFER_SIZE;
569 dump_desc_dbg(ioat, compl_desc);
570
571 /* we leave the channel locked to ensure in order submission */
572 return &desc->txd;
573}
574
575static struct dma_async_tx_descriptor *
576ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
577 unsigned int src_cnt, size_t len, unsigned long flags)
578{
579 return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
580}
581
582struct dma_async_tx_descriptor *
583ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
584 unsigned int src_cnt, size_t len,
585 enum sum_check_flags *result, unsigned long flags)
586{
587 /* the cleanup routine only sets bits on validate failure; it
588 * does not clear bits on validate success... so clear it here
589 */
590 *result = 0;
591
592 return __ioat3_prep_xor_lock(chan, result, src[0], &src[1],
593 src_cnt - 1, len, flags);
594}
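/*
 * Note the convention: src[0] is the presumed xor result and is
 * programmed as the hardware "destination", which IOAT_OP_XOR_VAL
 * reads rather than writes; on mismatch the engine sets bits in
 * *result, which is why the cleanup path never clears them.
 */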
595
596static void
597dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext)
598{
599 struct device *dev = to_dev(&ioat->base);
600 struct ioat_pq_descriptor *pq = desc->pq;
601 struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
602 struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
603 int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
604 int i;
605
606 dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
607 " sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n",
608 desc_id(desc), (unsigned long long) desc->txd.phys,
609 (unsigned long long) (pq_ex ? pq_ex->next : pq->next),
610 desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
611 pq->ctl_f.compl_write,
612 pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
613 pq->ctl_f.src_cnt);
614 for (i = 0; i < src_cnt; i++)
615 dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
616 (unsigned long long) pq_get_src(descs, i), pq->coef[i]);
617 dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
618 dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
619}
620
621static struct dma_async_tx_descriptor *
622__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
623 const dma_addr_t *dst, const dma_addr_t *src,
624 unsigned int src_cnt, const unsigned char *scf,
625 size_t len, unsigned long flags)
626{
627 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
628 struct ioat_chan_common *chan = &ioat->base;
629 struct ioat_ring_ent *compl_desc;
630 struct ioat_ring_ent *desc;
631 struct ioat_ring_ent *ext;
632 size_t total_len = len;
633 struct ioat_pq_descriptor *pq;
634 struct ioat_pq_ext_descriptor *pq_ex = NULL;
635 struct ioat_dma_descriptor *hw;
636 u32 offset = 0;
637 int num_descs;
638 int with_ext;
639 int i, s;
640 u16 idx;
641 u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
642
643 dev_dbg(to_dev(chan), "%s\n", __func__);
644 /* the engine requires at least two sources (we provide
645 * at least 1 implied source in the DMA_PREP_CONTINUE case)
646 */
647 BUG_ON(src_cnt + dmaf_continue(flags) < 2);
648
649 num_descs = ioat2_xferlen_to_descs(ioat, len);
650 /* we need 2x the number of descriptors when there are more
651 * than 3 sources
652 */
653 if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) {
654 with_ext = 1;
655 num_descs *= 2;
656 } else
657 with_ext = 0;
658
659 /* completion writes from the raid engine may pass completion
660 * writes from the legacy engine, so we need one extra null
661 * (legacy) descriptor to ensure all completion writes arrive in
662 * order.
663 */
664 if (likely(num_descs) &&
665 ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
666 /* pass */;
667 else
668 return NULL;
669 i = 0;
670 do {
671 struct ioat_raw_descriptor *descs[2];
672 size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
673
674 desc = ioat2_get_ring_ent(ioat, idx + i);
675 pq = desc->pq;
676
677 /* save a branch by unconditionally retrieving the
678 * extended descriptor; pq_set_src() knows not to write
679 * to it in the single descriptor case
680 */
681 ext = ioat2_get_ring_ent(ioat, idx + i + with_ext);
682 pq_ex = ext->pq_ex;
683
684 descs[0] = (struct ioat_raw_descriptor *) pq;
685 descs[1] = (struct ioat_raw_descriptor *) pq_ex;
686
687 for (s = 0; s < src_cnt; s++)
688 pq_set_src(descs, src[s], offset, scf[s], s);
689
690 /* see the comment for dma_maxpq in include/linux/dmaengine.h */
691 if (dmaf_p_disabled_continue(flags))
692 pq_set_src(descs, dst[1], offset, 1, s++);
693 else if (dmaf_continue(flags)) {
694 pq_set_src(descs, dst[0], offset, 0, s++);
695 pq_set_src(descs, dst[1], offset, 1, s++);
696 pq_set_src(descs, dst[1], offset, 0, s++);
697 }
698 pq->size = xfer_size;
699 pq->p_addr = dst[0] + offset;
700 pq->q_addr = dst[1] + offset;
701 pq->ctl = 0;
702 pq->ctl_f.op = op;
703 pq->ctl_f.src_cnt = src_cnt_to_hw(s);
704 pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
705 pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
706
707 len -= xfer_size;
708 offset += xfer_size;
709 } while ((i += 1 + with_ext) < num_descs);
710
711 /* last pq descriptor carries the unmap parameters and fence bit */
712 desc->txd.flags = flags;
713 desc->len = total_len;
714 if (result)
715 desc->result = result;
716 pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
717 dump_pq_desc_dbg(ioat, desc, ext);
718
719 /* completion descriptor carries interrupt bit */
720 compl_desc = ioat2_get_ring_ent(ioat, idx + i);
721 compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
722 hw = compl_desc->hw;
723 hw->ctl = 0;
724 hw->ctl_f.null = 1;
725 hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
726 hw->ctl_f.compl_write = 1;
727 hw->size = NULL_DESC_BUFFER_SIZE;
728 dump_desc_dbg(ioat, compl_desc);
729
730 /* we leave the channel locked to ensure in order submission */
731 return &desc->txd;
732}
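/*
 * Worked example of the continuation bookkeeping above: for a
 * DMA_PREP_CONTINUE pq with src_cnt == 2, three implied sources
 * (p re-read, and q re-read twice) are appended, so a 2-source
 * continuation is programmed as 5 hardware sources;
 * ioat3_dma_unmap() reverses this by subtracting 3 (or 1 in the
 * p-disabled case) before walking the source list.
 */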
733
734static struct dma_async_tx_descriptor *
735ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
736 unsigned int src_cnt, const unsigned char *scf, size_t len,
737 unsigned long flags)
738{
739 /* handle the single source multiply case from the raid6
740 * recovery path
741 */
742 if (unlikely((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1)) {
743 dma_addr_t single_source[2];
744 unsigned char single_source_coef[2];
745
746 BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
747 single_source[0] = src[0];
748 single_source[1] = src[0];
749 single_source_coef[0] = scf[0];
750 single_source_coef[1] = 0;
751
752 return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
753 single_source_coef, len, flags);
754 } else
755 return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf,
756 len, flags);
757}
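/*
 * The duplicated source with coefficient 0 satisfies the engine's
 * two-source minimum while contributing nothing to q, so the result
 * is effectively the single-source GF(256) multiply
 * q = scf[0] * src[0] that the raid6 recovery path expects.
 */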
758
759struct dma_async_tx_descriptor *
760ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
761 unsigned int src_cnt, const unsigned char *scf, size_t len,
762 enum sum_check_flags *pqres, unsigned long flags)
763{
764 /* the cleanup routine only sets bits on validate failure; it
765 * does not clear bits on validate success... so clear it here
766 */
767 *pqres = 0;
768
769 return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
770 flags);
771}
772
773static struct dma_async_tx_descriptor *
774ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
775 unsigned int src_cnt, size_t len, unsigned long flags)
776{
777 unsigned char scf[src_cnt];
778 dma_addr_t pq[2];
779
780 memset(scf, 0, src_cnt);
781 flags |= DMA_PREP_PQ_DISABLE_Q;
782 pq[0] = dst;
783 pq[1] = ~0;
784
785 return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
786 flags);
787}
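/*
 * xor emulated via the pq engine: q is disabled (pq[1] = ~0 is a
 * dummy address) and the q coefficients are zeroed, leaving only
 * the p output, which is the plain xor of the sources.
 */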
788
789struct dma_async_tx_descriptor *
790ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
791 unsigned int src_cnt, size_t len,
792 enum sum_check_flags *result, unsigned long flags)
793{
794 unsigned char scf[src_cnt];
795 dma_addr_t pq[2];
796
797 /* the cleanup routine only sets bits on validate failure; it
798 * does not clear bits on validate success... so clear it here
799 */
800 *result = 0;
801
802 memset(scf, 0, src_cnt);
803 flags |= DMA_PREP_PQ_DISABLE_Q;
804 pq[0] = src[0];
805 pq[1] = ~0;
806
807 return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf,
808 len, flags);
809}
810
811static struct dma_async_tx_descriptor *
812ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
813{
814 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
815 struct ioat_ring_ent *desc;
816 struct ioat_dma_descriptor *hw;
817 u16 idx;
818
819 if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0)
820 desc = ioat2_get_ring_ent(ioat, idx);
821 else
822 return NULL;
823
824 hw = desc->hw;
825 hw->ctl = 0;
826 hw->ctl_f.null = 1;
827 hw->ctl_f.int_en = 1;
828 hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
829 hw->ctl_f.compl_write = 1;
830 hw->size = NULL_DESC_BUFFER_SIZE;
831 hw->src_addr = 0;
832 hw->dst_addr = 0;
833
834 desc->txd.flags = flags;
835 desc->len = 1;
836
837 dump_desc_dbg(ioat, desc);
838
839 /* we leave the channel locked to ensure in order submission */
840 return &desc->txd;
841}
842
843static void __devinit ioat3_dma_test_callback(void *dma_async_param)
844{
845 struct completion *cmp = dma_async_param;
846
847 complete(cmp);
848}
849
850#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
851static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device)
852{
853 int i, src_idx;
854 struct page *dest;
855 struct page *xor_srcs[IOAT_NUM_SRC_TEST];
856 struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
857 dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
858 dma_addr_t dma_addr, dest_dma;
859 struct dma_async_tx_descriptor *tx;
860 struct dma_chan *dma_chan;
861 dma_cookie_t cookie;
862 u8 cmp_byte = 0;
863 u32 cmp_word;
864 u32 xor_val_result;
865 int err = 0;
866 struct completion cmp;
867 unsigned long tmo;
868 struct device *dev = &device->pdev->dev;
869 struct dma_device *dma = &device->common;
870
871 dev_dbg(dev, "%s\n", __func__);
872
873 if (!dma_has_cap(DMA_XOR, dma->cap_mask))
874 return 0;
875
876 for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
877 xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
878 if (!xor_srcs[src_idx]) {
879 while (src_idx--)
880 __free_page(xor_srcs[src_idx]);
881 return -ENOMEM;
882 }
883 }
884
885 dest = alloc_page(GFP_KERNEL);
886 if (!dest) {
887 while (src_idx--)
888 __free_page(xor_srcs[src_idx]);
889 return -ENOMEM;
890 }
891
892 /* Fill in src buffers */
893 for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
894 u8 *ptr = page_address(xor_srcs[src_idx]);
895 for (i = 0; i < PAGE_SIZE; i++)
896 ptr[i] = (1 << src_idx);
897 }
898
899 for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
900 cmp_byte ^= (u8) (1 << src_idx);
901
902 cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
903 (cmp_byte << 8) | cmp_byte;
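/* with IOAT_NUM_SRC_TEST == 6: cmp_byte = 0x01^0x02^0x04^0x08^0x10^0x20
 * = 0x3f, so cmp_word = 0x3f3f3f3f */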
904
905 memset(page_address(dest), 0, PAGE_SIZE);
906
907 dma_chan = container_of(dma->channels.next, struct dma_chan,
908 device_node);
909 if (dma->device_alloc_chan_resources(dma_chan) < 1) {
910 err = -ENODEV;
911 goto out;
912 }
913
914 /* test xor */
915 dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
916 for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
917 dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
918 DMA_TO_DEVICE);
919 tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
920 IOAT_NUM_SRC_TEST, PAGE_SIZE,
921 DMA_PREP_INTERRUPT);
922
923 if (!tx) {
924 dev_err(dev, "Self-test xor prep failed\n");
925 err = -ENODEV;
926 goto free_resources;
927 }
928
929 async_tx_ack(tx);
930 init_completion(&cmp);
931 tx->callback = ioat3_dma_test_callback;
932 tx->callback_param = &cmp;
933 cookie = tx->tx_submit(tx);
934 if (cookie < 0) {
935 dev_err(dev, "Self-test xor setup failed\n");
936 err = -ENODEV;
937 goto free_resources;
938 }
939 dma->device_issue_pending(dma_chan);
940
941 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
942
943 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
944 dev_err(dev, "Self-test xor timed out\n");
945 err = -ENODEV;
946 goto free_resources;
947 }
948
949 dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
950 for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
951 u32 *ptr = page_address(dest);
952 if (ptr[i] != cmp_word) {
953 dev_err(dev, "Self-test xor failed compare\n");
954 err = -ENODEV;
955 goto free_resources;
956 }
957 }
958 dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_TO_DEVICE);
959
960 /* skip validate if the capability is not present */
961 if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
962 goto free_resources;
963
964 /* validate the sources with the destination page */
965 for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
966 xor_val_srcs[i] = xor_srcs[i];
967 xor_val_srcs[i] = dest;
968
969 xor_val_result = 1;
970
971 for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
972 dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
973 DMA_TO_DEVICE);
974 tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
975 IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
976 &xor_val_result, DMA_PREP_INTERRUPT);
977 if (!tx) {
978 dev_err(dev, "Self-test zero prep failed\n");
979 err = -ENODEV;
980 goto free_resources;
981 }
982
983 async_tx_ack(tx);
984 init_completion(&cmp);
985 tx->callback = ioat3_dma_test_callback;
986 tx->callback_param = &cmp;
987 cookie = tx->tx_submit(tx);
988 if (cookie < 0) {
989 dev_err(dev, "Self-test zero setup failed\n");
990 err = -ENODEV;
991 goto free_resources;
992 }
993 dma->device_issue_pending(dma_chan);
994
995 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
996
997 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
998 dev_err(dev, "Self-test validate timed out\n");
999 err = -ENODEV;
1000 goto free_resources;
1001 }
1002
1003 if (xor_val_result != 0) {
1004 dev_err(dev, "Self-test validate failed compare\n");
1005 err = -ENODEV;
1006 goto free_resources;
1007 }
1008
1009 /* skip memset if the capability is not present */
1010 if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask))
1011 goto free_resources;
1012
1013 /* test memset */
1014 dma_addr = dma_map_page(dev, dest, 0,
1015 PAGE_SIZE, DMA_FROM_DEVICE);
1016 tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
1017 DMA_PREP_INTERRUPT);
1018 if (!tx) {
1019 dev_err(dev, "Self-test memset prep failed\n");
1020 err = -ENODEV;
1021 goto free_resources;
1022 }
1023
1024 async_tx_ack(tx);
1025 init_completion(&cmp);
1026 tx->callback = ioat3_dma_test_callback;
1027 tx->callback_param = &cmp;
1028 cookie = tx->tx_submit(tx);
1029 if (cookie < 0) {
1030 dev_err(dev, "Self-test memset setup failed\n");
1031 err = -ENODEV;
1032 goto free_resources;
1033 }
1034 dma->device_issue_pending(dma_chan);
1035
1036 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1037
1038 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
1039 dev_err(dev, "Self-test memset timed out\n");
1040 err = -ENODEV;
1041 goto free_resources;
1042 }
1043
1044 for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
1045 u32 *ptr = page_address(dest);
1046 if (ptr[i]) {
1047 dev_err(dev, "Self-test memset failed compare\n");
1048 err = -ENODEV;
1049 goto free_resources;
1050 }
1051 }
1052
1053 /* test for non-zero parity sum */
1054 xor_val_result = 0;
1055 for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1056 dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
1057 DMA_TO_DEVICE);
1058 tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
1059 IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
1060 &xor_val_result, DMA_PREP_INTERRUPT);
1061 if (!tx) {
1062 dev_err(dev, "Self-test 2nd zero prep failed\n");
1063 err = -ENODEV;
1064 goto free_resources;
1065 }
1066
1067 async_tx_ack(tx);
1068 init_completion(&cmp);
1069 tx->callback = ioat3_dma_test_callback;
1070 tx->callback_param = &cmp;
1071 cookie = tx->tx_submit(tx);
1072 if (cookie < 0) {
1073 dev_err(dev, "Self-test 2nd zero setup failed\n");
1074 err = -ENODEV;
1075 goto free_resources;
1076 }
1077 dma->device_issue_pending(dma_chan);
1078
1079 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1080
1081 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
1082 dev_err(dev, "Self-test 2nd validate timed out\n");
1083 err = -ENODEV;
1084 goto free_resources;
1085 }
1086
1087 if (xor_val_result != SUM_CHECK_P_RESULT) {
1088 dev_err(dev, "Self-test validate failed compare\n");
1089 err = -ENODEV;
1090 goto free_resources;
1091 }
1092
1093free_resources:
1094 dma->device_free_chan_resources(dma_chan);
1095out:
1096 src_idx = IOAT_NUM_SRC_TEST;
1097 while (src_idx--)
1098 __free_page(xor_srcs[src_idx]);
1099 __free_page(dest);
1100 return err;
1101}
1102
1103static int __devinit ioat3_dma_self_test(struct ioatdma_device *device)
1104{
1105 int rc = ioat_dma_self_test(device);
1106
1107 if (rc)
1108 return rc;
1109
1110 rc = ioat_xor_val_self_test(device);
1111 if (rc)
1112 return rc;
1113
1114 return 0;
1115}
1116
1117int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
1118{
1119 struct pci_dev *pdev = device->pdev;
1120 struct dma_device *dma;
1121 struct dma_chan *c;
1122 struct ioat_chan_common *chan;
1123 bool is_raid_device = false;
1124 int err;
1125 u16 dev_id;
1126 u32 cap;
1127
1128 device->enumerate_channels = ioat2_enumerate_channels;
1129 device->self_test = ioat3_dma_self_test;
1130 dma = &device->common;
1131 dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
1132 dma->device_issue_pending = ioat2_issue_pending;
1133 dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
1134 dma->device_free_chan_resources = ioat2_free_chan_resources;
1135
1136 dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
1137 dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;
1138
1139 cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);
1140 if (cap & IOAT_CAP_XOR) {
1141 is_raid_device = true;
1142 dma->max_xor = 8;
1143 dma->xor_align = 2;
1144
1145 dma_cap_set(DMA_XOR, dma->cap_mask);
1146 dma->device_prep_dma_xor = ioat3_prep_xor;
1147
1148 dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
1149 dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
1150 }
1151 if (cap & IOAT_CAP_PQ) {
1152 is_raid_device = true;
1153 dma_set_maxpq(dma, 8, 0);
1154 dma->pq_align = 2;
1155
1156 dma_cap_set(DMA_PQ, dma->cap_mask);
1157 dma->device_prep_dma_pq = ioat3_prep_pq;
1158
1159 dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
1160 dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
1161
1162 if (!(cap & IOAT_CAP_XOR)) {
1163 dma->max_xor = 8;
1164 dma->xor_align = 2;
1165
1166 dma_cap_set(DMA_XOR, dma->cap_mask);
1167 dma->device_prep_dma_xor = ioat3_prep_pqxor;
1168
1169 dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
1170 dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val;
1171 }
1172 }
1173 if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) {
1174 dma_cap_set(DMA_MEMSET, dma->cap_mask);
1175 dma->device_prep_dma_memset = ioat3_prep_memset_lock;
1176 }
1177
1178
1179 if (is_raid_device) {
1180 dma->device_is_tx_complete = ioat3_is_complete;
1181 device->cleanup_tasklet = ioat3_cleanup_tasklet;
1182 device->timer_fn = ioat3_timer_event;
1183 } else {
1184 dma->device_is_tx_complete = ioat2_is_complete;
1185 device->cleanup_tasklet = ioat2_cleanup_tasklet;
1186 device->timer_fn = ioat2_timer_event;
1187 }
1188
1189 /* -= IOAT ver.3 workarounds =- */
1190 /* Write CHANERRMSK_INT with 3E07h to mask out the errors
1191 * that can cause stability issues for IOAT ver.3
1192 */
1193 pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07);
1194
1195 /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
1196 * (workaround for spurious config parity error after restart)
1197 */
1198 pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
1199 if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
1200 pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);
1201
1202 err = ioat_probe(device);
1203 if (err)
1204 return err;
1205 ioat_set_tcp_copy_break(262144);
1206
1207 list_for_each_entry(c, &dma->channels, device_node) {
1208 chan = to_chan_common(c);
1209 writel(IOAT_DMA_DCA_ANY_CPU,
1210 chan->reg_base + IOAT_DCACTRL_OFFSET);
1211 }
1212
1213 err = ioat_register(device);
1214 if (err)
1215 return err;
1216
1217 ioat_kobject_add(device, &ioat2_ktype);
1218
1219 if (dca)
1220 device->dca = ioat3_dca_init(pdev, device->reg_base);
1221
1222 return 0;
1223}
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
new file mode 100644
index 000000000000..99afb12bd409
--- /dev/null
+++ b/drivers/dma/ioat/hw.h
@@ -0,0 +1,215 @@
1/*
2 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 2 of the License, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59
16 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * The full GNU General Public License is included in this distribution in the
19 * file called COPYING.
20 */
21#ifndef _IOAT_HW_H_
22#define _IOAT_HW_H_
23
24/* PCI Configuration Space Values */
25#define IOAT_PCI_VID 0x8086
26#define IOAT_MMIO_BAR 0
27
28/* CB device ID's */
29#define IOAT_PCI_DID_5000 0x1A38
30#define IOAT_PCI_DID_CNB 0x360B
31#define IOAT_PCI_DID_SCNB 0x65FF
32#define IOAT_PCI_DID_SNB 0x402F
33
34#define IOAT_PCI_RID 0x00
35#define IOAT_PCI_SVID 0x8086
36#define IOAT_PCI_SID 0x8086
37#define IOAT_VER_1_2 0x12 /* Version 1.2 */
38#define IOAT_VER_2_0 0x20 /* Version 2.0 */
39#define IOAT_VER_3_0 0x30 /* Version 3.0 */
40#define IOAT_VER_3_2 0x32 /* Version 3.2 */
41
42struct ioat_dma_descriptor {
43 uint32_t size;
44 union {
45 uint32_t ctl;
46 struct {
47 unsigned int int_en:1;
48 unsigned int src_snoop_dis:1;
49 unsigned int dest_snoop_dis:1;
50 unsigned int compl_write:1;
51 unsigned int fence:1;
52 unsigned int null:1;
53 unsigned int src_brk:1;
54 unsigned int dest_brk:1;
55 unsigned int bundle:1;
56 unsigned int dest_dca:1;
57 unsigned int hint:1;
58 unsigned int rsvd2:13;
59 #define IOAT_OP_COPY 0x00
60 unsigned int op:8;
61 } ctl_f;
62 };
63 uint64_t src_addr;
64 uint64_t dst_addr;
65 uint64_t next;
66 uint64_t rsv1;
67 uint64_t rsv2;
68 /* store some driver data in an unused portion of the descriptor */
69 union {
70 uint64_t user1;
71 uint64_t tx_cnt;
72 };
73 uint64_t user2;
74};
75
76struct ioat_fill_descriptor {
77 uint32_t size;
78 union {
79 uint32_t ctl;
80 struct {
81 unsigned int int_en:1;
82 unsigned int rsvd:1;
83 unsigned int dest_snoop_dis:1;
84 unsigned int compl_write:1;
85 unsigned int fence:1;
86 unsigned int rsvd2:2;
87 unsigned int dest_brk:1;
88 unsigned int bundle:1;
89 unsigned int rsvd4:15;
90 #define IOAT_OP_FILL 0x01
91 unsigned int op:8;
92 } ctl_f;
93 };
94 uint64_t src_data;
95 uint64_t dst_addr;
96 uint64_t next;
97 uint64_t rsv1;
98 uint64_t next_dst_addr;
99 uint64_t user1;
100 uint64_t user2;
101};
102
103struct ioat_xor_descriptor {
104 uint32_t size;
105 union {
106 uint32_t ctl;
107 struct {
108 unsigned int int_en:1;
109 unsigned int src_snoop_dis:1;
110 unsigned int dest_snoop_dis:1;
111 unsigned int compl_write:1;
112 unsigned int fence:1;
113 unsigned int src_cnt:3;
114 unsigned int bundle:1;
115 unsigned int dest_dca:1;
116 unsigned int hint:1;
117 unsigned int rsvd:13;
118 #define IOAT_OP_XOR 0x87
119 #define IOAT_OP_XOR_VAL 0x88
120 unsigned int op:8;
121 } ctl_f;
122 };
123 uint64_t src_addr;
124 uint64_t dst_addr;
125 uint64_t next;
126 uint64_t src_addr2;
127 uint64_t src_addr3;
128 uint64_t src_addr4;
129 uint64_t src_addr5;
130};
131
132struct ioat_xor_ext_descriptor {
133 uint64_t src_addr6;
134 uint64_t src_addr7;
135 uint64_t src_addr8;
136 uint64_t next;
137 uint64_t rsvd[4];
138};
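/* the base xor descriptor carries five source addresses; sources
 * 6-8 spill into this extended descriptor, matching the "> 5"
 * threshold used by desc_has_ext() and __ioat3_prep_xor_lock() */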
139
140struct ioat_pq_descriptor {
141 uint32_t size;
142 union {
143 uint32_t ctl;
144 struct {
145 unsigned int int_en:1;
146 unsigned int src_snoop_dis:1;
147 unsigned int dest_snoop_dis:1;
148 unsigned int compl_write:1;
149 unsigned int fence:1;
150 unsigned int src_cnt:3;
151 unsigned int bundle:1;
152 unsigned int dest_dca:1;
153 unsigned int hint:1;
154 unsigned int p_disable:1;
155 unsigned int q_disable:1;
156 unsigned int rsvd:11;
157 #define IOAT_OP_PQ 0x89
158 #define IOAT_OP_PQ_VAL 0x8a
159 unsigned int op:8;
160 } ctl_f;
161 };
162 uint64_t src_addr;
163 uint64_t p_addr;
164 uint64_t next;
165 uint64_t src_addr2;
166 uint64_t src_addr3;
167 uint8_t coef[8];
168 uint64_t q_addr;
169};
170
171struct ioat_pq_ext_descriptor {
172 uint64_t src_addr4;
173 uint64_t src_addr5;
174 uint64_t src_addr6;
175 uint64_t next;
176 uint64_t src_addr7;
177 uint64_t src_addr8;
178 uint64_t rsvd[2];
179};
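/* the base pq descriptor holds three source addresses (plus the
 * eight q coefficient bytes); sources 4-8 live here, matching the
 * "> 3" threshold used by the pq prep and cleanup paths */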
180
181struct ioat_pq_update_descriptor {
182 uint32_t size;
183 union {
184 uint32_t ctl;
185 struct {
186 unsigned int int_en:1;
187 unsigned int src_snoop_dis:1;
188 unsigned int dest_snoop_dis:1;
189 unsigned int compl_write:1;
190 unsigned int fence:1;
191 unsigned int src_cnt:3;
192 unsigned int bundle:1;
193 unsigned int dest_dca:1;
194 unsigned int hint:1;
195 unsigned int p_disable:1;
196 unsigned int q_disable:1;
197 unsigned int rsvd:3;
198 unsigned int coef:8;
199 #define IOAT_OP_PQ_UP 0x8b
200 unsigned int op:8;
201 } ctl_f;
202 };
203 uint64_t src_addr;
204 uint64_t p_addr;
205 uint64_t next;
206 uint64_t src_addr2;
207 uint64_t p_src;
208 uint64_t q_src;
209 uint64_t q_addr;
210};
211
212struct ioat_raw_descriptor {
213 uint64_t field[8];
214};
215#endif
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
new file mode 100644
index 000000000000..d545fae30f37
--- /dev/null
+++ b/drivers/dma/ioat/pci.c
@@ -0,0 +1,210 @@
1/*
2 * Intel I/OAT DMA Linux driver
3 * Copyright(c) 2007 - 2009 Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * The full GNU General Public License is included in this distribution in
19 * the file called "COPYING".
20 *
21 */
22
23/*
24 * This driver supports an Intel I/OAT DMA engine, which does asynchronous
25 * copy operations.
26 */
27
28#include <linux/init.h>
29#include <linux/module.h>
30#include <linux/pci.h>
31#include <linux/interrupt.h>
32#include <linux/dca.h>
33#include "dma.h"
34#include "dma_v2.h"
35#include "registers.h"
36#include "hw.h"
37
38MODULE_VERSION(IOAT_DMA_VERSION);
39MODULE_LICENSE("Dual BSD/GPL");
40MODULE_AUTHOR("Intel Corporation");
41
42static struct pci_device_id ioat_pci_tbl[] = {
43 /* I/OAT v1 platforms */
44 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
45 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) },
46 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
47 { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
48
49 /* I/OAT v2 platforms */
50 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
51
52 /* I/OAT v3 platforms */
53 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
54 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
55 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
56 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
57 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
58 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
59 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
60 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
61
62 /* I/OAT v3.2 platforms */
63 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) },
64 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) },
65 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) },
66 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) },
67 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) },
68 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) },
69 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) },
70 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) },
71 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) },
72 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) },
73
74 { 0, }
75};
76MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
77
78static int __devinit ioat_pci_probe(struct pci_dev *pdev,
79 const struct pci_device_id *id);
80static void __devexit ioat_remove(struct pci_dev *pdev);
81
82static int ioat_dca_enabled = 1;
83module_param(ioat_dca_enabled, int, 0644);
84MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
85
86struct kmem_cache *ioat2_cache;
87
88#define DRV_NAME "ioatdma"
89
90static struct pci_driver ioat_pci_driver = {
91 .name = DRV_NAME,
92 .id_table = ioat_pci_tbl,
93 .probe = ioat_pci_probe,
94 .remove = __devexit_p(ioat_remove),
95};
96
97static struct ioatdma_device *
98alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase)
99{
100 struct device *dev = &pdev->dev;
101 struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
102
103 if (!d)
104 return NULL;
105 d->pdev = pdev;
106 d->reg_base = iobase;
107 return d;
108}
109
110static int __devinit ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
111{
112 void __iomem * const *iomap;
113 struct device *dev = &pdev->dev;
114 struct ioatdma_device *device;
115 int err;
116
117 err = pcim_enable_device(pdev);
118 if (err)
119 return err;
120
121 err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME);
122 if (err)
123 return err;
124 iomap = pcim_iomap_table(pdev);
125 if (!iomap)
126 return -ENOMEM;
127
128 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
129 if (err)
130 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
131 if (err)
132 return err;
133
134 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
135 if (err)
136 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
137 if (err)
138 return err;
139
143
144 pci_set_master(pdev);
145
146 device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]);
147 if (!device)
148 return -ENOMEM;
149 pci_set_drvdata(pdev, device);
150
151 device->version = readb(device->reg_base + IOAT_VER_OFFSET);
152 if (device->version == IOAT_VER_1_2)
153 err = ioat1_dma_probe(device, ioat_dca_enabled);
154 else if (device->version == IOAT_VER_2_0)
155 err = ioat2_dma_probe(device, ioat_dca_enabled);
156 else if (device->version >= IOAT_VER_3_0)
157 err = ioat3_dma_probe(device, ioat_dca_enabled);
158 else
159 return -ENODEV;
160
161 if (err) {
162 dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n");
163 return -ENODEV;
164 }
165
166 return 0;
167}
168
169static void __devexit ioat_remove(struct pci_dev *pdev)
170{
171 struct ioatdma_device *device = pci_get_drvdata(pdev);
172
173 if (!device)
174 return;
175
176 dev_dbg(&pdev->dev, "Removing dma and dca services\n");
177 if (device->dca) {
178 unregister_dca_provider(device->dca, &pdev->dev);
179 free_dca_provider(device->dca);
180 device->dca = NULL;
181 }
182 ioat_dma_remove(device);
183}
184
185static int __init ioat_init_module(void)
186{
187 int err;
188
189 pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
190 DRV_NAME, IOAT_DMA_VERSION);
191
192 ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent),
193 0, SLAB_HWCACHE_ALIGN, NULL);
194 if (!ioat2_cache)
195 return -ENOMEM;
196
197 err = pci_register_driver(&ioat_pci_driver);
198 if (err)
199 kmem_cache_destroy(ioat2_cache);
200
201 return err;
202}
203module_init(ioat_init_module);
204
205static void __exit ioat_exit_module(void)
206{
207 pci_unregister_driver(&ioat_pci_driver);
208 kmem_cache_destroy(ioat2_cache);
209}
210module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioat/registers.h
index 49bc277424f8..63038e18ab03 100644
--- a/drivers/dma/ioatdma_registers.h
+++ b/drivers/dma/ioat/registers.h
@@ -64,18 +64,37 @@
 
 #define IOAT_DEVICE_STATUS_OFFSET		0x0E	/* 16-bit */
 #define IOAT_DEVICE_STATUS_DEGRADED_MODE	0x0001
+#define IOAT_DEVICE_MMIO_RESTRICTED		0x0002
+#define IOAT_DEVICE_MEMORY_BYPASS		0x0004
+#define IOAT_DEVICE_ADDRESS_REMAPPING		0x0008
+
+#define IOAT_DMA_CAP_OFFSET			0x10	/* 32-bit */
+#define IOAT_CAP_PAGE_BREAK			0x00000001
+#define IOAT_CAP_CRC				0x00000002
+#define IOAT_CAP_SKIP_MARKER			0x00000004
+#define IOAT_CAP_DCA				0x00000010
+#define IOAT_CAP_CRC_MOVE			0x00000020
+#define IOAT_CAP_FILL_BLOCK			0x00000040
+#define IOAT_CAP_APIC				0x00000080
+#define IOAT_CAP_XOR				0x00000100
+#define IOAT_CAP_PQ				0x00000200
 
 #define IOAT_CHANNEL_MMIO_SIZE			0x80	/* Each Channel MMIO space is this size */
 
 /* DMA Channel Registers */
 #define IOAT_CHANCTRL_OFFSET			0x00	/* 16-bit Channel Control Register */
 #define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK	0xF000
+#define IOAT3_CHANCTRL_COMPL_DCA_EN		0x0200
 #define IOAT_CHANCTRL_CHANNEL_IN_USE		0x0100
 #define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL	0x0020
 #define IOAT_CHANCTRL_ERR_INT_EN		0x0010
 #define IOAT_CHANCTRL_ANY_ERR_ABORT_EN		0x0008
 #define IOAT_CHANCTRL_ERR_COMPLETION_EN		0x0004
-#define IOAT_CHANCTRL_INT_DISABLE		0x0001
+#define IOAT_CHANCTRL_INT_REARM			0x0001
+#define IOAT_CHANCTRL_RUN			(IOAT_CHANCTRL_INT_REARM |\
+						 IOAT_CHANCTRL_ERR_COMPLETION_EN |\
+						 IOAT_CHANCTRL_ANY_ERR_ABORT_EN |\
+						 IOAT_CHANCTRL_ERR_INT_EN)
 
 #define IOAT_DMA_COMP_OFFSET			0x02	/* 16-bit DMA channel compatibility */
 #define IOAT_DMA_COMP_V1			0x0001	/* Compatibility with DMA version 1 */
@@ -94,14 +113,14 @@
 #define IOAT2_CHANSTS_OFFSET_HIGH	0x0C
 #define IOAT_CHANSTS_OFFSET_HIGH(ver)	((ver) < IOAT_VER_2_0 \
 					? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
-#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR	~0x3F
-#define IOAT_CHANSTS_SOFT_ERR			0x0000000000000010
-#define IOAT_CHANSTS_UNAFFILIATED_ERR		0x0000000000000008
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS	0x0000000000000007
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE	0x0
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE	0x1
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED	0x2
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED	0x3
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR	(~0x3fULL)
+#define IOAT_CHANSTS_SOFT_ERR			0x10ULL
+#define IOAT_CHANSTS_UNAFFILIATED_ERR		0x8ULL
+#define IOAT_CHANSTS_STATUS			0x7ULL
+#define IOAT_CHANSTS_ACTIVE			0x0
+#define IOAT_CHANSTS_DONE			0x1
+#define IOAT_CHANSTS_SUSPENDED			0x2
+#define IOAT_CHANSTS_HALTED			0x3
 
 
 
@@ -204,22 +223,27 @@
 #define IOAT_CDAR_OFFSET_HIGH		0x24
 
 #define IOAT_CHANERR_OFFSET		0x28	/* 32-bit Channel Error Register */
-#define IOAT_CHANERR_DMA_TRANSFER_SRC_ADDR_ERR	0x0001
-#define IOAT_CHANERR_DMA_TRANSFER_DEST_ADDR_ERR	0x0002
-#define IOAT_CHANERR_NEXT_DESCRIPTOR_ADDR_ERR	0x0004
-#define IOAT_CHANERR_NEXT_DESCRIPTOR_ALIGNMENT_ERR	0x0008
+#define IOAT_CHANERR_SRC_ADDR_ERR		0x0001
+#define IOAT_CHANERR_DEST_ADDR_ERR		0x0002
+#define IOAT_CHANERR_NEXT_ADDR_ERR		0x0004
+#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR	0x0008
 #define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR	0x0010
 #define IOAT_CHANERR_CHANCMD_ERR		0x0020
 #define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR	0x0040
 #define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR	0x0080
 #define IOAT_CHANERR_READ_DATA_ERR		0x0100
 #define IOAT_CHANERR_WRITE_DATA_ERR		0x0200
-#define IOAT_CHANERR_DESCRIPTOR_CONTROL_ERR	0x0400
-#define IOAT_CHANERR_DESCRIPTOR_LENGTH_ERR	0x0800
+#define IOAT_CHANERR_CONTROL_ERR		0x0400
+#define IOAT_CHANERR_LENGTH_ERR			0x0800
 #define IOAT_CHANERR_COMPLETION_ADDR_ERR	0x1000
 #define IOAT_CHANERR_INT_CONFIGURATION_ERR	0x2000
 #define IOAT_CHANERR_SOFT_ERR			0x4000
 #define IOAT_CHANERR_UNAFFILIATED_ERR		0x8000
+#define IOAT_CHANERR_XOR_P_OR_CRC_ERR		0x10000
+#define IOAT_CHANERR_XOR_Q_ERR			0x20000
+#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR	0x40000
+
+#define IOAT_CHANERR_HANDLE_MASK	(IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
 
 #define IOAT_CHANERR_MASK_OFFSET	0x2C	/* 32-bit Channel Error Register */
 
diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
deleted file mode 100644
index a600fc0f7962..000000000000
--- a/drivers/dma/ioat_dma.c
+++ /dev/null
@@ -1,1741 +0,0 @@
1/*
2 * Intel I/OAT DMA Linux driver
3 * Copyright(c) 2004 - 2009 Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * The full GNU General Public License is included in this distribution in
19 * the file called "COPYING".
20 *
21 */
22
23/*
24 * This driver supports an Intel I/OAT DMA engine, which does asynchronous
25 * copy operations.
26 */
27
28#include <linux/init.h>
29#include <linux/module.h>
30#include <linux/pci.h>
31#include <linux/interrupt.h>
32#include <linux/dmaengine.h>
33#include <linux/delay.h>
34#include <linux/dma-mapping.h>
35#include <linux/workqueue.h>
36#include <linux/i7300_idle.h>
37#include "ioatdma.h"
38#include "ioatdma_registers.h"
39#include "ioatdma_hw.h"
40
41#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
42#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
43#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
44#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)
45
46#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
47static int ioat_pending_level = 4;
48module_param(ioat_pending_level, int, 0644);
49MODULE_PARM_DESC(ioat_pending_level,
50 "high-water mark for pushing ioat descriptors (default: 4)");
51
52#define RESET_DELAY msecs_to_jiffies(100)
53#define WATCHDOG_DELAY round_jiffies(msecs_to_jiffies(2000))
54static void ioat_dma_chan_reset_part2(struct work_struct *work);
55static void ioat_dma_chan_watchdog(struct work_struct *work);
56
57/*
58 * workaround for IOAT ver.3.0 null descriptor issue
59 * (channel returns error when size is 0)
60 */
61#define NULL_DESC_BUFFER_SIZE 1
62
63/* internal functions */
64static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
65static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
66
67static struct ioat_desc_sw *
68ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
69static struct ioat_desc_sw *
70ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
71
72static inline struct ioat_dma_chan *ioat_lookup_chan_by_index(
73 struct ioatdma_device *device,
74 int index)
75{
76 return device->idx[index];
77}
78
79/**
80 * ioat_dma_do_interrupt - handler used for single vector interrupt mode
81 * @irq: interrupt id
82 * @data: interrupt data
83 */
84static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
85{
86 struct ioatdma_device *instance = data;
87 struct ioat_dma_chan *ioat_chan;
88 unsigned long attnstatus;
89 int bit;
90 u8 intrctrl;
91
92 intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
93
94 if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
95 return IRQ_NONE;
96
97 if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
98 writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
99 return IRQ_NONE;
100 }
101
102 attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
103 for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
104 ioat_chan = ioat_lookup_chan_by_index(instance, bit);
105 tasklet_schedule(&ioat_chan->cleanup_task);
106 }
107
108 writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
109 return IRQ_HANDLED;
110}
111
112/**
113 * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
114 * @irq: interrupt id
115 * @data: interrupt data
116 */
117static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
118{
119 struct ioat_dma_chan *ioat_chan = data;
120
121 tasklet_schedule(&ioat_chan->cleanup_task);
122
123 return IRQ_HANDLED;
124}
125
126static void ioat_dma_cleanup_tasklet(unsigned long data);
127
128/**
129 * ioat_dma_enumerate_channels - find and initialize the device's channels
130 * @device: the device to be enumerated
131 */
132static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
133{
134 u8 xfercap_scale;
135 u32 xfercap;
136 int i;
137 struct ioat_dma_chan *ioat_chan;
138
139 /*
140 * IOAT ver.3 workarounds
141 */
142 if (device->version == IOAT_VER_3_0) {
143 u32 chan_err_mask;
144 u16 dev_id;
145 u32 dmauncerrsts;
146
147 /*
148 * Write CHANERRMSK_INT with 3E07h to mask out the errors
149 * that can cause stability issues for IOAT ver.3
150 */
151 chan_err_mask = 0x3E07;
152 pci_write_config_dword(device->pdev,
153 IOAT_PCI_CHANERRMASK_INT_OFFSET,
154 chan_err_mask);
155
156 /*
157 * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
158 * (workaround for spurious config parity error after restart)
159 */
160 pci_read_config_word(device->pdev,
161 IOAT_PCI_DEVICE_ID_OFFSET,
162 &dev_id);
163 if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
164 dmauncerrsts = 0x10;
165 pci_write_config_dword(device->pdev,
166 IOAT_PCI_DMAUNCERRSTS_OFFSET,
167 dmauncerrsts);
168 }
169 }
170
171 device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
172 xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
173 xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
174
175#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
176 if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) {
177 device->common.chancnt--;
178 }
179#endif
180 for (i = 0; i < device->common.chancnt; i++) {
181 ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL);
182 if (!ioat_chan) {
183 device->common.chancnt = i;
184 break;
185 }
186
187 ioat_chan->device = device;
188 ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
189 ioat_chan->xfercap = xfercap;
190 ioat_chan->desccount = 0;
191 INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2);
192 if (ioat_chan->device->version == IOAT_VER_2_0)
193 writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE |
194 IOAT_DMA_DCA_ANY_CPU,
195 ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
196 else if (ioat_chan->device->version == IOAT_VER_3_0)
197 writel(IOAT_DMA_DCA_ANY_CPU,
198 ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
199 spin_lock_init(&ioat_chan->cleanup_lock);
200 spin_lock_init(&ioat_chan->desc_lock);
201 INIT_LIST_HEAD(&ioat_chan->free_desc);
202 INIT_LIST_HEAD(&ioat_chan->used_desc);
203 /* This should be made common somewhere in dmaengine.c */
204 ioat_chan->common.device = &device->common;
205 list_add_tail(&ioat_chan->common.device_node,
206 &device->common.channels);
207 device->idx[i] = ioat_chan;
208 tasklet_init(&ioat_chan->cleanup_task,
209 ioat_dma_cleanup_tasklet,
210 (unsigned long) ioat_chan);
211 tasklet_disable(&ioat_chan->cleanup_task);
212 }
213 return device->common.chancnt;
214}
215
216/**
217 * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
218 * descriptors to hw
219 * @chan: DMA channel handle
220 */
221static inline void __ioat1_dma_memcpy_issue_pending(
222 struct ioat_dma_chan *ioat_chan)
223{
224 ioat_chan->pending = 0;
225 writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET);
226}
227
228static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
229{
230 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
231
232 if (ioat_chan->pending > 0) {
233 spin_lock_bh(&ioat_chan->desc_lock);
234 __ioat1_dma_memcpy_issue_pending(ioat_chan);
235 spin_unlock_bh(&ioat_chan->desc_lock);
236 }
237}
238
239static inline void __ioat2_dma_memcpy_issue_pending(
240 struct ioat_dma_chan *ioat_chan)
241{
242 ioat_chan->pending = 0;
243 writew(ioat_chan->dmacount,
244 ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
245}
246
247static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan)
248{
249 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
250
251 if (ioat_chan->pending > 0) {
252 spin_lock_bh(&ioat_chan->desc_lock);
253 __ioat2_dma_memcpy_issue_pending(ioat_chan);
254 spin_unlock_bh(&ioat_chan->desc_lock);
255 }
256}
257
258
259/**
260 * ioat_dma_chan_reset_part2 - reinit the channel after a reset
261 */
262static void ioat_dma_chan_reset_part2(struct work_struct *work)
263{
264 struct ioat_dma_chan *ioat_chan =
265 container_of(work, struct ioat_dma_chan, work.work);
266 struct ioat_desc_sw *desc;
267
268 spin_lock_bh(&ioat_chan->cleanup_lock);
269 spin_lock_bh(&ioat_chan->desc_lock);
270
271 ioat_chan->completion_virt->low = 0;
272 ioat_chan->completion_virt->high = 0;
273 ioat_chan->pending = 0;
274
275 /*
276 * count the descriptors waiting, and be sure to do it
277 * right for both the CB1 line and the CB2 ring
278 */
279 ioat_chan->dmacount = 0;
280 if (ioat_chan->used_desc.prev) {
281 desc = to_ioat_desc(ioat_chan->used_desc.prev);
282 do {
283 ioat_chan->dmacount++;
284 desc = to_ioat_desc(desc->node.next);
285 } while (&desc->node != ioat_chan->used_desc.next);
286 }
287
288 /*
289 * write the new starting descriptor address
290 * this puts channel engine into ARMED state
291 */
292 desc = to_ioat_desc(ioat_chan->used_desc.prev);
293 switch (ioat_chan->device->version) {
294 case IOAT_VER_1_2:
295 writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
296 ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
297 writel(((u64) desc->async_tx.phys) >> 32,
298 ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
299
300 writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
301 + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
302 break;
303 case IOAT_VER_2_0:
304 writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
305 ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
306 writel(((u64) desc->async_tx.phys) >> 32,
307 ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
308
309 /* tell the engine to go with what's left to be done */
310 writew(ioat_chan->dmacount,
311 ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
312
313 break;
314 }
315 dev_err(&ioat_chan->device->pdev->dev,
316 "chan%d reset - %d descs waiting, %d total desc\n",
317 chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
318
319 spin_unlock_bh(&ioat_chan->desc_lock);
320 spin_unlock_bh(&ioat_chan->cleanup_lock);
321}
322
323/**
324 * ioat_dma_reset_channel - restart a channel
325 * @ioat_chan: IOAT DMA channel handle
326 */
327static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan)
328{
329 u32 chansts, chanerr;
330
331 if (!ioat_chan->used_desc.prev)
332 return;
333
334 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
335 chansts = (ioat_chan->completion_virt->low
336 & IOAT_CHANSTS_DMA_TRANSFER_STATUS);
337 if (chanerr) {
338 dev_err(&ioat_chan->device->pdev->dev,
339 "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
340 chan_num(ioat_chan), chansts, chanerr);
341 writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
342 }
343
344 /*
345 * whack it upside the head with a reset
346 * and wait for things to settle out.
347 * force the pending count to a really big negative
348 * so that no one can trigger an issue_pending
349 * while we're waiting.
350 */
351
352 spin_lock_bh(&ioat_chan->desc_lock);
353 ioat_chan->pending = INT_MIN;
354 writeb(IOAT_CHANCMD_RESET,
355 ioat_chan->reg_base
356 + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
357 spin_unlock_bh(&ioat_chan->desc_lock);
358
359 /* schedule the 2nd half instead of sleeping a long time */
360 schedule_delayed_work(&ioat_chan->work, RESET_DELAY);
361}
362
363/**
364 * ioat_dma_chan_watchdog - watch for stuck channels
365 */
366static void ioat_dma_chan_watchdog(struct work_struct *work)
367{
368 struct ioatdma_device *device =
369 container_of(work, struct ioatdma_device, work.work);
370 struct ioat_dma_chan *ioat_chan;
371 int i;
372
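 /* local copy of the 64-bit CHANSTS value, split into halves so it
  * can be read with two 32-bit MMIO accesses */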
373 union {
374 u64 full;
375 struct {
376 u32 low;
377 u32 high;
378 };
379 } completion_hw;
380 unsigned long compl_desc_addr_hw;
381
382 for (i = 0; i < device->common.chancnt; i++) {
383 ioat_chan = ioat_lookup_chan_by_index(device, i);
384
385 if (ioat_chan->device->version == IOAT_VER_1_2
386 /* have we started processing anything yet */
387 && ioat_chan->last_completion
388 /* have we completed any since last watchdog cycle? */
389 && (ioat_chan->last_completion ==
390 ioat_chan->watchdog_completion)
391 /* has TCP stuck on one cookie since last watchdog? */
392 && (ioat_chan->watchdog_tcp_cookie ==
393 ioat_chan->watchdog_last_tcp_cookie)
394 && (ioat_chan->watchdog_tcp_cookie !=
395 ioat_chan->completed_cookie)
396 /* is there something in the chain to be processed? */
397 /* CB1 chain always has at least the last one processed */
398 && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next)
399 && ioat_chan->pending == 0) {
400
401 /*
402 * check the CHANSTS register for the last
403 * completed descriptor address:
404 * if that address is non-zero,
405 * differs from the completion writeback,
406 * and has changed since the last watchdog pass,
407 *     then the channel is still working
408 *     correctly and the problem is in the
409 *     completion writeback, so update the
410 *     completion writeback with the actual
411 *     CHANSTS value
412 * else
413 *     try resetting the channel
414 */
415
416 completion_hw.low = readl(ioat_chan->reg_base +
417 IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version));
418 completion_hw.high = readl(ioat_chan->reg_base +
419 IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version));
420#if (BITS_PER_LONG == 64)
421 compl_desc_addr_hw =
422 completion_hw.full
423 & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
424#else
425 compl_desc_addr_hw =
426 completion_hw.low & IOAT_LOW_COMPLETION_MASK;
427#endif
428
429 if ((compl_desc_addr_hw != 0)
430 && (compl_desc_addr_hw != ioat_chan->watchdog_completion)
431 && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) {
432 ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw;
433 ioat_chan->completion_virt->low = completion_hw.low;
434 ioat_chan->completion_virt->high = completion_hw.high;
435 } else {
436 ioat_dma_reset_channel(ioat_chan);
437 ioat_chan->watchdog_completion = 0;
438 ioat_chan->last_compl_desc_addr_hw = 0;
439 }
440
441 /*
442 * for version 2.0: if descriptors remain queued and the
443 * last completion hasn't changed since the last watchdog
444 * pass, then if the pending level hasn't been reached,
445 *     issue the pending descriptors to push them through
446 * else
447 *     try resetting the channel
448 */
449 } else if (ioat_chan->device->version == IOAT_VER_2_0
450 && ioat_chan->used_desc.prev
451 && ioat_chan->last_completion
452 && ioat_chan->last_completion == ioat_chan->watchdog_completion) {
453
454 if (ioat_chan->pending < ioat_pending_level)
455 ioat2_dma_memcpy_issue_pending(&ioat_chan->common);
456 else {
457 ioat_dma_reset_channel(ioat_chan);
458 ioat_chan->watchdog_completion = 0;
459 }
460 } else {
461 ioat_chan->last_compl_desc_addr_hw = 0;
462 ioat_chan->watchdog_completion
463 = ioat_chan->last_completion;
464 }
465
466 ioat_chan->watchdog_last_tcp_cookie =
467 ioat_chan->watchdog_tcp_cookie;
468 }
469
470 schedule_delayed_work(&device->work, WATCHDOG_DELAY);
471}
472
473static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
474{
475 struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
476 struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
477 struct ioat_desc_sw *prev, *new;
478 struct ioat_dma_descriptor *hw;
479 dma_cookie_t cookie;
480 LIST_HEAD(new_chain);
481 u32 copy;
482 size_t len;
483 dma_addr_t src, dst;
484 unsigned long orig_flags;
485 unsigned int desc_count = 0;
486
487 /* src, dest, and len are stored in the initial descriptor */
488 len = first->len;
489 src = first->src;
490 dst = first->dst;
491 orig_flags = first->async_tx.flags;
492 new = first;
493
494 spin_lock_bh(&ioat_chan->desc_lock);
495 prev = to_ioat_desc(ioat_chan->used_desc.prev);
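 /* warm the cache line of the descriptor we are about to chain onto */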
496 prefetch(prev->hw);
497 do {
498 copy = min_t(size_t, len, ioat_chan->xfercap);
499
500 async_tx_ack(&new->async_tx);
501
502 hw = new->hw;
503 hw->size = copy;
504 hw->ctl = 0;
505 hw->src_addr = src;
506 hw->dst_addr = dst;
507 hw->next = 0;
508
509 /* chain together the physical address list for the HW; the write
 * barrier below orders the descriptor writes before the link */
510 wmb();
511 prev->hw->next = (u64) new->async_tx.phys;
512
513 len -= copy;
514 dst += copy;
515 src += copy;
516
517 list_add_tail(&new->node, &new_chain);
518 desc_count++;
519 prev = new;
520 } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan)));
521
522 if (!new) {
523 dev_err(&ioat_chan->device->pdev->dev,
524 "tx submit failed\n");
525 spin_unlock_bh(&ioat_chan->desc_lock);
526 return -ENOMEM;
527 }
528
529 hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
530 if (first->async_tx.callback) {
531 hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
532 if (first != new) {
533 /* move the callback into the last desc */
534 new->async_tx.callback = first->async_tx.callback;
535 new->async_tx.callback_param
536 = first->async_tx.callback_param;
537 first->async_tx.callback = NULL;
538 first->async_tx.callback_param = NULL;
539 }
540 }
541
542 new->tx_cnt = desc_count;
543 new->async_tx.flags = orig_flags; /* client is in control of this ack */
544
545 /* store the original values for use in later cleanup */
546 if (new != first) {
547 new->src = first->src;
548 new->dst = first->dst;
549 new->len = first->len;
550 }
551
552 /* cookie increment and addition to the used list must be atomic */
553 cookie = ioat_chan->common.cookie;
554 cookie++;
555 if (cookie < 0)
556 cookie = 1;
557 ioat_chan->common.cookie = new->async_tx.cookie = cookie;
558
559 /* write address into NextDescriptor field of last desc in chain */
560 to_ioat_desc(ioat_chan->used_desc.prev)->hw->next =
561 first->async_tx.phys;
562 list_splice_tail(&new_chain, &ioat_chan->used_desc);
563
564 ioat_chan->dmacount += desc_count;
565 ioat_chan->pending += desc_count;
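 /* batch doorbell writes: only kick the hardware once enough work
  * has accumulated */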
566 if (ioat_chan->pending >= ioat_pending_level)
567 __ioat1_dma_memcpy_issue_pending(ioat_chan);
568 spin_unlock_bh(&ioat_chan->desc_lock);
569
570 return cookie;
571}
572
573static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx)
574{
575 struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
576 struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
577 struct ioat_desc_sw *new;
578 struct ioat_dma_descriptor *hw;
579 dma_cookie_t cookie;
580 u32 copy;
581 size_t len;
582 dma_addr_t src, dst;
583 unsigned long orig_flags;
584 unsigned int desc_count = 0;
585
587 /* src, dest, and len are stored in the initial descriptor */
587 len = first->len;
588 src = first->src;
589 dst = first->dst;
590 orig_flags = first->async_tx.flags;
591 new = first;
592
593 /*
594 * ioat_chan->desc_lock is still held on the version 2 path;
595 * it is released at the end of this function
596 */
597 do {
598 copy = min_t(size_t, len, ioat_chan->xfercap);
599
600 async_tx_ack(&new->async_tx);
601
602 hw = new->hw;
603 hw->size = copy;
604 hw->ctl = 0;
605 hw->src_addr = src;
606 hw->dst_addr = dst;
607
608 len -= copy;
609 dst += copy;
610 src += copy;
611 desc_count++;
612 } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan)));
613
614 if (!new) {
615 dev_err(&ioat_chan->device->pdev->dev,
616 "tx submit failed\n");
617 spin_unlock_bh(&ioat_chan->desc_lock);
618 return -ENOMEM;
619 }
620
621 hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
622 if (first->async_tx.callback) {
623 hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
624 if (first != new) {
625 /* move the callback into the last desc */
626 new->async_tx.callback = first->async_tx.callback;
627 new->async_tx.callback_param
628 = first->async_tx.callback_param;
629 first->async_tx.callback = NULL;
630 first->async_tx.callback_param = NULL;
631 }
632 }
633
634 new->tx_cnt = desc_count;
635 new->async_tx.flags = orig_flags; /* client is in control of this ack */
636
637 /* store the original values for use in later cleanup */
638 if (new != first) {
639 new->src = first->src;
640 new->dst = first->dst;
641 new->len = first->len;
642 }
643
644 /* cookie increment and addition to the used list must be atomic */
645 cookie = ioat_chan->common.cookie;
646 cookie++;
647 if (cookie < 0)
648 cookie = 1;
649 ioat_chan->common.cookie = new->async_tx.cookie = cookie;
650
651 ioat_chan->dmacount += desc_count;
652 ioat_chan->pending += desc_count;
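 /* as on the version 1 path, defer the doorbell until the pending
  * level is reached */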
653 if (ioat_chan->pending >= ioat_pending_level)
654 __ioat2_dma_memcpy_issue_pending(ioat_chan);
655 spin_unlock_bh(&ioat_chan->desc_lock);
656
657 return cookie;
658}
659
660/**
661 * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
662 * @ioat_chan: the channel supplying the memory pool for the descriptors
663 * @flags: allocation flags
664 */
665static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
666 struct ioat_dma_chan *ioat_chan,
667 gfp_t flags)
668{
669 struct ioat_dma_descriptor *desc;
670 struct ioat_desc_sw *desc_sw;
671 struct ioatdma_device *ioatdma_device;
672 dma_addr_t phys;
673
674 ioatdma_device = to_ioatdma_device(ioat_chan->common.device);
675 desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
676 if (unlikely(!desc))
677 return NULL;
678
679 desc_sw = kzalloc(sizeof(*desc_sw), flags);
680 if (unlikely(!desc_sw)) {
681 pci_pool_free(ioatdma_device->dma_pool, desc, phys);
682 return NULL;
683 }
684
685 memset(desc, 0, sizeof(*desc));
686 dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common);
687 switch (ioat_chan->device->version) {
688 case IOAT_VER_1_2:
689 desc_sw->async_tx.tx_submit = ioat1_tx_submit;
690 break;
691 case IOAT_VER_2_0:
692 case IOAT_VER_3_0:
693 desc_sw->async_tx.tx_submit = ioat2_tx_submit;
694 break;
695 }
696
697 desc_sw->hw = desc;
698 desc_sw->async_tx.phys = phys;
699
700 return desc_sw;
701}
702
703static int ioat_initial_desc_count = 256;
704module_param(ioat_initial_desc_count, int, 0644);
705MODULE_PARM_DESC(ioat_initial_desc_count,
706 "initial descriptors per channel (default: 256)");
707
708/**
709 * ioat2_dma_massage_chan_desc - link the descriptors into a circle
710 * @ioat_chan: the channel to be massaged
711 */
712static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan)
713{
714 struct ioat_desc_sw *desc, *_desc;
715
716 /* setup used_desc */
717 ioat_chan->used_desc.next = ioat_chan->free_desc.next;
718 ioat_chan->used_desc.prev = NULL;
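 /* a NULL prev means no descriptors are waiting to be processed */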
719
720 /* pull free_desc out of the circle so that every node is a hw
721 * descriptor, but leave it pointing to the list
722 */
723 ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next;
724 ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev;
725
726 /* circle link the hw descriptors */
727 desc = to_ioat_desc(ioat_chan->free_desc.next);
728 desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
729 list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) {
730 desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
731 }
732}
733
734/**
735 * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors
736 * @chan: the channel to be filled out
737 */
738static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
739{
740 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
741 struct ioat_desc_sw *desc;
742 u16 chanctrl;
743 u32 chanerr;
744 int i;
745 LIST_HEAD(tmp_list);
746
747 /* have we already been set up? */
748 if (!list_empty(&ioat_chan->free_desc))
749 return ioat_chan->desccount;
750
751 /* Setup register to interrupt and write completion status on error */
752 chanctrl = IOAT_CHANCTRL_ERR_INT_EN |
753 IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
754 IOAT_CHANCTRL_ERR_COMPLETION_EN;
755 writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
756
757 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
758 if (chanerr) {
759 dev_err(&ioat_chan->device->pdev->dev,
760 "CHANERR = %x, clearing\n", chanerr);
761 writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
762 }
763
764 /* Allocate descriptors */
765 for (i = 0; i < ioat_initial_desc_count; i++) {
766 desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
767 if (!desc) {
768 dev_err(&ioat_chan->device->pdev->dev,
769 "Only %d initial descriptors\n", i);
770 break;
771 }
772 list_add_tail(&desc->node, &tmp_list);
773 }
774 spin_lock_bh(&ioat_chan->desc_lock);
775 ioat_chan->desccount = i;
776 list_splice(&tmp_list, &ioat_chan->free_desc);
777 if (ioat_chan->device->version != IOAT_VER_1_2)
778 ioat2_dma_massage_chan_desc(ioat_chan);
779 spin_unlock_bh(&ioat_chan->desc_lock);
780
781 /* allocate a completion writeback area */
782 /* do two 32-bit MMIO writes since a single 64-bit write doesn't work */
783 ioat_chan->completion_virt =
784 pci_pool_alloc(ioat_chan->device->completion_pool,
785 GFP_KERNEL,
786 &ioat_chan->completion_addr);
787 memset(ioat_chan->completion_virt, 0,
788 sizeof(*ioat_chan->completion_virt));
789 writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF,
790 ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
791 writel(((u64) ioat_chan->completion_addr) >> 32,
792 ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
793
794 tasklet_enable(&ioat_chan->cleanup_task);
795 ioat_dma_start_null_desc(ioat_chan); /* give chain to dma device */
796 return ioat_chan->desccount;
797}
798
799/**
800 * ioat_dma_free_chan_resources - release all the descriptors
801 * @chan: the channel to be cleaned
802 */
803static void ioat_dma_free_chan_resources(struct dma_chan *chan)
804{
805 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
806 struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device);
807 struct ioat_desc_sw *desc, *_desc;
808 int in_use_descs = 0;
809
810 /* Before freeing channel resources, first check
811 * whether they were previously allocated for this channel.
812 */
813 if (ioat_chan->desccount == 0)
814 return;
815
816 tasklet_disable(&ioat_chan->cleanup_task);
817 ioat_dma_memcpy_cleanup(ioat_chan);
818
819 /* Delay 100ms after reset to allow internal DMA logic to quiesce
820 * before removing DMA descriptor resources.
821 */
822 writeb(IOAT_CHANCMD_RESET,
823 ioat_chan->reg_base
824 + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
825 mdelay(100);
826
827 spin_lock_bh(&ioat_chan->desc_lock);
828 switch (ioat_chan->device->version) {
829 case IOAT_VER_1_2:
830 list_for_each_entry_safe(desc, _desc,
831 &ioat_chan->used_desc, node) {
832 in_use_descs++;
833 list_del(&desc->node);
834 pci_pool_free(ioatdma_device->dma_pool, desc->hw,
835 desc->async_tx.phys);
836 kfree(desc);
837 }
838 list_for_each_entry_safe(desc, _desc,
839 &ioat_chan->free_desc, node) {
840 list_del(&desc->node);
841 pci_pool_free(ioatdma_device->dma_pool, desc->hw,
842 desc->async_tx.phys);
843 kfree(desc);
844 }
845 break;
846 case IOAT_VER_2_0:
847 case IOAT_VER_3_0:
848 list_for_each_entry_safe(desc, _desc,
849 ioat_chan->free_desc.next, node) {
850 list_del(&desc->node);
851 pci_pool_free(ioatdma_device->dma_pool, desc->hw,
852 desc->async_tx.phys);
853 kfree(desc);
854 }
855 desc = to_ioat_desc(ioat_chan->free_desc.next);
856 pci_pool_free(ioatdma_device->dma_pool, desc->hw,
857 desc->async_tx.phys);
858 kfree(desc);
859 INIT_LIST_HEAD(&ioat_chan->free_desc);
860 INIT_LIST_HEAD(&ioat_chan->used_desc);
861 break;
862 }
863 spin_unlock_bh(&ioat_chan->desc_lock);
864
865 pci_pool_free(ioatdma_device->completion_pool,
866 ioat_chan->completion_virt,
867 ioat_chan->completion_addr);
868
869 /* one is OK since we left it there on purpose */
870 if (in_use_descs > 1)
871 dev_err(&ioat_chan->device->pdev->dev,
872 "Freeing %d in use descriptors!\n",
873 in_use_descs - 1);
874
875 ioat_chan->last_completion = ioat_chan->completion_addr = 0;
876 ioat_chan->pending = 0;
877 ioat_chan->dmacount = 0;
878 ioat_chan->desccount = 0;
879 ioat_chan->watchdog_completion = 0;
880 ioat_chan->last_compl_desc_addr_hw = 0;
881 ioat_chan->watchdog_tcp_cookie =
882 ioat_chan->watchdog_last_tcp_cookie = 0;
883}
884
885/**
886 * ioat_dma_get_next_descriptor - return the next available descriptor
887 * @ioat_chan: IOAT DMA channel handle
888 *
889 * Gets the next descriptor from the chain, and must be called with the
890 * channel's desc_lock held. Allocates more descriptors if the channel
891 * has run out.
892 */
893static struct ioat_desc_sw *
894ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
895{
896 struct ioat_desc_sw *new;
897
898 if (!list_empty(&ioat_chan->free_desc)) {
899 new = to_ioat_desc(ioat_chan->free_desc.next);
900 list_del(&new->node);
901 } else {
902 /* try to get another desc */
903 new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
904 if (!new) {
905 dev_err(&ioat_chan->device->pdev->dev,
906 "alloc failed\n");
907 return NULL;
908 }
909 }
910
911 prefetch(new->hw);
912 return new;
913}
914
915static struct ioat_desc_sw *
916ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
917{
918 struct ioat_desc_sw *new;
919
920 /*
921 * used.prev points to where to start processing
922 * used.next points to the next free descriptor
923 * if used.prev == NULL, there are none waiting to be processed
924 * if used.next == used.prev->prev, there is only one free descriptor,
925 * and we need to use it as a noop descriptor before
926 * linking in a new set of descriptors, since the device
927 * has probably already read the pointer to it
928 */
929 if (ioat_chan->used_desc.prev &&
930 ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) {
931
932 struct ioat_desc_sw *desc;
933 struct ioat_desc_sw *noop_desc;
934 int i;
935
936 /* set up the noop descriptor */
937 noop_desc = to_ioat_desc(ioat_chan->used_desc.next);
938 /* set size to non-zero value (channel returns error when size is 0) */
939 noop_desc->hw->size = NULL_DESC_BUFFER_SIZE;
940 noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
941 noop_desc->hw->src_addr = 0;
942 noop_desc->hw->dst_addr = 0;
943
944 ioat_chan->used_desc.next = ioat_chan->used_desc.next->next;
945 ioat_chan->pending++;
946 ioat_chan->dmacount++;
947
948 /* try to get a few more descriptors */
949 for (i = 16; i; i--) {
950 desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
951 if (!desc) {
952 dev_err(&ioat_chan->device->pdev->dev,
953 "alloc failed\n");
954 break;
955 }
956 list_add_tail(&desc->node, ioat_chan->used_desc.next);
957
958 desc->hw->next
959 = to_ioat_desc(desc->node.next)->async_tx.phys;
960 to_ioat_desc(desc->node.prev)->hw->next
961 = desc->async_tx.phys;
962 ioat_chan->desccount++;
963 }
964
965 ioat_chan->used_desc.next = noop_desc->node.next;
966 }
967 new = to_ioat_desc(ioat_chan->used_desc.next);
968 prefetch(new);
969 ioat_chan->used_desc.next = new->node.next;
970
971 if (ioat_chan->used_desc.prev == NULL)
972 ioat_chan->used_desc.prev = &new->node;
973
974 prefetch(new->hw);
975 return new;
976}
977
978static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
979 struct ioat_dma_chan *ioat_chan)
980{
981 if (!ioat_chan)
982 return NULL;
983
984 switch (ioat_chan->device->version) {
985 case IOAT_VER_1_2:
986 return ioat1_dma_get_next_descriptor(ioat_chan);
987 case IOAT_VER_2_0:
988 case IOAT_VER_3_0:
989 return ioat2_dma_get_next_descriptor(ioat_chan);
990 }
991 return NULL;
992}
993
994static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy(
995 struct dma_chan *chan,
996 dma_addr_t dma_dest,
997 dma_addr_t dma_src,
998 size_t len,
999 unsigned long flags)
1000{
1001 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
1002 struct ioat_desc_sw *new;
1003
1004 spin_lock_bh(&ioat_chan->desc_lock);
1005 new = ioat_dma_get_next_descriptor(ioat_chan);
1006 spin_unlock_bh(&ioat_chan->desc_lock);
1007
1008 if (new) {
1009 new->len = len;
1010 new->dst = dma_dest;
1011 new->src = dma_src;
1012 new->async_tx.flags = flags;
1013 return &new->async_tx;
1014 } else {
1015 dev_err(&ioat_chan->device->pdev->dev,
1016 "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
1017 chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
1018 return NULL;
1019 }
1020}
1021
1022static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
1023 struct dma_chan *chan,
1024 dma_addr_t dma_dest,
1025 dma_addr_t dma_src,
1026 size_t len,
1027 unsigned long flags)
1028{
1029 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
1030 struct ioat_desc_sw *new;
1031
1032 spin_lock_bh(&ioat_chan->desc_lock);
1033 new = ioat2_dma_get_next_descriptor(ioat_chan);
1034
1035 /*
1036 * leave ioat_chan->desc_lock held on the ioat 2 path;
1037 * it will be released at the end of tx_submit
1038 */
1039
1040 if (new) {
1041 new->len = len;
1042 new->dst = dma_dest;
1043 new->src = dma_src;
1044 new->async_tx.flags = flags;
1045 return &new->async_tx;
1046 } else {
1047 spin_unlock_bh(&ioat_chan->desc_lock);
1048 dev_err(&ioat_chan->device->pdev->dev,
1049 "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
1050 chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
1051 return NULL;
1052 }
1053}
1054
1055static void ioat_dma_cleanup_tasklet(unsigned long data)
1056{
1057 struct ioat_dma_chan *chan = (void *)data;
1058 ioat_dma_memcpy_cleanup(chan);
1059 writew(IOAT_CHANCTRL_INT_DISABLE,
1060 chan->reg_base + IOAT_CHANCTRL_OFFSET);
1061}
1062
1063static void
1064ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc)
1065{
1066 if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
1067 if (desc->async_tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
1068 pci_unmap_single(ioat_chan->device->pdev,
1069 pci_unmap_addr(desc, dst),
1070 pci_unmap_len(desc, len),
1071 PCI_DMA_FROMDEVICE);
1072 else
1073 pci_unmap_page(ioat_chan->device->pdev,
1074 pci_unmap_addr(desc, dst),
1075 pci_unmap_len(desc, len),
1076 PCI_DMA_FROMDEVICE);
1077 }
1078
1079 if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
1080 if (desc->async_tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
1081 pci_unmap_single(ioat_chan->device->pdev,
1082 pci_unmap_addr(desc, src),
1083 pci_unmap_len(desc, len),
1084 PCI_DMA_TODEVICE);
1085 else
1086 pci_unmap_page(ioat_chan->device->pdev,
1087 pci_unmap_addr(desc, src),
1088 pci_unmap_len(desc, len),
1089 PCI_DMA_TODEVICE);
1090 }
1091}
1092
1093/**
1094 * ioat_dma_memcpy_cleanup - cleanup up finished descriptors
1095 * @chan: ioat channel to be cleaned up
1096 */
1097static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
1098{
1099 unsigned long phys_complete;
1100 struct ioat_desc_sw *desc, *_desc;
1101 dma_cookie_t cookie = 0;
1102 unsigned long desc_phys;
1103 struct ioat_desc_sw *latest_desc;
1104
1105 prefetch(ioat_chan->completion_virt);
1106
1107 if (!spin_trylock_bh(&ioat_chan->cleanup_lock))
1108 return;
1109
1110 /* The completion writeback can happen at any time,
1111    so reads by the driver need to be atomic operations.
1112    The descriptor physical addresses are limited to 32 bits
1113    when the CPU can only do a 32-bit mov. */
1114
1115#if (BITS_PER_LONG == 64)
1116 phys_complete =
1117 ioat_chan->completion_virt->full
1118 & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
1119#else
1120 phys_complete =
1121 ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
1122#endif
1123
1124 if ((ioat_chan->completion_virt->full
1125 & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
1126 IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
1127 dev_err(&ioat_chan->device->pdev->dev,
1128 "Channel halted, chanerr = %x\n",
1129 readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET));
1130
1131 /* TODO do something to salvage the situation */
1132 }
1133
1134 if (phys_complete == ioat_chan->last_completion) {
1135 spin_unlock_bh(&ioat_chan->cleanup_lock);
1136 /*
1137 * perhaps we're stuck so hard that the watchdog can't go off?
1138 * try to catch it after 2 seconds
1139 */
1140 if (ioat_chan->device->version != IOAT_VER_3_0) {
1141 if (time_after(jiffies,
1142 ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) {
1143 ioat_dma_chan_watchdog(&(ioat_chan->device->work.work));
1144 ioat_chan->last_completion_time = jiffies;
1145 }
1146 }
1147 return;
1148 }
1149 ioat_chan->last_completion_time = jiffies;
1150
1151 cookie = 0;
1152 if (!spin_trylock_bh(&ioat_chan->desc_lock)) {
1153 spin_unlock_bh(&ioat_chan->cleanup_lock);
1154 return;
1155 }
1156
1157 switch (ioat_chan->device->version) {
1158 case IOAT_VER_1_2:
1159 list_for_each_entry_safe(desc, _desc,
1160 &ioat_chan->used_desc, node) {
1161
1162 /*
1163 * Incoming DMA requests may use multiple descriptors,
1164 * due to exceeding xfercap, perhaps. If so, only the
1165 * last one will have a cookie, and require unmapping.
1166 */
1167 if (desc->async_tx.cookie) {
1168 cookie = desc->async_tx.cookie;
1169 ioat_dma_unmap(ioat_chan, desc);
1170 if (desc->async_tx.callback) {
1171 desc->async_tx.callback(desc->async_tx.callback_param);
1172 desc->async_tx.callback = NULL;
1173 }
1174 }
1175
1176 if (desc->async_tx.phys != phys_complete) {
1177 /*
1178 * a completed entry, but not the last, so clean
1179 * up if the client is done with the descriptor
1180 */
1181 if (async_tx_test_ack(&desc->async_tx)) {
1182 list_move_tail(&desc->node,
1183 &ioat_chan->free_desc);
1184 } else
1185 desc->async_tx.cookie = 0;
1186 } else {
1187 /*
1188 * last used desc. Do not remove, so we can
1189 * append from it, but don't look at it next
1190 * time, either
1191 */
1192 desc->async_tx.cookie = 0;
1193
1194 /* TODO check status bits? */
1195 break;
1196 }
1197 }
1198 break;
1199 case IOAT_VER_2_0:
1200 case IOAT_VER_3_0:
1201 /* has some other thread already cleaned up? */
1202 if (ioat_chan->used_desc.prev == NULL)
1203 break;
1204
1205 /* work backwards to find latest finished desc */
1206 desc = to_ioat_desc(ioat_chan->used_desc.next);
1207 latest_desc = NULL;
1208 do {
1209 desc = to_ioat_desc(desc->node.prev);
1210 desc_phys = (unsigned long)desc->async_tx.phys
1211 & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
1212 if (desc_phys == phys_complete) {
1213 latest_desc = desc;
1214 break;
1215 }
1216 } while (&desc->node != ioat_chan->used_desc.prev);
1217
1218 if (latest_desc != NULL) {
1219
1220 /* work forwards to clear finished descriptors */
1221 for (desc = to_ioat_desc(ioat_chan->used_desc.prev);
1222 &desc->node != latest_desc->node.next &&
1223 &desc->node != ioat_chan->used_desc.next;
1224 desc = to_ioat_desc(desc->node.next)) {
1225 if (desc->async_tx.cookie) {
1226 cookie = desc->async_tx.cookie;
1227 desc->async_tx.cookie = 0;
1228 ioat_dma_unmap(ioat_chan, desc);
1229 if (desc->async_tx.callback) {
1230 desc->async_tx.callback(desc->async_tx.callback_param);
1231 desc->async_tx.callback = NULL;
1232 }
1233 }
1234 }
1235
1236 /* move used.prev up beyond those that are finished */
1237 if (&desc->node == ioat_chan->used_desc.next)
1238 ioat_chan->used_desc.prev = NULL;
1239 else
1240 ioat_chan->used_desc.prev = &desc->node;
1241 }
1242 break;
1243 }
1244
1245 spin_unlock_bh(&ioat_chan->desc_lock);
1246
1247 ioat_chan->last_completion = phys_complete;
1248 if (cookie != 0)
1249 ioat_chan->completed_cookie = cookie;
1250
1251 spin_unlock_bh(&ioat_chan->cleanup_lock);
1252}
1253
1254/**
1255 * ioat_dma_is_complete - poll the status of an IOAT DMA transaction
1256 * @chan: IOAT DMA channel handle
1257 * @cookie: DMA transaction identifier
1258 * @done: if not %NULL, updated with last completed transaction
1259 * @used: if not %NULL, updated with last used transaction
1260 */
1261static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
1262 dma_cookie_t cookie,
1263 dma_cookie_t *done,
1264 dma_cookie_t *used)
1265{
1266 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
1267 dma_cookie_t last_used;
1268 dma_cookie_t last_complete;
1269 enum dma_status ret;
1270
1271 last_used = chan->cookie;
1272 last_complete = ioat_chan->completed_cookie;
1273 ioat_chan->watchdog_tcp_cookie = cookie;
1274
1275 if (done)
1276 *done = last_complete;
1277 if (used)
1278 *used = last_used;
1279
1280 ret = dma_async_is_complete(cookie, last_complete, last_used);
1281 if (ret == DMA_SUCCESS)
1282 return ret;
1283
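 /* not complete yet: reap any finished descriptors, then re-sample */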
1284 ioat_dma_memcpy_cleanup(ioat_chan);
1285
1286 last_used = chan->cookie;
1287 last_complete = ioat_chan->completed_cookie;
1288
1289 if (done)
1290 *done = last_complete;
1291 if (used)
1292 *used = last_used;
1293
1294 return dma_async_is_complete(cookie, last_complete, last_used);
1295}
1296
1297static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
1298{
1299 struct ioat_desc_sw *desc;
1300
1301 spin_lock_bh(&ioat_chan->desc_lock);
1302
1303 desc = ioat_dma_get_next_descriptor(ioat_chan);
1304
1305 if (!desc) {
1306 dev_err(&ioat_chan->device->pdev->dev,
1307 "Unable to start null desc - get next desc failed\n");
1308 spin_unlock_bh(&ioat_chan->desc_lock);
1309 return;
1310 }
1311
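 /* the null descriptor moves no data; it just arms the channel and
  * seeds the completion writeback machinery */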
1312 desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
1313 | IOAT_DMA_DESCRIPTOR_CTL_INT_GN
1314 | IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
1315 /* set size to non-zero value (channel returns error when size is 0) */
1316 desc->hw->size = NULL_DESC_BUFFER_SIZE;
1317 desc->hw->src_addr = 0;
1318 desc->hw->dst_addr = 0;
1319 async_tx_ack(&desc->async_tx);
1320 switch (ioat_chan->device->version) {
1321 case IOAT_VER_1_2:
1322 desc->hw->next = 0;
1323 list_add_tail(&desc->node, &ioat_chan->used_desc);
1324
1325 writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
1326 ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
1327 writel(((u64) desc->async_tx.phys) >> 32,
1328 ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
1329
1330 writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
1331 + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
1332 break;
1333 case IOAT_VER_2_0:
1334 case IOAT_VER_3_0:
1335 writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
1336 ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
1337 writel(((u64) desc->async_tx.phys) >> 32,
1338 ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
1339
1340 ioat_chan->dmacount++;
1341 __ioat2_dma_memcpy_issue_pending(ioat_chan);
1342 break;
1343 }
1344 spin_unlock_bh(&ioat_chan->desc_lock);
1345}
1346
1347/*
1348 * Perform an IOAT transaction to verify the HW works.
1349 */
1350#define IOAT_TEST_SIZE 2000
1351
1352static void ioat_dma_test_callback(void *dma_async_param)
1353{
1354 struct completion *cmp = dma_async_param;
1355
1356 complete(cmp);
1357}
1358
1359/**
1360 * ioat_dma_self_test - Perform an IOAT transaction to verify the HW works.
1361 * @device: device to be tested
1362 */
1363static int ioat_dma_self_test(struct ioatdma_device *device)
1364{
1365 int i;
1366 u8 *src;
1367 u8 *dest;
1368 struct dma_chan *dma_chan;
1369 struct dma_async_tx_descriptor *tx;
1370 dma_addr_t dma_dest, dma_src;
1371 dma_cookie_t cookie;
1372 int err = 0;
1373 struct completion cmp;
1374 unsigned long tmo;
1375 unsigned long flags;
1376
1377 src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
1378 if (!src)
1379 return -ENOMEM;
1380 dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
1381 if (!dest) {
1382 kfree(src);
1383 return -ENOMEM;
1384 }
1385
1386 /* Fill in src buffer */
1387 for (i = 0; i < IOAT_TEST_SIZE; i++)
1388 src[i] = (u8)i;
1389
1390 /* Start copy, using first DMA channel */
1391 dma_chan = container_of(device->common.channels.next,
1392 struct dma_chan,
1393 device_node);
1394 if (device->common.device_alloc_chan_resources(dma_chan) < 1) {
1395 dev_err(&device->pdev->dev,
1396 "selftest cannot allocate chan resource\n");
1397 err = -ENODEV;
1398 goto out;
1399 }
1400
1401 dma_src = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
1402 DMA_TO_DEVICE);
1403 dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
1404 DMA_FROM_DEVICE);
1405 flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE;
1406 tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
1407 IOAT_TEST_SIZE, flags);
1408 if (!tx) {
1409 dev_err(&device->pdev->dev,
1410 "Self-test prep failed, disabling\n");
1411 err = -ENODEV;
1412 goto free_resources;
1413 }
1414
1415 async_tx_ack(tx);
1416 init_completion(&cmp);
1417 tx->callback = ioat_dma_test_callback;
1418 tx->callback_param = &cmp;
1419 cookie = tx->tx_submit(tx);
1420 if (cookie < 0) {
1421 dev_err(&device->pdev->dev,
1422 "Self-test setup failed, disabling\n");
1423 err = -ENODEV;
1424 goto free_resources;
1425 }
1426 device->common.device_issue_pending(dma_chan);
1427
1428 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1429
1430 if (tmo == 0 ||
1431 device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL)
1432 != DMA_SUCCESS) {
1433 dev_err(&device->pdev->dev,
1434 "Self-test copy timed out, disabling\n");
1435 err = -ENODEV;
1436 goto free_resources;
1437 }
1438 if (memcmp(src, dest, IOAT_TEST_SIZE)) {
1439 dev_err(&device->pdev->dev,
1440 "Self-test copy failed compare, disabling\n");
1441 err = -ENODEV;
1442 goto free_resources;
1443 }
1444
1445free_resources:
1446 device->common.device_free_chan_resources(dma_chan);
1447out:
1448 kfree(src);
1449 kfree(dest);
1450 return err;
1451}
1452
1453static char ioat_interrupt_style[32] = "msix";
1454module_param_string(ioat_interrupt_style, ioat_interrupt_style,
1455 sizeof(ioat_interrupt_style), 0644);
1456MODULE_PARM_DESC(ioat_interrupt_style,
1457 "set ioat interrupt style: msix (default), "
1458 "msix-single-vector, msi, intx");
1459
1460/**
1461 * ioat_dma_setup_interrupts - setup interrupt handler
1462 * @device: ioat device
1463 */
1464static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
1465{
1466 struct ioat_dma_chan *ioat_chan;
1467 int err, i, j, msixcnt;
1468 u8 intrctrl = 0;
1469
1470 if (!strcmp(ioat_interrupt_style, "msix"))
1471 goto msix;
1472 if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
1473 goto msix_single_vector;
1474 if (!strcmp(ioat_interrupt_style, "msi"))
1475 goto msi;
1476 if (!strcmp(ioat_interrupt_style, "intx"))
1477 goto intx;
1478 dev_err(&device->pdev->dev, "invalid ioat_interrupt_style %s\n",
1479 ioat_interrupt_style);
1480 goto err_no_irq;
1481
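 /*
  * Fallback order: MSI-X (one vector per channel), then MSI-X with a
  * single vector, then MSI, then legacy INTx; each step below falls
  * through to the next when setup fails.
  */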
1482msix:
1483 /* The number of MSI-X vectors should equal the number of channels */
1484 msixcnt = device->common.chancnt;
1485 for (i = 0; i < msixcnt; i++)
1486 device->msix_entries[i].entry = i;
1487
1488 err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt);
1489 if (err < 0)
1490 goto msi;
1491 if (err > 0)
1492 goto msix_single_vector;
1493
1494 for (i = 0; i < msixcnt; i++) {
1495 ioat_chan = ioat_lookup_chan_by_index(device, i);
1496 err = request_irq(device->msix_entries[i].vector,
1497 ioat_dma_do_interrupt_msix,
1498 0, "ioat-msix", ioat_chan);
1499 if (err) {
1500 for (j = 0; j < i; j++) {
1501 ioat_chan =
1502 ioat_lookup_chan_by_index(device, j);
1503 free_irq(device->msix_entries[j].vector,
1504 ioat_chan);
1505 }
1506 goto msix_single_vector;
1507 }
1508 }
1509 intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
1510 device->irq_mode = msix_multi_vector;
1511 goto done;
1512
1513msix_single_vector:
1514 device->msix_entries[0].entry = 0;
1515 err = pci_enable_msix(device->pdev, device->msix_entries, 1);
1516 if (err)
1517 goto msi;
1518
1519 err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt,
1520 0, "ioat-msix", device);
1521 if (err) {
1522 pci_disable_msix(device->pdev);
1523 goto msi;
1524 }
1525 device->irq_mode = msix_single_vector;
1526 goto done;
1527
1528msi:
1529 err = pci_enable_msi(device->pdev);
1530 if (err)
1531 goto intx;
1532
1533 err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
1534 0, "ioat-msi", device);
1535 if (err) {
1536 pci_disable_msi(device->pdev);
1537 goto intx;
1538 }
1539 /*
1540 * CB 1.2 devices need a bit set in configuration space to enable MSI
1541 */
1542 if (device->version == IOAT_VER_1_2) {
1543 u32 dmactrl;
1544 pci_read_config_dword(device->pdev,
1545 IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
1546 dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
1547 pci_write_config_dword(device->pdev,
1548 IOAT_PCI_DMACTRL_OFFSET, dmactrl);
1549 }
1550 device->irq_mode = msi;
1551 goto done;
1552
1553intx:
1554 err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
1555 IRQF_SHARED, "ioat-intx", device);
1556 if (err)
1557 goto err_no_irq;
1558 device->irq_mode = intx;
1559
1560done:
1561 intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
1562 writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
1563 return 0;
1564
1565err_no_irq:
1566 /* Disable all interrupt generation */
1567 writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
1568 dev_err(&device->pdev->dev, "no usable interrupts\n");
1569 device->irq_mode = none;
1570 return -1;
1571}
1572
1573/**
1574 * ioat_dma_remove_interrupts - remove whatever interrupts were set
1575 * @device: ioat device
1576 */
1577static void ioat_dma_remove_interrupts(struct ioatdma_device *device)
1578{
1579 struct ioat_dma_chan *ioat_chan;
1580 int i;
1581
1582 /* Disable all interrupt generation */
1583 writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
1584
1585 switch (device->irq_mode) {
1586 case msix_multi_vector:
1587 for (i = 0; i < device->common.chancnt; i++) {
1588 ioat_chan = ioat_lookup_chan_by_index(device, i);
1589 free_irq(device->msix_entries[i].vector, ioat_chan);
1590 }
1591 pci_disable_msix(device->pdev);
1592 break;
1593 case msix_single_vector:
1594 free_irq(device->msix_entries[0].vector, device);
1595 pci_disable_msix(device->pdev);
1596 break;
1597 case msi:
1598 free_irq(device->pdev->irq, device);
1599 pci_disable_msi(device->pdev);
1600 break;
1601 case intx:
1602 free_irq(device->pdev->irq, device);
1603 break;
1604 case none:
1605 dev_warn(&device->pdev->dev,
1606 "call to %s without interrupts setup\n", __func__);
1607 }
1608 device->irq_mode = none;
1609}
1610
1611struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
1612 void __iomem *iobase)
1613{
1614 int err;
1615 struct ioatdma_device *device;
1616
1617 device = kzalloc(sizeof(*device), GFP_KERNEL);
1618 if (!device) {
1619 err = -ENOMEM;
1620 goto err_kzalloc;
1621 }
1622 device->pdev = pdev;
1623 device->reg_base = iobase;
1624 device->version = readb(device->reg_base + IOAT_VER_OFFSET);
1625
1626 /* DMA coherent memory pool for DMA descriptor allocations */
1627 device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
1628 sizeof(struct ioat_dma_descriptor),
1629 64, 0);
1630 if (!device->dma_pool) {
1631 err = -ENOMEM;
1632 goto err_dma_pool;
1633 }
1634
1635 device->completion_pool = pci_pool_create("completion_pool", pdev,
1636 sizeof(u64), SMP_CACHE_BYTES,
1637 SMP_CACHE_BYTES);
1638 if (!device->completion_pool) {
1639 err = -ENOMEM;
1640 goto err_completion_pool;
1641 }
1642
1643 INIT_LIST_HEAD(&device->common.channels);
1644 ioat_dma_enumerate_channels(device);
1645
1646 device->common.device_alloc_chan_resources =
1647 ioat_dma_alloc_chan_resources;
1648 device->common.device_free_chan_resources =
1649 ioat_dma_free_chan_resources;
1650 device->common.dev = &pdev->dev;
1651
1652 dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
1653 device->common.device_is_tx_complete = ioat_dma_is_complete;
1654 switch (device->version) {
1655 case IOAT_VER_1_2:
1656 device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
1657 device->common.device_issue_pending =
1658 ioat1_dma_memcpy_issue_pending;
1659 break;
1660 case IOAT_VER_2_0:
1661 case IOAT_VER_3_0:
1662 device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy;
1663 device->common.device_issue_pending =
1664 ioat2_dma_memcpy_issue_pending;
1665 break;
1666 }
1667
1668 dev_err(&device->pdev->dev,
1669 "Intel(R) I/OAT DMA Engine found,"
1670 " %d channels, device version 0x%02x, driver version %s\n",
1671 device->common.chancnt, device->version, IOAT_DMA_VERSION);
1672
1673 if (!device->common.chancnt) {
1674 dev_err(&device->pdev->dev,
1675 "Intel(R) I/OAT DMA Engine problem found: "
1676 "zero channels detected\n");
1677 goto err_setup_interrupts;
1678 }
1679
1680 err = ioat_dma_setup_interrupts(device);
1681 if (err)
1682 goto err_setup_interrupts;
1683
1684 err = ioat_dma_self_test(device);
1685 if (err)
1686 goto err_self_test;
1687
1688 ioat_set_tcp_copy_break(device);
1689
1690 dma_async_device_register(&device->common);
1691
1692 if (device->version != IOAT_VER_3_0) {
1693 INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
1694 schedule_delayed_work(&device->work,
1695 WATCHDOG_DELAY);
1696 }
1697
1698 return device;
1699
1700err_self_test:
1701 ioat_dma_remove_interrupts(device);
1702err_setup_interrupts:
1703 pci_pool_destroy(device->completion_pool);
1704err_completion_pool:
1705 pci_pool_destroy(device->dma_pool);
1706err_dma_pool:
1707 kfree(device);
1708err_kzalloc:
1709 dev_err(&pdev->dev,
1710 "Intel(R) I/OAT DMA Engine initialization failed\n");
1711 return NULL;
1712}
1713
1714void ioat_dma_remove(struct ioatdma_device *device)
1715{
1716 struct dma_chan *chan, *_chan;
1717 struct ioat_dma_chan *ioat_chan;
1718
1719 if (device->version != IOAT_VER_3_0)
1720 cancel_delayed_work(&device->work);
1721
1722 ioat_dma_remove_interrupts(device);
1723
1724 dma_async_device_unregister(&device->common);
1725
1726 pci_pool_destroy(device->dma_pool);
1727 pci_pool_destroy(device->completion_pool);
1728
1729 iounmap(device->reg_base);
1730 pci_release_regions(device->pdev);
1731 pci_disable_device(device->pdev);
1732
1733 list_for_each_entry_safe(chan, _chan,
1734 &device->common.channels, device_node) {
1735 ioat_chan = to_ioat_chan(chan);
1736 list_del(&chan->device_node);
1737 kfree(ioat_chan);
1738 }
1739 kfree(device);
1740}
1741
diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h
deleted file mode 100644
index a52ff4bd4601..000000000000
--- a/drivers/dma/ioatdma.h
+++ /dev/null
@@ -1,165 +0,0 @@
1/*
2 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 2 of the License, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59
16 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * The full GNU General Public License is included in this distribution in the
19 * file called COPYING.
20 */
21#ifndef IOATDMA_H
22#define IOATDMA_H
23
24#include <linux/dmaengine.h>
25#include "ioatdma_hw.h"
26#include <linux/init.h>
27#include <linux/dmapool.h>
28#include <linux/cache.h>
29#include <linux/pci_ids.h>
30#include <net/tcp.h>
31
32#define IOAT_DMA_VERSION "3.64"
33
34enum ioat_interrupt {
35 none = 0,
36 msix_multi_vector = 1,
37 msix_single_vector = 2,
38 msi = 3,
39 intx = 4,
40};
41
42#define IOAT_LOW_COMPLETION_MASK 0xffffffc0
43#define IOAT_DMA_DCA_ANY_CPU ~0
44#define IOAT_WATCHDOG_PERIOD (2 * HZ)
45
46
47/**
48 * struct ioatdma_device - internal representation of an IOAT device
49 * @pdev: PCI-Express device
50 * @reg_base: MMIO register space base address
51 * @dma_pool: for allocating DMA descriptors
52 * @common: embedded struct dma_device
53 * @version: version of ioatdma device
54 * @irq_mode: which style irq to use
55 * @msix_entries: irq handlers
56 * @idx: per channel data
57 */
58
59struct ioatdma_device {
60 struct pci_dev *pdev;
61 void __iomem *reg_base;
62 struct pci_pool *dma_pool;
63 struct pci_pool *completion_pool;
64 struct dma_device common;
65 u8 version;
66 enum ioat_interrupt irq_mode;
67 struct delayed_work work;
68 struct msix_entry msix_entries[4];
69 struct ioat_dma_chan *idx[4];
70};
71
72/**
73 * struct ioat_dma_chan - internal representation of a DMA channel
74 */
75struct ioat_dma_chan {
76
77 void __iomem *reg_base;
78
79 dma_cookie_t completed_cookie;
80 unsigned long last_completion;
81 unsigned long last_completion_time;
82
83 size_t xfercap; /* XFERCAP register value expanded out */
84
85 spinlock_t cleanup_lock;
86 spinlock_t desc_lock;
87 struct list_head free_desc;
88 struct list_head used_desc;
89 unsigned long watchdog_completion;
90 int watchdog_tcp_cookie;
91 u32 watchdog_last_tcp_cookie;
92 struct delayed_work work;
93
94 int pending;
95 int dmacount;
96 int desccount;
97
98 struct ioatdma_device *device;
99 struct dma_chan common;
100
101 dma_addr_t completion_addr;
102 union {
103 u64 full; /* HW completion writeback */
104 struct {
105 u32 low;
106 u32 high;
107 };
108 } *completion_virt;
109 unsigned long last_compl_desc_addr_hw;
110 struct tasklet_struct cleanup_task;
111};
112
113/* wrapper around hardware descriptor format + additional software fields */
114
115/**
116 * struct ioat_desc_sw - wrapper around hardware descriptor
117 * @hw: hardware DMA descriptor
118 * @node: this descriptor will either be on the free list,
119 * or attached to a transaction list (async_tx.tx_list)
120 * @tx_cnt: number of descriptors required to complete the transaction
121 * @async_tx: the generic software descriptor for all engines
122 */
123struct ioat_desc_sw {
124 struct ioat_dma_descriptor *hw;
125 struct list_head node;
126 int tx_cnt;
127 size_t len;
128 dma_addr_t src;
129 dma_addr_t dst;
130 struct dma_async_tx_descriptor async_tx;
131};
132
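/*
 * Below these sizes a plain CPU copy is presumed cheaper than offloading
 * the TCP receive copy to the DMA engine; the per-version thresholds
 * reflect each generation's setup overhead.
 */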
133static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev)
134{
135 #ifdef CONFIG_NET_DMA
136 switch (dev->version) {
137 case IOAT_VER_1_2:
138 sysctl_tcp_dma_copybreak = 4096;
139 break;
140 case IOAT_VER_2_0:
141 sysctl_tcp_dma_copybreak = 2048;
142 break;
143 case IOAT_VER_3_0:
144 sysctl_tcp_dma_copybreak = 262144;
145 break;
146 }
147 #endif
148}
149
150#if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE)
151struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
152 void __iomem *iobase);
153void ioat_dma_remove(struct ioatdma_device *device);
154struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase);
155struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
156struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
157#else
158#define ioat_dma_probe(pdev, iobase) NULL
159#define ioat_dma_remove(device) do { } while (0)
160#define ioat_dca_init(pdev, iobase) NULL
161#define ioat2_dca_init(pdev, iobase) NULL
162#define ioat3_dca_init(pdev, iobase) NULL
163#endif
164
165#endif /* IOATDMA_H */
diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h
deleted file mode 100644
index afa57eef86c9..000000000000
--- a/drivers/dma/ioatdma_hw.h
+++ /dev/null
@@ -1,70 +0,0 @@
1/*
2 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 2 of the License, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59
16 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * The full GNU General Public License is included in this distribution in the
19 * file called COPYING.
20 */
21#ifndef _IOAT_HW_H_
22#define _IOAT_HW_H_
23
24/* PCI Configuration Space Values */
25#define IOAT_PCI_VID 0x8086
26
27/* CB device ID's */
28#define IOAT_PCI_DID_5000 0x1A38
29#define IOAT_PCI_DID_CNB 0x360B
30#define IOAT_PCI_DID_SCNB 0x65FF
31#define IOAT_PCI_DID_SNB 0x402F
32
33#define IOAT_PCI_RID 0x00
34#define IOAT_PCI_SVID 0x8086
35#define IOAT_PCI_SID 0x8086
36#define IOAT_VER_1_2 0x12 /* Version 1.2 */
37#define IOAT_VER_2_0 0x20 /* Version 2.0 */
38#define IOAT_VER_3_0 0x30 /* Version 3.0 */
39
40struct ioat_dma_descriptor {
41 uint32_t size;
42 uint32_t ctl;
43 uint64_t src_addr;
44 uint64_t dst_addr;
45 uint64_t next;
46 uint64_t rsv1;
47 uint64_t rsv2;
48 uint64_t user1;
49 uint64_t user2;
50};
51
52#define IOAT_DMA_DESCRIPTOR_CTL_INT_GN 0x00000001
53#define IOAT_DMA_DESCRIPTOR_CTL_SRC_SN 0x00000002
54#define IOAT_DMA_DESCRIPTOR_CTL_DST_SN 0x00000004
55#define IOAT_DMA_DESCRIPTOR_CTL_CP_STS 0x00000008
56#define IOAT_DMA_DESCRIPTOR_CTL_FRAME 0x00000010
57#define IOAT_DMA_DESCRIPTOR_NUL 0x00000020
58#define IOAT_DMA_DESCRIPTOR_CTL_SP_BRK 0x00000040
59#define IOAT_DMA_DESCRIPTOR_CTL_DP_BRK 0x00000080
60#define IOAT_DMA_DESCRIPTOR_CTL_BNDL 0x00000100
61#define IOAT_DMA_DESCRIPTOR_CTL_DCA 0x00000200
62#define IOAT_DMA_DESCRIPTOR_CTL_BUFHINT 0x00000400
63
64#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_CONTEXT 0xFF000000
65#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_DMA 0x00000000
66
67#define IOAT_DMA_DESCRIPTOR_CTL_CONTEXT_DCA 0x00000001
68#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_MASK 0xFF000000
69
70#endif
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index 2f052265122f..645ca8d54ec4 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -31,6 +31,7 @@
 #include <linux/platform_device.h>
 #include <linux/memory.h>
 #include <linux/ioport.h>
+#include <linux/raid/pq.h>
 
 #include <mach/adma.h>
 
@@ -57,65 +58,110 @@ static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
 	}
 }
 
+static void
+iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
+{
+	struct dma_async_tx_descriptor *tx = &desc->async_tx;
+	struct iop_adma_desc_slot *unmap = desc->group_head;
+	struct device *dev = &iop_chan->device->pdev->dev;
+	u32 len = unmap->unmap_len;
+	enum dma_ctrl_flags flags = tx->flags;
+	u32 src_cnt;
+	dma_addr_t addr;
+	dma_addr_t dest;
+
+	src_cnt = unmap->unmap_src_cnt;
+	dest = iop_desc_get_dest_addr(unmap, iop_chan);
+	if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+		enum dma_data_direction dir;
+
+		if (src_cnt > 1) /* is xor? */
+			dir = DMA_BIDIRECTIONAL;
+		else
+			dir = DMA_FROM_DEVICE;
+
+		dma_unmap_page(dev, dest, len, dir);
+	}
+
+	if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+		while (src_cnt--) {
+			addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt);
+			if (addr == dest)
+				continue;
+			dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+		}
+	}
+	desc->group_head = NULL;
+}
+
+static void
+iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
+{
+	struct dma_async_tx_descriptor *tx = &desc->async_tx;
+	struct iop_adma_desc_slot *unmap = desc->group_head;
+	struct device *dev = &iop_chan->device->pdev->dev;
+	u32 len = unmap->unmap_len;
+	enum dma_ctrl_flags flags = tx->flags;
+	u32 src_cnt = unmap->unmap_src_cnt;
+	dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan);
+	dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan);
+	int i;
+
+	if (tx->flags & DMA_PREP_CONTINUE)
+		src_cnt -= 3;
+
+	if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) {
+		dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL);
+		dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL);
+	}
+
+	if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+		dma_addr_t addr;
+
+		for (i = 0; i < src_cnt; i++) {
+			addr = iop_desc_get_src_addr(unmap, iop_chan, i);
+			dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+		}
+		if (desc->pq_check_result) {
+			dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE);
+			dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE);
+		}
+	}
+
+	desc->group_head = NULL;
+}
+
+
 static dma_cookie_t
 iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
 	struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
 {
-	BUG_ON(desc->async_tx.cookie < 0);
-	if (desc->async_tx.cookie > 0) {
-		cookie = desc->async_tx.cookie;
-		desc->async_tx.cookie = 0;
+	struct dma_async_tx_descriptor *tx = &desc->async_tx;
+
+	BUG_ON(tx->cookie < 0);
+	if (tx->cookie > 0) {
+		cookie = tx->cookie;
+		tx->cookie = 0;
 
 		/* call the callback (must not sleep or submit new
 		 * operations to this channel)
 		 */
-		if (desc->async_tx.callback)
-			desc->async_tx.callback(
-				desc->async_tx.callback_param);
+		if (tx->callback)
+			tx->callback(tx->callback_param);
 
 		/* unmap dma addresses
 		 * (unmap_single vs unmap_page?)
 		 */
 		if (desc->group_head && desc->unmap_len) {
-			struct iop_adma_desc_slot *unmap = desc->group_head;
-			struct device *dev =
-				&iop_chan->device->pdev->dev;
-			u32 len = unmap->unmap_len;
-			enum dma_ctrl_flags flags = desc->async_tx.flags;
-			u32 src_cnt;
-			dma_addr_t addr;
-			dma_addr_t dest;
-
-			src_cnt = unmap->unmap_src_cnt;
-			dest = iop_desc_get_dest_addr(unmap, iop_chan);
-			if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-				enum dma_data_direction dir;
-
-				if (src_cnt > 1) /* is xor? */
-					dir = DMA_BIDIRECTIONAL;
-				else
-					dir = DMA_FROM_DEVICE;
-
-				dma_unmap_page(dev, dest, len, dir);
-			}
-
-			if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-				while (src_cnt--) {
-					addr = iop_desc_get_src_addr(unmap,
-								     iop_chan,
-								     src_cnt);
-					if (addr == dest)
-						continue;
-					dma_unmap_page(dev, addr, len,
-						       DMA_TO_DEVICE);
-				}
-			}
-			desc->group_head = NULL;
+			if (iop_desc_is_pq(desc))
+				iop_desc_unmap_pq(iop_chan, desc);
+			else
+				iop_desc_unmap(iop_chan, desc);
 		}
 	}
 
 	/* run dependent operations */
-	dma_run_dependencies(&desc->async_tx);
+	dma_run_dependencies(tx);
 
 	return cookie;
 }
@@ -287,7 +333,12 @@ static void iop_adma_tasklet(unsigned long data)
287{ 333{
288 struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data; 334 struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data;
289 335
290 spin_lock(&iop_chan->lock); 336 /* lockdep will flag dependency submissions as potentially
337 * recursive locking; this is not the case, as a dependency
338 * submission will never recurse into a channel's submit routine.
339 * There are checks in async_tx.c to prevent this.
340 */
341 spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING);
291 __iop_adma_slot_cleanup(iop_chan); 342 __iop_adma_slot_cleanup(iop_chan);
292 spin_unlock(&iop_chan->lock); 343 spin_unlock(&iop_chan->lock);
293} 344}
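
The spin_lock_nested() annotation added above only informs lockdep; a minimal sketch of the idiom, using a hypothetical channel structure, looks like this:

/* Sketch (hypothetical struct demo_chan): mark the cleanup-path
 * acquisition as one deliberate level of nesting so lockdep does not
 * report it as recursive locking.
 */
#include <linux/spinlock.h>

struct demo_chan {
	spinlock_t lock;
};

static void demo_chan_cleanup(struct demo_chan *c)
{
	/* SINGLE_DEPTH_NESTING selects lockdep subclass 1 */
	spin_lock_nested(&c->lock, SINGLE_DEPTH_NESTING);
	/* ... cleanup that may submit dependent operations ... */
	spin_unlock(&c->lock);
}
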
@@ -370,7 +421,7 @@ retry:
370 } 421 }
371 alloc_tail->group_head = alloc_start; 422 alloc_tail->group_head = alloc_start;
372 alloc_tail->async_tx.cookie = -EBUSY; 423 alloc_tail->async_tx.cookie = -EBUSY;
373 list_splice(&chain, &alloc_tail->async_tx.tx_list); 424 list_splice(&chain, &alloc_tail->tx_list);
374 iop_chan->last_used = last_used; 425 iop_chan->last_used = last_used;
375 iop_desc_clear_next_desc(alloc_start); 426 iop_desc_clear_next_desc(alloc_start);
376 iop_desc_clear_next_desc(alloc_tail); 427 iop_desc_clear_next_desc(alloc_tail);
@@ -429,7 +480,7 @@ iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
429 480
430 old_chain_tail = list_entry(iop_chan->chain.prev, 481 old_chain_tail = list_entry(iop_chan->chain.prev,
431 struct iop_adma_desc_slot, chain_node); 482 struct iop_adma_desc_slot, chain_node);
432 list_splice_init(&sw_desc->async_tx.tx_list, 483 list_splice_init(&sw_desc->tx_list,
433 &old_chain_tail->chain_node); 484 &old_chain_tail->chain_node);
434 485
435 /* fix up the hardware chain */ 486 /* fix up the hardware chain */
@@ -496,6 +547,7 @@ static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
496 547
497 dma_async_tx_descriptor_init(&slot->async_tx, chan); 548 dma_async_tx_descriptor_init(&slot->async_tx, chan);
498 slot->async_tx.tx_submit = iop_adma_tx_submit; 549 slot->async_tx.tx_submit = iop_adma_tx_submit;
550 INIT_LIST_HEAD(&slot->tx_list);
499 INIT_LIST_HEAD(&slot->chain_node); 551 INIT_LIST_HEAD(&slot->chain_node);
500 INIT_LIST_HEAD(&slot->slot_node); 552 INIT_LIST_HEAD(&slot->slot_node);
501 hw_desc = (char *) iop_chan->device->dma_desc_pool; 553 hw_desc = (char *) iop_chan->device->dma_desc_pool;
@@ -660,9 +712,9 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
660} 712}
661 713
662static struct dma_async_tx_descriptor * 714static struct dma_async_tx_descriptor *
663iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src, 715iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
664 unsigned int src_cnt, size_t len, u32 *result, 716 unsigned int src_cnt, size_t len, u32 *result,
665 unsigned long flags) 717 unsigned long flags)
666{ 718{
667 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); 719 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
668 struct iop_adma_desc_slot *sw_desc, *grp_start; 720 struct iop_adma_desc_slot *sw_desc, *grp_start;
@@ -696,6 +748,118 @@ iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
696 return sw_desc ? &sw_desc->async_tx : NULL; 748 return sw_desc ? &sw_desc->async_tx : NULL;
697} 749}
698 750
751static struct dma_async_tx_descriptor *
752iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
753 unsigned int src_cnt, const unsigned char *scf, size_t len,
754 unsigned long flags)
755{
756 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
757 struct iop_adma_desc_slot *sw_desc, *g;
758 int slot_cnt, slots_per_op;
759 int continue_srcs;
760
761 if (unlikely(!len))
762 return NULL;
763 BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
764
765 dev_dbg(iop_chan->device->common.dev,
766 "%s src_cnt: %d len: %u flags: %lx\n",
767 __func__, src_cnt, len, flags);
768
769 if (dmaf_p_disabled_continue(flags))
770 continue_srcs = 1+src_cnt;
771 else if (dmaf_continue(flags))
772 continue_srcs = 3+src_cnt;
773 else
774 continue_srcs = 0+src_cnt;
775
776 spin_lock_bh(&iop_chan->lock);
777 slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op);
778 sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
779 if (sw_desc) {
780 int i;
781
782 g = sw_desc->group_head;
783 iop_desc_set_byte_count(g, iop_chan, len);
784
785 /* even if P is disabled its destination address (bits
786 * [3:0]) must match Q. It is ok if P points to an
787 * invalid address; it won't be written.
788 */
789 if (flags & DMA_PREP_PQ_DISABLE_P)
790 dst[0] = dst[1] & 0x7;
791
792 iop_desc_set_pq_addr(g, dst);
793 sw_desc->unmap_src_cnt = src_cnt;
794 sw_desc->unmap_len = len;
795 sw_desc->async_tx.flags = flags;
796 for (i = 0; i < src_cnt; i++)
797 iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
798
799 /* if we are continuing a previous operation factor in
800 * the old p and q values, see the comment for dma_maxpq
801 * in include/linux/dmaengine.h
802 */
803 if (dmaf_p_disabled_continue(flags))
804 iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
805 else if (dmaf_continue(flags)) {
806 iop_desc_set_pq_src_addr(g, i++, dst[0], 0);
807 iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
808 iop_desc_set_pq_src_addr(g, i++, dst[1], 0);
809 }
810 iop_desc_init_pq(g, i, flags);
811 }
812 spin_unlock_bh(&iop_chan->lock);
813
814 return sw_desc ? &sw_desc->async_tx : NULL;
815}
816
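
The continue_srcs arithmetic above is the key bookkeeping for continued P+Q operations; a hedged restatement, assuming the dmaf_continue()/dmaf_p_disabled_continue() helpers from include/linux/dmaengine.h:

/* Sketch: a continuation must fold the previous results back in as
 * extra sources before slots are allocated. A full continuation feeds
 * back P plus the old Q twice (once scaled, once raw), i.e. 3 sources;
 * with P disabled only the old Q is fed back (1 source).
 */
static int pq_continue_srcs(unsigned long flags, unsigned int src_cnt)
{
	if (dmaf_p_disabled_continue(flags))
		return src_cnt + 1;
	if (dmaf_continue(flags))
		return src_cnt + 3;
	return src_cnt;
}
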
817static struct dma_async_tx_descriptor *
818iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
819 unsigned int src_cnt, const unsigned char *scf,
820 size_t len, enum sum_check_flags *pqres,
821 unsigned long flags)
822{
823 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
824 struct iop_adma_desc_slot *sw_desc, *g;
825 int slot_cnt, slots_per_op;
826
827 if (unlikely(!len))
828 return NULL;
829 BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
830
831 dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
832 __func__, src_cnt, len);
833
834 spin_lock_bh(&iop_chan->lock);
835 slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op);
836 sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
837 if (sw_desc) {
838 /* for validate operations p and q are tagged onto the
839 * end of the source list
840 */
841 int pq_idx = src_cnt;
842
843 g = sw_desc->group_head;
844 iop_desc_init_pq_zero_sum(g, src_cnt+2, flags);
845 iop_desc_set_pq_zero_sum_byte_count(g, len);
846 g->pq_check_result = pqres;
847 pr_debug("\t%s: g->pq_check_result: %p\n",
848 __func__, g->pq_check_result);
849 sw_desc->unmap_src_cnt = src_cnt+2;
850 sw_desc->unmap_len = len;
851 sw_desc->async_tx.flags = flags;
852 while (src_cnt--)
853 iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,
854 src[src_cnt],
855 scf[src_cnt]);
856 iop_desc_set_pq_zero_sum_addr(g, pq_idx, src);
857 }
858 spin_unlock_bh(&iop_chan->lock);
859
860 return sw_desc ? &sw_desc->async_tx : NULL;
861}
862
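
For the validate path, P and Q are simply appended to the data sources, as the comment above notes; a small hypothetical helper shows the layout the driver expects:

/* Sketch (hypothetical helper): lay out the combined source list for
 * iop_adma_prep_dma_pq_val(), with P at index src_cnt and Q right
 * after it, matching pq_idx above.
 */
static void build_pq_val_srcs(dma_addr_t *combined, const dma_addr_t *src,
			      unsigned int src_cnt, dma_addr_t p, dma_addr_t q)
{
	unsigned int i;

	for (i = 0; i < src_cnt; i++)
		combined[i] = src[i];
	combined[src_cnt] = p;
	combined[src_cnt + 1] = q;
}
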
699static void iop_adma_free_chan_resources(struct dma_chan *chan) 863static void iop_adma_free_chan_resources(struct dma_chan *chan)
700{ 864{
701 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); 865 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
@@ -906,7 +1070,7 @@ out:
906 1070
907#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */ 1071#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
908static int __devinit 1072static int __devinit
909iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) 1073iop_adma_xor_val_self_test(struct iop_adma_device *device)
910{ 1074{
911 int i, src_idx; 1075 int i, src_idx;
912 struct page *dest; 1076 struct page *dest;
@@ -1002,7 +1166,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
1002 PAGE_SIZE, DMA_TO_DEVICE); 1166 PAGE_SIZE, DMA_TO_DEVICE);
1003 1167
1004 /* skip zero sum if the capability is not present */ 1168 /* skip zero sum if the capability is not present */
1005 if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask)) 1169 if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
1006 goto free_resources; 1170 goto free_resources;
1007 1171
1008 /* zero sum the sources with the destination page */ 1172
@@ -1016,10 +1180,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
1016 dma_srcs[i] = dma_map_page(dma_chan->device->dev, 1180 dma_srcs[i] = dma_map_page(dma_chan->device->dev,
1017 zero_sum_srcs[i], 0, PAGE_SIZE, 1181 zero_sum_srcs[i], 0, PAGE_SIZE,
1018 DMA_TO_DEVICE); 1182 DMA_TO_DEVICE);
1019 tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, 1183 tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
1020 IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, 1184 IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
1021 &zero_sum_result, 1185 &zero_sum_result,
1022 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 1186 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
1023 1187
1024 cookie = iop_adma_tx_submit(tx); 1188 cookie = iop_adma_tx_submit(tx);
1025 iop_adma_issue_pending(dma_chan); 1189 iop_adma_issue_pending(dma_chan);
@@ -1072,10 +1236,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
1072 dma_srcs[i] = dma_map_page(dma_chan->device->dev, 1236 dma_srcs[i] = dma_map_page(dma_chan->device->dev,
1073 zero_sum_srcs[i], 0, PAGE_SIZE, 1237 zero_sum_srcs[i], 0, PAGE_SIZE,
1074 DMA_TO_DEVICE); 1238 DMA_TO_DEVICE);
1075 tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, 1239 tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
1076 IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, 1240 IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
1077 &zero_sum_result, 1241 &zero_sum_result,
1078 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 1242 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
1079 1243
1080 cookie = iop_adma_tx_submit(tx); 1244 cookie = iop_adma_tx_submit(tx);
1081 iop_adma_issue_pending(dma_chan); 1245 iop_adma_issue_pending(dma_chan);
@@ -1105,6 +1269,170 @@ out:
1105 return err; 1269 return err;
1106} 1270}
1107 1271
1272#ifdef CONFIG_MD_RAID6_PQ
1273static int __devinit
1274iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
1275{
1276 /* combined sources, software pq results, and extra hw pq results */
1277 struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2];
1278 /* ptr to the extra hw pq buffers defined above */
1279 struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2];
1280 /* address conversion buffers (dma_map / page_address) */
1281 void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2];
1282 dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST];
1283 dma_addr_t pq_dest[2];
1284
1285 int i;
1286 struct dma_async_tx_descriptor *tx;
1287 struct dma_chan *dma_chan;
1288 dma_cookie_t cookie;
1289 u32 zero_sum_result;
1290 int err = 0;
1291 struct device *dev;
1292
1293 dev_dbg(device->common.dev, "%s\n", __func__);
1294
1295 for (i = 0; i < ARRAY_SIZE(pq); i++) {
1296 pq[i] = alloc_page(GFP_KERNEL);
1297 if (!pq[i]) {
1298 while (i--)
1299 __free_page(pq[i]);
1300 return -ENOMEM;
1301 }
1302 }
1303
1304 /* Fill in src buffers */
1305 for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
1306 pq_sw[i] = page_address(pq[i]);
1307 memset(pq_sw[i], 0x11111111 * (1<<i), PAGE_SIZE);
1308 }
1309 pq_sw[i] = page_address(pq[i]);
1310 pq_sw[i+1] = page_address(pq[i+1]);
1311
1312 dma_chan = container_of(device->common.channels.next,
1313 struct dma_chan,
1314 device_node);
1315 if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
1316 err = -ENODEV;
1317 goto out;
1318 }
1319
1320 dev = dma_chan->device->dev;
1321
1322 /* initialize the dests */
1323 memset(page_address(pq_hw[0]), 0, PAGE_SIZE);
1324 memset(page_address(pq_hw[1]), 0, PAGE_SIZE);
1325
1326 /* test pq */
1327 pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE);
1328 pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE);
1329 for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
1330 pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
1331 DMA_TO_DEVICE);
1332
1333 tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src,
1334 IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp,
1335 PAGE_SIZE,
1336 DMA_PREP_INTERRUPT |
1337 DMA_CTRL_ACK);
1338
1339 cookie = iop_adma_tx_submit(tx);
1340 iop_adma_issue_pending(dma_chan);
1341 msleep(8);
1342
1343 if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
1344 DMA_SUCCESS) {
1345 dev_err(dev, "Self-test pq timed out, disabling\n");
1346 err = -ENODEV;
1347 goto free_resources;
1348 }
1349
1350 raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw);
1351
1352 if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST],
1353 page_address(pq_hw[0]), PAGE_SIZE) != 0) {
1354 dev_err(dev, "Self-test p failed compare, disabling\n");
1355 err = -ENODEV;
1356 goto free_resources;
1357 }
1358 if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1],
1359 page_address(pq_hw[1]), PAGE_SIZE) != 0) {
1360 dev_err(dev, "Self-test q failed compare, disabling\n");
1361 err = -ENODEV;
1362 goto free_resources;
1363 }
1364
1365 /* test correct zero sum using the software generated pq values */
1366 for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
1367 pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
1368 DMA_TO_DEVICE);
1369
1370 zero_sum_result = ~0;
1371 tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
1372 pq_src, IOP_ADMA_NUM_SRC_TEST,
1373 raid6_gfexp, PAGE_SIZE, &zero_sum_result,
1374 DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
1375
1376 cookie = iop_adma_tx_submit(tx);
1377 iop_adma_issue_pending(dma_chan);
1378 msleep(8);
1379
1380 if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
1381 DMA_SUCCESS) {
1382 dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n");
1383 err = -ENODEV;
1384 goto free_resources;
1385 }
1386
1387 if (zero_sum_result != 0) {
1388 dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n",
1389 zero_sum_result);
1390 err = -ENODEV;
1391 goto free_resources;
1392 }
1393
1394 /* test incorrect zero sum */
1395 i = IOP_ADMA_NUM_SRC_TEST;
1396 memset(pq_sw[i] + 100, 0, 100);
1397 memset(pq_sw[i+1] + 200, 0, 200);
1398 for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
1399 pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
1400 DMA_TO_DEVICE);
1401
1402 zero_sum_result = 0;
1403 tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
1404 pq_src, IOP_ADMA_NUM_SRC_TEST,
1405 raid6_gfexp, PAGE_SIZE, &zero_sum_result,
1406 DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
1407
1408 cookie = iop_adma_tx_submit(tx);
1409 iop_adma_issue_pending(dma_chan);
1410 msleep(8);
1411
1412 if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
1413 DMA_SUCCESS) {
1414 dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n");
1415 err = -ENODEV;
1416 goto free_resources;
1417 }
1418
1419 if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) {
1420 dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n",
1421 zero_sum_result);
1422 err = -ENODEV;
1423 goto free_resources;
1424 }
1425
1426free_resources:
1427 iop_adma_free_chan_resources(dma_chan);
1428out:
1429 i = ARRAY_SIZE(pq);
1430 while (i--)
1431 __free_page(pq[i]);
1432 return err;
1433}
1434#endif
1435
1108static int __devexit iop_adma_remove(struct platform_device *dev) 1436static int __devexit iop_adma_remove(struct platform_device *dev)
1109{ 1437{
1110 struct iop_adma_device *device = platform_get_drvdata(dev); 1438 struct iop_adma_device *device = platform_get_drvdata(dev);
@@ -1192,9 +1520,16 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
1192 dma_dev->max_xor = iop_adma_get_max_xor(); 1520 dma_dev->max_xor = iop_adma_get_max_xor();
1193 dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; 1521 dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
1194 } 1522 }
1195 if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask)) 1523 if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask))
1196 dma_dev->device_prep_dma_zero_sum = 1524 dma_dev->device_prep_dma_xor_val =
1197 iop_adma_prep_dma_zero_sum; 1525 iop_adma_prep_dma_xor_val;
1526 if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
1527 dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0);
1528 dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq;
1529 }
1530 if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask))
1531 dma_dev->device_prep_dma_pq_val =
1532 iop_adma_prep_dma_pq_val;
1198 if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) 1533 if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
1199 dma_dev->device_prep_dma_interrupt = 1534 dma_dev->device_prep_dma_interrupt =
1200 iop_adma_prep_dma_interrupt; 1535 iop_adma_prep_dma_interrupt;
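
The probe hunk above follows the usual dmaengine pattern of only publishing a prep routine when the matching capability bit is set; a hedged sketch with a hypothetical hardware-capability flag:

/* Sketch: HW_CAP_PQ is a hypothetical flag standing in for whatever
 * the platform code detected. Clients that test cap_mask then never
 * see a NULL device_prep_dma_pq for an advertised capability.
 */
#define HW_CAP_PQ (1 << 0)	/* hypothetical */

static void publish_pq_cap(struct dma_device *dma_dev, u32 hw_caps)
{
	if (hw_caps & HW_CAP_PQ) {
		dma_cap_set(DMA_PQ, dma_dev->cap_mask);
		dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq;
	}
}
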
@@ -1248,23 +1583,35 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
1248 } 1583 }
1249 1584
1250 if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) || 1585 if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
1251 dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { 1586 dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
1252 ret = iop_adma_xor_zero_sum_self_test(adev); 1587 ret = iop_adma_xor_val_self_test(adev);
1253 dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); 1588 dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
1254 if (ret) 1589 if (ret)
1255 goto err_free_iop_chan; 1590 goto err_free_iop_chan;
1256 } 1591 }
1257 1592
1593 if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) &&
1594 dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) {
1595 #ifdef CONFIG_MD_RAID6_PQ
1596 ret = iop_adma_pq_zero_sum_self_test(adev);
1597 dev_dbg(&pdev->dev, "pq self test returned %d\n", ret);
1598 #else
1599 /* cannot test raid6, so do not publish capability */
1600 dma_cap_clear(DMA_PQ, dma_dev->cap_mask);
1601 dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask);
1602 ret = 0;
1603 #endif
1604 if (ret)
1605 goto err_free_iop_chan;
1606 }
1607
1258 dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " 1608 dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
1259 "( %s%s%s%s%s%s%s%s%s%s)\n", 1609 "( %s%s%s%s%s%s%s)\n",
1260 dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", 1610 dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
1261 dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", 1611 dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
1262 dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "",
1263 dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", 1612 dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
1264 dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "", 1613 dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "",
1265 dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "",
1266 dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", 1614 dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
1267 dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "",
1268 dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", 1615 dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
1269 dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); 1616 dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
1270 1617
@@ -1296,7 +1643,7 @@ static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
1296 if (sw_desc) { 1643 if (sw_desc) {
1297 grp_start = sw_desc->group_head; 1644 grp_start = sw_desc->group_head;
1298 1645
1299 list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain); 1646 list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
1300 async_tx_ack(&sw_desc->async_tx); 1647 async_tx_ack(&sw_desc->async_tx);
1301 iop_desc_init_memcpy(grp_start, 0); 1648 iop_desc_init_memcpy(grp_start, 0);
1302 iop_desc_set_byte_count(grp_start, iop_chan, 0); 1649 iop_desc_set_byte_count(grp_start, iop_chan, 0);
@@ -1352,7 +1699,7 @@ static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
1352 sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); 1699 sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
1353 if (sw_desc) { 1700 if (sw_desc) {
1354 grp_start = sw_desc->group_head; 1701 grp_start = sw_desc->group_head;
1355 list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain); 1702 list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
1356 async_tx_ack(&sw_desc->async_tx); 1703 async_tx_ack(&sw_desc->async_tx);
1357 iop_desc_init_null_xor(grp_start, 2, 0); 1704 iop_desc_init_null_xor(grp_start, 2, 0);
1358 iop_desc_set_byte_count(grp_start, iop_chan, 0); 1705 iop_desc_set_byte_count(grp_start, iop_chan, 0);
diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c
index 9f6fe46a9b87..c0a272c73682 100644
--- a/drivers/dma/iovlock.c
+++ b/drivers/dma/iovlock.c
@@ -183,6 +183,11 @@ dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov,
183 iov_byte_offset, 183 iov_byte_offset,
184 kdata, 184 kdata,
185 copy); 185 copy);
186 /* poll for a descriptor slot */
187 if (unlikely(dma_cookie < 0)) {
188 dma_async_issue_pending(chan);
189 continue;
190 }
186 191
187 len -= copy; 192 len -= copy;
188 iov[iovec_idx].iov_len -= copy; 193 iov[iovec_idx].iov_len -= copy;
@@ -248,6 +253,11 @@ dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov,
248 page, 253 page,
249 offset, 254 offset,
250 copy); 255 copy);
256 /* poll for a descriptor slot */
257 if (unlikely(dma_cookie < 0)) {
258 dma_async_issue_pending(chan);
259 continue;
260 }
251 261
252 len -= copy; 262 len -= copy;
253 iov[iovec_idx].iov_len -= copy; 263 iov[iovec_idx].iov_len -= copy;
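
Both iovlock hunks add the same back-pressure idiom: a negative cookie means the descriptor ring is full, so kick the engine and retry. A hedged standalone sketch, using the dma_async_memcpy_buf_to_buf() helper of this kernel generation:

/* Sketch: retry a copy until a descriptor slot frees up. Issuing
 * pending work lets completed descriptors be recycled so the retry
 * can succeed.
 */
static dma_cookie_t copy_with_retry(struct dma_chan *chan,
				    void *dst, void *src, size_t len)
{
	dma_cookie_t cookie;

	do {
		cookie = dma_async_memcpy_buf_to_buf(chan, dst, src, len);
		if (unlikely(cookie < 0))
			dma_async_issue_pending(chan);
	} while (cookie < 0);

	return cookie;
}
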
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 3f23eabe09f2..466ab10c1ff1 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -517,7 +517,7 @@ retry:
517 } 517 }
518 alloc_tail->group_head = alloc_start; 518 alloc_tail->group_head = alloc_start;
519 alloc_tail->async_tx.cookie = -EBUSY; 519 alloc_tail->async_tx.cookie = -EBUSY;
520 list_splice(&chain, &alloc_tail->async_tx.tx_list); 520 list_splice(&chain, &alloc_tail->tx_list);
521 mv_chan->last_used = last_used; 521 mv_chan->last_used = last_used;
522 mv_desc_clear_next_desc(alloc_start); 522 mv_desc_clear_next_desc(alloc_start);
523 mv_desc_clear_next_desc(alloc_tail); 523 mv_desc_clear_next_desc(alloc_tail);
@@ -565,14 +565,14 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
565 cookie = mv_desc_assign_cookie(mv_chan, sw_desc); 565 cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
566 566
567 if (list_empty(&mv_chan->chain)) 567 if (list_empty(&mv_chan->chain))
568 list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain); 568 list_splice_init(&sw_desc->tx_list, &mv_chan->chain);
569 else { 569 else {
570 new_hw_chain = 0; 570 new_hw_chain = 0;
571 571
572 old_chain_tail = list_entry(mv_chan->chain.prev, 572 old_chain_tail = list_entry(mv_chan->chain.prev,
573 struct mv_xor_desc_slot, 573 struct mv_xor_desc_slot,
574 chain_node); 574 chain_node);
575 list_splice_init(&grp_start->async_tx.tx_list, 575 list_splice_init(&grp_start->tx_list,
576 &old_chain_tail->chain_node); 576 &old_chain_tail->chain_node);
577 577
578 if (!mv_can_chain(grp_start)) 578 if (!mv_can_chain(grp_start))
@@ -632,6 +632,7 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
632 slot->async_tx.tx_submit = mv_xor_tx_submit; 632 slot->async_tx.tx_submit = mv_xor_tx_submit;
633 INIT_LIST_HEAD(&slot->chain_node); 633 INIT_LIST_HEAD(&slot->chain_node);
634 INIT_LIST_HEAD(&slot->slot_node); 634 INIT_LIST_HEAD(&slot->slot_node);
635 INIT_LIST_HEAD(&slot->tx_list);
635 hw_desc = (char *) mv_chan->device->dma_desc_pool; 636 hw_desc = (char *) mv_chan->device->dma_desc_pool;
636 slot->async_tx.phys = 637 slot->async_tx.phys =
637 (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE]; 638 (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
index 06cafe1ef521..977b592e976b 100644
--- a/drivers/dma/mv_xor.h
+++ b/drivers/dma/mv_xor.h
@@ -126,9 +126,8 @@ struct mv_xor_chan {
126 * @idx: pool index 126 * @idx: pool index
127 * @unmap_src_cnt: number of xor sources 127 * @unmap_src_cnt: number of xor sources
128 * @unmap_len: transaction bytecount 128 * @unmap_len: transaction bytecount
129 * @tx_list: list of slots that make up a multi-descriptor transaction
129 * @async_tx: support for the async_tx api 130 * @async_tx: support for the async_tx api
130 * @group_list: list of slots that make up a multi-descriptor transaction
131 * for example transfer lengths larger than the supported hw max
132 * @xor_check_result: result of zero sum 131 * @xor_check_result: result of zero sum
133 * @crc32_result: result crc calculation 132 * @crc32_result: result crc calculation
134 */ 133 */
@@ -145,6 +144,7 @@ struct mv_xor_desc_slot {
145 u16 unmap_src_cnt; 144 u16 unmap_src_cnt;
146 u32 value; 145 u32 value;
147 size_t unmap_len; 146 size_t unmap_len;
147 struct list_head tx_list;
148 struct dma_async_tx_descriptor async_tx; 148 struct dma_async_tx_descriptor async_tx;
149 union { 149 union {
150 u32 *xor_check_result; 150 u32 *xor_check_result;
diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
new file mode 100644
index 000000000000..b3b065c4e5c1
--- /dev/null
+++ b/drivers/dma/shdma.c
@@ -0,0 +1,786 @@
1/*
2 * Renesas SuperH DMA Engine support
3 *
4 * based on drivers/dma/fsldma.c
5 *
6 * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
7 * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
8 * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
9 *
10 * This is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * - DMA of SuperH does not have Hardware DMA chain mode.
16 * - MAX DMA size is 16MB.
17 *
18 */
19
20#include <linux/init.h>
21#include <linux/module.h>
22#include <linux/interrupt.h>
23#include <linux/dmaengine.h>
24#include <linux/delay.h>
25#include <linux/dma-mapping.h>
26#include <linux/dmapool.h>
27#include <linux/platform_device.h>
28#include <cpu/dma.h>
29#include <asm/dma-sh.h>
30#include "shdma.h"
31
32/* DMA descriptor control */
33#define DESC_LAST (-1)
34#define DESC_COMP (1)
35#define DESC_NCOMP (0)
36
37#define NR_DESCS_PER_CHANNEL 32
38/*
39 * Define the default configuration for dual address memory-memory transfer.
40 * The 0x400 value represents auto-request, external->external.
41 *
42 * This driver defaults to 4-byte burst mode.
43 * To change the mode, change the value of RS_DEFAULT,
44 * e.g. 1-byte burst mode -> (RS_DUAL & ~TS_32).
45 */
46#define RS_DEFAULT (RS_DUAL)
47
48#define SH_DMAC_CHAN_BASE(id) (dma_base_addr[id])
49static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg)
50{
51 ctrl_outl(data, (SH_DMAC_CHAN_BASE(sh_dc->id) + reg));
52}
53
54static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg)
55{
56 return ctrl_inl((SH_DMAC_CHAN_BASE(sh_dc->id) + reg));
57}
58
59static void dmae_init(struct sh_dmae_chan *sh_chan)
60{
61 u32 chcr = RS_DEFAULT; /* default is DUAL mode */
62 sh_dmae_writel(sh_chan, chcr, CHCR);
63}
64
65/*
66 * Reset DMA controller
67 *
68 * SH7780 has two DMAOR register
69 */
70static void sh_dmae_ctl_stop(int id)
71{
72 unsigned short dmaor = dmaor_read_reg(id);
73
74 dmaor &= ~(DMAOR_NMIF | DMAOR_AE);
75 dmaor_write_reg(id, dmaor);
76}
77
78static int sh_dmae_rst(int id)
79{
80 unsigned short dmaor;
81
82 sh_dmae_ctl_stop(id);
83 dmaor = (dmaor_read_reg(id)|DMAOR_INIT);
84
85 dmaor_write_reg(id, dmaor);
86 if ((dmaor_read_reg(id) & (DMAOR_AE | DMAOR_NMIF))) {
87 pr_err("dma-sh: Can't initialize DMAOR.\n");
88 return -EINVAL;
89 }
90 return 0;
91}
92
93static int dmae_is_idle(struct sh_dmae_chan *sh_chan)
94{
95 u32 chcr = sh_dmae_readl(sh_chan, CHCR);
96 if (chcr & CHCR_DE) {
97 if (!(chcr & CHCR_TE))
98 return -EBUSY; /* working */
99 }
100 return 0; /* waiting */
101}
102
103static inline unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan)
104{
105 u32 chcr = sh_dmae_readl(sh_chan, CHCR);
106 return ts_shift[(chcr & CHCR_TS_MASK) >> CHCR_TS_SHIFT];
107}
108
109static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs hw)
110{
111 sh_dmae_writel(sh_chan, hw.sar, SAR);
112 sh_dmae_writel(sh_chan, hw.dar, DAR);
113 sh_dmae_writel(sh_chan,
114 (hw.tcr >> calc_xmit_shift(sh_chan)), TCR);
115}
116
117static void dmae_start(struct sh_dmae_chan *sh_chan)
118{
119 u32 chcr = sh_dmae_readl(sh_chan, CHCR);
120
121 chcr |= (CHCR_DE|CHCR_IE);
122 sh_dmae_writel(sh_chan, chcr, CHCR);
123}
124
125static void dmae_halt(struct sh_dmae_chan *sh_chan)
126{
127 u32 chcr = sh_dmae_readl(sh_chan, CHCR);
128
129 chcr &= ~(CHCR_DE | CHCR_TE | CHCR_IE);
130 sh_dmae_writel(sh_chan, chcr, CHCR);
131}
132
133static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val)
134{
135 int ret = dmae_is_idle(sh_chan);
136 /* CHCR cannot be written while a DMA transfer is in progress */
137 if (ret)
138 return ret;
139
140 sh_dmae_writel(sh_chan, val, CHCR);
141 return 0;
142}
143
144#define DMARS1_ADDR 0x04
145#define DMARS2_ADDR 0x08
146#define DMARS_SHIFT 8
147#define DMARS_CHAN_MSK 0x01
148static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
149{
150 u32 addr;
151 int shift = 0;
152 int ret = dmae_is_idle(sh_chan);
153 if (ret)
154 return ret;
155
156 if (sh_chan->id & DMARS_CHAN_MSK)
157 shift = DMARS_SHIFT;
158
159 switch (sh_chan->id) {
160 /* DMARS0 */
161 case 0:
162 case 1:
163 addr = SH_DMARS_BASE;
164 break;
165 /* DMARS1 */
166 case 2:
167 case 3:
168 addr = (SH_DMARS_BASE + DMARS1_ADDR);
169 break;
170 /* DMARS2 */
171 case 4:
172 case 5:
173 addr = (SH_DMARS_BASE + DMARS2_ADDR);
174 break;
175 default:
176 return -EINVAL;
177 }
178
179 ctrl_outw((val << shift) |
180 (ctrl_inw(addr) & (shift ? 0xFF00 : 0x00FF)),
181 addr);
182
183 return 0;
184}
185
186static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
187{
188 struct sh_desc *desc = tx_to_sh_desc(tx);
189 struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan);
190 dma_cookie_t cookie;
191
192 spin_lock_bh(&sh_chan->desc_lock);
193
194 cookie = sh_chan->common.cookie;
195 cookie++;
196 if (cookie < 0)
197 cookie = 1;
198
199 /* Do not overwrite the -EBUSY marker of a last descriptor */
200 if (desc->async_tx.cookie != -EBUSY)
201 desc->async_tx.cookie = cookie;
202 sh_chan->common.cookie = desc->async_tx.cookie;
203
204 list_splice_init(&desc->tx_list, sh_chan->ld_queue.prev);
205
206 spin_unlock_bh(&sh_chan->desc_lock);
207
208 return cookie;
209}
210
211static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan)
212{
213 struct sh_desc *desc, *_desc, *ret = NULL;
214
215 spin_lock_bh(&sh_chan->desc_lock);
216 list_for_each_entry_safe(desc, _desc, &sh_chan->ld_free, node) {
217 if (async_tx_test_ack(&desc->async_tx)) {
218 list_del(&desc->node);
219 ret = desc;
220 break;
221 }
222 }
223 spin_unlock_bh(&sh_chan->desc_lock);
224
225 return ret;
226}
227
228static void sh_dmae_put_desc(struct sh_dmae_chan *sh_chan, struct sh_desc *desc)
229{
230 if (desc) {
231 spin_lock_bh(&sh_chan->desc_lock);
232
233 list_splice_init(&desc->tx_list, &sh_chan->ld_free);
234 list_add(&desc->node, &sh_chan->ld_free);
235
236 spin_unlock_bh(&sh_chan->desc_lock);
237 }
238}
239
240static int sh_dmae_alloc_chan_resources(struct dma_chan *chan)
241{
242 struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
243 struct sh_desc *desc;
244
245 spin_lock_bh(&sh_chan->desc_lock);
246 while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) {
247 spin_unlock_bh(&sh_chan->desc_lock);
248 desc = kzalloc(sizeof(struct sh_desc), GFP_KERNEL);
249 if (!desc) {
250 spin_lock_bh(&sh_chan->desc_lock);
251 break;
252 }
253 dma_async_tx_descriptor_init(&desc->async_tx,
254 &sh_chan->common);
255 desc->async_tx.tx_submit = sh_dmae_tx_submit;
256 desc->async_tx.flags = DMA_CTRL_ACK;
257 INIT_LIST_HEAD(&desc->tx_list);
258 sh_dmae_put_desc(sh_chan, desc);
259
260 spin_lock_bh(&sh_chan->desc_lock);
261 sh_chan->descs_allocated++;
262 }
263 spin_unlock_bh(&sh_chan->desc_lock);
264
265 return sh_chan->descs_allocated;
266}
267
268/*
269 * sh_dmae_free_chan_resources - Free all resources of the channel.
270 */
271static void sh_dmae_free_chan_resources(struct dma_chan *chan)
272{
273 struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
274 struct sh_desc *desc, *_desc;
275 LIST_HEAD(list);
276
277 BUG_ON(!list_empty(&sh_chan->ld_queue));
278 spin_lock_bh(&sh_chan->desc_lock);
279
280 list_splice_init(&sh_chan->ld_free, &list);
281 sh_chan->descs_allocated = 0;
282
283 spin_unlock_bh(&sh_chan->desc_lock);
284
285 list_for_each_entry_safe(desc, _desc, &list, node)
286 kfree(desc);
287}
288
289static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy(
290 struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src,
291 size_t len, unsigned long flags)
292{
293 struct sh_dmae_chan *sh_chan;
294 struct sh_desc *first = NULL, *prev = NULL, *new;
295 size_t copy_size;
296
297 if (!chan)
298 return NULL;
299
300 if (!len)
301 return NULL;
302
303 sh_chan = to_sh_chan(chan);
304
305 do {
306 /* Allocate the link descriptor from DMA pool */
307 new = sh_dmae_get_desc(sh_chan);
308 if (!new) {
309 dev_err(sh_chan->dev,
310 "No free memory for link descriptor\n");
311 goto err_get_desc;
312 }
313
314 copy_size = min(len, (size_t)SH_DMA_TCR_MAX);
315
316 new->hw.sar = dma_src;
317 new->hw.dar = dma_dest;
318 new->hw.tcr = copy_size;
319 if (!first)
320 first = new;
321
322 new->mark = DESC_NCOMP;
323 async_tx_ack(&new->async_tx);
324
325 prev = new;
326 len -= copy_size;
327 dma_src += copy_size;
328 dma_dest += copy_size;
329 /* Insert the link descriptor to the LD ring */
330 list_add_tail(&new->node, &first->tx_list);
331 } while (len);
332
333 new->async_tx.flags = flags; /* client is in control of this ack */
334 new->async_tx.cookie = -EBUSY; /* Last desc */
335
336 return &first->async_tx;
337
338err_get_desc:
339 sh_dmae_put_desc(sh_chan, first);
340 return NULL;
341
342}
343
344/*
345 * sh_dmae_chan_ld_cleanup - Clean up link descriptors
346 *
347 * This function cleans up the ld_queue of the DMA channel.
348 */
349static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan)
350{
351 struct sh_desc *desc, *_desc;
352
353 spin_lock_bh(&sh_chan->desc_lock);
354 list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) {
355 dma_async_tx_callback callback;
356 void *callback_param;
357
358 /* stop at the first descriptor that has not completed */
359 if (desc->mark == DESC_NCOMP)
360 break;
361
362 /* completed descriptor: run its callback and recycle it */
363 callback = desc->async_tx.callback;
364 callback_param = desc->async_tx.callback_param;
365
366 /* Remove from ld_queue list */
367 list_splice_init(&desc->tx_list, &sh_chan->ld_free);
368
369 dev_dbg(sh_chan->dev, "link descriptor %p will be recycled.\n",
370 desc);
371
372 list_move(&desc->node, &sh_chan->ld_free);
373 /* Run the link descriptor callback function */
374 if (callback) {
375 spin_unlock_bh(&sh_chan->desc_lock);
376 dev_dbg(sh_chan->dev, "link descriptor %p callback\n",
377 desc);
378 callback(callback_param);
379 spin_lock_bh(&sh_chan->desc_lock);
380 }
381 }
382 spin_unlock_bh(&sh_chan->desc_lock);
383}
384
385static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan)
386{
387 struct list_head *ld_node;
388 struct sh_dmae_regs hw;
389
390 /* DMA work check */
391 if (dmae_is_idle(sh_chan))
392 return;
393
394 /* Find the first un-transferred descriptor */
395 for (ld_node = sh_chan->ld_queue.next;
396 (ld_node != &sh_chan->ld_queue)
397 && (to_sh_desc(ld_node)->mark == DESC_COMP);
398 ld_node = ld_node->next)
399 cpu_relax();
400
401 if (ld_node != &sh_chan->ld_queue) {
402 /* Get the ld start address from ld_queue */
403 hw = to_sh_desc(ld_node)->hw;
404 dmae_set_reg(sh_chan, hw);
405 dmae_start(sh_chan);
406 }
407}
408
409static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan)
410{
411 struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
412 sh_chan_xfer_ld_queue(sh_chan);
413}
414
415static enum dma_status sh_dmae_is_complete(struct dma_chan *chan,
416 dma_cookie_t cookie,
417 dma_cookie_t *done,
418 dma_cookie_t *used)
419{
420 struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
421 dma_cookie_t last_used;
422 dma_cookie_t last_complete;
423
424 sh_dmae_chan_ld_cleanup(sh_chan);
425
426 last_used = chan->cookie;
427 last_complete = sh_chan->completed_cookie;
428 if (last_complete == -EBUSY)
429 last_complete = last_used;
430
431 if (done)
432 *done = last_complete;
433
434 if (used)
435 *used = last_used;
436
437 return dma_async_is_complete(cookie, last_complete, last_used);
438}
439
440static irqreturn_t sh_dmae_interrupt(int irq, void *data)
441{
442 irqreturn_t ret = IRQ_NONE;
443 struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
444 u32 chcr = sh_dmae_readl(sh_chan, CHCR);
445
446 if (chcr & CHCR_TE) {
447 /* DMA stop */
448 dmae_halt(sh_chan);
449
450 ret = IRQ_HANDLED;
451 tasklet_schedule(&sh_chan->tasklet);
452 }
453
454 return ret;
455}
456
457#if defined(CONFIG_CPU_SH4)
458static irqreturn_t sh_dmae_err(int irq, void *data)
459{
460 int err = 0;
461 struct sh_dmae_device *shdev = (struct sh_dmae_device *)data;
462
463 /* IRQ Multi */
464 if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
465 int cnt = 0;
466 switch (irq) {
467#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
468 case DMTE6_IRQ:
469 cnt++;
470#endif
471 case DMTE0_IRQ:
472 if (dmaor_read_reg(cnt) & (DMAOR_NMIF | DMAOR_AE)) {
473 disable_irq(irq);
474 return IRQ_HANDLED;
475 }
476 default:
477 return IRQ_NONE;
478 }
479 } else {
480 /* reset dma controller */
481 err = sh_dmae_rst(0);
482 if (err)
483 return err;
484 if (shdev->pdata.mode & SHDMA_DMAOR1) {
485 err = sh_dmae_rst(1);
486 if (err)
487 return err;
488 }
489 disable_irq(irq);
490 return IRQ_HANDLED;
491 }
492}
493#endif
494
495static void dmae_do_tasklet(unsigned long data)
496{
497 struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
498 struct sh_desc *desc, *_desc, *cur_desc = NULL;
499 u32 sar_buf = sh_dmae_readl(sh_chan, SAR);
500 list_for_each_entry_safe(desc, _desc,
501 &sh_chan->ld_queue, node) {
502 if ((desc->hw.sar + desc->hw.tcr) == sar_buf) {
503 cur_desc = desc;
504 break;
505 }
506 }
507
508 if (cur_desc) {
509 switch (cur_desc->async_tx.cookie) {
510 case 0: /* intermediate descriptor in the chain */
511 break;
512 case -EBUSY: /* last desc */
513 sh_chan->completed_cookie =
514 cur_desc->async_tx.cookie;
515 break;
516 default: /* first descriptor (cookie > 0) */
517 sh_chan->completed_cookie =
518 cur_desc->async_tx.cookie - 1;
519 break;
520 }
521 cur_desc->mark = DESC_COMP;
522 }
523 /* Next desc */
524 sh_chan_xfer_ld_queue(sh_chan);
525 sh_dmae_chan_ld_cleanup(sh_chan);
526}
527
528static unsigned int get_dmae_irq(unsigned int id)
529{
530 unsigned int irq = 0;
531 if (id < ARRAY_SIZE(dmte_irq_map))
532 irq = dmte_irq_map[id];
533 return irq;
534}
535
536static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id)
537{
538 int err;
539 unsigned int irq = get_dmae_irq(id);
540 unsigned long irqflags = IRQF_DISABLED;
541 struct sh_dmae_chan *new_sh_chan;
542
543 /* alloc channel */
544 new_sh_chan = kzalloc(sizeof(struct sh_dmae_chan), GFP_KERNEL);
545 if (!new_sh_chan) {
546 dev_err(shdev->common.dev, "No free memory for allocating "
547 "dma channels!\n");
548 return -ENOMEM;
549 }
550
551 new_sh_chan->dev = shdev->common.dev;
552 new_sh_chan->id = id;
553
554 /* Init DMA tasklet */
555 tasklet_init(&new_sh_chan->tasklet, dmae_do_tasklet,
556 (unsigned long)new_sh_chan);
557
558 /* Init the channel */
559 dmae_init(new_sh_chan);
560
561 spin_lock_init(&new_sh_chan->desc_lock);
562
563 /* Init descriptor management lists */
564 INIT_LIST_HEAD(&new_sh_chan->ld_queue);
565 INIT_LIST_HEAD(&new_sh_chan->ld_free);
566
567 /* copy struct dma_device */
568 new_sh_chan->common.device = &shdev->common;
569
570 /* Add the channel to DMA device channel list */
571 list_add_tail(&new_sh_chan->common.device_node,
572 &shdev->common.channels);
573 shdev->common.chancnt++;
574
575 if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
576 irqflags = IRQF_SHARED;
577#if defined(DMTE6_IRQ)
578 if (irq >= DMTE6_IRQ)
579 irq = DMTE6_IRQ;
580 else
581#endif
582 irq = DMTE0_IRQ;
583 }
584
585 snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id),
586 "sh-dmae%d", new_sh_chan->id);
587
588 /* set up channel irq */
589 err = request_irq(irq, &sh_dmae_interrupt,
590 irqflags, new_sh_chan->dev_id, new_sh_chan);
591 if (err) {
592 dev_err(shdev->common.dev, "DMA channel %d request_irq error "
593 "with return %d\n", id, err);
594 goto err_no_irq;
595 }
596
597 /* CHCR register control function */
598 new_sh_chan->set_chcr = dmae_set_chcr;
599 /* DMARS register control function */
600 new_sh_chan->set_dmars = dmae_set_dmars;
601
602 shdev->chan[id] = new_sh_chan;
603 return 0;
604
605err_no_irq:
606 /* remove from dmaengine device node */
607 list_del(&new_sh_chan->common.device_node);
608 kfree(new_sh_chan);
609 return err;
610}
611
612static void sh_dmae_chan_remove(struct sh_dmae_device *shdev)
613{
614 int i;
615
616 for (i = shdev->common.chancnt - 1 ; i >= 0 ; i--) {
617 if (shdev->chan[i]) {
618 struct sh_dmae_chan *shchan = shdev->chan[i];
619 if (!(shdev->pdata.mode & SHDMA_MIX_IRQ))
620 free_irq(dmte_irq_map[i], shchan);
621
622 list_del(&shchan->common.device_node);
623 kfree(shchan);
624 shdev->chan[i] = NULL;
625 }
626 }
627 shdev->common.chancnt = 0;
628}
629
630static int __init sh_dmae_probe(struct platform_device *pdev)
631{
632 int err = 0, cnt, ecnt;
633 unsigned long irqflags = IRQF_DISABLED;
634#if defined(CONFIG_CPU_SH4)
635 int eirq[] = { DMAE0_IRQ,
636#if defined(DMAE1_IRQ)
637 DMAE1_IRQ
638#endif
639 };
640#endif
641 struct sh_dmae_device *shdev;
642
643 shdev = kzalloc(sizeof(struct sh_dmae_device), GFP_KERNEL);
644 if (!shdev) {
645 dev_err(&pdev->dev, "Not enough memory\n");
646 err = -ENOMEM;
647 goto shdev_err;
648 }
649
650 /* get platform data */
651 if (!pdev->dev.platform_data)
652 goto shdev_err;
653
654 /* platform data */
655 memcpy(&shdev->pdata, pdev->dev.platform_data,
656 sizeof(struct sh_dmae_pdata));
657
658 /* reset dma controller */
659 err = sh_dmae_rst(0);
660 if (err)
661 goto rst_err;
662
663 /* SH7780/85/23 has DMAOR1 */
664 if (shdev->pdata.mode & SHDMA_DMAOR1) {
665 err = sh_dmae_rst(1);
666 if (err)
667 goto rst_err;
668 }
669
670 INIT_LIST_HEAD(&shdev->common.channels);
671
672 dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask);
673 shdev->common.device_alloc_chan_resources
674 = sh_dmae_alloc_chan_resources;
675 shdev->common.device_free_chan_resources = sh_dmae_free_chan_resources;
676 shdev->common.device_prep_dma_memcpy = sh_dmae_prep_memcpy;
677 shdev->common.device_is_tx_complete = sh_dmae_is_complete;
678 shdev->common.device_issue_pending = sh_dmae_memcpy_issue_pending;
679 shdev->common.dev = &pdev->dev;
680
681#if defined(CONFIG_CPU_SH4)
682 /* Mix IRQ mode (SH7722/SH7730 etc.): errors share the DMTE IRQs */
683 if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
684 irqflags = IRQF_SHARED;
685 eirq[0] = DMTE0_IRQ;
686#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
687 eirq[1] = DMTE6_IRQ;
688#endif
689 }
690
691 for (ecnt = 0 ; ecnt < ARRAY_SIZE(eirq); ecnt++) {
692 err = request_irq(eirq[ecnt], sh_dmae_err,
693 irqflags, "DMAC Address Error", shdev);
694 if (err) {
695 dev_err(&pdev->dev, "DMA device request_irq "
696 "error (irq %d) with return %d\n",
697 eirq[ecnt], err);
698 goto eirq_err;
699 }
700 }
701#endif /* CONFIG_CPU_SH4 */
702
703 /* Create DMA Channel */
704 for (cnt = 0 ; cnt < MAX_DMA_CHANNELS ; cnt++) {
705 err = sh_dmae_chan_probe(shdev, cnt);
706 if (err)
707 goto chan_probe_err;
708 }
709
710 platform_set_drvdata(pdev, shdev);
711 dma_async_device_register(&shdev->common);
712
713 return err;
714
715chan_probe_err:
716 sh_dmae_chan_remove(shdev);
717
718eirq_err:
719 for (ecnt-- ; ecnt >= 0; ecnt--)
720 free_irq(eirq[ecnt], shdev);
721
722rst_err:
723 kfree(shdev);
724
725shdev_err:
726 return err;
727}
728
729static int __exit sh_dmae_remove(struct platform_device *pdev)
730{
731 struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
732
733 dma_async_device_unregister(&shdev->common);
734
735 if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
736 free_irq(DMTE0_IRQ, shdev);
737#if defined(DMTE6_IRQ)
738 free_irq(DMTE6_IRQ, shdev);
739#endif
740 }
741
742 /* channel data remove */
743 sh_dmae_chan_remove(shdev);
744
745 if (!(shdev->pdata.mode & SHDMA_MIX_IRQ)) {
746 free_irq(DMAE0_IRQ, shdev);
747#if defined(DMAE1_IRQ)
748 free_irq(DMAE1_IRQ, shdev);
749#endif
750 }
751 kfree(shdev);
752
753 return 0;
754}
755
756static void sh_dmae_shutdown(struct platform_device *pdev)
757{
758 struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
759 sh_dmae_ctl_stop(0);
760 if (shdev->pdata.mode & SHDMA_DMAOR1)
761 sh_dmae_ctl_stop(1);
762}
763
764static struct platform_driver sh_dmae_driver = {
765 .remove = __exit_p(sh_dmae_remove),
766 .shutdown = sh_dmae_shutdown,
767 .driver = {
768 .name = "sh-dma-engine",
769 },
770};
771
772static int __init sh_dmae_init(void)
773{
774 return platform_driver_probe(&sh_dmae_driver, sh_dmae_probe);
775}
776module_init(sh_dmae_init);
777
778static void __exit sh_dmae_exit(void)
779{
780 platform_driver_unregister(&sh_dmae_driver);
781}
782module_exit(sh_dmae_exit);
783
784MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>");
785MODULE_DESCRIPTION("Renesas SH DMA Engine driver");
786MODULE_LICENSE("GPL");
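
A hedged sketch of a dmaengine client exercising this memcpy-only engine (standard API of this kernel generation; the buffers are illustrative, and transfers above SH_DMA_TCR_MAX are split into chained link descriptors by sh_dmae_prep_memcpy() itself):

#include <linux/dmaengine.h>

static int demo_dma_copy(void *dst, void *src, size_t len)
{
	struct dma_chan *chan;
	dma_cookie_t cookie;

	dmaengine_get();
	chan = dma_find_channel(DMA_MEMCPY);
	if (!chan) {
		dmaengine_put();
		return -ENODEV;
	}

	/* prep + submit + issue in one step */
	cookie = dma_async_memcpy_buf_to_buf(chan, dst, src, len);
	if (cookie >= 0) {
		dma_async_issue_pending(chan);
		/* busy-poll; a real client would use a callback */
		while (dma_async_is_tx_complete(chan, cookie, NULL, NULL)
		       == DMA_IN_PROGRESS)
			cpu_relax();
	}

	dmaengine_put();
	return cookie < 0 ? cookie : 0;
}
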
diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h
new file mode 100644
index 000000000000..2b4bc15a2c0a
--- /dev/null
+++ b/drivers/dma/shdma.h
@@ -0,0 +1,64 @@
1/*
2 * Renesas SuperH DMA Engine support
3 *
4 * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
5 * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
6 *
7 * This is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 */
13#ifndef __DMA_SHDMA_H
14#define __DMA_SHDMA_H
15
16#include <linux/device.h>
17#include <linux/dmapool.h>
18#include <linux/dmaengine.h>
19
20#define SH_DMA_TCR_MAX 0x00FFFFFF /* 16MB */
21
22struct sh_dmae_regs {
23 u32 sar; /* SAR / source address */
24 u32 dar; /* DAR / destination address */
25 u32 tcr; /* TCR / transfer count */
26};
27
28struct sh_desc {
29 struct list_head tx_list;
30 struct sh_dmae_regs hw;
31 struct list_head node;
32 struct dma_async_tx_descriptor async_tx;
33 int mark;
34};
35
36struct sh_dmae_chan {
37 dma_cookie_t completed_cookie; /* The maximum cookie completed */
38 spinlock_t desc_lock; /* Descriptor operation lock */
39 struct list_head ld_queue; /* Link descriptors queue */
40 struct list_head ld_free; /* Link descriptors free */
41 struct dma_chan common; /* DMA common channel */
42 struct device *dev; /* Channel device */
43 struct tasklet_struct tasklet; /* Tasklet */
44 int descs_allocated; /* desc count */
45 int id; /* Raw id of this channel */
46 char dev_id[16]; /* unique name per DMAC of channel */
47
48 /* Set chcr */
49 int (*set_chcr)(struct sh_dmae_chan *sh_chan, u32 regs);
50 /* Set DMA resource */
51 int (*set_dmars)(struct sh_dmae_chan *sh_chan, u16 res);
52};
53
54struct sh_dmae_device {
55 struct dma_device common;
56 struct sh_dmae_chan *chan[MAX_DMA_CHANNELS];
57 struct sh_dmae_pdata pdata;
58};
59
60#define to_sh_chan(chan) container_of(chan, struct sh_dmae_chan, common)
61#define to_sh_desc(lh) container_of(lh, struct sh_desc, node)
62#define tx_to_sh_desc(tx) container_of(tx, struct sh_desc, async_tx)
63
64#endif /* __DMA_SHDMA_H */
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
index 7837930146a4..fb6bb64e8861 100644
--- a/drivers/dma/txx9dmac.c
+++ b/drivers/dma/txx9dmac.c
@@ -180,9 +180,8 @@ static struct txx9dmac_desc *txx9dmac_first_queued(struct txx9dmac_chan *dc)
180 180
181static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc) 181static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc)
182{ 182{
183 if (!list_empty(&desc->txd.tx_list)) 183 if (!list_empty(&desc->tx_list))
184 desc = list_entry(desc->txd.tx_list.prev, 184 desc = list_entry(desc->tx_list.prev, typeof(*desc), desc_node);
185 struct txx9dmac_desc, desc_node);
186 return desc; 185 return desc;
187} 186}
188 187
@@ -197,6 +196,7 @@ static struct txx9dmac_desc *txx9dmac_desc_alloc(struct txx9dmac_chan *dc,
197 desc = kzalloc(sizeof(*desc), flags); 196 desc = kzalloc(sizeof(*desc), flags);
198 if (!desc) 197 if (!desc)
199 return NULL; 198 return NULL;
199 INIT_LIST_HEAD(&desc->tx_list);
200 dma_async_tx_descriptor_init(&desc->txd, &dc->chan); 200 dma_async_tx_descriptor_init(&desc->txd, &dc->chan);
201 desc->txd.tx_submit = txx9dmac_tx_submit; 201 desc->txd.tx_submit = txx9dmac_tx_submit;
202 /* txd.flags will be overwritten in prep funcs */ 202 /* txd.flags will be overwritten in prep funcs */
@@ -245,7 +245,7 @@ static void txx9dmac_sync_desc_for_cpu(struct txx9dmac_chan *dc,
245 struct txx9dmac_dev *ddev = dc->ddev; 245 struct txx9dmac_dev *ddev = dc->ddev;
246 struct txx9dmac_desc *child; 246 struct txx9dmac_desc *child;
247 247
248 list_for_each_entry(child, &desc->txd.tx_list, desc_node) 248 list_for_each_entry(child, &desc->tx_list, desc_node)
249 dma_sync_single_for_cpu(chan2parent(&dc->chan), 249 dma_sync_single_for_cpu(chan2parent(&dc->chan),
250 child->txd.phys, ddev->descsize, 250 child->txd.phys, ddev->descsize,
251 DMA_TO_DEVICE); 251 DMA_TO_DEVICE);
@@ -267,11 +267,11 @@ static void txx9dmac_desc_put(struct txx9dmac_chan *dc,
267 txx9dmac_sync_desc_for_cpu(dc, desc); 267 txx9dmac_sync_desc_for_cpu(dc, desc);
268 268
269 spin_lock_bh(&dc->lock); 269 spin_lock_bh(&dc->lock);
270 list_for_each_entry(child, &desc->txd.tx_list, desc_node) 270 list_for_each_entry(child, &desc->tx_list, desc_node)
271 dev_vdbg(chan2dev(&dc->chan), 271 dev_vdbg(chan2dev(&dc->chan),
272 "moving child desc %p to freelist\n", 272 "moving child desc %p to freelist\n",
273 child); 273 child);
274 list_splice_init(&desc->txd.tx_list, &dc->free_list); 274 list_splice_init(&desc->tx_list, &dc->free_list);
275 dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n", 275 dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n",
276 desc); 276 desc);
277 list_add(&desc->desc_node, &dc->free_list); 277 list_add(&desc->desc_node, &dc->free_list);
@@ -429,7 +429,7 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc,
429 param = txd->callback_param; 429 param = txd->callback_param;
430 430
431 txx9dmac_sync_desc_for_cpu(dc, desc); 431 txx9dmac_sync_desc_for_cpu(dc, desc);
432 list_splice_init(&txd->tx_list, &dc->free_list); 432 list_splice_init(&desc->tx_list, &dc->free_list);
433 list_move(&desc->desc_node, &dc->free_list); 433 list_move(&desc->desc_node, &dc->free_list);
434 434
435 if (!ds) { 435 if (!ds) {
@@ -571,7 +571,7 @@ static void txx9dmac_handle_error(struct txx9dmac_chan *dc, u32 csr)
571 "Bad descriptor submitted for DMA! (cookie: %d)\n", 571 "Bad descriptor submitted for DMA! (cookie: %d)\n",
572 bad_desc->txd.cookie); 572 bad_desc->txd.cookie);
573 txx9dmac_dump_desc(dc, &bad_desc->hwdesc); 573 txx9dmac_dump_desc(dc, &bad_desc->hwdesc);
574 list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) 574 list_for_each_entry(child, &bad_desc->tx_list, desc_node)
575 txx9dmac_dump_desc(dc, &child->hwdesc); 575 txx9dmac_dump_desc(dc, &child->hwdesc);
576 /* Pretend the descriptor completed successfully */ 576 /* Pretend the descriptor completed successfully */
577 txx9dmac_descriptor_complete(dc, bad_desc); 577 txx9dmac_descriptor_complete(dc, bad_desc);
@@ -613,7 +613,7 @@ static void txx9dmac_scan_descriptors(struct txx9dmac_chan *dc)
613 return; 613 return;
614 } 614 }
615 615
616 list_for_each_entry(child, &desc->txd.tx_list, desc_node) 616 list_for_each_entry(child, &desc->tx_list, desc_node)
617 if (desc_read_CHAR(dc, child) == chain) { 617 if (desc_read_CHAR(dc, child) == chain) {
618 /* Currently in progress */ 618 /* Currently in progress */
619 if (csr & TXX9_DMA_CSR_ABCHC) 619 if (csr & TXX9_DMA_CSR_ABCHC)
@@ -823,8 +823,7 @@ txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
823 dma_sync_single_for_device(chan2parent(&dc->chan), 823 dma_sync_single_for_device(chan2parent(&dc->chan),
824 prev->txd.phys, ddev->descsize, 824 prev->txd.phys, ddev->descsize,
825 DMA_TO_DEVICE); 825 DMA_TO_DEVICE);
826 list_add_tail(&desc->desc_node, 826 list_add_tail(&desc->desc_node, &first->tx_list);
827 &first->txd.tx_list);
828 } 827 }
829 prev = desc; 828 prev = desc;
830 } 829 }
@@ -919,8 +918,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
919 prev->txd.phys, 918 prev->txd.phys,
920 ddev->descsize, 919 ddev->descsize,
921 DMA_TO_DEVICE); 920 DMA_TO_DEVICE);
922 list_add_tail(&desc->desc_node, 921 list_add_tail(&desc->desc_node, &first->tx_list);
923 &first->txd.tx_list);
924 } 922 }
925 prev = desc; 923 prev = desc;
926 } 924 }
diff --git a/drivers/dma/txx9dmac.h b/drivers/dma/txx9dmac.h
index c907ff01d276..365d42366b9f 100644
--- a/drivers/dma/txx9dmac.h
+++ b/drivers/dma/txx9dmac.h
@@ -231,6 +231,7 @@ struct txx9dmac_desc {
231 231
232 /* THEN values for driver housekeeping */ 232 /* THEN values for driver housekeeping */
233 struct list_head desc_node ____cacheline_aligned; 233 struct list_head desc_node ____cacheline_aligned;
234 struct list_head tx_list;
234 struct dma_async_tx_descriptor txd; 235 struct dma_async_tx_descriptor txd;
235 size_t len; 236 size_t len;
236}; 237};
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index a3ca18e2d7cf..02127e59fe8e 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -133,6 +133,13 @@ config EDAC_I3000
133 Support for error detection and correction on the Intel 133 Support for error detection and correction on the Intel
134 3000 and 3010 server chipsets. 134 3000 and 3010 server chipsets.
135 135
136config EDAC_I3200
137 tristate "Intel 3200"
138 depends on EDAC_MM_EDAC && PCI && X86 && EXPERIMENTAL
139 help
140 Support for error detection and correction on the Intel
141 3200 and 3210 server chipsets.
142
136config EDAC_X38 143config EDAC_X38
137 tristate "Intel X38" 144 tristate "Intel X38"
138 depends on EDAC_MM_EDAC && PCI && X86 145 depends on EDAC_MM_EDAC && PCI && X86
@@ -176,11 +183,11 @@ config EDAC_I5100
176 San Clemente MCH. 183 San Clemente MCH.
177 184
178config EDAC_MPC85XX 185config EDAC_MPC85XX
179 tristate "Freescale MPC85xx" 186 tristate "Freescale MPC83xx / MPC85xx"
180 depends on EDAC_MM_EDAC && FSL_SOC && MPC85xx 187 depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || MPC85xx)
181 help 188 help
182 Support for error detection and correction on the Freescale 189 Support for error detection and correction on the Freescale
183 MPC8560, MPC8540, MPC8548 190 MPC8349, MPC8560, MPC8540, MPC8548
184 191
185config EDAC_MV64X60 192config EDAC_MV64X60
186 tristate "Marvell MV64x60" 193 tristate "Marvell MV64x60"
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index cfa033ce53a7..7a473bbe8abd 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o
32obj-$(CONFIG_EDAC_I82875P) += i82875p_edac.o 32obj-$(CONFIG_EDAC_I82875P) += i82875p_edac.o
33obj-$(CONFIG_EDAC_I82975X) += i82975x_edac.o 33obj-$(CONFIG_EDAC_I82975X) += i82975x_edac.o
34obj-$(CONFIG_EDAC_I3000) += i3000_edac.o 34obj-$(CONFIG_EDAC_I3000) += i3000_edac.o
35obj-$(CONFIG_EDAC_I3200) += i3200_edac.o
35obj-$(CONFIG_EDAC_X38) += x38_edac.o 36obj-$(CONFIG_EDAC_X38) += x38_edac.o
36obj-$(CONFIG_EDAC_I82860) += i82860_edac.o 37obj-$(CONFIG_EDAC_I82860) += i82860_edac.o
37obj-$(CONFIG_EDAC_R82600) += r82600_edac.o 38obj-$(CONFIG_EDAC_R82600) += r82600_edac.o
@@ -49,3 +50,4 @@ obj-$(CONFIG_EDAC_CELL) += cell_edac.o
49obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o 50obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o
50obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o 51obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o
51obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o 52obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o
53
diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c
index 8c54196b5aba..3d50274f1348 100644
--- a/drivers/edac/cpc925_edac.c
+++ b/drivers/edac/cpc925_edac.c
@@ -885,14 +885,14 @@ static int __devinit cpc925_probe(struct platform_device *pdev)
 
 	if (!devm_request_mem_region(&pdev->dev,
 				     r->start,
-				     r->end - r->start + 1,
+				     resource_size(r),
 				     pdev->name)) {
 		cpc925_printk(KERN_ERR, "Unable to request mem region\n");
 		res = -EBUSY;
 		goto err1;
 	}
 
-	vbase = devm_ioremap(&pdev->dev, r->start, r->end - r->start + 1);
+	vbase = devm_ioremap(&pdev->dev, r->start, resource_size(r));
 	if (!vbase) {
 		cpc925_printk(KERN_ERR, "Unable to ioremap device\n");
 		res = -ENOMEM;
@@ -953,7 +953,7 @@ err3:
 	cpc925_mc_exit(mci);
 	edac_mc_free(mci);
 err2:
-	devm_release_mem_region(&pdev->dev, r->start, r->end-r->start+1);
+	devm_release_mem_region(&pdev->dev, r->start, resource_size(r));
 err1:
 	devres_release_group(&pdev->dev, cpc925_probe);
 out:
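
The resource_size() conversions above (and the matching ones in mv64x60_edac.c further down) are behavior-preserving: the helper, from include/linux/ioport.h, computes exactly the open-coded length it replaces, since a struct resource covers [start, end] inclusive:

	/* From include/linux/ioport.h -- a resource spans [start, end]
	 * inclusive, so its size is end - start + 1. */
	static inline resource_size_t resource_size(const struct resource *res)
	{
		return res->end - res->start + 1;
	}
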
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index b02a6a69a8f0..d5e13c94714f 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -356,7 +356,6 @@ static void complete_edac_device_list_del(struct rcu_head *head)
 
 	edac_dev = container_of(head, struct edac_device_ctl_info, rcu);
 	INIT_LIST_HEAD(&edac_dev->link);
-	complete(&edac_dev->removal_complete);
 }
 
 /*
@@ -369,10 +368,8 @@ static void del_edac_device_from_global_list(struct edac_device_ctl_info
 						*edac_device)
 {
 	list_del_rcu(&edac_device->link);
-
-	init_completion(&edac_device->removal_complete);
 	call_rcu(&edac_device->rcu, complete_edac_device_list_del);
-	wait_for_completion(&edac_device->removal_complete);
+	rcu_barrier();
 }
 
 /*
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 335b7ebdb11c..b629c41756f0 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -418,16 +418,14 @@ static void complete_mc_list_del(struct rcu_head *head)
 
 	mci = container_of(head, struct mem_ctl_info, rcu);
 	INIT_LIST_HEAD(&mci->link);
-	complete(&mci->complete);
 }
 
 static void del_mc_from_global_list(struct mem_ctl_info *mci)
 {
 	atomic_dec(&edac_handlers);
 	list_del_rcu(&mci->link);
-	init_completion(&mci->complete);
 	call_rcu(&mci->rcu, complete_mc_list_del);
-	wait_for_completion(&mci->complete);
+	rcu_barrier();
 }
 
 /**
diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c
index 30b585b1d60b..efb5d5650783 100644
--- a/drivers/edac/edac_pci.c
+++ b/drivers/edac/edac_pci.c
@@ -174,7 +174,6 @@ static void complete_edac_pci_list_del(struct rcu_head *head)
 
 	pci = container_of(head, struct edac_pci_ctl_info, rcu);
 	INIT_LIST_HEAD(&pci->link);
-	complete(&pci->complete);
 }
 
 /*
@@ -185,9 +184,8 @@ static void complete_edac_pci_list_del(struct rcu_head *head)
 static void del_edac_pci_from_global_list(struct edac_pci_ctl_info *pci)
 {
 	list_del_rcu(&pci->link);
-	init_completion(&pci->complete);
 	call_rcu(&pci->rcu, complete_edac_pci_list_del);
-	wait_for_completion(&pci->complete);
+	rcu_barrier();
 }
 
 #if 0
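
The three edac_device/edac_mc/edac_pci hunks above all make the same substitution: a hand-rolled wait (a completion signalled from the call_rcu() callback) becomes rcu_barrier(), which blocks until every pending RCU callback in the system has run. A minimal sketch of the resulting teardown pattern, with hypothetical names (my_obj, my_obj_reclaim) standing in for the per-subsystem control structures:

	#include <linux/rcupdate.h>
	#include <linux/list.h>

	struct my_obj {
		struct list_head link;
		struct rcu_head rcu;
	};

	static void my_obj_reclaim(struct rcu_head *head)
	{
		struct my_obj *obj = container_of(head, struct my_obj, rcu);

		INIT_LIST_HEAD(&obj->link);	/* safe: no readers remain */
	}

	static void my_obj_del(struct my_obj *obj)
	{
		list_del_rcu(&obj->link);		/* readers may still walk it */
		call_rcu(&obj->rcu, my_obj_reclaim);	/* deferred cleanup */
		rcu_barrier();	/* wait for ALL outstanding callbacks */
	}

rcu_barrier() is coarser than the per-object completion it replaces, but it cannot miss a callback and it drops a completion field from each control structure.
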
diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c
new file mode 100644
index 000000000000..fde4db91c4d2
--- /dev/null
+++ b/drivers/edac/i3200_edac.c
@@ -0,0 +1,527 @@
+/*
+ * Intel 3200/3210 Memory Controller kernel module
+ * Copyright (C) 2008-2009 Akamai Technologies, Inc.
+ * Portions by Hitoshi Mitake <h.mitake@gmail.com>.
+ *
+ * This file may be distributed under the terms of the
+ * GNU General Public License.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/edac.h>
+#include <linux/io.h>
+#include "edac_core.h"
+
+#define I3200_REVISION		"1.1"
+
+#define EDAC_MOD_STR		"i3200_edac"
+
+#define PCI_DEVICE_ID_INTEL_3200_HB	0x29f0
+
+#define I3200_RANKS		8
+#define I3200_RANKS_PER_CHANNEL	4
+#define I3200_CHANNELS		2
+
+/* Intel 3200 register addresses - device 0 function 0 - DRAM Controller */
+
+#define I3200_MCHBAR_LOW	0x48	/* MCH Memory Mapped Register BAR */
+#define I3200_MCHBAR_HIGH	0x4c
+#define I3200_MCHBAR_MASK	0xfffffc000ULL	/* bits 35:14 */
+#define I3200_MMR_WINDOW_SIZE	16384
+
+#define I3200_TOM		0xa0	/* Top of Memory (16b)
+					 *
+					 * 15:10 reserved
+					 *  9:0  total populated physical memory
+					 */
+#define I3200_TOM_MASK		0x3ff	/* bits 9:0 */
+#define I3200_TOM_SHIFT		26	/* 64MiB grain */
+
+#define I3200_ERRSTS		0xc8	/* Error Status Register (16b)
+					 *
+					 * 15    reserved
+					 * 14    Isochronous TBWRR Run Behind FIFO Full
+					 *       (ITCV)
+					 * 13    Isochronous TBWRR Run Behind FIFO Put
+					 *       (ITSTV)
+					 * 12    reserved
+					 * 11    MCH Thermal Sensor Event
+					 *       for SMI/SCI/SERR (GTSE)
+					 * 10    reserved
+					 *  9    LOCK to non-DRAM Memory Flag (LCKF)
+					 *  8    reserved
+					 *  7    DRAM Throttle Flag (DTF)
+					 *  6:2  reserved
+					 *  1    Multi-bit DRAM ECC Error Flag (DMERR)
+					 *  0    Single-bit DRAM ECC Error Flag (DSERR)
+					 */
+#define I3200_ERRSTS_UE		0x0002
+#define I3200_ERRSTS_CE		0x0001
+#define I3200_ERRSTS_BITS	(I3200_ERRSTS_UE | I3200_ERRSTS_CE)
+
+
+/* Intel MMIO register space - device 0 function 0 - MMR space */
+
+#define I3200_C0DRB		0x200	/* Channel 0 DRAM Rank Boundary (16b x 4)
+					 *
+					 * 15:10 reserved
+					 *  9:0  Channel 0 DRAM Rank Boundary Address
+					 */
+#define I3200_C1DRB		0x600	/* Channel 1 DRAM Rank Boundary (16b x 4) */
+#define I3200_DRB_MASK		0x3ff	/* bits 9:0 */
+#define I3200_DRB_SHIFT		26	/* 64MiB grain */
+
+#define I3200_C0ECCERRLOG	0x280	/* Channel 0 ECC Error Log (64b)
+					 *
+					 * 63:48 Error Column Address (ERRCOL)
+					 * 47:32 Error Row Address (ERRROW)
+					 * 31:29 Error Bank Address (ERRBANK)
+					 * 28:27 Error Rank Address (ERRRANK)
+					 * 26:24 reserved
+					 * 23:16 Error Syndrome (ERRSYND)
+					 * 15: 2 reserved
+					 *    1  Multiple Bit Error Status (MERRSTS)
+					 *    0  Correctable Error Status (CERRSTS)
+					 */
+#define I3200_C1ECCERRLOG	0x680	/* Chan 1 ECC Error Log (64b) */
+#define I3200_ECCERRLOG_CE	0x1
+#define I3200_ECCERRLOG_UE	0x2
+#define I3200_ECCERRLOG_RANK_BITS	0x18000000
+#define I3200_ECCERRLOG_RANK_SHIFT	27
+#define I3200_ECCERRLOG_SYNDROME_BITS	0xff0000
+#define I3200_ECCERRLOG_SYNDROME_SHIFT	16
+#define I3200_CAPID0		0xe0	/* P.95 of spec for details */
+
+struct i3200_priv {
+	void __iomem *window;
+};
+
+static int nr_channels;
+
+static int how_many_channels(struct pci_dev *pdev)
+{
+	unsigned char capid0_8b; /* 8th byte of CAPID0 */
+
+	pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b);
+	if (capid0_8b & 0x20) {	/* check DCD: Dual Channel Disable */
+		debugf0("In single channel mode.\n");
+		return 1;
+	} else {
+		debugf0("In dual channel mode.\n");
+		return 2;
+	}
+}
+
+static unsigned long eccerrlog_syndrome(u64 log)
+{
+	return (log & I3200_ECCERRLOG_SYNDROME_BITS) >>
+		I3200_ECCERRLOG_SYNDROME_SHIFT;
+}
+
+static int eccerrlog_row(int channel, u64 log)
+{
+	u64 rank = ((log & I3200_ECCERRLOG_RANK_BITS) >>
+		I3200_ECCERRLOG_RANK_SHIFT);
+	return rank | (channel * I3200_RANKS_PER_CHANNEL);
+}
+
+enum i3200_chips {
+	I3200 = 0,
+};
+
+struct i3200_dev_info {
+	const char *ctl_name;
+};
+
+struct i3200_error_info {
+	u16 errsts;
+	u16 errsts2;
+	u64 eccerrlog[I3200_CHANNELS];
+};
+
+static const struct i3200_dev_info i3200_devs[] = {
+	[I3200] = {
+		.ctl_name = "i3200"
+	},
+};
+
+static struct pci_dev *mci_pdev;
+static int i3200_registered = 1;
+
+
+static void i3200_clear_error_info(struct mem_ctl_info *mci)
+{
+	struct pci_dev *pdev;
+
+	pdev = to_pci_dev(mci->dev);
+
+	/*
+	 * Clear any error bits.
+	 * (Yes, we really clear bits by writing 1 to them.)
+	 */
+	pci_write_bits16(pdev, I3200_ERRSTS, I3200_ERRSTS_BITS,
+		I3200_ERRSTS_BITS);
+}
+
+static void i3200_get_and_clear_error_info(struct mem_ctl_info *mci,
+		struct i3200_error_info *info)
+{
+	struct pci_dev *pdev;
+	struct i3200_priv *priv = mci->pvt_info;
+	void __iomem *window = priv->window;
+
+	pdev = to_pci_dev(mci->dev);
+
+	/*
+	 * This is a mess because there is no atomic way to read all the
+	 * registers at once and the registers can transition from CE being
+	 * overwritten by UE.
+	 */
+	pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts);
+	if (!(info->errsts & I3200_ERRSTS_BITS))
+		return;
+
+	info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
+	if (nr_channels == 2)
+		info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
+
+	pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts2);
+
+	/*
+	 * If the error is the same for both reads then the first set
+	 * of reads is valid.  If there is a change then there is a CE
+	 * with no info and the second set of reads is valid and
+	 * should be UE info.
+	 */
+	if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
+		info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
+		if (nr_channels == 2)
+			info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
+	}
+
+	i3200_clear_error_info(mci);
+}
+
+static void i3200_process_error_info(struct mem_ctl_info *mci,
+		struct i3200_error_info *info)
+{
+	int channel;
+	u64 log;
+
+	if (!(info->errsts & I3200_ERRSTS_BITS))
+		return;
+
+	if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
+		edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
+		info->errsts = info->errsts2;
+	}
+
+	for (channel = 0; channel < nr_channels; channel++) {
+		log = info->eccerrlog[channel];
+		if (log & I3200_ECCERRLOG_UE) {
+			edac_mc_handle_ue(mci, 0, 0,
+				eccerrlog_row(channel, log),
+				"i3200 UE");
+		} else if (log & I3200_ECCERRLOG_CE) {
+			edac_mc_handle_ce(mci, 0, 0,
+				eccerrlog_syndrome(log),
+				eccerrlog_row(channel, log), 0,
+				"i3200 CE");
+		}
+	}
+}
+
+static void i3200_check(struct mem_ctl_info *mci)
+{
+	struct i3200_error_info info;
+
+	debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
+	i3200_get_and_clear_error_info(mci, &info);
+	i3200_process_error_info(mci, &info);
+}
+
+
+void __iomem *i3200_map_mchbar(struct pci_dev *pdev)
+{
+	union {
+		u64 mchbar;
+		struct {
+			u32 mchbar_low;
+			u32 mchbar_high;
+		};
+	} u;
+	void __iomem *window;
+
+	pci_read_config_dword(pdev, I3200_MCHBAR_LOW, &u.mchbar_low);
+	pci_read_config_dword(pdev, I3200_MCHBAR_HIGH, &u.mchbar_high);
+	u.mchbar &= I3200_MCHBAR_MASK;
+
+	if (u.mchbar != (resource_size_t)u.mchbar) {
+		printk(KERN_ERR
+			"i3200: mmio space beyond accessible range (0x%llx)\n",
+			(unsigned long long)u.mchbar);
+		return NULL;
+	}
+
+	window = ioremap_nocache(u.mchbar, I3200_MMR_WINDOW_SIZE);
+	if (!window)
+		printk(KERN_ERR "i3200: cannot map mmio space at 0x%llx\n",
+			(unsigned long long)u.mchbar);
+
+	return window;
+}
+
+
+static void i3200_get_drbs(void __iomem *window,
+	u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
+{
+	int i;
+
+	for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) {
+		drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK;
+		drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK;
+	}
+}
+
+static bool i3200_is_stacked(struct pci_dev *pdev,
+	u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
+{
+	u16 tom;
+
+	pci_read_config_word(pdev, I3200_TOM, &tom);
+	tom &= I3200_TOM_MASK;
+
+	return drbs[I3200_CHANNELS - 1][I3200_RANKS_PER_CHANNEL - 1] == tom;
+}
+
+static unsigned long drb_to_nr_pages(
+	u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL], bool stacked,
+	int channel, int rank)
+{
+	int n;
+
+	n = drbs[channel][rank];
+	if (rank > 0)
+		n -= drbs[channel][rank - 1];
+	if (stacked && (channel == 1) &&
+	drbs[channel][rank] == drbs[channel][I3200_RANKS_PER_CHANNEL - 1])
+		n -= drbs[0][I3200_RANKS_PER_CHANNEL - 1];
+
+	n <<= (I3200_DRB_SHIFT - PAGE_SHIFT);
+	return n;
+}
+
+static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
+{
+	int rc;
+	int i;
+	struct mem_ctl_info *mci = NULL;
+	unsigned long last_page;
+	u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL];
+	bool stacked;
+	void __iomem *window;
+	struct i3200_priv *priv;
+
+	debugf0("MC: %s()\n", __func__);
+
+	window = i3200_map_mchbar(pdev);
+	if (!window)
+		return -ENODEV;
+
+	i3200_get_drbs(window, drbs);
+	nr_channels = how_many_channels(pdev);
+
+	mci = edac_mc_alloc(sizeof(struct i3200_priv), I3200_RANKS,
+		nr_channels, 0);
+	if (!mci)
+		return -ENOMEM;
+
+	debugf3("MC: %s(): init mci\n", __func__);
+
+	mci->dev = &pdev->dev;
+	mci->mtype_cap = MEM_FLAG_DDR2;
+
+	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+
+	mci->mod_name = EDAC_MOD_STR;
+	mci->mod_ver = I3200_REVISION;
+	mci->ctl_name = i3200_devs[dev_idx].ctl_name;
+	mci->dev_name = pci_name(pdev);
+	mci->edac_check = i3200_check;
+	mci->ctl_page_to_phys = NULL;
+	priv = mci->pvt_info;
+	priv->window = window;
+
+	stacked = i3200_is_stacked(pdev, drbs);
+
+	/*
+	 * The dram rank boundary (DRB) reg values are boundary addresses
+	 * for each DRAM rank with a granularity of 64MB.  DRB regs are
+	 * cumulative; the last one will contain the total memory
+	 * contained in all ranks.
+	 */
+	last_page = -1UL;
+	for (i = 0; i < mci->nr_csrows; i++) {
+		unsigned long nr_pages;
+		struct csrow_info *csrow = &mci->csrows[i];
+
+		nr_pages = drb_to_nr_pages(drbs, stacked,
+			i / I3200_RANKS_PER_CHANNEL,
+			i % I3200_RANKS_PER_CHANNEL);
+
+		if (nr_pages == 0) {
+			csrow->mtype = MEM_EMPTY;
+			continue;
+		}
+
+		csrow->first_page = last_page + 1;
+		last_page += nr_pages;
+		csrow->last_page = last_page;
+		csrow->nr_pages = nr_pages;
+
+		csrow->grain = nr_pages << PAGE_SHIFT;
+		csrow->mtype = MEM_DDR2;
+		csrow->dtype = DEV_UNKNOWN;
+		csrow->edac_mode = EDAC_UNKNOWN;
+	}
+
+	i3200_clear_error_info(mci);
+
+	rc = -ENODEV;
+	if (edac_mc_add_mc(mci)) {
+		debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__);
+		goto fail;
+	}
+
+	/* get this far and it's successful */
+	debugf3("MC: %s(): success\n", __func__);
+	return 0;
+
+fail:
+	iounmap(window);
+	if (mci)
+		edac_mc_free(mci);
+
+	return rc;
+}
+
+static int __devinit i3200_init_one(struct pci_dev *pdev,
+		const struct pci_device_id *ent)
+{
+	int rc;
+
+	debugf0("MC: %s()\n", __func__);
+
+	if (pci_enable_device(pdev) < 0)
+		return -EIO;
+
+	rc = i3200_probe1(pdev, ent->driver_data);
+	if (!mci_pdev)
+		mci_pdev = pci_dev_get(pdev);
+
+	return rc;
+}
+
+static void __devexit i3200_remove_one(struct pci_dev *pdev)
+{
+	struct mem_ctl_info *mci;
+	struct i3200_priv *priv;
+
+	debugf0("%s()\n", __func__);
+
+	mci = edac_mc_del_mc(&pdev->dev);
+	if (!mci)
+		return;
+
+	priv = mci->pvt_info;
+	iounmap(priv->window);
+
+	edac_mc_free(mci);
+}
+
+static const struct pci_device_id i3200_pci_tbl[] __devinitdata = {
+	{
+		PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		I3200},
+	{
+		0,
+	}	/* 0 terminated list. */
+};
+
+MODULE_DEVICE_TABLE(pci, i3200_pci_tbl);
+
+static struct pci_driver i3200_driver = {
+	.name = EDAC_MOD_STR,
+	.probe = i3200_init_one,
+	.remove = __devexit_p(i3200_remove_one),
+	.id_table = i3200_pci_tbl,
+};
+
+static int __init i3200_init(void)
+{
+	int pci_rc;
+
+	debugf3("MC: %s()\n", __func__);
+
+	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
+	opstate_init();
+
+	pci_rc = pci_register_driver(&i3200_driver);
+	if (pci_rc < 0)
+		goto fail0;
+
+	if (!mci_pdev) {
+		i3200_registered = 0;
+		mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+			PCI_DEVICE_ID_INTEL_3200_HB, NULL);
+		if (!mci_pdev) {
+			debugf0("i3200 pci_get_device fail\n");
+			pci_rc = -ENODEV;
+			goto fail1;
+		}
+
+		pci_rc = i3200_init_one(mci_pdev, i3200_pci_tbl);
+		if (pci_rc < 0) {
+			debugf0("i3200 init fail\n");
+			pci_rc = -ENODEV;
+			goto fail1;
+		}
+	}
+
+	return 0;
+
+fail1:
+	pci_unregister_driver(&i3200_driver);
+
+fail0:
+	if (mci_pdev)
+		pci_dev_put(mci_pdev);
+
+	return pci_rc;
+}
+
+static void __exit i3200_exit(void)
+{
+	debugf3("MC: %s()\n", __func__);
+
+	pci_unregister_driver(&i3200_driver);
+	if (!i3200_registered) {
+		i3200_remove_one(mci_pdev);
+		pci_dev_put(mci_pdev);
+	}
+}
+
+module_init(i3200_init);
+module_exit(i3200_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Akamai Technologies, Inc.");
+MODULE_DESCRIPTION("MC support for Intel 3200 memory hub controllers");
+
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
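
Worked example for drb_to_nr_pages() above (an illustration, not part of the commit): DRB registers count cumulative 64 MiB units (I3200_DRB_SHIFT = 26), so a rank's size is the delta from its predecessor, converted to pages by shifting left by 26 - PAGE_SHIFT:

	/* With PAGE_SHIFT == 12 (4 KiB pages) and DRB deltas in 64 MiB units:
	 * a rank whose DRB rises from 16 to 32 spans 16 * 64 MiB = 1 GiB,
	 * i.e. 16 << (26 - 12) = 16 << 14 = 262144 pages. */
	unsigned long n = 32 - 16;		/* delta in 64 MiB units */
	unsigned long pages = n << (26 - 12);	/* 262144 pages == 1 GiB */
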
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index 3f2ccfc6407c..157f6504f25e 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -41,7 +41,9 @@ static u32 orig_pci_err_en;
 #endif
 
 static u32 orig_l2_err_disable;
+#ifdef CONFIG_MPC85xx
 static u32 orig_hid1[2];
+#endif
 
 /************************ MC SYSFS parts ***********************************/
 
@@ -646,6 +648,7 @@ static struct of_device_id mpc85xx_l2_err_of_match[] = {
 	{ .compatible = "fsl,mpc8560-l2-cache-controller", },
 	{ .compatible = "fsl,mpc8568-l2-cache-controller", },
 	{ .compatible = "fsl,mpc8572-l2-cache-controller", },
+	{ .compatible = "fsl,p2020-l2-cache-controller", },
 	{},
 };
 
@@ -788,19 +791,20 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci)
 		csrow = &mci->csrows[index];
 		cs_bnds = in_be32(pdata->mc_vbase + MPC85XX_MC_CS_BNDS_0 +
 				  (index * MPC85XX_MC_CS_BNDS_OFS));
-		start = (cs_bnds & 0xfff0000) << 4;
-		end = ((cs_bnds & 0xfff) << 20);
-		if (start)
-			start |= 0xfffff;
-		if (end)
-			end |= 0xfffff;
+
+		start = (cs_bnds & 0xffff0000) >> 16;
+		end   = (cs_bnds & 0x0000ffff);
 
 		if (start == end)
 			continue;	/* not populated */
 
+		start <<= (24 - PAGE_SHIFT);
+		end   <<= (24 - PAGE_SHIFT);
+		end    |= (1 << (24 - PAGE_SHIFT)) - 1;
+
 		csrow->first_page = start >> PAGE_SHIFT;
 		csrow->last_page = end >> PAGE_SHIFT;
-		csrow->nr_pages = csrow->last_page + 1 - csrow->first_page;
+		csrow->nr_pages = end + 1 - start;
 		csrow->grain = 8;
 		csrow->mtype = mtype;
 		csrow->dtype = DEV_UNKNOWN;
@@ -984,6 +988,8 @@ static struct of_device_id mpc85xx_mc_err_of_match[] = {
 	{ .compatible = "fsl,mpc8560-memory-controller", },
 	{ .compatible = "fsl,mpc8568-memory-controller", },
 	{ .compatible = "fsl,mpc8572-memory-controller", },
+	{ .compatible = "fsl,mpc8349-memory-controller", },
+	{ .compatible = "fsl,p2020-memory-controller", },
 	{},
 };
 
@@ -999,13 +1005,13 @@ static struct of_platform_driver mpc85xx_mc_err_driver = {
 	},
 };
 
-
+#ifdef CONFIG_MPC85xx
 static void __init mpc85xx_mc_clear_rfxe(void *data)
 {
 	orig_hid1[smp_processor_id()] = mfspr(SPRN_HID1);
 	mtspr(SPRN_HID1, (orig_hid1[smp_processor_id()] & ~0x20000));
 }
-
+#endif
 
 static int __init mpc85xx_mc_init(void)
 {
@@ -1038,26 +1044,32 @@ static int __init mpc85xx_mc_init(void)
 		printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n");
 #endif
 
+#ifdef CONFIG_MPC85xx
 	/*
 	 * need to clear HID1[RFXE] to disable machine check int
 	 * so we can catch it
 	 */
 	if (edac_op_state == EDAC_OPSTATE_INT)
 		on_each_cpu(mpc85xx_mc_clear_rfxe, NULL, 0);
+#endif
 
 	return 0;
 }
 
 module_init(mpc85xx_mc_init);
 
+#ifdef CONFIG_MPC85xx
 static void __exit mpc85xx_mc_restore_hid1(void *data)
 {
 	mtspr(SPRN_HID1, orig_hid1[smp_processor_id()]);
 }
+#endif
 
 static void __exit mpc85xx_mc_exit(void)
 {
+#ifdef CONFIG_MPC85xx
 	on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0);
+#endif
 #ifdef CONFIG_PCI
 	of_unregister_platform_driver(&mpc85xx_pci_err_driver);
 #endif
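
Worked example for the new CS_BNDS decoding above (illustration only): the start and end fields count 16 MiB units, hence the 24 - PAGE_SHIFT shift, and end is rounded up to the last page of its unit:

	u32 cs_bnds = 0x0000000f;	/* sample register: start 0, end 0xf */
	unsigned long start = (cs_bnds & 0xffff0000) >> 16;	/* 0 */
	unsigned long end   = (cs_bnds & 0x0000ffff);		/* 0xf */

	start <<= (24 - 12);			/* 0: first page */
	end   <<= (24 - 12);
	end    |= (1 << (24 - 12)) - 1;		/* 0xffff: last page */
	/* nr_pages = end + 1 - start = 65536 pages = 256 MiB,
	 * i.e. 16 units of 16 MiB each (units 0..15 inclusive). */
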
diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c
index 5131aaae8e03..a6b9fec13a74 100644
--- a/drivers/edac/mv64x60_edac.c
+++ b/drivers/edac/mv64x60_edac.c
@@ -90,7 +90,7 @@ static int __init mv64x60_pci_fixup(struct platform_device *pdev)
 		return -ENOENT;
 	}
 
-	pci_serr = ioremap(r->start, r->end - r->start + 1);
+	pci_serr = ioremap(r->start, resource_size(r));
 	if (!pci_serr)
 		return -ENOMEM;
 
@@ -140,7 +140,7 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev)
 
 	if (!devm_request_mem_region(&pdev->dev,
 				     r->start,
-				     r->end - r->start + 1,
+				     resource_size(r),
 				     pdata->name)) {
 		printk(KERN_ERR "%s: Error while requesting mem region\n",
 		       __func__);
@@ -150,7 +150,7 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev)
 
 	pdata->pci_vbase = devm_ioremap(&pdev->dev,
 					r->start,
-					r->end - r->start + 1);
+					resource_size(r));
 	if (!pdata->pci_vbase) {
 		printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__);
 		res = -ENOMEM;
@@ -306,7 +306,7 @@ static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev)
 
 	if (!devm_request_mem_region(&pdev->dev,
 				     r->start,
-				     r->end - r->start + 1,
+				     resource_size(r),
 				     pdata->name)) {
 		printk(KERN_ERR "%s: Error while request mem region\n",
 		       __func__);
@@ -316,7 +316,7 @@ static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev)
 
 	pdata->sram_vbase = devm_ioremap(&pdev->dev,
 					 r->start,
-					 r->end - r->start + 1);
+					 resource_size(r));
 	if (!pdata->sram_vbase) {
 		printk(KERN_ERR "%s: Unable to setup SRAM err regs\n",
 		       __func__);
@@ -474,7 +474,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev)
 
 	if (!devm_request_mem_region(&pdev->dev,
 				     r->start,
-				     r->end - r->start + 1,
+				     resource_size(r),
 				     pdata->name)) {
 		printk(KERN_ERR "%s: Error while requesting mem region\n",
 		       __func__);
@@ -484,7 +484,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev)
 
 	pdata->cpu_vbase[0] = devm_ioremap(&pdev->dev,
 					   r->start,
-					   r->end - r->start + 1);
+					   resource_size(r));
 	if (!pdata->cpu_vbase[0]) {
 		printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__);
 		res = -ENOMEM;
@@ -501,7 +501,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev)
 
 	if (!devm_request_mem_region(&pdev->dev,
 				     r->start,
-				     r->end - r->start + 1,
+				     resource_size(r),
 				     pdata->name)) {
 		printk(KERN_ERR "%s: Error while requesting mem region\n",
 		       __func__);
@@ -511,7 +511,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev)
 
 	pdata->cpu_vbase[1] = devm_ioremap(&pdev->dev,
 					   r->start,
-					   r->end - r->start + 1);
+					   resource_size(r));
 	if (!pdata->cpu_vbase[1]) {
 		printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__);
 		res = -ENOMEM;
@@ -726,7 +726,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev)
 
 	if (!devm_request_mem_region(&pdev->dev,
 				     r->start,
-				     r->end - r->start + 1,
+				     resource_size(r),
 				     pdata->name)) {
 		printk(KERN_ERR "%s: Error while requesting mem region\n",
 		       __func__);
@@ -736,7 +736,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev)
 
 	pdata->mc_vbase = devm_ioremap(&pdev->dev,
 				       r->start,
-				       r->end - r->start + 1);
+				       resource_size(r));
 	if (!pdata->mc_vbase) {
 		printk(KERN_ERR "%s: Unable to setup MC err regs\n", __func__);
 		res = -ENOMEM;
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index e4d971c8b9d0..f831ea159291 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -102,6 +102,7 @@ config DRM_I915
 	select BACKLIGHT_CLASS_DEVICE if ACPI
 	select INPUT if ACPI
 	select ACPI_VIDEO if ACPI
+	select ACPI_BUTTON if ACPI
 	help
 	  Choose this option if you have a system that has Intel 830M, 845G,
 	  852GM, 855GM 865G or 915G integrated graphics. If M is selected, the
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 230c9ffdd5e9..80391995bdec 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -142,6 +142,19 @@ drm_gem_object_alloc(struct drm_device *dev, size_t size)
 	if (IS_ERR(obj->filp))
 		goto free;
 
+	/* Basically we want to disable the OOM killer and handle ENOMEM
+	 * ourselves by sacrificing pages from cached buffers.
+	 * XXX shmem_file_[gs]et_gfp_mask()
+	 */
+	mapping_set_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping,
+			     GFP_HIGHUSER |
+			     __GFP_COLD |
+			     __GFP_FS |
+			     __GFP_RECLAIMABLE |
+			     __GFP_NORETRY |
+			     __GFP_NOWARN |
+			     __GFP_NOMEMALLOC);
+
 	kref_init(&obj->refcount);
 	kref_init(&obj->handlecount);
 	obj->size = size;
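
The hunk above tunes the GEM object's shmem mapping so page-cache allocations fail fast instead of invoking the OOM killer, letting the driver shed its own cached buffers on ENOMEM. A reduced sketch of the same idea (the helper name is hypothetical; the exact flag set is the commit's, trimmed to the fail-fast essentials):

	#include <linux/fs.h>
	#include <linux/pagemap.h>

	/* Allocations for this file's page cache will not retry endlessly,
	 * will not warn, and will not dip into emergency reserves; callers
	 * see ENOMEM and can reclaim their own buffers instead. */
	static void gem_mapping_fail_fast(struct file *filp)
	{
		struct address_space *mapping =
			filp->f_path.dentry->d_inode->i_mapping;

		mapping_set_gfp_mask(mapping, GFP_HIGHUSER |
				     __GFP_NORETRY | __GFP_NOWARN |
				     __GFP_NOMEMALLOC);
	}
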
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 5269dfa5f620..fa7b9be096bc 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -9,6 +9,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \
 	  i915_gem.o \
 	  i915_gem_debug.o \
 	  i915_gem_tiling.o \
+	  i915_trace_points.o \
 	  intel_display.o \
 	  intel_crt.o \
 	  intel_lvds.o \
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 1e3bdcee863c..f8ce9a3a420d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -96,11 +96,13 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data)
 	{
 		struct drm_gem_object *obj = obj_priv->obj;
 
-		seq_printf(m, "    %p: %s %08x %08x %d",
+		seq_printf(m, "    %p: %s %8zd %08x %08x %d %s",
 			   obj,
 			   get_pin_flag(obj_priv),
+			   obj->size,
 			   obj->read_domains, obj->write_domain,
-			   obj_priv->last_rendering_seqno);
+			   obj_priv->last_rendering_seqno,
+			   obj_priv->dirty ? "dirty" : "");
 
 		if (obj->name)
 			seq_printf(m, " (name: %d)", obj->name);
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 5a49a1867b35..45d507ebd3ff 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -33,6 +33,7 @@
 #include "intel_drv.h"
 #include "i915_drm.h"
 #include "i915_drv.h"
+#include "i915_trace.h"
 #include <linux/vgaarb.h>
 
 /* Really want an OS-independent resettable timer.  Would like to have
@@ -50,14 +51,18 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller)
 	u32 last_head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
 	int i;
 
+	trace_i915_ring_wait_begin (dev);
+
 	for (i = 0; i < 100000; i++) {
 		ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
 		acthd = I915_READ(acthd_reg);
 		ring->space = ring->head - (ring->tail + 8);
 		if (ring->space < 0)
 			ring->space += ring->Size;
-		if (ring->space >= n)
+		if (ring->space >= n) {
+			trace_i915_ring_wait_end (dev);
 			return 0;
+		}
 
 		if (dev->primary->master) {
 			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
@@ -77,6 +82,7 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller)
 
 	}
 
+	trace_i915_ring_wait_end (dev);
 	return -EBUSY;
 }
 
@@ -922,7 +928,8 @@ static int i915_get_bridge_dev(struct drm_device *dev)
  * how much was set aside so we can use it for our own purposes.
  */
 static int i915_probe_agp(struct drm_device *dev, uint32_t *aperture_size,
-			  uint32_t *preallocated_size)
+			  uint32_t *preallocated_size,
+			  uint32_t *start)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u16 tmp = 0;
@@ -1009,10 +1016,159 @@ static int i915_probe_agp(struct drm_device *dev, uint32_t *aperture_size,
 		return -1;
 	}
 	*preallocated_size = stolen - overhead;
+	*start = overhead;
 
 	return 0;
 }
 
+#define PTE_ADDRESS_MASK		0xfffff000
+#define PTE_ADDRESS_MASK_HIGH		0x000000f0 /* i915+ */
+#define PTE_MAPPING_TYPE_UNCACHED	(0 << 1)
+#define PTE_MAPPING_TYPE_DCACHE		(1 << 1) /* i830 only */
+#define PTE_MAPPING_TYPE_CACHED		(3 << 1)
+#define PTE_MAPPING_TYPE_MASK		(3 << 1)
+#define PTE_VALID			(1 << 0)
+
+/**
+ * i915_gtt_to_phys - take a GTT address and turn it into a physical one
+ * @dev: drm device
+ * @gtt_addr: address to translate
+ *
+ * Some chip functions require allocations from stolen space but need the
+ * physical address of the memory in question.  We use this routine
+ * to get a physical address suitable for register programming from a given
+ * GTT address.
+ */
+static unsigned long i915_gtt_to_phys(struct drm_device *dev,
+				      unsigned long gtt_addr)
+{
+	unsigned long *gtt;
+	unsigned long entry, phys;
+	int gtt_bar = IS_I9XX(dev) ? 0 : 1;
+	int gtt_offset, gtt_size;
+
+	if (IS_I965G(dev)) {
+		if (IS_G4X(dev) || IS_IGDNG(dev)) {
+			gtt_offset = 2*1024*1024;
+			gtt_size = 2*1024*1024;
+		} else {
+			gtt_offset = 512*1024;
+			gtt_size = 512*1024;
+		}
+	} else {
+		gtt_bar = 3;
+		gtt_offset = 0;
+		gtt_size = pci_resource_len(dev->pdev, gtt_bar);
+	}
+
+	gtt = ioremap_wc(pci_resource_start(dev->pdev, gtt_bar) + gtt_offset,
+			 gtt_size);
+	if (!gtt) {
+		DRM_ERROR("ioremap of GTT failed\n");
+		return 0;
+	}
+
+	entry = *(volatile u32 *)(gtt + (gtt_addr / 1024));
+
+	DRM_DEBUG("GTT addr: 0x%08lx, PTE: 0x%08lx\n", gtt_addr, entry);
+
+	/* Mask out these reserved bits on this hardware. */
+	if (!IS_I9XX(dev) || IS_I915G(dev) || IS_I915GM(dev) ||
+	    IS_I945G(dev) || IS_I945GM(dev)) {
+		entry &= ~PTE_ADDRESS_MASK_HIGH;
+	}
+
+	/* If it's not a mapping type we know, then bail. */
+	if ((entry & PTE_MAPPING_TYPE_MASK) != PTE_MAPPING_TYPE_UNCACHED &&
+	    (entry & PTE_MAPPING_TYPE_MASK) != PTE_MAPPING_TYPE_CACHED) {
+		iounmap(gtt);
+		return 0;
+	}
+
+	if (!(entry & PTE_VALID)) {
+		DRM_ERROR("bad GTT entry in stolen space\n");
+		iounmap(gtt);
+		return 0;
+	}
+
+	iounmap(gtt);
+
+	phys =(entry & PTE_ADDRESS_MASK) |
+		((uint64_t)(entry & PTE_ADDRESS_MASK_HIGH) << (32 - 4));
+
+	DRM_DEBUG("GTT addr: 0x%08lx, phys addr: 0x%08lx\n", gtt_addr, phys);
+
+	return phys;
+}
+
+static void i915_warn_stolen(struct drm_device *dev)
+{
+	DRM_ERROR("not enough stolen space for compressed buffer, disabling\n");
+	DRM_ERROR("hint: you may be able to increase stolen memory size in the BIOS to avoid this\n");
+}
+
+static void i915_setup_compression(struct drm_device *dev, int size)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_mm_node *compressed_fb, *compressed_llb;
+	unsigned long cfb_base, ll_base;
+
+	/* Leave 1M for line length buffer & misc. */
+	compressed_fb = drm_mm_search_free(&dev_priv->vram, size, 4096, 0);
+	if (!compressed_fb) {
+		i915_warn_stolen(dev);
+		return;
+	}
+
+	compressed_fb = drm_mm_get_block(compressed_fb, size, 4096);
+	if (!compressed_fb) {
+		i915_warn_stolen(dev);
+		return;
+	}
+
+	cfb_base = i915_gtt_to_phys(dev, compressed_fb->start);
+	if (!cfb_base) {
+		DRM_ERROR("failed to get stolen phys addr, disabling FBC\n");
+		drm_mm_put_block(compressed_fb);
+	}
+
+	if (!IS_GM45(dev)) {
+		compressed_llb = drm_mm_search_free(&dev_priv->vram, 4096,
+						    4096, 0);
+		if (!compressed_llb) {
+			i915_warn_stolen(dev);
+			return;
+		}
+
+		compressed_llb = drm_mm_get_block(compressed_llb, 4096, 4096);
+		if (!compressed_llb) {
+			i915_warn_stolen(dev);
+			return;
+		}
+
+		ll_base = i915_gtt_to_phys(dev, compressed_llb->start);
+		if (!ll_base) {
+			DRM_ERROR("failed to get stolen phys addr, disabling FBC\n");
+			drm_mm_put_block(compressed_fb);
+			drm_mm_put_block(compressed_llb);
+		}
+	}
+
+	dev_priv->cfb_size = size;
+
+	if (IS_GM45(dev)) {
+		g4x_disable_fbc(dev);
+		I915_WRITE(DPFC_CB_BASE, compressed_fb->start);
+	} else {
+		i8xx_disable_fbc(dev);
+		I915_WRITE(FBC_CFB_BASE, cfb_base);
+		I915_WRITE(FBC_LL_BASE, ll_base);
+	}
+
+	DRM_DEBUG("FBC base 0x%08lx, ll base 0x%08lx, size %dM\n", cfb_base,
+		  ll_base, size >> 20);
+}
+
 /* true = enable decode, false = disable decoder */
 static unsigned int i915_vga_set_decode(void *cookie, bool state)
 {
@@ -1027,6 +1183,7 @@ static unsigned int i915_vga_set_decode(void *cookie, bool state)
 }
 
 static int i915_load_modeset_init(struct drm_device *dev,
+				  unsigned long prealloc_start,
 				  unsigned long prealloc_size,
 				  unsigned long agp_size)
 {
@@ -1047,6 +1204,10 @@ static int i915_load_modeset_init(struct drm_device *dev,
 
 	/* Basic memrange allocator for stolen space (aka vram) */
 	drm_mm_init(&dev_priv->vram, 0, prealloc_size);
+	DRM_INFO("set up %ldM of stolen space\n", prealloc_size / (1024*1024));
+
+	/* We're off and running w/KMS */
+	dev_priv->mm.suspended = 0;
 
 	/* Let GEM Manage from end of prealloc space to end of aperture.
 	 *
@@ -1059,10 +1220,25 @@ static int i915_load_modeset_init(struct drm_device *dev,
 	 */
 	i915_gem_do_init(dev, prealloc_size, agp_size - 4096);
 
+	mutex_lock(&dev->struct_mutex);
 	ret = i915_gem_init_ringbuffer(dev);
+	mutex_unlock(&dev->struct_mutex);
 	if (ret)
 		goto out;
 
+	/* Try to set up FBC with a reasonable compressed buffer size */
+	if (IS_MOBILE(dev) && (IS_I9XX(dev) || IS_I965G(dev) || IS_GM45(dev)) &&
+	    i915_powersave) {
+		int cfb_size;
+
+		/* Try to get an 8M buffer... */
+		if (prealloc_size > (9*1024*1024))
+			cfb_size = 8*1024*1024;
+		else /* fall back to 7/8 of the stolen space */
+			cfb_size = prealloc_size * 7 / 8;
+		i915_setup_compression(dev, cfb_size);
+	}
+
 	/* Allow hardware batchbuffers unless told otherwise.
 	 */
 	dev_priv->allow_batchbuffer = 1;
@@ -1180,7 +1356,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	resource_size_t base, size;
 	int ret = 0, mmio_bar = IS_I9XX(dev) ? 0 : 1;
-	uint32_t agp_size, prealloc_size;
+	uint32_t agp_size, prealloc_size, prealloc_start;
 
 	/* i915 has 4 more counters */
 	dev->counters += 4;
@@ -1234,7 +1410,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 			  "performance may suffer.\n");
 	}
 
-	ret = i915_probe_agp(dev, &agp_size, &prealloc_size);
+	ret = i915_probe_agp(dev, &agp_size, &prealloc_size, &prealloc_start);
 	if (ret)
 		goto out_iomapfree;
 
@@ -1300,8 +1476,12 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 		return ret;
 	}
 
+	/* Start out suspended */
+	dev_priv->mm.suspended = 1;
+
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
-		ret = i915_load_modeset_init(dev, prealloc_size, agp_size);
+		ret = i915_load_modeset_init(dev, prealloc_start,
+					     prealloc_size, agp_size);
 		if (ret < 0) {
 			DRM_ERROR("failed to init modeset\n");
 			goto out_workqueue_free;
@@ -1313,6 +1493,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	if (!IS_IGDNG(dev))
 		intel_opregion_init(dev, 0);
 
+	setup_timer(&dev_priv->hangcheck_timer, i915_hangcheck_elapsed,
+		    (unsigned long) dev);
 	return 0;
 
 out_workqueue_free:
@@ -1333,6 +1515,7 @@ int i915_driver_unload(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	destroy_workqueue(dev_priv->wq);
+	del_timer_sync(&dev_priv->hangcheck_timer);
 
 	io_mapping_free(dev_priv->mm.gtt_mapping);
 	if (dev_priv->mm.gtt_mtrr >= 0) {
@@ -1472,6 +1655,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF(DRM_I915_GEM_GET_TILING, i915_gem_get_tiling, 0),
 	DRM_IOCTL_DEF(DRM_I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, 0),
+	DRM_IOCTL_DEF(DRM_I915_GEM_MADVISE, i915_gem_madvise_ioctl, 0),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
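
Arithmetic check for i915_gtt_to_phys() above (an illustration, not part of the commit): a GTT PTE keeps physical bits 31:12 in PTE_ADDRESS_MASK and, on parts that address more than 4 GiB, bits 35:32 in the 0xf0 nibble, which the code shifts up by 32 - 4 = 28:

	u32 entry = 0x123450f1;	/* sample valid, uncached PTE */
	u64 phys  = (entry & 0xfffff000) |		/* 0x12345000 */
		    ((u64)(entry & 0x000000f0) << 28);	/* 0xf00000000 */
	/* phys == 0xf12345000: PTE bits 7:4 land at physical bits 35:32 */
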
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index dbe568c9327b..b93814c0d3e2 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -89,6 +89,8 @@ static int i915_suspend(struct drm_device *dev, pm_message_t state)
 		pci_set_power_state(dev->pdev, PCI_D3hot);
 	}
 
+	dev_priv->suspended = 1;
+
 	return 0;
 }
 
@@ -97,8 +99,6 @@ static int i915_resume(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret = 0;
 
-	pci_set_power_state(dev->pdev, PCI_D0);
-	pci_restore_state(dev->pdev);
 	if (pci_enable_device(dev->pdev))
 		return -1;
 	pci_set_master(dev->pdev);
@@ -124,9 +124,135 @@ static int i915_resume(struct drm_device *dev)
 		drm_helper_resume_force_mode(dev);
 	}
 
+	dev_priv->suspended = 0;
+
 	return ret;
 }
 
+/**
+ * i965_reset - reset chip after a hang
+ * @dev: drm device to reset
+ * @flags: reset domains
+ *
+ * Reset the chip.  Useful if a hang is detected. Returns zero on successful
+ * reset or otherwise an error code.
+ *
+ * Procedure is fairly simple:
+ *	- reset the chip using the reset reg
+ *	- re-init context state
+ *	- re-init hardware status page
+ *	- re-init ring buffer
+ *	- re-init interrupt state
+ *	- re-init display
+ */
+int i965_reset(struct drm_device *dev, u8 flags)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	unsigned long timeout;
+	u8 gdrst;
+	/*
+	 * We really should only reset the display subsystem if we actually
+	 * need to
+	 */
+	bool need_display = true;
+
+	mutex_lock(&dev->struct_mutex);
+
+	/*
+	 * Clear request list
+	 */
+	i915_gem_retire_requests(dev);
+
+	if (need_display)
+		i915_save_display(dev);
+
+	if (IS_I965G(dev) || IS_G4X(dev)) {
+		/*
+		 * Set the domains we want to reset, then the reset bit (bit 0).
+		 * Clear the reset bit after a while and wait for hardware status
+		 * bit (bit 1) to be set
+		 */
+		pci_read_config_byte(dev->pdev, GDRST, &gdrst);
+		pci_write_config_byte(dev->pdev, GDRST, gdrst | flags | ((flags == GDRST_FULL) ? 0x1 : 0x0));
+		udelay(50);
+		pci_write_config_byte(dev->pdev, GDRST, gdrst & 0xfe);
+
+		/* ...we don't want to loop forever though, 500ms should be plenty */
+		timeout = jiffies + msecs_to_jiffies(500);
+		do {
+			udelay(100);
+			pci_read_config_byte(dev->pdev, GDRST, &gdrst);
+		} while ((gdrst & 0x1) && time_after(timeout, jiffies));
+
+		if (gdrst & 0x1) {
+			WARN(true, "i915: Failed to reset chip\n");
+			mutex_unlock(&dev->struct_mutex);
+			return -EIO;
+		}
+	} else {
+		DRM_ERROR("Error occurred. Don't know how to reset this chip.\n");
+		return -ENODEV;
+	}
+
+	/* Ok, now get things going again... */
+
+	/*
+	 * Everything depends on having the GTT running, so we need to start
+	 * there.  Fortunately we don't need to do this unless we reset the
+	 * chip at a PCI level.
+	 *
+	 * Next we need to restore the context, but we don't use those
+	 * yet either...
+	 *
+	 * Ring buffer needs to be re-initialized in the KMS case, or if X
+	 * was running at the time of the reset (i.e. we weren't VT
+	 * switched away).
+	 */
+	if (drm_core_check_feature(dev, DRIVER_MODESET) ||
+	    !dev_priv->mm.suspended) {
+		drm_i915_ring_buffer_t *ring = &dev_priv->ring;
+		struct drm_gem_object *obj = ring->ring_obj;
+		struct drm_i915_gem_object *obj_priv = obj->driver_private;
+		dev_priv->mm.suspended = 0;
+
+		/* Stop the ring if it's running. */
+		I915_WRITE(PRB0_CTL, 0);
+		I915_WRITE(PRB0_TAIL, 0);
+		I915_WRITE(PRB0_HEAD, 0);
+
+		/* Initialize the ring. */
+		I915_WRITE(PRB0_START, obj_priv->gtt_offset);
+		I915_WRITE(PRB0_CTL,
+			   ((obj->size - 4096) & RING_NR_PAGES) |
+			   RING_NO_REPORT |
+			   RING_VALID);
+		if (!drm_core_check_feature(dev, DRIVER_MODESET))
+			i915_kernel_lost_context(dev);
+		else {
+			ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
+			ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
+			ring->space = ring->head - (ring->tail + 8);
+			if (ring->space < 0)
+				ring->space += ring->Size;
+		}
+
+		mutex_unlock(&dev->struct_mutex);
+		drm_irq_uninstall(dev);
+		drm_irq_install(dev);
+		mutex_lock(&dev->struct_mutex);
+	}
+
+	/*
+	 * Display needs restore too...
+	 */
+	if (need_display)
+		i915_restore_display(dev);
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+
 static int __devinit
 i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
@@ -234,6 +360,8 @@ static int __init i915_init(void)
 {
 	driver.num_ioctls = i915_max_ioctl;
 
+	i915_gem_shrinker_init();
+
 	/*
 	 * If CONFIG_DRM_I915_KMS is set, default to KMS unless
 	 * explicitly disabled with the module pararmeter.
@@ -260,6 +388,7 @@ static int __init i915_init(void)
 
 static void __exit i915_exit(void)
 {
+	i915_gem_shrinker_exit();
 	drm_exit(&driver);
 }
 
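
The reset wait in i965_reset() above is the standard jiffies-bounded busy-poll; a generic sketch of that pattern (read_status() is a hypothetical helper standing in for the GDRST config-space read):

	#include <linux/jiffies.h>
	#include <linux/delay.h>

	/* Poll a "busy" bit with an upper bound: time_after() copes with
	 * jiffies wrap-around, and the final re-check distinguishes
	 * success from timeout. */
	unsigned long timeout = jiffies + msecs_to_jiffies(500);
	u8 status;

	do {
		udelay(100);
		status = read_status();
	} while ((status & 0x1) && time_after(timeout, jiffies));

	if (status & 0x1)
		return -EIO;	/* still busy after 500 ms */
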
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a0632f8e76ac..b24b2d145b75 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -48,6 +48,11 @@ enum pipe {
 	PIPE_B,
 };
 
+enum plane {
+	PLANE_A = 0,
+	PLANE_B,
+};
+
 #define I915_NUM_PIPE	2
 
 /* Interface history:
@@ -148,6 +153,23 @@ struct drm_i915_error_state {
 	struct timeval time;
 };
 
+struct drm_i915_display_funcs {
+	void (*dpms)(struct drm_crtc *crtc, int mode);
+	bool (*fbc_enabled)(struct drm_crtc *crtc);
+	void (*enable_fbc)(struct drm_crtc *crtc, unsigned long interval);
+	void (*disable_fbc)(struct drm_device *dev);
+	int (*get_display_clock_speed)(struct drm_device *dev);
+	int (*get_fifo_size)(struct drm_device *dev, int plane);
+	void (*update_wm)(struct drm_device *dev, int planea_clock,
+			  int planeb_clock, int sr_hdisplay, int pixel_size);
+	/* clock updates for mode set */
+	/* cursor updates */
+	/* render clock increase/decrease */
+	/* display clock increase/decrease */
+	/* pll clock increase/decrease */
+	/* clock gating init */
+};
+
 typedef struct drm_i915_private {
 	struct drm_device *dev;
 
@@ -198,10 +220,21 @@ typedef struct drm_i915_private {
 	unsigned int sr01, adpa, ppcr, dvob, dvoc, lvds;
 	int vblank_pipe;
 
+	/* For hangcheck timer */
+#define DRM_I915_HANGCHECK_PERIOD 75 /* in jiffies */
+	struct timer_list hangcheck_timer;
+	int hangcheck_count;
+	uint32_t last_acthd;
+
 	bool cursor_needs_physical;
 
 	struct drm_mm vram;
 
+	unsigned long cfb_size;
+	unsigned long cfb_pitch;
+	int cfb_fence;
+	int cfb_plane;
+
 	int irq_enabled;
 
 	struct intel_opregion opregion;
@@ -222,6 +255,8 @@ typedef struct drm_i915_private {
 	unsigned int edp_support:1;
 	int lvds_ssc_freq;
 
+	struct notifier_block lid_notifier;
+
 	int crt_ddc_bus; /* -1 = unknown, else GPIO to use for CRT DDC */
 	struct drm_i915_fence_reg fence_regs[16]; /* assume 965 */
 	int fence_reg_start; /* 4 if userland hasn't ioctl'd us yet */
@@ -234,7 +269,11 @@ typedef struct drm_i915_private {
 	struct work_struct error_work;
 	struct workqueue_struct *wq;
 
+	/* Display functions */
+	struct drm_i915_display_funcs display;
+
 	/* Register state */
+	bool suspended;
 	u8 saveLBB;
 	u32 saveDSPACNTR;
 	u32 saveDSPBCNTR;
@@ -350,6 +389,15 @@ typedef struct drm_i915_private {
 	int gtt_mtrr;
 
 	/**
+	 * Membership on list of all loaded devices, used to evict
+	 * inactive buffers under memory pressure.
+	 *
+	 * Modifications should only be done whilst holding the
+	 * shrink_list_lock spinlock.
+	 */
+	struct list_head shrink_list;
+
+	/**
 	 * List of objects currently involved in rendering from the
 	 * ringbuffer.
 	 *
@@ -432,7 +480,7 @@ typedef struct drm_i915_private {
 	 * It prevents command submission from occuring and makes
 	 * every pending request fail
 	 */
-	int wedged;
+	atomic_t wedged;
 
 	/** Bit 6 swizzling required for X tiling */
 	uint32_t bit_6_swizzle_x;
@@ -491,10 +539,7 @@ struct drm_i915_gem_object {
 	 * This is the same as gtt_space->start
 	 */
 	uint32_t gtt_offset;
-	/**
-	 * Required alignment for the object
-	 */
-	uint32_t gtt_alignment;
+
 	/**
 	 * Fake offset for use by mmap(2)
 	 */
@@ -541,6 +586,11 @@ struct drm_i915_gem_object {
 	 * in an execbuffer object list.
 	 */
 	int in_execbuffer;
+
+	/**
+	 * Advice: are the backing pages purgeable?
+	 */
+	int madv;
 };
 
 /**
@@ -585,6 +635,8 @@ extern int i915_max_ioctl;
 extern unsigned int i915_fbpercrtc;
 extern unsigned int i915_powersave;
 
+extern void i915_save_display(struct drm_device *dev);
+extern void i915_restore_display(struct drm_device *dev);
 extern int i915_master_create(struct drm_device *dev, struct drm_master *master);
 extern void i915_master_destroy(struct drm_device *dev, struct drm_master *master);
 
@@ -604,8 +656,10 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
 extern int i915_emit_box(struct drm_device *dev,
 			 struct drm_clip_rect *boxes,
 			 int i, int DR1, int DR4);
+extern int i965_reset(struct drm_device *dev, u8 flags);
 
 /* i915_irq.c */
+void i915_hangcheck_elapsed(unsigned long data);
 extern int i915_irq_emit(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
 extern int i915_irq_wait(struct drm_device *dev, void *data,
@@ -676,6 +730,8 @@ int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_priv);
+int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file_priv);
 int i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv);
 int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
@@ -695,6 +751,7 @@ int i915_gem_object_unbind(struct drm_gem_object *obj);
 void i915_gem_release_mmap(struct drm_gem_object *obj);
 void i915_gem_lastclose(struct drm_device *dev);
 uint32_t i915_get_gem_seqno(struct drm_device *dev);
+bool i915_seqno_passed(uint32_t seq1, uint32_t seq2);
 int i915_gem_object_get_fence_reg(struct drm_gem_object *obj);
 int i915_gem_object_put_fence_reg(struct drm_gem_object *obj);
 void i915_gem_retire_requests(struct drm_device *dev);
@@ -720,6 +777,9 @@ int i915_gem_object_get_pages(struct drm_gem_object *obj);
 void i915_gem_object_put_pages(struct drm_gem_object *obj);
 void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv);
 
+void i915_gem_shrinker_init(void);
+void i915_gem_shrinker_exit(void);
+
 /* i915_gem_tiling.c */
 void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
 void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj);
@@ -767,6 +827,8 @@ static inline void opregion_enable_asle(struct drm_device *dev) { return; }
 extern void intel_modeset_init(struct drm_device *dev);
 extern void intel_modeset_cleanup(struct drm_device *dev);
 extern int intel_modeset_vga_set_state(struct drm_device *dev, bool state);
+extern void i8xx_disable_fbc(struct drm_device *dev);
+extern void g4x_disable_fbc(struct drm_device *dev);
 
 /**
  * Lock test for when it's just for synchronization of ring access.
@@ -864,6 +926,7 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);
 		       (dev)->pci_device == 0x2E12 || \
 		       (dev)->pci_device == 0x2E22 || \
 		       (dev)->pci_device == 0x2E32 || \
+		       (dev)->pci_device == 0x2E42 || \
 		       (dev)->pci_device == 0x0042 || \
 		       (dev)->pci_device == 0x0046)
 
@@ -876,6 +939,7 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);
 		     (dev)->pci_device == 0x2E12 || \
 		     (dev)->pci_device == 0x2E22 || \
 		     (dev)->pci_device == 0x2E32 || \
+		     (dev)->pci_device == 0x2E42 || \
 		     IS_GM45(dev))
 
 #define IS_IGDG(dev) ((dev)->pci_device == 0xa001)
@@ -909,12 +973,13 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);
 #define SUPPORTS_INTEGRATED_HDMI(dev)	(IS_G4X(dev) || IS_IGDNG(dev))
 #define SUPPORTS_INTEGRATED_DP(dev)	(IS_G4X(dev) || IS_IGDNG(dev))
911#define SUPPORTS_EDP(dev) (IS_IGDNG_M(dev)) 975#define SUPPORTS_EDP(dev) (IS_IGDNG_M(dev))
912#define I915_HAS_HOTPLUG(dev) (IS_I945G(dev) || IS_I945GM(dev) || IS_I965G(dev)) 976#define I915_HAS_HOTPLUG(dev) (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev) || IS_I965G(dev))
913/* dsparb controlled by hw only */ 977/* dsparb controlled by hw only */
914#define DSPARB_HWCONTROL(dev) (IS_G4X(dev) || IS_IGDNG(dev)) 978#define DSPARB_HWCONTROL(dev) (IS_G4X(dev) || IS_IGDNG(dev))
915 979
916#define HAS_FW_BLC(dev) (IS_I9XX(dev) || IS_G4X(dev) || IS_IGDNG(dev)) 980#define HAS_FW_BLC(dev) (IS_I9XX(dev) || IS_G4X(dev) || IS_IGDNG(dev))
917#define HAS_PIPE_CXSR(dev) (IS_G4X(dev) || IS_IGDNG(dev)) 981#define HAS_PIPE_CXSR(dev) (IS_G4X(dev) || IS_IGDNG(dev))
982#define I915_HAS_FBC(dev) (IS_MOBILE(dev) && (IS_I9XX(dev) || IS_I965G(dev)))
918 983
919#define PRIMARY_RINGBUFFER_SIZE (128*1024) 984#define PRIMARY_RINGBUFFER_SIZE (128*1024)
920 985
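The new 0x2E42 entries above extend the PCI-ID tables that back the IS_*()/HAS_*() feature macros. A cut-down sketch of that pattern follows; the IDs and the hotplug policy here are examples only, not the driver's authoritative lists:

struct dev_info {
        unsigned short pci_device;
};

/* Feature gating by PCI device ID: group IDs into a family macro,
 * then express capabilities in terms of families. */
#define IS_EXAMPLE_G4X(dev)      ((dev)->pci_device == 0x2E32 || \
                                  (dev)->pci_device == 0x2E42)
#define EXAMPLE_HAS_HOTPLUG(dev) (IS_EXAMPLE_G4X(dev))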
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c67317112f4a..40727d4c2919 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -29,6 +29,7 @@
29#include "drm.h" 29#include "drm.h"
30#include "i915_drm.h" 30#include "i915_drm.h"
31#include "i915_drv.h" 31#include "i915_drv.h"
32#include "i915_trace.h"
32#include "intel_drv.h" 33#include "intel_drv.h"
33#include <linux/swap.h> 34#include <linux/swap.h>
34#include <linux/pci.h> 35#include <linux/pci.h>
@@ -48,11 +49,15 @@ static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
48static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, 49static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
49 unsigned alignment); 50 unsigned alignment);
50static void i915_gem_clear_fence_reg(struct drm_gem_object *obj); 51static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
51static int i915_gem_evict_something(struct drm_device *dev); 52static int i915_gem_evict_something(struct drm_device *dev, int min_size);
53static int i915_gem_evict_from_inactive_list(struct drm_device *dev);
52static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, 54static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
53 struct drm_i915_gem_pwrite *args, 55 struct drm_i915_gem_pwrite *args,
54 struct drm_file *file_priv); 56 struct drm_file *file_priv);
55 57
58static LIST_HEAD(shrink_list);
59static DEFINE_SPINLOCK(shrink_list_lock);
60
56int i915_gem_do_init(struct drm_device *dev, unsigned long start, 61int i915_gem_do_init(struct drm_device *dev, unsigned long start,
57 unsigned long end) 62 unsigned long end)
58{ 63{
@@ -316,6 +321,45 @@ fail_unlock:
316 return ret; 321 return ret;
317} 322}
318 323
324static inline gfp_t
325i915_gem_object_get_page_gfp_mask (struct drm_gem_object *obj)
326{
327 return mapping_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping);
328}
329
330static inline void
331i915_gem_object_set_page_gfp_mask (struct drm_gem_object *obj, gfp_t gfp)
332{
333 mapping_set_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping, gfp);
334}
335
336static int
337i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj)
338{
339 int ret;
340
341 ret = i915_gem_object_get_pages(obj);
342
343 /* If we've insufficient memory to map in the pages, attempt
344 * to make some space by throwing out some old buffers.
345 */
346 if (ret == -ENOMEM) {
347 struct drm_device *dev = obj->dev;
348 gfp_t gfp;
349
350 ret = i915_gem_evict_something(dev, obj->size);
351 if (ret)
352 return ret;
353
354 gfp = i915_gem_object_get_page_gfp_mask(obj);
355 i915_gem_object_set_page_gfp_mask(obj, gfp & ~__GFP_NORETRY);
356 ret = i915_gem_object_get_pages(obj);
357 i915_gem_object_set_page_gfp_mask (obj, gfp);
358 }
359
360 return ret;
361}
362
319/** 363/**
320 * This is the fallback shmem pread path, which allocates temporary storage 364 * This is the fallback shmem pread path, which allocates temporary storage
321 * in kernel space to copy_to_user into outside of the struct_mutex, so we 365 * in kernel space to copy_to_user into outside of the struct_mutex, so we
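The new i915_gem_object_get_pages_or_evict() helper above first tries a cheap allocation (the mapping carries __GFP_NORETRY), and only after evicting something from the GTT does it retry with the NORETRY bit cleared. A self-contained sketch of that shape, with try_alloc() and evict_some() standing in for the real calls; none of these names are driver symbols:

#include <errno.h>
#include <stddef.h>

enum alloc_mode { FAIL_FAST, ALLOW_RETRY };

/* Stubs standing in for i915_gem_object_get_pages() and
 * i915_gem_evict_something(). */
static int try_alloc(size_t size, enum alloc_mode mode)
{
        (void)size; (void)mode;
        return -ENOMEM;                 /* pretend memory is tight */
}

static int evict_some(size_t min_size)
{
        (void)min_size;
        return 0;                       /* pretend eviction freed space */
}

/* Fail fast first, evict to make room, then retry without NORETRY. */
static int alloc_or_evict(size_t size)
{
        int ret = try_alloc(size, FAIL_FAST);

        if (ret == -ENOMEM) {
                ret = evict_some(size);
                if (ret)
                        return ret;
                ret = try_alloc(size, ALLOW_RETRY);
        }
        return ret;
}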
@@ -367,8 +411,8 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
367 411
368 mutex_lock(&dev->struct_mutex); 412 mutex_lock(&dev->struct_mutex);
369 413
370 ret = i915_gem_object_get_pages(obj); 414 ret = i915_gem_object_get_pages_or_evict(obj);
371 if (ret != 0) 415 if (ret)
372 goto fail_unlock; 416 goto fail_unlock;
373 417
374 ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset, 418 ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
@@ -842,8 +886,8 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
842 886
843 mutex_lock(&dev->struct_mutex); 887 mutex_lock(&dev->struct_mutex);
844 888
845 ret = i915_gem_object_get_pages(obj); 889 ret = i915_gem_object_get_pages_or_evict(obj);
846 if (ret != 0) 890 if (ret)
847 goto fail_unlock; 891 goto fail_unlock;
848 892
849 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 893 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
@@ -1155,28 +1199,22 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1155 /* Now bind it into the GTT if needed */ 1199 /* Now bind it into the GTT if needed */
1156 mutex_lock(&dev->struct_mutex); 1200 mutex_lock(&dev->struct_mutex);
1157 if (!obj_priv->gtt_space) { 1201 if (!obj_priv->gtt_space) {
1158 ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment); 1202 ret = i915_gem_object_bind_to_gtt(obj, 0);
1159 if (ret) { 1203 if (ret)
1160 mutex_unlock(&dev->struct_mutex); 1204 goto unlock;
1161 return VM_FAULT_SIGBUS;
1162 }
1163
1164 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1165 if (ret) {
1166 mutex_unlock(&dev->struct_mutex);
1167 return VM_FAULT_SIGBUS;
1168 }
1169 1205
1170 list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list); 1206 list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1207
1208 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1209 if (ret)
1210 goto unlock;
1171 } 1211 }
1172 1212
1173 /* Need a new fence register? */ 1213 /* Need a new fence register? */
1174 if (obj_priv->tiling_mode != I915_TILING_NONE) { 1214 if (obj_priv->tiling_mode != I915_TILING_NONE) {
1175 ret = i915_gem_object_get_fence_reg(obj); 1215 ret = i915_gem_object_get_fence_reg(obj);
1176 if (ret) { 1216 if (ret)
1177 mutex_unlock(&dev->struct_mutex); 1217 goto unlock;
1178 return VM_FAULT_SIGBUS;
1179 }
1180 } 1218 }
1181 1219
1182 pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) + 1220 pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
@@ -1184,18 +1222,18 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1184 1222
1185 /* Finally, remap it using the new GTT offset */ 1223 /* Finally, remap it using the new GTT offset */
1186 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn); 1224 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1187 1225unlock:
1188 mutex_unlock(&dev->struct_mutex); 1226 mutex_unlock(&dev->struct_mutex);
1189 1227
1190 switch (ret) { 1228 switch (ret) {
1229 case 0:
1230 case -ERESTARTSYS:
1231 return VM_FAULT_NOPAGE;
1191 case -ENOMEM: 1232 case -ENOMEM:
1192 case -EAGAIN: 1233 case -EAGAIN:
1193 return VM_FAULT_OOM; 1234 return VM_FAULT_OOM;
1194 case -EFAULT:
1195 case -EINVAL:
1196 return VM_FAULT_SIGBUS;
1197 default: 1235 default:
1198 return VM_FAULT_NOPAGE; 1236 return VM_FAULT_SIGBUS;
1199 } 1237 }
1200} 1238}
1201 1239
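The fault handler now funnels every failure through one unlock label and translates errnos in a single switch, treating -ERESTARTSYS like success (the fault is simply retried). A sketch of that translation, with illustrative FAULT_* values in place of the kernel's VM_FAULT_* codes and -EINTR standing in for -ERESTARTSYS:

#include <errno.h>

enum fault_code { FAULT_NOPAGE, FAULT_OOM, FAULT_SIGBUS };

static enum fault_code errno_to_fault(int err)
{
        switch (err) {
        case 0:
        case -EINTR:                    /* stands in for -ERESTARTSYS */
                return FAULT_NOPAGE;    /* retry the faulting access */
        case -ENOMEM:
        case -EAGAIN:
                return FAULT_OOM;
        default:
                return FAULT_SIGBUS;    /* anything else is fatal */
        }
}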
@@ -1388,6 +1426,14 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1388 1426
1389 obj_priv = obj->driver_private; 1427 obj_priv = obj->driver_private;
1390 1428
1429 if (obj_priv->madv != I915_MADV_WILLNEED) {
1430 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1431 drm_gem_object_unreference(obj);
1432 mutex_unlock(&dev->struct_mutex);
1433 return -EINVAL;
1434 }
1435
1436
1391 if (!obj_priv->mmap_offset) { 1437 if (!obj_priv->mmap_offset) {
1392 ret = i915_gem_create_mmap_offset(obj); 1438 ret = i915_gem_create_mmap_offset(obj);
1393 if (ret) { 1439 if (ret) {
@@ -1399,22 +1445,12 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1399 1445
1400 args->offset = obj_priv->mmap_offset; 1446 args->offset = obj_priv->mmap_offset;
1401 1447
1402 obj_priv->gtt_alignment = i915_gem_get_gtt_alignment(obj);
1403
1404 /* Make sure the alignment is correct for fence regs etc */
1405 if (obj_priv->agp_mem &&
1406 (obj_priv->gtt_offset & (obj_priv->gtt_alignment - 1))) {
1407 drm_gem_object_unreference(obj);
1408 mutex_unlock(&dev->struct_mutex);
1409 return -EINVAL;
1410 }
1411
1412 /* 1448 /*
1413 * Pull it into the GTT so that we have a page list (makes the 1449 * Pull it into the GTT so that we have a page list (makes the
1414 * initial fault faster and any subsequent flushing possible). 1450 * initial fault faster and any subsequent flushing possible).
1415 */ 1451 */
1416 if (!obj_priv->agp_mem) { 1452 if (!obj_priv->agp_mem) {
1417 ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment); 1453 ret = i915_gem_object_bind_to_gtt(obj, 0);
1418 if (ret) { 1454 if (ret) {
1419 drm_gem_object_unreference(obj); 1455 drm_gem_object_unreference(obj);
1420 mutex_unlock(&dev->struct_mutex); 1456 mutex_unlock(&dev->struct_mutex);
@@ -1437,6 +1473,7 @@ i915_gem_object_put_pages(struct drm_gem_object *obj)
1437 int i; 1473 int i;
1438 1474
1439 BUG_ON(obj_priv->pages_refcount == 0); 1475 BUG_ON(obj_priv->pages_refcount == 0);
1476 BUG_ON(obj_priv->madv == __I915_MADV_PURGED);
1440 1477
1441 if (--obj_priv->pages_refcount != 0) 1478 if (--obj_priv->pages_refcount != 0)
1442 return; 1479 return;
@@ -1444,13 +1481,21 @@ i915_gem_object_put_pages(struct drm_gem_object *obj)
1444 if (obj_priv->tiling_mode != I915_TILING_NONE) 1481 if (obj_priv->tiling_mode != I915_TILING_NONE)
1445 i915_gem_object_save_bit_17_swizzle(obj); 1482 i915_gem_object_save_bit_17_swizzle(obj);
1446 1483
1447 for (i = 0; i < page_count; i++) 1484 if (obj_priv->madv == I915_MADV_DONTNEED)
1448 if (obj_priv->pages[i] != NULL) { 1485 obj_priv->dirty = 0;
1449 if (obj_priv->dirty) 1486
1450 set_page_dirty(obj_priv->pages[i]); 1487 for (i = 0; i < page_count; i++) {
1488 if (obj_priv->pages[i] == NULL)
1489 break;
1490
1491 if (obj_priv->dirty)
1492 set_page_dirty(obj_priv->pages[i]);
1493
1494 if (obj_priv->madv == I915_MADV_WILLNEED)
1451 mark_page_accessed(obj_priv->pages[i]); 1495 mark_page_accessed(obj_priv->pages[i]);
1452 page_cache_release(obj_priv->pages[i]); 1496
1453 } 1497 page_cache_release(obj_priv->pages[i]);
1498 }
1454 obj_priv->dirty = 0; 1499 obj_priv->dirty = 0;
1455 1500
1456 drm_free_large(obj_priv->pages); 1501 drm_free_large(obj_priv->pages);
@@ -1489,6 +1534,26 @@ i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
1489 obj_priv->last_rendering_seqno = 0; 1534 obj_priv->last_rendering_seqno = 0;
1490} 1535}
1491 1536
1537/* Immediately discard the backing storage */
1538static void
1539i915_gem_object_truncate(struct drm_gem_object *obj)
1540{
1541 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1542 struct inode *inode;
1543
1544 inode = obj->filp->f_path.dentry->d_inode;
1545 if (inode->i_op->truncate)
1546 inode->i_op->truncate (inode);
1547
1548 obj_priv->madv = __I915_MADV_PURGED;
1549}
1550
1551static inline int
1552i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
1553{
1554 return obj_priv->madv == I915_MADV_DONTNEED;
1555}
1556
1492static void 1557static void
1493i915_gem_object_move_to_inactive(struct drm_gem_object *obj) 1558i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
1494{ 1559{
@@ -1577,15 +1642,24 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
1577 1642
1578 if ((obj->write_domain & flush_domains) == 1643 if ((obj->write_domain & flush_domains) ==
1579 obj->write_domain) { 1644 obj->write_domain) {
1645 uint32_t old_write_domain = obj->write_domain;
1646
1580 obj->write_domain = 0; 1647 obj->write_domain = 0;
1581 i915_gem_object_move_to_active(obj, seqno); 1648 i915_gem_object_move_to_active(obj, seqno);
1649
1650 trace_i915_gem_object_change_domain(obj,
1651 obj->read_domains,
1652 old_write_domain);
1582 } 1653 }
1583 } 1654 }
1584 1655
1585 } 1656 }
1586 1657
1587 if (was_empty && !dev_priv->mm.suspended) 1658 if (!dev_priv->mm.suspended) {
1588 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); 1659 mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
1660 if (was_empty)
1661 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1662 }
1589 return seqno; 1663 return seqno;
1590} 1664}
1591 1665
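i915_add_request() now re-arms the hangcheck timer on every submission, so the detector only fires when requests stop flowing. A userspace analogue using a wall-clock deadline in place of mod_timer(); the period constant and function names are illustrative:

#include <time.h>

#define HANGCHECK_PERIOD_SEC 2  /* stand-in for DRM_I915_HANGCHECK_PERIOD */

static time_t hangcheck_deadline;

/* Analogue of mod_timer(&hangcheck_timer, jiffies + PERIOD): each new
 * request pushes the watchdog deadline forward. */
static void arm_hangcheck(void)
{
        hangcheck_deadline = time(NULL) + HANGCHECK_PERIOD_SEC;
}

/* Fires only if no request re-armed the deadline in time. */
static int hangcheck_elapsed(void)
{
        return time(NULL) >= hangcheck_deadline;
}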
@@ -1623,6 +1697,8 @@ i915_gem_retire_request(struct drm_device *dev,
1623{ 1697{
1624 drm_i915_private_t *dev_priv = dev->dev_private; 1698 drm_i915_private_t *dev_priv = dev->dev_private;
1625 1699
1700 trace_i915_gem_request_retire(dev, request->seqno);
1701
1626 /* Move any buffers on the active list that are no longer referenced 1702 /* Move any buffers on the active list that are no longer referenced
1627 * by the ringbuffer to the flushing/inactive lists as appropriate. 1703 * by the ringbuffer to the flushing/inactive lists as appropriate.
1628 */ 1704 */
@@ -1671,7 +1747,7 @@ out:
1671/** 1747/**
1672 * Returns true if seq1 is later than seq2. 1748 * Returns true if seq1 is later than seq2.
1673 */ 1749 */
1674static int 1750bool
1675i915_seqno_passed(uint32_t seq1, uint32_t seq2) 1751i915_seqno_passed(uint32_t seq1, uint32_t seq2)
1676{ 1752{
1677 return (int32_t)(seq1 - seq2) >= 0; 1753 return (int32_t)(seq1 - seq2) >= 0;
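i915_seqno_passed() is exported above; its cast-to-signed subtraction is the standard wraparound-safe sequence comparison, valid as long as the two values stay within 2^31 of each other. A small runnable demonstration:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Subtracting in unsigned arithmetic and reinterpreting as signed
 * keeps the comparison correct across 32-bit wraparound. */
static bool seqno_passed(uint32_t seq1, uint32_t seq2)
{
        return (int32_t)(seq1 - seq2) >= 0;
}

int main(void)
{
        assert(seqno_passed(5, 3));             /* ordinary case */
        assert(!seqno_passed(3, 5));
        assert(seqno_passed(2, 0xfffffffeu));   /* across the wrap */
        assert(!seqno_passed(0xfffffffeu, 2));
        return 0;
}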
@@ -1709,7 +1785,7 @@ i915_gem_retire_requests(struct drm_device *dev)
1709 retiring_seqno = request->seqno; 1785 retiring_seqno = request->seqno;
1710 1786
1711 if (i915_seqno_passed(seqno, retiring_seqno) || 1787 if (i915_seqno_passed(seqno, retiring_seqno) ||
1712 dev_priv->mm.wedged) { 1788 atomic_read(&dev_priv->mm.wedged)) {
1713 i915_gem_retire_request(dev, request); 1789 i915_gem_retire_request(dev, request);
1714 1790
1715 list_del(&request->list); 1791 list_del(&request->list);
@@ -1751,6 +1827,9 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
1751 1827
1752 BUG_ON(seqno == 0); 1828 BUG_ON(seqno == 0);
1753 1829
1830 if (atomic_read(&dev_priv->mm.wedged))
1831 return -EIO;
1832
1754 if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) { 1833 if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
1755 if (IS_IGDNG(dev)) 1834 if (IS_IGDNG(dev))
1756 ier = I915_READ(DEIER) | I915_READ(GTIER); 1835 ier = I915_READ(DEIER) | I915_READ(GTIER);
@@ -1763,16 +1842,20 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
1763 i915_driver_irq_postinstall(dev); 1842 i915_driver_irq_postinstall(dev);
1764 } 1843 }
1765 1844
1845 trace_i915_gem_request_wait_begin(dev, seqno);
1846
1766 dev_priv->mm.waiting_gem_seqno = seqno; 1847 dev_priv->mm.waiting_gem_seqno = seqno;
1767 i915_user_irq_get(dev); 1848 i915_user_irq_get(dev);
1768 ret = wait_event_interruptible(dev_priv->irq_queue, 1849 ret = wait_event_interruptible(dev_priv->irq_queue,
1769 i915_seqno_passed(i915_get_gem_seqno(dev), 1850 i915_seqno_passed(i915_get_gem_seqno(dev),
1770 seqno) || 1851 seqno) ||
1771 dev_priv->mm.wedged); 1852 atomic_read(&dev_priv->mm.wedged));
1772 i915_user_irq_put(dev); 1853 i915_user_irq_put(dev);
1773 dev_priv->mm.waiting_gem_seqno = 0; 1854 dev_priv->mm.waiting_gem_seqno = 0;
1855
1856 trace_i915_gem_request_wait_end(dev, seqno);
1774 } 1857 }
1775 if (dev_priv->mm.wedged) 1858 if (atomic_read(&dev_priv->mm.wedged))
1776 ret = -EIO; 1859 ret = -EIO;
1777 1860
1778 if (ret && ret != -ERESTARTSYS) 1861 if (ret && ret != -ERESTARTSYS)
@@ -1803,6 +1886,8 @@ i915_gem_flush(struct drm_device *dev,
1803 DRM_INFO("%s: invalidate %08x flush %08x\n", __func__, 1886 DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
1804 invalidate_domains, flush_domains); 1887 invalidate_domains, flush_domains);
1805#endif 1888#endif
1889 trace_i915_gem_request_flush(dev, dev_priv->mm.next_gem_seqno,
1890 invalidate_domains, flush_domains);
1806 1891
1807 if (flush_domains & I915_GEM_DOMAIN_CPU) 1892 if (flush_domains & I915_GEM_DOMAIN_CPU)
1808 drm_agp_chipset_flush(dev); 1893 drm_agp_chipset_flush(dev);
@@ -1915,6 +2000,12 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
1915 return -EINVAL; 2000 return -EINVAL;
1916 } 2001 }
1917 2002
2003 /* blow away mappings if mapped through GTT */
2004 i915_gem_release_mmap(obj);
2005
2006 if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
2007 i915_gem_clear_fence_reg(obj);
2008
1918 /* Move the object to the CPU domain to ensure that 2009 /* Move the object to the CPU domain to ensure that
1919 * any possible CPU writes while it's not in the GTT 2010 * any possible CPU writes while it's not in the GTT
1920 * are flushed when we go to remap it. This will 2011 * are flushed when we go to remap it. This will
@@ -1928,21 +2019,16 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
1928 return ret; 2019 return ret;
1929 } 2020 }
1930 2021
2022 BUG_ON(obj_priv->active);
2023
1931 if (obj_priv->agp_mem != NULL) { 2024 if (obj_priv->agp_mem != NULL) {
1932 drm_unbind_agp(obj_priv->agp_mem); 2025 drm_unbind_agp(obj_priv->agp_mem);
1933 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE); 2026 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
1934 obj_priv->agp_mem = NULL; 2027 obj_priv->agp_mem = NULL;
1935 } 2028 }
1936 2029
1937 BUG_ON(obj_priv->active);
1938
1939 /* blow away mappings if mapped through GTT */
1940 i915_gem_release_mmap(obj);
1941
1942 if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
1943 i915_gem_clear_fence_reg(obj);
1944
1945 i915_gem_object_put_pages(obj); 2030 i915_gem_object_put_pages(obj);
2031 BUG_ON(obj_priv->pages_refcount);
1946 2032
1947 if (obj_priv->gtt_space) { 2033 if (obj_priv->gtt_space) {
1948 atomic_dec(&dev->gtt_count); 2034 atomic_dec(&dev->gtt_count);
@@ -1956,40 +2042,113 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
1956 if (!list_empty(&obj_priv->list)) 2042 if (!list_empty(&obj_priv->list))
1957 list_del_init(&obj_priv->list); 2043 list_del_init(&obj_priv->list);
1958 2044
2045 if (i915_gem_object_is_purgeable(obj_priv))
2046 i915_gem_object_truncate(obj);
2047
2048 trace_i915_gem_object_unbind(obj);
2049
1959 return 0; 2050 return 0;
1960} 2051}
1961 2052
2053static struct drm_gem_object *
2054i915_gem_find_inactive_object(struct drm_device *dev, int min_size)
2055{
2056 drm_i915_private_t *dev_priv = dev->dev_private;
2057 struct drm_i915_gem_object *obj_priv;
2058 struct drm_gem_object *best = NULL;
2059 struct drm_gem_object *first = NULL;
2060
2061 /* Try to find the smallest clean object */
2062 list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) {
2063 struct drm_gem_object *obj = obj_priv->obj;
2064 if (obj->size >= min_size) {
2065 if ((!obj_priv->dirty ||
2066 i915_gem_object_is_purgeable(obj_priv)) &&
2067 (!best || obj->size < best->size)) {
2068 best = obj;
2069 if (best->size == min_size)
2070 return best;
2071 }
2072 if (!first)
2073 first = obj;
2074 }
2075 }
2076
2077 return best ? best : first;
2078}
2079
1962static int 2080static int
1963i915_gem_evict_something(struct drm_device *dev) 2081i915_gem_evict_everything(struct drm_device *dev)
2082{
2083 drm_i915_private_t *dev_priv = dev->dev_private;
2084 uint32_t seqno;
2085 int ret;
2086 bool lists_empty;
2087
2088 spin_lock(&dev_priv->mm.active_list_lock);
2089 lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
2090 list_empty(&dev_priv->mm.flushing_list) &&
2091 list_empty(&dev_priv->mm.active_list));
2092 spin_unlock(&dev_priv->mm.active_list_lock);
2093
2094 if (lists_empty)
2095 return -ENOSPC;
2096
2097 /* Flush everything (on to the inactive lists) and evict */
2098 i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
2099 seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
2100 if (seqno == 0)
2101 return -ENOMEM;
2102
2103 ret = i915_wait_request(dev, seqno);
2104 if (ret)
2105 return ret;
2106
2107 ret = i915_gem_evict_from_inactive_list(dev);
2108 if (ret)
2109 return ret;
2110
2111 spin_lock(&dev_priv->mm.active_list_lock);
2112 lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
2113 list_empty(&dev_priv->mm.flushing_list) &&
2114 list_empty(&dev_priv->mm.active_list));
2115 spin_unlock(&dev_priv->mm.active_list_lock);
2116 BUG_ON(!lists_empty);
2117
2118 return 0;
2119}
2120
2121static int
2122i915_gem_evict_something(struct drm_device *dev, int min_size)
1964{ 2123{
1965 drm_i915_private_t *dev_priv = dev->dev_private; 2124 drm_i915_private_t *dev_priv = dev->dev_private;
1966 struct drm_gem_object *obj; 2125 struct drm_gem_object *obj;
1967 struct drm_i915_gem_object *obj_priv; 2126 int ret;
1968 int ret = 0;
1969 2127
1970 for (;;) { 2128 for (;;) {
2129 i915_gem_retire_requests(dev);
2130
1971 /* If there's an inactive buffer available now, grab it 2131 /* If there's an inactive buffer available now, grab it
1972 * and be done. 2132 * and be done.
1973 */ 2133 */
1974 if (!list_empty(&dev_priv->mm.inactive_list)) { 2134 obj = i915_gem_find_inactive_object(dev, min_size);
1975 obj_priv = list_first_entry(&dev_priv->mm.inactive_list, 2135 if (obj) {
1976 struct drm_i915_gem_object, 2136 struct drm_i915_gem_object *obj_priv;
1977 list); 2137
1978 obj = obj_priv->obj;
1979 BUG_ON(obj_priv->pin_count != 0);
1980#if WATCH_LRU 2138#if WATCH_LRU
1981 DRM_INFO("%s: evicting %p\n", __func__, obj); 2139 DRM_INFO("%s: evicting %p\n", __func__, obj);
1982#endif 2140#endif
2141 obj_priv = obj->driver_private;
2142 BUG_ON(obj_priv->pin_count != 0);
1983 BUG_ON(obj_priv->active); 2143 BUG_ON(obj_priv->active);
1984 2144
1985 /* Wait on the rendering and unbind the buffer. */ 2145 /* Wait on the rendering and unbind the buffer. */
1986 ret = i915_gem_object_unbind(obj); 2146 return i915_gem_object_unbind(obj);
1987 break;
1988 } 2147 }
1989 2148
1990 /* If we didn't get anything, but the ring is still processing 2149 /* If we didn't get anything, but the ring is still processing
1991 * things, wait for one of those things to finish and hopefully 2150 * things, wait for the next to finish and hopefully leave us
1992 * leave us a buffer to evict. 2151 * a buffer to evict.
1993 */ 2152 */
1994 if (!list_empty(&dev_priv->mm.request_list)) { 2153 if (!list_empty(&dev_priv->mm.request_list)) {
1995 struct drm_i915_gem_request *request; 2154 struct drm_i915_gem_request *request;
@@ -2000,16 +2159,9 @@ i915_gem_evict_something(struct drm_device *dev)
2000 2159
2001 ret = i915_wait_request(dev, request->seqno); 2160 ret = i915_wait_request(dev, request->seqno);
2002 if (ret) 2161 if (ret)
2003 break; 2162 return ret;
2004 2163
2005 /* if waiting caused an object to become inactive, 2164 continue;
2006 * then loop around and wait for it. Otherwise, we
2007 * assume that waiting freed and unbound something,
2008 * so there should now be some space in the GTT
2009 */
2010 if (!list_empty(&dev_priv->mm.inactive_list))
2011 continue;
2012 break;
2013 } 2165 }
2014 2166
2015 /* If we didn't have anything on the request list but there 2167 /* If we didn't have anything on the request list but there
@@ -2018,46 +2170,44 @@ i915_gem_evict_something(struct drm_device *dev)
2018 * will get moved to inactive. 2170 * will get moved to inactive.
2019 */ 2171 */
2020 if (!list_empty(&dev_priv->mm.flushing_list)) { 2172 if (!list_empty(&dev_priv->mm.flushing_list)) {
2021 obj_priv = list_first_entry(&dev_priv->mm.flushing_list, 2173 struct drm_i915_gem_object *obj_priv;
2022 struct drm_i915_gem_object,
2023 list);
2024 obj = obj_priv->obj;
2025 2174
2026 i915_gem_flush(dev, 2175 /* Find an object that we can immediately reuse */
2027 obj->write_domain, 2176 list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list, list) {
2028 obj->write_domain); 2177 obj = obj_priv->obj;
2029 i915_add_request(dev, NULL, obj->write_domain); 2178 if (obj->size >= min_size)
2179 break;
2030 2180
2031 obj = NULL; 2181 obj = NULL;
2032 continue; 2182 }
2033 }
2034 2183
2035 DRM_ERROR("inactive empty %d request empty %d " 2184 if (obj != NULL) {
2036 "flushing empty %d\n", 2185 uint32_t seqno;
2037 list_empty(&dev_priv->mm.inactive_list),
2038 list_empty(&dev_priv->mm.request_list),
2039 list_empty(&dev_priv->mm.flushing_list));
2040 /* If we didn't do any of the above, there's nothing to be done
2041 * and we just can't fit it in.
2042 */
2043 return -ENOSPC;
2044 }
2045 return ret;
2046}
2047 2186
2048static int 2187 i915_gem_flush(dev,
2049i915_gem_evict_everything(struct drm_device *dev) 2188 obj->write_domain,
2050{ 2189 obj->write_domain);
2051 int ret; 2190 seqno = i915_add_request(dev, NULL, obj->write_domain);
2191 if (seqno == 0)
2192 return -ENOMEM;
2052 2193
2053 for (;;) { 2194 ret = i915_wait_request(dev, seqno);
2054 ret = i915_gem_evict_something(dev); 2195 if (ret)
2055 if (ret != 0) 2196 return ret;
2056 break; 2197
2198 continue;
2199 }
2200 }
2201
2202 /* If we didn't do any of the above, there's no single buffer
2203 * large enough to swap out for the new one, so just evict
2204 * everything and start again. (This should be rare.)
2205 */
2206 if (!list_empty (&dev_priv->mm.inactive_list))
2207 return i915_gem_evict_from_inactive_list(dev);
2208 else
2209 return i915_gem_evict_everything(dev);
2057 } 2210 }
2058 if (ret == -ENOSPC)
2059 return 0;
2060 return ret;
2061} 2211}
2062 2212
2063int 2213int
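i915_gem_evict_something() now sizes its victim: i915_gem_find_inactive_object() prefers the smallest clean (or purgeable) object that is big enough, falling back to the first sufficiently large object of any kind. A sketch of that selection over a plain linked list; struct buf is illustrative, and a single clean flag collapses the driver's !dirty-or-purgeable test:

#include <stddef.h>

struct buf {
        size_t size;
        int clean;              /* !dirty || purgeable, in the driver */
        struct buf *next;
};

static struct buf *find_victim(struct buf *head, size_t min_size)
{
        struct buf *best = NULL, *first = NULL;

        for (struct buf *b = head; b; b = b->next) {
                if (b->size < min_size)
                        continue;
                if (b->clean && (!best || b->size < best->size)) {
                        best = b;
                        if (best->size == min_size)     /* exact fit */
                                return best;
                }
                if (!first)
                        first = b;
        }
        return best ? best : first;     /* clean if possible, else any */
}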
@@ -2080,7 +2230,6 @@ i915_gem_object_get_pages(struct drm_gem_object *obj)
2080 BUG_ON(obj_priv->pages != NULL); 2230 BUG_ON(obj_priv->pages != NULL);
2081 obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *)); 2231 obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
2082 if (obj_priv->pages == NULL) { 2232 if (obj_priv->pages == NULL) {
2083 DRM_ERROR("Faled to allocate page list\n");
2084 obj_priv->pages_refcount--; 2233 obj_priv->pages_refcount--;
2085 return -ENOMEM; 2234 return -ENOMEM;
2086 } 2235 }
@@ -2091,7 +2240,6 @@ i915_gem_object_get_pages(struct drm_gem_object *obj)
2091 page = read_mapping_page(mapping, i, NULL); 2240 page = read_mapping_page(mapping, i, NULL);
2092 if (IS_ERR(page)) { 2241 if (IS_ERR(page)) {
2093 ret = PTR_ERR(page); 2242 ret = PTR_ERR(page);
2094 DRM_ERROR("read_mapping_page failed: %d\n", ret);
2095 i915_gem_object_put_pages(obj); 2243 i915_gem_object_put_pages(obj);
2096 return ret; 2244 return ret;
2097 } 2245 }
@@ -2328,6 +2476,8 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
2328 else 2476 else
2329 i830_write_fence_reg(reg); 2477 i830_write_fence_reg(reg);
2330 2478
2479 trace_i915_gem_object_get_fence(obj, i, obj_priv->tiling_mode);
2480
2331 return 0; 2481 return 0;
2332} 2482}
2333 2483
@@ -2410,10 +2560,17 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
2410 drm_i915_private_t *dev_priv = dev->dev_private; 2560 drm_i915_private_t *dev_priv = dev->dev_private;
2411 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2561 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2412 struct drm_mm_node *free_space; 2562 struct drm_mm_node *free_space;
2413 int page_count, ret; 2563 bool retry_alloc = false;
2564 int ret;
2414 2565
2415 if (dev_priv->mm.suspended) 2566 if (dev_priv->mm.suspended)
2416 return -EBUSY; 2567 return -EBUSY;
2568
2569 if (obj_priv->madv != I915_MADV_WILLNEED) {
2570 DRM_ERROR("Attempting to bind a purgeable object\n");
2571 return -EINVAL;
2572 }
2573
2417 if (alignment == 0) 2574 if (alignment == 0)
2418 alignment = i915_gem_get_gtt_alignment(obj); 2575 alignment = i915_gem_get_gtt_alignment(obj);
2419 if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) { 2576 if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
@@ -2433,30 +2590,16 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
2433 } 2590 }
2434 } 2591 }
2435 if (obj_priv->gtt_space == NULL) { 2592 if (obj_priv->gtt_space == NULL) {
2436 bool lists_empty;
2437
2438 /* If the gtt is empty and we're still having trouble 2593 /* If the gtt is empty and we're still having trouble
2439 * fitting our object in, we're out of memory. 2594 * fitting our object in, we're out of memory.
2440 */ 2595 */
2441#if WATCH_LRU 2596#if WATCH_LRU
2442 DRM_INFO("%s: GTT full, evicting something\n", __func__); 2597 DRM_INFO("%s: GTT full, evicting something\n", __func__);
2443#endif 2598#endif
2444 spin_lock(&dev_priv->mm.active_list_lock); 2599 ret = i915_gem_evict_something(dev, obj->size);
2445 lists_empty = (list_empty(&dev_priv->mm.inactive_list) && 2600 if (ret)
2446 list_empty(&dev_priv->mm.flushing_list) &&
2447 list_empty(&dev_priv->mm.active_list));
2448 spin_unlock(&dev_priv->mm.active_list_lock);
2449 if (lists_empty) {
2450 DRM_ERROR("GTT full, but LRU list empty\n");
2451 return -ENOSPC;
2452 }
2453
2454 ret = i915_gem_evict_something(dev);
2455 if (ret != 0) {
2456 if (ret != -ERESTARTSYS)
2457 DRM_ERROR("Failed to evict a buffer %d\n", ret);
2458 return ret; 2601 return ret;
2459 } 2602
2460 goto search_free; 2603 goto search_free;
2461 } 2604 }
2462 2605
@@ -2464,27 +2607,56 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
2464 DRM_INFO("Binding object of size %zd at 0x%08x\n", 2607 DRM_INFO("Binding object of size %zd at 0x%08x\n",
2465 obj->size, obj_priv->gtt_offset); 2608 obj->size, obj_priv->gtt_offset);
2466#endif 2609#endif
2610 if (retry_alloc) {
2611 i915_gem_object_set_page_gfp_mask (obj,
2612 i915_gem_object_get_page_gfp_mask (obj) & ~__GFP_NORETRY);
2613 }
2467 ret = i915_gem_object_get_pages(obj); 2614 ret = i915_gem_object_get_pages(obj);
2615 if (retry_alloc) {
2616 i915_gem_object_set_page_gfp_mask (obj,
2617 i915_gem_object_get_page_gfp_mask (obj) | __GFP_NORETRY);
2618 }
2468 if (ret) { 2619 if (ret) {
2469 drm_mm_put_block(obj_priv->gtt_space); 2620 drm_mm_put_block(obj_priv->gtt_space);
2470 obj_priv->gtt_space = NULL; 2621 obj_priv->gtt_space = NULL;
2622
2623 if (ret == -ENOMEM) {
2624 /* first try to clear up some space from the GTT */
2625 ret = i915_gem_evict_something(dev, obj->size);
2626 if (ret) {
2627 /* now try to shrink everyone else */
2628 if (! retry_alloc) {
2629 retry_alloc = true;
2630 goto search_free;
2631 }
2632
2633 return ret;
2634 }
2635
2636 goto search_free;
2637 }
2638
2471 return ret; 2639 return ret;
2472 } 2640 }
2473 2641
2474 page_count = obj->size / PAGE_SIZE;
2475 /* Create an AGP memory structure pointing at our pages, and bind it 2642 /* Create an AGP memory structure pointing at our pages, and bind it
2476 * into the GTT. 2643 * into the GTT.
2477 */ 2644 */
2478 obj_priv->agp_mem = drm_agp_bind_pages(dev, 2645 obj_priv->agp_mem = drm_agp_bind_pages(dev,
2479 obj_priv->pages, 2646 obj_priv->pages,
2480 page_count, 2647 obj->size >> PAGE_SHIFT,
2481 obj_priv->gtt_offset, 2648 obj_priv->gtt_offset,
2482 obj_priv->agp_type); 2649 obj_priv->agp_type);
2483 if (obj_priv->agp_mem == NULL) { 2650 if (obj_priv->agp_mem == NULL) {
2484 i915_gem_object_put_pages(obj); 2651 i915_gem_object_put_pages(obj);
2485 drm_mm_put_block(obj_priv->gtt_space); 2652 drm_mm_put_block(obj_priv->gtt_space);
2486 obj_priv->gtt_space = NULL; 2653 obj_priv->gtt_space = NULL;
2487 return -ENOMEM; 2654
2655 ret = i915_gem_evict_something(dev, obj->size);
2656 if (ret)
2657 return ret;
2658
2659 goto search_free;
2488 } 2660 }
2489 atomic_inc(&dev->gtt_count); 2661 atomic_inc(&dev->gtt_count);
2490 atomic_add(obj->size, &dev->gtt_memory); 2662 atomic_add(obj->size, &dev->gtt_memory);
@@ -2496,6 +2668,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
2496 BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2668 BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2497 BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2669 BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2498 2670
2671 trace_i915_gem_object_bind(obj, obj_priv->gtt_offset);
2672
2499 return 0; 2673 return 0;
2500} 2674}
2501 2675
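The rewritten binding path above converts every allocation failure into "evict something of at least this size, then goto search_free", with one last pass that drops __GFP_NORETRY before giving up. A compilable sketch of that control flow under stub allocators; grab_gtt_space(), grab_pages(), and evict() are stand-ins, not driver symbols:

#include <errno.h>
#include <stdbool.h>
#include <stddef.h>

static int grab_gtt_space(size_t size) { (void)size; return 0; }
static int grab_pages(size_t size, bool try_hard)
{
        (void)size; (void)try_hard;
        return 0;
}
static int evict(size_t min_size) { (void)min_size; return 0; }

static int bind(size_t size)
{
        bool try_hard = false;
        int ret;

search_free:
        ret = grab_gtt_space(size);
        if (ret == -ENOSPC) {
                ret = evict(size);      /* make room, retry from the top */
                if (ret)
                        return ret;
                goto search_free;
        }

        ret = grab_pages(size, try_hard);
        if (ret == -ENOMEM) {
                if (evict(size) == 0)
                        goto search_free;
                if (!try_hard) {
                        try_hard = true;        /* drop NORETRY and retry */
                        goto search_free;
                }
        }
        return ret;
}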
@@ -2511,15 +2685,7 @@ i915_gem_clflush_object(struct drm_gem_object *obj)
2511 if (obj_priv->pages == NULL) 2685 if (obj_priv->pages == NULL)
2512 return; 2686 return;
2513 2687
2514 /* XXX: The 865 in particular appears to be weird in how it handles 2688 trace_i915_gem_object_clflush(obj);
2515 * cache flushing. We haven't figured it out, but the
2516 * clflush+agp_chipset_flush doesn't appear to successfully get the
2517 * data visible to the PGU, while wbinvd + agp_chipset_flush does.
2518 */
2519 if (IS_I865G(obj->dev)) {
2520 wbinvd();
2521 return;
2522 }
2523 2689
2524 drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE); 2690 drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
2525} 2691}
@@ -2530,21 +2696,29 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
2530{ 2696{
2531 struct drm_device *dev = obj->dev; 2697 struct drm_device *dev = obj->dev;
2532 uint32_t seqno; 2698 uint32_t seqno;
2699 uint32_t old_write_domain;
2533 2700
2534 if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) 2701 if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
2535 return; 2702 return;
2536 2703
2537 /* Queue the GPU write cache flushing we need. */ 2704 /* Queue the GPU write cache flushing we need. */
2705 old_write_domain = obj->write_domain;
2538 i915_gem_flush(dev, 0, obj->write_domain); 2706 i915_gem_flush(dev, 0, obj->write_domain);
2539 seqno = i915_add_request(dev, NULL, obj->write_domain); 2707 seqno = i915_add_request(dev, NULL, obj->write_domain);
2540 obj->write_domain = 0; 2708 obj->write_domain = 0;
2541 i915_gem_object_move_to_active(obj, seqno); 2709 i915_gem_object_move_to_active(obj, seqno);
2710
2711 trace_i915_gem_object_change_domain(obj,
2712 obj->read_domains,
2713 old_write_domain);
2542} 2714}
2543 2715
2544/** Flushes the GTT write domain for the object if it's dirty. */ 2716/** Flushes the GTT write domain for the object if it's dirty. */
2545static void 2717static void
2546i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj) 2718i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
2547{ 2719{
2720 uint32_t old_write_domain;
2721
2548 if (obj->write_domain != I915_GEM_DOMAIN_GTT) 2722 if (obj->write_domain != I915_GEM_DOMAIN_GTT)
2549 return; 2723 return;
2550 2724
@@ -2552,7 +2726,12 @@ i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
2552 * to it immediately go to main memory as far as we know, so there's 2726 * to it immediately go to main memory as far as we know, so there's
2553 * no chipset flush. It also doesn't land in render cache. 2727 * no chipset flush. It also doesn't land in render cache.
2554 */ 2728 */
2729 old_write_domain = obj->write_domain;
2555 obj->write_domain = 0; 2730 obj->write_domain = 0;
2731
2732 trace_i915_gem_object_change_domain(obj,
2733 obj->read_domains,
2734 old_write_domain);
2556} 2735}
2557 2736
2558/** Flushes the CPU write domain for the object if it's dirty. */ 2737/** Flushes the CPU write domain for the object if it's dirty. */
@@ -2560,13 +2739,19 @@ static void
2560i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj) 2739i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
2561{ 2740{
2562 struct drm_device *dev = obj->dev; 2741 struct drm_device *dev = obj->dev;
2742 uint32_t old_write_domain;
2563 2743
2564 if (obj->write_domain != I915_GEM_DOMAIN_CPU) 2744 if (obj->write_domain != I915_GEM_DOMAIN_CPU)
2565 return; 2745 return;
2566 2746
2567 i915_gem_clflush_object(obj); 2747 i915_gem_clflush_object(obj);
2568 drm_agp_chipset_flush(dev); 2748 drm_agp_chipset_flush(dev);
2749 old_write_domain = obj->write_domain;
2569 obj->write_domain = 0; 2750 obj->write_domain = 0;
2751
2752 trace_i915_gem_object_change_domain(obj,
2753 obj->read_domains,
2754 old_write_domain);
2570} 2755}
2571 2756
2572/** 2757/**
@@ -2579,6 +2764,7 @@ int
2579i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) 2764i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
2580{ 2765{
2581 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2766 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2767 uint32_t old_write_domain, old_read_domains;
2582 int ret; 2768 int ret;
2583 2769
2584 /* Not valid to be called on unbound objects. */ 2770 /* Not valid to be called on unbound objects. */
@@ -2591,6 +2777,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
2591 if (ret != 0) 2777 if (ret != 0)
2592 return ret; 2778 return ret;
2593 2779
2780 old_write_domain = obj->write_domain;
2781 old_read_domains = obj->read_domains;
2782
2594 /* If we're writing through the GTT domain, then CPU and GPU caches 2783 /* If we're writing through the GTT domain, then CPU and GPU caches
2595 * will need to be invalidated at next use. 2784 * will need to be invalidated at next use.
2596 */ 2785 */
@@ -2609,6 +2798,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
2609 obj_priv->dirty = 1; 2798 obj_priv->dirty = 1;
2610 } 2799 }
2611 2800
2801 trace_i915_gem_object_change_domain(obj,
2802 old_read_domains,
2803 old_write_domain);
2804
2612 return 0; 2805 return 0;
2613} 2806}
2614 2807
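The tracing hunks in this file all follow one pattern: snapshot the old read/write domains before the transition, mutate, then emit the trace event with old and new values. A minimal sketch, with trace_domain_change() as an illustrative stand-in for trace_i915_gem_object_change_domain():

#include <inttypes.h>
#include <stdio.h>

static void trace_domain_change(uint32_t read, uint32_t old_write)
{
        printf("domains: read %08" PRIx32 ", old write %08" PRIx32 "\n",
               read, old_write);
}

static void flush_write_domain(uint32_t *write_domain, uint32_t read_domains)
{
        uint32_t old_write_domain = *write_domain;      /* snapshot first */

        *write_domain = 0;                              /* the transition */
        trace_domain_change(read_domains, old_write_domain);
}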
@@ -2621,6 +2814,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
2621static int 2814static int
2622i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) 2815i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
2623{ 2816{
2817 uint32_t old_write_domain, old_read_domains;
2624 int ret; 2818 int ret;
2625 2819
2626 i915_gem_object_flush_gpu_write_domain(obj); 2820 i915_gem_object_flush_gpu_write_domain(obj);
@@ -2636,6 +2830,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
2636 */ 2830 */
2637 i915_gem_object_set_to_full_cpu_read_domain(obj); 2831 i915_gem_object_set_to_full_cpu_read_domain(obj);
2638 2832
2833 old_write_domain = obj->write_domain;
2834 old_read_domains = obj->read_domains;
2835
2639 /* Flush the CPU cache if it's still invalid. */ 2836 /* Flush the CPU cache if it's still invalid. */
2640 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 2837 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2641 i915_gem_clflush_object(obj); 2838 i915_gem_clflush_object(obj);
@@ -2656,6 +2853,10 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
2656 obj->write_domain = I915_GEM_DOMAIN_CPU; 2853 obj->write_domain = I915_GEM_DOMAIN_CPU;
2657 } 2854 }
2658 2855
2856 trace_i915_gem_object_change_domain(obj,
2857 old_read_domains,
2858 old_write_domain);
2859
2659 return 0; 2860 return 0;
2660} 2861}
2661 2862
@@ -2777,6 +2978,7 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
2777 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2978 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2778 uint32_t invalidate_domains = 0; 2979 uint32_t invalidate_domains = 0;
2779 uint32_t flush_domains = 0; 2980 uint32_t flush_domains = 0;
2981 uint32_t old_read_domains;
2780 2982
2781 BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU); 2983 BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU);
2782 BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU); 2984 BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU);
@@ -2823,6 +3025,8 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
2823 i915_gem_clflush_object(obj); 3025 i915_gem_clflush_object(obj);
2824 } 3026 }
2825 3027
3028 old_read_domains = obj->read_domains;
3029
2826 /* The actual obj->write_domain will be updated with 3030 /* The actual obj->write_domain will be updated with
2827 * pending_write_domain after we emit the accumulated flush for all 3031 * pending_write_domain after we emit the accumulated flush for all
2828 * of our domain changes in execbuffers (which clears objects' 3032 * of our domain changes in execbuffers (which clears objects'
@@ -2841,6 +3045,10 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
2841 obj->read_domains, obj->write_domain, 3045 obj->read_domains, obj->write_domain,
2842 dev->invalidate_domains, dev->flush_domains); 3046 dev->invalidate_domains, dev->flush_domains);
2843#endif 3047#endif
3048
3049 trace_i915_gem_object_change_domain(obj,
3050 old_read_domains,
3051 obj->write_domain);
2844} 3052}
2845 3053
2846/** 3054/**
@@ -2893,6 +3101,7 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
2893 uint64_t offset, uint64_t size) 3101 uint64_t offset, uint64_t size)
2894{ 3102{
2895 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3103 struct drm_i915_gem_object *obj_priv = obj->driver_private;
3104 uint32_t old_read_domains;
2896 int i, ret; 3105 int i, ret;
2897 3106
2898 if (offset == 0 && size == obj->size) 3107 if (offset == 0 && size == obj->size)
@@ -2939,8 +3148,13 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
2939 */ 3148 */
2940 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3149 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2941 3150
3151 old_read_domains = obj->read_domains;
2942 obj->read_domains |= I915_GEM_DOMAIN_CPU; 3152 obj->read_domains |= I915_GEM_DOMAIN_CPU;
2943 3153
3154 trace_i915_gem_object_change_domain(obj,
3155 old_read_domains,
3156 obj->write_domain);
3157
2944 return 0; 3158 return 0;
2945} 3159}
2946 3160
@@ -2984,6 +3198,21 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
2984 } 3198 }
2985 target_obj_priv = target_obj->driver_private; 3199 target_obj_priv = target_obj->driver_private;
2986 3200
3201#if WATCH_RELOC
3202 DRM_INFO("%s: obj %p offset %08x target %d "
3203 "read %08x write %08x gtt %08x "
3204 "presumed %08x delta %08x\n",
3205 __func__,
3206 obj,
3207 (int) reloc->offset,
3208 (int) reloc->target_handle,
3209 (int) reloc->read_domains,
3210 (int) reloc->write_domain,
3211 (int) target_obj_priv->gtt_offset,
3212 (int) reloc->presumed_offset,
3213 reloc->delta);
3214#endif
3215
2987 /* The target buffer should have appeared before us in the 3216 /* The target buffer should have appeared before us in the
2988 * exec_object list, so it should have a GTT space bound by now. 3217 * exec_object list, so it should have a GTT space bound by now.
2989 */ 3218 */
@@ -2995,25 +3224,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
2995 return -EINVAL; 3224 return -EINVAL;
2996 } 3225 }
2997 3226
2998 if (reloc->offset > obj->size - 4) { 3227 /* Validate that the target is in a valid r/w GPU domain */
2999 DRM_ERROR("Relocation beyond object bounds: "
3000 "obj %p target %d offset %d size %d.\n",
3001 obj, reloc->target_handle,
3002 (int) reloc->offset, (int) obj->size);
3003 drm_gem_object_unreference(target_obj);
3004 i915_gem_object_unpin(obj);
3005 return -EINVAL;
3006 }
3007 if (reloc->offset & 3) {
3008 DRM_ERROR("Relocation not 4-byte aligned: "
3009 "obj %p target %d offset %d.\n",
3010 obj, reloc->target_handle,
3011 (int) reloc->offset);
3012 drm_gem_object_unreference(target_obj);
3013 i915_gem_object_unpin(obj);
3014 return -EINVAL;
3015 }
3016
3017 if (reloc->write_domain & I915_GEM_DOMAIN_CPU || 3228 if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
3018 reloc->read_domains & I915_GEM_DOMAIN_CPU) { 3229 reloc->read_domains & I915_GEM_DOMAIN_CPU) {
3019 DRM_ERROR("reloc with read/write CPU domains: " 3230 DRM_ERROR("reloc with read/write CPU domains: "
@@ -3027,7 +3238,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
3027 i915_gem_object_unpin(obj); 3238 i915_gem_object_unpin(obj);
3028 return -EINVAL; 3239 return -EINVAL;
3029 } 3240 }
3030
3031 if (reloc->write_domain && target_obj->pending_write_domain && 3241 if (reloc->write_domain && target_obj->pending_write_domain &&
3032 reloc->write_domain != target_obj->pending_write_domain) { 3242 reloc->write_domain != target_obj->pending_write_domain) {
3033 DRM_ERROR("Write domain conflict: " 3243 DRM_ERROR("Write domain conflict: "
@@ -3042,21 +3252,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
3042 return -EINVAL; 3252 return -EINVAL;
3043 } 3253 }
3044 3254
3045#if WATCH_RELOC
3046 DRM_INFO("%s: obj %p offset %08x target %d "
3047 "read %08x write %08x gtt %08x "
3048 "presumed %08x delta %08x\n",
3049 __func__,
3050 obj,
3051 (int) reloc->offset,
3052 (int) reloc->target_handle,
3053 (int) reloc->read_domains,
3054 (int) reloc->write_domain,
3055 (int) target_obj_priv->gtt_offset,
3056 (int) reloc->presumed_offset,
3057 reloc->delta);
3058#endif
3059
3060 target_obj->pending_read_domains |= reloc->read_domains; 3255 target_obj->pending_read_domains |= reloc->read_domains;
3061 target_obj->pending_write_domain |= reloc->write_domain; 3256 target_obj->pending_write_domain |= reloc->write_domain;
3062 3257
@@ -3068,6 +3263,37 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
3068 continue; 3263 continue;
3069 } 3264 }
3070 3265
3266 /* Check that the relocation address is valid... */
3267 if (reloc->offset > obj->size - 4) {
3268 DRM_ERROR("Relocation beyond object bounds: "
3269 "obj %p target %d offset %d size %d.\n",
3270 obj, reloc->target_handle,
3271 (int) reloc->offset, (int) obj->size);
3272 drm_gem_object_unreference(target_obj);
3273 i915_gem_object_unpin(obj);
3274 return -EINVAL;
3275 }
3276 if (reloc->offset & 3) {
3277 DRM_ERROR("Relocation not 4-byte aligned: "
3278 "obj %p target %d offset %d.\n",
3279 obj, reloc->target_handle,
3280 (int) reloc->offset);
3281 drm_gem_object_unreference(target_obj);
3282 i915_gem_object_unpin(obj);
3283 return -EINVAL;
3284 }
3285
3286 /* and points to somewhere within the target object. */
3287 if (reloc->delta >= target_obj->size) {
3288 DRM_ERROR("Relocation beyond target object bounds: "
3289 "obj %p target %d delta %d size %d.\n",
3290 obj, reloc->target_handle,
3291 (int) reloc->delta, (int) target_obj->size);
3292 drm_gem_object_unreference(target_obj);
3293 i915_gem_object_unpin(obj);
3294 return -EINVAL;
3295 }
3296
3071 ret = i915_gem_object_set_to_gtt_domain(obj, 1); 3297 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
3072 if (ret != 0) { 3298 if (ret != 0) {
3073 drm_gem_object_unreference(target_obj); 3299 drm_gem_object_unreference(target_obj);
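The relocation checks moved above now run only after the fast paths, and they gain a third test: reloc->delta must land inside the target object. A sketch of the three bounds checks in isolation, with types and sizes simplified:

#include <errno.h>
#include <stdint.h>

struct reloc {
        uint64_t offset;        /* where in the batch to patch */
        uint64_t delta;         /* offset inside the target object */
};

static int validate_reloc(const struct reloc *r,
                          uint64_t obj_size, uint64_t target_size)
{
        if (r->offset > obj_size - 4)
                return -EINVAL;         /* write would overrun the batch */
        if (r->offset & 3)
                return -EINVAL;         /* 32-bit patch must be aligned */
        if (r->delta >= target_size)
                return -EINVAL;         /* points past the target object */
        return 0;
}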
@@ -3126,6 +3352,8 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev,
3126 exec_start = (uint32_t) exec_offset + exec->batch_start_offset; 3352 exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3127 exec_len = (uint32_t) exec->batch_len; 3353 exec_len = (uint32_t) exec->batch_len;
3128 3354
3355 trace_i915_gem_request_submit(dev, dev_priv->mm.next_gem_seqno);
3356
3129 count = nbox ? nbox : 1; 3357 count = nbox ? nbox : 1;
3130 3358
3131 for (i = 0; i < count; i++) { 3359 for (i = 0; i < count; i++) {
@@ -3363,7 +3591,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
3363 3591
3364 i915_verify_inactive(dev, __FILE__, __LINE__); 3592 i915_verify_inactive(dev, __FILE__, __LINE__);
3365 3593
3366 if (dev_priv->mm.wedged) { 3594 if (atomic_read(&dev_priv->mm.wedged)) {
3367 DRM_ERROR("Execbuf while wedged\n"); 3595 DRM_ERROR("Execbuf while wedged\n");
3368 mutex_unlock(&dev->struct_mutex); 3596 mutex_unlock(&dev->struct_mutex);
3369 ret = -EIO; 3597 ret = -EIO;
@@ -3421,8 +3649,23 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
3421 3649
3422 /* error other than GTT full, or we've already tried again */ 3650 /* error other than GTT full, or we've already tried again */
3423 if (ret != -ENOSPC || pin_tries >= 1) { 3651 if (ret != -ENOSPC || pin_tries >= 1) {
3424 if (ret != -ERESTARTSYS) 3652 if (ret != -ERESTARTSYS) {
3425 DRM_ERROR("Failed to pin buffers %d\n", ret); 3653 unsigned long long total_size = 0;
3654 for (i = 0; i < args->buffer_count; i++)
3655 total_size += object_list[i]->size;
3656 DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes: %d\n",
3657 pinned+1, args->buffer_count,
3658 total_size, ret);
3659 DRM_ERROR("%d objects [%d pinned], "
3660 "%d object bytes [%d pinned], "
3661 "%d/%d gtt bytes\n",
3662 atomic_read(&dev->object_count),
3663 atomic_read(&dev->pin_count),
3664 atomic_read(&dev->object_memory),
3665 atomic_read(&dev->pin_memory),
3666 atomic_read(&dev->gtt_memory),
3667 dev->gtt_total);
3668 }
3426 goto err; 3669 goto err;
3427 } 3670 }
3428 3671
@@ -3433,7 +3676,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
3433 3676
3434 /* evict everyone we can from the aperture */ 3677 /* evict everyone we can from the aperture */
3435 ret = i915_gem_evict_everything(dev); 3678 ret = i915_gem_evict_everything(dev);
3436 if (ret) 3679 if (ret && ret != -ENOSPC)
3437 goto err; 3680 goto err;
3438 } 3681 }
3439 3682
@@ -3489,8 +3732,12 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
3489 3732
3490 for (i = 0; i < args->buffer_count; i++) { 3733 for (i = 0; i < args->buffer_count; i++) {
3491 struct drm_gem_object *obj = object_list[i]; 3734 struct drm_gem_object *obj = object_list[i];
3735 uint32_t old_write_domain = obj->write_domain;
3492 3736
3493 obj->write_domain = obj->pending_write_domain; 3737 obj->write_domain = obj->pending_write_domain;
3738 trace_i915_gem_object_change_domain(obj,
3739 obj->read_domains,
3740 old_write_domain);
3494 } 3741 }
3495 3742
3496 i915_verify_inactive(dev, __FILE__, __LINE__); 3743 i915_verify_inactive(dev, __FILE__, __LINE__);
@@ -3607,11 +3854,8 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
3607 i915_verify_inactive(dev, __FILE__, __LINE__); 3854 i915_verify_inactive(dev, __FILE__, __LINE__);
3608 if (obj_priv->gtt_space == NULL) { 3855 if (obj_priv->gtt_space == NULL) {
3609 ret = i915_gem_object_bind_to_gtt(obj, alignment); 3856 ret = i915_gem_object_bind_to_gtt(obj, alignment);
3610 if (ret != 0) { 3857 if (ret)
3611 if (ret != -EBUSY && ret != -ERESTARTSYS)
3612 DRM_ERROR("Failure to bind: %d\n", ret);
3613 return ret; 3858 return ret;
3614 }
3615 } 3859 }
3616 /* 3860 /*
3617 * Pre-965 chips need a fence register set up in order to 3861 * Pre-965 chips need a fence register set up in order to
@@ -3691,6 +3935,13 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
3691 } 3935 }
3692 obj_priv = obj->driver_private; 3936 obj_priv = obj->driver_private;
3693 3937
3938 if (obj_priv->madv != I915_MADV_WILLNEED) {
3939 DRM_ERROR("Attempting to pin a purgeable buffer\n");
3940 drm_gem_object_unreference(obj);
3941 mutex_unlock(&dev->struct_mutex);
3942 return -EINVAL;
3943 }
3944
3694 if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) { 3945 if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
3695 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 3946 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
3696 args->handle); 3947 args->handle);
@@ -3803,6 +4054,56 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3803 return i915_gem_ring_throttle(dev, file_priv); 4054 return i915_gem_ring_throttle(dev, file_priv);
3804} 4055}
3805 4056
4057int
4058i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4059 struct drm_file *file_priv)
4060{
4061 struct drm_i915_gem_madvise *args = data;
4062 struct drm_gem_object *obj;
4063 struct drm_i915_gem_object *obj_priv;
4064
4065 switch (args->madv) {
4066 case I915_MADV_DONTNEED:
4067 case I915_MADV_WILLNEED:
4068 break;
4069 default:
4070 return -EINVAL;
4071 }
4072
4073 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4074 if (obj == NULL) {
4075 DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n",
4076 args->handle);
4077 return -EBADF;
4078 }
4079
4080 mutex_lock(&dev->struct_mutex);
4081 obj_priv = obj->driver_private;
4082
4083 if (obj_priv->pin_count) {
4084 drm_gem_object_unreference(obj);
4085 mutex_unlock(&dev->struct_mutex);
4086
4087 DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n");
4088 return -EINVAL;
4089 }
4090
4091 if (obj_priv->madv != __I915_MADV_PURGED)
4092 obj_priv->madv = args->madv;
4093
4094 /* if the object is no longer bound, discard its backing storage */
4095 if (i915_gem_object_is_purgeable(obj_priv) &&
4096 obj_priv->gtt_space == NULL)
4097 i915_gem_object_truncate(obj);
4098
4099 args->retained = obj_priv->madv != __I915_MADV_PURGED;
4100
4101 drm_gem_object_unreference(obj);
4102 mutex_unlock(&dev->struct_mutex);
4103
4104 return 0;
4105}
4106
3806int i915_gem_init_object(struct drm_gem_object *obj) 4107int i915_gem_init_object(struct drm_gem_object *obj)
3807{ 4108{
3808 struct drm_i915_gem_object *obj_priv; 4109 struct drm_i915_gem_object *obj_priv;
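i915_gem_madvise_ioctl() above implements a three-state advice machine: userspace toggles WILLNEED/DONTNEED, the kernel may internally promote DONTNEED to PURGED once the pages are dropped, and a purged object is never resurrected. A sketch of that state machine; the enum values and struct layout are illustrative, not the uapi:

#include <errno.h>

enum madv { MADV_WILLNEED, MADV_DONTNEED, MADV_PURGED };

struct object {
        enum madv madv;
        int pinned;
        int bound;              /* has GTT space */
};

static int set_madvise(struct object *obj, enum madv advice, int *retained)
{
        if (advice != MADV_WILLNEED && advice != MADV_DONTNEED)
                return -EINVAL;
        if (obj->pinned)
                return -EINVAL;         /* pinned buffers can't be purged */

        if (obj->madv != MADV_PURGED)   /* purge is one-way */
                obj->madv = advice;

        if (obj->madv == MADV_DONTNEED && !obj->bound)
                obj->madv = MADV_PURGED;  /* drop backing storage now */

        *retained = (obj->madv != MADV_PURGED);
        return 0;
}

The retained flag plays the role of args->retained above, letting userspace learn whether the contents survived since it last marked the buffer DONTNEED.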
@@ -3827,6 +4128,9 @@ int i915_gem_init_object(struct drm_gem_object *obj)
3827 obj_priv->fence_reg = I915_FENCE_REG_NONE; 4128 obj_priv->fence_reg = I915_FENCE_REG_NONE;
3828 INIT_LIST_HEAD(&obj_priv->list); 4129 INIT_LIST_HEAD(&obj_priv->list);
3829 INIT_LIST_HEAD(&obj_priv->fence_list); 4130 INIT_LIST_HEAD(&obj_priv->fence_list);
4131 obj_priv->madv = I915_MADV_WILLNEED;
4132
4133 trace_i915_gem_object_create(obj);
3830 4134
3831 return 0; 4135 return 0;
3832} 4136}
@@ -3836,6 +4140,8 @@ void i915_gem_free_object(struct drm_gem_object *obj)
3836 struct drm_device *dev = obj->dev; 4140 struct drm_device *dev = obj->dev;
3837 struct drm_i915_gem_object *obj_priv = obj->driver_private; 4141 struct drm_i915_gem_object *obj_priv = obj->driver_private;
3838 4142
4143 trace_i915_gem_object_destroy(obj);
4144
3839 while (obj_priv->pin_count > 0) 4145 while (obj_priv->pin_count > 0)
3840 i915_gem_object_unpin(obj); 4146 i915_gem_object_unpin(obj);
3841 4147
@@ -3844,43 +4150,35 @@ void i915_gem_free_object(struct drm_gem_object *obj)
3844 4150
3845 i915_gem_object_unbind(obj); 4151 i915_gem_object_unbind(obj);
3846 4152
3847 i915_gem_free_mmap_offset(obj); 4153 if (obj_priv->mmap_offset)
4154 i915_gem_free_mmap_offset(obj);
3848 4155
3849 kfree(obj_priv->page_cpu_valid); 4156 kfree(obj_priv->page_cpu_valid);
3850 kfree(obj_priv->bit_17); 4157 kfree(obj_priv->bit_17);
3851 kfree(obj->driver_private); 4158 kfree(obj->driver_private);
3852} 4159}
3853 4160
3854/** Unbinds all objects that are on the given buffer list. */
3855static int
3856i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head)
3857{
3858	struct drm_gem_object *obj;
3859	struct drm_i915_gem_object *obj_priv;
3860	int ret;
3861
3862	while (!list_empty(head)) {
3863		obj_priv = list_first_entry(head,
3864					    struct drm_i915_gem_object,
3865					    list);
3866		obj = obj_priv->obj;
3867
3868		if (obj_priv->pin_count != 0) {
3869			DRM_ERROR("Pinned object in unbind list\n");
3870			mutex_unlock(&dev->struct_mutex);
3871			return -EINVAL;
3872		}
3873
3874		ret = i915_gem_object_unbind(obj);
3875		if (ret != 0) {
3876			DRM_ERROR("Error unbinding object in LeaveVT: %d\n",
3877				  ret);
3878			mutex_unlock(&dev->struct_mutex);
3879			return ret;
3880		}
3881	}
3882
3883
3884	return 0;
3885}
4161/** Unbinds all inactive objects. */
4162static int
4163i915_gem_evict_from_inactive_list(struct drm_device *dev)
4164{
4165	drm_i915_private_t *dev_priv = dev->dev_private;
4166
4167	while (!list_empty(&dev_priv->mm.inactive_list)) {
4168		struct drm_gem_object *obj;
4169		int ret;
4170
4171		obj = list_first_entry(&dev_priv->mm.inactive_list,
4172				       struct drm_i915_gem_object,
4173				       list)->obj;
4174
4175		ret = i915_gem_object_unbind(obj);
4176		if (ret != 0) {
4177			DRM_ERROR("Error unbinding object: %d\n", ret);
4178			return ret;
4179		}
4180	}
4181
4182	return 0;
4183}
3886 4184
@@ -3902,6 +4200,7 @@ i915_gem_idle(struct drm_device *dev)
3902 * We need to replace this with a semaphore, or something. 4200 * We need to replace this with a semaphore, or something.
3903 */ 4201 */
3904 dev_priv->mm.suspended = 1; 4202 dev_priv->mm.suspended = 1;
4203 del_timer(&dev_priv->hangcheck_timer);
3905 4204
3906 /* Cancel the retire work handler, wait for it to finish if running 4205 /* Cancel the retire work handler, wait for it to finish if running
3907 */ 4206 */
@@ -3931,7 +4230,7 @@ i915_gem_idle(struct drm_device *dev)
3931 if (last_seqno == cur_seqno) { 4230 if (last_seqno == cur_seqno) {
3932 if (stuck++ > 100) { 4231 if (stuck++ > 100) {
3933 DRM_ERROR("hardware wedged\n"); 4232 DRM_ERROR("hardware wedged\n");
3934 dev_priv->mm.wedged = 1; 4233 atomic_set(&dev_priv->mm.wedged, 1);
3935 DRM_WAKEUP(&dev_priv->irq_queue); 4234 DRM_WAKEUP(&dev_priv->irq_queue);
3936 break; 4235 break;
3937 } 4236 }
@@ -3944,7 +4243,7 @@ i915_gem_idle(struct drm_device *dev)
3944 i915_gem_retire_requests(dev); 4243 i915_gem_retire_requests(dev);
3945 4244
3946 spin_lock(&dev_priv->mm.active_list_lock); 4245 spin_lock(&dev_priv->mm.active_list_lock);
3947 if (!dev_priv->mm.wedged) { 4246 if (!atomic_read(&dev_priv->mm.wedged)) {
3948 /* Active and flushing should now be empty as we've 4247 /* Active and flushing should now be empty as we've
3949 * waited for a sequence higher than any pending execbuffer 4248 * waited for a sequence higher than any pending execbuffer
3950 */ 4249 */
@@ -3962,29 +4261,41 @@ i915_gem_idle(struct drm_device *dev)
3962 * the GPU domains and just stuff them onto inactive. 4261 * the GPU domains and just stuff them onto inactive.
3963 */ 4262 */
3964	while (!list_empty(&dev_priv->mm.active_list)) {
3965		struct drm_i915_gem_object *obj_priv;
3966
3967		obj_priv = list_first_entry(&dev_priv->mm.active_list,
3968					    struct drm_i915_gem_object,
3969					    list);
3970		obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
3971		i915_gem_object_move_to_inactive(obj_priv->obj);
3972	}
3973	spin_unlock(&dev_priv->mm.active_list_lock);
3974
3975	while (!list_empty(&dev_priv->mm.flushing_list)) {
3976		struct drm_i915_gem_object *obj_priv;
3977
3978		obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
3979					    struct drm_i915_gem_object,
3980					    list);
3981		obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
3982		i915_gem_object_move_to_inactive(obj_priv->obj);
3983	}
4263	while (!list_empty(&dev_priv->mm.active_list)) {
4264		struct drm_gem_object *obj;
4265		uint32_t old_write_domain;
4266
4267		obj = list_first_entry(&dev_priv->mm.active_list,
4268				       struct drm_i915_gem_object,
4269				       list)->obj;
4270		old_write_domain = obj->write_domain;
4271		obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
4272		i915_gem_object_move_to_inactive(obj);
4273
4274		trace_i915_gem_object_change_domain(obj,
4275						    obj->read_domains,
4276						    old_write_domain);
4277	}
4278	spin_unlock(&dev_priv->mm.active_list_lock);
4279
4280	while (!list_empty(&dev_priv->mm.flushing_list)) {
4281		struct drm_gem_object *obj;
4282		uint32_t old_write_domain;
4283
4284		obj = list_first_entry(&dev_priv->mm.flushing_list,
4285				       struct drm_i915_gem_object,
4286				       list)->obj;
4287		old_write_domain = obj->write_domain;
4288		obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
4289		i915_gem_object_move_to_inactive(obj);
4290
4291		trace_i915_gem_object_change_domain(obj,
4292						    obj->read_domains,
4293						    old_write_domain);
4294	}
3984 4295
3985 4296
3986 /* Move all inactive buffers out of the GTT. */ 4297 /* Move all inactive buffers out of the GTT. */
3987 ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list); 4298 ret = i915_gem_evict_from_inactive_list(dev);
3988 WARN_ON(!list_empty(&dev_priv->mm.inactive_list)); 4299 WARN_ON(!list_empty(&dev_priv->mm.inactive_list));
3989 if (ret) { 4300 if (ret) {
3990 mutex_unlock(&dev->struct_mutex); 4301 mutex_unlock(&dev->struct_mutex);
@@ -4206,9 +4517,9 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
4206 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4517 if (drm_core_check_feature(dev, DRIVER_MODESET))
4207 return 0; 4518 return 0;
4208 4519
4209 if (dev_priv->mm.wedged) { 4520 if (atomic_read(&dev_priv->mm.wedged)) {
4210 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 4521 DRM_ERROR("Reenabling wedged hardware, good luck\n");
4211 dev_priv->mm.wedged = 0; 4522 atomic_set(&dev_priv->mm.wedged, 0);
4212 } 4523 }
4213 4524
4214 mutex_lock(&dev->struct_mutex); 4525 mutex_lock(&dev->struct_mutex);
@@ -4274,6 +4585,10 @@ i915_gem_load(struct drm_device *dev)
4274 i915_gem_retire_work_handler); 4585 i915_gem_retire_work_handler);
4275 dev_priv->mm.next_gem_seqno = 1; 4586 dev_priv->mm.next_gem_seqno = 1;
4276 4587
4588 spin_lock(&shrink_list_lock);
4589 list_add(&dev_priv->mm.shrink_list, &shrink_list);
4590 spin_unlock(&shrink_list_lock);
4591
4277 /* Old X drivers will take 0-2 for front, back, depth buffers */ 4592 /* Old X drivers will take 0-2 for front, back, depth buffers */
4278 dev_priv->fence_reg_start = 3; 4593 dev_priv->fence_reg_start = 3;
4279 4594
@@ -4491,3 +4806,116 @@ void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
4491 list_del_init(i915_file_priv->mm.request_list.next); 4806 list_del_init(i915_file_priv->mm.request_list.next);
4492 mutex_unlock(&dev->struct_mutex); 4807 mutex_unlock(&dev->struct_mutex);
4493} 4808}
4809
4810static int
4811i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask)
4812{
4813 drm_i915_private_t *dev_priv, *next_dev;
4814 struct drm_i915_gem_object *obj_priv, *next_obj;
4815 int cnt = 0;
4816 int would_deadlock = 1;
4817
4818 /* "fast-path" to count number of available objects */
4819 if (nr_to_scan == 0) {
4820 spin_lock(&shrink_list_lock);
4821 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
4822 struct drm_device *dev = dev_priv->dev;
4823
4824 if (mutex_trylock(&dev->struct_mutex)) {
4825 list_for_each_entry(obj_priv,
4826 &dev_priv->mm.inactive_list,
4827 list)
4828 cnt++;
4829 mutex_unlock(&dev->struct_mutex);
4830 }
4831 }
4832 spin_unlock(&shrink_list_lock);
4833
4834 return (cnt / 100) * sysctl_vfs_cache_pressure;
4835 }
4836
4837 spin_lock(&shrink_list_lock);
4838
4839 /* first scan for clean buffers */
4840 list_for_each_entry_safe(dev_priv, next_dev,
4841 &shrink_list, mm.shrink_list) {
4842 struct drm_device *dev = dev_priv->dev;
4843
4844	if (!mutex_trylock(&dev->struct_mutex))
4845 continue;
4846
4847 spin_unlock(&shrink_list_lock);
4848
4849 i915_gem_retire_requests(dev);
4850
4851 list_for_each_entry_safe(obj_priv, next_obj,
4852 &dev_priv->mm.inactive_list,
4853 list) {
4854 if (i915_gem_object_is_purgeable(obj_priv)) {
4855 i915_gem_object_unbind(obj_priv->obj);
4856 if (--nr_to_scan <= 0)
4857 break;
4858 }
4859 }
4860
4861 spin_lock(&shrink_list_lock);
4862 mutex_unlock(&dev->struct_mutex);
4863
4864 would_deadlock = 0;
4865
4866 if (nr_to_scan <= 0)
4867 break;
4868 }
4869
4870 /* second pass, evict/count anything still on the inactive list */
4871 list_for_each_entry_safe(dev_priv, next_dev,
4872 &shrink_list, mm.shrink_list) {
4873 struct drm_device *dev = dev_priv->dev;
4874
4875	if (!mutex_trylock(&dev->struct_mutex))
4876 continue;
4877
4878 spin_unlock(&shrink_list_lock);
4879
4880 list_for_each_entry_safe(obj_priv, next_obj,
4881 &dev_priv->mm.inactive_list,
4882 list) {
4883 if (nr_to_scan > 0) {
4884 i915_gem_object_unbind(obj_priv->obj);
4885 nr_to_scan--;
4886 } else
4887 cnt++;
4888 }
4889
4890 spin_lock(&shrink_list_lock);
4891 mutex_unlock(&dev->struct_mutex);
4892
4893 would_deadlock = 0;
4894 }
4895
4896 spin_unlock(&shrink_list_lock);
4897
4898 if (would_deadlock)
4899 return -1;
4900 else if (cnt > 0)
4901 return (cnt / 100) * sysctl_vfs_cache_pressure;
4902 else
4903 return 0;
4904}
4905
4906static struct shrinker shrinker = {
4907 .shrink = i915_gem_shrink,
4908 .seeks = DEFAULT_SEEKS,
4909};
4910
4911__init void
4912i915_gem_shrinker_init(void)
4913{
4914 register_shrinker(&shrinker);
4915}
4916
4917__exit void
4918i915_gem_shrinker_exit(void)
4919{
4920 unregister_shrinker(&shrinker);
4921}
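The shrinker registered above follows the old two-argument contract: return the object count when nr_to_scan is 0 (query pass), return -1 when the needed locks cannot be taken so the VM does not spin, otherwise reclaim and report what is left. A reduced sketch of that contract, with the driver-specific pieces stubbed out as labeled:

    #include <linux/mm.h>

    static int count_reclaimable(void);     /* stub: freeable objects */
    static int try_lock_state(void);        /* stub: non-blocking lock */
    static void unlock_state(void);         /* stub */
    static void reclaim(int nr);            /* stub: free up to nr objects */

    static int example_shrink(int nr_to_scan, gfp_t gfp_mask)
    {
            if (nr_to_scan == 0)
                    return count_reclaimable();     /* query-only pass */

            if (!try_lock_state())
                    return -1;      /* would deadlock; tell the VM so */

            reclaim(nr_to_scan);
            unlock_state();

            return count_reclaimable();
    }

    static struct shrinker example_shrinker = {
            .shrink = example_shrink,
            .seeks = DEFAULT_SEEKS,
    };
    /* register_shrinker(&example_shrinker) at init,
     * unregister_shrinker(&example_shrinker) at exit. */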
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 6c89f2ff2495..4dfeec7cdd42 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -31,6 +31,7 @@
31#include "drm.h" 31#include "drm.h"
32#include "i915_drm.h" 32#include "i915_drm.h"
33#include "i915_drv.h" 33#include "i915_drv.h"
34#include "i915_trace.h"
34#include "intel_drv.h" 35#include "intel_drv.h"
35 36
36#define MAX_NOPID ((u32)~0) 37#define MAX_NOPID ((u32)~0)
@@ -279,7 +280,9 @@ irqreturn_t igdng_irq_handler(struct drm_device *dev)
279 } 280 }
280 281
281 if (gt_iir & GT_USER_INTERRUPT) { 282 if (gt_iir & GT_USER_INTERRUPT) {
282 dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev); 283 u32 seqno = i915_get_gem_seqno(dev);
284 dev_priv->mm.irq_gem_seqno = seqno;
285 trace_i915_gem_request_complete(dev, seqno);
283 DRM_WAKEUP(&dev_priv->irq_queue); 286 DRM_WAKEUP(&dev_priv->irq_queue);
284 } 287 }
285 288
@@ -302,12 +305,25 @@ static void i915_error_work_func(struct work_struct *work)
302 drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t, 305 drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
303 error_work); 306 error_work);
304 struct drm_device *dev = dev_priv->dev; 307 struct drm_device *dev = dev_priv->dev;
305	char *event_string = "ERROR=1";
306	char *envp[] = { event_string, NULL };
307
308	DRM_DEBUG("generating error event\n");
309
310	kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, envp);
308	char *error_event[] = { "ERROR=1", NULL };
309	char *reset_event[] = { "RESET=1", NULL };
310	char *reset_done_event[] = { "ERROR=0", NULL };
311
312	DRM_DEBUG("generating error event\n");
313	kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event);
314
315 if (atomic_read(&dev_priv->mm.wedged)) {
316 if (IS_I965G(dev)) {
317 DRM_DEBUG("resetting chip\n");
318 kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_event);
319 if (!i965_reset(dev, GDRST_RENDER)) {
320 atomic_set(&dev_priv->mm.wedged, 0);
321 kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event);
322 }
323 } else {
324 printk("reboot required\n");
325 }
326 }
311} 327}
312 328
313/** 329/**
@@ -372,7 +388,7 @@ out:
372 * so userspace knows something bad happened (should trigger collection 388 * so userspace knows something bad happened (should trigger collection
373 * of a ring dump etc.). 389 * of a ring dump etc.).
374 */ 390 */
375static void i915_handle_error(struct drm_device *dev) 391static void i915_handle_error(struct drm_device *dev, bool wedged)
376{ 392{
377 struct drm_i915_private *dev_priv = dev->dev_private; 393 struct drm_i915_private *dev_priv = dev->dev_private;
378 u32 eir = I915_READ(EIR); 394 u32 eir = I915_READ(EIR);
@@ -482,6 +498,16 @@ static void i915_handle_error(struct drm_device *dev)
482 I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); 498 I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
483 } 499 }
484 500
501 if (wedged) {
502 atomic_set(&dev_priv->mm.wedged, 1);
503
504 /*
505 * Wakeup waiting processes so they don't hang
506 */
507 printk("i915: Waking up sleeping processes\n");
508 DRM_WAKEUP(&dev_priv->irq_queue);
509 }
510
485 queue_work(dev_priv->wq, &dev_priv->error_work); 511 queue_work(dev_priv->wq, &dev_priv->error_work);
486} 512}
487 513
@@ -527,7 +553,7 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
527 pipeb_stats = I915_READ(PIPEBSTAT); 553 pipeb_stats = I915_READ(PIPEBSTAT);
528 554
529 if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) 555 if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
530 i915_handle_error(dev); 556 i915_handle_error(dev, false);
531 557
532 /* 558 /*
533 * Clear the PIPE(A|B)STAT regs before the IIR 559 * Clear the PIPE(A|B)STAT regs before the IIR
@@ -599,8 +625,12 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
599 } 625 }
600 626
601 if (iir & I915_USER_INTERRUPT) { 627 if (iir & I915_USER_INTERRUPT) {
602 dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev); 628 u32 seqno = i915_get_gem_seqno(dev);
629 dev_priv->mm.irq_gem_seqno = seqno;
630 trace_i915_gem_request_complete(dev, seqno);
603 DRM_WAKEUP(&dev_priv->irq_queue); 631 DRM_WAKEUP(&dev_priv->irq_queue);
632 dev_priv->hangcheck_count = 0;
633 mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
604 } 634 }
605 635
606 if (pipea_stats & vblank_status) { 636 if (pipea_stats & vblank_status) {
@@ -880,6 +910,52 @@ int i915_vblank_swap(struct drm_device *dev, void *data,
880 return -EINVAL; 910 return -EINVAL;
881} 911}
882 912
913struct drm_i915_gem_request *i915_get_tail_request(struct drm_device *dev) {
914 drm_i915_private_t *dev_priv = dev->dev_private;
915 return list_entry(dev_priv->mm.request_list.prev, struct drm_i915_gem_request, list);
916}
917
918/**
919 * This is called when the chip hasn't reported back with completed
920 * batchbuffers in a long time. The first time this is called we simply record
921 * ACTHD. If ACTHD hasn't changed by the time the hangcheck timer elapses
922 * again, we assume the chip is wedged and try to fix it.
923 */
924void i915_hangcheck_elapsed(unsigned long data)
925{
926 struct drm_device *dev = (struct drm_device *)data;
927 drm_i915_private_t *dev_priv = dev->dev_private;
928 uint32_t acthd;
929
930 if (!IS_I965G(dev))
931 acthd = I915_READ(ACTHD);
932 else
933 acthd = I915_READ(ACTHD_I965);
934
935 /* If all work is done then ACTHD clearly hasn't advanced. */
936 if (list_empty(&dev_priv->mm.request_list) ||
937 i915_seqno_passed(i915_get_gem_seqno(dev), i915_get_tail_request(dev)->seqno)) {
938 dev_priv->hangcheck_count = 0;
939 return;
940 }
941
942 if (dev_priv->last_acthd == acthd && dev_priv->hangcheck_count > 0) {
943 DRM_ERROR("Hangcheck timer elapsed... GPU hung\n");
944 i915_handle_error(dev, true);
945 return;
946 }
947
948	/* Reset timer in case the chip hangs without another request being added */
949 mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
950
951 if (acthd != dev_priv->last_acthd)
952 dev_priv->hangcheck_count = 0;
953 else
954 dev_priv->hangcheck_count++;
955
956 dev_priv->last_acthd = acthd;
957}
958
883/* drm_dma.h hooks 959/* drm_dma.h hooks
884*/ 960*/
885static void igdng_irq_preinstall(struct drm_device *dev) 961static void igdng_irq_preinstall(struct drm_device *dev)
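The hangcheck added above is a self-rearming timer: it samples the hardware head pointer, resets its counter whenever the ring is idle or the head has moved, and declares a hang only after two consecutive ticks without progress. The pattern, reduced to a sketch with placeholder names:

    #include <linux/timer.h>
    #include <linux/jiffies.h>

    struct hang_state {
            struct timer_list timer;
            u32 last_head;
            int ticks_stuck;
    };

    static u32 read_hw_head(struct hang_state *s);      /* stub */
    static int work_pending(struct hang_state *s);      /* stub */
    static void declare_hung(struct hang_state *s);     /* stub */

    static void hangcheck_elapsed(unsigned long data)
    {
            struct hang_state *s = (struct hang_state *)data;
            u32 head = read_hw_head(s);

            if (!work_pending(s)) {
                    s->ticks_stuck = 0;     /* idle, nothing to watch */
                    return;
            }

            if (head == s->last_head && s->ticks_stuck > 0) {
                    declare_hung(s);        /* second tick, no progress */
                    return;
            }

            s->ticks_stuck = (head == s->last_head) ? s->ticks_stuck + 1 : 0;
            s->last_head = head;
            mod_timer(&s->timer, jiffies + HZ);     /* re-arm */
    }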
diff --git a/drivers/gpu/drm/i915/i915_opregion.c b/drivers/gpu/drm/i915/i915_opregion.c
index e4b4e8898e39..2d5193556d3f 100644
--- a/drivers/gpu/drm/i915/i915_opregion.c
+++ b/drivers/gpu/drm/i915/i915_opregion.c
@@ -148,6 +148,7 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp)
148 struct drm_i915_private *dev_priv = dev->dev_private; 148 struct drm_i915_private *dev_priv = dev->dev_private;
149 struct opregion_asle *asle = dev_priv->opregion.asle; 149 struct opregion_asle *asle = dev_priv->opregion.asle;
150 u32 blc_pwm_ctl, blc_pwm_ctl2; 150 u32 blc_pwm_ctl, blc_pwm_ctl2;
151 u32 max_backlight, level, shift;
151 152
152 if (!(bclp & ASLE_BCLP_VALID)) 153 if (!(bclp & ASLE_BCLP_VALID))
153 return ASLE_BACKLIGHT_FAIL; 154 return ASLE_BACKLIGHT_FAIL;
@@ -157,14 +158,25 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp)
157 return ASLE_BACKLIGHT_FAIL; 158 return ASLE_BACKLIGHT_FAIL;
158 159
159 blc_pwm_ctl = I915_READ(BLC_PWM_CTL); 160 blc_pwm_ctl = I915_READ(BLC_PWM_CTL);
160 blc_pwm_ctl &= ~BACKLIGHT_DUTY_CYCLE_MASK;
161 blc_pwm_ctl2 = I915_READ(BLC_PWM_CTL2); 161 blc_pwm_ctl2 = I915_READ(BLC_PWM_CTL2);
162 162
163	if (blc_pwm_ctl2 & BLM_COMBINATION_MODE)
164		pci_write_config_dword(dev->pdev, PCI_LBPC, bclp);
165	else
166		I915_WRITE(BLC_PWM_CTL, blc_pwm_ctl | ((bclp * 0x101)-1));
167
163	if (IS_I965G(dev) && (blc_pwm_ctl2 & BLM_COMBINATION_MODE))
164		pci_write_config_dword(dev->pdev, PCI_LBPC, bclp);
165	else {
166		if (IS_IGD(dev)) {
167			blc_pwm_ctl &= ~(BACKLIGHT_DUTY_CYCLE_MASK - 1);
168			max_backlight = (blc_pwm_ctl & BACKLIGHT_MODULATION_FREQ_MASK) >>
169					BACKLIGHT_MODULATION_FREQ_SHIFT;
170			shift = BACKLIGHT_DUTY_CYCLE_SHIFT + 1;
171		} else {
172			blc_pwm_ctl &= ~BACKLIGHT_DUTY_CYCLE_MASK;
173			max_backlight = ((blc_pwm_ctl & BACKLIGHT_MODULATION_FREQ_MASK) >>
174					 BACKLIGHT_MODULATION_FREQ_SHIFT) * 2;
175			shift = BACKLIGHT_DUTY_CYCLE_SHIFT;
176		}
177		level = (bclp * max_backlight) / 255;
178		I915_WRITE(BLC_PWM_CTL, blc_pwm_ctl | (level << shift));
179	}
168 asle->cblv = (bclp*0x64)/0xff | ASLE_CBLV_VALID; 180 asle->cblv = (bclp*0x64)/0xff | ASLE_CBLV_VALID;
169 181
170 return 0; 182 return 0;
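A worked instance of the new scaling, with an illustrative register value: on the non-IGD path the modulation-frequency field is doubled, so a readback of 0x1234 gives max_backlight = 0x2468, and a requested bclp of 128 programs roughly half brightness:

    u32 max_backlight = 0x1234 * 2;                 /* illustrative readback */
    u32 level = (128 * max_backlight) / 255;        /* == 0x1246 */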
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3f7963553464..0466ddbeba32 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -86,6 +86,10 @@
86#define I915_GC_RENDER_CLOCK_200_MHZ (1 << 0) 86#define I915_GC_RENDER_CLOCK_200_MHZ (1 << 0)
87#define I915_GC_RENDER_CLOCK_333_MHZ (4 << 0) 87#define I915_GC_RENDER_CLOCK_333_MHZ (4 << 0)
88#define LBB 0xf4 88#define LBB 0xf4
89#define GDRST 0xc0
90#define GDRST_FULL (0<<2)
91#define GDRST_RENDER (1<<2)
92#define GDRST_MEDIA (3<<2)
89 93
90/* VGA stuff */ 94/* VGA stuff */
91 95
@@ -344,9 +348,37 @@
344#define FBC_CTL_PLANEA (0<<0) 348#define FBC_CTL_PLANEA (0<<0)
345#define FBC_CTL_PLANEB (1<<0) 349#define FBC_CTL_PLANEB (1<<0)
346#define FBC_FENCE_OFF 0x0321b 350#define FBC_FENCE_OFF 0x0321b
351#define FBC_TAG 0x03300
347 352
348#define FBC_LL_SIZE (1536) 353#define FBC_LL_SIZE (1536)
349 354
355/* Framebuffer compression for GM45+ */
356#define DPFC_CB_BASE 0x3200
357#define DPFC_CONTROL 0x3208
358#define DPFC_CTL_EN (1<<31)
359#define DPFC_CTL_PLANEA (0<<30)
360#define DPFC_CTL_PLANEB (1<<30)
361#define DPFC_CTL_FENCE_EN (1<<29)
362#define DPFC_SR_EN (1<<10)
363#define DPFC_CTL_LIMIT_1X (0<<6)
364#define DPFC_CTL_LIMIT_2X (1<<6)
365#define DPFC_CTL_LIMIT_4X (2<<6)
366#define DPFC_RECOMP_CTL 0x320c
367#define DPFC_RECOMP_STALL_EN (1<<27)
368#define DPFC_RECOMP_STALL_WM_SHIFT (16)
369#define DPFC_RECOMP_STALL_WM_MASK (0x07ff0000)
370#define DPFC_RECOMP_TIMER_COUNT_SHIFT (0)
371#define DPFC_RECOMP_TIMER_COUNT_MASK (0x0000003f)
372#define DPFC_STATUS 0x3210
373#define DPFC_INVAL_SEG_SHIFT (16)
374#define DPFC_INVAL_SEG_MASK (0x07ff0000)
375#define DPFC_COMP_SEG_SHIFT (0)
376#define DPFC_COMP_SEG_MASK (0x000003ff)
377#define DPFC_STATUS2 0x3214
378#define DPFC_FENCE_YOFF 0x3218
379#define DPFC_CHICKEN 0x3224
380#define DPFC_HT_MODIFY (1<<31)
381
350/* 382/*
351 * GPIO regs 383 * GPIO regs
352 */ 384 */
@@ -2000,6 +2032,8 @@
2000#define PF_ENABLE (1<<31) 2032#define PF_ENABLE (1<<31)
2001#define PFA_WIN_SZ 0x68074 2033#define PFA_WIN_SZ 0x68074
2002#define PFB_WIN_SZ 0x68874 2034#define PFB_WIN_SZ 0x68874
2035#define PFA_WIN_POS 0x68070
2036#define PFB_WIN_POS 0x68870
2003 2037
2004/* legacy palette */ 2038/* legacy palette */
2005#define LGC_PALETTE_A 0x4a000 2039#define LGC_PALETTE_A 0x4a000
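The new DPFC status fields follow the usual mask/shift idiom; a hypothetical decode helper (not part of the patch) shows the intended use:

    static inline u32 dpfc_comp_segments(u32 status)
    {
            return (status & DPFC_COMP_SEG_MASK) >> DPFC_COMP_SEG_SHIFT;
    }

    static inline u32 dpfc_inval_segments(u32 status)
    {
            return (status & DPFC_INVAL_SEG_MASK) >> DPFC_INVAL_SEG_SHIFT;
    }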
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index 20d4d19f5568..bd6d8d91ca9f 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -228,6 +228,7 @@ static void i915_save_modeset_reg(struct drm_device *dev)
228 228
229 if (drm_core_check_feature(dev, DRIVER_MODESET)) 229 if (drm_core_check_feature(dev, DRIVER_MODESET))
230 return; 230 return;
231
231 /* Pipe & plane A info */ 232 /* Pipe & plane A info */
232 dev_priv->savePIPEACONF = I915_READ(PIPEACONF); 233 dev_priv->savePIPEACONF = I915_READ(PIPEACONF);
233 dev_priv->savePIPEASRC = I915_READ(PIPEASRC); 234 dev_priv->savePIPEASRC = I915_READ(PIPEASRC);
@@ -285,6 +286,7 @@ static void i915_save_modeset_reg(struct drm_device *dev)
285 dev_priv->savePIPEBSTAT = I915_READ(PIPEBSTAT); 286 dev_priv->savePIPEBSTAT = I915_READ(PIPEBSTAT);
286 return; 287 return;
287} 288}
289
288static void i915_restore_modeset_reg(struct drm_device *dev) 290static void i915_restore_modeset_reg(struct drm_device *dev)
289{ 291{
290 struct drm_i915_private *dev_priv = dev->dev_private; 292 struct drm_i915_private *dev_priv = dev->dev_private;
@@ -379,19 +381,10 @@ static void i915_restore_modeset_reg(struct drm_device *dev)
379 381
380 return; 382 return;
381} 383}
382int i915_save_state(struct drm_device *dev)
383{
384
385void i915_save_display(struct drm_device *dev)
386{
384 struct drm_i915_private *dev_priv = dev->dev_private; 387 struct drm_i915_private *dev_priv = dev->dev_private;
385 int i;
386
387 pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB);
388
389 /* Render Standby */
390 if (IS_I965G(dev) && IS_MOBILE(dev))
391 dev_priv->saveRENDERSTANDBY = I915_READ(MCHBAR_RENDER_STANDBY);
392
393 /* Hardware status page */
394 dev_priv->saveHWS = I915_READ(HWS_PGA);
395 388
396 /* Display arbitration control */ 389 /* Display arbitration control */
397 dev_priv->saveDSPARB = I915_READ(DSPARB); 390 dev_priv->saveDSPARB = I915_READ(DSPARB);
@@ -399,6 +392,7 @@ int i915_save_state(struct drm_device *dev)
399 /* This is only meaningful in non-KMS mode */ 392 /* This is only meaningful in non-KMS mode */
400 /* Don't save them in KMS mode */ 393 /* Don't save them in KMS mode */
401 i915_save_modeset_reg(dev); 394 i915_save_modeset_reg(dev);
395
402 /* Cursor state */ 396 /* Cursor state */
403 dev_priv->saveCURACNTR = I915_READ(CURACNTR); 397 dev_priv->saveCURACNTR = I915_READ(CURACNTR);
404 dev_priv->saveCURAPOS = I915_READ(CURAPOS); 398 dev_priv->saveCURAPOS = I915_READ(CURAPOS);
@@ -448,81 +442,22 @@ int i915_save_state(struct drm_device *dev)
448 dev_priv->saveFBC_CONTROL2 = I915_READ(FBC_CONTROL2); 442 dev_priv->saveFBC_CONTROL2 = I915_READ(FBC_CONTROL2);
449 dev_priv->saveFBC_CONTROL = I915_READ(FBC_CONTROL); 443 dev_priv->saveFBC_CONTROL = I915_READ(FBC_CONTROL);
450 444
451 /* Interrupt state */
452 dev_priv->saveIIR = I915_READ(IIR);
453 dev_priv->saveIER = I915_READ(IER);
454 dev_priv->saveIMR = I915_READ(IMR);
455
456 /* VGA state */ 445 /* VGA state */
457 dev_priv->saveVGA0 = I915_READ(VGA0); 446 dev_priv->saveVGA0 = I915_READ(VGA0);
458 dev_priv->saveVGA1 = I915_READ(VGA1); 447 dev_priv->saveVGA1 = I915_READ(VGA1);
459 dev_priv->saveVGA_PD = I915_READ(VGA_PD); 448 dev_priv->saveVGA_PD = I915_READ(VGA_PD);
460 dev_priv->saveVGACNTRL = I915_READ(VGACNTRL); 449 dev_priv->saveVGACNTRL = I915_READ(VGACNTRL);
461 450
462 /* Clock gating state */
463 dev_priv->saveD_STATE = I915_READ(D_STATE);
464 dev_priv->saveDSPCLK_GATE_D = I915_READ(DSPCLK_GATE_D);
465
466 /* Cache mode state */
467 dev_priv->saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
468
469 /* Memory Arbitration state */
470 dev_priv->saveMI_ARB_STATE = I915_READ(MI_ARB_STATE);
471
472 /* Scratch space */
473 for (i = 0; i < 16; i++) {
474 dev_priv->saveSWF0[i] = I915_READ(SWF00 + (i << 2));
475 dev_priv->saveSWF1[i] = I915_READ(SWF10 + (i << 2));
476 }
477 for (i = 0; i < 3; i++)
478 dev_priv->saveSWF2[i] = I915_READ(SWF30 + (i << 2));
479
480 /* Fences */
481 if (IS_I965G(dev)) {
482 for (i = 0; i < 16; i++)
483 dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_965_0 + (i * 8));
484 } else {
485 for (i = 0; i < 8; i++)
486 dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4));
487
488 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
489 for (i = 0; i < 8; i++)
490 dev_priv->saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4));
491 }
492 i915_save_vga(dev); 451 i915_save_vga(dev);
493
494 return 0;
495} 452}
496 453
497int i915_restore_state(struct drm_device *dev)
498{
454void i915_restore_display(struct drm_device *dev)
455{
499 struct drm_i915_private *dev_priv = dev->dev_private; 456 struct drm_i915_private *dev_priv = dev->dev_private;
500 int i;
501
502 pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB);
503
504 /* Render Standby */
505 if (IS_I965G(dev) && IS_MOBILE(dev))
506 I915_WRITE(MCHBAR_RENDER_STANDBY, dev_priv->saveRENDERSTANDBY);
507
508 /* Hardware status page */
509 I915_WRITE(HWS_PGA, dev_priv->saveHWS);
510 457
511 /* Display arbitration */ 458 /* Display arbitration */
512 I915_WRITE(DSPARB, dev_priv->saveDSPARB); 459 I915_WRITE(DSPARB, dev_priv->saveDSPARB);
513 460
514 /* Fences */
515 if (IS_I965G(dev)) {
516 for (i = 0; i < 16; i++)
517 I915_WRITE64(FENCE_REG_965_0 + (i * 8), dev_priv->saveFENCE[i]);
518 } else {
519 for (i = 0; i < 8; i++)
520 I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]);
521 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
522 for (i = 0; i < 8; i++)
523 I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->saveFENCE[i+8]);
524 }
525
526 /* Display port ratios (must be done before clock is set) */ 461 /* Display port ratios (must be done before clock is set) */
527 if (SUPPORTS_INTEGRATED_DP(dev)) { 462 if (SUPPORTS_INTEGRATED_DP(dev)) {
528 I915_WRITE(PIPEA_GMCH_DATA_M, dev_priv->savePIPEA_GMCH_DATA_M); 463 I915_WRITE(PIPEA_GMCH_DATA_M, dev_priv->savePIPEA_GMCH_DATA_M);
@@ -534,9 +469,11 @@ int i915_restore_state(struct drm_device *dev)
534 I915_WRITE(PIPEA_DP_LINK_N, dev_priv->savePIPEA_DP_LINK_N); 469 I915_WRITE(PIPEA_DP_LINK_N, dev_priv->savePIPEA_DP_LINK_N);
535 I915_WRITE(PIPEB_DP_LINK_N, dev_priv->savePIPEB_DP_LINK_N); 470 I915_WRITE(PIPEB_DP_LINK_N, dev_priv->savePIPEB_DP_LINK_N);
536 } 471 }
472
537 /* This is only meaningful in non-KMS mode */ 473 /* This is only meaningful in non-KMS mode */
538 /* Don't restore them in KMS mode */ 474 /* Don't restore them in KMS mode */
539 i915_restore_modeset_reg(dev); 475 i915_restore_modeset_reg(dev);
476
540 /* Cursor state */ 477 /* Cursor state */
541 I915_WRITE(CURAPOS, dev_priv->saveCURAPOS); 478 I915_WRITE(CURAPOS, dev_priv->saveCURAPOS);
542 I915_WRITE(CURACNTR, dev_priv->saveCURACNTR); 479 I915_WRITE(CURACNTR, dev_priv->saveCURACNTR);
@@ -586,6 +523,95 @@ int i915_restore_state(struct drm_device *dev)
586 I915_WRITE(VGA_PD, dev_priv->saveVGA_PD); 523 I915_WRITE(VGA_PD, dev_priv->saveVGA_PD);
587 DRM_UDELAY(150); 524 DRM_UDELAY(150);
588 525
526 i915_restore_vga(dev);
527}
528
529int i915_save_state(struct drm_device *dev)
530{
531 struct drm_i915_private *dev_priv = dev->dev_private;
532 int i;
533
534 pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB);
535
536 /* Render Standby */
537 if (IS_I965G(dev) && IS_MOBILE(dev))
538 dev_priv->saveRENDERSTANDBY = I915_READ(MCHBAR_RENDER_STANDBY);
539
540 /* Hardware status page */
541 dev_priv->saveHWS = I915_READ(HWS_PGA);
542
543 i915_save_display(dev);
544
545 /* Interrupt state */
546 dev_priv->saveIER = I915_READ(IER);
547 dev_priv->saveIMR = I915_READ(IMR);
548
549 /* Clock gating state */
550 dev_priv->saveD_STATE = I915_READ(D_STATE);
551 dev_priv->saveDSPCLK_GATE_D = I915_READ(DSPCLK_GATE_D); /* Not sure about this */
552
553 /* Cache mode state */
554 dev_priv->saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
555
556 /* Memory Arbitration state */
557 dev_priv->saveMI_ARB_STATE = I915_READ(MI_ARB_STATE);
558
559 /* Scratch space */
560 for (i = 0; i < 16; i++) {
561 dev_priv->saveSWF0[i] = I915_READ(SWF00 + (i << 2));
562 dev_priv->saveSWF1[i] = I915_READ(SWF10 + (i << 2));
563 }
564 for (i = 0; i < 3; i++)
565 dev_priv->saveSWF2[i] = I915_READ(SWF30 + (i << 2));
566
567 /* Fences */
568 if (IS_I965G(dev)) {
569 for (i = 0; i < 16; i++)
570 dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_965_0 + (i * 8));
571 } else {
572 for (i = 0; i < 8; i++)
573 dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4));
574
575 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
576 for (i = 0; i < 8; i++)
577 dev_priv->saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4));
578 }
579
580 return 0;
581}
582
583int i915_restore_state(struct drm_device *dev)
584{
585 struct drm_i915_private *dev_priv = dev->dev_private;
586 int i;
587
588 pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB);
589
590 /* Render Standby */
591 if (IS_I965G(dev) && IS_MOBILE(dev))
592 I915_WRITE(MCHBAR_RENDER_STANDBY, dev_priv->saveRENDERSTANDBY);
593
594 /* Hardware status page */
595 I915_WRITE(HWS_PGA, dev_priv->saveHWS);
596
597 /* Fences */
598 if (IS_I965G(dev)) {
599 for (i = 0; i < 16; i++)
600 I915_WRITE64(FENCE_REG_965_0 + (i * 8), dev_priv->saveFENCE[i]);
601 } else {
602 for (i = 0; i < 8; i++)
603 I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]);
604 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
605 for (i = 0; i < 8; i++)
606 I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->saveFENCE[i+8]);
607 }
608
609 i915_restore_display(dev);
610
611 /* Interrupt state */
612 I915_WRITE (IER, dev_priv->saveIER);
613 I915_WRITE (IMR, dev_priv->saveIMR);
614
589 /* Clock gating state */ 615 /* Clock gating state */
590 I915_WRITE (D_STATE, dev_priv->saveD_STATE); 616 I915_WRITE (D_STATE, dev_priv->saveD_STATE);
591 I915_WRITE (DSPCLK_GATE_D, dev_priv->saveDSPCLK_GATE_D); 617 I915_WRITE (DSPCLK_GATE_D, dev_priv->saveDSPCLK_GATE_D);
@@ -603,8 +629,6 @@ int i915_restore_state(struct drm_device *dev)
603 for (i = 0; i < 3; i++) 629 for (i = 0; i < 3; i++)
604 I915_WRITE(SWF30 + (i << 2), dev_priv->saveSWF2[i]); 630 I915_WRITE(SWF30 + (i << 2), dev_priv->saveSWF2[i]);
605 631
606 i915_restore_vga(dev);
607
608 return 0; 632 return 0;
609} 633}
610 634
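Splitting the display registers out of i915_save_state()/i915_restore_state() lets a reset path preserve display output without replaying fences or interrupt state. A hedged sketch of one plausible consumer; do_render_reset() is a placeholder, not a symbol from this patch:

    static int do_render_reset(struct drm_device *dev);     /* stub */

    static int example_reset(struct drm_device *dev)
    {
            int ret;

            i915_save_display(dev);
            ret = do_render_reset(dev);     /* e.g. via the new GDRST register */
            i915_restore_display(dev);

            return ret;
    }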
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
new file mode 100644
index 000000000000..5567a40816f3
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -0,0 +1,315 @@
1#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
2#define _I915_TRACE_H_
3
4#include <linux/stringify.h>
5#include <linux/types.h>
6#include <linux/tracepoint.h>
7
8#include <drm/drmP.h>
9
10#undef TRACE_SYSTEM
11#define TRACE_SYSTEM i915
12#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM)
13#define TRACE_INCLUDE_FILE i915_trace
14
15/* object tracking */
16
17TRACE_EVENT(i915_gem_object_create,
18
19 TP_PROTO(struct drm_gem_object *obj),
20
21 TP_ARGS(obj),
22
23 TP_STRUCT__entry(
24 __field(struct drm_gem_object *, obj)
25 __field(u32, size)
26 ),
27
28 TP_fast_assign(
29 __entry->obj = obj;
30 __entry->size = obj->size;
31 ),
32
33 TP_printk("obj=%p, size=%u", __entry->obj, __entry->size)
34);
35
36TRACE_EVENT(i915_gem_object_bind,
37
38 TP_PROTO(struct drm_gem_object *obj, u32 gtt_offset),
39
40 TP_ARGS(obj, gtt_offset),
41
42 TP_STRUCT__entry(
43 __field(struct drm_gem_object *, obj)
44 __field(u32, gtt_offset)
45 ),
46
47 TP_fast_assign(
48 __entry->obj = obj;
49 __entry->gtt_offset = gtt_offset;
50 ),
51
52 TP_printk("obj=%p, gtt_offset=%08x",
53 __entry->obj, __entry->gtt_offset)
54);
55
56TRACE_EVENT(i915_gem_object_clflush,
57
58 TP_PROTO(struct drm_gem_object *obj),
59
60 TP_ARGS(obj),
61
62 TP_STRUCT__entry(
63 __field(struct drm_gem_object *, obj)
64 ),
65
66 TP_fast_assign(
67 __entry->obj = obj;
68 ),
69
70 TP_printk("obj=%p", __entry->obj)
71);
72
73TRACE_EVENT(i915_gem_object_change_domain,
74
75 TP_PROTO(struct drm_gem_object *obj, uint32_t old_read_domains, uint32_t old_write_domain),
76
77 TP_ARGS(obj, old_read_domains, old_write_domain),
78
79 TP_STRUCT__entry(
80 __field(struct drm_gem_object *, obj)
81 __field(u32, read_domains)
82 __field(u32, write_domain)
83 ),
84
85 TP_fast_assign(
86 __entry->obj = obj;
87 __entry->read_domains = obj->read_domains | (old_read_domains << 16);
88 __entry->write_domain = obj->write_domain | (old_write_domain << 16);
89 ),
90
91 TP_printk("obj=%p, read=%04x, write=%04x",
92 __entry->obj,
93 __entry->read_domains, __entry->write_domain)
94);
95
96TRACE_EVENT(i915_gem_object_get_fence,
97
98 TP_PROTO(struct drm_gem_object *obj, int fence, int tiling_mode),
99
100 TP_ARGS(obj, fence, tiling_mode),
101
102 TP_STRUCT__entry(
103 __field(struct drm_gem_object *, obj)
104 __field(int, fence)
105 __field(int, tiling_mode)
106 ),
107
108 TP_fast_assign(
109 __entry->obj = obj;
110 __entry->fence = fence;
111 __entry->tiling_mode = tiling_mode;
112 ),
113
114 TP_printk("obj=%p, fence=%d, tiling=%d",
115 __entry->obj, __entry->fence, __entry->tiling_mode)
116);
117
118TRACE_EVENT(i915_gem_object_unbind,
119
120 TP_PROTO(struct drm_gem_object *obj),
121
122 TP_ARGS(obj),
123
124 TP_STRUCT__entry(
125 __field(struct drm_gem_object *, obj)
126 ),
127
128 TP_fast_assign(
129 __entry->obj = obj;
130 ),
131
132 TP_printk("obj=%p", __entry->obj)
133);
134
135TRACE_EVENT(i915_gem_object_destroy,
136
137 TP_PROTO(struct drm_gem_object *obj),
138
139 TP_ARGS(obj),
140
141 TP_STRUCT__entry(
142 __field(struct drm_gem_object *, obj)
143 ),
144
145 TP_fast_assign(
146 __entry->obj = obj;
147 ),
148
149 TP_printk("obj=%p", __entry->obj)
150);
151
152/* batch tracing */
153
154TRACE_EVENT(i915_gem_request_submit,
155
156 TP_PROTO(struct drm_device *dev, u32 seqno),
157
158 TP_ARGS(dev, seqno),
159
160 TP_STRUCT__entry(
161 __field(struct drm_device *, dev)
162 __field(u32, seqno)
163 ),
164
165 TP_fast_assign(
166 __entry->dev = dev;
167 __entry->seqno = seqno;
168 ),
169
170 TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
171);
172
173TRACE_EVENT(i915_gem_request_flush,
174
175 TP_PROTO(struct drm_device *dev, u32 seqno,
176 u32 flush_domains, u32 invalidate_domains),
177
178 TP_ARGS(dev, seqno, flush_domains, invalidate_domains),
179
180 TP_STRUCT__entry(
181 __field(struct drm_device *, dev)
182 __field(u32, seqno)
183 __field(u32, flush_domains)
184 __field(u32, invalidate_domains)
185 ),
186
187 TP_fast_assign(
188 __entry->dev = dev;
189 __entry->seqno = seqno;
190 __entry->flush_domains = flush_domains;
191 __entry->invalidate_domains = invalidate_domains;
192 ),
193
194 TP_printk("dev=%p, seqno=%u, flush=%04x, invalidate=%04x",
195 __entry->dev, __entry->seqno,
196 __entry->flush_domains, __entry->invalidate_domains)
197);
198
199
200TRACE_EVENT(i915_gem_request_complete,
201
202 TP_PROTO(struct drm_device *dev, u32 seqno),
203
204 TP_ARGS(dev, seqno),
205
206 TP_STRUCT__entry(
207 __field(struct drm_device *, dev)
208 __field(u32, seqno)
209 ),
210
211 TP_fast_assign(
212 __entry->dev = dev;
213 __entry->seqno = seqno;
214 ),
215
216 TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
217);
218
219TRACE_EVENT(i915_gem_request_retire,
220
221 TP_PROTO(struct drm_device *dev, u32 seqno),
222
223 TP_ARGS(dev, seqno),
224
225 TP_STRUCT__entry(
226 __field(struct drm_device *, dev)
227 __field(u32, seqno)
228 ),
229
230 TP_fast_assign(
231 __entry->dev = dev;
232 __entry->seqno = seqno;
233 ),
234
235 TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
236);
237
238TRACE_EVENT(i915_gem_request_wait_begin,
239
240 TP_PROTO(struct drm_device *dev, u32 seqno),
241
242 TP_ARGS(dev, seqno),
243
244 TP_STRUCT__entry(
245 __field(struct drm_device *, dev)
246 __field(u32, seqno)
247 ),
248
249 TP_fast_assign(
250 __entry->dev = dev;
251 __entry->seqno = seqno;
252 ),
253
254 TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
255);
256
257TRACE_EVENT(i915_gem_request_wait_end,
258
259 TP_PROTO(struct drm_device *dev, u32 seqno),
260
261 TP_ARGS(dev, seqno),
262
263 TP_STRUCT__entry(
264 __field(struct drm_device *, dev)
265 __field(u32, seqno)
266 ),
267
268 TP_fast_assign(
269 __entry->dev = dev;
270 __entry->seqno = seqno;
271 ),
272
273 TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
274);
275
276TRACE_EVENT(i915_ring_wait_begin,
277
278 TP_PROTO(struct drm_device *dev),
279
280 TP_ARGS(dev),
281
282 TP_STRUCT__entry(
283 __field(struct drm_device *, dev)
284 ),
285
286 TP_fast_assign(
287 __entry->dev = dev;
288 ),
289
290 TP_printk("dev=%p", __entry->dev)
291);
292
293TRACE_EVENT(i915_ring_wait_end,
294
295 TP_PROTO(struct drm_device *dev),
296
297 TP_ARGS(dev),
298
299 TP_STRUCT__entry(
300 __field(struct drm_device *, dev)
301 ),
302
303 TP_fast_assign(
304 __entry->dev = dev;
305 ),
306
307 TP_printk("dev=%p", __entry->dev)
308);
309
310#endif /* _I915_TRACE_H_ */
311
312/* This part must be outside protection */
313#undef TRACE_INCLUDE_PATH
314#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/i915
315#include <trace/define_trace.h>
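Each TRACE_EVENT() above expands into a trace_<name>() call that costs almost nothing until the event is enabled; enabled events typically appear under /sys/kernel/debug/tracing/events/i915/. An illustrative call site (the surrounding function is a placeholder; the i915_irq.c hunks earlier show the real ones):

    #include "i915_trace.h"

    static void example_submit(struct drm_device *dev, u32 seqno)
    {
            /* ... hand the batch to the ring ... */
            trace_i915_gem_request_submit(dev, seqno);
    }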
diff --git a/drivers/gpu/drm/i915/i915_trace_points.c b/drivers/gpu/drm/i915/i915_trace_points.c
new file mode 100644
index 000000000000..ead876eb6ea0
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_trace_points.c
@@ -0,0 +1,11 @@
1/*
2 * Copyright © 2009 Intel Corporation
3 *
4 * Authors:
5 * Chris Wilson <chris@chris-wilson.co.uk>
6 */
7
8#include "i915_drv.h"
9
10#define CREATE_TRACE_POINTS
11#include "i915_trace.h"
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index 1e28c1652fd0..4337414846b6 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -217,6 +217,9 @@ parse_general_features(struct drm_i915_private *dev_priv,
217 if (IS_I85X(dev_priv->dev)) 217 if (IS_I85X(dev_priv->dev))
218 dev_priv->lvds_ssc_freq = 218 dev_priv->lvds_ssc_freq =
219 general->ssc_freq ? 66 : 48; 219 general->ssc_freq ? 66 : 48;
220 else if (IS_IGDNG(dev_priv->dev))
221 dev_priv->lvds_ssc_freq =
222 general->ssc_freq ? 100 : 120;
220 else 223 else
221 dev_priv->lvds_ssc_freq = 224 dev_priv->lvds_ssc_freq =
222 general->ssc_freq ? 100 : 96; 225 general->ssc_freq ? 100 : 96;
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 88814fa2dfd2..212e22740fc1 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -179,13 +179,10 @@ static bool intel_igdng_crt_detect_hotplug(struct drm_connector *connector)
179{ 179{
180 struct drm_device *dev = connector->dev; 180 struct drm_device *dev = connector->dev;
181 struct drm_i915_private *dev_priv = dev->dev_private; 181 struct drm_i915_private *dev_priv = dev->dev_private;
182 u32 adpa, temp; 182 u32 adpa;
183 bool ret; 183 bool ret;
184 184
185 temp = adpa = I915_READ(PCH_ADPA); 185 adpa = I915_READ(PCH_ADPA);
186
187 adpa &= ~ADPA_DAC_ENABLE;
188 I915_WRITE(PCH_ADPA, adpa);
189 186
190 adpa &= ~ADPA_CRT_HOTPLUG_MASK; 187 adpa &= ~ADPA_CRT_HOTPLUG_MASK;
191 188
@@ -212,8 +209,6 @@ static bool intel_igdng_crt_detect_hotplug(struct drm_connector *connector)
212 else 209 else
213 ret = false; 210 ret = false;
214 211
215 /* restore origin register */
216 I915_WRITE(PCH_ADPA, temp);
217 return ret; 212 return ret;
218} 213}
219 214
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 0227b1652906..93ff6c03733e 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -24,6 +24,8 @@
24 * Eric Anholt <eric@anholt.net> 24 * Eric Anholt <eric@anholt.net>
25 */ 25 */
26 26
27#include <linux/module.h>
28#include <linux/input.h>
27#include <linux/i2c.h> 29#include <linux/i2c.h>
28#include <linux/kernel.h> 30#include <linux/kernel.h>
29#include "drmP.h" 31#include "drmP.h"
@@ -875,7 +877,7 @@ intel_igdng_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc,
875 refclk, best_clock); 877 refclk, best_clock);
876 878
877 if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) { 879 if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) {
878 if ((I915_READ(LVDS) & LVDS_CLKB_POWER_MASK) == 880 if ((I915_READ(PCH_LVDS) & LVDS_CLKB_POWER_MASK) ==
879 LVDS_CLKB_POWER_UP) 881 LVDS_CLKB_POWER_UP)
880 clock.p2 = limit->p2.p2_fast; 882 clock.p2 = limit->p2.p2_fast;
881 else 883 else
@@ -952,6 +954,241 @@ intel_wait_for_vblank(struct drm_device *dev)
952 mdelay(20); 954 mdelay(20);
953} 955}
954 956
957/* Parameters have changed, update FBC info */
958static void i8xx_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
959{
960 struct drm_device *dev = crtc->dev;
961 struct drm_i915_private *dev_priv = dev->dev_private;
962 struct drm_framebuffer *fb = crtc->fb;
963 struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
964 struct drm_i915_gem_object *obj_priv = intel_fb->obj->driver_private;
965 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
966 int plane, i;
967 u32 fbc_ctl, fbc_ctl2;
968
969 dev_priv->cfb_pitch = dev_priv->cfb_size / FBC_LL_SIZE;
970
971 if (fb->pitch < dev_priv->cfb_pitch)
972 dev_priv->cfb_pitch = fb->pitch;
973
974 /* FBC_CTL wants 64B units */
975 dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1;
976 dev_priv->cfb_fence = obj_priv->fence_reg;
977 dev_priv->cfb_plane = intel_crtc->plane;
978 plane = dev_priv->cfb_plane == 0 ? FBC_CTL_PLANEA : FBC_CTL_PLANEB;
979
980 /* Clear old tags */
981 for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++)
982 I915_WRITE(FBC_TAG + (i * 4), 0);
983
984 /* Set it up... */
985 fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | plane;
986 if (obj_priv->tiling_mode != I915_TILING_NONE)
987 fbc_ctl2 |= FBC_CTL_CPU_FENCE;
988 I915_WRITE(FBC_CONTROL2, fbc_ctl2);
989 I915_WRITE(FBC_FENCE_OFF, crtc->y);
990
991 /* enable it... */
992 fbc_ctl = FBC_CTL_EN | FBC_CTL_PERIODIC;
993 fbc_ctl |= (dev_priv->cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT;
994 fbc_ctl |= (interval & 0x2fff) << FBC_CTL_INTERVAL_SHIFT;
995 if (obj_priv->tiling_mode != I915_TILING_NONE)
996 fbc_ctl |= dev_priv->cfb_fence;
997 I915_WRITE(FBC_CONTROL, fbc_ctl);
998
 999	DRM_DEBUG("enabled FBC, pitch %ld, yoff %d, plane %d\n",
1000		  dev_priv->cfb_pitch, crtc->y, dev_priv->cfb_plane);
1001}
1002
1003void i8xx_disable_fbc(struct drm_device *dev)
1004{
1005 struct drm_i915_private *dev_priv = dev->dev_private;
1006 u32 fbc_ctl;
1007
1008 if (!I915_HAS_FBC(dev))
1009 return;
1010
1011 /* Disable compression */
1012 fbc_ctl = I915_READ(FBC_CONTROL);
1013 fbc_ctl &= ~FBC_CTL_EN;
1014 I915_WRITE(FBC_CONTROL, fbc_ctl);
1015
1016 /* Wait for compressing bit to clear */
1017 while (I915_READ(FBC_STATUS) & FBC_STAT_COMPRESSING)
1018 ; /* nothing */
1019
1020 intel_wait_for_vblank(dev);
1021
1022 DRM_DEBUG("disabled FBC\n");
1023}
1024
1025static bool i8xx_fbc_enabled(struct drm_crtc *crtc)
1026{
1027 struct drm_device *dev = crtc->dev;
1028 struct drm_i915_private *dev_priv = dev->dev_private;
1029
1030 return I915_READ(FBC_CONTROL) & FBC_CTL_EN;
1031}
1032
1033static void g4x_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
1034{
1035 struct drm_device *dev = crtc->dev;
1036 struct drm_i915_private *dev_priv = dev->dev_private;
1037 struct drm_framebuffer *fb = crtc->fb;
1038 struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
1039 struct drm_i915_gem_object *obj_priv = intel_fb->obj->driver_private;
1040 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
1041 int plane = (intel_crtc->plane == 0 ? DPFC_CTL_PLANEA :
1042 DPFC_CTL_PLANEB);
1043 unsigned long stall_watermark = 200;
1044 u32 dpfc_ctl;
1045
1046 dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1;
1047 dev_priv->cfb_fence = obj_priv->fence_reg;
1048 dev_priv->cfb_plane = intel_crtc->plane;
1049
1050 dpfc_ctl = plane | DPFC_SR_EN | DPFC_CTL_LIMIT_1X;
1051 if (obj_priv->tiling_mode != I915_TILING_NONE) {
1052 dpfc_ctl |= DPFC_CTL_FENCE_EN | dev_priv->cfb_fence;
1053 I915_WRITE(DPFC_CHICKEN, DPFC_HT_MODIFY);
1054 } else {
1055 I915_WRITE(DPFC_CHICKEN, ~DPFC_HT_MODIFY);
1056 }
1057
1058 I915_WRITE(DPFC_CONTROL, dpfc_ctl);
1059 I915_WRITE(DPFC_RECOMP_CTL, DPFC_RECOMP_STALL_EN |
1060 (stall_watermark << DPFC_RECOMP_STALL_WM_SHIFT) |
1061 (interval << DPFC_RECOMP_TIMER_COUNT_SHIFT));
1062 I915_WRITE(DPFC_FENCE_YOFF, crtc->y);
1063
1064 /* enable it... */
1065 I915_WRITE(DPFC_CONTROL, I915_READ(DPFC_CONTROL) | DPFC_CTL_EN);
1066
1067 DRM_DEBUG("enabled fbc on plane %d\n", intel_crtc->plane);
1068}
1069
1070void g4x_disable_fbc(struct drm_device *dev)
1071{
1072 struct drm_i915_private *dev_priv = dev->dev_private;
1073 u32 dpfc_ctl;
1074
1075 /* Disable compression */
1076 dpfc_ctl = I915_READ(DPFC_CONTROL);
1077 dpfc_ctl &= ~DPFC_CTL_EN;
1078 I915_WRITE(DPFC_CONTROL, dpfc_ctl);
1079 intel_wait_for_vblank(dev);
1080
1081 DRM_DEBUG("disabled FBC\n");
1082}
1083
1084static bool g4x_fbc_enabled(struct drm_crtc *crtc)
1085{
1086 struct drm_device *dev = crtc->dev;
1087 struct drm_i915_private *dev_priv = dev->dev_private;
1088
1089 return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;
1090}
1091
1092/**
1093 * intel_update_fbc - enable/disable FBC as needed
1094 * @crtc: CRTC to point the compressor at
1095 * @mode: mode in use
1096 *
1097 * Set up the framebuffer compression hardware at mode set time. We
1098 * enable it if possible:
1099 * - plane A only (on pre-965)
1100 * - no pixel multiply/line duplication
1101 * - no alpha buffer discard
1102 * - no dual wide
1103 * - framebuffer <= 2048 in width, 1536 in height
1104 *
1105 * We can't assume that any compression will take place (worst case),
1106 * so the compressed buffer has to be the same size as the uncompressed
1107 * one. It also must reside (along with the line length buffer) in
1108 * stolen memory.
1109 *
1110 * We need to enable/disable FBC on a global basis.
1111 */
1112static void intel_update_fbc(struct drm_crtc *crtc,
1113 struct drm_display_mode *mode)
1114{
1115 struct drm_device *dev = crtc->dev;
1116 struct drm_i915_private *dev_priv = dev->dev_private;
1117 struct drm_framebuffer *fb = crtc->fb;
1118 struct intel_framebuffer *intel_fb;
1119 struct drm_i915_gem_object *obj_priv;
1120 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
1121 int plane = intel_crtc->plane;
1122
1123 if (!i915_powersave)
1124 return;
1125
1126 if (!dev_priv->display.fbc_enabled ||
1127 !dev_priv->display.enable_fbc ||
1128 !dev_priv->display.disable_fbc)
1129 return;
1130
1131 if (!crtc->fb)
1132 return;
1133
1134 intel_fb = to_intel_framebuffer(fb);
1135 obj_priv = intel_fb->obj->driver_private;
1136
1137 /*
1138 * If FBC is already on, we just have to verify that we can
1139 * keep it that way...
1140 * Need to disable if:
1141 * - changing FBC params (stride, fence, mode)
1142 * - new fb is too large to fit in compressed buffer
1143 * - going to an unsupported config (interlace, pixel multiply, etc.)
1144 */
1145 if (intel_fb->obj->size > dev_priv->cfb_size) {
1146 DRM_DEBUG("framebuffer too large, disabling compression\n");
1147 goto out_disable;
1148 }
1149 if ((mode->flags & DRM_MODE_FLAG_INTERLACE) ||
1150 (mode->flags & DRM_MODE_FLAG_DBLSCAN)) {
1151 DRM_DEBUG("mode incompatible with compression, disabling\n");
1152 goto out_disable;
1153 }
1154 if ((mode->hdisplay > 2048) ||
1155 (mode->vdisplay > 1536)) {
1156 DRM_DEBUG("mode too large for compression, disabling\n");
1157 goto out_disable;
1158 }
1159 if ((IS_I915GM(dev) || IS_I945GM(dev)) && plane != 0) {
1160 DRM_DEBUG("plane not 0, disabling compression\n");
1161 goto out_disable;
1162 }
1163 if (obj_priv->tiling_mode != I915_TILING_X) {
1164 DRM_DEBUG("framebuffer not tiled, disabling compression\n");
1165 goto out_disable;
1166 }
1167
1168 if (dev_priv->display.fbc_enabled(crtc)) {
1169 /* We can re-enable it in this case, but need to update pitch */
1170 if (fb->pitch > dev_priv->cfb_pitch)
1171 dev_priv->display.disable_fbc(dev);
1172 if (obj_priv->fence_reg != dev_priv->cfb_fence)
1173 dev_priv->display.disable_fbc(dev);
1174 if (plane != dev_priv->cfb_plane)
1175 dev_priv->display.disable_fbc(dev);
1176 }
1177
1178 if (!dev_priv->display.fbc_enabled(crtc)) {
1179 /* Now try to turn it back on if possible */
1180 dev_priv->display.enable_fbc(crtc, 500);
1181 }
1182
1183 return;
1184
1185out_disable:
1186 DRM_DEBUG("unsupported config, disabling FBC\n");
1187 /* Multiple disables should be harmless */
1188 if (dev_priv->display.fbc_enabled(crtc))
1189 dev_priv->display.disable_fbc(dev);
1190}
1191
955static int 1192static int
956intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, 1193intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
957 struct drm_framebuffer *old_fb) 1194 struct drm_framebuffer *old_fb)
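The eligibility checks in intel_update_fbc() above condense into a single predicate; the helper below is illustrative, not part of the patch:

    static bool fbc_possible(struct drm_device *dev,
                             struct drm_display_mode *mode, int plane,
                             struct drm_i915_gem_object *obj_priv,
                             size_t fb_size)
    {
            struct drm_i915_private *dev_priv = dev->dev_private;

            return fb_size <= dev_priv->cfb_size &&
                   !(mode->flags & (DRM_MODE_FLAG_INTERLACE |
                                    DRM_MODE_FLAG_DBLSCAN)) &&
                   mode->hdisplay <= 2048 && mode->vdisplay <= 1536 &&
                   (plane == 0 || !(IS_I915GM(dev) || IS_I945GM(dev))) &&
                   obj_priv->tiling_mode == I915_TILING_X;
    }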
@@ -964,12 +1201,13 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
964 struct drm_i915_gem_object *obj_priv; 1201 struct drm_i915_gem_object *obj_priv;
965 struct drm_gem_object *obj; 1202 struct drm_gem_object *obj;
966 int pipe = intel_crtc->pipe; 1203 int pipe = intel_crtc->pipe;
1204 int plane = intel_crtc->plane;
967 unsigned long Start, Offset; 1205 unsigned long Start, Offset;
968 int dspbase = (pipe == 0 ? DSPAADDR : DSPBADDR); 1206 int dspbase = (plane == 0 ? DSPAADDR : DSPBADDR);
969 int dspsurf = (pipe == 0 ? DSPASURF : DSPBSURF); 1207 int dspsurf = (plane == 0 ? DSPASURF : DSPBSURF);
970 int dspstride = (pipe == 0) ? DSPASTRIDE : DSPBSTRIDE; 1208 int dspstride = (plane == 0) ? DSPASTRIDE : DSPBSTRIDE;
971 int dsptileoff = (pipe == 0 ? DSPATILEOFF : DSPBTILEOFF); 1209 int dsptileoff = (plane == 0 ? DSPATILEOFF : DSPBTILEOFF);
972 int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR; 1210 int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR;
973 u32 dspcntr, alignment; 1211 u32 dspcntr, alignment;
974 int ret; 1212 int ret;
975 1213
@@ -979,12 +1217,12 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
979 return 0; 1217 return 0;
980 } 1218 }
981 1219
982 switch (pipe) { 1220 switch (plane) {
983 case 0: 1221 case 0:
984 case 1: 1222 case 1:
985 break; 1223 break;
986 default: 1224 default:
987 DRM_ERROR("Can't update pipe %d in SAREA\n", pipe); 1225 DRM_ERROR("Can't update plane %d in SAREA\n", plane);
988 return -EINVAL; 1226 return -EINVAL;
989 } 1227 }
990 1228
@@ -1086,6 +1324,9 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
1086 I915_READ(dspbase); 1324 I915_READ(dspbase);
1087 } 1325 }
1088 1326
1327 if ((IS_I965G(dev) || plane == 0))
1328 intel_update_fbc(crtc, &crtc->mode);
1329
1089 intel_wait_for_vblank(dev); 1330 intel_wait_for_vblank(dev);
1090 1331
1091 if (old_fb) { 1332 if (old_fb) {
@@ -1217,6 +1458,7 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode)
1217 int transconf_reg = (pipe == 0) ? TRANSACONF : TRANSBCONF; 1458 int transconf_reg = (pipe == 0) ? TRANSACONF : TRANSBCONF;
1218 int pf_ctl_reg = (pipe == 0) ? PFA_CTL_1 : PFB_CTL_1; 1459 int pf_ctl_reg = (pipe == 0) ? PFA_CTL_1 : PFB_CTL_1;
1219 int pf_win_size = (pipe == 0) ? PFA_WIN_SZ : PFB_WIN_SZ; 1460 int pf_win_size = (pipe == 0) ? PFA_WIN_SZ : PFB_WIN_SZ;
1461 int pf_win_pos = (pipe == 0) ? PFA_WIN_POS : PFB_WIN_POS;
1220 int cpu_htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B; 1462 int cpu_htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B;
1221 int cpu_hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B; 1463 int cpu_hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B;
1222 int cpu_hsync_reg = (pipe == 0) ? HSYNC_A : HSYNC_B; 1464 int cpu_hsync_reg = (pipe == 0) ? HSYNC_A : HSYNC_B;
@@ -1268,6 +1510,19 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode)
1268 } 1510 }
1269 } 1511 }
1270 1512
1513 /* Enable panel fitting for LVDS */
1514 if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) {
1515 temp = I915_READ(pf_ctl_reg);
1516 I915_WRITE(pf_ctl_reg, temp | PF_ENABLE);
1517
1518 /* currently full aspect */
1519 I915_WRITE(pf_win_pos, 0);
1520
1521 I915_WRITE(pf_win_size,
1522 (dev_priv->panel_fixed_mode->hdisplay << 16) |
1523 (dev_priv->panel_fixed_mode->vdisplay));
1524 }
1525
1271 /* Enable CPU pipe */ 1526 /* Enable CPU pipe */
1272 temp = I915_READ(pipeconf_reg); 1527 temp = I915_READ(pipeconf_reg);
1273 if ((temp & PIPEACONF_ENABLE) == 0) { 1528 if ((temp & PIPEACONF_ENABLE) == 0) {
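For concreteness: a 1280x800 LVDS panel programs pf_win_size = (1280 << 16) | 800 = 0x05000320, with pf_win_pos left at 0 for the full-aspect case.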
@@ -1532,9 +1787,10 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode)
1532 struct drm_i915_private *dev_priv = dev->dev_private; 1787 struct drm_i915_private *dev_priv = dev->dev_private;
1533 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 1788 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
1534 int pipe = intel_crtc->pipe; 1789 int pipe = intel_crtc->pipe;
1790 int plane = intel_crtc->plane;
1535 int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B; 1791 int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B;
1536 int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR; 1792 int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR;
1537 int dspbase_reg = (pipe == 0) ? DSPAADDR : DSPBADDR; 1793 int dspbase_reg = (plane == 0) ? DSPAADDR : DSPBADDR;
1538 int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF; 1794 int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF;
1539 u32 temp; 1795 u32 temp;
1540 1796
@@ -1577,6 +1833,9 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode)
1577 1833
1578 intel_crtc_load_lut(crtc); 1834 intel_crtc_load_lut(crtc);
1579 1835
1836 if ((IS_I965G(dev) || plane == 0))
1837 intel_update_fbc(crtc, &crtc->mode);
1838
1580 /* Give the overlay scaler a chance to enable if it's on this pipe */ 1839 /* Give the overlay scaler a chance to enable if it's on this pipe */
1581 //intel_crtc_dpms_video(crtc, true); TODO 1840 //intel_crtc_dpms_video(crtc, true); TODO
1582 intel_update_watermarks(dev); 1841 intel_update_watermarks(dev);
@@ -1586,6 +1845,10 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode)
1586 /* Give the overlay scaler a chance to disable if it's on this pipe */ 1845 /* Give the overlay scaler a chance to disable if it's on this pipe */
1587 //intel_crtc_dpms_video(crtc, FALSE); TODO 1846 //intel_crtc_dpms_video(crtc, FALSE); TODO
1588 1847
1848 if (dev_priv->cfb_plane == plane &&
1849 dev_priv->display.disable_fbc)
1850 dev_priv->display.disable_fbc(dev);
1851
1589 /* Disable the VGA plane that we never use */ 1852 /* Disable the VGA plane that we never use */
1590 i915_disable_vga(dev); 1853 i915_disable_vga(dev);
1591 1854
@@ -1634,15 +1897,13 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode)
1634static void intel_crtc_dpms(struct drm_crtc *crtc, int mode) 1897static void intel_crtc_dpms(struct drm_crtc *crtc, int mode)
1635{ 1898{
1636 struct drm_device *dev = crtc->dev; 1899 struct drm_device *dev = crtc->dev;
1900 struct drm_i915_private *dev_priv = dev->dev_private;
1637 struct drm_i915_master_private *master_priv; 1901 struct drm_i915_master_private *master_priv;
1638 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 1902 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
1639 int pipe = intel_crtc->pipe; 1903 int pipe = intel_crtc->pipe;
1640 bool enabled; 1904 bool enabled;
1641 1905
1642 if (IS_IGDNG(dev)) 1906 dev_priv->display.dpms(crtc, mode);
1643 igdng_crtc_dpms(crtc, mode);
1644 else
1645 i9xx_crtc_dpms(crtc, mode);
1646 1907
1647 intel_crtc->dpms_mode = mode; 1908 intel_crtc->dpms_mode = mode;
1648 1909
@@ -1709,56 +1970,68 @@ static bool intel_crtc_mode_fixup(struct drm_crtc *crtc,
1709 return true; 1970 return true;
1710} 1971}
1711 1972
1713/** Returns the core display clock speed for i830 - i945 */
1714static int intel_get_core_clock_speed(struct drm_device *dev)
1715{
1716
1717	/* Core clock values taken from the published datasheets.
1718	 * The 830 may go up to 166 Mhz, which we should check.
1719	 */
1720	if (IS_I945G(dev))
1721		return 400000;
1722	else if (IS_I915G(dev))
1723		return 333000;
1724	else if (IS_I945GM(dev) || IS_845G(dev) || IS_IGDGM(dev))
1725		return 200000;
1726	else if (IS_I915GM(dev)) {
1727		u16 gcfgc = 0;
1728
1729		pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
1730
1731		if (gcfgc & GC_LOW_FREQUENCY_ENABLE)
1732			return 133000;
1733		else {
1734			switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
1735			case GC_DISPLAY_CLOCK_333_MHZ:
1736				return 333000;
1737			default:
1738			case GC_DISPLAY_CLOCK_190_200_MHZ:
1739				return 190000;
1740			}
1741		}
1742	} else if (IS_I865G(dev))
1743		return 266000;
1744	else if (IS_I855(dev)) {
1745		u16 hpllcc = 0;
1746		/* Assume that the hardware is in the high speed state. This
1747		 * should be the default.
1748		 */
1749		switch (hpllcc & GC_CLOCK_CONTROL_MASK) {
1750		case GC_CLOCK_133_200:
1751		case GC_CLOCK_100_200:
1752			return 200000;
1753		case GC_CLOCK_166_250:
1754			return 250000;
1755		case GC_CLOCK_100_133:
1756			return 133000;
1757		}
1758	} else /* 852, 830 */
1759		return 133000;
1760
1761	return 0; /* Silence gcc warning */
1762}
1973static int i945_get_display_clock_speed(struct drm_device *dev)
1974{
1975	return 400000;
1976}
1977
1978static int i915_get_display_clock_speed(struct drm_device *dev)
1979{
1980	return 333000;
1981}
1982
1983static int i9xx_misc_get_display_clock_speed(struct drm_device *dev)
1984{
1985	return 200000;
1986}
1987
1988static int i915gm_get_display_clock_speed(struct drm_device *dev)
1989{
1990	u16 gcfgc = 0;
1991
1992	pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
1993
1994	if (gcfgc & GC_LOW_FREQUENCY_ENABLE)
1995		return 133000;
1996	else {
1997		switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
1998		case GC_DISPLAY_CLOCK_333_MHZ:
1999			return 333000;
2000		default:
2001		case GC_DISPLAY_CLOCK_190_200_MHZ:
2002			return 190000;
2003		}
2004	}
2005}
2006
2007static int i865_get_display_clock_speed(struct drm_device *dev)
2008{
2009	return 266000;
2010}
2011
2012static int i855_get_display_clock_speed(struct drm_device *dev)
2013{
2014	u16 hpllcc = 0;
2015	/* Assume that the hardware is in the high speed state. This
2016	 * should be the default.
2017	 */
2018	switch (hpllcc & GC_CLOCK_CONTROL_MASK) {
2019	case GC_CLOCK_133_200:
2020	case GC_CLOCK_100_200:
2021		return 200000;
2022	case GC_CLOCK_166_250:
2023		return 250000;
2024	case GC_CLOCK_100_133:
2025		return 133000;
2026	}
2027
2028	/* Shouldn't happen */
2029	return 0;
2030}
2031
2032static int i830_get_display_clock_speed(struct drm_device *dev)
2033{
2034	return 133000;
2035}
1763 2036
1764/** 2037/**
@@ -1921,7 +2194,14 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
1921{ 2194{
1922 long entries_required, wm_size; 2195 long entries_required, wm_size;
1923 2196
1924 entries_required = (clock_in_khz * pixel_size * latency_ns) / 1000000; 2197 /*
2198 * Note: we need to make sure we don't overflow for various clock &
2199 * latency values.
2200 * clocks go from a few thousand to several hundred thousand.
2201 * latency is usually a few thousand
2202 */
2203 entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) /
2204 1000;
1925 entries_required /= wm->cacheline_size; 2205 entries_required /= wm->cacheline_size;
1926 2206
1927 DRM_DEBUG("FIFO entries required for mode: %d\n", entries_required); 2207 DRM_DEBUG("FIFO entries required for mode: %d\n", entries_required);
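(Worked example of the overflow the rescaled expression avoids: at a 400,000 kHz dot clock with 4 bytes per pixel and 5,000 ns of latency, the old intermediate product is 400000 * 4 * 5000 = 8,000,000,000, well past the ~2.1e9 ceiling of a signed 32-bit long; dividing the clock by 1000 first keeps the intermediate at 400 * 4 * 5000 = 8,000,000, and the final result is the same 8000 entries after the last divide.)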
@@ -1986,14 +2266,13 @@ static struct cxsr_latency *intel_get_cxsr_latency(int is_desktop, int fsb,
1986 for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) { 2266 for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
1987 latency = &cxsr_latency_table[i]; 2267 latency = &cxsr_latency_table[i];
1988 if (is_desktop == latency->is_desktop && 2268 if (is_desktop == latency->is_desktop &&
1989 fsb == latency->fsb_freq && mem == latency->mem_freq) 2269 fsb == latency->fsb_freq && mem == latency->mem_freq)
1990 break; 2270 return latency;
1991 } 2271 }
1992 if (i >= ARRAY_SIZE(cxsr_latency_table)) { 2272
1993 DRM_DEBUG("Unknown FSB/MEM found, disable CxSR\n"); 2273 DRM_DEBUG("Unknown FSB/MEM found, disable CxSR\n");
1994 return NULL; 2274
1995 } 2275 return NULL;
1996 return latency;
1997} 2276}
1998 2277
1999static void igd_disable_cxsr(struct drm_device *dev) 2278static void igd_disable_cxsr(struct drm_device *dev)
@@ -2084,32 +2363,36 @@ static void igd_enable_cxsr(struct drm_device *dev, unsigned long clock,
2084 */ 2363 */
2085const static int latency_ns = 5000; 2364const static int latency_ns = 5000;
2086 2365
2087static int intel_get_fifo_size(struct drm_device *dev, int plane) 2366static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
2088{ 2367{
2089 struct drm_i915_private *dev_priv = dev->dev_private; 2368 struct drm_i915_private *dev_priv = dev->dev_private;
2090 uint32_t dsparb = I915_READ(DSPARB); 2369 uint32_t dsparb = I915_READ(DSPARB);
2091 int size; 2370 int size;
2092 2371
2093 if (IS_I9XX(dev)) { 2372 if (plane == 0)
2094 if (plane == 0)
2095 size = dsparb & 0x7f;
2096 else
2097 size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) -
2098 (dsparb & 0x7f);
2099 } else if (IS_I85X(dev)) {
2100 if (plane == 0)
2101 size = dsparb & 0x1ff;
2102 else
2103 size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) -
2104 (dsparb & 0x1ff);
2105 size >>= 1; /* Convert to cachelines */
2106 } else if (IS_845G(dev)) {
2107 size = dsparb & 0x7f; 2373 size = dsparb & 0x7f;
2108 size >>= 2; /* Convert to cachelines */ 2374 else
2109 } else { 2375 size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) -
2110 size = dsparb & 0x7f; 2376 (dsparb & 0x7f);
2111 size >>= 1; /* Convert to cachelines */ 2377
2112 } 2378 DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
2379 size);
2380
2381 return size;
2382}
2383
2384static int i85x_get_fifo_size(struct drm_device *dev, int plane)
2385{
2386 struct drm_i915_private *dev_priv = dev->dev_private;
2387 uint32_t dsparb = I915_READ(DSPARB);
2388 int size;
2389
2390 if (plane == 0)
2391 size = dsparb & 0x1ff;
2392 else
2393 size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) -
2394 (dsparb & 0x1ff);
2395 size >>= 1; /* Convert to cachelines */
2113 2396
2114 DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A", 2397 DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
2115 size); 2398 size);
@@ -2117,7 +2400,38 @@ static int intel_get_fifo_size(struct drm_device *dev, int plane)
2117 return size; 2400 return size;
2118} 2401}
2119 2402
2120static void g4x_update_wm(struct drm_device *dev) 2403static int i845_get_fifo_size(struct drm_device *dev, int plane)
2404{
2405 struct drm_i915_private *dev_priv = dev->dev_private;
2406 uint32_t dsparb = I915_READ(DSPARB);
2407 int size;
2408
2409 size = dsparb & 0x7f;
2410 size >>= 2; /* Convert to cachelines */
2411
2412 DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
2413 size);
2414
2415 return size;
2416}
2417
2418static int i830_get_fifo_size(struct drm_device *dev, int plane)
2419{
2420 struct drm_i915_private *dev_priv = dev->dev_private;
2421 uint32_t dsparb = I915_READ(DSPARB);
2422 int size;
2423
2424 size = dsparb & 0x7f;
2425 size >>= 1; /* Convert to cachelines */
2426
2427 DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
2428 size);
2429
2430 return size;
2431}
2432
2433static void g4x_update_wm(struct drm_device *dev, int unused, int unused2,
2434 int unused3, int unused4)
2121{ 2435{
2122 struct drm_i915_private *dev_priv = dev->dev_private; 2436 struct drm_i915_private *dev_priv = dev->dev_private;
2123 u32 fw_blc_self = I915_READ(FW_BLC_SELF); 2437 u32 fw_blc_self = I915_READ(FW_BLC_SELF);
@@ -2129,7 +2443,8 @@ static void g4x_update_wm(struct drm_device *dev)
2129 I915_WRITE(FW_BLC_SELF, fw_blc_self); 2443 I915_WRITE(FW_BLC_SELF, fw_blc_self);
2130} 2444}
2131 2445
2132static void i965_update_wm(struct drm_device *dev) 2446static void i965_update_wm(struct drm_device *dev, int unused, int unused2,
2447 int unused3, int unused4)
2133{ 2448{
2134 struct drm_i915_private *dev_priv = dev->dev_private; 2449 struct drm_i915_private *dev_priv = dev->dev_private;
2135 2450
@@ -2165,8 +2480,8 @@ static void i9xx_update_wm(struct drm_device *dev, int planea_clock,
2165 cacheline_size = planea_params.cacheline_size; 2480 cacheline_size = planea_params.cacheline_size;
2166 2481
2167 /* Update per-plane FIFO sizes */ 2482 /* Update per-plane FIFO sizes */
2168 planea_params.fifo_size = intel_get_fifo_size(dev, 0); 2483 planea_params.fifo_size = dev_priv->display.get_fifo_size(dev, 0);
2169 planeb_params.fifo_size = intel_get_fifo_size(dev, 1); 2484 planeb_params.fifo_size = dev_priv->display.get_fifo_size(dev, 1);
2170 2485
2171 planea_wm = intel_calculate_wm(planea_clock, &planea_params, 2486 planea_wm = intel_calculate_wm(planea_clock, &planea_params,
2172 pixel_size, latency_ns); 2487 pixel_size, latency_ns);
@@ -2213,14 +2528,14 @@ static void i9xx_update_wm(struct drm_device *dev, int planea_clock,
2213 I915_WRITE(FW_BLC2, fwater_hi); 2528 I915_WRITE(FW_BLC2, fwater_hi);
2214} 2529}
2215 2530
2216static void i830_update_wm(struct drm_device *dev, int planea_clock, 2531static void i830_update_wm(struct drm_device *dev, int planea_clock, int unused,
2217 int pixel_size) 2532 int unused2, int pixel_size)
2218{ 2533{
2219 struct drm_i915_private *dev_priv = dev->dev_private; 2534 struct drm_i915_private *dev_priv = dev->dev_private;
2220 uint32_t fwater_lo = I915_READ(FW_BLC) & ~0xfff; 2535 uint32_t fwater_lo = I915_READ(FW_BLC) & ~0xfff;
2221 int planea_wm; 2536 int planea_wm;
2222 2537
2223 i830_wm_info.fifo_size = intel_get_fifo_size(dev, 0); 2538 i830_wm_info.fifo_size = dev_priv->display.get_fifo_size(dev, 0);
2224 2539
2225 planea_wm = intel_calculate_wm(planea_clock, &i830_wm_info, 2540 planea_wm = intel_calculate_wm(planea_clock, &i830_wm_info,
2226 pixel_size, latency_ns); 2541 pixel_size, latency_ns);
@@ -2264,6 +2579,7 @@ static void i830_update_wm(struct drm_device *dev, int planea_clock,
2264 */ 2579 */
2265static void intel_update_watermarks(struct drm_device *dev) 2580static void intel_update_watermarks(struct drm_device *dev)
2266{ 2581{
2582 struct drm_i915_private *dev_priv = dev->dev_private;
2267 struct drm_crtc *crtc; 2583 struct drm_crtc *crtc;
2268 struct intel_crtc *intel_crtc; 2584 struct intel_crtc *intel_crtc;
2269 int sr_hdisplay = 0; 2585 int sr_hdisplay = 0;
@@ -2302,15 +2618,8 @@ static void intel_update_watermarks(struct drm_device *dev)
2302 else if (IS_IGD(dev)) 2618 else if (IS_IGD(dev))
2303 igd_disable_cxsr(dev); 2619 igd_disable_cxsr(dev);
2304 2620
2305 if (IS_G4X(dev)) 2621 dev_priv->display.update_wm(dev, planea_clock, planeb_clock,
2306 g4x_update_wm(dev); 2622 sr_hdisplay, pixel_size);
2307 else if (IS_I965G(dev))
2308 i965_update_wm(dev);
2309 else if (IS_I9XX(dev) || IS_MOBILE(dev))
2310 i9xx_update_wm(dev, planea_clock, planeb_clock, sr_hdisplay,
2311 pixel_size);
2312 else
2313 i830_update_wm(dev, planea_clock, pixel_size);
2314} 2623}
2315 2624
2316static int intel_crtc_mode_set(struct drm_crtc *crtc, 2625static int intel_crtc_mode_set(struct drm_crtc *crtc,
@@ -2323,10 +2632,11 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
2323 struct drm_i915_private *dev_priv = dev->dev_private; 2632 struct drm_i915_private *dev_priv = dev->dev_private;
2324 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 2633 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2325 int pipe = intel_crtc->pipe; 2634 int pipe = intel_crtc->pipe;
2635 int plane = intel_crtc->plane;
2326 int fp_reg = (pipe == 0) ? FPA0 : FPB0; 2636 int fp_reg = (pipe == 0) ? FPA0 : FPB0;
2327 int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B; 2637 int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B;
2328 int dpll_md_reg = (intel_crtc->pipe == 0) ? DPLL_A_MD : DPLL_B_MD; 2638 int dpll_md_reg = (intel_crtc->pipe == 0) ? DPLL_A_MD : DPLL_B_MD;
2329 int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR; 2639 int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR;
2330 int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF; 2640 int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF;
2331 int htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B; 2641 int htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B;
2332 int hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B; 2642 int hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B;
@@ -2334,8 +2644,8 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
2334 int vtot_reg = (pipe == 0) ? VTOTAL_A : VTOTAL_B; 2644 int vtot_reg = (pipe == 0) ? VTOTAL_A : VTOTAL_B;
2335 int vblank_reg = (pipe == 0) ? VBLANK_A : VBLANK_B; 2645 int vblank_reg = (pipe == 0) ? VBLANK_A : VBLANK_B;
2336 int vsync_reg = (pipe == 0) ? VSYNC_A : VSYNC_B; 2646 int vsync_reg = (pipe == 0) ? VSYNC_A : VSYNC_B;
2337 int dspsize_reg = (pipe == 0) ? DSPASIZE : DSPBSIZE; 2647 int dspsize_reg = (plane == 0) ? DSPASIZE : DSPBSIZE;
2338 int dsppos_reg = (pipe == 0) ? DSPAPOS : DSPBPOS; 2648 int dsppos_reg = (plane == 0) ? DSPAPOS : DSPBPOS;
2339 int pipesrc_reg = (pipe == 0) ? PIPEASRC : PIPEBSRC; 2649 int pipesrc_reg = (pipe == 0) ? PIPEASRC : PIPEBSRC;
2340 int refclk, num_outputs = 0; 2650 int refclk, num_outputs = 0;
2341 intel_clock_t clock, reduced_clock; 2651 intel_clock_t clock, reduced_clock;
@@ -2568,7 +2878,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
2568 enable color space conversion */ 2878 enable color space conversion */
2569 if (!IS_IGDNG(dev)) { 2879 if (!IS_IGDNG(dev)) {
2570 if (pipe == 0) 2880 if (pipe == 0)
2571 dspcntr |= DISPPLANE_SEL_PIPE_A; 2881 dspcntr &= ~DISPPLANE_SEL_PIPE_MASK;
2572 else 2882 else
2573 dspcntr |= DISPPLANE_SEL_PIPE_B; 2883 dspcntr |= DISPPLANE_SEL_PIPE_B;
2574 } 2884 }
@@ -2580,7 +2890,8 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
2580 * XXX: No double-wide on 915GM pipe B. Is that the only reason for the 2890 * XXX: No double-wide on 915GM pipe B. Is that the only reason for the
2581 * pipe == 0 check? 2891 * pipe == 0 check?
2582 */ 2892 */
2583 if (mode->clock > intel_get_core_clock_speed(dev) * 9 / 10) 2893 if (mode->clock >
2894 dev_priv->display.get_display_clock_speed(dev) * 9 / 10)
2584 pipeconf |= PIPEACONF_DOUBLE_WIDE; 2895 pipeconf |= PIPEACONF_DOUBLE_WIDE;
2585 else 2896 else
2586 pipeconf &= ~PIPEACONF_DOUBLE_WIDE; 2897 pipeconf &= ~PIPEACONF_DOUBLE_WIDE;
@@ -2652,9 +2963,12 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
2652 udelay(150); 2963 udelay(150);
2653 2964
2654 if (IS_I965G(dev) && !IS_IGDNG(dev)) { 2965 if (IS_I965G(dev) && !IS_IGDNG(dev)) {
2655 sdvo_pixel_multiply = adjusted_mode->clock / mode->clock; 2966 if (is_sdvo) {
2656 I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) | 2967 sdvo_pixel_multiply = adjusted_mode->clock / mode->clock;
2968 I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) |
2657 ((sdvo_pixel_multiply - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT)); 2969 ((sdvo_pixel_multiply - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT));
2970 } else
2971 I915_WRITE(dpll_md_reg, 0);
2658 } else { 2972 } else {
2659 /* write it again -- the BIOS does, after all */ 2973 /* write it again -- the BIOS does, after all */
2660 I915_WRITE(dpll_reg, dpll); 2974 I915_WRITE(dpll_reg, dpll);
@@ -2734,6 +3048,9 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
2734 /* Flush the plane changes */ 3048 /* Flush the plane changes */
2735 ret = intel_pipe_set_base(crtc, x, y, old_fb); 3049 ret = intel_pipe_set_base(crtc, x, y, old_fb);
2736 3050
3051 if ((IS_I965G(dev) || plane == 0))
3052 intel_update_fbc(crtc, &crtc->mode);
3053
2737 intel_update_watermarks(dev); 3054 intel_update_watermarks(dev);
2738 3055
2739 drm_vblank_post_modeset(dev, pipe); 3056 drm_vblank_post_modeset(dev, pipe);
@@ -2778,6 +3095,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
2778 struct drm_gem_object *bo; 3095 struct drm_gem_object *bo;
2779 struct drm_i915_gem_object *obj_priv; 3096 struct drm_i915_gem_object *obj_priv;
2780 int pipe = intel_crtc->pipe; 3097 int pipe = intel_crtc->pipe;
3098 int plane = intel_crtc->plane;
2781 uint32_t control = (pipe == 0) ? CURACNTR : CURBCNTR; 3099 uint32_t control = (pipe == 0) ? CURACNTR : CURBCNTR;
2782 uint32_t base = (pipe == 0) ? CURABASE : CURBBASE; 3100 uint32_t base = (pipe == 0) ? CURABASE : CURBBASE;
2783 uint32_t temp = I915_READ(control); 3101 uint32_t temp = I915_READ(control);
@@ -2863,6 +3181,10 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
2863 i915_gem_object_unpin(intel_crtc->cursor_bo); 3181 i915_gem_object_unpin(intel_crtc->cursor_bo);
2864 drm_gem_object_unreference(intel_crtc->cursor_bo); 3182 drm_gem_object_unreference(intel_crtc->cursor_bo);
2865 } 3183 }
3184
3185 if ((IS_I965G(dev) || plane == 0))
3186 intel_update_fbc(crtc, &crtc->mode);
3187
2866 mutex_unlock(&dev->struct_mutex); 3188 mutex_unlock(&dev->struct_mutex);
2867 3189
2868 intel_crtc->cursor_addr = addr; 3190 intel_crtc->cursor_addr = addr;
@@ -3544,6 +3866,14 @@ static void intel_crtc_init(struct drm_device *dev, int pipe)
3544 intel_crtc->lut_b[i] = i; 3866 intel_crtc->lut_b[i] = i;
3545 } 3867 }
3546 3868
3869 /* Swap pipes & planes for FBC on pre-965 */
3870 intel_crtc->pipe = pipe;
3871 intel_crtc->plane = pipe;
3872 if (IS_MOBILE(dev) && (IS_I9XX(dev) && !IS_I965G(dev))) {
3873 DRM_DEBUG("swapping pipes & planes for FBC\n");
3874 intel_crtc->plane = ((pipe == 0) ? 1 : 0);
3875 }
3876
3547 intel_crtc->cursor_addr = 0; 3877 intel_crtc->cursor_addr = 0;
3548 intel_crtc->dpms_mode = DRM_MODE_DPMS_OFF; 3878 intel_crtc->dpms_mode = DRM_MODE_DPMS_OFF;
3549 drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs); 3879 drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs);
@@ -3826,6 +4156,73 @@ void intel_init_clock_gating(struct drm_device *dev)
3826 } 4156 }
3827} 4157}
3828 4158
4159/* Set up chip specific display functions */
4160static void intel_init_display(struct drm_device *dev)
4161{
4162 struct drm_i915_private *dev_priv = dev->dev_private;
4163
4164 /* We always want a DPMS function */
4165 if (IS_IGDNG(dev))
4166 dev_priv->display.dpms = igdng_crtc_dpms;
4167 else
4168 dev_priv->display.dpms = i9xx_crtc_dpms;
4169
4170 /* Only mobile has FBC, leave pointers NULL for other chips */
4171 if (IS_MOBILE(dev)) {
4172 if (IS_GM45(dev)) {
4173 dev_priv->display.fbc_enabled = g4x_fbc_enabled;
4174 dev_priv->display.enable_fbc = g4x_enable_fbc;
4175 dev_priv->display.disable_fbc = g4x_disable_fbc;
4176 } else if (IS_I965GM(dev) || IS_I945GM(dev) || IS_I915GM(dev)) {
4177 dev_priv->display.fbc_enabled = i8xx_fbc_enabled;
4178 dev_priv->display.enable_fbc = i8xx_enable_fbc;
4179 dev_priv->display.disable_fbc = i8xx_disable_fbc;
4180 }
4181 /* 855GM needs testing */
4182 }
4183
4184 /* Returns the core display clock speed */
4185 if (IS_I945G(dev))
4186 dev_priv->display.get_display_clock_speed =
4187 i945_get_display_clock_speed;
4188 else if (IS_I915G(dev))
4189 dev_priv->display.get_display_clock_speed =
4190 i915_get_display_clock_speed;
4191 else if (IS_I945GM(dev) || IS_845G(dev) || IS_IGDGM(dev))
4192 dev_priv->display.get_display_clock_speed =
4193 i9xx_misc_get_display_clock_speed;
4194 else if (IS_I915GM(dev))
4195 dev_priv->display.get_display_clock_speed =
4196 i915gm_get_display_clock_speed;
4197 else if (IS_I865G(dev))
4198 dev_priv->display.get_display_clock_speed =
4199 i865_get_display_clock_speed;
4200 else if (IS_I855(dev))
4201 dev_priv->display.get_display_clock_speed =
4202 i855_get_display_clock_speed;
4203 else /* 852, 830 */
4204 dev_priv->display.get_display_clock_speed =
4205 i830_get_display_clock_speed;
4206
4207 /* For FIFO watermark updates */
4208 if (IS_G4X(dev))
4209 dev_priv->display.update_wm = g4x_update_wm;
4210 else if (IS_I965G(dev))
4211 dev_priv->display.update_wm = i965_update_wm;
4212 else if (IS_I9XX(dev) || IS_MOBILE(dev)) {
4213 dev_priv->display.update_wm = i9xx_update_wm;
4214 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
4215 } else {
4216 if (IS_I85X(dev))
4217 dev_priv->display.get_fifo_size = i85x_get_fifo_size;
4218 else if (IS_845G(dev))
4219 dev_priv->display.get_fifo_size = i845_get_fifo_size;
4220 else
4221 dev_priv->display.get_fifo_size = i830_get_fifo_size;
4222 dev_priv->display.update_wm = i830_update_wm;
4223 }
4224}
4225
3829void intel_modeset_init(struct drm_device *dev) 4226void intel_modeset_init(struct drm_device *dev)
3830{ 4227{
3831 struct drm_i915_private *dev_priv = dev->dev_private; 4228 struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3839,6 +4236,8 @@ void intel_modeset_init(struct drm_device *dev)
3839 4236
3840 dev->mode_config.funcs = (void *)&intel_mode_funcs; 4237 dev->mode_config.funcs = (void *)&intel_mode_funcs;
3841 4238
4239 intel_init_display(dev);
4240
3842 if (IS_I965G(dev)) { 4241 if (IS_I965G(dev)) {
3843 dev->mode_config.max_width = 8192; 4242 dev->mode_config.max_width = 8192;
3844 dev->mode_config.max_height = 8192; 4243 dev->mode_config.max_height = 8192;
@@ -3904,6 +4303,9 @@ void intel_modeset_cleanup(struct drm_device *dev)
3904 4303
3905 mutex_unlock(&dev->struct_mutex); 4304 mutex_unlock(&dev->struct_mutex);
3906 4305
4306 if (dev_priv->display.disable_fbc)
4307 dev_priv->display.disable_fbc(dev);
4308
3907 drm_mode_config_cleanup(dev); 4309 drm_mode_config_cleanup(dev);
3908} 4310}
3909 4311
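The intel_display.c hunks above all serve one refactor: the per-call chipset checks (IS_I945G, IS_I915GM, ...) move into intel_init_display(), which fills a table of function pointers once at init, and the hot paths then call through dev_priv->display. A minimal, self-contained sketch of that dispatch pattern, using a stripped-down stand-in for the real drm_i915_display_funcs struct (only the hook name follows the hunks; everything else is illustrative):

    #include <stdio.h>

    struct drm_device { int chipset; };         /* illustrative stand-in */

    /* Per-chipset hooks, resolved once at init instead of on every call. */
    struct display_funcs {
        int (*get_display_clock_speed)(struct drm_device *dev);
    };

    static int i945_get_display_clock_speed(struct drm_device *dev)
    {
        return 400000;                          /* kHz */
    }

    static int i915_get_display_clock_speed(struct drm_device *dev)
    {
        return 333000;                          /* kHz */
    }

    /* Mirrors intel_init_display(): pick the hooks for this chip once. */
    static void init_display(struct drm_device *dev, struct display_funcs *funcs)
    {
        if (dev->chipset == 945)
            funcs->get_display_clock_speed = i945_get_display_clock_speed;
        else
            funcs->get_display_clock_speed = i915_get_display_clock_speed;
    }

    int main(void)
    {
        struct drm_device dev = { .chipset = 945 };
        struct display_funcs funcs;

        init_display(&dev, &funcs);
        printf("core display clock: %d kHz\n",
               funcs.get_display_clock_speed(&dev));
        return 0;
    }

The payoff shows in intel_update_watermarks(), where four chained chipset tests collapse into one indirect call; optional hooks such as the FBC trio are simply left NULL for chips without the feature and guarded at the call site, which doubles as the capability check.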
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 3ebbbabfe59b..8aa4b7f30daa 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -28,6 +28,7 @@
28#include <linux/i2c.h> 28#include <linux/i2c.h>
29#include <linux/i2c-id.h> 29#include <linux/i2c-id.h>
30#include <linux/i2c-algo-bit.h> 30#include <linux/i2c-algo-bit.h>
31#include "i915_drv.h"
31#include "drm_crtc.h" 32#include "drm_crtc.h"
32 33
33#include "drm_crtc_helper.h" 34#include "drm_crtc_helper.h"
@@ -111,8 +112,8 @@ struct intel_output {
111 112
112struct intel_crtc { 113struct intel_crtc {
113 struct drm_crtc base; 114 struct drm_crtc base;
114 int pipe; 115 enum pipe pipe;
115 int plane; 116 enum plane plane;
116 struct drm_gem_object *cursor_bo; 117 struct drm_gem_object *cursor_bo;
117 uint32_t cursor_addr; 118 uint32_t cursor_addr;
118 u8 lut_r[256], lut_g[256], lut_b[256]; 119 u8 lut_r[256], lut_g[256], lut_b[256];
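Typing pipe and plane as enums instead of int documents the legal values and lets -Wswitch catch a switch statement that misses a case when a new pipe is added later. A tiny sketch of the idiom; the enum bodies here are assumptions, not the i915 definitions:

    enum pipe { PIPE_A = 0, PIPE_B };
    enum plane { PLANE_A = 0, PLANE_B };

    /* With -Wswitch (part of -Wall), gcc warns here if a new enumerator
     * is added to enum pipe but not handled in the switch. */
    static const char *pipe_name(enum pipe pipe)
    {
        switch (pipe) {
        case PIPE_A:
            return "A";
        case PIPE_B:
            return "B";
        }
        return "?";
    }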
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index dafc0da1c256..98ae3d73577e 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -27,6 +27,7 @@
27 * Jesse Barnes <jesse.barnes@intel.com> 27 * Jesse Barnes <jesse.barnes@intel.com>
28 */ 28 */
29 29
30#include <acpi/button.h>
30#include <linux/dmi.h> 31#include <linux/dmi.h>
31#include <linux/i2c.h> 32#include <linux/i2c.h>
32#include "drmP.h" 33#include "drmP.h"
@@ -295,6 +296,10 @@ static bool intel_lvds_mode_fixup(struct drm_encoder *encoder,
295 goto out; 296 goto out;
296 } 297 }
297 298
299 /* full screen scale for now */
300 if (IS_IGDNG(dev))
301 goto out;
302
298 /* 965+ wants fuzzy fitting */ 303 /* 965+ wants fuzzy fitting */
299 if (IS_I965G(dev)) 304 if (IS_I965G(dev))
300 pfit_control |= (intel_crtc->pipe << PFIT_PIPE_SHIFT) | 305 pfit_control |= (intel_crtc->pipe << PFIT_PIPE_SHIFT) |
@@ -322,8 +327,10 @@ static bool intel_lvds_mode_fixup(struct drm_encoder *encoder,
322 * to register description and PRM. 327 * to register description and PRM.
323 * Change the value here to see the borders for debugging 328 * Change the value here to see the borders for debugging
324 */ 329 */
325 I915_WRITE(BCLRPAT_A, 0); 330 if (!IS_IGDNG(dev)) {
326 I915_WRITE(BCLRPAT_B, 0); 331 I915_WRITE(BCLRPAT_A, 0);
332 I915_WRITE(BCLRPAT_B, 0);
333 }
327 334
328 switch (lvds_priv->fitting_mode) { 335 switch (lvds_priv->fitting_mode) {
329 case DRM_MODE_SCALE_CENTER: 336 case DRM_MODE_SCALE_CENTER:
@@ -572,7 +579,6 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder,
572 * settings. 579 * settings.
573 */ 580 */
574 581
575 /* No panel fitting yet, fixme */
576 if (IS_IGDNG(dev)) 582 if (IS_IGDNG(dev))
577 return; 583 return;
578 584
@@ -585,15 +591,33 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder,
585 I915_WRITE(PFIT_CONTROL, lvds_priv->pfit_control); 591 I915_WRITE(PFIT_CONTROL, lvds_priv->pfit_control);
586} 592}
587 593
594/* Some lid devices report incorrect lid status, assume they're connected */
595static const struct dmi_system_id bad_lid_status[] = {
596 {
597 .ident = "Aspire One",
598 .matches = {
599 DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
600 DMI_MATCH(DMI_PRODUCT_NAME, "Aspire one"),
601 },
602 },
603 { }
604};
605
588/** 606/**
589 * Detect the LVDS connection. 607 * Detect the LVDS connection.
590 * 608 *
 591 * This always returns CONNECTOR_STATUS_CONNECTED. This connector should only have 609 * Since LVDS doesn't have hotplug, we use the lid as a proxy. Open means
592 * been set up if the LVDS was actually connected anyway. 610 * connected and closed means disconnected. We also send hotplug events as
611 * needed, using lid status notification from the input layer.
593 */ 612 */
594static enum drm_connector_status intel_lvds_detect(struct drm_connector *connector) 613static enum drm_connector_status intel_lvds_detect(struct drm_connector *connector)
595{ 614{
596 return connector_status_connected; 615 enum drm_connector_status status = connector_status_connected;
616
617 if (!acpi_lid_open() && !dmi_check_system(bad_lid_status))
618 status = connector_status_disconnected;
619
620 return status;
597} 621}
598 622
599/** 623/**
@@ -632,6 +656,24 @@ static int intel_lvds_get_modes(struct drm_connector *connector)
632 return 0; 656 return 0;
633} 657}
634 658
659static int intel_lid_notify(struct notifier_block *nb, unsigned long val,
660 void *unused)
661{
662 struct drm_i915_private *dev_priv =
663 container_of(nb, struct drm_i915_private, lid_notifier);
664 struct drm_device *dev = dev_priv->dev;
665
666 if (acpi_lid_open() && !dev_priv->suspended) {
667 mutex_lock(&dev->mode_config.mutex);
668 drm_helper_resume_force_mode(dev);
669 mutex_unlock(&dev->mode_config.mutex);
670 }
671
672 drm_sysfs_hotplug_event(dev_priv->dev);
673
674 return NOTIFY_OK;
675}
676
635/** 677/**
636 * intel_lvds_destroy - unregister and free LVDS structures 678 * intel_lvds_destroy - unregister and free LVDS structures
637 * @connector: connector to free 679 * @connector: connector to free
@@ -641,10 +683,14 @@ static int intel_lvds_get_modes(struct drm_connector *connector)
641 */ 683 */
642static void intel_lvds_destroy(struct drm_connector *connector) 684static void intel_lvds_destroy(struct drm_connector *connector)
643{ 685{
686 struct drm_device *dev = connector->dev;
644 struct intel_output *intel_output = to_intel_output(connector); 687 struct intel_output *intel_output = to_intel_output(connector);
688 struct drm_i915_private *dev_priv = dev->dev_private;
645 689
646 if (intel_output->ddc_bus) 690 if (intel_output->ddc_bus)
647 intel_i2c_destroy(intel_output->ddc_bus); 691 intel_i2c_destroy(intel_output->ddc_bus);
692 if (dev_priv->lid_notifier.notifier_call)
693 acpi_lid_notifier_unregister(&dev_priv->lid_notifier);
648 drm_sysfs_connector_remove(connector); 694 drm_sysfs_connector_remove(connector);
649 drm_connector_cleanup(connector); 695 drm_connector_cleanup(connector);
650 kfree(connector); 696 kfree(connector);
@@ -1011,6 +1057,11 @@ out:
1011 pwm |= PWM_PCH_ENABLE; 1057 pwm |= PWM_PCH_ENABLE;
1012 I915_WRITE(BLC_PWM_PCH_CTL1, pwm); 1058 I915_WRITE(BLC_PWM_PCH_CTL1, pwm);
1013 } 1059 }
1060 dev_priv->lid_notifier.notifier_call = intel_lid_notify;
1061 if (acpi_lid_notifier_register(&dev_priv->lid_notifier)) {
1062 DRM_DEBUG("lid notifier registration failed\n");
1063 dev_priv->lid_notifier.notifier_call = NULL;
1064 }
1014 drm_sysfs_connector_add(connector); 1065 drm_sysfs_connector_add(connector);
1015 return; 1066 return;
1016 1067
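The lid-switch support above hangs a notifier_block off dev_priv, registers it with the ACPI button driver, and on a lid event forces a mode restore and emits a hotplug uevent; intel_lvds_detect() then reports the lid state as the connection state. A minimal sketch of the registration half, assuming the same <acpi/button.h> interface the hunks call (acpi_lid_open, acpi_lid_notifier_register/unregister):

    #include <linux/errno.h>
    #include <linux/notifier.h>
    #include <acpi/button.h>

    static int my_lid_notify(struct notifier_block *nb, unsigned long val,
                             void *unused)
    {
        /* lid state changed; acpi_lid_open() reports the new state */
        return NOTIFY_OK;
    }

    static struct notifier_block my_lid_nb = {
        .notifier_call = my_lid_notify,
    };

    static int my_register(void)
    {
        /* registration can fail on machines without a lid device, so
         * keep notifier_call as the "did we register" flag, exactly as
         * the hunk above does */
        if (acpi_lid_notifier_register(&my_lid_nb)) {
            my_lid_nb.notifier_call = NULL;
            return -ENODEV;
        }
        return 0;
    }

    static void my_unregister(void)
    {
        if (my_lid_nb.notifier_call)
            acpi_lid_notifier_unregister(&my_lid_nb);
    }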
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 0bf28efcf2c1..083bec2e50f9 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -135,6 +135,30 @@ struct intel_sdvo_priv {
135 struct intel_sdvo_dtd save_input_dtd_1, save_input_dtd_2; 135 struct intel_sdvo_dtd save_input_dtd_1, save_input_dtd_2;
136 struct intel_sdvo_dtd save_output_dtd[16]; 136 struct intel_sdvo_dtd save_output_dtd[16];
137 u32 save_SDVOX; 137 u32 save_SDVOX;
138 /* add the property for the SDVO-TV */
139 struct drm_property *left_property;
140 struct drm_property *right_property;
141 struct drm_property *top_property;
142 struct drm_property *bottom_property;
143 struct drm_property *hpos_property;
144 struct drm_property *vpos_property;
145
146 /* add the property for the SDVO-TV/LVDS */
147 struct drm_property *brightness_property;
148 struct drm_property *contrast_property;
149 struct drm_property *saturation_property;
150 struct drm_property *hue_property;
151
 152 /* Record the current settings for the above properties */
153 u32 left_margin, right_margin, top_margin, bottom_margin;
 154 /* the valid range of the margins */
155 u32 max_hscan, max_vscan;
156 u32 max_hpos, cur_hpos;
157 u32 max_vpos, cur_vpos;
158 u32 cur_brightness, max_brightness;
159 u32 cur_contrast, max_contrast;
160 u32 cur_saturation, max_saturation;
161 u32 cur_hue, max_hue;
138}; 162};
139 163
140static bool 164static bool
@@ -281,6 +305,31 @@ static const struct _sdvo_cmd_name {
281 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT), 305 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT),
282 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SCALED_HDTV_RESOLUTION_SUPPORT), 306 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SCALED_HDTV_RESOLUTION_SUPPORT),
283 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS), 307 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS),
308 /* Add the op code for SDVO enhancements */
309 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_POSITION_H),
310 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POSITION_H),
311 SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_POSITION_H),
312 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_POSITION_V),
313 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POSITION_V),
314 SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_POSITION_V),
315 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_SATURATION),
316 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SATURATION),
317 SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_SATURATION),
318 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_HUE),
319 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HUE),
320 SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HUE),
321 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_CONTRAST),
322 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_CONTRAST),
323 SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CONTRAST),
324 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_BRIGHTNESS),
325 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_BRIGHTNESS),
326 SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_BRIGHTNESS),
327 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_H),
328 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_H),
329 SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_H),
330 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_V),
331 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_V),
332 SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_V),
284 /* HDMI op code */ 333 /* HDMI op code */
285 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPP_ENCODE), 334 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPP_ENCODE),
286 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ENCODE), 335 SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ENCODE),
@@ -981,7 +1030,7 @@ static void intel_sdvo_set_tv_format(struct intel_output *output)
981 1030
982 status = intel_sdvo_read_response(output, NULL, 0); 1031 status = intel_sdvo_read_response(output, NULL, 0);
983 if (status != SDVO_CMD_STATUS_SUCCESS) 1032 if (status != SDVO_CMD_STATUS_SUCCESS)
984 DRM_DEBUG("%s: Failed to set TV format\n", 1033 DRM_DEBUG_KMS("%s: Failed to set TV format\n",
985 SDVO_NAME(sdvo_priv)); 1034 SDVO_NAME(sdvo_priv));
986} 1035}
987 1036
@@ -1792,6 +1841,45 @@ static int intel_sdvo_get_modes(struct drm_connector *connector)
1792 return 1; 1841 return 1;
1793} 1842}
1794 1843
1844static
1845void intel_sdvo_destroy_enhance_property(struct drm_connector *connector)
1846{
1847 struct intel_output *intel_output = to_intel_output(connector);
1848 struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
1849 struct drm_device *dev = connector->dev;
1850
1851 if (sdvo_priv->is_tv) {
1852 if (sdvo_priv->left_property)
1853 drm_property_destroy(dev, sdvo_priv->left_property);
1854 if (sdvo_priv->right_property)
1855 drm_property_destroy(dev, sdvo_priv->right_property);
1856 if (sdvo_priv->top_property)
1857 drm_property_destroy(dev, sdvo_priv->top_property);
1858 if (sdvo_priv->bottom_property)
1859 drm_property_destroy(dev, sdvo_priv->bottom_property);
1860 if (sdvo_priv->hpos_property)
1861 drm_property_destroy(dev, sdvo_priv->hpos_property);
1862 if (sdvo_priv->vpos_property)
1863 drm_property_destroy(dev, sdvo_priv->vpos_property);
1864 }
1865 if (sdvo_priv->is_tv) {
1866 if (sdvo_priv->saturation_property)
1867 drm_property_destroy(dev,
1868 sdvo_priv->saturation_property);
1869 if (sdvo_priv->contrast_property)
1870 drm_property_destroy(dev,
1871 sdvo_priv->contrast_property);
1872 if (sdvo_priv->hue_property)
1873 drm_property_destroy(dev, sdvo_priv->hue_property);
1874 }
1875 if (sdvo_priv->is_tv || sdvo_priv->is_lvds) {
1876 if (sdvo_priv->brightness_property)
1877 drm_property_destroy(dev,
1878 sdvo_priv->brightness_property);
1879 }
1880 return;
1881}
1882
1795static void intel_sdvo_destroy(struct drm_connector *connector) 1883static void intel_sdvo_destroy(struct drm_connector *connector)
1796{ 1884{
1797 struct intel_output *intel_output = to_intel_output(connector); 1885 struct intel_output *intel_output = to_intel_output(connector);
@@ -1812,6 +1900,9 @@ static void intel_sdvo_destroy(struct drm_connector *connector)
1812 drm_property_destroy(connector->dev, 1900 drm_property_destroy(connector->dev,
1813 sdvo_priv->tv_format_property); 1901 sdvo_priv->tv_format_property);
1814 1902
1903 if (sdvo_priv->is_tv || sdvo_priv->is_lvds)
1904 intel_sdvo_destroy_enhance_property(connector);
1905
1815 drm_sysfs_connector_remove(connector); 1906 drm_sysfs_connector_remove(connector);
1816 drm_connector_cleanup(connector); 1907 drm_connector_cleanup(connector);
1817 1908
@@ -1829,6 +1920,8 @@ intel_sdvo_set_property(struct drm_connector *connector,
1829 struct drm_crtc *crtc = encoder->crtc; 1920 struct drm_crtc *crtc = encoder->crtc;
1830 int ret = 0; 1921 int ret = 0;
1831 bool changed = false; 1922 bool changed = false;
1923 uint8_t cmd, status;
1924 uint16_t temp_value;
1832 1925
1833 ret = drm_connector_property_set_value(connector, property, val); 1926 ret = drm_connector_property_set_value(connector, property, val);
1834 if (ret < 0) 1927 if (ret < 0)
@@ -1845,11 +1938,102 @@ intel_sdvo_set_property(struct drm_connector *connector,
1845 1938
1846 sdvo_priv->tv_format_name = sdvo_priv->tv_format_supported[val]; 1939 sdvo_priv->tv_format_name = sdvo_priv->tv_format_supported[val];
1847 changed = true; 1940 changed = true;
1848 } else {
1849 ret = -EINVAL;
1850 goto out;
1851 } 1941 }
1852 1942
1943 if (sdvo_priv->is_tv || sdvo_priv->is_lvds) {
1944 cmd = 0;
1945 temp_value = val;
1946 if (sdvo_priv->left_property == property) {
1947 drm_connector_property_set_value(connector,
1948 sdvo_priv->right_property, val);
1949 if (sdvo_priv->left_margin == temp_value)
1950 goto out;
1951
1952 sdvo_priv->left_margin = temp_value;
1953 sdvo_priv->right_margin = temp_value;
1954 temp_value = sdvo_priv->max_hscan -
1955 sdvo_priv->left_margin;
1956 cmd = SDVO_CMD_SET_OVERSCAN_H;
1957 } else if (sdvo_priv->right_property == property) {
1958 drm_connector_property_set_value(connector,
1959 sdvo_priv->left_property, val);
1960 if (sdvo_priv->right_margin == temp_value)
1961 goto out;
1962
1963 sdvo_priv->left_margin = temp_value;
1964 sdvo_priv->right_margin = temp_value;
1965 temp_value = sdvo_priv->max_hscan -
1966 sdvo_priv->left_margin;
1967 cmd = SDVO_CMD_SET_OVERSCAN_H;
1968 } else if (sdvo_priv->top_property == property) {
1969 drm_connector_property_set_value(connector,
1970 sdvo_priv->bottom_property, val);
1971 if (sdvo_priv->top_margin == temp_value)
1972 goto out;
1973
1974 sdvo_priv->top_margin = temp_value;
1975 sdvo_priv->bottom_margin = temp_value;
1976 temp_value = sdvo_priv->max_vscan -
1977 sdvo_priv->top_margin;
1978 cmd = SDVO_CMD_SET_OVERSCAN_V;
1979 } else if (sdvo_priv->bottom_property == property) {
1980 drm_connector_property_set_value(connector,
1981 sdvo_priv->top_property, val);
1982 if (sdvo_priv->bottom_margin == temp_value)
1983 goto out;
1984 sdvo_priv->top_margin = temp_value;
1985 sdvo_priv->bottom_margin = temp_value;
1986 temp_value = sdvo_priv->max_vscan -
1987 sdvo_priv->top_margin;
1988 cmd = SDVO_CMD_SET_OVERSCAN_V;
1989 } else if (sdvo_priv->hpos_property == property) {
1990 if (sdvo_priv->cur_hpos == temp_value)
1991 goto out;
1992
1993 cmd = SDVO_CMD_SET_POSITION_H;
1994 sdvo_priv->cur_hpos = temp_value;
1995 } else if (sdvo_priv->vpos_property == property) {
1996 if (sdvo_priv->cur_vpos == temp_value)
1997 goto out;
1998
1999 cmd = SDVO_CMD_SET_POSITION_V;
2000 sdvo_priv->cur_vpos = temp_value;
2001 } else if (sdvo_priv->saturation_property == property) {
2002 if (sdvo_priv->cur_saturation == temp_value)
2003 goto out;
2004
2005 cmd = SDVO_CMD_SET_SATURATION;
2006 sdvo_priv->cur_saturation = temp_value;
2007 } else if (sdvo_priv->contrast_property == property) {
2008 if (sdvo_priv->cur_contrast == temp_value)
2009 goto out;
2010
2011 cmd = SDVO_CMD_SET_CONTRAST;
2012 sdvo_priv->cur_contrast = temp_value;
2013 } else if (sdvo_priv->hue_property == property) {
2014 if (sdvo_priv->cur_hue == temp_value)
2015 goto out;
2016
2017 cmd = SDVO_CMD_SET_HUE;
2018 sdvo_priv->cur_hue = temp_value;
2019 } else if (sdvo_priv->brightness_property == property) {
2020 if (sdvo_priv->cur_brightness == temp_value)
2021 goto out;
2022
2023 cmd = SDVO_CMD_SET_BRIGHTNESS;
2024 sdvo_priv->cur_brightness = temp_value;
2025 }
2026 if (cmd) {
2027 intel_sdvo_write_cmd(intel_output, cmd, &temp_value, 2);
2028 status = intel_sdvo_read_response(intel_output,
2029 NULL, 0);
2030 if (status != SDVO_CMD_STATUS_SUCCESS) {
 2031 DRM_DEBUG_KMS("Incorrect SDVO command\n");
2032 return -EINVAL;
2033 }
2034 changed = true;
2035 }
2036 }
1853 if (changed && crtc) 2037 if (changed && crtc)
1854 drm_crtc_helper_set_mode(crtc, &crtc->mode, crtc->x, 2038 drm_crtc_helper_set_mode(crtc, &crtc->mode, crtc->x,
1855 crtc->y, crtc->fb); 2039 crtc->y, crtc->fb);
@@ -2090,6 +2274,8 @@ intel_sdvo_output_setup(struct intel_output *intel_output, uint16_t flags)
2090 sdvo_priv->controlled_output = SDVO_OUTPUT_RGB1; 2274 sdvo_priv->controlled_output = SDVO_OUTPUT_RGB1;
2091 encoder->encoder_type = DRM_MODE_ENCODER_DAC; 2275 encoder->encoder_type = DRM_MODE_ENCODER_DAC;
2092 connector->connector_type = DRM_MODE_CONNECTOR_VGA; 2276 connector->connector_type = DRM_MODE_CONNECTOR_VGA;
2277 intel_output->clone_mask = (1 << INTEL_SDVO_NON_TV_CLONE_BIT) |
2278 (1 << INTEL_ANALOG_CLONE_BIT);
2093 } else if (flags & SDVO_OUTPUT_LVDS0) { 2279 } else if (flags & SDVO_OUTPUT_LVDS0) {
2094 2280
2095 sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS0; 2281 sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS0;
@@ -2176,6 +2362,310 @@ static void intel_sdvo_tv_create_property(struct drm_connector *connector)
2176 2362
2177} 2363}
2178 2364
2365static void intel_sdvo_create_enhance_property(struct drm_connector *connector)
2366{
2367 struct intel_output *intel_output = to_intel_output(connector);
2368 struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
2369 struct intel_sdvo_enhancements_reply sdvo_data;
2370 struct drm_device *dev = connector->dev;
2371 uint8_t status;
2372 uint16_t response, data_value[2];
2373
2374 intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS,
2375 NULL, 0);
2376 status = intel_sdvo_read_response(intel_output, &sdvo_data,
2377 sizeof(sdvo_data));
2378 if (status != SDVO_CMD_STATUS_SUCCESS) {
 2379 DRM_DEBUG_KMS("incorrect response returned\n");
2380 return;
2381 }
2382 response = *((uint16_t *)&sdvo_data);
2383 if (!response) {
2384 DRM_DEBUG_KMS("No enhancement is supported\n");
2385 return;
2386 }
2387 if (sdvo_priv->is_tv) {
 2388 /* when horizontal overscan is supported, add the left/right
2389 * property
2390 */
2391 if (sdvo_data.overscan_h) {
2392 intel_sdvo_write_cmd(intel_output,
2393 SDVO_CMD_GET_MAX_OVERSCAN_H, NULL, 0);
2394 status = intel_sdvo_read_response(intel_output,
2395 &data_value, 4);
2396 if (status != SDVO_CMD_STATUS_SUCCESS) {
2397 DRM_DEBUG_KMS("Incorrect SDVO max "
2398 "h_overscan\n");
2399 return;
2400 }
2401 intel_sdvo_write_cmd(intel_output,
2402 SDVO_CMD_GET_OVERSCAN_H, NULL, 0);
2403 status = intel_sdvo_read_response(intel_output,
2404 &response, 2);
2405 if (status != SDVO_CMD_STATUS_SUCCESS) {
2406 DRM_DEBUG_KMS("Incorrect SDVO h_overscan\n");
2407 return;
2408 }
2409 sdvo_priv->max_hscan = data_value[0];
2410 sdvo_priv->left_margin = data_value[0] - response;
2411 sdvo_priv->right_margin = sdvo_priv->left_margin;
2412 sdvo_priv->left_property =
2413 drm_property_create(dev, DRM_MODE_PROP_RANGE,
2414 "left_margin", 2);
2415 sdvo_priv->left_property->values[0] = 0;
2416 sdvo_priv->left_property->values[1] = data_value[0];
2417 drm_connector_attach_property(connector,
2418 sdvo_priv->left_property,
2419 sdvo_priv->left_margin);
2420 sdvo_priv->right_property =
2421 drm_property_create(dev, DRM_MODE_PROP_RANGE,
2422 "right_margin", 2);
2423 sdvo_priv->right_property->values[0] = 0;
2424 sdvo_priv->right_property->values[1] = data_value[0];
2425 drm_connector_attach_property(connector,
2426 sdvo_priv->right_property,
2427 sdvo_priv->right_margin);
2428 DRM_DEBUG_KMS("h_overscan: max %d, "
2429 "default %d, current %d\n",
2430 data_value[0], data_value[1], response);
2431 }
2432 if (sdvo_data.overscan_v) {
2433 intel_sdvo_write_cmd(intel_output,
2434 SDVO_CMD_GET_MAX_OVERSCAN_V, NULL, 0);
2435 status = intel_sdvo_read_response(intel_output,
2436 &data_value, 4);
2437 if (status != SDVO_CMD_STATUS_SUCCESS) {
2438 DRM_DEBUG_KMS("Incorrect SDVO max "
2439 "v_overscan\n");
2440 return;
2441 }
2442 intel_sdvo_write_cmd(intel_output,
2443 SDVO_CMD_GET_OVERSCAN_V, NULL, 0);
2444 status = intel_sdvo_read_response(intel_output,
2445 &response, 2);
2446 if (status != SDVO_CMD_STATUS_SUCCESS) {
2447 DRM_DEBUG_KMS("Incorrect SDVO v_overscan\n");
2448 return;
2449 }
2450 sdvo_priv->max_vscan = data_value[0];
2451 sdvo_priv->top_margin = data_value[0] - response;
2452 sdvo_priv->bottom_margin = sdvo_priv->top_margin;
2453 sdvo_priv->top_property =
2454 drm_property_create(dev, DRM_MODE_PROP_RANGE,
2455 "top_margin", 2);
2456 sdvo_priv->top_property->values[0] = 0;
2457 sdvo_priv->top_property->values[1] = data_value[0];
2458 drm_connector_attach_property(connector,
2459 sdvo_priv->top_property,
2460 sdvo_priv->top_margin);
2461 sdvo_priv->bottom_property =
2462 drm_property_create(dev, DRM_MODE_PROP_RANGE,
2463 "bottom_margin", 2);
2464 sdvo_priv->bottom_property->values[0] = 0;
2465 sdvo_priv->bottom_property->values[1] = data_value[0];
2466 drm_connector_attach_property(connector,
2467 sdvo_priv->bottom_property,
2468 sdvo_priv->bottom_margin);
2469 DRM_DEBUG_KMS("v_overscan: max %d, "
2470 "default %d, current %d\n",
2471 data_value[0], data_value[1], response);
2472 }
2473 if (sdvo_data.position_h) {
2474 intel_sdvo_write_cmd(intel_output,
2475 SDVO_CMD_GET_MAX_POSITION_H, NULL, 0);
2476 status = intel_sdvo_read_response(intel_output,
2477 &data_value, 4);
2478 if (status != SDVO_CMD_STATUS_SUCCESS) {
2479 DRM_DEBUG_KMS("Incorrect SDVO Max h_pos\n");
2480 return;
2481 }
2482 intel_sdvo_write_cmd(intel_output,
2483 SDVO_CMD_GET_POSITION_H, NULL, 0);
2484 status = intel_sdvo_read_response(intel_output,
2485 &response, 2);
2486 if (status != SDVO_CMD_STATUS_SUCCESS) {
 2487 DRM_DEBUG_KMS("Incorrect SDVO get h_position\n");
2488 return;
2489 }
2490 sdvo_priv->max_hpos = data_value[0];
2491 sdvo_priv->cur_hpos = response;
2492 sdvo_priv->hpos_property =
2493 drm_property_create(dev, DRM_MODE_PROP_RANGE,
2494 "hpos", 2);
2495 sdvo_priv->hpos_property->values[0] = 0;
2496 sdvo_priv->hpos_property->values[1] = data_value[0];
2497 drm_connector_attach_property(connector,
2498 sdvo_priv->hpos_property,
2499 sdvo_priv->cur_hpos);
2500 DRM_DEBUG_KMS("h_position: max %d, "
2501 "default %d, current %d\n",
2502 data_value[0], data_value[1], response);
2503 }
2504 if (sdvo_data.position_v) {
2505 intel_sdvo_write_cmd(intel_output,
2506 SDVO_CMD_GET_MAX_POSITION_V, NULL, 0);
2507 status = intel_sdvo_read_response(intel_output,
2508 &data_value, 4);
2509 if (status != SDVO_CMD_STATUS_SUCCESS) {
2510 DRM_DEBUG_KMS("Incorrect SDVO Max v_pos\n");
2511 return;
2512 }
2513 intel_sdvo_write_cmd(intel_output,
2514 SDVO_CMD_GET_POSITION_V, NULL, 0);
2515 status = intel_sdvo_read_response(intel_output,
2516 &response, 2);
2517 if (status != SDVO_CMD_STATUS_SUCCESS) {
 2518 DRM_DEBUG_KMS("Incorrect SDVO get v_position\n");
2519 return;
2520 }
2521 sdvo_priv->max_vpos = data_value[0];
2522 sdvo_priv->cur_vpos = response;
2523 sdvo_priv->vpos_property =
2524 drm_property_create(dev, DRM_MODE_PROP_RANGE,
2525 "vpos", 2);
2526 sdvo_priv->vpos_property->values[0] = 0;
2527 sdvo_priv->vpos_property->values[1] = data_value[0];
2528 drm_connector_attach_property(connector,
2529 sdvo_priv->vpos_property,
2530 sdvo_priv->cur_vpos);
2531 DRM_DEBUG_KMS("v_position: max %d, "
2532 "default %d, current %d\n",
2533 data_value[0], data_value[1], response);
2534 }
2535 }
2536 if (sdvo_priv->is_tv) {
2537 if (sdvo_data.saturation) {
2538 intel_sdvo_write_cmd(intel_output,
2539 SDVO_CMD_GET_MAX_SATURATION, NULL, 0);
2540 status = intel_sdvo_read_response(intel_output,
2541 &data_value, 4);
2542 if (status != SDVO_CMD_STATUS_SUCCESS) {
2543 DRM_DEBUG_KMS("Incorrect SDVO Max sat\n");
2544 return;
2545 }
2546 intel_sdvo_write_cmd(intel_output,
2547 SDVO_CMD_GET_SATURATION, NULL, 0);
2548 status = intel_sdvo_read_response(intel_output,
2549 &response, 2);
2550 if (status != SDVO_CMD_STATUS_SUCCESS) {
2551 DRM_DEBUG_KMS("Incorrect SDVO get sat\n");
2552 return;
2553 }
2554 sdvo_priv->max_saturation = data_value[0];
2555 sdvo_priv->cur_saturation = response;
2556 sdvo_priv->saturation_property =
2557 drm_property_create(dev, DRM_MODE_PROP_RANGE,
2558 "saturation", 2);
2559 sdvo_priv->saturation_property->values[0] = 0;
2560 sdvo_priv->saturation_property->values[1] =
2561 data_value[0];
2562 drm_connector_attach_property(connector,
2563 sdvo_priv->saturation_property,
2564 sdvo_priv->cur_saturation);
2565 DRM_DEBUG_KMS("saturation: max %d, "
2566 "default %d, current %d\n",
2567 data_value[0], data_value[1], response);
2568 }
2569 if (sdvo_data.contrast) {
2570 intel_sdvo_write_cmd(intel_output,
2571 SDVO_CMD_GET_MAX_CONTRAST, NULL, 0);
2572 status = intel_sdvo_read_response(intel_output,
2573 &data_value, 4);
2574 if (status != SDVO_CMD_STATUS_SUCCESS) {
2575 DRM_DEBUG_KMS("Incorrect SDVO Max contrast\n");
2576 return;
2577 }
2578 intel_sdvo_write_cmd(intel_output,
2579 SDVO_CMD_GET_CONTRAST, NULL, 0);
2580 status = intel_sdvo_read_response(intel_output,
2581 &response, 2);
2582 if (status != SDVO_CMD_STATUS_SUCCESS) {
2583 DRM_DEBUG_KMS("Incorrect SDVO get contrast\n");
2584 return;
2585 }
2586 sdvo_priv->max_contrast = data_value[0];
2587 sdvo_priv->cur_contrast = response;
2588 sdvo_priv->contrast_property =
2589 drm_property_create(dev, DRM_MODE_PROP_RANGE,
2590 "contrast", 2);
2591 sdvo_priv->contrast_property->values[0] = 0;
2592 sdvo_priv->contrast_property->values[1] = data_value[0];
2593 drm_connector_attach_property(connector,
2594 sdvo_priv->contrast_property,
2595 sdvo_priv->cur_contrast);
2596 DRM_DEBUG_KMS("contrast: max %d, "
2597 "default %d, current %d\n",
2598 data_value[0], data_value[1], response);
2599 }
2600 if (sdvo_data.hue) {
2601 intel_sdvo_write_cmd(intel_output,
2602 SDVO_CMD_GET_MAX_HUE, NULL, 0);
2603 status = intel_sdvo_read_response(intel_output,
2604 &data_value, 4);
2605 if (status != SDVO_CMD_STATUS_SUCCESS) {
2606 DRM_DEBUG_KMS("Incorrect SDVO Max hue\n");
2607 return;
2608 }
2609 intel_sdvo_write_cmd(intel_output,
2610 SDVO_CMD_GET_HUE, NULL, 0);
2611 status = intel_sdvo_read_response(intel_output,
2612 &response, 2);
2613 if (status != SDVO_CMD_STATUS_SUCCESS) {
2614 DRM_DEBUG_KMS("Incorrect SDVO get hue\n");
2615 return;
2616 }
2617 sdvo_priv->max_hue = data_value[0];
2618 sdvo_priv->cur_hue = response;
2619 sdvo_priv->hue_property =
2620 drm_property_create(dev, DRM_MODE_PROP_RANGE,
2621 "hue", 2);
2622 sdvo_priv->hue_property->values[0] = 0;
2623 sdvo_priv->hue_property->values[1] =
2624 data_value[0];
2625 drm_connector_attach_property(connector,
2626 sdvo_priv->hue_property,
2627 sdvo_priv->cur_hue);
2628 DRM_DEBUG_KMS("hue: max %d, default %d, current %d\n",
2629 data_value[0], data_value[1], response);
2630 }
2631 }
2632 if (sdvo_priv->is_tv || sdvo_priv->is_lvds) {
2633 if (sdvo_data.brightness) {
2634 intel_sdvo_write_cmd(intel_output,
2635 SDVO_CMD_GET_MAX_BRIGHTNESS, NULL, 0);
2636 status = intel_sdvo_read_response(intel_output,
2637 &data_value, 4);
2638 if (status != SDVO_CMD_STATUS_SUCCESS) {
2639 DRM_DEBUG_KMS("Incorrect SDVO Max bright\n");
2640 return;
2641 }
2642 intel_sdvo_write_cmd(intel_output,
2643 SDVO_CMD_GET_BRIGHTNESS, NULL, 0);
2644 status = intel_sdvo_read_response(intel_output,
2645 &response, 2);
2646 if (status != SDVO_CMD_STATUS_SUCCESS) {
 2647 DRM_DEBUG_KMS("Incorrect SDVO get brightness\n");
2648 return;
2649 }
2650 sdvo_priv->max_brightness = data_value[0];
2651 sdvo_priv->cur_brightness = response;
2652 sdvo_priv->brightness_property =
2653 drm_property_create(dev, DRM_MODE_PROP_RANGE,
2654 "brightness", 2);
2655 sdvo_priv->brightness_property->values[0] = 0;
2656 sdvo_priv->brightness_property->values[1] =
2657 data_value[0];
2658 drm_connector_attach_property(connector,
2659 sdvo_priv->brightness_property,
2660 sdvo_priv->cur_brightness);
2661 DRM_DEBUG_KMS("brightness: max %d, "
2662 "default %d, current %d\n",
2663 data_value[0], data_value[1], response);
2664 }
2665 }
2666 return;
2667}
2668
2179bool intel_sdvo_init(struct drm_device *dev, int output_device) 2669bool intel_sdvo_init(struct drm_device *dev, int output_device)
2180{ 2670{
2181 struct drm_connector *connector; 2671 struct drm_connector *connector;
@@ -2264,6 +2754,10 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
2264 drm_mode_connector_attach_encoder(&intel_output->base, &intel_output->enc); 2754 drm_mode_connector_attach_encoder(&intel_output->base, &intel_output->enc);
2265 if (sdvo_priv->is_tv) 2755 if (sdvo_priv->is_tv)
2266 intel_sdvo_tv_create_property(connector); 2756 intel_sdvo_tv_create_property(connector);
2757
2758 if (sdvo_priv->is_tv || sdvo_priv->is_lvds)
2759 intel_sdvo_create_enhance_property(connector);
2760
2267 drm_sysfs_connector_add(connector); 2761 drm_sysfs_connector_add(connector);
2268 2762
2269 intel_sdvo_select_ddc_bus(sdvo_priv); 2763 intel_sdvo_select_ddc_bus(sdvo_priv);
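Every enhancement block in intel_sdvo_create_enhance_property() repeats the same four steps: query the maximum, query the current value, create a two-element RANGE property, attach it to the connector. A condensed sketch of that recipe against the era's property API; the helper name is hypothetical and error handling is trimmed:

    #include "drmP.h"
    #include "drm_crtc.h"

    static struct drm_property *
    attach_range_property(struct drm_connector *connector, const char *name,
                          u32 max, u32 cur)
    {
        struct drm_device *dev = connector->dev;
        struct drm_property *prop;

        /* a RANGE property carries two values: [0] = min, [1] = max */
        prop = drm_property_create(dev, DRM_MODE_PROP_RANGE, name, 2);
        if (!prop)
            return NULL;
        prop->values[0] = 0;
        prop->values[1] = max;
        drm_connector_attach_property(connector, prop, cur);
        return prop;
    }

With a helper like this, each block above would shrink to two SDVO queries plus one call, e.g. attach_range_property(connector, "saturation", data_value[0], response).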
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 6bedd2fcfc15..737335ff2b21 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -477,8 +477,8 @@ config I2C_PNX
477 will be called i2c-pnx. 477 will be called i2c-pnx.
478 478
479config I2C_PXA 479config I2C_PXA
480 tristate "Intel PXA2XX I2C adapter (EXPERIMENTAL)" 480 tristate "Intel PXA2XX I2C adapter"
481 depends on EXPERIMENTAL && ARCH_PXA 481 depends on ARCH_PXA || ARCH_MMP
482 help 482 help
483 If you have devices in the PXA I2C bus, say yes to this option. 483 If you have devices in the PXA I2C bus, say yes to this option.
484 This driver can also be built as a module. If so, the module 484 This driver can also be built as a module. If so, the module
diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c
index 949c97ff57e3..1f20a042a4f5 100644
--- a/drivers/idle/i7300_idle.c
+++ b/drivers/idle/i7300_idle.c
@@ -29,8 +29,8 @@
29 29
30#include <asm/idle.h> 30#include <asm/idle.h>
31 31
32#include "../dma/ioatdma_hw.h" 32#include "../dma/ioat/hw.h"
33#include "../dma/ioatdma_registers.h" 33#include "../dma/ioat/registers.h"
34 34
35#define I7300_IDLE_DRIVER_VERSION "1.55" 35#define I7300_IDLE_DRIVER_VERSION "1.55"
36#define I7300_PRINT "i7300_idle:" 36#define I7300_PRINT "i7300_idle:"
@@ -126,9 +126,9 @@ static void i7300_idle_ioat_stop(void)
126 udelay(10); 126 udelay(10);
127 127
128 sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & 128 sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
129 IOAT_CHANSTS_DMA_TRANSFER_STATUS; 129 IOAT_CHANSTS_STATUS;
130 130
131 if (sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) 131 if (sts != IOAT_CHANSTS_ACTIVE)
132 break; 132 break;
133 133
134 } 134 }
@@ -160,9 +160,9 @@ static int __init i7300_idle_ioat_selftest(u8 *ctl,
160 udelay(1000); 160 udelay(1000);
161 161
162 chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & 162 chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
163 IOAT_CHANSTS_DMA_TRANSFER_STATUS; 163 IOAT_CHANSTS_STATUS;
164 164
165 if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE) { 165 if (chan_sts != IOAT_CHANSTS_DONE) {
166 /* Not complete, reset the channel */ 166 /* Not complete, reset the channel */
167 writeb(IOAT_CHANCMD_RESET, 167 writeb(IOAT_CHANCMD_RESET,
168 ioat_chanbase + IOAT1_CHANCMD_OFFSET); 168 ioat_chanbase + IOAT1_CHANCMD_OFFSET);
@@ -288,9 +288,9 @@ static void __exit i7300_idle_ioat_exit(void)
288 ioat_chanbase + IOAT1_CHANCMD_OFFSET); 288 ioat_chanbase + IOAT1_CHANCMD_OFFSET);
289 289
290 chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & 290 chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
291 IOAT_CHANSTS_DMA_TRANSFER_STATUS; 291 IOAT_CHANSTS_STATUS;
292 292
293 if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) { 293 if (chan_sts != IOAT_CHANSTS_ACTIVE) {
294 writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET); 294 writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET);
295 break; 295 break;
296 } 296 }
@@ -298,14 +298,14 @@ static void __exit i7300_idle_ioat_exit(void)
298 } 298 }
299 299
300 chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & 300 chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
301 IOAT_CHANSTS_DMA_TRANSFER_STATUS; 301 IOAT_CHANSTS_STATUS;
302 302
303 /* 303 /*
304 * We tried to reset multiple times. If IO A/T channel is still active 304 * We tried to reset multiple times. If IO A/T channel is still active
305 * flag an error and return without cleanup. Memory leak is better 305 * flag an error and return without cleanup. Memory leak is better
306 * than random corruption in that extreme error situation. 306 * than random corruption in that extreme error situation.
307 */ 307 */
308 if (chan_sts == IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) { 308 if (chan_sts == IOAT_CHANSTS_ACTIVE) {
309 printk(KERN_ERR I7300_PRINT "Unable to stop IO A/T channels." 309 printk(KERN_ERR I7300_PRINT "Unable to stop IO A/T channels."
310 " Not freeing resources\n"); 310 " Not freeing resources\n");
311 return; 311 return;
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 57a3c6f947b2..4e0f2829e0e5 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -37,7 +37,8 @@
37enum rmpp_state { 37enum rmpp_state {
38 RMPP_STATE_ACTIVE, 38 RMPP_STATE_ACTIVE,
39 RMPP_STATE_TIMEOUT, 39 RMPP_STATE_TIMEOUT,
40 RMPP_STATE_COMPLETE 40 RMPP_STATE_COMPLETE,
41 RMPP_STATE_CANCELING
41}; 42};
42 43
43struct mad_rmpp_recv { 44struct mad_rmpp_recv {
@@ -87,18 +88,22 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent)
87 88
88 spin_lock_irqsave(&agent->lock, flags); 89 spin_lock_irqsave(&agent->lock, flags);
89 list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { 90 list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
91 if (rmpp_recv->state != RMPP_STATE_COMPLETE)
92 ib_free_recv_mad(rmpp_recv->rmpp_wc);
93 rmpp_recv->state = RMPP_STATE_CANCELING;
94 }
95 spin_unlock_irqrestore(&agent->lock, flags);
96
97 list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
90 cancel_delayed_work(&rmpp_recv->timeout_work); 98 cancel_delayed_work(&rmpp_recv->timeout_work);
91 cancel_delayed_work(&rmpp_recv->cleanup_work); 99 cancel_delayed_work(&rmpp_recv->cleanup_work);
92 } 100 }
93 spin_unlock_irqrestore(&agent->lock, flags);
94 101
95 flush_workqueue(agent->qp_info->port_priv->wq); 102 flush_workqueue(agent->qp_info->port_priv->wq);
96 103
97 list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv, 104 list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv,
98 &agent->rmpp_list, list) { 105 &agent->rmpp_list, list) {
99 list_del(&rmpp_recv->list); 106 list_del(&rmpp_recv->list);
100 if (rmpp_recv->state != RMPP_STATE_COMPLETE)
101 ib_free_recv_mad(rmpp_recv->rmpp_wc);
102 destroy_rmpp_recv(rmpp_recv); 107 destroy_rmpp_recv(rmpp_recv);
103 } 108 }
104} 109}
@@ -260,6 +265,10 @@ static void recv_cleanup_handler(struct work_struct *work)
260 unsigned long flags; 265 unsigned long flags;
261 266
262 spin_lock_irqsave(&rmpp_recv->agent->lock, flags); 267 spin_lock_irqsave(&rmpp_recv->agent->lock, flags);
268 if (rmpp_recv->state == RMPP_STATE_CANCELING) {
269 spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
270 return;
271 }
263 list_del(&rmpp_recv->list); 272 list_del(&rmpp_recv->list);
264 spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); 273 spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
265 destroy_rmpp_recv(rmpp_recv); 274 destroy_rmpp_recv(rmpp_recv);
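The mad_rmpp.c change is a two-phase teardown that closes a race with the deferred cleanup handler: the new order is (1) under the lock, free any unfinished rmpp_wc and mark every receive CANCELING, (2) cancel and flush the delayed work, (3) only then walk the list and destroy the entries; recv_cleanup_handler() now bails out when it sees CANCELING instead of destroying a receive that teardown already owns. A runnable userspace sketch of the core idea, with a pthread mutex standing in for the spinlock:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    enum state { RMPP_ACTIVE, RMPP_COMPLETE, RMPP_CANCELING };

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    struct recv_ctx {
        enum state state;
    };

    /* Deferred cleanup work: bails out once teardown has claimed the
     * object, so nothing is freed twice. */
    static void cleanup_handler(struct recv_ctx *ctx)
    {
        pthread_mutex_lock(&lock);
        if (ctx->state == RMPP_CANCELING) {
            pthread_mutex_unlock(&lock);
            return;                 /* teardown will destroy it */
        }
        pthread_mutex_unlock(&lock);
        free(ctx);
    }

    int main(void)
    {
        struct recv_ctx *ctx = calloc(1, sizeof(*ctx));

        /* phase 1: claim the context under the lock */
        pthread_mutex_lock(&lock);
        ctx->state = RMPP_CANCELING;
        pthread_mutex_unlock(&lock);

        /* phase 2: any still-queued cleanup now runs as a no-op */
        cleanup_handler(ctx);

        /* phase 3: teardown destroys the context itself */
        free(ctx);
        puts("torn down without a double free");
        return 0;
    }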
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
index 056b2a4c6970..0aa0110e4b6c 100644
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -68,11 +68,16 @@ static void catas_reset(struct work_struct *work)
68 spin_unlock_irq(&catas_lock); 68 spin_unlock_irq(&catas_lock);
69 69
70 list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) { 70 list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) {
71 struct pci_dev *pdev = dev->pdev;
71 ret = __mthca_restart_one(dev->pdev); 72 ret = __mthca_restart_one(dev->pdev);
73 /* 'dev' now is not valid */
72 if (ret) 74 if (ret)
73 mthca_err(dev, "Reset failed (%d)\n", ret); 75 printk(KERN_ERR "mthca %s: Reset failed (%d)\n",
74 else 76 pci_name(pdev), ret);
75 mthca_dbg(dev, "Reset succeeded\n"); 77 else {
78 struct mthca_dev *d = pci_get_drvdata(pdev);
79 mthca_dbg(d, "Reset succeeded\n");
80 }
76 } 81 }
77 82
78 mutex_unlock(&mthca_device_mutex); 83 mutex_unlock(&mthca_device_mutex);
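The hunk caches pdev before calling __mthca_restart_one() because the restart path frees the mthca_dev it was handed; the error message must therefore use the saved pci_name(), and the success path re-derives the fresh device from drvdata. The same capture-before-invalidate shape in plain C (toy types):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct dev { char name[16]; };
static struct dev *drvdata;             /* stands in for pci_get_drvdata() */

static int restart_one(struct dev *d)
{
        free(d);                        /* the old device is gone now */
        drvdata = malloc(sizeof(*drvdata));
        if (!drvdata)
                return -1;
        strcpy(drvdata->name, "mthca0");
        return 0;
}

int main(void)
{
        struct dev *d = malloc(sizeof(*d));
        if (!d)
                return 1;
        strcpy(d->name, "mthca0");

        char saved[16];
        strcpy(saved, d->name);         /* capture before 'd' is freed */
        if (restart_one(d))
                fprintf(stderr, "%s: reset failed\n", saved);
        else
                printf("%s: reset succeeded\n", drvdata->name);
        free(drvdata);
        return 0;
}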
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 538e409d4515..e593af3354b8 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -1566,7 +1566,6 @@ static const struct net_device_ops nes_netdev_ops = {
1566 .ndo_set_mac_address = nes_netdev_set_mac_address, 1566 .ndo_set_mac_address = nes_netdev_set_mac_address,
1567 .ndo_set_multicast_list = nes_netdev_set_multicast_list, 1567 .ndo_set_multicast_list = nes_netdev_set_multicast_list,
1568 .ndo_change_mtu = nes_netdev_change_mtu, 1568 .ndo_change_mtu = nes_netdev_change_mtu,
1569 .ndo_set_mac_address = eth_mac_addr,
1570 .ndo_validate_addr = eth_validate_addr, 1569 .ndo_validate_addr = eth_validate_addr,
1571 .ndo_vlan_rx_register = nes_netdev_vlan_rx_register, 1570 .ndo_vlan_rx_register = nes_netdev_vlan_rx_register,
1572}; 1571};
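Before this hunk, nes_netdev_ops initialized .ndo_set_mac_address twice. In C, the last designated initializer for a member silently wins (GCC only warns under -Woverride-init), so the generic eth_mac_addr was overriding the driver's own handler; deleting the stale duplicate restores nes_netdev_set_mac_address. A self-contained demonstration of the language rule:

#include <stdio.h>

struct ops { int (*set_mac)(void); };

static int driver_set_mac(void)  { return 1; }
static int generic_set_mac(void) { return 2; }

/* duplicate designated initializers: the later one takes effect */
static const struct ops o = {
        .set_mac = driver_set_mac,
        .set_mac = generic_set_mac,
};

int main(void)
{
        printf("%d\n", o.set_mac());    /* prints 2, not 1 */
        return 0;
}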
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 25874fc680c9..8763c1ea5eb4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -362,12 +362,19 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
362{ 362{
363 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 363 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
364 carrier_on_task); 364 carrier_on_task);
365 struct ib_port_attr attr;
365 366
366 /* 367 /*
367 * Take rtnl_lock to avoid racing with ipoib_stop() and 368 * Take rtnl_lock to avoid racing with ipoib_stop() and
368 * turning the carrier back on while a device is being 369 * turning the carrier back on while a device is being
369 * removed. 370 * removed.
370 */ 371 */
372 if (ib_query_port(priv->ca, priv->port, &attr) ||
373 attr.state != IB_PORT_ACTIVE) {
374 ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
375 return;
376 }
377
371 rtnl_lock(); 378 rtnl_lock();
372 netif_carrier_on(priv->dev); 379 netif_carrier_on(priv->dev);
373 rtnl_unlock(); 380 rtnl_unlock();
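The added guard queries the port before raising the carrier, so a multicast join that completes while the link is still coming up no longer flips the carrier on prematurely. A compact model of the check (the enum values are invented; the real state comes from ib_query_port() and IB_PORT_ACTIVE):

#include <stdio.h>

enum port_state { PORT_DOWN, PORT_INIT, PORT_ACTIVE };

static int query_port(enum port_state *state)
{
        *state = PORT_INIT;     /* pretend the SM hasn't armed the port yet */
        return 0;               /* 0 = the query itself succeeded */
}

int main(void)
{
        enum port_state state;

        if (query_port(&state) || state != PORT_ACTIVE) {
                puts("keeping carrier off until port is active");
                return 0;
        }
        puts("carrier on");
        return 0;
}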
diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index 76d6751f89a7..02f4f8f1db6f 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -225,6 +225,7 @@ config INPUT_SGI_BTNS
225config INPUT_WINBOND_CIR 225config INPUT_WINBOND_CIR
226 tristate "Winbond IR remote control" 226 tristate "Winbond IR remote control"
227 depends on X86 && PNP 227 depends on X86 && PNP
228 select NEW_LEDS
228 select LEDS_CLASS 229 select LEDS_CLASS
229 select BITREVERSE 230 select BITREVERSE
230 help 231 help
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 020f9573fd82..2158377a1359 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -124,6 +124,8 @@ config MD_RAID456
124 select MD_RAID6_PQ 124 select MD_RAID6_PQ
125 select ASYNC_MEMCPY 125 select ASYNC_MEMCPY
126 select ASYNC_XOR 126 select ASYNC_XOR
127 select ASYNC_PQ
128 select ASYNC_RAID6_RECOV
127 ---help--- 129 ---help---
128 A RAID-5 set of N drives with a capacity of C MB per drive provides 130 A RAID-5 set of N drives with a capacity of C MB per drive provides
129 the capacity of C * (N - 1) MB, and protects against a failure 131 the capacity of C * (N - 1) MB, and protects against a failure
@@ -152,9 +154,33 @@ config MD_RAID456
152 154
153 If unsure, say Y. 155 If unsure, say Y.
154 156
157config MULTICORE_RAID456
158 bool "RAID-4/RAID-5/RAID-6 Multicore processing (EXPERIMENTAL)"
159 depends on MD_RAID456
160 depends on SMP
161 depends on EXPERIMENTAL
162 ---help---
163 Enable the raid456 module to dispatch per-stripe raid operations to a
164 thread pool.
165
166 If unsure, say N.
167
155config MD_RAID6_PQ 168config MD_RAID6_PQ
156 tristate 169 tristate
157 170
171config ASYNC_RAID6_TEST
172 tristate "Self test for hardware accelerated raid6 recovery"
173 depends on MD_RAID6_PQ
174 select ASYNC_RAID6_RECOV
175 ---help---
176 This is a one-shot self test that permutes through the
177 recovery of all the possible two-disk failure scenarios for
178 an N-disk array. Recovery is performed with the asynchronous
179 raid6 recovery routines, and will optionally use an offload
180 engine if one is available.
181
182 If unsure, say N.
183
158config MD_MULTIPATH 184config MD_MULTIPATH
159 tristate "Multipath I/O support" 185 tristate "Multipath I/O support"
160 depends on BLK_DEV_MD 186 depends on BLK_DEV_MD
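ASYNC_PQ and ASYNC_RAID6_RECOV supply the syndrome generation and two-disk recovery that raid456 now drives through the async_tx API (and that ASYNC_RAID6_TEST exercises). For reference, this is the underlying arithmetic in its synchronous form: P is plain XOR, while Q is a weighted sum in GF(2^8) using the RAID-6 polynomial 0x11d. A self-contained sketch, one byte per data disk:

#include <stdint.h>
#include <stdio.h>

/* multiply by the generator (2) in GF(2^8) with polynomial 0x11d */
static uint8_t gf2_mul2(uint8_t v)
{
        return (uint8_t)((v << 1) ^ ((v & 0x80) ? 0x1d : 0));
}

int main(void)
{
        uint8_t data[3] = { 0xde, 0xad, 0xbe };  /* one byte per data disk */
        uint8_t p = 0, q = 0;

        /* Horner's rule: q = d[N-1]*2^(N-1) ^ ... ^ d[1]*2 ^ d[0] */
        for (int i = 2; i >= 0; i--) {
                p ^= data[i];
                q = gf2_mul2(q) ^ data[i];
        }
        printf("P=%02x Q=%02x\n", p, q);
        return 0;
}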
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 3319c2fec28e..6986b0059d23 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -108,6 +108,8 @@ static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page)
108 * allocated while we're using it 108 * allocated while we're using it
109 */ 109 */
110static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create) 110static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create)
111__releases(bitmap->lock)
112__acquires(bitmap->lock)
111{ 113{
112 unsigned char *mappage; 114 unsigned char *mappage;
113 115
@@ -325,7 +327,6 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
325 return 0; 327 return 0;
326 328
327 bad_alignment: 329 bad_alignment:
328 rcu_read_unlock();
329 return -EINVAL; 330 return -EINVAL;
330} 331}
331 332
@@ -1207,6 +1208,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
1207static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, 1208static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
1208 sector_t offset, int *blocks, 1209 sector_t offset, int *blocks,
1209 int create) 1210 int create)
1211__releases(bitmap->lock)
1212__acquires(bitmap->lock)
1210{ 1213{
1211 /* If 'create', we might release the lock and reclaim it. 1214 /* If 'create', we might release the lock and reclaim it.
1212 * The lock must have been taken with interrupts enabled. 1215 * The lock must have been taken with interrupts enabled.
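The __releases()/__acquires() markers added to bitmap_checkpage() and bitmap_get_counter() are sparse context annotations: they document that the function temporarily drops a lock its caller holds, which is exactly what the "If 'create', we might release the lock" comment warns about. Outside a kernel build the annotations reduce to no-ops, which can be modeled like this (pthread mutex standing in for the spinlock):

#include <pthread.h>

#ifdef __CHECKER__
#define __releases(x) __attribute__((context(x, 1, 0)))
#define __acquires(x) __attribute__((context(x, 0, 1)))
#else
#define __releases(x)
#define __acquires(x)
#endif

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void refill_cache(void)
__releases(lock)
__acquires(lock)
{
        pthread_mutex_unlock(&lock);    /* sleepable allocation goes here */
        pthread_mutex_lock(&lock);      /* caller still sees the lock held */
}

int main(void)
{
        pthread_mutex_lock(&lock);
        refill_cache();
        pthread_mutex_unlock(&lock);
        return 0;
}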
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index ea4842905444..1ceceb334d5e 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -108,6 +108,9 @@ static int linear_congested(void *data, int bits)
108 linear_conf_t *conf; 108 linear_conf_t *conf;
109 int i, ret = 0; 109 int i, ret = 0;
110 110
111 if (mddev_congested(mddev, bits))
112 return 1;
113
111 rcu_read_lock(); 114 rcu_read_lock();
112 conf = rcu_dereference(mddev->private); 115 conf = rcu_dereference(mddev->private);
113 116
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 6aa497e4baf8..26ba42a79129 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -262,6 +262,12 @@ static void mddev_resume(mddev_t *mddev)
262 mddev->pers->quiesce(mddev, 0); 262 mddev->pers->quiesce(mddev, 0);
263} 263}
264 264
265int mddev_congested(mddev_t *mddev, int bits)
266{
267 return mddev->suspended;
268}
269EXPORT_SYMBOL(mddev_congested);
270
265 271
266static inline mddev_t *mddev_get(mddev_t *mddev) 272static inline mddev_t *mddev_get(mddev_t *mddev)
267{ 273{
@@ -4218,7 +4224,7 @@ static int do_md_run(mddev_t * mddev)
4218 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); 4224 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
4219 mddev->sync_thread = md_register_thread(md_do_sync, 4225 mddev->sync_thread = md_register_thread(md_do_sync,
4220 mddev, 4226 mddev,
4221 "%s_resync"); 4227 "resync");
4222 if (!mddev->sync_thread) { 4228 if (!mddev->sync_thread) {
4223 printk(KERN_ERR "%s: could not start resync" 4229 printk(KERN_ERR "%s: could not start resync"
4224 " thread...\n", 4230 " thread...\n",
@@ -4575,10 +4581,10 @@ static int get_version(void __user * arg)
4575static int get_array_info(mddev_t * mddev, void __user * arg) 4581static int get_array_info(mddev_t * mddev, void __user * arg)
4576{ 4582{
4577 mdu_array_info_t info; 4583 mdu_array_info_t info;
4578 int nr,working,active,failed,spare; 4584 int nr,working,insync,failed,spare;
4579 mdk_rdev_t *rdev; 4585 mdk_rdev_t *rdev;
4580 4586
4581 nr=working=active=failed=spare=0; 4587 nr=working=insync=failed=spare=0;
4582 list_for_each_entry(rdev, &mddev->disks, same_set) { 4588 list_for_each_entry(rdev, &mddev->disks, same_set) {
4583 nr++; 4589 nr++;
4584 if (test_bit(Faulty, &rdev->flags)) 4590 if (test_bit(Faulty, &rdev->flags))
@@ -4586,7 +4592,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
4586 else { 4592 else {
4587 working++; 4593 working++;
4588 if (test_bit(In_sync, &rdev->flags)) 4594 if (test_bit(In_sync, &rdev->flags))
4589 active++; 4595 insync++;
4590 else 4596 else
4591 spare++; 4597 spare++;
4592 } 4598 }
@@ -4611,7 +4617,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
4611 info.state = (1<<MD_SB_CLEAN); 4617 info.state = (1<<MD_SB_CLEAN);
4612 if (mddev->bitmap && mddev->bitmap_offset) 4618 if (mddev->bitmap && mddev->bitmap_offset)
4613 info.state = (1<<MD_SB_BITMAP_PRESENT); 4619 info.state = (1<<MD_SB_BITMAP_PRESENT);
4614 info.active_disks = active; 4620 info.active_disks = insync;
4615 info.working_disks = working; 4621 info.working_disks = working;
4616 info.failed_disks = failed; 4622 info.failed_disks = failed;
4617 info.spare_disks = spare; 4623 info.spare_disks = spare;
@@ -4721,7 +4727,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
4721 if (!list_empty(&mddev->disks)) { 4727 if (!list_empty(&mddev->disks)) {
4722 mdk_rdev_t *rdev0 = list_entry(mddev->disks.next, 4728 mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
4723 mdk_rdev_t, same_set); 4729 mdk_rdev_t, same_set);
4724 int err = super_types[mddev->major_version] 4730 err = super_types[mddev->major_version]
4725 .load_super(rdev, rdev0, mddev->minor_version); 4731 .load_super(rdev, rdev0, mddev->minor_version);
4726 if (err < 0) { 4732 if (err < 0) {
4727 printk(KERN_WARNING 4733 printk(KERN_WARNING
@@ -5631,7 +5637,10 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
5631 thread->run = run; 5637 thread->run = run;
5632 thread->mddev = mddev; 5638 thread->mddev = mddev;
5633 thread->timeout = MAX_SCHEDULE_TIMEOUT; 5639 thread->timeout = MAX_SCHEDULE_TIMEOUT;
5634 thread->tsk = kthread_run(md_thread, thread, name, mdname(thread->mddev)); 5640 thread->tsk = kthread_run(md_thread, thread,
5641 "%s_%s",
5642 mdname(thread->mddev),
5643 name ?: mddev->pers->name);
5635 if (IS_ERR(thread->tsk)) { 5644 if (IS_ERR(thread->tsk)) {
5636 kfree(thread); 5645 kfree(thread);
5637 return NULL; 5646 return NULL;
@@ -6745,7 +6754,7 @@ void md_check_recovery(mddev_t *mddev)
6745 } 6754 }
6746 mddev->sync_thread = md_register_thread(md_do_sync, 6755 mddev->sync_thread = md_register_thread(md_do_sync,
6747 mddev, 6756 mddev,
6748 "%s_resync"); 6757 "resync");
6749 if (!mddev->sync_thread) { 6758 if (!mddev->sync_thread) {
6750 printk(KERN_ERR "%s: could not start resync" 6759 printk(KERN_ERR "%s: could not start resync"
6751 " thread...\n", 6760 " thread...\n",
diff --git a/drivers/md/md.h b/drivers/md/md.h
index f55d2ff95133..f184b69ef337 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -430,6 +430,7 @@ extern void md_write_end(mddev_t *mddev);
430extern void md_done_sync(mddev_t *mddev, int blocks, int ok); 430extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
431extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev); 431extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
432 432
433extern int mddev_congested(mddev_t *mddev, int bits);
433extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, 434extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
434 sector_t sector, int size, struct page *page); 435 sector_t sector, int size, struct page *page);
435extern void md_super_wait(mddev_t *mddev); 436extern void md_super_wait(mddev_t *mddev);
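Each personality's congested callback now short-circuits through the new helper: while the array is suspended, report congestion so upper layers back off instead of queueing I/O that cannot progress until mddev_resume(). The shape of the check, modeled standalone:

#include <stdio.h>

struct mddev { int suspended; };

static int mddev_congested(struct mddev *m, int bits)
{
        (void)bits;             /* read/write direction, unused here */
        return m->suspended;
}

static int linear_congested(struct mddev *m, int bits)
{
        if (mddev_congested(m, bits))
                return 1;
        /* ... otherwise poll each member disk's request queue ... */
        return 0;
}

int main(void)
{
        struct mddev m = { .suspended = 1 };
        printf("congested=%d\n", linear_congested(&m, 0));
        return 0;
}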
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index d2d3fd54cc68..ee7646f974a0 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -150,7 +150,6 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio)
150 } 150 }
151 151
152 mp_bh = mempool_alloc(conf->pool, GFP_NOIO); 152 mp_bh = mempool_alloc(conf->pool, GFP_NOIO);
153 memset(mp_bh, 0, sizeof(*mp_bh));
154 153
155 mp_bh->master_bio = bio; 154 mp_bh->master_bio = bio;
156 mp_bh->mddev = mddev; 155 mp_bh->mddev = mddev;
@@ -199,6 +198,9 @@ static int multipath_congested(void *data, int bits)
199 multipath_conf_t *conf = mddev->private; 198 multipath_conf_t *conf = mddev->private;
200 int i, ret = 0; 199 int i, ret = 0;
201 200
201 if (mddev_congested(mddev, bits))
202 return 1;
203
202 rcu_read_lock(); 204 rcu_read_lock();
203 for (i = 0; i < mddev->raid_disks ; i++) { 205 for (i = 0; i < mddev->raid_disks ; i++) {
204 mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev); 206 mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
@@ -504,7 +506,7 @@ static int multipath_run (mddev_t *mddev)
504 } 506 }
505 507
506 { 508 {
507 mddev->thread = md_register_thread(multipathd, mddev, "%s_multipath"); 509 mddev->thread = md_register_thread(multipathd, mddev, NULL);
508 if (!mddev->thread) { 510 if (!mddev->thread) {
509 printk(KERN_ERR "multipath: couldn't allocate thread" 511 printk(KERN_ERR "multipath: couldn't allocate thread"
510 " for %s\n", mdname(mddev)); 512 " for %s\n", mdname(mddev));
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index f845ed98fec9..d3a4ce06015a 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -44,6 +44,9 @@ static int raid0_congested(void *data, int bits)
44 mdk_rdev_t **devlist = conf->devlist; 44 mdk_rdev_t **devlist = conf->devlist;
45 int i, ret = 0; 45 int i, ret = 0;
46 46
47 if (mddev_congested(mddev, bits))
48 return 1;
49
47 for (i = 0; i < mddev->raid_disks && !ret ; i++) { 50 for (i = 0; i < mddev->raid_disks && !ret ; i++) {
48 struct request_queue *q = bdev_get_queue(devlist[i]->bdev); 51 struct request_queue *q = bdev_get_queue(devlist[i]->bdev);
49 52
@@ -86,7 +89,7 @@ static void dump_zones(mddev_t *mddev)
86 89
87static int create_strip_zones(mddev_t *mddev) 90static int create_strip_zones(mddev_t *mddev)
88{ 91{
89 int i, c, j, err; 92 int i, c, err;
90 sector_t curr_zone_end, sectors; 93 sector_t curr_zone_end, sectors;
91 mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev; 94 mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev;
92 struct strip_zone *zone; 95 struct strip_zone *zone;
@@ -198,6 +201,8 @@ static int create_strip_zones(mddev_t *mddev)
198 /* now do the other zones */ 201 /* now do the other zones */
199 for (i = 1; i < conf->nr_strip_zones; i++) 202 for (i = 1; i < conf->nr_strip_zones; i++)
200 { 203 {
204 int j;
205
201 zone = conf->strip_zone + i; 206 zone = conf->strip_zone + i;
202 dev = conf->devlist + i * mddev->raid_disks; 207 dev = conf->devlist + i * mddev->raid_disks;
203 208
@@ -207,7 +212,6 @@ static int create_strip_zones(mddev_t *mddev)
207 c = 0; 212 c = 0;
208 213
209 for (j=0; j<cnt; j++) { 214 for (j=0; j<cnt; j++) {
210 char b[BDEVNAME_SIZE];
211 rdev = conf->devlist[j]; 215 rdev = conf->devlist[j];
212 printk(KERN_INFO "raid0: checking %s ...", 216 printk(KERN_INFO "raid0: checking %s ...",
213 bdevname(rdev->bdev, b)); 217 bdevname(rdev->bdev, b));
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index ff7ed3335995..d1b9bd5fd4f6 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -576,6 +576,9 @@ static int raid1_congested(void *data, int bits)
576 conf_t *conf = mddev->private; 576 conf_t *conf = mddev->private;
577 int i, ret = 0; 577 int i, ret = 0;
578 578
579 if (mddev_congested(mddev, bits))
580 return 1;
581
579 rcu_read_lock(); 582 rcu_read_lock();
580 for (i = 0; i < mddev->raid_disks; i++) { 583 for (i = 0; i < mddev->raid_disks; i++) {
581 mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); 584 mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
@@ -851,7 +854,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
851 read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset; 854 read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset;
852 read_bio->bi_bdev = mirror->rdev->bdev; 855 read_bio->bi_bdev = mirror->rdev->bdev;
853 read_bio->bi_end_io = raid1_end_read_request; 856 read_bio->bi_end_io = raid1_end_read_request;
854 read_bio->bi_rw = READ | do_sync; 857 read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
855 read_bio->bi_private = r1_bio; 858 read_bio->bi_private = r1_bio;
856 859
857 generic_make_request(read_bio); 860 generic_make_request(read_bio);
@@ -943,7 +946,8 @@ static int make_request(struct request_queue *q, struct bio * bio)
943 mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; 946 mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
944 mbio->bi_bdev = conf->mirrors[i].rdev->bdev; 947 mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
945 mbio->bi_end_io = raid1_end_write_request; 948 mbio->bi_end_io = raid1_end_write_request;
946 mbio->bi_rw = WRITE | do_barriers | do_sync; 949 mbio->bi_rw = WRITE | (do_barriers << BIO_RW_BARRIER) |
950 (do_sync << BIO_RW_SYNCIO);
947 mbio->bi_private = r1_bio; 951 mbio->bi_private = r1_bio;
948 952
949 if (behind_pages) { 953 if (behind_pages) {
@@ -1623,7 +1627,8 @@ static void raid1d(mddev_t *mddev)
1623 conf->mirrors[i].rdev->data_offset; 1627 conf->mirrors[i].rdev->data_offset;
1624 bio->bi_bdev = conf->mirrors[i].rdev->bdev; 1628 bio->bi_bdev = conf->mirrors[i].rdev->bdev;
1625 bio->bi_end_io = raid1_end_write_request; 1629 bio->bi_end_io = raid1_end_write_request;
1626 bio->bi_rw = WRITE | do_sync; 1630 bio->bi_rw = WRITE |
1631 (do_sync << BIO_RW_SYNCIO);
1627 bio->bi_private = r1_bio; 1632 bio->bi_private = r1_bio;
1628 r1_bio->bios[i] = bio; 1633 r1_bio->bios[i] = bio;
1629 generic_make_request(bio); 1634 generic_make_request(bio);
@@ -1672,7 +1677,7 @@ static void raid1d(mddev_t *mddev)
1672 bio->bi_sector = r1_bio->sector + rdev->data_offset; 1677 bio->bi_sector = r1_bio->sector + rdev->data_offset;
1673 bio->bi_bdev = rdev->bdev; 1678 bio->bi_bdev = rdev->bdev;
1674 bio->bi_end_io = raid1_end_read_request; 1679 bio->bi_end_io = raid1_end_read_request;
1675 bio->bi_rw = READ | do_sync; 1680 bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
1676 bio->bi_private = r1_bio; 1681 bio->bi_private = r1_bio;
1677 unplug = 1; 1682 unplug = 1;
1678 generic_make_request(bio); 1683 generic_make_request(bio);
@@ -2047,7 +2052,7 @@ static int run(mddev_t *mddev)
2047 conf->last_used = j; 2052 conf->last_used = j;
2048 2053
2049 2054
2050 mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1"); 2055 mddev->thread = md_register_thread(raid1d, mddev, NULL);
2051 if (!mddev->thread) { 2056 if (!mddev->thread) {
2052 printk(KERN_ERR 2057 printk(KERN_ERR
2053 "raid1: couldn't allocate thread for %s\n", 2058 "raid1: couldn't allocate thread for %s\n",
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d0a2152e064f..51c4c5c4d87a 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -631,6 +631,8 @@ static int raid10_congested(void *data, int bits)
631 conf_t *conf = mddev->private; 631 conf_t *conf = mddev->private;
632 int i, ret = 0; 632 int i, ret = 0;
633 633
634 if (mddev_congested(mddev, bits))
635 return 1;
634 rcu_read_lock(); 636 rcu_read_lock();
635 for (i = 0; i < mddev->raid_disks && ret == 0; i++) { 637 for (i = 0; i < mddev->raid_disks && ret == 0; i++) {
636 mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); 638 mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
@@ -882,7 +884,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
882 mirror->rdev->data_offset; 884 mirror->rdev->data_offset;
883 read_bio->bi_bdev = mirror->rdev->bdev; 885 read_bio->bi_bdev = mirror->rdev->bdev;
884 read_bio->bi_end_io = raid10_end_read_request; 886 read_bio->bi_end_io = raid10_end_read_request;
885 read_bio->bi_rw = READ | do_sync; 887 read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
886 read_bio->bi_private = r10_bio; 888 read_bio->bi_private = r10_bio;
887 889
888 generic_make_request(read_bio); 890 generic_make_request(read_bio);
@@ -950,7 +952,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
950 conf->mirrors[d].rdev->data_offset; 952 conf->mirrors[d].rdev->data_offset;
951 mbio->bi_bdev = conf->mirrors[d].rdev->bdev; 953 mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
952 mbio->bi_end_io = raid10_end_write_request; 954 mbio->bi_end_io = raid10_end_write_request;
953 mbio->bi_rw = WRITE | do_sync; 955 mbio->bi_rw = WRITE | (do_sync << BIO_RW_SYNCIO);
954 mbio->bi_private = r10_bio; 956 mbio->bi_private = r10_bio;
955 957
956 atomic_inc(&r10_bio->remaining); 958 atomic_inc(&r10_bio->remaining);
@@ -1623,7 +1625,7 @@ static void raid10d(mddev_t *mddev)
1623 bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr 1625 bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr
1624 + rdev->data_offset; 1626 + rdev->data_offset;
1625 bio->bi_bdev = rdev->bdev; 1627 bio->bi_bdev = rdev->bdev;
1626 bio->bi_rw = READ | do_sync; 1628 bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
1627 bio->bi_private = r10_bio; 1629 bio->bi_private = r10_bio;
1628 bio->bi_end_io = raid10_end_read_request; 1630 bio->bi_end_io = raid10_end_read_request;
1629 unplug = 1; 1631 unplug = 1;
@@ -1773,7 +1775,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1773 max_sync = RESYNC_PAGES << (PAGE_SHIFT-9); 1775 max_sync = RESYNC_PAGES << (PAGE_SHIFT-9);
1774 if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { 1776 if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
1775 /* recovery... the complicated one */ 1777 /* recovery... the complicated one */
1776 int i, j, k; 1778 int j, k;
1777 r10_bio = NULL; 1779 r10_bio = NULL;
1778 1780
1779 for (i=0 ; i<conf->raid_disks; i++) 1781 for (i=0 ; i<conf->raid_disks; i++)
@@ -2188,7 +2190,7 @@ static int run(mddev_t *mddev)
2188 } 2190 }
2189 2191
2190 2192
2191 mddev->thread = md_register_thread(raid10d, mddev, "%s_raid10"); 2193 mddev->thread = md_register_thread(raid10d, mddev, NULL);
2192 if (!mddev->thread) { 2194 if (!mddev->thread) {
2193 printk(KERN_ERR 2195 printk(KERN_ERR
2194 "raid10: couldn't allocate thread for %s\n", 2196 "raid10: couldn't allocate thread for %s\n",
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 826eb3467357..94829804ab7f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -47,7 +47,9 @@
47#include <linux/kthread.h> 47#include <linux/kthread.h>
48#include <linux/raid/pq.h> 48#include <linux/raid/pq.h>
49#include <linux/async_tx.h> 49#include <linux/async_tx.h>
50#include <linux/async.h>
50#include <linux/seq_file.h> 51#include <linux/seq_file.h>
52#include <linux/cpu.h>
51#include "md.h" 53#include "md.h"
52#include "raid5.h" 54#include "raid5.h"
53#include "bitmap.h" 55#include "bitmap.h"
@@ -499,11 +501,18 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
499 struct page *bio_page; 501 struct page *bio_page;
500 int i; 502 int i;
501 int page_offset; 503 int page_offset;
504 struct async_submit_ctl submit;
505 enum async_tx_flags flags = 0;
502 506
503 if (bio->bi_sector >= sector) 507 if (bio->bi_sector >= sector)
504 page_offset = (signed)(bio->bi_sector - sector) * 512; 508 page_offset = (signed)(bio->bi_sector - sector) * 512;
505 else 509 else
506 page_offset = (signed)(sector - bio->bi_sector) * -512; 510 page_offset = (signed)(sector - bio->bi_sector) * -512;
511
512 if (frombio)
513 flags |= ASYNC_TX_FENCE;
514 init_async_submit(&submit, flags, tx, NULL, NULL, NULL);
515
507 bio_for_each_segment(bvl, bio, i) { 516 bio_for_each_segment(bvl, bio, i) {
508 int len = bio_iovec_idx(bio, i)->bv_len; 517 int len = bio_iovec_idx(bio, i)->bv_len;
509 int clen; 518 int clen;
@@ -525,15 +534,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
525 bio_page = bio_iovec_idx(bio, i)->bv_page; 534 bio_page = bio_iovec_idx(bio, i)->bv_page;
526 if (frombio) 535 if (frombio)
527 tx = async_memcpy(page, bio_page, page_offset, 536 tx = async_memcpy(page, bio_page, page_offset,
528 b_offset, clen, 537 b_offset, clen, &submit);
529 ASYNC_TX_DEP_ACK,
530 tx, NULL, NULL);
531 else 538 else
532 tx = async_memcpy(bio_page, page, b_offset, 539 tx = async_memcpy(bio_page, page, b_offset,
533 page_offset, clen, 540 page_offset, clen, &submit);
534 ASYNC_TX_DEP_ACK,
535 tx, NULL, NULL);
536 } 541 }
542 /* chain the operations */
543 submit.depend_tx = tx;
544
537 if (clen < len) /* hit end of page */ 545 if (clen < len) /* hit end of page */
538 break; 546 break;
539 page_offset += len; 547 page_offset += len;
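The conversion above replaces async_memcpy()'s long flag/dependency/callback argument tail with a single struct async_submit_ctl, initialized once and reused, with submit.depend_tx refreshed after every operation so the copies stay ordered behind each other. The pattern in miniature (toy types, integers standing in for DMA descriptors):

#include <stdio.h>

struct submit_ctl {
        unsigned long flags;
        int depend_tx;          /* stands in for a descriptor pointer */
        void (*cb)(void *);
        void *cb_arg;
};

static void init_submit(struct submit_ctl *s, unsigned long flags,
                        int depend_tx, void (*cb)(void *), void *arg)
{
        s->flags = flags;
        s->depend_tx = depend_tx;
        s->cb = cb;
        s->cb_arg = arg;
}

static int fake_memcpy(int id, struct submit_ctl *s)
{
        printf("op %d depends on %d\n", id, s->depend_tx);
        return id;              /* the returned "descriptor" */
}

int main(void)
{
        struct submit_ctl submit;
        int tx = 0;

        init_submit(&submit, 0, tx, NULL, NULL);
        for (int id = 1; id <= 3; id++) {
                tx = fake_memcpy(id, &submit);
                submit.depend_tx = tx;  /* chain, as async_copy_data does */
        }
        return 0;
}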
@@ -592,6 +600,7 @@ static void ops_run_biofill(struct stripe_head *sh)
592{ 600{
593 struct dma_async_tx_descriptor *tx = NULL; 601 struct dma_async_tx_descriptor *tx = NULL;
594 raid5_conf_t *conf = sh->raid_conf; 602 raid5_conf_t *conf = sh->raid_conf;
603 struct async_submit_ctl submit;
595 int i; 604 int i;
596 605
597 pr_debug("%s: stripe %llu\n", __func__, 606 pr_debug("%s: stripe %llu\n", __func__,
@@ -615,22 +624,34 @@ static void ops_run_biofill(struct stripe_head *sh)
615 } 624 }
616 625
617 atomic_inc(&sh->count); 626 atomic_inc(&sh->count);
618 async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, 627 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL);
619 ops_complete_biofill, sh); 628 async_trigger_callback(&submit);
620} 629}
621 630
622static void ops_complete_compute5(void *stripe_head_ref) 631static void mark_target_uptodate(struct stripe_head *sh, int target)
623{ 632{
624 struct stripe_head *sh = stripe_head_ref; 633 struct r5dev *tgt;
625 int target = sh->ops.target;
626 struct r5dev *tgt = &sh->dev[target];
627 634
628 pr_debug("%s: stripe %llu\n", __func__, 635 if (target < 0)
629 (unsigned long long)sh->sector); 636 return;
630 637
638 tgt = &sh->dev[target];
631 set_bit(R5_UPTODATE, &tgt->flags); 639 set_bit(R5_UPTODATE, &tgt->flags);
632 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); 640 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
633 clear_bit(R5_Wantcompute, &tgt->flags); 641 clear_bit(R5_Wantcompute, &tgt->flags);
642}
643
644static void ops_complete_compute(void *stripe_head_ref)
645{
646 struct stripe_head *sh = stripe_head_ref;
647
648 pr_debug("%s: stripe %llu\n", __func__,
649 (unsigned long long)sh->sector);
650
651 /* mark the computed target(s) as uptodate */
652 mark_target_uptodate(sh, sh->ops.target);
653 mark_target_uptodate(sh, sh->ops.target2);
654
634 clear_bit(STRIPE_COMPUTE_RUN, &sh->state); 655 clear_bit(STRIPE_COMPUTE_RUN, &sh->state);
635 if (sh->check_state == check_state_compute_run) 656 if (sh->check_state == check_state_compute_run)
636 sh->check_state = check_state_compute_result; 657 sh->check_state = check_state_compute_result;
@@ -638,16 +659,24 @@ static void ops_complete_compute5(void *stripe_head_ref)
638 release_stripe(sh); 659 release_stripe(sh);
639} 660}
640 661
641static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) 662/* return a pointer to the address conversion region of the scribble buffer */
663static addr_conv_t *to_addr_conv(struct stripe_head *sh,
664 struct raid5_percpu *percpu)
665{
666 return percpu->scribble + sizeof(struct page *) * (sh->disks + 2);
667}
668
669static struct dma_async_tx_descriptor *
670ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
642{ 671{
643 /* kernel stack size limits the total number of disks */
644 int disks = sh->disks; 672 int disks = sh->disks;
645 struct page *xor_srcs[disks]; 673 struct page **xor_srcs = percpu->scribble;
646 int target = sh->ops.target; 674 int target = sh->ops.target;
647 struct r5dev *tgt = &sh->dev[target]; 675 struct r5dev *tgt = &sh->dev[target];
648 struct page *xor_dest = tgt->page; 676 struct page *xor_dest = tgt->page;
649 int count = 0; 677 int count = 0;
650 struct dma_async_tx_descriptor *tx; 678 struct dma_async_tx_descriptor *tx;
679 struct async_submit_ctl submit;
651 int i; 680 int i;
652 681
653 pr_debug("%s: stripe %llu block: %d\n", 682 pr_debug("%s: stripe %llu block: %d\n",
@@ -660,17 +689,215 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
660 689
661 atomic_inc(&sh->count); 690 atomic_inc(&sh->count);
662 691
692 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
693 ops_complete_compute, sh, to_addr_conv(sh, percpu));
663 if (unlikely(count == 1)) 694 if (unlikely(count == 1))
664 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, 695 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
665 0, NULL, ops_complete_compute5, sh);
666 else 696 else
667 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 697 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
668 ASYNC_TX_XOR_ZERO_DST, NULL,
669 ops_complete_compute5, sh);
670 698
671 return tx; 699 return tx;
672} 700}
673 701
702/* set_syndrome_sources - populate source buffers for gen_syndrome
703 * @srcs - (struct page *) array of size sh->disks
704 * @sh - stripe_head to parse
705 *
706 * Populates srcs in proper layout order for the stripe and returns the
707 * 'count' of sources to be used in a call to async_gen_syndrome. The P
708 * destination buffer is recorded in srcs[count] and the Q destination
709 * is recorded in srcs[count+1].
710 */
711static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
712{
713 int disks = sh->disks;
714 int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
715 int d0_idx = raid6_d0(sh);
716 int count;
717 int i;
718
719 for (i = 0; i < disks; i++)
720 srcs[i] = (void *)raid6_empty_zero_page;
721
722 count = 0;
723 i = d0_idx;
724 do {
725 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
726
727 srcs[slot] = sh->dev[i].page;
728 i = raid6_next_disk(i, disks);
729 } while (i != d0_idx);
730 BUG_ON(count != syndrome_disks);
731
732 return count;
733}
734
735static struct dma_async_tx_descriptor *
736ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
737{
738 int disks = sh->disks;
739 struct page **blocks = percpu->scribble;
740 int target;
741 int qd_idx = sh->qd_idx;
742 struct dma_async_tx_descriptor *tx;
743 struct async_submit_ctl submit;
744 struct r5dev *tgt;
745 struct page *dest;
746 int i;
747 int count;
748
749 if (sh->ops.target < 0)
750 target = sh->ops.target2;
751 else if (sh->ops.target2 < 0)
752 target = sh->ops.target;
753 else
754 /* we should only have one valid target */
755 BUG();
756 BUG_ON(target < 0);
757 pr_debug("%s: stripe %llu block: %d\n",
758 __func__, (unsigned long long)sh->sector, target);
759
760 tgt = &sh->dev[target];
761 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
762 dest = tgt->page;
763
764 atomic_inc(&sh->count);
765
766 if (target == qd_idx) {
767 count = set_syndrome_sources(blocks, sh);
768 blocks[count] = NULL; /* regenerating p is not necessary */
769 BUG_ON(blocks[count+1] != dest); /* q should already be set */
770 init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
771 ops_complete_compute, sh,
772 to_addr_conv(sh, percpu));
773 tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
774 } else {
775 /* Compute any data- or p-drive using XOR */
776 count = 0;
777 for (i = disks; i-- ; ) {
778 if (i == target || i == qd_idx)
779 continue;
780 blocks[count++] = sh->dev[i].page;
781 }
782
783 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
784 NULL, ops_complete_compute, sh,
785 to_addr_conv(sh, percpu));
786 tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit);
787 }
788
789 return tx;
790}
791
792static struct dma_async_tx_descriptor *
793ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
794{
795 int i, count, disks = sh->disks;
796 int syndrome_disks = sh->ddf_layout ? disks : disks-2;
797 int d0_idx = raid6_d0(sh);
798 int faila = -1, failb = -1;
799 int target = sh->ops.target;
800 int target2 = sh->ops.target2;
801 struct r5dev *tgt = &sh->dev[target];
802 struct r5dev *tgt2 = &sh->dev[target2];
803 struct dma_async_tx_descriptor *tx;
804 struct page **blocks = percpu->scribble;
805 struct async_submit_ctl submit;
806
807 pr_debug("%s: stripe %llu block1: %d block2: %d\n",
808 __func__, (unsigned long long)sh->sector, target, target2);
809 BUG_ON(target < 0 || target2 < 0);
810 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
811 BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags));
812
813 /* we need to open-code set_syndrome_sources to handle the
814 * slot number conversion for 'faila' and 'failb'
815 */
816 for (i = 0; i < disks ; i++)
817 blocks[i] = (void *)raid6_empty_zero_page;
818 count = 0;
819 i = d0_idx;
820 do {
821 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
822
823 blocks[slot] = sh->dev[i].page;
824
825 if (i == target)
826 faila = slot;
827 if (i == target2)
828 failb = slot;
829 i = raid6_next_disk(i, disks);
830 } while (i != d0_idx);
831 BUG_ON(count != syndrome_disks);
832
833 BUG_ON(faila == failb);
834 if (failb < faila)
835 swap(faila, failb);
836 pr_debug("%s: stripe: %llu faila: %d failb: %d\n",
837 __func__, (unsigned long long)sh->sector, faila, failb);
838
839 atomic_inc(&sh->count);
840
841 if (failb == syndrome_disks+1) {
842 /* Q disk is one of the missing disks */
843 if (faila == syndrome_disks) {
844 /* Missing P+Q, just recompute */
845 init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
846 ops_complete_compute, sh,
847 to_addr_conv(sh, percpu));
848 return async_gen_syndrome(blocks, 0, count+2,
849 STRIPE_SIZE, &submit);
850 } else {
851 struct page *dest;
852 int data_target;
853 int qd_idx = sh->qd_idx;
854
855 /* Missing D+Q: recompute D from P, then recompute Q */
856 if (target == qd_idx)
857 data_target = target2;
858 else
859 data_target = target;
860
861 count = 0;
862 for (i = disks; i-- ; ) {
863 if (i == data_target || i == qd_idx)
864 continue;
865 blocks[count++] = sh->dev[i].page;
866 }
867 dest = sh->dev[data_target].page;
868 init_async_submit(&submit,
869 ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
870 NULL, NULL, NULL,
871 to_addr_conv(sh, percpu));
872 tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
873 &submit);
874
875 count = set_syndrome_sources(blocks, sh);
876 init_async_submit(&submit, ASYNC_TX_FENCE, tx,
877 ops_complete_compute, sh,
878 to_addr_conv(sh, percpu));
879 return async_gen_syndrome(blocks, 0, count+2,
880 STRIPE_SIZE, &submit);
881 }
882 } else {
883 init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
884 ops_complete_compute, sh,
885 to_addr_conv(sh, percpu));
886 if (failb == syndrome_disks) {
887 /* We're missing D+P. */
888 return async_raid6_datap_recov(syndrome_disks+2,
889 STRIPE_SIZE, faila,
890 blocks, &submit);
891 } else {
892 /* We're missing D+D. */
893 return async_raid6_2data_recov(syndrome_disks+2,
894 STRIPE_SIZE, faila, failb,
895 blocks, &submit);
896 }
897 }
898}
899
900
674static void ops_complete_prexor(void *stripe_head_ref) 901static void ops_complete_prexor(void *stripe_head_ref)
675{ 902{
676 struct stripe_head *sh = stripe_head_ref; 903 struct stripe_head *sh = stripe_head_ref;
@@ -680,12 +907,13 @@ static void ops_complete_prexor(void *stripe_head_ref)
680} 907}
681 908
682static struct dma_async_tx_descriptor * 909static struct dma_async_tx_descriptor *
683ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) 910ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
911 struct dma_async_tx_descriptor *tx)
684{ 912{
685 /* kernel stack size limits the total number of disks */
686 int disks = sh->disks; 913 int disks = sh->disks;
687 struct page *xor_srcs[disks]; 914 struct page **xor_srcs = percpu->scribble;
688 int count = 0, pd_idx = sh->pd_idx, i; 915 int count = 0, pd_idx = sh->pd_idx, i;
916 struct async_submit_ctl submit;
689 917
690 /* existing parity data subtracted */ 918 /* existing parity data subtracted */
691 struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; 919 struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
@@ -700,9 +928,9 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
700 xor_srcs[count++] = dev->page; 928 xor_srcs[count++] = dev->page;
701 } 929 }
702 930
703 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 931 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
704 ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx, 932 ops_complete_prexor, sh, to_addr_conv(sh, percpu));
705 ops_complete_prexor, sh); 933 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
706 934
707 return tx; 935 return tx;
708} 936}
@@ -742,17 +970,21 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
742 return tx; 970 return tx;
743} 971}
744 972
745static void ops_complete_postxor(void *stripe_head_ref) 973static void ops_complete_reconstruct(void *stripe_head_ref)
746{ 974{
747 struct stripe_head *sh = stripe_head_ref; 975 struct stripe_head *sh = stripe_head_ref;
748 int disks = sh->disks, i, pd_idx = sh->pd_idx; 976 int disks = sh->disks;
977 int pd_idx = sh->pd_idx;
978 int qd_idx = sh->qd_idx;
979 int i;
749 980
750 pr_debug("%s: stripe %llu\n", __func__, 981 pr_debug("%s: stripe %llu\n", __func__,
751 (unsigned long long)sh->sector); 982 (unsigned long long)sh->sector);
752 983
753 for (i = disks; i--; ) { 984 for (i = disks; i--; ) {
754 struct r5dev *dev = &sh->dev[i]; 985 struct r5dev *dev = &sh->dev[i];
755 if (dev->written || i == pd_idx) 986
987 if (dev->written || i == pd_idx || i == qd_idx)
756 set_bit(R5_UPTODATE, &dev->flags); 988 set_bit(R5_UPTODATE, &dev->flags);
757 } 989 }
758 990
@@ -770,12 +1002,12 @@ static void ops_complete_postxor(void *stripe_head_ref)
770} 1002}
771 1003
772static void 1004static void
773ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) 1005ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
1006 struct dma_async_tx_descriptor *tx)
774{ 1007{
775 /* kernel stack size limits the total number of disks */
776 int disks = sh->disks; 1008 int disks = sh->disks;
777 struct page *xor_srcs[disks]; 1009 struct page **xor_srcs = percpu->scribble;
778 1010 struct async_submit_ctl submit;
779 int count = 0, pd_idx = sh->pd_idx, i; 1011 int count = 0, pd_idx = sh->pd_idx, i;
780 struct page *xor_dest; 1012 struct page *xor_dest;
781 int prexor = 0; 1013 int prexor = 0;
@@ -809,18 +1041,36 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
809 * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST 1041 * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
810 * for the synchronous xor case 1042 * for the synchronous xor case
811 */ 1043 */
812 flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | 1044 flags = ASYNC_TX_ACK |
813 (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); 1045 (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
814 1046
815 atomic_inc(&sh->count); 1047 atomic_inc(&sh->count);
816 1048
817 if (unlikely(count == 1)) { 1049 init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh,
818 flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); 1050 to_addr_conv(sh, percpu));
819 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, 1051 if (unlikely(count == 1))
820 flags, tx, ops_complete_postxor, sh); 1052 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
821 } else 1053 else
822 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 1054 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
823 flags, tx, ops_complete_postxor, sh); 1055}
1056
1057static void
1058ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
1059 struct dma_async_tx_descriptor *tx)
1060{
1061 struct async_submit_ctl submit;
1062 struct page **blocks = percpu->scribble;
1063 int count;
1064
1065 pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
1066
1067 count = set_syndrome_sources(blocks, sh);
1068
1069 atomic_inc(&sh->count);
1070
1071 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
1072 sh, to_addr_conv(sh, percpu));
1073 async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
824} 1074}
825 1075
826static void ops_complete_check(void *stripe_head_ref) 1076static void ops_complete_check(void *stripe_head_ref)
@@ -835,63 +1085,115 @@ static void ops_complete_check(void *stripe_head_ref)
835 release_stripe(sh); 1085 release_stripe(sh);
836} 1086}
837 1087
838static void ops_run_check(struct stripe_head *sh) 1088static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
839{ 1089{
840 /* kernel stack size limits the total number of disks */
841 int disks = sh->disks; 1090 int disks = sh->disks;
842 struct page *xor_srcs[disks]; 1091 int pd_idx = sh->pd_idx;
1092 int qd_idx = sh->qd_idx;
1093 struct page *xor_dest;
1094 struct page **xor_srcs = percpu->scribble;
843 struct dma_async_tx_descriptor *tx; 1095 struct dma_async_tx_descriptor *tx;
844 1096 struct async_submit_ctl submit;
845 int count = 0, pd_idx = sh->pd_idx, i; 1097 int count;
846 struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; 1098 int i;
847 1099
848 pr_debug("%s: stripe %llu\n", __func__, 1100 pr_debug("%s: stripe %llu\n", __func__,
849 (unsigned long long)sh->sector); 1101 (unsigned long long)sh->sector);
850 1102
1103 count = 0;
1104 xor_dest = sh->dev[pd_idx].page;
1105 xor_srcs[count++] = xor_dest;
851 for (i = disks; i--; ) { 1106 for (i = disks; i--; ) {
852 struct r5dev *dev = &sh->dev[i]; 1107 if (i == pd_idx || i == qd_idx)
853 if (i != pd_idx) 1108 continue;
854 xor_srcs[count++] = dev->page; 1109 xor_srcs[count++] = sh->dev[i].page;
855 } 1110 }
856 1111
857 tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 1112 init_async_submit(&submit, 0, NULL, NULL, NULL,
858 &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); 1113 to_addr_conv(sh, percpu));
1114 tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
1115 &sh->ops.zero_sum_result, &submit);
1116
1117 atomic_inc(&sh->count);
1118 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL);
1119 tx = async_trigger_callback(&submit);
1120}
1121
1122static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp)
1123{
1124 struct page **srcs = percpu->scribble;
1125 struct async_submit_ctl submit;
1126 int count;
1127
1128 pr_debug("%s: stripe %llu checkp: %d\n", __func__,
1129 (unsigned long long)sh->sector, checkp);
1130
1131 count = set_syndrome_sources(srcs, sh);
1132 if (!checkp)
1133 srcs[count] = NULL;
859 1134
860 atomic_inc(&sh->count); 1135 atomic_inc(&sh->count);
861 tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, 1136 init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check,
862 ops_complete_check, sh); 1137 sh, to_addr_conv(sh, percpu));
1138 async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE,
1139 &sh->ops.zero_sum_result, percpu->spare_page, &submit);
863} 1140}
864 1141
865static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) 1142static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
866{ 1143{
867 int overlap_clear = 0, i, disks = sh->disks; 1144 int overlap_clear = 0, i, disks = sh->disks;
868 struct dma_async_tx_descriptor *tx = NULL; 1145 struct dma_async_tx_descriptor *tx = NULL;
1146 raid5_conf_t *conf = sh->raid_conf;
1147 int level = conf->level;
1148 struct raid5_percpu *percpu;
1149 unsigned long cpu;
869 1150
1151 cpu = get_cpu();
1152 percpu = per_cpu_ptr(conf->percpu, cpu);
870 if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { 1153 if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
871 ops_run_biofill(sh); 1154 ops_run_biofill(sh);
872 overlap_clear++; 1155 overlap_clear++;
873 } 1156 }
874 1157
875 if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) { 1158 if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) {
876 tx = ops_run_compute5(sh); 1159 if (level < 6)
877 /* terminate the chain if postxor is not set to be run */ 1160 tx = ops_run_compute5(sh, percpu);
878 if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request)) 1161 else {
1162 if (sh->ops.target2 < 0 || sh->ops.target < 0)
1163 tx = ops_run_compute6_1(sh, percpu);
1164 else
1165 tx = ops_run_compute6_2(sh, percpu);
1166 }
1167 /* terminate the chain if reconstruct is not set to be run */
1168 if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request))
879 async_tx_ack(tx); 1169 async_tx_ack(tx);
880 } 1170 }
881 1171
882 if (test_bit(STRIPE_OP_PREXOR, &ops_request)) 1172 if (test_bit(STRIPE_OP_PREXOR, &ops_request))
883 tx = ops_run_prexor(sh, tx); 1173 tx = ops_run_prexor(sh, percpu, tx);
884 1174
885 if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) { 1175 if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
886 tx = ops_run_biodrain(sh, tx); 1176 tx = ops_run_biodrain(sh, tx);
887 overlap_clear++; 1177 overlap_clear++;
888 } 1178 }
889 1179
890 if (test_bit(STRIPE_OP_POSTXOR, &ops_request)) 1180 if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) {
891 ops_run_postxor(sh, tx); 1181 if (level < 6)
1182 ops_run_reconstruct5(sh, percpu, tx);
1183 else
1184 ops_run_reconstruct6(sh, percpu, tx);
1185 }
892 1186
893 if (test_bit(STRIPE_OP_CHECK, &ops_request)) 1187 if (test_bit(STRIPE_OP_CHECK, &ops_request)) {
894 ops_run_check(sh); 1188 if (sh->check_state == check_state_run)
1189 ops_run_check_p(sh, percpu);
1190 else if (sh->check_state == check_state_run_q)
1191 ops_run_check_pq(sh, percpu, 0);
1192 else if (sh->check_state == check_state_run_pq)
1193 ops_run_check_pq(sh, percpu, 1);
1194 else
1195 BUG();
1196 }
895 1197
896 if (overlap_clear) 1198 if (overlap_clear)
897 for (i = disks; i--; ) { 1199 for (i = disks; i--; ) {
@@ -899,6 +1201,7 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
899 if (test_and_clear_bit(R5_Overlap, &dev->flags)) 1201 if (test_and_clear_bit(R5_Overlap, &dev->flags))
900 wake_up(&sh->raid_conf->wait_for_overlap); 1202 wake_up(&sh->raid_conf->wait_for_overlap);
901 } 1203 }
1204 put_cpu();
902} 1205}
903 1206
904static int grow_one_stripe(raid5_conf_t *conf) 1207static int grow_one_stripe(raid5_conf_t *conf)
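raid_run_ops() now pins itself with get_cpu(), grabs that CPU's scribble buffer through per_cpu_ptr(), and drops the pin with put_cpu() once the chain is submitted; the per-CPU buffer replaces the on-stack struct page *xor_srcs[disks] arrays whose size was bounded by kernel stack depth. A loose userspace analogue (sched_getcpu() cannot disable preemption, so this is illustrative only):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

#define MAX_CPUS 64
static char scribble[MAX_CPUS][256];    /* one scratch buffer per CPU */

int main(void)
{
        int cpu = sched_getcpu();

        if (cpu < 0)
                cpu = 0;
        char *buf = scribble[cpu % MAX_CPUS];
        snprintf(buf, sizeof(scribble[0]), "stripe work on cpu %d", cpu);
        puts(buf);
        return 0;
}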
@@ -948,6 +1251,28 @@ static int grow_stripes(raid5_conf_t *conf, int num)
948 return 0; 1251 return 0;
949} 1252}
950 1253
1254/**
1255 * scribble_len - return the required size of the scribble region
1256 * @num - total number of disks in the array
1257 *
1258 * The size must be enough to contain:
1259 * 1/ a struct page pointer for each device in the array +2
1260 * 2/ room to convert each entry in (1) to its corresponding dma
1261 * (dma_map_page()) or page (page_address()) address.
1262 *
1263 * Note: the +2 is for the destination buffers of the ddf/raid6 case where we
1264 * calculate over all devices (not just the data blocks), using zeros in place
1265 * of the P and Q blocks.
1266 */
1267static size_t scribble_len(int num)
1268{
1269 size_t len;
1270
1271 len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2);
1272
1273 return len;
1274}
1275
951static int resize_stripes(raid5_conf_t *conf, int newsize) 1276static int resize_stripes(raid5_conf_t *conf, int newsize)
952{ 1277{
953 /* Make all the stripes able to hold 'newsize' devices. 1278 /* Make all the stripes able to hold 'newsize' devices.
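The scribble_len() comment above fixes the layout contract: num+2 page pointers followed by num+2 address-conversion slots, which is exactly the region that to_addr_conv() earlier in this patch steps over. Standalone, the arithmetic looks like this (struct page kept opaque, as in the kernel):

#include <stdio.h>
#include <stdlib.h>

struct page;                    /* opaque */
typedef void *addr_conv_t;

static size_t scribble_len(int num)
{
        /* num+2 source/destination pages plus their converted addresses */
        return sizeof(struct page *) * (num + 2) +
               sizeof(addr_conv_t) * (num + 2);
}

static addr_conv_t *to_addr_conv(void *scribble, int disks)
{
        /* conversion region starts right after the page-pointer array */
        return (addr_conv_t *)((char *)scribble +
                               sizeof(struct page *) * (disks + 2));
}

int main(void)
{
        int disks = 8;
        void *scribble = malloc(scribble_len(disks));

        if (!scribble)
                return 1;
        printf("pages at %p, addr_conv at %p\n",
               scribble, (void *)to_addr_conv(scribble, disks));
        free(scribble);
        return 0;
}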
@@ -976,6 +1301,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
976 struct stripe_head *osh, *nsh; 1301 struct stripe_head *osh, *nsh;
977 LIST_HEAD(newstripes); 1302 LIST_HEAD(newstripes);
978 struct disk_info *ndisks; 1303 struct disk_info *ndisks;
1304 unsigned long cpu;
979 int err; 1305 int err;
980 struct kmem_cache *sc; 1306 struct kmem_cache *sc;
981 int i; 1307 int i;
@@ -1041,7 +1367,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
1041 /* Step 3. 1367 /* Step 3.
1042 * At this point, we are holding all the stripes so the array 1368 * At this point, we are holding all the stripes so the array
1043 * is completely stalled, so now is a good time to resize 1369 * is completely stalled, so now is a good time to resize
1044 * conf->disks. 1370 * conf->disks and the scribble region
1045 */ 1371 */
1046 ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO); 1372 ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
1047 if (ndisks) { 1373 if (ndisks) {
@@ -1052,10 +1378,30 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
1052 } else 1378 } else
1053 err = -ENOMEM; 1379 err = -ENOMEM;
1054 1380
1381 get_online_cpus();
1382 conf->scribble_len = scribble_len(newsize);
1383 for_each_present_cpu(cpu) {
1384 struct raid5_percpu *percpu;
1385 void *scribble;
1386
1387 percpu = per_cpu_ptr(conf->percpu, cpu);
1388 scribble = kmalloc(conf->scribble_len, GFP_NOIO);
1389
1390 if (scribble) {
1391 kfree(percpu->scribble);
1392 percpu->scribble = scribble;
1393 } else {
1394 err = -ENOMEM;
1395 break;
1396 }
1397 }
1398 put_online_cpus();
1399
1055 /* Step 4, return new stripes to service */ 1400 /* Step 4, return new stripes to service */
1056 while(!list_empty(&newstripes)) { 1401 while(!list_empty(&newstripes)) {
1057 nsh = list_entry(newstripes.next, struct stripe_head, lru); 1402 nsh = list_entry(newstripes.next, struct stripe_head, lru);
1058 list_del_init(&nsh->lru); 1403 list_del_init(&nsh->lru);
1404
1059 for (i=conf->raid_disks; i < newsize; i++) 1405 for (i=conf->raid_disks; i < newsize; i++)
1060 if (nsh->dev[i].page == NULL) { 1406 if (nsh->dev[i].page == NULL) {
1061 struct page *p = alloc_page(GFP_NOIO); 1407 struct page *p = alloc_page(GFP_NOIO);
@@ -1594,258 +1940,13 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous)
1594} 1940}
1595 1941
1596 1942
1597
1598/*
1599 * Copy data between a page in the stripe cache, and one or more bion
1600 * The page could align with the middle of the bio, or there could be
1601 * several bion, each with several bio_vecs, which cover part of the page
1602 * Multiple bion are linked together on bi_next. There may be extras
1603 * at the end of this list. We ignore them.
1604 */
1605static void copy_data(int frombio, struct bio *bio,
1606 struct page *page,
1607 sector_t sector)
1608{
1609 char *pa = page_address(page);
1610 struct bio_vec *bvl;
1611 int i;
1612 int page_offset;
1613
1614 if (bio->bi_sector >= sector)
1615 page_offset = (signed)(bio->bi_sector - sector) * 512;
1616 else
1617 page_offset = (signed)(sector - bio->bi_sector) * -512;
1618 bio_for_each_segment(bvl, bio, i) {
1619 int len = bio_iovec_idx(bio,i)->bv_len;
1620 int clen;
1621 int b_offset = 0;
1622
1623 if (page_offset < 0) {
1624 b_offset = -page_offset;
1625 page_offset += b_offset;
1626 len -= b_offset;
1627 }
1628
1629 if (len > 0 && page_offset + len > STRIPE_SIZE)
1630 clen = STRIPE_SIZE - page_offset;
1631 else clen = len;
1632
1633 if (clen > 0) {
1634 char *ba = __bio_kmap_atomic(bio, i, KM_USER0);
1635 if (frombio)
1636 memcpy(pa+page_offset, ba+b_offset, clen);
1637 else
1638 memcpy(ba+b_offset, pa+page_offset, clen);
1639 __bio_kunmap_atomic(ba, KM_USER0);
1640 }
1641 if (clen < len) /* hit end of page */
1642 break;
1643 page_offset += len;
1644 }
1645}
1646
1647#define check_xor() do { \
1648 if (count == MAX_XOR_BLOCKS) { \
1649 xor_blocks(count, STRIPE_SIZE, dest, ptr);\
1650 count = 0; \
1651 } \
1652 } while(0)
1653
1654static void compute_parity6(struct stripe_head *sh, int method)
1655{
1656 raid5_conf_t *conf = sh->raid_conf;
1657 int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
1658 int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
1659 struct bio *chosen;
1660 /**** FIX THIS: This could be very bad if disks is close to 256 ****/
1661 void *ptrs[syndrome_disks+2];
1662
1663 pd_idx = sh->pd_idx;
1664 qd_idx = sh->qd_idx;
1665 d0_idx = raid6_d0(sh);
1666
1667 pr_debug("compute_parity, stripe %llu, method %d\n",
1668 (unsigned long long)sh->sector, method);
1669
1670 switch(method) {
1671 case READ_MODIFY_WRITE:
1672 BUG(); /* READ_MODIFY_WRITE N/A for RAID-6 */
1673 case RECONSTRUCT_WRITE:
1674 for (i= disks; i-- ;)
1675 if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) {
1676 chosen = sh->dev[i].towrite;
1677 sh->dev[i].towrite = NULL;
1678
1679 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
1680 wake_up(&conf->wait_for_overlap);
1681
1682 BUG_ON(sh->dev[i].written);
1683 sh->dev[i].written = chosen;
1684 }
1685 break;
1686 case CHECK_PARITY:
1687 BUG(); /* Not implemented yet */
1688 }
1689
1690 for (i = disks; i--;)
1691 if (sh->dev[i].written) {
1692 sector_t sector = sh->dev[i].sector;
1693 struct bio *wbi = sh->dev[i].written;
1694 while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
1695 copy_data(1, wbi, sh->dev[i].page, sector);
1696 wbi = r5_next_bio(wbi, sector);
1697 }
1698
1699 set_bit(R5_LOCKED, &sh->dev[i].flags);
1700 set_bit(R5_UPTODATE, &sh->dev[i].flags);
1701 }
1702
1703 /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/
1704
1705 for (i = 0; i < disks; i++)
1706 ptrs[i] = (void *)raid6_empty_zero_page;
1707
1708 count = 0;
1709 i = d0_idx;
1710 do {
1711 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
1712
1713 ptrs[slot] = page_address(sh->dev[i].page);
1714 if (slot < syndrome_disks &&
1715 !test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
1716 printk(KERN_ERR "block %d/%d not uptodate "
1717 "on parity calc\n", i, count);
1718 BUG();
1719 }
1720
1721 i = raid6_next_disk(i, disks);
1722 } while (i != d0_idx);
1723 BUG_ON(count != syndrome_disks);
1724
1725 raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs);
1726
1727 switch(method) {
1728 case RECONSTRUCT_WRITE:
1729 set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
1730 set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
1731 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
1732 set_bit(R5_LOCKED, &sh->dev[qd_idx].flags);
1733 break;
1734 case UPDATE_PARITY:
1735 set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
1736 set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
1737 break;
1738 }
1739}
1740
1741
1742/* Compute one missing block */
1743static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
1744{
1745 int i, count, disks = sh->disks;
1746 void *ptr[MAX_XOR_BLOCKS], *dest, *p;
1747 int qd_idx = sh->qd_idx;
1748
1749 pr_debug("compute_block_1, stripe %llu, idx %d\n",
1750 (unsigned long long)sh->sector, dd_idx);
1751
1752 if ( dd_idx == qd_idx ) {
1753 /* We're actually computing the Q drive */
1754 compute_parity6(sh, UPDATE_PARITY);
1755 } else {
1756 dest = page_address(sh->dev[dd_idx].page);
1757 if (!nozero) memset(dest, 0, STRIPE_SIZE);
1758 count = 0;
1759 for (i = disks ; i--; ) {
1760 if (i == dd_idx || i == qd_idx)
1761 continue;
1762 p = page_address(sh->dev[i].page);
1763 if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
1764 ptr[count++] = p;
1765 else
1766 printk("compute_block() %d, stripe %llu, %d"
1767 " not present\n", dd_idx,
1768 (unsigned long long)sh->sector, i);
1769
1770 check_xor();
1771 }
1772 if (count)
1773 xor_blocks(count, STRIPE_SIZE, dest, ptr);
1774 if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
1775 else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
1776 }
1777}
-
-/* Compute two missing blocks */
-static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
-{
-	int i, count, disks = sh->disks;
-	int syndrome_disks = sh->ddf_layout ? disks : disks-2;
-	int d0_idx = raid6_d0(sh);
-	int faila = -1, failb = -1;
-	/**** FIX THIS: This could be very bad if disks is close to 256 ****/
-	void *ptrs[syndrome_disks+2];
-
-	for (i = 0; i < disks ; i++)
-		ptrs[i] = (void *)raid6_empty_zero_page;
-	count = 0;
-	i = d0_idx;
-	do {
-		int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
-
-		ptrs[slot] = page_address(sh->dev[i].page);
-
-		if (i == dd_idx1)
-			faila = slot;
-		if (i == dd_idx2)
-			failb = slot;
-		i = raid6_next_disk(i, disks);
-	} while (i != d0_idx);
-	BUG_ON(count != syndrome_disks);
-
-	BUG_ON(faila == failb);
-	if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
-
-	pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
-		 (unsigned long long)sh->sector, dd_idx1, dd_idx2,
-		 faila, failb);
-
-	if (failb == syndrome_disks+1) {
-		/* Q disk is one of the missing disks */
-		if (faila == syndrome_disks) {
-			/* Missing P+Q, just recompute */
-			compute_parity6(sh, UPDATE_PARITY);
-			return;
-		} else {
-			/* We're missing D+Q; recompute D from P */
-			compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ?
-					     dd_idx2 : dd_idx1),
-					0);
-			compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */
-			return;
-		}
-	}
-
-	/* We're missing D+P or D+D; */
-	if (failb == syndrome_disks) {
-		/* We're missing D+P. */
-		raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs);
-	} else {
-		/* We're missing D+D. */
-		raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb,
-				  ptrs);
-	}
-
-	/* Both the above update both missing blocks */
-	set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
-	set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
-}
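The D+P branch above relies on raid6_datap_recov(): with data slot x and P both lost, Q is recomputed over the surviving data (the failed slot contributing zero), and the missing block falls out of the syndrome difference. A sketch of that identity, reusing gf2_mul2() from the earlier sketch; all names here are illustrative:

    static uint8_t gf2_pow2(unsigned e)     /* g^e with g = 2 */
    {
            uint8_t v = 1;

            while (e--)
                    v = gf2_mul2(v);
            return v;
    }

    static uint8_t gf2_mul(uint8_t a, uint8_t b)
    {
            uint8_t r = 0;

            while (b) {
                    if (b & 1)
                            r ^= a;
                    a = gf2_mul2(a);
                    b >>= 1;
            }
            return r;
    }

    /* D_x = (Q ^ Q') * g^(255 - x) in GF(2^8), since g^255 == 1;
     * P is then rebuilt as the XOR of all data blocks */
    static uint8_t recover_dx_byte(uint8_t q, uint8_t qprime, unsigned x)
    {
            return gf2_mul(q ^ qprime, gf2_pow2(255 - x));
    }

raid6_2data_recov() solves the analogous two-equation system for the D+D case, using precomputed inverse tables rather than the brute-force exponentiation shown here.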
 static void
-schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
+schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 			 int rcw, int expand)
 {
 	int i, pd_idx = sh->pd_idx, disks = sh->disks;
+	raid5_conf_t *conf = sh->raid_conf;
+	int level = conf->level;
 
 	if (rcw) {
 		/* if we are not expanding this is a proper write request, and
@@ -1858,7 +1959,7 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
 	} else
 		sh->reconstruct_state = reconstruct_state_run;
 
-	set_bit(STRIPE_OP_POSTXOR, &s->ops_request);
+	set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
 
 	for (i = disks; i--; ) {
 		struct r5dev *dev = &sh->dev[i];
@@ -1871,17 +1972,18 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
 			s->locked++;
 		}
 	}
-	if (s->locked + 1 == disks)
+	if (s->locked + conf->max_degraded == disks)
 		if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
-			atomic_inc(&sh->raid_conf->pending_full_writes);
+			atomic_inc(&conf->pending_full_writes);
 	} else {
+		BUG_ON(level == 6);
 		BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
 			test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
 
 		sh->reconstruct_state = reconstruct_state_prexor_drain_run;
 		set_bit(STRIPE_OP_PREXOR, &s->ops_request);
 		set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
-		set_bit(STRIPE_OP_POSTXOR, &s->ops_request);
+		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
 
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
@@ -1899,13 +2001,22 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
 		}
 	}
 
-	/* keep the parity disk locked while asynchronous operations
+	/* keep the parity disk(s) locked while asynchronous operations
 	 * are in flight
 	 */
 	set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
 	clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
 	s->locked++;
 
+	if (level == 6) {
+		int qd_idx = sh->qd_idx;
+		struct r5dev *dev = &sh->dev[qd_idx];
+
+		set_bit(R5_LOCKED, &dev->flags);
+		clear_bit(R5_UPTODATE, &dev->flags);
+		s->locked++;
+	}
+
 	pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n",
 		__func__, (unsigned long long)sh->sector,
 		s->locked, s->ops_request);
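The rewritten test s->locked + conf->max_degraded == disks generalizes full-stripe-write detection: raid5 carries one parity device per stripe (max_degraded == 1), raid6 two. A trivial stand-alone sketch of the predicate (illustrative):

    /* every data device locked for write plus all parity devices == disks */
    static int is_full_stripe_write(int locked, int disks, int max_degraded)
    {
            return locked + max_degraded == disks;
    }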
@@ -1986,13 +2097,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 
 static void end_reshape(raid5_conf_t *conf);
 
-static int page_is_zero(struct page *p)
-{
-	char *a = page_address(p);
-	return ((*(u32*)a) == 0 &&
-		memcmp(a, a+4, STRIPE_SIZE-4)==0);
-}
-
 static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
 			    struct stripe_head *sh)
 {
@@ -2132,9 +2236,10 @@ static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s,
 		set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
 		set_bit(R5_Wantcompute, &dev->flags);
 		sh->ops.target = disk_idx;
+		sh->ops.target2 = -1;
 		s->req_compute = 1;
 		/* Careful: from this point on 'uptodate' is in the eye
-		 * of raid5_run_ops which services 'compute' operations
+		 * of raid_run_ops which services 'compute' operations
 		 * before writes. R5_Wantcompute flags a block that will
 		 * be R5_UPTODATE by the time it is needed for a
 		 * subsequent operation.
@@ -2173,61 +2278,104 @@ static void handle_stripe_fill5(struct stripe_head *sh,
 	set_bit(STRIPE_HANDLE, &sh->state);
 }
 
-static void handle_stripe_fill6(struct stripe_head *sh,
-			struct stripe_head_state *s, struct r6_state *r6s,
-			int disks)
+/* fetch_block6 - checks the given member device to see if its data needs
+ * to be read or computed to satisfy a request.
+ *
+ * Returns 1 when no more member devices need to be checked, otherwise returns
+ * 0 to tell the loop in handle_stripe_fill6 to continue
+ */
+static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s,
+			struct r6_state *r6s, int disk_idx, int disks)
 {
-	int i;
-	for (i = disks; i--; ) {
-		struct r5dev *dev = &sh->dev[i];
-		if (!test_bit(R5_LOCKED, &dev->flags) &&
-		    !test_bit(R5_UPTODATE, &dev->flags) &&
-		    (dev->toread || (dev->towrite &&
-		     !test_bit(R5_OVERWRITE, &dev->flags)) ||
-		     s->syncing || s->expanding ||
-		     (s->failed >= 1 &&
-		      (sh->dev[r6s->failed_num[0]].toread ||
-		       s->to_write)) ||
-		     (s->failed >= 2 &&
-		      (sh->dev[r6s->failed_num[1]].toread ||
-		       s->to_write)))) {
-			/* we would like to get this block, possibly
-			 * by computing it, but we might not be able to
-			 */
-			if ((s->uptodate == disks - 1) &&
-			    (s->failed && (i == r6s->failed_num[0] ||
-					   i == r6s->failed_num[1]))) {
-				pr_debug("Computing stripe %llu block %d\n",
-				       (unsigned long long)sh->sector, i);
-				compute_block_1(sh, i, 0);
-				s->uptodate++;
-			} else if ( s->uptodate == disks-2 && s->failed >= 2 ) {
-				/* Computing 2-failure is *very* expensive; only
-				 * do it if failed >= 2
-				 */
-				int other;
-				for (other = disks; other--; ) {
-					if (other == i)
-						continue;
-					if (!test_bit(R5_UPTODATE,
-						      &sh->dev[other].flags))
-						break;
-				}
-				BUG_ON(other < 0);
-				pr_debug("Computing stripe %llu blocks %d,%d\n",
-				       (unsigned long long)sh->sector,
-				       i, other);
-				compute_block_2(sh, i, other);
-				s->uptodate += 2;
-			} else if (test_bit(R5_Insync, &dev->flags)) {
-				set_bit(R5_LOCKED, &dev->flags);
-				set_bit(R5_Wantread, &dev->flags);
-				s->locked++;
-				pr_debug("Reading block %d (sync=%d)\n",
-					i, s->syncing);
-			}
+	struct r5dev *dev = &sh->dev[disk_idx];
+	struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]],
+				  &sh->dev[r6s->failed_num[1]] };
+
+	if (!test_bit(R5_LOCKED, &dev->flags) &&
+	    !test_bit(R5_UPTODATE, &dev->flags) &&
+	    (dev->toread ||
+	     (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
+	     s->syncing || s->expanding ||
+	     (s->failed >= 1 &&
+	      (fdev[0]->toread || s->to_write)) ||
+	     (s->failed >= 2 &&
+	      (fdev[1]->toread || s->to_write)))) {
+		/* we would like to get this block, possibly by computing it,
+		 * otherwise read it if the backing disk is insync
+		 */
+		BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
+		BUG_ON(test_bit(R5_Wantread, &dev->flags));
+		if ((s->uptodate == disks - 1) &&
+		    (s->failed && (disk_idx == r6s->failed_num[0] ||
+				   disk_idx == r6s->failed_num[1]))) {
+			/* have disk failed, and we're requested to fetch it;
+			 * do compute it
+			 */
+			pr_debug("Computing stripe %llu block %d\n",
+			       (unsigned long long)sh->sector, disk_idx);
+			set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+			set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+			set_bit(R5_Wantcompute, &dev->flags);
+			sh->ops.target = disk_idx;
+			sh->ops.target2 = -1; /* no 2nd target */
+			s->req_compute = 1;
+			s->uptodate++;
+			return 1;
+		} else if (s->uptodate == disks-2 && s->failed >= 2) {
+			/* Computing 2-failure is *very* expensive; only
+			 * do it if failed >= 2
+			 */
+			int other;
+			for (other = disks; other--; ) {
+				if (other == disk_idx)
+					continue;
+				if (!test_bit(R5_UPTODATE,
+					      &sh->dev[other].flags))
+					break;
+			}
+			BUG_ON(other < 0);
+			pr_debug("Computing stripe %llu blocks %d,%d\n",
+			       (unsigned long long)sh->sector,
+			       disk_idx, other);
+			set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+			set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+			set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags);
+			set_bit(R5_Wantcompute, &sh->dev[other].flags);
+			sh->ops.target = disk_idx;
+			sh->ops.target2 = other;
+			s->uptodate += 2;
+			s->req_compute = 1;
+			return 1;
+		} else if (test_bit(R5_Insync, &dev->flags)) {
+			set_bit(R5_LOCKED, &dev->flags);
+			set_bit(R5_Wantread, &dev->flags);
+			s->locked++;
+			pr_debug("Reading block %d (sync=%d)\n",
+				disk_idx, s->syncing);
 		}
 	}
+
+	return 0;
+}
+
+/**
+ * handle_stripe_fill6 - read or compute data to satisfy pending requests.
+ */
+static void handle_stripe_fill6(struct stripe_head *sh,
+			struct stripe_head_state *s, struct r6_state *r6s,
+			int disks)
+{
+	int i;
+
+	/* look for blocks to read/compute, skip this if a compute
+	 * is already in flight, or if the stripe contents are in the
+	 * midst of changing due to a write
+	 */
+	if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
+	    !sh->reconstruct_state)
+		for (i = disks; i--; )
+			if (fetch_block6(sh, s, r6s, i, disks))
+				break;
 	set_bit(STRIPE_HANDLE, &sh->state);
 }
 
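The (target, target2) pair stored in sh->ops above is the whole contract between fetch_block6() and the asynchronous compute path: -1 marks an unused slot. A sketch of how a consumer could classify the request (illustrative, not the kernel's actual decoder):

    enum compute_kind { COMPUTE_NONE, COMPUTE_ONE, COMPUTE_TWO };

    static enum compute_kind classify_compute(int target, int target2)
    {
            if (target < 0 && target2 < 0)
                    return COMPUTE_NONE;
            if (target >= 0 && target2 >= 0)
                    return COMPUTE_TWO;     /* two-block raid6 recovery */
            return COMPUTE_ONE;             /* single-block xor/syndrome */
    }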
@@ -2361,114 +2509,61 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf,
 	 */
 	/* since handle_stripe can be called at any time we need to handle the
 	 * case where a compute block operation has been submitted and then a
-	 * subsequent call wants to start a write request.  raid5_run_ops only
-	 * handles the case where compute block and postxor are requested
+	 * subsequent call wants to start a write request.  raid_run_ops only
+	 * handles the case where compute block and reconstruct are requested
 	 * simultaneously.  If this is not the case then new writes need to be
 	 * held off until the compute completes.
 	 */
 	if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
 	    (s->locked == 0 && (rcw == 0 || rmw == 0) &&
 	    !test_bit(STRIPE_BIT_DELAY, &sh->state)))
-		schedule_reconstruction5(sh, s, rcw == 0, 0);
+		schedule_reconstruction(sh, s, rcw == 0, 0);
 }
 
 static void handle_stripe_dirtying6(raid5_conf_t *conf,
 		struct stripe_head *sh,	struct stripe_head_state *s,
 		struct r6_state *r6s, int disks)
 {
-	int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i;
+	int rcw = 0, pd_idx = sh->pd_idx, i;
 	int qd_idx = sh->qd_idx;
+
+	set_bit(STRIPE_HANDLE, &sh->state);
 	for (i = disks; i--; ) {
 		struct r5dev *dev = &sh->dev[i];
-		/* Would I have to read this buffer for reconstruct_write */
-		if (!test_bit(R5_OVERWRITE, &dev->flags)
-		    && i != pd_idx && i != qd_idx
-		    && (!test_bit(R5_LOCKED, &dev->flags)
-			) &&
-		    !test_bit(R5_UPTODATE, &dev->flags)) {
-			if (test_bit(R5_Insync, &dev->flags)) rcw++;
-			else {
-				pr_debug("raid6: must_compute: "
-					"disk %d flags=%#lx\n", i, dev->flags);
-				must_compute++;
+		/* check if we haven't enough data */
+		if (!test_bit(R5_OVERWRITE, &dev->flags) &&
+		    i != pd_idx && i != qd_idx &&
+		    !test_bit(R5_LOCKED, &dev->flags) &&
+		    !(test_bit(R5_UPTODATE, &dev->flags) ||
+		      test_bit(R5_Wantcompute, &dev->flags))) {
+			rcw++;
+			if (!test_bit(R5_Insync, &dev->flags))
+				continue; /* it's a failed drive */
+
+			if (
+			  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+				pr_debug("Read_old stripe %llu "
+					"block %d for Reconstruct\n",
+				     (unsigned long long)sh->sector, i);
+				set_bit(R5_LOCKED, &dev->flags);
+				set_bit(R5_Wantread, &dev->flags);
+				s->locked++;
+			} else {
+				pr_debug("Request delayed stripe %llu "
+					"block %d for Reconstruct\n",
+				     (unsigned long long)sh->sector, i);
+				set_bit(STRIPE_DELAYED, &sh->state);
+				set_bit(STRIPE_HANDLE, &sh->state);
 			}
 		}
 	}
-	pr_debug("for sector %llu, rcw=%d, must_compute=%d\n",
-	       (unsigned long long)sh->sector, rcw, must_compute);
-	set_bit(STRIPE_HANDLE, &sh->state);
-
-	if (rcw > 0)
-		/* want reconstruct write, but need to get some data */
-		for (i = disks; i--; ) {
-			struct r5dev *dev = &sh->dev[i];
-			if (!test_bit(R5_OVERWRITE, &dev->flags)
-			    && !(s->failed == 0 && (i == pd_idx || i == qd_idx))
-			    && !test_bit(R5_LOCKED, &dev->flags) &&
-			    !test_bit(R5_UPTODATE, &dev->flags) &&
-			    test_bit(R5_Insync, &dev->flags)) {
-				if (
-				  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-					pr_debug("Read_old stripe %llu "
-						"block %d for Reconstruct\n",
-					     (unsigned long long)sh->sector, i);
-					set_bit(R5_LOCKED, &dev->flags);
-					set_bit(R5_Wantread, &dev->flags);
-					s->locked++;
-				} else {
-					pr_debug("Request delayed stripe %llu "
-						"block %d for Reconstruct\n",
-					     (unsigned long long)sh->sector, i);
-					set_bit(STRIPE_DELAYED, &sh->state);
-					set_bit(STRIPE_HANDLE, &sh->state);
-				}
-			}
-		}
 	/* now if nothing is locked, and if we have enough data, we can start a
 	 * write request
 	 */
-	if (s->locked == 0 && rcw == 0 &&
+	if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
+	    s->locked == 0 && rcw == 0 &&
 	    !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
-		if (must_compute > 0) {
-			/* We have failed blocks and need to compute them */
-			switch (s->failed) {
-			case 0:
-				BUG();
-			case 1:
-				compute_block_1(sh, r6s->failed_num[0], 0);
-				break;
-			case 2:
-				compute_block_2(sh, r6s->failed_num[0],
-						r6s->failed_num[1]);
-				break;
-			default: /* This request should have been failed? */
-				BUG();
-			}
-		}
-
-		pr_debug("Computing parity for stripe %llu\n",
-			 (unsigned long long)sh->sector);
-		compute_parity6(sh, RECONSTRUCT_WRITE);
-		/* now every locked buffer is ready to be written */
-		for (i = disks; i--; )
-			if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
-				pr_debug("Writing stripe %llu block %d\n",
-				       (unsigned long long)sh->sector, i);
-				s->locked++;
-				set_bit(R5_Wantwrite, &sh->dev[i].flags);
-			}
-		if (s->locked == disks)
-			if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
-				atomic_inc(&conf->pending_full_writes);
-		/* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
-		set_bit(STRIPE_INSYNC, &sh->state);
-
-		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-			atomic_dec(&conf->preread_active_stripes);
-			if (atomic_read(&conf->preread_active_stripes) <
-			    IO_THRESHOLD)
-				md_wakeup_thread(conf->mddev->thread);
-		}
+		schedule_reconstruction(sh, s, 1, 0);
 	}
 }
 
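In the rewritten handle_stripe_dirtying6(), rcw counts stripe members whose data is neither being fully overwritten nor already available (uptodate or scheduled for compute); the reconstruct write is deferred until that count reaches zero. A stand-alone sketch of the count, with flag arrays standing in for the r5dev bit tests (illustrative only):

    /* blocks that must still be read or computed before a reconstruct write */
    static int rcw_blocks_needed(int disks, int pd_idx, int qd_idx,
                                 const int *overwrite, const int *available)
    {
            int i, rcw = 0;

            for (i = 0; i < disks; i++) {
                    if (i == pd_idx || i == qd_idx)
                            continue;       /* parity is generated, not read */
                    if (!overwrite[i] && !available[i])
                            rcw++;
            }
            return rcw;     /* schedule_reconstruction() only when rcw == 0 */
    }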
@@ -2527,7 +2622,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
 	 * we are done.  Otherwise update the mismatch count and repair
 	 * parity if !MD_RECOVERY_CHECK
 	 */
-	if (sh->ops.zero_sum_result == 0)
+	if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0)
 		/* parity is correct (on disc,
 		 * not in buffer any more)
 		 */
@@ -2544,6 +2639,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
 			set_bit(R5_Wantcompute,
 				&sh->dev[sh->pd_idx].flags);
 			sh->ops.target = sh->pd_idx;
+			sh->ops.target2 = -1;
 			s->uptodate++;
 		}
 	}
@@ -2560,67 +2656,74 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
 
 
 static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
 				struct stripe_head_state *s,
-				struct r6_state *r6s, struct page *tmp_page,
-				int disks)
+				struct r6_state *r6s, int disks)
 {
-	int update_p = 0, update_q = 0;
-	struct r5dev *dev;
 	int pd_idx = sh->pd_idx;
 	int qd_idx = sh->qd_idx;
+	struct r5dev *dev;
 
 	set_bit(STRIPE_HANDLE, &sh->state);
 
 	BUG_ON(s->failed > 2);
-	BUG_ON(s->uptodate < disks);
+
 	/* Want to check and possibly repair P and Q.
 	 * However there could be one 'failed' device, in which
 	 * case we can only check one of them, possibly using the
 	 * other to generate missing data
 	 */
 
-	/* If !tmp_page, we cannot do the calculations,
-	 * but as we have set STRIPE_HANDLE, we will soon be called
-	 * by stripe_handle with a tmp_page - just wait until then.
-	 */
-	if (tmp_page) {
+	switch (sh->check_state) {
+	case check_state_idle:
+		/* start a new check operation if there are < 2 failures */
 		if (s->failed == r6s->q_failed) {
-			/* The only possible failed device holds 'Q', so it
+			/* The only possible failed device holds Q, so it
 			 * makes sense to check P (If anything else were failed,
 			 * we would have used P to recreate it).
 			 */
-			compute_block_1(sh, pd_idx, 1);
-			if (!page_is_zero(sh->dev[pd_idx].page)) {
-				compute_block_1(sh, pd_idx, 0);
-				update_p = 1;
-			}
+			sh->check_state = check_state_run;
 		}
 		if (!r6s->q_failed && s->failed < 2) {
-			/* q is not failed, and we didn't use it to generate
+			/* Q is not failed, and we didn't use it to generate
 			 * anything, so it makes sense to check it
 			 */
-			memcpy(page_address(tmp_page),
-			       page_address(sh->dev[qd_idx].page),
-			       STRIPE_SIZE);
-			compute_parity6(sh, UPDATE_PARITY);
-			if (memcmp(page_address(tmp_page),
-				   page_address(sh->dev[qd_idx].page),
-				   STRIPE_SIZE) != 0) {
-				clear_bit(STRIPE_INSYNC, &sh->state);
-				update_q = 1;
-			}
+			if (sh->check_state == check_state_run)
+				sh->check_state = check_state_run_pq;
+			else
+				sh->check_state = check_state_run_q;
 		}
-		if (update_p || update_q) {
-			conf->mddev->resync_mismatches += STRIPE_SECTORS;
-			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
-				/* don't try to repair!! */
-				update_p = update_q = 0;
+
+		/* discard potentially stale zero_sum_result */
+		sh->ops.zero_sum_result = 0;
+
+		if (sh->check_state == check_state_run) {
+			/* async_xor_zero_sum destroys the contents of P */
+			clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+			s->uptodate--;
+		}
+		if (sh->check_state >= check_state_run &&
+		    sh->check_state <= check_state_run_pq) {
+			/* async_syndrome_zero_sum preserves P and Q, so
+			 * no need to mark them !uptodate here
+			 */
+			set_bit(STRIPE_OP_CHECK, &s->ops_request);
+			break;
 		}
 
+		/* we have 2-disk failure */
+		BUG_ON(s->failed != 2);
+		/* fall through */
+	case check_state_compute_result:
+		sh->check_state = check_state_idle;
+
+		/* check that a write has not made the stripe insync */
+		if (test_bit(STRIPE_INSYNC, &sh->state))
+			break;
+
 		/* now write out any block on a failed drive,
-		 * or P or Q if they need it
+		 * or P or Q if they were recomputed
 		 */
-
+		BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
 		if (s->failed == 2) {
 			dev = &sh->dev[r6s->failed_num[1]];
 			s->locked++;
@@ -2633,14 +2736,13 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
 			set_bit(R5_LOCKED, &dev->flags);
 			set_bit(R5_Wantwrite, &dev->flags);
 		}
-
-		if (update_p) {
+		if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
 			dev = &sh->dev[pd_idx];
 			s->locked++;
 			set_bit(R5_LOCKED, &dev->flags);
 			set_bit(R5_Wantwrite, &dev->flags);
 		}
-		if (update_q) {
+		if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
 			dev = &sh->dev[qd_idx];
 			s->locked++;
 			set_bit(R5_LOCKED, &dev->flags);
@@ -2649,6 +2751,70 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
 		clear_bit(STRIPE_DEGRADED, &sh->state);
 
 		set_bit(STRIPE_INSYNC, &sh->state);
+		break;
+	case check_state_run:
+	case check_state_run_q:
+	case check_state_run_pq:
+		break; /* we will be called again upon completion */
+	case check_state_check_result:
+		sh->check_state = check_state_idle;
+
+		/* handle a successful check operation, if parity is correct
+		 * we are done.  Otherwise update the mismatch count and repair
+		 * parity if !MD_RECOVERY_CHECK
+		 */
+		if (sh->ops.zero_sum_result == 0) {
+			/* both parities are correct */
+			if (!s->failed)
+				set_bit(STRIPE_INSYNC, &sh->state);
+			else {
+				/* in contrast to the raid5 case we can validate
+				 * parity, but still have a failure to write
+				 * back
+				 */
+				sh->check_state = check_state_compute_result;
+				/* Returning at this point means that we may go
+				 * off and bring p and/or q uptodate again so
+				 * we make sure to check zero_sum_result again
+				 * to verify if p or q need writeback
+				 */
+			}
+		} else {
+			conf->mddev->resync_mismatches += STRIPE_SECTORS;
+			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+				/* don't try to repair!! */
+				set_bit(STRIPE_INSYNC, &sh->state);
+			else {
+				int *target = &sh->ops.target;
+
+				sh->ops.target = -1;
+				sh->ops.target2 = -1;
+				sh->check_state = check_state_compute_run;
+				set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+				set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+				if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
+					set_bit(R5_Wantcompute,
+						&sh->dev[pd_idx].flags);
+					*target = pd_idx;
+					target = &sh->ops.target2;
+					s->uptodate++;
+				}
+				if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
+					set_bit(R5_Wantcompute,
+						&sh->dev[qd_idx].flags);
+					*target = qd_idx;
+					s->uptodate++;
+				}
+			}
+		}
+		break;
+	case check_state_compute_run:
+		break;
+	default:
+		printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
+		       __func__, sh->check_state,
+		       (unsigned long long) sh->sector);
+		BUG();
 	}
 }
 
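The check path now keys off individual bits of zero_sum_result rather than a single boolean, so one syndrome check can report P and Q mismatches independently. A sketch of the flag logic (SUM_CHECK_P_RESULT/SUM_CHECK_Q_RESULT are the kernel's names; the values below are assumptions made for this stand-alone demo):

    #define SUM_CHECK_P_RESULT (1 << 0)     /* P parity mismatched */
    #define SUM_CHECK_Q_RESULT (1 << 1)     /* Q parity mismatched */

    static int parity_needs_repair(unsigned zero_sum_result,
                                   int *fix_p, int *fix_q)
    {
            *fix_p = !!(zero_sum_result & SUM_CHECK_P_RESULT);
            *fix_q = !!(zero_sum_result & SUM_CHECK_Q_RESULT);
            return *fix_p || *fix_q;        /* 0 == both parities verified */
    }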
@@ -2666,6 +2832,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
 		if (i != sh->pd_idx && i != sh->qd_idx) {
 			int dd_idx, j;
 			struct stripe_head *sh2;
+			struct async_submit_ctl submit;
 
 			sector_t bn = compute_blocknr(sh, i, 1);
 			sector_t s = raid5_compute_sector(conf, bn, 0,
@@ -2685,9 +2852,10 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
 			}
 
 			/* place all the copies on one channel */
+			init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
 			tx = async_memcpy(sh2->dev[dd_idx].page,
 					sh->dev[i].page, 0, 0, STRIPE_SIZE,
-					ASYNC_TX_DEP_ACK, tx, NULL, NULL);
+					&submit);
 
 			set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
 			set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
@@ -2756,7 +2924,8 @@ static bool handle_stripe5(struct stripe_head *sh)
 	rcu_read_lock();
 	for (i=disks; i--; ) {
 		mdk_rdev_t *rdev;
-		struct r5dev *dev = &sh->dev[i];
+
+		dev = &sh->dev[i];
 		clear_bit(R5_Insync, &dev->flags);
 
 		pr_debug("check %d: state 0x%lx toread %p read %p write %p "
@@ -2973,7 +3142,7 @@ static bool handle_stripe5(struct stripe_head *sh)
 		/* Need to write out all blocks after computing parity */
 		sh->disks = conf->raid_disks;
 		stripe_set_idx(sh->sector, conf, 0, sh);
-		schedule_reconstruction5(sh, &s, 1, 1);
+		schedule_reconstruction(sh, &s, 1, 1);
 	} else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
 		clear_bit(STRIPE_EXPAND_READY, &sh->state);
 		atomic_dec(&conf->reshape_stripes);
@@ -2993,7 +3162,7 @@ static bool handle_stripe5(struct stripe_head *sh)
 		md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
 
 	if (s.ops_request)
-		raid5_run_ops(sh, s.ops_request);
+		raid_run_ops(sh, s.ops_request);
 
 	ops_run_io(sh, &s);
 
@@ -3002,7 +3171,7 @@ static bool handle_stripe5(struct stripe_head *sh)
 	return blocked_rdev == NULL;
 }
 
-static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
+static bool handle_stripe6(struct stripe_head *sh)
 {
 	raid5_conf_t *conf = sh->raid_conf;
 	int disks = sh->disks;
@@ -3014,9 +3183,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	mdk_rdev_t *blocked_rdev = NULL;
 
 	pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
-		"pd_idx=%d, qd_idx=%d\n",
+		"pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
 		(unsigned long long)sh->sector, sh->state,
-		atomic_read(&sh->count), pd_idx, qd_idx);
+		atomic_read(&sh->count), pd_idx, qd_idx,
+		sh->check_state, sh->reconstruct_state);
 	memset(&s, 0, sizeof(s));
 
 	spin_lock(&sh->lock);
@@ -3036,35 +3206,26 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 
 		pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
 			i, dev->flags, dev->toread, dev->towrite, dev->written);
-		/* maybe we can reply to a read */
-		if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
-			struct bio *rbi, *rbi2;
-			pr_debug("Return read for disc %d\n", i);
-			spin_lock_irq(&conf->device_lock);
-			rbi = dev->toread;
-			dev->toread = NULL;
-			if (test_and_clear_bit(R5_Overlap, &dev->flags))
-				wake_up(&conf->wait_for_overlap);
-			spin_unlock_irq(&conf->device_lock);
-			while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
-				copy_data(0, rbi, dev->page, dev->sector);
-				rbi2 = r5_next_bio(rbi, dev->sector);
-				spin_lock_irq(&conf->device_lock);
-				if (!raid5_dec_bi_phys_segments(rbi)) {
-					rbi->bi_next = return_bi;
-					return_bi = rbi;
-				}
-				spin_unlock_irq(&conf->device_lock);
-				rbi = rbi2;
-			}
-		}
+		/* maybe we can reply to a read
+		 *
+		 * new wantfill requests are only permitted while
+		 * ops_complete_biofill is guaranteed to be inactive
+		 */
+		if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
+		    !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
+			set_bit(R5_Wantfill, &dev->flags);
 
 		/* now count some things */
 		if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
 		if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
+		if (test_bit(R5_Wantcompute, &dev->flags)) {
+			s.compute++;
+			BUG_ON(s.compute > 2);
+		}
 
-
-		if (dev->toread)
+		if (test_bit(R5_Wantfill, &dev->flags)) {
+			s.to_fill++;
+		} else if (dev->toread)
 			s.to_read++;
 		if (dev->towrite) {
 			s.to_write++;
@@ -3105,6 +3266,11 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 		blocked_rdev = NULL;
 	}
 
+	if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
+		set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
+		set_bit(STRIPE_BIOFILL_RUN, &sh->state);
+	}
+
 	pr_debug("locked=%d uptodate=%d to_read=%d"
 	       " to_write=%d failed=%d failed_num=%d,%d\n",
 	       s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
@@ -3145,19 +3311,62 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	 * or to load a block that is being partially written.
 	 */
 	if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
-	    (s.syncing && (s.uptodate < disks)) || s.expanding)
+	    (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
 		handle_stripe_fill6(sh, &s, &r6s, disks);
 
-	/* now to consider writing and what else, if anything should be read */
-	if (s.to_write)
+	/* Now we check to see if any write operations have recently
+	 * completed
+	 */
+	if (sh->reconstruct_state == reconstruct_state_drain_result) {
+		int qd_idx = sh->qd_idx;
+
+		sh->reconstruct_state = reconstruct_state_idle;
+		/* All the 'written' buffers and the parity blocks are ready to
+		 * be written back to disk
+		 */
+		BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
+		BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags));
+		for (i = disks; i--; ) {
+			dev = &sh->dev[i];
+			if (test_bit(R5_LOCKED, &dev->flags) &&
+			    (i == sh->pd_idx || i == qd_idx ||
+			     dev->written)) {
+				pr_debug("Writing block %d\n", i);
+				BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
+				set_bit(R5_Wantwrite, &dev->flags);
+				if (!test_bit(R5_Insync, &dev->flags) ||
+				    ((i == sh->pd_idx || i == qd_idx) &&
+				      s.failed == 0))
+					set_bit(STRIPE_INSYNC, &sh->state);
+			}
+		}
+		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+			atomic_dec(&conf->preread_active_stripes);
+			if (atomic_read(&conf->preread_active_stripes) <
+				IO_THRESHOLD)
+				md_wakeup_thread(conf->mddev->thread);
+		}
+	}
+
+	/* Now to consider new write requests and what else, if anything
+	 * should be read.  We do not handle new writes when:
+	 * 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
+	 * 2/ A 'check' operation is in flight, as it may clobber the parity
+	 *    block.
+	 */
+	if (s.to_write && !sh->reconstruct_state && !sh->check_state)
 		handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);
 
 	/* maybe we need to check and possibly fix the parity for this stripe
 	 * Any reads will already have been scheduled, so we just see if enough
-	 * data is available
+	 * data is available.  The parity check is held off while parity
+	 * dependent operations are in flight.
 	 */
-	if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state))
-		handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks);
+	if (sh->check_state ||
+	    (s.syncing && s.locked == 0 &&
+	     !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
+	     !test_bit(STRIPE_INSYNC, &sh->state)))
+		handle_parity_checks6(conf, sh, &s, &r6s, disks);
 
 	if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
 		md_done_sync(conf->mddev, STRIPE_SECTORS,1);
@@ -3178,15 +3387,29 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 				set_bit(R5_Wantwrite, &dev->flags);
 				set_bit(R5_ReWrite, &dev->flags);
 				set_bit(R5_LOCKED, &dev->flags);
+				s.locked++;
 			} else {
 				/* let's read it back */
 				set_bit(R5_Wantread, &dev->flags);
 				set_bit(R5_LOCKED, &dev->flags);
+				s.locked++;
 			}
 		}
 	}
 
-	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+	/* Finish reconstruct operations initiated by the expansion process */
+	if (sh->reconstruct_state == reconstruct_state_result) {
+		sh->reconstruct_state = reconstruct_state_idle;
+		clear_bit(STRIPE_EXPANDING, &sh->state);
+		for (i = conf->raid_disks; i--; ) {
+			set_bit(R5_Wantwrite, &sh->dev[i].flags);
+			set_bit(R5_LOCKED, &sh->dev[i].flags);
+			s.locked++;
+		}
+	}
+
+	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
+	    !sh->reconstruct_state) {
 		struct stripe_head *sh2
 			= get_active_stripe(conf, sh->sector, 1, 1, 1);
 		if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
@@ -3207,14 +3430,8 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 		/* Need to write out all blocks after computing P&Q */
 		sh->disks = conf->raid_disks;
 		stripe_set_idx(sh->sector, conf, 0, sh);
-		compute_parity6(sh, RECONSTRUCT_WRITE);
-		for (i = conf->raid_disks ; i-- ; ) {
-			set_bit(R5_LOCKED, &sh->dev[i].flags);
-			s.locked++;
-			set_bit(R5_Wantwrite, &sh->dev[i].flags);
-		}
-		clear_bit(STRIPE_EXPANDING, &sh->state);
-	} else if (s.expanded) {
+		schedule_reconstruction(sh, &s, 1, 1);
+	} else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
 		clear_bit(STRIPE_EXPAND_READY, &sh->state);
 		atomic_dec(&conf->reshape_stripes);
 		wake_up(&conf->wait_for_overlap);
@@ -3232,6 +3449,9 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	if (unlikely(blocked_rdev))
 		md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
 
+	if (s.ops_request)
+		raid_run_ops(sh, s.ops_request);
+
 	ops_run_io(sh, &s);
 
 	return_io(return_bi);
@@ -3240,16 +3460,14 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 }
 
 /* returns true if the stripe was handled */
-static bool handle_stripe(struct stripe_head *sh, struct page *tmp_page)
+static bool handle_stripe(struct stripe_head *sh)
 {
 	if (sh->raid_conf->level == 6)
-		return handle_stripe6(sh, tmp_page);
+		return handle_stripe6(sh);
 	else
 		return handle_stripe5(sh);
 }
 
-
-
 static void raid5_activate_delayed(raid5_conf_t *conf)
 {
 	if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
@@ -3331,6 +3549,9 @@ static int raid5_congested(void *data, int bits)
 	/* No difference between reads and writes.  Just check
 	 * how busy the stripe_cache is
 	 */
+
+	if (mddev_congested(mddev, bits))
+		return 1;
 	if (conf->inactive_blocked)
 		return 1;
 	if (conf->quiesce)
@@ -3880,7 +4101,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
 	INIT_LIST_HEAD(&stripes);
 	for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
 		int j;
-		int skipped = 0;
+		int skipped_disk = 0;
 		sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
 		set_bit(STRIPE_EXPANDING, &sh->state);
 		atomic_inc(&conf->reshape_stripes);
@@ -3896,14 +4117,14 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
 				continue;
 			s = compute_blocknr(sh, j, 0);
 			if (s < raid5_size(mddev, 0, 0)) {
-				skipped = 1;
+				skipped_disk = 1;
 				continue;
 			}
 			memset(page_address(sh->dev[j].page), 0, STRIPE_SIZE);
 			set_bit(R5_Expanded, &sh->dev[j].flags);
 			set_bit(R5_UPTODATE, &sh->dev[j].flags);
 		}
-		if (!skipped) {
+		if (!skipped_disk) {
 			set_bit(STRIPE_EXPAND_READY, &sh->state);
 			set_bit(STRIPE_HANDLE, &sh->state);
 		}
@@ -4057,7 +4278,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
 	spin_unlock(&sh->lock);
 
 	/* wait for any blocked device to be handled */
-	while(unlikely(!handle_stripe(sh, NULL)))
+	while (unlikely(!handle_stripe(sh)))
 		;
 	release_stripe(sh);
 
@@ -4114,7 +4335,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
 			return handled;
 		}
 
-		handle_stripe(sh, NULL);
+		handle_stripe(sh);
 		release_stripe(sh);
 		handled++;
 	}
@@ -4128,6 +4349,36 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
 	return handled;
 }
 
+#ifdef CONFIG_MULTICORE_RAID456
+static void __process_stripe(void *param, async_cookie_t cookie)
+{
+	struct stripe_head *sh = param;
+
+	handle_stripe(sh);
+	release_stripe(sh);
+}
+
+static void process_stripe(struct stripe_head *sh, struct list_head *domain)
+{
+	async_schedule_domain(__process_stripe, sh, domain);
+}
+
+static void synchronize_stripe_processing(struct list_head *domain)
+{
+	async_synchronize_full_domain(domain);
+}
+#else
+static void process_stripe(struct stripe_head *sh, struct list_head *domain)
+{
+	handle_stripe(sh);
+	release_stripe(sh);
+	cond_resched();
+}
+
+static void synchronize_stripe_processing(struct list_head *domain)
+{
+}
+#endif
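The CONFIG_MULTICORE_RAID456 variant above fans stripe handling out through the kernel's async infrastructure, using a private domain so raid5d waits only for its own batch rather than for all global async work. The same pattern outside md, as a minimal sketch (error handling omitted; do_one() and run_batch() are illustrative names):

    #include <linux/async.h>

    static void do_one(void *data, async_cookie_t cookie)
    {
            /* per-item work; runs on an async worker thread */
    }

    static void run_batch(void **items, int n)
    {
            LIST_HEAD(domain);
            int i;

            for (i = 0; i < n; i++)
                    async_schedule_domain(do_one, items[i], &domain);

            /* wait only for work queued on this domain */
            async_synchronize_full_domain(&domain);
    }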
 
 
 /*
@@ -4142,6 +4393,7 @@ static void raid5d(mddev_t *mddev)
 	struct stripe_head *sh;
 	raid5_conf_t *conf = mddev->private;
 	int handled;
+	LIST_HEAD(raid_domain);
 
 	pr_debug("+++ raid5d active\n");
 
@@ -4178,8 +4430,7 @@ static void raid5d(mddev_t *mddev)
 		spin_unlock_irq(&conf->device_lock);
 
 		handled++;
-		handle_stripe(sh, conf->spare_page);
-		release_stripe(sh);
+		process_stripe(sh, &raid_domain);
 
 		spin_lock_irq(&conf->device_lock);
 	}
@@ -4187,6 +4438,7 @@ static void raid5d(mddev_t *mddev)
 
 	spin_unlock_irq(&conf->device_lock);
 
+	synchronize_stripe_processing(&raid_domain);
 	async_tx_issue_pending_all();
 	unplug_slaves(mddev);
 
@@ -4319,15 +4571,118 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
 	return sectors * (raid_disks - conf->max_degraded);
 }
 
+static void raid5_free_percpu(raid5_conf_t *conf)
+{
+	struct raid5_percpu *percpu;
+	unsigned long cpu;
+
+	if (!conf->percpu)
+		return;
+
+	get_online_cpus();
+	for_each_possible_cpu(cpu) {
+		percpu = per_cpu_ptr(conf->percpu, cpu);
+		safe_put_page(percpu->spare_page);
+		kfree(percpu->scribble);
+	}
+#ifdef CONFIG_HOTPLUG_CPU
+	unregister_cpu_notifier(&conf->cpu_notify);
+#endif
+	put_online_cpus();
+
+	free_percpu(conf->percpu);
+}
+
 static void free_conf(raid5_conf_t *conf)
 {
 	shrink_stripes(conf);
-	safe_put_page(conf->spare_page);
+	raid5_free_percpu(conf);
 	kfree(conf->disks);
 	kfree(conf->stripe_hashtbl);
 	kfree(conf);
 }
 
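conf->percpu replaces the single shared spare_page with one spare page and one scribble buffer per CPU, so concurrent stripe processing never contends on them. The core allocation pattern, reduced to a stand-alone sketch without the hotplug notifier (illustrative names; callers must check each CPU's buffer before use):

    #include <linux/percpu.h>
    #include <linux/slab.h>

    struct demo_percpu {
            void *scratch;
    };

    static struct demo_percpu *demo_alloc(size_t len)
    {
            struct demo_percpu *p = alloc_percpu(struct demo_percpu);
            unsigned long cpu;

            if (!p)
                    return NULL;
            for_each_present_cpu(cpu)
                    per_cpu_ptr(p, cpu)->scratch = kmalloc(len, GFP_KERNEL);
            return p;
    }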
+#ifdef CONFIG_HOTPLUG_CPU
+static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
+			      void *hcpu)
+{
+	raid5_conf_t *conf = container_of(nfb, raid5_conf_t, cpu_notify);
+	long cpu = (long)hcpu;
+	struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		if (conf->level == 6 && !percpu->spare_page)
+			percpu->spare_page = alloc_page(GFP_KERNEL);
+		if (!percpu->scribble)
+			percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
+
+		if (!percpu->scribble ||
+		    (conf->level == 6 && !percpu->spare_page)) {
+			safe_put_page(percpu->spare_page);
+			kfree(percpu->scribble);
+			pr_err("%s: failed memory allocation for cpu%ld\n",
+			       __func__, cpu);
+			return NOTIFY_BAD;
+		}
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		safe_put_page(percpu->spare_page);
+		kfree(percpu->scribble);
+		percpu->spare_page = NULL;
+		percpu->scribble = NULL;
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+#endif
+
+static int raid5_alloc_percpu(raid5_conf_t *conf)
+{
+	unsigned long cpu;
+	struct page *spare_page;
+	struct raid5_percpu *allcpus;
+	void *scribble;
+	int err;
+
+	allcpus = alloc_percpu(struct raid5_percpu);
+	if (!allcpus)
+		return -ENOMEM;
+	conf->percpu = allcpus;
+
+	get_online_cpus();
+	err = 0;
+	for_each_present_cpu(cpu) {
+		if (conf->level == 6) {
+			spare_page = alloc_page(GFP_KERNEL);
+			if (!spare_page) {
+				err = -ENOMEM;
+				break;
+			}
+			per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
+		}
+		scribble = kmalloc(scribble_len(conf->raid_disks), GFP_KERNEL);
+		if (!scribble) {
+			err = -ENOMEM;
+			break;
+		}
+		per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
+	}
+#ifdef CONFIG_HOTPLUG_CPU
+	conf->cpu_notify.notifier_call = raid456_cpu_notify;
+	conf->cpu_notify.priority = 0;
+	if (err == 0)
+		err = register_cpu_notifier(&conf->cpu_notify);
+#endif
+	put_online_cpus();
+
+	return err;
+}
+
 static raid5_conf_t *setup_conf(mddev_t *mddev)
 {
 	raid5_conf_t *conf;
@@ -4369,6 +4724,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
 		goto abort;
 
 	conf->raid_disks = mddev->raid_disks;
+	conf->scribble_len = scribble_len(conf->raid_disks);
 	if (mddev->reshape_position == MaxSector)
 		conf->previous_raid_disks = mddev->raid_disks;
 	else
@@ -4384,11 +4740,10 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
 	if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
 		goto abort;
 
-	if (mddev->new_level == 6) {
-		conf->spare_page = alloc_page(GFP_KERNEL);
-		if (!conf->spare_page)
-			goto abort;
-	}
+	conf->level = mddev->new_level;
+	if (raid5_alloc_percpu(conf) != 0)
+		goto abort;
+
 	spin_lock_init(&conf->device_lock);
 	init_waitqueue_head(&conf->wait_for_stripe);
 	init_waitqueue_head(&conf->wait_for_overlap);
@@ -4447,7 +4802,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
 	printk(KERN_INFO "raid5: allocated %dkB for %s\n",
 		memory, mdname(mddev));
 
-	conf->thread = md_register_thread(raid5d, mddev, "%s_raid5");
+	conf->thread = md_register_thread(raid5d, mddev, NULL);
 	if (!conf->thread) {
 		printk(KERN_ERR
 		       "raid5: couldn't allocate thread for %s\n",
@@ -4613,7 +4968,7 @@ static int run(mddev_t *mddev)
 		set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
 		set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
 		mddev->sync_thread = md_register_thread(md_do_sync, mddev,
-							"%s_reshape");
+							"reshape");
 	}
 
 	/* read-ahead size must cover two whole stripes, which is
@@ -5031,7 +5386,7 @@ static int raid5_start_reshape(mddev_t *mddev)
 	set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
 	set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
 	mddev->sync_thread = md_register_thread(md_do_sync, mddev,
-						"%s_reshape");
+						"reshape");
 	if (!mddev->sync_thread) {
 		mddev->recovery = 0;
 		spin_lock_irq(&conf->device_lock);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 9459689c4ea0..2390e0e83daf 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -2,6 +2,7 @@
 #define _RAID5_H
 
 #include <linux/raid/xor.h>
+#include <linux/dmaengine.h>
 
 /*
  *
@@ -175,7 +176,9 @@
  */
 enum check_states {
 	check_state_idle = 0,
-	check_state_run, /* parity check */
+	check_state_run, /* xor parity check */
+	check_state_run_q, /* q-parity check */
+	check_state_run_pq, /* pq dual parity check */
 	check_state_check_result,
 	check_state_compute_run, /* parity repair */
 	check_state_compute_result,
@@ -215,8 +218,8 @@ struct stripe_head {
 	 * @target - STRIPE_OP_COMPUTE_BLK target
 	 */
 	struct stripe_operations {
-		int		     target;
-		u32		     zero_sum_result;
+		int		     target, target2;
+		enum sum_check_flags zero_sum_result;
 	} ops;
 	struct r5dev {
 		struct bio	req;
@@ -298,7 +301,7 @@ struct r6_state {
 #define STRIPE_OP_COMPUTE_BLK	1
 #define STRIPE_OP_PREXOR	2
 #define STRIPE_OP_BIODRAIN	3
-#define STRIPE_OP_POSTXOR	4
+#define STRIPE_OP_RECONSTRUCT	4
 #define STRIPE_OP_CHECK	5
 
 /*
@@ -385,8 +388,21 @@ struct raid5_private_data {
 					    * (fresh device added).
 					    * Cleared when a sync completes.
 					    */
-
-	struct page 		*spare_page; /* Used when checking P/Q in raid6 */
+	/* per cpu variables */
+	struct raid5_percpu {
+		struct page	*spare_page; /* Used when checking P/Q in raid6 */
+		void		*scribble;   /* space for constructing buffer
+					      * lists and performing address
+					      * conversions
+					      */
+	} *percpu;
+	size_t			scribble_len; /* size of scribble region must be
+					       * associated with conf to handle
+					       * cpu hotplug while reshaping
+					       */
+#ifdef CONFIG_HOTPLUG_CPU
+	struct notifier_block	cpu_notify;
+#endif
 
 	/*
 	 * Free stripes pool
diff --git a/drivers/media/dvb/dvb-core/dvbdev.h b/drivers/media/dvb/dvb-core/dvbdev.h
index 895e2efca8a9..01fc70484743 100644
--- a/drivers/media/dvb/dvb-core/dvbdev.h
+++ b/drivers/media/dvb/dvb-core/dvbdev.h
@@ -31,10 +31,9 @@
 #define DVB_MAJOR 212
 
 #if defined(CONFIG_DVB_MAX_ADAPTERS) && CONFIG_DVB_MAX_ADAPTERS > 0
-#define DVB_MAX_ADAPTERS CONFIG_DVB_MAX_ADAPTERS
+  #define DVB_MAX_ADAPTERS CONFIG_DVB_MAX_ADAPTERS
 #else
-#warning invalid CONFIG_DVB_MAX_ADAPTERS value
-#define DVB_MAX_ADAPTERS 8
+  #define DVB_MAX_ADAPTERS 8
 #endif
 
 #define DVB_UNSET (-1)
diff --git a/drivers/media/dvb/dvb-usb/Kconfig b/drivers/media/dvb/dvb-usb/Kconfig
index 0e4b97fba384..9744b0692417 100644
--- a/drivers/media/dvb/dvb-usb/Kconfig
+++ b/drivers/media/dvb/dvb-usb/Kconfig
@@ -75,7 +75,7 @@ config DVB_USB_DIB0700
 	select DVB_DIB3000MC if !DVB_FE_CUSTOMISE
 	select DVB_S5H1411 if !DVB_FE_CUSTOMISE
 	select DVB_LGDT3305 if !DVB_FE_CUSTOMISE
-	select DVB_TUNER_DIB0070 if !DVB_FE_CUSTOMISE
+	select DVB_TUNER_DIB0070
 	select MEDIA_TUNER_MT2060 if !MEDIA_TUNER_CUSTOMISE
 	select MEDIA_TUNER_MT2266 if !MEDIA_TUNER_CUSTOMISE
 	select MEDIA_TUNER_XC2028 if !MEDIA_TUNER_CUSTOMISE
diff --git a/drivers/media/video/saa7164/saa7164-api.c b/drivers/media/video/saa7164/saa7164-api.c
index bb6df1b276be..6f094a96ac81 100644
--- a/drivers/media/video/saa7164/saa7164-api.c
+++ b/drivers/media/video/saa7164/saa7164-api.c
@@ -415,7 +415,7 @@ int saa7164_api_enum_subdevs(struct saa7164_dev *dev)
 		goto out;
 	}
 
-	if (debug & DBGLVL_API)
+	if (saa_debug & DBGLVL_API)
 		saa7164_dumphex16(dev, buf, (buflen/16)*16);
 
 	saa7164_api_dump_subdevs(dev, buf, buflen);
@@ -480,7 +480,7 @@ int saa7164_api_i2c_read(struct saa7164_i2c *bus, u8 addr, u32 reglen, u8 *reg,
480 480
481 dprintk(DBGLVL_API, "%s() len = %d bytes\n", __func__, len); 481 dprintk(DBGLVL_API, "%s() len = %d bytes\n", __func__, len);
482 482
483 if (debug & DBGLVL_I2C) 483 if (saa_debug & DBGLVL_I2C)
484 saa7164_dumphex16(dev, buf, 2 * 16); 484 saa7164_dumphex16(dev, buf, 2 * 16);
485 485
486 ret = saa7164_cmd_send(bus->dev, unitid, GET_CUR, 486 ret = saa7164_cmd_send(bus->dev, unitid, GET_CUR,
@@ -488,7 +488,7 @@ int saa7164_api_i2c_read(struct saa7164_i2c *bus, u8 addr, u32 reglen, u8 *reg,
488 if (ret != SAA_OK) 488 if (ret != SAA_OK)
489 printk(KERN_ERR "%s() error, ret(2) = 0x%x\n", __func__, ret); 489 printk(KERN_ERR "%s() error, ret(2) = 0x%x\n", __func__, ret);
490 else { 490 else {
491 if (debug & DBGLVL_I2C) 491 if (saa_debug & DBGLVL_I2C)
492 saa7164_dumphex16(dev, buf, sizeof(buf)); 492 saa7164_dumphex16(dev, buf, sizeof(buf));
493 memcpy(data, (buf + 2 * sizeof(u32) + reglen), datalen); 493 memcpy(data, (buf + 2 * sizeof(u32) + reglen), datalen);
494 } 494 }
@@ -548,7 +548,7 @@ int saa7164_api_i2c_write(struct saa7164_i2c *bus, u8 addr, u32 datalen,
548 *((u32 *)(buf + 1 * sizeof(u32))) = datalen - reglen; 548 *((u32 *)(buf + 1 * sizeof(u32))) = datalen - reglen;
549 memcpy((buf + 2 * sizeof(u32)), data, datalen); 549 memcpy((buf + 2 * sizeof(u32)), data, datalen);
550 550
551 if (debug & DBGLVL_I2C) 551 if (saa_debug & DBGLVL_I2C)
552 saa7164_dumphex16(dev, buf, sizeof(buf)); 552 saa7164_dumphex16(dev, buf, sizeof(buf));
553 553
554 ret = saa7164_cmd_send(bus->dev, unitid, SET_CUR, 554 ret = saa7164_cmd_send(bus->dev, unitid, SET_CUR,
diff --git a/drivers/media/video/saa7164/saa7164-cmd.c b/drivers/media/video/saa7164/saa7164-cmd.c
index e097f1a0969a..c45966edc0cf 100644
--- a/drivers/media/video/saa7164/saa7164-cmd.c
+++ b/drivers/media/video/saa7164/saa7164-cmd.c
@@ -250,7 +250,7 @@ int saa7164_cmd_wait(struct saa7164_dev *dev, u8 seqno)
250 unsigned long stamp; 250 unsigned long stamp;
251 int r; 251 int r;
252 252
253 if (debug >= 4) 253 if (saa_debug >= 4)
254 saa7164_bus_dump(dev); 254 saa7164_bus_dump(dev);
255 255
256 dprintk(DBGLVL_CMD, "%s(seqno=%d)\n", __func__, seqno); 256 dprintk(DBGLVL_CMD, "%s(seqno=%d)\n", __func__, seqno);
diff --git a/drivers/media/video/saa7164/saa7164-core.c b/drivers/media/video/saa7164/saa7164-core.c
index f0dbead188c8..709affc31042 100644
--- a/drivers/media/video/saa7164/saa7164-core.c
+++ b/drivers/media/video/saa7164/saa7164-core.c
@@ -45,8 +45,8 @@ MODULE_LICENSE("GPL");
45 32 bus 45 32 bus
46 */ 46 */
47 47
48unsigned int debug; 48unsigned int saa_debug;
49module_param(debug, int, 0644); 49module_param_named(debug, saa_debug, int, 0644);
50MODULE_PARM_DESC(debug, "enable debug messages"); 50MODULE_PARM_DESC(debug, "enable debug messages");
51 51
52unsigned int waitsecs = 10; 52unsigned int waitsecs = 10;
@@ -653,7 +653,7 @@ static int __devinit saa7164_initdev(struct pci_dev *pci_dev,
653 printk(KERN_ERR "%s() Unsupported board detected, " 653 printk(KERN_ERR "%s() Unsupported board detected, "
654 "registering without firmware\n", __func__); 654 "registering without firmware\n", __func__);
655 655
656 dprintk(1, "%s() parameter debug = %d\n", __func__, debug); 656 dprintk(1, "%s() parameter debug = %d\n", __func__, saa_debug);
657 dprintk(1, "%s() parameter waitsecs = %d\n", __func__, waitsecs); 657 dprintk(1, "%s() parameter waitsecs = %d\n", __func__, waitsecs);
658 658
659fail_fw: 659fail_fw:
diff --git a/drivers/media/video/saa7164/saa7164.h b/drivers/media/video/saa7164/saa7164.h
index 6753008a9c9b..42660b546f0e 100644
--- a/drivers/media/video/saa7164/saa7164.h
+++ b/drivers/media/video/saa7164/saa7164.h
@@ -375,9 +375,9 @@ extern int saa7164_buffer_dealloc(struct saa7164_tsport *port,
375 375
376/* ----------------------------------------------------------- */ 376/* ----------------------------------------------------------- */
377 377
378extern unsigned int debug; 378extern unsigned int saa_debug;
379#define dprintk(level, fmt, arg...)\ 379#define dprintk(level, fmt, arg...)\
380 do { if (debug & level)\ 380 do { if (saa_debug & level)\
381 printk(KERN_DEBUG "%s: " fmt, dev->name, ## arg);\ 381 printk(KERN_DEBUG "%s: " fmt, dev->name, ## arg);\
382 } while (0) 382 } while (0)
383 383
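The saa7164 hunks rename the driver's global debug variable to saa_debug while keeping the user-visible module parameter name, via module_param_named(); a bare global named debug collides with identically named symbols when several drivers are built into one image. A minimal sketch of the idiom, with a hypothetical module:

#include <linux/module.h>
#include <linux/moduleparam.h>

static unsigned int foo_debug;	/* C symbol kept unique within the kernel image */
module_param_named(debug, foo_debug, uint, 0644);
MODULE_PARM_DESC(debug, "enable debug messages");
MODULE_LICENSE("GPL");

/* Usage is unchanged for the user:
 *   modprobe foo debug=0x04
 * only the in-kernel symbol name differs.
 */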
diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c
index a5b448ea4eab..b3bf1c44d74d 100644
--- a/drivers/memstick/core/memstick.c
+++ b/drivers/memstick/core/memstick.c
@@ -339,9 +339,9 @@ static int h_memstick_read_dev_id(struct memstick_dev *card,
339 card->id.type = id_reg.type; 339 card->id.type = id_reg.type;
340 card->id.category = id_reg.category; 340 card->id.category = id_reg.category;
341 card->id.class = id_reg.class; 341 card->id.class = id_reg.class;
342 dev_dbg(&card->dev, "if_mode = %02x\n", id_reg.if_mode);
342 } 343 }
343 complete(&card->mrq_complete); 344 complete(&card->mrq_complete);
344 dev_dbg(&card->dev, "if_mode = %02x\n", id_reg.if_mode);
345 return -EAGAIN; 345 return -EAGAIN;
346 } 346 }
347} 347}
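The memstick change moves the dev_dbg() above complete(): once complete() is called, the thread waiting on mrq_complete may tear the card down, so dereferencing it afterwards is a potential use-after-free. A small sketch of the ordering rule, with hypothetical names:

#include <linux/completion.h>

struct job {
	struct completion done;
	int result;
};

static void finish_job(struct job *j, int result)
{
	j->result = result;	/* last touch of *j by this thread */
	complete(&j->done);	/* ownership passes to the waiter here */
	/* no dereference of j past this point: the waiter may free it */
}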
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
index 79689b10f937..766e21e15574 100644
--- a/drivers/misc/sgi-gru/grukservices.c
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -937,6 +937,8 @@ static int quicktest1(unsigned long arg)
937 937
938 /* Need 1K cacheline aligned that does not cross page boundary */ 938 /* Need 1K cacheline aligned that does not cross page boundary */
939 p = kmalloc(4096, 0); 939 p = kmalloc(4096, 0);
940 if (p == NULL)
941 return -ENOMEM;
940 mq = ALIGNUP(p, 1024); 942 mq = ALIGNUP(p, 1024);
941 memset(mes, 0xee, sizeof(mes)); 943 memset(mes, 0xee, sizeof(mes));
942 dw = mq; 944 dw = mq;
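The grukservices fix adds the missing NULL check after kmalloc(). For context, the surrounding code over-allocates and aligns within the block so that the 1K-aligned message queue cannot straddle a page boundary; a sketch of that combination, with an illustrative ALIGNUP macro (the GRU driver defines its own elsewhere):

#include <linux/slab.h>

#define ALIGNUP_SKETCH(p, q) \
	((void *)(((unsigned long)(p) + (q) - 1) & ~((unsigned long)(q) - 1)))

static void *alloc_aligned_1k(void **raw)
{
	*raw = kmalloc(4096, GFP_KERNEL);
	if (*raw == NULL)		/* the check the fix above adds */
		return NULL;
	return ALIGNUP_SKETCH(*raw, 1024);
}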
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
index 9cbf95bedce6..ccd4408a26c7 100644
--- a/drivers/misc/sgi-gru/gruprocfs.c
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -340,10 +340,9 @@ static struct proc_dir_entry *proc_gru __read_mostly;
340 340
341static int create_proc_file(struct proc_entry *p) 341static int create_proc_file(struct proc_entry *p)
342{ 342{
343 p->entry = create_proc_entry(p->name, p->mode, proc_gru); 343 p->entry = proc_create(p->name, p->mode, proc_gru, p->fops);
344 if (!p->entry) 344 if (!p->entry)
345 return -1; 345 return -1;
346 p->entry->proc_fops = p->fops;
347 return 0; 346 return 0;
348} 347}
349 348
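The gruprocfs change collapses create_proc_entry() plus a later proc_fops assignment into a single proc_create() call, which publishes the /proc entry with its file_operations already attached and so closes the window in which the entry was visible but had no fops. A minimal sketch with a hypothetical read-only file:

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int demo_show(struct seq_file *m, void *v)
{
	seq_printf(m, "hello\n");
	return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
	return single_open(file, demo_show, NULL);
}

static const struct file_operations demo_fops = {
	.owner   = THIS_MODULE,
	.open    = demo_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

static int __init demo_init(void)
{
	/* entry appears in /proc with its fops already in place */
	if (!proc_create("demo", 0444, NULL, &demo_fops))
		return -ENOMEM;
	return 0;
}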
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 065fa818be57..fc25586b7ee1 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -599,6 +599,7 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
599 struct scatterlist *sg; 599 struct scatterlist *sg;
600 unsigned int i; 600 unsigned int i;
601 enum dma_data_direction direction; 601 enum dma_data_direction direction;
602 unsigned int sglen;
602 603
603 /* 604 /*
604 * We don't do DMA on "complex" transfers, i.e. with 605 * We don't do DMA on "complex" transfers, i.e. with
@@ -628,11 +629,14 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
628 else 629 else
629 direction = DMA_TO_DEVICE; 630 direction = DMA_TO_DEVICE;
630 631
632 sglen = dma_map_sg(&host->pdev->dev, data->sg, data->sg_len, direction);
633 if (sglen != data->sg_len)
634 goto unmap_exit;
631 desc = chan->device->device_prep_slave_sg(chan, 635 desc = chan->device->device_prep_slave_sg(chan,
632 data->sg, data->sg_len, direction, 636 data->sg, data->sg_len, direction,
633 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 637 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
634 if (!desc) 638 if (!desc)
635 return -ENOMEM; 639 goto unmap_exit;
636 640
637 host->dma.data_desc = desc; 641 host->dma.data_desc = desc;
638 desc->callback = atmci_dma_complete; 642 desc->callback = atmci_dma_complete;
@@ -643,6 +647,9 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
643 chan->device->device_issue_pending(chan); 647 chan->device->device_issue_pending(chan);
644 648
645 return 0; 649 return 0;
650unmap_exit:
651 dma_unmap_sg(&host->pdev->dev, data->sg, sglen, direction);
652 return -ENOMEM;
646} 653}
647 654
648#else /* CONFIG_MMC_ATMELMCI_DMA */ 655#else /* CONFIG_MMC_ATMELMCI_DMA */
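The atmel-mci change maps the scatterlist before handing it to the DMA engine and unmaps it on every failure path. Two DMA-API details matter here: dma_map_sg() may return fewer entries than it was given (or zero on failure), and dma_unmap_sg() must be passed the nents originally given to dma_map_sg(), not the returned count. A sketch of the pattern; submit_to_engine() is a hypothetical stand-in for device_prep_slave_sg() and the submit step:

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

/* hypothetical stand-in for prep + submit on a DMA channel */
static int submit_to_engine(struct scatterlist *sg, int mapped)
{
	return (sg && mapped > 0) ? 0 : -EINVAL;
}

static int map_and_submit(struct device *dev, struct scatterlist *sg,
			  int nents, enum dma_data_direction dir)
{
	int mapped = dma_map_sg(dev, sg, nents, dir);

	if (mapped == 0)
		return -ENOMEM;		/* nothing mapped, nothing to undo */

	if (submit_to_engine(sg, mapped) < 0) {
		/* unmap with the original nents, per the DMA-API contract */
		dma_unmap_sg(dev, sg, nents, dir);
		return -EIO;
	}
	return 0;
}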
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 8741d0f5146a..3d1e5329da12 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -22,12 +22,13 @@
22#include <linux/clk.h> 22#include <linux/clk.h>
23#include <linux/scatterlist.h> 23#include <linux/scatterlist.h>
24#include <linux/gpio.h> 24#include <linux/gpio.h>
25#include <linux/amba/mmci.h>
26#include <linux/regulator/consumer.h>
25 27
26#include <asm/cacheflush.h> 28#include <asm/cacheflush.h>
27#include <asm/div64.h> 29#include <asm/div64.h>
28#include <asm/io.h> 30#include <asm/io.h>
29#include <asm/sizes.h> 31#include <asm/sizes.h>
30#include <asm/mach/mmc.h>
31 32
32#include "mmci.h" 33#include "mmci.h"
33 34
@@ -38,6 +39,36 @@
38 39
39static unsigned int fmax = 515633; 40static unsigned int fmax = 515633;
40 41
42/*
43 * This must be called with host->lock held
44 */
45static void mmci_set_clkreg(struct mmci_host *host, unsigned int desired)
46{
47 u32 clk = 0;
48
49 if (desired) {
50 if (desired >= host->mclk) {
51 clk = MCI_CLK_BYPASS;
52 host->cclk = host->mclk;
53 } else {
54 clk = host->mclk / (2 * desired) - 1;
55 if (clk >= 256)
56 clk = 255;
57 host->cclk = host->mclk / (2 * (clk + 1));
58 }
59 if (host->hw_designer == 0x80)
60 clk |= MCI_FCEN; /* Bug fix in ST IP block */
61 clk |= MCI_CLK_ENABLE;
62 /* This hasn't proven to be worthwhile */
63 /* clk |= MCI_CLK_PWRSAVE; */
64 }
65
66 if (host->mmc->ios.bus_width == MMC_BUS_WIDTH_4)
67 clk |= MCI_WIDE_BUS;
68
69 writel(clk, host->base + MMCICLOCK);
70}
71
41static void 72static void
42mmci_request_end(struct mmci_host *host, struct mmc_request *mrq) 73mmci_request_end(struct mmci_host *host, struct mmc_request *mrq)
43{ 74{
@@ -419,30 +450,31 @@ static void mmci_request(struct mmc_host *mmc, struct mmc_request *mrq)
419static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) 450static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
420{ 451{
421 struct mmci_host *host = mmc_priv(mmc); 452 struct mmci_host *host = mmc_priv(mmc);
422 u32 clk = 0, pwr = 0; 453 u32 pwr = 0;
423 454 unsigned long flags;
424 if (ios->clock) {
425 if (ios->clock >= host->mclk) {
426 clk = MCI_CLK_BYPASS;
427 host->cclk = host->mclk;
428 } else {
429 clk = host->mclk / (2 * ios->clock) - 1;
430 if (clk >= 256)
431 clk = 255;
432 host->cclk = host->mclk / (2 * (clk + 1));
433 }
434 if (host->hw_designer == AMBA_VENDOR_ST)
435 clk |= MCI_FCEN; /* Bug fix in ST IP block */
436 clk |= MCI_CLK_ENABLE;
437 }
438
439 if (host->plat->translate_vdd)
440 pwr |= host->plat->translate_vdd(mmc_dev(mmc), ios->vdd);
441 455
442 switch (ios->power_mode) { 456 switch (ios->power_mode) {
443 case MMC_POWER_OFF: 457 case MMC_POWER_OFF:
458			if (host->vcc &&
459 regulator_is_enabled(host->vcc))
460 regulator_disable(host->vcc);
444 break; 461 break;
445 case MMC_POWER_UP: 462 case MMC_POWER_UP:
463#ifdef CONFIG_REGULATOR
464 if (host->vcc)
465 /* This implicitly enables the regulator */
466 mmc_regulator_set_ocr(host->vcc, ios->vdd);
467#endif
468 /*
469 * The translate_vdd function is not used if you have
470 * an external regulator, or your design is really weird.
471 * Using it would mean sending in power control BOTH using
472 * a regulator AND the 4 MMCIPWR bits. If we don't have
473 * a regulator, we might have some other platform specific
474 * power control behind this translate function.
475 */
476 if (!host->vcc && host->plat->translate_vdd)
477 pwr |= host->plat->translate_vdd(mmc_dev(mmc), ios->vdd);
446 /* The ST version does not have this, fall through to POWER_ON */ 478 /* The ST version does not have this, fall through to POWER_ON */
447 if (host->hw_designer != AMBA_VENDOR_ST) { 479 if (host->hw_designer != AMBA_VENDOR_ST) {
448 pwr |= MCI_PWR_UP; 480 pwr |= MCI_PWR_UP;
@@ -465,12 +497,16 @@ static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
465 } 497 }
466 } 498 }
467 499
468 writel(clk, host->base + MMCICLOCK); 500 spin_lock_irqsave(&host->lock, flags);
501
502 mmci_set_clkreg(host, ios->clock);
469 503
470 if (host->pwr != pwr) { 504 if (host->pwr != pwr) {
471 host->pwr = pwr; 505 host->pwr = pwr;
472 writel(pwr, host->base + MMCIPOWER); 506 writel(pwr, host->base + MMCIPOWER);
473 } 507 }
508
509 spin_unlock_irqrestore(&host->lock, flags);
474} 510}
475 511
476static int mmci_get_ro(struct mmc_host *mmc) 512static int mmci_get_ro(struct mmc_host *mmc)
@@ -517,7 +553,7 @@ static void mmci_check_status(unsigned long data)
517 553
518static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id) 554static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
519{ 555{
520 struct mmc_platform_data *plat = dev->dev.platform_data; 556 struct mmci_platform_data *plat = dev->dev.platform_data;
521 struct mmci_host *host; 557 struct mmci_host *host;
522 struct mmc_host *mmc; 558 struct mmc_host *mmc;
523 int ret; 559 int ret;
@@ -583,7 +619,30 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
583 mmc->ops = &mmci_ops; 619 mmc->ops = &mmci_ops;
584 mmc->f_min = (host->mclk + 511) / 512; 620 mmc->f_min = (host->mclk + 511) / 512;
585 mmc->f_max = min(host->mclk, fmax); 621 mmc->f_max = min(host->mclk, fmax);
586 mmc->ocr_avail = plat->ocr_mask; 622#ifdef CONFIG_REGULATOR
623 /* If we're using the regulator framework, try to fetch a regulator */
624 host->vcc = regulator_get(&dev->dev, "vmmc");
625 if (IS_ERR(host->vcc))
626 host->vcc = NULL;
627 else {
628 int mask = mmc_regulator_get_ocrmask(host->vcc);
629
630 if (mask < 0)
631 dev_err(&dev->dev, "error getting OCR mask (%d)\n",
632 mask);
633 else {
634 host->mmc->ocr_avail = (u32) mask;
635 if (plat->ocr_mask)
636 dev_warn(&dev->dev,
637 "Provided ocr_mask/setpower will not be used "
638 "(using regulator instead)\n");
639 }
640 }
641#endif
642 /* Fall back to platform data if no regulator is found */
643 if (host->vcc == NULL)
644 mmc->ocr_avail = plat->ocr_mask;
645 mmc->caps = plat->capabilities;
587 646
588 /* 647 /*
589 * We can do SGIO 648 * We can do SGIO
@@ -720,6 +779,10 @@ static int __devexit mmci_remove(struct amba_device *dev)
720 clk_disable(host->clk); 779 clk_disable(host->clk);
721 clk_put(host->clk); 780 clk_put(host->clk);
722 781
782	if (host->vcc && regulator_is_enabled(host->vcc))
783 regulator_disable(host->vcc);
784 regulator_put(host->vcc);
785
723 mmc_free_host(mmc); 786 mmc_free_host(mmc);
724 787
725 amba_release_regions(dev); 788 amba_release_regions(dev);
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index 839f264c9725..1ceb9a90f59b 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -161,7 +161,7 @@ struct mmci_host {
161 unsigned int mclk; 161 unsigned int mclk;
162 unsigned int cclk; 162 unsigned int cclk;
163 u32 pwr; 163 u32 pwr;
164 struct mmc_platform_data *plat; 164 struct mmci_platform_data *plat;
165 165
166 u8 hw_designer; 166 u8 hw_designer;
167 u8 hw_revision:4; 167 u8 hw_revision:4;
@@ -175,6 +175,7 @@ struct mmci_host {
175 struct scatterlist *sg_ptr; 175 struct scatterlist *sg_ptr;
176 unsigned int sg_off; 176 unsigned int sg_off;
177 unsigned int size; 177 unsigned int size;
178 struct regulator *vcc;
178}; 179};
179 180
180static inline void mmci_init_sg(struct mmci_host *host, struct mmc_data *data) 181static inline void mmci_init_sg(struct mmci_host *host, struct mmc_data *data)
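The mmci changes teach the driver to prefer a regulator named "vmmc" for its OCR mask, falling back to the platform data mask when none is wired up, and they move all clock register writes under host->lock via the new mmci_set_clkreg(). A condensed sketch of the probe-time regulator lookup, assuming CONFIG_REGULATOR and using the same MMC core helpers the patch uses:

#include <linux/err.h>
#include <linux/regulator/consumer.h>
#include <linux/mmc/host.h>

static void pick_ocr(struct device *dev, struct mmc_host *mmc,
		     struct regulator **vcc, u32 plat_ocr_mask)
{
	*vcc = regulator_get(dev, "vmmc");
	if (IS_ERR(*vcc)) {
		*vcc = NULL;		/* no regulator: fall back below */
	} else {
		int mask = mmc_regulator_get_ocrmask(*vcc);

		if (mask > 0)
			mmc->ocr_avail = mask;
		/* the real code logs an error for mask < 0 */
	}
	if (*vcc == NULL)
		mmc->ocr_avail = plat_ocr_mask;	/* platform data fallback */
}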
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index e55ac792d68c..5e0b1529964d 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -28,6 +28,7 @@
28#include <linux/mmc/host.h> 28#include <linux/mmc/host.h>
29#include <linux/io.h> 29#include <linux/io.h>
30#include <linux/regulator/consumer.h> 30#include <linux/regulator/consumer.h>
31#include <linux/gpio.h>
31 32
32#include <asm/sizes.h> 33#include <asm/sizes.h>
33 34
@@ -96,10 +97,18 @@ static inline void pxamci_init_ocr(struct pxamci_host *host)
96 97
97static inline void pxamci_set_power(struct pxamci_host *host, unsigned int vdd) 98static inline void pxamci_set_power(struct pxamci_host *host, unsigned int vdd)
98{ 99{
100 int on;
101
99#ifdef CONFIG_REGULATOR 102#ifdef CONFIG_REGULATOR
100 if (host->vcc) 103 if (host->vcc)
101 mmc_regulator_set_ocr(host->vcc, vdd); 104 mmc_regulator_set_ocr(host->vcc, vdd);
102#endif 105#endif
106 if (!host->vcc && host->pdata &&
107 gpio_is_valid(host->pdata->gpio_power)) {
108 on = ((1 << vdd) & host->pdata->ocr_mask);
109 gpio_set_value(host->pdata->gpio_power,
110 !!on ^ host->pdata->gpio_power_invert);
111 }
103 if (!host->vcc && host->pdata && host->pdata->setpower) 112 if (!host->vcc && host->pdata && host->pdata->setpower)
104 host->pdata->setpower(mmc_dev(host->mmc), vdd); 113 host->pdata->setpower(mmc_dev(host->mmc), vdd);
105} 114}
@@ -421,6 +430,12 @@ static int pxamci_get_ro(struct mmc_host *mmc)
421{ 430{
422 struct pxamci_host *host = mmc_priv(mmc); 431 struct pxamci_host *host = mmc_priv(mmc);
423 432
433 if (host->pdata && gpio_is_valid(host->pdata->gpio_card_ro)) {
434 if (host->pdata->gpio_card_ro_invert)
435 return !gpio_get_value(host->pdata->gpio_card_ro);
436 else
437 return gpio_get_value(host->pdata->gpio_card_ro);
438 }
424 if (host->pdata && host->pdata->get_ro) 439 if (host->pdata && host->pdata->get_ro)
425 return !!host->pdata->get_ro(mmc_dev(mmc)); 440 return !!host->pdata->get_ro(mmc_dev(mmc));
426 /* 441 /*
@@ -534,7 +549,7 @@ static int pxamci_probe(struct platform_device *pdev)
534 struct mmc_host *mmc; 549 struct mmc_host *mmc;
535 struct pxamci_host *host = NULL; 550 struct pxamci_host *host = NULL;
536 struct resource *r, *dmarx, *dmatx; 551 struct resource *r, *dmarx, *dmatx;
537 int ret, irq; 552 int ret, irq, gpio_cd = -1, gpio_ro = -1, gpio_power = -1;
538 553
539 r = platform_get_resource(pdev, IORESOURCE_MEM, 0); 554 r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
540 irq = platform_get_irq(pdev, 0); 555 irq = platform_get_irq(pdev, 0);
@@ -661,13 +676,63 @@ static int pxamci_probe(struct platform_device *pdev)
661 } 676 }
662 host->dma_drcmrtx = dmatx->start; 677 host->dma_drcmrtx = dmatx->start;
663 678
679 if (host->pdata) {
680 gpio_cd = host->pdata->gpio_card_detect;
681 gpio_ro = host->pdata->gpio_card_ro;
682 gpio_power = host->pdata->gpio_power;
683 }
684 if (gpio_is_valid(gpio_power)) {
685 ret = gpio_request(gpio_power, "mmc card power");
686 if (ret) {
687 dev_err(&pdev->dev, "Failed requesting gpio_power %d\n", gpio_power);
688 goto out;
689 }
690 gpio_direction_output(gpio_power,
691 host->pdata->gpio_power_invert);
692 }
693 if (gpio_is_valid(gpio_ro)) {
694 ret = gpio_request(gpio_ro, "mmc card read only");
695 if (ret) {
696			dev_err(&pdev->dev, "Failed requesting gpio_ro %d\n", gpio_ro);
697 goto err_gpio_ro;
698 }
699 gpio_direction_input(gpio_ro);
700 }
701 if (gpio_is_valid(gpio_cd)) {
702 ret = gpio_request(gpio_cd, "mmc card detect");
703 if (ret) {
704			dev_err(&pdev->dev, "Failed requesting gpio_cd %d\n", gpio_cd);
705 goto err_gpio_cd;
706 }
707 gpio_direction_input(gpio_cd);
708
709 ret = request_irq(gpio_to_irq(gpio_cd), pxamci_detect_irq,
710 IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
711 "mmc card detect", mmc);
712 if (ret) {
713 dev_err(&pdev->dev, "failed to request card detect IRQ\n");
714 goto err_request_irq;
715 }
716 }
717
664 if (host->pdata && host->pdata->init) 718 if (host->pdata && host->pdata->init)
665 host->pdata->init(&pdev->dev, pxamci_detect_irq, mmc); 719 host->pdata->init(&pdev->dev, pxamci_detect_irq, mmc);
666 720
721 if (gpio_is_valid(gpio_power) && host->pdata->setpower)
722 dev_warn(&pdev->dev, "gpio_power and setpower() both defined\n");
723 if (gpio_is_valid(gpio_ro) && host->pdata->get_ro)
724 dev_warn(&pdev->dev, "gpio_ro and get_ro() both defined\n");
725
667 mmc_add_host(mmc); 726 mmc_add_host(mmc);
668 727
669 return 0; 728 return 0;
670 729
730err_request_irq:
731 gpio_free(gpio_cd);
732err_gpio_cd:
733 gpio_free(gpio_ro);
734err_gpio_ro:
735 gpio_free(gpio_power);
671 out: 736 out:
672 if (host) { 737 if (host) {
673 if (host->dma >= 0) 738 if (host->dma >= 0)
@@ -688,12 +753,26 @@ static int pxamci_probe(struct platform_device *pdev)
688static int pxamci_remove(struct platform_device *pdev) 753static int pxamci_remove(struct platform_device *pdev)
689{ 754{
690 struct mmc_host *mmc = platform_get_drvdata(pdev); 755 struct mmc_host *mmc = platform_get_drvdata(pdev);
756 int gpio_cd = -1, gpio_ro = -1, gpio_power = -1;
691 757
692 platform_set_drvdata(pdev, NULL); 758 platform_set_drvdata(pdev, NULL);
693 759
694 if (mmc) { 760 if (mmc) {
695 struct pxamci_host *host = mmc_priv(mmc); 761 struct pxamci_host *host = mmc_priv(mmc);
696 762
763 if (host->pdata) {
764 gpio_cd = host->pdata->gpio_card_detect;
765 gpio_ro = host->pdata->gpio_card_ro;
766 gpio_power = host->pdata->gpio_power;
767 }
768 if (gpio_is_valid(gpio_cd)) {
769 free_irq(gpio_to_irq(gpio_cd), mmc);
770 gpio_free(gpio_cd);
771 }
772 if (gpio_is_valid(gpio_ro))
773 gpio_free(gpio_ro);
774 if (gpio_is_valid(gpio_power))
775 gpio_free(gpio_power);
697 if (host->vcc) 776 if (host->vcc)
698 regulator_put(host->vcc); 777 regulator_put(host->vcc);
699 778
@@ -725,20 +804,20 @@ static int pxamci_remove(struct platform_device *pdev)
725} 804}
726 805
727#ifdef CONFIG_PM 806#ifdef CONFIG_PM
728static int pxamci_suspend(struct platform_device *dev, pm_message_t state) 807static int pxamci_suspend(struct device *dev)
729{ 808{
730 struct mmc_host *mmc = platform_get_drvdata(dev); 809 struct mmc_host *mmc = dev_get_drvdata(dev);
731 int ret = 0; 810 int ret = 0;
732 811
733 if (mmc) 812 if (mmc)
734 ret = mmc_suspend_host(mmc, state); 813 ret = mmc_suspend_host(mmc, PMSG_SUSPEND);
735 814
736 return ret; 815 return ret;
737} 816}
738 817
739static int pxamci_resume(struct platform_device *dev) 818static int pxamci_resume(struct device *dev)
740{ 819{
741 struct mmc_host *mmc = platform_get_drvdata(dev); 820 struct mmc_host *mmc = dev_get_drvdata(dev);
742 int ret = 0; 821 int ret = 0;
743 822
744 if (mmc) 823 if (mmc)
@@ -746,19 +825,22 @@ static int pxamci_resume(struct platform_device *dev)
746 825
747 return ret; 826 return ret;
748} 827}
749#else 828
750#define pxamci_suspend NULL 829static struct dev_pm_ops pxamci_pm_ops = {
751#define pxamci_resume NULL 830 .suspend = pxamci_suspend,
831 .resume = pxamci_resume,
832};
752#endif 833#endif
753 834
754static struct platform_driver pxamci_driver = { 835static struct platform_driver pxamci_driver = {
755 .probe = pxamci_probe, 836 .probe = pxamci_probe,
756 .remove = pxamci_remove, 837 .remove = pxamci_remove,
757 .suspend = pxamci_suspend,
758 .resume = pxamci_resume,
759 .driver = { 838 .driver = {
760 .name = DRIVER_NAME, 839 .name = DRIVER_NAME,
761 .owner = THIS_MODULE, 840 .owner = THIS_MODULE,
841#ifdef CONFIG_PM
842 .pm = &pxamci_pm_ops,
843#endif
762 }, 844 },
763}; 845};
764 846
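The tail of the pxamci diff converts the legacy platform_driver suspend/resume hooks to dev_pm_ops: the callbacks now take a struct device, fetch their data with dev_get_drvdata(), and are referenced from driver.pm under CONFIG_PM. A skeletal sketch of the converted shape, with hypothetical names:

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm.h>

static int demo_suspend(struct device *dev)
{
	void *priv = dev_get_drvdata(dev);	/* was platform_get_drvdata() */

	(void)priv;
	return 0;
}

static int demo_resume(struct device *dev)
{
	return 0;
}

static struct dev_pm_ops demo_pm_ops = {
	.suspend = demo_suspend,
	.resume  = demo_resume,
};

static struct platform_driver demo_driver = {
	.driver = {
		.name  = "demo",
		.owner = THIS_MODULE,
		.pm    = &demo_pm_ops,
	},
};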
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index e4ec3659759a..ecf90f5c97c2 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -159,7 +159,7 @@ config MTD_AFS_PARTS
159 159
160config MTD_OF_PARTS 160config MTD_OF_PARTS
161 tristate "Flash partition map based on OF description" 161 tristate "Flash partition map based on OF description"
162 depends on PPC_OF && MTD_PARTITIONS 162 depends on (MICROBLAZE || PPC_OF) && MTD_PARTITIONS
163 help 163 help
164 This provides a partition parsing function which derives 164 This provides a partition parsing function which derives
165 the partition map from the children of the flash node, 165 the partition map from the children of the flash node,
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index 3a9a960644b6..841e085ab74a 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -74,7 +74,7 @@ config MTD_PHYSMAP_BANKWIDTH
74 74
75config MTD_PHYSMAP_OF 75config MTD_PHYSMAP_OF
76 tristate "Flash device in physical memory map based on OF description" 76 tristate "Flash device in physical memory map based on OF description"
77 depends on PPC_OF && (MTD_CFI || MTD_JEDECPROBE || MTD_ROM) 77 depends on (MICROBLAZE || PPC_OF) && (MTD_CFI || MTD_JEDECPROBE || MTD_ROM)
78 help 78 help
79 This provides a 'mapping' driver which allows the NOR Flash and 79 This provides a 'mapping' driver which allows the NOR Flash and
80 ROM driver code to communicate with chips which are mapped 80 ROM driver code to communicate with chips which are mapped
diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c
index 15c0195ebd31..a24be34a3f7a 100644
--- a/drivers/net/cris/eth_v10.c
+++ b/drivers/net/cris/eth_v10.c
@@ -768,10 +768,24 @@ e100_negotiate(struct net_device* dev)
768 768
769 e100_set_mdio_reg(dev, np->mii_if.phy_id, MII_ADVERTISE, data); 769 e100_set_mdio_reg(dev, np->mii_if.phy_id, MII_ADVERTISE, data);
770 770
771 /* Renegotiate with link partner */ 771 data = e100_get_mdio_reg(dev, np->mii_if.phy_id, MII_BMCR);
772 if (autoneg_normal) { 772 if (autoneg_normal) {
773 data = e100_get_mdio_reg(dev, np->mii_if.phy_id, MII_BMCR); 773 /* Renegotiate with link partner */
774 data |= BMCR_ANENABLE | BMCR_ANRESTART; 774 data |= BMCR_ANENABLE | BMCR_ANRESTART;
775 } else {
776 /* Don't negotiate speed or duplex */
777 data &= ~(BMCR_ANENABLE | BMCR_ANRESTART);
778
779 /* Set speed and duplex static */
780 if (current_speed_selection == 10)
781 data &= ~BMCR_SPEED100;
782 else
783 data |= BMCR_SPEED100;
784
785 if (current_duplex != full)
786 data &= ~BMCR_FULLDPLX;
787 else
788 data |= BMCR_FULLDPLX;
775 } 789 }
776 e100_set_mdio_reg(dev, np->mii_if.phy_id, MII_BMCR, data); 790 e100_set_mdio_reg(dev, np->mii_if.phy_id, MII_BMCR, data);
777} 791}
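When autonegotiation is disabled, the e100 change now programs the PHY directly: it clears BMCR_ANENABLE/BMCR_ANRESTART and then forces the speed and duplex bits instead of leaving BMCR untouched. A sketch of that register manipulation; the mdio accessors and register file here are stand-ins for the driver's e100_get/set_mdio_reg():

#include <linux/mii.h>
#include <linux/types.h>

static u16 fake_phy[32];	/* stand-in register file for the sketch */
static u16 mdio_read(int reg) { return fake_phy[reg]; }
static void mdio_write(int reg, u16 val) { fake_phy[reg] = val; }

static void force_link(int speed100, int full_duplex)
{
	u16 bmcr = mdio_read(MII_BMCR);

	bmcr &= ~(BMCR_ANENABLE | BMCR_ANRESTART);	/* no renegotiation */

	if (speed100)
		bmcr |= BMCR_SPEED100;
	else
		bmcr &= ~BMCR_SPEED100;

	if (full_duplex)
		bmcr |= BMCR_FULLDPLX;
	else
		bmcr &= ~BMCR_FULLDPLX;

	mdio_write(MII_BMCR, bmcr);
}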
diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c
index 1445e5865196..84db145d2b59 100644
--- a/drivers/net/irda/pxaficp_ir.c
+++ b/drivers/net/irda/pxaficp_ir.c
@@ -17,6 +17,7 @@
17#include <linux/etherdevice.h> 17#include <linux/etherdevice.h>
18#include <linux/platform_device.h> 18#include <linux/platform_device.h>
19#include <linux/clk.h> 19#include <linux/clk.h>
20#include <linux/gpio.h>
20 21
21#include <net/irda/irda.h> 22#include <net/irda/irda.h>
22#include <net/irda/irmod.h> 23#include <net/irda/irmod.h>
@@ -163,6 +164,22 @@ inline static void pxa_irda_fir_dma_tx_start(struct pxa_irda *si)
163} 164}
164 165
165/* 166/*
167 * Set the IrDA communications mode.
168 */
169static void pxa_irda_set_mode(struct pxa_irda *si, int mode)
170{
171 if (si->pdata->transceiver_mode)
172 si->pdata->transceiver_mode(si->dev, mode);
173 else {
174 if (gpio_is_valid(si->pdata->gpio_pwdown))
175 gpio_set_value(si->pdata->gpio_pwdown,
176 !(mode & IR_OFF) ^
177 !si->pdata->gpio_pwdown_inverted);
178 pxa2xx_transceiver_mode(si->dev, mode);
179 }
180}
181
182/*
166 * Set the IrDA communications speed. 183 * Set the IrDA communications speed.
167 */ 184 */
168static int pxa_irda_set_speed(struct pxa_irda *si, int speed) 185static int pxa_irda_set_speed(struct pxa_irda *si, int speed)
@@ -188,7 +205,7 @@ static int pxa_irda_set_speed(struct pxa_irda *si, int speed)
188 pxa_irda_disable_clk(si); 205 pxa_irda_disable_clk(si);
189 206
190 /* set board transceiver to SIR mode */ 207 /* set board transceiver to SIR mode */
191 si->pdata->transceiver_mode(si->dev, IR_SIRMODE); 208 pxa_irda_set_mode(si, IR_SIRMODE);
192 209
193 /* enable the STUART clock */ 210 /* enable the STUART clock */
194 pxa_irda_enable_sirclk(si); 211 pxa_irda_enable_sirclk(si);
@@ -222,7 +239,7 @@ static int pxa_irda_set_speed(struct pxa_irda *si, int speed)
222 ICCR0 = 0; 239 ICCR0 = 0;
223 240
224 /* set board transceiver to FIR mode */ 241 /* set board transceiver to FIR mode */
225 si->pdata->transceiver_mode(si->dev, IR_FIRMODE); 242 pxa_irda_set_mode(si, IR_FIRMODE);
226 243
227 /* enable the FICP clock */ 244 /* enable the FICP clock */
228 pxa_irda_enable_firclk(si); 245 pxa_irda_enable_firclk(si);
@@ -641,7 +658,7 @@ static void pxa_irda_shutdown(struct pxa_irda *si)
641 local_irq_restore(flags); 658 local_irq_restore(flags);
642 659
643 /* power off board transceiver */ 660 /* power off board transceiver */
644 si->pdata->transceiver_mode(si->dev, IR_OFF); 661 pxa_irda_set_mode(si, IR_OFF);
645 662
646 printk(KERN_DEBUG "pxa_ir: irda shutdown\n"); 663 printk(KERN_DEBUG "pxa_ir: irda shutdown\n");
647} 664}
@@ -849,10 +866,26 @@ static int pxa_irda_probe(struct platform_device *pdev)
849 if (err) 866 if (err)
850 goto err_mem_5; 867 goto err_mem_5;
851 868
852 if (si->pdata->startup) 869 if (gpio_is_valid(si->pdata->gpio_pwdown)) {
870 err = gpio_request(si->pdata->gpio_pwdown, "IrDA switch");
871 if (err)
872 goto err_startup;
873 err = gpio_direction_output(si->pdata->gpio_pwdown,
874 !si->pdata->gpio_pwdown_inverted);
875 if (err) {
876 gpio_free(si->pdata->gpio_pwdown);
877 goto err_startup;
878 }
879 }
880
881 if (si->pdata->startup) {
853 err = si->pdata->startup(si->dev); 882 err = si->pdata->startup(si->dev);
854 if (err) 883 if (err)
855 goto err_startup; 884 goto err_startup;
885 }
886
887 if (gpio_is_valid(si->pdata->gpio_pwdown) && si->pdata->startup)
888 dev_warn(si->dev, "gpio_pwdown and startup() both defined!\n");
856 889
857 dev->netdev_ops = &pxa_irda_netdev_ops; 890 dev->netdev_ops = &pxa_irda_netdev_ops;
858 891
@@ -903,6 +936,8 @@ static int pxa_irda_remove(struct platform_device *_dev)
903 if (dev) { 936 if (dev) {
904 struct pxa_irda *si = netdev_priv(dev); 937 struct pxa_irda *si = netdev_priv(dev);
905 unregister_netdev(dev); 938 unregister_netdev(dev);
939 if (gpio_is_valid(si->pdata->gpio_pwdown))
940 gpio_free(si->pdata->gpio_pwdown);
906 if (si->pdata->shutdown) 941 if (si->pdata->shutdown)
907 si->pdata->shutdown(si->dev); 942 si->pdata->shutdown(si->dev);
908 kfree(si->tx_buff.head); 943 kfree(si->tx_buff.head);
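pxa_irda_set_mode() keeps the old board callback when one is provided and otherwise drives a power-down GPIO, normalizing both the requested state and the polarity flag with ! before XOR-ing them, so active-low wiring becomes a one-flag change in platform data. The core of that idiom, as a sketch:

#include <linux/gpio.h>
#include <linux/types.h>

static void set_transceiver_power(int gpio, bool want_on, bool active_low)
{
	/* normalized booleans XORed: one platform flag flips polarity */
	gpio_set_value(gpio, want_on ^ active_low);
}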
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index cee199ceba2f..3c16602172fc 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -33,6 +33,7 @@
33 */ 33 */
34 34
35#include <linux/mlx4/cmd.h> 35#include <linux/mlx4/cmd.h>
36#include <linux/cache.h>
36 37
37#include "fw.h" 38#include "fw.h"
38#include "icm.h" 39#include "icm.h"
@@ -698,6 +699,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
698#define INIT_HCA_IN_SIZE 0x200 699#define INIT_HCA_IN_SIZE 0x200
699#define INIT_HCA_VERSION_OFFSET 0x000 700#define INIT_HCA_VERSION_OFFSET 0x000
700#define INIT_HCA_VERSION 2 701#define INIT_HCA_VERSION 2
702#define INIT_HCA_CACHELINE_SZ_OFFSET 0x0e
701#define INIT_HCA_FLAGS_OFFSET 0x014 703#define INIT_HCA_FLAGS_OFFSET 0x014
702#define INIT_HCA_QPC_OFFSET 0x020 704#define INIT_HCA_QPC_OFFSET 0x020
703#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10) 705#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10)
@@ -735,6 +737,9 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
735 737
736 *((u8 *) mailbox->buf + INIT_HCA_VERSION_OFFSET) = INIT_HCA_VERSION; 738 *((u8 *) mailbox->buf + INIT_HCA_VERSION_OFFSET) = INIT_HCA_VERSION;
737 739
740 *((u8 *) mailbox->buf + INIT_HCA_CACHELINE_SZ_OFFSET) =
741 (ilog2(cache_line_size()) - 4) << 5;
742
738#if defined(__LITTLE_ENDIAN) 743#if defined(__LITTLE_ENDIAN)
739 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1); 744 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
740#elif defined(__BIG_ENDIAN) 745#elif defined(__BIG_ENDIAN)
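The mlx4 addition reports the host cacheline size to the HCA during INIT_HCA: the byte at offset 0x0e encodes log2(line size) minus 4 in bits 7:5. Worked through for a common 64-byte line: ilog2(64) = 6, 6 - 4 = 2, and 2 << 5 = 0x40. As a self-contained sketch:

#include <linux/cache.h>
#include <linux/log2.h>
#include <linux/types.h>

static u8 hca_cacheline_byte(void)
{
	/* 64-byte lines: ilog2(64) = 6, (6 - 4) << 5 = 0x40 */
	return (ilog2(cache_line_size()) - 4) << 5;
}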
diff --git a/drivers/net/wireless/arlan-proc.c b/drivers/net/wireless/arlan-proc.c
index 2ab1d59870f4..a8b689635a3b 100644
--- a/drivers/net/wireless/arlan-proc.c
+++ b/drivers/net/wireless/arlan-proc.c
@@ -402,7 +402,7 @@ static int arlan_setup_card_by_book(struct net_device *dev)
402 402
403static char arlan_drive_info[ARLAN_STR_SIZE] = "A655\n\0"; 403static char arlan_drive_info[ARLAN_STR_SIZE] = "A655\n\0";
404 404
405static int arlan_sysctl_info(ctl_table * ctl, int write, struct file *filp, 405static int arlan_sysctl_info(ctl_table * ctl, int write,
406 void __user *buffer, size_t * lenp, loff_t *ppos) 406 void __user *buffer, size_t * lenp, loff_t *ppos)
407{ 407{
408 int i; 408 int i;
@@ -629,7 +629,7 @@ final:
629 *lenp = pos; 629 *lenp = pos;
630 630
631 if (!write) 631 if (!write)
632 retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); 632 retv = proc_dostring(ctl, write, buffer, lenp, ppos);
633 else 633 else
634 { 634 {
635 *lenp = 0; 635 *lenp = 0;
@@ -639,7 +639,7 @@ final:
639} 639}
640 640
641 641
642static int arlan_sysctl_info161719(ctl_table * ctl, int write, struct file *filp, 642static int arlan_sysctl_info161719(ctl_table * ctl, int write,
643 void __user *buffer, size_t * lenp, loff_t *ppos) 643 void __user *buffer, size_t * lenp, loff_t *ppos)
644{ 644{
645 int i; 645 int i;
@@ -669,11 +669,11 @@ static int arlan_sysctl_info161719(ctl_table * ctl, int write, struct file *filp
669 669
670final: 670final:
671 *lenp = pos; 671 *lenp = pos;
672 retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); 672 retv = proc_dostring(ctl, write, buffer, lenp, ppos);
673 return retv; 673 return retv;
674} 674}
675 675
676static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, struct file *filp, 676static int arlan_sysctl_infotxRing(ctl_table * ctl, int write,
677 void __user *buffer, size_t * lenp, loff_t *ppos) 677 void __user *buffer, size_t * lenp, loff_t *ppos)
678{ 678{
679 int i; 679 int i;
@@ -698,11 +698,11 @@ static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, struct file *filp
698 SARLBNpln(u_char, txBuffer, 0x800); 698 SARLBNpln(u_char, txBuffer, 0x800);
699final: 699final:
700 *lenp = pos; 700 *lenp = pos;
701 retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); 701 retv = proc_dostring(ctl, write, buffer, lenp, ppos);
702 return retv; 702 return retv;
703} 703}
704 704
705static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, struct file *filp, 705static int arlan_sysctl_inforxRing(ctl_table * ctl, int write,
706 void __user *buffer, size_t * lenp, loff_t *ppos) 706 void __user *buffer, size_t * lenp, loff_t *ppos)
707{ 707{
708 int i; 708 int i;
@@ -726,11 +726,11 @@ static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, struct file *filp
726 SARLBNpln(u_char, rxBuffer, 0x800); 726 SARLBNpln(u_char, rxBuffer, 0x800);
727final: 727final:
728 *lenp = pos; 728 *lenp = pos;
729 retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); 729 retv = proc_dostring(ctl, write, buffer, lenp, ppos);
730 return retv; 730 return retv;
731} 731}
732 732
733static int arlan_sysctl_info18(ctl_table * ctl, int write, struct file *filp, 733static int arlan_sysctl_info18(ctl_table * ctl, int write,
734 void __user *buffer, size_t * lenp, loff_t *ppos) 734 void __user *buffer, size_t * lenp, loff_t *ppos)
735{ 735{
736 int i; 736 int i;
@@ -756,7 +756,7 @@ static int arlan_sysctl_info18(ctl_table * ctl, int write, struct file *filp,
756 756
757final: 757final:
758 *lenp = pos; 758 *lenp = pos;
759 retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); 759 retv = proc_dostring(ctl, write, buffer, lenp, ppos);
760 return retv; 760 return retv;
761} 761}
762 762
@@ -766,7 +766,7 @@ final:
766 766
767static char conf_reset_result[200]; 767static char conf_reset_result[200];
768 768
769static int arlan_configure(ctl_table * ctl, int write, struct file *filp, 769static int arlan_configure(ctl_table * ctl, int write,
770 void __user *buffer, size_t * lenp, loff_t *ppos) 770 void __user *buffer, size_t * lenp, loff_t *ppos)
771{ 771{
772 int pos = 0; 772 int pos = 0;
@@ -788,10 +788,10 @@ static int arlan_configure(ctl_table * ctl, int write, struct file *filp,
788 return -1; 788 return -1;
789 789
790 *lenp = pos; 790 *lenp = pos;
791 return proc_dostring(ctl, write, filp, buffer, lenp, ppos); 791 return proc_dostring(ctl, write, buffer, lenp, ppos);
792} 792}
793 793
794static int arlan_sysctl_reset(ctl_table * ctl, int write, struct file *filp, 794static int arlan_sysctl_reset(ctl_table * ctl, int write,
795 void __user *buffer, size_t * lenp, loff_t *ppos) 795 void __user *buffer, size_t * lenp, loff_t *ppos)
796{ 796{
797 int pos = 0; 797 int pos = 0;
@@ -811,7 +811,7 @@ static int arlan_sysctl_reset(ctl_table * ctl, int write, struct file *filp,
811 } else 811 } else
812 return -1; 812 return -1;
813 *lenp = pos + 3; 813 *lenp = pos + 3;
814 return proc_dostring(ctl, write, filp, buffer, lenp, ppos); 814 return proc_dostring(ctl, write, buffer, lenp, ppos);
815} 815}
816 816
817 817
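All of these arlan handlers are being adapted to the sysctl API change that dropped the struct file * argument from proc_handler: both the handler prototypes and every proc_dostring()/proc_do*() call lose filp. The post-change shape, as a minimal sketch:

#include <linux/sysctl.h>

static int demo_handler(ctl_table *ctl, int write,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	/* fill or validate the backing buffer here, then defer */
	return proc_dostring(ctl, write, buffer, lenp, ppos);
}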
diff --git a/drivers/parport/procfs.c b/drivers/parport/procfs.c
index 554e11f9e1ce..8eefe56f1cbe 100644
--- a/drivers/parport/procfs.c
+++ b/drivers/parport/procfs.c
@@ -31,7 +31,7 @@
31#define PARPORT_MIN_SPINTIME_VALUE 1 31#define PARPORT_MIN_SPINTIME_VALUE 1
32#define PARPORT_MAX_SPINTIME_VALUE 1000 32#define PARPORT_MAX_SPINTIME_VALUE 1000
33 33
34static int do_active_device(ctl_table *table, int write, struct file *filp, 34static int do_active_device(ctl_table *table, int write,
35 void __user *result, size_t *lenp, loff_t *ppos) 35 void __user *result, size_t *lenp, loff_t *ppos)
36{ 36{
37 struct parport *port = (struct parport *)table->extra1; 37 struct parport *port = (struct parport *)table->extra1;
@@ -68,7 +68,7 @@ static int do_active_device(ctl_table *table, int write, struct file *filp,
68} 68}
69 69
70#ifdef CONFIG_PARPORT_1284 70#ifdef CONFIG_PARPORT_1284
71static int do_autoprobe(ctl_table *table, int write, struct file *filp, 71static int do_autoprobe(ctl_table *table, int write,
72 void __user *result, size_t *lenp, loff_t *ppos) 72 void __user *result, size_t *lenp, loff_t *ppos)
73{ 73{
74 struct parport_device_info *info = table->extra2; 74 struct parport_device_info *info = table->extra2;
@@ -111,7 +111,7 @@ static int do_autoprobe(ctl_table *table, int write, struct file *filp,
111#endif /* IEEE1284.3 support. */ 111#endif /* IEEE1284.3 support. */
112 112
113static int do_hardware_base_addr (ctl_table *table, int write, 113static int do_hardware_base_addr (ctl_table *table, int write,
114 struct file *filp, void __user *result, 114 void __user *result,
115 size_t *lenp, loff_t *ppos) 115 size_t *lenp, loff_t *ppos)
116{ 116{
117 struct parport *port = (struct parport *)table->extra1; 117 struct parport *port = (struct parport *)table->extra1;
@@ -139,7 +139,7 @@ static int do_hardware_base_addr (ctl_table *table, int write,
139} 139}
140 140
141static int do_hardware_irq (ctl_table *table, int write, 141static int do_hardware_irq (ctl_table *table, int write,
142 struct file *filp, void __user *result, 142 void __user *result,
143 size_t *lenp, loff_t *ppos) 143 size_t *lenp, loff_t *ppos)
144{ 144{
145 struct parport *port = (struct parport *)table->extra1; 145 struct parport *port = (struct parport *)table->extra1;
@@ -167,7 +167,7 @@ static int do_hardware_irq (ctl_table *table, int write,
167} 167}
168 168
169static int do_hardware_dma (ctl_table *table, int write, 169static int do_hardware_dma (ctl_table *table, int write,
170 struct file *filp, void __user *result, 170 void __user *result,
171 size_t *lenp, loff_t *ppos) 171 size_t *lenp, loff_t *ppos)
172{ 172{
173 struct parport *port = (struct parport *)table->extra1; 173 struct parport *port = (struct parport *)table->extra1;
@@ -195,7 +195,7 @@ static int do_hardware_dma (ctl_table *table, int write,
195} 195}
196 196
197static int do_hardware_modes (ctl_table *table, int write, 197static int do_hardware_modes (ctl_table *table, int write,
198 struct file *filp, void __user *result, 198 void __user *result,
199 size_t *lenp, loff_t *ppos) 199 size_t *lenp, loff_t *ppos)
200{ 200{
201 struct parport *port = (struct parport *)table->extra1; 201 struct parport *port = (struct parport *)table->extra1;
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 36faa9a8e18f..3070f77eb56a 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -72,15 +72,9 @@ do { \
72 72
73#define SLOT_NAME_SIZE 10 73#define SLOT_NAME_SIZE 10
74struct slot { 74struct slot {
75 u8 bus;
76 u8 device;
77 u8 state; 75 u8 state;
78 u8 hp_slot;
79 u32 number;
80 struct controller *ctrl; 76 struct controller *ctrl;
81 struct hpc_ops *hpc_ops;
82 struct hotplug_slot *hotplug_slot; 77 struct hotplug_slot *hotplug_slot;
83 struct list_head slot_list;
84 struct delayed_work work; /* work for button event */ 78 struct delayed_work work; /* work for button event */
85 struct mutex lock; 79 struct mutex lock;
86}; 80};
@@ -92,18 +86,10 @@ struct event_info {
92}; 86};
93 87
94struct controller { 88struct controller {
95 struct mutex crit_sect; /* critical section mutex */
96 struct mutex ctrl_lock; /* controller lock */ 89 struct mutex ctrl_lock; /* controller lock */
97 int num_slots; /* Number of slots on ctlr */
98 int slot_num_inc; /* 1 or -1 */
99 struct pci_dev *pci_dev;
100 struct pcie_device *pcie; /* PCI Express port service */ 90 struct pcie_device *pcie; /* PCI Express port service */
101 struct list_head slot_list; 91 struct slot *slot;
102 struct hpc_ops *hpc_ops;
103 wait_queue_head_t queue; /* sleep & wake process */ 92 wait_queue_head_t queue; /* sleep & wake process */
104 u8 slot_device_offset;
105 u32 first_slot; /* First physical slot number */ /* PCIE only has 1 slot */
106 u8 slot_bus; /* Bus where the slots handled by this controller sit */
107 u32 slot_cap; 93 u32 slot_cap;
108 u8 cap_base; 94 u8 cap_base;
109 struct timer_list poll_timer; 95 struct timer_list poll_timer;
@@ -131,40 +117,20 @@ struct controller {
131#define POWERON_STATE 3 117#define POWERON_STATE 3
132#define POWEROFF_STATE 4 118#define POWEROFF_STATE 4
133 119
134/* Error messages */ 120#define ATTN_BUTTN(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_ABP)
135#define INTERLOCK_OPEN 0x00000002 121#define POWER_CTRL(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_PCP)
136#define ADD_NOT_SUPPORTED 0x00000003 122#define MRL_SENS(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_MRLSP)
137#define CARD_FUNCTIONING 0x00000005 123#define ATTN_LED(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_AIP)
138#define ADAPTER_NOT_SAME 0x00000006 124#define PWR_LED(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_PIP)
139#define NO_ADAPTER_PRESENT 0x00000009 125#define HP_SUPR_RM(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_HPS)
140#define NOT_ENOUGH_RESOURCES 0x0000000B 126#define EMI(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_EIP)
141#define DEVICE_TYPE_NOT_SUPPORTED 0x0000000C 127#define NO_CMD_CMPL(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_NCCS)
142#define WRONG_BUS_FREQUENCY 0x0000000D 128#define PSN(ctrl) ((ctrl)->slot_cap >> 19)
143#define POWER_FAILURE 0x0000000E
144
145/* Field definitions in Slot Capabilities Register */
146#define ATTN_BUTTN_PRSN 0x00000001
147#define PWR_CTRL_PRSN 0x00000002
148#define MRL_SENS_PRSN 0x00000004
149#define ATTN_LED_PRSN 0x00000008
150#define PWR_LED_PRSN 0x00000010
151#define HP_SUPR_RM_SUP 0x00000020
152#define EMI_PRSN 0x00020000
153#define NO_CMD_CMPL_SUP 0x00040000
154
155#define ATTN_BUTTN(ctrl) ((ctrl)->slot_cap & ATTN_BUTTN_PRSN)
156#define POWER_CTRL(ctrl) ((ctrl)->slot_cap & PWR_CTRL_PRSN)
157#define MRL_SENS(ctrl) ((ctrl)->slot_cap & MRL_SENS_PRSN)
158#define ATTN_LED(ctrl) ((ctrl)->slot_cap & ATTN_LED_PRSN)
159#define PWR_LED(ctrl) ((ctrl)->slot_cap & PWR_LED_PRSN)
160#define HP_SUPR_RM(ctrl) ((ctrl)->slot_cap & HP_SUPR_RM_SUP)
161#define EMI(ctrl) ((ctrl)->slot_cap & EMI_PRSN)
162#define NO_CMD_CMPL(ctrl) ((ctrl)->slot_cap & NO_CMD_CMPL_SUP)
163 129
164extern int pciehp_sysfs_enable_slot(struct slot *slot); 130extern int pciehp_sysfs_enable_slot(struct slot *slot);
165extern int pciehp_sysfs_disable_slot(struct slot *slot); 131extern int pciehp_sysfs_disable_slot(struct slot *slot);
166extern u8 pciehp_handle_attention_button(struct slot *p_slot); 132extern u8 pciehp_handle_attention_button(struct slot *p_slot);
167extern u8 pciehp_handle_switch_change(struct slot *p_slot); 133extern u8 pciehp_handle_switch_change(struct slot *p_slot);
168extern u8 pciehp_handle_presence_change(struct slot *p_slot); 134extern u8 pciehp_handle_presence_change(struct slot *p_slot);
169extern u8 pciehp_handle_power_fault(struct slot *p_slot); 135extern u8 pciehp_handle_power_fault(struct slot *p_slot);
170extern int pciehp_configure_device(struct slot *p_slot); 136extern int pciehp_configure_device(struct slot *p_slot);
@@ -175,45 +141,30 @@ int pcie_init_notification(struct controller *ctrl);
175int pciehp_enable_slot(struct slot *p_slot); 141int pciehp_enable_slot(struct slot *p_slot);
176int pciehp_disable_slot(struct slot *p_slot); 142int pciehp_disable_slot(struct slot *p_slot);
177int pcie_enable_notification(struct controller *ctrl); 143int pcie_enable_notification(struct controller *ctrl);
144int pciehp_power_on_slot(struct slot *slot);
145int pciehp_power_off_slot(struct slot *slot);
146int pciehp_get_power_status(struct slot *slot, u8 *status);
147int pciehp_get_attention_status(struct slot *slot, u8 *status);
148
149int pciehp_set_attention_status(struct slot *slot, u8 status);
150int pciehp_get_latch_status(struct slot *slot, u8 *status);
151int pciehp_get_adapter_status(struct slot *slot, u8 *status);
152int pciehp_get_max_link_speed(struct slot *slot, enum pci_bus_speed *speed);
153int pciehp_get_max_link_width(struct slot *slot, enum pcie_link_width *val);
154int pciehp_get_cur_link_speed(struct slot *slot, enum pci_bus_speed *speed);
155int pciehp_get_cur_link_width(struct slot *slot, enum pcie_link_width *val);
156int pciehp_query_power_fault(struct slot *slot);
157void pciehp_green_led_on(struct slot *slot);
158void pciehp_green_led_off(struct slot *slot);
159void pciehp_green_led_blink(struct slot *slot);
160int pciehp_check_link_status(struct controller *ctrl);
161void pciehp_release_ctrl(struct controller *ctrl);
178 162
179static inline const char *slot_name(struct slot *slot) 163static inline const char *slot_name(struct slot *slot)
180{ 164{
181 return hotplug_slot_name(slot->hotplug_slot); 165 return hotplug_slot_name(slot->hotplug_slot);
182} 166}
183 167
184static inline struct slot *pciehp_find_slot(struct controller *ctrl, u8 device)
185{
186 struct slot *slot;
187
188 list_for_each_entry(slot, &ctrl->slot_list, slot_list) {
189 if (slot->device == device)
190 return slot;
191 }
192
193 ctrl_err(ctrl, "Slot (device=0x%02x) not found\n", device);
194 return NULL;
195}
196
197struct hpc_ops {
198 int (*power_on_slot)(struct slot *slot);
199 int (*power_off_slot)(struct slot *slot);
200 int (*get_power_status)(struct slot *slot, u8 *status);
201 int (*get_attention_status)(struct slot *slot, u8 *status);
202 int (*set_attention_status)(struct slot *slot, u8 status);
203 int (*get_latch_status)(struct slot *slot, u8 *status);
204 int (*get_adapter_status)(struct slot *slot, u8 *status);
205 int (*get_max_bus_speed)(struct slot *slot, enum pci_bus_speed *speed);
206 int (*get_cur_bus_speed)(struct slot *slot, enum pci_bus_speed *speed);
207 int (*get_max_lnk_width)(struct slot *slot, enum pcie_link_width *val);
208 int (*get_cur_lnk_width)(struct slot *slot, enum pcie_link_width *val);
209 int (*query_power_fault)(struct slot *slot);
210 void (*green_led_on)(struct slot *slot);
211 void (*green_led_off)(struct slot *slot);
212 void (*green_led_blink)(struct slot *slot);
213 void (*release_ctlr)(struct controller *ctrl);
214 int (*check_lnk_status)(struct controller *ctrl);
215};
216
217#ifdef CONFIG_ACPI 168#ifdef CONFIG_ACPI
218#include <acpi/acpi.h> 169#include <acpi/acpi.h>
219#include <acpi/acpi_bus.h> 170#include <acpi/acpi_bus.h>
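This header rework removes the hpc_ops indirection in favor of direct pciehp_* calls, shrinks struct slot and struct controller down to a single slot per controller, and replaces the driver-private *_PRSN constants with the generic PCI_EXP_SLTCAP_* masks; PSN() pulls the Physical Slot Number out of bits 31:19 of the Slot Capabilities register. A sketch of decoding with the generic masks:

#include <linux/pci_regs.h>
#include <linux/types.h>

static void decode_slot_cap(u32 slot_cap)
{
	int attn_button = !!(slot_cap & PCI_EXP_SLTCAP_ABP);
	int power_ctrl  = !!(slot_cap & PCI_EXP_SLTCAP_PCP);
	u32 phys_slot   = slot_cap >> 19;	/* PSN field, bits 31:19 */

	(void)attn_button;
	(void)power_ctrl;
	(void)phys_slot;
}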
diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c
index 7163e6a6cfae..37c8d3d0323e 100644
--- a/drivers/pci/hotplug/pciehp_acpi.c
+++ b/drivers/pci/hotplug/pciehp_acpi.c
@@ -33,6 +33,11 @@
33#define PCIEHP_DETECT_AUTO (2) 33#define PCIEHP_DETECT_AUTO (2)
34#define PCIEHP_DETECT_DEFAULT PCIEHP_DETECT_AUTO 34#define PCIEHP_DETECT_DEFAULT PCIEHP_DETECT_AUTO
35 35
36struct dummy_slot {
37 u32 number;
38 struct list_head list;
39};
40
36static int slot_detection_mode; 41static int slot_detection_mode;
37static char *pciehp_detect_mode; 42static char *pciehp_detect_mode;
38module_param(pciehp_detect_mode, charp, 0444); 43module_param(pciehp_detect_mode, charp, 0444);
@@ -77,7 +82,7 @@ static int __init dummy_probe(struct pcie_device *dev)
77 int pos; 82 int pos;
78 u32 slot_cap; 83 u32 slot_cap;
79 acpi_handle handle; 84 acpi_handle handle;
80 struct slot *slot, *tmp; 85 struct dummy_slot *slot, *tmp;
81 struct pci_dev *pdev = dev->port; 86 struct pci_dev *pdev = dev->port;
82 /* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */ 87 /* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */
83 if (pciehp_get_hp_hw_control_from_firmware(pdev)) 88 if (pciehp_get_hp_hw_control_from_firmware(pdev))
@@ -89,11 +94,11 @@ static int __init dummy_probe(struct pcie_device *dev)
89 if (!slot) 94 if (!slot)
90 return -ENOMEM; 95 return -ENOMEM;
91 slot->number = slot_cap >> 19; 96 slot->number = slot_cap >> 19;
92 list_for_each_entry(tmp, &dummy_slots, slot_list) { 97 list_for_each_entry(tmp, &dummy_slots, list) {
93 if (tmp->number == slot->number) 98 if (tmp->number == slot->number)
94 dup_slot_id++; 99 dup_slot_id++;
95 } 100 }
96 list_add_tail(&slot->slot_list, &dummy_slots); 101 list_add_tail(&slot->list, &dummy_slots);
97 handle = DEVICE_ACPI_HANDLE(&pdev->dev); 102 handle = DEVICE_ACPI_HANDLE(&pdev->dev);
98 if (!acpi_slot_detected && acpi_pci_detect_ejectable(handle)) 103 if (!acpi_slot_detected && acpi_pci_detect_ejectable(handle))
99 acpi_slot_detected = 1; 104 acpi_slot_detected = 1;
@@ -109,11 +114,11 @@ static struct pcie_port_service_driver __initdata dummy_driver = {
109 114
110static int __init select_detection_mode(void) 115static int __init select_detection_mode(void)
111{ 116{
112 struct slot *slot, *tmp; 117 struct dummy_slot *slot, *tmp;
113 pcie_port_service_register(&dummy_driver); 118 pcie_port_service_register(&dummy_driver);
114 pcie_port_service_unregister(&dummy_driver); 119 pcie_port_service_unregister(&dummy_driver);
115 list_for_each_entry_safe(slot, tmp, &dummy_slots, slot_list) { 120 list_for_each_entry_safe(slot, tmp, &dummy_slots, list) {
116 list_del(&slot->slot_list); 121 list_del(&slot->list);
117 kfree(slot); 122 kfree(slot);
118 } 123 }
119 if (acpi_slot_detected && dup_slot_id) 124 if (acpi_slot_detected && dup_slot_id)
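dummy_probe() now tracks candidate slots with its own small dummy_slot type, and select_detection_mode() drains the list with list_for_each_entry_safe(), the variant that caches the next pointer so the current entry can be unlinked and freed mid-walk. The teardown idiom in isolation:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/types.h>

struct dummy_slot_sketch {
	u32 number;
	struct list_head list;
};

static void drain(struct list_head *head)
{
	struct dummy_slot_sketch *s, *tmp;

	/* _safe caches the successor, so deleting s mid-walk is legal */
	list_for_each_entry_safe(s, tmp, head, list) {
		list_del(&s->list);
		kfree(s);
	}
}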
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 2317557fdee6..bc234719b1df 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -99,65 +99,55 @@ static void release_slot(struct hotplug_slot *hotplug_slot)
99 kfree(hotplug_slot); 99 kfree(hotplug_slot);
100} 100}
101 101
102static int init_slots(struct controller *ctrl) 102static int init_slot(struct controller *ctrl)
103{ 103{
104 struct slot *slot; 104 struct slot *slot = ctrl->slot;
105 struct hotplug_slot *hotplug_slot; 105 struct hotplug_slot *hotplug = NULL;
106 struct hotplug_slot_info *info; 106 struct hotplug_slot_info *info = NULL;
107 char name[SLOT_NAME_SIZE]; 107 char name[SLOT_NAME_SIZE];
108 int retval = -ENOMEM; 108 int retval = -ENOMEM;
109 109
110 list_for_each_entry(slot, &ctrl->slot_list, slot_list) { 110 hotplug = kzalloc(sizeof(*hotplug), GFP_KERNEL);
111 hotplug_slot = kzalloc(sizeof(*hotplug_slot), GFP_KERNEL); 111 if (!hotplug)
112 if (!hotplug_slot) 112 goto out;
113 goto error; 113
114 114 info = kzalloc(sizeof(*info), GFP_KERNEL);
115 info = kzalloc(sizeof(*info), GFP_KERNEL); 115 if (!info)
116 if (!info) 116 goto out;
117 goto error_hpslot; 117
118 118 /* register this slot with the hotplug pci core */
119 /* register this slot with the hotplug pci core */ 119 hotplug->info = info;
120 hotplug_slot->info = info; 120 hotplug->private = slot;
121 hotplug_slot->private = slot; 121 hotplug->release = &release_slot;
122 hotplug_slot->release = &release_slot; 122 hotplug->ops = &pciehp_hotplug_slot_ops;
123 hotplug_slot->ops = &pciehp_hotplug_slot_ops; 123 slot->hotplug_slot = hotplug;
124 slot->hotplug_slot = hotplug_slot; 124 snprintf(name, SLOT_NAME_SIZE, "%u", PSN(ctrl));
125 snprintf(name, SLOT_NAME_SIZE, "%u", slot->number); 125
126 126 ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:00 sun=%x\n",
127 ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:%02x " 127 pci_domain_nr(ctrl->pcie->port->subordinate),
128 "hp_slot=%x sun=%x slot_device_offset=%x\n", 128 ctrl->pcie->port->subordinate->number, PSN(ctrl));
129 pci_domain_nr(ctrl->pci_dev->subordinate), 129 retval = pci_hp_register(hotplug,
130 slot->bus, slot->device, slot->hp_slot, slot->number, 130 ctrl->pcie->port->subordinate, 0, name);
131 ctrl->slot_device_offset); 131 if (retval) {
132 retval = pci_hp_register(hotplug_slot, 132 ctrl_err(ctrl,
133 ctrl->pci_dev->subordinate, 133 "pci_hp_register failed with error %d\n", retval);
134 slot->device, 134 goto out;
135 name); 135 }
136 if (retval) { 136 get_power_status(hotplug, &info->power_status);
137 ctrl_err(ctrl, "pci_hp_register failed with error %d\n", 137 get_attention_status(hotplug, &info->attention_status);
138 retval); 138 get_latch_status(hotplug, &info->latch_status);
139 goto error_info; 139 get_adapter_status(hotplug, &info->adapter_status);
140 } 140out:
141 get_power_status(hotplug_slot, &info->power_status); 141 if (retval) {
142 get_attention_status(hotplug_slot, &info->attention_status); 142 kfree(info);
143 get_latch_status(hotplug_slot, &info->latch_status); 143 kfree(hotplug);
144 get_adapter_status(hotplug_slot, &info->adapter_status);
145 } 144 }
146
147 return 0;
148error_info:
149 kfree(info);
150error_hpslot:
151 kfree(hotplug_slot);
152error:
153 return retval; 145 return retval;
154} 146}
155 147
156static void cleanup_slots(struct controller *ctrl) 148static void cleanup_slot(struct controller *ctrl)
157{ 149{
158 struct slot *slot; 150 pci_hp_deregister(ctrl->slot->hotplug_slot);
159 list_for_each_entry(slot, &ctrl->slot_list, slot_list)
160 pci_hp_deregister(slot->hotplug_slot);
161} 151}
162 152
163/* 153/*
@@ -173,7 +163,7 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
173 hotplug_slot->info->attention_status = status; 163 hotplug_slot->info->attention_status = status;
174 164
175 if (ATTN_LED(slot->ctrl)) 165 if (ATTN_LED(slot->ctrl))
176 slot->hpc_ops->set_attention_status(slot, status); 166 pciehp_set_attention_status(slot, status);
177 167
178 return 0; 168 return 0;
179} 169}
@@ -208,7 +198,7 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
208 ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", 198 ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
209 __func__, slot_name(slot)); 199 __func__, slot_name(slot));
210 200
211 retval = slot->hpc_ops->get_power_status(slot, value); 201 retval = pciehp_get_power_status(slot, value);
212 if (retval < 0) 202 if (retval < 0)
213 *value = hotplug_slot->info->power_status; 203 *value = hotplug_slot->info->power_status;
214 204
@@ -223,7 +213,7 @@ static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
223 ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", 213 ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
224 __func__, slot_name(slot)); 214 __func__, slot_name(slot));
225 215
226 retval = slot->hpc_ops->get_attention_status(slot, value); 216 retval = pciehp_get_attention_status(slot, value);
227 if (retval < 0) 217 if (retval < 0)
228 *value = hotplug_slot->info->attention_status; 218 *value = hotplug_slot->info->attention_status;
229 219
@@ -238,7 +228,7 @@ static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
238 ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", 228 ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
239 __func__, slot_name(slot)); 229 __func__, slot_name(slot));
240 230
241 retval = slot->hpc_ops->get_latch_status(slot, value); 231 retval = pciehp_get_latch_status(slot, value);
242 if (retval < 0) 232 if (retval < 0)
243 *value = hotplug_slot->info->latch_status; 233 *value = hotplug_slot->info->latch_status;
244 234
@@ -253,7 +243,7 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
253 ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", 243 ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
254 __func__, slot_name(slot)); 244 __func__, slot_name(slot));
255 245
256 retval = slot->hpc_ops->get_adapter_status(slot, value); 246 retval = pciehp_get_adapter_status(slot, value);
257 if (retval < 0) 247 if (retval < 0)
258 *value = hotplug_slot->info->adapter_status; 248 *value = hotplug_slot->info->adapter_status;
259 249
@@ -269,7 +259,7 @@ static int get_max_bus_speed(struct hotplug_slot *hotplug_slot,
269 ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", 259 ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
270 __func__, slot_name(slot)); 260 __func__, slot_name(slot));
271 261
272 retval = slot->hpc_ops->get_max_bus_speed(slot, value); 262 retval = pciehp_get_max_link_speed(slot, value);
273 if (retval < 0) 263 if (retval < 0)
274 *value = PCI_SPEED_UNKNOWN; 264 *value = PCI_SPEED_UNKNOWN;
275 265
@@ -284,7 +274,7 @@ static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe
284 ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", 274 ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
285 __func__, slot_name(slot)); 275 __func__, slot_name(slot));
286 276
287 retval = slot->hpc_ops->get_cur_bus_speed(slot, value); 277 retval = pciehp_get_cur_link_speed(slot, value);
288 if (retval < 0) 278 if (retval < 0)
289 *value = PCI_SPEED_UNKNOWN; 279 *value = PCI_SPEED_UNKNOWN;
290 280
@@ -295,7 +285,7 @@ static int pciehp_probe(struct pcie_device *dev)
295{ 285{
296 int rc; 286 int rc;
297 struct controller *ctrl; 287 struct controller *ctrl;
298 struct slot *t_slot; 288 struct slot *slot;
299 u8 value; 289 u8 value;
300 struct pci_dev *pdev = dev->port; 290 struct pci_dev *pdev = dev->port;
301 291
@@ -314,7 +304,7 @@ static int pciehp_probe(struct pcie_device *dev)
314 set_service_data(dev, ctrl); 304 set_service_data(dev, ctrl);
315 305
316 /* Setup the slot information structures */ 306 /* Setup the slot information structures */
317 rc = init_slots(ctrl); 307 rc = init_slot(ctrl);
318 if (rc) { 308 if (rc) {
319 if (rc == -EBUSY) 309 if (rc == -EBUSY)
320 ctrl_warn(ctrl, "Slot already registered by another " 310 ctrl_warn(ctrl, "Slot already registered by another "
@@ -332,15 +322,15 @@ static int pciehp_probe(struct pcie_device *dev)
332 } 322 }
333 323
334 /* Check if slot is occupied */ 324 /* Check if slot is occupied */
335 t_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset); 325 slot = ctrl->slot;
336 t_slot->hpc_ops->get_adapter_status(t_slot, &value); 326 pciehp_get_adapter_status(slot, &value);
337 if (value) { 327 if (value) {
338 if (pciehp_force) 328 if (pciehp_force)
339 pciehp_enable_slot(t_slot); 329 pciehp_enable_slot(slot);
340 } else { 330 } else {
341 /* Power off slot if not occupied */ 331 /* Power off slot if not occupied */
342 if (POWER_CTRL(ctrl)) { 332 if (POWER_CTRL(ctrl)) {
343 rc = t_slot->hpc_ops->power_off_slot(t_slot); 333 rc = pciehp_power_off_slot(slot);
344 if (rc) 334 if (rc)
345 goto err_out_free_ctrl_slot; 335 goto err_out_free_ctrl_slot;
346 } 336 }
@@ -349,19 +339,19 @@ static int pciehp_probe(struct pcie_device *dev)
349 return 0; 339 return 0;
350 340
351err_out_free_ctrl_slot: 341err_out_free_ctrl_slot:
352 cleanup_slots(ctrl); 342 cleanup_slot(ctrl);
353err_out_release_ctlr: 343err_out_release_ctlr:
354 ctrl->hpc_ops->release_ctlr(ctrl); 344 pciehp_release_ctrl(ctrl);
355err_out_none: 345err_out_none:
356 return -ENODEV; 346 return -ENODEV;
357} 347}
358 348
359static void pciehp_remove (struct pcie_device *dev) 349static void pciehp_remove(struct pcie_device *dev)
360{ 350{
361 struct controller *ctrl = get_service_data(dev); 351 struct controller *ctrl = get_service_data(dev);
362 352
363 cleanup_slots(ctrl); 353 cleanup_slot(ctrl);
364 ctrl->hpc_ops->release_ctlr(ctrl); 354 pciehp_release_ctrl(ctrl);
365} 355}
366 356
367#ifdef CONFIG_PM 357#ifdef CONFIG_PM
@@ -376,20 +366,20 @@ static int pciehp_resume (struct pcie_device *dev)
376 dev_info(&dev->device, "%s ENTRY\n", __func__); 366 dev_info(&dev->device, "%s ENTRY\n", __func__);
377 if (pciehp_force) { 367 if (pciehp_force) {
378 struct controller *ctrl = get_service_data(dev); 368 struct controller *ctrl = get_service_data(dev);
379 struct slot *t_slot; 369 struct slot *slot;
380 u8 status; 370 u8 status;
381 371
382 /* reinitialize the chipset's event detection logic */ 372 /* reinitialize the chipset's event detection logic */
383 pcie_enable_notification(ctrl); 373 pcie_enable_notification(ctrl);
384 374
385 t_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset); 375 slot = ctrl->slot;
386 376
387 /* Check if slot is occupied */ 377 /* Check if slot is occupied */
388 t_slot->hpc_ops->get_adapter_status(t_slot, &status); 378 pciehp_get_adapter_status(slot, &status);
389 if (status) 379 if (status)
390 pciehp_enable_slot(t_slot); 380 pciehp_enable_slot(slot);
391 else 381 else
392 pciehp_disable_slot(t_slot); 382 pciehp_disable_slot(slot);
393 } 383 }
394 return 0; 384 return 0;
395} 385}
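
The pciehp_core.c changes above follow from a structural simplification: a PCI Express downstream port can expose at most one hotplug slot, so the per-controller slot list becomes a single ctrl->slot pointer and init_slots()/cleanup_slots() become init_slot()/cleanup_slot(). A minimal user-space sketch of that shape; struct slot, struct controller and the deregistration call are stand-ins, not the kernel's types:

/* Sketch only: one slot per controller, no list needed. */
#include <stdio.h>
#include <stdlib.h>

struct slot {
	const char *name;
};

struct controller {
	struct slot *slot;	/* was: struct list_head slot_list */
};

static void cleanup_slot(struct controller *ctrl)
{
	/* was: list_for_each_entry() over ctrl->slot_list */
	printf("deregister %s\n", ctrl->slot->name);
	free(ctrl->slot);
}

int main(void)
{
	struct controller ctrl;

	ctrl.slot = malloc(sizeof(*ctrl.slot));
	if (!ctrl.slot)
		return 1;
	ctrl.slot->name = "slot0";
	cleanup_slot(&ctrl);
	return 0;
}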
diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
index b97cb4c3e0fe..84487d126e4d 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -82,7 +82,7 @@ u8 pciehp_handle_switch_change(struct slot *p_slot)
82 /* Switch Change */ 82 /* Switch Change */
83 ctrl_dbg(ctrl, "Switch interrupt received\n"); 83 ctrl_dbg(ctrl, "Switch interrupt received\n");
84 84
85 p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); 85 pciehp_get_latch_status(p_slot, &getstatus);
86 if (getstatus) { 86 if (getstatus) {
87 /* 87 /*
88 * Switch opened 88 * Switch opened
@@ -114,7 +114,7 @@ u8 pciehp_handle_presence_change(struct slot *p_slot)
114 /* Switch is open, assume a presence change 114 /* Switch is open, assume a presence change
115 * Save the presence state 115 * Save the presence state
116 */ 116 */
117 p_slot->hpc_ops->get_adapter_status(p_slot, &presence_save); 117 pciehp_get_adapter_status(p_slot, &presence_save);
118 if (presence_save) { 118 if (presence_save) {
119 /* 119 /*
120 * Card Present 120 * Card Present
@@ -143,7 +143,7 @@ u8 pciehp_handle_power_fault(struct slot *p_slot)
143 /* power fault */ 143 /* power fault */
144 ctrl_dbg(ctrl, "Power fault interrupt received\n"); 144 ctrl_dbg(ctrl, "Power fault interrupt received\n");
145 145
146 if ( !(p_slot->hpc_ops->query_power_fault(p_slot))) { 146 if (!pciehp_query_power_fault(p_slot)) {
147 /* 147 /*
148 * power fault Cleared 148 * power fault Cleared
149 */ 149 */
@@ -172,7 +172,7 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot)
172{ 172{
173 /* turn off slot, turn on Amber LED, turn off Green LED if supported*/ 173 /* turn off slot, turn on Amber LED, turn off Green LED if supported*/
174 if (POWER_CTRL(ctrl)) { 174 if (POWER_CTRL(ctrl)) {
175 if (pslot->hpc_ops->power_off_slot(pslot)) { 175 if (pciehp_power_off_slot(pslot)) {
176 ctrl_err(ctrl, 176 ctrl_err(ctrl,
177 "Issue of Slot Power Off command failed\n"); 177 "Issue of Slot Power Off command failed\n");
178 return; 178 return;
@@ -186,10 +186,10 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot)
186 } 186 }
187 187
188 if (PWR_LED(ctrl)) 188 if (PWR_LED(ctrl))
189 pslot->hpc_ops->green_led_off(pslot); 189 pciehp_green_led_off(pslot);
190 190
191 if (ATTN_LED(ctrl)) { 191 if (ATTN_LED(ctrl)) {
192 if (pslot->hpc_ops->set_attention_status(pslot, 1)) { 192 if (pciehp_set_attention_status(pslot, 1)) {
193 ctrl_err(ctrl, 193 ctrl_err(ctrl,
194 "Issue of Set Attention Led command failed\n"); 194 "Issue of Set Attention Led command failed\n");
195 return; 195 return;
@@ -208,24 +208,20 @@ static int board_added(struct slot *p_slot)
208{ 208{
209 int retval = 0; 209 int retval = 0;
210 struct controller *ctrl = p_slot->ctrl; 210 struct controller *ctrl = p_slot->ctrl;
211 struct pci_bus *parent = ctrl->pci_dev->subordinate; 211 struct pci_bus *parent = ctrl->pcie->port->subordinate;
212
213 ctrl_dbg(ctrl, "%s: slot device, slot offset, hp slot = %d, %d, %d\n",
214 __func__, p_slot->device, ctrl->slot_device_offset,
215 p_slot->hp_slot);
216 212
217 if (POWER_CTRL(ctrl)) { 213 if (POWER_CTRL(ctrl)) {
218 /* Power on slot */ 214 /* Power on slot */
219 retval = p_slot->hpc_ops->power_on_slot(p_slot); 215 retval = pciehp_power_on_slot(p_slot);
220 if (retval) 216 if (retval)
221 return retval; 217 return retval;
222 } 218 }
223 219
224 if (PWR_LED(ctrl)) 220 if (PWR_LED(ctrl))
225 p_slot->hpc_ops->green_led_blink(p_slot); 221 pciehp_green_led_blink(p_slot);
226 222
227 /* Check link training status */ 223 /* Check link training status */
228 retval = p_slot->hpc_ops->check_lnk_status(ctrl); 224 retval = pciehp_check_link_status(ctrl);
229 if (retval) { 225 if (retval) {
230 ctrl_err(ctrl, "Failed to check link status\n"); 226 ctrl_err(ctrl, "Failed to check link status\n");
231 set_slot_off(ctrl, p_slot); 227 set_slot_off(ctrl, p_slot);
@@ -233,21 +229,21 @@ static int board_added(struct slot *p_slot)
233 } 229 }
234 230
235 /* Check for a power fault */ 231 /* Check for a power fault */
236 if (p_slot->hpc_ops->query_power_fault(p_slot)) { 232 if (pciehp_query_power_fault(p_slot)) {
237 ctrl_dbg(ctrl, "Power fault detected\n"); 233 ctrl_dbg(ctrl, "Power fault detected\n");
238 retval = POWER_FAILURE; 234 retval = -EIO;
239 goto err_exit; 235 goto err_exit;
240 } 236 }
241 237
242 retval = pciehp_configure_device(p_slot); 238 retval = pciehp_configure_device(p_slot);
243 if (retval) { 239 if (retval) {
244 ctrl_err(ctrl, "Cannot add device at %04x:%02x:%02x\n", 240 ctrl_err(ctrl, "Cannot add device at %04x:%02x:00\n",
245 pci_domain_nr(parent), p_slot->bus, p_slot->device); 241 pci_domain_nr(parent), parent->number);
246 goto err_exit; 242 goto err_exit;
247 } 243 }
248 244
249 if (PWR_LED(ctrl)) 245 if (PWR_LED(ctrl))
250 p_slot->hpc_ops->green_led_on(p_slot); 246 pciehp_green_led_on(p_slot);
251 247
252 return 0; 248 return 0;
253 249
@@ -269,11 +265,9 @@ static int remove_board(struct slot *p_slot)
269 if (retval) 265 if (retval)
270 return retval; 266 return retval;
271 267
272 ctrl_dbg(ctrl, "%s: hp_slot = %d\n", __func__, p_slot->hp_slot);
273
274 if (POWER_CTRL(ctrl)) { 268 if (POWER_CTRL(ctrl)) {
275 /* power off slot */ 269 /* power off slot */
276 retval = p_slot->hpc_ops->power_off_slot(p_slot); 270 retval = pciehp_power_off_slot(p_slot);
277 if (retval) { 271 if (retval) {
278 ctrl_err(ctrl, 272 ctrl_err(ctrl,
279 "Issue of Slot Disable command failed\n"); 273 "Issue of Slot Disable command failed\n");
@@ -287,9 +281,9 @@ static int remove_board(struct slot *p_slot)
287 msleep(1000); 281 msleep(1000);
288 } 282 }
289 283
284 /* turn off Green LED */
290 if (PWR_LED(ctrl)) 285 if (PWR_LED(ctrl))
291 /* turn off Green LED */ 286 pciehp_green_led_off(p_slot);
292 p_slot->hpc_ops->green_led_off(p_slot);
293 287
294 return 0; 288 return 0;
295} 289}
@@ -317,18 +311,17 @@ static void pciehp_power_thread(struct work_struct *work)
317 case POWEROFF_STATE: 311 case POWEROFF_STATE:
318 mutex_unlock(&p_slot->lock); 312 mutex_unlock(&p_slot->lock);
319 ctrl_dbg(p_slot->ctrl, 313 ctrl_dbg(p_slot->ctrl,
320 "Disabling domain:bus:device=%04x:%02x:%02x\n", 314 "Disabling domain:bus:device=%04x:%02x:00\n",
321 pci_domain_nr(p_slot->ctrl->pci_dev->subordinate), 315 pci_domain_nr(p_slot->ctrl->pcie->port->subordinate),
322 p_slot->bus, p_slot->device); 316 p_slot->ctrl->pcie->port->subordinate->number);
323 pciehp_disable_slot(p_slot); 317 pciehp_disable_slot(p_slot);
324 mutex_lock(&p_slot->lock); 318 mutex_lock(&p_slot->lock);
325 p_slot->state = STATIC_STATE; 319 p_slot->state = STATIC_STATE;
326 break; 320 break;
327 case POWERON_STATE: 321 case POWERON_STATE:
328 mutex_unlock(&p_slot->lock); 322 mutex_unlock(&p_slot->lock);
329 if (pciehp_enable_slot(p_slot) && 323 if (pciehp_enable_slot(p_slot) && PWR_LED(p_slot->ctrl))
330 PWR_LED(p_slot->ctrl)) 324 pciehp_green_led_off(p_slot);
331 p_slot->hpc_ops->green_led_off(p_slot);
332 mutex_lock(&p_slot->lock); 325 mutex_lock(&p_slot->lock);
333 p_slot->state = STATIC_STATE; 326 p_slot->state = STATIC_STATE;
334 break; 327 break;
@@ -379,10 +372,10 @@ static int update_slot_info(struct slot *slot)
379 if (!info) 372 if (!info)
380 return -ENOMEM; 373 return -ENOMEM;
381 374
382 slot->hpc_ops->get_power_status(slot, &(info->power_status)); 375 pciehp_get_power_status(slot, &info->power_status);
383 slot->hpc_ops->get_attention_status(slot, &(info->attention_status)); 376 pciehp_get_attention_status(slot, &info->attention_status);
384 slot->hpc_ops->get_latch_status(slot, &(info->latch_status)); 377 pciehp_get_latch_status(slot, &info->latch_status);
385 slot->hpc_ops->get_adapter_status(slot, &(info->adapter_status)); 378 pciehp_get_adapter_status(slot, &info->adapter_status);
386 379
387 result = pci_hp_change_slot_info(slot->hotplug_slot, info); 380 result = pci_hp_change_slot_info(slot->hotplug_slot, info);
388 kfree (info); 381 kfree (info);
@@ -399,7 +392,7 @@ static void handle_button_press_event(struct slot *p_slot)
399 392
400 switch (p_slot->state) { 393 switch (p_slot->state) {
401 case STATIC_STATE: 394 case STATIC_STATE:
402 p_slot->hpc_ops->get_power_status(p_slot, &getstatus); 395 pciehp_get_power_status(p_slot, &getstatus);
403 if (getstatus) { 396 if (getstatus) {
404 p_slot->state = BLINKINGOFF_STATE; 397 p_slot->state = BLINKINGOFF_STATE;
405 ctrl_info(ctrl, 398 ctrl_info(ctrl,
@@ -413,9 +406,9 @@ static void handle_button_press_event(struct slot *p_slot)
413 } 406 }
414 /* blink green LED and turn off amber */ 407 /* blink green LED and turn off amber */
415 if (PWR_LED(ctrl)) 408 if (PWR_LED(ctrl))
416 p_slot->hpc_ops->green_led_blink(p_slot); 409 pciehp_green_led_blink(p_slot);
417 if (ATTN_LED(ctrl)) 410 if (ATTN_LED(ctrl))
418 p_slot->hpc_ops->set_attention_status(p_slot, 0); 411 pciehp_set_attention_status(p_slot, 0);
419 412
420 schedule_delayed_work(&p_slot->work, 5*HZ); 413 schedule_delayed_work(&p_slot->work, 5*HZ);
421 break; 414 break;
@@ -430,13 +423,13 @@ static void handle_button_press_event(struct slot *p_slot)
430 cancel_delayed_work(&p_slot->work); 423 cancel_delayed_work(&p_slot->work);
431 if (p_slot->state == BLINKINGOFF_STATE) { 424 if (p_slot->state == BLINKINGOFF_STATE) {
432 if (PWR_LED(ctrl)) 425 if (PWR_LED(ctrl))
433 p_slot->hpc_ops->green_led_on(p_slot); 426 pciehp_green_led_on(p_slot);
434 } else { 427 } else {
435 if (PWR_LED(ctrl)) 428 if (PWR_LED(ctrl))
436 p_slot->hpc_ops->green_led_off(p_slot); 429 pciehp_green_led_off(p_slot);
437 } 430 }
438 if (ATTN_LED(ctrl)) 431 if (ATTN_LED(ctrl))
439 p_slot->hpc_ops->set_attention_status(p_slot, 0); 432 pciehp_set_attention_status(p_slot, 0);
440 ctrl_info(ctrl, "PCI slot #%s - action canceled " 433 ctrl_info(ctrl, "PCI slot #%s - action canceled "
441 "due to button press\n", slot_name(p_slot)); 434 "due to button press\n", slot_name(p_slot));
442 p_slot->state = STATIC_STATE; 435 p_slot->state = STATIC_STATE;
@@ -474,7 +467,7 @@ static void handle_surprise_event(struct slot *p_slot)
474 info->p_slot = p_slot; 467 info->p_slot = p_slot;
475 INIT_WORK(&info->work, pciehp_power_thread); 468 INIT_WORK(&info->work, pciehp_power_thread);
476 469
477 p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus); 470 pciehp_get_adapter_status(p_slot, &getstatus);
478 if (!getstatus) 471 if (!getstatus)
479 p_slot->state = POWEROFF_STATE; 472 p_slot->state = POWEROFF_STATE;
480 else 473 else
@@ -498,9 +491,9 @@ static void interrupt_event_handler(struct work_struct *work)
498 if (!POWER_CTRL(ctrl)) 491 if (!POWER_CTRL(ctrl))
499 break; 492 break;
500 if (ATTN_LED(ctrl)) 493 if (ATTN_LED(ctrl))
501 p_slot->hpc_ops->set_attention_status(p_slot, 1); 494 pciehp_set_attention_status(p_slot, 1);
502 if (PWR_LED(ctrl)) 495 if (PWR_LED(ctrl))
503 p_slot->hpc_ops->green_led_off(p_slot); 496 pciehp_green_led_off(p_slot);
504 break; 497 break;
505 case INT_PRESENCE_ON: 498 case INT_PRESENCE_ON:
506 case INT_PRESENCE_OFF: 499 case INT_PRESENCE_OFF:
@@ -525,45 +518,38 @@ int pciehp_enable_slot(struct slot *p_slot)
525 int rc; 518 int rc;
526 struct controller *ctrl = p_slot->ctrl; 519 struct controller *ctrl = p_slot->ctrl;
527 520
528 /* Check to see if (latch closed, card present, power off) */ 521 rc = pciehp_get_adapter_status(p_slot, &getstatus);
529 mutex_lock(&p_slot->ctrl->crit_sect);
530
531 rc = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
532 if (rc || !getstatus) { 522 if (rc || !getstatus) {
533 ctrl_info(ctrl, "No adapter on slot(%s)\n", slot_name(p_slot)); 523 ctrl_info(ctrl, "No adapter on slot(%s)\n", slot_name(p_slot));
534 mutex_unlock(&p_slot->ctrl->crit_sect);
535 return -ENODEV; 524 return -ENODEV;
536 } 525 }
537 if (MRL_SENS(p_slot->ctrl)) { 526 if (MRL_SENS(p_slot->ctrl)) {
538 rc = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); 527 rc = pciehp_get_latch_status(p_slot, &getstatus);
539 if (rc || getstatus) { 528 if (rc || getstatus) {
540 ctrl_info(ctrl, "Latch open on slot(%s)\n", 529 ctrl_info(ctrl, "Latch open on slot(%s)\n",
541 slot_name(p_slot)); 530 slot_name(p_slot));
542 mutex_unlock(&p_slot->ctrl->crit_sect);
543 return -ENODEV; 531 return -ENODEV;
544 } 532 }
545 } 533 }
546 534
547 if (POWER_CTRL(p_slot->ctrl)) { 535 if (POWER_CTRL(p_slot->ctrl)) {
548 rc = p_slot->hpc_ops->get_power_status(p_slot, &getstatus); 536 rc = pciehp_get_power_status(p_slot, &getstatus);
549 if (rc || getstatus) { 537 if (rc || getstatus) {
550 ctrl_info(ctrl, "Already enabled on slot(%s)\n", 538 ctrl_info(ctrl, "Already enabled on slot(%s)\n",
551 slot_name(p_slot)); 539 slot_name(p_slot));
552 mutex_unlock(&p_slot->ctrl->crit_sect);
553 return -EINVAL; 540 return -EINVAL;
554 } 541 }
555 } 542 }
556 543
557 p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); 544 pciehp_get_latch_status(p_slot, &getstatus);
558 545
559 rc = board_added(p_slot); 546 rc = board_added(p_slot);
560 if (rc) { 547 if (rc) {
561 p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); 548 pciehp_get_latch_status(p_slot, &getstatus);
562 } 549 }
563 550
564 update_slot_info(p_slot); 551 update_slot_info(p_slot);
565 552
566 mutex_unlock(&p_slot->ctrl->crit_sect);
567 return rc; 553 return rc;
568} 554}
569 555
@@ -577,35 +563,29 @@ int pciehp_disable_slot(struct slot *p_slot)
577 if (!p_slot->ctrl) 563 if (!p_slot->ctrl)
578 return 1; 564 return 1;
579 565
580 /* Check to see if (latch closed, card present, power on) */
581 mutex_lock(&p_slot->ctrl->crit_sect);
582
583 if (!HP_SUPR_RM(p_slot->ctrl)) { 566 if (!HP_SUPR_RM(p_slot->ctrl)) {
584 ret = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus); 567 ret = pciehp_get_adapter_status(p_slot, &getstatus);
585 if (ret || !getstatus) { 568 if (ret || !getstatus) {
586 ctrl_info(ctrl, "No adapter on slot(%s)\n", 569 ctrl_info(ctrl, "No adapter on slot(%s)\n",
587 slot_name(p_slot)); 570 slot_name(p_slot));
588 mutex_unlock(&p_slot->ctrl->crit_sect);
589 return -ENODEV; 571 return -ENODEV;
590 } 572 }
591 } 573 }
592 574
593 if (MRL_SENS(p_slot->ctrl)) { 575 if (MRL_SENS(p_slot->ctrl)) {
594 ret = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); 576 ret = pciehp_get_latch_status(p_slot, &getstatus);
595 if (ret || getstatus) { 577 if (ret || getstatus) {
596 ctrl_info(ctrl, "Latch open on slot(%s)\n", 578 ctrl_info(ctrl, "Latch open on slot(%s)\n",
597 slot_name(p_slot)); 579 slot_name(p_slot));
598 mutex_unlock(&p_slot->ctrl->crit_sect);
599 return -ENODEV; 580 return -ENODEV;
600 } 581 }
601 } 582 }
602 583
603 if (POWER_CTRL(p_slot->ctrl)) { 584 if (POWER_CTRL(p_slot->ctrl)) {
604 ret = p_slot->hpc_ops->get_power_status(p_slot, &getstatus); 585 ret = pciehp_get_power_status(p_slot, &getstatus);
605 if (ret || !getstatus) { 586 if (ret || !getstatus) {
606 ctrl_info(ctrl, "Already disabled on slot(%s)\n", 587 ctrl_info(ctrl, "Already disabled on slot(%s)\n",
607 slot_name(p_slot)); 588 slot_name(p_slot));
608 mutex_unlock(&p_slot->ctrl->crit_sect);
609 return -EINVAL; 589 return -EINVAL;
610 } 590 }
611 } 591 }
@@ -613,7 +593,6 @@ int pciehp_disable_slot(struct slot *p_slot)
613 ret = remove_board(p_slot); 593 ret = remove_board(p_slot);
614 update_slot_info(p_slot); 594 update_slot_info(p_slot);
615 595
616 mutex_unlock(&p_slot->ctrl->crit_sect);
617 return ret; 596 return ret;
618} 597}
619 598
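
Besides the direct pciehp_*() calls, the pciehp_ctrl.c hunks above drop the ctrl->crit_sect mutex from the enable/disable paths and switch board_added() to a plain -EIO on power fault. What remains of pciehp_enable_slot() is a straight sequence of guards; a condensed user-space sketch, with the pciehp_get_*_status() helpers replaced by stand-ins:

/* Sketch of the guard order: presence, latch, power, then act. */
#include <stdio.h>
#include <errno.h>

static int adapter_present(void) { return 1; }	/* stand-in */
static int latch_open(void)      { return 0; }	/* stand-in */
static int powered(void)         { return 0; }	/* stand-in */

static int enable_slot(void)
{
	if (!adapter_present())
		return -ENODEV;	/* nothing in the slot */
	if (latch_open())
		return -ENODEV;	/* MRL not closed */
	if (powered())
		return -EINVAL;	/* already enabled */
	printf("powering slot on\n");
	return 0;
}

int main(void)
{
	return enable_slot() ? 1 : 0;
}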
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 271f917b6f2c..9ef4605c1ef6 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -44,25 +44,25 @@ static atomic_t pciehp_num_controllers = ATOMIC_INIT(0);
44 44
45static inline int pciehp_readw(struct controller *ctrl, int reg, u16 *value) 45static inline int pciehp_readw(struct controller *ctrl, int reg, u16 *value)
46{ 46{
47 struct pci_dev *dev = ctrl->pci_dev; 47 struct pci_dev *dev = ctrl->pcie->port;
48 return pci_read_config_word(dev, ctrl->cap_base + reg, value); 48 return pci_read_config_word(dev, ctrl->cap_base + reg, value);
49} 49}
50 50
51static inline int pciehp_readl(struct controller *ctrl, int reg, u32 *value) 51static inline int pciehp_readl(struct controller *ctrl, int reg, u32 *value)
52{ 52{
53 struct pci_dev *dev = ctrl->pci_dev; 53 struct pci_dev *dev = ctrl->pcie->port;
54 return pci_read_config_dword(dev, ctrl->cap_base + reg, value); 54 return pci_read_config_dword(dev, ctrl->cap_base + reg, value);
55} 55}
56 56
57static inline int pciehp_writew(struct controller *ctrl, int reg, u16 value) 57static inline int pciehp_writew(struct controller *ctrl, int reg, u16 value)
58{ 58{
59 struct pci_dev *dev = ctrl->pci_dev; 59 struct pci_dev *dev = ctrl->pcie->port;
60 return pci_write_config_word(dev, ctrl->cap_base + reg, value); 60 return pci_write_config_word(dev, ctrl->cap_base + reg, value);
61} 61}
62 62
63static inline int pciehp_writel(struct controller *ctrl, int reg, u32 value) 63static inline int pciehp_writel(struct controller *ctrl, int reg, u32 value)
64{ 64{
65 struct pci_dev *dev = ctrl->pci_dev; 65 struct pci_dev *dev = ctrl->pcie->port;
66 return pci_write_config_dword(dev, ctrl->cap_base + reg, value); 66 return pci_write_config_dword(dev, ctrl->cap_base + reg, value);
67} 67}
68 68
@@ -266,7 +266,7 @@ static void pcie_wait_link_active(struct controller *ctrl)
266 ctrl_dbg(ctrl, "Data Link Layer Link Active not set in 1000 msec\n"); 266 ctrl_dbg(ctrl, "Data Link Layer Link Active not set in 1000 msec\n");
267} 267}
268 268
269static int hpc_check_lnk_status(struct controller *ctrl) 269int pciehp_check_link_status(struct controller *ctrl)
270{ 270{
271 u16 lnk_status; 271 u16 lnk_status;
272 int retval = 0; 272 int retval = 0;
@@ -305,7 +305,7 @@ static int hpc_check_lnk_status(struct controller *ctrl)
305 return retval; 305 return retval;
306} 306}
307 307
308static int hpc_get_attention_status(struct slot *slot, u8 *status) 308int pciehp_get_attention_status(struct slot *slot, u8 *status)
309{ 309{
310 struct controller *ctrl = slot->ctrl; 310 struct controller *ctrl = slot->ctrl;
311 u16 slot_ctrl; 311 u16 slot_ctrl;
@@ -344,7 +344,7 @@ static int hpc_get_attention_status(struct slot *slot, u8 *status)
344 return 0; 344 return 0;
345} 345}
346 346
347static int hpc_get_power_status(struct slot *slot, u8 *status) 347int pciehp_get_power_status(struct slot *slot, u8 *status)
348{ 348{
349 struct controller *ctrl = slot->ctrl; 349 struct controller *ctrl = slot->ctrl;
350 u16 slot_ctrl; 350 u16 slot_ctrl;
@@ -376,7 +376,7 @@ static int hpc_get_power_status(struct slot *slot, u8 *status)
376 return retval; 376 return retval;
377} 377}
378 378
379static int hpc_get_latch_status(struct slot *slot, u8 *status) 379int pciehp_get_latch_status(struct slot *slot, u8 *status)
380{ 380{
381 struct controller *ctrl = slot->ctrl; 381 struct controller *ctrl = slot->ctrl;
382 u16 slot_status; 382 u16 slot_status;
@@ -392,7 +392,7 @@ static int hpc_get_latch_status(struct slot *slot, u8 *status)
392 return 0; 392 return 0;
393} 393}
394 394
395static int hpc_get_adapter_status(struct slot *slot, u8 *status) 395int pciehp_get_adapter_status(struct slot *slot, u8 *status)
396{ 396{
397 struct controller *ctrl = slot->ctrl; 397 struct controller *ctrl = slot->ctrl;
398 u16 slot_status; 398 u16 slot_status;
@@ -408,7 +408,7 @@ static int hpc_get_adapter_status(struct slot *slot, u8 *status)
408 return 0; 408 return 0;
409} 409}
410 410
411static int hpc_query_power_fault(struct slot *slot) 411int pciehp_query_power_fault(struct slot *slot)
412{ 412{
413 struct controller *ctrl = slot->ctrl; 413 struct controller *ctrl = slot->ctrl;
414 u16 slot_status; 414 u16 slot_status;
@@ -422,7 +422,7 @@ static int hpc_query_power_fault(struct slot *slot)
422 return !!(slot_status & PCI_EXP_SLTSTA_PFD); 422 return !!(slot_status & PCI_EXP_SLTSTA_PFD);
423} 423}
424 424
425static int hpc_set_attention_status(struct slot *slot, u8 value) 425int pciehp_set_attention_status(struct slot *slot, u8 value)
426{ 426{
427 struct controller *ctrl = slot->ctrl; 427 struct controller *ctrl = slot->ctrl;
428 u16 slot_cmd; 428 u16 slot_cmd;
@@ -450,7 +450,7 @@ static int hpc_set_attention_status(struct slot *slot, u8 value)
450 return rc; 450 return rc;
451} 451}
452 452
453static void hpc_set_green_led_on(struct slot *slot) 453void pciehp_green_led_on(struct slot *slot)
454{ 454{
455 struct controller *ctrl = slot->ctrl; 455 struct controller *ctrl = slot->ctrl;
456 u16 slot_cmd; 456 u16 slot_cmd;
@@ -463,7 +463,7 @@ static void hpc_set_green_led_on(struct slot *slot)
463 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); 463 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
464} 464}
465 465
466static void hpc_set_green_led_off(struct slot *slot) 466void pciehp_green_led_off(struct slot *slot)
467{ 467{
468 struct controller *ctrl = slot->ctrl; 468 struct controller *ctrl = slot->ctrl;
469 u16 slot_cmd; 469 u16 slot_cmd;
@@ -476,7 +476,7 @@ static void hpc_set_green_led_off(struct slot *slot)
476 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); 476 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
477} 477}
478 478
479static void hpc_set_green_led_blink(struct slot *slot) 479void pciehp_green_led_blink(struct slot *slot)
480{ 480{
481 struct controller *ctrl = slot->ctrl; 481 struct controller *ctrl = slot->ctrl;
482 u16 slot_cmd; 482 u16 slot_cmd;
@@ -489,7 +489,7 @@ static void hpc_set_green_led_blink(struct slot *slot)
489 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); 489 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
490} 490}
491 491
492static int hpc_power_on_slot(struct slot * slot) 492int pciehp_power_on_slot(struct slot * slot)
493{ 493{
494 struct controller *ctrl = slot->ctrl; 494 struct controller *ctrl = slot->ctrl;
495 u16 slot_cmd; 495 u16 slot_cmd;
@@ -497,8 +497,6 @@ static int hpc_power_on_slot(struct slot * slot)
497 u16 slot_status; 497 u16 slot_status;
498 int retval = 0; 498 int retval = 0;
499 499
500 ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
501
502 /* Clear sticky power-fault bit from previous power failures */ 500 /* Clear sticky power-fault bit from previous power failures */
503 retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status); 501 retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status);
504 if (retval) { 502 if (retval) {
@@ -539,7 +537,7 @@ static int hpc_power_on_slot(struct slot * slot)
539 537
540static inline int pcie_mask_bad_dllp(struct controller *ctrl) 538static inline int pcie_mask_bad_dllp(struct controller *ctrl)
541{ 539{
542 struct pci_dev *dev = ctrl->pci_dev; 540 struct pci_dev *dev = ctrl->pcie->port;
543 int pos; 541 int pos;
544 u32 reg; 542 u32 reg;
545 543
@@ -556,7 +554,7 @@ static inline int pcie_mask_bad_dllp(struct controller *ctrl)
556 554
557static inline void pcie_unmask_bad_dllp(struct controller *ctrl) 555static inline void pcie_unmask_bad_dllp(struct controller *ctrl)
558{ 556{
559 struct pci_dev *dev = ctrl->pci_dev; 557 struct pci_dev *dev = ctrl->pcie->port;
560 u32 reg; 558 u32 reg;
561 int pos; 559 int pos;
562 560
@@ -570,7 +568,7 @@ static inline void pcie_unmask_bad_dllp(struct controller *ctrl)
570 pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, reg); 568 pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, reg);
571} 569}
572 570
573static int hpc_power_off_slot(struct slot * slot) 571int pciehp_power_off_slot(struct slot * slot)
574{ 572{
575 struct controller *ctrl = slot->ctrl; 573 struct controller *ctrl = slot->ctrl;
576 u16 slot_cmd; 574 u16 slot_cmd;
@@ -578,8 +576,6 @@ static int hpc_power_off_slot(struct slot * slot)
578 int retval = 0; 576 int retval = 0;
579 int changed; 577 int changed;
580 578
581 ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
582
583 /* 579 /*
584 * Set Bad DLLP Mask bit in Correctable Error Mask 580 * Set Bad DLLP Mask bit in Correctable Error Mask
585 * Register. This is the workaround against Bad DLLP error 581 * Register. This is the workaround against Bad DLLP error
@@ -614,8 +610,8 @@ static int hpc_power_off_slot(struct slot * slot)
614static irqreturn_t pcie_isr(int irq, void *dev_id) 610static irqreturn_t pcie_isr(int irq, void *dev_id)
615{ 611{
616 struct controller *ctrl = (struct controller *)dev_id; 612 struct controller *ctrl = (struct controller *)dev_id;
613 struct slot *slot = ctrl->slot;
617 u16 detected, intr_loc; 614 u16 detected, intr_loc;
618 struct slot *p_slot;
619 615
620 /* 616 /*
621 * In order to guarantee that all interrupt events are 617 * In order to guarantee that all interrupt events are
@@ -656,29 +652,27 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
656 if (!(intr_loc & ~PCI_EXP_SLTSTA_CC)) 652 if (!(intr_loc & ~PCI_EXP_SLTSTA_CC))
657 return IRQ_HANDLED; 653 return IRQ_HANDLED;
658 654
659 p_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset);
660
661 /* Check MRL Sensor Changed */ 655 /* Check MRL Sensor Changed */
662 if (intr_loc & PCI_EXP_SLTSTA_MRLSC) 656 if (intr_loc & PCI_EXP_SLTSTA_MRLSC)
663 pciehp_handle_switch_change(p_slot); 657 pciehp_handle_switch_change(slot);
664 658
665 /* Check Attention Button Pressed */ 659 /* Check Attention Button Pressed */
666 if (intr_loc & PCI_EXP_SLTSTA_ABP) 660 if (intr_loc & PCI_EXP_SLTSTA_ABP)
667 pciehp_handle_attention_button(p_slot); 661 pciehp_handle_attention_button(slot);
668 662
669 /* Check Presence Detect Changed */ 663 /* Check Presence Detect Changed */
670 if (intr_loc & PCI_EXP_SLTSTA_PDC) 664 if (intr_loc & PCI_EXP_SLTSTA_PDC)
671 pciehp_handle_presence_change(p_slot); 665 pciehp_handle_presence_change(slot);
672 666
673 /* Check Power Fault Detected */ 667 /* Check Power Fault Detected */
674 if ((intr_loc & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) { 668 if ((intr_loc & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) {
675 ctrl->power_fault_detected = 1; 669 ctrl->power_fault_detected = 1;
676 pciehp_handle_power_fault(p_slot); 670 pciehp_handle_power_fault(slot);
677 } 671 }
678 return IRQ_HANDLED; 672 return IRQ_HANDLED;
679} 673}
680 674
681static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value) 675int pciehp_get_max_link_speed(struct slot *slot, enum pci_bus_speed *value)
682{ 676{
683 struct controller *ctrl = slot->ctrl; 677 struct controller *ctrl = slot->ctrl;
684 enum pcie_link_speed lnk_speed; 678 enum pcie_link_speed lnk_speed;
@@ -709,7 +703,7 @@ static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
709 return retval; 703 return retval;
710} 704}
711 705
712static int hpc_get_max_lnk_width(struct slot *slot, 706int pciehp_get_max_lnk_width(struct slot *slot,
713 enum pcie_link_width *value) 707 enum pcie_link_width *value)
714{ 708{
715 struct controller *ctrl = slot->ctrl; 709 struct controller *ctrl = slot->ctrl;
@@ -759,7 +753,7 @@ static int hpc_get_max_lnk_width(struct slot *slot,
759 return retval; 753 return retval;
760} 754}
761 755
762static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value) 756int pciehp_get_cur_link_speed(struct slot *slot, enum pci_bus_speed *value)
763{ 757{
764 struct controller *ctrl = slot->ctrl; 758 struct controller *ctrl = slot->ctrl;
765 enum pcie_link_speed lnk_speed = PCI_SPEED_UNKNOWN; 759 enum pcie_link_speed lnk_speed = PCI_SPEED_UNKNOWN;
@@ -791,7 +785,7 @@ static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
791 return retval; 785 return retval;
792} 786}
793 787
794static int hpc_get_cur_lnk_width(struct slot *slot, 788int pciehp_get_cur_lnk_width(struct slot *slot,
795 enum pcie_link_width *value) 789 enum pcie_link_width *value)
796{ 790{
797 struct controller *ctrl = slot->ctrl; 791 struct controller *ctrl = slot->ctrl;
@@ -842,30 +836,6 @@ static int hpc_get_cur_lnk_width(struct slot *slot,
842 return retval; 836 return retval;
843} 837}
844 838
845static void pcie_release_ctrl(struct controller *ctrl);
846static struct hpc_ops pciehp_hpc_ops = {
847 .power_on_slot = hpc_power_on_slot,
848 .power_off_slot = hpc_power_off_slot,
849 .set_attention_status = hpc_set_attention_status,
850 .get_power_status = hpc_get_power_status,
851 .get_attention_status = hpc_get_attention_status,
852 .get_latch_status = hpc_get_latch_status,
853 .get_adapter_status = hpc_get_adapter_status,
854
855 .get_max_bus_speed = hpc_get_max_lnk_speed,
856 .get_cur_bus_speed = hpc_get_cur_lnk_speed,
857 .get_max_lnk_width = hpc_get_max_lnk_width,
858 .get_cur_lnk_width = hpc_get_cur_lnk_width,
859
860 .query_power_fault = hpc_query_power_fault,
861 .green_led_on = hpc_set_green_led_on,
862 .green_led_off = hpc_set_green_led_off,
863 .green_led_blink = hpc_set_green_led_blink,
864
865 .release_ctlr = pcie_release_ctrl,
866 .check_lnk_status = hpc_check_lnk_status,
867};
868
869int pcie_enable_notification(struct controller *ctrl) 839int pcie_enable_notification(struct controller *ctrl)
870{ 840{
871 u16 cmd, mask; 841 u16 cmd, mask;
@@ -930,23 +900,16 @@ static int pcie_init_slot(struct controller *ctrl)
930 if (!slot) 900 if (!slot)
931 return -ENOMEM; 901 return -ENOMEM;
932 902
933 slot->hp_slot = 0;
934 slot->ctrl = ctrl; 903 slot->ctrl = ctrl;
935 slot->bus = ctrl->pci_dev->subordinate->number;
936 slot->device = ctrl->slot_device_offset + slot->hp_slot;
937 slot->hpc_ops = ctrl->hpc_ops;
938 slot->number = ctrl->first_slot;
939 mutex_init(&slot->lock); 904 mutex_init(&slot->lock);
940 INIT_DELAYED_WORK(&slot->work, pciehp_queue_pushbutton_work); 905 INIT_DELAYED_WORK(&slot->work, pciehp_queue_pushbutton_work);
941 list_add(&slot->slot_list, &ctrl->slot_list); 906 ctrl->slot = slot;
942 return 0; 907 return 0;
943} 908}
944 909
945static void pcie_cleanup_slot(struct controller *ctrl) 910static void pcie_cleanup_slot(struct controller *ctrl)
946{ 911{
947 struct slot *slot; 912 struct slot *slot = ctrl->slot;
948 slot = list_first_entry(&ctrl->slot_list, struct slot, slot_list);
949 list_del(&slot->slot_list);
950 cancel_delayed_work(&slot->work); 913 cancel_delayed_work(&slot->work);
951 flush_scheduled_work(); 914 flush_scheduled_work();
952 flush_workqueue(pciehp_wq); 915 flush_workqueue(pciehp_wq);
@@ -957,7 +920,7 @@ static inline void dbg_ctrl(struct controller *ctrl)
957{ 920{
958 int i; 921 int i;
959 u16 reg16; 922 u16 reg16;
960 struct pci_dev *pdev = ctrl->pci_dev; 923 struct pci_dev *pdev = ctrl->pcie->port;
961 924
962 if (!pciehp_debug) 925 if (!pciehp_debug)
963 return; 926 return;
@@ -980,7 +943,7 @@ static inline void dbg_ctrl(struct controller *ctrl)
980 (unsigned long long)pci_resource_start(pdev, i)); 943 (unsigned long long)pci_resource_start(pdev, i));
981 } 944 }
982 ctrl_info(ctrl, "Slot Capabilities : 0x%08x\n", ctrl->slot_cap); 945 ctrl_info(ctrl, "Slot Capabilities : 0x%08x\n", ctrl->slot_cap);
983 ctrl_info(ctrl, " Physical Slot Number : %d\n", ctrl->first_slot); 946 ctrl_info(ctrl, " Physical Slot Number : %d\n", PSN(ctrl));
984 ctrl_info(ctrl, " Attention Button : %3s\n", 947 ctrl_info(ctrl, " Attention Button : %3s\n",
985 ATTN_BUTTN(ctrl) ? "yes" : "no"); 948 ATTN_BUTTN(ctrl) ? "yes" : "no");
986 ctrl_info(ctrl, " Power Controller : %3s\n", 949 ctrl_info(ctrl, " Power Controller : %3s\n",
@@ -1014,10 +977,7 @@ struct controller *pcie_init(struct pcie_device *dev)
1014 dev_err(&dev->device, "%s: Out of memory\n", __func__); 977 dev_err(&dev->device, "%s: Out of memory\n", __func__);
1015 goto abort; 978 goto abort;
1016 } 979 }
1017 INIT_LIST_HEAD(&ctrl->slot_list);
1018
1019 ctrl->pcie = dev; 980 ctrl->pcie = dev;
1020 ctrl->pci_dev = pdev;
1021 ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP); 981 ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP);
1022 if (!ctrl->cap_base) { 982 if (!ctrl->cap_base) {
1023 ctrl_err(ctrl, "Cannot find PCI Express capability\n"); 983 ctrl_err(ctrl, "Cannot find PCI Express capability\n");
@@ -1029,11 +989,6 @@ struct controller *pcie_init(struct pcie_device *dev)
1029 } 989 }
1030 990
1031 ctrl->slot_cap = slot_cap; 991 ctrl->slot_cap = slot_cap;
1032 ctrl->first_slot = slot_cap >> 19;
1033 ctrl->slot_device_offset = 0;
1034 ctrl->num_slots = 1;
1035 ctrl->hpc_ops = &pciehp_hpc_ops;
1036 mutex_init(&ctrl->crit_sect);
1037 mutex_init(&ctrl->ctrl_lock); 992 mutex_init(&ctrl->ctrl_lock);
1038 init_waitqueue_head(&ctrl->queue); 993 init_waitqueue_head(&ctrl->queue);
1039 dbg_ctrl(ctrl); 994 dbg_ctrl(ctrl);
@@ -1089,7 +1044,7 @@ abort:
1089 return NULL; 1044 return NULL;
1090} 1045}
1091 1046
1092void pcie_release_ctrl(struct controller *ctrl) 1047void pciehp_release_ctrl(struct controller *ctrl)
1093{ 1048{
1094 pcie_shutdown_notification(ctrl); 1049 pcie_shutdown_notification(ctrl);
1095 pcie_cleanup_slot(ctrl); 1050 pcie_cleanup_slot(ctrl);
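
The common thread in pciehp_hpc.c is de-virtualization: struct hpc_ops only ever had this one implementation, so the function-pointer table is deleted and its entries become ordinary pciehp_*() functions. A toy sketch of the before/after calling convention; the types and the "register read" are stand-ins:

#include <stdio.h>

struct slot { int id; };

/* Before: a one-entry ops table, dereferenced on every call. */
struct hpc_ops {
	int (*get_power_status)(struct slot *, unsigned char *);
};

/* After: the sole implementation, called directly. */
static int pciehp_get_power_status(struct slot *slot, unsigned char *status)
{
	*status = 1;	/* stand-in for the Slot Control register read */
	return 0;
}

int main(void)
{
	struct slot s = { 0 };
	unsigned char status;

	/* was: slot->hpc_ops->get_power_status(slot, &status); */
	pciehp_get_power_status(&s, &status);
	printf("power_status=%u\n", status);
	return 0;
}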
diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c
index 02e24d63b3ee..21733108adde 100644
--- a/drivers/pci/hotplug/pciehp_pci.c
+++ b/drivers/pci/hotplug/pciehp_pci.c
@@ -63,27 +63,27 @@ static int __ref pciehp_add_bridge(struct pci_dev *dev)
63int pciehp_configure_device(struct slot *p_slot) 63int pciehp_configure_device(struct slot *p_slot)
64{ 64{
65 struct pci_dev *dev; 65 struct pci_dev *dev;
66 struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate; 66 struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
67 int num, fn; 67 int num, fn;
68 struct controller *ctrl = p_slot->ctrl; 68 struct controller *ctrl = p_slot->ctrl;
69 69
70 dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, 0)); 70 dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
71 if (dev) { 71 if (dev) {
72 ctrl_err(ctrl, "Device %s already exists " 72 ctrl_err(ctrl, "Device %s already exists "
73 "at %04x:%02x:%02x, cannot hot-add\n", pci_name(dev), 73 "at %04x:%02x:00, cannot hot-add\n", pci_name(dev),
74 pci_domain_nr(parent), p_slot->bus, p_slot->device); 74 pci_domain_nr(parent), parent->number);
75 pci_dev_put(dev); 75 pci_dev_put(dev);
76 return -EINVAL; 76 return -EINVAL;
77 } 77 }
78 78
79 num = pci_scan_slot(parent, PCI_DEVFN(p_slot->device, 0)); 79 num = pci_scan_slot(parent, PCI_DEVFN(0, 0));
80 if (num == 0) { 80 if (num == 0) {
81 ctrl_err(ctrl, "No new device found\n"); 81 ctrl_err(ctrl, "No new device found\n");
82 return -ENODEV; 82 return -ENODEV;
83 } 83 }
84 84
85 for (fn = 0; fn < 8; fn++) { 85 for (fn = 0; fn < 8; fn++) {
86 dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, fn)); 86 dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
87 if (!dev) 87 if (!dev)
88 continue; 88 continue;
89 if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) { 89 if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
@@ -111,19 +111,18 @@ int pciehp_unconfigure_device(struct slot *p_slot)
111 int j; 111 int j;
112 u8 bctl = 0; 112 u8 bctl = 0;
113 u8 presence = 0; 113 u8 presence = 0;
114 struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate; 114 struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
115 u16 command; 115 u16 command;
116 struct controller *ctrl = p_slot->ctrl; 116 struct controller *ctrl = p_slot->ctrl;
117 117
118 ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:%02x\n", 118 ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:00\n",
119 __func__, pci_domain_nr(parent), p_slot->bus, p_slot->device); 119 __func__, pci_domain_nr(parent), parent->number);
120 ret = p_slot->hpc_ops->get_adapter_status(p_slot, &presence); 120 ret = pciehp_get_adapter_status(p_slot, &presence);
121 if (ret) 121 if (ret)
122 presence = 0; 122 presence = 0;
123 123
124 for (j = 0; j < 8; j++) { 124 for (j = 0; j < 8; j++) {
125 struct pci_dev* temp = pci_get_slot(parent, 125 struct pci_dev* temp = pci_get_slot(parent, PCI_DEVFN(0, j));
126 (p_slot->device << 3) | j);
127 if (!temp) 126 if (!temp)
128 continue; 127 continue;
129 if ((temp->class >> 16) == PCI_BASE_CLASS_DISPLAY) { 128 if ((temp->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
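
pciehp_pci.c can hard-code device number 0 because a PCIe link has exactly one device behind it; only the eight possible functions need scanning. A small sketch of the devfn encoding, mirroring the PCI_DEVFN()/PCI_SLOT()/PCI_FUNC() macros from <linux/pci.h>:

/* PCI devfn packs a 5-bit device number and a 3-bit function number. */
#include <stdio.h>

#define PCI_DEVFN(slot, func)	((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn)		(((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)		((devfn) & 0x07)

int main(void)
{
	int fn;

	/* A PCIe slot is always device 0; walk its eight functions. */
	for (fn = 0; fn < 8; fn++)
		printf("devfn=%#04x dev=%d fn=%d\n",
		       (unsigned)PCI_DEVFN(0, fn),
		       PCI_SLOT(PCI_DEVFN(0, fn)), fn);
	return 0;
}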
diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
index 10c0e62bd5a8..2ce8f9ccc66e 100644
--- a/drivers/pci/pcie/aer/aerdrv.c
+++ b/drivers/pci/pcie/aer/aerdrv.c
@@ -318,6 +318,8 @@ static int __init aer_service_init(void)
318{ 318{
319 if (pcie_aer_disable) 319 if (pcie_aer_disable)
320 return -ENXIO; 320 return -ENXIO;
321 if (!pci_msi_enabled())
322 return -ENXIO;
321 return pcie_port_service_register(&aerdriver); 323 return pcie_port_service_register(&aerdriver);
322} 324}
323 325
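
The aerdrv.c change gates the AER service on MSI availability: PCIe error reporting is delivered through root-port interrupts, so with MSI disabled (for instance by booting with pci=nomsi) registering the service would be useless. A trivial sketch of the same early-out, with pci_msi_enabled() stubbed to simulate the disabled case:

#include <stdio.h>
#include <errno.h>

static int pci_msi_enabled(void) { return 0; }	/* stand-in: pci=nomsi */

static int aer_service_init(void)
{
	if (!pci_msi_enabled())
		return -ENXIO;	/* AER needs MSI-based interrupts */
	return 0;		/* would register the service driver here */
}

int main(void)
{
	printf("aer init: %d\n", aer_service_init());
	return 0;
}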
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index f289ca9bf18d..745402e8e498 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -303,9 +303,6 @@ static void pcie_get_aspm_reg(struct pci_dev *pdev,
303 pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); 303 pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
304 pci_read_config_dword(pdev, pos + PCI_EXP_LNKCAP, &reg32); 304 pci_read_config_dword(pdev, pos + PCI_EXP_LNKCAP, &reg32);
305 info->support = (reg32 & PCI_EXP_LNKCAP_ASPMS) >> 10; 305 info->support = (reg32 & PCI_EXP_LNKCAP_ASPMS) >> 10;
306 /* 00b and 10b are defined as "Reserved". */
307 if (info->support == PCIE_LINK_STATE_L1)
308 info->support = 0;
309 info->latency_encoding_l0s = (reg32 & PCI_EXP_LNKCAP_L0SEL) >> 12; 306 info->latency_encoding_l0s = (reg32 & PCI_EXP_LNKCAP_L0SEL) >> 12;
310 info->latency_encoding_l1 = (reg32 & PCI_EXP_LNKCAP_L1EL) >> 15; 307 info->latency_encoding_l1 = (reg32 & PCI_EXP_LNKCAP_L1EL) >> 15;
311 pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, &reg16); 308 pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, &reg16);
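
The aspm.c hunk stops clearing an ASPM support value of 10b, which the older code treated as reserved; it is now honoured as L1-only support. The field sits in bits 11:10 of the Link Capabilities register. A sketch of the decode, reusing the kernel's PCI_EXP_LNKCAP_ASPMS mask value; the lnkcap value is a made-up example:

#include <stdio.h>

#define PCI_EXP_LNKCAP_ASPMS	0x00000c00	/* ASPM Support, bits 11:10 */

int main(void)
{
	unsigned int lnkcap = 0x00000800;	/* example: L1-only (10b) */
	unsigned int support = (lnkcap & PCI_EXP_LNKCAP_ASPMS) >> 10;

	/* 01b = L0s, 10b = L1, 11b = L0s and L1.  The dropped hunk used
	 * to zero the 10b case as "reserved". */
	printf("L0s:%s L1:%s\n",
	       (support & 1) ? "yes" : "no",
	       (support & 2) ? "yes" : "no");
	return 0;
}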
diff --git a/drivers/pcmcia/Makefile b/drivers/pcmcia/Makefile
index 047394d98ac2..3247828aa203 100644
--- a/drivers/pcmcia/Makefile
+++ b/drivers/pcmcia/Makefile
@@ -71,6 +71,7 @@ pxa2xx-obj-$(CONFIG_MACH_ARMCORE) += pxa2xx_cm_x2xx_cs.o
71pxa2xx-obj-$(CONFIG_ARCH_VIPER) += pxa2xx_viper.o 71pxa2xx-obj-$(CONFIG_ARCH_VIPER) += pxa2xx_viper.o
72pxa2xx-obj-$(CONFIG_TRIZEPS_PCMCIA) += pxa2xx_trizeps4.o 72pxa2xx-obj-$(CONFIG_TRIZEPS_PCMCIA) += pxa2xx_trizeps4.o
73pxa2xx-obj-$(CONFIG_MACH_PALMTX) += pxa2xx_palmtx.o 73pxa2xx-obj-$(CONFIG_MACH_PALMTX) += pxa2xx_palmtx.o
74pxa2xx-obj-$(CONFIG_MACH_PALMTC) += pxa2xx_palmtc.o
74pxa2xx-obj-$(CONFIG_MACH_PALMLD) += pxa2xx_palmld.o 75pxa2xx-obj-$(CONFIG_MACH_PALMLD) += pxa2xx_palmld.o
75pxa2xx-obj-$(CONFIG_MACH_E740) += pxa2xx_e740.o 76pxa2xx-obj-$(CONFIG_MACH_E740) += pxa2xx_e740.o
76pxa2xx-obj-$(CONFIG_MACH_STARGATE2) += pxa2xx_stargate2.o 77pxa2xx-obj-$(CONFIG_MACH_STARGATE2) += pxa2xx_stargate2.o
diff --git a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c
index c49a7269f6d1..87e22ef8eb02 100644
--- a/drivers/pcmcia/pxa2xx_base.c
+++ b/drivers/pcmcia/pxa2xx_base.c
@@ -300,25 +300,29 @@ static int pxa2xx_drv_pcmcia_remove(struct platform_device *dev)
300 return soc_common_drv_pcmcia_remove(&dev->dev); 300 return soc_common_drv_pcmcia_remove(&dev->dev);
301} 301}
302 302
303static int pxa2xx_drv_pcmcia_suspend(struct platform_device *dev, pm_message_t state) 303static int pxa2xx_drv_pcmcia_suspend(struct device *dev)
304{ 304{
305 return pcmcia_socket_dev_suspend(&dev->dev, state); 305 return pcmcia_socket_dev_suspend(dev, PMSG_SUSPEND);
306} 306}
307 307
308static int pxa2xx_drv_pcmcia_resume(struct platform_device *dev) 308static int pxa2xx_drv_pcmcia_resume(struct device *dev)
309{ 309{
310 pxa2xx_configure_sockets(&dev->dev); 310 pxa2xx_configure_sockets(dev);
311 return pcmcia_socket_dev_resume(&dev->dev); 311 return pcmcia_socket_dev_resume(dev);
312} 312}
313 313
314static struct dev_pm_ops pxa2xx_drv_pcmcia_pm_ops = {
315 .suspend = pxa2xx_drv_pcmcia_suspend,
316 .resume = pxa2xx_drv_pcmcia_resume,
317};
318
314static struct platform_driver pxa2xx_pcmcia_driver = { 319static struct platform_driver pxa2xx_pcmcia_driver = {
315 .probe = pxa2xx_drv_pcmcia_probe, 320 .probe = pxa2xx_drv_pcmcia_probe,
316 .remove = pxa2xx_drv_pcmcia_remove, 321 .remove = pxa2xx_drv_pcmcia_remove,
317 .suspend = pxa2xx_drv_pcmcia_suspend,
318 .resume = pxa2xx_drv_pcmcia_resume,
319 .driver = { 322 .driver = {
320 .name = "pxa2xx-pcmcia", 323 .name = "pxa2xx-pcmcia",
321 .owner = THIS_MODULE, 324 .owner = THIS_MODULE,
325 .pm = &pxa2xx_drv_pcmcia_pm_ops,
322 }, 326 },
323}; 327};
324 328
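
This pxa2xx_base.c hunk is the first of several identical conversions in this series (rtc-pxa, rtc-sa1100, pxa2xx-uart and pxa2xx-spi below): the legacy platform_driver .suspend/.resume callbacks, which took a struct platform_device and a pm_message_t, are replaced by struct dev_pm_ops hooks taking a bare struct device, attached through the driver's .pm field. A user-space sketch of the resulting shape, with stand-ins for the kernel's device and dev_pm_ops types:

#include <stdio.h>

struct device { const char *name; };	/* stand-in for <linux/device.h> */

struct dev_pm_ops {			/* stand-in for <linux/pm.h> */
	int (*suspend)(struct device *dev);
	int (*resume)(struct device *dev);
};

static int drv_suspend(struct device *dev)	/* was: (pdev, pm_message_t) */
{
	printf("%s: suspend\n", dev->name);
	return 0;
}

static int drv_resume(struct device *dev)	/* was: (pdev) */
{
	printf("%s: resume\n", dev->name);
	return 0;
}

/* Hung off .driver.pm instead of the old .suspend/.resume members. */
static struct dev_pm_ops drv_pm_ops = {
	.suspend = drv_suspend,
	.resume  = drv_resume,
};

int main(void)
{
	struct device dev = { "pxa2xx-pcmcia" };

	drv_pm_ops.suspend(&dev);
	drv_pm_ops.resume(&dev);
	return 0;
}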
diff --git a/drivers/pcmcia/pxa2xx_palmtc.c b/drivers/pcmcia/pxa2xx_palmtc.c
new file mode 100644
index 000000000000..3a8993ed5621
--- /dev/null
+++ b/drivers/pcmcia/pxa2xx_palmtc.c
@@ -0,0 +1,230 @@
1/*
2 * linux/drivers/pcmcia/pxa2xx_palmtc.c
3 *
4 * Driver for Palm Tungsten|C PCMCIA
5 *
6 * Copyright (C) 2008 Alex Osborne <ato@meshy.org>
7 * Copyright (C) 2009 Marek Vasut <marek.vasut@gmail.com>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 *
13 */
14
15#include <linux/module.h>
16#include <linux/platform_device.h>
17#include <linux/gpio.h>
18#include <linux/delay.h>
19
20#include <asm/mach-types.h>
21#include <mach/palmtc.h>
22#include "soc_common.h"
23
24static int palmtc_pcmcia_hw_init(struct soc_pcmcia_socket *skt)
25{
26 int ret;
27
28 ret = gpio_request(GPIO_NR_PALMTC_PCMCIA_POWER1, "PCMCIA PWR1");
29 if (ret)
30 goto err1;
31 ret = gpio_direction_output(GPIO_NR_PALMTC_PCMCIA_POWER1, 0);
32 if (ret)
33 goto err2;
34
35 ret = gpio_request(GPIO_NR_PALMTC_PCMCIA_POWER2, "PCMCIA PWR2");
36 if (ret)
37 goto err2;
38 ret = gpio_direction_output(GPIO_NR_PALMTC_PCMCIA_POWER2, 0);
39 if (ret)
40 goto err3;
41
42 ret = gpio_request(GPIO_NR_PALMTC_PCMCIA_POWER3, "PCMCIA PWR3");
43 if (ret)
44 goto err3;
45 ret = gpio_direction_output(GPIO_NR_PALMTC_PCMCIA_POWER3, 0);
46 if (ret)
47 goto err4;
48
49 ret = gpio_request(GPIO_NR_PALMTC_PCMCIA_RESET, "PCMCIA RST");
50 if (ret)
51 goto err4;
52 ret = gpio_direction_output(GPIO_NR_PALMTC_PCMCIA_RESET, 1);
53 if (ret)
54 goto err5;
55
56 ret = gpio_request(GPIO_NR_PALMTC_PCMCIA_READY, "PCMCIA RDY");
57 if (ret)
58 goto err5;
59 ret = gpio_direction_input(GPIO_NR_PALMTC_PCMCIA_READY);
60 if (ret)
61 goto err6;
62
63 ret = gpio_request(GPIO_NR_PALMTC_PCMCIA_PWRREADY, "PCMCIA PWRRDY");
64 if (ret)
65 goto err6;
66 ret = gpio_direction_input(GPIO_NR_PALMTC_PCMCIA_PWRREADY);
67 if (ret)
68 goto err7;
69
70 skt->irq = IRQ_GPIO(GPIO_NR_PALMTC_PCMCIA_READY);
71 return 0;
72
73err7:
74 gpio_free(GPIO_NR_PALMTC_PCMCIA_PWRREADY);
75err6:
76 gpio_free(GPIO_NR_PALMTC_PCMCIA_READY);
77err5:
78 gpio_free(GPIO_NR_PALMTC_PCMCIA_RESET);
79err4:
80 gpio_free(GPIO_NR_PALMTC_PCMCIA_POWER3);
81err3:
82 gpio_free(GPIO_NR_PALMTC_PCMCIA_POWER2);
83err2:
84 gpio_free(GPIO_NR_PALMTC_PCMCIA_POWER1);
85err1:
86 return ret;
87}
88
89static void palmtc_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt)
90{
91 gpio_free(GPIO_NR_PALMTC_PCMCIA_PWRREADY);
92 gpio_free(GPIO_NR_PALMTC_PCMCIA_READY);
93 gpio_free(GPIO_NR_PALMTC_PCMCIA_RESET);
94 gpio_free(GPIO_NR_PALMTC_PCMCIA_POWER3);
95 gpio_free(GPIO_NR_PALMTC_PCMCIA_POWER2);
96 gpio_free(GPIO_NR_PALMTC_PCMCIA_POWER1);
97}
98
99static void palmtc_pcmcia_socket_state(struct soc_pcmcia_socket *skt,
100 struct pcmcia_state *state)
101{
102 state->detect = 1; /* always inserted */
103 state->ready = !!gpio_get_value(GPIO_NR_PALMTC_PCMCIA_READY);
104 state->bvd1 = 1;
105 state->bvd2 = 1;
106 state->wrprot = 0;
107 state->vs_3v = 1;
108 state->vs_Xv = 0;
109}
110
111static int palmtc_wifi_powerdown(void)
112{
113 gpio_set_value(GPIO_NR_PALMTC_PCMCIA_RESET, 1);
114 gpio_set_value(GPIO_NR_PALMTC_PCMCIA_POWER2, 0);
115 mdelay(40);
116 gpio_set_value(GPIO_NR_PALMTC_PCMCIA_POWER1, 0);
117 return 0;
118}
119
120static int palmtc_wifi_powerup(void)
121{
122 int timeout = 50;
123
124 gpio_set_value(GPIO_NR_PALMTC_PCMCIA_POWER3, 1);
125 mdelay(50);
126
127 /* Power up the card, 1.8V first, after a while 3.3V */
128 gpio_set_value(GPIO_NR_PALMTC_PCMCIA_POWER1, 1);
129 mdelay(100);
130 gpio_set_value(GPIO_NR_PALMTC_PCMCIA_POWER2, 1);
131
132 /* Wait till the card is ready */
133 while (!gpio_get_value(GPIO_NR_PALMTC_PCMCIA_PWRREADY) &&
134 timeout) {
135 mdelay(1);
136 timeout--;
137 }
138
139 /* Power down the WiFi in case of error */
140 if (!timeout) {
141 palmtc_wifi_powerdown();
142 return 1;
143 }
144
145 /* Reset the card */
146 gpio_set_value(GPIO_NR_PALMTC_PCMCIA_RESET, 1);
147 mdelay(20);
148 gpio_set_value(GPIO_NR_PALMTC_PCMCIA_RESET, 0);
149 mdelay(25);
150
151 gpio_set_value(GPIO_NR_PALMTC_PCMCIA_POWER3, 0);
152
153 return 0;
154}
155
156static int palmtc_pcmcia_configure_socket(struct soc_pcmcia_socket *skt,
157 const socket_state_t *state)
158{
159 int ret = 1;
160
161 if (state->Vcc == 0)
162 ret = palmtc_wifi_powerdown();
163 else if (state->Vcc == 33)
164 ret = palmtc_wifi_powerup();
165
166 return ret;
167}
168
169static void palmtc_pcmcia_socket_init(struct soc_pcmcia_socket *skt)
170{
171}
172
173static void palmtc_pcmcia_socket_suspend(struct soc_pcmcia_socket *skt)
174{
175}
176
177static struct pcmcia_low_level palmtc_pcmcia_ops = {
178 .owner = THIS_MODULE,
179
180 .first = 0,
181 .nr = 1,
182
183 .hw_init = palmtc_pcmcia_hw_init,
184 .hw_shutdown = palmtc_pcmcia_hw_shutdown,
185
186 .socket_state = palmtc_pcmcia_socket_state,
187 .configure_socket = palmtc_pcmcia_configure_socket,
188
189 .socket_init = palmtc_pcmcia_socket_init,
190 .socket_suspend = palmtc_pcmcia_socket_suspend,
191};
192
193static struct platform_device *palmtc_pcmcia_device;
194
195static int __init palmtc_pcmcia_init(void)
196{
197 int ret;
198
199 if (!machine_is_palmtc())
200 return -ENODEV;
201
202 palmtc_pcmcia_device = platform_device_alloc("pxa2xx-pcmcia", -1);
203 if (!palmtc_pcmcia_device)
204 return -ENOMEM;
205
206 ret = platform_device_add_data(palmtc_pcmcia_device, &palmtc_pcmcia_ops,
207 sizeof(palmtc_pcmcia_ops));
208
209 if (!ret)
210 ret = platform_device_add(palmtc_pcmcia_device);
211
212 if (ret)
213 platform_device_put(palmtc_pcmcia_device);
214
215 return ret;
216}
217
218static void __exit palmtc_pcmcia_exit(void)
219{
220 platform_device_unregister(palmtc_pcmcia_device);
221}
222
223module_init(palmtc_pcmcia_init);
224module_exit(palmtc_pcmcia_exit);
225
226MODULE_AUTHOR("Alex Osborne <ato@meshy.org>,"
227 " Marek Vasut <marek.vasut@gmail.com>");
228MODULE_DESCRIPTION("PCMCIA support for Palm Tungsten|C");
229MODULE_ALIAS("platform:pxa2xx-pcmcia");
230MODULE_LICENSE("GPL");
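
palmtc_pcmcia_hw_init() above is a textbook goto-unwind ladder: each successfully requested GPIO gets a matching error label, so a failure at step N releases exactly the N-1 resources already held, in reverse order. A reduced sketch of the pattern, with acquire()/release() standing in for gpio_request()/gpio_free() (the stand-ins always succeed, so the error paths are shown structurally only):

#include <stdio.h>

static int acquire(const char *what) { printf("acquire %s\n", what); return 0; }
static void release(const char *what) { printf("release %s\n", what); }

static int hw_init(void)
{
	int ret;

	ret = acquire("PWR1");
	if (ret)
		goto err1;
	ret = acquire("PWR2");
	if (ret)
		goto err2;
	ret = acquire("RST");
	if (ret)
		goto err3;
	return 0;

err3:
	release("PWR2");	/* undo step 2 */
err2:
	release("PWR1");	/* undo step 1 */
err1:
	return ret;
}

int main(void)
{
	return hw_init();
}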
diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c
index bb8cc05605ac..747ca194fad4 100644
--- a/drivers/rtc/rtc-pxa.c
+++ b/drivers/rtc/rtc-pxa.c
@@ -438,34 +438,37 @@ static int __exit pxa_rtc_remove(struct platform_device *pdev)
438} 438}
439 439
440#ifdef CONFIG_PM 440#ifdef CONFIG_PM
441static int pxa_rtc_suspend(struct platform_device *pdev, pm_message_t state) 441static int pxa_rtc_suspend(struct device *dev)
442{ 442{
443 struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev); 443 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
444 444
445 if (device_may_wakeup(&pdev->dev)) 445 if (device_may_wakeup(dev))
446 enable_irq_wake(pxa_rtc->irq_Alrm); 446 enable_irq_wake(pxa_rtc->irq_Alrm);
447 return 0; 447 return 0;
448} 448}
449 449
450static int pxa_rtc_resume(struct platform_device *pdev) 450static int pxa_rtc_resume(struct device *dev)
451{ 451{
452 struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev); 452 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
453 453
454 if (device_may_wakeup(&pdev->dev)) 454 if (device_may_wakeup(dev))
455 disable_irq_wake(pxa_rtc->irq_Alrm); 455 disable_irq_wake(pxa_rtc->irq_Alrm);
456 return 0; 456 return 0;
457} 457}
458#else 458
459#define pxa_rtc_suspend NULL 459static struct dev_pm_ops pxa_rtc_pm_ops = {
460#define pxa_rtc_resume NULL 460 .suspend = pxa_rtc_suspend,
461 .resume = pxa_rtc_resume,
462};
461#endif 463#endif
462 464
463static struct platform_driver pxa_rtc_driver = { 465static struct platform_driver pxa_rtc_driver = {
464 .remove = __exit_p(pxa_rtc_remove), 466 .remove = __exit_p(pxa_rtc_remove),
465 .suspend = pxa_rtc_suspend,
466 .resume = pxa_rtc_resume,
467 .driver = { 467 .driver = {
468 .name = "pxa-rtc", 468 .name = "pxa-rtc",
469#ifdef CONFIG_PM
470 .pm = &pxa_rtc_pm_ops,
471#endif
469 }, 472 },
470}; 473};
471 474
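
Beyond the dev_pm_ops conversion, the rtc-pxa hooks (and the rtc-sa1100 ones below) show the wakeup idiom: the alarm IRQ is armed as a wakeup source only when userspace has enabled wakeup for the device, and disarmed symmetrically on resume. A sketch with device_may_wakeup() and the IRQ-wake helpers stubbed; the IRQ number is a made-up example:

#include <stdio.h>

static int wakeup_enabled = 1;	/* stand-in for device_may_wakeup(dev) */

static void enable_irq_wake(int irq)  { printf("irq %d armed for wakeup\n", irq); }
static void disable_irq_wake(int irq) { printf("irq %d disarmed\n", irq); }

static int rtc_suspend(int alarm_irq)
{
	if (wakeup_enabled)
		enable_irq_wake(alarm_irq);
	return 0;
}

static int rtc_resume(int alarm_irq)
{
	if (wakeup_enabled)
		disable_irq_wake(alarm_irq);
	return 0;
}

int main(void)
{
	rtc_suspend(31);
	rtc_resume(31);
	return 0;
}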
diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c
index 021b2928f0b9..29f98a70586e 100644
--- a/drivers/rtc/rtc-sa1100.c
+++ b/drivers/rtc/rtc-sa1100.c
@@ -393,31 +393,34 @@ static int sa1100_rtc_remove(struct platform_device *pdev)
393} 393}
394 394
395#ifdef CONFIG_PM 395#ifdef CONFIG_PM
396static int sa1100_rtc_suspend(struct platform_device *pdev, pm_message_t state) 396static int sa1100_rtc_suspend(struct device *dev)
397{ 397{
398 if (device_may_wakeup(&pdev->dev)) 398 if (device_may_wakeup(dev))
399 enable_irq_wake(IRQ_RTCAlrm); 399 enable_irq_wake(IRQ_RTCAlrm);
400 return 0; 400 return 0;
401} 401}
402 402
403static int sa1100_rtc_resume(struct platform_device *pdev) 403static int sa1100_rtc_resume(struct device *dev)
404{ 404{
405 if (device_may_wakeup(&pdev->dev)) 405 if (device_may_wakeup(dev))
406 disable_irq_wake(IRQ_RTCAlrm); 406 disable_irq_wake(IRQ_RTCAlrm);
407 return 0; 407 return 0;
408} 408}
409#else 409
410#define sa1100_rtc_suspend NULL 410static struct dev_pm_ops sa1100_rtc_pm_ops = {
411#define sa1100_rtc_resume NULL 411 .suspend = sa1100_rtc_suspend,
412 .resume = sa1100_rtc_resume,
413};
412#endif 414#endif
413 415
414static struct platform_driver sa1100_rtc_driver = { 416static struct platform_driver sa1100_rtc_driver = {
415 .probe = sa1100_rtc_probe, 417 .probe = sa1100_rtc_probe,
416 .remove = sa1100_rtc_remove, 418 .remove = sa1100_rtc_remove,
417 .suspend = sa1100_rtc_suspend,
418 .resume = sa1100_rtc_resume,
419 .driver = { 419 .driver = {
420 .name = "sa1100-rtc", 420 .name = "sa1100-rtc",
421#ifdef CONFIG_PM
422 .pm = &sa1100_rtc_pm_ops,
423#endif
421 }, 424 },
422}; 425};
423 426
diff --git a/drivers/serial/cpm_uart/cpm_uart_core.c b/drivers/serial/cpm_uart/cpm_uart_core.c
index 8d349b23249a..300cea768d74 100644
--- a/drivers/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/serial/cpm_uart/cpm_uart_core.c
@@ -649,7 +649,7 @@ static int cpm_uart_tx_pump(struct uart_port *port)
649 u8 *p; 649 u8 *p;
650 int count; 650 int count;
651 struct uart_cpm_port *pinfo = (struct uart_cpm_port *)port; 651 struct uart_cpm_port *pinfo = (struct uart_cpm_port *)port;
652 struct circ_buf *xmit = &port->info->xmit; 652 struct circ_buf *xmit = &port->state->xmit;
653 653
654 /* Handle xon/xoff */ 654 /* Handle xon/xoff */
655 if (port->x_char) { 655 if (port->x_char) {
diff --git a/drivers/serial/crisv10.c b/drivers/serial/crisv10.c
index 7be52fe288eb..31f172397af3 100644
--- a/drivers/serial/crisv10.c
+++ b/drivers/serial/crisv10.c
@@ -18,6 +18,7 @@ static char *serial_version = "$Revision: 1.25 $";
18#include <linux/tty.h> 18#include <linux/tty.h>
19#include <linux/tty_flip.h> 19#include <linux/tty_flip.h>
20#include <linux/major.h> 20#include <linux/major.h>
21#include <linux/smp_lock.h>
21#include <linux/string.h> 22#include <linux/string.h>
22#include <linux/fcntl.h> 23#include <linux/fcntl.h>
23#include <linux/mm.h> 24#include <linux/mm.h>
diff --git a/drivers/serial/pxa.c b/drivers/serial/pxa.c
index 6443b7ff274a..b8629d74f6a2 100644
--- a/drivers/serial/pxa.c
+++ b/drivers/serial/pxa.c
@@ -726,9 +726,10 @@ static struct uart_driver serial_pxa_reg = {
726 .cons = PXA_CONSOLE, 726 .cons = PXA_CONSOLE,
727}; 727};
728 728
729static int serial_pxa_suspend(struct platform_device *dev, pm_message_t state) 729#ifdef CONFIG_PM
730static int serial_pxa_suspend(struct device *dev)
730{ 731{
731 struct uart_pxa_port *sport = platform_get_drvdata(dev); 732 struct uart_pxa_port *sport = dev_get_drvdata(dev);
732 733
733 if (sport) 734 if (sport)
734 uart_suspend_port(&serial_pxa_reg, &sport->port); 735 uart_suspend_port(&serial_pxa_reg, &sport->port);
@@ -736,9 +737,9 @@ static int serial_pxa_suspend(struct platform_device *dev, pm_message_t state)
736 return 0; 737 return 0;
737} 738}
738 739
739static int serial_pxa_resume(struct platform_device *dev) 740static int serial_pxa_resume(struct device *dev)
740{ 741{
741 struct uart_pxa_port *sport = platform_get_drvdata(dev); 742 struct uart_pxa_port *sport = dev_get_drvdata(dev);
742 743
743 if (sport) 744 if (sport)
744 uart_resume_port(&serial_pxa_reg, &sport->port); 745 uart_resume_port(&serial_pxa_reg, &sport->port);
@@ -746,6 +747,12 @@ static int serial_pxa_resume(struct platform_device *dev)
746 return 0; 747 return 0;
747} 748}
748 749
750static struct dev_pm_ops serial_pxa_pm_ops = {
751 .suspend = serial_pxa_suspend,
752 .resume = serial_pxa_resume,
753};
754#endif
755
749static int serial_pxa_probe(struct platform_device *dev) 756static int serial_pxa_probe(struct platform_device *dev)
750{ 757{
751 struct uart_pxa_port *sport; 758 struct uart_pxa_port *sport;
@@ -825,11 +832,12 @@ static struct platform_driver serial_pxa_driver = {
 	.probe		= serial_pxa_probe,
 	.remove		= serial_pxa_remove,
 
-	.suspend	= serial_pxa_suspend,
-	.resume		= serial_pxa_resume,
 	.driver		= {
 		.name	= "pxa2xx-uart",
 		.owner	= THIS_MODULE,
+#ifdef CONFIG_PM
+		.pm	= &serial_pxa_pm_ops,
+#endif
 	},
 };
 
diff --git a/drivers/spi/amba-pl022.c b/drivers/spi/amba-pl022.c
index c0f950a7cbec..958a3ffc8987 100644
--- a/drivers/spi/amba-pl022.c
+++ b/drivers/spi/amba-pl022.c
@@ -532,7 +532,7 @@ static void restore_state(struct pl022 *pl022)
 	GEN_MASK_BITS(SSP_DATA_BITS_12, SSP_CR0_MASK_DSS, 0)	| \
 	GEN_MASK_BITS(SSP_MICROWIRE_CHANNEL_FULL_DUPLEX, SSP_CR0_MASK_HALFDUP, 5) | \
 	GEN_MASK_BITS(SSP_CLK_POL_IDLE_LOW, SSP_CR0_MASK_SPO, 6) | \
-	GEN_MASK_BITS(SSP_CLK_FALLING_EDGE, SSP_CR0_MASK_SPH, 7) | \
+	GEN_MASK_BITS(SSP_CLK_SECOND_EDGE, SSP_CR0_MASK_SPH, 7) | \
 	GEN_MASK_BITS(NMDK_SSP_DEFAULT_CLKRATE, SSP_CR0_MASK_SCR, 8) | \
 	GEN_MASK_BITS(SSP_BITS_8, SSP_CR0_MASK_CSS, 16)	| \
 	GEN_MASK_BITS(SSP_INTERFACE_MOTOROLA_SPI, SSP_CR0_MASK_FRF, 21) \
@@ -1247,8 +1247,8 @@ static int verify_controller_parameters(struct pl022 *pl022,
 		return -EINVAL;
 	}
 	if (chip_info->iface == SSP_INTERFACE_MOTOROLA_SPI) {
-		if ((chip_info->clk_phase != SSP_CLK_RISING_EDGE)
-		    && (chip_info->clk_phase != SSP_CLK_FALLING_EDGE)) {
+		if ((chip_info->clk_phase != SSP_CLK_FIRST_EDGE)
+		    && (chip_info->clk_phase != SSP_CLK_SECOND_EDGE)) {
 			dev_err(chip_info->dev,
 				"Clock Phase is configured incorrectly\n");
 			return -EINVAL;
@@ -1485,7 +1485,7 @@ static int pl022_setup(struct spi_device *spi)
 		chip_info->data_size = SSP_DATA_BITS_12;
 		chip_info->rx_lev_trig = SSP_RX_1_OR_MORE_ELEM;
 		chip_info->tx_lev_trig = SSP_TX_1_OR_MORE_EMPTY_LOC;
-		chip_info->clk_phase = SSP_CLK_FALLING_EDGE;
+		chip_info->clk_phase = SSP_CLK_SECOND_EDGE;
 		chip_info->clk_pol = SSP_CLK_POL_IDLE_LOW;
 		chip_info->ctrl_len = SSP_BITS_8;
 		chip_info->wait_state = SSP_MWIRE_WAIT_ZERO;
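
The old SSP_CLK_RISING_EDGE/SSP_CLK_FALLING_EDGE names were misleading: which physical edge is "rising" depends on the configured clock polarity. The new FIRST/SECOND_EDGE names follow the usual SPI CPOL/CPHA convention. A summary of how the two settings combine into the standard SPI modes (the mode mapping follows the common SPI convention and the SSP_CLK_POL_IDLE_* spellings are assumed from the pl022 platform-data header, not shown in this patch):

    /*
     * SPI mode   clock polarity (CPOL)    clock phase (CPHA)
     * mode 0     SSP_CLK_POL_IDLE_LOW     SSP_CLK_FIRST_EDGE
     * mode 1     SSP_CLK_POL_IDLE_LOW     SSP_CLK_SECOND_EDGE
     * mode 2     SSP_CLK_POL_IDLE_HIGH    SSP_CLK_FIRST_EDGE
     * mode 3     SSP_CLK_POL_IDLE_HIGH    SSP_CLK_SECOND_EDGE
     *
     * "first/second edge" is well defined whatever the idle level,
     * which is what makes the new names unambiguous.
     */
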
diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
index 31dd56f0dae9..c8c2b693ffac 100644
--- a/drivers/spi/pxa2xx_spi.c
+++ b/drivers/spi/pxa2xx_spi.c
@@ -1668,10 +1668,9 @@ static void pxa2xx_spi_shutdown(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM
-
-static int pxa2xx_spi_suspend(struct platform_device *pdev, pm_message_t state)
+static int pxa2xx_spi_suspend(struct device *dev)
 {
-	struct driver_data *drv_data = platform_get_drvdata(pdev);
+	struct driver_data *drv_data = dev_get_drvdata(dev);
 	struct ssp_device *ssp = drv_data->ssp;
 	int status = 0;
 
@@ -1684,9 +1683,9 @@ static int pxa2xx_spi_suspend(struct platform_device *pdev, pm_message_t state)
 	return 0;
 }
 
-static int pxa2xx_spi_resume(struct platform_device *pdev)
+static int pxa2xx_spi_resume(struct device *dev)
 {
-	struct driver_data *drv_data = platform_get_drvdata(pdev);
+	struct driver_data *drv_data = dev_get_drvdata(dev);
 	struct ssp_device *ssp = drv_data->ssp;
 	int status = 0;
 
@@ -1703,26 +1702,29 @@ static int pxa2xx_spi_resume(struct platform_device *pdev)
 	/* Start the queue running */
 	status = start_queue(drv_data);
 	if (status != 0) {
-		dev_err(&pdev->dev, "problem starting queue (%d)\n", status);
+		dev_err(dev, "problem starting queue (%d)\n", status);
 		return status;
 	}
 
 	return 0;
 }
-#else
-#define pxa2xx_spi_suspend NULL
-#define pxa2xx_spi_resume NULL
-#endif /* CONFIG_PM */
+
+static struct dev_pm_ops pxa2xx_spi_pm_ops = {
+	.suspend	= pxa2xx_spi_suspend,
+	.resume		= pxa2xx_spi_resume,
+};
+#endif
 
 static struct platform_driver driver = {
 	.driver = {
 		.name	= "pxa2xx-spi",
 		.owner	= THIS_MODULE,
+#ifdef CONFIG_PM
+		.pm	= &pxa2xx_spi_pm_ops,
+#endif
 	},
 	.remove		= pxa2xx_spi_remove,
 	.shutdown	= pxa2xx_spi_shutdown,
-	.suspend	= pxa2xx_spi_suspend,
-	.resume		= pxa2xx_spi_resume,
 };
 
 static int __init pxa2xx_spi_init(void)
diff --git a/drivers/staging/go7007/Makefile b/drivers/staging/go7007/Makefile
index d14ea84a01f6..1301caa7495d 100644
--- a/drivers/staging/go7007/Makefile
+++ b/drivers/staging/go7007/Makefile
@@ -32,8 +32,3 @@ endif
 
 EXTRA_CFLAGS += -Idrivers/media/dvb/frontends
 EXTRA_CFLAGS += -Idrivers/media/dvb/dvb-core
-
-# Ubuntu 8.04 has CONFIG_SND undefined, so include lum sound/config.h too
-ifeq ($(CONFIG_SND),)
-EXTRA_CFLAGS += -include sound/config.h
-endif
diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig
index ebd7237230e3..240750881d28 100644
--- a/drivers/usb/Kconfig
+++ b/drivers/usb/Kconfig
@@ -22,7 +22,6 @@ config USB_ARCH_HAS_HCD
 	default y if PCMCIA && !M32R		# sl811_cs
 	default y if ARM			# SL-811
 	default y if SUPERH			# r8a66597-hcd
-	default y if MICROBLAZE
 	default PCI
 
 # many non-PCI SOC chips embed OHCI
diff --git a/drivers/usb/host/ohci-pxa27x.c b/drivers/usb/host/ohci-pxa27x.c
index b5294a9344de..f1c06202fdf2 100644
--- a/drivers/usb/host/ohci-pxa27x.c
+++ b/drivers/usb/host/ohci-pxa27x.c
@@ -481,38 +481,47 @@ static int ohci_hcd_pxa27x_drv_remove(struct platform_device *pdev)
 	return 0;
 }
 
 #ifdef CONFIG_PM
-static int ohci_hcd_pxa27x_drv_suspend(struct platform_device *pdev, pm_message_t state)
+static int ohci_hcd_pxa27x_drv_suspend(struct device *dev)
 {
-	struct usb_hcd *hcd = platform_get_drvdata(pdev);
+	struct usb_hcd *hcd = dev_get_drvdata(dev);
 	struct pxa27x_ohci *ohci = to_pxa27x_ohci(hcd);
 
 	if (time_before(jiffies, ohci->ohci.next_statechange))
 		msleep(5);
 	ohci->ohci.next_statechange = jiffies;
 
-	pxa27x_stop_hc(ohci, &pdev->dev);
+	pxa27x_stop_hc(ohci, dev);
 	hcd->state = HC_STATE_SUSPENDED;
 
 	return 0;
 }
 
-static int ohci_hcd_pxa27x_drv_resume(struct platform_device *pdev)
+static int ohci_hcd_pxa27x_drv_resume(struct device *dev)
 {
-	struct usb_hcd *hcd = platform_get_drvdata(pdev);
+	struct usb_hcd *hcd = dev_get_drvdata(dev);
 	struct pxa27x_ohci *ohci = to_pxa27x_ohci(hcd);
+	struct pxaohci_platform_data *inf = dev->platform_data;
 	int status;
 
 	if (time_before(jiffies, ohci->ohci.next_statechange))
 		msleep(5);
 	ohci->ohci.next_statechange = jiffies;
 
-	if ((status = pxa27x_start_hc(ohci, &pdev->dev)) < 0)
+	if ((status = pxa27x_start_hc(ohci, dev)) < 0)
 		return status;
 
+	/* Select Power Management Mode */
+	pxa27x_ohci_select_pmm(ohci, inf->port_mode);
+
 	ohci_finish_controller_resume(hcd);
 	return 0;
 }
+
+static struct dev_pm_ops ohci_hcd_pxa27x_pm_ops = {
+	.suspend	= ohci_hcd_pxa27x_drv_suspend,
+	.resume		= ohci_hcd_pxa27x_drv_resume,
+};
 #endif
 
 /* work with hotplug and coldplug */
@@ -522,13 +531,12 @@ static struct platform_driver ohci_hcd_pxa27x_driver = {
 	.probe		= ohci_hcd_pxa27x_drv_probe,
 	.remove		= ohci_hcd_pxa27x_drv_remove,
 	.shutdown	= usb_hcd_platform_shutdown,
-#ifdef CONFIG_PM
-	.suspend	= ohci_hcd_pxa27x_drv_suspend,
-	.resume		= ohci_hcd_pxa27x_drv_resume,
-#endif
 	.driver		= {
 		.name	= "pxa27x-ohci",
 		.owner	= THIS_MODULE,
+#ifdef CONFIG_PM
+		.pm	= &ohci_hcd_pxa27x_pm_ops,
+#endif
 	},
 };
 
diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 68fa0e43b781..8c075b2416bb 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -912,6 +912,7 @@ static void sierra_release(struct usb_serial *serial)
 	}
 }
 
+#ifdef CONFIG_PM
 static void stop_read_write_urbs(struct usb_serial *serial)
 {
 	int i, j;
@@ -988,6 +989,10 @@ static int sierra_resume(struct usb_serial *serial)
 
 	return ec ? -EIO : 0;
 }
+#else
+#define sierra_suspend NULL
+#define sierra_resume NULL
+#endif
 
 static struct usb_serial_driver sierra_device = {
 	.driver = {
diff --git a/drivers/video/backlight/da903x_bl.c b/drivers/video/backlight/da903x_bl.c
index 93bb4340cc64..701a1081e199 100644
--- a/drivers/video/backlight/da903x_bl.c
+++ b/drivers/video/backlight/da903x_bl.c
@@ -154,34 +154,38 @@ static int da903x_backlight_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM
-static int da903x_backlight_suspend(struct platform_device *pdev,
-		pm_message_t state)
+static int da903x_backlight_suspend(struct device *dev)
 {
+	struct platform_device *pdev = to_platform_device(dev);
 	struct backlight_device *bl = platform_get_drvdata(pdev);
 	return da903x_backlight_set(bl, 0);
 }
 
-static int da903x_backlight_resume(struct platform_device *pdev)
+static int da903x_backlight_resume(struct device *dev)
 {
+	struct platform_device *pdev = to_platform_device(dev);
 	struct backlight_device *bl = platform_get_drvdata(pdev);
 
 	backlight_update_status(bl);
 	return 0;
 }
-#else
-#define da903x_backlight_suspend NULL
-#define da903x_backlight_resume NULL
+
+static struct dev_pm_ops da903x_backlight_pm_ops = {
+	.suspend	= da903x_backlight_suspend,
+	.resume		= da903x_backlight_resume,
+};
 #endif
 
 static struct platform_driver da903x_backlight_driver = {
 	.driver		= {
 		.name	= "da903x-backlight",
 		.owner	= THIS_MODULE,
+#ifdef CONFIG_PM
+		.pm	= &da903x_backlight_pm_ops,
+#endif
 	},
 	.probe		= da903x_backlight_probe,
 	.remove		= da903x_backlight_remove,
-	.suspend	= da903x_backlight_suspend,
-	.resume		= da903x_backlight_resume,
 };
 
 static int __init da903x_backlight_init(void)
diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c
index 6506117c134b..1820c4a24434 100644
--- a/drivers/video/pxafb.c
+++ b/drivers/video/pxafb.c
@@ -1638,24 +1638,26 @@ pxafb_freq_policy(struct notifier_block *nb, unsigned long val, void *data)
  * Power management hooks.  Note that we won't be called from IRQ context,
  * unlike the blank functions above, so we may sleep.
  */
-static int pxafb_suspend(struct platform_device *dev, pm_message_t state)
+static int pxafb_suspend(struct device *dev)
 {
-	struct pxafb_info *fbi = platform_get_drvdata(dev);
+	struct pxafb_info *fbi = dev_get_drvdata(dev);
 
 	set_ctrlr_state(fbi, C_DISABLE_PM);
 	return 0;
 }
 
-static int pxafb_resume(struct platform_device *dev)
+static int pxafb_resume(struct device *dev)
 {
-	struct pxafb_info *fbi = platform_get_drvdata(dev);
+	struct pxafb_info *fbi = dev_get_drvdata(dev);
 
 	set_ctrlr_state(fbi, C_ENABLE_PM);
 	return 0;
 }
-#else
-#define pxafb_suspend	NULL
-#define pxafb_resume	NULL
+
+static struct dev_pm_ops pxafb_pm_ops = {
+	.suspend	= pxafb_suspend,
+	.resume		= pxafb_resume,
+};
 #endif
 
 static int __devinit pxafb_init_video_memory(struct pxafb_info *fbi)
@@ -2081,6 +2083,9 @@ static int __devinit pxafb_probe(struct platform_device *dev)
 		goto failed;
 	}
 
+	if (cpu_is_pxa3xx() && inf->acceleration_enabled)
+		fbi->fb.fix.accel = FB_ACCEL_PXA3XX;
+
 	fbi->backlight_power = inf->pxafb_backlight_power;
 	fbi->lcd_power = inf->pxafb_lcd_power;
 
@@ -2091,14 +2096,14 @@ static int __devinit pxafb_probe(struct platform_device *dev)
 		goto failed_fbi;
 	}
 
-	r = request_mem_region(r->start, r->end - r->start + 1, dev->name);
+	r = request_mem_region(r->start, resource_size(r), dev->name);
 	if (r == NULL) {
 		dev_err(&dev->dev, "failed to request I/O memory\n");
 		ret = -EBUSY;
 		goto failed_fbi;
 	}
 
-	fbi->mmio_base = ioremap(r->start, r->end - r->start + 1);
+	fbi->mmio_base = ioremap(r->start, resource_size(r));
 	if (fbi->mmio_base == NULL) {
 		dev_err(&dev->dev, "failed to map I/O memory\n");
 		ret = -EBUSY;
@@ -2197,7 +2202,7 @@ failed_free_dma:
 failed_free_io:
 	iounmap(fbi->mmio_base);
 failed_free_res:
-	release_mem_region(r->start, r->end - r->start + 1);
+	release_mem_region(r->start, resource_size(r));
 failed_fbi:
 	clk_put(fbi->clk);
 	platform_set_drvdata(dev, NULL);
@@ -2237,7 +2242,7 @@ static int __devexit pxafb_remove(struct platform_device *dev)
 	iounmap(fbi->mmio_base);
 
 	r = platform_get_resource(dev, IORESOURCE_MEM, 0);
-	release_mem_region(r->start, r->end - r->start + 1);
+	release_mem_region(r->start, resource_size(r));
 
 	clk_put(fbi->clk);
 	kfree(fbi);
@@ -2248,11 +2253,12 @@ static int __devexit pxafb_remove(struct platform_device *dev)
 static struct platform_driver pxafb_driver = {
 	.probe		= pxafb_probe,
 	.remove		= __devexit_p(pxafb_remove),
-	.suspend	= pxafb_suspend,
-	.resume		= pxafb_resume,
 	.driver		= {
 		.owner	= THIS_MODULE,
 		.name	= "pxa2xx-fb",
+#ifdef CONFIG_PM
+		.pm	= &pxafb_pm_ops,
+#endif
 	},
 };
 
diff --git a/drivers/vlynq/vlynq.c b/drivers/vlynq/vlynq.c
index ba3d71f5c7d0..9554ad5f9af7 100644
--- a/drivers/vlynq/vlynq.c
+++ b/drivers/vlynq/vlynq.c
@@ -702,7 +702,7 @@ static int vlynq_probe(struct platform_device *pdev)
 	dev->mem_start = mem_res->start;
 	dev->mem_end = mem_res->end;
 
-	len = regs_res->end - regs_res->start;
+	len = resource_size(regs_res);
 	if (!request_mem_region(regs_res->start, len, dev_name(&dev->dev))) {
 		printk(KERN_ERR "%s: Can't request vlynq registers\n",
 		       dev_name(&dev->dev));
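
Besides being shorter, resource_size() fixes an off-by-one in the vlynq hunk above: struct resource ranges are inclusive, so the size is end - start + 1, while the old code computed end - start. The helper, as defined in include/linux/ioport.h:

    static inline resource_size_t resource_size(const struct resource *res)
    {
    	return res->end - res->start + 1;
    }

The pxafb.c conversions above were already using the correct "+ 1" form, so there the change is purely cosmetic.
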
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 798cb071d132..3f57ce4bee5d 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -19,9 +19,6 @@ static int
 adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh,
 	       int create)
 {
-	if (block < 0)
-		goto abort_negative;
-
 	if (!create) {
 		if (block >= inode->i_blocks)
 			goto abort_toobig;
@@ -34,10 +31,6 @@ adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh,
 	/* don't support allocation of blocks yet */
 	return -EIO;
 
-abort_negative:
-	adfs_error(inode->i_sb, "block %d < 0", block);
-	return -EIO;
-
 abort_toobig:
 	return 0;
 }
diff --git a/fs/attr.c b/fs/attr.c
index 9fe1b1bd30a8..96d394bdaddf 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -18,7 +18,7 @@
 /* Taken over from the old code... */
 
 /* POSIX UID/GID verification for setting inode attributes. */
-int inode_change_ok(struct inode *inode, struct iattr *attr)
+int inode_change_ok(const struct inode *inode, struct iattr *attr)
 {
 	int retval = -EPERM;
 	unsigned int ia_valid = attr->ia_valid;
@@ -60,9 +60,51 @@ fine:
 error:
 	return retval;
 }
-
 EXPORT_SYMBOL(inode_change_ok);
 
+/**
+ * inode_newsize_ok - may this inode be truncated to a given size
+ * @inode:	the inode to be truncated
+ * @offset:	the new size to assign to the inode
+ * @Returns:	0 on success, -ve errno on failure
+ *
+ * inode_newsize_ok will check filesystem limits and ulimits to check that the
+ * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
+ * when necessary. Caller must not proceed with inode size change if failure is
+ * returned. @inode must be a file (not directory), with appropriate
+ * permissions to allow truncate (inode_newsize_ok does NOT check these
+ * conditions).
+ *
+ * inode_newsize_ok must be called with i_mutex held.
+ */
+int inode_newsize_ok(const struct inode *inode, loff_t offset)
+{
+	if (inode->i_size < offset) {
+		unsigned long limit;
+
+		limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+		if (limit != RLIM_INFINITY && offset > limit)
+			goto out_sig;
+		if (offset > inode->i_sb->s_maxbytes)
+			goto out_big;
+	} else {
+		/*
+		 * truncation of in-use swapfiles is disallowed - it would
+		 * cause subsequent swapout to scribble on the now-freed
+		 * blocks.
+		 */
+		if (IS_SWAPFILE(inode))
+			return -ETXTBSY;
+	}
+
+	return 0;
+out_sig:
+	send_sig(SIGXFSZ, current, 0);
+out_big:
+	return -EFBIG;
+}
+EXPORT_SYMBOL(inode_newsize_ok);
+
 int inode_setattr(struct inode * inode, struct iattr * attr)
 {
 	unsigned int ia_valid = attr->ia_valid;
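
A sketch of the intended call site for the new helper, in a filesystem's setattr path (foo_setattr is hypothetical; per the kerneldoc above, the caller holds i_mutex and handles permissions separately):

    static int foo_setattr(struct dentry *dentry, struct iattr *attr)
    {
    	struct inode *inode = dentry->d_inode;
    	int error;

    	error = inode_change_ok(inode, attr);
    	if (error)
    		return error;

    	if (attr->ia_valid & ATTR_SIZE) {
    		/* checks RLIMIT_FSIZE, s_maxbytes and swapfiles;
    		 * may send SIGXFSZ and return -EFBIG or -ETXTBSY */
    		error = inode_newsize_ok(inode, attr->ia_size);
    		if (error)
    			return error;
    	}

    	return inode_setattr(inode, attr);
    }
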
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index dd376c124e71..33baf27fac78 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -737,12 +737,7 @@ befs_put_super(struct super_block *sb)
 {
 	kfree(BEFS_SB(sb)->mount_opts.iocharset);
 	BEFS_SB(sb)->mount_opts.iocharset = NULL;
-
-	if (BEFS_SB(sb)->nls) {
-		unload_nls(BEFS_SB(sb)->nls);
-		BEFS_SB(sb)->nls = NULL;
-	}
-
+	unload_nls(BEFS_SB(sb)->nls);
 	kfree(sb->s_fs_info);
 	sb->s_fs_info = NULL;
 }
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 442d94fe255c..b9b3bb51b1e4 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1711,42 +1711,52 @@ struct elf_note_info {
 	int numnote;
 };
 
-static int fill_note_info(struct elfhdr *elf, int phdrs,
-			  struct elf_note_info *info,
-			  long signr, struct pt_regs *regs)
+static int elf_note_info_init(struct elf_note_info *info)
 {
-#define	NUM_NOTES	6
-	struct list_head *t;
-
-	info->notes = NULL;
-	info->prstatus = NULL;
-	info->psinfo = NULL;
-	info->fpu = NULL;
-#ifdef ELF_CORE_COPY_XFPREGS
-	info->xfpu = NULL;
-#endif
+	memset(info, 0, sizeof(*info));
 	INIT_LIST_HEAD(&info->thread_list);
 
-	info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
-			      GFP_KERNEL);
+	/* Allocate space for six ELF notes */
+	info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
 	if (!info->notes)
 		return 0;
 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
 	if (!info->psinfo)
-		return 0;
+		goto notes_free;
 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
 	if (!info->prstatus)
-		return 0;
+		goto psinfo_free;
 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
 	if (!info->fpu)
-		return 0;
+		goto prstatus_free;
 #ifdef ELF_CORE_COPY_XFPREGS
 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
 	if (!info->xfpu)
-		return 0;
+		goto fpu_free;
+#endif
+	return 1;
+#ifdef ELF_CORE_COPY_XFPREGS
+ fpu_free:
+	kfree(info->fpu);
 #endif
+ prstatus_free:
+	kfree(info->prstatus);
+ psinfo_free:
+	kfree(info->psinfo);
+ notes_free:
+	kfree(info->notes);
+	return 0;
+}
+
+static int fill_note_info(struct elfhdr *elf, int phdrs,
+			  struct elf_note_info *info,
+			  long signr, struct pt_regs *regs)
+{
+	struct list_head *t;
+
+	if (!elf_note_info_init(info))
+		return 0;
 
-	info->thread_status_size = 0;
 	if (signr) {
 		struct core_thread *ct;
 		struct elf_thread_status *ets;
@@ -1806,8 +1816,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 #endif
 
 	return 1;
-
-#undef NUM_NOTES
 }
 
 static size_t get_note_info_size(struct elf_note_info *info)
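
The rewrite replaces the NUM_NOTES macro and the leaky early `return 0`s with the standard kernel goto-unwind idiom: each allocation failure jumps to a label that frees everything allocated before it, in reverse order. The idiom in miniature (illustrative names, not from this patch):

    #include <linux/slab.h>

    struct two_bufs {
    	int *a;
    	int *b;
    };

    static int two_bufs_init(struct two_bufs *t)
    {
    	t->a = kmalloc(sizeof(*t->a), GFP_KERNEL);
    	if (!t->a)
    		goto fail;
    	t->b = kmalloc(sizeof(*t->b), GFP_KERNEL);
    	if (!t->b)
    		goto a_free;	/* unwind only what succeeded */
    	return 1;	/* same 1-on-success convention as elf_note_info_init() */

    a_free:
    	kfree(t->a);
    fail:
    	return 0;
    }
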
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 76285471073e..38502c67987c 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -283,20 +283,23 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
 	}
 
 	stack_size = exec_params.stack_size;
-	if (stack_size < interp_params.stack_size)
-		stack_size = interp_params.stack_size;
-
 	if (exec_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
 		executable_stack = EXSTACK_ENABLE_X;
 	else if (exec_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK)
 		executable_stack = EXSTACK_DISABLE_X;
-	else if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
-		executable_stack = EXSTACK_ENABLE_X;
-	else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK)
-		executable_stack = EXSTACK_DISABLE_X;
 	else
 		executable_stack = EXSTACK_DEFAULT;
 
+	if (stack_size == 0) {
+		stack_size = interp_params.stack_size;
+		if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
+			executable_stack = EXSTACK_ENABLE_X;
+		else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK)
+			executable_stack = EXSTACK_DISABLE_X;
+		else
+			executable_stack = EXSTACK_DEFAULT;
+	}
+
 	retval = -ENOEXEC;
 	if (stack_size == 0)
 		goto error;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index e92f229e3c6e..a2796651e756 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -278,8 +278,6 @@ static int decompress_exec(
 		ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos);
 		if (ret <= 0)
 			break;
-		if (ret >= (unsigned long) -4096)
-			break;
 		len -= ret;
 
 		strm.next_in = buf;
@@ -335,7 +333,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp)
335 "(%d != %d)", (unsigned) r, curid, id); 333 "(%d != %d)", (unsigned) r, curid, id);
336 goto failed; 334 goto failed;
337 } else if ( ! p->lib_list[id].loaded && 335 } else if ( ! p->lib_list[id].loaded &&
338 load_flat_shared_library(id, p) > (unsigned long) -4096) { 336 IS_ERR_VALUE(load_flat_shared_library(id, p))) {
339 printk("BINFMT_FLAT: failed to load library %d", id); 337 printk("BINFMT_FLAT: failed to load library %d", id);
340 goto failed; 338 goto failed;
341 } 339 }
@@ -545,7 +543,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 		textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
 				  MAP_PRIVATE|MAP_EXECUTABLE, 0);
 		up_write(&current->mm->mmap_sem);
-		if (!textpos || textpos >= (unsigned long) -4096) {
+		if (!textpos || IS_ERR_VALUE(textpos)) {
 			if (!textpos)
 				textpos = (unsigned long) -ENOMEM;
 			printk("Unable to mmap process text, errno %d\n", (int)-textpos);
@@ -560,7 +558,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 				PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
 		up_write(&current->mm->mmap_sem);
 
-		if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) {
+		if (realdatastart == 0 || IS_ERR_VALUE(realdatastart)) {
 			if (!realdatastart)
 				realdatastart = (unsigned long) -ENOMEM;
 			printk("Unable to allocate RAM for process data, errno %d\n",
@@ -587,7 +585,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 			result = bprm->file->f_op->read(bprm->file, (char *) datapos,
 					data_len + (relocs * sizeof(unsigned long)), &fpos);
 		}
-		if (result >= (unsigned long)-4096) {
+		if (IS_ERR_VALUE(result)) {
 			printk("Unable to read data+bss, errno %d\n", (int)-result);
 			do_munmap(current->mm, textpos, text_len);
 			do_munmap(current->mm, realdatastart, data_len + extra);
@@ -607,7 +605,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 				PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
 		up_write(&current->mm->mmap_sem);
 
-		if (!textpos || textpos >= (unsigned long) -4096) {
+		if (!textpos || IS_ERR_VALUE(textpos)) {
 			if (!textpos)
 				textpos = (unsigned long) -ENOMEM;
 			printk("Unable to allocate RAM for process text/data, errno %d\n",
@@ -641,7 +639,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 			fpos = 0;
 			result = bprm->file->f_op->read(bprm->file,
 					(char *) textpos, text_len, &fpos);
-			if (result < (unsigned long) -4096)
+			if (!IS_ERR_VALUE(result))
 				result = decompress_exec(bprm, text_len, (char *) datapos,
 						 data_len + (relocs * sizeof(unsigned long)), 0);
 		}
@@ -651,13 +649,13 @@ static int load_flat_file(struct linux_binprm * bprm,
 			fpos = 0;
 			result = bprm->file->f_op->read(bprm->file,
 					(char *) textpos, text_len, &fpos);
-			if (result < (unsigned long) -4096) {
+			if (!IS_ERR_VALUE(result)) {
 				fpos = ntohl(hdr->data_start);
 				result = bprm->file->f_op->read(bprm->file, (char *) datapos,
 					data_len + (relocs * sizeof(unsigned long)), &fpos);
 			}
 		}
-	if (result >= (unsigned long)-4096) {
+	if (IS_ERR_VALUE(result)) {
 		printk("Unable to read code+data+bss, errno %d\n",(int)-result);
 		do_munmap(current->mm, textpos, text_len + data_len + extra +
 			MAX_SHARED_LIBS * sizeof(unsigned long));
@@ -835,7 +833,7 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
 
 	res = prepare_binprm(&bprm);
 
-	if (res <= (unsigned long)-4096)
+	if (!IS_ERR_VALUE(res))
 		res = load_flat_file(&bprm, libs, id, NULL);
 
 	abort_creds(bprm.cred);
@@ -880,7 +878,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	stack_len += FLAT_DATA_ALIGN - 1;  /* reserve for upcoming alignment */
 
 	res = load_flat_file(bprm, &libinfo, 0, &stack_len);
-	if (res > (unsigned long)-4096)
+	if (IS_ERR_VALUE(res))
 		return res;
 
 	/* Update data segment pointers for all libraries */
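
All the open-coded `(unsigned long) -4096` comparisons in binfmt_flat ask the same question: does this unsigned return value actually encode a negative errno in the top page of the address space? IS_ERR_VALUE() from <linux/err.h> says so by name; its definition is essentially:

    #define MAX_ERRNO	4095

    #define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)

Since valid errnos run from 1 to MAX_ERRNO (-1 .. -4095 once negated and cast), the old -4096 boundary and IS_ERR_VALUE's -4095 boundary differ only at the single value -4096, which is not a valid errno, so behaviour is unchanged for every real error return.
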
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 5d1ed50bd46c..9cf4b926f8e4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -216,8 +216,6 @@ EXPORT_SYMBOL(fsync_bdev);
  * freeze_bdev  --  lock a filesystem and force it into a consistent state
  * @bdev:	blockdevice to lock
  *
- * This takes the block device bd_mount_sem to make sure no new mounts
- * happen on bdev until thaw_bdev() is called.
  * If a superblock is found on this device, we take the s_umount semaphore
  * on it to make sure nobody unmounts until the snapshot creation is done.
  * The reference counter (bd_fsfreeze_count) guarantees that only the last
@@ -232,46 +230,55 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 	int error = 0;
 
 	mutex_lock(&bdev->bd_fsfreeze_mutex);
-	if (bdev->bd_fsfreeze_count > 0) {
-		bdev->bd_fsfreeze_count++;
+	if (++bdev->bd_fsfreeze_count > 1) {
+		/*
+		 * We don't even need to grab a reference - the first call
+		 * to freeze_bdev grab an active reference and only the last
+		 * thaw_bdev drops it.
+		 */
 		sb = get_super(bdev);
+		drop_super(sb);
 		mutex_unlock(&bdev->bd_fsfreeze_mutex);
 		return sb;
 	}
-	bdev->bd_fsfreeze_count++;
-
-	down(&bdev->bd_mount_sem);
-	sb = get_super(bdev);
-	if (sb && !(sb->s_flags & MS_RDONLY)) {
-		sb->s_frozen = SB_FREEZE_WRITE;
-		smp_wmb();
-
-		sync_filesystem(sb);
-
-		sb->s_frozen = SB_FREEZE_TRANS;
-		smp_wmb();
-
-		sync_blockdev(sb->s_bdev);
-
-		if (sb->s_op->freeze_fs) {
-			error = sb->s_op->freeze_fs(sb);
-			if (error) {
-				printk(KERN_ERR
-					"VFS:Filesystem freeze failed\n");
-				sb->s_frozen = SB_UNFROZEN;
-				drop_super(sb);
-				up(&bdev->bd_mount_sem);
-				bdev->bd_fsfreeze_count--;
-				mutex_unlock(&bdev->bd_fsfreeze_mutex);
-				return ERR_PTR(error);
-			}
+
+	sb = get_active_super(bdev);
+	if (!sb)
+		goto out;
+	if (sb->s_flags & MS_RDONLY) {
+		deactivate_locked_super(sb);
+		mutex_unlock(&bdev->bd_fsfreeze_mutex);
+		return sb;
+	}
+
+	sb->s_frozen = SB_FREEZE_WRITE;
+	smp_wmb();
+
+	sync_filesystem(sb);
+
+	sb->s_frozen = SB_FREEZE_TRANS;
+	smp_wmb();
+
+	sync_blockdev(sb->s_bdev);
+
+	if (sb->s_op->freeze_fs) {
+		error = sb->s_op->freeze_fs(sb);
+		if (error) {
+			printk(KERN_ERR
+				"VFS:Filesystem freeze failed\n");
+			sb->s_frozen = SB_UNFROZEN;
+			deactivate_locked_super(sb);
+			bdev->bd_fsfreeze_count--;
+			mutex_unlock(&bdev->bd_fsfreeze_mutex);
+			return ERR_PTR(error);
 		}
 	}
+	up_write(&sb->s_umount);
 
+ out:
 	sync_blockdev(bdev);
 	mutex_unlock(&bdev->bd_fsfreeze_mutex);
-
-	return sb;	/* thaw_bdev releases s->s_umount and bd_mount_sem */
+	return sb;	/* thaw_bdev releases s->s_umount */
 }
 EXPORT_SYMBOL(freeze_bdev);
 
@@ -284,44 +291,44 @@ EXPORT_SYMBOL(freeze_bdev);
  */
 int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 {
-	int error = 0;
+	int error = -EINVAL;
 
 	mutex_lock(&bdev->bd_fsfreeze_mutex);
-	if (!bdev->bd_fsfreeze_count) {
-		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		return -EINVAL;
-	}
-
-	bdev->bd_fsfreeze_count--;
-	if (bdev->bd_fsfreeze_count > 0) {
-		if (sb)
-			drop_super(sb);
-		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		return 0;
-	}
-
-	if (sb) {
-		BUG_ON(sb->s_bdev != bdev);
-		if (!(sb->s_flags & MS_RDONLY)) {
-			if (sb->s_op->unfreeze_fs) {
-				error = sb->s_op->unfreeze_fs(sb);
-				if (error) {
-					printk(KERN_ERR
-						"VFS:Filesystem thaw failed\n");
-					sb->s_frozen = SB_FREEZE_TRANS;
-					bdev->bd_fsfreeze_count++;
-					mutex_unlock(&bdev->bd_fsfreeze_mutex);
-					return error;
-				}
-			}
-			sb->s_frozen = SB_UNFROZEN;
-			smp_wmb();
-			wake_up(&sb->s_wait_unfrozen);
+	if (!bdev->bd_fsfreeze_count)
+		goto out_unlock;
+
+	error = 0;
+	if (--bdev->bd_fsfreeze_count > 0)
+		goto out_unlock;
+
+	if (!sb)
+		goto out_unlock;
+
+	BUG_ON(sb->s_bdev != bdev);
+	down_write(&sb->s_umount);
+	if (sb->s_flags & MS_RDONLY)
+		goto out_deactivate;
+
+	if (sb->s_op->unfreeze_fs) {
+		error = sb->s_op->unfreeze_fs(sb);
+		if (error) {
+			printk(KERN_ERR
+			       "VFS:Filesystem thaw failed\n");
+			sb->s_frozen = SB_FREEZE_TRANS;
+			bdev->bd_fsfreeze_count++;
+			mutex_unlock(&bdev->bd_fsfreeze_mutex);
+			return error;
 		}
-		drop_super(sb);
 	}
 
-	up(&bdev->bd_mount_sem);
+	sb->s_frozen = SB_UNFROZEN;
+	smp_wmb();
+	wake_up(&sb->s_wait_unfrozen);
+
+out_deactivate:
+	if (sb)
+		deactivate_locked_super(sb);
+out_unlock:
 	mutex_unlock(&bdev->bd_fsfreeze_mutex);
 	return 0;
 }
@@ -430,7 +437,6 @@ static void init_once(void *foo)
 
 	memset(bdev, 0, sizeof(*bdev));
 	mutex_init(&bdev->bd_mutex);
-	sema_init(&bdev->bd_mount_sem, 1);
 	INIT_LIST_HEAD(&bdev->bd_inodes);
 	INIT_LIST_HEAD(&bdev->bd_list);
 #ifdef CONFIG_SYSFS
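
With bd_mount_sem gone, nesting is carried entirely by bd_fsfreeze_count under bd_fsfreeze_mutex: only the first freeze_bdev() does the actual freeze (taking an active superblock reference via get_active_super()), and only the last thaw_bdev() unfreezes. A sketch of the resulting caller-visible semantics (error handling elided; freeze_nested is a hypothetical caller):

    static void freeze_nested(struct block_device *bdev)
    {
    	struct super_block *sb, *sb2;

    	sb  = freeze_bdev(bdev);	/* count 0 -> 1: filesystem frozen */
    	sb2 = freeze_bdev(bdev);	/* count 1 -> 2: no further work   */

    	thaw_bdev(bdev, sb2);		/* count 2 -> 1: still frozen      */
    	thaw_bdev(bdev, sb);		/* count 1 -> 0: actually thaws    */
    }
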
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 019e8af449ab..282ca085c2fb 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -48,6 +48,9 @@ struct btrfs_worker_thread {
 	/* number of things on the pending list */
 	atomic_t num_pending;
 
+	/* reference counter for this struct */
+	atomic_t refs;
+
 	unsigned long sequence;
 
 	/* protects the pending list. */
@@ -71,7 +74,12 @@ static void check_idle_worker(struct btrfs_worker_thread *worker)
 		unsigned long flags;
 		spin_lock_irqsave(&worker->workers->lock, flags);
 		worker->idle = 1;
-		list_move(&worker->worker_list, &worker->workers->idle_list);
+
+		/* the list may be empty if the worker is just starting */
+		if (!list_empty(&worker->worker_list)) {
+			list_move(&worker->worker_list,
+				  &worker->workers->idle_list);
+		}
 		spin_unlock_irqrestore(&worker->workers->lock, flags);
 	}
 }
@@ -87,23 +95,49 @@ static void check_busy_worker(struct btrfs_worker_thread *worker)
 		unsigned long flags;
 		spin_lock_irqsave(&worker->workers->lock, flags);
 		worker->idle = 0;
-		list_move_tail(&worker->worker_list,
-			       &worker->workers->worker_list);
+
+		if (!list_empty(&worker->worker_list)) {
+			list_move_tail(&worker->worker_list,
+				       &worker->workers->worker_list);
+		}
 		spin_unlock_irqrestore(&worker->workers->lock, flags);
 	}
 }
 
-static noinline int run_ordered_completions(struct btrfs_workers *workers,
-					    struct btrfs_work *work)
+static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
 {
+	struct btrfs_workers *workers = worker->workers;
 	unsigned long flags;
 
+	rmb();
+	if (!workers->atomic_start_pending)
+		return;
+
+	spin_lock_irqsave(&workers->lock, flags);
+	if (!workers->atomic_start_pending)
+		goto out;
+
+	workers->atomic_start_pending = 0;
+	if (workers->num_workers >= workers->max_workers)
+		goto out;
+
+	spin_unlock_irqrestore(&workers->lock, flags);
+	btrfs_start_workers(workers, 1);
+	return;
+
+out:
+	spin_unlock_irqrestore(&workers->lock, flags);
+}
+
+static noinline int run_ordered_completions(struct btrfs_workers *workers,
+					    struct btrfs_work *work)
+{
 	if (!workers->ordered)
 		return 0;
 
 	set_bit(WORK_DONE_BIT, &work->flags);
 
-	spin_lock_irqsave(&workers->lock, flags);
+	spin_lock(&workers->order_lock);
 
 	while (1) {
 		if (!list_empty(&workers->prio_order_list)) {
@@ -126,45 +160,118 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers,
 		if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
 			break;
 
-		spin_unlock_irqrestore(&workers->lock, flags);
+		spin_unlock(&workers->order_lock);
 
 		work->ordered_func(work);
 
 		/* now take the lock again and call the freeing code */
-		spin_lock_irqsave(&workers->lock, flags);
+		spin_lock(&workers->order_lock);
 		list_del(&work->order_list);
 		work->ordered_free(work);
 	}
 
-	spin_unlock_irqrestore(&workers->lock, flags);
+	spin_unlock(&workers->order_lock);
 	return 0;
 }
 
+static void put_worker(struct btrfs_worker_thread *worker)
+{
+	if (atomic_dec_and_test(&worker->refs))
+		kfree(worker);
+}
+
+static int try_worker_shutdown(struct btrfs_worker_thread *worker)
+{
+	int freeit = 0;
+
+	spin_lock_irq(&worker->lock);
+	spin_lock(&worker->workers->lock);
+	if (worker->workers->num_workers > 1 &&
+	    worker->idle &&
+	    !worker->working &&
+	    !list_empty(&worker->worker_list) &&
+	    list_empty(&worker->prio_pending) &&
+	    list_empty(&worker->pending) &&
+	    atomic_read(&worker->num_pending) == 0) {
+		freeit = 1;
+		list_del_init(&worker->worker_list);
+		worker->workers->num_workers--;
+	}
+	spin_unlock(&worker->workers->lock);
+	spin_unlock_irq(&worker->lock);
+
+	if (freeit)
+		put_worker(worker);
+	return freeit;
+}
+
+static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
+					struct list_head *prio_head,
+					struct list_head *head)
+{
+	struct btrfs_work *work = NULL;
+	struct list_head *cur = NULL;
+
+	if(!list_empty(prio_head))
+		cur = prio_head->next;
+
+	smp_mb();
+	if (!list_empty(&worker->prio_pending))
+		goto refill;
+
+	if (!list_empty(head))
+		cur = head->next;
+
+	if (cur)
+		goto out;
+
+refill:
+	spin_lock_irq(&worker->lock);
+	list_splice_tail_init(&worker->prio_pending, prio_head);
+	list_splice_tail_init(&worker->pending, head);
+
+	if (!list_empty(prio_head))
+		cur = prio_head->next;
+	else if (!list_empty(head))
+		cur = head->next;
+	spin_unlock_irq(&worker->lock);
+
+	if (!cur)
+		goto out_fail;
+
+out:
+	work = list_entry(cur, struct btrfs_work, list);
+
+out_fail:
+	return work;
+}
+
 /*
  * main loop for servicing work items
  */
 static int worker_loop(void *arg)
 {
 	struct btrfs_worker_thread *worker = arg;
-	struct list_head *cur;
+	struct list_head head;
+	struct list_head prio_head;
 	struct btrfs_work *work;
+
+	INIT_LIST_HEAD(&head);
+	INIT_LIST_HEAD(&prio_head);
+
 	do {
-		spin_lock_irq(&worker->lock);
-again_locked:
+again:
 		while (1) {
-			if (!list_empty(&worker->prio_pending))
-				cur = worker->prio_pending.next;
-			else if (!list_empty(&worker->pending))
-				cur = worker->pending.next;
-			else
+
+
+			work = get_next_work(worker, &prio_head, &head);
+			if (!work)
 				break;
 
-			work = list_entry(cur, struct btrfs_work, list);
 			list_del(&work->list);
 			clear_bit(WORK_QUEUED_BIT, &work->flags);
 
 			work->worker = worker;
-			spin_unlock_irq(&worker->lock);
 
 			work->func(work);
 
@@ -175,9 +282,13 @@ again_locked:
 			 */
 			run_ordered_completions(worker->workers, work);
 
-			spin_lock_irq(&worker->lock);
-			check_idle_worker(worker);
+			check_pending_worker_creates(worker);
+
 		}
+
+		spin_lock_irq(&worker->lock);
+		check_idle_worker(worker);
+
 		if (freezing(current)) {
 			worker->working = 0;
 			spin_unlock_irq(&worker->lock);
@@ -216,8 +327,10 @@ again_locked:
 			spin_lock_irq(&worker->lock);
 			set_current_state(TASK_INTERRUPTIBLE);
 			if (!list_empty(&worker->pending) ||
-			    !list_empty(&worker->prio_pending))
-				goto again_locked;
+			    !list_empty(&worker->prio_pending)) {
+				spin_unlock_irq(&worker->lock);
+				goto again;
+			}
 
 			/*
 			 * this makes sure we get a wakeup when someone
@@ -226,8 +339,13 @@ again_locked:
 			worker->working = 0;
 			spin_unlock_irq(&worker->lock);
 
-			if (!kthread_should_stop())
-				schedule();
+			if (!kthread_should_stop()) {
+				schedule_timeout(HZ * 120);
+				if (!worker->working &&
+				    try_worker_shutdown(worker)) {
+					return 0;
+				}
+			}
 		}
 		__set_current_state(TASK_RUNNING);
 	}
@@ -242,16 +360,30 @@ int btrfs_stop_workers(struct btrfs_workers *workers)
 {
 	struct list_head *cur;
 	struct btrfs_worker_thread *worker;
+	int can_stop;
 
+	spin_lock_irq(&workers->lock);
 	list_splice_init(&workers->idle_list, &workers->worker_list);
 	while (!list_empty(&workers->worker_list)) {
 		cur = workers->worker_list.next;
 		worker = list_entry(cur, struct btrfs_worker_thread,
 				    worker_list);
-		kthread_stop(worker->task);
-		list_del(&worker->worker_list);
-		kfree(worker);
+
+		atomic_inc(&worker->refs);
+		workers->num_workers -= 1;
+		if (!list_empty(&worker->worker_list)) {
+			list_del_init(&worker->worker_list);
+			put_worker(worker);
+			can_stop = 1;
+		} else
+			can_stop = 0;
+		spin_unlock_irq(&workers->lock);
+		if (can_stop)
+			kthread_stop(worker->task);
+		spin_lock_irq(&workers->lock);
+		put_worker(worker);
 	}
+	spin_unlock_irq(&workers->lock);
 	return 0;
 }
 
@@ -266,10 +398,13 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
 	INIT_LIST_HEAD(&workers->order_list);
 	INIT_LIST_HEAD(&workers->prio_order_list);
 	spin_lock_init(&workers->lock);
+	spin_lock_init(&workers->order_lock);
 	workers->max_workers = max;
 	workers->idle_thresh = 32;
 	workers->name = name;
 	workers->ordered = 0;
+	workers->atomic_start_pending = 0;
+	workers->atomic_worker_start = 0;
 }
 
 /*
@@ -293,7 +428,9 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
 		INIT_LIST_HEAD(&worker->prio_pending);
 		INIT_LIST_HEAD(&worker->worker_list);
 		spin_lock_init(&worker->lock);
+
 		atomic_set(&worker->num_pending, 0);
+		atomic_set(&worker->refs, 1);
 		worker->workers = workers;
 		worker->task = kthread_run(worker_loop, worker,
 					   "btrfs-%s-%d", workers->name,
@@ -303,7 +440,6 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
 			kfree(worker);
 			goto fail;
 		}
-
 		spin_lock_irq(&workers->lock);
 		list_add_tail(&worker->worker_list, &workers->idle_list);
 		worker->idle = 1;
@@ -350,7 +486,6 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
 	 */
 	next = workers->worker_list.next;
 	worker = list_entry(next, struct btrfs_worker_thread, worker_list);
-	atomic_inc(&worker->num_pending);
 	worker->sequence++;
 
 	if (worker->sequence % workers->idle_thresh == 0)
@@ -367,28 +502,18 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
 {
 	struct btrfs_worker_thread *worker;
 	unsigned long flags;
+	struct list_head *fallback;
 
 again:
 	spin_lock_irqsave(&workers->lock, flags);
 	worker = next_worker(workers);
-	spin_unlock_irqrestore(&workers->lock, flags);
 
 	if (!worker) {
-		spin_lock_irqsave(&workers->lock, flags);
 		if (workers->num_workers >= workers->max_workers) {
-			struct list_head *fallback = NULL;
-			/*
-			 * we have failed to find any workers, just
-			 * return the force one
-			 */
-			if (!list_empty(&workers->worker_list))
-				fallback = workers->worker_list.next;
-			if (!list_empty(&workers->idle_list))
-				fallback = workers->idle_list.next;
-			BUG_ON(!fallback);
-			worker = list_entry(fallback,
-				  struct btrfs_worker_thread, worker_list);
-			spin_unlock_irqrestore(&workers->lock, flags);
+			goto fallback;
+		} else if (workers->atomic_worker_start) {
+			workers->atomic_start_pending = 1;
+			goto fallback;
 		} else {
 			spin_unlock_irqrestore(&workers->lock, flags);
 			/* we're below the limit, start another worker */
@@ -396,6 +521,28 @@ again:
 			goto again;
 		}
 	}
+	goto found;
+
+fallback:
+	fallback = NULL;
+	/*
+	 * we have failed to find any workers, just
+	 * return the first one we can find.
+	 */
+	if (!list_empty(&workers->worker_list))
+		fallback = workers->worker_list.next;
+	if (!list_empty(&workers->idle_list))
+		fallback = workers->idle_list.next;
+	BUG_ON(!fallback);
+	worker = list_entry(fallback,
+		  struct btrfs_worker_thread, worker_list);
+found:
+	/*
+	 * this makes sure the worker doesn't exit before it is placed
+	 * onto a busy/idle list
+	 */
+	atomic_inc(&worker->num_pending);
+	spin_unlock_irqrestore(&workers->lock, flags);
 	return worker;
 }
 
@@ -427,7 +574,7 @@ int btrfs_requeue_work(struct btrfs_work *work)
 		spin_lock(&worker->workers->lock);
 		worker->idle = 0;
 		list_move_tail(&worker->worker_list,
-			      &worker->workers->worker_list);
+			       &worker->workers->worker_list);
 		spin_unlock(&worker->workers->lock);
 	}
 	if (!worker->working) {
@@ -435,9 +582,9 @@ int btrfs_requeue_work(struct btrfs_work *work)
 		worker->working = 1;
 	}
 
-	spin_unlock_irqrestore(&worker->lock, flags);
 	if (wake)
 		wake_up_process(worker->task);
+	spin_unlock_irqrestore(&worker->lock, flags);
 out:
 
 	return 0;
@@ -463,14 +610,18 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
 
 	worker = find_worker(workers);
 	if (workers->ordered) {
-		spin_lock_irqsave(&workers->lock, flags);
+		/*
+		 * you're not allowed to do ordered queues from an
+		 * interrupt handler
+		 */
+		spin_lock(&workers->order_lock);
 		if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
 			list_add_tail(&work->order_list,
 				      &workers->prio_order_list);
 		} else {
 			list_add_tail(&work->order_list, &workers->order_list);
 		}
-		spin_unlock_irqrestore(&workers->lock, flags);
+		spin_unlock(&workers->order_lock);
 	} else {
 		INIT_LIST_HEAD(&work->order_list);
 	}
@@ -481,7 +632,6 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
 		list_add_tail(&work->list, &worker->prio_pending);
 	else
 		list_add_tail(&work->list, &worker->pending);
-	atomic_inc(&worker->num_pending);
 	check_busy_worker(worker);
 
 	/*
@@ -492,10 +642,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
 		wake = 1;
 	worker->working = 1;
 
-	spin_unlock_irqrestore(&worker->lock, flags);
-
 	if (wake)
 		wake_up_process(worker->task);
+	spin_unlock_irqrestore(&worker->lock, flags);
+
 out:
 
 	return 0;
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 1b511c109db6..fc089b95ec14 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -73,6 +73,15 @@ struct btrfs_workers {
 	/* force completions in the order they were queued */
 	int ordered;
 
+	/* more workers required, but in an interrupt handler */
+	int atomic_start_pending;
+
+	/*
+	 * are we allowed to sleep while starting workers or are we required
+	 * to start them at a later time?
+	 */
+	int atomic_worker_start;
+
 	/* list with all the work threads.  The workers on the idle thread
 	 * may be actively servicing jobs, but they haven't yet hit the
 	 * idle thresh limit above.
@@ -90,6 +99,9 @@ struct btrfs_workers {
90 /* lock for finding the next worker thread to queue on */ 99 /* lock for finding the next worker thread to queue on */
91 spinlock_t lock; 100 spinlock_t lock;
92 101
102 /* lock for the ordered lists */
103 spinlock_t order_lock;
104
93 /* extra name for this worker, used for current->name */ 105 /* extra name for this worker, used for current->name */
94 char *name; 106 char *name;
95}; 107};
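
Note: the two fields added to struct btrfs_workers exist because some of these pools are filled from end_io handlers running in interrupt context, where spawning a kthread (which can sleep) is not allowed. A hedged sketch of how the flags might gate pool growth; demo_try_grow() and the deferred spawn are assumptions, not code from this patch.

static void demo_try_grow(struct btrfs_workers *workers)
{
        if (workers->atomic_worker_start) {
                /*
                 * Queued from interrupt context: we may not sleep, so just
                 * record that another thread is wanted and let a helper
                 * running in process context act on it later.
                 */
                workers->atomic_start_pending = 1;
                return;
        }
        /* process context: safe to allocate and kthread_run() a worker */
}
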
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index ea1ea0af8c0e..82ee56bba299 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -138,6 +138,7 @@ struct btrfs_inode {
138 * of these. 138 * of these.
139 */ 139 */
140 unsigned ordered_data_close:1; 140 unsigned ordered_data_close:1;
141 unsigned dummy_inode:1;
141 142
142 struct inode vfs_inode; 143 struct inode vfs_inode;
143}; 144};
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 9d8ba4d54a37..a11a32058b50 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -506,10 +506,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
506 */ 506 */
507 set_page_extent_mapped(page); 507 set_page_extent_mapped(page);
508 lock_extent(tree, last_offset, end, GFP_NOFS); 508 lock_extent(tree, last_offset, end, GFP_NOFS);
509 spin_lock(&em_tree->lock); 509 read_lock(&em_tree->lock);
510 em = lookup_extent_mapping(em_tree, last_offset, 510 em = lookup_extent_mapping(em_tree, last_offset,
511 PAGE_CACHE_SIZE); 511 PAGE_CACHE_SIZE);
512 spin_unlock(&em_tree->lock); 512 read_unlock(&em_tree->lock);
513 513
514 if (!em || last_offset < em->start || 514 if (!em || last_offset < em->start ||
515 (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) || 515 (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
@@ -593,11 +593,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
593 em_tree = &BTRFS_I(inode)->extent_tree; 593 em_tree = &BTRFS_I(inode)->extent_tree;
594 594
595 /* we need the actual starting offset of this extent in the file */ 595 /* we need the actual starting offset of this extent in the file */
596 spin_lock(&em_tree->lock); 596 read_lock(&em_tree->lock);
597 em = lookup_extent_mapping(em_tree, 597 em = lookup_extent_mapping(em_tree,
598 page_offset(bio->bi_io_vec->bv_page), 598 page_offset(bio->bi_io_vec->bv_page),
599 PAGE_CACHE_SIZE); 599 PAGE_CACHE_SIZE);
600 spin_unlock(&em_tree->lock); 600 read_unlock(&em_tree->lock);
601 601
602 compressed_len = em->block_len; 602 compressed_len = em->block_len;
603 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); 603 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
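
Note: both compression.c hunks swap spin_lock()/spin_unlock() on em_tree->lock for read_lock()/read_unlock(). The struct change itself is outside this section, so treating the lock as an rwlock_t is an inference; under that assumption the pattern looks like the sketch below, with lookups running concurrently and mutation still exclusive. The demo_* names are illustrative.

#include <linux/spinlock.h>

/* concurrent readers: extent map lookups no longer serialize each other */
static struct extent_map *demo_lookup(struct extent_map_tree *tree,
                                      u64 start, u64 len)
{
        struct extent_map *em;

        read_lock(&tree->lock);
        em = lookup_extent_mapping(tree, start, len);
        read_unlock(&tree->lock);
        return em;
}

/* writers still exclude everyone, readers included */
static int demo_insert(struct extent_map_tree *tree, struct extent_map *em)
{
        int ret;

        write_lock(&tree->lock);
        ret = add_extent_mapping(tree, em);
        write_unlock(&tree->lock);
        return ret;
}
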
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 3fdcc0512d3a..ec96f3a6d536 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2853,6 +2853,12 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
2853 int split; 2853 int split;
2854 int num_doubles = 0; 2854 int num_doubles = 0;
2855 2855
2856 l = path->nodes[0];
2857 slot = path->slots[0];
2858 if (extend && data_size + btrfs_item_size_nr(l, slot) +
2859 sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root))
2860 return -EOVERFLOW;
2861
2856 /* first try to make some room by pushing left and right */ 2862 /* first try to make some room by pushing left and right */
2857 if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { 2863 if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
2858 wret = push_leaf_right(trans, root, path, data_size, 0); 2864 wret = push_leaf_right(trans, root, path, data_size, 0);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 837435ce84ca..80599b4e42bd 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -114,6 +114,10 @@ struct btrfs_ordered_sum;
114 */ 114 */
115#define BTRFS_DEV_ITEMS_OBJECTID 1ULL 115#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
116 116
117#define BTRFS_BTREE_INODE_OBJECTID 1
118
119#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
120
117/* 121/*
 118 * we can actually store much bigger names, but let's not confuse the rest 122 * we can actually store much bigger names, but let's not confuse the rest
119 * of linux 123 * of linux
@@ -670,6 +674,7 @@ struct btrfs_space_info {
670 u64 bytes_reserved; /* total bytes the allocator has reserved for 674 u64 bytes_reserved; /* total bytes the allocator has reserved for
671 current allocations */ 675 current allocations */
672 u64 bytes_readonly; /* total bytes that are read only */ 676 u64 bytes_readonly; /* total bytes that are read only */
677 u64 bytes_super; /* total bytes reserved for the super blocks */
673 678
674 /* delalloc accounting */ 679 /* delalloc accounting */
675 u64 bytes_delalloc; /* number of bytes reserved for allocation, 680 u64 bytes_delalloc; /* number of bytes reserved for allocation,
@@ -726,6 +731,15 @@ enum btrfs_caching_type {
726 BTRFS_CACHE_FINISHED = 2, 731 BTRFS_CACHE_FINISHED = 2,
727}; 732};
728 733
734struct btrfs_caching_control {
735 struct list_head list;
736 struct mutex mutex;
737 wait_queue_head_t wait;
738 struct btrfs_block_group_cache *block_group;
739 u64 progress;
740 atomic_t count;
741};
742
729struct btrfs_block_group_cache { 743struct btrfs_block_group_cache {
730 struct btrfs_key key; 744 struct btrfs_key key;
731 struct btrfs_block_group_item item; 745 struct btrfs_block_group_item item;
@@ -733,6 +747,7 @@ struct btrfs_block_group_cache {
733 spinlock_t lock; 747 spinlock_t lock;
734 u64 pinned; 748 u64 pinned;
735 u64 reserved; 749 u64 reserved;
750 u64 bytes_super;
736 u64 flags; 751 u64 flags;
737 u64 sectorsize; 752 u64 sectorsize;
738 int extents_thresh; 753 int extents_thresh;
@@ -742,8 +757,9 @@ struct btrfs_block_group_cache {
742 int dirty; 757 int dirty;
743 758
744 /* cache tracking stuff */ 759 /* cache tracking stuff */
745 wait_queue_head_t caching_q;
746 int cached; 760 int cached;
761 struct btrfs_caching_control *caching_ctl;
762 u64 last_byte_to_unpin;
747 763
748 struct btrfs_space_info *space_info; 764 struct btrfs_space_info *space_info;
749 765
@@ -782,13 +798,16 @@ struct btrfs_fs_info {
782 798
783 /* the log root tree is a directory of all the other log roots */ 799 /* the log root tree is a directory of all the other log roots */
784 struct btrfs_root *log_root_tree; 800 struct btrfs_root *log_root_tree;
801
802 spinlock_t fs_roots_radix_lock;
785 struct radix_tree_root fs_roots_radix; 803 struct radix_tree_root fs_roots_radix;
786 804
787 /* block group cache stuff */ 805 /* block group cache stuff */
788 spinlock_t block_group_cache_lock; 806 spinlock_t block_group_cache_lock;
789 struct rb_root block_group_cache_tree; 807 struct rb_root block_group_cache_tree;
790 808
791 struct extent_io_tree pinned_extents; 809 struct extent_io_tree freed_extents[2];
810 struct extent_io_tree *pinned_extents;
792 811
793 /* logical->physical extent mapping */ 812 /* logical->physical extent mapping */
794 struct btrfs_mapping_tree mapping_tree; 813 struct btrfs_mapping_tree mapping_tree;
@@ -822,11 +841,7 @@ struct btrfs_fs_info {
822 struct mutex transaction_kthread_mutex; 841 struct mutex transaction_kthread_mutex;
823 struct mutex cleaner_mutex; 842 struct mutex cleaner_mutex;
824 struct mutex chunk_mutex; 843 struct mutex chunk_mutex;
825 struct mutex drop_mutex;
826 struct mutex volume_mutex; 844 struct mutex volume_mutex;
827 struct mutex tree_reloc_mutex;
828 struct rw_semaphore extent_commit_sem;
829
830 /* 845 /*
831 * this protects the ordered operations list only while we are 846 * this protects the ordered operations list only while we are
832 * processing all of the entries on it. This way we make 847 * processing all of the entries on it. This way we make
@@ -835,10 +850,16 @@ struct btrfs_fs_info {
835 * before jumping into the main commit. 850 * before jumping into the main commit.
836 */ 851 */
837 struct mutex ordered_operations_mutex; 852 struct mutex ordered_operations_mutex;
853 struct rw_semaphore extent_commit_sem;
854
855 struct rw_semaphore subvol_sem;
856
857 struct srcu_struct subvol_srcu;
838 858
839 struct list_head trans_list; 859 struct list_head trans_list;
840 struct list_head hashers; 860 struct list_head hashers;
841 struct list_head dead_roots; 861 struct list_head dead_roots;
862 struct list_head caching_block_groups;
842 863
843 atomic_t nr_async_submits; 864 atomic_t nr_async_submits;
844 atomic_t async_submit_draining; 865 atomic_t async_submit_draining;
@@ -996,10 +1017,12 @@ struct btrfs_root {
996 u32 stripesize; 1017 u32 stripesize;
997 1018
998 u32 type; 1019 u32 type;
999 u64 highest_inode; 1020
1000 u64 last_inode_alloc; 1021 u64 highest_objectid;
1001 int ref_cows; 1022 int ref_cows;
1002 int track_dirty; 1023 int track_dirty;
1024 int in_radix;
1025
1003 u64 defrag_trans_start; 1026 u64 defrag_trans_start;
1004 struct btrfs_key defrag_progress; 1027 struct btrfs_key defrag_progress;
1005 struct btrfs_key defrag_max; 1028 struct btrfs_key defrag_max;
@@ -1920,8 +1943,8 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
1920int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 1943int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
1921 struct btrfs_root *root, unsigned long count); 1944 struct btrfs_root *root, unsigned long count);
1922int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); 1945int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
1923int btrfs_update_pinned_extents(struct btrfs_root *root, 1946int btrfs_pin_extent(struct btrfs_root *root,
1924 u64 bytenr, u64 num, int pin); 1947 u64 bytenr, u64 num, int reserved);
1925int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 1948int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
1926 struct btrfs_root *root, struct extent_buffer *leaf); 1949 struct btrfs_root *root, struct extent_buffer *leaf);
1927int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 1950int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
@@ -1971,9 +1994,10 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
1971 u64 root_objectid, u64 owner, u64 offset); 1994 u64 root_objectid, u64 owner, u64 offset);
1972 1995
1973int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); 1996int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
1997int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
1998 struct btrfs_root *root);
1974int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 1999int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1975 struct btrfs_root *root, 2000 struct btrfs_root *root);
1976 struct extent_io_tree *unpin);
1977int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 2001int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1978 struct btrfs_root *root, 2002 struct btrfs_root *root,
1979 u64 bytenr, u64 num_bytes, u64 parent, 2003 u64 bytenr, u64 num_bytes, u64 parent,
@@ -1984,6 +2008,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1984int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); 2008int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
1985int btrfs_free_block_groups(struct btrfs_fs_info *info); 2009int btrfs_free_block_groups(struct btrfs_fs_info *info);
1986int btrfs_read_block_groups(struct btrfs_root *root); 2010int btrfs_read_block_groups(struct btrfs_root *root);
2011int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr);
1987int btrfs_make_block_group(struct btrfs_trans_handle *trans, 2012int btrfs_make_block_group(struct btrfs_trans_handle *trans,
1988 struct btrfs_root *root, u64 bytes_used, 2013 struct btrfs_root *root, u64 bytes_used,
1989 u64 type, u64 chunk_objectid, u64 chunk_offset, 2014 u64 type, u64 chunk_objectid, u64 chunk_offset,
@@ -2006,7 +2031,6 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
2006 u64 bytes); 2031 u64 bytes);
2007void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, 2032void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
2008 u64 bytes); 2033 u64 bytes);
2009void btrfs_free_pinned_extents(struct btrfs_fs_info *info);
2010/* ctree.c */ 2034/* ctree.c */
2011int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2035int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
2012 int level, int *slot); 2036 int level, int *slot);
@@ -2100,12 +2124,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
2100 struct extent_buffer *parent); 2124 struct extent_buffer *parent);
2101/* root-item.c */ 2125/* root-item.c */
2102int btrfs_find_root_ref(struct btrfs_root *tree_root, 2126int btrfs_find_root_ref(struct btrfs_root *tree_root,
2103 struct btrfs_path *path, 2127 struct btrfs_path *path,
2104 u64 root_id, u64 ref_id); 2128 u64 root_id, u64 ref_id);
2105int btrfs_add_root_ref(struct btrfs_trans_handle *trans, 2129int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
2106 struct btrfs_root *tree_root, 2130 struct btrfs_root *tree_root,
2107 u64 root_id, u8 type, u64 ref_id, 2131 u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
2108 u64 dirid, u64 sequence, 2132 const char *name, int name_len);
2133int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
2134 struct btrfs_root *tree_root,
2135 u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
2109 const char *name, int name_len); 2136 const char *name, int name_len);
2110int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2137int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2111 struct btrfs_key *key); 2138 struct btrfs_key *key);
@@ -2120,6 +2147,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
2120int btrfs_search_root(struct btrfs_root *root, u64 search_start, 2147int btrfs_search_root(struct btrfs_root *root, u64 search_start,
2121 u64 *found_objectid); 2148 u64 *found_objectid);
2122int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); 2149int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
2150int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
2123int btrfs_set_root_node(struct btrfs_root_item *item, 2151int btrfs_set_root_node(struct btrfs_root_item *item,
2124 struct extent_buffer *node); 2152 struct extent_buffer *node);
2125/* dir-item.c */ 2153/* dir-item.c */
@@ -2138,6 +2166,10 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
2138 struct btrfs_path *path, u64 dir, 2166 struct btrfs_path *path, u64 dir,
2139 u64 objectid, const char *name, int name_len, 2167 u64 objectid, const char *name, int name_len,
2140 int mod); 2168 int mod);
2169struct btrfs_dir_item *
2170btrfs_search_dir_index_item(struct btrfs_root *root,
2171 struct btrfs_path *path, u64 dirid,
2172 const char *name, int name_len);
2141struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, 2173struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
2142 struct btrfs_path *path, 2174 struct btrfs_path *path,
2143 const char *name, int name_len); 2175 const char *name, int name_len);
@@ -2160,6 +2192,7 @@ int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
2160 struct btrfs_root *root, u64 offset); 2192 struct btrfs_root *root, u64 offset);
2161int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, 2193int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
2162 struct btrfs_root *root, u64 offset); 2194 struct btrfs_root *root, u64 offset);
2195int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset);
2163 2196
2164/* inode-map.c */ 2197/* inode-map.c */
2165int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, 2198int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
@@ -2232,6 +2265,10 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2232int btrfs_add_link(struct btrfs_trans_handle *trans, 2265int btrfs_add_link(struct btrfs_trans_handle *trans,
2233 struct inode *parent_inode, struct inode *inode, 2266 struct inode *parent_inode, struct inode *inode,
2234 const char *name, int name_len, int add_backref, u64 index); 2267 const char *name, int name_len, int add_backref, u64 index);
2268int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
2269 struct btrfs_root *root,
2270 struct inode *dir, u64 objectid,
2271 const char *name, int name_len);
2235int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, 2272int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2236 struct btrfs_root *root, 2273 struct btrfs_root *root,
2237 struct inode *inode, u64 new_size, 2274 struct inode *inode, u64 new_size,
@@ -2242,7 +2279,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
2242int btrfs_writepages(struct address_space *mapping, 2279int btrfs_writepages(struct address_space *mapping,
2243 struct writeback_control *wbc); 2280 struct writeback_control *wbc);
2244int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, 2281int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
2245 struct btrfs_root *new_root, struct dentry *dentry, 2282 struct btrfs_root *new_root,
2246 u64 new_dirid, u64 alloc_hint); 2283 u64 new_dirid, u64 alloc_hint);
2247int btrfs_merge_bio_hook(struct page *page, unsigned long offset, 2284int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
2248 size_t size, struct bio *bio, unsigned long bio_flags); 2285 size_t size, struct bio *bio, unsigned long bio_flags);
@@ -2258,6 +2295,7 @@ int btrfs_write_inode(struct inode *inode, int wait);
2258void btrfs_dirty_inode(struct inode *inode); 2295void btrfs_dirty_inode(struct inode *inode);
2259struct inode *btrfs_alloc_inode(struct super_block *sb); 2296struct inode *btrfs_alloc_inode(struct super_block *sb);
2260void btrfs_destroy_inode(struct inode *inode); 2297void btrfs_destroy_inode(struct inode *inode);
2298void btrfs_drop_inode(struct inode *inode);
2261int btrfs_init_cachep(void); 2299int btrfs_init_cachep(void);
2262void btrfs_destroy_cachep(void); 2300void btrfs_destroy_cachep(void);
2263long btrfs_ioctl_trans_end(struct file *file); 2301long btrfs_ioctl_trans_end(struct file *file);
@@ -2275,6 +2313,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
2275int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); 2313int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
2276void btrfs_orphan_cleanup(struct btrfs_root *root); 2314void btrfs_orphan_cleanup(struct btrfs_root *root);
2277int btrfs_cont_expand(struct inode *inode, loff_t size); 2315int btrfs_cont_expand(struct inode *inode, loff_t size);
2316int btrfs_invalidate_inodes(struct btrfs_root *root);
2317extern struct dentry_operations btrfs_dentry_operations;
2278 2318
2279/* ioctl.c */ 2319/* ioctl.c */
2280long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 2320long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
@@ -2290,7 +2330,7 @@ extern struct file_operations btrfs_file_operations;
2290int btrfs_drop_extents(struct btrfs_trans_handle *trans, 2330int btrfs_drop_extents(struct btrfs_trans_handle *trans,
2291 struct btrfs_root *root, struct inode *inode, 2331 struct btrfs_root *root, struct inode *inode,
2292 u64 start, u64 end, u64 locked_end, 2332 u64 start, u64 end, u64 locked_end,
2293 u64 inline_limit, u64 *hint_block); 2333 u64 inline_limit, u64 *hint_block, int drop_cache);
2294int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 2334int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2295 struct btrfs_root *root, 2335 struct btrfs_root *root,
2296 struct inode *inode, u64 start, u64 end); 2336 struct inode *inode, u64 start, u64 end);
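
Note: the ctree.h hunks replace the single pinned_extents tree with freed_extents[2] plus a pointer selecting the active one. The flip itself is not in the hunks shown here, so this is a speculative reading: with two trees, one can collect extents pinned by the running transaction while the other, belonging to the committing transaction, is drained, and the pointer swaps between them (plausibly in the new btrfs_prepare_extent_commit()).

/*
 * Speculative sketch of the double-buffer flip implied by the fields
 * above; the actual swap site is not part of this section.
 */
static void demo_swap_pinned(struct btrfs_fs_info *fs_info)
{
        if (fs_info->pinned_extents == &fs_info->freed_extents[0])
                fs_info->pinned_extents = &fs_info->freed_extents[1];
        else
                fs_info->pinned_extents = &fs_info->freed_extents[0];
}
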
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 1d70236ba00c..f3a6075519cc 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -281,6 +281,53 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
281 return btrfs_match_dir_item_name(root, path, name, name_len); 281 return btrfs_match_dir_item_name(root, path, name, name_len);
282} 282}
283 283
284struct btrfs_dir_item *
285btrfs_search_dir_index_item(struct btrfs_root *root,
286 struct btrfs_path *path, u64 dirid,
287 const char *name, int name_len)
288{
289 struct extent_buffer *leaf;
290 struct btrfs_dir_item *di;
291 struct btrfs_key key;
292 u32 nritems;
293 int ret;
294
295 key.objectid = dirid;
296 key.type = BTRFS_DIR_INDEX_KEY;
297 key.offset = 0;
298
299 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
300 if (ret < 0)
301 return ERR_PTR(ret);
302
303 leaf = path->nodes[0];
304 nritems = btrfs_header_nritems(leaf);
305
306 while (1) {
307 if (path->slots[0] >= nritems) {
308 ret = btrfs_next_leaf(root, path);
309 if (ret < 0)
310 return ERR_PTR(ret);
311 if (ret > 0)
312 break;
313 leaf = path->nodes[0];
314 nritems = btrfs_header_nritems(leaf);
315 continue;
316 }
317
318 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
319 if (key.objectid != dirid || key.type != BTRFS_DIR_INDEX_KEY)
320 break;
321
322 di = btrfs_match_dir_item_name(root, path, name, name_len);
323 if (di)
324 return di;
325
326 path->slots[0]++;
327 }
328 return NULL;
329}
330
284struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, 331struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
285 struct btrfs_root *root, 332 struct btrfs_root *root,
286 struct btrfs_path *path, u64 dir, 333 struct btrfs_path *path, u64 dir,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6c4173146bb7..644e796fd643 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -41,6 +41,7 @@
41 41
42static struct extent_io_ops btree_extent_io_ops; 42static struct extent_io_ops btree_extent_io_ops;
43static void end_workqueue_fn(struct btrfs_work *work); 43static void end_workqueue_fn(struct btrfs_work *work);
44static void free_fs_root(struct btrfs_root *root);
44 45
45static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); 46static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);
46 47
@@ -123,15 +124,15 @@ static struct extent_map *btree_get_extent(struct inode *inode,
123 struct extent_map *em; 124 struct extent_map *em;
124 int ret; 125 int ret;
125 126
126 spin_lock(&em_tree->lock); 127 read_lock(&em_tree->lock);
127 em = lookup_extent_mapping(em_tree, start, len); 128 em = lookup_extent_mapping(em_tree, start, len);
128 if (em) { 129 if (em) {
129 em->bdev = 130 em->bdev =
130 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 131 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
131 spin_unlock(&em_tree->lock); 132 read_unlock(&em_tree->lock);
132 goto out; 133 goto out;
133 } 134 }
134 spin_unlock(&em_tree->lock); 135 read_unlock(&em_tree->lock);
135 136
136 em = alloc_extent_map(GFP_NOFS); 137 em = alloc_extent_map(GFP_NOFS);
137 if (!em) { 138 if (!em) {
@@ -144,7 +145,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
144 em->block_start = 0; 145 em->block_start = 0;
145 em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 146 em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
146 147
147 spin_lock(&em_tree->lock); 148 write_lock(&em_tree->lock);
148 ret = add_extent_mapping(em_tree, em); 149 ret = add_extent_mapping(em_tree, em);
149 if (ret == -EEXIST) { 150 if (ret == -EEXIST) {
150 u64 failed_start = em->start; 151 u64 failed_start = em->start;
@@ -163,7 +164,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
163 free_extent_map(em); 164 free_extent_map(em);
164 em = NULL; 165 em = NULL;
165 } 166 }
166 spin_unlock(&em_tree->lock); 167 write_unlock(&em_tree->lock);
167 168
168 if (ret) 169 if (ret)
169 em = ERR_PTR(ret); 170 em = ERR_PTR(ret);
@@ -895,8 +896,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
895 root->fs_info = fs_info; 896 root->fs_info = fs_info;
896 root->objectid = objectid; 897 root->objectid = objectid;
897 root->last_trans = 0; 898 root->last_trans = 0;
898 root->highest_inode = 0; 899 root->highest_objectid = 0;
899 root->last_inode_alloc = 0;
900 root->name = NULL; 900 root->name = NULL;
901 root->in_sysfs = 0; 901 root->in_sysfs = 0;
902 root->inode_tree.rb_node = NULL; 902 root->inode_tree.rb_node = NULL;
@@ -952,14 +952,16 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
952 root, fs_info, objectid); 952 root, fs_info, objectid);
953 ret = btrfs_find_last_root(tree_root, objectid, 953 ret = btrfs_find_last_root(tree_root, objectid,
954 &root->root_item, &root->root_key); 954 &root->root_item, &root->root_key);
955 if (ret > 0)
956 return -ENOENT;
955 BUG_ON(ret); 957 BUG_ON(ret);
956 958
957 generation = btrfs_root_generation(&root->root_item); 959 generation = btrfs_root_generation(&root->root_item);
958 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 960 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
959 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 961 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
960 blocksize, generation); 962 blocksize, generation);
961 root->commit_root = btrfs_root_node(root);
962 BUG_ON(!root->node); 963 BUG_ON(!root->node);
964 root->commit_root = btrfs_root_node(root);
963 return 0; 965 return 0;
964} 966}
965 967
@@ -1095,7 +1097,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
1095 struct btrfs_fs_info *fs_info = tree_root->fs_info; 1097 struct btrfs_fs_info *fs_info = tree_root->fs_info;
1096 struct btrfs_path *path; 1098 struct btrfs_path *path;
1097 struct extent_buffer *l; 1099 struct extent_buffer *l;
1098 u64 highest_inode;
1099 u64 generation; 1100 u64 generation;
1100 u32 blocksize; 1101 u32 blocksize;
1101 int ret = 0; 1102 int ret = 0;
@@ -1110,7 +1111,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
1110 kfree(root); 1111 kfree(root);
1111 return ERR_PTR(ret); 1112 return ERR_PTR(ret);
1112 } 1113 }
1113 goto insert; 1114 goto out;
1114 } 1115 }
1115 1116
1116 __setup_root(tree_root->nodesize, tree_root->leafsize, 1117 __setup_root(tree_root->nodesize, tree_root->leafsize,
@@ -1120,39 +1121,30 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
1120 path = btrfs_alloc_path(); 1121 path = btrfs_alloc_path();
1121 BUG_ON(!path); 1122 BUG_ON(!path);
1122 ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); 1123 ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
1123 if (ret != 0) { 1124 if (ret == 0) {
1124 if (ret > 0) 1125 l = path->nodes[0];
1125 ret = -ENOENT; 1126 read_extent_buffer(l, &root->root_item,
1126 goto out; 1127 btrfs_item_ptr_offset(l, path->slots[0]),
1128 sizeof(root->root_item));
1129 memcpy(&root->root_key, location, sizeof(*location));
1127 } 1130 }
1128 l = path->nodes[0];
1129 read_extent_buffer(l, &root->root_item,
1130 btrfs_item_ptr_offset(l, path->slots[0]),
1131 sizeof(root->root_item));
1132 memcpy(&root->root_key, location, sizeof(*location));
1133 ret = 0;
1134out:
1135 btrfs_release_path(root, path);
1136 btrfs_free_path(path); 1131 btrfs_free_path(path);
1137 if (ret) { 1132 if (ret) {
1138 kfree(root); 1133 if (ret > 0)
1134 ret = -ENOENT;
1139 return ERR_PTR(ret); 1135 return ERR_PTR(ret);
1140 } 1136 }
1137
1141 generation = btrfs_root_generation(&root->root_item); 1138 generation = btrfs_root_generation(&root->root_item);
1142 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 1139 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
1143 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 1140 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
1144 blocksize, generation); 1141 blocksize, generation);
1145 root->commit_root = btrfs_root_node(root); 1142 root->commit_root = btrfs_root_node(root);
1146 BUG_ON(!root->node); 1143 BUG_ON(!root->node);
1147insert: 1144out:
1148 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { 1145 if (location->objectid != BTRFS_TREE_LOG_OBJECTID)
1149 root->ref_cows = 1; 1146 root->ref_cows = 1;
1150 ret = btrfs_find_highest_inode(root, &highest_inode); 1147
1151 if (ret == 0) {
1152 root->highest_inode = highest_inode;
1153 root->last_inode_alloc = highest_inode;
1154 }
1155 }
1156 return root; 1148 return root;
1157} 1149}
1158 1150
@@ -1187,39 +1179,66 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
1187 return fs_info->dev_root; 1179 return fs_info->dev_root;
1188 if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) 1180 if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
1189 return fs_info->csum_root; 1181 return fs_info->csum_root;
1190 1182again:
1183 spin_lock(&fs_info->fs_roots_radix_lock);
1191 root = radix_tree_lookup(&fs_info->fs_roots_radix, 1184 root = radix_tree_lookup(&fs_info->fs_roots_radix,
1192 (unsigned long)location->objectid); 1185 (unsigned long)location->objectid);
1186 spin_unlock(&fs_info->fs_roots_radix_lock);
1193 if (root) 1187 if (root)
1194 return root; 1188 return root;
1195 1189
1190 ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
1191 if (ret == 0)
1192 ret = -ENOENT;
1193 if (ret < 0)
1194 return ERR_PTR(ret);
1195
1196 root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); 1196 root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location);
1197 if (IS_ERR(root)) 1197 if (IS_ERR(root))
1198 return root; 1198 return root;
1199 1199
1200 WARN_ON(btrfs_root_refs(&root->root_item) == 0);
1200 set_anon_super(&root->anon_super, NULL); 1201 set_anon_super(&root->anon_super, NULL);
1201 1202
1203 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
1204 if (ret)
1205 goto fail;
1206
1207 spin_lock(&fs_info->fs_roots_radix_lock);
1202 ret = radix_tree_insert(&fs_info->fs_roots_radix, 1208 ret = radix_tree_insert(&fs_info->fs_roots_radix,
1203 (unsigned long)root->root_key.objectid, 1209 (unsigned long)root->root_key.objectid,
1204 root); 1210 root);
1211 if (ret == 0)
1212 root->in_radix = 1;
1213 spin_unlock(&fs_info->fs_roots_radix_lock);
1214 radix_tree_preload_end();
1205 if (ret) { 1215 if (ret) {
1206 free_extent_buffer(root->node); 1216 if (ret == -EEXIST) {
1207 kfree(root); 1217 free_fs_root(root);
1208 return ERR_PTR(ret); 1218 goto again;
1219 }
1220 goto fail;
1209 } 1221 }
1210 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 1222
1211 ret = btrfs_find_dead_roots(fs_info->tree_root, 1223 ret = btrfs_find_dead_roots(fs_info->tree_root,
1212 root->root_key.objectid); 1224 root->root_key.objectid);
1213 BUG_ON(ret); 1225 WARN_ON(ret);
1226
1227 if (!(fs_info->sb->s_flags & MS_RDONLY))
1214 btrfs_orphan_cleanup(root); 1228 btrfs_orphan_cleanup(root);
1215 } 1229
1216 return root; 1230 return root;
1231fail:
1232 free_fs_root(root);
1233 return ERR_PTR(ret);
1217} 1234}
1218 1235
1219struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, 1236struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
1220 struct btrfs_key *location, 1237 struct btrfs_key *location,
1221 const char *name, int namelen) 1238 const char *name, int namelen)
1222{ 1239{
1240 return btrfs_read_fs_root_no_name(fs_info, location);
1241#if 0
1223 struct btrfs_root *root; 1242 struct btrfs_root *root;
1224 int ret; 1243 int ret;
1225 1244
@@ -1236,7 +1255,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
1236 kfree(root); 1255 kfree(root);
1237 return ERR_PTR(ret); 1256 return ERR_PTR(ret);
1238 } 1257 }
1239#if 0 1258
1240 ret = btrfs_sysfs_add_root(root); 1259 ret = btrfs_sysfs_add_root(root);
1241 if (ret) { 1260 if (ret) {
1242 free_extent_buffer(root->node); 1261 free_extent_buffer(root->node);
@@ -1244,9 +1263,9 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
1244 kfree(root); 1263 kfree(root);
1245 return ERR_PTR(ret); 1264 return ERR_PTR(ret);
1246 } 1265 }
1247#endif
1248 root->in_sysfs = 1; 1266 root->in_sysfs = 1;
1249 return root; 1267 return root;
1268#endif
1250} 1269}
1251 1270
1252static int btrfs_congested_fn(void *congested_data, int bdi_bits) 1271static int btrfs_congested_fn(void *congested_data, int bdi_bits)
@@ -1325,9 +1344,9 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1325 offset = page_offset(page); 1344 offset = page_offset(page);
1326 1345
1327 em_tree = &BTRFS_I(inode)->extent_tree; 1346 em_tree = &BTRFS_I(inode)->extent_tree;
1328 spin_lock(&em_tree->lock); 1347 read_lock(&em_tree->lock);
1329 em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); 1348 em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
1330 spin_unlock(&em_tree->lock); 1349 read_unlock(&em_tree->lock);
1331 if (!em) { 1350 if (!em) {
1332 __unplug_io_fn(bdi, page); 1351 __unplug_io_fn(bdi, page);
1333 return; 1352 return;
@@ -1360,8 +1379,10 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1360 1379
1361 err = bdi_register(bdi, NULL, "btrfs-%d", 1380 err = bdi_register(bdi, NULL, "btrfs-%d",
1362 atomic_inc_return(&btrfs_bdi_num)); 1381 atomic_inc_return(&btrfs_bdi_num));
1363 if (err) 1382 if (err) {
1383 bdi_destroy(bdi);
1364 return err; 1384 return err;
1385 }
1365 1386
1366 bdi->ra_pages = default_backing_dev_info.ra_pages; 1387 bdi->ra_pages = default_backing_dev_info.ra_pages;
1367 bdi->unplug_io_fn = btrfs_unplug_io_fn; 1388 bdi->unplug_io_fn = btrfs_unplug_io_fn;
@@ -1451,9 +1472,12 @@ static int cleaner_kthread(void *arg)
1451 break; 1472 break;
1452 1473
1453 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); 1474 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1454 mutex_lock(&root->fs_info->cleaner_mutex); 1475
1455 btrfs_clean_old_snapshots(root); 1476 if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
1456 mutex_unlock(&root->fs_info->cleaner_mutex); 1477 mutex_trylock(&root->fs_info->cleaner_mutex)) {
1478 btrfs_clean_old_snapshots(root);
1479 mutex_unlock(&root->fs_info->cleaner_mutex);
1480 }
1457 1481
1458 if (freezing(current)) { 1482 if (freezing(current)) {
1459 refrigerator(); 1483 refrigerator();
@@ -1558,15 +1582,36 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1558 err = -ENOMEM; 1582 err = -ENOMEM;
1559 goto fail; 1583 goto fail;
1560 } 1584 }
1561 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); 1585
1586 ret = init_srcu_struct(&fs_info->subvol_srcu);
1587 if (ret) {
1588 err = ret;
1589 goto fail;
1590 }
1591
1592 ret = setup_bdi(fs_info, &fs_info->bdi);
1593 if (ret) {
1594 err = ret;
1595 goto fail_srcu;
1596 }
1597
1598 fs_info->btree_inode = new_inode(sb);
1599 if (!fs_info->btree_inode) {
1600 err = -ENOMEM;
1601 goto fail_bdi;
1602 }
1603
1604 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
1562 INIT_LIST_HEAD(&fs_info->trans_list); 1605 INIT_LIST_HEAD(&fs_info->trans_list);
1563 INIT_LIST_HEAD(&fs_info->dead_roots); 1606 INIT_LIST_HEAD(&fs_info->dead_roots);
1564 INIT_LIST_HEAD(&fs_info->hashers); 1607 INIT_LIST_HEAD(&fs_info->hashers);
1565 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 1608 INIT_LIST_HEAD(&fs_info->delalloc_inodes);
1566 INIT_LIST_HEAD(&fs_info->ordered_operations); 1609 INIT_LIST_HEAD(&fs_info->ordered_operations);
1610 INIT_LIST_HEAD(&fs_info->caching_block_groups);
1567 spin_lock_init(&fs_info->delalloc_lock); 1611 spin_lock_init(&fs_info->delalloc_lock);
1568 spin_lock_init(&fs_info->new_trans_lock); 1612 spin_lock_init(&fs_info->new_trans_lock);
1569 spin_lock_init(&fs_info->ref_cache_lock); 1613 spin_lock_init(&fs_info->ref_cache_lock);
1614 spin_lock_init(&fs_info->fs_roots_radix_lock);
1570 1615
1571 init_completion(&fs_info->kobj_unregister); 1616 init_completion(&fs_info->kobj_unregister);
1572 fs_info->tree_root = tree_root; 1617 fs_info->tree_root = tree_root;
@@ -1585,11 +1630,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1585 fs_info->sb = sb; 1630 fs_info->sb = sb;
1586 fs_info->max_extent = (u64)-1; 1631 fs_info->max_extent = (u64)-1;
1587 fs_info->max_inline = 8192 * 1024; 1632 fs_info->max_inline = 8192 * 1024;
1588 if (setup_bdi(fs_info, &fs_info->bdi))
1589 goto fail_bdi;
1590 fs_info->btree_inode = new_inode(sb);
1591 fs_info->btree_inode->i_ino = 1;
1592 fs_info->btree_inode->i_nlink = 1;
1593 fs_info->metadata_ratio = 8; 1633 fs_info->metadata_ratio = 8;
1594 1634
1595 fs_info->thread_pool_size = min_t(unsigned long, 1635 fs_info->thread_pool_size = min_t(unsigned long,
@@ -1602,6 +1642,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1602 sb->s_blocksize_bits = blksize_bits(4096); 1642 sb->s_blocksize_bits = blksize_bits(4096);
1603 sb->s_bdi = &fs_info->bdi; 1643 sb->s_bdi = &fs_info->bdi;
1604 1644
1645 fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
1646 fs_info->btree_inode->i_nlink = 1;
1605 /* 1647 /*
1606 * we set the i_size on the btree inode to the max possible int. 1648 * we set the i_size on the btree inode to the max possible int.
1607 * the real end of the address space is determined by all of 1649 * the real end of the address space is determined by all of
@@ -1620,28 +1662,32 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1620 1662
1621 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; 1663 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
1622 1664
1665 BTRFS_I(fs_info->btree_inode)->root = tree_root;
1666 memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
1667 sizeof(struct btrfs_key));
1668 BTRFS_I(fs_info->btree_inode)->dummy_inode = 1;
1669 insert_inode_hash(fs_info->btree_inode);
1670
1623 spin_lock_init(&fs_info->block_group_cache_lock); 1671 spin_lock_init(&fs_info->block_group_cache_lock);
1624 fs_info->block_group_cache_tree.rb_node = NULL; 1672 fs_info->block_group_cache_tree.rb_node = NULL;
1625 1673
1626 extent_io_tree_init(&fs_info->pinned_extents, 1674 extent_io_tree_init(&fs_info->freed_extents[0],
1627 fs_info->btree_inode->i_mapping, GFP_NOFS); 1675 fs_info->btree_inode->i_mapping, GFP_NOFS);
1676 extent_io_tree_init(&fs_info->freed_extents[1],
1677 fs_info->btree_inode->i_mapping, GFP_NOFS);
1678 fs_info->pinned_extents = &fs_info->freed_extents[0];
1628 fs_info->do_barriers = 1; 1679 fs_info->do_barriers = 1;
1629 1680
1630 BTRFS_I(fs_info->btree_inode)->root = tree_root;
1631 memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
1632 sizeof(struct btrfs_key));
1633 insert_inode_hash(fs_info->btree_inode);
1634 1681
1635 mutex_init(&fs_info->trans_mutex); 1682 mutex_init(&fs_info->trans_mutex);
1636 mutex_init(&fs_info->ordered_operations_mutex); 1683 mutex_init(&fs_info->ordered_operations_mutex);
1637 mutex_init(&fs_info->tree_log_mutex); 1684 mutex_init(&fs_info->tree_log_mutex);
1638 mutex_init(&fs_info->drop_mutex);
1639 mutex_init(&fs_info->chunk_mutex); 1685 mutex_init(&fs_info->chunk_mutex);
1640 mutex_init(&fs_info->transaction_kthread_mutex); 1686 mutex_init(&fs_info->transaction_kthread_mutex);
1641 mutex_init(&fs_info->cleaner_mutex); 1687 mutex_init(&fs_info->cleaner_mutex);
1642 mutex_init(&fs_info->volume_mutex); 1688 mutex_init(&fs_info->volume_mutex);
1643 mutex_init(&fs_info->tree_reloc_mutex);
1644 init_rwsem(&fs_info->extent_commit_sem); 1689 init_rwsem(&fs_info->extent_commit_sem);
1690 init_rwsem(&fs_info->subvol_sem);
1645 1691
1646 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); 1692 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
1647 btrfs_init_free_cluster(&fs_info->data_alloc_cluster); 1693 btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -1700,7 +1746,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1700 err = -EINVAL; 1746 err = -EINVAL;
1701 goto fail_iput; 1747 goto fail_iput;
1702 } 1748 }
1703 1749printk("thread pool is %d\n", fs_info->thread_pool_size);
1704 /* 1750 /*
1705 * we need to start all the end_io workers up front because the 1751 * we need to start all the end_io workers up front because the
1706 * queue work function gets called at interrupt time, and so it 1752 * queue work function gets called at interrupt time, and so it
@@ -1745,20 +1791,22 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1745 fs_info->endio_workers.idle_thresh = 4; 1791 fs_info->endio_workers.idle_thresh = 4;
1746 fs_info->endio_meta_workers.idle_thresh = 4; 1792 fs_info->endio_meta_workers.idle_thresh = 4;
1747 1793
1748 fs_info->endio_write_workers.idle_thresh = 64; 1794 fs_info->endio_write_workers.idle_thresh = 2;
1749 fs_info->endio_meta_write_workers.idle_thresh = 64; 1795 fs_info->endio_meta_write_workers.idle_thresh = 2;
1796
1797 fs_info->endio_workers.atomic_worker_start = 1;
1798 fs_info->endio_meta_workers.atomic_worker_start = 1;
1799 fs_info->endio_write_workers.atomic_worker_start = 1;
1800 fs_info->endio_meta_write_workers.atomic_worker_start = 1;
1750 1801
1751 btrfs_start_workers(&fs_info->workers, 1); 1802 btrfs_start_workers(&fs_info->workers, 1);
1752 btrfs_start_workers(&fs_info->submit_workers, 1); 1803 btrfs_start_workers(&fs_info->submit_workers, 1);
1753 btrfs_start_workers(&fs_info->delalloc_workers, 1); 1804 btrfs_start_workers(&fs_info->delalloc_workers, 1);
1754 btrfs_start_workers(&fs_info->fixup_workers, 1); 1805 btrfs_start_workers(&fs_info->fixup_workers, 1);
1755 btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); 1806 btrfs_start_workers(&fs_info->endio_workers, 1);
1756 btrfs_start_workers(&fs_info->endio_meta_workers, 1807 btrfs_start_workers(&fs_info->endio_meta_workers, 1);
1757 fs_info->thread_pool_size); 1808 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
1758 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1809 btrfs_start_workers(&fs_info->endio_write_workers, 1);
1759 fs_info->thread_pool_size);
1760 btrfs_start_workers(&fs_info->endio_write_workers,
1761 fs_info->thread_pool_size);
1762 1810
1763 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 1811 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
1764 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 1812 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -1918,6 +1966,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1918 } 1966 }
1919 } 1967 }
1920 1968
1969 ret = btrfs_find_orphan_roots(tree_root);
1970 BUG_ON(ret);
1971
1921 if (!(sb->s_flags & MS_RDONLY)) { 1972 if (!(sb->s_flags & MS_RDONLY)) {
1922 ret = btrfs_recover_relocation(tree_root); 1973 ret = btrfs_recover_relocation(tree_root);
1923 BUG_ON(ret); 1974 BUG_ON(ret);
@@ -1977,6 +2028,8 @@ fail_iput:
1977 btrfs_mapping_tree_free(&fs_info->mapping_tree); 2028 btrfs_mapping_tree_free(&fs_info->mapping_tree);
1978fail_bdi: 2029fail_bdi:
1979 bdi_destroy(&fs_info->bdi); 2030 bdi_destroy(&fs_info->bdi);
2031fail_srcu:
2032 cleanup_srcu_struct(&fs_info->subvol_srcu);
1980fail: 2033fail:
1981 kfree(extent_root); 2034 kfree(extent_root);
1982 kfree(tree_root); 2035 kfree(tree_root);
@@ -2236,20 +2289,29 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
2236 2289
2237int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 2290int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2238{ 2291{
2239 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); 2292 spin_lock(&fs_info->fs_roots_radix_lock);
2240 radix_tree_delete(&fs_info->fs_roots_radix, 2293 radix_tree_delete(&fs_info->fs_roots_radix,
2241 (unsigned long)root->root_key.objectid); 2294 (unsigned long)root->root_key.objectid);
2295 spin_unlock(&fs_info->fs_roots_radix_lock);
2296
2297 if (btrfs_root_refs(&root->root_item) == 0)
2298 synchronize_srcu(&fs_info->subvol_srcu);
2299
2300 free_fs_root(root);
2301 return 0;
2302}
2303
2304static void free_fs_root(struct btrfs_root *root)
2305{
2306 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
2242 if (root->anon_super.s_dev) { 2307 if (root->anon_super.s_dev) {
2243 down_write(&root->anon_super.s_umount); 2308 down_write(&root->anon_super.s_umount);
2244 kill_anon_super(&root->anon_super); 2309 kill_anon_super(&root->anon_super);
2245 } 2310 }
2246 if (root->node) 2311 free_extent_buffer(root->node);
2247 free_extent_buffer(root->node); 2312 free_extent_buffer(root->commit_root);
2248 if (root->commit_root)
2249 free_extent_buffer(root->commit_root);
2250 kfree(root->name); 2313 kfree(root->name);
2251 kfree(root); 2314 kfree(root);
2252 return 0;
2253} 2315}
2254 2316
2255static int del_fs_roots(struct btrfs_fs_info *fs_info) 2317static int del_fs_roots(struct btrfs_fs_info *fs_info)
@@ -2258,6 +2320,20 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info)
2258 struct btrfs_root *gang[8]; 2320 struct btrfs_root *gang[8];
2259 int i; 2321 int i;
2260 2322
2323 while (!list_empty(&fs_info->dead_roots)) {
2324 gang[0] = list_entry(fs_info->dead_roots.next,
2325 struct btrfs_root, root_list);
2326 list_del(&gang[0]->root_list);
2327
2328 if (gang[0]->in_radix) {
2329 btrfs_free_fs_root(fs_info, gang[0]);
2330 } else {
2331 free_extent_buffer(gang[0]->node);
2332 free_extent_buffer(gang[0]->commit_root);
2333 kfree(gang[0]);
2334 }
2335 }
2336
2261 while (1) { 2337 while (1) {
2262 ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, 2338 ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
2263 (void **)gang, 0, 2339 (void **)gang, 0,
@@ -2287,9 +2363,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
2287 root_objectid = gang[ret - 1]->root_key.objectid + 1; 2363 root_objectid = gang[ret - 1]->root_key.objectid + 1;
2288 for (i = 0; i < ret; i++) { 2364 for (i = 0; i < ret; i++) {
2289 root_objectid = gang[i]->root_key.objectid; 2365 root_objectid = gang[i]->root_key.objectid;
2290 ret = btrfs_find_dead_roots(fs_info->tree_root,
2291 root_objectid);
2292 BUG_ON(ret);
2293 btrfs_orphan_cleanup(gang[i]); 2366 btrfs_orphan_cleanup(gang[i]);
2294 } 2367 }
2295 root_objectid++; 2368 root_objectid++;
@@ -2359,7 +2432,6 @@ int close_ctree(struct btrfs_root *root)
2359 free_extent_buffer(root->fs_info->csum_root->commit_root); 2432 free_extent_buffer(root->fs_info->csum_root->commit_root);
2360 2433
2361 btrfs_free_block_groups(root->fs_info); 2434 btrfs_free_block_groups(root->fs_info);
2362 btrfs_free_pinned_extents(root->fs_info);
2363 2435
2364 del_fs_roots(fs_info); 2436 del_fs_roots(fs_info);
2365 2437
@@ -2378,6 +2450,7 @@ int close_ctree(struct btrfs_root *root)
2378 btrfs_mapping_tree_free(&fs_info->mapping_tree); 2450 btrfs_mapping_tree_free(&fs_info->mapping_tree);
2379 2451
2380 bdi_destroy(&fs_info->bdi); 2452 bdi_destroy(&fs_info->bdi);
2453 cleanup_srcu_struct(&fs_info->subvol_srcu);
2381 2454
2382 kfree(fs_info->extent_root); 2455 kfree(fs_info->extent_root);
2383 kfree(fs_info->tree_root); 2456 kfree(fs_info->tree_root);
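
Note: btrfs_read_fs_root_no_name() above now inserts into fs_roots_radix with the radix_tree_preload()/radix_tree_insert()/radix_tree_preload_end() sequence and retries on -EEXIST instead of failing. The generic form of that pattern, with illustrative demo_* names:

#include <linux/gfp.h>
#include <linux/radix-tree.h>
#include <linux/spinlock.h>

static int demo_insert_once(struct radix_tree_root *tree, spinlock_t *lock,
                            unsigned long index, void *item)
{
        int ret;

        /* allocate tree nodes up front; on success preemption is disabled */
        ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
        if (ret)
                return ret;     /* -ENOMEM, nothing held */

        spin_lock(lock);
        ret = radix_tree_insert(tree, index, item);
        spin_unlock(lock);
        radix_tree_preload_end();       /* re-enable preemption */

        /*
         * -EEXIST means another task inserted the same index while we were
         * setting up; the caller above frees its copy and looks up again.
         */
        return ret;
}
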
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 9596b40caa4e..ba5c3fd5ab8c 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -28,7 +28,7 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
28 len = BTRFS_FID_SIZE_NON_CONNECTABLE; 28 len = BTRFS_FID_SIZE_NON_CONNECTABLE;
29 type = FILEID_BTRFS_WITHOUT_PARENT; 29 type = FILEID_BTRFS_WITHOUT_PARENT;
30 30
31 fid->objectid = BTRFS_I(inode)->location.objectid; 31 fid->objectid = inode->i_ino;
32 fid->root_objectid = BTRFS_I(inode)->root->objectid; 32 fid->root_objectid = BTRFS_I(inode)->root->objectid;
33 fid->gen = inode->i_generation; 33 fid->gen = inode->i_generation;
34 34
@@ -60,34 +60,61 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
60} 60}
61 61
62static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, 62static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
63 u64 root_objectid, u32 generation) 63 u64 root_objectid, u32 generation,
64 int check_generation)
64{ 65{
66 struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info;
65 struct btrfs_root *root; 67 struct btrfs_root *root;
68 struct dentry *dentry;
66 struct inode *inode; 69 struct inode *inode;
67 struct btrfs_key key; 70 struct btrfs_key key;
71 int index;
72 int err = 0;
73
74 if (objectid < BTRFS_FIRST_FREE_OBJECTID)
75 return ERR_PTR(-ESTALE);
68 76
69 key.objectid = root_objectid; 77 key.objectid = root_objectid;
70 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 78 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
71 key.offset = (u64)-1; 79 key.offset = (u64)-1;
72 80
73 root = btrfs_read_fs_root_no_name(btrfs_sb(sb)->fs_info, &key); 81 index = srcu_read_lock(&fs_info->subvol_srcu);
74 if (IS_ERR(root)) 82
75 return ERR_CAST(root); 83 root = btrfs_read_fs_root_no_name(fs_info, &key);
84 if (IS_ERR(root)) {
85 err = PTR_ERR(root);
86 goto fail;
87 }
88
89 if (btrfs_root_refs(&root->root_item) == 0) {
90 err = -ENOENT;
91 goto fail;
92 }
76 93
77 key.objectid = objectid; 94 key.objectid = objectid;
78 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); 95 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
79 key.offset = 0; 96 key.offset = 0;
80 97
81 inode = btrfs_iget(sb, &key, root); 98 inode = btrfs_iget(sb, &key, root);
82 if (IS_ERR(inode)) 99 if (IS_ERR(inode)) {
83 return (void *)inode; 100 err = PTR_ERR(inode);
101 goto fail;
102 }
103
104 srcu_read_unlock(&fs_info->subvol_srcu, index);
84 105
85 if (generation != inode->i_generation) { 106 if (check_generation && generation != inode->i_generation) {
86 iput(inode); 107 iput(inode);
87 return ERR_PTR(-ESTALE); 108 return ERR_PTR(-ESTALE);
88 } 109 }
89 110
90 return d_obtain_alias(inode); 111 dentry = d_obtain_alias(inode);
112 if (!IS_ERR(dentry))
113 dentry->d_op = &btrfs_dentry_operations;
114 return dentry;
115fail:
116 srcu_read_unlock(&fs_info->subvol_srcu, index);
117 return ERR_PTR(err);
91} 118}
92 119
93static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, 120static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
@@ -111,7 +138,7 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
111 objectid = fid->parent_objectid; 138 objectid = fid->parent_objectid;
112 generation = fid->parent_gen; 139 generation = fid->parent_gen;
113 140
114 return btrfs_get_dentry(sb, objectid, root_objectid, generation); 141 return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
115} 142}
116 143
117static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, 144static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
@@ -133,66 +160,76 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
133 root_objectid = fid->root_objectid; 160 root_objectid = fid->root_objectid;
134 generation = fid->gen; 161 generation = fid->gen;
135 162
136 return btrfs_get_dentry(sb, objectid, root_objectid, generation); 163 return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
137} 164}
138 165
139static struct dentry *btrfs_get_parent(struct dentry *child) 166static struct dentry *btrfs_get_parent(struct dentry *child)
140{ 167{
141 struct inode *dir = child->d_inode; 168 struct inode *dir = child->d_inode;
 169 struct dentry *dentry;
142 struct btrfs_root *root = BTRFS_I(dir)->root; 170 struct btrfs_root *root = BTRFS_I(dir)->root;
143 struct btrfs_key key;
144 struct btrfs_path *path; 171 struct btrfs_path *path;
145 struct extent_buffer *leaf; 172 struct extent_buffer *leaf;
146 int slot; 173 struct btrfs_root_ref *ref;
147 u64 objectid; 174 struct btrfs_key key;
175 struct btrfs_key found_key;
148 int ret; 176 int ret;
149 177
150 path = btrfs_alloc_path(); 178 path = btrfs_alloc_path();
151 179
152 key.objectid = dir->i_ino; 180 if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
153 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); 181 key.objectid = root->root_key.objectid;
154 key.offset = (u64)-1; 182 key.type = BTRFS_ROOT_BACKREF_KEY;
183 key.offset = (u64)-1;
184 root = root->fs_info->tree_root;
185 } else {
186 key.objectid = dir->i_ino;
187 key.type = BTRFS_INODE_REF_KEY;
188 key.offset = (u64)-1;
189 }
155 190
156 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 191 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
157 if (ret < 0) { 192 if (ret < 0)
158 /* Error */ 193 goto fail;
159 btrfs_free_path(path); 194
160 return ERR_PTR(ret); 195 BUG_ON(ret == 0);
196 if (path->slots[0] == 0) {
197 ret = -ENOENT;
198 goto fail;
161 } 199 }
200
201 path->slots[0]--;
162 leaf = path->nodes[0]; 202 leaf = path->nodes[0];
163 slot = path->slots[0]; 203
164 if (ret) { 204 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
165 /* btrfs_search_slot() returns the slot where we'd want to 205 if (found_key.objectid != key.objectid || found_key.type != key.type) {
166 insert a backref for parent inode #0xFFFFFFFFFFFFFFFF. 206 ret = -ENOENT;
167 The _real_ backref, telling us what the parent inode 207 goto fail;
168 _actually_ is, will be in the slot _before_ the one
169 that btrfs_search_slot() returns. */
170 if (!slot) {
171 /* Unless there is _no_ key in the tree before... */
172 btrfs_free_path(path);
173 return ERR_PTR(-EIO);
174 }
175 slot--;
176 } 208 }
177 209
178 btrfs_item_key_to_cpu(leaf, &key, slot); 210 if (found_key.type == BTRFS_ROOT_BACKREF_KEY) {
211 ref = btrfs_item_ptr(leaf, path->slots[0],
212 struct btrfs_root_ref);
213 key.objectid = btrfs_root_ref_dirid(leaf, ref);
214 } else {
215 key.objectid = found_key.offset;
216 }
179 btrfs_free_path(path); 217 btrfs_free_path(path);
180 218
181 if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY) 219 if (found_key.type == BTRFS_ROOT_BACKREF_KEY) {
182 return ERR_PTR(-EINVAL); 220 return btrfs_get_dentry(root->fs_info->sb, key.objectid,
183 221 found_key.offset, 0, 0);
184 objectid = key.offset; 222 }
185
186 /* If we are already at the root of a subvol, return the real root */
187 if (objectid == dir->i_ino)
188 return dget(dir->i_sb->s_root);
189 223
190 /* Build a new key for the inode item */ 224 key.type = BTRFS_INODE_ITEM_KEY;
191 key.objectid = objectid;
192 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
193 key.offset = 0; 225 key.offset = 0;
194 226 dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root));
195 return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root)); 227 if (!IS_ERR(dentry))
228 dentry->d_op = &btrfs_dentry_operations;
229 return dentry;
230fail:
231 btrfs_free_path(path);
232 return ERR_PTR(ret);
196} 233}
197 234
198const struct export_operations btrfs_export_ops = { 235const struct export_operations btrfs_export_ops = {
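
Note: btrfs_get_dentry() above is the read side of the subvol_srcu added in disk-io.c — the root lookup and the grab of the inode happen inside an SRCU read section, and the root-freeing path calls synchronize_srcu() after the radix tree delete. A hedged sketch of the bracket; use_root() is a hypothetical stand-in for pinning something longer-lived (here, the inode) before the section ends.

#include <linux/err.h>
#include <linux/srcu.h>

static int demo_use_subvol(struct btrfs_fs_info *fs_info,
                           struct btrfs_key *key)
{
        struct btrfs_root *root;
        int index;
        int err = 0;

        index = srcu_read_lock(&fs_info->subvol_srcu);

        root = btrfs_read_fs_root_no_name(fs_info, key);
        if (IS_ERR(root)) {
                err = PTR_ERR(root);
        } else if (btrfs_root_refs(&root->root_item) == 0) {
                err = -ENOENT;  /* root is being deleted */
        } else {
                /*
                 * Safe to touch root here: synchronize_srcu() in the free
                 * path waits for this read section to end. Pin a
                 * longer-lived object (hypothetical helper) before
                 * unlocking.
                 */
                use_root(root);
        }

        srcu_read_unlock(&fs_info->subvol_srcu, index);
        return err;
}
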
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 535f85ba104f..993f93ff7ba6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -32,12 +32,12 @@
32#include "locking.h" 32#include "locking.h"
33#include "free-space-cache.h" 33#include "free-space-cache.h"
34 34
35static int update_reserved_extents(struct btrfs_root *root,
36 u64 bytenr, u64 num, int reserve);
37static int update_block_group(struct btrfs_trans_handle *trans, 35static int update_block_group(struct btrfs_trans_handle *trans,
38 struct btrfs_root *root, 36 struct btrfs_root *root,
39 u64 bytenr, u64 num_bytes, int alloc, 37 u64 bytenr, u64 num_bytes, int alloc,
40 int mark_free); 38 int mark_free);
39static int update_reserved_extents(struct btrfs_block_group_cache *cache,
40 u64 num_bytes, int reserve);
41static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 41static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
42 struct btrfs_root *root, 42 struct btrfs_root *root,
43 u64 bytenr, u64 num_bytes, u64 parent, 43 u64 bytenr, u64 num_bytes, u64 parent,
@@ -57,10 +57,17 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
57 u64 parent, u64 root_objectid, 57 u64 parent, u64 root_objectid,
58 u64 flags, struct btrfs_disk_key *key, 58 u64 flags, struct btrfs_disk_key *key,
59 int level, struct btrfs_key *ins); 59 int level, struct btrfs_key *ins);
60
61static int do_chunk_alloc(struct btrfs_trans_handle *trans, 60static int do_chunk_alloc(struct btrfs_trans_handle *trans,
62 struct btrfs_root *extent_root, u64 alloc_bytes, 61 struct btrfs_root *extent_root, u64 alloc_bytes,
63 u64 flags, int force); 62 u64 flags, int force);
63static int pin_down_bytes(struct btrfs_trans_handle *trans,
64 struct btrfs_root *root,
65 struct btrfs_path *path,
66 u64 bytenr, u64 num_bytes,
67 int is_data, int reserved,
68 struct extent_buffer **must_clean);
69static int find_next_key(struct btrfs_path *path, int level,
70 struct btrfs_key *key);
64 71
65static noinline int 72static noinline int
66block_group_cache_done(struct btrfs_block_group_cache *cache) 73block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -153,34 +160,34 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
153 return ret; 160 return ret;
154} 161}
155 162
156/* 163static int add_excluded_extent(struct btrfs_root *root,
157 * We always set EXTENT_LOCKED for the super mirror extents so we don't 164 u64 start, u64 num_bytes)
158 * overwrite them, so those bits need to be unset. Also, if we are unmounting
159 * with pinned extents still sitting there because we had a block group caching,
160 * we need to clear those now, since we are done.
161 */
162void btrfs_free_pinned_extents(struct btrfs_fs_info *info)
163{ 165{
164 u64 start, end, last = 0; 166 u64 end = start + num_bytes - 1;
165 int ret; 167 set_extent_bits(&root->fs_info->freed_extents[0],
168 start, end, EXTENT_UPTODATE, GFP_NOFS);
169 set_extent_bits(&root->fs_info->freed_extents[1],
170 start, end, EXTENT_UPTODATE, GFP_NOFS);
171 return 0;
172}
166 173
167 while (1) { 174static void free_excluded_extents(struct btrfs_root *root,
168 ret = find_first_extent_bit(&info->pinned_extents, last, 175 struct btrfs_block_group_cache *cache)
169 &start, &end, 176{
170 EXTENT_LOCKED|EXTENT_DIRTY); 177 u64 start, end;
171 if (ret)
172 break;
173 178
174 clear_extent_bits(&info->pinned_extents, start, end, 179 start = cache->key.objectid;
175 EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS); 180 end = start + cache->key.offset - 1;
176 last = end+1; 181
177 } 182 clear_extent_bits(&root->fs_info->freed_extents[0],
183 start, end, EXTENT_UPTODATE, GFP_NOFS);
184 clear_extent_bits(&root->fs_info->freed_extents[1],
185 start, end, EXTENT_UPTODATE, GFP_NOFS);
178} 186}
179 187
180static int remove_sb_from_cache(struct btrfs_root *root, 188static int exclude_super_stripes(struct btrfs_root *root,
181 struct btrfs_block_group_cache *cache) 189 struct btrfs_block_group_cache *cache)
182{ 190{
183 struct btrfs_fs_info *fs_info = root->fs_info;
184 u64 bytenr; 191 u64 bytenr;
185 u64 *logical; 192 u64 *logical;
186 int stripe_len; 193 int stripe_len;
@@ -192,17 +199,42 @@ static int remove_sb_from_cache(struct btrfs_root *root,
192 cache->key.objectid, bytenr, 199 cache->key.objectid, bytenr,
193 0, &logical, &nr, &stripe_len); 200 0, &logical, &nr, &stripe_len);
194 BUG_ON(ret); 201 BUG_ON(ret);
202
195 while (nr--) { 203 while (nr--) {
196 try_lock_extent(&fs_info->pinned_extents, 204 cache->bytes_super += stripe_len;
197 logical[nr], 205 ret = add_excluded_extent(root, logical[nr],
198 logical[nr] + stripe_len - 1, GFP_NOFS); 206 stripe_len);
207 BUG_ON(ret);
199 } 208 }
209
200 kfree(logical); 210 kfree(logical);
201 } 211 }
202
203 return 0; 212 return 0;
204} 213}
205 214
215static struct btrfs_caching_control *
216get_caching_control(struct btrfs_block_group_cache *cache)
217{
218 struct btrfs_caching_control *ctl;
219
220 spin_lock(&cache->lock);
221 if (cache->cached != BTRFS_CACHE_STARTED) {
222 spin_unlock(&cache->lock);
223 return NULL;
224 }
225
226 ctl = cache->caching_ctl;
227 atomic_inc(&ctl->count);
228 spin_unlock(&cache->lock);
229 return ctl;
230}
231
232static void put_caching_control(struct btrfs_caching_control *ctl)
233{
234 if (atomic_dec_and_test(&ctl->count))
235 kfree(ctl);
236}
237
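get_caching_control() only hands out a reference while the group is still in BTRFS_CACHE_STARTED, and put_caching_control() frees the object on the last drop. A user-space sketch of that lookup-under-lock refcount pattern, assuming C11 atomics and pthreads; the struct and function names here are illustrative, not btrfs API:

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct ctl {
	atomic_int count;
};

struct group {
	pthread_mutex_t lock;
	int started;		/* cache->cached == BTRFS_CACHE_STARTED */
	struct ctl *ctl;
};

/* Take a reference only while the state still allows it. */
static struct ctl *get_ctl(struct group *g)
{
	struct ctl *c = NULL;

	pthread_mutex_lock(&g->lock);
	if (g->started && g->ctl) {
		c = g->ctl;
		atomic_fetch_add(&c->count, 1);
	}
	pthread_mutex_unlock(&g->lock);
	return c;
}

/* Free on the last reference, like put_caching_control(). */
static void put_ctl(struct ctl *c)
{
	if (atomic_fetch_sub(&c->count, 1) == 1)
		free(c);
}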
206/* 238/*
 207 * this is only called by cache_block_group; since we could have freed extents, 239 * this is only called by cache_block_group; since we could have freed extents,
208 * we need to check the pinned_extents for any extents that can't be used yet 240 * we need to check the pinned_extents for any extents that can't be used yet
@@ -215,9 +247,9 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
215 int ret; 247 int ret;
216 248
217 while (start < end) { 249 while (start < end) {
218 ret = find_first_extent_bit(&info->pinned_extents, start, 250 ret = find_first_extent_bit(info->pinned_extents, start,
219 &extent_start, &extent_end, 251 &extent_start, &extent_end,
220 EXTENT_DIRTY|EXTENT_LOCKED); 252 EXTENT_DIRTY | EXTENT_UPTODATE);
221 if (ret) 253 if (ret)
222 break; 254 break;
223 255
@@ -249,22 +281,27 @@ static int caching_kthread(void *data)
249{ 281{
250 struct btrfs_block_group_cache *block_group = data; 282 struct btrfs_block_group_cache *block_group = data;
251 struct btrfs_fs_info *fs_info = block_group->fs_info; 283 struct btrfs_fs_info *fs_info = block_group->fs_info;
252 u64 last = 0; 284 struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
285 struct btrfs_root *extent_root = fs_info->extent_root;
253 struct btrfs_path *path; 286 struct btrfs_path *path;
254 int ret = 0;
255 struct btrfs_key key;
256 struct extent_buffer *leaf; 287 struct extent_buffer *leaf;
257 int slot; 288 struct btrfs_key key;
258 u64 total_found = 0; 289 u64 total_found = 0;
259 290 u64 last = 0;
260 BUG_ON(!fs_info); 291 u32 nritems;
292 int ret = 0;
261 293
262 path = btrfs_alloc_path(); 294 path = btrfs_alloc_path();
263 if (!path) 295 if (!path)
264 return -ENOMEM; 296 return -ENOMEM;
265 297
266 atomic_inc(&block_group->space_info->caching_threads); 298 exclude_super_stripes(extent_root, block_group);
299 spin_lock(&block_group->space_info->lock);
300 block_group->space_info->bytes_super += block_group->bytes_super;
301 spin_unlock(&block_group->space_info->lock);
302
267 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 303 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
304
268 /* 305 /*
269 * We don't want to deadlock with somebody trying to allocate a new 306 * We don't want to deadlock with somebody trying to allocate a new
270 * extent for the extent root while also trying to search the extent 307 * extent for the extent root while also trying to search the extent
@@ -277,74 +314,64 @@ static int caching_kthread(void *data)
277 314
278 key.objectid = last; 315 key.objectid = last;
279 key.offset = 0; 316 key.offset = 0;
280 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 317 key.type = BTRFS_EXTENT_ITEM_KEY;
281again: 318again:
319 mutex_lock(&caching_ctl->mutex);
282 /* need to make sure the commit_root doesn't disappear */ 320 /* need to make sure the commit_root doesn't disappear */
283 down_read(&fs_info->extent_commit_sem); 321 down_read(&fs_info->extent_commit_sem);
284 322
285 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); 323 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
286 if (ret < 0) 324 if (ret < 0)
287 goto err; 325 goto err;
288 326
327 leaf = path->nodes[0];
328 nritems = btrfs_header_nritems(leaf);
329
289 while (1) { 330 while (1) {
290 smp_mb(); 331 smp_mb();
291 if (block_group->fs_info->closing > 1) { 332 if (fs_info->closing > 1) {
292 last = (u64)-1; 333 last = (u64)-1;
293 break; 334 break;
294 } 335 }
295 336
296 leaf = path->nodes[0]; 337 if (path->slots[0] < nritems) {
297 slot = path->slots[0]; 338 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
298 if (slot >= btrfs_header_nritems(leaf)) { 339 } else {
299 ret = btrfs_next_leaf(fs_info->extent_root, path); 340 ret = find_next_key(path, 0, &key);
300 if (ret < 0) 341 if (ret)
301 goto err;
302 else if (ret)
303 break; 342 break;
304 343
305 if (need_resched() || 344 caching_ctl->progress = last;
306 btrfs_transaction_in_commit(fs_info)) { 345 btrfs_release_path(extent_root, path);
307 leaf = path->nodes[0]; 346 up_read(&fs_info->extent_commit_sem);
308 347 mutex_unlock(&caching_ctl->mutex);
309 /* this shouldn't happen, but if the 348 if (btrfs_transaction_in_commit(fs_info))
310 * leaf is empty just move on.
311 */
312 if (btrfs_header_nritems(leaf) == 0)
313 break;
314 /*
315 * we need to copy the key out so that
316 * we are sure the next search advances
317 * us forward in the btree.
318 */
319 btrfs_item_key_to_cpu(leaf, &key, 0);
320 btrfs_release_path(fs_info->extent_root, path);
321 up_read(&fs_info->extent_commit_sem);
322 schedule_timeout(1); 349 schedule_timeout(1);
323 goto again; 350 else
324 } 351 cond_resched();
352 goto again;
353 }
325 354
355 if (key.objectid < block_group->key.objectid) {
356 path->slots[0]++;
326 continue; 357 continue;
327 } 358 }
328 btrfs_item_key_to_cpu(leaf, &key, slot);
329 if (key.objectid < block_group->key.objectid)
330 goto next;
331 359
332 if (key.objectid >= block_group->key.objectid + 360 if (key.objectid >= block_group->key.objectid +
333 block_group->key.offset) 361 block_group->key.offset)
334 break; 362 break;
335 363
336 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { 364 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
337 total_found += add_new_free_space(block_group, 365 total_found += add_new_free_space(block_group,
338 fs_info, last, 366 fs_info, last,
339 key.objectid); 367 key.objectid);
340 last = key.objectid + key.offset; 368 last = key.objectid + key.offset;
341 }
342 369
343 if (total_found > (1024 * 1024 * 2)) { 370 if (total_found > (1024 * 1024 * 2)) {
344 total_found = 0; 371 total_found = 0;
345 wake_up(&block_group->caching_q); 372 wake_up(&caching_ctl->wait);
373 }
346 } 374 }
347next:
348 path->slots[0]++; 375 path->slots[0]++;
349 } 376 }
350 ret = 0; 377 ret = 0;
@@ -352,33 +379,65 @@ next:
352 total_found += add_new_free_space(block_group, fs_info, last, 379 total_found += add_new_free_space(block_group, fs_info, last,
353 block_group->key.objectid + 380 block_group->key.objectid +
354 block_group->key.offset); 381 block_group->key.offset);
382 caching_ctl->progress = (u64)-1;
355 383
356 spin_lock(&block_group->lock); 384 spin_lock(&block_group->lock);
385 block_group->caching_ctl = NULL;
357 block_group->cached = BTRFS_CACHE_FINISHED; 386 block_group->cached = BTRFS_CACHE_FINISHED;
358 spin_unlock(&block_group->lock); 387 spin_unlock(&block_group->lock);
359 388
360err: 389err:
361 btrfs_free_path(path); 390 btrfs_free_path(path);
362 up_read(&fs_info->extent_commit_sem); 391 up_read(&fs_info->extent_commit_sem);
363 atomic_dec(&block_group->space_info->caching_threads);
364 wake_up(&block_group->caching_q);
365 392
393 free_excluded_extents(extent_root, block_group);
394
395 mutex_unlock(&caching_ctl->mutex);
396 wake_up(&caching_ctl->wait);
397
398 put_caching_control(caching_ctl);
399 atomic_dec(&block_group->space_info->caching_threads);
366 return 0; 400 return 0;
367} 401}
368 402
369static int cache_block_group(struct btrfs_block_group_cache *cache) 403static int cache_block_group(struct btrfs_block_group_cache *cache)
370{ 404{
405 struct btrfs_fs_info *fs_info = cache->fs_info;
406 struct btrfs_caching_control *caching_ctl;
371 struct task_struct *tsk; 407 struct task_struct *tsk;
372 int ret = 0; 408 int ret = 0;
373 409
410 smp_mb();
411 if (cache->cached != BTRFS_CACHE_NO)
412 return 0;
413
414 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
415 BUG_ON(!caching_ctl);
416
417 INIT_LIST_HEAD(&caching_ctl->list);
418 mutex_init(&caching_ctl->mutex);
419 init_waitqueue_head(&caching_ctl->wait);
420 caching_ctl->block_group = cache;
421 caching_ctl->progress = cache->key.objectid;
422 /* one for caching kthread, one for caching block group list */
423 atomic_set(&caching_ctl->count, 2);
424
374 spin_lock(&cache->lock); 425 spin_lock(&cache->lock);
375 if (cache->cached != BTRFS_CACHE_NO) { 426 if (cache->cached != BTRFS_CACHE_NO) {
376 spin_unlock(&cache->lock); 427 spin_unlock(&cache->lock);
377 return ret; 428 kfree(caching_ctl);
429 return 0;
378 } 430 }
431 cache->caching_ctl = caching_ctl;
379 cache->cached = BTRFS_CACHE_STARTED; 432 cache->cached = BTRFS_CACHE_STARTED;
380 spin_unlock(&cache->lock); 433 spin_unlock(&cache->lock);
381 434
435 down_write(&fs_info->extent_commit_sem);
436 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
437 up_write(&fs_info->extent_commit_sem);
438
439 atomic_inc(&cache->space_info->caching_threads);
440
382 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", 441 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
383 cache->key.objectid); 442 cache->key.objectid);
384 if (IS_ERR(tsk)) { 443 if (IS_ERR(tsk)) {
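Note the shape of the rewritten cache_block_group(): the caching control is allocated with kzalloc() before cache->lock is taken, the state is re-checked under the lock, and the allocation is thrown away if another thread started caching first. A standalone sketch of that optimistic allocate-then-recheck pattern (names are made up for the example):

#include <pthread.h>
#include <stdlib.h>

struct obj { int dummy; };

struct cache {
	pthread_mutex_t lock;
	int state;		/* 0 == BTRFS_CACHE_NO in the real code */
	struct obj *ctl;
};

static int start_caching(struct cache *c)
{
	struct obj *o;

	if (c->state != 0)	/* unlocked fast path, like the smp_mb() check */
		return 0;

	o = calloc(1, sizeof(*o));	/* allocate before taking the lock */
	if (!o)
		return -1;

	pthread_mutex_lock(&c->lock);
	if (c->state != 0) {		/* lost the race: discard our copy */
		pthread_mutex_unlock(&c->lock);
		free(o);
		return 0;
	}
	c->ctl = o;
	c->state = 1;			/* BTRFS_CACHE_STARTED */
	pthread_mutex_unlock(&c->lock);
	return 0;
}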
@@ -1657,7 +1716,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
1657 parent, ref_root, flags, 1716 parent, ref_root, flags,
1658 ref->objectid, ref->offset, 1717 ref->objectid, ref->offset,
1659 &ins, node->ref_mod); 1718 &ins, node->ref_mod);
1660 update_reserved_extents(root, ins.objectid, ins.offset, 0);
1661 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1719 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1662 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1720 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1663 node->num_bytes, parent, 1721 node->num_bytes, parent,
@@ -1783,7 +1841,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
1783 extent_op->flags_to_set, 1841 extent_op->flags_to_set,
1784 &extent_op->key, 1842 &extent_op->key,
1785 ref->level, &ins); 1843 ref->level, &ins);
1786 update_reserved_extents(root, ins.objectid, ins.offset, 0);
1787 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1844 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1788 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1845 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1789 node->num_bytes, parent, ref_root, 1846 node->num_bytes, parent, ref_root,
@@ -1818,16 +1875,32 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
1818 BUG_ON(extent_op); 1875 BUG_ON(extent_op);
1819 head = btrfs_delayed_node_to_head(node); 1876 head = btrfs_delayed_node_to_head(node);
1820 if (insert_reserved) { 1877 if (insert_reserved) {
1878 int mark_free = 0;
1879 struct extent_buffer *must_clean = NULL;
1880
1881 ret = pin_down_bytes(trans, root, NULL,
1882 node->bytenr, node->num_bytes,
1883 head->is_data, 1, &must_clean);
1884 if (ret > 0)
1885 mark_free = 1;
1886
1887 if (must_clean) {
1888 clean_tree_block(NULL, root, must_clean);
1889 btrfs_tree_unlock(must_clean);
1890 free_extent_buffer(must_clean);
1891 }
1821 if (head->is_data) { 1892 if (head->is_data) {
1822 ret = btrfs_del_csums(trans, root, 1893 ret = btrfs_del_csums(trans, root,
1823 node->bytenr, 1894 node->bytenr,
1824 node->num_bytes); 1895 node->num_bytes);
1825 BUG_ON(ret); 1896 BUG_ON(ret);
1826 } 1897 }
1827 btrfs_update_pinned_extents(root, node->bytenr, 1898 if (mark_free) {
1828 node->num_bytes, 1); 1899 ret = btrfs_free_reserved_extent(root,
1829 update_reserved_extents(root, node->bytenr, 1900 node->bytenr,
1830 node->num_bytes, 0); 1901 node->num_bytes);
1902 BUG_ON(ret);
1903 }
1831 } 1904 }
1832 mutex_unlock(&head->mutex); 1905 mutex_unlock(&head->mutex);
1833 return 0; 1906 return 0;
@@ -2706,6 +2779,8 @@ int btrfs_check_metadata_free_space(struct btrfs_root *root)
2706 /* get the space info for where the metadata will live */ 2779 /* get the space info for where the metadata will live */
2707 alloc_target = btrfs_get_alloc_profile(root, 0); 2780 alloc_target = btrfs_get_alloc_profile(root, 0);
2708 meta_sinfo = __find_space_info(info, alloc_target); 2781 meta_sinfo = __find_space_info(info, alloc_target);
2782 if (!meta_sinfo)
2783 goto alloc;
2709 2784
2710again: 2785again:
2711 spin_lock(&meta_sinfo->lock); 2786 spin_lock(&meta_sinfo->lock);
@@ -2717,12 +2792,13 @@ again:
2717 do_div(thresh, 100); 2792 do_div(thresh, 100);
2718 2793
2719 if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 2794 if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
2720 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { 2795 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
2796 meta_sinfo->bytes_super > thresh) {
2721 struct btrfs_trans_handle *trans; 2797 struct btrfs_trans_handle *trans;
2722 if (!meta_sinfo->full) { 2798 if (!meta_sinfo->full) {
2723 meta_sinfo->force_alloc = 1; 2799 meta_sinfo->force_alloc = 1;
2724 spin_unlock(&meta_sinfo->lock); 2800 spin_unlock(&meta_sinfo->lock);
2725 2801alloc:
2726 trans = btrfs_start_transaction(root, 1); 2802 trans = btrfs_start_transaction(root, 1);
2727 if (!trans) 2803 if (!trans)
2728 return -ENOMEM; 2804 return -ENOMEM;
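The new comparison folds bytes_super into the space that counts against the metadata threshold, so super-block stripes excluded by exclude_super_stripes() are no longer treated as free. A hedged sketch of the check; pct is illustrative (the real percentage is computed earlier in the function), and the plain 64-bit division stands in for the do_div() visible above, which the kernel needs because native 64-bit division is unavailable on some 32-bit targets:

#include <stdint.h>

struct space_info {
	uint64_t total_bytes;
	uint64_t bytes_used, bytes_reserved, bytes_pinned;
	uint64_t bytes_readonly, bytes_super;
};

/* Nonzero when committed plus excluded space crosses the threshold. */
static int over_threshold(const struct space_info *s, unsigned int pct)
{
	uint64_t thresh = s->total_bytes * pct / 100;

	return s->bytes_used + s->bytes_reserved + s->bytes_pinned +
	       s->bytes_readonly + s->bytes_super > thresh;
}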
@@ -2730,6 +2806,10 @@ again:
2730 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2806 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2731 2 * 1024 * 1024, alloc_target, 0); 2807 2 * 1024 * 1024, alloc_target, 0);
2732 btrfs_end_transaction(trans, root); 2808 btrfs_end_transaction(trans, root);
2809 if (!meta_sinfo) {
2810 meta_sinfo = __find_space_info(info,
2811 alloc_target);
2812 }
2733 goto again; 2813 goto again;
2734 } 2814 }
2735 spin_unlock(&meta_sinfo->lock); 2815 spin_unlock(&meta_sinfo->lock);
@@ -2765,13 +2845,16 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
2765 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 2845 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
2766 2846
2767 data_sinfo = BTRFS_I(inode)->space_info; 2847 data_sinfo = BTRFS_I(inode)->space_info;
2848 if (!data_sinfo)
2849 goto alloc;
2850
2768again: 2851again:
2769 /* make sure we have enough space to handle the data first */ 2852 /* make sure we have enough space to handle the data first */
2770 spin_lock(&data_sinfo->lock); 2853 spin_lock(&data_sinfo->lock);
2771 if (data_sinfo->total_bytes - data_sinfo->bytes_used - 2854 if (data_sinfo->total_bytes - data_sinfo->bytes_used -
2772 data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - 2855 data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
2773 data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - 2856 data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
2774 data_sinfo->bytes_may_use < bytes) { 2857 data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) {
2775 struct btrfs_trans_handle *trans; 2858 struct btrfs_trans_handle *trans;
2776 2859
2777 /* 2860 /*
@@ -2783,7 +2866,7 @@ again:
2783 2866
2784 data_sinfo->force_alloc = 1; 2867 data_sinfo->force_alloc = 1;
2785 spin_unlock(&data_sinfo->lock); 2868 spin_unlock(&data_sinfo->lock);
2786 2869alloc:
2787 alloc_target = btrfs_get_alloc_profile(root, 1); 2870 alloc_target = btrfs_get_alloc_profile(root, 1);
2788 trans = btrfs_start_transaction(root, 1); 2871 trans = btrfs_start_transaction(root, 1);
2789 if (!trans) 2872 if (!trans)
@@ -2795,6 +2878,11 @@ again:
2795 btrfs_end_transaction(trans, root); 2878 btrfs_end_transaction(trans, root);
2796 if (ret) 2879 if (ret)
2797 return ret; 2880 return ret;
2881
2882 if (!data_sinfo) {
2883 btrfs_set_inode_space_info(root, inode);
2884 data_sinfo = BTRFS_I(inode)->space_info;
2885 }
2798 goto again; 2886 goto again;
2799 } 2887 }
2800 spin_unlock(&data_sinfo->lock); 2888 spin_unlock(&data_sinfo->lock);
@@ -3009,10 +3097,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3009 num_bytes = min(total, cache->key.offset - byte_in_group); 3097 num_bytes = min(total, cache->key.offset - byte_in_group);
3010 if (alloc) { 3098 if (alloc) {
3011 old_val += num_bytes; 3099 old_val += num_bytes;
3100 btrfs_set_block_group_used(&cache->item, old_val);
3101 cache->reserved -= num_bytes;
3012 cache->space_info->bytes_used += num_bytes; 3102 cache->space_info->bytes_used += num_bytes;
3103 cache->space_info->bytes_reserved -= num_bytes;
3013 if (cache->ro) 3104 if (cache->ro)
3014 cache->space_info->bytes_readonly -= num_bytes; 3105 cache->space_info->bytes_readonly -= num_bytes;
3015 btrfs_set_block_group_used(&cache->item, old_val);
3016 spin_unlock(&cache->lock); 3106 spin_unlock(&cache->lock);
3017 spin_unlock(&cache->space_info->lock); 3107 spin_unlock(&cache->space_info->lock);
3018 } else { 3108 } else {
@@ -3057,127 +3147,136 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
3057 return bytenr; 3147 return bytenr;
3058} 3148}
3059 3149
3060int btrfs_update_pinned_extents(struct btrfs_root *root, 3150/*
 3061 u64 bytenr, u64 num, int pin) 3151 * this function must be called within a transaction
3152 */
3153int btrfs_pin_extent(struct btrfs_root *root,
3154 u64 bytenr, u64 num_bytes, int reserved)
3062{ 3155{
3063 u64 len;
3064 struct btrfs_block_group_cache *cache;
3065 struct btrfs_fs_info *fs_info = root->fs_info; 3156 struct btrfs_fs_info *fs_info = root->fs_info;
3157 struct btrfs_block_group_cache *cache;
3066 3158
3067 if (pin) 3159 cache = btrfs_lookup_block_group(fs_info, bytenr);
3068 set_extent_dirty(&fs_info->pinned_extents, 3160 BUG_ON(!cache);
3069 bytenr, bytenr + num - 1, GFP_NOFS);
3070
3071 while (num > 0) {
3072 cache = btrfs_lookup_block_group(fs_info, bytenr);
3073 BUG_ON(!cache);
3074 len = min(num, cache->key.offset -
3075 (bytenr - cache->key.objectid));
3076 if (pin) {
3077 spin_lock(&cache->space_info->lock);
3078 spin_lock(&cache->lock);
3079 cache->pinned += len;
3080 cache->space_info->bytes_pinned += len;
3081 spin_unlock(&cache->lock);
3082 spin_unlock(&cache->space_info->lock);
3083 fs_info->total_pinned += len;
3084 } else {
3085 int unpin = 0;
3086 3161
3087 /* 3162 spin_lock(&cache->space_info->lock);
3088 * in order to not race with the block group caching, we 3163 spin_lock(&cache->lock);
3089 * only want to unpin the extent if we are cached. If 3164 cache->pinned += num_bytes;
3090 * we aren't cached, we want to start async caching this 3165 cache->space_info->bytes_pinned += num_bytes;
3091 * block group so we can free the extent the next time 3166 if (reserved) {
3092 * around. 3167 cache->reserved -= num_bytes;
3093 */ 3168 cache->space_info->bytes_reserved -= num_bytes;
3094 spin_lock(&cache->space_info->lock); 3169 }
3095 spin_lock(&cache->lock); 3170 spin_unlock(&cache->lock);
3096 unpin = (cache->cached == BTRFS_CACHE_FINISHED); 3171 spin_unlock(&cache->space_info->lock);
3097 if (likely(unpin)) {
3098 cache->pinned -= len;
3099 cache->space_info->bytes_pinned -= len;
3100 fs_info->total_pinned -= len;
3101 }
3102 spin_unlock(&cache->lock);
3103 spin_unlock(&cache->space_info->lock);
3104 3172
3105 if (likely(unpin)) 3173 btrfs_put_block_group(cache);
3106 clear_extent_dirty(&fs_info->pinned_extents,
3107 bytenr, bytenr + len -1,
3108 GFP_NOFS);
3109 else
3110 cache_block_group(cache);
3111 3174
3112 if (unpin) 3175 set_extent_dirty(fs_info->pinned_extents,
3113 btrfs_add_free_space(cache, bytenr, len); 3176 bytenr, bytenr + num_bytes - 1, GFP_NOFS);
3114 } 3177 return 0;
3115 btrfs_put_block_group(cache); 3178}
3116 bytenr += len; 3179
3117 num -= len; 3180static int update_reserved_extents(struct btrfs_block_group_cache *cache,
3181 u64 num_bytes, int reserve)
3182{
3183 spin_lock(&cache->space_info->lock);
3184 spin_lock(&cache->lock);
3185 if (reserve) {
3186 cache->reserved += num_bytes;
3187 cache->space_info->bytes_reserved += num_bytes;
3188 } else {
3189 cache->reserved -= num_bytes;
3190 cache->space_info->bytes_reserved -= num_bytes;
3118 } 3191 }
3192 spin_unlock(&cache->lock);
3193 spin_unlock(&cache->space_info->lock);
3119 return 0; 3194 return 0;
3120} 3195}
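update_reserved_extents() now adjusts the per-group and per-space-info counters together, always taking space_info->lock before the group's lock. A sketch of that paired update under the same fixed lock order, with illustrative types standing in for the btrfs structures:

#include <pthread.h>
#include <stdint.h>

struct space_info {
	pthread_mutex_t lock;
	uint64_t bytes_reserved;
};

struct block_group {
	pthread_mutex_t lock;
	uint64_t reserved;
	struct space_info *space_info;
};

/* Lock order matches the patch: space_info->lock, then the group's lock. */
static void update_reserved(struct block_group *bg, uint64_t n, int reserve)
{
	pthread_mutex_lock(&bg->space_info->lock);
	pthread_mutex_lock(&bg->lock);
	if (reserve) {
		bg->reserved += n;
		bg->space_info->bytes_reserved += n;
	} else {
		bg->reserved -= n;
		bg->space_info->bytes_reserved -= n;
	}
	pthread_mutex_unlock(&bg->lock);
	pthread_mutex_unlock(&bg->space_info->lock);
}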
3121 3196
3122static int update_reserved_extents(struct btrfs_root *root, 3197int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
3123 u64 bytenr, u64 num, int reserve) 3198 struct btrfs_root *root)
3124{ 3199{
3125 u64 len;
3126 struct btrfs_block_group_cache *cache;
3127 struct btrfs_fs_info *fs_info = root->fs_info; 3200 struct btrfs_fs_info *fs_info = root->fs_info;
3201 struct btrfs_caching_control *next;
3202 struct btrfs_caching_control *caching_ctl;
3203 struct btrfs_block_group_cache *cache;
3128 3204
3129 while (num > 0) { 3205 down_write(&fs_info->extent_commit_sem);
3130 cache = btrfs_lookup_block_group(fs_info, bytenr);
3131 BUG_ON(!cache);
3132 len = min(num, cache->key.offset -
3133 (bytenr - cache->key.objectid));
3134 3206
3135 spin_lock(&cache->space_info->lock); 3207 list_for_each_entry_safe(caching_ctl, next,
3136 spin_lock(&cache->lock); 3208 &fs_info->caching_block_groups, list) {
3137 if (reserve) { 3209 cache = caching_ctl->block_group;
3138 cache->reserved += len; 3210 if (block_group_cache_done(cache)) {
3139 cache->space_info->bytes_reserved += len; 3211 cache->last_byte_to_unpin = (u64)-1;
3212 list_del_init(&caching_ctl->list);
3213 put_caching_control(caching_ctl);
3140 } else { 3214 } else {
3141 cache->reserved -= len; 3215 cache->last_byte_to_unpin = caching_ctl->progress;
3142 cache->space_info->bytes_reserved -= len;
3143 } 3216 }
3144 spin_unlock(&cache->lock);
3145 spin_unlock(&cache->space_info->lock);
3146 btrfs_put_block_group(cache);
3147 bytenr += len;
3148 num -= len;
3149 } 3217 }
3218
3219 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
3220 fs_info->pinned_extents = &fs_info->freed_extents[1];
3221 else
3222 fs_info->pinned_extents = &fs_info->freed_extents[0];
3223
3224 up_write(&fs_info->extent_commit_sem);
3150 return 0; 3225 return 0;
3151} 3226}
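btrfs_prepare_extent_commit() flips fs_info->pinned_extents between freed_extents[0] and freed_extents[1]: one tree keeps collecting newly pinned extents while btrfs_finish_extent_commit() sweeps the other. The double-buffer swap in isolation (struct names are placeholders):

struct tree { int id; };		/* stand-in for extent_io_tree */

struct fs {
	struct tree freed[2];
	struct tree *pinned;		/* the currently active set */
};

/* Swap the active set at commit; the caller then sweeps the now-inactive
 * tree, exactly as btrfs_finish_extent_commit() picks "the other" one. */
static struct tree *swap_pinned(struct fs *fs)
{
	struct tree *old = fs->pinned;

	fs->pinned = (old == &fs->freed[0]) ? &fs->freed[1] : &fs->freed[0];
	return old;			/* the set to unpin from */
}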
3152 3227
3153int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) 3228static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
3154{ 3229{
3155 u64 last = 0; 3230 struct btrfs_fs_info *fs_info = root->fs_info;
3156 u64 start; 3231 struct btrfs_block_group_cache *cache = NULL;
3157 u64 end; 3232 u64 len;
3158 struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
3159 int ret;
3160 3233
3161 while (1) { 3234 while (start <= end) {
3162 ret = find_first_extent_bit(pinned_extents, last, 3235 if (!cache ||
3163 &start, &end, EXTENT_DIRTY); 3236 start >= cache->key.objectid + cache->key.offset) {
3164 if (ret) 3237 if (cache)
3165 break; 3238 btrfs_put_block_group(cache);
3239 cache = btrfs_lookup_block_group(fs_info, start);
3240 BUG_ON(!cache);
3241 }
3242
3243 len = cache->key.objectid + cache->key.offset - start;
3244 len = min(len, end + 1 - start);
3166 3245
3167 set_extent_dirty(copy, start, end, GFP_NOFS); 3246 if (start < cache->last_byte_to_unpin) {
3168 last = end + 1; 3247 len = min(len, cache->last_byte_to_unpin - start);
3248 btrfs_add_free_space(cache, start, len);
3249 }
3250
3251 spin_lock(&cache->space_info->lock);
3252 spin_lock(&cache->lock);
3253 cache->pinned -= len;
3254 cache->space_info->bytes_pinned -= len;
3255 spin_unlock(&cache->lock);
3256 spin_unlock(&cache->space_info->lock);
3257
3258 start += len;
3169 } 3259 }
3260
3261 if (cache)
3262 btrfs_put_block_group(cache);
3170 return 0; 3263 return 0;
3171} 3264}
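unpin_extent_range() walks [start, end] one block group at a time, clamping each step to both the group boundary and the end of the requested range. The clamping arithmetic on its own, assuming start already lies inside the group:

#include <stdint.h>

static uint64_t min_u64(uint64_t a, uint64_t b)
{
	return a < b ? a : b;
}

/* Bytes of [start, end] that fall inside a group covering
 * [objectid, objectid + offset). */
static uint64_t step_len(uint64_t objectid, uint64_t offset,
			 uint64_t start, uint64_t end)
{
	uint64_t len = objectid + offset - start;

	return min_u64(len, end + 1 - start);
}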
3172 3265
3173int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 3266int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3174 struct btrfs_root *root, 3267 struct btrfs_root *root)
3175 struct extent_io_tree *unpin)
3176{ 3268{
3269 struct btrfs_fs_info *fs_info = root->fs_info;
3270 struct extent_io_tree *unpin;
3177 u64 start; 3271 u64 start;
3178 u64 end; 3272 u64 end;
3179 int ret; 3273 int ret;
3180 3274
3275 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
3276 unpin = &fs_info->freed_extents[1];
3277 else
3278 unpin = &fs_info->freed_extents[0];
3279
3181 while (1) { 3280 while (1) {
3182 ret = find_first_extent_bit(unpin, 0, &start, &end, 3281 ret = find_first_extent_bit(unpin, 0, &start, &end,
3183 EXTENT_DIRTY); 3282 EXTENT_DIRTY);
@@ -3186,10 +3285,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3186 3285
3187 ret = btrfs_discard_extent(root, start, end + 1 - start); 3286 ret = btrfs_discard_extent(root, start, end + 1 - start);
3188 3287
3189 /* unlocks the pinned mutex */
3190 btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
3191 clear_extent_dirty(unpin, start, end, GFP_NOFS); 3288 clear_extent_dirty(unpin, start, end, GFP_NOFS);
3192 3289 unpin_extent_range(root, start, end);
3193 cond_resched(); 3290 cond_resched();
3194 } 3291 }
3195 3292
@@ -3199,7 +3296,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3199static int pin_down_bytes(struct btrfs_trans_handle *trans, 3296static int pin_down_bytes(struct btrfs_trans_handle *trans,
3200 struct btrfs_root *root, 3297 struct btrfs_root *root,
3201 struct btrfs_path *path, 3298 struct btrfs_path *path,
3202 u64 bytenr, u64 num_bytes, int is_data, 3299 u64 bytenr, u64 num_bytes,
3300 int is_data, int reserved,
3203 struct extent_buffer **must_clean) 3301 struct extent_buffer **must_clean)
3204{ 3302{
3205 int err = 0; 3303 int err = 0;
@@ -3231,15 +3329,15 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
3231 } 3329 }
3232 free_extent_buffer(buf); 3330 free_extent_buffer(buf);
3233pinit: 3331pinit:
3234 btrfs_set_path_blocking(path); 3332 if (path)
3333 btrfs_set_path_blocking(path);
3235 /* unlocks the pinned mutex */ 3334 /* unlocks the pinned mutex */
3236 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3335 btrfs_pin_extent(root, bytenr, num_bytes, reserved);
3237 3336
3238 BUG_ON(err < 0); 3337 BUG_ON(err < 0);
3239 return 0; 3338 return 0;
3240} 3339}
3241 3340
3242
3243static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 3341static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
3244 struct btrfs_root *root, 3342 struct btrfs_root *root,
3245 u64 bytenr, u64 num_bytes, u64 parent, 3343 u64 bytenr, u64 num_bytes, u64 parent,
@@ -3413,7 +3511,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
3413 } 3511 }
3414 3512
3415 ret = pin_down_bytes(trans, root, path, bytenr, 3513 ret = pin_down_bytes(trans, root, path, bytenr,
3416 num_bytes, is_data, &must_clean); 3514 num_bytes, is_data, 0, &must_clean);
3417 if (ret > 0) 3515 if (ret > 0)
3418 mark_free = 1; 3516 mark_free = 1;
3419 BUG_ON(ret < 0); 3517 BUG_ON(ret < 0);
@@ -3544,8 +3642,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
3544 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { 3642 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
3545 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); 3643 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
3546 /* unlocks the pinned mutex */ 3644 /* unlocks the pinned mutex */
3547 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3645 btrfs_pin_extent(root, bytenr, num_bytes, 1);
3548 update_reserved_extents(root, bytenr, num_bytes, 0);
3549 ret = 0; 3646 ret = 0;
3550 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 3647 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
3551 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 3648 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
@@ -3585,19 +3682,33 @@ static noinline int
3585wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, 3682wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
3586 u64 num_bytes) 3683 u64 num_bytes)
3587{ 3684{
3685 struct btrfs_caching_control *caching_ctl;
3588 DEFINE_WAIT(wait); 3686 DEFINE_WAIT(wait);
3589 3687
3590 prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE); 3688 caching_ctl = get_caching_control(cache);
3591 3689 if (!caching_ctl)
3592 if (block_group_cache_done(cache)) {
3593 finish_wait(&cache->caching_q, &wait);
3594 return 0; 3690 return 0;
3595 }
3596 schedule();
3597 finish_wait(&cache->caching_q, &wait);
3598 3691
3599 wait_event(cache->caching_q, block_group_cache_done(cache) || 3692 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
3600 (cache->free_space >= num_bytes)); 3693 (cache->free_space >= num_bytes));
3694
3695 put_caching_control(caching_ctl);
3696 return 0;
3697}
3698
3699static noinline int
3700wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
3701{
3702 struct btrfs_caching_control *caching_ctl;
3703 DEFINE_WAIT(wait);
3704
3705 caching_ctl = get_caching_control(cache);
3706 if (!caching_ctl)
3707 return 0;
3708
3709 wait_event(caching_ctl->wait, block_group_cache_done(cache));
3710
3711 put_caching_control(caching_ctl);
3601 return 0; 3712 return 0;
3602} 3713}
3603 3714
@@ -3635,6 +3746,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
3635 int last_ptr_loop = 0; 3746 int last_ptr_loop = 0;
3636 int loop = 0; 3747 int loop = 0;
3637 bool found_uncached_bg = false; 3748 bool found_uncached_bg = false;
3749 bool failed_cluster_refill = false;
3638 3750
3639 WARN_ON(num_bytes < root->sectorsize); 3751 WARN_ON(num_bytes < root->sectorsize);
3640 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 3752 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -3732,7 +3844,16 @@ have_block_group:
3732 if (unlikely(block_group->ro)) 3844 if (unlikely(block_group->ro))
3733 goto loop; 3845 goto loop;
3734 3846
3735 if (last_ptr) { 3847 /*
 3848 * OK, we want to try the cluster allocator, so let's look
 3849 * there, unless we are on LOOP_NO_EMPTY_SIZE: by that point
 3850 * we will have tried the cluster allocator plenty of times
 3851 * without finding anything, so the space is likely far too
 3852 * fragmented for the clustering code to find anything. Just
 3853 * skip it and let the allocator find whatever block it
 3854 * can.
3855 */
3856 if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) {
3736 /* 3857 /*
3737 * the refill lock keeps out other 3858 * the refill lock keeps out other
3738 * people trying to start a new cluster 3859 * people trying to start a new cluster
@@ -3807,9 +3928,11 @@ refill_cluster:
3807 spin_unlock(&last_ptr->refill_lock); 3928 spin_unlock(&last_ptr->refill_lock);
3808 goto checks; 3929 goto checks;
3809 } 3930 }
3810 } else if (!cached && loop > LOOP_CACHING_NOWAIT) { 3931 } else if (!cached && loop > LOOP_CACHING_NOWAIT
3932 && !failed_cluster_refill) {
3811 spin_unlock(&last_ptr->refill_lock); 3933 spin_unlock(&last_ptr->refill_lock);
3812 3934
3935 failed_cluster_refill = true;
3813 wait_block_group_cache_progress(block_group, 3936 wait_block_group_cache_progress(block_group,
3814 num_bytes + empty_cluster + empty_size); 3937 num_bytes + empty_cluster + empty_size);
3815 goto have_block_group; 3938 goto have_block_group;
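failed_cluster_refill turns the old unconditional wait into a retry-once policy: the first failed refill on a still-caching group waits for progress and retries, a second failure falls through to the plain allocator. The control shape, sketched with placeholder callbacks:

/* try_refill() and wait_progress() are placeholders for the cluster
 * refill attempt and wait_block_group_cache_progress(). */
static int alloc_with_retry(int (*try_refill)(void),
			    void (*wait_progress)(void))
{
	int failed_refill = 0;

	for (;;) {
		if (try_refill())
			return 0;		/* the cluster satisfied us */
		if (!failed_refill) {
			failed_refill = 1;	/* wait once, then retry */
			wait_progress();
			continue;
		}
		return -1;			/* fall back past the cluster */
	}
}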
@@ -3821,13 +3944,9 @@ refill_cluster:
3821 * cluster. Free the cluster we've been trying 3944 * cluster. Free the cluster we've been trying
3822 * to use, and go to the next block group 3945 * to use, and go to the next block group
3823 */ 3946 */
3824 if (loop < LOOP_NO_EMPTY_SIZE) { 3947 btrfs_return_cluster_to_free_space(NULL, last_ptr);
3825 btrfs_return_cluster_to_free_space(NULL,
3826 last_ptr);
3827 spin_unlock(&last_ptr->refill_lock);
3828 goto loop;
3829 }
3830 spin_unlock(&last_ptr->refill_lock); 3948 spin_unlock(&last_ptr->refill_lock);
3949 goto loop;
3831 } 3950 }
3832 3951
3833 offset = btrfs_find_space_for_alloc(block_group, search_start, 3952 offset = btrfs_find_space_for_alloc(block_group, search_start,
@@ -3881,9 +4000,12 @@ checks:
3881 search_start - offset); 4000 search_start - offset);
3882 BUG_ON(offset > search_start); 4001 BUG_ON(offset > search_start);
3883 4002
4003 update_reserved_extents(block_group, num_bytes, 1);
4004
 3884 /* we are all good, let's return */ 4005 /* we are all good, let's return */
3885 break; 4006 break;
3886loop: 4007loop:
4008 failed_cluster_refill = false;
3887 btrfs_put_block_group(block_group); 4009 btrfs_put_block_group(block_group);
3888 } 4010 }
3889 up_read(&space_info->groups_sem); 4011 up_read(&space_info->groups_sem);
@@ -3973,12 +4095,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
3973 up_read(&info->groups_sem); 4095 up_read(&info->groups_sem);
3974} 4096}
3975 4097
3976static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, 4098int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3977 struct btrfs_root *root, 4099 struct btrfs_root *root,
3978 u64 num_bytes, u64 min_alloc_size, 4100 u64 num_bytes, u64 min_alloc_size,
3979 u64 empty_size, u64 hint_byte, 4101 u64 empty_size, u64 hint_byte,
3980 u64 search_end, struct btrfs_key *ins, 4102 u64 search_end, struct btrfs_key *ins,
3981 u64 data) 4103 u64 data)
3982{ 4104{
3983 int ret; 4105 int ret;
3984 u64 search_start = 0; 4106 u64 search_start = 0;
@@ -4044,25 +4166,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
4044 ret = btrfs_discard_extent(root, start, len); 4166 ret = btrfs_discard_extent(root, start, len);
4045 4167
4046 btrfs_add_free_space(cache, start, len); 4168 btrfs_add_free_space(cache, start, len);
4169 update_reserved_extents(cache, len, 0);
4047 btrfs_put_block_group(cache); 4170 btrfs_put_block_group(cache);
4048 update_reserved_extents(root, start, len, 0);
4049
4050 return ret;
4051}
4052
4053int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
4054 struct btrfs_root *root,
4055 u64 num_bytes, u64 min_alloc_size,
4056 u64 empty_size, u64 hint_byte,
4057 u64 search_end, struct btrfs_key *ins,
4058 u64 data)
4059{
4060 int ret;
4061 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
4062 empty_size, hint_byte, search_end, ins,
4063 data);
4064 if (!ret)
4065 update_reserved_extents(root, ins->objectid, ins->offset, 1);
4066 4171
4067 return ret; 4172 return ret;
4068} 4173}
@@ -4223,15 +4328,46 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
4223{ 4328{
4224 int ret; 4329 int ret;
4225 struct btrfs_block_group_cache *block_group; 4330 struct btrfs_block_group_cache *block_group;
4331 struct btrfs_caching_control *caching_ctl;
4332 u64 start = ins->objectid;
4333 u64 num_bytes = ins->offset;
4226 4334
4227 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 4335 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
4228 cache_block_group(block_group); 4336 cache_block_group(block_group);
4229 wait_event(block_group->caching_q, 4337 caching_ctl = get_caching_control(block_group);
4230 block_group_cache_done(block_group));
4231 4338
4232 ret = btrfs_remove_free_space(block_group, ins->objectid, 4339 if (!caching_ctl) {
4233 ins->offset); 4340 BUG_ON(!block_group_cache_done(block_group));
4234 BUG_ON(ret); 4341 ret = btrfs_remove_free_space(block_group, start, num_bytes);
4342 BUG_ON(ret);
4343 } else {
4344 mutex_lock(&caching_ctl->mutex);
4345
4346 if (start >= caching_ctl->progress) {
4347 ret = add_excluded_extent(root, start, num_bytes);
4348 BUG_ON(ret);
4349 } else if (start + num_bytes <= caching_ctl->progress) {
4350 ret = btrfs_remove_free_space(block_group,
4351 start, num_bytes);
4352 BUG_ON(ret);
4353 } else {
4354 num_bytes = caching_ctl->progress - start;
4355 ret = btrfs_remove_free_space(block_group,
4356 start, num_bytes);
4357 BUG_ON(ret);
4358
4359 start = caching_ctl->progress;
4360 num_bytes = ins->objectid + ins->offset -
4361 caching_ctl->progress;
4362 ret = add_excluded_extent(root, start, num_bytes);
4363 BUG_ON(ret);
4364 }
4365
4366 mutex_unlock(&caching_ctl->mutex);
4367 put_caching_control(caching_ctl);
4368 }
4369
4370 update_reserved_extents(block_group, ins->offset, 1);
4235 btrfs_put_block_group(block_group); 4371 btrfs_put_block_group(block_group);
4236 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 4372 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
4237 0, owner, offset, ins, 1); 4373 0, owner, offset, ins, 1);
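When the target group is still caching, btrfs_alloc_logged_file_extent() splits the extent against caching_ctl->progress: the part already scanned is removed from the free-space cache, the unscanned part is marked excluded so the cache thread will skip it. The three cases as a standalone sketch; remove_free() and exclude() are placeholders for btrfs_remove_free_space() and add_excluded_extent():

#include <stdint.h>

static void remove_free(uint64_t start, uint64_t len) { (void)start; (void)len; }
static void exclude(uint64_t start, uint64_t len) { (void)start; (void)len; }

static void claim_extent(uint64_t start, uint64_t num, uint64_t progress)
{
	if (start >= progress) {
		exclude(start, num);		/* entirely unscanned */
	} else if (start + num <= progress) {
		remove_free(start, num);	/* entirely scanned */
	} else {				/* straddles progress */
		remove_free(start, progress - start);
		exclude(progress, start + num - progress);
	}
}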
@@ -4255,9 +4391,9 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
4255 int ret; 4391 int ret;
4256 u64 flags = 0; 4392 u64 flags = 0;
4257 4393
4258 ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, 4394 ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
4259 empty_size, hint_byte, search_end, 4395 empty_size, hint_byte, search_end,
4260 ins, 0); 4396 ins, 0);
4261 if (ret) 4397 if (ret)
4262 return ret; 4398 return ret;
4263 4399
@@ -4268,7 +4404,6 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
4268 } else 4404 } else
4269 BUG_ON(parent > 0); 4405 BUG_ON(parent > 0);
4270 4406
4271 update_reserved_extents(root, ins->objectid, ins->offset, 1);
4272 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 4407 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
4273 struct btrfs_delayed_extent_op *extent_op; 4408 struct btrfs_delayed_extent_op *extent_op;
4274 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 4409 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
@@ -4347,452 +4482,99 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
4347 return buf; 4482 return buf;
4348} 4483}
4349 4484
4350#if 0 4485struct walk_control {
4351int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 4486 u64 refs[BTRFS_MAX_LEVEL];
4352 struct btrfs_root *root, struct extent_buffer *leaf) 4487 u64 flags[BTRFS_MAX_LEVEL];
4353{ 4488 struct btrfs_key update_progress;
4354 u64 disk_bytenr; 4489 int stage;
4355 u64 num_bytes; 4490 int level;
4356 struct btrfs_key key; 4491 int shared_level;
4357 struct btrfs_file_extent_item *fi; 4492 int update_ref;
4358 u32 nritems; 4493 int keep_locks;
4359 int i; 4494 int reada_slot;
4360 int ret; 4495 int reada_count;
4361 4496};
4362 BUG_ON(!btrfs_is_leaf(leaf));
4363 nritems = btrfs_header_nritems(leaf);
4364
4365 for (i = 0; i < nritems; i++) {
4366 cond_resched();
4367 btrfs_item_key_to_cpu(leaf, &key, i);
4368
4369 /* only extents have references, skip everything else */
4370 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
4371 continue;
4372
4373 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
4374
4375 /* inline extents live in the btree, they don't have refs */
4376 if (btrfs_file_extent_type(leaf, fi) ==
4377 BTRFS_FILE_EXTENT_INLINE)
4378 continue;
4379
4380 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
4381
4382 /* holes don't have refs */
4383 if (disk_bytenr == 0)
4384 continue;
4385
4386 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
4387 ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes,
4388 leaf->start, 0, key.objectid, 0);
4389 BUG_ON(ret);
4390 }
4391 return 0;
4392}
4393
4394static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
4395 struct btrfs_root *root,
4396 struct btrfs_leaf_ref *ref)
4397{
4398 int i;
4399 int ret;
4400 struct btrfs_extent_info *info;
4401 struct refsort *sorted;
4402
4403 if (ref->nritems == 0)
4404 return 0;
4405
4406 sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS);
4407 for (i = 0; i < ref->nritems; i++) {
4408 sorted[i].bytenr = ref->extents[i].bytenr;
4409 sorted[i].slot = i;
4410 }
4411 sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL);
4412
4413 /*
4414 * the items in the ref were sorted when the ref was inserted
4415 * into the ref cache, so this is already in order
4416 */
4417 for (i = 0; i < ref->nritems; i++) {
4418 info = ref->extents + sorted[i].slot;
4419 ret = btrfs_free_extent(trans, root, info->bytenr,
4420 info->num_bytes, ref->bytenr,
4421 ref->owner, ref->generation,
4422 info->objectid, 0);
4423
4424 atomic_inc(&root->fs_info->throttle_gen);
4425 wake_up(&root->fs_info->transaction_throttle);
4426 cond_resched();
4427
4428 BUG_ON(ret);
4429 info++;
4430 }
4431
4432 kfree(sorted);
4433 return 0;
4434}
4435
4436
4437static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
4438 struct btrfs_root *root, u64 start,
4439 u64 len, u32 *refs)
4440{
4441 int ret;
4442
4443 ret = btrfs_lookup_extent_refs(trans, root, start, len, refs);
4444 BUG_ON(ret);
4445
4446#if 0 /* some debugging code in case we see problems here */
4447 /* if the refs count is one, it won't get increased again. But
4448 * if the ref count is > 1, someone may be decreasing it at
4449 * the same time we are.
4450 */
4451 if (*refs != 1) {
4452 struct extent_buffer *eb = NULL;
4453 eb = btrfs_find_create_tree_block(root, start, len);
4454 if (eb)
4455 btrfs_tree_lock(eb);
4456
4457 mutex_lock(&root->fs_info->alloc_mutex);
4458 ret = lookup_extent_ref(NULL, root, start, len, refs);
4459 BUG_ON(ret);
4460 mutex_unlock(&root->fs_info->alloc_mutex);
4461
4462 if (eb) {
4463 btrfs_tree_unlock(eb);
4464 free_extent_buffer(eb);
4465 }
4466 if (*refs == 1) {
4467 printk(KERN_ERR "btrfs block %llu went down to one "
4468 "during drop_snap\n", (unsigned long long)start);
4469 }
4470
4471 }
4472#endif
4473
4474 cond_resched();
4475 return ret;
4476}
4477 4497
4498#define DROP_REFERENCE 1
4499#define UPDATE_BACKREF 2
4478 4500
4479/* 4501static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
4480 * this is used while deleting old snapshots, and it drops the refs 4502 struct btrfs_root *root,
4481 * on a whole subtree starting from a level 1 node. 4503 struct walk_control *wc,
4482 * 4504 struct btrfs_path *path)
4483 * The idea is to sort all the leaf pointers, and then drop the
4484 * ref on all the leaves in order. Most of the time the leaves
4485 * will have ref cache entries, so no leaf IOs will be required to
4486 * find the extents they have references on.
4487 *
4488 * For each leaf, any references it has are also dropped in order
4489 *
4490 * This ends up dropping the references in something close to optimal
4491 * order for reading and modifying the extent allocation tree.
4492 */
4493static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,
4494 struct btrfs_root *root,
4495 struct btrfs_path *path)
4496{ 4505{
4497 u64 bytenr; 4506 u64 bytenr;
4498 u64 root_owner; 4507 u64 generation;
4499 u64 root_gen; 4508 u64 refs;
4500 struct extent_buffer *eb = path->nodes[1]; 4509 u64 last = 0;
4501 struct extent_buffer *leaf; 4510 u32 nritems;
4502 struct btrfs_leaf_ref *ref; 4511 u32 blocksize;
4503 struct refsort *sorted = NULL; 4512 struct btrfs_key key;
4504 int nritems = btrfs_header_nritems(eb); 4513 struct extent_buffer *eb;
4505 int ret; 4514 int ret;
4506 int i; 4515 int slot;
4507 int refi = 0; 4516 int nread = 0;
4508 int slot = path->slots[1];
4509 u32 blocksize = btrfs_level_size(root, 0);
4510 u32 refs;
4511
4512 if (nritems == 0)
4513 goto out;
4514
4515 root_owner = btrfs_header_owner(eb);
4516 root_gen = btrfs_header_generation(eb);
4517 sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
4518 4517
4519 /* 4518 if (path->slots[wc->level] < wc->reada_slot) {
4520 * step one, sort all the leaf pointers so we don't scribble 4519 wc->reada_count = wc->reada_count * 2 / 3;
4521 * randomly into the extent allocation tree 4520 wc->reada_count = max(wc->reada_count, 2);
4522 */ 4521 } else {
4523 for (i = slot; i < nritems; i++) { 4522 wc->reada_count = wc->reada_count * 3 / 2;
4524 sorted[refi].bytenr = btrfs_node_blockptr(eb, i); 4523 wc->reada_count = min_t(int, wc->reada_count,
4525 sorted[refi].slot = i; 4524 BTRFS_NODEPTRS_PER_BLOCK(root));
4526 refi++;
4527 } 4525 }
4528 4526
4529 /* 4527 eb = path->nodes[wc->level];
4530 * nritems won't be zero, but if we're picking up drop_snapshot 4528 nritems = btrfs_header_nritems(eb);
4531 * after a crash, slot might be > 0, so double check things 4529 blocksize = btrfs_level_size(root, wc->level - 1);
4532 * just in case.
4533 */
4534 if (refi == 0)
4535 goto out;
4536 4530
4537 sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); 4531 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
4532 if (nread >= wc->reada_count)
4533 break;
4538 4534
4539 /* 4535 cond_resched();
4540 * the first loop frees everything the leaves point to 4536 bytenr = btrfs_node_blockptr(eb, slot);
4541 */ 4537 generation = btrfs_node_ptr_generation(eb, slot);
4542 for (i = 0; i < refi; i++) {
4543 u64 ptr_gen;
4544 4538
4545 bytenr = sorted[i].bytenr; 4539 if (slot == path->slots[wc->level])
4540 goto reada;
4546 4541
4547 /* 4542 if (wc->stage == UPDATE_BACKREF &&
4548 * check the reference count on this leaf. If it is > 1 4543 generation <= root->root_key.offset)
4549 * we just decrement it below and don't update any
4550 * of the refs the leaf points to.
4551 */
4552 ret = drop_snap_lookup_refcount(trans, root, bytenr,
4553 blocksize, &refs);
4554 BUG_ON(ret);
4555 if (refs != 1)
4556 continue; 4544 continue;
4557 4545
4558 ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); 4546 if (wc->stage == DROP_REFERENCE) {
4559 4547 ret = btrfs_lookup_extent_info(trans, root,
4560 /* 4548 bytenr, blocksize,
4561 * the leaf only had one reference, which means the 4549 &refs, NULL);
4562 * only thing pointing to this leaf is the snapshot
4563 * we're deleting. It isn't possible for the reference
4564 * count to increase again later
4565 *
4566 * The reference cache is checked for the leaf,
4567 * and if found we'll be able to drop any refs held by
4568 * the leaf without needing to read it in.
4569 */
4570 ref = btrfs_lookup_leaf_ref(root, bytenr);
4571 if (ref && ref->generation != ptr_gen) {
4572 btrfs_free_leaf_ref(root, ref);
4573 ref = NULL;
4574 }
4575 if (ref) {
4576 ret = cache_drop_leaf_ref(trans, root, ref);
4577 BUG_ON(ret);
4578 btrfs_remove_leaf_ref(root, ref);
4579 btrfs_free_leaf_ref(root, ref);
4580 } else {
4581 /*
4582 * the leaf wasn't in the reference cache, so
4583 * we have to read it.
4584 */
4585 leaf = read_tree_block(root, bytenr, blocksize,
4586 ptr_gen);
4587 ret = btrfs_drop_leaf_ref(trans, root, leaf);
4588 BUG_ON(ret); 4550 BUG_ON(ret);
4589 free_extent_buffer(leaf); 4551 BUG_ON(refs == 0);
4590 } 4552 if (refs == 1)
4591 atomic_inc(&root->fs_info->throttle_gen); 4553 goto reada;
4592 wake_up(&root->fs_info->transaction_throttle);
4593 cond_resched();
4594 }
4595
4596 /*
4597 * run through the loop again to free the refs on the leaves.
4598 * This is faster than doing it in the loop above because
4599 * the leaves are likely to be clustered together. We end up
4600 * working in nice chunks on the extent allocation tree.
4601 */
4602 for (i = 0; i < refi; i++) {
4603 bytenr = sorted[i].bytenr;
4604 ret = btrfs_free_extent(trans, root, bytenr,
4605 blocksize, eb->start,
4606 root_owner, root_gen, 0, 1);
4607 BUG_ON(ret);
4608
4609 atomic_inc(&root->fs_info->throttle_gen);
4610 wake_up(&root->fs_info->transaction_throttle);
4611 cond_resched();
4612 }
4613out:
4614 kfree(sorted);
4615
4616 /*
4617 * update the path to show we've processed the entire level 1
4618 * node. This will get saved into the root's drop_snapshot_progress
4619 * field so these drops are not repeated again if this transaction
4620 * commits.
4621 */
4622 path->slots[1] = nritems;
4623 return 0;
4624}
4625
4626/*
4627 * helper function for drop_snapshot, this walks down the tree dropping ref
4628 * counts as it goes.
4629 */
4630static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4631 struct btrfs_root *root,
4632 struct btrfs_path *path, int *level)
4633{
4634 u64 root_owner;
4635 u64 root_gen;
4636 u64 bytenr;
4637 u64 ptr_gen;
4638 struct extent_buffer *next;
4639 struct extent_buffer *cur;
4640 struct extent_buffer *parent;
4641 u32 blocksize;
4642 int ret;
4643 u32 refs;
4644
4645 WARN_ON(*level < 0);
4646 WARN_ON(*level >= BTRFS_MAX_LEVEL);
4647 ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start,
4648 path->nodes[*level]->len, &refs);
4649 BUG_ON(ret);
4650 if (refs > 1)
4651 goto out;
4652
4653 /*
4654 * walk down to the last node level and free all the leaves
4655 */
4656 while (*level >= 0) {
4657 WARN_ON(*level < 0);
4658 WARN_ON(*level >= BTRFS_MAX_LEVEL);
4659 cur = path->nodes[*level];
4660
4661 if (btrfs_header_level(cur) != *level)
4662 WARN_ON(1);
4663 4554
4664 if (path->slots[*level] >= 4555 if (!wc->update_ref ||
4665 btrfs_header_nritems(cur)) 4556 generation <= root->root_key.offset)
4666 break; 4557 continue;
4667 4558 btrfs_node_key_to_cpu(eb, &key, slot);
4668 /* the new code goes down to level 1 and does all the 4559 ret = btrfs_comp_cpu_keys(&key,
4669 * leaves pointed to that node in bulk. So, this check 4560 &wc->update_progress);
4670 * for level 0 will always be false. 4561 if (ret < 0)
4671 * 4562 continue;
4672 * But, the disk format allows the drop_snapshot_progress
4673 * field in the root to leave things in a state where
4674 * a leaf will need cleaning up here. If someone crashes
4675 * with the old code and then boots with the new code,
4676 * we might find a leaf here.
4677 */
4678 if (*level == 0) {
4679 ret = btrfs_drop_leaf_ref(trans, root, cur);
4680 BUG_ON(ret);
4681 break;
4682 } 4563 }
4683 4564reada:
4684 /* 4565 ret = readahead_tree_block(root, bytenr, blocksize,
4685 * once we get to level one, process the whole node 4566 generation);
4686 * at once, including everything below it. 4567 if (ret)
4687 */
4688 if (*level == 1) {
4689 ret = drop_level_one_refs(trans, root, path);
4690 BUG_ON(ret);
4691 break; 4568 break;
4692 } 4569 last = bytenr + blocksize;
4693 4570 nread++;
4694 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
4695 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
4696 blocksize = btrfs_level_size(root, *level - 1);
4697
4698 ret = drop_snap_lookup_refcount(trans, root, bytenr,
4699 blocksize, &refs);
4700 BUG_ON(ret);
4701
4702 /*
4703 * if there is more than one reference, we don't need
4704 * to read that node to drop any references it has. We
4705 * just drop the ref we hold on that node and move on to the
4706 * next slot in this level.
4707 */
4708 if (refs != 1) {
4709 parent = path->nodes[*level];
4710 root_owner = btrfs_header_owner(parent);
4711 root_gen = btrfs_header_generation(parent);
4712 path->slots[*level]++;
4713
4714 ret = btrfs_free_extent(trans, root, bytenr,
4715 blocksize, parent->start,
4716 root_owner, root_gen,
4717 *level - 1, 1);
4718 BUG_ON(ret);
4719
4720 atomic_inc(&root->fs_info->throttle_gen);
4721 wake_up(&root->fs_info->transaction_throttle);
4722 cond_resched();
4723
4724 continue;
4725 }
4726
4727 /*
4728 * we need to keep freeing things in the next level down.
4729 * read the block and loop around to process it
4730 */
4731 next = read_tree_block(root, bytenr, blocksize, ptr_gen);
4732 WARN_ON(*level <= 0);
4733 if (path->nodes[*level-1])
4734 free_extent_buffer(path->nodes[*level-1]);
4735 path->nodes[*level-1] = next;
4736 *level = btrfs_header_level(next);
4737 path->slots[*level] = 0;
4738 cond_resched();
4739 } 4571 }
4740out: 4572 wc->reada_slot = slot;
4741 WARN_ON(*level < 0);
4742 WARN_ON(*level >= BTRFS_MAX_LEVEL);
4743
4744 if (path->nodes[*level] == root->node) {
4745 parent = path->nodes[*level];
4746 bytenr = path->nodes[*level]->start;
4747 } else {
4748 parent = path->nodes[*level + 1];
4749 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
4750 }
4751
4752 blocksize = btrfs_level_size(root, *level);
4753 root_owner = btrfs_header_owner(parent);
4754 root_gen = btrfs_header_generation(parent);
4755
4756 /*
4757 * cleanup and free the reference on the last node
4758 * we processed
4759 */
4760 ret = btrfs_free_extent(trans, root, bytenr, blocksize,
4761 parent->start, root_owner, root_gen,
4762 *level, 1);
4763 free_extent_buffer(path->nodes[*level]);
4764 path->nodes[*level] = NULL;
4765
4766 *level += 1;
4767 BUG_ON(ret);
4768
4769 cond_resched();
4770 return 0;
4771} 4573}
4772#endif
4773
4774struct walk_control {
4775 u64 refs[BTRFS_MAX_LEVEL];
4776 u64 flags[BTRFS_MAX_LEVEL];
4777 struct btrfs_key update_progress;
4778 int stage;
4779 int level;
4780 int shared_level;
4781 int update_ref;
4782 int keep_locks;
4783};
4784
4785#define DROP_REFERENCE 1
4786#define UPDATE_BACKREF 2
4787 4574
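reada_walk_down() above sizes its readahead window adaptively: if the current slot is still below the last slot read ahead to, the window overshot and shrinks to 2/3, otherwise it grows by 3/2, clamped between 2 and BTRFS_NODEPTRS_PER_BLOCK(root). The window update extracted into a sketch (max_count stands in for the nodeptrs limit):

static int next_reada_count(int count, int cur_slot, int reada_slot,
			    int max_count)
{
	if (cur_slot < reada_slot) {
		count = count * 2 / 3;		/* window overshot: shrink */
		if (count < 2)
			count = 2;
	} else {
		count = count * 3 / 2;		/* keep widening the window */
		if (count > max_count)
			count = max_count;
	}
	return count;
}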
4788/* 4575/*
 4789 * helper to process tree block while walking down the tree. 4576 * helper to process tree block while walking down the tree.
4790 * 4577 *
4791 * when wc->stage == DROP_REFERENCE, this function checks
4792 * reference count of the block. if the block is shared and
4793 * we need update back refs for the subtree rooted at the
4794 * block, this function changes wc->stage to UPDATE_BACKREF
4795 *
4796 * when wc->stage == UPDATE_BACKREF, this function updates 4578 * when wc->stage == UPDATE_BACKREF, this function updates
4797 * back refs for pointers in the block. 4579 * back refs for pointers in the block.
4798 * 4580 *
@@ -4805,7 +4587,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4805{ 4587{
4806 int level = wc->level; 4588 int level = wc->level;
4807 struct extent_buffer *eb = path->nodes[level]; 4589 struct extent_buffer *eb = path->nodes[level];
4808 struct btrfs_key key;
4809 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; 4590 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
4810 int ret; 4591 int ret;
4811 4592
@@ -4828,21 +4609,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4828 BUG_ON(wc->refs[level] == 0); 4609 BUG_ON(wc->refs[level] == 0);
4829 } 4610 }
4830 4611
4831 if (wc->stage == DROP_REFERENCE &&
4832 wc->update_ref && wc->refs[level] > 1) {
4833 BUG_ON(eb == root->node);
4834 BUG_ON(path->slots[level] > 0);
4835 if (level == 0)
4836 btrfs_item_key_to_cpu(eb, &key, path->slots[level]);
4837 else
4838 btrfs_node_key_to_cpu(eb, &key, path->slots[level]);
4839 if (btrfs_header_owner(eb) == root->root_key.objectid &&
4840 btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) {
4841 wc->stage = UPDATE_BACKREF;
4842 wc->shared_level = level;
4843 }
4844 }
4845
4846 if (wc->stage == DROP_REFERENCE) { 4612 if (wc->stage == DROP_REFERENCE) {
4847 if (wc->refs[level] > 1) 4613 if (wc->refs[level] > 1)
4848 return 1; 4614 return 1;
@@ -4879,6 +4645,123 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4879} 4645}
4880 4646
4881/* 4647/*
 4648 * helper to process tree block pointer.
 4649 *
 4650 * when wc->stage == DROP_REFERENCE, this function checks the
 4651 * reference count of the block pointed to. if the block
 4652 * is shared and we need to update back refs for the subtree
 4653 * rooted at the block, this function changes wc->stage to
 4654 * UPDATE_BACKREF. if the block is shared and there is no
 4655 * need to update back refs, this function drops the reference
 4656 * to the block.
 4657 *
4658 * NOTE: return value 1 means we should stop walking down.
4659 */
4660static noinline int do_walk_down(struct btrfs_trans_handle *trans,
4661 struct btrfs_root *root,
4662 struct btrfs_path *path,
4663 struct walk_control *wc)
4664{
4665 u64 bytenr;
4666 u64 generation;
4667 u64 parent;
4668 u32 blocksize;
4669 struct btrfs_key key;
4670 struct extent_buffer *next;
4671 int level = wc->level;
4672 int reada = 0;
4673 int ret = 0;
4674
4675 generation = btrfs_node_ptr_generation(path->nodes[level],
4676 path->slots[level]);
4677 /*
4678 * if the lower level block was created before the snapshot
4679 * was created, we know there is no need to update back refs
4680 * for the subtree
4681 */
4682 if (wc->stage == UPDATE_BACKREF &&
4683 generation <= root->root_key.offset)
4684 return 1;
4685
4686 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
4687 blocksize = btrfs_level_size(root, level - 1);
4688
4689 next = btrfs_find_tree_block(root, bytenr, blocksize);
4690 if (!next) {
4691 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
4692 reada = 1;
4693 }
4694 btrfs_tree_lock(next);
4695 btrfs_set_lock_blocking(next);
4696
4697 if (wc->stage == DROP_REFERENCE) {
4698 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
4699 &wc->refs[level - 1],
4700 &wc->flags[level - 1]);
4701 BUG_ON(ret);
4702 BUG_ON(wc->refs[level - 1] == 0);
4703
4704 if (wc->refs[level - 1] > 1) {
4705 if (!wc->update_ref ||
4706 generation <= root->root_key.offset)
4707 goto skip;
4708
4709 btrfs_node_key_to_cpu(path->nodes[level], &key,
4710 path->slots[level]);
4711 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
4712 if (ret < 0)
4713 goto skip;
4714
4715 wc->stage = UPDATE_BACKREF;
4716 wc->shared_level = level - 1;
4717 }
4718 }
4719
4720 if (!btrfs_buffer_uptodate(next, generation)) {
4721 btrfs_tree_unlock(next);
4722 free_extent_buffer(next);
4723 next = NULL;
4724 }
4725
4726 if (!next) {
4727 if (reada && level == 1)
4728 reada_walk_down(trans, root, wc, path);
4729 next = read_tree_block(root, bytenr, blocksize, generation);
4730 btrfs_tree_lock(next);
4731 btrfs_set_lock_blocking(next);
4732 }
4733
4734 level--;
4735 BUG_ON(level != btrfs_header_level(next));
4736 path->nodes[level] = next;
4737 path->slots[level] = 0;
4738 path->locks[level] = 1;
4739 wc->level = level;
4740 if (wc->level == 1)
4741 wc->reada_slot = 0;
4742 return 0;
4743skip:
4744 wc->refs[level - 1] = 0;
4745 wc->flags[level - 1] = 0;
4746
4747 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
4748 parent = path->nodes[level]->start;
4749 } else {
4750 BUG_ON(root->root_key.objectid !=
4751 btrfs_header_owner(path->nodes[level]));
4752 parent = 0;
4753 }
4754
4755 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
4756 root->root_key.objectid, level - 1, 0);
4757 BUG_ON(ret);
4758
4759 btrfs_tree_unlock(next);
4760 free_extent_buffer(next);
4761 return 1;
4762}
4763
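The generation check near the top of do_walk_down() is worth calling out: it is what lets the walker skip whole subtrees. A hedged one-liner expressing that predicate, assuming (as btrfs does for snapshot roots) that root_key.offset records the transid at which the snapshot was created:

/*
 * Sketch of the early-out used in do_walk_down(): a block whose
 * generation predates the snapshot cannot need back ref updates for
 * this root, so its subtree can be skipped. Treating root_key.offset
 * as the snapshot-creation transid is our reading of the check above.
 */
static inline int block_predates_snapshot(struct btrfs_root *root,
					  u64 generation)
{
	return generation <= root->root_key.offset;
}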
4764/*
 4882 * helper to process tree block while walking up the tree. 4765 * helper to process tree block while walking up the tree.
4883 * 4766 *
4884 * when wc->stage == DROP_REFERENCE, this function drops 4767 * when wc->stage == DROP_REFERENCE, this function drops
@@ -4905,7 +4788,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
4905 if (level < wc->shared_level) 4788 if (level < wc->shared_level)
4906 goto out; 4789 goto out;
4907 4790
4908 BUG_ON(wc->refs[level] <= 1);
4909 ret = find_next_key(path, level + 1, &wc->update_progress); 4791 ret = find_next_key(path, level + 1, &wc->update_progress);
4910 if (ret > 0) 4792 if (ret > 0)
4911 wc->update_ref = 0; 4793 wc->update_ref = 0;
@@ -4936,8 +4818,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
4936 path->locks[level] = 0; 4818 path->locks[level] = 0;
4937 return 1; 4819 return 1;
4938 } 4820 }
4939 } else {
4940 BUG_ON(level != 0);
4941 } 4821 }
4942 } 4822 }
4943 4823
@@ -4990,17 +4870,13 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4990 struct btrfs_path *path, 4870 struct btrfs_path *path,
4991 struct walk_control *wc) 4871 struct walk_control *wc)
4992{ 4872{
4993 struct extent_buffer *next;
4994 struct extent_buffer *cur;
4995 u64 bytenr;
4996 u64 ptr_gen;
4997 u32 blocksize;
4998 int level = wc->level; 4873 int level = wc->level;
4999 int ret; 4874 int ret;
5000 4875
5001 while (level >= 0) { 4876 while (level >= 0) {
5002 cur = path->nodes[level]; 4877 if (path->slots[level] >=
5003 BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); 4878 btrfs_header_nritems(path->nodes[level]))
4879 break;
5004 4880
5005 ret = walk_down_proc(trans, root, path, wc); 4881 ret = walk_down_proc(trans, root, path, wc);
5006 if (ret > 0) 4882 if (ret > 0)
@@ -5009,20 +4885,12 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5009 if (level == 0) 4885 if (level == 0)
5010 break; 4886 break;
5011 4887
5012 bytenr = btrfs_node_blockptr(cur, path->slots[level]); 4888 ret = do_walk_down(trans, root, path, wc);
5013 blocksize = btrfs_level_size(root, level - 1); 4889 if (ret > 0) {
5014 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); 4890 path->slots[level]++;
5015 4891 continue;
5016 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 4892 }
5017 btrfs_tree_lock(next); 4893 level = wc->level;
5018 btrfs_set_lock_blocking(next);
5019
5020 level--;
5021 BUG_ON(level != btrfs_header_level(next));
5022 path->nodes[level] = next;
5023 path->slots[level] = 0;
5024 path->locks[level] = 1;
5025 wc->level = level;
5026 } 4894 }
5027 return 0; 4895 return 0;
5028} 4896}
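To see how the two walkers cooperate, here is a compressed, hedged sketch of the drive loop in btrfs_drop_snapshot() as reshaped by this patch. Transaction throttling and progress-key bookkeeping are elided, and the walk_up_tree() signature with a max_level argument is our assumption from the surrounding series:

	while (1) {
		ret = walk_down_tree(trans, root, path, wc);
		if (ret < 0) {
			err = ret;
			break;
		}

		ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
		if (ret < 0) {
			err = ret;
			break;
		}
		if (ret > 0) {
			/* the whole tree has been dropped */
			BUG_ON(wc->stage != DROP_REFERENCE);
			break;
		}
		/* transaction throttling elided */
	}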
@@ -5112,9 +4980,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
5112 err = ret; 4980 err = ret;
5113 goto out; 4981 goto out;
5114 } 4982 }
5115 btrfs_node_key_to_cpu(path->nodes[level], &key, 4983 WARN_ON(ret > 0);
5116 path->slots[level]);
5117 WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key)));
5118 4984
5119 /* 4985 /*
5120 * unlock our path, this is safe because only this 4986 * unlock our path, this is safe because only this
@@ -5149,6 +5015,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
5149 wc->stage = DROP_REFERENCE; 5015 wc->stage = DROP_REFERENCE;
5150 wc->update_ref = update_ref; 5016 wc->update_ref = update_ref;
5151 wc->keep_locks = 0; 5017 wc->keep_locks = 0;
5018 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
5152 5019
5153 while (1) { 5020 while (1) {
5154 ret = walk_down_tree(trans, root, path, wc); 5021 ret = walk_down_tree(trans, root, path, wc);
@@ -5201,9 +5068,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
5201 ret = btrfs_del_root(trans, tree_root, &root->root_key); 5068 ret = btrfs_del_root(trans, tree_root, &root->root_key);
5202 BUG_ON(ret); 5069 BUG_ON(ret);
5203 5070
5204 free_extent_buffer(root->node); 5071 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
5205 free_extent_buffer(root->commit_root); 5072 ret = btrfs_find_last_root(tree_root, root->root_key.objectid,
5206 kfree(root); 5073 NULL, NULL);
5074 BUG_ON(ret < 0);
5075 if (ret > 0) {
5076 ret = btrfs_del_orphan_item(trans, tree_root,
5077 root->root_key.objectid);
5078 BUG_ON(ret);
5079 }
5080 }
5081
5082 if (root->in_radix) {
5083 btrfs_free_fs_root(tree_root->fs_info, root);
5084 } else {
5085 free_extent_buffer(root->node);
5086 free_extent_buffer(root->commit_root);
5087 kfree(root);
5088 }
5207out: 5089out:
5208 btrfs_end_transaction(trans, tree_root); 5090 btrfs_end_transaction(trans, tree_root);
5209 kfree(wc); 5091 kfree(wc);
@@ -5255,6 +5137,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
5255 wc->stage = DROP_REFERENCE; 5137 wc->stage = DROP_REFERENCE;
5256 wc->update_ref = 0; 5138 wc->update_ref = 0;
5257 wc->keep_locks = 1; 5139 wc->keep_locks = 1;
5140 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
5258 5141
5259 while (1) { 5142 while (1) {
5260 wret = walk_down_tree(trans, root, path, wc); 5143 wret = walk_down_tree(trans, root, path, wc);
@@ -5397,9 +5280,9 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
5397 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 5280 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
5398 while (1) { 5281 while (1) {
5399 int ret; 5282 int ret;
5400 spin_lock(&em_tree->lock); 5283 write_lock(&em_tree->lock);
5401 ret = add_extent_mapping(em_tree, em); 5284 ret = add_extent_mapping(em_tree, em);
5402 spin_unlock(&em_tree->lock); 5285 write_unlock(&em_tree->lock);
5403 if (ret != -EEXIST) { 5286 if (ret != -EEXIST) {
5404 free_extent_map(em); 5287 free_extent_map(em);
5405 break; 5288 break;
@@ -6842,287 +6725,86 @@ int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
6842 return 0; 6725 return 0;
6843} 6726}
6844 6727
6845#if 0 6728/*
6846static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 6729 * checks to see if its even possible to relocate this block group.
6847 struct btrfs_root *root, 6730 *
6848 u64 objectid, u64 size) 6731 * @return - -1 if it's not a good idea to relocate this block group, 0 if its
6849{ 6732 * ok to go ahead and try.
6850 struct btrfs_path *path; 6733 */
6851 struct btrfs_inode_item *item; 6734int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
6852 struct extent_buffer *leaf;
6853 int ret;
6854
6855 path = btrfs_alloc_path();
6856 if (!path)
6857 return -ENOMEM;
6858
6859 path->leave_spinning = 1;
6860 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
6861 if (ret)
6862 goto out;
6863
6864 leaf = path->nodes[0];
6865 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
6866 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
6867 btrfs_set_inode_generation(leaf, item, 1);
6868 btrfs_set_inode_size(leaf, item, size);
6869 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
6870 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
6871 btrfs_mark_buffer_dirty(leaf);
6872 btrfs_release_path(root, path);
6873out:
6874 btrfs_free_path(path);
6875 return ret;
6876}
6877
6878static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
6879 struct btrfs_block_group_cache *group)
6880{ 6735{
6881 struct inode *inode = NULL; 6736 struct btrfs_block_group_cache *block_group;
6882 struct btrfs_trans_handle *trans; 6737 struct btrfs_space_info *space_info;
6883 struct btrfs_root *root; 6738 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
6884 struct btrfs_key root_key; 6739 struct btrfs_device *device;
6885 u64 objectid = BTRFS_FIRST_FREE_OBJECTID; 6740 int full = 0;
6886 int err = 0; 6741 int ret = 0;
6887 6742
6888 root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; 6743 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
6889 root_key.type = BTRFS_ROOT_ITEM_KEY;
6890 root_key.offset = (u64)-1;
6891 root = btrfs_read_fs_root_no_name(fs_info, &root_key);
6892 if (IS_ERR(root))
6893 return ERR_CAST(root);
6894 6744
6895 trans = btrfs_start_transaction(root, 1); 6745 /* odd, couldn't find the block group, leave it alone */
6896 BUG_ON(!trans); 6746 if (!block_group)
6747 return -1;
6897 6748
6898 err = btrfs_find_free_objectid(trans, root, objectid, &objectid); 6749 /* no bytes used, we're good */
6899 if (err) 6750 if (!btrfs_block_group_used(&block_group->item))
6900 goto out; 6751 goto out;
6901 6752
6902 err = __insert_orphan_inode(trans, root, objectid, group->key.offset); 6753 space_info = block_group->space_info;
6903 BUG_ON(err); 6754 spin_lock(&space_info->lock);
6904
6905 err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
6906 group->key.offset, 0, group->key.offset,
6907 0, 0, 0);
6908 BUG_ON(err);
6909
6910 inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
6911 if (inode->i_state & I_NEW) {
6912 BTRFS_I(inode)->root = root;
6913 BTRFS_I(inode)->location.objectid = objectid;
6914 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
6915 BTRFS_I(inode)->location.offset = 0;
6916 btrfs_read_locked_inode(inode);
6917 unlock_new_inode(inode);
6918 BUG_ON(is_bad_inode(inode));
6919 } else {
6920 BUG_ON(1);
6921 }
6922 BTRFS_I(inode)->index_cnt = group->key.objectid;
6923
6924 err = btrfs_orphan_add(trans, inode);
6925out:
6926 btrfs_end_transaction(trans, root);
6927 if (err) {
6928 if (inode)
6929 iput(inode);
6930 inode = ERR_PTR(err);
6931 }
6932 return inode;
6933}
6934
6935int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
6936{
6937
6938 struct btrfs_ordered_sum *sums;
6939 struct btrfs_sector_sum *sector_sum;
6940 struct btrfs_ordered_extent *ordered;
6941 struct btrfs_root *root = BTRFS_I(inode)->root;
6942 struct list_head list;
6943 size_t offset;
6944 int ret;
6945 u64 disk_bytenr;
6946
6947 INIT_LIST_HEAD(&list);
6948
6949 ordered = btrfs_lookup_ordered_extent(inode, file_pos);
6950 BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
6951
6952 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
6953 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
6954 disk_bytenr + len - 1, &list);
6955
6956 while (!list_empty(&list)) {
6957 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
6958 list_del_init(&sums->list);
6959
6960 sector_sum = sums->sums;
6961 sums->bytenr = ordered->start;
6962 6755
6963 offset = 0; 6756 full = space_info->full;
6964 while (offset < sums->len) {
6965 sector_sum->bytenr += ordered->start - disk_bytenr;
6966 sector_sum++;
6967 offset += root->sectorsize;
6968 }
6969 6757
6970 btrfs_add_ordered_sum(inode, ordered, sums); 6758 /*
6759 * if this is the last block group we have in this space, we can't
6760 * relocate it unless we're able to allocate a new chunk below.
6761 *
6762 * Otherwise, we need to make sure we have room in the space to handle
 6763 * all of the extents from this block group. If we can, we're good.
6764 */
6765 if ((space_info->total_bytes != block_group->key.offset) &&
6766 (space_info->bytes_used + space_info->bytes_reserved +
6767 space_info->bytes_pinned + space_info->bytes_readonly +
6768 btrfs_block_group_used(&block_group->item) <
6769 space_info->total_bytes)) {
6770 spin_unlock(&space_info->lock);
6771 goto out;
6971 } 6772 }
6972 btrfs_put_ordered_extent(ordered); 6773 spin_unlock(&space_info->lock);
6973 return 0;
6974}
6975
6976int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
6977{
6978 struct btrfs_trans_handle *trans;
6979 struct btrfs_path *path;
6980 struct btrfs_fs_info *info = root->fs_info;
6981 struct extent_buffer *leaf;
6982 struct inode *reloc_inode;
6983 struct btrfs_block_group_cache *block_group;
6984 struct btrfs_key key;
6985 u64 skipped;
6986 u64 cur_byte;
6987 u64 total_found;
6988 u32 nritems;
6989 int ret;
6990 int progress;
6991 int pass = 0;
6992
6993 root = root->fs_info->extent_root;
6994
6995 block_group = btrfs_lookup_block_group(info, group_start);
6996 BUG_ON(!block_group);
6997
6998 printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n",
6999 (unsigned long long)block_group->key.objectid,
7000 (unsigned long long)block_group->flags);
7001
7002 path = btrfs_alloc_path();
7003 BUG_ON(!path);
7004
7005 reloc_inode = create_reloc_inode(info, block_group);
7006 BUG_ON(IS_ERR(reloc_inode));
7007
7008 __alloc_chunk_for_shrink(root, block_group, 1);
7009 set_block_group_readonly(block_group);
7010
7011 btrfs_start_delalloc_inodes(info->tree_root);
7012 btrfs_wait_ordered_extents(info->tree_root, 0);
7013again:
7014 skipped = 0;
7015 total_found = 0;
7016 progress = 0;
7017 key.objectid = block_group->key.objectid;
7018 key.offset = 0;
7019 key.type = 0;
7020 cur_byte = key.objectid;
7021
7022 trans = btrfs_start_transaction(info->tree_root, 1);
7023 btrfs_commit_transaction(trans, info->tree_root);
7024 6774
7025 mutex_lock(&root->fs_info->cleaner_mutex); 6775 /*
7026 btrfs_clean_old_snapshots(info->tree_root); 6776 * ok we don't have enough space, but maybe we have free space on our
7027 btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); 6777 * devices to allocate new chunks for relocation, so loop through our
7028 mutex_unlock(&root->fs_info->cleaner_mutex); 6778 * alloc devices and guess if we have enough space. However, if we
6779 * were marked as full, then we know there aren't enough chunks, and we
6780 * can just return.
6781 */
6782 ret = -1;
6783 if (full)
6784 goto out;
7029 6785
7030 trans = btrfs_start_transaction(info->tree_root, 1); 6786 mutex_lock(&root->fs_info->chunk_mutex);
7031 btrfs_commit_transaction(trans, info->tree_root); 6787 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
6788 u64 min_free = btrfs_block_group_used(&block_group->item);
6789 u64 dev_offset, max_avail;
7032 6790
7033 while (1) { 6791 /*
7034 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 6792 * check to make sure we can actually find a chunk with enough
7035 if (ret < 0) 6793 * space to fit our block group in.
7036 goto out; 6794 */
7037next: 6795 if (device->total_bytes > device->bytes_used + min_free) {
7038 leaf = path->nodes[0]; 6796 ret = find_free_dev_extent(NULL, device, min_free,
7039 nritems = btrfs_header_nritems(leaf); 6797 &dev_offset, &max_avail);
7040 if (path->slots[0] >= nritems) { 6798 if (!ret)
7041 ret = btrfs_next_leaf(root, path);
7042 if (ret < 0)
7043 goto out;
7044 if (ret == 1) {
7045 ret = 0;
7046 break; 6799 break;
7047 } 6800 ret = -1;
7048 leaf = path->nodes[0];
7049 nritems = btrfs_header_nritems(leaf);
7050 } 6801 }
7051
7052 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7053
7054 if (key.objectid >= block_group->key.objectid +
7055 block_group->key.offset)
7056 break;
7057
7058 if (progress && need_resched()) {
7059 btrfs_release_path(root, path);
7060 cond_resched();
7061 progress = 0;
7062 continue;
7063 }
7064 progress = 1;
7065
7066 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
7067 key.objectid + key.offset <= cur_byte) {
7068 path->slots[0]++;
7069 goto next;
7070 }
7071
7072 total_found++;
7073 cur_byte = key.objectid + key.offset;
7074 btrfs_release_path(root, path);
7075
7076 __alloc_chunk_for_shrink(root, block_group, 0);
7077 ret = relocate_one_extent(root, path, &key, block_group,
7078 reloc_inode, pass);
7079 BUG_ON(ret < 0);
7080 if (ret > 0)
7081 skipped++;
7082
7083 key.objectid = cur_byte;
7084 key.type = 0;
7085 key.offset = 0;
7086 }
7087
7088 btrfs_release_path(root, path);
7089
7090 if (pass == 0) {
7091 btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
7092 invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
7093 }
7094
7095 if (total_found > 0) {
7096 printk(KERN_INFO "btrfs found %llu extents in pass %d\n",
7097 (unsigned long long)total_found, pass);
7098 pass++;
7099 if (total_found == skipped && pass > 2) {
7100 iput(reloc_inode);
7101 reloc_inode = create_reloc_inode(info, block_group);
7102 pass = 0;
7103 }
7104 goto again;
7105 } 6802 }
7106 6803 mutex_unlock(&root->fs_info->chunk_mutex);
7107 /* delete reloc_inode */
7108 iput(reloc_inode);
7109
7110 /* unpin extents in this range */
7111 trans = btrfs_start_transaction(info->tree_root, 1);
7112 btrfs_commit_transaction(trans, info->tree_root);
7113
7114 spin_lock(&block_group->lock);
7115 WARN_ON(block_group->pinned > 0);
7116 WARN_ON(block_group->reserved > 0);
7117 WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
7118 spin_unlock(&block_group->lock);
7119 btrfs_put_block_group(block_group);
7120 ret = 0;
7121out: 6804out:
7122 btrfs_free_path(path); 6805 btrfs_put_block_group(block_group);
7123 return ret; 6806 return ret;
7124} 6807}
7125#endif
7126 6808
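A hedged usage sketch for the new helper: a caller gates relocation on btrfs_can_relocate() and only then invokes the relocation path (btrfs_relocate_block_group() survives elsewhere in this series even though its old body is deleted above). The wrapper name and the errno choice are illustrative only:

/* illustrative wrapper, not part of this patch */
static int maybe_relocate_group(struct btrfs_root *root, u64 group_start)
{
	/* -1 from btrfs_can_relocate() means relocation would not fit */
	if (btrfs_can_relocate(root, group_start))
		return -ENOSPC;

	return btrfs_relocate_block_group(root, group_start);
}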
7127static int find_first_block_group(struct btrfs_root *root, 6809static int find_first_block_group(struct btrfs_root *root,
7128 struct btrfs_path *path, struct btrfs_key *key) 6810 struct btrfs_path *path, struct btrfs_key *key)
@@ -7165,8 +6847,18 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
7165{ 6847{
7166 struct btrfs_block_group_cache *block_group; 6848 struct btrfs_block_group_cache *block_group;
7167 struct btrfs_space_info *space_info; 6849 struct btrfs_space_info *space_info;
6850 struct btrfs_caching_control *caching_ctl;
7168 struct rb_node *n; 6851 struct rb_node *n;
7169 6852
6853 down_write(&info->extent_commit_sem);
6854 while (!list_empty(&info->caching_block_groups)) {
6855 caching_ctl = list_entry(info->caching_block_groups.next,
6856 struct btrfs_caching_control, list);
6857 list_del(&caching_ctl->list);
6858 put_caching_control(caching_ctl);
6859 }
6860 up_write(&info->extent_commit_sem);
6861
7170 spin_lock(&info->block_group_cache_lock); 6862 spin_lock(&info->block_group_cache_lock);
7171 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { 6863 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
7172 block_group = rb_entry(n, struct btrfs_block_group_cache, 6864 block_group = rb_entry(n, struct btrfs_block_group_cache,
@@ -7180,8 +6872,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
7180 up_write(&block_group->space_info->groups_sem); 6872 up_write(&block_group->space_info->groups_sem);
7181 6873
7182 if (block_group->cached == BTRFS_CACHE_STARTED) 6874 if (block_group->cached == BTRFS_CACHE_STARTED)
7183 wait_event(block_group->caching_q, 6875 wait_block_group_cache_done(block_group);
7184 block_group_cache_done(block_group));
7185 6876
7186 btrfs_remove_free_space_cache(block_group); 6877 btrfs_remove_free_space_cache(block_group);
7187 6878
@@ -7251,7 +6942,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7251 spin_lock_init(&cache->lock); 6942 spin_lock_init(&cache->lock);
7252 spin_lock_init(&cache->tree_lock); 6943 spin_lock_init(&cache->tree_lock);
7253 cache->fs_info = info; 6944 cache->fs_info = info;
7254 init_waitqueue_head(&cache->caching_q);
7255 INIT_LIST_HEAD(&cache->list); 6945 INIT_LIST_HEAD(&cache->list);
7256 INIT_LIST_HEAD(&cache->cluster_list); 6946 INIT_LIST_HEAD(&cache->cluster_list);
7257 6947
@@ -7273,8 +6963,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7273 cache->flags = btrfs_block_group_flags(&cache->item); 6963 cache->flags = btrfs_block_group_flags(&cache->item);
7274 cache->sectorsize = root->sectorsize; 6964 cache->sectorsize = root->sectorsize;
7275 6965
7276 remove_sb_from_cache(root, cache);
7277
7278 /* 6966 /*
7279 * check for two cases, either we are full, and therefore 6967 * check for two cases, either we are full, and therefore
7280 * don't need to bother with the caching work since we won't 6968 * don't need to bother with the caching work since we won't
@@ -7283,13 +6971,19 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7283 * time, particularly in the full case. 6971 * time, particularly in the full case.
7284 */ 6972 */
7285 if (found_key.offset == btrfs_block_group_used(&cache->item)) { 6973 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
6974 exclude_super_stripes(root, cache);
6975 cache->last_byte_to_unpin = (u64)-1;
7286 cache->cached = BTRFS_CACHE_FINISHED; 6976 cache->cached = BTRFS_CACHE_FINISHED;
6977 free_excluded_extents(root, cache);
7287 } else if (btrfs_block_group_used(&cache->item) == 0) { 6978 } else if (btrfs_block_group_used(&cache->item) == 0) {
6979 exclude_super_stripes(root, cache);
6980 cache->last_byte_to_unpin = (u64)-1;
7288 cache->cached = BTRFS_CACHE_FINISHED; 6981 cache->cached = BTRFS_CACHE_FINISHED;
7289 add_new_free_space(cache, root->fs_info, 6982 add_new_free_space(cache, root->fs_info,
7290 found_key.objectid, 6983 found_key.objectid,
7291 found_key.objectid + 6984 found_key.objectid +
7292 found_key.offset); 6985 found_key.offset);
6986 free_excluded_extents(root, cache);
7293 } 6987 }
7294 6988
7295 ret = update_space_info(info, cache->flags, found_key.offset, 6989 ret = update_space_info(info, cache->flags, found_key.offset,
@@ -7297,6 +6991,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7297 &space_info); 6991 &space_info);
7298 BUG_ON(ret); 6992 BUG_ON(ret);
7299 cache->space_info = space_info; 6993 cache->space_info = space_info;
6994 spin_lock(&cache->space_info->lock);
6995 cache->space_info->bytes_super += cache->bytes_super;
6996 spin_unlock(&cache->space_info->lock);
6997
7300 down_write(&space_info->groups_sem); 6998 down_write(&space_info->groups_sem);
7301 list_add_tail(&cache->list, &space_info->block_groups); 6999 list_add_tail(&cache->list, &space_info->block_groups);
7302 up_write(&space_info->groups_sem); 7000 up_write(&space_info->groups_sem);
@@ -7346,7 +7044,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7346 atomic_set(&cache->count, 1); 7044 atomic_set(&cache->count, 1);
7347 spin_lock_init(&cache->lock); 7045 spin_lock_init(&cache->lock);
7348 spin_lock_init(&cache->tree_lock); 7046 spin_lock_init(&cache->tree_lock);
7349 init_waitqueue_head(&cache->caching_q);
7350 INIT_LIST_HEAD(&cache->list); 7047 INIT_LIST_HEAD(&cache->list);
7351 INIT_LIST_HEAD(&cache->cluster_list); 7048 INIT_LIST_HEAD(&cache->cluster_list);
7352 7049
@@ -7355,15 +7052,23 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7355 cache->flags = type; 7052 cache->flags = type;
7356 btrfs_set_block_group_flags(&cache->item, type); 7053 btrfs_set_block_group_flags(&cache->item, type);
7357 7054
7055 cache->last_byte_to_unpin = (u64)-1;
7358 cache->cached = BTRFS_CACHE_FINISHED; 7056 cache->cached = BTRFS_CACHE_FINISHED;
7359 remove_sb_from_cache(root, cache); 7057 exclude_super_stripes(root, cache);
7360 7058
7361 add_new_free_space(cache, root->fs_info, chunk_offset, 7059 add_new_free_space(cache, root->fs_info, chunk_offset,
7362 chunk_offset + size); 7060 chunk_offset + size);
7363 7061
7062 free_excluded_extents(root, cache);
7063
7364 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7064 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
7365 &cache->space_info); 7065 &cache->space_info);
7366 BUG_ON(ret); 7066 BUG_ON(ret);
7067
7068 spin_lock(&cache->space_info->lock);
7069 cache->space_info->bytes_super += cache->bytes_super;
7070 spin_unlock(&cache->space_info->lock);
7071
7367 down_write(&cache->space_info->groups_sem); 7072 down_write(&cache->space_info->groups_sem);
7368 list_add_tail(&cache->list, &cache->space_info->block_groups); 7073 list_add_tail(&cache->list, &cache->space_info->block_groups);
7369 up_write(&cache->space_info->groups_sem); 7074 up_write(&cache->space_info->groups_sem);
@@ -7429,8 +7134,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7429 up_write(&block_group->space_info->groups_sem); 7134 up_write(&block_group->space_info->groups_sem);
7430 7135
7431 if (block_group->cached == BTRFS_CACHE_STARTED) 7136 if (block_group->cached == BTRFS_CACHE_STARTED)
7432 wait_event(block_group->caching_q, 7137 wait_block_group_cache_done(block_group);
7433 block_group_cache_done(block_group));
7434 7138
7435 btrfs_remove_free_space_cache(block_group); 7139 btrfs_remove_free_space_cache(block_group);
7436 7140
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 68260180f587..0cb88f8146ea 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -367,10 +367,10 @@ static int insert_state(struct extent_io_tree *tree,
367 } 367 }
368 if (bits & EXTENT_DIRTY) 368 if (bits & EXTENT_DIRTY)
369 tree->dirty_bytes += end - start + 1; 369 tree->dirty_bytes += end - start + 1;
370 set_state_cb(tree, state, bits);
371 state->state |= bits;
372 state->start = start; 370 state->start = start;
373 state->end = end; 371 state->end = end;
372 set_state_cb(tree, state, bits);
373 state->state |= bits;
374 node = tree_insert(&tree->state, end, &state->rb_node); 374 node = tree_insert(&tree->state, end, &state->rb_node);
375 if (node) { 375 if (node) {
376 struct extent_state *found; 376 struct extent_state *found;
@@ -471,10 +471,14 @@ static int clear_state_bit(struct extent_io_tree *tree,
471 * bits were already set, or zero if none of the bits were already set. 471 * bits were already set, or zero if none of the bits were already set.
472 */ 472 */
473int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 473int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
474 int bits, int wake, int delete, gfp_t mask) 474 int bits, int wake, int delete,
475 struct extent_state **cached_state,
476 gfp_t mask)
475{ 477{
476 struct extent_state *state; 478 struct extent_state *state;
479 struct extent_state *cached;
477 struct extent_state *prealloc = NULL; 480 struct extent_state *prealloc = NULL;
481 struct rb_node *next_node;
478 struct rb_node *node; 482 struct rb_node *node;
479 u64 last_end; 483 u64 last_end;
480 int err; 484 int err;
@@ -488,6 +492,17 @@ again:
488 } 492 }
489 493
490 spin_lock(&tree->lock); 494 spin_lock(&tree->lock);
495 if (cached_state) {
496 cached = *cached_state;
497 *cached_state = NULL;
498 cached_state = NULL;
499 if (cached && cached->tree && cached->start == start) {
500 atomic_dec(&cached->refs);
501 state = cached;
502 goto hit_next;
503 }
504 free_extent_state(cached);
505 }
491 /* 506 /*
492 * this search will find the extents that end after 507 * this search will find the extents that end after
493 * our range starts 508 * our range starts
@@ -496,6 +511,7 @@ again:
496 if (!node) 511 if (!node)
497 goto out; 512 goto out;
498 state = rb_entry(node, struct extent_state, rb_node); 513 state = rb_entry(node, struct extent_state, rb_node);
514hit_next:
499 if (state->start > end) 515 if (state->start > end)
500 goto out; 516 goto out;
501 WARN_ON(state->end < start); 517 WARN_ON(state->end < start);
@@ -531,8 +547,6 @@ again:
531 if (last_end == (u64)-1) 547 if (last_end == (u64)-1)
532 goto out; 548 goto out;
533 start = last_end + 1; 549 start = last_end + 1;
534 } else {
535 start = state->start;
536 } 550 }
537 goto search_again; 551 goto search_again;
538 } 552 }
@@ -550,16 +564,28 @@ again:
550 564
551 if (wake) 565 if (wake)
552 wake_up(&state->wq); 566 wake_up(&state->wq);
567
553 set |= clear_state_bit(tree, prealloc, bits, 568 set |= clear_state_bit(tree, prealloc, bits,
554 wake, delete); 569 wake, delete);
555 prealloc = NULL; 570 prealloc = NULL;
556 goto out; 571 goto out;
557 } 572 }
558 573
574 if (state->end < end && prealloc && !need_resched())
575 next_node = rb_next(&state->rb_node);
576 else
577 next_node = NULL;
578
559 set |= clear_state_bit(tree, state, bits, wake, delete); 579 set |= clear_state_bit(tree, state, bits, wake, delete);
560 if (last_end == (u64)-1) 580 if (last_end == (u64)-1)
561 goto out; 581 goto out;
562 start = last_end + 1; 582 start = last_end + 1;
583 if (start <= end && next_node) {
584 state = rb_entry(next_node, struct extent_state,
585 rb_node);
586 if (state->start == start)
587 goto hit_next;
588 }
563 goto search_again; 589 goto search_again;
564 590
565out: 591out:
@@ -653,28 +679,40 @@ static void set_state_bits(struct extent_io_tree *tree,
653 state->state |= bits; 679 state->state |= bits;
654} 680}
655 681
682static void cache_state(struct extent_state *state,
683 struct extent_state **cached_ptr)
684{
685 if (cached_ptr && !(*cached_ptr)) {
686 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) {
687 *cached_ptr = state;
688 atomic_inc(&state->refs);
689 }
690 }
691}
692
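cache_state() only pays off together with the lock_extent_bits(), unlock_extent_cached() and test_range_bit() changes further down in this patch. A minimal sketch of the intended pattern, modeled on the find_lock_delalloc_range() hunk below (the EXTENT_DELALLOC test is just an example bit):

	struct extent_state *cached_state = NULL;

	/* the lock call records the locked state in cached_state */
	lock_extent_bits(tree, start, end, 0, &cached_state, GFP_NOFS);

	/* later lookups reuse the cached state instead of re-searching */
	if (test_range_bit(tree, start, end, EXTENT_DELALLOC, 1,
			   cached_state)) {
		/* operate on the range */
	}

	/* unlocks the range and drops our reference on the cached state */
	unlock_extent_cached(tree, start, end, &cached_state, GFP_NOFS);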
656/* 693/*
657 * set some bits on a range in the tree. This may require allocations 694 * set some bits on a range in the tree. This may require allocations or
658 * or sleeping, so the gfp mask is used to indicate what is allowed. 695 * sleeping, so the gfp mask is used to indicate what is allowed.
659 * 696 *
660 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the 697 * If any of the exclusive bits are set, this will fail with -EEXIST if some
661 * range already has the desired bits set. The start of the existing 698 * part of the range already has the desired bits set. The start of the
662 * range is returned in failed_start in this case. 699 * existing range is returned in failed_start in this case.
663 * 700 *
 664 * [start, end] is inclusive 701 * [start, end] is inclusive. This takes the tree lock.
665 * This takes the tree lock.
666 */ 702 */
703
667static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 704static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
668 int bits, int exclusive, u64 *failed_start, 705 int bits, int exclusive_bits, u64 *failed_start,
706 struct extent_state **cached_state,
669 gfp_t mask) 707 gfp_t mask)
670{ 708{
671 struct extent_state *state; 709 struct extent_state *state;
672 struct extent_state *prealloc = NULL; 710 struct extent_state *prealloc = NULL;
673 struct rb_node *node; 711 struct rb_node *node;
674 int err = 0; 712 int err = 0;
675 int set;
676 u64 last_start; 713 u64 last_start;
677 u64 last_end; 714 u64 last_end;
715
678again: 716again:
679 if (!prealloc && (mask & __GFP_WAIT)) { 717 if (!prealloc && (mask & __GFP_WAIT)) {
680 prealloc = alloc_extent_state(mask); 718 prealloc = alloc_extent_state(mask);
@@ -683,6 +721,13 @@ again:
683 } 721 }
684 722
685 spin_lock(&tree->lock); 723 spin_lock(&tree->lock);
724 if (cached_state && *cached_state) {
725 state = *cached_state;
726 if (state->start == start && state->tree) {
727 node = &state->rb_node;
728 goto hit_next;
729 }
730 }
686 /* 731 /*
687 * this search will find all the extents that end after 732 * this search will find all the extents that end after
688 * our range starts. 733 * our range starts.
@@ -694,8 +739,8 @@ again:
694 BUG_ON(err == -EEXIST); 739 BUG_ON(err == -EEXIST);
695 goto out; 740 goto out;
696 } 741 }
697
698 state = rb_entry(node, struct extent_state, rb_node); 742 state = rb_entry(node, struct extent_state, rb_node);
743hit_next:
699 last_start = state->start; 744 last_start = state->start;
700 last_end = state->end; 745 last_end = state->end;
701 746
@@ -706,17 +751,29 @@ again:
706 * Just lock what we found and keep going 751 * Just lock what we found and keep going
707 */ 752 */
708 if (state->start == start && state->end <= end) { 753 if (state->start == start && state->end <= end) {
709 set = state->state & bits; 754 struct rb_node *next_node;
710 if (set && exclusive) { 755 if (state->state & exclusive_bits) {
711 *failed_start = state->start; 756 *failed_start = state->start;
712 err = -EEXIST; 757 err = -EEXIST;
713 goto out; 758 goto out;
714 } 759 }
760
715 set_state_bits(tree, state, bits); 761 set_state_bits(tree, state, bits);
762 cache_state(state, cached_state);
716 merge_state(tree, state); 763 merge_state(tree, state);
717 if (last_end == (u64)-1) 764 if (last_end == (u64)-1)
718 goto out; 765 goto out;
766
719 start = last_end + 1; 767 start = last_end + 1;
768 if (start < end && prealloc && !need_resched()) {
769 next_node = rb_next(node);
770 if (next_node) {
771 state = rb_entry(next_node, struct extent_state,
772 rb_node);
773 if (state->start == start)
774 goto hit_next;
775 }
776 }
720 goto search_again; 777 goto search_again;
721 } 778 }
722 779
@@ -737,8 +794,7 @@ again:
737 * desired bit on it. 794 * desired bit on it.
738 */ 795 */
739 if (state->start < start) { 796 if (state->start < start) {
740 set = state->state & bits; 797 if (state->state & exclusive_bits) {
741 if (exclusive && set) {
742 *failed_start = start; 798 *failed_start = start;
743 err = -EEXIST; 799 err = -EEXIST;
744 goto out; 800 goto out;
@@ -750,12 +806,11 @@ again:
750 goto out; 806 goto out;
751 if (state->end <= end) { 807 if (state->end <= end) {
752 set_state_bits(tree, state, bits); 808 set_state_bits(tree, state, bits);
809 cache_state(state, cached_state);
753 merge_state(tree, state); 810 merge_state(tree, state);
754 if (last_end == (u64)-1) 811 if (last_end == (u64)-1)
755 goto out; 812 goto out;
756 start = last_end + 1; 813 start = last_end + 1;
757 } else {
758 start = state->start;
759 } 814 }
760 goto search_again; 815 goto search_again;
761 } 816 }
@@ -774,6 +829,7 @@ again:
774 this_end = last_start - 1; 829 this_end = last_start - 1;
775 err = insert_state(tree, prealloc, start, this_end, 830 err = insert_state(tree, prealloc, start, this_end,
776 bits); 831 bits);
832 cache_state(prealloc, cached_state);
777 prealloc = NULL; 833 prealloc = NULL;
778 BUG_ON(err == -EEXIST); 834 BUG_ON(err == -EEXIST);
779 if (err) 835 if (err)
@@ -788,8 +844,7 @@ again:
788 * on the first half 844 * on the first half
789 */ 845 */
790 if (state->start <= end && state->end > end) { 846 if (state->start <= end && state->end > end) {
791 set = state->state & bits; 847 if (state->state & exclusive_bits) {
792 if (exclusive && set) {
793 *failed_start = start; 848 *failed_start = start;
794 err = -EEXIST; 849 err = -EEXIST;
795 goto out; 850 goto out;
@@ -798,6 +853,7 @@ again:
798 BUG_ON(err == -EEXIST); 853 BUG_ON(err == -EEXIST);
799 854
800 set_state_bits(tree, prealloc, bits); 855 set_state_bits(tree, prealloc, bits);
856 cache_state(prealloc, cached_state);
801 merge_state(tree, prealloc); 857 merge_state(tree, prealloc);
802 prealloc = NULL; 858 prealloc = NULL;
803 goto out; 859 goto out;
@@ -826,86 +882,64 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
826 gfp_t mask) 882 gfp_t mask)
827{ 883{
828 return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, 884 return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
829 mask); 885 NULL, mask);
830}
831
832int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
833 gfp_t mask)
834{
835 return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
836} 886}
837 887
838int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 888int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
839 int bits, gfp_t mask) 889 int bits, gfp_t mask)
840{ 890{
841 return set_extent_bit(tree, start, end, bits, 0, NULL, 891 return set_extent_bit(tree, start, end, bits, 0, NULL,
842 mask); 892 NULL, mask);
843} 893}
844 894
845int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 895int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
846 int bits, gfp_t mask) 896 int bits, gfp_t mask)
847{ 897{
848 return clear_extent_bit(tree, start, end, bits, 0, 0, mask); 898 return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
849} 899}
850 900
851int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, 901int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
852 gfp_t mask) 902 gfp_t mask)
853{ 903{
854 return set_extent_bit(tree, start, end, 904 return set_extent_bit(tree, start, end,
855 EXTENT_DELALLOC | EXTENT_DIRTY, 905 EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE,
856 0, NULL, mask); 906 0, NULL, NULL, mask);
857} 907}
858 908
859int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 909int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
860 gfp_t mask) 910 gfp_t mask)
861{ 911{
862 return clear_extent_bit(tree, start, end, 912 return clear_extent_bit(tree, start, end,
863 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); 913 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
864} 914 NULL, mask);
865
866int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
867 gfp_t mask)
868{
869 return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
870} 915}
871 916
872int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 917int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
873 gfp_t mask) 918 gfp_t mask)
874{ 919{
875 return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, 920 return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
876 mask); 921 NULL, mask);
877} 922}
878 923
879static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 924static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
880 gfp_t mask) 925 gfp_t mask)
881{ 926{
882 return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); 927 return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0,
928 NULL, mask);
883} 929}
884 930
885int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 931int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
886 gfp_t mask) 932 gfp_t mask)
887{ 933{
888 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, 934 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
889 mask); 935 NULL, mask);
890} 936}
891 937
892static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, 938static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
893 u64 end, gfp_t mask) 939 u64 end, gfp_t mask)
894{ 940{
895 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); 941 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
896} 942 NULL, mask);
897
898static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
899 gfp_t mask)
900{
901 return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
902 0, NULL, mask);
903}
904
905static int clear_extent_writeback(struct extent_io_tree *tree, u64 start,
906 u64 end, gfp_t mask)
907{
908 return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
909} 943}
910 944
911int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) 945int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
@@ -917,13 +951,15 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
 917 * either insert or lock state struct between start and end; use mask to tell 951 * either insert or lock state struct between start and end; use mask to tell
918 * us if waiting is desired. 952 * us if waiting is desired.
919 */ 953 */
920int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) 954int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
955 int bits, struct extent_state **cached_state, gfp_t mask)
921{ 956{
922 int err; 957 int err;
923 u64 failed_start; 958 u64 failed_start;
924 while (1) { 959 while (1) {
925 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 960 err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
926 &failed_start, mask); 961 EXTENT_LOCKED, &failed_start,
962 cached_state, mask);
927 if (err == -EEXIST && (mask & __GFP_WAIT)) { 963 if (err == -EEXIST && (mask & __GFP_WAIT)) {
928 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); 964 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
929 start = failed_start; 965 start = failed_start;
@@ -935,27 +971,40 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
935 return err; 971 return err;
936} 972}
937 973
974int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
975{
976 return lock_extent_bits(tree, start, end, 0, NULL, mask);
977}
978
938int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 979int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
939 gfp_t mask) 980 gfp_t mask)
940{ 981{
941 int err; 982 int err;
942 u64 failed_start; 983 u64 failed_start;
943 984
944 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 985 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
945 &failed_start, mask); 986 &failed_start, NULL, mask);
946 if (err == -EEXIST) { 987 if (err == -EEXIST) {
947 if (failed_start > start) 988 if (failed_start > start)
948 clear_extent_bit(tree, start, failed_start - 1, 989 clear_extent_bit(tree, start, failed_start - 1,
949 EXTENT_LOCKED, 1, 0, mask); 990 EXTENT_LOCKED, 1, 0, NULL, mask);
950 return 0; 991 return 0;
951 } 992 }
952 return 1; 993 return 1;
953} 994}
954 995
996int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
997 struct extent_state **cached, gfp_t mask)
998{
999 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
1000 mask);
1001}
1002
955int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, 1003int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
956 gfp_t mask) 1004 gfp_t mask)
957{ 1005{
958 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); 1006 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
1007 mask);
959} 1008}
960 1009
961/* 1010/*
@@ -974,7 +1023,6 @@ int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
974 page_cache_release(page); 1023 page_cache_release(page);
975 index++; 1024 index++;
976 } 1025 }
977 set_extent_dirty(tree, start, end, GFP_NOFS);
978 return 0; 1026 return 0;
979} 1027}
980 1028
@@ -994,7 +1042,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
994 page_cache_release(page); 1042 page_cache_release(page);
995 index++; 1043 index++;
996 } 1044 }
997 set_extent_writeback(tree, start, end, GFP_NOFS);
998 return 0; 1045 return 0;
999} 1046}
1000 1047
@@ -1232,6 +1279,7 @@ static noinline u64 find_lock_delalloc_range(struct inode *inode,
1232 u64 delalloc_start; 1279 u64 delalloc_start;
1233 u64 delalloc_end; 1280 u64 delalloc_end;
1234 u64 found; 1281 u64 found;
1282 struct extent_state *cached_state = NULL;
1235 int ret; 1283 int ret;
1236 int loops = 0; 1284 int loops = 0;
1237 1285
@@ -1269,6 +1317,7 @@ again:
1269 /* some of the pages are gone, lets avoid looping by 1317 /* some of the pages are gone, lets avoid looping by
1270 * shortening the size of the delalloc range we're searching 1318 * shortening the size of the delalloc range we're searching
1271 */ 1319 */
1320 free_extent_state(cached_state);
1272 if (!loops) { 1321 if (!loops) {
1273 unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); 1322 unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
1274 max_bytes = PAGE_CACHE_SIZE - offset; 1323 max_bytes = PAGE_CACHE_SIZE - offset;
@@ -1282,18 +1331,21 @@ again:
1282 BUG_ON(ret); 1331 BUG_ON(ret);
1283 1332
1284 /* step three, lock the state bits for the whole range */ 1333 /* step three, lock the state bits for the whole range */
1285 lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); 1334 lock_extent_bits(tree, delalloc_start, delalloc_end,
1335 0, &cached_state, GFP_NOFS);
1286 1336
1287 /* then test to make sure it is all still delalloc */ 1337 /* then test to make sure it is all still delalloc */
1288 ret = test_range_bit(tree, delalloc_start, delalloc_end, 1338 ret = test_range_bit(tree, delalloc_start, delalloc_end,
1289 EXTENT_DELALLOC, 1); 1339 EXTENT_DELALLOC, 1, cached_state);
1290 if (!ret) { 1340 if (!ret) {
1291 unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); 1341 unlock_extent_cached(tree, delalloc_start, delalloc_end,
1342 &cached_state, GFP_NOFS);
1292 __unlock_for_delalloc(inode, locked_page, 1343 __unlock_for_delalloc(inode, locked_page,
1293 delalloc_start, delalloc_end); 1344 delalloc_start, delalloc_end);
1294 cond_resched(); 1345 cond_resched();
1295 goto again; 1346 goto again;
1296 } 1347 }
1348 free_extent_state(cached_state);
1297 *start = delalloc_start; 1349 *start = delalloc_start;
1298 *end = delalloc_end; 1350 *end = delalloc_end;
1299out_failed: 1351out_failed:
@@ -1307,7 +1359,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1307 int clear_unlock, 1359 int clear_unlock,
1308 int clear_delalloc, int clear_dirty, 1360 int clear_delalloc, int clear_dirty,
1309 int set_writeback, 1361 int set_writeback,
1310 int end_writeback) 1362 int end_writeback,
1363 int set_private2)
1311{ 1364{
1312 int ret; 1365 int ret;
1313 struct page *pages[16]; 1366 struct page *pages[16];
@@ -1325,8 +1378,9 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1325 if (clear_delalloc) 1378 if (clear_delalloc)
1326 clear_bits |= EXTENT_DELALLOC; 1379 clear_bits |= EXTENT_DELALLOC;
1327 1380
1328 clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS); 1381 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
1329 if (!(unlock_pages || clear_dirty || set_writeback || end_writeback)) 1382 if (!(unlock_pages || clear_dirty || set_writeback || end_writeback ||
1383 set_private2))
1330 return 0; 1384 return 0;
1331 1385
1332 while (nr_pages > 0) { 1386 while (nr_pages > 0) {
@@ -1334,6 +1388,10 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1334 min_t(unsigned long, 1388 min_t(unsigned long,
1335 nr_pages, ARRAY_SIZE(pages)), pages); 1389 nr_pages, ARRAY_SIZE(pages)), pages);
1336 for (i = 0; i < ret; i++) { 1390 for (i = 0; i < ret; i++) {
1391
1392 if (set_private2)
1393 SetPagePrivate2(pages[i]);
1394
1337 if (pages[i] == locked_page) { 1395 if (pages[i] == locked_page) {
1338 page_cache_release(pages[i]); 1396 page_cache_release(pages[i]);
1339 continue; 1397 continue;
@@ -1476,14 +1534,17 @@ out:
1476 * range is found set. 1534 * range is found set.
1477 */ 1535 */
1478int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 1536int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1479 int bits, int filled) 1537 int bits, int filled, struct extent_state *cached)
1480{ 1538{
1481 struct extent_state *state = NULL; 1539 struct extent_state *state = NULL;
1482 struct rb_node *node; 1540 struct rb_node *node;
1483 int bitset = 0; 1541 int bitset = 0;
1484 1542
1485 spin_lock(&tree->lock); 1543 spin_lock(&tree->lock);
1486 node = tree_search(tree, start); 1544 if (cached && cached->tree && cached->start == start)
1545 node = &cached->rb_node;
1546 else
1547 node = tree_search(tree, start);
1487 while (node && start <= end) { 1548 while (node && start <= end) {
1488 state = rb_entry(node, struct extent_state, rb_node); 1549 state = rb_entry(node, struct extent_state, rb_node);
1489 1550
@@ -1503,6 +1564,10 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1503 bitset = 0; 1564 bitset = 0;
1504 break; 1565 break;
1505 } 1566 }
1567
1568 if (state->end == (u64)-1)
1569 break;
1570
1506 start = state->end + 1; 1571 start = state->end + 1;
1507 if (start > end) 1572 if (start > end)
1508 break; 1573 break;
@@ -1526,7 +1591,7 @@ static int check_page_uptodate(struct extent_io_tree *tree,
1526{ 1591{
1527 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1592 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1528 u64 end = start + PAGE_CACHE_SIZE - 1; 1593 u64 end = start + PAGE_CACHE_SIZE - 1;
1529 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) 1594 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
1530 SetPageUptodate(page); 1595 SetPageUptodate(page);
1531 return 0; 1596 return 0;
1532} 1597}
@@ -1540,7 +1605,7 @@ static int check_page_locked(struct extent_io_tree *tree,
1540{ 1605{
1541 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1606 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1542 u64 end = start + PAGE_CACHE_SIZE - 1; 1607 u64 end = start + PAGE_CACHE_SIZE - 1;
1543 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) 1608 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
1544 unlock_page(page); 1609 unlock_page(page);
1545 return 0; 1610 return 0;
1546} 1611}
@@ -1552,10 +1617,7 @@ static int check_page_locked(struct extent_io_tree *tree,
1552static int check_page_writeback(struct extent_io_tree *tree, 1617static int check_page_writeback(struct extent_io_tree *tree,
1553 struct page *page) 1618 struct page *page)
1554{ 1619{
1555 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1620 end_page_writeback(page);
1556 u64 end = start + PAGE_CACHE_SIZE - 1;
1557 if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
1558 end_page_writeback(page);
1559 return 0; 1621 return 0;
1560} 1622}
1561 1623
@@ -1613,13 +1675,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
1613 } 1675 }
1614 1676
1615 if (!uptodate) { 1677 if (!uptodate) {
1616 clear_extent_uptodate(tree, start, end, GFP_ATOMIC); 1678 clear_extent_uptodate(tree, start, end, GFP_NOFS);
1617 ClearPageUptodate(page); 1679 ClearPageUptodate(page);
1618 SetPageError(page); 1680 SetPageError(page);
1619 } 1681 }
1620 1682
1621 clear_extent_writeback(tree, start, end, GFP_ATOMIC);
1622
1623 if (whole_page) 1683 if (whole_page)
1624 end_page_writeback(page); 1684 end_page_writeback(page);
1625 else 1685 else
@@ -1983,7 +2043,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
1983 continue; 2043 continue;
1984 } 2044 }
1985 /* the get_extent function already copied into the page */ 2045 /* the get_extent function already copied into the page */
1986 if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { 2046 if (test_range_bit(tree, cur, cur_end,
2047 EXTENT_UPTODATE, 1, NULL)) {
1987 check_page_uptodate(tree, page); 2048 check_page_uptodate(tree, page);
1988 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2049 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1989 cur = cur + iosize; 2050 cur = cur + iosize;
@@ -2078,6 +2139,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2078 u64 iosize; 2139 u64 iosize;
2079 u64 unlock_start; 2140 u64 unlock_start;
2080 sector_t sector; 2141 sector_t sector;
2142 struct extent_state *cached_state = NULL;
2081 struct extent_map *em; 2143 struct extent_map *em;
2082 struct block_device *bdev; 2144 struct block_device *bdev;
2083 int ret; 2145 int ret;
@@ -2124,6 +2186,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2124 delalloc_end = 0; 2186 delalloc_end = 0;
2125 page_started = 0; 2187 page_started = 0;
2126 if (!epd->extent_locked) { 2188 if (!epd->extent_locked) {
2189 u64 delalloc_to_write = 0;
2127 /* 2190 /*
2128 * make sure the wbc mapping index is at least updated 2191 * make sure the wbc mapping index is at least updated
2129 * to this page. 2192 * to this page.
@@ -2143,8 +2206,24 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2143 tree->ops->fill_delalloc(inode, page, delalloc_start, 2206 tree->ops->fill_delalloc(inode, page, delalloc_start,
2144 delalloc_end, &page_started, 2207 delalloc_end, &page_started,
2145 &nr_written); 2208 &nr_written);
2209 /*
2210 * delalloc_end is already one less than the total
2211 * length, so we don't subtract one from
2212 * PAGE_CACHE_SIZE
2213 */
2214 delalloc_to_write += (delalloc_end - delalloc_start +
2215 PAGE_CACHE_SIZE) >>
2216 PAGE_CACHE_SHIFT;
2146 delalloc_start = delalloc_end + 1; 2217 delalloc_start = delalloc_end + 1;
2147 } 2218 }
2219 if (wbc->nr_to_write < delalloc_to_write) {
2220 int thresh = 8192;
2221
2222 if (delalloc_to_write < thresh * 2)
2223 thresh = delalloc_to_write;
2224 wbc->nr_to_write = min_t(u64, delalloc_to_write,
2225 thresh);
2226 }
2148 2227
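A worked instance of the page accounting and the nr_to_write clamp above, assuming 4K pages (PAGE_CACHE_SHIFT == 12):

	/*
	 * One delalloc page: delalloc_start == 0, delalloc_end == 4095,
	 * so (4095 - 0 + 4096) >> 12 == 1 page -- no extra "- 1" needed,
	 * as the comment above explains. If delalloc_to_write reaches
	 * 20000 pages while wbc->nr_to_write is only 1024, the clamp
	 * raises nr_to_write to the 8192 threshold (20000 >= 2 * 8192),
	 * so a large delalloc run is not starved of writeback budget.
	 */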
2149 /* did the fill delalloc function already unlock and start 2228 /* did the fill delalloc function already unlock and start
2150 * the IO? 2229 * the IO?
@@ -2160,15 +2239,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2160 goto done_unlocked; 2239 goto done_unlocked;
2161 } 2240 }
2162 } 2241 }
2163 lock_extent(tree, start, page_end, GFP_NOFS);
2164
2165 unlock_start = start;
2166
2167 if (tree->ops && tree->ops->writepage_start_hook) { 2242 if (tree->ops && tree->ops->writepage_start_hook) {
2168 ret = tree->ops->writepage_start_hook(page, start, 2243 ret = tree->ops->writepage_start_hook(page, start,
2169 page_end); 2244 page_end);
2170 if (ret == -EAGAIN) { 2245 if (ret == -EAGAIN) {
2171 unlock_extent(tree, start, page_end, GFP_NOFS);
2172 redirty_page_for_writepage(wbc, page); 2246 redirty_page_for_writepage(wbc, page);
2173 update_nr_written(page, wbc, nr_written); 2247 update_nr_written(page, wbc, nr_written);
2174 unlock_page(page); 2248 unlock_page(page);
@@ -2184,12 +2258,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2184 update_nr_written(page, wbc, nr_written + 1); 2258 update_nr_written(page, wbc, nr_written + 1);
2185 2259
2186 end = page_end; 2260 end = page_end;
2187 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
2188 printk(KERN_ERR "btrfs delalloc bits after lock_extent\n");
2189
2190 if (last_byte <= start) { 2261 if (last_byte <= start) {
2191 clear_extent_dirty(tree, start, page_end, GFP_NOFS);
2192 unlock_extent(tree, start, page_end, GFP_NOFS);
2193 if (tree->ops && tree->ops->writepage_end_io_hook) 2262 if (tree->ops && tree->ops->writepage_end_io_hook)
2194 tree->ops->writepage_end_io_hook(page, start, 2263 tree->ops->writepage_end_io_hook(page, start,
2195 page_end, NULL, 1); 2264 page_end, NULL, 1);
@@ -2197,13 +2266,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2197 goto done; 2266 goto done;
2198 } 2267 }
2199 2268
2200 set_extent_uptodate(tree, start, page_end, GFP_NOFS);
2201 blocksize = inode->i_sb->s_blocksize; 2269 blocksize = inode->i_sb->s_blocksize;
2202 2270
2203 while (cur <= end) { 2271 while (cur <= end) {
2204 if (cur >= last_byte) { 2272 if (cur >= last_byte) {
2205 clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
2206 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2207 if (tree->ops && tree->ops->writepage_end_io_hook) 2273 if (tree->ops && tree->ops->writepage_end_io_hook)
2208 tree->ops->writepage_end_io_hook(page, cur, 2274 tree->ops->writepage_end_io_hook(page, cur,
2209 page_end, NULL, 1); 2275 page_end, NULL, 1);
@@ -2235,12 +2301,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2235 */ 2301 */
2236 if (compressed || block_start == EXTENT_MAP_HOLE || 2302 if (compressed || block_start == EXTENT_MAP_HOLE ||
2237 block_start == EXTENT_MAP_INLINE) { 2303 block_start == EXTENT_MAP_INLINE) {
2238 clear_extent_dirty(tree, cur,
2239 cur + iosize - 1, GFP_NOFS);
2240
2241 unlock_extent(tree, unlock_start, cur + iosize - 1,
2242 GFP_NOFS);
2243
2244 /* 2304 /*
2245 * end_io notification does not happen here for 2305 * end_io notification does not happen here for
2246 * compressed extents 2306 * compressed extents
@@ -2265,13 +2325,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2265 } 2325 }
2266 /* leave this out until we have a page_mkwrite call */ 2326 /* leave this out until we have a page_mkwrite call */
2267 if (0 && !test_range_bit(tree, cur, cur + iosize - 1, 2327 if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
2268 EXTENT_DIRTY, 0)) { 2328 EXTENT_DIRTY, 0, NULL)) {
2269 cur = cur + iosize; 2329 cur = cur + iosize;
2270 pg_offset += iosize; 2330 pg_offset += iosize;
2271 continue; 2331 continue;
2272 } 2332 }
2273 2333
2274 clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
2275 if (tree->ops && tree->ops->writepage_io_hook) { 2334 if (tree->ops && tree->ops->writepage_io_hook) {
2276 ret = tree->ops->writepage_io_hook(page, cur, 2335 ret = tree->ops->writepage_io_hook(page, cur,
2277 cur + iosize - 1); 2336 cur + iosize - 1);
@@ -2309,12 +2368,12 @@ done:
2309 set_page_writeback(page); 2368 set_page_writeback(page);
2310 end_page_writeback(page); 2369 end_page_writeback(page);
2311 } 2370 }
2312 if (unlock_start <= page_end)
2313 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2314 unlock_page(page); 2371 unlock_page(page);
2315 2372
2316done_unlocked: 2373done_unlocked:
2317 2374
2375 /* drop our reference on any cached states */
2376 free_extent_state(cached_state);
2318 return 0; 2377 return 0;
2319} 2378}
2320 2379
@@ -2339,9 +2398,9 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
2339 writepage_t writepage, void *data, 2398 writepage_t writepage, void *data,
2340 void (*flush_fn)(void *)) 2399 void (*flush_fn)(void *))
2341{ 2400{
2342 struct backing_dev_info *bdi = mapping->backing_dev_info;
2343 int ret = 0; 2401 int ret = 0;
2344 int done = 0; 2402 int done = 0;
2403 int nr_to_write_done = 0;
2345 struct pagevec pvec; 2404 struct pagevec pvec;
2346 int nr_pages; 2405 int nr_pages;
2347 pgoff_t index; 2406 pgoff_t index;
@@ -2361,7 +2420,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
2361 scanned = 1; 2420 scanned = 1;
2362 } 2421 }
2363retry: 2422retry:
2364 while (!done && (index <= end) && 2423 while (!done && !nr_to_write_done && (index <= end) &&
2365 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, 2424 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
2366 PAGECACHE_TAG_DIRTY, min(end - index, 2425 PAGECACHE_TAG_DIRTY, min(end - index,
2367 (pgoff_t)PAGEVEC_SIZE-1) + 1))) { 2426 (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
@@ -2412,12 +2471,15 @@ retry:
2412 unlock_page(page); 2471 unlock_page(page);
2413 ret = 0; 2472 ret = 0;
2414 } 2473 }
2415 if (ret || wbc->nr_to_write <= 0) 2474 if (ret)
2416 done = 1;
2417 if (wbc->nonblocking && bdi_write_congested(bdi)) {
2418 wbc->encountered_congestion = 1;
2419 done = 1; 2475 done = 1;
2420 } 2476
2477 /*
2478 * the filesystem may choose to bump up nr_to_write.
 2479			 * We have to make sure to honor the new nr_to_write
 2480			 * after every page we write back
2481 */
2482 nr_to_write_done = wbc->nr_to_write <= 0;
2421 } 2483 }
2422 pagevec_release(&pvec); 2484 pagevec_release(&pvec);
2423 cond_resched(); 2485 cond_resched();
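Because __extent_writepage can now raise wbc->nr_to_write mid-walk, the budget check above moves out of the one-shot done test and is recomputed after every page. A hedged userspace analogue of that control flow (write_budget and write_one are invented stand-ins for wbc->nr_to_write and the per-page writepage callback):

#include <stdio.h>

static long write_budget = 3;	/* stands in for wbc->nr_to_write */

/* stands in for the per-page writepage call; may bump the budget */
static int write_one(int page)
{
	write_budget--;
	if (page == 1)		/* the filesystem decides to write more */
		write_budget += 4;
	printf("wrote page %d, budget now %ld\n", page, write_budget);
	return 0;
}

int main(void)
{
	int done = 0, budget_done = 0;

	for (int page = 0; !done && !budget_done && page < 16; page++) {
		if (write_one(page))
			done = 1;
		/* re-evaluate after every page: the budget may have grown */
		budget_done = write_budget <= 0;
	}
	return 0;
}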
@@ -2604,10 +2666,10 @@ int extent_invalidatepage(struct extent_io_tree *tree,
2604 return 0; 2666 return 0;
2605 2667
2606 lock_extent(tree, start, end, GFP_NOFS); 2668 lock_extent(tree, start, end, GFP_NOFS);
2607 wait_on_extent_writeback(tree, start, end); 2669 wait_on_page_writeback(page);
2608 clear_extent_bit(tree, start, end, 2670 clear_extent_bit(tree, start, end,
2609 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, 2671 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
2610 1, 1, GFP_NOFS); 2672 1, 1, NULL, GFP_NOFS);
2611 return 0; 2673 return 0;
2612} 2674}
2613 2675
@@ -2687,7 +2749,7 @@ int extent_prepare_write(struct extent_io_tree *tree,
2687 !isnew && !PageUptodate(page) && 2749 !isnew && !PageUptodate(page) &&
2688 (block_off_end > to || block_off_start < from) && 2750 (block_off_end > to || block_off_start < from) &&
2689 !test_range_bit(tree, block_start, cur_end, 2751 !test_range_bit(tree, block_start, cur_end,
2690 EXTENT_UPTODATE, 1)) { 2752 EXTENT_UPTODATE, 1, NULL)) {
2691 u64 sector; 2753 u64 sector;
2692 u64 extent_offset = block_start - em->start; 2754 u64 extent_offset = block_start - em->start;
2693 size_t iosize; 2755 size_t iosize;
@@ -2701,7 +2763,7 @@ int extent_prepare_write(struct extent_io_tree *tree,
2701 */ 2763 */
2702 set_extent_bit(tree, block_start, 2764 set_extent_bit(tree, block_start,
2703 block_start + iosize - 1, 2765 block_start + iosize - 1,
2704 EXTENT_LOCKED, 0, NULL, GFP_NOFS); 2766 EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS);
2705 ret = submit_extent_page(READ, tree, page, 2767 ret = submit_extent_page(READ, tree, page,
2706 sector, iosize, page_offset, em->bdev, 2768 sector, iosize, page_offset, em->bdev,
2707 NULL, 1, 2769 NULL, 1,
@@ -2742,13 +2804,18 @@ int try_release_extent_state(struct extent_map_tree *map,
2742 int ret = 1; 2804 int ret = 1;
2743 2805
2744 if (test_range_bit(tree, start, end, 2806 if (test_range_bit(tree, start, end,
2745 EXTENT_IOBITS | EXTENT_ORDERED, 0)) 2807 EXTENT_IOBITS, 0, NULL))
2746 ret = 0; 2808 ret = 0;
2747 else { 2809 else {
2748 if ((mask & GFP_NOFS) == GFP_NOFS) 2810 if ((mask & GFP_NOFS) == GFP_NOFS)
2749 mask = GFP_NOFS; 2811 mask = GFP_NOFS;
2750 clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 2812 /*
2751 1, 1, mask); 2813 * at this point we can safely clear everything except the
2814 * locked bit and the nodatasum bit
2815 */
2816 clear_extent_bit(tree, start, end,
2817 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
2818 0, 0, NULL, mask);
2752 } 2819 }
2753 return ret; 2820 return ret;
2754} 2821}
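The new clear call above passes the complement of the two bits to keep, so a single call wipes every other bit in the range. A small demonstration of the mask arithmetic; EXTENT_NODATASUM's value matches the extent_io.h hunk later in this diff, the other bit positions are illustrative:

#include <stdio.h>

#define EXTENT_LOCKED    (1 << 0)	/* illustrative bit positions */
#define EXTENT_DIRTY     (1 << 1)
#define EXTENT_DELALLOC  (1 << 2)
#define EXTENT_NODATASUM (1 << 10)	/* matches the header hunk */

int main(void)
{
	int state = EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
		    EXTENT_NODATASUM;
	/* the bits to clear: everything except locked and nodatasum */
	int clear_mask = ~(EXTENT_LOCKED | EXTENT_NODATASUM);

	state &= ~clear_mask;	/* only LOCKED and NODATASUM survive */
	printf("state = 0x%x\n", state);	/* prints state = 0x401 */
	return 0;
}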
@@ -2771,29 +2838,28 @@ int try_release_extent_mapping(struct extent_map_tree *map,
2771 u64 len; 2838 u64 len;
2772 while (start <= end) { 2839 while (start <= end) {
2773 len = end - start + 1; 2840 len = end - start + 1;
2774 spin_lock(&map->lock); 2841 write_lock(&map->lock);
2775 em = lookup_extent_mapping(map, start, len); 2842 em = lookup_extent_mapping(map, start, len);
2776 if (!em || IS_ERR(em)) { 2843 if (!em || IS_ERR(em)) {
2777 spin_unlock(&map->lock); 2844 write_unlock(&map->lock);
2778 break; 2845 break;
2779 } 2846 }
2780 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || 2847 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
2781 em->start != start) { 2848 em->start != start) {
2782 spin_unlock(&map->lock); 2849 write_unlock(&map->lock);
2783 free_extent_map(em); 2850 free_extent_map(em);
2784 break; 2851 break;
2785 } 2852 }
2786 if (!test_range_bit(tree, em->start, 2853 if (!test_range_bit(tree, em->start,
2787 extent_map_end(em) - 1, 2854 extent_map_end(em) - 1,
2788 EXTENT_LOCKED | EXTENT_WRITEBACK | 2855 EXTENT_LOCKED | EXTENT_WRITEBACK,
2789 EXTENT_ORDERED, 2856 0, NULL)) {
2790 0)) {
2791 remove_extent_mapping(map, em); 2857 remove_extent_mapping(map, em);
2792 /* once for the rb tree */ 2858 /* once for the rb tree */
2793 free_extent_map(em); 2859 free_extent_map(em);
2794 } 2860 }
2795 start = extent_map_end(em); 2861 start = extent_map_end(em);
2796 spin_unlock(&map->lock); 2862 write_unlock(&map->lock);
2797 2863
2798 /* once for us */ 2864 /* once for us */
2799 free_extent_map(em); 2865 free_extent_map(em);
@@ -3203,7 +3269,7 @@ int extent_range_uptodate(struct extent_io_tree *tree,
3203 int uptodate; 3269 int uptodate;
3204 unsigned long index; 3270 unsigned long index;
3205 3271
3206 ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1); 3272 ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL);
3207 if (ret) 3273 if (ret)
3208 return 1; 3274 return 1;
3209 while (start <= end) { 3275 while (start <= end) {
@@ -3233,7 +3299,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree,
3233 return 1; 3299 return 1;
3234 3300
3235 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, 3301 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3236 EXTENT_UPTODATE, 1); 3302 EXTENT_UPTODATE, 1, NULL);
3237 if (ret) 3303 if (ret)
3238 return ret; 3304 return ret;
3239 3305
@@ -3269,7 +3335,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
3269 return 0; 3335 return 0;
3270 3336
3271 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, 3337 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3272 EXTENT_UPTODATE, 1)) { 3338 EXTENT_UPTODATE, 1, NULL)) {
3273 return 0; 3339 return 0;
3274 } 3340 }
3275 3341
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5bc20abf3f3d..14ed16fd862d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -13,10 +13,8 @@
13#define EXTENT_DEFRAG (1 << 6) 13#define EXTENT_DEFRAG (1 << 6)
14#define EXTENT_DEFRAG_DONE (1 << 7) 14#define EXTENT_DEFRAG_DONE (1 << 7)
15#define EXTENT_BUFFER_FILLED (1 << 8) 15#define EXTENT_BUFFER_FILLED (1 << 8)
16#define EXTENT_ORDERED (1 << 9) 16#define EXTENT_BOUNDARY (1 << 9)
17#define EXTENT_ORDERED_METADATA (1 << 10) 17#define EXTENT_NODATASUM (1 << 10)
18#define EXTENT_BOUNDARY (1 << 11)
19#define EXTENT_NODATASUM (1 << 12)
20#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) 18#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
21 19
22/* flags for bio submission */ 20/* flags for bio submission */
@@ -142,6 +140,8 @@ int try_release_extent_state(struct extent_map_tree *map,
142 struct extent_io_tree *tree, struct page *page, 140 struct extent_io_tree *tree, struct page *page,
143 gfp_t mask); 141 gfp_t mask);
144int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); 142int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
143int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
144 int bits, struct extent_state **cached, gfp_t mask);
145int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); 145int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
146int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 146int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
147 gfp_t mask); 147 gfp_t mask);
@@ -155,11 +155,12 @@ u64 count_range_bits(struct extent_io_tree *tree,
155 u64 max_bytes, unsigned long bits); 155 u64 max_bytes, unsigned long bits);
156 156
157int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 157int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
158 int bits, int filled); 158 int bits, int filled, struct extent_state *cached_state);
159int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 159int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
160 int bits, gfp_t mask); 160 int bits, gfp_t mask);
161int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 161int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
162 int bits, int wake, int delete, gfp_t mask); 162 int bits, int wake, int delete, struct extent_state **cached,
163 gfp_t mask);
163int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 164int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
164 int bits, gfp_t mask); 165 int bits, gfp_t mask);
165int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 166int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
@@ -282,5 +283,6 @@ int extent_clear_unlock_delalloc(struct inode *inode,
282 int clear_unlock, 283 int clear_unlock,
283 int clear_delalloc, int clear_dirty, 284 int clear_delalloc, int clear_dirty,
284 int set_writeback, 285 int set_writeback,
285 int end_writeback); 286 int end_writeback,
287 int set_private2);
286#endif 288#endif
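The cached argument threaded through lock_extent_bits, clear_extent_bit and test_range_bit above is a lookup memo: the first tree search records the extent_state it found, later calls in the same operation start from that record, and the caller drops its reference at the end (the free_extent_state call in the __extent_writepage hunk). A hedged userspace analogue of the pattern, with invented types and names:

#include <stdio.h>
#include <stdlib.h>

struct state {			/* stands in for struct extent_state */
	int refs;
	int value;
};

static struct state *lookup_slow(int value)
{
	struct state *s = malloc(sizeof(*s));

	if (!s)
		exit(1);
	s->refs = 1;
	s->value = value;
	printf("slow tree search for %d\n", value);
	return s;
}

static void put_state(struct state *s)	/* free_extent_state analogue */
{
	if (s && --s->refs == 0)
		free(s);
}

/* search once, then reuse the cached record on repeat queries */
static int query(int value, struct state **cached)
{
	if (*cached) {
		if ((*cached)->value == value)
			return value;	/* fast path: no tree search */
		put_state(*cached);	/* stale cache entry: drop it */
	}
	*cached = lookup_slow(value);
	return value;
}

int main(void)
{
	struct state *cached = NULL;

	query(42, &cached);	/* searches */
	query(42, &cached);	/* served from the cache */
	put_state(cached);	/* drop our reference when done */
	return 0;
}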
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 30c9365861e6..2c726b7b9faa 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -36,7 +36,7 @@ void extent_map_exit(void)
36void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) 36void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
37{ 37{
38 tree->map.rb_node = NULL; 38 tree->map.rb_node = NULL;
39 spin_lock_init(&tree->lock); 39 rwlock_init(&tree->lock);
40} 40}
41 41
42/** 42/**
@@ -198,6 +198,56 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
198 return 0; 198 return 0;
199} 199}
200 200
201int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
202{
203 int ret = 0;
204 struct extent_map *merge = NULL;
205 struct rb_node *rb;
206 struct extent_map *em;
207
208 write_lock(&tree->lock);
209 em = lookup_extent_mapping(tree, start, len);
210
 211	WARN_ON(!em || em->start != start);
212
213 if (!em)
214 goto out;
215
216 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
217
218 if (em->start != 0) {
219 rb = rb_prev(&em->rb_node);
220 if (rb)
221 merge = rb_entry(rb, struct extent_map, rb_node);
222 if (rb && mergable_maps(merge, em)) {
223 em->start = merge->start;
224 em->len += merge->len;
225 em->block_len += merge->block_len;
226 em->block_start = merge->block_start;
227 merge->in_tree = 0;
228 rb_erase(&merge->rb_node, &tree->map);
229 free_extent_map(merge);
230 }
231 }
232
233 rb = rb_next(&em->rb_node);
234 if (rb)
235 merge = rb_entry(rb, struct extent_map, rb_node);
236 if (rb && mergable_maps(em, merge)) {
237 em->len += merge->len;
238 em->block_len += merge->len;
239 rb_erase(&merge->rb_node, &tree->map);
240 merge->in_tree = 0;
241 free_extent_map(merge);
242 }
243
244 free_extent_map(em);
245out:
246 write_unlock(&tree->lock);
247 return ret;
249}
250
201/** 251/**
202 * add_extent_mapping - add new extent map to the extent tree 252 * add_extent_mapping - add new extent map to the extent tree
203 * @tree: tree to insert new map in 253 * @tree: tree to insert new map in
@@ -222,7 +272,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
222 ret = -EEXIST; 272 ret = -EEXIST;
223 goto out; 273 goto out;
224 } 274 }
225 assert_spin_locked(&tree->lock);
226 rb = tree_insert(&tree->map, em->start, &em->rb_node); 275 rb = tree_insert(&tree->map, em->start, &em->rb_node);
227 if (rb) { 276 if (rb) {
228 ret = -EEXIST; 277 ret = -EEXIST;
@@ -285,7 +334,6 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
285 struct rb_node *next = NULL; 334 struct rb_node *next = NULL;
286 u64 end = range_end(start, len); 335 u64 end = range_end(start, len);
287 336
288 assert_spin_locked(&tree->lock);
289 rb_node = __tree_search(&tree->map, start, &prev, &next); 337 rb_node = __tree_search(&tree->map, start, &prev, &next);
290 if (!rb_node && prev) { 338 if (!rb_node && prev) {
291 em = rb_entry(prev, struct extent_map, rb_node); 339 em = rb_entry(prev, struct extent_map, rb_node);
@@ -319,6 +367,54 @@ out:
319} 367}
320 368
321/** 369/**
370 * search_extent_mapping - find a nearby extent map
371 * @tree: tree to lookup in
372 * @start: byte offset to start the search
373 * @len: length of the lookup range
374 *
375 * Find and return the first extent_map struct in @tree that intersects the
 376 * [start, start + len) range.
377 *
378 * If one can't be found, any nearby extent may be returned
379 */
380struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
381 u64 start, u64 len)
382{
383 struct extent_map *em;
384 struct rb_node *rb_node;
385 struct rb_node *prev = NULL;
386 struct rb_node *next = NULL;
387
388 rb_node = __tree_search(&tree->map, start, &prev, &next);
389 if (!rb_node && prev) {
390 em = rb_entry(prev, struct extent_map, rb_node);
391 goto found;
392 }
393 if (!rb_node && next) {
394 em = rb_entry(next, struct extent_map, rb_node);
395 goto found;
396 }
397 if (!rb_node) {
398 em = NULL;
399 goto out;
400 }
401 if (IS_ERR(rb_node)) {
402 em = ERR_PTR(PTR_ERR(rb_node));
403 goto out;
404 }
405 em = rb_entry(rb_node, struct extent_map, rb_node);
406 goto found;
 407
411found:
412 atomic_inc(&em->refs);
413out:
414 return em;
415}
416
417/**
322 * remove_extent_mapping - removes an extent_map from the extent tree 418 * remove_extent_mapping - removes an extent_map from the extent tree
323 * @tree: extent tree to remove from 419 * @tree: extent tree to remove from
 324 * @em:	extent map being removed 420
@@ -331,7 +427,6 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
331 int ret = 0; 427 int ret = 0;
332 428
333 WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); 429 WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
334 assert_spin_locked(&tree->lock);
335 rb_erase(&em->rb_node, &tree->map); 430 rb_erase(&em->rb_node, &tree->map);
336 em->in_tree = 0; 431 em->in_tree = 0;
337 return ret; 432 return ret;
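unpin_extent_cache above clears the pinned flag and then tries to fold the entry into its neighbours on both sides, mirroring what mergable_maps allows. Stripped of the rb-tree, the two-sided merge is plain interval arithmetic; a sketch on a sorted array with invented names:

#include <stdio.h>

struct em { unsigned long start, len; };

/* neighbours merge when their byte ranges touch end-to-start */
static int mergable(const struct em *prev, const struct em *next)
{
	return prev->start + prev->len == next->start;
}

int main(void)
{
	struct em maps[] = { {0, 4096}, {4096, 4096}, {8192, 4096} };
	struct em merged = maps[1];		/* the just-unpinned entry */

	if (mergable(&maps[0], &merged)) {	/* absorb the previous one */
		merged.start = maps[0].start;
		merged.len += maps[0].len;
	}
	if (mergable(&merged, &maps[2]))	/* absorb the next one */
		merged.len += maps[2].len;

	printf("[%lu, +%lu)\n", merged.start, merged.len);	/* [0, +12288) */
	return 0;
}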
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index fb6eeef06bb0..ab6d74b6e647 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -31,7 +31,7 @@ struct extent_map {
31 31
32struct extent_map_tree { 32struct extent_map_tree {
33 struct rb_root map; 33 struct rb_root map;
34 spinlock_t lock; 34 rwlock_t lock;
35}; 35};
36 36
37static inline u64 extent_map_end(struct extent_map *em) 37static inline u64 extent_map_end(struct extent_map *em)
@@ -59,4 +59,7 @@ struct extent_map *alloc_extent_map(gfp_t mask);
59void free_extent_map(struct extent_map *em); 59void free_extent_map(struct extent_map *em);
60int __init extent_map_init(void); 60int __init extent_map_init(void);
61void extent_map_exit(void); 61void extent_map_exit(void);
62int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len);
63struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
64 u64 start, u64 len);
62#endif 65#endif
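Converting extent_map_tree's spinlock to an rwlock lets the frequent read-only lookups in the read and write paths run concurrently while inserts, removals and unpins still serialize. A userspace analogue using pthread_rwlock_t (the kernel's rwlock_t has the same read/write split; the POSIX API is used here only so the sketch runs):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t tree_lock = PTHREAD_RWLOCK_INITIALIZER;
static int tree_entries;	/* stands in for the rb-tree */

static int lookup(void)		/* lookup_extent_mapping analogue */
{
	int n;

	pthread_rwlock_rdlock(&tree_lock);	/* readers run concurrently */
	n = tree_entries;
	pthread_rwlock_unlock(&tree_lock);
	return n;
}

static void insert(void)	/* add_extent_mapping analogue */
{
	pthread_rwlock_wrlock(&tree_lock);	/* writers are exclusive */
	tree_entries++;
	pthread_rwlock_unlock(&tree_lock);
}

int main(void)
{
	insert();
	printf("%d entries\n", lookup());
	return 0;
}

Readers never block each other, which matters here because extent map lookups vastly outnumber modifications on read-heavy workloads.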
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 4b833972273a..571ad3c13b47 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -112,8 +112,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
112 int err = 0; 112 int err = 0;
113 int i; 113 int i;
114 struct inode *inode = fdentry(file)->d_inode; 114 struct inode *inode = fdentry(file)->d_inode;
115 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
116 u64 hint_byte;
117 u64 num_bytes; 115 u64 num_bytes;
118 u64 start_pos; 116 u64 start_pos;
119 u64 end_of_last_block; 117 u64 end_of_last_block;
@@ -125,22 +123,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
125 root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 123 root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
126 124
127 end_of_last_block = start_pos + num_bytes - 1; 125 end_of_last_block = start_pos + num_bytes - 1;
128
129 lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
130 trans = btrfs_join_transaction(root, 1);
131 if (!trans) {
132 err = -ENOMEM;
133 goto out_unlock;
134 }
135 btrfs_set_trans_block_group(trans, inode);
136 hint_byte = 0;
137
138 set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS);
139
140 /* check for reserved extents on each page, we don't want
141 * to reset the delalloc bit on things that already have
142 * extents reserved.
143 */
144 btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); 126 btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
145 for (i = 0; i < num_pages; i++) { 127 for (i = 0; i < num_pages; i++) {
146 struct page *p = pages[i]; 128 struct page *p = pages[i];
@@ -155,9 +137,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
155 * at this time. 137 * at this time.
156 */ 138 */
157 } 139 }
158 err = btrfs_end_transaction(trans, root);
159out_unlock:
160 unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
161 return err; 140 return err;
162} 141}
163 142
@@ -189,18 +168,18 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
189 if (!split2) 168 if (!split2)
190 split2 = alloc_extent_map(GFP_NOFS); 169 split2 = alloc_extent_map(GFP_NOFS);
191 170
192 spin_lock(&em_tree->lock); 171 write_lock(&em_tree->lock);
193 em = lookup_extent_mapping(em_tree, start, len); 172 em = lookup_extent_mapping(em_tree, start, len);
194 if (!em) { 173 if (!em) {
195 spin_unlock(&em_tree->lock); 174 write_unlock(&em_tree->lock);
196 break; 175 break;
197 } 176 }
198 flags = em->flags; 177 flags = em->flags;
199 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { 178 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
200 spin_unlock(&em_tree->lock);
201 if (em->start <= start && 179 if (em->start <= start &&
202 (!testend || em->start + em->len >= start + len)) { 180 (!testend || em->start + em->len >= start + len)) {
203 free_extent_map(em); 181 free_extent_map(em);
182 write_unlock(&em_tree->lock);
204 break; 183 break;
205 } 184 }
206 if (start < em->start) { 185 if (start < em->start) {
@@ -210,6 +189,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
210 start = em->start + em->len; 189 start = em->start + em->len;
211 } 190 }
212 free_extent_map(em); 191 free_extent_map(em);
192 write_unlock(&em_tree->lock);
213 continue; 193 continue;
214 } 194 }
215 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 195 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
@@ -260,7 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
260 free_extent_map(split); 240 free_extent_map(split);
261 split = NULL; 241 split = NULL;
262 } 242 }
263 spin_unlock(&em_tree->lock); 243 write_unlock(&em_tree->lock);
264 244
265 /* once for us */ 245 /* once for us */
266 free_extent_map(em); 246 free_extent_map(em);
@@ -289,7 +269,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
289noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, 269noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
290 struct btrfs_root *root, struct inode *inode, 270 struct btrfs_root *root, struct inode *inode,
291 u64 start, u64 end, u64 locked_end, 271 u64 start, u64 end, u64 locked_end,
292 u64 inline_limit, u64 *hint_byte) 272 u64 inline_limit, u64 *hint_byte, int drop_cache)
293{ 273{
294 u64 extent_end = 0; 274 u64 extent_end = 0;
295 u64 search_start = start; 275 u64 search_start = start;
@@ -314,7 +294,8 @@ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
314 int ret; 294 int ret;
315 295
316 inline_limit = 0; 296 inline_limit = 0;
317 btrfs_drop_extent_cache(inode, start, end - 1, 0); 297 if (drop_cache)
298 btrfs_drop_extent_cache(inode, start, end - 1, 0);
318 299
319 path = btrfs_alloc_path(); 300 path = btrfs_alloc_path();
320 if (!path) 301 if (!path)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 5edcee3a617f..5c2caad76212 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -259,7 +259,9 @@ static int link_free_space(struct btrfs_block_group_cache *block_group,
259 259
260static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) 260static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
261{ 261{
262 u64 max_bytes, possible_bytes; 262 u64 max_bytes;
263 u64 bitmap_bytes;
264 u64 extent_bytes;
263 265
264 /* 266 /*
265 * The goal is to keep the total amount of memory used per 1gb of space 267 * The goal is to keep the total amount of memory used per 1gb of space
@@ -269,22 +271,27 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
269 max_bytes = MAX_CACHE_BYTES_PER_GIG * 271 max_bytes = MAX_CACHE_BYTES_PER_GIG *
270 (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); 272 (div64_u64(block_group->key.offset, 1024 * 1024 * 1024));
271 273
272 possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) + 274 /*
273 (sizeof(struct btrfs_free_space) * 275 * we want to account for 1 more bitmap than what we have so we can make
274 block_group->extents_thresh); 276 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
277 * we add more bitmaps.
278 */
279 bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;
275 280
276 if (possible_bytes > max_bytes) { 281 if (bitmap_bytes >= max_bytes) {
277 int extent_bytes = max_bytes - 282 block_group->extents_thresh = 0;
278 (block_group->total_bitmaps * PAGE_CACHE_SIZE); 283 return;
284 }
279 285
280 if (extent_bytes <= 0) { 286 /*
281 block_group->extents_thresh = 0; 287 * we want the extent entry threshold to always be at most 1/2 the maxw
282 return; 288 * bytes we can have, or whatever is less than that.
283 } 289 */
290 extent_bytes = max_bytes - bitmap_bytes;
291 extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
284 292
285 block_group->extents_thresh = extent_bytes / 293 block_group->extents_thresh =
286 (sizeof(struct btrfs_free_space)); 294 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
287 }
288} 295}
289 296
290static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, 297static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
@@ -403,6 +410,7 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
403 BUG_ON(block_group->total_bitmaps >= max_bitmaps); 410 BUG_ON(block_group->total_bitmaps >= max_bitmaps);
404 411
405 info->offset = offset_to_bitmap(block_group, offset); 412 info->offset = offset_to_bitmap(block_group, offset);
413 info->bytes = 0;
406 link_free_space(block_group, info); 414 link_free_space(block_group, info);
407 block_group->total_bitmaps++; 415 block_group->total_bitmaps++;
408 416
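The rewritten recalculate_thresholds charges the per-gigabyte memory budget in a fixed order: bitmaps first (plus one spare, so adding the next bitmap cannot bust the budget), then extent entries get what remains, capped at half the budget. Working the arithmetic in userspace; MAX_CACHE_BYTES_PER_GIG's real value is not visible in this hunk, so 32 KiB per GiB is an assumption:

#include <stdio.h>
#include <stdint.h>

#define PAGE_CACHE_SIZE 4096ULL
#define MAX_CACHE_BYTES_PER_GIG (32ULL * 1024)	/* assumed budget */

struct free_space { uint64_t offset, bytes; };	/* size stand-in */

int main(void)
{
	uint64_t gigs = 4, total_bitmaps = 2;
	uint64_t max_bytes = MAX_CACHE_BYTES_PER_GIG * gigs;

	/* charge one extra bitmap so the next add can't bust the budget */
	uint64_t bitmap_bytes = (total_bitmaps + 1) * PAGE_CACHE_SIZE;
	if (bitmap_bytes >= max_bytes) {
		printf("extents_thresh = 0\n");
		return 0;
	}

	/* extent entries get the rest, capped at half the budget */
	uint64_t extent_bytes = max_bytes - bitmap_bytes;
	if (extent_bytes > max_bytes / 2)
		extent_bytes = max_bytes / 2;

	printf("extents_thresh = %llu\n",
	       (unsigned long long)(extent_bytes / sizeof(struct free_space)));
	return 0;
}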
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 6b627c611808..72ce3c173d6a 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -149,6 +149,8 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
149 ptr = (unsigned long)(ref + 1); 149 ptr = (unsigned long)(ref + 1);
150 ret = 0; 150 ret = 0;
151 } else if (ret < 0) { 151 } else if (ret < 0) {
152 if (ret == -EOVERFLOW)
153 ret = -EMLINK;
152 goto out; 154 goto out;
153 } else { 155 } else {
154 ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 156 ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
@@ -177,8 +179,6 @@ int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
177 179
178 ret = btrfs_insert_empty_item(trans, root, path, &key, 180 ret = btrfs_insert_empty_item(trans, root, path, &key,
179 sizeof(struct btrfs_inode_item)); 181 sizeof(struct btrfs_inode_item));
180 if (ret == 0 && objectid > root->highest_inode)
181 root->highest_inode = objectid;
182 return ret; 182 return ret;
183} 183}
184 184
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 9abbced1123d..c56eb5909172 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -43,9 +43,10 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid)
43 slot = path->slots[0] - 1; 43 slot = path->slots[0] - 1;
44 l = path->nodes[0]; 44 l = path->nodes[0];
45 btrfs_item_key_to_cpu(l, &found_key, slot); 45 btrfs_item_key_to_cpu(l, &found_key, slot);
46 *objectid = found_key.objectid; 46 *objectid = max_t(u64, found_key.objectid,
47 BTRFS_FIRST_FREE_OBJECTID - 1);
47 } else { 48 } else {
48 *objectid = BTRFS_FIRST_FREE_OBJECTID; 49 *objectid = BTRFS_FIRST_FREE_OBJECTID - 1;
49 } 50 }
50 ret = 0; 51 ret = 0;
51error: 52error:
@@ -53,91 +54,27 @@ error:
53 return ret; 54 return ret;
54} 55}
55 56
56/*
57 * walks the btree of allocated inodes and find a hole.
58 */
59int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, 57int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
60 struct btrfs_root *root, 58 struct btrfs_root *root,
61 u64 dirid, u64 *objectid) 59 u64 dirid, u64 *objectid)
62{ 60{
63 struct btrfs_path *path;
64 struct btrfs_key key;
65 int ret; 61 int ret;
66 int slot = 0;
67 u64 last_ino = 0;
68 int start_found;
69 struct extent_buffer *l;
70 struct btrfs_key search_key;
71 u64 search_start = dirid;
72
73 mutex_lock(&root->objectid_mutex); 62 mutex_lock(&root->objectid_mutex);
74 if (root->last_inode_alloc >= BTRFS_FIRST_FREE_OBJECTID &&
75 root->last_inode_alloc < BTRFS_LAST_FREE_OBJECTID) {
76 *objectid = ++root->last_inode_alloc;
77 mutex_unlock(&root->objectid_mutex);
78 return 0;
79 }
80 path = btrfs_alloc_path();
81 BUG_ON(!path);
82 search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID);
83 search_key.objectid = search_start;
84 search_key.type = 0;
85 search_key.offset = 0;
86
87 start_found = 0;
88 ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0);
89 if (ret < 0)
90 goto error;
91 63
92 while (1) { 64 if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) {
93 l = path->nodes[0]; 65 ret = btrfs_find_highest_inode(root, &root->highest_objectid);
94 slot = path->slots[0]; 66 if (ret)
95 if (slot >= btrfs_header_nritems(l)) { 67 goto out;
96 ret = btrfs_next_leaf(root, path); 68 }
97 if (ret == 0)
98 continue;
99 if (ret < 0)
100 goto error;
101 if (!start_found) {
102 *objectid = search_start;
103 start_found = 1;
104 goto found;
105 }
106 *objectid = last_ino > search_start ?
107 last_ino : search_start;
108 goto found;
109 }
110 btrfs_item_key_to_cpu(l, &key, slot);
111 if (key.objectid >= search_start) {
112 if (start_found) {
113 if (last_ino < search_start)
114 last_ino = search_start;
115 if (key.objectid > last_ino) {
116 *objectid = last_ino;
117 goto found;
118 }
119 } else if (key.objectid > search_start) {
120 *objectid = search_start;
121 goto found;
122 }
123 }
124 if (key.objectid >= BTRFS_LAST_FREE_OBJECTID)
125 break;
126 69
127 start_found = 1; 70 if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
128 last_ino = key.objectid + 1; 71 ret = -ENOSPC;
129 path->slots[0]++; 72 goto out;
130 } 73 }
131 BUG_ON(1); 74
132found: 75 *objectid = ++root->highest_objectid;
133 btrfs_release_path(root, path); 76 ret = 0;
134 btrfs_free_path(path); 77out:
135 BUG_ON(*objectid < search_start);
136 mutex_unlock(&root->objectid_mutex);
137 return 0;
138error:
139 btrfs_release_path(root, path);
140 btrfs_free_path(path);
141 mutex_unlock(&root->objectid_mutex); 78 mutex_unlock(&root->objectid_mutex);
142 return ret; 79 return ret;
143} 80}
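The rewritten allocator replaces the whole btree walk with a cached counter: the highest objectid is discovered once (note that btrfs_find_highest_inode now clamps to BTRFS_FIRST_FREE_OBJECTID - 1, so the first increment lands on BTRFS_FIRST_FREE_OBJECTID), and each allocation is a locked increment plus an overflow check. A hedged userspace analogue; the names and the scan stub are invented:

#include <pthread.h>
#include <stdio.h>
#include <stdint.h>

#define FIRST_FREE_OBJECTID 256ULL
#define LAST_FREE_OBJECTID  ((uint64_t)-256)

static pthread_mutex_t objectid_mutex = PTHREAD_MUTEX_INITIALIZER;
static uint64_t highest_objectid;	/* 0 = not initialized yet */

/* stub for the one-time btree scan in btrfs_find_highest_inode */
static uint64_t scan_highest(void)
{
	return FIRST_FREE_OBJECTID - 1;	/* empty fs: nothing allocated */
}

static int find_free_objectid(uint64_t *objectid)
{
	int ret = 0;

	pthread_mutex_lock(&objectid_mutex);
	if (highest_objectid < FIRST_FREE_OBJECTID)
		highest_objectid = scan_highest();	/* lazy init */
	if (highest_objectid >= LAST_FREE_OBJECTID) {
		ret = -1;				/* -ENOSPC */
		goto out;
	}
	*objectid = ++highest_objectid;			/* the common path */
out:
	pthread_mutex_unlock(&objectid_mutex);
	return ret;
}

int main(void)
{
	uint64_t id;

	for (int i = 0; i < 3; i++)
		if (!find_free_objectid(&id))
			printf("objectid %llu\n", (unsigned long long)id);
	return 0;
}

The mutex makes the increment atomic with the overflow check, so two concurrent creates can never hand out the same objectid.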
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9096fd0ca3ca..e9b76bcd1c12 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -231,7 +231,8 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
231 } 231 }
232 232
233 ret = btrfs_drop_extents(trans, root, inode, start, 233 ret = btrfs_drop_extents(trans, root, inode, start,
234 aligned_end, aligned_end, start, &hint_byte); 234 aligned_end, aligned_end, start,
235 &hint_byte, 1);
235 BUG_ON(ret); 236 BUG_ON(ret);
236 237
237 if (isize > actual_end) 238 if (isize > actual_end)
@@ -240,7 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
240 inline_len, compressed_size, 241 inline_len, compressed_size,
241 compressed_pages); 242 compressed_pages);
242 BUG_ON(ret); 243 BUG_ON(ret);
243 btrfs_drop_extent_cache(inode, start, aligned_end, 0); 244 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
244 return 0; 245 return 0;
245} 246}
246 247
@@ -425,7 +426,7 @@ again:
425 extent_clear_unlock_delalloc(inode, 426 extent_clear_unlock_delalloc(inode,
426 &BTRFS_I(inode)->io_tree, 427 &BTRFS_I(inode)->io_tree,
427 start, end, NULL, 1, 0, 428 start, end, NULL, 1, 0,
428 0, 1, 1, 1); 429 0, 1, 1, 1, 0);
429 ret = 0; 430 ret = 0;
430 goto free_pages_out; 431 goto free_pages_out;
431 } 432 }
@@ -611,9 +612,9 @@ static noinline int submit_compressed_extents(struct inode *inode,
611 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 612 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
612 613
613 while (1) { 614 while (1) {
614 spin_lock(&em_tree->lock); 615 write_lock(&em_tree->lock);
615 ret = add_extent_mapping(em_tree, em); 616 ret = add_extent_mapping(em_tree, em);
616 spin_unlock(&em_tree->lock); 617 write_unlock(&em_tree->lock);
617 if (ret != -EEXIST) { 618 if (ret != -EEXIST) {
618 free_extent_map(em); 619 free_extent_map(em);
619 break; 620 break;
@@ -640,7 +641,7 @@ static noinline int submit_compressed_extents(struct inode *inode,
640 async_extent->start, 641 async_extent->start,
641 async_extent->start + 642 async_extent->start +
642 async_extent->ram_size - 1, 643 async_extent->ram_size - 1,
643 NULL, 1, 1, 0, 1, 1, 0); 644 NULL, 1, 1, 0, 1, 1, 0, 0);
644 645
645 ret = btrfs_submit_compressed_write(inode, 646 ret = btrfs_submit_compressed_write(inode,
646 async_extent->start, 647 async_extent->start,
@@ -713,7 +714,7 @@ static noinline int cow_file_range(struct inode *inode,
713 extent_clear_unlock_delalloc(inode, 714 extent_clear_unlock_delalloc(inode,
714 &BTRFS_I(inode)->io_tree, 715 &BTRFS_I(inode)->io_tree,
715 start, end, NULL, 1, 1, 716 start, end, NULL, 1, 1,
716 1, 1, 1, 1); 717 1, 1, 1, 1, 0);
717 *nr_written = *nr_written + 718 *nr_written = *nr_written +
718 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; 719 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
719 *page_started = 1; 720 *page_started = 1;
@@ -725,6 +726,15 @@ static noinline int cow_file_range(struct inode *inode,
725 BUG_ON(disk_num_bytes > 726 BUG_ON(disk_num_bytes >
726 btrfs_super_total_bytes(&root->fs_info->super_copy)); 727 btrfs_super_total_bytes(&root->fs_info->super_copy));
727 728
729
730 read_lock(&BTRFS_I(inode)->extent_tree.lock);
731 em = search_extent_mapping(&BTRFS_I(inode)->extent_tree,
732 start, num_bytes);
733 if (em) {
734 alloc_hint = em->block_start;
735 free_extent_map(em);
736 }
737 read_unlock(&BTRFS_I(inode)->extent_tree.lock);
728 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); 738 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
729 739
730 while (disk_num_bytes > 0) { 740 while (disk_num_bytes > 0) {
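Before dropping the cached extents, cow_file_range now peeks at any nearby mapping under the new read lock and seeds alloc_hint from its block_start, steering the allocation next to existing data. The lookup is a nearest-interval search; a sketch over a sorted array with invented names:

#include <stdio.h>

struct em { unsigned long start, len, block_start; };

/* return an entry intersecting [start, start+len), else a neighbour */
static const struct em *search_nearby(const struct em *v, int n,
				      unsigned long start, unsigned long len)
{
	const struct em *prev = NULL;

	for (int i = 0; i < n; i++) {
		if (v[i].start < start + len && start < v[i].start + v[i].len)
			return &v[i];	/* real intersection */
		if (v[i].start < start)
			prev = &v[i];	/* best candidate so far */
	}
	return prev;			/* may be a miss: just nearby */
}

int main(void)
{
	struct em maps[] = { {0, 4096, 1000}, {8192, 4096, 9000} };
	const struct em *em = search_nearby(maps, 2, 4096, 4096);
	unsigned long alloc_hint = em ? em->block_start : 0;

	printf("alloc_hint = %lu\n", alloc_hint);	/* 1000: next to map 0 */
	return 0;
}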
@@ -737,7 +747,6 @@ static noinline int cow_file_range(struct inode *inode,
737 em = alloc_extent_map(GFP_NOFS); 747 em = alloc_extent_map(GFP_NOFS);
738 em->start = start; 748 em->start = start;
739 em->orig_start = em->start; 749 em->orig_start = em->start;
740
741 ram_size = ins.offset; 750 ram_size = ins.offset;
742 em->len = ins.offset; 751 em->len = ins.offset;
743 752
@@ -747,9 +756,9 @@ static noinline int cow_file_range(struct inode *inode,
747 set_bit(EXTENT_FLAG_PINNED, &em->flags); 756 set_bit(EXTENT_FLAG_PINNED, &em->flags);
748 757
749 while (1) { 758 while (1) {
750 spin_lock(&em_tree->lock); 759 write_lock(&em_tree->lock);
751 ret = add_extent_mapping(em_tree, em); 760 ret = add_extent_mapping(em_tree, em);
752 spin_unlock(&em_tree->lock); 761 write_unlock(&em_tree->lock);
753 if (ret != -EEXIST) { 762 if (ret != -EEXIST) {
754 free_extent_map(em); 763 free_extent_map(em);
755 break; 764 break;
@@ -776,11 +785,14 @@ static noinline int cow_file_range(struct inode *inode,
776 /* we're not doing compressed IO, don't unlock the first 785 /* we're not doing compressed IO, don't unlock the first
777 * page (which the caller expects to stay locked), don't 786 * page (which the caller expects to stay locked), don't
778 * clear any dirty bits and don't set any writeback bits 787 * clear any dirty bits and don't set any writeback bits
788 *
789 * Do set the Private2 bit so we know this page was properly
 790		 * set up for writepage
779 */ 791 */
780 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 792 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
781 start, start + ram_size - 1, 793 start, start + ram_size - 1,
782 locked_page, unlock, 1, 794 locked_page, unlock, 1,
783 1, 0, 0, 0); 795 1, 0, 0, 0, 1);
784 disk_num_bytes -= cur_alloc_size; 796 disk_num_bytes -= cur_alloc_size;
785 num_bytes -= cur_alloc_size; 797 num_bytes -= cur_alloc_size;
786 alloc_hint = ins.objectid + ins.offset; 798 alloc_hint = ins.objectid + ins.offset;
@@ -853,7 +865,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
 853	int limit = 10 * 1024 * 1024;	 865
854 866
855 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | 867 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED |
856 EXTENT_DELALLOC, 1, 0, GFP_NOFS); 868 EXTENT_DELALLOC, 1, 0, NULL, GFP_NOFS);
857 while (start < end) { 869 while (start < end) {
858 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); 870 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
859 async_cow->inode = inode; 871 async_cow->inode = inode;
@@ -1080,9 +1092,9 @@ out_check:
1080 em->bdev = root->fs_info->fs_devices->latest_bdev; 1092 em->bdev = root->fs_info->fs_devices->latest_bdev;
1081 set_bit(EXTENT_FLAG_PINNED, &em->flags); 1093 set_bit(EXTENT_FLAG_PINNED, &em->flags);
1082 while (1) { 1094 while (1) {
1083 spin_lock(&em_tree->lock); 1095 write_lock(&em_tree->lock);
1084 ret = add_extent_mapping(em_tree, em); 1096 ret = add_extent_mapping(em_tree, em);
1085 spin_unlock(&em_tree->lock); 1097 write_unlock(&em_tree->lock);
1086 if (ret != -EEXIST) { 1098 if (ret != -EEXIST) {
1087 free_extent_map(em); 1099 free_extent_map(em);
1088 break; 1100 break;
@@ -1101,7 +1113,7 @@ out_check:
1101 1113
1102 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 1114 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
1103 cur_offset, cur_offset + num_bytes - 1, 1115 cur_offset, cur_offset + num_bytes - 1,
1104 locked_page, 1, 1, 1, 0, 0, 0); 1116 locked_page, 1, 1, 1, 0, 0, 0, 1);
1105 cur_offset = extent_end; 1117 cur_offset = extent_end;
1106 if (cur_offset > end) 1118 if (cur_offset > end)
1107 break; 1119 break;
@@ -1374,10 +1386,8 @@ again:
1374 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); 1386 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
1375 1387
1376 /* already ordered? We're done */ 1388 /* already ordered? We're done */
1377 if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, 1389 if (PagePrivate2(page))
1378 EXTENT_ORDERED, 0)) {
1379 goto out; 1390 goto out;
1380 }
1381 1391
1382 ordered = btrfs_lookup_ordered_extent(inode, page_start); 1392 ordered = btrfs_lookup_ordered_extent(inode, page_start);
1383 if (ordered) { 1393 if (ordered) {
@@ -1413,11 +1423,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
1413 struct inode *inode = page->mapping->host; 1423 struct inode *inode = page->mapping->host;
1414 struct btrfs_writepage_fixup *fixup; 1424 struct btrfs_writepage_fixup *fixup;
1415 struct btrfs_root *root = BTRFS_I(inode)->root; 1425 struct btrfs_root *root = BTRFS_I(inode)->root;
1416 int ret;
1417 1426
1418 ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end, 1427 /* this page is properly in the ordered list */
1419 EXTENT_ORDERED, 0); 1428 if (TestClearPagePrivate2(page))
1420 if (ret)
1421 return 0; 1429 return 0;
1422 1430
1423 if (PageChecked(page)) 1431 if (PageChecked(page))
@@ -1455,9 +1463,19 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1455 BUG_ON(!path); 1463 BUG_ON(!path);
1456 1464
1457 path->leave_spinning = 1; 1465 path->leave_spinning = 1;
1466
1467 /*
1468 * we may be replacing one extent in the tree with another.
1469 * The new extent is pinned in the extent map, and we don't want
1470 * to drop it from the cache until it is completely in the btree.
1471 *
1472 * So, tell btrfs_drop_extents to leave this extent in the cache.
 1473	 * The caller is expected to unpin it and allow it to be merged
1474 * with the others.
1475 */
1458 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1476 ret = btrfs_drop_extents(trans, root, inode, file_pos,
1459 file_pos + num_bytes, locked_end, 1477 file_pos + num_bytes, locked_end,
1460 file_pos, &hint); 1478 file_pos, &hint, 0);
1461 BUG_ON(ret); 1479 BUG_ON(ret);
1462 1480
1463 ins.objectid = inode->i_ino; 1481 ins.objectid = inode->i_ino;
@@ -1485,7 +1503,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1485 btrfs_mark_buffer_dirty(leaf); 1503 btrfs_mark_buffer_dirty(leaf);
1486 1504
1487 inode_add_bytes(inode, num_bytes); 1505 inode_add_bytes(inode, num_bytes);
1488 btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0);
1489 1506
1490 ins.objectid = disk_bytenr; 1507 ins.objectid = disk_bytenr;
1491 ins.offset = disk_num_bytes; 1508 ins.offset = disk_num_bytes;
@@ -1596,6 +1613,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1596 ordered_extent->len, 1613 ordered_extent->len,
1597 compressed, 0, 0, 1614 compressed, 0, 0,
1598 BTRFS_FILE_EXTENT_REG); 1615 BTRFS_FILE_EXTENT_REG);
1616 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
1617 ordered_extent->file_offset,
1618 ordered_extent->len);
1599 BUG_ON(ret); 1619 BUG_ON(ret);
1600 } 1620 }
1601 unlock_extent(io_tree, ordered_extent->file_offset, 1621 unlock_extent(io_tree, ordered_extent->file_offset,
@@ -1623,6 +1643,7 @@ nocow:
1623static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, 1643static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
1624 struct extent_state *state, int uptodate) 1644 struct extent_state *state, int uptodate)
1625{ 1645{
1646 ClearPagePrivate2(page);
1626 return btrfs_finish_ordered_io(page->mapping->host, start, end); 1647 return btrfs_finish_ordered_io(page->mapping->host, start, end);
1627} 1648}
1628 1649
@@ -1669,13 +1690,13 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
1669 failrec->last_mirror = 0; 1690 failrec->last_mirror = 0;
1670 failrec->bio_flags = 0; 1691 failrec->bio_flags = 0;
1671 1692
1672 spin_lock(&em_tree->lock); 1693 read_lock(&em_tree->lock);
1673 em = lookup_extent_mapping(em_tree, start, failrec->len); 1694 em = lookup_extent_mapping(em_tree, start, failrec->len);
1674 if (em->start > start || em->start + em->len < start) { 1695 if (em->start > start || em->start + em->len < start) {
1675 free_extent_map(em); 1696 free_extent_map(em);
1676 em = NULL; 1697 em = NULL;
1677 } 1698 }
1678 spin_unlock(&em_tree->lock); 1699 read_unlock(&em_tree->lock);
1679 1700
1680 if (!em || IS_ERR(em)) { 1701 if (!em || IS_ERR(em)) {
1681 kfree(failrec); 1702 kfree(failrec);
@@ -1794,7 +1815,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
1794 return 0; 1815 return 0;
1795 1816
1796 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && 1817 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
1797 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) { 1818 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
1798 clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM, 1819 clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM,
1799 GFP_NOFS); 1820 GFP_NOFS);
1800 return 0; 1821 return 0;
@@ -2352,6 +2373,69 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
2352 return ret; 2373 return ret;
2353} 2374}
2354 2375
2376int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
2377 struct btrfs_root *root,
2378 struct inode *dir, u64 objectid,
2379 const char *name, int name_len)
2380{
2381 struct btrfs_path *path;
2382 struct extent_buffer *leaf;
2383 struct btrfs_dir_item *di;
2384 struct btrfs_key key;
2385 u64 index;
2386 int ret;
2387
2388 path = btrfs_alloc_path();
2389 if (!path)
2390 return -ENOMEM;
2391
2392 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
2393 name, name_len, -1);
2394 BUG_ON(!di || IS_ERR(di));
2395
2396 leaf = path->nodes[0];
2397 btrfs_dir_item_key_to_cpu(leaf, di, &key);
2398 WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
2399 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2400 BUG_ON(ret);
2401 btrfs_release_path(root, path);
2402
2403 ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
2404 objectid, root->root_key.objectid,
2405 dir->i_ino, &index, name, name_len);
2406 if (ret < 0) {
2407 BUG_ON(ret != -ENOENT);
2408 di = btrfs_search_dir_index_item(root, path, dir->i_ino,
2409 name, name_len);
2410 BUG_ON(!di || IS_ERR(di));
2411
2412 leaf = path->nodes[0];
2413 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2414 btrfs_release_path(root, path);
2415 index = key.offset;
2416 }
2417
2418 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
2419 index, name, name_len, -1);
2420 BUG_ON(!di || IS_ERR(di));
2421
2422 leaf = path->nodes[0];
2423 btrfs_dir_item_key_to_cpu(leaf, di, &key);
2424 WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
2425 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2426 BUG_ON(ret);
2427 btrfs_release_path(root, path);
2428
2429 btrfs_i_size_write(dir, dir->i_size - name_len * 2);
2430 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
2431 ret = btrfs_update_inode(trans, root, dir);
2432 BUG_ON(ret);
2433 dir->i_sb->s_dirt = 1;
2434
2435 btrfs_free_path(path);
2436 return 0;
2437}
2438
2355static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) 2439static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
2356{ 2440{
2357 struct inode *inode = dentry->d_inode; 2441 struct inode *inode = dentry->d_inode;
@@ -2361,29 +2445,31 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
2361 struct btrfs_trans_handle *trans; 2445 struct btrfs_trans_handle *trans;
2362 unsigned long nr = 0; 2446 unsigned long nr = 0;
2363 2447
2364 /*
2365 * the FIRST_FREE_OBJECTID check makes sure we don't try to rmdir
2366 * the root of a subvolume or snapshot
2367 */
2368 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || 2448 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE ||
2369 inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 2449 inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
2370 return -ENOTEMPTY; 2450 return -ENOTEMPTY;
2371 }
2372 2451
2373 trans = btrfs_start_transaction(root, 1); 2452 trans = btrfs_start_transaction(root, 1);
2374 btrfs_set_trans_block_group(trans, dir); 2453 btrfs_set_trans_block_group(trans, dir);
2375 2454
2455 if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
2456 err = btrfs_unlink_subvol(trans, root, dir,
2457 BTRFS_I(inode)->location.objectid,
2458 dentry->d_name.name,
2459 dentry->d_name.len);
2460 goto out;
2461 }
2462
2376 err = btrfs_orphan_add(trans, inode); 2463 err = btrfs_orphan_add(trans, inode);
2377 if (err) 2464 if (err)
2378 goto fail_trans; 2465 goto out;
2379 2466
2380 /* now the directory is empty */ 2467 /* now the directory is empty */
2381 err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, 2468 err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
2382 dentry->d_name.name, dentry->d_name.len); 2469 dentry->d_name.name, dentry->d_name.len);
2383 if (!err) 2470 if (!err)
2384 btrfs_i_size_write(inode, 0); 2471 btrfs_i_size_write(inode, 0);
2385 2472out:
2386fail_trans:
2387 nr = trans->blocks_used; 2473 nr = trans->blocks_used;
2388 ret = btrfs_end_transaction_throttle(trans, root); 2474 ret = btrfs_end_transaction_throttle(trans, root);
2389 btrfs_btree_balance_dirty(root, nr); 2475 btrfs_btree_balance_dirty(root, nr);
@@ -2935,7 +3021,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
2935 cur_offset, 3021 cur_offset,
2936 cur_offset + hole_size, 3022 cur_offset + hole_size,
2937 block_end, 3023 block_end,
2938 cur_offset, &hint_byte); 3024 cur_offset, &hint_byte, 1);
2939 if (err) 3025 if (err)
2940 break; 3026 break;
2941 err = btrfs_insert_file_extent(trans, root, 3027 err = btrfs_insert_file_extent(trans, root,
@@ -3003,6 +3089,11 @@ void btrfs_delete_inode(struct inode *inode)
3003 } 3089 }
3004 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3090 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3005 3091
3092 if (inode->i_nlink > 0) {
3093 BUG_ON(btrfs_root_refs(&root->root_item) != 0);
3094 goto no_delete;
3095 }
3096
3006 btrfs_i_size_write(inode, 0); 3097 btrfs_i_size_write(inode, 0);
3007 trans = btrfs_join_transaction(root, 1); 3098 trans = btrfs_join_transaction(root, 1);
3008 3099
@@ -3070,29 +3161,67 @@ out_err:
3070 * is kind of like crossing a mount point. 3161 * is kind of like crossing a mount point.
3071 */ 3162 */
3072static int fixup_tree_root_location(struct btrfs_root *root, 3163static int fixup_tree_root_location(struct btrfs_root *root,
3073 struct btrfs_key *location, 3164 struct inode *dir,
3074 struct btrfs_root **sub_root, 3165 struct dentry *dentry,
3075 struct dentry *dentry) 3166 struct btrfs_key *location,
3167 struct btrfs_root **sub_root)
3076{ 3168{
3077 struct btrfs_root_item *ri; 3169 struct btrfs_path *path;
3170 struct btrfs_root *new_root;
3171 struct btrfs_root_ref *ref;
3172 struct extent_buffer *leaf;
3173 int ret;
3174 int err = 0;
3078 3175
3079 if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY) 3176 path = btrfs_alloc_path();
3080 return 0; 3177 if (!path) {
3081 if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) 3178 err = -ENOMEM;
3082 return 0; 3179 goto out;
3180 }
3083 3181
3084 *sub_root = btrfs_read_fs_root(root->fs_info, location, 3182 err = -ENOENT;
3085 dentry->d_name.name, 3183 ret = btrfs_find_root_ref(root->fs_info->tree_root, path,
3086 dentry->d_name.len); 3184 BTRFS_I(dir)->root->root_key.objectid,
3087 if (IS_ERR(*sub_root)) 3185 location->objectid);
3088 return PTR_ERR(*sub_root); 3186 if (ret) {
3187 if (ret < 0)
3188 err = ret;
3189 goto out;
3190 }
3089 3191
3090 ri = &(*sub_root)->root_item; 3192 leaf = path->nodes[0];
3091 location->objectid = btrfs_root_dirid(ri); 3193 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
3092 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); 3194 if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino ||
3093 location->offset = 0; 3195 btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
3196 goto out;
3094 3197
3095 return 0; 3198 ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
3199 (unsigned long)(ref + 1),
3200 dentry->d_name.len);
3201 if (ret)
3202 goto out;
3203
3204 btrfs_release_path(root->fs_info->tree_root, path);
3205
3206 new_root = btrfs_read_fs_root_no_name(root->fs_info, location);
3207 if (IS_ERR(new_root)) {
3208 err = PTR_ERR(new_root);
3209 goto out;
3210 }
3211
3212 if (btrfs_root_refs(&new_root->root_item) == 0) {
3213 err = -ENOENT;
3214 goto out;
3215 }
3216
3217 *sub_root = new_root;
3218 location->objectid = btrfs_root_dirid(&new_root->root_item);
3219 location->type = BTRFS_INODE_ITEM_KEY;
3220 location->offset = 0;
3221 err = 0;
3222out:
3223 btrfs_free_path(path);
3224 return err;
3096} 3225}
3097 3226
3098static void inode_tree_add(struct inode *inode) 3227static void inode_tree_add(struct inode *inode)
@@ -3101,11 +3230,13 @@ static void inode_tree_add(struct inode *inode)
3101 struct btrfs_inode *entry; 3230 struct btrfs_inode *entry;
3102 struct rb_node **p; 3231 struct rb_node **p;
3103 struct rb_node *parent; 3232 struct rb_node *parent;
3104
3105again: 3233again:
3106 p = &root->inode_tree.rb_node; 3234 p = &root->inode_tree.rb_node;
3107 parent = NULL; 3235 parent = NULL;
3108 3236
3237 if (hlist_unhashed(&inode->i_hash))
3238 return;
3239
3109 spin_lock(&root->inode_lock); 3240 spin_lock(&root->inode_lock);
3110 while (*p) { 3241 while (*p) {
3111 parent = *p; 3242 parent = *p;
@@ -3132,13 +3263,87 @@ again:
3132static void inode_tree_del(struct inode *inode) 3263static void inode_tree_del(struct inode *inode)
3133{ 3264{
3134 struct btrfs_root *root = BTRFS_I(inode)->root; 3265 struct btrfs_root *root = BTRFS_I(inode)->root;
3266 int empty = 0;
3135 3267
3136 spin_lock(&root->inode_lock); 3268 spin_lock(&root->inode_lock);
3137 if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { 3269 if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
3138 rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); 3270 rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
3139 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); 3271 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3272 empty = RB_EMPTY_ROOT(&root->inode_tree);
3140 } 3273 }
3141 spin_unlock(&root->inode_lock); 3274 spin_unlock(&root->inode_lock);
3275
3276 if (empty && btrfs_root_refs(&root->root_item) == 0) {
3277 synchronize_srcu(&root->fs_info->subvol_srcu);
3278 spin_lock(&root->inode_lock);
3279 empty = RB_EMPTY_ROOT(&root->inode_tree);
3280 spin_unlock(&root->inode_lock);
3281 if (empty)
3282 btrfs_add_dead_root(root);
3283 }
3284}
3285
3286int btrfs_invalidate_inodes(struct btrfs_root *root)
3287{
3288 struct rb_node *node;
3289 struct rb_node *prev;
3290 struct btrfs_inode *entry;
3291 struct inode *inode;
3292 u64 objectid = 0;
3293
3294 WARN_ON(btrfs_root_refs(&root->root_item) != 0);
3295
3296 spin_lock(&root->inode_lock);
3297again:
3298 node = root->inode_tree.rb_node;
3299 prev = NULL;
3300 while (node) {
3301 prev = node;
3302 entry = rb_entry(node, struct btrfs_inode, rb_node);
3303
3304 if (objectid < entry->vfs_inode.i_ino)
3305 node = node->rb_left;
3306 else if (objectid > entry->vfs_inode.i_ino)
3307 node = node->rb_right;
3308 else
3309 break;
3310 }
3311 if (!node) {
3312 while (prev) {
3313 entry = rb_entry(prev, struct btrfs_inode, rb_node);
3314 if (objectid <= entry->vfs_inode.i_ino) {
3315 node = prev;
3316 break;
3317 }
3318 prev = rb_next(prev);
3319 }
3320 }
3321 while (node) {
3322 entry = rb_entry(node, struct btrfs_inode, rb_node);
3323 objectid = entry->vfs_inode.i_ino + 1;
3324 inode = igrab(&entry->vfs_inode);
3325 if (inode) {
3326 spin_unlock(&root->inode_lock);
3327 if (atomic_read(&inode->i_count) > 1)
3328 d_prune_aliases(inode);
3329 /*
3330 * btrfs_drop_inode will remove it from
3331 * the inode cache when its usage count
3332 * hits zero.
3333 */
3334 iput(inode);
3335 cond_resched();
3336 spin_lock(&root->inode_lock);
3337 goto again;
3338 }
3339
3340 if (cond_resched_lock(&root->inode_lock))
3341 goto again;
3342
3343 node = rb_next(node);
3344 }
3345 spin_unlock(&root->inode_lock);
3346 return 0;
3142} 3347}
3143 3348
3144static noinline void init_btrfs_i(struct inode *inode) 3349static noinline void init_btrfs_i(struct inode *inode)
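btrfs_invalidate_inodes cannot hold root->inode_lock across iput, so the walk above is restartable: it records the last objectid, drops the lock for the heavy work, and re-descends from that key on resume. A hedged sketch of the resume-by-key pattern, with a sorted array standing in for the rb-tree:

#include <stdio.h>

static int inodes[] = { 3, 7, 9, 12 };	/* sorted, rb-tree stand-in */
static const int n = 4;

int main(void)
{
	unsigned long objectid = 0;
	int passes = 0;

	/* "lock" the tree, find the first entry >= objectid, process one,
	 * "unlock" for the heavy work (iput), then resume from key + 1 */
again:
	passes++;
	for (int i = 0; i < n; i++) {
		if ((unsigned long)inodes[i] < objectid)
			continue;		/* already visited */
		objectid = inodes[i] + 1;	/* remember resume point */
		printf("drop inode %d (pass %d)\n", inodes[i], passes);
		goto again;			/* lock was dropped: re-walk */
	}
	return 0;
}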
@@ -3225,15 +3430,41 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3225 return inode; 3430 return inode;
3226} 3431}
3227 3432
3433static struct inode *new_simple_dir(struct super_block *s,
3434 struct btrfs_key *key,
3435 struct btrfs_root *root)
3436{
3437 struct inode *inode = new_inode(s);
3438
3439 if (!inode)
3440 return ERR_PTR(-ENOMEM);
3441
3442 init_btrfs_i(inode);
3443
3444 BTRFS_I(inode)->root = root;
3445 memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
3446 BTRFS_I(inode)->dummy_inode = 1;
3447
3448 inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
3449 inode->i_op = &simple_dir_inode_operations;
3450 inode->i_fop = &simple_dir_operations;
3451 inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
3452 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
3453
3454 return inode;
3455}
3456
3228struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) 3457struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3229{ 3458{
3230 struct inode *inode; 3459 struct inode *inode;
3231 struct btrfs_inode *bi = BTRFS_I(dir); 3460 struct btrfs_root *root = BTRFS_I(dir)->root;
3232 struct btrfs_root *root = bi->root;
3233 struct btrfs_root *sub_root = root; 3461 struct btrfs_root *sub_root = root;
3234 struct btrfs_key location; 3462 struct btrfs_key location;
3463 int index;
3235 int ret; 3464 int ret;
3236 3465
3466 dentry->d_op = &btrfs_dentry_operations;
3467
3237 if (dentry->d_name.len > BTRFS_NAME_LEN) 3468 if (dentry->d_name.len > BTRFS_NAME_LEN)
3238 return ERR_PTR(-ENAMETOOLONG); 3469 return ERR_PTR(-ENAMETOOLONG);
3239 3470
@@ -3242,29 +3473,50 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3242 if (ret < 0) 3473 if (ret < 0)
3243 return ERR_PTR(ret); 3474 return ERR_PTR(ret);
3244 3475
3245 inode = NULL; 3476 if (location.objectid == 0)
3246 if (location.objectid) { 3477 return NULL;
3247 ret = fixup_tree_root_location(root, &location, &sub_root, 3478
3248 dentry); 3479 if (location.type == BTRFS_INODE_ITEM_KEY) {
3249 if (ret < 0) 3480 inode = btrfs_iget(dir->i_sb, &location, root);
3250 return ERR_PTR(ret); 3481 return inode;
3251 if (ret > 0) 3482 }
3252 return ERR_PTR(-ENOENT); 3483
3484 BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY);
3485
3486 index = srcu_read_lock(&root->fs_info->subvol_srcu);
3487 ret = fixup_tree_root_location(root, dir, dentry,
3488 &location, &sub_root);
3489 if (ret < 0) {
3490 if (ret != -ENOENT)
3491 inode = ERR_PTR(ret);
3492 else
3493 inode = new_simple_dir(dir->i_sb, &location, sub_root);
3494 } else {
3253 inode = btrfs_iget(dir->i_sb, &location, sub_root); 3495 inode = btrfs_iget(dir->i_sb, &location, sub_root);
3254 if (IS_ERR(inode))
3255 return ERR_CAST(inode);
3256 } 3496 }
3497 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
3498
3257 return inode; 3499 return inode;
3258} 3500}
3259 3501
3502static int btrfs_dentry_delete(struct dentry *dentry)
3503{
3504 struct btrfs_root *root;
3505
3506 if (!dentry->d_inode)
3507 return 0;
3508
3509 root = BTRFS_I(dentry->d_inode)->root;
3510 if (btrfs_root_refs(&root->root_item) == 0)
3511 return 1;
3512 return 0;
3513}
3514
3260static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, 3515static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
3261 struct nameidata *nd) 3516 struct nameidata *nd)
3262{ 3517{
3263 struct inode *inode; 3518 struct inode *inode;
3264 3519
3265 if (dentry->d_name.len > BTRFS_NAME_LEN)
3266 return ERR_PTR(-ENAMETOOLONG);
3267
3268 inode = btrfs_lookup_dentry(dir, dentry); 3520 inode = btrfs_lookup_dentry(dir, dentry);
3269 if (IS_ERR(inode)) 3521 if (IS_ERR(inode))
3270 return ERR_CAST(inode); 3522 return ERR_CAST(inode);
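btrfs_lookup_dentry() now has a three-way contract: NULL for a clean negative lookup, a real inode, or an errno encoded into the pointer. For reference, a userspace re-creation of the ERR_PTR()/IS_ERR()/PTR_ERR() convention it leans on (the kernel's own definitions live in include/linux/err.h):

#include <stdio.h>
#include <errno.h>
#include <stdint.h>

#define MAX_ERRNO 4095

/* userspace stand-ins for the kernel's pointer-encoded errors */
static inline void *ERR_PTR(long error) { return (void *)(intptr_t)error; }
static inline long PTR_ERR(const void *ptr) { return (long)(intptr_t)ptr; }
static inline int IS_ERR(const void *ptr)
{
    return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}

static int thing = 42;

/* models the lookup contract: NULL, a real pointer, or an encoded errno */
static void *lookup(int key)
{
    if (key < 0)
        return ERR_PTR(-EINVAL);
    if (key == 0)
        return NULL;        /* negative lookup, not an error */
    return &thing;
}

int main(void)
{
    void *p = lookup(-1);

    if (IS_ERR(p))
        printf("error %ld\n", PTR_ERR(p));
    else if (!p)
        printf("not found\n");
    else
        printf("found %d\n", *(int *)p);
    return 0;
}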
@@ -3603,9 +3855,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
3603 if (ret != 0) 3855 if (ret != 0)
3604 goto fail; 3856 goto fail;
3605 3857
3606 if (objectid > root->highest_inode)
3607 root->highest_inode = objectid;
3608
3609 inode->i_uid = current_fsuid(); 3858 inode->i_uid = current_fsuid();
3610 3859
3611 if (dir && (dir->i_mode & S_ISGID)) { 3860 if (dir && (dir->i_mode & S_ISGID)) {
@@ -3673,26 +3922,35 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
3673 struct inode *parent_inode, struct inode *inode, 3922 struct inode *parent_inode, struct inode *inode,
3674 const char *name, int name_len, int add_backref, u64 index) 3923 const char *name, int name_len, int add_backref, u64 index)
3675{ 3924{
3676 int ret; 3925 int ret = 0;
3677 struct btrfs_key key; 3926 struct btrfs_key key;
3678 struct btrfs_root *root = BTRFS_I(parent_inode)->root; 3927 struct btrfs_root *root = BTRFS_I(parent_inode)->root;
3679 3928
3680 key.objectid = inode->i_ino; 3929 if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
3681 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); 3930 memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));
3682 key.offset = 0; 3931 } else {
3932 key.objectid = inode->i_ino;
3933 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
3934 key.offset = 0;
3935 }
3936
3937 if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
3938 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
3939 key.objectid, root->root_key.objectid,
3940 parent_inode->i_ino,
3941 index, name, name_len);
3942 } else if (add_backref) {
3943 ret = btrfs_insert_inode_ref(trans, root,
3944 name, name_len, inode->i_ino,
3945 parent_inode->i_ino, index);
3946 }
3683 3947
3684 ret = btrfs_insert_dir_item(trans, root, name, name_len,
3685 parent_inode->i_ino,
3686 &key, btrfs_inode_type(inode),
3687 index);
3688 if (ret == 0) { 3948 if (ret == 0) {
3689 if (add_backref) { 3949 ret = btrfs_insert_dir_item(trans, root, name, name_len,
3690 ret = btrfs_insert_inode_ref(trans, root, 3950 parent_inode->i_ino, &key,
3691 name, name_len, 3951 btrfs_inode_type(inode), index);
3692 inode->i_ino, 3952 BUG_ON(ret);
3693 parent_inode->i_ino, 3953
3694 index);
3695 }
3696 btrfs_i_size_write(parent_inode, parent_inode->i_size + 3954 btrfs_i_size_write(parent_inode, parent_inode->i_size +
3697 name_len * 2); 3955 name_len * 2);
3698 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; 3956 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
@@ -3875,18 +4133,16 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
3875 4133
3876 err = btrfs_add_nondir(trans, dentry, inode, 1, index); 4134 err = btrfs_add_nondir(trans, dentry, inode, 1, index);
3877 4135
3878 if (err) 4136 if (err) {
3879 drop_inode = 1;
3880
3881 btrfs_update_inode_block_group(trans, dir);
3882 err = btrfs_update_inode(trans, root, inode);
3883
3884 if (err)
3885 drop_inode = 1; 4137 drop_inode = 1;
4138 } else {
4139 btrfs_update_inode_block_group(trans, dir);
4140 err = btrfs_update_inode(trans, root, inode);
4141 BUG_ON(err);
4142 btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
4143 }
3886 4144
3887 nr = trans->blocks_used; 4145 nr = trans->blocks_used;
3888
3889 btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
3890 btrfs_end_transaction_throttle(trans, root); 4146 btrfs_end_transaction_throttle(trans, root);
3891fail: 4147fail:
3892 if (drop_inode) { 4148 if (drop_inode) {
@@ -4064,11 +4320,11 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
4064 int compressed; 4320 int compressed;
4065 4321
4066again: 4322again:
4067 spin_lock(&em_tree->lock); 4323 read_lock(&em_tree->lock);
4068 em = lookup_extent_mapping(em_tree, start, len); 4324 em = lookup_extent_mapping(em_tree, start, len);
4069 if (em) 4325 if (em)
4070 em->bdev = root->fs_info->fs_devices->latest_bdev; 4326 em->bdev = root->fs_info->fs_devices->latest_bdev;
4071 spin_unlock(&em_tree->lock); 4327 read_unlock(&em_tree->lock);
4072 4328
4073 if (em) { 4329 if (em) {
4074 if (em->start > start || em->start + em->len <= start) 4330 if (em->start > start || em->start + em->len <= start)
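This hunk, together with the matching write_lock()/write_unlock() changes below, moves the extent_map tree from a spinlock to an rwlock, so concurrent readers of cached mappings no longer serialize against each other. The same read/write split expressed with POSIX rwlocks (a sketch; em_lock and the int cache are stand-ins for the real tree):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t em_lock = PTHREAD_RWLOCK_INITIALIZER;
static int cached_extent;    /* stands in for the extent_map tree */

/* lookups take the lock shared, like read_lock(&em_tree->lock) */
static int lookup_extent(void)
{
    int val;

    pthread_rwlock_rdlock(&em_lock);
    val = cached_extent;
    pthread_rwlock_unlock(&em_lock);
    return val;
}

/* inserts take it exclusive, like write_lock(&em_tree->lock) */
static void add_extent(int val)
{
    pthread_rwlock_wrlock(&em_lock);
    cached_extent = val;
    pthread_rwlock_unlock(&em_lock);
}

int main(void)
{
    add_extent(123);
    printf("extent = %d\n", lookup_extent());
    return 0;
}

Compile with -pthread. The payoff is the same as in the kernel: any number of lookup_extent() callers can hold the lock at once, and only add_extent() excludes them.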
@@ -4215,6 +4471,11 @@ again:
4215 map = kmap(page); 4471 map = kmap(page);
4216 read_extent_buffer(leaf, map + pg_offset, ptr, 4472 read_extent_buffer(leaf, map + pg_offset, ptr,
4217 copy_size); 4473 copy_size);
4474 if (pg_offset + copy_size < PAGE_CACHE_SIZE) {
4475 memset(map + pg_offset + copy_size, 0,
4476 PAGE_CACHE_SIZE - pg_offset -
4477 copy_size);
4478 }
4218 kunmap(page); 4479 kunmap(page);
4219 } 4480 }
4220 flush_dcache_page(page); 4481 flush_dcache_page(page);
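The added memset() closes a stale-data window when an inline extent only partially fills a page: everything from pg_offset + copy_size to the end of the page must be zeroed before the page is exposed as up to date. The invariant in a self-contained sketch (PAGE_SIZE and the payload here are illustrative):

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096

int main(void)
{
    unsigned char page[PAGE_SIZE];
    const char data[] = "inline extent payload";
    size_t pg_offset = 0;
    size_t copy_size = sizeof(data);

    memset(page, 0xAA, sizeof(page));    /* simulate stale contents */
    memcpy(page + pg_offset, data, copy_size);

    /* zero the tail so no stale bytes become visible */
    if (pg_offset + copy_size < PAGE_SIZE)
        memset(page + pg_offset + copy_size, 0,
               PAGE_SIZE - pg_offset - copy_size);

    printf("last byte: %d\n", page[PAGE_SIZE - 1]);    /* prints 0 */
    return 0;
}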
@@ -4259,7 +4520,7 @@ insert:
4259 } 4520 }
4260 4521
4261 err = 0; 4522 err = 0;
4262 spin_lock(&em_tree->lock); 4523 write_lock(&em_tree->lock);
4263 ret = add_extent_mapping(em_tree, em); 4524 ret = add_extent_mapping(em_tree, em);
4264 /* it is possible that someone inserted the extent into the tree 4525 /* it is possible that someone inserted the extent into the tree
4265 * while we had the lock dropped. It is also possible that 4526 * while we had the lock dropped. It is also possible that
@@ -4299,7 +4560,7 @@ insert:
4299 err = 0; 4560 err = 0;
4300 } 4561 }
4301 } 4562 }
4302 spin_unlock(&em_tree->lock); 4563 write_unlock(&em_tree->lock);
4303out: 4564out:
4304 if (path) 4565 if (path)
4305 btrfs_free_path(path); 4566 btrfs_free_path(path);
@@ -4398,13 +4659,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
4398 u64 page_start = page_offset(page); 4659 u64 page_start = page_offset(page);
4399 u64 page_end = page_start + PAGE_CACHE_SIZE - 1; 4660 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
4400 4661
4662
4663 /*
4664 * we have the page locked, so new writeback can't start,
4665 * and the dirty bit won't be cleared while we are here.
4666 *
4667 * Wait for IO on this page so that we can safely clear
4668 * the PagePrivate2 bit and do ordered accounting
4669 */
4401 wait_on_page_writeback(page); 4670 wait_on_page_writeback(page);
4671
4402 tree = &BTRFS_I(page->mapping->host)->io_tree; 4672 tree = &BTRFS_I(page->mapping->host)->io_tree;
4403 if (offset) { 4673 if (offset) {
4404 btrfs_releasepage(page, GFP_NOFS); 4674 btrfs_releasepage(page, GFP_NOFS);
4405 return; 4675 return;
4406 } 4676 }
4407
4408 lock_extent(tree, page_start, page_end, GFP_NOFS); 4677 lock_extent(tree, page_start, page_end, GFP_NOFS);
4409 ordered = btrfs_lookup_ordered_extent(page->mapping->host, 4678 ordered = btrfs_lookup_ordered_extent(page->mapping->host,
4410 page_offset(page)); 4679 page_offset(page));
@@ -4415,16 +4684,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
4415 */ 4684 */
4416 clear_extent_bit(tree, page_start, page_end, 4685 clear_extent_bit(tree, page_start, page_end,
4417 EXTENT_DIRTY | EXTENT_DELALLOC | 4686 EXTENT_DIRTY | EXTENT_DELALLOC |
4418 EXTENT_LOCKED, 1, 0, GFP_NOFS); 4687 EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
4419 btrfs_finish_ordered_io(page->mapping->host, 4688 /*
4420 page_start, page_end); 4689 * whoever cleared the private bit is responsible
4690 * for the finish_ordered_io
4691 */
4692 if (TestClearPagePrivate2(page)) {
4693 btrfs_finish_ordered_io(page->mapping->host,
4694 page_start, page_end);
4695 }
4421 btrfs_put_ordered_extent(ordered); 4696 btrfs_put_ordered_extent(ordered);
4422 lock_extent(tree, page_start, page_end, GFP_NOFS); 4697 lock_extent(tree, page_start, page_end, GFP_NOFS);
4423 } 4698 }
4424 clear_extent_bit(tree, page_start, page_end, 4699 clear_extent_bit(tree, page_start, page_end,
4425 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | 4700 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
4426 EXTENT_ORDERED, 4701 1, 1, NULL, GFP_NOFS);
4427 1, 1, GFP_NOFS);
4428 __btrfs_releasepage(page, GFP_NOFS); 4702 __btrfs_releasepage(page, GFP_NOFS);
4429 4703
4430 ClearPageChecked(page); 4704 ClearPageChecked(page);
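The TestClearPagePrivate2() dance gives btrfs_finish_ordered_io() exactly one owner: whichever path atomically observes the bit set and clears it does the accounting, and everyone else backs off. A userspace analogue using a C11 atomic exchange (the flag name and finish function are hypothetical):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool private2 = true;    /* set while ordered IO is pending */

static void finish_ordered_io(void)
{
    printf("ordered accounting done\n");
}

/* whoever clears the bit owns the cleanup, as in the hunk above */
static void maybe_finish(const char *who)
{
    if (atomic_exchange(&private2, false)) {
        printf("%s clears the bit\n", who);
        finish_ordered_io();
    } else {
        printf("%s: bit already cleared, skipping\n", who);
    }
}

int main(void)
{
    maybe_finish("invalidatepage");    /* runs the cleanup */
    maybe_finish("writepage end");     /* sees it already cleared */
    return 0;
}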
@@ -4521,11 +4795,14 @@ again:
4521 } 4795 }
4522 ClearPageChecked(page); 4796 ClearPageChecked(page);
4523 set_page_dirty(page); 4797 set_page_dirty(page);
4798 SetPageUptodate(page);
4524 4799
4525 BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; 4800 BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
4526 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 4801 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
4527 4802
4528out_unlock: 4803out_unlock:
4804 if (!ret)
4805 return VM_FAULT_LOCKED;
4529 unlock_page(page); 4806 unlock_page(page);
4530out: 4807out:
4531 return ret; 4808 return ret;
@@ -4594,11 +4871,11 @@ out:
4594 * create a new subvolume directory/inode (helper for the ioctl). 4871 * create a new subvolume directory/inode (helper for the ioctl).
4595 */ 4872 */
4596int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, 4873int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
4597 struct btrfs_root *new_root, struct dentry *dentry, 4874 struct btrfs_root *new_root,
4598 u64 new_dirid, u64 alloc_hint) 4875 u64 new_dirid, u64 alloc_hint)
4599{ 4876{
4600 struct inode *inode; 4877 struct inode *inode;
4601 int error; 4878 int err;
4602 u64 index = 0; 4879 u64 index = 0;
4603 4880
4604 inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, 4881 inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
@@ -4611,11 +4888,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
4611 inode->i_nlink = 1; 4888 inode->i_nlink = 1;
4612 btrfs_i_size_write(inode, 0); 4889 btrfs_i_size_write(inode, 0);
4613 4890
4614 error = btrfs_update_inode(trans, new_root, inode); 4891 err = btrfs_update_inode(trans, new_root, inode);
4615 if (error) 4892 BUG_ON(err);
4616 return error;
4617 4893
4618 d_instantiate(dentry, inode); 4894 iput(inode);
4619 return 0; 4895 return 0;
4620} 4896}
4621 4897
@@ -4693,6 +4969,16 @@ void btrfs_destroy_inode(struct inode *inode)
4693 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); 4969 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
4694} 4970}
4695 4971
4972void btrfs_drop_inode(struct inode *inode)
4973{
4974 struct btrfs_root *root = BTRFS_I(inode)->root;
4975
4976 if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0)
4977 generic_delete_inode(inode);
4978 else
4979 generic_drop_inode(inode);
4980}
4981
4696static void init_once(void *foo) 4982static void init_once(void *foo)
4697{ 4983{
4698 struct btrfs_inode *ei = (struct btrfs_inode *) foo; 4984 struct btrfs_inode *ei = (struct btrfs_inode *) foo;
@@ -4761,31 +5047,32 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4761{ 5047{
4762 struct btrfs_trans_handle *trans; 5048 struct btrfs_trans_handle *trans;
4763 struct btrfs_root *root = BTRFS_I(old_dir)->root; 5049 struct btrfs_root *root = BTRFS_I(old_dir)->root;
5050 struct btrfs_root *dest = BTRFS_I(new_dir)->root;
4764 struct inode *new_inode = new_dentry->d_inode; 5051 struct inode *new_inode = new_dentry->d_inode;
4765 struct inode *old_inode = old_dentry->d_inode; 5052 struct inode *old_inode = old_dentry->d_inode;
4766 struct timespec ctime = CURRENT_TIME; 5053 struct timespec ctime = CURRENT_TIME;
4767 u64 index = 0; 5054 u64 index = 0;
5055 u64 root_objectid;
4768 int ret; 5056 int ret;
4769 5057
4770 /* we're not allowed to rename between subvolumes */ 5058 if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
4771 if (BTRFS_I(old_inode)->root->root_key.objectid != 5059 return -EPERM;
4772 BTRFS_I(new_dir)->root->root_key.objectid) 5060
 5061 /* we only allow renaming a subvolume link between subvolumes */
5062 if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
4773 return -EXDEV; 5063 return -EXDEV;
4774 5064
4775 if (S_ISDIR(old_inode->i_mode) && new_inode && 5065 if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
4776 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { 5066 (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID))
4777 return -ENOTEMPTY; 5067 return -ENOTEMPTY;
4778 }
4779 5068
4780 /* to rename a snapshot or subvolume, we need to juggle the 5069 if (S_ISDIR(old_inode->i_mode) && new_inode &&
4781 * backrefs. This isn't coded yet 5070 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
4782 */ 5071 return -ENOTEMPTY;
4783 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
4784 return -EXDEV;
4785 5072
4786 ret = btrfs_check_metadata_free_space(root); 5073 ret = btrfs_check_metadata_free_space(root);
4787 if (ret) 5074 if (ret)
4788 goto out_unlock; 5075 return ret;
4789 5076
4790 /* 5077 /*
4791 * we're using rename to replace one file with another. 5078 * we're using rename to replace one file with another.
@@ -4796,8 +5083,40 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4796 old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) 5083 old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
4797 filemap_flush(old_inode->i_mapping); 5084 filemap_flush(old_inode->i_mapping);
4798 5085
5086 /* close the racy window with snapshot create/destroy ioctl */
5087 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
5088 down_read(&root->fs_info->subvol_sem);
5089
4799 trans = btrfs_start_transaction(root, 1); 5090 trans = btrfs_start_transaction(root, 1);
5091 btrfs_set_trans_block_group(trans, new_dir);
5092
5093 if (dest != root)
5094 btrfs_record_root_in_trans(trans, dest);
4800 5095
5096 ret = btrfs_set_inode_index(new_dir, &index);
5097 if (ret)
5098 goto out_fail;
5099
5100 if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
5101 /* force full log commit if subvolume involved. */
5102 root->fs_info->last_trans_log_full_commit = trans->transid;
5103 } else {
5104 ret = btrfs_insert_inode_ref(trans, dest,
5105 new_dentry->d_name.name,
5106 new_dentry->d_name.len,
5107 old_inode->i_ino,
5108 new_dir->i_ino, index);
5109 if (ret)
5110 goto out_fail;
5111 /*
5112 * this is an ugly little race, but the rename is required
5113 * to make sure that if we crash, the inode is either at the
5114 * old name or the new one. pinning the log transaction lets
5115 * us make sure we don't allow a log commit to come in after
5116 * we unlink the name but before we add the new name back in.
5117 */
5118 btrfs_pin_log_trans(root);
5119 }
4801 /* 5120 /*
4802 * make sure the inode gets flushed if it is replacing 5121 * make sure the inode gets flushed if it is replacing
4803 * something. 5122 * something.
@@ -4807,18 +5126,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4807 btrfs_add_ordered_operation(trans, root, old_inode); 5126 btrfs_add_ordered_operation(trans, root, old_inode);
4808 } 5127 }
4809 5128
4810 /*
4811 * this is an ugly little race, but the rename is required to make
4812 * sure that if we crash, the inode is either at the old name
4813 * or the new one. pinning the log transaction lets us make sure
4814 * we don't allow a log commit to come in after we unlink the
4815 * name but before we add the new name back in.
4816 */
4817 btrfs_pin_log_trans(root);
4818
4819 btrfs_set_trans_block_group(trans, new_dir);
4820
4821 btrfs_inc_nlink(old_dentry->d_inode);
4822 old_dir->i_ctime = old_dir->i_mtime = ctime; 5129 old_dir->i_ctime = old_dir->i_mtime = ctime;
4823 new_dir->i_ctime = new_dir->i_mtime = ctime; 5130 new_dir->i_ctime = new_dir->i_mtime = ctime;
4824 old_inode->i_ctime = ctime; 5131 old_inode->i_ctime = ctime;
@@ -4826,47 +5133,58 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4826 if (old_dentry->d_parent != new_dentry->d_parent) 5133 if (old_dentry->d_parent != new_dentry->d_parent)
4827 btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); 5134 btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
4828 5135
4829 ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, 5136 if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
4830 old_dentry->d_name.name, 5137 root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
4831 old_dentry->d_name.len); 5138 ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid,
4832 if (ret) 5139 old_dentry->d_name.name,
4833 goto out_fail; 5140 old_dentry->d_name.len);
5141 } else {
5142 btrfs_inc_nlink(old_dentry->d_inode);
5143 ret = btrfs_unlink_inode(trans, root, old_dir,
5144 old_dentry->d_inode,
5145 old_dentry->d_name.name,
5146 old_dentry->d_name.len);
5147 }
5148 BUG_ON(ret);
4834 5149
4835 if (new_inode) { 5150 if (new_inode) {
4836 new_inode->i_ctime = CURRENT_TIME; 5151 new_inode->i_ctime = CURRENT_TIME;
4837 ret = btrfs_unlink_inode(trans, root, new_dir, 5152 if (unlikely(new_inode->i_ino ==
4838 new_dentry->d_inode, 5153 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
4839 new_dentry->d_name.name, 5154 root_objectid = BTRFS_I(new_inode)->location.objectid;
4840 new_dentry->d_name.len); 5155 ret = btrfs_unlink_subvol(trans, dest, new_dir,
4841 if (ret) 5156 root_objectid,
4842 goto out_fail; 5157 new_dentry->d_name.name,
5158 new_dentry->d_name.len);
5159 BUG_ON(new_inode->i_nlink == 0);
5160 } else {
5161 ret = btrfs_unlink_inode(trans, dest, new_dir,
5162 new_dentry->d_inode,
5163 new_dentry->d_name.name,
5164 new_dentry->d_name.len);
5165 }
5166 BUG_ON(ret);
4843 if (new_inode->i_nlink == 0) { 5167 if (new_inode->i_nlink == 0) {
4844 ret = btrfs_orphan_add(trans, new_dentry->d_inode); 5168 ret = btrfs_orphan_add(trans, new_dentry->d_inode);
4845 if (ret) 5169 BUG_ON(ret);
4846 goto out_fail;
4847 } 5170 }
4848
4849 } 5171 }
4850 ret = btrfs_set_inode_index(new_dir, &index);
4851 if (ret)
4852 goto out_fail;
4853 5172
4854 ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode, 5173 ret = btrfs_add_link(trans, new_dir, old_inode,
4855 old_inode, new_dentry->d_name.name, 5174 new_dentry->d_name.name,
4856 new_dentry->d_name.len, 1, index); 5175 new_dentry->d_name.len, 0, index);
4857 if (ret) 5176 BUG_ON(ret);
4858 goto out_fail;
4859 5177
4860 btrfs_log_new_name(trans, old_inode, old_dir, 5178 if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
4861 new_dentry->d_parent); 5179 btrfs_log_new_name(trans, old_inode, old_dir,
5180 new_dentry->d_parent);
5181 btrfs_end_log_trans(root);
5182 }
4862out_fail: 5183out_fail:
4863
4864 /* this btrfs_end_log_trans just allows the current
4865 * log-sub transaction to complete
4866 */
4867 btrfs_end_log_trans(root);
4868 btrfs_end_transaction_throttle(trans, root); 5184 btrfs_end_transaction_throttle(trans, root);
4869out_unlock: 5185
5186 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
5187 up_read(&root->fs_info->subvol_sem);
4870 return ret; 5188 return ret;
4871} 5189}
4872 5190
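After this rewrite, btrfs_rename() returns -EXDEV only when an ordinary inode would cross subvolume boundaries; moving a subvolume link itself between subvolumes is now permitted, at the cost of forcing a full log commit. From userspace the refusal still surfaces as a plain EXDEV from rename(2), which callers handle the same way mv does, for example:

#include <stdio.h>
#include <errno.h>

int main(int argc, char **argv)
{
    if (argc != 3) {
        fprintf(stderr, "usage: %s OLD NEW\n", argv[0]);
        return 1;
    }
    if (rename(argv[1], argv[2]) == 0) {
        puts("renamed");
        return 0;
    }
    if (errno == EXDEV) {
        /* what btrfs_rename() returns for cross-subvolume moves
         * of ordinary inodes; callers must copy + unlink instead */
        fprintf(stderr, "cross-device/subvolume: copy needed\n");
        return 1;
    }
    perror("rename");
    return 1;
}

That fallback is the standard response to EXDEV, whether the boundary is a mount point or a btrfs subvolume.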
@@ -5058,6 +5376,8 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans,
5058 0, 0, 0, 5376 0, 0, 0,
5059 BTRFS_FILE_EXTENT_PREALLOC); 5377 BTRFS_FILE_EXTENT_PREALLOC);
5060 BUG_ON(ret); 5378 BUG_ON(ret);
5379 btrfs_drop_extent_cache(inode, cur_offset,
5380 cur_offset + ins.offset -1, 0);
5061 num_bytes -= ins.offset; 5381 num_bytes -= ins.offset;
5062 cur_offset += ins.offset; 5382 cur_offset += ins.offset;
5063 alloc_hint = ins.objectid + ins.offset; 5383 alloc_hint = ins.objectid + ins.offset;
@@ -5223,6 +5543,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = {
5223 .lookup = btrfs_lookup, 5543 .lookup = btrfs_lookup,
5224 .permission = btrfs_permission, 5544 .permission = btrfs_permission,
5225}; 5545};
5546
5226static struct file_operations btrfs_dir_file_operations = { 5547static struct file_operations btrfs_dir_file_operations = {
5227 .llseek = generic_file_llseek, 5548 .llseek = generic_file_llseek,
5228 .read = generic_read_dir, 5549 .read = generic_read_dir,
@@ -5269,6 +5590,7 @@ static const struct address_space_operations btrfs_aops = {
5269 .invalidatepage = btrfs_invalidatepage, 5590 .invalidatepage = btrfs_invalidatepage,
5270 .releasepage = btrfs_releasepage, 5591 .releasepage = btrfs_releasepage,
5271 .set_page_dirty = btrfs_set_page_dirty, 5592 .set_page_dirty = btrfs_set_page_dirty,
5593 .error_remove_page = generic_error_remove_page,
5272}; 5594};
5273 5595
5274static const struct address_space_operations btrfs_symlink_aops = { 5596static const struct address_space_operations btrfs_symlink_aops = {
@@ -5309,3 +5631,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
5309 .listxattr = btrfs_listxattr, 5631 .listxattr = btrfs_listxattr,
5310 .removexattr = btrfs_removexattr, 5632 .removexattr = btrfs_removexattr,
5311}; 5633};
5634
5635struct dentry_operations btrfs_dentry_operations = {
5636 .d_delete = btrfs_dentry_delete,
5637};
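btrfs_dentry_operations above wires btrfs_dentry_delete() into the dcache: returning 1 tells the VFS to drop a dentry rather than cache it once its subvolume root has no references left, so names inside a destroyed subvolume vanish as soon as their last user goes away. The decision reduced to a toy predicate:

#include <stdio.h>

struct toy_root { int refs; };

/*
 * models btrfs_dentry_delete(): nonzero means "do not keep this
 * dentry cached" because the owning subvolume root is gone
 */
static int dentry_delete(const struct toy_root *root)
{
    return root && root->refs == 0;
}

int main(void)
{
    struct toy_root live = { .refs = 1 }, dead = { .refs = 0 };

    printf("live subvol: drop=%d\n", dentry_delete(&live));
    printf("deleted subvol: drop=%d\n", dentry_delete(&dead));
    return 0;
}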
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index bd88f25889f7..a8577a7f26ab 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -230,8 +230,8 @@ static noinline int create_subvol(struct btrfs_root *root,
230 struct btrfs_root_item root_item; 230 struct btrfs_root_item root_item;
231 struct btrfs_inode_item *inode_item; 231 struct btrfs_inode_item *inode_item;
232 struct extent_buffer *leaf; 232 struct extent_buffer *leaf;
233 struct btrfs_root *new_root = root; 233 struct btrfs_root *new_root;
234 struct inode *dir; 234 struct inode *dir = dentry->d_parent->d_inode;
235 int ret; 235 int ret;
236 int err; 236 int err;
237 u64 objectid; 237 u64 objectid;
@@ -241,7 +241,7 @@ static noinline int create_subvol(struct btrfs_root *root,
241 241
242 ret = btrfs_check_metadata_free_space(root); 242 ret = btrfs_check_metadata_free_space(root);
243 if (ret) 243 if (ret)
244 goto fail_commit; 244 return ret;
245 245
246 trans = btrfs_start_transaction(root, 1); 246 trans = btrfs_start_transaction(root, 1);
247 BUG_ON(!trans); 247 BUG_ON(!trans);
@@ -304,11 +304,17 @@ static noinline int create_subvol(struct btrfs_root *root,
304 if (ret) 304 if (ret)
305 goto fail; 305 goto fail;
306 306
307 key.offset = (u64)-1;
308 new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
309 BUG_ON(IS_ERR(new_root));
310
311 btrfs_record_root_in_trans(trans, new_root);
312
313 ret = btrfs_create_subvol_root(trans, new_root, new_dirid,
314 BTRFS_I(dir)->block_group);
307 /* 315 /*
308 * insert the directory item 316 * insert the directory item
309 */ 317 */
310 key.offset = (u64)-1;
311 dir = dentry->d_parent->d_inode;
312 ret = btrfs_set_inode_index(dir, &index); 318 ret = btrfs_set_inode_index(dir, &index);
313 BUG_ON(ret); 319 BUG_ON(ret);
314 320
@@ -322,44 +328,18 @@ static noinline int create_subvol(struct btrfs_root *root,
322 ret = btrfs_update_inode(trans, root, dir); 328 ret = btrfs_update_inode(trans, root, dir);
323 BUG_ON(ret); 329 BUG_ON(ret);
324 330
325 /* add the backref first */
326 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, 331 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
327 objectid, BTRFS_ROOT_BACKREF_KEY, 332 objectid, root->root_key.objectid,
328 root->root_key.objectid,
329 dir->i_ino, index, name, namelen); 333 dir->i_ino, index, name, namelen);
330 334
331 BUG_ON(ret); 335 BUG_ON(ret);
332 336
333 /* now add the forward ref */ 337 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
334 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
335 root->root_key.objectid, BTRFS_ROOT_REF_KEY,
336 objectid,
337 dir->i_ino, index, name, namelen);
338
339 BUG_ON(ret);
340
341 ret = btrfs_commit_transaction(trans, root);
342 if (ret)
343 goto fail_commit;
344
345 new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
346 BUG_ON(!new_root);
347
348 trans = btrfs_start_transaction(new_root, 1);
349 BUG_ON(!trans);
350
351 ret = btrfs_create_subvol_root(trans, new_root, dentry, new_dirid,
352 BTRFS_I(dir)->block_group);
353 if (ret)
354 goto fail;
355
356fail: 338fail:
357 nr = trans->blocks_used; 339 nr = trans->blocks_used;
358 err = btrfs_commit_transaction(trans, new_root); 340 err = btrfs_commit_transaction(trans, root);
359 if (err && !ret) 341 if (err && !ret)
360 ret = err; 342 ret = err;
361fail_commit:
362 btrfs_btree_balance_dirty(root, nr);
363 return ret; 343 return ret;
364} 344}
365 345
@@ -420,14 +400,15 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
420 * sys_mkdirat and vfs_mkdir, but we only do a single component lookup 400 * sys_mkdirat and vfs_mkdir, but we only do a single component lookup
421 * inside this filesystem so it's quite a bit simpler. 401 * inside this filesystem so it's quite a bit simpler.
422 */ 402 */
423static noinline int btrfs_mksubvol(struct path *parent, char *name, 403static noinline int btrfs_mksubvol(struct path *parent,
424 int mode, int namelen, 404 char *name, int namelen,
425 struct btrfs_root *snap_src) 405 struct btrfs_root *snap_src)
426{ 406{
407 struct inode *dir = parent->dentry->d_inode;
427 struct dentry *dentry; 408 struct dentry *dentry;
428 int error; 409 int error;
429 410
430 mutex_lock_nested(&parent->dentry->d_inode->i_mutex, I_MUTEX_PARENT); 411 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
431 412
432 dentry = lookup_one_len(name, parent->dentry, namelen); 413 dentry = lookup_one_len(name, parent->dentry, namelen);
433 error = PTR_ERR(dentry); 414 error = PTR_ERR(dentry);
@@ -438,99 +419,39 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name,
438 if (dentry->d_inode) 419 if (dentry->d_inode)
439 goto out_dput; 420 goto out_dput;
440 421
441 if (!IS_POSIXACL(parent->dentry->d_inode))
442 mode &= ~current_umask();
443
444 error = mnt_want_write(parent->mnt); 422 error = mnt_want_write(parent->mnt);
445 if (error) 423 if (error)
446 goto out_dput; 424 goto out_dput;
447 425
448 error = btrfs_may_create(parent->dentry->d_inode, dentry); 426 error = btrfs_may_create(dir, dentry);
449 if (error) 427 if (error)
450 goto out_drop_write; 428 goto out_drop_write;
451 429
452 /* 430 down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
453 * Actually perform the low-level subvolume creation after all 431
454 * this VFS fuzz. 432 if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
455 * 433 goto out_up_read;
456 * Eventually we want to pass in an inode under which we create this 434
457 * subvolume, but for now all are under the filesystem root.
458 *
459 * Also we should pass on the mode eventually to allow creating new
460 * subvolume with specific mode bits.
461 */
462 if (snap_src) { 435 if (snap_src) {
463 struct dentry *dir = dentry->d_parent; 436 error = create_snapshot(snap_src, dentry,
464 struct dentry *test = dir->d_parent; 437 name, namelen);
465 struct btrfs_path *path = btrfs_alloc_path();
466 int ret;
467 u64 test_oid;
468 u64 parent_oid = BTRFS_I(dir->d_inode)->root->root_key.objectid;
469
470 test_oid = snap_src->root_key.objectid;
471
472 ret = btrfs_find_root_ref(snap_src->fs_info->tree_root,
473 path, parent_oid, test_oid);
474 if (ret == 0)
475 goto create;
476 btrfs_release_path(snap_src->fs_info->tree_root, path);
477
478 /* we need to make sure we aren't creating a directory loop
479 * by taking a snapshot of something that has our current
480 * subvol in its directory tree. So, this loops through
481 * the dentries and checks the forward refs for each subvolume
482 * to see if is references the subvolume where we are
483 * placing this new snapshot.
484 */
485 while (1) {
486 if (!test ||
487 dir == snap_src->fs_info->sb->s_root ||
488 test == snap_src->fs_info->sb->s_root ||
489 test->d_inode->i_sb != snap_src->fs_info->sb) {
490 break;
491 }
492 if (S_ISLNK(test->d_inode->i_mode)) {
493 printk(KERN_INFO "Btrfs symlink in snapshot "
494 "path, failed\n");
495 error = -EMLINK;
496 btrfs_free_path(path);
497 goto out_drop_write;
498 }
499 test_oid =
500 BTRFS_I(test->d_inode)->root->root_key.objectid;
501 ret = btrfs_find_root_ref(snap_src->fs_info->tree_root,
502 path, test_oid, parent_oid);
503 if (ret == 0) {
504 printk(KERN_INFO "Btrfs snapshot creation "
505 "failed, looping\n");
506 error = -EMLINK;
507 btrfs_free_path(path);
508 goto out_drop_write;
509 }
510 btrfs_release_path(snap_src->fs_info->tree_root, path);
511 test = test->d_parent;
512 }
513create:
514 btrfs_free_path(path);
515 error = create_snapshot(snap_src, dentry, name, namelen);
516 } else { 438 } else {
517 error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root, 439 error = create_subvol(BTRFS_I(dir)->root, dentry,
518 dentry, name, namelen); 440 name, namelen);
519 } 441 }
520 if (error) 442 if (!error)
521 goto out_drop_write; 443 fsnotify_mkdir(dir, dentry);
522 444out_up_read:
523 fsnotify_mkdir(parent->dentry->d_inode, dentry); 445 up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
524out_drop_write: 446out_drop_write:
525 mnt_drop_write(parent->mnt); 447 mnt_drop_write(parent->mnt);
526out_dput: 448out_dput:
527 dput(dentry); 449 dput(dentry);
528out_unlock: 450out_unlock:
529 mutex_unlock(&parent->dentry->d_inode->i_mutex); 451 mutex_unlock(&dir->i_mutex);
530 return error; 452 return error;
531} 453}
532 454
533
534static int btrfs_defrag_file(struct file *file) 455static int btrfs_defrag_file(struct file *file)
535{ 456{
536 struct inode *inode = fdentry(file)->d_inode; 457 struct inode *inode = fdentry(file)->d_inode;
@@ -596,9 +517,8 @@ again:
596 clear_page_dirty_for_io(page); 517 clear_page_dirty_for_io(page);
597 518
598 btrfs_set_extent_delalloc(inode, page_start, page_end); 519 btrfs_set_extent_delalloc(inode, page_start, page_end);
599
600 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
601 set_page_dirty(page); 520 set_page_dirty(page);
521 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
602 unlock_page(page); 522 unlock_page(page);
603 page_cache_release(page); 523 page_cache_release(page);
604 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); 524 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
@@ -609,7 +529,8 @@ out_unlock:
609 return 0; 529 return 0;
610} 530}
611 531
612static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) 532static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
533 void __user *arg)
613{ 534{
614 u64 new_size; 535 u64 new_size;
615 u64 old_size; 536 u64 old_size;
@@ -718,10 +639,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
718{ 639{
719 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 640 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
720 struct btrfs_ioctl_vol_args *vol_args; 641 struct btrfs_ioctl_vol_args *vol_args;
721 struct btrfs_dir_item *di;
722 struct btrfs_path *path;
723 struct file *src_file; 642 struct file *src_file;
724 u64 root_dirid;
725 int namelen; 643 int namelen;
726 int ret = 0; 644 int ret = 0;
727 645
@@ -739,32 +657,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
739 goto out; 657 goto out;
740 } 658 }
741 659
742 path = btrfs_alloc_path();
743 if (!path) {
744 ret = -ENOMEM;
745 goto out;
746 }
747
748 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
749 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
750 path, root_dirid,
751 vol_args->name, namelen, 0);
752 btrfs_free_path(path);
753
754 if (di && !IS_ERR(di)) {
755 ret = -EEXIST;
756 goto out;
757 }
758
759 if (IS_ERR(di)) {
760 ret = PTR_ERR(di);
761 goto out;
762 }
763
764 if (subvol) { 660 if (subvol) {
765 ret = btrfs_mksubvol(&file->f_path, vol_args->name, 661 ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
766 file->f_path.dentry->d_inode->i_mode, 662 NULL);
767 namelen, NULL);
768 } else { 663 } else {
769 struct inode *src_inode; 664 struct inode *src_inode;
770 src_file = fget(vol_args->fd); 665 src_file = fget(vol_args->fd);
@@ -781,17 +676,156 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
781 fput(src_file); 676 fput(src_file);
782 goto out; 677 goto out;
783 } 678 }
784 ret = btrfs_mksubvol(&file->f_path, vol_args->name, 679 ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
785 file->f_path.dentry->d_inode->i_mode, 680 BTRFS_I(src_inode)->root);
786 namelen, BTRFS_I(src_inode)->root);
787 fput(src_file); 681 fput(src_file);
788 } 682 }
789
790out: 683out:
791 kfree(vol_args); 684 kfree(vol_args);
792 return ret; 685 return ret;
793} 686}
794 687
688/*
689 * helper to check if the subvolume references other subvolumes
690 */
691static noinline int may_destroy_subvol(struct btrfs_root *root)
692{
693 struct btrfs_path *path;
694 struct btrfs_key key;
695 int ret;
696
697 path = btrfs_alloc_path();
698 if (!path)
699 return -ENOMEM;
700
701 key.objectid = root->root_key.objectid;
702 key.type = BTRFS_ROOT_REF_KEY;
703 key.offset = (u64)-1;
704
705 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
706 &key, path, 0, 0);
707 if (ret < 0)
708 goto out;
709 BUG_ON(ret == 0);
710
711 ret = 0;
712 if (path->slots[0] > 0) {
713 path->slots[0]--;
714 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
715 if (key.objectid == root->root_key.objectid &&
716 key.type == BTRFS_ROOT_REF_KEY)
717 ret = -ENOTEMPTY;
718 }
719out:
720 btrfs_free_path(path);
721 return ret;
722}
723
724static noinline int btrfs_ioctl_snap_destroy(struct file *file,
725 void __user *arg)
726{
727 struct dentry *parent = fdentry(file);
728 struct dentry *dentry;
729 struct inode *dir = parent->d_inode;
730 struct inode *inode;
731 struct btrfs_root *root = BTRFS_I(dir)->root;
732 struct btrfs_root *dest = NULL;
733 struct btrfs_ioctl_vol_args *vol_args;
734 struct btrfs_trans_handle *trans;
735 int namelen;
736 int ret;
737 int err = 0;
738
739 if (!capable(CAP_SYS_ADMIN))
740 return -EPERM;
741
742 vol_args = memdup_user(arg, sizeof(*vol_args));
743 if (IS_ERR(vol_args))
744 return PTR_ERR(vol_args);
745
746 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
747 namelen = strlen(vol_args->name);
748 if (strchr(vol_args->name, '/') ||
749 strncmp(vol_args->name, "..", namelen) == 0) {
750 err = -EINVAL;
751 goto out;
752 }
753
754 err = mnt_want_write(file->f_path.mnt);
755 if (err)
756 goto out;
757
758 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
759 dentry = lookup_one_len(vol_args->name, parent, namelen);
760 if (IS_ERR(dentry)) {
761 err = PTR_ERR(dentry);
762 goto out_unlock_dir;
763 }
764
765 if (!dentry->d_inode) {
766 err = -ENOENT;
767 goto out_dput;
768 }
769
770 inode = dentry->d_inode;
771 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
772 err = -EINVAL;
773 goto out_dput;
774 }
775
776 dest = BTRFS_I(inode)->root;
777
778 mutex_lock(&inode->i_mutex);
779 err = d_invalidate(dentry);
780 if (err)
781 goto out_unlock;
782
783 down_write(&root->fs_info->subvol_sem);
784
785 err = may_destroy_subvol(dest);
786 if (err)
787 goto out_up_write;
788
789 trans = btrfs_start_transaction(root, 1);
790 ret = btrfs_unlink_subvol(trans, root, dir,
791 dest->root_key.objectid,
792 dentry->d_name.name,
793 dentry->d_name.len);
794 BUG_ON(ret);
795
796 btrfs_record_root_in_trans(trans, dest);
797
798 memset(&dest->root_item.drop_progress, 0,
799 sizeof(dest->root_item.drop_progress));
800 dest->root_item.drop_level = 0;
801 btrfs_set_root_refs(&dest->root_item, 0);
802
803 ret = btrfs_insert_orphan_item(trans,
804 root->fs_info->tree_root,
805 dest->root_key.objectid);
806 BUG_ON(ret);
807
808 ret = btrfs_commit_transaction(trans, root);
809 BUG_ON(ret);
810 inode->i_flags |= S_DEAD;
811out_up_write:
812 up_write(&root->fs_info->subvol_sem);
813out_unlock:
814 mutex_unlock(&inode->i_mutex);
815 if (!err) {
816 btrfs_invalidate_inodes(dest);
817 d_delete(dentry);
818 }
819out_dput:
820 dput(dentry);
821out_unlock_dir:
822 mutex_unlock(&dir->i_mutex);
823 mnt_drop_write(file->f_path.mnt);
824out:
825 kfree(vol_args);
826 return err;
827}
828
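may_destroy_subvol() above uses a standard btree idiom: search for (objectid, BTRFS_ROOT_REF_KEY, (u64)-1), a key that can never exist, then step back one slot and check whether the preceding item still carries this subvolume's objectid. The same idiom over a plain sorted array (toy keys and names):

#include <stdio.h>

struct key { unsigned long long objectid, offset; };

static const struct key items[] = {
    { 256, 1 }, { 256, 9 }, { 300, 2 },
};
#define NR (sizeof(items) / sizeof(items[0]))

/* index of first item > *want, like btrfs_search_slot on a missing key */
static unsigned int search_slot(const struct key *want)
{
    unsigned int i;

    for (i = 0; i < NR; i++) {
        const struct key *k = &items[i];
        if (k->objectid > want->objectid ||
            (k->objectid == want->objectid && k->offset > want->offset))
            break;
    }
    return i;
}

int main(void)
{
    struct key want = { 256, (unsigned long long)-1 };
    unsigned int slot = search_slot(&want);

    /* step back and see if the last item <= key shares our objectid */
    if (slot > 0 && items[slot - 1].objectid == want.objectid)
        printf("subvolume 256 still has references: ENOTEMPTY\n");
    else
        printf("subvolume 256 is empty\n");
    return 0;
}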
795static int btrfs_ioctl_defrag(struct file *file) 829static int btrfs_ioctl_defrag(struct file *file)
796{ 830{
797 struct inode *inode = fdentry(file)->d_inode; 831 struct inode *inode = fdentry(file)->d_inode;
@@ -865,8 +899,8 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
865 return ret; 899 return ret;
866} 900}
867 901
868static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, 902static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
869 u64 off, u64 olen, u64 destoff) 903 u64 off, u64 olen, u64 destoff)
870{ 904{
871 struct inode *inode = fdentry(file)->d_inode; 905 struct inode *inode = fdentry(file)->d_inode;
872 struct btrfs_root *root = BTRFS_I(inode)->root; 906 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -976,7 +1010,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
976 1010
977 /* punch hole in destination first */ 1011 /* punch hole in destination first */
978 btrfs_drop_extents(trans, root, inode, off, off + len, 1012 btrfs_drop_extents(trans, root, inode, off, off + len,
979 off + len, 0, &hint_byte); 1013 off + len, 0, &hint_byte, 1);
980 1014
981 /* clone data */ 1015 /* clone data */
982 key.objectid = src->i_ino; 1016 key.objectid = src->i_ino;
@@ -1071,8 +1105,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1071 datao += off - key.offset; 1105 datao += off - key.offset;
1072 datal -= off - key.offset; 1106 datal -= off - key.offset;
1073 } 1107 }
1074 if (key.offset + datao + datal + key.offset > 1108 if (key.offset + datao + datal > off + len)
1075 off + len)
1076 datal = off + len - key.offset - datao; 1109 datal = off + len - key.offset - datao;
1077 /* disko == 0 means it's a hole */ 1110 /* disko == 0 means it's a hole */
1078 if (!disko) 1111 if (!disko)
@@ -1258,6 +1291,8 @@ long btrfs_ioctl(struct file *file, unsigned int
1258 return btrfs_ioctl_snap_create(file, argp, 0); 1291 return btrfs_ioctl_snap_create(file, argp, 0);
1259 case BTRFS_IOC_SUBVOL_CREATE: 1292 case BTRFS_IOC_SUBVOL_CREATE:
1260 return btrfs_ioctl_snap_create(file, argp, 1); 1293 return btrfs_ioctl_snap_create(file, argp, 1);
1294 case BTRFS_IOC_SNAP_DESTROY:
1295 return btrfs_ioctl_snap_destroy(file, argp);
1261 case BTRFS_IOC_DEFRAG: 1296 case BTRFS_IOC_DEFRAG:
1262 return btrfs_ioctl_defrag(file); 1297 return btrfs_ioctl_defrag(file);
1263 case BTRFS_IOC_RESIZE: 1298 case BTRFS_IOC_RESIZE:
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index b320b103fa13..bc49914475eb 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -65,5 +65,6 @@ struct btrfs_ioctl_clone_range_args {
65 65
66#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ 66#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \
67 struct btrfs_ioctl_vol_args) 67 struct btrfs_ioctl_vol_args)
68 68#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \
69 struct btrfs_ioctl_vol_args)
69#endif 70#endif
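With the new BTRFS_IOC_SNAP_DESTROY number, deleting a snapshot from userspace is a single ioctl on the parent directory, issued by a CAP_SYS_ADMIN caller. A hedged sketch of such a caller; it assumes the btrfs_ioctl_vol_args layout of this era (a 64-bit fd plus a name buffer) and redefines the constants locally rather than including the in-tree header:

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

#define BTRFS_IOCTL_MAGIC 0x94
#define BTRFS_PATH_NAME_MAX 4087

struct btrfs_ioctl_vol_args {
    long long fd;
    char name[BTRFS_PATH_NAME_MAX + 1];
};

#define BTRFS_IOC_SNAP_DESTROY \
    _IOW(BTRFS_IOCTL_MAGIC, 15, struct btrfs_ioctl_vol_args)

int main(int argc, char **argv)
{
    struct btrfs_ioctl_vol_args args;
    int fd;

    if (argc != 3) {
        fprintf(stderr, "usage: %s PARENT_DIR SNAP_NAME\n", argv[0]);
        return 1;
    }
    fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    memset(&args, 0, sizeof(args));
    strncpy(args.name, argv[2], BTRFS_PATH_NAME_MAX);
    if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &args) < 0) {
        perror("BTRFS_IOC_SNAP_DESTROY");
        close(fd);
        return 1;
    }
    close(fd);
    return 0;
}

The name is resolved relative to the directory the fd refers to, matching the lookup_one_len() call in btrfs_ioctl_snap_destroy() above.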
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 7b2f401e604e..b5d6d24726b0 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -159,8 +159,6 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
159 * 159 *
160 * len is the length of the extent 160 * len is the length of the extent
161 * 161 *
162 * This also sets the EXTENT_ORDERED bit on the range in the inode.
163 *
164 * The tree is given a single reference on the ordered extent that was 162 * The tree is given a single reference on the ordered extent that was
165 * inserted. 163 * inserted.
166 */ 164 */
@@ -181,6 +179,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
181 entry->start = start; 179 entry->start = start;
182 entry->len = len; 180 entry->len = len;
183 entry->disk_len = disk_len; 181 entry->disk_len = disk_len;
182 entry->bytes_left = len;
184 entry->inode = inode; 183 entry->inode = inode;
185 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) 184 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
186 set_bit(type, &entry->flags); 185 set_bit(type, &entry->flags);
@@ -195,9 +194,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
195 &entry->rb_node); 194 &entry->rb_node);
196 BUG_ON(node); 195 BUG_ON(node);
197 196
198 set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
199 entry_end(entry) - 1, GFP_NOFS);
200
201 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 197 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
202 list_add_tail(&entry->root_extent_list, 198 list_add_tail(&entry->root_extent_list,
203 &BTRFS_I(inode)->root->fs_info->ordered_extents); 199 &BTRFS_I(inode)->root->fs_info->ordered_extents);
@@ -241,13 +237,10 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
241 struct btrfs_ordered_inode_tree *tree; 237 struct btrfs_ordered_inode_tree *tree;
242 struct rb_node *node; 238 struct rb_node *node;
243 struct btrfs_ordered_extent *entry; 239 struct btrfs_ordered_extent *entry;
244 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
245 int ret; 240 int ret;
246 241
247 tree = &BTRFS_I(inode)->ordered_tree; 242 tree = &BTRFS_I(inode)->ordered_tree;
248 mutex_lock(&tree->mutex); 243 mutex_lock(&tree->mutex);
249 clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1,
250 GFP_NOFS);
251 node = tree_search(tree, file_offset); 244 node = tree_search(tree, file_offset);
252 if (!node) { 245 if (!node) {
253 ret = 1; 246 ret = 1;
@@ -260,11 +253,16 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
260 goto out; 253 goto out;
261 } 254 }
262 255
263 ret = test_range_bit(io_tree, entry->file_offset, 256 if (io_size > entry->bytes_left) {
264 entry->file_offset + entry->len - 1, 257 printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
265 EXTENT_ORDERED, 0); 258 (unsigned long long)entry->bytes_left,
266 if (ret == 0) 259 (unsigned long long)io_size);
260 }
261 entry->bytes_left -= io_size;
262 if (entry->bytes_left == 0)
267 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); 263 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
264 else
265 ret = 1;
268out: 266out:
269 mutex_unlock(&tree->mutex); 267 mutex_unlock(&tree->mutex);
270 return ret == 0; 268 return ret == 0;
@@ -476,6 +474,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
476 u64 orig_end; 474 u64 orig_end;
477 u64 wait_end; 475 u64 wait_end;
478 struct btrfs_ordered_extent *ordered; 476 struct btrfs_ordered_extent *ordered;
477 int found;
479 478
480 if (start + len < start) { 479 if (start + len < start) {
481 orig_end = INT_LIMIT(loff_t); 480 orig_end = INT_LIMIT(loff_t);
@@ -502,6 +501,7 @@ again:
502 orig_end >> PAGE_CACHE_SHIFT); 501 orig_end >> PAGE_CACHE_SHIFT);
503 502
504 end = orig_end; 503 end = orig_end;
504 found = 0;
505 while (1) { 505 while (1) {
506 ordered = btrfs_lookup_first_ordered_extent(inode, end); 506 ordered = btrfs_lookup_first_ordered_extent(inode, end);
507 if (!ordered) 507 if (!ordered)
@@ -514,6 +514,7 @@ again:
514 btrfs_put_ordered_extent(ordered); 514 btrfs_put_ordered_extent(ordered);
515 break; 515 break;
516 } 516 }
517 found++;
517 btrfs_start_ordered_extent(inode, ordered, 1); 518 btrfs_start_ordered_extent(inode, ordered, 1);
518 end = ordered->file_offset; 519 end = ordered->file_offset;
519 btrfs_put_ordered_extent(ordered); 520 btrfs_put_ordered_extent(ordered);
@@ -521,8 +522,8 @@ again:
521 break; 522 break;
522 end--; 523 end--;
523 } 524 }
524 if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, 525 if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
525 EXTENT_ORDERED | EXTENT_DELALLOC, 0)) { 526 EXTENT_DELALLOC, 0, NULL)) {
526 schedule_timeout(1); 527 schedule_timeout(1);
527 goto again; 528 goto again;
528 } 529 }
@@ -613,7 +614,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
613 */ 614 */
614 if (test_range_bit(io_tree, disk_i_size, 615 if (test_range_bit(io_tree, disk_i_size,
615 ordered->file_offset + ordered->len - 1, 616 ordered->file_offset + ordered->len - 1,
616 EXTENT_DELALLOC, 0)) { 617 EXTENT_DELALLOC, 0, NULL)) {
617 goto out; 618 goto out;
618 } 619 }
619 /* 620 /*
@@ -664,7 +665,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
664 */ 665 */
665 if (i_size_test > entry_end(ordered) && 666 if (i_size_test > entry_end(ordered) &&
666 !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, 667 !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1,
667 EXTENT_DELALLOC, 0)) { 668 EXTENT_DELALLOC, 0, NULL)) {
668 new_i_size = min_t(u64, i_size_test, i_size_read(inode)); 669 new_i_size = min_t(u64, i_size_test, i_size_read(inode));
669 } 670 }
670 BTRFS_I(inode)->disk_i_size = new_i_size; 671 BTRFS_I(inode)->disk_i_size = new_i_size;
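The ordered-data changes drop the per-range EXTENT_ORDERED bit entirely; completion is now a simple per-extent counter, bytes_left, decremented as each chunk of IO finishes, with BTRFS_ORDERED_IO_DONE set once it reaches zero. The accounting in isolation (toy types, single-threaded; the kernel runs this under tree->mutex):

#include <stdio.h>

struct ordered_extent {
    unsigned long long len;
    unsigned long long bytes_left;
};

/* returns 1 when this chunk completed the whole extent */
static int dec_test_ordered_pending(struct ordered_extent *e,
                                    unsigned long long io_size)
{
    if (io_size > e->bytes_left)
        fprintf(stderr, "bad ordered accounting left %llu size %llu\n",
                e->bytes_left, io_size);
    e->bytes_left -= io_size;
    return e->bytes_left == 0;
}

int main(void)
{
    struct ordered_extent e = { .len = 8192, .bytes_left = 8192 };

    printf("first 4k done: %d\n", dec_test_ordered_pending(&e, 4096));
    printf("last 4k done: %d\n", dec_test_ordered_pending(&e, 4096));
    return 0;
}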
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 3d31c8827b01..993a7ea45c70 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -85,6 +85,9 @@ struct btrfs_ordered_extent {
85 /* extent length on disk */ 85 /* extent length on disk */
86 u64 disk_len; 86 u64 disk_len;
87 87
88 /* number of bytes that still need writing */
89 u64 bytes_left;
90
88 /* flags (described above) */ 91 /* flags (described above) */
89 unsigned long flags; 92 unsigned long flags;
90 93
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c
index 3c0d52af4f80..79cba5fbc28e 100644
--- a/fs/btrfs/orphan.c
+++ b/fs/btrfs/orphan.c
@@ -65,3 +65,23 @@ out:
65 btrfs_free_path(path); 65 btrfs_free_path(path);
66 return ret; 66 return ret;
67} 67}
68
69int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset)
70{
71 struct btrfs_path *path;
72 struct btrfs_key key;
73 int ret;
74
75 key.objectid = BTRFS_ORPHAN_OBJECTID;
76 key.type = BTRFS_ORPHAN_ITEM_KEY;
77 key.offset = offset;
78
79 path = btrfs_alloc_path();
80 if (!path)
81 return -ENOMEM;
82
83 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
84
85 btrfs_free_path(path);
86 return ret;
87}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index c04f7f212602..361ad323faac 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -121,6 +121,15 @@ struct inodevec {
121 int nr; 121 int nr;
122}; 122};
123 123
124#define MAX_EXTENTS 128
125
126struct file_extent_cluster {
127 u64 start;
128 u64 end;
129 u64 boundary[MAX_EXTENTS];
130 unsigned int nr;
131};
132
124struct reloc_control { 133struct reloc_control {
125 /* block group to relocate */ 134 /* block group to relocate */
126 struct btrfs_block_group_cache *block_group; 135 struct btrfs_block_group_cache *block_group;
@@ -2180,7 +2189,7 @@ static int tree_block_processed(u64 bytenr, u32 blocksize,
2180 struct reloc_control *rc) 2189 struct reloc_control *rc)
2181{ 2190{
2182 if (test_range_bit(&rc->processed_blocks, bytenr, 2191 if (test_range_bit(&rc->processed_blocks, bytenr,
2183 bytenr + blocksize - 1, EXTENT_DIRTY, 1)) 2192 bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
2184 return 1; 2193 return 1;
2185 return 0; 2194 return 0;
2186} 2195}
@@ -2529,56 +2538,94 @@ out:
2529} 2538}
2530 2539
2531static noinline_for_stack 2540static noinline_for_stack
2532int relocate_inode_pages(struct inode *inode, u64 start, u64 len) 2541int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2542 u64 block_start)
2543{
2544 struct btrfs_root *root = BTRFS_I(inode)->root;
2545 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2546 struct extent_map *em;
2547 int ret = 0;
2548
2549 em = alloc_extent_map(GFP_NOFS);
2550 if (!em)
2551 return -ENOMEM;
2552
2553 em->start = start;
2554 em->len = end + 1 - start;
2555 em->block_len = em->len;
2556 em->block_start = block_start;
2557 em->bdev = root->fs_info->fs_devices->latest_bdev;
2558 set_bit(EXTENT_FLAG_PINNED, &em->flags);
2559
2560 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2561 while (1) {
2562 write_lock(&em_tree->lock);
2563 ret = add_extent_mapping(em_tree, em);
2564 write_unlock(&em_tree->lock);
2565 if (ret != -EEXIST) {
2566 free_extent_map(em);
2567 break;
2568 }
2569 btrfs_drop_extent_cache(inode, start, end, 0);
2570 }
2571 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2572 return ret;
2573}
2574
2575static int relocate_file_extent_cluster(struct inode *inode,
2576 struct file_extent_cluster *cluster)
2533{ 2577{
2534 u64 page_start; 2578 u64 page_start;
2535 u64 page_end; 2579 u64 page_end;
2536 unsigned long i; 2580 u64 offset = BTRFS_I(inode)->index_cnt;
2537 unsigned long first_index; 2581 unsigned long index;
2538 unsigned long last_index; 2582 unsigned long last_index;
2539 unsigned int total_read = 0; 2583 unsigned int dirty_page = 0;
2540 unsigned int total_dirty = 0;
2541 struct page *page; 2584 struct page *page;
2542 struct file_ra_state *ra; 2585 struct file_ra_state *ra;
2543 struct btrfs_ordered_extent *ordered; 2586 int nr = 0;
2544 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2545 int ret = 0; 2587 int ret = 0;
2546 2588
2589 if (!cluster->nr)
2590 return 0;
2591
2547 ra = kzalloc(sizeof(*ra), GFP_NOFS); 2592 ra = kzalloc(sizeof(*ra), GFP_NOFS);
2548 if (!ra) 2593 if (!ra)
2549 return -ENOMEM; 2594 return -ENOMEM;
2550 2595
2596 index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
2597 last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
2598
2551 mutex_lock(&inode->i_mutex); 2599 mutex_lock(&inode->i_mutex);
2552 first_index = start >> PAGE_CACHE_SHIFT;
2553 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2554 2600
2555 /* make sure the dirty trick played by the caller work */ 2601 i_size_write(inode, cluster->end + 1 - offset);
2556 while (1) { 2602 ret = setup_extent_mapping(inode, cluster->start - offset,
2557 ret = invalidate_inode_pages2_range(inode->i_mapping, 2603 cluster->end - offset, cluster->start);
2558 first_index, last_index);
2559 if (ret != -EBUSY)
2560 break;
2561 schedule_timeout(HZ/10);
2562 }
2563 if (ret) 2604 if (ret)
2564 goto out_unlock; 2605 goto out_unlock;
2565 2606
2566 file_ra_state_init(ra, inode->i_mapping); 2607 file_ra_state_init(ra, inode->i_mapping);
2567 2608
2568 for (i = first_index ; i <= last_index; i++) { 2609 WARN_ON(cluster->start != cluster->boundary[0]);
2569 if (total_read % ra->ra_pages == 0) { 2610 while (index <= last_index) {
2570 btrfs_force_ra(inode->i_mapping, ra, NULL, i, 2611 page = find_lock_page(inode->i_mapping, index);
2571 min(last_index, ra->ra_pages + i - 1));
2572 }
2573 total_read++;
2574again:
2575 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
2576 BUG_ON(1);
2577 page = grab_cache_page(inode->i_mapping, i);
2578 if (!page) { 2612 if (!page) {
2579 ret = -ENOMEM; 2613 page_cache_sync_readahead(inode->i_mapping,
2580 goto out_unlock; 2614 ra, NULL, index,
2615 last_index + 1 - index);
2616 page = grab_cache_page(inode->i_mapping, index);
2617 if (!page) {
2618 ret = -ENOMEM;
2619 goto out_unlock;
2620 }
2621 }
2622
2623 if (PageReadahead(page)) {
2624 page_cache_async_readahead(inode->i_mapping,
2625 ra, NULL, page, index,
2626 last_index + 1 - index);
2581 } 2627 }
2628
2582 if (!PageUptodate(page)) { 2629 if (!PageUptodate(page)) {
2583 btrfs_readpage(NULL, page); 2630 btrfs_readpage(NULL, page);
2584 lock_page(page); 2631 lock_page(page);
@@ -2589,75 +2636,79 @@ again:
2589 goto out_unlock; 2636 goto out_unlock;
2590 } 2637 }
2591 } 2638 }
2592 wait_on_page_writeback(page);
2593 2639
2594 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 2640 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2595 page_end = page_start + PAGE_CACHE_SIZE - 1; 2641 page_end = page_start + PAGE_CACHE_SIZE - 1;
2596 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 2642
2597 2643 lock_extent(&BTRFS_I(inode)->io_tree,
2598 ordered = btrfs_lookup_ordered_extent(inode, page_start); 2644 page_start, page_end, GFP_NOFS);
2599 if (ordered) { 2645
2600 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2601 unlock_page(page);
2602 page_cache_release(page);
2603 btrfs_start_ordered_extent(inode, ordered, 1);
2604 btrfs_put_ordered_extent(ordered);
2605 goto again;
2606 }
2607 set_page_extent_mapped(page); 2646 set_page_extent_mapped(page);
2608 2647
2609 if (i == first_index) 2648 if (nr < cluster->nr &&
2610 set_extent_bits(io_tree, page_start, page_end, 2649 page_start + offset == cluster->boundary[nr]) {
2650 set_extent_bits(&BTRFS_I(inode)->io_tree,
2651 page_start, page_end,
2611 EXTENT_BOUNDARY, GFP_NOFS); 2652 EXTENT_BOUNDARY, GFP_NOFS);
2653 nr++;
2654 }
2612 btrfs_set_extent_delalloc(inode, page_start, page_end); 2655 btrfs_set_extent_delalloc(inode, page_start, page_end);
2613 2656
2614 set_page_dirty(page); 2657 set_page_dirty(page);
2615 total_dirty++; 2658 dirty_page++;
2616 2659
2617 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 2660 unlock_extent(&BTRFS_I(inode)->io_tree,
2661 page_start, page_end, GFP_NOFS);
2618 unlock_page(page); 2662 unlock_page(page);
2619 page_cache_release(page); 2663 page_cache_release(page);
2664
2665 index++;
2666 if (nr < cluster->nr &&
2667 page_end + 1 + offset == cluster->boundary[nr]) {
2668 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2669 dirty_page);
2670 dirty_page = 0;
2671 }
2672 }
2673 if (dirty_page) {
2674 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2675 dirty_page);
2620 } 2676 }
2677 WARN_ON(nr != cluster->nr);
2621out_unlock: 2678out_unlock:
2622 mutex_unlock(&inode->i_mutex); 2679 mutex_unlock(&inode->i_mutex);
2623 kfree(ra); 2680 kfree(ra);
2624 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
2625 return ret; 2681 return ret;
2626} 2682}
2627 2683
2628static noinline_for_stack 2684static noinline_for_stack
2629int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key) 2685int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
2686 struct file_extent_cluster *cluster)
2630{ 2687{
2631 struct btrfs_root *root = BTRFS_I(inode)->root; 2688 int ret;
2632 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2633 struct extent_map *em;
2634 u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt;
2635 u64 end = start + extent_key->offset - 1;
2636
2637 em = alloc_extent_map(GFP_NOFS);
2638 em->start = start;
2639 em->len = extent_key->offset;
2640 em->block_len = extent_key->offset;
2641 em->block_start = extent_key->objectid;
2642 em->bdev = root->fs_info->fs_devices->latest_bdev;
2643 set_bit(EXTENT_FLAG_PINNED, &em->flags);
2644 2689
2645 /* setup extent map to cheat btrfs_readpage */ 2690 if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
2646 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2691 ret = relocate_file_extent_cluster(inode, cluster);
2647 while (1) { 2692 if (ret)
2648 int ret; 2693 return ret;
2649 spin_lock(&em_tree->lock); 2694 cluster->nr = 0;
2650 ret = add_extent_mapping(em_tree, em);
2651 spin_unlock(&em_tree->lock);
2652 if (ret != -EEXIST) {
2653 free_extent_map(em);
2654 break;
2655 }
2656 btrfs_drop_extent_cache(inode, start, end, 0);
2657 } 2695 }
2658 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2659 2696
2660 return relocate_inode_pages(inode, start, extent_key->offset); 2697 if (!cluster->nr)
2698 cluster->start = extent_key->objectid;
2699 else
2700 BUG_ON(cluster->nr >= MAX_EXTENTS);
2701 cluster->end = extent_key->objectid + extent_key->offset - 1;
2702 cluster->boundary[cluster->nr] = extent_key->objectid;
2703 cluster->nr++;
2704
2705 if (cluster->nr >= MAX_EXTENTS) {
2706 ret = relocate_file_extent_cluster(inode, cluster);
2707 if (ret)
2708 return ret;
2709 cluster->nr = 0;
2710 }
2711 return 0;
2661} 2712}
2662 2713
2663#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2714#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
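relocate_data_extent() no longer copies pages one extent at a time; it accumulates adjacent extents into a file_extent_cluster and relocates them in a single pass, flushing early only when the next extent is not contiguous or the boundary array fills up. A compact model of that batching decision (flush() stands in for relocate_file_extent_cluster(); MAX_EXTENTS shrunk for the demo):

#include <stdio.h>

#define MAX_EXTENTS 4    /* 128 in the kernel; small here for the demo */

struct cluster {
    unsigned long long start, end;
    unsigned long long boundary[MAX_EXTENTS];
    unsigned int nr;
};

static void flush(struct cluster *c)
{
    if (!c->nr)
        return;
    printf("relocating %u extent(s), bytes %llu-%llu\n",
           c->nr, c->start, c->end);
    c->nr = 0;
}

static void add_extent(struct cluster *c, unsigned long long objectid,
                       unsigned long long len)
{
    /* flush first when the new extent is not adjacent to the cluster */
    if (c->nr > 0 && objectid != c->end + 1)
        flush(c);

    if (!c->nr)
        c->start = objectid;
    c->end = objectid + len - 1;
    c->boundary[c->nr++] = objectid;

    if (c->nr >= MAX_EXTENTS)    /* cluster full: flush now */
        flush(c);
}

int main(void)
{
    struct cluster c = { .nr = 0 };

    add_extent(&c, 0, 4096);
    add_extent(&c, 4096, 4096);     /* adjacent: batched */
    add_extent(&c, 65536, 4096);    /* gap: forces a flush first */
    flush(&c);                      /* final flush, as after the loop */
    return 0;
}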
@@ -3203,10 +3254,12 @@ static int check_extent_flags(u64 flags)
3203 return 0; 3254 return 0;
3204} 3255}
3205 3256
3257
3206static noinline_for_stack int relocate_block_group(struct reloc_control *rc) 3258static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3207{ 3259{
3208 struct rb_root blocks = RB_ROOT; 3260 struct rb_root blocks = RB_ROOT;
3209 struct btrfs_key key; 3261 struct btrfs_key key;
3262 struct file_extent_cluster *cluster;
3210 struct btrfs_trans_handle *trans = NULL; 3263 struct btrfs_trans_handle *trans = NULL;
3211 struct btrfs_path *path; 3264 struct btrfs_path *path;
3212 struct btrfs_extent_item *ei; 3265 struct btrfs_extent_item *ei;
@@ -3216,10 +3269,17 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3216 int ret; 3269 int ret;
3217 int err = 0; 3270 int err = 0;
3218 3271
3272 cluster = kzalloc(sizeof(*cluster), GFP_NOFS);
3273 if (!cluster)
3274 return -ENOMEM;
3275
3219 path = btrfs_alloc_path(); 3276 path = btrfs_alloc_path();
3220 if (!path) 3277 if (!path)
3221 return -ENOMEM; 3278 return -ENOMEM;
3222 3279
3280 rc->extents_found = 0;
3281 rc->extents_skipped = 0;
3282
3223 rc->search_start = rc->block_group->key.objectid; 3283 rc->search_start = rc->block_group->key.objectid;
3224 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, 3284 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
3225 GFP_NOFS); 3285 GFP_NOFS);
@@ -3306,14 +3366,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3306 } 3366 }
3307 3367
3308 nr = trans->blocks_used; 3368 nr = trans->blocks_used;
3309 btrfs_end_transaction_throttle(trans, rc->extent_root); 3369 btrfs_end_transaction(trans, rc->extent_root);
3310 trans = NULL; 3370 trans = NULL;
3311 btrfs_btree_balance_dirty(rc->extent_root, nr); 3371 btrfs_btree_balance_dirty(rc->extent_root, nr);
3312 3372
3313 if (rc->stage == MOVE_DATA_EXTENTS && 3373 if (rc->stage == MOVE_DATA_EXTENTS &&
3314 (flags & BTRFS_EXTENT_FLAG_DATA)) { 3374 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3315 rc->found_file_extent = 1; 3375 rc->found_file_extent = 1;
3316 ret = relocate_data_extent(rc->data_inode, &key); 3376 ret = relocate_data_extent(rc->data_inode,
3377 &key, cluster);
3317 if (ret < 0) { 3378 if (ret < 0) {
3318 err = ret; 3379 err = ret;
3319 break; 3380 break;
@@ -3328,6 +3389,14 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3328 btrfs_btree_balance_dirty(rc->extent_root, nr); 3389 btrfs_btree_balance_dirty(rc->extent_root, nr);
3329 } 3390 }
3330 3391
3392 if (!err) {
3393 ret = relocate_file_extent_cluster(rc->data_inode, cluster);
3394 if (ret < 0)
3395 err = ret;
3396 }
3397
3398 kfree(cluster);
3399
3331 rc->create_reloc_root = 0; 3400 rc->create_reloc_root = 0;
3332 smp_mb(); 3401 smp_mb();
3333 3402
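
One detail worth flagging in the hunk above: relocate_block_group() now returns -ENOMEM without freeing cluster when btrfs_alloc_path() fails, so that allocation leaks on the error path. A leak-free variant of the setup, sketched:

	cluster = kzalloc(sizeof(*cluster), GFP_NOFS);
	if (!cluster)
		return -ENOMEM;

	path = btrfs_alloc_path();
	if (!path) {
		kfree(cluster);		/* avoid leaking the cluster */
		return -ENOMEM;
	}
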
@@ -3348,8 +3417,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3348} 3417}
3349 3418
3350static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 3419static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3351 struct btrfs_root *root, 3420 struct btrfs_root *root, u64 objectid)
3352 u64 objectid, u64 size)
3353{ 3421{
3354 struct btrfs_path *path; 3422 struct btrfs_path *path;
3355 struct btrfs_inode_item *item; 3423 struct btrfs_inode_item *item;
@@ -3368,7 +3436,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3368 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); 3436 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
3369 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); 3437 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
3370 btrfs_set_inode_generation(leaf, item, 1); 3438 btrfs_set_inode_generation(leaf, item, 1);
3371 btrfs_set_inode_size(leaf, item, size); 3439 btrfs_set_inode_size(leaf, item, 0);
3372 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); 3440 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
3373 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); 3441 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
3374 btrfs_mark_buffer_dirty(leaf); 3442 btrfs_mark_buffer_dirty(leaf);
@@ -3404,12 +3472,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3404 if (err) 3472 if (err)
3405 goto out; 3473 goto out;
3406 3474
3407 err = __insert_orphan_inode(trans, root, objectid, group->key.offset); 3475 err = __insert_orphan_inode(trans, root, objectid);
3408 BUG_ON(err);
3409
3410 err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
3411 group->key.offset, 0, group->key.offset,
3412 0, 0, 0);
3413 BUG_ON(err); 3476 BUG_ON(err);
3414 3477
3415 key.objectid = objectid; 3478 key.objectid = objectid;
@@ -3475,14 +3538,15 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3475 btrfs_wait_ordered_extents(fs_info->tree_root, 0); 3538 btrfs_wait_ordered_extents(fs_info->tree_root, 0);
3476 3539
3477 while (1) { 3540 while (1) {
3478 mutex_lock(&fs_info->cleaner_mutex);
3479 btrfs_clean_old_snapshots(fs_info->tree_root);
3480 mutex_unlock(&fs_info->cleaner_mutex);
3481
3482 rc->extents_found = 0; 3541 rc->extents_found = 0;
3483 rc->extents_skipped = 0; 3542 rc->extents_skipped = 0;
3484 3543
3544 mutex_lock(&fs_info->cleaner_mutex);
3545
3546 btrfs_clean_old_snapshots(fs_info->tree_root);
3485 ret = relocate_block_group(rc); 3547 ret = relocate_block_group(rc);
3548
3549 mutex_unlock(&fs_info->cleaner_mutex);
3486 if (ret < 0) { 3550 if (ret < 0) {
3487 err = ret; 3551 err = ret;
3488 break; 3552 break;
@@ -3514,10 +3578,10 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3514 } 3578 }
3515 } 3579 }
3516 3580
3517 filemap_fdatawrite_range(fs_info->btree_inode->i_mapping, 3581 filemap_write_and_wait_range(fs_info->btree_inode->i_mapping,
3518 rc->block_group->key.objectid, 3582 rc->block_group->key.objectid,
3519 rc->block_group->key.objectid + 3583 rc->block_group->key.objectid +
3520 rc->block_group->key.offset - 1); 3584 rc->block_group->key.offset - 1);
3521 3585
3522 WARN_ON(rc->block_group->pinned > 0); 3586 WARN_ON(rc->block_group->pinned > 0);
3523 WARN_ON(rc->block_group->reserved > 0); 3587 WARN_ON(rc->block_group->reserved > 0);
@@ -3530,6 +3594,26 @@ out:
3530 return err; 3594 return err;
3531} 3595}
3532 3596
3597static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
3598{
3599 struct btrfs_trans_handle *trans;
3600 int ret;
3601
3602 trans = btrfs_start_transaction(root->fs_info->tree_root, 1);
3603
3604 memset(&root->root_item.drop_progress, 0,
3605 sizeof(root->root_item.drop_progress));
3606 root->root_item.drop_level = 0;
3607 btrfs_set_root_refs(&root->root_item, 0);
3608 ret = btrfs_update_root(trans, root->fs_info->tree_root,
3609 &root->root_key, &root->root_item);
3610 BUG_ON(ret);
3611
3612 ret = btrfs_end_transaction(trans, root->fs_info->tree_root);
3613 BUG_ON(ret);
3614 return 0;
3615}
3616
3533/* 3617/*
3534 * recover relocation interrupted by system crash. 3618 * recover relocation interrupted by system crash.
3535 * 3619 *
@@ -3589,8 +3673,12 @@ int btrfs_recover_relocation(struct btrfs_root *root)
3589 fs_root = read_fs_root(root->fs_info, 3673 fs_root = read_fs_root(root->fs_info,
3590 reloc_root->root_key.offset); 3674 reloc_root->root_key.offset);
3591 if (IS_ERR(fs_root)) { 3675 if (IS_ERR(fs_root)) {
3592 err = PTR_ERR(fs_root); 3676 ret = PTR_ERR(fs_root);
3593 goto out; 3677 if (ret != -ENOENT) {
3678 err = ret;
3679 goto out;
3680 }
3681 mark_garbage_root(reloc_root);
3594 } 3682 }
3595 } 3683 }
3596 3684
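
Condensing the recovery change above: an -ENOENT from read_fs_root() now means the subvolume that owned the reloc root is gone, so instead of aborting, recovery demotes the reloc root to a garbage root (refs forced to 0 by mark_garbage_root()) for the cleaner to delete later. Roughly:

	fs_root = read_fs_root(root->fs_info, reloc_root->root_key.offset);
	if (IS_ERR(fs_root)) {
		ret = PTR_ERR(fs_root);
		if (ret != -ENOENT) {
			err = ret;	/* a genuine failure still aborts */
			goto out;
		}
		mark_garbage_root(reloc_root);	/* orphan: cleaner reaps it */
	}
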
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 0ddc6d61c55a..9351428f30e2 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -94,17 +94,23 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
94 goto out; 94 goto out;
95 95
96 BUG_ON(ret == 0); 96 BUG_ON(ret == 0);
97 if (path->slots[0] == 0) {
98 ret = 1;
99 goto out;
100 }
97 l = path->nodes[0]; 101 l = path->nodes[0];
98 BUG_ON(path->slots[0] == 0);
99 slot = path->slots[0] - 1; 102 slot = path->slots[0] - 1;
100 btrfs_item_key_to_cpu(l, &found_key, slot); 103 btrfs_item_key_to_cpu(l, &found_key, slot);
101 if (found_key.objectid != objectid) { 104 if (found_key.objectid != objectid ||
105 found_key.type != BTRFS_ROOT_ITEM_KEY) {
102 ret = 1; 106 ret = 1;
103 goto out; 107 goto out;
104 } 108 }
105 read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), 109 if (item)
106 sizeof(*item)); 110 read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot),
107 memcpy(key, &found_key, sizeof(found_key)); 111 sizeof(*item));
112 if (key)
113 memcpy(key, &found_key, sizeof(found_key));
108 ret = 0; 114 ret = 0;
109out: 115out:
110 btrfs_free_path(path); 116 btrfs_free_path(path);
@@ -249,6 +255,59 @@ err:
249 return ret; 255 return ret;
250} 256}
251 257
258int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
259{
260 struct extent_buffer *leaf;
261 struct btrfs_path *path;
262 struct btrfs_key key;
263 int err = 0;
264 int ret;
265
266 path = btrfs_alloc_path();
267 if (!path)
268 return -ENOMEM;
269
270 key.objectid = BTRFS_ORPHAN_OBJECTID;
271 key.type = BTRFS_ORPHAN_ITEM_KEY;
272 key.offset = 0;
273
274 while (1) {
275 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
276 if (ret < 0) {
277 err = ret;
278 break;
279 }
280
281 leaf = path->nodes[0];
282 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
283 ret = btrfs_next_leaf(tree_root, path);
284 if (ret < 0)
285 err = ret;
286 if (ret != 0)
287 break;
288 leaf = path->nodes[0];
289 }
290
291 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
292 btrfs_release_path(tree_root, path);
293
294 if (key.objectid != BTRFS_ORPHAN_OBJECTID ||
295 key.type != BTRFS_ORPHAN_ITEM_KEY)
296 break;
297
298 ret = btrfs_find_dead_roots(tree_root, key.offset);
299 if (ret) {
300 err = ret;
301 break;
302 }
303
304 key.offset++;
305 }
306
307 btrfs_free_path(path);
308 return err;
309}
310
252/* drop the root item for 'key' from 'root' */ 311/* drop the root item for 'key' from 'root' */
253int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, 312int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
254 struct btrfs_key *key) 313 struct btrfs_key *key)
@@ -278,31 +337,57 @@ out:
278 return ret; 337 return ret;
279} 338}
280 339
281#if 0 /* this will get used when snapshot deletion is implemented */
282int btrfs_del_root_ref(struct btrfs_trans_handle *trans, 340int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
283 struct btrfs_root *tree_root, 341 struct btrfs_root *tree_root,
284 u64 root_id, u8 type, u64 ref_id) 342 u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
343 const char *name, int name_len)
344
285{ 345{
346 struct btrfs_path *path;
347 struct btrfs_root_ref *ref;
348 struct extent_buffer *leaf;
286 struct btrfs_key key; 349 struct btrfs_key key;
350 unsigned long ptr;
351 int err = 0;
287 int ret; 352 int ret;
288 struct btrfs_path *path;
289 353
290 path = btrfs_alloc_path(); 354 path = btrfs_alloc_path();
355 if (!path)
356 return -ENOMEM;
291 357
292 key.objectid = root_id; 358 key.objectid = root_id;
293 key.type = type; 359 key.type = BTRFS_ROOT_BACKREF_KEY;
294 key.offset = ref_id; 360 key.offset = ref_id;
295 361again:
296 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); 362 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
297 BUG_ON(ret); 363 BUG_ON(ret < 0);
298 364 if (ret == 0) {
299 ret = btrfs_del_item(trans, tree_root, path); 365 leaf = path->nodes[0];
300 BUG_ON(ret); 366 ref = btrfs_item_ptr(leaf, path->slots[0],
367 struct btrfs_root_ref);
368
369 WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid);
370 WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len);
371 ptr = (unsigned long)(ref + 1);
372 WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len));
373 *sequence = btrfs_root_ref_sequence(leaf, ref);
374
375 ret = btrfs_del_item(trans, tree_root, path);
376 BUG_ON(ret);
377 } else
378 err = -ENOENT;
379
380 if (key.type == BTRFS_ROOT_BACKREF_KEY) {
381 btrfs_release_path(tree_root, path);
382 key.objectid = ref_id;
383 key.type = BTRFS_ROOT_REF_KEY;
384 key.offset = root_id;
385 goto again;
386 }
301 387
302 btrfs_free_path(path); 388 btrfs_free_path(path);
303 return ret; 389 return err;
304} 390}
305#endif
306 391
307int btrfs_find_root_ref(struct btrfs_root *tree_root, 392int btrfs_find_root_ref(struct btrfs_root *tree_root,
308 struct btrfs_path *path, 393 struct btrfs_path *path,
@@ -319,7 +404,6 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root,
319 return ret; 404 return ret;
320} 405}
321 406
322
323/* 407/*
324 * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY 408 * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY
325 * or BTRFS_ROOT_BACKREF_KEY. 409 * or BTRFS_ROOT_BACKREF_KEY.
@@ -335,8 +419,7 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root,
335 */ 419 */
336int btrfs_add_root_ref(struct btrfs_trans_handle *trans, 420int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
337 struct btrfs_root *tree_root, 421 struct btrfs_root *tree_root,
338 u64 root_id, u8 type, u64 ref_id, 422 u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
339 u64 dirid, u64 sequence,
340 const char *name, int name_len) 423 const char *name, int name_len)
341{ 424{
342 struct btrfs_key key; 425 struct btrfs_key key;
@@ -346,13 +429,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
346 struct extent_buffer *leaf; 429 struct extent_buffer *leaf;
347 unsigned long ptr; 430 unsigned long ptr;
348 431
349
350 path = btrfs_alloc_path(); 432 path = btrfs_alloc_path();
433 if (!path)
434 return -ENOMEM;
351 435
352 key.objectid = root_id; 436 key.objectid = root_id;
353 key.type = type; 437 key.type = BTRFS_ROOT_BACKREF_KEY;
354 key.offset = ref_id; 438 key.offset = ref_id;
355 439again:
356 ret = btrfs_insert_empty_item(trans, tree_root, path, &key, 440 ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
357 sizeof(*ref) + name_len); 441 sizeof(*ref) + name_len);
358 BUG_ON(ret); 442 BUG_ON(ret);
@@ -366,6 +450,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
366 write_extent_buffer(leaf, name, ptr, name_len); 450 write_extent_buffer(leaf, name, ptr, name_len);
367 btrfs_mark_buffer_dirty(leaf); 451 btrfs_mark_buffer_dirty(leaf);
368 452
453 if (key.type == BTRFS_ROOT_BACKREF_KEY) {
454 btrfs_release_path(tree_root, path);
455 key.objectid = ref_id;
456 key.type = BTRFS_ROOT_REF_KEY;
457 key.offset = root_id;
458 goto again;
459 }
460
369 btrfs_free_path(path); 461 btrfs_free_path(path);
370 return ret; 462 return 0;
371} 463}
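
The net effect in root-tree.c: btrfs_add_root_ref() and btrfs_del_root_ref() now maintain the (BTRFS_ROOT_BACKREF_KEY, BTRFS_ROOT_REF_KEY) pair themselves via the `again:` second pass, so callers make a single call per subvolume link. The caller side, as the transaction.c hunk further down shows:

	/* One call now creates both the backref and the forward ref. */
	ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
				 pending->root_key.objectid,	/* child */
				 parent_root->root_key.objectid,/* parent */
				 parent_inode->i_ino, index,
				 pending->name, namelen);
	BUG_ON(ret);
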
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2db17cd66fc5..67035385444c 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -676,6 +676,7 @@ static int btrfs_unfreeze(struct super_block *sb)
676} 676}
677 677
678static const struct super_operations btrfs_super_ops = { 678static const struct super_operations btrfs_super_ops = {
679 .drop_inode = btrfs_drop_inode,
679 .delete_inode = btrfs_delete_inode, 680 .delete_inode = btrfs_delete_inode,
680 .put_super = btrfs_put_super, 681 .put_super = btrfs_put_super,
681 .sync_fs = btrfs_sync_fs, 682 .sync_fs = btrfs_sync_fs,
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index cdbb5022da52..88f866f85e7a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -104,7 +104,6 @@ static noinline int record_root_in_trans(struct btrfs_trans_handle *trans,
104{ 104{
105 if (root->ref_cows && root->last_trans < trans->transid) { 105 if (root->ref_cows && root->last_trans < trans->transid) {
106 WARN_ON(root == root->fs_info->extent_root); 106 WARN_ON(root == root->fs_info->extent_root);
107 WARN_ON(root->root_item.refs == 0);
108 WARN_ON(root->commit_root != root->node); 107 WARN_ON(root->commit_root != root->node);
109 108
110 radix_tree_tag_set(&root->fs_info->fs_roots_radix, 109 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
@@ -720,7 +719,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
720 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 719 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
721 720
722 key.objectid = objectid; 721 key.objectid = objectid;
723 key.offset = 0; 722 /* record when the snapshot was created in key.offset */
723 key.offset = trans->transid;
724 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 724 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
725 725
726 old = btrfs_lock_root_node(root); 726 old = btrfs_lock_root_node(root);
@@ -778,24 +778,14 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
778 ret = btrfs_update_inode(trans, parent_root, parent_inode); 778 ret = btrfs_update_inode(trans, parent_root, parent_inode);
779 BUG_ON(ret); 779 BUG_ON(ret);
780 780
781 /* add the backref first */
782 ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, 781 ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
783 pending->root_key.objectid, 782 pending->root_key.objectid,
784 BTRFS_ROOT_BACKREF_KEY,
785 parent_root->root_key.objectid, 783 parent_root->root_key.objectid,
786 parent_inode->i_ino, index, pending->name, 784 parent_inode->i_ino, index, pending->name,
787 namelen); 785 namelen);
788 786
789 BUG_ON(ret); 787 BUG_ON(ret);
790 788
791 /* now add the forward ref */
792 ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
793 parent_root->root_key.objectid,
794 BTRFS_ROOT_REF_KEY,
795 pending->root_key.objectid,
796 parent_inode->i_ino, index, pending->name,
797 namelen);
798
799 inode = btrfs_lookup_dentry(parent_inode, pending->dentry); 789 inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
800 d_instantiate(pending->dentry, inode); 790 d_instantiate(pending->dentry, inode);
801fail: 791fail:
@@ -874,7 +864,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
874 unsigned long timeout = 1; 864 unsigned long timeout = 1;
875 struct btrfs_transaction *cur_trans; 865 struct btrfs_transaction *cur_trans;
876 struct btrfs_transaction *prev_trans = NULL; 866 struct btrfs_transaction *prev_trans = NULL;
877 struct extent_io_tree *pinned_copy;
878 DEFINE_WAIT(wait); 867 DEFINE_WAIT(wait);
879 int ret; 868 int ret;
880 int should_grow = 0; 869 int should_grow = 0;
@@ -915,13 +904,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
915 return 0; 904 return 0;
916 } 905 }
917 906
918 pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS);
919 if (!pinned_copy)
920 return -ENOMEM;
921
922 extent_io_tree_init(pinned_copy,
923 root->fs_info->btree_inode->i_mapping, GFP_NOFS);
924
925 trans->transaction->in_commit = 1; 907 trans->transaction->in_commit = 1;
926 trans->transaction->blocked = 1; 908 trans->transaction->blocked = 1;
927 if (cur_trans->list.prev != &root->fs_info->trans_list) { 909 if (cur_trans->list.prev != &root->fs_info->trans_list) {
@@ -1019,6 +1001,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1019 ret = commit_cowonly_roots(trans, root); 1001 ret = commit_cowonly_roots(trans, root);
1020 BUG_ON(ret); 1002 BUG_ON(ret);
1021 1003
1004 btrfs_prepare_extent_commit(trans, root);
1005
1022 cur_trans = root->fs_info->running_transaction; 1006 cur_trans = root->fs_info->running_transaction;
1023 spin_lock(&root->fs_info->new_trans_lock); 1007 spin_lock(&root->fs_info->new_trans_lock);
1024 root->fs_info->running_transaction = NULL; 1008 root->fs_info->running_transaction = NULL;
@@ -1042,8 +1026,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1042 memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, 1026 memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
1043 sizeof(root->fs_info->super_copy)); 1027 sizeof(root->fs_info->super_copy));
1044 1028
1045 btrfs_copy_pinned(root, pinned_copy);
1046
1047 trans->transaction->blocked = 0; 1029 trans->transaction->blocked = 0;
1048 1030
1049 wake_up(&root->fs_info->transaction_wait); 1031 wake_up(&root->fs_info->transaction_wait);
@@ -1059,8 +1041,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1059 */ 1041 */
1060 mutex_unlock(&root->fs_info->tree_log_mutex); 1042 mutex_unlock(&root->fs_info->tree_log_mutex);
1061 1043
1062 btrfs_finish_extent_commit(trans, root, pinned_copy); 1044 btrfs_finish_extent_commit(trans, root);
1063 kfree(pinned_copy);
1064 1045
1065 /* do the directory inserts of any pending snapshot creations */ 1046 /* do the directory inserts of any pending snapshot creations */
1066 finish_pending_snapshots(trans, root->fs_info); 1047 finish_pending_snapshots(trans, root->fs_info);
@@ -1096,8 +1077,13 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1096 1077
1097 while (!list_empty(&list)) { 1078 while (!list_empty(&list)) {
1098 root = list_entry(list.next, struct btrfs_root, root_list); 1079 root = list_entry(list.next, struct btrfs_root, root_list);
1099 list_del_init(&root->root_list); 1080 list_del(&root->root_list);
1100 btrfs_drop_snapshot(root, 0); 1081
1082 if (btrfs_header_backref_rev(root->node) <
1083 BTRFS_MIXED_BACKREF_REV)
1084 btrfs_drop_snapshot(root, 0);
1085 else
1086 btrfs_drop_snapshot(root, 1);
1101 } 1087 }
1102 return 0; 1088 return 0;
1103} 1089}
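
Condensed from the btrfs_clean_old_snapshots() hunk: the backref revision stored in the root node now decides how a dead snapshot is dropped; the second argument to btrfs_drop_snapshot() apparently selects whether references are updated during the drop, so pre-mixed-backref roots skip the updates:

	if (btrfs_header_backref_rev(root->node) < BTRFS_MIXED_BACKREF_REV)
		btrfs_drop_snapshot(root, 0);	/* old format: no ref updates */
	else
		btrfs_drop_snapshot(root, 1);	/* mixed backrefs: update refs */
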
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 30c0d45c1b5e..7827841b55cb 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -263,8 +263,8 @@ static int process_one_buffer(struct btrfs_root *log,
263 struct walk_control *wc, u64 gen) 263 struct walk_control *wc, u64 gen)
264{ 264{
265 if (wc->pin) 265 if (wc->pin)
266 btrfs_update_pinned_extents(log->fs_info->extent_root, 266 btrfs_pin_extent(log->fs_info->extent_root,
267 eb->start, eb->len, 1); 267 eb->start, eb->len, 0);
268 268
269 if (btrfs_buffer_uptodate(eb, gen)) { 269 if (btrfs_buffer_uptodate(eb, gen)) {
270 if (wc->write) 270 if (wc->write)
@@ -534,7 +534,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
534 saved_nbytes = inode_get_bytes(inode); 534 saved_nbytes = inode_get_bytes(inode);
535 /* drop any overlapping extents */ 535 /* drop any overlapping extents */
536 ret = btrfs_drop_extents(trans, root, inode, 536 ret = btrfs_drop_extents(trans, root, inode,
537 start, extent_end, extent_end, start, &alloc_hint); 537 start, extent_end, extent_end, start, &alloc_hint, 1);
538 BUG_ON(ret); 538 BUG_ON(ret);
539 539
540 if (found_type == BTRFS_FILE_EXTENT_REG || 540 if (found_type == BTRFS_FILE_EXTENT_REG ||
@@ -2841,7 +2841,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
2841 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) 2841 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
2842 break; 2842 break;
2843 2843
2844 if (parent == sb->s_root) 2844 if (IS_ROOT(parent))
2845 break; 2845 break;
2846 2846
2847 parent = parent->d_parent; 2847 parent = parent->d_parent;
@@ -2880,6 +2880,12 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
2880 goto end_no_trans; 2880 goto end_no_trans;
2881 } 2881 }
2882 2882
2883 if (root != BTRFS_I(inode)->root ||
2884 btrfs_root_refs(&root->root_item) == 0) {
2885 ret = 1;
2886 goto end_no_trans;
2887 }
2888
2883 ret = check_parent_dirs_for_sync(trans, inode, parent, 2889 ret = check_parent_dirs_for_sync(trans, inode, parent,
2884 sb, last_committed); 2890 sb, last_committed);
2885 if (ret) 2891 if (ret)
@@ -2907,12 +2913,15 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
2907 break; 2913 break;
2908 2914
2909 inode = parent->d_inode; 2915 inode = parent->d_inode;
2916 if (root != BTRFS_I(inode)->root)
2917 break;
2918
2910 if (BTRFS_I(inode)->generation > 2919 if (BTRFS_I(inode)->generation >
2911 root->fs_info->last_trans_committed) { 2920 root->fs_info->last_trans_committed) {
2912 ret = btrfs_log_inode(trans, root, inode, inode_only); 2921 ret = btrfs_log_inode(trans, root, inode, inode_only);
2913 BUG_ON(ret); 2922 BUG_ON(ret);
2914 } 2923 }
2915 if (parent == sb->s_root) 2924 if (IS_ROOT(parent))
2916 break; 2925 break;
2917 2926
2918 parent = parent->d_parent; 2927 parent = parent->d_parent;
@@ -2951,7 +2960,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
2951 struct btrfs_key tmp_key; 2960 struct btrfs_key tmp_key;
2952 struct btrfs_root *log; 2961 struct btrfs_root *log;
2953 struct btrfs_fs_info *fs_info = log_root_tree->fs_info; 2962 struct btrfs_fs_info *fs_info = log_root_tree->fs_info;
2954 u64 highest_inode;
2955 struct walk_control wc = { 2963 struct walk_control wc = {
2956 .process_func = process_one_buffer, 2964 .process_func = process_one_buffer,
2957 .stage = 0, 2965 .stage = 0,
@@ -3010,11 +3018,6 @@ again:
3010 path); 3018 path);
3011 BUG_ON(ret); 3019 BUG_ON(ret);
3012 } 3020 }
3013 ret = btrfs_find_highest_inode(wc.replay_dest, &highest_inode);
3014 if (ret == 0) {
3015 wc.replay_dest->highest_inode = highest_inode;
3016 wc.replay_dest->last_inode_alloc = highest_inode;
3017 }
3018 3021
3019 key.offset = found_key.offset - 1; 3022 key.offset = found_key.offset - 1;
3020 wc.replay_dest->log_root = NULL; 3023 wc.replay_dest->log_root = NULL;
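
Two guards are added to fsync logging above, condensed here: btrfs_log_inode_parent() refuses to log when the inode belongs to a different root than the caller's or to a root already queued for deletion, and the parent walk now stops at any subvolume root via IS_ROOT() instead of comparing against sb->s_root. The new early-out:

	if (root != BTRFS_I(inode)->root ||
	    btrfs_root_refs(&root->root_item) == 0) {
		ret = 1;	/* signals the caller to fall back to a
				 * full transaction commit instead */
		goto end_no_trans;
	}
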
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5cf405b0828d..23e7d36ff325 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -276,7 +276,7 @@ loop_lock:
276 * is now congested. Back off and let other work structs 276 * is now congested. Back off and let other work structs
277 * run instead 277 * run instead
278 */ 278 */
279 if (pending && bdi_write_congested(bdi) && batch_run > 32 && 279 if (pending && bdi_write_congested(bdi) && batch_run > 8 &&
280 fs_info->fs_devices->open_devices > 1) { 280 fs_info->fs_devices->open_devices > 1) {
281 struct io_context *ioc; 281 struct io_context *ioc;
282 282
@@ -719,10 +719,9 @@ error:
719 * called very infrequently and that a given device has a small number 719 * called very infrequently and that a given device has a small number
720 * of extents 720 * of extents
721 */ 721 */
722static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, 722int find_free_dev_extent(struct btrfs_trans_handle *trans,
723 struct btrfs_device *device, 723 struct btrfs_device *device, u64 num_bytes,
724 u64 num_bytes, u64 *start, 724 u64 *start, u64 *max_avail)
725 u64 *max_avail)
726{ 725{
727 struct btrfs_key key; 726 struct btrfs_key key;
728 struct btrfs_root *root = device->dev_root; 727 struct btrfs_root *root = device->dev_root;
@@ -1736,6 +1735,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
1736 extent_root = root->fs_info->extent_root; 1735 extent_root = root->fs_info->extent_root;
1737 em_tree = &root->fs_info->mapping_tree.map_tree; 1736 em_tree = &root->fs_info->mapping_tree.map_tree;
1738 1737
1738 ret = btrfs_can_relocate(extent_root, chunk_offset);
1739 if (ret)
1740 return -ENOSPC;
1741
1739 /* step one, relocate all the extents inside this chunk */ 1742 /* step one, relocate all the extents inside this chunk */
1740 ret = btrfs_relocate_block_group(extent_root, chunk_offset); 1743 ret = btrfs_relocate_block_group(extent_root, chunk_offset);
1741 BUG_ON(ret); 1744 BUG_ON(ret);
@@ -1749,9 +1752,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
1749 * step two, delete the device extents and the 1752 * step two, delete the device extents and the
1750 * chunk tree entries 1753 * chunk tree entries
1751 */ 1754 */
1752 spin_lock(&em_tree->lock); 1755 read_lock(&em_tree->lock);
1753 em = lookup_extent_mapping(em_tree, chunk_offset, 1); 1756 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
1754 spin_unlock(&em_tree->lock); 1757 read_unlock(&em_tree->lock);
1755 1758
1756 BUG_ON(em->start > chunk_offset || 1759 BUG_ON(em->start > chunk_offset ||
1757 em->start + em->len < chunk_offset); 1760 em->start + em->len < chunk_offset);
@@ -1780,9 +1783,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
1780 ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); 1783 ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
1781 BUG_ON(ret); 1784 BUG_ON(ret);
1782 1785
1783 spin_lock(&em_tree->lock); 1786 write_lock(&em_tree->lock);
1784 remove_extent_mapping(em_tree, em); 1787 remove_extent_mapping(em_tree, em);
1785 spin_unlock(&em_tree->lock); 1788 write_unlock(&em_tree->lock);
1786 1789
1787 kfree(map); 1790 kfree(map);
1788 em->bdev = NULL; 1791 em->bdev = NULL;
@@ -1807,12 +1810,15 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
1807 struct btrfs_key found_key; 1810 struct btrfs_key found_key;
1808 u64 chunk_tree = chunk_root->root_key.objectid; 1811 u64 chunk_tree = chunk_root->root_key.objectid;
1809 u64 chunk_type; 1812 u64 chunk_type;
1813 bool retried = false;
1814 int failed = 0;
1810 int ret; 1815 int ret;
1811 1816
1812 path = btrfs_alloc_path(); 1817 path = btrfs_alloc_path();
1813 if (!path) 1818 if (!path)
1814 return -ENOMEM; 1819 return -ENOMEM;
1815 1820
1821again:
1816 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; 1822 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1817 key.offset = (u64)-1; 1823 key.offset = (u64)-1;
1818 key.type = BTRFS_CHUNK_ITEM_KEY; 1824 key.type = BTRFS_CHUNK_ITEM_KEY;
@@ -1842,7 +1848,10 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
1842 ret = btrfs_relocate_chunk(chunk_root, chunk_tree, 1848 ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
1843 found_key.objectid, 1849 found_key.objectid,
1844 found_key.offset); 1850 found_key.offset);
1845 BUG_ON(ret); 1851 if (ret == -ENOSPC)
1852 failed++;
1853 else if (ret)
1854 BUG();
1846 } 1855 }
1847 1856
1848 if (found_key.offset == 0) 1857 if (found_key.offset == 0)
@@ -1850,6 +1859,14 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
1850 key.offset = found_key.offset - 1; 1859 key.offset = found_key.offset - 1;
1851 } 1860 }
1852 ret = 0; 1861 ret = 0;
1862 if (failed && !retried) {
1863 failed = 0;
1864 retried = true;
1865 goto again;
1866 } else if (failed && retried) {
1867 WARN_ON(1);
1868 ret = -ENOSPC;
1869 }
1853error: 1870error:
1854 btrfs_free_path(path); 1871 btrfs_free_path(path);
1855 return ret; 1872 return ret;
@@ -1894,6 +1911,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
1894 continue; 1911 continue;
1895 1912
1896 ret = btrfs_shrink_device(device, old_size - size_to_free); 1913 ret = btrfs_shrink_device(device, old_size - size_to_free);
1914 if (ret == -ENOSPC)
1915 break;
1897 BUG_ON(ret); 1916 BUG_ON(ret);
1898 1917
1899 trans = btrfs_start_transaction(dev_root, 1); 1918 trans = btrfs_start_transaction(dev_root, 1);
@@ -1938,9 +1957,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
1938 chunk = btrfs_item_ptr(path->nodes[0], 1957 chunk = btrfs_item_ptr(path->nodes[0],
1939 path->slots[0], 1958 path->slots[0],
1940 struct btrfs_chunk); 1959 struct btrfs_chunk);
1941 key.offset = found_key.offset;
1942 /* chunk zero is special */ 1960 /* chunk zero is special */
1943 if (key.offset == 0) 1961 if (found_key.offset == 0)
1944 break; 1962 break;
1945 1963
1946 btrfs_release_path(chunk_root, path); 1964 btrfs_release_path(chunk_root, path);
@@ -1948,7 +1966,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
1948 chunk_root->root_key.objectid, 1966 chunk_root->root_key.objectid,
1949 found_key.objectid, 1967 found_key.objectid,
1950 found_key.offset); 1968 found_key.offset);
1951 BUG_ON(ret); 1969 BUG_ON(ret && ret != -ENOSPC);
1970 key.offset = found_key.offset - 1;
1952 } 1971 }
1953 ret = 0; 1972 ret = 0;
1954error: 1973error:
@@ -1974,10 +1993,13 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1974 u64 chunk_offset; 1993 u64 chunk_offset;
1975 int ret; 1994 int ret;
1976 int slot; 1995 int slot;
1996 int failed = 0;
1997 bool retried = false;
1977 struct extent_buffer *l; 1998 struct extent_buffer *l;
1978 struct btrfs_key key; 1999 struct btrfs_key key;
1979 struct btrfs_super_block *super_copy = &root->fs_info->super_copy; 2000 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1980 u64 old_total = btrfs_super_total_bytes(super_copy); 2001 u64 old_total = btrfs_super_total_bytes(super_copy);
2002 u64 old_size = device->total_bytes;
1981 u64 diff = device->total_bytes - new_size; 2003 u64 diff = device->total_bytes - new_size;
1982 2004
1983 if (new_size >= device->total_bytes) 2005 if (new_size >= device->total_bytes)
@@ -1987,12 +2009,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1987 if (!path) 2009 if (!path)
1988 return -ENOMEM; 2010 return -ENOMEM;
1989 2011
1990 trans = btrfs_start_transaction(root, 1);
1991 if (!trans) {
1992 ret = -ENOMEM;
1993 goto done;
1994 }
1995
1996 path->reada = 2; 2012 path->reada = 2;
1997 2013
1998 lock_chunks(root); 2014 lock_chunks(root);
@@ -2001,8 +2017,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
2001 if (device->writeable) 2017 if (device->writeable)
2002 device->fs_devices->total_rw_bytes -= diff; 2018 device->fs_devices->total_rw_bytes -= diff;
2003 unlock_chunks(root); 2019 unlock_chunks(root);
2004 btrfs_end_transaction(trans, root);
2005 2020
2021again:
2006 key.objectid = device->devid; 2022 key.objectid = device->devid;
2007 key.offset = (u64)-1; 2023 key.offset = (u64)-1;
2008 key.type = BTRFS_DEV_EXTENT_KEY; 2024 key.type = BTRFS_DEV_EXTENT_KEY;
@@ -2017,6 +2033,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
2017 goto done; 2033 goto done;
2018 if (ret) { 2034 if (ret) {
2019 ret = 0; 2035 ret = 0;
2036 btrfs_release_path(root, path);
2020 break; 2037 break;
2021 } 2038 }
2022 2039
@@ -2024,14 +2041,18 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
2024 slot = path->slots[0]; 2041 slot = path->slots[0];
2025 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 2042 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
2026 2043
2027 if (key.objectid != device->devid) 2044 if (key.objectid != device->devid) {
2045 btrfs_release_path(root, path);
2028 break; 2046 break;
2047 }
2029 2048
2030 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); 2049 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
2031 length = btrfs_dev_extent_length(l, dev_extent); 2050 length = btrfs_dev_extent_length(l, dev_extent);
2032 2051
2033 if (key.offset + length <= new_size) 2052 if (key.offset + length <= new_size) {
2053 btrfs_release_path(root, path);
2034 break; 2054 break;
2055 }
2035 2056
2036 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); 2057 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
2037 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); 2058 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
@@ -2040,8 +2061,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
2040 2061
2041 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, 2062 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
2042 chunk_offset); 2063 chunk_offset);
2043 if (ret) 2064 if (ret && ret != -ENOSPC)
2044 goto done; 2065 goto done;
2066 if (ret == -ENOSPC)
2067 failed++;
2068 key.offset -= 1;
2069 }
2070
2071 if (failed && !retried) {
2072 failed = 0;
2073 retried = true;
2074 goto again;
2075 } else if (failed && retried) {
2076 ret = -ENOSPC;
2077 lock_chunks(root);
2078
2079 device->total_bytes = old_size;
2080 if (device->writeable)
2081 device->fs_devices->total_rw_bytes += diff;
2082 unlock_chunks(root);
2083 goto done;
2045 } 2084 }
2046 2085
2047 /* Shrinking succeeded, else we would be at "done". */ 2086 /* Shrinking succeeded, else we would be at "done". */
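
Both btrfs_relocate_sys_chunks() and btrfs_shrink_device() now share the same retry skeleton, condensed here: -ENOSPC from a chunk relocation is counted rather than treated as fatal, the whole scan is replayed once (relocations that succeeded in pass one may have freed space), and only a second full pass of failures surfaces -ENOSPC to the caller:

	int failed = 0;
	bool retried = false;
again:
	/* ... walk the chunk/dev-extent items as in the hunks above,
	 * calling btrfs_relocate_chunk() on each candidate ... */
	if (ret == -ENOSPC)
		failed++;		/* remember it, keep scanning */
	else if (ret)
		goto done;		/* anything else stays fatal */

	if (failed && !retried) {
		failed = 0;
		retried = true;
		goto again;		/* one full retry pass */
	} else if (failed && retried) {
		ret = -ENOSPC;		/* still stuck: report it */
	}
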
@@ -2294,9 +2333,9 @@ again:
2294 em->block_len = em->len; 2333 em->block_len = em->len;
2295 2334
2296 em_tree = &extent_root->fs_info->mapping_tree.map_tree; 2335 em_tree = &extent_root->fs_info->mapping_tree.map_tree;
2297 spin_lock(&em_tree->lock); 2336 write_lock(&em_tree->lock);
2298 ret = add_extent_mapping(em_tree, em); 2337 ret = add_extent_mapping(em_tree, em);
2299 spin_unlock(&em_tree->lock); 2338 write_unlock(&em_tree->lock);
2300 BUG_ON(ret); 2339 BUG_ON(ret);
2301 free_extent_map(em); 2340 free_extent_map(em);
2302 2341
@@ -2491,9 +2530,9 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
2491 int readonly = 0; 2530 int readonly = 0;
2492 int i; 2531 int i;
2493 2532
2494 spin_lock(&map_tree->map_tree.lock); 2533 read_lock(&map_tree->map_tree.lock);
2495 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); 2534 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
2496 spin_unlock(&map_tree->map_tree.lock); 2535 read_unlock(&map_tree->map_tree.lock);
2497 if (!em) 2536 if (!em)
2498 return 1; 2537 return 1;
2499 2538
@@ -2518,11 +2557,11 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
2518 struct extent_map *em; 2557 struct extent_map *em;
2519 2558
2520 while (1) { 2559 while (1) {
2521 spin_lock(&tree->map_tree.lock); 2560 write_lock(&tree->map_tree.lock);
2522 em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); 2561 em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
2523 if (em) 2562 if (em)
2524 remove_extent_mapping(&tree->map_tree, em); 2563 remove_extent_mapping(&tree->map_tree, em);
2525 spin_unlock(&tree->map_tree.lock); 2564 write_unlock(&tree->map_tree.lock);
2526 if (!em) 2565 if (!em)
2527 break; 2566 break;
2528 kfree(em->bdev); 2567 kfree(em->bdev);
@@ -2540,9 +2579,9 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
2540 struct extent_map_tree *em_tree = &map_tree->map_tree; 2579 struct extent_map_tree *em_tree = &map_tree->map_tree;
2541 int ret; 2580 int ret;
2542 2581
2543 spin_lock(&em_tree->lock); 2582 read_lock(&em_tree->lock);
2544 em = lookup_extent_mapping(em_tree, logical, len); 2583 em = lookup_extent_mapping(em_tree, logical, len);
2545 spin_unlock(&em_tree->lock); 2584 read_unlock(&em_tree->lock);
2546 BUG_ON(!em); 2585 BUG_ON(!em);
2547 2586
2548 BUG_ON(em->start > logical || em->start + em->len < logical); 2587 BUG_ON(em->start > logical || em->start + em->len < logical);
@@ -2604,9 +2643,9 @@ again:
2604 atomic_set(&multi->error, 0); 2643 atomic_set(&multi->error, 0);
2605 } 2644 }
2606 2645
2607 spin_lock(&em_tree->lock); 2646 read_lock(&em_tree->lock);
2608 em = lookup_extent_mapping(em_tree, logical, *length); 2647 em = lookup_extent_mapping(em_tree, logical, *length);
2609 spin_unlock(&em_tree->lock); 2648 read_unlock(&em_tree->lock);
2610 2649
2611 if (!em && unplug_page) 2650 if (!em && unplug_page)
2612 return 0; 2651 return 0;
@@ -2763,9 +2802,9 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
2763 u64 stripe_nr; 2802 u64 stripe_nr;
2764 int i, j, nr = 0; 2803 int i, j, nr = 0;
2765 2804
2766 spin_lock(&em_tree->lock); 2805 read_lock(&em_tree->lock);
2767 em = lookup_extent_mapping(em_tree, chunk_start, 1); 2806 em = lookup_extent_mapping(em_tree, chunk_start, 1);
2768 spin_unlock(&em_tree->lock); 2807 read_unlock(&em_tree->lock);
2769 2808
2770 BUG_ON(!em || em->start != chunk_start); 2809 BUG_ON(!em || em->start != chunk_start);
2771 map = (struct map_lookup *)em->bdev; 2810 map = (struct map_lookup *)em->bdev;
@@ -3053,9 +3092,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
3053 logical = key->offset; 3092 logical = key->offset;
3054 length = btrfs_chunk_length(leaf, chunk); 3093 length = btrfs_chunk_length(leaf, chunk);
3055 3094
3056 spin_lock(&map_tree->map_tree.lock); 3095 read_lock(&map_tree->map_tree.lock);
3057 em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); 3096 em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
3058 spin_unlock(&map_tree->map_tree.lock); 3097 read_unlock(&map_tree->map_tree.lock);
3059 3098
3060 /* already mapped? */ 3099 /* already mapped? */
3061 if (em && em->start <= logical && em->start + em->len > logical) { 3100 if (em && em->start <= logical && em->start + em->len > logical) {
@@ -3114,9 +3153,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
3114 map->stripes[i].dev->in_fs_metadata = 1; 3153 map->stripes[i].dev->in_fs_metadata = 1;
3115 } 3154 }
3116 3155
3117 spin_lock(&map_tree->map_tree.lock); 3156 write_lock(&map_tree->map_tree.lock);
3118 ret = add_extent_mapping(&map_tree->map_tree, em); 3157 ret = add_extent_mapping(&map_tree->map_tree, em);
3119 spin_unlock(&map_tree->map_tree.lock); 3158 write_unlock(&map_tree->map_tree.lock);
3120 BUG_ON(ret); 3159 BUG_ON(ret);
3121 free_extent_map(em); 3160 free_extent_map(em);
3122 3161
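
The remaining volumes.c churn is mechanical: extent_map_tree.lock changes from a spinlock to an rwlock elsewhere in this series, so every lookup_extent_mapping() site moves to the shared lock while insert and remove take it exclusive. The convention, condensed:

	read_lock(&em_tree->lock);	/* lookups: shared */
	em = lookup_extent_mapping(em_tree, logical, len);
	read_unlock(&em_tree->lock);

	write_lock(&em_tree->lock);	/* mutations: exclusive */
	ret = add_extent_mapping(em_tree, em);
	write_unlock(&em_tree->lock);
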
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5139a833f721..31b0fabdd2ea 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -181,4 +181,7 @@ int btrfs_balance(struct btrfs_root *dev_root);
181void btrfs_unlock_volumes(void); 181void btrfs_unlock_volumes(void);
182void btrfs_lock_volumes(void); 182void btrfs_lock_volumes(void);
183int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); 183int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
184int find_free_dev_extent(struct btrfs_trans_handle *trans,
185 struct btrfs_device *device, u64 num_bytes,
186 u64 *start, u64 *max_avail);
184#endif 187#endif
diff --git a/fs/buffer.c b/fs/buffer.c
index 209f7f15f5f8..24afd7422ae8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2239,16 +2239,10 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size)
2239 struct address_space *mapping = inode->i_mapping; 2239 struct address_space *mapping = inode->i_mapping;
2240 struct page *page; 2240 struct page *page;
2241 void *fsdata; 2241 void *fsdata;
2242 unsigned long limit;
2243 int err; 2242 int err;
2244 2243
2245 err = -EFBIG; 2244 err = inode_newsize_ok(inode, size);
2246 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; 2245 if (err)
2247 if (limit != RLIM_INFINITY && size > (loff_t)limit) {
2248 send_sig(SIGXFSZ, current, 0);
2249 goto out;
2250 }
2251 if (size > inode->i_sb->s_maxbytes)
2252 goto out; 2246 goto out;
2253 2247
2254 err = pagecache_write_begin(NULL, mapping, size, 0, 2248 err = pagecache_write_begin(NULL, mapping, size, 0,
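
The generic_cont_expand_simple() hunk trades the open-coded RLIMIT_FSIZE and s_maxbytes checks for the new inode_newsize_ok() helper, which presumably also takes care of raising SIGXFSZ, leaving the caller with only the error propagation:

	err = inode_newsize_ok(inode, size);
	if (err)
		goto out;	/* -EFBIG etc.; signalling assumed to be
				 * handled inside the helper */
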
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 3cbc57f932d2..d6db933df2b2 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -264,7 +264,6 @@ int __register_chrdev(unsigned int major, unsigned int baseminor,
264{ 264{
265 struct char_device_struct *cd; 265 struct char_device_struct *cd;
266 struct cdev *cdev; 266 struct cdev *cdev;
267 char *s;
268 int err = -ENOMEM; 267 int err = -ENOMEM;
269 268
270 cd = __register_chrdev_region(major, baseminor, count, name); 269 cd = __register_chrdev_region(major, baseminor, count, name);
@@ -278,8 +277,6 @@ int __register_chrdev(unsigned int major, unsigned int baseminor,
278 cdev->owner = fops->owner; 277 cdev->owner = fops->owner;
279 cdev->ops = fops; 278 cdev->ops = fops;
280 kobject_set_name(&cdev->kobj, "%s", name); 279 kobject_set_name(&cdev->kobj, "%s", name);
281 for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/'))
282 *s = '!';
283 280
284 err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); 281 err = cdev_add(cdev, MKDEV(cd->major, baseminor), count);
285 if (err) 282 if (err)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d79ce2e95c23..90c5b39f0313 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -185,8 +185,7 @@ out_mount_failed:
185 cifs_sb->mountdata = NULL; 185 cifs_sb->mountdata = NULL;
186 } 186 }
187#endif 187#endif
188 if (cifs_sb->local_nls) 188 unload_nls(cifs_sb->local_nls);
189 unload_nls(cifs_sb->local_nls);
190 kfree(cifs_sb); 189 kfree(cifs_sb);
191 } 190 }
192 return rc; 191 return rc;
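
The cifs change leans on unload_nls() now tolerating a NULL table itself; that is the only reading under which dropping the guard is safe, and it leaves callers with an unconditional:

	unload_nls(cifs_sb->local_nls);	/* assumed no-op when local_nls is NULL */
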
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 1f09c7619319..5e2492535daa 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1557,57 +1557,24 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from)
1557 1557
1558static int cifs_vmtruncate(struct inode *inode, loff_t offset) 1558static int cifs_vmtruncate(struct inode *inode, loff_t offset)
1559{ 1559{
1560 struct address_space *mapping = inode->i_mapping; 1560 loff_t oldsize;
1561 unsigned long limit; 1561 int err;
1562 1562
1563 spin_lock(&inode->i_lock); 1563 spin_lock(&inode->i_lock);
1564 if (inode->i_size < offset) 1564 err = inode_newsize_ok(inode, offset);
1565 goto do_expand; 1565 if (err) {
1566 /*
1567 * truncation of in-use swapfiles is disallowed - it would cause
1568 * subsequent swapout to scribble on the now-freed blocks.
1569 */
1570 if (IS_SWAPFILE(inode)) {
1571 spin_unlock(&inode->i_lock);
1572 goto out_busy;
1573 }
1574 i_size_write(inode, offset);
1575 spin_unlock(&inode->i_lock);
1576 /*
1577 * unmap_mapping_range is called twice, first simply for efficiency
1578 * so that truncate_inode_pages does fewer single-page unmaps. However
1579 * after this first call, and before truncate_inode_pages finishes,
1580 * it is possible for private pages to be COWed, which remain after
1581 * truncate_inode_pages finishes, hence the second unmap_mapping_range
1582 * call must be made for correctness.
1583 */
1584 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
1585 truncate_inode_pages(mapping, offset);
1586 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
1587 goto out_truncate;
1588
1589do_expand:
1590 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
1591 if (limit != RLIM_INFINITY && offset > limit) {
1592 spin_unlock(&inode->i_lock); 1566 spin_unlock(&inode->i_lock);
1593 goto out_sig; 1567 goto out;
1594 }
1595 if (offset > inode->i_sb->s_maxbytes) {
1596 spin_unlock(&inode->i_lock);
1597 goto out_big;
1598 } 1568 }
1569
1570 oldsize = inode->i_size;
1599 i_size_write(inode, offset); 1571 i_size_write(inode, offset);
1600 spin_unlock(&inode->i_lock); 1572 spin_unlock(&inode->i_lock);
1601out_truncate: 1573 truncate_pagecache(inode, oldsize, offset);
1602 if (inode->i_op->truncate) 1574 if (inode->i_op->truncate)
1603 inode->i_op->truncate(inode); 1575 inode->i_op->truncate(inode);
1604 return 0; 1576out:
1605out_sig: 1577 return err;
1606 send_sig(SIGXFSZ, current, 0);
1607out_big:
1608 return -EFBIG;
1609out_busy:
1610 return -ETXTBSY;
1611} 1578}
1612 1579
1613static int 1580static int
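
The rewritten cifs_vmtruncate() reduces to a now-standard shape: validate the new size, publish it under i_lock, then let truncate_pagecache() perform the unmap/truncate/unmap sequence that the deleted comment used to explain. Condensed:

	spin_lock(&inode->i_lock);
	err = inode_newsize_ok(inode, offset);
	if (err) {
		spin_unlock(&inode->i_lock);
		return err;
	}
	oldsize = inode->i_size;
	i_size_write(inode, offset);	/* publish the new size first */
	spin_unlock(&inode->i_lock);
	truncate_pagecache(inode, oldsize, offset);
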
diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h
index 8ccd5ed81d9c..d99860a33890 100644
--- a/fs/coda/coda_int.h
+++ b/fs/coda/coda_int.h
@@ -2,6 +2,7 @@
2#define _CODA_INT_ 2#define _CODA_INT_
3 3
4struct dentry; 4struct dentry;
5struct file;
5 6
6extern struct file_system_type coda_fs_type; 7extern struct file_system_type coda_fs_type;
7extern unsigned long coda_timeout; 8extern unsigned long coda_timeout;
diff --git a/fs/compat.c b/fs/compat.c
index 3aa48834a222..d576b552e8e2 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -768,13 +768,13 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
768 char __user * type, unsigned long flags, 768 char __user * type, unsigned long flags,
769 void __user * data) 769 void __user * data)
770{ 770{
771 unsigned long type_page; 771 char *kernel_type;
772 unsigned long data_page; 772 unsigned long data_page;
773 unsigned long dev_page; 773 char *kernel_dev;
774 char *dir_page; 774 char *dir_page;
775 int retval; 775 int retval;
776 776
777 retval = copy_mount_options (type, &type_page); 777 retval = copy_mount_string(type, &kernel_type);
778 if (retval < 0) 778 if (retval < 0)
779 goto out; 779 goto out;
780 780
@@ -783,38 +783,38 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
783 if (IS_ERR(dir_page)) 783 if (IS_ERR(dir_page))
784 goto out1; 784 goto out1;
785 785
786 retval = copy_mount_options (dev_name, &dev_page); 786 retval = copy_mount_string(dev_name, &kernel_dev);
787 if (retval < 0) 787 if (retval < 0)
788 goto out2; 788 goto out2;
789 789
790 retval = copy_mount_options (data, &data_page); 790 retval = copy_mount_options(data, &data_page);
791 if (retval < 0) 791 if (retval < 0)
792 goto out3; 792 goto out3;
793 793
794 retval = -EINVAL; 794 retval = -EINVAL;
795 795
796 if (type_page && data_page) { 796 if (kernel_type && data_page) {
797 if (!strcmp((char *)type_page, SMBFS_NAME)) { 797 if (!strcmp(kernel_type, SMBFS_NAME)) {
798 do_smb_super_data_conv((void *)data_page); 798 do_smb_super_data_conv((void *)data_page);
799 } else if (!strcmp((char *)type_page, NCPFS_NAME)) { 799 } else if (!strcmp(kernel_type, NCPFS_NAME)) {
800 do_ncp_super_data_conv((void *)data_page); 800 do_ncp_super_data_conv((void *)data_page);
801 } else if (!strcmp((char *)type_page, NFS4_NAME)) { 801 } else if (!strcmp(kernel_type, NFS4_NAME)) {
802 if (do_nfs4_super_data_conv((void *) data_page)) 802 if (do_nfs4_super_data_conv((void *) data_page))
803 goto out4; 803 goto out4;
804 } 804 }
805 } 805 }
806 806
807 retval = do_mount((char*)dev_page, dir_page, (char*)type_page, 807 retval = do_mount(kernel_dev, dir_page, kernel_type,
808 flags, (void*)data_page); 808 flags, (void*)data_page);
809 809
810 out4: 810 out4:
811 free_page(data_page); 811 free_page(data_page);
812 out3: 812 out3:
813 free_page(dev_page); 813 kfree(kernel_dev);
814 out2: 814 out2:
815 putname(dir_page); 815 putname(dir_page);
816 out1: 816 out1:
817 free_page(type_page); 817 kfree(kernel_type);
818 out: 818 out:
819 return retval; 819 return retval;
820} 820}
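
The compat_sys_mount() conversion changes allocation ownership: the type and device names now arrive as kmalloc()ed strings from copy_mount_string(), while the opaque data blob keeps its full page from copy_mount_options(). The cleanup pairs up accordingly; a sketch of the ownership rules:

	char *kernel_type;		/* kmalloc()ed by copy_mount_string() */
	unsigned long data_page;	/* page from copy_mount_options() */

	retval = copy_mount_string(type, &kernel_type);
	if (retval < 0)
		goto out;
	retval = copy_mount_options(data, &data_page);
	if (retval < 0)
		goto out_type;

	/* ... data conversion and do_mount() as in the hunk above ... */

	free_page(data_page);	/* page allocator pairs with free_page() */
out_type:
	kfree(kernel_type);	/* slab pairs with kfree() */
out:
	return retval;
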
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index a2edb7913447..31f4b0e6d72c 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -63,9 +63,9 @@ static void drop_slab(void)
63} 63}
64 64
65int drop_caches_sysctl_handler(ctl_table *table, int write, 65int drop_caches_sysctl_handler(ctl_table *table, int write,
66 struct file *file, void __user *buffer, size_t *length, loff_t *ppos) 66 void __user *buffer, size_t *length, loff_t *ppos)
67{ 67{
68 proc_dointvec_minmax(table, write, file, buffer, length, ppos); 68 proc_dointvec_minmax(table, write, buffer, length, ppos);
69 if (write) { 69 if (write) {
70 if (sysctl_drop_caches & 1) 70 if (sysctl_drop_caches & 1)
71 drop_pagecache(); 71 drop_pagecache();
diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig
index 0c754e64232b..8aadb99b7634 100644
--- a/fs/ecryptfs/Kconfig
+++ b/fs/ecryptfs/Kconfig
@@ -1,6 +1,8 @@
1config ECRYPT_FS 1config ECRYPT_FS
2 tristate "eCrypt filesystem layer support (EXPERIMENTAL)" 2 tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
3 depends on EXPERIMENTAL && KEYS && CRYPTO && NET 3 depends on EXPERIMENTAL && KEYS && NET
4 select CRYPTO_ECB
5 select CRYPTO_CBC
4 help 6 help
5 Encrypted filesystem that operates on the VFS layer. See 7 Encrypted filesystem that operates on the VFS layer. See
6 <file:Documentation/filesystems/ecryptfs.txt> to learn more about 8 <file:Documentation/filesystems/ecryptfs.txt> to learn more about
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index b91851f1cda3..fbb6e5eed697 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -245,13 +245,11 @@ void ecryptfs_destroy_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
245 crypto_free_blkcipher(crypt_stat->tfm); 245 crypto_free_blkcipher(crypt_stat->tfm);
246 if (crypt_stat->hash_tfm) 246 if (crypt_stat->hash_tfm)
247 crypto_free_hash(crypt_stat->hash_tfm); 247 crypto_free_hash(crypt_stat->hash_tfm);
248 mutex_lock(&crypt_stat->keysig_list_mutex);
249 list_for_each_entry_safe(key_sig, key_sig_tmp, 248 list_for_each_entry_safe(key_sig, key_sig_tmp,
250 &crypt_stat->keysig_list, crypt_stat_list) { 249 &crypt_stat->keysig_list, crypt_stat_list) {
251 list_del(&key_sig->crypt_stat_list); 250 list_del(&key_sig->crypt_stat_list);
252 kmem_cache_free(ecryptfs_key_sig_cache, key_sig); 251 kmem_cache_free(ecryptfs_key_sig_cache, key_sig);
253 } 252 }
254 mutex_unlock(&crypt_stat->keysig_list_mutex);
255 memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat)); 253 memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat));
256} 254}
257 255
@@ -511,13 +509,14 @@ int ecryptfs_encrypt_page(struct page *page)
511 + extent_offset), crypt_stat); 509 + extent_offset), crypt_stat);
512 rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, 510 rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt,
513 offset, crypt_stat->extent_size); 511 offset, crypt_stat->extent_size);
514 if (rc) { 512 if (rc < 0) {
515 ecryptfs_printk(KERN_ERR, "Error attempting " 513 ecryptfs_printk(KERN_ERR, "Error attempting "
516 "to write lower page; rc = [%d]" 514 "to write lower page; rc = [%d]"
517 "\n", rc); 515 "\n", rc);
518 goto out; 516 goto out;
519 } 517 }
520 } 518 }
519 rc = 0;
521out: 520out:
522 if (enc_extent_page) { 521 if (enc_extent_page) {
523 kunmap(enc_extent_page); 522 kunmap(enc_extent_page);
@@ -633,7 +632,7 @@ int ecryptfs_decrypt_page(struct page *page)
633 rc = ecryptfs_read_lower(enc_extent_virt, offset, 632 rc = ecryptfs_read_lower(enc_extent_virt, offset,
634 crypt_stat->extent_size, 633 crypt_stat->extent_size,
635 ecryptfs_inode); 634 ecryptfs_inode);
636 if (rc) { 635 if (rc < 0) {
637 ecryptfs_printk(KERN_ERR, "Error attempting " 636 ecryptfs_printk(KERN_ERR, "Error attempting "
638 "to read lower page; rc = [%d]" 637 "to read lower page; rc = [%d]"
639 "\n", rc); 638 "\n", rc);
@@ -797,6 +796,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
797 kfree(full_alg_name); 796 kfree(full_alg_name);
798 if (IS_ERR(crypt_stat->tfm)) { 797 if (IS_ERR(crypt_stat->tfm)) {
799 rc = PTR_ERR(crypt_stat->tfm); 798 rc = PTR_ERR(crypt_stat->tfm);
799 crypt_stat->tfm = NULL;
800 ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): " 800 ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): "
801 "Error initializing cipher [%s]\n", 801 "Error initializing cipher [%s]\n",
802 crypt_stat->cipher); 802 crypt_stat->cipher);
@@ -925,7 +925,9 @@ static int ecryptfs_copy_mount_wide_sigs_to_inode_sigs(
925 struct ecryptfs_global_auth_tok *global_auth_tok; 925 struct ecryptfs_global_auth_tok *global_auth_tok;
926 int rc = 0; 926 int rc = 0;
927 927
928 mutex_lock(&crypt_stat->keysig_list_mutex);
928 mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); 929 mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
930
929 list_for_each_entry(global_auth_tok, 931 list_for_each_entry(global_auth_tok,
930 &mount_crypt_stat->global_auth_tok_list, 932 &mount_crypt_stat->global_auth_tok_list,
931 mount_crypt_stat_list) { 933 mount_crypt_stat_list) {
@@ -934,13 +936,13 @@ static int ecryptfs_copy_mount_wide_sigs_to_inode_sigs(
934 rc = ecryptfs_add_keysig(crypt_stat, global_auth_tok->sig); 936 rc = ecryptfs_add_keysig(crypt_stat, global_auth_tok->sig);
935 if (rc) { 937 if (rc) {
936 printk(KERN_ERR "Error adding keysig; rc = [%d]\n", rc); 938 printk(KERN_ERR "Error adding keysig; rc = [%d]\n", rc);
937 mutex_unlock(
938 &mount_crypt_stat->global_auth_tok_list_mutex);
939 goto out; 939 goto out;
940 } 940 }
941 } 941 }
942 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); 942
943out: 943out:
944 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
945 mutex_unlock(&crypt_stat->keysig_list_mutex);
944 return rc; 946 return rc;
945} 947}
946 948
@@ -1212,14 +1214,15 @@ int ecryptfs_read_and_validate_header_region(char *data,
1212 crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE; 1214 crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE;
1213 rc = ecryptfs_read_lower(data, 0, crypt_stat->extent_size, 1215 rc = ecryptfs_read_lower(data, 0, crypt_stat->extent_size,
1214 ecryptfs_inode); 1216 ecryptfs_inode);
1215 if (rc) { 1217 if (rc < 0) {
1216 printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n", 1218 printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n",
1217 __func__, rc); 1219 __func__, rc);
1218 goto out; 1220 goto out;
1219 } 1221 }
1220 if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) { 1222 if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) {
1221 rc = -EINVAL; 1223 rc = -EINVAL;
1222 } 1224 } else
1225 rc = 0;
1223out: 1226out:
1224 return rc; 1227 return rc;
1225} 1228}
@@ -1314,10 +1317,11 @@ ecryptfs_write_metadata_to_contents(struct dentry *ecryptfs_dentry,
1314 1317
1315 rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, virt, 1318 rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, virt,
1316 0, virt_len); 1319 0, virt_len);
1317 if (rc) 1320 if (rc < 0)
1318 printk(KERN_ERR "%s: Error attempting to write header " 1321 printk(KERN_ERR "%s: Error attempting to write header "
1319 "information to lower file; rc = [%d]\n", __func__, 1322 "information to lower file; rc = [%d]\n", __func__, rc);
1320 rc); 1323 else
1324 rc = 0;
1321 return rc; 1325 return rc;
1322} 1326}
1323 1327
@@ -1597,7 +1601,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
1597 } 1601 }
1598 rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size, 1602 rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size,
1599 ecryptfs_inode); 1603 ecryptfs_inode);
1600 if (!rc) 1604 if (rc >= 0)
1601 rc = ecryptfs_read_headers_virt(page_virt, crypt_stat, 1605 rc = ecryptfs_read_headers_virt(page_virt, crypt_stat,
1602 ecryptfs_dentry, 1606 ecryptfs_dentry,
1603 ECRYPTFS_VALIDATE_HEADER_SIZE); 1607 ECRYPTFS_VALIDATE_HEADER_SIZE);
@@ -1702,7 +1706,7 @@ ecryptfs_encrypt_filename(struct ecryptfs_filename *filename,
1702 } else { 1706 } else {
1703 printk(KERN_ERR "%s: No support for requested filename " 1707 printk(KERN_ERR "%s: No support for requested filename "
1704 "encryption method in this release\n", __func__); 1708 "encryption method in this release\n", __func__);
1705 rc = -ENOTSUPP; 1709 rc = -EOPNOTSUPP;
1706 goto out; 1710 goto out;
1707 } 1711 }
1708out: 1712out:
@@ -1763,7 +1767,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
1763 if (IS_ERR(*key_tfm)) { 1767 if (IS_ERR(*key_tfm)) {
1764 rc = PTR_ERR(*key_tfm); 1768 rc = PTR_ERR(*key_tfm);
1765 printk(KERN_ERR "Unable to allocate crypto cipher with name " 1769 printk(KERN_ERR "Unable to allocate crypto cipher with name "
1766 "[%s]; rc = [%d]\n", cipher_name, rc); 1770 "[%s]; rc = [%d]\n", full_alg_name, rc);
1767 goto out; 1771 goto out;
1768 } 1772 }
1769 crypto_blkcipher_set_flags(*key_tfm, CRYPTO_TFM_REQ_WEAK_KEY); 1773 crypto_blkcipher_set_flags(*key_tfm, CRYPTO_TFM_REQ_WEAK_KEY);
@@ -1776,7 +1780,8 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
1776 rc = crypto_blkcipher_setkey(*key_tfm, dummy_key, *key_size); 1780 rc = crypto_blkcipher_setkey(*key_tfm, dummy_key, *key_size);
1777 if (rc) { 1781 if (rc) {
1778 printk(KERN_ERR "Error attempting to set key of size [%zd] for " 1782 printk(KERN_ERR "Error attempting to set key of size [%zd] for "
1779 "cipher [%s]; rc = [%d]\n", *key_size, cipher_name, rc); 1783 "cipher [%s]; rc = [%d]\n", *key_size, full_alg_name,
1784 rc);
1780 rc = -EINVAL; 1785 rc = -EINVAL;
1781 goto out; 1786 goto out;
1782 } 1787 }
@@ -2166,7 +2171,7 @@ int ecryptfs_encrypt_and_encode_filename(
2166 (*encoded_name)[(*encoded_name_size)] = '\0'; 2171 (*encoded_name)[(*encoded_name_size)] = '\0';
2167 (*encoded_name_size)++; 2172 (*encoded_name_size)++;
2168 } else { 2173 } else {
2169 rc = -ENOTSUPP; 2174 rc = -EOPNOTSUPP;
2170 } 2175 }
2171 if (rc) { 2176 if (rc) {
2172 printk(KERN_ERR "%s: Error attempting to encode " 2177 printk(KERN_ERR "%s: Error attempting to encode "
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 2f0945d63297..056fed62d0de 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -476,6 +476,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
476 struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); 476 struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir);
477 struct dentry *lower_dir_dentry; 477 struct dentry *lower_dir_dentry;
478 478
479 dget(lower_dentry);
479 lower_dir_dentry = lock_parent(lower_dentry); 480 lower_dir_dentry = lock_parent(lower_dentry);
480 rc = vfs_unlink(lower_dir_inode, lower_dentry); 481 rc = vfs_unlink(lower_dir_inode, lower_dentry);
481 if (rc) { 482 if (rc) {
@@ -489,6 +490,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
489 d_drop(dentry); 490 d_drop(dentry);
490out_unlock: 491out_unlock:
491 unlock_dir(lower_dir_dentry); 492 unlock_dir(lower_dir_dentry);
493 dput(lower_dentry);
492 return rc; 494 return rc;
493} 495}
494 496
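The unlink fix above pins lower_dentry with dget() before lock_parent() and releases it with dput() only after the parent is unlocked, so the lower dentry cannot be freed out from under vfs_unlink(). A minimal userspace sketch of the same pin/operate/unpin discipline, with a toy refcounted object standing in for the dentry (all names here are illustrative, not eCryptfs API):

#include <stdio.h>
#include <stdlib.h>

/* Toy refcounted object standing in for a dentry; illustrative only. */
struct obj {
    int refcount;
    const char *name;
};

static struct obj *obj_get(struct obj *o)
{
    o->refcount++;              /* dget(): pin the object */
    return o;
}

static void obj_put(struct obj *o)
{
    if (--o->refcount == 0) {   /* dput(): last reference frees it */
        printf("freeing %s\n", o->name);
        free(o);
    }
}

/* The shape of the fix: hold an extra reference across the whole
 * operation so nothing can free the object mid-flight. */
static void operate_safely(struct obj *o)
{
    obj_get(o);
    printf("unlinking via %s (refcount now %d)\n", o->name, o->refcount);
    obj_put(o);
}

int main(void)
{
    struct obj *o = malloc(sizeof(*o));

    o->refcount = 1;
    o->name = "lower_dentry";
    operate_safely(o);
    obj_put(o);                 /* drop the original reference */
    return 0;
}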
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 259525c9abb8..a0a7847567e9 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -416,7 +416,9 @@ ecryptfs_find_global_auth_tok_for_sig(
416 &mount_crypt_stat->global_auth_tok_list, 416 &mount_crypt_stat->global_auth_tok_list,
417 mount_crypt_stat_list) { 417 mount_crypt_stat_list) {
418 if (memcmp(walker->sig, sig, ECRYPTFS_SIG_SIZE_HEX) == 0) { 418 if (memcmp(walker->sig, sig, ECRYPTFS_SIG_SIZE_HEX) == 0) {
419 (*global_auth_tok) = walker; 419 rc = key_validate(walker->global_auth_tok_key);
420 if (!rc)
421 (*global_auth_tok) = walker;
420 goto out; 422 goto out;
421 } 423 }
422 } 424 }
@@ -612,7 +614,12 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
612 } 614 }
613 /* TODO: Support other key modules than passphrase for 615 /* TODO: Support other key modules than passphrase for
614 * filename encryption */ 616 * filename encryption */
615 BUG_ON(s->auth_tok->token_type != ECRYPTFS_PASSWORD); 617 if (s->auth_tok->token_type != ECRYPTFS_PASSWORD) {
618 rc = -EOPNOTSUPP;
619 printk(KERN_INFO "%s: Filename encryption only supports "
620 "password tokens\n", __func__);
621 goto out_free_unlock;
622 }
616 sg_init_one( 623 sg_init_one(
617 &s->hash_sg, 624 &s->hash_sg,
618 (u8 *)s->auth_tok->token.password.session_key_encryption_key, 625 (u8 *)s->auth_tok->token.password.session_key_encryption_key,
@@ -910,7 +917,12 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
910 } 917 }
911 /* TODO: Support other key modules than passphrase for 918 /* TODO: Support other key modules than passphrase for
912 * filename encryption */ 919 * filename encryption */
913 BUG_ON(s->auth_tok->token_type != ECRYPTFS_PASSWORD); 920 if (s->auth_tok->token_type != ECRYPTFS_PASSWORD) {
921 rc = -EOPNOTSUPP;
922 printk(KERN_INFO "%s: Filename encryption only supports "
923 "password tokens\n", __func__);
924 goto out_free_unlock;
925 }
914 rc = crypto_blkcipher_setkey( 926 rc = crypto_blkcipher_setkey(
915 s->desc.tfm, 927 s->desc.tfm,
916 s->auth_tok->token.password.session_key_encryption_key, 928 s->auth_tok->token.password.session_key_encryption_key,
@@ -1316,8 +1328,10 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
1316 rc = -EINVAL; 1328 rc = -EINVAL;
1317 goto out_free; 1329 goto out_free;
1318 } 1330 }
1319 ecryptfs_cipher_code_to_string(crypt_stat->cipher, 1331 rc = ecryptfs_cipher_code_to_string(crypt_stat->cipher,
1320 (u16)data[(*packet_size)]); 1332 (u16)data[(*packet_size)]);
1333 if (rc)
1334 goto out_free;
1321 /* A little extra work to differentiate among the AES key 1335 /* A little extra work to differentiate among the AES key
1322 * sizes; see RFC2440 */ 1336 * sizes; see RFC2440 */
1323 switch(data[(*packet_size)++]) { 1337 switch(data[(*packet_size)++]) {
@@ -1328,7 +1342,9 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
1328 crypt_stat->key_size = 1342 crypt_stat->key_size =
1329 (*new_auth_tok)->session_key.encrypted_key_size; 1343 (*new_auth_tok)->session_key.encrypted_key_size;
1330 } 1344 }
1331 ecryptfs_init_crypt_ctx(crypt_stat); 1345 rc = ecryptfs_init_crypt_ctx(crypt_stat);
1346 if (rc)
1347 goto out_free;
1332 if (unlikely(data[(*packet_size)++] != 0x03)) { 1348 if (unlikely(data[(*packet_size)++] != 0x03)) {
1333 printk(KERN_WARNING "Only S2K ID 3 is currently supported\n"); 1349 printk(KERN_WARNING "Only S2K ID 3 is currently supported\n");
1334 rc = -ENOSYS; 1350 rc = -ENOSYS;
@@ -2366,21 +2382,18 @@ struct kmem_cache *ecryptfs_key_sig_cache;
2366int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig) 2382int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig)
2367{ 2383{
2368 struct ecryptfs_key_sig *new_key_sig; 2384 struct ecryptfs_key_sig *new_key_sig;
2369 int rc = 0;
2370 2385
2371 new_key_sig = kmem_cache_alloc(ecryptfs_key_sig_cache, GFP_KERNEL); 2386 new_key_sig = kmem_cache_alloc(ecryptfs_key_sig_cache, GFP_KERNEL);
2372 if (!new_key_sig) { 2387 if (!new_key_sig) {
2373 rc = -ENOMEM;
2374 printk(KERN_ERR 2388 printk(KERN_ERR
2375 "Error allocating from ecryptfs_key_sig_cache\n"); 2389 "Error allocating from ecryptfs_key_sig_cache\n");
2376 goto out; 2390 return -ENOMEM;
2377 } 2391 }
2378 memcpy(new_key_sig->keysig, sig, ECRYPTFS_SIG_SIZE_HEX); 2392 memcpy(new_key_sig->keysig, sig, ECRYPTFS_SIG_SIZE_HEX);
2379 mutex_lock(&crypt_stat->keysig_list_mutex); 2393 /* Caller must hold keysig_list_mutex */
2380 list_add(&new_key_sig->crypt_stat_list, &crypt_stat->keysig_list); 2394 list_add(&new_key_sig->crypt_stat_list, &crypt_stat->keysig_list);
2381 mutex_unlock(&crypt_stat->keysig_list_mutex); 2395
2382out: 2396 return 0;
2383 return rc;
2384} 2397}
2385 2398
2386struct kmem_cache *ecryptfs_global_auth_tok_cache; 2399struct kmem_cache *ecryptfs_global_auth_tok_cache;
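The keystore changes above replace crash-the-kernel assertions with recoverable errors: an unsupported token type in either tag-70 path now fails the single request with -EOPNOTSUPP instead of hitting BUG_ON(), and the return codes of ecryptfs_cipher_code_to_string() and ecryptfs_init_crypt_ctx() are finally checked. A small userspace model of that policy change, assuming a two-value token enum as a stand-in for the eCryptfs token types:

#include <errno.h>
#include <stdio.h>

enum token_type { TOKEN_PASSWORD, TOKEN_PRIVATE_KEY };

/* Before: BUG_ON(tok != TOKEN_PASSWORD) panicked the whole kernel.
 * After: an unsupported token fails just this operation with a
 * clear error code and the caller unwinds normally. */
static int write_tag_70_packet(enum token_type tok)
{
    if (tok != TOKEN_PASSWORD) {
        fprintf(stderr, "filename encryption only supports password tokens\n");
        return -EOPNOTSUPP;
    }
    /* ... proceed with the hashing/encryption steps ... */
    return 0;
}

int main(void)
{
    printf("password token    -> %d\n", write_tag_70_packet(TOKEN_PASSWORD));
    printf("private-key token -> %d\n", write_tag_70_packet(TOKEN_PRIVATE_KEY));
    return 0;
}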
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index c6d7a4d748a0..e14cf7e588db 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -136,6 +136,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
136 const struct cred *cred) 136 const struct cred *cred)
137{ 137{
138 struct ecryptfs_open_req *req; 138 struct ecryptfs_open_req *req;
139 int flags = O_LARGEFILE;
139 int rc = 0; 140 int rc = 0;
140 141
141 /* Corresponding dput() and mntput() are done when the 142 /* Corresponding dput() and mntput() are done when the
@@ -143,10 +144,14 @@ int ecryptfs_privileged_open(struct file **lower_file,
143 * destroyed. */ 144 * destroyed. */
144 dget(lower_dentry); 145 dget(lower_dentry);
145 mntget(lower_mnt); 146 mntget(lower_mnt);
146 (*lower_file) = dentry_open(lower_dentry, lower_mnt, 147 flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR;
147 (O_RDWR | O_LARGEFILE), cred); 148 (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred);
148 if (!IS_ERR(*lower_file)) 149 if (!IS_ERR(*lower_file))
149 goto out; 150 goto out;
151 if (flags & O_RDONLY) {
152 rc = PTR_ERR((*lower_file));
153 goto out;
154 }
150 req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL); 155 req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL);
151 if (!req) { 156 if (!req) {
152 rc = -ENOMEM; 157 rc = -ENOMEM;
@@ -180,21 +185,8 @@ int ecryptfs_privileged_open(struct file **lower_file,
180 __func__); 185 __func__);
181 goto out_unlock; 186 goto out_unlock;
182 } 187 }
183 if (IS_ERR(*req->lower_file)) { 188 if (IS_ERR(*req->lower_file))
184 rc = PTR_ERR(*req->lower_file); 189 rc = PTR_ERR(*req->lower_file);
185 dget(lower_dentry);
186 mntget(lower_mnt);
187 (*lower_file) = dentry_open(lower_dentry, lower_mnt,
188 (O_RDONLY | O_LARGEFILE), cred);
189 if (IS_ERR(*lower_file)) {
190 rc = PTR_ERR(*req->lower_file);
191 (*lower_file) = NULL;
192 printk(KERN_WARNING "%s: Error attempting privileged "
193 "open of lower file with either RW or RO "
194 "perms; rc = [%d]. Giving up.\n",
195 __func__, rc);
196 }
197 }
198out_unlock: 190out_unlock:
199 mutex_unlock(&req->mux); 191 mutex_unlock(&req->mux);
200out_free: 192out_free:
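ecryptfs_privileged_open() now chooses the access mode up front, O_RDONLY when the lower inode sits on a read-only mount and O_RDWR otherwise, and drops the old second dentry_open() retry. A rough userspace approximation with plain open(2) standing in for dentry_open(); since O_RDONLY is 0 on Linux, the sketch tracks read-only-ness in its own flag rather than with a bitmask test:

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#ifndef O_LARGEFILE
#define O_LARGEFILE 0
#endif

/* Pick the access mode from what the backing store allows, instead
 * of opening O_RDWR and retrying O_RDONLY after a failure. */
static int open_lower(const char *path, int lower_is_readonly)
{
    int flags = O_LARGEFILE | (lower_is_readonly ? O_RDONLY : O_RDWR);
    int fd = open(path, flags);

    if (fd >= 0)
        return fd;
    if (lower_is_readonly) {
        /* Read-only open failed: nothing weaker left to try. */
        fprintf(stderr, "open %s: %s\n", path, strerror(errno));
        return -1;
    }
    /* In the kernel this is where the request would be queued to the
     * privileged eCryptfs kthread; here we only report it. */
    fprintf(stderr, "RW open of %s failed (%s); would escalate\n",
            path, strerror(errno));
    return -1;
}

int main(int argc, char **argv)
{
    int fd = open_lower(argc > 1 ? argv[1] : "/etc/hostname", 1);

    if (fd >= 0)
        close(fd);
    return 0;
}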
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 9f0aa9883c28..101fe4c7b1ee 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -129,11 +129,10 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
129 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 129 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
130 rc = ecryptfs_privileged_open(&inode_info->lower_file, 130 rc = ecryptfs_privileged_open(&inode_info->lower_file,
131 lower_dentry, lower_mnt, cred); 131 lower_dentry, lower_mnt, cred);
132 if (rc || IS_ERR(inode_info->lower_file)) { 132 if (rc) {
133 printk(KERN_ERR "Error opening lower persistent file " 133 printk(KERN_ERR "Error opening lower persistent file "
134 "for lower_dentry [0x%p] and lower_mnt [0x%p]; " 134 "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
135 "rc = [%d]\n", lower_dentry, lower_mnt, rc); 135 "rc = [%d]\n", lower_dentry, lower_mnt, rc);
136 rc = PTR_ERR(inode_info->lower_file);
137 inode_info->lower_file = NULL; 136 inode_info->lower_file = NULL;
138 } 137 }
139 } 138 }
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 05772aeaa8f4..df4ce99d0597 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -396,9 +396,11 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
396 rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0, 396 rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0,
397 sizeof(u64)); 397 sizeof(u64));
398 kfree(file_size_virt); 398 kfree(file_size_virt);
399 if (rc) 399 if (rc < 0)
400 printk(KERN_ERR "%s: Error writing file size to header; " 400 printk(KERN_ERR "%s: Error writing file size to header; "
401 "rc = [%d]\n", __func__, rc); 401 "rc = [%d]\n", __func__, rc);
402 else
403 rc = 0;
402out: 404out:
403 return rc; 405 return rc;
404} 406}
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index a137c6ea2fee..0cc4fafd6552 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -34,15 +34,14 @@
34 * 34 *
35 * Write data to the lower file. 35 * Write data to the lower file.
36 * 36 *
37 * Returns zero on success; non-zero on error 37 * Returns bytes written on success; less than zero on error
38 */ 38 */
39int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, 39int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
40 loff_t offset, size_t size) 40 loff_t offset, size_t size)
41{ 41{
42 struct ecryptfs_inode_info *inode_info; 42 struct ecryptfs_inode_info *inode_info;
43 ssize_t octets_written;
44 mm_segment_t fs_save; 43 mm_segment_t fs_save;
45 int rc = 0; 44 ssize_t rc;
46 45
47 inode_info = ecryptfs_inode_to_private(ecryptfs_inode); 46 inode_info = ecryptfs_inode_to_private(ecryptfs_inode);
48 mutex_lock(&inode_info->lower_file_mutex); 47 mutex_lock(&inode_info->lower_file_mutex);
@@ -50,14 +49,9 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
50 inode_info->lower_file->f_pos = offset; 49 inode_info->lower_file->f_pos = offset;
51 fs_save = get_fs(); 50 fs_save = get_fs();
52 set_fs(get_ds()); 51 set_fs(get_ds());
53 octets_written = vfs_write(inode_info->lower_file, data, size, 52 rc = vfs_write(inode_info->lower_file, data, size,
54 &inode_info->lower_file->f_pos); 53 &inode_info->lower_file->f_pos);
55 set_fs(fs_save); 54 set_fs(fs_save);
56 if (octets_written < 0) {
57 printk(KERN_ERR "%s: octets_written = [%td]; "
58 "expected [%td]\n", __func__, octets_written, size);
59 rc = -EINVAL;
60 }
61 mutex_unlock(&inode_info->lower_file_mutex); 55 mutex_unlock(&inode_info->lower_file_mutex);
62 mark_inode_dirty_sync(ecryptfs_inode); 56 mark_inode_dirty_sync(ecryptfs_inode);
63 return rc; 57 return rc;
@@ -91,6 +85,8 @@ int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode,
91 + offset_in_page); 85 + offset_in_page);
92 virt = kmap(page_for_lower); 86 virt = kmap(page_for_lower);
93 rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size); 87 rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size);
88 if (rc > 0)
89 rc = 0;
94 kunmap(page_for_lower); 90 kunmap(page_for_lower);
95 return rc; 91 return rc;
96} 92}
@@ -229,30 +225,24 @@ out:
229 * Read @size bytes of data at byte offset @offset from the lower 225 * Read @size bytes of data at byte offset @offset from the lower
230 * inode into memory location @data. 226 * inode into memory location @data.
231 * 227 *
232 * Returns zero on success; non-zero on error 228 * Returns bytes read on success; 0 on EOF; less than zero on error
233 */ 229 */
234int ecryptfs_read_lower(char *data, loff_t offset, size_t size, 230int ecryptfs_read_lower(char *data, loff_t offset, size_t size,
235 struct inode *ecryptfs_inode) 231 struct inode *ecryptfs_inode)
236{ 232{
237 struct ecryptfs_inode_info *inode_info = 233 struct ecryptfs_inode_info *inode_info =
238 ecryptfs_inode_to_private(ecryptfs_inode); 234 ecryptfs_inode_to_private(ecryptfs_inode);
239 ssize_t octets_read;
240 mm_segment_t fs_save; 235 mm_segment_t fs_save;
241 int rc = 0; 236 ssize_t rc;
242 237
243 mutex_lock(&inode_info->lower_file_mutex); 238 mutex_lock(&inode_info->lower_file_mutex);
244 BUG_ON(!inode_info->lower_file); 239 BUG_ON(!inode_info->lower_file);
245 inode_info->lower_file->f_pos = offset; 240 inode_info->lower_file->f_pos = offset;
246 fs_save = get_fs(); 241 fs_save = get_fs();
247 set_fs(get_ds()); 242 set_fs(get_ds());
248 octets_read = vfs_read(inode_info->lower_file, data, size, 243 rc = vfs_read(inode_info->lower_file, data, size,
249 &inode_info->lower_file->f_pos); 244 &inode_info->lower_file->f_pos);
250 set_fs(fs_save); 245 set_fs(fs_save);
251 if (octets_read < 0) {
252 printk(KERN_ERR "%s: octets_read = [%td]; "
253 "expected [%td]\n", __func__, octets_read, size);
254 rc = -EINVAL;
255 }
256 mutex_unlock(&inode_info->lower_file_mutex); 246 mutex_unlock(&inode_info->lower_file_mutex);
257 return rc; 247 return rc;
258} 248}
@@ -284,6 +274,8 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
284 offset = ((((loff_t)page_index) << PAGE_CACHE_SHIFT) + offset_in_page); 274 offset = ((((loff_t)page_index) << PAGE_CACHE_SHIFT) + offset_in_page);
285 virt = kmap(page_for_ecryptfs); 275 virt = kmap(page_for_ecryptfs);
286 rc = ecryptfs_read_lower(virt, offset, size, ecryptfs_inode); 276 rc = ecryptfs_read_lower(virt, offset, size, ecryptfs_inode);
277 if (rc > 0)
278 rc = 0;
287 kunmap(page_for_ecryptfs); 279 kunmap(page_for_ecryptfs);
288 flush_dcache_page(page_for_ecryptfs); 280 flush_dcache_page(page_for_ecryptfs);
289 return rc; 281 return rc;
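The read_write.c rework changes the contract of ecryptfs_read_lower()/ecryptfs_write_lower(): they now pass through the vfs_read()/vfs_write() byte count (negative on error) instead of collapsing everything to 0 or -EINVAL, and the page-segment wrappers translate a positive count back to the old zero-on-success convention for their callers. A userspace model of the two layers, with pread(2) standing in for vfs_read():

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Low level: return the byte count (or -errno), like the reworked
 * ecryptfs_read_lower(); callers decide what a short read means. */
static ssize_t read_lower(int fd, char *buf, off_t off, size_t len)
{
    ssize_t n = pread(fd, buf, len, off);

    return n < 0 ? -errno : n;
}

/* Wrapper: callers of the page-segment helper still expect 0 on
 * success, so positive counts are normalized here. */
static int read_lower_page_segment(int fd, char *buf, off_t off, size_t len)
{
    ssize_t rc = read_lower(fd, buf, off, len);

    if (rc > 0)
        rc = 0;
    return (int)rc;
}

int main(void)
{
    char buf[16];
    int fd = open("/etc/hostname", O_RDONLY);

    if (fd < 0)
        return 1;
    printf("raw count: %zd\n", read_lower(fd, buf, 0, sizeof(buf)));
    printf("wrapped:   %d\n", read_lower_page_segment(fd, buf, 0, sizeof(buf)));
    close(fd);
    return 0;
}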
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 12d649602d3a..b15a43a80ab7 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -77,7 +77,6 @@ static void ecryptfs_destroy_inode(struct inode *inode)
77 struct ecryptfs_inode_info *inode_info; 77 struct ecryptfs_inode_info *inode_info;
78 78
79 inode_info = ecryptfs_inode_to_private(inode); 79 inode_info = ecryptfs_inode_to_private(inode);
80 mutex_lock(&inode_info->lower_file_mutex);
81 if (inode_info->lower_file) { 80 if (inode_info->lower_file) {
82 struct dentry *lower_dentry = 81 struct dentry *lower_dentry =
83 inode_info->lower_file->f_dentry; 82 inode_info->lower_file->f_dentry;
@@ -89,7 +88,6 @@ static void ecryptfs_destroy_inode(struct inode *inode)
89 d_drop(lower_dentry); 88 d_drop(lower_dentry);
90 } 89 }
91 } 90 }
92 mutex_unlock(&inode_info->lower_file_mutex);
93 ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); 91 ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
94 kmem_cache_free(ecryptfs_inode_info_cache, inode_info); 92 kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
95} 93}
diff --git a/fs/exec.c b/fs/exec.c
index 5c833c18d0d4..d49be6bc1793 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
55#include <linux/kmod.h> 55#include <linux/kmod.h>
56#include <linux/fsnotify.h> 56#include <linux/fsnotify.h>
57#include <linux/fs_struct.h> 57#include <linux/fs_struct.h>
58#include <linux/pipe_fs_i.h>
58 59
59#include <asm/uaccess.h> 60#include <asm/uaccess.h>
60#include <asm/mmu_context.h> 61#include <asm/mmu_context.h>
@@ -63,6 +64,7 @@
63 64
64int core_uses_pid; 65int core_uses_pid;
65char core_pattern[CORENAME_MAX_SIZE] = "core"; 66char core_pattern[CORENAME_MAX_SIZE] = "core";
67unsigned int core_pipe_limit;
66int suid_dumpable = 0; 68int suid_dumpable = 0;
67 69
68/* The maximal length of core_pattern is also specified in sysctl.c */ 70/* The maximal length of core_pattern is also specified in sysctl.c */
@@ -1393,18 +1395,16 @@ out_ret:
1393 return retval; 1395 return retval;
1394} 1396}
1395 1397
1396int set_binfmt(struct linux_binfmt *new) 1398void set_binfmt(struct linux_binfmt *new)
1397{ 1399{
1398 struct linux_binfmt *old = current->binfmt; 1400 struct mm_struct *mm = current->mm;
1399 1401
1400 if (new) { 1402 if (mm->binfmt)
1401 if (!try_module_get(new->module)) 1403 module_put(mm->binfmt->module);
1402 return -1; 1404
1403 } 1405 mm->binfmt = new;
1404 current->binfmt = new; 1406 if (new)
1405 if (old) 1407 __module_get(new->module);
1406 module_put(old->module);
1407 return 0;
1408} 1408}
1409 1409
1410EXPORT_SYMBOL(set_binfmt); 1410EXPORT_SYMBOL(set_binfmt);
@@ -1728,6 +1728,29 @@ int get_dumpable(struct mm_struct *mm)
1728 return (ret >= 2) ? 2 : ret; 1728 return (ret >= 2) ? 2 : ret;
1729} 1729}
1730 1730
1731static void wait_for_dump_helpers(struct file *file)
1732{
1733 struct pipe_inode_info *pipe;
1734
1735 pipe = file->f_path.dentry->d_inode->i_pipe;
1736
1737 pipe_lock(pipe);
1738 pipe->readers++;
1739 pipe->writers--;
1740
1741 while ((pipe->readers > 1) && (!signal_pending(current))) {
1742 wake_up_interruptible_sync(&pipe->wait);
1743 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
1744 pipe_wait(pipe);
1745 }
1746
1747 pipe->readers--;
1748 pipe->writers++;
1749 pipe_unlock(pipe);
1750
1751}
1752
1753
1731void do_coredump(long signr, int exit_code, struct pt_regs *regs) 1754void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1732{ 1755{
1733 struct core_state core_state; 1756 struct core_state core_state;
@@ -1744,11 +1767,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1744 unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; 1767 unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1745 char **helper_argv = NULL; 1768 char **helper_argv = NULL;
1746 int helper_argc = 0; 1769 int helper_argc = 0;
1747 char *delimit; 1770 int dump_count = 0;
1771 static atomic_t core_dump_count = ATOMIC_INIT(0);
1748 1772
1749 audit_core_dumps(signr); 1773 audit_core_dumps(signr);
1750 1774
1751 binfmt = current->binfmt; 1775 binfmt = mm->binfmt;
1752 if (!binfmt || !binfmt->core_dump) 1776 if (!binfmt || !binfmt->core_dump)
1753 goto fail; 1777 goto fail;
1754 1778
@@ -1799,54 +1823,63 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1799 lock_kernel(); 1823 lock_kernel();
1800 ispipe = format_corename(corename, signr); 1824 ispipe = format_corename(corename, signr);
1801 unlock_kernel(); 1825 unlock_kernel();
1802 /* 1826
1803 * Don't bother to check the RLIMIT_CORE value if core_pattern points
1804 * to a pipe. Since we're not writing directly to the filesystem
1805 * RLIMIT_CORE doesn't really apply, as no actual core file will be
1806 * created unless the pipe reader choses to write out the core file
1807 * at which point file size limits and permissions will be imposed
1808 * as it does with any other process
1809 */
1810 if ((!ispipe) && (core_limit < binfmt->min_coredump)) 1827 if ((!ispipe) && (core_limit < binfmt->min_coredump))
1811 goto fail_unlock; 1828 goto fail_unlock;
1812 1829
1813 if (ispipe) { 1830 if (ispipe) {
1831 if (core_limit == 0) {
1832 /*
1833 * Normally core limits are irrelevant to pipes, since
1834 * we're not writing to the file system, but we use
 1835 * core_limit of 0 here as a special value. Any
1836 * non-zero limit gets set to RLIM_INFINITY below, but
1837 * a limit of 0 skips the dump. This is a consistent
1838 * way to catch recursive crashes. We can still crash
 1839 * if the core_pattern binary sets RLIMIT_CORE != 0
 1840 * but it runs as root, and can do lots of stupid things.
1841 * Note that we use task_tgid_vnr here to grab the pid
1842 * of the process group leader. That way we get the
1843 * right pid if a thread in a multi-threaded
1844 * core_pattern process dies.
1845 */
1846 printk(KERN_WARNING
1847 "Process %d(%s) has RLIMIT_CORE set to 0\n",
1848 task_tgid_vnr(current), current->comm);
1849 printk(KERN_WARNING "Aborting core\n");
1850 goto fail_unlock;
1851 }
1852
1853 dump_count = atomic_inc_return(&core_dump_count);
1854 if (core_pipe_limit && (core_pipe_limit < dump_count)) {
1855 printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
1856 task_tgid_vnr(current), current->comm);
1857 printk(KERN_WARNING "Skipping core dump\n");
1858 goto fail_dropcount;
1859 }
1860
1814 helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc); 1861 helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
1815 if (!helper_argv) { 1862 if (!helper_argv) {
1816 printk(KERN_WARNING "%s failed to allocate memory\n", 1863 printk(KERN_WARNING "%s failed to allocate memory\n",
1817 __func__); 1864 __func__);
1818 goto fail_unlock; 1865 goto fail_dropcount;
1819 }
1820 /* Terminate the string before the first option */
1821 delimit = strchr(corename, ' ');
1822 if (delimit)
1823 *delimit = '\0';
1824 delimit = strrchr(helper_argv[0], '/');
1825 if (delimit)
1826 delimit++;
1827 else
1828 delimit = helper_argv[0];
1829 if (!strcmp(delimit, current->comm)) {
1830 printk(KERN_NOTICE "Recursive core dump detected, "
1831 "aborting\n");
1832 goto fail_unlock;
1833 } 1866 }
1834 1867
1835 core_limit = RLIM_INFINITY; 1868 core_limit = RLIM_INFINITY;
1836 1869
1837 /* SIGPIPE can happen, but it's just never processed */ 1870 /* SIGPIPE can happen, but it's just never processed */
1838 if (call_usermodehelper_pipe(corename+1, helper_argv, NULL, 1871 if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
1839 &file)) { 1872 &file)) {
1840 printk(KERN_INFO "Core dump to %s pipe failed\n", 1873 printk(KERN_INFO "Core dump to %s pipe failed\n",
1841 corename); 1874 corename);
1842 goto fail_unlock; 1875 goto fail_dropcount;
1843 } 1876 }
1844 } else 1877 } else
1845 file = filp_open(corename, 1878 file = filp_open(corename,
1846 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 1879 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
1847 0600); 1880 0600);
1848 if (IS_ERR(file)) 1881 if (IS_ERR(file))
1849 goto fail_unlock; 1882 goto fail_dropcount;
1850 inode = file->f_path.dentry->d_inode; 1883 inode = file->f_path.dentry->d_inode;
1851 if (inode->i_nlink > 1) 1884 if (inode->i_nlink > 1)
1852 goto close_fail; /* multiple links - don't dump */ 1885 goto close_fail; /* multiple links - don't dump */
@@ -1875,7 +1908,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1875 if (retval) 1908 if (retval)
1876 current->signal->group_exit_code |= 0x80; 1909 current->signal->group_exit_code |= 0x80;
1877close_fail: 1910close_fail:
1911 if (ispipe && core_pipe_limit)
1912 wait_for_dump_helpers(file);
1878 filp_close(file, NULL); 1913 filp_close(file, NULL);
1914fail_dropcount:
1915 if (dump_count)
1916 atomic_dec(&core_dump_count);
1879fail_unlock: 1917fail_unlock:
1880 if (helper_argv) 1918 if (helper_argv)
1881 argv_free(helper_argv); 1919 argv_free(helper_argv);
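The do_coredump() changes add two guards on the pipe path: a process whose RLIMIT_CORE is 0 never spawns the core_pattern helper (which catches helpers that crash recursively), and the new core_pipe_limit sysctl caps how many helpers may run at once via a static atomic counter, released at the fail_dropcount label. A compact model of the counting gate using C11 atomics (the limit value here is illustrative):

#include <stdatomic.h>
#include <stdio.h>

static unsigned int core_pipe_limit = 2;   /* sysctl; 0 = unlimited */
static atomic_int core_dump_count;

/* Returns 1 if this dump may proceed; mirrors the dump_count gate
 * added for the pipe case. fetch_add returns the prior value, so
 * +1 gives the new count, like atomic_inc_return(). */
static int try_start_dump(void)
{
    int count = atomic_fetch_add(&core_dump_count, 1) + 1;

    if (core_pipe_limit && count > (int)core_pipe_limit) {
        atomic_fetch_sub(&core_dump_count, 1);   /* fail_dropcount: */
        return 0;
    }
    return 1;
}

static void finish_dump(void)
{
    atomic_fetch_sub(&core_dump_count, 1);
}

int main(void)
{
    for (int i = 0; i < 4; i++)
        printf("dump %d: %s\n", i, try_start_dump() ? "run" : "skipped");
    /* The first core_pipe_limit dumps are still in flight, so the
     * later ones were skipped; finishing one frees a slot. */
    finish_dump();
    printf("after one finishes: %s\n", try_start_dump() ? "run" : "skipped");
    return 0;
}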
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 5ab10c3bbebe..9f500dec3b59 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -214,7 +214,6 @@ int exofs_sync_fs(struct super_block *sb, int wait)
214 } 214 }
215 215
216 lock_super(sb); 216 lock_super(sb);
217 lock_kernel();
218 sbi = sb->s_fs_info; 217 sbi = sb->s_fs_info;
219 fscb->s_nextid = cpu_to_le64(sbi->s_nextid); 218 fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
220 fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); 219 fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
@@ -245,7 +244,6 @@ int exofs_sync_fs(struct super_block *sb, int wait)
245out: 244out:
246 if (or) 245 if (or)
247 osd_end_request(or); 246 osd_end_request(or);
248 unlock_kernel();
249 unlock_super(sb); 247 unlock_super(sb);
250 kfree(fscb); 248 kfree(fscb);
251 return ret; 249 return ret;
@@ -268,8 +266,6 @@ static void exofs_put_super(struct super_block *sb)
268 int num_pend; 266 int num_pend;
269 struct exofs_sb_info *sbi = sb->s_fs_info; 267 struct exofs_sb_info *sbi = sb->s_fs_info;
270 268
271 lock_kernel();
272
273 if (sb->s_dirt) 269 if (sb->s_dirt)
274 exofs_write_super(sb); 270 exofs_write_super(sb);
275 271
@@ -286,8 +282,6 @@ static void exofs_put_super(struct super_block *sb)
286 osduld_put_device(sbi->s_dev); 282 osduld_put_device(sbi->s_dev);
287 kfree(sb->s_fs_info); 283 kfree(sb->s_fs_info);
288 sb->s_fs_info = NULL; 284 sb->s_fs_info = NULL;
289
290 unlock_kernel();
291} 285}
292 286
293/* 287/*
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 1c1638f873a4..ade634076d0a 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -819,6 +819,7 @@ const struct address_space_operations ext2_aops = {
819 .writepages = ext2_writepages, 819 .writepages = ext2_writepages,
820 .migratepage = buffer_migrate_page, 820 .migratepage = buffer_migrate_page,
821 .is_partially_uptodate = block_is_partially_uptodate, 821 .is_partially_uptodate = block_is_partially_uptodate,
822 .error_remove_page = generic_error_remove_page,
822}; 823};
823 824
824const struct address_space_operations ext2_aops_xip = { 825const struct address_space_operations ext2_aops_xip = {
@@ -837,6 +838,7 @@ const struct address_space_operations ext2_nobh_aops = {
837 .direct_IO = ext2_direct_IO, 838 .direct_IO = ext2_direct_IO,
838 .writepages = ext2_writepages, 839 .writepages = ext2_writepages,
839 .migratepage = buffer_migrate_page, 840 .migratepage = buffer_migrate_page,
841 .error_remove_page = generic_error_remove_page,
840}; 842};
841 843
842/* 844/*
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index cd098a7b77fc..acf1b1423327 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1830,6 +1830,7 @@ static const struct address_space_operations ext3_ordered_aops = {
1830 .direct_IO = ext3_direct_IO, 1830 .direct_IO = ext3_direct_IO,
1831 .migratepage = buffer_migrate_page, 1831 .migratepage = buffer_migrate_page,
1832 .is_partially_uptodate = block_is_partially_uptodate, 1832 .is_partially_uptodate = block_is_partially_uptodate,
1833 .error_remove_page = generic_error_remove_page,
1833}; 1834};
1834 1835
1835static const struct address_space_operations ext3_writeback_aops = { 1836static const struct address_space_operations ext3_writeback_aops = {
@@ -1845,6 +1846,7 @@ static const struct address_space_operations ext3_writeback_aops = {
1845 .direct_IO = ext3_direct_IO, 1846 .direct_IO = ext3_direct_IO,
1846 .migratepage = buffer_migrate_page, 1847 .migratepage = buffer_migrate_page,
1847 .is_partially_uptodate = block_is_partially_uptodate, 1848 .is_partially_uptodate = block_is_partially_uptodate,
1849 .error_remove_page = generic_error_remove_page,
1848}; 1850};
1849 1851
1850static const struct address_space_operations ext3_journalled_aops = { 1852static const struct address_space_operations ext3_journalled_aops = {
@@ -1859,6 +1861,7 @@ static const struct address_space_operations ext3_journalled_aops = {
1859 .invalidatepage = ext3_invalidatepage, 1861 .invalidatepage = ext3_invalidatepage,
1860 .releasepage = ext3_releasepage, 1862 .releasepage = ext3_releasepage,
1861 .is_partially_uptodate = block_is_partially_uptodate, 1863 .is_partially_uptodate = block_is_partially_uptodate,
1864 .error_remove_page = generic_error_remove_page,
1862}; 1865};
1863 1866
1864void ext3_set_aops(struct inode *inode) 1867void ext3_set_aops(struct inode *inode)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3a798737e305..064746fad581 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3386,6 +3386,7 @@ static const struct address_space_operations ext4_ordered_aops = {
3386 .direct_IO = ext4_direct_IO, 3386 .direct_IO = ext4_direct_IO,
3387 .migratepage = buffer_migrate_page, 3387 .migratepage = buffer_migrate_page,
3388 .is_partially_uptodate = block_is_partially_uptodate, 3388 .is_partially_uptodate = block_is_partially_uptodate,
3389 .error_remove_page = generic_error_remove_page,
3389}; 3390};
3390 3391
3391static const struct address_space_operations ext4_writeback_aops = { 3392static const struct address_space_operations ext4_writeback_aops = {
@@ -3401,6 +3402,7 @@ static const struct address_space_operations ext4_writeback_aops = {
3401 .direct_IO = ext4_direct_IO, 3402 .direct_IO = ext4_direct_IO,
3402 .migratepage = buffer_migrate_page, 3403 .migratepage = buffer_migrate_page,
3403 .is_partially_uptodate = block_is_partially_uptodate, 3404 .is_partially_uptodate = block_is_partially_uptodate,
3405 .error_remove_page = generic_error_remove_page,
3404}; 3406};
3405 3407
3406static const struct address_space_operations ext4_journalled_aops = { 3408static const struct address_space_operations ext4_journalled_aops = {
@@ -3415,6 +3417,7 @@ static const struct address_space_operations ext4_journalled_aops = {
3415 .invalidatepage = ext4_invalidatepage, 3417 .invalidatepage = ext4_invalidatepage,
3416 .releasepage = ext4_releasepage, 3418 .releasepage = ext4_releasepage,
3417 .is_partially_uptodate = block_is_partially_uptodate, 3419 .is_partially_uptodate = block_is_partially_uptodate,
3420 .error_remove_page = generic_error_remove_page,
3418}; 3421};
3419 3422
3420static const struct address_space_operations ext4_da_aops = { 3423static const struct address_space_operations ext4_da_aops = {
@@ -3431,6 +3434,7 @@ static const struct address_space_operations ext4_da_aops = {
3431 .direct_IO = ext4_direct_IO, 3434 .direct_IO = ext4_direct_IO,
3432 .migratepage = buffer_migrate_page, 3435 .migratepage = buffer_migrate_page,
3433 .is_partially_uptodate = block_is_partially_uptodate, 3436 .is_partially_uptodate = block_is_partially_uptodate,
3437 .error_remove_page = generic_error_remove_page,
3434}; 3438};
3435 3439
3436void ext4_set_aops(struct inode *inode) 3440void ext4_set_aops(struct inode *inode)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 8970d8c49bb0..04629d1302fc 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -470,19 +470,11 @@ static void fat_put_super(struct super_block *sb)
470 470
471 iput(sbi->fat_inode); 471 iput(sbi->fat_inode);
472 472
473 if (sbi->nls_disk) { 473 unload_nls(sbi->nls_disk);
474 unload_nls(sbi->nls_disk); 474 unload_nls(sbi->nls_io);
475 sbi->nls_disk = NULL; 475
476 sbi->options.codepage = fat_default_codepage; 476 if (sbi->options.iocharset != fat_default_iocharset)
477 }
478 if (sbi->nls_io) {
479 unload_nls(sbi->nls_io);
480 sbi->nls_io = NULL;
481 }
482 if (sbi->options.iocharset != fat_default_iocharset) {
483 kfree(sbi->options.iocharset); 477 kfree(sbi->options.iocharset);
484 sbi->options.iocharset = fat_default_iocharset;
485 }
486 478
487 sb->s_fs_info = NULL; 479 sb->s_fs_info = NULL;
488 kfree(sbi); 480 kfree(sbi);
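The simplification above relies on unload_nls() having been made NULL-safe, kfree()-style, so fat here (and hfs, hfsplus, isofs, and jfs below) can drop the "if (table) unload_nls(table)" boilerplate and the defensive re-initialization around it. The same convention, sketched for a toy destructor:

#include <stdio.h>
#include <stdlib.h>

struct nls_table { const char *charset; };

/* NULL-safe teardown, mirroring kfree()'s contract: callers may pass
 * whatever the pointer happens to hold, loaded or not. */
static void unload_table(struct nls_table *t)
{
    if (!t)
        return;
    printf("unloading %s\n", t->charset);
    free(t);
}

int main(void)
{
    struct nls_table *loaded = malloc(sizeof(*loaded));
    struct nls_table *never_loaded = NULL;

    loaded->charset = "iso8859-1";

    /* Both calls are legal; no caller-side NULL checks needed. */
    unload_table(loaded);
    unload_table(never_loaded);
    return 0;
}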
diff --git a/fs/fcntl.c b/fs/fcntl.c
index ae413086db97..fc089f2f7f56 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -263,6 +263,79 @@ pid_t f_getown(struct file *filp)
263 return pid; 263 return pid;
264} 264}
265 265
266static int f_setown_ex(struct file *filp, unsigned long arg)
267{
268 struct f_owner_ex * __user owner_p = (void * __user)arg;
269 struct f_owner_ex owner;
270 struct pid *pid;
271 int type;
272 int ret;
273
274 ret = copy_from_user(&owner, owner_p, sizeof(owner));
275 if (ret)
276 return ret;
277
278 switch (owner.type) {
279 case F_OWNER_TID:
280 type = PIDTYPE_MAX;
281 break;
282
283 case F_OWNER_PID:
284 type = PIDTYPE_PID;
285 break;
286
287 case F_OWNER_GID:
288 type = PIDTYPE_PGID;
289 break;
290
291 default:
292 return -EINVAL;
293 }
294
295 rcu_read_lock();
296 pid = find_vpid(owner.pid);
297 if (owner.pid && !pid)
298 ret = -ESRCH;
299 else
300 ret = __f_setown(filp, pid, type, 1);
301 rcu_read_unlock();
302
303 return ret;
304}
305
306static int f_getown_ex(struct file *filp, unsigned long arg)
307{
308 struct f_owner_ex * __user owner_p = (void * __user)arg;
309 struct f_owner_ex owner;
310 int ret = 0;
311
312 read_lock(&filp->f_owner.lock);
313 owner.pid = pid_vnr(filp->f_owner.pid);
314 switch (filp->f_owner.pid_type) {
315 case PIDTYPE_MAX:
316 owner.type = F_OWNER_TID;
317 break;
318
319 case PIDTYPE_PID:
320 owner.type = F_OWNER_PID;
321 break;
322
323 case PIDTYPE_PGID:
324 owner.type = F_OWNER_GID;
325 break;
326
327 default:
328 WARN_ON(1);
329 ret = -EINVAL;
330 break;
331 }
332 read_unlock(&filp->f_owner.lock);
333
334 if (!ret)
335 ret = copy_to_user(owner_p, &owner, sizeof(owner));
336 return ret;
337}
338
266static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, 339static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
267 struct file *filp) 340 struct file *filp)
268{ 341{
@@ -313,6 +386,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
313 case F_SETOWN: 386 case F_SETOWN:
314 err = f_setown(filp, arg, 1); 387 err = f_setown(filp, arg, 1);
315 break; 388 break;
389 case F_GETOWN_EX:
390 err = f_getown_ex(filp, arg);
391 break;
392 case F_SETOWN_EX:
393 err = f_setown_ex(filp, arg);
394 break;
316 case F_GETSIG: 395 case F_GETSIG:
317 err = filp->f_owner.signum; 396 err = filp->f_owner.signum;
318 break; 397 break;
@@ -428,8 +507,7 @@ static inline int sigio_perm(struct task_struct *p,
428 507
429static void send_sigio_to_task(struct task_struct *p, 508static void send_sigio_to_task(struct task_struct *p,
430 struct fown_struct *fown, 509 struct fown_struct *fown,
431 int fd, 510 int fd, int reason, int group)
432 int reason)
433{ 511{
434 /* 512 /*
435 * F_SETSIG can change ->signum lockless in parallel, make 513 * F_SETSIG can change ->signum lockless in parallel, make
@@ -461,11 +539,11 @@ static void send_sigio_to_task(struct task_struct *p,
461 else 539 else
462 si.si_band = band_table[reason - POLL_IN]; 540 si.si_band = band_table[reason - POLL_IN];
463 si.si_fd = fd; 541 si.si_fd = fd;
464 if (!group_send_sig_info(signum, &si, p)) 542 if (!do_send_sig_info(signum, &si, p, group))
465 break; 543 break;
466 /* fall-through: fall back on the old plain SIGIO signal */ 544 /* fall-through: fall back on the old plain SIGIO signal */
467 case 0: 545 case 0:
468 group_send_sig_info(SIGIO, SEND_SIG_PRIV, p); 546 do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group);
469 } 547 }
470} 548}
471 549
@@ -474,16 +552,23 @@ void send_sigio(struct fown_struct *fown, int fd, int band)
474 struct task_struct *p; 552 struct task_struct *p;
475 enum pid_type type; 553 enum pid_type type;
476 struct pid *pid; 554 struct pid *pid;
555 int group = 1;
477 556
478 read_lock(&fown->lock); 557 read_lock(&fown->lock);
558
479 type = fown->pid_type; 559 type = fown->pid_type;
560 if (type == PIDTYPE_MAX) {
561 group = 0;
562 type = PIDTYPE_PID;
563 }
564
480 pid = fown->pid; 565 pid = fown->pid;
481 if (!pid) 566 if (!pid)
482 goto out_unlock_fown; 567 goto out_unlock_fown;
483 568
484 read_lock(&tasklist_lock); 569 read_lock(&tasklist_lock);
485 do_each_pid_task(pid, type, p) { 570 do_each_pid_task(pid, type, p) {
486 send_sigio_to_task(p, fown, fd, band); 571 send_sigio_to_task(p, fown, fd, band, group);
487 } while_each_pid_task(pid, type, p); 572 } while_each_pid_task(pid, type, p);
488 read_unlock(&tasklist_lock); 573 read_unlock(&tasklist_lock);
489 out_unlock_fown: 574 out_unlock_fown:
@@ -491,10 +576,10 @@ void send_sigio(struct fown_struct *fown, int fd, int band)
491} 576}
492 577
493static void send_sigurg_to_task(struct task_struct *p, 578static void send_sigurg_to_task(struct task_struct *p,
494 struct fown_struct *fown) 579 struct fown_struct *fown, int group)
495{ 580{
496 if (sigio_perm(p, fown, SIGURG)) 581 if (sigio_perm(p, fown, SIGURG))
497 group_send_sig_info(SIGURG, SEND_SIG_PRIV, p); 582 do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group);
498} 583}
499 584
500int send_sigurg(struct fown_struct *fown) 585int send_sigurg(struct fown_struct *fown)
@@ -502,10 +587,17 @@ int send_sigurg(struct fown_struct *fown)
502 struct task_struct *p; 587 struct task_struct *p;
503 enum pid_type type; 588 enum pid_type type;
504 struct pid *pid; 589 struct pid *pid;
590 int group = 1;
505 int ret = 0; 591 int ret = 0;
506 592
507 read_lock(&fown->lock); 593 read_lock(&fown->lock);
594
508 type = fown->pid_type; 595 type = fown->pid_type;
596 if (type == PIDTYPE_MAX) {
597 group = 0;
598 type = PIDTYPE_PID;
599 }
600
509 pid = fown->pid; 601 pid = fown->pid;
510 if (!pid) 602 if (!pid)
511 goto out_unlock_fown; 603 goto out_unlock_fown;
@@ -514,7 +606,7 @@ int send_sigurg(struct fown_struct *fown)
514 606
515 read_lock(&tasklist_lock); 607 read_lock(&tasklist_lock);
516 do_each_pid_task(pid, type, p) { 608 do_each_pid_task(pid, type, p) {
517 send_sigurg_to_task(p, fown); 609 send_sigurg_to_task(p, fown, group);
518 } while_each_pid_task(pid, type, p); 610 } while_each_pid_task(pid, type, p);
519 read_unlock(&tasklist_lock); 611 read_unlock(&tasklist_lock);
520 out_unlock_fown: 612 out_unlock_fown:
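F_SETOWN_EX and F_GETOWN_EX let a descriptor's SIGIO/SIGURG owner be a single thread (F_OWNER_TID, mapped internally to PIDTYPE_MAX with group == 0), a whole process, or a process group. A hedged userspace sketch of the new interface; the fallback constants and struct below mirror the asm-generic values added by this series and are only needed where libc headers predate them:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

/* Fallbacks for headers that predate this interface. */
#ifndef F_SETOWN_EX
#define F_SETOWN_EX  15
#define F_GETOWN_EX  16
#define F_OWNER_TID  0
#define F_OWNER_PID  1
#define F_OWNER_GID  2
struct f_owner_ex {
    int   type;
    pid_t pid;
};
#endif

int main(void)
{
    struct f_owner_ex owner, check;
    int fd = open("/dev/null", O_RDWR);

    if (fd < 0)
        return 1;

    /* Direct SIGIO for this descriptor at the calling thread only,
     * not the whole thread group (the new F_OWNER_TID case). */
    owner.type = F_OWNER_TID;
    owner.pid = (pid_t)syscall(SYS_gettid);
    if (fcntl(fd, F_SETOWN_EX, &owner) == -1) {
        perror("F_SETOWN_EX");
        return 1;
    }

    if (fcntl(fd, F_GETOWN_EX, &check) == -1) {
        perror("F_GETOWN_EX");
        return 1;
    }
    printf("owner type=%d pid=%d\n", check.type, (int)check.pid);
    close(fd);
    return 0;
}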
diff --git a/fs/file_table.c b/fs/file_table.c
index 334ce39881f8..8eb44042e009 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -74,14 +74,14 @@ EXPORT_SYMBOL_GPL(get_max_files);
74 * Handle nr_files sysctl 74 * Handle nr_files sysctl
75 */ 75 */
76#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) 76#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
77int proc_nr_files(ctl_table *table, int write, struct file *filp, 77int proc_nr_files(ctl_table *table, int write,
78 void __user *buffer, size_t *lenp, loff_t *ppos) 78 void __user *buffer, size_t *lenp, loff_t *ppos)
79{ 79{
80 files_stat.nr_files = get_nr_files(); 80 files_stat.nr_files = get_nr_files();
81 return proc_dointvec(table, write, filp, buffer, lenp, ppos); 81 return proc_dointvec(table, write, buffer, lenp, ppos);
82} 82}
83#else 83#else
84int proc_nr_files(ctl_table *table, int write, struct file *filp, 84int proc_nr_files(ctl_table *table, int write,
85 void __user *buffer, size_t *lenp, loff_t *ppos) 85 void __user *buffer, size_t *lenp, loff_t *ppos)
86{ 86{
87 return -ENOSYS; 87 return -ENOSYS;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index e703654e7f40..992f6c9410bb 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1276,14 +1276,9 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1276 return 0; 1276 return 0;
1277 1277
1278 if (attr->ia_valid & ATTR_SIZE) { 1278 if (attr->ia_valid & ATTR_SIZE) {
1279 unsigned long limit; 1279 err = inode_newsize_ok(inode, attr->ia_size);
1280 if (IS_SWAPFILE(inode)) 1280 if (err)
1281 return -ETXTBSY; 1281 return err;
1282 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
1283 if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) {
1284 send_sig(SIGXFSZ, current, 0);
1285 return -EFBIG;
1286 }
1287 is_truncate = true; 1282 is_truncate = true;
1288 } 1283 }
1289 1284
@@ -1350,8 +1345,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1350 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. 1345 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1351 */ 1346 */
1352 if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { 1347 if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1353 if (outarg.attr.size < oldsize) 1348 truncate_pagecache(inode, oldsize, outarg.attr.size);
1354 fuse_truncate(inode->i_mapping, outarg.attr.size);
1355 invalidate_inode_pages2(inode->i_mapping); 1349 invalidate_inode_pages2(inode->i_mapping);
1356 } 1350 }
1357 1351
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index fc9c79feb5f7..01cc462ff45d 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -606,8 +606,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
606void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, 606void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
607 u64 attr_valid); 607 u64 attr_valid);
608 608
609void fuse_truncate(struct address_space *mapping, loff_t offset);
610
611/** 609/**
612 * Initialize the client device 610 * Initialize the client device
613 */ 611 */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6da947daabda..1a822ce2b24b 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -140,14 +140,6 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
140 return 0; 140 return 0;
141} 141}
142 142
143void fuse_truncate(struct address_space *mapping, loff_t offset)
144{
145 /* See vmtruncate() */
146 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
147 truncate_inode_pages(mapping, offset);
148 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
149}
150
151void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, 143void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
152 u64 attr_valid) 144 u64 attr_valid)
153{ 145{
@@ -205,8 +197,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
205 spin_unlock(&fc->lock); 197 spin_unlock(&fc->lock);
206 198
207 if (S_ISREG(inode->i_mode) && oldsize != attr->size) { 199 if (S_ISREG(inode->i_mode) && oldsize != attr->size) {
208 if (attr->size < oldsize) 200 truncate_pagecache(inode, oldsize, attr->size);
209 fuse_truncate(inode->i_mapping, attr->size);
210 invalidate_inode_pages2(inode->i_mapping); 201 invalidate_inode_pages2(inode->i_mapping);
211 } 202 }
212} 203}
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 7ebae9a4ecc0..694b5d48f036 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1135,6 +1135,7 @@ static const struct address_space_operations gfs2_writeback_aops = {
1135 .direct_IO = gfs2_direct_IO, 1135 .direct_IO = gfs2_direct_IO,
1136 .migratepage = buffer_migrate_page, 1136 .migratepage = buffer_migrate_page,
1137 .is_partially_uptodate = block_is_partially_uptodate, 1137 .is_partially_uptodate = block_is_partially_uptodate,
1138 .error_remove_page = generic_error_remove_page,
1138}; 1139};
1139 1140
1140static const struct address_space_operations gfs2_ordered_aops = { 1141static const struct address_space_operations gfs2_ordered_aops = {
@@ -1151,6 +1152,7 @@ static const struct address_space_operations gfs2_ordered_aops = {
1151 .direct_IO = gfs2_direct_IO, 1152 .direct_IO = gfs2_direct_IO,
1152 .migratepage = buffer_migrate_page, 1153 .migratepage = buffer_migrate_page,
1153 .is_partially_uptodate = block_is_partially_uptodate, 1154 .is_partially_uptodate = block_is_partially_uptodate,
1155 .error_remove_page = generic_error_remove_page,
1154}; 1156};
1155 1157
1156static const struct address_space_operations gfs2_jdata_aops = { 1158static const struct address_space_operations gfs2_jdata_aops = {
@@ -1166,6 +1168,7 @@ static const struct address_space_operations gfs2_jdata_aops = {
1166 .invalidatepage = gfs2_invalidatepage, 1168 .invalidatepage = gfs2_invalidatepage,
1167 .releasepage = gfs2_releasepage, 1169 .releasepage = gfs2_releasepage,
1168 .is_partially_uptodate = block_is_partially_uptodate, 1170 .is_partially_uptodate = block_is_partially_uptodate,
1171 .error_remove_page = generic_error_remove_page,
1169}; 1172};
1170 1173
1171void gfs2_set_aops(struct inode *inode) 1174void gfs2_set_aops(struct inode *inode)
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 7b6165f25fbe..8bbe03c3f6d5 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -344,10 +344,8 @@ void hfs_mdb_put(struct super_block *sb)
344 brelse(HFS_SB(sb)->mdb_bh); 344 brelse(HFS_SB(sb)->mdb_bh);
345 brelse(HFS_SB(sb)->alt_mdb_bh); 345 brelse(HFS_SB(sb)->alt_mdb_bh);
346 346
347 if (HFS_SB(sb)->nls_io) 347 unload_nls(HFS_SB(sb)->nls_io);
348 unload_nls(HFS_SB(sb)->nls_io); 348 unload_nls(HFS_SB(sb)->nls_disk);
349 if (HFS_SB(sb)->nls_disk)
350 unload_nls(HFS_SB(sb)->nls_disk);
351 349
352 free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0); 350 free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0);
353 kfree(HFS_SB(sb)); 351 kfree(HFS_SB(sb));
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index c0759fe0855b..43022f3d5148 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -229,8 +229,7 @@ static void hfsplus_put_super(struct super_block *sb)
229 iput(HFSPLUS_SB(sb).alloc_file); 229 iput(HFSPLUS_SB(sb).alloc_file);
230 iput(HFSPLUS_SB(sb).hidden_dir); 230 iput(HFSPLUS_SB(sb).hidden_dir);
231 brelse(HFSPLUS_SB(sb).s_vhbh); 231 brelse(HFSPLUS_SB(sb).s_vhbh);
232 if (HFSPLUS_SB(sb).nls) 232 unload_nls(HFSPLUS_SB(sb).nls);
233 unload_nls(HFSPLUS_SB(sb).nls);
234 kfree(sb->s_fs_info); 233 kfree(sb->s_fs_info);
235 sb->s_fs_info = NULL; 234 sb->s_fs_info = NULL;
236 235
@@ -464,8 +463,7 @@ out:
464 463
465cleanup: 464cleanup:
466 hfsplus_put_super(sb); 465 hfsplus_put_super(sb);
467 if (nls) 466 unload_nls(nls);
468 unload_nls(nls);
469 return err; 467 return err;
470} 468}
471 469
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index eba6d552d9c9..87a1258953b8 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -380,36 +380,11 @@ static void hugetlbfs_delete_inode(struct inode *inode)
380 380
381static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock) 381static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock)
382{ 382{
383 struct super_block *sb = inode->i_sb; 383 if (generic_detach_inode(inode)) {
384 384 truncate_hugepages(inode, 0);
385 if (!hlist_unhashed(&inode->i_hash)) { 385 clear_inode(inode);
386 if (!(inode->i_state & (I_DIRTY|I_SYNC))) 386 destroy_inode(inode);
387 list_move(&inode->i_list, &inode_unused);
388 inodes_stat.nr_unused++;
389 if (!sb || (sb->s_flags & MS_ACTIVE)) {
390 spin_unlock(&inode_lock);
391 return;
392 }
393 inode->i_state |= I_WILL_FREE;
394 spin_unlock(&inode_lock);
395 /*
396 * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK
397 * in our backing_dev_info.
398 */
399 write_inode_now(inode, 1);
400 spin_lock(&inode_lock);
401 inode->i_state &= ~I_WILL_FREE;
402 inodes_stat.nr_unused--;
403 hlist_del_init(&inode->i_hash);
404 } 387 }
405 list_del_init(&inode->i_list);
406 list_del_init(&inode->i_sb_list);
407 inode->i_state |= I_FREEING;
408 inodes_stat.nr_inodes--;
409 spin_unlock(&inode_lock);
410 truncate_hugepages(inode, 0);
411 clear_inode(inode);
412 destroy_inode(inode);
413} 388}
414 389
415static void hugetlbfs_drop_inode(struct inode *inode) 390static void hugetlbfs_drop_inode(struct inode *inode)
@@ -936,15 +911,9 @@ static struct file_system_type hugetlbfs_fs_type = {
936 911
937static struct vfsmount *hugetlbfs_vfsmount; 912static struct vfsmount *hugetlbfs_vfsmount;
938 913
939static int can_do_hugetlb_shm(int creat_flags) 914static int can_do_hugetlb_shm(void)
940{ 915{
941 if (creat_flags != HUGETLB_SHMFS_INODE) 916 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
942 return 0;
943 if (capable(CAP_IPC_LOCK))
944 return 1;
945 if (in_group_p(sysctl_hugetlb_shm_group))
946 return 1;
947 return 0;
948} 917}
949 918
950struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, 919struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
@@ -960,7 +929,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
960 if (!hugetlbfs_vfsmount) 929 if (!hugetlbfs_vfsmount)
961 return ERR_PTR(-ENOENT); 930 return ERR_PTR(-ENOENT);
962 931
963 if (!can_do_hugetlb_shm(creat_flags)) { 932 if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
964 *user = current_user(); 933 *user = current_user();
965 if (user_shm_lock(size, *user)) { 934 if (user_shm_lock(size, *user)) {
966 WARN_ONCE(1, 935 WARN_ONCE(1,
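hugetlbfs_forget_inode() previously duplicated generic_forget_inode() except for the final truncation step; the refactor introduces generic_detach_inode(), which performs the shared unhash/writeback bookkeeping and reports whether the caller should go on to destroy the inode. A sketch of that refactoring shape (the two-field inode here is a deliberately crude model):

#include <stdio.h>

struct inode { int dirty; int active_sb; };

/* Shared part: returns 1 when the caller should finish destroying
 * the inode, 0 when it was parked on the unused list instead.
 * (Crude model of generic_detach_inode().) */
static int detach_inode(struct inode *inode)
{
    if (inode->active_sb)
        return 0;   /* kept cached; nothing more to do */
    if (inode->dirty)
        printf("writing inode back before free\n");
    return 1;
}

/* Each filesystem keeps only its own teardown tail. */
static void generic_forget(struct inode *inode)
{
    if (!detach_inode(inode))
        return;
    printf("truncate pagecache, clear, destroy\n");
}

static void hugetlbfs_forget(struct inode *inode)
{
    if (!detach_inode(inode))
        return;
    printf("truncate huge pages, clear, destroy\n");
}

int main(void)
{
    struct inode busy = { .dirty = 1, .active_sb = 1 };
    struct inode dead = { .dirty = 1, .active_sb = 0 };

    generic_forget(&busy);
    hugetlbfs_forget(&dead);
    return 0;
}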
diff --git a/fs/inode.c b/fs/inode.c
index 76582b06ab97..4d8e3be55976 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1241,7 +1241,16 @@ void generic_delete_inode(struct inode *inode)
1241} 1241}
1242EXPORT_SYMBOL(generic_delete_inode); 1242EXPORT_SYMBOL(generic_delete_inode);
1243 1243
1244static void generic_forget_inode(struct inode *inode) 1244/**
1245 * generic_detach_inode - remove inode from inode lists
1246 * @inode: inode to remove
1247 *
1248 * Remove inode from inode lists, write it if it's dirty. This is just an
1249 * internal VFS helper exported for hugetlbfs. Do not use!
1250 *
1251 * Returns 1 if inode should be completely destroyed.
1252 */
1253int generic_detach_inode(struct inode *inode)
1245{ 1254{
1246 struct super_block *sb = inode->i_sb; 1255 struct super_block *sb = inode->i_sb;
1247 1256
@@ -1251,7 +1260,7 @@ static void generic_forget_inode(struct inode *inode)
1251 inodes_stat.nr_unused++; 1260 inodes_stat.nr_unused++;
1252 if (sb->s_flags & MS_ACTIVE) { 1261 if (sb->s_flags & MS_ACTIVE) {
1253 spin_unlock(&inode_lock); 1262 spin_unlock(&inode_lock);
1254 return; 1263 return 0;
1255 } 1264 }
1256 WARN_ON(inode->i_state & I_NEW); 1265 WARN_ON(inode->i_state & I_NEW);
1257 inode->i_state |= I_WILL_FREE; 1266 inode->i_state |= I_WILL_FREE;
@@ -1269,6 +1278,14 @@ static void generic_forget_inode(struct inode *inode)
1269 inode->i_state |= I_FREEING; 1278 inode->i_state |= I_FREEING;
1270 inodes_stat.nr_inodes--; 1279 inodes_stat.nr_inodes--;
1271 spin_unlock(&inode_lock); 1280 spin_unlock(&inode_lock);
1281 return 1;
1282}
1283EXPORT_SYMBOL_GPL(generic_detach_inode);
1284
1285static void generic_forget_inode(struct inode *inode)
1286{
1287 if (!generic_detach_inode(inode))
1288 return;
1272 if (inode->i_data.nrpages) 1289 if (inode->i_data.nrpages)
1273 truncate_inode_pages(&inode->i_data, 0); 1290 truncate_inode_pages(&inode->i_data, 0);
1274 clear_inode(inode); 1291 clear_inode(inode);
@@ -1399,31 +1416,31 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
1399 struct inode *inode = dentry->d_inode; 1416 struct inode *inode = dentry->d_inode;
1400 struct timespec now; 1417 struct timespec now;
1401 1418
1402 if (mnt_want_write(mnt))
1403 return;
1404 if (inode->i_flags & S_NOATIME) 1419 if (inode->i_flags & S_NOATIME)
1405 goto out; 1420 return;
1406 if (IS_NOATIME(inode)) 1421 if (IS_NOATIME(inode))
1407 goto out; 1422 return;
1408 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) 1423 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
1409 goto out; 1424 return;
1410 1425
1411 if (mnt->mnt_flags & MNT_NOATIME) 1426 if (mnt->mnt_flags & MNT_NOATIME)
1412 goto out; 1427 return;
1413 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) 1428 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1414 goto out; 1429 return;
1415 1430
1416 now = current_fs_time(inode->i_sb); 1431 now = current_fs_time(inode->i_sb);
1417 1432
1418 if (!relatime_need_update(mnt, inode, now)) 1433 if (!relatime_need_update(mnt, inode, now))
1419 goto out; 1434 return;
1420 1435
1421 if (timespec_equal(&inode->i_atime, &now)) 1436 if (timespec_equal(&inode->i_atime, &now))
1422 goto out; 1437 return;
1438
1439 if (mnt_want_write(mnt))
1440 return;
1423 1441
1424 inode->i_atime = now; 1442 inode->i_atime = now;
1425 mark_inode_dirty_sync(inode); 1443 mark_inode_dirty_sync(inode);
1426out:
1427 mnt_drop_write(mnt); 1444 mnt_drop_write(mnt);
1428} 1445}
1429EXPORT_SYMBOL(touch_atime); 1446EXPORT_SYMBOL(touch_atime);
@@ -1444,34 +1461,37 @@ void file_update_time(struct file *file)
1444{ 1461{
1445 struct inode *inode = file->f_path.dentry->d_inode; 1462 struct inode *inode = file->f_path.dentry->d_inode;
1446 struct timespec now; 1463 struct timespec now;
1447 int sync_it = 0; 1464 enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
1448 int err;
1449 1465
1466 /* First try to exhaust all avenues to not sync */
1450 if (IS_NOCMTIME(inode)) 1467 if (IS_NOCMTIME(inode))
1451 return; 1468 return;
1452 1469
1453 err = mnt_want_write_file(file);
1454 if (err)
1455 return;
1456
1457 now = current_fs_time(inode->i_sb); 1470 now = current_fs_time(inode->i_sb);
1458 if (!timespec_equal(&inode->i_mtime, &now)) { 1471 if (!timespec_equal(&inode->i_mtime, &now))
1459 inode->i_mtime = now; 1472 sync_it = S_MTIME;
1460 sync_it = 1;
1461 }
1462 1473
1463 if (!timespec_equal(&inode->i_ctime, &now)) { 1474 if (!timespec_equal(&inode->i_ctime, &now))
1464 inode->i_ctime = now; 1475 sync_it |= S_CTIME;
1465 sync_it = 1;
1466 }
1467 1476
1468 if (IS_I_VERSION(inode)) { 1477 if (IS_I_VERSION(inode))
1469 inode_inc_iversion(inode); 1478 sync_it |= S_VERSION;
1470 sync_it = 1; 1479
1471 } 1480 if (!sync_it)
1481 return;
1472 1482
1473 if (sync_it) 1483 /* Finally allowed to write? Takes lock. */
1474 mark_inode_dirty_sync(inode); 1484 if (mnt_want_write_file(file))
1485 return;
1486
1487 /* Only change inode inside the lock region */
1488 if (sync_it & S_VERSION)
1489 inode_inc_iversion(inode);
1490 if (sync_it & S_CTIME)
1491 inode->i_ctime = now;
1492 if (sync_it & S_MTIME)
1493 inode->i_mtime = now;
1494 mark_inode_dirty_sync(inode);
1475 mnt_drop_write(file->f_path.mnt); 1495 mnt_drop_write(file->f_path.mnt);
1476} 1496}
1477EXPORT_SYMBOL(file_update_time); 1497EXPORT_SYMBOL(file_update_time);
@@ -1599,7 +1619,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
1599 else if (S_ISSOCK(mode)) 1619 else if (S_ISSOCK(mode))
1600 inode->i_fop = &bad_sock_fops; 1620 inode->i_fop = &bad_sock_fops;
1601 else 1621 else
1602 printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n", 1622 printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
1603 mode); 1623 " inode %s:%lu\n", mode, inode->i_sb->s_id,
1624 inode->i_ino);
1604} 1625}
1605EXPORT_SYMBOL(init_special_inode); 1626EXPORT_SYMBOL(init_special_inode);
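Both timestamp paths above are reordered so mount write access is only taken once an update is known to be needed: touch_atime() runs its cheap early-out tests before mnt_want_write(), and file_update_time() first accumulates the pending updates into a bitmask, returning early when it is empty. A standalone model of that accumulate-then-commit shape:

#include <stdio.h>
#include <time.h>

enum { SYNC_MTIME = 1, SYNC_CTIME = 2, SYNC_VERSION = 4 };

struct inode { time_t mtime, ctime; int version; };

/* Expensive in the kernel: takes the mount's write count. */
static int want_write(void) { printf("mnt_want_write()\n"); return 0; }
static void drop_write(void) { printf("mnt_drop_write()\n"); }

static void update_time(struct inode *inode, time_t now, int versioned)
{
    int sync_it = 0;

    /* First exhaust every reason not to write anything. */
    if (inode->mtime != now)
        sync_it = SYNC_MTIME;
    if (inode->ctime != now)
        sync_it |= SYNC_CTIME;
    if (versioned)
        sync_it |= SYNC_VERSION;
    if (!sync_it)
        return;                 /* no write access taken at all */

    /* Finally allowed to write? Only now pay for the lock. */
    if (want_write())
        return;
    if (sync_it & SYNC_VERSION)
        inode->version++;
    if (sync_it & SYNC_CTIME)
        inode->ctime = now;
    if (sync_it & SYNC_MTIME)
        inode->mtime = now;
    drop_write();
}

int main(void)
{
    time_t now = time(NULL);
    struct inode ino = { .mtime = now, .ctime = now, .version = 0 };

    update_time(&ino, now, 0);      /* up to date: silent, no locking */
    update_time(&ino, now + 1, 1);  /* stale: locks once, updates all */
    printf("version=%d\n", ino.version);
    return 0;
}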
diff --git a/fs/internal.h b/fs/internal.h
index d55ef562f0bb..515175b8b72e 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -57,6 +57,7 @@ extern int check_unsafe_exec(struct linux_binprm *);
57 * namespace.c 57 * namespace.c
58 */ 58 */
59extern int copy_mount_options(const void __user *, unsigned long *); 59extern int copy_mount_options(const void __user *, unsigned long *);
60extern int copy_mount_string(const void __user *, char **);
60 61
61extern void free_vfsmnt(struct vfsmount *); 62extern void free_vfsmnt(struct vfsmount *);
62extern struct vfsmount *alloc_vfsmnt(const char *); 63extern struct vfsmount *alloc_vfsmnt(const char *);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 5612880fcbe7..7b17a14396ff 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -162,20 +162,21 @@ EXPORT_SYMBOL(fiemap_check_flags);
162static int fiemap_check_ranges(struct super_block *sb, 162static int fiemap_check_ranges(struct super_block *sb,
163 u64 start, u64 len, u64 *new_len) 163 u64 start, u64 len, u64 *new_len)
164{ 164{
165 u64 maxbytes = (u64) sb->s_maxbytes;
166
165 *new_len = len; 167 *new_len = len;
166 168
167 if (len == 0) 169 if (len == 0)
168 return -EINVAL; 170 return -EINVAL;
169 171
170 if (start > sb->s_maxbytes) 172 if (start > maxbytes)
171 return -EFBIG; 173 return -EFBIG;
172 174
173 /* 175 /*
174 * Shrink request scope to what the fs can actually handle. 176 * Shrink request scope to what the fs can actually handle.
175 */ 177 */
176 if ((len > sb->s_maxbytes) || 178 if (len > maxbytes || (maxbytes - len) < start)
177 (sb->s_maxbytes - len) < start) 179 *new_len = maxbytes - start;
178 *new_len = sb->s_maxbytes - start;
179 180
180 return 0; 181 return 0;
181} 182}
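
fiemap_check_ranges() now reads s_maxbytes once into a local u64 and clamps the request with a subtraction that cannot underflow, because the short-circuit guarantees maxbytes - len is only evaluated when len <= maxbytes. The check in isolation, with illustrative names:

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Clamp (start, len) to [0, maxbytes]; returns 0 or a negative errno. */
    static int check_ranges(uint64_t maxbytes, uint64_t start,
                            uint64_t len, uint64_t *new_len)
    {
            *new_len = len;
            if (len == 0)
                    return -EINVAL;
            if (start > maxbytes)
                    return -EFBIG;
            /* maxbytes - len is only computed when len <= maxbytes */
            if (len > maxbytes || (maxbytes - len) < start)
                    *new_len = maxbytes - start;
            return 0;
    }

    int main(void)
    {
            uint64_t n;

            check_ranges(1000, 900, 500, &n);
            printf("%llu\n", (unsigned long long)n);   /* clamped to 100 */
            return 0;
    }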
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 85f96bc651c7..6b4dcd4f2943 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -46,10 +46,7 @@ static void isofs_put_super(struct super_block *sb)
46#ifdef CONFIG_JOLIET 46#ifdef CONFIG_JOLIET
47 lock_kernel(); 47 lock_kernel();
48 48
49 if (sbi->s_nls_iocharset) { 49 unload_nls(sbi->s_nls_iocharset);
50 unload_nls(sbi->s_nls_iocharset);
51 sbi->s_nls_iocharset = NULL;
52 }
53 50
54 unlock_kernel(); 51 unlock_kernel();
55#endif 52#endif
@@ -912,8 +909,7 @@ out_no_root:
912 printk(KERN_WARNING "%s: get root inode failed\n", __func__); 909 printk(KERN_WARNING "%s: get root inode failed\n", __func__);
913out_no_inode: 910out_no_inode:
914#ifdef CONFIG_JOLIET 911#ifdef CONFIG_JOLIET
915 if (sbi->s_nls_iocharset) 912 unload_nls(sbi->s_nls_iocharset);
916 unload_nls(sbi->s_nls_iocharset);
917#endif 913#endif
918 goto out_freesbi; 914 goto out_freesbi;
919out_no_read: 915out_no_read:
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 37e6dcda8fc8..2234c73fc577 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -178,13 +178,11 @@ static void jfs_put_super(struct super_block *sb)
178 rc = jfs_umount(sb); 178 rc = jfs_umount(sb);
179 if (rc) 179 if (rc)
180 jfs_err("jfs_umount failed with return code %d", rc); 180 jfs_err("jfs_umount failed with return code %d", rc);
181 if (sbi->nls_tab) 181
182 unload_nls(sbi->nls_tab); 182 unload_nls(sbi->nls_tab);
183 sbi->nls_tab = NULL;
184 183
185 truncate_inode_pages(sbi->direct_inode->i_mapping, 0); 184 truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
186 iput(sbi->direct_inode); 185 iput(sbi->direct_inode);
187 sbi->direct_inode = NULL;
188 186
189 kfree(sbi); 187 kfree(sbi);
190 188
@@ -347,8 +345,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
347 345
348 if (nls_map != (void *) -1) { 346 if (nls_map != (void *) -1) {
349 /* Discard old (if remount) */ 347 /* Discard old (if remount) */
350 if (sbi->nls_tab) 348 unload_nls(sbi->nls_tab);
351 unload_nls(sbi->nls_tab);
352 sbi->nls_tab = nls_map; 349 sbi->nls_tab = nls_map;
353 } 350 }
354 return 1; 351 return 1;
diff --git a/fs/libfs.c b/fs/libfs.c
index dcec3d3ea64f..219576c52d80 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -527,14 +527,18 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
527 const void *from, size_t available) 527 const void *from, size_t available)
528{ 528{
529 loff_t pos = *ppos; 529 loff_t pos = *ppos;
530 size_t ret;
531
530 if (pos < 0) 532 if (pos < 0)
531 return -EINVAL; 533 return -EINVAL;
532 if (pos >= available) 534 if (pos >= available || !count)
533 return 0; 535 return 0;
534 if (count > available - pos) 536 if (count > available - pos)
535 count = available - pos; 537 count = available - pos;
536 if (copy_to_user(to, from + pos, count)) 538 ret = copy_to_user(to, from + pos, count);
539 if (ret == count)
537 return -EFAULT; 540 return -EFAULT;
541 count -= ret;
538 *ppos = pos + count; 542 *ppos = pos + count;
539 return count; 543 return count;
540} 544}
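
The new simple_read_from_buffer() treats a partial fault from copy_to_user() as a short read rather than a failure: only a copy that transfers nothing yields -EFAULT; otherwise *ppos advances by what was actually copied. A userspace sketch of the accounting, where copy_out() mimics copy_to_user()'s convention of returning the bytes NOT copied (the fault injection is purely illustrative):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    /* Mimics copy_to_user(): returns the number of bytes NOT copied. */
    static size_t copy_out(char *to, const char *from, size_t n,
                           size_t fault_after)
    {
            size_t ok = n < fault_after ? n : fault_after;

            memcpy(to, from, ok);
            return n - ok;
    }

    static long read_from_buffer(char *to, size_t count, long *ppos,
                                 const char *from, size_t available,
                                 size_t fault_after)
    {
            long pos = *ppos;
            size_t ret;

            if (pos < 0)
                    return -EINVAL;
            if ((size_t)pos >= available || !count)
                    return 0;
            if (count > available - pos)
                    count = available - pos;
            ret = copy_out(to, from + pos, count, fault_after);
            if (ret == count)
                    return -EFAULT;         /* nothing at all was copied */
            count -= ret;                   /* otherwise: a short read */
            *ppos = pos + count;
            return count;
    }

    int main(void)
    {
            char dst[16];
            long pos = 0;
            long n = read_from_buffer(dst, 10, &pos, "hello world", 11, 4);

            printf("copied=%ld pos=%ld\n", n, pos);   /* copied=4 pos=4 */
            return 0;
    }

The following simple_attr_write() hunk applies the same discipline to the other direction: the callback's return value is propagated, and the full length is claimed only on success.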
@@ -735,10 +739,11 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
735 if (copy_from_user(attr->set_buf, buf, size)) 739 if (copy_from_user(attr->set_buf, buf, size))
736 goto out; 740 goto out;
737 741
738 ret = len; /* claim we got the whole input */
739 attr->set_buf[size] = '\0'; 742 attr->set_buf[size] = '\0';
740 val = simple_strtol(attr->set_buf, NULL, 0); 743 val = simple_strtol(attr->set_buf, NULL, 0);
741 attr->set(attr->data, val); 744 ret = attr->set(attr->data, val);
745 if (ret == 0)
746 ret = len; /* on success, claim we got the whole input */
742out: 747out:
743 mutex_unlock(&attr->mutex); 748 mutex_unlock(&attr->mutex);
744 return ret; 749 return ret;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7230787d18b0..bdc3cb4fd222 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1640,7 +1640,7 @@ static int do_new_mount(struct path *path, char *type, int flags,
1640{ 1640{
1641 struct vfsmount *mnt; 1641 struct vfsmount *mnt;
1642 1642
1643 if (!type || !memchr(type, 0, PAGE_SIZE)) 1643 if (!type)
1644 return -EINVAL; 1644 return -EINVAL;
1645 1645
1646 /* we need capabilities... */ 1646 /* we need capabilities... */
@@ -1871,6 +1871,23 @@ int copy_mount_options(const void __user * data, unsigned long *where)
1871 return 0; 1871 return 0;
1872} 1872}
1873 1873
1874int copy_mount_string(const void __user *data, char **where)
1875{
1876 char *tmp;
1877
1878 if (!data) {
1879 *where = NULL;
1880 return 0;
1881 }
1882
1883 tmp = strndup_user(data, PAGE_SIZE);
1884 if (IS_ERR(tmp))
1885 return PTR_ERR(tmp);
1886
1887 *where = tmp;
1888 return 0;
1889}
1890
1874/* 1891/*
1875 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to 1892 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
1876 * be given to the mount() call (ie: read-only, no-dev, no-suid etc). 1893 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
@@ -1900,8 +1917,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1900 1917
1901 if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE)) 1918 if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
1902 return -EINVAL; 1919 return -EINVAL;
1903 if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
1904 return -EINVAL;
1905 1920
1906 if (data_page) 1921 if (data_page)
1907 ((char *)data_page)[PAGE_SIZE - 1] = 0; 1922 ((char *)data_page)[PAGE_SIZE - 1] = 0;
@@ -2070,40 +2085,42 @@ EXPORT_SYMBOL(create_mnt_ns);
2070SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, 2085SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
2071 char __user *, type, unsigned long, flags, void __user *, data) 2086 char __user *, type, unsigned long, flags, void __user *, data)
2072{ 2087{
2073 int retval; 2088 int ret;
2089 char *kernel_type;
2090 char *kernel_dir;
2091 char *kernel_dev;
2074 unsigned long data_page; 2092 unsigned long data_page;
2075 unsigned long type_page;
2076 unsigned long dev_page;
2077 char *dir_page;
2078 2093
2079 retval = copy_mount_options(type, &type_page); 2094 ret = copy_mount_string(type, &kernel_type);
2080 if (retval < 0) 2095 if (ret < 0)
2081 return retval; 2096 goto out_type;
2082 2097
2083 dir_page = getname(dir_name); 2098 kernel_dir = getname(dir_name);
2084 retval = PTR_ERR(dir_page); 2099 if (IS_ERR(kernel_dir)) {
2085 if (IS_ERR(dir_page)) 2100 ret = PTR_ERR(kernel_dir);
2086 goto out1; 2101 goto out_dir;
2102 }
2087 2103
2088 retval = copy_mount_options(dev_name, &dev_page); 2104 ret = copy_mount_string(dev_name, &kernel_dev);
2089 if (retval < 0) 2105 if (ret < 0)
2090 goto out2; 2106 goto out_dev;
2091 2107
2092 retval = copy_mount_options(data, &data_page); 2108 ret = copy_mount_options(data, &data_page);
2093 if (retval < 0) 2109 if (ret < 0)
2094 goto out3; 2110 goto out_data;
2095 2111
2096 retval = do_mount((char *)dev_page, dir_page, (char *)type_page, 2112 ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags,
2097 flags, (void *)data_page); 2113 (void *) data_page);
2098 free_page(data_page);
2099 2114
2100out3: 2115 free_page(data_page);
2101 free_page(dev_page); 2116out_data:
2102out2: 2117 kfree(kernel_dev);
2103 putname(dir_page); 2118out_dev:
2104out1: 2119 putname(kernel_dir);
2105 free_page(type_page); 2120out_dir:
2106 return retval; 2121 kfree(kernel_type);
2122out_type:
2123 return ret;
2107} 2124}
2108 2125
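Two ideas land together in this hunk: copy_mount_string() replaces a whole-page copy for the type and device strings with a strndup_user()-style bounded duplication, and sys_mount() unwinds through one label per resource so each failure frees exactly what was allocated before it. A userspace sketch of both; copy_string() and do_mount_demo() are illustrative, not kernel functions:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define PAGE_SIZE 4096

    /* Bounded dup in the spirit of strndup_user(); NULL input is allowed. */
    static int copy_string(const char *data, char **where)
    {
            if (!data) {
                    *where = NULL;
                    return 0;
            }
            if (strnlen(data, PAGE_SIZE) == PAGE_SIZE)
                    return -ENAMETOOLONG;
            *where = strdup(data);
            return *where ? 0 : -ENOMEM;
    }

    static int do_mount_demo(const char *type, const char *dir, const char *dev)
    {
            char *ktype, *kdir, *kdev;
            int ret;

            ret = copy_string(type, &ktype);
            if (ret < 0)
                    goto out_type;
            ret = copy_string(dir, &kdir);
            if (ret < 0)
                    goto out_dir;
            ret = copy_string(dev, &kdev);
            if (ret < 0)
                    goto out_dev;

            printf("mount %s on %s type %s\n",
                   kdev ? kdev : "(none)", kdir, ktype);
            ret = 0;

            free(kdev);
    out_dev:
            free(kdir);
    out_dir:
            free(ktype);
    out_type:
            return ret;
    }

    int main(void) { return do_mount_demo("ext3", "/mnt", "/dev/sda1"); }

The success path deliberately falls through the same labels, so each free is written exactly once.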
2109/* 2126/*
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index b99ce205b1bd..cf98da1be23e 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -746,16 +746,8 @@ static void ncp_put_super(struct super_block *sb)
746 746
747#ifdef CONFIG_NCPFS_NLS 747#ifdef CONFIG_NCPFS_NLS
748 /* unload the NLS charsets */ 748 /* unload the NLS charsets */
749 if (server->nls_vol) 749 unload_nls(server->nls_vol);
750 { 750 unload_nls(server->nls_io);
751 unload_nls(server->nls_vol);
752 server->nls_vol = NULL;
753 }
754 if (server->nls_io)
755 {
756 unload_nls(server->nls_io);
757 server->nls_io = NULL;
758 }
759#endif /* CONFIG_NCPFS_NLS */ 751#endif /* CONFIG_NCPFS_NLS */
760 752
761 if (server->info_filp) 753 if (server->info_filp)
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 53a7ed7eb9c6..0d58caf4a6e1 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -223,10 +223,8 @@ ncp_set_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg)
223 oldset_io = server->nls_io; 223 oldset_io = server->nls_io;
224 server->nls_io = iocharset; 224 server->nls_io = iocharset;
225 225
226 if (oldset_cp) 226 unload_nls(oldset_cp);
227 unload_nls(oldset_cp); 227 unload_nls(oldset_io);
228 if (oldset_io)
229 unload_nls(oldset_io);
230 228
231 return 0; 229 return 0;
232} 230}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 5021b75d2d1e..86d6b4db1096 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -525,6 +525,7 @@ const struct address_space_operations nfs_file_aops = {
525 .direct_IO = nfs_direct_IO, 525 .direct_IO = nfs_direct_IO,
526 .migratepage = nfs_migrate_page, 526 .migratepage = nfs_migrate_page,
527 .launder_page = nfs_launder_page, 527 .launder_page = nfs_launder_page,
528 .error_remove_page = generic_error_remove_page,
528}; 529};
529 530
530/* 531/*
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 060022b4651c..faa091865ad0 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -458,49 +458,21 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
458 */ 458 */
459static int nfs_vmtruncate(struct inode * inode, loff_t offset) 459static int nfs_vmtruncate(struct inode * inode, loff_t offset)
460{ 460{
461 if (i_size_read(inode) < offset) { 461 loff_t oldsize;
462 unsigned long limit; 462 int err;
463
464 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
465 if (limit != RLIM_INFINITY && offset > limit)
466 goto out_sig;
467 if (offset > inode->i_sb->s_maxbytes)
468 goto out_big;
469 spin_lock(&inode->i_lock);
470 i_size_write(inode, offset);
471 spin_unlock(&inode->i_lock);
472 } else {
473 struct address_space *mapping = inode->i_mapping;
474 463
475 /* 464 err = inode_newsize_ok(inode, offset);
476 * truncation of in-use swapfiles is disallowed - it would 465 if (err)
477 * cause subsequent swapout to scribble on the now-freed 466 goto out;
478 * blocks.
479 */
480 if (IS_SWAPFILE(inode))
481 return -ETXTBSY;
482 spin_lock(&inode->i_lock);
483 i_size_write(inode, offset);
484 spin_unlock(&inode->i_lock);
485 467
486 /* 468 spin_lock(&inode->i_lock);
487 * unmap_mapping_range is called twice, first simply for 469 oldsize = inode->i_size;
488 * efficiency so that truncate_inode_pages does fewer 470 i_size_write(inode, offset);
489 * single-page unmaps. However after this first call, and 471 spin_unlock(&inode->i_lock);
490 * before truncate_inode_pages finishes, it is possible for 472
491 * private pages to be COWed, which remain after 473 truncate_pagecache(inode, oldsize, offset);
492 * truncate_inode_pages finishes, hence the second 474out:
493 * unmap_mapping_range call must be made for correctness. 475 return err;
494 */
495 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
496 truncate_inode_pages(mapping, offset);
497 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
498 }
499 return 0;
500out_sig:
501 send_sig(SIGXFSZ, current, 0);
502out_big:
503 return -EFBIG;
504} 476}
505 477
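The rewrite of nfs_vmtruncate() collapses the open-coded RLIMIT_FSIZE/s_maxbytes checks and the paired unmap_mapping_range() calls into two helpers, inode_newsize_ok() for policy and truncate_pagecache() for the COW-safe eviction; what survives is an ordering: validate, publish the new size under the lock, then truncate using the remembered old size. A threaded userspace sketch of that ordering with both helpers mocked (build with -pthread):

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>

    #define MAXBYTES (1L << 40)

    struct obj {
            pthread_mutex_t lock;
            long size;
    };

    /* Stand-in for inode_newsize_ok(): pure policy, no side effects. */
    static int newsize_ok(long offset)
    {
            return (offset < 0 || offset > MAXBYTES) ? -EFBIG : 0;
    }

    /* Stand-in for truncate_pagecache(oldsize, newsize). */
    static void drop_cache(long oldsize, long newsize)
    {
            printf("dropping cached bytes beyond %ld (old size %ld)\n",
                   newsize, oldsize);
    }

    static int vmtruncate(struct obj *o, long offset)
    {
            long oldsize;
            int err = newsize_ok(offset);

            if (err)
                    return err;

            pthread_mutex_lock(&o->lock);
            oldsize = o->size;
            o->size = offset;               /* publish the new size first */
            pthread_mutex_unlock(&o->lock);

            drop_cache(oldsize, offset);    /* then evict stale cache */
            return 0;
    }

    int main(void)
    {
            struct obj o = { PTHREAD_MUTEX_INITIALIZER, 4096 };

            return vmtruncate(&o, 1024);
    }

The ramfs_nommu_expand_for_mapping() hunk later in this series makes the same swap to inode_newsize_ok().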
506/** 478/**
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 810770f96816..29786d3b9326 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1711,6 +1711,8 @@ static int nfs_validate_mount_data(void *options,
1711 1711
1712 if (!(data->flags & NFS_MOUNT_TCP)) 1712 if (!(data->flags & NFS_MOUNT_TCP))
1713 args->nfs_server.protocol = XPRT_TRANSPORT_UDP; 1713 args->nfs_server.protocol = XPRT_TRANSPORT_UDP;
1714 else
1715 args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
1714 /* N.B. caller will free nfs_server.hostname in all cases */ 1716 /* N.B. caller will free nfs_server.hostname in all cases */
1715 args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); 1717 args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL);
1716 args->namlen = data->namlen; 1718 args->namlen = data->namlen;
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 477d37d83b31..2224b4d07bf0 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -270,7 +270,8 @@ struct nls_table *load_nls(char *charset)
270 270
271void unload_nls(struct nls_table *nls) 271void unload_nls(struct nls_table *nls)
272{ 272{
273 module_put(nls->owner); 273 if (nls)
274 module_put(nls->owner);
274} 275}
275 276
276static const wchar_t charset2uni[256] = { 277static const wchar_t charset2uni[256] = {
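
Making unload_nls() NULL-tolerant is what let every caller earlier in this series (isofs, jfs, ncpfs, ntfs, smbfs) drop its if (x) unload_nls(x) guard, the same contract free(NULL) honors. The idiom in miniature:

    #include <stdio.h>
    #include <stdlib.h>

    struct table { const char *name; };

    /* NULL-tolerant release, like free(): callers never need to check. */
    static void unload_table(struct table *t)
    {
            if (t)
                    free(t);
    }

    int main(void)
    {
            struct table *t = calloc(1, sizeof(*t));

            unload_table(t);
            unload_table(NULL);     /* perfectly fine */
            puts("ok");
            return 0;
    }

Centralizing the check also removes the chance for any one caller to forget it.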
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index b38f944f0667..cfce53cb65d7 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1550,6 +1550,7 @@ const struct address_space_operations ntfs_aops = {
1550 .migratepage = buffer_migrate_page, /* Move a page cache page from 1550 .migratepage = buffer_migrate_page, /* Move a page cache page from
1551 one physical page to an 1551 one physical page to an
1552 other. */ 1552 other. */
1553 .error_remove_page = generic_error_remove_page,
1553}; 1554};
1554 1555
1555/** 1556/**
@@ -1569,6 +1570,7 @@ const struct address_space_operations ntfs_mst_aops = {
1569 .migratepage = buffer_migrate_page, /* Move a page cache page from 1570 .migratepage = buffer_migrate_page, /* Move a page cache page from
1570 one physical page to an 1571 one physical page to an
1571 other. */ 1572 other. */
1573 .error_remove_page = generic_error_remove_page,
1572}; 1574};
1573 1575
1574#ifdef NTFS_RW 1576#ifdef NTFS_RW
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index abaaa1cbf8de..80b04770e8e9 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -201,8 +201,7 @@ use_utf8:
201 v, old_nls->charset); 201 v, old_nls->charset);
202 nls_map = old_nls; 202 nls_map = old_nls;
203 } else /* nls_map */ { 203 } else /* nls_map */ {
204 if (old_nls) 204 unload_nls(old_nls);
205 unload_nls(old_nls);
206 } 205 }
207 } else if (!strcmp(p, "utf8")) { 206 } else if (!strcmp(p, "utf8")) {
208 bool val = false; 207 bool val = false;
@@ -2427,10 +2426,9 @@ static void ntfs_put_super(struct super_block *sb)
2427 ntfs_free(vol->upcase); 2426 ntfs_free(vol->upcase);
2428 vol->upcase = NULL; 2427 vol->upcase = NULL;
2429 } 2428 }
2430 if (vol->nls_map) { 2429
2431 unload_nls(vol->nls_map); 2430 unload_nls(vol->nls_map);
2432 vol->nls_map = NULL; 2431
2433 }
2434 sb->s_fs_info = NULL; 2432 sb->s_fs_info = NULL;
2435 kfree(vol); 2433 kfree(vol);
2436 2434
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 72e76062a900..deb2b132ae5e 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2022,4 +2022,5 @@ const struct address_space_operations ocfs2_aops = {
2022 .releasepage = ocfs2_releasepage, 2022 .releasepage = ocfs2_releasepage,
2023 .migratepage = buffer_migrate_page, 2023 .migratepage = buffer_migrate_page,
2024 .is_partially_uptodate = block_is_partially_uptodate, 2024 .is_partially_uptodate = block_is_partially_uptodate,
2025 .error_remove_page = generic_error_remove_page,
2025}; 2026};
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 0c6bc602e6c4..07f77a7945c3 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -322,6 +322,8 @@ static inline void task_context_switch_counts(struct seq_file *m,
322 p->nivcsw); 322 p->nivcsw);
323} 323}
324 324
325#ifdef CONFIG_MMU
326
325struct stack_stats { 327struct stack_stats {
326 struct vm_area_struct *vma; 328 struct vm_area_struct *vma;
327 unsigned long startpage; 329 unsigned long startpage;
@@ -402,6 +404,11 @@ static inline void task_show_stack_usage(struct seq_file *m,
402 mmput(mm); 404 mmput(mm);
403 } 405 }
404} 406}
407#else
408static void task_show_stack_usage(struct seq_file *m, struct task_struct *task)
409{
410}
411#endif /* CONFIG_MMU */
405 412
406int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, 413int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
407 struct pid *pid, struct task_struct *task) 414 struct pid *pid, struct task_struct *task)
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 171e052c07b3..c7bff4f603ff 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -97,7 +97,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
97 "Committed_AS: %8lu kB\n" 97 "Committed_AS: %8lu kB\n"
98 "VmallocTotal: %8lu kB\n" 98 "VmallocTotal: %8lu kB\n"
99 "VmallocUsed: %8lu kB\n" 99 "VmallocUsed: %8lu kB\n"
100 "VmallocChunk: %8lu kB\n", 100 "VmallocChunk: %8lu kB\n"
101#ifdef CONFIG_MEMORY_FAILURE
102 "HardwareCorrupted: %8lu kB\n"
103#endif
104 ,
101 K(i.totalram), 105 K(i.totalram),
102 K(i.freeram), 106 K(i.freeram),
103 K(i.bufferram), 107 K(i.bufferram),
@@ -144,6 +148,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
144 (unsigned long)VMALLOC_TOTAL >> 10, 148 (unsigned long)VMALLOC_TOTAL >> 10,
145 vmi.used >> 10, 149 vmi.used >> 10,
146 vmi.largest_chunk >> 10 150 vmi.largest_chunk >> 10
151#ifdef CONFIG_MEMORY_FAILURE
152 ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10)
153#endif
147 ); 154 );
148 155
149 hugetlb_report_meminfo(m); 156 hugetlb_report_meminfo(m);
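With CONFIG_MEMORY_FAILURE the new HardwareCorrupted line reports mce_bad_pages converted to kB (pages shifted left by PAGE_SHIFT - 10). A small reader for the field; the line is simply absent on kernels built without memory-failure support, so a missing match is normal:

    #include <stdio.h>

    int main(void)
    {
            char line[256];
            FILE *f = fopen("/proc/meminfo", "r");

            if (!f) {
                    perror("meminfo");
                    return 1;
            }
            while (fgets(line, sizeof(line), f)) {
                    unsigned long kb;

                    if (sscanf(line, "HardwareCorrupted: %lu kB", &kb) == 1)
                            printf("%lu kB of poisoned memory\n", kb);
            }
            fclose(f);
            return 0;
    }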
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 9b1e4e9a16bf..f667e8aeabdf 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -153,7 +153,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
153 153
154 /* careful: calling conventions are nasty here */ 154 /* careful: calling conventions are nasty here */
155 res = count; 155 res = count;
156 error = table->proc_handler(table, write, filp, buf, &res, ppos); 156 error = table->proc_handler(table, write, buf, &res, ppos);
157 if (!error) 157 if (!error)
158 error = res; 158 error = res;
159out: 159out:
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 0c10a0b3f146..766b1d456050 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -4,13 +4,18 @@
4#include <linux/sched.h> 4#include <linux/sched.h>
5#include <linux/seq_file.h> 5#include <linux/seq_file.h>
6#include <linux/time.h> 6#include <linux/time.h>
7#include <linux/kernel_stat.h>
7#include <asm/cputime.h> 8#include <asm/cputime.h>
8 9
9static int uptime_proc_show(struct seq_file *m, void *v) 10static int uptime_proc_show(struct seq_file *m, void *v)
10{ 11{
11 struct timespec uptime; 12 struct timespec uptime;
12 struct timespec idle; 13 struct timespec idle;
13 cputime_t idletime = cputime_add(init_task.utime, init_task.stime); 14 int i;
15 cputime_t idletime = cputime_zero;
16
17 for_each_possible_cpu(i)
18 idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle);
14 19
15 do_posix_clock_monotonic_gettime(&uptime); 20 do_posix_clock_monotonic_gettime(&uptime);
16 monotonic_to_bootbased(&uptime); 21 monotonic_to_bootbased(&uptime);
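Rather than crediting all idle time to init_task (wrong on SMP, where each CPU has its own idle thread), /proc/uptime now sums kstat_cpu(i).cpustat.idle across every possible CPU. The same aggregate can be reproduced from userspace by summing the idle column of each cpuN row of /proc/stat:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char line[512];
            unsigned long long idle, total_idle = 0;
            FILE *f = fopen("/proc/stat", "r");

            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f)) {
                    /* per-cpu rows: "cpuN user nice system idle ..." */
                    if (!strncmp(line, "cpu", 3) &&
                        line[3] >= '0' && line[3] <= '9' &&
                        sscanf(line + 3, "%*d %*u %*u %*u %llu", &idle) == 1)
                            total_idle += idle;
            }
            fclose(f);
            printf("aggregate idle: %llu jiffies\n", total_idle);
            return 0;
    }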
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 11f0c06316de..32fae4040ebf 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -69,14 +69,11 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
69 /* make various checks */ 69 /* make various checks */
70 order = get_order(newsize); 70 order = get_order(newsize);
71 if (unlikely(order >= MAX_ORDER)) 71 if (unlikely(order >= MAX_ORDER))
72 goto too_big; 72 return -EFBIG;
73 73
74 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; 74 ret = inode_newsize_ok(inode, newsize);
75 if (limit != RLIM_INFINITY && newsize > limit) 75 if (ret)
76 goto fsize_exceeded; 76 return ret;
77
78 if (newsize > inode->i_sb->s_maxbytes)
79 goto too_big;
80 77
81 i_size_write(inode, newsize); 78 i_size_write(inode, newsize);
82 79
@@ -118,12 +115,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
118 115
119 return 0; 116 return 0;
120 117
121 fsize_exceeded: 118add_error:
122 send_sig(SIGXFSZ, current, 0);
123 too_big:
124 return -EFBIG;
125
126 add_error:
127 while (loop < npages) 119 while (loop < npages)
128 __free_page(pages + loop++); 120 __free_page(pages + loop++);
129 return ret; 121 return ret;
diff --git a/fs/read_write.c b/fs/read_write.c
index 6c8c55dec2bc..3ac28987f22a 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -839,9 +839,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
839 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 839 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
840 840
841 pos = *ppos; 841 pos = *ppos;
842 retval = -EINVAL;
843 if (unlikely(pos < 0))
844 goto fput_out;
845 if (unlikely(pos + count > max)) { 842 if (unlikely(pos + count > max)) {
846 retval = -EOVERFLOW; 843 retval = -EOVERFLOW;
847 if (pos >= max) 844 if (pos >= max)
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 47f132df0c3f..c117fa80d1e9 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -528,7 +528,7 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
528 pos = (ROMFH_SIZE + len + 1 + ROMFH_PAD) & ROMFH_MASK; 528 pos = (ROMFH_SIZE + len + 1 + ROMFH_PAD) & ROMFH_MASK;
529 529
530 root = romfs_iget(sb, pos); 530 root = romfs_iget(sb, pos);
531 if (!root) 531 if (IS_ERR(root))
532 goto error; 532 goto error;
533 533
534 sb->s_root = d_alloc_root(root); 534 sb->s_root = d_alloc_root(root);
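This one-line fix matters because romfs_iget() reports failure as an ERR_PTR-encoded pointer, never NULL, so the old if (!root) let error values flow onward. ERR_PTR packs small negative errnos into the top page of the address space; a self-contained rendition of the macros:

    #include <errno.h>
    #include <stdio.h>

    #define MAX_ERRNO 4095

    static inline void *ERR_PTR(long error) { return (void *)error; }
    static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
    static inline int IS_ERR(const void *ptr)
    {
            return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    /* Illustrative lookup that can fail with an encoded errno. */
    static void *lookup(int fail)
    {
            static int obj = 42;

            return fail ? ERR_PTR(-ENOENT) : &obj;
    }

    int main(void)
    {
            void *p = lookup(1);

            if (IS_ERR(p)) {        /* NOT 'if (!p)': p is never NULL here */
                    printf("error %ld\n", PTR_ERR(p));
                    return 1;
            }
            return 0;
    }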
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 6c959275f2d0..eae7d9dbf3ff 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -429,20 +429,21 @@ EXPORT_SYMBOL(mangle_path);
429 */ 429 */
430int seq_path(struct seq_file *m, struct path *path, char *esc) 430int seq_path(struct seq_file *m, struct path *path, char *esc)
431{ 431{
432 if (m->count < m->size) { 432 char *buf;
433 char *s = m->buf + m->count; 433 size_t size = seq_get_buf(m, &buf);
434 char *p = d_path(path, s, m->size - m->count); 434 int res = -1;
435
436 if (size) {
437 char *p = d_path(path, buf, size);
435 if (!IS_ERR(p)) { 438 if (!IS_ERR(p)) {
436 s = mangle_path(s, p, esc); 439 char *end = mangle_path(buf, p, esc);
437 if (s) { 440 if (end)
438 p = m->buf + m->count; 441 res = end - buf;
439 m->count = s - m->buf;
440 return s - p;
441 }
442 } 442 }
443 } 443 }
444 m->count = m->size; 444 seq_commit(m, res);
445 return -1; 445
446 return res;
446} 447}
447EXPORT_SYMBOL(seq_path); 448EXPORT_SYMBOL(seq_path);
448 449
@@ -454,26 +455,28 @@ EXPORT_SYMBOL(seq_path);
454int seq_path_root(struct seq_file *m, struct path *path, struct path *root, 455int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
455 char *esc) 456 char *esc)
456{ 457{
457 int err = -ENAMETOOLONG; 458 char *buf;
458 if (m->count < m->size) { 459 size_t size = seq_get_buf(m, &buf);
459 char *s = m->buf + m->count; 460 int res = -ENAMETOOLONG;
461
462 if (size) {
460 char *p; 463 char *p;
461 464
462 spin_lock(&dcache_lock); 465 spin_lock(&dcache_lock);
463 p = __d_path(path, root, s, m->size - m->count); 466 p = __d_path(path, root, buf, size);
464 spin_unlock(&dcache_lock); 467 spin_unlock(&dcache_lock);
465 err = PTR_ERR(p); 468 res = PTR_ERR(p);
466 if (!IS_ERR(p)) { 469 if (!IS_ERR(p)) {
467 s = mangle_path(s, p, esc); 470 char *end = mangle_path(buf, p, esc);
468 if (s) { 471 if (end)
469 p = m->buf + m->count; 472 res = end - buf;
470 m->count = s - m->buf; 473 else
471 return 0; 474 res = -ENAMETOOLONG;
472 }
473 } 475 }
474 } 476 }
475 m->count = m->size; 477 seq_commit(m, res);
476 return err; 478
479 return res < 0 ? res : 0;
477} 480}
478 481
479/* 482/*
@@ -481,20 +484,21 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
481 */ 484 */
482int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc) 485int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
483{ 486{
484 if (m->count < m->size) { 487 char *buf;
485 char *s = m->buf + m->count; 488 size_t size = seq_get_buf(m, &buf);
486 char *p = dentry_path(dentry, s, m->size - m->count); 489 int res = -1;
490
491 if (size) {
492 char *p = dentry_path(dentry, buf, size);
487 if (!IS_ERR(p)) { 493 if (!IS_ERR(p)) {
488 s = mangle_path(s, p, esc); 494 char *end = mangle_path(buf, p, esc);
489 if (s) { 495 if (end)
490 p = m->buf + m->count; 496 res = end - buf;
491 m->count = s - m->buf;
492 return s - p;
493 }
494 } 497 }
495 } 498 }
496 m->count = m->size; 499 seq_commit(m, res);
497 return -1; 500
501 return res;
498} 502}
499 503
500int seq_bitmap(struct seq_file *m, const unsigned long *bits, 504int seq_bitmap(struct seq_file *m, const unsigned long *bits,
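
All three seq_file helpers above converge on one protocol: seq_get_buf() hands out the unused tail of the output buffer, the caller formats into it, and seq_commit() either accounts the bytes consumed or, given a negative result, marks the buffer full so the core retries with a larger one. A miniature of the reserve/commit pair; struct seq here is a simplified stand-in for struct seq_file:

    #include <stdio.h>
    #include <string.h>

    struct seq {
            char buf[32];
            size_t size, count;
    };

    static size_t seq_get_buf(struct seq *m, char **bufp)
    {
            *bufp = m->buf + m->count;
            return m->count < m->size ? m->size - m->count : 0;
    }

    static void seq_commit(struct seq *m, int num)
    {
            if (num < 0)
                    m->count = m->size;     /* overflow: force a retry */
            else
                    m->count += num;
    }

    static int seq_emit(struct seq *m, const char *s)
    {
            char *buf;
            size_t size = seq_get_buf(m, &buf);
            int res = -1;

            if (strlen(s) < size) {
                    memcpy(buf, s, strlen(s));
                    res = strlen(s);
            }
            seq_commit(m, res);
            return res;
    }

    int main(void)
    {
            struct seq m = { .size = sizeof(m.buf), .count = 0 };

            seq_emit(&m, "/dev/");
            seq_emit(&m, "sda1");
            printf("%.*s\n", (int)m.count, m.buf);
            return 0;
    }

Keeping the count bookkeeping in seq_commit() is what removes the three hand-rolled copies of "m->count = s - m->buf".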
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 1402d2d54f52..1c4c8f089970 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -459,14 +459,8 @@ smb_show_options(struct seq_file *s, struct vfsmount *m)
459static void 459static void
460smb_unload_nls(struct smb_sb_info *server) 460smb_unload_nls(struct smb_sb_info *server)
461{ 461{
462 if (server->remote_nls) { 462 unload_nls(server->remote_nls);
463 unload_nls(server->remote_nls); 463 unload_nls(server->local_nls);
464 server->remote_nls = NULL;
465 }
466 if (server->local_nls) {
467 unload_nls(server->local_nls);
468 server->local_nls = NULL;
469 }
470} 464}
471 465
472static void 466static void
diff --git a/fs/super.c b/fs/super.c
index 0e7207b9815c..19eb70b374bc 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -465,6 +465,48 @@ rescan:
465} 465}
466 466
467EXPORT_SYMBOL(get_super); 467EXPORT_SYMBOL(get_super);
468
469/**
470 * get_active_super - get an active reference to the superblock of a device
471 * @bdev: device to get the superblock for
472 *
473 * Scans the superblock list and finds the superblock of the file system
474 * mounted on the device given. Returns the superblock with an active
475 * reference and s_umount held exclusively or %NULL if none was found.
476 */
477struct super_block *get_active_super(struct block_device *bdev)
478{
479 struct super_block *sb;
480
481 if (!bdev)
482 return NULL;
483
484 spin_lock(&sb_lock);
485 list_for_each_entry(sb, &super_blocks, s_list) {
486 if (sb->s_bdev != bdev)
487 continue;
488
489 sb->s_count++;
490 spin_unlock(&sb_lock);
491 down_write(&sb->s_umount);
492 if (sb->s_root) {
493 spin_lock(&sb_lock);
494 if (sb->s_count > S_BIAS) {
495 atomic_inc(&sb->s_active);
496 sb->s_count--;
497 spin_unlock(&sb_lock);
498 return sb;
499 }
500 spin_unlock(&sb_lock);
501 }
502 up_write(&sb->s_umount);
503 put_super(sb);
504 yield();
505 spin_lock(&sb_lock);
506 }
507 spin_unlock(&sb_lock);
508 return NULL;
509}
468 510
469struct super_block * user_get_super(dev_t dev) 511struct super_block * user_get_super(dev_t dev)
470{ 512{
@@ -527,11 +569,15 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
527{ 569{
528 int retval; 570 int retval;
529 int remount_rw; 571 int remount_rw;
530 572
573 if (sb->s_frozen != SB_UNFROZEN)
574 return -EBUSY;
575
531#ifdef CONFIG_BLOCK 576#ifdef CONFIG_BLOCK
532 if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev)) 577 if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
533 return -EACCES; 578 return -EACCES;
534#endif 579#endif
580
535 if (flags & MS_RDONLY) 581 if (flags & MS_RDONLY)
536 acct_auto_close(sb); 582 acct_auto_close(sb);
537 shrink_dcache_sb(sb); 583 shrink_dcache_sb(sb);
@@ -743,9 +789,14 @@ int get_sb_bdev(struct file_system_type *fs_type,
743 * will protect the lockfs code from trying to start a snapshot 789 * will protect the lockfs code from trying to start a snapshot
744 * while we are mounting 790 * while we are mounting
745 */ 791 */
746 down(&bdev->bd_mount_sem); 792 mutex_lock(&bdev->bd_fsfreeze_mutex);
793 if (bdev->bd_fsfreeze_count > 0) {
794 mutex_unlock(&bdev->bd_fsfreeze_mutex);
795 error = -EBUSY;
796 goto error_bdev;
797 }
747 s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); 798 s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
748 up(&bdev->bd_mount_sem); 799 mutex_unlock(&bdev->bd_fsfreeze_mutex);
749 if (IS_ERR(s)) 800 if (IS_ERR(s))
750 goto error_s; 801 goto error_s;
751 802
@@ -892,6 +943,16 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
892 if (error) 943 if (error)
893 goto out_sb; 944 goto out_sb;
894 945
946 /*
947 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
948 * but s_maxbytes was an unsigned long long for many releases. Throw
949 * this warning for a little while to try and catch filesystems that
950 * violate this rule. This warning should be either removed or
951 * converted to a BUG() in 2.6.34.
952 */
953 WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
954 "negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes);
955
895 mnt->mnt_mountpoint = mnt->mnt_root; 956 mnt->mnt_mountpoint = mnt->mnt_root;
896 mnt->mnt_parent = mnt; 957 mnt->mnt_parent = mnt;
897 up_write(&mnt->mnt_sb->s_umount); 958 up_write(&mnt->mnt_sb->s_umount);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index d5e5559e31db..381854461b28 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1635,4 +1635,5 @@ const struct address_space_operations xfs_address_space_operations = {
1635 .direct_IO = xfs_vm_direct_IO, 1635 .direct_IO = xfs_vm_direct_IO,
1636 .migratepage = buffer_migrate_page, 1636 .migratepage = buffer_migrate_page,
1637 .is_partially_uptodate = block_is_partially_uptodate, 1637 .is_partially_uptodate = block_is_partially_uptodate,
1638 .error_remove_page = generic_error_remove_page,
1638}; 1639};
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index 916c0ffb6083..c5bc67c4e3bb 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -26,7 +26,6 @@ STATIC int
26xfs_stats_clear_proc_handler( 26xfs_stats_clear_proc_handler(
27 ctl_table *ctl, 27 ctl_table *ctl,
28 int write, 28 int write,
29 struct file *filp,
30 void __user *buffer, 29 void __user *buffer,
31 size_t *lenp, 30 size_t *lenp,
32 loff_t *ppos) 31 loff_t *ppos)
@@ -34,7 +33,7 @@ xfs_stats_clear_proc_handler(
34 int c, ret, *valp = ctl->data; 33 int c, ret, *valp = ctl->data;
35 __uint32_t vn_active; 34 __uint32_t vn_active;
36 35
37 ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos); 36 ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
38 37
39 if (!ret && write && *valp) { 38 if (!ret && write && *valp) {
40 printk("XFS Clearing xfsstats\n"); 39 printk("XFS Clearing xfsstats\n");
diff --git a/include/acpi/button.h b/include/acpi/button.h
new file mode 100644
index 000000000000..97eea0e4c016
--- /dev/null
+++ b/include/acpi/button.h
@@ -0,0 +1,25 @@
1#ifndef ACPI_BUTTON_H
2#define ACPI_BUTTON_H
3
4#include <linux/notifier.h>
5
6#if defined(CONFIG_ACPI_BUTTON) || defined(CONFIG_ACPI_BUTTON_MODULE)
7extern int acpi_lid_notifier_register(struct notifier_block *nb);
8extern int acpi_lid_notifier_unregister(struct notifier_block *nb);
9extern int acpi_lid_open(void);
10#else
11static inline int acpi_lid_notifier_register(struct notifier_block *nb)
12{
13 return 0;
14}
15static inline int acpi_lid_notifier_unregister(struct notifier_block *nb)
16{
17 return 0;
18}
19static inline int acpi_lid_open(void)
20{
21 return 1;
22}
23#endif /* defined(CONFIG_ACPI_BUTTON) || defined(CONFIG_ACPI_BUTTON_MODULE) */
24
25#endif /* ACPI_BUTTON_H */
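
The new header uses the standard optional-subsystem layout: real prototypes when the button driver is built in or modular, static inline fallbacks otherwise, so call sites never need #ifdef CONFIG_ACPI_BUTTON (note the stub acpi_lid_open() reports the lid open, the safe default). The skeleton of the idiom, with a made-up feature macro for illustration:

    #include <stdio.h>

    /* Compile with -DHAVE_WIDGET (plus a real widget.c) for the full driver. */
    #ifdef HAVE_WIDGET
    extern int widget_status(void);
    #else
    static inline int widget_status(void)
    {
            return 1;       /* safe default when the feature is compiled out */
    }
    #endif

    int main(void)
    {
            /* The call site needs no #ifdef of its own. */
            printf("widget status: %d\n", widget_status());
            return 0;
    }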
diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h
index 4d3e48373e74..0c3dd8603927 100644
--- a/include/asm-generic/fcntl.h
+++ b/include/asm-generic/fcntl.h
@@ -73,6 +73,19 @@
73#define F_SETSIG 10 /* for sockets. */ 73#define F_SETSIG 10 /* for sockets. */
74#define F_GETSIG 11 /* for sockets. */ 74#define F_GETSIG 11 /* for sockets. */
75#endif 75#endif
76#ifndef F_SETOWN_EX
77#define F_SETOWN_EX 12
78#define F_GETOWN_EX 13
79#endif
80
81#define F_OWNER_TID 0
82#define F_OWNER_PID 1
83#define F_OWNER_GID 2
84
85struct f_owner_ex {
86 int type;
87 pid_t pid;
88};
76 89
77/* for F_[GET|SET]FL */ 90/* for F_[GET|SET]FL */
78#define FD_CLOEXEC 1 /* actually anything with low bit set goes */ 91#define FD_CLOEXEC 1 /* actually anything with low bit set goes */
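
F_SETOWN_EX generalizes F_SETOWN so SIGIO/SIGURG can be directed at a single thread (F_OWNER_TID) instead of a whole process or process group. A usage sketch, assuming a kernel carrying this patch; the fallback definitions cover libc headers that predate the constants:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <sys/types.h>
    #include <unistd.h>

    #ifndef F_SETOWN_EX             /* older libc headers lack the additions */
    #define F_SETOWN_EX 12
    #define F_OWNER_TID 0
    struct f_owner_ex { int type; pid_t pid; };
    #endif

    int main(void)
    {
            struct f_owner_ex owner = {
                    .type = F_OWNER_TID,
                    .pid  = syscall(SYS_gettid),  /* this thread, not the group */
            };

            if (fcntl(STDIN_FILENO, F_SETOWN_EX, &owner) == -1) {
                    perror("F_SETOWN_EX");
                    return 1;
            }
            puts("SIGIO for stdin now targets this thread");
            return 0;
    }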
diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h
index dd63bd38864b..5ee13b2fd223 100644
--- a/include/asm-generic/mman-common.h
+++ b/include/asm-generic/mman-common.h
@@ -34,6 +34,7 @@
34#define MADV_REMOVE 9 /* remove these pages & resources */ 34#define MADV_REMOVE 9 /* remove these pages & resources */
35#define MADV_DONTFORK 10 /* don't inherit across fork */ 35#define MADV_DONTFORK 10 /* don't inherit across fork */
36#define MADV_DOFORK 11 /* do inherit across fork */ 36#define MADV_DOFORK 11 /* do inherit across fork */
37#define MADV_HWPOISON 100 /* poison a page for testing */
37 38
38#define MADV_MERGEABLE 12 /* KSM may merge identical pages */ 39#define MADV_MERGEABLE 12 /* KSM may merge identical pages */
39#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ 40#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */
diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h
index c840719a8c59..942d30b5aab1 100644
--- a/include/asm-generic/siginfo.h
+++ b/include/asm-generic/siginfo.h
@@ -82,6 +82,7 @@ typedef struct siginfo {
82#ifdef __ARCH_SI_TRAPNO 82#ifdef __ARCH_SI_TRAPNO
83 int _trapno; /* TRAP # which caused the signal */ 83 int _trapno; /* TRAP # which caused the signal */
84#endif 84#endif
85 short _addr_lsb; /* LSB of the reported address */
85 } _sigfault; 86 } _sigfault;
86 87
87 /* SIGPOLL */ 88 /* SIGPOLL */
@@ -112,6 +113,7 @@ typedef struct siginfo {
112#ifdef __ARCH_SI_TRAPNO 113#ifdef __ARCH_SI_TRAPNO
113#define si_trapno _sifields._sigfault._trapno 114#define si_trapno _sifields._sigfault._trapno
114#endif 115#endif
116#define si_addr_lsb _sifields._sigfault._addr_lsb
115#define si_band _sifields._sigpoll._band 117#define si_band _sifields._sigpoll._band
116#define si_fd _sifields._sigpoll._fd 118#define si_fd _sifields._sigpoll._fd
117 119
@@ -192,7 +194,11 @@ typedef struct siginfo {
192#define BUS_ADRALN (__SI_FAULT|1) /* invalid address alignment */ 194#define BUS_ADRALN (__SI_FAULT|1) /* invalid address alignment */
193#define BUS_ADRERR (__SI_FAULT|2) /* non-existent physical address */ 195#define BUS_ADRERR (__SI_FAULT|2) /* non-existent physical address */
194#define BUS_OBJERR (__SI_FAULT|3) /* object specific hardware error */ 196#define BUS_OBJERR (__SI_FAULT|3) /* object specific hardware error */
195#define NSIGBUS 3 197/* hardware memory error consumed on a machine check: action required */
198#define BUS_MCEERR_AR (__SI_FAULT|4)
199/* hardware memory error detected in process but not consumed: action optional*/
200#define BUS_MCEERR_AO (__SI_FAULT|5)
201#define NSIGBUS 5
196 202
197/* 203/*
198 * SIGTRAP si_codes 204 * SIGTRAP si_codes
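
The two new SIGBUS codes let a process tell a consumed hardware memory error (BUS_MCEERR_AR: delivered synchronously, action required) from poison merely found in its pages (BUS_MCEERR_AO: action optional), with si_addr_lsb encoding log2 of the affected extent. A handler sketch; the fallback defines cover pre-2.6.32 headers, and actually receiving such a signal needs poison injection (e.g. the new MADV_HWPOISON) with privileges:

    #include <signal.h>
    #include <string.h>
    #include <unistd.h>

    #ifndef BUS_MCEERR_AO           /* pre-2.6.32 headers lack these codes */
    #define BUS_MCEERR_AR 4
    #define BUS_MCEERR_AO 5
    #endif

    static void on_sigbus(int sig, siginfo_t *si, void *uctx)
    {
            (void)sig; (void)uctx;
            if (si->si_code == BUS_MCEERR_AR || si->si_code == BUS_MCEERR_AO) {
                    static const char msg[] = "hardware memory failure\n";

                    /* si->si_addr is the poisoned address; si_addr_lsb (where
                     * the headers expose it) gives log2 of the extent. */
                    write(STDERR_FILENO, msg, sizeof(msg) - 1);
            }
            _exit(1);
    }

    int main(void)
    {
            struct sigaction sa;

            memset(&sa, 0, sizeof(sa));
            sa.sa_sigaction = on_sigbus;
            sa.sa_flags = SA_SIGINFO;
            sigaction(SIGBUS, &sa, NULL);
            pause();        /* wait for a (privileged) poison injection */
            return 0;
    }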
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 29ca8f53ffbe..b6e818f4b247 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -721,12 +721,12 @@
721 . = ALIGN(PAGE_SIZE); \ 721 . = ALIGN(PAGE_SIZE); \
722 .data : AT(ADDR(.data) - LOAD_OFFSET) { \ 722 .data : AT(ADDR(.data) - LOAD_OFFSET) { \
723 INIT_TASK_DATA(inittask) \ 723 INIT_TASK_DATA(inittask) \
724 NOSAVE_DATA \
725 PAGE_ALIGNED_DATA(pagealigned) \
724 CACHELINE_ALIGNED_DATA(cacheline) \ 726 CACHELINE_ALIGNED_DATA(cacheline) \
725 READ_MOSTLY_DATA(cacheline) \ 727 READ_MOSTLY_DATA(cacheline) \
726 DATA_DATA \ 728 DATA_DATA \
727 CONSTRUCTORS \ 729 CONSTRUCTORS \
728 NOSAVE_DATA \
729 PAGE_ALIGNED_DATA(pagealigned) \
730 } 730 }
731 731
732#define INIT_TEXT_SECTION(inittext_align) \ 732#define INIT_TEXT_SECTION(inittext_align) \
diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index 853508499d20..3f6e545609be 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -552,6 +552,7 @@
552 {0x8086, 0x2e12, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ 552 {0x8086, 0x2e12, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
553 {0x8086, 0x2e22, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ 553 {0x8086, 0x2e22, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
554 {0x8086, 0x2e32, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ 554 {0x8086, 0x2e32, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
555 {0x8086, 0x2e42, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
555 {0x8086, 0xa001, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ 556 {0x8086, 0xa001, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
556 {0x8086, 0xa011, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ 557 {0x8086, 0xa011, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
557 {0x8086, 0x35e8, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ 558 {0x8086, 0x35e8, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 8e1e92583fbc..7e0cb1da92e6 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -185,6 +185,7 @@ typedef struct _drm_i915_sarea {
185#define DRM_I915_GEM_GET_APERTURE 0x23 185#define DRM_I915_GEM_GET_APERTURE 0x23
186#define DRM_I915_GEM_MMAP_GTT 0x24 186#define DRM_I915_GEM_MMAP_GTT 0x24
187#define DRM_I915_GET_PIPE_FROM_CRTC_ID 0x25 187#define DRM_I915_GET_PIPE_FROM_CRTC_ID 0x25
188#define DRM_I915_GEM_MADVISE 0x26
188 189
189#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) 190#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
190#define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) 191#define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -221,6 +222,7 @@ typedef struct _drm_i915_sarea {
221#define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling) 222#define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling)
222#define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture) 223#define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture)
223#define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_PIPE_FROM_CRTC_ID, struct drm_intel_get_pipe_from_crtc_id) 224#define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_PIPE_FROM_CRTC_ID, struct drm_intel_get_pipe_from_crtc_id)
225#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
224 226
225/* Allow drivers to submit batchbuffers directly to hardware, relying 227/* Allow drivers to submit batchbuffers directly to hardware, relying
226 * on the security mechanisms provided by hardware. 228 * on the security mechanisms provided by hardware.
@@ -667,4 +669,21 @@ struct drm_i915_get_pipe_from_crtc_id {
667 __u32 pipe; 669 __u32 pipe;
668}; 670};
669 671
672#define I915_MADV_WILLNEED 0

673#define I915_MADV_DONTNEED 1
674#define __I915_MADV_PURGED 2 /* internal state */
675
676struct drm_i915_gem_madvise {
677 /** Handle of the buffer to change the backing store advice */
678 __u32 handle;
679
680 /* Advice: either the buffer will be needed again in the near future,
681 * or won't be and could be discarded under memory pressure.
682 */
683 __u32 madv;
684
685 /** Whether the backing store still exists. */
686 __u32 retained;
687};
688
670#endif /* _I915_DRM_H_ */ 689#endif /* _I915_DRM_H_ */
diff --git a/arch/arm/include/asm/mach/mmc.h b/include/linux/amba/mmci.h
index b490ecc79def..6b4241748dda 100644
--- a/arch/arm/include/asm/mach/mmc.h
+++ b/include/linux/amba/mmci.h
@@ -1,17 +1,18 @@
1/* 1/*
2 * arch/arm/include/asm/mach/mmc.h 2 * include/linux/amba/mmci.h
3 */ 3 */
4#ifndef ASMARM_MACH_MMC_H 4#ifndef AMBA_MMCI_H
5#define ASMARM_MACH_MMC_H 5#define AMBA_MMCI_H
6 6
7#include <linux/mmc/host.h> 7#include <linux/mmc/host.h>
8 8
9struct mmc_platform_data { 9struct mmci_platform_data {
10 unsigned int ocr_mask; /* available voltages */ 10 unsigned int ocr_mask; /* available voltages */
11 u32 (*translate_vdd)(struct device *, unsigned int); 11 u32 (*translate_vdd)(struct device *, unsigned int);
12 unsigned int (*status)(struct device *); 12 unsigned int (*status)(struct device *);
13 int gpio_wp; 13 int gpio_wp;
14 int gpio_cd; 14 int gpio_cd;
15 unsigned long capabilities;
15}; 16};
16 17
17#endif 18#endif
diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h
index dcad0ffd1755..e4836c6b3dd7 100644
--- a/include/linux/amba/pl022.h
+++ b/include/linux/amba/pl022.h
@@ -136,12 +136,12 @@ enum ssp_tx_level_trig {
136 136
137/** 137/**
138 * enum SPI Clock Phase - clock phase (Motorola SPI interface only) 138 * enum SPI Clock Phase - clock phase (Motorola SPI interface only)
139 * @SSP_CLK_RISING_EDGE: Receive data on rising edge 139 * @SSP_CLK_FIRST_EDGE: Receive data on first edge transition (actual direction depends on polarity)
140 * @SSP_CLK_FALLING_EDGE: Receive data on falling edge 140 * @SSP_CLK_SECOND_EDGE: Receive data on second edge transition (actual direction depends on polarity)
141 */ 141 */
142enum ssp_spi_clk_phase { 142enum ssp_spi_clk_phase {
143 SSP_CLK_RISING_EDGE, 143 SSP_CLK_FIRST_EDGE,
144 SSP_CLK_FALLING_EDGE 144 SSP_CLK_SECOND_EDGE
145}; 145};
146 146
147/** 147/**
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 5fc2ef8d97fa..a1c486a88e88 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -58,25 +58,60 @@ struct dma_chan_ref {
58 * array. 58 * array.
59 * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a 59 * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
60 * dependency chain 60 * dependency chain
61 * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining. 61 * @ASYNC_TX_FENCE: specify that the next operation in the dependency
62 * chain uses this operation's result as an input
62 */ 63 */
63enum async_tx_flags { 64enum async_tx_flags {
64 ASYNC_TX_XOR_ZERO_DST = (1 << 0), 65 ASYNC_TX_XOR_ZERO_DST = (1 << 0),
65 ASYNC_TX_XOR_DROP_DST = (1 << 1), 66 ASYNC_TX_XOR_DROP_DST = (1 << 1),
66 ASYNC_TX_ACK = (1 << 3), 67 ASYNC_TX_ACK = (1 << 2),
67 ASYNC_TX_DEP_ACK = (1 << 4), 68 ASYNC_TX_FENCE = (1 << 3),
69};
70
71/**
72 * struct async_submit_ctl - async_tx submission/completion modifiers
73 * @flags: submission modifiers
74 * @depend_tx: parent dependency of the current operation being submitted
75 * @cb_fn: callback routine to run at operation completion
76 * @cb_param: parameter for the callback routine
77 * @scribble: caller provided space for dma/page address conversions
78 */
79struct async_submit_ctl {
80 enum async_tx_flags flags;
81 struct dma_async_tx_descriptor *depend_tx;
82 dma_async_tx_callback cb_fn;
83 void *cb_param;
84 void *scribble;
68}; 85};
69 86
70#ifdef CONFIG_DMA_ENGINE 87#ifdef CONFIG_DMA_ENGINE
71#define async_tx_issue_pending_all dma_issue_pending_all 88#define async_tx_issue_pending_all dma_issue_pending_all
89
90/**
91 * async_tx_issue_pending - send pending descriptor to the hardware channel
92 * @tx: descriptor handle to retrieve hardware context
93 *
94 * Note: any dependent operations will have already been issued by
95 * async_tx_channel_switch, or (in the case of no channel switch) will
96 * be already pending on this channel.
97 */
98static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
99{
100 if (likely(tx)) {
101 struct dma_chan *chan = tx->chan;
102 struct dma_device *dma = chan->device;
103
104 dma->device_issue_pending(chan);
105 }
106}
72#ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL 107#ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL
73#include <asm/async_tx.h> 108#include <asm/async_tx.h>
74#else 109#else
75#define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \ 110#define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \
76 __async_tx_find_channel(dep, type) 111 __async_tx_find_channel(dep, type)
77struct dma_chan * 112struct dma_chan *
78__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, 113__async_tx_find_channel(struct async_submit_ctl *submit,
79 enum dma_transaction_type tx_type); 114 enum dma_transaction_type tx_type);
80#endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */ 115#endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */
81#else 116#else
82static inline void async_tx_issue_pending_all(void) 117static inline void async_tx_issue_pending_all(void)
@@ -84,10 +119,16 @@ static inline void async_tx_issue_pending_all(void)
84 do { } while (0); 119 do { } while (0);
85} 120}
86 121
122static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
123{
124 do { } while (0);
125}
126
87static inline struct dma_chan * 127static inline struct dma_chan *
88async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, 128async_tx_find_channel(struct async_submit_ctl *submit,
89 enum dma_transaction_type tx_type, struct page **dst, int dst_count, 129 enum dma_transaction_type tx_type, struct page **dst,
90 struct page **src, int src_count, size_t len) 130 int dst_count, struct page **src, int src_count,
131 size_t len)
91{ 132{
92 return NULL; 133 return NULL;
93} 134}
@@ -99,46 +140,70 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
99 * @cb_fn_param: parameter to pass to the callback routine 140 * @cb_fn_param: parameter to pass to the callback routine
100 */ 141 */
101static inline void 142static inline void
102async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param) 143async_tx_sync_epilog(struct async_submit_ctl *submit)
103{ 144{
104 if (cb_fn) 145 if (submit->cb_fn)
105 cb_fn(cb_fn_param); 146 submit->cb_fn(submit->cb_param);
106} 147}
107 148
108void 149typedef union {
109async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, 150 unsigned long addr;
110 enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, 151 struct page *page;
111 dma_async_tx_callback cb_fn, void *cb_fn_param); 152 dma_addr_t dma;
153} addr_conv_t;
154
155static inline void
156init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags,
157 struct dma_async_tx_descriptor *tx,
158 dma_async_tx_callback cb_fn, void *cb_param,
159 addr_conv_t *scribble)
160{
161 args->flags = flags;
162 args->depend_tx = tx;
163 args->cb_fn = cb_fn;
164 args->cb_param = cb_param;
165 args->scribble = scribble;
166}
167
168void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
169 struct async_submit_ctl *submit);
112 170
113struct dma_async_tx_descriptor * 171struct dma_async_tx_descriptor *
114async_xor(struct page *dest, struct page **src_list, unsigned int offset, 172async_xor(struct page *dest, struct page **src_list, unsigned int offset,
115 int src_cnt, size_t len, enum async_tx_flags flags, 173 int src_cnt, size_t len, struct async_submit_ctl *submit);
116 struct dma_async_tx_descriptor *depend_tx,
117 dma_async_tx_callback cb_fn, void *cb_fn_param);
118 174
119struct dma_async_tx_descriptor * 175struct dma_async_tx_descriptor *
120async_xor_zero_sum(struct page *dest, struct page **src_list, 176async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
121 unsigned int offset, int src_cnt, size_t len, 177 int src_cnt, size_t len, enum sum_check_flags *result,
122 u32 *result, enum async_tx_flags flags, 178 struct async_submit_ctl *submit);
123 struct dma_async_tx_descriptor *depend_tx,
124 dma_async_tx_callback cb_fn, void *cb_fn_param);
125 179
126struct dma_async_tx_descriptor * 180struct dma_async_tx_descriptor *
127async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, 181async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
128 unsigned int src_offset, size_t len, enum async_tx_flags flags, 182 unsigned int src_offset, size_t len,
129 struct dma_async_tx_descriptor *depend_tx, 183 struct async_submit_ctl *submit);
130 dma_async_tx_callback cb_fn, void *cb_fn_param);
131 184
132struct dma_async_tx_descriptor * 185struct dma_async_tx_descriptor *
133async_memset(struct page *dest, int val, unsigned int offset, 186async_memset(struct page *dest, int val, unsigned int offset,
134 size_t len, enum async_tx_flags flags, 187 size_t len, struct async_submit_ctl *submit);
135 struct dma_async_tx_descriptor *depend_tx, 188
136 dma_async_tx_callback cb_fn, void *cb_fn_param); 189struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit);
190
191struct dma_async_tx_descriptor *
192async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt,
193 size_t len, struct async_submit_ctl *submit);
194
195struct dma_async_tx_descriptor *
196async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt,
197 size_t len, enum sum_check_flags *pqres, struct page *spare,
198 struct async_submit_ctl *submit);
199
200struct dma_async_tx_descriptor *
201async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb,
202 struct page **ptrs, struct async_submit_ctl *submit);
137 203
138struct dma_async_tx_descriptor * 204struct dma_async_tx_descriptor *
139async_trigger_callback(enum async_tx_flags flags, 205async_raid6_datap_recov(int src_num, size_t bytes, int faila,
140 struct dma_async_tx_descriptor *depend_tx, 206 struct page **ptrs, struct async_submit_ctl *submit);
141 dma_async_tx_callback cb_fn, void *cb_fn_param);
142 207
143void async_tx_quiesce(struct dma_async_tx_descriptor **tx); 208void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
144#endif /* _ASYNC_TX_H_ */ 209#endif /* _ASYNC_TX_H_ */
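
Seen purely as an API shape, this async_tx rework is a parameter-object refactor: the (flags, depend_tx, cb_fn, cb_param) tail that every async_* call used to carry is folded into struct async_submit_ctl, filled once by init_async_submit() and threaded through the chain, which is also where the new scribble pointer rides. The pattern stripped of the DMA specifics, with illustrative names:

    #include <stdio.h>

    typedef void (*callback_t)(void *param);

    struct submit_ctl {
            unsigned flags;
            callback_t cb_fn;
            void *cb_param;
    };

    static void init_submit(struct submit_ctl *s, unsigned flags,
                            callback_t cb_fn, void *cb_param)
    {
            s->flags = flags;
            s->cb_fn = cb_fn;
            s->cb_param = cb_param;
    }

    /* Before the refactor this would have been
     * op_xor(dst, src, flags, cb_fn, cb_param) on every operation. */
    static void op_xor(int dst, int src, struct submit_ctl *submit)
    {
            printf("xor %d ^= %d (flags %#x)\n", dst, src, submit->flags);
            if (submit->cb_fn)
                    submit->cb_fn(submit->cb_param);
    }

    static void done(void *param) { printf("done: %s\n", (char *)param); }

    int main(void)
    {
            struct submit_ctl submit;

            init_submit(&submit, 0x1, done, "stripe 7");
            op_xor(1, 2, &submit);  /* same ctl reused across chained ops */
            return 0;
    }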
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 2046b5b8af48..aece486ac734 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -120,7 +120,7 @@ extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm);
120extern int prepare_bprm_creds(struct linux_binprm *bprm); 120extern int prepare_bprm_creds(struct linux_binprm *bprm);
121extern void install_exec_creds(struct linux_binprm *bprm); 121extern void install_exec_creds(struct linux_binprm *bprm);
122extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); 122extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
123extern int set_binfmt(struct linux_binfmt *new); 123extern void set_binfmt(struct linux_binfmt *new);
124extern void free_bprm(struct linux_binprm *); 124extern void free_bprm(struct linux_binprm *);
125 125
126#endif /* __KERNEL__ */ 126#endif /* __KERNEL__ */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 90bba9e62286..b62bb9294d0c 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -141,6 +141,38 @@ enum {
141 CGRP_WAIT_ON_RMDIR, 141 CGRP_WAIT_ON_RMDIR,
142}; 142};
143 143
144/* which pidlist file are we talking about? */
145enum cgroup_filetype {
146 CGROUP_FILE_PROCS,
147 CGROUP_FILE_TASKS,
148};
149
150/*
151 * A pidlist is a list of pids that virtually represents the contents of one
152 * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists,
153 * a pair (one each for procs, tasks) for each pid namespace that's relevant
154 * to the cgroup.
155 */
156struct cgroup_pidlist {
157 /*
158 * used to find which pidlist is wanted. doesn't change as long as
159 * this particular list stays in the list.
160 */
161 struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
162 /* array of xids */
163 pid_t *list;
164 /* how many elements the above list has */
165 int length;
166 /* how many files are using the current array */
167 int use_count;
168 /* each of these stored in a list by its cgroup */
169 struct list_head links;
170 /* pointer to the cgroup we belong to, for list removal purposes */
171 struct cgroup *owner;
172 /* protects the other fields */
173 struct rw_semaphore mutex;
174};
175
144struct cgroup { 176struct cgroup {
145 unsigned long flags; /* "unsigned long" so bitops work */ 177 unsigned long flags; /* "unsigned long" so bitops work */
146 178
@@ -179,11 +211,12 @@ struct cgroup {
179 */ 211 */
180 struct list_head release_list; 212 struct list_head release_list;
181 213
182 /* pids_mutex protects pids_list and cached pid arrays. */ 214 /*
183 struct rw_semaphore pids_mutex; 215 * list of pidlists, up to two for each namespace (one for procs, one
184 216 * for tasks); created on demand.
185 /* Linked list of struct cgroup_pids */ 217 */
186 struct list_head pids_list; 218 struct list_head pidlists;
219 struct mutex pidlist_mutex;
187 220
188 /* For RCU-protected deletion */ 221 /* For RCU-protected deletion */
189 struct rcu_head rcu_head; 222 struct rcu_head rcu_head;
@@ -227,6 +260,9 @@ struct css_set {
227 * during subsystem registration (at boot time). 260 * during subsystem registration (at boot time).
228 */ 261 */
229 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; 262 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
263
264 /* For RCU-protected deletion */
265 struct rcu_head rcu_head;
230}; 266};
231 267
232/* 268/*
@@ -389,10 +425,11 @@ struct cgroup_subsys {
389 struct cgroup *cgrp); 425 struct cgroup *cgrp);
390 int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); 426 int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
391 void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); 427 void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
392 int (*can_attach)(struct cgroup_subsys *ss, 428 int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
393 struct cgroup *cgrp, struct task_struct *tsk); 429 struct task_struct *tsk, bool threadgroup);
394 void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, 430 void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
395 struct cgroup *old_cgrp, struct task_struct *tsk); 431 struct cgroup *old_cgrp, struct task_struct *tsk,
432 bool threadgroup);
396 void (*fork)(struct cgroup_subsys *ss, struct task_struct *task); 433 void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
397 void (*exit)(struct cgroup_subsys *ss, struct task_struct *task); 434 void (*exit)(struct cgroup_subsys *ss, struct task_struct *task);
398 int (*populate)(struct cgroup_subsys *ss, 435 int (*populate)(struct cgroup_subsys *ss,
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index 7f627775c947..ddb7a97c78c2 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -27,8 +27,8 @@
27 * 27 *
28 * configfs Copyright (C) 2005 Oracle. All rights reserved. 28 * configfs Copyright (C) 2005 Oracle. All rights reserved.
29 * 29 *
30 * Please read Documentation/filesystems/configfs.txt before using the 30 * Please read Documentation/filesystems/configfs/configfs.txt before using
31 * configfs interface, ESPECIALLY the parts about reference counts and 31 * the configfs interface, ESPECIALLY the parts about reference counts and
32 * item destructors. 32 * item destructors.
33 */ 33 */
34 34
diff --git a/include/linux/dca.h b/include/linux/dca.h
index 9c20c7e87d0a..d27a7a05718d 100644
--- a/include/linux/dca.h
+++ b/include/linux/dca.h
@@ -20,6 +20,9 @@
20 */ 20 */
21#ifndef DCA_H 21#ifndef DCA_H
22#define DCA_H 22#define DCA_H
23
24#include <linux/pci.h>
25
23/* DCA Provider API */ 26/* DCA Provider API */
24 27
25/* DCA Notifier Interface */ 28/* DCA Notifier Interface */
@@ -36,6 +39,12 @@ struct dca_provider {
36 int id; 39 int id;
37}; 40};
38 41
42struct dca_domain {
43 struct list_head node;
44 struct list_head dca_providers;
45 struct pci_bus *pci_rc;
46};
47
39struct dca_ops { 48struct dca_ops {
40 int (*add_requester) (struct dca_provider *, struct device *); 49 int (*add_requester) (struct dca_provider *, struct device *);
41 int (*remove_requester) (struct dca_provider *, struct device *); 50 int (*remove_requester) (struct dca_provider *, struct device *);
@@ -47,7 +56,7 @@ struct dca_ops {
47struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size); 56struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size);
48void free_dca_provider(struct dca_provider *dca); 57void free_dca_provider(struct dca_provider *dca);
49int register_dca_provider(struct dca_provider *dca, struct device *dev); 58int register_dca_provider(struct dca_provider *dca, struct device *dev);
50void unregister_dca_provider(struct dca_provider *dca); 59void unregister_dca_provider(struct dca_provider *dca, struct device *dev);
51 60
52static inline void *dca_priv(struct dca_provider *dca) 61static inline void *dca_priv(struct dca_provider *dca)
53{ 62{
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index eb5c2ba2f81a..fc1b930f246c 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -9,7 +9,7 @@
9 * 2 as published by the Free Software Foundation. 9 * 2 as published by the Free Software Foundation.
10 * 10 *
11 * debugfs is for people to use instead of /proc or /sys. 11 * debugfs is for people to use instead of /proc or /sys.
12 * See Documentation/DocBook/kernel-api for more details. 12 * See Documentation/DocBook/filesystems for more details.
13 */ 13 */
14 14
15#ifndef _DEBUGFS_H_ 15#ifndef _DEBUGFS_H_
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index ffefba81c818..2b9f2ac7ed60 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -48,19 +48,20 @@ enum dma_status {
48 48
49/** 49/**
50 * enum dma_transaction_type - DMA transaction types/indexes 50 * enum dma_transaction_type - DMA transaction types/indexes
51 *
52 * Note: The DMA_ASYNC_TX capability is not to be set by drivers. It is
53 * automatically set as dma devices are registered.
51 */ 54 */
52enum dma_transaction_type { 55enum dma_transaction_type {
53 DMA_MEMCPY, 56 DMA_MEMCPY,
54 DMA_XOR, 57 DMA_XOR,
55 DMA_PQ_XOR, 58 DMA_PQ,
56 DMA_DUAL_XOR, 59 DMA_XOR_VAL,
57 DMA_PQ_UPDATE, 60 DMA_PQ_VAL,
58 DMA_ZERO_SUM,
59 DMA_PQ_ZERO_SUM,
60 DMA_MEMSET, 61 DMA_MEMSET,
61 DMA_MEMCPY_CRC32C,
62 DMA_INTERRUPT, 62 DMA_INTERRUPT,
63 DMA_PRIVATE, 63 DMA_PRIVATE,
64 DMA_ASYNC_TX,
64 DMA_SLAVE, 65 DMA_SLAVE,
65}; 66};
66 67
@@ -70,18 +71,25 @@ enum dma_transaction_type {
70 71
71/** 72/**
72 * enum dma_ctrl_flags - DMA flags to augment operation preparation, 73 * enum dma_ctrl_flags - DMA flags to augment operation preparation,
73 * control completion, and communicate status. 74 * control completion, and communicate status.
74 * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of 75 * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
75 * this transaction 76 * this transaction
76 * @DMA_CTRL_ACK - the descriptor cannot be reused until the client 77 * @DMA_CTRL_ACK - the descriptor cannot be reused until the client
77 * acknowledges receipt, i.e. has had a chance to establish any 78 * acknowledges receipt, i.e. has had a chance to establish any dependency
78 * dependency chains 79 * chains
79 * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) 80 * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
80 * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) 81 * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
81 * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single 82 * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single
82 * (if not set, do the source dma-unmapping as page) 83 * (if not set, do the source dma-unmapping as page)
83 * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single 84 * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single
84 * (if not set, do the destination dma-unmapping as page) 85 * (if not set, do the destination dma-unmapping as page)
86 * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
87 * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
88 * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
 89 * sources that were the result of a previous operation; in the case of a PQ
 90 * operation, it continues the calculation with new sources
91 * @DMA_PREP_FENCE - tell the driver that subsequent operations depend
92 * on the result of this operation
85 */ 93 */
86enum dma_ctrl_flags { 94enum dma_ctrl_flags {
87 DMA_PREP_INTERRUPT = (1 << 0), 95 DMA_PREP_INTERRUPT = (1 << 0),
@@ -90,9 +98,32 @@ enum dma_ctrl_flags {
90 DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), 98 DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
91 DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4), 99 DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4),
92 DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5), 100 DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5),
101 DMA_PREP_PQ_DISABLE_P = (1 << 6),
102 DMA_PREP_PQ_DISABLE_Q = (1 << 7),
103 DMA_PREP_CONTINUE = (1 << 8),
104 DMA_PREP_FENCE = (1 << 9),
93}; 105};
94 106
95/** 107/**
 108 * enum sum_check_bits - bit positions of sum_check_flags
109 */
110enum sum_check_bits {
111 SUM_CHECK_P = 0,
112 SUM_CHECK_Q = 1,
113};
114
115/**
 116 * enum sum_check_flags - result of async_{xor,pq}_zero_sum operations
117 * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise
 118 * @SUM_CHECK_Q_RESULT - 1 if Reed-Solomon zero sum error, 0 otherwise
119 */
120enum sum_check_flags {
121 SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P),
122 SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q),
123};
124
125
126/**
96 * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t. 127 * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
97 * See linux/cpumask.h 128 * See linux/cpumask.h
98 */ 129 */
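For illustration, a minimal sketch (not part of this patch) of a raw dmaengine client exercising the new validate operation and its sum_check_flags result. It assumes srcs[] already holds DMA-mapped addresses and that chan's device advertises DMA_XOR_VAL; completion is polled only to keep the sketch short:

#include <linux/dmaengine.h>

static int example_xor_check(struct dma_chan *chan, dma_addr_t *srcs,
			     unsigned int src_cnt, size_t len)
{
	struct dma_device *dev = chan->device;
	struct dma_async_tx_descriptor *tx;
	enum sum_check_flags result = 0;
	dma_cookie_t cookie;

	tx = dev->device_prep_dma_xor_val(chan, srcs, src_cnt, len,
					  &result, DMA_CTRL_ACK);
	if (!tx)
		return -ENOMEM;

	cookie = tx->tx_submit(tx);
	dev->device_issue_pending(chan);
	while (dma_async_is_tx_complete(chan, cookie, NULL, NULL) ==
	       DMA_IN_PROGRESS)
		cpu_relax();

	/* SUM_CHECK_P_RESULT set means the xor parity did not verify */
	return (result & SUM_CHECK_P_RESULT) ? -EIO : 0;
}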
@@ -180,8 +211,6 @@ typedef void (*dma_async_tx_callback)(void *dma_async_param);
180 * @flags: flags to augment operation preparation, control completion, and 211 * @flags: flags to augment operation preparation, control completion, and
181 * communicate status 212 * communicate status
182 * @phys: physical address of the descriptor 213 * @phys: physical address of the descriptor
183 * @tx_list: driver common field for operations that require multiple
184 * descriptors
185 * @chan: target channel for this operation 214 * @chan: target channel for this operation
186 * @tx_submit: set the prepared descriptor(s) to be executed by the engine 215 * @tx_submit: set the prepared descriptor(s) to be executed by the engine
187 * @callback: routine to call after this operation is complete 216 * @callback: routine to call after this operation is complete
@@ -195,7 +224,6 @@ struct dma_async_tx_descriptor {
195 dma_cookie_t cookie; 224 dma_cookie_t cookie;
196 enum dma_ctrl_flags flags; /* not a 'long' to pack with cookie */ 225 enum dma_ctrl_flags flags; /* not a 'long' to pack with cookie */
197 dma_addr_t phys; 226 dma_addr_t phys;
198 struct list_head tx_list;
199 struct dma_chan *chan; 227 struct dma_chan *chan;
200 dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx); 228 dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
201 dma_async_tx_callback callback; 229 dma_async_tx_callback callback;
@@ -213,6 +241,11 @@ struct dma_async_tx_descriptor {
213 * @global_node: list_head for global dma_device_list 241 * @global_node: list_head for global dma_device_list
214 * @cap_mask: one or more dma_capability flags 242 * @cap_mask: one or more dma_capability flags
215 * @max_xor: maximum number of xor sources, 0 if no capability 243 * @max_xor: maximum number of xor sources, 0 if no capability
244 * @max_pq: maximum number of PQ sources and PQ-continue capability
245 * @copy_align: alignment shift for memcpy operations
246 * @xor_align: alignment shift for xor operations
247 * @pq_align: alignment shift for pq operations
248 * @fill_align: alignment shift for memset operations
216 * @dev_id: unique device ID 249 * @dev_id: unique device ID
217 * @dev: struct device reference for dma mapping api 250 * @dev: struct device reference for dma mapping api
218 * @device_alloc_chan_resources: allocate resources and return the 251 * @device_alloc_chan_resources: allocate resources and return the
@@ -220,7 +253,9 @@ struct dma_async_tx_descriptor {
220 * @device_free_chan_resources: release DMA channel's resources 253 * @device_free_chan_resources: release DMA channel's resources
221 * @device_prep_dma_memcpy: prepares a memcpy operation 254 * @device_prep_dma_memcpy: prepares a memcpy operation
222 * @device_prep_dma_xor: prepares a xor operation 255 * @device_prep_dma_xor: prepares a xor operation
223 * @device_prep_dma_zero_sum: prepares a zero_sum operation 256 * @device_prep_dma_xor_val: prepares a xor validation operation
257 * @device_prep_dma_pq: prepares a pq operation
 258 * @device_prep_dma_pq_val: prepares a pq validation operation
224 * @device_prep_dma_memset: prepares a memset operation 259 * @device_prep_dma_memset: prepares a memset operation
225 * @device_prep_dma_interrupt: prepares an end of chain interrupt operation 260 * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
226 * @device_prep_slave_sg: prepares a slave dma operation 261 * @device_prep_slave_sg: prepares a slave dma operation
@@ -235,7 +270,13 @@ struct dma_device {
235 struct list_head channels; 270 struct list_head channels;
236 struct list_head global_node; 271 struct list_head global_node;
237 dma_cap_mask_t cap_mask; 272 dma_cap_mask_t cap_mask;
238 int max_xor; 273 unsigned short max_xor;
274 unsigned short max_pq;
275 u8 copy_align;
276 u8 xor_align;
277 u8 pq_align;
278 u8 fill_align;
279 #define DMA_HAS_PQ_CONTINUE (1 << 15)
239 280
240 int dev_id; 281 int dev_id;
241 struct device *dev; 282 struct device *dev;
@@ -249,9 +290,17 @@ struct dma_device {
249 struct dma_async_tx_descriptor *(*device_prep_dma_xor)( 290 struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
250 struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, 291 struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
251 unsigned int src_cnt, size_t len, unsigned long flags); 292 unsigned int src_cnt, size_t len, unsigned long flags);
252 struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)( 293 struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)(
253 struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, 294 struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
254 size_t len, u32 *result, unsigned long flags); 295 size_t len, enum sum_check_flags *result, unsigned long flags);
296 struct dma_async_tx_descriptor *(*device_prep_dma_pq)(
297 struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
298 unsigned int src_cnt, const unsigned char *scf,
299 size_t len, unsigned long flags);
300 struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)(
301 struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
302 unsigned int src_cnt, const unsigned char *scf, size_t len,
303 enum sum_check_flags *pqres, unsigned long flags);
255 struct dma_async_tx_descriptor *(*device_prep_dma_memset)( 304 struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
256 struct dma_chan *chan, dma_addr_t dest, int value, size_t len, 305 struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
257 unsigned long flags); 306 unsigned long flags);
@@ -270,6 +319,96 @@ struct dma_device {
270 void (*device_issue_pending)(struct dma_chan *chan); 319 void (*device_issue_pending)(struct dma_chan *chan);
271}; 320};
272 321
322static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len)
323{
324 size_t mask;
325
326 if (!align)
327 return true;
328 mask = (1 << align) - 1;
329 if (mask & (off1 | off2 | len))
330 return false;
331 return true;
332}
333
334static inline bool is_dma_copy_aligned(struct dma_device *dev, size_t off1,
335 size_t off2, size_t len)
336{
337 return dmaengine_check_align(dev->copy_align, off1, off2, len);
338}
339
340static inline bool is_dma_xor_aligned(struct dma_device *dev, size_t off1,
341 size_t off2, size_t len)
342{
343 return dmaengine_check_align(dev->xor_align, off1, off2, len);
344}
345
346static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1,
347 size_t off2, size_t len)
348{
349 return dmaengine_check_align(dev->pq_align, off1, off2, len);
350}
351
352static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1,
353 size_t off2, size_t len)
354{
355 return dmaengine_check_align(dev->fill_align, off1, off2, len);
356}
357
358static inline void
359dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
360{
361 dma->max_pq = maxpq;
362 if (has_pq_continue)
363 dma->max_pq |= DMA_HAS_PQ_CONTINUE;
364}
365
366static inline bool dmaf_continue(enum dma_ctrl_flags flags)
367{
368 return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE;
369}
370
371static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags)
372{
373 enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P;
374
375 return (flags & mask) == mask;
376}
377
378static inline bool dma_dev_has_pq_continue(struct dma_device *dma)
379{
380 return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE;
381}
382
 383static inline unsigned short dma_dev_to_maxpq(struct dma_device *dma)
384{
385 return dma->max_pq & ~DMA_HAS_PQ_CONTINUE;
386}
387
388/* dma_maxpq - reduce maxpq in the face of continued operations
 389 * @dma: dma device with PQ capability
 390 * @flags: to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set
391 *
392 * When an engine does not support native continuation we need 3 extra
393 * source slots to reuse P and Q with the following coefficients:
394 * 1/ {00} * P : remove P from Q', but use it as a source for P'
395 * 2/ {01} * Q : use Q to continue Q' calculation
396 * 3/ {00} * Q : subtract Q from P' to cancel (2)
397 *
398 * In the case where P is disabled we only need 1 extra source:
399 * 1/ {01} * Q : use Q to continue Q' calculation
400 */
401static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags)
402{
403 if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags))
404 return dma_dev_to_maxpq(dma);
405 else if (dmaf_p_disabled_continue(flags))
406 return dma_dev_to_maxpq(dma) - 1;
407 else if (dmaf_continue(flags))
408 return dma_dev_to_maxpq(dma) - 3;
409 BUG();
410}
411
273/* --- public DMA engine API --- */ 412/* --- public DMA engine API --- */
274 413
275#ifdef CONFIG_DMA_ENGINE 414#ifdef CONFIG_DMA_ENGINE
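A worked example of dma_maxpq() (numbers illustrative): for an engine registered with dma_set_maxpq(dma, 8, 0), that is, eight PQ sources and no native continuation support, the effective source budget is:

	dma_maxpq(dma, 0)                      ->  8
	dma_maxpq(dma, DMA_PREP_CONTINUE)      ->  5  (8 - 3: {00}*P, {01}*Q, {00}*Q)
	dma_maxpq(dma, DMA_PREP_CONTINUE |
	               DMA_PREP_PQ_DISABLE_P)  ->  7  (8 - 1: {01}*Q)

An engine registered with has_pq_continue set keeps its full budget in all three cases, since the first branch returns early.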
@@ -299,7 +438,11 @@ static inline void net_dmaengine_put(void)
299#ifdef CONFIG_ASYNC_TX_DMA 438#ifdef CONFIG_ASYNC_TX_DMA
300#define async_dmaengine_get() dmaengine_get() 439#define async_dmaengine_get() dmaengine_get()
301#define async_dmaengine_put() dmaengine_put() 440#define async_dmaengine_put() dmaengine_put()
441#ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH
442#define async_dma_find_channel(type) dma_find_channel(DMA_ASYNC_TX)
443#else
302#define async_dma_find_channel(type) dma_find_channel(type) 444#define async_dma_find_channel(type) dma_find_channel(type)
445#endif /* CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH */
303#else 446#else
304static inline void async_dmaengine_get(void) 447static inline void async_dmaengine_get(void)
305{ 448{
@@ -312,7 +455,7 @@ async_dma_find_channel(enum dma_transaction_type type)
312{ 455{
313 return NULL; 456 return NULL;
314} 457}
315#endif 458#endif /* CONFIG_ASYNC_TX_DMA */
316 459
317dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, 460dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
318 void *dest, void *src, size_t len); 461 void *dest, void *src, size_t len);
diff --git a/include/linux/fb.h b/include/linux/fb.h
index f847df9e99b6..a34bdf5a9d23 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -133,6 +133,7 @@ struct dentry;
133#define FB_ACCEL_NEOMAGIC_NM2230 96 /* NeoMagic NM2230 */ 133#define FB_ACCEL_NEOMAGIC_NM2230 96 /* NeoMagic NM2230 */
134#define FB_ACCEL_NEOMAGIC_NM2360 97 /* NeoMagic NM2360 */ 134#define FB_ACCEL_NEOMAGIC_NM2360 97 /* NeoMagic NM2360 */
135#define FB_ACCEL_NEOMAGIC_NM2380 98 /* NeoMagic NM2380 */ 135#define FB_ACCEL_NEOMAGIC_NM2380 98 /* NeoMagic NM2380 */
136#define FB_ACCEL_PXA3XX 99 /* PXA3xx */
136 137
137#define FB_ACCEL_SAVAGE4 0x80 /* S3 Savage4 */ 138#define FB_ACCEL_SAVAGE4 0x80 /* S3 Savage4 */
138#define FB_ACCEL_SAVAGE3D 0x81 /* S3 Savage3D */ 139#define FB_ACCEL_SAVAGE3D 0x81 /* S3 Savage3D */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 51803528b095..2adaa2529f18 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -595,6 +595,7 @@ struct address_space_operations {
595 int (*launder_page) (struct page *); 595 int (*launder_page) (struct page *);
596 int (*is_partially_uptodate) (struct page *, read_descriptor_t *, 596 int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
597 unsigned long); 597 unsigned long);
598 int (*error_remove_page)(struct address_space *, struct page *);
598}; 599};
599 600
600/* 601/*
@@ -640,7 +641,6 @@ struct block_device {
640 struct super_block * bd_super; 641 struct super_block * bd_super;
641 int bd_openers; 642 int bd_openers;
642 struct mutex bd_mutex; /* open/close mutex */ 643 struct mutex bd_mutex; /* open/close mutex */
643 struct semaphore bd_mount_sem;
644 struct list_head bd_inodes; 644 struct list_head bd_inodes;
645 void * bd_holder; 645 void * bd_holder;
646 int bd_holders; 646 int bd_holders;
@@ -1315,7 +1315,7 @@ struct super_block {
1315 unsigned long s_blocksize; 1315 unsigned long s_blocksize;
1316 unsigned char s_blocksize_bits; 1316 unsigned char s_blocksize_bits;
1317 unsigned char s_dirt; 1317 unsigned char s_dirt;
1318 unsigned long long s_maxbytes; /* Max file size */ 1318 loff_t s_maxbytes; /* Max file size */
1319 struct file_system_type *s_type; 1319 struct file_system_type *s_type;
1320 const struct super_operations *s_op; 1320 const struct super_operations *s_op;
1321 const struct dquot_operations *dq_op; 1321 const struct dquot_operations *dq_op;
@@ -2156,6 +2156,7 @@ extern ino_t iunique(struct super_block *, ino_t);
2156extern int inode_needs_sync(struct inode *inode); 2156extern int inode_needs_sync(struct inode *inode);
2157extern void generic_delete_inode(struct inode *inode); 2157extern void generic_delete_inode(struct inode *inode);
2158extern void generic_drop_inode(struct inode *inode); 2158extern void generic_drop_inode(struct inode *inode);
2159extern int generic_detach_inode(struct inode *inode);
2159 2160
2160extern struct inode *ilookup5_nowait(struct super_block *sb, 2161extern struct inode *ilookup5_nowait(struct super_block *sb,
2161 unsigned long hashval, int (*test)(struct inode *, void *), 2162 unsigned long hashval, int (*test)(struct inode *, void *),
@@ -2334,6 +2335,7 @@ extern void get_filesystem(struct file_system_type *fs);
2334extern void put_filesystem(struct file_system_type *fs); 2335extern void put_filesystem(struct file_system_type *fs);
2335extern struct file_system_type *get_fs_type(const char *name); 2336extern struct file_system_type *get_fs_type(const char *name);
2336extern struct super_block *get_super(struct block_device *); 2337extern struct super_block *get_super(struct block_device *);
2338extern struct super_block *get_active_super(struct block_device *bdev);
2337extern struct super_block *user_get_super(dev_t); 2339extern struct super_block *user_get_super(dev_t);
2338extern void drop_super(struct super_block *sb); 2340extern void drop_super(struct super_block *sb);
2339 2341
@@ -2381,7 +2383,8 @@ extern int buffer_migrate_page(struct address_space *,
2381#define buffer_migrate_page NULL 2383#define buffer_migrate_page NULL
2382#endif 2384#endif
2383 2385
2384extern int inode_change_ok(struct inode *, struct iattr *); 2386extern int inode_change_ok(const struct inode *, struct iattr *);
2387extern int inode_newsize_ok(const struct inode *, loff_t offset);
2385extern int __must_check inode_setattr(struct inode *, struct iattr *); 2388extern int __must_check inode_setattr(struct inode *, struct iattr *);
2386 2389
2387extern void file_update_time(struct file *file); 2390extern void file_update_time(struct file *file);
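For illustration, a minimal ->setattr skeleton (filesystem name hypothetical, not part of this patch) showing the intended split: inode_change_ok() validates the attribute set as a whole, and inode_newsize_ok() vets a size change against limits before any pages are touched:

#include <linux/fs.h>

static int examplefs_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int error;

	error = inode_change_ok(inode, attr);
	if (error)
		return error;

	if (attr->ia_valid & ATTR_SIZE) {
		error = inode_newsize_ok(inode, attr->ia_size);
		if (error)
			return error;
		/* filesystem-specific block truncation would go here */
	}

	return inode_setattr(inode, attr);
}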
@@ -2467,7 +2470,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
2467 size_t len, loff_t *ppos); 2470 size_t len, loff_t *ppos);
2468 2471
2469struct ctl_table; 2472struct ctl_table;
2470int proc_nr_files(struct ctl_table *table, int write, struct file *filp, 2473int proc_nr_files(struct ctl_table *table, int write,
2471 void __user *buffer, size_t *lenp, loff_t *ppos); 2474 void __user *buffer, size_t *lenp, loff_t *ppos);
2472 2475
2473int __init get_filesystem_list(char *buf); 2476int __init get_filesystem_list(char *buf);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 3c0924a18daf..cd3d2abaf30a 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -19,7 +19,7 @@
19extern int ftrace_enabled; 19extern int ftrace_enabled;
20extern int 20extern int
21ftrace_enable_sysctl(struct ctl_table *table, int write, 21ftrace_enable_sysctl(struct ctl_table *table, int write,
22 struct file *filp, void __user *buffer, size_t *lenp, 22 void __user *buffer, size_t *lenp,
23 loff_t *ppos); 23 loff_t *ppos);
24 24
25typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); 25typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
@@ -94,7 +94,7 @@ static inline void ftrace_start(void) { }
94extern int stack_tracer_enabled; 94extern int stack_tracer_enabled;
95int 95int
96stack_trace_sysctl(struct ctl_table *table, int write, 96stack_trace_sysctl(struct ctl_table *table, int write,
97 struct file *file, void __user *buffer, size_t *lenp, 97 void __user *buffer, size_t *lenp,
98 loff_t *ppos); 98 loff_t *ppos);
99#endif 99#endif
100 100
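The same signature change recurs throughout this patch for every proc handler. For illustration, a handler in the new style (the name and the variable it guards are hypothetical); the struct file * argument is simply gone, everything else is unchanged:

#include <linux/sysctl.h>

static int example_threshold;	/* wired up via ctl_table.data */

static int example_sysctl_handler(struct ctl_table *table, int write,
				  void __user *buffer, size_t *lenp,
				  loff_t *ppos)
{
	int ret = proc_dointvec(table, write, buffer, lenp, ppos);

	if (!ret && write)
		pr_info("example threshold is now %d\n", example_threshold);
	return ret;
}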
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 34956c8fdebf..8ec17997d94f 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -4,11 +4,6 @@
4#include <linux/compiler.h> 4#include <linux/compiler.h>
5#include <linux/types.h> 5#include <linux/types.h>
6 6
7struct inode;
8struct mm_struct;
9struct task_struct;
10union ktime;
11
12/* Second argument to futex syscall */ 7/* Second argument to futex syscall */
13 8
14 9
@@ -129,6 +124,11 @@ struct robust_list_head {
129#define FUTEX_BITSET_MATCH_ANY 0xffffffff 124#define FUTEX_BITSET_MATCH_ANY 0xffffffff
130 125
131#ifdef __KERNEL__ 126#ifdef __KERNEL__
127struct inode;
128struct mm_struct;
129struct task_struct;
130union ktime;
131
132long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout, 132long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout,
133 u32 __user *uaddr2, u32 val2, u32 val3); 133 u32 __user *uaddr2, u32 val2, u32 val3);
134 134
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 176e7ee73eff..16937995abd4 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -3,15 +3,15 @@
3 3
4#include <linux/fs.h> 4#include <linux/fs.h>
5 5
6struct ctl_table;
7struct user_struct;
8
6#ifdef CONFIG_HUGETLB_PAGE 9#ifdef CONFIG_HUGETLB_PAGE
7 10
8#include <linux/mempolicy.h> 11#include <linux/mempolicy.h>
9#include <linux/shm.h> 12#include <linux/shm.h>
10#include <asm/tlbflush.h> 13#include <asm/tlbflush.h>
11 14
12struct ctl_table;
13struct user_struct;
14
15int PageHuge(struct page *page); 15int PageHuge(struct page *page);
16 16
17static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) 17static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
@@ -20,9 +20,9 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
20} 20}
21 21
22void reset_vma_resv_huge_pages(struct vm_area_struct *vma); 22void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
23int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); 23int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
24int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); 24int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
25int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); 25int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
26int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); 26int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
27int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, 27int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
28 struct page **, struct vm_area_struct **, 28 struct page **, struct vm_area_struct **,
@@ -187,7 +187,11 @@ static inline void set_file_hugepages(struct file *file)
187 187
188#define is_file_hugepages(file) 0 188#define is_file_hugepages(file) 0
189#define set_file_hugepages(file) BUG() 189#define set_file_hugepages(file) BUG()
190#define hugetlb_file_setup(name,size,acct,user,creat) ERR_PTR(-ENOSYS) 190static inline struct file *hugetlb_file_setup(const char *name, size_t size,
191 int acctflag, struct user_struct **user, int creat_flags)
192{
193 return ERR_PTR(-ENOSYS);
194}
191 195
192#endif /* !CONFIG_HUGETLBFS */ 196#endif /* !CONFIG_HUGETLBFS */
193 197
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e46a0734ab6e..bf9213b2db8f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -118,6 +118,9 @@ static inline bool mem_cgroup_disabled(void)
118 118
119extern bool mem_cgroup_oom_called(struct task_struct *task); 119extern bool mem_cgroup_oom_called(struct task_struct *task);
120void mem_cgroup_update_mapped_file_stat(struct page *page, int val); 120void mem_cgroup_update_mapped_file_stat(struct page *page, int val);
121unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
122 gfp_t gfp_mask, int nid,
123 int zid);
121#else /* CONFIG_CGROUP_MEM_RES_CTLR */ 124#else /* CONFIG_CGROUP_MEM_RES_CTLR */
122struct mem_cgroup; 125struct mem_cgroup;
123 126
@@ -276,6 +279,13 @@ static inline void mem_cgroup_update_mapped_file_stat(struct page *page,
276{ 279{
277} 280}
278 281
282static inline
283unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
284 gfp_t gfp_mask, int nid, int zid)
285{
286 return 0;
287}
288
 279#endif /* CONFIG_CGROUP_MEM_RES_CTLR */ 289
280 290
281#endif /* _LINUX_MEMCONTROL_H */ 291#endif /* _LINUX_MEMCONTROL_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b6eae5e3144b..24c395694f4d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -288,7 +288,7 @@ static inline int is_vmalloc_addr(const void *x)
288#ifdef CONFIG_MMU 288#ifdef CONFIG_MMU
289extern int is_vmalloc_or_module_addr(const void *x); 289extern int is_vmalloc_or_module_addr(const void *x);
290#else 290#else
291static int is_vmalloc_or_module_addr(const void *x) 291static inline int is_vmalloc_or_module_addr(const void *x)
292{ 292{
293 return 0; 293 return 0;
294} 294}
@@ -695,11 +695,12 @@ static inline int page_mapped(struct page *page)
695#define VM_FAULT_SIGBUS 0x0002 695#define VM_FAULT_SIGBUS 0x0002
696#define VM_FAULT_MAJOR 0x0004 696#define VM_FAULT_MAJOR 0x0004
697#define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ 697#define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */
698#define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned page */
698 699
699#define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ 700#define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */
700#define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ 701#define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */
701 702
702#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS) 703#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON)
703 704
704/* 705/*
705 * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. 706 * Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
@@ -791,8 +792,14 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
791 unmap_mapping_range(mapping, holebegin, holelen, 0); 792 unmap_mapping_range(mapping, holebegin, holelen, 0);
792} 793}
793 794
794extern int vmtruncate(struct inode * inode, loff_t offset); 795extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
795extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); 796extern int vmtruncate(struct inode *inode, loff_t offset);
797extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
798
799int truncate_inode_page(struct address_space *mapping, struct page *page);
800int generic_error_remove_page(struct address_space *mapping, struct page *page);
801
802int invalidate_inode_page(struct page *page);
796 803
797#ifdef CONFIG_MMU 804#ifdef CONFIG_MMU
798extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, 805extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
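For illustration, a minimal size-change helper (name hypothetical, not part of this patch) showing the intended use of truncate_pagecache(): validate with inode_newsize_ok(), publish the new i_size, then drop the stale pagecache beyond it:

#include <linux/fs.h>
#include <linux/mm.h>

static int examplefs_setsize(struct inode *inode, loff_t newsize)
{
	loff_t oldsize;
	int error;

	error = inode_newsize_ok(inode, newsize);
	if (error)
		return error;

	oldsize = inode->i_size;
	i_size_write(inode, newsize);
	truncate_pagecache(inode, oldsize, newsize);
	/* filesystem block truncation would follow here */
	return 0;
}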
@@ -1279,7 +1286,7 @@ int in_gate_area_no_task(unsigned long addr);
1279#define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);}) 1286#define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);})
1280#endif /* __HAVE_ARCH_GATE_AREA */ 1287#endif /* __HAVE_ARCH_GATE_AREA */
1281 1288
1282int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, 1289int drop_caches_sysctl_handler(struct ctl_table *, int,
1283 void __user *, size_t *, loff_t *); 1290 void __user *, size_t *, loff_t *);
1284unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, 1291unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
1285 unsigned long lru_pages); 1292 unsigned long lru_pages);
@@ -1308,5 +1315,12 @@ void vmemmap_populate_print_last(void);
1308extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, 1315extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
1309 size_t size); 1316 size_t size);
1310extern void refund_locked_memory(struct mm_struct *mm, size_t size); 1317extern void refund_locked_memory(struct mm_struct *mm, size_t size);
1318
1319extern void memory_failure(unsigned long pfn, int trapno);
1320extern int __memory_failure(unsigned long pfn, int trapno, int ref);
1321extern int sysctl_memory_failure_early_kill;
1322extern int sysctl_memory_failure_recovery;
1323extern atomic_long_t mce_bad_pages;
1324
1311#endif /* __KERNEL__ */ 1325#endif /* __KERNEL__ */
1312#endif /* _LINUX_MM_H */ 1326#endif /* _LINUX_MM_H */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0042090a4d70..21d6aa45206a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -240,6 +240,8 @@ struct mm_struct {
240 240
241 unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ 241 unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
242 242
243 struct linux_binfmt *binfmt;
244
243 cpumask_t cpu_vm_mask; 245 cpumask_t cpu_vm_mask;
244 246
245 /* Architecture-specific MM context */ 247 /* Architecture-specific MM context */
@@ -259,11 +261,10 @@ struct mm_struct {
259 unsigned long flags; /* Must use atomic bitops to access the bits */ 261 unsigned long flags; /* Must use atomic bitops to access the bits */
260 262
261 struct core_state *core_state; /* coredumping support */ 263 struct core_state *core_state; /* coredumping support */
262 264#ifdef CONFIG_AIO
263 /* aio bits */
264 spinlock_t ioctx_lock; 265 spinlock_t ioctx_lock;
265 struct hlist_head ioctx_list; 266 struct hlist_head ioctx_list;
266 267#endif
267#ifdef CONFIG_MM_OWNER 268#ifdef CONFIG_MM_OWNER
268 /* 269 /*
269 * "owner" points to a task that is regarded as the canonical 270 * "owner" points to a task that is regarded as the canonical
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 652ef01be582..6f7561730d88 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -755,21 +755,20 @@ static inline int is_dma(struct zone *zone)
755 755
756/* These two functions are used to setup the per zone pages min values */ 756/* These two functions are used to setup the per zone pages min values */
757struct ctl_table; 757struct ctl_table;
758struct file; 758int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
759int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *,
760 void __user *, size_t *, loff_t *); 759 void __user *, size_t *, loff_t *);
761extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1]; 760extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
762int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, 761int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int,
763 void __user *, size_t *, loff_t *); 762 void __user *, size_t *, loff_t *);
764int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *, 763int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
765 void __user *, size_t *, loff_t *); 764 void __user *, size_t *, loff_t *);
766int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, 765int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
767 struct file *, void __user *, size_t *, loff_t *); 766 void __user *, size_t *, loff_t *);
768int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, 767int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
769 struct file *, void __user *, size_t *, loff_t *); 768 void __user *, size_t *, loff_t *);
770 769
771extern int numa_zonelist_order_handler(struct ctl_table *, int, 770extern int numa_zonelist_order_handler(struct ctl_table *, int,
772 struct file *, void __user *, size_t *, loff_t *); 771 void __user *, size_t *, loff_t *);
773extern char numa_zonelist_order[]; 772extern char numa_zonelist_order[];
774#define NUMA_ZONELIST_ORDER_LEN 16 /* string buffer size */ 773#define NUMA_ZONELIST_ORDER_LEN 16 /* string buffer size */
775 774
diff --git a/include/linux/module.h b/include/linux/module.h
index 1c755b2f937d..482efc865acf 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -128,7 +128,10 @@ extern struct module __this_module;
128 */ 128 */
129#define MODULE_LICENSE(_license) MODULE_INFO(license, _license) 129#define MODULE_LICENSE(_license) MODULE_INFO(license, _license)
130 130
131/* Author, ideally of form NAME[, NAME]*[ and NAME] */ 131/*
132 * Author(s), use "Name <email>" or just "Name", for multiple
133 * authors use multiple MODULE_AUTHOR() statements/lines.
134 */
132#define MODULE_AUTHOR(_author) MODULE_INFO(author, _author) 135#define MODULE_AUTHOR(_author) MODULE_INFO(author, _author)
133 136
134/* What your module does. */ 137/* What your module does. */
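For example (names and addresses are placeholders):

MODULE_AUTHOR("Ada Example <ada@example.org>");
MODULE_AUTHOR("Grace Example <grace@example.org>");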
@@ -308,10 +311,14 @@ struct module
308#endif 311#endif
309 312
310#ifdef CONFIG_KALLSYMS 313#ifdef CONFIG_KALLSYMS
311 /* We keep the symbol and string tables for kallsyms. */ 314 /*
312 Elf_Sym *symtab; 315 * We keep the symbol and string tables for kallsyms.
313 unsigned int num_symtab; 316 * The core_* fields below are temporary, loader-only (they
314 char *strtab; 317 * could really be discarded after module init).
318 */
319 Elf_Sym *symtab, *core_symtab;
320 unsigned int num_symtab, core_num_syms;
321 char *strtab, *core_strtab;
315 322
316 /* Section attributes */ 323 /* Section attributes */
317 struct module_sect_attrs *sect_attrs; 324 struct module_sect_attrs *sect_attrs;
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 13de789f0a5c..6b202b173955 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -51,6 +51,9 @@
51 * PG_buddy is set to indicate that the page is free and in the buddy system 51 * PG_buddy is set to indicate that the page is free and in the buddy system
52 * (see mm/page_alloc.c). 52 * (see mm/page_alloc.c).
53 * 53 *
54 * PG_hwpoison indicates that a page got corrupted in hardware and contains
55 * data with incorrect ECC bits that triggered a machine check. Accessing is
56 * not safe since it may cause another machine check. Don't touch!
54 */ 57 */
55 58
56/* 59/*
@@ -102,6 +105,9 @@ enum pageflags {
102#ifdef CONFIG_ARCH_USES_PG_UNCACHED 105#ifdef CONFIG_ARCH_USES_PG_UNCACHED
103 PG_uncached, /* Page has been mapped as uncached */ 106 PG_uncached, /* Page has been mapped as uncached */
104#endif 107#endif
108#ifdef CONFIG_MEMORY_FAILURE
109 PG_hwpoison, /* hardware poisoned page. Don't touch */
110#endif
105 __NR_PAGEFLAGS, 111 __NR_PAGEFLAGS,
106 112
107 /* Filesystems */ 113 /* Filesystems */
@@ -269,6 +275,15 @@ PAGEFLAG(Uncached, uncached)
269PAGEFLAG_FALSE(Uncached) 275PAGEFLAG_FALSE(Uncached)
270#endif 276#endif
271 277
278#ifdef CONFIG_MEMORY_FAILURE
279PAGEFLAG(HWPoison, hwpoison)
280TESTSETFLAG(HWPoison, hwpoison)
281#define __PG_HWPOISON (1UL << PG_hwpoison)
282#else
283PAGEFLAG_FALSE(HWPoison)
284#define __PG_HWPOISON 0
285#endif
286
272static inline int PageUptodate(struct page *page) 287static inline int PageUptodate(struct page *page)
273{ 288{
274 int ret = test_bit(PG_uptodate, &(page)->flags); 289 int ret = test_bit(PG_uptodate, &(page)->flags);
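With CONFIG_MEMORY_FAILURE=y, the PAGEFLAG()/TESTSETFLAG() lines above generate roughly PageHWPoison(), SetPageHWPoison(), ClearPageHWPoison() and TestSetPageHWPoison(). A sketch (hypothetical handler, not part of this patch) of how the atomic test-and-set lets exactly one path claim a poisoned page:

static int example_claim_poisoned(struct page *p)
{
	if (TestSetPageHWPoison(p))
		return -EBUSY;	/* someone else is already handling it */
	/* isolate the page, unmap it from processes, etc. */
	return 0;
}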
@@ -393,7 +408,7 @@ static inline void __ClearPageTail(struct page *page)
393 1 << PG_private | 1 << PG_private_2 | \ 408 1 << PG_private | 1 << PG_private_2 | \
394 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ 409 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \
395 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ 410 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
396 1 << PG_unevictable | __PG_MLOCKED) 411 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON)
397 412
398/* 413/*
399 * Flags checked when a page is prepped for return by the page allocator. 414 * Flags checked when a page is prepped for return by the page allocator.
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index ada779f24178..4b938d4f3ac2 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -38,6 +38,7 @@ enum {
38 PCG_LOCK, /* page cgroup is locked */ 38 PCG_LOCK, /* page cgroup is locked */
39 PCG_CACHE, /* charged as cache */ 39 PCG_CACHE, /* charged as cache */
40 PCG_USED, /* this object is in use. */ 40 PCG_USED, /* this object is in use. */
41 PCG_ACCT_LRU, /* page has been accounted for */
41}; 42};
42 43
43#define TESTPCGFLAG(uname, lname) \ 44#define TESTPCGFLAG(uname, lname) \
@@ -52,11 +53,23 @@ static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
52static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \ 53static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \
53 { clear_bit(PCG_##lname, &pc->flags); } 54 { clear_bit(PCG_##lname, &pc->flags); }
54 55
56#define TESTCLEARPCGFLAG(uname, lname) \
57static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \
58 { return test_and_clear_bit(PCG_##lname, &pc->flags); }
59
55/* Cache flag is set only once (at allocation) */ 60/* Cache flag is set only once (at allocation) */
56TESTPCGFLAG(Cache, CACHE) 61TESTPCGFLAG(Cache, CACHE)
62CLEARPCGFLAG(Cache, CACHE)
63SETPCGFLAG(Cache, CACHE)
57 64
58TESTPCGFLAG(Used, USED) 65TESTPCGFLAG(Used, USED)
59CLEARPCGFLAG(Used, USED) 66CLEARPCGFLAG(Used, USED)
67SETPCGFLAG(Used, USED)
68
69SETPCGFLAG(AcctLRU, ACCT_LRU)
70CLEARPCGFLAG(AcctLRU, ACCT_LRU)
71TESTPCGFLAG(AcctLRU, ACCT_LRU)
72TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU)
60 73
61static inline int page_cgroup_nid(struct page_cgroup *pc) 74static inline int page_cgroup_nid(struct page_cgroup *pc)
62{ 75{
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 7803565aa877..da1fda8623e0 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2527,6 +2527,16 @@
2527#define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e 2527#define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e
2528#define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b 2528#define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b
2529#define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c 2529#define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c
2530#define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710
2531#define PCI_DEVICE_ID_INTEL_IOAT_JSF1 0x3711
2532#define PCI_DEVICE_ID_INTEL_IOAT_JSF2 0x3712
2533#define PCI_DEVICE_ID_INTEL_IOAT_JSF3 0x3713
2534#define PCI_DEVICE_ID_INTEL_IOAT_JSF4 0x3714
2535#define PCI_DEVICE_ID_INTEL_IOAT_JSF5 0x3715
2536#define PCI_DEVICE_ID_INTEL_IOAT_JSF6 0x3716
2537#define PCI_DEVICE_ID_INTEL_IOAT_JSF7 0x3717
2538#define PCI_DEVICE_ID_INTEL_IOAT_JSF8 0x3718
2539#define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719
2530#define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14 2540#define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14
2531#define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16 2541#define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16
2532#define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18 2542#define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 07bff666e65b..931150566ade 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -88,4 +88,6 @@
88#define PR_TASK_PERF_EVENTS_DISABLE 31 88#define PR_TASK_PERF_EVENTS_DISABLE 31
89#define PR_TASK_PERF_EVENTS_ENABLE 32 89#define PR_TASK_PERF_EVENTS_ENABLE 32
90 90
91#define PR_MCE_KILL 33
92
91#endif /* _LINUX_PRCTL_H */ 93#endif /* _LINUX_PRCTL_H */
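A userspace sketch of the new control. The argument semantics are an assumption here, taken from the PF_MCE_PROCESS/PF_MCE_EARLY handling elsewhere in this series: arg2 = 1 installs a per-process policy (0 clears it) and arg3 = 1 selects early kill:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33		/* value from the hunk above */
#endif

int main(void)
{
	/* assumed semantics: set policy (arg2 = 1), early kill (arg3 = 1) */
	if (prctl(PR_MCE_KILL, 1, 1, 0, 0) < 0)
		perror("prctl(PR_MCE_KILL)");
	return 0;
}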
diff --git a/include/linux/relay.h b/include/linux/relay.h
index 953fc055e875..14a86bc7102b 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -140,7 +140,7 @@ struct rchan_callbacks
140 * cause relay_open() to create a single global buffer rather 140 * cause relay_open() to create a single global buffer rather
141 * than the default set of per-cpu buffers. 141 * than the default set of per-cpu buffers.
142 * 142 *
143 * See Documentation/filesystems/relayfs.txt for more info. 143 * See Documentation/filesystems/relay.txt for more info.
144 */ 144 */
145 struct dentry *(*create_buf_file)(const char *filename, 145 struct dentry *(*create_buf_file)(const char *filename,
146 struct dentry *parent, 146 struct dentry *parent,
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 511f42fc6816..731af71cddc9 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -35,6 +35,10 @@ struct res_counter {
35 */ 35 */
36 unsigned long long limit; 36 unsigned long long limit;
37 /* 37 /*
 38 * the limit that usage can exceed
39 */
40 unsigned long long soft_limit;
41 /*
38 * the number of unsuccessful attempts to consume the resource 42 * the number of unsuccessful attempts to consume the resource
39 */ 43 */
40 unsigned long long failcnt; 44 unsigned long long failcnt;
@@ -87,6 +91,7 @@ enum {
87 RES_MAX_USAGE, 91 RES_MAX_USAGE,
88 RES_LIMIT, 92 RES_LIMIT,
89 RES_FAILCNT, 93 RES_FAILCNT,
94 RES_SOFT_LIMIT,
90}; 95};
91 96
92/* 97/*
@@ -109,7 +114,8 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent);
109int __must_check res_counter_charge_locked(struct res_counter *counter, 114int __must_check res_counter_charge_locked(struct res_counter *counter,
110 unsigned long val); 115 unsigned long val);
111int __must_check res_counter_charge(struct res_counter *counter, 116int __must_check res_counter_charge(struct res_counter *counter,
112 unsigned long val, struct res_counter **limit_fail_at); 117 unsigned long val, struct res_counter **limit_fail_at,
118 struct res_counter **soft_limit_at);
113 119
114/* 120/*
115 * uncharge - tell that some portion of the resource is released 121 * uncharge - tell that some portion of the resource is released
@@ -122,7 +128,8 @@ int __must_check res_counter_charge(struct res_counter *counter,
122 */ 128 */
123 129
124void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val); 130void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
125void res_counter_uncharge(struct res_counter *counter, unsigned long val); 131void res_counter_uncharge(struct res_counter *counter, unsigned long val,
132 bool *was_soft_limit_excess);
126 133
127static inline bool res_counter_limit_check_locked(struct res_counter *cnt) 134static inline bool res_counter_limit_check_locked(struct res_counter *cnt)
128{ 135{
@@ -132,6 +139,36 @@ static inline bool res_counter_limit_check_locked(struct res_counter *cnt)
132 return false; 139 return false;
133} 140}
134 141
142static inline bool res_counter_soft_limit_check_locked(struct res_counter *cnt)
143{
144 if (cnt->usage < cnt->soft_limit)
145 return true;
146
147 return false;
148}
149
150/**
 151 * res_counter_soft_limit_excess - get the difference between usage and the soft limit
 152 * @cnt: The counter
 153 *
 154 * Returns 0 if usage is less than or equal to the soft limit;
 155 * otherwise returns the difference between usage and the soft limit.
156 */
157static inline unsigned long long
158res_counter_soft_limit_excess(struct res_counter *cnt)
159{
160 unsigned long long excess;
161 unsigned long flags;
162
163 spin_lock_irqsave(&cnt->lock, flags);
164 if (cnt->usage <= cnt->soft_limit)
165 excess = 0;
166 else
167 excess = cnt->usage - cnt->soft_limit;
168 spin_unlock_irqrestore(&cnt->lock, flags);
169 return excess;
170}
171
135/* 172/*
136 * Helper function to detect if the cgroup is within it's limit or 173 * Helper function to detect if the cgroup is within it's limit or
137 * not. It's currently called from cgroup_rss_prepare() 174 * not. It's currently called from cgroup_rss_prepare()
@@ -147,6 +184,17 @@ static inline bool res_counter_check_under_limit(struct res_counter *cnt)
147 return ret; 184 return ret;
148} 185}
149 186
187static inline bool res_counter_check_under_soft_limit(struct res_counter *cnt)
188{
189 bool ret;
190 unsigned long flags;
191
192 spin_lock_irqsave(&cnt->lock, flags);
193 ret = res_counter_soft_limit_check_locked(cnt);
194 spin_unlock_irqrestore(&cnt->lock, flags);
195 return ret;
196}
197
150static inline void res_counter_reset_max(struct res_counter *cnt) 198static inline void res_counter_reset_max(struct res_counter *cnt)
151{ 199{
152 unsigned long flags; 200 unsigned long flags;
@@ -180,4 +228,16 @@ static inline int res_counter_set_limit(struct res_counter *cnt,
180 return ret; 228 return ret;
181} 229}
182 230
231static inline int
232res_counter_set_soft_limit(struct res_counter *cnt,
233 unsigned long long soft_limit)
234{
235 unsigned long flags;
236
237 spin_lock_irqsave(&cnt->lock, flags);
238 cnt->soft_limit = soft_limit;
239 spin_unlock_irqrestore(&cnt->lock, flags);
240 return 0;
241}
242
183#endif 243#endif
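For illustration, a sketch (not part of this patch) of a charge path using the extended API. It assumes res_counter_charge() reports the first counter found over its soft limit through the new out-parameter and leaves it NULL otherwise; the reclaim reaction is a stand-in for what a controller such as memcg would do:

#include <linux/kernel.h>
#include <linux/res_counter.h>

static int example_charge(struct res_counter *cnt, unsigned long bytes)
{
	struct res_counter *fail_at, *soft_fail_at;
	int ret;

	ret = res_counter_charge(cnt, bytes, &fail_at, &soft_fail_at);
	if (ret)
		return ret;	/* hard limit hit at *fail_at */

	if (soft_fail_at) {
		unsigned long long excess =
			res_counter_soft_limit_excess(soft_fail_at);

		/* a controller would queue this group for reclaim */
		pr_debug("over soft limit by %llu bytes\n", excess);
	}
	return 0;
}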
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 477841d29fce..cb0ba7032609 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -81,7 +81,19 @@ static inline void page_dup_rmap(struct page *page)
81 */ 81 */
82int page_referenced(struct page *, int is_locked, 82int page_referenced(struct page *, int is_locked,
83 struct mem_cgroup *cnt, unsigned long *vm_flags); 83 struct mem_cgroup *cnt, unsigned long *vm_flags);
84int try_to_unmap(struct page *, int ignore_refs); 84enum ttu_flags {
85 TTU_UNMAP = 0, /* unmap mode */
86 TTU_MIGRATION = 1, /* migration mode */
87 TTU_MUNLOCK = 2, /* munlock mode */
88 TTU_ACTION_MASK = 0xff,
89
90 TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */
91 TTU_IGNORE_ACCESS = (1 << 9), /* don't age */
92 TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */
93};
94#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
95
96int try_to_unmap(struct page *, enum ttu_flags flags);
85 97
86/* 98/*
87 * Called from mm/filemap_xip.c to unmap empty zero page 99 * Called from mm/filemap_xip.c to unmap empty zero page
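For illustration, a caller in the new style (not part of this patch): the low byte selects the unmap mode, recoverable via TTU_ACTION(), and the high bits are modifiers. A memory-failure style caller might request a plain unmap while ignoring mlock and reference bits:

#include <linux/mm.h>
#include <linux/rmap.h>

static void example_unmap(struct page *page)
{
	enum ttu_flags flags = TTU_UNMAP | TTU_IGNORE_MLOCK |
			       TTU_IGNORE_ACCESS;

	if (try_to_unmap(page, flags) != SWAP_SUCCESS)
		pr_debug("page is still mapped\n");
}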
@@ -108,6 +120,13 @@ int page_mkclean(struct page *);
108 */ 120 */
109int try_to_munlock(struct page *); 121int try_to_munlock(struct page *);
110 122
123/*
124 * Called by memory-failure.c to kill processes.
125 */
126struct anon_vma *page_lock_anon_vma(struct page *page);
127void page_unlock_anon_vma(struct anon_vma *anon_vma);
128int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
129
111#else /* !CONFIG_MMU */ 130#else /* !CONFIG_MMU */
112 131
113#define anon_vma_init() do {} while (0) 132#define anon_vma_init() do {} while (0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 848d1f20086e..75e6e60bf583 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -309,7 +309,7 @@ extern void softlockup_tick(void);
309extern void touch_softlockup_watchdog(void); 309extern void touch_softlockup_watchdog(void);
310extern void touch_all_softlockup_watchdogs(void); 310extern void touch_all_softlockup_watchdogs(void);
311extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, 311extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
312 struct file *filp, void __user *buffer, 312 void __user *buffer,
313 size_t *lenp, loff_t *ppos); 313 size_t *lenp, loff_t *ppos);
314extern unsigned int softlockup_panic; 314extern unsigned int softlockup_panic;
315extern int softlockup_thresh; 315extern int softlockup_thresh;
@@ -331,7 +331,7 @@ extern unsigned long sysctl_hung_task_check_count;
331extern unsigned long sysctl_hung_task_timeout_secs; 331extern unsigned long sysctl_hung_task_timeout_secs;
332extern unsigned long sysctl_hung_task_warnings; 332extern unsigned long sysctl_hung_task_warnings;
333extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, 333extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
334 struct file *filp, void __user *buffer, 334 void __user *buffer,
335 size_t *lenp, loff_t *ppos); 335 size_t *lenp, loff_t *ppos);
336#endif 336#endif
337 337
@@ -1271,7 +1271,6 @@ struct task_struct {
1271 struct mm_struct *mm, *active_mm; 1271 struct mm_struct *mm, *active_mm;
1272 1272
1273/* task state */ 1273/* task state */
1274 struct linux_binfmt *binfmt;
1275 int exit_state; 1274 int exit_state;
1276 int exit_code, exit_signal; 1275 int exit_code, exit_signal;
1277 int pdeath_signal; /* The signal sent when the parent dies */ 1276 int pdeath_signal; /* The signal sent when the parent dies */
@@ -1735,6 +1734,7 @@ extern cputime_t task_gtime(struct task_struct *p);
1735#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ 1734#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
1736#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ 1735#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
1737#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ 1736#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
1737#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */
1738#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ 1738#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
1739#define PF_DUMPCORE 0x00000200 /* dumped core */ 1739#define PF_DUMPCORE 0x00000200 /* dumped core */
1740#define PF_SIGNALED 0x00000400 /* killed by a signal */ 1740#define PF_SIGNALED 0x00000400 /* killed by a signal */
@@ -1754,6 +1754,7 @@ extern cputime_t task_gtime(struct task_struct *p);
1754#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ 1754#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
1755#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ 1755#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
1756#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ 1756#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */
1757#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
1757#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ 1758#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
1758#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ 1759#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
1759#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ 1760#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */
@@ -1906,7 +1907,7 @@ extern unsigned int sysctl_sched_time_avg;
1906extern unsigned int sysctl_timer_migration; 1907extern unsigned int sysctl_timer_migration;
1907 1908
1908int sched_nr_latency_handler(struct ctl_table *table, int write, 1909int sched_nr_latency_handler(struct ctl_table *table, int write,
1909 struct file *file, void __user *buffer, size_t *length, 1910 void __user *buffer, size_t *length,
1910 loff_t *ppos); 1911 loff_t *ppos);
1911#endif 1912#endif
1912#ifdef CONFIG_SCHED_DEBUG 1913#ifdef CONFIG_SCHED_DEBUG
@@ -1924,7 +1925,7 @@ extern unsigned int sysctl_sched_rt_period;
1924extern int sysctl_sched_rt_runtime; 1925extern int sysctl_sched_rt_runtime;
1925 1926
1926int sched_rt_handler(struct ctl_table *table, int write, 1927int sched_rt_handler(struct ctl_table *table, int write,
1927 struct file *filp, void __user *buffer, size_t *lenp, 1928 void __user *buffer, size_t *lenp,
1928 loff_t *ppos); 1929 loff_t *ppos);
1929 1930
1930extern unsigned int sysctl_sched_compat_yield; 1931extern unsigned int sysctl_sched_compat_yield;
@@ -2059,6 +2060,7 @@ extern int kill_pgrp(struct pid *pid, int sig, int priv);
2059extern int kill_pid(struct pid *pid, int sig, int priv); 2060extern int kill_pid(struct pid *pid, int sig, int priv);
2060extern int kill_proc_info(int, struct siginfo *, pid_t); 2061extern int kill_proc_info(int, struct siginfo *, pid_t);
2061extern int do_notify_parent(struct task_struct *, int); 2062extern int do_notify_parent(struct task_struct *, int);
2063extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
2062extern void force_sig(int, struct task_struct *); 2064extern void force_sig(int, struct task_struct *);
2063extern void force_sig_specific(int, struct task_struct *); 2065extern void force_sig_specific(int, struct task_struct *);
2064extern int send_sig(int, struct task_struct *, int); 2066extern int send_sig(int, struct task_struct *, int);
@@ -2336,7 +2338,10 @@ static inline int signal_pending(struct task_struct *p)
2336 return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); 2338 return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
2337} 2339}
2338 2340
2339extern int __fatal_signal_pending(struct task_struct *p); 2341static inline int __fatal_signal_pending(struct task_struct *p)
2342{
2343 return unlikely(sigismember(&p->pending.signal, SIGKILL));
2344}
2340 2345
2341static inline int fatal_signal_pending(struct task_struct *p) 2346static inline int fatal_signal_pending(struct task_struct *p)
2342{ 2347{
diff --git a/include/linux/security.h b/include/linux/security.h
index d050b66ab9ef..239e40d0450b 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -133,7 +133,7 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
133 return PAGE_ALIGN(mmap_min_addr); 133 return PAGE_ALIGN(mmap_min_addr);
134 return hint; 134 return hint;
135} 135}
136extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, 136extern int mmap_min_addr_handler(struct ctl_table *table, int write,
137 void __user *buffer, size_t *lenp, loff_t *ppos); 137 void __user *buffer, size_t *lenp, loff_t *ppos);
138 138
139#ifdef CONFIG_SECURITY 139#ifdef CONFIG_SECURITY
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 0c6a86b79596..8366d8f12e53 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -35,6 +35,44 @@ struct seq_operations {
35 35
36#define SEQ_SKIP 1 36#define SEQ_SKIP 1
37 37
38/**
39 * seq_get_buf - get buffer to write arbitrary data to
40 * @m: the seq_file handle
41 * @bufp: the beginning of the buffer is stored here
42 *
43 * Return the number of bytes available in the buffer, or zero if
44 * there's no space.
45 */
46static inline size_t seq_get_buf(struct seq_file *m, char **bufp)
47{
48 BUG_ON(m->count > m->size);
49 if (m->count < m->size)
50 *bufp = m->buf + m->count;
51 else
52 *bufp = NULL;
53
54 return m->size - m->count;
55}
56
57/**
58 * seq_commit - commit data to the buffer
59 * @m: the seq_file handle
60 * @num: the number of bytes to commit
61 *
62 * Commit @num bytes of data written to a buffer previously acquired
 63 * by seq_get_buf(). To signal an error condition, or that the data
64 * didn't fit in the available space, pass a negative @num value.
65 */
66static inline void seq_commit(struct seq_file *m, int num)
67{
68 if (num < 0) {
69 m->count = m->size;
70 } else {
71 BUG_ON(m->count + num > m->size);
72 m->count += num;
73 }
74}
75
38char *mangle_path(char *s, char *p, char *esc); 76char *mangle_path(char *s, char *p, char *esc);
39int seq_open(struct file *, const struct seq_operations *); 77int seq_open(struct file *, const struct seq_operations *);
40ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); 78ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
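For illustration, a show() method using the pair (the formatting helper is hypothetical): grab the raw buffer, let the helper fill it, then commit however many bytes were written, or pass -1 so seq_file retries with a larger buffer:

static int example_show(struct seq_file *m, void *v)
{
	char *buf;
	size_t size = seq_get_buf(m, &buf);
	int len = -1;

	if (size)
		len = example_format(buf, size); /* bytes used, or -1 */
	seq_commit(m, len);	/* negative len marks the buffer full */
	return 0;
}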
diff --git a/include/linux/signal.h b/include/linux/signal.h
index c7552836bd95..ab9272cc270c 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -233,6 +233,8 @@ static inline int valid_signal(unsigned long sig)
233} 233}
234 234
235extern int next_signal(struct sigpending *pending, sigset_t *mask); 235extern int next_signal(struct sigpending *pending, sigset_t *mask);
236extern int do_send_sig_info(int sig, struct siginfo *info,
237 struct task_struct *p, bool group);
236extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p); 238extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
237extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); 239extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
238extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, 240extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig,
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 6c990e658f4e..4ec90019c1a4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -34,16 +34,38 @@ static inline int current_is_kswapd(void)
34 * the type/offset into the pte as 5/27 as well. 34 * the type/offset into the pte as 5/27 as well.
35 */ 35 */
36#define MAX_SWAPFILES_SHIFT 5 36#define MAX_SWAPFILES_SHIFT 5
37#ifndef CONFIG_MIGRATION 37
38#define MAX_SWAPFILES (1 << MAX_SWAPFILES_SHIFT) 38/*
 39 * Use some of the swap file numbers for other purposes. This
40 * is a convenient way to hook into the VM to trigger special
41 * actions on faults.
42 */
43
44/*
45 * NUMA node memory migration support
46 */
47#ifdef CONFIG_MIGRATION
48#define SWP_MIGRATION_NUM 2
49#define SWP_MIGRATION_READ (MAX_SWAPFILES + SWP_HWPOISON_NUM)
50#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1)
39#else 51#else
40/* Use last two entries for page migration swap entries */ 52#define SWP_MIGRATION_NUM 0
41#define MAX_SWAPFILES ((1 << MAX_SWAPFILES_SHIFT)-2)
42#define SWP_MIGRATION_READ MAX_SWAPFILES
43#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + 1)
44#endif 53#endif
45 54
46/* 55/*
56 * Handling of hardware poisoned pages with memory corruption.
57 */
58#ifdef CONFIG_MEMORY_FAILURE
59#define SWP_HWPOISON_NUM 1
60#define SWP_HWPOISON MAX_SWAPFILES
61#else
62#define SWP_HWPOISON_NUM 0
63#endif
64
65#define MAX_SWAPFILES \
66 ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
67
68/*
47 * Magic header for a swap area. The first part of the union is 69 * Magic header for a swap area. The first part of the union is
48 * what the swap magic looks like for the old (limited to 128MB) 70 * what the swap magic looks like for the old (limited to 128MB)
49 * swap area format, the second part of the union adds - in the 71 * swap area format, the second part of the union adds - in the
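
The reshuffled definitions above carve the special entry types out of the top of the 5-bit swap-type space, so MAX_SWAPFILES shrinks by however many special types are configured. Note that SWP_MIGRATION_READ can reference SWP_HWPOISON_NUM before its #define appears later in the header, since macros expand at the point of use. The resulting values for a config with both CONFIG_MIGRATION and CONFIG_MEMORY_FAILURE enabled can be checked with a trivial standalone program:

    #include <stdio.h>

    /* Mirrors the arithmetic in the hunk above, reordered so each macro
     * is defined before use in a standalone translation unit. */
    #define MAX_SWAPFILES_SHIFT 5
    #define SWP_HWPOISON_NUM    1
    #define SWP_MIGRATION_NUM   2
    #define MAX_SWAPFILES \
            ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
    #define SWP_HWPOISON        MAX_SWAPFILES
    #define SWP_MIGRATION_READ  (MAX_SWAPFILES + SWP_HWPOISON_NUM)
    #define SWP_MIGRATION_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1)

    int main(void)
    {
            printf("MAX_SWAPFILES       = %d\n", MAX_SWAPFILES);       /* 29 */
            printf("SWP_HWPOISON        = %d\n", SWP_HWPOISON);        /* 29 */
            printf("SWP_MIGRATION_READ  = %d\n", SWP_MIGRATION_READ);  /* 30 */
            printf("SWP_MIGRATION_WRITE = %d\n", SWP_MIGRATION_WRITE); /* 31 */
            return 0;
    }
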
@@ -217,6 +239,11 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
217extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, 239extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
218 gfp_t gfp_mask, bool noswap, 240 gfp_t gfp_mask, bool noswap,
219 unsigned int swappiness); 241 unsigned int swappiness);
242extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
243 gfp_t gfp_mask, bool noswap,
244 unsigned int swappiness,
245 struct zone *zone,
246 int nid);
220extern int __isolate_lru_page(struct page *page, int mode, int file); 247extern int __isolate_lru_page(struct page *page, int mode, int file);
221extern unsigned long shrink_all_memory(unsigned long nr_pages); 248extern unsigned long shrink_all_memory(unsigned long nr_pages);
222extern int vm_swappiness; 249extern int vm_swappiness;
@@ -240,7 +267,7 @@ extern int page_evictable(struct page *page, struct vm_area_struct *vma);
240extern void scan_mapping_unevictable_pages(struct address_space *); 267extern void scan_mapping_unevictable_pages(struct address_space *);
241 268
242extern unsigned long scan_unevictable_pages; 269extern unsigned long scan_unevictable_pages;
243extern int scan_unevictable_handler(struct ctl_table *, int, struct file *, 270extern int scan_unevictable_handler(struct ctl_table *, int,
244 void __user *, size_t *, loff_t *); 271 void __user *, size_t *, loff_t *);
245extern int scan_unevictable_register_node(struct node *node); 272extern int scan_unevictable_register_node(struct node *node);
246extern void scan_unevictable_unregister_node(struct node *node); 273extern void scan_unevictable_unregister_node(struct node *node);
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 6ec39ab27b4b..cd42e30b7c6e 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -131,3 +131,41 @@ static inline int is_write_migration_entry(swp_entry_t entry)
131 131
132#endif 132#endif
133 133
134#ifdef CONFIG_MEMORY_FAILURE
135/*
136 * Support for hardware poisoned pages
137 */
138static inline swp_entry_t make_hwpoison_entry(struct page *page)
139{
140 BUG_ON(!PageLocked(page));
141 return swp_entry(SWP_HWPOISON, page_to_pfn(page));
142}
143
144static inline int is_hwpoison_entry(swp_entry_t entry)
145{
146 return swp_type(entry) == SWP_HWPOISON;
147}
148#else
149
150static inline swp_entry_t make_hwpoison_entry(struct page *page)
151{
152 return swp_entry(0, 0);
153}
154
155static inline int is_hwpoison_entry(swp_entry_t swp)
156{
157 return 0;
158}
159#endif
160
161#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION)
162static inline int non_swap_entry(swp_entry_t entry)
163{
164 return swp_type(entry) >= MAX_SWAPFILES;
165}
166#else
167static inline int non_swap_entry(swp_entry_t entry)
168{
169 return 0;
170}
171#endif
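
non_swap_entry() relies on the layout established in swap.h: every special entry (hwpoison, migration) has a type at or above MAX_SWAPFILES. A user-space sketch of that classification; the bit packing below is illustrative, not the kernel's actual pte encoding:

    #include <stdio.h>

    #define MAX_SWAPFILES  29
    #define SWP_TYPE_SHIFT 58       /* illustrative field position */

    typedef unsigned long long swp_entry_t;

    static swp_entry_t make_entry(unsigned type, unsigned long long offset)
    {
            return ((swp_entry_t)type << SWP_TYPE_SHIFT) | offset;
    }

    static unsigned swp_type(swp_entry_t e)
    {
            return (unsigned)(e >> SWP_TYPE_SHIFT);
    }

    static int non_swap_entry(swp_entry_t e)
    {
            return swp_type(e) >= MAX_SWAPFILES;
    }

    int main(void)
    {
            printf("%d\n", non_swap_entry(make_entry(3, 100)));  /* 0: real swap */
            printf("%d\n", non_swap_entry(make_entry(29, 100))); /* 1: special */
            return 0;
    }
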
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index e76d3b22a466..1e4743ee6831 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -29,7 +29,6 @@
29#include <linux/types.h> 29#include <linux/types.h>
30#include <linux/compiler.h> 30#include <linux/compiler.h>
31 31
32struct file;
33struct completion; 32struct completion;
34 33
35#define CTL_MAXNAME 10 /* how many path components do we allow in a 34#define CTL_MAXNAME 10 /* how many path components do we allow in a
@@ -977,25 +976,25 @@ typedef int ctl_handler (struct ctl_table *table,
977 void __user *oldval, size_t __user *oldlenp, 976 void __user *oldval, size_t __user *oldlenp,
978 void __user *newval, size_t newlen); 977 void __user *newval, size_t newlen);
979 978
980typedef int proc_handler (struct ctl_table *ctl, int write, struct file * filp, 979typedef int proc_handler (struct ctl_table *ctl, int write,
981 void __user *buffer, size_t *lenp, loff_t *ppos); 980 void __user *buffer, size_t *lenp, loff_t *ppos);
982 981
983extern int proc_dostring(struct ctl_table *, int, struct file *, 982extern int proc_dostring(struct ctl_table *, int,
984 void __user *, size_t *, loff_t *); 983 void __user *, size_t *, loff_t *);
985extern int proc_dointvec(struct ctl_table *, int, struct file *, 984extern int proc_dointvec(struct ctl_table *, int,
986 void __user *, size_t *, loff_t *); 985 void __user *, size_t *, loff_t *);
987extern int proc_dointvec_minmax(struct ctl_table *, int, struct file *, 986extern int proc_dointvec_minmax(struct ctl_table *, int,
988 void __user *, size_t *, loff_t *); 987 void __user *, size_t *, loff_t *);
989extern int proc_dointvec_jiffies(struct ctl_table *, int, struct file *, 988extern int proc_dointvec_jiffies(struct ctl_table *, int,
990 void __user *, size_t *, loff_t *); 989 void __user *, size_t *, loff_t *);
991extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, struct file *, 990extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int,
992 void __user *, size_t *, loff_t *); 991 void __user *, size_t *, loff_t *);
993extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, struct file *, 992extern int proc_dointvec_ms_jiffies(struct ctl_table *, int,
994 void __user *, size_t *, loff_t *); 993 void __user *, size_t *, loff_t *);
995extern int proc_doulongvec_minmax(struct ctl_table *, int, struct file *, 994extern int proc_doulongvec_minmax(struct ctl_table *, int,
996 void __user *, size_t *, loff_t *); 995 void __user *, size_t *, loff_t *);
997extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, 996extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int,
998 struct file *, void __user *, size_t *, loff_t *); 997 void __user *, size_t *, loff_t *);
999 998
1000extern int do_sysctl (int __user *name, int nlen, 999extern int do_sysctl (int __user *name, int nlen,
1001 void __user *oldval, size_t __user *oldlenp, 1000 void __user *oldval, size_t __user *oldlenp,
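
This is the central piece of a tree-wide change: the unused struct file * argument is dropped from every sysctl proc handler, and the rest of the patch mechanically updates callers and wrappers. A kernel-style sketch of a custom handler under the new signature (not a standalone program; it assumes a kernel build context, and the backing variable is hypothetical):

    /* Sketch: wrap proc_dointvec() with the post-change five-argument
     * signature; note there is no struct file * anywhere. */
    static int example_dointvec(struct ctl_table *table, int write,
                                void __user *buffer, size_t *lenp, loff_t *ppos)
    {
            struct ctl_table tmp = *table;  /* per-call shadow copy */

            tmp.data = &example_value;      /* hypothetical int this knob controls */
            return proc_dointvec(&tmp, write, buffer, lenp, ppos);
    }
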
diff --git a/include/linux/time.h b/include/linux/time.h
index 56787c093345..fe04e5ef6a59 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -155,6 +155,34 @@ extern void timekeeping_leap_insert(int leapsecond);
155struct tms; 155struct tms;
156extern void do_sys_times(struct tms *); 156extern void do_sys_times(struct tms *);
157 157
158/*
159 * Similar to the struct tm in userspace <time.h>, but it needs to be here so
 160 * that the kernel source is self-contained.
161 */
162struct tm {
163 /*
164 * the number of seconds after the minute, normally in the range
165 * 0 to 59, but can be up to 60 to allow for leap seconds
166 */
167 int tm_sec;
 168 /* the number of minutes after the hour, in the range 0 to 59 */
169 int tm_min;
170 /* the number of hours past midnight, in the range 0 to 23 */
171 int tm_hour;
172 /* the day of the month, in the range 1 to 31 */
173 int tm_mday;
174 /* the number of months since January, in the range 0 to 11 */
175 int tm_mon;
176 /* the number of years since 1900 */
177 long tm_year;
178 /* the number of days since Sunday, in the range 0 to 6 */
179 int tm_wday;
180 /* the number of days since January 1, in the range 0 to 365 */
181 int tm_yday;
182};
183
184void time_to_tm(time_t totalsecs, int offset, struct tm *result);
185
158/** 186/**
159 * timespec_to_ns - Convert timespec to nanoseconds 187 * timespec_to_ns - Convert timespec to nanoseconds
160 * @ts: pointer to the timespec variable to be converted 188 * @ts: pointer to the timespec variable to be converted
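
The kernel gains its own struct tm and a time_to_tm() helper so it can break a time_t into calendar fields without depending on libc. In user space, gmtime_r() performs the equivalent conversion; a runnable check of the field conventions documented above (tm_year counts from 1900, tm_mon from 0):

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
            time_t t = 1234567890;          /* 2009-02-13 23:31:30 UTC */
            struct tm tm;

            gmtime_r(&t, &tm);
            printf("%04d-%02d-%02d %02d:%02d:%02d\n",
                   tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
                   tm.tm_hour, tm.tm_min, tm.tm_sec);
            return 0;
    }
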
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 17ba82efa483..1eb44a924e56 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * Tracing hooks 2 * Tracing hooks
3 * 3 *
4 * Copyright (C) 2008 Red Hat, Inc. All rights reserved. 4 * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved.
5 * 5 *
6 * This copyrighted material is made available to anyone wishing to use, 6 * This copyrighted material is made available to anyone wishing to use,
7 * modify, copy, or redistribute it subject to the terms and conditions 7 * modify, copy, or redistribute it subject to the terms and conditions
@@ -463,22 +463,38 @@ static inline int tracehook_get_signal(struct task_struct *task,
463 463
464/** 464/**
465 * tracehook_notify_jctl - report about job control stop/continue 465 * tracehook_notify_jctl - report about job control stop/continue
466 * @notify: nonzero if this is the last thread in the group to stop 466 * @notify: zero, %CLD_STOPPED or %CLD_CONTINUED
467 * @why: %CLD_STOPPED or %CLD_CONTINUED 467 * @why: %CLD_STOPPED or %CLD_CONTINUED
468 * 468 *
469 * This is called when we might call do_notify_parent_cldstop(). 469 * This is called when we might call do_notify_parent_cldstop().
470 * It's called when about to stop for job control; we are already in
471 * %TASK_STOPPED state, about to call schedule(). It's also called when
472 * a delayed %CLD_STOPPED or %CLD_CONTINUED report is ready to be made.
473 * 470 *
474 * Return nonzero to generate a %SIGCHLD with @why, which is 471 * @notify is zero if we would not ordinarily send a %SIGCHLD,
475 * normal if @notify is nonzero. 472 * or is the %CLD_STOPPED or %CLD_CONTINUED .si_code for %SIGCHLD.
476 * 473 *
477 * Called with no locks held. 474 * @why is %CLD_STOPPED when about to stop for job control;
475 * we are already in %TASK_STOPPED state, about to call schedule().
476 * It might also be that we have just exited (check %PF_EXITING),
477 * but need to report that a group-wide stop is complete.
478 *
479 * @why is %CLD_CONTINUED when waking up after job control stop and
480 * ready to make a delayed @notify report.
481 *
482 * Return the %CLD_* value for %SIGCHLD, or zero to generate no signal.
483 *
484 * Called with the siglock held.
478 */ 485 */
479static inline int tracehook_notify_jctl(int notify, int why) 486static inline int tracehook_notify_jctl(int notify, int why)
480{ 487{
481 return notify || (current->ptrace & PT_PTRACED); 488 return notify ?: (current->ptrace & PT_PTRACED) ? why : 0;
489}
490
491/**
492 * tracehook_finish_jctl - report about return from job control stop
493 *
494 * This is called by do_signal_stop() after wakeup.
495 */
496static inline void tracehook_finish_jctl(void)
497{
482} 498}
483 499
484#define DEATH_REAP -1 500#define DEATH_REAP -1
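
The rewritten tracehook_notify_jctl() uses GCC's `?:` extension: `a ?: b` evaluates to a when a is nonzero, else to b, so an explicit @notify code always wins and a ptraced task otherwise reports @why. A runnable demo of that selection logic (compiles with gcc or clang, which support the extension; the values are illustrative):

    #include <stdio.h>

    /* Mirrors the return expression in the new tracehook_notify_jctl(),
     * with a plain int standing in for the PT_PTRACED test. */
    static int notify_jctl(int notify, int why, int traced)
    {
            return notify ?: (traced ? why : 0);
    }

    int main(void)
    {
            printf("%d\n", notify_jctl(0, 17, 1));  /* 17: traced, report why */
            printf("%d\n", notify_jctl(0, 17, 0));  /*  0: nothing to report */
            printf("%d\n", notify_jctl(18, 17, 0)); /* 18: explicit notify wins */
            return 0;
    }
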
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 63a3f7a80580..660a9de96f81 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -4,7 +4,7 @@
4/* 4/*
5 * Kernel Tracepoint API. 5 * Kernel Tracepoint API.
6 * 6 *
7 * See Documentation/tracepoint.txt. 7 * See Documentation/trace/tracepoints.txt.
8 * 8 *
9 * (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> 9 * (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
10 * 10 *
diff --git a/include/linux/unaligned/be_byteshift.h b/include/linux/unaligned/be_byteshift.h
index 46dd12c5709e..9356b24223ac 100644
--- a/include/linux/unaligned/be_byteshift.h
+++ b/include/linux/unaligned/be_byteshift.h
@@ -1,7 +1,7 @@
1#ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H 1#ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H
2#define _LINUX_UNALIGNED_BE_BYTESHIFT_H 2#define _LINUX_UNALIGNED_BE_BYTESHIFT_H
3 3
4#include <linux/kernel.h> 4#include <linux/types.h>
5 5
6static inline u16 __get_unaligned_be16(const u8 *p) 6static inline u16 __get_unaligned_be16(const u8 *p)
7{ 7{
diff --git a/include/linux/unaligned/le_byteshift.h b/include/linux/unaligned/le_byteshift.h
index 59777e951baf..be376fb79b64 100644
--- a/include/linux/unaligned/le_byteshift.h
+++ b/include/linux/unaligned/le_byteshift.h
@@ -1,7 +1,7 @@
1#ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H 1#ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H
2#define _LINUX_UNALIGNED_LE_BYTESHIFT_H 2#define _LINUX_UNALIGNED_LE_BYTESHIFT_H
3 3
4#include <linux/kernel.h> 4#include <linux/types.h>
5 5
6static inline u16 __get_unaligned_le16(const u8 *p) 6static inline u16 __get_unaligned_le16(const u8 *p)
7{ 7{
diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h
index 923f9040ea20..2dfaa293ae8c 100644
--- a/include/linux/vgaarb.h
+++ b/include/linux/vgaarb.h
@@ -1,5 +1,6 @@
1/* 1/*
 2 * vgaarb.c 2 * The VGA arbiter manages VGA space routing and VGA resource decode to
3 * allow multiple VGA devices to be used in a system in a safe way.
3 * 4 *
4 * (C) Copyright 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org> 5 * (C) Copyright 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>
5 * (C) Copyright 2007 Paulo R. Zanoni <przanoni@gmail.com> 6 * (C) Copyright 2007 Paulo R. Zanoni <przanoni@gmail.com>
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 75cf58666ff9..66ebddcff664 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -110,21 +110,20 @@ extern int laptop_mode;
110extern unsigned long determine_dirtyable_memory(void); 110extern unsigned long determine_dirtyable_memory(void);
111 111
112extern int dirty_background_ratio_handler(struct ctl_table *table, int write, 112extern int dirty_background_ratio_handler(struct ctl_table *table, int write,
113 struct file *filp, void __user *buffer, size_t *lenp, 113 void __user *buffer, size_t *lenp,
114 loff_t *ppos); 114 loff_t *ppos);
115extern int dirty_background_bytes_handler(struct ctl_table *table, int write, 115extern int dirty_background_bytes_handler(struct ctl_table *table, int write,
116 struct file *filp, void __user *buffer, size_t *lenp, 116 void __user *buffer, size_t *lenp,
117 loff_t *ppos); 117 loff_t *ppos);
118extern int dirty_ratio_handler(struct ctl_table *table, int write, 118extern int dirty_ratio_handler(struct ctl_table *table, int write,
119 struct file *filp, void __user *buffer, size_t *lenp, 119 void __user *buffer, size_t *lenp,
120 loff_t *ppos); 120 loff_t *ppos);
121extern int dirty_bytes_handler(struct ctl_table *table, int write, 121extern int dirty_bytes_handler(struct ctl_table *table, int write,
122 struct file *filp, void __user *buffer, size_t *lenp, 122 void __user *buffer, size_t *lenp,
123 loff_t *ppos); 123 loff_t *ppos);
124 124
125struct ctl_table; 125struct ctl_table;
126struct file; 126int dirty_writeback_centisecs_handler(struct ctl_table *, int,
127int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *,
128 void __user *, size_t *, loff_t *); 127 void __user *, size_t *, loff_t *);
129 128
130void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, 129void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
diff --git a/include/net/ip.h b/include/net/ip.h
index 72c36926c26d..5b26a0bd178e 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -399,7 +399,7 @@ extern void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport,
399 * fed into the routing cache should use these handlers. 399 * fed into the routing cache should use these handlers.
400 */ 400 */
401int ipv4_doint_and_flush(ctl_table *ctl, int write, 401int ipv4_doint_and_flush(ctl_table *ctl, int write,
402 struct file* filp, void __user *buffer, 402 void __user *buffer,
403 size_t *lenp, loff_t *ppos); 403 size_t *lenp, loff_t *ppos);
404int ipv4_doint_and_flush_strategy(ctl_table *table, 404int ipv4_doint_and_flush_strategy(ctl_table *table,
405 void __user *oldval, size_t __user *oldlenp, 405 void __user *oldval, size_t __user *oldlenp,
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 1459ed3e2697..f76f22d05721 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -55,7 +55,6 @@ enum {
55#include <net/neighbour.h> 55#include <net/neighbour.h>
56 56
57struct ctl_table; 57struct ctl_table;
58struct file;
59struct inet6_dev; 58struct inet6_dev;
60struct net_device; 59struct net_device;
61struct net_proto_family; 60struct net_proto_family;
@@ -139,7 +138,6 @@ extern int igmp6_event_report(struct sk_buff *skb);
139#ifdef CONFIG_SYSCTL 138#ifdef CONFIG_SYSCTL
140extern int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, 139extern int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl,
141 int write, 140 int write,
142 struct file * filp,
143 void __user *buffer, 141 void __user *buffer,
144 size_t *lenp, 142 size_t *lenp,
145 loff_t *ppos); 143 loff_t *ppos);
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 40eab7314aeb..7d3704750efc 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -27,18 +27,18 @@ static void *get_ipc(ctl_table *table)
27} 27}
28 28
29#ifdef CONFIG_PROC_SYSCTL 29#ifdef CONFIG_PROC_SYSCTL
30static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, 30static int proc_ipc_dointvec(ctl_table *table, int write,
31 void __user *buffer, size_t *lenp, loff_t *ppos) 31 void __user *buffer, size_t *lenp, loff_t *ppos)
32{ 32{
33 struct ctl_table ipc_table; 33 struct ctl_table ipc_table;
34 memcpy(&ipc_table, table, sizeof(ipc_table)); 34 memcpy(&ipc_table, table, sizeof(ipc_table));
35 ipc_table.data = get_ipc(table); 35 ipc_table.data = get_ipc(table);
36 36
37 return proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); 37 return proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
38} 38}
39 39
40static int proc_ipc_callback_dointvec(ctl_table *table, int write, 40static int proc_ipc_callback_dointvec(ctl_table *table, int write,
41 struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) 41 void __user *buffer, size_t *lenp, loff_t *ppos)
42{ 42{
43 struct ctl_table ipc_table; 43 struct ctl_table ipc_table;
44 size_t lenp_bef = *lenp; 44 size_t lenp_bef = *lenp;
@@ -47,7 +47,7 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write,
47 memcpy(&ipc_table, table, sizeof(ipc_table)); 47 memcpy(&ipc_table, table, sizeof(ipc_table));
48 ipc_table.data = get_ipc(table); 48 ipc_table.data = get_ipc(table);
49 49
50 rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); 50 rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
51 51
52 if (write && !rc && lenp_bef == *lenp) 52 if (write && !rc && lenp_bef == *lenp)
53 /* 53 /*
@@ -61,13 +61,13 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write,
61} 61}
62 62
63static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, 63static int proc_ipc_doulongvec_minmax(ctl_table *table, int write,
64 struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) 64 void __user *buffer, size_t *lenp, loff_t *ppos)
65{ 65{
66 struct ctl_table ipc_table; 66 struct ctl_table ipc_table;
67 memcpy(&ipc_table, table, sizeof(ipc_table)); 67 memcpy(&ipc_table, table, sizeof(ipc_table));
68 ipc_table.data = get_ipc(table); 68 ipc_table.data = get_ipc(table);
69 69
70 return proc_doulongvec_minmax(&ipc_table, write, filp, buffer, 70 return proc_doulongvec_minmax(&ipc_table, write, buffer,
71 lenp, ppos); 71 lenp, ppos);
72} 72}
73 73
@@ -95,7 +95,7 @@ static void ipc_auto_callback(int val)
95} 95}
96 96
97static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, 97static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
98 struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) 98 void __user *buffer, size_t *lenp, loff_t *ppos)
99{ 99{
100 struct ctl_table ipc_table; 100 struct ctl_table ipc_table;
101 size_t lenp_bef = *lenp; 101 size_t lenp_bef = *lenp;
@@ -106,7 +106,7 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
106 ipc_table.data = get_ipc(table); 106 ipc_table.data = get_ipc(table);
107 oldval = *((int *)(ipc_table.data)); 107 oldval = *((int *)(ipc_table.data));
108 108
109 rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos); 109 rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
110 110
111 if (write && !rc && lenp_bef == *lenp) { 111 if (write && !rc && lenp_bef == *lenp) {
112 int newval = *((int *)(ipc_table.data)); 112 int newval = *((int *)(ipc_table.data));
diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c
index 24ae46dfe45d..8a058711fc10 100644
--- a/ipc/mq_sysctl.c
+++ b/ipc/mq_sysctl.c
@@ -31,24 +31,24 @@ static void *get_mq(ctl_table *table)
31 return which; 31 return which;
32} 32}
33 33
34static int proc_mq_dointvec(ctl_table *table, int write, struct file *filp, 34static int proc_mq_dointvec(ctl_table *table, int write,
35 void __user *buffer, size_t *lenp, loff_t *ppos) 35 void __user *buffer, size_t *lenp, loff_t *ppos)
36{ 36{
37 struct ctl_table mq_table; 37 struct ctl_table mq_table;
38 memcpy(&mq_table, table, sizeof(mq_table)); 38 memcpy(&mq_table, table, sizeof(mq_table));
39 mq_table.data = get_mq(table); 39 mq_table.data = get_mq(table);
40 40
41 return proc_dointvec(&mq_table, write, filp, buffer, lenp, ppos); 41 return proc_dointvec(&mq_table, write, buffer, lenp, ppos);
42} 42}
43 43
44static int proc_mq_dointvec_minmax(ctl_table *table, int write, 44static int proc_mq_dointvec_minmax(ctl_table *table, int write,
45 struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) 45 void __user *buffer, size_t *lenp, loff_t *ppos)
46{ 46{
47 struct ctl_table mq_table; 47 struct ctl_table mq_table;
48 memcpy(&mq_table, table, sizeof(mq_table)); 48 memcpy(&mq_table, table, sizeof(mq_table));
49 mq_table.data = get_mq(table); 49 mq_table.data = get_mq(table);
50 50
51 return proc_dointvec_minmax(&mq_table, write, filp, buffer, 51 return proc_dointvec_minmax(&mq_table, write, buffer,
52 lenp, ppos); 52 lenp, ppos);
53} 53}
54#else 54#else
diff --git a/kernel/Makefile b/kernel/Makefile
index 187c89b4783d..b8d4cd8ac0b9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -58,7 +58,6 @@ obj-$(CONFIG_KEXEC) += kexec.o
58obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o 58obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
59obj-$(CONFIG_COMPAT) += compat.o 59obj-$(CONFIG_COMPAT) += compat.o
60obj-$(CONFIG_CGROUPS) += cgroup.o 60obj-$(CONFIG_CGROUPS) += cgroup.o
61obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
62obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o 61obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
63obj-$(CONFIG_CPUSETS) += cpuset.o 62obj-$(CONFIG_CPUSETS) += cpuset.o
64obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o 63obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
diff --git a/kernel/audit.c b/kernel/audit.c
index defc2e6f1e3b..5feed232be9d 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -855,18 +855,24 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
855 break; 855 break;
856 } 856 }
857 case AUDIT_SIGNAL_INFO: 857 case AUDIT_SIGNAL_INFO:
858 err = security_secid_to_secctx(audit_sig_sid, &ctx, &len); 858 len = 0;
859 if (err) 859 if (audit_sig_sid) {
860 return err; 860 err = security_secid_to_secctx(audit_sig_sid, &ctx, &len);
861 if (err)
862 return err;
863 }
861 sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL); 864 sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL);
862 if (!sig_data) { 865 if (!sig_data) {
863 security_release_secctx(ctx, len); 866 if (audit_sig_sid)
867 security_release_secctx(ctx, len);
864 return -ENOMEM; 868 return -ENOMEM;
865 } 869 }
866 sig_data->uid = audit_sig_uid; 870 sig_data->uid = audit_sig_uid;
867 sig_data->pid = audit_sig_pid; 871 sig_data->pid = audit_sig_pid;
868 memcpy(sig_data->ctx, ctx, len); 872 if (audit_sig_sid) {
869 security_release_secctx(ctx, len); 873 memcpy(sig_data->ctx, ctx, len);
874 security_release_secctx(ctx, len);
875 }
870 audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO, 876 audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO,
871 0, 0, sig_data, sizeof(*sig_data) + len); 877 0, 0, sig_data, sizeof(*sig_data) + len);
872 kfree(sig_data); 878 kfree(sig_data);
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 0e96dbc60ea9..cc7e87936cbc 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -45,8 +45,8 @@
45 45
46struct audit_watch { 46struct audit_watch {
47 atomic_t count; /* reference count */ 47 atomic_t count; /* reference count */
48 char *path; /* insertion path */
49 dev_t dev; /* associated superblock device */ 48 dev_t dev; /* associated superblock device */
49 char *path; /* insertion path */
50 unsigned long ino; /* associated inode number */ 50 unsigned long ino; /* associated inode number */
51 struct audit_parent *parent; /* associated parent */ 51 struct audit_parent *parent; /* associated parent */
52 struct list_head wlist; /* entry in parent->watches list */ 52 struct list_head wlist; /* entry in parent->watches list */
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 68d3c6a0ecd6..267e484f0198 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -168,12 +168,12 @@ struct audit_context {
168 int in_syscall; /* 1 if task is in a syscall */ 168 int in_syscall; /* 1 if task is in a syscall */
169 enum audit_state state, current_state; 169 enum audit_state state, current_state;
170 unsigned int serial; /* serial number for record */ 170 unsigned int serial; /* serial number for record */
171 struct timespec ctime; /* time of syscall entry */
172 int major; /* syscall number */ 171 int major; /* syscall number */
172 struct timespec ctime; /* time of syscall entry */
173 unsigned long argv[4]; /* syscall arguments */ 173 unsigned long argv[4]; /* syscall arguments */
174 int return_valid; /* return code is valid */
175 long return_code;/* syscall return code */ 174 long return_code;/* syscall return code */
176 u64 prio; 175 u64 prio;
176 int return_valid; /* return code is valid */
177 int name_count; 177 int name_count;
178 struct audit_names names[AUDIT_NAMES]; 178 struct audit_names names[AUDIT_NAMES];
179 char * filterkey; /* key for rule that triggered record */ 179 char * filterkey; /* key for rule that triggered record */
@@ -198,8 +198,8 @@ struct audit_context {
198 char target_comm[TASK_COMM_LEN]; 198 char target_comm[TASK_COMM_LEN];
199 199
200 struct audit_tree_refs *trees, *first_trees; 200 struct audit_tree_refs *trees, *first_trees;
201 int tree_count;
202 struct list_head killed_trees; 201 struct list_head killed_trees;
202 int tree_count;
203 203
204 int type; 204 int type;
205 union { 205 union {
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index cd83d9933b6b..7ccba4bc5e3b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -23,6 +23,7 @@
23 */ 23 */
24 24
25#include <linux/cgroup.h> 25#include <linux/cgroup.h>
26#include <linux/ctype.h>
26#include <linux/errno.h> 27#include <linux/errno.h>
27#include <linux/fs.h> 28#include <linux/fs.h>
28#include <linux/kernel.h> 29#include <linux/kernel.h>
@@ -48,6 +49,8 @@
48#include <linux/namei.h> 49#include <linux/namei.h>
49#include <linux/smp_lock.h> 50#include <linux/smp_lock.h>
50#include <linux/pid_namespace.h> 51#include <linux/pid_namespace.h>
52#include <linux/idr.h>
53#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
51 54
52#include <asm/atomic.h> 55#include <asm/atomic.h>
53 56
@@ -60,6 +63,8 @@ static struct cgroup_subsys *subsys[] = {
60#include <linux/cgroup_subsys.h> 63#include <linux/cgroup_subsys.h>
61}; 64};
62 65
66#define MAX_CGROUP_ROOT_NAMELEN 64
67
63/* 68/*
64 * A cgroupfs_root represents the root of a cgroup hierarchy, 69 * A cgroupfs_root represents the root of a cgroup hierarchy,
65 * and may be associated with a superblock to form an active 70 * and may be associated with a superblock to form an active
@@ -74,6 +79,9 @@ struct cgroupfs_root {
74 */ 79 */
75 unsigned long subsys_bits; 80 unsigned long subsys_bits;
76 81
82 /* Unique id for this hierarchy. */
83 int hierarchy_id;
84
77 /* The bitmask of subsystems currently attached to this hierarchy */ 85 /* The bitmask of subsystems currently attached to this hierarchy */
78 unsigned long actual_subsys_bits; 86 unsigned long actual_subsys_bits;
79 87
@@ -94,6 +102,9 @@ struct cgroupfs_root {
94 102
95 /* The path to use for release notifications. */ 103 /* The path to use for release notifications. */
96 char release_agent_path[PATH_MAX]; 104 char release_agent_path[PATH_MAX];
105
106 /* The name for this hierarchy - may be empty */
107 char name[MAX_CGROUP_ROOT_NAMELEN];
97}; 108};
98 109
99/* 110/*
@@ -141,6 +152,10 @@ struct css_id {
141static LIST_HEAD(roots); 152static LIST_HEAD(roots);
142static int root_count; 153static int root_count;
143 154
155static DEFINE_IDA(hierarchy_ida);
156static int next_hierarchy_id;
157static DEFINE_SPINLOCK(hierarchy_id_lock);
158
144/* dummytop is a shorthand for the dummy hierarchy's top cgroup */ 159/* dummytop is a shorthand for the dummy hierarchy's top cgroup */
145#define dummytop (&rootnode.top_cgroup) 160#define dummytop (&rootnode.top_cgroup)
146 161
@@ -201,6 +216,7 @@ struct cg_cgroup_link {
201 * cgroup, anchored on cgroup->css_sets 216 * cgroup, anchored on cgroup->css_sets
202 */ 217 */
203 struct list_head cgrp_link_list; 218 struct list_head cgrp_link_list;
219 struct cgroup *cgrp;
204 /* 220 /*
205 * List running through cg_cgroup_links pointing at a 221 * List running through cg_cgroup_links pointing at a
206 * single css_set object, anchored on css_set->cg_links 222 * single css_set object, anchored on css_set->cg_links
@@ -227,8 +243,11 @@ static int cgroup_subsys_init_idr(struct cgroup_subsys *ss);
227static DEFINE_RWLOCK(css_set_lock); 243static DEFINE_RWLOCK(css_set_lock);
228static int css_set_count; 244static int css_set_count;
229 245
230/* hash table for cgroup groups. This improves the performance to 246/*
 231 * find an existing css_set */ 247 * hash table for cgroup groups. This improves the performance of finding
248 * an existing css_set. This hash doesn't (currently) take into
249 * account cgroups in empty hierarchies.
250 */
232#define CSS_SET_HASH_BITS 7 251#define CSS_SET_HASH_BITS 7
233#define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS) 252#define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS)
234static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE]; 253static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
@@ -248,48 +267,22 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
248 return &css_set_table[index]; 267 return &css_set_table[index];
249} 268}
250 269
270static void free_css_set_rcu(struct rcu_head *obj)
271{
272 struct css_set *cg = container_of(obj, struct css_set, rcu_head);
273 kfree(cg);
274}
275
251/* We don't maintain the lists running through each css_set to its 276/* We don't maintain the lists running through each css_set to its
252 * task until after the first call to cgroup_iter_start(). This 277 * task until after the first call to cgroup_iter_start(). This
253 * reduces the fork()/exit() overhead for people who have cgroups 278 * reduces the fork()/exit() overhead for people who have cgroups
254 * compiled into their kernel but not actually in use */ 279 * compiled into their kernel but not actually in use */
255static int use_task_css_set_links __read_mostly; 280static int use_task_css_set_links __read_mostly;
256 281
257/* When we create or destroy a css_set, the operation simply 282static void __put_css_set(struct css_set *cg, int taskexit)
258 * takes/releases a reference count on all the cgroups referenced
259 * by subsystems in this css_set. This can end up multiple-counting
260 * some cgroups, but that's OK - the ref-count is just a
261 * busy/not-busy indicator; ensuring that we only count each cgroup
262 * once would require taking a global lock to ensure that no
263 * subsystems moved between hierarchies while we were doing so.
264 *
265 * Possible TODO: decide at boot time based on the number of
266 * registered subsystems and the number of CPUs or NUMA nodes whether
267 * it's better for performance to ref-count every subsystem, or to
268 * take a global lock and only add one ref count to each hierarchy.
269 */
270
271/*
272 * unlink a css_set from the list and free it
273 */
274static void unlink_css_set(struct css_set *cg)
275{ 283{
276 struct cg_cgroup_link *link; 284 struct cg_cgroup_link *link;
277 struct cg_cgroup_link *saved_link; 285 struct cg_cgroup_link *saved_link;
278
279 hlist_del(&cg->hlist);
280 css_set_count--;
281
282 list_for_each_entry_safe(link, saved_link, &cg->cg_links,
283 cg_link_list) {
284 list_del(&link->cg_link_list);
285 list_del(&link->cgrp_link_list);
286 kfree(link);
287 }
288}
289
290static void __put_css_set(struct css_set *cg, int taskexit)
291{
292 int i;
293 /* 286 /*
294 * Ensure that the refcount doesn't hit zero while any readers 287 * Ensure that the refcount doesn't hit zero while any readers
295 * can see it. Similar to atomic_dec_and_lock(), but for an 288 * can see it. Similar to atomic_dec_and_lock(), but for an
@@ -302,21 +295,28 @@ static void __put_css_set(struct css_set *cg, int taskexit)
302 write_unlock(&css_set_lock); 295 write_unlock(&css_set_lock);
303 return; 296 return;
304 } 297 }
305 unlink_css_set(cg);
306 write_unlock(&css_set_lock);
307 298
308 rcu_read_lock(); 299 /* This css_set is dead. unlink it and release cgroup refcounts */
309 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 300 hlist_del(&cg->hlist);
310 struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup); 301 css_set_count--;
302
303 list_for_each_entry_safe(link, saved_link, &cg->cg_links,
304 cg_link_list) {
305 struct cgroup *cgrp = link->cgrp;
306 list_del(&link->cg_link_list);
307 list_del(&link->cgrp_link_list);
311 if (atomic_dec_and_test(&cgrp->count) && 308 if (atomic_dec_and_test(&cgrp->count) &&
312 notify_on_release(cgrp)) { 309 notify_on_release(cgrp)) {
313 if (taskexit) 310 if (taskexit)
314 set_bit(CGRP_RELEASABLE, &cgrp->flags); 311 set_bit(CGRP_RELEASABLE, &cgrp->flags);
315 check_for_release(cgrp); 312 check_for_release(cgrp);
316 } 313 }
314
315 kfree(link);
317 } 316 }
318 rcu_read_unlock(); 317
319 kfree(cg); 318 write_unlock(&css_set_lock);
319 call_rcu(&cg->rcu_head, free_css_set_rcu);
320} 320}
321 321
322/* 322/*
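
The restructured __put_css_set() now unlinks everything under css_set_lock and defers the final kfree() through call_rcu(), with free_css_set_rcu() recovering the object from its embedded rcu_head via container_of(). A user-space sketch of that embed-and-recover idiom; the stand-in call_rcu() below runs the callback immediately, whereas the kernel waits out a grace period first:

    #include <stddef.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct rcu_head {
            void (*func)(struct rcu_head *);
    };

    struct css_set {
            int refcount;
            struct rcu_head rcu_head;       /* embedded callback head */
    };

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    static void free_css_set_rcu(struct rcu_head *obj)
    {
            /* recover the enclosing object from the embedded head */
            struct css_set *cg = container_of(obj, struct css_set, rcu_head);

            printf("freeing css_set with refcount %d\n", cg->refcount);
            free(cg);
    }

    static void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *))
    {
            func(head);     /* kernel: deferred until readers are done */
    }

    int main(void)
    {
            struct css_set *cg = calloc(1, sizeof(*cg));

            call_rcu(&cg->rcu_head, free_css_set_rcu);
            return 0;
    }
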
@@ -338,6 +338,78 @@ static inline void put_css_set_taskexit(struct css_set *cg)
338} 338}
339 339
340/* 340/*
341 * compare_css_sets - helper function for find_existing_css_set().
342 * @cg: candidate css_set being tested
343 * @old_cg: existing css_set for a task
344 * @new_cgrp: cgroup that's being entered by the task
345 * @template: desired set of css pointers in css_set (pre-calculated)
346 *
347 * Returns true if "cg" matches "old_cg" except for the hierarchy
348 * which "new_cgrp" belongs to, for which it should match "new_cgrp".
349 */
350static bool compare_css_sets(struct css_set *cg,
351 struct css_set *old_cg,
352 struct cgroup *new_cgrp,
353 struct cgroup_subsys_state *template[])
354{
355 struct list_head *l1, *l2;
356
357 if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
358 /* Not all subsystems matched */
359 return false;
360 }
361
362 /*
363 * Compare cgroup pointers in order to distinguish between
 364 * different cgroups in hierarchies with no subsystems. We
365 * could get by with just this check alone (and skip the
366 * memcmp above) but on most setups the memcmp check will
367 * avoid the need for this more expensive check on almost all
368 * candidates.
369 */
370
371 l1 = &cg->cg_links;
372 l2 = &old_cg->cg_links;
373 while (1) {
374 struct cg_cgroup_link *cgl1, *cgl2;
375 struct cgroup *cg1, *cg2;
376
377 l1 = l1->next;
378 l2 = l2->next;
379 /* See if we reached the end - both lists are equal length. */
380 if (l1 == &cg->cg_links) {
381 BUG_ON(l2 != &old_cg->cg_links);
382 break;
383 } else {
384 BUG_ON(l2 == &old_cg->cg_links);
385 }
386 /* Locate the cgroups associated with these links. */
387 cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
388 cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
389 cg1 = cgl1->cgrp;
390 cg2 = cgl2->cgrp;
391 /* Hierarchies should be linked in the same order. */
392 BUG_ON(cg1->root != cg2->root);
393
394 /*
395 * If this hierarchy is the hierarchy of the cgroup
396 * that's changing, then we need to check that this
397 * css_set points to the new cgroup; if it's any other
398 * hierarchy, then this css_set should point to the
399 * same cgroup as the old css_set.
400 */
401 if (cg1->root == new_cgrp->root) {
402 if (cg1 != new_cgrp)
403 return false;
404 } else {
405 if (cg1 != cg2)
406 return false;
407 }
408 }
409 return true;
410}
411
412/*
341 * find_existing_css_set() is a helper for 413 * find_existing_css_set() is a helper for
342 * find_css_set(), and checks to see whether an existing 414 * find_css_set(), and checks to see whether an existing
343 * css_set is suitable. 415 * css_set is suitable.
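
compare_css_sets() can walk the two cg_links lists in lockstep only because link_css_set() (further down in this patch) now always appends to the tail, keeping every css_set's links sorted in hierarchy-creation order. A user-space sketch of the pairwise comparison, with arrays standing in for the linked lists and small integers standing in for cgroup pointers:

    #include <stdio.h>

    /* Position i corresponds to hierarchy i. The candidate matches iff it
     * agrees with the old set everywhere except the changing hierarchy,
     * where it must point at the new cgroup. */
    static int compare_lockstep(const int *cand, const int *old, int n,
                                int changing, int new_cgrp)
    {
            for (int i = 0; i < n; i++) {
                    if (i == changing) {
                            if (cand[i] != new_cgrp)
                                    return 0;
                    } else if (cand[i] != old[i]) {
                            return 0;
                    }
            }
            return 1;
    }

    int main(void)
    {
            int candidate[] = { 1, 9, 3 };
            int old[]       = { 1, 2, 3 };

            /* hierarchy 1 is changing; its entry must equal new cgroup 9 */
            printf("%d\n", compare_lockstep(candidate, old, 3, 1, 9)); /* 1 */
            printf("%d\n", compare_lockstep(candidate, old, 3, 0, 9)); /* 0 */
            return 0;
    }
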
@@ -378,10 +450,11 @@ static struct css_set *find_existing_css_set(
378 450
379 hhead = css_set_hash(template); 451 hhead = css_set_hash(template);
380 hlist_for_each_entry(cg, node, hhead, hlist) { 452 hlist_for_each_entry(cg, node, hhead, hlist) {
381 if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) { 453 if (!compare_css_sets(cg, oldcg, cgrp, template))
382 /* All subsystems matched */ 454 continue;
383 return cg; 455
384 } 456 /* This css_set matches what we need */
457 return cg;
385 } 458 }
386 459
387 /* No existing cgroup group matched */ 460 /* No existing cgroup group matched */
@@ -435,8 +508,14 @@ static void link_css_set(struct list_head *tmp_cg_links,
435 link = list_first_entry(tmp_cg_links, struct cg_cgroup_link, 508 link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
436 cgrp_link_list); 509 cgrp_link_list);
437 link->cg = cg; 510 link->cg = cg;
511 link->cgrp = cgrp;
512 atomic_inc(&cgrp->count);
438 list_move(&link->cgrp_link_list, &cgrp->css_sets); 513 list_move(&link->cgrp_link_list, &cgrp->css_sets);
439 list_add(&link->cg_link_list, &cg->cg_links); 514 /*
515 * Always add links to the tail of the list so that the list
516 * is sorted by order of hierarchy creation
517 */
518 list_add_tail(&link->cg_link_list, &cg->cg_links);
440} 519}
441 520
442/* 521/*
@@ -451,11 +530,11 @@ static struct css_set *find_css_set(
451{ 530{
452 struct css_set *res; 531 struct css_set *res;
453 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT]; 532 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
454 int i;
455 533
456 struct list_head tmp_cg_links; 534 struct list_head tmp_cg_links;
457 535
458 struct hlist_head *hhead; 536 struct hlist_head *hhead;
537 struct cg_cgroup_link *link;
459 538
460 /* First see if we already have a cgroup group that matches 539 /* First see if we already have a cgroup group that matches
461 * the desired set */ 540 * the desired set */
@@ -489,20 +568,12 @@ static struct css_set *find_css_set(
489 568
490 write_lock(&css_set_lock); 569 write_lock(&css_set_lock);
491 /* Add reference counts and links from the new css_set. */ 570 /* Add reference counts and links from the new css_set. */
492 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 571 list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
493 struct cgroup *cgrp = res->subsys[i]->cgroup; 572 struct cgroup *c = link->cgrp;
494 struct cgroup_subsys *ss = subsys[i]; 573 if (c->root == cgrp->root)
495 atomic_inc(&cgrp->count); 574 c = cgrp;
496 /* 575 link_css_set(&tmp_cg_links, res, c);
497 * We want to add a link once per cgroup, so we
498 * only do it for the first subsystem in each
499 * hierarchy
500 */
501 if (ss->root->subsys_list.next == &ss->sibling)
502 link_css_set(&tmp_cg_links, res, cgrp);
503 } 576 }
504 if (list_empty(&rootnode.subsys_list))
505 link_css_set(&tmp_cg_links, res, dummytop);
506 577
507 BUG_ON(!list_empty(&tmp_cg_links)); 578 BUG_ON(!list_empty(&tmp_cg_links));
508 579
@@ -518,6 +589,41 @@ static struct css_set *find_css_set(
518} 589}
519 590
520/* 591/*
592 * Return the cgroup for "task" from the given hierarchy. Must be
593 * called with cgroup_mutex held.
594 */
595static struct cgroup *task_cgroup_from_root(struct task_struct *task,
596 struct cgroupfs_root *root)
597{
598 struct css_set *css;
599 struct cgroup *res = NULL;
600
601 BUG_ON(!mutex_is_locked(&cgroup_mutex));
602 read_lock(&css_set_lock);
603 /*
604 * No need to lock the task - since we hold cgroup_mutex the
605 * task can't change groups, so the only thing that can happen
606 * is that it exits and its css is set back to init_css_set.
607 */
608 css = task->cgroups;
609 if (css == &init_css_set) {
610 res = &root->top_cgroup;
611 } else {
612 struct cg_cgroup_link *link;
613 list_for_each_entry(link, &css->cg_links, cg_link_list) {
614 struct cgroup *c = link->cgrp;
615 if (c->root == root) {
616 res = c;
617 break;
618 }
619 }
620 }
621 read_unlock(&css_set_lock);
622 BUG_ON(!res);
623 return res;
624}
625
626/*
521 * There is one global cgroup mutex. We also require taking 627 * There is one global cgroup mutex. We also require taking
522 * task_lock() when dereferencing a task's cgroup subsys pointers. 628 * task_lock() when dereferencing a task's cgroup subsys pointers.
523 * See "The task_lock() exception", at the end of this comment. 629 * See "The task_lock() exception", at the end of this comment.
@@ -677,6 +783,12 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
677 */ 783 */
678 deactivate_super(cgrp->root->sb); 784 deactivate_super(cgrp->root->sb);
679 785
786 /*
787 * if we're getting rid of the cgroup, refcount should ensure
788 * that there are no pidlists left.
789 */
790 BUG_ON(!list_empty(&cgrp->pidlists));
791
680 call_rcu(&cgrp->rcu_head, free_cgroup_rcu); 792 call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
681 } 793 }
682 iput(inode); 794 iput(inode);
@@ -841,6 +953,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
841 seq_puts(seq, ",noprefix"); 953 seq_puts(seq, ",noprefix");
842 if (strlen(root->release_agent_path)) 954 if (strlen(root->release_agent_path))
843 seq_printf(seq, ",release_agent=%s", root->release_agent_path); 955 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
956 if (strlen(root->name))
957 seq_printf(seq, ",name=%s", root->name);
844 mutex_unlock(&cgroup_mutex); 958 mutex_unlock(&cgroup_mutex);
845 return 0; 959 return 0;
846} 960}
@@ -849,6 +963,12 @@ struct cgroup_sb_opts {
849 unsigned long subsys_bits; 963 unsigned long subsys_bits;
850 unsigned long flags; 964 unsigned long flags;
851 char *release_agent; 965 char *release_agent;
966 char *name;
967 /* User explicitly requested empty subsystem */
968 bool none;
969
970 struct cgroupfs_root *new_root;
971
852}; 972};
853 973
854/* Convert a hierarchy specifier into a bitmask of subsystems and 974/* Convert a hierarchy specifier into a bitmask of subsystems and
@@ -863,9 +983,7 @@ static int parse_cgroupfs_options(char *data,
863 mask = ~(1UL << cpuset_subsys_id); 983 mask = ~(1UL << cpuset_subsys_id);
864#endif 984#endif
865 985
866 opts->subsys_bits = 0; 986 memset(opts, 0, sizeof(*opts));
867 opts->flags = 0;
868 opts->release_agent = NULL;
869 987
870 while ((token = strsep(&o, ",")) != NULL) { 988 while ((token = strsep(&o, ",")) != NULL) {
871 if (!*token) 989 if (!*token)
@@ -879,17 +997,42 @@ static int parse_cgroupfs_options(char *data,
879 if (!ss->disabled) 997 if (!ss->disabled)
880 opts->subsys_bits |= 1ul << i; 998 opts->subsys_bits |= 1ul << i;
881 } 999 }
1000 } else if (!strcmp(token, "none")) {
1001 /* Explicitly have no subsystems */
1002 opts->none = true;
882 } else if (!strcmp(token, "noprefix")) { 1003 } else if (!strcmp(token, "noprefix")) {
883 set_bit(ROOT_NOPREFIX, &opts->flags); 1004 set_bit(ROOT_NOPREFIX, &opts->flags);
884 } else if (!strncmp(token, "release_agent=", 14)) { 1005 } else if (!strncmp(token, "release_agent=", 14)) {
885 /* Specifying two release agents is forbidden */ 1006 /* Specifying two release agents is forbidden */
886 if (opts->release_agent) 1007 if (opts->release_agent)
887 return -EINVAL; 1008 return -EINVAL;
888 opts->release_agent = kzalloc(PATH_MAX, GFP_KERNEL); 1009 opts->release_agent =
1010 kstrndup(token + 14, PATH_MAX, GFP_KERNEL);
889 if (!opts->release_agent) 1011 if (!opts->release_agent)
890 return -ENOMEM; 1012 return -ENOMEM;
891 strncpy(opts->release_agent, token + 14, PATH_MAX - 1); 1013 } else if (!strncmp(token, "name=", 5)) {
892 opts->release_agent[PATH_MAX - 1] = 0; 1014 int i;
1015 const char *name = token + 5;
1016 /* Can't specify an empty name */
1017 if (!strlen(name))
1018 return -EINVAL;
1019 /* Must match [\w.-]+ */
1020 for (i = 0; i < strlen(name); i++) {
1021 char c = name[i];
1022 if (isalnum(c))
1023 continue;
1024 if ((c == '.') || (c == '-') || (c == '_'))
1025 continue;
1026 return -EINVAL;
1027 }
1028 /* Specifying two names is forbidden */
1029 if (opts->name)
1030 return -EINVAL;
1031 opts->name = kstrndup(name,
1032 MAX_CGROUP_ROOT_NAMELEN,
1033 GFP_KERNEL);
1034 if (!opts->name)
1035 return -ENOMEM;
893 } else { 1036 } else {
894 struct cgroup_subsys *ss; 1037 struct cgroup_subsys *ss;
895 int i; 1038 int i;
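
The new name= option is validated character by character rather than with a regex; the comment's [\w.-]+ shorthand translates to alphanumerics plus dot, dash and underscore. A runnable user-space equivalent of that loop (note the kernel's isalnum() is ASCII-only, while the libc one is locale-dependent):

    #include <ctype.h>
    #include <stdio.h>

    /* Mirrors the validation loop in parse_cgroupfs_options() above. */
    static int valid_name(const char *name)
    {
            if (!*name)
                    return 0;       /* can't specify an empty name */
            for (; *name; name++)
                    if (!isalnum((unsigned char)*name) &&
                        *name != '.' && *name != '-' && *name != '_')
                            return 0;
            return 1;
    }

    int main(void)
    {
            printf("%d %d %d\n", valid_name("systemd"),
                   valid_name("bad name"), valid_name(""));     /* 1 0 0 */
            return 0;
    }
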
@@ -906,6 +1049,8 @@ static int parse_cgroupfs_options(char *data,
906 } 1049 }
907 } 1050 }
908 1051
1052 /* Consistency checks */
1053
909 /* 1054 /*
910 * Option noprefix was introduced just for backward compatibility 1055 * Option noprefix was introduced just for backward compatibility
911 * with the old cpuset, so we allow noprefix only if mounting just 1056 * with the old cpuset, so we allow noprefix only if mounting just
@@ -915,8 +1060,16 @@ static int parse_cgroupfs_options(char *data,
915 (opts->subsys_bits & mask)) 1060 (opts->subsys_bits & mask))
916 return -EINVAL; 1061 return -EINVAL;
917 1062
918 /* We can't have an empty hierarchy */ 1063
919 if (!opts->subsys_bits) 1064 /* Can't specify "none" and some subsystems */
1065 if (opts->subsys_bits && opts->none)
1066 return -EINVAL;
1067
1068 /*
1069 * We either have to specify by name or by subsystems. (So all
1070 * empty hierarchies must have a name).
1071 */
1072 if (!opts->subsys_bits && !opts->name)
920 return -EINVAL; 1073 return -EINVAL;
921 1074
922 return 0; 1075 return 0;
@@ -944,6 +1097,12 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
944 goto out_unlock; 1097 goto out_unlock;
945 } 1098 }
946 1099
1100 /* Don't allow name to change at remount */
1101 if (opts.name && strcmp(opts.name, root->name)) {
1102 ret = -EINVAL;
1103 goto out_unlock;
1104 }
1105
947 ret = rebind_subsystems(root, opts.subsys_bits); 1106 ret = rebind_subsystems(root, opts.subsys_bits);
948 if (ret) 1107 if (ret)
949 goto out_unlock; 1108 goto out_unlock;
@@ -955,6 +1114,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
955 strcpy(root->release_agent_path, opts.release_agent); 1114 strcpy(root->release_agent_path, opts.release_agent);
956 out_unlock: 1115 out_unlock:
957 kfree(opts.release_agent); 1116 kfree(opts.release_agent);
1117 kfree(opts.name);
958 mutex_unlock(&cgroup_mutex); 1118 mutex_unlock(&cgroup_mutex);
959 mutex_unlock(&cgrp->dentry->d_inode->i_mutex); 1119 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
960 unlock_kernel(); 1120 unlock_kernel();
@@ -974,9 +1134,10 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
974 INIT_LIST_HEAD(&cgrp->children); 1134 INIT_LIST_HEAD(&cgrp->children);
975 INIT_LIST_HEAD(&cgrp->css_sets); 1135 INIT_LIST_HEAD(&cgrp->css_sets);
976 INIT_LIST_HEAD(&cgrp->release_list); 1136 INIT_LIST_HEAD(&cgrp->release_list);
977 INIT_LIST_HEAD(&cgrp->pids_list); 1137 INIT_LIST_HEAD(&cgrp->pidlists);
978 init_rwsem(&cgrp->pids_mutex); 1138 mutex_init(&cgrp->pidlist_mutex);
979} 1139}
1140
980static void init_cgroup_root(struct cgroupfs_root *root) 1141static void init_cgroup_root(struct cgroupfs_root *root)
981{ 1142{
982 struct cgroup *cgrp = &root->top_cgroup; 1143 struct cgroup *cgrp = &root->top_cgroup;
@@ -988,33 +1149,106 @@ static void init_cgroup_root(struct cgroupfs_root *root)
988 init_cgroup_housekeeping(cgrp); 1149 init_cgroup_housekeeping(cgrp);
989} 1150}
990 1151
1152static bool init_root_id(struct cgroupfs_root *root)
1153{
1154 int ret = 0;
1155
1156 do {
1157 if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
1158 return false;
1159 spin_lock(&hierarchy_id_lock);
1160 /* Try to allocate the next unused ID */
1161 ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
1162 &root->hierarchy_id);
1163 if (ret == -ENOSPC)
1164 /* Try again starting from 0 */
1165 ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id);
1166 if (!ret) {
1167 next_hierarchy_id = root->hierarchy_id + 1;
1168 } else if (ret != -EAGAIN) {
1169 /* Can only get here if the 31-bit IDR is full ... */
1170 BUG_ON(ret);
1171 }
1172 spin_unlock(&hierarchy_id_lock);
1173 } while (ret);
1174 return true;
1175}
1176
991static int cgroup_test_super(struct super_block *sb, void *data) 1177static int cgroup_test_super(struct super_block *sb, void *data)
992{ 1178{
993 struct cgroupfs_root *new = data; 1179 struct cgroup_sb_opts *opts = data;
994 struct cgroupfs_root *root = sb->s_fs_info; 1180 struct cgroupfs_root *root = sb->s_fs_info;
995 1181
996 /* First check subsystems */ 1182 /* If we asked for a name then it must match */
997 if (new->subsys_bits != root->subsys_bits) 1183 if (opts->name && strcmp(opts->name, root->name))
998 return 0; 1184 return 0;
999 1185
1000 /* Next check flags */ 1186 /*
1001 if (new->flags != root->flags) 1187 * If we asked for subsystems (or explicitly for no
1188 * subsystems) then they must match
1189 */
1190 if ((opts->subsys_bits || opts->none)
1191 && (opts->subsys_bits != root->subsys_bits))
1002 return 0; 1192 return 0;
1003 1193
1004 return 1; 1194 return 1;
1005} 1195}
1006 1196
1197static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
1198{
1199 struct cgroupfs_root *root;
1200
1201 if (!opts->subsys_bits && !opts->none)
1202 return NULL;
1203
1204 root = kzalloc(sizeof(*root), GFP_KERNEL);
1205 if (!root)
1206 return ERR_PTR(-ENOMEM);
1207
1208 if (!init_root_id(root)) {
1209 kfree(root);
1210 return ERR_PTR(-ENOMEM);
1211 }
1212 init_cgroup_root(root);
1213
1214 root->subsys_bits = opts->subsys_bits;
1215 root->flags = opts->flags;
1216 if (opts->release_agent)
1217 strcpy(root->release_agent_path, opts->release_agent);
1218 if (opts->name)
1219 strcpy(root->name, opts->name);
1220 return root;
1221}
1222
1223static void cgroup_drop_root(struct cgroupfs_root *root)
1224{
1225 if (!root)
1226 return;
1227
1228 BUG_ON(!root->hierarchy_id);
1229 spin_lock(&hierarchy_id_lock);
1230 ida_remove(&hierarchy_ida, root->hierarchy_id);
1231 spin_unlock(&hierarchy_id_lock);
1232 kfree(root);
1233}
1234
1007static int cgroup_set_super(struct super_block *sb, void *data) 1235static int cgroup_set_super(struct super_block *sb, void *data)
1008{ 1236{
1009 int ret; 1237 int ret;
1010 struct cgroupfs_root *root = data; 1238 struct cgroup_sb_opts *opts = data;
1239
1240 /* If we don't have a new root, we can't set up a new sb */
1241 if (!opts->new_root)
1242 return -EINVAL;
1243
1244 BUG_ON(!opts->subsys_bits && !opts->none);
1011 1245
1012 ret = set_anon_super(sb, NULL); 1246 ret = set_anon_super(sb, NULL);
1013 if (ret) 1247 if (ret)
1014 return ret; 1248 return ret;
1015 1249
1016 sb->s_fs_info = root; 1250 sb->s_fs_info = opts->new_root;
1017 root->sb = sb; 1251 opts->new_root->sb = sb;
1018 1252
1019 sb->s_blocksize = PAGE_CACHE_SIZE; 1253 sb->s_blocksize = PAGE_CACHE_SIZE;
1020 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 1254 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
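
init_root_id() above implements the usual IDA allocation dance: pre-allocate memory outside the spinlock, try to grab the next ID above a moving cursor, and on -ENOSPC wrap around and retry from zero. A user-space sketch of that wrap-around policy, with a small fixed bitmap standing in for the kernel's IDA:

    #include <stdio.h>

    #define NIDS 4                  /* tiny ID space for demonstration */
    static int used[NIDS];
    static int next_id;             /* moving cursor, like next_hierarchy_id */

    static int alloc_above(int start, int *id)
    {
            for (int i = start; i < NIDS; i++)
                    if (!used[i]) {
                            used[i] = 1;
                            *id = i;
                            return 0;
                    }
            return -1;              /* stands in for -ENOSPC */
    }

    static int get_root_id(int *id)
    {
            /* try above the cursor first, then wrap to 0 */
            if (alloc_above(next_id, id) && alloc_above(0, id))
                    return -1;
            next_id = *id + 1;
            return 0;
    }

    int main(void)
    {
            int id;

            for (int i = 0; i < 5; i++) {
                    if (get_root_id(&id))
                            printf("alloc -> ENOSPC\n");
                    else
                            printf("alloc -> id %d\n", id);
            }
            return 0;
    }
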
@@ -1051,48 +1285,43 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1051 void *data, struct vfsmount *mnt) 1285 void *data, struct vfsmount *mnt)
1052{ 1286{
1053 struct cgroup_sb_opts opts; 1287 struct cgroup_sb_opts opts;
1288 struct cgroupfs_root *root;
1054 int ret = 0; 1289 int ret = 0;
1055 struct super_block *sb; 1290 struct super_block *sb;
1056 struct cgroupfs_root *root; 1291 struct cgroupfs_root *new_root;
1057 struct list_head tmp_cg_links;
1058 1292
1059 /* First find the desired set of subsystems */ 1293 /* First find the desired set of subsystems */
1060 ret = parse_cgroupfs_options(data, &opts); 1294 ret = parse_cgroupfs_options(data, &opts);
1061 if (ret) { 1295 if (ret)
1062 kfree(opts.release_agent); 1296 goto out_err;
1063 return ret;
1064 }
1065
1066 root = kzalloc(sizeof(*root), GFP_KERNEL);
1067 if (!root) {
1068 kfree(opts.release_agent);
1069 return -ENOMEM;
1070 }
1071 1297
1072 init_cgroup_root(root); 1298 /*
1073 root->subsys_bits = opts.subsys_bits; 1299 * Allocate a new cgroup root. We may not need it if we're
1074 root->flags = opts.flags; 1300 * reusing an existing hierarchy.
1075 if (opts.release_agent) { 1301 */
1076 strcpy(root->release_agent_path, opts.release_agent); 1302 new_root = cgroup_root_from_opts(&opts);
1077 kfree(opts.release_agent); 1303 if (IS_ERR(new_root)) {
1304 ret = PTR_ERR(new_root);
1305 goto out_err;
1078 } 1306 }
1307 opts.new_root = new_root;
1079 1308
1080 sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root); 1309 /* Locate an existing or new sb for this hierarchy */
1081 1310 sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
1082 if (IS_ERR(sb)) { 1311 if (IS_ERR(sb)) {
1083 kfree(root); 1312 ret = PTR_ERR(sb);
1084 return PTR_ERR(sb); 1313 cgroup_drop_root(opts.new_root);
1314 goto out_err;
1085 } 1315 }
1086 1316
1087 if (sb->s_fs_info != root) { 1317 root = sb->s_fs_info;
1088 /* Reusing an existing superblock */ 1318 BUG_ON(!root);
1089 BUG_ON(sb->s_root == NULL); 1319 if (root == opts.new_root) {
1090 kfree(root); 1320 /* We used the new root structure, so this is a new hierarchy */
1091 root = NULL; 1321 struct list_head tmp_cg_links;
1092 } else {
1093 /* New superblock */
1094 struct cgroup *root_cgrp = &root->top_cgroup; 1322 struct cgroup *root_cgrp = &root->top_cgroup;
1095 struct inode *inode; 1323 struct inode *inode;
1324 struct cgroupfs_root *existing_root;
1096 int i; 1325 int i;
1097 1326
1098 BUG_ON(sb->s_root != NULL); 1327 BUG_ON(sb->s_root != NULL);
@@ -1105,6 +1334,18 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1105 mutex_lock(&inode->i_mutex); 1334 mutex_lock(&inode->i_mutex);
1106 mutex_lock(&cgroup_mutex); 1335 mutex_lock(&cgroup_mutex);
1107 1336
1337 if (strlen(root->name)) {
1338 /* Check for name clashes with existing mounts */
1339 for_each_active_root(existing_root) {
1340 if (!strcmp(existing_root->name, root->name)) {
1341 ret = -EBUSY;
1342 mutex_unlock(&cgroup_mutex);
1343 mutex_unlock(&inode->i_mutex);
1344 goto drop_new_super;
1345 }
1346 }
1347 }
1348
1108 /* 1349 /*
1109 * We're accessing css_set_count without locking 1350 * We're accessing css_set_count without locking
1110 * css_set_lock here, but that's OK - it can only be 1351 * css_set_lock here, but that's OK - it can only be
@@ -1123,7 +1364,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1123 if (ret == -EBUSY) { 1364 if (ret == -EBUSY) {
1124 mutex_unlock(&cgroup_mutex); 1365 mutex_unlock(&cgroup_mutex);
1125 mutex_unlock(&inode->i_mutex); 1366 mutex_unlock(&inode->i_mutex);
1126 goto free_cg_links; 1367 free_cg_links(&tmp_cg_links);
1368 goto drop_new_super;
1127 } 1369 }
1128 1370
1129 /* EBUSY should be the only error here */ 1371 /* EBUSY should be the only error here */
@@ -1155,17 +1397,27 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1155 BUG_ON(root->number_of_cgroups != 1); 1397 BUG_ON(root->number_of_cgroups != 1);
1156 1398
1157 cgroup_populate_dir(root_cgrp); 1399 cgroup_populate_dir(root_cgrp);
1158 mutex_unlock(&inode->i_mutex);
1159 mutex_unlock(&cgroup_mutex); 1400 mutex_unlock(&cgroup_mutex);
1401 mutex_unlock(&inode->i_mutex);
1402 } else {
1403 /*
1404 * We re-used an existing hierarchy - the new root (if
1405 * any) is not needed
1406 */
1407 cgroup_drop_root(opts.new_root);
1160 } 1408 }
1161 1409
1162 simple_set_mnt(mnt, sb); 1410 simple_set_mnt(mnt, sb);
1411 kfree(opts.release_agent);
1412 kfree(opts.name);
1163 return 0; 1413 return 0;
1164 1414
1165 free_cg_links:
1166 free_cg_links(&tmp_cg_links);
1167 drop_new_super: 1415 drop_new_super:
1168 deactivate_locked_super(sb); 1416 deactivate_locked_super(sb);
1417 out_err:
1418 kfree(opts.release_agent);
1419 kfree(opts.name);
1420
1169 return ret; 1421 return ret;
1170} 1422}
1171 1423
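The rework above turns mounting into a find-or-create: the parsed options travel through sget() so cgroup_test_super() can match an existing hierarchy, and comparing sb->s_fs_info with opts.new_root afterwards tells the two outcomes apart. A minimal userspace sketch of that shape, with hypothetical names throughout (this is not kernel code):

/* Sketch of the find-or-create pattern in cgroup_get_sb() above.
 * All names are hypothetical; illustration only. */
#include <stdlib.h>

struct root { int id; };
struct sb { struct root *fs_info; };

/* stand-in for sget(): return the existing sb when one matches,
 * otherwise install the candidate root into a fresh sb */
static struct sb *fake_sget(struct sb *existing, struct root *candidate)
{
	if (existing)
		return existing;		/* reuse path */
	struct sb *s = malloc(sizeof(*s));
	s->fs_info = candidate;			/* new-hierarchy path */
	return s;
}

static int mount_like(struct sb *existing, struct root *new_root)
{
	struct sb *s = fake_sget(existing, new_root);

	if (s->fs_info == new_root)
		return 1;	/* new hierarchy: go finish setting it up */
	free(new_root);		/* reused an existing one: drop candidate */
	return 0;
}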
@@ -1211,7 +1463,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
1211 mutex_unlock(&cgroup_mutex); 1463 mutex_unlock(&cgroup_mutex);
1212 1464
1213 kill_litter_super(sb); 1465 kill_litter_super(sb);
1214 kfree(root); 1466 cgroup_drop_root(root);
1215} 1467}
1216 1468
1217static struct file_system_type cgroup_fs_type = { 1469static struct file_system_type cgroup_fs_type = {
@@ -1276,27 +1528,6 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1276 return 0; 1528 return 0;
1277} 1529}
1278 1530
1279/*
1280 * Return the first subsystem attached to a cgroup's hierarchy, and
1281 * its subsystem id.
1282 */
1283
1284static void get_first_subsys(const struct cgroup *cgrp,
1285 struct cgroup_subsys_state **css, int *subsys_id)
1286{
1287 const struct cgroupfs_root *root = cgrp->root;
1288 const struct cgroup_subsys *test_ss;
1289 BUG_ON(list_empty(&root->subsys_list));
1290 test_ss = list_entry(root->subsys_list.next,
1291 struct cgroup_subsys, sibling);
1292 if (css) {
1293 *css = cgrp->subsys[test_ss->subsys_id];
1294 BUG_ON(!*css);
1295 }
1296 if (subsys_id)
1297 *subsys_id = test_ss->subsys_id;
1298}
1299
1300/** 1531/**
1301 * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' 1532 * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
1302 * @cgrp: the cgroup the task is attaching to 1533 * @cgrp: the cgroup the task is attaching to
@@ -1313,18 +1544,15 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1313 struct css_set *cg; 1544 struct css_set *cg;
1314 struct css_set *newcg; 1545 struct css_set *newcg;
1315 struct cgroupfs_root *root = cgrp->root; 1546 struct cgroupfs_root *root = cgrp->root;
1316 int subsys_id;
1317
1318 get_first_subsys(cgrp, NULL, &subsys_id);
1319 1547
1320 /* Nothing to do if the task is already in that cgroup */ 1548 /* Nothing to do if the task is already in that cgroup */
1321 oldcgrp = task_cgroup(tsk, subsys_id); 1549 oldcgrp = task_cgroup_from_root(tsk, root);
1322 if (cgrp == oldcgrp) 1550 if (cgrp == oldcgrp)
1323 return 0; 1551 return 0;
1324 1552
1325 for_each_subsys(root, ss) { 1553 for_each_subsys(root, ss) {
1326 if (ss->can_attach) { 1554 if (ss->can_attach) {
1327 retval = ss->can_attach(ss, cgrp, tsk); 1555 retval = ss->can_attach(ss, cgrp, tsk, false);
1328 if (retval) 1556 if (retval)
1329 return retval; 1557 return retval;
1330 } 1558 }
@@ -1362,7 +1590,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1362 1590
1363 for_each_subsys(root, ss) { 1591 for_each_subsys(root, ss) {
1364 if (ss->attach) 1592 if (ss->attach)
1365 ss->attach(ss, cgrp, oldcgrp, tsk); 1593 ss->attach(ss, cgrp, oldcgrp, tsk, false);
1366 } 1594 }
1367 set_bit(CGRP_RELEASABLE, &oldcgrp->flags); 1595 set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
1368 synchronize_rcu(); 1596 synchronize_rcu();
@@ -1423,15 +1651,6 @@ static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
1423 return ret; 1651 return ret;
1424} 1652}
1425 1653
1426/* The various types of files and directories in a cgroup file system */
1427enum cgroup_filetype {
1428 FILE_ROOT,
1429 FILE_DIR,
1430 FILE_TASKLIST,
1431 FILE_NOTIFY_ON_RELEASE,
1432 FILE_RELEASE_AGENT,
1433};
1434
1435/** 1654/**
1436 * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive. 1655 * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
1437 * @cgrp: the cgroup to be checked for liveness 1656 * @cgrp: the cgroup to be checked for liveness
@@ -1876,7 +2095,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
1876 * the start of a css_set 2095 * the start of a css_set
1877 */ 2096 */
1878static void cgroup_advance_iter(struct cgroup *cgrp, 2097static void cgroup_advance_iter(struct cgroup *cgrp,
1879 struct cgroup_iter *it) 2098 struct cgroup_iter *it)
1880{ 2099{
1881 struct list_head *l = it->cg_link; 2100 struct list_head *l = it->cg_link;
1882 struct cg_cgroup_link *link; 2101 struct cg_cgroup_link *link;
@@ -2129,7 +2348,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
2129} 2348}
2130 2349
2131/* 2350/*
2132 * Stuff for reading the 'tasks' file. 2351 * Stuff for reading the 'tasks'/'procs' files.
2133 * 2352 *
2134 * Reading this file can return large amounts of data if a cgroup has 2353 * Reading this file can return large amounts of data if a cgroup has
2135 * *lots* of attached tasks. So it may need several calls to read(), 2354 * *lots* of attached tasks. So it may need several calls to read(),
@@ -2139,27 +2358,196 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
2139 */ 2358 */
2140 2359
2141/* 2360/*
2142 * Load into 'pidarray' up to 'npids' of the tasks using cgroup 2361 * The following helper functions "fix" the issue where there are more pids
2143 * 'cgrp'. Return actual number of pids loaded. No need to 2362 * than kmalloc will give memory for; in such cases, we use vmalloc/vfree.
2144 * task_lock(p) when reading out p->cgroup, since we're in an RCU 2363 * TODO: replace with a kernel-wide solution to this problem
2145 * read section, so the css_set can't go away, and is 2364 */
2146 * immutable after creation. 2365#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
2366static void *pidlist_allocate(int count)
2367{
2368 if (PIDLIST_TOO_LARGE(count))
2369 return vmalloc(count * sizeof(pid_t));
2370 else
2371 return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
2372}
2373static void pidlist_free(void *p)
2374{
2375 if (is_vmalloc_addr(p))
2376 vfree(p);
2377 else
2378 kfree(p);
2379}
2380static void *pidlist_resize(void *p, int newcount)
2381{
2382 void *newlist;
2383 /* note: if new alloc fails, old p will still be valid either way */
2384 if (is_vmalloc_addr(p)) {
2385 newlist = vmalloc(newcount * sizeof(pid_t));
2386 if (!newlist)
2387 return NULL;
2388 memcpy(newlist, p, newcount * sizeof(pid_t));
2389 vfree(p);
2390 } else {
2391 newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);
2392 }
2393 return newlist;
2394}
2395
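Plugging typical values into PIDLIST_TOO_LARGE makes the cutover concrete; assuming 4 KiB pages and a 4-byte pid_t (both common, neither guaranteed), kmalloc covers up to 2048 pids and vmalloc takes over beyond that:

/* Illustration only: sizeof(int) stands in for sizeof(pid_t). */
#include <stdio.h>
#define PAGE_SIZE 4096
#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(int) > (PAGE_SIZE * 2))

int main(void)
{
	printf("2048 pids -> %s\n", PIDLIST_TOO_LARGE(2048) ? "vmalloc" : "kmalloc");
	printf("2049 pids -> %s\n", PIDLIST_TOO_LARGE(2049) ? "vmalloc" : "kmalloc");
	return 0;
}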
2396/*
2397 * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries
2398 * If the new stripped list is sufficiently smaller and there's enough memory
2399 * to allocate a new buffer, will let go of the unneeded memory. Returns the
2400 * number of unique elements.
2401 */
2402/* is the size difference enough that we should re-allocate the array? */
2403#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new))
2404static int pidlist_uniq(pid_t **p, int length)
2405{
2406 int src, dest = 1;
2407 pid_t *list = *p;
2408 pid_t *newlist;
2409
2410 /*
2411 * we presume the 0th element is unique, so src starts at 1. trivial
2412 * edge cases first; no work needs to be done for either
2413 */
2414 if (length == 0 || length == 1)
2415 return length;
2416 /* src and dest walk down the list; dest counts unique elements */
2417 for (src = 1; src < length; src++) {
2418 /* find next unique element */
2419 while (list[src] == list[src-1]) {
2420 src++;
2421 if (src == length)
2422 goto after;
2423 }
2424 /* dest always points to where the next unique element goes */
2425 list[dest] = list[src];
2426 dest++;
2427 }
2428after:
2429 /*
2430 * if the length difference is large enough, we want to allocate a
2431 * smaller buffer to save memory. if this fails due to out of memory,
2432 * we'll just stay with what we've got.
2433 */
2434 if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
2435 newlist = pidlist_resize(list, dest);
2436 if (newlist)
2437 *p = newlist;
2438 }
2439 return dest;
2440}
2441
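A quick userspace trace of the dedup loop (the reallocation step elided) shows how src skips runs while dest compacts the unique values; illustration only:

/* Userspace mirror of pidlist_uniq()'s in-place dedup of a sorted array. */
#include <stdio.h>

static int uniq(int *list, int length)
{
	int src, dest = 1;

	if (length < 2)
		return length;
	for (src = 1; src < length; src++) {
		while (list[src] == list[src - 1]) {
			if (++src == length)
				return dest;
		}
		list[dest++] = list[src];
	}
	return dest;
}

int main(void)
{
	int pids[] = { 100, 100, 100, 101, 102, 102 };
	int n = uniq(pids, 6);

	for (int i = 0; i < n; i++)
		printf("%d\n", pids[i]);	/* prints 100 101 102 */
	return 0;
}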
2442static int cmppid(const void *a, const void *b)
2443{
2444 return *(pid_t *)a - *(pid_t *)b;
2445}
2446
2447/*
2448 * find the appropriate pidlist for our purpose (given procs vs tasks)
2449 * returns with the lock on that pidlist already held, and takes care
2450 * of the use count, or returns NULL with no locks held if we're out of
2451 * memory.
2147 */ 2452 */
2148static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp) 2453static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
2454 enum cgroup_filetype type)
2149{ 2455{
2150 int n = 0, pid; 2456 struct cgroup_pidlist *l;
2457 /* don't need task_nsproxy() if we're looking at ourself */
2458 struct pid_namespace *ns = get_pid_ns(current->nsproxy->pid_ns);
2459 /*
2460 * We can't drop the pidlist_mutex before taking the l->mutex in case
2461 * the last ref-holder is trying to remove l from the list at the same
2462 * time. Holding the pidlist_mutex precludes somebody taking whichever
 2463 * list we find out from under us - compare cgroup_release_pid_array().
2464 */
2465 mutex_lock(&cgrp->pidlist_mutex);
2466 list_for_each_entry(l, &cgrp->pidlists, links) {
2467 if (l->key.type == type && l->key.ns == ns) {
2468 /* found a matching list - drop the extra refcount */
2469 put_pid_ns(ns);
2470 /* make sure l doesn't vanish out from under us */
2471 down_write(&l->mutex);
2472 mutex_unlock(&cgrp->pidlist_mutex);
2473 l->use_count++;
2474 return l;
2475 }
2476 }
2477 /* entry not found; create a new one */
2478 l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
2479 if (!l) {
2480 mutex_unlock(&cgrp->pidlist_mutex);
2481 put_pid_ns(ns);
2482 return l;
2483 }
2484 init_rwsem(&l->mutex);
2485 down_write(&l->mutex);
2486 l->key.type = type;
2487 l->key.ns = ns;
2488 l->use_count = 0; /* don't increment here */
2489 l->list = NULL;
2490 l->owner = cgrp;
2491 list_add(&l->links, &cgrp->pidlists);
2492 mutex_unlock(&cgrp->pidlist_mutex);
2493 return l;
2494}
2495
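The locking comment above is the crux: the per-list lock must be taken before the cgroup-wide pidlist_mutex is dropped, or a concurrent last-user release could free the entry in the window between lookup and lock. A pthread sketch of that ordering, with stand-in types, illustration only:

/* Stand-ins: list_mutex for cgrp->pidlist_mutex, e->lock for l->mutex.
 * Entry locks are assumed initialized elsewhere. */
#include <pthread.h>

struct entry {
	pthread_rwlock_t lock;
	int use_count;
};

static pthread_mutex_t list_mutex = PTHREAD_MUTEX_INITIALIZER;

static void get_entry(struct entry *e)
{
	pthread_mutex_lock(&list_mutex);
	/* ... search the list and find e ... */
	pthread_rwlock_wrlock(&e->lock);   /* pin e before dropping list lock */
	pthread_mutex_unlock(&list_mutex);
	e->use_count++;
	pthread_rwlock_unlock(&e->lock);   /* the kernel version returns held */
}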
2496/*
2497 * Load a cgroup's pidarray with either procs' tgids or tasks' pids
2498 */
2499static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
2500 struct cgroup_pidlist **lp)
2501{
2502 pid_t *array;
2503 int length;
2504 int pid, n = 0; /* used for populating the array */
2151 struct cgroup_iter it; 2505 struct cgroup_iter it;
2152 struct task_struct *tsk; 2506 struct task_struct *tsk;
2507 struct cgroup_pidlist *l;
2508
2509 /*
2510 * If cgroup gets more users after we read count, we won't have
2511 * enough space - tough. This race is indistinguishable to the
2512 * caller from the case that the additional cgroup users didn't
2513 * show up until sometime later on.
2514 */
2515 length = cgroup_task_count(cgrp);
2516 array = pidlist_allocate(length);
2517 if (!array)
2518 return -ENOMEM;
2519 /* now, populate the array */
2153 cgroup_iter_start(cgrp, &it); 2520 cgroup_iter_start(cgrp, &it);
2154 while ((tsk = cgroup_iter_next(cgrp, &it))) { 2521 while ((tsk = cgroup_iter_next(cgrp, &it))) {
2155 if (unlikely(n == npids)) 2522 if (unlikely(n == length))
2156 break; 2523 break;
2157 pid = task_pid_vnr(tsk); 2524 /* get tgid or pid for procs or tasks file respectively */
2158 if (pid > 0) 2525 if (type == CGROUP_FILE_PROCS)
2159 pidarray[n++] = pid; 2526 pid = task_tgid_vnr(tsk);
2527 else
2528 pid = task_pid_vnr(tsk);
2529 if (pid > 0) /* make sure to only use valid results */
2530 array[n++] = pid;
2160 } 2531 }
2161 cgroup_iter_end(cgrp, &it); 2532 cgroup_iter_end(cgrp, &it);
2162 return n; 2533 length = n;
2534 /* now sort & (if procs) strip out duplicates */
2535 sort(array, length, sizeof(pid_t), cmppid, NULL);
2536 if (type == CGROUP_FILE_PROCS)
2537 length = pidlist_uniq(&array, length);
2538 l = cgroup_pidlist_find(cgrp, type);
2539 if (!l) {
2540 pidlist_free(array);
2541 return -ENOMEM;
2542 }
2543 /* store array, freeing old if necessary - lock already held */
2544 pidlist_free(l->list);
2545 l->list = array;
2546 l->length = length;
2547 l->use_count++;
2548 up_write(&l->mutex);
2549 *lp = l;
2550 return 0;
2163} 2551}
2164 2552
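Concretely, for a process whose group leader is pid 100 with sibling threads 101 and 102, the two file types populate their arrays differently (values illustrative):

	tasks (CGROUP_FILE_TASKS): task_pid_vnr()  -> 100, 101, 102
	procs (CGROUP_FILE_PROCS): task_tgid_vnr() -> 100, 100, 100
	                           after sort+uniq -> 100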
2165/** 2553/**
@@ -2216,37 +2604,14 @@ err:
2216 return ret; 2604 return ret;
2217} 2605}
2218 2606
2219/*
2220 * Cache pids for all threads in the same pid namespace that are
2221 * opening the same "tasks" file.
2222 */
2223struct cgroup_pids {
2224 /* The node in cgrp->pids_list */
2225 struct list_head list;
2226 /* The cgroup those pids belong to */
2227 struct cgroup *cgrp;
2228 /* The namespace those pids belong to */
2229 struct pid_namespace *ns;
2230 /* Array of process ids in the cgroup */
2231 pid_t *tasks_pids;
2232 /* How many files are using this tasks_pids array */
2233 int use_count;
2234 /* Length of the current tasks_pids array */
2235 int length;
2236};
2237
2238static int cmppid(const void *a, const void *b)
2239{
2240 return *(pid_t *)a - *(pid_t *)b;
2241}
2242 2607
2243/* 2608/*
2244 * seq_file methods for the "tasks" file. The seq_file position is the 2609 * seq_file methods for the tasks/procs files. The seq_file position is the
2245 * next pid to display; the seq_file iterator is a pointer to the pid 2610 * next pid to display; the seq_file iterator is a pointer to the pid
2246 * in the cgroup->tasks_pids array. 2611 * in the cgroup->l->list array.
2247 */ 2612 */
2248 2613
2249static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos) 2614static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
2250{ 2615{
2251 /* 2616 /*
2252 * Initially we receive a position value that corresponds to 2617 * Initially we receive a position value that corresponds to
@@ -2254,48 +2619,45 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
2254 * after a seek to the start). Use a binary-search to find the 2619 * after a seek to the start). Use a binary-search to find the
2255 * next pid to display, if any 2620 * next pid to display, if any
2256 */ 2621 */
2257 struct cgroup_pids *cp = s->private; 2622 struct cgroup_pidlist *l = s->private;
2258 struct cgroup *cgrp = cp->cgrp;
2259 int index = 0, pid = *pos; 2623 int index = 0, pid = *pos;
2260 int *iter; 2624 int *iter;
2261 2625
2262 down_read(&cgrp->pids_mutex); 2626 down_read(&l->mutex);
2263 if (pid) { 2627 if (pid) {
2264 int end = cp->length; 2628 int end = l->length;
2265 2629
2266 while (index < end) { 2630 while (index < end) {
2267 int mid = (index + end) / 2; 2631 int mid = (index + end) / 2;
2268 if (cp->tasks_pids[mid] == pid) { 2632 if (l->list[mid] == pid) {
2269 index = mid; 2633 index = mid;
2270 break; 2634 break;
2271 } else if (cp->tasks_pids[mid] <= pid) 2635 } else if (l->list[mid] <= pid)
2272 index = mid + 1; 2636 index = mid + 1;
2273 else 2637 else
2274 end = mid; 2638 end = mid;
2275 } 2639 }
2276 } 2640 }
2277 /* If we're off the end of the array, we're done */ 2641 /* If we're off the end of the array, we're done */
2278 if (index >= cp->length) 2642 if (index >= l->length)
2279 return NULL; 2643 return NULL;
2280 /* Update the abstract position to be the actual pid that we found */ 2644 /* Update the abstract position to be the actual pid that we found */
2281 iter = cp->tasks_pids + index; 2645 iter = l->list + index;
2282 *pos = *iter; 2646 *pos = *iter;
2283 return iter; 2647 return iter;
2284} 2648}
2285 2649
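The seek logic above tolerates pids vanishing between reads because it is effectively a lower-bound search; a compact userspace mirror, illustration only:

/* Find the first index whose pid is >= the saved seq_file position. */
#include <stdio.h>

static int seek_index(const int *list, int length, int pid)
{
	int index = 0, end = length;

	while (index < end) {
		int mid = (index + end) / 2;
		if (list[mid] == pid)
			return mid;
		else if (list[mid] <= pid)
			index = mid + 1;
		else
			end = mid;
	}
	return index;	/* == length means off the end: iteration done */
}

int main(void)
{
	int pids[] = { 100, 102 };

	/* pid 101 exited since the last read(): resume at 102, index 1 */
	printf("%d\n", seek_index(pids, 2, 101));
	return 0;
}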
2286static void cgroup_tasks_stop(struct seq_file *s, void *v) 2650static void cgroup_pidlist_stop(struct seq_file *s, void *v)
2287{ 2651{
2288 struct cgroup_pids *cp = s->private; 2652 struct cgroup_pidlist *l = s->private;
2289 struct cgroup *cgrp = cp->cgrp; 2653 up_read(&l->mutex);
2290 up_read(&cgrp->pids_mutex);
2291} 2654}
2292 2655
2293static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos) 2656static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
2294{ 2657{
2295 struct cgroup_pids *cp = s->private; 2658 struct cgroup_pidlist *l = s->private;
2296 int *p = v; 2659 pid_t *p = v;
2297 int *end = cp->tasks_pids + cp->length; 2660 pid_t *end = l->list + l->length;
2298
2299 /* 2661 /*
2300 * Advance to the next pid in the array. If this goes off the 2662 * Advance to the next pid in the array. If this goes off the
2301 * end, we're done 2663 * end, we're done
@@ -2309,124 +2671,107 @@ static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
2309 } 2671 }
2310} 2672}
2311 2673
2312static int cgroup_tasks_show(struct seq_file *s, void *v) 2674static int cgroup_pidlist_show(struct seq_file *s, void *v)
2313{ 2675{
2314 return seq_printf(s, "%d\n", *(int *)v); 2676 return seq_printf(s, "%d\n", *(int *)v);
2315} 2677}
2316 2678
2317static const struct seq_operations cgroup_tasks_seq_operations = { 2679/*
2318 .start = cgroup_tasks_start, 2680 * seq_operations functions for iterating on pidlists through seq_file -
2319 .stop = cgroup_tasks_stop, 2681 * independent of whether it's tasks or procs
2320 .next = cgroup_tasks_next, 2682 */
2321 .show = cgroup_tasks_show, 2683static const struct seq_operations cgroup_pidlist_seq_operations = {
2684 .start = cgroup_pidlist_start,
2685 .stop = cgroup_pidlist_stop,
2686 .next = cgroup_pidlist_next,
2687 .show = cgroup_pidlist_show,
2322}; 2688};
2323 2689
2324static void release_cgroup_pid_array(struct cgroup_pids *cp) 2690static void cgroup_release_pid_array(struct cgroup_pidlist *l)
2325{ 2691{
2326 struct cgroup *cgrp = cp->cgrp; 2692 /*
2327 2693 * the case where we're the last user of this particular pidlist will
2328 down_write(&cgrp->pids_mutex); 2694 * have us remove it from the cgroup's list, which entails taking the
2329 BUG_ON(!cp->use_count); 2695 * mutex. since in pidlist_find the pidlist->lock depends on cgroup->
2330 if (!--cp->use_count) { 2696 * pidlist_mutex, we have to take pidlist_mutex first.
2331 list_del(&cp->list); 2697 */
2332 put_pid_ns(cp->ns); 2698 mutex_lock(&l->owner->pidlist_mutex);
2333 kfree(cp->tasks_pids); 2699 down_write(&l->mutex);
2334 kfree(cp); 2700 BUG_ON(!l->use_count);
2701 if (!--l->use_count) {
2702 /* we're the last user if refcount is 0; remove and free */
2703 list_del(&l->links);
2704 mutex_unlock(&l->owner->pidlist_mutex);
2705 pidlist_free(l->list);
2706 put_pid_ns(l->key.ns);
2707 up_write(&l->mutex);
2708 kfree(l);
2709 return;
2335 } 2710 }
2336 up_write(&cgrp->pids_mutex); 2711 mutex_unlock(&l->owner->pidlist_mutex);
2712 up_write(&l->mutex);
2337} 2713}
2338 2714
2339static int cgroup_tasks_release(struct inode *inode, struct file *file) 2715static int cgroup_pidlist_release(struct inode *inode, struct file *file)
2340{ 2716{
2341 struct seq_file *seq; 2717 struct cgroup_pidlist *l;
2342 struct cgroup_pids *cp;
2343
2344 if (!(file->f_mode & FMODE_READ)) 2718 if (!(file->f_mode & FMODE_READ))
2345 return 0; 2719 return 0;
2346 2720 /*
2347 seq = file->private_data; 2721 * the seq_file will only be initialized if the file was opened for
2348 cp = seq->private; 2722 * reading; hence its private data is only dereferenced in that case.
2349 2723 */
2350 release_cgroup_pid_array(cp); 2724 l = ((struct seq_file *)file->private_data)->private;
2725 cgroup_release_pid_array(l);
2351 return seq_release(inode, file); 2726 return seq_release(inode, file);
2352} 2727}
2353 2728
2354static struct file_operations cgroup_tasks_operations = { 2729static const struct file_operations cgroup_pidlist_operations = {
2355 .read = seq_read, 2730 .read = seq_read,
2356 .llseek = seq_lseek, 2731 .llseek = seq_lseek,
2357 .write = cgroup_file_write, 2732 .write = cgroup_file_write,
2358 .release = cgroup_tasks_release, 2733 .release = cgroup_pidlist_release,
2359}; 2734};
2360 2735
2361/* 2736/*
2362 * Handle an open on 'tasks' file. Prepare an array containing the 2737 * The following functions handle opens on a file that displays a pidlist
2363 * process id's of tasks currently attached to the cgroup being opened. 2738 * (tasks or procs). Prepare an array of the process/thread IDs of whoever's
2739 * in the cgroup.
2364 */ 2740 */
2365 2741/* helper function for the two below it */
2366static int cgroup_tasks_open(struct inode *unused, struct file *file) 2742static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
2367{ 2743{
2368 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); 2744 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2369 struct pid_namespace *ns = current->nsproxy->pid_ns; 2745 struct cgroup_pidlist *l;
2370 struct cgroup_pids *cp;
2371 pid_t *pidarray;
2372 int npids;
2373 int retval; 2746 int retval;
2374 2747
2375 /* Nothing to do for write-only files */ 2748 /* Nothing to do for write-only files */
2376 if (!(file->f_mode & FMODE_READ)) 2749 if (!(file->f_mode & FMODE_READ))
2377 return 0; 2750 return 0;
2378 2751
2379 /* 2752 /* have the array populated */
2380 * If cgroup gets more users after we read count, we won't have 2753 retval = pidlist_array_load(cgrp, type, &l);
2381 * enough space - tough. This race is indistinguishable to the 2754 if (retval)
2382 * caller from the case that the additional cgroup users didn't 2755 return retval;
2383 * show up until sometime later on. 2756 /* configure file information */
2384 */ 2757 file->f_op = &cgroup_pidlist_operations;
2385 npids = cgroup_task_count(cgrp);
2386 pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
2387 if (!pidarray)
2388 return -ENOMEM;
2389 npids = pid_array_load(pidarray, npids, cgrp);
2390 sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
2391
2392 /*
2393 * Store the array in the cgroup, freeing the old
2394 * array if necessary
2395 */
2396 down_write(&cgrp->pids_mutex);
2397
2398 list_for_each_entry(cp, &cgrp->pids_list, list) {
2399 if (ns == cp->ns)
2400 goto found;
2401 }
2402
2403 cp = kzalloc(sizeof(*cp), GFP_KERNEL);
2404 if (!cp) {
2405 up_write(&cgrp->pids_mutex);
2406 kfree(pidarray);
2407 return -ENOMEM;
2408 }
2409 cp->cgrp = cgrp;
2410 cp->ns = ns;
2411 get_pid_ns(ns);
2412 list_add(&cp->list, &cgrp->pids_list);
2413found:
2414 kfree(cp->tasks_pids);
2415 cp->tasks_pids = pidarray;
2416 cp->length = npids;
2417 cp->use_count++;
2418 up_write(&cgrp->pids_mutex);
2419
2420 file->f_op = &cgroup_tasks_operations;
2421 2758
2422 retval = seq_open(file, &cgroup_tasks_seq_operations); 2759 retval = seq_open(file, &cgroup_pidlist_seq_operations);
2423 if (retval) { 2760 if (retval) {
2424 release_cgroup_pid_array(cp); 2761 cgroup_release_pid_array(l);
2425 return retval; 2762 return retval;
2426 } 2763 }
2427 ((struct seq_file *)file->private_data)->private = cp; 2764 ((struct seq_file *)file->private_data)->private = l;
2428 return 0; 2765 return 0;
2429} 2766}
2767static int cgroup_tasks_open(struct inode *unused, struct file *file)
2768{
2769 return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
2770}
2771static int cgroup_procs_open(struct inode *unused, struct file *file)
2772{
2773 return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
2774}
2430 2775
2431static u64 cgroup_read_notify_on_release(struct cgroup *cgrp, 2776static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
2432 struct cftype *cft) 2777 struct cftype *cft)
@@ -2449,21 +2794,27 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp,
2449/* 2794/*
2450 * for the common functions, 'private' gives the type of file 2795 * for the common functions, 'private' gives the type of file
2451 */ 2796 */
2797/* for hysterical raisins, we can't put this on the older files */
2798#define CGROUP_FILE_GENERIC_PREFIX "cgroup."
2452static struct cftype files[] = { 2799static struct cftype files[] = {
2453 { 2800 {
2454 .name = "tasks", 2801 .name = "tasks",
2455 .open = cgroup_tasks_open, 2802 .open = cgroup_tasks_open,
2456 .write_u64 = cgroup_tasks_write, 2803 .write_u64 = cgroup_tasks_write,
2457 .release = cgroup_tasks_release, 2804 .release = cgroup_pidlist_release,
2458 .private = FILE_TASKLIST,
2459 .mode = S_IRUGO | S_IWUSR, 2805 .mode = S_IRUGO | S_IWUSR,
2460 }, 2806 },
2461 2807 {
2808 .name = CGROUP_FILE_GENERIC_PREFIX "procs",
2809 .open = cgroup_procs_open,
2810 /* .write_u64 = cgroup_procs_write, TODO */
2811 .release = cgroup_pidlist_release,
2812 .mode = S_IRUGO,
2813 },
2462 { 2814 {
2463 .name = "notify_on_release", 2815 .name = "notify_on_release",
2464 .read_u64 = cgroup_read_notify_on_release, 2816 .read_u64 = cgroup_read_notify_on_release,
2465 .write_u64 = cgroup_write_notify_on_release, 2817 .write_u64 = cgroup_write_notify_on_release,
2466 .private = FILE_NOTIFY_ON_RELEASE,
2467 }, 2818 },
2468}; 2819};
2469 2820
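From userspace both files read as newline-separated ids; "cgroup.procs" is read-only at this point (its write handler is still TODO) and, unlike "tasks", lists each tgid once. A hypothetical reader, where the mount point is an assumption and not mandated anywhere:

/* Illustration: dump a cgroup pidlist file such as "cgroup.procs". */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/cgroup/cgroup.procs", "r");	/* assumed mount */
	int pid;

	if (!f)
		return 1;
	while (fscanf(f, "%d", &pid) == 1)
		printf("tgid %d\n", pid);
	fclose(f);
	return 0;
}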
@@ -2472,7 +2823,6 @@ static struct cftype cft_release_agent = {
2472 .read_seq_string = cgroup_release_agent_show, 2823 .read_seq_string = cgroup_release_agent_show,
2473 .write_string = cgroup_release_agent_write, 2824 .write_string = cgroup_release_agent_write,
2474 .max_write_len = PATH_MAX, 2825 .max_write_len = PATH_MAX,
2475 .private = FILE_RELEASE_AGENT,
2476}; 2826};
2477 2827
2478static int cgroup_populate_dir(struct cgroup *cgrp) 2828static int cgroup_populate_dir(struct cgroup *cgrp)
@@ -2879,6 +3229,7 @@ int __init cgroup_init_early(void)
2879 init_task.cgroups = &init_css_set; 3229 init_task.cgroups = &init_css_set;
2880 3230
2881 init_css_set_link.cg = &init_css_set; 3231 init_css_set_link.cg = &init_css_set;
3232 init_css_set_link.cgrp = dummytop;
2882 list_add(&init_css_set_link.cgrp_link_list, 3233 list_add(&init_css_set_link.cgrp_link_list,
2883 &rootnode.top_cgroup.css_sets); 3234 &rootnode.top_cgroup.css_sets);
2884 list_add(&init_css_set_link.cg_link_list, 3235 list_add(&init_css_set_link.cg_link_list,
@@ -2933,7 +3284,7 @@ int __init cgroup_init(void)
2933 /* Add init_css_set to the hash table */ 3284 /* Add init_css_set to the hash table */
2934 hhead = css_set_hash(init_css_set.subsys); 3285 hhead = css_set_hash(init_css_set.subsys);
2935 hlist_add_head(&init_css_set.hlist, hhead); 3286 hlist_add_head(&init_css_set.hlist, hhead);
2936 3287 BUG_ON(!init_root_id(&rootnode));
2937 err = register_filesystem(&cgroup_fs_type); 3288 err = register_filesystem(&cgroup_fs_type);
2938 if (err < 0) 3289 if (err < 0)
2939 goto out; 3290 goto out;
@@ -2986,15 +3337,16 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
2986 for_each_active_root(root) { 3337 for_each_active_root(root) {
2987 struct cgroup_subsys *ss; 3338 struct cgroup_subsys *ss;
2988 struct cgroup *cgrp; 3339 struct cgroup *cgrp;
2989 int subsys_id;
2990 int count = 0; 3340 int count = 0;
2991 3341
2992 seq_printf(m, "%lu:", root->subsys_bits); 3342 seq_printf(m, "%d:", root->hierarchy_id);
2993 for_each_subsys(root, ss) 3343 for_each_subsys(root, ss)
2994 seq_printf(m, "%s%s", count++ ? "," : "", ss->name); 3344 seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
3345 if (strlen(root->name))
3346 seq_printf(m, "%sname=%s", count ? "," : "",
3347 root->name);
2995 seq_putc(m, ':'); 3348 seq_putc(m, ':');
2996 get_first_subsys(&root->top_cgroup, NULL, &subsys_id); 3349 cgrp = task_cgroup_from_root(tsk, root);
2997 cgrp = task_cgroup(tsk, subsys_id);
2998 retval = cgroup_path(cgrp, buf, PAGE_SIZE); 3350 retval = cgroup_path(cgrp, buf, PAGE_SIZE);
2999 if (retval < 0) 3351 if (retval < 0)
3000 goto out_unlock; 3352 goto out_unlock;
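With hierarchy_id replacing the subsystem bitmask and the optional name= key appended after the subsystem list, a /proc/<pid>/cgroup line now takes the shape below (illustrative values, not real output):

	2:cpuset,cpu,name=mytree:/some/cgroup/path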
@@ -3033,8 +3385,8 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v)
3033 mutex_lock(&cgroup_mutex); 3385 mutex_lock(&cgroup_mutex);
3034 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3386 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3035 struct cgroup_subsys *ss = subsys[i]; 3387 struct cgroup_subsys *ss = subsys[i];
3036 seq_printf(m, "%s\t%lu\t%d\t%d\n", 3388 seq_printf(m, "%s\t%d\t%d\t%d\n",
3037 ss->name, ss->root->subsys_bits, 3389 ss->name, ss->root->hierarchy_id,
3038 ss->root->number_of_cgroups, !ss->disabled); 3390 ss->root->number_of_cgroups, !ss->disabled);
3039 } 3391 }
3040 mutex_unlock(&cgroup_mutex); 3392 mutex_unlock(&cgroup_mutex);
@@ -3320,13 +3672,11 @@ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
3320{ 3672{
3321 int ret; 3673 int ret;
3322 struct cgroup *target; 3674 struct cgroup *target;
3323 int subsys_id;
3324 3675
3325 if (cgrp == dummytop) 3676 if (cgrp == dummytop)
3326 return 1; 3677 return 1;
3327 3678
3328 get_first_subsys(cgrp, NULL, &subsys_id); 3679 target = task_cgroup_from_root(task, cgrp->root);
3329 target = task_cgroup(task, subsys_id);
3330 while (cgrp != target && cgrp!= cgrp->top_cgroup) 3680 while (cgrp != target && cgrp!= cgrp->top_cgroup)
3331 cgrp = cgrp->parent; 3681 cgrp = cgrp->parent;
3332 ret = (cgrp == target); 3682 ret = (cgrp == target);
@@ -3693,3 +4043,154 @@ css_get_next(struct cgroup_subsys *ss, int id,
3693 return ret; 4043 return ret;
3694} 4044}
3695 4045
4046#ifdef CONFIG_CGROUP_DEBUG
4047static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
4048 struct cgroup *cont)
4049{
4050 struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
4051
4052 if (!css)
4053 return ERR_PTR(-ENOMEM);
4054
4055 return css;
4056}
4057
4058static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
4059{
4060 kfree(cont->subsys[debug_subsys_id]);
4061}
4062
4063static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
4064{
4065 return atomic_read(&cont->count);
4066}
4067
4068static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
4069{
4070 return cgroup_task_count(cont);
4071}
4072
4073static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
4074{
4075 return (u64)(unsigned long)current->cgroups;
4076}
4077
4078static u64 current_css_set_refcount_read(struct cgroup *cont,
4079 struct cftype *cft)
4080{
4081 u64 count;
4082
4083 rcu_read_lock();
4084 count = atomic_read(&current->cgroups->refcount);
4085 rcu_read_unlock();
4086 return count;
4087}
4088
4089static int current_css_set_cg_links_read(struct cgroup *cont,
4090 struct cftype *cft,
4091 struct seq_file *seq)
4092{
4093 struct cg_cgroup_link *link;
4094 struct css_set *cg;
4095
4096 read_lock(&css_set_lock);
4097 rcu_read_lock();
4098 cg = rcu_dereference(current->cgroups);
4099 list_for_each_entry(link, &cg->cg_links, cg_link_list) {
4100 struct cgroup *c = link->cgrp;
4101 const char *name;
4102
4103 if (c->dentry)
4104 name = c->dentry->d_name.name;
4105 else
4106 name = "?";
4107 seq_printf(seq, "Root %d group %s\n",
4108 c->root->hierarchy_id, name);
4109 }
4110 rcu_read_unlock();
4111 read_unlock(&css_set_lock);
4112 return 0;
4113}
4114
4115#define MAX_TASKS_SHOWN_PER_CSS 25
4116static int cgroup_css_links_read(struct cgroup *cont,
4117 struct cftype *cft,
4118 struct seq_file *seq)
4119{
4120 struct cg_cgroup_link *link;
4121
4122 read_lock(&css_set_lock);
4123 list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
4124 struct css_set *cg = link->cg;
4125 struct task_struct *task;
4126 int count = 0;
4127 seq_printf(seq, "css_set %p\n", cg);
4128 list_for_each_entry(task, &cg->tasks, cg_list) {
4129 if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
4130 seq_puts(seq, " ...\n");
4131 break;
4132 } else {
4133 seq_printf(seq, " task %d\n",
4134 task_pid_vnr(task));
4135 }
4136 }
4137 }
4138 read_unlock(&css_set_lock);
4139 return 0;
4140}
4141
4142static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
4143{
4144 return test_bit(CGRP_RELEASABLE, &cgrp->flags);
4145}
4146
4147static struct cftype debug_files[] = {
4148 {
4149 .name = "cgroup_refcount",
4150 .read_u64 = cgroup_refcount_read,
4151 },
4152 {
4153 .name = "taskcount",
4154 .read_u64 = debug_taskcount_read,
4155 },
4156
4157 {
4158 .name = "current_css_set",
4159 .read_u64 = current_css_set_read,
4160 },
4161
4162 {
4163 .name = "current_css_set_refcount",
4164 .read_u64 = current_css_set_refcount_read,
4165 },
4166
4167 {
4168 .name = "current_css_set_cg_links",
4169 .read_seq_string = current_css_set_cg_links_read,
4170 },
4171
4172 {
4173 .name = "cgroup_css_links",
4174 .read_seq_string = cgroup_css_links_read,
4175 },
4176
4177 {
4178 .name = "releasable",
4179 .read_u64 = releasable_read,
4180 },
4181};
4182
4183static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
4184{
4185 return cgroup_add_files(cont, ss, debug_files,
4186 ARRAY_SIZE(debug_files));
4187}
4188
4189struct cgroup_subsys debug_subsys = {
4190 .name = "debug",
4191 .create = debug_create,
4192 .destroy = debug_destroy,
4193 .populate = debug_populate,
4194 .subsys_id = debug_subsys_id,
4195};
4196#endif /* CONFIG_CGROUP_DEBUG */
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c
deleted file mode 100644
index 0c92d797baa6..000000000000
--- a/kernel/cgroup_debug.c
+++ /dev/null
@@ -1,105 +0,0 @@
1/*
2 * kernel/cgroup_debug.c - Example cgroup subsystem that
3 * exposes debug info
4 *
5 * Copyright (C) Google Inc, 2007
6 *
7 * Developed by Paul Menage (menage@google.com)
8 *
9 */
10
11#include <linux/cgroup.h>
12#include <linux/fs.h>
13#include <linux/slab.h>
14#include <linux/rcupdate.h>
15
16#include <asm/atomic.h>
17
18static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
19 struct cgroup *cont)
20{
21 struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
22
23 if (!css)
24 return ERR_PTR(-ENOMEM);
25
26 return css;
27}
28
29static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
30{
31 kfree(cont->subsys[debug_subsys_id]);
32}
33
34static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
35{
36 return atomic_read(&cont->count);
37}
38
39static u64 taskcount_read(struct cgroup *cont, struct cftype *cft)
40{
41 u64 count;
42
43 count = cgroup_task_count(cont);
44 return count;
45}
46
47static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
48{
49 return (u64)(long)current->cgroups;
50}
51
52static u64 current_css_set_refcount_read(struct cgroup *cont,
53 struct cftype *cft)
54{
55 u64 count;
56
57 rcu_read_lock();
58 count = atomic_read(&current->cgroups->refcount);
59 rcu_read_unlock();
60 return count;
61}
62
63static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
64{
65 return test_bit(CGRP_RELEASABLE, &cgrp->flags);
66}
67
68static struct cftype files[] = {
69 {
70 .name = "cgroup_refcount",
71 .read_u64 = cgroup_refcount_read,
72 },
73 {
74 .name = "taskcount",
75 .read_u64 = taskcount_read,
76 },
77
78 {
79 .name = "current_css_set",
80 .read_u64 = current_css_set_read,
81 },
82
83 {
84 .name = "current_css_set_refcount",
85 .read_u64 = current_css_set_refcount_read,
86 },
87
88 {
89 .name = "releasable",
90 .read_u64 = releasable_read,
91 },
92};
93
94static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
95{
96 return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
97}
98
99struct cgroup_subsys debug_subsys = {
100 .name = "debug",
101 .create = debug_create,
102 .destroy = debug_destroy,
103 .populate = debug_populate,
104 .subsys_id = debug_subsys_id,
105};
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index fb249e2bcada..59e9ef6aab40 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -159,7 +159,7 @@ static bool is_task_frozen_enough(struct task_struct *task)
159 */ 159 */
160static int freezer_can_attach(struct cgroup_subsys *ss, 160static int freezer_can_attach(struct cgroup_subsys *ss,
161 struct cgroup *new_cgroup, 161 struct cgroup *new_cgroup,
162 struct task_struct *task) 162 struct task_struct *task, bool threadgroup)
163{ 163{
164 struct freezer *freezer; 164 struct freezer *freezer;
165 165
@@ -177,6 +177,19 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
177 if (freezer->state == CGROUP_FROZEN) 177 if (freezer->state == CGROUP_FROZEN)
178 return -EBUSY; 178 return -EBUSY;
179 179
180 if (threadgroup) {
181 struct task_struct *c;
182
183 rcu_read_lock();
184 list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
185 if (is_task_frozen_enough(c)) {
186 rcu_read_unlock();
187 return -EBUSY;
188 }
189 }
190 rcu_read_unlock();
191 }
192
180 return 0; 193 return 0;
181} 194}
182 195
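The same veto-the-whole-group pattern reappears in cpuset below; condensed to its shape as a kernel-style sketch (not a drop-in, with the per-task test abstracted out):

/* Shape of the threadgroup checks in the can_attach() callbacks:
 * reject the whole move if any sibling thread fails the per-task test. */
#include <linux/rcupdate.h>
#include <linux/sched.h>

static int check_threadgroup(struct task_struct *task,
			     int (*test)(struct task_struct *))
{
	struct task_struct *c;
	int ret = test(task);		/* the leader first */

	if (ret)
		return ret;
	rcu_read_lock();
	list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
		ret = test(c);
		if (ret) {
			rcu_read_unlock();
			return ret;
		}
	}
	rcu_read_unlock();
	return 0;
}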
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 7e75a41bd508..b5cb469d2545 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1324,9 +1324,10 @@ static int fmeter_getrate(struct fmeter *fmp)
1324static cpumask_var_t cpus_attach; 1324static cpumask_var_t cpus_attach;
1325 1325
1326/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ 1326/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
1327static int cpuset_can_attach(struct cgroup_subsys *ss, 1327static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
1328 struct cgroup *cont, struct task_struct *tsk) 1328 struct task_struct *tsk, bool threadgroup)
1329{ 1329{
1330 int ret;
1330 struct cpuset *cs = cgroup_cs(cont); 1331 struct cpuset *cs = cgroup_cs(cont);
1331 1332
1332 if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) 1333 if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
@@ -1343,18 +1344,51 @@ static int cpuset_can_attach(struct cgroup_subsys *ss,
1343 if (tsk->flags & PF_THREAD_BOUND) 1344 if (tsk->flags & PF_THREAD_BOUND)
1344 return -EINVAL; 1345 return -EINVAL;
1345 1346
1346 return security_task_setscheduler(tsk, 0, NULL); 1347 ret = security_task_setscheduler(tsk, 0, NULL);
1348 if (ret)
1349 return ret;
1350 if (threadgroup) {
1351 struct task_struct *c;
1352
1353 rcu_read_lock();
1354 list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
1355 ret = security_task_setscheduler(c, 0, NULL);
1356 if (ret) {
1357 rcu_read_unlock();
1358 return ret;
1359 }
1360 }
1361 rcu_read_unlock();
1362 }
1363 return 0;
1364}
1365
1366static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
1367 struct cpuset *cs)
1368{
1369 int err;
1370 /*
1371 * can_attach beforehand should guarantee that this doesn't fail.
1372 * TODO: have a better way to handle failure here
1373 */
1374 err = set_cpus_allowed_ptr(tsk, cpus_attach);
1375 WARN_ON_ONCE(err);
1376
1377 task_lock(tsk);
1378 cpuset_change_task_nodemask(tsk, to);
1379 task_unlock(tsk);
1380 cpuset_update_task_spread_flag(cs, tsk);
1381
1347} 1382}
1348 1383
1349static void cpuset_attach(struct cgroup_subsys *ss, 1384static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
1350 struct cgroup *cont, struct cgroup *oldcont, 1385 struct cgroup *oldcont, struct task_struct *tsk,
1351 struct task_struct *tsk) 1386 bool threadgroup)
1352{ 1387{
1353 nodemask_t from, to; 1388 nodemask_t from, to;
1354 struct mm_struct *mm; 1389 struct mm_struct *mm;
1355 struct cpuset *cs = cgroup_cs(cont); 1390 struct cpuset *cs = cgroup_cs(cont);
1356 struct cpuset *oldcs = cgroup_cs(oldcont); 1391 struct cpuset *oldcs = cgroup_cs(oldcont);
1357 int err;
1358 1392
1359 if (cs == &top_cpuset) { 1393 if (cs == &top_cpuset) {
1360 cpumask_copy(cpus_attach, cpu_possible_mask); 1394 cpumask_copy(cpus_attach, cpu_possible_mask);
@@ -1363,15 +1397,19 @@ static void cpuset_attach(struct cgroup_subsys *ss,
1363 guarantee_online_cpus(cs, cpus_attach); 1397 guarantee_online_cpus(cs, cpus_attach);
1364 guarantee_online_mems(cs, &to); 1398 guarantee_online_mems(cs, &to);
1365 } 1399 }
1366 err = set_cpus_allowed_ptr(tsk, cpus_attach);
1367 if (err)
1368 return;
1369 1400
1370 task_lock(tsk); 1401 /* do per-task migration stuff possibly for each in the threadgroup */
1371 cpuset_change_task_nodemask(tsk, &to); 1402 cpuset_attach_task(tsk, &to, cs);
1372 task_unlock(tsk); 1403 if (threadgroup) {
1373 cpuset_update_task_spread_flag(cs, tsk); 1404 struct task_struct *c;
1405 rcu_read_lock();
1406 list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
1407 cpuset_attach_task(c, &to, cs);
1408 }
1409 rcu_read_unlock();
1410 }
1374 1411
1412 /* change mm; only needs to be done once even if threadgroup */
1375 from = oldcs->mems_allowed; 1413 from = oldcs->mems_allowed;
1376 to = cs->mems_allowed; 1414 to = cs->mems_allowed;
1377 mm = get_task_mm(tsk); 1415 mm = get_task_mm(tsk);
diff --git a/kernel/exit.c b/kernel/exit.c
index 60d6fdcc9265..5859f598c951 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -976,8 +976,6 @@ NORET_TYPE void do_exit(long code)
976 disassociate_ctty(1); 976 disassociate_ctty(1);
977 977
978 module_put(task_thread_info(tsk)->exec_domain->module); 978 module_put(task_thread_info(tsk)->exec_domain->module);
979 if (tsk->binfmt)
980 module_put(tsk->binfmt->module);
981 979
982 proc_exit_connector(tsk); 980 proc_exit_connector(tsk);
983 981
@@ -1097,28 +1095,28 @@ struct wait_opts {
1097 int __user *wo_stat; 1095 int __user *wo_stat;
1098 struct rusage __user *wo_rusage; 1096 struct rusage __user *wo_rusage;
1099 1097
1098 wait_queue_t child_wait;
1100 int notask_error; 1099 int notask_error;
1101}; 1100};
1102 1101
1103static struct pid *task_pid_type(struct task_struct *task, enum pid_type type) 1102static inline
1103struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
1104{ 1104{
1105 struct pid *pid = NULL; 1105 if (type != PIDTYPE_PID)
1106 if (type == PIDTYPE_PID) 1106 task = task->group_leader;
1107 pid = task->pids[type].pid; 1107 return task->pids[type].pid;
1108 else if (type < PIDTYPE_MAX)
1109 pid = task->group_leader->pids[type].pid;
1110 return pid;
1111} 1108}
1112 1109
1113static int eligible_child(struct wait_opts *wo, struct task_struct *p) 1110static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
1114{ 1111{
1115 int err; 1112 return wo->wo_type == PIDTYPE_MAX ||
1116 1113 task_pid_type(p, wo->wo_type) == wo->wo_pid;
1117 if (wo->wo_type < PIDTYPE_MAX) { 1114}
1118 if (task_pid_type(p, wo->wo_type) != wo->wo_pid)
1119 return 0;
1120 }
1121 1115
1116static int eligible_child(struct wait_opts *wo, struct task_struct *p)
1117{
1118 if (!eligible_pid(wo, p))
1119 return 0;
1122 /* Wait for all children (clone and not) if __WALL is set; 1120 /* Wait for all children (clone and not) if __WALL is set;
1123 * otherwise, wait for clone children *only* if __WCLONE is 1121 * otherwise, wait for clone children *only* if __WCLONE is
1124 * set; otherwise, wait for non-clone children *only*. (Note: 1122 * set; otherwise, wait for non-clone children *only*. (Note:
@@ -1128,10 +1126,6 @@ static int eligible_child(struct wait_opts *wo, struct task_struct *p)
1128 && !(wo->wo_flags & __WALL)) 1126 && !(wo->wo_flags & __WALL))
1129 return 0; 1127 return 0;
1130 1128
1131 err = security_task_wait(p);
1132 if (err)
1133 return err;
1134
1135 return 1; 1129 return 1;
1136} 1130}
1137 1131
@@ -1144,18 +1138,20 @@ static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
1144 1138
1145 put_task_struct(p); 1139 put_task_struct(p);
1146 infop = wo->wo_info; 1140 infop = wo->wo_info;
1147 if (!retval) 1141 if (infop) {
1148 retval = put_user(SIGCHLD, &infop->si_signo); 1142 if (!retval)
1149 if (!retval) 1143 retval = put_user(SIGCHLD, &infop->si_signo);
1150 retval = put_user(0, &infop->si_errno); 1144 if (!retval)
1151 if (!retval) 1145 retval = put_user(0, &infop->si_errno);
1152 retval = put_user((short)why, &infop->si_code); 1146 if (!retval)
1153 if (!retval) 1147 retval = put_user((short)why, &infop->si_code);
1154 retval = put_user(pid, &infop->si_pid); 1148 if (!retval)
1155 if (!retval) 1149 retval = put_user(pid, &infop->si_pid);
1156 retval = put_user(uid, &infop->si_uid); 1150 if (!retval)
1157 if (!retval) 1151 retval = put_user(uid, &infop->si_uid);
1158 retval = put_user(status, &infop->si_status); 1152 if (!retval)
1153 retval = put_user(status, &infop->si_status);
1154 }
1159 if (!retval) 1155 if (!retval)
1160 retval = pid; 1156 retval = pid;
1161 return retval; 1157 return retval;
@@ -1485,13 +1481,14 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
1485 * then ->notask_error is 0 if @p is an eligible child, 1481 * then ->notask_error is 0 if @p is an eligible child,
1486 * or another error from security_task_wait(), or still -ECHILD. 1482 * or another error from security_task_wait(), or still -ECHILD.
1487 */ 1483 */
1488static int wait_consider_task(struct wait_opts *wo, struct task_struct *parent, 1484static int wait_consider_task(struct wait_opts *wo, int ptrace,
1489 int ptrace, struct task_struct *p) 1485 struct task_struct *p)
1490{ 1486{
1491 int ret = eligible_child(wo, p); 1487 int ret = eligible_child(wo, p);
1492 if (!ret) 1488 if (!ret)
1493 return ret; 1489 return ret;
1494 1490
1491 ret = security_task_wait(p);
1495 if (unlikely(ret < 0)) { 1492 if (unlikely(ret < 0)) {
1496 /* 1493 /*
1497 * If we have not yet seen any eligible child, 1494 * If we have not yet seen any eligible child,
@@ -1553,7 +1550,7 @@ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
1553 * Do not consider detached threads. 1550 * Do not consider detached threads.
1554 */ 1551 */
1555 if (!task_detached(p)) { 1552 if (!task_detached(p)) {
1556 int ret = wait_consider_task(wo, tsk, 0, p); 1553 int ret = wait_consider_task(wo, 0, p);
1557 if (ret) 1554 if (ret)
1558 return ret; 1555 return ret;
1559 } 1556 }
@@ -1567,7 +1564,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
1567 struct task_struct *p; 1564 struct task_struct *p;
1568 1565
1569 list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { 1566 list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
1570 int ret = wait_consider_task(wo, tsk, 1, p); 1567 int ret = wait_consider_task(wo, 1, p);
1571 if (ret) 1568 if (ret)
1572 return ret; 1569 return ret;
1573 } 1570 }
@@ -1575,15 +1572,38 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
1575 return 0; 1572 return 0;
1576} 1573}
1577 1574
1575static int child_wait_callback(wait_queue_t *wait, unsigned mode,
1576 int sync, void *key)
1577{
1578 struct wait_opts *wo = container_of(wait, struct wait_opts,
1579 child_wait);
1580 struct task_struct *p = key;
1581
1582 if (!eligible_pid(wo, p))
1583 return 0;
1584
1585 if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent)
1586 return 0;
1587
1588 return default_wake_function(wait, mode, sync, key);
1589}
1590
1591void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
1592{
1593 __wake_up_sync_key(&parent->signal->wait_chldexit,
1594 TASK_INTERRUPTIBLE, 1, p);
1595}
1596
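do_wait() now sleeps on a function wait-queue entry, so a child exit wakes only waiters whose filter accepts it instead of every sleeper on wait_chldexit. The general shape of such a filtered wake-up, as a kernel-style sketch with a hypothetical predicate:

/* Kernel-style sketch (not a drop-in): a wait-queue entry whose wake
 * function filters on the key before waking the sleeping task. */
#include <linux/wait.h>
#include <linux/sched.h>

static int filtered_wake(wait_queue_t *wait, unsigned mode,
			 int sync, void *key)
{
	struct task_struct *p = key;	/* here: the exiting child */

	if (!interesting(wait, p))	/* hypothetical predicate */
		return 0;		/* leave this waiter asleep */
	return default_wake_function(wait, mode, sync, key);
}

static void arm(wait_queue_head_t *wq, wait_queue_t *entry)
{
	init_waitqueue_func_entry(entry, filtered_wake);
	entry->private = current;
	add_wait_queue(wq, entry);
}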
1578static long do_wait(struct wait_opts *wo) 1597static long do_wait(struct wait_opts *wo)
1579{ 1598{
1580 DECLARE_WAITQUEUE(wait, current);
1581 struct task_struct *tsk; 1599 struct task_struct *tsk;
1582 int retval; 1600 int retval;
1583 1601
1584 trace_sched_process_wait(wo->wo_pid); 1602 trace_sched_process_wait(wo->wo_pid);
1585 1603
1586 add_wait_queue(&current->signal->wait_chldexit,&wait); 1604 init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);
1605 wo->child_wait.private = current;
1606 add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
1587repeat: 1607repeat:
1588 /* 1608 /*
1589 * If there is nothing that can match our criteria just get out. 1609
@@ -1624,32 +1644,7 @@ notask:
1624 } 1644 }
1625end: 1645end:
1626 __set_current_state(TASK_RUNNING); 1646 __set_current_state(TASK_RUNNING);
1627 remove_wait_queue(&current->signal->wait_chldexit,&wait); 1647 remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
1628 if (wo->wo_info) {
1629 struct siginfo __user *infop = wo->wo_info;
1630
1631 if (retval > 0)
1632 retval = 0;
1633 else {
1634 /*
1635 * For a WNOHANG return, clear out all the fields
1636 * we would set so the user can easily tell the
1637 * difference.
1638 */
1639 if (!retval)
1640 retval = put_user(0, &infop->si_signo);
1641 if (!retval)
1642 retval = put_user(0, &infop->si_errno);
1643 if (!retval)
1644 retval = put_user(0, &infop->si_code);
1645 if (!retval)
1646 retval = put_user(0, &infop->si_pid);
1647 if (!retval)
1648 retval = put_user(0, &infop->si_uid);
1649 if (!retval)
1650 retval = put_user(0, &infop->si_status);
1651 }
1652 }
1653 return retval; 1648 return retval;
1654} 1649}
1655 1650
@@ -1694,6 +1689,29 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
1694 wo.wo_stat = NULL; 1689 wo.wo_stat = NULL;
1695 wo.wo_rusage = ru; 1690 wo.wo_rusage = ru;
1696 ret = do_wait(&wo); 1691 ret = do_wait(&wo);
1692
1693 if (ret > 0) {
1694 ret = 0;
1695 } else if (infop) {
1696 /*
1697 * For a WNOHANG return, clear out all the fields
1698 * we would set so the user can easily tell the
1699 * difference.
1700 */
1701 if (!ret)
1702 ret = put_user(0, &infop->si_signo);
1703 if (!ret)
1704 ret = put_user(0, &infop->si_errno);
1705 if (!ret)
1706 ret = put_user(0, &infop->si_code);
1707 if (!ret)
1708 ret = put_user(0, &infop->si_pid);
1709 if (!ret)
1710 ret = put_user(0, &infop->si_uid);
1711 if (!ret)
1712 ret = put_user(0, &infop->si_status);
1713 }
1714
1697 put_pid(pid); 1715 put_pid(pid);
1698 1716
1699 /* avoid REGPARM breakage on x86: */ 1717 /* avoid REGPARM breakage on x86: */
diff --git a/kernel/fork.c b/kernel/fork.c
index 51ad0b0b7266..266c6af6ef1b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -434,6 +434,14 @@ __setup("coredump_filter=", coredump_filter_setup);
434 434
435#include <linux/init_task.h> 435#include <linux/init_task.h>
436 436
437static void mm_init_aio(struct mm_struct *mm)
438{
439#ifdef CONFIG_AIO
440 spin_lock_init(&mm->ioctx_lock);
441 INIT_HLIST_HEAD(&mm->ioctx_list);
442#endif
443}
444
437static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) 445static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
438{ 446{
439 atomic_set(&mm->mm_users, 1); 447 atomic_set(&mm->mm_users, 1);
@@ -447,10 +455,9 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
447 set_mm_counter(mm, file_rss, 0); 455 set_mm_counter(mm, file_rss, 0);
448 set_mm_counter(mm, anon_rss, 0); 456 set_mm_counter(mm, anon_rss, 0);
449 spin_lock_init(&mm->page_table_lock); 457 spin_lock_init(&mm->page_table_lock);
450 spin_lock_init(&mm->ioctx_lock);
451 INIT_HLIST_HEAD(&mm->ioctx_list);
452 mm->free_area_cache = TASK_UNMAPPED_BASE; 458 mm->free_area_cache = TASK_UNMAPPED_BASE;
453 mm->cached_hole_size = ~0UL; 459 mm->cached_hole_size = ~0UL;
460 mm_init_aio(mm);
454 mm_init_owner(mm, p); 461 mm_init_owner(mm, p);
455 462
456 if (likely(!mm_alloc_pgd(mm))) { 463 if (likely(!mm_alloc_pgd(mm))) {
@@ -511,6 +518,8 @@ void mmput(struct mm_struct *mm)
511 spin_unlock(&mmlist_lock); 518 spin_unlock(&mmlist_lock);
512 } 519 }
513 put_swap_token(mm); 520 put_swap_token(mm);
521 if (mm->binfmt)
522 module_put(mm->binfmt->module);
514 mmdrop(mm); 523 mmdrop(mm);
515 } 524 }
516} 525}
@@ -636,9 +645,14 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
636 mm->hiwater_rss = get_mm_rss(mm); 645 mm->hiwater_rss = get_mm_rss(mm);
637 mm->hiwater_vm = mm->total_vm; 646 mm->hiwater_vm = mm->total_vm;
638 647
648 if (mm->binfmt && !try_module_get(mm->binfmt->module))
649 goto free_pt;
650
639 return mm; 651 return mm;
640 652
641free_pt: 653free_pt:
 654 /* don't module_put() the binfmt in mmput(); the reference was never taken */
655 mm->binfmt = NULL;
642 mmput(mm); 656 mmput(mm);
643 657
644fail_nomem: 658fail_nomem:
@@ -979,6 +993,16 @@ static struct task_struct *copy_process(unsigned long clone_flags,
979 if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) 993 if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
980 return ERR_PTR(-EINVAL); 994 return ERR_PTR(-EINVAL);
981 995
996 /*
997 * Siblings of global init remain as zombies on exit since they are
998 * not reaped by their parent (swapper). To solve this and to avoid
999 * multi-rooted process trees, prevent global and container-inits
1000 * from creating siblings.
1001 */
1002 if ((clone_flags & CLONE_PARENT) &&
1003 current->signal->flags & SIGNAL_UNKILLABLE)
1004 return ERR_PTR(-EINVAL);
1005
982 retval = security_task_create(clone_flags); 1006 retval = security_task_create(clone_flags);
983 if (retval) 1007 if (retval)
984 goto fork_out; 1008 goto fork_out;
@@ -1020,9 +1044,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1020 if (!try_module_get(task_thread_info(p)->exec_domain->module)) 1044 if (!try_module_get(task_thread_info(p)->exec_domain->module))
1021 goto bad_fork_cleanup_count; 1045 goto bad_fork_cleanup_count;
1022 1046
1023 if (p->binfmt && !try_module_get(p->binfmt->module))
1024 goto bad_fork_cleanup_put_domain;
1025
1026 p->did_exec = 0; 1047 p->did_exec = 0;
1027 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ 1048 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
1028 copy_flags(clone_flags, p); 1049 copy_flags(clone_flags, p);
@@ -1310,9 +1331,6 @@ bad_fork_cleanup_cgroup:
1310#endif 1331#endif
1311 cgroup_exit(p, cgroup_callbacks_done); 1332 cgroup_exit(p, cgroup_callbacks_done);
1312 delayacct_tsk_free(p); 1333 delayacct_tsk_free(p);
1313 if (p->binfmt)
1314 module_put(p->binfmt->module);
1315bad_fork_cleanup_put_domain:
1316 module_put(task_thread_info(p)->exec_domain->module); 1334 module_put(task_thread_info(p)->exec_domain->module);
1317bad_fork_cleanup_count: 1335bad_fork_cleanup_count:
1318 atomic_dec(&p->cred->user->processes); 1336 atomic_dec(&p->cred->user->processes);
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index 654efd09f6a9..70a298d6da71 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -34,7 +34,7 @@ config GCOV_KERNEL
34config GCOV_PROFILE_ALL 34config GCOV_PROFILE_ALL
35 bool "Profile entire Kernel" 35 bool "Profile entire Kernel"
36 depends on GCOV_KERNEL 36 depends on GCOV_KERNEL
37 depends on S390 || X86 || (PPC && EXPERIMENTAL) 37 depends on S390 || X86 || (PPC && EXPERIMENTAL) || MICROBLAZE
38 default n 38 default n
39 ---help--- 39 ---help---
 40 This option activates profiling for the entire kernel. 40 This option activates profiling for the entire kernel.
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 022a4927b785..d4e841747400 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -171,12 +171,12 @@ static unsigned long timeout_jiffies(unsigned long timeout)
171 * Process updating of timeout sysctl 171 * Process updating of timeout sysctl
172 */ 172 */
173int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, 173int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
174 struct file *filp, void __user *buffer, 174 void __user *buffer,
175 size_t *lenp, loff_t *ppos) 175 size_t *lenp, loff_t *ppos)
176{ 176{
177 int ret; 177 int ret;
178 178
179 ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); 179 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
180 180
181 if (ret || !write) 181 if (ret || !write)
182 goto out; 182 goto out;
diff --git a/kernel/module.c b/kernel/module.c
index e6bc4b28aa62..5a29397ca4b6 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1797,6 +1797,17 @@ static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs,
1797 } 1797 }
1798} 1798}
1799 1799
1800static void free_modinfo(struct module *mod)
1801{
1802 struct module_attribute *attr;
1803 int i;
1804
1805 for (i = 0; (attr = modinfo_attrs[i]); i++) {
1806 if (attr->free)
1807 attr->free(mod);
1808 }
1809}
1810
1800#ifdef CONFIG_KALLSYMS 1811#ifdef CONFIG_KALLSYMS
1801 1812
1802/* lookup symbol in given range of kernel_symbols */ 1813/* lookup symbol in given range of kernel_symbols */
@@ -1862,13 +1873,93 @@ static char elf_type(const Elf_Sym *sym,
 	return '?';
 }
 
+static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
+			   unsigned int shnum)
+{
+	const Elf_Shdr *sec;
+
+	if (src->st_shndx == SHN_UNDEF
+	    || src->st_shndx >= shnum
+	    || !src->st_name)
+		return false;
+
+	sec = sechdrs + src->st_shndx;
+	if (!(sec->sh_flags & SHF_ALLOC)
+#ifndef CONFIG_KALLSYMS_ALL
+	    || !(sec->sh_flags & SHF_EXECINSTR)
+#endif
+	    || (sec->sh_entsize & INIT_OFFSET_MASK))
+		return false;
+
+	return true;
+}
+
+static unsigned long layout_symtab(struct module *mod,
+				   Elf_Shdr *sechdrs,
+				   unsigned int symindex,
+				   unsigned int strindex,
+				   const Elf_Ehdr *hdr,
+				   const char *secstrings,
+				   unsigned long *pstroffs,
+				   unsigned long *strmap)
+{
+	unsigned long symoffs;
+	Elf_Shdr *symsect = sechdrs + symindex;
+	Elf_Shdr *strsect = sechdrs + strindex;
+	const Elf_Sym *src;
+	const char *strtab;
+	unsigned int i, nsrc, ndst;
+
+	/* Put symbol section at end of init part of module. */
+	symsect->sh_flags |= SHF_ALLOC;
+	symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,
+					 symindex) | INIT_OFFSET_MASK;
+	DEBUGP("\t%s\n", secstrings + symsect->sh_name);
+
+	src = (void *)hdr + symsect->sh_offset;
+	nsrc = symsect->sh_size / sizeof(*src);
+	strtab = (void *)hdr + strsect->sh_offset;
+	for (ndst = i = 1; i < nsrc; ++i, ++src)
+		if (is_core_symbol(src, sechdrs, hdr->e_shnum)) {
+			unsigned int j = src->st_name;
+
+			while (!__test_and_set_bit(j, strmap) && strtab[j])
+				++j;
+			++ndst;
+		}
+
+	/* Append room for core symbols at end of core part. */
+	symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
+	mod->core_size = symoffs + ndst * sizeof(Elf_Sym);
+
+	/* Put string table section at end of init part of module. */
+	strsect->sh_flags |= SHF_ALLOC;
+	strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
+					 strindex) | INIT_OFFSET_MASK;
+	DEBUGP("\t%s\n", secstrings + strsect->sh_name);
+
+	/* Append room for core symbols' strings at end of core part. */
+	*pstroffs = mod->core_size;
+	__set_bit(0, strmap);
+	mod->core_size += bitmap_weight(strmap, strsect->sh_size);
+
+	return symoffs;
+}
+
 static void add_kallsyms(struct module *mod,
 			 Elf_Shdr *sechdrs,
+			 unsigned int shnum,
 			 unsigned int symindex,
 			 unsigned int strindex,
-			 const char *secstrings)
+			 unsigned long symoffs,
+			 unsigned long stroffs,
+			 const char *secstrings,
+			 unsigned long *strmap)
 {
-	unsigned int i;
+	unsigned int i, ndst;
+	const Elf_Sym *src;
+	Elf_Sym *dst;
+	char *s;
 
 	mod->symtab = (void *)sechdrs[symindex].sh_addr;
 	mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym);
@@ -1878,13 +1969,44 @@ static void add_kallsyms(struct module *mod,
 	for (i = 0; i < mod->num_symtab; i++)
 		mod->symtab[i].st_info
 			= elf_type(&mod->symtab[i], sechdrs, secstrings, mod);
+
+	mod->core_symtab = dst = mod->module_core + symoffs;
+	src = mod->symtab;
+	*dst = *src;
+	for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) {
+		if (!is_core_symbol(src, sechdrs, shnum))
+			continue;
+		dst[ndst] = *src;
+		dst[ndst].st_name = bitmap_weight(strmap, dst[ndst].st_name);
+		++ndst;
+	}
+	mod->core_num_syms = ndst;
+
+	mod->core_strtab = s = mod->module_core + stroffs;
+	for (*s = 0, i = 1; i < sechdrs[strindex].sh_size; ++i)
+		if (test_bit(i, strmap))
+			*++s = mod->strtab[i];
 }
 #else
+static inline unsigned long layout_symtab(struct module *mod,
+					  Elf_Shdr *sechdrs,
+					  unsigned int symindex,
+					  unsigned int strindex,
+					  const Elf_Ehdr *hdr,
+					  const char *secstrings,
+					  unsigned long *pstroffs,
+					  unsigned long *strmap)
+{
+	return 0;
+}
 static inline void add_kallsyms(struct module *mod,
 				Elf_Shdr *sechdrs,
+				unsigned int shnum,
 				unsigned int symindex,
 				unsigned int strindex,
-				const char *secstrings)
+				unsigned long symoffs,
+				unsigned long stroffs,
+				const char *secstrings,
+				const unsigned long *strmap)
 {
 }
 #endif /* CONFIG_KALLSYMS */
@@ -1959,6 +2081,9 @@ static noinline struct module *load_module(void __user *umod,
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
+#ifdef CONFIG_KALLSYMS
+	unsigned long symoffs, stroffs, *strmap;
+#endif
 	mm_segment_t old_fs;
 
 	DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -2040,11 +2165,6 @@ static noinline struct module *load_module(void __user *umod,
 	/* Don't keep modinfo and version sections. */
 	sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
 	sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
-#ifdef CONFIG_KALLSYMS
-	/* Keep symbol and string tables for decoding later. */
-	sechdrs[symindex].sh_flags |= SHF_ALLOC;
-	sechdrs[strindex].sh_flags |= SHF_ALLOC;
-#endif
 
 	/* Check module struct version now, before we try to use module. */
 	if (!check_modstruct_version(sechdrs, versindex, mod)) {
@@ -2080,6 +2200,13 @@ static noinline struct module *load_module(void __user *umod,
 		goto free_hdr;
 	}
 
+	strmap = kzalloc(BITS_TO_LONGS(sechdrs[strindex].sh_size)
+			 * sizeof(long), GFP_KERNEL);
+	if (!strmap) {
+		err = -ENOMEM;
+		goto free_mod;
+	}
+
 	if (find_module(mod->name)) {
 		err = -EEXIST;
 		goto free_mod;
@@ -2109,6 +2236,8 @@ static noinline struct module *load_module(void __user *umod,
 	   this is done generically; there doesn't appear to be any
 	   special cases for the architectures. */
 	layout_sections(mod, hdr, sechdrs, secstrings);
+	symoffs = layout_symtab(mod, sechdrs, symindex, strindex, hdr,
+				secstrings, &stroffs, strmap);
 
 	/* Do the allocs. */
 	ptr = module_alloc_update_bounds(mod->core_size);
@@ -2313,7 +2442,10 @@ static noinline struct module *load_module(void __user *umod,
 	percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr,
 		       sechdrs[pcpuindex].sh_size);
 
-	add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
+	add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex,
+		     symoffs, stroffs, secstrings, strmap);
+	kfree(strmap);
+	strmap = NULL;
 
 	if (!mod->taints) {
 		struct _ddebug *debug;
@@ -2385,13 +2517,14 @@ static noinline struct module *load_module(void __user *umod,
 	synchronize_sched();
 	module_arch_cleanup(mod);
  cleanup:
+	free_modinfo(mod);
 	kobject_del(&mod->mkobj.kobj);
 	kobject_put(&mod->mkobj.kobj);
  free_unload:
 	module_unload_free(mod);
 #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
- free_init:
 	percpu_modfree(mod->refptr);
+ free_init:
 #endif
 	module_free(mod, mod->module_init);
  free_core:
@@ -2402,6 +2535,7 @@ static noinline struct module *load_module(void __user *umod,
 	percpu_modfree(percpu);
  free_mod:
 	kfree(args);
+	kfree(strmap);
  free_hdr:
 	vfree(hdr);
 	return ERR_PTR(err);
@@ -2491,6 +2625,11 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
 	/* Drop initial reference. */
 	module_put(mod);
 	trim_init_extable(mod);
+#ifdef CONFIG_KALLSYMS
+	mod->num_symtab = mod->core_num_syms;
+	mod->symtab = mod->core_symtab;
+	mod->strtab = mod->core_strtab;
+#endif
 	module_free(mod, mod->module_init);
 	mod->module_init = NULL;
 	mod->init_size = 0;
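
The module.c hunks above are the actual space saving: layout_symtab() runs before the module image is allocated, marks every byte of every "core" symbol name in the strmap bitmap and reserves room at the end of the core section; add_kallsyms() later copies only the surviving Elf_Syms and rewrites each st_name to the popcount of the bitmap below the old offset, so the compacted string table stays consistent. A standalone sketch of that offset-remapping trick (ordinary user-space C with invented names, not kernel code):

#include <stdio.h>

#define TABSZ 32

static unsigned char keep[TABSZ];	/* one flag per strtab byte, like strmap */

/* Mark the NUL-terminated string at 'off' (and its terminator) as kept. */
static void mark(const char *strtab, unsigned off)
{
	while (!keep[off] && strtab[off])
		keep[off++] = 1;
	keep[off] = 1;			/* keep the trailing NUL too */
}

/* New offset = number of kept bytes below 'off' (what bitmap_weight() does). */
static unsigned remap(unsigned off)
{
	unsigned i, w = 0;

	for (i = 0; i < off; i++)
		w += keep[i];
	return w;
}

int main(void)
{
	const char strtab[TABSZ] = "\0init_fn\0core_fn\0more_core";
	char out[TABSZ];
	unsigned i, n = 0;

	keep[0] = 1;			/* offset 0 is the empty string */
	mark(strtab, 9);		/* keep "core_fn" */
	mark(strtab, 17);		/* keep "more_core" */

	for (i = 0; i < TABSZ; i++)	/* copy only the kept bytes */
		if (keep[i])
			out[n++] = strtab[i];

	printf("core_fn: offset %u -> %u\n", 9, remap(9));
	printf("more_core: offset %u -> %u\n", 17, remap(17));
	printf("first kept string: %s\n", out + remap(9));
	printf("compressed: %u of %u bytes\n", n, TABSZ);
	return 0;
}

Once the init sections are discarded, init_module() flips mod->symtab/strtab to the compacted copies, which is why the sys_init_module hunk updates num_symtab, symtab and strtab together.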
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c
index 5aa854f9e5ae..2a5dfec8efe0 100644
--- a/kernel/ns_cgroup.c
+++ b/kernel/ns_cgroup.c
@@ -42,8 +42,8 @@ int ns_cgroup_clone(struct task_struct *task, struct pid *pid)
  * (hence either you are in the same cgroup as task, or in an
  * ancestor cgroup thereof)
  */
-static int ns_can_attach(struct cgroup_subsys *ss,
-			 struct cgroup *new_cgroup, struct task_struct *task)
+static int ns_can_attach(struct cgroup_subsys *ss, struct cgroup *new_cgroup,
+			 struct task_struct *task, bool threadgroup)
 {
 	if (current != task) {
 		if (!capable(CAP_SYS_ADMIN))
@@ -56,6 +56,18 @@ static int ns_can_attach(struct cgroup_subsys *ss,
 	if (!cgroup_is_descendant(new_cgroup, task))
 		return -EPERM;
 
+	if (threadgroup) {
+		struct task_struct *c;
+		rcu_read_lock();
+		list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
+			if (!cgroup_is_descendant(new_cgroup, c)) {
+				rcu_read_unlock();
+				return -EPERM;
+			}
+		}
+		rcu_read_unlock();
+	}
+
 	return 0;
 }
 
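
The new threadgroup parameter lets the cgroup core ask a subsystem to vet an entire thread group before moving it: the walk over ->thread_group happens under rcu_read_lock(), and one failing sibling vetoes the whole attach. The cpu controller in kernel/sched.c below adopts the same pattern. A minimal model of that all-or-nothing check (plain C with invented types, not kernel code):

#include <stdio.h>

/* Stand-ins for task_struct and the circular ->thread_group list; the
 * kernel walks it with list_for_each_entry_rcu() under rcu_read_lock(). */
struct thread {
	struct thread *next;
	int allowed;
};

/* If any thread fails the check, refuse the whole group, mirroring the
 * threadgroup branch of ns_can_attach(). */
static int can_attach_group(struct thread *leader)
{
	struct thread *t = leader;

	do {
		if (!t->allowed)
			return -1;	/* the kernel returns -EPERM here */
		t = t->next;
	} while (t != leader);
	return 0;
}

int main(void)
{
	struct thread c = { 0, 1 }, b = { &c, 0 }, a = { &b, 1 };

	c.next = &a;			/* close the circular sibling list */
	printf("attach: %d\n", can_attach_group(&a));	/* -1: b fails */
	return 0;
}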
diff --git a/kernel/params.c b/kernel/params.c
index 7f6912ced2ba..9da58eabdcb2 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -23,6 +23,7 @@
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/slab.h>
+#include <linux/ctype.h>
 
 #if 0
 #define DEBUGP printk
@@ -87,7 +88,7 @@ static char *next_arg(char *args, char **param, char **val)
 	}
 
 	for (i = 0; args[i]; i++) {
-		if (args[i] == ' ' && !in_quote)
+		if (isspace(args[i]) && !in_quote)
 			break;
 		if (equals == 0) {
 			if (args[i] == '=')
@@ -121,7 +122,7 @@ static char *next_arg(char *args, char **param, char **val)
 	next = args + i;
 
 	/* Chew up trailing spaces. */
-	while (*next == ' ')
+	while (isspace(*next))
 		next++;
 	return next;
 }
@@ -138,7 +139,7 @@ int parse_args(const char *name,
 	DEBUGP("Parsing ARGS: %s\n", args);
 
 	/* Chew leading spaces */
-	while (*args == ' ')
+	while (isspace(*args))
 		args++;
 
 	while (*args) {
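
Switching the comparisons from ' ' to isspace() makes the parameter parser split on tabs and newlines as well, so argument strings assembled from multi-line sources tokenize the same way as space-separated ones. A self-contained sketch of the same tokenizing loop (quote handling omitted; this is a simplification, not the kernel function):

#include <ctype.h>
#include <stdio.h>

static char *next_arg(char *args, char **param)
{
	int i;

	*param = args;
	for (i = 0; args[i]; i++)
		if (isspace((unsigned char)args[i]))	/* was: args[i] == ' ' */
			break;
	if (args[i]) {
		args[i] = '\0';
		i++;
	}
	args += i;
	while (isspace((unsigned char)*args))	/* chew trailing whitespace */
		args++;
	return args;
}

int main(void)
{
	char buf[] = "foo\tbar  baz\n";
	char *p, *rest = buf;

	while (*rest) {
		rest = next_arg(rest, &p);
		printf("param: '%s'\n", p);	/* foo, bar, baz */
	}
	return 0;
}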
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 821722ae58a7..86b3796b0436 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -118,7 +118,7 @@ struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old
 {
 	if (!(flags & CLONE_NEWPID))
 		return get_pid_ns(old_ns);
-	if (flags & CLONE_THREAD)
+	if (flags & (CLONE_THREAD|CLONE_PARENT))
 		return ERR_PTR(-EINVAL);
 	return create_pid_namespace(old_ns);
 }
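
CLONE_PARENT joins CLONE_THREAD as an invalid companion of CLONE_NEWPID: the child would become the init of the fresh pid namespace while sharing a parent, and therefore siblings, outside of it. A hypothetical user-space probe of the new check (needs CAP_SYS_ADMIN; without it clone() fails with EPERM before this test is ever reached):

#define _GNU_SOURCE
#include <sched.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>

static int child(void *arg)
{
	return 0;
}

int main(void)
{
	char *stack = malloc(64 * 1024);
	int pid;

	if (!stack)
		return 1;
	/* CLONE_NEWPID | CLONE_PARENT is now rejected in copy_pid_ns(). */
	pid = clone(child, stack + 64 * 1024,
		    CLONE_NEWPID | CLONE_PARENT | SIGCHLD, NULL);
	if (pid < 0)
		printf("clone: %s (EINVAL expected)\n", strerror(errno));
	else
		printf("clone succeeded, pid %d\n", pid);
	free(stack);
	return 0;
}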
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 307c285af59e..23bd09cd042e 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -266,9 +266,10 @@ static int ignoring_children(struct sighand_struct *sigh)
  * or self-reaping. Do notification now if it would have happened earlier.
  * If it should reap itself, return true.
  *
- * If it's our own child, there is no notification to do.
- * But if our normal children self-reap, then this child
- * was prevented by ptrace and we must reap it now.
+ * If it's our own child, there is no notification to do. But if our normal
+ * children self-reap, then this child was prevented by ptrace and we must
+ * reap it now, in that case we must also wake up sub-threads sleeping in
+ * do_wait().
  */
 static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
 {
@@ -278,8 +279,10 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
 	if (!task_detached(p) && thread_group_empty(p)) {
 		if (!same_thread_group(p->real_parent, tracer))
 			do_notify_parent(p, p->exit_signal);
-		else if (ignoring_children(tracer->sighand))
+		else if (ignoring_children(tracer->sighand)) {
+			__wake_up_parent(p, tracer);
 			p->exit_signal = -1;
+		}
 	}
 	if (task_detached(p)) {
 		/* Mark it as in the process of being reaped. */
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index e1338f074314..88faec23e833 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -19,6 +19,7 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent)
 {
 	spin_lock_init(&counter->lock);
 	counter->limit = RESOURCE_MAX;
+	counter->soft_limit = RESOURCE_MAX;
 	counter->parent = parent;
 }
 
@@ -36,17 +37,27 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
 }
 
 int res_counter_charge(struct res_counter *counter, unsigned long val,
-			struct res_counter **limit_fail_at)
+			struct res_counter **limit_fail_at,
+			struct res_counter **soft_limit_fail_at)
 {
 	int ret;
 	unsigned long flags;
 	struct res_counter *c, *u;
 
 	*limit_fail_at = NULL;
+	if (soft_limit_fail_at)
+		*soft_limit_fail_at = NULL;
 	local_irq_save(flags);
 	for (c = counter; c != NULL; c = c->parent) {
 		spin_lock(&c->lock);
 		ret = res_counter_charge_locked(c, val);
+		/*
+		 * With soft limits, we return the highest ancestor
+		 * that exceeds its soft limit
+		 */
+		if (soft_limit_fail_at &&
+		    !res_counter_soft_limit_check_locked(c))
+			*soft_limit_fail_at = c;
 		spin_unlock(&c->lock);
 		if (ret < 0) {
 			*limit_fail_at = c;
@@ -74,7 +85,8 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
 	counter->usage -= val;
 }
 
-void res_counter_uncharge(struct res_counter *counter, unsigned long val)
+void res_counter_uncharge(struct res_counter *counter, unsigned long val,
+			bool *was_soft_limit_excess)
 {
 	unsigned long flags;
 	struct res_counter *c;
@@ -82,6 +94,9 @@ void res_counter_uncharge(struct res_counter *counter, unsigned long val)
 	local_irq_save(flags);
 	for (c = counter; c != NULL; c = c->parent) {
 		spin_lock(&c->lock);
+		if (was_soft_limit_excess)
+			*was_soft_limit_excess =
+				!res_counter_soft_limit_check_locked(c);
 		res_counter_uncharge_locked(c, val);
 		spin_unlock(&c->lock);
 	}
@@ -101,6 +116,8 @@ res_counter_member(struct res_counter *counter, int member)
 		return &counter->limit;
 	case RES_FAILCNT:
 		return &counter->failcnt;
+	case RES_SOFT_LIMIT:
+		return &counter->soft_limit;
 	};
 
 	BUG();
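
Unlike the hard limit, the new soft limit never fails a charge: res_counter_charge() merely reports, when the caller asks via soft_limit_fail_at, the highest ancestor already past its soft limit, so the memory controller can prefer it as a reclaim victim later. A toy model of that behaviour (locking and the failure-unwind path deliberately left out; not kernel code):

#include <stdio.h>

struct counter {
	struct counter *parent;
	unsigned long usage, limit, soft_limit;
};

static int charge(struct counter *c, unsigned long val,
		  struct counter **soft_fail)
{
	struct counter *i;

	*soft_fail = NULL;
	for (i = c; i != NULL; i = i->parent) {
		if (i->usage + val > i->limit)
			return -1;	/* hard limit: the charge fails */
		i->usage += val;
		if (i->usage > i->soft_limit)
			*soft_fail = i;	/* highest ancestor in excess wins */
	}
	return 0;
}

int main(void)
{
	struct counter root = { NULL, 0, 1000, 300 };
	struct counter child = { &root, 0, 500, 100 };
	struct counter *soft;

	if (charge(&child, 200, &soft) == 0 && soft)
		printf("over soft limit: %lu/%lu\n",
		       soft->usage, soft->soft_limit);
	return 0;
}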
diff --git a/kernel/sched.c b/kernel/sched.c
index 2f76e06bea58..ee61f454a98b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -10312,7 +10312,7 @@ static int sched_rt_global_constraints(void)
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 int sched_rt_handler(struct ctl_table *table, int write,
-		struct file *filp, void __user *buffer, size_t *lenp,
+		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
 {
 	int ret;
@@ -10323,7 +10323,7 @@ int sched_rt_handler(struct ctl_table *table, int write,
 	old_period = sysctl_sched_rt_period;
 	old_runtime = sysctl_sched_rt_runtime;
 
-	ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
 
 	if (!ret && write) {
 		ret = sched_rt_global_constraints();
@@ -10377,8 +10377,7 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 }
 
 static int
-cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-		      struct task_struct *tsk)
+cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
 	if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
@@ -10388,15 +10387,45 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 	if (tsk->sched_class != &fair_sched_class)
 		return -EINVAL;
 #endif
+	return 0;
+}
 
+static int
+cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+		      struct task_struct *tsk, bool threadgroup)
+{
+	int retval = cpu_cgroup_can_attach_task(cgrp, tsk);
+	if (retval)
+		return retval;
+	if (threadgroup) {
+		struct task_struct *c;
+		rcu_read_lock();
+		list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+			retval = cpu_cgroup_can_attach_task(cgrp, c);
+			if (retval) {
+				rcu_read_unlock();
+				return retval;
+			}
+		}
+		rcu_read_unlock();
+	}
 	return 0;
 }
 
 static void
 cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-		  struct cgroup *old_cont, struct task_struct *tsk)
+		  struct cgroup *old_cont, struct task_struct *tsk,
+		  bool threadgroup)
 {
 	sched_move_task(tsk);
+	if (threadgroup) {
+		struct task_struct *c;
+		rcu_read_lock();
+		list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+			sched_move_task(c);
+		}
+		rcu_read_unlock();
+	}
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ecc637a0d591..4e777b47eeda 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -384,10 +384,10 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 
 #ifdef CONFIG_SCHED_DEBUG
 int sched_nr_latency_handler(struct ctl_table *table, int write,
-		struct file *filp, void __user *buffer, size_t *lenp,
+		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
 {
-	int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 
 	if (ret || !write)
 		return ret;
diff --git a/kernel/signal.c b/kernel/signal.c
index 64c5deeaca5d..6705320784fd 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -705,7 +705,7 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns)
 
 	if (why) {
 		/*
-		 * The first thread which returns from finish_stop()
+		 * The first thread which returns from do_signal_stop()
 		 * will take ->siglock, notice SIGNAL_CLD_MASK, and
 		 * notify its parent. See get_signal_to_deliver().
 		 */
@@ -971,6 +971,20 @@ specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
 	return send_signal(sig, info, t, 0);
 }
 
+int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
+			bool group)
+{
+	unsigned long flags;
+	int ret = -ESRCH;
+
+	if (lock_task_sighand(p, &flags)) {
+		ret = send_signal(sig, info, p, group);
+		unlock_task_sighand(p, &flags);
+	}
+
+	return ret;
+}
+
 /*
  * Force a signal that the process can't ignore: if necessary
  * we unblock the signal and change any SIG_IGN to SIG_DFL.
@@ -1036,12 +1050,6 @@ void zap_other_threads(struct task_struct *p)
 	}
 }
 
-int __fatal_signal_pending(struct task_struct *tsk)
-{
-	return sigismember(&tsk->pending.signal, SIGKILL);
-}
-EXPORT_SYMBOL(__fatal_signal_pending);
-
 struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags)
 {
 	struct sighand_struct *sighand;
@@ -1068,18 +1076,10 @@ struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long
  */
 int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 {
-	unsigned long flags;
-	int ret;
-
-	ret = check_kill_permission(sig, info, p);
+	int ret = check_kill_permission(sig, info, p);
 
-	if (!ret && sig) {
-		ret = -ESRCH;
-		if (lock_task_sighand(p, &flags)) {
-			ret = __group_send_sig_info(sig, info, p);
-			unlock_task_sighand(p, &flags);
-		}
-	}
+	if (!ret && sig)
+		ret = do_send_sig_info(sig, info, p, true);
 
 	return ret;
 }
@@ -1224,15 +1224,9 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
  * These are for backward compatibility with the rest of the kernel source.
  */
 
-/*
- * The caller must ensure the task can't exit.
- */
 int
 send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 {
-	int ret;
-	unsigned long flags;
-
 	/*
 	 * Make sure legacy kernel users don't send in bad values
 	 * (normal paths check this in check_kill_permission).
@@ -1240,10 +1234,7 @@ send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 	if (!valid_signal(sig))
 		return -EINVAL;
 
-	spin_lock_irqsave(&p->sighand->siglock, flags);
-	ret = specific_send_sig_info(sig, info, p);
-	spin_unlock_irqrestore(&p->sighand->siglock, flags);
-	return ret;
+	return do_send_sig_info(sig, info, p, false);
 }
 
 #define __si_special(priv) \
@@ -1383,15 +1374,6 @@ ret:
 }
 
-/*
- * Wake up any threads in the parent blocked in wait* syscalls.
- */
-static inline void __wake_up_parent(struct task_struct *p,
-				    struct task_struct *parent)
-{
-	wake_up_interruptible_sync(&parent->signal->wait_chldexit);
-}
-
 /*
  * Let a parent know about the death of a child.
  * For a stopped/continued status change, use do_notify_parent_cldstop instead.
  *
@@ -1673,29 +1655,6 @@ void ptrace_notify(int exit_code)
 	spin_unlock_irq(&current->sighand->siglock);
 }
 
-static void
-finish_stop(int stop_count)
-{
-	/*
-	 * If there are no other threads in the group, or if there is
-	 * a group stop in progress and we are the last to stop,
-	 * report to the parent. When ptraced, every thread reports itself.
-	 */
-	if (tracehook_notify_jctl(stop_count == 0, CLD_STOPPED)) {
-		read_lock(&tasklist_lock);
-		do_notify_parent_cldstop(current, CLD_STOPPED);
-		read_unlock(&tasklist_lock);
-	}
-
-	do {
-		schedule();
-	} while (try_to_freeze());
-	/*
-	 * Now we don't run again until continued.
-	 */
-	current->exit_code = 0;
-}
-
 /*
  * This performs the stopping for SIGSTOP and other stop signals.
  * We have to stop all threads in the thread group.
@@ -1705,15 +1664,9 @@ finish_stop(int stop_count)
 static int do_signal_stop(int signr)
 {
 	struct signal_struct *sig = current->signal;
-	int stop_count;
+	int notify;
 
-	if (sig->group_stop_count > 0) {
-		/*
-		 * There is a group stop in progress. We don't need to
-		 * start another one.
-		 */
-		stop_count = --sig->group_stop_count;
-	} else {
+	if (!sig->group_stop_count) {
 		struct task_struct *t;
 
 		if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
@@ -1725,7 +1678,7 @@ static int do_signal_stop(int signr)
 		 */
 		sig->group_exit_code = signr;
 
-		stop_count = 0;
+		sig->group_stop_count = 1;
 		for (t = next_thread(current); t != current; t = next_thread(t))
 			/*
 			 * Setting state to TASK_STOPPED for a group
@@ -1734,19 +1687,44 @@ static int do_signal_stop(int signr)
 			 */
 			if (!(t->flags & PF_EXITING) &&
 			    !task_is_stopped_or_traced(t)) {
-				stop_count++;
+				sig->group_stop_count++;
 				signal_wake_up(t, 0);
 			}
-		sig->group_stop_count = stop_count;
 	}
+	/*
+	 * If there are no other threads in the group, or if there is
+	 * a group stop in progress and we are the last to stop, report
+	 * to the parent. When ptraced, every thread reports itself.
+	 */
+	notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0;
+	notify = tracehook_notify_jctl(notify, CLD_STOPPED);
+	/*
+	 * tracehook_notify_jctl() can drop and reacquire siglock, so
+	 * we keep ->group_stop_count != 0 before the call. If SIGCONT
+	 * or SIGKILL comes in between ->group_stop_count == 0.
+	 */
+	if (sig->group_stop_count) {
+		if (!--sig->group_stop_count)
+			sig->flags = SIGNAL_STOP_STOPPED;
+		current->exit_code = sig->group_exit_code;
+		__set_current_state(TASK_STOPPED);
+	}
+	spin_unlock_irq(&current->sighand->siglock);
 
-	if (stop_count == 0)
-		sig->flags = SIGNAL_STOP_STOPPED;
-	current->exit_code = sig->group_exit_code;
-	__set_current_state(TASK_STOPPED);
+	if (notify) {
+		read_lock(&tasklist_lock);
+		do_notify_parent_cldstop(current, notify);
+		read_unlock(&tasklist_lock);
+	}
+
+	/* Now we don't run again until woken by SIGCONT or SIGKILL */
+	do {
+		schedule();
+	} while (try_to_freeze());
+
+	tracehook_finish_jctl();
+	current->exit_code = 0;
 
-	spin_unlock_irq(&current->sighand->siglock);
-	finish_stop(stop_count);
 	return 1;
 }
 
@@ -1815,14 +1793,15 @@ relock:
 		int why = (signal->flags & SIGNAL_STOP_CONTINUED)
 			? CLD_CONTINUED : CLD_STOPPED;
 		signal->flags &= ~SIGNAL_CLD_MASK;
-		spin_unlock_irq(&sighand->siglock);
 
-		if (unlikely(!tracehook_notify_jctl(1, why)))
-			goto relock;
+		why = tracehook_notify_jctl(why, CLD_CONTINUED);
+		spin_unlock_irq(&sighand->siglock);
 
-		read_lock(&tasklist_lock);
-		do_notify_parent_cldstop(current->group_leader, why);
-		read_unlock(&tasklist_lock);
+		if (why) {
+			read_lock(&tasklist_lock);
+			do_notify_parent_cldstop(current->group_leader, why);
+			read_unlock(&tasklist_lock);
+		}
 		goto relock;
 	}
 
@@ -1987,14 +1966,14 @@ void exit_signals(struct task_struct *tsk)
 	if (unlikely(tsk->signal->group_stop_count) &&
 	    !--tsk->signal->group_stop_count) {
 		tsk->signal->flags = SIGNAL_STOP_STOPPED;
-		group_stop = 1;
+		group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED);
 	}
 out:
 	spin_unlock_irq(&tsk->sighand->siglock);
 
-	if (unlikely(group_stop) && tracehook_notify_jctl(1, CLD_STOPPED)) {
+	if (unlikely(group_stop)) {
 		read_lock(&tasklist_lock);
-		do_notify_parent_cldstop(tsk, CLD_STOPPED);
+		do_notify_parent_cldstop(tsk, group_stop);
 		read_unlock(&tasklist_lock);
 	}
 }
@@ -2290,7 +2269,6 @@ static int
 do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
 {
 	struct task_struct *p;
-	unsigned long flags;
 	int error = -ESRCH;
 
 	rcu_read_lock();
@@ -2300,14 +2278,16 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
 		/*
 		 * The null signal is a permissions and process existence
 		 * probe. No signal is actually delivered.
-		 *
-		 * If lock_task_sighand() fails we pretend the task dies
-		 * after receiving the signal. The window is tiny, and the
-		 * signal is private anyway.
 		 */
-		if (!error && sig && lock_task_sighand(p, &flags)) {
-			error = specific_send_sig_info(sig, info, p);
-			unlock_task_sighand(p, &flags);
+		if (!error && sig) {
+			error = do_send_sig_info(sig, info, p, false);
+			/*
+			 * If lock_task_sighand() failed we pretend the task
+			 * dies after receiving the signal. The window is tiny,
+			 * and the signal is private anyway.
+			 */
+			if (unlikely(error == -ESRCH))
+				error = 0;
 		}
 	}
 	rcu_read_unlock();
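
The common thread through these signal.c hunks is do_send_sig_info(): one helper that takes ->siglock via lock_task_sighand() and queues either to the shared pending set (group == true, the old __group_send_sig_info() behaviour) or to a single thread's private set (group == false, the old specific_send_sig_info() callers). The user-visible analogue of that flag, as a rough sketch (build with -pthread; not kernel code):

#define _GNU_SOURCE
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void handler(int sig)
{
	(void)sig;
}

int main(void)
{
	signal(SIGUSR1, handler);
	/* kill() queues to the process-wide set: any thread may take it,
	 * like do_send_sig_info(..., true). */
	kill(getpid(), SIGUSR1);
	/* pthread_kill() targets one thread's private set, like the
	 * group == false case. */
	pthread_kill(pthread_self(), SIGUSR1);
	puts("both signals delivered");
	return 0;
}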
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index 09d7519557d3..0d31135efbf4 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -26,10 +26,10 @@ static void slow_work_cull_timeout(unsigned long);
 static void slow_work_oom_timeout(unsigned long);
 
 #ifdef CONFIG_SYSCTL
-static int slow_work_min_threads_sysctl(struct ctl_table *, int, struct file *,
+static int slow_work_min_threads_sysctl(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 
-static int slow_work_max_threads_sysctl(struct ctl_table *, int , struct file *,
+static int slow_work_max_threads_sysctl(struct ctl_table *, int ,
 					void __user *, size_t *, loff_t *);
 #endif
 
@@ -493,10 +493,10 @@ static void slow_work_oom_timeout(unsigned long data)
  * Handle adjustment of the minimum number of threads
  */
 static int slow_work_min_threads_sysctl(struct ctl_table *table, int write,
-					struct file *filp, void __user *buffer,
+					void __user *buffer,
 					size_t *lenp, loff_t *ppos)
 {
-	int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 	int n;
 
 	if (ret == 0) {
@@ -521,10 +521,10 @@ static int slow_work_min_threads_sysctl(struct ctl_table *table, int write,
  * Handle adjustment of the maximum number of threads
  */
 static int slow_work_max_threads_sysctl(struct ctl_table *table, int write,
-					struct file *filp, void __user *buffer,
+					void __user *buffer,
 					size_t *lenp, loff_t *ppos)
 {
-	int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 	int n;
 
 	if (ret == 0) {
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 88796c330838..81324d12eb35 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -90,11 +90,11 @@ void touch_all_softlockup_watchdogs(void)
 EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
 
 int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
-			     struct file *filp, void __user *buffer,
+			     void __user *buffer,
 			     size_t *lenp, loff_t *ppos)
 {
 	touch_all_softlockup_watchdogs();
-	return proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 }
 
 /*
diff --git a/kernel/sys.c b/kernel/sys.c
index ebcb15611728..255475d163e0 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1542,6 +1542,28 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 			current->timer_slack_ns = arg2;
 			error = 0;
 			break;
+		case PR_MCE_KILL:
+			if (arg4 | arg5)
+				return -EINVAL;
+			switch (arg2) {
+			case 0:
+				if (arg3 != 0)
+					return -EINVAL;
+				current->flags &= ~PF_MCE_PROCESS;
+				break;
+			case 1:
+				current->flags |= PF_MCE_PROCESS;
+				if (arg3 != 0)
+					current->flags |= PF_MCE_EARLY;
+				else
+					current->flags &= ~PF_MCE_EARLY;
+				break;
+			default:
+				return -EINVAL;
+			}
+			error = 0;
+			break;
+
 		default:
 			error = -EINVAL;
 			break;
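
PR_MCE_KILL gives each process its own memory-failure policy: arg2 == 1 installs a per-process policy, with arg3 choosing early kill (SIGBUS as soon as a page is poisoned) versus late kill (only on first access), and arg2 == 0 (with arg3 == 0) reverts to the system default. A hypothetical caller; the constant value 33 is an assumption from this series, so check your headers:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33		/* assumed value; verify against linux/prctl.h */
#endif

int main(void)
{
	/* Set a per-process policy (arg2 == 1), early kill (arg3 != 0). */
	if (prctl(PR_MCE_KILL, 1, 1, 0, 0) != 0)
		perror("prctl(PR_MCE_KILL, set)");

	/* Back to the system default (arg2 == 0 requires arg3 == 0). */
	if (prctl(PR_MCE_KILL, 0, 0, 0, 0) != 0)
		perror("prctl(PR_MCE_KILL, clear)");
	return 0;
}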
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7f4f57bea4ce..0d949c517412 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -76,6 +76,7 @@ extern int max_threads;
 extern int core_uses_pid;
 extern int suid_dumpable;
 extern char core_pattern[];
+extern unsigned int core_pipe_limit;
 extern int pid_max;
 extern int min_free_kbytes;
 extern int pid_max_min, pid_max_max;
@@ -162,9 +163,9 @@ extern int max_lock_depth;
 #endif
 
 #ifdef CONFIG_PROC_SYSCTL
-static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
+static int proc_do_cad_pid(struct ctl_table *table, int write,
 		  void __user *buffer, size_t *lenp, loff_t *ppos);
-static int proc_taint(struct ctl_table *table, int write, struct file *filp,
+static int proc_taint(struct ctl_table *table, int write,
 			       void __user *buffer, size_t *lenp, loff_t *ppos);
 #endif
 
@@ -423,6 +424,14 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= &proc_dostring,
 		.strategy	= &sysctl_string,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "core_pipe_limit",
+		.data		= &core_pipe_limit,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #ifdef CONFIG_PROC_SYSCTL
 	{
 		.procname	= "tainted",
@@ -1389,6 +1398,31 @@ static struct ctl_table vm_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &scan_unevictable_handler,
 	},
+#ifdef CONFIG_MEMORY_FAILURE
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "memory_failure_early_kill",
+		.data		= &sysctl_memory_failure_early_kill,
+		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "memory_failure_recovery",
+		.data		= &sysctl_memory_failure_recovery,
+		.maxlen		= sizeof(sysctl_memory_failure_recovery),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+#endif
+
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
@@ -2217,7 +2251,7 @@ void sysctl_head_put(struct ctl_table_header *head)
 #ifdef CONFIG_PROC_SYSCTL
 
 static int _proc_do_string(void* data, int maxlen, int write,
-			   struct file *filp, void __user *buffer,
+			   void __user *buffer,
 			   size_t *lenp, loff_t *ppos)
 {
 	size_t len;
@@ -2278,7 +2312,6 @@ static int _proc_do_string(void* data, int maxlen, int write,
  * proc_dostring - read a string sysctl
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2292,10 +2325,10 @@ static int _proc_do_string(void* data, int maxlen, int write,
  *
  * Returns 0 on success.
  */
-int proc_dostring(struct ctl_table *table, int write, struct file *filp,
+int proc_dostring(struct ctl_table *table, int write,
 		  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	return _proc_do_string(table->data, table->maxlen, write, filp,
+	return _proc_do_string(table->data, table->maxlen, write,
 			       buffer, lenp, ppos);
 }
 
@@ -2320,7 +2353,7 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
 }
 
 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
-		  int write, struct file *filp, void __user *buffer,
+		  int write, void __user *buffer,
 		  size_t *lenp, loff_t *ppos,
 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
 			      int write, void *data),
@@ -2427,13 +2460,13 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
 #undef TMPBUFLEN
 }
 
-static int do_proc_dointvec(struct ctl_table *table, int write, struct file *filp,
+static int do_proc_dointvec(struct ctl_table *table, int write,
 		  void __user *buffer, size_t *lenp, loff_t *ppos,
 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
 			      int write, void *data),
 		  void *data)
 {
-	return __do_proc_dointvec(table->data, table, write, filp,
+	return __do_proc_dointvec(table->data, table, write,
 			buffer, lenp, ppos, conv, data);
 }
 
@@ -2441,7 +2474,6 @@ static int do_proc_dointvec(struct ctl_table *table, int write, struct file *fil
  * proc_dointvec - read a vector of integers
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2451,10 +2483,10 @@ static int do_proc_dointvec(struct ctl_table *table, int write, struct file *fil
  *
  * Returns 0 on success.
  */
-int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec(struct ctl_table *table, int write,
 		  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
+	return do_proc_dointvec(table,write,buffer,lenp,ppos,
 			NULL,NULL);
 }
 
@@ -2462,7 +2494,7 @@ int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
  * Taint values can only be increased
  * This means we can safely use a temporary.
  */
-static int proc_taint(struct ctl_table *table, int write, struct file *filp,
+static int proc_taint(struct ctl_table *table, int write,
 		       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table t;
@@ -2474,7 +2506,7 @@ static int proc_taint(struct ctl_table *table, int write, struct file *filp,
 
 	t = *table;
 	t.data = &tmptaint;
-	err = proc_doulongvec_minmax(&t, write, filp, buffer, lenp, ppos);
+	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
 	if (err < 0)
 		return err;
 
@@ -2526,7 +2558,6 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
  * proc_dointvec_minmax - read a vector of integers with min/max values
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2539,19 +2570,18 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
  *
  * Returns 0 on success.
  */
-int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_minmax(struct ctl_table *table, int write,
 		  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct do_proc_dointvec_minmax_conv_param param = {
 		.min = (int *) table->extra1,
 		.max = (int *) table->extra2,
 	};
-	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
+	return do_proc_dointvec(table, write, buffer, lenp, ppos,
 				do_proc_dointvec_minmax_conv, &param);
 }
 
 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
-				     struct file *filp,
 				     void __user *buffer,
 				     size_t *lenp, loff_t *ppos,
 				     unsigned long convmul,
@@ -2656,21 +2686,19 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
 }
 
 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
-				     struct file *filp,
 				     void __user *buffer,
 				     size_t *lenp, loff_t *ppos,
 				     unsigned long convmul,
 				     unsigned long convdiv)
 {
 	return __do_proc_doulongvec_minmax(table->data, table, write,
-			filp, buffer, lenp, ppos, convmul, convdiv);
+			buffer, lenp, ppos, convmul, convdiv);
 }
 
 /**
  * proc_doulongvec_minmax - read a vector of long integers with min/max values
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2683,17 +2711,16 @@ static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
  *
  * Returns 0 on success.
  */
-int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
+int proc_doulongvec_minmax(struct ctl_table *table, int write,
 			   void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
+	return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
 }
 
 /**
  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2708,11 +2735,10 @@ int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp
  * Returns 0 on success.
  */
 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
-				      struct file *filp,
 				      void __user *buffer,
 				      size_t *lenp, loff_t *ppos)
 {
-	return do_proc_doulongvec_minmax(table, write, filp, buffer,
+	return do_proc_doulongvec_minmax(table, write, buffer,
 				     lenp, ppos, HZ, 1000l);
 }
 
@@ -2788,7 +2814,6 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
  * proc_dointvec_jiffies - read a vector of integers as seconds
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2800,10 +2825,10 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
  *
  * Returns 0 on success.
  */
-int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_jiffies(struct ctl_table *table, int write,
 			  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
+	return do_proc_dointvec(table,write,buffer,lenp,ppos,
 			do_proc_dointvec_jiffies_conv,NULL);
 }
 
@@ -2811,7 +2836,6 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: pointer to the file position
@@ -2823,10 +2847,10 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
  *
  * Returns 0 on success.
  */
-int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
 				 void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
+	return do_proc_dointvec(table,write,buffer,lenp,ppos,
 			do_proc_dointvec_userhz_jiffies_conv,NULL);
 }
 
@@ -2834,7 +2858,6 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file
  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2847,14 +2870,14 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file
  *
  * Returns 0 on success.
  */
-int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
 			     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
+	return do_proc_dointvec(table, write, buffer, lenp, ppos,
 				do_proc_dointvec_ms_jiffies_conv, NULL);
 }
 
-static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
+static int proc_do_cad_pid(struct ctl_table *table, int write,
 			   void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct pid *new_pid;
@@ -2863,7 +2886,7 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp
 
 	tmp = pid_vnr(cad_pid);
 
-	r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
+	r = __do_proc_dointvec(&tmp, table, write, buffer,
 			       lenp, ppos, NULL, NULL);
 	if (r || !write)
 		return r;
@@ -2878,50 +2901,49 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp
2878 2901
2879#else /* CONFIG_PROC_FS */ 2902#else /* CONFIG_PROC_FS */
2880 2903
2881int proc_dostring(struct ctl_table *table, int write, struct file *filp, 2904int proc_dostring(struct ctl_table *table, int write,
2882 void __user *buffer, size_t *lenp, loff_t *ppos) 2905 void __user *buffer, size_t *lenp, loff_t *ppos)
2883{ 2906{
2884 return -ENOSYS; 2907 return -ENOSYS;
2885} 2908}
2886 2909
2887int proc_dointvec(struct ctl_table *table, int write, struct file *filp, 2910int proc_dointvec(struct ctl_table *table, int write,
2888 void __user *buffer, size_t *lenp, loff_t *ppos) 2911 void __user *buffer, size_t *lenp, loff_t *ppos)
2889{ 2912{
2890 return -ENOSYS; 2913 return -ENOSYS;
2891} 2914}
2892 2915
2893int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp, 2916int proc_dointvec_minmax(struct ctl_table *table, int write,
2894 void __user *buffer, size_t *lenp, loff_t *ppos) 2917 void __user *buffer, size_t *lenp, loff_t *ppos)
2895{ 2918{
2896 return -ENOSYS; 2919 return -ENOSYS;
2897} 2920}
2898 2921
2899int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, 2922int proc_dointvec_jiffies(struct ctl_table *table, int write,
2900 void __user *buffer, size_t *lenp, loff_t *ppos) 2923 void __user *buffer, size_t *lenp, loff_t *ppos)
2901{ 2924{
2902 return -ENOSYS; 2925 return -ENOSYS;
2903} 2926}
2904 2927
2905int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp, 2928int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2906 void __user *buffer, size_t *lenp, loff_t *ppos) 2929 void __user *buffer, size_t *lenp, loff_t *ppos)
2907{ 2930{
2908 return -ENOSYS; 2931 return -ENOSYS;
2909} 2932}
2910 2933
2911int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp, 2934int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2912 void __user *buffer, size_t *lenp, loff_t *ppos) 2935 void __user *buffer, size_t *lenp, loff_t *ppos)
2913{ 2936{
2914 return -ENOSYS; 2937 return -ENOSYS;
2915} 2938}
2916 2939
2917int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp, 2940int proc_doulongvec_minmax(struct ctl_table *table, int write,
2918 void __user *buffer, size_t *lenp, loff_t *ppos) 2941 void __user *buffer, size_t *lenp, loff_t *ppos)
2919{ 2942{
2920 return -ENOSYS; 2943 return -ENOSYS;
2921} 2944}
2922 2945
2923int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, 2946int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2924 struct file *filp,
2925 void __user *buffer, 2947 void __user *buffer,
2926 size_t *lenp, loff_t *ppos) 2948 size_t *lenp, loff_t *ppos)
2927{ 2949{
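
The hunks above drop the struct file * argument from every generic proc handler, so any handler outside this patch has to be converted the same way. A minimal sketch of a custom handler written against the new signature (my_value and my_sysctl_handler are hypothetical names, not part of the patch):

#include <linux/sysctl.h>
#include <linux/kernel.h>

static int my_value;

/* New-style handler: no struct file * parameter anymore. */
static int my_sysctl_handler(struct ctl_table *table, int write,
			     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec(table, write, buffer, lenp, ppos);

	if (!ret && write)
		printk(KERN_INFO "my_value set to %d\n", my_value);
	return ret;
}
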
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 0b0a6366c9d4..ee266620b06c 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,4 +1,4 @@
1obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o 1obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o timeconv.o
2 2
3obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o 3obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
4obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o 4obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
diff --git a/kernel/time/timeconv.c b/kernel/time/timeconv.c
new file mode 100644
index 000000000000..86628e755f38
--- /dev/null
+++ b/kernel/time/timeconv.c
@@ -0,0 +1,127 @@
1/*
2 * Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
3 * This file is part of the GNU C Library.
4 * Contributed by Paul Eggert (eggert@twinsun.com).
5 *
6 * The GNU C Library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
10 *
11 * The GNU C Library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public
17 * License along with the GNU C Library; see the file COPYING.LIB. If not,
18 * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
20 */
21
22/*
23 * Converts the calendar time to broken-down time representation
24 * Based on code from glibc-2.6
25 *
26 * 2009-7-14:
27 * Moved from glibc-2.6 to kernel by Zhaolei<zhaolei@cn.fujitsu.com>
28 */
29
30#include <linux/time.h>
31#include <linux/module.h>
32
33/*
34 * Nonzero if YEAR is a leap year (every 4 years,
35 * except every 100th isn't, and every 400th is).
36 */
37static int __isleap(long year)
38{
39 return (year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0);
40}
41
42/* do a floored division (round toward negative infinity) for long type */
43static long math_div(long a, long b)
44{
45 return a / b - (a % b < 0);
46}
47
48/* How many leap years there are between y1 and y2; y1 must be less than or equal to y2 */
49static long leaps_between(long y1, long y2)
50{
51 long leaps1 = math_div(y1 - 1, 4) - math_div(y1 - 1, 100)
52 + math_div(y1 - 1, 400);
53 long leaps2 = math_div(y2 - 1, 4) - math_div(y2 - 1, 100)
54 + math_div(y2 - 1, 400);
55 return leaps2 - leaps1;
56}
57
58/* How many days come before each month (0-12). */
59static const unsigned short __mon_yday[2][13] = {
60 /* Normal years. */
61 {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
62 /* Leap years. */
63 {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
64};
65
66#define SECS_PER_HOUR (60 * 60)
67#define SECS_PER_DAY (SECS_PER_HOUR * 24)
68
69/**
70 * time_to_tm - converts the calendar time to local broken-down time
71 *
72 * @totalsecs:	the number of seconds elapsed since 00:00:00 on January 1, 1970,
73 *		Coordinated Universal Time (UTC).
74 * @offset:	offset seconds added to totalsecs.
75 * @result:	pointer to struct tm variable to receive broken-down time
76 */
77void time_to_tm(time_t totalsecs, int offset, struct tm *result)
78{
79 long days, rem, y;
80 const unsigned short *ip;
81
82 days = totalsecs / SECS_PER_DAY;
83 rem = totalsecs % SECS_PER_DAY;
84 rem += offset;
85 while (rem < 0) {
86 rem += SECS_PER_DAY;
87 --days;
88 }
89 while (rem >= SECS_PER_DAY) {
90 rem -= SECS_PER_DAY;
91 ++days;
92 }
93
94 result->tm_hour = rem / SECS_PER_HOUR;
95 rem %= SECS_PER_HOUR;
96 result->tm_min = rem / 60;
97 result->tm_sec = rem % 60;
98
99 /* January 1, 1970 was a Thursday. */
100 result->tm_wday = (4 + days) % 7;
101 if (result->tm_wday < 0)
102 result->tm_wday += 7;
103
104 y = 1970;
105
106 while (days < 0 || days >= (__isleap(y) ? 366 : 365)) {
107 /* Guess a corrected year, assuming 365 days per year. */
108 long yg = y + math_div(days, 365);
109
110 /* Adjust DAYS and Y to match the guessed year. */
111 days -= (yg - y) * 365 + leaps_between(y, yg);
112 y = yg;
113 }
114
115 result->tm_year = y - 1900;
116
117 result->tm_yday = days;
118
119 ip = __mon_yday[__isleap(y)];
120 for (y = 11; days < ip[y]; y--)
121 continue;
122 days -= ip[y];
123
124 result->tm_mon = y;
125 result->tm_mday = days + 1;
126}
127EXPORT_SYMBOL(time_to_tm);
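
A short usage sketch of the helper exported above (print_utc_now is a hypothetical caller; it assumes get_seconds() for the current time, and offset 0 yields UTC):

#include <linux/time.h>
#include <linux/kernel.h>

static void print_utc_now(void)
{
	struct tm tm;

	/* offset == 0: interpret totalsecs as plain UTC */
	time_to_tm(get_seconds(), 0, &tm);
	printk(KERN_INFO "%04ld-%02d-%02d %02d:%02d:%02d UTC\n",
	       tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
	       tm.tm_hour, tm.tm_min, tm.tm_sec);
}
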
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 23df7771c937..a142579765bf 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3015,7 +3015,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
3015 3015
3016int 3016int
3017ftrace_enable_sysctl(struct ctl_table *table, int write, 3017ftrace_enable_sysctl(struct ctl_table *table, int write,
3018 struct file *file, void __user *buffer, size_t *lenp, 3018 void __user *buffer, size_t *lenp,
3019 loff_t *ppos) 3019 loff_t *ppos)
3020{ 3020{
3021 int ret; 3021 int ret;
@@ -3025,7 +3025,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
3025 3025
3026 mutex_lock(&ftrace_lock); 3026 mutex_lock(&ftrace_lock);
3027 3027
3028 ret = proc_dointvec(table, write, file, buffer, lenp, ppos); 3028 ret = proc_dointvec(table, write, buffer, lenp, ppos);
3029 3029
3030 if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) 3030 if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
3031 goto out; 3031 goto out;
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 0f6facb050a1..8504ac71e4e8 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -296,14 +296,14 @@ static const struct file_operations stack_trace_fops = {
296 296
297int 297int
298stack_trace_sysctl(struct ctl_table *table, int write, 298stack_trace_sysctl(struct ctl_table *table, int write,
299 struct file *file, void __user *buffer, size_t *lenp, 299 void __user *buffer, size_t *lenp,
300 loff_t *ppos) 300 loff_t *ppos)
301{ 301{
302 int ret; 302 int ret;
303 303
304 mutex_lock(&stack_sysctl_mutex); 304 mutex_lock(&stack_sysctl_mutex);
305 305
306 ret = proc_dointvec(table, write, file, buffer, lenp, ppos); 306 ret = proc_dointvec(table, write, buffer, lenp, ppos);
307 307
308 if (ret || !write || 308 if (ret || !write ||
309 (last_stack_tracer_enabled == !!stack_tracer_enabled)) 309 (last_stack_tracer_enabled == !!stack_tracer_enabled))
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index 92359cc747a7..69eae358a726 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -42,14 +42,14 @@ static void put_uts(ctl_table *table, int write, void *which)
42 * Special case of dostring for the UTS structure. This has locks 42 * Special case of dostring for the UTS structure. This has locks
43 * to observe. Should this be in kernel/sys.c ???? 43 * to observe. Should this be in kernel/sys.c ????
44 */ 44 */
45static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, 45static int proc_do_uts_string(ctl_table *table, int write,
46 void __user *buffer, size_t *lenp, loff_t *ppos) 46 void __user *buffer, size_t *lenp, loff_t *ppos)
47{ 47{
48 struct ctl_table uts_table; 48 struct ctl_table uts_table;
49 int r; 49 int r;
50 memcpy(&uts_table, table, sizeof(uts_table)); 50 memcpy(&uts_table, table, sizeof(uts_table));
51 uts_table.data = get_uts(table, write); 51 uts_table.data = get_uts(table, write);
52 r = proc_dostring(&uts_table,write,filp,buffer,lenp, ppos); 52 r = proc_dostring(&uts_table,write,buffer,lenp, ppos);
53 put_uts(table, write, uts_table.data); 53 put_uts(table, write, uts_table.data);
54 return r; 54 return r;
55} 55}
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c
index 68dfce59c1b8..fc686c7a0a0d 100644
--- a/lib/decompress_inflate.c
+++ b/lib/decompress_inflate.c
@@ -27,6 +27,11 @@
27 27
28#define GZIP_IOBUF_SIZE (16*1024) 28#define GZIP_IOBUF_SIZE (16*1024)
29 29
30static int nofill(void *buffer, unsigned int len)
31{
32 return -1;
33}
34
30/* Included from initramfs et al code */ 35/* Included from initramfs et al code */
31STATIC int INIT gunzip(unsigned char *buf, int len, 36STATIC int INIT gunzip(unsigned char *buf, int len,
32 int(*fill)(void*, unsigned int), 37 int(*fill)(void*, unsigned int),
@@ -76,6 +81,9 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
76 goto gunzip_nomem4; 81 goto gunzip_nomem4;
77 } 82 }
78 83
84 if (!fill)
85 fill = nofill;
86
79 if (len == 0) 87 if (len == 0)
80 len = fill(zbuf, GZIP_IOBUF_SIZE); 88 len = fill(zbuf, GZIP_IOBUF_SIZE);
81 89
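
With the nofill() fallback above, gunzip() tolerates a NULL fill callback when the caller already has the whole compressed image in memory. A hedged sketch of such a caller (names are hypothetical; the trailing parameters -- flush callback, output buffer, position, error callback -- are assumed to match the declaration in this file):

#include <linux/kernel.h>

static void my_error(char *msg)
{
	printk(KERN_ERR "inflate: %s\n", msg);
}

static int decompress_whole_image(unsigned char *image, int image_len,
				  unsigned char *out_buf)
{
	int pos = 0;

	/* fill == NULL: gunzip() substitutes nofill(), which returns -1
	 * ("no more input"), so every byte must already be in image[]. */
	return gunzip(image, image_len, NULL, NULL, out_buf, &pos, my_error);
}
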
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c
index 0b954e04bd30..ca82fde81c8f 100644
--- a/lib/decompress_unlzma.c
+++ b/lib/decompress_unlzma.c
@@ -82,6 +82,11 @@ struct rc {
82#define RC_MODEL_TOTAL_BITS 11 82#define RC_MODEL_TOTAL_BITS 11
83 83
84 84
85static int nofill(void *buffer, unsigned int len)
86{
87 return -1;
88}
89
85/* Called twice: once at startup and once in rc_normalize() */ 90/* Called twice: once at startup and once in rc_normalize() */
86static void INIT rc_read(struct rc *rc) 91static void INIT rc_read(struct rc *rc)
87{ 92{
@@ -97,7 +102,10 @@ static inline void INIT rc_init(struct rc *rc,
97 int (*fill)(void*, unsigned int), 102 int (*fill)(void*, unsigned int),
98 char *buffer, int buffer_size) 103 char *buffer, int buffer_size)
99{ 104{
100 rc->fill = fill; 105 if (fill)
106 rc->fill = fill;
107 else
108 rc->fill = nofill;
101 rc->buffer = (uint8_t *)buffer; 109 rc->buffer = (uint8_t *)buffer;
102 rc->buffer_size = buffer_size; 110 rc->buffer_size = buffer_size;
103 rc->buffer_end = rc->buffer + rc->buffer_size; 111 rc->buffer_end = rc->buffer + rc->buffer_size;
diff --git a/mm/Kconfig b/mm/Kconfig
index 71eb0b4cce8d..247760729593 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -245,6 +245,20 @@ config DEFAULT_MMAP_MIN_ADDR
245 /proc/sys/vm/mmap_min_addr tunable. 245 /proc/sys/vm/mmap_min_addr tunable.
246 246
247 247
248config MEMORY_FAILURE
249 depends on MMU
250 depends on X86_MCE
251 bool "Enable recovery from hardware memory errors"
252 help
253 Enables code to recover from some memory failures on systems
254 with MCA recovery. This allows a system to continue running
255 even when some of its memory has uncorrected errors. This requires
256 special hardware support and typically ECC memory.
257
258config HWPOISON_INJECT
259 tristate "Poison pages injector"
260 depends on MEMORY_FAILURE && DEBUG_KERNEL
261
248config NOMMU_INITIAL_TRIM_EXCESS 262config NOMMU_INITIAL_TRIM_EXCESS
249 int "Turn on mmap() excess space trimming before booting" 263 int "Turn on mmap() excess space trimming before booting"
250 depends on !MMU 264 depends on !MMU
diff --git a/mm/Makefile b/mm/Makefile
index 88193d73cd1a..ebf849042ed3 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -5,14 +5,14 @@
5mmu-y := nommu.o 5mmu-y := nommu.o
6mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ 6mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
7 mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ 7 mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
8 vmalloc.o 8 vmalloc.o pagewalk.o
9 9
10obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ 10obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
11 maccess.o page_alloc.o page-writeback.o \ 11 maccess.o page_alloc.o page-writeback.o \
12 readahead.o swap.o truncate.o vmscan.o shmem.o \ 12 readahead.o swap.o truncate.o vmscan.o shmem.o \
13 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ 13 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
14 page_isolation.o mm_init.o mmu_context.o \ 14 page_isolation.o mm_init.o mmu_context.o \
15 pagewalk.o $(mmu-y) 15 $(mmu-y)
16obj-y += init-mm.o 16obj-y += init-mm.o
17 17
18obj-$(CONFIG_BOUNCE) += bounce.o 18obj-$(CONFIG_BOUNCE) += bounce.o
@@ -41,5 +41,7 @@ obj-$(CONFIG_SMP) += allocpercpu.o
41endif 41endif
42obj-$(CONFIG_QUICKLIST) += quicklist.o 42obj-$(CONFIG_QUICKLIST) += quicklist.o
43obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o 43obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
44obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
45obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
44obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o 46obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
45obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o 47obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
diff --git a/mm/filemap.c b/mm/filemap.c
index bcc7372aebbc..6c84e598b4a9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -58,7 +58,7 @@
58/* 58/*
59 * Lock ordering: 59 * Lock ordering:
60 * 60 *
61 * ->i_mmap_lock (vmtruncate) 61 * ->i_mmap_lock (truncate_pagecache)
62 * ->private_lock (__free_pte->__set_page_dirty_buffers) 62 * ->private_lock (__free_pte->__set_page_dirty_buffers)
63 * ->swap_lock (exclusive_swap_page, others) 63 * ->swap_lock (exclusive_swap_page, others)
64 * ->mapping->tree_lock 64 * ->mapping->tree_lock
@@ -104,6 +104,10 @@
104 * 104 *
105 * ->task->proc_lock 105 * ->task->proc_lock
106 * ->dcache_lock (proc_pid_lookup) 106 * ->dcache_lock (proc_pid_lookup)
107 *
108 * (code doesn't rely on that order, so you could switch it around)
109 * ->tasklist_lock (memory_failure, collect_procs_ao)
110 * ->i_mmap_lock
107 */ 111 */
108 112
109/* 113/*
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 815dbd4a6dcb..6f048fcc749c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1537,7 +1537,7 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
1537 1537
1538#ifdef CONFIG_SYSCTL 1538#ifdef CONFIG_SYSCTL
1539int hugetlb_sysctl_handler(struct ctl_table *table, int write, 1539int hugetlb_sysctl_handler(struct ctl_table *table, int write,
1540 struct file *file, void __user *buffer, 1540 void __user *buffer,
1541 size_t *length, loff_t *ppos) 1541 size_t *length, loff_t *ppos)
1542{ 1542{
1543 struct hstate *h = &default_hstate; 1543 struct hstate *h = &default_hstate;
@@ -1548,7 +1548,7 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
1548 1548
1549 table->data = &tmp; 1549 table->data = &tmp;
1550 table->maxlen = sizeof(unsigned long); 1550 table->maxlen = sizeof(unsigned long);
1551 proc_doulongvec_minmax(table, write, file, buffer, length, ppos); 1551 proc_doulongvec_minmax(table, write, buffer, length, ppos);
1552 1552
1553 if (write) 1553 if (write)
1554 h->max_huge_pages = set_max_huge_pages(h, tmp); 1554 h->max_huge_pages = set_max_huge_pages(h, tmp);
@@ -1557,10 +1557,10 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
1557} 1557}
1558 1558
1559int hugetlb_treat_movable_handler(struct ctl_table *table, int write, 1559int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
1560 struct file *file, void __user *buffer, 1560 void __user *buffer,
1561 size_t *length, loff_t *ppos) 1561 size_t *length, loff_t *ppos)
1562{ 1562{
1563 proc_dointvec(table, write, file, buffer, length, ppos); 1563 proc_dointvec(table, write, buffer, length, ppos);
1564 if (hugepages_treat_as_movable) 1564 if (hugepages_treat_as_movable)
1565 htlb_alloc_mask = GFP_HIGHUSER_MOVABLE; 1565 htlb_alloc_mask = GFP_HIGHUSER_MOVABLE;
1566 else 1566 else
@@ -1569,7 +1569,7 @@ int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
1569} 1569}
1570 1570
1571int hugetlb_overcommit_handler(struct ctl_table *table, int write, 1571int hugetlb_overcommit_handler(struct ctl_table *table, int write,
1572 struct file *file, void __user *buffer, 1572 void __user *buffer,
1573 size_t *length, loff_t *ppos) 1573 size_t *length, loff_t *ppos)
1574{ 1574{
1575 struct hstate *h = &default_hstate; 1575 struct hstate *h = &default_hstate;
@@ -1580,7 +1580,7 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
1580 1580
1581 table->data = &tmp; 1581 table->data = &tmp;
1582 table->maxlen = sizeof(unsigned long); 1582 table->maxlen = sizeof(unsigned long);
1583 proc_doulongvec_minmax(table, write, file, buffer, length, ppos); 1583 proc_doulongvec_minmax(table, write, buffer, length, ppos);
1584 1584
1585 if (write) { 1585 if (write) {
1586 spin_lock(&hugetlb_lock); 1586 spin_lock(&hugetlb_lock);
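
The three handlers above share one pattern: repoint table->data at a stack temporary, let the generic proc_* helper parse into it, then publish the result under the subsystem lock only on write. A generic sketch of the same pattern (my_tunable, my_lock and my_ulong_handler are hypothetical):

#include <linux/sysctl.h>
#include <linux/spinlock.h>

static unsigned long my_tunable;
static DEFINE_SPINLOCK(my_lock);

static int my_ulong_handler(struct ctl_table *table, int write,
			    void __user *buffer, size_t *length, loff_t *ppos)
{
	unsigned long tmp = my_tunable;

	table->data = &tmp;			/* parse into the temporary */
	table->maxlen = sizeof(unsigned long);
	proc_doulongvec_minmax(table, write, buffer, length, ppos);

	if (write) {				/* publish under the lock */
		spin_lock(&my_lock);
		my_tunable = tmp;
		spin_unlock(&my_lock);
	}
	return 0;
}
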
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
new file mode 100644
index 000000000000..e1d85137f086
--- /dev/null
+++ b/mm/hwpoison-inject.c
@@ -0,0 +1,41 @@
1/* Inject a hwpoison memory failure on an arbitrary pfn */
2#include <linux/module.h>
3#include <linux/debugfs.h>
4#include <linux/kernel.h>
5#include <linux/mm.h>
6
7static struct dentry *hwpoison_dir, *corrupt_pfn;
8
9static int hwpoison_inject(void *data, u64 val)
10{
11 if (!capable(CAP_SYS_ADMIN))
12 return -EPERM;
13 printk(KERN_INFO "Injecting memory failure at pfn %Lx\n", val);
14 return __memory_failure(val, 18, 0);
15}
16
17DEFINE_SIMPLE_ATTRIBUTE(hwpoison_fops, NULL, hwpoison_inject, "%lli\n");
18
19static void pfn_inject_exit(void)
20{
21 if (hwpoison_dir)
22 debugfs_remove_recursive(hwpoison_dir);
23}
24
25static int pfn_inject_init(void)
26{
27 hwpoison_dir = debugfs_create_dir("hwpoison", NULL);
28 if (hwpoison_dir == NULL)
29 return -ENOMEM;
30 corrupt_pfn = debugfs_create_file("corrupt-pfn", 0600, hwpoison_dir,
31 NULL, &hwpoison_fops);
32 if (corrupt_pfn == NULL) {
33 pfn_inject_exit();
34 return -ENOMEM;
35 }
36 return 0;
37}
38
39module_init(pfn_inject_init);
40module_exit(pfn_inject_exit);
41MODULE_LICENSE("GPL");
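
A userspace usage sketch for the injector above (inject_hwpoison is a hypothetical helper; it assumes debugfs is mounted at /sys/kernel/debug and the caller holds CAP_SYS_ADMIN):

#include <stdio.h>

int inject_hwpoison(unsigned long long pfn)
{
	FILE *f = fopen("/sys/kernel/debug/hwpoison/corrupt-pfn", "w");

	if (!f)
		return -1;
	fprintf(f, "%llu\n", pfn);	/* consumed by hwpoison_inject() */
	return fclose(f);
}
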
diff --git a/mm/ksm.c b/mm/ksm.c
index 37cc37325094..f7edac356f46 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -30,6 +30,7 @@
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/rbtree.h> 31#include <linux/rbtree.h>
32#include <linux/mmu_notifier.h> 32#include <linux/mmu_notifier.h>
33#include <linux/swap.h>
33#include <linux/ksm.h> 34#include <linux/ksm.h>
34 35
35#include <asm/tlbflush.h> 36#include <asm/tlbflush.h>
@@ -162,10 +163,10 @@ static unsigned long ksm_pages_unshared;
162static unsigned long ksm_rmap_items; 163static unsigned long ksm_rmap_items;
163 164
164/* Limit on the number of unswappable pages used */ 165/* Limit on the number of unswappable pages used */
165static unsigned long ksm_max_kernel_pages = 2000; 166static unsigned long ksm_max_kernel_pages;
166 167
167/* Number of pages ksmd should scan in one batch */ 168/* Number of pages ksmd should scan in one batch */
168static unsigned int ksm_thread_pages_to_scan = 200; 169static unsigned int ksm_thread_pages_to_scan = 100;
169 170
170/* Milliseconds ksmd should sleep between batches */ 171/* Milliseconds ksmd should sleep between batches */
171static unsigned int ksm_thread_sleep_millisecs = 20; 172static unsigned int ksm_thread_sleep_millisecs = 20;
@@ -173,7 +174,7 @@ static unsigned int ksm_thread_sleep_millisecs = 20;
173#define KSM_RUN_STOP 0 174#define KSM_RUN_STOP 0
174#define KSM_RUN_MERGE 1 175#define KSM_RUN_MERGE 1
175#define KSM_RUN_UNMERGE 2 176#define KSM_RUN_UNMERGE 2
176static unsigned int ksm_run = KSM_RUN_MERGE; 177static unsigned int ksm_run = KSM_RUN_STOP;
177 178
178static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait); 179static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
179static DEFINE_MUTEX(ksm_thread_mutex); 180static DEFINE_MUTEX(ksm_thread_mutex);
@@ -183,6 +184,11 @@ static DEFINE_SPINLOCK(ksm_mmlist_lock);
183 sizeof(struct __struct), __alignof__(struct __struct),\ 184 sizeof(struct __struct), __alignof__(struct __struct),\
184 (__flags), NULL) 185 (__flags), NULL)
185 186
187static void __init ksm_init_max_kernel_pages(void)
188{
189 ksm_max_kernel_pages = nr_free_buffer_pages() / 4;
190}
191
186static int __init ksm_slab_init(void) 192static int __init ksm_slab_init(void)
187{ 193{
188 rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0); 194 rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
@@ -1667,6 +1673,8 @@ static int __init ksm_init(void)
1667 struct task_struct *ksm_thread; 1673 struct task_struct *ksm_thread;
1668 int err; 1674 int err;
1669 1675
1676 ksm_init_max_kernel_pages();
1677
1670 err = ksm_slab_init(); 1678 err = ksm_slab_init();
1671 if (err) 1679 if (err)
1672 goto out; 1680 goto out;
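
The new defaults above change behavior: ksm_max_kernel_pages now scales with memory (a quarter of nr_free_buffer_pages()) instead of a fixed 2000, and ksmd starts in KSM_RUN_STOP, so merging has to be switched on explicitly. A userspace sketch (enable_ksm is hypothetical; the sysfs path is the one mm/ksm.c registers):

#include <fcntl.h>
#include <unistd.h>

int enable_ksm(void)
{
	int fd = open("/sys/kernel/mm/ksm/run", O_WRONLY);
	int ok;

	if (fd < 0)
		return -1;
	ok = write(fd, "1", 1) == 1;	/* 1 == KSM_RUN_MERGE */
	close(fd);
	return ok ? 0 : -1;
}
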
diff --git a/mm/madvise.c b/mm/madvise.c
index d9ae2067952e..35b1479b7c9d 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -218,6 +218,32 @@ static long madvise_remove(struct vm_area_struct *vma,
218 return error; 218 return error;
219} 219}
220 220
221#ifdef CONFIG_MEMORY_FAILURE
222/*
223 * Error injection support for memory error handling.
224 */
225static int madvise_hwpoison(unsigned long start, unsigned long end)
226{
227 int ret = 0;
228
229 if (!capable(CAP_SYS_ADMIN))
230 return -EPERM;
231 for (; start < end; start += PAGE_SIZE) {
232 struct page *p;
233 int ret = get_user_pages(current, current->mm, start, 1,
234 0, 0, &p, NULL);
235 if (ret != 1)
236 return ret;
237 printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n",
238 page_to_pfn(p), start);
239 /* Ignore return value for now */
240 __memory_failure(page_to_pfn(p), 0, 1);
241 put_page(p);
242 }
243 return ret;
244}
245#endif
246
221static long 247static long
222madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, 248madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
223 unsigned long start, unsigned long end, int behavior) 249 unsigned long start, unsigned long end, int behavior)
@@ -308,6 +334,10 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
308 int write; 334 int write;
309 size_t len; 335 size_t len;
310 336
337#ifdef CONFIG_MEMORY_FAILURE
338 if (behavior == MADV_HWPOISON)
339 return madvise_hwpoison(start, start+len_in);
340#endif
311 if (!madvise_behavior_valid(behavior)) 341 if (!madvise_behavior_valid(behavior))
312 return error; 342 return error;
313 343
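
A userspace sketch of exercising the injection path added above (poison_one_page is hypothetical; MADV_HWPOISON's numeric value is assumed from asm-generic/mman-common.h, and the call needs CAP_SYS_ADMIN plus CONFIG_MEMORY_FAILURE):

#include <sys/mman.h>
#include <unistd.h>

#ifndef MADV_HWPOISON
#define MADV_HWPOISON 100	/* assumed value, see asm-generic/mman-common.h */
#endif

int poison_one_page(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, psz, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return -1;
	p[0] = 1;	/* fault the page in so get_user_pages() finds it */
	return madvise(p, psz, MADV_HWPOISON);
}
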
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9b10d8753784..e2b98a6875c0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -29,6 +29,7 @@
29#include <linux/rcupdate.h> 29#include <linux/rcupdate.h>
30#include <linux/limits.h> 30#include <linux/limits.h>
31#include <linux/mutex.h> 31#include <linux/mutex.h>
32#include <linux/rbtree.h>
32#include <linux/slab.h> 33#include <linux/slab.h>
33#include <linux/swap.h> 34#include <linux/swap.h>
34#include <linux/spinlock.h> 35#include <linux/spinlock.h>
@@ -43,6 +44,7 @@
43 44
44struct cgroup_subsys mem_cgroup_subsys __read_mostly; 45struct cgroup_subsys mem_cgroup_subsys __read_mostly;
45#define MEM_CGROUP_RECLAIM_RETRIES 5 46#define MEM_CGROUP_RECLAIM_RETRIES 5
47struct mem_cgroup *root_mem_cgroup __read_mostly;
46 48
47#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 49#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
48/* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ 50/* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */
@@ -53,6 +55,7 @@ static int really_do_swap_account __initdata = 1; /* for remember boot option*/
53#endif 55#endif
54 56
55static DEFINE_MUTEX(memcg_tasklist);	/* can be held under cgroup_mutex */ 57static DEFINE_MUTEX(memcg_tasklist);	/* can be held under cgroup_mutex */
58#define SOFTLIMIT_EVENTS_THRESH (1000)
56 59
57/* 60/*
58 * Statistics for memory cgroup. 61 * Statistics for memory cgroup.
@@ -66,6 +69,8 @@ enum mem_cgroup_stat_index {
66 MEM_CGROUP_STAT_MAPPED_FILE, /* # of pages charged as file rss */ 69 MEM_CGROUP_STAT_MAPPED_FILE, /* # of pages charged as file rss */
67 MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ 70 MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */
68 MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ 71 MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */
72 MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */
73 MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
69 74
70 MEM_CGROUP_STAT_NSTATS, 75 MEM_CGROUP_STAT_NSTATS,
71}; 76};
@@ -78,6 +83,20 @@ struct mem_cgroup_stat {
78 struct mem_cgroup_stat_cpu cpustat[0]; 83 struct mem_cgroup_stat_cpu cpustat[0];
79}; 84};
80 85
86static inline void
87__mem_cgroup_stat_reset_safe(struct mem_cgroup_stat_cpu *stat,
88 enum mem_cgroup_stat_index idx)
89{
90 stat->count[idx] = 0;
91}
92
93static inline s64
94__mem_cgroup_stat_read_local(struct mem_cgroup_stat_cpu *stat,
95 enum mem_cgroup_stat_index idx)
96{
97 return stat->count[idx];
98}
99
81/* 100/*
82 * For accounting under irq disable, no need for increment preempt count. 101 * For accounting under irq disable, no need for increment preempt count.
83 */ 102 */
@@ -117,6 +136,12 @@ struct mem_cgroup_per_zone {
117 unsigned long count[NR_LRU_LISTS]; 136 unsigned long count[NR_LRU_LISTS];
118 137
119 struct zone_reclaim_stat reclaim_stat; 138 struct zone_reclaim_stat reclaim_stat;
139 struct rb_node tree_node; /* RB tree node */
140 unsigned long long usage_in_excess;/* Set to the value by which */
141 /* the soft limit is exceeded*/
142 bool on_tree;
143 struct mem_cgroup *mem; /* Back pointer, we cannot */
144 /* use container_of */
120}; 145};
121/* Macro for accessing counter */ 146/* Macro for accessing counter */
122#define MEM_CGROUP_ZSTAT(mz, idx) ((mz)->count[(idx)]) 147#define MEM_CGROUP_ZSTAT(mz, idx) ((mz)->count[(idx)])
@@ -130,6 +155,26 @@ struct mem_cgroup_lru_info {
130}; 155};
131 156
132/* 157/*
158 * Cgroups above their limits are maintained in an RB-tree, independent of
159 * their hierarchy representation
160 */
161
162struct mem_cgroup_tree_per_zone {
163 struct rb_root rb_root;
164 spinlock_t lock;
165};
166
167struct mem_cgroup_tree_per_node {
168 struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES];
169};
170
171struct mem_cgroup_tree {
172 struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
173};
174
175static struct mem_cgroup_tree soft_limit_tree __read_mostly;
176
177/*
133 * The memory controller data structure. The memory controller controls both 178 * The memory controller data structure. The memory controller controls both
134 * page cache and RSS per cgroup. We would eventually like to provide 179 * page cache and RSS per cgroup. We would eventually like to provide
135 * statistics based on the statistics developed by Rik Van Riel for clock-pro, 180 * statistics based on the statistics developed by Rik Van Riel for clock-pro,
@@ -186,6 +231,13 @@ struct mem_cgroup {
186 struct mem_cgroup_stat stat; 231 struct mem_cgroup_stat stat;
187}; 232};
188 233
234/*
235 * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft
236 * limit reclaim to prevent infinite loops, if they ever occur.
237 */
238#define MEM_CGROUP_MAX_RECLAIM_LOOPS (100)
239#define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS (2)
240
189enum charge_type { 241enum charge_type {
190 MEM_CGROUP_CHARGE_TYPE_CACHE = 0, 242 MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
191 MEM_CGROUP_CHARGE_TYPE_MAPPED, 243 MEM_CGROUP_CHARGE_TYPE_MAPPED,
@@ -200,13 +252,8 @@ enum charge_type {
200#define PCGF_CACHE (1UL << PCG_CACHE) 252#define PCGF_CACHE (1UL << PCG_CACHE)
201#define PCGF_USED (1UL << PCG_USED) 253#define PCGF_USED (1UL << PCG_USED)
202#define PCGF_LOCK (1UL << PCG_LOCK) 254#define PCGF_LOCK (1UL << PCG_LOCK)
203static const unsigned long 255/* Not used, but added here for completeness */
204pcg_default_flags[NR_CHARGE_TYPE] = { 256#define PCGF_ACCT (1UL << PCG_ACCT)
205 PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* File Cache */
206 PCGF_USED | PCGF_LOCK, /* Anon */
207 PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */
208 0, /* FORCE */
209};
210 257
211/* for encoding cft->private value on file */ 258/* for encoding cft->private value on file */
212#define _MEM (0) 259#define _MEM (0)
@@ -215,15 +262,241 @@ pcg_default_flags[NR_CHARGE_TYPE] = {
215#define MEMFILE_TYPE(val) (((val) >> 16) & 0xffff) 262#define MEMFILE_TYPE(val) (((val) >> 16) & 0xffff)
216#define MEMFILE_ATTR(val) ((val) & 0xffff) 263#define MEMFILE_ATTR(val) ((val) & 0xffff)
217 264
265/*
266 * Reclaim flags for mem_cgroup_hierarchical_reclaim
267 */
268#define MEM_CGROUP_RECLAIM_NOSWAP_BIT 0x0
269#define MEM_CGROUP_RECLAIM_NOSWAP (1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
270#define MEM_CGROUP_RECLAIM_SHRINK_BIT 0x1
271#define MEM_CGROUP_RECLAIM_SHRINK (1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
272#define MEM_CGROUP_RECLAIM_SOFT_BIT 0x2
273#define MEM_CGROUP_RECLAIM_SOFT (1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
274
218static void mem_cgroup_get(struct mem_cgroup *mem); 275static void mem_cgroup_get(struct mem_cgroup *mem);
219static void mem_cgroup_put(struct mem_cgroup *mem); 276static void mem_cgroup_put(struct mem_cgroup *mem);
220static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem); 277static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem);
221 278
279static struct mem_cgroup_per_zone *
280mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
281{
282 return &mem->info.nodeinfo[nid]->zoneinfo[zid];
283}
284
285static struct mem_cgroup_per_zone *
286page_cgroup_zoneinfo(struct page_cgroup *pc)
287{
288 struct mem_cgroup *mem = pc->mem_cgroup;
289 int nid = page_cgroup_nid(pc);
290 int zid = page_cgroup_zid(pc);
291
292 if (!mem)
293 return NULL;
294
295 return mem_cgroup_zoneinfo(mem, nid, zid);
296}
297
298static struct mem_cgroup_tree_per_zone *
299soft_limit_tree_node_zone(int nid, int zid)
300{
301 return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
302}
303
304static struct mem_cgroup_tree_per_zone *
305soft_limit_tree_from_page(struct page *page)
306{
307 int nid = page_to_nid(page);
308 int zid = page_zonenum(page);
309
310 return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
311}
312
313static void
314__mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
315 struct mem_cgroup_per_zone *mz,
316 struct mem_cgroup_tree_per_zone *mctz)
317{
318 struct rb_node **p = &mctz->rb_root.rb_node;
319 struct rb_node *parent = NULL;
320 struct mem_cgroup_per_zone *mz_node;
321
322 if (mz->on_tree)
323 return;
324
325 mz->usage_in_excess = res_counter_soft_limit_excess(&mem->res);
326 while (*p) {
327 parent = *p;
328 mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
329 tree_node);
330 if (mz->usage_in_excess < mz_node->usage_in_excess)
331 p = &(*p)->rb_left;
332 /*
333 * We can't avoid mem cgroups that are over their soft
334 * limit by the same amount
335 */
336 else if (mz->usage_in_excess >= mz_node->usage_in_excess)
337 p = &(*p)->rb_right;
338 }
339 rb_link_node(&mz->tree_node, parent, p);
340 rb_insert_color(&mz->tree_node, &mctz->rb_root);
341 mz->on_tree = true;
342}
343
344static void
345__mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
346 struct mem_cgroup_per_zone *mz,
347 struct mem_cgroup_tree_per_zone *mctz)
348{
349 if (!mz->on_tree)
350 return;
351 rb_erase(&mz->tree_node, &mctz->rb_root);
352 mz->on_tree = false;
353}
354
355static void
356mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
357 struct mem_cgroup_per_zone *mz,
358 struct mem_cgroup_tree_per_zone *mctz)
359{
360 spin_lock(&mctz->lock);
361 __mem_cgroup_insert_exceeded(mem, mz, mctz);
362 spin_unlock(&mctz->lock);
363}
364
365static void
366mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
367 struct mem_cgroup_per_zone *mz,
368 struct mem_cgroup_tree_per_zone *mctz)
369{
370 spin_lock(&mctz->lock);
371 __mem_cgroup_remove_exceeded(mem, mz, mctz);
372 spin_unlock(&mctz->lock);
373}
374
375static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem)
376{
377 bool ret = false;
378 int cpu;
379 s64 val;
380 struct mem_cgroup_stat_cpu *cpustat;
381
382 cpu = get_cpu();
383 cpustat = &mem->stat.cpustat[cpu];
384 val = __mem_cgroup_stat_read_local(cpustat, MEM_CGROUP_STAT_EVENTS);
385 if (unlikely(val > SOFTLIMIT_EVENTS_THRESH)) {
386 __mem_cgroup_stat_reset_safe(cpustat, MEM_CGROUP_STAT_EVENTS);
387 ret = true;
388 }
389 put_cpu();
390 return ret;
391}
392
393static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
394{
395 unsigned long long prev_usage_in_excess, new_usage_in_excess;
396 bool updated_tree = false;
397 struct mem_cgroup_per_zone *mz;
398 struct mem_cgroup_tree_per_zone *mctz;
399
400 mz = mem_cgroup_zoneinfo(mem, page_to_nid(page), page_zonenum(page));
401 mctz = soft_limit_tree_from_page(page);
402
403 /*
404	 * We do updates in lazy mode: mems are removed
405 * lazily from the per-zone, per-node rb tree
406 */
407 prev_usage_in_excess = mz->usage_in_excess;
408
409 new_usage_in_excess = res_counter_soft_limit_excess(&mem->res);
410 if (prev_usage_in_excess) {
411 mem_cgroup_remove_exceeded(mem, mz, mctz);
412 updated_tree = true;
413 }
414 if (!new_usage_in_excess)
415 goto done;
416 mem_cgroup_insert_exceeded(mem, mz, mctz);
417
418done:
419 if (updated_tree) {
420 spin_lock(&mctz->lock);
421 mz->usage_in_excess = new_usage_in_excess;
422 spin_unlock(&mctz->lock);
423 }
424}
425
426static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem)
427{
428 int node, zone;
429 struct mem_cgroup_per_zone *mz;
430 struct mem_cgroup_tree_per_zone *mctz;
431
432 for_each_node_state(node, N_POSSIBLE) {
433 for (zone = 0; zone < MAX_NR_ZONES; zone++) {
434 mz = mem_cgroup_zoneinfo(mem, node, zone);
435 mctz = soft_limit_tree_node_zone(node, zone);
436 mem_cgroup_remove_exceeded(mem, mz, mctz);
437 }
438 }
439}
440
441static inline unsigned long mem_cgroup_get_excess(struct mem_cgroup *mem)
442{
443 return res_counter_soft_limit_excess(&mem->res) >> PAGE_SHIFT;
444}
445
446static struct mem_cgroup_per_zone *
447__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
448{
449 struct rb_node *rightmost = NULL;
450 struct mem_cgroup_per_zone *mz = NULL;
451
452retry:
453 rightmost = rb_last(&mctz->rb_root);
454 if (!rightmost)
455 goto done; /* Nothing to reclaim from */
456
457 mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node);
458 /*
459 * Remove the node now but someone else can add it back,
460	 * we will add it back at the end of reclaim to its
461 * position in the tree.
462 */
463 __mem_cgroup_remove_exceeded(mz->mem, mz, mctz);
464 if (!res_counter_soft_limit_excess(&mz->mem->res) ||
465 !css_tryget(&mz->mem->css))
466 goto retry;
467done:
468 return mz;
469}
470
471static struct mem_cgroup_per_zone *
472mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
473{
474 struct mem_cgroup_per_zone *mz;
475
476 spin_lock(&mctz->lock);
477 mz = __mem_cgroup_largest_soft_limit_node(mctz);
478 spin_unlock(&mctz->lock);
479 return mz;
480}
481
482static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
483 bool charge)
484{
485 int val = (charge) ? 1 : -1;
486 struct mem_cgroup_stat *stat = &mem->stat;
487 struct mem_cgroup_stat_cpu *cpustat;
488 int cpu = get_cpu();
489
490 cpustat = &stat->cpustat[cpu];
491 __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SWAPOUT, val);
492 put_cpu();
493}
494
222static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, 495static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
223 struct page_cgroup *pc, 496 struct page_cgroup *pc,
224 bool charge) 497 bool charge)
225{ 498{
226 int val = (charge)? 1 : -1; 499 int val = (charge) ? 1 : -1;
227 struct mem_cgroup_stat *stat = &mem->stat; 500 struct mem_cgroup_stat *stat = &mem->stat;
228 struct mem_cgroup_stat_cpu *cpustat; 501 struct mem_cgroup_stat_cpu *cpustat;
229 int cpu = get_cpu(); 502 int cpu = get_cpu();
@@ -240,28 +513,10 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
240 else 513 else
241 __mem_cgroup_stat_add_safe(cpustat, 514 __mem_cgroup_stat_add_safe(cpustat,
242 MEM_CGROUP_STAT_PGPGOUT_COUNT, 1); 515 MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
516 __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_EVENTS, 1);
243 put_cpu(); 517 put_cpu();
244} 518}
245 519
246static struct mem_cgroup_per_zone *
247mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
248{
249 return &mem->info.nodeinfo[nid]->zoneinfo[zid];
250}
251
252static struct mem_cgroup_per_zone *
253page_cgroup_zoneinfo(struct page_cgroup *pc)
254{
255 struct mem_cgroup *mem = pc->mem_cgroup;
256 int nid = page_cgroup_nid(pc);
257 int zid = page_cgroup_zid(pc);
258
259 if (!mem)
260 return NULL;
261
262 return mem_cgroup_zoneinfo(mem, nid, zid);
263}
264
265static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem, 520static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem,
266 enum lru_list idx) 521 enum lru_list idx)
267{ 522{
@@ -354,6 +609,11 @@ static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data,
354 return ret; 609 return ret;
355} 610}
356 611
612static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
613{
614 return (mem == root_mem_cgroup);
615}
616
357/* 617/*
358 * Following LRU functions are allowed to be used without PCG_LOCK. 618 * Following LRU functions are allowed to be used without PCG_LOCK.
359 * Operations are called by routine of global LRU independently from memcg. 619 * Operations are called by routine of global LRU independently from memcg.
@@ -371,22 +631,24 @@ static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data,
371void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru) 631void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
372{ 632{
373 struct page_cgroup *pc; 633 struct page_cgroup *pc;
374 struct mem_cgroup *mem;
375 struct mem_cgroup_per_zone *mz; 634 struct mem_cgroup_per_zone *mz;
376 635
377 if (mem_cgroup_disabled()) 636 if (mem_cgroup_disabled())
378 return; 637 return;
379 pc = lookup_page_cgroup(page); 638 pc = lookup_page_cgroup(page);
380 /* can happen while we handle swapcache. */ 639 /* can happen while we handle swapcache. */
381 if (list_empty(&pc->lru) || !pc->mem_cgroup) 640 if (!TestClearPageCgroupAcctLRU(pc))
382 return; 641 return;
642 VM_BUG_ON(!pc->mem_cgroup);
383 /* 643 /*
384 * We don't check PCG_USED bit. It's cleared when the "page" is finally 644 * We don't check PCG_USED bit. It's cleared when the "page" is finally
385 * removed from global LRU. 645 * removed from global LRU.
386 */ 646 */
387 mz = page_cgroup_zoneinfo(pc); 647 mz = page_cgroup_zoneinfo(pc);
388 mem = pc->mem_cgroup;
389 MEM_CGROUP_ZSTAT(mz, lru) -= 1; 648 MEM_CGROUP_ZSTAT(mz, lru) -= 1;
649 if (mem_cgroup_is_root(pc->mem_cgroup))
650 return;
651 VM_BUG_ON(list_empty(&pc->lru));
390 list_del_init(&pc->lru); 652 list_del_init(&pc->lru);
391 return; 653 return;
392} 654}
@@ -410,8 +672,8 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
410 * For making pc->mem_cgroup visible, insert smp_rmb() here. 672 * For making pc->mem_cgroup visible, insert smp_rmb() here.
411 */ 673 */
412 smp_rmb(); 674 smp_rmb();
413 /* unused page is not rotated. */ 675 /* unused or root page is not rotated. */
414 if (!PageCgroupUsed(pc)) 676 if (!PageCgroupUsed(pc) || mem_cgroup_is_root(pc->mem_cgroup))
415 return; 677 return;
416 mz = page_cgroup_zoneinfo(pc); 678 mz = page_cgroup_zoneinfo(pc);
417 list_move(&pc->lru, &mz->lists[lru]); 679 list_move(&pc->lru, &mz->lists[lru]);
@@ -425,6 +687,7 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
425 if (mem_cgroup_disabled()) 687 if (mem_cgroup_disabled())
426 return; 688 return;
427 pc = lookup_page_cgroup(page); 689 pc = lookup_page_cgroup(page);
690 VM_BUG_ON(PageCgroupAcctLRU(pc));
428 /* 691 /*
429 * Used bit is set without atomic ops but after smp_wmb(). 692 * Used bit is set without atomic ops but after smp_wmb().
430 * For making pc->mem_cgroup visible, insert smp_rmb() here. 693 * For making pc->mem_cgroup visible, insert smp_rmb() here.
@@ -435,6 +698,9 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
435 698
436 mz = page_cgroup_zoneinfo(pc); 699 mz = page_cgroup_zoneinfo(pc);
437 MEM_CGROUP_ZSTAT(mz, lru) += 1; 700 MEM_CGROUP_ZSTAT(mz, lru) += 1;
701 SetPageCgroupAcctLRU(pc);
702 if (mem_cgroup_is_root(pc->mem_cgroup))
703 return;
438 list_add(&pc->lru, &mz->lists[lru]); 704 list_add(&pc->lru, &mz->lists[lru]);
439} 705}
440 706
@@ -469,7 +735,7 @@ static void mem_cgroup_lru_add_after_commit_swapcache(struct page *page)
469 735
470 spin_lock_irqsave(&zone->lru_lock, flags); 736 spin_lock_irqsave(&zone->lru_lock, flags);
471 /* link when the page is linked to LRU but page_cgroup isn't */ 737 /* link when the page is linked to LRU but page_cgroup isn't */
472 if (PageLRU(page) && list_empty(&pc->lru)) 738 if (PageLRU(page) && !PageCgroupAcctLRU(pc))
473 mem_cgroup_add_lru_list(page, page_lru(page)); 739 mem_cgroup_add_lru_list(page, page_lru(page));
474 spin_unlock_irqrestore(&zone->lru_lock, flags); 740 spin_unlock_irqrestore(&zone->lru_lock, flags);
475} 741}
@@ -855,28 +1121,62 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
855 * If shrink==true, to avoid freeing too much, this returns immediately. 1121 * If shrink==true, to avoid freeing too much, this returns immediately.
856 */ 1122 */
857static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, 1123static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
858 gfp_t gfp_mask, bool noswap, bool shrink) 1124 struct zone *zone,
1125 gfp_t gfp_mask,
1126 unsigned long reclaim_options)
859{ 1127{
860 struct mem_cgroup *victim; 1128 struct mem_cgroup *victim;
861 int ret, total = 0; 1129 int ret, total = 0;
862 int loop = 0; 1130 int loop = 0;
1131 bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
1132 bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
1133 bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
1134 unsigned long excess = mem_cgroup_get_excess(root_mem);
863 1135
864	/* If memsw_is_minimum==1, swap-out is of no use. */ 1136	/* If memsw_is_minimum==1, swap-out is of no use. */
865 if (root_mem->memsw_is_minimum) 1137 if (root_mem->memsw_is_minimum)
866 noswap = true; 1138 noswap = true;
867 1139
868 while (loop < 2) { 1140 while (1) {
869 victim = mem_cgroup_select_victim(root_mem); 1141 victim = mem_cgroup_select_victim(root_mem);
870 if (victim == root_mem) 1142 if (victim == root_mem) {
871 loop++; 1143 loop++;
1144 if (loop >= 2) {
1145 /*
1146 * If we have not been able to reclaim
1147 * anything, it might because there are
1148 * no reclaimable pages under this hierarchy
1149 */
1150 if (!check_soft || !total) {
1151 css_put(&victim->css);
1152 break;
1153 }
1154 /*
1155			 * We want to do more targeted reclaim.
1156			 * excess >> 2 is not so excessive that we
1157			 * reclaim too much, nor so small that we keep
1158			 * coming back to reclaim from this cgroup
1159 */
1160 if (total >= (excess >> 2) ||
1161 (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS)) {
1162 css_put(&victim->css);
1163 break;
1164 }
1165 }
1166 }
872 if (!mem_cgroup_local_usage(&victim->stat)) { 1167 if (!mem_cgroup_local_usage(&victim->stat)) {
873 /* this cgroup's local usage == 0 */ 1168 /* this cgroup's local usage == 0 */
874 css_put(&victim->css); 1169 css_put(&victim->css);
875 continue; 1170 continue;
876 } 1171 }
877 /* we use swappiness of local cgroup */ 1172 /* we use swappiness of local cgroup */
878 ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, noswap, 1173 if (check_soft)
879 get_swappiness(victim)); 1174 ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
1175 noswap, get_swappiness(victim), zone,
1176 zone->zone_pgdat->node_id);
1177 else
1178 ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
1179 noswap, get_swappiness(victim));
880 css_put(&victim->css); 1180 css_put(&victim->css);
881 /* 1181 /*
882 * At shrinking usage, we can't check we should stop here or 1182 * At shrinking usage, we can't check we should stop here or
@@ -886,7 +1186,10 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
886 if (shrink) 1186 if (shrink)
887 return ret; 1187 return ret;
888 total += ret; 1188 total += ret;
889 if (mem_cgroup_check_under_limit(root_mem)) 1189 if (check_soft) {
1190 if (res_counter_check_under_soft_limit(&root_mem->res))
1191 return total;
1192 } else if (mem_cgroup_check_under_limit(root_mem))
890 return 1 + total; 1193 return 1 + total;
891 } 1194 }
892 return total; 1195 return total;
@@ -965,11 +1268,11 @@ done:
965 */ 1268 */
966static int __mem_cgroup_try_charge(struct mm_struct *mm, 1269static int __mem_cgroup_try_charge(struct mm_struct *mm,
967 gfp_t gfp_mask, struct mem_cgroup **memcg, 1270 gfp_t gfp_mask, struct mem_cgroup **memcg,
968 bool oom) 1271 bool oom, struct page *page)
969{ 1272{
970 struct mem_cgroup *mem, *mem_over_limit; 1273 struct mem_cgroup *mem, *mem_over_limit, *mem_over_soft_limit;
971 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; 1274 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
972 struct res_counter *fail_res; 1275 struct res_counter *fail_res, *soft_fail_res = NULL;
973 1276
974 if (unlikely(test_thread_flag(TIF_MEMDIE))) { 1277 if (unlikely(test_thread_flag(TIF_MEMDIE))) {
975 /* Don't account this! */ 1278 /* Don't account this! */
@@ -996,20 +1299,23 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
996 VM_BUG_ON(css_is_removed(&mem->css)); 1299 VM_BUG_ON(css_is_removed(&mem->css));
997 1300
998 while (1) { 1301 while (1) {
999 int ret; 1302 int ret = 0;
1000 bool noswap = false; 1303 unsigned long flags = 0;
1001 1304
1002 ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res); 1305 if (mem_cgroup_is_root(mem))
1306 goto done;
1307 ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res,
1308 &soft_fail_res);
1003 if (likely(!ret)) { 1309 if (likely(!ret)) {
1004 if (!do_swap_account) 1310 if (!do_swap_account)
1005 break; 1311 break;
1006 ret = res_counter_charge(&mem->memsw, PAGE_SIZE, 1312 ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
1007 &fail_res); 1313 &fail_res, NULL);
1008 if (likely(!ret)) 1314 if (likely(!ret))
1009 break; 1315 break;
1010 /* mem+swap counter fails */ 1316 /* mem+swap counter fails */
1011 res_counter_uncharge(&mem->res, PAGE_SIZE); 1317 res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
1012 noswap = true; 1318 flags |= MEM_CGROUP_RECLAIM_NOSWAP;
1013 mem_over_limit = mem_cgroup_from_res_counter(fail_res, 1319 mem_over_limit = mem_cgroup_from_res_counter(fail_res,
1014 memsw); 1320 memsw);
1015 } else 1321 } else
@@ -1020,8 +1326,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
1020 if (!(gfp_mask & __GFP_WAIT)) 1326 if (!(gfp_mask & __GFP_WAIT))
1021 goto nomem; 1327 goto nomem;
1022 1328
1023 ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask, 1329 ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
1024 noswap, false); 1330 gfp_mask, flags);
1025 if (ret) 1331 if (ret)
1026 continue; 1332 continue;
1027 1333
@@ -1046,13 +1352,24 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
1046 goto nomem; 1352 goto nomem;
1047 } 1353 }
1048 } 1354 }
1355 /*
1356	 * Insert just the ancestor; we should trickle down to the correct
1357 * cgroup for reclaim, since the other nodes will be below their
1358 * soft limit
1359 */
1360 if (soft_fail_res) {
1361 mem_over_soft_limit =
1362 mem_cgroup_from_res_counter(soft_fail_res, res);
1363 if (mem_cgroup_soft_limit_check(mem_over_soft_limit))
1364 mem_cgroup_update_tree(mem_over_soft_limit, page);
1365 }
1366done:
1049 return 0; 1367 return 0;
1050nomem: 1368nomem:
1051 css_put(&mem->css); 1369 css_put(&mem->css);
1052 return -ENOMEM; 1370 return -ENOMEM;
1053} 1371}
1054 1372
1055
1056/* 1373/*
1057 * A helper function to get mem_cgroup from ID. must be called under 1374 * A helper function to get mem_cgroup from ID. must be called under
1058 * rcu_read_lock(). The caller must check css_is_removed() or some if 1375 * rcu_read_lock(). The caller must check css_is_removed() or some if
@@ -1119,15 +1436,38 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
1119 lock_page_cgroup(pc); 1436 lock_page_cgroup(pc);
1120 if (unlikely(PageCgroupUsed(pc))) { 1437 if (unlikely(PageCgroupUsed(pc))) {
1121 unlock_page_cgroup(pc); 1438 unlock_page_cgroup(pc);
1122 res_counter_uncharge(&mem->res, PAGE_SIZE); 1439 if (!mem_cgroup_is_root(mem)) {
1123 if (do_swap_account) 1440 res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
1124 res_counter_uncharge(&mem->memsw, PAGE_SIZE); 1441 if (do_swap_account)
1442 res_counter_uncharge(&mem->memsw, PAGE_SIZE,
1443 NULL);
1444 }
1125 css_put(&mem->css); 1445 css_put(&mem->css);
1126 return; 1446 return;
1127 } 1447 }
1448
1128 pc->mem_cgroup = mem; 1449 pc->mem_cgroup = mem;
1450 /*
1451 * We access a page_cgroup asynchronously without lock_page_cgroup().
1452 * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
1453 * is accessed after testing USED bit. To make pc->mem_cgroup visible
1454 * before USED bit, we need memory barrier here.
1455 * See mem_cgroup_add_lru_list(), etc.
1456 */
1129 smp_wmb(); 1457 smp_wmb();
1130 pc->flags = pcg_default_flags[ctype]; 1458 switch (ctype) {
1459 case MEM_CGROUP_CHARGE_TYPE_CACHE:
1460 case MEM_CGROUP_CHARGE_TYPE_SHMEM:
1461 SetPageCgroupCache(pc);
1462 SetPageCgroupUsed(pc);
1463 break;
1464 case MEM_CGROUP_CHARGE_TYPE_MAPPED:
1465 ClearPageCgroupCache(pc);
1466 SetPageCgroupUsed(pc);
1467 break;
1468 default:
1469 break;
1470 }
1131 1471
1132 mem_cgroup_charge_statistics(mem, pc, true); 1472 mem_cgroup_charge_statistics(mem, pc, true);
1133 1473
@@ -1178,7 +1518,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
1178 if (pc->mem_cgroup != from) 1518 if (pc->mem_cgroup != from)
1179 goto out; 1519 goto out;
1180 1520
1181 res_counter_uncharge(&from->res, PAGE_SIZE); 1521 if (!mem_cgroup_is_root(from))
1522 res_counter_uncharge(&from->res, PAGE_SIZE, NULL);
1182 mem_cgroup_charge_statistics(from, pc, false); 1523 mem_cgroup_charge_statistics(from, pc, false);
1183 1524
1184 page = pc->page; 1525 page = pc->page;
@@ -1197,8 +1538,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
1197 1); 1538 1);
1198 } 1539 }
1199 1540
1200 if (do_swap_account) 1541 if (do_swap_account && !mem_cgroup_is_root(from))
1201 res_counter_uncharge(&from->memsw, PAGE_SIZE); 1542 res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL);
1202 css_put(&from->css); 1543 css_put(&from->css);
1203 1544
1204 css_get(&to->css); 1545 css_get(&to->css);
@@ -1238,7 +1579,7 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
1238 parent = mem_cgroup_from_cont(pcg); 1579 parent = mem_cgroup_from_cont(pcg);
1239 1580
1240 1581
1241 ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false); 1582 ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, page);
1242 if (ret || !parent) 1583 if (ret || !parent)
1243 return ret; 1584 return ret;
1244 1585
@@ -1268,9 +1609,11 @@ uncharge:
1268 /* drop extra refcnt by try_charge() */ 1609 /* drop extra refcnt by try_charge() */
1269 css_put(&parent->css); 1610 css_put(&parent->css);
1270 /* uncharge if move fails */ 1611 /* uncharge if move fails */
1271 res_counter_uncharge(&parent->res, PAGE_SIZE); 1612 if (!mem_cgroup_is_root(parent)) {
1272 if (do_swap_account) 1613 res_counter_uncharge(&parent->res, PAGE_SIZE, NULL);
1273 res_counter_uncharge(&parent->memsw, PAGE_SIZE); 1614 if (do_swap_account)
1615 res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL);
1616 }
1274 return ret; 1617 return ret;
1275} 1618}
1276 1619
@@ -1295,7 +1638,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
1295 prefetchw(pc); 1638 prefetchw(pc);
1296 1639
1297 mem = memcg; 1640 mem = memcg;
1298 ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true); 1641 ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page);
1299 if (ret || !mem) 1642 if (ret || !mem)
1300 return ret; 1643 return ret;
1301 1644
@@ -1414,14 +1757,14 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
1414 if (!mem) 1757 if (!mem)
1415 goto charge_cur_mm; 1758 goto charge_cur_mm;
1416 *ptr = mem; 1759 *ptr = mem;
1417 ret = __mem_cgroup_try_charge(NULL, mask, ptr, true); 1760 ret = __mem_cgroup_try_charge(NULL, mask, ptr, true, page);
1418 /* drop extra refcnt from tryget */ 1761 /* drop extra refcnt from tryget */
1419 css_put(&mem->css); 1762 css_put(&mem->css);
1420 return ret; 1763 return ret;
1421charge_cur_mm: 1764charge_cur_mm:
1422 if (unlikely(!mm)) 1765 if (unlikely(!mm))
1423 mm = &init_mm; 1766 mm = &init_mm;
1424 return __mem_cgroup_try_charge(mm, mask, ptr, true); 1767 return __mem_cgroup_try_charge(mm, mask, ptr, true, page);
1425} 1768}
1426 1769
1427static void 1770static void
@@ -1459,7 +1802,10 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
1459 * This recorded memcg can be obsolete one. So, avoid 1802 * This recorded memcg can be obsolete one. So, avoid
1460 * calling css_tryget 1803 * calling css_tryget
1461 */ 1804 */
1462 res_counter_uncharge(&memcg->memsw, PAGE_SIZE); 1805 if (!mem_cgroup_is_root(memcg))
1806 res_counter_uncharge(&memcg->memsw, PAGE_SIZE,
1807 NULL);
1808 mem_cgroup_swap_statistics(memcg, false);
1463 mem_cgroup_put(memcg); 1809 mem_cgroup_put(memcg);
1464 } 1810 }
1465 rcu_read_unlock(); 1811 rcu_read_unlock();
@@ -1484,9 +1830,11 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
1484 return; 1830 return;
1485 if (!mem) 1831 if (!mem)
1486 return; 1832 return;
1487 res_counter_uncharge(&mem->res, PAGE_SIZE); 1833 if (!mem_cgroup_is_root(mem)) {
1488 if (do_swap_account) 1834 res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
1489 res_counter_uncharge(&mem->memsw, PAGE_SIZE); 1835 if (do_swap_account)
1836 res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
1837 }
1490 css_put(&mem->css); 1838 css_put(&mem->css);
1491} 1839}
1492 1840
@@ -1500,6 +1848,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
1500 struct page_cgroup *pc; 1848 struct page_cgroup *pc;
1501 struct mem_cgroup *mem = NULL; 1849 struct mem_cgroup *mem = NULL;
1502 struct mem_cgroup_per_zone *mz; 1850 struct mem_cgroup_per_zone *mz;
1851 bool soft_limit_excess = false;
1503 1852
1504 if (mem_cgroup_disabled()) 1853 if (mem_cgroup_disabled())
1505 return NULL; 1854 return NULL;
@@ -1538,9 +1887,14 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
1538 break; 1887 break;
1539 } 1888 }
1540 1889
1541 res_counter_uncharge(&mem->res, PAGE_SIZE); 1890 if (!mem_cgroup_is_root(mem)) {
1542 if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) 1891 res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
1543 res_counter_uncharge(&mem->memsw, PAGE_SIZE); 1892 if (do_swap_account &&
1893 (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
1894 res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
1895 }
1896 if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
1897 mem_cgroup_swap_statistics(mem, true);
1544 mem_cgroup_charge_statistics(mem, pc, false); 1898 mem_cgroup_charge_statistics(mem, pc, false);
1545 1899
1546 ClearPageCgroupUsed(pc); 1900 ClearPageCgroupUsed(pc);
@@ -1554,6 +1908,8 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
1554 mz = page_cgroup_zoneinfo(pc); 1908 mz = page_cgroup_zoneinfo(pc);
1555 unlock_page_cgroup(pc); 1909 unlock_page_cgroup(pc);
1556 1910
1911 if (soft_limit_excess && mem_cgroup_soft_limit_check(mem))
1912 mem_cgroup_update_tree(mem, page);
1557 /* at swapout, this memcg will be accessed to record to swap */ 1913 /* at swapout, this memcg will be accessed to record to swap */
1558 if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT) 1914 if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
1559 css_put(&mem->css); 1915 css_put(&mem->css);
@@ -1629,7 +1985,9 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
1629 * We uncharge this because swap is freed. 1985 * We uncharge this because swap is freed.
1630 * This memcg can be an obsolete one. We avoid calling css_tryget 1986 * This memcg can be an obsolete one. We avoid calling css_tryget
1631 */ 1987 */
1632 res_counter_uncharge(&memcg->memsw, PAGE_SIZE); 1988 if (!mem_cgroup_is_root(memcg))
1989 res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
1990 mem_cgroup_swap_statistics(memcg, false);
1633 mem_cgroup_put(memcg); 1991 mem_cgroup_put(memcg);
1634 } 1992 }
1635 rcu_read_unlock(); 1993 rcu_read_unlock();
@@ -1658,7 +2016,8 @@ int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
1658 unlock_page_cgroup(pc); 2016 unlock_page_cgroup(pc);
1659 2017
1660 if (mem) { 2018 if (mem) {
1661 ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false); 2019 ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false,
2020 page);
1662 css_put(&mem->css); 2021 css_put(&mem->css);
1663 } 2022 }
1664 *ptr = mem; 2023 *ptr = mem;
@@ -1798,8 +2157,9 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
1798 if (!ret) 2157 if (!ret)
1799 break; 2158 break;
1800 2159
1801 progress = mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, 2160 progress = mem_cgroup_hierarchical_reclaim(memcg, NULL,
1802 false, true); 2161 GFP_KERNEL,
2162 MEM_CGROUP_RECLAIM_SHRINK);
1803 curusage = res_counter_read_u64(&memcg->res, RES_USAGE); 2163 curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
1804 /* Usage is reduced ? */ 2164 /* Usage is reduced ? */
1805 if (curusage >= oldusage) 2165 if (curusage >= oldusage)
@@ -1851,7 +2211,9 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
1851 if (!ret) 2211 if (!ret)
1852 break; 2212 break;
1853 2213
1854 mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, true, true); 2214 mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
2215 MEM_CGROUP_RECLAIM_NOSWAP |
2216 MEM_CGROUP_RECLAIM_SHRINK);
1855 curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); 2217 curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
1856 /* Usage is reduced ? */ 2218 /* Usage is reduced ? */
1857 if (curusage >= oldusage) 2219 if (curusage >= oldusage)
@@ -1862,6 +2224,97 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
1862 return ret; 2224 return ret;
1863} 2225}
1864 2226
2227unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
2228 gfp_t gfp_mask, int nid,
2229 int zid)
2230{
2231 unsigned long nr_reclaimed = 0;
2232 struct mem_cgroup_per_zone *mz, *next_mz = NULL;
2233 unsigned long reclaimed;
2234 int loop = 0;
2235 struct mem_cgroup_tree_per_zone *mctz;
2236
2237 if (order > 0)
2238 return 0;
2239
2240 mctz = soft_limit_tree_node_zone(nid, zid);
2241 /*
2242 * This loop can run for a while, especially if mem_cgroups
2243 * continuously keep exceeding their soft limit and putting the
2244 * system under pressure
2245 */
2246 do {
2247 if (next_mz)
2248 mz = next_mz;
2249 else
2250 mz = mem_cgroup_largest_soft_limit_node(mctz);
2251 if (!mz)
2252 break;
2253
2254 reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
2255 gfp_mask,
2256 MEM_CGROUP_RECLAIM_SOFT);
2257 nr_reclaimed += reclaimed;
2258 spin_lock(&mctz->lock);
2259
2260 /*
2261 * If we failed to reclaim anything from this memory cgroup
2262 * it is time to move on to the next cgroup
2263 */
2264 next_mz = NULL;
2265 if (!reclaimed) {
2266 do {
2267 /*
2268 * Loop until we find yet another one.
2269 *
2270 * By the time we get the soft_limit lock
2271 * again, someone might have added the
2272 * group back on the RB tree. Iterate to
2273 * make sure we get a different mem.
2274 * mem_cgroup_largest_soft_limit_node returns
2275 * NULL if no other cgroup is present on
2276 * the tree
2277 */
2278 next_mz =
2279 __mem_cgroup_largest_soft_limit_node(mctz);
2280 if (next_mz == mz) {
2281 css_put(&next_mz->mem->css);
2282 next_mz = NULL;
2283 } else /* next_mz == NULL or other memcg */
2284 break;
2285 } while (1);
2286 }
2287 mz->usage_in_excess =
2288 res_counter_soft_limit_excess(&mz->mem->res);
2289 __mem_cgroup_remove_exceeded(mz->mem, mz, mctz);
2290 /*
2291 * One school of thought says that we should not add
2292 * back the node to the tree if reclaim returns 0.
2293 * But our reclaim could return 0 simply because, due
2294 * to priority, we are exposing a smaller subset of
2295 * memory to reclaim from. Consider this as a longer
2296 * term TODO.
2297 */
2298 if (mz->usage_in_excess)
2299 __mem_cgroup_insert_exceeded(mz->mem, mz, mctz);
2300 spin_unlock(&mctz->lock);
2301 css_put(&mz->mem->css);
2302 loop++;
2303 /*
2304 * Could not reclaim anything and there are no more
2305 * mem cgroups to try or we seem to be looping without
2306 * reclaiming anything.
2307 */
2308 if (!nr_reclaimed &&
2309 (next_mz == NULL ||
2310 loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
2311 break;
2312 } while (!nr_reclaimed);
2313 if (next_mz)
2314 css_put(&next_mz->mem->css);
2315 return nr_reclaimed;
2316}
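
The reclaim loop above is dense; the standalone sketch below captures the same policy in miniature: repeatedly pick the group furthest over its soft limit, try to reclaim from it, and stop once something was reclaimed or the candidate/loop budget runs out. The array stands in for the per-zone RB-tree and reclaim() is a stub that halves the excess; every name here is illustrative, not kernel API.

#include <stdio.h>

#define MAX_LOOPS 4	/* stand-in for MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS */

struct group { const char *name; long excess; };

/* stub: pretend to reclaim half of the excess, return pages freed */
static long reclaim(struct group *g)
{
	long got = g->excess / 2;

	g->excess -= got;
	return got;
}

/* the group furthest over its soft limit (the rightmost RB-tree node) */
static struct group *largest(struct group *g, int n)
{
	struct group *best = NULL;
	int i;

	for (i = 0; i < n; i++)
		if (g[i].excess > 0 && (!best || g[i].excess > best->excess))
			best = &g[i];
	return best;
}

int main(void)
{
	struct group groups[] = { { "a", 300 }, { "b", 120 }, { "c", 0 } };
	long total = 0;
	int loop = 0;

	do {	/* same shape as the do/while above */
		struct group *g = largest(groups, 3);

		if (!g)
			break;
		total += reclaim(g);
		if (!total && ++loop > MAX_LOOPS)
			break;	/* looping without progress: give up */
	} while (!total);
	printf("reclaimed %ld\n", total);
	return 0;
}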
2317
1865/* 2318/*
1866 * This routine traverse page_cgroup in given list and drop them all. 2319 * This routine traverse page_cgroup in given list and drop them all.
1867 * *And* this routine doesn't reclaim page itself, just removes page_cgroup. 2320 * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
@@ -2046,20 +2499,64 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
2046 return retval; 2499 return retval;
2047} 2500}
2048 2501
2502struct mem_cgroup_idx_data {
2503 s64 val;
2504 enum mem_cgroup_stat_index idx;
2505};
2506
2507static int
2508mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data)
2509{
2510 struct mem_cgroup_idx_data *d = data;
2511 d->val += mem_cgroup_read_stat(&mem->stat, d->idx);
2512 return 0;
2513}
2514
2515static void
2516mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem,
2517 enum mem_cgroup_stat_index idx, s64 *val)
2518{
2519 struct mem_cgroup_idx_data d;
2520 d.idx = idx;
2521 d.val = 0;
2522 mem_cgroup_walk_tree(mem, &d, mem_cgroup_get_idx_stat);
2523 *val = d.val;
2524}
2525
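mem_cgroup_walk_tree() (not shown in this hunk) visits every cgroup in the hierarchy and invokes the callback on each, so the helper above effectively sums one statistic over a whole subtree. Below is a self-contained sketch of that accumulate-via-callback idiom on a toy first-child/next-sibling tree; the struct layout and names are invented for illustration.

#include <stdio.h>

struct node {
	long stat;		/* per-group counter, e.g. cached pages */
	struct node *child;	/* toy hierarchy: first child ... */
	struct node *next;	/* ... and next sibling */
};

/* visit every node under (and including) n, like mem_cgroup_walk_tree() */
static void walk(struct node *n, void *data, int (*fn)(struct node *, void *))
{
	for (; n; n = n->next) {
		fn(n, data);
		walk(n->child, data, fn);
	}
}

/* the callback just accumulates, like mem_cgroup_get_idx_stat() */
static int add_stat(struct node *n, void *data)
{
	*(long *)data += n->stat;
	return 0;
}

int main(void)
{
	struct node c2 = { 7, NULL, NULL };
	struct node c1 = { 5, NULL, &c2 };
	struct node root = { 3, &c1, NULL };
	long total = 0;

	walk(&root, &total, add_stat);	/* 3 + 5 + 7 */
	printf("hierarchical total = %ld\n", total);
	return 0;
}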
2049static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) 2526static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
2050{ 2527{
2051 struct mem_cgroup *mem = mem_cgroup_from_cont(cont); 2528 struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
2052 u64 val = 0; 2529 u64 idx_val, val;
2053 int type, name; 2530 int type, name;
2054 2531
2055 type = MEMFILE_TYPE(cft->private); 2532 type = MEMFILE_TYPE(cft->private);
2056 name = MEMFILE_ATTR(cft->private); 2533 name = MEMFILE_ATTR(cft->private);
2057 switch (type) { 2534 switch (type) {
2058 case _MEM: 2535 case _MEM:
2059 val = res_counter_read_u64(&mem->res, name); 2536 if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
2537 mem_cgroup_get_recursive_idx_stat(mem,
2538 MEM_CGROUP_STAT_CACHE, &idx_val);
2539 val = idx_val;
2540 mem_cgroup_get_recursive_idx_stat(mem,
2541 MEM_CGROUP_STAT_RSS, &idx_val);
2542 val += idx_val;
2543 val <<= PAGE_SHIFT;
2544 } else
2545 val = res_counter_read_u64(&mem->res, name);
2060 break; 2546 break;
2061 case _MEMSWAP: 2547 case _MEMSWAP:
2062 val = res_counter_read_u64(&mem->memsw, name); 2548 if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
2549 mem_cgroup_get_recursive_idx_stat(mem,
2550 MEM_CGROUP_STAT_CACHE, &idx_val);
2551 val = idx_val;
2552 mem_cgroup_get_recursive_idx_stat(mem,
2553 MEM_CGROUP_STAT_RSS, &idx_val);
2554 val += idx_val;
2555 mem_cgroup_get_recursive_idx_stat(mem,
2556 MEM_CGROUP_STAT_SWAPOUT, &idx_val);
2557 val = (val + idx_val) << PAGE_SHIFT; /* include swapped-out pages */
2558 } else
2559 val = res_counter_read_u64(&mem->memsw, name);
2063 break; 2560 break;
2064 default: 2561 default:
2065 BUG(); 2562 BUG();
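
Since the per-CPU statistics count pages, the root's usage is assembled in pages and converted to bytes only at the end with a single shift. A quick standalone check of that arithmetic (assuming 4 KiB pages, i.e. PAGE_SHIFT == 12; the counts are made up):

#include <stdio.h>

int main(void)
{
	unsigned long long cache = 1000, rss = 500;	/* pages */
	int page_shift = 12;				/* 4 KiB pages assumed */
	unsigned long long bytes = (cache + rss) << page_shift;

	/* 1500 pages -> 6144000 bytes */
	printf("%llu pages -> %llu bytes\n", cache + rss, bytes);
	return 0;
}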
@@ -2083,6 +2580,10 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
2083 name = MEMFILE_ATTR(cft->private); 2580 name = MEMFILE_ATTR(cft->private);
2084 switch (name) { 2581 switch (name) {
2085 case RES_LIMIT: 2582 case RES_LIMIT:
2583 if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */
2584 ret = -EINVAL;
2585 break;
2586 }
2086 /* This function does all necessary parse...reuse it */ 2587 /* This function does all necessary parse...reuse it */
2087 ret = res_counter_memparse_write_strategy(buffer, &val); 2588 ret = res_counter_memparse_write_strategy(buffer, &val);
2088 if (ret) 2589 if (ret)
@@ -2092,6 +2593,20 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
2092 else 2593 else
2093 ret = mem_cgroup_resize_memsw_limit(memcg, val); 2594 ret = mem_cgroup_resize_memsw_limit(memcg, val);
2094 break; 2595 break;
2596 case RES_SOFT_LIMIT:
2597 ret = res_counter_memparse_write_strategy(buffer, &val);
2598 if (ret)
2599 break;
2600 /*
2601 * For memsw, soft limits are hard to implement in terms
2602 * of semantics; for now, we support soft limits only for
2603 * memory control, without swap
2604 */
2605 if (type == _MEM)
2606 ret = res_counter_set_soft_limit(&memcg->res, val);
2607 else
2608 ret = -EINVAL;
2609 break;
2095 default: 2610 default:
2096 ret = -EINVAL; /* should be BUG() ? */ 2611 ret = -EINVAL; /* should be BUG() ? */
2097 break; 2612 break;
@@ -2149,6 +2664,7 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
2149 res_counter_reset_failcnt(&mem->memsw); 2664 res_counter_reset_failcnt(&mem->memsw);
2150 break; 2665 break;
2151 } 2666 }
2667
2152 return 0; 2668 return 0;
2153} 2669}
2154 2670
@@ -2160,6 +2676,7 @@ enum {
2160 MCS_MAPPED_FILE, 2676 MCS_MAPPED_FILE,
2161 MCS_PGPGIN, 2677 MCS_PGPGIN,
2162 MCS_PGPGOUT, 2678 MCS_PGPGOUT,
2679 MCS_SWAP,
2163 MCS_INACTIVE_ANON, 2680 MCS_INACTIVE_ANON,
2164 MCS_ACTIVE_ANON, 2681 MCS_ACTIVE_ANON,
2165 MCS_INACTIVE_FILE, 2682 MCS_INACTIVE_FILE,
@@ -2181,6 +2698,7 @@ struct {
2181 {"mapped_file", "total_mapped_file"}, 2698 {"mapped_file", "total_mapped_file"},
2182 {"pgpgin", "total_pgpgin"}, 2699 {"pgpgin", "total_pgpgin"},
2183 {"pgpgout", "total_pgpgout"}, 2700 {"pgpgout", "total_pgpgout"},
2701 {"swap", "total_swap"},
2184 {"inactive_anon", "total_inactive_anon"}, 2702 {"inactive_anon", "total_inactive_anon"},
2185 {"active_anon", "total_active_anon"}, 2703 {"active_anon", "total_active_anon"},
2186 {"inactive_file", "total_inactive_file"}, 2704 {"inactive_file", "total_inactive_file"},
@@ -2205,6 +2723,10 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data)
2205 s->stat[MCS_PGPGIN] += val; 2723 s->stat[MCS_PGPGIN] += val;
2206 val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT); 2724 val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT);
2207 s->stat[MCS_PGPGOUT] += val; 2725 s->stat[MCS_PGPGOUT] += val;
2726 if (do_swap_account) {
2727 val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_SWAPOUT);
2728 s->stat[MCS_SWAP] += val * PAGE_SIZE;
2729 }
2208 2730
2209 /* per zone stat */ 2731 /* per zone stat */
2210 val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON); 2732 val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON);
@@ -2236,8 +2758,11 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
2236 memset(&mystat, 0, sizeof(mystat)); 2758 memset(&mystat, 0, sizeof(mystat));
2237 mem_cgroup_get_local_stat(mem_cont, &mystat); 2759 mem_cgroup_get_local_stat(mem_cont, &mystat);
2238 2760
2239 for (i = 0; i < NR_MCS_STAT; i++) 2761 for (i = 0; i < NR_MCS_STAT; i++) {
2762 if (i == MCS_SWAP && !do_swap_account)
2763 continue;
2240 cb->fill(cb, memcg_stat_strings[i].local_name, mystat.stat[i]); 2764 cb->fill(cb, memcg_stat_strings[i].local_name, mystat.stat[i]);
2765 }
2241 2766
2242 /* Hierarchical information */ 2767 /* Hierarchical information */
2243 { 2768 {
@@ -2250,9 +2775,11 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
2250 2775
2251 memset(&mystat, 0, sizeof(mystat)); 2776 memset(&mystat, 0, sizeof(mystat));
2252 mem_cgroup_get_total_stat(mem_cont, &mystat); 2777 mem_cgroup_get_total_stat(mem_cont, &mystat);
2253 for (i = 0; i < NR_MCS_STAT; i++) 2778 for (i = 0; i < NR_MCS_STAT; i++) {
2779 if (i == MCS_SWAP && !do_swap_account)
2780 continue;
2254 cb->fill(cb, memcg_stat_strings[i].total_name, mystat.stat[i]); 2781 cb->fill(cb, memcg_stat_strings[i].total_name, mystat.stat[i]);
2255 2782 }
2256 2783
2257#ifdef CONFIG_DEBUG_VM 2784#ifdef CONFIG_DEBUG_VM
2258 cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL)); 2785 cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL));
@@ -2345,6 +2872,12 @@ static struct cftype mem_cgroup_files[] = {
2345 .read_u64 = mem_cgroup_read, 2872 .read_u64 = mem_cgroup_read,
2346 }, 2873 },
2347 { 2874 {
2875 .name = "soft_limit_in_bytes",
2876 .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT),
2877 .write_string = mem_cgroup_write,
2878 .read_u64 = mem_cgroup_read,
2879 },
2880 {
2348 .name = "failcnt", 2881 .name = "failcnt",
2349 .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), 2882 .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT),
2350 .trigger = mem_cgroup_reset, 2883 .trigger = mem_cgroup_reset,
@@ -2438,6 +2971,9 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
2438 mz = &pn->zoneinfo[zone]; 2971 mz = &pn->zoneinfo[zone];
2439 for_each_lru(l) 2972 for_each_lru(l)
2440 INIT_LIST_HEAD(&mz->lists[l]); 2973 INIT_LIST_HEAD(&mz->lists[l]);
2974 mz->usage_in_excess = 0;
2975 mz->on_tree = false;
2976 mz->mem = mem;
2441 } 2977 }
2442 return 0; 2978 return 0;
2443} 2979}
@@ -2483,6 +3019,7 @@ static void __mem_cgroup_free(struct mem_cgroup *mem)
2483{ 3019{
2484 int node; 3020 int node;
2485 3021
3022 mem_cgroup_remove_from_trees(mem);
2486 free_css_id(&mem_cgroup_subsys, &mem->css); 3023 free_css_id(&mem_cgroup_subsys, &mem->css);
2487 3024
2488 for_each_node_state(node, N_POSSIBLE) 3025 for_each_node_state(node, N_POSSIBLE)
@@ -2531,6 +3068,31 @@ static void __init enable_swap_cgroup(void)
2531} 3068}
2532#endif 3069#endif
2533 3070
3071static int mem_cgroup_soft_limit_tree_init(void)
3072{
3073 struct mem_cgroup_tree_per_node *rtpn;
3074 struct mem_cgroup_tree_per_zone *rtpz;
3075 int tmp, node, zone;
3076
3077 for_each_node_state(node, N_POSSIBLE) {
3078 tmp = node;
3079 if (!node_state(node, N_NORMAL_MEMORY))
3080 tmp = -1;
3081 rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp);
3082 if (!rtpn)
3083 return 1;
3084
3085 soft_limit_tree.rb_tree_per_node[node] = rtpn;
3086
3087 for (zone = 0; zone < MAX_NR_ZONES; zone++) {
3088 rtpz = &rtpn->rb_tree_per_zone[zone];
3089 rtpz->rb_root = RB_ROOT;
3090 spin_lock_init(&rtpz->lock);
3091 }
3092 }
3093 return 0;
3094}
3095
2534static struct cgroup_subsys_state * __ref 3096static struct cgroup_subsys_state * __ref
2535mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) 3097mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
2536{ 3098{
@@ -2545,10 +3107,15 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
2545 for_each_node_state(node, N_POSSIBLE) 3107 for_each_node_state(node, N_POSSIBLE)
2546 if (alloc_mem_cgroup_per_zone_info(mem, node)) 3108 if (alloc_mem_cgroup_per_zone_info(mem, node))
2547 goto free_out; 3109 goto free_out;
3110
2548 /* root ? */ 3111 /* root ? */
2549 if (cont->parent == NULL) { 3112 if (cont->parent == NULL) {
2550 enable_swap_cgroup(); 3113 enable_swap_cgroup();
2551 parent = NULL; 3114 parent = NULL;
3115 root_mem_cgroup = mem;
3116 if (mem_cgroup_soft_limit_tree_init())
3117 goto free_out;
3118
2552 } else { 3119 } else {
2553 parent = mem_cgroup_from_cont(cont->parent); 3120 parent = mem_cgroup_from_cont(cont->parent);
2554 mem->use_hierarchy = parent->use_hierarchy; 3121 mem->use_hierarchy = parent->use_hierarchy;
@@ -2577,6 +3144,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
2577 return &mem->css; 3144 return &mem->css;
2578free_out: 3145free_out:
2579 __mem_cgroup_free(mem); 3146 __mem_cgroup_free(mem);
3147 root_mem_cgroup = NULL;
2580 return ERR_PTR(error); 3148 return ERR_PTR(error);
2581} 3149}
2582 3150
@@ -2612,7 +3180,8 @@ static int mem_cgroup_populate(struct cgroup_subsys *ss,
2612static void mem_cgroup_move_task(struct cgroup_subsys *ss, 3180static void mem_cgroup_move_task(struct cgroup_subsys *ss,
2613 struct cgroup *cont, 3181 struct cgroup *cont,
2614 struct cgroup *old_cont, 3182 struct cgroup *old_cont,
2615 struct task_struct *p) 3183 struct task_struct *p,
3184 bool threadgroup)
2616{ 3185{
2617 mutex_lock(&memcg_tasklist); 3186 mutex_lock(&memcg_tasklist);
2618 /* 3187 /*
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
new file mode 100644
index 000000000000..729d4b15b645
--- /dev/null
+++ b/mm/memory-failure.c
@@ -0,0 +1,832 @@
1/*
2 * Copyright (C) 2008, 2009 Intel Corporation
3 * Authors: Andi Kleen, Fengguang Wu
4 *
5 * This software may be redistributed and/or modified under the terms of
6 * the GNU General Public License ("GPL") version 2 only as published by the
7 * Free Software Foundation.
8 *
9 * High level machine check handler. Handles pages reported by the
10 * hardware as being corrupted, usually due to a 2-bit ECC memory or cache
11 * failure.
12 *
13 * Handles page cache pages in various states. The tricky part
14 * here is that we can access any page asynchronously to other VM
15 * users, because memory failures could happen anytime and anywhere,
16 * possibly violating some of their assumptions. This is why this code
17 * has to be extremely careful. Generally it tries to use normal locking
18 * rules, as in get the standard locks, even if that means the
19 * error handling takes potentially a long time.
20 *
21 * The operation to map back from RMAP chains to processes has to walk
22 * the complete process list and has non-linear complexity in the number
23 * of mappings. In short it can be quite slow. But since memory corruptions
24 * are rare we hope to get away with this.
25 */
26
27/*
28 * Notebook:
29 * - hugetlb needs more code
30 * - kcore/oldmem/vmcore/mem/kmem check for hwpoison pages
31 * - pass bad pages to kdump next kernel
32 */
33#define DEBUG 1 /* remove me in 2.6.34 */
34#include <linux/kernel.h>
35#include <linux/mm.h>
36#include <linux/page-flags.h>
37#include <linux/sched.h>
38#include <linux/rmap.h>
39#include <linux/pagemap.h>
40#include <linux/swap.h>
41#include <linux/backing-dev.h>
42#include "internal.h"
43
44int sysctl_memory_failure_early_kill __read_mostly = 0;
45
46int sysctl_memory_failure_recovery __read_mostly = 1;
47
48atomic_long_t mce_bad_pages __read_mostly = ATOMIC_LONG_INIT(0);
49
50/*
51 * Send all the processes who have the page mapped an ``action optional''
52 * signal.
53 */
54static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
55 unsigned long pfn)
56{
57 struct siginfo si;
58 int ret;
59
60 printk(KERN_ERR
61 "MCE %#lx: Killing %s:%d early due to hardware memory corruption\n",
62 pfn, t->comm, t->pid);
63 si.si_signo = SIGBUS;
64 si.si_errno = 0;
65 si.si_code = BUS_MCEERR_AO;
66 si.si_addr = (void *)addr;
67#ifdef __ARCH_SI_TRAPNO
68 si.si_trapno = trapno;
69#endif
70 si.si_addr_lsb = PAGE_SHIFT;
71 /*
72 * Don't use force here, it's convenient if the signal
73 * can be temporarily blocked.
74 * This could cause a loop when the user sets SIGBUS
75 * to SIG_IGN, but hopefully no one will do that?
76 */
77 ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */
78 if (ret < 0)
79 printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n",
80 t->comm, t->pid, ret);
81 return ret;
82}
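
On the receiving end this arrives as an ordinary SIGBUS whose si_code is BUS_MCEERR_AO and whose si_addr points into the corrupted mapping. A minimal userspace handler might look like the sketch below, assuming a libc that exposes BUS_MCEERR_AO and si_addr_lsb; fprintf() is not async-signal-safe and is used only to keep the demo short.

#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void on_sigbus(int sig, siginfo_t *si, void *ctx)
{
	if (si->si_code == BUS_MCEERR_AO)
		fprintf(stderr, "memory corrupted near %p (lsb %d)\n",
			si->si_addr, (int)si->si_addr_lsb);
	_exit(1);	/* never touch the poisoned data again */
}

int main(void)
{
	struct sigaction sa = { 0 };

	sa.sa_sigaction = on_sigbus;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGBUS, &sa, NULL);
	pause();	/* a real program would do its work here */
	return 0;
}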
83
84/*
85 * Kill all processes that have a poisoned page mapped and then isolate
86 * the page.
87 *
88 * General strategy:
89 * Find all processes having the page mapped and kill them.
90 * But we keep a page reference around so that the page is not
91 * actually freed yet.
92 * Then stash the page away
93 *
94 * There's no convenient way to get back to mapped processes
95 * from the VMAs. So do a brute-force search over all
96 * running processes.
97 *
98 * Remember that machine checks are not common (or rather
99 * if they are common you have other problems), so this shouldn't
100 * be a performance issue.
101 *
102 * Also there are some races possible while we get from the
103 * error detection to actually handle it.
104 */
105
106struct to_kill {
107 struct list_head nd;
108 struct task_struct *tsk;
109 unsigned long addr;
110 unsigned addr_valid:1;
111};
112
113/*
114 * Failure handling: if we can't find or can't kill a process there's
115 * not much we can do. We just print a message and otherwise ignore it.
116 */
117
118/*
119 * Schedule a process for later kill.
120 * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
121 * TBD would GFP_NOIO be enough?
122 */
123static void add_to_kill(struct task_struct *tsk, struct page *p,
124 struct vm_area_struct *vma,
125 struct list_head *to_kill,
126 struct to_kill **tkc)
127{
128 struct to_kill *tk;
129
130 if (*tkc) {
131 tk = *tkc;
132 *tkc = NULL;
133 } else {
134 tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC);
135 if (!tk) {
136 printk(KERN_ERR
137 "MCE: Out of memory while handling a machine check\n");
138 return;
139 }
140 }
141 tk->addr = page_address_in_vma(p, vma);
142 tk->addr_valid = 1;
143
144 /*
145 * In theory we don't have to kill when the page was
146 * munmapped. But it could also be a mremap. Since that's
147 * likely very rare, kill anyway just out of paranoia, but use
148 * a SIGKILL because the error is not contained anymore.
149 */
150 if (tk->addr == -EFAULT) {
151 pr_debug("MCE: Unable to find user space address %lx in %s\n",
152 page_to_pfn(p), tsk->comm);
153 tk->addr_valid = 0;
154 }
155 get_task_struct(tsk);
156 tk->tsk = tsk;
157 list_add_tail(&tk->nd, to_kill);
158}
159
160/*
161 * Kill the processes that have been collected earlier.
162 *
163 * Only do anything when DOIT is set, otherwise just free the list
164 * (this is used for clean pages which do not need killing)
165 * Also when FAIL is set do a force kill because something went
166 * wrong earlier.
167 */
168static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
169 int fail, unsigned long pfn)
170{
171 struct to_kill *tk, *next;
172
173 list_for_each_entry_safe (tk, next, to_kill, nd) {
174 if (doit) {
175 /*
176 * In case something went wrong with munmapping,
177 * make sure the process doesn't catch the
178 * signal and then access the memory. Just kill it
179 * (SIGKILL cannot be caught by signal handlers).
180 */
181 if (fail || tk->addr_valid == 0) {
182 printk(KERN_ERR
183 "MCE %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
184 pfn, tk->tsk->comm, tk->tsk->pid);
185 force_sig(SIGKILL, tk->tsk);
186 }
187
188 /*
189 * In theory the process could have mapped
190 * something else on the address in-between. We could
191 * check for that, but we need to tell the
192 * process anyways.
193 */
194 else if (kill_proc_ao(tk->tsk, tk->addr, trapno,
195 pfn) < 0)
196 printk(KERN_ERR
197 "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n",
198 pfn, tk->tsk->comm, tk->tsk->pid);
199 }
200 put_task_struct(tk->tsk);
201 kfree(tk);
202 }
203}
204
205static int task_early_kill(struct task_struct *tsk)
206{
207 if (!tsk->mm)
208 return 0;
209 if (tsk->flags & PF_MCE_PROCESS)
210 return !!(tsk->flags & PF_MCE_EARLY);
211 return sysctl_memory_failure_early_kill;
212}
213
214/*
215 * Collect processes when the error hit an anonymous page.
216 */
217static void collect_procs_anon(struct page *page, struct list_head *to_kill,
218 struct to_kill **tkc)
219{
220 struct vm_area_struct *vma;
221 struct task_struct *tsk;
222 struct anon_vma *av;
223
224 read_lock(&tasklist_lock);
225 av = page_lock_anon_vma(page);
226 if (av == NULL) /* Not actually mapped anymore */
227 goto out;
228 for_each_process (tsk) {
229 if (!task_early_kill(tsk))
230 continue;
231 list_for_each_entry (vma, &av->head, anon_vma_node) {
232 if (!page_mapped_in_vma(page, vma))
233 continue;
234 if (vma->vm_mm == tsk->mm)
235 add_to_kill(tsk, page, vma, to_kill, tkc);
236 }
237 }
238 page_unlock_anon_vma(av);
239out:
240 read_unlock(&tasklist_lock);
241}
242
243/*
244 * Collect processes when the error hit a file mapped page.
245 */
246static void collect_procs_file(struct page *page, struct list_head *to_kill,
247 struct to_kill **tkc)
248{
249 struct vm_area_struct *vma;
250 struct task_struct *tsk;
251 struct prio_tree_iter iter;
252 struct address_space *mapping = page->mapping;
253
254 /*
255 * A note on the locking order between the two locks.
256 * We don't rely on this particular order.
257 * If you have some other code that needs a different order
258 * feel free to switch them around. Or add a reverse link
259 * from mm_struct to task_struct, then this could be all
260 * done without taking tasklist_lock and looping over all tasks.
261 */
262
263 read_lock(&tasklist_lock);
264 spin_lock(&mapping->i_mmap_lock);
265 for_each_process(tsk) {
266 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
267
268 if (!task_early_kill(tsk))
269 continue;
270
271 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff,
272 pgoff) {
273 /*
274 * Send early kill signal to tasks where a vma covers
275 * the page but the corrupted page is not necessarily
276 * mapped in its pte.
277 * Assume applications that requested early kill want
278 * to be informed of all such data corruptions.
279 */
280 if (vma->vm_mm == tsk->mm)
281 add_to_kill(tsk, page, vma, to_kill, tkc);
282 }
283 }
284 spin_unlock(&mapping->i_mmap_lock);
285 read_unlock(&tasklist_lock);
286}
287
288/*
289 * Collect the processes that have the corrupted page mapped, to kill them.
290 * This is done in two steps for locking reasons.
291 * First preallocate one tokill structure outside the spin locks,
292 * so that we can kill at least one process reasonably reliably.
293 */
294static void collect_procs(struct page *page, struct list_head *tokill)
295{
296 struct to_kill *tk;
297
298 if (!page->mapping)
299 return;
300
301 tk = kmalloc(sizeof(struct to_kill), GFP_NOIO);
302 if (!tk)
303 return;
304 if (PageAnon(page))
305 collect_procs_anon(page, tokill, &tk);
306 else
307 collect_procs_file(page, tokill, &tk);
308 kfree(tk);
309}
310
311/*
312 * Error handlers for various types of pages.
313 */
314
315enum outcome {
316 FAILED, /* Error handling failed */
317 DELAYED, /* Will be handled later */
318 IGNORED, /* Error safely ignored */
319 RECOVERED, /* Successfully recovered */
320};
321
322static const char *action_name[] = {
323 [FAILED] = "Failed",
324 [DELAYED] = "Delayed",
325 [IGNORED] = "Ignored",
326 [RECOVERED] = "Recovered",
327};
328
329/*
330 * Error hit kernel page.
331 * Do nothing, try to be lucky and not touch this instead. For a few cases we
332 * could be more sophisticated.
333 */
334static int me_kernel(struct page *p, unsigned long pfn)
335{
336 return DELAYED;
337}
338
339/*
340 * Already poisoned page.
341 */
342static int me_ignore(struct page *p, unsigned long pfn)
343{
344 return IGNORED;
345}
346
347/*
348 * Page in unknown state. Do nothing.
349 */
350static int me_unknown(struct page *p, unsigned long pfn)
351{
352 printk(KERN_ERR "MCE %#lx: Unknown page state\n", pfn);
353 return FAILED;
354}
355
356/*
357 * Free memory
358 */
359static int me_free(struct page *p, unsigned long pfn)
360{
361 return DELAYED;
362}
363
364/*
365 * Clean (or cleaned) page cache page.
366 */
367static int me_pagecache_clean(struct page *p, unsigned long pfn)
368{
369 int err;
370 int ret = FAILED;
371 struct address_space *mapping;
372
373 if (!isolate_lru_page(p))
374 page_cache_release(p);
375
376 /*
377 * For anonymous pages we're done; the only reference left
378 * should be the one m_f() holds.
379 */
380 if (PageAnon(p))
381 return RECOVERED;
382
383 /*
384 * Now truncate the page in the page cache. This is really
385 * more like a "temporary hole punch"
386 * Don't do this for block devices when someone else
387 * has a reference, because it could be file system metadata
388 * and that's not safe to truncate.
389 */
390 mapping = page_mapping(p);
391 if (!mapping) {
392 /*
393 * Page has been torn down in the meantime
394 */
395 return FAILED;
396 }
397
398 /*
399 * Truncation is a bit tricky. Enable it per file system for now.
400 *
401 * Open: to take i_mutex or not for this? Right now we don't.
402 */
403 if (mapping->a_ops->error_remove_page) {
404 err = mapping->a_ops->error_remove_page(mapping, p);
405 if (err != 0) {
406 printk(KERN_INFO "MCE %#lx: Failed to punch page: %d\n",
407 pfn, err);
408 } else if (page_has_private(p) &&
409 !try_to_release_page(p, GFP_NOIO)) {
410 pr_debug("MCE %#lx: failed to release buffers\n", pfn);
411 } else {
412 ret = RECOVERED;
413 }
414 } else {
415 /*
416 * If the file system doesn't support it just invalidate
417 * This fails on dirty or anything with private pages
418 */
419 if (invalidate_inode_page(p))
420 ret = RECOVERED;
421 else
422 printk(KERN_INFO "MCE %#lx: Failed to invalidate\n",
423 pfn);
424 }
425 return ret;
426}
427
428/*
429 * Dirty page cache page.
430 * Issues: when the error hits a hole page the error is not properly
431 * propagated.
432 */
433static int me_pagecache_dirty(struct page *p, unsigned long pfn)
434{
435 struct address_space *mapping = page_mapping(p);
436
437 SetPageError(p);
438 /* TBD: print more information about the file. */
439 if (mapping) {
440 /*
441 * IO error will be reported by write(), fsync(), etc.
442 * who check the mapping.
443 * This way the application knows that something went
444 * wrong with its dirty file data.
445 *
446 * There's one open issue:
447 *
448 * The EIO will be only reported on the next IO
449 * operation and then cleared through the IO map.
450 * Normally Linux has two mechanisms to pass IO error
451 * first through the AS_EIO flag in the address space
452 * and then through the PageError flag in the page.
453 * Since we drop pages on memory failure handling the
454 * only mechanism open to use is through AS_EIO.
455 *
456 * This has the disadvantage that it gets cleared on
457 * the first operation that returns an error, while
458 * the PageError bit is more sticky and only cleared
459 * when the page is reread or dropped. If an
460 * application assumes it will always get an error on
461 * fsync, but does other operations on the fd before
462 * and the page is dropped in between, then the error
463 * will not be properly reported.
464 *
465 * This can already happen even without hwpoisoned
466 * pages: first on metadata IO errors (which only
467 * report through AS_EIO) or when the page is dropped
468 * at the wrong time.
469 *
470 * So right now we assume that the application DTRT on
471 * the first EIO, but we're not worse than other parts
472 * of the kernel.
473 */
474 mapping_set_error(mapping, EIO);
475 }
476
477 return me_pagecache_clean(p, pfn);
478}
479
480/*
481 * Clean and dirty swap cache.
482 *
483 * Dirty swap cache page is tricky to handle. The page could live both in page
484 * cache and swap cache (i.e. the page is freshly swapped in). So it could be
485 * referenced concurrently by 2 types of PTEs:
486 * normal PTEs and swap PTEs. We try to handle them consistently by calling
487 * try_to_unmap(TTU_IGNORE_HWPOISON) to convert the normal PTEs to swap PTEs,
488 * and then
489 * - clear dirty bit to prevent IO
490 * - remove from LRU
491 * - but keep in the swap cache, so that when we return to it on
492 * a later page fault, we know the application is accessing
493 * corrupted data and shall be killed (we installed simple
494 * interception code in do_swap_page to catch it).
495 *
496 * Clean swap cache pages can be directly isolated. A later page fault will
497 * bring in the known good data from disk.
498 */
499static int me_swapcache_dirty(struct page *p, unsigned long pfn)
500{
501 int ret = FAILED;
502
503 ClearPageDirty(p);
504 /* Trigger EIO in shmem: */
505 ClearPageUptodate(p);
506
507 if (!isolate_lru_page(p)) {
508 page_cache_release(p);
509 ret = DELAYED;
510 }
511
512 return ret;
513}
514
515static int me_swapcache_clean(struct page *p, unsigned long pfn)
516{
517 int ret = FAILED;
518
519 if (!isolate_lru_page(p)) {
520 page_cache_release(p);
521 ret = RECOVERED;
522 }
523 delete_from_swap_cache(p);
524 return ret;
525}
526
527/*
528 * Huge pages. Needs work.
529 * Issues:
530 * No rmap support, so we cannot find the original mapper. In theory we could
531 * walk all MMs and look for the mappings, but that would be non-atomic and racy.
532 * We need rmap for hugepages for this. Alternatively we could employ a heuristic,
533 * like just walking the current process and hoping it has it mapped (that
534 * should usually be true for the common "shared database cache" case).
535 * Should handle free huge pages and dequeue them too, but this needs to
536 * handle huge page accounting correctly.
537 */
538static int me_huge_page(struct page *p, unsigned long pfn)
539{
540 return FAILED;
541}
542
543/*
544 * Various page states we can handle.
545 *
546 * A page state is defined by its current page->flags bits.
547 * The table matches them in order and calls the right handler.
548 *
549 * This is quite tricky because we can access a page at any time
550 * in its life cycle, so all accesses have to be extremely careful.
551 *
552 * This is not complete. More states could be added.
553 * For any missing state don't attempt recovery.
554 */
555
556#define dirty (1UL << PG_dirty)
557#define sc (1UL << PG_swapcache)
558#define unevict (1UL << PG_unevictable)
559#define mlock (1UL << PG_mlocked)
560#define writeback (1UL << PG_writeback)
561#define lru (1UL << PG_lru)
562#define swapbacked (1UL << PG_swapbacked)
563#define head (1UL << PG_head)
564#define tail (1UL << PG_tail)
565#define compound (1UL << PG_compound)
566#define slab (1UL << PG_slab)
567#define buddy (1UL << PG_buddy)
568#define reserved (1UL << PG_reserved)
569
570static struct page_state {
571 unsigned long mask;
572 unsigned long res;
573 char *msg;
574 int (*action)(struct page *p, unsigned long pfn);
575} error_states[] = {
576 { reserved, reserved, "reserved kernel", me_ignore },
577 { buddy, buddy, "free kernel", me_free },
578
579 /*
580 * Could in theory check if slab page is free or if we can drop
581 * currently unused objects without touching them. But just
582 * treat it as standard kernel for now.
583 */
584 { slab, slab, "kernel slab", me_kernel },
585
586#ifdef CONFIG_PAGEFLAGS_EXTENDED
587 { head, head, "huge", me_huge_page },
588 { tail, tail, "huge", me_huge_page },
589#else
590 { compound, compound, "huge", me_huge_page },
591#endif
592
593 { sc|dirty, sc|dirty, "swapcache", me_swapcache_dirty },
594 { sc|dirty, sc, "swapcache", me_swapcache_clean },
595
596 { unevict|dirty, unevict|dirty, "unevictable LRU", me_pagecache_dirty},
597 { unevict, unevict, "unevictable LRU", me_pagecache_clean},
598
599#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
600 { mlock|dirty, mlock|dirty, "mlocked LRU", me_pagecache_dirty },
601 { mlock, mlock, "mlocked LRU", me_pagecache_clean },
602#endif
603
604 { lru|dirty, lru|dirty, "LRU", me_pagecache_dirty },
605 { lru|dirty, lru, "clean LRU", me_pagecache_clean },
606 { swapbacked, swapbacked, "anonymous", me_pagecache_clean },
607
608 /*
609 * Catchall entry: must be at end.
610 */
611 { 0, 0, "unknown page state", me_unknown },
612};
613
614#undef lru
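
The table is scanned first-match: the page's flags are masked and compared entry by entry, which is why specific combinations such as sc|dirty must precede the generic lru rows and the catchall must come last. A toy standalone version of the same dispatch, with invented flag bits:

#include <stdio.h>

#define DIRTY	(1UL << 0)
#define SC	(1UL << 1)
#define LRU	(1UL << 2)

struct state { unsigned long mask, res; const char *msg; };

static const struct state states[] = {
	{ SC | DIRTY,  SC | DIRTY,  "dirty swapcache" },
	{ SC | DIRTY,  SC,          "clean swapcache" },
	{ LRU | DIRTY, LRU | DIRTY, "dirty LRU" },
	{ LRU | DIRTY, LRU,         "clean LRU" },
	{ 0, 0, "unknown" },	/* catchall: matches everything */
};

static const char *classify(unsigned long flags)
{
	const struct state *s;

	/* first matching mask/res pair wins, like page_action()'s caller */
	for (s = states; ; s++)
		if ((flags & s->mask) == s->res)
			return s->msg;
}

int main(void)
{
	printf("%s\n", classify(SC | LRU));	/* clean swapcache */
	printf("%s\n", classify(LRU | DIRTY));	/* dirty LRU */
	printf("%s\n", classify(DIRTY));	/* unknown */
	return 0;
}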
615
616static void action_result(unsigned long pfn, char *msg, int result)
617{
618 struct page *page = NULL;
619 if (pfn_valid(pfn))
620 page = pfn_to_page(pfn);
621
622 printk(KERN_ERR "MCE %#lx: %s%s page recovery: %s\n",
623 pfn,
624 page && PageDirty(page) ? "dirty " : "",
625 msg, action_name[result]);
626}
627
628static int page_action(struct page_state *ps, struct page *p,
629 unsigned long pfn, int ref)
630{
631 int result;
632
633 result = ps->action(p, pfn);
634 action_result(pfn, ps->msg, result);
635 if (page_count(p) != 1 + ref)
636 printk(KERN_ERR
637 "MCE %#lx: %s page still referenced by %d users\n",
638 pfn, ps->msg, page_count(p) - 1);
639
640 /* Could do more checks here if page looks ok */
641 /*
642 * Could adjust zone counters here to correct for the missing page.
643 */
644
645 return result == RECOVERED ? 0 : -EBUSY;
646}
647
648#define N_UNMAP_TRIES 5
649
650/*
651 * Do all that is necessary to remove user space mappings. Unmap
652 * the pages and send SIGBUS to the processes if the data was dirty.
653 */
654static void hwpoison_user_mappings(struct page *p, unsigned long pfn,
655 int trapno)
656{
657 enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
658 struct address_space *mapping;
659 LIST_HEAD(tokill);
660 int ret;
661 int i;
662 int kill = 1;
663
664 if (PageReserved(p) || PageCompound(p) || PageSlab(p))
665 return;
666
667 if (!PageLRU(p))
668 lru_add_drain_all();
669
670 /*
671 * This check implies we don't kill processes if their pages
672 * are in the swap cache early. Those are always late kills.
673 */
674 if (!page_mapped(p))
675 return;
676
677 if (PageSwapCache(p)) {
678 printk(KERN_ERR
679 "MCE %#lx: keeping poisoned page in swap cache\n", pfn);
680 ttu |= TTU_IGNORE_HWPOISON;
681 }
682
683 /*
684 * Propagate the dirty bit from PTEs to struct page first, because we
685 * need this to decide if we should kill or just drop the page.
686 */
687 mapping = page_mapping(p);
688 if (!PageDirty(p) && mapping && mapping_cap_writeback_dirty(mapping)) {
689 if (page_mkclean(p)) {
690 SetPageDirty(p);
691 } else {
692 kill = 0;
693 ttu |= TTU_IGNORE_HWPOISON;
694 printk(KERN_INFO
695 "MCE %#lx: corrupted page was clean: dropped without side effects\n",
696 pfn);
697 }
698 }
699
700 /*
701 * First collect all the processes that have the page
702 * mapped in dirty form. This has to be done before try_to_unmap,
703 * because ttu takes the rmap data structures down.
704 *
705 * Error handling: We ignore errors here because
706 * there's nothing that can be done.
707 */
708 if (kill)
709 collect_procs(p, &tokill);
710
711 /*
712 * try_to_unmap can fail temporarily due to races.
713 * Try a few times (RED-PEN better strategy?)
714 */
715 for (i = 0; i < N_UNMAP_TRIES; i++) {
716 ret = try_to_unmap(p, ttu);
717 if (ret == SWAP_SUCCESS)
718 break;
719 pr_debug("MCE %#lx: try_to_unmap retry needed %d\n", pfn, ret);
720 }
721
722 if (ret != SWAP_SUCCESS)
723 printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n",
724 pfn, page_mapcount(p));
725
726 /*
727 * Now that the dirty bit has been propagated to the
728 * struct page and all unmaps done we can decide if
729 * killing is needed or not. Only kill when the page
730 * was dirty, otherwise the tokill list is merely
731 * freed. When there was a problem unmapping earlier
732 * use a more forceful, uncatchable kill to prevent
733 * any accesses to the poisoned memory.
734 */
735 kill_procs_ao(&tokill, !!PageDirty(p), trapno,
736 ret != SWAP_SUCCESS, pfn);
737}
738
739int __memory_failure(unsigned long pfn, int trapno, int ref)
740{
741 struct page_state *ps;
742 struct page *p;
743 int res;
744
745 if (!sysctl_memory_failure_recovery)
746 panic("Memory failure from trap %d on page %lx", trapno, pfn);
747
748 if (!pfn_valid(pfn)) {
749 action_result(pfn, "memory outside kernel control", IGNORED);
750 return -EIO;
751 }
752
753 p = pfn_to_page(pfn);
754 if (TestSetPageHWPoison(p)) {
755 action_result(pfn, "already hardware poisoned", IGNORED);
756 return 0;
757 }
758
759 atomic_long_add(1, &mce_bad_pages);
760
761 /*
762 * There is nothing we need to, or can, do about count=0 pages.
763 * 1) it's a free page, and therefore in safe hand:
764 * prep_new_page() will be the gate keeper.
765 * 2) it's part of a non-compound high order page.
766 * Implies some kernel user: cannot stop them from
767 * R/W the page; let's pray that the page has been
768 * used and will be freed some time later.
769 * In fact it's dangerous to directly bump up page count from 0,
770 * that may make page_freeze_refs()/page_unfreeze_refs() mismatch.
771 */
772 if (!get_page_unless_zero(compound_head(p))) {
773 action_result(pfn, "free or high order kernel", IGNORED);
774 return PageBuddy(compound_head(p)) ? 0 : -EBUSY;
775 }
776
777 /*
778 * Lock the page and wait for writeback to finish.
779 * It's very difficult to mess with pages currently under IO
780 * and in many cases impossible, so we just avoid it here.
781 */
782 lock_page_nosync(p);
783 wait_on_page_writeback(p);
784
785 /*
786 * Now take care of user space mappings.
787 */
788 hwpoison_user_mappings(p, pfn, trapno);
789
790 /*
791 * Torn down by someone else?
792 */
793 if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
794 action_result(pfn, "already truncated LRU", IGNORED);
795 res = 0;
796 goto out;
797 }
798
799 res = -EBUSY;
800 for (ps = error_states;; ps++) {
801 if ((p->flags & ps->mask) == ps->res) {
802 res = page_action(ps, p, pfn, ref);
803 break;
804 }
805 }
806out:
807 unlock_page(p);
808 return res;
809}
810EXPORT_SYMBOL_GPL(__memory_failure);
811
812/**
813 * memory_failure - Handle memory failure of a page.
814 * @pfn: Page Number of the corrupted page
815 * @trapno: Trap number reported in the signal to user space.
816 *
817 * This function is called by the low level machine check code
818 * of an architecture when it detects hardware memory corruption
819 * of a page. It tries its best to recover, which includes
820 * dropping pages, killing processes etc.
821 *
822 * The function is primarily of use for corruptions that
823 * happen outside the current execution context (e.g. when
824 * detected by a background scrubber)
825 *
826 * Must run in process context (e.g. a work queue) with interrupts
827 * enabled and no spinlocks held.
828 */
829void memory_failure(unsigned long pfn, int trapno)
830{
831 __memory_failure(pfn, trapno, 0);
832}
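
Because memory_failure() must run in process context with interrupts enabled, an architecture's machine-check handler (which typically runs in an atomic context) is expected to defer to it, for instance via a work queue. A hedged kernel-style sketch of such glue follows; the real wiring lives in the arch MCE code and these names are illustrative only.

#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/mm.h>

struct mce_work {
	struct work_struct work;
	unsigned long pfn;
	int trapno;
};

static void mce_do_memory_failure(struct work_struct *work)
{
	struct mce_work *mw = container_of(work, struct mce_work, work);

	memory_failure(mw->pfn, mw->trapno);	/* process context, IRQs on */
	kfree(mw);
}

/* called from the (atomic) machine-check path */
static void mce_schedule_memory_failure(unsigned long pfn, int trapno)
{
	struct mce_work *mw = kmalloc(sizeof(*mw), GFP_ATOMIC);

	if (!mw)
		return;		/* nothing else we can do from here */
	INIT_WORK(&mw->work, mce_do_memory_failure);
	mw->pfn = pfn;
	mw->trapno = trapno;
	schedule_work(&mw->work);
}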
diff --git a/mm/memory.c b/mm/memory.c
index b1443ac07c00..7e91b5f9f690 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -297,7 +297,8 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
297 unsigned long addr = vma->vm_start; 297 unsigned long addr = vma->vm_start;
298 298
299 /* 299 /*
300 * Hide vma from rmap and vmtruncate before freeing pgtables 300 * Hide vma from rmap and truncate_pagecache before freeing
301 * pgtables
301 */ 302 */
302 anon_vma_unlink(vma); 303 anon_vma_unlink(vma);
303 unlink_file_vma(vma); 304 unlink_file_vma(vma);
@@ -1325,7 +1326,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1325 if (ret & VM_FAULT_ERROR) { 1326 if (ret & VM_FAULT_ERROR) {
1326 if (ret & VM_FAULT_OOM) 1327 if (ret & VM_FAULT_OOM)
1327 return i ? i : -ENOMEM; 1328 return i ? i : -ENOMEM;
1328 else if (ret & VM_FAULT_SIGBUS) 1329 if (ret &
1330 (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS))
1329 return i ? i : -EFAULT; 1331 return i ? i : -EFAULT;
1330 BUG(); 1332 BUG();
1331 } 1333 }
@@ -2407,7 +2409,7 @@ restart:
2407 * @mapping: the address space containing mmaps to be unmapped. 2409 * @mapping: the address space containing mmaps to be unmapped.
2408 * @holebegin: byte in first page to unmap, relative to the start of 2410 * @holebegin: byte in first page to unmap, relative to the start of
2409 * the underlying file. This will be rounded down to a PAGE_SIZE 2411 * the underlying file. This will be rounded down to a PAGE_SIZE
2410 * boundary. Note that this is different from vmtruncate(), which 2412 * boundary. Note that this is different from truncate_pagecache(), which
2411 * must keep the partial page. In contrast, we must get rid of 2413 * must keep the partial page. In contrast, we must get rid of
2412 * partial pages. 2414 * partial pages.
2413 * @holelen: size of prospective hole in bytes. This will be rounded 2415 * @holelen: size of prospective hole in bytes. This will be rounded
@@ -2458,63 +2460,6 @@ void unmap_mapping_range(struct address_space *mapping,
2458} 2460}
2459EXPORT_SYMBOL(unmap_mapping_range); 2461EXPORT_SYMBOL(unmap_mapping_range);
2460 2462
2461/**
2462 * vmtruncate - unmap mappings "freed" by truncate() syscall
2463 * @inode: inode of the file used
2464 * @offset: file offset to start truncating
2465 *
2466 * NOTE! We have to be ready to update the memory sharing
2467 * between the file and the memory map for a potential last
2468 * incomplete page. Ugly, but necessary.
2469 */
2470int vmtruncate(struct inode * inode, loff_t offset)
2471{
2472 if (inode->i_size < offset) {
2473 unsigned long limit;
2474
2475 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
2476 if (limit != RLIM_INFINITY && offset > limit)
2477 goto out_sig;
2478 if (offset > inode->i_sb->s_maxbytes)
2479 goto out_big;
2480 i_size_write(inode, offset);
2481 } else {
2482 struct address_space *mapping = inode->i_mapping;
2483
2484 /*
2485 * truncation of in-use swapfiles is disallowed - it would
2486 * cause subsequent swapout to scribble on the now-freed
2487 * blocks.
2488 */
2489 if (IS_SWAPFILE(inode))
2490 return -ETXTBSY;
2491 i_size_write(inode, offset);
2492
2493 /*
2494 * unmap_mapping_range is called twice, first simply for
2495 * efficiency so that truncate_inode_pages does fewer
2496 * single-page unmaps. However after this first call, and
2497 * before truncate_inode_pages finishes, it is possible for
2498 * private pages to be COWed, which remain after
2499 * truncate_inode_pages finishes, hence the second
2500 * unmap_mapping_range call must be made for correctness.
2501 */
2502 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
2503 truncate_inode_pages(mapping, offset);
2504 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
2505 }
2506
2507 if (inode->i_op->truncate)
2508 inode->i_op->truncate(inode);
2509 return 0;
2510
2511out_sig:
2512 send_sig(SIGXFSZ, current, 0);
2513out_big:
2514 return -EFBIG;
2515}
2516EXPORT_SYMBOL(vmtruncate);
2517
2518int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) 2463int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
2519{ 2464{
2520 struct address_space *mapping = inode->i_mapping; 2465 struct address_space *mapping = inode->i_mapping;
@@ -2559,8 +2504,15 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2559 goto out; 2504 goto out;
2560 2505
2561 entry = pte_to_swp_entry(orig_pte); 2506 entry = pte_to_swp_entry(orig_pte);
2562 if (is_migration_entry(entry)) { 2507 if (unlikely(non_swap_entry(entry))) {
2563 migration_entry_wait(mm, pmd, address); 2508 if (is_migration_entry(entry)) {
2509 migration_entry_wait(mm, pmd, address);
2510 } else if (is_hwpoison_entry(entry)) {
2511 ret = VM_FAULT_HWPOISON;
2512 } else {
2513 print_bad_pte(vma, address, orig_pte, NULL);
2514 ret = VM_FAULT_OOM;
2515 }
2564 goto out; 2516 goto out;
2565 } 2517 }
2566 delayacct_set_flag(DELAYACCT_PF_SWAPIN); 2518 delayacct_set_flag(DELAYACCT_PF_SWAPIN);
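
non_swap_entry() (introduced alongside this change) is a cheap test that the entry's type field lies in the reserved range used for special entries, so the common swap-in path pays a single comparison before the finer dispatch above. The toy encoding below mirrors the idea; the shift and type values are made up, not the kernel's actual swp_entry_t layout.

#include <stdio.h>

/* toy swp_entry_t: low bits = offset, top bits = type */
#define TYPE_SHIFT	8
#define TYPE_MIGRATION	0x1eU	/* invented values for the "special" types */
#define TYPE_HWPOISON	0x1fU

static unsigned make_entry(unsigned type, unsigned off)
{
	return (type << TYPE_SHIFT) | off;
}

static unsigned entry_type(unsigned e)
{
	return e >> TYPE_SHIFT;
}

/* one comparison filters out all special entries, like non_swap_entry() */
static int non_swap(unsigned e)
{
	return entry_type(e) >= TYPE_MIGRATION;
}

int main(void)
{
	unsigned e = make_entry(TYPE_HWPOISON, 42);

	if (!non_swap(e))
		puts("normal swap-in");
	else if (entry_type(e) == TYPE_HWPOISON)
		puts("return VM_FAULT_HWPOISON");
	else
		puts("wait for migration to finish");
	return 0;
}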
@@ -2584,6 +2536,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2584 /* Had to read the page from swap area: Major fault */ 2536 /* Had to read the page from swap area: Major fault */
2585 ret = VM_FAULT_MAJOR; 2537 ret = VM_FAULT_MAJOR;
2586 count_vm_event(PGMAJFAULT); 2538 count_vm_event(PGMAJFAULT);
2539 } else if (PageHWPoison(page)) {
2540 ret = VM_FAULT_HWPOISON;
2541 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2542 goto out;
2587 } 2543 }
2588 2544
2589 lock_page(page); 2545 lock_page(page);
@@ -2760,6 +2716,12 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2760 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) 2716 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
2761 return ret; 2717 return ret;
2762 2718
2719 if (unlikely(PageHWPoison(vmf.page))) {
2720 if (ret & VM_FAULT_LOCKED)
2721 unlock_page(vmf.page);
2722 return VM_FAULT_HWPOISON;
2723 }
2724
2763 /* 2725 /*
2764 * For consistency in subsequent calls, make the faulted page always 2726 * For consistency in subsequent calls, make the faulted page always
2765 * locked. 2727 * locked.
diff --git a/mm/migrate.c b/mm/migrate.c
index 16052e80aaac..1a4bf4813780 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -675,7 +675,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
675 } 675 }
676 676
677 /* Establish migration ptes or remove ptes */ 677 /* Establish migration ptes or remove ptes */
678 try_to_unmap(page, 1); 678 try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
679 679
680skip_unmap: 680skip_unmap:
681 if (!page_mapped(page)) 681 if (!page_mapped(page))
diff --git a/mm/mremap.c b/mm/mremap.c
index 20a07dba6be0..97bff2547719 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -86,8 +86,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
86 if (vma->vm_file) { 86 if (vma->vm_file) {
87 /* 87 /*
88 * Subtle point from Rajesh Venkatasubramanian: before 88 * Subtle point from Rajesh Venkatasubramanian: before
89 * moving file-based ptes, we must lock vmtruncate out, 89 * moving file-based ptes, we must lock truncate_pagecache
90 * since it might clean the dst vma before the src vma, 90 * out, since it might clean the dst vma before the src vma,
91 * and we propagate stale pages into the dst afterward. 91 * and we propagate stale pages into the dst afterward.
92 */ 92 */
93 mapping = vma->vm_file->f_mapping; 93 mapping = vma->vm_file->f_mapping;
diff --git a/mm/nommu.c b/mm/nommu.c
index 8d484241d034..c73aa4753d79 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -83,46 +83,6 @@ struct vm_operations_struct generic_file_vm_ops = {
83}; 83};
84 84
85/* 85/*
86 * Handle all mappings that got truncated by a "truncate()"
87 * system call.
88 *
89 * NOTE! We have to be ready to update the memory sharing
90 * between the file and the memory map for a potential last
91 * incomplete page. Ugly, but necessary.
92 */
93int vmtruncate(struct inode *inode, loff_t offset)
94{
95 struct address_space *mapping = inode->i_mapping;
96 unsigned long limit;
97
98 if (inode->i_size < offset)
99 goto do_expand;
100 i_size_write(inode, offset);
101
102 truncate_inode_pages(mapping, offset);
103 goto out_truncate;
104
105do_expand:
106 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
107 if (limit != RLIM_INFINITY && offset > limit)
108 goto out_sig;
109 if (offset > inode->i_sb->s_maxbytes)
110 goto out;
111 i_size_write(inode, offset);
112
113out_truncate:
114 if (inode->i_op->truncate)
115 inode->i_op->truncate(inode);
116 return 0;
117out_sig:
118 send_sig(SIGXFSZ, current, 0);
119out:
120 return -EFBIG;
121}
122
123EXPORT_SYMBOL(vmtruncate);
124
125/*
126 * Return the total memory allocated for this pointer, not 86 * Return the total memory allocated for this pointer, not
127 * just what the caller asked for. 87 * just what the caller asked for.
128 * 88 *
@@ -866,7 +826,7 @@ static int validate_mmap_request(struct file *file,
866 int ret; 826 int ret;
867 827
868 /* do the simple checks first */ 828 /* do the simple checks first */
869 if (flags & MAP_FIXED || addr) { 829 if (flags & MAP_FIXED) {
870 printk(KERN_DEBUG 830 printk(KERN_DEBUG
871 "%d: Can't do fixed-address/overlay mmap of RAM\n", 831 "%d: Can't do fixed-address/overlay mmap of RAM\n",
872 current->pid); 832 current->pid);
@@ -1074,7 +1034,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma)
1074 ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); 1034 ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
1075 if (ret == 0) { 1035 if (ret == 0) {
1076 vma->vm_region->vm_top = vma->vm_region->vm_end; 1036 vma->vm_region->vm_top = vma->vm_region->vm_end;
1077 return ret; 1037 return 0;
1078 } 1038 }
1079 if (ret != -ENOSYS) 1039 if (ret != -ENOSYS)
1080 return ret; 1040 return ret;
@@ -1091,7 +1051,8 @@ static int do_mmap_shared_file(struct vm_area_struct *vma)
1091 */ 1051 */
1092static int do_mmap_private(struct vm_area_struct *vma, 1052static int do_mmap_private(struct vm_area_struct *vma,
1093 struct vm_region *region, 1053 struct vm_region *region,
1094 unsigned long len) 1054 unsigned long len,
1055 unsigned long capabilities)
1095{ 1056{
1096 struct page *pages; 1057 struct page *pages;
1097 unsigned long total, point, n, rlen; 1058 unsigned long total, point, n, rlen;
@@ -1102,13 +1063,13 @@ static int do_mmap_private(struct vm_area_struct *vma,
1102 * shared mappings on devices or memory 1063 * shared mappings on devices or memory
1103 * - VM_MAYSHARE will be set if it may attempt to share 1064 * - VM_MAYSHARE will be set if it may attempt to share
1104 */ 1065 */
1105 if (vma->vm_file) { 1066 if (capabilities & BDI_CAP_MAP_DIRECT) {
1106 ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); 1067 ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
1107 if (ret == 0) { 1068 if (ret == 0) {
1108 /* shouldn't return success if we're not sharing */ 1069 /* shouldn't return success if we're not sharing */
1109 BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); 1070 BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
1110 vma->vm_region->vm_top = vma->vm_region->vm_end; 1071 vma->vm_region->vm_top = vma->vm_region->vm_end;
1111 return ret; 1072 return 0;
1112 } 1073 }
1113 if (ret != -ENOSYS) 1074 if (ret != -ENOSYS)
1114 return ret; 1075 return ret;
@@ -1221,9 +1182,6 @@ unsigned long do_mmap_pgoff(struct file *file,
1221 1182
1222 kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff); 1183 kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff);
1223 1184
1224 if (!(flags & MAP_FIXED))
1225 addr = round_hint_to_min(addr);
1226
1227 /* decide whether we should attempt the mapping, and if so what sort of 1185 /* decide whether we should attempt the mapping, and if so what sort of
1228 * mapping */ 1186 * mapping */
1229 ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, 1187 ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
@@ -1233,6 +1191,9 @@ unsigned long do_mmap_pgoff(struct file *file,
1233 return ret; 1191 return ret;
1234 } 1192 }
1235 1193
1194 /* we ignore the address hint */
1195 addr = 0;
1196
1236 /* we've determined that we can make the mapping, now translate what we 1197 /* we've determined that we can make the mapping, now translate what we
1237 * now know into VMA flags */ 1198 * now know into VMA flags */
1238 vm_flags = determine_vm_flags(file, prot, flags, capabilities); 1199 vm_flags = determine_vm_flags(file, prot, flags, capabilities);
@@ -1346,7 +1307,7 @@ unsigned long do_mmap_pgoff(struct file *file,
1346 * - this is the hook for quasi-memory character devices to 1307 * - this is the hook for quasi-memory character devices to
1347 * tell us the location of a shared mapping 1308 * tell us the location of a shared mapping
1348 */ 1309 */
1349 if (file && file->f_op->get_unmapped_area) { 1310 if (capabilities & BDI_CAP_MAP_DIRECT) {
1350 addr = file->f_op->get_unmapped_area(file, addr, len, 1311 addr = file->f_op->get_unmapped_area(file, addr, len,
1351 pgoff, flags); 1312 pgoff, flags);
1352 if (IS_ERR((void *) addr)) { 1313 if (IS_ERR((void *) addr)) {
@@ -1370,15 +1331,17 @@ unsigned long do_mmap_pgoff(struct file *file,
1370 } 1331 }
1371 1332
1372 vma->vm_region = region; 1333 vma->vm_region = region;
1373 add_nommu_region(region);
1374 1334
1375 /* set up the mapping */ 1335 /* set up the mapping
1336 * - the region is filled in if BDI_CAP_MAP_DIRECT is still set
1337 */
1376 if (file && vma->vm_flags & VM_SHARED) 1338 if (file && vma->vm_flags & VM_SHARED)
1377 ret = do_mmap_shared_file(vma); 1339 ret = do_mmap_shared_file(vma);
1378 else 1340 else
1379 ret = do_mmap_private(vma, region, len); 1341 ret = do_mmap_private(vma, region, len, capabilities);
1380 if (ret < 0) 1342 if (ret < 0)
1381 goto error_put_region; 1343 goto error_just_free;
1344 add_nommu_region(region);
1382 1345
1383 /* okay... we have a mapping; now we have to register it */ 1346 /* okay... we have a mapping; now we have to register it */
1384 result = vma->vm_start; 1347 result = vma->vm_start;
@@ -1396,19 +1359,6 @@ share:
1396 kleave(" = %lx", result); 1359 kleave(" = %lx", result);
1397 return result; 1360 return result;
1398 1361
1399error_put_region:
1400 __put_nommu_region(region);
1401 if (vma) {
1402 if (vma->vm_file) {
1403 fput(vma->vm_file);
1404 if (vma->vm_flags & VM_EXECUTABLE)
1405 removed_exe_file_vma(vma->vm_mm);
1406 }
1407 kmem_cache_free(vm_area_cachep, vma);
1408 }
1409 kleave(" = %d [pr]", ret);
1410 return ret;
1411
1412error_just_free: 1362error_just_free:
1413 up_write(&nommu_region_sem); 1363 up_write(&nommu_region_sem);
1414error: 1364error:
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 5f378dd58802..d99664e8607e 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -155,37 +155,37 @@ static void update_completion_period(void)
155} 155}
156 156
157int dirty_background_ratio_handler(struct ctl_table *table, int write, 157int dirty_background_ratio_handler(struct ctl_table *table, int write,
158 struct file *filp, void __user *buffer, size_t *lenp, 158 void __user *buffer, size_t *lenp,
159 loff_t *ppos) 159 loff_t *ppos)
160{ 160{
161 int ret; 161 int ret;
162 162
163 ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); 163 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
164 if (ret == 0 && write) 164 if (ret == 0 && write)
165 dirty_background_bytes = 0; 165 dirty_background_bytes = 0;
166 return ret; 166 return ret;
167} 167}
168 168
169int dirty_background_bytes_handler(struct ctl_table *table, int write, 169int dirty_background_bytes_handler(struct ctl_table *table, int write,
170 struct file *filp, void __user *buffer, size_t *lenp, 170 void __user *buffer, size_t *lenp,
171 loff_t *ppos) 171 loff_t *ppos)
172{ 172{
173 int ret; 173 int ret;
174 174
175 ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); 175 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
176 if (ret == 0 && write) 176 if (ret == 0 && write)
177 dirty_background_ratio = 0; 177 dirty_background_ratio = 0;
178 return ret; 178 return ret;
179} 179}
180 180
181int dirty_ratio_handler(struct ctl_table *table, int write, 181int dirty_ratio_handler(struct ctl_table *table, int write,
182 struct file *filp, void __user *buffer, size_t *lenp, 182 void __user *buffer, size_t *lenp,
183 loff_t *ppos) 183 loff_t *ppos)
184{ 184{
185 int old_ratio = vm_dirty_ratio; 185 int old_ratio = vm_dirty_ratio;
186 int ret; 186 int ret;
187 187
188 ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); 188 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
189 if (ret == 0 && write && vm_dirty_ratio != old_ratio) { 189 if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
190 update_completion_period(); 190 update_completion_period();
191 vm_dirty_bytes = 0; 191 vm_dirty_bytes = 0;
@@ -195,13 +195,13 @@ int dirty_ratio_handler(struct ctl_table *table, int write,
195 195
196 196
197int dirty_bytes_handler(struct ctl_table *table, int write, 197int dirty_bytes_handler(struct ctl_table *table, int write,
198 struct file *filp, void __user *buffer, size_t *lenp, 198 void __user *buffer, size_t *lenp,
199 loff_t *ppos) 199 loff_t *ppos)
200{ 200{
201 unsigned long old_bytes = vm_dirty_bytes; 201 unsigned long old_bytes = vm_dirty_bytes;
202 int ret; 202 int ret;
203 203
204 ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); 204 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
205 if (ret == 0 && write && vm_dirty_bytes != old_bytes) { 205 if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
206 update_completion_period(); 206 update_completion_period();
207 vm_dirty_ratio = 0; 207 vm_dirty_ratio = 0;
@@ -686,9 +686,9 @@ static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);
686 * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs 686 * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
687 */ 687 */
688int dirty_writeback_centisecs_handler(ctl_table *table, int write, 688int dirty_writeback_centisecs_handler(ctl_table *table, int write,
689 struct file *file, void __user *buffer, size_t *length, loff_t *ppos) 689 void __user *buffer, size_t *length, loff_t *ppos)
690{ 690{
691 proc_dointvec(table, write, file, buffer, length, ppos); 691 proc_dointvec(table, write, buffer, length, ppos);
692 return 0; 692 return 0;
693} 693}
694 694
@@ -1149,6 +1149,13 @@ int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
1149EXPORT_SYMBOL(redirty_page_for_writepage); 1149EXPORT_SYMBOL(redirty_page_for_writepage);
1150 1150
1151/* 1151/*
1152 * Dirty a page.
1153 *
1154 * For pages with a mapping this should be done under the page lock
1155 * for the benefit of asynchronous memory errors, which prefer a consistent
1156 * dirty state. This rule can be broken in some special cases,
1157 * but it is better not to.
1158 *
1152 * If the mapping doesn't provide a set_page_dirty a_op, then 1159 * If the mapping doesn't provide a set_page_dirty a_op, then
1153 * just fall through and assume that it wants buffer_heads. 1160 * just fall through and assume that it wants buffer_heads.
1154 */ 1161 */
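
Every sysctl handler hunk in this patch makes the same mechanical change: the unused struct file * argument is dropped from the proc handler prototype and from the proc_dointvec()/proc_dostring() family it forwards to. Shape of the change (illustrative handler name; kernel types, so not standalone-compilable):

/* before: an unused struct file * threaded through every handler */
int example_handler(struct ctl_table *table, int write, struct file *filp,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_dointvec(table, write, filp, buffer, lenp, ppos);
}

/* after: the parameter is gone from handlers and proc_do* helpers alike */
int example_handler(struct ctl_table *table, int write,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_dointvec(table, write, buffer, lenp, ppos);
}
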
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5717f27a0704..bf720550b44d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -234,6 +234,12 @@ static void bad_page(struct page *page)
234 static unsigned long nr_shown; 234 static unsigned long nr_shown;
235 static unsigned long nr_unshown; 235 static unsigned long nr_unshown;
236 236
237 /* Don't complain about poisoned pages */
238 if (PageHWPoison(page)) {
239 __ClearPageBuddy(page);
240 return;
241 }
242
237 /* 243 /*
238 * Allow a burst of 60 reports, then keep quiet for that minute; 244 * Allow a burst of 60 reports, then keep quiet for that minute;
239 * or allow a steady drip of one report per second. 245 * or allow a steady drip of one report per second.
@@ -666,7 +672,7 @@ static inline void expand(struct zone *zone, struct page *page,
666/* 672/*
667 * This page is about to be returned from the page allocator 673 * This page is about to be returned from the page allocator
668 */ 674 */
669static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) 675static inline int check_new_page(struct page *page)
670{ 676{
671 if (unlikely(page_mapcount(page) | 677 if (unlikely(page_mapcount(page) |
672 (page->mapping != NULL) | 678 (page->mapping != NULL) |
@@ -675,6 +681,18 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
675 bad_page(page); 681 bad_page(page);
676 return 1; 682 return 1;
677 } 683 }
684 return 0;
685}
686
687static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
688{
689 int i;
690
691 for (i = 0; i < (1 << order); i++) {
692 struct page *p = page + i;
693 if (unlikely(check_new_page(p)))
694 return 1;
695 }
678 696
679 set_page_private(page, 0); 697 set_page_private(page, 0);
680 set_page_refcounted(page); 698 set_page_refcounted(page);
@@ -2373,7 +2391,7 @@ early_param("numa_zonelist_order", setup_numa_zonelist_order);
2373 * sysctl handler for numa_zonelist_order 2391 * sysctl handler for numa_zonelist_order
2374 */ 2392 */
2375int numa_zonelist_order_handler(ctl_table *table, int write, 2393int numa_zonelist_order_handler(ctl_table *table, int write,
2376 struct file *file, void __user *buffer, size_t *length, 2394 void __user *buffer, size_t *length,
2377 loff_t *ppos) 2395 loff_t *ppos)
2378{ 2396{
2379 char saved_string[NUMA_ZONELIST_ORDER_LEN]; 2397 char saved_string[NUMA_ZONELIST_ORDER_LEN];
@@ -2382,7 +2400,7 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
2382 if (write) 2400 if (write)
2383 strncpy(saved_string, (char*)table->data, 2401 strncpy(saved_string, (char*)table->data,
2384 NUMA_ZONELIST_ORDER_LEN); 2402 NUMA_ZONELIST_ORDER_LEN);
2385 ret = proc_dostring(table, write, file, buffer, length, ppos); 2403 ret = proc_dostring(table, write, buffer, length, ppos);
2386 if (ret) 2404 if (ret)
2387 return ret; 2405 return ret;
2388 if (write) { 2406 if (write) {
@@ -4706,9 +4724,9 @@ module_init(init_per_zone_wmark_min)
4706 * changes. 4724 * changes.
4707 */ 4725 */
4708int min_free_kbytes_sysctl_handler(ctl_table *table, int write, 4726int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
4709 struct file *file, void __user *buffer, size_t *length, loff_t *ppos) 4727 void __user *buffer, size_t *length, loff_t *ppos)
4710{ 4728{
4711 proc_dointvec(table, write, file, buffer, length, ppos); 4729 proc_dointvec(table, write, buffer, length, ppos);
4712 if (write) 4730 if (write)
4713 setup_per_zone_wmarks(); 4731 setup_per_zone_wmarks();
4714 return 0; 4732 return 0;
@@ -4716,12 +4734,12 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
4716 4734
4717#ifdef CONFIG_NUMA 4735#ifdef CONFIG_NUMA
4718int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, 4736int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
4719 struct file *file, void __user *buffer, size_t *length, loff_t *ppos) 4737 void __user *buffer, size_t *length, loff_t *ppos)
4720{ 4738{
4721 struct zone *zone; 4739 struct zone *zone;
4722 int rc; 4740 int rc;
4723 4741
4724 rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); 4742 rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
4725 if (rc) 4743 if (rc)
4726 return rc; 4744 return rc;
4727 4745
@@ -4732,12 +4750,12 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
4732} 4750}
4733 4751
4734int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, 4752int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
4735 struct file *file, void __user *buffer, size_t *length, loff_t *ppos) 4753 void __user *buffer, size_t *length, loff_t *ppos)
4736{ 4754{
4737 struct zone *zone; 4755 struct zone *zone;
4738 int rc; 4756 int rc;
4739 4757
4740 rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); 4758 rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
4741 if (rc) 4759 if (rc)
4742 return rc; 4760 return rc;
4743 4761
@@ -4758,9 +4776,9 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
4758 * if in function of the boot time zone sizes. 4776 * if in function of the boot time zone sizes.
4759 */ 4777 */
4760int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, 4778int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
4761 struct file *file, void __user *buffer, size_t *length, loff_t *ppos) 4779 void __user *buffer, size_t *length, loff_t *ppos)
4762{ 4780{
4763 proc_dointvec_minmax(table, write, file, buffer, length, ppos); 4781 proc_dointvec_minmax(table, write, buffer, length, ppos);
4764 setup_per_zone_lowmem_reserve(); 4782 setup_per_zone_lowmem_reserve();
4765 return 0; 4783 return 0;
4766} 4784}
@@ -4772,13 +4790,13 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
4772 */ 4790 */
4773 4791
4774int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, 4792int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
4775 struct file *file, void __user *buffer, size_t *length, loff_t *ppos) 4793 void __user *buffer, size_t *length, loff_t *ppos)
4776{ 4794{
4777 struct zone *zone; 4795 struct zone *zone;
4778 unsigned int cpu; 4796 unsigned int cpu;
4779 int ret; 4797 int ret;
4780 4798
4781 ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos); 4799 ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
4782 if (!write || (ret == -EINVAL)) 4800 if (!write || (ret == -EINVAL))
4783 return ret; 4801 return ret;
4784 for_each_populated_zone(zone) { 4802 for_each_populated_zone(zone) {
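
The page_alloc.c hunk factors the per-page sanity test out of prep_new_page() into check_new_page() and applies it to every constituent page of a high-order block, not just the head page. A standalone model of that loop; the "bad page" predicate here is invented for the demo:

/*
 * Standalone model of the split above: a per-page check applied to all
 * 1 << order pages of an allocation instead of only the first one.
 */
#include <stdio.h>

struct page { int mapcount; };

static int check_new_page(struct page *p)	/* mirrors the new helper */
{
	return p->mapcount != 0;	/* a non-zero mapcount would be a bug */
}

static int prep_new_pages(struct page *pages, int order)
{
	for (int i = 0; i < (1 << order); i++)
		if (check_new_page(&pages[i]))
			return 1;	/* reject the whole block */
	return 0;
}

int main(void)
{
	struct page blk[4] = { {0}, {0}, {1}, {0} };	/* third page is bad */

	printf("order-2 block ok? %s\n", prep_new_pages(blk, 2) ? "no" : "yes");
	return 0;
}
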
diff --git a/mm/rmap.c b/mm/rmap.c
index 720fc03a7bc4..28aafe2b5306 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -36,6 +36,11 @@
36 * mapping->tree_lock (widely used, in set_page_dirty, 36 * mapping->tree_lock (widely used, in set_page_dirty,
37 * in arch-dependent flush_dcache_mmap_lock, 37 * in arch-dependent flush_dcache_mmap_lock,
38 * within inode_lock in __sync_single_inode) 38 * within inode_lock in __sync_single_inode)
39 *
40 * (code doesn't rely on that order so it could be switched around)
41 * ->tasklist_lock
42 * anon_vma->lock (memory_failure, collect_procs_anon)
43 * pte map lock
39 */ 44 */
40 45
41#include <linux/mm.h> 46#include <linux/mm.h>
@@ -191,7 +196,7 @@ void __init anon_vma_init(void)
191 * Getting a lock on a stable anon_vma from a page off the LRU is 196 * Getting a lock on a stable anon_vma from a page off the LRU is
192 * tricky: page_lock_anon_vma rely on RCU to guard against the races. 197 * tricky: page_lock_anon_vma rely on RCU to guard against the races.
193 */ 198 */
194static struct anon_vma *page_lock_anon_vma(struct page *page) 199struct anon_vma *page_lock_anon_vma(struct page *page)
195{ 200{
196 struct anon_vma *anon_vma; 201 struct anon_vma *anon_vma;
197 unsigned long anon_mapping; 202 unsigned long anon_mapping;
@@ -211,7 +216,7 @@ out:
211 return NULL; 216 return NULL;
212} 217}
213 218
214static void page_unlock_anon_vma(struct anon_vma *anon_vma) 219void page_unlock_anon_vma(struct anon_vma *anon_vma)
215{ 220{
216 spin_unlock(&anon_vma->lock); 221 spin_unlock(&anon_vma->lock);
217 rcu_read_unlock(); 222 rcu_read_unlock();
@@ -311,7 +316,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
311 * if the page is not mapped into the page tables of this VMA. Only 316 * if the page is not mapped into the page tables of this VMA. Only
312 * valid for normal file or anonymous VMAs. 317 * valid for normal file or anonymous VMAs.
313 */ 318 */
314static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) 319int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
315{ 320{
316 unsigned long address; 321 unsigned long address;
317 pte_t *pte; 322 pte_t *pte;
@@ -756,7 +761,7 @@ void page_remove_rmap(struct page *page)
756 * repeatedly from either try_to_unmap_anon or try_to_unmap_file. 761 * repeatedly from either try_to_unmap_anon or try_to_unmap_file.
757 */ 762 */
758static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, 763static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
759 int migration) 764 enum ttu_flags flags)
760{ 765{
761 struct mm_struct *mm = vma->vm_mm; 766 struct mm_struct *mm = vma->vm_mm;
762 unsigned long address; 767 unsigned long address;
@@ -778,11 +783,13 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
778 * If it's recently referenced (perhaps page_referenced 783 * If it's recently referenced (perhaps page_referenced
779 * skipped over this mm) then we should reactivate it. 784 * skipped over this mm) then we should reactivate it.
780 */ 785 */
781 if (!migration) { 786 if (!(flags & TTU_IGNORE_MLOCK)) {
782 if (vma->vm_flags & VM_LOCKED) { 787 if (vma->vm_flags & VM_LOCKED) {
783 ret = SWAP_MLOCK; 788 ret = SWAP_MLOCK;
784 goto out_unmap; 789 goto out_unmap;
785 } 790 }
791 }
792 if (!(flags & TTU_IGNORE_ACCESS)) {
786 if (ptep_clear_flush_young_notify(vma, address, pte)) { 793 if (ptep_clear_flush_young_notify(vma, address, pte)) {
787 ret = SWAP_FAIL; 794 ret = SWAP_FAIL;
788 goto out_unmap; 795 goto out_unmap;
@@ -800,7 +807,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
800 /* Update high watermark before we lower rss */ 807 /* Update high watermark before we lower rss */
801 update_hiwater_rss(mm); 808 update_hiwater_rss(mm);
802 809
803 if (PageAnon(page)) { 810 if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
811 if (PageAnon(page))
812 dec_mm_counter(mm, anon_rss);
813 else
814 dec_mm_counter(mm, file_rss);
815 set_pte_at(mm, address, pte,
816 swp_entry_to_pte(make_hwpoison_entry(page)));
817 } else if (PageAnon(page)) {
804 swp_entry_t entry = { .val = page_private(page) }; 818 swp_entry_t entry = { .val = page_private(page) };
805 819
806 if (PageSwapCache(page)) { 820 if (PageSwapCache(page)) {
@@ -822,12 +836,12 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
822 * pte. do_swap_page() will wait until the migration 836 * pte. do_swap_page() will wait until the migration
823 * pte is removed and then restart fault handling. 837 * pte is removed and then restart fault handling.
824 */ 838 */
825 BUG_ON(!migration); 839 BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
826 entry = make_migration_entry(page, pte_write(pteval)); 840 entry = make_migration_entry(page, pte_write(pteval));
827 } 841 }
828 set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); 842 set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
829 BUG_ON(pte_file(*pte)); 843 BUG_ON(pte_file(*pte));
830 } else if (PAGE_MIGRATION && migration) { 844 } else if (PAGE_MIGRATION && (TTU_ACTION(flags) == TTU_MIGRATION)) {
831 /* Establish migration entry for a file page */ 845 /* Establish migration entry for a file page */
832 swp_entry_t entry; 846 swp_entry_t entry;
833 entry = make_migration_entry(page, pte_write(pteval)); 847 entry = make_migration_entry(page, pte_write(pteval));
@@ -996,12 +1010,13 @@ static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma)
996 * vm_flags for that VMA. That should be OK, because that vma shouldn't be 1010 * vm_flags for that VMA. That should be OK, because that vma shouldn't be
997 * 'LOCKED. 1011 * 'LOCKED.
998 */ 1012 */
999static int try_to_unmap_anon(struct page *page, int unlock, int migration) 1013static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
1000{ 1014{
1001 struct anon_vma *anon_vma; 1015 struct anon_vma *anon_vma;
1002 struct vm_area_struct *vma; 1016 struct vm_area_struct *vma;
1003 unsigned int mlocked = 0; 1017 unsigned int mlocked = 0;
1004 int ret = SWAP_AGAIN; 1018 int ret = SWAP_AGAIN;
1019 int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
1005 1020
1006 if (MLOCK_PAGES && unlikely(unlock)) 1021 if (MLOCK_PAGES && unlikely(unlock))
1007 ret = SWAP_SUCCESS; /* default for try_to_munlock() */ 1022 ret = SWAP_SUCCESS; /* default for try_to_munlock() */
@@ -1017,7 +1032,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration)
1017 continue; /* must visit all unlocked vmas */ 1032 continue; /* must visit all unlocked vmas */
1018 ret = SWAP_MLOCK; /* saw at least one mlocked vma */ 1033 ret = SWAP_MLOCK; /* saw at least one mlocked vma */
1019 } else { 1034 } else {
1020 ret = try_to_unmap_one(page, vma, migration); 1035 ret = try_to_unmap_one(page, vma, flags);
1021 if (ret == SWAP_FAIL || !page_mapped(page)) 1036 if (ret == SWAP_FAIL || !page_mapped(page))
1022 break; 1037 break;
1023 } 1038 }
@@ -1041,8 +1056,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration)
1041/** 1056/**
1042 * try_to_unmap_file - unmap/unlock file page using the object-based rmap method 1057 * try_to_unmap_file - unmap/unlock file page using the object-based rmap method
1043 * @page: the page to unmap/unlock 1058 * @page: the page to unmap/unlock
1044 * @unlock: request for unlock rather than unmap [unlikely] 1059 * @flags: action and flags
1045 * @migration: unmapping for migration - ignored if @unlock
1046 * 1060 *
1047 * Find all the mappings of a page using the mapping pointer and the vma chains 1061 * Find all the mappings of a page using the mapping pointer and the vma chains
1048 * contained in the address_space struct it points to. 1062 * contained in the address_space struct it points to.
@@ -1054,7 +1068,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration)
1054 * vm_flags for that VMA. That should be OK, because that vma shouldn't be 1068 * vm_flags for that VMA. That should be OK, because that vma shouldn't be
1055 * 'LOCKED. 1069 * 'LOCKED.
1056 */ 1070 */
1057static int try_to_unmap_file(struct page *page, int unlock, int migration) 1071static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1058{ 1072{
1059 struct address_space *mapping = page->mapping; 1073 struct address_space *mapping = page->mapping;
1060 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); 1074 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -1066,6 +1080,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
1066 unsigned long max_nl_size = 0; 1080 unsigned long max_nl_size = 0;
1067 unsigned int mapcount; 1081 unsigned int mapcount;
1068 unsigned int mlocked = 0; 1082 unsigned int mlocked = 0;
1083 int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
1069 1084
1070 if (MLOCK_PAGES && unlikely(unlock)) 1085 if (MLOCK_PAGES && unlikely(unlock))
1071 ret = SWAP_SUCCESS; /* default for try_to_munlock() */ 1086 ret = SWAP_SUCCESS; /* default for try_to_munlock() */
@@ -1078,7 +1093,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
1078 continue; /* must visit all vmas */ 1093 continue; /* must visit all vmas */
1079 ret = SWAP_MLOCK; 1094 ret = SWAP_MLOCK;
1080 } else { 1095 } else {
1081 ret = try_to_unmap_one(page, vma, migration); 1096 ret = try_to_unmap_one(page, vma, flags);
1082 if (ret == SWAP_FAIL || !page_mapped(page)) 1097 if (ret == SWAP_FAIL || !page_mapped(page))
1083 goto out; 1098 goto out;
1084 } 1099 }
@@ -1103,7 +1118,8 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
1103 ret = SWAP_MLOCK; /* leave mlocked == 0 */ 1118 ret = SWAP_MLOCK; /* leave mlocked == 0 */
1104 goto out; /* no need to look further */ 1119 goto out; /* no need to look further */
1105 } 1120 }
1106 if (!MLOCK_PAGES && !migration && (vma->vm_flags & VM_LOCKED)) 1121 if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
1122 (vma->vm_flags & VM_LOCKED))
1107 continue; 1123 continue;
1108 cursor = (unsigned long) vma->vm_private_data; 1124 cursor = (unsigned long) vma->vm_private_data;
1109 if (cursor > max_nl_cursor) 1125 if (cursor > max_nl_cursor)
@@ -1137,7 +1153,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
1137 do { 1153 do {
1138 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, 1154 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
1139 shared.vm_set.list) { 1155 shared.vm_set.list) {
1140 if (!MLOCK_PAGES && !migration && 1156 if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
1141 (vma->vm_flags & VM_LOCKED)) 1157 (vma->vm_flags & VM_LOCKED))
1142 continue; 1158 continue;
1143 cursor = (unsigned long) vma->vm_private_data; 1159 cursor = (unsigned long) vma->vm_private_data;
@@ -1177,7 +1193,7 @@ out:
1177/** 1193/**
1178 * try_to_unmap - try to remove all page table mappings to a page 1194 * try_to_unmap - try to remove all page table mappings to a page
1179 * @page: the page to get unmapped 1195 * @page: the page to get unmapped
1180 * @migration: migration flag 1196 * @flags: action and flags
1181 * 1197 *
1182 * Tries to remove all the page table entries which are mapping this 1198 * Tries to remove all the page table entries which are mapping this
1183 * page, used in the pageout path. Caller must hold the page lock. 1199 * page, used in the pageout path. Caller must hold the page lock.
@@ -1188,16 +1204,16 @@ out:
1188 * SWAP_FAIL - the page is unswappable 1204 * SWAP_FAIL - the page is unswappable
1189 * SWAP_MLOCK - page is mlocked. 1205 * SWAP_MLOCK - page is mlocked.
1190 */ 1206 */
1191int try_to_unmap(struct page *page, int migration) 1207int try_to_unmap(struct page *page, enum ttu_flags flags)
1192{ 1208{
1193 int ret; 1209 int ret;
1194 1210
1195 BUG_ON(!PageLocked(page)); 1211 BUG_ON(!PageLocked(page));
1196 1212
1197 if (PageAnon(page)) 1213 if (PageAnon(page))
1198 ret = try_to_unmap_anon(page, 0, migration); 1214 ret = try_to_unmap_anon(page, flags);
1199 else 1215 else
1200 ret = try_to_unmap_file(page, 0, migration); 1216 ret = try_to_unmap_file(page, flags);
1201 if (ret != SWAP_MLOCK && !page_mapped(page)) 1217 if (ret != SWAP_MLOCK && !page_mapped(page))
1202 ret = SWAP_SUCCESS; 1218 ret = SWAP_SUCCESS;
1203 return ret; 1219 return ret;
@@ -1222,8 +1238,8 @@ int try_to_munlock(struct page *page)
1222 VM_BUG_ON(!PageLocked(page) || PageLRU(page)); 1238 VM_BUG_ON(!PageLocked(page) || PageLRU(page));
1223 1239
1224 if (PageAnon(page)) 1240 if (PageAnon(page))
1225 return try_to_unmap_anon(page, 1, 0); 1241 return try_to_unmap_anon(page, TTU_MUNLOCK);
1226 else 1242 else
1227 return try_to_unmap_file(page, 1, 0); 1243 return try_to_unmap_file(page, TTU_MUNLOCK);
1228} 1244}
1229 1245
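
The rmap.c hunks replace the two int parameters (unlock, migration) of try_to_unmap_one() and friends with a single enum ttu_flags that combines an action code, extracted via TTU_ACTION(), with modifier bits such as TTU_IGNORE_MLOCK and TTU_IGNORE_HWPOISON. A runnable sketch of a layout consistent with the names used above; the exact values are an assumption, not quoted from rmap.h:

/*
 * Sketch of a flags layout matching the TTU_* names in the hunks above:
 * a small action code in the low byte plus modifier bits above it.
 */
#include <stdio.h>

enum ttu_flags {
	TTU_UNMAP	= 0,		/* plain unmap (old migration == 0) */
	TTU_MIGRATION	= 1,		/* unmap for migration */
	TTU_MUNLOCK	= 2,		/* munlock scan (old unlock == 1) */
	TTU_ACTION_MASK	= 0xff,

	TTU_IGNORE_MLOCK    = 1 << 8,	/* don't bail on VM_LOCKED vmas */
	TTU_IGNORE_ACCESS   = 1 << 9,	/* skip the young-pte check */
	TTU_IGNORE_HWPOISON = 1 << 10	/* unmap poisoned pages too */
};

#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)

int main(void)
{
	enum ttu_flags f = TTU_MIGRATION | TTU_IGNORE_MLOCK;

	printf("action=%d ignore_mlock=%d\n",
	       TTU_ACTION(f), !!(f & TTU_IGNORE_MLOCK));
	return 0;
}

One action plus orthogonal modifiers lets a caller such as the hwpoison path request special behaviour without the parameter list growing with every new case.
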
diff --git a/mm/shmem.c b/mm/shmem.c
index b206a7a32e2a..98631c26c200 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1633,8 +1633,8 @@ shmem_write_end(struct file *file, struct address_space *mapping,
1633 if (pos + copied > inode->i_size) 1633 if (pos + copied > inode->i_size)
1634 i_size_write(inode, pos + copied); 1634 i_size_write(inode, pos + copied);
1635 1635
1636 unlock_page(page);
1637 set_page_dirty(page); 1636 set_page_dirty(page);
1637 unlock_page(page);
1638 page_cache_release(page); 1638 page_cache_release(page);
1639 1639
1640 return copied; 1640 return copied;
@@ -1971,13 +1971,13 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
1971 iput(inode); 1971 iput(inode);
1972 return error; 1972 return error;
1973 } 1973 }
1974 unlock_page(page);
1975 inode->i_mapping->a_ops = &shmem_aops; 1974 inode->i_mapping->a_ops = &shmem_aops;
1976 inode->i_op = &shmem_symlink_inode_operations; 1975 inode->i_op = &shmem_symlink_inode_operations;
1977 kaddr = kmap_atomic(page, KM_USER0); 1976 kaddr = kmap_atomic(page, KM_USER0);
1978 memcpy(kaddr, symname, len); 1977 memcpy(kaddr, symname, len);
1979 kunmap_atomic(kaddr, KM_USER0); 1978 kunmap_atomic(kaddr, KM_USER0);
1980 set_page_dirty(page); 1979 set_page_dirty(page);
1980 unlock_page(page);
1981 page_cache_release(page); 1981 page_cache_release(page);
1982 } 1982 }
1983 if (dir->i_mode & S_ISGID) 1983 if (dir->i_mode & S_ISGID)
@@ -2420,6 +2420,7 @@ static const struct address_space_operations shmem_aops = {
2420 .write_end = shmem_write_end, 2420 .write_end = shmem_write_end,
2421#endif 2421#endif
2422 .migratepage = migrate_page, 2422 .migratepage = migrate_page,
2423 .error_remove_page = generic_error_remove_page,
2423}; 2424};
2424 2425
2425static const struct file_operations shmem_file_operations = { 2426static const struct file_operations shmem_file_operations = {
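
The shmem.c reordering follows from the dirty-page comment added to page-writeback.c above: the dirty bit should be set while the page lock is still held so that the asynchronous memory-failure code samples a consistent state, and the new .error_remove_page = generic_error_remove_page hook is what makes shmem pages eligible for that handling in the first place. The resulting order in both paths:

set_page_dirty(page);		/* still under the page lock */
unlock_page(page);		/* only then drop the lock ... */
page_cache_release(page);	/* ... and the page reference */
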
diff --git a/mm/swapfile.c b/mm/swapfile.c
index f1bf19daadc6..4de7f02f820b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -699,7 +699,7 @@ int free_swap_and_cache(swp_entry_t entry)
699 struct swap_info_struct *p; 699 struct swap_info_struct *p;
700 struct page *page = NULL; 700 struct page *page = NULL;
701 701
702 if (is_migration_entry(entry)) 702 if (non_swap_entry(entry))
703 return 1; 703 return 1;
704 704
705 p = swap_info_get(entry); 705 p = swap_info_get(entry);
@@ -2085,7 +2085,7 @@ static int __swap_duplicate(swp_entry_t entry, bool cache)
2085 int count; 2085 int count;
2086 bool has_cache; 2086 bool has_cache;
2087 2087
2088 if (is_migration_entry(entry)) 2088 if (non_swap_entry(entry))
2089 return -EINVAL; 2089 return -EINVAL;
2090 2090
2091 type = swp_type(entry); 2091 type = swp_type(entry);
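
Both swapfile.c hunks widen the early-exit test from "is this a migration entry" to "is this any non-swap entry", which also covers the hwpoison entries introduced in the rmap.c hunk. A plausible shape for the predicate, shown as an assumption rather than quoted from swapops.h:

/*
 * Assumed implementation: special entries (migration, hwpoison) are
 * carved out of the swp_type() space at or above MAX_SWAPFILES, so a
 * single bound check rejects them all where the old code only caught
 * migration entries.
 */
static inline int non_swap_entry(swp_entry_t entry)
{
	return swp_type(entry) >= MAX_SWAPFILES;
}
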
diff --git a/mm/truncate.c b/mm/truncate.c
index ccc3ecf7cb98..450cebdabfc0 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -93,11 +93,11 @@ EXPORT_SYMBOL(cancel_dirty_page);
93 * its lock, b) when a concurrent invalidate_mapping_pages got there first and 93 * its lock, b) when a concurrent invalidate_mapping_pages got there first and
94 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. 94 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
95 */ 95 */
96static void 96static int
97truncate_complete_page(struct address_space *mapping, struct page *page) 97truncate_complete_page(struct address_space *mapping, struct page *page)
98{ 98{
99 if (page->mapping != mapping) 99 if (page->mapping != mapping)
100 return; 100 return -EIO;
101 101
102 if (page_has_private(page)) 102 if (page_has_private(page))
103 do_invalidatepage(page, 0); 103 do_invalidatepage(page, 0);
@@ -108,6 +108,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
108 remove_from_page_cache(page); 108 remove_from_page_cache(page);
109 ClearPageMappedToDisk(page); 109 ClearPageMappedToDisk(page);
110 page_cache_release(page); /* pagecache ref */ 110 page_cache_release(page); /* pagecache ref */
111 return 0;
111} 112}
112 113
113/* 114/*
@@ -135,6 +136,51 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
135 return ret; 136 return ret;
136} 137}
137 138
139int truncate_inode_page(struct address_space *mapping, struct page *page)
140{
141 if (page_mapped(page)) {
142 unmap_mapping_range(mapping,
143 (loff_t)page->index << PAGE_CACHE_SHIFT,
144 PAGE_CACHE_SIZE, 0);
145 }
146 return truncate_complete_page(mapping, page);
147}
148
149/*
150 * Used to get rid of pages on hardware memory corruption.
151 */
152int generic_error_remove_page(struct address_space *mapping, struct page *page)
153{
154 if (!mapping)
155 return -EINVAL;
156 /*
157 * Only punch for normal data pages for now.
158 * Handling other types like directories would need more auditing.
159 */
160 if (!S_ISREG(mapping->host->i_mode))
161 return -EIO;
162 return truncate_inode_page(mapping, page);
163}
164EXPORT_SYMBOL(generic_error_remove_page);
165
166/*
167 * Safely invalidate one page from its pagecache mapping.
168 * It only drops clean, unused pages. The page must be locked.
169 *
170 * Returns 1 if the page is successfully invalidated, otherwise 0.
171 */
172int invalidate_inode_page(struct page *page)
173{
174 struct address_space *mapping = page_mapping(page);
175 if (!mapping)
176 return 0;
177 if (PageDirty(page) || PageWriteback(page))
178 return 0;
179 if (page_mapped(page))
180 return 0;
181 return invalidate_complete_page(mapping, page);
182}
183
138/** 184/**
139 * truncate_inode_pages - truncate range of pages specified by start & end byte offsets 185 * truncate_inode_pages - truncate range of pages specified by start & end byte offsets
140 * @mapping: mapping to truncate 186 * @mapping: mapping to truncate
@@ -196,12 +242,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
196 unlock_page(page); 242 unlock_page(page);
197 continue; 243 continue;
198 } 244 }
199 if (page_mapped(page)) { 245 truncate_inode_page(mapping, page);
200 unmap_mapping_range(mapping,
201 (loff_t)page_index<<PAGE_CACHE_SHIFT,
202 PAGE_CACHE_SIZE, 0);
203 }
204 truncate_complete_page(mapping, page);
205 unlock_page(page); 246 unlock_page(page);
206 } 247 }
207 pagevec_release(&pvec); 248 pagevec_release(&pvec);
@@ -238,15 +279,10 @@ void truncate_inode_pages_range(struct address_space *mapping,
238 break; 279 break;
239 lock_page(page); 280 lock_page(page);
240 wait_on_page_writeback(page); 281 wait_on_page_writeback(page);
241 if (page_mapped(page)) { 282 truncate_inode_page(mapping, page);
242 unmap_mapping_range(mapping,
243 (loff_t)page->index<<PAGE_CACHE_SHIFT,
244 PAGE_CACHE_SIZE, 0);
245 }
246 if (page->index > next) 283 if (page->index > next)
247 next = page->index; 284 next = page->index;
248 next++; 285 next++;
249 truncate_complete_page(mapping, page);
250 unlock_page(page); 286 unlock_page(page);
251 } 287 }
252 pagevec_release(&pvec); 288 pagevec_release(&pvec);
@@ -311,12 +347,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
311 if (lock_failed) 347 if (lock_failed)
312 continue; 348 continue;
313 349
314 if (PageDirty(page) || PageWriteback(page)) 350 ret += invalidate_inode_page(page);
315 goto unlock; 351
316 if (page_mapped(page))
317 goto unlock;
318 ret += invalidate_complete_page(mapping, page);
319unlock:
320 unlock_page(page); 352 unlock_page(page);
321 if (next > end) 353 if (next > end)
322 break; 354 break;
@@ -465,3 +497,67 @@ int invalidate_inode_pages2(struct address_space *mapping)
465 return invalidate_inode_pages2_range(mapping, 0, -1); 497 return invalidate_inode_pages2_range(mapping, 0, -1);
466} 498}
467EXPORT_SYMBOL_GPL(invalidate_inode_pages2); 499EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
500
501/**
502 * truncate_pagecache - unmap and remove pagecache that has been truncated
503 * @inode: inode
504 * @old: old file offset
505 * @new: new file offset
506 *
507 * inode's new i_size must already be written before truncate_pagecache
508 * is called.
509 *
510 * This function should typically be called before the filesystem
511 * releases resources associated with the freed range (e.g. deallocates
512 * blocks). This way, pagecache will always stay logically coherent
513 * with on-disk format, and the filesystem would not have to deal with
514 * situations such as writepage being called for a page that has already
515 * had its underlying blocks deallocated.
516 */
517void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
518{
519 if (new < old) {
520 struct address_space *mapping = inode->i_mapping;
521
522 /*
523 * unmap_mapping_range is called twice, first simply for
524 * efficiency so that truncate_inode_pages does fewer
525 * single-page unmaps. However after this first call, and
526 * before truncate_inode_pages finishes, it is possible for
527 * private pages to be COWed, which remain after
528 * truncate_inode_pages finishes, hence the second
529 * unmap_mapping_range call must be made for correctness.
530 */
531 unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
532 truncate_inode_pages(mapping, new);
533 unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
534 }
535}
536EXPORT_SYMBOL(truncate_pagecache);
537
538/**
539 * vmtruncate - unmap mappings "freed" by truncate() syscall
540 * @inode: inode of the file used
541 * @offset: file offset to start truncating
542 *
543 * NOTE! We have to be ready to update the memory sharing
544 * between the file and the memory map for a potential last
545 * incomplete page. Ugly, but necessary.
546 */
547int vmtruncate(struct inode *inode, loff_t offset)
548{
549 loff_t oldsize;
550 int error;
551
552 error = inode_newsize_ok(inode, offset);
553 if (error)
554 return error;
555 oldsize = inode->i_size;
556 i_size_write(inode, offset);
557 truncate_pagecache(inode, oldsize, offset);
558 if (inode->i_op->truncate)
559 inode->i_op->truncate(inode);
560
561 return error;
562}
563EXPORT_SYMBOL(vmtruncate);
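
truncate.c grows the exported helpers truncate_inode_page(), generic_error_remove_page(), invalidate_inode_page(), truncate_pagecache() and vmtruncate(). A hedged usage sketch of how a filesystem's setattr path might drive them; the myfs_ name is invented and error handling is reduced to the essentials:

/*
 * Hypothetical caller: shrink a file through the new helpers. The call
 * order mirrors the comments above: i_size is updated first, pagecache
 * is shot down second, and ->truncate() frees on-disk blocks last, so
 * pagecache stays coherent with the on-disk format throughout.
 */
static int myfs_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;

	if (attr->ia_valid & ATTR_SIZE) {
		int error = vmtruncate(inode, attr->ia_size);
		/* vmtruncate wrote i_size, called truncate_pagecache(),
		 * then let ->truncate() release the underlying blocks */
		if (error)
			return error;
	}
	return 0;
}
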
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 613e89f471d9..1219ceb8a9b2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -663,7 +663,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
663 * processes. Try to unmap it here. 663 * processes. Try to unmap it here.
664 */ 664 */
665 if (page_mapped(page) && mapping) { 665 if (page_mapped(page) && mapping) {
666 switch (try_to_unmap(page, 0)) { 666 switch (try_to_unmap(page, TTU_UNMAP)) {
667 case SWAP_FAIL: 667 case SWAP_FAIL:
668 goto activate_locked; 668 goto activate_locked;
669 case SWAP_AGAIN: 669 case SWAP_AGAIN:
@@ -1836,11 +1836,45 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
1836 1836
1837#ifdef CONFIG_CGROUP_MEM_RES_CTLR 1837#ifdef CONFIG_CGROUP_MEM_RES_CTLR
1838 1838
1839unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
1840 gfp_t gfp_mask, bool noswap,
1841 unsigned int swappiness,
1842 struct zone *zone, int nid)
1843{
1844 struct scan_control sc = {
1845 .may_writepage = !laptop_mode,
1846 .may_unmap = 1,
1847 .may_swap = !noswap,
1848 .swap_cluster_max = SWAP_CLUSTER_MAX,
1849 .swappiness = swappiness,
1850 .order = 0,
1851 .mem_cgroup = mem,
1852 .isolate_pages = mem_cgroup_isolate_pages,
1853 };
1854 nodemask_t nm = nodemask_of_node(nid);
1855
1856 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
1857 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
1858 sc.nodemask = &nm;
1859 sc.nr_reclaimed = 0;
1860 sc.nr_scanned = 0;
1861 /*
1862 * NOTE: Although we can get the priority field, using it
1863 * here is not a good idea, since it limits the pages we can scan.
1864 * If we don't reclaim here, shrink_zone from balance_pgdat
1865 * will pick up pages from other mem cgroups as well. We hack
1866 * the priority and make it zero.
1867 */
1868 shrink_zone(0, zone, &sc);
1869 return sc.nr_reclaimed;
1870}
1871
1839unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, 1872unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
1840 gfp_t gfp_mask, 1873 gfp_t gfp_mask,
1841 bool noswap, 1874 bool noswap,
1842 unsigned int swappiness) 1875 unsigned int swappiness)
1843{ 1876{
1877 struct zonelist *zonelist;
1844 struct scan_control sc = { 1878 struct scan_control sc = {
1845 .may_writepage = !laptop_mode, 1879 .may_writepage = !laptop_mode,
1846 .may_unmap = 1, 1880 .may_unmap = 1,
@@ -1852,7 +1886,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
1852 .isolate_pages = mem_cgroup_isolate_pages, 1886 .isolate_pages = mem_cgroup_isolate_pages,
1853 .nodemask = NULL, /* we don't care the placement */ 1887 .nodemask = NULL, /* we don't care the placement */
1854 }; 1888 };
1855 struct zonelist *zonelist;
1856 1889
1857 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 1890 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
1858 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); 1891 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -1974,6 +2007,7 @@ loop_again:
1974 for (i = 0; i <= end_zone; i++) { 2007 for (i = 0; i <= end_zone; i++) {
1975 struct zone *zone = pgdat->node_zones + i; 2008 struct zone *zone = pgdat->node_zones + i;
1976 int nr_slab; 2009 int nr_slab;
2010 int nid, zid;
1977 2011
1978 if (!populated_zone(zone)) 2012 if (!populated_zone(zone))
1979 continue; 2013 continue;
@@ -1988,6 +2022,15 @@ loop_again:
1988 temp_priority[i] = priority; 2022 temp_priority[i] = priority;
1989 sc.nr_scanned = 0; 2023 sc.nr_scanned = 0;
1990 note_zone_scanning_priority(zone, priority); 2024 note_zone_scanning_priority(zone, priority);
2025
2026 nid = pgdat->node_id;
2027 zid = zone_idx(zone);
2028 /*
2029 * Call soft limit reclaim before calling shrink_zone.
2030 * For now we ignore the return value
2031 */
2032 mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask,
2033 nid, zid);
1991 /* 2034 /*
1992 * We put equal pressure on every zone, unless one 2035 * We put equal pressure on every zone, unless one
1993 * zone has way too many pages free already. 2036 * zone has way too many pages free already.
@@ -2801,10 +2844,10 @@ static void scan_all_zones_unevictable_pages(void)
2801unsigned long scan_unevictable_pages; 2844unsigned long scan_unevictable_pages;
2802 2845
2803int scan_unevictable_handler(struct ctl_table *table, int write, 2846int scan_unevictable_handler(struct ctl_table *table, int write,
2804 struct file *file, void __user *buffer, 2847 void __user *buffer,
2805 size_t *length, loff_t *ppos) 2848 size_t *length, loff_t *ppos)
2806{ 2849{
2807 proc_doulongvec_minmax(table, write, file, buffer, length, ppos); 2850 proc_doulongvec_minmax(table, write, buffer, length, ppos);
2808 2851
2809 if (write && *(unsigned long *)table->data) 2852 if (write && *(unsigned long *)table->data)
2810 scan_all_zones_unevictable_pages(); 2853 scan_all_zones_unevictable_pages();
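
The vmscan.c hunks add mem_cgroup_shrink_node_zone() for single-zone, single-group reclaim and call mem_cgroup_soft_limit_reclaim() from balance_pgdat() before the global shrink_zone() pass. A hedged sketch of how the soft-limit hook could be built on the new primitive; the iterator and swappiness helpers are assumptions, and only mem_cgroup_shrink_node_zone() and the outer signature come from the hunks above:

/*
 * Sketch only: walk the groups that are over their soft limit and
 * reclaim from just this zone before global reclaim runs. The helpers
 * pick_next_over_soft_limit() and get_swappiness() are hypothetical;
 * order is unused in this simplified version.
 */
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
					    gfp_t gfp_mask, int nid, int zid)
{
	unsigned long reclaimed = 0;
	struct mem_cgroup *mem;

	while ((mem = pick_next_over_soft_limit(nid, zid)) != NULL) {
		reclaimed += mem_cgroup_shrink_node_zone(mem, gfp_mask,
							 false, /* noswap */
							 get_swappiness(mem),
							 zone, nid);
		if (reclaimed)
			break;	/* one group's worth is enough here */
	}
	return reclaimed;
}
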
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 907a82e9023d..a16a2342f6bf 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -965,12 +965,12 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = {
965 965
966#ifdef CONFIG_SYSCTL 966#ifdef CONFIG_SYSCTL
967static 967static
968int brnf_sysctl_call_tables(ctl_table * ctl, int write, struct file *filp, 968int brnf_sysctl_call_tables(ctl_table * ctl, int write,
969 void __user * buffer, size_t * lenp, loff_t * ppos) 969 void __user * buffer, size_t * lenp, loff_t * ppos)
970{ 970{
971 int ret; 971 int ret;
972 972
973 ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 973 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
974 974
975 if (write && *(int *)(ctl->data)) 975 if (write && *(int *)(ctl->data))
976 *(int *)(ctl->data) = 1; 976 *(int *)(ctl->data) = 1;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 1c6a5bb6f0c8..6e1f085db06a 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -164,7 +164,7 @@ static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MU
164static int min_priority[1]; 164static int min_priority[1];
165static int max_priority[] = { 127 }; /* From DECnet spec */ 165static int max_priority[] = { 127 }; /* From DECnet spec */
166 166
167static int dn_forwarding_proc(ctl_table *, int, struct file *, 167static int dn_forwarding_proc(ctl_table *, int,
168 void __user *, size_t *, loff_t *); 168 void __user *, size_t *, loff_t *);
169static int dn_forwarding_sysctl(ctl_table *table, 169static int dn_forwarding_sysctl(ctl_table *table,
170 void __user *oldval, size_t __user *oldlenp, 170 void __user *oldval, size_t __user *oldlenp,
@@ -274,7 +274,6 @@ static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms)
274} 274}
275 275
276static int dn_forwarding_proc(ctl_table *table, int write, 276static int dn_forwarding_proc(ctl_table *table, int write,
277 struct file *filep,
278 void __user *buffer, 277 void __user *buffer,
279 size_t *lenp, loff_t *ppos) 278 size_t *lenp, loff_t *ppos)
280{ 279{
@@ -290,7 +289,7 @@ static int dn_forwarding_proc(ctl_table *table, int write,
290 dn_db = dev->dn_ptr; 289 dn_db = dev->dn_ptr;
291 old = dn_db->parms.forwarding; 290 old = dn_db->parms.forwarding;
292 291
293 err = proc_dointvec(table, write, filep, buffer, lenp, ppos); 292 err = proc_dointvec(table, write, buffer, lenp, ppos);
294 293
295 if ((err >= 0) && write) { 294 if ((err >= 0) && write) {
296 if (dn_db->parms.forwarding < 0) 295 if (dn_db->parms.forwarding < 0)
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 5bcd592ae6dd..26b0ab1e9f56 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -165,7 +165,6 @@ static int dn_node_address_strategy(ctl_table *table,
165} 165}
166 166
167static int dn_node_address_handler(ctl_table *table, int write, 167static int dn_node_address_handler(ctl_table *table, int write,
168 struct file *filp,
169 void __user *buffer, 168 void __user *buffer,
170 size_t *lenp, loff_t *ppos) 169 size_t *lenp, loff_t *ppos)
171{ 170{
@@ -276,7 +275,6 @@ static int dn_def_dev_strategy(ctl_table *table,
276 275
277 276
278static int dn_def_dev_handler(ctl_table *table, int write, 277static int dn_def_dev_handler(ctl_table *table, int write,
279 struct file * filp,
280 void __user *buffer, 278 void __user *buffer,
281 size_t *lenp, loff_t *ppos) 279 size_t *lenp, loff_t *ppos)
282{ 280{
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 07336c6201f0..e92f1fd28aa5 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1270,10 +1270,10 @@ static void inet_forward_change(struct net *net)
1270} 1270}
1271 1271
1272static int devinet_conf_proc(ctl_table *ctl, int write, 1272static int devinet_conf_proc(ctl_table *ctl, int write,
1273 struct file *filp, void __user *buffer, 1273 void __user *buffer,
1274 size_t *lenp, loff_t *ppos) 1274 size_t *lenp, loff_t *ppos)
1275{ 1275{
1276 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 1276 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1277 1277
1278 if (write) { 1278 if (write) {
1279 struct ipv4_devconf *cnf = ctl->extra1; 1279 struct ipv4_devconf *cnf = ctl->extra1;
@@ -1342,12 +1342,12 @@ static int devinet_conf_sysctl(ctl_table *table,
1342} 1342}
1343 1343
1344static int devinet_sysctl_forward(ctl_table *ctl, int write, 1344static int devinet_sysctl_forward(ctl_table *ctl, int write,
1345 struct file *filp, void __user *buffer, 1345 void __user *buffer,
1346 size_t *lenp, loff_t *ppos) 1346 size_t *lenp, loff_t *ppos)
1347{ 1347{
1348 int *valp = ctl->data; 1348 int *valp = ctl->data;
1349 int val = *valp; 1349 int val = *valp;
1350 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 1350 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1351 1351
1352 if (write && *valp != val) { 1352 if (write && *valp != val) {
1353 struct net *net = ctl->extra2; 1353 struct net *net = ctl->extra2;
@@ -1372,12 +1372,12 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
1372} 1372}
1373 1373
1374int ipv4_doint_and_flush(ctl_table *ctl, int write, 1374int ipv4_doint_and_flush(ctl_table *ctl, int write,
1375 struct file *filp, void __user *buffer, 1375 void __user *buffer,
1376 size_t *lenp, loff_t *ppos) 1376 size_t *lenp, loff_t *ppos)
1377{ 1377{
1378 int *valp = ctl->data; 1378 int *valp = ctl->data;
1379 int val = *valp; 1379 int val = *valp;
1380 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 1380 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1381 struct net *net = ctl->extra2; 1381 struct net *net = ctl->extra2;
1382 1382
1383 if (write && *valp != val) 1383 if (write && *valp != val)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index df9347314538..bb4199252026 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3036,7 +3036,7 @@ void ip_rt_multicast_event(struct in_device *in_dev)
3036 3036
3037#ifdef CONFIG_SYSCTL 3037#ifdef CONFIG_SYSCTL
3038static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, 3038static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
3039 struct file *filp, void __user *buffer, 3039 void __user *buffer,
3040 size_t *lenp, loff_t *ppos) 3040 size_t *lenp, loff_t *ppos)
3041{ 3041{
3042 if (write) { 3042 if (write) {
@@ -3046,7 +3046,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
3046 3046
3047 memcpy(&ctl, __ctl, sizeof(ctl)); 3047 memcpy(&ctl, __ctl, sizeof(ctl));
3048 ctl.data = &flush_delay; 3048 ctl.data = &flush_delay;
3049 proc_dointvec(&ctl, write, filp, buffer, lenp, ppos); 3049 proc_dointvec(&ctl, write, buffer, lenp, ppos);
3050 3050
3051 net = (struct net *)__ctl->extra1; 3051 net = (struct net *)__ctl->extra1;
3052 rt_cache_flush(net, flush_delay); 3052 rt_cache_flush(net, flush_delay);
@@ -3106,12 +3106,11 @@ static void rt_secret_reschedule(int old)
3106} 3106}
3107 3107
3108static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, 3108static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write,
3109 struct file *filp,
3110 void __user *buffer, size_t *lenp, 3109 void __user *buffer, size_t *lenp,
3111 loff_t *ppos) 3110 loff_t *ppos)
3112{ 3111{
3113 int old = ip_rt_secret_interval; 3112 int old = ip_rt_secret_interval;
3114 int ret = proc_dointvec_jiffies(ctl, write, filp, buffer, lenp, ppos); 3113 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3115 3114
3116 rt_secret_reschedule(old); 3115 rt_secret_reschedule(old);
3117 3116
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 4710d219f06a..2dcf04d9b005 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -36,7 +36,7 @@ static void set_local_port_range(int range[2])
36} 36}
37 37
38/* Validate changes from /proc interface. */ 38/* Validate changes from /proc interface. */
39static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, 39static int ipv4_local_port_range(ctl_table *table, int write,
40 void __user *buffer, 40 void __user *buffer,
41 size_t *lenp, loff_t *ppos) 41 size_t *lenp, loff_t *ppos)
42{ 42{
@@ -51,7 +51,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp,
51 }; 51 };
52 52
53 inet_get_local_port_range(range, range + 1); 53 inet_get_local_port_range(range, range + 1);
54 ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); 54 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
55 55
56 if (write && ret == 0) { 56 if (write && ret == 0) {
57 if (range[1] < range[0]) 57 if (range[1] < range[0])
@@ -91,7 +91,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table,
91} 91}
92 92
93 93
94static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * filp, 94static int proc_tcp_congestion_control(ctl_table *ctl, int write,
95 void __user *buffer, size_t *lenp, loff_t *ppos) 95 void __user *buffer, size_t *lenp, loff_t *ppos)
96{ 96{
97 char val[TCP_CA_NAME_MAX]; 97 char val[TCP_CA_NAME_MAX];
@@ -103,7 +103,7 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file *
103 103
104 tcp_get_default_congestion_control(val); 104 tcp_get_default_congestion_control(val);
105 105
106 ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos); 106 ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
107 if (write && ret == 0) 107 if (write && ret == 0)
108 ret = tcp_set_default_congestion_control(val); 108 ret = tcp_set_default_congestion_control(val);
109 return ret; 109 return ret;
@@ -129,7 +129,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table,
129} 129}
130 130
131static int proc_tcp_available_congestion_control(ctl_table *ctl, 131static int proc_tcp_available_congestion_control(ctl_table *ctl,
132 int write, struct file * filp, 132 int write,
133 void __user *buffer, size_t *lenp, 133 void __user *buffer, size_t *lenp,
134 loff_t *ppos) 134 loff_t *ppos)
135{ 135{
@@ -140,13 +140,13 @@ static int proc_tcp_available_congestion_control(ctl_table *ctl,
140 if (!tbl.data) 140 if (!tbl.data)
141 return -ENOMEM; 141 return -ENOMEM;
142 tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX); 142 tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX);
143 ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos); 143 ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
144 kfree(tbl.data); 144 kfree(tbl.data);
145 return ret; 145 return ret;
146} 146}
147 147
148static int proc_allowed_congestion_control(ctl_table *ctl, 148static int proc_allowed_congestion_control(ctl_table *ctl,
149 int write, struct file * filp, 149 int write,
150 void __user *buffer, size_t *lenp, 150 void __user *buffer, size_t *lenp,
151 loff_t *ppos) 151 loff_t *ppos)
152{ 152{
@@ -158,7 +158,7 @@ static int proc_allowed_congestion_control(ctl_table *ctl,
158 return -ENOMEM; 158 return -ENOMEM;
159 159
160 tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen); 160 tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
161 ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos); 161 ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
162 if (write && ret == 0) 162 if (write && ret == 0)
163 ret = tcp_set_allowed_congestion_control(tbl.data); 163 ret = tcp_set_allowed_congestion_control(tbl.data);
164 kfree(tbl.data); 164 kfree(tbl.data);
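
The three congestion-control handlers above share one idiom: clone the ctl_table onto the stack, point ->data at a local buffer, let proc_dostring() do the parsing, and commit only on a clean write. Condensed reconstruction of that idiom from the hunks (the tbl initializer is inferred, since the diff context elides it):

static int proc_tcp_congestion_control(ctl_table *ctl, int write,
				       void __user *buffer, size_t *lenp,
				       loff_t *ppos)
{
	char val[TCP_CA_NAME_MAX];
	ctl_table tbl = {
		.data = val,
		.maxlen = TCP_CA_NAME_MAX,	/* inferred initializer */
	};
	int ret;

	tcp_get_default_congestion_control(val);	/* seed current value */
	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	if (write && ret == 0)		/* commit only after a clean parse */
		ret = tcp_set_default_congestion_control(val);
	return ret;
}

Parsing into a private copy means a malformed write never clobbers live state, and the real table's data pointer is never exposed to a partially validated string.
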
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 55f486d89c88..1fd0a3d775d2 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3986,14 +3986,14 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
3986#ifdef CONFIG_SYSCTL 3986#ifdef CONFIG_SYSCTL
3987 3987
3988static 3988static
3989int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, 3989int addrconf_sysctl_forward(ctl_table *ctl, int write,
3990 void __user *buffer, size_t *lenp, loff_t *ppos) 3990 void __user *buffer, size_t *lenp, loff_t *ppos)
3991{ 3991{
3992 int *valp = ctl->data; 3992 int *valp = ctl->data;
3993 int val = *valp; 3993 int val = *valp;
3994 int ret; 3994 int ret;
3995 3995
3996 ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 3996 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3997 3997
3998 if (write) 3998 if (write)
3999 ret = addrconf_fixup_forwarding(ctl, valp, val); 3999 ret = addrconf_fixup_forwarding(ctl, valp, val);
@@ -4090,14 +4090,14 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old)
4090} 4090}
4091 4091
4092static 4092static
4093int addrconf_sysctl_disable(ctl_table *ctl, int write, struct file * filp, 4093int addrconf_sysctl_disable(ctl_table *ctl, int write,
4094 void __user *buffer, size_t *lenp, loff_t *ppos) 4094 void __user *buffer, size_t *lenp, loff_t *ppos)
4095{ 4095{
4096 int *valp = ctl->data; 4096 int *valp = ctl->data;
4097 int val = *valp; 4097 int val = *valp;
4098 int ret; 4098 int ret;
4099 4099
4100 ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 4100 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
4101 4101
4102 if (write) 4102 if (write)
4103 ret = addrconf_disable_ipv6(ctl, valp, val); 4103 ret = addrconf_disable_ipv6(ctl, valp, val);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 7015478797f6..498b9b0b0fad 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1735,7 +1735,7 @@ static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
1735 } 1735 }
1736} 1736}
1737 1737
1738int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) 1738int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos)
1739{ 1739{
1740 struct net_device *dev = ctl->extra1; 1740 struct net_device *dev = ctl->extra1;
1741 struct inet6_dev *idev; 1741 struct inet6_dev *idev;
@@ -1746,16 +1746,16 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f
1746 ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default"); 1746 ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
1747 1747
1748 if (strcmp(ctl->procname, "retrans_time") == 0) 1748 if (strcmp(ctl->procname, "retrans_time") == 0)
1749 ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 1749 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1750 1750
1751 else if (strcmp(ctl->procname, "base_reachable_time") == 0) 1751 else if (strcmp(ctl->procname, "base_reachable_time") == 0)
1752 ret = proc_dointvec_jiffies(ctl, write, 1752 ret = proc_dointvec_jiffies(ctl, write,
1753 filp, buffer, lenp, ppos); 1753 buffer, lenp, ppos);
1754 1754
1755 else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) || 1755 else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
1756 (strcmp(ctl->procname, "base_reachable_time_ms") == 0)) 1756 (strcmp(ctl->procname, "base_reachable_time_ms") == 0))
1757 ret = proc_dointvec_ms_jiffies(ctl, write, 1757 ret = proc_dointvec_ms_jiffies(ctl, write,
1758 filp, buffer, lenp, ppos); 1758 buffer, lenp, ppos);
1759 else 1759 else
1760 ret = -1; 1760 ret = -1;
1761 1761
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 77aecbe8ff6c..d6fe7646a8ff 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2524,13 +2524,13 @@ static const struct file_operations rt6_stats_seq_fops = {
2524#ifdef CONFIG_SYSCTL 2524#ifdef CONFIG_SYSCTL
2525 2525
2526static 2526static
2527int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, 2527int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2528 void __user *buffer, size_t *lenp, loff_t *ppos) 2528 void __user *buffer, size_t *lenp, loff_t *ppos)
2529{ 2529{
2530 struct net *net = current->nsproxy->net_ns; 2530 struct net *net = current->nsproxy->net_ns;
2531 int delay = net->ipv6.sysctl.flush_delay; 2531 int delay = net->ipv6.sysctl.flush_delay;
2532 if (write) { 2532 if (write) {
2533 proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 2533 proc_dointvec(ctl, write, buffer, lenp, ppos);
2534 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); 2534 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2535 return 0; 2535 return 0;
2536 } else 2536 } else
diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c
index 57f8817c3979..5c86567e5a78 100644
--- a/net/irda/irsysctl.c
+++ b/net/irda/irsysctl.c
@@ -73,12 +73,12 @@ static int min_lap_keepalive_time = 100; /* 100us */
73/* For other sysctl, I've no idea of the range. Maybe Dag could help 73/* For other sysctl, I've no idea of the range. Maybe Dag could help
74 * us on that - Jean II */ 74 * us on that - Jean II */
75 75
76static int do_devname(ctl_table *table, int write, struct file *filp, 76static int do_devname(ctl_table *table, int write,
77 void __user *buffer, size_t *lenp, loff_t *ppos) 77 void __user *buffer, size_t *lenp, loff_t *ppos)
78{ 78{
79 int ret; 79 int ret;
80 80
81 ret = proc_dostring(table, write, filp, buffer, lenp, ppos); 81 ret = proc_dostring(table, write, buffer, lenp, ppos);
82 if (ret == 0 && write) { 82 if (ret == 0 && write) {
83 struct ias_value *val; 83 struct ias_value *val;
84 84
@@ -90,12 +90,12 @@ static int do_devname(ctl_table *table, int write, struct file *filp,
90} 90}
91 91
92 92
93static int do_discovery(ctl_table *table, int write, struct file *filp, 93static int do_discovery(ctl_table *table, int write,
94 void __user *buffer, size_t *lenp, loff_t *ppos) 94 void __user *buffer, size_t *lenp, loff_t *ppos)
95{ 95{
96 int ret; 96 int ret;
97 97
98 ret = proc_dointvec(table, write, filp, buffer, lenp, ppos); 98 ret = proc_dointvec(table, write, buffer, lenp, ppos);
99 if (ret) 99 if (ret)
100 return ret; 100 return ret;
101 101
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index fba2892b99e1..446e9bd4b4bc 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1496,14 +1496,14 @@ static int ip_vs_zero_all(void)
1496 1496
1497 1497
1498static int 1498static int
1499proc_do_defense_mode(ctl_table *table, int write, struct file * filp, 1499proc_do_defense_mode(ctl_table *table, int write,
1500 void __user *buffer, size_t *lenp, loff_t *ppos) 1500 void __user *buffer, size_t *lenp, loff_t *ppos)
1501{ 1501{
1502 int *valp = table->data; 1502 int *valp = table->data;
1503 int val = *valp; 1503 int val = *valp;
1504 int rc; 1504 int rc;
1505 1505
1506 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos); 1506 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1507 if (write && (*valp != val)) { 1507 if (write && (*valp != val)) {
1508 if ((*valp < 0) || (*valp > 3)) { 1508 if ((*valp < 0) || (*valp > 3)) {
1509 /* Restore the correct value */ 1509 /* Restore the correct value */
@@ -1517,7 +1517,7 @@ proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1517 1517
1518 1518
1519static int 1519static int
1520proc_do_sync_threshold(ctl_table *table, int write, struct file *filp, 1520proc_do_sync_threshold(ctl_table *table, int write,
1521 void __user *buffer, size_t *lenp, loff_t *ppos) 1521 void __user *buffer, size_t *lenp, loff_t *ppos)
1522{ 1522{
1523 int *valp = table->data; 1523 int *valp = table->data;
@@ -1527,7 +1527,7 @@ proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
 	/* backup the value first */
 	memcpy(val, valp, sizeof(val));
 
-	rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+	rc = proc_dointvec(table, write, buffer, lenp, ppos);
 	if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
 		/* Restore the correct value */
 		memcpy(valp, val, sizeof(val));
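
Both ipvs handlers above share a save/validate/restore idiom: snapshot the current value, let proc_dointvec() store whatever userspace wrote, then roll back if the result is out of range. The same idiom in isolation, against the new filp-less signature (my_mode_handler and the 0..3 range are illustrative, echoing proc_do_defense_mode):

	static int my_mode_handler(ctl_table *table, int write,
				   void __user *buffer, size_t *lenp, loff_t *ppos)
	{
		int *valp = table->data;
		int old = *valp;	/* snapshot before the write */
		int rc = proc_dointvec(table, write, buffer, lenp, ppos);

		if (write && (*valp < 0 || *valp > 3))
			*valp = old;	/* out of range: restore */
		return rc;
	}
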
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 4e620305f28c..c93494fef8ef 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -226,7 +226,7 @@ static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3];
 static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
 static struct ctl_table_header *nf_log_dir_header;
 
-static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp,
+static int nf_log_proc_dostring(ctl_table *table, int write,
 				void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	const struct nf_logger *logger;
@@ -260,7 +260,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp,
 			table->data = "NONE";
 		else
 			table->data = logger->name;
-		r = proc_dostring(table, write, filp, buffer, lenp, ppos);
+		r = proc_dostring(table, write, buffer, lenp, ppos);
 		mutex_unlock(&nf_log_mutex);
 	}
 
diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c
index 7b5749ee2765..2220f3322326 100644
--- a/net/phonet/sysctl.c
+++ b/net/phonet/sysctl.c
@@ -56,7 +56,7 @@ void phonet_get_local_port_range(int *min, int *max)
 	} while (read_seqretry(&local_port_range_lock, seq));
 }
 
-static int proc_local_port_range(ctl_table *table, int write, struct file *filp,
+static int proc_local_port_range(ctl_table *table, int write,
 				 void __user *buffer,
 				 size_t *lenp, loff_t *ppos)
 {
@@ -70,7 +70,7 @@ static int proc_local_port_range(ctl_table *table, int write, struct file *filp,
 		.extra2 = &local_port_range_max,
 	};
 
-	ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos);
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
 
 	if (write && ret == 0) {
 		if (range[1] < range[0])
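
proc_local_port_range() shows the other common validation trick: build a temporary ctl_table whose ->extra1/->extra2 point at the allowed bounds, let proc_dointvec_minmax() do the clamping, and commit the result only after any extra checks pass. A condensed sketch under the new signature (all my_* names are hypothetical):

	static int my_range[2];
	static int my_min = 1024, my_max = 65535;

	static int my_range_handler(ctl_table *table, int write,
				    void __user *buffer, size_t *lenp, loff_t *ppos)
	{
		int tmp_range[2] = { my_range[0], my_range[1] };
		ctl_table tmp = {
			.data	= tmp_range,
			.maxlen	= sizeof(tmp_range),
			.extra1	= &my_min,	/* lower bound */
			.extra2	= &my_max,	/* upper bound */
		};
		int ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

		if (write && ret == 0) {
			if (tmp_range[1] < tmp_range[0])
				return -EINVAL;
			my_range[0] = tmp_range[0];
			my_range[1] = tmp_range[1];
		}
		return ret;
	}
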
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index a417d5ab5dd7..38829e20500b 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -640,10 +640,11 @@ EXPORT_SYMBOL_GPL(rpc_call_async);
 /**
  * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
  * rpc_execute against it
- * @ops: RPC call ops
+ * @req: RPC request
+ * @tk_ops: RPC call ops
  */
 struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
 				const struct rpc_call_ops *tk_ops)
 {
 	struct rpc_task *task;
 	struct xdr_buf *xbufp = &req->rq_snd_buf;
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 5231f7aaac0e..42f9748ae093 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -56,7 +56,7 @@ rpc_unregister_sysctl(void)
 	}
 }
 
-static int proc_do_xprt(ctl_table *table, int write, struct file *file,
+static int proc_do_xprt(ctl_table *table, int write,
 			void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	char tmpbuf[256];
@@ -71,7 +71,7 @@ static int proc_do_xprt(ctl_table *table, int write, struct file *file,
 }
 
 static int
-proc_dodebug(ctl_table *table, int write, struct file *file,
+proc_dodebug(ctl_table *table, int write,
 	     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	char tmpbuf[20], c, *s;
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 87101177825b..35fb68b9c8ec 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -80,7 +80,7 @@ struct kmem_cache *svc_rdma_ctxt_cachep;
  * current value.
  */
 static int read_reset_stat(ctl_table *table, int write,
-			   struct file *filp, void __user *buffer, size_t *lenp,
+			   void __user *buffer, size_t *lenp,
 			   loff_t *ppos)
 {
 	atomic_t *stat = (atomic_t *)table->data;
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index b8186bac8b7e..6cf8fd2b79e8 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -61,7 +61,8 @@ static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
 struct cgroup_subsys devices_subsys;
 
 static int devcgroup_can_attach(struct cgroup_subsys *ss,
-		struct cgroup *new_cgroup, struct task_struct *task)
+		struct cgroup *new_cgroup, struct task_struct *task,
+		bool threadgroup)
 {
 	if (current != task && !capable(CAP_SYS_ADMIN))
 		return -EPERM;
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 500aad0ebd6a..3bb90b6f1dd3 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -187,7 +187,7 @@ static inline void print_ipv6_addr(struct audit_buffer *ab,
 				char *name1, char *name2)
 {
 	if (!ipv6_addr_any(addr))
-		audit_log_format(ab, " %s=%pI6", name1, addr);
+		audit_log_format(ab, " %s=%pI6c", name1, addr);
 	if (port)
 		audit_log_format(ab, " %s=%d", name2, ntohs(port));
 }
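
The only change in lsm_audit.c is the format specifier: the kernel's %pI6 printk extension prints a struct in6_addr in full, zero-padded form, while %pI6c prints the compressed (RFC 5952 style) form. Illustrative output for the loopback address (comments show the assumed formatting):

	audit_log_format(ab, " laddr=%pI6",  addr);	/* laddr=0000:0000:0000:0000:0000:0000:0000:0001 */
	audit_log_format(ab, " laddr=%pI6c", addr);	/* laddr=::1 */
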
diff --git a/security/min_addr.c b/security/min_addr.c
index 14cc7b3b8d03..c844eed7915d 100644
--- a/security/min_addr.c
+++ b/security/min_addr.c
@@ -28,12 +28,12 @@ static void update_mmap_min_addr(void)
  * sysctl handler which just sets dac_mmap_min_addr = the new value and then
  * calls update_mmap_min_addr() so non MAP_FIXED hints get rounded properly
  */
-int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp,
+int mmap_min_addr_handler(struct ctl_table *table, int write,
 			  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret;
 
-	ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
 
 	update_mmap_min_addr();
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 417f7c994522..bb230d5d7085 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2411,7 +2411,7 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm)
 	/* Wake up the parent if it is waiting so that it can recheck
 	 * wait permission to the new task SID. */
 	read_lock(&tasklist_lock);
-	wake_up_interruptible(&current->real_parent->signal->wait_chldexit);
+	__wake_up_parent(current, current->real_parent);
 	read_unlock(&tasklist_lock);
 }
 
diff --git a/sound/arm/pxa2xx-ac97.c b/sound/arm/pxa2xx-ac97.c
index 4e34d19ddbc0..b4b48afb6de6 100644
--- a/sound/arm/pxa2xx-ac97.c
+++ b/sound/arm/pxa2xx-ac97.c
@@ -137,9 +137,9 @@ static int pxa2xx_ac97_do_resume(struct snd_card *card)
 	return 0;
 }
 
-static int pxa2xx_ac97_suspend(struct platform_device *dev, pm_message_t state)
+static int pxa2xx_ac97_suspend(struct device *dev)
 {
-	struct snd_card *card = platform_get_drvdata(dev);
+	struct snd_card *card = dev_get_drvdata(dev);
 	int ret = 0;
 
 	if (card)
@@ -148,9 +148,9 @@ static int pxa2xx_ac97_suspend(struct platform_device *dev, pm_message_t state)
 	return ret;
 }
 
-static int pxa2xx_ac97_resume(struct platform_device *dev)
+static int pxa2xx_ac97_resume(struct device *dev)
 {
-	struct snd_card *card = platform_get_drvdata(dev);
+	struct snd_card *card = dev_get_drvdata(dev);
 	int ret = 0;
 
 	if (card)
@@ -159,9 +159,10 @@ static int pxa2xx_ac97_resume(struct platform_device *dev)
 	return ret;
 }
 
-#else
-#define pxa2xx_ac97_suspend	NULL
-#define pxa2xx_ac97_resume	NULL
+static struct dev_pm_ops pxa2xx_ac97_pm_ops = {
+	.suspend	= pxa2xx_ac97_suspend,
+	.resume		= pxa2xx_ac97_resume,
+};
 #endif
 
 static int __devinit pxa2xx_ac97_probe(struct platform_device *dev)
@@ -241,11 +242,12 @@ static int __devexit pxa2xx_ac97_remove(struct platform_device *dev)
 static struct platform_driver pxa2xx_ac97_driver = {
 	.probe		= pxa2xx_ac97_probe,
 	.remove		= __devexit_p(pxa2xx_ac97_remove),
-	.suspend	= pxa2xx_ac97_suspend,
-	.resume		= pxa2xx_ac97_resume,
 	.driver		= {
 		.name	= "pxa2xx-ac97",
 		.owner	= THIS_MODULE,
+#ifdef CONFIG_PM
+		.pm	= &pxa2xx_ac97_pm_ops,
+#endif
 	},
 };
 
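
The pxa2xx-ac97 change converts the driver from the legacy platform_driver .suspend/.resume callbacks (which take a platform_device and a pm_message_t) to dev_pm_ops callbacks that take a plain struct device and are hooked up through the driver core. Condensed to its skeleton, the resulting pattern looks like this (the foo_* names, including the foo_do_suspend()/foo_do_resume() helpers, are placeholders, not the driver's actual symbols):

	#ifdef CONFIG_PM
	static int foo_suspend(struct device *dev)
	{
		struct snd_card *card = dev_get_drvdata(dev);

		return card ? foo_do_suspend(card) : 0;	/* driver-specific quiesce */
	}

	static int foo_resume(struct device *dev)
	{
		struct snd_card *card = dev_get_drvdata(dev);

		return card ? foo_do_resume(card) : 0;	/* driver-specific re-init */
	}

	static struct dev_pm_ops foo_pm_ops = {
		.suspend	= foo_suspend,
		.resume		= foo_resume,
	};
	#endif

	static struct platform_driver foo_driver = {
		.driver	= {
			.name	= "foo",
			.owner	= THIS_MODULE,
	#ifdef CONFIG_PM
			.pm	= &foo_pm_ops,
	#endif
		},
	};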