aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/crypto/async-tx-api.txt96
-rw-r--r--Documentation/dmaengine.txt1
-rw-r--r--Documentation/filesystems/btrfs.txt91
-rw-r--r--Documentation/kernel-parameters.txt46
-rw-r--r--Documentation/powerpc/dts-bindings/4xx/ndfc.txt39
-rw-r--r--arch/arm/mach-pxa/corgi.c54
-rw-r--r--arch/arm/mach-pxa/poodle.c51
-rw-r--r--arch/arm/mach-pxa/spitz.c77
-rw-r--r--arch/avr32/mach-at32ap/at32ap700x.c15
-rw-r--r--arch/ia64/include/asm/acpi-ext.h1
-rw-r--r--arch/ia64/include/asm/sn/acpi.h2
-rw-r--r--arch/ia64/kernel/acpi.c1
-rw-r--r--arch/ia64/sn/kernel/io_acpi_init.c103
-rw-r--r--arch/ia64/sn/kernel/io_common.c5
-rw-r--r--arch/parisc/Makefile2
-rw-r--r--arch/parisc/include/asm/Kbuild1
-rw-r--r--arch/parisc/include/asm/byteorder.h77
-rw-r--r--arch/parisc/include/asm/checksum.h2
-rw-r--r--arch/parisc/include/asm/io.h12
-rw-r--r--arch/parisc/include/asm/mmu_context.h13
-rw-r--r--arch/parisc/include/asm/processor.h4
-rw-r--r--arch/parisc/include/asm/swab.h66
-rw-r--r--arch/parisc/include/asm/uaccess.h2
-rw-r--r--arch/parisc/kernel/drivers.c40
-rw-r--r--arch/parisc/kernel/hpmc.S8
-rw-r--r--arch/parisc/kernel/irq.c11
-rw-r--r--arch/parisc/kernel/pdc_cons.c2
-rw-r--r--arch/parisc/kernel/perf.c4
-rw-r--r--arch/parisc/kernel/processor.c68
-rw-r--r--arch/parisc/kernel/setup.c11
-rw-r--r--arch/parisc/kernel/smp.c32
-rw-r--r--arch/parisc/kernel/time.c4
-rw-r--r--arch/parisc/kernel/topology.c4
-rw-r--r--arch/parisc/kernel/traps.c9
-rw-r--r--arch/parisc/kernel/unwind.c2
-rw-r--r--arch/parisc/lib/iomap.c2
-rw-r--r--arch/parisc/lib/memcpy.c2
-rw-r--r--arch/parisc/mm/fault.c58
-rw-r--r--arch/powerpc/include/asm/cell-pmu.h2
-rw-r--r--arch/powerpc/include/asm/oprofile_impl.h6
-rw-r--r--arch/powerpc/oprofile/cell/pr_util.h11
-rw-r--r--arch/powerpc/oprofile/cell/spu_profiler.c56
-rw-r--r--arch/powerpc/oprofile/common.c22
-rw-r--r--arch/powerpc/oprofile/op_model_cell.c748
-rw-r--r--arch/x86/include/asm/bitops.h2
-rw-r--r--arch/x86/kernel/acpi/boot.c17
-rw-r--r--arch/x86/kernel/acpi/cstate.c4
-rw-r--r--arch/x86/kernel/acpi/sleep.c2
-rw-r--r--arch/x86/kernel/e820.c21
-rw-r--r--arch/x86/kernel/early-quirks.c22
-rw-r--r--arch/x86/oprofile/op_model_amd.c149
-rw-r--r--crypto/async_tx/async_tx.c350
-rw-r--r--drivers/Kconfig2
-rw-r--r--drivers/Makefile1
-rw-r--r--drivers/acpi/Kconfig84
-rw-r--r--drivers/acpi/Makefile25
-rw-r--r--drivers/acpi/acpica/Makefile44
-rw-r--r--drivers/acpi/acpica/accommon.h63
-rw-r--r--drivers/acpi/acpica/acconfig.h (renamed from include/acpi/acconfig.h)8
-rw-r--r--drivers/acpi/acpica/acdebug.h (renamed from include/acpi/acdebug.h)0
-rw-r--r--drivers/acpi/acpica/acdispat.h (renamed from include/acpi/acdispat.h)0
-rw-r--r--drivers/acpi/acpica/acevents.h (renamed from include/acpi/acevents.h)6
-rw-r--r--drivers/acpi/acpica/acglobal.h (renamed from include/acpi/acglobal.h)9
-rw-r--r--drivers/acpi/acpica/achware.h (renamed from include/acpi/achware.h)22
-rw-r--r--drivers/acpi/acpica/acinterp.h (renamed from include/acpi/acinterp.h)0
-rw-r--r--drivers/acpi/acpica/aclocal.h (renamed from include/acpi/aclocal.h)86
-rw-r--r--drivers/acpi/acpica/acmacros.h (renamed from include/acpi/acmacros.h)122
-rw-r--r--drivers/acpi/acpica/acnamesp.h (renamed from include/acpi/acnamesp.h)5
-rw-r--r--drivers/acpi/acpica/acobject.h (renamed from include/acpi/acobject.h)0
-rw-r--r--drivers/acpi/acpica/acopcode.h (renamed from include/acpi/acopcode.h)0
-rw-r--r--drivers/acpi/acpica/acparser.h (renamed from include/acpi/acparser.h)0
-rw-r--r--drivers/acpi/acpica/acpredef.h (renamed from include/acpi/acpredef.h)0
-rw-r--r--drivers/acpi/acpica/acresrc.h (renamed from include/acpi/acresrc.h)0
-rw-r--r--drivers/acpi/acpica/acstruct.h (renamed from include/acpi/acstruct.h)0
-rw-r--r--drivers/acpi/acpica/actables.h (renamed from include/acpi/actables.h)2
-rw-r--r--drivers/acpi/acpica/acutils.h (renamed from include/acpi/acutils.h)36
-rw-r--r--drivers/acpi/acpica/amlcode.h (renamed from include/acpi/amlcode.h)0
-rw-r--r--drivers/acpi/acpica/amlresrc.h (renamed from include/acpi/amlresrc.h)0
-rw-r--r--drivers/acpi/acpica/dsfield.c (renamed from drivers/acpi/dispatcher/dsfield.c)11
-rw-r--r--drivers/acpi/acpica/dsinit.c (renamed from drivers/acpi/dispatcher/dsinit.c)7
-rw-r--r--drivers/acpi/acpica/dsmethod.c (renamed from drivers/acpi/dispatcher/dsmethod.c)14
-rw-r--r--drivers/acpi/acpica/dsmthdat.c (renamed from drivers/acpi/dispatcher/dsmthdat.c)7
-rw-r--r--drivers/acpi/acpica/dsobject.c (renamed from drivers/acpi/dispatcher/dsobject.c)11
-rw-r--r--drivers/acpi/acpica/dsopcode.c (renamed from drivers/acpi/dispatcher/dsopcode.c)66
-rw-r--r--drivers/acpi/acpica/dsutils.c (renamed from drivers/acpi/dispatcher/dsutils.c)13
-rw-r--r--drivers/acpi/acpica/dswexec.c (renamed from drivers/acpi/dispatcher/dswexec.c)13
-rw-r--r--drivers/acpi/acpica/dswload.c (renamed from drivers/acpi/dispatcher/dswload.c)13
-rw-r--r--drivers/acpi/acpica/dswscope.c (renamed from drivers/acpi/dispatcher/dswscope.c)3
-rw-r--r--drivers/acpi/acpica/dswstate.c (renamed from drivers/acpi/dispatcher/dswstate.c)7
-rw-r--r--drivers/acpi/acpica/evevent.c (renamed from drivers/acpi/events/evevent.c)17
-rw-r--r--drivers/acpi/acpica/evgpe.c (renamed from drivers/acpi/events/evgpe.c)53
-rw-r--r--drivers/acpi/acpica/evgpeblk.c (renamed from drivers/acpi/events/evgpeblk.c)82
-rw-r--r--drivers/acpi/acpica/evmisc.c (renamed from drivers/acpi/events/evmisc.c)62
-rw-r--r--drivers/acpi/acpica/evregion.c (renamed from drivers/acpi/events/evregion.c)140
-rw-r--r--drivers/acpi/acpica/evrgnini.c (renamed from drivers/acpi/events/evrgnini.c)46
-rw-r--r--drivers/acpi/acpica/evsci.c (renamed from drivers/acpi/events/evsci.c)13
-rw-r--r--drivers/acpi/acpica/evxface.c (renamed from drivers/acpi/events/evxface.c)9
-rw-r--r--drivers/acpi/acpica/evxfevnt.c (renamed from drivers/acpi/events/evxfevnt.c)170
-rw-r--r--drivers/acpi/acpica/evxfregn.c (renamed from drivers/acpi/events/evxfregn.c)5
-rw-r--r--drivers/acpi/acpica/exconfig.c (renamed from drivers/acpi/executer/exconfig.c)9
-rw-r--r--drivers/acpi/acpica/exconvrt.c (renamed from drivers/acpi/executer/exconvrt.c)5
-rw-r--r--drivers/acpi/acpica/excreate.c (renamed from drivers/acpi/executer/excreate.c)7
-rw-r--r--drivers/acpi/acpica/exdump.c (renamed from drivers/acpi/executer/exdump.c)7
-rw-r--r--drivers/acpi/acpica/exfield.c (renamed from drivers/acpi/executer/exfield.c)5
-rw-r--r--drivers/acpi/acpica/exfldio.c (renamed from drivers/acpi/executer/exfldio.c)20
-rw-r--r--drivers/acpi/acpica/exmisc.c (renamed from drivers/acpi/executer/exmisc.c)7
-rw-r--r--drivers/acpi/acpica/exmutex.c (renamed from drivers/acpi/executer/exmutex.c)5
-rw-r--r--drivers/acpi/acpica/exnames.c (renamed from drivers/acpi/executer/exnames.c)5
-rw-r--r--drivers/acpi/acpica/exoparg1.c (renamed from drivers/acpi/executer/exoparg1.c)11
-rw-r--r--drivers/acpi/acpica/exoparg2.c (renamed from drivers/acpi/executer/exoparg2.c)9
-rw-r--r--drivers/acpi/acpica/exoparg3.c (renamed from drivers/acpi/executer/exoparg3.c)7
-rw-r--r--drivers/acpi/acpica/exoparg6.c (renamed from drivers/acpi/executer/exoparg6.c)7
-rw-r--r--drivers/acpi/acpica/exprep.c (renamed from drivers/acpi/executer/exprep.c)7
-rw-r--r--drivers/acpi/acpica/exregion.c (renamed from drivers/acpi/executer/exregion.c)3
-rw-r--r--drivers/acpi/acpica/exresnte.c (renamed from drivers/acpi/executer/exresnte.c)7
-rw-r--r--drivers/acpi/acpica/exresolv.c (renamed from drivers/acpi/executer/exresolv.c)9
-rw-r--r--drivers/acpi/acpica/exresop.c (renamed from drivers/acpi/executer/exresop.c)9
-rw-r--r--drivers/acpi/acpica/exstore.c (renamed from drivers/acpi/executer/exstore.c)9
-rw-r--r--drivers/acpi/acpica/exstoren.c (renamed from drivers/acpi/executer/exstoren.c)5
-rw-r--r--drivers/acpi/acpica/exstorob.c (renamed from drivers/acpi/executer/exstorob.c)3
-rw-r--r--drivers/acpi/acpica/exsystem.c (renamed from drivers/acpi/executer/exsystem.c)3
-rw-r--r--drivers/acpi/acpica/exutils.c (renamed from drivers/acpi/executer/exutils.c)5
-rw-r--r--drivers/acpi/acpica/hwacpi.c (renamed from drivers/acpi/hardware/hwacpi.c)1
-rw-r--r--drivers/acpi/acpica/hwgpe.c (renamed from drivers/acpi/hardware/hwgpe.c)78
-rw-r--r--drivers/acpi/acpica/hwregs.c353
-rw-r--r--drivers/acpi/acpica/hwsleep.c (renamed from drivers/acpi/hardware/hwsleep.c)76
-rw-r--r--drivers/acpi/acpica/hwtimer.c (renamed from drivers/acpi/hardware/hwtimer.c)1
-rw-r--r--drivers/acpi/acpica/hwxface.c (renamed from drivers/acpi/hardware/hwregs.c)744
-rw-r--r--drivers/acpi/acpica/nsaccess.c (renamed from drivers/acpi/namespace/nsaccess.c)18
-rw-r--r--drivers/acpi/acpica/nsalloc.c (renamed from drivers/acpi/namespace/nsalloc.c)3
-rw-r--r--drivers/acpi/acpica/nsdump.c (renamed from drivers/acpi/namespace/nsdump.c)3
-rw-r--r--drivers/acpi/acpica/nsdumpdv.c (renamed from drivers/acpi/namespace/nsdumpdv.c)3
-rw-r--r--drivers/acpi/acpica/nseval.c (renamed from drivers/acpi/namespace/nseval.c)77
-rw-r--r--drivers/acpi/acpica/nsinit.c (renamed from drivers/acpi/namespace/nsinit.c)7
-rw-r--r--drivers/acpi/acpica/nsload.c (renamed from drivers/acpi/namespace/nsload.c)7
-rw-r--r--drivers/acpi/acpica/nsnames.c (renamed from drivers/acpi/namespace/nsnames.c)5
-rw-r--r--drivers/acpi/acpica/nsobject.c (renamed from drivers/acpi/namespace/nsobject.c)3
-rw-r--r--drivers/acpi/acpica/nsparse.c (renamed from drivers/acpi/namespace/nsparse.c)9
-rw-r--r--drivers/acpi/acpica/nspredef.c (renamed from drivers/acpi/namespace/nspredef.c)261
-rw-r--r--drivers/acpi/acpica/nssearch.c (renamed from drivers/acpi/namespace/nssearch.c)3
-rw-r--r--drivers/acpi/acpica/nsutils.c (renamed from drivers/acpi/namespace/nsutils.c)15
-rw-r--r--drivers/acpi/acpica/nswalk.c (renamed from drivers/acpi/namespace/nswalk.c)3
-rw-r--r--drivers/acpi/acpica/nsxfeval.c (renamed from drivers/acpi/namespace/nsxfeval.c)5
-rw-r--r--drivers/acpi/acpica/nsxfname.c (renamed from drivers/acpi/namespace/nsxfname.c)3
-rw-r--r--drivers/acpi/acpica/nsxfobj.c (renamed from drivers/acpi/namespace/nsxfobj.c)3
-rw-r--r--drivers/acpi/acpica/psargs.c (renamed from drivers/acpi/parser/psargs.c)9
-rw-r--r--drivers/acpi/acpica/psloop.c (renamed from drivers/acpi/parser/psloop.c)7
-rw-r--r--drivers/acpi/acpica/psopcode.c (renamed from drivers/acpi/parser/psopcode.c)7
-rw-r--r--drivers/acpi/acpica/psparse.c (renamed from drivers/acpi/parser/psparse.c)23
-rw-r--r--drivers/acpi/acpica/psscope.c (renamed from drivers/acpi/parser/psscope.c)3
-rw-r--r--drivers/acpi/acpica/pstree.c (renamed from drivers/acpi/parser/pstree.c)5
-rw-r--r--drivers/acpi/acpica/psutils.c (renamed from drivers/acpi/parser/psutils.c)5
-rw-r--r--drivers/acpi/acpica/pswalk.c (renamed from drivers/acpi/parser/pswalk.c)3
-rw-r--r--drivers/acpi/acpica/psxface.c (renamed from drivers/acpi/parser/psxface.c)40
-rw-r--r--drivers/acpi/acpica/rsaddr.c (renamed from drivers/acpi/resources/rsaddr.c)3
-rw-r--r--drivers/acpi/acpica/rscalc.c (renamed from drivers/acpi/resources/rscalc.c)5
-rw-r--r--drivers/acpi/acpica/rscreate.c (renamed from drivers/acpi/resources/rscreate.c)5
-rw-r--r--drivers/acpi/acpica/rsdump.c (renamed from drivers/acpi/resources/rsdump.c)3
-rw-r--r--drivers/acpi/acpica/rsinfo.c (renamed from drivers/acpi/resources/rsinfo.c)3
-rw-r--r--drivers/acpi/acpica/rsio.c (renamed from drivers/acpi/resources/rsio.c)3
-rw-r--r--drivers/acpi/acpica/rsirq.c (renamed from drivers/acpi/resources/rsirq.c)3
-rw-r--r--drivers/acpi/acpica/rslist.c (renamed from drivers/acpi/resources/rslist.c)3
-rw-r--r--drivers/acpi/acpica/rsmemory.c (renamed from drivers/acpi/resources/rsmemory.c)3
-rw-r--r--drivers/acpi/acpica/rsmisc.c (renamed from drivers/acpi/resources/rsmisc.c)3
-rw-r--r--drivers/acpi/acpica/rsutils.c (renamed from drivers/acpi/resources/rsutils.c)5
-rw-r--r--drivers/acpi/acpica/rsxface.c (renamed from drivers/acpi/resources/rsxface.c)5
-rw-r--r--drivers/acpi/acpica/tbfadt.c (renamed from drivers/acpi/tables/tbfadt.c)252
-rw-r--r--drivers/acpi/acpica/tbfind.c (renamed from drivers/acpi/tables/tbfind.c)3
-rw-r--r--drivers/acpi/acpica/tbinstal.c (renamed from drivers/acpi/tables/tbinstal.c)5
-rw-r--r--drivers/acpi/acpica/tbutils.c (renamed from drivers/acpi/tables/tbutils.c)30
-rw-r--r--drivers/acpi/acpica/tbxface.c (renamed from drivers/acpi/tables/tbxface.c)5
-rw-r--r--drivers/acpi/acpica/tbxfroot.c (renamed from drivers/acpi/tables/tbxfroot.c)3
-rw-r--r--drivers/acpi/acpica/utalloc.c (renamed from drivers/acpi/utilities/utalloc.c)3
-rw-r--r--drivers/acpi/acpica/utcopy.c (renamed from drivers/acpi/utilities/utcopy.c)3
-rw-r--r--drivers/acpi/acpica/utdebug.c (renamed from drivers/acpi/utilities/utdebug.c)95
-rw-r--r--drivers/acpi/acpica/utdelete.c (renamed from drivers/acpi/utilities/utdelete.c)7
-rw-r--r--drivers/acpi/acpica/uteval.c (renamed from drivers/acpi/utilities/uteval.c)11
-rw-r--r--drivers/acpi/acpica/utglobal.c (renamed from drivers/acpi/utilities/utglobal.c)12
-rw-r--r--drivers/acpi/acpica/utinit.c (renamed from drivers/acpi/utilities/utinit.c)7
-rw-r--r--drivers/acpi/acpica/utmath.c (renamed from drivers/acpi/utilities/utmath.c)1
-rw-r--r--drivers/acpi/acpica/utmisc.c (renamed from drivers/acpi/utilities/utmisc.c)23
-rw-r--r--drivers/acpi/acpica/utmutex.c (renamed from drivers/acpi/utilities/utmutex.c)1
-rw-r--r--drivers/acpi/acpica/utobject.c (renamed from drivers/acpi/utilities/utobject.c)3
-rw-r--r--drivers/acpi/acpica/utresrc.c (renamed from drivers/acpi/utilities/utresrc.c)3
-rw-r--r--drivers/acpi/acpica/utstate.c (renamed from drivers/acpi/utilities/utstate.c)1
-rw-r--r--drivers/acpi/acpica/utxface.c (renamed from drivers/acpi/utilities/utxface.c)18
-rw-r--r--drivers/acpi/battery.c5
-rw-r--r--drivers/acpi/cm_sbs.c3
-rw-r--r--drivers/acpi/debug.c1
-rw-r--r--drivers/acpi/dispatcher/Makefile9
-rw-r--r--drivers/acpi/ec.c57
-rw-r--r--drivers/acpi/events/Makefile9
-rw-r--r--drivers/acpi/executer/Makefile10
-rw-r--r--drivers/acpi/hardware/Makefile9
-rw-r--r--drivers/acpi/main.c (renamed from drivers/acpi/sleep/main.c)79
-rw-r--r--drivers/acpi/namespace/Makefile12
-rw-r--r--drivers/acpi/numa.c1
-rw-r--r--drivers/acpi/osl.c4
-rw-r--r--drivers/acpi/parser/Makefile8
-rw-r--r--drivers/acpi/pci_bind.c90
-rw-r--r--drivers/acpi/pci_irq.c472
-rw-r--r--drivers/acpi/pci_link.c6
-rw-r--r--drivers/acpi/power.c6
-rw-r--r--drivers/acpi/proc.c (renamed from drivers/acpi/sleep/proc.c)65
-rw-r--r--drivers/acpi/reboot.c2
-rw-r--r--drivers/acpi/resources/Makefile10
-rw-r--r--drivers/acpi/sbshc.c1
-rw-r--r--drivers/acpi/scan.c1
-rw-r--r--drivers/acpi/sleep.h (renamed from drivers/acpi/sleep/sleep.h)0
-rw-r--r--drivers/acpi/sleep/Makefile5
-rw-r--r--drivers/acpi/system.c63
-rw-r--r--drivers/acpi/tables/Makefile7
-rw-r--r--drivers/acpi/utilities/Makefile9
-rw-r--r--drivers/acpi/utilities/utcache.c314
-rw-r--r--drivers/acpi/video.c20
-rw-r--r--drivers/acpi/video_detect.c4
-rw-r--r--drivers/acpi/wakeup.c (renamed from drivers/acpi/sleep/wakeup.c)6
-rw-r--r--drivers/ata/libata-acpi.c6
-rw-r--r--drivers/ata/libata-core.c16
-rw-r--r--drivers/ata/libata-sff.c24
-rw-r--r--drivers/ata/pata_acpi.c6
-rw-r--r--drivers/char/tpm/tpm_bios.c2
-rw-r--r--drivers/cpuidle/governors/menu.c10
-rw-r--r--drivers/dca/dca-core.c2
-rw-r--r--drivers/dma/Kconfig2
-rw-r--r--drivers/dma/dmaengine.c778
-rw-r--r--drivers/dma/dmatest.c129
-rw-r--r--drivers/dma/dw_dmac.c119
-rw-r--r--drivers/dma/fsldma.c5
-rw-r--r--drivers/dma/ioat.c92
-rw-r--r--drivers/dma/ioat_dma.c18
-rw-r--r--drivers/dma/iop-adma.c30
-rw-r--r--drivers/dma/mv_xor.c11
-rw-r--r--drivers/ide/ide-acpi.c6
-rw-r--r--drivers/leds/Kconfig15
-rw-r--r--drivers/leds/Makefile2
-rw-r--r--drivers/leds/led-class.c24
-rw-r--r--drivers/leds/leds-alix2.c181
-rw-r--r--drivers/leds/leds-ams-delta.c33
-rw-r--r--drivers/leds/leds-clevo-mail.c21
-rw-r--r--drivers/leds/leds-fsg.c37
-rw-r--r--drivers/leds/leds-gpio.c36
-rw-r--r--drivers/leds/leds-hp-disk.c20
-rw-r--r--drivers/leds/leds-hp6xx.c22
-rw-r--r--drivers/leds/leds-net48xx.c21
-rw-r--r--drivers/leds/leds-pca9532.c77
-rw-r--r--drivers/leds/leds-s3c24xx.c25
-rw-r--r--drivers/leds/leds-wm8350.c311
-rw-r--r--drivers/leds/leds-wrap.c27
-rw-r--r--drivers/leds/ledtrig-timer.c5
-rw-r--r--drivers/mfd/wm8350-core.c3
-rw-r--r--drivers/misc/Kconfig284
-rw-r--r--drivers/misc/Makefile13
-rw-r--r--drivers/mmc/host/atmel-mci.c103
-rw-r--r--drivers/mtd/Kconfig10
-rw-r--r--drivers/mtd/Makefile2
-rw-r--r--drivers/mtd/chips/cfi_cmdset_0001.c12
-rw-r--r--drivers/mtd/chips/cfi_cmdset_0002.c18
-rw-r--r--drivers/mtd/chips/cfi_cmdset_0020.c14
-rw-r--r--drivers/mtd/chips/fwh_lock.h4
-rw-r--r--drivers/mtd/devices/lart.c6
-rw-r--r--drivers/mtd/devices/m25p80.c41
-rw-r--r--drivers/mtd/devices/mtd_dataflash.c24
-rw-r--r--drivers/mtd/ftl.c100
-rw-r--r--drivers/mtd/inftlcore.c2
-rw-r--r--drivers/mtd/inftlmount.c4
-rw-r--r--drivers/mtd/lpddr/Kconfig22
-rw-r--r--drivers/mtd/lpddr/Makefile6
-rw-r--r--drivers/mtd/lpddr/lpddr_cmds.c796
-rw-r--r--drivers/mtd/lpddr/qinfo_probe.c255
-rw-r--r--drivers/mtd/maps/Kconfig21
-rw-r--r--drivers/mtd/maps/alchemy-flash.c2
-rw-r--r--drivers/mtd/maps/amd76xrom.c4
-rw-r--r--drivers/mtd/maps/cfi_flagadm.c2
-rw-r--r--drivers/mtd/maps/ck804xrom.c4
-rw-r--r--drivers/mtd/maps/dbox2-flash.c2
-rw-r--r--drivers/mtd/maps/edb7312.c2
-rw-r--r--drivers/mtd/maps/esb2rom.c4
-rw-r--r--drivers/mtd/maps/fortunet.c2
-rw-r--r--drivers/mtd/maps/h720x-flash.c2
-rw-r--r--drivers/mtd/maps/ichxrom.c4
-rw-r--r--drivers/mtd/maps/impa7.c2
-rw-r--r--drivers/mtd/maps/ipaq-flash.c2
-rw-r--r--drivers/mtd/maps/mbx860.c2
-rw-r--r--drivers/mtd/maps/nettel.c9
-rw-r--r--drivers/mtd/maps/octagon-5066.c2
-rw-r--r--drivers/mtd/maps/physmap.c41
-rw-r--r--drivers/mtd/maps/pmcmsp-flash.c2
-rw-r--r--drivers/mtd/maps/redwood.c2
-rw-r--r--drivers/mtd/maps/rpxlite.c2
-rw-r--r--drivers/mtd/maps/sbc8240.c2
-rw-r--r--drivers/mtd/maps/scb2_flash.c8
-rw-r--r--drivers/mtd/maps/sharpsl-flash.c2
-rw-r--r--drivers/mtd/maps/tqm8xxl.c2
-rw-r--r--drivers/mtd/maps/uclinux.c4
-rw-r--r--drivers/mtd/maps/vmax301.c2
-rw-r--r--drivers/mtd/maps/wr_sbc82xx_flash.c2
-rw-r--r--drivers/mtd/mtdchar.c6
-rw-r--r--drivers/mtd/mtdconcat.c35
-rw-r--r--drivers/mtd/mtdcore.c16
-rw-r--r--drivers/mtd/mtdoops.c9
-rw-r--r--drivers/mtd/mtdpart.c34
-rw-r--r--drivers/mtd/nand/Kconfig7
-rw-r--r--drivers/mtd/nand/alauda.c6
-rw-r--r--drivers/mtd/nand/cafe_nand.c7
-rw-r--r--drivers/mtd/nand/fsl_elbc_nand.c4
-rw-r--r--drivers/mtd/nand/nand_base.c25
-rw-r--r--drivers/mtd/nand/nand_bbt.c31
-rw-r--r--drivers/mtd/nand/nandsim.c339
-rw-r--r--drivers/mtd/nand/ndfc.c269
-rw-r--r--drivers/mtd/nand/pxa3xx_nand.c6
-rw-r--r--drivers/mtd/nand/sharpsl.c247
-rw-r--r--drivers/mtd/nftlcore.c2
-rw-r--r--drivers/mtd/nftlmount.c4
-rw-r--r--drivers/mtd/onenand/onenand_base.c8
-rw-r--r--drivers/mtd/rfd_ftl.c29
-rw-r--r--drivers/mtd/ssfdc.c7
-rw-r--r--drivers/mtd/tests/Makefile7
-rw-r--r--drivers/mtd/tests/mtd_oobtest.c742
-rw-r--r--drivers/mtd/tests/mtd_pagetest.c632
-rw-r--r--drivers/mtd/tests/mtd_readtest.c253
-rw-r--r--drivers/mtd/tests/mtd_speedtest.c502
-rw-r--r--drivers/mtd/tests/mtd_stresstest.c330
-rw-r--r--drivers/mtd/tests/mtd_subpagetest.c525
-rw-r--r--drivers/mtd/tests/mtd_torturetest.c530
-rw-r--r--drivers/mtd/ubi/build.c2
-rw-r--r--drivers/mtd/ubi/gluebi.c17
-rw-r--r--drivers/oprofile/buffer_sync.c188
-rw-r--r--drivers/oprofile/cpu_buffer.c316
-rw-r--r--drivers/oprofile/cpu_buffer.h89
-rw-r--r--drivers/oprofile/event_buffer.c4
-rw-r--r--drivers/oprofile/oprof.c4
-rw-r--r--drivers/oprofile/oprof.h8
-rw-r--r--drivers/oprofile/oprofile_files.c27
-rw-r--r--drivers/parisc/asp.c3
-rw-r--r--drivers/parisc/ccio-dma.c4
-rw-r--r--drivers/parisc/dino.c4
-rw-r--r--drivers/parisc/hppb.c2
-rw-r--r--drivers/parisc/lasi.c5
-rw-r--r--drivers/parisc/lba_pci.c2
-rw-r--r--drivers/parisc/sba_iommu.c9
-rw-r--r--drivers/parisc/wax.c3
-rw-r--r--drivers/pci/hotplug/acpi_pcihp.c1
-rw-r--r--drivers/pci/hotplug/pciehp.h1
-rw-r--r--drivers/pci/pci-acpi.c2
-rw-r--r--drivers/platform/Kconfig5
-rw-r--r--drivers/platform/Makefile5
-rw-r--r--drivers/platform/x86/Kconfig375
-rw-r--r--drivers/platform/x86/Makefile19
-rw-r--r--drivers/platform/x86/acer-wmi.c (renamed from drivers/misc/acer-wmi.c)0
-rw-r--r--drivers/platform/x86/asus-laptop.c (renamed from drivers/misc/asus-laptop.c)0
-rw-r--r--drivers/platform/x86/asus_acpi.c (renamed from drivers/acpi/asus_acpi.c)0
-rw-r--r--drivers/platform/x86/compal-laptop.c (renamed from drivers/misc/compal-laptop.c)0
-rw-r--r--drivers/platform/x86/eeepc-laptop.c (renamed from drivers/misc/eeepc-laptop.c)0
-rw-r--r--drivers/platform/x86/fujitsu-laptop.c (renamed from drivers/misc/fujitsu-laptop.c)419
-rw-r--r--drivers/platform/x86/hp-wmi.c (renamed from drivers/misc/hp-wmi.c)0
-rw-r--r--drivers/platform/x86/intel_menlow.c (renamed from drivers/misc/intel_menlow.c)0
-rw-r--r--drivers/platform/x86/msi-laptop.c (renamed from drivers/misc/msi-laptop.c)0
-rw-r--r--drivers/platform/x86/panasonic-laptop.c (renamed from drivers/misc/panasonic-laptop.c)22
-rw-r--r--drivers/platform/x86/sony-laptop.c (renamed from drivers/misc/sony-laptop.c)15
-rw-r--r--drivers/platform/x86/tc1100-wmi.c (renamed from drivers/misc/tc1100-wmi.c)1
-rw-r--r--drivers/platform/x86/thinkpad_acpi.c (renamed from drivers/misc/thinkpad_acpi.c)1
-rw-r--r--drivers/platform/x86/toshiba_acpi.c (renamed from drivers/acpi/toshiba_acpi.c)0
-rw-r--r--drivers/platform/x86/wmi.c (renamed from drivers/acpi/wmi.c)0
-rw-r--r--drivers/pnp/pnpacpi/core.c1
-rw-r--r--drivers/regulator/wm8350-regulator.c91
-rw-r--r--drivers/rtc/rtc-parisc.c3
-rw-r--r--drivers/video/backlight/Kconfig15
-rw-r--r--drivers/video/backlight/Makefile2
-rw-r--r--drivers/video/backlight/backlight.c73
-rw-r--r--drivers/video/backlight/corgi_bl.c169
-rw-r--r--drivers/video/backlight/cr_bllcd.c18
-rw-r--r--drivers/video/backlight/generic_bl.c147
-rw-r--r--drivers/video/backlight/hp680_bl.c20
-rw-r--r--drivers/video/backlight/mbp_nvidia_bl.c1
-rw-r--r--drivers/video/backlight/progear_bl.c20
-rw-r--r--drivers/video/backlight/tdo24m.c94
-rw-r--r--drivers/video/backlight/tosa_lcd.c27
-rw-r--r--drivers/video/backlight/vgg2432a4.c2
-rw-r--r--fs/Kconfig19
-rw-r--r--fs/Makefile1
-rw-r--r--fs/btrfs/Makefile25
-rw-r--r--fs/btrfs/acl.c351
-rw-r--r--fs/btrfs/async-thread.c419
-rw-r--r--fs/btrfs/async-thread.h101
-rw-r--r--fs/btrfs/btrfs_inode.h131
-rw-r--r--fs/btrfs/compat.h7
-rw-r--r--fs/btrfs/compression.c709
-rw-r--r--fs/btrfs/compression.h47
-rw-r--r--fs/btrfs/crc32c.h29
-rw-r--r--fs/btrfs/ctree.c3953
-rw-r--r--fs/btrfs/ctree.h2129
-rw-r--r--fs/btrfs/dir-item.c386
-rw-r--r--fs/btrfs/disk-io.c2343
-rw-r--r--fs/btrfs/disk-io.h102
-rw-r--r--fs/btrfs/export.c203
-rw-r--r--fs/btrfs/export.h19
-rw-r--r--fs/btrfs/extent-tree.c5986
-rw-r--r--fs/btrfs/extent_io.c3717
-rw-r--r--fs/btrfs/extent_io.h269
-rw-r--r--fs/btrfs/extent_map.c351
-rw-r--r--fs/btrfs/extent_map.h62
-rw-r--r--fs/btrfs/file-item.c831
-rw-r--r--fs/btrfs/file.c1288
-rw-r--r--fs/btrfs/free-space-cache.c495
-rw-r--r--fs/btrfs/hash.h27
-rw-r--r--fs/btrfs/inode-item.c206
-rw-r--r--fs/btrfs/inode-map.c144
-rw-r--r--fs/btrfs/inode.c5035
-rw-r--r--fs/btrfs/ioctl.c1132
-rw-r--r--fs/btrfs/ioctl.h67
-rw-r--r--fs/btrfs/locking.c88
-rw-r--r--fs/btrfs/locking.h27
-rw-r--r--fs/btrfs/ordered-data.c730
-rw-r--r--fs/btrfs/ordered-data.h158
-rw-r--r--fs/btrfs/orphan.c67
-rw-r--r--fs/btrfs/print-tree.c216
-rw-r--r--fs/btrfs/print-tree.h23
-rw-r--r--fs/btrfs/ref-cache.c230
-rw-r--r--fs/btrfs/ref-cache.h77
-rw-r--r--fs/btrfs/root-tree.c366
-rw-r--r--fs/btrfs/struct-funcs.c139
-rw-r--r--fs/btrfs/super.c720
-rw-r--r--fs/btrfs/sysfs.c269
-rw-r--r--fs/btrfs/transaction.c1097
-rw-r--r--fs/btrfs/transaction.h106
-rw-r--r--fs/btrfs/tree-defrag.c147
-rw-r--r--fs/btrfs/tree-log.c2898
-rw-r--r--fs/btrfs/tree-log.h41
-rw-r--r--fs/btrfs/version.h4
-rw-r--r--fs/btrfs/version.sh43
-rw-r--r--fs/btrfs/volumes.c3218
-rw-r--r--fs/btrfs/volumes.h162
-rw-r--r--fs/btrfs/xattr.c322
-rw-r--r--fs/btrfs/xattr.h39
-rw-r--r--fs/btrfs/zlib.c632
-rw-r--r--fs/jffs2/compr_rubin.c120
-rw-r--r--fs/jffs2/erase.c5
-rw-r--r--fs/partitions/check.c1
-rw-r--r--include/acpi/acdisasm.h445
-rw-r--r--include/acpi/acexcep.h6
-rw-r--r--include/acpi/acoutput.h103
-rw-r--r--include/acpi/acpi.h31
-rw-r--r--include/acpi/acpiosxf.h13
-rw-r--r--include/acpi/acpixf.h100
-rw-r--r--include/acpi/acrestyp.h405
-rw-r--r--include/acpi/actbl.h25
-rw-r--r--include/acpi/actbl1.h2
-rw-r--r--include/acpi/actypes.h557
-rw-r--r--include/acpi/platform/acenv.h45
-rw-r--r--include/acpi/platform/aclinux.h4
-rw-r--r--include/linux/acpi.h17
-rw-r--r--include/linux/async_tx.h17
-rw-r--r--include/linux/atmel-mci.h6
-rw-r--r--include/linux/backlight.h16
-rw-r--r--include/linux/dmaengine.h181
-rw-r--r--include/linux/dw_dmac.h31
-rw-r--r--include/linux/leds-pca9532.h2
-rw-r--r--include/linux/leds.h5
-rw-r--r--include/linux/mfd/wm8350/pmic.h36
-rw-r--r--include/linux/mtd/cfi.h1
-rw-r--r--include/linux/mtd/ftl.h38
-rw-r--r--include/linux/mtd/map.h1
-rw-r--r--include/linux/mtd/mtd.h75
-rw-r--r--include/linux/mtd/nand.h7
-rw-r--r--include/linux/mtd/partitions.h6
-rw-r--r--include/linux/mtd/pfow.h159
-rw-r--r--include/linux/mtd/physmap.h1
-rw-r--r--include/linux/mtd/qinfo.h91
-rw-r--r--include/linux/mtd/sharpsl.h20
-rw-r--r--include/linux/netdevice.h3
-rw-r--r--include/linux/oprofile.h18
-rw-r--r--include/linux/pci_hotplug.h1
-rw-r--r--include/linux/spi/tdo24m.h13
-rw-r--r--include/linux/suspend.h13
-rw-r--r--include/net/netdma.h11
-rw-r--r--kernel/cred.c2
-rw-r--r--kernel/power/disk.c6
-rw-r--r--kernel/power/snapshot.c370
-rw-r--r--kernel/power/swsusp.c122
-rw-r--r--kernel/trace/ring_buffer.c8
-rw-r--r--net/core/dev.c149
-rw-r--r--net/ipv4/tcp.c5
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv6/tcp_ipv6.c2
486 files changed, 56575 insertions, 7414 deletions
diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt
index c1e9545c59bd..9f59fcbf5d82 100644
--- a/Documentation/crypto/async-tx-api.txt
+++ b/Documentation/crypto/async-tx-api.txt
@@ -13,9 +13,9 @@
133.6 Constraints 133.6 Constraints
143.7 Example 143.7 Example
15 15
164 DRIVER DEVELOPER NOTES 164 DMAENGINE DRIVER DEVELOPER NOTES
174.1 Conformance points 174.1 Conformance points
184.2 "My application needs finer control of hardware channels" 184.2 "My application needs exclusive control of hardware channels"
19 19
205 SOURCE 205 SOURCE
21 21
@@ -150,6 +150,7 @@ ops_run_* and ops_complete_* routines in drivers/md/raid5.c for more
150implementation examples. 150implementation examples.
151 151
1524 DRIVER DEVELOPMENT NOTES 1524 DRIVER DEVELOPMENT NOTES
153
1534.1 Conformance points: 1544.1 Conformance points:
154There are a few conformance points required in dmaengine drivers to 155There are a few conformance points required in dmaengine drivers to
155accommodate assumptions made by applications using the async_tx API: 156accommodate assumptions made by applications using the async_tx API:
@@ -158,58 +159,49 @@ accommodate assumptions made by applications using the async_tx API:
1583/ Use async_tx_run_dependencies() in the descriptor clean up path to 1593/ Use async_tx_run_dependencies() in the descriptor clean up path to
159 handle submission of dependent operations 160 handle submission of dependent operations
160 161
1614.2 "My application needs finer control of hardware channels" 1624.2 "My application needs exclusive control of hardware channels"
162This requirement seems to arise from cases where a DMA engine driver is 163Primarily this requirement arises from cases where a DMA engine driver
163trying to support device-to-memory DMA. The dmaengine and async_tx 164is being used to support device-to-memory operations. A channel that is
164implementations were designed for offloading memory-to-memory 165performing these operations cannot, for many platform specific reasons,
165operations; however, there are some capabilities of the dmaengine layer 166be shared. For these cases the dma_request_channel() interface is
166that can be used for platform-specific channel management. 167provided.
167Platform-specific constraints can be handled by registering the 168
168application as a 'dma_client' and implementing a 'dma_event_callback' to 169The interface is:
169apply a filter to the available channels in the system. Before showing 170struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
170how to implement a custom dma_event callback some background of 171 dma_filter_fn filter_fn,
171dmaengine's client support is required. 172 void *filter_param);
172 173
173The following routines in dmaengine support multiple clients requesting 174Where dma_filter_fn is defined as:
174use of a channel: 175typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
175- dma_async_client_register(struct dma_client *client) 176
176- dma_async_client_chan_request(struct dma_client *client) 177When the optional 'filter_fn' parameter is set to NULL
177 178dma_request_channel simply returns the first channel that satisfies the
178dma_async_client_register takes a pointer to an initialized dma_client 179capability mask. Otherwise, when the mask parameter is insufficient for
179structure. It expects that the 'event_callback' and 'cap_mask' fields 180specifying the necessary channel, the filter_fn routine can be used to
180are already initialized. 181disposition the available channels in the system. The filter_fn routine
181 182is called once for each free channel in the system. Upon seeing a
182dma_async_client_chan_request triggers dmaengine to notify the client of 183suitable channel filter_fn returns DMA_ACK which flags that channel to
183all channels that satisfy the capability mask. It is up to the client's 184be the return value from dma_request_channel. A channel allocated via
184event_callback routine to track how many channels the client needs and 185this interface is exclusive to the caller, until dma_release_channel()
185how many it is currently using. The dma_event_callback routine returns a 186is called.
186dma_state_client code to let dmaengine know the status of the 187
187allocation. 188The DMA_PRIVATE capability flag is used to tag dma devices that should
188 189not be used by the general-purpose allocator. It can be set at
189Below is the example of how to extend this functionality for 190initialization time if it is known that a channel will always be
190platform-specific filtering of the available channels beyond the 191private. Alternatively, it is set when dma_request_channel() finds an
191standard capability mask: 192unused "public" channel.
192 193
193static enum dma_state_client 194A couple caveats to note when implementing a driver and consumer:
194my_dma_client_callback(struct dma_client *client, 1951/ Once a channel has been privately allocated it will no longer be
195 struct dma_chan *chan, enum dma_state state) 196 considered by the general-purpose allocator even after a call to
196{ 197 dma_release_channel().
197 struct dma_device *dma_dev; 1982/ Since capabilities are specified at the device level a dma_device
198 struct my_platform_specific_dma *plat_dma_dev; 199 with multiple channels will either have all channels public, or all
199 200 channels private.
200 dma_dev = chan->device;
201 plat_dma_dev = container_of(dma_dev,
202 struct my_platform_specific_dma,
203 dma_dev);
204
205 if (!plat_dma_dev->platform_specific_capability)
206 return DMA_DUP;
207
208 . . .
209}
210 201
2115 SOURCE 2025 SOURCE
212include/linux/dmaengine.h: core header file for DMA drivers and clients 203
204include/linux/dmaengine.h: core header file for DMA drivers and api users
213drivers/dma/dmaengine.c: offload engine channel management routines 205drivers/dma/dmaengine.c: offload engine channel management routines
214drivers/dma/: location for offload engine drivers 206drivers/dma/: location for offload engine drivers
215include/linux/async_tx.h: core header file for the async_tx api 207include/linux/async_tx.h: core header file for the async_tx api
diff --git a/Documentation/dmaengine.txt b/Documentation/dmaengine.txt
new file mode 100644
index 000000000000..0c1c2f63c0a9
--- /dev/null
+++ b/Documentation/dmaengine.txt
@@ -0,0 +1 @@
See Documentation/crypto/async-tx-api.txt
diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt
new file mode 100644
index 000000000000..64087c34327f
--- /dev/null
+++ b/Documentation/filesystems/btrfs.txt
@@ -0,0 +1,91 @@
1
2 BTRFS
3 =====
4
5Btrfs is a new copy on write filesystem for Linux aimed at
6implementing advanced features while focusing on fault tolerance,
7repair and easy administration. Initially developed by Oracle, Btrfs
8is licensed under the GPL and open for contribution from anyone.
9
10Linux has a wealth of filesystems to choose from, but we are facing a
11number of challenges with scaling to the large storage subsystems that
12are becoming common in today's data centers. Filesystems need to scale
13in their ability to address and manage large storage, and also in
14their ability to detect, repair and tolerate errors in the data stored
15on disk. Btrfs is under heavy development, and is not suitable for
16any uses other than benchmarking and review. The Btrfs disk format is
17not yet finalized.
18
19The main Btrfs features include:
20
21 * Extent based file storage (2^64 max file size)
22 * Space efficient packing of small files
23 * Space efficient indexed directories
24 * Dynamic inode allocation
25 * Writable snapshots
26 * Subvolumes (separate internal filesystem roots)
27 * Object level mirroring and striping
28 * Checksums on data and metadata (multiple algorithms available)
29 * Compression
30 * Integrated multiple device support, with several raid algorithms
31 * Online filesystem check (not yet implemented)
32 * Very fast offline filesystem check
33 * Efficient incremental backup and FS mirroring (not yet implemented)
34 * Online filesystem defragmentation
35
36
37
38 MAILING LIST
39 ============
40
41There is a Btrfs mailing list hosted on vger.kernel.org. You can
42find details on how to subscribe here:
43
44http://vger.kernel.org/vger-lists.html#linux-btrfs
45
46Mailing list archives are available from gmane:
47
48http://dir.gmane.org/gmane.comp.file-systems.btrfs
49
50
51
52 IRC
53 ===
54
55Discussion of Btrfs also occurs on the #btrfs channel of the Freenode
56IRC network.
57
58
59
60 UTILITIES
61 =========
62
63Userspace tools for creating and manipulating Btrfs file systems are
64available from the git repository at the following location:
65
66 http://git.kernel.org/?p=linux/kernel/git/mason/btrfs-progs-unstable.git
67 git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-progs-unstable.git
68
69These include the following tools:
70
71mkfs.btrfs: create a filesystem
72
73btrfsctl: control program to create snapshots and subvolumes:
74
75 mount /dev/sda2 /mnt
76 btrfsctl -s new_subvol_name /mnt
77 btrfsctl -s snapshot_of_default /mnt/default
78 btrfsctl -s snapshot_of_new_subvol /mnt/new_subvol_name
79 btrfsctl -s snapshot_of_a_snapshot /mnt/snapshot_of_new_subvol
80 ls /mnt
81 default snapshot_of_a_snapshot snapshot_of_new_subvol
82 new_subvol_name snapshot_of_default
83
84 Snapshots and subvolumes cannot be deleted right now, but you can
85 rm -rf all the files and directories inside them.
86
87btrfsck: do a limited check of the FS extent trees.
88
89btrfs-debug-tree: print all of the FS metadata in text form. Example:
90
91 btrfs-debug-tree /dev/sda2 >& big_output_file
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index ed0a72442cf3..8511d3532c27 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -141,6 +141,7 @@ and is between 256 and 4096 characters. It is defined in the file
141 ht -- run only enough ACPI to enable Hyper Threading 141 ht -- run only enough ACPI to enable Hyper Threading
142 strict -- Be less tolerant of platforms that are not 142 strict -- Be less tolerant of platforms that are not
143 strictly ACPI specification compliant. 143 strictly ACPI specification compliant.
144 rsdt -- prefer RSDT over (default) XSDT
144 145
145 See also Documentation/power/pm.txt, pci=noacpi 146 See also Documentation/power/pm.txt, pci=noacpi
146 147
@@ -151,16 +152,20 @@ and is between 256 and 4096 characters. It is defined in the file
151 default: 0 152 default: 0
152 153
153 acpi_sleep= [HW,ACPI] Sleep options 154 acpi_sleep= [HW,ACPI] Sleep options
154 Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, old_ordering } 155 Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig,
155 See Documentation/power/video.txt for s3_bios and s3_mode. 156 old_ordering, s4_nonvs }
157 See Documentation/power/video.txt for information on
158 s3_bios and s3_mode.
156 s3_beep is for debugging; it makes the PC's speaker beep 159 s3_beep is for debugging; it makes the PC's speaker beep
157 as soon as the kernel's real-mode entry point is called. 160 as soon as the kernel's real-mode entry point is called.
158 s4_nohwsig prevents ACPI hardware signature from being 161 s4_nohwsig prevents ACPI hardware signature from being
159 used during resume from hibernation. 162 used during resume from hibernation.
160 old_ordering causes the ACPI 1.0 ordering of the _PTS 163 old_ordering causes the ACPI 1.0 ordering of the _PTS
161 control method, wrt putting devices into low power 164 control method, with respect to putting devices into
162 states, to be enforced (the ACPI 2.0 ordering of _PTS is 165 low power states, to be enforced (the ACPI 2.0 ordering
163 used by default). 166 of _PTS is used by default).
167 s4_nonvs prevents the kernel from saving/restoring the
168 ACPI NVS memory during hibernation.
164 169
165 acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode 170 acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode
166 Format: { level | edge | high | low } 171 Format: { level | edge | high | low }
@@ -195,7 +200,7 @@ and is between 256 and 4096 characters. It is defined in the file
195 acpi_skip_timer_override [HW,ACPI] 200 acpi_skip_timer_override [HW,ACPI]
196 Recognize and ignore IRQ0/pin2 Interrupt Override. 201 Recognize and ignore IRQ0/pin2 Interrupt Override.
197 For broken nForce2 BIOS resulting in XT-PIC timer. 202 For broken nForce2 BIOS resulting in XT-PIC timer.
198 acpi_use_timer_override [HW,ACPI} 203 acpi_use_timer_override [HW,ACPI]
199 Use timer override. For some broken Nvidia NF5 boards 204 Use timer override. For some broken Nvidia NF5 boards
200 that require a timer override, but don't have 205 that require a timer override, but don't have
201 HPET 206 HPET
@@ -878,17 +883,19 @@ and is between 256 and 4096 characters. It is defined in the file
878 See Documentation/ide/ide.txt. 883 See Documentation/ide/ide.txt.
879 884
880 idle= [X86] 885 idle= [X86]
881 Format: idle=poll or idle=mwait, idle=halt, idle=nomwait 886 Format: idle=poll, idle=mwait, idle=halt, idle=nomwait
882 Poll forces a polling idle loop that can slightly improves the performance 887 Poll forces a polling idle loop that can slightly
883 of waking up a idle CPU, but will use a lot of power and make the system 888 improve the performance of waking up a idle CPU, but
884 run hot. Not recommended. 889 will use a lot of power and make the system run hot.
885 idle=mwait. On systems which support MONITOR/MWAIT but the kernel chose 890 Not recommended.
886 to not use it because it doesn't save as much power as a normal idle 891 idle=mwait: On systems which support MONITOR/MWAIT but
887 loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same 892 the kernel chose to not use it because it doesn't save
888 as idle=poll. 893 as much power as a normal idle loop, use the
889 idle=halt. Halt is forced to be used for CPU idle. 894 MONITOR/MWAIT idle loop anyways. Performance should be
895 the same as idle=poll.
896 idle=halt: Halt is forced to be used for CPU idle.
890 In such case C2/C3 won't be used again. 897 In such case C2/C3 won't be used again.
891 idle=nomwait. Disable mwait for CPU C-states 898 idle=nomwait: Disable mwait for CPU C-states
892 899
893 ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem 900 ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
894 Claim all unknown PCI IDE storage controllers. 901 Claim all unknown PCI IDE storage controllers.
@@ -1074,8 +1081,8 @@ and is between 256 and 4096 characters. It is defined in the file
1074 lapic [X86-32,APIC] Enable the local APIC even if BIOS 1081 lapic [X86-32,APIC] Enable the local APIC even if BIOS
1075 disabled it. 1082 disabled it.
1076 1083
1077 lapic_timer_c2_ok [X86-32,x86-64,APIC] trust the local apic timer in 1084 lapic_timer_c2_ok [X86-32,x86-64,APIC] trust the local apic timer
1078 C2 power state. 1085 in C2 power state.
1079 1086
1080 libata.dma= [LIBATA] DMA control 1087 libata.dma= [LIBATA] DMA control
1081 libata.dma=0 Disable all PATA and SATA DMA 1088 libata.dma=0 Disable all PATA and SATA DMA
@@ -2303,7 +2310,8 @@ and is between 256 and 4096 characters. It is defined in the file
2303 2310
2304 thermal.psv= [HW,ACPI] 2311 thermal.psv= [HW,ACPI]
2305 -1: disable all passive trip points 2312 -1: disable all passive trip points
2306 <degrees C>: override all passive trip points to this value 2313 <degrees C>: override all passive trip points to this
2314 value
2307 2315
2308 thermal.tzp= [HW,ACPI] 2316 thermal.tzp= [HW,ACPI]
2309 Specify global default ACPI thermal zone polling rate 2317 Specify global default ACPI thermal zone polling rate
diff --git a/Documentation/powerpc/dts-bindings/4xx/ndfc.txt b/Documentation/powerpc/dts-bindings/4xx/ndfc.txt
new file mode 100644
index 000000000000..869f0b5f16e8
--- /dev/null
+++ b/Documentation/powerpc/dts-bindings/4xx/ndfc.txt
@@ -0,0 +1,39 @@
1AMCC NDFC (NanD Flash Controller)
2
3Required properties:
4- compatible : "ibm,ndfc".
5- reg : should specify chip select and size used for the chip (0x2000).
6
7Optional properties:
8- ccr : NDFC config and control register value (default 0).
9- bank-settings : NDFC bank configuration register value (default 0).
10
11Notes:
12- partition(s) - follows the OF MTD standard for partitions
13
14Example:
15
16ndfc@1,0 {
17 compatible = "ibm,ndfc";
18 reg = <0x00000001 0x00000000 0x00002000>;
19 ccr = <0x00001000>;
20 bank-settings = <0x80002222>;
21 #address-cells = <1>;
22 #size-cells = <1>;
23
24 nand {
25 #address-cells = <1>;
26 #size-cells = <1>;
27
28 partition@0 {
29 label = "kernel";
30 reg = <0x00000000 0x00200000>;
31 };
32 partition@200000 {
33 label = "root";
34 reg = <0x00200000 0x03E00000>;
35 };
36 };
37};
38
39
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index c5e28a46b292..a8d91b6c136b 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -27,6 +27,7 @@
27#include <linux/spi/spi.h> 27#include <linux/spi/spi.h>
28#include <linux/spi/ads7846.h> 28#include <linux/spi/ads7846.h>
29#include <linux/spi/corgi_lcd.h> 29#include <linux/spi/corgi_lcd.h>
30#include <linux/mtd/sharpsl.h>
30#include <video/w100fb.h> 31#include <video/w100fb.h>
31 32
32#include <asm/setup.h> 33#include <asm/setup.h>
@@ -542,6 +543,55 @@ err_free_1:
542static inline void corgi_init_spi(void) {} 543static inline void corgi_init_spi(void) {}
543#endif 544#endif
544 545
546static struct mtd_partition sharpsl_nand_partitions[] = {
547 {
548 .name = "System Area",
549 .offset = 0,
550 .size = 7 * 1024 * 1024,
551 },
552 {
553 .name = "Root Filesystem",
554 .offset = 7 * 1024 * 1024,
555 .size = 25 * 1024 * 1024,
556 },
557 {
558 .name = "Home Filesystem",
559 .offset = MTDPART_OFS_APPEND,
560 .size = MTDPART_SIZ_FULL,
561 },
562};
563
564static uint8_t scan_ff_pattern[] = { 0xff, 0xff };
565
566static struct nand_bbt_descr sharpsl_bbt = {
567 .options = 0,
568 .offs = 4,
569 .len = 2,
570 .pattern = scan_ff_pattern
571};
572
573static struct sharpsl_nand_platform_data sharpsl_nand_platform_data = {
574 .badblock_pattern = &sharpsl_bbt,
575 .partitions = sharpsl_nand_partitions,
576 .nr_partitions = ARRAY_SIZE(sharpsl_nand_partitions),
577};
578
579static struct resource sharpsl_nand_resources[] = {
580 {
581 .start = 0x0C000000,
582 .end = 0x0C000FFF,
583 .flags = IORESOURCE_MEM,
584 },
585};
586
587static struct platform_device sharpsl_nand_device = {
588 .name = "sharpsl-nand",
589 .id = -1,
590 .resource = sharpsl_nand_resources,
591 .num_resources = ARRAY_SIZE(sharpsl_nand_resources),
592 .dev.platform_data = &sharpsl_nand_platform_data,
593};
594
545static struct mtd_partition sharpsl_rom_parts[] = { 595static struct mtd_partition sharpsl_rom_parts[] = {
546 { 596 {
547 .name ="Boot PROM Filesystem", 597 .name ="Boot PROM Filesystem",
@@ -577,6 +627,7 @@ static struct platform_device *devices[] __initdata = {
577 &corgifb_device, 627 &corgifb_device,
578 &corgikbd_device, 628 &corgikbd_device,
579 &corgiled_device, 629 &corgiled_device,
630 &sharpsl_nand_device,
580 &sharpsl_rom_device, 631 &sharpsl_rom_device,
581}; 632};
582 633
@@ -617,6 +668,9 @@ static void __init corgi_init(void)
617 668
618 platform_scoop_config = &corgi_pcmcia_config; 669 platform_scoop_config = &corgi_pcmcia_config;
619 670
671 if (machine_is_husky())
672 sharpsl_nand_partitions[1].size = 53 * 1024 * 1024;
673
620 platform_add_devices(devices, ARRAY_SIZE(devices)); 674 platform_add_devices(devices, ARRAY_SIZE(devices));
621} 675}
622 676
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index ae88855bf974..f9093beba752 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -24,6 +24,7 @@
24#include <linux/gpio.h> 24#include <linux/gpio.h>
25#include <linux/spi/spi.h> 25#include <linux/spi/spi.h>
26#include <linux/spi/ads7846.h> 26#include <linux/spi/ads7846.h>
27#include <linux/mtd/sharpsl.h>
27 28
28#include <mach/hardware.h> 29#include <mach/hardware.h>
29#include <asm/mach-types.h> 30#include <asm/mach-types.h>
@@ -414,6 +415,55 @@ static struct pxafb_mach_info poodle_fb_info = {
414 .lcd_conn = LCD_COLOR_TFT_16BPP, 415 .lcd_conn = LCD_COLOR_TFT_16BPP,
415}; 416};
416 417
418static struct mtd_partition sharpsl_nand_partitions[] = {
419 {
420 .name = "System Area",
421 .offset = 0,
422 .size = 7 * 1024 * 1024,
423 },
424 {
425 .name = "Root Filesystem",
426 .offset = 7 * 1024 * 1024,
427 .size = 22 * 1024 * 1024,
428 },
429 {
430 .name = "Home Filesystem",
431 .offset = MTDPART_OFS_APPEND,
432 .size = MTDPART_SIZ_FULL,
433 },
434};
435
436static uint8_t scan_ff_pattern[] = { 0xff, 0xff };
437
438static struct nand_bbt_descr sharpsl_bbt = {
439 .options = 0,
440 .offs = 4,
441 .len = 2,
442 .pattern = scan_ff_pattern
443};
444
445static struct sharpsl_nand_platform_data sharpsl_nand_platform_data = {
446 .badblock_pattern = &sharpsl_bbt,
447 .partitions = sharpsl_nand_partitions,
448 .nr_partitions = ARRAY_SIZE(sharpsl_nand_partitions),
449};
450
451static struct resource sharpsl_nand_resources[] = {
452 {
453 .start = 0x0C000000,
454 .end = 0x0C000FFF,
455 .flags = IORESOURCE_MEM,
456 },
457};
458
459static struct platform_device sharpsl_nand_device = {
460 .name = "sharpsl-nand",
461 .id = -1,
462 .resource = sharpsl_nand_resources,
463 .num_resources = ARRAY_SIZE(sharpsl_nand_resources),
464 .dev.platform_data = &sharpsl_nand_platform_data,
465};
466
417static struct mtd_partition sharpsl_rom_parts[] = { 467static struct mtd_partition sharpsl_rom_parts[] = {
418 { 468 {
419 .name ="Boot PROM Filesystem", 469 .name ="Boot PROM Filesystem",
@@ -447,6 +497,7 @@ static struct platform_device sharpsl_rom_device = {
447static struct platform_device *devices[] __initdata = { 497static struct platform_device *devices[] __initdata = {
448 &poodle_locomo_device, 498 &poodle_locomo_device,
449 &poodle_scoop_device, 499 &poodle_scoop_device,
500 &sharpsl_nand_device,
450 &sharpsl_rom_device, 501 &sharpsl_rom_device,
451}; 502};
452 503
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 7299d87a1cb3..6d447c9ce8ab 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -31,6 +31,7 @@
31#include <linux/spi/spi.h> 31#include <linux/spi/spi.h>
32#include <linux/spi/ads7846.h> 32#include <linux/spi/ads7846.h>
33#include <linux/spi/corgi_lcd.h> 33#include <linux/spi/corgi_lcd.h>
34#include <linux/mtd/sharpsl.h>
34 35
35#include <asm/setup.h> 36#include <asm/setup.h>
36#include <asm/memory.h> 37#include <asm/memory.h>
@@ -613,6 +614,54 @@ static struct pxafb_mach_info spitz_pxafb_info = {
613 .lcd_conn = LCD_COLOR_TFT_16BPP | LCD_ALTERNATE_MAPPING, 614 .lcd_conn = LCD_COLOR_TFT_16BPP | LCD_ALTERNATE_MAPPING,
614}; 615};
615 616
617static struct mtd_partition sharpsl_nand_partitions[] = {
618 {
619 .name = "System Area",
620 .offset = 0,
621 .size = 7 * 1024 * 1024,
622 },
623 {
624 .name = "Root Filesystem",
625 .offset = 7 * 1024 * 1024,
626 },
627 {
628 .name = "Home Filesystem",
629 .offset = MTDPART_OFS_APPEND,
630 .size = MTDPART_SIZ_FULL,
631 },
632};
633
634static uint8_t scan_ff_pattern[] = { 0xff, 0xff };
635
636static struct nand_bbt_descr sharpsl_bbt = {
637 .options = 0,
638 .offs = 4,
639 .len = 2,
640 .pattern = scan_ff_pattern
641};
642
643static struct sharpsl_nand_platform_data sharpsl_nand_platform_data = {
644 .badblock_pattern = &sharpsl_bbt,
645 .partitions = sharpsl_nand_partitions,
646 .nr_partitions = ARRAY_SIZE(sharpsl_nand_partitions),
647};
648
649static struct resource sharpsl_nand_resources[] = {
650 {
651 .start = 0x0C000000,
652 .end = 0x0C000FFF,
653 .flags = IORESOURCE_MEM,
654 },
655};
656
657static struct platform_device sharpsl_nand_device = {
658 .name = "sharpsl-nand",
659 .id = -1,
660 .resource = sharpsl_nand_resources,
661 .num_resources = ARRAY_SIZE(sharpsl_nand_resources),
662 .dev.platform_data = &sharpsl_nand_platform_data,
663};
664
616 665
617static struct mtd_partition sharpsl_rom_parts[] = { 666static struct mtd_partition sharpsl_rom_parts[] = {
618 { 667 {
@@ -648,6 +697,7 @@ static struct platform_device *devices[] __initdata = {
648 &spitzscoop_device, 697 &spitzscoop_device,
649 &spitzkbd_device, 698 &spitzkbd_device,
650 &spitzled_device, 699 &spitzled_device,
700 &sharpsl_nand_device,
651 &sharpsl_rom_device, 701 &sharpsl_rom_device,
652}; 702};
653 703
@@ -671,6 +721,14 @@ static void __init common_init(void)
671 pm_power_off = spitz_poweroff; 721 pm_power_off = spitz_poweroff;
672 arm_pm_restart = spitz_restart; 722 arm_pm_restart = spitz_restart;
673 723
724 if (machine_is_spitz()) {
725 sharpsl_nand_partitions[1].size = 5 * 1024 * 1024;
726 } else if (machine_is_akita()) {
727 sharpsl_nand_partitions[1].size = 58 * 1024 * 1024;
728 } else if (machine_is_borzoi()) {
729 sharpsl_nand_partitions[1].size = 32 * 1024 * 1024;
730 }
731
674 PMCR = 0x00; 732 PMCR = 0x00;
675 733
676 /* Stop 3.6MHz and drive HIGH to PCMCIA and CS */ 734 /* Stop 3.6MHz and drive HIGH to PCMCIA and CS */
@@ -715,10 +773,29 @@ static struct i2c_board_info akita_i2c_board_info[] = {
715 }, 773 },
716}; 774};
717 775
776static struct nand_bbt_descr sharpsl_akita_bbt = {
777 .options = 0,
778 .offs = 4,
779 .len = 1,
780 .pattern = scan_ff_pattern
781};
782
783static struct nand_ecclayout akita_oobinfo = {
784 .eccbytes = 24,
785 .eccpos = {
786 0x5, 0x1, 0x2, 0x3, 0x6, 0x7, 0x15, 0x11,
787 0x12, 0x13, 0x16, 0x17, 0x25, 0x21, 0x22, 0x23,
788 0x26, 0x27, 0x35, 0x31, 0x32, 0x33, 0x36, 0x37},
789 .oobfree = {{0x08, 0x09}}
790};
791
718static void __init akita_init(void) 792static void __init akita_init(void)
719{ 793{
720 spitz_ficp_platform_data.transceiver_mode = akita_irda_transceiver_mode; 794 spitz_ficp_platform_data.transceiver_mode = akita_irda_transceiver_mode;
721 795
796 sharpsl_nand_platform_data.badblock_pattern = &sharpsl_akita_bbt;
797 sharpsl_nand_platform_data.ecc_layout = &akita_oobinfo;
798
722 /* We just pretend the second element of the array doesn't exist */ 799 /* We just pretend the second element of the array doesn't exist */
723 spitz_pcmcia_config.num_devs = 1; 800 spitz_pcmcia_config.num_devs = 1;
724 platform_scoop_config = &spitz_pcmcia_config; 801 platform_scoop_config = &spitz_pcmcia_config;
diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index ea7bc1e8562b..3fbfd1e32a9e 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -1305,7 +1305,7 @@ struct platform_device *__init
1305at32_add_device_mci(unsigned int id, struct mci_platform_data *data) 1305at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
1306{ 1306{
1307 struct platform_device *pdev; 1307 struct platform_device *pdev;
1308 struct dw_dma_slave *dws; 1308 struct dw_dma_slave *dws = &data->dma_slave;
1309 u32 pioa_mask; 1309 u32 pioa_mask;
1310 u32 piob_mask; 1310 u32 piob_mask;
1311 1311
@@ -1324,22 +1324,13 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
1324 ARRAY_SIZE(atmel_mci0_resource))) 1324 ARRAY_SIZE(atmel_mci0_resource)))
1325 goto fail; 1325 goto fail;
1326 1326
1327 if (data->dma_slave) 1327 dws->dma_dev = &dw_dmac0_device.dev;
1328 dws = kmemdup(to_dw_dma_slave(data->dma_slave), 1328 dws->reg_width = DW_DMA_SLAVE_WIDTH_32BIT;
1329 sizeof(struct dw_dma_slave), GFP_KERNEL);
1330 else
1331 dws = kzalloc(sizeof(struct dw_dma_slave), GFP_KERNEL);
1332
1333 dws->slave.dev = &pdev->dev;
1334 dws->slave.dma_dev = &dw_dmac0_device.dev;
1335 dws->slave.reg_width = DMA_SLAVE_WIDTH_32BIT;
1336 dws->cfg_hi = (DWC_CFGH_SRC_PER(0) 1329 dws->cfg_hi = (DWC_CFGH_SRC_PER(0)
1337 | DWC_CFGH_DST_PER(1)); 1330 | DWC_CFGH_DST_PER(1));
1338 dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL 1331 dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL
1339 | DWC_CFGL_HS_SRC_POL); 1332 | DWC_CFGL_HS_SRC_POL);
1340 1333
1341 data->dma_slave = &dws->slave;
1342
1343 if (platform_device_add_data(pdev, data, 1334 if (platform_device_add_data(pdev, data,
1344 sizeof(struct mci_platform_data))) 1335 sizeof(struct mci_platform_data)))
1345 goto fail; 1336 goto fail;
diff --git a/arch/ia64/include/asm/acpi-ext.h b/arch/ia64/include/asm/acpi-ext.h
index 734d137dda6e..7f8362b379eb 100644
--- a/arch/ia64/include/asm/acpi-ext.h
+++ b/arch/ia64/include/asm/acpi-ext.h
@@ -14,7 +14,6 @@
14#define _ASM_IA64_ACPI_EXT_H 14#define _ASM_IA64_ACPI_EXT_H
15 15
16#include <linux/types.h> 16#include <linux/types.h>
17#include <acpi/actypes.h>
18 17
19extern acpi_status hp_acpi_csr_space (acpi_handle, u64 *base, u64 *length); 18extern acpi_status hp_acpi_csr_space (acpi_handle, u64 *base, u64 *length);
20 19
diff --git a/arch/ia64/include/asm/sn/acpi.h b/arch/ia64/include/asm/sn/acpi.h
index 9ce2801cbd57..fd480db25565 100644
--- a/arch/ia64/include/asm/sn/acpi.h
+++ b/arch/ia64/include/asm/sn/acpi.h
@@ -9,8 +9,6 @@
9#ifndef _ASM_IA64_SN_ACPI_H 9#ifndef _ASM_IA64_SN_ACPI_H
10#define _ASM_IA64_SN_ACPI_H 10#define _ASM_IA64_SN_ACPI_H
11 11
12#include "acpi/acglobal.h"
13
14extern int sn_acpi_rev; 12extern int sn_acpi_rev;
15#define SN_ACPI_BASE_SUPPORT() (sn_acpi_rev >= 0x20101) 13#define SN_ACPI_BASE_SUPPORT() (sn_acpi_rev >= 0x20101)
16 14
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 0553648b7595..d541671caf4a 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -65,6 +65,7 @@ EXPORT_SYMBOL(pm_idle);
65void (*pm_power_off) (void); 65void (*pm_power_off) (void);
66EXPORT_SYMBOL(pm_power_off); 66EXPORT_SYMBOL(pm_power_off);
67 67
68u32 acpi_rsdt_forced;
68unsigned int acpi_cpei_override; 69unsigned int acpi_cpei_override;
69unsigned int acpi_cpei_phys_cpuid; 70unsigned int acpi_cpei_phys_cpuid;
70 71
diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c
index bc610a6c7851..c5a214026a77 100644
--- a/arch/ia64/sn/kernel/io_acpi_init.c
+++ b/arch/ia64/sn/kernel/io_acpi_init.c
@@ -13,7 +13,6 @@
13#include <asm/sn/sn_sal.h> 13#include <asm/sn/sn_sal.h>
14#include "xtalk/hubdev.h" 14#include "xtalk/hubdev.h"
15#include <linux/acpi.h> 15#include <linux/acpi.h>
16#include <acpi/acnamesp.h>
17 16
18 17
19/* 18/*
@@ -64,6 +63,7 @@ static acpi_status __init
64sn_acpi_hubdev_init(acpi_handle handle, u32 depth, void *context, void **ret) 63sn_acpi_hubdev_init(acpi_handle handle, u32 depth, void *context, void **ret)
65{ 64{
66 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; 65 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
66 struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
67 u64 addr; 67 u64 addr;
68 struct hubdev_info *hubdev; 68 struct hubdev_info *hubdev;
69 struct hubdev_info *hubdev_ptr; 69 struct hubdev_info *hubdev_ptr;
@@ -77,11 +77,12 @@ sn_acpi_hubdev_init(acpi_handle handle, u32 depth, void *context, void **ret)
77 status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS, 77 status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS,
78 &sn_uuid, &buffer); 78 &sn_uuid, &buffer);
79 if (ACPI_FAILURE(status)) { 79 if (ACPI_FAILURE(status)) {
80 acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
80 printk(KERN_ERR 81 printk(KERN_ERR
81 "sn_acpi_hubdev_init: acpi_get_vendor_resource() " 82 "sn_acpi_hubdev_init: acpi_get_vendor_resource() "
82 "(0x%x) failed for: ", status); 83 "(0x%x) failed for: %s\n", status,
83 acpi_ns_print_node_pathname(handle, NULL); 84 (char *)name_buffer.pointer);
84 printk("\n"); 85 kfree(name_buffer.pointer);
85 return AE_OK; /* Continue walking namespace */ 86 return AE_OK; /* Continue walking namespace */
86 } 87 }
87 88
@@ -89,11 +90,12 @@ sn_acpi_hubdev_init(acpi_handle handle, u32 depth, void *context, void **ret)
89 vendor = &resource->data.vendor_typed; 90 vendor = &resource->data.vendor_typed;
90 if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) != 91 if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) !=
91 sizeof(struct hubdev_info *)) { 92 sizeof(struct hubdev_info *)) {
93 acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
92 printk(KERN_ERR 94 printk(KERN_ERR
93 "sn_acpi_hubdev_init: Invalid vendor data length: %d for: ", 95 "sn_acpi_hubdev_init: Invalid vendor data length: "
94 vendor->byte_length); 96 "%d for: %s\n",
95 acpi_ns_print_node_pathname(handle, NULL); 97 vendor->byte_length, (char *)name_buffer.pointer);
96 printk("\n"); 98 kfree(name_buffer.pointer);
97 goto exit; 99 goto exit;
98 } 100 }
99 101
@@ -120,6 +122,7 @@ sn_get_bussoft_ptr(struct pci_bus *bus)
120{ 122{
121 u64 addr; 123 u64 addr;
122 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; 124 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
125 struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
123 acpi_handle handle; 126 acpi_handle handle;
124 struct pcibus_bussoft *prom_bussoft_ptr; 127 struct pcibus_bussoft *prom_bussoft_ptr;
125 struct acpi_resource *resource; 128 struct acpi_resource *resource;
@@ -131,11 +134,11 @@ sn_get_bussoft_ptr(struct pci_bus *bus)
131 status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS, 134 status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS,
132 &sn_uuid, &buffer); 135 &sn_uuid, &buffer);
133 if (ACPI_FAILURE(status)) { 136 if (ACPI_FAILURE(status)) {
137 acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
134 printk(KERN_ERR "%s: " 138 printk(KERN_ERR "%s: "
135 "acpi_get_vendor_resource() failed (0x%x) for: ", 139 "acpi_get_vendor_resource() failed (0x%x) for: %s\n",
136 __func__, status); 140 __func__, status, (char *)name_buffer.pointer);
137 acpi_ns_print_node_pathname(handle, NULL); 141 kfree(name_buffer.pointer);
138 printk("\n");
139 return NULL; 142 return NULL;
140 } 143 }
141 resource = buffer.pointer; 144 resource = buffer.pointer;
@@ -168,6 +171,7 @@ sn_extract_device_info(acpi_handle handle, struct pcidev_info **pcidev_info,
168{ 171{
169 u64 addr; 172 u64 addr;
170 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; 173 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
174 struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
171 struct sn_irq_info *irq_info, *irq_info_prom; 175 struct sn_irq_info *irq_info, *irq_info_prom;
172 struct pcidev_info *pcidev_ptr, *pcidev_prom_ptr; 176 struct pcidev_info *pcidev_ptr, *pcidev_prom_ptr;
173 struct acpi_resource *resource; 177 struct acpi_resource *resource;
@@ -182,11 +186,11 @@ sn_extract_device_info(acpi_handle handle, struct pcidev_info **pcidev_info,
182 status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS, 186 status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS,
183 &sn_uuid, &buffer); 187 &sn_uuid, &buffer);
184 if (ACPI_FAILURE(status)) { 188 if (ACPI_FAILURE(status)) {
189 acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
185 printk(KERN_ERR 190 printk(KERN_ERR
186 "%s: acpi_get_vendor_resource() failed (0x%x) for: ", 191 "%s: acpi_get_vendor_resource() failed (0x%x) for: %s\n",
187 __func__, status); 192 __func__, status, (char *)name_buffer.pointer);
188 acpi_ns_print_node_pathname(handle, NULL); 193 kfree(name_buffer.pointer);
189 printk("\n");
190 return 1; 194 return 1;
191 } 195 }
192 196
@@ -194,11 +198,12 @@ sn_extract_device_info(acpi_handle handle, struct pcidev_info **pcidev_info,
194 vendor = &resource->data.vendor_typed; 198 vendor = &resource->data.vendor_typed;
195 if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) != 199 if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) !=
196 sizeof(struct pci_devdev_info *)) { 200 sizeof(struct pci_devdev_info *)) {
201 acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
197 printk(KERN_ERR 202 printk(KERN_ERR
198 "%s: Invalid vendor data length: %d for: ", 203 "%s: Invalid vendor data length: %d for: %s\n",
199 __func__, vendor->byte_length); 204 __func__, vendor->byte_length,
200 acpi_ns_print_node_pathname(handle, NULL); 205 (char *)name_buffer.pointer);
201 printk("\n"); 206 kfree(name_buffer.pointer);
202 ret = 1; 207 ret = 1;
203 goto exit; 208 goto exit;
204 } 209 }
@@ -239,6 +244,9 @@ get_host_devfn(acpi_handle device_handle, acpi_handle rootbus_handle)
239 acpi_handle parent; 244 acpi_handle parent;
240 int slot; 245 int slot;
241 acpi_status status; 246 acpi_status status;
247 struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
248
249 acpi_get_name(device_handle, ACPI_FULL_PATHNAME, &name_buffer);
242 250
243 /* 251 /*
244 * Do an upward search to find the root bus device, and 252 * Do an upward search to find the root bus device, and
@@ -249,9 +257,8 @@ get_host_devfn(acpi_handle device_handle, acpi_handle rootbus_handle)
249 status = acpi_get_parent(child, &parent); 257 status = acpi_get_parent(child, &parent);
250 if (ACPI_FAILURE(status)) { 258 if (ACPI_FAILURE(status)) {
251 printk(KERN_ERR "%s: acpi_get_parent() failed " 259 printk(KERN_ERR "%s: acpi_get_parent() failed "
252 "(0x%x) for: ", __func__, status); 260 "(0x%x) for: %s\n", __func__, status,
253 acpi_ns_print_node_pathname(child, NULL); 261 (char *)name_buffer.pointer);
254 printk("\n");
255 panic("%s: Unable to find host devfn\n", __func__); 262 panic("%s: Unable to find host devfn\n", __func__);
256 } 263 }
257 if (parent == rootbus_handle) 264 if (parent == rootbus_handle)
@@ -259,22 +266,20 @@ get_host_devfn(acpi_handle device_handle, acpi_handle rootbus_handle)
259 child = parent; 266 child = parent;
260 } 267 }
261 if (!child) { 268 if (!child) {
262 printk(KERN_ERR "%s: Unable to find root bus for: ", 269 printk(KERN_ERR "%s: Unable to find root bus for: %s\n",
263 __func__); 270 __func__, (char *)name_buffer.pointer);
264 acpi_ns_print_node_pathname(device_handle, NULL);
265 printk("\n");
266 BUG(); 271 BUG();
267 } 272 }
268 273
269 status = acpi_evaluate_integer(child, METHOD_NAME__ADR, NULL, &adr); 274 status = acpi_evaluate_integer(child, METHOD_NAME__ADR, NULL, &adr);
270 if (ACPI_FAILURE(status)) { 275 if (ACPI_FAILURE(status)) {
271 printk(KERN_ERR "%s: Unable to get _ADR (0x%x) for: ", 276 printk(KERN_ERR "%s: Unable to get _ADR (0x%x) for: %s\n",
272 __func__, status); 277 __func__, status, (char *)name_buffer.pointer);
273 acpi_ns_print_node_pathname(child, NULL);
274 printk("\n");
275 panic("%s: Unable to find host devfn\n", __func__); 278 panic("%s: Unable to find host devfn\n", __func__);
276 } 279 }
277 280
281 kfree(name_buffer.pointer);
282
278 slot = (adr >> 16) & 0xffff; 283 slot = (adr >> 16) & 0xffff;
279 function = adr & 0xffff; 284 function = adr & 0xffff;
280 devfn = PCI_DEVFN(slot, function); 285 devfn = PCI_DEVFN(slot, function);
@@ -300,27 +305,28 @@ find_matching_device(acpi_handle handle, u32 lvl, void *context, void **rv)
300 int function; 305 int function;
301 int slot; 306 int slot;
302 struct sn_pcidev_match *info = context; 307 struct sn_pcidev_match *info = context;
308 struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
303 309
304 status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, 310 status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL,
305 &adr); 311 &adr);
306 if (ACPI_SUCCESS(status)) { 312 if (ACPI_SUCCESS(status)) {
307 status = acpi_get_parent(handle, &parent); 313 status = acpi_get_parent(handle, &parent);
308 if (ACPI_FAILURE(status)) { 314 if (ACPI_FAILURE(status)) {
315 acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
309 printk(KERN_ERR 316 printk(KERN_ERR
310 "%s: acpi_get_parent() failed (0x%x) for: ", 317 "%s: acpi_get_parent() failed (0x%x) for: %s\n",
311 __func__, status); 318 __func__, status, (char *)name_buffer.pointer);
312 acpi_ns_print_node_pathname(handle, NULL); 319 kfree(name_buffer.pointer);
313 printk("\n");
314 return AE_OK; 320 return AE_OK;
315 } 321 }
316 status = acpi_evaluate_integer(parent, METHOD_NAME__BBN, 322 status = acpi_evaluate_integer(parent, METHOD_NAME__BBN,
317 NULL, &bbn); 323 NULL, &bbn);
318 if (ACPI_FAILURE(status)) { 324 if (ACPI_FAILURE(status)) {
325 acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
319 printk(KERN_ERR 326 printk(KERN_ERR
320 "%s: Failed to find _BBN in parent of: ", 327 "%s: Failed to find _BBN in parent of: %s\n",
321 __func__); 328 __func__, (char *)name_buffer.pointer);
322 acpi_ns_print_node_pathname(handle, NULL); 329 kfree(name_buffer.pointer);
323 printk("\n");
324 return AE_OK; 330 return AE_OK;
325 } 331 }
326 332
@@ -350,24 +356,27 @@ sn_acpi_get_pcidev_info(struct pci_dev *dev, struct pcidev_info **pcidev_info,
350 acpi_handle rootbus_handle; 356 acpi_handle rootbus_handle;
351 unsigned long long segment; 357 unsigned long long segment;
352 acpi_status status; 358 acpi_status status;
359 struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
353 360
354 rootbus_handle = PCI_CONTROLLER(dev)->acpi_handle; 361 rootbus_handle = PCI_CONTROLLER(dev)->acpi_handle;
355 status = acpi_evaluate_integer(rootbus_handle, METHOD_NAME__SEG, NULL, 362 status = acpi_evaluate_integer(rootbus_handle, METHOD_NAME__SEG, NULL,
356 &segment); 363 &segment);
357 if (ACPI_SUCCESS(status)) { 364 if (ACPI_SUCCESS(status)) {
358 if (segment != pci_domain_nr(dev)) { 365 if (segment != pci_domain_nr(dev)) {
366 acpi_get_name(rootbus_handle, ACPI_FULL_PATHNAME,
367 &name_buffer);
359 printk(KERN_ERR 368 printk(KERN_ERR
360 "%s: Segment number mismatch, 0x%llx vs 0x%x for: ", 369 "%s: Segment number mismatch, 0x%llx vs 0x%x for: %s\n",
361 __func__, segment, pci_domain_nr(dev)); 370 __func__, segment, pci_domain_nr(dev),
362 acpi_ns_print_node_pathname(rootbus_handle, NULL); 371 (char *)name_buffer.pointer);
363 printk("\n"); 372 kfree(name_buffer.pointer);
364 return 1; 373 return 1;
365 } 374 }
366 } else { 375 } else {
367 printk(KERN_ERR "%s: Unable to get __SEG from: ", 376 acpi_get_name(rootbus_handle, ACPI_FULL_PATHNAME, &name_buffer);
368 __func__); 377 printk(KERN_ERR "%s: Unable to get __SEG from: %s\n",
369 acpi_ns_print_node_pathname(rootbus_handle, NULL); 378 __func__, (char *)name_buffer.pointer);
370 printk("\n"); 379 kfree(name_buffer.pointer);
371 return 1; 380 return 1;
372 } 381 }
373 382
diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c
index 8a924a5661dd..0d4ffa4da1da 100644
--- a/arch/ia64/sn/kernel/io_common.c
+++ b/arch/ia64/sn/kernel/io_common.c
@@ -26,7 +26,6 @@
26#include <linux/acpi.h> 26#include <linux/acpi.h>
27#include <asm/sn/sn2/sn_hwperf.h> 27#include <asm/sn/sn2/sn_hwperf.h>
28#include <asm/sn/acpi.h> 28#include <asm/sn/acpi.h>
29#include "acpi/acglobal.h"
30 29
31extern void sn_init_cpei_timer(void); 30extern void sn_init_cpei_timer(void);
32extern void register_sn_procfs(void); 31extern void register_sn_procfs(void);
@@ -473,7 +472,7 @@ sn_io_early_init(void)
473 { 472 {
474 struct acpi_table_header *header = NULL; 473 struct acpi_table_header *header = NULL;
475 474
476 acpi_get_table_by_index(ACPI_TABLE_INDEX_DSDT, &header); 475 acpi_get_table(ACPI_SIG_DSDT, 1, &header);
477 BUG_ON(header == NULL); 476 BUG_ON(header == NULL);
478 sn_acpi_rev = header->oem_revision; 477 sn_acpi_rev = header->oem_revision;
479 } 478 }
@@ -505,7 +504,7 @@ sn_io_early_init(void)
505 504
506 { 505 {
507 struct acpi_table_header *header; 506 struct acpi_table_header *header;
508 (void)acpi_get_table_by_index(ACPI_TABLE_INDEX_DSDT, &header); 507 (void)acpi_get_table(ACPI_SIG_DSDT, 1, &header);
509 printk(KERN_INFO "ACPI DSDT OEM Rev 0x%x\n", 508 printk(KERN_INFO "ACPI DSDT OEM Rev 0x%x\n",
510 header->oem_revision); 509 header->oem_revision);
511 } 510 }
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 5ddad7bd60ac..0d428278356d 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -77,7 +77,7 @@ libs-y += arch/parisc/lib/ `$(CC) -print-libgcc-file-name`
77 77
78drivers-$(CONFIG_OPROFILE) += arch/parisc/oprofile/ 78drivers-$(CONFIG_OPROFILE) += arch/parisc/oprofile/
79 79
80PALO := $(shell if which palo; then : ; \ 80PALO := $(shell if (which palo 2>&1); then : ; \
81 elif [ -x /sbin/palo ]; then echo /sbin/palo; \ 81 elif [ -x /sbin/palo ]; then echo /sbin/palo; \
82 fi) 82 fi)
83 83
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index f88b252e419c..2121d99f8364 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -1,3 +1,4 @@
1include include/asm-generic/Kbuild.asm 1include include/asm-generic/Kbuild.asm
2 2
3unifdef-y += pdc.h 3unifdef-y += pdc.h
4unifdef-y += swab.h
diff --git a/arch/parisc/include/asm/byteorder.h b/arch/parisc/include/asm/byteorder.h
index db148313de5d..da66029c4cb2 100644
--- a/arch/parisc/include/asm/byteorder.h
+++ b/arch/parisc/include/asm/byteorder.h
@@ -1,82 +1,7 @@
1#ifndef _PARISC_BYTEORDER_H 1#ifndef _PARISC_BYTEORDER_H
2#define _PARISC_BYTEORDER_H 2#define _PARISC_BYTEORDER_H
3 3
4#include <asm/types.h> 4#include <asm/swab.h>
5#include <linux/compiler.h>
6
7#ifdef __GNUC__
8
9static __inline__ __attribute_const__ __u16 ___arch__swab16(__u16 x)
10{
11 __asm__("dep %0, 15, 8, %0\n\t" /* deposit 00ab -> 0bab */
12 "shd %%r0, %0, 8, %0" /* shift 000000ab -> 00ba */
13 : "=r" (x)
14 : "0" (x));
15 return x;
16}
17
18static __inline__ __attribute_const__ __u32 ___arch__swab24(__u32 x)
19{
20 __asm__("shd %0, %0, 8, %0\n\t" /* shift xabcxabc -> cxab */
21 "dep %0, 15, 8, %0\n\t" /* deposit cxab -> cbab */
22 "shd %%r0, %0, 8, %0" /* shift 0000cbab -> 0cba */
23 : "=r" (x)
24 : "0" (x));
25 return x;
26}
27
28static __inline__ __attribute_const__ __u32 ___arch__swab32(__u32 x)
29{
30 unsigned int temp;
31 __asm__("shd %0, %0, 16, %1\n\t" /* shift abcdabcd -> cdab */
32 "dep %1, 15, 8, %1\n\t" /* deposit cdab -> cbab */
33 "shd %0, %1, 8, %0" /* shift abcdcbab -> dcba */
34 : "=r" (x), "=&r" (temp)
35 : "0" (x));
36 return x;
37}
38
39
40#if BITS_PER_LONG > 32
41/*
42** From "PA-RISC 2.0 Architecture", HP Professional Books.
43** See Appendix I page 8 , "Endian Byte Swapping".
44**
45** Pretty cool algorithm: (* == zero'd bits)
46** PERMH 01234567 -> 67452301 into %0
47** HSHL 67452301 -> 7*5*3*1* into %1
48** HSHR 67452301 -> *6*4*2*0 into %0
49** OR %0 | %1 -> 76543210 into %0 (all done!)
50*/
51static __inline__ __attribute_const__ __u64 ___arch__swab64(__u64 x) {
52 __u64 temp;
53 __asm__("permh,3210 %0, %0\n\t"
54 "hshl %0, 8, %1\n\t"
55 "hshr,u %0, 8, %0\n\t"
56 "or %1, %0, %0"
57 : "=r" (x), "=&r" (temp)
58 : "0" (x));
59 return x;
60}
61#define __arch__swab64(x) ___arch__swab64(x)
62#define __BYTEORDER_HAS_U64__
63#elif !defined(__STRICT_ANSI__)
64static __inline__ __attribute_const__ __u64 ___arch__swab64(__u64 x)
65{
66 __u32 t1 = ___arch__swab32((__u32) x);
67 __u32 t2 = ___arch__swab32((__u32) (x >> 32));
68 return (((__u64) t1 << 32) | t2);
69}
70#define __arch__swab64(x) ___arch__swab64(x)
71#define __BYTEORDER_HAS_U64__
72#endif
73
74#define __arch__swab16(x) ___arch__swab16(x)
75#define __arch__swab24(x) ___arch__swab24(x)
76#define __arch__swab32(x) ___arch__swab32(x)
77
78#endif /* __GNUC__ */
79
80#include <linux/byteorder/big_endian.h> 5#include <linux/byteorder/big_endian.h>
81 6
82#endif /* _PARISC_BYTEORDER_H */ 7#endif /* _PARISC_BYTEORDER_H */
diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h
index e9639ccc3fce..c84b2fcb18a9 100644
--- a/arch/parisc/include/asm/checksum.h
+++ b/arch/parisc/include/asm/checksum.h
@@ -182,7 +182,7 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
182#endif 182#endif
183 : "=r" (sum), "=r" (saddr), "=r" (daddr), "=r" (len) 183 : "=r" (sum), "=r" (saddr), "=r" (daddr), "=r" (len)
184 : "0" (sum), "1" (saddr), "2" (daddr), "3" (len), "r" (proto) 184 : "0" (sum), "1" (saddr), "2" (daddr), "3" (len), "r" (proto)
185 : "r19", "r20", "r21", "r22"); 185 : "r19", "r20", "r21", "r22", "memory");
186 return csum_fold(sum); 186 return csum_fold(sum);
187} 187}
188 188
diff --git a/arch/parisc/include/asm/io.h b/arch/parisc/include/asm/io.h
index 55ddb1842107..d3031d1f9d03 100644
--- a/arch/parisc/include/asm/io.h
+++ b/arch/parisc/include/asm/io.h
@@ -4,12 +4,6 @@
4#include <linux/types.h> 4#include <linux/types.h>
5#include <asm/pgtable.h> 5#include <asm/pgtable.h>
6 6
7extern unsigned long parisc_vmerge_boundary;
8extern unsigned long parisc_vmerge_max_size;
9
10#define BIO_VMERGE_BOUNDARY parisc_vmerge_boundary
11#define BIO_VMERGE_MAX_SIZE parisc_vmerge_max_size
12
13#define virt_to_phys(a) ((unsigned long)__pa(a)) 7#define virt_to_phys(a) ((unsigned long)__pa(a))
14#define phys_to_virt(a) __va(a) 8#define phys_to_virt(a) __va(a)
15#define virt_to_bus virt_to_phys 9#define virt_to_bus virt_to_phys
@@ -182,9 +176,9 @@ static inline void __raw_writeq(unsigned long long b, volatile void __iomem *add
182 176
183/* readb can never be const, so use __fswab instead of le*_to_cpu */ 177/* readb can never be const, so use __fswab instead of le*_to_cpu */
184#define readb(addr) __raw_readb(addr) 178#define readb(addr) __raw_readb(addr)
185#define readw(addr) __fswab16(__raw_readw(addr)) 179#define readw(addr) le16_to_cpu(__raw_readw(addr))
186#define readl(addr) __fswab32(__raw_readl(addr)) 180#define readl(addr) le32_to_cpu(__raw_readl(addr))
187#define readq(addr) __fswab64(__raw_readq(addr)) 181#define readq(addr) le64_to_cpu(__raw_readq(addr))
188#define writeb(b, addr) __raw_writeb(b, addr) 182#define writeb(b, addr) __raw_writeb(b, addr)
189#define writew(b, addr) __raw_writew(cpu_to_le16(b), addr) 183#define writew(b, addr) __raw_writew(cpu_to_le16(b), addr)
190#define writel(b, addr) __raw_writel(cpu_to_le32(b), addr) 184#define writel(b, addr) __raw_writel(cpu_to_le32(b), addr)
diff --git a/arch/parisc/include/asm/mmu_context.h b/arch/parisc/include/asm/mmu_context.h
index 85856c74ad1d..354b2aca990e 100644
--- a/arch/parisc/include/asm/mmu_context.h
+++ b/arch/parisc/include/asm/mmu_context.h
@@ -34,16 +34,21 @@ destroy_context(struct mm_struct *mm)
34 mm->context = 0; 34 mm->context = 0;
35} 35}
36 36
37static inline void load_context(mm_context_t context) 37static inline unsigned long __space_to_prot(mm_context_t context)
38{ 38{
39 mtsp(context, 3);
40#if SPACEID_SHIFT == 0 39#if SPACEID_SHIFT == 0
41 mtctl(context << 1,8); 40 return context << 1;
42#else 41#else
43 mtctl(context >> (SPACEID_SHIFT - 1),8); 42 return context >> (SPACEID_SHIFT - 1);
44#endif 43#endif
45} 44}
46 45
46static inline void load_context(mm_context_t context)
47{
48 mtsp(context, 3);
49 mtctl(__space_to_prot(context), 8);
50}
51
47static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) 52static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
48{ 53{
49 54
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index 3c9d34844c83..9d64df8754ba 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -17,6 +17,7 @@
17#include <asm/ptrace.h> 17#include <asm/ptrace.h>
18#include <asm/types.h> 18#include <asm/types.h>
19#include <asm/system.h> 19#include <asm/system.h>
20#include <asm/percpu.h>
20#endif /* __ASSEMBLY__ */ 21#endif /* __ASSEMBLY__ */
21 22
22#define KERNEL_STACK_SIZE (4*PAGE_SIZE) 23#define KERNEL_STACK_SIZE (4*PAGE_SIZE)
@@ -109,8 +110,7 @@ struct cpuinfo_parisc {
109}; 110};
110 111
111extern struct system_cpuinfo_parisc boot_cpu_data; 112extern struct system_cpuinfo_parisc boot_cpu_data;
112extern struct cpuinfo_parisc cpu_data[NR_CPUS]; 113DECLARE_PER_CPU(struct cpuinfo_parisc, cpu_data);
113#define current_cpu_data cpu_data[smp_processor_id()]
114 114
115#define CPU_HVERSION ((boot_cpu_data.hversion >> 4) & 0x0FFF) 115#define CPU_HVERSION ((boot_cpu_data.hversion >> 4) & 0x0FFF)
116 116
diff --git a/arch/parisc/include/asm/swab.h b/arch/parisc/include/asm/swab.h
new file mode 100644
index 000000000000..3ff16c5a3358
--- /dev/null
+++ b/arch/parisc/include/asm/swab.h
@@ -0,0 +1,66 @@
1#ifndef _PARISC_SWAB_H
2#define _PARISC_SWAB_H
3
4#include <asm/types.h>
5#include <linux/compiler.h>
6
7#define __SWAB_64_THRU_32__
8
9static inline __attribute_const__ __u16 __arch_swab16(__u16 x)
10{
11 __asm__("dep %0, 15, 8, %0\n\t" /* deposit 00ab -> 0bab */
12 "shd %%r0, %0, 8, %0" /* shift 000000ab -> 00ba */
13 : "=r" (x)
14 : "0" (x));
15 return x;
16}
17#define __arch_swab16 __arch_swab16
18
19static inline __attribute_const__ __u32 __arch_swab24(__u32 x)
20{
21 __asm__("shd %0, %0, 8, %0\n\t" /* shift xabcxabc -> cxab */
22 "dep %0, 15, 8, %0\n\t" /* deposit cxab -> cbab */
23 "shd %%r0, %0, 8, %0" /* shift 0000cbab -> 0cba */
24 : "=r" (x)
25 : "0" (x));
26 return x;
27}
28
29static inline __attribute_const__ __u32 __arch_swab32(__u32 x)
30{
31 unsigned int temp;
32 __asm__("shd %0, %0, 16, %1\n\t" /* shift abcdabcd -> cdab */
33 "dep %1, 15, 8, %1\n\t" /* deposit cdab -> cbab */
34 "shd %0, %1, 8, %0" /* shift abcdcbab -> dcba */
35 : "=r" (x), "=&r" (temp)
36 : "0" (x));
37 return x;
38}
39#define __arch_swab32 __arch_swab32
40
41#if BITS_PER_LONG > 32
42/*
43** From "PA-RISC 2.0 Architecture", HP Professional Books.
44** See Appendix I page 8 , "Endian Byte Swapping".
45**
46** Pretty cool algorithm: (* == zero'd bits)
47** PERMH 01234567 -> 67452301 into %0
48** HSHL 67452301 -> 7*5*3*1* into %1
49** HSHR 67452301 -> *6*4*2*0 into %0
50** OR %0 | %1 -> 76543210 into %0 (all done!)
51*/
52static inline __attribute_const__ __u64 __arch_swab64(__u64 x)
53{
54 __u64 temp;
55 __asm__("permh,3210 %0, %0\n\t"
56 "hshl %0, 8, %1\n\t"
57 "hshr,u %0, 8, %0\n\t"
58 "or %1, %0, %0"
59 : "=r" (x), "=&r" (temp)
60 : "0" (x));
61 return x;
62}
63#define __arch_swab64 __arch_swab64
64#endif /* BITS_PER_LONG > 32 */
65
66#endif /* _PARISC_SWAB_H */
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 4878b9501f24..1c6dbb6f6e56 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -241,4 +241,6 @@ unsigned long copy_in_user(void __user *dst, const void __user *src, unsigned lo
241#define __copy_to_user_inatomic __copy_to_user 241#define __copy_to_user_inatomic __copy_to_user
242#define __copy_from_user_inatomic __copy_from_user 242#define __copy_from_user_inatomic __copy_from_user
243 243
244int fixup_exception(struct pt_regs *regs);
245
244#endif /* __PARISC_UACCESS_H */ 246#endif /* __PARISC_UACCESS_H */
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
index 884b7ce16a3b..994bcd980909 100644
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@@ -549,6 +549,38 @@ static int parisc_generic_match(struct device *dev, struct device_driver *drv)
549 return match_device(to_parisc_driver(drv), to_parisc_device(dev)); 549 return match_device(to_parisc_driver(drv), to_parisc_device(dev));
550} 550}
551 551
552static ssize_t make_modalias(struct device *dev, char *buf)
553{
554 const struct parisc_device *padev = to_parisc_device(dev);
555 const struct parisc_device_id *id = &padev->id;
556
557 return sprintf(buf, "parisc:t%02Xhv%04Xrev%02Xsv%08X\n",
558 (u8)id->hw_type, (u16)id->hversion, (u8)id->hversion_rev,
559 (u32)id->sversion);
560}
561
562static int parisc_uevent(struct device *dev, struct kobj_uevent_env *env)
563{
564 const struct parisc_device *padev;
565 char modalias[40];
566
567 if (!dev)
568 return -ENODEV;
569
570 padev = to_parisc_device(dev);
571 if (!padev)
572 return -ENODEV;
573
574 if (add_uevent_var(env, "PARISC_NAME=%s", padev->name))
575 return -ENOMEM;
576
577 make_modalias(dev, modalias);
578 if (add_uevent_var(env, "MODALIAS=%s", modalias))
579 return -ENOMEM;
580
581 return 0;
582}
583
552#define pa_dev_attr(name, field, format_string) \ 584#define pa_dev_attr(name, field, format_string) \
553static ssize_t name##_show(struct device *dev, struct device_attribute *attr, char *buf) \ 585static ssize_t name##_show(struct device *dev, struct device_attribute *attr, char *buf) \
554{ \ 586{ \
@@ -566,12 +598,7 @@ pa_dev_attr_id(sversion, "0x%05x\n");
566 598
567static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) 599static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf)
568{ 600{
569 struct parisc_device *padev = to_parisc_device(dev); 601 return make_modalias(dev, buf);
570 struct parisc_device_id *id = &padev->id;
571
572 return sprintf(buf, "parisc:t%02Xhv%04Xrev%02Xsv%08X\n",
573 (u8)id->hw_type, (u16)id->hversion, (u8)id->hversion_rev,
574 (u32)id->sversion);
575} 602}
576 603
577static struct device_attribute parisc_device_attrs[] = { 604static struct device_attribute parisc_device_attrs[] = {
@@ -587,6 +614,7 @@ static struct device_attribute parisc_device_attrs[] = {
587struct bus_type parisc_bus_type = { 614struct bus_type parisc_bus_type = {
588 .name = "parisc", 615 .name = "parisc",
589 .match = parisc_generic_match, 616 .match = parisc_generic_match,
617 .uevent = parisc_uevent,
590 .dev_attrs = parisc_device_attrs, 618 .dev_attrs = parisc_device_attrs,
591 .probe = parisc_driver_probe, 619 .probe = parisc_driver_probe,
592 .remove = parisc_driver_remove, 620 .remove = parisc_driver_remove,
diff --git a/arch/parisc/kernel/hpmc.S b/arch/parisc/kernel/hpmc.S
index 2cbf13b3ef11..5595a2f31181 100644
--- a/arch/parisc/kernel/hpmc.S
+++ b/arch/parisc/kernel/hpmc.S
@@ -80,6 +80,7 @@ END(hpmc_pim_data)
80 80
81 .import intr_save, code 81 .import intr_save, code
82ENTRY(os_hpmc) 82ENTRY(os_hpmc)
83.os_hpmc:
83 84
84 /* 85 /*
85 * registers modified: 86 * registers modified:
@@ -295,5 +296,10 @@ os_hpmc_6:
295 b . 296 b .
296 nop 297 nop
297ENDPROC(os_hpmc) 298ENDPROC(os_hpmc)
298ENTRY(os_hpmc_end) /* this label used to compute os_hpmc checksum */ 299.os_hpmc_end:
299 nop 300 nop
301.data
302.align 4
303 .export os_hpmc_size
304os_hpmc_size:
305 .word .os_hpmc_end-.os_hpmc
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index 4cea935e2f99..ac2c822928c7 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -298,7 +298,7 @@ unsigned long txn_affinity_addr(unsigned int irq, int cpu)
298 irq_desc[irq].affinity = cpumask_of_cpu(cpu); 298 irq_desc[irq].affinity = cpumask_of_cpu(cpu);
299#endif 299#endif
300 300
301 return cpu_data[cpu].txn_addr; 301 return per_cpu(cpu_data, cpu).txn_addr;
302} 302}
303 303
304 304
@@ -309,8 +309,9 @@ unsigned long txn_alloc_addr(unsigned int virt_irq)
309 next_cpu++; /* assign to "next" CPU we want this bugger on */ 309 next_cpu++; /* assign to "next" CPU we want this bugger on */
310 310
311 /* validate entry */ 311 /* validate entry */
312 while ((next_cpu < NR_CPUS) && (!cpu_data[next_cpu].txn_addr || 312 while ((next_cpu < NR_CPUS) &&
313 !cpu_online(next_cpu))) 313 (!per_cpu(cpu_data, next_cpu).txn_addr ||
314 !cpu_online(next_cpu)))
314 next_cpu++; 315 next_cpu++;
315 316
316 if (next_cpu >= NR_CPUS) 317 if (next_cpu >= NR_CPUS)
@@ -359,7 +360,7 @@ void do_cpu_irq_mask(struct pt_regs *regs)
359 printk(KERN_DEBUG "redirecting irq %d from CPU %d to %d\n", 360 printk(KERN_DEBUG "redirecting irq %d from CPU %d to %d\n",
360 irq, smp_processor_id(), cpu); 361 irq, smp_processor_id(), cpu);
361 gsc_writel(irq + CPU_IRQ_BASE, 362 gsc_writel(irq + CPU_IRQ_BASE,
362 cpu_data[cpu].hpa); 363 per_cpu(cpu_data, cpu).hpa);
363 goto set_out; 364 goto set_out;
364 } 365 }
365#endif 366#endif
@@ -421,5 +422,5 @@ void __init init_IRQ(void)
421 422
422void ack_bad_irq(unsigned int irq) 423void ack_bad_irq(unsigned int irq)
423{ 424{
424 printk("unexpected IRQ %d\n", irq); 425 printk(KERN_WARNING "unexpected IRQ %d\n", irq);
425} 426}
diff --git a/arch/parisc/kernel/pdc_cons.c b/arch/parisc/kernel/pdc_cons.c
index ccb68090781e..1ff366cb9685 100644
--- a/arch/parisc/kernel/pdc_cons.c
+++ b/arch/parisc/kernel/pdc_cons.c
@@ -52,7 +52,7 @@
52#include <linux/tty.h> 52#include <linux/tty.h>
53#include <asm/pdc.h> /* for iodc_call() proto and friends */ 53#include <asm/pdc.h> /* for iodc_call() proto and friends */
54 54
55static spinlock_t pdc_console_lock = SPIN_LOCK_UNLOCKED; 55static DEFINE_SPINLOCK(pdc_console_lock);
56 56
57static void pdc_console_write(struct console *co, const char *s, unsigned count) 57static void pdc_console_write(struct console *co, const char *s, unsigned count)
58{ 58{
diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c
index f696f57faa15..75099efb3bf3 100644
--- a/arch/parisc/kernel/perf.c
+++ b/arch/parisc/kernel/perf.c
@@ -541,9 +541,9 @@ static int __init perf_init(void)
541 spin_lock_init(&perf_lock); 541 spin_lock_init(&perf_lock);
542 542
543 /* TODO: this only lets us access the first cpu.. what to do for SMP? */ 543 /* TODO: this only lets us access the first cpu.. what to do for SMP? */
544 cpu_device = cpu_data[0].dev; 544 cpu_device = per_cpu(cpu_data, 0).dev;
545 printk("Performance monitoring counters enabled for %s\n", 545 printk("Performance monitoring counters enabled for %s\n",
546 cpu_data[0].dev->name); 546 per_cpu(cpu_data, 0).dev->name);
547 547
548 return 0; 548 return 0;
549} 549}
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index 370086fb8333..ecb609342feb 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -3,7 +3,7 @@
3 * Initial setup-routines for HP 9000 based hardware. 3 * Initial setup-routines for HP 9000 based hardware.
4 * 4 *
5 * Copyright (C) 1991, 1992, 1995 Linus Torvalds 5 * Copyright (C) 1991, 1992, 1995 Linus Torvalds
6 * Modifications for PA-RISC (C) 1999 Helge Deller <deller@gmx.de> 6 * Modifications for PA-RISC (C) 1999-2008 Helge Deller <deller@gmx.de>
7 * Modifications copyright 1999 SuSE GmbH (Philipp Rumpf) 7 * Modifications copyright 1999 SuSE GmbH (Philipp Rumpf)
8 * Modifications copyright 2000 Martin K. Petersen <mkp@mkp.net> 8 * Modifications copyright 2000 Martin K. Petersen <mkp@mkp.net>
9 * Modifications copyright 2000 Philipp Rumpf <prumpf@tux.org> 9 * Modifications copyright 2000 Philipp Rumpf <prumpf@tux.org>
@@ -46,7 +46,7 @@
46struct system_cpuinfo_parisc boot_cpu_data __read_mostly; 46struct system_cpuinfo_parisc boot_cpu_data __read_mostly;
47EXPORT_SYMBOL(boot_cpu_data); 47EXPORT_SYMBOL(boot_cpu_data);
48 48
49struct cpuinfo_parisc cpu_data[NR_CPUS] __read_mostly; 49DEFINE_PER_CPU(struct cpuinfo_parisc, cpu_data);
50 50
51extern int update_cr16_clocksource(void); /* from time.c */ 51extern int update_cr16_clocksource(void); /* from time.c */
52 52
@@ -69,6 +69,23 @@ extern int update_cr16_clocksource(void); /* from time.c */
69*/ 69*/
70 70
71/** 71/**
72 * init_cpu_profiler - enable/setup per cpu profiling hooks.
73 * @cpunum: The processor instance.
74 *
75 * FIXME: doesn't do much yet...
76 */
77static void __cpuinit
78init_percpu_prof(unsigned long cpunum)
79{
80 struct cpuinfo_parisc *p;
81
82 p = &per_cpu(cpu_data, cpunum);
83 p->prof_counter = 1;
84 p->prof_multiplier = 1;
85}
86
87
88/**
72 * processor_probe - Determine if processor driver should claim this device. 89 * processor_probe - Determine if processor driver should claim this device.
73 * @dev: The device which has been found. 90 * @dev: The device which has been found.
74 * 91 *
@@ -147,7 +164,7 @@ static int __cpuinit processor_probe(struct parisc_device *dev)
147 } 164 }
148#endif 165#endif
149 166
150 p = &cpu_data[cpuid]; 167 p = &per_cpu(cpu_data, cpuid);
151 boot_cpu_data.cpu_count++; 168 boot_cpu_data.cpu_count++;
152 169
153 /* initialize counters - CPU 0 gets it_value set in time_init() */ 170 /* initialize counters - CPU 0 gets it_value set in time_init() */
@@ -162,12 +179,9 @@ static int __cpuinit processor_probe(struct parisc_device *dev)
162#ifdef CONFIG_SMP 179#ifdef CONFIG_SMP
163 /* 180 /*
164 ** FIXME: review if any other initialization is clobbered 181 ** FIXME: review if any other initialization is clobbered
165 ** for boot_cpu by the above memset(). 182 ** for boot_cpu by the above memset().
166 */ 183 */
167 184 init_percpu_prof(cpuid);
168 /* stolen from init_percpu_prof() */
169 cpu_data[cpuid].prof_counter = 1;
170 cpu_data[cpuid].prof_multiplier = 1;
171#endif 185#endif
172 186
173 /* 187 /*
@@ -261,19 +275,6 @@ void __init collect_boot_cpu_data(void)
261} 275}
262 276
263 277
264/**
265 * init_cpu_profiler - enable/setup per cpu profiling hooks.
266 * @cpunum: The processor instance.
267 *
268 * FIXME: doesn't do much yet...
269 */
270static inline void __init
271init_percpu_prof(int cpunum)
272{
273 cpu_data[cpunum].prof_counter = 1;
274 cpu_data[cpunum].prof_multiplier = 1;
275}
276
277 278
278/** 279/**
279 * init_per_cpu - Handle individual processor initializations. 280 * init_per_cpu - Handle individual processor initializations.
@@ -293,7 +294,7 @@ init_percpu_prof(int cpunum)
293 * 294 *
294 * o Enable CPU profiling hooks. 295 * o Enable CPU profiling hooks.
295 */ 296 */
296int __init init_per_cpu(int cpunum) 297int __cpuinit init_per_cpu(int cpunum)
297{ 298{
298 int ret; 299 int ret;
299 struct pdc_coproc_cfg coproc_cfg; 300 struct pdc_coproc_cfg coproc_cfg;
@@ -307,8 +308,8 @@ int __init init_per_cpu(int cpunum)
307 /* FWIW, FP rev/model is a more accurate way to determine 308 /* FWIW, FP rev/model is a more accurate way to determine
308 ** CPU type. CPU rev/model has some ambiguous cases. 309 ** CPU type. CPU rev/model has some ambiguous cases.
309 */ 310 */
310 cpu_data[cpunum].fp_rev = coproc_cfg.revision; 311 per_cpu(cpu_data, cpunum).fp_rev = coproc_cfg.revision;
311 cpu_data[cpunum].fp_model = coproc_cfg.model; 312 per_cpu(cpu_data, cpunum).fp_model = coproc_cfg.model;
312 313
313 printk(KERN_INFO "FP[%d] enabled: Rev %ld Model %ld\n", 314 printk(KERN_INFO "FP[%d] enabled: Rev %ld Model %ld\n",
314 cpunum, coproc_cfg.revision, coproc_cfg.model); 315 cpunum, coproc_cfg.revision, coproc_cfg.model);
@@ -344,16 +345,17 @@ int __init init_per_cpu(int cpunum)
344int 345int
345show_cpuinfo (struct seq_file *m, void *v) 346show_cpuinfo (struct seq_file *m, void *v)
346{ 347{
347 int n; 348 unsigned long cpu;
348 349
349 for(n=0; n<boot_cpu_data.cpu_count; n++) { 350 for_each_online_cpu(cpu) {
351 const struct cpuinfo_parisc *cpuinfo = &per_cpu(cpu_data, cpu);
350#ifdef CONFIG_SMP 352#ifdef CONFIG_SMP
351 if (0 == cpu_data[n].hpa) 353 if (0 == cpuinfo->hpa)
352 continue; 354 continue;
353#endif 355#endif
354 seq_printf(m, "processor\t: %d\n" 356 seq_printf(m, "processor\t: %lu\n"
355 "cpu family\t: PA-RISC %s\n", 357 "cpu family\t: PA-RISC %s\n",
356 n, boot_cpu_data.family_name); 358 cpu, boot_cpu_data.family_name);
357 359
358 seq_printf(m, "cpu\t\t: %s\n", boot_cpu_data.cpu_name ); 360 seq_printf(m, "cpu\t\t: %s\n", boot_cpu_data.cpu_name );
359 361
@@ -365,8 +367,8 @@ show_cpuinfo (struct seq_file *m, void *v)
365 seq_printf(m, "model\t\t: %s\n" 367 seq_printf(m, "model\t\t: %s\n"
366 "model name\t: %s\n", 368 "model name\t: %s\n",
367 boot_cpu_data.pdc.sys_model_name, 369 boot_cpu_data.pdc.sys_model_name,
368 cpu_data[n].dev ? 370 cpuinfo->dev ?
369 cpu_data[n].dev->name : "Unknown" ); 371 cpuinfo->dev->name : "Unknown");
370 372
371 seq_printf(m, "hversion\t: 0x%08x\n" 373 seq_printf(m, "hversion\t: 0x%08x\n"
372 "sversion\t: 0x%08x\n", 374 "sversion\t: 0x%08x\n",
@@ -377,8 +379,8 @@ show_cpuinfo (struct seq_file *m, void *v)
377 show_cache_info(m); 379 show_cache_info(m);
378 380
379 seq_printf(m, "bogomips\t: %lu.%02lu\n", 381 seq_printf(m, "bogomips\t: %lu.%02lu\n",
380 cpu_data[n].loops_per_jiffy / (500000 / HZ), 382 cpuinfo->loops_per_jiffy / (500000 / HZ),
381 (cpu_data[n].loops_per_jiffy / (5000 / HZ)) % 100); 383 (cpuinfo->loops_per_jiffy / (5000 / HZ)) % 100);
382 384
383 seq_printf(m, "software id\t: %ld\n\n", 385 seq_printf(m, "software id\t: %ld\n\n",
384 boot_cpu_data.pdc.model.sw_id); 386 boot_cpu_data.pdc.model.sw_id);
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 7d27853ff8c8..82131ca8e05c 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -58,11 +58,6 @@ int parisc_bus_is_phys __read_mostly = 1; /* Assume no IOMMU is present */
58EXPORT_SYMBOL(parisc_bus_is_phys); 58EXPORT_SYMBOL(parisc_bus_is_phys);
59#endif 59#endif
60 60
61/* This sets the vmerge boundary and size, it's here because it has to
62 * be available on all platforms (zero means no-virtual merging) */
63unsigned long parisc_vmerge_boundary = 0;
64unsigned long parisc_vmerge_max_size = 0;
65
66void __init setup_cmdline(char **cmdline_p) 61void __init setup_cmdline(char **cmdline_p)
67{ 62{
68 extern unsigned int boot_args[]; 63 extern unsigned int boot_args[];
@@ -321,7 +316,7 @@ static int __init parisc_init(void)
321 316
322 processor_init(); 317 processor_init();
323 printk(KERN_INFO "CPU(s): %d x %s at %d.%06d MHz\n", 318 printk(KERN_INFO "CPU(s): %d x %s at %d.%06d MHz\n",
324 boot_cpu_data.cpu_count, 319 num_present_cpus(),
325 boot_cpu_data.cpu_name, 320 boot_cpu_data.cpu_name,
326 boot_cpu_data.cpu_hz / 1000000, 321 boot_cpu_data.cpu_hz / 1000000,
327 boot_cpu_data.cpu_hz % 1000000 ); 322 boot_cpu_data.cpu_hz % 1000000 );
@@ -387,8 +382,8 @@ void start_parisc(void)
387 if (ret >= 0 && coproc_cfg.ccr_functional) { 382 if (ret >= 0 && coproc_cfg.ccr_functional) {
388 mtctl(coproc_cfg.ccr_functional, 10); 383 mtctl(coproc_cfg.ccr_functional, 10);
389 384
390 cpu_data[cpunum].fp_rev = coproc_cfg.revision; 385 per_cpu(cpu_data, cpunum).fp_rev = coproc_cfg.revision;
391 cpu_data[cpunum].fp_model = coproc_cfg.model; 386 per_cpu(cpu_data, cpunum).fp_model = coproc_cfg.model;
392 387
393 asm volatile ("fstd %fr0,8(%sp)"); 388 asm volatile ("fstd %fr0,8(%sp)");
394 } else { 389 } else {
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 80bc000523fa..9995d7ed5819 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -56,16 +56,17 @@ static int smp_debug_lvl = 0;
56 if (lvl >= smp_debug_lvl) \ 56 if (lvl >= smp_debug_lvl) \
57 printk(printargs); 57 printk(printargs);
58#else 58#else
59#define smp_debug(lvl, ...) 59#define smp_debug(lvl, ...) do { } while(0)
60#endif /* DEBUG_SMP */ 60#endif /* DEBUG_SMP */
61 61
62DEFINE_SPINLOCK(smp_lock); 62DEFINE_SPINLOCK(smp_lock);
63 63
64volatile struct task_struct *smp_init_current_idle_task; 64volatile struct task_struct *smp_init_current_idle_task;
65 65
66static volatile int cpu_now_booting __read_mostly = 0; /* track which CPU is booting */ 66/* track which CPU is booting */
67static volatile int cpu_now_booting __cpuinitdata;
67 68
68static int parisc_max_cpus __read_mostly = 1; 69static int parisc_max_cpus __cpuinitdata = 1;
69 70
70DEFINE_PER_CPU(spinlock_t, ipi_lock) = SPIN_LOCK_UNLOCKED; 71DEFINE_PER_CPU(spinlock_t, ipi_lock) = SPIN_LOCK_UNLOCKED;
71 72
@@ -123,7 +124,7 @@ irqreturn_t
123ipi_interrupt(int irq, void *dev_id) 124ipi_interrupt(int irq, void *dev_id)
124{ 125{
125 int this_cpu = smp_processor_id(); 126 int this_cpu = smp_processor_id();
126 struct cpuinfo_parisc *p = &cpu_data[this_cpu]; 127 struct cpuinfo_parisc *p = &per_cpu(cpu_data, this_cpu);
127 unsigned long ops; 128 unsigned long ops;
128 unsigned long flags; 129 unsigned long flags;
129 130
@@ -202,13 +203,13 @@ ipi_interrupt(int irq, void *dev_id)
202static inline void 203static inline void
203ipi_send(int cpu, enum ipi_message_type op) 204ipi_send(int cpu, enum ipi_message_type op)
204{ 205{
205 struct cpuinfo_parisc *p = &cpu_data[cpu]; 206 struct cpuinfo_parisc *p = &per_cpu(cpu_data, cpu);
206 spinlock_t *lock = &per_cpu(ipi_lock, cpu); 207 spinlock_t *lock = &per_cpu(ipi_lock, cpu);
207 unsigned long flags; 208 unsigned long flags;
208 209
209 spin_lock_irqsave(lock, flags); 210 spin_lock_irqsave(lock, flags);
210 p->pending_ipi |= 1 << op; 211 p->pending_ipi |= 1 << op;
211 gsc_writel(IPI_IRQ - CPU_IRQ_BASE, cpu_data[cpu].hpa); 212 gsc_writel(IPI_IRQ - CPU_IRQ_BASE, p->hpa);
212 spin_unlock_irqrestore(lock, flags); 213 spin_unlock_irqrestore(lock, flags);
213} 214}
214 215
@@ -224,10 +225,7 @@ send_IPI_mask(cpumask_t mask, enum ipi_message_type op)
224static inline void 225static inline void
225send_IPI_single(int dest_cpu, enum ipi_message_type op) 226send_IPI_single(int dest_cpu, enum ipi_message_type op)
226{ 227{
227 if (dest_cpu == NO_PROC_ID) { 228 BUG_ON(dest_cpu == NO_PROC_ID);
228 BUG();
229 return;
230 }
231 229
232 ipi_send(dest_cpu, op); 230 ipi_send(dest_cpu, op);
233} 231}
@@ -309,8 +307,7 @@ smp_cpu_init(int cpunum)
309 /* Initialise the idle task for this CPU */ 307 /* Initialise the idle task for this CPU */
310 atomic_inc(&init_mm.mm_count); 308 atomic_inc(&init_mm.mm_count);
311 current->active_mm = &init_mm; 309 current->active_mm = &init_mm;
312 if(current->mm) 310 BUG_ON(current->mm);
313 BUG();
314 enter_lazy_tlb(&init_mm, current); 311 enter_lazy_tlb(&init_mm, current);
315 312
316 init_IRQ(); /* make sure no IRQs are enabled or pending */ 313 init_IRQ(); /* make sure no IRQs are enabled or pending */
@@ -345,6 +342,7 @@ void __init smp_callin(void)
345 */ 342 */
346int __cpuinit smp_boot_one_cpu(int cpuid) 343int __cpuinit smp_boot_one_cpu(int cpuid)
347{ 344{
345 const struct cpuinfo_parisc *p = &per_cpu(cpu_data, cpuid);
348 struct task_struct *idle; 346 struct task_struct *idle;
349 long timeout; 347 long timeout;
350 348
@@ -376,7 +374,7 @@ int __cpuinit smp_boot_one_cpu(int cpuid)
376 smp_init_current_idle_task = idle ; 374 smp_init_current_idle_task = idle ;
377 mb(); 375 mb();
378 376
379 printk("Releasing cpu %d now, hpa=%lx\n", cpuid, cpu_data[cpuid].hpa); 377 printk(KERN_INFO "Releasing cpu %d now, hpa=%lx\n", cpuid, p->hpa);
380 378
381 /* 379 /*
382 ** This gets PDC to release the CPU from a very tight loop. 380 ** This gets PDC to release the CPU from a very tight loop.
@@ -387,7 +385,7 @@ int __cpuinit smp_boot_one_cpu(int cpuid)
387 ** EIR{0}). MEM_RENDEZ is valid only when it is nonzero and the 385 ** EIR{0}). MEM_RENDEZ is valid only when it is nonzero and the
388 ** contents of memory are valid." 386 ** contents of memory are valid."
389 */ 387 */
390 gsc_writel(TIMER_IRQ - CPU_IRQ_BASE, cpu_data[cpuid].hpa); 388 gsc_writel(TIMER_IRQ - CPU_IRQ_BASE, p->hpa);
391 mb(); 389 mb();
392 390
393 /* 391 /*
@@ -419,12 +417,12 @@ alive:
419 return 0; 417 return 0;
420} 418}
421 419
422void __devinit smp_prepare_boot_cpu(void) 420void __init smp_prepare_boot_cpu(void)
423{ 421{
424 int bootstrap_processor=cpu_data[0].cpuid; /* CPU ID of BSP */ 422 int bootstrap_processor = per_cpu(cpu_data, 0).cpuid;
425 423
426 /* Setup BSP mappings */ 424 /* Setup BSP mappings */
427 printk("SMP: bootstrap CPU ID is %d\n",bootstrap_processor); 425 printk(KERN_INFO "SMP: bootstrap CPU ID is %d\n", bootstrap_processor);
428 426
429 cpu_set(bootstrap_processor, cpu_online_map); 427 cpu_set(bootstrap_processor, cpu_online_map);
430 cpu_set(bootstrap_processor, cpu_present_map); 428 cpu_set(bootstrap_processor, cpu_present_map);
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 4d09203bc693..9d46c43a4152 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -60,7 +60,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
60 unsigned long cycles_elapsed, ticks_elapsed; 60 unsigned long cycles_elapsed, ticks_elapsed;
61 unsigned long cycles_remainder; 61 unsigned long cycles_remainder;
62 unsigned int cpu = smp_processor_id(); 62 unsigned int cpu = smp_processor_id();
63 struct cpuinfo_parisc *cpuinfo = &cpu_data[cpu]; 63 struct cpuinfo_parisc *cpuinfo = &per_cpu(cpu_data, cpu);
64 64
65 /* gcc can optimize for "read-only" case with a local clocktick */ 65 /* gcc can optimize for "read-only" case with a local clocktick */
66 unsigned long cpt = clocktick; 66 unsigned long cpt = clocktick;
@@ -213,7 +213,7 @@ void __init start_cpu_itimer(void)
213 213
214 mtctl(next_tick, 16); /* kick off Interval Timer (CR16) */ 214 mtctl(next_tick, 16); /* kick off Interval Timer (CR16) */
215 215
216 cpu_data[cpu].it_value = next_tick; 216 per_cpu(cpu_data, cpu).it_value = next_tick;
217} 217}
218 218
219struct platform_device rtc_parisc_dev = { 219struct platform_device rtc_parisc_dev = {
diff --git a/arch/parisc/kernel/topology.c b/arch/parisc/kernel/topology.c
index d71cb018a21e..f5159381fdd6 100644
--- a/arch/parisc/kernel/topology.c
+++ b/arch/parisc/kernel/topology.c
@@ -22,14 +22,14 @@
22#include <linux/cpu.h> 22#include <linux/cpu.h>
23#include <linux/cache.h> 23#include <linux/cache.h>
24 24
25static struct cpu cpu_devices[NR_CPUS] __read_mostly; 25static DEFINE_PER_CPU(struct cpu, cpu_devices);
26 26
27static int __init topology_init(void) 27static int __init topology_init(void)
28{ 28{
29 int num; 29 int num;
30 30
31 for_each_present_cpu(num) { 31 for_each_present_cpu(num) {
32 register_cpu(&cpu_devices[num], num); 32 register_cpu(&per_cpu(cpu_devices, num), num);
33 } 33 }
34 return 0; 34 return 0;
35} 35}
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 4c771cd580ec..ba658d2086f7 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -745,6 +745,10 @@ void handle_interruption(int code, struct pt_regs *regs)
745 /* Fall Through */ 745 /* Fall Through */
746 case 27: 746 case 27:
747 /* Data memory protection ID trap */ 747 /* Data memory protection ID trap */
748 if (code == 27 && !user_mode(regs) &&
749 fixup_exception(regs))
750 return;
751
748 die_if_kernel("Protection id trap", regs, code); 752 die_if_kernel("Protection id trap", regs, code);
749 si.si_code = SEGV_MAPERR; 753 si.si_code = SEGV_MAPERR;
750 si.si_signo = SIGSEGV; 754 si.si_signo = SIGSEGV;
@@ -821,8 +825,8 @@ void handle_interruption(int code, struct pt_regs *regs)
821 825
822int __init check_ivt(void *iva) 826int __init check_ivt(void *iva)
823{ 827{
828 extern u32 os_hpmc_size;
824 extern const u32 os_hpmc[]; 829 extern const u32 os_hpmc[];
825 extern const u32 os_hpmc_end[];
826 830
827 int i; 831 int i;
828 u32 check = 0; 832 u32 check = 0;
@@ -839,8 +843,7 @@ int __init check_ivt(void *iva)
839 *ivap++ = 0; 843 *ivap++ = 0;
840 844
841 /* Compute Checksum for HPMC handler */ 845 /* Compute Checksum for HPMC handler */
842 846 length = os_hpmc_size;
843 length = os_hpmc_end - os_hpmc;
844 ivap[7] = length; 847 ivap[7] = length;
845 848
846 hpmcp = (u32 *)os_hpmc; 849 hpmcp = (u32 *)os_hpmc;
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 6773c582e457..69dad5a850a8 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -372,7 +372,7 @@ void unwind_frame_init_from_blocked_task(struct unwind_frame_info *info, struct
372 struct pt_regs *r = &t->thread.regs; 372 struct pt_regs *r = &t->thread.regs;
373 struct pt_regs *r2; 373 struct pt_regs *r2;
374 374
375 r2 = kmalloc(sizeof(struct pt_regs), GFP_KERNEL); 375 r2 = kmalloc(sizeof(struct pt_regs), GFP_ATOMIC);
376 if (!r2) 376 if (!r2)
377 return; 377 return;
378 *r2 = *r; 378 *r2 = *r;
diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c
index 9abed07db7fc..5069e8b2ca71 100644
--- a/arch/parisc/lib/iomap.c
+++ b/arch/parisc/lib/iomap.c
@@ -261,7 +261,7 @@ static const struct iomap_ops iomem_ops = {
261 iomem_write32r, 261 iomem_write32r,
262}; 262};
263 263
264const struct iomap_ops *iomap_ops[8] = { 264static const struct iomap_ops *iomap_ops[8] = {
265 [0] = &ioport_ops, 265 [0] = &ioport_ops,
266 [7] = &iomem_ops 266 [7] = &iomem_ops
267}; 267};
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
index 2d68431fc22e..bbda909c866e 100644
--- a/arch/parisc/lib/memcpy.c
+++ b/arch/parisc/lib/memcpy.c
@@ -275,7 +275,7 @@ handle_store_error:
275 275
276 276
277/* Returns 0 for success, otherwise, returns number of bytes not transferred. */ 277/* Returns 0 for success, otherwise, returns number of bytes not transferred. */
278unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) 278static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
279{ 279{
280 register unsigned long src, dst, t1, t2, t3; 280 register unsigned long src, dst, t1, t2, t3;
281 register unsigned char *pcs, *pcd; 281 register unsigned char *pcs, *pcd;
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index b2e3e9a8cece..92c7fa4ecc3f 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -139,13 +139,41 @@ parisc_acctyp(unsigned long code, unsigned int inst)
139 } 139 }
140#endif 140#endif
141 141
142int fixup_exception(struct pt_regs *regs)
143{
144 const struct exception_table_entry *fix;
145
146 fix = search_exception_tables(regs->iaoq[0]);
147 if (fix) {
148 struct exception_data *d;
149 d = &__get_cpu_var(exception_data);
150 d->fault_ip = regs->iaoq[0];
151 d->fault_space = regs->isr;
152 d->fault_addr = regs->ior;
153
154 regs->iaoq[0] = ((fix->fixup) & ~3);
155 /*
156 * NOTE: In some cases the faulting instruction
157 * may be in the delay slot of a branch. We
158 * don't want to take the branch, so we don't
159 * increment iaoq[1], instead we set it to be
160 * iaoq[0]+4, and clear the B bit in the PSW
161 */
162 regs->iaoq[1] = regs->iaoq[0] + 4;
163 regs->gr[0] &= ~PSW_B; /* IPSW in gr[0] */
164
165 return 1;
166 }
167
168 return 0;
169}
170
142void do_page_fault(struct pt_regs *regs, unsigned long code, 171void do_page_fault(struct pt_regs *regs, unsigned long code,
143 unsigned long address) 172 unsigned long address)
144{ 173{
145 struct vm_area_struct *vma, *prev_vma; 174 struct vm_area_struct *vma, *prev_vma;
146 struct task_struct *tsk = current; 175 struct task_struct *tsk = current;
147 struct mm_struct *mm = tsk->mm; 176 struct mm_struct *mm = tsk->mm;
148 const struct exception_table_entry *fix;
149 unsigned long acc_type; 177 unsigned long acc_type;
150 int fault; 178 int fault;
151 179
@@ -229,32 +257,8 @@ bad_area:
229 257
230no_context: 258no_context:
231 259
232 if (!user_mode(regs)) { 260 if (!user_mode(regs) && fixup_exception(regs)) {
233 fix = search_exception_tables(regs->iaoq[0]); 261 return;
234
235 if (fix) {
236 struct exception_data *d;
237
238 d = &__get_cpu_var(exception_data);
239 d->fault_ip = regs->iaoq[0];
240 d->fault_space = regs->isr;
241 d->fault_addr = regs->ior;
242
243 regs->iaoq[0] = ((fix->fixup) & ~3);
244
245 /*
246 * NOTE: In some cases the faulting instruction
247 * may be in the delay slot of a branch. We
248 * don't want to take the branch, so we don't
249 * increment iaoq[1], instead we set it to be
250 * iaoq[0]+4, and clear the B bit in the PSW
251 */
252
253 regs->iaoq[1] = regs->iaoq[0] + 4;
254 regs->gr[0] &= ~PSW_B; /* IPSW in gr[0] */
255
256 return;
257 }
258 } 262 }
259 263
260 parisc_terminate("Bad Address (null pointer deref?)", regs, code, address); 264 parisc_terminate("Bad Address (null pointer deref?)", regs, code, address);
diff --git a/arch/powerpc/include/asm/cell-pmu.h b/arch/powerpc/include/asm/cell-pmu.h
index 8066eede3a0c..b4b7338ad79e 100644
--- a/arch/powerpc/include/asm/cell-pmu.h
+++ b/arch/powerpc/include/asm/cell-pmu.h
@@ -37,9 +37,11 @@
37#define CBE_PM_STOP_AT_MAX 0x40000000 37#define CBE_PM_STOP_AT_MAX 0x40000000
38#define CBE_PM_TRACE_MODE_GET(pm_control) (((pm_control) >> 28) & 0x3) 38#define CBE_PM_TRACE_MODE_GET(pm_control) (((pm_control) >> 28) & 0x3)
39#define CBE_PM_TRACE_MODE_SET(mode) (((mode) & 0x3) << 28) 39#define CBE_PM_TRACE_MODE_SET(mode) (((mode) & 0x3) << 28)
40#define CBE_PM_TRACE_BUF_OVFLW(bit) (((bit) & 0x1) << 17)
40#define CBE_PM_COUNT_MODE_SET(count) (((count) & 0x3) << 18) 41#define CBE_PM_COUNT_MODE_SET(count) (((count) & 0x3) << 18)
41#define CBE_PM_FREEZE_ALL_CTRS 0x00100000 42#define CBE_PM_FREEZE_ALL_CTRS 0x00100000
42#define CBE_PM_ENABLE_EXT_TRACE 0x00008000 43#define CBE_PM_ENABLE_EXT_TRACE 0x00008000
44#define CBE_PM_SPU_ADDR_TRACE_SET(msk) (((msk) & 0x3) << 9)
43 45
44/* Macros for the trace_address register. */ 46/* Macros for the trace_address register. */
45#define CBE_PM_TRACE_BUF_FULL 0x00000800 47#define CBE_PM_TRACE_BUF_FULL 0x00000800
diff --git a/arch/powerpc/include/asm/oprofile_impl.h b/arch/powerpc/include/asm/oprofile_impl.h
index 95035c602ba6..639dc96077ab 100644
--- a/arch/powerpc/include/asm/oprofile_impl.h
+++ b/arch/powerpc/include/asm/oprofile_impl.h
@@ -32,6 +32,12 @@ struct op_system_config {
32 unsigned long mmcr0; 32 unsigned long mmcr0;
33 unsigned long mmcr1; 33 unsigned long mmcr1;
34 unsigned long mmcra; 34 unsigned long mmcra;
35#ifdef CONFIG_OPROFILE_CELL
36 /* Register for oprofile user tool to check cell kernel profiling
37 * suport.
38 */
39 unsigned long cell_support;
40#endif
35#endif 41#endif
36 unsigned long enable_kernel; 42 unsigned long enable_kernel;
37 unsigned long enable_user; 43 unsigned long enable_user;
diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h
index dfdbffa06818..964b93974d89 100644
--- a/arch/powerpc/oprofile/cell/pr_util.h
+++ b/arch/powerpc/oprofile/cell/pr_util.h
@@ -30,6 +30,10 @@
30extern struct delayed_work spu_work; 30extern struct delayed_work spu_work;
31extern int spu_prof_running; 31extern int spu_prof_running;
32 32
33#define TRACE_ARRAY_SIZE 1024
34
35extern spinlock_t oprof_spu_smpl_arry_lck;
36
33struct spu_overlay_info { /* map of sections within an SPU overlay */ 37struct spu_overlay_info { /* map of sections within an SPU overlay */
34 unsigned int vma; /* SPU virtual memory address from elf */ 38 unsigned int vma; /* SPU virtual memory address from elf */
35 unsigned int size; /* size of section from elf */ 39 unsigned int size; /* size of section from elf */
@@ -89,10 +93,11 @@ void vma_map_free(struct vma_to_fileoffset_map *map);
89 * Entry point for SPU profiling. 93 * Entry point for SPU profiling.
90 * cycles_reset is the SPU_CYCLES count value specified by the user. 94 * cycles_reset is the SPU_CYCLES count value specified by the user.
91 */ 95 */
92int start_spu_profiling(unsigned int cycles_reset); 96int start_spu_profiling_cycles(unsigned int cycles_reset);
93 97void start_spu_profiling_events(void);
94void stop_spu_profiling(void);
95 98
99void stop_spu_profiling_cycles(void);
100void stop_spu_profiling_events(void);
96 101
97/* add the necessary profiling hooks */ 102/* add the necessary profiling hooks */
98int spu_sync_start(void); 103int spu_sync_start(void);
diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c
index 83faa958b9d4..9305ddaac512 100644
--- a/arch/powerpc/oprofile/cell/spu_profiler.c
+++ b/arch/powerpc/oprofile/cell/spu_profiler.c
@@ -18,11 +18,21 @@
18#include <asm/cell-pmu.h> 18#include <asm/cell-pmu.h>
19#include "pr_util.h" 19#include "pr_util.h"
20 20
21#define TRACE_ARRAY_SIZE 1024
22#define SCALE_SHIFT 14 21#define SCALE_SHIFT 14
23 22
24static u32 *samples; 23static u32 *samples;
25 24
25/* spu_prof_running is a flag used to indicate if spu profiling is enabled
26 * or not. It is set by the routines start_spu_profiling_cycles() and
27 * start_spu_profiling_events(). The flag is cleared by the routines
28 * stop_spu_profiling_cycles() and stop_spu_profiling_events(). These
29 * routines are called via global_start() and global_stop() which are called in
30 * op_powerpc_start() and op_powerpc_stop(). These routines are called once
31 * per system as a result of the user starting/stopping oprofile. Hence, only
32 * one CPU per user at a time will be changing the value of spu_prof_running.
33 * In general, OProfile does not protect against multiple users trying to run
34 * OProfile at a time.
35 */
26int spu_prof_running; 36int spu_prof_running;
27static unsigned int profiling_interval; 37static unsigned int profiling_interval;
28 38
@@ -31,8 +41,8 @@ static unsigned int profiling_interval;
31 41
32#define SPU_PC_MASK 0xFFFF 42#define SPU_PC_MASK 0xFFFF
33 43
34static DEFINE_SPINLOCK(sample_array_lock); 44DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck);
35unsigned long sample_array_lock_flags; 45unsigned long oprof_spu_smpl_arry_lck_flags;
36 46
37void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset) 47void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
38{ 48{
@@ -145,13 +155,13 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
145 * sample array must be loaded and then processed for a given 155 * sample array must be loaded and then processed for a given
146 * cpu. The sample array is not per cpu. 156 * cpu. The sample array is not per cpu.
147 */ 157 */
148 spin_lock_irqsave(&sample_array_lock, 158 spin_lock_irqsave(&oprof_spu_smpl_arry_lck,
149 sample_array_lock_flags); 159 oprof_spu_smpl_arry_lck_flags);
150 num_samples = cell_spu_pc_collection(cpu); 160 num_samples = cell_spu_pc_collection(cpu);
151 161
152 if (num_samples == 0) { 162 if (num_samples == 0) {
153 spin_unlock_irqrestore(&sample_array_lock, 163 spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
154 sample_array_lock_flags); 164 oprof_spu_smpl_arry_lck_flags);
155 continue; 165 continue;
156 } 166 }
157 167
@@ -162,8 +172,8 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
162 num_samples); 172 num_samples);
163 } 173 }
164 174
165 spin_unlock_irqrestore(&sample_array_lock, 175 spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
166 sample_array_lock_flags); 176 oprof_spu_smpl_arry_lck_flags);
167 177
168 } 178 }
169 smp_wmb(); /* insure spu event buffer updates are written */ 179 smp_wmb(); /* insure spu event buffer updates are written */
@@ -182,13 +192,13 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
182 192
183static struct hrtimer timer; 193static struct hrtimer timer;
184/* 194/*
185 * Entry point for SPU profiling. 195 * Entry point for SPU cycle profiling.
186 * NOTE: SPU profiling is done system-wide, not per-CPU. 196 * NOTE: SPU profiling is done system-wide, not per-CPU.
187 * 197 *
188 * cycles_reset is the count value specified by the user when 198 * cycles_reset is the count value specified by the user when
189 * setting up OProfile to count SPU_CYCLES. 199 * setting up OProfile to count SPU_CYCLES.
190 */ 200 */
191int start_spu_profiling(unsigned int cycles_reset) 201int start_spu_profiling_cycles(unsigned int cycles_reset)
192{ 202{
193 ktime_t kt; 203 ktime_t kt;
194 204
@@ -212,10 +222,30 @@ int start_spu_profiling(unsigned int cycles_reset)
212 return 0; 222 return 0;
213} 223}
214 224
215void stop_spu_profiling(void) 225/*
226 * Entry point for SPU event profiling.
227 * NOTE: SPU profiling is done system-wide, not per-CPU.
228 *
229 * cycles_reset is the count value specified by the user when
230 * setting up OProfile to count SPU_CYCLES.
231 */
232void start_spu_profiling_events(void)
233{
234 spu_prof_running = 1;
235 schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
236
237 return;
238}
239
240void stop_spu_profiling_cycles(void)
216{ 241{
217 spu_prof_running = 0; 242 spu_prof_running = 0;
218 hrtimer_cancel(&timer); 243 hrtimer_cancel(&timer);
219 kfree(samples); 244 kfree(samples);
220 pr_debug("SPU_PROF: stop_spu_profiling issued\n"); 245 pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n");
246}
247
248void stop_spu_profiling_events(void)
249{
250 spu_prof_running = 0;
221} 251}
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c
index 17807acb05d9..21f16edf6c8d 100644
--- a/arch/powerpc/oprofile/common.c
+++ b/arch/powerpc/oprofile/common.c
@@ -132,6 +132,28 @@ static int op_powerpc_create_files(struct super_block *sb, struct dentry *root)
132 oprofilefs_create_ulong(sb, root, "mmcr0", &sys.mmcr0); 132 oprofilefs_create_ulong(sb, root, "mmcr0", &sys.mmcr0);
133 oprofilefs_create_ulong(sb, root, "mmcr1", &sys.mmcr1); 133 oprofilefs_create_ulong(sb, root, "mmcr1", &sys.mmcr1);
134 oprofilefs_create_ulong(sb, root, "mmcra", &sys.mmcra); 134 oprofilefs_create_ulong(sb, root, "mmcra", &sys.mmcra);
135#ifdef CONFIG_OPROFILE_CELL
136 /* create a file the user tool can check to see what level of profiling
137 * support exits with this kernel. Initialize bit mask to indicate
138 * what support the kernel has:
139 * bit 0 - Supports SPU event profiling in addition to PPU
140 * event and cycles; and SPU cycle profiling
141 * bits 1-31 - Currently unused.
142 *
143 * If the file does not exist, then the kernel only supports SPU
144 * cycle profiling, PPU event and cycle profiling.
145 */
146 oprofilefs_create_ulong(sb, root, "cell_support", &sys.cell_support);
147 sys.cell_support = 0x1; /* Note, the user OProfile tool must check
148 * that this bit is set before attempting to
149 * user SPU event profiling. Older kernels
150 * will not have this file, hence the user
151 * tool is not allowed to do SPU event
152 * profiling on older kernels. Older kernels
153 * will accept SPU events but collected data
154 * is garbage.
155 */
156#endif
135#endif 157#endif
136 158
137 for (i = 0; i < model->num_counters; ++i) { 159 for (i = 0; i < model->num_counters; ++i) {
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
index 25a4ec2514a3..ae06c6236d9c 100644
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -40,14 +40,15 @@
40#include "../platforms/cell/interrupt.h" 40#include "../platforms/cell/interrupt.h"
41#include "cell/pr_util.h" 41#include "cell/pr_util.h"
42 42
43static void cell_global_stop_spu(void); 43#define PPU_PROFILING 0
44#define SPU_PROFILING_CYCLES 1
45#define SPU_PROFILING_EVENTS 2
44 46
45/* 47#define SPU_EVENT_NUM_START 4100
46 * spu_cycle_reset is the number of cycles between samples. 48#define SPU_EVENT_NUM_STOP 4399
47 * This variable is used for SPU profiling and should ONLY be set 49#define SPU_PROFILE_EVENT_ADDR 4363 /* spu, address trace, decimal */
48 * at the beginning of cell_reg_setup; otherwise, it's read-only. 50#define SPU_PROFILE_EVENT_ADDR_MASK_A 0x146 /* sub unit set to zero */
49 */ 51#define SPU_PROFILE_EVENT_ADDR_MASK_B 0x186 /* sub unit set to zero */
50static unsigned int spu_cycle_reset;
51 52
52#define NUM_SPUS_PER_NODE 8 53#define NUM_SPUS_PER_NODE 8
53#define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */ 54#define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */
@@ -66,6 +67,21 @@ static unsigned int spu_cycle_reset;
66 67
67#define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */ 68#define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */
68 69
70/* Minumum HW interval timer setting to send value to trace buffer is 10 cycle.
71 * To configure counter to send value every N cycles set counter to
72 * 2^32 - 1 - N.
73 */
74#define NUM_INTERVAL_CYC 0xFFFFFFFF - 10
75
76/*
77 * spu_cycle_reset is the number of cycles between samples.
78 * This variable is used for SPU profiling and should ONLY be set
79 * at the beginning of cell_reg_setup; otherwise, it's read-only.
80 */
81static unsigned int spu_cycle_reset;
82static unsigned int profiling_mode;
83static int spu_evnt_phys_spu_indx;
84
69struct pmc_cntrl_data { 85struct pmc_cntrl_data {
70 unsigned long vcntr; 86 unsigned long vcntr;
71 unsigned long evnts; 87 unsigned long evnts;
@@ -105,6 +121,8 @@ struct pm_cntrl {
105 u16 trace_mode; 121 u16 trace_mode;
106 u16 freeze; 122 u16 freeze;
107 u16 count_mode; 123 u16 count_mode;
124 u16 spu_addr_trace;
125 u8 trace_buf_ovflw;
108}; 126};
109 127
110static struct { 128static struct {
@@ -122,7 +140,7 @@ static struct {
122#define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2) 140#define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2)
123 141
124static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values); 142static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
125 143static unsigned long spu_pm_cnt[MAX_NUMNODES * NUM_SPUS_PER_NODE];
126static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; 144static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
127 145
128/* 146/*
@@ -152,6 +170,7 @@ static u32 hdw_thread;
152 170
153static u32 virt_cntr_inter_mask; 171static u32 virt_cntr_inter_mask;
154static struct timer_list timer_virt_cntr; 172static struct timer_list timer_virt_cntr;
173static struct timer_list timer_spu_event_swap;
155 174
156/* 175/*
157 * pm_signal needs to be global since it is initialized in 176 * pm_signal needs to be global since it is initialized in
@@ -165,7 +184,7 @@ static int spu_rtas_token; /* token for SPU cycle profiling */
165static u32 reset_value[NR_PHYS_CTRS]; 184static u32 reset_value[NR_PHYS_CTRS];
166static int num_counters; 185static int num_counters;
167static int oprofile_running; 186static int oprofile_running;
168static DEFINE_SPINLOCK(virt_cntr_lock); 187static DEFINE_SPINLOCK(cntr_lock);
169 188
170static u32 ctr_enabled; 189static u32 ctr_enabled;
171 190
@@ -336,13 +355,13 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
336 for (i = 0; i < NUM_DEBUG_BUS_WORDS; i++) { 355 for (i = 0; i < NUM_DEBUG_BUS_WORDS; i++) {
337 if (bus_word & (1 << i)) { 356 if (bus_word & (1 << i)) {
338 pm_regs.debug_bus_control |= 357 pm_regs.debug_bus_control |=
339 (bus_type << (30 - (2 * i))); 358 (bus_type << (30 - (2 * i)));
340 359
341 for (j = 0; j < NUM_INPUT_BUS_WORDS; j++) { 360 for (j = 0; j < NUM_INPUT_BUS_WORDS; j++) {
342 if (input_bus[j] == 0xff) { 361 if (input_bus[j] == 0xff) {
343 input_bus[j] = i; 362 input_bus[j] = i;
344 pm_regs.group_control |= 363 pm_regs.group_control |=
345 (i << (30 - (2 * j))); 364 (i << (30 - (2 * j)));
346 365
347 break; 366 break;
348 } 367 }
@@ -367,12 +386,16 @@ static void write_pm_cntrl(int cpu)
367 if (pm_regs.pm_cntrl.stop_at_max == 1) 386 if (pm_regs.pm_cntrl.stop_at_max == 1)
368 val |= CBE_PM_STOP_AT_MAX; 387 val |= CBE_PM_STOP_AT_MAX;
369 388
370 if (pm_regs.pm_cntrl.trace_mode == 1) 389 if (pm_regs.pm_cntrl.trace_mode != 0)
371 val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode); 390 val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode);
372 391
392 if (pm_regs.pm_cntrl.trace_buf_ovflw == 1)
393 val |= CBE_PM_TRACE_BUF_OVFLW(pm_regs.pm_cntrl.trace_buf_ovflw);
373 if (pm_regs.pm_cntrl.freeze == 1) 394 if (pm_regs.pm_cntrl.freeze == 1)
374 val |= CBE_PM_FREEZE_ALL_CTRS; 395 val |= CBE_PM_FREEZE_ALL_CTRS;
375 396
397 val |= CBE_PM_SPU_ADDR_TRACE_SET(pm_regs.pm_cntrl.spu_addr_trace);
398
376 /* 399 /*
377 * Routine set_count_mode must be called previously to set 400 * Routine set_count_mode must be called previously to set
378 * the count mode based on the user selection of user and kernel. 401 * the count mode based on the user selection of user and kernel.
@@ -441,7 +464,7 @@ static void cell_virtual_cntr(unsigned long data)
441 * not both playing with the counters on the same node. 464 * not both playing with the counters on the same node.
442 */ 465 */
443 466
444 spin_lock_irqsave(&virt_cntr_lock, flags); 467 spin_lock_irqsave(&cntr_lock, flags);
445 468
446 prev_hdw_thread = hdw_thread; 469 prev_hdw_thread = hdw_thread;
447 470
@@ -480,7 +503,7 @@ static void cell_virtual_cntr(unsigned long data)
480 cbe_disable_pm_interrupts(cpu); 503 cbe_disable_pm_interrupts(cpu);
481 for (i = 0; i < num_counters; i++) { 504 for (i = 0; i < num_counters; i++) {
482 per_cpu(pmc_values, cpu + prev_hdw_thread)[i] 505 per_cpu(pmc_values, cpu + prev_hdw_thread)[i]
483 = cbe_read_ctr(cpu, i); 506 = cbe_read_ctr(cpu, i);
484 507
485 if (per_cpu(pmc_values, cpu + next_hdw_thread)[i] 508 if (per_cpu(pmc_values, cpu + next_hdw_thread)[i]
486 == 0xFFFFFFFF) 509 == 0xFFFFFFFF)
@@ -527,7 +550,7 @@ static void cell_virtual_cntr(unsigned long data)
527 cbe_enable_pm(cpu); 550 cbe_enable_pm(cpu);
528 } 551 }
529 552
530 spin_unlock_irqrestore(&virt_cntr_lock, flags); 553 spin_unlock_irqrestore(&cntr_lock, flags);
531 554
532 mod_timer(&timer_virt_cntr, jiffies + HZ / 10); 555 mod_timer(&timer_virt_cntr, jiffies + HZ / 10);
533} 556}
@@ -541,38 +564,146 @@ static void start_virt_cntrs(void)
541 add_timer(&timer_virt_cntr); 564 add_timer(&timer_virt_cntr);
542} 565}
543 566
544/* This function is called once for all cpus combined */ 567static int cell_reg_setup_spu_cycles(struct op_counter_config *ctr,
545static int cell_reg_setup(struct op_counter_config *ctr,
546 struct op_system_config *sys, int num_ctrs) 568 struct op_system_config *sys, int num_ctrs)
547{ 569{
548 int i, j, cpu; 570 spu_cycle_reset = ctr[0].count;
549 spu_cycle_reset = 0;
550 571
551 if (ctr[0].event == SPU_CYCLES_EVENT_NUM) { 572 /*
552 spu_cycle_reset = ctr[0].count; 573 * Each node will need to make the rtas call to start
574 * and stop SPU profiling. Get the token once and store it.
575 */
576 spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
577
578 if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
579 printk(KERN_ERR
580 "%s: rtas token ibm,cbe-spu-perftools unknown\n",
581 __func__);
582 return -EIO;
583 }
584 return 0;
585}
586
587/* Unfortunately, the hardware will only support event profiling
588 * on one SPU per node at a time. Therefore, we must time slice
589 * the profiling across all SPUs in the node. Note, we do this
590 * in parallel for each node. The following routine is called
591 * periodically based on kernel timer to switch which SPU is
592 * being monitored in a round robbin fashion.
593 */
594static void spu_evnt_swap(unsigned long data)
595{
596 int node;
597 int cur_phys_spu, nxt_phys_spu, cur_spu_evnt_phys_spu_indx;
598 unsigned long flags;
599 int cpu;
600 int ret;
601 u32 interrupt_mask;
602
603
604 /* enable interrupts on cntr 0 */
605 interrupt_mask = CBE_PM_CTR_OVERFLOW_INTR(0);
606
607 hdw_thread = 0;
608
609 /* Make sure spu event interrupt handler and spu event swap
610 * don't access the counters simultaneously.
611 */
612 spin_lock_irqsave(&cntr_lock, flags);
613
614 cur_spu_evnt_phys_spu_indx = spu_evnt_phys_spu_indx;
615
616 if (++(spu_evnt_phys_spu_indx) == NUM_SPUS_PER_NODE)
617 spu_evnt_phys_spu_indx = 0;
618
619 pm_signal[0].sub_unit = spu_evnt_phys_spu_indx;
620 pm_signal[1].sub_unit = spu_evnt_phys_spu_indx;
621 pm_signal[2].sub_unit = spu_evnt_phys_spu_indx;
622
623 /* switch the SPU being profiled on each node */
624 for_each_online_cpu(cpu) {
625 if (cbe_get_hw_thread_id(cpu))
626 continue;
627
628 node = cbe_cpu_to_node(cpu);
629 cur_phys_spu = (node * NUM_SPUS_PER_NODE)
630 + cur_spu_evnt_phys_spu_indx;
631 nxt_phys_spu = (node * NUM_SPUS_PER_NODE)
632 + spu_evnt_phys_spu_indx;
553 633
554 /* 634 /*
555 * Each node will need to make the rtas call to start 635 * stop counters, save counter values, restore counts
556 * and stop SPU profiling. Get the token once and store it. 636 * for previous physical SPU
557 */ 637 */
558 spu_rtas_token = rtas_token("ibm,cbe-spu-perftools"); 638 cbe_disable_pm(cpu);
639 cbe_disable_pm_interrupts(cpu);
559 640
560 if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) { 641 spu_pm_cnt[cur_phys_spu]
561 printk(KERN_ERR 642 = cbe_read_ctr(cpu, 0);
562 "%s: rtas token ibm,cbe-spu-perftools unknown\n", 643
563 __func__); 644 /* restore previous count for the next spu to sample */
564 return -EIO; 645 /* NOTE, hardware issue, counter will not start if the
565 } 646 * counter value is at max (0xFFFFFFFF).
647 */
648 if (spu_pm_cnt[nxt_phys_spu] >= 0xFFFFFFFF)
649 cbe_write_ctr(cpu, 0, 0xFFFFFFF0);
650 else
651 cbe_write_ctr(cpu, 0, spu_pm_cnt[nxt_phys_spu]);
652
653 pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
654
655 /* setup the debug bus measure the one event and
656 * the two events to route the next SPU's PC on
657 * the debug bus
658 */
659 ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 3);
660 if (ret)
661 printk(KERN_ERR "%s: pm_rtas_activate_signals failed, "
662 "SPU event swap\n", __func__);
663
664 /* clear the trace buffer, don't want to take PC for
665 * previous SPU*/
666 cbe_write_pm(cpu, trace_address, 0);
667
668 enable_ctr(cpu, 0, pm_regs.pm07_cntrl);
669
670 /* Enable interrupts on the CPU thread that is starting */
671 cbe_enable_pm_interrupts(cpu, hdw_thread,
672 interrupt_mask);
673 cbe_enable_pm(cpu);
566 } 674 }
567 675
568 pm_rtas_token = rtas_token("ibm,cbe-perftools"); 676 spin_unlock_irqrestore(&cntr_lock, flags);
569 677
678 /* swap approximately every 0.1 seconds */
679 mod_timer(&timer_spu_event_swap, jiffies + HZ / 25);
680}
681
682static void start_spu_event_swap(void)
683{
684 init_timer(&timer_spu_event_swap);
685 timer_spu_event_swap.function = spu_evnt_swap;
686 timer_spu_event_swap.data = 0UL;
687 timer_spu_event_swap.expires = jiffies + HZ / 25;
688 add_timer(&timer_spu_event_swap);
689}
690
691static int cell_reg_setup_spu_events(struct op_counter_config *ctr,
692 struct op_system_config *sys, int num_ctrs)
693{
694 int i;
695
696 /* routine is called once for all nodes */
697
698 spu_evnt_phys_spu_indx = 0;
570 /* 699 /*
571 * For all events excetp PPU CYCLEs, each node will need to make 700 * For all events except PPU CYCLEs, each node will need to make
572 * the rtas cbe-perftools call to setup and reset the debug bus. 701 * the rtas cbe-perftools call to setup and reset the debug bus.
573 * Make the token lookup call once and store it in the global 702 * Make the token lookup call once and store it in the global
574 * variable pm_rtas_token. 703 * variable pm_rtas_token.
575 */ 704 */
705 pm_rtas_token = rtas_token("ibm,cbe-perftools");
706
576 if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { 707 if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
577 printk(KERN_ERR 708 printk(KERN_ERR
578 "%s: rtas token ibm,cbe-perftools unknown\n", 709 "%s: rtas token ibm,cbe-perftools unknown\n",
@@ -580,6 +711,58 @@ static int cell_reg_setup(struct op_counter_config *ctr,
580 return -EIO; 711 return -EIO;
581 } 712 }
582 713
714 /* setup the pm_control register settings,
715 * settings will be written per node by the
716 * cell_cpu_setup() function.
717 */
718 pm_regs.pm_cntrl.trace_buf_ovflw = 1;
719
720 /* Use the occurrence trace mode to have SPU PC saved
721 * to the trace buffer. Occurrence data in trace buffer
722 * is not used. Bit 2 must be set to store SPU addresses.
723 */
724 pm_regs.pm_cntrl.trace_mode = 2;
725
726 pm_regs.pm_cntrl.spu_addr_trace = 0x1; /* using debug bus
727 event 2 & 3 */
728
729 /* setup the debug bus event array with the SPU PC routing events.
730 * Note, pm_signal[0] will be filled in by set_pm_event() call below.
731 */
732 pm_signal[1].signal_group = SPU_PROFILE_EVENT_ADDR / 100;
733 pm_signal[1].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_A);
734 pm_signal[1].bit = SPU_PROFILE_EVENT_ADDR % 100;
735 pm_signal[1].sub_unit = spu_evnt_phys_spu_indx;
736
737 pm_signal[2].signal_group = SPU_PROFILE_EVENT_ADDR / 100;
738 pm_signal[2].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_B);
739 pm_signal[2].bit = SPU_PROFILE_EVENT_ADDR % 100;
740 pm_signal[2].sub_unit = spu_evnt_phys_spu_indx;
741
742 /* Set the user selected spu event to profile on,
743 * note, only one SPU profiling event is supported
744 */
745 num_counters = 1; /* Only support one SPU event at a time */
746 set_pm_event(0, ctr[0].event, ctr[0].unit_mask);
747
748 reset_value[0] = 0xFFFFFFFF - ctr[0].count;
749
750 /* global, used by cell_cpu_setup */
751 ctr_enabled |= 1;
752
753 /* Initialize the count for each SPU to the reset value */
754 for (i=0; i < MAX_NUMNODES * NUM_SPUS_PER_NODE; i++)
755 spu_pm_cnt[i] = reset_value[0];
756
757 return 0;
758}
759
760static int cell_reg_setup_ppu(struct op_counter_config *ctr,
761 struct op_system_config *sys, int num_ctrs)
762{
763 /* routine is called once for all nodes */
764 int i, j, cpu;
765
583 num_counters = num_ctrs; 766 num_counters = num_ctrs;
584 767
585 if (unlikely(num_ctrs > NR_PHYS_CTRS)) { 768 if (unlikely(num_ctrs > NR_PHYS_CTRS)) {
@@ -589,14 +772,6 @@ static int cell_reg_setup(struct op_counter_config *ctr,
589 __func__); 772 __func__);
590 return -EIO; 773 return -EIO;
591 } 774 }
592 pm_regs.group_control = 0;
593 pm_regs.debug_bus_control = 0;
594
595 /* setup the pm_control register */
596 memset(&pm_regs.pm_cntrl, 0, sizeof(struct pm_cntrl));
597 pm_regs.pm_cntrl.stop_at_max = 1;
598 pm_regs.pm_cntrl.trace_mode = 0;
599 pm_regs.pm_cntrl.freeze = 1;
600 775
601 set_count_mode(sys->enable_kernel, sys->enable_user); 776 set_count_mode(sys->enable_kernel, sys->enable_user);
602 777
@@ -665,6 +840,63 @@ static int cell_reg_setup(struct op_counter_config *ctr,
665} 840}
666 841
667 842
843/* This function is called once for all cpus combined */
844static int cell_reg_setup(struct op_counter_config *ctr,
845 struct op_system_config *sys, int num_ctrs)
846{
847 int ret=0;
848 spu_cycle_reset = 0;
849
850 /* initialize the spu_arr_trace value, will be reset if
851 * doing spu event profiling.
852 */
853 pm_regs.group_control = 0;
854 pm_regs.debug_bus_control = 0;
855 pm_regs.pm_cntrl.stop_at_max = 1;
856 pm_regs.pm_cntrl.trace_mode = 0;
857 pm_regs.pm_cntrl.freeze = 1;
858 pm_regs.pm_cntrl.trace_buf_ovflw = 0;
859 pm_regs.pm_cntrl.spu_addr_trace = 0;
860
861 /*
862 * For all events except PPU CYCLEs, each node will need to make
863 * the rtas cbe-perftools call to setup and reset the debug bus.
864 * Make the token lookup call once and store it in the global
865 * variable pm_rtas_token.
866 */
867 pm_rtas_token = rtas_token("ibm,cbe-perftools");
868
869 if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
870 printk(KERN_ERR
871 "%s: rtas token ibm,cbe-perftools unknown\n",
872 __func__);
873 return -EIO;
874 }
875
876 if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
877 profiling_mode = SPU_PROFILING_CYCLES;
878 ret = cell_reg_setup_spu_cycles(ctr, sys, num_ctrs);
879 } else if ((ctr[0].event >= SPU_EVENT_NUM_START) &&
880 (ctr[0].event <= SPU_EVENT_NUM_STOP)) {
881 profiling_mode = SPU_PROFILING_EVENTS;
882 spu_cycle_reset = ctr[0].count;
883
884 /* for SPU event profiling, need to setup the
885 * pm_signal array with the events to route the
886 * SPU PC before making the FW call. Note, only
887 * one SPU event for profiling can be specified
888 * at a time.
889 */
890 cell_reg_setup_spu_events(ctr, sys, num_ctrs);
891 } else {
892 profiling_mode = PPU_PROFILING;
893 ret = cell_reg_setup_ppu(ctr, sys, num_ctrs);
894 }
895
896 return ret;
897}
898
899
668 900
669/* This function is called once for each cpu */ 901/* This function is called once for each cpu */
670static int cell_cpu_setup(struct op_counter_config *cntr) 902static int cell_cpu_setup(struct op_counter_config *cntr)
@@ -672,8 +904,13 @@ static int cell_cpu_setup(struct op_counter_config *cntr)
672 u32 cpu = smp_processor_id(); 904 u32 cpu = smp_processor_id();
673 u32 num_enabled = 0; 905 u32 num_enabled = 0;
674 int i; 906 int i;
907 int ret;
675 908
676 if (spu_cycle_reset) 909 /* Cycle based SPU profiling does not use the performance
910 * counters. The trace array is configured to collect
911 * the data.
912 */
913 if (profiling_mode == SPU_PROFILING_CYCLES)
677 return 0; 914 return 0;
678 915
679 /* There is one performance monitor per processor chip (i.e. node), 916 /* There is one performance monitor per processor chip (i.e. node),
@@ -686,7 +923,6 @@ static int cell_cpu_setup(struct op_counter_config *cntr)
686 cbe_disable_pm(cpu); 923 cbe_disable_pm(cpu);
687 cbe_disable_pm_interrupts(cpu); 924 cbe_disable_pm_interrupts(cpu);
688 925
689 cbe_write_pm(cpu, pm_interval, 0);
690 cbe_write_pm(cpu, pm_start_stop, 0); 926 cbe_write_pm(cpu, pm_start_stop, 0);
691 cbe_write_pm(cpu, group_control, pm_regs.group_control); 927 cbe_write_pm(cpu, group_control, pm_regs.group_control);
692 cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control); 928 cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control);
@@ -703,7 +939,20 @@ static int cell_cpu_setup(struct op_counter_config *cntr)
703 * The pm_rtas_activate_signals will return -EIO if the FW 939 * The pm_rtas_activate_signals will return -EIO if the FW
704 * call failed. 940 * call failed.
705 */ 941 */
706 return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled); 942 if (profiling_mode == SPU_PROFILING_EVENTS) {
943 /* For SPU event profiling also need to setup the
944 * pm interval timer
945 */
946 ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu),
947 num_enabled+2);
948 /* store PC from debug bus to Trace buffer as often
949 * as possible (every 10 cycles)
950 */
951 cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
952 return ret;
953 } else
954 return pm_rtas_activate_signals(cbe_cpu_to_node(cpu),
955 num_enabled);
707} 956}
708 957
709#define ENTRIES 303 958#define ENTRIES 303
@@ -885,7 +1134,122 @@ static struct notifier_block cpu_freq_notifier_block = {
885}; 1134};
886#endif 1135#endif
887 1136
888static int cell_global_start_spu(struct op_counter_config *ctr) 1137/*
1138 * Note the generic OProfile stop calls do not support returning
1139 * an error on stop. Hence, will not return an error if the FW
1140 * calls fail on stop. Failure to reset the debug bus is not an issue.
1141 * Failure to disable the SPU profiling is not an issue. The FW calls
1142 * to enable the performance counters and debug bus will work even if
1143 * the hardware was not cleanly reset.
1144 */
1145static void cell_global_stop_spu_cycles(void)
1146{
1147 int subfunc, rtn_value;
1148 unsigned int lfsr_value;
1149 int cpu;
1150
1151 oprofile_running = 0;
1152 smp_wmb();
1153
1154#ifdef CONFIG_CPU_FREQ
1155 cpufreq_unregister_notifier(&cpu_freq_notifier_block,
1156 CPUFREQ_TRANSITION_NOTIFIER);
1157#endif
1158
1159 for_each_online_cpu(cpu) {
1160 if (cbe_get_hw_thread_id(cpu))
1161 continue;
1162
1163 subfunc = 3; /*
1164 * 2 - activate SPU tracing,
1165 * 3 - deactivate
1166 */
1167 lfsr_value = 0x8f100000;
1168
1169 rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
1170 subfunc, cbe_cpu_to_node(cpu),
1171 lfsr_value);
1172
1173 if (unlikely(rtn_value != 0)) {
1174 printk(KERN_ERR
1175 "%s: rtas call ibm,cbe-spu-perftools " \
1176 "failed, return = %d\n",
1177 __func__, rtn_value);
1178 }
1179
1180 /* Deactivate the signals */
1181 pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
1182 }
1183
1184 stop_spu_profiling_cycles();
1185}
1186
1187static void cell_global_stop_spu_events(void)
1188{
1189 int cpu;
1190 oprofile_running = 0;
1191
1192 stop_spu_profiling_events();
1193 smp_wmb();
1194
1195 for_each_online_cpu(cpu) {
1196 if (cbe_get_hw_thread_id(cpu))
1197 continue;
1198
1199 cbe_sync_irq(cbe_cpu_to_node(cpu));
1200 /* Stop the counters */
1201 cbe_disable_pm(cpu);
1202 cbe_write_pm07_control(cpu, 0, 0);
1203
1204 /* Deactivate the signals */
1205 pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
1206
1207 /* Deactivate interrupts */
1208 cbe_disable_pm_interrupts(cpu);
1209 }
1210 del_timer_sync(&timer_spu_event_swap);
1211}
1212
1213static void cell_global_stop_ppu(void)
1214{
1215 int cpu;
1216
1217 /*
1218 * This routine will be called once for the system.
1219 * There is one performance monitor per node, so we
1220 * only need to perform this function once per node.
1221 */
1222 del_timer_sync(&timer_virt_cntr);
1223 oprofile_running = 0;
1224 smp_wmb();
1225
1226 for_each_online_cpu(cpu) {
1227 if (cbe_get_hw_thread_id(cpu))
1228 continue;
1229
1230 cbe_sync_irq(cbe_cpu_to_node(cpu));
1231 /* Stop the counters */
1232 cbe_disable_pm(cpu);
1233
1234 /* Deactivate the signals */
1235 pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
1236
1237 /* Deactivate interrupts */
1238 cbe_disable_pm_interrupts(cpu);
1239 }
1240}
1241
1242static void cell_global_stop(void)
1243{
1244 if (profiling_mode == PPU_PROFILING)
1245 cell_global_stop_ppu();
1246 else if (profiling_mode == SPU_PROFILING_EVENTS)
1247 cell_global_stop_spu_events();
1248 else
1249 cell_global_stop_spu_cycles();
1250}
1251
1252static int cell_global_start_spu_cycles(struct op_counter_config *ctr)
889{ 1253{
890 int subfunc; 1254 int subfunc;
891 unsigned int lfsr_value; 1255 unsigned int lfsr_value;
@@ -951,18 +1315,18 @@ static int cell_global_start_spu(struct op_counter_config *ctr)
951 1315
952 /* start profiling */ 1316 /* start profiling */
953 ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc, 1317 ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc,
954 cbe_cpu_to_node(cpu), lfsr_value); 1318 cbe_cpu_to_node(cpu), lfsr_value);
955 1319
956 if (unlikely(ret != 0)) { 1320 if (unlikely(ret != 0)) {
957 printk(KERN_ERR 1321 printk(KERN_ERR
958 "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n", 1322 "%s: rtas call ibm,cbe-spu-perftools failed, " \
959 __func__, ret); 1323 "return = %d\n", __func__, ret);
960 rtas_error = -EIO; 1324 rtas_error = -EIO;
961 goto out; 1325 goto out;
962 } 1326 }
963 } 1327 }
964 1328
965 rtas_error = start_spu_profiling(spu_cycle_reset); 1329 rtas_error = start_spu_profiling_cycles(spu_cycle_reset);
966 if (rtas_error) 1330 if (rtas_error)
967 goto out_stop; 1331 goto out_stop;
968 1332
@@ -970,11 +1334,74 @@ static int cell_global_start_spu(struct op_counter_config *ctr)
970 return 0; 1334 return 0;
971 1335
972out_stop: 1336out_stop:
973 cell_global_stop_spu(); /* clean up the PMU/debug bus */ 1337 cell_global_stop_spu_cycles(); /* clean up the PMU/debug bus */
974out: 1338out:
975 return rtas_error; 1339 return rtas_error;
976} 1340}
977 1341
1342static int cell_global_start_spu_events(struct op_counter_config *ctr)
1343{
1344 int cpu;
1345 u32 interrupt_mask = 0;
1346 int rtn = 0;
1347
1348 hdw_thread = 0;
1349
1350 /* spu event profiling, uses the performance counters to generate
1351 * an interrupt. The hardware is setup to store the SPU program
1352 * counter into the trace array. The occurrence mode is used to
1353 * enable storing data to the trace buffer. The bits are set
1354 * to send/store the SPU address in the trace buffer. The debug
1355 * bus must be setup to route the SPU program counter onto the
1356 * debug bus. The occurrence data in the trace buffer is not used.
1357 */
1358
1359 /* This routine gets called once for the system.
1360 * There is one performance monitor per node, so we
1361 * only need to perform this function once per node.
1362 */
1363
1364 for_each_online_cpu(cpu) {
1365 if (cbe_get_hw_thread_id(cpu))
1366 continue;
1367
1368 /*
1369 * Setup SPU event-based profiling.
1370 * Set perf_mon_control bit 0 to a zero before
1371 * enabling spu collection hardware.
1372 *
1373 * Only support one SPU event on one SPU per node.
1374 */
1375 if (ctr_enabled & 1) {
1376 cbe_write_ctr(cpu, 0, reset_value[0]);
1377 enable_ctr(cpu, 0, pm_regs.pm07_cntrl);
1378 interrupt_mask |=
1379 CBE_PM_CTR_OVERFLOW_INTR(0);
1380 } else {
1381 /* Disable counter */
1382 cbe_write_pm07_control(cpu, 0, 0);
1383 }
1384
1385 cbe_get_and_clear_pm_interrupts(cpu);
1386 cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask);
1387 cbe_enable_pm(cpu);
1388
1389 /* clear the trace buffer */
1390 cbe_write_pm(cpu, trace_address, 0);
1391 }
1392
1393 /* Start the timer to time slice collecting the event profile
1394 * on each of the SPUs. Note, can collect profile on one SPU
1395 * per node at a time.
1396 */
1397 start_spu_event_swap();
1398 start_spu_profiling_events();
1399 oprofile_running = 1;
1400 smp_wmb();
1401
1402 return rtn;
1403}
1404
978static int cell_global_start_ppu(struct op_counter_config *ctr) 1405static int cell_global_start_ppu(struct op_counter_config *ctr)
979{ 1406{
980 u32 cpu, i; 1407 u32 cpu, i;
@@ -994,8 +1421,7 @@ static int cell_global_start_ppu(struct op_counter_config *ctr)
994 if (ctr_enabled & (1 << i)) { 1421 if (ctr_enabled & (1 << i)) {
995 cbe_write_ctr(cpu, i, reset_value[i]); 1422 cbe_write_ctr(cpu, i, reset_value[i]);
996 enable_ctr(cpu, i, pm_regs.pm07_cntrl); 1423 enable_ctr(cpu, i, pm_regs.pm07_cntrl);
997 interrupt_mask |= 1424 interrupt_mask |= CBE_PM_CTR_OVERFLOW_INTR(i);
998 CBE_PM_CTR_OVERFLOW_INTR(i);
999 } else { 1425 } else {
1000 /* Disable counter */ 1426 /* Disable counter */
1001 cbe_write_pm07_control(cpu, i, 0); 1427 cbe_write_pm07_control(cpu, i, 0);
@@ -1024,99 +1450,162 @@ static int cell_global_start_ppu(struct op_counter_config *ctr)
1024 1450
1025static int cell_global_start(struct op_counter_config *ctr) 1451static int cell_global_start(struct op_counter_config *ctr)
1026{ 1452{
1027 if (spu_cycle_reset) 1453 if (profiling_mode == SPU_PROFILING_CYCLES)
1028 return cell_global_start_spu(ctr); 1454 return cell_global_start_spu_cycles(ctr);
1455 else if (profiling_mode == SPU_PROFILING_EVENTS)
1456 return cell_global_start_spu_events(ctr);
1029 else 1457 else
1030 return cell_global_start_ppu(ctr); 1458 return cell_global_start_ppu(ctr);
1031} 1459}
1032 1460
1033/* 1461
1034 * Note the generic OProfile stop calls do not support returning 1462/* The SPU interrupt handler
1035 * an error on stop. Hence, will not return an error if the FW 1463 *
1036 * calls fail on stop. Failure to reset the debug bus is not an issue. 1464 * SPU event profiling works as follows:
1037 * Failure to disable the SPU profiling is not an issue. The FW calls 1465 * The pm_signal[0] holds the one SPU event to be measured. It is routed on
1038 * to enable the performance counters and debug bus will work even if 1466 * the debug bus using word 0 or 1. The value of pm_signal[1] and
1039 * the hardware was not cleanly reset. 1467 * pm_signal[2] contain the necessary events to route the SPU program
1468 * counter for the selected SPU onto the debug bus using words 2 and 3.
1469 * The pm_interval register is setup to write the SPU PC value into the
1470 * trace buffer at the maximum rate possible. The trace buffer is configured
1471 * to store the PCs, wrapping when it is full. The performance counter is
1472 * intialized to the max hardware count minus the number of events, N, between
1473 * samples. Once the N events have occured, a HW counter overflow occurs
1474 * causing the generation of a HW counter interrupt which also stops the
1475 * writing of the SPU PC values to the trace buffer. Hence the last PC
1476 * written to the trace buffer is the SPU PC that we want. Unfortunately,
1477 * we have to read from the beginning of the trace buffer to get to the
1478 * last value written. We just hope the PPU has nothing better to do then
1479 * service this interrupt. The PC for the specific SPU being profiled is
1480 * extracted from the trace buffer processed and stored. The trace buffer
1481 * is cleared, interrupts are cleared, the counter is reset to max - N.
1482 * A kernel timer is used to periodically call the routine spu_evnt_swap()
1483 * to switch to the next physical SPU in the node to profile in round robbin
1484 * order. This way data is collected for all SPUs on the node. It does mean
1485 * that we need to use a relatively small value of N to ensure enough samples
1486 * on each SPU are collected each SPU is being profiled 1/8 of the time.
1487 * It may also be necessary to use a longer sample collection period.
1040 */ 1488 */
1041static void cell_global_stop_spu(void) 1489static void cell_handle_interrupt_spu(struct pt_regs *regs,
1490 struct op_counter_config *ctr)
1042{ 1491{
1043 int subfunc, rtn_value; 1492 u32 cpu, cpu_tmp;
1044 unsigned int lfsr_value; 1493 u64 trace_entry;
1045 int cpu; 1494 u32 interrupt_mask;
1495 u64 trace_buffer[2];
1496 u64 last_trace_buffer;
1497 u32 sample;
1498 u32 trace_addr;
1499 unsigned long sample_array_lock_flags;
1500 int spu_num;
1501 unsigned long flags;
1046 1502
1047 oprofile_running = 0; 1503 /* Make sure spu event interrupt handler and spu event swap
1504 * don't access the counters simultaneously.
1505 */
1506 cpu = smp_processor_id();
1507 spin_lock_irqsave(&cntr_lock, flags);
1048 1508
1049#ifdef CONFIG_CPU_FREQ 1509 cpu_tmp = cpu;
1050 cpufreq_unregister_notifier(&cpu_freq_notifier_block, 1510 cbe_disable_pm(cpu);
1051 CPUFREQ_TRANSITION_NOTIFIER);
1052#endif
1053 1511
1054 for_each_online_cpu(cpu) { 1512 interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);
1055 if (cbe_get_hw_thread_id(cpu))
1056 continue;
1057 1513
1058 subfunc = 3; /* 1514 sample = 0xABCDEF;
1059 * 2 - activate SPU tracing, 1515 trace_entry = 0xfedcba;
1060 * 3 - deactivate 1516 last_trace_buffer = 0xdeadbeaf;
1061 */
1062 lfsr_value = 0x8f100000;
1063 1517
1064 rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL, 1518 if ((oprofile_running == 1) && (interrupt_mask != 0)) {
1065 subfunc, cbe_cpu_to_node(cpu), 1519 /* disable writes to trace buff */
1066 lfsr_value); 1520 cbe_write_pm(cpu, pm_interval, 0);
1067 1521
1068 if (unlikely(rtn_value != 0)) { 1522 /* only have one perf cntr being used, cntr 0 */
1069 printk(KERN_ERR 1523 if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(0))
1070 "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n", 1524 && ctr[0].enabled)
1071 __func__, rtn_value); 1525 /* The SPU PC values will be read
1526 * from the trace buffer, reset counter
1527 */
1528
1529 cbe_write_ctr(cpu, 0, reset_value[0]);
1530
1531 trace_addr = cbe_read_pm(cpu, trace_address);
1532
1533 while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) {
1534 /* There is data in the trace buffer to process
1535 * Read the buffer until you get to the last
1536 * entry. This is the value we want.
1537 */
1538
1539 cbe_read_trace_buffer(cpu, trace_buffer);
1540 trace_addr = cbe_read_pm(cpu, trace_address);
1072 } 1541 }
1073 1542
1074 /* Deactivate the signals */ 1543 /* SPU Address 16 bit count format for 128 bit
1075 pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); 1544 * HW trace buffer is used for the SPU PC storage
1076 } 1545 * HDR bits 0:15
1546 * SPU Addr 0 bits 16:31
1547 * SPU Addr 1 bits 32:47
1548 * unused bits 48:127
1549 *
1550 * HDR: bit4 = 1 SPU Address 0 valid
1551 * HDR: bit5 = 1 SPU Address 1 valid
1552 * - unfortunately, the valid bits don't seem to work
1553 *
1554 * Note trace_buffer[0] holds bits 0:63 of the HW
1555 * trace buffer, trace_buffer[1] holds bits 64:127
1556 */
1077 1557
1078 stop_spu_profiling(); 1558 trace_entry = trace_buffer[0]
1079} 1559 & 0x00000000FFFF0000;
1080 1560
1081static void cell_global_stop_ppu(void) 1561 /* only top 16 of the 18 bit SPU PC address
1082{ 1562 * is stored in trace buffer, hence shift right
1083 int cpu; 1563 * by 16 -2 bits */
1564 sample = trace_entry >> 14;
1565 last_trace_buffer = trace_buffer[0];
1084 1566
1085 /* 1567 spu_num = spu_evnt_phys_spu_indx
1086 * This routine will be called once for the system. 1568 + (cbe_cpu_to_node(cpu) * NUM_SPUS_PER_NODE);
1087 * There is one performance monitor per node, so we
1088 * only need to perform this function once per node.
1089 */
1090 del_timer_sync(&timer_virt_cntr);
1091 oprofile_running = 0;
1092 smp_wmb();
1093 1569
1094 for_each_online_cpu(cpu) { 1570 /* make sure only one process at a time is calling
1095 if (cbe_get_hw_thread_id(cpu)) 1571 * spu_sync_buffer()
1096 continue; 1572 */
1573 spin_lock_irqsave(&oprof_spu_smpl_arry_lck,
1574 sample_array_lock_flags);
1575 spu_sync_buffer(spu_num, &sample, 1);
1576 spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
1577 sample_array_lock_flags);
1097 1578
1098 cbe_sync_irq(cbe_cpu_to_node(cpu)); 1579 smp_wmb(); /* insure spu event buffer updates are written
1099 /* Stop the counters */ 1580 * don't want events intermingled... */
1100 cbe_disable_pm(cpu);
1101 1581
1102 /* Deactivate the signals */ 1582 /* The counters were frozen by the interrupt.
1103 pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); 1583 * Reenable the interrupt and restart the counters.
1584 */
1585 cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
1586 cbe_enable_pm_interrupts(cpu, hdw_thread,
1587 virt_cntr_inter_mask);
1104 1588
1105 /* Deactivate interrupts */ 1589 /* clear the trace buffer, re-enable writes to trace buff */
1106 cbe_disable_pm_interrupts(cpu); 1590 cbe_write_pm(cpu, trace_address, 0);
1107 } 1591 cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
1108}
1109 1592
1110static void cell_global_stop(void) 1593 /* The writes to the various performance counters only writes
1111{ 1594 * to a latch. The new values (interrupt setting bits, reset
1112 if (spu_cycle_reset) 1595 * counter value etc.) are not copied to the actual registers
1113 cell_global_stop_spu(); 1596 * until the performance monitor is enabled. In order to get
1114 else 1597 * this to work as desired, the permormance monitor needs to
1115 cell_global_stop_ppu(); 1598 * be disabled while writing to the latches. This is a
1599 * HW design issue.
1600 */
1601 write_pm_cntrl(cpu);
1602 cbe_enable_pm(cpu);
1603 }
1604 spin_unlock_irqrestore(&cntr_lock, flags);
1116} 1605}
1117 1606
1118static void cell_handle_interrupt(struct pt_regs *regs, 1607static void cell_handle_interrupt_ppu(struct pt_regs *regs,
1119 struct op_counter_config *ctr) 1608 struct op_counter_config *ctr)
1120{ 1609{
1121 u32 cpu; 1610 u32 cpu;
1122 u64 pc; 1611 u64 pc;
@@ -1132,7 +1621,7 @@ static void cell_handle_interrupt(struct pt_regs *regs,
1132 * routine are not running at the same time. See the 1621 * routine are not running at the same time. See the
1133 * cell_virtual_cntr() routine for additional comments. 1622 * cell_virtual_cntr() routine for additional comments.
1134 */ 1623 */
1135 spin_lock_irqsave(&virt_cntr_lock, flags); 1624 spin_lock_irqsave(&cntr_lock, flags);
1136 1625
1137 /* 1626 /*
1138 * Need to disable and reenable the performance counters 1627 * Need to disable and reenable the performance counters
@@ -1185,7 +1674,16 @@ static void cell_handle_interrupt(struct pt_regs *regs,
1185 */ 1674 */
1186 cbe_enable_pm(cpu); 1675 cbe_enable_pm(cpu);
1187 } 1676 }
1188 spin_unlock_irqrestore(&virt_cntr_lock, flags); 1677 spin_unlock_irqrestore(&cntr_lock, flags);
1678}
1679
1680static void cell_handle_interrupt(struct pt_regs *regs,
1681 struct op_counter_config *ctr)
1682{
1683 if (profiling_mode == PPU_PROFILING)
1684 cell_handle_interrupt_ppu(regs, ctr);
1685 else
1686 cell_handle_interrupt_spu(regs, ctr);
1189} 1687}
1190 1688
1191/* 1689/*
@@ -1195,7 +1693,8 @@ static void cell_handle_interrupt(struct pt_regs *regs,
1195 */ 1693 */
1196static int cell_sync_start(void) 1694static int cell_sync_start(void)
1197{ 1695{
1198 if (spu_cycle_reset) 1696 if ((profiling_mode == SPU_PROFILING_CYCLES) ||
1697 (profiling_mode == SPU_PROFILING_EVENTS))
1199 return spu_sync_start(); 1698 return spu_sync_start();
1200 else 1699 else
1201 return DO_GENERIC_SYNC; 1700 return DO_GENERIC_SYNC;
@@ -1203,7 +1702,8 @@ static int cell_sync_start(void)
1203 1702
1204static int cell_sync_stop(void) 1703static int cell_sync_stop(void)
1205{ 1704{
1206 if (spu_cycle_reset) 1705 if ((profiling_mode == SPU_PROFILING_CYCLES) ||
1706 (profiling_mode == SPU_PROFILING_EVENTS))
1207 return spu_sync_stop(); 1707 return spu_sync_stop();
1208 else 1708 else
1209 return 1; 1709 return 1;
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 9fa9dcdf344b..e02a359d2aa5 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -300,7 +300,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
300 return oldbit; 300 return oldbit;
301} 301}
302 302
303static inline int constant_test_bit(int nr, const volatile unsigned long *addr) 303static inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
304{ 304{
305 return ((1UL << (nr % BITS_PER_LONG)) & 305 return ((1UL << (nr % BITS_PER_LONG)) &
306 (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; 306 (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 29dc0c89d4af..d37593c2f438 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -47,7 +47,7 @@
47#endif 47#endif
48 48
49static int __initdata acpi_force = 0; 49static int __initdata acpi_force = 0;
50 50u32 acpi_rsdt_forced;
51#ifdef CONFIG_ACPI 51#ifdef CONFIG_ACPI
52int acpi_disabled = 0; 52int acpi_disabled = 0;
53#else 53#else
@@ -1374,6 +1374,17 @@ static void __init acpi_process_madt(void)
1374 "Invalid BIOS MADT, disabling ACPI\n"); 1374 "Invalid BIOS MADT, disabling ACPI\n");
1375 disable_acpi(); 1375 disable_acpi();
1376 } 1376 }
1377 } else {
1378 /*
1379 * ACPI found no MADT, and so ACPI wants UP PIC mode.
1380 * In the event an MPS table was found, forget it.
1381 * Boot with "acpi=off" to use MPS on such a system.
1382 */
1383 if (smp_found_config) {
1384 printk(KERN_WARNING PREFIX
1385 "No APIC-table, disabling MPS\n");
1386 smp_found_config = 0;
1387 }
1377 } 1388 }
1378 1389
1379 /* 1390 /*
@@ -1809,6 +1820,10 @@ static int __init parse_acpi(char *arg)
1809 disable_acpi(); 1820 disable_acpi();
1810 acpi_ht = 1; 1821 acpi_ht = 1;
1811 } 1822 }
1823 /* acpi=rsdt use RSDT instead of XSDT */
1824 else if (strcmp(arg, "rsdt") == 0) {
1825 acpi_rsdt_forced = 1;
1826 }
1812 /* "acpi=noirq" disables ACPI interrupt routing */ 1827 /* "acpi=noirq" disables ACPI interrupt routing */
1813 else if (strcmp(arg, "noirq") == 0) { 1828 else if (strcmp(arg, "noirq") == 0) {
1814 acpi_noirq_set(); 1829 acpi_noirq_set();
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index c2502eb9aa83..a4805b3b4095 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -56,6 +56,7 @@ static struct cstate_entry *cpu_cstate_entry; /* per CPU ptr */
56static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; 56static short mwait_supported[ACPI_PROCESSOR_MAX_POWER];
57 57
58#define MWAIT_SUBSTATE_MASK (0xf) 58#define MWAIT_SUBSTATE_MASK (0xf)
59#define MWAIT_CSTATE_MASK (0xf)
59#define MWAIT_SUBSTATE_SIZE (4) 60#define MWAIT_SUBSTATE_SIZE (4)
60 61
61#define CPUID_MWAIT_LEAF (5) 62#define CPUID_MWAIT_LEAF (5)
@@ -98,7 +99,8 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
98 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); 99 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
99 100
100 /* Check whether this particular cx_type (in CST) is supported or not */ 101 /* Check whether this particular cx_type (in CST) is supported or not */
101 cstate_type = (cx->address >> MWAIT_SUBSTATE_SIZE) + 1; 102 cstate_type = ((cx->address >> MWAIT_SUBSTATE_SIZE) &
103 MWAIT_CSTATE_MASK) + 1;
102 edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE); 104 edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE);
103 num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK; 105 num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK;
104 106
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 806b4e9051b4..707c1f6f95fa 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -159,6 +159,8 @@ static int __init acpi_sleep_setup(char *str)
159#endif 159#endif
160 if (strncmp(str, "old_ordering", 12) == 0) 160 if (strncmp(str, "old_ordering", 12) == 0)
161 acpi_old_suspend_ordering(); 161 acpi_old_suspend_ordering();
162 if (strncmp(str, "s4_nonvs", 8) == 0)
163 acpi_s4_no_nvs();
162 str = strchr(str, ','); 164 str = strchr(str, ',');
163 if (str != NULL) 165 if (str != NULL)
164 str += strspn(str, ", \t"); 166 str += strspn(str, ", \t");
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 65a13943e098..e85826829cf2 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -665,6 +665,27 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn)
665} 665}
666#endif 666#endif
667 667
668#ifdef CONFIG_HIBERNATION
669/**
670 * Mark ACPI NVS memory region, so that we can save/restore it during
671 * hibernation and the subsequent resume.
672 */
673static int __init e820_mark_nvs_memory(void)
674{
675 int i;
676
677 for (i = 0; i < e820.nr_map; i++) {
678 struct e820entry *ei = &e820.map[i];
679
680 if (ei->type == E820_NVS)
681 hibernate_nvs_register(ei->addr, ei->size);
682 }
683
684 return 0;
685}
686core_initcall(e820_mark_nvs_memory);
687#endif
688
668/* 689/*
669 * Early reserved memory areas. 690 * Early reserved memory areas.
670 */ 691 */
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 744aa7fc49d5..76b8cd953dee 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -201,6 +201,12 @@ struct chipset {
201 void (*f)(int num, int slot, int func); 201 void (*f)(int num, int slot, int func);
202}; 202};
203 203
204/*
205 * Only works for devices on the root bus. If you add any devices
206 * not on bus 0 readd another loop level in early_quirks(). But
207 * be careful because at least the Nvidia quirk here relies on
208 * only matching on bus 0.
209 */
204static struct chipset early_qrk[] __initdata = { 210static struct chipset early_qrk[] __initdata = {
205 { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, 211 { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
206 PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs }, 212 PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs },
@@ -267,17 +273,17 @@ static int __init check_dev_quirk(int num, int slot, int func)
267 273
268void __init early_quirks(void) 274void __init early_quirks(void)
269{ 275{
270 int num, slot, func; 276 int slot, func;
271 277
272 if (!early_pci_allowed()) 278 if (!early_pci_allowed())
273 return; 279 return;
274 280
275 /* Poor man's PCI discovery */ 281 /* Poor man's PCI discovery */
276 for (num = 0; num < 32; num++) 282 /* Only scan the root bus */
277 for (slot = 0; slot < 32; slot++) 283 for (slot = 0; slot < 32; slot++)
278 for (func = 0; func < 8; func++) { 284 for (func = 0; func < 8; func++) {
279 /* Only probe function 0 on single fn devices */ 285 /* Only probe function 0 on single fn devices */
280 if (check_dev_quirk(num, slot, func)) 286 if (check_dev_quirk(0, slot, func))
281 break; 287 break;
282 } 288 }
283} 289}
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 98658f25f542..8fdf06e4edf9 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -2,7 +2,7 @@
2 * @file op_model_amd.c 2 * @file op_model_amd.c
3 * athlon / K7 / K8 / Family 10h model-specific MSR operations 3 * athlon / K7 / K8 / Family 10h model-specific MSR operations
4 * 4 *
5 * @remark Copyright 2002-2008 OProfile authors 5 * @remark Copyright 2002-2009 OProfile authors
6 * @remark Read the file COPYING 6 * @remark Read the file COPYING
7 * 7 *
8 * @author John Levon 8 * @author John Levon
@@ -10,7 +10,7 @@
10 * @author Graydon Hoare 10 * @author Graydon Hoare
11 * @author Robert Richter <robert.richter@amd.com> 11 * @author Robert Richter <robert.richter@amd.com>
12 * @author Barry Kasindorf 12 * @author Barry Kasindorf
13*/ 13 */
14 14
15#include <linux/oprofile.h> 15#include <linux/oprofile.h>
16#include <linux/device.h> 16#include <linux/device.h>
@@ -60,53 +60,10 @@ static unsigned long reset_value[NUM_COUNTERS];
60#define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ 60#define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */
61#define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ 61#define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */
62 62
63/* Codes used in cpu_buffer.c */ 63#define IBS_FETCH_SIZE 6
64/* This produces duplicate code, need to be fixed */ 64#define IBS_OP_SIZE 12
65#define IBS_FETCH_BEGIN 3
66#define IBS_OP_BEGIN 4
67
68/*
69 * The function interface needs to be fixed, something like add
70 * data. Should then be added to linux/oprofile.h.
71 */
72extern void
73oprofile_add_ibs_sample(struct pt_regs * const regs,
74 unsigned int * const ibs_sample, int ibs_code);
75
76struct ibs_fetch_sample {
77 /* MSRC001_1031 IBS Fetch Linear Address Register */
78 unsigned int ibs_fetch_lin_addr_low;
79 unsigned int ibs_fetch_lin_addr_high;
80 /* MSRC001_1030 IBS Fetch Control Register */
81 unsigned int ibs_fetch_ctl_low;
82 unsigned int ibs_fetch_ctl_high;
83 /* MSRC001_1032 IBS Fetch Physical Address Register */
84 unsigned int ibs_fetch_phys_addr_low;
85 unsigned int ibs_fetch_phys_addr_high;
86};
87
88struct ibs_op_sample {
89 /* MSRC001_1034 IBS Op Logical Address Register (IbsRIP) */
90 unsigned int ibs_op_rip_low;
91 unsigned int ibs_op_rip_high;
92 /* MSRC001_1035 IBS Op Data Register */
93 unsigned int ibs_op_data1_low;
94 unsigned int ibs_op_data1_high;
95 /* MSRC001_1036 IBS Op Data 2 Register */
96 unsigned int ibs_op_data2_low;
97 unsigned int ibs_op_data2_high;
98 /* MSRC001_1037 IBS Op Data 3 Register */
99 unsigned int ibs_op_data3_low;
100 unsigned int ibs_op_data3_high;
101 /* MSRC001_1038 IBS DC Linear Address Register (IbsDcLinAd) */
102 unsigned int ibs_dc_linear_low;
103 unsigned int ibs_dc_linear_high;
104 /* MSRC001_1039 IBS DC Physical Address Register (IbsDcPhysAd) */
105 unsigned int ibs_dc_phys_low;
106 unsigned int ibs_dc_phys_high;
107};
108 65
109static int ibs_allowed; /* AMD Family10h and later */ 66static int has_ibs; /* AMD Family10h and later */
110 67
111struct op_ibs_config { 68struct op_ibs_config {
112 unsigned long op_enabled; 69 unsigned long op_enabled;
@@ -197,31 +154,29 @@ static inline int
197op_amd_handle_ibs(struct pt_regs * const regs, 154op_amd_handle_ibs(struct pt_regs * const regs,
198 struct op_msrs const * const msrs) 155 struct op_msrs const * const msrs)
199{ 156{
200 unsigned int low, high; 157 u32 low, high;
201 struct ibs_fetch_sample ibs_fetch; 158 u64 msr;
202 struct ibs_op_sample ibs_op; 159 struct op_entry entry;
203 160
204 if (!ibs_allowed) 161 if (!has_ibs)
205 return 1; 162 return 1;
206 163
207 if (ibs_config.fetch_enabled) { 164 if (ibs_config.fetch_enabled) {
208 rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); 165 rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
209 if (high & IBS_FETCH_HIGH_VALID_BIT) { 166 if (high & IBS_FETCH_HIGH_VALID_BIT) {
210 ibs_fetch.ibs_fetch_ctl_high = high; 167 rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr);
211 ibs_fetch.ibs_fetch_ctl_low = low; 168 oprofile_write_reserve(&entry, regs, msr,
212 rdmsr(MSR_AMD64_IBSFETCHLINAD, low, high); 169 IBS_FETCH_CODE, IBS_FETCH_SIZE);
213 ibs_fetch.ibs_fetch_lin_addr_high = high; 170 oprofile_add_data(&entry, (u32)msr);
214 ibs_fetch.ibs_fetch_lin_addr_low = low; 171 oprofile_add_data(&entry, (u32)(msr >> 32));
215 rdmsr(MSR_AMD64_IBSFETCHPHYSAD, low, high); 172 oprofile_add_data(&entry, low);
216 ibs_fetch.ibs_fetch_phys_addr_high = high; 173 oprofile_add_data(&entry, high);
217 ibs_fetch.ibs_fetch_phys_addr_low = low; 174 rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr);
218 175 oprofile_add_data(&entry, (u32)msr);
219 oprofile_add_ibs_sample(regs, 176 oprofile_add_data(&entry, (u32)(msr >> 32));
220 (unsigned int *)&ibs_fetch, 177 oprofile_write_commit(&entry);
221 IBS_FETCH_BEGIN);
222 178
223 /* reenable the IRQ */ 179 /* reenable the IRQ */
224 rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
225 high &= ~IBS_FETCH_HIGH_VALID_BIT; 180 high &= ~IBS_FETCH_HIGH_VALID_BIT;
226 high |= IBS_FETCH_HIGH_ENABLE; 181 high |= IBS_FETCH_HIGH_ENABLE;
227 low &= IBS_FETCH_LOW_MAX_CNT_MASK; 182 low &= IBS_FETCH_LOW_MAX_CNT_MASK;
@@ -232,30 +187,29 @@ op_amd_handle_ibs(struct pt_regs * const regs,
232 if (ibs_config.op_enabled) { 187 if (ibs_config.op_enabled) {
233 rdmsr(MSR_AMD64_IBSOPCTL, low, high); 188 rdmsr(MSR_AMD64_IBSOPCTL, low, high);
234 if (low & IBS_OP_LOW_VALID_BIT) { 189 if (low & IBS_OP_LOW_VALID_BIT) {
235 rdmsr(MSR_AMD64_IBSOPRIP, low, high); 190 rdmsrl(MSR_AMD64_IBSOPRIP, msr);
236 ibs_op.ibs_op_rip_low = low; 191 oprofile_write_reserve(&entry, regs, msr,
237 ibs_op.ibs_op_rip_high = high; 192 IBS_OP_CODE, IBS_OP_SIZE);
238 rdmsr(MSR_AMD64_IBSOPDATA, low, high); 193 oprofile_add_data(&entry, (u32)msr);
239 ibs_op.ibs_op_data1_low = low; 194 oprofile_add_data(&entry, (u32)(msr >> 32));
240 ibs_op.ibs_op_data1_high = high; 195 rdmsrl(MSR_AMD64_IBSOPDATA, msr);
241 rdmsr(MSR_AMD64_IBSOPDATA2, low, high); 196 oprofile_add_data(&entry, (u32)msr);
242 ibs_op.ibs_op_data2_low = low; 197 oprofile_add_data(&entry, (u32)(msr >> 32));
243 ibs_op.ibs_op_data2_high = high; 198 rdmsrl(MSR_AMD64_IBSOPDATA2, msr);
244 rdmsr(MSR_AMD64_IBSOPDATA3, low, high); 199 oprofile_add_data(&entry, (u32)msr);
245 ibs_op.ibs_op_data3_low = low; 200 oprofile_add_data(&entry, (u32)(msr >> 32));
246 ibs_op.ibs_op_data3_high = high; 201 rdmsrl(MSR_AMD64_IBSOPDATA3, msr);
247 rdmsr(MSR_AMD64_IBSDCLINAD, low, high); 202 oprofile_add_data(&entry, (u32)msr);
248 ibs_op.ibs_dc_linear_low = low; 203 oprofile_add_data(&entry, (u32)(msr >> 32));
249 ibs_op.ibs_dc_linear_high = high; 204 rdmsrl(MSR_AMD64_IBSDCLINAD, msr);
250 rdmsr(MSR_AMD64_IBSDCPHYSAD, low, high); 205 oprofile_add_data(&entry, (u32)msr);
251 ibs_op.ibs_dc_phys_low = low; 206 oprofile_add_data(&entry, (u32)(msr >> 32));
252 ibs_op.ibs_dc_phys_high = high; 207 rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr);
208 oprofile_add_data(&entry, (u32)msr);
209 oprofile_add_data(&entry, (u32)(msr >> 32));
210 oprofile_write_commit(&entry);
253 211
254 /* reenable the IRQ */ 212 /* reenable the IRQ */
255 oprofile_add_ibs_sample(regs,
256 (unsigned int *)&ibs_op,
257 IBS_OP_BEGIN);
258 rdmsr(MSR_AMD64_IBSOPCTL, low, high);
259 high = 0; 213 high = 0;
260 low &= ~IBS_OP_LOW_VALID_BIT; 214 low &= ~IBS_OP_LOW_VALID_BIT;
261 low |= IBS_OP_LOW_ENABLE; 215 low |= IBS_OP_LOW_ENABLE;
@@ -305,14 +259,14 @@ static void op_amd_start(struct op_msrs const * const msrs)
305 } 259 }
306 260
307#ifdef CONFIG_OPROFILE_IBS 261#ifdef CONFIG_OPROFILE_IBS
308 if (ibs_allowed && ibs_config.fetch_enabled) { 262 if (has_ibs && ibs_config.fetch_enabled) {
309 low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; 263 low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
310 high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */ 264 high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */
311 + IBS_FETCH_HIGH_ENABLE; 265 + IBS_FETCH_HIGH_ENABLE;
312 wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); 266 wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
313 } 267 }
314 268
315 if (ibs_allowed && ibs_config.op_enabled) { 269 if (has_ibs && ibs_config.op_enabled) {
316 low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) 270 low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF)
317 + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */ 271 + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */
318 + IBS_OP_LOW_ENABLE; 272 + IBS_OP_LOW_ENABLE;
@@ -341,14 +295,14 @@ static void op_amd_stop(struct op_msrs const * const msrs)
341 } 295 }
342 296
343#ifdef CONFIG_OPROFILE_IBS 297#ifdef CONFIG_OPROFILE_IBS
344 if (ibs_allowed && ibs_config.fetch_enabled) { 298 if (has_ibs && ibs_config.fetch_enabled) {
345 /* clear max count and enable */ 299 /* clear max count and enable */
346 low = 0; 300 low = 0;
347 high = 0; 301 high = 0;
348 wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); 302 wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
349 } 303 }
350 304
351 if (ibs_allowed && ibs_config.op_enabled) { 305 if (has_ibs && ibs_config.op_enabled) {
352 /* clear max count and enable */ 306 /* clear max count and enable */
353 low = 0; 307 low = 0;
354 high = 0; 308 high = 0;
@@ -409,6 +363,7 @@ static int init_ibs_nmi(void)
409 | IBSCTL_LVTOFFSETVAL); 363 | IBSCTL_LVTOFFSETVAL);
410 pci_read_config_dword(cpu_cfg, IBSCTL, &value); 364 pci_read_config_dword(cpu_cfg, IBSCTL, &value);
411 if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) { 365 if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) {
366 pci_dev_put(cpu_cfg);
412 printk(KERN_DEBUG "Failed to setup IBS LVT offset, " 367 printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
413 "IBSCTL = 0x%08x", value); 368 "IBSCTL = 0x%08x", value);
414 return 1; 369 return 1;
@@ -436,20 +391,20 @@ static int init_ibs_nmi(void)
436/* uninitialize the APIC for the IBS interrupts if needed */ 391/* uninitialize the APIC for the IBS interrupts if needed */
437static void clear_ibs_nmi(void) 392static void clear_ibs_nmi(void)
438{ 393{
439 if (ibs_allowed) 394 if (has_ibs)
440 on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); 395 on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1);
441} 396}
442 397
443/* initialize the APIC for the IBS interrupts if available */ 398/* initialize the APIC for the IBS interrupts if available */
444static void ibs_init(void) 399static void ibs_init(void)
445{ 400{
446 ibs_allowed = boot_cpu_has(X86_FEATURE_IBS); 401 has_ibs = boot_cpu_has(X86_FEATURE_IBS);
447 402
448 if (!ibs_allowed) 403 if (!has_ibs)
449 return; 404 return;
450 405
451 if (init_ibs_nmi()) { 406 if (init_ibs_nmi()) {
452 ibs_allowed = 0; 407 has_ibs = 0;
453 return; 408 return;
454 } 409 }
455 410
@@ -458,7 +413,7 @@ static void ibs_init(void)
458 413
459static void ibs_exit(void) 414static void ibs_exit(void)
460{ 415{
461 if (!ibs_allowed) 416 if (!has_ibs)
462 return; 417 return;
463 418
464 clear_ibs_nmi(); 419 clear_ibs_nmi();
@@ -478,7 +433,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
478 if (ret) 433 if (ret)
479 return ret; 434 return ret;
480 435
481 if (!ibs_allowed) 436 if (!has_ibs)
482 return ret; 437 return ret;
483 438
484 /* model specific files */ 439 /* model specific files */
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index dcbf1be149f3..f21147f3626a 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -28,351 +28,18 @@
28#include <linux/async_tx.h> 28#include <linux/async_tx.h>
29 29
30#ifdef CONFIG_DMA_ENGINE 30#ifdef CONFIG_DMA_ENGINE
31static enum dma_state_client 31static int __init async_tx_init(void)
32dma_channel_add_remove(struct dma_client *client,
33 struct dma_chan *chan, enum dma_state state);
34
35static struct dma_client async_tx_dma = {
36 .event_callback = dma_channel_add_remove,
37 /* .cap_mask == 0 defaults to all channels */
38};
39
40/**
41 * dma_cap_mask_all - enable iteration over all operation types
42 */
43static dma_cap_mask_t dma_cap_mask_all;
44
45/**
46 * chan_ref_percpu - tracks channel allocations per core/opertion
47 */
48struct chan_ref_percpu {
49 struct dma_chan_ref *ref;
50};
51
52static int channel_table_initialized;
53static struct chan_ref_percpu *channel_table[DMA_TX_TYPE_END];
54
55/**
56 * async_tx_lock - protect modification of async_tx_master_list and serialize
57 * rebalance operations
58 */
59static spinlock_t async_tx_lock;
60
61static LIST_HEAD(async_tx_master_list);
62
63/* async_tx_issue_pending_all - start all transactions on all channels */
64void async_tx_issue_pending_all(void)
65{
66 struct dma_chan_ref *ref;
67
68 rcu_read_lock();
69 list_for_each_entry_rcu(ref, &async_tx_master_list, node)
70 ref->chan->device->device_issue_pending(ref->chan);
71 rcu_read_unlock();
72}
73EXPORT_SYMBOL_GPL(async_tx_issue_pending_all);
74
75/* dma_wait_for_async_tx - spin wait for a transcation to complete
76 * @tx: transaction to wait on
77 */
78enum dma_status
79dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
80{
81 enum dma_status status;
82 struct dma_async_tx_descriptor *iter;
83 struct dma_async_tx_descriptor *parent;
84
85 if (!tx)
86 return DMA_SUCCESS;
87
88 /* poll through the dependency chain, return when tx is complete */
89 do {
90 iter = tx;
91
92 /* find the root of the unsubmitted dependency chain */
93 do {
94 parent = iter->parent;
95 if (!parent)
96 break;
97 else
98 iter = parent;
99 } while (parent);
100
101 /* there is a small window for ->parent == NULL and
102 * ->cookie == -EBUSY
103 */
104 while (iter->cookie == -EBUSY)
105 cpu_relax();
106
107 status = dma_sync_wait(iter->chan, iter->cookie);
108 } while (status == DMA_IN_PROGRESS || (iter != tx));
109
110 return status;
111}
112EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
113
114/* async_tx_run_dependencies - helper routine for dma drivers to process
115 * (start) dependent operations on their target channel
116 * @tx: transaction with dependencies
117 */
118void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
119{
120 struct dma_async_tx_descriptor *dep = tx->next;
121 struct dma_async_tx_descriptor *dep_next;
122 struct dma_chan *chan;
123
124 if (!dep)
125 return;
126
127 chan = dep->chan;
128
129 /* keep submitting up until a channel switch is detected
130 * in that case we will be called again as a result of
131 * processing the interrupt from async_tx_channel_switch
132 */
133 for (; dep; dep = dep_next) {
134 spin_lock_bh(&dep->lock);
135 dep->parent = NULL;
136 dep_next = dep->next;
137 if (dep_next && dep_next->chan == chan)
138 dep->next = NULL; /* ->next will be submitted */
139 else
140 dep_next = NULL; /* submit current dep and terminate */
141 spin_unlock_bh(&dep->lock);
142
143 dep->tx_submit(dep);
144 }
145
146 chan->device->device_issue_pending(chan);
147}
148EXPORT_SYMBOL_GPL(async_tx_run_dependencies);
149
150static void
151free_dma_chan_ref(struct rcu_head *rcu)
152{
153 struct dma_chan_ref *ref;
154 ref = container_of(rcu, struct dma_chan_ref, rcu);
155 kfree(ref);
156}
157
158static void
159init_dma_chan_ref(struct dma_chan_ref *ref, struct dma_chan *chan)
160{
161 INIT_LIST_HEAD(&ref->node);
162 INIT_RCU_HEAD(&ref->rcu);
163 ref->chan = chan;
164 atomic_set(&ref->count, 0);
165}
166
167/**
168 * get_chan_ref_by_cap - returns the nth channel of the given capability
169 * defaults to returning the channel with the desired capability and the
170 * lowest reference count if the index can not be satisfied
171 * @cap: capability to match
172 * @index: nth channel desired, passing -1 has the effect of forcing the
173 * default return value
174 */
175static struct dma_chan_ref *
176get_chan_ref_by_cap(enum dma_transaction_type cap, int index)
177{
178 struct dma_chan_ref *ret_ref = NULL, *min_ref = NULL, *ref;
179
180 rcu_read_lock();
181 list_for_each_entry_rcu(ref, &async_tx_master_list, node)
182 if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
183 if (!min_ref)
184 min_ref = ref;
185 else if (atomic_read(&ref->count) <
186 atomic_read(&min_ref->count))
187 min_ref = ref;
188
189 if (index-- == 0) {
190 ret_ref = ref;
191 break;
192 }
193 }
194 rcu_read_unlock();
195
196 if (!ret_ref)
197 ret_ref = min_ref;
198
199 if (ret_ref)
200 atomic_inc(&ret_ref->count);
201
202 return ret_ref;
203}
204
205/**
206 * async_tx_rebalance - redistribute the available channels, optimize
207 * for cpu isolation in the SMP case, and opertaion isolation in the
208 * uniprocessor case
209 */
210static void async_tx_rebalance(void)
211{
212 int cpu, cap, cpu_idx = 0;
213 unsigned long flags;
214
215 if (!channel_table_initialized)
216 return;
217
218 spin_lock_irqsave(&async_tx_lock, flags);
219
220 /* undo the last distribution */
221 for_each_dma_cap_mask(cap, dma_cap_mask_all)
222 for_each_possible_cpu(cpu) {
223 struct dma_chan_ref *ref =
224 per_cpu_ptr(channel_table[cap], cpu)->ref;
225 if (ref) {
226 atomic_set(&ref->count, 0);
227 per_cpu_ptr(channel_table[cap], cpu)->ref =
228 NULL;
229 }
230 }
231
232 for_each_dma_cap_mask(cap, dma_cap_mask_all)
233 for_each_online_cpu(cpu) {
234 struct dma_chan_ref *new;
235 if (NR_CPUS > 1)
236 new = get_chan_ref_by_cap(cap, cpu_idx++);
237 else
238 new = get_chan_ref_by_cap(cap, -1);
239
240 per_cpu_ptr(channel_table[cap], cpu)->ref = new;
241 }
242
243 spin_unlock_irqrestore(&async_tx_lock, flags);
244}
245
246static enum dma_state_client
247dma_channel_add_remove(struct dma_client *client,
248 struct dma_chan *chan, enum dma_state state)
249{
250 unsigned long found, flags;
251 struct dma_chan_ref *master_ref, *ref;
252 enum dma_state_client ack = DMA_DUP; /* default: take no action */
253
254 switch (state) {
255 case DMA_RESOURCE_AVAILABLE:
256 found = 0;
257 rcu_read_lock();
258 list_for_each_entry_rcu(ref, &async_tx_master_list, node)
259 if (ref->chan == chan) {
260 found = 1;
261 break;
262 }
263 rcu_read_unlock();
264
265 pr_debug("async_tx: dma resource available [%s]\n",
266 found ? "old" : "new");
267
268 if (!found)
269 ack = DMA_ACK;
270 else
271 break;
272
273 /* add the channel to the generic management list */
274 master_ref = kmalloc(sizeof(*master_ref), GFP_KERNEL);
275 if (master_ref) {
276 /* keep a reference until async_tx is unloaded */
277 dma_chan_get(chan);
278 init_dma_chan_ref(master_ref, chan);
279 spin_lock_irqsave(&async_tx_lock, flags);
280 list_add_tail_rcu(&master_ref->node,
281 &async_tx_master_list);
282 spin_unlock_irqrestore(&async_tx_lock,
283 flags);
284 } else {
285 printk(KERN_WARNING "async_tx: unable to create"
286 " new master entry in response to"
287 " a DMA_RESOURCE_ADDED event"
288 " (-ENOMEM)\n");
289 return 0;
290 }
291
292 async_tx_rebalance();
293 break;
294 case DMA_RESOURCE_REMOVED:
295 found = 0;
296 spin_lock_irqsave(&async_tx_lock, flags);
297 list_for_each_entry(ref, &async_tx_master_list, node)
298 if (ref->chan == chan) {
299 /* permit backing devices to go away */
300 dma_chan_put(ref->chan);
301 list_del_rcu(&ref->node);
302 call_rcu(&ref->rcu, free_dma_chan_ref);
303 found = 1;
304 break;
305 }
306 spin_unlock_irqrestore(&async_tx_lock, flags);
307
308 pr_debug("async_tx: dma resource removed [%s]\n",
309 found ? "ours" : "not ours");
310
311 if (found)
312 ack = DMA_ACK;
313 else
314 break;
315
316 async_tx_rebalance();
317 break;
318 case DMA_RESOURCE_SUSPEND:
319 case DMA_RESOURCE_RESUME:
320 printk(KERN_WARNING "async_tx: does not support dma channel"
321 " suspend/resume\n");
322 break;
323 default:
324 BUG();
325 }
326
327 return ack;
328}
329
330static int __init
331async_tx_init(void)
332{ 32{
333 enum dma_transaction_type cap; 33 dmaengine_get();
334
335 spin_lock_init(&async_tx_lock);
336 bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);
337
338 /* an interrupt will never be an explicit operation type.
339 * clearing this bit prevents allocation to a slot in 'channel_table'
340 */
341 clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);
342
343 for_each_dma_cap_mask(cap, dma_cap_mask_all) {
344 channel_table[cap] = alloc_percpu(struct chan_ref_percpu);
345 if (!channel_table[cap])
346 goto err;
347 }
348
349 channel_table_initialized = 1;
350 dma_async_client_register(&async_tx_dma);
351 dma_async_client_chan_request(&async_tx_dma);
352 34
353 printk(KERN_INFO "async_tx: api initialized (async)\n"); 35 printk(KERN_INFO "async_tx: api initialized (async)\n");
354 36
355 return 0; 37 return 0;
356err:
357 printk(KERN_ERR "async_tx: initialization failure\n");
358
359 while (--cap >= 0)
360 free_percpu(channel_table[cap]);
361
362 return 1;
363} 38}
364 39
365static void __exit async_tx_exit(void) 40static void __exit async_tx_exit(void)
366{ 41{
367 enum dma_transaction_type cap; 42 dmaengine_put();
368
369 channel_table_initialized = 0;
370
371 for_each_dma_cap_mask(cap, dma_cap_mask_all)
372 if (channel_table[cap])
373 free_percpu(channel_table[cap]);
374
375 dma_async_client_unregister(&async_tx_dma);
376} 43}
377 44
378/** 45/**
@@ -387,16 +54,9 @@ __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
387{ 54{
388 /* see if we can keep the chain on one channel */ 55 /* see if we can keep the chain on one channel */
389 if (depend_tx && 56 if (depend_tx &&
390 dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) 57 dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
391 return depend_tx->chan; 58 return depend_tx->chan;
392 else if (likely(channel_table_initialized)) { 59 return dma_find_channel(tx_type);
393 struct dma_chan_ref *ref;
394 int cpu = get_cpu();
395 ref = per_cpu_ptr(channel_table[tx_type], cpu)->ref;
396 put_cpu();
397 return ref ? ref->chan : NULL;
398 } else
399 return NULL;
400} 60}
401EXPORT_SYMBOL_GPL(__async_tx_find_channel); 61EXPORT_SYMBOL_GPL(__async_tx_find_channel);
402#else 62#else
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 2f557f570ade..00cf9553f740 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -107,4 +107,6 @@ source "drivers/uio/Kconfig"
107source "drivers/xen/Kconfig" 107source "drivers/xen/Kconfig"
108 108
109source "drivers/staging/Kconfig" 109source "drivers/staging/Kconfig"
110
111source "drivers/platform/Kconfig"
110endmenu 112endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 6326f4dbbdab..c1bf41737936 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -105,3 +105,4 @@ obj-$(CONFIG_OF) += of/
105obj-$(CONFIG_SSB) += ssb/ 105obj-$(CONFIG_SSB) += ssb/
106obj-$(CONFIG_VIRTIO) += virtio/ 106obj-$(CONFIG_VIRTIO) += virtio/
107obj-$(CONFIG_STAGING) += staging/ 107obj-$(CONFIG_STAGING) += staging/
108obj-y += platform/
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index b0243fd55ac0..d7f9839ba264 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -196,90 +196,6 @@ config ACPI_NUMA
196 depends on (X86 || IA64) 196 depends on (X86 || IA64)
197 default y if IA64_GENERIC || IA64_SGI_SN2 197 default y if IA64_GENERIC || IA64_SGI_SN2
198 198
199config ACPI_WMI
200 tristate "WMI (EXPERIMENTAL)"
201 depends on X86
202 depends on EXPERIMENTAL
203 help
204 This driver adds support for the ACPI-WMI (Windows Management
205 Instrumentation) mapper device (PNP0C14) found on some systems.
206
207 ACPI-WMI is a proprietary extension to ACPI to expose parts of the
208 ACPI firmware to userspace - this is done through various vendor
209 defined methods and data blocks in a PNP0C14 device, which are then
210 made available for userspace to call.
211
212 The implementation of this in Linux currently only exposes this to
213 other kernel space drivers.
214
215 This driver is a required dependency to build the firmware specific
216 drivers needed on many machines, including Acer and HP laptops.
217
218 It is safe to enable this driver even if your DSDT doesn't define
219 any ACPI-WMI devices.
220
221config ACPI_ASUS
222 tristate "ASUS/Medion Laptop Extras"
223 depends on X86
224 select BACKLIGHT_CLASS_DEVICE
225 ---help---
226 This driver provides support for extra features of ACPI-compatible
227 ASUS laptops. As some of Medion laptops are made by ASUS, it may also
228 support some Medion laptops (such as 9675 for example). It makes all
229 the extra buttons generate standard ACPI events that go through
230 /proc/acpi/events, and (on some models) adds support for changing the
231 display brightness and output, switching the LCD backlight on and off,
232 and most importantly, allows you to blink those fancy LEDs intended
233 for reporting mail and wireless status.
234
235 Note: display switching code is currently considered EXPERIMENTAL,
236 toying with these values may even lock your machine.
237
238 All settings are changed via /proc/acpi/asus directory entries. Owner
239 and group for these entries can be set with asus_uid and asus_gid
240 parameters.
241
242 More information and a userspace daemon for handling the extra buttons
243 at <http://sourceforge.net/projects/acpi4asus/>.
244
245 If you have an ACPI-compatible ASUS laptop, say Y or M here. This
246 driver is still under development, so if your laptop is unsupported or
247 something works not quite as expected, please use the mailing list
248 available on the above page (acpi4asus-user@lists.sourceforge.net).
249
250 NOTE: This driver is deprecated and will probably be removed soon,
251 use asus-laptop instead.
252
253config ACPI_TOSHIBA
254 tristate "Toshiba Laptop Extras"
255 depends on X86 && INPUT
256 select INPUT_POLLDEV
257 select NET
258 select RFKILL
259 select BACKLIGHT_CLASS_DEVICE
260 ---help---
261 This driver adds support for access to certain system settings
262 on "legacy free" Toshiba laptops. These laptops can be recognized by
263 their lack of a BIOS setup menu and APM support.
264
265 On these machines, all system configuration is handled through the
266 ACPI. This driver is required for access to controls not covered
267 by the general ACPI drivers, such as LCD brightness, video output,
268 etc.
269
270 This driver differs from the non-ACPI Toshiba laptop driver (located
271 under "Processor type and features") in several aspects.
272 Configuration is accessed by reading and writing text files in the
273 /proc tree instead of by program interface to /dev. Furthermore, no
274 power management functions are exposed, as those are handled by the
275 general ACPI drivers.
276
277 More information about this driver is available at
278 <http://memebeam.org/toys/ToshibaAcpiDriver>.
279
280 If you have a legacy free Toshiba laptop (such as the Libretto L1
281 series), say Y.
282
283config ACPI_CUSTOM_DSDT_FILE 199config ACPI_CUSTOM_DSDT_FILE
284 string "Custom DSDT Table file to include" 200 string "Custom DSDT Table file to include"
285 default "" 201 default ""
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 3c0c93300f12..d80f4cc2e0da 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -2,15 +2,8 @@
2# Makefile for the Linux ACPI interpreter 2# Makefile for the Linux ACPI interpreter
3# 3#
4 4
5export ACPI_CFLAGS 5ccflags-y := -Os
6 6ccflags-$(CONFIG_ACPI_DEBUG) += -DACPI_DEBUG_OUTPUT
7ACPI_CFLAGS := -Os
8
9ifdef CONFIG_ACPI_DEBUG
10 ACPI_CFLAGS += -DACPI_DEBUG_OUTPUT
11endif
12
13EXTRA_CFLAGS += $(ACPI_CFLAGS)
14 7
15# 8#
16# ACPI Boot-Time Table Parsing 9# ACPI Boot-Time Table Parsing
@@ -22,9 +15,13 @@ obj-$(CONFIG_X86) += blacklist.o
22# ACPI Core Subsystem (Interpreter) 15# ACPI Core Subsystem (Interpreter)
23# 16#
24obj-y += osl.o utils.o reboot.o\ 17obj-y += osl.o utils.o reboot.o\
25 dispatcher/ events/ executer/ hardware/ \ 18 acpica/
26 namespace/ parser/ resources/ tables/ \ 19
27 utilities/ 20# sleep related files
21obj-y += wakeup.o
22obj-y += main.o
23obj-$(CONFIG_ACPI_SLEEP) += proc.o
24
28 25
29# 26#
30# ACPI Bus and Device Drivers 27# ACPI Bus and Device Drivers
@@ -35,7 +32,6 @@ ifdef CONFIG_CPU_FREQ
35processor-objs += processor_perflib.o 32processor-objs += processor_perflib.o
36endif 33endif
37 34
38obj-y += sleep/
39obj-y += bus.o glue.o 35obj-y += bus.o glue.o
40obj-y += scan.o 36obj-y += scan.o
41# Keep EC driver first. Initialization of others depend on it. 37# Keep EC driver first. Initialization of others depend on it.
@@ -59,9 +55,6 @@ obj-y += power.o
59obj-$(CONFIG_ACPI_SYSTEM) += system.o event.o 55obj-$(CONFIG_ACPI_SYSTEM) += system.o event.o
60obj-$(CONFIG_ACPI_DEBUG) += debug.o 56obj-$(CONFIG_ACPI_DEBUG) += debug.o
61obj-$(CONFIG_ACPI_NUMA) += numa.o 57obj-$(CONFIG_ACPI_NUMA) += numa.o
62obj-$(CONFIG_ACPI_WMI) += wmi.o
63obj-$(CONFIG_ACPI_ASUS) += asus_acpi.o
64obj-$(CONFIG_ACPI_TOSHIBA) += toshiba_acpi.o
65obj-$(CONFIG_ACPI_HOTPLUG_MEMORY) += acpi_memhotplug.o 58obj-$(CONFIG_ACPI_HOTPLUG_MEMORY) += acpi_memhotplug.o
66obj-$(CONFIG_ACPI_PROCFS_POWER) += cm_sbs.o 59obj-$(CONFIG_ACPI_PROCFS_POWER) += cm_sbs.o
67obj-$(CONFIG_ACPI_SBS) += sbshc.o 60obj-$(CONFIG_ACPI_SBS) += sbshc.o
diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile
new file mode 100644
index 000000000000..3f23298ee3fd
--- /dev/null
+++ b/drivers/acpi/acpica/Makefile
@@ -0,0 +1,44 @@
1#
2# Makefile for ACPICA Core interpreter
3#
4
5ccflags-y := -Os
6ccflags-$(CONFIG_ACPI_DEBUG) += -DACPI_DEBUG_OUTPUT
7
8obj-y := dsfield.o dsmthdat.o dsopcode.o dswexec.o dswscope.o \
9 dsmethod.o dsobject.o dsutils.o dswload.o dswstate.o \
10 dsinit.o
11
12obj-y += evevent.o evregion.o evsci.o evxfevnt.o \
13 evmisc.o evrgnini.o evxface.o evxfregn.o \
14 evgpe.o evgpeblk.o
15
16obj-y += exconfig.o exfield.o exnames.o exoparg6.o exresolv.o exstorob.o\
17 exconvrt.o exfldio.o exoparg1.o exprep.o exresop.o exsystem.o\
18 excreate.o exmisc.o exoparg2.o exregion.o exstore.o exutils.o \
19 exdump.o exmutex.o exoparg3.o exresnte.o exstoren.o
20
21obj-y += hwacpi.o hwgpe.o hwregs.o hwsleep.o hwxface.o
22
23obj-$(ACPI_FUTURE_USAGE) += hwtimer.o
24
25obj-y += nsaccess.o nsload.o nssearch.o nsxfeval.o \
26 nsalloc.o nseval.o nsnames.o nsutils.o nsxfname.o \
27 nsdump.o nsinit.o nsobject.o nswalk.o nsxfobj.o \
28 nsparse.o nspredef.o
29
30obj-$(ACPI_FUTURE_USAGE) += nsdumpdv.o
31
32obj-y += psargs.o psparse.o psloop.o pstree.o pswalk.o \
33 psopcode.o psscope.o psutils.o psxface.o
34
35obj-y += rsaddr.o rscreate.o rsinfo.o rsio.o rslist.o rsmisc.o rsxface.o \
36 rscalc.o rsirq.o rsmemory.o rsutils.o
37
38obj-$(ACPI_FUTURE_USAGE) += rsdump.o
39
40obj-y += tbxface.o tbinstal.o tbutils.o tbfind.o tbfadt.o tbxfroot.o
41
42obj-y += utalloc.o utdebug.o uteval.o utinit.o utmisc.o utxface.o \
43 utcopy.o utdelete.o utglobal.o utmath.o utobject.o \
44 utstate.o utmutex.o utobject.o utresrc.o
diff --git a/drivers/acpi/acpica/accommon.h b/drivers/acpi/acpica/accommon.h
new file mode 100644
index 000000000000..3b20786cbb0d
--- /dev/null
+++ b/drivers/acpi/acpica/accommon.h
@@ -0,0 +1,63 @@
1/******************************************************************************
2 *
3 * Name: accommon.h - Common include files for generation of ACPICA source
4 *
5 *****************************************************************************/
6
7/*
8 * Copyright (C) 2000 - 2008, Intel Corp.
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions, and the following disclaimer,
16 * without modification.
17 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
18 * substantially similar to the "NO WARRANTY" disclaimer below
19 * ("Disclaimer") and any redistribution must be conditioned upon
20 * including a substantially similar Disclaimer requirement for further
21 * binary redistribution.
22 * 3. Neither the names of the above-listed copyright holders nor the names
23 * of any contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * Alternatively, this software may be distributed under the terms of the
27 * GNU General Public License ("GPL") version 2 as published by the Free
28 * Software Foundation.
29 *
30 * NO WARRANTY
31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
32 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
33 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
34 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
35 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
40 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 * POSSIBILITY OF SUCH DAMAGES.
42 */
43
44#ifndef __ACCOMMON_H__
45#define __ACCOMMON_H__
46
47/*
48 * Common set of includes for all ACPICA source files.
49 * We put them here because we don't want to duplicate them
50 * in the the source code again and again.
51 *
52 * Note: The order of these include files is important.
53 */
54#include "acconfig.h" /* Global configuration constants */
55#include "acmacros.h" /* C macros */
56#include "aclocal.h" /* Internal data types */
57#include "acobject.h" /* ACPI internal object */
58#include "acstruct.h" /* Common structures */
59#include "acglobal.h" /* All global variables */
60#include "achware.h" /* Hardware defines and interfaces */
61#include "acutils.h" /* Utility interfaces */
62
63#endif /* __ACCOMMON_H__ */
diff --git a/include/acpi/acconfig.h b/drivers/acpi/acpica/acconfig.h
index 29feee27f0ea..e6777fb883d2 100644
--- a/include/acpi/acconfig.h
+++ b/drivers/acpi/acpica/acconfig.h
@@ -61,10 +61,6 @@
61 * 61 *
62 */ 62 */
63 63
64/* Current ACPICA subsystem version in YYYYMMDD format */
65
66#define ACPI_CA_VERSION 0x20080926
67
68/* 64/*
69 * OS name, used for the _OS object. The _OS object is essentially obsolete, 65 * OS name, used for the _OS object. The _OS object is essentially obsolete,
70 * but there is a large base of ASL/AML code in existing machines that check 66 * but there is a large base of ASL/AML code in existing machines that check
@@ -119,6 +115,10 @@
119 115
120#define ACPI_ROOT_TABLE_SIZE_INCREMENT 4 116#define ACPI_ROOT_TABLE_SIZE_INCREMENT 4
121 117
118/* Maximum number of While() loop iterations before forced abort */
119
120#define ACPI_MAX_LOOP_ITERATIONS 0xFFFF
121
122/****************************************************************************** 122/******************************************************************************
123 * 123 *
124 * ACPI Specification constants (Do not change unless the specification changes) 124 * ACPI Specification constants (Do not change unless the specification changes)
diff --git a/include/acpi/acdebug.h b/drivers/acpi/acpica/acdebug.h
index 62c59df3b86c..62c59df3b86c 100644
--- a/include/acpi/acdebug.h
+++ b/drivers/acpi/acpica/acdebug.h
diff --git a/include/acpi/acdispat.h b/drivers/acpi/acpica/acdispat.h
index 6291904be01e..6291904be01e 100644
--- a/include/acpi/acdispat.h
+++ b/drivers/acpi/acpica/acdispat.h
diff --git a/include/acpi/acevents.h b/drivers/acpi/acpica/acevents.h
index d5d099bf349c..07e20135f01b 100644
--- a/include/acpi/acevents.h
+++ b/drivers/acpi/acpica/acevents.h
@@ -93,11 +93,13 @@ struct acpi_gpe_event_info *acpi_ev_get_gpe_event_info(acpi_handle gpe_device,
93 */ 93 */
94u8 acpi_ev_valid_gpe_event(struct acpi_gpe_event_info *gpe_event_info); 94u8 acpi_ev_valid_gpe_event(struct acpi_gpe_event_info *gpe_event_info);
95 95
96acpi_status acpi_ev_walk_gpe_list(acpi_gpe_callback gpe_walk_callback); 96acpi_status
97acpi_ev_walk_gpe_list(acpi_gpe_callback gpe_walk_callback, void *context);
97 98
98acpi_status 99acpi_status
99acpi_ev_delete_gpe_handlers(struct acpi_gpe_xrupt_info *gpe_xrupt_info, 100acpi_ev_delete_gpe_handlers(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
100 struct acpi_gpe_block_info *gpe_block); 101 struct acpi_gpe_block_info *gpe_block,
102 void *context);
101 103
102acpi_status 104acpi_status
103acpi_ev_create_gpe_block(struct acpi_namespace_node *gpe_device, 105acpi_ev_create_gpe_block(struct acpi_namespace_node *gpe_device,
diff --git a/include/acpi/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 15dda46b70d1..ddb40f5c68fc 100644
--- a/include/acpi/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -102,6 +102,12 @@ ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_create_osi_method, TRUE);
102 */ 102 */
103ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_leave_wake_gpes_disabled, TRUE); 103ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_leave_wake_gpes_disabled, TRUE);
104 104
105/*
106 * Optionally use default values for the ACPI register widths. Set this to
107 * TRUE to use the defaults, if an FADT contains incorrect widths/lengths.
108 */
109ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_use_default_register_widths, TRUE);
110
105/***************************************************************************** 111/*****************************************************************************
106 * 112 *
107 * Debug support 113 * Debug support
@@ -140,7 +146,7 @@ ACPI_EXTERN u32 acpi_gbl_trace_flags;
140 */ 146 */
141ACPI_EXTERN struct acpi_internal_rsdt acpi_gbl_root_table_list; 147ACPI_EXTERN struct acpi_internal_rsdt acpi_gbl_root_table_list;
142ACPI_EXTERN struct acpi_table_fadt acpi_gbl_FADT; 148ACPI_EXTERN struct acpi_table_fadt acpi_gbl_FADT;
143extern u8 acpi_gbl_permanent_mmap; 149ACPI_EXTERN struct acpi_table_facs *acpi_gbl_FACS;
144 150
145/* These addresses are calculated from FADT address values */ 151/* These addresses are calculated from FADT address values */
146 152
@@ -326,6 +332,7 @@ ACPI_EXTERN struct acpi_fixed_event_handler
326ACPI_EXTERN struct acpi_gpe_xrupt_info *acpi_gbl_gpe_xrupt_list_head; 332ACPI_EXTERN struct acpi_gpe_xrupt_info *acpi_gbl_gpe_xrupt_list_head;
327ACPI_EXTERN struct acpi_gpe_block_info 333ACPI_EXTERN struct acpi_gpe_block_info
328*acpi_gbl_gpe_fadt_blocks[ACPI_MAX_GPE_BLOCKS]; 334*acpi_gbl_gpe_fadt_blocks[ACPI_MAX_GPE_BLOCKS];
335ACPI_EXTERN u32 acpi_current_gpe_count;
329 336
330/***************************************************************************** 337/*****************************************************************************
331 * 338 *
diff --git a/include/acpi/achware.h b/drivers/acpi/acpica/achware.h
index 97a72b193276..58c69dc49ab4 100644
--- a/include/acpi/achware.h
+++ b/drivers/acpi/acpica/achware.h
@@ -44,11 +44,7 @@
44#ifndef __ACHWARE_H__ 44#ifndef __ACHWARE_H__
45#define __ACHWARE_H__ 45#define __ACHWARE_H__
46 46
47/* PM Timer ticks per second (HZ) */ 47/* Values for the _SST predefined method */
48
49#define PM_TIMER_FREQUENCY 3579545
50
51/* Values for the _SST reserved method */
52 48
53#define ACPI_SST_INDICATOR_OFF 0 49#define ACPI_SST_INDICATOR_OFF 0
54#define ACPI_SST_WORKING 1 50#define ACPI_SST_WORKING 1
@@ -56,8 +52,6 @@
56#define ACPI_SST_SLEEPING 3 52#define ACPI_SST_SLEEPING 3
57#define ACPI_SST_SLEEP_CONTEXT 4 53#define ACPI_SST_SLEEP_CONTEXT 4
58 54
59/* Prototypes */
60
61/* 55/*
62 * hwacpi - high level functions 56 * hwacpi - high level functions
63 */ 57 */
@@ -75,13 +69,6 @@ acpi_hw_register_read(u32 register_id, u32 * return_value);
75 69
76acpi_status acpi_hw_register_write(u32 register_id, u32 value); 70acpi_status acpi_hw_register_write(u32 register_id, u32 value);
77 71
78acpi_status
79acpi_hw_low_level_read(u32 width,
80 u32 * value, struct acpi_generic_address *reg);
81
82acpi_status
83acpi_hw_low_level_write(u32 width, u32 value, struct acpi_generic_address *reg);
84
85acpi_status acpi_hw_clear_acpi_status(void); 72acpi_status acpi_hw_clear_acpi_status(void);
86 73
87/* 74/*
@@ -94,13 +81,13 @@ acpi_hw_write_gpe_enable_reg(struct acpi_gpe_event_info *gpe_event_info);
94 81
95acpi_status 82acpi_status
96acpi_hw_disable_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, 83acpi_hw_disable_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
97 struct acpi_gpe_block_info *gpe_block); 84 struct acpi_gpe_block_info *gpe_block, void *context);
98 85
99acpi_status acpi_hw_clear_gpe(struct acpi_gpe_event_info *gpe_event_info); 86acpi_status acpi_hw_clear_gpe(struct acpi_gpe_event_info *gpe_event_info);
100 87
101acpi_status 88acpi_status
102acpi_hw_clear_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, 89acpi_hw_clear_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
103 struct acpi_gpe_block_info *gpe_block); 90 struct acpi_gpe_block_info *gpe_block, void *context);
104 91
105acpi_status 92acpi_status
106acpi_hw_get_gpe_status(struct acpi_gpe_event_info *gpe_event_info, 93acpi_hw_get_gpe_status(struct acpi_gpe_event_info *gpe_event_info,
@@ -114,7 +101,8 @@ acpi_status acpi_hw_enable_all_wakeup_gpes(void);
114 101
115acpi_status 102acpi_status
116acpi_hw_enable_runtime_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, 103acpi_hw_enable_runtime_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
117 struct acpi_gpe_block_info *gpe_block); 104 struct acpi_gpe_block_info *gpe_block,
105 void *context);
118 106
119#ifdef ACPI_FUTURE_USAGE 107#ifdef ACPI_FUTURE_USAGE
120/* 108/*
diff --git a/include/acpi/acinterp.h b/drivers/acpi/acpica/acinterp.h
index e8db7a3143a5..e8db7a3143a5 100644
--- a/include/acpi/acinterp.h
+++ b/drivers/acpi/acpica/acinterp.h
diff --git a/include/acpi/aclocal.h b/drivers/acpi/acpica/aclocal.h
index ecab527cf78e..492d02761bb7 100644
--- a/include/acpi/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -46,8 +46,6 @@
46 46
47/* acpisrc:struct_defs -- for acpisrc conversion */ 47/* acpisrc:struct_defs -- for acpisrc conversion */
48 48
49#define ACPI_WAIT_FOREVER 0xFFFF /* u16, as per ACPI spec */
50#define ACPI_DO_NOT_WAIT 0
51#define ACPI_SERIALIZED 0xFF 49#define ACPI_SERIALIZED 0xFF
52 50
53typedef u32 acpi_mutex_handle; 51typedef u32 acpi_mutex_handle;
@@ -120,11 +118,6 @@ static char *acpi_gbl_mutex_names[ACPI_NUM_MUTEX] = {
120#define ACPI_MAX_LOCK 1 118#define ACPI_MAX_LOCK 1
121#define ACPI_NUM_LOCK ACPI_MAX_LOCK+1 119#define ACPI_NUM_LOCK ACPI_MAX_LOCK+1
122 120
123/* Owner IDs are used to track namespace nodes for selective deletion */
124
125typedef u8 acpi_owner_id;
126#define ACPI_OWNER_ID_MAX 0xFF
127
128/* This Thread ID means that the mutex is not in use (unlocked) */ 121/* This Thread ID means that the mutex is not in use (unlocked) */
129 122
130#define ACPI_MUTEX_NOT_ACQUIRED (acpi_thread_id) 0 123#define ACPI_MUTEX_NOT_ACQUIRED (acpi_thread_id) 0
@@ -165,11 +158,6 @@ typedef enum {
165 ACPI_IMODE_EXECUTE = 0x03 158 ACPI_IMODE_EXECUTE = 0x03
166} acpi_interpreter_mode; 159} acpi_interpreter_mode;
167 160
168union acpi_name_union {
169 u32 integer;
170 char ascii[4];
171};
172
173/* 161/*
174 * The Namespace Node describes a named object that appears in the AML. 162 * The Namespace Node describes a named object that appears in the AML.
175 * descriptor_type is used to differentiate between internal descriptors. 163 * descriptor_type is used to differentiate between internal descriptors.
@@ -216,26 +204,6 @@ struct acpi_namespace_node {
216#define ANOBJ_IS_BIT_OFFSET 0x40 /* i_aSL only: Reference is a bit offset */ 204#define ANOBJ_IS_BIT_OFFSET 0x40 /* i_aSL only: Reference is a bit offset */
217#define ANOBJ_IS_REFERENCED 0x80 /* i_aSL only: Object was referenced */ 205#define ANOBJ_IS_REFERENCED 0x80 /* i_aSL only: Object was referenced */
218 206
219/*
220 * ACPI Table Descriptor. One per ACPI table
221 */
222struct acpi_table_desc {
223 acpi_physical_address address;
224 struct acpi_table_header *pointer;
225 u32 length; /* Length fixed at 32 bits */
226 union acpi_name_union signature;
227 acpi_owner_id owner_id;
228 u8 flags;
229};
230
231/* Flags for above */
232
233#define ACPI_TABLE_ORIGIN_UNKNOWN (0)
234#define ACPI_TABLE_ORIGIN_MAPPED (1)
235#define ACPI_TABLE_ORIGIN_ALLOCATED (2)
236#define ACPI_TABLE_ORIGIN_MASK (3)
237#define ACPI_TABLE_IS_LOADED (4)
238
239/* One internal RSDT for table management */ 207/* One internal RSDT for table management */
240 208
241struct acpi_internal_rsdt { 209struct acpi_internal_rsdt {
@@ -266,15 +234,6 @@ struct acpi_ns_search_data {
266 struct acpi_namespace_node *node; 234 struct acpi_namespace_node *node;
267}; 235};
268 236
269/*
270 * Predefined Namespace items
271 */
272struct acpi_predefined_names {
273 char *name;
274 u8 type;
275 char *val;
276};
277
278/* Object types used during package copies */ 237/* Object types used during package copies */
279 238
280#define ACPI_COPY_TYPE_SIMPLE 0 239#define ACPI_COPY_TYPE_SIMPLE 0
@@ -487,10 +446,15 @@ struct acpi_gpe_walk_info {
487 struct acpi_gpe_block_info *gpe_block; 446 struct acpi_gpe_block_info *gpe_block;
488}; 447};
489 448
490typedef acpi_status(*acpi_gpe_callback) (struct acpi_gpe_xrupt_info * 449struct acpi_gpe_device_info {
491 gpe_xrupt_info, 450 u32 index;
492 struct acpi_gpe_block_info * 451 u32 next_block_base_index;
493 gpe_block); 452 acpi_status status;
453 struct acpi_namespace_node *gpe_device;
454};
455
456typedef acpi_status(*acpi_gpe_callback) (struct acpi_gpe_xrupt_info *gpe_xrupt_info,
457 struct acpi_gpe_block_info *gpe_block, void *context);
494 458
495/* Information about each particular fixed event */ 459/* Information about each particular fixed event */
496 460
@@ -566,6 +530,7 @@ struct acpi_control_state {
566 union acpi_parse_object *predicate_op; 530 union acpi_parse_object *predicate_op;
567 u8 *aml_predicate_start; /* Start of if/while predicate */ 531 u8 *aml_predicate_start; /* Start of if/while predicate */
568 u8 *package_end; /* End of if/while block */ 532 u8 *package_end; /* End of if/while block */
533 u32 loop_count; /* While() loop counter */
569}; 534};
570 535
571/* 536/*
@@ -671,6 +636,12 @@ union acpi_parse_value {
671 union acpi_parse_object *arg; /* arguments and contained ops */ 636 union acpi_parse_object *arg; /* arguments and contained ops */
672}; 637};
673 638
639#ifdef ACPI_DISASSEMBLER
640#define ACPI_DISASM_ONLY_MEMBERS(a) a;
641#else
642#define ACPI_DISASM_ONLY_MEMBERS(a)
643#endif
644
674#define ACPI_PARSE_COMMON \ 645#define ACPI_PARSE_COMMON \
675 union acpi_parse_object *parent; /* Parent op */\ 646 union acpi_parse_object *parent; /* Parent op */\
676 u8 descriptor_type; /* To differentiate various internal objs */\ 647 u8 descriptor_type; /* To differentiate various internal objs */\
@@ -790,9 +761,6 @@ struct acpi_parse_state {
790 * 761 *
791 ****************************************************************************/ 762 ****************************************************************************/
792 763
793#define PCI_ROOT_HID_STRING "PNP0A03"
794#define PCI_EXPRESS_ROOT_HID_STRING "PNP0A08"
795
796struct acpi_bit_register_info { 764struct acpi_bit_register_info {
797 u8 parent_register; 765 u8 parent_register;
798 u8 bit_position; 766 u8 bit_position;
@@ -1019,26 +987,4 @@ struct acpi_debug_mem_block {
1019#define ACPI_MEM_LIST_MAX 1 987#define ACPI_MEM_LIST_MAX 1
1020#define ACPI_NUM_MEM_LISTS 2 988#define ACPI_NUM_MEM_LISTS 2
1021 989
1022struct acpi_memory_list {
1023 char *list_name;
1024 void *list_head;
1025 u16 object_size;
1026 u16 max_depth;
1027 u16 current_depth;
1028 u16 link_offset;
1029
1030#ifdef ACPI_DBG_TRACK_ALLOCATIONS
1031
1032 /* Statistics for debug memory tracking only */
1033
1034 u32 total_allocated;
1035 u32 total_freed;
1036 u32 max_occupied;
1037 u32 total_size;
1038 u32 current_total_size;
1039 u32 requests;
1040 u32 hits;
1041#endif
1042};
1043
1044#endif /* __ACLOCAL_H__ */ 990#endif /* __ACLOCAL_H__ */
diff --git a/include/acpi/acmacros.h b/drivers/acpi/acpica/acmacros.h
index 1954c9d1d012..9c127e8e2d6d 100644
--- a/include/acpi/acmacros.h
+++ b/drivers/acpi/acpica/acmacros.h
@@ -45,23 +45,6 @@
45#define __ACMACROS_H__ 45#define __ACMACROS_H__
46 46
47/* 47/*
48 * Data manipulation macros
49 */
50#define ACPI_LOWORD(l) ((u16)(u32)(l))
51#define ACPI_HIWORD(l) ((u16)((((u32)(l)) >> 16) & 0xFFFF))
52#define ACPI_LOBYTE(l) ((u8)(u16)(l))
53#define ACPI_HIBYTE(l) ((u8)((((u16)(l)) >> 8) & 0xFF))
54
55#define ACPI_SET_BIT(target,bit) ((target) |= (bit))
56#define ACPI_CLEAR_BIT(target,bit) ((target) &= ~(bit))
57#define ACPI_MIN(a,b) (((a)<(b))?(a):(b))
58#define ACPI_MAX(a,b) (((a)>(b))?(a):(b))
59
60/* Size calculation */
61
62#define ACPI_ARRAY_LENGTH(x) (sizeof(x) / sizeof((x)[0]))
63
64/*
65 * Extract data using a pointer. Any more than a byte and we 48 * Extract data using a pointer. Any more than a byte and we
66 * get into potential aligment issues -- see the STORE macros below. 49 * get into potential aligment issues -- see the STORE macros below.
67 * Use with care. 50 * Use with care.
@@ -76,39 +59,6 @@
76#define ACPI_SET64(ptr) *ACPI_CAST_PTR (u64, ptr) 59#define ACPI_SET64(ptr) *ACPI_CAST_PTR (u64, ptr)
77 60
78/* 61/*
79 * Pointer manipulation
80 */
81#define ACPI_CAST_PTR(t, p) ((t *) (acpi_uintptr_t) (p))
82#define ACPI_CAST_INDIRECT_PTR(t, p) ((t **) (acpi_uintptr_t) (p))
83#define ACPI_ADD_PTR(t, a, b) ACPI_CAST_PTR (t, (ACPI_CAST_PTR (u8, (a)) + (acpi_size)(b)))
84#define ACPI_PTR_DIFF(a, b) (acpi_size) (ACPI_CAST_PTR (u8, (a)) - ACPI_CAST_PTR (u8, (b)))
85
86/* Pointer/Integer type conversions */
87
88#define ACPI_TO_POINTER(i) ACPI_ADD_PTR (void, (void *) NULL, (acpi_size) i)
89#define ACPI_TO_INTEGER(p) ACPI_PTR_DIFF (p, (void *) NULL)
90#define ACPI_OFFSET(d, f) (acpi_size) ACPI_PTR_DIFF (&(((d *)0)->f), (void *) NULL)
91#define ACPI_PHYSADDR_TO_PTR(i) ACPI_TO_POINTER(i)
92#define ACPI_PTR_TO_PHYSADDR(i) ACPI_TO_INTEGER(i)
93
94#ifndef ACPI_MISALIGNMENT_NOT_SUPPORTED
95#define ACPI_COMPARE_NAME(a, b) (*ACPI_CAST_PTR (u32, (a)) == *ACPI_CAST_PTR (u32, (b)))
96#else
97#define ACPI_COMPARE_NAME(a, b) (!ACPI_STRNCMP (ACPI_CAST_PTR (char, (a)), ACPI_CAST_PTR (char, (b)), ACPI_NAME_SIZE))
98#endif
99
100/*
101 * Full 64-bit integer must be available on both 32-bit and 64-bit platforms
102 */
103struct acpi_integer_overlay {
104 u32 lo_dword;
105 u32 hi_dword;
106};
107
108#define ACPI_LODWORD(integer) (ACPI_CAST_PTR (struct acpi_integer_overlay, &integer)->lo_dword)
109#define ACPI_HIDWORD(integer) (ACPI_CAST_PTR (struct acpi_integer_overlay, &integer)->hi_dword)
110
111/*
112 * printf() format helpers 62 * printf() format helpers
113 */ 63 */
114 64
@@ -209,7 +159,7 @@ struct acpi_integer_overlay {
209/* 159/*
210 * The hardware does not support unaligned transfers. We must move the 160 * The hardware does not support unaligned transfers. We must move the
211 * data one byte at a time. These macros work whether the source or 161 * data one byte at a time. These macros work whether the source or
212 * the destination (or both) is/are unaligned. (Little-endian move) 162 * the destination (or both) is/are unaligned. (Little-endian move)
213 */ 163 */
214 164
215/* 16-bit source, 16/32/64 destination */ 165/* 16-bit source, 16/32/64 destination */
@@ -357,12 +307,6 @@ struct acpi_integer_overlay {
357 {(u32)(Pargs), (u32)(Iargs), (u32)(flags), obj_type, class, type} 307 {(u32)(Pargs), (u32)(Iargs), (u32)(flags), obj_type, class, type}
358#endif 308#endif
359 309
360#ifdef ACPI_DISASSEMBLER
361#define ACPI_DISASM_ONLY_MEMBERS(a) a;
362#else
363#define ACPI_DISASM_ONLY_MEMBERS(a)
364#endif
365
366#define ARG_TYPE_WIDTH 5 310#define ARG_TYPE_WIDTH 5
367#define ARG_1(x) ((u32)(x)) 311#define ARG_1(x) ((u32)(x))
368#define ARG_2(x) ((u32)(x) << (1 * ARG_TYPE_WIDTH)) 312#define ARG_2(x) ((u32)(x) << (1 * ARG_TYPE_WIDTH))
@@ -388,32 +332,16 @@ struct acpi_integer_overlay {
388#define GET_CURRENT_ARG_TYPE(list) (list & ((u32) 0x1F)) 332#define GET_CURRENT_ARG_TYPE(list) (list & ((u32) 0x1F))
389#define INCREMENT_ARG_LIST(list) (list >>= ((u32) ARG_TYPE_WIDTH)) 333#define INCREMENT_ARG_LIST(list) (list >>= ((u32) ARG_TYPE_WIDTH))
390 334
391#if defined (ACPI_DEBUG_OUTPUT) || !defined (ACPI_NO_ERROR_MESSAGES)
392/*
393 * Module name is include in both debug and non-debug versions primarily for
394 * error messages. The __FILE__ macro is not very useful for this, because it
395 * often includes the entire pathname to the module
396 */
397#define ACPI_MODULE_NAME(name) static const char ACPI_UNUSED_VAR _acpi_module_name[] = name;
398#else
399#define ACPI_MODULE_NAME(name)
400#endif
401
402/* 335/*
403 * Ascii error messages can be configured out 336 * Ascii error messages can be configured out
404 */ 337 */
405#ifndef ACPI_NO_ERROR_MESSAGES 338#ifndef ACPI_NO_ERROR_MESSAGES
406#define AE_INFO _acpi_module_name, __LINE__
407 339
408/* 340/*
409 * Error reporting. Callers module and line number are inserted by AE_INFO, 341 * Error reporting. Callers module and line number are inserted by AE_INFO,
410 * the plist contains a set of parens to allow variable-length lists. 342 * the plist contains a set of parens to allow variable-length lists.
411 * These macros are used for both the debug and non-debug versions of the code. 343 * These macros are used for both the debug and non-debug versions of the code.
412 */ 344 */
413#define ACPI_INFO(plist) acpi_ut_info plist
414#define ACPI_WARNING(plist) acpi_ut_warning plist
415#define ACPI_EXCEPTION(plist) acpi_ut_exception plist
416#define ACPI_ERROR(plist) acpi_ut_error plist
417#define ACPI_ERROR_NAMESPACE(s, e) acpi_ns_report_error (AE_INFO, s, e); 345#define ACPI_ERROR_NAMESPACE(s, e) acpi_ns_report_error (AE_INFO, s, e);
418#define ACPI_ERROR_METHOD(s, n, p, e) acpi_ns_report_method_error (AE_INFO, s, n, p, e); 346#define ACPI_ERROR_METHOD(s, n, p, e) acpi_ns_report_method_error (AE_INFO, s, n, p, e);
419 347
@@ -421,13 +349,9 @@ struct acpi_integer_overlay {
421 349
422/* No error messages */ 350/* No error messages */
423 351
424#define ACPI_INFO(plist)
425#define ACPI_WARNING(plist)
426#define ACPI_EXCEPTION(plist)
427#define ACPI_ERROR(plist)
428#define ACPI_ERROR_NAMESPACE(s, e) 352#define ACPI_ERROR_NAMESPACE(s, e)
429#define ACPI_ERROR_METHOD(s, n, p, e) 353#define ACPI_ERROR_METHOD(s, n, p, e)
430#endif 354#endif /* ACPI_NO_ERROR_MESSAGES */
431 355
432/* 356/*
433 * Debug macros that are conditionally compiled 357 * Debug macros that are conditionally compiled
@@ -435,36 +359,8 @@ struct acpi_integer_overlay {
435#ifdef ACPI_DEBUG_OUTPUT 359#ifdef ACPI_DEBUG_OUTPUT
436 360
437/* 361/*
438 * Common parameters used for debug output functions:
439 * line number, function name, module(file) name, component ID
440 */
441#define ACPI_DEBUG_PARAMETERS __LINE__, ACPI_GET_FUNCTION_NAME, _acpi_module_name, _COMPONENT
442
443/*
444 * Function entry tracing 362 * Function entry tracing
445 */ 363 */
446
447/*
448 * If ACPI_GET_FUNCTION_NAME was not defined in the compiler-dependent header,
449 * define it now. This is the case where there the compiler does not support
450 * a __func__ macro or equivalent.
451 */
452#ifndef ACPI_GET_FUNCTION_NAME
453#define ACPI_GET_FUNCTION_NAME _acpi_function_name
454/*
455 * The Name parameter should be the procedure name as a quoted string.
456 * The function name is also used by the function exit macros below.
457 * Note: (const char) is used to be compatible with the debug interfaces
458 * and macros such as __func__.
459 */
460#define ACPI_FUNCTION_NAME(name) static const char _acpi_function_name[] = #name;
461
462#else
463/* Compiler supports __func__ (or equivalent) -- Ignore this macro */
464
465#define ACPI_FUNCTION_NAME(name)
466#endif
467
468#ifdef CONFIG_ACPI_DEBUG_FUNC_TRACE 364#ifdef CONFIG_ACPI_DEBUG_FUNC_TRACE
469 365
470#define ACPI_FUNCTION_TRACE(a) ACPI_FUNCTION_NAME(a) \ 366#define ACPI_FUNCTION_TRACE(a) ACPI_FUNCTION_NAME(a) \
@@ -584,15 +480,6 @@ struct acpi_integer_overlay {
584#define ACPI_DUMP_RESOURCE_LIST(a) acpi_rs_dump_resource_list(a) 480#define ACPI_DUMP_RESOURCE_LIST(a) acpi_rs_dump_resource_list(a)
585#define ACPI_DUMP_BUFFER(a, b) acpi_ut_dump_buffer((u8 *) a, b, DB_BYTE_DISPLAY, _COMPONENT) 481#define ACPI_DUMP_BUFFER(a, b) acpi_ut_dump_buffer((u8 *) a, b, DB_BYTE_DISPLAY, _COMPONENT)
586 482
587/*
588 * Master debug print macros
589 * Print iff:
590 * 1) Debug print for the current component is enabled
591 * 2) Debug error level or trace level for the print statement is enabled
592 */
593#define ACPI_DEBUG_PRINT(plist) acpi_ut_debug_print plist
594#define ACPI_DEBUG_PRINT_RAW(plist) acpi_ut_debug_print_raw plist
595
596#else 483#else
597/* 484/*
598 * This is the non-debug case -- make everything go away, 485 * This is the non-debug case -- make everything go away,
@@ -603,7 +490,6 @@ struct acpi_integer_overlay {
603 490
604#define ACPI_DEBUG_DEFINE(a) do { } while(0) 491#define ACPI_DEBUG_DEFINE(a) do { } while(0)
605#define ACPI_DEBUG_ONLY_MEMBERS(a) do { } while(0) 492#define ACPI_DEBUG_ONLY_MEMBERS(a) do { } while(0)
606#define ACPI_FUNCTION_NAME(a) do { } while(0)
607#define ACPI_FUNCTION_TRACE(a) do { } while(0) 493#define ACPI_FUNCTION_TRACE(a) do { } while(0)
608#define ACPI_FUNCTION_TRACE_PTR(a, b) do { } while(0) 494#define ACPI_FUNCTION_TRACE_PTR(a, b) do { } while(0)
609#define ACPI_FUNCTION_TRACE_U32(a, b) do { } while(0) 495#define ACPI_FUNCTION_TRACE_U32(a, b) do { } while(0)
@@ -619,8 +505,6 @@ struct acpi_integer_overlay {
619#define ACPI_DUMP_PATHNAME(a, b, c, d) do { } while(0) 505#define ACPI_DUMP_PATHNAME(a, b, c, d) do { } while(0)
620#define ACPI_DUMP_RESOURCE_LIST(a) do { } while(0) 506#define ACPI_DUMP_RESOURCE_LIST(a) do { } while(0)
621#define ACPI_DUMP_BUFFER(a, b) do { } while(0) 507#define ACPI_DUMP_BUFFER(a, b) do { } while(0)
622#define ACPI_DEBUG_PRINT(pl) do { } while(0)
623#define ACPI_DEBUG_PRINT_RAW(pl) do { } while(0)
624 508
625#define return_VOID return 509#define return_VOID return
626#define return_ACPI_STATUS(s) return(s) 510#define return_ACPI_STATUS(s) return(s)
@@ -629,7 +513,7 @@ struct acpi_integer_overlay {
629#define return_UINT32(s) return(s) 513#define return_UINT32(s) return(s)
630#define return_PTR(s) return(s) 514#define return_PTR(s) return(s)
631 515
632#endif 516#endif /* ACPI_DEBUG_OUTPUT */
633 517
634/* 518/*
635 * Some code only gets executed when the debugger is built in. 519 * Some code only gets executed when the debugger is built in.
diff --git a/include/acpi/acnamesp.h b/drivers/acpi/acpica/acnamesp.h
index db4e6f677855..46cb5b46d280 100644
--- a/include/acpi/acnamesp.h
+++ b/drivers/acpi/acpica/acnamesp.h
@@ -182,7 +182,9 @@ acpi_status acpi_ns_evaluate(struct acpi_evaluate_info *info);
182 */ 182 */
183acpi_status 183acpi_status
184acpi_ns_check_predefined_names(struct acpi_namespace_node *node, 184acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
185 union acpi_operand_object *return_object); 185 u32 user_param_count,
186 acpi_status return_status,
187 union acpi_operand_object **return_object);
186 188
187const union acpi_predefined_info *acpi_ns_check_for_predefined_name(struct 189const union acpi_predefined_info *acpi_ns_check_for_predefined_name(struct
188 acpi_namespace_node 190 acpi_namespace_node
@@ -191,6 +193,7 @@ const union acpi_predefined_info *acpi_ns_check_for_predefined_name(struct
191void 193void
192acpi_ns_check_parameter_count(char *pathname, 194acpi_ns_check_parameter_count(char *pathname,
193 struct acpi_namespace_node *node, 195 struct acpi_namespace_node *node,
196 u32 user_param_count,
194 const union acpi_predefined_info *info); 197 const union acpi_predefined_info *info);
195 198
196/* 199/*
diff --git a/include/acpi/acobject.h b/drivers/acpi/acpica/acobject.h
index eb6f038b03d9..eb6f038b03d9 100644
--- a/include/acpi/acobject.h
+++ b/drivers/acpi/acpica/acobject.h
diff --git a/include/acpi/acopcode.h b/drivers/acpi/acpica/acopcode.h
index dfdf63327885..dfdf63327885 100644
--- a/include/acpi/acopcode.h
+++ b/drivers/acpi/acpica/acopcode.h
diff --git a/include/acpi/acparser.h b/drivers/acpi/acpica/acparser.h
index 23ee0fbf5619..23ee0fbf5619 100644
--- a/include/acpi/acparser.h
+++ b/drivers/acpi/acpica/acparser.h
diff --git a/include/acpi/acpredef.h b/drivers/acpi/acpica/acpredef.h
index 16a9ca9a66e4..16a9ca9a66e4 100644
--- a/include/acpi/acpredef.h
+++ b/drivers/acpi/acpica/acpredef.h
diff --git a/include/acpi/acresrc.h b/drivers/acpi/acpica/acresrc.h
index eef5bd7a59fa..eef5bd7a59fa 100644
--- a/include/acpi/acresrc.h
+++ b/drivers/acpi/acpica/acresrc.h
diff --git a/include/acpi/acstruct.h b/drivers/acpi/acpica/acstruct.h
index 7980a26bad35..7980a26bad35 100644
--- a/include/acpi/acstruct.h
+++ b/drivers/acpi/acpica/acstruct.h
diff --git a/include/acpi/actables.h b/drivers/acpi/acpica/actables.h
index 0cbe1b9ab522..7ce6e33c7f78 100644
--- a/include/acpi/actables.h
+++ b/drivers/acpi/acpica/actables.h
@@ -94,6 +94,8 @@ void acpi_tb_set_table_loaded_flag(u32 table_index, u8 is_loaded);
94/* 94/*
95 * tbutils - table manager utilities 95 * tbutils - table manager utilities
96 */ 96 */
97acpi_status acpi_tb_initialize_facs(void);
98
97u8 acpi_tb_tables_loaded(void); 99u8 acpi_tb_tables_loaded(void);
98 100
99void 101void
diff --git a/include/acpi/acutils.h b/drivers/acpi/acpica/acutils.h
index d8307b2987e3..80d8813484fe 100644
--- a/include/acpi/acutils.h
+++ b/drivers/acpi/acpica/acutils.h
@@ -297,42 +297,6 @@ void acpi_ut_report_info(char *module_name, u32 line_number);
297 297
298void acpi_ut_report_warning(char *module_name, u32 line_number); 298void acpi_ut_report_warning(char *module_name, u32 line_number);
299 299
300/* Error and message reporting interfaces */
301
302void ACPI_INTERNAL_VAR_XFACE
303acpi_ut_debug_print(u32 requested_debug_level,
304 u32 line_number,
305 const char *function_name,
306 const char *module_name,
307 u32 component_id,
308 const char *format, ...) ACPI_PRINTF_LIKE(6);
309
310void ACPI_INTERNAL_VAR_XFACE
311acpi_ut_debug_print_raw(u32 requested_debug_level,
312 u32 line_number,
313 const char *function_name,
314 const char *module_name,
315 u32 component_id,
316 const char *format, ...) ACPI_PRINTF_LIKE(6);
317
318void ACPI_INTERNAL_VAR_XFACE
319acpi_ut_error(const char *module_name,
320 u32 line_number, const char *format, ...) ACPI_PRINTF_LIKE(3);
321
322void ACPI_INTERNAL_VAR_XFACE
323acpi_ut_exception(const char *module_name,
324 u32 line_number,
325 acpi_status status,
326 const char *format, ...) ACPI_PRINTF_LIKE(4);
327
328void ACPI_INTERNAL_VAR_XFACE
329acpi_ut_warning(const char *module_name,
330 u32 line_number, const char *format, ...) ACPI_PRINTF_LIKE(3);
331
332void ACPI_INTERNAL_VAR_XFACE
333acpi_ut_info(const char *module_name,
334 u32 line_number, const char *format, ...) ACPI_PRINTF_LIKE(3);
335
336/* 300/*
337 * utdelete - Object deletion and reference counts 301 * utdelete - Object deletion and reference counts
338 */ 302 */
diff --git a/include/acpi/amlcode.h b/drivers/acpi/acpica/amlcode.h
index ff851c5df698..ff851c5df698 100644
--- a/include/acpi/amlcode.h
+++ b/drivers/acpi/acpica/amlcode.h
diff --git a/include/acpi/amlresrc.h b/drivers/acpi/acpica/amlresrc.h
index 7b070e42b7c5..7b070e42b7c5 100644
--- a/include/acpi/amlresrc.h
+++ b/drivers/acpi/acpica/amlresrc.h
diff --git a/drivers/acpi/dispatcher/dsfield.c b/drivers/acpi/acpica/dsfield.c
index f988a5e7d2b4..53e27bc5a734 100644
--- a/drivers/acpi/dispatcher/dsfield.c
+++ b/drivers/acpi/acpica/dsfield.c
@@ -42,11 +42,12 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/amlcode.h> 45#include "accommon.h"
46#include <acpi/acdispat.h> 46#include "amlcode.h"
47#include <acpi/acinterp.h> 47#include "acdispat.h"
48#include <acpi/acnamesp.h> 48#include "acinterp.h"
49#include <acpi/acparser.h> 49#include "acnamesp.h"
50#include "acparser.h"
50 51
51#define _COMPONENT ACPI_DISPATCHER 52#define _COMPONENT ACPI_DISPATCHER
52ACPI_MODULE_NAME("dsfield") 53ACPI_MODULE_NAME("dsfield")
diff --git a/drivers/acpi/dispatcher/dsinit.c b/drivers/acpi/acpica/dsinit.c
index 949f7c75029e..eb144b13d8fa 100644
--- a/drivers/acpi/dispatcher/dsinit.c
+++ b/drivers/acpi/acpica/dsinit.c
@@ -42,9 +42,10 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acdispat.h> 45#include "accommon.h"
46#include <acpi/acnamesp.h> 46#include "acdispat.h"
47#include <acpi/actables.h> 47#include "acnamesp.h"
48#include "actables.h"
48 49
49#define _COMPONENT ACPI_DISPATCHER 50#define _COMPONENT ACPI_DISPATCHER
50ACPI_MODULE_NAME("dsinit") 51ACPI_MODULE_NAME("dsinit")
diff --git a/drivers/acpi/dispatcher/dsmethod.c b/drivers/acpi/acpica/dsmethod.c
index 279a5a60a0dd..14b8b8ed8023 100644
--- a/drivers/acpi/dispatcher/dsmethod.c
+++ b/drivers/acpi/acpica/dsmethod.c
@@ -42,11 +42,14 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/amlcode.h> 45#include "accommon.h"
46#include <acpi/acdispat.h> 46#include "amlcode.h"
47#include <acpi/acinterp.h> 47#include "acdispat.h"
48#include <acpi/acnamesp.h> 48#include "acinterp.h"
49#include "acnamesp.h"
50#ifdef ACPI_DISASSEMBLER
49#include <acpi/acdisasm.h> 51#include <acpi/acdisasm.h>
52#endif
50 53
51#define _COMPONENT ACPI_DISPATCHER 54#define _COMPONENT ACPI_DISPATCHER
52ACPI_MODULE_NAME("dsmethod") 55ACPI_MODULE_NAME("dsmethod")
@@ -412,6 +415,9 @@ acpi_ds_call_control_method(struct acpi_thread_state *thread,
412 415
413 if (obj_desc->method.method_flags & AML_METHOD_INTERNAL_ONLY) { 416 if (obj_desc->method.method_flags & AML_METHOD_INTERNAL_ONLY) {
414 status = obj_desc->method.implementation(next_walk_state); 417 status = obj_desc->method.implementation(next_walk_state);
418 if (status == AE_OK) {
419 status = AE_CTRL_TERMINATE;
420 }
415 } 421 }
416 422
417 return_ACPI_STATUS(status); 423 return_ACPI_STATUS(status);
diff --git a/drivers/acpi/dispatcher/dsmthdat.c b/drivers/acpi/acpica/dsmthdat.c
index d03f81bd1bcb..da0f5468184c 100644
--- a/drivers/acpi/dispatcher/dsmthdat.c
+++ b/drivers/acpi/acpica/dsmthdat.c
@@ -42,9 +42,10 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acdispat.h> 45#include "accommon.h"
46#include <acpi/acnamesp.h> 46#include "acdispat.h"
47#include <acpi/acinterp.h> 47#include "acnamesp.h"
48#include "acinterp.h"
48 49
49#define _COMPONENT ACPI_DISPATCHER 50#define _COMPONENT ACPI_DISPATCHER
50ACPI_MODULE_NAME("dsmthdat") 51ACPI_MODULE_NAME("dsmthdat")
diff --git a/drivers/acpi/dispatcher/dsobject.c b/drivers/acpi/acpica/dsobject.c
index 4f08e599d07e..15c628e6aa00 100644
--- a/drivers/acpi/dispatcher/dsobject.c
+++ b/drivers/acpi/acpica/dsobject.c
@@ -42,11 +42,12 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acparser.h> 45#include "accommon.h"
46#include <acpi/amlcode.h> 46#include "acparser.h"
47#include <acpi/acdispat.h> 47#include "amlcode.h"
48#include <acpi/acnamesp.h> 48#include "acdispat.h"
49#include <acpi/acinterp.h> 49#include "acnamesp.h"
50#include "acinterp.h"
50 51
51#define _COMPONENT ACPI_DISPATCHER 52#define _COMPONENT ACPI_DISPATCHER
52ACPI_MODULE_NAME("dsobject") 53ACPI_MODULE_NAME("dsobject")
diff --git a/drivers/acpi/dispatcher/dsopcode.c b/drivers/acpi/acpica/dsopcode.c
index 69fae5905bb8..0c3b4dd60e8a 100644
--- a/drivers/acpi/dispatcher/dsopcode.c
+++ b/drivers/acpi/acpica/dsopcode.c
@@ -43,13 +43,14 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acparser.h> 46#include "accommon.h"
47#include <acpi/amlcode.h> 47#include "acparser.h"
48#include <acpi/acdispat.h> 48#include "amlcode.h"
49#include <acpi/acinterp.h> 49#include "acdispat.h"
50#include <acpi/acnamesp.h> 50#include "acinterp.h"
51#include <acpi/acevents.h> 51#include "acnamesp.h"
52#include <acpi/actables.h> 52#include "acevents.h"
53#include "actables.h"
53 54
54#define _COMPONENT ACPI_DISPATCHER 55#define _COMPONENT ACPI_DISPATCHER
55ACPI_MODULE_NAME("dsopcode") 56ACPI_MODULE_NAME("dsopcode")
@@ -1140,10 +1141,29 @@ acpi_ds_exec_begin_control_op(struct acpi_walk_state *walk_state,
1140 op->common.aml_opcode, walk_state)); 1141 op->common.aml_opcode, walk_state));
1141 1142
1142 switch (op->common.aml_opcode) { 1143 switch (op->common.aml_opcode) {
1143 case AML_IF_OP:
1144 case AML_WHILE_OP: 1144 case AML_WHILE_OP:
1145 1145
1146 /* 1146 /*
1147 * If this is an additional iteration of a while loop, continue.
1148 * There is no need to allocate a new control state.
1149 */
1150 if (walk_state->control_state) {
1151 if (walk_state->control_state->control.aml_predicate_start
1152 == (walk_state->parser_state.aml - 1)) {
1153
1154 /* Reset the state to start-of-loop */
1155
1156 walk_state->control_state->common.state =
1157 ACPI_CONTROL_CONDITIONAL_EXECUTING;
1158 break;
1159 }
1160 }
1161
1162 /*lint -fallthrough */
1163
1164 case AML_IF_OP:
1165
1166 /*
1147 * IF/WHILE: Create a new control state to manage these 1167 * IF/WHILE: Create a new control state to manage these
1148 * constructs. We need to manage these as a stack, in order 1168 * constructs. We need to manage these as a stack, in order
1149 * to handle nesting. 1169 * to handle nesting.
@@ -1243,13 +1263,36 @@ acpi_ds_exec_end_control_op(struct acpi_walk_state * walk_state,
1243 1263
1244 ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, "[WHILE_OP] Op=%p\n", op)); 1264 ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, "[WHILE_OP] Op=%p\n", op));
1245 1265
1246 if (walk_state->control_state->common.value) { 1266 control_state = walk_state->control_state;
1267 if (control_state->common.value) {
1247 1268
1248 /* Predicate was true, go back and evaluate it again! */ 1269 /* Predicate was true, the body of the loop was just executed */
1249 1270
1271 /*
1272 * This loop counter mechanism allows the interpreter to escape
1273 * possibly infinite loops. This can occur in poorly written AML
1274 * when the hardware does not respond within a while loop and the
1275 * loop does not implement a timeout.
1276 */
1277 control_state->control.loop_count++;
1278 if (control_state->control.loop_count >
1279 ACPI_MAX_LOOP_ITERATIONS) {
1280 status = AE_AML_INFINITE_LOOP;
1281 break;
1282 }
1283
1284 /*
1285 * Go back and evaluate the predicate and maybe execute the loop
1286 * another time
1287 */
1250 status = AE_CTRL_PENDING; 1288 status = AE_CTRL_PENDING;
1289 walk_state->aml_last_while =
1290 control_state->control.aml_predicate_start;
1291 break;
1251 } 1292 }
1252 1293
1294 /* Predicate was false, terminate this while loop */
1295
1253 ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, 1296 ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH,
1254 "[WHILE_OP] termination! Op=%p\n", op)); 1297 "[WHILE_OP] termination! Op=%p\n", op));
1255 1298
@@ -1257,9 +1300,6 @@ acpi_ds_exec_end_control_op(struct acpi_walk_state * walk_state,
1257 1300
1258 control_state = 1301 control_state =
1259 acpi_ut_pop_generic_state(&walk_state->control_state); 1302 acpi_ut_pop_generic_state(&walk_state->control_state);
1260
1261 walk_state->aml_last_while =
1262 control_state->control.aml_predicate_start;
1263 acpi_ut_delete_generic_state(control_state); 1303 acpi_ut_delete_generic_state(control_state);
1264 break; 1304 break;
1265 1305
diff --git a/drivers/acpi/dispatcher/dsutils.c b/drivers/acpi/acpica/dsutils.c
index b398982f0d8b..dabc23a46176 100644
--- a/drivers/acpi/dispatcher/dsutils.c
+++ b/drivers/acpi/acpica/dsutils.c
@@ -42,12 +42,13 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acparser.h> 45#include "accommon.h"
46#include <acpi/amlcode.h> 46#include "acparser.h"
47#include <acpi/acdispat.h> 47#include "amlcode.h"
48#include <acpi/acinterp.h> 48#include "acdispat.h"
49#include <acpi/acnamesp.h> 49#include "acinterp.h"
50#include <acpi/acdebug.h> 50#include "acnamesp.h"
51#include "acdebug.h"
51 52
52#define _COMPONENT ACPI_DISPATCHER 53#define _COMPONENT ACPI_DISPATCHER
53ACPI_MODULE_NAME("dsutils") 54ACPI_MODULE_NAME("dsutils")
diff --git a/drivers/acpi/dispatcher/dswexec.c b/drivers/acpi/acpica/dswexec.c
index 396fe12078cd..350e6656bc89 100644
--- a/drivers/acpi/dispatcher/dswexec.c
+++ b/drivers/acpi/acpica/dswexec.c
@@ -43,12 +43,13 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acparser.h> 46#include "accommon.h"
47#include <acpi/amlcode.h> 47#include "acparser.h"
48#include <acpi/acdispat.h> 48#include "amlcode.h"
49#include <acpi/acinterp.h> 49#include "acdispat.h"
50#include <acpi/acnamesp.h> 50#include "acinterp.h"
51#include <acpi/acdebug.h> 51#include "acnamesp.h"
52#include "acdebug.h"
52 53
53#define _COMPONENT ACPI_DISPATCHER 54#define _COMPONENT ACPI_DISPATCHER
54ACPI_MODULE_NAME("dswexec") 55ACPI_MODULE_NAME("dswexec")
diff --git a/drivers/acpi/dispatcher/dswload.c b/drivers/acpi/acpica/dswload.c
index dff7a3e445a8..3023ceaa8d54 100644
--- a/drivers/acpi/dispatcher/dswload.c
+++ b/drivers/acpi/acpica/dswload.c
@@ -42,12 +42,13 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acparser.h> 45#include "accommon.h"
46#include <acpi/amlcode.h> 46#include "acparser.h"
47#include <acpi/acdispat.h> 47#include "amlcode.h"
48#include <acpi/acinterp.h> 48#include "acdispat.h"
49#include <acpi/acnamesp.h> 49#include "acinterp.h"
50#include <acpi/acevents.h> 50#include "acnamesp.h"
51#include "acevents.h"
51 52
52#ifdef ACPI_ASL_COMPILER 53#ifdef ACPI_ASL_COMPILER
53#include <acpi/acdisasm.h> 54#include <acpi/acdisasm.h>
diff --git a/drivers/acpi/dispatcher/dswscope.c b/drivers/acpi/acpica/dswscope.c
index 9e6073265873..908645e72f03 100644
--- a/drivers/acpi/dispatcher/dswscope.c
+++ b/drivers/acpi/acpica/dswscope.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acdispat.h> 45#include "accommon.h"
46#include "acdispat.h"
46 47
47#define _COMPONENT ACPI_DISPATCHER 48#define _COMPONENT ACPI_DISPATCHER
48ACPI_MODULE_NAME("dswscope") 49ACPI_MODULE_NAME("dswscope")
diff --git a/drivers/acpi/dispatcher/dswstate.c b/drivers/acpi/acpica/dswstate.c
index b00d4af791aa..40f92bf7dce5 100644
--- a/drivers/acpi/dispatcher/dswstate.c
+++ b/drivers/acpi/acpica/dswstate.c
@@ -42,9 +42,10 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acparser.h> 45#include "accommon.h"
46#include <acpi/acdispat.h> 46#include "acparser.h"
47#include <acpi/acnamesp.h> 47#include "acdispat.h"
48#include "acnamesp.h"
48 49
49#define _COMPONENT ACPI_DISPATCHER 50#define _COMPONENT ACPI_DISPATCHER
50ACPI_MODULE_NAME("dswstate") 51ACPI_MODULE_NAME("dswstate")
diff --git a/drivers/acpi/events/evevent.c b/drivers/acpi/acpica/evevent.c
index c56c5c6ea77b..803edd9e3f6a 100644
--- a/drivers/acpi/events/evevent.c
+++ b/drivers/acpi/acpica/evevent.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acevents.h> 45#include "accommon.h"
46#include "acevents.h"
46 47
47#define _COMPONENT ACPI_EVENTS 48#define _COMPONENT ACPI_EVENTS
48ACPI_MODULE_NAME("evevent") 49ACPI_MODULE_NAME("evevent")
@@ -72,8 +73,8 @@ acpi_status acpi_ev_initialize_events(void)
72 73
73 /* 74 /*
74 * Initialize the Fixed and General Purpose Events. This is done prior to 75 * Initialize the Fixed and General Purpose Events. This is done prior to
75 * enabling SCIs to prevent interrupts from occurring before the handlers are 76 * enabling SCIs to prevent interrupts from occurring before the handlers
76 * installed. 77 * are installed.
77 */ 78 */
78 status = acpi_ev_fixed_event_initialize(); 79 status = acpi_ev_fixed_event_initialize();
79 if (ACPI_FAILURE(status)) { 80 if (ACPI_FAILURE(status)) {
@@ -192,8 +193,8 @@ static acpi_status acpi_ev_fixed_event_initialize(void)
192 acpi_status status; 193 acpi_status status;
193 194
194 /* 195 /*
195 * Initialize the structure that keeps track of fixed event handlers 196 * Initialize the structure that keeps track of fixed event handlers and
196 * and enable the fixed events. 197 * enable the fixed events.
197 */ 198 */
198 for (i = 0; i < ACPI_NUM_FIXED_EVENTS; i++) { 199 for (i = 0; i < ACPI_NUM_FIXED_EVENTS; i++) {
199 acpi_gbl_fixed_event_handlers[i].handler = NULL; 200 acpi_gbl_fixed_event_handlers[i].handler = NULL;
@@ -237,7 +238,7 @@ u32 acpi_ev_fixed_event_detect(void)
237 238
238 /* 239 /*
239 * Read the fixed feature status and enable registers, as all the cases 240 * Read the fixed feature status and enable registers, as all the cases
240 * depend on their values. Ignore errors here. 241 * depend on their values. Ignore errors here.
241 */ 242 */
242 (void)acpi_hw_register_read(ACPI_REGISTER_PM1_STATUS, &fixed_status); 243 (void)acpi_hw_register_read(ACPI_REGISTER_PM1_STATUS, &fixed_status);
243 (void)acpi_hw_register_read(ACPI_REGISTER_PM1_ENABLE, &fixed_enable); 244 (void)acpi_hw_register_read(ACPI_REGISTER_PM1_ENABLE, &fixed_enable);
@@ -291,8 +292,8 @@ static u32 acpi_ev_fixed_event_dispatch(u32 event)
291 status_register_id, 1); 292 status_register_id, 1);
292 293
293 /* 294 /*
294 * Make sure we've got a handler. If not, report an error. 295 * Make sure we've got a handler. If not, report an error. The event is
295 * The event is disabled to prevent further interrupts. 296 * disabled to prevent further interrupts.
296 */ 297 */
297 if (NULL == acpi_gbl_fixed_event_handlers[event].handler) { 298 if (NULL == acpi_gbl_fixed_event_handlers[event].handler) {
298 (void)acpi_set_register(acpi_gbl_fixed_event_info[event]. 299 (void)acpi_set_register(acpi_gbl_fixed_event_info[event].
diff --git a/drivers/acpi/events/evgpe.c b/drivers/acpi/acpica/evgpe.c
index f45c74fe745e..f345ced36477 100644
--- a/drivers/acpi/events/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -42,8 +42,9 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acevents.h> 45#include "accommon.h"
46#include <acpi/acnamesp.h> 46#include "acevents.h"
47#include "acnamesp.h"
47 48
48#define _COMPONENT ACPI_EVENTS 49#define _COMPONENT ACPI_EVENTS
49ACPI_MODULE_NAME("evgpe") 50ACPI_MODULE_NAME("evgpe")
@@ -125,7 +126,7 @@ acpi_ev_update_gpe_enable_masks(struct acpi_gpe_event_info *gpe_event_info,
125 (1 << 126 (1 <<
126 (gpe_event_info->gpe_number - gpe_register_info->base_gpe_number)); 127 (gpe_event_info->gpe_number - gpe_register_info->base_gpe_number));
127 128
128 /* 1) Disable case. Simply clear all enable bits */ 129 /* 1) Disable case. Simply clear all enable bits */
129 130
130 if (type == ACPI_GPE_DISABLE) { 131 if (type == ACPI_GPE_DISABLE) {
131 ACPI_CLEAR_BIT(gpe_register_info->enable_for_wake, 132 ACPI_CLEAR_BIT(gpe_register_info->enable_for_wake,
@@ -134,7 +135,7 @@ acpi_ev_update_gpe_enable_masks(struct acpi_gpe_event_info *gpe_event_info,
134 return_ACPI_STATUS(AE_OK); 135 return_ACPI_STATUS(AE_OK);
135 } 136 }
136 137
137 /* 2) Enable case. Set/Clear the appropriate enable bits */ 138 /* 2) Enable case. Set/Clear the appropriate enable bits */
138 139
139 switch (gpe_event_info->flags & ACPI_GPE_TYPE_MASK) { 140 switch (gpe_event_info->flags & ACPI_GPE_TYPE_MASK) {
140 case ACPI_GPE_TYPE_WAKE: 141 case ACPI_GPE_TYPE_WAKE:
@@ -295,7 +296,7 @@ acpi_status acpi_ev_disable_gpe(struct acpi_gpe_event_info *gpe_event_info)
295 * 296 *
296 * FUNCTION: acpi_ev_get_gpe_event_info 297 * FUNCTION: acpi_ev_get_gpe_event_info
297 * 298 *
298 * PARAMETERS: gpe_device - Device node. NULL for GPE0/GPE1 299 * PARAMETERS: gpe_device - Device node. NULL for GPE0/GPE1
299 * gpe_number - Raw GPE number 300 * gpe_number - Raw GPE number
300 * 301 *
301 * RETURN: A GPE event_info struct. NULL if not a valid GPE 302 * RETURN: A GPE event_info struct. NULL if not a valid GPE
@@ -372,7 +373,7 @@ struct acpi_gpe_event_info *acpi_ev_get_gpe_event_info(acpi_handle gpe_device,
372 * 373 *
373 * RETURN: INTERRUPT_HANDLED or INTERRUPT_NOT_HANDLED 374 * RETURN: INTERRUPT_HANDLED or INTERRUPT_NOT_HANDLED
374 * 375 *
375 * DESCRIPTION: Detect if any GP events have occurred. This function is 376 * DESCRIPTION: Detect if any GP events have occurred. This function is
376 * executed at interrupt level. 377 * executed at interrupt level.
377 * 378 *
378 ******************************************************************************/ 379 ******************************************************************************/
@@ -400,8 +401,8 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list)
400 401
401 /* 402 /*
402 * We need to obtain the GPE lock for both the data structs and registers 403 * We need to obtain the GPE lock for both the data structs and registers
403 * Note: Not necessary to obtain the hardware lock, since the GPE registers 404 * Note: Not necessary to obtain the hardware lock, since the GPE
404 * are owned by the gpe_lock. 405 * registers are owned by the gpe_lock.
405 */ 406 */
406 flags = acpi_os_acquire_lock(acpi_gbl_gpe_lock); 407 flags = acpi_os_acquire_lock(acpi_gbl_gpe_lock);
407 408
@@ -410,9 +411,8 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list)
410 gpe_block = gpe_xrupt_list->gpe_block_list_head; 411 gpe_block = gpe_xrupt_list->gpe_block_list_head;
411 while (gpe_block) { 412 while (gpe_block) {
412 /* 413 /*
413 * Read all of the 8-bit GPE status and enable registers 414 * Read all of the 8-bit GPE status and enable registers in this GPE
414 * in this GPE block, saving all of them. 415 * block, saving all of them. Find all currently active GP events.
415 * Find all currently active GP events.
416 */ 416 */
417 for (i = 0; i < gpe_block->register_count; i++) { 417 for (i = 0; i < gpe_block->register_count; i++) {
418 418
@@ -423,10 +423,8 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list)
423 /* Read the Status Register */ 423 /* Read the Status Register */
424 424
425 status = 425 status =
426 acpi_hw_low_level_read(ACPI_GPE_REGISTER_WIDTH, 426 acpi_read(&status_reg,
427 &status_reg, 427 &gpe_register_info->status_address);
428 &gpe_register_info->
429 status_address);
430 if (ACPI_FAILURE(status)) { 428 if (ACPI_FAILURE(status)) {
431 goto unlock_and_exit; 429 goto unlock_and_exit;
432 } 430 }
@@ -434,10 +432,8 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list)
434 /* Read the Enable Register */ 432 /* Read the Enable Register */
435 433
436 status = 434 status =
437 acpi_hw_low_level_read(ACPI_GPE_REGISTER_WIDTH, 435 acpi_read(&enable_reg,
438 &enable_reg, 436 &gpe_register_info->enable_address);
439 &gpe_register_info->
440 enable_address);
441 if (ACPI_FAILURE(status)) { 437 if (ACPI_FAILURE(status)) {
442 goto unlock_and_exit; 438 goto unlock_and_exit;
443 } 439 }
@@ -527,8 +523,8 @@ static void ACPI_SYSTEM_XFACE acpi_ev_asynch_execute_gpe_method(void *context)
527 (void)acpi_ev_enable_gpe(gpe_event_info, FALSE); 523 (void)acpi_ev_enable_gpe(gpe_event_info, FALSE);
528 524
529 /* 525 /*
530 * Take a snapshot of the GPE info for this level - we copy the 526 * Take a snapshot of the GPE info for this level - we copy the info to
531 * info to prevent a race condition with remove_handler/remove_block. 527 * prevent a race condition with remove_handler/remove_block.
532 */ 528 */
533 ACPI_MEMCPY(&local_gpe_event_info, gpe_event_info, 529 ACPI_MEMCPY(&local_gpe_event_info, gpe_event_info,
534 sizeof(struct acpi_gpe_event_info)); 530 sizeof(struct acpi_gpe_event_info));
@@ -539,8 +535,8 @@ static void ACPI_SYSTEM_XFACE acpi_ev_asynch_execute_gpe_method(void *context)
539 } 535 }
540 536
541 /* 537 /*
542 * Must check for control method type dispatch one more 538 * Must check for control method type dispatch one more time to avoid a
543 * time to avoid race with ev_gpe_install_handler 539 * race with ev_gpe_install_handler
544 */ 540 */
545 if ((local_gpe_event_info.flags & ACPI_GPE_DISPATCH_MASK) == 541 if ((local_gpe_event_info.flags & ACPI_GPE_DISPATCH_MASK) ==
546 ACPI_GPE_DISPATCH_METHOD) { 542 ACPI_GPE_DISPATCH_METHOD) {
@@ -584,8 +580,8 @@ static void acpi_ev_asynch_enable_gpe(void *context)
584 if ((gpe_event_info->flags & ACPI_GPE_XRUPT_TYPE_MASK) == 580 if ((gpe_event_info->flags & ACPI_GPE_XRUPT_TYPE_MASK) ==
585 ACPI_GPE_LEVEL_TRIGGERED) { 581 ACPI_GPE_LEVEL_TRIGGERED) {
586 /* 582 /*
587 * GPE is level-triggered, we clear the GPE status bit after 583 * GPE is level-triggered, we clear the GPE status bit after handling
588 * handling the event. 584 * the event.
589 */ 585 */
590 status = acpi_hw_clear_gpe(gpe_event_info); 586 status = acpi_hw_clear_gpe(gpe_event_info);
591 if (ACPI_FAILURE(status)) { 587 if (ACPI_FAILURE(status)) {
@@ -624,7 +620,7 @@ acpi_ev_gpe_dispatch(struct acpi_gpe_event_info *gpe_event_info, u32 gpe_number)
624 acpi_os_gpe_count(gpe_number); 620 acpi_os_gpe_count(gpe_number);
625 621
626 /* 622 /*
627 * If edge-triggered, clear the GPE status bit now. Note that 623 * If edge-triggered, clear the GPE status bit now. Note that
628 * level-triggered events are cleared after the GPE is serviced. 624 * level-triggered events are cleared after the GPE is serviced.
629 */ 625 */
630 if ((gpe_event_info->flags & ACPI_GPE_XRUPT_TYPE_MASK) == 626 if ((gpe_event_info->flags & ACPI_GPE_XRUPT_TYPE_MASK) ==
@@ -650,7 +646,8 @@ acpi_ev_gpe_dispatch(struct acpi_gpe_event_info *gpe_event_info, u32 gpe_number)
650 646
651 /* 647 /*
652 * Invoke the installed handler (at interrupt level) 648 * Invoke the installed handler (at interrupt level)
653 * Ignore return status for now. TBD: leave GPE disabled on error? 649 * Ignore return status for now.
650 * TBD: leave GPE disabled on error?
654 */ 651 */
655 (void)gpe_event_info->dispatch.handler->address(gpe_event_info-> 652 (void)gpe_event_info->dispatch.handler->address(gpe_event_info->
656 dispatch. 653 dispatch.
@@ -708,7 +705,7 @@ acpi_ev_gpe_dispatch(struct acpi_gpe_event_info *gpe_event_info, u32 gpe_number)
708 gpe_number)); 705 gpe_number));
709 706
710 /* 707 /*
711 * Disable the GPE. The GPE will remain disabled until the ACPI 708 * Disable the GPE. The GPE will remain disabled until the ACPICA
712 * Core Subsystem is restarted, or a handler is installed. 709 * Core Subsystem is restarted, or a handler is installed.
713 */ 710 */
714 status = acpi_ev_disable_gpe(gpe_event_info); 711 status = acpi_ev_disable_gpe(gpe_event_info);
diff --git a/drivers/acpi/events/evgpeblk.c b/drivers/acpi/acpica/evgpeblk.c
index 73c058e2f5c2..484cc0565d5b 100644
--- a/drivers/acpi/events/evgpeblk.c
+++ b/drivers/acpi/acpica/evgpeblk.c
@@ -42,8 +42,9 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acevents.h> 45#include "accommon.h"
46#include <acpi/acnamesp.h> 46#include "acevents.h"
47#include "acnamesp.h"
47 48
48#define _COMPONENT ACPI_EVENTS 49#define _COMPONENT ACPI_EVENTS
49ACPI_MODULE_NAME("evgpeblk") 50ACPI_MODULE_NAME("evgpeblk")
@@ -124,6 +125,7 @@ u8 acpi_ev_valid_gpe_event(struct acpi_gpe_event_info *gpe_event_info)
124 * FUNCTION: acpi_ev_walk_gpe_list 125 * FUNCTION: acpi_ev_walk_gpe_list
125 * 126 *
126 * PARAMETERS: gpe_walk_callback - Routine called for each GPE block 127 * PARAMETERS: gpe_walk_callback - Routine called for each GPE block
128 * Context - Value passed to callback
127 * 129 *
128 * RETURN: Status 130 * RETURN: Status
129 * 131 *
@@ -131,7 +133,8 @@ u8 acpi_ev_valid_gpe_event(struct acpi_gpe_event_info *gpe_event_info)
131 * 133 *
132 ******************************************************************************/ 134 ******************************************************************************/
133 135
134acpi_status acpi_ev_walk_gpe_list(acpi_gpe_callback gpe_walk_callback) 136acpi_status
137acpi_ev_walk_gpe_list(acpi_gpe_callback gpe_walk_callback, void *context)
135{ 138{
136 struct acpi_gpe_block_info *gpe_block; 139 struct acpi_gpe_block_info *gpe_block;
137 struct acpi_gpe_xrupt_info *gpe_xrupt_info; 140 struct acpi_gpe_xrupt_info *gpe_xrupt_info;
@@ -154,8 +157,13 @@ acpi_status acpi_ev_walk_gpe_list(acpi_gpe_callback gpe_walk_callback)
154 157
155 /* One callback per GPE block */ 158 /* One callback per GPE block */
156 159
157 status = gpe_walk_callback(gpe_xrupt_info, gpe_block); 160 status =
161 gpe_walk_callback(gpe_xrupt_info, gpe_block,
162 context);
158 if (ACPI_FAILURE(status)) { 163 if (ACPI_FAILURE(status)) {
164 if (status == AE_CTRL_END) { /* Callback abort */
165 status = AE_OK;
166 }
159 goto unlock_and_exit; 167 goto unlock_and_exit;
160 } 168 }
161 169
@@ -186,7 +194,8 @@ acpi_status acpi_ev_walk_gpe_list(acpi_gpe_callback gpe_walk_callback)
186 194
187acpi_status 195acpi_status
188acpi_ev_delete_gpe_handlers(struct acpi_gpe_xrupt_info *gpe_xrupt_info, 196acpi_ev_delete_gpe_handlers(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
189 struct acpi_gpe_block_info *gpe_block) 197 struct acpi_gpe_block_info *gpe_block,
198 void *context)
190{ 199{
191 struct acpi_gpe_event_info *gpe_event_info; 200 struct acpi_gpe_event_info *gpe_event_info;
192 u32 i; 201 u32 i;
@@ -309,17 +318,17 @@ acpi_ev_save_method_info(acpi_handle obj_handle,
309 (gpe_block->block_base_number + 318 (gpe_block->block_base_number +
310 (gpe_block->register_count * 8)))) { 319 (gpe_block->register_count * 8)))) {
311 /* 320 /*
312 * Not valid for this GPE block, just ignore it 321 * Not valid for this GPE block, just ignore it. However, it may be
313 * However, it may be valid for a different GPE block, since GPE0 and GPE1 322 * valid for a different GPE block, since GPE0 and GPE1 methods both
314 * methods both appear under \_GPE. 323 * appear under \_GPE.
315 */ 324 */
316 return_ACPI_STATUS(AE_OK); 325 return_ACPI_STATUS(AE_OK);
317 } 326 }
318 327
319 /* 328 /*
320 * Now we can add this information to the gpe_event_info block 329 * Now we can add this information to the gpe_event_info block for use
321 * for use during dispatch of this GPE. Default type is RUNTIME, although 330 * during dispatch of this GPE. Default type is RUNTIME, although this may
322 * this may change when the _PRW methods are executed later. 331 * change when the _PRW methods are executed later.
323 */ 332 */
324 gpe_event_info = 333 gpe_event_info =
325 &gpe_block->event_info[gpe_number - gpe_block->block_base_number]; 334 &gpe_block->event_info[gpe_number - gpe_block->block_base_number];
@@ -394,8 +403,8 @@ acpi_ev_match_prw_and_gpe(acpi_handle obj_handle,
394 gpe_block = gpe_info->gpe_block; 403 gpe_block = gpe_info->gpe_block;
395 404
396 /* 405 /*
397 * The _PRW object must return a package, we are only interested 406 * The _PRW object must return a package, we are only interested in the
398 * in the first element 407 * first element
399 */ 408 */
400 obj_desc = pkg_desc->package.elements[0]; 409 obj_desc = pkg_desc->package.elements[0];
401 410
@@ -434,7 +443,7 @@ acpi_ev_match_prw_and_gpe(acpi_handle obj_handle,
434 /* 443 /*
435 * Is this GPE within this block? 444 * Is this GPE within this block?
436 * 445 *
437 * TRUE iff these conditions are true: 446 * TRUE if and only if these conditions are true:
438 * 1) The GPE devices match. 447 * 1) The GPE devices match.
439 * 2) The GPE index(number) is within the range of the Gpe Block 448 * 2) The GPE index(number) is within the range of the Gpe Block
440 * associated with the GPE device. 449 * associated with the GPE device.
@@ -457,6 +466,7 @@ acpi_ev_match_prw_and_gpe(acpi_handle obj_handle,
457 if (ACPI_FAILURE(status)) { 466 if (ACPI_FAILURE(status)) {
458 goto cleanup; 467 goto cleanup;
459 } 468 }
469
460 status = 470 status =
461 acpi_ev_update_gpe_enable_masks(gpe_event_info, 471 acpi_ev_update_gpe_enable_masks(gpe_event_info,
462 ACPI_GPE_DISABLE); 472 ACPI_GPE_DISABLE);
@@ -476,9 +486,9 @@ acpi_ev_match_prw_and_gpe(acpi_handle obj_handle,
476 * RETURN: A GPE interrupt block 486 * RETURN: A GPE interrupt block
477 * 487 *
478 * DESCRIPTION: Get or Create a GPE interrupt block. There is one interrupt 488 * DESCRIPTION: Get or Create a GPE interrupt block. There is one interrupt
479 * block per unique interrupt level used for GPEs. 489 * block per unique interrupt level used for GPEs. Should be
480 * Should be called only when the GPE lists are semaphore locked 490 * called only when the GPE lists are semaphore locked and not
481 * and not subject to change. 491 * subject to change.
482 * 492 *
483 ******************************************************************************/ 493 ******************************************************************************/
484 494
@@ -608,8 +618,9 @@ acpi_ev_delete_gpe_xrupt(struct acpi_gpe_xrupt_info *gpe_xrupt)
608 * 618 *
609 * FUNCTION: acpi_ev_install_gpe_block 619 * FUNCTION: acpi_ev_install_gpe_block
610 * 620 *
611 * PARAMETERS: gpe_block - New GPE block 621 * PARAMETERS: gpe_block - New GPE block
612 * interrupt_number - Xrupt to be associated with this GPE block 622 * interrupt_number - Xrupt to be associated with this
623 * GPE block
613 * 624 *
614 * RETURN: Status 625 * RETURN: Status
615 * 626 *
@@ -666,7 +677,7 @@ acpi_ev_install_gpe_block(struct acpi_gpe_block_info *gpe_block,
666 * 677 *
667 * FUNCTION: acpi_ev_delete_gpe_block 678 * FUNCTION: acpi_ev_delete_gpe_block
668 * 679 *
669 * PARAMETERS: gpe_block - Existing GPE block 680 * PARAMETERS: gpe_block - Existing GPE block
670 * 681 *
671 * RETURN: Status 682 * RETURN: Status
672 * 683 *
@@ -688,7 +699,8 @@ acpi_status acpi_ev_delete_gpe_block(struct acpi_gpe_block_info *gpe_block)
688 699
689 /* Disable all GPEs in this block */ 700 /* Disable all GPEs in this block */
690 701
691 status = acpi_hw_disable_gpe_block(gpe_block->xrupt_block, gpe_block); 702 status =
703 acpi_hw_disable_gpe_block(gpe_block->xrupt_block, gpe_block, NULL);
692 704
693 if (!gpe_block->previous && !gpe_block->next) { 705 if (!gpe_block->previous && !gpe_block->next) {
694 706
@@ -715,6 +727,9 @@ acpi_status acpi_ev_delete_gpe_block(struct acpi_gpe_block_info *gpe_block)
715 acpi_os_release_lock(acpi_gbl_gpe_lock, flags); 727 acpi_os_release_lock(acpi_gbl_gpe_lock, flags);
716 } 728 }
717 729
730 acpi_current_gpe_count -=
731 gpe_block->register_count * ACPI_GPE_REGISTER_WIDTH;
732
718 /* Free the gpe_block */ 733 /* Free the gpe_block */
719 734
720 ACPI_FREE(gpe_block->register_info); 735 ACPI_FREE(gpe_block->register_info);
@@ -786,9 +801,9 @@ acpi_ev_create_gpe_info_blocks(struct acpi_gpe_block_info *gpe_block)
786 801
787 /* 802 /*
788 * Initialize the GPE Register and Event structures. A goal of these 803 * Initialize the GPE Register and Event structures. A goal of these
789 * tables is to hide the fact that there are two separate GPE register sets 804 * tables is to hide the fact that there are two separate GPE register
790 * in a given GPE hardware block, the status registers occupy the first half, 805 * sets in a given GPE hardware block, the status registers occupy the
791 * and the enable registers occupy the second half. 806 * first half, and the enable registers occupy the second half.
792 */ 807 */
793 this_register = gpe_register_info; 808 this_register = gpe_register_info;
794 this_event = gpe_event_info; 809 this_event = gpe_event_info;
@@ -816,10 +831,8 @@ acpi_ev_create_gpe_info_blocks(struct acpi_gpe_block_info *gpe_block)
816 ACPI_GPE_REGISTER_WIDTH; 831 ACPI_GPE_REGISTER_WIDTH;
817 this_register->enable_address.bit_width = 832 this_register->enable_address.bit_width =
818 ACPI_GPE_REGISTER_WIDTH; 833 ACPI_GPE_REGISTER_WIDTH;
819 this_register->status_address.bit_offset = 834 this_register->status_address.bit_offset = 0;
820 ACPI_GPE_REGISTER_WIDTH; 835 this_register->enable_address.bit_offset = 0;
821 this_register->enable_address.bit_offset =
822 ACPI_GPE_REGISTER_WIDTH;
823 836
824 /* Init the event_info for each GPE within this register */ 837 /* Init the event_info for each GPE within this register */
825 838
@@ -832,18 +845,14 @@ acpi_ev_create_gpe_info_blocks(struct acpi_gpe_block_info *gpe_block)
832 845
833 /* Disable all GPEs within this register */ 846 /* Disable all GPEs within this register */
834 847
835 status = acpi_hw_low_level_write(ACPI_GPE_REGISTER_WIDTH, 0x00, 848 status = acpi_write(0x00, &this_register->enable_address);
836 &this_register->
837 enable_address);
838 if (ACPI_FAILURE(status)) { 849 if (ACPI_FAILURE(status)) {
839 goto error_exit; 850 goto error_exit;
840 } 851 }
841 852
842 /* Clear any pending GPE events within this register */ 853 /* Clear any pending GPE events within this register */
843 854
844 status = acpi_hw_low_level_write(ACPI_GPE_REGISTER_WIDTH, 0xFF, 855 status = acpi_write(0xFF, &this_register->status_address);
845 &this_register->
846 status_address);
847 if (ACPI_FAILURE(status)) { 856 if (ACPI_FAILURE(status)) {
848 goto error_exit; 857 goto error_exit;
849 } 858 }
@@ -956,6 +965,9 @@ acpi_ev_create_gpe_block(struct acpi_namespace_node *gpe_device,
956 gpe_device->name.ascii, gpe_block->register_count, 965 gpe_device->name.ascii, gpe_block->register_count,
957 interrupt_number)); 966 interrupt_number));
958 967
968 /* Update global count of currently available GPEs */
969
970 acpi_current_gpe_count += register_count * ACPI_GPE_REGISTER_WIDTH;
959 return_ACPI_STATUS(AE_OK); 971 return_ACPI_STATUS(AE_OK);
960} 972}
961 973
@@ -1055,7 +1067,7 @@ acpi_ev_initialize_gpe_block(struct acpi_namespace_node *gpe_device,
1055 1067
1056 /* Enable all valid runtime GPEs found above */ 1068 /* Enable all valid runtime GPEs found above */
1057 1069
1058 status = acpi_hw_enable_runtime_gpe_block(NULL, gpe_block); 1070 status = acpi_hw_enable_runtime_gpe_block(NULL, gpe_block, NULL);
1059 if (ACPI_FAILURE(status)) { 1071 if (ACPI_FAILURE(status)) {
1060 ACPI_ERROR((AE_INFO, "Could not enable GPEs in GpeBlock %p", 1072 ACPI_ERROR((AE_INFO, "Could not enable GPEs in GpeBlock %p",
1061 gpe_block)); 1073 gpe_block));
diff --git a/drivers/acpi/events/evmisc.c b/drivers/acpi/acpica/evmisc.c
index 1d5670be729a..5f893057bcc6 100644
--- a/drivers/acpi/events/evmisc.c
+++ b/drivers/acpi/acpica/evmisc.c
@@ -42,18 +42,15 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acevents.h> 45#include "accommon.h"
46#include <acpi/acnamesp.h> 46#include "acevents.h"
47#include <acpi/acinterp.h> 47#include "acnamesp.h"
48#include "acinterp.h"
48 49
49#define _COMPONENT ACPI_EVENTS 50#define _COMPONENT ACPI_EVENTS
50ACPI_MODULE_NAME("evmisc") 51ACPI_MODULE_NAME("evmisc")
51 52
52/* Pointer to FACS needed for the Global Lock */
53static struct acpi_table_facs *facs = NULL;
54
55/* Local prototypes */ 53/* Local prototypes */
56
57static void ACPI_SYSTEM_XFACE acpi_ev_notify_dispatch(void *context); 54static void ACPI_SYSTEM_XFACE acpi_ev_notify_dispatch(void *context);
58 55
59static u32 acpi_ev_global_lock_handler(void *context); 56static u32 acpi_ev_global_lock_handler(void *context);
@@ -152,7 +149,9 @@ acpi_ev_queue_notify_request(struct acpi_namespace_node * node,
152 break; 149 break;
153 150
154 default: 151 default:
152
155 /* All other types are not supported */ 153 /* All other types are not supported */
154
156 return (AE_TYPE); 155 return (AE_TYPE);
157 } 156 }
158 } 157 }
@@ -193,9 +192,8 @@ acpi_ev_queue_notify_request(struct acpi_namespace_node * node,
193 acpi_ut_delete_generic_state(notify_info); 192 acpi_ut_delete_generic_state(notify_info);
194 } 193 }
195 } else { 194 } else {
196 /* 195 /* There is no notify handler (per-device or system) for this device */
197 * There is no notify handler (per-device or system) for this device. 196
198 */
199 ACPI_DEBUG_PRINT((ACPI_DB_INFO, 197 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
200 "No notify handler for Notify (%4.4s, %X) node %p\n", 198 "No notify handler for Notify (%4.4s, %X) node %p\n",
201 acpi_ut_get_node_name(node), notify_value, 199 acpi_ut_get_node_name(node), notify_value,
@@ -229,9 +227,8 @@ static void ACPI_SYSTEM_XFACE acpi_ev_notify_dispatch(void *context)
229 ACPI_FUNCTION_ENTRY(); 227 ACPI_FUNCTION_ENTRY();
230 228
231 /* 229 /*
232 * We will invoke a global notify handler if installed. 230 * We will invoke a global notify handler if installed. This is done
233 * This is done _before_ we invoke the per-device handler attached 231 * _before_ we invoke the per-device handler attached to the device.
234 * to the device.
235 */ 232 */
236 if (notify_info->notify.value <= ACPI_MAX_SYS_NOTIFY) { 233 if (notify_info->notify.value <= ACPI_MAX_SYS_NOTIFY) {
237 234
@@ -299,7 +296,7 @@ static u32 acpi_ev_global_lock_handler(void *context)
299 * If we don't get it now, it will be marked pending and we will 296 * If we don't get it now, it will be marked pending and we will
300 * take another interrupt when it becomes free. 297 * take another interrupt when it becomes free.
301 */ 298 */
302 ACPI_ACQUIRE_GLOBAL_LOCK(facs, acquired); 299 ACPI_ACQUIRE_GLOBAL_LOCK(acpi_gbl_FACS, acquired);
303 if (acquired) { 300 if (acquired) {
304 301
305 /* Got the lock, now wake all threads waiting for it */ 302 /* Got the lock, now wake all threads waiting for it */
@@ -336,34 +333,27 @@ acpi_status acpi_ev_init_global_lock_handler(void)
336 333
337 ACPI_FUNCTION_TRACE(ev_init_global_lock_handler); 334 ACPI_FUNCTION_TRACE(ev_init_global_lock_handler);
338 335
339 status = acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS, 336 /* Attempt installation of the global lock handler */
340 ACPI_CAST_INDIRECT_PTR(struct
341 acpi_table_header,
342 &facs));
343 if (ACPI_FAILURE(status)) {
344 return_ACPI_STATUS(status);
345 }
346 337
347 acpi_gbl_global_lock_present = TRUE;
348 status = acpi_install_fixed_event_handler(ACPI_EVENT_GLOBAL, 338 status = acpi_install_fixed_event_handler(ACPI_EVENT_GLOBAL,
349 acpi_ev_global_lock_handler, 339 acpi_ev_global_lock_handler,
350 NULL); 340 NULL);
351 341
352 /* 342 /*
353 * If the global lock does not exist on this platform, the attempt 343 * If the global lock does not exist on this platform, the attempt to
354 * to enable GBL_STATUS will fail (the GBL_ENABLE bit will not stick) 344 * enable GBL_STATUS will fail (the GBL_ENABLE bit will not stick).
355 * Map to AE_OK, but mark global lock as not present. 345 * Map to AE_OK, but mark global lock as not present. Any attempt to
356 * Any attempt to actually use the global lock will be flagged 346 * actually use the global lock will be flagged with an error.
357 * with an error.
358 */ 347 */
359 if (status == AE_NO_HARDWARE_RESPONSE) { 348 if (status == AE_NO_HARDWARE_RESPONSE) {
360 ACPI_ERROR((AE_INFO, 349 ACPI_ERROR((AE_INFO,
361 "No response from Global Lock hardware, disabling lock")); 350 "No response from Global Lock hardware, disabling lock"));
362 351
363 acpi_gbl_global_lock_present = FALSE; 352 acpi_gbl_global_lock_present = FALSE;
364 status = AE_OK; 353 return_ACPI_STATUS(AE_OK);
365 } 354 }
366 355
356 acpi_gbl_global_lock_present = TRUE;
367 return_ACPI_STATUS(status); 357 return_ACPI_STATUS(status);
368} 358}
369 359
@@ -462,8 +452,8 @@ acpi_status acpi_ev_acquire_global_lock(u16 timeout)
462 } 452 }
463 453
464 /* 454 /*
465 * Make sure that a global lock actually exists. If not, just treat 455 * Make sure that a global lock actually exists. If not, just treat the
466 * the lock as a standard mutex. 456 * lock as a standard mutex.
467 */ 457 */
468 if (!acpi_gbl_global_lock_present) { 458 if (!acpi_gbl_global_lock_present) {
469 acpi_gbl_global_lock_acquired = TRUE; 459 acpi_gbl_global_lock_acquired = TRUE;
@@ -472,7 +462,7 @@ acpi_status acpi_ev_acquire_global_lock(u16 timeout)
472 462
473 /* Attempt to acquire the actual hardware lock */ 463 /* Attempt to acquire the actual hardware lock */
474 464
475 ACPI_ACQUIRE_GLOBAL_LOCK(facs, acquired); 465 ACPI_ACQUIRE_GLOBAL_LOCK(acpi_gbl_FACS, acquired);
476 if (acquired) { 466 if (acquired) {
477 467
478 /* We got the lock */ 468 /* We got the lock */
@@ -536,7 +526,7 @@ acpi_status acpi_ev_release_global_lock(void)
536 526
537 /* Allow any thread to release the lock */ 527 /* Allow any thread to release the lock */
538 528
539 ACPI_RELEASE_GLOBAL_LOCK(facs, pending); 529 ACPI_RELEASE_GLOBAL_LOCK(acpi_gbl_FACS, pending);
540 530
541 /* 531 /*
542 * If the pending bit was set, we must write GBL_RLS to the control 532 * If the pending bit was set, we must write GBL_RLS to the control
@@ -582,8 +572,8 @@ void acpi_ev_terminate(void)
582 572
583 if (acpi_gbl_events_initialized) { 573 if (acpi_gbl_events_initialized) {
584 /* 574 /*
585 * Disable all event-related functionality. 575 * Disable all event-related functionality. In all cases, on error,
586 * In all cases, on error, print a message but obviously we don't abort. 576 * print a message but obviously we don't abort.
587 */ 577 */
588 578
589 /* Disable all fixed events */ 579 /* Disable all fixed events */
@@ -599,7 +589,7 @@ void acpi_ev_terminate(void)
599 589
600 /* Disable all GPEs in all GPE blocks */ 590 /* Disable all GPEs in all GPE blocks */
601 591
602 status = acpi_ev_walk_gpe_list(acpi_hw_disable_gpe_block); 592 status = acpi_ev_walk_gpe_list(acpi_hw_disable_gpe_block, NULL);
603 593
604 /* Remove SCI handler */ 594 /* Remove SCI handler */
605 595
@@ -617,7 +607,7 @@ void acpi_ev_terminate(void)
617 607
618 /* Deallocate all handler objects installed within GPE info structs */ 608 /* Deallocate all handler objects installed within GPE info structs */
619 609
620 status = acpi_ev_walk_gpe_list(acpi_ev_delete_gpe_handlers); 610 status = acpi_ev_walk_gpe_list(acpi_ev_delete_gpe_handlers, NULL);
621 611
622 /* Return to original mode if necessary */ 612 /* Return to original mode if necessary */
623 613
diff --git a/drivers/acpi/events/evregion.c b/drivers/acpi/acpica/evregion.c
index 236fbd1ca438..665c0887ab4d 100644
--- a/drivers/acpi/events/evregion.c
+++ b/drivers/acpi/acpica/evregion.c
@@ -42,22 +42,15 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acevents.h> 45#include "accommon.h"
46#include <acpi/acnamesp.h> 46#include "acevents.h"
47#include <acpi/acinterp.h> 47#include "acnamesp.h"
48#include "acinterp.h"
48 49
49#define _COMPONENT ACPI_EVENTS 50#define _COMPONENT ACPI_EVENTS
50ACPI_MODULE_NAME("evregion") 51ACPI_MODULE_NAME("evregion")
51#define ACPI_NUM_DEFAULT_SPACES 4
52static u8 acpi_gbl_default_address_spaces[ACPI_NUM_DEFAULT_SPACES] = {
53 ACPI_ADR_SPACE_SYSTEM_MEMORY,
54 ACPI_ADR_SPACE_SYSTEM_IO,
55 ACPI_ADR_SPACE_PCI_CONFIG,
56 ACPI_ADR_SPACE_DATA_TABLE
57};
58 52
59/* Local prototypes */ 53/* Local prototypes */
60
61static acpi_status 54static acpi_status
62acpi_ev_reg_run(acpi_handle obj_handle, 55acpi_ev_reg_run(acpi_handle obj_handle,
63 u32 level, void *context, void **return_value); 56 u32 level, void *context, void **return_value);
@@ -66,6 +59,17 @@ static acpi_status
66acpi_ev_install_handler(acpi_handle obj_handle, 59acpi_ev_install_handler(acpi_handle obj_handle,
67 u32 level, void *context, void **return_value); 60 u32 level, void *context, void **return_value);
68 61
62/* These are the address spaces that will get default handlers */
63
64#define ACPI_NUM_DEFAULT_SPACES 4
65
66static u8 acpi_gbl_default_address_spaces[ACPI_NUM_DEFAULT_SPACES] = {
67 ACPI_ADR_SPACE_SYSTEM_MEMORY,
68 ACPI_ADR_SPACE_SYSTEM_IO,
69 ACPI_ADR_SPACE_PCI_CONFIG,
70 ACPI_ADR_SPACE_DATA_TABLE
71};
72
69/******************************************************************************* 73/*******************************************************************************
70 * 74 *
71 * FUNCTION: acpi_ev_install_region_handlers 75 * FUNCTION: acpi_ev_install_region_handlers
@@ -91,18 +95,19 @@ acpi_status acpi_ev_install_region_handlers(void)
91 } 95 }
92 96
93 /* 97 /*
94 * All address spaces (PCI Config, EC, SMBus) are scope dependent 98 * All address spaces (PCI Config, EC, SMBus) are scope dependent and
95 * and registration must occur for a specific device. 99 * registration must occur for a specific device.
96 * 100 *
97 * In the case of the system memory and IO address spaces there is currently 101 * In the case of the system memory and IO address spaces there is
98 * no device associated with the address space. For these we use the root. 102 * currently no device associated with the address space. For these we
103 * use the root.
99 * 104 *
100 * We install the default PCI config space handler at the root so 105 * We install the default PCI config space handler at the root so that
101 * that this space is immediately available even though the we have 106 * this space is immediately available even though the we have not
102 * not enumerated all the PCI Root Buses yet. This is to conform 107 * enumerated all the PCI Root Buses yet. This is to conform to the ACPI
103 * to the ACPI specification which states that the PCI config 108 * specification which states that the PCI config space must be always
104 * space must be always available -- even though we are nowhere 109 * available -- even though we are nowhere near ready to find the PCI root
105 * near ready to find the PCI root buses at this point. 110 * buses at this point.
106 * 111 *
107 * NOTE: We ignore AE_ALREADY_EXISTS because this means that a handler 112 * NOTE: We ignore AE_ALREADY_EXISTS because this means that a handler
108 * has already been installed (via acpi_install_address_space_handler). 113 * has already been installed (via acpi_install_address_space_handler).
@@ -160,12 +165,11 @@ acpi_status acpi_ev_initialize_op_regions(void)
160 return_ACPI_STATUS(status); 165 return_ACPI_STATUS(status);
161 } 166 }
162 167
163 /* 168 /* Run the _REG methods for op_regions in each default address space */
164 * Run the _REG methods for op_regions in each default address space
165 */
166 for (i = 0; i < ACPI_NUM_DEFAULT_SPACES; i++) {
167 169
168 /* TBD: Make sure handler is the DEFAULT handler, otherwise 170 for (i = 0; i < ACPI_NUM_DEFAULT_SPACES; i++) {
171 /*
172 * TBD: Make sure handler is the DEFAULT handler, otherwise
169 * _REG will have already been run. 173 * _REG will have already been run.
170 */ 174 */
171 status = acpi_ev_execute_reg_methods(acpi_gbl_root_node, 175 status = acpi_ev_execute_reg_methods(acpi_gbl_root_node,
@@ -318,13 +322,13 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj,
318 } 322 }
319 323
320 /* 324 /*
321 * It may be the case that the region has never been initialized 325 * It may be the case that the region has never been initialized.
322 * Some types of regions require special init code 326 * Some types of regions require special init code
323 */ 327 */
324 if (!(region_obj->region.flags & AOPOBJ_SETUP_COMPLETE)) { 328 if (!(region_obj->region.flags & AOPOBJ_SETUP_COMPLETE)) {
325 /* 329
326 * This region has not been initialized yet, do it 330 /* This region has not been initialized yet, do it */
327 */ 331
328 region_setup = handler_desc->address_space.setup; 332 region_setup = handler_desc->address_space.setup;
329 if (!region_setup) { 333 if (!region_setup) {
330 334
@@ -339,9 +343,9 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj,
339 } 343 }
340 344
341 /* 345 /*
342 * We must exit the interpreter because the region 346 * We must exit the interpreter because the region setup will
343 * setup will potentially execute control methods 347 * potentially execute control methods (for example, the _REG method
344 * (e.g., _REG method for this region) 348 * for this region)
345 */ 349 */
346 acpi_ex_exit_interpreter(); 350 acpi_ex_exit_interpreter();
347 351
@@ -364,9 +368,8 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj,
364 return_ACPI_STATUS(status); 368 return_ACPI_STATUS(status);
365 } 369 }
366 370
367 /* 371 /* Region initialization may have been completed by region_setup */
368 * Region initialization may have been completed by region_setup 372
369 */
370 if (!(region_obj->region.flags & AOPOBJ_SETUP_COMPLETE)) { 373 if (!(region_obj->region.flags & AOPOBJ_SETUP_COMPLETE)) {
371 region_obj->region.flags |= AOPOBJ_SETUP_COMPLETE; 374 region_obj->region.flags |= AOPOBJ_SETUP_COMPLETE;
372 375
@@ -521,8 +524,8 @@ acpi_ev_detach_region(union acpi_operand_object *region_obj,
521 } 524 }
522 525
523 /* 526 /*
524 * If the region has been activated, call the setup handler 527 * If the region has been activated, call the setup handler with
525 * with the deactivate notification 528 * the deactivate notification
526 */ 529 */
527 if (region_obj->region.flags & AOPOBJ_SETUP_COMPLETE) { 530 if (region_obj->region.flags & AOPOBJ_SETUP_COMPLETE) {
528 region_setup = handler_obj->address_space.setup; 531 region_setup = handler_obj->address_space.setup;
@@ -668,8 +671,8 @@ acpi_ev_install_handler(acpi_handle obj_handle,
668 } 671 }
669 672
670 /* 673 /*
671 * We only care about regions.and objects 674 * We only care about regions and objects that are allowed to have
672 * that are allowed to have address space handlers 675 * address space handlers
673 */ 676 */
674 if ((node->type != ACPI_TYPE_DEVICE) && 677 if ((node->type != ACPI_TYPE_DEVICE) &&
675 (node->type != ACPI_TYPE_REGION) && (node != acpi_gbl_root_node)) { 678 (node->type != ACPI_TYPE_REGION) && (node != acpi_gbl_root_node)) {
@@ -710,9 +713,9 @@ acpi_ev_install_handler(acpi_handle obj_handle,
710 /* 713 /*
711 * Since the object we found it on was a device, then it 714 * Since the object we found it on was a device, then it
712 * means that someone has already installed a handler for 715 * means that someone has already installed a handler for
713 * the branch of the namespace from this device on. Just 716 * the branch of the namespace from this device on. Just
714 * bail out telling the walk routine to not traverse this 717 * bail out telling the walk routine to not traverse this
715 * branch. This preserves the scoping rule for handlers. 718 * branch. This preserves the scoping rule for handlers.
716 */ 719 */
717 return (AE_CTRL_DEPTH); 720 return (AE_CTRL_DEPTH);
718 } 721 }
@@ -723,9 +726,8 @@ acpi_ev_install_handler(acpi_handle obj_handle,
723 } 726 }
724 727
725 /* 728 /*
726 * As long as the device didn't have a handler for this 729 * As long as the device didn't have a handler for this space we
727 * space we don't care about it. We just ignore it and 730 * don't care about it. We just ignore it and proceed.
728 * proceed.
729 */ 731 */
730 return (AE_OK); 732 return (AE_OK);
731 } 733 }
@@ -733,16 +735,14 @@ acpi_ev_install_handler(acpi_handle obj_handle,
733 /* Object is a Region */ 735 /* Object is a Region */
734 736
735 if (obj_desc->region.space_id != handler_obj->address_space.space_id) { 737 if (obj_desc->region.space_id != handler_obj->address_space.space_id) {
736 /* 738
737 * This region is for a different address space 739 /* This region is for a different address space, just ignore it */
738 * -- just ignore it 740
739 */
740 return (AE_OK); 741 return (AE_OK);
741 } 742 }
742 743
743 /* 744 /*
744 * Now we have a region and it is for the handler's address 745 * Now we have a region and it is for the handler's address space type.
745 * space type.
746 * 746 *
747 * First disconnect region for any previous handler (if any) 747 * First disconnect region for any previous handler (if any)
748 */ 748 */
@@ -786,9 +786,8 @@ acpi_ev_install_space_handler(struct acpi_namespace_node * node,
786 ACPI_FUNCTION_TRACE(ev_install_space_handler); 786 ACPI_FUNCTION_TRACE(ev_install_space_handler);
787 787
788 /* 788 /*
789 * This registration is valid for only the types below 789 * This registration is valid for only the types below and the root. This
790 * and the root. This is where the default handlers 790 * is where the default handlers get placed.
791 * get placed.
792 */ 791 */
793 if ((node->type != ACPI_TYPE_DEVICE) && 792 if ((node->type != ACPI_TYPE_DEVICE) &&
794 (node->type != ACPI_TYPE_PROCESSOR) && 793 (node->type != ACPI_TYPE_PROCESSOR) &&
@@ -848,8 +847,8 @@ acpi_ev_install_space_handler(struct acpi_namespace_node * node,
848 obj_desc = acpi_ns_get_attached_object(node); 847 obj_desc = acpi_ns_get_attached_object(node);
849 if (obj_desc) { 848 if (obj_desc) {
850 /* 849 /*
851 * The attached device object already exists. 850 * The attached device object already exists. Make sure the handler
852 * Make sure the handler is not already installed. 851 * is not already installed.
853 */ 852 */
854 handler_obj = obj_desc->device.handler; 853 handler_obj = obj_desc->device.handler;
855 854
@@ -864,8 +863,8 @@ acpi_ev_install_space_handler(struct acpi_namespace_node * node,
864 handler) { 863 handler) {
865 /* 864 /*
866 * It is (relatively) OK to attempt to install the SAME 865 * It is (relatively) OK to attempt to install the SAME
867 * handler twice. This can easily happen 866 * handler twice. This can easily happen with the
868 * with PCI_Config space. 867 * PCI_Config space.
869 */ 868 */
870 status = AE_SAME_HANDLER; 869 status = AE_SAME_HANDLER;
871 goto unlock_and_exit; 870 goto unlock_and_exit;
@@ -925,9 +924,8 @@ acpi_ev_install_space_handler(struct acpi_namespace_node * node,
925 /* 924 /*
926 * Install the handler 925 * Install the handler
927 * 926 *
928 * At this point there is no existing handler. 927 * At this point there is no existing handler. Just allocate the object
929 * Just allocate the object for the handler and link it 928 * for the handler and link it into the list.
930 * into the list.
931 */ 929 */
932 handler_obj = 930 handler_obj =
933 acpi_ut_create_internal_object(ACPI_TYPE_LOCAL_ADDRESS_HANDLER); 931 acpi_ut_create_internal_object(ACPI_TYPE_LOCAL_ADDRESS_HANDLER);
@@ -1000,11 +998,10 @@ acpi_ev_execute_reg_methods(struct acpi_namespace_node *node,
1000 ACPI_FUNCTION_TRACE(ev_execute_reg_methods); 998 ACPI_FUNCTION_TRACE(ev_execute_reg_methods);
1001 999
1002 /* 1000 /*
1003 * Run all _REG methods for all Operation Regions for this 1001 * Run all _REG methods for all Operation Regions for this space ID. This
1004 * space ID. This is a separate walk in order to handle any 1002 * is a separate walk in order to handle any interdependencies between
1005 * interdependencies between regions and _REG methods. (i.e. handlers 1003 * regions and _REG methods. (i.e. handlers must be installed for all
1006 * must be installed for all regions of this Space ID before we 1004 * regions of this Space ID before we can run any _REG methods)
1007 * can run any _REG methods)
1008 */ 1005 */
1009 status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, ACPI_UINT32_MAX, 1006 status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, ACPI_UINT32_MAX,
1010 ACPI_NS_WALK_UNLOCK, acpi_ev_reg_run, 1007 ACPI_NS_WALK_UNLOCK, acpi_ev_reg_run,
@@ -1042,8 +1039,8 @@ acpi_ev_reg_run(acpi_handle obj_handle,
1042 } 1039 }
1043 1040
1044 /* 1041 /*
1045 * We only care about regions.and objects 1042 * We only care about regions.and objects that are allowed to have address
1046 * that are allowed to have address space handlers 1043 * space handlers
1047 */ 1044 */
1048 if ((node->type != ACPI_TYPE_REGION) && (node != acpi_gbl_root_node)) { 1045 if ((node->type != ACPI_TYPE_REGION) && (node != acpi_gbl_root_node)) {
1049 return (AE_OK); 1046 return (AE_OK);
@@ -1062,10 +1059,9 @@ acpi_ev_reg_run(acpi_handle obj_handle,
1062 /* Object is a Region */ 1059 /* Object is a Region */
1063 1060
1064 if (obj_desc->region.space_id != space_id) { 1061 if (obj_desc->region.space_id != space_id) {
1065 /* 1062
1066 * This region is for a different address space 1063 /* This region is for a different address space, just ignore it */
1067 * -- just ignore it 1064
1068 */
1069 return (AE_OK); 1065 return (AE_OK);
1070 } 1066 }
1071 1067
diff --git a/drivers/acpi/events/evrgnini.c b/drivers/acpi/acpica/evrgnini.c
index 6b94b38df07d..f3f1fb45c3dc 100644
--- a/drivers/acpi/events/evrgnini.c
+++ b/drivers/acpi/acpica/evrgnini.c
@@ -42,8 +42,9 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acevents.h> 45#include "accommon.h"
46#include <acpi/acnamesp.h> 46#include "acevents.h"
47#include "acnamesp.h"
47 48
48#define _COMPONENT ACPI_EVENTS 49#define _COMPONENT ACPI_EVENTS
49ACPI_MODULE_NAME("evrgnini") 50ACPI_MODULE_NAME("evrgnini")
@@ -233,9 +234,9 @@ acpi_ev_pci_config_region_setup(acpi_handle handle,
233 if (ACPI_FAILURE(status)) { 234 if (ACPI_FAILURE(status)) {
234 if (status == AE_SAME_HANDLER) { 235 if (status == AE_SAME_HANDLER) {
235 /* 236 /*
236 * It is OK if the handler is already installed on the root 237 * It is OK if the handler is already installed on the
237 * bridge. Still need to return a context object for the 238 * root bridge. Still need to return a context object
238 * new PCI_Config operation region, however. 239 * for the new PCI_Config operation region, however.
239 */ 240 */
240 status = AE_OK; 241 status = AE_OK;
241 } else { 242 } else {
@@ -272,8 +273,8 @@ acpi_ev_pci_config_region_setup(acpi_handle handle,
272 } 273 }
273 274
274 /* 275 /*
275 * For PCI_Config space access, we need the segment, bus, 276 * For PCI_Config space access, we need the segment, bus, device and
276 * device and function numbers. Acquire them here. 277 * function numbers. Acquire them here.
277 * 278 *
278 * Find the parent device object. (This allows the operation region to be 279 * Find the parent device object. (This allows the operation region to be
279 * within a subscope under the device, such as a control method.) 280 * within a subscope under the device, such as a control method.)
@@ -289,16 +290,16 @@ acpi_ev_pci_config_region_setup(acpi_handle handle,
289 } 290 }
290 291
291 /* 292 /*
292 * Get the PCI device and function numbers from the _ADR object 293 * Get the PCI device and function numbers from the _ADR object contained
293 * contained in the parent's scope. 294 * in the parent's scope.
294 */ 295 */
295 status = 296 status =
296 acpi_ut_evaluate_numeric_object(METHOD_NAME__ADR, pci_device_node, 297 acpi_ut_evaluate_numeric_object(METHOD_NAME__ADR, pci_device_node,
297 &pci_value); 298 &pci_value);
298 299
299 /* 300 /*
300 * The default is zero, and since the allocation above zeroed 301 * The default is zero, and since the allocation above zeroed the data,
301 * the data, just do nothing on failure. 302 * just do nothing on failure.
302 */ 303 */
303 if (ACPI_SUCCESS(status)) { 304 if (ACPI_SUCCESS(status)) {
304 pci_id->device = ACPI_HIWORD(ACPI_LODWORD(pci_value)); 305 pci_id->device = ACPI_HIWORD(ACPI_LODWORD(pci_value));
@@ -382,9 +383,8 @@ static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node)
382 struct acpi_compatible_id_list *cid; 383 struct acpi_compatible_id_list *cid;
383 u32 i; 384 u32 i;
384 385
385 /* 386 /* Get the _HID and check for a PCI Root Bridge */
386 * Get the _HID and check for a PCI Root Bridge 387
387 */
388 status = acpi_ut_execute_HID(node, &hid); 388 status = acpi_ut_execute_HID(node, &hid);
389 if (ACPI_FAILURE(status)) { 389 if (ACPI_FAILURE(status)) {
390 return (FALSE); 390 return (FALSE);
@@ -394,10 +394,8 @@ static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node)
394 return (TRUE); 394 return (TRUE);
395 } 395 }
396 396
397 /* 397 /* The _HID did not match. Get the _CID and check for a PCI Root Bridge */
398 * The _HID did not match. 398
399 * Get the _CID and check for a PCI Root Bridge
400 */
401 status = acpi_ut_execute_CID(node, &cid); 399 status = acpi_ut_execute_CID(node, &cid);
402 if (ACPI_FAILURE(status)) { 400 if (ACPI_FAILURE(status)) {
403 return (FALSE); 401 return (FALSE);
@@ -516,9 +514,9 @@ acpi_ev_default_region_setup(acpi_handle handle,
516 * Get the appropriate address space handler for a newly 514 * Get the appropriate address space handler for a newly
517 * created region. 515 * created region.
518 * 516 *
519 * This also performs address space specific initialization. For 517 * This also performs address space specific initialization. For
520 * example, PCI regions must have an _ADR object that contains 518 * example, PCI regions must have an _ADR object that contains
521 * a PCI address in the scope of the definition. This address is 519 * a PCI address in the scope of the definition. This address is
522 * required to perform an access to PCI config space. 520 * required to perform an access to PCI config space.
523 * 521 *
524 * MUTEX: Interpreter should be unlocked, because we may run the _REG 522 * MUTEX: Interpreter should be unlocked, because we may run the _REG
@@ -572,7 +570,7 @@ acpi_ev_initialize_region(union acpi_operand_object *region_obj,
572 if (ACPI_SUCCESS(status)) { 570 if (ACPI_SUCCESS(status)) {
573 /* 571 /*
574 * The _REG method is optional and there can be only one per region 572 * The _REG method is optional and there can be only one per region
575 * definition. This will be executed when the handler is attached 573 * definition. This will be executed when the handler is attached
576 * or removed 574 * or removed
577 */ 575 */
578 region_obj2->extra.method_REG = method_node; 576 region_obj2->extra.method_REG = method_node;
@@ -670,10 +668,8 @@ acpi_ev_initialize_region(union acpi_operand_object *region_obj,
670 } 668 }
671 } 669 }
672 670
673 /* 671 /* This node does not have the handler we need; Pop up one level */
674 * This node does not have the handler we need; 672
675 * Pop up one level
676 */
677 node = acpi_ns_get_parent_node(node); 673 node = acpi_ns_get_parent_node(node);
678 } 674 }
679 675
diff --git a/drivers/acpi/events/evsci.c b/drivers/acpi/acpica/evsci.c
index 2a8b77877610..567b356c85af 100644
--- a/drivers/acpi/events/evsci.c
+++ b/drivers/acpi/acpica/evsci.c
@@ -43,7 +43,8 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acevents.h> 46#include "accommon.h"
47#include "acevents.h"
47 48
48#define _COMPONENT ACPI_EVENTS 49#define _COMPONENT ACPI_EVENTS
49ACPI_MODULE_NAME("evsci") 50ACPI_MODULE_NAME("evsci")
@@ -115,10 +116,8 @@ u32 ACPI_SYSTEM_XFACE acpi_ev_gpe_xrupt_handler(void *context)
115 * if this interrupt handler is installed, ACPI is enabled. 116 * if this interrupt handler is installed, ACPI is enabled.
116 */ 117 */
117 118
118 /* 119 /* GPEs: Check for and dispatch any GPEs that have occurred */
119 * GPEs: 120
120 * Check for and dispatch any GPEs that have occurred
121 */
122 interrupt_handled |= acpi_ev_gpe_detect(gpe_xrupt_list); 121 interrupt_handled |= acpi_ev_gpe_detect(gpe_xrupt_list);
123 122
124 return_UINT32(interrupt_handled); 123 return_UINT32(interrupt_handled);
@@ -158,11 +157,11 @@ u32 acpi_ev_install_sci_handler(void)
158 * RETURN: E_OK if handler uninstalled OK, E_ERROR if handler was not 157 * RETURN: E_OK if handler uninstalled OK, E_ERROR if handler was not
159 * installed to begin with 158 * installed to begin with
160 * 159 *
161 * DESCRIPTION: Remove the SCI interrupt handler. No further SCIs will be 160 * DESCRIPTION: Remove the SCI interrupt handler. No further SCIs will be
162 * taken. 161 * taken.
163 * 162 *
164 * Note: It doesn't seem important to disable all events or set the event 163 * Note: It doesn't seem important to disable all events or set the event
165 * enable registers to their original values. The OS should disable 164 * enable registers to their original values. The OS should disable
166 * the SCI interrupt level when the handler is removed, so no more 165 * the SCI interrupt level when the handler is removed, so no more
167 * events will come in. 166 * events will come in.
168 * 167 *
diff --git a/drivers/acpi/events/evxface.c b/drivers/acpi/acpica/evxface.c
index 94a6efe020be..3aca9010a11e 100644
--- a/drivers/acpi/events/evxface.c
+++ b/drivers/acpi/acpica/evxface.c
@@ -42,9 +42,10 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acnamesp.h> 45#include "accommon.h"
46#include <acpi/acevents.h> 46#include "acnamesp.h"
47#include <acpi/acinterp.h> 47#include "acevents.h"
48#include "acinterp.h"
48 49
49#define _COMPONENT ACPI_EVENTS 50#define _COMPONENT ACPI_EVENTS
50ACPI_MODULE_NAME("evxface") 51ACPI_MODULE_NAME("evxface")
@@ -267,7 +268,7 @@ acpi_install_notify_handler(acpi_handle device,
267 /* 268 /*
268 * Root Object: 269 * Root Object:
269 * Registering a notify handler on the root object indicates that the 270 * Registering a notify handler on the root object indicates that the
270 * caller wishes to receive notifications for all objects. Note that 271 * caller wishes to receive notifications for all objects. Note that
271 * only one <external> global handler can be regsitered (per notify type). 272 * only one <external> global handler can be regsitered (per notify type).
272 */ 273 */
273 if (device == ACPI_ROOT_OBJECT) { 274 if (device == ACPI_ROOT_OBJECT) {
diff --git a/drivers/acpi/events/evxfevnt.c b/drivers/acpi/acpica/evxfevnt.c
index 41554f736b68..35485e4b60a6 100644
--- a/drivers/acpi/events/evxfevnt.c
+++ b/drivers/acpi/acpica/evxfevnt.c
@@ -42,13 +42,19 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acevents.h> 45#include "accommon.h"
46#include <acpi/acnamesp.h> 46#include "acevents.h"
47#include <acpi/actables.h> 47#include "acnamesp.h"
48#include "actables.h"
48 49
49#define _COMPONENT ACPI_EVENTS 50#define _COMPONENT ACPI_EVENTS
50ACPI_MODULE_NAME("evxfevnt") 51ACPI_MODULE_NAME("evxfevnt")
51 52
53/* Local prototypes */
54acpi_status
55acpi_ev_get_gpe_device(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
56 struct acpi_gpe_block_info *gpe_block, void *context);
57
52/******************************************************************************* 58/*******************************************************************************
53 * 59 *
54 * FUNCTION: acpi_enable 60 * FUNCTION: acpi_enable
@@ -60,6 +66,7 @@ ACPI_MODULE_NAME("evxfevnt")
60 * DESCRIPTION: Transfers the system into ACPI mode. 66 * DESCRIPTION: Transfers the system into ACPI mode.
61 * 67 *
62 ******************************************************************************/ 68 ******************************************************************************/
69
63acpi_status acpi_enable(void) 70acpi_status acpi_enable(void)
64{ 71{
65 acpi_status status = AE_OK; 72 acpi_status status = AE_OK;
@@ -161,8 +168,8 @@ acpi_status acpi_enable_event(u32 event, u32 flags)
161 } 168 }
162 169
163 /* 170 /*
164 * Enable the requested fixed event (by writing a one to the 171 * Enable the requested fixed event (by writing a one to the enable
165 * enable register bit) 172 * register bit)
166 */ 173 */
167 status = 174 status =
168 acpi_set_register(acpi_gbl_fixed_event_info[event]. 175 acpi_set_register(acpi_gbl_fixed_event_info[event].
@@ -343,8 +350,8 @@ acpi_status acpi_disable_event(u32 event, u32 flags)
343 } 350 }
344 351
345 /* 352 /*
346 * Disable the requested fixed event (by writing a zero to the 353 * Disable the requested fixed event (by writing a zero to the enable
347 * enable register bit) 354 * register bit)
348 */ 355 */
349 status = 356 status =
350 acpi_set_register(acpi_gbl_fixed_event_info[event]. 357 acpi_set_register(acpi_gbl_fixed_event_info[event].
@@ -396,8 +403,8 @@ acpi_status acpi_clear_event(u32 event)
396 } 403 }
397 404
398 /* 405 /*
399 * Clear the requested fixed event (By writing a one to the 406 * Clear the requested fixed event (By writing a one to the status
400 * status register bit) 407 * register bit)
401 */ 408 */
402 status = 409 status =
403 acpi_set_register(acpi_gbl_fixed_event_info[event]. 410 acpi_set_register(acpi_gbl_fixed_event_info[event].
@@ -717,3 +724,148 @@ acpi_status acpi_remove_gpe_block(acpi_handle gpe_device)
717} 724}
718 725
719ACPI_EXPORT_SYMBOL(acpi_remove_gpe_block) 726ACPI_EXPORT_SYMBOL(acpi_remove_gpe_block)
727
728/*******************************************************************************
729 *
730 * FUNCTION: acpi_get_gpe_device
731 *
732 * PARAMETERS: Index - System GPE index (0-current_gpe_count)
733 * gpe_device - Where the parent GPE Device is returned
734 *
735 * RETURN: Status
736 *
737 * DESCRIPTION: Obtain the GPE device associated with the input index. A NULL
738 * gpe device indicates that the gpe number is contained in one of
739 * the FADT-defined gpe blocks. Otherwise, the GPE block device.
740 *
741 ******************************************************************************/
742acpi_status
743acpi_get_gpe_device(u32 index, acpi_handle *gpe_device)
744{
745 struct acpi_gpe_device_info info;
746 acpi_status status;
747
748 ACPI_FUNCTION_TRACE(acpi_get_gpe_device);
749
750 if (!gpe_device) {
751 return_ACPI_STATUS(AE_BAD_PARAMETER);
752 }
753
754 if (index >= acpi_current_gpe_count) {
755 return_ACPI_STATUS(AE_NOT_EXIST);
756 }
757
758 /* Setup and walk the GPE list */
759
760 info.index = index;
761 info.status = AE_NOT_EXIST;
762 info.gpe_device = NULL;
763 info.next_block_base_index = 0;
764
765 status = acpi_ev_walk_gpe_list(acpi_ev_get_gpe_device, &info);
766 if (ACPI_FAILURE(status)) {
767 return_ACPI_STATUS(status);
768 }
769
770 *gpe_device = info.gpe_device;
771 return_ACPI_STATUS(info.status);
772}
773
774ACPI_EXPORT_SYMBOL(acpi_get_gpe_device)
775
776/*******************************************************************************
777 *
778 * FUNCTION: acpi_ev_get_gpe_device
779 *
780 * PARAMETERS: GPE_WALK_CALLBACK
781 *
782 * RETURN: Status
783 *
784 * DESCRIPTION: Matches the input GPE index (0-current_gpe_count) with a GPE
785 * block device. NULL if the GPE is one of the FADT-defined GPEs.
786 *
787 ******************************************************************************/
788acpi_status
789acpi_ev_get_gpe_device(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
790 struct acpi_gpe_block_info *gpe_block, void *context)
791{
792 struct acpi_gpe_device_info *info = context;
793
794 /* Increment Index by the number of GPEs in this block */
795
796 info->next_block_base_index +=
797 (gpe_block->register_count * ACPI_GPE_REGISTER_WIDTH);
798
799 if (info->index < info->next_block_base_index) {
800 /*
801 * The GPE index is within this block, get the node. Leave the node
802 * NULL for the FADT-defined GPEs
803 */
804 if ((gpe_block->node)->type == ACPI_TYPE_DEVICE) {
805 info->gpe_device = gpe_block->node;
806 }
807
808 info->status = AE_OK;
809 return (AE_CTRL_END);
810 }
811
812 return (AE_OK);
813}
814
815/******************************************************************************
816 *
817 * FUNCTION: acpi_disable_all_gpes
818 *
819 * PARAMETERS: None
820 *
821 * RETURN: Status
822 *
823 * DESCRIPTION: Disable and clear all GPEs in all GPE blocks
824 *
825 ******************************************************************************/
826
827acpi_status acpi_disable_all_gpes(void)
828{
829 acpi_status status;
830
831 ACPI_FUNCTION_TRACE(acpi_disable_all_gpes);
832
833 status = acpi_ut_acquire_mutex(ACPI_MTX_EVENTS);
834 if (ACPI_FAILURE(status)) {
835 return_ACPI_STATUS(status);
836 }
837
838 status = acpi_hw_disable_all_gpes();
839 (void)acpi_ut_release_mutex(ACPI_MTX_EVENTS);
840
841 return_ACPI_STATUS(status);
842}
843
844/******************************************************************************
845 *
846 * FUNCTION: acpi_enable_all_runtime_gpes
847 *
848 * PARAMETERS: None
849 *
850 * RETURN: Status
851 *
852 * DESCRIPTION: Enable all "runtime" GPEs, in all GPE blocks
853 *
854 ******************************************************************************/
855
856acpi_status acpi_enable_all_runtime_gpes(void)
857{
858 acpi_status status;
859
860 ACPI_FUNCTION_TRACE(acpi_enable_all_runtime_gpes);
861
862 status = acpi_ut_acquire_mutex(ACPI_MTX_EVENTS);
863 if (ACPI_FAILURE(status)) {
864 return_ACPI_STATUS(status);
865 }
866
867 status = acpi_hw_enable_all_runtime_gpes();
868 (void)acpi_ut_release_mutex(ACPI_MTX_EVENTS);
869
870 return_ACPI_STATUS(status);
871}
diff --git a/drivers/acpi/events/evxfregn.c b/drivers/acpi/acpica/evxfregn.c
index e8750807e57d..479e7a3721be 100644
--- a/drivers/acpi/events/evxfregn.c
+++ b/drivers/acpi/acpica/evxfregn.c
@@ -43,8 +43,9 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acnamesp.h> 46#include "accommon.h"
47#include <acpi/acevents.h> 47#include "acnamesp.h"
48#include "acevents.h"
48 49
49#define _COMPONENT ACPI_EVENTS 50#define _COMPONENT ACPI_EVENTS
50ACPI_MODULE_NAME("evxfregn") 51ACPI_MODULE_NAME("evxfregn")
diff --git a/drivers/acpi/executer/exconfig.c b/drivers/acpi/acpica/exconfig.c
index 74da6fa52ef1..932bbc26aa04 100644
--- a/drivers/acpi/executer/exconfig.c
+++ b/drivers/acpi/acpica/exconfig.c
@@ -42,10 +42,11 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acinterp.h> 45#include "accommon.h"
46#include <acpi/acnamesp.h> 46#include "acinterp.h"
47#include <acpi/actables.h> 47#include "acnamesp.h"
48#include <acpi/acdispat.h> 48#include "actables.h"
49#include "acdispat.h"
49 50
50#define _COMPONENT ACPI_EXECUTER 51#define _COMPONENT ACPI_EXECUTER
51ACPI_MODULE_NAME("exconfig") 52ACPI_MODULE_NAME("exconfig")
diff --git a/drivers/acpi/executer/exconvrt.c b/drivers/acpi/acpica/exconvrt.c
index 1d1f35adddde..0be10188316e 100644
--- a/drivers/acpi/executer/exconvrt.c
+++ b/drivers/acpi/acpica/exconvrt.c
@@ -42,8 +42,9 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acinterp.h> 45#include "accommon.h"
46#include <acpi/amlcode.h> 46#include "acinterp.h"
47#include "amlcode.h"
47 48
48#define _COMPONENT ACPI_EXECUTER 49#define _COMPONENT ACPI_EXECUTER
49ACPI_MODULE_NAME("exconvrt") 50ACPI_MODULE_NAME("exconvrt")
diff --git a/drivers/acpi/executer/excreate.c b/drivers/acpi/acpica/excreate.c
index ad09696d5069..a57ad2564ab0 100644
--- a/drivers/acpi/executer/excreate.c
+++ b/drivers/acpi/acpica/excreate.c
@@ -42,9 +42,10 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acinterp.h> 45#include "accommon.h"
46#include <acpi/amlcode.h> 46#include "acinterp.h"
47#include <acpi/acnamesp.h> 47#include "amlcode.h"
48#include "acnamesp.h"
48 49
49#define _COMPONENT ACPI_EXECUTER 50#define _COMPONENT ACPI_EXECUTER
50ACPI_MODULE_NAME("excreate") 51ACPI_MODULE_NAME("excreate")
diff --git a/drivers/acpi/executer/exdump.c b/drivers/acpi/acpica/exdump.c
index d087a7d28aa5..aa313574b0df 100644
--- a/drivers/acpi/executer/exdump.c
+++ b/drivers/acpi/acpica/exdump.c
@@ -42,9 +42,10 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acinterp.h> 45#include "accommon.h"
46#include <acpi/amlcode.h> 46#include "acinterp.h"
47#include <acpi/acnamesp.h> 47#include "amlcode.h"
48#include "acnamesp.h"
48 49
49#define _COMPONENT ACPI_EXECUTER 50#define _COMPONENT ACPI_EXECUTER
50ACPI_MODULE_NAME("exdump") 51ACPI_MODULE_NAME("exdump")
diff --git a/drivers/acpi/executer/exfield.c b/drivers/acpi/acpica/exfield.c
index 3e440d84226a..a352d0233857 100644
--- a/drivers/acpi/executer/exfield.c
+++ b/drivers/acpi/acpica/exfield.c
@@ -42,8 +42,9 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acdispat.h> 45#include "accommon.h"
46#include <acpi/acinterp.h> 46#include "acdispat.h"
47#include "acinterp.h"
47 48
48#define _COMPONENT ACPI_EXECUTER 49#define _COMPONENT ACPI_EXECUTER
49ACPI_MODULE_NAME("exfield") 50ACPI_MODULE_NAME("exfield")
diff --git a/drivers/acpi/executer/exfldio.c b/drivers/acpi/acpica/exfldio.c
index 9ff9d1f4615d..ef58ac4e687b 100644
--- a/drivers/acpi/executer/exfldio.c
+++ b/drivers/acpi/acpica/exfldio.c
@@ -42,10 +42,11 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acinterp.h> 45#include "accommon.h"
46#include <acpi/amlcode.h> 46#include "acinterp.h"
47#include <acpi/acevents.h> 47#include "amlcode.h"
48#include <acpi/acdispat.h> 48#include "acevents.h"
49#include "acdispat.h"
49 50
50#define _COMPONENT ACPI_EXECUTER 51#define _COMPONENT ACPI_EXECUTER
51ACPI_MODULE_NAME("exfldio") 52ACPI_MODULE_NAME("exfldio")
@@ -498,14 +499,13 @@ acpi_ex_field_datum_io(union acpi_operand_object *obj_desc,
498 return_ACPI_STATUS(status); 499 return_ACPI_STATUS(status);
499 } 500 }
500 501
501 ACPI_DEBUG_PRINT((ACPI_DB_BFIELD,
502 "I/O to Data Register: ValuePtr %p\n",
503 value));
504
505 if (read_write == ACPI_READ) { 502 if (read_write == ACPI_READ) {
506 503
507 /* Read the datum from the data_register */ 504 /* Read the datum from the data_register */
508 505
506 ACPI_DEBUG_PRINT((ACPI_DB_BFIELD,
507 "Read from Data Register\n"));
508
509 status = 509 status =
510 acpi_ex_extract_from_field(obj_desc->index_field. 510 acpi_ex_extract_from_field(obj_desc->index_field.
511 data_obj, value, 511 data_obj, value,
@@ -513,6 +513,10 @@ acpi_ex_field_datum_io(union acpi_operand_object *obj_desc,
513 } else { 513 } else {
514 /* Write the datum to the data_register */ 514 /* Write the datum to the data_register */
515 515
516 ACPI_DEBUG_PRINT((ACPI_DB_BFIELD,
517 "Write to Data Register: Value %8.8X%8.8X\n",
518 ACPI_FORMAT_UINT64(*value)));
519
516 status = 520 status =
517 acpi_ex_insert_into_field(obj_desc->index_field. 521 acpi_ex_insert_into_field(obj_desc->index_field.
518 data_obj, value, 522 data_obj, value,
diff --git a/drivers/acpi/executer/exmisc.c b/drivers/acpi/acpica/exmisc.c
index efb191340059..6b0747ac683b 100644
--- a/drivers/acpi/executer/exmisc.c
+++ b/drivers/acpi/acpica/exmisc.c
@@ -43,9 +43,10 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acinterp.h> 46#include "accommon.h"
47#include <acpi/amlcode.h> 47#include "acinterp.h"
48#include <acpi/amlresrc.h> 48#include "amlcode.h"
49#include "amlresrc.h"
49 50
50#define _COMPONENT ACPI_EXECUTER 51#define _COMPONENT ACPI_EXECUTER
51ACPI_MODULE_NAME("exmisc") 52ACPI_MODULE_NAME("exmisc")
diff --git a/drivers/acpi/executer/exmutex.c b/drivers/acpi/acpica/exmutex.c
index a8bf3d713e28..d301c1f363ef 100644
--- a/drivers/acpi/executer/exmutex.c
+++ b/drivers/acpi/acpica/exmutex.c
@@ -43,8 +43,9 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acinterp.h> 46#include "accommon.h"
47#include <acpi/acevents.h> 47#include "acinterp.h"
48#include "acevents.h"
48 49
49#define _COMPONENT ACPI_EXECUTER 50#define _COMPONENT ACPI_EXECUTER
50ACPI_MODULE_NAME("exmutex") 51ACPI_MODULE_NAME("exmutex")
diff --git a/drivers/acpi/executer/exnames.c b/drivers/acpi/acpica/exnames.c
index 817e67be3697..ffdae122d94a 100644
--- a/drivers/acpi/executer/exnames.c
+++ b/drivers/acpi/acpica/exnames.c
@@ -43,8 +43,9 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acinterp.h> 46#include "accommon.h"
47#include <acpi/amlcode.h> 47#include "acinterp.h"
48#include "amlcode.h"
48 49
49#define _COMPONENT ACPI_EXECUTER 50#define _COMPONENT ACPI_EXECUTER
50ACPI_MODULE_NAME("exnames") 51ACPI_MODULE_NAME("exnames")
diff --git a/drivers/acpi/executer/exoparg1.c b/drivers/acpi/acpica/exoparg1.c
index f622f9eac8a1..b530480cc7d5 100644
--- a/drivers/acpi/executer/exoparg1.c
+++ b/drivers/acpi/acpica/exoparg1.c
@@ -43,11 +43,12 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acparser.h> 46#include "accommon.h"
47#include <acpi/acdispat.h> 47#include "acparser.h"
48#include <acpi/acinterp.h> 48#include "acdispat.h"
49#include <acpi/amlcode.h> 49#include "acinterp.h"
50#include <acpi/acnamesp.h> 50#include "amlcode.h"
51#include "acnamesp.h"
51 52
52#define _COMPONENT ACPI_EXECUTER 53#define _COMPONENT ACPI_EXECUTER
53ACPI_MODULE_NAME("exoparg1") 54ACPI_MODULE_NAME("exoparg1")
diff --git a/drivers/acpi/executer/exoparg2.c b/drivers/acpi/acpica/exoparg2.c
index 368def5dffce..0b4f513ca885 100644
--- a/drivers/acpi/executer/exoparg2.c
+++ b/drivers/acpi/acpica/exoparg2.c
@@ -42,10 +42,11 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acparser.h> 45#include "accommon.h"
46#include <acpi/acinterp.h> 46#include "acparser.h"
47#include <acpi/acevents.h> 47#include "acinterp.h"
48#include <acpi/amlcode.h> 48#include "acevents.h"
49#include "amlcode.h"
49 50
50#define _COMPONENT ACPI_EXECUTER 51#define _COMPONENT ACPI_EXECUTER
51ACPI_MODULE_NAME("exoparg2") 52ACPI_MODULE_NAME("exoparg2")
diff --git a/drivers/acpi/executer/exoparg3.c b/drivers/acpi/acpica/exoparg3.c
index 9cb4197681af..c6520bbf882b 100644
--- a/drivers/acpi/executer/exoparg3.c
+++ b/drivers/acpi/acpica/exoparg3.c
@@ -43,9 +43,10 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acinterp.h> 46#include "accommon.h"
47#include <acpi/acparser.h> 47#include "acinterp.h"
48#include <acpi/amlcode.h> 48#include "acparser.h"
49#include "amlcode.h"
49 50
50#define _COMPONENT ACPI_EXECUTER 51#define _COMPONENT ACPI_EXECUTER
51ACPI_MODULE_NAME("exoparg3") 52ACPI_MODULE_NAME("exoparg3")
diff --git a/drivers/acpi/executer/exoparg6.c b/drivers/acpi/acpica/exoparg6.c
index 67d48737af53..ae43f7670a6c 100644
--- a/drivers/acpi/executer/exoparg6.c
+++ b/drivers/acpi/acpica/exoparg6.c
@@ -43,9 +43,10 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acinterp.h> 46#include "accommon.h"
47#include <acpi/acparser.h> 47#include "acinterp.h"
48#include <acpi/amlcode.h> 48#include "acparser.h"
49#include "amlcode.h"
49 50
50#define _COMPONENT ACPI_EXECUTER 51#define _COMPONENT ACPI_EXECUTER
51ACPI_MODULE_NAME("exoparg6") 52ACPI_MODULE_NAME("exoparg6")
diff --git a/drivers/acpi/executer/exprep.c b/drivers/acpi/acpica/exprep.c
index a7dc87ecee37..a226f74d4a5c 100644
--- a/drivers/acpi/executer/exprep.c
+++ b/drivers/acpi/acpica/exprep.c
@@ -43,9 +43,10 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acinterp.h> 46#include "accommon.h"
47#include <acpi/amlcode.h> 47#include "acinterp.h"
48#include <acpi/acnamesp.h> 48#include "amlcode.h"
49#include "acnamesp.h"
49 50
50#define _COMPONENT ACPI_EXECUTER 51#define _COMPONENT ACPI_EXECUTER
51ACPI_MODULE_NAME("exprep") 52ACPI_MODULE_NAME("exprep")
diff --git a/drivers/acpi/executer/exregion.c b/drivers/acpi/acpica/exregion.c
index 7a41c409ae4d..76ec8ff903b8 100644
--- a/drivers/acpi/executer/exregion.c
+++ b/drivers/acpi/acpica/exregion.c
@@ -43,7 +43,8 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acinterp.h> 46#include "accommon.h"
47#include "acinterp.h"
47 48
48#define _COMPONENT ACPI_EXECUTER 49#define _COMPONENT ACPI_EXECUTER
49ACPI_MODULE_NAME("exregion") 50ACPI_MODULE_NAME("exregion")
diff --git a/drivers/acpi/executer/exresnte.c b/drivers/acpi/acpica/exresnte.c
index 423ad3635f3d..a063a74006f6 100644
--- a/drivers/acpi/executer/exresnte.c
+++ b/drivers/acpi/acpica/exresnte.c
@@ -43,9 +43,10 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acdispat.h> 46#include "accommon.h"
47#include <acpi/acinterp.h> 47#include "acdispat.h"
48#include <acpi/acnamesp.h> 48#include "acinterp.h"
49#include "acnamesp.h"
49 50
50#define _COMPONENT ACPI_EXECUTER 51#define _COMPONENT ACPI_EXECUTER
51ACPI_MODULE_NAME("exresnte") 52ACPI_MODULE_NAME("exresnte")
diff --git a/drivers/acpi/executer/exresolv.c b/drivers/acpi/acpica/exresolv.c
index 60e8c47128e9..f6105a6d6126 100644
--- a/drivers/acpi/executer/exresolv.c
+++ b/drivers/acpi/acpica/exresolv.c
@@ -43,10 +43,11 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/amlcode.h> 46#include "accommon.h"
47#include <acpi/acdispat.h> 47#include "amlcode.h"
48#include <acpi/acinterp.h> 48#include "acdispat.h"
49#include <acpi/acnamesp.h> 49#include "acinterp.h"
50#include "acnamesp.h"
50 51
51#define _COMPONENT ACPI_EXECUTER 52#define _COMPONENT ACPI_EXECUTER
52ACPI_MODULE_NAME("exresolv") 53ACPI_MODULE_NAME("exresolv")
diff --git a/drivers/acpi/executer/exresop.c b/drivers/acpi/acpica/exresop.c
index 0bb82593da72..3c3802764bfb 100644
--- a/drivers/acpi/executer/exresop.c
+++ b/drivers/acpi/acpica/exresop.c
@@ -43,10 +43,11 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/amlcode.h> 46#include "accommon.h"
47#include <acpi/acparser.h> 47#include "amlcode.h"
48#include <acpi/acinterp.h> 48#include "acparser.h"
49#include <acpi/acnamesp.h> 49#include "acinterp.h"
50#include "acnamesp.h"
50 51
51#define _COMPONENT ACPI_EXECUTER 52#define _COMPONENT ACPI_EXECUTER
52ACPI_MODULE_NAME("exresop") 53ACPI_MODULE_NAME("exresop")
diff --git a/drivers/acpi/executer/exstore.c b/drivers/acpi/acpica/exstore.c
index 1c118ba78adb..e35e9b4f6a4e 100644
--- a/drivers/acpi/executer/exstore.c
+++ b/drivers/acpi/acpica/exstore.c
@@ -43,10 +43,11 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acdispat.h> 46#include "accommon.h"
47#include <acpi/acinterp.h> 47#include "acdispat.h"
48#include <acpi/amlcode.h> 48#include "acinterp.h"
49#include <acpi/acnamesp.h> 49#include "amlcode.h"
50#include "acnamesp.h"
50 51
51#define _COMPONENT ACPI_EXECUTER 52#define _COMPONENT ACPI_EXECUTER
52ACPI_MODULE_NAME("exstore") 53ACPI_MODULE_NAME("exstore")
diff --git a/drivers/acpi/executer/exstoren.c b/drivers/acpi/acpica/exstoren.c
index eef61a00803e..145d15305f70 100644
--- a/drivers/acpi/executer/exstoren.c
+++ b/drivers/acpi/acpica/exstoren.c
@@ -44,8 +44,9 @@
44 */ 44 */
45 45
46#include <acpi/acpi.h> 46#include <acpi/acpi.h>
47#include <acpi/acinterp.h> 47#include "accommon.h"
48#include <acpi/amlcode.h> 48#include "acinterp.h"
49#include "amlcode.h"
49 50
50#define _COMPONENT ACPI_EXECUTER 51#define _COMPONENT ACPI_EXECUTER
51ACPI_MODULE_NAME("exstoren") 52ACPI_MODULE_NAME("exstoren")
diff --git a/drivers/acpi/executer/exstorob.c b/drivers/acpi/acpica/exstorob.c
index 9a75ff09fb0c..67340cc70142 100644
--- a/drivers/acpi/executer/exstorob.c
+++ b/drivers/acpi/acpica/exstorob.c
@@ -43,7 +43,8 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acinterp.h> 46#include "accommon.h"
47#include "acinterp.h"
47 48
48#define _COMPONENT ACPI_EXECUTER 49#define _COMPONENT ACPI_EXECUTER
49ACPI_MODULE_NAME("exstorob") 50ACPI_MODULE_NAME("exstorob")
diff --git a/drivers/acpi/executer/exsystem.c b/drivers/acpi/acpica/exsystem.c
index 68990f1df371..3d00b9357233 100644
--- a/drivers/acpi/executer/exsystem.c
+++ b/drivers/acpi/acpica/exsystem.c
@@ -43,7 +43,8 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acinterp.h> 46#include "accommon.h"
47#include "acinterp.h"
47 48
48#define _COMPONENT ACPI_EXECUTER 49#define _COMPONENT ACPI_EXECUTER
49ACPI_MODULE_NAME("exsystem") 50ACPI_MODULE_NAME("exsystem")
diff --git a/drivers/acpi/executer/exutils.c b/drivers/acpi/acpica/exutils.c
index 86c03880b523..32b85d68e756 100644
--- a/drivers/acpi/executer/exutils.c
+++ b/drivers/acpi/acpica/exutils.c
@@ -59,8 +59,9 @@
59#define DEFINE_AML_GLOBALS 59#define DEFINE_AML_GLOBALS
60 60
61#include <acpi/acpi.h> 61#include <acpi/acpi.h>
62#include <acpi/acinterp.h> 62#include "accommon.h"
63#include <acpi/amlcode.h> 63#include "acinterp.h"
64#include "amlcode.h"
64 65
65#define _COMPONENT ACPI_EXECUTER 66#define _COMPONENT ACPI_EXECUTER
66ACPI_MODULE_NAME("exutils") 67ACPI_MODULE_NAME("exutils")
diff --git a/drivers/acpi/hardware/hwacpi.c b/drivers/acpi/acpica/hwacpi.c
index 816894ea839e..a9d4fea4167f 100644
--- a/drivers/acpi/hardware/hwacpi.c
+++ b/drivers/acpi/acpica/hwacpi.c
@@ -43,6 +43,7 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include "accommon.h"
46 47
47#define _COMPONENT ACPI_HARDWARE 48#define _COMPONENT ACPI_HARDWARE
48ACPI_MODULE_NAME("hwacpi") 49ACPI_MODULE_NAME("hwacpi")
diff --git a/drivers/acpi/hardware/hwgpe.c b/drivers/acpi/acpica/hwgpe.c
index 0b80db9d9197..2013b66745d2 100644
--- a/drivers/acpi/hardware/hwgpe.c
+++ b/drivers/acpi/acpica/hwgpe.c
@@ -43,7 +43,8 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acevents.h> 46#include "accommon.h"
47#include "acevents.h"
47 48
48#define _COMPONENT ACPI_HARDWARE 49#define _COMPONENT ACPI_HARDWARE
49ACPI_MODULE_NAME("hwgpe") 50ACPI_MODULE_NAME("hwgpe")
@@ -51,7 +52,8 @@ ACPI_MODULE_NAME("hwgpe")
51/* Local prototypes */ 52/* Local prototypes */
52static acpi_status 53static acpi_status
53acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, 54acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
54 struct acpi_gpe_block_info *gpe_block); 55 struct acpi_gpe_block_info *gpe_block,
56 void *context);
55 57
56/****************************************************************************** 58/******************************************************************************
57 * 59 *
@@ -80,8 +82,7 @@ acpi_status acpi_hw_low_disable_gpe(struct acpi_gpe_event_info *gpe_event_info)
80 82
81 /* Get current value of the enable register that contains this GPE */ 83 /* Get current value of the enable register that contains this GPE */
82 84
83 status = acpi_hw_low_level_read(ACPI_GPE_REGISTER_WIDTH, &enable_mask, 85 status = acpi_read(&enable_mask, &gpe_register_info->enable_address);
84 &gpe_register_info->enable_address);
85 if (ACPI_FAILURE(status)) { 86 if (ACPI_FAILURE(status)) {
86 return (status); 87 return (status);
87 } 88 }
@@ -95,9 +96,7 @@ acpi_status acpi_hw_low_disable_gpe(struct acpi_gpe_event_info *gpe_event_info)
95 96
96 /* Write the updated enable mask */ 97 /* Write the updated enable mask */
97 98
98 status = acpi_hw_low_level_write(ACPI_GPE_REGISTER_WIDTH, enable_mask, 99 status = acpi_write(enable_mask, &gpe_register_info->enable_address);
99 &gpe_register_info->enable_address);
100
101 return (status); 100 return (status);
102} 101}
103 102
@@ -132,8 +131,8 @@ acpi_hw_write_gpe_enable_reg(struct acpi_gpe_event_info * gpe_event_info)
132 131
133 /* Write the entire GPE (runtime) enable register */ 132 /* Write the entire GPE (runtime) enable register */
134 133
135 status = acpi_hw_low_level_write(8, gpe_register_info->enable_for_run, 134 status = acpi_write(gpe_register_info->enable_for_run,
136 &gpe_register_info->enable_address); 135 &gpe_register_info->enable_address);
137 136
138 return (status); 137 return (status);
139} 138}
@@ -166,9 +165,8 @@ acpi_status acpi_hw_clear_gpe(struct acpi_gpe_event_info * gpe_event_info)
166 * Write a one to the appropriate bit in the status register to 165 * Write a one to the appropriate bit in the status register to
167 * clear this GPE. 166 * clear this GPE.
168 */ 167 */
169 status = acpi_hw_low_level_write(8, register_bit, 168 status = acpi_write(register_bit,
170 &gpe_event_info->register_info-> 169 &gpe_event_info->register_info->status_address);
171 status_address);
172 170
173 return (status); 171 return (status);
174} 172}
@@ -227,9 +225,7 @@ acpi_hw_get_gpe_status(struct acpi_gpe_event_info * gpe_event_info,
227 225
228 /* GPE currently active (status bit == 1)? */ 226 /* GPE currently active (status bit == 1)? */
229 227
230 status = 228 status = acpi_read(&in_byte, &gpe_register_info->status_address);
231 acpi_hw_low_level_read(8, &in_byte,
232 &gpe_register_info->status_address);
233 if (ACPI_FAILURE(status)) { 229 if (ACPI_FAILURE(status)) {
234 goto unlock_and_exit; 230 goto unlock_and_exit;
235 } 231 }
@@ -260,8 +256,8 @@ acpi_hw_get_gpe_status(struct acpi_gpe_event_info * gpe_event_info,
260 ******************************************************************************/ 256 ******************************************************************************/
261 257
262acpi_status 258acpi_status
263acpi_hw_disable_gpe_block(struct acpi_gpe_xrupt_info * gpe_xrupt_info, 259acpi_hw_disable_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
264 struct acpi_gpe_block_info * gpe_block) 260 struct acpi_gpe_block_info *gpe_block, void *context)
265{ 261{
266 u32 i; 262 u32 i;
267 acpi_status status; 263 acpi_status status;
@@ -272,9 +268,9 @@ acpi_hw_disable_gpe_block(struct acpi_gpe_xrupt_info * gpe_xrupt_info,
272 268
273 /* Disable all GPEs in this register */ 269 /* Disable all GPEs in this register */
274 270
275 status = acpi_hw_low_level_write(8, 0x00, 271 status =
276 &gpe_block->register_info[i]. 272 acpi_write(0x00,
277 enable_address); 273 &gpe_block->register_info[i].enable_address);
278 if (ACPI_FAILURE(status)) { 274 if (ACPI_FAILURE(status)) {
279 return (status); 275 return (status);
280 } 276 }
@@ -297,8 +293,8 @@ acpi_hw_disable_gpe_block(struct acpi_gpe_xrupt_info * gpe_xrupt_info,
297 ******************************************************************************/ 293 ******************************************************************************/
298 294
299acpi_status 295acpi_status
300acpi_hw_clear_gpe_block(struct acpi_gpe_xrupt_info * gpe_xrupt_info, 296acpi_hw_clear_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
301 struct acpi_gpe_block_info * gpe_block) 297 struct acpi_gpe_block_info *gpe_block, void *context)
302{ 298{
303 u32 i; 299 u32 i;
304 acpi_status status; 300 acpi_status status;
@@ -309,9 +305,9 @@ acpi_hw_clear_gpe_block(struct acpi_gpe_xrupt_info * gpe_xrupt_info,
309 305
310 /* Clear status on all GPEs in this register */ 306 /* Clear status on all GPEs in this register */
311 307
312 status = acpi_hw_low_level_write(8, 0xFF, 308 status =
313 &gpe_block->register_info[i]. 309 acpi_write(0xFF,
314 status_address); 310 &gpe_block->register_info[i].status_address);
315 if (ACPI_FAILURE(status)) { 311 if (ACPI_FAILURE(status)) {
316 return (status); 312 return (status);
317 } 313 }
@@ -335,8 +331,8 @@ acpi_hw_clear_gpe_block(struct acpi_gpe_xrupt_info * gpe_xrupt_info,
335 ******************************************************************************/ 331 ******************************************************************************/
336 332
337acpi_status 333acpi_status
338acpi_hw_enable_runtime_gpe_block(struct acpi_gpe_xrupt_info * gpe_xrupt_info, 334acpi_hw_enable_runtime_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
339 struct acpi_gpe_block_info * gpe_block) 335 struct acpi_gpe_block_info *gpe_block, void *context)
340{ 336{
341 u32 i; 337 u32 i;
342 acpi_status status; 338 acpi_status status;
@@ -352,12 +348,9 @@ acpi_hw_enable_runtime_gpe_block(struct acpi_gpe_xrupt_info * gpe_xrupt_info,
352 348
353 /* Enable all "runtime" GPEs in this register */ 349 /* Enable all "runtime" GPEs in this register */
354 350
355 status = 351 status = acpi_write(gpe_block->register_info[i].enable_for_run,
356 acpi_hw_low_level_write(8, 352 &gpe_block->register_info[i].
357 gpe_block->register_info[i]. 353 enable_address);
358 enable_for_run,
359 &gpe_block->register_info[i].
360 enable_address);
361 if (ACPI_FAILURE(status)) { 354 if (ACPI_FAILURE(status)) {
362 return (status); 355 return (status);
363 } 356 }
@@ -382,7 +375,8 @@ acpi_hw_enable_runtime_gpe_block(struct acpi_gpe_xrupt_info * gpe_xrupt_info,
382 375
383static acpi_status 376static acpi_status
384acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, 377acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
385 struct acpi_gpe_block_info *gpe_block) 378 struct acpi_gpe_block_info *gpe_block,
379 void *context)
386{ 380{
387 u32 i; 381 u32 i;
388 acpi_status status; 382 acpi_status status;
@@ -396,11 +390,9 @@ acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
396 390
397 /* Enable all "wake" GPEs in this register */ 391 /* Enable all "wake" GPEs in this register */
398 392
399 status = acpi_hw_low_level_write(8, 393 status = acpi_write(gpe_block->register_info[i].enable_for_wake,
400 gpe_block->register_info[i]. 394 &gpe_block->register_info[i].
401 enable_for_wake, 395 enable_address);
402 &gpe_block->register_info[i].
403 enable_address);
404 if (ACPI_FAILURE(status)) { 396 if (ACPI_FAILURE(status)) {
405 return (status); 397 return (status);
406 } 398 }
@@ -427,8 +419,8 @@ acpi_status acpi_hw_disable_all_gpes(void)
427 419
428 ACPI_FUNCTION_TRACE(hw_disable_all_gpes); 420 ACPI_FUNCTION_TRACE(hw_disable_all_gpes);
429 421
430 status = acpi_ev_walk_gpe_list(acpi_hw_disable_gpe_block); 422 status = acpi_ev_walk_gpe_list(acpi_hw_disable_gpe_block, NULL);
431 status = acpi_ev_walk_gpe_list(acpi_hw_clear_gpe_block); 423 status = acpi_ev_walk_gpe_list(acpi_hw_clear_gpe_block, NULL);
432 return_ACPI_STATUS(status); 424 return_ACPI_STATUS(status);
433} 425}
434 426
@@ -450,7 +442,7 @@ acpi_status acpi_hw_enable_all_runtime_gpes(void)
450 442
451 ACPI_FUNCTION_TRACE(hw_enable_all_runtime_gpes); 443 ACPI_FUNCTION_TRACE(hw_enable_all_runtime_gpes);
452 444
453 status = acpi_ev_walk_gpe_list(acpi_hw_enable_runtime_gpe_block); 445 status = acpi_ev_walk_gpe_list(acpi_hw_enable_runtime_gpe_block, NULL);
454 return_ACPI_STATUS(status); 446 return_ACPI_STATUS(status);
455} 447}
456 448
@@ -472,6 +464,6 @@ acpi_status acpi_hw_enable_all_wakeup_gpes(void)
472 464
473 ACPI_FUNCTION_TRACE(hw_enable_all_wakeup_gpes); 465 ACPI_FUNCTION_TRACE(hw_enable_all_wakeup_gpes);
474 466
475 status = acpi_ev_walk_gpe_list(acpi_hw_enable_wakeup_gpe_block); 467 status = acpi_ev_walk_gpe_list(acpi_hw_enable_wakeup_gpe_block, NULL);
476 return_ACPI_STATUS(status); 468 return_ACPI_STATUS(status);
477} 469}
diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c
new file mode 100644
index 000000000000..4dc43b018517
--- /dev/null
+++ b/drivers/acpi/acpica/hwregs.c
@@ -0,0 +1,353 @@
1
2/*******************************************************************************
3 *
4 * Module Name: hwregs - Read/write access functions for the various ACPI
5 * control and status registers.
6 *
7 ******************************************************************************/
8
9/*
10 * Copyright (C) 2000 - 2008, Intel Corp.
11 * All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions, and the following disclaimer,
18 * without modification.
19 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
20 * substantially similar to the "NO WARRANTY" disclaimer below
21 * ("Disclaimer") and any redistribution must be conditioned upon
22 * including a substantially similar Disclaimer requirement for further
23 * binary redistribution.
24 * 3. Neither the names of the above-listed copyright holders nor the names
25 * of any contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * Alternatively, this software may be distributed under the terms of the
29 * GNU General Public License ("GPL") version 2 as published by the Free
30 * Software Foundation.
31 *
32 * NO WARRANTY
33 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
34 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
35 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
36 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
37 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
42 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 * POSSIBILITY OF SUCH DAMAGES.
44 */
45
46#include <acpi/acpi.h>
47#include "accommon.h"
48#include "acnamesp.h"
49#include "acevents.h"
50
51#define _COMPONENT ACPI_HARDWARE
52ACPI_MODULE_NAME("hwregs")
53
54/*******************************************************************************
55 *
56 * FUNCTION: acpi_hw_clear_acpi_status
57 *
58 * PARAMETERS: None
59 *
60 * RETURN: Status
61 *
62 * DESCRIPTION: Clears all fixed and general purpose status bits
63 * THIS FUNCTION MUST BE CALLED WITH INTERRUPTS DISABLED
64 *
65 ******************************************************************************/
66acpi_status acpi_hw_clear_acpi_status(void)
67{
68 acpi_status status;
69 acpi_cpu_flags lock_flags = 0;
70
71 ACPI_FUNCTION_TRACE(hw_clear_acpi_status);
72
73 ACPI_DEBUG_PRINT((ACPI_DB_IO, "About to write %04X to %04X\n",
74 ACPI_BITMASK_ALL_FIXED_STATUS,
75 (u16) acpi_gbl_FADT.xpm1a_event_block.address));
76
77 lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
78
79 status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS,
80 ACPI_BITMASK_ALL_FIXED_STATUS);
81 if (ACPI_FAILURE(status)) {
82 goto unlock_and_exit;
83 }
84
85 /* Clear the fixed events */
86
87 if (acpi_gbl_FADT.xpm1b_event_block.address) {
88 status = acpi_write(ACPI_BITMASK_ALL_FIXED_STATUS,
89 &acpi_gbl_FADT.xpm1b_event_block);
90 if (ACPI_FAILURE(status)) {
91 goto unlock_and_exit;
92 }
93 }
94
95 /* Clear the GPE Bits in all GPE registers in all GPE blocks */
96
97 status = acpi_ev_walk_gpe_list(acpi_hw_clear_gpe_block, NULL);
98
99 unlock_and_exit:
100 acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
101 return_ACPI_STATUS(status);
102}
103
104/*******************************************************************************
105 *
106 * FUNCTION: acpi_hw_get_register_bit_mask
107 *
108 * PARAMETERS: register_id - Index of ACPI Register to access
109 *
110 * RETURN: The bitmask to be used when accessing the register
111 *
112 * DESCRIPTION: Map register_id into a register bitmask.
113 *
114 ******************************************************************************/
115
116struct acpi_bit_register_info *acpi_hw_get_bit_register_info(u32 register_id)
117{
118 ACPI_FUNCTION_ENTRY();
119
120 if (register_id > ACPI_BITREG_MAX) {
121 ACPI_ERROR((AE_INFO, "Invalid BitRegister ID: %X",
122 register_id));
123 return (NULL);
124 }
125
126 return (&acpi_gbl_bit_register_info[register_id]);
127}
128
129/******************************************************************************
130 *
131 * FUNCTION: acpi_hw_register_read
132 *
133 * PARAMETERS: register_id - ACPI Register ID
134 * return_value - Where the register value is returned
135 *
136 * RETURN: Status and the value read.
137 *
138 * DESCRIPTION: Read from the specified ACPI register
139 *
140 ******************************************************************************/
141acpi_status
142acpi_hw_register_read(u32 register_id, u32 * return_value)
143{
144 u32 value1 = 0;
145 u32 value2 = 0;
146 acpi_status status;
147
148 ACPI_FUNCTION_TRACE(hw_register_read);
149
150 switch (register_id) {
151 case ACPI_REGISTER_PM1_STATUS: /* 16-bit access */
152
153 status = acpi_read(&value1, &acpi_gbl_FADT.xpm1a_event_block);
154 if (ACPI_FAILURE(status)) {
155 goto exit;
156 }
157
158 /* PM1B is optional */
159
160 status = acpi_read(&value2, &acpi_gbl_FADT.xpm1b_event_block);
161 value1 |= value2;
162 break;
163
164 case ACPI_REGISTER_PM1_ENABLE: /* 16-bit access */
165
166 status = acpi_read(&value1, &acpi_gbl_xpm1a_enable);
167 if (ACPI_FAILURE(status)) {
168 goto exit;
169 }
170
171 /* PM1B is optional */
172
173 status = acpi_read(&value2, &acpi_gbl_xpm1b_enable);
174 value1 |= value2;
175 break;
176
177 case ACPI_REGISTER_PM1_CONTROL: /* 16-bit access */
178
179 status = acpi_read(&value1, &acpi_gbl_FADT.xpm1a_control_block);
180 if (ACPI_FAILURE(status)) {
181 goto exit;
182 }
183
184 status = acpi_read(&value2, &acpi_gbl_FADT.xpm1b_control_block);
185 value1 |= value2;
186 break;
187
188 case ACPI_REGISTER_PM2_CONTROL: /* 8-bit access */
189
190 status = acpi_read(&value1, &acpi_gbl_FADT.xpm2_control_block);
191 break;
192
193 case ACPI_REGISTER_PM_TIMER: /* 32-bit access */
194
195 status = acpi_read(&value1, &acpi_gbl_FADT.xpm_timer_block);
196 break;
197
198 case ACPI_REGISTER_SMI_COMMAND_BLOCK: /* 8-bit access */
199
200 status =
201 acpi_os_read_port(acpi_gbl_FADT.smi_command, &value1, 8);
202 break;
203
204 default:
205 ACPI_ERROR((AE_INFO, "Unknown Register ID: %X", register_id));
206 status = AE_BAD_PARAMETER;
207 break;
208 }
209
210 exit:
211
212 if (ACPI_SUCCESS(status)) {
213 *return_value = value1;
214 }
215
216 return_ACPI_STATUS(status);
217}
218
219/******************************************************************************
220 *
221 * FUNCTION: acpi_hw_register_write
222 *
223 * PARAMETERS: register_id - ACPI Register ID
224 * Value - The value to write
225 *
226 * RETURN: Status
227 *
228 * DESCRIPTION: Write to the specified ACPI register
229 *
230 * NOTE: In accordance with the ACPI specification, this function automatically
231 * preserves the value of the following bits, meaning that these bits cannot be
232 * changed via this interface:
233 *
234 * PM1_CONTROL[0] = SCI_EN
235 * PM1_CONTROL[9]
236 * PM1_STATUS[11]
237 *
238 * ACPI References:
239 * 1) Hardware Ignored Bits: When software writes to a register with ignored
240 * bit fields, it preserves the ignored bit fields
241 * 2) SCI_EN: OSPM always preserves this bit position
242 *
243 ******************************************************************************/
244
245acpi_status acpi_hw_register_write(u32 register_id, u32 value)
246{
247 acpi_status status;
248 u32 read_value;
249
250 ACPI_FUNCTION_TRACE(hw_register_write);
251
252 switch (register_id) {
253 case ACPI_REGISTER_PM1_STATUS: /* 16-bit access */
254
255 /* Perform a read first to preserve certain bits (per ACPI spec) */
256
257 status = acpi_hw_register_read(ACPI_REGISTER_PM1_STATUS,
258 &read_value);
259 if (ACPI_FAILURE(status)) {
260 goto exit;
261 }
262
263 /* Insert the bits to be preserved */
264
265 ACPI_INSERT_BITS(value, ACPI_PM1_STATUS_PRESERVED_BITS,
266 read_value);
267
268 /* Now we can write the data */
269
270 status = acpi_write(value, &acpi_gbl_FADT.xpm1a_event_block);
271 if (ACPI_FAILURE(status)) {
272 goto exit;
273 }
274
275 /* PM1B is optional */
276
277 status = acpi_write(value, &acpi_gbl_FADT.xpm1b_event_block);
278 break;
279
280 case ACPI_REGISTER_PM1_ENABLE: /* 16-bit access */
281
282 status = acpi_write(value, &acpi_gbl_xpm1a_enable);
283 if (ACPI_FAILURE(status)) {
284 goto exit;
285 }
286
287 /* PM1B is optional */
288
289 status = acpi_write(value, &acpi_gbl_xpm1b_enable);
290 break;
291
292 case ACPI_REGISTER_PM1_CONTROL: /* 16-bit access */
293
294 /*
295 * Perform a read first to preserve certain bits (per ACPI spec)
296 */
297 status = acpi_hw_register_read(ACPI_REGISTER_PM1_CONTROL,
298 &read_value);
299 if (ACPI_FAILURE(status)) {
300 goto exit;
301 }
302
303 /* Insert the bits to be preserved */
304
305 ACPI_INSERT_BITS(value, ACPI_PM1_CONTROL_PRESERVED_BITS,
306 read_value);
307
308 /* Now we can write the data */
309
310 status = acpi_write(value, &acpi_gbl_FADT.xpm1a_control_block);
311 if (ACPI_FAILURE(status)) {
312 goto exit;
313 }
314
315 status = acpi_write(value, &acpi_gbl_FADT.xpm1b_control_block);
316 break;
317
318 case ACPI_REGISTER_PM1A_CONTROL: /* 16-bit access */
319
320 status = acpi_write(value, &acpi_gbl_FADT.xpm1a_control_block);
321 break;
322
323 case ACPI_REGISTER_PM1B_CONTROL: /* 16-bit access */
324
325 status = acpi_write(value, &acpi_gbl_FADT.xpm1b_control_block);
326 break;
327
328 case ACPI_REGISTER_PM2_CONTROL: /* 8-bit access */
329
330 status = acpi_write(value, &acpi_gbl_FADT.xpm2_control_block);
331 break;
332
333 case ACPI_REGISTER_PM_TIMER: /* 32-bit access */
334
335 status = acpi_write(value, &acpi_gbl_FADT.xpm_timer_block);
336 break;
337
338 case ACPI_REGISTER_SMI_COMMAND_BLOCK: /* 8-bit access */
339
340 /* SMI_CMD is currently always in IO space */
341
342 status =
343 acpi_os_write_port(acpi_gbl_FADT.smi_command, value, 8);
344 break;
345
346 default:
347 status = AE_BAD_PARAMETER;
348 break;
349 }
350
351 exit:
352 return_ACPI_STATUS(status);
353}
diff --git a/drivers/acpi/hardware/hwsleep.c b/drivers/acpi/acpica/hwsleep.c
index 25dccdf179b9..a2af2a4f2f26 100644
--- a/drivers/acpi/hardware/hwsleep.c
+++ b/drivers/acpi/acpica/hwsleep.c
@@ -43,7 +43,8 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/actables.h> 46#include "accommon.h"
47#include "actables.h"
47 48
48#define _COMPONENT ACPI_HARDWARE 49#define _COMPONENT ACPI_HARDWARE
49ACPI_MODULE_NAME("hwsleep") 50ACPI_MODULE_NAME("hwsleep")
@@ -52,31 +53,19 @@ ACPI_MODULE_NAME("hwsleep")
52 * 53 *
53 * FUNCTION: acpi_set_firmware_waking_vector 54 * FUNCTION: acpi_set_firmware_waking_vector
54 * 55 *
55 * PARAMETERS: physical_address - Physical address of ACPI real mode 56 * PARAMETERS: physical_address - 32-bit physical address of ACPI real mode
56 * entry point. 57 * entry point.
57 * 58 *
58 * RETURN: Status 59 * RETURN: Status
59 * 60 *
60 * DESCRIPTION: Access function for the firmware_waking_vector field in FACS 61 * DESCRIPTION: Sets the 32-bit firmware_waking_vector field of the FACS
61 * 62 *
62 ******************************************************************************/ 63 ******************************************************************************/
63acpi_status 64acpi_status
64acpi_set_firmware_waking_vector(acpi_physical_address physical_address) 65acpi_set_firmware_waking_vector(u32 physical_address)
65{ 66{
66 struct acpi_table_facs *facs;
67 acpi_status status;
68
69 ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vector); 67 ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vector);
70 68
71 /* Get the FACS */
72
73 status = acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS,
74 ACPI_CAST_INDIRECT_PTR(struct
75 acpi_table_header,
76 &facs));
77 if (ACPI_FAILURE(status)) {
78 return_ACPI_STATUS(status);
79 }
80 69
81 /* 70 /*
82 * According to the ACPI specification 2.0c and later, the 64-bit 71 * According to the ACPI specification 2.0c and later, the 64-bit
@@ -85,10 +74,16 @@ acpi_set_firmware_waking_vector(acpi_physical_address physical_address)
85 * Protected Mode. Some systems (for example HP dv5-1004nr) are known 74 * Protected Mode. Some systems (for example HP dv5-1004nr) are known
86 * to fail to resume if the 64-bit vector is used. 75 * to fail to resume if the 64-bit vector is used.
87 */ 76 */
88 if (facs->version >= 1)
89 facs->xfirmware_waking_vector = 0;
90 77
91 facs->firmware_waking_vector = (u32)physical_address; 78 /* Set the 32-bit vector */
79
80 acpi_gbl_FACS->firmware_waking_vector = physical_address;
81
82 /* Clear the 64-bit vector if it exists */
83
84 if ((acpi_gbl_FACS->length > 32) && (acpi_gbl_FACS->version >= 1)) {
85 acpi_gbl_FACS->xfirmware_waking_vector = 0;
86 }
92 87
93 return_ACPI_STATUS(AE_OK); 88 return_ACPI_STATUS(AE_OK);
94} 89}
@@ -97,48 +92,39 @@ ACPI_EXPORT_SYMBOL(acpi_set_firmware_waking_vector)
97 92
98/******************************************************************************* 93/*******************************************************************************
99 * 94 *
100 * FUNCTION: acpi_get_firmware_waking_vector 95 * FUNCTION: acpi_set_firmware_waking_vector64
101 * 96 *
102 * PARAMETERS: *physical_address - Where the contents of 97 * PARAMETERS: physical_address - 64-bit physical address of ACPI protected
103 * the firmware_waking_vector field of 98 * mode entry point.
104 * the FACS will be returned.
105 * 99 *
106 * RETURN: Status, vector 100 * RETURN: Status
107 * 101 *
108 * DESCRIPTION: Access function for the firmware_waking_vector field in FACS 102 * DESCRIPTION: Sets the 64-bit X_firmware_waking_vector field of the FACS, if
103 * it exists in the table.
109 * 104 *
110 ******************************************************************************/ 105 ******************************************************************************/
111#ifdef ACPI_FUTURE_USAGE
112acpi_status 106acpi_status
113acpi_get_firmware_waking_vector(acpi_physical_address * physical_address) 107acpi_set_firmware_waking_vector64(u64 physical_address)
114{ 108{
115 struct acpi_table_facs *facs; 109 ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vector64);
116 acpi_status status;
117 110
118 ACPI_FUNCTION_TRACE(acpi_get_firmware_waking_vector);
119
120 if (!physical_address) {
121 return_ACPI_STATUS(AE_BAD_PARAMETER);
122 }
123 111
124 /* Get the FACS */ 112 /* Determine if the 64-bit vector actually exists */
125 113
126 status = acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS, 114 if ((acpi_gbl_FACS->length <= 32) || (acpi_gbl_FACS->version < 1)) {
127 ACPI_CAST_INDIRECT_PTR(struct 115 return_ACPI_STATUS(AE_NOT_EXIST);
128 acpi_table_header,
129 &facs));
130 if (ACPI_FAILURE(status)) {
131 return_ACPI_STATUS(status);
132 } 116 }
133 117
134 /* Get the vector */ 118 /* Clear 32-bit vector, set the 64-bit X_ vector */
135 *physical_address = (acpi_physical_address)facs->firmware_waking_vector; 119
120 acpi_gbl_FACS->firmware_waking_vector = 0;
121 acpi_gbl_FACS->xfirmware_waking_vector = physical_address;
136 122
137 return_ACPI_STATUS(AE_OK); 123 return_ACPI_STATUS(AE_OK);
138} 124}
139 125
140ACPI_EXPORT_SYMBOL(acpi_get_firmware_waking_vector) 126ACPI_EXPORT_SYMBOL(acpi_set_firmware_waking_vector64)
141#endif 127
142/******************************************************************************* 128/*******************************************************************************
143 * 129 *
144 * FUNCTION: acpi_enter_sleep_state_prep 130 * FUNCTION: acpi_enter_sleep_state_prep
diff --git a/drivers/acpi/hardware/hwtimer.c b/drivers/acpi/acpica/hwtimer.c
index b53d575491b9..b7f522c8f023 100644
--- a/drivers/acpi/hardware/hwtimer.c
+++ b/drivers/acpi/acpica/hwtimer.c
@@ -43,6 +43,7 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include "accommon.h"
46 47
47#define _COMPONENT ACPI_HARDWARE 48#define _COMPONENT ACPI_HARDWARE
48ACPI_MODULE_NAME("hwtimer") 49ACPI_MODULE_NAME("hwtimer")
diff --git a/drivers/acpi/hardware/hwregs.c b/drivers/acpi/acpica/hwxface.c
index ddf792adcf96..ae597c0ab53f 100644
--- a/drivers/acpi/hardware/hwregs.c
+++ b/drivers/acpi/acpica/hwxface.c
@@ -1,10 +1,9 @@
1 1
2/******************************************************************************* 2/******************************************************************************
3 * 3 *
4 * Module Name: hwregs - Read/write access functions for the various ACPI 4 * Module Name: hwxface - Public ACPICA hardware interfaces
5 * control and status registers.
6 * 5 *
7 ******************************************************************************/ 6 *****************************************************************************/
8 7
9/* 8/*
10 * Copyright (C) 2000 - 2008, Intel Corp. 9 * Copyright (C) 2000 - 2008, Intel Corp.
@@ -44,209 +43,208 @@
44 */ 43 */
45 44
46#include <acpi/acpi.h> 45#include <acpi/acpi.h>
47#include <acpi/acnamesp.h> 46#include "accommon.h"
48#include <acpi/acevents.h> 47#include "acnamesp.h"
49 48
50#define _COMPONENT ACPI_HARDWARE 49#define _COMPONENT ACPI_HARDWARE
51ACPI_MODULE_NAME("hwregs") 50ACPI_MODULE_NAME("hwxface")
52 51
53/******************************************************************************* 52/******************************************************************************
54 * 53 *
55 * FUNCTION: acpi_hw_clear_acpi_status 54 * FUNCTION: acpi_reset
56 * 55 *
57 * PARAMETERS: None 56 * PARAMETERS: None
58 * 57 *
59 * RETURN: None 58 * RETURN: Status
60 * 59 *
61 * DESCRIPTION: Clears all fixed and general purpose status bits 60 * DESCRIPTION: Set reset register in memory or IO space. Note: Does not
62 * THIS FUNCTION MUST BE CALLED WITH INTERRUPTS DISABLED 61 * support reset register in PCI config space, this must be
62 * handled separately.
63 * 63 *
64 ******************************************************************************/ 64 ******************************************************************************/
65acpi_status acpi_hw_clear_acpi_status(void) 65acpi_status acpi_reset(void)
66{ 66{
67 struct acpi_generic_address *reset_reg;
67 acpi_status status; 68 acpi_status status;
68 acpi_cpu_flags lock_flags = 0;
69 69
70 ACPI_FUNCTION_TRACE(hw_clear_acpi_status); 70 ACPI_FUNCTION_TRACE(acpi_reset);
71 71
72 ACPI_DEBUG_PRINT((ACPI_DB_IO, "About to write %04X to %04X\n", 72 reset_reg = &acpi_gbl_FADT.reset_register;
73 ACPI_BITMASK_ALL_FIXED_STATUS,
74 (u16) acpi_gbl_FADT.xpm1a_event_block.address));
75 73
76 lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); 74 /* Check if the reset register is supported */
77 75
78 status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS, 76 if (!(acpi_gbl_FADT.flags & ACPI_FADT_RESET_REGISTER) ||
79 ACPI_BITMASK_ALL_FIXED_STATUS); 77 !reset_reg->address) {
80 if (ACPI_FAILURE(status)) { 78 return_ACPI_STATUS(AE_NOT_EXIST);
81 goto unlock_and_exit;
82 } 79 }
83 80
84 /* Clear the fixed events */ 81 /* Write the reset value to the reset register */
85
86 if (acpi_gbl_FADT.xpm1b_event_block.address) {
87 status =
88 acpi_hw_low_level_write(16, ACPI_BITMASK_ALL_FIXED_STATUS,
89 &acpi_gbl_FADT.xpm1b_event_block);
90 if (ACPI_FAILURE(status)) {
91 goto unlock_and_exit;
92 }
93 }
94
95 /* Clear the GPE Bits in all GPE registers in all GPE blocks */
96
97 status = acpi_ev_walk_gpe_list(acpi_hw_clear_gpe_block);
98 82
99 unlock_and_exit: 83 status = acpi_write(acpi_gbl_FADT.reset_value, reset_reg);
100 acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
101 return_ACPI_STATUS(status); 84 return_ACPI_STATUS(status);
102} 85}
103 86
104/******************************************************************************* 87ACPI_EXPORT_SYMBOL(acpi_reset)
88
89/******************************************************************************
105 * 90 *
106 * FUNCTION: acpi_get_sleep_type_data 91 * FUNCTION: acpi_read
107 * 92 *
108 * PARAMETERS: sleep_state - Numeric sleep state 93 * PARAMETERS: Value - Where the value is returned
109 * *sleep_type_a - Where SLP_TYPa is returned 94 * Reg - GAS register structure
110 * *sleep_type_b - Where SLP_TYPb is returned
111 * 95 *
112 * RETURN: Status - ACPI status 96 * RETURN: Status
113 * 97 *
114 * DESCRIPTION: Obtain the SLP_TYPa and SLP_TYPb values for the requested sleep 98 * DESCRIPTION: Read from either memory or IO space.
115 * state.
116 * 99 *
117 ******************************************************************************/ 100 ******************************************************************************/
118 101acpi_status acpi_read(u32 *value, struct acpi_generic_address *reg)
119acpi_status
120acpi_get_sleep_type_data(u8 sleep_state, u8 * sleep_type_a, u8 * sleep_type_b)
121{ 102{
122 acpi_status status = AE_OK; 103 u32 width;
123 struct acpi_evaluate_info *info; 104 u64 address;
124 105 acpi_status status;
125 ACPI_FUNCTION_TRACE(acpi_get_sleep_type_data);
126
127 /* Validate parameters */
128
129 if ((sleep_state > ACPI_S_STATES_MAX) || !sleep_type_a || !sleep_type_b) {
130 return_ACPI_STATUS(AE_BAD_PARAMETER);
131 }
132 106
133 /* Allocate the evaluation information block */ 107 ACPI_FUNCTION_NAME(acpi_read);
134 108
135 info = ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_evaluate_info)); 109 /*
136 if (!info) { 110 * Must have a valid pointer to a GAS structure, and
137 return_ACPI_STATUS(AE_NO_MEMORY); 111 * a non-zero address within. However, don't return an error
112 * because the PM1A/B code must not fail if B isn't present.
113 */
114 if (!reg) {
115 return (AE_OK);
138 } 116 }
139 117
140 info->pathname = 118 /* Get a local copy of the address. Handles possible alignment issues */
141 ACPI_CAST_PTR(char, acpi_gbl_sleep_state_names[sleep_state]);
142
143 /* Evaluate the namespace object containing the values for this state */
144
145 status = acpi_ns_evaluate(info);
146 if (ACPI_FAILURE(status)) {
147 ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
148 "%s while evaluating SleepState [%s]\n",
149 acpi_format_exception(status),
150 info->pathname));
151 119
152 goto cleanup; 120 ACPI_MOVE_64_TO_64(&address, &reg->address);
121 if (!address) {
122 return (AE_OK);
153 } 123 }
154 124
155 /* Must have a return object */ 125 /* Supported widths are 8/16/32 */
156 126
157 if (!info->return_object) { 127 width = reg->bit_width;
158 ACPI_ERROR((AE_INFO, "No Sleep State object returned from [%s]", 128 if ((width != 8) && (width != 16) && (width != 32)) {
159 info->pathname)); 129 return (AE_SUPPORT);
160 status = AE_NOT_EXIST;
161 } 130 }
162 131
163 /* It must be of type Package */ 132 /* Initialize entire 32-bit return value to zero */
164 133
165 else if (ACPI_GET_OBJECT_TYPE(info->return_object) != ACPI_TYPE_PACKAGE) { 134 *value = 0;
166 ACPI_ERROR((AE_INFO,
167 "Sleep State return object is not a Package"));
168 status = AE_AML_OPERAND_TYPE;
169 }
170 135
171 /* 136 /*
172 * The package must have at least two elements. NOTE (March 2005): This 137 * Two address spaces supported: Memory or IO.
173 * goes against the current ACPI spec which defines this object as a 138 * PCI_Config is not supported here because the GAS struct is insufficient
174 * package with one encoded DWORD element. However, existing practice
175 * by BIOS vendors seems to be to have 2 or more elements, at least
176 * one per sleep type (A/B).
177 */ 139 */
178 else if (info->return_object->package.count < 2) { 140 switch (reg->space_id) {
179 ACPI_ERROR((AE_INFO, 141 case ACPI_ADR_SPACE_SYSTEM_MEMORY:
180 "Sleep State return package does not have at least two elements"));
181 status = AE_AML_NO_OPERAND;
182 }
183 142
184 /* The first two elements must both be of type Integer */ 143 status = acpi_os_read_memory((acpi_physical_address) address,
144 value, width);
145 break;
185 146
186 else if ((ACPI_GET_OBJECT_TYPE(info->return_object->package.elements[0]) 147 case ACPI_ADR_SPACE_SYSTEM_IO:
187 != ACPI_TYPE_INTEGER) ||
188 (ACPI_GET_OBJECT_TYPE(info->return_object->package.elements[1])
189 != ACPI_TYPE_INTEGER)) {
190 ACPI_ERROR((AE_INFO,
191 "Sleep State return package elements are not both Integers (%s, %s)",
192 acpi_ut_get_object_type_name(info->return_object->
193 package.elements[0]),
194 acpi_ut_get_object_type_name(info->return_object->
195 package.elements[1])));
196 status = AE_AML_OPERAND_TYPE;
197 } else {
198 /* Valid _Sx_ package size, type, and value */
199 148
200 *sleep_type_a = (u8) 149 status =
201 (info->return_object->package.elements[0])->integer.value; 150 acpi_os_read_port((acpi_io_address) address, value, width);
202 *sleep_type_b = (u8) 151 break;
203 (info->return_object->package.elements[1])->integer.value;
204 }
205 152
206 if (ACPI_FAILURE(status)) { 153 default:
207 ACPI_EXCEPTION((AE_INFO, status, 154 ACPI_ERROR((AE_INFO,
208 "While evaluating SleepState [%s], bad Sleep object %p type %s", 155 "Unsupported address space: %X", reg->space_id));
209 info->pathname, info->return_object, 156 return (AE_BAD_PARAMETER);
210 acpi_ut_get_object_type_name(info->
211 return_object)));
212 } 157 }
213 158
214 acpi_ut_remove_reference(info->return_object); 159 ACPI_DEBUG_PRINT((ACPI_DB_IO,
160 "Read: %8.8X width %2d from %8.8X%8.8X (%s)\n",
161 *value, width, ACPI_FORMAT_UINT64(address),
162 acpi_ut_get_region_name(reg->space_id)));
215 163
216 cleanup: 164 return (status);
217 ACPI_FREE(info);
218 return_ACPI_STATUS(status);
219} 165}
220 166
221ACPI_EXPORT_SYMBOL(acpi_get_sleep_type_data) 167ACPI_EXPORT_SYMBOL(acpi_read)
222 168
223/******************************************************************************* 169/******************************************************************************
224 * 170 *
225 * FUNCTION: acpi_hw_get_register_bit_mask 171 * FUNCTION: acpi_write
226 * 172 *
227 * PARAMETERS: register_id - Index of ACPI Register to access 173 * PARAMETERS: Value - To be written
174 * Reg - GAS register structure
228 * 175 *
229 * RETURN: The bitmask to be used when accessing the register 176 * RETURN: Status
230 * 177 *
231 * DESCRIPTION: Map register_id into a register bitmask. 178 * DESCRIPTION: Write to either memory or IO space.
232 * 179 *
233 ******************************************************************************/ 180 ******************************************************************************/
234struct acpi_bit_register_info *acpi_hw_get_bit_register_info(u32 register_id) 181acpi_status acpi_write(u32 value, struct acpi_generic_address *reg)
235{ 182{
236 ACPI_FUNCTION_ENTRY(); 183 u32 width;
184 u64 address;
185 acpi_status status;
237 186
238 if (register_id > ACPI_BITREG_MAX) { 187 ACPI_FUNCTION_NAME(acpi_write);
239 ACPI_ERROR((AE_INFO, "Invalid BitRegister ID: %X", 188
240 register_id)); 189 /*
241 return (NULL); 190 * Must have a valid pointer to a GAS structure, and
191 * a non-zero address within. However, don't return an error
192 * because the PM1A/B code must not fail if B isn't present.
193 */
194 if (!reg) {
195 return (AE_OK);
242 } 196 }
243 197
244 return (&acpi_gbl_bit_register_info[register_id]); 198 /* Get a local copy of the address. Handles possible alignment issues */
199
200 ACPI_MOVE_64_TO_64(&address, &reg->address);
201 if (!address) {
202 return (AE_OK);
203 }
204
205 /* Supported widths are 8/16/32 */
206
207 width = reg->bit_width;
208 if ((width != 8) && (width != 16) && (width != 32)) {
209 return (AE_SUPPORT);
210 }
211
212 /*
213 * Two address spaces supported: Memory or IO.
214 * PCI_Config is not supported here because the GAS struct is insufficient
215 */
216 switch (reg->space_id) {
217 case ACPI_ADR_SPACE_SYSTEM_MEMORY:
218
219 status = acpi_os_write_memory((acpi_physical_address) address,
220 value, width);
221 break;
222
223 case ACPI_ADR_SPACE_SYSTEM_IO:
224
225 status = acpi_os_write_port((acpi_io_address) address, value,
226 width);
227 break;
228
229 default:
230 ACPI_ERROR((AE_INFO,
231 "Unsupported address space: %X", reg->space_id));
232 return (AE_BAD_PARAMETER);
233 }
234
235 ACPI_DEBUG_PRINT((ACPI_DB_IO,
236 "Wrote: %8.8X width %2d to %8.8X%8.8X (%s)\n",
237 value, width, ACPI_FORMAT_UINT64(address),
238 acpi_ut_get_region_name(reg->space_id)));
239
240 return (status);
245} 241}
246 242
243ACPI_EXPORT_SYMBOL(acpi_write)
244
247/******************************************************************************* 245/*******************************************************************************
248 * 246 *
249 * FUNCTION: acpi_get_register 247 * FUNCTION: acpi_get_register_unlocked
250 * 248 *
251 * PARAMETERS: register_id - ID of ACPI bit_register to access 249 * PARAMETERS: register_id - ID of ACPI bit_register to access
252 * return_value - Value that was read from the register 250 * return_value - Value that was read from the register
@@ -254,17 +252,16 @@ struct acpi_bit_register_info *acpi_hw_get_bit_register_info(u32 register_id)
254 * RETURN: Status and the value read from specified Register. Value 252 * RETURN: Status and the value read from specified Register. Value
255 * returned is normalized to bit0 (is shifted all the way right) 253 * returned is normalized to bit0 (is shifted all the way right)
256 * 254 *
257 * DESCRIPTION: ACPI bit_register read function. 255 * DESCRIPTION: ACPI bit_register read function. Does not acquire the HW lock.
258 * 256 *
259 ******************************************************************************/ 257 ******************************************************************************/
260 258acpi_status acpi_get_register_unlocked(u32 register_id, u32 *return_value)
261acpi_status acpi_get_register_unlocked(u32 register_id, u32 * return_value)
262{ 259{
263 u32 register_value = 0; 260 u32 register_value = 0;
264 struct acpi_bit_register_info *bit_reg_info; 261 struct acpi_bit_register_info *bit_reg_info;
265 acpi_status status; 262 acpi_status status;
266 263
267 ACPI_FUNCTION_TRACE(acpi_get_register); 264 ACPI_FUNCTION_TRACE(acpi_get_register_unlocked);
268 265
269 /* Get the info structure corresponding to the requested ACPI Register */ 266 /* Get the info structure corresponding to the requested ACPI Register */
270 267
@@ -296,14 +293,31 @@ acpi_status acpi_get_register_unlocked(u32 register_id, u32 * return_value)
296 return_ACPI_STATUS(status); 293 return_ACPI_STATUS(status);
297} 294}
298 295
299acpi_status acpi_get_register(u32 register_id, u32 * return_value) 296ACPI_EXPORT_SYMBOL(acpi_get_register_unlocked)
297
298/*******************************************************************************
299 *
300 * FUNCTION: acpi_get_register
301 *
302 * PARAMETERS: register_id - ID of ACPI bit_register to access
303 * return_value - Value that was read from the register
304 *
305 * RETURN: Status and the value read from specified Register. Value
306 * returned is normalized to bit0 (is shifted all the way right)
307 *
308 * DESCRIPTION: ACPI bit_register read function.
309 *
310 ******************************************************************************/
311acpi_status acpi_get_register(u32 register_id, u32 *return_value)
300{ 312{
301 acpi_status status; 313 acpi_status status;
302 acpi_cpu_flags flags; 314 acpi_cpu_flags flags;
315
303 flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); 316 flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
304 status = acpi_get_register_unlocked(register_id, return_value); 317 status = acpi_get_register_unlocked(register_id, return_value);
305 acpi_os_release_lock(acpi_gbl_hardware_lock, flags); 318 acpi_os_release_lock(acpi_gbl_hardware_lock, flags);
306 return status; 319
320 return (status);
307} 321}
308 322
309ACPI_EXPORT_SYMBOL(acpi_get_register) 323ACPI_EXPORT_SYMBOL(acpi_get_register)
@@ -370,8 +384,9 @@ acpi_status acpi_set_register(u32 register_id, u32 value)
370 bit_reg_info-> 384 bit_reg_info->
371 access_bit_mask); 385 access_bit_mask);
372 if (value) { 386 if (value) {
373 status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS, 387 status =
374 (u16) value); 388 acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS,
389 (u16) value);
375 register_value = 0; 390 register_value = 0;
376 } 391 }
377 break; 392 break;
@@ -459,399 +474,120 @@ acpi_status acpi_set_register(u32 register_id, u32 value)
459 474
460ACPI_EXPORT_SYMBOL(acpi_set_register) 475ACPI_EXPORT_SYMBOL(acpi_set_register)
461 476
462/****************************************************************************** 477/*******************************************************************************
463 * 478 *
464 * FUNCTION: acpi_hw_register_read 479 * FUNCTION: acpi_get_sleep_type_data
465 * 480 *
466 * PARAMETERS: register_id - ACPI Register ID 481 * PARAMETERS: sleep_state - Numeric sleep state
467 * return_value - Where the register value is returned 482 * *sleep_type_a - Where SLP_TYPa is returned
483 * *sleep_type_b - Where SLP_TYPb is returned
468 * 484 *
469 * RETURN: Status and the value read. 485 * RETURN: Status - ACPI status
470 * 486 *
471 * DESCRIPTION: Read from the specified ACPI register 487 * DESCRIPTION: Obtain the SLP_TYPa and SLP_TYPb values for the requested sleep
488 * state.
472 * 489 *
473 ******************************************************************************/ 490 ******************************************************************************/
474acpi_status 491acpi_status
475acpi_hw_register_read(u32 register_id, u32 * return_value) 492acpi_get_sleep_type_data(u8 sleep_state, u8 *sleep_type_a, u8 *sleep_type_b)
476{ 493{
477 u32 value1 = 0; 494 acpi_status status = AE_OK;
478 u32 value2 = 0; 495 struct acpi_evaluate_info *info;
479 acpi_status status;
480
481 ACPI_FUNCTION_TRACE(hw_register_read);
482
483 switch (register_id) {
484 case ACPI_REGISTER_PM1_STATUS: /* 16-bit access */
485
486 status =
487 acpi_hw_low_level_read(16, &value1,
488 &acpi_gbl_FADT.xpm1a_event_block);
489 if (ACPI_FAILURE(status)) {
490 goto exit;
491 }
492
493 /* PM1B is optional */
494
495 status =
496 acpi_hw_low_level_read(16, &value2,
497 &acpi_gbl_FADT.xpm1b_event_block);
498 value1 |= value2;
499 break;
500
501 case ACPI_REGISTER_PM1_ENABLE: /* 16-bit access */
502
503 status =
504 acpi_hw_low_level_read(16, &value1, &acpi_gbl_xpm1a_enable);
505 if (ACPI_FAILURE(status)) {
506 goto exit;
507 }
508
509 /* PM1B is optional */
510
511 status =
512 acpi_hw_low_level_read(16, &value2, &acpi_gbl_xpm1b_enable);
513 value1 |= value2;
514 break;
515
516 case ACPI_REGISTER_PM1_CONTROL: /* 16-bit access */
517
518 status =
519 acpi_hw_low_level_read(16, &value1,
520 &acpi_gbl_FADT.xpm1a_control_block);
521 if (ACPI_FAILURE(status)) {
522 goto exit;
523 }
524
525 status =
526 acpi_hw_low_level_read(16, &value2,
527 &acpi_gbl_FADT.xpm1b_control_block);
528 value1 |= value2;
529 break;
530
531 case ACPI_REGISTER_PM2_CONTROL: /* 8-bit access */
532
533 status =
534 acpi_hw_low_level_read(8, &value1,
535 &acpi_gbl_FADT.xpm2_control_block);
536 break;
537
538 case ACPI_REGISTER_PM_TIMER: /* 32-bit access */
539
540 status =
541 acpi_hw_low_level_read(32, &value1,
542 &acpi_gbl_FADT.xpm_timer_block);
543 break;
544 496
545 case ACPI_REGISTER_SMI_COMMAND_BLOCK: /* 8-bit access */ 497 ACPI_FUNCTION_TRACE(acpi_get_sleep_type_data);
546 498
547 status = 499 /* Validate parameters */
548 acpi_os_read_port(acpi_gbl_FADT.smi_command, &value1, 8);
549 break;
550 500
551 default: 501 if ((sleep_state > ACPI_S_STATES_MAX) || !sleep_type_a || !sleep_type_b) {
552 ACPI_ERROR((AE_INFO, "Unknown Register ID: %X", register_id)); 502 return_ACPI_STATUS(AE_BAD_PARAMETER);
553 status = AE_BAD_PARAMETER;
554 break;
555 } 503 }
556 504
557 exit: 505 /* Allocate the evaluation information block */
558 506
559 if (ACPI_SUCCESS(status)) { 507 info = ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_evaluate_info));
560 *return_value = value1; 508 if (!info) {
509 return_ACPI_STATUS(AE_NO_MEMORY);
561 } 510 }
562 511
563 return_ACPI_STATUS(status); 512 info->pathname =
564} 513 ACPI_CAST_PTR(char, acpi_gbl_sleep_state_names[sleep_state]);
565
566/******************************************************************************
567 *
568 * FUNCTION: acpi_hw_register_write
569 *
570 * PARAMETERS: register_id - ACPI Register ID
571 * Value - The value to write
572 *
573 * RETURN: Status
574 *
575 * DESCRIPTION: Write to the specified ACPI register
576 *
577 * NOTE: In accordance with the ACPI specification, this function automatically
578 * preserves the value of the following bits, meaning that these bits cannot be
579 * changed via this interface:
580 *
581 * PM1_CONTROL[0] = SCI_EN
582 * PM1_CONTROL[9]
583 * PM1_STATUS[11]
584 *
585 * ACPI References:
586 * 1) Hardware Ignored Bits: When software writes to a register with ignored
587 * bit fields, it preserves the ignored bit fields
588 * 2) SCI_EN: OSPM always preserves this bit position
589 *
590 ******************************************************************************/
591
592acpi_status acpi_hw_register_write(u32 register_id, u32 value)
593{
594 acpi_status status;
595 u32 read_value;
596
597 ACPI_FUNCTION_TRACE(hw_register_write);
598
599 switch (register_id) {
600 case ACPI_REGISTER_PM1_STATUS: /* 16-bit access */
601
602 /* Perform a read first to preserve certain bits (per ACPI spec) */
603
604 status = acpi_hw_register_read(ACPI_REGISTER_PM1_STATUS,
605 &read_value);
606 if (ACPI_FAILURE(status)) {
607 goto exit;
608 }
609
610 /* Insert the bits to be preserved */
611
612 ACPI_INSERT_BITS(value, ACPI_PM1_STATUS_PRESERVED_BITS,
613 read_value);
614
615 /* Now we can write the data */
616
617 status =
618 acpi_hw_low_level_write(16, value,
619 &acpi_gbl_FADT.xpm1a_event_block);
620 if (ACPI_FAILURE(status)) {
621 goto exit;
622 }
623
624 /* PM1B is optional */
625
626 status =
627 acpi_hw_low_level_write(16, value,
628 &acpi_gbl_FADT.xpm1b_event_block);
629 break;
630
631 case ACPI_REGISTER_PM1_ENABLE: /* 16-bit access */
632
633 status =
634 acpi_hw_low_level_write(16, value, &acpi_gbl_xpm1a_enable);
635 if (ACPI_FAILURE(status)) {
636 goto exit;
637 }
638
639 /* PM1B is optional */
640
641 status =
642 acpi_hw_low_level_write(16, value, &acpi_gbl_xpm1b_enable);
643 break;
644
645 case ACPI_REGISTER_PM1_CONTROL: /* 16-bit access */
646
647 /*
648 * Perform a read first to preserve certain bits (per ACPI spec)
649 */
650 status = acpi_hw_register_read(ACPI_REGISTER_PM1_CONTROL,
651 &read_value);
652 if (ACPI_FAILURE(status)) {
653 goto exit;
654 }
655
656 /* Insert the bits to be preserved */
657
658 ACPI_INSERT_BITS(value, ACPI_PM1_CONTROL_PRESERVED_BITS,
659 read_value);
660
661 /* Now we can write the data */
662
663 status =
664 acpi_hw_low_level_write(16, value,
665 &acpi_gbl_FADT.xpm1a_control_block);
666 if (ACPI_FAILURE(status)) {
667 goto exit;
668 }
669
670 status =
671 acpi_hw_low_level_write(16, value,
672 &acpi_gbl_FADT.xpm1b_control_block);
673 break;
674
675 case ACPI_REGISTER_PM1A_CONTROL: /* 16-bit access */
676
677 status =
678 acpi_hw_low_level_write(16, value,
679 &acpi_gbl_FADT.xpm1a_control_block);
680 break;
681
682 case ACPI_REGISTER_PM1B_CONTROL: /* 16-bit access */
683
684 status =
685 acpi_hw_low_level_write(16, value,
686 &acpi_gbl_FADT.xpm1b_control_block);
687 break;
688
689 case ACPI_REGISTER_PM2_CONTROL: /* 8-bit access */
690
691 status =
692 acpi_hw_low_level_write(8, value,
693 &acpi_gbl_FADT.xpm2_control_block);
694 break;
695
696 case ACPI_REGISTER_PM_TIMER: /* 32-bit access */
697
698 status =
699 acpi_hw_low_level_write(32, value,
700 &acpi_gbl_FADT.xpm_timer_block);
701 break;
702
703 case ACPI_REGISTER_SMI_COMMAND_BLOCK: /* 8-bit access */
704 514
705 /* SMI_CMD is currently always in IO space */ 515 /* Evaluate the namespace object containing the values for this state */
706 516
707 status = 517 status = acpi_ns_evaluate(info);
708 acpi_os_write_port(acpi_gbl_FADT.smi_command, value, 8); 518 if (ACPI_FAILURE(status)) {
709 break; 519 ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
520 "%s while evaluating SleepState [%s]\n",
521 acpi_format_exception(status),
522 info->pathname));
710 523
711 default: 524 goto cleanup;
712 status = AE_BAD_PARAMETER;
713 break;
714 } 525 }
715 526
716 exit: 527 /* Must have a return object */
717 return_ACPI_STATUS(status);
718}
719
720/******************************************************************************
721 *
722 * FUNCTION: acpi_hw_low_level_read
723 *
724 * PARAMETERS: Width - 8, 16, or 32
725 * Value - Where the value is returned
726 * Reg - GAS register structure
727 *
728 * RETURN: Status
729 *
730 * DESCRIPTION: Read from either memory or IO space.
731 *
732 ******************************************************************************/
733
734acpi_status
735acpi_hw_low_level_read(u32 width, u32 * value, struct acpi_generic_address *reg)
736{
737 u64 address;
738 acpi_status status;
739
740 ACPI_FUNCTION_NAME(hw_low_level_read);
741 528
742 /* 529 if (!info->return_object) {
743 * Must have a valid pointer to a GAS structure, and 530 ACPI_ERROR((AE_INFO, "No Sleep State object returned from [%s]",
744 * a non-zero address within. However, don't return an error 531 info->pathname));
745 * because the PM1A/B code must not fail if B isn't present. 532 status = AE_NOT_EXIST;
746 */
747 if (!reg) {
748 return (AE_OK);
749 } 533 }
750 534
751 /* Get a local copy of the address. Handles possible alignment issues */ 535 /* It must be of type Package */
752 536
753 ACPI_MOVE_64_TO_64(&address, &reg->address); 537 else if (ACPI_GET_OBJECT_TYPE(info->return_object) != ACPI_TYPE_PACKAGE) {
754 if (!address) { 538 ACPI_ERROR((AE_INFO,
755 return (AE_OK); 539 "Sleep State return object is not a Package"));
540 status = AE_AML_OPERAND_TYPE;
756 } 541 }
757 *value = 0;
758 542
759 /* 543 /*
760 * Two address spaces supported: Memory or IO. 544 * The package must have at least two elements. NOTE (March 2005): This
761 * PCI_Config is not supported here because the GAS struct is insufficient 545 * goes against the current ACPI spec which defines this object as a
546 * package with one encoded DWORD element. However, existing practice
547 * by BIOS vendors seems to be to have 2 or more elements, at least
548 * one per sleep type (A/B).
762 */ 549 */
763 switch (reg->space_id) { 550 else if (info->return_object->package.count < 2) {
764 case ACPI_ADR_SPACE_SYSTEM_MEMORY:
765
766 status = acpi_os_read_memory((acpi_physical_address) address,
767 value, width);
768 break;
769
770 case ACPI_ADR_SPACE_SYSTEM_IO:
771
772 status =
773 acpi_os_read_port((acpi_io_address) address, value, width);
774 break;
775
776 default:
777 ACPI_ERROR((AE_INFO, 551 ACPI_ERROR((AE_INFO,
778 "Unsupported address space: %X", reg->space_id)); 552 "Sleep State return package does not have at least two elements"));
779 return (AE_BAD_PARAMETER); 553 status = AE_AML_NO_OPERAND;
780 } 554 }
781 555
782 ACPI_DEBUG_PRINT((ACPI_DB_IO, 556 /* The first two elements must both be of type Integer */
783 "Read: %8.8X width %2d from %8.8X%8.8X (%s)\n",
784 *value, width, ACPI_FORMAT_UINT64(address),
785 acpi_ut_get_region_name(reg->space_id)));
786
787 return (status);
788}
789
790/******************************************************************************
791 *
792 * FUNCTION: acpi_hw_low_level_write
793 *
794 * PARAMETERS: Width - 8, 16, or 32
795 * Value - To be written
796 * Reg - GAS register structure
797 *
798 * RETURN: Status
799 *
800 * DESCRIPTION: Write to either memory or IO space.
801 *
802 ******************************************************************************/
803
804acpi_status
805acpi_hw_low_level_write(u32 width, u32 value, struct acpi_generic_address * reg)
806{
807 u64 address;
808 acpi_status status;
809
810 ACPI_FUNCTION_NAME(hw_low_level_write);
811
812 /*
813 * Must have a valid pointer to a GAS structure, and
814 * a non-zero address within. However, don't return an error
815 * because the PM1A/B code must not fail if B isn't present.
816 */
817 if (!reg) {
818 return (AE_OK);
819 }
820 557
821 /* Get a local copy of the address. Handles possible alignment issues */ 558 else if ((ACPI_GET_OBJECT_TYPE(info->return_object->package.elements[0])
559 != ACPI_TYPE_INTEGER) ||
560 (ACPI_GET_OBJECT_TYPE(info->return_object->package.elements[1])
561 != ACPI_TYPE_INTEGER)) {
562 ACPI_ERROR((AE_INFO,
563 "Sleep State return package elements are not both Integers (%s, %s)",
564 acpi_ut_get_object_type_name(info->return_object->
565 package.elements[0]),
566 acpi_ut_get_object_type_name(info->return_object->
567 package.elements[1])));
568 status = AE_AML_OPERAND_TYPE;
569 } else {
570 /* Valid _Sx_ package size, type, and value */
822 571
823 ACPI_MOVE_64_TO_64(&address, &reg->address); 572 *sleep_type_a = (u8)
824 if (!address) { 573 (info->return_object->package.elements[0])->integer.value;
825 return (AE_OK); 574 *sleep_type_b = (u8)
575 (info->return_object->package.elements[1])->integer.value;
826 } 576 }
827 577
828 /* 578 if (ACPI_FAILURE(status)) {
829 * Two address spaces supported: Memory or IO. 579 ACPI_EXCEPTION((AE_INFO, status,
830 * PCI_Config is not supported here because the GAS struct is insufficient 580 "While evaluating SleepState [%s], bad Sleep object %p type %s",
831 */ 581 info->pathname, info->return_object,
832 switch (reg->space_id) { 582 acpi_ut_get_object_type_name(info->
833 case ACPI_ADR_SPACE_SYSTEM_MEMORY: 583 return_object)));
834
835 status = acpi_os_write_memory((acpi_physical_address) address,
836 value, width);
837 break;
838
839 case ACPI_ADR_SPACE_SYSTEM_IO:
840
841 status = acpi_os_write_port((acpi_io_address) address, value,
842 width);
843 break;
844
845 default:
846 ACPI_ERROR((AE_INFO,
847 "Unsupported address space: %X", reg->space_id));
848 return (AE_BAD_PARAMETER);
849 } 584 }
850 585
851 ACPI_DEBUG_PRINT((ACPI_DB_IO, 586 acpi_ut_remove_reference(info->return_object);
852 "Wrote: %8.8X width %2d to %8.8X%8.8X (%s)\n",
853 value, width, ACPI_FORMAT_UINT64(address),
854 acpi_ut_get_region_name(reg->space_id)));
855 587
856 return (status); 588 cleanup:
589 ACPI_FREE(info);
590 return_ACPI_STATUS(status);
857} 591}
592
593ACPI_EXPORT_SYMBOL(acpi_get_sleep_type_data)
diff --git a/drivers/acpi/namespace/nsaccess.c b/drivers/acpi/acpica/nsaccess.c
index c39a7f68b889..88303ebe924c 100644
--- a/drivers/acpi/namespace/nsaccess.c
+++ b/drivers/acpi/acpica/nsaccess.c
@@ -42,9 +42,10 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/amlcode.h> 45#include "accommon.h"
46#include <acpi/acnamesp.h> 46#include "amlcode.h"
47#include <acpi/acdispat.h> 47#include "acnamesp.h"
48#include "acdispat.h"
48 49
49#define _COMPONENT ACPI_NAMESPACE 50#define _COMPONENT ACPI_NAMESPACE
50ACPI_MODULE_NAME("nsaccess") 51ACPI_MODULE_NAME("nsaccess")
@@ -165,12 +166,9 @@ acpi_status acpi_ns_root_initialize(void)
165 166
166 obj_desc->method.method_flags = 167 obj_desc->method.method_flags =
167 AML_METHOD_INTERNAL_ONLY; 168 AML_METHOD_INTERNAL_ONLY;
168
169#ifndef ACPI_DUMP_APP
170 obj_desc->method.implementation = 169 obj_desc->method.implementation =
171 acpi_ut_osi_implementation; 170 acpi_ut_osi_implementation;
172#endif 171#endif
173#endif
174 break; 172 break;
175 173
176 case ACPI_TYPE_INTEGER: 174 case ACPI_TYPE_INTEGER:
@@ -521,11 +519,11 @@ acpi_ns_lookup(union acpi_generic_state *scope_info,
521 } 519 }
522 520
523 /* 521 /*
524 * Search namespace for each segment of the name. Loop through and 522 * Search namespace for each segment of the name. Loop through and
525 * verify (or add to the namespace) each name segment. 523 * verify (or add to the namespace) each name segment.
526 * 524 *
527 * The object type is significant only at the last name 525 * The object type is significant only at the last name
528 * segment. (We don't care about the types along the path, only 526 * segment. (We don't care about the types along the path, only
529 * the type of the final target object.) 527 * the type of the final target object.)
530 */ 528 */
531 this_search_type = ACPI_TYPE_ANY; 529 this_search_type = ACPI_TYPE_ANY;
@@ -591,6 +589,10 @@ acpi_ns_lookup(union acpi_generic_state *scope_info,
591 * segments). 589 * segments).
592 */ 590 */
593 if (this_node->type == ACPI_TYPE_LOCAL_ALIAS) { 591 if (this_node->type == ACPI_TYPE_LOCAL_ALIAS) {
592 if (!this_node->object) {
593 return_ACPI_STATUS(AE_NOT_EXIST);
594 }
595
594 if (acpi_ns_opens_scope 596 if (acpi_ns_opens_scope
595 (((struct acpi_namespace_node *)this_node-> 597 (((struct acpi_namespace_node *)this_node->
596 object)->type)) { 598 object)->type)) {
diff --git a/drivers/acpi/namespace/nsalloc.c b/drivers/acpi/acpica/nsalloc.c
index 3a1740ac2edc..f976d848fe82 100644
--- a/drivers/acpi/namespace/nsalloc.c
+++ b/drivers/acpi/acpica/nsalloc.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acnamesp.h> 45#include "accommon.h"
46#include "acnamesp.h"
46 47
47#define _COMPONENT ACPI_NAMESPACE 48#define _COMPONENT ACPI_NAMESPACE
48ACPI_MODULE_NAME("nsalloc") 49ACPI_MODULE_NAME("nsalloc")
diff --git a/drivers/acpi/namespace/nsdump.c b/drivers/acpi/acpica/nsdump.c
index cc0ae39440e4..0da33c8e9ba2 100644
--- a/drivers/acpi/namespace/nsdump.c
+++ b/drivers/acpi/acpica/nsdump.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acnamesp.h> 45#include "accommon.h"
46#include "acnamesp.h"
46 47
47#define _COMPONENT ACPI_NAMESPACE 48#define _COMPONENT ACPI_NAMESPACE
48ACPI_MODULE_NAME("nsdump") 49ACPI_MODULE_NAME("nsdump")
diff --git a/drivers/acpi/namespace/nsdumpdv.c b/drivers/acpi/acpica/nsdumpdv.c
index 428f50fde11a..41994fe7fbb8 100644
--- a/drivers/acpi/namespace/nsdumpdv.c
+++ b/drivers/acpi/acpica/nsdumpdv.c
@@ -42,6 +42,7 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include "accommon.h"
45 46
46/* TBD: This entire module is apparently obsolete and should be removed */ 47/* TBD: This entire module is apparently obsolete and should be removed */
47 48
@@ -49,7 +50,7 @@
49ACPI_MODULE_NAME("nsdumpdv") 50ACPI_MODULE_NAME("nsdumpdv")
50#ifdef ACPI_OBSOLETE_FUNCTIONS 51#ifdef ACPI_OBSOLETE_FUNCTIONS
51#if defined(ACPI_DEBUG_OUTPUT) || defined(ACPI_DEBUGGER) 52#if defined(ACPI_DEBUG_OUTPUT) || defined(ACPI_DEBUGGER)
52#include <acpi/acnamesp.h> 53#include "acnamesp.h"
53/******************************************************************************* 54/*******************************************************************************
54 * 55 *
55 * FUNCTION: acpi_ns_dump_one_device 56 * FUNCTION: acpi_ns_dump_one_device
diff --git a/drivers/acpi/namespace/nseval.c b/drivers/acpi/acpica/nseval.c
index 4cdf03ac2b46..0f3d5f9b5966 100644
--- a/drivers/acpi/namespace/nseval.c
+++ b/drivers/acpi/acpica/nseval.c
@@ -42,9 +42,10 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acparser.h> 45#include "accommon.h"
46#include <acpi/acinterp.h> 46#include "acparser.h"
47#include <acpi/acnamesp.h> 47#include "acinterp.h"
48#include "acnamesp.h"
48 49
49#define _COMPONENT ACPI_NAMESPACE 50#define _COMPONENT ACPI_NAMESPACE
50ACPI_MODULE_NAME("nseval") 51ACPI_MODULE_NAME("nseval")
@@ -89,6 +90,7 @@ acpi_status acpi_ns_evaluate(struct acpi_evaluate_info * info)
89 /* Initialize the return value to an invalid object */ 90 /* Initialize the return value to an invalid object */
90 91
91 info->return_object = NULL; 92 info->return_object = NULL;
93 info->param_count = 0;
92 94
93 /* 95 /*
94 * Get the actual namespace node for the target object. Handles these cases: 96 * Get the actual namespace node for the target object. Handles these cases:
@@ -141,41 +143,17 @@ acpi_status acpi_ns_evaluate(struct acpi_evaluate_info * info)
141 return_ACPI_STATUS(AE_NULL_OBJECT); 143 return_ACPI_STATUS(AE_NULL_OBJECT);
142 } 144 }
143 145
144 /* 146 /* Count the number of arguments being passed to the method */
145 * Calculate the number of arguments being passed to the method
146 */
147 147
148 info->param_count = 0;
149 if (info->parameters) { 148 if (info->parameters) {
150 while (info->parameters[info->param_count]) 149 while (info->parameters[info->param_count]) {
150 if (info->param_count > ACPI_METHOD_MAX_ARG) {
151 return_ACPI_STATUS(AE_LIMIT);
152 }
151 info->param_count++; 153 info->param_count++;
154 }
152 } 155 }
153 156
154 /*
155 * Warning if too few or too many arguments have been passed by the
156 * caller. We don't want to abort here with an error because an
157 * incorrect number of arguments may not cause the method to fail.
158 * However, the method will fail if there are too few arguments passed
159 * and the method attempts to use one of the missing ones.
160 */
161
162 if (info->param_count < info->obj_desc->method.param_count) {
163 ACPI_WARNING((AE_INFO,
164 "Insufficient arguments - "
165 "method [%4.4s] needs %d, found %d",
166 acpi_ut_get_node_name(info->resolved_node),
167 info->obj_desc->method.param_count,
168 info->param_count));
169 } else if (info->param_count >
170 info->obj_desc->method.param_count) {
171 ACPI_WARNING((AE_INFO,
172 "Excess arguments - "
173 "method [%4.4s] needs %d, found %d",
174 acpi_ut_get_node_name(info->
175 resolved_node),
176 info->obj_desc->method.param_count,
177 info->param_count));
178 }
179 157
180 ACPI_DUMP_PATHNAME(info->resolved_node, "Execute Method:", 158 ACPI_DUMP_PATHNAME(info->resolved_node, "Execute Method:",
181 ACPI_LV_INFO, _COMPONENT); 159 ACPI_LV_INFO, _COMPONENT);
@@ -264,32 +242,13 @@ acpi_status acpi_ns_evaluate(struct acpi_evaluate_info * info)
264 } 242 }
265 } 243 }
266 244
267 /* Validation of return values for ACPI-predefined methods and objects */ 245 /*
268 246 * Check input argument count against the ASL-defined count for a method.
269 if ((status == AE_OK) || (status == AE_CTRL_RETURN_VALUE)) { 247 * Also check predefined names: argument count and return value against
270 /* 248 * the ACPI specification. Some incorrect return value types are repaired.
271 * If this is the first evaluation, check the return value. This 249 */
272 * ensures that any warnings will only be emitted during the very 250 (void)acpi_ns_check_predefined_names(node, info->param_count,
273 * first evaluation of the object. 251 status, &info->return_object);
274 */
275 if (!(node->flags & ANOBJ_EVALUATED)) {
276 /*
277 * Check for a predefined ACPI name. If found, validate the
278 * returned object.
279 *
280 * Note: Ignore return status for now, emit warnings if there are
281 * problems with the returned object. May change later to abort
282 * the method on invalid return object.
283 */
284 (void)acpi_ns_check_predefined_names(node,
285 info->
286 return_object);
287 }
288
289 /* Mark the node as having been evaluated */
290
291 node->flags |= ANOBJ_EVALUATED;
292 }
293 252
294 /* Check if there is a return value that must be dealt with */ 253 /* Check if there is a return value that must be dealt with */
295 254
diff --git a/drivers/acpi/namespace/nsinit.c b/drivers/acpi/acpica/nsinit.c
index e4c57510d798..13501cb81863 100644
--- a/drivers/acpi/namespace/nsinit.c
+++ b/drivers/acpi/acpica/nsinit.c
@@ -42,9 +42,10 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acnamesp.h> 45#include "accommon.h"
46#include <acpi/acdispat.h> 46#include "acnamesp.h"
47#include <acpi/acinterp.h> 47#include "acdispat.h"
48#include "acinterp.h"
48#include <linux/nmi.h> 49#include <linux/nmi.h>
49 50
50#define _COMPONENT ACPI_NAMESPACE 51#define _COMPONENT ACPI_NAMESPACE
diff --git a/drivers/acpi/namespace/nsload.c b/drivers/acpi/acpica/nsload.c
index a4a412b7c029..a0ba9e12379e 100644
--- a/drivers/acpi/namespace/nsload.c
+++ b/drivers/acpi/acpica/nsload.c
@@ -42,9 +42,10 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acnamesp.h> 45#include "accommon.h"
46#include <acpi/acdispat.h> 46#include "acnamesp.h"
47#include <acpi/actables.h> 47#include "acdispat.h"
48#include "actables.h"
48 49
49#define _COMPONENT ACPI_NAMESPACE 50#define _COMPONENT ACPI_NAMESPACE
50ACPI_MODULE_NAME("nsload") 51ACPI_MODULE_NAME("nsload")
diff --git a/drivers/acpi/namespace/nsnames.c b/drivers/acpi/acpica/nsnames.c
index 42a39a7c96e9..ae3dc10a7e81 100644
--- a/drivers/acpi/namespace/nsnames.c
+++ b/drivers/acpi/acpica/nsnames.c
@@ -42,8 +42,9 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/amlcode.h> 45#include "accommon.h"
46#include <acpi/acnamesp.h> 46#include "amlcode.h"
47#include "acnamesp.h"
47 48
48#define _COMPONENT ACPI_NAMESPACE 49#define _COMPONENT ACPI_NAMESPACE
49ACPI_MODULE_NAME("nsnames") 50ACPI_MODULE_NAME("nsnames")
diff --git a/drivers/acpi/namespace/nsobject.c b/drivers/acpi/acpica/nsobject.c
index 15fe09e24f71..08a97a57f8f9 100644
--- a/drivers/acpi/namespace/nsobject.c
+++ b/drivers/acpi/acpica/nsobject.c
@@ -43,7 +43,8 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acnamesp.h> 46#include "accommon.h"
47#include "acnamesp.h"
47 48
48#define _COMPONENT ACPI_NAMESPACE 49#define _COMPONENT ACPI_NAMESPACE
49ACPI_MODULE_NAME("nsobject") 50ACPI_MODULE_NAME("nsobject")
diff --git a/drivers/acpi/namespace/nsparse.c b/drivers/acpi/acpica/nsparse.c
index a82271a9dbb3..b9e8d0070b6f 100644
--- a/drivers/acpi/namespace/nsparse.c
+++ b/drivers/acpi/acpica/nsparse.c
@@ -42,10 +42,11 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acnamesp.h> 45#include "accommon.h"
46#include <acpi/acparser.h> 46#include "acnamesp.h"
47#include <acpi/acdispat.h> 47#include "acparser.h"
48#include <acpi/actables.h> 48#include "acdispat.h"
49#include "actables.h"
49 50
50#define _COMPONENT ACPI_NAMESPACE 51#define _COMPONENT ACPI_NAMESPACE
51ACPI_MODULE_NAME("nsparse") 52ACPI_MODULE_NAME("nsparse")
diff --git a/drivers/acpi/namespace/nspredef.c b/drivers/acpi/acpica/nspredef.c
index 0f17cf0898c9..452703290d35 100644
--- a/drivers/acpi/namespace/nspredef.c
+++ b/drivers/acpi/acpica/nspredef.c
@@ -43,8 +43,9 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acnamesp.h> 46#include "accommon.h"
47#include <acpi/acpredef.h> 47#include "acnamesp.h"
48#include "acpredef.h"
48 49
49#define _COMPONENT ACPI_NAMESPACE 50#define _COMPONENT ACPI_NAMESPACE
50ACPI_MODULE_NAME("nspredef") 51ACPI_MODULE_NAME("nspredef")
@@ -72,7 +73,7 @@ ACPI_MODULE_NAME("nspredef")
72/* Local prototypes */ 73/* Local prototypes */
73static acpi_status 74static acpi_status
74acpi_ns_check_package(char *pathname, 75acpi_ns_check_package(char *pathname,
75 union acpi_operand_object *return_object, 76 union acpi_operand_object **return_object_ptr,
76 const union acpi_predefined_info *predefined); 77 const union acpi_predefined_info *predefined);
77 78
78static acpi_status 79static acpi_status
@@ -82,13 +83,18 @@ acpi_ns_check_package_elements(char *pathname,
82 83
83static acpi_status 84static acpi_status
84acpi_ns_check_object_type(char *pathname, 85acpi_ns_check_object_type(char *pathname,
85 union acpi_operand_object *return_object, 86 union acpi_operand_object **return_object_ptr,
86 u32 expected_btypes, u32 package_index); 87 u32 expected_btypes, u32 package_index);
87 88
88static acpi_status 89static acpi_status
89acpi_ns_check_reference(char *pathname, 90acpi_ns_check_reference(char *pathname,
90 union acpi_operand_object *return_object); 91 union acpi_operand_object *return_object);
91 92
93static acpi_status
94acpi_ns_repair_object(u32 expected_btypes,
95 u32 package_index,
96 union acpi_operand_object **return_object_ptr);
97
92/* 98/*
93 * Names for the types that can be returned by the predefined objects. 99 * Names for the types that can be returned by the predefined objects.
94 * Used for warning messages. Must be in the same order as the ACPI_RTYPEs 100 * Used for warning messages. Must be in the same order as the ACPI_RTYPEs
@@ -108,8 +114,8 @@ static const char *acpi_rtype_names[] = {
108 * FUNCTION: acpi_ns_check_predefined_names 114 * FUNCTION: acpi_ns_check_predefined_names
109 * 115 *
110 * PARAMETERS: Node - Namespace node for the method/object 116 * PARAMETERS: Node - Namespace node for the method/object
111 * return_object - Object returned from the evaluation of this 117 * return_object_ptr - Pointer to the object returned from the
112 * method/object 118 * evaluation of a method or object
113 * 119 *
114 * RETURN: Status 120 * RETURN: Status
115 * 121 *
@@ -119,8 +125,11 @@ static const char *acpi_rtype_names[] = {
119 125
120acpi_status 126acpi_status
121acpi_ns_check_predefined_names(struct acpi_namespace_node *node, 127acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
122 union acpi_operand_object *return_object) 128 u32 user_param_count,
129 acpi_status return_status,
130 union acpi_operand_object **return_object_ptr)
123{ 131{
132 union acpi_operand_object *return_object = *return_object_ptr;
124 acpi_status status = AE_OK; 133 acpi_status status = AE_OK;
125 const union acpi_predefined_info *predefined; 134 const union acpi_predefined_info *predefined;
126 char *pathname; 135 char *pathname;
@@ -128,12 +137,6 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
128 /* Match the name for this method/object against the predefined list */ 137 /* Match the name for this method/object against the predefined list */
129 138
130 predefined = acpi_ns_check_for_predefined_name(node); 139 predefined = acpi_ns_check_for_predefined_name(node);
131 if (!predefined) {
132
133 /* Name was not one of the predefined names */
134
135 return (AE_OK);
136 }
137 140
138 /* Get the full pathname to the object, for use in error messages */ 141 /* Get the full pathname to the object, for use in error messages */
139 142
@@ -143,10 +146,37 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
143 } 146 }
144 147
145 /* 148 /*
146 * Check that the parameter count for this method is in accordance 149 * Check that the parameter count for this method matches the ASL
147 * with the ACPI specification. 150 * definition. For predefined names, ensure that both the caller and
151 * the method itself are in accordance with the ACPI specification.
148 */ 152 */
149 acpi_ns_check_parameter_count(pathname, node, predefined); 153 acpi_ns_check_parameter_count(pathname, node, user_param_count,
154 predefined);
155
156 /* If not a predefined name, we cannot validate the return object */
157
158 if (!predefined) {
159 goto exit;
160 }
161
162 /* If the method failed, we cannot validate the return object */
163
164 if ((return_status != AE_OK) && (return_status != AE_CTRL_RETURN_VALUE)) {
165 goto exit;
166 }
167
168 /*
169 * Only validate the return value on the first successful evaluation of
170 * the method. This ensures that any warnings will only be emitted during
171 * the very first evaluation of the method/object.
172 */
173 if (node->flags & ANOBJ_EVALUATED) {
174 goto exit;
175 }
176
177 /* Mark the node as having been successfully evaluated */
178
179 node->flags |= ANOBJ_EVALUATED;
150 180
151 /* 181 /*
152 * If there is no return value, check if we require a return value for 182 * If there is no return value, check if we require a return value for
@@ -171,7 +201,7 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
171 * We have a return value, but if one wasn't expected, just exit, this is 201 * We have a return value, but if one wasn't expected, just exit, this is
172 * not a problem 202 * not a problem
173 * 203 *
174 * For example, if "Implicit return value" is enabled, methods will 204 * For example, if the "Implicit Return" feature is enabled, methods will
175 * always return a value 205 * always return a value
176 */ 206 */
177 if (!predefined->info.expected_btypes) { 207 if (!predefined->info.expected_btypes) {
@@ -182,7 +212,7 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
182 * Check that the type of the return object is what is expected for 212 * Check that the type of the return object is what is expected for
183 * this predefined name 213 * this predefined name
184 */ 214 */
185 status = acpi_ns_check_object_type(pathname, return_object, 215 status = acpi_ns_check_object_type(pathname, return_object_ptr,
186 predefined->info.expected_btypes, 216 predefined->info.expected_btypes,
187 ACPI_NOT_PACKAGE); 217 ACPI_NOT_PACKAGE);
188 if (ACPI_FAILURE(status)) { 218 if (ACPI_FAILURE(status)) {
@@ -193,11 +223,12 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
193 223
194 if (ACPI_GET_OBJECT_TYPE(return_object) == ACPI_TYPE_PACKAGE) { 224 if (ACPI_GET_OBJECT_TYPE(return_object) == ACPI_TYPE_PACKAGE) {
195 status = 225 status =
196 acpi_ns_check_package(pathname, return_object, predefined); 226 acpi_ns_check_package(pathname, return_object_ptr,
227 predefined);
197 } 228 }
198 229
199 exit: 230 exit:
200 if (pathname) { 231 if (pathname != predefined->info.name) {
201 ACPI_FREE(pathname); 232 ACPI_FREE(pathname);
202 } 233 }
203 234
@@ -210,6 +241,7 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
210 * 241 *
211 * PARAMETERS: Pathname - Full pathname to the node (for error msgs) 242 * PARAMETERS: Pathname - Full pathname to the node (for error msgs)
212 * Node - Namespace node for the method/object 243 * Node - Namespace node for the method/object
244 * user_param_count - Number of args passed in by the caller
213 * Predefined - Pointer to entry in predefined name table 245 * Predefined - Pointer to entry in predefined name table
214 * 246 *
215 * RETURN: None 247 * RETURN: None
@@ -223,32 +255,76 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
223void 255void
224acpi_ns_check_parameter_count(char *pathname, 256acpi_ns_check_parameter_count(char *pathname,
225 struct acpi_namespace_node *node, 257 struct acpi_namespace_node *node,
258 u32 user_param_count,
226 const union acpi_predefined_info *predefined) 259 const union acpi_predefined_info *predefined)
227{ 260{
228 u32 param_count; 261 u32 param_count;
229 u32 required_params_current; 262 u32 required_params_current;
230 u32 required_params_old; 263 u32 required_params_old;
231 264
232 /* 265 /* Methods have 0-7 parameters. All other types have zero. */
233 * Check that the ASL-defined parameter count is what is expected for 266
234 * this predefined name.
235 *
236 * Methods have 0-7 parameters. All other types have zero.
237 */
238 param_count = 0; 267 param_count = 0;
239 if (node->type == ACPI_TYPE_METHOD) { 268 if (node->type == ACPI_TYPE_METHOD) {
240 param_count = node->object->method.param_count; 269 param_count = node->object->method.param_count;
241 } 270 }
242 271
243 /* Validate parameter count - allow two different legal counts (_SCP) */ 272 /* Argument count check for non-predefined methods/objects */
273
274 if (!predefined) {
275 /*
276 * Warning if too few or too many arguments have been passed by the
277 * caller. An incorrect number of arguments may not cause the method
278 * to fail. However, the method will fail if there are too few
279 * arguments and the method attempts to use one of the missing ones.
280 */
281 if (user_param_count < param_count) {
282 ACPI_WARNING((AE_INFO,
283 "%s: Insufficient arguments - needs %d, found %d",
284 pathname, param_count, user_param_count));
285 } else if (user_param_count > param_count) {
286 ACPI_WARNING((AE_INFO,
287 "%s: Excess arguments - needs %d, found %d",
288 pathname, param_count, user_param_count));
289 }
290 return;
291 }
292
293 /* Allow two different legal argument counts (_SCP, etc.) */
244 294
245 required_params_current = predefined->info.param_count & 0x0F; 295 required_params_current = predefined->info.param_count & 0x0F;
246 required_params_old = predefined->info.param_count >> 4; 296 required_params_old = predefined->info.param_count >> 4;
247 297
298 if (user_param_count != ACPI_UINT32_MAX) {
299
300 /* Validate the user-supplied parameter count */
301
302 if ((user_param_count != required_params_current) &&
303 (user_param_count != required_params_old)) {
304 ACPI_WARNING((AE_INFO,
305 "%s: Parameter count mismatch - caller passed %d, ACPI requires %d",
306 pathname, user_param_count,
307 required_params_current));
308 }
309 }
310
311 /*
312 * Only validate the argument count on the first successful evaluation of
313 * the method. This ensures that any warnings will only be emitted during
314 * the very first evaluation of the method/object.
315 */
316 if (node->flags & ANOBJ_EVALUATED) {
317 return;
318 }
319
320 /*
321 * Check that the ASL-defined parameter count is what is expected for
322 * this predefined name.
323 */
248 if ((param_count != required_params_current) && 324 if ((param_count != required_params_current) &&
249 (param_count != required_params_old)) { 325 (param_count != required_params_old)) {
250 ACPI_WARNING((AE_INFO, 326 ACPI_WARNING((AE_INFO,
251 "%s: Parameter count mismatch - ASL declared %d, expected %d", 327 "%s: Parameter count mismatch - ASL declared %d, ACPI requires %d",
252 pathname, param_count, required_params_current)); 328 pathname, param_count, required_params_current));
253 } 329 }
254} 330}
@@ -307,8 +383,8 @@ const union acpi_predefined_info *acpi_ns_check_for_predefined_name(struct
307 * FUNCTION: acpi_ns_check_package 383 * FUNCTION: acpi_ns_check_package
308 * 384 *
309 * PARAMETERS: Pathname - Full pathname to the node (for error msgs) 385 * PARAMETERS: Pathname - Full pathname to the node (for error msgs)
310 * return_object - Object returned from the evaluation of a 386 * return_object_ptr - Pointer to the object returned from the
311 * method or object 387 * evaluation of a method or object
312 * Predefined - Pointer to entry in predefined name table 388 * Predefined - Pointer to entry in predefined name table
313 * 389 *
314 * RETURN: Status 390 * RETURN: Status
@@ -320,9 +396,10 @@ const union acpi_predefined_info *acpi_ns_check_for_predefined_name(struct
320 396
321static acpi_status 397static acpi_status
322acpi_ns_check_package(char *pathname, 398acpi_ns_check_package(char *pathname,
323 union acpi_operand_object *return_object, 399 union acpi_operand_object **return_object_ptr,
324 const union acpi_predefined_info *predefined) 400 const union acpi_predefined_info *predefined)
325{ 401{
402 union acpi_operand_object *return_object = *return_object_ptr;
326 const union acpi_predefined_info *package; 403 const union acpi_predefined_info *package;
327 union acpi_operand_object *sub_package; 404 union acpi_operand_object *sub_package;
328 union acpi_operand_object **elements; 405 union acpi_operand_object **elements;
@@ -408,7 +485,7 @@ acpi_ns_check_package(char *pathname,
408 * elements must be of the same type 485 * elements must be of the same type
409 */ 486 */
410 for (i = 0; i < count; i++) { 487 for (i = 0; i < count; i++) {
411 status = acpi_ns_check_object_type(pathname, *elements, 488 status = acpi_ns_check_object_type(pathname, elements,
412 package->ret_info. 489 package->ret_info.
413 object_type1, i); 490 object_type1, i);
414 if (ACPI_FAILURE(status)) { 491 if (ACPI_FAILURE(status)) {
@@ -441,7 +518,7 @@ acpi_ns_check_package(char *pathname,
441 518
442 status = 519 status =
443 acpi_ns_check_object_type(pathname, 520 acpi_ns_check_object_type(pathname,
444 *elements, 521 elements,
445 package-> 522 package->
446 ret_info3. 523 ret_info3.
447 object_type[i], 524 object_type[i],
@@ -454,7 +531,7 @@ acpi_ns_check_package(char *pathname,
454 531
455 status = 532 status =
456 acpi_ns_check_object_type(pathname, 533 acpi_ns_check_object_type(pathname,
457 *elements, 534 elements,
458 package-> 535 package->
459 ret_info3. 536 ret_info3.
460 tail_object_type, 537 tail_object_type,
@@ -471,7 +548,7 @@ acpi_ns_check_package(char *pathname,
471 548
472 /* First element is the (Integer) count of sub-packages to follow */ 549 /* First element is the (Integer) count of sub-packages to follow */
473 550
474 status = acpi_ns_check_object_type(pathname, *elements, 551 status = acpi_ns_check_object_type(pathname, elements,
475 ACPI_RTYPE_INTEGER, 0); 552 ACPI_RTYPE_INTEGER, 0);
476 if (ACPI_FAILURE(status)) { 553 if (ACPI_FAILURE(status)) {
477 return (status); 554 return (status);
@@ -509,7 +586,7 @@ acpi_ns_check_package(char *pathname,
509 /* Each sub-object must be of type Package */ 586 /* Each sub-object must be of type Package */
510 587
511 status = 588 status =
512 acpi_ns_check_object_type(pathname, sub_package, 589 acpi_ns_check_object_type(pathname, &sub_package,
513 ACPI_RTYPE_PACKAGE, i); 590 ACPI_RTYPE_PACKAGE, i);
514 if (ACPI_FAILURE(status)) { 591 if (ACPI_FAILURE(status)) {
515 return (status); 592 return (status);
@@ -567,12 +644,8 @@ acpi_ns_check_package(char *pathname,
567 for (j = 0; j < expected_count; j++) { 644 for (j = 0; j < expected_count; j++) {
568 status = 645 status =
569 acpi_ns_check_object_type(pathname, 646 acpi_ns_check_object_type(pathname,
570 sub_elements 647 &sub_elements[j],
571 [j], 648 package->ret_info2.object_type[j], j);
572 package->
573 ret_info2.
574 object_type
575 [j], j);
576 if (ACPI_FAILURE(status)) { 649 if (ACPI_FAILURE(status)) {
577 return (status); 650 return (status);
578 } 651 }
@@ -611,7 +684,7 @@ acpi_ns_check_package(char *pathname,
611 684
612 status = 685 status =
613 acpi_ns_check_object_type(pathname, 686 acpi_ns_check_object_type(pathname,
614 *sub_elements, 687 sub_elements,
615 ACPI_RTYPE_INTEGER, 688 ACPI_RTYPE_INTEGER,
616 0); 689 0);
617 if (ACPI_FAILURE(status)) { 690 if (ACPI_FAILURE(status)) {
@@ -708,7 +781,7 @@ acpi_ns_check_package_elements(char *pathname,
708 * The second group can have a count of zero. 781 * The second group can have a count of zero.
709 */ 782 */
710 for (i = 0; i < count1; i++) { 783 for (i = 0; i < count1; i++) {
711 status = acpi_ns_check_object_type(pathname, *this_element, 784 status = acpi_ns_check_object_type(pathname, this_element,
712 type1, i); 785 type1, i);
713 if (ACPI_FAILURE(status)) { 786 if (ACPI_FAILURE(status)) {
714 return (status); 787 return (status);
@@ -717,7 +790,7 @@ acpi_ns_check_package_elements(char *pathname,
717 } 790 }
718 791
719 for (i = 0; i < count2; i++) { 792 for (i = 0; i < count2; i++) {
720 status = acpi_ns_check_object_type(pathname, *this_element, 793 status = acpi_ns_check_object_type(pathname, this_element,
721 type2, (i + count1)); 794 type2, (i + count1));
722 if (ACPI_FAILURE(status)) { 795 if (ACPI_FAILURE(status)) {
723 return (status); 796 return (status);
@@ -733,8 +806,8 @@ acpi_ns_check_package_elements(char *pathname,
733 * FUNCTION: acpi_ns_check_object_type 806 * FUNCTION: acpi_ns_check_object_type
734 * 807 *
735 * PARAMETERS: Pathname - Full pathname to the node (for error msgs) 808 * PARAMETERS: Pathname - Full pathname to the node (for error msgs)
736 * return_object - Object return from the execution of this 809 * return_object_ptr - Pointer to the object returned from the
737 * method/object 810 * evaluation of a method or object
738 * expected_btypes - Bitmap of expected return type(s) 811 * expected_btypes - Bitmap of expected return type(s)
739 * package_index - Index of object within parent package (if 812 * package_index - Index of object within parent package (if
740 * applicable - ACPI_NOT_PACKAGE otherwise) 813 * applicable - ACPI_NOT_PACKAGE otherwise)
@@ -748,9 +821,10 @@ acpi_ns_check_package_elements(char *pathname,
748 821
749static acpi_status 822static acpi_status
750acpi_ns_check_object_type(char *pathname, 823acpi_ns_check_object_type(char *pathname,
751 union acpi_operand_object *return_object, 824 union acpi_operand_object **return_object_ptr,
752 u32 expected_btypes, u32 package_index) 825 u32 expected_btypes, u32 package_index)
753{ 826{
827 union acpi_operand_object *return_object = *return_object_ptr;
754 acpi_status status = AE_OK; 828 acpi_status status = AE_OK;
755 u32 return_btype; 829 u32 return_btype;
756 char type_buffer[48]; /* Room for 5 types */ 830 char type_buffer[48]; /* Room for 5 types */
@@ -814,6 +888,14 @@ acpi_ns_check_object_type(char *pathname,
814 /* Is the object one of the expected types? */ 888 /* Is the object one of the expected types? */
815 889
816 if (!(return_btype & expected_btypes)) { 890 if (!(return_btype & expected_btypes)) {
891
892 /* Type mismatch -- attempt repair of the returned object */
893
894 status = acpi_ns_repair_object(expected_btypes, package_index,
895 return_object_ptr);
896 if (ACPI_SUCCESS(status)) {
897 return (status);
898 }
817 goto type_error_exit; 899 goto type_error_exit;
818 } 900 }
819 901
@@ -898,3 +980,86 @@ acpi_ns_check_reference(char *pathname,
898 980
899 return (AE_AML_OPERAND_TYPE); 981 return (AE_AML_OPERAND_TYPE);
900} 982}
983
984/*******************************************************************************
985 *
986 * FUNCTION: acpi_ns_repair_object
987 *
988 * PARAMETERS: Pathname - Full pathname to the node (for error msgs)
989 * package_index - Used to determine if target is in a package
990 * return_object_ptr - Pointer to the object returned from the
991 * evaluation of a method or object
992 *
993 * RETURN: Status. AE_OK if repair was successful.
994 *
995 * DESCRIPTION: Attempt to repair/convert a return object of a type that was
996 * not expected.
997 *
998 ******************************************************************************/
999
1000static acpi_status
1001acpi_ns_repair_object(u32 expected_btypes,
1002 u32 package_index,
1003 union acpi_operand_object **return_object_ptr)
1004{
1005 union acpi_operand_object *return_object = *return_object_ptr;
1006 union acpi_operand_object *new_object;
1007 acpi_size length;
1008
1009 switch (ACPI_GET_OBJECT_TYPE(return_object)) {
1010 case ACPI_TYPE_BUFFER:
1011
1012 if (!(expected_btypes & ACPI_RTYPE_STRING)) {
1013 return (AE_AML_OPERAND_TYPE);
1014 }
1015
1016 /*
1017 * Have a Buffer, expected a String, convert. Use a to_string
1018 * conversion, no transform performed on the buffer data. The best
1019 * example of this is the _BIF method, where the string data from
1020 * the battery is often (incorrectly) returned as buffer object(s).
1021 */
1022 length = 0;
1023 while ((length < return_object->buffer.length) &&
1024 (return_object->buffer.pointer[length])) {
1025 length++;
1026 }
1027
1028 /* Allocate a new string object */
1029
1030 new_object = acpi_ut_create_string_object(length);
1031 if (!new_object) {
1032 return (AE_NO_MEMORY);
1033 }
1034
1035 /*
1036 * Copy the raw buffer data with no transform. String is already NULL
1037 * terminated at Length+1.
1038 */
1039 ACPI_MEMCPY(new_object->string.pointer,
1040 return_object->buffer.pointer, length);
1041
1042 /* Install the new return object */
1043
1044 acpi_ut_remove_reference(return_object);
1045 *return_object_ptr = new_object;
1046
1047 /*
1048 * If the object is a package element, we need to:
1049 * 1. Decrement the reference count of the orignal object, it was
1050 * incremented when building the package
1051 * 2. Increment the reference count of the new object, it will be
1052 * decremented when releasing the package
1053 */
1054 if (package_index != ACPI_NOT_PACKAGE) {
1055 acpi_ut_remove_reference(return_object);
1056 acpi_ut_add_reference(new_object);
1057 }
1058 return (AE_OK);
1059
1060 default:
1061 break;
1062 }
1063
1064 return (AE_AML_OPERAND_TYPE);
1065}
diff --git a/drivers/acpi/namespace/nssearch.c b/drivers/acpi/acpica/nssearch.c
index a9a80bf811b3..6fea13f3f52d 100644
--- a/drivers/acpi/namespace/nssearch.c
+++ b/drivers/acpi/acpica/nssearch.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acnamesp.h> 45#include "accommon.h"
46#include "acnamesp.h"
46 47
47#define _COMPONENT ACPI_NAMESPACE 48#define _COMPONENT ACPI_NAMESPACE
48ACPI_MODULE_NAME("nssearch") 49ACPI_MODULE_NAME("nssearch")
diff --git a/drivers/acpi/namespace/nsutils.c b/drivers/acpi/acpica/nsutils.c
index b0817e1127b1..3e1149bf4aa5 100644
--- a/drivers/acpi/namespace/nsutils.c
+++ b/drivers/acpi/acpica/nsutils.c
@@ -43,9 +43,10 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acnamesp.h> 46#include "accommon.h"
47#include <acpi/amlcode.h> 47#include "acnamesp.h"
48#include <acpi/actables.h> 48#include "amlcode.h"
49#include "actables.h"
49 50
50#define _COMPONENT ACPI_NAMESPACE 51#define _COMPONENT ACPI_NAMESPACE
51ACPI_MODULE_NAME("nsutils") 52ACPI_MODULE_NAME("nsutils")
@@ -314,9 +315,15 @@ void acpi_ns_get_internal_name_length(struct acpi_namestring_info *info)
314 * 315 *
315 * strlen() + 1 covers the first name_seg, which has no path separator 316 * strlen() + 1 covers the first name_seg, which has no path separator
316 */ 317 */
317 if (acpi_ns_valid_root_prefix(next_external_char[0])) { 318 if (acpi_ns_valid_root_prefix(*next_external_char)) {
318 info->fully_qualified = TRUE; 319 info->fully_qualified = TRUE;
319 next_external_char++; 320 next_external_char++;
321
322 /* Skip redundant root_prefix, like \\_SB.PCI0.SBRG.EC0 */
323
324 while (acpi_ns_valid_root_prefix(*next_external_char)) {
325 next_external_char++;
326 }
320 } else { 327 } else {
321 /* 328 /*
322 * Handle Carat prefixes 329 * Handle Carat prefixes
diff --git a/drivers/acpi/namespace/nswalk.c b/drivers/acpi/acpica/nswalk.c
index 3c905ce26d7d..200895fa2728 100644
--- a/drivers/acpi/namespace/nswalk.c
+++ b/drivers/acpi/acpica/nswalk.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acnamesp.h> 45#include "accommon.h"
46#include "acnamesp.h"
46 47
47#define _COMPONENT ACPI_NAMESPACE 48#define _COMPONENT ACPI_NAMESPACE
48ACPI_MODULE_NAME("nswalk") 49ACPI_MODULE_NAME("nswalk")
diff --git a/drivers/acpi/namespace/nsxfeval.c b/drivers/acpi/acpica/nsxfeval.c
index a085cc39c055..22a7171ac1ed 100644
--- a/drivers/acpi/namespace/nsxfeval.c
+++ b/drivers/acpi/acpica/nsxfeval.c
@@ -43,8 +43,9 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acnamesp.h> 46#include "accommon.h"
47#include <acpi/acinterp.h> 47#include "acnamesp.h"
48#include "acinterp.h"
48 49
49#define _COMPONENT ACPI_NAMESPACE 50#define _COMPONENT ACPI_NAMESPACE
50ACPI_MODULE_NAME("nsxfeval") 51ACPI_MODULE_NAME("nsxfeval")
diff --git a/drivers/acpi/namespace/nsxfname.c b/drivers/acpi/acpica/nsxfname.c
index 5efa4e7ddb0b..9589fea24997 100644
--- a/drivers/acpi/namespace/nsxfname.c
+++ b/drivers/acpi/acpica/nsxfname.c
@@ -43,7 +43,8 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acnamesp.h> 46#include "accommon.h"
47#include "acnamesp.h"
47 48
48#define _COMPONENT ACPI_NAMESPACE 49#define _COMPONENT ACPI_NAMESPACE
49ACPI_MODULE_NAME("nsxfname") 50ACPI_MODULE_NAME("nsxfname")
diff --git a/drivers/acpi/namespace/nsxfobj.c b/drivers/acpi/acpica/nsxfobj.c
index 2b375ee80cef..1c7efc15225f 100644
--- a/drivers/acpi/namespace/nsxfobj.c
+++ b/drivers/acpi/acpica/nsxfobj.c
@@ -43,7 +43,8 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acnamesp.h> 46#include "accommon.h"
47#include "acnamesp.h"
47 48
48#define _COMPONENT ACPI_NAMESPACE 49#define _COMPONENT ACPI_NAMESPACE
49ACPI_MODULE_NAME("nsxfobj") 50ACPI_MODULE_NAME("nsxfobj")
diff --git a/drivers/acpi/parser/psargs.c b/drivers/acpi/acpica/psargs.c
index d830b29b85b1..b161f3544b51 100644
--- a/drivers/acpi/parser/psargs.c
+++ b/drivers/acpi/acpica/psargs.c
@@ -42,10 +42,11 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acparser.h> 45#include "accommon.h"
46#include <acpi/amlcode.h> 46#include "acparser.h"
47#include <acpi/acnamesp.h> 47#include "amlcode.h"
48#include <acpi/acdispat.h> 48#include "acnamesp.h"
49#include "acdispat.h"
49 50
50#define _COMPONENT ACPI_PARSER 51#define _COMPONENT ACPI_PARSER
51ACPI_MODULE_NAME("psargs") 52ACPI_MODULE_NAME("psargs")
diff --git a/drivers/acpi/parser/psloop.c b/drivers/acpi/acpica/psloop.c
index 4647039a0d8a..c5f6ce19a401 100644
--- a/drivers/acpi/parser/psloop.c
+++ b/drivers/acpi/acpica/psloop.c
@@ -50,9 +50,10 @@
50 */ 50 */
51 51
52#include <acpi/acpi.h> 52#include <acpi/acpi.h>
53#include <acpi/acparser.h> 53#include "accommon.h"
54#include <acpi/acdispat.h> 54#include "acparser.h"
55#include <acpi/amlcode.h> 55#include "acdispat.h"
56#include "amlcode.h"
56 57
57#define _COMPONENT ACPI_PARSER 58#define _COMPONENT ACPI_PARSER
58ACPI_MODULE_NAME("psloop") 59ACPI_MODULE_NAME("psloop")
diff --git a/drivers/acpi/parser/psopcode.c b/drivers/acpi/acpica/psopcode.c
index f425ab30eae8..3bc3a60194d6 100644
--- a/drivers/acpi/parser/psopcode.c
+++ b/drivers/acpi/acpica/psopcode.c
@@ -42,9 +42,10 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acparser.h> 45#include "accommon.h"
46#include <acpi/acopcode.h> 46#include "acparser.h"
47#include <acpi/amlcode.h> 47#include "acopcode.h"
48#include "amlcode.h"
48 49
49#define _COMPONENT ACPI_PARSER 50#define _COMPONENT ACPI_PARSER
50ACPI_MODULE_NAME("psopcode") 51ACPI_MODULE_NAME("psopcode")
diff --git a/drivers/acpi/parser/psparse.c b/drivers/acpi/acpica/psparse.c
index 68e932f215ea..70838e9b608c 100644
--- a/drivers/acpi/parser/psparse.c
+++ b/drivers/acpi/acpica/psparse.c
@@ -51,11 +51,12 @@
51 */ 51 */
52 52
53#include <acpi/acpi.h> 53#include <acpi/acpi.h>
54#include <acpi/acparser.h> 54#include "accommon.h"
55#include <acpi/acdispat.h> 55#include "acparser.h"
56#include <acpi/amlcode.h> 56#include "acdispat.h"
57#include <acpi/acnamesp.h> 57#include "amlcode.h"
58#include <acpi/acinterp.h> 58#include "acnamesp.h"
59#include "acinterp.h"
59 60
60#define _COMPONENT ACPI_PARSER 61#define _COMPONENT ACPI_PARSER
61ACPI_MODULE_NAME("psparse") 62ACPI_MODULE_NAME("psparse")
@@ -447,10 +448,22 @@ acpi_status acpi_ps_parse_aml(struct acpi_walk_state *walk_state)
447 walk_state, walk_state->parser_state.aml, 448 walk_state, walk_state->parser_state.aml,
448 walk_state->parser_state.aml_size)); 449 walk_state->parser_state.aml_size));
449 450
451 if (!walk_state->parser_state.aml) {
452 return_ACPI_STATUS(AE_NULL_OBJECT);
453 }
454
450 /* Create and initialize a new thread state */ 455 /* Create and initialize a new thread state */
451 456
452 thread = acpi_ut_create_thread_state(); 457 thread = acpi_ut_create_thread_state();
453 if (!thread) { 458 if (!thread) {
459 if (walk_state->method_desc) {
460
461 /* Executing a control method - additional cleanup */
462
463 acpi_ds_terminate_control_method(
464 walk_state->method_desc, walk_state);
465 }
466
454 acpi_ds_delete_walk_state(walk_state); 467 acpi_ds_delete_walk_state(walk_state);
455 return_ACPI_STATUS(AE_NO_MEMORY); 468 return_ACPI_STATUS(AE_NO_MEMORY);
456 } 469 }
diff --git a/drivers/acpi/parser/psscope.c b/drivers/acpi/acpica/psscope.c
index ee50e67c9443..2feca5ca9581 100644
--- a/drivers/acpi/parser/psscope.c
+++ b/drivers/acpi/acpica/psscope.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acparser.h> 45#include "accommon.h"
46#include "acparser.h"
46 47
47#define _COMPONENT ACPI_PARSER 48#define _COMPONENT ACPI_PARSER
48ACPI_MODULE_NAME("psscope") 49ACPI_MODULE_NAME("psscope")
diff --git a/drivers/acpi/parser/pstree.c b/drivers/acpi/acpica/pstree.c
index 1dd355ddd182..4d3389118ec3 100644
--- a/drivers/acpi/parser/pstree.c
+++ b/drivers/acpi/acpica/pstree.c
@@ -42,8 +42,9 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acparser.h> 45#include "accommon.h"
46#include <acpi/amlcode.h> 46#include "acparser.h"
47#include "amlcode.h"
47 48
48#define _COMPONENT ACPI_PARSER 49#define _COMPONENT ACPI_PARSER
49ACPI_MODULE_NAME("pstree") 50ACPI_MODULE_NAME("pstree")
diff --git a/drivers/acpi/parser/psutils.c b/drivers/acpi/acpica/psutils.c
index 7cf1f65cd5bb..e636e078ad3d 100644
--- a/drivers/acpi/parser/psutils.c
+++ b/drivers/acpi/acpica/psutils.c
@@ -42,8 +42,9 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acparser.h> 45#include "accommon.h"
46#include <acpi/amlcode.h> 46#include "acparser.h"
47#include "amlcode.h"
47 48
48#define _COMPONENT ACPI_PARSER 49#define _COMPONENT ACPI_PARSER
49ACPI_MODULE_NAME("psutils") 50ACPI_MODULE_NAME("psutils")
diff --git a/drivers/acpi/parser/pswalk.c b/drivers/acpi/acpica/pswalk.c
index 8b86ad5a3201..78b8b791f2ae 100644
--- a/drivers/acpi/parser/pswalk.c
+++ b/drivers/acpi/acpica/pswalk.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acparser.h> 45#include "accommon.h"
46#include "acparser.h"
46 47
47#define _COMPONENT ACPI_PARSER 48#define _COMPONENT ACPI_PARSER
48ACPI_MODULE_NAME("pswalk") 49ACPI_MODULE_NAME("pswalk")
diff --git a/drivers/acpi/parser/psxface.c b/drivers/acpi/acpica/psxface.c
index 270469aae842..ff06032c0f06 100644
--- a/drivers/acpi/parser/psxface.c
+++ b/drivers/acpi/acpica/psxface.c
@@ -42,9 +42,11 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acparser.h> 45#include "accommon.h"
46#include <acpi/acdispat.h> 46#include "acparser.h"
47#include <acpi/acinterp.h> 47#include "acdispat.h"
48#include "acinterp.h"
49#include "amlcode.h"
48 50
49#define _COMPONENT ACPI_PARSER 51#define _COMPONENT ACPI_PARSER
50ACPI_MODULE_NAME("psxface") 52ACPI_MODULE_NAME("psxface")
@@ -278,6 +280,38 @@ acpi_status acpi_ps_execute_method(struct acpi_evaluate_info *info)
278 goto cleanup; 280 goto cleanup;
279 } 281 }
280 282
283 /* Invoke an internal method if necessary */
284
285 if (info->obj_desc->method.method_flags & AML_METHOD_INTERNAL_ONLY) {
286 status = info->obj_desc->method.implementation(walk_state);
287 info->return_object = walk_state->return_desc;
288
289 /* Cleanup states */
290
291 acpi_ds_scope_stack_clear(walk_state);
292 acpi_ps_cleanup_scope(&walk_state->parser_state);
293 acpi_ds_terminate_control_method(walk_state->method_desc,
294 walk_state);
295 acpi_ds_delete_walk_state(walk_state);
296 goto cleanup;
297 }
298
299 /*
300 * Start method evaluation with an implicit return of zero.
301 * This is done for Windows compatibility.
302 */
303 if (acpi_gbl_enable_interpreter_slack) {
304 walk_state->implicit_return_obj =
305 acpi_ut_create_internal_object(ACPI_TYPE_INTEGER);
306 if (!walk_state->implicit_return_obj) {
307 status = AE_NO_MEMORY;
308 acpi_ds_delete_walk_state(walk_state);
309 goto cleanup;
310 }
311
312 walk_state->implicit_return_obj->integer.value = 0;
313 }
314
281 /* Parse the AML */ 315 /* Parse the AML */
282 316
283 status = acpi_ps_parse_aml(walk_state); 317 status = acpi_ps_parse_aml(walk_state);
diff --git a/drivers/acpi/resources/rsaddr.c b/drivers/acpi/acpica/rsaddr.c
index 7f96332822bf..1e437bfd8db5 100644
--- a/drivers/acpi/resources/rsaddr.c
+++ b/drivers/acpi/acpica/rsaddr.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acresrc.h> 45#include "accommon.h"
46#include "acresrc.h"
46 47
47#define _COMPONENT ACPI_RESOURCES 48#define _COMPONENT ACPI_RESOURCES
48ACPI_MODULE_NAME("rsaddr") 49ACPI_MODULE_NAME("rsaddr")
diff --git a/drivers/acpi/resources/rscalc.c b/drivers/acpi/acpica/rscalc.c
index 8eaaecf92009..52865ee6bc77 100644
--- a/drivers/acpi/resources/rscalc.c
+++ b/drivers/acpi/acpica/rscalc.c
@@ -42,8 +42,9 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acresrc.h> 45#include "accommon.h"
46#include <acpi/acnamesp.h> 46#include "acresrc.h"
47#include "acnamesp.h"
47 48
48#define _COMPONENT ACPI_RESOURCES 49#define _COMPONENT ACPI_RESOURCES
49ACPI_MODULE_NAME("rscalc") 50ACPI_MODULE_NAME("rscalc")
diff --git a/drivers/acpi/resources/rscreate.c b/drivers/acpi/acpica/rscreate.c
index 08b8d73e6ee5..61566b1a0616 100644
--- a/drivers/acpi/resources/rscreate.c
+++ b/drivers/acpi/acpica/rscreate.c
@@ -42,8 +42,9 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acresrc.h> 45#include "accommon.h"
46#include <acpi/acnamesp.h> 46#include "acresrc.h"
47#include "acnamesp.h"
47 48
48#define _COMPONENT ACPI_RESOURCES 49#define _COMPONENT ACPI_RESOURCES
49ACPI_MODULE_NAME("rscreate") 50ACPI_MODULE_NAME("rscreate")
diff --git a/drivers/acpi/resources/rsdump.c b/drivers/acpi/acpica/rsdump.c
index 6bbbb7b8941a..3f0ca5a12d34 100644
--- a/drivers/acpi/resources/rsdump.c
+++ b/drivers/acpi/acpica/rsdump.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acresrc.h> 45#include "accommon.h"
46#include "acresrc.h"
46 47
47#define _COMPONENT ACPI_RESOURCES 48#define _COMPONENT ACPI_RESOURCES
48ACPI_MODULE_NAME("rsdump") 49ACPI_MODULE_NAME("rsdump")
diff --git a/drivers/acpi/resources/rsinfo.c b/drivers/acpi/acpica/rsinfo.c
index 3f0a1fedbe0e..77b25fdb459c 100644
--- a/drivers/acpi/resources/rsinfo.c
+++ b/drivers/acpi/acpica/rsinfo.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acresrc.h> 45#include "accommon.h"
46#include "acresrc.h"
46 47
47#define _COMPONENT ACPI_RESOURCES 48#define _COMPONENT ACPI_RESOURCES
48ACPI_MODULE_NAME("rsinfo") 49ACPI_MODULE_NAME("rsinfo")
diff --git a/drivers/acpi/resources/rsio.c b/drivers/acpi/acpica/rsio.c
index b66d42e7402e..35a49aa95609 100644
--- a/drivers/acpi/resources/rsio.c
+++ b/drivers/acpi/acpica/rsio.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acresrc.h> 45#include "accommon.h"
46#include "acresrc.h"
46 47
47#define _COMPONENT ACPI_RESOURCES 48#define _COMPONENT ACPI_RESOURCES
48ACPI_MODULE_NAME("rsio") 49ACPI_MODULE_NAME("rsio")
diff --git a/drivers/acpi/resources/rsirq.c b/drivers/acpi/acpica/rsirq.c
index a8805efc0366..2e0256983aa6 100644
--- a/drivers/acpi/resources/rsirq.c
+++ b/drivers/acpi/acpica/rsirq.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acresrc.h> 45#include "accommon.h"
46#include "acresrc.h"
46 47
47#define _COMPONENT ACPI_RESOURCES 48#define _COMPONENT ACPI_RESOURCES
48ACPI_MODULE_NAME("rsirq") 49ACPI_MODULE_NAME("rsirq")
diff --git a/drivers/acpi/resources/rslist.c b/drivers/acpi/acpica/rslist.c
index b78c7e797a19..1b1dbc69f087 100644
--- a/drivers/acpi/resources/rslist.c
+++ b/drivers/acpi/acpica/rslist.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acresrc.h> 45#include "accommon.h"
46#include "acresrc.h"
46 47
47#define _COMPONENT ACPI_RESOURCES 48#define _COMPONENT ACPI_RESOURCES
48ACPI_MODULE_NAME("rslist") 49ACPI_MODULE_NAME("rslist")
diff --git a/drivers/acpi/resources/rsmemory.c b/drivers/acpi/acpica/rsmemory.c
index 63b21abd90bb..ddc76cebdc92 100644
--- a/drivers/acpi/resources/rsmemory.c
+++ b/drivers/acpi/acpica/rsmemory.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acresrc.h> 45#include "accommon.h"
46#include "acresrc.h"
46 47
47#define _COMPONENT ACPI_RESOURCES 48#define _COMPONENT ACPI_RESOURCES
48ACPI_MODULE_NAME("rsmemory") 49ACPI_MODULE_NAME("rsmemory")
diff --git a/drivers/acpi/resources/rsmisc.c b/drivers/acpi/acpica/rsmisc.c
index 96a6c0353255..5bc49a553284 100644
--- a/drivers/acpi/resources/rsmisc.c
+++ b/drivers/acpi/acpica/rsmisc.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acresrc.h> 45#include "accommon.h"
46#include "acresrc.h"
46 47
47#define _COMPONENT ACPI_RESOURCES 48#define _COMPONENT ACPI_RESOURCES
48ACPI_MODULE_NAME("rsmisc") 49ACPI_MODULE_NAME("rsmisc")
diff --git a/drivers/acpi/resources/rsutils.c b/drivers/acpi/acpica/rsutils.c
index f7b3bcd59ba7..bc03d5966829 100644
--- a/drivers/acpi/resources/rsutils.c
+++ b/drivers/acpi/acpica/rsutils.c
@@ -42,8 +42,9 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acnamesp.h> 45#include "accommon.h"
46#include <acpi/acresrc.h> 46#include "acnamesp.h"
47#include "acresrc.h"
47 48
48#define _COMPONENT ACPI_RESOURCES 49#define _COMPONENT ACPI_RESOURCES
49ACPI_MODULE_NAME("rsutils") 50ACPI_MODULE_NAME("rsutils")
diff --git a/drivers/acpi/resources/rsxface.c b/drivers/acpi/acpica/rsxface.c
index f59f4c4e034c..69a2aa5b5d83 100644
--- a/drivers/acpi/resources/rsxface.c
+++ b/drivers/acpi/acpica/rsxface.c
@@ -42,8 +42,9 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acresrc.h> 45#include "accommon.h"
46#include <acpi/acnamesp.h> 46#include "acresrc.h"
47#include "acnamesp.h"
47 48
48#define _COMPONENT ACPI_RESOURCES 49#define _COMPONENT ACPI_RESOURCES
49ACPI_MODULE_NAME("rsxface") 50ACPI_MODULE_NAME("rsxface")
diff --git a/drivers/acpi/tables/tbfadt.c b/drivers/acpi/acpica/tbfadt.c
index 2817158fb6a1..3636e4f8fb73 100644
--- a/drivers/acpi/tables/tbfadt.c
+++ b/drivers/acpi/acpica/tbfadt.c
@@ -42,15 +42,16 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/actables.h> 45#include "accommon.h"
46#include "actables.h"
46 47
47#define _COMPONENT ACPI_TABLES 48#define _COMPONENT ACPI_TABLES
48ACPI_MODULE_NAME("tbfadt") 49ACPI_MODULE_NAME("tbfadt")
49 50
50/* Local prototypes */ 51/* Local prototypes */
51static void inline 52static inline void
52acpi_tb_init_generic_address(struct acpi_generic_address *generic_address, 53acpi_tb_init_generic_address(struct acpi_generic_address *generic_address,
53 u8 byte_width, u64 address); 54 u8 space_id, u8 byte_width, u64 address);
54 55
55static void acpi_tb_convert_fadt(void); 56static void acpi_tb_convert_fadt(void);
56 57
@@ -60,9 +61,10 @@ static void acpi_tb_validate_fadt(void);
60 61
61typedef struct acpi_fadt_info { 62typedef struct acpi_fadt_info {
62 char *name; 63 char *name;
63 u8 target; 64 u8 address64;
64 u8 source; 65 u8 address32;
65 u8 length; 66 u8 length;
67 u8 default_length;
66 u8 type; 68 u8 type;
67 69
68} acpi_fadt_info; 70} acpi_fadt_info;
@@ -71,37 +73,61 @@ typedef struct acpi_fadt_info {
71#define ACPI_FADT_SEPARATE_LENGTH 2 73#define ACPI_FADT_SEPARATE_LENGTH 2
72 74
73static struct acpi_fadt_info fadt_info_table[] = { 75static struct acpi_fadt_info fadt_info_table[] = {
74 {"Pm1aEventBlock", ACPI_FADT_OFFSET(xpm1a_event_block), 76 {"Pm1aEventBlock",
77 ACPI_FADT_OFFSET(xpm1a_event_block),
75 ACPI_FADT_OFFSET(pm1a_event_block), 78 ACPI_FADT_OFFSET(pm1a_event_block),
76 ACPI_FADT_OFFSET(pm1_event_length), ACPI_FADT_REQUIRED}, 79 ACPI_FADT_OFFSET(pm1_event_length),
80 ACPI_PM1_REGISTER_WIDTH * 2, /* Enable + Status register */
81 ACPI_FADT_REQUIRED},
77 82
78 {"Pm1bEventBlock", ACPI_FADT_OFFSET(xpm1b_event_block), 83 {"Pm1bEventBlock",
84 ACPI_FADT_OFFSET(xpm1b_event_block),
79 ACPI_FADT_OFFSET(pm1b_event_block), 85 ACPI_FADT_OFFSET(pm1b_event_block),
80 ACPI_FADT_OFFSET(pm1_event_length), 0}, 86 ACPI_FADT_OFFSET(pm1_event_length),
87 ACPI_PM1_REGISTER_WIDTH * 2, /* Enable + Status register */
88 0},
81 89
82 {"Pm1aControlBlock", ACPI_FADT_OFFSET(xpm1a_control_block), 90 {"Pm1aControlBlock",
91 ACPI_FADT_OFFSET(xpm1a_control_block),
83 ACPI_FADT_OFFSET(pm1a_control_block), 92 ACPI_FADT_OFFSET(pm1a_control_block),
84 ACPI_FADT_OFFSET(pm1_control_length), ACPI_FADT_REQUIRED}, 93 ACPI_FADT_OFFSET(pm1_control_length),
94 ACPI_PM1_REGISTER_WIDTH,
95 ACPI_FADT_REQUIRED},
85 96
86 {"Pm1bControlBlock", ACPI_FADT_OFFSET(xpm1b_control_block), 97 {"Pm1bControlBlock",
98 ACPI_FADT_OFFSET(xpm1b_control_block),
87 ACPI_FADT_OFFSET(pm1b_control_block), 99 ACPI_FADT_OFFSET(pm1b_control_block),
88 ACPI_FADT_OFFSET(pm1_control_length), 0}, 100 ACPI_FADT_OFFSET(pm1_control_length),
101 ACPI_PM1_REGISTER_WIDTH,
102 0},
89 103
90 {"Pm2ControlBlock", ACPI_FADT_OFFSET(xpm2_control_block), 104 {"Pm2ControlBlock",
105 ACPI_FADT_OFFSET(xpm2_control_block),
91 ACPI_FADT_OFFSET(pm2_control_block), 106 ACPI_FADT_OFFSET(pm2_control_block),
92 ACPI_FADT_OFFSET(pm2_control_length), ACPI_FADT_SEPARATE_LENGTH}, 107 ACPI_FADT_OFFSET(pm2_control_length),
108 ACPI_PM2_REGISTER_WIDTH,
109 ACPI_FADT_SEPARATE_LENGTH},
93 110
94 {"PmTimerBlock", ACPI_FADT_OFFSET(xpm_timer_block), 111 {"PmTimerBlock",
112 ACPI_FADT_OFFSET(xpm_timer_block),
95 ACPI_FADT_OFFSET(pm_timer_block), 113 ACPI_FADT_OFFSET(pm_timer_block),
96 ACPI_FADT_OFFSET(pm_timer_length), ACPI_FADT_REQUIRED}, 114 ACPI_FADT_OFFSET(pm_timer_length),
115 ACPI_PM_TIMER_WIDTH,
116 ACPI_FADT_REQUIRED},
97 117
98 {"Gpe0Block", ACPI_FADT_OFFSET(xgpe0_block), 118 {"Gpe0Block",
119 ACPI_FADT_OFFSET(xgpe0_block),
99 ACPI_FADT_OFFSET(gpe0_block), 120 ACPI_FADT_OFFSET(gpe0_block),
100 ACPI_FADT_OFFSET(gpe0_block_length), ACPI_FADT_SEPARATE_LENGTH}, 121 ACPI_FADT_OFFSET(gpe0_block_length),
122 0,
123 ACPI_FADT_SEPARATE_LENGTH},
101 124
102 {"Gpe1Block", ACPI_FADT_OFFSET(xgpe1_block), 125 {"Gpe1Block",
126 ACPI_FADT_OFFSET(xgpe1_block),
103 ACPI_FADT_OFFSET(gpe1_block), 127 ACPI_FADT_OFFSET(gpe1_block),
104 ACPI_FADT_OFFSET(gpe1_block_length), ACPI_FADT_SEPARATE_LENGTH} 128 ACPI_FADT_OFFSET(gpe1_block_length),
129 0,
130 ACPI_FADT_SEPARATE_LENGTH}
105}; 131};
106 132
107#define ACPI_FADT_INFO_ENTRIES (sizeof (fadt_info_table) / sizeof (struct acpi_fadt_info)) 133#define ACPI_FADT_INFO_ENTRIES (sizeof (fadt_info_table) / sizeof (struct acpi_fadt_info))
@@ -122,9 +148,9 @@ static struct acpi_fadt_info fadt_info_table[] = {
122 * 148 *
123 ******************************************************************************/ 149 ******************************************************************************/
124 150
125static void inline 151static inline void
126acpi_tb_init_generic_address(struct acpi_generic_address *generic_address, 152acpi_tb_init_generic_address(struct acpi_generic_address *generic_address,
127 u8 byte_width, u64 address) 153 u8 space_id, u8 byte_width, u64 address)
128{ 154{
129 155
130 /* 156 /*
@@ -135,10 +161,10 @@ acpi_tb_init_generic_address(struct acpi_generic_address *generic_address,
135 161
136 /* All other fields are byte-wide */ 162 /* All other fields are byte-wide */
137 163
138 generic_address->space_id = ACPI_ADR_SPACE_SYSTEM_IO; 164 generic_address->space_id = space_id;
139 generic_address->bit_width = byte_width << 3; 165 generic_address->bit_width = (u8)ACPI_MUL_8(byte_width);
140 generic_address->bit_offset = 0; 166 generic_address->bit_offset = 0;
141 generic_address->access_width = 0; 167 generic_address->access_width = 0; /* Access width ANY */
142} 168}
143 169
144/******************************************************************************* 170/*******************************************************************************
@@ -225,7 +251,8 @@ void acpi_tb_create_local_fadt(struct acpi_table_header *table, u32 length)
225 */ 251 */
226 if (length > sizeof(struct acpi_table_fadt)) { 252 if (length > sizeof(struct acpi_table_fadt)) {
227 ACPI_WARNING((AE_INFO, 253 ACPI_WARNING((AE_INFO,
228 "FADT (revision %u) is longer than ACPI 2.0 version, truncating length 0x%X to 0x%zX", 254 "FADT (revision %u) is longer than ACPI 2.0 version, "
255 "truncating length 0x%X to 0x%zX",
229 table->revision, (unsigned)length, 256 table->revision, (unsigned)length,
230 sizeof(struct acpi_table_fadt))); 257 sizeof(struct acpi_table_fadt)));
231 } 258 }
@@ -244,7 +271,6 @@ void acpi_tb_create_local_fadt(struct acpi_table_header *table, u32 length)
244 * 2) Validate some of the important values within the FADT 271 * 2) Validate some of the important values within the FADT
245 */ 272 */
246 acpi_tb_convert_fadt(); 273 acpi_tb_convert_fadt();
247 acpi_tb_validate_fadt();
248} 274}
249 275
250/******************************************************************************* 276/*******************************************************************************
@@ -278,22 +304,36 @@ void acpi_tb_create_local_fadt(struct acpi_table_header *table, u32 length)
278 304
279static void acpi_tb_convert_fadt(void) 305static void acpi_tb_convert_fadt(void)
280{ 306{
281 u8 pm1_register_length; 307 u8 pm1_register_bit_width;
282 struct acpi_generic_address *target; 308 u8 pm1_register_byte_width;
309 struct acpi_generic_address *target64;
283 u32 i; 310 u32 i;
284 311
285 /* Update the local FADT table header length */ 312 /* Update the local FADT table header length */
286 313
287 acpi_gbl_FADT.header.length = sizeof(struct acpi_table_fadt); 314 acpi_gbl_FADT.header.length = sizeof(struct acpi_table_fadt);
288 315
289 /* Expand the 32-bit FACS and DSDT addresses to 64-bit as necessary */ 316 /*
290 317 * Expand the 32-bit FACS and DSDT addresses to 64-bit as necessary.
318 * Later code will always use the X 64-bit field. Also, check for an
319 * address mismatch between the 32-bit and 64-bit address fields
320 * (FIRMWARE_CTRL/X_FIRMWARE_CTRL, DSDT/X_DSDT) which would indicate
321 * the presence of two FACS or two DSDT tables.
322 */
291 if (!acpi_gbl_FADT.Xfacs) { 323 if (!acpi_gbl_FADT.Xfacs) {
292 acpi_gbl_FADT.Xfacs = (u64) acpi_gbl_FADT.facs; 324 acpi_gbl_FADT.Xfacs = (u64) acpi_gbl_FADT.facs;
325 } else if (acpi_gbl_FADT.facs &&
326 (acpi_gbl_FADT.Xfacs != (u64) acpi_gbl_FADT.facs)) {
327 ACPI_WARNING((AE_INFO,
328 "32/64 FACS address mismatch in FADT - two FACS tables!"));
293 } 329 }
294 330
295 if (!acpi_gbl_FADT.Xdsdt) { 331 if (!acpi_gbl_FADT.Xdsdt) {
296 acpi_gbl_FADT.Xdsdt = (u64) acpi_gbl_FADT.dsdt; 332 acpi_gbl_FADT.Xdsdt = (u64) acpi_gbl_FADT.dsdt;
333 } else if (acpi_gbl_FADT.dsdt &&
334 (acpi_gbl_FADT.Xdsdt != (u64) acpi_gbl_FADT.dsdt)) {
335 ACPI_WARNING((AE_INFO,
336 "32/64 DSDT address mismatch in FADT - two DSDT tables!"));
297 } 337 }
298 338
299 /* 339 /*
@@ -312,18 +352,23 @@ static void acpi_tb_convert_fadt(void)
312 } 352 }
313 353
314 /* 354 /*
315 * Expand the ACPI 1.0 32-bit V1.0 addresses to the ACPI 2.0 64-bit "X" 355 * Expand the ACPI 1.0 32-bit addresses to the ACPI 2.0 64-bit "X"
316 * generic address structures as necessary. 356 * generic address structures as necessary. Later code will always use
357 * the 64-bit address structures.
317 */ 358 */
318 for (i = 0; i < ACPI_FADT_INFO_ENTRIES; i++) { 359 for (i = 0; i < ACPI_FADT_INFO_ENTRIES; i++) {
319 target = 360 target64 =
320 ACPI_ADD_PTR(struct acpi_generic_address, &acpi_gbl_FADT, 361 ACPI_ADD_PTR(struct acpi_generic_address, &acpi_gbl_FADT,
321 fadt_info_table[i].target); 362 fadt_info_table[i].address64);
322 363
323 /* Expand only if the X target is null */ 364 /* Expand only if the 64-bit X target is null */
324 365
325 if (!target->address) { 366 if (!target64->address) {
326 acpi_tb_init_generic_address(target, 367
368 /* The space_id is always I/O for the 32-bit legacy address fields */
369
370 acpi_tb_init_generic_address(target64,
371 ACPI_ADR_SPACE_SYSTEM_IO,
327 *ACPI_ADD_PTR(u8, 372 *ACPI_ADD_PTR(u8,
328 &acpi_gbl_FADT, 373 &acpi_gbl_FADT,
329 fadt_info_table 374 fadt_info_table
@@ -332,11 +377,64 @@ static void acpi_tb_convert_fadt(void)
332 &acpi_gbl_FADT, 377 &acpi_gbl_FADT,
333 fadt_info_table 378 fadt_info_table
334 [i]. 379 [i].
335 source)); 380 address32));
381 }
382 }
383
384 /* Validate FADT values now, before we make any changes */
385
386 acpi_tb_validate_fadt();
387
388 /*
389 * Optionally check all register lengths against the default values and
390 * update them if they are incorrect.
391 */
392 if (acpi_gbl_use_default_register_widths) {
393 for (i = 0; i < ACPI_FADT_INFO_ENTRIES; i++) {
394 target64 =
395 ACPI_ADD_PTR(struct acpi_generic_address,
396 &acpi_gbl_FADT,
397 fadt_info_table[i].address64);
398
399 /*
400 * If a valid register (Address != 0) and the (default_length > 0)
401 * (Not a GPE register), then check the width against the default.
402 */
403 if ((target64->address) &&
404 (fadt_info_table[i].default_length > 0) &&
405 (fadt_info_table[i].default_length !=
406 target64->bit_width)) {
407 ACPI_WARNING((AE_INFO,
408 "Invalid length for %s: %d, using default %d",
409 fadt_info_table[i].name,
410 target64->bit_width,
411 fadt_info_table[i].
412 default_length));
413
414 /* Incorrect size, set width to the default */
415
416 target64->bit_width =
417 fadt_info_table[i].default_length;
418 }
336 } 419 }
337 } 420 }
338 421
339 /* 422 /*
423 * Get the length of the individual PM1 registers (enable and status).
424 * Each register is defined to be (event block length / 2).
425 */
426 pm1_register_bit_width =
427 (u8)ACPI_DIV_2(acpi_gbl_FADT.xpm1a_event_block.bit_width);
428 pm1_register_byte_width = (u8)ACPI_DIV_8(pm1_register_bit_width);
429
430 /*
431 * Adjust the lengths of the PM1 Event Blocks so that they can be used to
432 * access the PM1 status register(s). Use (width / 2)
433 */
434 acpi_gbl_FADT.xpm1a_event_block.bit_width = pm1_register_bit_width;
435 acpi_gbl_FADT.xpm1b_event_block.bit_width = pm1_register_bit_width;
436
437 /*
340 * Calculate separate GAS structs for the PM1 Enable registers. 438 * Calculate separate GAS structs for the PM1 Enable registers.
341 * These addresses do not appear (directly) in the FADT, so it is 439 * These addresses do not appear (directly) in the FADT, so it is
342 * useful to calculate them once, here. 440 * useful to calculate them once, here.
@@ -356,14 +454,14 @@ static void acpi_tb_convert_fadt(void)
356 " PM1_EVT_LEN (%u)\n", 454 " PM1_EVT_LEN (%u)\n",
357 acpi_gbl_FADT.xpm1a_event_block.bit_width, 455 acpi_gbl_FADT.xpm1a_event_block.bit_width,
358 acpi_gbl_FADT.pm1_event_length); 456 acpi_gbl_FADT.pm1_event_length);
359 pm1_register_length = (u8) ACPI_DIV_2(acpi_gbl_FADT.pm1_event_length);
360 457
361 /* The PM1A register block is required */ 458 /* The PM1A register block is required */
362 459
363 acpi_tb_init_generic_address(&acpi_gbl_xpm1a_enable, 460 acpi_tb_init_generic_address(&acpi_gbl_xpm1a_enable,
364 pm1_register_length, 461 acpi_gbl_FADT.xpm1a_event_block.space_id,
462 pm1_register_byte_width,
365 (acpi_gbl_FADT.xpm1a_event_block.address + 463 (acpi_gbl_FADT.xpm1a_event_block.address +
366 pm1_register_length)); 464 pm1_register_byte_width));
367 /* Don't forget to copy space_id of the GAS */ 465 /* Don't forget to copy space_id of the GAS */
368 acpi_gbl_xpm1a_enable.space_id = 466 acpi_gbl_xpm1a_enable.space_id =
369 acpi_gbl_FADT.xpm1a_event_block.space_id; 467 acpi_gbl_FADT.xpm1a_event_block.space_id;
@@ -379,9 +477,10 @@ static void acpi_tb_convert_fadt(void)
379 acpi_gbl_FADT.xpm1b_event_block.bit_width, 477 acpi_gbl_FADT.xpm1b_event_block.bit_width,
380 acpi_gbl_FADT.pm1_event_length); 478 acpi_gbl_FADT.pm1_event_length);
381 acpi_tb_init_generic_address(&acpi_gbl_xpm1b_enable, 479 acpi_tb_init_generic_address(&acpi_gbl_xpm1b_enable,
382 pm1_register_length, 480 acpi_gbl_FADT.xpm1b_event_block.space_id,
481 pm1_register_byte_width,
383 (acpi_gbl_FADT.xpm1b_event_block. 482 (acpi_gbl_FADT.xpm1b_event_block.
384 address + pm1_register_length)); 483 address + pm1_register_byte_width));
385 /* Don't forget to copy space_id of the GAS */ 484 /* Don't forget to copy space_id of the GAS */
386 acpi_gbl_xpm1b_enable.space_id = 485 acpi_gbl_xpm1b_enable.space_id =
387 acpi_gbl_FADT.xpm1b_event_block.space_id; 486 acpi_gbl_FADT.xpm1b_event_block.space_id;
@@ -411,26 +510,63 @@ static void acpi_tb_convert_fadt(void)
411 510
412static void acpi_tb_validate_fadt(void) 511static void acpi_tb_validate_fadt(void)
413{ 512{
513 char *name;
414 u32 *address32; 514 u32 *address32;
415 struct acpi_generic_address *address64; 515 struct acpi_generic_address *address64;
416 u8 length; 516 u8 length;
417 u32 i; 517 u32 i;
418 518
419 /* Examine all of the 64-bit extended address fields (X fields) */ 519 /*
520 * Check for FACS and DSDT address mismatches. An address mismatch between
521 * the 32-bit and 64-bit address fields (FIRMWARE_CTRL/X_FIRMWARE_CTRL and
522 * DSDT/X_DSDT) would indicate the presence of two FACS or two DSDT tables.
523 */
524 if (acpi_gbl_FADT.facs &&
525 (acpi_gbl_FADT.Xfacs != (u64) acpi_gbl_FADT.facs)) {
526 ACPI_WARNING((AE_INFO,
527 "32/64X FACS address mismatch in FADT - "
528 "two FACS tables! %8.8X/%8.8X%8.8X",
529 acpi_gbl_FADT.facs,
530 ACPI_FORMAT_UINT64(acpi_gbl_FADT.Xfacs)));
531 }
420 532
421 for (i = 0; i < ACPI_FADT_INFO_ENTRIES; i++) { 533 if (acpi_gbl_FADT.dsdt &&
534 (acpi_gbl_FADT.Xdsdt != (u64) acpi_gbl_FADT.dsdt)) {
535 ACPI_WARNING((AE_INFO,
536 "32/64X DSDT address mismatch in FADT - "
537 "two DSDT tables! %8.8X/%8.8X%8.8X",
538 acpi_gbl_FADT.dsdt,
539 ACPI_FORMAT_UINT64(acpi_gbl_FADT.Xdsdt)));
540 }
422 541
423 /* Generate pointers to the 32-bit and 64-bit addresses and get the length */ 542 /* Examine all of the 64-bit extended address fields (X fields) */
424 543
425 address64 = 544 for (i = 0; i < ACPI_FADT_INFO_ENTRIES; i++) {
426 ACPI_ADD_PTR(struct acpi_generic_address, &acpi_gbl_FADT, 545 /*
427 fadt_info_table[i].target); 546 * Generate pointers to the 32-bit and 64-bit addresses, get the
547 * register length (width), and the register name
548 */
549 address64 = ACPI_ADD_PTR(struct acpi_generic_address,
550 &acpi_gbl_FADT,
551 fadt_info_table[i].address64);
428 address32 = 552 address32 =
429 ACPI_ADD_PTR(u32, &acpi_gbl_FADT, 553 ACPI_ADD_PTR(u32, &acpi_gbl_FADT,
430 fadt_info_table[i].source); 554 fadt_info_table[i].address32);
431 length = 555 length =
432 *ACPI_ADD_PTR(u8, &acpi_gbl_FADT, 556 *ACPI_ADD_PTR(u8, &acpi_gbl_FADT,
433 fadt_info_table[i].length); 557 fadt_info_table[i].length);
558 name = fadt_info_table[i].name;
559
560 /*
561 * For each extended field, check for length mismatch between the
562 * legacy length field and the corresponding 64-bit X length field.
563 */
564 if (address64 && (address64->bit_width != ACPI_MUL_8(length))) {
565 ACPI_WARNING((AE_INFO,
566 "32/64X length mismatch in %s: %d/%d",
567 name, ACPI_MUL_8(length),
568 address64->bit_width));
569 }
434 570
435 if (fadt_info_table[i].type & ACPI_FADT_REQUIRED) { 571 if (fadt_info_table[i].type & ACPI_FADT_REQUIRED) {
436 /* 572 /*
@@ -439,8 +575,8 @@ static void acpi_tb_validate_fadt(void)
439 */ 575 */
440 if (!address64->address || !length) { 576 if (!address64->address || !length) {
441 ACPI_ERROR((AE_INFO, 577 ACPI_ERROR((AE_INFO,
442 "Required field \"%s\" has zero address and/or length: %8.8X%8.8X/%X", 578 "Required field %s has zero address and/or length: %8.8X%8.8X/%X",
443 fadt_info_table[i].name, 579 name,
444 ACPI_FORMAT_UINT64(address64-> 580 ACPI_FORMAT_UINT64(address64->
445 address), 581 address),
446 length)); 582 length));
@@ -453,8 +589,8 @@ static void acpi_tb_validate_fadt(void)
453 if ((address64->address && !length) 589 if ((address64->address && !length)
454 || (!address64->address && length)) { 590 || (!address64->address && length)) {
455 ACPI_WARNING((AE_INFO, 591 ACPI_WARNING((AE_INFO,
456 "Optional field \"%s\" has zero address or length: %8.8X%8.8X/%X", 592 "Optional field %s has zero address or length: %8.8X%8.8X/%X",
457 fadt_info_table[i].name, 593 name,
458 ACPI_FORMAT_UINT64(address64-> 594 ACPI_FORMAT_UINT64(address64->
459 address), 595 address),
460 length)); 596 length));
@@ -466,8 +602,8 @@ static void acpi_tb_validate_fadt(void)
466 if (address64->address && *address32 && 602 if (address64->address && *address32 &&
467 (address64->address != (u64) * address32)) { 603 (address64->address != (u64) * address32)) {
468 ACPI_ERROR((AE_INFO, 604 ACPI_ERROR((AE_INFO,
469 "32/64X address mismatch in \"%s\": [%8.8X] [%8.8X%8.8X], using 64X", 605 "32/64X address mismatch in %s: %8.8X/%8.8X%8.8X, using 64X",
470 fadt_info_table[i].name, *address32, 606 name, *address32,
471 ACPI_FORMAT_UINT64(address64->address))); 607 ACPI_FORMAT_UINT64(address64->address)));
472 } 608 }
473 } 609 }
diff --git a/drivers/acpi/tables/tbfind.c b/drivers/acpi/acpica/tbfind.c
index 531584defbb8..1054dfd49207 100644
--- a/drivers/acpi/tables/tbfind.c
+++ b/drivers/acpi/acpica/tbfind.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/actables.h> 45#include "accommon.h"
46#include "actables.h"
46 47
47#define _COMPONENT ACPI_TABLES 48#define _COMPONENT ACPI_TABLES
48ACPI_MODULE_NAME("tbfind") 49ACPI_MODULE_NAME("tbfind")
diff --git a/drivers/acpi/tables/tbinstal.c b/drivers/acpi/acpica/tbinstal.c
index 18747ce8dd2f..37374b21969d 100644
--- a/drivers/acpi/tables/tbinstal.c
+++ b/drivers/acpi/acpica/tbinstal.c
@@ -42,8 +42,9 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acnamesp.h> 45#include "accommon.h"
46#include <acpi/actables.h> 46#include "acnamesp.h"
47#include "actables.h"
47 48
48#define _COMPONENT ACPI_TABLES 49#define _COMPONENT ACPI_TABLES
49ACPI_MODULE_NAME("tbinstal") 50ACPI_MODULE_NAME("tbinstal")
diff --git a/drivers/acpi/tables/tbutils.c b/drivers/acpi/acpica/tbutils.c
index 0cc92ef5236f..9684cc827930 100644
--- a/drivers/acpi/tables/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/actables.h> 45#include "accommon.h"
46#include "actables.h"
46 47
47#define _COMPONENT ACPI_TABLES 48#define _COMPONENT ACPI_TABLES
48ACPI_MODULE_NAME("tbutils") 49ACPI_MODULE_NAME("tbutils")
@@ -113,6 +114,30 @@ acpi_tb_check_xsdt(acpi_physical_address address)
113 114
114/******************************************************************************* 115/*******************************************************************************
115 * 116 *
117 * FUNCTION: acpi_tb_initialize_facs
118 *
119 * PARAMETERS: None
120 *
121 * RETURN: Status
122 *
123 * DESCRIPTION: Create a permanent mapping for the FADT and save it in a global
124 * for accessing the Global Lock and Firmware Waking Vector
125 *
126 ******************************************************************************/
127
128acpi_status acpi_tb_initialize_facs(void)
129{
130 acpi_status status;
131
132 status = acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS,
133 ACPI_CAST_INDIRECT_PTR(struct
134 acpi_table_header,
135 &acpi_gbl_FACS));
136 return status;
137}
138
139/*******************************************************************************
140 *
116 * FUNCTION: acpi_tb_tables_loaded 141 * FUNCTION: acpi_tb_tables_loaded
117 * 142 *
118 * PARAMETERS: None 143 * PARAMETERS: None
@@ -420,7 +445,8 @@ acpi_tb_parse_root_table(acpi_physical_address rsdp_address, u8 flags)
420 445
421 /* Differentiate between RSDT and XSDT root tables */ 446 /* Differentiate between RSDT and XSDT root tables */
422 447
423 if (rsdp->revision > 1 && rsdp->xsdt_physical_address) { 448 if (rsdp->revision > 1 && rsdp->xsdt_physical_address
449 && !acpi_rsdt_forced) {
424 /* 450 /*
425 * Root table is an XSDT (64-bit physical addresses). We must use the 451 * Root table is an XSDT (64-bit physical addresses). We must use the
426 * XSDT if the revision is > 1 and the XSDT pointer is present, as per 452 * XSDT if the revision is > 1 and the XSDT pointer is present, as per
diff --git a/drivers/acpi/tables/tbxface.c b/drivers/acpi/acpica/tbxface.c
index fd7770aa1061..c3e841f3cde9 100644
--- a/drivers/acpi/tables/tbxface.c
+++ b/drivers/acpi/acpica/tbxface.c
@@ -43,8 +43,9 @@
43 */ 43 */
44 44
45#include <acpi/acpi.h> 45#include <acpi/acpi.h>
46#include <acpi/acnamesp.h> 46#include "accommon.h"
47#include <acpi/actables.h> 47#include "acnamesp.h"
48#include "actables.h"
48 49
49#define _COMPONENT ACPI_TABLES 50#define _COMPONENT ACPI_TABLES
50ACPI_MODULE_NAME("tbxface") 51ACPI_MODULE_NAME("tbxface")
diff --git a/drivers/acpi/tables/tbxfroot.c b/drivers/acpi/acpica/tbxfroot.c
index 2d157e0f98d2..b7fc8dd43341 100644
--- a/drivers/acpi/tables/tbxfroot.c
+++ b/drivers/acpi/acpica/tbxfroot.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/actables.h> 45#include "accommon.h"
46#include "actables.h"
46 47
47#define _COMPONENT ACPI_TABLES 48#define _COMPONENT ACPI_TABLES
48ACPI_MODULE_NAME("tbxfroot") 49ACPI_MODULE_NAME("tbxfroot")
diff --git a/drivers/acpi/utilities/utalloc.c b/drivers/acpi/acpica/utalloc.c
index 241c535c1753..7580f6b3069e 100644
--- a/drivers/acpi/utilities/utalloc.c
+++ b/drivers/acpi/acpica/utalloc.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acdebug.h> 45#include "accommon.h"
46#include "acdebug.h"
46 47
47#define _COMPONENT ACPI_UTILITIES 48#define _COMPONENT ACPI_UTILITIES
48ACPI_MODULE_NAME("utalloc") 49ACPI_MODULE_NAME("utalloc")
diff --git a/drivers/acpi/utilities/utcopy.c b/drivers/acpi/acpica/utcopy.c
index 5b2f7c27b705..b0dcfd3c872a 100644
--- a/drivers/acpi/utilities/utcopy.c
+++ b/drivers/acpi/acpica/utcopy.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acnamesp.h> 45#include "accommon.h"
46#include "acnamesp.h"
46 47
47 48
48#define _COMPONENT ACPI_UTILITIES 49#define _COMPONENT ACPI_UTILITIES
diff --git a/drivers/acpi/utilities/utdebug.c b/drivers/acpi/acpica/utdebug.c
index fd66ecb6741e..38821f53042c 100644
--- a/drivers/acpi/utilities/utdebug.c
+++ b/drivers/acpi/acpica/utdebug.c
@@ -42,6 +42,7 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include "accommon.h"
45 46
46#define _COMPONENT ACPI_UTILITIES 47#define _COMPONENT ACPI_UTILITIES
47ACPI_MODULE_NAME("utdebug") 48ACPI_MODULE_NAME("utdebug")
@@ -136,7 +137,7 @@ static const char *acpi_ut_trim_function_name(const char *function_name)
136 137
137/******************************************************************************* 138/*******************************************************************************
138 * 139 *
139 * FUNCTION: acpi_ut_debug_print 140 * FUNCTION: acpi_debug_print
140 * 141 *
141 * PARAMETERS: requested_debug_level - Requested debug print level 142 * PARAMETERS: requested_debug_level - Requested debug print level
142 * line_number - Caller's line number (for error output) 143 * line_number - Caller's line number (for error output)
@@ -154,11 +155,11 @@ static const char *acpi_ut_trim_function_name(const char *function_name)
154 ******************************************************************************/ 155 ******************************************************************************/
155 156
156void ACPI_INTERNAL_VAR_XFACE 157void ACPI_INTERNAL_VAR_XFACE
157acpi_ut_debug_print(u32 requested_debug_level, 158acpi_debug_print(u32 requested_debug_level,
158 u32 line_number, 159 u32 line_number,
159 const char *function_name, 160 const char *function_name,
160 const char *module_name, 161 const char *module_name,
161 u32 component_id, const char *format, ...) 162 u32 component_id, const char *format, ...)
162{ 163{
163 acpi_thread_id thread_id; 164 acpi_thread_id thread_id;
164 va_list args; 165 va_list args;
@@ -205,11 +206,11 @@ acpi_ut_debug_print(u32 requested_debug_level,
205 va_end(args); 206 va_end(args);
206} 207}
207 208
208ACPI_EXPORT_SYMBOL(acpi_ut_debug_print) 209ACPI_EXPORT_SYMBOL(acpi_debug_print)
209 210
210/******************************************************************************* 211/*******************************************************************************
211 * 212 *
212 * FUNCTION: acpi_ut_debug_print_raw 213 * FUNCTION: acpi_debug_print_raw
213 * 214 *
214 * PARAMETERS: requested_debug_level - Requested debug print level 215 * PARAMETERS: requested_debug_level - Requested debug print level
215 * line_number - Caller's line number 216 * line_number - Caller's line number
@@ -226,11 +227,11 @@ ACPI_EXPORT_SYMBOL(acpi_ut_debug_print)
226 * 227 *
227 ******************************************************************************/ 228 ******************************************************************************/
228void ACPI_INTERNAL_VAR_XFACE 229void ACPI_INTERNAL_VAR_XFACE
229acpi_ut_debug_print_raw(u32 requested_debug_level, 230acpi_debug_print_raw(u32 requested_debug_level,
230 u32 line_number, 231 u32 line_number,
231 const char *function_name, 232 const char *function_name,
232 const char *module_name, 233 const char *module_name,
233 u32 component_id, const char *format, ...) 234 u32 component_id, const char *format, ...)
234{ 235{
235 va_list args; 236 va_list args;
236 237
@@ -244,7 +245,7 @@ acpi_ut_debug_print_raw(u32 requested_debug_level,
244 va_end(args); 245 va_end(args);
245} 246}
246 247
247ACPI_EXPORT_SYMBOL(acpi_ut_debug_print_raw) 248ACPI_EXPORT_SYMBOL(acpi_debug_print_raw)
248 249
249/******************************************************************************* 250/*******************************************************************************
250 * 251 *
@@ -270,9 +271,9 @@ acpi_ut_trace(u32 line_number,
270 acpi_gbl_nesting_level++; 271 acpi_gbl_nesting_level++;
271 acpi_ut_track_stack_ptr(); 272 acpi_ut_track_stack_ptr();
272 273
273 acpi_ut_debug_print(ACPI_LV_FUNCTIONS, 274 acpi_debug_print(ACPI_LV_FUNCTIONS,
274 line_number, function_name, module_name, 275 line_number, function_name, module_name, component_id,
275 component_id, "%s\n", acpi_gbl_fn_entry_str); 276 "%s\n", acpi_gbl_fn_entry_str);
276} 277}
277 278
278ACPI_EXPORT_SYMBOL(acpi_ut_trace) 279ACPI_EXPORT_SYMBOL(acpi_ut_trace)
@@ -301,10 +302,9 @@ acpi_ut_trace_ptr(u32 line_number,
301 acpi_gbl_nesting_level++; 302 acpi_gbl_nesting_level++;
302 acpi_ut_track_stack_ptr(); 303 acpi_ut_track_stack_ptr();
303 304
304 acpi_ut_debug_print(ACPI_LV_FUNCTIONS, 305 acpi_debug_print(ACPI_LV_FUNCTIONS,
305 line_number, function_name, module_name, 306 line_number, function_name, module_name, component_id,
306 component_id, "%s %p\n", acpi_gbl_fn_entry_str, 307 "%s %p\n", acpi_gbl_fn_entry_str, pointer);
307 pointer);
308} 308}
309 309
310/******************************************************************************* 310/*******************************************************************************
@@ -333,10 +333,9 @@ acpi_ut_trace_str(u32 line_number,
333 acpi_gbl_nesting_level++; 333 acpi_gbl_nesting_level++;
334 acpi_ut_track_stack_ptr(); 334 acpi_ut_track_stack_ptr();
335 335
336 acpi_ut_debug_print(ACPI_LV_FUNCTIONS, 336 acpi_debug_print(ACPI_LV_FUNCTIONS,
337 line_number, function_name, module_name, 337 line_number, function_name, module_name, component_id,
338 component_id, "%s %s\n", acpi_gbl_fn_entry_str, 338 "%s %s\n", acpi_gbl_fn_entry_str, string);
339 string);
340} 339}
341 340
342/******************************************************************************* 341/*******************************************************************************
@@ -365,10 +364,9 @@ acpi_ut_trace_u32(u32 line_number,
365 acpi_gbl_nesting_level++; 364 acpi_gbl_nesting_level++;
366 acpi_ut_track_stack_ptr(); 365 acpi_ut_track_stack_ptr();
367 366
368 acpi_ut_debug_print(ACPI_LV_FUNCTIONS, 367 acpi_debug_print(ACPI_LV_FUNCTIONS,
369 line_number, function_name, module_name, 368 line_number, function_name, module_name, component_id,
370 component_id, "%s %08X\n", acpi_gbl_fn_entry_str, 369 "%s %08X\n", acpi_gbl_fn_entry_str, integer);
371 integer);
372} 370}
373 371
374/******************************************************************************* 372/*******************************************************************************
@@ -393,9 +391,9 @@ acpi_ut_exit(u32 line_number,
393 const char *module_name, u32 component_id) 391 const char *module_name, u32 component_id)
394{ 392{
395 393
396 acpi_ut_debug_print(ACPI_LV_FUNCTIONS, 394 acpi_debug_print(ACPI_LV_FUNCTIONS,
397 line_number, function_name, module_name, 395 line_number, function_name, module_name, component_id,
398 component_id, "%s\n", acpi_gbl_fn_exit_str); 396 "%s\n", acpi_gbl_fn_exit_str);
399 397
400 acpi_gbl_nesting_level--; 398 acpi_gbl_nesting_level--;
401} 399}
@@ -426,17 +424,16 @@ acpi_ut_status_exit(u32 line_number,
426{ 424{
427 425
428 if (ACPI_SUCCESS(status)) { 426 if (ACPI_SUCCESS(status)) {
429 acpi_ut_debug_print(ACPI_LV_FUNCTIONS, 427 acpi_debug_print(ACPI_LV_FUNCTIONS,
430 line_number, function_name, module_name, 428 line_number, function_name, module_name,
431 component_id, "%s %s\n", 429 component_id, "%s %s\n", acpi_gbl_fn_exit_str,
432 acpi_gbl_fn_exit_str, 430 acpi_format_exception(status));
433 acpi_format_exception(status));
434 } else { 431 } else {
435 acpi_ut_debug_print(ACPI_LV_FUNCTIONS, 432 acpi_debug_print(ACPI_LV_FUNCTIONS,
436 line_number, function_name, module_name, 433 line_number, function_name, module_name,
437 component_id, "%s ****Exception****: %s\n", 434 component_id, "%s ****Exception****: %s\n",
438 acpi_gbl_fn_exit_str, 435 acpi_gbl_fn_exit_str,
439 acpi_format_exception(status)); 436 acpi_format_exception(status));
440 } 437 }
441 438
442 acpi_gbl_nesting_level--; 439 acpi_gbl_nesting_level--;
@@ -467,10 +464,10 @@ acpi_ut_value_exit(u32 line_number,
467 u32 component_id, acpi_integer value) 464 u32 component_id, acpi_integer value)
468{ 465{
469 466
470 acpi_ut_debug_print(ACPI_LV_FUNCTIONS, 467 acpi_debug_print(ACPI_LV_FUNCTIONS,
471 line_number, function_name, module_name, 468 line_number, function_name, module_name, component_id,
472 component_id, "%s %8.8X%8.8X\n", 469 "%s %8.8X%8.8X\n", acpi_gbl_fn_exit_str,
473 acpi_gbl_fn_exit_str, ACPI_FORMAT_UINT64(value)); 470 ACPI_FORMAT_UINT64(value));
474 471
475 acpi_gbl_nesting_level--; 472 acpi_gbl_nesting_level--;
476} 473}
@@ -499,9 +496,9 @@ acpi_ut_ptr_exit(u32 line_number,
499 const char *module_name, u32 component_id, u8 *ptr) 496 const char *module_name, u32 component_id, u8 *ptr)
500{ 497{
501 498
502 acpi_ut_debug_print(ACPI_LV_FUNCTIONS, 499 acpi_debug_print(ACPI_LV_FUNCTIONS,
503 line_number, function_name, module_name, 500 line_number, function_name, module_name, component_id,
504 component_id, "%s %p\n", acpi_gbl_fn_exit_str, ptr); 501 "%s %p\n", acpi_gbl_fn_exit_str, ptr);
505 502
506 acpi_gbl_nesting_level--; 503 acpi_gbl_nesting_level--;
507} 504}
diff --git a/drivers/acpi/utilities/utdelete.c b/drivers/acpi/acpica/utdelete.c
index d197c6b29e17..a0be9e39531e 100644
--- a/drivers/acpi/utilities/utdelete.c
+++ b/drivers/acpi/acpica/utdelete.c
@@ -42,9 +42,10 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acinterp.h> 45#include "accommon.h"
46#include <acpi/acnamesp.h> 46#include "acinterp.h"
47#include <acpi/acevents.h> 47#include "acnamesp.h"
48#include "acevents.h"
48 49
49#define _COMPONENT ACPI_UTILITIES 50#define _COMPONENT ACPI_UTILITIES
50ACPI_MODULE_NAME("utdelete") 51ACPI_MODULE_NAME("utdelete")
diff --git a/drivers/acpi/utilities/uteval.c b/drivers/acpi/acpica/uteval.c
index 352747e49c7a..da9450bc60f7 100644
--- a/drivers/acpi/utilities/uteval.c
+++ b/drivers/acpi/acpica/uteval.c
@@ -42,8 +42,9 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acnamesp.h> 45#include "accommon.h"
46#include <acpi/acinterp.h> 46#include "acnamesp.h"
47#include "acinterp.h"
47 48
48#define _COMPONENT ACPI_UTILITIES 49#define _COMPONENT ACPI_UTILITIES
49ACPI_MODULE_NAME("uteval") 50ACPI_MODULE_NAME("uteval")
@@ -129,7 +130,7 @@ acpi_status acpi_ut_osi_implementation(struct acpi_walk_state *walk_state)
129 130
130 /* The interface is supported */ 131 /* The interface is supported */
131 132
132 return_ACPI_STATUS(AE_CTRL_TERMINATE); 133 return_ACPI_STATUS(AE_OK);
133 } 134 }
134 } 135 }
135 136
@@ -143,13 +144,13 @@ acpi_status acpi_ut_osi_implementation(struct acpi_walk_state *walk_state)
143 144
144 /* The interface is supported */ 145 /* The interface is supported */
145 146
146 return_ACPI_STATUS(AE_CTRL_TERMINATE); 147 return_ACPI_STATUS(AE_OK);
147 } 148 }
148 149
149 /* The interface is not supported */ 150 /* The interface is not supported */
150 151
151 return_desc->integer.value = 0; 152 return_desc->integer.value = 0;
152 return_ACPI_STATUS(AE_CTRL_TERMINATE); 153 return_ACPI_STATUS(AE_OK);
153} 154}
154 155
155/******************************************************************************* 156/*******************************************************************************
diff --git a/drivers/acpi/utilities/utglobal.c b/drivers/acpi/acpica/utglobal.c
index 17ed5ac840f7..a3ab9d9da299 100644
--- a/drivers/acpi/utilities/utglobal.c
+++ b/drivers/acpi/acpica/utglobal.c
@@ -44,11 +44,11 @@
44#define DEFINE_ACPI_GLOBALS 44#define DEFINE_ACPI_GLOBALS
45 45
46#include <acpi/acpi.h> 46#include <acpi/acpi.h>
47#include <acpi/acnamesp.h> 47#include "accommon.h"
48#include "acnamesp.h"
48 49
49ACPI_EXPORT_SYMBOL(acpi_gbl_FADT)
50#define _COMPONENT ACPI_UTILITIES 50#define _COMPONENT ACPI_UTILITIES
51 ACPI_MODULE_NAME("utglobal") 51ACPI_MODULE_NAME("utglobal")
52 52
53/******************************************************************************* 53/*******************************************************************************
54 * 54 *
@@ -352,7 +352,7 @@ const char *acpi_gbl_region_types[ACPI_NUM_PREDEFINED_REGIONS] = {
352 "PCI_Config", 352 "PCI_Config",
353 "EmbeddedControl", 353 "EmbeddedControl",
354 "SMBus", 354 "SMBus",
355 "CMOS", 355 "SystemCMOS",
356 "PCIBARTarget", 356 "PCIBARTarget",
357 "DataTable" 357 "DataTable"
358}; 358};
@@ -756,6 +756,7 @@ acpi_status acpi_ut_init_globals(void)
756 acpi_gbl_gpe_xrupt_list_head = NULL; 756 acpi_gbl_gpe_xrupt_list_head = NULL;
757 acpi_gbl_gpe_fadt_blocks[0] = NULL; 757 acpi_gbl_gpe_fadt_blocks[0] = NULL;
758 acpi_gbl_gpe_fadt_blocks[1] = NULL; 758 acpi_gbl_gpe_fadt_blocks[1] = NULL;
759 acpi_current_gpe_count = 0;
759 760
760 /* Global handlers */ 761 /* Global handlers */
761 762
@@ -771,6 +772,7 @@ acpi_status acpi_ut_init_globals(void)
771 acpi_gbl_global_lock_mutex = NULL; 772 acpi_gbl_global_lock_mutex = NULL;
772 acpi_gbl_global_lock_acquired = FALSE; 773 acpi_gbl_global_lock_acquired = FALSE;
773 acpi_gbl_global_lock_handle = 0; 774 acpi_gbl_global_lock_handle = 0;
775 acpi_gbl_global_lock_present = FALSE;
774 776
775 /* Miscellaneous variables */ 777 /* Miscellaneous variables */
776 778
@@ -815,5 +817,7 @@ acpi_status acpi_ut_init_globals(void)
815 return_ACPI_STATUS(AE_OK); 817 return_ACPI_STATUS(AE_OK);
816} 818}
817 819
820ACPI_EXPORT_SYMBOL(acpi_gbl_FADT)
818ACPI_EXPORT_SYMBOL(acpi_dbg_level) 821ACPI_EXPORT_SYMBOL(acpi_dbg_level)
819ACPI_EXPORT_SYMBOL(acpi_dbg_layer) 822ACPI_EXPORT_SYMBOL(acpi_dbg_layer)
823ACPI_EXPORT_SYMBOL(acpi_current_gpe_count)
diff --git a/drivers/acpi/utilities/utinit.c b/drivers/acpi/acpica/utinit.c
index cae515fc02d3..a54ca84eb362 100644
--- a/drivers/acpi/utilities/utinit.c
+++ b/drivers/acpi/acpica/utinit.c
@@ -42,9 +42,10 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acnamesp.h> 45#include "accommon.h"
46#include <acpi/acevents.h> 46#include "acnamesp.h"
47#include <acpi/actables.h> 47#include "acevents.h"
48#include "actables.h"
48 49
49#define _COMPONENT ACPI_UTILITIES 50#define _COMPONENT ACPI_UTILITIES
50ACPI_MODULE_NAME("utinit") 51ACPI_MODULE_NAME("utinit")
diff --git a/drivers/acpi/utilities/utmath.c b/drivers/acpi/acpica/utmath.c
index c927324fdd26..c9f682d640ef 100644
--- a/drivers/acpi/utilities/utmath.c
+++ b/drivers/acpi/acpica/utmath.c
@@ -42,6 +42,7 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include "accommon.h"
45 46
46#define _COMPONENT ACPI_UTILITIES 47#define _COMPONENT ACPI_UTILITIES
47ACPI_MODULE_NAME("utmath") 48ACPI_MODULE_NAME("utmath")
diff --git a/drivers/acpi/utilities/utmisc.c b/drivers/acpi/acpica/utmisc.c
index 9089a158a874..c1f7f4e1a72d 100644
--- a/drivers/acpi/utilities/utmisc.c
+++ b/drivers/acpi/acpica/utmisc.c
@@ -44,7 +44,8 @@
44#include <linux/module.h> 44#include <linux/module.h>
45 45
46#include <acpi/acpi.h> 46#include <acpi/acpi.h>
47#include <acpi/acnamesp.h> 47#include "accommon.h"
48#include "acnamesp.h"
48 49
49#define _COMPONENT ACPI_UTILITIES 50#define _COMPONENT ACPI_UTILITIES
50ACPI_MODULE_NAME("utmisc") 51ACPI_MODULE_NAME("utmisc")
@@ -1016,7 +1017,7 @@ acpi_ut_walk_package_tree(union acpi_operand_object * source_object,
1016 1017
1017/******************************************************************************* 1018/*******************************************************************************
1018 * 1019 *
1019 * FUNCTION: acpi_ut_error, acpi_ut_warning, acpi_ut_info 1020 * FUNCTION: acpi_error, acpi_exception, acpi_warning, acpi_info
1020 * 1021 *
1021 * PARAMETERS: module_name - Caller's module name (for error output) 1022 * PARAMETERS: module_name - Caller's module name (for error output)
1022 * line_number - Caller's line number (for error output) 1023 * line_number - Caller's line number (for error output)
@@ -1029,7 +1030,7 @@ acpi_ut_walk_package_tree(union acpi_operand_object * source_object,
1029 ******************************************************************************/ 1030 ******************************************************************************/
1030 1031
1031void ACPI_INTERNAL_VAR_XFACE 1032void ACPI_INTERNAL_VAR_XFACE
1032acpi_ut_error(const char *module_name, u32 line_number, const char *format, ...) 1033acpi_error(const char *module_name, u32 line_number, const char *format, ...)
1033{ 1034{
1034 va_list args; 1035 va_list args;
1035 1036
@@ -1042,8 +1043,8 @@ acpi_ut_error(const char *module_name, u32 line_number, const char *format, ...)
1042} 1043}
1043 1044
1044void ACPI_INTERNAL_VAR_XFACE 1045void ACPI_INTERNAL_VAR_XFACE
1045acpi_ut_exception(const char *module_name, 1046acpi_exception(const char *module_name,
1046 u32 line_number, acpi_status status, const char *format, ...) 1047 u32 line_number, acpi_status status, const char *format, ...)
1047{ 1048{
1048 va_list args; 1049 va_list args;
1049 1050
@@ -1056,11 +1057,8 @@ acpi_ut_exception(const char *module_name,
1056 va_end(args); 1057 va_end(args);
1057} 1058}
1058 1059
1059EXPORT_SYMBOL(acpi_ut_exception);
1060
1061void ACPI_INTERNAL_VAR_XFACE 1060void ACPI_INTERNAL_VAR_XFACE
1062acpi_ut_warning(const char *module_name, 1061acpi_warning(const char *module_name, u32 line_number, const char *format, ...)
1063 u32 line_number, const char *format, ...)
1064{ 1062{
1065 va_list args; 1063 va_list args;
1066 1064
@@ -1073,7 +1071,7 @@ acpi_ut_warning(const char *module_name,
1073} 1071}
1074 1072
1075void ACPI_INTERNAL_VAR_XFACE 1073void ACPI_INTERNAL_VAR_XFACE
1076acpi_ut_info(const char *module_name, u32 line_number, const char *format, ...) 1074acpi_info(const char *module_name, u32 line_number, const char *format, ...)
1077{ 1075{
1078 va_list args; 1076 va_list args;
1079 1077
@@ -1088,3 +1086,8 @@ acpi_ut_info(const char *module_name, u32 line_number, const char *format, ...)
1088 acpi_os_printf("\n"); 1086 acpi_os_printf("\n");
1089 va_end(args); 1087 va_end(args);
1090} 1088}
1089
1090ACPI_EXPORT_SYMBOL(acpi_error)
1091ACPI_EXPORT_SYMBOL(acpi_exception)
1092ACPI_EXPORT_SYMBOL(acpi_warning)
1093ACPI_EXPORT_SYMBOL(acpi_info)
diff --git a/drivers/acpi/utilities/utmutex.c b/drivers/acpi/acpica/utmutex.c
index 7331dde9e1b3..14eb52c4d647 100644
--- a/drivers/acpi/utilities/utmutex.c
+++ b/drivers/acpi/acpica/utmutex.c
@@ -42,6 +42,7 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include "accommon.h"
45 46
46#define _COMPONENT ACPI_UTILITIES 47#define _COMPONENT ACPI_UTILITIES
47ACPI_MODULE_NAME("utmutex") 48ACPI_MODULE_NAME("utmutex")
diff --git a/drivers/acpi/utilities/utobject.c b/drivers/acpi/acpica/utobject.c
index 4bef3cfbaccb..fd5ea7543e5b 100644
--- a/drivers/acpi/utilities/utobject.c
+++ b/drivers/acpi/acpica/utobject.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acnamesp.h> 45#include "accommon.h"
46#include "acnamesp.h"
46 47
47#define _COMPONENT ACPI_UTILITIES 48#define _COMPONENT ACPI_UTILITIES
48ACPI_MODULE_NAME("utobject") 49ACPI_MODULE_NAME("utobject")
diff --git a/drivers/acpi/utilities/utresrc.c b/drivers/acpi/acpica/utresrc.c
index c3e3e1308edc..91b7c00236f4 100644
--- a/drivers/acpi/utilities/utresrc.c
+++ b/drivers/acpi/acpica/utresrc.c
@@ -42,7 +42,8 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/amlresrc.h> 45#include "accommon.h"
46#include "amlresrc.h"
46 47
47#define _COMPONENT ACPI_UTILITIES 48#define _COMPONENT ACPI_UTILITIES
48ACPI_MODULE_NAME("utresrc") 49ACPI_MODULE_NAME("utresrc")
diff --git a/drivers/acpi/utilities/utstate.c b/drivers/acpi/acpica/utstate.c
index 63a6d3d77d88..0440c958f5a4 100644
--- a/drivers/acpi/utilities/utstate.c
+++ b/drivers/acpi/acpica/utstate.c
@@ -42,6 +42,7 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include "accommon.h"
45 46
46#define _COMPONENT ACPI_UTILITIES 47#define _COMPONENT ACPI_UTILITIES
47ACPI_MODULE_NAME("utstate") 48ACPI_MODULE_NAME("utstate")
diff --git a/drivers/acpi/utilities/utxface.c b/drivers/acpi/acpica/utxface.c
index c198a4d40583..078a22728c6b 100644
--- a/drivers/acpi/utilities/utxface.c
+++ b/drivers/acpi/acpica/utxface.c
@@ -42,9 +42,11 @@
42 */ 42 */
43 43
44#include <acpi/acpi.h> 44#include <acpi/acpi.h>
45#include <acpi/acevents.h> 45#include "accommon.h"
46#include <acpi/acnamesp.h> 46#include "acevents.h"
47#include <acpi/acdebug.h> 47#include "acnamesp.h"
48#include "acdebug.h"
49#include "actables.h"
48 50
49#define _COMPONENT ACPI_UTILITIES 51#define _COMPONENT ACPI_UTILITIES
50ACPI_MODULE_NAME("utxface") 52ACPI_MODULE_NAME("utxface")
@@ -148,6 +150,16 @@ acpi_status acpi_enable_subsystem(u32 flags)
148 } 150 }
149 151
150 /* 152 /*
153 * Obtain a permanent mapping for the FACS. This is required for the
154 * Global Lock and the Firmware Waking Vector
155 */
156 status = acpi_tb_initialize_facs();
157 if (ACPI_FAILURE(status)) {
158 ACPI_WARNING((AE_INFO, "Could not map the FACS table"));
159 return_ACPI_STATUS(status);
160 }
161
162 /*
151 * Install the default op_region handlers. These are installed unless 163 * Install the default op_region handlers. These are installed unless
152 * other handlers have already been installed via the 164 * other handlers have already been installed via the
153 * install_address_space_handler interface. 165 * install_address_space_handler interface.
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 1423b0c0cd2e..65132f920459 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -471,7 +471,7 @@ static void sysfs_remove_battery(struct acpi_battery *battery)
471 471
472static int acpi_battery_update(struct acpi_battery *battery) 472static int acpi_battery_update(struct acpi_battery *battery)
473{ 473{
474 int result; 474 int result, old_present = acpi_battery_present(battery);
475 result = acpi_battery_get_status(battery); 475 result = acpi_battery_get_status(battery);
476 if (result) 476 if (result)
477 return result; 477 return result;
@@ -482,7 +482,8 @@ static int acpi_battery_update(struct acpi_battery *battery)
482 return 0; 482 return 0;
483 } 483 }
484#endif 484#endif
485 if (!battery->update_time) { 485 if (!battery->update_time ||
486 old_present != acpi_battery_present(battery)) {
486 result = acpi_battery_get_info(battery); 487 result = acpi_battery_get_info(battery);
487 if (result) 488 if (result)
488 return result; 489 return result;
diff --git a/drivers/acpi/cm_sbs.c b/drivers/acpi/cm_sbs.c
index 307963bd1043..332fe4b21708 100644
--- a/drivers/acpi/cm_sbs.c
+++ b/drivers/acpi/cm_sbs.c
@@ -27,9 +27,6 @@
27#include <linux/seq_file.h> 27#include <linux/seq_file.h>
28#include <acpi/acpi_bus.h> 28#include <acpi/acpi_bus.h>
29#include <acpi/acpi_drivers.h> 29#include <acpi/acpi_drivers.h>
30#include <acpi/acmacros.h>
31#include <acpi/actypes.h>
32#include <acpi/acutils.h>
33 30
34ACPI_MODULE_NAME("cm_sbs"); 31ACPI_MODULE_NAME("cm_sbs");
35#define ACPI_AC_CLASS "ac_adapter" 32#define ACPI_AC_CLASS "ac_adapter"
diff --git a/drivers/acpi/debug.c b/drivers/acpi/debug.c
index c48396892008..20223cbd0d1c 100644
--- a/drivers/acpi/debug.c
+++ b/drivers/acpi/debug.c
@@ -9,7 +9,6 @@
9#include <linux/moduleparam.h> 9#include <linux/moduleparam.h>
10#include <asm/uaccess.h> 10#include <asm/uaccess.h>
11#include <acpi/acpi_drivers.h> 11#include <acpi/acpi_drivers.h>
12#include <acpi/acglobal.h>
13 12
14#define _COMPONENT ACPI_SYSTEM_COMPONENT 13#define _COMPONENT ACPI_SYSTEM_COMPONENT
15ACPI_MODULE_NAME("debug"); 14ACPI_MODULE_NAME("debug");
diff --git a/drivers/acpi/dispatcher/Makefile b/drivers/acpi/dispatcher/Makefile
deleted file mode 100644
index eb7e602a83cd..000000000000
--- a/drivers/acpi/dispatcher/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
1#
2# Makefile for all Linux ACPI interpreter subdirectories
3#
4
5obj-y := dsfield.o dsmthdat.o dsopcode.o dswexec.o dswscope.o \
6 dsmethod.o dsobject.o dsutils.o dswload.o dswstate.o \
7 dsinit.o
8
9EXTRA_CFLAGS += $(ACPI_CFLAGS)
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 30f3ef236ecb..8dfcbb8aff73 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -42,7 +42,6 @@
42#include <asm/io.h> 42#include <asm/io.h>
43#include <acpi/acpi_bus.h> 43#include <acpi/acpi_bus.h>
44#include <acpi/acpi_drivers.h> 44#include <acpi/acpi_drivers.h>
45#include <acpi/actypes.h>
46 45
47#define ACPI_EC_CLASS "embedded_controller" 46#define ACPI_EC_CLASS "embedded_controller"
48#define ACPI_EC_DEVICE_NAME "Embedded Controller" 47#define ACPI_EC_DEVICE_NAME "Embedded Controller"
@@ -370,7 +369,7 @@ unlock:
370 * Note: samsung nv5000 doesn't work with ec burst mode. 369 * Note: samsung nv5000 doesn't work with ec burst mode.
371 * http://bugzilla.kernel.org/show_bug.cgi?id=4980 370 * http://bugzilla.kernel.org/show_bug.cgi?id=4980
372 */ 371 */
373int acpi_ec_burst_enable(struct acpi_ec *ec) 372static int acpi_ec_burst_enable(struct acpi_ec *ec)
374{ 373{
375 u8 d; 374 u8 d;
376 struct transaction t = {.command = ACPI_EC_BURST_ENABLE, 375 struct transaction t = {.command = ACPI_EC_BURST_ENABLE,
@@ -380,7 +379,7 @@ int acpi_ec_burst_enable(struct acpi_ec *ec)
380 return acpi_ec_transaction(ec, &t, 0); 379 return acpi_ec_transaction(ec, &t, 0);
381} 380}
382 381
383int acpi_ec_burst_disable(struct acpi_ec *ec) 382static int acpi_ec_burst_disable(struct acpi_ec *ec)
384{ 383{
385 struct transaction t = {.command = ACPI_EC_BURST_DISABLE, 384 struct transaction t = {.command = ACPI_EC_BURST_DISABLE,
386 .wdata = NULL, .rdata = NULL, 385 .wdata = NULL, .rdata = NULL,
@@ -756,10 +755,15 @@ static acpi_status
756acpi_ec_register_query_methods(acpi_handle handle, u32 level, 755acpi_ec_register_query_methods(acpi_handle handle, u32 level,
757 void *context, void **return_value) 756 void *context, void **return_value)
758{ 757{
759 struct acpi_namespace_node *node = handle; 758 char node_name[5];
759 struct acpi_buffer buffer = { sizeof(node_name), node_name };
760 struct acpi_ec *ec = context; 760 struct acpi_ec *ec = context;
761 int value = 0; 761 int value = 0;
762 if (sscanf(node->name.ascii, "_Q%x", &value) == 1) { 762 acpi_status status;
763
764 status = acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer);
765
766 if (ACPI_SUCCESS(status) && sscanf(node_name, "_Q%x", &value) == 1) {
763 acpi_ec_add_query_handler(ec, value, handle, NULL, NULL); 767 acpi_ec_add_query_handler(ec, value, handle, NULL, NULL);
764 } 768 }
765 return AE_OK; 769 return AE_OK;
@@ -978,9 +982,9 @@ static const struct acpi_device_id ec_device_ids[] = {
978 982
979int __init acpi_ec_ecdt_probe(void) 983int __init acpi_ec_ecdt_probe(void)
980{ 984{
981 int ret;
982 acpi_status status; 985 acpi_status status;
983 struct acpi_table_ecdt *ecdt_ptr; 986 struct acpi_table_ecdt *ecdt_ptr;
987 acpi_handle dummy;
984 988
985 boot_ec = make_acpi_ec(); 989 boot_ec = make_acpi_ec();
986 if (!boot_ec) 990 if (!boot_ec)
@@ -1006,30 +1010,31 @@ int __init acpi_ec_ecdt_probe(void)
1006 boot_ec->gpe = ecdt_ptr->gpe; 1010 boot_ec->gpe = ecdt_ptr->gpe;
1007 boot_ec->handle = ACPI_ROOT_OBJECT; 1011 boot_ec->handle = ACPI_ROOT_OBJECT;
1008 acpi_get_handle(ACPI_ROOT_OBJECT, ecdt_ptr->id, &boot_ec->handle); 1012 acpi_get_handle(ACPI_ROOT_OBJECT, ecdt_ptr->id, &boot_ec->handle);
1009 } else { 1013 /* Add some basic check against completely broken table */
1010 /* This workaround is needed only on some broken machines, 1014 if (boot_ec->data_addr != boot_ec->command_addr)
1011 * which require early EC, but fail to provide ECDT */ 1015 goto install;
1012 acpi_handle x; 1016 /* fall through */
1013 printk(KERN_DEBUG PREFIX "Look up EC in DSDT\n");
1014 status = acpi_get_devices(ec_device_ids[0].id, ec_parse_device,
1015 boot_ec, NULL);
1016 /* Check that acpi_get_devices actually find something */
1017 if (ACPI_FAILURE(status) || !boot_ec->handle)
1018 goto error;
1019 /* We really need to limit this workaround, the only ASUS,
1020 * which needs it, has fake EC._INI method, so use it as flag.
1021 * Keep boot_ec struct as it will be needed soon.
1022 */
1023 if (ACPI_FAILURE(acpi_get_handle(boot_ec->handle, "_INI", &x)))
1024 return -ENODEV;
1025 } 1017 }
1026 1018 /* This workaround is needed only on some broken machines,
1027 ret = ec_install_handlers(boot_ec); 1019 * which require early EC, but fail to provide ECDT */
1028 if (!ret) { 1020 printk(KERN_DEBUG PREFIX "Look up EC in DSDT\n");
1021 status = acpi_get_devices(ec_device_ids[0].id, ec_parse_device,
1022 boot_ec, NULL);
1023 /* Check that acpi_get_devices actually find something */
1024 if (ACPI_FAILURE(status) || !boot_ec->handle)
1025 goto error;
1026 /* We really need to limit this workaround, the only ASUS,
1027 * which needs it, has fake EC._INI method, so use it as flag.
1028 * Keep boot_ec struct as it will be needed soon.
1029 */
1030 if (ACPI_FAILURE(acpi_get_handle(boot_ec->handle, "_INI", &dummy)))
1031 return -ENODEV;
1032install:
1033 if (!ec_install_handlers(boot_ec)) {
1029 first_ec = boot_ec; 1034 first_ec = boot_ec;
1030 return 0; 1035 return 0;
1031 } 1036 }
1032 error: 1037error:
1033 kfree(boot_ec); 1038 kfree(boot_ec);
1034 boot_ec = NULL; 1039 boot_ec = NULL;
1035 return -ENODEV; 1040 return -ENODEV;
diff --git a/drivers/acpi/events/Makefile b/drivers/acpi/events/Makefile
deleted file mode 100644
index d29f2ee449cc..000000000000
--- a/drivers/acpi/events/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
1#
2# Makefile for all Linux ACPI interpreter subdirectories
3#
4
5obj-y := evevent.o evregion.o evsci.o evxfevnt.o \
6 evmisc.o evrgnini.o evxface.o evxfregn.o \
7 evgpe.o evgpeblk.o
8
9EXTRA_CFLAGS += $(ACPI_CFLAGS)
diff --git a/drivers/acpi/executer/Makefile b/drivers/acpi/executer/Makefile
deleted file mode 100644
index e09998aa012f..000000000000
--- a/drivers/acpi/executer/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
1#
2# Makefile for all Linux ACPI interpreter subdirectories
3#
4
5obj-y := exconfig.o exfield.o exnames.o exoparg6.o exresolv.o exstorob.o\
6 exconvrt.o exfldio.o exoparg1.o exprep.o exresop.o exsystem.o\
7 excreate.o exmisc.o exoparg2.o exregion.o exstore.o exutils.o \
8 exdump.o exmutex.o exoparg3.o exresnte.o exstoren.o
9
10EXTRA_CFLAGS += $(ACPI_CFLAGS)
diff --git a/drivers/acpi/hardware/Makefile b/drivers/acpi/hardware/Makefile
deleted file mode 100644
index 438ad373b9ad..000000000000
--- a/drivers/acpi/hardware/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
1#
2# Makefile for all Linux ACPI interpreter subdirectories
3#
4
5obj-y := hwacpi.o hwgpe.o hwregs.o hwsleep.o
6
7obj-$(ACPI_FUTURE_USAGE) += hwtimer.o
8
9EXTRA_CFLAGS += $(ACPI_CFLAGS)
diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/main.c
index 28a691cc625e..7e3c609cbef2 100644
--- a/drivers/acpi/sleep/main.c
+++ b/drivers/acpi/main.c
@@ -101,13 +101,26 @@ void __init acpi_old_suspend_ordering(void)
101 * cases. 101 * cases.
102 */ 102 */
103static bool set_sci_en_on_resume; 103static bool set_sci_en_on_resume;
104/*
105 * The ACPI specification wants us to save NVS memory regions during hibernation
106 * and to restore them during the subsequent resume. However, it is not certain
107 * if this mechanism is going to work on all machines, so we allow the user to
108 * disable this mechanism using the 'acpi_sleep=s4_nonvs' kernel command line
109 * option.
110 */
111static bool s4_no_nvs;
112
113void __init acpi_s4_no_nvs(void)
114{
115 s4_no_nvs = true;
116}
104 117
105/** 118/**
106 * acpi_pm_disable_gpes - Disable the GPEs. 119 * acpi_pm_disable_gpes - Disable the GPEs.
107 */ 120 */
108static int acpi_pm_disable_gpes(void) 121static int acpi_pm_disable_gpes(void)
109{ 122{
110 acpi_hw_disable_all_gpes(); 123 acpi_disable_all_gpes();
111 return 0; 124 return 0;
112} 125}
113 126
@@ -135,7 +148,7 @@ static int acpi_pm_prepare(void)
135 int error = __acpi_pm_prepare(); 148 int error = __acpi_pm_prepare();
136 149
137 if (!error) 150 if (!error)
138 acpi_hw_disable_all_gpes(); 151 acpi_disable_all_gpes();
139 return error; 152 return error;
140} 153}
141 154
@@ -267,7 +280,7 @@ static int acpi_suspend_enter(suspend_state_t pm_state)
267 * (like wakeup GPE) haven't handler, this can avoid such GPE misfire. 280 * (like wakeup GPE) haven't handler, this can avoid such GPE misfire.
268 * acpi_leave_sleep_state will reenable specific GPEs later 281 * acpi_leave_sleep_state will reenable specific GPEs later
269 */ 282 */
270 acpi_hw_disable_all_gpes(); 283 acpi_disable_all_gpes();
271 284
272 local_irq_restore(flags); 285 local_irq_restore(flags);
273 printk(KERN_DEBUG "Back to C!\n"); 286 printk(KERN_DEBUG "Back to C!\n");
@@ -394,9 +407,25 @@ void __init acpi_no_s4_hw_signature(void)
394 407
395static int acpi_hibernation_begin(void) 408static int acpi_hibernation_begin(void)
396{ 409{
397 acpi_target_sleep_state = ACPI_STATE_S4; 410 int error;
398 acpi_sleep_tts_switch(acpi_target_sleep_state); 411
399 return 0; 412 error = s4_no_nvs ? 0 : hibernate_nvs_alloc();
413 if (!error) {
414 acpi_target_sleep_state = ACPI_STATE_S4;
415 acpi_sleep_tts_switch(acpi_target_sleep_state);
416 }
417
418 return error;
419}
420
421static int acpi_hibernation_pre_snapshot(void)
422{
423 int error = acpi_pm_prepare();
424
425 if (!error)
426 hibernate_nvs_save();
427
428 return error;
400} 429}
401 430
402static int acpi_hibernation_enter(void) 431static int acpi_hibernation_enter(void)
@@ -417,6 +446,12 @@ static int acpi_hibernation_enter(void)
417 return ACPI_SUCCESS(status) ? 0 : -EFAULT; 446 return ACPI_SUCCESS(status) ? 0 : -EFAULT;
418} 447}
419 448
449static void acpi_hibernation_finish(void)
450{
451 hibernate_nvs_free();
452 acpi_pm_finish();
453}
454
420static void acpi_hibernation_leave(void) 455static void acpi_hibernation_leave(void)
421{ 456{
422 /* 457 /*
@@ -432,18 +467,20 @@ static void acpi_hibernation_leave(void)
432 "cannot resume!\n"); 467 "cannot resume!\n");
433 panic("ACPI S4 hardware signature mismatch"); 468 panic("ACPI S4 hardware signature mismatch");
434 } 469 }
470 /* Restore the NVS memory area */
471 hibernate_nvs_restore();
435} 472}
436 473
437static void acpi_pm_enable_gpes(void) 474static void acpi_pm_enable_gpes(void)
438{ 475{
439 acpi_hw_enable_all_runtime_gpes(); 476 acpi_enable_all_runtime_gpes();
440} 477}
441 478
442static struct platform_hibernation_ops acpi_hibernation_ops = { 479static struct platform_hibernation_ops acpi_hibernation_ops = {
443 .begin = acpi_hibernation_begin, 480 .begin = acpi_hibernation_begin,
444 .end = acpi_pm_end, 481 .end = acpi_pm_end,
445 .pre_snapshot = acpi_pm_prepare, 482 .pre_snapshot = acpi_hibernation_pre_snapshot,
446 .finish = acpi_pm_finish, 483 .finish = acpi_hibernation_finish,
447 .prepare = acpi_pm_prepare, 484 .prepare = acpi_pm_prepare,
448 .enter = acpi_hibernation_enter, 485 .enter = acpi_hibernation_enter,
449 .leave = acpi_hibernation_leave, 486 .leave = acpi_hibernation_leave,
@@ -469,8 +506,22 @@ static int acpi_hibernation_begin_old(void)
469 506
470 error = acpi_sleep_prepare(ACPI_STATE_S4); 507 error = acpi_sleep_prepare(ACPI_STATE_S4);
471 508
509 if (!error) {
510 if (!s4_no_nvs)
511 error = hibernate_nvs_alloc();
512 if (!error)
513 acpi_target_sleep_state = ACPI_STATE_S4;
514 }
515 return error;
516}
517
518static int acpi_hibernation_pre_snapshot_old(void)
519{
520 int error = acpi_pm_disable_gpes();
521
472 if (!error) 522 if (!error)
473 acpi_target_sleep_state = ACPI_STATE_S4; 523 hibernate_nvs_save();
524
474 return error; 525 return error;
475} 526}
476 527
@@ -481,8 +532,8 @@ static int acpi_hibernation_begin_old(void)
481static struct platform_hibernation_ops acpi_hibernation_ops_old = { 532static struct platform_hibernation_ops acpi_hibernation_ops_old = {
482 .begin = acpi_hibernation_begin_old, 533 .begin = acpi_hibernation_begin_old,
483 .end = acpi_pm_end, 534 .end = acpi_pm_end,
484 .pre_snapshot = acpi_pm_disable_gpes, 535 .pre_snapshot = acpi_hibernation_pre_snapshot_old,
485 .finish = acpi_pm_finish, 536 .finish = acpi_hibernation_finish,
486 .prepare = acpi_pm_disable_gpes, 537 .prepare = acpi_pm_disable_gpes,
487 .enter = acpi_hibernation_enter, 538 .enter = acpi_hibernation_enter,
488 .leave = acpi_hibernation_leave, 539 .leave = acpi_hibernation_leave,
@@ -622,7 +673,7 @@ static void acpi_power_off_prepare(void)
622{ 673{
623 /* Prepare to power off the system */ 674 /* Prepare to power off the system */
624 acpi_sleep_prepare(ACPI_STATE_S5); 675 acpi_sleep_prepare(ACPI_STATE_S5);
625 acpi_hw_disable_all_gpes(); 676 acpi_disable_all_gpes();
626} 677}
627 678
628static void acpi_power_off(void) 679static void acpi_power_off(void)
@@ -671,7 +722,7 @@ int __init acpi_sleep_init(void)
671 sleep_states[ACPI_STATE_S4] = 1; 722 sleep_states[ACPI_STATE_S4] = 1;
672 printk(" S4"); 723 printk(" S4");
673 if (!nosigcheck) { 724 if (!nosigcheck) {
674 acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS, 725 acpi_get_table(ACPI_SIG_FACS, 1,
675 (struct acpi_table_header **)&facs); 726 (struct acpi_table_header **)&facs);
676 if (facs) 727 if (facs)
677 s4_hardware_signature = 728 s4_hardware_signature =
diff --git a/drivers/acpi/namespace/Makefile b/drivers/acpi/namespace/Makefile
deleted file mode 100644
index 371a2daf837f..000000000000
--- a/drivers/acpi/namespace/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
1#
2# Makefile for all Linux ACPI interpreter subdirectories
3#
4
5obj-y := nsaccess.o nsload.o nssearch.o nsxfeval.o \
6 nsalloc.o nseval.o nsnames.o nsutils.o nsxfname.o \
7 nsdump.o nsinit.o nsobject.o nswalk.o nsxfobj.o \
8 nsparse.o nspredef.o
9
10obj-$(ACPI_FUTURE_USAGE) += nsdumpdv.o
11
12EXTRA_CFLAGS += $(ACPI_CFLAGS)
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 25ceae9191ef..c5e292aab0e3 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -29,7 +29,6 @@
29#include <linux/errno.h> 29#include <linux/errno.h>
30#include <linux/acpi.h> 30#include <linux/acpi.h>
31#include <acpi/acpi_bus.h> 31#include <acpi/acpi_bus.h>
32#include <acpi/acmacros.h>
33 32
34#define ACPI_NUMA 0x80000000 33#define ACPI_NUMA 0x80000000
35#define _COMPONENT ACPI_NUMA 34#define _COMPONENT ACPI_NUMA
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index c8111424dcb8..6729a4992f2b 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -726,7 +726,7 @@ static acpi_status __acpi_os_execute(acpi_execute_type type,
726 726
727 dpc = kmalloc(sizeof(struct acpi_os_dpc), GFP_ATOMIC); 727 dpc = kmalloc(sizeof(struct acpi_os_dpc), GFP_ATOMIC);
728 if (!dpc) 728 if (!dpc)
729 return_ACPI_STATUS(AE_NO_MEMORY); 729 return AE_NO_MEMORY;
730 730
731 dpc->function = function; 731 dpc->function = function;
732 dpc->context = context; 732 dpc->context = context;
@@ -747,7 +747,7 @@ static acpi_status __acpi_os_execute(acpi_execute_type type,
747 status = AE_ERROR; 747 status = AE_ERROR;
748 kfree(dpc); 748 kfree(dpc);
749 } 749 }
750 return_ACPI_STATUS(status); 750 return status;
751} 751}
752 752
753acpi_status acpi_os_execute(acpi_execute_type type, 753acpi_status acpi_os_execute(acpi_execute_type type,
diff --git a/drivers/acpi/parser/Makefile b/drivers/acpi/parser/Makefile
deleted file mode 100644
index db24ee09cf11..000000000000
--- a/drivers/acpi/parser/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
1#
2# Makefile for all Linux ACPI interpreter subdirectories
3#
4
5obj-y := psargs.o psparse.o psloop.o pstree.o pswalk.o \
6 psopcode.o psscope.o psutils.o psxface.o
7
8EXTRA_CFLAGS += $(ACPI_CFLAGS)
diff --git a/drivers/acpi/pci_bind.c b/drivers/acpi/pci_bind.c
index 4b252ea0e952..95650f83ce2e 100644
--- a/drivers/acpi/pci_bind.c
+++ b/drivers/acpi/pci_bind.c
@@ -99,7 +99,7 @@ acpi_status acpi_get_pci_id(acpi_handle handle, struct acpi_pci_id *id)
99 */ 99 */
100 100
101 ACPI_DEBUG_PRINT((ACPI_DB_INFO, 101 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
102 "Device %s has PCI address %02x:%02x:%02x.%02x\n", 102 "Device %s has PCI address %04x:%02x:%02x.%d\n",
103 acpi_device_bid(device), id->segment, id->bus, 103 acpi_device_bid(device), id->segment, id->bus,
104 id->device, id->function)); 104 id->device, id->function));
105 105
@@ -111,12 +111,11 @@ EXPORT_SYMBOL(acpi_get_pci_id);
111int acpi_pci_bind(struct acpi_device *device) 111int acpi_pci_bind(struct acpi_device *device)
112{ 112{
113 int result = 0; 113 int result = 0;
114 acpi_status status = AE_OK; 114 acpi_status status;
115 struct acpi_pci_data *data = NULL; 115 struct acpi_pci_data *data;
116 struct acpi_pci_data *pdata = NULL; 116 struct acpi_pci_data *pdata;
117 char *pathname = NULL; 117 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
118 struct acpi_buffer buffer = { 0, NULL }; 118 acpi_handle handle;
119 acpi_handle handle = NULL;
120 struct pci_dev *dev; 119 struct pci_dev *dev;
121 struct pci_bus *bus; 120 struct pci_bus *bus;
122 121
@@ -124,21 +123,18 @@ int acpi_pci_bind(struct acpi_device *device)
124 if (!device || !device->parent) 123 if (!device || !device->parent)
125 return -EINVAL; 124 return -EINVAL;
126 125
127 pathname = kzalloc(ACPI_PATHNAME_MAX, GFP_KERNEL);
128 if (!pathname)
129 return -ENOMEM;
130 buffer.length = ACPI_PATHNAME_MAX;
131 buffer.pointer = pathname;
132
133 data = kzalloc(sizeof(struct acpi_pci_data), GFP_KERNEL); 126 data = kzalloc(sizeof(struct acpi_pci_data), GFP_KERNEL);
134 if (!data) { 127 if (!data)
135 kfree(pathname);
136 return -ENOMEM; 128 return -ENOMEM;
129
130 status = acpi_get_name(device->handle, ACPI_FULL_PATHNAME, &buffer);
131 if (ACPI_FAILURE(status)) {
132 kfree(data);
133 return -ENODEV;
137 } 134 }
138 135
139 acpi_get_name(device->handle, ACPI_FULL_PATHNAME, &buffer);
140 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Binding PCI device [%s]...\n", 136 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Binding PCI device [%s]...\n",
141 pathname)); 137 (char *)buffer.pointer));
142 138
143 /* 139 /*
144 * Segment & Bus 140 * Segment & Bus
@@ -166,7 +162,7 @@ int acpi_pci_bind(struct acpi_device *device)
166 data->id.device = device->pnp.bus_address >> 16; 162 data->id.device = device->pnp.bus_address >> 16;
167 data->id.function = device->pnp.bus_address & 0xFFFF; 163 data->id.function = device->pnp.bus_address & 0xFFFF;
168 164
169 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "...to %02x:%02x:%02x.%02x\n", 165 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "...to %04x:%02x:%02x.%d\n",
170 data->id.segment, data->id.bus, data->id.device, 166 data->id.segment, data->id.bus, data->id.device,
171 data->id.function)); 167 data->id.function));
172 168
@@ -196,7 +192,7 @@ int acpi_pci_bind(struct acpi_device *device)
196 } 192 }
197 if (!data->dev) { 193 if (!data->dev) {
198 ACPI_DEBUG_PRINT((ACPI_DB_INFO, 194 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
199 "Device %02x:%02x:%02x.%02x not present in PCI namespace\n", 195 "Device %04x:%02x:%02x.%d not present in PCI namespace\n",
200 data->id.segment, data->id.bus, 196 data->id.segment, data->id.bus,
201 data->id.device, data->id.function)); 197 data->id.device, data->id.function));
202 result = -ENODEV; 198 result = -ENODEV;
@@ -204,7 +200,7 @@ int acpi_pci_bind(struct acpi_device *device)
204 } 200 }
205 if (!data->dev->bus) { 201 if (!data->dev->bus) {
206 printk(KERN_ERR PREFIX 202 printk(KERN_ERR PREFIX
207 "Device %02x:%02x:%02x.%02x has invalid 'bus' field\n", 203 "Device %04x:%02x:%02x.%d has invalid 'bus' field\n",
208 data->id.segment, data->id.bus, 204 data->id.segment, data->id.bus,
209 data->id.device, data->id.function); 205 data->id.device, data->id.function);
210 result = -ENODEV; 206 result = -ENODEV;
@@ -219,7 +215,7 @@ int acpi_pci_bind(struct acpi_device *device)
219 */ 215 */
220 if (data->dev->subordinate) { 216 if (data->dev->subordinate) {
221 ACPI_DEBUG_PRINT((ACPI_DB_INFO, 217 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
222 "Device %02x:%02x:%02x.%02x is a PCI bridge\n", 218 "Device %04x:%02x:%02x.%d is a PCI bridge\n",
223 data->id.segment, data->id.bus, 219 data->id.segment, data->id.bus,
224 data->id.device, data->id.function)); 220 data->id.device, data->id.function));
225 data->bus = data->dev->subordinate; 221 data->bus = data->dev->subordinate;
@@ -262,7 +258,7 @@ int acpi_pci_bind(struct acpi_device *device)
262 } 258 }
263 259
264 end: 260 end:
265 kfree(pathname); 261 kfree(buffer.pointer);
266 if (result) 262 if (result)
267 kfree(data); 263 kfree(data);
268 264
@@ -272,25 +268,21 @@ int acpi_pci_bind(struct acpi_device *device)
272static int acpi_pci_unbind(struct acpi_device *device) 268static int acpi_pci_unbind(struct acpi_device *device)
273{ 269{
274 int result = 0; 270 int result = 0;
275 acpi_status status = AE_OK; 271 acpi_status status;
276 struct acpi_pci_data *data = NULL; 272 struct acpi_pci_data *data;
277 char *pathname = NULL; 273 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
278 struct acpi_buffer buffer = { 0, NULL };
279 274
280 275
281 if (!device || !device->parent) 276 if (!device || !device->parent)
282 return -EINVAL; 277 return -EINVAL;
283 278
284 pathname = kzalloc(ACPI_PATHNAME_MAX, GFP_KERNEL); 279 status = acpi_get_name(device->handle, ACPI_FULL_PATHNAME, &buffer);
285 if (!pathname) 280 if (ACPI_FAILURE(status))
286 return -ENOMEM; 281 return -ENODEV;
287 282
288 buffer.length = ACPI_PATHNAME_MAX;
289 buffer.pointer = pathname;
290 acpi_get_name(device->handle, ACPI_FULL_PATHNAME, &buffer);
291 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Unbinding PCI device [%s]...\n", 283 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Unbinding PCI device [%s]...\n",
292 pathname)); 284 (char *) buffer.pointer));
293 kfree(pathname); 285 kfree(buffer.pointer);
294 286
295 status = 287 status =
296 acpi_get_data(device->handle, acpi_pci_data_handler, 288 acpi_get_data(device->handle, acpi_pci_data_handler,
@@ -322,50 +314,44 @@ acpi_pci_bind_root(struct acpi_device *device,
322 struct acpi_pci_id *id, struct pci_bus *bus) 314 struct acpi_pci_id *id, struct pci_bus *bus)
323{ 315{
324 int result = 0; 316 int result = 0;
325 acpi_status status = AE_OK; 317 acpi_status status;
326 struct acpi_pci_data *data = NULL; 318 struct acpi_pci_data *data = NULL;
327 char *pathname = NULL; 319 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
328 struct acpi_buffer buffer = { 0, NULL };
329
330 pathname = kzalloc(ACPI_PATHNAME_MAX, GFP_KERNEL);
331 if (!pathname)
332 return -ENOMEM;
333
334 buffer.length = ACPI_PATHNAME_MAX;
335 buffer.pointer = pathname;
336 320
337 if (!device || !id || !bus) { 321 if (!device || !id || !bus) {
338 kfree(pathname);
339 return -EINVAL; 322 return -EINVAL;
340 } 323 }
341 324
342 data = kzalloc(sizeof(struct acpi_pci_data), GFP_KERNEL); 325 data = kzalloc(sizeof(struct acpi_pci_data), GFP_KERNEL);
343 if (!data) { 326 if (!data)
344 kfree(pathname);
345 return -ENOMEM; 327 return -ENOMEM;
346 }
347 328
348 data->id = *id; 329 data->id = *id;
349 data->bus = bus; 330 data->bus = bus;
350 device->ops.bind = acpi_pci_bind; 331 device->ops.bind = acpi_pci_bind;
351 device->ops.unbind = acpi_pci_unbind; 332 device->ops.unbind = acpi_pci_unbind;
352 333
353 acpi_get_name(device->handle, ACPI_FULL_PATHNAME, &buffer); 334 status = acpi_get_name(device->handle, ACPI_FULL_PATHNAME, &buffer);
335 if (ACPI_FAILURE(status)) {
336 kfree (data);
337 return -ENODEV;
338 }
354 339
355 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Binding PCI root bridge [%s] to " 340 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Binding PCI root bridge [%s] to "
356 "%02x:%02x\n", pathname, id->segment, id->bus)); 341 "%04x:%02x\n", (char *)buffer.pointer,
342 id->segment, id->bus));
357 343
358 status = acpi_attach_data(device->handle, acpi_pci_data_handler, data); 344 status = acpi_attach_data(device->handle, acpi_pci_data_handler, data);
359 if (ACPI_FAILURE(status)) { 345 if (ACPI_FAILURE(status)) {
360 ACPI_EXCEPTION((AE_INFO, status, 346 ACPI_EXCEPTION((AE_INFO, status,
361 "Unable to attach ACPI-PCI context to device %s", 347 "Unable to attach ACPI-PCI context to device %s",
362 pathname)); 348 (char *)buffer.pointer));
363 result = -ENODEV; 349 result = -ENODEV;
364 goto end; 350 goto end;
365 } 351 }
366 352
367 end: 353 end:
368 kfree(pathname); 354 kfree(buffer.pointer);
369 if (result != 0) 355 if (result != 0)
370 kfree(data); 356 kfree(data);
371 357
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index bf79d83bdfbb..891bdf6679f3 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -4,6 +4,8 @@
4 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com> 4 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
5 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> 5 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
6 * Copyright (C) 2002 Dominik Brodowski <devel@brodo.de> 6 * Copyright (C) 2002 Dominik Brodowski <devel@brodo.de>
7 * (c) Copyright 2008 Hewlett-Packard Development Company, L.P.
8 * Bjorn Helgaas <bjorn.helgaas@hp.com>
7 * 9 *
8 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 10 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
9 * 11 *
@@ -41,29 +43,36 @@
41#define _COMPONENT ACPI_PCI_COMPONENT 43#define _COMPONENT ACPI_PCI_COMPONENT
42ACPI_MODULE_NAME("pci_irq"); 44ACPI_MODULE_NAME("pci_irq");
43 45
44static struct acpi_prt_list acpi_prt; 46struct acpi_prt_entry {
47 struct list_head list;
48 struct acpi_pci_id id;
49 u8 pin;
50 acpi_handle link;
51 u32 index; /* GSI, or link _CRS index */
52};
53
54static LIST_HEAD(acpi_prt_list);
45static DEFINE_SPINLOCK(acpi_prt_lock); 55static DEFINE_SPINLOCK(acpi_prt_lock);
46 56
57static inline char pin_name(int pin)
58{
59 return 'A' + pin - 1;
60}
61
47/* -------------------------------------------------------------------------- 62/* --------------------------------------------------------------------------
48 PCI IRQ Routing Table (PRT) Support 63 PCI IRQ Routing Table (PRT) Support
49 -------------------------------------------------------------------------- */ 64 -------------------------------------------------------------------------- */
50 65
51static struct acpi_prt_entry *acpi_pci_irq_find_prt_entry(int segment, 66static struct acpi_prt_entry *acpi_pci_irq_find_prt_entry(struct pci_dev *dev,
52 int bus, 67 int pin)
53 int device, int pin)
54{ 68{
55 struct acpi_prt_entry *entry = NULL; 69 struct acpi_prt_entry *entry;
56 70 int segment = pci_domain_nr(dev->bus);
57 if (!acpi_prt.count) 71 int bus = dev->bus->number;
58 return NULL; 72 int device = PCI_SLOT(dev->devfn);
59 73
60 /*
61 * Parse through all PRT entries looking for a match on the specified
62 * PCI device's segment, bus, device, and pin (don't care about func).
63 *
64 */
65 spin_lock(&acpi_prt_lock); 74 spin_lock(&acpi_prt_lock);
66 list_for_each_entry(entry, &acpi_prt.entries, node) { 75 list_for_each_entry(entry, &acpi_prt_list, list) {
67 if ((segment == entry->id.segment) 76 if ((segment == entry->id.segment)
68 && (bus == entry->id.bus) 77 && (bus == entry->id.bus)
69 && (device == entry->id.device) 78 && (device == entry->id.device)
@@ -72,7 +81,6 @@ static struct acpi_prt_entry *acpi_pci_irq_find_prt_entry(int segment,
72 return entry; 81 return entry;
73 } 82 }
74 } 83 }
75
76 spin_unlock(&acpi_prt_lock); 84 spin_unlock(&acpi_prt_lock);
77 return NULL; 85 return NULL;
78} 86}
@@ -124,25 +132,27 @@ struct prt_quirk {
124 char *actual_source; 132 char *actual_source;
125}; 133};
126 134
135#define PCI_INTX_PIN(c) (c - 'A' + 1)
136
127/* 137/*
128 * These systems have incorrect _PRT entries. The BIOS claims the PCI 138 * These systems have incorrect _PRT entries. The BIOS claims the PCI
129 * interrupt at the listed segment/bus/device/pin is connected to the first 139 * interrupt at the listed segment/bus/device/pin is connected to the first
130 * link device, but it is actually connected to the second. 140 * link device, but it is actually connected to the second.
131 */ 141 */
132static struct prt_quirk prt_quirks[] = { 142static struct prt_quirk prt_quirks[] = {
133 { medion_md9580, 0, 0, 9, 'A', 143 { medion_md9580, 0, 0, 9, PCI_INTX_PIN('A'),
134 "\\_SB_.PCI0.ISA_.LNKA", 144 "\\_SB_.PCI0.ISA_.LNKA",
135 "\\_SB_.PCI0.ISA_.LNKB"}, 145 "\\_SB_.PCI0.ISA_.LNKB"},
136 { dell_optiplex, 0, 0, 0xd, 'A', 146 { dell_optiplex, 0, 0, 0xd, PCI_INTX_PIN('A'),
137 "\\_SB_.LNKB", 147 "\\_SB_.LNKB",
138 "\\_SB_.LNKA"}, 148 "\\_SB_.LNKA"},
139 { hp_t5710, 0, 0, 1, 'A', 149 { hp_t5710, 0, 0, 1, PCI_INTX_PIN('A'),
140 "\\_SB_.PCI0.LNK1", 150 "\\_SB_.PCI0.LNK1",
141 "\\_SB_.PCI0.LNK3"}, 151 "\\_SB_.PCI0.LNK3"},
142}; 152};
143 153
144static void 154static void do_prt_fixups(struct acpi_prt_entry *entry,
145do_prt_fixups(struct acpi_prt_entry *entry, struct acpi_pci_routing_table *prt) 155 struct acpi_pci_routing_table *prt)
146{ 156{
147 int i; 157 int i;
148 struct prt_quirk *quirk; 158 struct prt_quirk *quirk;
@@ -158,42 +168,43 @@ do_prt_fixups(struct acpi_prt_entry *entry, struct acpi_pci_routing_table *prt)
158 entry->id.segment == quirk->segment && 168 entry->id.segment == quirk->segment &&
159 entry->id.bus == quirk->bus && 169 entry->id.bus == quirk->bus &&
160 entry->id.device == quirk->device && 170 entry->id.device == quirk->device &&
161 entry->pin + 'A' == quirk->pin && 171 entry->pin == quirk->pin &&
162 !strcmp(prt->source, quirk->source) && 172 !strcmp(prt->source, quirk->source) &&
163 strlen(prt->source) >= strlen(quirk->actual_source)) { 173 strlen(prt->source) >= strlen(quirk->actual_source)) {
164 printk(KERN_WARNING PREFIX "firmware reports " 174 printk(KERN_WARNING PREFIX "firmware reports "
165 "%04x:%02x:%02x PCI INT %c connected to %s; " 175 "%04x:%02x:%02x PCI INT %c connected to %s; "
166 "changing to %s\n", 176 "changing to %s\n",
167 entry->id.segment, entry->id.bus, 177 entry->id.segment, entry->id.bus,
168 entry->id.device, 'A' + entry->pin, 178 entry->id.device, pin_name(entry->pin),
169 prt->source, quirk->actual_source); 179 prt->source, quirk->actual_source);
170 strcpy(prt->source, quirk->actual_source); 180 strcpy(prt->source, quirk->actual_source);
171 } 181 }
172 } 182 }
173} 183}
174 184
175static int 185static int acpi_pci_irq_add_entry(acpi_handle handle, int segment, int bus,
176acpi_pci_irq_add_entry(acpi_handle handle, 186 struct acpi_pci_routing_table *prt)
177 int segment, int bus, struct acpi_pci_routing_table *prt)
178{ 187{
179 struct acpi_prt_entry *entry = NULL; 188 struct acpi_prt_entry *entry;
180
181
182 if (!prt)
183 return -EINVAL;
184 189
185 entry = kzalloc(sizeof(struct acpi_prt_entry), GFP_KERNEL); 190 entry = kzalloc(sizeof(struct acpi_prt_entry), GFP_KERNEL);
186 if (!entry) 191 if (!entry)
187 return -ENOMEM; 192 return -ENOMEM;
188 193
194 /*
195 * Note that the _PRT uses 0=INTA, 1=INTB, etc, while PCI uses
196 * 1=INTA, 2=INTB. We use the PCI encoding throughout, so convert
197 * it here.
198 */
189 entry->id.segment = segment; 199 entry->id.segment = segment;
190 entry->id.bus = bus; 200 entry->id.bus = bus;
191 entry->id.device = (prt->address >> 16) & 0xFFFF; 201 entry->id.device = (prt->address >> 16) & 0xFFFF;
192 entry->id.function = prt->address & 0xFFFF; 202 entry->pin = prt->pin + 1;
193 entry->pin = prt->pin;
194 203
195 do_prt_fixups(entry, prt); 204 do_prt_fixups(entry, prt);
196 205
206 entry->index = prt->source_index;
207
197 /* 208 /*
198 * Type 1: Dynamic 209 * Type 1: Dynamic
199 * --------------- 210 * ---------------
@@ -207,10 +218,9 @@ acpi_pci_irq_add_entry(acpi_handle handle,
207 * (e.g. exists somewhere 'below' this _PRT entry in the ACPI 218 * (e.g. exists somewhere 'below' this _PRT entry in the ACPI
208 * namespace). 219 * namespace).
209 */ 220 */
210 if (prt->source[0]) { 221 if (prt->source[0])
211 acpi_get_handle(handle, prt->source, &entry->link.handle); 222 acpi_get_handle(handle, prt->source, &entry->link);
212 entry->link.index = prt->source_index; 223
213 }
214 /* 224 /*
215 * Type 2: Static 225 * Type 2: Static
216 * -------------- 226 * --------------
@@ -218,84 +228,38 @@ acpi_pci_irq_add_entry(acpi_handle handle,
218 * the IRQ value, which is hardwired to specific interrupt inputs on 228 * the IRQ value, which is hardwired to specific interrupt inputs on
219 * the interrupt controller. 229 * the interrupt controller.
220 */ 230 */
221 else
222 entry->link.index = prt->source_index;
223 231
224 ACPI_DEBUG_PRINT_RAW((ACPI_DB_INFO, 232 ACPI_DEBUG_PRINT_RAW((ACPI_DB_INFO,
225 " %02X:%02X:%02X[%c] -> %s[%d]\n", 233 " %04x:%02x:%02x[%c] -> %s[%d]\n",
226 entry->id.segment, entry->id.bus, 234 entry->id.segment, entry->id.bus,
227 entry->id.device, ('A' + entry->pin), prt->source, 235 entry->id.device, pin_name(entry->pin),
228 entry->link.index)); 236 prt->source, entry->index));
229 237
230 spin_lock(&acpi_prt_lock); 238 spin_lock(&acpi_prt_lock);
231 list_add_tail(&entry->node, &acpi_prt.entries); 239 list_add_tail(&entry->list, &acpi_prt_list);
232 acpi_prt.count++;
233 spin_unlock(&acpi_prt_lock); 240 spin_unlock(&acpi_prt_lock);
234 241
235 return 0; 242 return 0;
236} 243}
237 244
238static void
239acpi_pci_irq_del_entry(int segment, int bus, struct acpi_prt_entry *entry)
240{
241 if (segment == entry->id.segment && bus == entry->id.bus) {
242 acpi_prt.count--;
243 list_del(&entry->node);
244 kfree(entry);
245 }
246}
247
248int acpi_pci_irq_add_prt(acpi_handle handle, int segment, int bus) 245int acpi_pci_irq_add_prt(acpi_handle handle, int segment, int bus)
249{ 246{
250 acpi_status status = AE_OK; 247 acpi_status status;
251 char *pathname = NULL; 248 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
252 struct acpi_buffer buffer = { 0, NULL }; 249 struct acpi_pci_routing_table *entry;
253 struct acpi_pci_routing_table *prt = NULL;
254 struct acpi_pci_routing_table *entry = NULL;
255 static int first_time = 1;
256
257
258 pathname = kzalloc(ACPI_PATHNAME_MAX, GFP_KERNEL);
259 if (!pathname)
260 return -ENOMEM;
261
262 if (first_time) {
263 acpi_prt.count = 0;
264 INIT_LIST_HEAD(&acpi_prt.entries);
265 first_time = 0;
266 }
267
268 /*
269 * NOTE: We're given a 'handle' to the _PRT object's parent device
270 * (either a PCI root bridge or PCI-PCI bridge).
271 */
272 250
273 buffer.length = ACPI_PATHNAME_MAX; 251 /* 'handle' is the _PRT's parent (root bridge or PCI-PCI bridge) */
274 buffer.pointer = pathname; 252 status = acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer);
275 acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer); 253 if (ACPI_FAILURE(status))
254 return -ENODEV;
276 255
277 printk(KERN_DEBUG "ACPI: PCI Interrupt Routing Table [%s._PRT]\n", 256 printk(KERN_DEBUG "ACPI: PCI Interrupt Routing Table [%s._PRT]\n",
278 pathname); 257 (char *) buffer.pointer);
279 258
280 /* 259 kfree(buffer.pointer);
281 * Evaluate this _PRT and add its entries to our global list (acpi_prt).
282 */
283 260
284 buffer.length = 0; 261 buffer.length = ACPI_ALLOCATE_BUFFER;
285 buffer.pointer = NULL; 262 buffer.pointer = NULL;
286 kfree(pathname);
287 status = acpi_get_irq_routing_table(handle, &buffer);
288 if (status != AE_BUFFER_OVERFLOW) {
289 ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PRT [%s]",
290 acpi_format_exception(status)));
291 return -ENODEV;
292 }
293
294 prt = kzalloc(buffer.length, GFP_KERNEL);
295 if (!prt) {
296 return -ENOMEM;
297 }
298 buffer.pointer = prt;
299 263
300 status = acpi_get_irq_routing_table(handle, &buffer); 264 status = acpi_get_irq_routing_table(handle, &buffer);
301 if (ACPI_FAILURE(status)) { 265 if (ACPI_FAILURE(status)) {
@@ -305,36 +269,30 @@ int acpi_pci_irq_add_prt(acpi_handle handle, int segment, int bus)
305 return -ENODEV; 269 return -ENODEV;
306 } 270 }
307 271
308 entry = prt; 272 entry = buffer.pointer;
309
310 while (entry && (entry->length > 0)) { 273 while (entry && (entry->length > 0)) {
311 acpi_pci_irq_add_entry(handle, segment, bus, entry); 274 acpi_pci_irq_add_entry(handle, segment, bus, entry);
312 entry = (struct acpi_pci_routing_table *) 275 entry = (struct acpi_pci_routing_table *)
313 ((unsigned long)entry + entry->length); 276 ((unsigned long)entry + entry->length);
314 } 277 }
315 278
316 kfree(prt); 279 kfree(buffer.pointer);
317
318 return 0; 280 return 0;
319} 281}
320 282
321void acpi_pci_irq_del_prt(int segment, int bus) 283void acpi_pci_irq_del_prt(int segment, int bus)
322{ 284{
323 struct list_head *node = NULL, *n = NULL; 285 struct acpi_prt_entry *entry, *tmp;
324 struct acpi_prt_entry *entry = NULL;
325
326 if (!acpi_prt.count) {
327 return;
328 }
329 286
330 printk(KERN_DEBUG 287 printk(KERN_DEBUG
331 "ACPI: Delete PCI Interrupt Routing Table for %x:%x\n", segment, 288 "ACPI: Delete PCI Interrupt Routing Table for %04x:%02x\n",
332 bus); 289 segment, bus);
333 spin_lock(&acpi_prt_lock); 290 spin_lock(&acpi_prt_lock);
334 list_for_each_safe(node, n, &acpi_prt.entries) { 291 list_for_each_entry_safe(entry, tmp, &acpi_prt_list, list) {
335 entry = list_entry(node, struct acpi_prt_entry, node); 292 if (segment == entry->id.segment && bus == entry->id.bus) {
336 293 list_del(&entry->list);
337 acpi_pci_irq_del_entry(segment, bus, entry); 294 kfree(entry);
295 }
338 } 296 }
339 spin_unlock(&acpi_prt_lock); 297 spin_unlock(&acpi_prt_lock);
340} 298}
@@ -342,162 +300,26 @@ void acpi_pci_irq_del_prt(int segment, int bus)
342/* -------------------------------------------------------------------------- 300/* --------------------------------------------------------------------------
343 PCI Interrupt Routing Support 301 PCI Interrupt Routing Support
344 -------------------------------------------------------------------------- */ 302 -------------------------------------------------------------------------- */
345typedef int (*irq_lookup_func) (struct acpi_prt_entry *, int *, int *, char **); 303static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin)
346
347static int
348acpi_pci_allocate_irq(struct acpi_prt_entry *entry,
349 int *triggering, int *polarity, char **link)
350{
351 int irq;
352
353
354 if (entry->link.handle) {
355 irq = acpi_pci_link_allocate_irq(entry->link.handle,
356 entry->link.index, triggering,
357 polarity, link);
358 if (irq < 0) {
359 printk(KERN_WARNING PREFIX
360 "Invalid IRQ link routing entry\n");
361 return -1;
362 }
363 } else {
364 irq = entry->link.index;
365 *triggering = ACPI_LEVEL_SENSITIVE;
366 *polarity = ACPI_ACTIVE_LOW;
367 }
368
369 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found IRQ %d\n", irq));
370 return irq;
371}
372
373static int
374acpi_pci_free_irq(struct acpi_prt_entry *entry,
375 int *triggering, int *polarity, char **link)
376{
377 int irq;
378
379 if (entry->link.handle) {
380 irq = acpi_pci_link_free_irq(entry->link.handle);
381 } else {
382 irq = entry->link.index;
383 }
384 return irq;
385}
386
387#ifdef CONFIG_X86_IO_APIC
388extern int noioapicquirk;
389
390static int bridge_has_boot_interrupt_variant(struct pci_bus *bus)
391{ 304{
392 struct pci_bus *bus_it; 305 struct acpi_prt_entry *entry;
393 306 struct pci_dev *bridge;
394 for (bus_it = bus ; bus_it ; bus_it = bus_it->parent) { 307 u8 bridge_pin, orig_pin = pin;
395 if (!bus_it->self) 308
396 return 0; 309 entry = acpi_pci_irq_find_prt_entry(dev, pin);
397 310 if (entry) {
398 printk(KERN_INFO "vendor=%04x device=%04x\n", bus_it->self->vendor, 311 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %s[%c] _PRT entry\n",
399 bus_it->self->device); 312 pci_name(dev), pin_name(pin)));
400 313 return entry;
401 if (bus_it->self->irq_reroute_variant)
402 return bus_it->self->irq_reroute_variant;
403 }
404 return 0;
405}
406#endif /* CONFIG_X86_IO_APIC */
407
408/*
409 * acpi_pci_irq_lookup
410 * success: return IRQ >= 0
411 * failure: return -1
412 */
413static int
414acpi_pci_irq_lookup(struct pci_bus *bus,
415 int device,
416 int pin,
417 int *triggering,
418 int *polarity, char **link, irq_lookup_func func)
419{
420 struct acpi_prt_entry *entry = NULL;
421 int segment = pci_domain_nr(bus);
422 int bus_nr = bus->number;
423 int ret;
424
425
426 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
427 "Searching for PRT entry for %02x:%02x:%02x[%c]\n",
428 segment, bus_nr, device, ('A' + pin)));
429
430 entry = acpi_pci_irq_find_prt_entry(segment, bus_nr, device, pin);
431 if (!entry) {
432 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "PRT entry not found\n"));
433 return -1;
434 }
435
436 ret = func(entry, triggering, polarity, link);
437
438#ifdef CONFIG_X86_IO_APIC
439 /*
440 * Some chipsets (e.g. intel 6700PXH) generate a legacy INTx when the
441 * IRQ entry in the chipset's IO-APIC is masked (as, e.g. the RT kernel
442 * does during interrupt handling). When this INTx generation cannot be
443 * disabled, we reroute these interrupts to their legacy equivalent to
444 * get rid of spurious interrupts.
445 */
446 if (!noioapicquirk) {
447 switch (bridge_has_boot_interrupt_variant(bus)) {
448 case 0:
449 /* no rerouting necessary */
450 break;
451
452 case INTEL_IRQ_REROUTE_VARIANT:
453 /*
454 * Remap according to INTx routing table in 6700PXH
455 * specs, intel order number 302628-002, section
456 * 2.15.2. Other chipsets (80332, ...) have the same
457 * mapping and are handled here as well.
458 */
459 printk(KERN_INFO "pci irq %d -> rerouted to legacy "
460 "irq %d\n", ret, (ret % 4) + 16);
461 ret = (ret % 4) + 16;
462 break;
463
464 default:
465 printk(KERN_INFO "not rerouting irq %d to legacy irq: "
466 "unknown mapping\n", ret);
467 break;
468 }
469 } 314 }
470#endif /* CONFIG_X86_IO_APIC */
471
472 return ret;
473}
474
475/*
476 * acpi_pci_irq_derive
477 * success: return IRQ >= 0
478 * failure: return < 0
479 */
480static int
481acpi_pci_irq_derive(struct pci_dev *dev,
482 int pin,
483 int *triggering,
484 int *polarity, char **link, irq_lookup_func func)
485{
486 struct pci_dev *bridge = dev;
487 int irq = -1;
488 u8 bridge_pin = 0, orig_pin = pin;
489
490
491 if (!dev)
492 return -EINVAL;
493 315
494 /* 316 /*
495 * Attempt to derive an IRQ for this device from a parent bridge's 317 * Attempt to derive an IRQ for this device from a parent bridge's
496 * PCI interrupt routing entry (eg. yenta bridge and add-in card bridge). 318 * PCI interrupt routing entry (eg. yenta bridge and add-in card bridge).
497 */ 319 */
498 while (irq < 0 && bridge->bus->self) { 320 bridge = dev->bus->self;
499 pin = (pin + PCI_SLOT(bridge->devfn)) % 4; 321 while (bridge) {
500 bridge = bridge->bus->self; 322 pin = (((pin - 1) + PCI_SLOT(dev->devfn)) % 4) + 1;
501 323
502 if ((bridge->class >> 8) == PCI_CLASS_BRIDGE_CARDBUS) { 324 if ((bridge->class >> 8) == PCI_CLASS_BRIDGE_CARDBUS) {
503 /* PC card has the same IRQ as its cardbridge */ 325 /* PC card has the same IRQ as its cardbridge */
@@ -506,50 +328,40 @@ acpi_pci_irq_derive(struct pci_dev *dev,
506 ACPI_DEBUG_PRINT((ACPI_DB_INFO, 328 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
507 "No interrupt pin configured for device %s\n", 329 "No interrupt pin configured for device %s\n",
508 pci_name(bridge))); 330 pci_name(bridge)));
509 return -1; 331 return NULL;
510 } 332 }
511 /* Pin is from 0 to 3 */
512 bridge_pin--;
513 pin = bridge_pin; 333 pin = bridge_pin;
514 } 334 }
515 335
516 irq = acpi_pci_irq_lookup(bridge->bus, PCI_SLOT(bridge->devfn), 336 entry = acpi_pci_irq_find_prt_entry(bridge, pin);
517 pin, triggering, polarity, 337 if (entry) {
518 link, func); 338 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
519 } 339 "Derived GSI for %s INT %c from %s\n",
340 pci_name(dev), pin_name(orig_pin),
341 pci_name(bridge)));
342 return entry;
343 }
520 344
521 if (irq < 0) { 345 dev = bridge;
522 dev_warn(&dev->dev, "can't derive routing for PCI INT %c\n", 346 bridge = dev->bus->self;
523 'A' + orig_pin);
524 return -1;
525 } 347 }
526 348
527 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Derive IRQ %d for device %s from %s\n", 349 dev_warn(&dev->dev, "can't derive routing for PCI INT %c\n",
528 irq, pci_name(dev), pci_name(bridge))); 350 pin_name(orig_pin));
529 351 return NULL;
530 return irq;
531} 352}
532 353
533/*
534 * acpi_pci_irq_enable
535 * success: return 0
536 * failure: return < 0
537 */
538
539int acpi_pci_irq_enable(struct pci_dev *dev) 354int acpi_pci_irq_enable(struct pci_dev *dev)
540{ 355{
541 int irq = 0; 356 struct acpi_prt_entry *entry;
542 u8 pin = 0; 357 int gsi;
358 u8 pin;
543 int triggering = ACPI_LEVEL_SENSITIVE; 359 int triggering = ACPI_LEVEL_SENSITIVE;
544 int polarity = ACPI_ACTIVE_LOW; 360 int polarity = ACPI_ACTIVE_LOW;
545 char *link = NULL; 361 char *link = NULL;
546 char link_desc[16]; 362 char link_desc[16];
547 int rc; 363 int rc;
548 364
549
550 if (!dev)
551 return -EINVAL;
552
553 pin = dev->pin; 365 pin = dev->pin;
554 if (!pin) { 366 if (!pin) {
555 ACPI_DEBUG_PRINT((ACPI_DB_INFO, 367 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
@@ -557,31 +369,9 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
557 pci_name(dev))); 369 pci_name(dev)));
558 return 0; 370 return 0;
559 } 371 }
560 pin--;
561
562 if (!dev->bus) {
563 dev_err(&dev->dev, "invalid (NULL) 'bus' field\n");
564 return -ENODEV;
565 }
566
567 /*
568 * First we check the PCI IRQ routing table (PRT) for an IRQ. PRT
569 * values override any BIOS-assigned IRQs set during boot.
570 */
571 irq = acpi_pci_irq_lookup(dev->bus, PCI_SLOT(dev->devfn), pin,
572 &triggering, &polarity, &link,
573 acpi_pci_allocate_irq);
574
575 /*
576 * If no PRT entry was found, we'll try to derive an IRQ from the
577 * device's parent bridge.
578 */
579 if (irq < 0)
580 irq = acpi_pci_irq_derive(dev, pin, &triggering,
581 &polarity, &link,
582 acpi_pci_allocate_irq);
583 372
584 if (irq < 0) { 373 entry = acpi_pci_irq_lookup(dev, pin);
374 if (!entry) {
585 /* 375 /*
586 * IDE legacy mode controller IRQs are magic. Why do compat 376 * IDE legacy mode controller IRQs are magic. Why do compat
587 * extensions always make such a nasty mess. 377 * extensions always make such a nasty mess.
@@ -590,12 +380,24 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
590 (dev->class & 0x05) == 0) 380 (dev->class & 0x05) == 0)
591 return 0; 381 return 0;
592 } 382 }
383
384 if (entry) {
385 if (entry->link)
386 gsi = acpi_pci_link_allocate_irq(entry->link,
387 entry->index,
388 &triggering, &polarity,
389 &link);
390 else
391 gsi = entry->index;
392 } else
393 gsi = -1;
394
593 /* 395 /*
594 * No IRQ known to the ACPI subsystem - maybe the BIOS / 396 * No IRQ known to the ACPI subsystem - maybe the BIOS /
595 * driver reported one, then use it. Exit in any case. 397 * driver reported one, then use it. Exit in any case.
596 */ 398 */
597 if (irq < 0) { 399 if (gsi < 0) {
598 dev_warn(&dev->dev, "PCI INT %c: no GSI", 'A' + pin); 400 dev_warn(&dev->dev, "PCI INT %c: no GSI", pin_name(pin));
599 /* Interrupt Line values above 0xF are forbidden */ 401 /* Interrupt Line values above 0xF are forbidden */
600 if (dev->irq > 0 && (dev->irq <= 0xF)) { 402 if (dev->irq > 0 && (dev->irq <= 0xF)) {
601 printk(" - using IRQ %d\n", dev->irq); 403 printk(" - using IRQ %d\n", dev->irq);
@@ -608,10 +410,10 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
608 } 410 }
609 } 411 }
610 412
611 rc = acpi_register_gsi(irq, triggering, polarity); 413 rc = acpi_register_gsi(gsi, triggering, polarity);
612 if (rc < 0) { 414 if (rc < 0) {
613 dev_warn(&dev->dev, "PCI INT %c: failed to register GSI\n", 415 dev_warn(&dev->dev, "PCI INT %c: failed to register GSI\n",
614 'A' + pin); 416 pin_name(pin));
615 return rc; 417 return rc;
616 } 418 }
617 dev->irq = rc; 419 dev->irq = rc;
@@ -622,7 +424,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
622 link_desc[0] = '\0'; 424 link_desc[0] = '\0';
623 425
624 dev_info(&dev->dev, "PCI INT %c%s -> GSI %u (%s, %s) -> IRQ %d\n", 426 dev_info(&dev->dev, "PCI INT %c%s -> GSI %u (%s, %s) -> IRQ %d\n",
625 'A' + pin, link_desc, irq, 427 pin_name(pin), link_desc, gsi,
626 (triggering == ACPI_LEVEL_SENSITIVE) ? "level" : "edge", 428 (triggering == ACPI_LEVEL_SENSITIVE) ? "level" : "edge",
627 (polarity == ACPI_ACTIVE_LOW) ? "low" : "high", dev->irq); 429 (polarity == ACPI_ACTIVE_LOW) ? "low" : "high", dev->irq);
628 430
@@ -636,42 +438,28 @@ void __attribute__ ((weak)) acpi_unregister_gsi(u32 i)
636 438
637void acpi_pci_irq_disable(struct pci_dev *dev) 439void acpi_pci_irq_disable(struct pci_dev *dev)
638{ 440{
639 int gsi = 0; 441 struct acpi_prt_entry *entry;
640 u8 pin = 0; 442 int gsi;
641 int triggering = ACPI_LEVEL_SENSITIVE; 443 u8 pin;
642 int polarity = ACPI_ACTIVE_LOW;
643
644
645 if (!dev || !dev->bus)
646 return;
647 444
648 pin = dev->pin; 445 pin = dev->pin;
649 if (!pin) 446 if (!pin)
650 return; 447 return;
651 pin--;
652 448
653 /* 449 entry = acpi_pci_irq_lookup(dev, pin);
654 * First we check the PCI IRQ routing table (PRT) for an IRQ. 450 if (!entry)
655 */
656 gsi = acpi_pci_irq_lookup(dev->bus, PCI_SLOT(dev->devfn), pin,
657 &triggering, &polarity, NULL,
658 acpi_pci_free_irq);
659 /*
660 * If no PRT entry was found, we'll try to derive an IRQ from the
661 * device's parent bridge.
662 */
663 if (gsi < 0)
664 gsi = acpi_pci_irq_derive(dev, pin,
665 &triggering, &polarity, NULL,
666 acpi_pci_free_irq);
667 if (gsi < 0)
668 return; 451 return;
669 452
453 if (entry->link)
454 gsi = acpi_pci_link_free_irq(entry->link);
455 else
456 gsi = entry->index;
457
670 /* 458 /*
671 * TBD: It might be worth clearing dev->irq by magic constant 459 * TBD: It might be worth clearing dev->irq by magic constant
672 * (e.g. PCI_UNDEFINED_IRQ). 460 * (e.g. PCI_UNDEFINED_IRQ).
673 */ 461 */
674 462
675 dev_info(&dev->dev, "PCI INT %c disabled\n", 'A' + pin); 463 dev_info(&dev->dev, "PCI INT %c disabled\n", pin_name(pin));
676 acpi_unregister_gsi(gsi); 464 acpi_unregister_gsi(gsi);
677} 465}
diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index e52ad91ce2dc..1c6e73c7865e 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -796,10 +796,6 @@ static int irqrouter_resume(struct sys_device *dev)
796 struct list_head *node = NULL; 796 struct list_head *node = NULL;
797 struct acpi_pci_link *link = NULL; 797 struct acpi_pci_link *link = NULL;
798 798
799
800 /* Make sure SCI is enabled again (Apple firmware bug?) */
801 acpi_set_register(ACPI_BITREG_SCI_ENABLE, 1);
802
803 list_for_each(node, &acpi_link.entries) { 799 list_for_each(node, &acpi_link.entries) {
804 link = list_entry(node, struct acpi_pci_link, node); 800 link = list_entry(node, struct acpi_pci_link, node);
805 if (!link) { 801 if (!link) {
@@ -912,7 +908,7 @@ static int __init acpi_irq_nobalance_set(char *str)
912 908
913__setup("acpi_irq_nobalance", acpi_irq_nobalance_set); 909__setup("acpi_irq_nobalance", acpi_irq_nobalance_set);
914 910
915int __init acpi_irq_balance_set(char *str) 911static int __init acpi_irq_balance_set(char *str)
916{ 912{
917 acpi_irq_balance = 1; 913 acpi_irq_balance = 1;
918 return 1; 914 return 1;
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index bb7d50dd2818..c926e7d4a0d6 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -139,6 +139,8 @@ static int acpi_power_get_state(acpi_handle handle, int *state)
139{ 139{
140 acpi_status status = AE_OK; 140 acpi_status status = AE_OK;
141 unsigned long long sta = 0; 141 unsigned long long sta = 0;
142 char node_name[5];
143 struct acpi_buffer buffer = { sizeof(node_name), node_name };
142 144
143 145
144 if (!handle || !state) 146 if (!handle || !state)
@@ -151,8 +153,10 @@ static int acpi_power_get_state(acpi_handle handle, int *state)
151 *state = (sta & 0x01)?ACPI_POWER_RESOURCE_STATE_ON: 153 *state = (sta & 0x01)?ACPI_POWER_RESOURCE_STATE_ON:
152 ACPI_POWER_RESOURCE_STATE_OFF; 154 ACPI_POWER_RESOURCE_STATE_OFF;
153 155
156 acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer);
157
154 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Resource [%s] is %s\n", 158 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Resource [%s] is %s\n",
155 acpi_ut_get_node_name(handle), 159 node_name,
156 *state ? "on" : "off")); 160 *state ? "on" : "off"));
157 161
158 return 0; 162 return 0;
diff --git a/drivers/acpi/sleep/proc.c b/drivers/acpi/proc.c
index 4dbc2271acf5..428c911dba08 100644
--- a/drivers/acpi/sleep/proc.c
+++ b/drivers/acpi/proc.c
@@ -28,8 +28,6 @@ static int acpi_system_sleep_seq_show(struct seq_file *seq, void *offset)
28{ 28{
29 int i; 29 int i;
30 30
31 ACPI_FUNCTION_TRACE("acpi_system_sleep_seq_show");
32
33 for (i = 0; i <= ACPI_STATE_S5; i++) { 31 for (i = 0; i <= ACPI_STATE_S5; i++) {
34 if (sleep_states[i]) { 32 if (sleep_states[i]) {
35 seq_printf(seq, "S%d ", i); 33 seq_printf(seq, "S%d ", i);
@@ -86,49 +84,44 @@ acpi_system_write_sleep(struct file *file,
86 84
87#ifdef HAVE_ACPI_LEGACY_ALARM 85#ifdef HAVE_ACPI_LEGACY_ALARM
88 86
87static u32 cmos_bcd_read(int offset, int rtc_control);
88
89static int acpi_system_alarm_seq_show(struct seq_file *seq, void *offset) 89static int acpi_system_alarm_seq_show(struct seq_file *seq, void *offset)
90{ 90{
91 u32 sec, min, hr; 91 u32 sec, min, hr;
92 u32 day, mo, yr, cent = 0; 92 u32 day, mo, yr, cent = 0;
93 u32 today = 0;
93 unsigned char rtc_control = 0; 94 unsigned char rtc_control = 0;
94 unsigned long flags; 95 unsigned long flags;
95 96
96 ACPI_FUNCTION_TRACE("acpi_system_alarm_seq_show");
97
98 spin_lock_irqsave(&rtc_lock, flags); 97 spin_lock_irqsave(&rtc_lock, flags);
99 98
100 sec = CMOS_READ(RTC_SECONDS_ALARM);
101 min = CMOS_READ(RTC_MINUTES_ALARM);
102 hr = CMOS_READ(RTC_HOURS_ALARM);
103 rtc_control = CMOS_READ(RTC_CONTROL); 99 rtc_control = CMOS_READ(RTC_CONTROL);
100 sec = cmos_bcd_read(RTC_SECONDS_ALARM, rtc_control);
101 min = cmos_bcd_read(RTC_MINUTES_ALARM, rtc_control);
102 hr = cmos_bcd_read(RTC_HOURS_ALARM, rtc_control);
104 103
105 /* If we ever get an FACP with proper values... */ 104 /* If we ever get an FACP with proper values... */
106 if (acpi_gbl_FADT.day_alarm) 105 if (acpi_gbl_FADT.day_alarm) {
107 /* ACPI spec: only low 6 its should be cared */ 106 /* ACPI spec: only low 6 its should be cared */
108 day = CMOS_READ(acpi_gbl_FADT.day_alarm) & 0x3F; 107 day = CMOS_READ(acpi_gbl_FADT.day_alarm) & 0x3F;
109 else 108 if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
110 day = CMOS_READ(RTC_DAY_OF_MONTH); 109 day = bcd2bin(day);
110 } else
111 day = cmos_bcd_read(RTC_DAY_OF_MONTH, rtc_control);
111 if (acpi_gbl_FADT.month_alarm) 112 if (acpi_gbl_FADT.month_alarm)
112 mo = CMOS_READ(acpi_gbl_FADT.month_alarm); 113 mo = cmos_bcd_read(acpi_gbl_FADT.month_alarm, rtc_control);
113 else 114 else {
114 mo = CMOS_READ(RTC_MONTH); 115 mo = cmos_bcd_read(RTC_MONTH, rtc_control);
116 today = cmos_bcd_read(RTC_DAY_OF_MONTH, rtc_control);
117 }
115 if (acpi_gbl_FADT.century) 118 if (acpi_gbl_FADT.century)
116 cent = CMOS_READ(acpi_gbl_FADT.century); 119 cent = cmos_bcd_read(acpi_gbl_FADT.century, rtc_control);
117 120
118 yr = CMOS_READ(RTC_YEAR); 121 yr = cmos_bcd_read(RTC_YEAR, rtc_control);
119 122
120 spin_unlock_irqrestore(&rtc_lock, flags); 123 spin_unlock_irqrestore(&rtc_lock, flags);
121 124
122 if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
123 sec = bcd2bin(sec);
124 min = bcd2bin(min);
125 hr = bcd2bin(hr);
126 day = bcd2bin(day);
127 mo = bcd2bin(mo);
128 yr = bcd2bin(yr);
129 cent = bcd2bin(cent);
130 }
131
132 /* we're trusting the FADT (see above) */ 125 /* we're trusting the FADT (see above) */
133 if (!acpi_gbl_FADT.century) 126 if (!acpi_gbl_FADT.century)
134 /* If we're not trusting the FADT, we should at least make it 127 /* If we're not trusting the FADT, we should at least make it
@@ -153,6 +146,20 @@ static int acpi_system_alarm_seq_show(struct seq_file *seq, void *offset)
153 else 146 else
154 yr += cent * 100; 147 yr += cent * 100;
155 148
149 /*
150 * Show correct dates for alarms up to a month into the future.
151 * This solves issues for nearly all situations with the common
152 * 30-day alarm clocks in PC hardware.
153 */
154 if (day < today) {
155 if (mo < 12) {
156 mo += 1;
157 } else {
158 mo = 1;
159 yr += 1;
160 }
161 }
162
156 seq_printf(seq, "%4.4u-", yr); 163 seq_printf(seq, "%4.4u-", yr);
157 (mo > 12) ? seq_puts(seq, "**-") : seq_printf(seq, "%2.2u-", mo); 164 (mo > 12) ? seq_puts(seq, "**-") : seq_printf(seq, "%2.2u-", mo);
158 (day > 31) ? seq_puts(seq, "** ") : seq_printf(seq, "%2.2u ", day); 165 (day > 31) ? seq_puts(seq, "** ") : seq_printf(seq, "%2.2u ", day);
@@ -227,13 +234,11 @@ acpi_system_write_alarm(struct file *file,
227 int adjust = 0; 234 int adjust = 0;
228 unsigned char rtc_control = 0; 235 unsigned char rtc_control = 0;
229 236
230 ACPI_FUNCTION_TRACE("acpi_system_write_alarm");
231
232 if (count > sizeof(alarm_string) - 1) 237 if (count > sizeof(alarm_string) - 1)
233 return_VALUE(-EINVAL); 238 return -EINVAL;
234 239
235 if (copy_from_user(alarm_string, buffer, count)) 240 if (copy_from_user(alarm_string, buffer, count))
236 return_VALUE(-EFAULT); 241 return -EFAULT;
237 242
238 alarm_string[count] = '\0'; 243 alarm_string[count] = '\0';
239 244
@@ -334,7 +339,7 @@ acpi_system_write_alarm(struct file *file,
334 339
335 result = 0; 340 result = 0;
336 end: 341 end:
337 return_VALUE(result ? result : count); 342 return result ? result : count;
338} 343}
339#endif /* HAVE_ACPI_LEGACY_ALARM */ 344#endif /* HAVE_ACPI_LEGACY_ALARM */
340 345
diff --git a/drivers/acpi/reboot.c b/drivers/acpi/reboot.c
index a6b662c00b67..93f91142d7ad 100644
--- a/drivers/acpi/reboot.c
+++ b/drivers/acpi/reboot.c
@@ -42,7 +42,7 @@ void acpi_reboot(void)
42 case ACPI_ADR_SPACE_SYSTEM_MEMORY: 42 case ACPI_ADR_SPACE_SYSTEM_MEMORY:
43 case ACPI_ADR_SPACE_SYSTEM_IO: 43 case ACPI_ADR_SPACE_SYSTEM_IO:
44 printk(KERN_DEBUG "ACPI MEMORY or I/O RESET_REG.\n"); 44 printk(KERN_DEBUG "ACPI MEMORY or I/O RESET_REG.\n");
45 acpi_hw_low_level_write(8, reset_value, rr); 45 acpi_reset();
46 break; 46 break;
47 } 47 }
48 /* Wait ten seconds */ 48 /* Wait ten seconds */
diff --git a/drivers/acpi/resources/Makefile b/drivers/acpi/resources/Makefile
deleted file mode 100644
index 8de4f69dfa09..000000000000
--- a/drivers/acpi/resources/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
1#
2# Makefile for all Linux ACPI interpreter subdirectories
3#
4
5obj-y := rsaddr.o rscreate.o rsinfo.o rsio.o rslist.o rsmisc.o rsxface.o \
6 rscalc.o rsirq.o rsmemory.o rsutils.o
7
8obj-$(ACPI_FUTURE_USAGE) += rsdump.o
9
10EXTRA_CFLAGS += $(ACPI_CFLAGS)
diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c
index e53e590252c0..0619734895b2 100644
--- a/drivers/acpi/sbshc.c
+++ b/drivers/acpi/sbshc.c
@@ -10,7 +10,6 @@
10 10
11#include <acpi/acpi_bus.h> 11#include <acpi/acpi_bus.h>
12#include <acpi/acpi_drivers.h> 12#include <acpi/acpi_drivers.h>
13#include <acpi/actypes.h>
14#include <linux/wait.h> 13#include <linux/wait.h>
15#include <linux/delay.h> 14#include <linux/delay.h>
16#include <linux/interrupt.h> 15#include <linux/interrupt.h>
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 39b7233c3485..c54d7b6c4066 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -10,7 +10,6 @@
10#include <linux/kthread.h> 10#include <linux/kthread.h>
11 11
12#include <acpi/acpi_drivers.h> 12#include <acpi/acpi_drivers.h>
13#include <acpi/acinterp.h> /* for acpi_ex_eisa_id_to_string() */
14 13
15#define _COMPONENT ACPI_BUS_COMPONENT 14#define _COMPONENT ACPI_BUS_COMPONENT
16ACPI_MODULE_NAME("scan"); 15ACPI_MODULE_NAME("scan");
diff --git a/drivers/acpi/sleep/sleep.h b/drivers/acpi/sleep.h
index cfaf8f5b0a14..cfaf8f5b0a14 100644
--- a/drivers/acpi/sleep/sleep.h
+++ b/drivers/acpi/sleep.h
diff --git a/drivers/acpi/sleep/Makefile b/drivers/acpi/sleep/Makefile
deleted file mode 100644
index f1fb888c2d29..000000000000
--- a/drivers/acpi/sleep/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
1obj-y := wakeup.o
2obj-y += main.o
3obj-$(CONFIG_ACPI_SLEEP) += proc.o
4
5EXTRA_CFLAGS += $(ACPI_CFLAGS)
diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c
index 6e4107f82403..391d0358a592 100644
--- a/drivers/acpi/system.c
+++ b/drivers/acpi/system.c
@@ -192,65 +192,6 @@ static struct attribute_group interrupt_stats_attr_group = {
192}; 192};
193static struct kobj_attribute *counter_attrs; 193static struct kobj_attribute *counter_attrs;
194 194
195static int count_num_gpes(void)
196{
197 int count = 0;
198 struct acpi_gpe_xrupt_info *gpe_xrupt_info;
199 struct acpi_gpe_block_info *gpe_block;
200 acpi_cpu_flags flags;
201
202 flags = acpi_os_acquire_lock(acpi_gbl_gpe_lock);
203
204 gpe_xrupt_info = acpi_gbl_gpe_xrupt_list_head;
205 while (gpe_xrupt_info) {
206 gpe_block = gpe_xrupt_info->gpe_block_list_head;
207 while (gpe_block) {
208 count += gpe_block->register_count *
209 ACPI_GPE_REGISTER_WIDTH;
210 gpe_block = gpe_block->next;
211 }
212 gpe_xrupt_info = gpe_xrupt_info->next;
213 }
214 acpi_os_release_lock(acpi_gbl_gpe_lock, flags);
215
216 return count;
217}
218
219static int get_gpe_device(int index, acpi_handle *handle)
220{
221 struct acpi_gpe_xrupt_info *gpe_xrupt_info;
222 struct acpi_gpe_block_info *gpe_block;
223 acpi_cpu_flags flags;
224 struct acpi_namespace_node *node;
225
226 flags = acpi_os_acquire_lock(acpi_gbl_gpe_lock);
227
228 gpe_xrupt_info = acpi_gbl_gpe_xrupt_list_head;
229 while (gpe_xrupt_info) {
230 gpe_block = gpe_xrupt_info->gpe_block_list_head;
231 node = gpe_block->node;
232 while (gpe_block) {
233 index -= gpe_block->register_count *
234 ACPI_GPE_REGISTER_WIDTH;
235 if (index < 0) {
236 acpi_os_release_lock(acpi_gbl_gpe_lock, flags);
237 /* return NULL if it's FADT GPE */
238 if (node->type != ACPI_TYPE_DEVICE)
239 *handle = NULL;
240 else
241 *handle = node;
242 return 0;
243 }
244 node = gpe_block->node;
245 gpe_block = gpe_block->next;
246 }
247 gpe_xrupt_info = gpe_xrupt_info->next;
248 }
249 acpi_os_release_lock(acpi_gbl_gpe_lock, flags);
250
251 return -ENODEV;
252}
253
254static void delete_gpe_attr_array(void) 195static void delete_gpe_attr_array(void)
255{ 196{
256 struct event_counter *tmp = all_counters; 197 struct event_counter *tmp = all_counters;
@@ -309,7 +250,7 @@ static int get_status(u32 index, acpi_event_status *status, acpi_handle *handle)
309 goto end; 250 goto end;
310 251
311 if (index < num_gpes) { 252 if (index < num_gpes) {
312 result = get_gpe_device(index, handle); 253 result = acpi_get_gpe_device(index, handle);
313 if (result) { 254 if (result) {
314 ACPI_EXCEPTION((AE_INFO, AE_NOT_FOUND, 255 ACPI_EXCEPTION((AE_INFO, AE_NOT_FOUND,
315 "Invalid GPE 0x%x\n", index)); 256 "Invalid GPE 0x%x\n", index));
@@ -436,7 +377,7 @@ void acpi_irq_stats_init(void)
436 if (all_counters) 377 if (all_counters)
437 return; 378 return;
438 379
439 num_gpes = count_num_gpes(); 380 num_gpes = acpi_current_gpe_count;
440 num_counters = num_gpes + ACPI_NUM_FIXED_EVENTS + NUM_COUNTERS_EXTRA; 381 num_counters = num_gpes + ACPI_NUM_FIXED_EVENTS + NUM_COUNTERS_EXTRA;
441 382
442 all_attrs = kzalloc(sizeof(struct attribute *) * (num_counters + 1), 383 all_attrs = kzalloc(sizeof(struct attribute *) * (num_counters + 1),
diff --git a/drivers/acpi/tables/Makefile b/drivers/acpi/tables/Makefile
deleted file mode 100644
index 7385efa61622..000000000000
--- a/drivers/acpi/tables/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
1#
2# Makefile for all Linux ACPI interpreter subdirectories
3#
4
5obj-y := tbxface.o tbinstal.o tbutils.o tbfind.o tbfadt.o tbxfroot.o
6
7EXTRA_CFLAGS += $(ACPI_CFLAGS)
diff --git a/drivers/acpi/utilities/Makefile b/drivers/acpi/utilities/Makefile
deleted file mode 100644
index 88eff14c4894..000000000000
--- a/drivers/acpi/utilities/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
1#
2# Makefile for all Linux ACPI interpreter subdirectories
3#
4
5obj-y := utalloc.o utdebug.o uteval.o utinit.o utmisc.o utxface.o \
6 utcopy.o utdelete.o utglobal.o utmath.o utobject.o \
7 utstate.o utmutex.o utobject.o utcache.o utresrc.o
8
9EXTRA_CFLAGS += $(ACPI_CFLAGS)
diff --git a/drivers/acpi/utilities/utcache.c b/drivers/acpi/utilities/utcache.c
deleted file mode 100644
index 245fa80cf600..000000000000
--- a/drivers/acpi/utilities/utcache.c
+++ /dev/null
@@ -1,314 +0,0 @@
1/******************************************************************************
2 *
3 * Module Name: utcache - local cache allocation routines
4 *
5 *****************************************************************************/
6
7/*
8 * Copyright (C) 2000 - 2008, Intel Corp.
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions, and the following disclaimer,
16 * without modification.
17 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
18 * substantially similar to the "NO WARRANTY" disclaimer below
19 * ("Disclaimer") and any redistribution must be conditioned upon
20 * including a substantially similar Disclaimer requirement for further
21 * binary redistribution.
22 * 3. Neither the names of the above-listed copyright holders nor the names
23 * of any contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * Alternatively, this software may be distributed under the terms of the
27 * GNU General Public License ("GPL") version 2 as published by the Free
28 * Software Foundation.
29 *
30 * NO WARRANTY
31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
32 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
33 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
34 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
35 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
40 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 * POSSIBILITY OF SUCH DAMAGES.
42 */
43
44#include <acpi/acpi.h>
45
46#define _COMPONENT ACPI_UTILITIES
47ACPI_MODULE_NAME("utcache")
48#ifdef ACPI_USE_LOCAL_CACHE
49/*******************************************************************************
50 *
51 * FUNCTION: acpi_os_create_cache
52 *
53 * PARAMETERS: cache_name - Ascii name for the cache
54 * object_size - Size of each cached object
55 * max_depth - Maximum depth of the cache (in objects)
56 * return_cache - Where the new cache object is returned
57 *
58 * RETURN: Status
59 *
60 * DESCRIPTION: Create a cache object
61 *
62 ******************************************************************************/
63acpi_status
64acpi_os_create_cache(char *cache_name,
65 u16 object_size,
66 u16 max_depth, struct acpi_memory_list ** return_cache)
67{
68 struct acpi_memory_list *cache;
69
70 ACPI_FUNCTION_ENTRY();
71
72 if (!cache_name || !return_cache || (object_size < 16)) {
73 return (AE_BAD_PARAMETER);
74 }
75
76 /* Create the cache object */
77
78 cache = acpi_os_allocate(sizeof(struct acpi_memory_list));
79 if (!cache) {
80 return (AE_NO_MEMORY);
81 }
82
83 /* Populate the cache object and return it */
84
85 ACPI_MEMSET(cache, 0, sizeof(struct acpi_memory_list));
86 cache->link_offset = 8;
87 cache->list_name = cache_name;
88 cache->object_size = object_size;
89 cache->max_depth = max_depth;
90
91 *return_cache = cache;
92 return (AE_OK);
93}
94
95/*******************************************************************************
96 *
97 * FUNCTION: acpi_os_purge_cache
98 *
99 * PARAMETERS: Cache - Handle to cache object
100 *
101 * RETURN: Status
102 *
103 * DESCRIPTION: Free all objects within the requested cache.
104 *
105 ******************************************************************************/
106
107acpi_status acpi_os_purge_cache(struct acpi_memory_list * cache)
108{
109 char *next;
110
111 ACPI_FUNCTION_ENTRY();
112
113 if (!cache) {
114 return (AE_BAD_PARAMETER);
115 }
116
117 /* Walk the list of objects in this cache */
118
119 while (cache->list_head) {
120
121 /* Delete and unlink one cached state object */
122
123 next = *(ACPI_CAST_INDIRECT_PTR(char,
124 &(((char *)cache->
125 list_head)[cache->
126 link_offset])));
127 ACPI_FREE(cache->list_head);
128
129 cache->list_head = next;
130 cache->current_depth--;
131 }
132
133 return (AE_OK);
134}
135
136/*******************************************************************************
137 *
138 * FUNCTION: acpi_os_delete_cache
139 *
140 * PARAMETERS: Cache - Handle to cache object
141 *
142 * RETURN: Status
143 *
144 * DESCRIPTION: Free all objects within the requested cache and delete the
145 * cache object.
146 *
147 ******************************************************************************/
148
149acpi_status acpi_os_delete_cache(struct acpi_memory_list * cache)
150{
151 acpi_status status;
152
153 ACPI_FUNCTION_ENTRY();
154
155 /* Purge all objects in the cache */
156
157 status = acpi_os_purge_cache(cache);
158 if (ACPI_FAILURE(status)) {
159 return (status);
160 }
161
162 /* Now we can delete the cache object */
163
164 ACPI_FREE(cache);
165 return (AE_OK);
166}
167
168/*******************************************************************************
169 *
170 * FUNCTION: acpi_os_release_object
171 *
172 * PARAMETERS: Cache - Handle to cache object
173 * Object - The object to be released
174 *
175 * RETURN: None
176 *
177 * DESCRIPTION: Release an object to the specified cache. If cache is full,
178 * the object is deleted.
179 *
180 ******************************************************************************/
181
182acpi_status
183acpi_os_release_object(struct acpi_memory_list * cache, void *object)
184{
185 acpi_status status;
186
187 ACPI_FUNCTION_ENTRY();
188
189 if (!cache || !object) {
190 return (AE_BAD_PARAMETER);
191 }
192
193 /* If cache is full, just free this object */
194
195 if (cache->current_depth >= cache->max_depth) {
196 ACPI_FREE(object);
197 ACPI_MEM_TRACKING(cache->total_freed++);
198 }
199
200 /* Otherwise put this object back into the cache */
201
202 else {
203 status = acpi_ut_acquire_mutex(ACPI_MTX_CACHES);
204 if (ACPI_FAILURE(status)) {
205 return (status);
206 }
207
208 /* Mark the object as cached */
209
210 ACPI_MEMSET(object, 0xCA, cache->object_size);
211 ACPI_SET_DESCRIPTOR_TYPE(object, ACPI_DESC_TYPE_CACHED);
212
213 /* Put the object at the head of the cache list */
214
215 *(ACPI_CAST_INDIRECT_PTR(char,
216 &(((char *)object)[cache->
217 link_offset]))) =
218 cache->list_head;
219 cache->list_head = object;
220 cache->current_depth++;
221
222 (void)acpi_ut_release_mutex(ACPI_MTX_CACHES);
223 }
224
225 return (AE_OK);
226}
227
228/*******************************************************************************
229 *
230 * FUNCTION: acpi_os_acquire_object
231 *
232 * PARAMETERS: Cache - Handle to cache object
233 *
234 * RETURN: the acquired object. NULL on error
235 *
236 * DESCRIPTION: Get an object from the specified cache. If cache is empty,
237 * the object is allocated.
238 *
239 ******************************************************************************/
240
241void *acpi_os_acquire_object(struct acpi_memory_list *cache)
242{
243 acpi_status status;
244 void *object;
245
246 ACPI_FUNCTION_NAME(os_acquire_object);
247
248 if (!cache) {
249 return (NULL);
250 }
251
252 status = acpi_ut_acquire_mutex(ACPI_MTX_CACHES);
253 if (ACPI_FAILURE(status)) {
254 return (NULL);
255 }
256
257 ACPI_MEM_TRACKING(cache->requests++);
258
259 /* Check the cache first */
260
261 if (cache->list_head) {
262
263 /* There is an object available, use it */
264
265 object = cache->list_head;
266 cache->list_head = *(ACPI_CAST_INDIRECT_PTR(char,
267 &(((char *)
268 object)[cache->
269 link_offset])));
270
271 cache->current_depth--;
272
273 ACPI_MEM_TRACKING(cache->hits++);
274 ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
275 "Object %p from %s cache\n", object,
276 cache->list_name));
277
278 status = acpi_ut_release_mutex(ACPI_MTX_CACHES);
279 if (ACPI_FAILURE(status)) {
280 return (NULL);
281 }
282
283 /* Clear (zero) the previously used Object */
284
285 ACPI_MEMSET(object, 0, cache->object_size);
286 } else {
287 /* The cache is empty, create a new object */
288
289 ACPI_MEM_TRACKING(cache->total_allocated++);
290
291#ifdef ACPI_DBG_TRACK_ALLOCATIONS
292 if ((cache->total_allocated - cache->total_freed) >
293 cache->max_occupied) {
294 cache->max_occupied =
295 cache->total_allocated - cache->total_freed;
296 }
297#endif
298
299 /* Avoid deadlock with ACPI_ALLOCATE_ZEROED */
300
301 status = acpi_ut_release_mutex(ACPI_MTX_CACHES);
302 if (ACPI_FAILURE(status)) {
303 return (NULL);
304 }
305
306 object = ACPI_ALLOCATE_ZEROED(cache->object_size);
307 if (!object) {
308 return (NULL);
309 }
310 }
311
312 return (object);
313}
314#endif /* ACPI_USE_LOCAL_CACHE */
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index baa441929720..f261737636da 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -36,6 +36,7 @@
36#include <linux/backlight.h> 36#include <linux/backlight.h>
37#include <linux/thermal.h> 37#include <linux/thermal.h>
38#include <linux/video_output.h> 38#include <linux/video_output.h>
39#include <linux/sort.h>
39#include <asm/uaccess.h> 40#include <asm/uaccess.h>
40 41
41#include <acpi/acpi_bus.h> 42#include <acpi/acpi_bus.h>
@@ -481,6 +482,7 @@ acpi_video_device_lcd_set_level(struct acpi_video_device *device, int level)
481 int status = AE_OK; 482 int status = AE_OK;
482 union acpi_object arg0 = { ACPI_TYPE_INTEGER }; 483 union acpi_object arg0 = { ACPI_TYPE_INTEGER };
483 struct acpi_object_list args = { 1, &arg0 }; 484 struct acpi_object_list args = { 1, &arg0 };
485 int state;
484 486
485 487
486 arg0.integer.value = level; 488 arg0.integer.value = level;
@@ -489,6 +491,10 @@ acpi_video_device_lcd_set_level(struct acpi_video_device *device, int level)
489 status = acpi_evaluate_object(device->dev->handle, "_BCM", 491 status = acpi_evaluate_object(device->dev->handle, "_BCM",
490 &args, NULL); 492 &args, NULL);
491 device->brightness->curr = level; 493 device->brightness->curr = level;
494 for (state = 2; state < device->brightness->count; state++)
495 if (level == device->brightness->levels[state])
496 device->backlight->props.brightness = state - 2;
497
492 return status; 498 return status;
493} 499}
494 500
@@ -626,6 +632,16 @@ acpi_video_bus_DOS(struct acpi_video_bus *video, int bios_flag, int lcd_flag)
626} 632}
627 633
628/* 634/*
635 * Simple comparison function used to sort backlight levels.
636 */
637
638static int
639acpi_video_cmp_level(const void *a, const void *b)
640{
641 return *(int *)a - *(int *)b;
642}
643
644/*
629 * Arg: 645 * Arg:
630 * device : video output device (LCD, CRT, ..) 646 * device : video output device (LCD, CRT, ..)
631 * 647 *
@@ -676,6 +692,10 @@ acpi_video_init_brightness(struct acpi_video_device *device)
676 count++; 692 count++;
677 } 693 }
678 694
695 /* don't sort the first two brightness levels */
696 sort(&br->levels[2], count - 2, sizeof(br->levels[2]),
697 acpi_video_cmp_level, NULL);
698
679 if (count < 2) 699 if (count < 2)
680 goto out_free_levels; 700 goto out_free_levels;
681 701
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index f022eb6f5637..50e3d2dbf3af 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -234,7 +234,7 @@ EXPORT_SYMBOL(acpi_video_display_switch_support);
234 * To force that backlight or display output switching is processed by vendor 234 * To force that backlight or display output switching is processed by vendor
235 * specific acpi drivers or video.ko driver. 235 * specific acpi drivers or video.ko driver.
236 */ 236 */
237int __init acpi_backlight(char *str) 237static int __init acpi_backlight(char *str)
238{ 238{
239 if (str == NULL || *str == '\0') 239 if (str == NULL || *str == '\0')
240 return 1; 240 return 1;
@@ -250,7 +250,7 @@ int __init acpi_backlight(char *str)
250} 250}
251__setup("acpi_backlight=", acpi_backlight); 251__setup("acpi_backlight=", acpi_backlight);
252 252
253int __init acpi_display_output(char *str) 253static int __init acpi_display_output(char *str)
254{ 254{
255 if (str == NULL || *str == '\0') 255 if (str == NULL || *str == '\0')
256 return 1; 256 return 1;
diff --git a/drivers/acpi/sleep/wakeup.c b/drivers/acpi/wakeup.c
index dea4c23df764..2d34806d45dd 100644
--- a/drivers/acpi/sleep/wakeup.c
+++ b/drivers/acpi/wakeup.c
@@ -8,7 +8,6 @@
8#include <acpi/acpi_drivers.h> 8#include <acpi/acpi_drivers.h>
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/types.h> 10#include <linux/types.h>
11#include <acpi/acevents.h>
12#include "sleep.h" 11#include "sleep.h"
13 12
14#define _COMPONENT ACPI_SYSTEM_COMPONENT 13#define _COMPONENT ACPI_SYSTEM_COMPONENT
@@ -28,8 +27,6 @@ void acpi_enable_wakeup_device_prep(u8 sleep_state)
28{ 27{
29 struct list_head *node, *next; 28 struct list_head *node, *next;
30 29
31 ACPI_FUNCTION_TRACE("acpi_enable_wakeup_device_prep");
32
33 spin_lock(&acpi_device_lock); 30 spin_lock(&acpi_device_lock);
34 list_for_each_safe(node, next, &acpi_wakeup_device_list) { 31 list_for_each_safe(node, next, &acpi_wakeup_device_list) {
35 struct acpi_device *dev = container_of(node, 32 struct acpi_device *dev = container_of(node,
@@ -61,7 +58,6 @@ void acpi_enable_wakeup_device(u8 sleep_state)
61 * Caution: this routine must be invoked when interrupt is disabled 58 * Caution: this routine must be invoked when interrupt is disabled
62 * Refer ACPI2.0: P212 59 * Refer ACPI2.0: P212
63 */ 60 */
64 ACPI_FUNCTION_TRACE("acpi_enable_wakeup_device");
65 spin_lock(&acpi_device_lock); 61 spin_lock(&acpi_device_lock);
66 list_for_each_safe(node, next, &acpi_wakeup_device_list) { 62 list_for_each_safe(node, next, &acpi_wakeup_device_list) {
67 struct acpi_device *dev = 63 struct acpi_device *dev =
@@ -103,8 +99,6 @@ void acpi_disable_wakeup_device(u8 sleep_state)
103{ 99{
104 struct list_head *node, *next; 100 struct list_head *node, *next;
105 101
106 ACPI_FUNCTION_TRACE("acpi_disable_wakeup_device");
107
108 spin_lock(&acpi_device_lock); 102 spin_lock(&acpi_device_lock);
109 list_for_each_safe(node, next, &acpi_wakeup_device_list) { 103 list_for_each_safe(node, next, &acpi_wakeup_device_list) {
110 struct acpi_device *dev = 104 struct acpi_device *dev =
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index ef02e488d468..6273d98d00eb 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -19,12 +19,6 @@
19#include "libata.h" 19#include "libata.h"
20 20
21#include <acpi/acpi_bus.h> 21#include <acpi/acpi_bus.h>
22#include <acpi/acnames.h>
23#include <acpi/acnamesp.h>
24#include <acpi/acparser.h>
25#include <acpi/acexcep.h>
26#include <acpi/acmacros.h>
27#include <acpi/actypes.h>
28 22
29enum { 23enum {
30 ATA_ACPI_FILTER_SETXFER = 1 << 0, 24 ATA_ACPI_FILTER_SETXFER = 1 << 0,
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 175df54eb664..c507a9ac78f4 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4556,7 +4556,7 @@ void ata_sg_clean(struct ata_queued_cmd *qc)
4556 struct scatterlist *sg = qc->sg; 4556 struct scatterlist *sg = qc->sg;
4557 int dir = qc->dma_dir; 4557 int dir = qc->dma_dir;
4558 4558
4559 WARN_ON(sg == NULL); 4559 WARN_ON_ONCE(sg == NULL);
4560 4560
4561 VPRINTK("unmapping %u sg elements\n", qc->n_elem); 4561 VPRINTK("unmapping %u sg elements\n", qc->n_elem);
4562 4562
@@ -4776,7 +4776,7 @@ void ata_qc_free(struct ata_queued_cmd *qc)
4776 struct ata_port *ap = qc->ap; 4776 struct ata_port *ap = qc->ap;
4777 unsigned int tag; 4777 unsigned int tag;
4778 4778
4779 WARN_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ 4779 WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */
4780 4780
4781 qc->flags = 0; 4781 qc->flags = 0;
4782 tag = qc->tag; 4782 tag = qc->tag;
@@ -4791,8 +4791,8 @@ void __ata_qc_complete(struct ata_queued_cmd *qc)
4791 struct ata_port *ap = qc->ap; 4791 struct ata_port *ap = qc->ap;
4792 struct ata_link *link = qc->dev->link; 4792 struct ata_link *link = qc->dev->link;
4793 4793
4794 WARN_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ 4794 WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */
4795 WARN_ON(!(qc->flags & ATA_QCFLAG_ACTIVE)); 4795 WARN_ON_ONCE(!(qc->flags & ATA_QCFLAG_ACTIVE));
4796 4796
4797 if (likely(qc->flags & ATA_QCFLAG_DMAMAP)) 4797 if (likely(qc->flags & ATA_QCFLAG_DMAMAP))
4798 ata_sg_clean(qc); 4798 ata_sg_clean(qc);
@@ -4878,7 +4878,7 @@ void ata_qc_complete(struct ata_queued_cmd *qc)
4878 struct ata_device *dev = qc->dev; 4878 struct ata_device *dev = qc->dev;
4879 struct ata_eh_info *ehi = &dev->link->eh_info; 4879 struct ata_eh_info *ehi = &dev->link->eh_info;
4880 4880
4881 WARN_ON(ap->pflags & ATA_PFLAG_FROZEN); 4881 WARN_ON_ONCE(ap->pflags & ATA_PFLAG_FROZEN);
4882 4882
4883 if (unlikely(qc->err_mask)) 4883 if (unlikely(qc->err_mask))
4884 qc->flags |= ATA_QCFLAG_FAILED; 4884 qc->flags |= ATA_QCFLAG_FAILED;
@@ -5000,16 +5000,16 @@ void ata_qc_issue(struct ata_queued_cmd *qc)
5000 * check is skipped for old EH because it reuses active qc to 5000 * check is skipped for old EH because it reuses active qc to
5001 * request ATAPI sense. 5001 * request ATAPI sense.
5002 */ 5002 */
5003 WARN_ON(ap->ops->error_handler && ata_tag_valid(link->active_tag)); 5003 WARN_ON_ONCE(ap->ops->error_handler && ata_tag_valid(link->active_tag));
5004 5004
5005 if (ata_is_ncq(prot)) { 5005 if (ata_is_ncq(prot)) {
5006 WARN_ON(link->sactive & (1 << qc->tag)); 5006 WARN_ON_ONCE(link->sactive & (1 << qc->tag));
5007 5007
5008 if (!link->sactive) 5008 if (!link->sactive)
5009 ap->nr_active_links++; 5009 ap->nr_active_links++;
5010 link->sactive |= 1 << qc->tag; 5010 link->sactive |= 1 << qc->tag;
5011 } else { 5011 } else {
5012 WARN_ON(link->sactive); 5012 WARN_ON_ONCE(link->sactive);
5013 5013
5014 ap->nr_active_links++; 5014 ap->nr_active_links++;
5015 link->active_tag = qc->tag; 5015 link->active_tag = qc->tag;
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index c59ad76c84b1..0eae9b453556 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -578,7 +578,7 @@ void ata_sff_tf_load(struct ata_port *ap, const struct ata_taskfile *tf)
578 } 578 }
579 579
580 if (is_addr && (tf->flags & ATA_TFLAG_LBA48)) { 580 if (is_addr && (tf->flags & ATA_TFLAG_LBA48)) {
581 WARN_ON(!ioaddr->ctl_addr); 581 WARN_ON_ONCE(!ioaddr->ctl_addr);
582 iowrite8(tf->hob_feature, ioaddr->feature_addr); 582 iowrite8(tf->hob_feature, ioaddr->feature_addr);
583 iowrite8(tf->hob_nsect, ioaddr->nsect_addr); 583 iowrite8(tf->hob_nsect, ioaddr->nsect_addr);
584 iowrite8(tf->hob_lbal, ioaddr->lbal_addr); 584 iowrite8(tf->hob_lbal, ioaddr->lbal_addr);
@@ -651,7 +651,7 @@ void ata_sff_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
651 iowrite8(tf->ctl, ioaddr->ctl_addr); 651 iowrite8(tf->ctl, ioaddr->ctl_addr);
652 ap->last_ctl = tf->ctl; 652 ap->last_ctl = tf->ctl;
653 } else 653 } else
654 WARN_ON(1); 654 WARN_ON_ONCE(1);
655 } 655 }
656} 656}
657EXPORT_SYMBOL_GPL(ata_sff_tf_read); 657EXPORT_SYMBOL_GPL(ata_sff_tf_read);
@@ -891,7 +891,7 @@ static void ata_pio_sectors(struct ata_queued_cmd *qc)
891 /* READ/WRITE MULTIPLE */ 891 /* READ/WRITE MULTIPLE */
892 unsigned int nsect; 892 unsigned int nsect;
893 893
894 WARN_ON(qc->dev->multi_count == 0); 894 WARN_ON_ONCE(qc->dev->multi_count == 0);
895 895
896 nsect = min((qc->nbytes - qc->curbytes) / qc->sect_size, 896 nsect = min((qc->nbytes - qc->curbytes) / qc->sect_size,
897 qc->dev->multi_count); 897 qc->dev->multi_count);
@@ -918,7 +918,7 @@ static void atapi_send_cdb(struct ata_port *ap, struct ata_queued_cmd *qc)
918{ 918{
919 /* send SCSI cdb */ 919 /* send SCSI cdb */
920 DPRINTK("send cdb\n"); 920 DPRINTK("send cdb\n");
921 WARN_ON(qc->dev->cdb_len < 12); 921 WARN_ON_ONCE(qc->dev->cdb_len < 12);
922 922
923 ap->ops->sff_data_xfer(qc->dev, qc->cdb, qc->dev->cdb_len, 1); 923 ap->ops->sff_data_xfer(qc->dev, qc->cdb, qc->dev->cdb_len, 1);
924 ata_sff_sync(ap); 924 ata_sff_sync(ap);
@@ -1014,7 +1014,7 @@ next_sg:
1014 } 1014 }
1015 1015
1016 /* consumed can be larger than count only for the last transfer */ 1016 /* consumed can be larger than count only for the last transfer */
1017 WARN_ON(qc->cursg && count != consumed); 1017 WARN_ON_ONCE(qc->cursg && count != consumed);
1018 1018
1019 if (bytes) 1019 if (bytes)
1020 goto next_sg; 1020 goto next_sg;
@@ -1172,13 +1172,13 @@ int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc,
1172 unsigned long flags = 0; 1172 unsigned long flags = 0;
1173 int poll_next; 1173 int poll_next;
1174 1174
1175 WARN_ON((qc->flags & ATA_QCFLAG_ACTIVE) == 0); 1175 WARN_ON_ONCE((qc->flags & ATA_QCFLAG_ACTIVE) == 0);
1176 1176
1177 /* Make sure ata_sff_qc_issue() does not throw things 1177 /* Make sure ata_sff_qc_issue() does not throw things
1178 * like DMA polling into the workqueue. Notice that 1178 * like DMA polling into the workqueue. Notice that
1179 * in_wq is not equivalent to (qc->tf.flags & ATA_TFLAG_POLLING). 1179 * in_wq is not equivalent to (qc->tf.flags & ATA_TFLAG_POLLING).
1180 */ 1180 */
1181 WARN_ON(in_wq != ata_hsm_ok_in_wq(ap, qc)); 1181 WARN_ON_ONCE(in_wq != ata_hsm_ok_in_wq(ap, qc));
1182 1182
1183fsm_start: 1183fsm_start:
1184 DPRINTK("ata%u: protocol %d task_state %d (dev_stat 0x%X)\n", 1184 DPRINTK("ata%u: protocol %d task_state %d (dev_stat 0x%X)\n",
@@ -1387,7 +1387,7 @@ fsm_start:
1387 DPRINTK("ata%u: dev %u command complete, drv_stat 0x%x\n", 1387 DPRINTK("ata%u: dev %u command complete, drv_stat 0x%x\n",
1388 ap->print_id, qc->dev->devno, status); 1388 ap->print_id, qc->dev->devno, status);
1389 1389
1390 WARN_ON(qc->err_mask & (AC_ERR_DEV | AC_ERR_HSM)); 1390 WARN_ON_ONCE(qc->err_mask & (AC_ERR_DEV | AC_ERR_HSM));
1391 1391
1392 ap->hsm_task_state = HSM_ST_IDLE; 1392 ap->hsm_task_state = HSM_ST_IDLE;
1393 1393
@@ -1423,7 +1423,7 @@ void ata_pio_task(struct work_struct *work)
1423 int poll_next; 1423 int poll_next;
1424 1424
1425fsm_start: 1425fsm_start:
1426 WARN_ON(ap->hsm_task_state == HSM_ST_IDLE); 1426 WARN_ON_ONCE(ap->hsm_task_state == HSM_ST_IDLE);
1427 1427
1428 /* 1428 /*
1429 * This is purely heuristic. This is a fast path. 1429 * This is purely heuristic. This is a fast path.
@@ -1512,7 +1512,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
1512 break; 1512 break;
1513 1513
1514 case ATA_PROT_DMA: 1514 case ATA_PROT_DMA:
1515 WARN_ON(qc->tf.flags & ATA_TFLAG_POLLING); 1515 WARN_ON_ONCE(qc->tf.flags & ATA_TFLAG_POLLING);
1516 1516
1517 ap->ops->sff_tf_load(ap, &qc->tf); /* load tf registers */ 1517 ap->ops->sff_tf_load(ap, &qc->tf); /* load tf registers */
1518 ap->ops->bmdma_setup(qc); /* set up bmdma */ 1518 ap->ops->bmdma_setup(qc); /* set up bmdma */
@@ -1564,7 +1564,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
1564 break; 1564 break;
1565 1565
1566 case ATAPI_PROT_DMA: 1566 case ATAPI_PROT_DMA:
1567 WARN_ON(qc->tf.flags & ATA_TFLAG_POLLING); 1567 WARN_ON_ONCE(qc->tf.flags & ATA_TFLAG_POLLING);
1568 1568
1569 ap->ops->sff_tf_load(ap, &qc->tf); /* load tf registers */ 1569 ap->ops->sff_tf_load(ap, &qc->tf); /* load tf registers */
1570 ap->ops->bmdma_setup(qc); /* set up bmdma */ 1570 ap->ops->bmdma_setup(qc); /* set up bmdma */
@@ -1576,7 +1576,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
1576 break; 1576 break;
1577 1577
1578 default: 1578 default:
1579 WARN_ON(1); 1579 WARN_ON_ONCE(1);
1580 return AC_ERR_SYSTEM; 1580 return AC_ERR_SYSTEM;
1581 } 1581 }
1582 1582
diff --git a/drivers/ata/pata_acpi.c b/drivers/ata/pata_acpi.c
index e2e332d8ff95..8b77a9802df1 100644
--- a/drivers/ata/pata_acpi.c
+++ b/drivers/ata/pata_acpi.c
@@ -13,12 +13,6 @@
13#include <linux/device.h> 13#include <linux/device.h>
14#include <scsi/scsi_host.h> 14#include <scsi/scsi_host.h>
15#include <acpi/acpi_bus.h> 15#include <acpi/acpi_bus.h>
16#include <acpi/acnames.h>
17#include <acpi/acnamesp.h>
18#include <acpi/acparser.h>
19#include <acpi/acexcep.h>
20#include <acpi/acmacros.h>
21#include <acpi/actypes.h>
22 16
23#include <linux/libata.h> 17#include <linux/libata.h>
24#include <linux/ata.h> 18#include <linux/ata.h>
diff --git a/drivers/char/tpm/tpm_bios.c b/drivers/char/tpm/tpm_bios.c
index 68f052b42ed7..ed306eb1057f 100644
--- a/drivers/char/tpm/tpm_bios.c
+++ b/drivers/char/tpm/tpm_bios.c
@@ -23,8 +23,6 @@
23#include <linux/security.h> 23#include <linux/security.h>
24#include <linux/module.h> 24#include <linux/module.h>
25#include <acpi/acpi.h> 25#include <acpi/acpi.h>
26#include <acpi/actypes.h>
27#include <acpi/actbl.h>
28#include "tpm.h" 26#include "tpm.h"
29 27
30#define TCG_EVENT_NAME_LEN_MAX 255 28#define TCG_EVENT_NAME_LEN_MAX 255
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 8d7cf3f31450..f1df59f59a37 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -15,12 +15,14 @@
15#include <linux/tick.h> 15#include <linux/tick.h>
16 16
17#define BREAK_FUZZ 4 /* 4 us */ 17#define BREAK_FUZZ 4 /* 4 us */
18#define PRED_HISTORY_PCT 50
18 19
19struct menu_device { 20struct menu_device {
20 int last_state_idx; 21 int last_state_idx;
21 22
22 unsigned int expected_us; 23 unsigned int expected_us;
23 unsigned int predicted_us; 24 unsigned int predicted_us;
25 unsigned int current_predicted_us;
24 unsigned int last_measured_us; 26 unsigned int last_measured_us;
25 unsigned int elapsed_us; 27 unsigned int elapsed_us;
26}; 28};
@@ -47,6 +49,12 @@ static int menu_select(struct cpuidle_device *dev)
47 data->expected_us = 49 data->expected_us =
48 (u32) ktime_to_ns(tick_nohz_get_sleep_length()) / 1000; 50 (u32) ktime_to_ns(tick_nohz_get_sleep_length()) / 1000;
49 51
52 /* Recalculate predicted_us based on prediction_history_pct */
53 data->predicted_us *= PRED_HISTORY_PCT;
54 data->predicted_us += (100 - PRED_HISTORY_PCT) *
55 data->current_predicted_us;
56 data->predicted_us /= 100;
57
50 /* find the deepest idle state that satisfies our constraints */ 58 /* find the deepest idle state that satisfies our constraints */
51 for (i = CPUIDLE_DRIVER_STATE_START + 1; i < dev->state_count; i++) { 59 for (i = CPUIDLE_DRIVER_STATE_START + 1; i < dev->state_count; i++) {
52 struct cpuidle_state *s = &dev->states[i]; 60 struct cpuidle_state *s = &dev->states[i];
@@ -97,7 +105,7 @@ static void menu_reflect(struct cpuidle_device *dev)
97 measured_us = -1; 105 measured_us = -1;
98 106
99 /* Predict time until next break event */ 107 /* Predict time until next break event */
100 data->predicted_us = max(measured_us, data->last_measured_us); 108 data->current_predicted_us = max(measured_us, data->last_measured_us);
101 109
102 if (last_idle_us + BREAK_FUZZ < 110 if (last_idle_us + BREAK_FUZZ <
103 data->expected_us - target->exit_latency) { 111 data->expected_us - target->exit_latency) {
diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c
index d883e1b8bb8c..55433849bfa6 100644
--- a/drivers/dca/dca-core.c
+++ b/drivers/dca/dca-core.c
@@ -270,6 +270,6 @@ static void __exit dca_exit(void)
270 dca_sysfs_exit(); 270 dca_sysfs_exit();
271} 271}
272 272
273subsys_initcall(dca_init); 273arch_initcall(dca_init);
274module_exit(dca_exit); 274module_exit(dca_exit);
275 275
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 904e57558bb5..e34b06420816 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -33,7 +33,6 @@ config INTEL_IOATDMA
33config INTEL_IOP_ADMA 33config INTEL_IOP_ADMA
34 tristate "Intel IOP ADMA support" 34 tristate "Intel IOP ADMA support"
35 depends on ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX 35 depends on ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX
36 select ASYNC_CORE
37 select DMA_ENGINE 36 select DMA_ENGINE
38 help 37 help
39 Enable support for the Intel(R) IOP Series RAID engines. 38 Enable support for the Intel(R) IOP Series RAID engines.
@@ -59,7 +58,6 @@ config FSL_DMA
59config MV_XOR 58config MV_XOR
60 bool "Marvell XOR engine support" 59 bool "Marvell XOR engine support"
61 depends on PLAT_ORION 60 depends on PLAT_ORION
62 select ASYNC_CORE
63 select DMA_ENGINE 61 select DMA_ENGINE
64 ---help--- 62 ---help---
65 Enable support for the Marvell XOR engine. 63 Enable support for the Marvell XOR engine.
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 657996517374..403dbe781122 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -31,32 +31,18 @@
31 * 31 *
32 * LOCKING: 32 * LOCKING:
33 * 33 *
34 * The subsystem keeps two global lists, dma_device_list and dma_client_list. 34 * The subsystem keeps a global list of dma_device structs it is protected by a
35 * Both of these are protected by a mutex, dma_list_mutex. 35 * mutex, dma_list_mutex.
36 *
37 * A subsystem can get access to a channel by calling dmaengine_get() followed
38 * by dma_find_channel(), or if it has need for an exclusive channel it can call
39 * dma_request_channel(). Once a channel is allocated a reference is taken
40 * against its corresponding driver to disable removal.
36 * 41 *
37 * Each device has a channels list, which runs unlocked but is never modified 42 * Each device has a channels list, which runs unlocked but is never modified
38 * once the device is registered, it's just setup by the driver. 43 * once the device is registered, it's just setup by the driver.
39 * 44 *
40 * Each client is responsible for keeping track of the channels it uses. See 45 * See Documentation/dmaengine.txt for more details
41 * the definition of dma_event_callback in dmaengine.h.
42 *
43 * Each device has a kref, which is initialized to 1 when the device is
44 * registered. A kref_get is done for each device registered. When the
45 * device is released, the corresponding kref_put is done in the release
46 * method. Every time one of the device's channels is allocated to a client,
47 * a kref_get occurs. When the channel is freed, the corresponding kref_put
48 * happens. The device's release function does a completion, so
49 * unregister_device does a remove event, device_unregister, a kref_put
50 * for the first reference, then waits on the completion for all other
51 * references to finish.
52 *
53 * Each channel has an open-coded implementation of Rusty Russell's "bigref,"
54 * with a kref and a per_cpu local_t. A dma_chan_get is called when a client
55 * signals that it wants to use a channel, and dma_chan_put is called when
56 * a channel is removed or a client using it is unregistered. A client can
57 * take extra references per outstanding transaction, as is the case with
58 * the NET DMA client. The release function does a kref_put on the device.
59 * -ChrisL, DanW
60 */ 46 */
61 47
62#include <linux/init.h> 48#include <linux/init.h>
@@ -70,54 +56,85 @@
70#include <linux/rcupdate.h> 56#include <linux/rcupdate.h>
71#include <linux/mutex.h> 57#include <linux/mutex.h>
72#include <linux/jiffies.h> 58#include <linux/jiffies.h>
59#include <linux/rculist.h>
60#include <linux/idr.h>
73 61
74static DEFINE_MUTEX(dma_list_mutex); 62static DEFINE_MUTEX(dma_list_mutex);
75static LIST_HEAD(dma_device_list); 63static LIST_HEAD(dma_device_list);
76static LIST_HEAD(dma_client_list); 64static long dmaengine_ref_count;
65static struct idr dma_idr;
77 66
78/* --- sysfs implementation --- */ 67/* --- sysfs implementation --- */
79 68
69/**
70 * dev_to_dma_chan - convert a device pointer to the its sysfs container object
71 * @dev - device node
72 *
73 * Must be called under dma_list_mutex
74 */
75static struct dma_chan *dev_to_dma_chan(struct device *dev)
76{
77 struct dma_chan_dev *chan_dev;
78
79 chan_dev = container_of(dev, typeof(*chan_dev), device);
80 return chan_dev->chan;
81}
82
80static ssize_t show_memcpy_count(struct device *dev, struct device_attribute *attr, char *buf) 83static ssize_t show_memcpy_count(struct device *dev, struct device_attribute *attr, char *buf)
81{ 84{
82 struct dma_chan *chan = to_dma_chan(dev); 85 struct dma_chan *chan;
83 unsigned long count = 0; 86 unsigned long count = 0;
84 int i; 87 int i;
88 int err;
85 89
86 for_each_possible_cpu(i) 90 mutex_lock(&dma_list_mutex);
87 count += per_cpu_ptr(chan->local, i)->memcpy_count; 91 chan = dev_to_dma_chan(dev);
92 if (chan) {
93 for_each_possible_cpu(i)
94 count += per_cpu_ptr(chan->local, i)->memcpy_count;
95 err = sprintf(buf, "%lu\n", count);
96 } else
97 err = -ENODEV;
98 mutex_unlock(&dma_list_mutex);
88 99
89 return sprintf(buf, "%lu\n", count); 100 return err;
90} 101}
91 102
92static ssize_t show_bytes_transferred(struct device *dev, struct device_attribute *attr, 103static ssize_t show_bytes_transferred(struct device *dev, struct device_attribute *attr,
93 char *buf) 104 char *buf)
94{ 105{
95 struct dma_chan *chan = to_dma_chan(dev); 106 struct dma_chan *chan;
96 unsigned long count = 0; 107 unsigned long count = 0;
97 int i; 108 int i;
109 int err;
98 110
99 for_each_possible_cpu(i) 111 mutex_lock(&dma_list_mutex);
100 count += per_cpu_ptr(chan->local, i)->bytes_transferred; 112 chan = dev_to_dma_chan(dev);
113 if (chan) {
114 for_each_possible_cpu(i)
115 count += per_cpu_ptr(chan->local, i)->bytes_transferred;
116 err = sprintf(buf, "%lu\n", count);
117 } else
118 err = -ENODEV;
119 mutex_unlock(&dma_list_mutex);
101 120
102 return sprintf(buf, "%lu\n", count); 121 return err;
103} 122}
104 123
105static ssize_t show_in_use(struct device *dev, struct device_attribute *attr, char *buf) 124static ssize_t show_in_use(struct device *dev, struct device_attribute *attr, char *buf)
106{ 125{
107 struct dma_chan *chan = to_dma_chan(dev); 126 struct dma_chan *chan;
108 int in_use = 0; 127 int err;
109
110 if (unlikely(chan->slow_ref) &&
111 atomic_read(&chan->refcount.refcount) > 1)
112 in_use = 1;
113 else {
114 if (local_read(&(per_cpu_ptr(chan->local,
115 get_cpu())->refcount)) > 0)
116 in_use = 1;
117 put_cpu();
118 }
119 128
120 return sprintf(buf, "%d\n", in_use); 129 mutex_lock(&dma_list_mutex);
130 chan = dev_to_dma_chan(dev);
131 if (chan)
132 err = sprintf(buf, "%d\n", chan->client_count);
133 else
134 err = -ENODEV;
135 mutex_unlock(&dma_list_mutex);
136
137 return err;
121} 138}
122 139
123static struct device_attribute dma_attrs[] = { 140static struct device_attribute dma_attrs[] = {
@@ -127,76 +144,110 @@ static struct device_attribute dma_attrs[] = {
127 __ATTR_NULL 144 __ATTR_NULL
128}; 145};
129 146
130static void dma_async_device_cleanup(struct kref *kref); 147static void chan_dev_release(struct device *dev)
131
132static void dma_dev_release(struct device *dev)
133{ 148{
134 struct dma_chan *chan = to_dma_chan(dev); 149 struct dma_chan_dev *chan_dev;
135 kref_put(&chan->device->refcount, dma_async_device_cleanup); 150
151 chan_dev = container_of(dev, typeof(*chan_dev), device);
152 if (atomic_dec_and_test(chan_dev->idr_ref)) {
153 mutex_lock(&dma_list_mutex);
154 idr_remove(&dma_idr, chan_dev->dev_id);
155 mutex_unlock(&dma_list_mutex);
156 kfree(chan_dev->idr_ref);
157 }
158 kfree(chan_dev);
136} 159}
137 160
138static struct class dma_devclass = { 161static struct class dma_devclass = {
139 .name = "dma", 162 .name = "dma",
140 .dev_attrs = dma_attrs, 163 .dev_attrs = dma_attrs,
141 .dev_release = dma_dev_release, 164 .dev_release = chan_dev_release,
142}; 165};
143 166
144/* --- client and device registration --- */ 167/* --- client and device registration --- */
145 168
146#define dma_chan_satisfies_mask(chan, mask) \ 169#define dma_device_satisfies_mask(device, mask) \
147 __dma_chan_satisfies_mask((chan), &(mask)) 170 __dma_device_satisfies_mask((device), &(mask))
148static int 171static int
149__dma_chan_satisfies_mask(struct dma_chan *chan, dma_cap_mask_t *want) 172__dma_device_satisfies_mask(struct dma_device *device, dma_cap_mask_t *want)
150{ 173{
151 dma_cap_mask_t has; 174 dma_cap_mask_t has;
152 175
153 bitmap_and(has.bits, want->bits, chan->device->cap_mask.bits, 176 bitmap_and(has.bits, want->bits, device->cap_mask.bits,
154 DMA_TX_TYPE_END); 177 DMA_TX_TYPE_END);
155 return bitmap_equal(want->bits, has.bits, DMA_TX_TYPE_END); 178 return bitmap_equal(want->bits, has.bits, DMA_TX_TYPE_END);
156} 179}
157 180
181static struct module *dma_chan_to_owner(struct dma_chan *chan)
182{
183 return chan->device->dev->driver->owner;
184}
185
158/** 186/**
159 * dma_client_chan_alloc - try to allocate channels to a client 187 * balance_ref_count - catch up the channel reference count
160 * @client: &dma_client 188 * @chan - channel to balance ->client_count versus dmaengine_ref_count
161 * 189 *
162 * Called with dma_list_mutex held. 190 * balance_ref_count must be called under dma_list_mutex
163 */ 191 */
164static void dma_client_chan_alloc(struct dma_client *client) 192static void balance_ref_count(struct dma_chan *chan)
165{ 193{
166 struct dma_device *device; 194 struct module *owner = dma_chan_to_owner(chan);
167 struct dma_chan *chan;
168 int desc; /* allocated descriptor count */
169 enum dma_state_client ack;
170 195
171 /* Find a channel */ 196 while (chan->client_count < dmaengine_ref_count) {
172 list_for_each_entry(device, &dma_device_list, global_node) { 197 __module_get(owner);
173 /* Does the client require a specific DMA controller? */ 198 chan->client_count++;
174 if (client->slave && client->slave->dma_dev 199 }
175 && client->slave->dma_dev != device->dev) 200}
176 continue;
177 201
178 list_for_each_entry(chan, &device->channels, device_node) { 202/**
179 if (!dma_chan_satisfies_mask(chan, client->cap_mask)) 203 * dma_chan_get - try to grab a dma channel's parent driver module
180 continue; 204 * @chan - channel to grab
205 *
206 * Must be called under dma_list_mutex
207 */
208static int dma_chan_get(struct dma_chan *chan)
209{
210 int err = -ENODEV;
211 struct module *owner = dma_chan_to_owner(chan);
212
213 if (chan->client_count) {
214 __module_get(owner);
215 err = 0;
216 } else if (try_module_get(owner))
217 err = 0;
218
219 if (err == 0)
220 chan->client_count++;
221
222 /* allocate upon first client reference */
223 if (chan->client_count == 1 && err == 0) {
224 int desc_cnt = chan->device->device_alloc_chan_resources(chan);
225
226 if (desc_cnt < 0) {
227 err = desc_cnt;
228 chan->client_count = 0;
229 module_put(owner);
230 } else if (!dma_has_cap(DMA_PRIVATE, chan->device->cap_mask))
231 balance_ref_count(chan);
232 }
181 233
182 desc = chan->device->device_alloc_chan_resources( 234 return err;
183 chan, client); 235}
184 if (desc >= 0) {
185 ack = client->event_callback(client,
186 chan,
187 DMA_RESOURCE_AVAILABLE);
188 236
189 /* we are done once this client rejects 237/**
190 * an available resource 238 * dma_chan_put - drop a reference to a dma channel's parent driver module
191 */ 239 * @chan - channel to release
192 if (ack == DMA_ACK) { 240 *
193 dma_chan_get(chan); 241 * Must be called under dma_list_mutex
194 chan->client_count++; 242 */
195 } else if (ack == DMA_NAK) 243static void dma_chan_put(struct dma_chan *chan)
196 return; 244{
197 } 245 if (!chan->client_count)
198 } 246 return; /* this channel failed alloc_chan_resources */
199 } 247 chan->client_count--;
248 module_put(dma_chan_to_owner(chan));
249 if (chan->client_count == 0)
250 chan->device->device_free_chan_resources(chan);
200} 251}
201 252
202enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie) 253enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
@@ -218,138 +269,342 @@ enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
218EXPORT_SYMBOL(dma_sync_wait); 269EXPORT_SYMBOL(dma_sync_wait);
219 270
220/** 271/**
221 * dma_chan_cleanup - release a DMA channel's resources 272 * dma_cap_mask_all - enable iteration over all operation types
222 * @kref: kernel reference structure that contains the DMA channel device 273 */
274static dma_cap_mask_t dma_cap_mask_all;
275
276/**
277 * dma_chan_tbl_ent - tracks channel allocations per core/operation
278 * @chan - associated channel for this entry
279 */
280struct dma_chan_tbl_ent {
281 struct dma_chan *chan;
282};
283
284/**
285 * channel_table - percpu lookup table for memory-to-memory offload providers
223 */ 286 */
224void dma_chan_cleanup(struct kref *kref) 287static struct dma_chan_tbl_ent *channel_table[DMA_TX_TYPE_END];
288
289static int __init dma_channel_table_init(void)
225{ 290{
226 struct dma_chan *chan = container_of(kref, struct dma_chan, refcount); 291 enum dma_transaction_type cap;
227 chan->device->device_free_chan_resources(chan); 292 int err = 0;
228 kref_put(&chan->device->refcount, dma_async_device_cleanup); 293
294 bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);
295
296 /* 'interrupt', 'private', and 'slave' are channel capabilities,
297 * but are not associated with an operation so they do not need
298 * an entry in the channel_table
299 */
300 clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);
301 clear_bit(DMA_PRIVATE, dma_cap_mask_all.bits);
302 clear_bit(DMA_SLAVE, dma_cap_mask_all.bits);
303
304 for_each_dma_cap_mask(cap, dma_cap_mask_all) {
305 channel_table[cap] = alloc_percpu(struct dma_chan_tbl_ent);
306 if (!channel_table[cap]) {
307 err = -ENOMEM;
308 break;
309 }
310 }
311
312 if (err) {
313 pr_err("dmaengine: initialization failure\n");
314 for_each_dma_cap_mask(cap, dma_cap_mask_all)
315 if (channel_table[cap])
316 free_percpu(channel_table[cap]);
317 }
318
319 return err;
229} 320}
230EXPORT_SYMBOL(dma_chan_cleanup); 321arch_initcall(dma_channel_table_init);
231 322
232static void dma_chan_free_rcu(struct rcu_head *rcu) 323/**
324 * dma_find_channel - find a channel to carry out the operation
325 * @tx_type: transaction type
326 */
327struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type)
233{ 328{
234 struct dma_chan *chan = container_of(rcu, struct dma_chan, rcu); 329 struct dma_chan *chan;
235 int bias = 0x7FFFFFFF; 330 int cpu;
236 int i; 331
237 for_each_possible_cpu(i) 332 WARN_ONCE(dmaengine_ref_count == 0,
238 bias -= local_read(&per_cpu_ptr(chan->local, i)->refcount); 333 "client called %s without a reference", __func__);
239 atomic_sub(bias, &chan->refcount.refcount); 334
240 kref_put(&chan->refcount, dma_chan_cleanup); 335 cpu = get_cpu();
336 chan = per_cpu_ptr(channel_table[tx_type], cpu)->chan;
337 put_cpu();
338
339 return chan;
241} 340}
341EXPORT_SYMBOL(dma_find_channel);
242 342
243static void dma_chan_release(struct dma_chan *chan) 343/**
344 * dma_issue_pending_all - flush all pending operations across all channels
345 */
346void dma_issue_pending_all(void)
244{ 347{
245 atomic_add(0x7FFFFFFF, &chan->refcount.refcount); 348 struct dma_device *device;
246 chan->slow_ref = 1; 349 struct dma_chan *chan;
247 call_rcu(&chan->rcu, dma_chan_free_rcu); 350
351 WARN_ONCE(dmaengine_ref_count == 0,
352 "client called %s without a reference", __func__);
353
354 rcu_read_lock();
355 list_for_each_entry_rcu(device, &dma_device_list, global_node) {
356 if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
357 continue;
358 list_for_each_entry(chan, &device->channels, device_node)
359 if (chan->client_count)
360 device->device_issue_pending(chan);
361 }
362 rcu_read_unlock();
248} 363}
364EXPORT_SYMBOL(dma_issue_pending_all);
249 365
250/** 366/**
251 * dma_chans_notify_available - broadcast available channels to the clients 367 * nth_chan - returns the nth channel of the given capability
368 * @cap: capability to match
369 * @n: nth channel desired
370 *
371 * Defaults to returning the channel with the desired capability and the
372 * lowest reference count when 'n' cannot be satisfied. Must be called
373 * under dma_list_mutex.
252 */ 374 */
253static void dma_clients_notify_available(void) 375static struct dma_chan *nth_chan(enum dma_transaction_type cap, int n)
254{ 376{
255 struct dma_client *client; 377 struct dma_device *device;
378 struct dma_chan *chan;
379 struct dma_chan *ret = NULL;
380 struct dma_chan *min = NULL;
256 381
257 mutex_lock(&dma_list_mutex); 382 list_for_each_entry(device, &dma_device_list, global_node) {
383 if (!dma_has_cap(cap, device->cap_mask) ||
384 dma_has_cap(DMA_PRIVATE, device->cap_mask))
385 continue;
386 list_for_each_entry(chan, &device->channels, device_node) {
387 if (!chan->client_count)
388 continue;
389 if (!min)
390 min = chan;
391 else if (chan->table_count < min->table_count)
392 min = chan;
393
394 if (n-- == 0) {
395 ret = chan;
396 break; /* done */
397 }
398 }
399 if (ret)
400 break; /* done */
401 }
258 402
259 list_for_each_entry(client, &dma_client_list, global_node) 403 if (!ret)
260 dma_client_chan_alloc(client); 404 ret = min;
261 405
262 mutex_unlock(&dma_list_mutex); 406 if (ret)
407 ret->table_count++;
408
409 return ret;
263} 410}
264 411
265/** 412/**
266 * dma_chans_notify_available - tell the clients that a channel is going away 413 * dma_channel_rebalance - redistribute the available channels
267 * @chan: channel on its way out 414 *
415 * Optimize for cpu isolation (each cpu gets a dedicated channel for an
416 * operation type) in the SMP case, and operation isolation (avoid
417 * multi-tasking channels) in the non-SMP case. Must be called under
418 * dma_list_mutex.
268 */ 419 */
269static void dma_clients_notify_removed(struct dma_chan *chan) 420static void dma_channel_rebalance(void)
270{ 421{
271 struct dma_client *client; 422 struct dma_chan *chan;
272 enum dma_state_client ack; 423 struct dma_device *device;
424 int cpu;
425 int cap;
426 int n;
273 427
274 mutex_lock(&dma_list_mutex); 428 /* undo the last distribution */
429 for_each_dma_cap_mask(cap, dma_cap_mask_all)
430 for_each_possible_cpu(cpu)
431 per_cpu_ptr(channel_table[cap], cpu)->chan = NULL;
432
433 list_for_each_entry(device, &dma_device_list, global_node) {
434 if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
435 continue;
436 list_for_each_entry(chan, &device->channels, device_node)
437 chan->table_count = 0;
438 }
275 439
276 list_for_each_entry(client, &dma_client_list, global_node) { 440 /* don't populate the channel_table if no clients are available */
277 ack = client->event_callback(client, chan, 441 if (!dmaengine_ref_count)
278 DMA_RESOURCE_REMOVED); 442 return;
279 443
280 /* client was holding resources for this channel so 444 /* redistribute available channels */
281 * free it 445 n = 0;
282 */ 446 for_each_dma_cap_mask(cap, dma_cap_mask_all)
283 if (ack == DMA_ACK) { 447 for_each_online_cpu(cpu) {
284 dma_chan_put(chan); 448 if (num_possible_cpus() > 1)
285 chan->client_count--; 449 chan = nth_chan(cap, n++);
450 else
451 chan = nth_chan(cap, -1);
452
453 per_cpu_ptr(channel_table[cap], cpu)->chan = chan;
454 }
455}
456
457static struct dma_chan *private_candidate(dma_cap_mask_t *mask, struct dma_device *dev,
458 dma_filter_fn fn, void *fn_param)
459{
460 struct dma_chan *chan;
461
462 if (!__dma_device_satisfies_mask(dev, mask)) {
463 pr_debug("%s: wrong capabilities\n", __func__);
464 return NULL;
465 }
466 /* devices with multiple channels need special handling as we need to
467 * ensure that all channels are either private or public.
468 */
469 if (dev->chancnt > 1 && !dma_has_cap(DMA_PRIVATE, dev->cap_mask))
470 list_for_each_entry(chan, &dev->channels, device_node) {
471 /* some channels are already publicly allocated */
472 if (chan->client_count)
473 return NULL;
286 } 474 }
475
476 list_for_each_entry(chan, &dev->channels, device_node) {
477 if (chan->client_count) {
478 pr_debug("%s: %s busy\n",
479 __func__, dma_chan_name(chan));
480 continue;
481 }
482 if (fn && !fn(chan, fn_param)) {
483 pr_debug("%s: %s filter said false\n",
484 __func__, dma_chan_name(chan));
485 continue;
486 }
487 return chan;
287 } 488 }
288 489
289 mutex_unlock(&dma_list_mutex); 490 return NULL;
290} 491}
291 492
292/** 493/**
293 * dma_async_client_register - register a &dma_client 494 * dma_request_channel - try to allocate an exclusive channel
294 * @client: ptr to a client structure with valid 'event_callback' and 'cap_mask' 495 * @mask: capabilities that the channel must satisfy
496 * @fn: optional callback to disposition available channels
497 * @fn_param: opaque parameter to pass to dma_filter_fn
295 */ 498 */
296void dma_async_client_register(struct dma_client *client) 499struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param)
297{ 500{
298 /* validate client data */ 501 struct dma_device *device, *_d;
299 BUG_ON(dma_has_cap(DMA_SLAVE, client->cap_mask) && 502 struct dma_chan *chan = NULL;
300 !client->slave); 503 int err;
301 504
505 /* Find a channel */
506 mutex_lock(&dma_list_mutex);
507 list_for_each_entry_safe(device, _d, &dma_device_list, global_node) {
508 chan = private_candidate(mask, device, fn, fn_param);
509 if (chan) {
510 /* Found a suitable channel, try to grab, prep, and
511 * return it. We first set DMA_PRIVATE to disable
512 * balance_ref_count as this channel will not be
513 * published in the general-purpose allocator
514 */
515 dma_cap_set(DMA_PRIVATE, device->cap_mask);
516 err = dma_chan_get(chan);
517
518 if (err == -ENODEV) {
519 pr_debug("%s: %s module removed\n", __func__,
520 dma_chan_name(chan));
521 list_del_rcu(&device->global_node);
522 } else if (err)
523 pr_err("dmaengine: failed to get %s: (%d)\n",
524 dma_chan_name(chan), err);
525 else
526 break;
527 chan = NULL;
528 }
529 }
530 mutex_unlock(&dma_list_mutex);
531
532 pr_debug("%s: %s (%s)\n", __func__, chan ? "success" : "fail",
533 chan ? dma_chan_name(chan) : NULL);
534
535 return chan;
536}
537EXPORT_SYMBOL_GPL(__dma_request_channel);
538
539void dma_release_channel(struct dma_chan *chan)
540{
302 mutex_lock(&dma_list_mutex); 541 mutex_lock(&dma_list_mutex);
303 list_add_tail(&client->global_node, &dma_client_list); 542 WARN_ONCE(chan->client_count != 1,
543 "chan reference count %d != 1\n", chan->client_count);
544 dma_chan_put(chan);
304 mutex_unlock(&dma_list_mutex); 545 mutex_unlock(&dma_list_mutex);
305} 546}
306EXPORT_SYMBOL(dma_async_client_register); 547EXPORT_SYMBOL_GPL(dma_release_channel);
307 548
308/** 549/**
309 * dma_async_client_unregister - unregister a client and free the &dma_client 550 * dmaengine_get - register interest in dma_channels
310 * @client: &dma_client to free
311 *
312 * Force frees any allocated DMA channels, frees the &dma_client memory
313 */ 551 */
314void dma_async_client_unregister(struct dma_client *client) 552void dmaengine_get(void)
315{ 553{
316 struct dma_device *device; 554 struct dma_device *device, *_d;
317 struct dma_chan *chan; 555 struct dma_chan *chan;
318 enum dma_state_client ack; 556 int err;
319
320 if (!client)
321 return;
322 557
323 mutex_lock(&dma_list_mutex); 558 mutex_lock(&dma_list_mutex);
324 /* free all channels the client is holding */ 559 dmaengine_ref_count++;
325 list_for_each_entry(device, &dma_device_list, global_node)
326 list_for_each_entry(chan, &device->channels, device_node) {
327 ack = client->event_callback(client, chan,
328 DMA_RESOURCE_REMOVED);
329 560
330 if (ack == DMA_ACK) { 561 /* try to grab channels */
331 dma_chan_put(chan); 562 list_for_each_entry_safe(device, _d, &dma_device_list, global_node) {
332 chan->client_count--; 563 if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
333 } 564 continue;
565 list_for_each_entry(chan, &device->channels, device_node) {
566 err = dma_chan_get(chan);
567 if (err == -ENODEV) {
568 /* module removed before we could use it */
569 list_del_rcu(&device->global_node);
570 break;
571 } else if (err)
572 pr_err("dmaengine: failed to get %s: (%d)\n",
573 dma_chan_name(chan), err);
334 } 574 }
575 }
335 576
336 list_del(&client->global_node); 577 /* if this is the first reference and there were channels
578 * waiting we need to rebalance to get those channels
579 * incorporated into the channel table
580 */
581 if (dmaengine_ref_count == 1)
582 dma_channel_rebalance();
337 mutex_unlock(&dma_list_mutex); 583 mutex_unlock(&dma_list_mutex);
338} 584}
339EXPORT_SYMBOL(dma_async_client_unregister); 585EXPORT_SYMBOL(dmaengine_get);
340 586
341/** 587/**
342 * dma_async_client_chan_request - send all available channels to the 588 * dmaengine_put - let dma drivers be removed when ref_count == 0
343 * client that satisfy the capability mask
344 * @client - requester
345 */ 589 */
346void dma_async_client_chan_request(struct dma_client *client) 590void dmaengine_put(void)
347{ 591{
592 struct dma_device *device;
593 struct dma_chan *chan;
594
348 mutex_lock(&dma_list_mutex); 595 mutex_lock(&dma_list_mutex);
349 dma_client_chan_alloc(client); 596 dmaengine_ref_count--;
597 BUG_ON(dmaengine_ref_count < 0);
598 /* drop channel references */
599 list_for_each_entry(device, &dma_device_list, global_node) {
600 if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
601 continue;
602 list_for_each_entry(chan, &device->channels, device_node)
603 dma_chan_put(chan);
604 }
350 mutex_unlock(&dma_list_mutex); 605 mutex_unlock(&dma_list_mutex);
351} 606}
352EXPORT_SYMBOL(dma_async_client_chan_request); 607EXPORT_SYMBOL(dmaengine_put);
353 608
354/** 609/**
355 * dma_async_device_register - registers DMA devices found 610 * dma_async_device_register - registers DMA devices found
@@ -357,9 +612,9 @@ EXPORT_SYMBOL(dma_async_client_chan_request);
357 */ 612 */
358int dma_async_device_register(struct dma_device *device) 613int dma_async_device_register(struct dma_device *device)
359{ 614{
360 static int id;
361 int chancnt = 0, rc; 615 int chancnt = 0, rc;
362 struct dma_chan* chan; 616 struct dma_chan* chan;
617 atomic_t *idr_ref;
363 618
364 if (!device) 619 if (!device)
365 return -ENODEV; 620 return -ENODEV;
@@ -386,57 +641,83 @@ int dma_async_device_register(struct dma_device *device)
386 BUG_ON(!device->device_issue_pending); 641 BUG_ON(!device->device_issue_pending);
387 BUG_ON(!device->dev); 642 BUG_ON(!device->dev);
388 643
389 init_completion(&device->done); 644 idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
390 kref_init(&device->refcount); 645 if (!idr_ref)
391 646 return -ENOMEM;
647 atomic_set(idr_ref, 0);
648 idr_retry:
649 if (!idr_pre_get(&dma_idr, GFP_KERNEL))
650 return -ENOMEM;
392 mutex_lock(&dma_list_mutex); 651 mutex_lock(&dma_list_mutex);
393 device->dev_id = id++; 652 rc = idr_get_new(&dma_idr, NULL, &device->dev_id);
394 mutex_unlock(&dma_list_mutex); 653 mutex_unlock(&dma_list_mutex);
654 if (rc == -EAGAIN)
655 goto idr_retry;
656 else if (rc != 0)
657 return rc;
395 658
396 /* represent channels in sysfs. Probably want devs too */ 659 /* represent channels in sysfs. Probably want devs too */
397 list_for_each_entry(chan, &device->channels, device_node) { 660 list_for_each_entry(chan, &device->channels, device_node) {
398 chan->local = alloc_percpu(typeof(*chan->local)); 661 chan->local = alloc_percpu(typeof(*chan->local));
399 if (chan->local == NULL) 662 if (chan->local == NULL)
400 continue; 663 continue;
664 chan->dev = kzalloc(sizeof(*chan->dev), GFP_KERNEL);
665 if (chan->dev == NULL) {
666 free_percpu(chan->local);
667 continue;
668 }
401 669
402 chan->chan_id = chancnt++; 670 chan->chan_id = chancnt++;
403 chan->dev.class = &dma_devclass; 671 chan->dev->device.class = &dma_devclass;
404 chan->dev.parent = device->dev; 672 chan->dev->device.parent = device->dev;
405 dev_set_name(&chan->dev, "dma%dchan%d", 673 chan->dev->chan = chan;
674 chan->dev->idr_ref = idr_ref;
675 chan->dev->dev_id = device->dev_id;
676 atomic_inc(idr_ref);
677 dev_set_name(&chan->dev->device, "dma%dchan%d",
406 device->dev_id, chan->chan_id); 678 device->dev_id, chan->chan_id);
407 679
408 rc = device_register(&chan->dev); 680 rc = device_register(&chan->dev->device);
409 if (rc) { 681 if (rc) {
410 chancnt--;
411 free_percpu(chan->local); 682 free_percpu(chan->local);
412 chan->local = NULL; 683 chan->local = NULL;
413 goto err_out; 684 goto err_out;
414 } 685 }
415
416 /* One for the channel, one of the class device */
417 kref_get(&device->refcount);
418 kref_get(&device->refcount);
419 kref_init(&chan->refcount);
420 chan->client_count = 0; 686 chan->client_count = 0;
421 chan->slow_ref = 0;
422 INIT_RCU_HEAD(&chan->rcu);
423 } 687 }
688 device->chancnt = chancnt;
424 689
425 mutex_lock(&dma_list_mutex); 690 mutex_lock(&dma_list_mutex);
426 list_add_tail(&device->global_node, &dma_device_list); 691 /* take references on public channels */
692 if (dmaengine_ref_count && !dma_has_cap(DMA_PRIVATE, device->cap_mask))
693 list_for_each_entry(chan, &device->channels, device_node) {
694 /* if clients are already waiting for channels we need
695 * to take references on their behalf
696 */
697 if (dma_chan_get(chan) == -ENODEV) {
698 /* note we can only get here for the first
699 * channel as the remaining channels are
700 * guaranteed to get a reference
701 */
702 rc = -ENODEV;
703 mutex_unlock(&dma_list_mutex);
704 goto err_out;
705 }
706 }
707 list_add_tail_rcu(&device->global_node, &dma_device_list);
708 dma_channel_rebalance();
427 mutex_unlock(&dma_list_mutex); 709 mutex_unlock(&dma_list_mutex);
428 710
429 dma_clients_notify_available();
430
431 return 0; 711 return 0;
432 712
433err_out: 713err_out:
434 list_for_each_entry(chan, &device->channels, device_node) { 714 list_for_each_entry(chan, &device->channels, device_node) {
435 if (chan->local == NULL) 715 if (chan->local == NULL)
436 continue; 716 continue;
437 kref_put(&device->refcount, dma_async_device_cleanup); 717 mutex_lock(&dma_list_mutex);
438 device_unregister(&chan->dev); 718 chan->dev->chan = NULL;
439 chancnt--; 719 mutex_unlock(&dma_list_mutex);
720 device_unregister(&chan->dev->device);
440 free_percpu(chan->local); 721 free_percpu(chan->local);
441 } 722 }
442 return rc; 723 return rc;
@@ -444,37 +725,30 @@ err_out:
444EXPORT_SYMBOL(dma_async_device_register); 725EXPORT_SYMBOL(dma_async_device_register);
445 726
446/** 727/**
447 * dma_async_device_cleanup - function called when all references are released 728 * dma_async_device_unregister - unregister a DMA device
448 * @kref: kernel reference object
449 */
450static void dma_async_device_cleanup(struct kref *kref)
451{
452 struct dma_device *device;
453
454 device = container_of(kref, struct dma_device, refcount);
455 complete(&device->done);
456}
457
458/**
459 * dma_async_device_unregister - unregisters DMA devices
460 * @device: &dma_device 729 * @device: &dma_device
730 *
731 * This routine is called by dma driver exit routines, dmaengine holds module
732 * references to prevent it being called while channels are in use.
461 */ 733 */
462void dma_async_device_unregister(struct dma_device *device) 734void dma_async_device_unregister(struct dma_device *device)
463{ 735{
464 struct dma_chan *chan; 736 struct dma_chan *chan;
465 737
466 mutex_lock(&dma_list_mutex); 738 mutex_lock(&dma_list_mutex);
467 list_del(&device->global_node); 739 list_del_rcu(&device->global_node);
740 dma_channel_rebalance();
468 mutex_unlock(&dma_list_mutex); 741 mutex_unlock(&dma_list_mutex);
469 742
470 list_for_each_entry(chan, &device->channels, device_node) { 743 list_for_each_entry(chan, &device->channels, device_node) {
471 dma_clients_notify_removed(chan); 744 WARN_ONCE(chan->client_count,
472 device_unregister(&chan->dev); 745 "%s called while %d clients hold a reference\n",
473 dma_chan_release(chan); 746 __func__, chan->client_count);
747 mutex_lock(&dma_list_mutex);
748 chan->dev->chan = NULL;
749 mutex_unlock(&dma_list_mutex);
750 device_unregister(&chan->dev->device);
474 } 751 }
475
476 kref_put(&device->refcount, dma_async_device_cleanup);
477 wait_for_completion(&device->done);
478} 752}
479EXPORT_SYMBOL(dma_async_device_unregister); 753EXPORT_SYMBOL(dma_async_device_unregister);
480 754
@@ -626,10 +900,96 @@ void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
626} 900}
627EXPORT_SYMBOL(dma_async_tx_descriptor_init); 901EXPORT_SYMBOL(dma_async_tx_descriptor_init);
628 902
903/* dma_wait_for_async_tx - spin wait for a transaction to complete
904 * @tx: in-flight transaction to wait on
905 *
906 * This routine assumes that tx was obtained from a call to async_memcpy,
907 * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped
908 * and submitted). Walking the parent chain is only meant to cover for DMA
909 * drivers that do not implement the DMA_INTERRUPT capability and may race with
910 * the driver's descriptor cleanup routine.
911 */
912enum dma_status
913dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
914{
915 enum dma_status status;
916 struct dma_async_tx_descriptor *iter;
917 struct dma_async_tx_descriptor *parent;
918
919 if (!tx)
920 return DMA_SUCCESS;
921
922 WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for"
923 " %s\n", __func__, dma_chan_name(tx->chan));
924
925 /* poll through the dependency chain, return when tx is complete */
926 do {
927 iter = tx;
928
929 /* find the root of the unsubmitted dependency chain */
930 do {
931 parent = iter->parent;
932 if (!parent)
933 break;
934 else
935 iter = parent;
936 } while (parent);
937
938 /* there is a small window for ->parent == NULL and
939 * ->cookie == -EBUSY
940 */
941 while (iter->cookie == -EBUSY)
942 cpu_relax();
943
944 status = dma_sync_wait(iter->chan, iter->cookie);
945 } while (status == DMA_IN_PROGRESS || (iter != tx));
946
947 return status;
948}
949EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
950
951/* dma_run_dependencies - helper routine for dma drivers to process
952 * (start) dependent operations on their target channel
953 * @tx: transaction with dependencies
954 */
955void dma_run_dependencies(struct dma_async_tx_descriptor *tx)
956{
957 struct dma_async_tx_descriptor *dep = tx->next;
958 struct dma_async_tx_descriptor *dep_next;
959 struct dma_chan *chan;
960
961 if (!dep)
962 return;
963
964 chan = dep->chan;
965
966 /* keep submitting up until a channel switch is detected
967 * in that case we will be called again as a result of
968 * processing the interrupt from async_tx_channel_switch
969 */
970 for (; dep; dep = dep_next) {
971 spin_lock_bh(&dep->lock);
972 dep->parent = NULL;
973 dep_next = dep->next;
974 if (dep_next && dep_next->chan == chan)
975 dep->next = NULL; /* ->next will be submitted */
976 else
977 dep_next = NULL; /* submit current dep and terminate */
978 spin_unlock_bh(&dep->lock);
979
980 dep->tx_submit(dep);
981 }
982
983 chan->device->device_issue_pending(chan);
984}
985EXPORT_SYMBOL_GPL(dma_run_dependencies);
986
629static int __init dma_bus_init(void) 987static int __init dma_bus_init(void)
630{ 988{
989 idr_init(&dma_idr);
631 mutex_init(&dma_list_mutex); 990 mutex_init(&dma_list_mutex);
632 return class_register(&dma_devclass); 991 return class_register(&dma_devclass);
633} 992}
634subsys_initcall(dma_bus_init); 993arch_initcall(dma_bus_init);
994
635 995
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index ed9636bfb54a..3603f1ea5b28 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -35,7 +35,7 @@ MODULE_PARM_DESC(threads_per_chan,
35 35
36static unsigned int max_channels; 36static unsigned int max_channels;
37module_param(max_channels, uint, S_IRUGO); 37module_param(max_channels, uint, S_IRUGO);
38MODULE_PARM_DESC(nr_channels, 38MODULE_PARM_DESC(max_channels,
39 "Maximum number of channels to use (default: all)"); 39 "Maximum number of channels to use (default: all)");
40 40
41/* 41/*
@@ -71,7 +71,7 @@ struct dmatest_chan {
71 71
72/* 72/*
73 * These are protected by dma_list_mutex since they're only used by 73 * These are protected by dma_list_mutex since they're only used by
74 * the DMA client event callback 74 * the DMA filter function callback
75 */ 75 */
76static LIST_HEAD(dmatest_channels); 76static LIST_HEAD(dmatest_channels);
77static unsigned int nr_channels; 77static unsigned int nr_channels;
@@ -80,7 +80,7 @@ static bool dmatest_match_channel(struct dma_chan *chan)
80{ 80{
81 if (test_channel[0] == '\0') 81 if (test_channel[0] == '\0')
82 return true; 82 return true;
83 return strcmp(dev_name(&chan->dev), test_channel) == 0; 83 return strcmp(dma_chan_name(chan), test_channel) == 0;
84} 84}
85 85
86static bool dmatest_match_device(struct dma_device *device) 86static bool dmatest_match_device(struct dma_device *device)
@@ -215,7 +215,6 @@ static int dmatest_func(void *data)
215 215
216 smp_rmb(); 216 smp_rmb();
217 chan = thread->chan; 217 chan = thread->chan;
218 dma_chan_get(chan);
219 218
220 while (!kthread_should_stop()) { 219 while (!kthread_should_stop()) {
221 total_tests++; 220 total_tests++;
@@ -293,7 +292,6 @@ static int dmatest_func(void *data)
293 } 292 }
294 293
295 ret = 0; 294 ret = 0;
296 dma_chan_put(chan);
297 kfree(thread->dstbuf); 295 kfree(thread->dstbuf);
298err_dstbuf: 296err_dstbuf:
299 kfree(thread->srcbuf); 297 kfree(thread->srcbuf);
@@ -319,21 +317,16 @@ static void dmatest_cleanup_channel(struct dmatest_chan *dtc)
319 kfree(dtc); 317 kfree(dtc);
320} 318}
321 319
322static enum dma_state_client dmatest_add_channel(struct dma_chan *chan) 320static int dmatest_add_channel(struct dma_chan *chan)
323{ 321{
324 struct dmatest_chan *dtc; 322 struct dmatest_chan *dtc;
325 struct dmatest_thread *thread; 323 struct dmatest_thread *thread;
326 unsigned int i; 324 unsigned int i;
327 325
328 /* Have we already been told about this channel? */
329 list_for_each_entry(dtc, &dmatest_channels, node)
330 if (dtc->chan == chan)
331 return DMA_DUP;
332
333 dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL); 326 dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
334 if (!dtc) { 327 if (!dtc) {
335 pr_warning("dmatest: No memory for %s\n", dev_name(&chan->dev)); 328 pr_warning("dmatest: No memory for %s\n", dma_chan_name(chan));
336 return DMA_NAK; 329 return -ENOMEM;
337 } 330 }
338 331
339 dtc->chan = chan; 332 dtc->chan = chan;
@@ -343,16 +336,16 @@ static enum dma_state_client dmatest_add_channel(struct dma_chan *chan)
343 thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL); 336 thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL);
344 if (!thread) { 337 if (!thread) {
345 pr_warning("dmatest: No memory for %s-test%u\n", 338 pr_warning("dmatest: No memory for %s-test%u\n",
346 dev_name(&chan->dev), i); 339 dma_chan_name(chan), i);
347 break; 340 break;
348 } 341 }
349 thread->chan = dtc->chan; 342 thread->chan = dtc->chan;
350 smp_wmb(); 343 smp_wmb();
351 thread->task = kthread_run(dmatest_func, thread, "%s-test%u", 344 thread->task = kthread_run(dmatest_func, thread, "%s-test%u",
352 dev_name(&chan->dev), i); 345 dma_chan_name(chan), i);
353 if (IS_ERR(thread->task)) { 346 if (IS_ERR(thread->task)) {
354 pr_warning("dmatest: Failed to run thread %s-test%u\n", 347 pr_warning("dmatest: Failed to run thread %s-test%u\n",
355 dev_name(&chan->dev), i); 348 dma_chan_name(chan), i);
356 kfree(thread); 349 kfree(thread);
357 break; 350 break;
358 } 351 }
@@ -362,86 +355,62 @@ static enum dma_state_client dmatest_add_channel(struct dma_chan *chan)
362 list_add_tail(&thread->node, &dtc->threads); 355 list_add_tail(&thread->node, &dtc->threads);
363 } 356 }
364 357
365 pr_info("dmatest: Started %u threads using %s\n", i, dev_name(&chan->dev)); 358 pr_info("dmatest: Started %u threads using %s\n", i, dma_chan_name(chan));
366 359
367 list_add_tail(&dtc->node, &dmatest_channels); 360 list_add_tail(&dtc->node, &dmatest_channels);
368 nr_channels++; 361 nr_channels++;
369 362
370 return DMA_ACK; 363 return 0;
371}
372
373static enum dma_state_client dmatest_remove_channel(struct dma_chan *chan)
374{
375 struct dmatest_chan *dtc, *_dtc;
376
377 list_for_each_entry_safe(dtc, _dtc, &dmatest_channels, node) {
378 if (dtc->chan == chan) {
379 list_del(&dtc->node);
380 dmatest_cleanup_channel(dtc);
381 pr_debug("dmatest: lost channel %s\n",
382 dev_name(&chan->dev));
383 return DMA_ACK;
384 }
385 }
386
387 return DMA_DUP;
388} 364}
389 365
390/* 366static bool filter(struct dma_chan *chan, void *param)
391 * Start testing threads as new channels are assigned to us, and kill
392 * them when the channels go away.
393 *
394 * When we unregister the client, all channels are removed so this
395 * will also take care of cleaning things up when the module is
396 * unloaded.
397 */
398static enum dma_state_client
399dmatest_event(struct dma_client *client, struct dma_chan *chan,
400 enum dma_state state)
401{ 367{
402 enum dma_state_client ack = DMA_NAK; 368 if (!dmatest_match_channel(chan) || !dmatest_match_device(chan->device))
403 369 return false;
404 switch (state) { 370 else
405 case DMA_RESOURCE_AVAILABLE: 371 return true;
406 if (!dmatest_match_channel(chan)
407 || !dmatest_match_device(chan->device))
408 ack = DMA_DUP;
409 else if (max_channels && nr_channels >= max_channels)
410 ack = DMA_NAK;
411 else
412 ack = dmatest_add_channel(chan);
413 break;
414
415 case DMA_RESOURCE_REMOVED:
416 ack = dmatest_remove_channel(chan);
417 break;
418
419 default:
420 pr_info("dmatest: Unhandled event %u (%s)\n",
421 state, dev_name(&chan->dev));
422 break;
423 }
424
425 return ack;
426} 372}
427 373
428static struct dma_client dmatest_client = {
429 .event_callback = dmatest_event,
430};
431
432static int __init dmatest_init(void) 374static int __init dmatest_init(void)
433{ 375{
434 dma_cap_set(DMA_MEMCPY, dmatest_client.cap_mask); 376 dma_cap_mask_t mask;
435 dma_async_client_register(&dmatest_client); 377 struct dma_chan *chan;
436 dma_async_client_chan_request(&dmatest_client); 378 int err = 0;
379
380 dma_cap_zero(mask);
381 dma_cap_set(DMA_MEMCPY, mask);
382 for (;;) {
383 chan = dma_request_channel(mask, filter, NULL);
384 if (chan) {
385 err = dmatest_add_channel(chan);
386 if (err == 0)
387 continue;
388 else {
389 dma_release_channel(chan);
390 break; /* add_channel failed, punt */
391 }
392 } else
393 break; /* no more channels available */
394 if (max_channels && nr_channels >= max_channels)
395 break; /* we have all we need */
396 }
437 397
438 return 0; 398 return err;
439} 399}
440module_init(dmatest_init); 400/* when compiled-in wait for drivers to load first */
401late_initcall(dmatest_init);
441 402
442static void __exit dmatest_exit(void) 403static void __exit dmatest_exit(void)
443{ 404{
444 dma_async_client_unregister(&dmatest_client); 405 struct dmatest_chan *dtc, *_dtc;
406
407 list_for_each_entry_safe(dtc, _dtc, &dmatest_channels, node) {
408 list_del(&dtc->node);
409 dmatest_cleanup_channel(dtc);
410 pr_debug("dmatest: dropped channel %s\n",
411 dma_chan_name(dtc->chan));
412 dma_release_channel(dtc->chan);
413 }
445} 414}
446module_exit(dmatest_exit); 415module_exit(dmatest_exit);
447 416
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 0778d99aea7c..6b702cc46b3d 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -70,6 +70,15 @@
70 * the controller, though. 70 * the controller, though.
71 */ 71 */
72 72
73static struct device *chan2dev(struct dma_chan *chan)
74{
75 return &chan->dev->device;
76}
77static struct device *chan2parent(struct dma_chan *chan)
78{
79 return chan->dev->device.parent;
80}
81
73static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc) 82static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc)
74{ 83{
75 return list_entry(dwc->active_list.next, struct dw_desc, desc_node); 84 return list_entry(dwc->active_list.next, struct dw_desc, desc_node);
@@ -93,12 +102,12 @@ static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
93 ret = desc; 102 ret = desc;
94 break; 103 break;
95 } 104 }
96 dev_dbg(&dwc->chan.dev, "desc %p not ACKed\n", desc); 105 dev_dbg(chan2dev(&dwc->chan), "desc %p not ACKed\n", desc);
97 i++; 106 i++;
98 } 107 }
99 spin_unlock_bh(&dwc->lock); 108 spin_unlock_bh(&dwc->lock);
100 109
101 dev_vdbg(&dwc->chan.dev, "scanned %u descriptors on freelist\n", i); 110 dev_vdbg(chan2dev(&dwc->chan), "scanned %u descriptors on freelist\n", i);
102 111
103 return ret; 112 return ret;
104} 113}
@@ -108,10 +117,10 @@ static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc)
108 struct dw_desc *child; 117 struct dw_desc *child;
109 118
110 list_for_each_entry(child, &desc->txd.tx_list, desc_node) 119 list_for_each_entry(child, &desc->txd.tx_list, desc_node)
111 dma_sync_single_for_cpu(dwc->chan.dev.parent, 120 dma_sync_single_for_cpu(chan2parent(&dwc->chan),
112 child->txd.phys, sizeof(child->lli), 121 child->txd.phys, sizeof(child->lli),
113 DMA_TO_DEVICE); 122 DMA_TO_DEVICE);
114 dma_sync_single_for_cpu(dwc->chan.dev.parent, 123 dma_sync_single_for_cpu(chan2parent(&dwc->chan),
115 desc->txd.phys, sizeof(desc->lli), 124 desc->txd.phys, sizeof(desc->lli),
116 DMA_TO_DEVICE); 125 DMA_TO_DEVICE);
117} 126}
@@ -129,11 +138,11 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
129 138
130 spin_lock_bh(&dwc->lock); 139 spin_lock_bh(&dwc->lock);
131 list_for_each_entry(child, &desc->txd.tx_list, desc_node) 140 list_for_each_entry(child, &desc->txd.tx_list, desc_node)
132 dev_vdbg(&dwc->chan.dev, 141 dev_vdbg(chan2dev(&dwc->chan),
133 "moving child desc %p to freelist\n", 142 "moving child desc %p to freelist\n",
134 child); 143 child);
135 list_splice_init(&desc->txd.tx_list, &dwc->free_list); 144 list_splice_init(&desc->txd.tx_list, &dwc->free_list);
136 dev_vdbg(&dwc->chan.dev, "moving desc %p to freelist\n", desc); 145 dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc);
137 list_add(&desc->desc_node, &dwc->free_list); 146 list_add(&desc->desc_node, &dwc->free_list);
138 spin_unlock_bh(&dwc->lock); 147 spin_unlock_bh(&dwc->lock);
139 } 148 }
@@ -163,9 +172,9 @@ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
163 172
164 /* ASSERT: channel is idle */ 173 /* ASSERT: channel is idle */
165 if (dma_readl(dw, CH_EN) & dwc->mask) { 174 if (dma_readl(dw, CH_EN) & dwc->mask) {
166 dev_err(&dwc->chan.dev, 175 dev_err(chan2dev(&dwc->chan),
167 "BUG: Attempted to start non-idle channel\n"); 176 "BUG: Attempted to start non-idle channel\n");
168 dev_err(&dwc->chan.dev, 177 dev_err(chan2dev(&dwc->chan),
169 " SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n", 178 " SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
170 channel_readl(dwc, SAR), 179 channel_readl(dwc, SAR),
171 channel_readl(dwc, DAR), 180 channel_readl(dwc, DAR),
@@ -193,7 +202,7 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc)
193 void *param; 202 void *param;
194 struct dma_async_tx_descriptor *txd = &desc->txd; 203 struct dma_async_tx_descriptor *txd = &desc->txd;
195 204
196 dev_vdbg(&dwc->chan.dev, "descriptor %u complete\n", txd->cookie); 205 dev_vdbg(chan2dev(&dwc->chan), "descriptor %u complete\n", txd->cookie);
197 206
198 dwc->completed = txd->cookie; 207 dwc->completed = txd->cookie;
199 callback = txd->callback; 208 callback = txd->callback;
@@ -208,11 +217,11 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc)
208 * mapped before they were submitted... 217 * mapped before they were submitted...
209 */ 218 */
210 if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) 219 if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP))
211 dma_unmap_page(dwc->chan.dev.parent, desc->lli.dar, desc->len, 220 dma_unmap_page(chan2parent(&dwc->chan), desc->lli.dar,
212 DMA_FROM_DEVICE); 221 desc->len, DMA_FROM_DEVICE);
213 if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) 222 if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP))
214 dma_unmap_page(dwc->chan.dev.parent, desc->lli.sar, desc->len, 223 dma_unmap_page(chan2parent(&dwc->chan), desc->lli.sar,
215 DMA_TO_DEVICE); 224 desc->len, DMA_TO_DEVICE);
216 225
217 /* 226 /*
218 * The API requires that no submissions are done from a 227 * The API requires that no submissions are done from a
@@ -228,7 +237,7 @@ static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc)
228 LIST_HEAD(list); 237 LIST_HEAD(list);
229 238
230 if (dma_readl(dw, CH_EN) & dwc->mask) { 239 if (dma_readl(dw, CH_EN) & dwc->mask) {
231 dev_err(&dwc->chan.dev, 240 dev_err(chan2dev(&dwc->chan),
232 "BUG: XFER bit set, but channel not idle!\n"); 241 "BUG: XFER bit set, but channel not idle!\n");
233 242
234 /* Try to continue after resetting the channel... */ 243 /* Try to continue after resetting the channel... */
@@ -273,7 +282,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
273 return; 282 return;
274 } 283 }
275 284
276 dev_vdbg(&dwc->chan.dev, "scan_descriptors: llp=0x%x\n", llp); 285 dev_vdbg(chan2dev(&dwc->chan), "scan_descriptors: llp=0x%x\n", llp);
277 286
278 list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) { 287 list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) {
279 if (desc->lli.llp == llp) 288 if (desc->lli.llp == llp)
@@ -292,7 +301,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
292 dwc_descriptor_complete(dwc, desc); 301 dwc_descriptor_complete(dwc, desc);
293 } 302 }
294 303
295 dev_err(&dwc->chan.dev, 304 dev_err(chan2dev(&dwc->chan),
296 "BUG: All descriptors done, but channel not idle!\n"); 305 "BUG: All descriptors done, but channel not idle!\n");
297 306
298 /* Try to continue after resetting the channel... */ 307 /* Try to continue after resetting the channel... */
@@ -308,7 +317,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
308 317
309static void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli) 318static void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli)
310{ 319{
311 dev_printk(KERN_CRIT, &dwc->chan.dev, 320 dev_printk(KERN_CRIT, chan2dev(&dwc->chan),
312 " desc: s0x%x d0x%x l0x%x c0x%x:%x\n", 321 " desc: s0x%x d0x%x l0x%x c0x%x:%x\n",
313 lli->sar, lli->dar, lli->llp, 322 lli->sar, lli->dar, lli->llp,
314 lli->ctlhi, lli->ctllo); 323 lli->ctlhi, lli->ctllo);
@@ -342,9 +351,9 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
342 * controller flagged an error instead of scribbling over 351 * controller flagged an error instead of scribbling over
343 * random memory locations. 352 * random memory locations.
344 */ 353 */
345 dev_printk(KERN_CRIT, &dwc->chan.dev, 354 dev_printk(KERN_CRIT, chan2dev(&dwc->chan),
346 "Bad descriptor submitted for DMA!\n"); 355 "Bad descriptor submitted for DMA!\n");
347 dev_printk(KERN_CRIT, &dwc->chan.dev, 356 dev_printk(KERN_CRIT, chan2dev(&dwc->chan),
348 " cookie: %d\n", bad_desc->txd.cookie); 357 " cookie: %d\n", bad_desc->txd.cookie);
349 dwc_dump_lli(dwc, &bad_desc->lli); 358 dwc_dump_lli(dwc, &bad_desc->lli);
350 list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) 359 list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
@@ -442,12 +451,12 @@ static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
442 * for DMA. But this is hard to do in a race-free manner. 451 * for DMA. But this is hard to do in a race-free manner.
443 */ 452 */
444 if (list_empty(&dwc->active_list)) { 453 if (list_empty(&dwc->active_list)) {
445 dev_vdbg(&tx->chan->dev, "tx_submit: started %u\n", 454 dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n",
446 desc->txd.cookie); 455 desc->txd.cookie);
447 dwc_dostart(dwc, desc); 456 dwc_dostart(dwc, desc);
448 list_add_tail(&desc->desc_node, &dwc->active_list); 457 list_add_tail(&desc->desc_node, &dwc->active_list);
449 } else { 458 } else {
450 dev_vdbg(&tx->chan->dev, "tx_submit: queued %u\n", 459 dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n",
451 desc->txd.cookie); 460 desc->txd.cookie);
452 461
453 list_add_tail(&desc->desc_node, &dwc->queue); 462 list_add_tail(&desc->desc_node, &dwc->queue);
@@ -472,11 +481,11 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
472 unsigned int dst_width; 481 unsigned int dst_width;
473 u32 ctllo; 482 u32 ctllo;
474 483
475 dev_vdbg(&chan->dev, "prep_dma_memcpy d0x%x s0x%x l0x%zx f0x%lx\n", 484 dev_vdbg(chan2dev(chan), "prep_dma_memcpy d0x%x s0x%x l0x%zx f0x%lx\n",
476 dest, src, len, flags); 485 dest, src, len, flags);
477 486
478 if (unlikely(!len)) { 487 if (unlikely(!len)) {
479 dev_dbg(&chan->dev, "prep_dma_memcpy: length is zero!\n"); 488 dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n");
480 return NULL; 489 return NULL;
481 } 490 }
482 491
@@ -516,7 +525,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
516 first = desc; 525 first = desc;
517 } else { 526 } else {
518 prev->lli.llp = desc->txd.phys; 527 prev->lli.llp = desc->txd.phys;
519 dma_sync_single_for_device(chan->dev.parent, 528 dma_sync_single_for_device(chan2parent(chan),
520 prev->txd.phys, sizeof(prev->lli), 529 prev->txd.phys, sizeof(prev->lli),
521 DMA_TO_DEVICE); 530 DMA_TO_DEVICE);
522 list_add_tail(&desc->desc_node, 531 list_add_tail(&desc->desc_node,
@@ -531,7 +540,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
531 prev->lli.ctllo |= DWC_CTLL_INT_EN; 540 prev->lli.ctllo |= DWC_CTLL_INT_EN;
532 541
533 prev->lli.llp = 0; 542 prev->lli.llp = 0;
534 dma_sync_single_for_device(chan->dev.parent, 543 dma_sync_single_for_device(chan2parent(chan),
535 prev->txd.phys, sizeof(prev->lli), 544 prev->txd.phys, sizeof(prev->lli),
536 DMA_TO_DEVICE); 545 DMA_TO_DEVICE);
537 546
@@ -562,15 +571,15 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
562 struct scatterlist *sg; 571 struct scatterlist *sg;
563 size_t total_len = 0; 572 size_t total_len = 0;
564 573
565 dev_vdbg(&chan->dev, "prep_dma_slave\n"); 574 dev_vdbg(chan2dev(chan), "prep_dma_slave\n");
566 575
567 if (unlikely(!dws || !sg_len)) 576 if (unlikely(!dws || !sg_len))
568 return NULL; 577 return NULL;
569 578
570 reg_width = dws->slave.reg_width; 579 reg_width = dws->reg_width;
571 prev = first = NULL; 580 prev = first = NULL;
572 581
573 sg_len = dma_map_sg(chan->dev.parent, sgl, sg_len, direction); 582 sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction);
574 583
575 switch (direction) { 584 switch (direction) {
576 case DMA_TO_DEVICE: 585 case DMA_TO_DEVICE:
@@ -579,7 +588,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
579 | DWC_CTLL_DST_FIX 588 | DWC_CTLL_DST_FIX
580 | DWC_CTLL_SRC_INC 589 | DWC_CTLL_SRC_INC
581 | DWC_CTLL_FC_M2P); 590 | DWC_CTLL_FC_M2P);
582 reg = dws->slave.tx_reg; 591 reg = dws->tx_reg;
583 for_each_sg(sgl, sg, sg_len, i) { 592 for_each_sg(sgl, sg, sg_len, i) {
584 struct dw_desc *desc; 593 struct dw_desc *desc;
585 u32 len; 594 u32 len;
@@ -587,7 +596,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
587 596
588 desc = dwc_desc_get(dwc); 597 desc = dwc_desc_get(dwc);
589 if (!desc) { 598 if (!desc) {
590 dev_err(&chan->dev, 599 dev_err(chan2dev(chan),
591 "not enough descriptors available\n"); 600 "not enough descriptors available\n");
592 goto err_desc_get; 601 goto err_desc_get;
593 } 602 }
@@ -607,7 +616,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
607 first = desc; 616 first = desc;
608 } else { 617 } else {
609 prev->lli.llp = desc->txd.phys; 618 prev->lli.llp = desc->txd.phys;
610 dma_sync_single_for_device(chan->dev.parent, 619 dma_sync_single_for_device(chan2parent(chan),
611 prev->txd.phys, 620 prev->txd.phys,
612 sizeof(prev->lli), 621 sizeof(prev->lli),
613 DMA_TO_DEVICE); 622 DMA_TO_DEVICE);
@@ -625,7 +634,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
625 | DWC_CTLL_SRC_FIX 634 | DWC_CTLL_SRC_FIX
626 | DWC_CTLL_FC_P2M); 635 | DWC_CTLL_FC_P2M);
627 636
628 reg = dws->slave.rx_reg; 637 reg = dws->rx_reg;
629 for_each_sg(sgl, sg, sg_len, i) { 638 for_each_sg(sgl, sg, sg_len, i) {
630 struct dw_desc *desc; 639 struct dw_desc *desc;
631 u32 len; 640 u32 len;
@@ -633,7 +642,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
633 642
634 desc = dwc_desc_get(dwc); 643 desc = dwc_desc_get(dwc);
635 if (!desc) { 644 if (!desc) {
636 dev_err(&chan->dev, 645 dev_err(chan2dev(chan),
637 "not enough descriptors available\n"); 646 "not enough descriptors available\n");
638 goto err_desc_get; 647 goto err_desc_get;
639 } 648 }
@@ -653,7 +662,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
653 first = desc; 662 first = desc;
654 } else { 663 } else {
655 prev->lli.llp = desc->txd.phys; 664 prev->lli.llp = desc->txd.phys;
656 dma_sync_single_for_device(chan->dev.parent, 665 dma_sync_single_for_device(chan2parent(chan),
657 prev->txd.phys, 666 prev->txd.phys,
658 sizeof(prev->lli), 667 sizeof(prev->lli),
659 DMA_TO_DEVICE); 668 DMA_TO_DEVICE);
@@ -673,7 +682,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
673 prev->lli.ctllo |= DWC_CTLL_INT_EN; 682 prev->lli.ctllo |= DWC_CTLL_INT_EN;
674 683
675 prev->lli.llp = 0; 684 prev->lli.llp = 0;
676 dma_sync_single_for_device(chan->dev.parent, 685 dma_sync_single_for_device(chan2parent(chan),
677 prev->txd.phys, sizeof(prev->lli), 686 prev->txd.phys, sizeof(prev->lli),
678 DMA_TO_DEVICE); 687 DMA_TO_DEVICE);
679 688
@@ -758,29 +767,21 @@ static void dwc_issue_pending(struct dma_chan *chan)
758 spin_unlock_bh(&dwc->lock); 767 spin_unlock_bh(&dwc->lock);
759} 768}
760 769
761static int dwc_alloc_chan_resources(struct dma_chan *chan, 770static int dwc_alloc_chan_resources(struct dma_chan *chan)
762 struct dma_client *client)
763{ 771{
764 struct dw_dma_chan *dwc = to_dw_dma_chan(chan); 772 struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
765 struct dw_dma *dw = to_dw_dma(chan->device); 773 struct dw_dma *dw = to_dw_dma(chan->device);
766 struct dw_desc *desc; 774 struct dw_desc *desc;
767 struct dma_slave *slave;
768 struct dw_dma_slave *dws; 775 struct dw_dma_slave *dws;
769 int i; 776 int i;
770 u32 cfghi; 777 u32 cfghi;
771 u32 cfglo; 778 u32 cfglo;
772 779
773 dev_vdbg(&chan->dev, "alloc_chan_resources\n"); 780 dev_vdbg(chan2dev(chan), "alloc_chan_resources\n");
774
775 /* Channels doing slave DMA can only handle one client. */
776 if (dwc->dws || client->slave) {
777 if (chan->client_count)
778 return -EBUSY;
779 }
780 781
781 /* ASSERT: channel is idle */ 782 /* ASSERT: channel is idle */
782 if (dma_readl(dw, CH_EN) & dwc->mask) { 783 if (dma_readl(dw, CH_EN) & dwc->mask) {
783 dev_dbg(&chan->dev, "DMA channel not idle?\n"); 784 dev_dbg(chan2dev(chan), "DMA channel not idle?\n");
784 return -EIO; 785 return -EIO;
785 } 786 }
786 787
@@ -789,23 +790,17 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan,
789 cfghi = DWC_CFGH_FIFO_MODE; 790 cfghi = DWC_CFGH_FIFO_MODE;
790 cfglo = 0; 791 cfglo = 0;
791 792
792 slave = client->slave; 793 dws = dwc->dws;
793 if (slave) { 794 if (dws) {
794 /* 795 /*
795 * We need controller-specific data to set up slave 796 * We need controller-specific data to set up slave
796 * transfers. 797 * transfers.
797 */ 798 */
798 BUG_ON(!slave->dma_dev || slave->dma_dev != dw->dma.dev); 799 BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev);
799
800 dws = container_of(slave, struct dw_dma_slave, slave);
801 800
802 dwc->dws = dws;
803 cfghi = dws->cfg_hi; 801 cfghi = dws->cfg_hi;
804 cfglo = dws->cfg_lo; 802 cfglo = dws->cfg_lo;
805 } else {
806 dwc->dws = NULL;
807 } 803 }
808
809 channel_writel(dwc, CFG_LO, cfglo); 804 channel_writel(dwc, CFG_LO, cfglo);
810 channel_writel(dwc, CFG_HI, cfghi); 805 channel_writel(dwc, CFG_HI, cfghi);
811 806
@@ -822,7 +817,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan,
822 817
823 desc = kzalloc(sizeof(struct dw_desc), GFP_KERNEL); 818 desc = kzalloc(sizeof(struct dw_desc), GFP_KERNEL);
824 if (!desc) { 819 if (!desc) {
825 dev_info(&chan->dev, 820 dev_info(chan2dev(chan),
826 "only allocated %d descriptors\n", i); 821 "only allocated %d descriptors\n", i);
827 spin_lock_bh(&dwc->lock); 822 spin_lock_bh(&dwc->lock);
828 break; 823 break;
@@ -832,7 +827,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan,
832 desc->txd.tx_submit = dwc_tx_submit; 827 desc->txd.tx_submit = dwc_tx_submit;
833 desc->txd.flags = DMA_CTRL_ACK; 828 desc->txd.flags = DMA_CTRL_ACK;
834 INIT_LIST_HEAD(&desc->txd.tx_list); 829 INIT_LIST_HEAD(&desc->txd.tx_list);
835 desc->txd.phys = dma_map_single(chan->dev.parent, &desc->lli, 830 desc->txd.phys = dma_map_single(chan2parent(chan), &desc->lli,
836 sizeof(desc->lli), DMA_TO_DEVICE); 831 sizeof(desc->lli), DMA_TO_DEVICE);
837 dwc_desc_put(dwc, desc); 832 dwc_desc_put(dwc, desc);
838 833
@@ -847,7 +842,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan,
847 842
848 spin_unlock_bh(&dwc->lock); 843 spin_unlock_bh(&dwc->lock);
849 844
850 dev_dbg(&chan->dev, 845 dev_dbg(chan2dev(chan),
851 "alloc_chan_resources allocated %d descriptors\n", i); 846 "alloc_chan_resources allocated %d descriptors\n", i);
852 847
853 return i; 848 return i;
@@ -860,7 +855,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
860 struct dw_desc *desc, *_desc; 855 struct dw_desc *desc, *_desc;
861 LIST_HEAD(list); 856 LIST_HEAD(list);
862 857
863 dev_dbg(&chan->dev, "free_chan_resources (descs allocated=%u)\n", 858 dev_dbg(chan2dev(chan), "free_chan_resources (descs allocated=%u)\n",
864 dwc->descs_allocated); 859 dwc->descs_allocated);
865 860
866 /* ASSERT: channel is idle */ 861 /* ASSERT: channel is idle */
@@ -881,13 +876,13 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
881 spin_unlock_bh(&dwc->lock); 876 spin_unlock_bh(&dwc->lock);
882 877
883 list_for_each_entry_safe(desc, _desc, &list, desc_node) { 878 list_for_each_entry_safe(desc, _desc, &list, desc_node) {
884 dev_vdbg(&chan->dev, " freeing descriptor %p\n", desc); 879 dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc);
885 dma_unmap_single(chan->dev.parent, desc->txd.phys, 880 dma_unmap_single(chan2parent(chan), desc->txd.phys,
886 sizeof(desc->lli), DMA_TO_DEVICE); 881 sizeof(desc->lli), DMA_TO_DEVICE);
887 kfree(desc); 882 kfree(desc);
888 } 883 }
889 884
890 dev_vdbg(&chan->dev, "free_chan_resources done\n"); 885 dev_vdbg(chan2dev(chan), "free_chan_resources done\n");
891} 886}
892 887
893/*----------------------------------------------------------------------*/ 888/*----------------------------------------------------------------------*/
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 0b95dcce447e..ca70a21afc68 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -366,8 +366,7 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor(
366 * 366 *
367 * Return - The number of descriptors allocated. 367 * Return - The number of descriptors allocated.
368 */ 368 */
369static int fsl_dma_alloc_chan_resources(struct dma_chan *chan, 369static int fsl_dma_alloc_chan_resources(struct dma_chan *chan)
370 struct dma_client *client)
371{ 370{
372 struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan); 371 struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
373 372
@@ -823,7 +822,7 @@ static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev,
823 */ 822 */
824 WARN_ON(fdev->feature != new_fsl_chan->feature); 823 WARN_ON(fdev->feature != new_fsl_chan->feature);
825 824
826 new_fsl_chan->dev = &new_fsl_chan->common.dev; 825 new_fsl_chan->dev = &new_fsl_chan->common.dev->device;
827 new_fsl_chan->reg_base = ioremap(new_fsl_chan->reg.start, 826 new_fsl_chan->reg_base = ioremap(new_fsl_chan->reg.start,
828 new_fsl_chan->reg.end - new_fsl_chan->reg.start + 1); 827 new_fsl_chan->reg.end - new_fsl_chan->reg.start + 1);
829 828
diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c
index 9b16a3af9a0a..4105d6575b64 100644
--- a/drivers/dma/ioat.c
+++ b/drivers/dma/ioat.c
@@ -75,60 +75,10 @@ static int ioat_dca_enabled = 1;
75module_param(ioat_dca_enabled, int, 0644); 75module_param(ioat_dca_enabled, int, 0644);
76MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); 76MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
77 77
78static int ioat_setup_functionality(struct pci_dev *pdev, void __iomem *iobase)
79{
80 struct ioat_device *device = pci_get_drvdata(pdev);
81 u8 version;
82 int err = 0;
83
84 version = readb(iobase + IOAT_VER_OFFSET);
85 switch (version) {
86 case IOAT_VER_1_2:
87 device->dma = ioat_dma_probe(pdev, iobase);
88 if (device->dma && ioat_dca_enabled)
89 device->dca = ioat_dca_init(pdev, iobase);
90 break;
91 case IOAT_VER_2_0:
92 device->dma = ioat_dma_probe(pdev, iobase);
93 if (device->dma && ioat_dca_enabled)
94 device->dca = ioat2_dca_init(pdev, iobase);
95 break;
96 case IOAT_VER_3_0:
97 device->dma = ioat_dma_probe(pdev, iobase);
98 if (device->dma && ioat_dca_enabled)
99 device->dca = ioat3_dca_init(pdev, iobase);
100 break;
101 default:
102 err = -ENODEV;
103 break;
104 }
105 if (!device->dma)
106 err = -ENODEV;
107 return err;
108}
109
110static void ioat_shutdown_functionality(struct pci_dev *pdev)
111{
112 struct ioat_device *device = pci_get_drvdata(pdev);
113
114 dev_err(&pdev->dev, "Removing dma and dca services\n");
115 if (device->dca) {
116 unregister_dca_provider(device->dca);
117 free_dca_provider(device->dca);
118 device->dca = NULL;
119 }
120
121 if (device->dma) {
122 ioat_dma_remove(device->dma);
123 device->dma = NULL;
124 }
125}
126
127static struct pci_driver ioat_pci_driver = { 78static struct pci_driver ioat_pci_driver = {
128 .name = "ioatdma", 79 .name = "ioatdma",
129 .id_table = ioat_pci_tbl, 80 .id_table = ioat_pci_tbl,
130 .probe = ioat_probe, 81 .probe = ioat_probe,
131 .shutdown = ioat_shutdown_functionality,
132 .remove = __devexit_p(ioat_remove), 82 .remove = __devexit_p(ioat_remove),
133}; 83};
134 84
@@ -179,7 +129,29 @@ static int __devinit ioat_probe(struct pci_dev *pdev,
179 129
180 pci_set_master(pdev); 130 pci_set_master(pdev);
181 131
182 err = ioat_setup_functionality(pdev, iobase); 132 switch (readb(iobase + IOAT_VER_OFFSET)) {
133 case IOAT_VER_1_2:
134 device->dma = ioat_dma_probe(pdev, iobase);
135 if (device->dma && ioat_dca_enabled)
136 device->dca = ioat_dca_init(pdev, iobase);
137 break;
138 case IOAT_VER_2_0:
139 device->dma = ioat_dma_probe(pdev, iobase);
140 if (device->dma && ioat_dca_enabled)
141 device->dca = ioat2_dca_init(pdev, iobase);
142 break;
143 case IOAT_VER_3_0:
144 device->dma = ioat_dma_probe(pdev, iobase);
145 if (device->dma && ioat_dca_enabled)
146 device->dca = ioat3_dca_init(pdev, iobase);
147 break;
148 default:
149 err = -ENODEV;
150 break;
151 }
152 if (!device->dma)
153 err = -ENODEV;
154
183 if (err) 155 if (err)
184 goto err_version; 156 goto err_version;
185 157
@@ -198,17 +170,21 @@ err_enable_device:
198 return err; 170 return err;
199} 171}
200 172
201/*
202 * It is unsafe to remove this module: if removed while a requested
203 * dma is outstanding, esp. from tcp, it is possible to hang while
204 * waiting for something that will never finish. However, if you're
205 * feeling lucky, this usually works just fine.
206 */
207static void __devexit ioat_remove(struct pci_dev *pdev) 173static void __devexit ioat_remove(struct pci_dev *pdev)
208{ 174{
209 struct ioat_device *device = pci_get_drvdata(pdev); 175 struct ioat_device *device = pci_get_drvdata(pdev);
210 176
211 ioat_shutdown_functionality(pdev); 177 dev_err(&pdev->dev, "Removing dma and dca services\n");
178 if (device->dca) {
179 unregister_dca_provider(device->dca);
180 free_dca_provider(device->dca);
181 device->dca = NULL;
182 }
183
184 if (device->dma) {
185 ioat_dma_remove(device->dma);
186 device->dma = NULL;
187 }
212 188
213 kfree(device); 189 kfree(device);
214} 190}
diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
index 6607fdd00b1c..b3759c4b6536 100644
--- a/drivers/dma/ioat_dma.c
+++ b/drivers/dma/ioat_dma.c
@@ -734,8 +734,7 @@ static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan)
734 * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors 734 * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors
735 * @chan: the channel to be filled out 735 * @chan: the channel to be filled out
736 */ 736 */
737static int ioat_dma_alloc_chan_resources(struct dma_chan *chan, 737static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
738 struct dma_client *client)
739{ 738{
740 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); 739 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
741 struct ioat_desc_sw *desc; 740 struct ioat_desc_sw *desc;
@@ -1341,12 +1340,11 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
1341 */ 1340 */
1342#define IOAT_TEST_SIZE 2000 1341#define IOAT_TEST_SIZE 2000
1343 1342
1344DECLARE_COMPLETION(test_completion);
1345static void ioat_dma_test_callback(void *dma_async_param) 1343static void ioat_dma_test_callback(void *dma_async_param)
1346{ 1344{
1347 printk(KERN_ERR "ioatdma: ioat_dma_test_callback(%p)\n", 1345 struct completion *cmp = dma_async_param;
1348 dma_async_param); 1346
1349 complete(&test_completion); 1347 complete(cmp);
1350} 1348}
1351 1349
1352/** 1350/**
@@ -1363,6 +1361,7 @@ static int ioat_dma_self_test(struct ioatdma_device *device)
1363 dma_addr_t dma_dest, dma_src; 1361 dma_addr_t dma_dest, dma_src;
1364 dma_cookie_t cookie; 1362 dma_cookie_t cookie;
1365 int err = 0; 1363 int err = 0;
1364 struct completion cmp;
1366 1365
1367 src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); 1366 src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
1368 if (!src) 1367 if (!src)
@@ -1381,7 +1380,7 @@ static int ioat_dma_self_test(struct ioatdma_device *device)
1381 dma_chan = container_of(device->common.channels.next, 1380 dma_chan = container_of(device->common.channels.next,
1382 struct dma_chan, 1381 struct dma_chan,
1383 device_node); 1382 device_node);
1384 if (device->common.device_alloc_chan_resources(dma_chan, NULL) < 1) { 1383 if (device->common.device_alloc_chan_resources(dma_chan) < 1) {
1385 dev_err(&device->pdev->dev, 1384 dev_err(&device->pdev->dev,
1386 "selftest cannot allocate chan resource\n"); 1385 "selftest cannot allocate chan resource\n");
1387 err = -ENODEV; 1386 err = -ENODEV;
@@ -1402,8 +1401,9 @@ static int ioat_dma_self_test(struct ioatdma_device *device)
1402 } 1401 }
1403 1402
1404 async_tx_ack(tx); 1403 async_tx_ack(tx);
1404 init_completion(&cmp);
1405 tx->callback = ioat_dma_test_callback; 1405 tx->callback = ioat_dma_test_callback;
1406 tx->callback_param = (void *)0x8086; 1406 tx->callback_param = &cmp;
1407 cookie = tx->tx_submit(tx); 1407 cookie = tx->tx_submit(tx);
1408 if (cookie < 0) { 1408 if (cookie < 0) {
1409 dev_err(&device->pdev->dev, 1409 dev_err(&device->pdev->dev,
@@ -1413,7 +1413,7 @@ static int ioat_dma_self_test(struct ioatdma_device *device)
1413 } 1413 }
1414 device->common.device_issue_pending(dma_chan); 1414 device->common.device_issue_pending(dma_chan);
1415 1415
1416 wait_for_completion_timeout(&test_completion, msecs_to_jiffies(3000)); 1416 wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1417 1417
1418 if (device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL) 1418 if (device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL)
1419 != DMA_SUCCESS) { 1419 != DMA_SUCCESS) {
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index 6be317262200..ea5440dd10dc 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -24,7 +24,6 @@
24 24
25#include <linux/init.h> 25#include <linux/init.h>
26#include <linux/module.h> 26#include <linux/module.h>
27#include <linux/async_tx.h>
28#include <linux/delay.h> 27#include <linux/delay.h>
29#include <linux/dma-mapping.h> 28#include <linux/dma-mapping.h>
30#include <linux/spinlock.h> 29#include <linux/spinlock.h>
@@ -116,7 +115,7 @@ iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
116 } 115 }
117 116
118 /* run dependent operations */ 117 /* run dependent operations */
119 async_tx_run_dependencies(&desc->async_tx); 118 dma_run_dependencies(&desc->async_tx);
120 119
121 return cookie; 120 return cookie;
122} 121}
@@ -270,8 +269,6 @@ static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
270 break; 269 break;
271 } 270 }
272 271
273 BUG_ON(!seen_current);
274
275 if (cookie > 0) { 272 if (cookie > 0) {
276 iop_chan->completed_cookie = cookie; 273 iop_chan->completed_cookie = cookie;
277 pr_debug("\tcompleted cookie %d\n", cookie); 274 pr_debug("\tcompleted cookie %d\n", cookie);
@@ -471,8 +468,7 @@ static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan);
471 * greater than 2x the number slots needed to satisfy a device->max_xor 468 * greater than 2x the number slots needed to satisfy a device->max_xor
472 * request. 469 * request.
473 * */ 470 * */
474static int iop_adma_alloc_chan_resources(struct dma_chan *chan, 471static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
475 struct dma_client *client)
476{ 472{
477 char *hw_desc; 473 char *hw_desc;
478 int idx; 474 int idx;
@@ -866,7 +862,7 @@ static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device)
866 dma_chan = container_of(device->common.channels.next, 862 dma_chan = container_of(device->common.channels.next,
867 struct dma_chan, 863 struct dma_chan,
868 device_node); 864 device_node);
869 if (iop_adma_alloc_chan_resources(dma_chan, NULL) < 1) { 865 if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
870 err = -ENODEV; 866 err = -ENODEV;
871 goto out; 867 goto out;
872 } 868 }
@@ -964,7 +960,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
964 dma_chan = container_of(device->common.channels.next, 960 dma_chan = container_of(device->common.channels.next,
965 struct dma_chan, 961 struct dma_chan,
966 device_node); 962 device_node);
967 if (iop_adma_alloc_chan_resources(dma_chan, NULL) < 1) { 963 if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
968 err = -ENODEV; 964 err = -ENODEV;
969 goto out; 965 goto out;
970 } 966 }
@@ -1115,26 +1111,13 @@ static int __devexit iop_adma_remove(struct platform_device *dev)
1115 struct iop_adma_device *device = platform_get_drvdata(dev); 1111 struct iop_adma_device *device = platform_get_drvdata(dev);
1116 struct dma_chan *chan, *_chan; 1112 struct dma_chan *chan, *_chan;
1117 struct iop_adma_chan *iop_chan; 1113 struct iop_adma_chan *iop_chan;
1118 int i;
1119 struct iop_adma_platform_data *plat_data = dev->dev.platform_data; 1114 struct iop_adma_platform_data *plat_data = dev->dev.platform_data;
1120 1115
1121 dma_async_device_unregister(&device->common); 1116 dma_async_device_unregister(&device->common);
1122 1117
1123 for (i = 0; i < 3; i++) {
1124 unsigned int irq;
1125 irq = platform_get_irq(dev, i);
1126 free_irq(irq, device);
1127 }
1128
1129 dma_free_coherent(&dev->dev, plat_data->pool_size, 1118 dma_free_coherent(&dev->dev, plat_data->pool_size,
1130 device->dma_desc_pool_virt, device->dma_desc_pool); 1119 device->dma_desc_pool_virt, device->dma_desc_pool);
1131 1120
1132 do {
1133 struct resource *res;
1134 res = platform_get_resource(dev, IORESOURCE_MEM, 0);
1135 release_mem_region(res->start, res->end - res->start);
1136 } while (0);
1137
1138 list_for_each_entry_safe(chan, _chan, &device->common.channels, 1121 list_for_each_entry_safe(chan, _chan, &device->common.channels,
1139 device_node) { 1122 device_node) {
1140 iop_chan = to_iop_adma_chan(chan); 1123 iop_chan = to_iop_adma_chan(chan);
@@ -1255,7 +1238,6 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
1255 spin_lock_init(&iop_chan->lock); 1238 spin_lock_init(&iop_chan->lock);
1256 INIT_LIST_HEAD(&iop_chan->chain); 1239 INIT_LIST_HEAD(&iop_chan->chain);
1257 INIT_LIST_HEAD(&iop_chan->all_slots); 1240 INIT_LIST_HEAD(&iop_chan->all_slots);
1258 INIT_RCU_HEAD(&iop_chan->common.rcu);
1259 iop_chan->common.device = dma_dev; 1241 iop_chan->common.device = dma_dev;
1260 list_add_tail(&iop_chan->common.device_node, &dma_dev->channels); 1242 list_add_tail(&iop_chan->common.device_node, &dma_dev->channels);
1261 1243
@@ -1431,16 +1413,12 @@ static int __init iop_adma_init (void)
1431 return platform_driver_register(&iop_adma_driver); 1413 return platform_driver_register(&iop_adma_driver);
1432} 1414}
1433 1415
1434/* it's currently unsafe to unload this module */
1435#if 0
1436static void __exit iop_adma_exit (void) 1416static void __exit iop_adma_exit (void)
1437{ 1417{
1438 platform_driver_unregister(&iop_adma_driver); 1418 platform_driver_unregister(&iop_adma_driver);
1439 return; 1419 return;
1440} 1420}
1441module_exit(iop_adma_exit); 1421module_exit(iop_adma_exit);
1442#endif
1443
1444module_init(iop_adma_init); 1422module_init(iop_adma_init);
1445 1423
1446MODULE_AUTHOR("Intel Corporation"); 1424MODULE_AUTHOR("Intel Corporation");
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index bcda17426411..d35cbd1ff0b3 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -18,7 +18,6 @@
18 18
19#include <linux/init.h> 19#include <linux/init.h>
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/async_tx.h>
22#include <linux/delay.h> 21#include <linux/delay.h>
23#include <linux/dma-mapping.h> 22#include <linux/dma-mapping.h>
24#include <linux/spinlock.h> 23#include <linux/spinlock.h>
@@ -340,7 +339,7 @@ mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
340 } 339 }
341 340
342 /* run dependent operations */ 341 /* run dependent operations */
343 async_tx_run_dependencies(&desc->async_tx); 342 dma_run_dependencies(&desc->async_tx);
344 343
345 return cookie; 344 return cookie;
346} 345}
@@ -607,8 +606,7 @@ submit_done:
607} 606}
608 607
609/* returns the number of allocated descriptors */ 608/* returns the number of allocated descriptors */
610static int mv_xor_alloc_chan_resources(struct dma_chan *chan, 609static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
611 struct dma_client *client)
612{ 610{
613 char *hw_desc; 611 char *hw_desc;
614 int idx; 612 int idx;
@@ -958,7 +956,7 @@ static int __devinit mv_xor_memcpy_self_test(struct mv_xor_device *device)
958 dma_chan = container_of(device->common.channels.next, 956 dma_chan = container_of(device->common.channels.next,
959 struct dma_chan, 957 struct dma_chan,
960 device_node); 958 device_node);
961 if (mv_xor_alloc_chan_resources(dma_chan, NULL) < 1) { 959 if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
962 err = -ENODEV; 960 err = -ENODEV;
963 goto out; 961 goto out;
964 } 962 }
@@ -1053,7 +1051,7 @@ mv_xor_xor_self_test(struct mv_xor_device *device)
1053 dma_chan = container_of(device->common.channels.next, 1051 dma_chan = container_of(device->common.channels.next,
1054 struct dma_chan, 1052 struct dma_chan,
1055 device_node); 1053 device_node);
1056 if (mv_xor_alloc_chan_resources(dma_chan, NULL) < 1) { 1054 if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
1057 err = -ENODEV; 1055 err = -ENODEV;
1058 goto out; 1056 goto out;
1059 } 1057 }
@@ -1221,7 +1219,6 @@ static int __devinit mv_xor_probe(struct platform_device *pdev)
1221 INIT_LIST_HEAD(&mv_chan->chain); 1219 INIT_LIST_HEAD(&mv_chan->chain);
1222 INIT_LIST_HEAD(&mv_chan->completed_slots); 1220 INIT_LIST_HEAD(&mv_chan->completed_slots);
1223 INIT_LIST_HEAD(&mv_chan->all_slots); 1221 INIT_LIST_HEAD(&mv_chan->all_slots);
1224 INIT_RCU_HEAD(&mv_chan->common.rcu);
1225 mv_chan->common.device = dma_dev; 1222 mv_chan->common.device = dma_dev;
1226 1223
1227 list_add_tail(&mv_chan->common.device_node, &dma_dev->channels); 1224 list_add_tail(&mv_chan->common.device_node, &dma_dev->channels);
diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
index 2f9e941968d6..d8f295bdad76 100644
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -18,12 +18,6 @@
18#include <linux/dmi.h> 18#include <linux/dmi.h>
19 19
20#include <acpi/acpi_bus.h> 20#include <acpi/acpi_bus.h>
21#include <acpi/acnames.h>
22#include <acpi/acnamesp.h>
23#include <acpi/acparser.h>
24#include <acpi/acexcep.h>
25#include <acpi/acmacros.h>
26#include <acpi/actypes.h>
27 21
28#define REGS_PER_GTF 7 22#define REGS_PER_GTF 7
29struct taskfile_array { 23struct taskfile_array {
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index e7fb7d2fcbfc..a4a1ae214630 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -63,6 +63,12 @@ config LEDS_WRAP
63 help 63 help
64 This option enables support for the PCEngines WRAP programmable LEDs. 64 This option enables support for the PCEngines WRAP programmable LEDs.
65 65
66config LEDS_ALIX2
67 tristate "LED Support for ALIX.2 and ALIX.3 series"
68 depends on LEDS_CLASS && X86 && EXPERIMENTAL
69 help
70 This option enables support for the PCEngines ALIX.2 and ALIX.3 LEDs.
71
66config LEDS_H1940 72config LEDS_H1940
67 tristate "LED Support for iPAQ H1940 device" 73 tristate "LED Support for iPAQ H1940 device"
68 depends on LEDS_CLASS && ARCH_H1940 74 depends on LEDS_CLASS && ARCH_H1940
@@ -77,7 +83,7 @@ config LEDS_COBALT_QUBE
77 83
78config LEDS_COBALT_RAQ 84config LEDS_COBALT_RAQ
79 bool "LED Support for the Cobalt Raq series" 85 bool "LED Support for the Cobalt Raq series"
80 depends on LEDS_CLASS && MIPS_COBALT 86 depends on LEDS_CLASS=y && MIPS_COBALT
81 select LEDS_TRIGGERS 87 select LEDS_TRIGGERS
82 help 88 help
83 This option enables support for the Cobalt Raq series LEDs. 89 This option enables support for the Cobalt Raq series LEDs.
@@ -158,6 +164,13 @@ config LEDS_PCA955X
158 LED driver chips accessed via the I2C bus. Supported 164 LED driver chips accessed via the I2C bus. Supported
159 devices include PCA9550, PCA9551, PCA9552, and PCA9553. 165 devices include PCA9550, PCA9551, PCA9552, and PCA9553.
160 166
167config LEDS_WM8350
168 tristate "LED Support for WM8350 AudioPlus PMIC"
169 depends on LEDS_CLASS && MFD_WM8350
170 help
171 This option enables support for LEDs driven by the Wolfson
172 Microelectronics WM8350 AudioPlus PMIC.
173
161config LEDS_DA903X 174config LEDS_DA903X
162 tristate "LED Support for DA9030/DA9034 PMIC" 175 tristate "LED Support for DA9030/DA9034 PMIC"
163 depends on LEDS_CLASS && PMIC_DA903X 176 depends on LEDS_CLASS && PMIC_DA903X
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index e1967a29850e..bc247cb02e82 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_LEDS_S3C24XX) += leds-s3c24xx.o
11obj-$(CONFIG_LEDS_AMS_DELTA) += leds-ams-delta.o 11obj-$(CONFIG_LEDS_AMS_DELTA) += leds-ams-delta.o
12obj-$(CONFIG_LEDS_NET48XX) += leds-net48xx.o 12obj-$(CONFIG_LEDS_NET48XX) += leds-net48xx.o
13obj-$(CONFIG_LEDS_WRAP) += leds-wrap.o 13obj-$(CONFIG_LEDS_WRAP) += leds-wrap.o
14obj-$(CONFIG_LEDS_ALIX2) += leds-alix2.o
14obj-$(CONFIG_LEDS_H1940) += leds-h1940.o 15obj-$(CONFIG_LEDS_H1940) += leds-h1940.o
15obj-$(CONFIG_LEDS_COBALT_QUBE) += leds-cobalt-qube.o 16obj-$(CONFIG_LEDS_COBALT_QUBE) += leds-cobalt-qube.o
16obj-$(CONFIG_LEDS_COBALT_RAQ) += leds-cobalt-raq.o 17obj-$(CONFIG_LEDS_COBALT_RAQ) += leds-cobalt-raq.o
@@ -23,6 +24,7 @@ obj-$(CONFIG_LEDS_FSG) += leds-fsg.o
23obj-$(CONFIG_LEDS_PCA955X) += leds-pca955x.o 24obj-$(CONFIG_LEDS_PCA955X) += leds-pca955x.o
24obj-$(CONFIG_LEDS_DA903X) += leds-da903x.o 25obj-$(CONFIG_LEDS_DA903X) += leds-da903x.o
25obj-$(CONFIG_LEDS_HP_DISK) += leds-hp-disk.o 26obj-$(CONFIG_LEDS_HP_DISK) += leds-hp-disk.o
27obj-$(CONFIG_LEDS_WM8350) += leds-wm8350.o
26 28
27# LED Triggers 29# LED Triggers
28obj-$(CONFIG_LEDS_TRIGGER_TIMER) += ledtrig-timer.o 30obj-$(CONFIG_LEDS_TRIGGER_TIMER) += ledtrig-timer.o
diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index 6c4a326176d7..52f82e3ea13a 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -91,9 +91,29 @@ void led_classdev_resume(struct led_classdev *led_cdev)
91} 91}
92EXPORT_SYMBOL_GPL(led_classdev_resume); 92EXPORT_SYMBOL_GPL(led_classdev_resume);
93 93
94static int led_suspend(struct device *dev, pm_message_t state)
95{
96 struct led_classdev *led_cdev = dev_get_drvdata(dev);
97
98 if (led_cdev->flags & LED_CORE_SUSPENDRESUME)
99 led_classdev_suspend(led_cdev);
100
101 return 0;
102}
103
104static int led_resume(struct device *dev)
105{
106 struct led_classdev *led_cdev = dev_get_drvdata(dev);
107
108 if (led_cdev->flags & LED_CORE_SUSPENDRESUME)
109 led_classdev_resume(led_cdev);
110
111 return 0;
112}
113
94/** 114/**
95 * led_classdev_register - register a new object of led_classdev class. 115 * led_classdev_register - register a new object of led_classdev class.
96 * @dev: The device to register. 116 * @parent: The device to register.
97 * @led_cdev: the led_classdev structure for this device. 117 * @led_cdev: the led_classdev structure for this device.
98 */ 118 */
99int led_classdev_register(struct device *parent, struct led_classdev *led_cdev) 119int led_classdev_register(struct device *parent, struct led_classdev *led_cdev)
@@ -174,6 +194,8 @@ static int __init leds_init(void)
174 leds_class = class_create(THIS_MODULE, "leds"); 194 leds_class = class_create(THIS_MODULE, "leds");
175 if (IS_ERR(leds_class)) 195 if (IS_ERR(leds_class))
176 return PTR_ERR(leds_class); 196 return PTR_ERR(leds_class);
197 leds_class->suspend = led_suspend;
198 leds_class->resume = led_resume;
177 return 0; 199 return 0;
178} 200}
179 201
diff --git a/drivers/leds/leds-alix2.c b/drivers/leds/leds-alix2.c
new file mode 100644
index 000000000000..ddbd7730dfc8
--- /dev/null
+++ b/drivers/leds/leds-alix2.c
@@ -0,0 +1,181 @@
1/*
2 * LEDs driver for PCEngines ALIX.2 and ALIX.3
3 *
4 * Copyright (C) 2008 Constantin Baranov <const@mimas.ru>
5 */
6
7#include <linux/err.h>
8#include <linux/io.h>
9#include <linux/kernel.h>
10#include <linux/leds.h>
11#include <linux/module.h>
12#include <linux/platform_device.h>
13#include <linux/string.h>
14
15static int force = 0;
16module_param(force, bool, 0444);
17MODULE_PARM_DESC(force, "Assume system has ALIX.2 style LEDs");
18
19struct alix_led {
20 struct led_classdev cdev;
21 unsigned short port;
22 unsigned int on_value;
23 unsigned int off_value;
24};
25
26static void alix_led_set(struct led_classdev *led_cdev,
27 enum led_brightness brightness)
28{
29 struct alix_led *led_dev =
30 container_of(led_cdev, struct alix_led, cdev);
31
32 if (brightness)
33 outl(led_dev->on_value, led_dev->port);
34 else
35 outl(led_dev->off_value, led_dev->port);
36}
37
38static struct alix_led alix_leds[] = {
39 {
40 .cdev = {
41 .name = "alix:1",
42 .brightness_set = alix_led_set,
43 },
44 .port = 0x6100,
45 .on_value = 1 << 22,
46 .off_value = 1 << 6,
47 },
48 {
49 .cdev = {
50 .name = "alix:2",
51 .brightness_set = alix_led_set,
52 },
53 .port = 0x6180,
54 .on_value = 1 << 25,
55 .off_value = 1 << 9,
56 },
57 {
58 .cdev = {
59 .name = "alix:3",
60 .brightness_set = alix_led_set,
61 },
62 .port = 0x6180,
63 .on_value = 1 << 27,
64 .off_value = 1 << 11,
65 },
66};
67
68static int __init alix_led_probe(struct platform_device *pdev)
69{
70 int i;
71 int ret;
72
73 for (i = 0; i < ARRAY_SIZE(alix_leds); i++) {
74 alix_leds[i].cdev.flags |= LED_CORE_SUSPENDRESUME;
75 ret = led_classdev_register(&pdev->dev, &alix_leds[i].cdev);
76 if (ret < 0)
77 goto fail;
78 }
79 return 0;
80
81fail:
82 while (--i >= 0)
83 led_classdev_unregister(&alix_leds[i].cdev);
84 return ret;
85}
86
87static int alix_led_remove(struct platform_device *pdev)
88{
89 int i;
90
91 for (i = 0; i < ARRAY_SIZE(alix_leds); i++)
92 led_classdev_unregister(&alix_leds[i].cdev);
93 return 0;
94}
95
96static struct platform_driver alix_led_driver = {
97 .remove = alix_led_remove,
98 .driver = {
99 .name = KBUILD_MODNAME,
100 .owner = THIS_MODULE,
101 },
102};
103
104static int __init alix_present(void)
105{
106 const unsigned long bios_phys = 0x000f0000;
107 const size_t bios_len = 0x00010000;
108 const char alix_sig[] = "PC Engines ALIX.";
109 const size_t alix_sig_len = sizeof(alix_sig) - 1;
110
111 const char *bios_virt;
112 const char *scan_end;
113 const char *p;
114 int ret = 0;
115
116 if (force) {
117 printk(KERN_NOTICE "%s: forced to skip BIOS test, "
118 "assume system has ALIX.2 style LEDs\n",
119 KBUILD_MODNAME);
120 ret = 1;
121 goto out;
122 }
123
124 bios_virt = phys_to_virt(bios_phys);
125 scan_end = bios_virt + bios_len - (alix_sig_len + 2);
126 for (p = bios_virt; p < scan_end; p++) {
127 const char *tail;
128
129 if (memcmp(p, alix_sig, alix_sig_len) != 0) {
130 continue;
131 }
132
133 tail = p + alix_sig_len;
134 if ((tail[0] == '2' || tail[0] == '3') && tail[1] == '\0') {
135 printk(KERN_INFO
136 "%s: system is recognized as \"%s\"\n",
137 KBUILD_MODNAME, p);
138 ret = 1;
139 break;
140 }
141 }
142
143out:
144 return ret;
145}
146
147static struct platform_device *pdev;
148
149static int __init alix_led_init(void)
150{
151 int ret;
152
153 if (!alix_present()) {
154 ret = -ENODEV;
155 goto out;
156 }
157
158 pdev = platform_device_register_simple(KBUILD_MODNAME, -1, NULL, 0);
159 if (!IS_ERR(pdev)) {
160 ret = platform_driver_probe(&alix_led_driver, alix_led_probe);
161 if (ret)
162 platform_device_unregister(pdev);
163 } else
164 ret = PTR_ERR(pdev);
165
166out:
167 return ret;
168}
169
170static void __exit alix_led_exit(void)
171{
172 platform_device_unregister(pdev);
173 platform_driver_unregister(&alix_led_driver);
174}
175
176module_init(alix_led_init);
177module_exit(alix_led_exit);
178
179MODULE_AUTHOR("Constantin Baranov <const@mimas.ru>");
180MODULE_DESCRIPTION("PCEngines ALIX.2 and ALIX.3 LED driver");
181MODULE_LICENSE("GPL");
diff --git a/drivers/leds/leds-ams-delta.c b/drivers/leds/leds-ams-delta.c
index 1bd590bb3a6e..446050759b4d 100644
--- a/drivers/leds/leds-ams-delta.c
+++ b/drivers/leds/leds-ams-delta.c
@@ -79,37 +79,12 @@ static struct ams_delta_led ams_delta_leds[] = {
79 }, 79 },
80}; 80};
81 81
82#ifdef CONFIG_PM
83static int ams_delta_led_suspend(struct platform_device *dev,
84 pm_message_t state)
85{
86 int i;
87
88 for (i = 0; i < ARRAY_SIZE(ams_delta_leds); i++)
89 led_classdev_suspend(&ams_delta_leds[i].cdev);
90
91 return 0;
92}
93
94static int ams_delta_led_resume(struct platform_device *dev)
95{
96 int i;
97
98 for (i = 0; i < ARRAY_SIZE(ams_delta_leds); i++)
99 led_classdev_resume(&ams_delta_leds[i].cdev);
100
101 return 0;
102}
103#else
104#define ams_delta_led_suspend NULL
105#define ams_delta_led_resume NULL
106#endif
107
108static int ams_delta_led_probe(struct platform_device *pdev) 82static int ams_delta_led_probe(struct platform_device *pdev)
109{ 83{
110 int i, ret; 84 int i, ret;
111 85
112 for (i = 0; i < ARRAY_SIZE(ams_delta_leds); i++) { 86 for (i = 0; i < ARRAY_SIZE(ams_delta_leds); i++) {
87 ams_delta_leds[i].cdev.flags |= LED_CORE_SUSPENDRESUME;
113 ret = led_classdev_register(&pdev->dev, 88 ret = led_classdev_register(&pdev->dev,
114 &ams_delta_leds[i].cdev); 89 &ams_delta_leds[i].cdev);
115 if (ret < 0) 90 if (ret < 0)
@@ -127,7 +102,7 @@ static int ams_delta_led_remove(struct platform_device *pdev)
127{ 102{
128 int i; 103 int i;
129 104
130 for (i = 0; i < ARRAY_SIZE(ams_delta_leds); i--) 105 for (i = 0; i < ARRAY_SIZE(ams_delta_leds); i++)
131 led_classdev_unregister(&ams_delta_leds[i].cdev); 106 led_classdev_unregister(&ams_delta_leds[i].cdev);
132 107
133 return 0; 108 return 0;
@@ -136,8 +111,6 @@ static int ams_delta_led_remove(struct platform_device *pdev)
136static struct platform_driver ams_delta_led_driver = { 111static struct platform_driver ams_delta_led_driver = {
137 .probe = ams_delta_led_probe, 112 .probe = ams_delta_led_probe,
138 .remove = ams_delta_led_remove, 113 .remove = ams_delta_led_remove,
139 .suspend = ams_delta_led_suspend,
140 .resume = ams_delta_led_resume,
141 .driver = { 114 .driver = {
142 .name = "ams-delta-led", 115 .name = "ams-delta-led",
143 .owner = THIS_MODULE, 116 .owner = THIS_MODULE,
@@ -151,7 +124,7 @@ static int __init ams_delta_led_init(void)
151 124
152static void __exit ams_delta_led_exit(void) 125static void __exit ams_delta_led_exit(void)
153{ 126{
154 return platform_driver_unregister(&ams_delta_led_driver); 127 platform_driver_unregister(&ams_delta_led_driver);
155} 128}
156 129
157module_init(ams_delta_led_init); 130module_init(ams_delta_led_init);
diff --git a/drivers/leds/leds-clevo-mail.c b/drivers/leds/leds-clevo-mail.c
index eb3415e88f43..1813c84ea5fc 100644
--- a/drivers/leds/leds-clevo-mail.c
+++ b/drivers/leds/leds-clevo-mail.c
@@ -142,6 +142,7 @@ static struct led_classdev clevo_mail_led = {
142 .name = "clevo::mail", 142 .name = "clevo::mail",
143 .brightness_set = clevo_mail_led_set, 143 .brightness_set = clevo_mail_led_set,
144 .blink_set = clevo_mail_led_blink, 144 .blink_set = clevo_mail_led_blink,
145 .flags = LED_CORE_SUSPENDRESUME,
145}; 146};
146 147
147static int __init clevo_mail_led_probe(struct platform_device *pdev) 148static int __init clevo_mail_led_probe(struct platform_device *pdev)
@@ -155,29 +156,9 @@ static int clevo_mail_led_remove(struct platform_device *pdev)
155 return 0; 156 return 0;
156} 157}
157 158
158#ifdef CONFIG_PM
159static int clevo_mail_led_suspend(struct platform_device *dev,
160 pm_message_t state)
161{
162 led_classdev_suspend(&clevo_mail_led);
163 return 0;
164}
165
166static int clevo_mail_led_resume(struct platform_device *dev)
167{
168 led_classdev_resume(&clevo_mail_led);
169 return 0;
170}
171#else
172#define clevo_mail_led_suspend NULL
173#define clevo_mail_led_resume NULL
174#endif
175
176static struct platform_driver clevo_mail_led_driver = { 159static struct platform_driver clevo_mail_led_driver = {
177 .probe = clevo_mail_led_probe, 160 .probe = clevo_mail_led_probe,
178 .remove = clevo_mail_led_remove, 161 .remove = clevo_mail_led_remove,
179 .suspend = clevo_mail_led_suspend,
180 .resume = clevo_mail_led_resume,
181 .driver = { 162 .driver = {
182 .name = KBUILD_MODNAME, 163 .name = KBUILD_MODNAME,
183 .owner = THIS_MODULE, 164 .owner = THIS_MODULE,
diff --git a/drivers/leds/leds-fsg.c b/drivers/leds/leds-fsg.c
index 34935155c1c0..5f7c9c5c09b1 100644
--- a/drivers/leds/leds-fsg.c
+++ b/drivers/leds/leds-fsg.c
@@ -99,64 +99,43 @@ static void fsg_led_ring_set(struct led_classdev *led_cdev,
99} 99}
100 100
101 101
102
103static struct led_classdev fsg_wlan_led = { 102static struct led_classdev fsg_wlan_led = {
104 .name = "fsg:blue:wlan", 103 .name = "fsg:blue:wlan",
105 .brightness_set = fsg_led_wlan_set, 104 .brightness_set = fsg_led_wlan_set,
105 .flags = LED_CORE_SUSPENDRESUME,
106}; 106};
107 107
108static struct led_classdev fsg_wan_led = { 108static struct led_classdev fsg_wan_led = {
109 .name = "fsg:blue:wan", 109 .name = "fsg:blue:wan",
110 .brightness_set = fsg_led_wan_set, 110 .brightness_set = fsg_led_wan_set,
111 .flags = LED_CORE_SUSPENDRESUME,
111}; 112};
112 113
113static struct led_classdev fsg_sata_led = { 114static struct led_classdev fsg_sata_led = {
114 .name = "fsg:blue:sata", 115 .name = "fsg:blue:sata",
115 .brightness_set = fsg_led_sata_set, 116 .brightness_set = fsg_led_sata_set,
117 .flags = LED_CORE_SUSPENDRESUME,
116}; 118};
117 119
118static struct led_classdev fsg_usb_led = { 120static struct led_classdev fsg_usb_led = {
119 .name = "fsg:blue:usb", 121 .name = "fsg:blue:usb",
120 .brightness_set = fsg_led_usb_set, 122 .brightness_set = fsg_led_usb_set,
123 .flags = LED_CORE_SUSPENDRESUME,
121}; 124};
122 125
123static struct led_classdev fsg_sync_led = { 126static struct led_classdev fsg_sync_led = {
124 .name = "fsg:blue:sync", 127 .name = "fsg:blue:sync",
125 .brightness_set = fsg_led_sync_set, 128 .brightness_set = fsg_led_sync_set,
129 .flags = LED_CORE_SUSPENDRESUME,
126}; 130};
127 131
128static struct led_classdev fsg_ring_led = { 132static struct led_classdev fsg_ring_led = {
129 .name = "fsg:blue:ring", 133 .name = "fsg:blue:ring",
130 .brightness_set = fsg_led_ring_set, 134 .brightness_set = fsg_led_ring_set,
135 .flags = LED_CORE_SUSPENDRESUME,
131}; 136};
132 137
133 138
134
135#ifdef CONFIG_PM
136static int fsg_led_suspend(struct platform_device *dev, pm_message_t state)
137{
138 led_classdev_suspend(&fsg_wlan_led);
139 led_classdev_suspend(&fsg_wan_led);
140 led_classdev_suspend(&fsg_sata_led);
141 led_classdev_suspend(&fsg_usb_led);
142 led_classdev_suspend(&fsg_sync_led);
143 led_classdev_suspend(&fsg_ring_led);
144 return 0;
145}
146
147static int fsg_led_resume(struct platform_device *dev)
148{
149 led_classdev_resume(&fsg_wlan_led);
150 led_classdev_resume(&fsg_wan_led);
151 led_classdev_resume(&fsg_sata_led);
152 led_classdev_resume(&fsg_usb_led);
153 led_classdev_resume(&fsg_sync_led);
154 led_classdev_resume(&fsg_ring_led);
155 return 0;
156}
157#endif
158
159
160static int fsg_led_probe(struct platform_device *pdev) 139static int fsg_led_probe(struct platform_device *pdev)
161{ 140{
162 int ret; 141 int ret;
@@ -232,10 +211,6 @@ static int fsg_led_remove(struct platform_device *pdev)
232static struct platform_driver fsg_led_driver = { 211static struct platform_driver fsg_led_driver = {
233 .probe = fsg_led_probe, 212 .probe = fsg_led_probe,
234 .remove = fsg_led_remove, 213 .remove = fsg_led_remove,
235#ifdef CONFIG_PM
236 .suspend = fsg_led_suspend,
237 .resume = fsg_led_resume,
238#endif
239 .driver = { 214 .driver = {
240 .name = "fsg-led", 215 .name = "fsg-led",
241 }, 216 },
diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c
index b13bd2950e95..2e3df08b649b 100644
--- a/drivers/leds/leds-gpio.c
+++ b/drivers/leds/leds-gpio.c
@@ -105,6 +105,7 @@ static int gpio_led_probe(struct platform_device *pdev)
105 } 105 }
106 led_dat->cdev.brightness_set = gpio_led_set; 106 led_dat->cdev.brightness_set = gpio_led_set;
107 led_dat->cdev.brightness = LED_OFF; 107 led_dat->cdev.brightness = LED_OFF;
108 led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME;
108 109
109 gpio_direction_output(led_dat->gpio, led_dat->active_low); 110 gpio_direction_output(led_dat->gpio, led_dat->active_low);
110 111
@@ -154,44 +155,9 @@ static int __devexit gpio_led_remove(struct platform_device *pdev)
154 return 0; 155 return 0;
155} 156}
156 157
157#ifdef CONFIG_PM
158static int gpio_led_suspend(struct platform_device *pdev, pm_message_t state)
159{
160 struct gpio_led_platform_data *pdata = pdev->dev.platform_data;
161 struct gpio_led_data *leds_data;
162 int i;
163
164 leds_data = platform_get_drvdata(pdev);
165
166 for (i = 0; i < pdata->num_leds; i++)
167 led_classdev_suspend(&leds_data[i].cdev);
168
169 return 0;
170}
171
172static int gpio_led_resume(struct platform_device *pdev)
173{
174 struct gpio_led_platform_data *pdata = pdev->dev.platform_data;
175 struct gpio_led_data *leds_data;
176 int i;
177
178 leds_data = platform_get_drvdata(pdev);
179
180 for (i = 0; i < pdata->num_leds; i++)
181 led_classdev_resume(&leds_data[i].cdev);
182
183 return 0;
184}
185#else
186#define gpio_led_suspend NULL
187#define gpio_led_resume NULL
188#endif
189
190static struct platform_driver gpio_led_driver = { 158static struct platform_driver gpio_led_driver = {
191 .probe = gpio_led_probe, 159 .probe = gpio_led_probe,
192 .remove = __devexit_p(gpio_led_remove), 160 .remove = __devexit_p(gpio_led_remove),
193 .suspend = gpio_led_suspend,
194 .resume = gpio_led_resume,
195 .driver = { 161 .driver = {
196 .name = "leds-gpio", 162 .name = "leds-gpio",
197 .owner = THIS_MODULE, 163 .owner = THIS_MODULE,
diff --git a/drivers/leds/leds-hp-disk.c b/drivers/leds/leds-hp-disk.c
index 44fa757d8254..d786adc8c5e3 100644
--- a/drivers/leds/leds-hp-disk.c
+++ b/drivers/leds/leds-hp-disk.c
@@ -68,25 +68,9 @@ static struct led_classdev hpled_led = {
68 .name = "hp:red:hddprotection", 68 .name = "hp:red:hddprotection",
69 .default_trigger = "heartbeat", 69 .default_trigger = "heartbeat",
70 .brightness_set = hpled_set, 70 .brightness_set = hpled_set,
71 .flags = LED_CORE_SUSPENDRESUME,
71}; 72};
72 73
73#ifdef CONFIG_PM
74static int hpled_suspend(struct acpi_device *dev, pm_message_t state)
75{
76 led_classdev_suspend(&hpled_led);
77 return 0;
78}
79
80static int hpled_resume(struct acpi_device *dev)
81{
82 led_classdev_resume(&hpled_led);
83 return 0;
84}
85#else
86#define hpled_suspend NULL
87#define hpled_resume NULL
88#endif
89
90static int hpled_add(struct acpi_device *device) 74static int hpled_add(struct acpi_device *device)
91{ 75{
92 int ret; 76 int ret;
@@ -121,8 +105,6 @@ static struct acpi_driver leds_hp_driver = {
121 .ops = { 105 .ops = {
122 .add = hpled_add, 106 .add = hpled_add,
123 .remove = hpled_remove, 107 .remove = hpled_remove,
124 .suspend = hpled_suspend,
125 .resume = hpled_resume,
126 } 108 }
127}; 109};
128 110
diff --git a/drivers/leds/leds-hp6xx.c b/drivers/leds/leds-hp6xx.c
index e8fb1baf8a50..e4ce1fd46338 100644
--- a/drivers/leds/leds-hp6xx.c
+++ b/drivers/leds/leds-hp6xx.c
@@ -45,30 +45,16 @@ static struct led_classdev hp6xx_red_led = {
45 .name = "hp6xx:red", 45 .name = "hp6xx:red",
46 .default_trigger = "hp6xx-charge", 46 .default_trigger = "hp6xx-charge",
47 .brightness_set = hp6xxled_red_set, 47 .brightness_set = hp6xxled_red_set,
48 .flags = LED_CORE_SUSPENDRESUME,
48}; 49};
49 50
50static struct led_classdev hp6xx_green_led = { 51static struct led_classdev hp6xx_green_led = {
51 .name = "hp6xx:green", 52 .name = "hp6xx:green",
52 .default_trigger = "ide-disk", 53 .default_trigger = "ide-disk",
53 .brightness_set = hp6xxled_green_set, 54 .brightness_set = hp6xxled_green_set,
55 .flags = LED_CORE_SUSPENDRESUME,
54}; 56};
55 57
56#ifdef CONFIG_PM
57static int hp6xxled_suspend(struct platform_device *dev, pm_message_t state)
58{
59 led_classdev_suspend(&hp6xx_red_led);
60 led_classdev_suspend(&hp6xx_green_led);
61 return 0;
62}
63
64static int hp6xxled_resume(struct platform_device *dev)
65{
66 led_classdev_resume(&hp6xx_red_led);
67 led_classdev_resume(&hp6xx_green_led);
68 return 0;
69}
70#endif
71
72static int hp6xxled_probe(struct platform_device *pdev) 58static int hp6xxled_probe(struct platform_device *pdev)
73{ 59{
74 int ret; 60 int ret;
@@ -98,10 +84,6 @@ MODULE_ALIAS("platform:hp6xx-led");
98static struct platform_driver hp6xxled_driver = { 84static struct platform_driver hp6xxled_driver = {
99 .probe = hp6xxled_probe, 85 .probe = hp6xxled_probe,
100 .remove = hp6xxled_remove, 86 .remove = hp6xxled_remove,
101#ifdef CONFIG_PM
102 .suspend = hp6xxled_suspend,
103 .resume = hp6xxled_resume,
104#endif
105 .driver = { 87 .driver = {
106 .name = "hp6xx-led", 88 .name = "hp6xx-led",
107 .owner = THIS_MODULE, 89 .owner = THIS_MODULE,
diff --git a/drivers/leds/leds-net48xx.c b/drivers/leds/leds-net48xx.c
index 054360473c94..93987a12da49 100644
--- a/drivers/leds/leds-net48xx.c
+++ b/drivers/leds/leds-net48xx.c
@@ -33,26 +33,9 @@ static void net48xx_error_led_set(struct led_classdev *led_cdev,
33static struct led_classdev net48xx_error_led = { 33static struct led_classdev net48xx_error_led = {
34 .name = "net48xx::error", 34 .name = "net48xx::error",
35 .brightness_set = net48xx_error_led_set, 35 .brightness_set = net48xx_error_led_set,
36 .flags = LED_CORE_SUSPENDRESUME,
36}; 37};
37 38
38#ifdef CONFIG_PM
39static int net48xx_led_suspend(struct platform_device *dev,
40 pm_message_t state)
41{
42 led_classdev_suspend(&net48xx_error_led);
43 return 0;
44}
45
46static int net48xx_led_resume(struct platform_device *dev)
47{
48 led_classdev_resume(&net48xx_error_led);
49 return 0;
50}
51#else
52#define net48xx_led_suspend NULL
53#define net48xx_led_resume NULL
54#endif
55
56static int net48xx_led_probe(struct platform_device *pdev) 39static int net48xx_led_probe(struct platform_device *pdev)
57{ 40{
58 return led_classdev_register(&pdev->dev, &net48xx_error_led); 41 return led_classdev_register(&pdev->dev, &net48xx_error_led);
@@ -67,8 +50,6 @@ static int net48xx_led_remove(struct platform_device *pdev)
67static struct platform_driver net48xx_led_driver = { 50static struct platform_driver net48xx_led_driver = {
68 .probe = net48xx_led_probe, 51 .probe = net48xx_led_probe,
69 .remove = net48xx_led_remove, 52 .remove = net48xx_led_remove,
70 .suspend = net48xx_led_suspend,
71 .resume = net48xx_led_resume,
72 .driver = { 53 .driver = {
73 .name = DRVNAME, 54 .name = DRVNAME,
74 .owner = THIS_MODULE, 55 .owner = THIS_MODULE,
diff --git a/drivers/leds/leds-pca9532.c b/drivers/leds/leds-pca9532.c
index 4064d4f6b33b..76ec7498e2d5 100644
--- a/drivers/leds/leds-pca9532.c
+++ b/drivers/leds/leds-pca9532.c
@@ -16,6 +16,7 @@
16#include <linux/leds.h> 16#include <linux/leds.h>
17#include <linux/input.h> 17#include <linux/input.h>
18#include <linux/mutex.h> 18#include <linux/mutex.h>
19#include <linux/workqueue.h>
19#include <linux/leds-pca9532.h> 20#include <linux/leds-pca9532.h>
20 21
21static const unsigned short normal_i2c[] = { /*0x60,*/ I2C_CLIENT_END}; 22static const unsigned short normal_i2c[] = { /*0x60,*/ I2C_CLIENT_END};
@@ -34,6 +35,7 @@ struct pca9532_data {
34 struct pca9532_led leds[16]; 35 struct pca9532_led leds[16];
35 struct mutex update_lock; 36 struct mutex update_lock;
36 struct input_dev *idev; 37 struct input_dev *idev;
38 struct work_struct work;
37 u8 pwm[2]; 39 u8 pwm[2];
38 u8 psc[2]; 40 u8 psc[2];
39}; 41};
@@ -63,7 +65,7 @@ static struct i2c_driver pca9532_driver = {
63 * as a compromise we average one pwm to the values requested by all 65 * as a compromise we average one pwm to the values requested by all
64 * leds that are not ON/OFF. 66 * leds that are not ON/OFF.
65 * */ 67 * */
66static int pca9532_setpwm(struct i2c_client *client, int pwm, int blink, 68static int pca9532_calcpwm(struct i2c_client *client, int pwm, int blink,
67 enum led_brightness value) 69 enum led_brightness value)
68{ 70{
69 int a = 0, b = 0, i = 0; 71 int a = 0, b = 0, i = 0;
@@ -84,11 +86,17 @@ static int pca9532_setpwm(struct i2c_client *client, int pwm, int blink,
84 b = b/a; 86 b = b/a;
85 if (b > 0xFF) 87 if (b > 0xFF)
86 return -EINVAL; 88 return -EINVAL;
87 mutex_lock(&data->update_lock);
88 data->pwm[pwm] = b; 89 data->pwm[pwm] = b;
90 data->psc[pwm] = blink;
91 return 0;
92}
93
94static int pca9532_setpwm(struct i2c_client *client, int pwm)
95{
96 struct pca9532_data *data = i2c_get_clientdata(client);
97 mutex_lock(&data->update_lock);
89 i2c_smbus_write_byte_data(client, PCA9532_REG_PWM(pwm), 98 i2c_smbus_write_byte_data(client, PCA9532_REG_PWM(pwm),
90 data->pwm[pwm]); 99 data->pwm[pwm]);
91 data->psc[pwm] = blink;
92 i2c_smbus_write_byte_data(client, PCA9532_REG_PSC(pwm), 100 i2c_smbus_write_byte_data(client, PCA9532_REG_PSC(pwm),
93 data->psc[pwm]); 101 data->psc[pwm]);
94 mutex_unlock(&data->update_lock); 102 mutex_unlock(&data->update_lock);
@@ -124,11 +132,11 @@ static void pca9532_set_brightness(struct led_classdev *led_cdev,
124 led->state = PCA9532_ON; 132 led->state = PCA9532_ON;
125 else { 133 else {
126 led->state = PCA9532_PWM0; /* Thecus: hardcode one pwm */ 134 led->state = PCA9532_PWM0; /* Thecus: hardcode one pwm */
127 err = pca9532_setpwm(led->client, 0, 0, value); 135 err = pca9532_calcpwm(led->client, 0, 0, value);
128 if (err) 136 if (err)
129 return; /* XXX: led api doesn't allow error code? */ 137 return; /* XXX: led api doesn't allow error code? */
130 } 138 }
131 pca9532_setled(led); 139 schedule_work(&led->work);
132} 140}
133 141
134static int pca9532_set_blink(struct led_classdev *led_cdev, 142static int pca9532_set_blink(struct led_classdev *led_cdev,
@@ -137,6 +145,7 @@ static int pca9532_set_blink(struct led_classdev *led_cdev,
137 struct pca9532_led *led = ldev_to_led(led_cdev); 145 struct pca9532_led *led = ldev_to_led(led_cdev);
138 struct i2c_client *client = led->client; 146 struct i2c_client *client = led->client;
139 int psc; 147 int psc;
148 int err = 0;
140 149
141 if (*delay_on == 0 && *delay_off == 0) { 150 if (*delay_on == 0 && *delay_off == 0) {
142 /* led subsystem ask us for a blink rate */ 151 /* led subsystem ask us for a blink rate */
@@ -148,11 +157,15 @@ static int pca9532_set_blink(struct led_classdev *led_cdev,
148 157
149 /* Thecus specific: only use PSC/PWM 0 */ 158 /* Thecus specific: only use PSC/PWM 0 */
150 psc = (*delay_on * 152-1)/1000; 159 psc = (*delay_on * 152-1)/1000;
151 return pca9532_setpwm(client, 0, psc, led_cdev->brightness); 160 err = pca9532_calcpwm(client, 0, psc, led_cdev->brightness);
161 if (err)
162 return err;
163 schedule_work(&led->work);
164 return 0;
152} 165}
153 166
154int pca9532_event(struct input_dev *dev, unsigned int type, unsigned int code, 167static int pca9532_event(struct input_dev *dev, unsigned int type,
155 int value) 168 unsigned int code, int value)
156{ 169{
157 struct pca9532_data *data = input_get_drvdata(dev); 170 struct pca9532_data *data = input_get_drvdata(dev);
158 171
@@ -165,13 +178,28 @@ int pca9532_event(struct input_dev *dev, unsigned int type, unsigned int code,
165 else 178 else
166 data->pwm[1] = 0; 179 data->pwm[1] = 0;
167 180
168 dev_info(&dev->dev, "setting beep to %d \n", data->pwm[1]); 181 schedule_work(&data->work);
182
183 return 0;
184}
185
186static void pca9532_input_work(struct work_struct *work)
187{
188 struct pca9532_data *data;
189 data = container_of(work, struct pca9532_data, work);
169 mutex_lock(&data->update_lock); 190 mutex_lock(&data->update_lock);
170 i2c_smbus_write_byte_data(data->client, PCA9532_REG_PWM(1), 191 i2c_smbus_write_byte_data(data->client, PCA9532_REG_PWM(1),
171 data->pwm[1]); 192 data->pwm[1]);
172 mutex_unlock(&data->update_lock); 193 mutex_unlock(&data->update_lock);
194}
173 195
174 return 0; 196static void pca9532_led_work(struct work_struct *work)
197{
198 struct pca9532_led *led;
199 led = container_of(work, struct pca9532_led, work);
200 if (led->state == PCA9532_PWM0)
201 pca9532_setpwm(led->client, 0);
202 pca9532_setled(led);
175} 203}
176 204
177static int pca9532_configure(struct i2c_client *client, 205static int pca9532_configure(struct i2c_client *client,
@@ -204,8 +232,9 @@ static int pca9532_configure(struct i2c_client *client,
204 led->ldev.brightness = LED_OFF; 232 led->ldev.brightness = LED_OFF;
205 led->ldev.brightness_set = pca9532_set_brightness; 233 led->ldev.brightness_set = pca9532_set_brightness;
206 led->ldev.blink_set = pca9532_set_blink; 234 led->ldev.blink_set = pca9532_set_blink;
207 if (led_classdev_register(&client->dev, 235 INIT_WORK(&led->work, pca9532_led_work);
208 &led->ldev) < 0) { 236 err = led_classdev_register(&client->dev, &led->ldev);
237 if (err < 0) {
209 dev_err(&client->dev, 238 dev_err(&client->dev,
210 "couldn't register LED %s\n", 239 "couldn't register LED %s\n",
211 led->name); 240 led->name);
@@ -233,9 +262,11 @@ static int pca9532_configure(struct i2c_client *client,
233 BIT_MASK(SND_TONE); 262 BIT_MASK(SND_TONE);
234 data->idev->event = pca9532_event; 263 data->idev->event = pca9532_event;
235 input_set_drvdata(data->idev, data); 264 input_set_drvdata(data->idev, data);
265 INIT_WORK(&data->work, pca9532_input_work);
236 err = input_register_device(data->idev); 266 err = input_register_device(data->idev);
237 if (err) { 267 if (err) {
238 input_free_device(data->idev); 268 input_free_device(data->idev);
269 cancel_work_sync(&data->work);
239 data->idev = NULL; 270 data->idev = NULL;
240 goto exit; 271 goto exit;
241 } 272 }
@@ -252,18 +283,19 @@ exit:
252 break; 283 break;
253 case PCA9532_TYPE_LED: 284 case PCA9532_TYPE_LED:
254 led_classdev_unregister(&data->leds[i].ldev); 285 led_classdev_unregister(&data->leds[i].ldev);
286 cancel_work_sync(&data->leds[i].work);
255 break; 287 break;
256 case PCA9532_TYPE_N2100_BEEP: 288 case PCA9532_TYPE_N2100_BEEP:
257 if (data->idev != NULL) { 289 if (data->idev != NULL) {
258 input_unregister_device(data->idev); 290 input_unregister_device(data->idev);
259 input_free_device(data->idev); 291 input_free_device(data->idev);
292 cancel_work_sync(&data->work);
260 data->idev = NULL; 293 data->idev = NULL;
261 } 294 }
262 break; 295 break;
263 } 296 }
264 297
265 return err; 298 return err;
266
267} 299}
268 300
269static int pca9532_probe(struct i2c_client *client, 301static int pca9532_probe(struct i2c_client *client,
@@ -271,12 +303,16 @@ static int pca9532_probe(struct i2c_client *client,
271{ 303{
272 struct pca9532_data *data = i2c_get_clientdata(client); 304 struct pca9532_data *data = i2c_get_clientdata(client);
273 struct pca9532_platform_data *pca9532_pdata = client->dev.platform_data; 305 struct pca9532_platform_data *pca9532_pdata = client->dev.platform_data;
306 int err;
307
308 if (!pca9532_pdata)
309 return -EIO;
274 310
275 if (!i2c_check_functionality(client->adapter, 311 if (!i2c_check_functionality(client->adapter,
276 I2C_FUNC_SMBUS_BYTE_DATA)) 312 I2C_FUNC_SMBUS_BYTE_DATA))
277 return -EIO; 313 return -EIO;
278 314
279 data = kzalloc(sizeof(struct pca9532_data), GFP_KERNEL); 315 data = kzalloc(sizeof(*data), GFP_KERNEL);
280 if (!data) 316 if (!data)
281 return -ENOMEM; 317 return -ENOMEM;
282 318
@@ -285,12 +321,13 @@ static int pca9532_probe(struct i2c_client *client,
285 data->client = client; 321 data->client = client;
286 mutex_init(&data->update_lock); 322 mutex_init(&data->update_lock);
287 323
288 if (pca9532_pdata == NULL) 324 err = pca9532_configure(client, data, pca9532_pdata);
289 return -EIO; 325 if (err) {
290 326 kfree(data);
291 pca9532_configure(client, data, pca9532_pdata); 327 i2c_set_clientdata(client, NULL);
292 return 0; 328 }
293 329
330 return err;
294} 331}
295 332
296static int pca9532_remove(struct i2c_client *client) 333static int pca9532_remove(struct i2c_client *client)
@@ -303,11 +340,13 @@ static int pca9532_remove(struct i2c_client *client)
303 break; 340 break;
304 case PCA9532_TYPE_LED: 341 case PCA9532_TYPE_LED:
305 led_classdev_unregister(&data->leds[i].ldev); 342 led_classdev_unregister(&data->leds[i].ldev);
343 cancel_work_sync(&data->leds[i].work);
306 break; 344 break;
307 case PCA9532_TYPE_N2100_BEEP: 345 case PCA9532_TYPE_N2100_BEEP:
308 if (data->idev != NULL) { 346 if (data->idev != NULL) {
309 input_unregister_device(data->idev); 347 input_unregister_device(data->idev);
310 input_free_device(data->idev); 348 input_free_device(data->idev);
349 cancel_work_sync(&data->work);
311 data->idev = NULL; 350 data->idev = NULL;
312 } 351 }
313 break; 352 break;
diff --git a/drivers/leds/leds-s3c24xx.c b/drivers/leds/leds-s3c24xx.c
index 25a07f2643ad..4d81131542ae 100644
--- a/drivers/leds/leds-s3c24xx.c
+++ b/drivers/leds/leds-s3c24xx.c
@@ -82,6 +82,7 @@ static int s3c24xx_led_probe(struct platform_device *dev)
82 led->cdev.brightness_set = s3c24xx_led_set; 82 led->cdev.brightness_set = s3c24xx_led_set;
83 led->cdev.default_trigger = pdata->def_trigger; 83 led->cdev.default_trigger = pdata->def_trigger;
84 led->cdev.name = pdata->name; 84 led->cdev.name = pdata->name;
85 led->cdev.flags |= LED_CORE_SUSPENDRESUME;
85 86
86 led->pdata = pdata; 87 led->pdata = pdata;
87 88
@@ -111,33 +112,9 @@ static int s3c24xx_led_probe(struct platform_device *dev)
111 return ret; 112 return ret;
112} 113}
113 114
114
115#ifdef CONFIG_PM
116static int s3c24xx_led_suspend(struct platform_device *dev, pm_message_t state)
117{
118 struct s3c24xx_gpio_led *led = pdev_to_gpio(dev);
119
120 led_classdev_suspend(&led->cdev);
121 return 0;
122}
123
124static int s3c24xx_led_resume(struct platform_device *dev)
125{
126 struct s3c24xx_gpio_led *led = pdev_to_gpio(dev);
127
128 led_classdev_resume(&led->cdev);
129 return 0;
130}
131#else
132#define s3c24xx_led_suspend NULL
133#define s3c24xx_led_resume NULL
134#endif
135
136static struct platform_driver s3c24xx_led_driver = { 115static struct platform_driver s3c24xx_led_driver = {
137 .probe = s3c24xx_led_probe, 116 .probe = s3c24xx_led_probe,
138 .remove = s3c24xx_led_remove, 117 .remove = s3c24xx_led_remove,
139 .suspend = s3c24xx_led_suspend,
140 .resume = s3c24xx_led_resume,
141 .driver = { 118 .driver = {
142 .name = "s3c24xx_led", 119 .name = "s3c24xx_led",
143 .owner = THIS_MODULE, 120 .owner = THIS_MODULE,
diff --git a/drivers/leds/leds-wm8350.c b/drivers/leds/leds-wm8350.c
new file mode 100644
index 000000000000..38c6bcb07e6c
--- /dev/null
+++ b/drivers/leds/leds-wm8350.c
@@ -0,0 +1,311 @@
1/*
2 * LED driver for WM8350 driven LEDS.
3 *
4 * Copyright(C) 2007, 2008 Wolfson Microelectronics PLC.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 */
11
12#include <linux/kernel.h>
13#include <linux/init.h>
14#include <linux/platform_device.h>
15#include <linux/leds.h>
16#include <linux/err.h>
17#include <linux/mfd/wm8350/pmic.h>
18#include <linux/regulator/consumer.h>
19
20/* Microamps */
21static const int isink_cur[] = {
22 4,
23 5,
24 6,
25 7,
26 8,
27 10,
28 11,
29 14,
30 16,
31 19,
32 23,
33 27,
34 32,
35 39,
36 46,
37 54,
38 65,
39 77,
40 92,
41 109,
42 130,
43 154,
44 183,
45 218,
46 259,
47 308,
48 367,
49 436,
50 518,
51 616,
52 733,
53 872,
54 1037,
55 1233,
56 1466,
57 1744,
58 2073,
59 2466,
60 2933,
61 3487,
62 4147,
63 4932,
64 5865,
65 6975,
66 8294,
67 9864,
68 11730,
69 13949,
70 16589,
71 19728,
72 23460,
73 27899,
74 33178,
75 39455,
76 46920,
77 55798,
78 66355,
79 78910,
80 93840,
81 111596,
82 132710,
83 157820,
84 187681,
85 223191
86};
87
88#define to_wm8350_led(led_cdev) \
89 container_of(led_cdev, struct wm8350_led, cdev)
90
91static void wm8350_led_enable(struct wm8350_led *led)
92{
93 int ret;
94
95 if (led->enabled)
96 return;
97
98 ret = regulator_enable(led->isink);
99 if (ret != 0) {
100 dev_err(led->cdev.dev, "Failed to enable ISINK: %d\n", ret);
101 return;
102 }
103
104 ret = regulator_enable(led->dcdc);
105 if (ret != 0) {
106 dev_err(led->cdev.dev, "Failed to enable DCDC: %d\n", ret);
107 regulator_disable(led->isink);
108 return;
109 }
110
111 led->enabled = 1;
112}
113
114static void wm8350_led_disable(struct wm8350_led *led)
115{
116 int ret;
117
118 if (!led->enabled)
119 return;
120
121 ret = regulator_disable(led->dcdc);
122 if (ret != 0) {
123 dev_err(led->cdev.dev, "Failed to disable DCDC: %d\n", ret);
124 return;
125 }
126
127 ret = regulator_disable(led->isink);
128 if (ret != 0) {
129 dev_err(led->cdev.dev, "Failed to disable ISINK: %d\n", ret);
130 regulator_enable(led->dcdc);
131 return;
132 }
133
134 led->enabled = 0;
135}
136
137static void led_work(struct work_struct *work)
138{
139 struct wm8350_led *led = container_of(work, struct wm8350_led, work);
140 int ret;
141 int uA;
142 unsigned long flags;
143
144 mutex_lock(&led->mutex);
145
146 spin_lock_irqsave(&led->value_lock, flags);
147
148 if (led->value == LED_OFF) {
149 spin_unlock_irqrestore(&led->value_lock, flags);
150 wm8350_led_disable(led);
151 goto out;
152 }
153
154 /* This scales linearly into the index of valid current
155 * settings which results in a linear scaling of perceived
156 * brightness due to the non-linear current settings provided
157 * by the hardware.
158 */
159 uA = (led->max_uA_index * led->value) / LED_FULL;
160 spin_unlock_irqrestore(&led->value_lock, flags);
161 BUG_ON(uA >= ARRAY_SIZE(isink_cur));
162
163 ret = regulator_set_current_limit(led->isink, isink_cur[uA],
164 isink_cur[uA]);
165 if (ret != 0)
166 dev_err(led->cdev.dev, "Failed to set %duA: %d\n",
167 isink_cur[uA], ret);
168
169 wm8350_led_enable(led);
170
171out:
172 mutex_unlock(&led->mutex);
173}
174
175static void wm8350_led_set(struct led_classdev *led_cdev,
176 enum led_brightness value)
177{
178 struct wm8350_led *led = to_wm8350_led(led_cdev);
179 unsigned long flags;
180
181 spin_lock_irqsave(&led->value_lock, flags);
182 led->value = value;
183 schedule_work(&led->work);
184 spin_unlock_irqrestore(&led->value_lock, flags);
185}
186
187static void wm8350_led_shutdown(struct platform_device *pdev)
188{
189 struct wm8350_led *led = platform_get_drvdata(pdev);
190
191 mutex_lock(&led->mutex);
192 led->value = LED_OFF;
193 wm8350_led_disable(led);
194 mutex_unlock(&led->mutex);
195}
196
197static int wm8350_led_probe(struct platform_device *pdev)
198{
199 struct regulator *isink, *dcdc;
200 struct wm8350_led *led;
201 struct wm8350_led_platform_data *pdata = pdev->dev.platform_data;
202 int ret, i;
203
204 if (pdata == NULL) {
205 dev_err(&pdev->dev, "no platform data\n");
206 return -ENODEV;
207 }
208
209 if (pdata->max_uA < isink_cur[0]) {
210 dev_err(&pdev->dev, "Invalid maximum current %duA\n",
211 pdata->max_uA);
212 return -EINVAL;
213 }
214
215 isink = regulator_get(&pdev->dev, "led_isink");
216 if (IS_ERR(isink)) {
217 printk(KERN_ERR "%s: cant get ISINK\n", __func__);
218 return PTR_ERR(isink);
219 }
220
221 dcdc = regulator_get(&pdev->dev, "led_vcc");
222 if (IS_ERR(dcdc)) {
223 printk(KERN_ERR "%s: cant get DCDC\n", __func__);
224 ret = PTR_ERR(dcdc);
225 goto err_isink;
226 }
227
228 led = kzalloc(sizeof(*led), GFP_KERNEL);
229 if (led == NULL) {
230 ret = -ENOMEM;
231 goto err_dcdc;
232 }
233
234 led->cdev.brightness_set = wm8350_led_set;
235 led->cdev.default_trigger = pdata->default_trigger;
236 led->cdev.name = pdata->name;
237 led->cdev.flags |= LED_CORE_SUSPENDRESUME;
238 led->enabled = regulator_is_enabled(isink);
239 led->isink = isink;
240 led->dcdc = dcdc;
241
242 for (i = 0; i < ARRAY_SIZE(isink_cur) - 1; i++)
243 if (isink_cur[i] >= pdata->max_uA)
244 break;
245 led->max_uA_index = i;
246 if (pdata->max_uA != isink_cur[i])
247 dev_warn(&pdev->dev,
248 "Maximum current %duA is not directly supported,"
249 " check platform data\n",
250 pdata->max_uA);
251
252 spin_lock_init(&led->value_lock);
253 mutex_init(&led->mutex);
254 INIT_WORK(&led->work, led_work);
255 led->value = LED_OFF;
256 platform_set_drvdata(pdev, led);
257
258 ret = led_classdev_register(&pdev->dev, &led->cdev);
259 if (ret < 0)
260 goto err_led;
261
262 return 0;
263
264 err_led:
265 kfree(led);
266 err_dcdc:
267 regulator_put(dcdc);
268 err_isink:
269 regulator_put(isink);
270 return ret;
271}
272
273static int wm8350_led_remove(struct platform_device *pdev)
274{
275 struct wm8350_led *led = platform_get_drvdata(pdev);
276
277 led_classdev_unregister(&led->cdev);
278 flush_scheduled_work();
279 wm8350_led_disable(led);
280 regulator_put(led->dcdc);
281 regulator_put(led->isink);
282 kfree(led);
283 return 0;
284}
285
286static struct platform_driver wm8350_led_driver = {
287 .driver = {
288 .name = "wm8350-led",
289 .owner = THIS_MODULE,
290 },
291 .probe = wm8350_led_probe,
292 .remove = wm8350_led_remove,
293 .shutdown = wm8350_led_shutdown,
294};
295
296static int __devinit wm8350_led_init(void)
297{
298 return platform_driver_register(&wm8350_led_driver);
299}
300module_init(wm8350_led_init);
301
302static void wm8350_led_exit(void)
303{
304 platform_driver_unregister(&wm8350_led_driver);
305}
306module_exit(wm8350_led_exit);
307
308MODULE_AUTHOR("Mark Brown");
309MODULE_DESCRIPTION("WM8350 LED driver");
310MODULE_LICENSE("GPL");
311MODULE_ALIAS("platform:wm8350-led");
diff --git a/drivers/leds/leds-wrap.c b/drivers/leds/leds-wrap.c
index 2f3aa87f2a1f..2982c86ac4cf 100644
--- a/drivers/leds/leds-wrap.c
+++ b/drivers/leds/leds-wrap.c
@@ -56,40 +56,21 @@ static struct led_classdev wrap_power_led = {
56 .name = "wrap::power", 56 .name = "wrap::power",
57 .brightness_set = wrap_power_led_set, 57 .brightness_set = wrap_power_led_set,
58 .default_trigger = "default-on", 58 .default_trigger = "default-on",
59 .flags = LED_CORE_SUSPENDRESUME,
59}; 60};
60 61
61static struct led_classdev wrap_error_led = { 62static struct led_classdev wrap_error_led = {
62 .name = "wrap::error", 63 .name = "wrap::error",
63 .brightness_set = wrap_error_led_set, 64 .brightness_set = wrap_error_led_set,
65 .flags = LED_CORE_SUSPENDRESUME,
64}; 66};
65 67
66static struct led_classdev wrap_extra_led = { 68static struct led_classdev wrap_extra_led = {
67 .name = "wrap::extra", 69 .name = "wrap::extra",
68 .brightness_set = wrap_extra_led_set, 70 .brightness_set = wrap_extra_led_set,
71 .flags = LED_CORE_SUSPENDRESUME,
69}; 72};
70 73
71#ifdef CONFIG_PM
72static int wrap_led_suspend(struct platform_device *dev,
73 pm_message_t state)
74{
75 led_classdev_suspend(&wrap_power_led);
76 led_classdev_suspend(&wrap_error_led);
77 led_classdev_suspend(&wrap_extra_led);
78 return 0;
79}
80
81static int wrap_led_resume(struct platform_device *dev)
82{
83 led_classdev_resume(&wrap_power_led);
84 led_classdev_resume(&wrap_error_led);
85 led_classdev_resume(&wrap_extra_led);
86 return 0;
87}
88#else
89#define wrap_led_suspend NULL
90#define wrap_led_resume NULL
91#endif
92
93static int wrap_led_probe(struct platform_device *pdev) 74static int wrap_led_probe(struct platform_device *pdev)
94{ 75{
95 int ret; 76 int ret;
@@ -127,8 +108,6 @@ static int wrap_led_remove(struct platform_device *pdev)
127static struct platform_driver wrap_led_driver = { 108static struct platform_driver wrap_led_driver = {
128 .probe = wrap_led_probe, 109 .probe = wrap_led_probe,
129 .remove = wrap_led_remove, 110 .remove = wrap_led_remove,
130 .suspend = wrap_led_suspend,
131 .resume = wrap_led_resume,
132 .driver = { 111 .driver = {
133 .name = DRVNAME, 112 .name = DRVNAME,
134 .owner = THIS_MODULE, 113 .owner = THIS_MODULE,
diff --git a/drivers/leds/ledtrig-timer.c b/drivers/leds/ledtrig-timer.c
index db681962d7bb..3d6531396dda 100644
--- a/drivers/leds/ledtrig-timer.c
+++ b/drivers/leds/ledtrig-timer.c
@@ -199,6 +199,7 @@ err_out:
199static void timer_trig_deactivate(struct led_classdev *led_cdev) 199static void timer_trig_deactivate(struct led_classdev *led_cdev)
200{ 200{
201 struct timer_trig_data *timer_data = led_cdev->trigger_data; 201 struct timer_trig_data *timer_data = led_cdev->trigger_data;
202 unsigned long on = 0, off = 0;
202 203
203 if (timer_data) { 204 if (timer_data) {
204 device_remove_file(led_cdev->dev, &dev_attr_delay_on); 205 device_remove_file(led_cdev->dev, &dev_attr_delay_on);
@@ -206,6 +207,10 @@ static void timer_trig_deactivate(struct led_classdev *led_cdev)
206 del_timer_sync(&timer_data->timer); 207 del_timer_sync(&timer_data->timer);
207 kfree(timer_data); 208 kfree(timer_data);
208 } 209 }
210
211 /* If there is hardware support for blinking, stop it */
212 if (led_cdev->blink_set)
213 led_cdev->blink_set(led_cdev, &on, &off);
209} 214}
210 215
211static struct led_trigger timer_led_trigger = { 216static struct led_trigger timer_led_trigger = {
diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index 3a273ccef3f2..f92595c8f165 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -1453,6 +1453,9 @@ void wm8350_device_exit(struct wm8350 *wm8350)
1453{ 1453{
1454 int i; 1454 int i;
1455 1455
1456 for (i = 0; i < ARRAY_SIZE(wm8350->pmic.led); i++)
1457 platform_device_unregister(wm8350->pmic.led[i].pdev);
1458
1456 for (i = 0; i < ARRAY_SIZE(wm8350->pmic.pdev); i++) 1459 for (i = 0; i < ARRAY_SIZE(wm8350->pmic.pdev); i++)
1457 platform_device_unregister(wm8350->pmic.pdev[i]); 1460 platform_device_unregister(wm8350->pmic.pdev[i]);
1458 1461
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 3949a1c73451..419c378bd24b 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -120,7 +120,7 @@ config TIFM_CORE
120 cards are supported via 'MMC/SD Card support: TI Flash Media MMC/SD 120 cards are supported via 'MMC/SD Card support: TI Flash Media MMC/SD
121 Interface support (MMC_TIFM_SD)'. 121 Interface support (MMC_TIFM_SD)'.
122 122
123 To compile this driver as a module, choose M here: the module will 123 To compile this driver as a module, choose M here: the module will
124 be called tifm_core. 124 be called tifm_core.
125 125
126config TIFM_7XX1 126config TIFM_7XX1
@@ -133,100 +133,9 @@ config TIFM_7XX1
133 To make actual use of the device, you will have to select some 133 To make actual use of the device, you will have to select some
134 flash card format drivers, as outlined in the TIFM_CORE Help. 134 flash card format drivers, as outlined in the TIFM_CORE Help.
135 135
136 To compile this driver as a module, choose M here: the module will 136 To compile this driver as a module, choose M here: the module will
137 be called tifm_7xx1. 137 be called tifm_7xx1.
138 138
139config ACER_WMI
140 tristate "Acer WMI Laptop Extras (EXPERIMENTAL)"
141 depends on X86
142 depends on EXPERIMENTAL
143 depends on ACPI
144 depends on LEDS_CLASS
145 depends on NEW_LEDS
146 depends on BACKLIGHT_CLASS_DEVICE
147 depends on SERIO_I8042
148 depends on RFKILL
149 select ACPI_WMI
150 ---help---
151 This is a driver for newer Acer (and Wistron) laptops. It adds
152 wireless radio and bluetooth control, and on some laptops,
153 exposes the mail LED and LCD backlight.
154
155 For more information about this driver see
156 <file:Documentation/laptops/acer-wmi.txt>
157
158 If you have an ACPI-WMI compatible Acer/ Wistron laptop, say Y or M
159 here.
160
161config ASUS_LAPTOP
162 tristate "Asus Laptop Extras (EXPERIMENTAL)"
163 depends on X86
164 depends on ACPI
165 depends on EXPERIMENTAL && !ACPI_ASUS
166 depends on LEDS_CLASS
167 depends on NEW_LEDS
168 depends on BACKLIGHT_CLASS_DEVICE
169 ---help---
170 This is the new Linux driver for Asus laptops. It may also support some
171 MEDION, JVC or VICTOR laptops. It makes all the extra buttons generate
172 standard ACPI events that go through /proc/acpi/events. It also adds
173 support for video output switching, LCD backlight control, Bluetooth and
174 Wlan control, and most importantly, allows you to blink those fancy LEDs.
175
176 For more information and a userspace daemon for handling the extra
177 buttons see <http://acpi4asus.sf.net/>.
178
179 If you have an ACPI-compatible ASUS laptop, say Y or M here.
180
181config FUJITSU_LAPTOP
182 tristate "Fujitsu Laptop Extras"
183 depends on X86
184 depends on ACPI
185 depends on INPUT
186 depends on BACKLIGHT_CLASS_DEVICE
187 ---help---
188 This is a driver for laptops built by Fujitsu:
189
190 * P2xxx/P5xxx/S6xxx/S7xxx series Lifebooks
191 * Possibly other Fujitsu laptop models
192 * Tested with S6410 and S7020
193
194 It adds support for LCD brightness control and some hotkeys.
195
196 If you have a Fujitsu laptop, say Y or M here.
197
198config FUJITSU_LAPTOP_DEBUG
199 bool "Verbose debug mode for Fujitsu Laptop Extras"
200 depends on FUJITSU_LAPTOP
201 default n
202 ---help---
203 Enables extra debug output from the fujitsu extras driver, at the
204 expense of a slight increase in driver size.
205
206 If you are not sure, say N here.
207
208config TC1100_WMI
209 tristate "HP Compaq TC1100 Tablet WMI Extras (EXPERIMENTAL)"
210 depends on X86 && !X86_64
211 depends on EXPERIMENTAL
212 depends on ACPI
213 select ACPI_WMI
214 ---help---
215 This is a driver for the WMI extensions (wireless and bluetooth power
216 control) of the HP Compaq TC1100 tablet.
217
218config HP_WMI
219 tristate "HP WMI extras"
220 depends on ACPI_WMI
221 depends on INPUT
222 depends on RFKILL
223 help
224 Say Y here if you want to support WMI-based hotkeys on HP laptops and
225 to read data from WMI such as docking or ambient light sensor state.
226
227 To compile this driver as a module, choose M here: the module will
228 be called hp-wmi.
229
230config ICS932S401 139config ICS932S401
231 tristate "Integrated Circuits ICS932S401" 140 tristate "Integrated Circuits ICS932S401"
232 depends on I2C && EXPERIMENTAL 141 depends on I2C && EXPERIMENTAL
@@ -237,170 +146,6 @@ config ICS932S401
237 This driver can also be built as a module. If so, the module 146 This driver can also be built as a module. If so, the module
238 will be called ics932s401. 147 will be called ics932s401.
239 148
240config MSI_LAPTOP
241 tristate "MSI Laptop Extras"
242 depends on X86
243 depends on ACPI
244 depends on BACKLIGHT_CLASS_DEVICE
245 ---help---
246 This is a driver for laptops built by MSI (MICRO-STAR
247 INTERNATIONAL):
248
249 MSI MegaBook S270 (MS-1013)
250 Cytron/TCM/Medion/Tchibo MD96100/SAM2000
251
252 It adds support for Bluetooth, WLAN and LCD brightness control.
253
254 More information about this driver is available at
255 <http://0pointer.de/lennart/tchibo.html>.
256
257 If you have an MSI S270 laptop, say Y or M here.
258
259config PANASONIC_LAPTOP
260 tristate "Panasonic Laptop Extras"
261 depends on X86 && INPUT && ACPI
262 depends on BACKLIGHT_CLASS_DEVICE
263 ---help---
264 This driver adds support for access to backlight control and hotkeys
265 on Panasonic Let's Note laptops.
266
267 If you have a Panasonic Let's note laptop (such as the R1(N variant),
268 R2, R3, R5, T2, W2 and Y2 series), say Y.
269
270config COMPAL_LAPTOP
271 tristate "Compal Laptop Extras"
272 depends on X86
273 depends on ACPI
274 depends on BACKLIGHT_CLASS_DEVICE
275 ---help---
276 This is a driver for laptops built by Compal:
277
278 Compal FL90/IFL90
279 Compal FL91/IFL91
280 Compal FL92/JFL92
281 Compal FT00/IFT00
282
283 It adds support for Bluetooth, WLAN and LCD brightness control.
284
285 If you have an Compal FL9x/IFL9x/FT00 laptop, say Y or M here.
286
287config SONY_LAPTOP
288 tristate "Sony Laptop Extras"
289 depends on X86 && ACPI
290 select BACKLIGHT_CLASS_DEVICE
291 depends on INPUT
292 ---help---
293 This mini-driver drives the SNC and SPIC devices present in the ACPI
294 BIOS of the Sony Vaio laptops.
295
296 It gives access to some extra laptop functionalities like Bluetooth,
297 screen brightness control, Fn keys and allows powering on/off some
298 devices.
299
300 Read <file:Documentation/laptops/sony-laptop.txt> for more information.
301
302config SONYPI_COMPAT
303 bool "Sonypi compatibility"
304 depends on SONY_LAPTOP
305 ---help---
306 Build the sonypi driver compatibility code into the sony-laptop driver.
307
308config THINKPAD_ACPI
309 tristate "ThinkPad ACPI Laptop Extras"
310 depends on X86 && ACPI
311 select BACKLIGHT_LCD_SUPPORT
312 select BACKLIGHT_CLASS_DEVICE
313 select HWMON
314 select NVRAM
315 select INPUT
316 select NEW_LEDS
317 select LEDS_CLASS
318 select NET
319 select RFKILL
320 ---help---
321 This is a driver for the IBM and Lenovo ThinkPad laptops. It adds
322 support for Fn-Fx key combinations, Bluetooth control, video
323 output switching, ThinkLight control, UltraBay eject and more.
324 For more information about this driver see
325 <file:Documentation/laptops/thinkpad-acpi.txt> and
326 <http://ibm-acpi.sf.net/> .
327
328 This driver was formerly known as ibm-acpi.
329
330 If you have an IBM or Lenovo ThinkPad laptop, say Y or M here.
331
332config THINKPAD_ACPI_DEBUG
333 bool "Verbose debug mode"
334 depends on THINKPAD_ACPI
335 default n
336 ---help---
337 Enables extra debugging information, at the expense of a slightly
338 increase in driver size.
339
340 If you are not sure, say N here.
341
342config THINKPAD_ACPI_DOCK
343 bool "Legacy Docking Station Support"
344 depends on THINKPAD_ACPI
345 depends on ACPI_DOCK=n
346 default n
347 ---help---
348 Allows the thinkpad_acpi driver to handle docking station events.
349 This support was made obsolete by the generic ACPI docking station
350 support (CONFIG_ACPI_DOCK). It will allow locking and removing the
351 laptop from the docking station, but will not properly connect PCI
352 devices.
353
354 If you are not sure, say N here.
355
356config THINKPAD_ACPI_BAY
357 bool "Legacy Removable Bay Support"
358 depends on THINKPAD_ACPI
359 default y
360 ---help---
361 Allows the thinkpad_acpi driver to handle removable bays. It will
362 electrically disable the device in the bay, and also generate
363 notifications when the bay lever is ejected or inserted.
364
365 If you are not sure, say Y here.
366
367config THINKPAD_ACPI_VIDEO
368 bool "Video output control support"
369 depends on THINKPAD_ACPI
370 default y
371 ---help---
372 Allows the thinkpad_acpi driver to provide an interface to control
373 the various video output ports.
374
375 This feature often won't work well, depending on ThinkPad model,
376 display state, video output devices in use, whether there is a X
377 server running, phase of the moon, and the current mood of
378 Schroedinger's cat. If you can use X.org's RandR to control
379 your ThinkPad's video output ports instead of this feature,
380 don't think twice: do it and say N here to save some memory.
381
382 If you are not sure, say Y here.
383
384config THINKPAD_ACPI_HOTKEY_POLL
385 bool "Support NVRAM polling for hot keys"
386 depends on THINKPAD_ACPI
387 default y
388 ---help---
389 Some thinkpad models benefit from NVRAM polling to detect a few of
390 the hot key press events. If you know your ThinkPad model does not
391 need to do NVRAM polling to support any of the hot keys you use,
392 unselecting this option will save about 1kB of memory.
393
394 ThinkPads T40 and newer, R52 and newer, and X31 and newer are
395 unlikely to need NVRAM polling in their latest BIOS versions.
396
397 NVRAM polling can detect at most the following keys: ThinkPad/Access
398 IBM, Zoom, Switch Display (fn+F7), ThinkLight, Volume up/down/mute,
399 Brightness up/down, Display Expand (fn+F8), Hibernate (fn+F12).
400
401 If you are not sure, say Y here. The driver enables polling only if
402 it is strictly necessary to do so.
403
404config ATMEL_SSC 149config ATMEL_SSC
405 tristate "Device driver for Atmel SSC peripheral" 150 tristate "Device driver for Atmel SSC peripheral"
406 depends on AVR32 || ARCH_AT91 151 depends on AVR32 || ARCH_AT91
@@ -413,31 +158,6 @@ config ATMEL_SSC
413 158
414 If unsure, say N. 159 If unsure, say N.
415 160
416config INTEL_MENLOW
417 tristate "Thermal Management driver for Intel menlow platform"
418 depends on ACPI_THERMAL
419 select THERMAL
420 depends on X86
421 ---help---
422 ACPI thermal management enhancement driver on
423 Intel Menlow platform.
424
425 If unsure, say N.
426
427config EEEPC_LAPTOP
428 tristate "Eee PC Hotkey Driver (EXPERIMENTAL)"
429 depends on X86
430 depends on ACPI
431 depends on BACKLIGHT_CLASS_DEVICE
432 depends on HWMON
433 depends on EXPERIMENTAL
434 depends on RFKILL
435 ---help---
436 This driver supports the Fn-Fx keys on Eee PC laptops.
437 It also adds the ability to switch camera/wlan on/off.
438
439 If you have an Eee PC laptop, say Y or M here.
440
441config ENCLOSURE_SERVICES 161config ENCLOSURE_SERVICES
442 tristate "Enclosure Services" 162 tristate "Enclosure Services"
443 default n 163 default n
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 5de863a0e395..9cf8ae6e4b39 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -1,33 +1,20 @@
1# 1#
2# Makefile for misc devices that really don't fit anywhere else. 2# Makefile for misc devices that really don't fit anywhere else.
3# 3#
4obj- := misc.o # Dummy rule to force built-in.o to be made
5 4
6obj-$(CONFIG_IBM_ASM) += ibmasm/ 5obj-$(CONFIG_IBM_ASM) += ibmasm/
7obj-$(CONFIG_HDPU_FEATURES) += hdpuftrs/ 6obj-$(CONFIG_HDPU_FEATURES) += hdpuftrs/
8obj-$(CONFIG_ASUS_LAPTOP) += asus-laptop.o
9obj-$(CONFIG_EEEPC_LAPTOP) += eeepc-laptop.o
10obj-$(CONFIG_MSI_LAPTOP) += msi-laptop.o
11obj-$(CONFIG_COMPAL_LAPTOP) += compal-laptop.o
12obj-$(CONFIG_ACER_WMI) += acer-wmi.o
13obj-$(CONFIG_ATMEL_PWM) += atmel_pwm.o 7obj-$(CONFIG_ATMEL_PWM) += atmel_pwm.o
14obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o 8obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o
15obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o 9obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o
16obj-$(CONFIG_HP_WMI) += hp-wmi.o
17obj-$(CONFIG_ICS932S401) += ics932s401.o 10obj-$(CONFIG_ICS932S401) += ics932s401.o
18obj-$(CONFIG_TC1100_WMI) += tc1100-wmi.o
19obj-$(CONFIG_LKDTM) += lkdtm.o 11obj-$(CONFIG_LKDTM) += lkdtm.o
20obj-$(CONFIG_TIFM_CORE) += tifm_core.o 12obj-$(CONFIG_TIFM_CORE) += tifm_core.o
21obj-$(CONFIG_DELL_LAPTOP) += dell-laptop.o 13obj-$(CONFIG_DELL_LAPTOP) += dell-laptop.o
22obj-$(CONFIG_TIFM_7XX1) += tifm_7xx1.o 14obj-$(CONFIG_TIFM_7XX1) += tifm_7xx1.o
23obj-$(CONFIG_PHANTOM) += phantom.o 15obj-$(CONFIG_PHANTOM) += phantom.o
24obj-$(CONFIG_SGI_IOC4) += ioc4.o 16obj-$(CONFIG_SGI_IOC4) += ioc4.o
25obj-$(CONFIG_SONY_LAPTOP) += sony-laptop.o
26obj-$(CONFIG_THINKPAD_ACPI) += thinkpad_acpi.o
27obj-$(CONFIG_FUJITSU_LAPTOP) += fujitsu-laptop.o
28obj-$(CONFIG_PANASONIC_LAPTOP) += panasonic-laptop.o
29obj-$(CONFIG_EEPROM_93CX6) += eeprom_93cx6.o 17obj-$(CONFIG_EEPROM_93CX6) += eeprom_93cx6.o
30obj-$(CONFIG_INTEL_MENLOW) += intel_menlow.o
31obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o 18obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o
32obj-$(CONFIG_KGDB_TESTS) += kgdbts.o 19obj-$(CONFIG_KGDB_TESTS) += kgdbts.o
33obj-$(CONFIG_SGI_XP) += sgi-xp/ 20obj-$(CONFIG_SGI_XP) += sgi-xp/
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 1e97916914ad..76bfe16c09b1 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -55,7 +55,6 @@ enum atmel_mci_state {
55 55
56struct atmel_mci_dma { 56struct atmel_mci_dma {
57#ifdef CONFIG_MMC_ATMELMCI_DMA 57#ifdef CONFIG_MMC_ATMELMCI_DMA
58 struct dma_client client;
59 struct dma_chan *chan; 58 struct dma_chan *chan;
60 struct dma_async_tx_descriptor *data_desc; 59 struct dma_async_tx_descriptor *data_desc;
61#endif 60#endif
@@ -593,10 +592,8 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
593 592
594 /* If we don't have a channel, we can't do DMA */ 593 /* If we don't have a channel, we can't do DMA */
595 chan = host->dma.chan; 594 chan = host->dma.chan;
596 if (chan) { 595 if (chan)
597 dma_chan_get(chan);
598 host->data_chan = chan; 596 host->data_chan = chan;
599 }
600 597
601 if (!chan) 598 if (!chan)
602 return -ENODEV; 599 return -ENODEV;
@@ -1443,60 +1440,6 @@ static irqreturn_t atmci_detect_interrupt(int irq, void *dev_id)
1443 return IRQ_HANDLED; 1440 return IRQ_HANDLED;
1444} 1441}
1445 1442
1446#ifdef CONFIG_MMC_ATMELMCI_DMA
1447
1448static inline struct atmel_mci *
1449dma_client_to_atmel_mci(struct dma_client *client)
1450{
1451 return container_of(client, struct atmel_mci, dma.client);
1452}
1453
1454static enum dma_state_client atmci_dma_event(struct dma_client *client,
1455 struct dma_chan *chan, enum dma_state state)
1456{
1457 struct atmel_mci *host;
1458 enum dma_state_client ret = DMA_NAK;
1459
1460 host = dma_client_to_atmel_mci(client);
1461
1462 switch (state) {
1463 case DMA_RESOURCE_AVAILABLE:
1464 spin_lock_bh(&host->lock);
1465 if (!host->dma.chan) {
1466 host->dma.chan = chan;
1467 ret = DMA_ACK;
1468 }
1469 spin_unlock_bh(&host->lock);
1470
1471 if (ret == DMA_ACK)
1472 dev_info(&host->pdev->dev,
1473 "Using %s for DMA transfers\n",
1474 chan->dev.bus_id);
1475 break;
1476
1477 case DMA_RESOURCE_REMOVED:
1478 spin_lock_bh(&host->lock);
1479 if (host->dma.chan == chan) {
1480 host->dma.chan = NULL;
1481 ret = DMA_ACK;
1482 }
1483 spin_unlock_bh(&host->lock);
1484
1485 if (ret == DMA_ACK)
1486 dev_info(&host->pdev->dev,
1487 "Lost %s, falling back to PIO\n",
1488 chan->dev.bus_id);
1489 break;
1490
1491 default:
1492 break;
1493 }
1494
1495
1496 return ret;
1497}
1498#endif /* CONFIG_MMC_ATMELMCI_DMA */
1499
1500static int __init atmci_init_slot(struct atmel_mci *host, 1443static int __init atmci_init_slot(struct atmel_mci *host,
1501 struct mci_slot_pdata *slot_data, unsigned int id, 1444 struct mci_slot_pdata *slot_data, unsigned int id,
1502 u32 sdc_reg) 1445 u32 sdc_reg)
@@ -1600,6 +1543,18 @@ static void __exit atmci_cleanup_slot(struct atmel_mci_slot *slot,
1600 mmc_free_host(slot->mmc); 1543 mmc_free_host(slot->mmc);
1601} 1544}
1602 1545
1546#ifdef CONFIG_MMC_ATMELMCI_DMA
1547static bool filter(struct dma_chan *chan, void *slave)
1548{
1549 struct dw_dma_slave *dws = slave;
1550
1551 if (dws->dma_dev == chan->device->dev)
1552 return true;
1553 else
1554 return false;
1555}
1556#endif
1557
1603static int __init atmci_probe(struct platform_device *pdev) 1558static int __init atmci_probe(struct platform_device *pdev)
1604{ 1559{
1605 struct mci_platform_data *pdata; 1560 struct mci_platform_data *pdata;
@@ -1652,22 +1607,20 @@ static int __init atmci_probe(struct platform_device *pdev)
1652 goto err_request_irq; 1607 goto err_request_irq;
1653 1608
1654#ifdef CONFIG_MMC_ATMELMCI_DMA 1609#ifdef CONFIG_MMC_ATMELMCI_DMA
1655 if (pdata->dma_slave) { 1610 if (pdata->dma_slave.dma_dev) {
1656 struct dma_slave *slave = pdata->dma_slave; 1611 struct dw_dma_slave *dws = &pdata->dma_slave;
1612 dma_cap_mask_t mask;
1657 1613
1658 slave->tx_reg = regs->start + MCI_TDR; 1614 dws->tx_reg = regs->start + MCI_TDR;
1659 slave->rx_reg = regs->start + MCI_RDR; 1615 dws->rx_reg = regs->start + MCI_RDR;
1660 1616
1661 /* Try to grab a DMA channel */ 1617 /* Try to grab a DMA channel */
1662 host->dma.client.event_callback = atmci_dma_event; 1618 dma_cap_zero(mask);
1663 dma_cap_set(DMA_SLAVE, host->dma.client.cap_mask); 1619 dma_cap_set(DMA_SLAVE, mask);
1664 host->dma.client.slave = slave; 1620 host->dma.chan = dma_request_channel(mask, filter, dws);
1665
1666 dma_async_client_register(&host->dma.client);
1667 dma_async_client_chan_request(&host->dma.client);
1668 } else {
1669 dev_notice(&pdev->dev, "DMA not available, using PIO\n");
1670 } 1621 }
1622 if (!host->dma.chan)
1623 dev_notice(&pdev->dev, "DMA not available, using PIO\n");
1671#endif /* CONFIG_MMC_ATMELMCI_DMA */ 1624#endif /* CONFIG_MMC_ATMELMCI_DMA */
1672 1625
1673 platform_set_drvdata(pdev, host); 1626 platform_set_drvdata(pdev, host);
@@ -1699,8 +1652,8 @@ static int __init atmci_probe(struct platform_device *pdev)
1699 1652
1700err_init_slot: 1653err_init_slot:
1701#ifdef CONFIG_MMC_ATMELMCI_DMA 1654#ifdef CONFIG_MMC_ATMELMCI_DMA
1702 if (pdata->dma_slave) 1655 if (host->dma.chan)
1703 dma_async_client_unregister(&host->dma.client); 1656 dma_release_channel(host->dma.chan);
1704#endif 1657#endif
1705 free_irq(irq, host); 1658 free_irq(irq, host);
1706err_request_irq: 1659err_request_irq:
@@ -1731,8 +1684,8 @@ static int __exit atmci_remove(struct platform_device *pdev)
1731 clk_disable(host->mck); 1684 clk_disable(host->mck);
1732 1685
1733#ifdef CONFIG_MMC_ATMELMCI_DMA 1686#ifdef CONFIG_MMC_ATMELMCI_DMA
1734 if (host->dma.client.slave) 1687 if (host->dma.chan)
1735 dma_async_client_unregister(&host->dma.client); 1688 dma_release_channel(host->dma.chan);
1736#endif 1689#endif
1737 1690
1738 free_irq(platform_get_irq(pdev, 0), host); 1691 free_irq(platform_get_irq(pdev, 0), host);
@@ -1761,7 +1714,7 @@ static void __exit atmci_exit(void)
1761 platform_driver_unregister(&atmci_driver); 1714 platform_driver_unregister(&atmci_driver);
1762} 1715}
1763 1716
1764module_init(atmci_init); 1717late_initcall(atmci_init); /* try to load after dma driver when built-in */
1765module_exit(atmci_exit); 1718module_exit(atmci_exit);
1766 1719
1767MODULE_DESCRIPTION("Atmel Multimedia Card Interface driver"); 1720MODULE_DESCRIPTION("Atmel Multimedia Card Interface driver");
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index a90d50c2c3e5..7d04fb9ddcaa 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -45,6 +45,14 @@ config MTD_PARTITIONS
45 devices. Partitioning on NFTL 'devices' is a different - that's the 45 devices. Partitioning on NFTL 'devices' is a different - that's the
46 'normal' form of partitioning used on a block device. 46 'normal' form of partitioning used on a block device.
47 47
48config MTD_TESTS
49 tristate "MTD tests support"
50 depends on m
51 help
52 This option includes various MTD tests into compilation. The tests
53 should normally be compiled as kernel modules. The modules perform
54 various checks and verifications when loaded.
55
48config MTD_REDBOOT_PARTS 56config MTD_REDBOOT_PARTS
49 tristate "RedBoot partition table parsing" 57 tristate "RedBoot partition table parsing"
50 depends on MTD_PARTITIONS 58 depends on MTD_PARTITIONS
@@ -316,6 +324,8 @@ source "drivers/mtd/nand/Kconfig"
316 324
317source "drivers/mtd/onenand/Kconfig" 325source "drivers/mtd/onenand/Kconfig"
318 326
327source "drivers/mtd/lpddr/Kconfig"
328
319source "drivers/mtd/ubi/Kconfig" 329source "drivers/mtd/ubi/Kconfig"
320 330
321endif # MTD 331endif # MTD
diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile
index 4b77335715f0..4521b1ecce45 100644
--- a/drivers/mtd/Makefile
+++ b/drivers/mtd/Makefile
@@ -29,6 +29,6 @@ obj-$(CONFIG_MTD_OOPS) += mtdoops.o
29nftl-objs := nftlcore.o nftlmount.o 29nftl-objs := nftlcore.o nftlmount.o
30inftl-objs := inftlcore.o inftlmount.o 30inftl-objs := inftlcore.o inftlmount.o
31 31
32obj-y += chips/ maps/ devices/ nand/ onenand/ 32obj-y += chips/ lpddr/ maps/ devices/ nand/ onenand/ tests/
33 33
34obj-$(CONFIG_MTD_UBI) += ubi/ 34obj-$(CONFIG_MTD_UBI) += ubi/
diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index c93a8be5d5f1..f5ab6fa1057b 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -58,8 +58,8 @@ static int cfi_intelext_write_buffers(struct mtd_info *, loff_t, size_t, size_t
58static int cfi_intelext_writev(struct mtd_info *, const struct kvec *, unsigned long, loff_t, size_t *); 58static int cfi_intelext_writev(struct mtd_info *, const struct kvec *, unsigned long, loff_t, size_t *);
59static int cfi_intelext_erase_varsize(struct mtd_info *, struct erase_info *); 59static int cfi_intelext_erase_varsize(struct mtd_info *, struct erase_info *);
60static void cfi_intelext_sync (struct mtd_info *); 60static void cfi_intelext_sync (struct mtd_info *);
61static int cfi_intelext_lock(struct mtd_info *mtd, loff_t ofs, size_t len); 61static int cfi_intelext_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
62static int cfi_intelext_unlock(struct mtd_info *mtd, loff_t ofs, size_t len); 62static int cfi_intelext_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
63#ifdef CONFIG_MTD_OTP 63#ifdef CONFIG_MTD_OTP
64static int cfi_intelext_read_fact_prot_reg (struct mtd_info *, loff_t, size_t, size_t *, u_char *); 64static int cfi_intelext_read_fact_prot_reg (struct mtd_info *, loff_t, size_t, size_t *, u_char *);
65static int cfi_intelext_read_user_prot_reg (struct mtd_info *, loff_t, size_t, size_t *, u_char *); 65static int cfi_intelext_read_user_prot_reg (struct mtd_info *, loff_t, size_t, size_t *, u_char *);
@@ -558,8 +558,8 @@ static struct mtd_info *cfi_intelext_setup(struct mtd_info *mtd)
558 } 558 }
559 559
560 for (i=0; i<mtd->numeraseregions;i++){ 560 for (i=0; i<mtd->numeraseregions;i++){
561 printk(KERN_DEBUG "erase region %d: offset=0x%x,size=0x%x,blocks=%d\n", 561 printk(KERN_DEBUG "erase region %d: offset=0x%llx,size=0x%x,blocks=%d\n",
562 i,mtd->eraseregions[i].offset, 562 i,(unsigned long long)mtd->eraseregions[i].offset,
563 mtd->eraseregions[i].erasesize, 563 mtd->eraseregions[i].erasesize,
564 mtd->eraseregions[i].numblocks); 564 mtd->eraseregions[i].numblocks);
565 } 565 }
@@ -2058,7 +2058,7 @@ out: put_chip(map, chip, adr);
2058 return ret; 2058 return ret;
2059} 2059}
2060 2060
2061static int cfi_intelext_lock(struct mtd_info *mtd, loff_t ofs, size_t len) 2061static int cfi_intelext_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
2062{ 2062{
2063 int ret; 2063 int ret;
2064 2064
@@ -2082,7 +2082,7 @@ static int cfi_intelext_lock(struct mtd_info *mtd, loff_t ofs, size_t len)
2082 return ret; 2082 return ret;
2083} 2083}
2084 2084
2085static int cfi_intelext_unlock(struct mtd_info *mtd, loff_t ofs, size_t len) 2085static int cfi_intelext_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
2086{ 2086{
2087 int ret; 2087 int ret;
2088 2088
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index d74ec46aa032..94bb61e19047 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -71,8 +71,8 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
71static void put_chip(struct map_info *map, struct flchip *chip, unsigned long adr); 71static void put_chip(struct map_info *map, struct flchip *chip, unsigned long adr);
72#include "fwh_lock.h" 72#include "fwh_lock.h"
73 73
74static int cfi_atmel_lock(struct mtd_info *mtd, loff_t ofs, size_t len); 74static int cfi_atmel_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
75static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, size_t len); 75static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
76 76
77static struct mtd_chip_driver cfi_amdstd_chipdrv = { 77static struct mtd_chip_driver cfi_amdstd_chipdrv = {
78 .probe = NULL, /* Not usable directly */ 78 .probe = NULL, /* Not usable directly */
@@ -322,6 +322,14 @@ static struct cfi_fixup fixup_table[] = {
322}; 322};
323 323
324 324
325static void cfi_fixup_major_minor(struct cfi_private *cfi,
326 struct cfi_pri_amdstd *extp)
327{
328 if (cfi->mfr == CFI_MFR_SAMSUNG && cfi->id == 0x257e &&
329 extp->MajorVersion == '0')
330 extp->MajorVersion = '1';
331}
332
325struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary) 333struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary)
326{ 334{
327 struct cfi_private *cfi = map->fldrv_priv; 335 struct cfi_private *cfi = map->fldrv_priv;
@@ -363,6 +371,8 @@ struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary)
363 return NULL; 371 return NULL;
364 } 372 }
365 373
374 cfi_fixup_major_minor(cfi, extp);
375
366 if (extp->MajorVersion != '1' || 376 if (extp->MajorVersion != '1' ||
367 (extp->MinorVersion < '0' || extp->MinorVersion > '4')) { 377 (extp->MinorVersion < '0' || extp->MinorVersion > '4')) {
368 printk(KERN_ERR " Unknown Amd/Fujitsu Extended Query " 378 printk(KERN_ERR " Unknown Amd/Fujitsu Extended Query "
@@ -1774,12 +1784,12 @@ out_unlock:
1774 return ret; 1784 return ret;
1775} 1785}
1776 1786
1777static int cfi_atmel_lock(struct mtd_info *mtd, loff_t ofs, size_t len) 1787static int cfi_atmel_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
1778{ 1788{
1779 return cfi_varsize_frob(mtd, do_atmel_lock, ofs, len, NULL); 1789 return cfi_varsize_frob(mtd, do_atmel_lock, ofs, len, NULL);
1780} 1790}
1781 1791
1782static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, size_t len) 1792static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
1783{ 1793{
1784 return cfi_varsize_frob(mtd, do_atmel_unlock, ofs, len, NULL); 1794 return cfi_varsize_frob(mtd, do_atmel_unlock, ofs, len, NULL);
1785} 1795}
diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c b/drivers/mtd/chips/cfi_cmdset_0020.c
index d4714dd9f7ab..6c740f346f91 100644
--- a/drivers/mtd/chips/cfi_cmdset_0020.c
+++ b/drivers/mtd/chips/cfi_cmdset_0020.c
@@ -42,8 +42,8 @@ static int cfi_staa_writev(struct mtd_info *mtd, const struct kvec *vecs,
42 unsigned long count, loff_t to, size_t *retlen); 42 unsigned long count, loff_t to, size_t *retlen);
43static int cfi_staa_erase_varsize(struct mtd_info *, struct erase_info *); 43static int cfi_staa_erase_varsize(struct mtd_info *, struct erase_info *);
44static void cfi_staa_sync (struct mtd_info *); 44static void cfi_staa_sync (struct mtd_info *);
45static int cfi_staa_lock(struct mtd_info *mtd, loff_t ofs, size_t len); 45static int cfi_staa_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
46static int cfi_staa_unlock(struct mtd_info *mtd, loff_t ofs, size_t len); 46static int cfi_staa_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
47static int cfi_staa_suspend (struct mtd_info *); 47static int cfi_staa_suspend (struct mtd_info *);
48static void cfi_staa_resume (struct mtd_info *); 48static void cfi_staa_resume (struct mtd_info *);
49 49
@@ -221,8 +221,8 @@ static struct mtd_info *cfi_staa_setup(struct map_info *map)
221 } 221 }
222 222
223 for (i=0; i<mtd->numeraseregions;i++){ 223 for (i=0; i<mtd->numeraseregions;i++){
224 printk(KERN_DEBUG "%d: offset=0x%x,size=0x%x,blocks=%d\n", 224 printk(KERN_DEBUG "%d: offset=0x%llx,size=0x%x,blocks=%d\n",
225 i,mtd->eraseregions[i].offset, 225 i, (unsigned long long)mtd->eraseregions[i].offset,
226 mtd->eraseregions[i].erasesize, 226 mtd->eraseregions[i].erasesize,
227 mtd->eraseregions[i].numblocks); 227 mtd->eraseregions[i].numblocks);
228 } 228 }
@@ -964,7 +964,7 @@ static int cfi_staa_erase_varsize(struct mtd_info *mtd,
964 adr += regions[i].erasesize; 964 adr += regions[i].erasesize;
965 len -= regions[i].erasesize; 965 len -= regions[i].erasesize;
966 966
967 if (adr % (1<< cfi->chipshift) == ((regions[i].offset + (regions[i].erasesize * regions[i].numblocks)) %( 1<< cfi->chipshift))) 967 if (adr % (1<< cfi->chipshift) == (((unsigned long)regions[i].offset + (regions[i].erasesize * regions[i].numblocks)) %( 1<< cfi->chipshift)))
968 i++; 968 i++;
969 969
970 if (adr >> cfi->chipshift) { 970 if (adr >> cfi->chipshift) {
@@ -1135,7 +1135,7 @@ retry:
1135 spin_unlock_bh(chip->mutex); 1135 spin_unlock_bh(chip->mutex);
1136 return 0; 1136 return 0;
1137} 1137}
1138static int cfi_staa_lock(struct mtd_info *mtd, loff_t ofs, size_t len) 1138static int cfi_staa_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
1139{ 1139{
1140 struct map_info *map = mtd->priv; 1140 struct map_info *map = mtd->priv;
1141 struct cfi_private *cfi = map->fldrv_priv; 1141 struct cfi_private *cfi = map->fldrv_priv;
@@ -1284,7 +1284,7 @@ retry:
1284 spin_unlock_bh(chip->mutex); 1284 spin_unlock_bh(chip->mutex);
1285 return 0; 1285 return 0;
1286} 1286}
1287static int cfi_staa_unlock(struct mtd_info *mtd, loff_t ofs, size_t len) 1287static int cfi_staa_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
1288{ 1288{
1289 struct map_info *map = mtd->priv; 1289 struct map_info *map = mtd->priv;
1290 struct cfi_private *cfi = map->fldrv_priv; 1290 struct cfi_private *cfi = map->fldrv_priv;
diff --git a/drivers/mtd/chips/fwh_lock.h b/drivers/mtd/chips/fwh_lock.h
index ab44f2b996f8..57e0e4e921f9 100644
--- a/drivers/mtd/chips/fwh_lock.h
+++ b/drivers/mtd/chips/fwh_lock.h
@@ -77,7 +77,7 @@ static int fwh_xxlock_oneblock(struct map_info *map, struct flchip *chip,
77} 77}
78 78
79 79
80static int fwh_lock_varsize(struct mtd_info *mtd, loff_t ofs, size_t len) 80static int fwh_lock_varsize(struct mtd_info *mtd, loff_t ofs, uint64_t len)
81{ 81{
82 int ret; 82 int ret;
83 83
@@ -88,7 +88,7 @@ static int fwh_lock_varsize(struct mtd_info *mtd, loff_t ofs, size_t len)
88} 88}
89 89
90 90
91static int fwh_unlock_varsize(struct mtd_info *mtd, loff_t ofs, size_t len) 91static int fwh_unlock_varsize(struct mtd_info *mtd, loff_t ofs, uint64_t len)
92{ 92{
93 int ret; 93 int ret;
94 94
diff --git a/drivers/mtd/devices/lart.c b/drivers/mtd/devices/lart.c
index f4bda4cee495..578de1c67bfe 100644
--- a/drivers/mtd/devices/lart.c
+++ b/drivers/mtd/devices/lart.c
@@ -619,7 +619,7 @@ static struct mtd_partition lart_partitions[] = {
619}; 619};
620#endif 620#endif
621 621
622int __init lart_flash_init (void) 622static int __init lart_flash_init (void)
623{ 623{
624 int result; 624 int result;
625 memset (&mtd,0,sizeof (mtd)); 625 memset (&mtd,0,sizeof (mtd));
@@ -690,7 +690,7 @@ int __init lart_flash_init (void)
690 return (result); 690 return (result);
691} 691}
692 692
693void __exit lart_flash_exit (void) 693static void __exit lart_flash_exit (void)
694{ 694{
695#ifndef HAVE_PARTITIONS 695#ifndef HAVE_PARTITIONS
696 del_mtd_device (&mtd); 696 del_mtd_device (&mtd);
@@ -705,5 +705,3 @@ module_exit (lart_flash_exit);
705MODULE_LICENSE("GPL"); 705MODULE_LICENSE("GPL");
706MODULE_AUTHOR("Abraham vd Merwe <abraham@2d3d.co.za>"); 706MODULE_AUTHOR("Abraham vd Merwe <abraham@2d3d.co.za>");
707MODULE_DESCRIPTION("MTD driver for Intel 28F160F3 on LART board"); 707MODULE_DESCRIPTION("MTD driver for Intel 28F160F3 on LART board");
708
709
diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 5733f0643843..7c3fc766dcf1 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -20,6 +20,7 @@
20#include <linux/device.h> 20#include <linux/device.h>
21#include <linux/interrupt.h> 21#include <linux/interrupt.h>
22#include <linux/mutex.h> 22#include <linux/mutex.h>
23#include <linux/math64.h>
23 24
24#include <linux/mtd/mtd.h> 25#include <linux/mtd/mtd.h>
25#include <linux/mtd/partitions.h> 26#include <linux/mtd/partitions.h>
@@ -169,9 +170,9 @@ static int wait_till_ready(struct m25p *flash)
169 */ 170 */
170static int erase_chip(struct m25p *flash) 171static int erase_chip(struct m25p *flash)
171{ 172{
172 DEBUG(MTD_DEBUG_LEVEL3, "%s: %s %dKiB\n", 173 DEBUG(MTD_DEBUG_LEVEL3, "%s: %s %lldKiB\n",
173 dev_name(&flash->spi->dev), __func__, 174 dev_name(&flash->spi->dev), __func__,
174 flash->mtd.size / 1024); 175 (long long)(flash->mtd.size >> 10));
175 176
176 /* Wait until finished previous write command. */ 177 /* Wait until finished previous write command. */
177 if (wait_till_ready(flash)) 178 if (wait_till_ready(flash))
@@ -232,18 +233,18 @@ static int m25p80_erase(struct mtd_info *mtd, struct erase_info *instr)
232{ 233{
233 struct m25p *flash = mtd_to_m25p(mtd); 234 struct m25p *flash = mtd_to_m25p(mtd);
234 u32 addr,len; 235 u32 addr,len;
236 uint32_t rem;
235 237
236 DEBUG(MTD_DEBUG_LEVEL2, "%s: %s %s 0x%08x, len %d\n", 238 DEBUG(MTD_DEBUG_LEVEL2, "%s: %s %s 0x%llx, len %lld\n",
237 dev_name(&flash->spi->dev), __func__, "at", 239 dev_name(&flash->spi->dev), __func__, "at",
238 (u32)instr->addr, instr->len); 240 (long long)instr->addr, (long long)instr->len);
239 241
240 /* sanity checks */ 242 /* sanity checks */
241 if (instr->addr + instr->len > flash->mtd.size) 243 if (instr->addr + instr->len > flash->mtd.size)
242 return -EINVAL; 244 return -EINVAL;
243 if ((instr->addr % mtd->erasesize) != 0 245 div_u64_rem(instr->len, mtd->erasesize, &rem);
244 || (instr->len % mtd->erasesize) != 0) { 246 if (rem)
245 return -EINVAL; 247 return -EINVAL;
246 }
247 248
248 addr = instr->addr; 249 addr = instr->addr;
249 len = instr->len; 250 len = instr->len;
@@ -677,24 +678,24 @@ static int __devinit m25p_probe(struct spi_device *spi)
677 flash->mtd.erasesize = info->sector_size; 678 flash->mtd.erasesize = info->sector_size;
678 } 679 }
679 680
680 dev_info(&spi->dev, "%s (%d Kbytes)\n", info->name, 681 dev_info(&spi->dev, "%s (%lld Kbytes)\n", info->name,
681 flash->mtd.size / 1024); 682 (long long)flash->mtd.size >> 10);
682 683
683 DEBUG(MTD_DEBUG_LEVEL2, 684 DEBUG(MTD_DEBUG_LEVEL2,
684 "mtd .name = %s, .size = 0x%.8x (%uMiB) " 685 "mtd .name = %s, .size = 0x%llx (%lldMiB) "
685 ".erasesize = 0x%.8x (%uKiB) .numeraseregions = %d\n", 686 ".erasesize = 0x%.8x (%uKiB) .numeraseregions = %d\n",
686 flash->mtd.name, 687 flash->mtd.name,
687 flash->mtd.size, flash->mtd.size / (1024*1024), 688 (long long)flash->mtd.size, (long long)(flash->mtd.size >> 20),
688 flash->mtd.erasesize, flash->mtd.erasesize / 1024, 689 flash->mtd.erasesize, flash->mtd.erasesize / 1024,
689 flash->mtd.numeraseregions); 690 flash->mtd.numeraseregions);
690 691
691 if (flash->mtd.numeraseregions) 692 if (flash->mtd.numeraseregions)
692 for (i = 0; i < flash->mtd.numeraseregions; i++) 693 for (i = 0; i < flash->mtd.numeraseregions; i++)
693 DEBUG(MTD_DEBUG_LEVEL2, 694 DEBUG(MTD_DEBUG_LEVEL2,
694 "mtd.eraseregions[%d] = { .offset = 0x%.8x, " 695 "mtd.eraseregions[%d] = { .offset = 0x%llx, "
695 ".erasesize = 0x%.8x (%uKiB), " 696 ".erasesize = 0x%.8x (%uKiB), "
696 ".numblocks = %d }\n", 697 ".numblocks = %d }\n",
697 i, flash->mtd.eraseregions[i].offset, 698 i, (long long)flash->mtd.eraseregions[i].offset,
698 flash->mtd.eraseregions[i].erasesize, 699 flash->mtd.eraseregions[i].erasesize,
699 flash->mtd.eraseregions[i].erasesize / 1024, 700 flash->mtd.eraseregions[i].erasesize / 1024,
700 flash->mtd.eraseregions[i].numblocks); 701 flash->mtd.eraseregions[i].numblocks);
@@ -722,12 +723,12 @@ static int __devinit m25p_probe(struct spi_device *spi)
722 if (nr_parts > 0) { 723 if (nr_parts > 0) {
723 for (i = 0; i < nr_parts; i++) { 724 for (i = 0; i < nr_parts; i++) {
724 DEBUG(MTD_DEBUG_LEVEL2, "partitions[%d] = " 725 DEBUG(MTD_DEBUG_LEVEL2, "partitions[%d] = "
725 "{.name = %s, .offset = 0x%.8x, " 726 "{.name = %s, .offset = 0x%llx, "
726 ".size = 0x%.8x (%uKiB) }\n", 727 ".size = 0x%llx (%lldKiB) }\n",
727 i, parts[i].name, 728 i, parts[i].name,
728 parts[i].offset, 729 (long long)parts[i].offset,
729 parts[i].size, 730 (long long)parts[i].size,
730 parts[i].size / 1024); 731 (long long)(parts[i].size >> 10));
731 } 732 }
732 flash->partitioned = 1; 733 flash->partitioned = 1;
733 return add_mtd_partitions(&flash->mtd, parts, nr_parts); 734 return add_mtd_partitions(&flash->mtd, parts, nr_parts);
diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index 65126cd668ff..d44f741ae229 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -16,6 +16,7 @@
16#include <linux/device.h> 16#include <linux/device.h>
17#include <linux/mutex.h> 17#include <linux/mutex.h>
18#include <linux/err.h> 18#include <linux/err.h>
19#include <linux/math64.h>
19 20
20#include <linux/spi/spi.h> 21#include <linux/spi/spi.h>
21#include <linux/spi/flash.h> 22#include <linux/spi/flash.h>
@@ -152,15 +153,20 @@ static int dataflash_erase(struct mtd_info *mtd, struct erase_info *instr)
152 struct spi_message msg; 153 struct spi_message msg;
153 unsigned blocksize = priv->page_size << 3; 154 unsigned blocksize = priv->page_size << 3;
154 uint8_t *command; 155 uint8_t *command;
156 uint32_t rem;
155 157
156 DEBUG(MTD_DEBUG_LEVEL2, "%s: erase addr=0x%x len 0x%x\n", 158 DEBUG(MTD_DEBUG_LEVEL2, "%s: erase addr=0x%llx len 0x%llx\n",
157 dev_name(&spi->dev), 159 dev_name(&spi->dev), (long long)instr->addr,
158 instr->addr, instr->len); 160 (long long)instr->len);
159 161
160 /* Sanity checks */ 162 /* Sanity checks */
161 if ((instr->addr + instr->len) > mtd->size 163 if (instr->addr + instr->len > mtd->size)
162 || (instr->len % priv->page_size) != 0 164 return -EINVAL;
163 || (instr->addr % priv->page_size) != 0) 165 div_u64_rem(instr->len, priv->page_size, &rem);
166 if (rem)
167 return -EINVAL;
168 div_u64_rem(instr->addr, priv->page_size, &rem);
169 if (rem)
164 return -EINVAL; 170 return -EINVAL;
165 171
166 spi_message_init(&msg); 172 spi_message_init(&msg);
@@ -178,7 +184,7 @@ static int dataflash_erase(struct mtd_info *mtd, struct erase_info *instr)
178 /* Calculate flash page address; use block erase (for speed) if 184 /* Calculate flash page address; use block erase (for speed) if
179 * we're at a block boundary and need to erase the whole block. 185 * we're at a block boundary and need to erase the whole block.
180 */ 186 */
181 pageaddr = instr->addr / priv->page_size; 187 pageaddr = div_u64(instr->len, priv->page_size);
182 do_block = (pageaddr & 0x7) == 0 && instr->len >= blocksize; 188 do_block = (pageaddr & 0x7) == 0 && instr->len >= blocksize;
183 pageaddr = pageaddr << priv->page_offset; 189 pageaddr = pageaddr << priv->page_offset;
184 190
@@ -667,8 +673,8 @@ add_dataflash_otp(struct spi_device *spi, char *name,
667 if (revision >= 'c') 673 if (revision >= 'c')
668 otp_tag = otp_setup(device, revision); 674 otp_tag = otp_setup(device, revision);
669 675
670 dev_info(&spi->dev, "%s (%d KBytes) pagesize %d bytes%s\n", 676 dev_info(&spi->dev, "%s (%lld KBytes) pagesize %d bytes%s\n",
671 name, DIV_ROUND_UP(device->size, 1024), 677 name, (long long)((device->size + 1023) >> 10),
672 pagesize, otp_tag); 678 pagesize, otp_tag);
673 dev_set_drvdata(&spi->dev, priv); 679 dev_set_drvdata(&spi->dev, priv);
674 680
diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index 9bf581c4f740..a790c062af1f 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -109,25 +109,25 @@ module_param(shuffle_freq, int, 0);
109/* Each memory region corresponds to a minor device */ 109/* Each memory region corresponds to a minor device */
110typedef struct partition_t { 110typedef struct partition_t {
111 struct mtd_blktrans_dev mbd; 111 struct mtd_blktrans_dev mbd;
112 u_int32_t state; 112 uint32_t state;
113 u_int32_t *VirtualBlockMap; 113 uint32_t *VirtualBlockMap;
114 u_int32_t *VirtualPageMap; 114 uint32_t *VirtualPageMap;
115 u_int32_t FreeTotal; 115 uint32_t FreeTotal;
116 struct eun_info_t { 116 struct eun_info_t {
117 u_int32_t Offset; 117 uint32_t Offset;
118 u_int32_t EraseCount; 118 uint32_t EraseCount;
119 u_int32_t Free; 119 uint32_t Free;
120 u_int32_t Deleted; 120 uint32_t Deleted;
121 } *EUNInfo; 121 } *EUNInfo;
122 struct xfer_info_t { 122 struct xfer_info_t {
123 u_int32_t Offset; 123 uint32_t Offset;
124 u_int32_t EraseCount; 124 uint32_t EraseCount;
125 u_int16_t state; 125 uint16_t state;
126 } *XferInfo; 126 } *XferInfo;
127 u_int16_t bam_index; 127 uint16_t bam_index;
128 u_int32_t *bam_cache; 128 uint32_t *bam_cache;
129 u_int16_t DataUnits; 129 uint16_t DataUnits;
130 u_int32_t BlocksPerUnit; 130 uint32_t BlocksPerUnit;
131 erase_unit_header_t header; 131 erase_unit_header_t header;
132} partition_t; 132} partition_t;
133 133
@@ -199,8 +199,8 @@ static int scan_header(partition_t *part)
199static int build_maps(partition_t *part) 199static int build_maps(partition_t *part)
200{ 200{
201 erase_unit_header_t header; 201 erase_unit_header_t header;
202 u_int16_t xvalid, xtrans, i; 202 uint16_t xvalid, xtrans, i;
203 u_int blocks, j; 203 unsigned blocks, j;
204 int hdr_ok, ret = -1; 204 int hdr_ok, ret = -1;
205 ssize_t retval; 205 ssize_t retval;
206 loff_t offset; 206 loff_t offset;
@@ -269,14 +269,14 @@ static int build_maps(partition_t *part)
269 269
270 /* Set up virtual page map */ 270 /* Set up virtual page map */
271 blocks = le32_to_cpu(header.FormattedSize) >> header.BlockSize; 271 blocks = le32_to_cpu(header.FormattedSize) >> header.BlockSize;
272 part->VirtualBlockMap = vmalloc(blocks * sizeof(u_int32_t)); 272 part->VirtualBlockMap = vmalloc(blocks * sizeof(uint32_t));
273 if (!part->VirtualBlockMap) 273 if (!part->VirtualBlockMap)
274 goto out_XferInfo; 274 goto out_XferInfo;
275 275
276 memset(part->VirtualBlockMap, 0xff, blocks * sizeof(u_int32_t)); 276 memset(part->VirtualBlockMap, 0xff, blocks * sizeof(uint32_t));
277 part->BlocksPerUnit = (1 << header.EraseUnitSize) >> header.BlockSize; 277 part->BlocksPerUnit = (1 << header.EraseUnitSize) >> header.BlockSize;
278 278
279 part->bam_cache = kmalloc(part->BlocksPerUnit * sizeof(u_int32_t), 279 part->bam_cache = kmalloc(part->BlocksPerUnit * sizeof(uint32_t),
280 GFP_KERNEL); 280 GFP_KERNEL);
281 if (!part->bam_cache) 281 if (!part->bam_cache)
282 goto out_VirtualBlockMap; 282 goto out_VirtualBlockMap;
@@ -290,7 +290,7 @@ static int build_maps(partition_t *part)
290 offset = part->EUNInfo[i].Offset + le32_to_cpu(header.BAMOffset); 290 offset = part->EUNInfo[i].Offset + le32_to_cpu(header.BAMOffset);
291 291
292 ret = part->mbd.mtd->read(part->mbd.mtd, offset, 292 ret = part->mbd.mtd->read(part->mbd.mtd, offset,
293 part->BlocksPerUnit * sizeof(u_int32_t), &retval, 293 part->BlocksPerUnit * sizeof(uint32_t), &retval,
294 (unsigned char *)part->bam_cache); 294 (unsigned char *)part->bam_cache);
295 295
296 if (ret) 296 if (ret)
@@ -332,7 +332,7 @@ out:
332======================================================================*/ 332======================================================================*/
333 333
334static int erase_xfer(partition_t *part, 334static int erase_xfer(partition_t *part,
335 u_int16_t xfernum) 335 uint16_t xfernum)
336{ 336{
337 int ret; 337 int ret;
338 struct xfer_info_t *xfer; 338 struct xfer_info_t *xfer;
@@ -408,7 +408,7 @@ static int prepare_xfer(partition_t *part, int i)
408 erase_unit_header_t header; 408 erase_unit_header_t header;
409 struct xfer_info_t *xfer; 409 struct xfer_info_t *xfer;
410 int nbam, ret; 410 int nbam, ret;
411 u_int32_t ctl; 411 uint32_t ctl;
412 ssize_t retlen; 412 ssize_t retlen;
413 loff_t offset; 413 loff_t offset;
414 414
@@ -430,15 +430,15 @@ static int prepare_xfer(partition_t *part, int i)
430 } 430 }
431 431
432 /* Write the BAM stub */ 432 /* Write the BAM stub */
433 nbam = (part->BlocksPerUnit * sizeof(u_int32_t) + 433 nbam = (part->BlocksPerUnit * sizeof(uint32_t) +
434 le32_to_cpu(part->header.BAMOffset) + SECTOR_SIZE - 1) / SECTOR_SIZE; 434 le32_to_cpu(part->header.BAMOffset) + SECTOR_SIZE - 1) / SECTOR_SIZE;
435 435
436 offset = xfer->Offset + le32_to_cpu(part->header.BAMOffset); 436 offset = xfer->Offset + le32_to_cpu(part->header.BAMOffset);
437 ctl = cpu_to_le32(BLOCK_CONTROL); 437 ctl = cpu_to_le32(BLOCK_CONTROL);
438 438
439 for (i = 0; i < nbam; i++, offset += sizeof(u_int32_t)) { 439 for (i = 0; i < nbam; i++, offset += sizeof(uint32_t)) {
440 440
441 ret = part->mbd.mtd->write(part->mbd.mtd, offset, sizeof(u_int32_t), 441 ret = part->mbd.mtd->write(part->mbd.mtd, offset, sizeof(uint32_t),
442 &retlen, (u_char *)&ctl); 442 &retlen, (u_char *)&ctl);
443 443
444 if (ret) 444 if (ret)
@@ -461,18 +461,18 @@ static int prepare_xfer(partition_t *part, int i)
461 461
462======================================================================*/ 462======================================================================*/
463 463
464static int copy_erase_unit(partition_t *part, u_int16_t srcunit, 464static int copy_erase_unit(partition_t *part, uint16_t srcunit,
465 u_int16_t xferunit) 465 uint16_t xferunit)
466{ 466{
467 u_char buf[SECTOR_SIZE]; 467 u_char buf[SECTOR_SIZE];
468 struct eun_info_t *eun; 468 struct eun_info_t *eun;
469 struct xfer_info_t *xfer; 469 struct xfer_info_t *xfer;
470 u_int32_t src, dest, free, i; 470 uint32_t src, dest, free, i;
471 u_int16_t unit; 471 uint16_t unit;
472 int ret; 472 int ret;
473 ssize_t retlen; 473 ssize_t retlen;
474 loff_t offset; 474 loff_t offset;
475 u_int16_t srcunitswap = cpu_to_le16(srcunit); 475 uint16_t srcunitswap = cpu_to_le16(srcunit);
476 476
477 eun = &part->EUNInfo[srcunit]; 477 eun = &part->EUNInfo[srcunit];
478 xfer = &part->XferInfo[xferunit]; 478 xfer = &part->XferInfo[xferunit];
@@ -486,7 +486,7 @@ static int copy_erase_unit(partition_t *part, u_int16_t srcunit,
486 offset = eun->Offset + le32_to_cpu(part->header.BAMOffset); 486 offset = eun->Offset + le32_to_cpu(part->header.BAMOffset);
487 487
488 ret = part->mbd.mtd->read(part->mbd.mtd, offset, 488 ret = part->mbd.mtd->read(part->mbd.mtd, offset,
489 part->BlocksPerUnit * sizeof(u_int32_t), 489 part->BlocksPerUnit * sizeof(uint32_t),
490 &retlen, (u_char *) (part->bam_cache)); 490 &retlen, (u_char *) (part->bam_cache));
491 491
492 /* mark the cache bad, in case we get an error later */ 492 /* mark the cache bad, in case we get an error later */
@@ -503,7 +503,7 @@ static int copy_erase_unit(partition_t *part, u_int16_t srcunit,
503 offset = xfer->Offset + 20; /* Bad! */ 503 offset = xfer->Offset + 20; /* Bad! */
504 unit = cpu_to_le16(0x7fff); 504 unit = cpu_to_le16(0x7fff);
505 505
506 ret = part->mbd.mtd->write(part->mbd.mtd, offset, sizeof(u_int16_t), 506 ret = part->mbd.mtd->write(part->mbd.mtd, offset, sizeof(uint16_t),
507 &retlen, (u_char *) &unit); 507 &retlen, (u_char *) &unit);
508 508
509 if (ret) { 509 if (ret) {
@@ -560,7 +560,7 @@ static int copy_erase_unit(partition_t *part, u_int16_t srcunit,
560 560
561 561
562 /* All clear? Then update the LogicalEUN again */ 562 /* All clear? Then update the LogicalEUN again */
563 ret = part->mbd.mtd->write(part->mbd.mtd, xfer->Offset + 20, sizeof(u_int16_t), 563 ret = part->mbd.mtd->write(part->mbd.mtd, xfer->Offset + 20, sizeof(uint16_t),
564 &retlen, (u_char *)&srcunitswap); 564 &retlen, (u_char *)&srcunitswap);
565 565
566 if (ret) { 566 if (ret) {
@@ -605,8 +605,8 @@ static int copy_erase_unit(partition_t *part, u_int16_t srcunit,
605 605
606static int reclaim_block(partition_t *part) 606static int reclaim_block(partition_t *part)
607{ 607{
608 u_int16_t i, eun, xfer; 608 uint16_t i, eun, xfer;
609 u_int32_t best; 609 uint32_t best;
610 int queued, ret; 610 int queued, ret;
611 611
612 DEBUG(0, "ftl_cs: reclaiming space...\n"); 612 DEBUG(0, "ftl_cs: reclaiming space...\n");
@@ -723,10 +723,10 @@ static void dump_lists(partition_t *part)
723} 723}
724#endif 724#endif
725 725
726static u_int32_t find_free(partition_t *part) 726static uint32_t find_free(partition_t *part)
727{ 727{
728 u_int16_t stop, eun; 728 uint16_t stop, eun;
729 u_int32_t blk; 729 uint32_t blk;
730 size_t retlen; 730 size_t retlen;
731 int ret; 731 int ret;
732 732
@@ -749,7 +749,7 @@ static u_int32_t find_free(partition_t *part)
749 749
750 ret = part->mbd.mtd->read(part->mbd.mtd, 750 ret = part->mbd.mtd->read(part->mbd.mtd,
751 part->EUNInfo[eun].Offset + le32_to_cpu(part->header.BAMOffset), 751 part->EUNInfo[eun].Offset + le32_to_cpu(part->header.BAMOffset),
752 part->BlocksPerUnit * sizeof(u_int32_t), 752 part->BlocksPerUnit * sizeof(uint32_t),
753 &retlen, (u_char *) (part->bam_cache)); 753 &retlen, (u_char *) (part->bam_cache));
754 754
755 if (ret) { 755 if (ret) {
@@ -786,7 +786,7 @@ static u_int32_t find_free(partition_t *part)
786static int ftl_read(partition_t *part, caddr_t buffer, 786static int ftl_read(partition_t *part, caddr_t buffer,
787 u_long sector, u_long nblocks) 787 u_long sector, u_long nblocks)
788{ 788{
789 u_int32_t log_addr, bsize; 789 uint32_t log_addr, bsize;
790 u_long i; 790 u_long i;
791 int ret; 791 int ret;
792 size_t offset, retlen; 792 size_t offset, retlen;
@@ -829,14 +829,14 @@ static int ftl_read(partition_t *part, caddr_t buffer,
829 829
830======================================================================*/ 830======================================================================*/
831 831
832static int set_bam_entry(partition_t *part, u_int32_t log_addr, 832static int set_bam_entry(partition_t *part, uint32_t log_addr,
833 u_int32_t virt_addr) 833 uint32_t virt_addr)
834{ 834{
835 u_int32_t bsize, blk, le_virt_addr; 835 uint32_t bsize, blk, le_virt_addr;
836#ifdef PSYCHO_DEBUG 836#ifdef PSYCHO_DEBUG
837 u_int32_t old_addr; 837 uint32_t old_addr;
838#endif 838#endif
839 u_int16_t eun; 839 uint16_t eun;
840 int ret; 840 int ret;
841 size_t retlen, offset; 841 size_t retlen, offset;
842 842
@@ -845,11 +845,11 @@ static int set_bam_entry(partition_t *part, u_int32_t log_addr,
845 bsize = 1 << part->header.EraseUnitSize; 845 bsize = 1 << part->header.EraseUnitSize;
846 eun = log_addr / bsize; 846 eun = log_addr / bsize;
847 blk = (log_addr % bsize) / SECTOR_SIZE; 847 blk = (log_addr % bsize) / SECTOR_SIZE;
848 offset = (part->EUNInfo[eun].Offset + blk * sizeof(u_int32_t) + 848 offset = (part->EUNInfo[eun].Offset + blk * sizeof(uint32_t) +
849 le32_to_cpu(part->header.BAMOffset)); 849 le32_to_cpu(part->header.BAMOffset));
850 850
851#ifdef PSYCHO_DEBUG 851#ifdef PSYCHO_DEBUG
852 ret = part->mbd.mtd->read(part->mbd.mtd, offset, sizeof(u_int32_t), 852 ret = part->mbd.mtd->read(part->mbd.mtd, offset, sizeof(uint32_t),
853 &retlen, (u_char *)&old_addr); 853 &retlen, (u_char *)&old_addr);
854 if (ret) { 854 if (ret) {
855 printk(KERN_WARNING"ftl: Error reading old_addr in set_bam_entry: %d\n",ret); 855 printk(KERN_WARNING"ftl: Error reading old_addr in set_bam_entry: %d\n",ret);
@@ -886,7 +886,7 @@ static int set_bam_entry(partition_t *part, u_int32_t log_addr,
886#endif 886#endif
887 part->bam_cache[blk] = le_virt_addr; 887 part->bam_cache[blk] = le_virt_addr;
888 } 888 }
889 ret = part->mbd.mtd->write(part->mbd.mtd, offset, sizeof(u_int32_t), 889 ret = part->mbd.mtd->write(part->mbd.mtd, offset, sizeof(uint32_t),
890 &retlen, (u_char *)&le_virt_addr); 890 &retlen, (u_char *)&le_virt_addr);
891 891
892 if (ret) { 892 if (ret) {
@@ -900,7 +900,7 @@ static int set_bam_entry(partition_t *part, u_int32_t log_addr,
900static int ftl_write(partition_t *part, caddr_t buffer, 900static int ftl_write(partition_t *part, caddr_t buffer,
901 u_long sector, u_long nblocks) 901 u_long sector, u_long nblocks)
902{ 902{
903 u_int32_t bsize, log_addr, virt_addr, old_addr, blk; 903 uint32_t bsize, log_addr, virt_addr, old_addr, blk;
904 u_long i; 904 u_long i;
905 int ret; 905 int ret;
906 size_t retlen, offset; 906 size_t retlen, offset;
diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c
index 50ce13887f63..73f05227dc8c 100644
--- a/drivers/mtd/inftlcore.c
+++ b/drivers/mtd/inftlcore.c
@@ -50,7 +50,7 @@ static void inftl_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
50 struct INFTLrecord *inftl; 50 struct INFTLrecord *inftl;
51 unsigned long temp; 51 unsigned long temp;
52 52
53 if (mtd->type != MTD_NANDFLASH) 53 if (mtd->type != MTD_NANDFLASH || mtd->size > UINT_MAX)
54 return; 54 return;
55 /* OK, this is moderately ugly. But probably safe. Alternatives? */ 55 /* OK, this is moderately ugly. But probably safe. Alternatives? */
56 if (memcmp(mtd->name, "DiskOnChip", 10)) 56 if (memcmp(mtd->name, "DiskOnChip", 10))
diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c
index 9113628ed1ef..f751dd97c549 100644
--- a/drivers/mtd/inftlmount.c
+++ b/drivers/mtd/inftlmount.c
@@ -63,7 +63,7 @@ static int find_boot_record(struct INFTLrecord *inftl)
63 * otherwise. 63 * otherwise.
64 */ 64 */
65 inftl->EraseSize = inftl->mbd.mtd->erasesize; 65 inftl->EraseSize = inftl->mbd.mtd->erasesize;
66 inftl->nb_blocks = inftl->mbd.mtd->size / inftl->EraseSize; 66 inftl->nb_blocks = (u32)inftl->mbd.mtd->size / inftl->EraseSize;
67 67
68 inftl->MediaUnit = BLOCK_NIL; 68 inftl->MediaUnit = BLOCK_NIL;
69 69
@@ -187,7 +187,7 @@ static int find_boot_record(struct INFTLrecord *inftl)
187 mh->BlockMultiplierBits); 187 mh->BlockMultiplierBits);
188 inftl->EraseSize = inftl->mbd.mtd->erasesize << 188 inftl->EraseSize = inftl->mbd.mtd->erasesize <<
189 mh->BlockMultiplierBits; 189 mh->BlockMultiplierBits;
190 inftl->nb_blocks = inftl->mbd.mtd->size / inftl->EraseSize; 190 inftl->nb_blocks = (u32)inftl->mbd.mtd->size / inftl->EraseSize;
191 block >>= mh->BlockMultiplierBits; 191 block >>= mh->BlockMultiplierBits;
192 } 192 }
193 193
diff --git a/drivers/mtd/lpddr/Kconfig b/drivers/mtd/lpddr/Kconfig
new file mode 100644
index 000000000000..acd4ea9b2278
--- /dev/null
+++ b/drivers/mtd/lpddr/Kconfig
@@ -0,0 +1,22 @@
1# drivers/mtd/chips/Kconfig
2
3menu "LPDDR flash memory drivers"
4 depends on MTD!=n
5
6config MTD_LPDDR
7 tristate "Support for LPDDR flash chips"
8 select MTD_QINFO_PROBE
9 help
10 This option enables support of LPDDR (Low power double data rate)
11 flash chips. Synonymous with Mobile-DDR. It is a new standard for
12 DDR memories, intended for battery-operated systems.
13
14config MTD_QINFO_PROBE
15 tristate "Detect flash chips by QINFO probe"
16 help
17 Device Information for LPDDR chips is offered through the Overlay
18 Window QINFO interface, permits software to be used for entire
19 families of devices. This serves similar purpose of CFI on legacy
20 Flash products
21endmenu
22
diff --git a/drivers/mtd/lpddr/Makefile b/drivers/mtd/lpddr/Makefile
new file mode 100644
index 000000000000..da48e46b5812
--- /dev/null
+++ b/drivers/mtd/lpddr/Makefile
@@ -0,0 +1,6 @@
1#
2# linux/drivers/mtd/lpddr/Makefile
3#
4
5obj-$(CONFIG_MTD_QINFO_PROBE) += qinfo_probe.o
6obj-$(CONFIG_MTD_LPDDR) += lpddr_cmds.o
diff --git a/drivers/mtd/lpddr/lpddr_cmds.c b/drivers/mtd/lpddr/lpddr_cmds.c
new file mode 100644
index 000000000000..e22ca49583e7
--- /dev/null
+++ b/drivers/mtd/lpddr/lpddr_cmds.c
@@ -0,0 +1,796 @@
1/*
2 * LPDDR flash memory device operations. This module provides read, write,
3 * erase, lock/unlock support for LPDDR flash memories
4 * (C) 2008 Korolev Alexey <akorolev@infradead.org>
5 * (C) 2008 Vasiliy Leonenko <vasiliy.leonenko@gmail.com>
6 * Many thanks to Roman Borisov for intial enabling
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version 2
11 * of the License, or (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 * 02110-1301, USA.
22 * TODO:
23 * Implement VPP management
24 * Implement XIP support
25 * Implement OTP support
26 */
27#include <linux/mtd/pfow.h>
28#include <linux/mtd/qinfo.h>
29
30static int lpddr_read(struct mtd_info *mtd, loff_t adr, size_t len,
31 size_t *retlen, u_char *buf);
32static int lpddr_write_buffers(struct mtd_info *mtd, loff_t to,
33 size_t len, size_t *retlen, const u_char *buf);
34static int lpddr_writev(struct mtd_info *mtd, const struct kvec *vecs,
35 unsigned long count, loff_t to, size_t *retlen);
36static int lpddr_erase(struct mtd_info *mtd, struct erase_info *instr);
37static int lpddr_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
38static int lpddr_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
39static int lpddr_point(struct mtd_info *mtd, loff_t adr, size_t len,
40 size_t *retlen, void **mtdbuf, resource_size_t *phys);
41static void lpddr_unpoint(struct mtd_info *mtd, loff_t adr, size_t len);
42static int get_chip(struct map_info *map, struct flchip *chip, int mode);
43static int chip_ready(struct map_info *map, struct flchip *chip, int mode);
44static void put_chip(struct map_info *map, struct flchip *chip);
45
46struct mtd_info *lpddr_cmdset(struct map_info *map)
47{
48 struct lpddr_private *lpddr = map->fldrv_priv;
49 struct flchip_shared *shared;
50 struct flchip *chip;
51 struct mtd_info *mtd;
52 int numchips;
53 int i, j;
54
55 mtd = kzalloc(sizeof(*mtd), GFP_KERNEL);
56 if (!mtd) {
57 printk(KERN_ERR "Failed to allocate memory for MTD device\n");
58 return NULL;
59 }
60 mtd->priv = map;
61 mtd->type = MTD_NORFLASH;
62
63 /* Fill in the default mtd operations */
64 mtd->read = lpddr_read;
65 mtd->type = MTD_NORFLASH;
66 mtd->flags = MTD_CAP_NORFLASH;
67 mtd->flags &= ~MTD_BIT_WRITEABLE;
68 mtd->erase = lpddr_erase;
69 mtd->write = lpddr_write_buffers;
70 mtd->writev = lpddr_writev;
71 mtd->read_oob = NULL;
72 mtd->write_oob = NULL;
73 mtd->sync = NULL;
74 mtd->lock = lpddr_lock;
75 mtd->unlock = lpddr_unlock;
76 mtd->suspend = NULL;
77 mtd->resume = NULL;
78 if (map_is_linear(map)) {
79 mtd->point = lpddr_point;
80 mtd->unpoint = lpddr_unpoint;
81 }
82 mtd->block_isbad = NULL;
83 mtd->block_markbad = NULL;
84 mtd->size = 1 << lpddr->qinfo->DevSizeShift;
85 mtd->erasesize = 1 << lpddr->qinfo->UniformBlockSizeShift;
86 mtd->writesize = 1 << lpddr->qinfo->BufSizeShift;
87
88 shared = kmalloc(sizeof(struct flchip_shared) * lpddr->numchips,
89 GFP_KERNEL);
90 if (!shared) {
91 kfree(lpddr);
92 kfree(mtd);
93 return NULL;
94 }
95
96 chip = &lpddr->chips[0];
97 numchips = lpddr->numchips / lpddr->qinfo->HWPartsNum;
98 for (i = 0; i < numchips; i++) {
99 shared[i].writing = shared[i].erasing = NULL;
100 spin_lock_init(&shared[i].lock);
101 for (j = 0; j < lpddr->qinfo->HWPartsNum; j++) {
102 *chip = lpddr->chips[i];
103 chip->start += j << lpddr->chipshift;
104 chip->oldstate = chip->state = FL_READY;
105 chip->priv = &shared[i];
106 /* those should be reset too since
107 they create memory references. */
108 init_waitqueue_head(&chip->wq);
109 spin_lock_init(&chip->_spinlock);
110 chip->mutex = &chip->_spinlock;
111 chip++;
112 }
113 }
114
115 return mtd;
116}
117EXPORT_SYMBOL(lpddr_cmdset);
118
119static int wait_for_ready(struct map_info *map, struct flchip *chip,
120 unsigned int chip_op_time)
121{
122 unsigned int timeo, reset_timeo, sleep_time;
123 unsigned int dsr;
124 flstate_t chip_state = chip->state;
125 int ret = 0;
126
127 /* set our timeout to 8 times the expected delay */
128 timeo = chip_op_time * 8;
129 if (!timeo)
130 timeo = 500000;
131 reset_timeo = timeo;
132 sleep_time = chip_op_time / 2;
133
134 for (;;) {
135 dsr = CMDVAL(map_read(map, map->pfow_base + PFOW_DSR));
136 if (dsr & DSR_READY_STATUS)
137 break;
138 if (!timeo) {
139 printk(KERN_ERR "%s: Flash timeout error state %d \n",
140 map->name, chip_state);
141 ret = -ETIME;
142 break;
143 }
144
145 /* OK Still waiting. Drop the lock, wait a while and retry. */
146 spin_unlock(chip->mutex);
147 if (sleep_time >= 1000000/HZ) {
148 /*
149 * Half of the normal delay still remaining
150 * can be performed with a sleeping delay instead
151 * of busy waiting.
152 */
153 msleep(sleep_time/1000);
154 timeo -= sleep_time;
155 sleep_time = 1000000/HZ;
156 } else {
157 udelay(1);
158 cond_resched();
159 timeo--;
160 }
161 spin_lock(chip->mutex);
162
163 while (chip->state != chip_state) {
164 /* Someone's suspended the operation: sleep */
165 DECLARE_WAITQUEUE(wait, current);
166 set_current_state(TASK_UNINTERRUPTIBLE);
167 add_wait_queue(&chip->wq, &wait);
168 spin_unlock(chip->mutex);
169 schedule();
170 remove_wait_queue(&chip->wq, &wait);
171 spin_lock(chip->mutex);
172 }
173 if (chip->erase_suspended || chip->write_suspended) {
174 /* Suspend has occured while sleep: reset timeout */
175 timeo = reset_timeo;
176 chip->erase_suspended = chip->write_suspended = 0;
177 }
178 }
179 /* check status for errors */
180 if (dsr & DSR_ERR) {
181 /* Clear DSR*/
182 map_write(map, CMD(~(DSR_ERR)), map->pfow_base + PFOW_DSR);
183 printk(KERN_WARNING"%s: Bad status on wait: 0x%x \n",
184 map->name, dsr);
185 print_drs_error(dsr);
186 ret = -EIO;
187 }
188 chip->state = FL_READY;
189 return ret;
190}
191
192static int get_chip(struct map_info *map, struct flchip *chip, int mode)
193{
194 int ret;
195 DECLARE_WAITQUEUE(wait, current);
196
197 retry:
198 if (chip->priv && (mode == FL_WRITING || mode == FL_ERASING)
199 && chip->state != FL_SYNCING) {
200 /*
201 * OK. We have possibility for contension on the write/erase
202 * operations which are global to the real chip and not per
203 * partition. So let's fight it over in the partition which
204 * currently has authority on the operation.
205 *
206 * The rules are as follows:
207 *
208 * - any write operation must own shared->writing.
209 *
210 * - any erase operation must own _both_ shared->writing and
211 * shared->erasing.
212 *
213 * - contension arbitration is handled in the owner's context.
214 *
215 * The 'shared' struct can be read and/or written only when
216 * its lock is taken.
217 */
218 struct flchip_shared *shared = chip->priv;
219 struct flchip *contender;
220 spin_lock(&shared->lock);
221 contender = shared->writing;
222 if (contender && contender != chip) {
223 /*
224 * The engine to perform desired operation on this
225 * partition is already in use by someone else.
226 * Let's fight over it in the context of the chip
227 * currently using it. If it is possible to suspend,
228 * that other partition will do just that, otherwise
229 * it'll happily send us to sleep. In any case, when
230 * get_chip returns success we're clear to go ahead.
231 */
232 ret = spin_trylock(contender->mutex);
233 spin_unlock(&shared->lock);
234 if (!ret)
235 goto retry;
236 spin_unlock(chip->mutex);
237 ret = chip_ready(map, contender, mode);
238 spin_lock(chip->mutex);
239
240 if (ret == -EAGAIN) {
241 spin_unlock(contender->mutex);
242 goto retry;
243 }
244 if (ret) {
245 spin_unlock(contender->mutex);
246 return ret;
247 }
248 spin_lock(&shared->lock);
249
250 /* We should not own chip if it is already in FL_SYNCING
251 * state. Put contender and retry. */
252 if (chip->state == FL_SYNCING) {
253 put_chip(map, contender);
254 spin_unlock(contender->mutex);
255 goto retry;
256 }
257 spin_unlock(contender->mutex);
258 }
259
260 /* Check if we have suspended erase on this chip.
261 Must sleep in such a case. */
262 if (mode == FL_ERASING && shared->erasing
263 && shared->erasing->oldstate == FL_ERASING) {
264 spin_unlock(&shared->lock);
265 set_current_state(TASK_UNINTERRUPTIBLE);
266 add_wait_queue(&chip->wq, &wait);
267 spin_unlock(chip->mutex);
268 schedule();
269 remove_wait_queue(&chip->wq, &wait);
270 spin_lock(chip->mutex);
271 goto retry;
272 }
273
274 /* We now own it */
275 shared->writing = chip;
276 if (mode == FL_ERASING)
277 shared->erasing = chip;
278 spin_unlock(&shared->lock);
279 }
280
281 ret = chip_ready(map, chip, mode);
282 if (ret == -EAGAIN)
283 goto retry;
284
285 return ret;
286}
287
288static int chip_ready(struct map_info *map, struct flchip *chip, int mode)
289{
290 struct lpddr_private *lpddr = map->fldrv_priv;
291 int ret = 0;
292 DECLARE_WAITQUEUE(wait, current);
293
294 /* Prevent setting state FL_SYNCING for chip in suspended state. */
295 if (FL_SYNCING == mode && FL_READY != chip->oldstate)
296 goto sleep;
297
298 switch (chip->state) {
299 case FL_READY:
300 case FL_JEDEC_QUERY:
301 return 0;
302
303 case FL_ERASING:
304 if (!lpddr->qinfo->SuspEraseSupp ||
305 !(mode == FL_READY || mode == FL_POINT))
306 goto sleep;
307
308 map_write(map, CMD(LPDDR_SUSPEND),
309 map->pfow_base + PFOW_PROGRAM_ERASE_SUSPEND);
310 chip->oldstate = FL_ERASING;
311 chip->state = FL_ERASE_SUSPENDING;
312 ret = wait_for_ready(map, chip, 0);
313 if (ret) {
314 /* Oops. something got wrong. */
315 /* Resume and pretend we weren't here. */
316 map_write(map, CMD(LPDDR_RESUME),
317 map->pfow_base + PFOW_COMMAND_CODE);
318 map_write(map, CMD(LPDDR_START_EXECUTION),
319 map->pfow_base + PFOW_COMMAND_EXECUTE);
320 chip->state = FL_ERASING;
321 chip->oldstate = FL_READY;
322 printk(KERN_ERR "%s: suspend operation failed."
323 "State may be wrong \n", map->name);
324 return -EIO;
325 }
326 chip->erase_suspended = 1;
327 chip->state = FL_READY;
328 return 0;
329 /* Erase suspend */
330 case FL_POINT:
331 /* Only if there's no operation suspended... */
332 if (mode == FL_READY && chip->oldstate == FL_READY)
333 return 0;
334
335 default:
336sleep:
337 set_current_state(TASK_UNINTERRUPTIBLE);
338 add_wait_queue(&chip->wq, &wait);
339 spin_unlock(chip->mutex);
340 schedule();
341 remove_wait_queue(&chip->wq, &wait);
342 spin_lock(chip->mutex);
343 return -EAGAIN;
344 }
345}
346
347static void put_chip(struct map_info *map, struct flchip *chip)
348{
349 if (chip->priv) {
350 struct flchip_shared *shared = chip->priv;
351 spin_lock(&shared->lock);
352 if (shared->writing == chip && chip->oldstate == FL_READY) {
353 /* We own the ability to write, but we're done */
354 shared->writing = shared->erasing;
355 if (shared->writing && shared->writing != chip) {
356 /* give back the ownership */
357 struct flchip *loaner = shared->writing;
358 spin_lock(loaner->mutex);
359 spin_unlock(&shared->lock);
360 spin_unlock(chip->mutex);
361 put_chip(map, loaner);
362 spin_lock(chip->mutex);
363 spin_unlock(loaner->mutex);
364 wake_up(&chip->wq);
365 return;
366 }
367 shared->erasing = NULL;
368 shared->writing = NULL;
369 } else if (shared->erasing == chip && shared->writing != chip) {
370 /*
371 * We own the ability to erase without the ability
372 * to write, which means the erase was suspended
373 * and some other partition is currently writing.
374 * Don't let the switch below mess things up since
375 * we don't have ownership to resume anything.
376 */
377 spin_unlock(&shared->lock);
378 wake_up(&chip->wq);
379 return;
380 }
381 spin_unlock(&shared->lock);
382 }
383
384 switch (chip->oldstate) {
385 case FL_ERASING:
386 chip->state = chip->oldstate;
387 map_write(map, CMD(LPDDR_RESUME),
388 map->pfow_base + PFOW_COMMAND_CODE);
389 map_write(map, CMD(LPDDR_START_EXECUTION),
390 map->pfow_base + PFOW_COMMAND_EXECUTE);
391 chip->oldstate = FL_READY;
392 chip->state = FL_ERASING;
393 break;
394 case FL_READY:
395 break;
396 default:
397 printk(KERN_ERR "%s: put_chip() called with oldstate %d!\n",
398 map->name, chip->oldstate);
399 }
400 wake_up(&chip->wq);
401}
402
403int do_write_buffer(struct map_info *map, struct flchip *chip,
404 unsigned long adr, const struct kvec **pvec,
405 unsigned long *pvec_seek, int len)
406{
407 struct lpddr_private *lpddr = map->fldrv_priv;
408 map_word datum;
409 int ret, wbufsize, word_gap, words;
410 const struct kvec *vec;
411 unsigned long vec_seek;
412 unsigned long prog_buf_ofs;
413
414 wbufsize = 1 << lpddr->qinfo->BufSizeShift;
415
416 spin_lock(chip->mutex);
417 ret = get_chip(map, chip, FL_WRITING);
418 if (ret) {
419 spin_unlock(chip->mutex);
420 return ret;
421 }
422 /* Figure out the number of words to write */
423 word_gap = (-adr & (map_bankwidth(map)-1));
424 words = (len - word_gap + map_bankwidth(map) - 1) / map_bankwidth(map);
425 if (!word_gap) {
426 words--;
427 } else {
428 word_gap = map_bankwidth(map) - word_gap;
429 adr -= word_gap;
430 datum = map_word_ff(map);
431 }
432 /* Write data */
433 /* Get the program buffer offset from PFOW register data first*/
434 prog_buf_ofs = map->pfow_base + CMDVAL(map_read(map,
435 map->pfow_base + PFOW_PROGRAM_BUFFER_OFFSET));
436 vec = *pvec;
437 vec_seek = *pvec_seek;
438 do {
439 int n = map_bankwidth(map) - word_gap;
440
441 if (n > vec->iov_len - vec_seek)
442 n = vec->iov_len - vec_seek;
443 if (n > len)
444 n = len;
445
446 if (!word_gap && (len < map_bankwidth(map)))
447 datum = map_word_ff(map);
448
449 datum = map_word_load_partial(map, datum,
450 vec->iov_base + vec_seek, word_gap, n);
451
452 len -= n;
453 word_gap += n;
454 if (!len || word_gap == map_bankwidth(map)) {
455 map_write(map, datum, prog_buf_ofs);
456 prog_buf_ofs += map_bankwidth(map);
457 word_gap = 0;
458 }
459
460 vec_seek += n;
461 if (vec_seek == vec->iov_len) {
462 vec++;
463 vec_seek = 0;
464 }
465 } while (len);
466 *pvec = vec;
467 *pvec_seek = vec_seek;
468
469 /* GO GO GO */
470 send_pfow_command(map, LPDDR_BUFF_PROGRAM, adr, wbufsize, NULL);
471 chip->state = FL_WRITING;
472 ret = wait_for_ready(map, chip, (1<<lpddr->qinfo->ProgBufferTime));
473 if (ret) {
474 printk(KERN_WARNING"%s Buffer program error: %d at %lx; \n",
475 map->name, ret, adr);
476 goto out;
477 }
478
479 out: put_chip(map, chip);
480 spin_unlock(chip->mutex);
481 return ret;
482}
483
484int do_erase_oneblock(struct mtd_info *mtd, loff_t adr)
485{
486 struct map_info *map = mtd->priv;
487 struct lpddr_private *lpddr = map->fldrv_priv;
488 int chipnum = adr >> lpddr->chipshift;
489 struct flchip *chip = &lpddr->chips[chipnum];
490 int ret;
491
492 spin_lock(chip->mutex);
493 ret = get_chip(map, chip, FL_ERASING);
494 if (ret) {
495 spin_unlock(chip->mutex);
496 return ret;
497 }
498 send_pfow_command(map, LPDDR_BLOCK_ERASE, adr, 0, NULL);
499 chip->state = FL_ERASING;
500 ret = wait_for_ready(map, chip, (1<<lpddr->qinfo->BlockEraseTime)*1000);
501 if (ret) {
502 printk(KERN_WARNING"%s Erase block error %d at : %llx\n",
503 map->name, ret, adr);
504 goto out;
505 }
506 out: put_chip(map, chip);
507 spin_unlock(chip->mutex);
508 return ret;
509}
510
511static int lpddr_read(struct mtd_info *mtd, loff_t adr, size_t len,
512 size_t *retlen, u_char *buf)
513{
514 struct map_info *map = mtd->priv;
515 struct lpddr_private *lpddr = map->fldrv_priv;
516 int chipnum = adr >> lpddr->chipshift;
517 struct flchip *chip = &lpddr->chips[chipnum];
518 int ret = 0;
519
520 spin_lock(chip->mutex);
521 ret = get_chip(map, chip, FL_READY);
522 if (ret) {
523 spin_unlock(chip->mutex);
524 return ret;
525 }
526
527 map_copy_from(map, buf, adr, len);
528 *retlen = len;
529
530 put_chip(map, chip);
531 spin_unlock(chip->mutex);
532 return ret;
533}
534
535static int lpddr_point(struct mtd_info *mtd, loff_t adr, size_t len,
536 size_t *retlen, void **mtdbuf, resource_size_t *phys)
537{
538 struct map_info *map = mtd->priv;
539 struct lpddr_private *lpddr = map->fldrv_priv;
540 int chipnum = adr >> lpddr->chipshift;
541 unsigned long ofs, last_end = 0;
542 struct flchip *chip = &lpddr->chips[chipnum];
543 int ret = 0;
544
545 if (!map->virt || (adr + len > mtd->size))
546 return -EINVAL;
547
548 /* ofs: offset within the first chip that the first read should start */
549 ofs = adr - (chipnum << lpddr->chipshift);
550
551 *mtdbuf = (void *)map->virt + chip->start + ofs;
552 *retlen = 0;
553
554 while (len) {
555 unsigned long thislen;
556
557 if (chipnum >= lpddr->numchips)
558 break;
559
560 /* We cannot point across chips that are virtually disjoint */
561 if (!last_end)
562 last_end = chip->start;
563 else if (chip->start != last_end)
564 break;
565
566 if ((len + ofs - 1) >> lpddr->chipshift)
567 thislen = (1<<lpddr->chipshift) - ofs;
568 else
569 thislen = len;
570 /* get the chip */
571 spin_lock(chip->mutex);
572 ret = get_chip(map, chip, FL_POINT);
573 spin_unlock(chip->mutex);
574 if (ret)
575 break;
576
577 chip->state = FL_POINT;
578 chip->ref_point_counter++;
579 *retlen += thislen;
580 len -= thislen;
581
582 ofs = 0;
583 last_end += 1 << lpddr->chipshift;
584 chipnum++;
585 chip = &lpddr->chips[chipnum];
586 }
587 return 0;
588}
589
590static void lpddr_unpoint (struct mtd_info *mtd, loff_t adr, size_t len)
591{
592 struct map_info *map = mtd->priv;
593 struct lpddr_private *lpddr = map->fldrv_priv;
594 int chipnum = adr >> lpddr->chipshift;
595 unsigned long ofs;
596
597 /* ofs: offset within the first chip that the first read should start */
598 ofs = adr - (chipnum << lpddr->chipshift);
599
600 while (len) {
601 unsigned long thislen;
602 struct flchip *chip;
603
604 chip = &lpddr->chips[chipnum];
605 if (chipnum >= lpddr->numchips)
606 break;
607
608 if ((len + ofs - 1) >> lpddr->chipshift)
609 thislen = (1<<lpddr->chipshift) - ofs;
610 else
611 thislen = len;
612
613 spin_lock(chip->mutex);
614 if (chip->state == FL_POINT) {
615 chip->ref_point_counter--;
616 if (chip->ref_point_counter == 0)
617 chip->state = FL_READY;
618 } else
619 printk(KERN_WARNING "%s: Warning: unpoint called on non"
620 "pointed region\n", map->name);
621
622 put_chip(map, chip);
623 spin_unlock(chip->mutex);
624
625 len -= thislen;
626 ofs = 0;
627 chipnum++;
628 }
629}
630
631static int lpddr_write_buffers(struct mtd_info *mtd, loff_t to, size_t len,
632 size_t *retlen, const u_char *buf)
633{
634 struct kvec vec;
635
636 vec.iov_base = (void *) buf;
637 vec.iov_len = len;
638
639 return lpddr_writev(mtd, &vec, 1, to, retlen);
640}
641
642
643static int lpddr_writev(struct mtd_info *mtd, const struct kvec *vecs,
644 unsigned long count, loff_t to, size_t *retlen)
645{
646 struct map_info *map = mtd->priv;
647 struct lpddr_private *lpddr = map->fldrv_priv;
648 int ret = 0;
649 int chipnum;
650 unsigned long ofs, vec_seek, i;
651 int wbufsize = 1 << lpddr->qinfo->BufSizeShift;
652
653 size_t len = 0;
654
655 for (i = 0; i < count; i++)
656 len += vecs[i].iov_len;
657
658 *retlen = 0;
659 if (!len)
660 return 0;
661
662 chipnum = to >> lpddr->chipshift;
663
664 ofs = to;
665 vec_seek = 0;
666
667 do {
668 /* We must not cross write block boundaries */
669 int size = wbufsize - (ofs & (wbufsize-1));
670
671 if (size > len)
672 size = len;
673
674 ret = do_write_buffer(map, &lpddr->chips[chipnum],
675 ofs, &vecs, &vec_seek, size);
676 if (ret)
677 return ret;
678
679 ofs += size;
680 (*retlen) += size;
681 len -= size;
682
683 /* Be nice and reschedule with the chip in a usable
684 * state for other processes */
685 cond_resched();
686
687 } while (len);
688
689 return 0;
690}
691
692static int lpddr_erase(struct mtd_info *mtd, struct erase_info *instr)
693{
694 unsigned long ofs, len;
695 int ret;
696 struct map_info *map = mtd->priv;
697 struct lpddr_private *lpddr = map->fldrv_priv;
698 int size = 1 << lpddr->qinfo->UniformBlockSizeShift;
699
700 ofs = instr->addr;
701 len = instr->len;
702
703 if (ofs > mtd->size || (len + ofs) > mtd->size)
704 return -EINVAL;
705
706 while (len > 0) {
707 ret = do_erase_oneblock(mtd, ofs);
708 if (ret)
709 return ret;
710 ofs += size;
711 len -= size;
712 }
713 instr->state = MTD_ERASE_DONE;
714 mtd_erase_callback(instr);
715
716 return 0;
717}
718
719#define DO_XXLOCK_LOCK 1
720#define DO_XXLOCK_UNLOCK 2
721int do_xxlock(struct mtd_info *mtd, loff_t adr, uint32_t len, int thunk)
722{
723 int ret = 0;
724 struct map_info *map = mtd->priv;
725 struct lpddr_private *lpddr = map->fldrv_priv;
726 int chipnum = adr >> lpddr->chipshift;
727 struct flchip *chip = &lpddr->chips[chipnum];
728
729 spin_lock(chip->mutex);
730 ret = get_chip(map, chip, FL_LOCKING);
731 if (ret) {
732 spin_unlock(chip->mutex);
733 return ret;
734 }
735
736 if (thunk == DO_XXLOCK_LOCK) {
737 send_pfow_command(map, LPDDR_LOCK_BLOCK, adr, adr + len, NULL);
738 chip->state = FL_LOCKING;
739 } else if (thunk == DO_XXLOCK_UNLOCK) {
740 send_pfow_command(map, LPDDR_UNLOCK_BLOCK, adr, adr + len, NULL);
741 chip->state = FL_UNLOCKING;
742 } else
743 BUG();
744
745 ret = wait_for_ready(map, chip, 1);
746 if (ret) {
747 printk(KERN_ERR "%s: block unlock error status %d \n",
748 map->name, ret);
749 goto out;
750 }
751out: put_chip(map, chip);
752 spin_unlock(chip->mutex);
753 return ret;
754}
755
756static int lpddr_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
757{
758 return do_xxlock(mtd, ofs, len, DO_XXLOCK_LOCK);
759}
760
761static int lpddr_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
762{
763 return do_xxlock(mtd, ofs, len, DO_XXLOCK_UNLOCK);
764}
765
766int word_program(struct map_info *map, loff_t adr, uint32_t curval)
767{
768 int ret;
769 struct lpddr_private *lpddr = map->fldrv_priv;
770 int chipnum = adr >> lpddr->chipshift;
771 struct flchip *chip = &lpddr->chips[chipnum];
772
773 spin_lock(chip->mutex);
774 ret = get_chip(map, chip, FL_WRITING);
775 if (ret) {
776 spin_unlock(chip->mutex);
777 return ret;
778 }
779
780 send_pfow_command(map, LPDDR_WORD_PROGRAM, adr, 0x00, (map_word *)&curval);
781
782 ret = wait_for_ready(map, chip, (1<<lpddr->qinfo->SingleWordProgTime));
783 if (ret) {
784 printk(KERN_WARNING"%s word_program error at: %llx; val: %x\n",
785 map->name, adr, curval);
786 goto out;
787 }
788
789out: put_chip(map, chip);
790 spin_unlock(chip->mutex);
791 return ret;
792}
793
794MODULE_LICENSE("GPL");
795MODULE_AUTHOR("Alexey Korolev <akorolev@infradead.org>");
796MODULE_DESCRIPTION("MTD driver for LPDDR flash chips");
diff --git a/drivers/mtd/lpddr/qinfo_probe.c b/drivers/mtd/lpddr/qinfo_probe.c
new file mode 100644
index 000000000000..79bf40f48b75
--- /dev/null
+++ b/drivers/mtd/lpddr/qinfo_probe.c
@@ -0,0 +1,255 @@
1/*
2 * Probing flash chips with QINFO records.
3 * (C) 2008 Korolev Alexey <akorolev@infradead.org>
4 * (C) 2008 Vasiliy Leonenko <vasiliy.leonenko@gmail.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 * 02110-1301, USA.
20 */
21#include <linux/module.h>
22#include <linux/types.h>
23#include <linux/kernel.h>
24#include <linux/init.h>
25#include <linux/errno.h>
26#include <linux/slab.h>
27#include <linux/interrupt.h>
28
29#include <linux/mtd/xip.h>
30#include <linux/mtd/map.h>
31#include <linux/mtd/pfow.h>
32#include <linux/mtd/qinfo.h>
33
34static int lpddr_chip_setup(struct map_info *map, struct lpddr_private *lpddr);
35struct mtd_info *lpddr_probe(struct map_info *map);
36static struct lpddr_private *lpddr_probe_chip(struct map_info *map);
37static int lpddr_pfow_present(struct map_info *map,
38 struct lpddr_private *lpddr);
39
40static struct qinfo_query_info qinfo_array[] = {
41 /* General device info */
42 {0, 0, "DevSizeShift", "Device size 2^n bytes"},
43 {0, 3, "BufSizeShift", "Program buffer size 2^n bytes"},
44 /* Erase block information */
45 {1, 1, "TotalBlocksNum", "Total number of blocks"},
46 {1, 2, "UniformBlockSizeShift", "Uniform block size 2^n bytes"},
47 /* Partition information */
48 {2, 1, "HWPartsNum", "Number of hardware partitions"},
49 /* Optional features */
50 {5, 1, "SuspEraseSupp", "Suspend erase supported"},
51 /* Operation typical time */
52 {10, 0, "SingleWordProgTime", "Single word program 2^n u-sec"},
53 {10, 1, "ProgBufferTime", "Program buffer write 2^n u-sec"},
54 {10, 2, "BlockEraseTime", "Block erase 2^n m-sec"},
55 {10, 3, "FullChipEraseTime", "Full chip erase 2^n m-sec"},
56};
57
58static long lpddr_get_qinforec_pos(struct map_info *map, char *id_str)
59{
60 int qinfo_lines = sizeof(qinfo_array)/sizeof(struct qinfo_query_info);
61 int i;
62 int bankwidth = map_bankwidth(map) * 8;
63 int major, minor;
64
65 for (i = 0; i < qinfo_lines; i++) {
66 if (strcmp(id_str, qinfo_array[i].id_str) == 0) {
67 major = qinfo_array[i].major & ((1 << bankwidth) - 1);
68 minor = qinfo_array[i].minor & ((1 << bankwidth) - 1);
69 return minor | (major << bankwidth);
70 }
71 }
72 printk(KERN_ERR"%s qinfo id string is wrong! \n", map->name);
73 BUG();
74 return -1;
75}
76
77static uint16_t lpddr_info_query(struct map_info *map, char *id_str)
78{
79 unsigned int dsr, val;
80 int bits_per_chip = map_bankwidth(map) * 8;
81 unsigned long adr = lpddr_get_qinforec_pos(map, id_str);
82 int attempts = 20;
83
84 /* Write a request for the PFOW record */
85 map_write(map, CMD(LPDDR_INFO_QUERY),
86 map->pfow_base + PFOW_COMMAND_CODE);
87 map_write(map, CMD(adr & ((1 << bits_per_chip) - 1)),
88 map->pfow_base + PFOW_COMMAND_ADDRESS_L);
89 map_write(map, CMD(adr >> bits_per_chip),
90 map->pfow_base + PFOW_COMMAND_ADDRESS_H);
91 map_write(map, CMD(LPDDR_START_EXECUTION),
92 map->pfow_base + PFOW_COMMAND_EXECUTE);
93
94 while ((attempts--) > 0) {
95 dsr = CMDVAL(map_read(map, map->pfow_base + PFOW_DSR));
96 if (dsr & DSR_READY_STATUS)
97 break;
98 udelay(10);
99 }
100
101 val = CMDVAL(map_read(map, map->pfow_base + PFOW_COMMAND_DATA));
102 return val;
103}
104
105static int lpddr_pfow_present(struct map_info *map, struct lpddr_private *lpddr)
106{
107 map_word pfow_val[4];
108
109 /* Check identification string */
110 pfow_val[0] = map_read(map, map->pfow_base + PFOW_QUERY_STRING_P);
111 pfow_val[1] = map_read(map, map->pfow_base + PFOW_QUERY_STRING_F);
112 pfow_val[2] = map_read(map, map->pfow_base + PFOW_QUERY_STRING_O);
113 pfow_val[3] = map_read(map, map->pfow_base + PFOW_QUERY_STRING_W);
114
115 if (!map_word_equal(map, CMD('P'), pfow_val[0]))
116 goto out;
117
118 if (!map_word_equal(map, CMD('F'), pfow_val[1]))
119 goto out;
120
121 if (!map_word_equal(map, CMD('O'), pfow_val[2]))
122 goto out;
123
124 if (!map_word_equal(map, CMD('W'), pfow_val[3]))
125 goto out;
126
127 return 1; /* "PFOW" is found */
128out:
129 printk(KERN_WARNING"%s: PFOW string at 0x%lx is not found \n",
130 map->name, map->pfow_base);
131 return 0;
132}
133
134static int lpddr_chip_setup(struct map_info *map, struct lpddr_private *lpddr)
135{
136
137 lpddr->qinfo = kmalloc(sizeof(struct qinfo_chip), GFP_KERNEL);
138 if (!lpddr->qinfo) {
139 printk(KERN_WARNING "%s: no memory for LPDDR qinfo structure\n",
140 map->name);
141 return 0;
142 }
143 memset(lpddr->qinfo, 0, sizeof(struct qinfo_chip));
144
145 /* Get the ManuID */
146 lpddr->ManufactId = CMDVAL(map_read(map, map->pfow_base + PFOW_MANUFACTURER_ID));
147 /* Get the DeviceID */
148 lpddr->DevId = CMDVAL(map_read(map, map->pfow_base + PFOW_DEVICE_ID));
149 /* read parameters from chip qinfo table */
150 lpddr->qinfo->DevSizeShift = lpddr_info_query(map, "DevSizeShift");
151 lpddr->qinfo->TotalBlocksNum = lpddr_info_query(map, "TotalBlocksNum");
152 lpddr->qinfo->BufSizeShift = lpddr_info_query(map, "BufSizeShift");
153 lpddr->qinfo->HWPartsNum = lpddr_info_query(map, "HWPartsNum");
154 lpddr->qinfo->UniformBlockSizeShift =
155 lpddr_info_query(map, "UniformBlockSizeShift");
156 lpddr->qinfo->SuspEraseSupp = lpddr_info_query(map, "SuspEraseSupp");
157 lpddr->qinfo->SingleWordProgTime =
158 lpddr_info_query(map, "SingleWordProgTime");
159 lpddr->qinfo->ProgBufferTime = lpddr_info_query(map, "ProgBufferTime");
160 lpddr->qinfo->BlockEraseTime = lpddr_info_query(map, "BlockEraseTime");
161 return 1;
162}
163static struct lpddr_private *lpddr_probe_chip(struct map_info *map)
164{
165 struct lpddr_private lpddr;
166 struct lpddr_private *retlpddr;
167 int numvirtchips;
168
169
170 if ((map->pfow_base + 0x1000) >= map->size) {
171 printk(KERN_NOTICE"%s Probe at base (0x%08lx) past the end of"
172 "the map(0x%08lx)\n", map->name,
173 (unsigned long)map->pfow_base, map->size - 1);
174 return NULL;
175 }
176 memset(&lpddr, 0, sizeof(struct lpddr_private));
177 if (!lpddr_pfow_present(map, &lpddr))
178 return NULL;
179
180 if (!lpddr_chip_setup(map, &lpddr))
181 return NULL;
182
183 /* Ok so we found a chip */
184 lpddr.chipshift = lpddr.qinfo->DevSizeShift;
185 lpddr.numchips = 1;
186
187 numvirtchips = lpddr.numchips * lpddr.qinfo->HWPartsNum;
188 retlpddr = kmalloc(sizeof(struct lpddr_private) +
189 numvirtchips * sizeof(struct flchip), GFP_KERNEL);
190 if (!retlpddr)
191 return NULL;
192
193 memset(retlpddr, 0, sizeof(struct lpddr_private) +
194 numvirtchips * sizeof(struct flchip));
195 memcpy(retlpddr, &lpddr, sizeof(struct lpddr_private));
196
197 retlpddr->numchips = numvirtchips;
198 retlpddr->chipshift = retlpddr->qinfo->DevSizeShift -
199 __ffs(retlpddr->qinfo->HWPartsNum);
200
201 return retlpddr;
202}
203
204struct mtd_info *lpddr_probe(struct map_info *map)
205{
206 struct mtd_info *mtd = NULL;
207 struct lpddr_private *lpddr;
208
209 /* First probe the map to see if we havecan open PFOW here */
210 lpddr = lpddr_probe_chip(map);
211 if (!lpddr)
212 return NULL;
213
214 map->fldrv_priv = lpddr;
215 mtd = lpddr_cmdset(map);
216 if (mtd) {
217 if (mtd->size > map->size) {
218 printk(KERN_WARNING "Reducing visibility of %ldKiB chip"
219 "to %ldKiB\n", (unsigned long)mtd->size >> 10,
220 (unsigned long)map->size >> 10);
221 mtd->size = map->size;
222 }
223 return mtd;
224 }
225
226 kfree(lpddr->qinfo);
227 kfree(lpddr);
228 map->fldrv_priv = NULL;
229 return NULL;
230}
231
232static struct mtd_chip_driver lpddr_chipdrv = {
233 .probe = lpddr_probe,
234 .name = "qinfo_probe",
235 .module = THIS_MODULE
236};
237
238static int __init lpddr_probe_init(void)
239{
240 register_mtd_chip_driver(&lpddr_chipdrv);
241 return 0;
242}
243
244static void __exit lpddr_probe_exit(void)
245{
246 unregister_mtd_chip_driver(&lpddr_chipdrv);
247}
248
249module_init(lpddr_probe_init);
250module_exit(lpddr_probe_exit);
251
252MODULE_LICENSE("GPL");
253MODULE_AUTHOR("Vasiliy Leonenko <vasiliy.leonenko@gmail.com>");
254MODULE_DESCRIPTION("Driver to probe qinfo flash chips");
255
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index 5ea169362164..0225cbbf22de 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -10,8 +10,8 @@ config MTD_COMPLEX_MAPPINGS
10 paged mappings of flash chips. 10 paged mappings of flash chips.
11 11
12config MTD_PHYSMAP 12config MTD_PHYSMAP
13 tristate "CFI Flash device in physical memory map" 13 tristate "Flash device in physical memory map"
14 depends on MTD_CFI || MTD_JEDECPROBE || MTD_ROM 14 depends on MTD_CFI || MTD_JEDECPROBE || MTD_ROM || MTD_LPDDR
15 help 15 help
16 This provides a 'mapping' driver which allows the NOR Flash and 16 This provides a 'mapping' driver which allows the NOR Flash and
17 ROM driver code to communicate with chips which are mapped 17 ROM driver code to communicate with chips which are mapped
@@ -23,9 +23,20 @@ config MTD_PHYSMAP
23 To compile this driver as a module, choose M here: the 23 To compile this driver as a module, choose M here: the
24 module will be called physmap. 24 module will be called physmap.
25 25
26config MTD_PHYSMAP_COMPAT
27 bool "Physmap compat support"
28 depends on MTD_PHYSMAP
29 default n
30 help
31 Setup a simple mapping via the Kconfig options. Normally the
32 physmap configuration options are done via your board's
33 resource file.
34
35 If unsure, say N here.
36
26config MTD_PHYSMAP_START 37config MTD_PHYSMAP_START
27 hex "Physical start address of flash mapping" 38 hex "Physical start address of flash mapping"
28 depends on MTD_PHYSMAP 39 depends on MTD_PHYSMAP_COMPAT
29 default "0x8000000" 40 default "0x8000000"
30 help 41 help
31 This is the physical memory location at which the flash chips 42 This is the physical memory location at which the flash chips
@@ -37,7 +48,7 @@ config MTD_PHYSMAP_START
37 48
38config MTD_PHYSMAP_LEN 49config MTD_PHYSMAP_LEN
39 hex "Physical length of flash mapping" 50 hex "Physical length of flash mapping"
40 depends on MTD_PHYSMAP 51 depends on MTD_PHYSMAP_COMPAT
41 default "0" 52 default "0"
42 help 53 help
43 This is the total length of the mapping of the flash chips on 54 This is the total length of the mapping of the flash chips on
@@ -51,7 +62,7 @@ config MTD_PHYSMAP_LEN
51 62
52config MTD_PHYSMAP_BANKWIDTH 63config MTD_PHYSMAP_BANKWIDTH
53 int "Bank width in octets" 64 int "Bank width in octets"
54 depends on MTD_PHYSMAP 65 depends on MTD_PHYSMAP_COMPAT
55 default "2" 66 default "2"
56 help 67 help
57 This is the total width of the data bus of the flash devices 68 This is the total width of the data bus of the flash devices
diff --git a/drivers/mtd/maps/alchemy-flash.c b/drivers/mtd/maps/alchemy-flash.c
index 82811bcb0436..845ad4f2a542 100644
--- a/drivers/mtd/maps/alchemy-flash.c
+++ b/drivers/mtd/maps/alchemy-flash.c
@@ -111,7 +111,7 @@ static struct mtd_partition alchemy_partitions[] = {
111 111
112static struct mtd_info *mymtd; 112static struct mtd_info *mymtd;
113 113
114int __init alchemy_mtd_init(void) 114static int __init alchemy_mtd_init(void)
115{ 115{
116 struct mtd_partition *parts; 116 struct mtd_partition *parts;
117 int nb_parts = 0; 117 int nb_parts = 0;
diff --git a/drivers/mtd/maps/amd76xrom.c b/drivers/mtd/maps/amd76xrom.c
index d1eec7d3243f..237733d094c4 100644
--- a/drivers/mtd/maps/amd76xrom.c
+++ b/drivers/mtd/maps/amd76xrom.c
@@ -232,8 +232,8 @@ static int __devinit amd76xrom_init_one (struct pci_dev *pdev,
232 /* Trim the size if we are larger than the map */ 232 /* Trim the size if we are larger than the map */
233 if (map->mtd->size > map->map.size) { 233 if (map->mtd->size > map->map.size) {
234 printk(KERN_WARNING MOD_NAME 234 printk(KERN_WARNING MOD_NAME
235 " rom(%u) larger than window(%lu). fixing...\n", 235 " rom(%llu) larger than window(%lu). fixing...\n",
236 map->mtd->size, map->map.size); 236 (unsigned long long)map->mtd->size, map->map.size);
237 map->mtd->size = map->map.size; 237 map->mtd->size = map->map.size;
238 } 238 }
239 if (window->rsrc.parent) { 239 if (window->rsrc.parent) {
diff --git a/drivers/mtd/maps/cfi_flagadm.c b/drivers/mtd/maps/cfi_flagadm.c
index 0ecc3f6d735b..b4ed81611918 100644
--- a/drivers/mtd/maps/cfi_flagadm.c
+++ b/drivers/mtd/maps/cfi_flagadm.c
@@ -88,7 +88,7 @@ struct mtd_partition flagadm_parts[] = {
88 88
89static struct mtd_info *mymtd; 89static struct mtd_info *mymtd;
90 90
91int __init init_flagadm(void) 91static int __init init_flagadm(void)
92{ 92{
93 printk(KERN_NOTICE "FlagaDM flash device: %x at %x\n", 93 printk(KERN_NOTICE "FlagaDM flash device: %x at %x\n",
94 FLASH_SIZE, FLASH_PHYS_ADDR); 94 FLASH_SIZE, FLASH_PHYS_ADDR);
diff --git a/drivers/mtd/maps/ck804xrom.c b/drivers/mtd/maps/ck804xrom.c
index 1a6feb4474de..5f7a245ed132 100644
--- a/drivers/mtd/maps/ck804xrom.c
+++ b/drivers/mtd/maps/ck804xrom.c
@@ -263,8 +263,8 @@ static int __devinit ck804xrom_init_one (struct pci_dev *pdev,
263 /* Trim the size if we are larger than the map */ 263 /* Trim the size if we are larger than the map */
264 if (map->mtd->size > map->map.size) { 264 if (map->mtd->size > map->map.size) {
265 printk(KERN_WARNING MOD_NAME 265 printk(KERN_WARNING MOD_NAME
266 " rom(%u) larger than window(%lu). fixing...\n", 266 " rom(%llu) larger than window(%lu). fixing...\n",
267 map->mtd->size, map->map.size); 267 (unsigned long long)map->mtd->size, map->map.size);
268 map->mtd->size = map->map.size; 268 map->mtd->size = map->map.size;
269 } 269 }
270 if (window->rsrc.parent) { 270 if (window->rsrc.parent) {
diff --git a/drivers/mtd/maps/dbox2-flash.c b/drivers/mtd/maps/dbox2-flash.c
index e115667bf1d0..cfacfa6f45dd 100644
--- a/drivers/mtd/maps/dbox2-flash.c
+++ b/drivers/mtd/maps/dbox2-flash.c
@@ -69,7 +69,7 @@ struct map_info dbox2_flash_map = {
69 .phys = WINDOW_ADDR, 69 .phys = WINDOW_ADDR,
70}; 70};
71 71
72int __init init_dbox2_flash(void) 72static int __init init_dbox2_flash(void)
73{ 73{
74 printk(KERN_NOTICE "D-Box 2 flash driver (size->0x%X mem->0x%X)\n", WINDOW_SIZE, WINDOW_ADDR); 74 printk(KERN_NOTICE "D-Box 2 flash driver (size->0x%X mem->0x%X)\n", WINDOW_SIZE, WINDOW_ADDR);
75 dbox2_flash_map.virt = ioremap(WINDOW_ADDR, WINDOW_SIZE); 75 dbox2_flash_map.virt = ioremap(WINDOW_ADDR, WINDOW_SIZE);
diff --git a/drivers/mtd/maps/edb7312.c b/drivers/mtd/maps/edb7312.c
index 9433738c1664..be9e90b44587 100644
--- a/drivers/mtd/maps/edb7312.c
+++ b/drivers/mtd/maps/edb7312.c
@@ -71,7 +71,7 @@ static const char *probes[] = { "RedBoot", "cmdlinepart", NULL };
71static int mtd_parts_nb = 0; 71static int mtd_parts_nb = 0;
72static struct mtd_partition *mtd_parts = 0; 72static struct mtd_partition *mtd_parts = 0;
73 73
74int __init init_edb7312nor(void) 74static int __init init_edb7312nor(void)
75{ 75{
76 static const char *rom_probe_types[] = PROBETYPES; 76 static const char *rom_probe_types[] = PROBETYPES;
77 const char **type; 77 const char **type;
diff --git a/drivers/mtd/maps/esb2rom.c b/drivers/mtd/maps/esb2rom.c
index bbbcdd4c8d13..11a2f57df9cf 100644
--- a/drivers/mtd/maps/esb2rom.c
+++ b/drivers/mtd/maps/esb2rom.c
@@ -324,8 +324,8 @@ static int __devinit esb2rom_init_one(struct pci_dev *pdev,
324 /* Trim the size if we are larger than the map */ 324 /* Trim the size if we are larger than the map */
325 if (map->mtd->size > map->map.size) { 325 if (map->mtd->size > map->map.size) {
326 printk(KERN_WARNING MOD_NAME 326 printk(KERN_WARNING MOD_NAME
327 " rom(%u) larger than window(%lu). fixing...\n", 327 " rom(%llu) larger than window(%lu). fixing...\n",
328 map->mtd->size, map->map.size); 328 (unsigned long long)map->mtd->size, map->map.size);
329 map->mtd->size = map->map.size; 329 map->mtd->size = map->map.size;
330 } 330 }
331 if (window->rsrc.parent) { 331 if (window->rsrc.parent) {
diff --git a/drivers/mtd/maps/fortunet.c b/drivers/mtd/maps/fortunet.c
index a8e3fde4cbd5..1e43124d498b 100644
--- a/drivers/mtd/maps/fortunet.c
+++ b/drivers/mtd/maps/fortunet.c
@@ -181,7 +181,7 @@ __setup("MTD_Partition=", MTD_New_Partition);
181/* Backwards-spelling-compatibility */ 181/* Backwards-spelling-compatibility */
182__setup("MTD_Partion=", MTD_New_Partition); 182__setup("MTD_Partion=", MTD_New_Partition);
183 183
184int __init init_fortunet(void) 184static int __init init_fortunet(void)
185{ 185{
186 int ix,iy; 186 int ix,iy;
187 for(iy=ix=0;ix<MAX_NUM_REGIONS;ix++) 187 for(iy=ix=0;ix<MAX_NUM_REGIONS;ix++)
diff --git a/drivers/mtd/maps/h720x-flash.c b/drivers/mtd/maps/h720x-flash.c
index 3b959fad1c4e..72c724fa8c27 100644
--- a/drivers/mtd/maps/h720x-flash.c
+++ b/drivers/mtd/maps/h720x-flash.c
@@ -65,7 +65,7 @@ static const char *probes[] = { "cmdlinepart", NULL };
65/* 65/*
66 * Initialize FLASH support 66 * Initialize FLASH support
67 */ 67 */
68int __init h720x_mtd_init(void) 68static int __init h720x_mtd_init(void)
69{ 69{
70 70
71 char *part_type = NULL; 71 char *part_type = NULL;
diff --git a/drivers/mtd/maps/ichxrom.c b/drivers/mtd/maps/ichxrom.c
index aeb6c916e23f..c32bc28920b3 100644
--- a/drivers/mtd/maps/ichxrom.c
+++ b/drivers/mtd/maps/ichxrom.c
@@ -258,8 +258,8 @@ static int __devinit ichxrom_init_one (struct pci_dev *pdev,
258 /* Trim the size if we are larger than the map */ 258 /* Trim the size if we are larger than the map */
259 if (map->mtd->size > map->map.size) { 259 if (map->mtd->size > map->map.size) {
260 printk(KERN_WARNING MOD_NAME 260 printk(KERN_WARNING MOD_NAME
261 " rom(%u) larger than window(%lu). fixing...\n", 261 " rom(%llu) larger than window(%lu). fixing...\n",
262 map->mtd->size, map->map.size); 262 (unsigned long long)map->mtd->size, map->map.size);
263 map->mtd->size = map->map.size; 263 map->mtd->size = map->map.size;
264 } 264 }
265 if (window->rsrc.parent) { 265 if (window->rsrc.parent) {
diff --git a/drivers/mtd/maps/impa7.c b/drivers/mtd/maps/impa7.c
index 2682ab51a367..998a27da97f3 100644
--- a/drivers/mtd/maps/impa7.c
+++ b/drivers/mtd/maps/impa7.c
@@ -70,7 +70,7 @@ static struct mtd_partition *mtd_parts[NUM_FLASHBANKS];
70 70
71static const char *probes[] = { "cmdlinepart", NULL }; 71static const char *probes[] = { "cmdlinepart", NULL };
72 72
73int __init init_impa7(void) 73static int __init init_impa7(void)
74{ 74{
75 static const char *rom_probe_types[] = PROBETYPES; 75 static const char *rom_probe_types[] = PROBETYPES;
76 const char **type; 76 const char **type;
diff --git a/drivers/mtd/maps/ipaq-flash.c b/drivers/mtd/maps/ipaq-flash.c
index ed58f6a77bd9..748c85f635f1 100644
--- a/drivers/mtd/maps/ipaq-flash.c
+++ b/drivers/mtd/maps/ipaq-flash.c
@@ -202,7 +202,7 @@ static const char *part_probes[] = { "cmdlinepart", "RedBoot", NULL };
202 202
203static int __init h1900_special_case(void); 203static int __init h1900_special_case(void);
204 204
205int __init ipaq_mtd_init(void) 205static int __init ipaq_mtd_init(void)
206{ 206{
207 struct mtd_partition *parts = NULL; 207 struct mtd_partition *parts = NULL;
208 int nb_parts = 0; 208 int nb_parts = 0;
diff --git a/drivers/mtd/maps/mbx860.c b/drivers/mtd/maps/mbx860.c
index 706f67394b07..0eb5a7c85380 100644
--- a/drivers/mtd/maps/mbx860.c
+++ b/drivers/mtd/maps/mbx860.c
@@ -55,7 +55,7 @@ struct map_info mbx_map = {
55 .bankwidth = 4, 55 .bankwidth = 4,
56}; 56};
57 57
58int __init init_mbx(void) 58static int __init init_mbx(void)
59{ 59{
60 printk(KERN_NOTICE "Motorola MBX flash device: 0x%x at 0x%x\n", WINDOW_SIZE*4, WINDOW_ADDR); 60 printk(KERN_NOTICE "Motorola MBX flash device: 0x%x at 0x%x\n", WINDOW_SIZE*4, WINDOW_ADDR);
61 mbx_map.virt = ioremap(WINDOW_ADDR, WINDOW_SIZE * 4); 61 mbx_map.virt = ioremap(WINDOW_ADDR, WINDOW_SIZE * 4);
diff --git a/drivers/mtd/maps/nettel.c b/drivers/mtd/maps/nettel.c
index 965e6c6d6ab0..a97133eb9d70 100644
--- a/drivers/mtd/maps/nettel.c
+++ b/drivers/mtd/maps/nettel.c
@@ -226,7 +226,7 @@ static int __init nettel_init(void)
226 226
227 if ((amd_mtd = do_map_probe("jedec_probe", &nettel_amd_map))) { 227 if ((amd_mtd = do_map_probe("jedec_probe", &nettel_amd_map))) {
228 printk(KERN_NOTICE "SNAPGEAR: AMD flash device size = %dK\n", 228 printk(KERN_NOTICE "SNAPGEAR: AMD flash device size = %dK\n",
229 amd_mtd->size>>10); 229 (int)(amd_mtd->size>>10));
230 230
231 amd_mtd->owner = THIS_MODULE; 231 amd_mtd->owner = THIS_MODULE;
232 232
@@ -357,13 +357,12 @@ static int __init nettel_init(void)
357 *intel1par = 0; 357 *intel1par = 0;
358 } 358 }
359 359
360 printk(KERN_NOTICE "SNAPGEAR: Intel flash device size = %dK\n", 360 printk(KERN_NOTICE "SNAPGEAR: Intel flash device size = %lldKiB\n",
361 (intel_mtd->size >> 10)); 361 (unsigned long long)(intel_mtd->size >> 10));
362 362
363 intel_mtd->owner = THIS_MODULE; 363 intel_mtd->owner = THIS_MODULE;
364 364
365 num_intel_partitions = sizeof(nettel_intel_partitions) / 365 num_intel_partitions = ARRAY_SIZE(nettel_intel_partitions);
366 sizeof(nettel_intel_partitions[0]);
367 366
368 if (intelboot) { 367 if (intelboot) {
369 /* 368 /*
diff --git a/drivers/mtd/maps/octagon-5066.c b/drivers/mtd/maps/octagon-5066.c
index 43e04c1d22a9..2b2e45093218 100644
--- a/drivers/mtd/maps/octagon-5066.c
+++ b/drivers/mtd/maps/octagon-5066.c
@@ -184,7 +184,7 @@ void cleanup_oct5066(void)
184 release_region(PAGE_IO, 1); 184 release_region(PAGE_IO, 1);
185} 185}
186 186
187int __init init_oct5066(void) 187static int __init init_oct5066(void)
188{ 188{
189 int i; 189 int i;
190 int ret = 0; 190 int ret = 0;
diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c
index 1db16e549e38..87743661d48e 100644
--- a/drivers/mtd/maps/physmap.c
+++ b/drivers/mtd/maps/physmap.c
@@ -29,7 +29,6 @@ struct physmap_flash_info {
29 struct map_info map[MAX_RESOURCES]; 29 struct map_info map[MAX_RESOURCES];
30#ifdef CONFIG_MTD_PARTITIONS 30#ifdef CONFIG_MTD_PARTITIONS
31 int nr_parts; 31 int nr_parts;
32 struct mtd_partition *parts;
33#endif 32#endif
34}; 33};
35 34
@@ -56,14 +55,10 @@ static int physmap_flash_remove(struct platform_device *dev)
56 for (i = 0; i < MAX_RESOURCES; i++) { 55 for (i = 0; i < MAX_RESOURCES; i++) {
57 if (info->mtd[i] != NULL) { 56 if (info->mtd[i] != NULL) {
58#ifdef CONFIG_MTD_PARTITIONS 57#ifdef CONFIG_MTD_PARTITIONS
59 if (info->nr_parts) { 58 if (info->nr_parts || physmap_data->nr_parts)
60 del_mtd_partitions(info->mtd[i]); 59 del_mtd_partitions(info->mtd[i]);
61 kfree(info->parts); 60 else
62 } else if (physmap_data->nr_parts) {
63 del_mtd_partitions(info->mtd[i]);
64 } else {
65 del_mtd_device(info->mtd[i]); 61 del_mtd_device(info->mtd[i]);
66 }
67#else 62#else
68 del_mtd_device(info->mtd[i]); 63 del_mtd_device(info->mtd[i]);
69#endif 64#endif
@@ -73,7 +68,12 @@ static int physmap_flash_remove(struct platform_device *dev)
73 return 0; 68 return 0;
74} 69}
75 70
76static const char *rom_probe_types[] = { "cfi_probe", "jedec_probe", "map_rom", NULL }; 71static const char *rom_probe_types[] = {
72 "cfi_probe",
73 "jedec_probe",
74 "qinfo_probe",
75 "map_rom",
76 NULL };
77#ifdef CONFIG_MTD_PARTITIONS 77#ifdef CONFIG_MTD_PARTITIONS
78static const char *part_probe_types[] = { "cmdlinepart", "RedBoot", NULL }; 78static const char *part_probe_types[] = { "cmdlinepart", "RedBoot", NULL };
79#endif 79#endif
@@ -86,6 +86,9 @@ static int physmap_flash_probe(struct platform_device *dev)
86 int err = 0; 86 int err = 0;
87 int i; 87 int i;
88 int devices_found = 0; 88 int devices_found = 0;
89#ifdef CONFIG_MTD_PARTITIONS
90 struct mtd_partition *parts;
91#endif
89 92
90 physmap_data = dev->dev.platform_data; 93 physmap_data = dev->dev.platform_data;
91 if (physmap_data == NULL) 94 if (physmap_data == NULL)
@@ -119,6 +122,7 @@ static int physmap_flash_probe(struct platform_device *dev)
119 info->map[i].size = dev->resource[i].end - dev->resource[i].start + 1; 122 info->map[i].size = dev->resource[i].end - dev->resource[i].start + 1;
120 info->map[i].bankwidth = physmap_data->width; 123 info->map[i].bankwidth = physmap_data->width;
121 info->map[i].set_vpp = physmap_data->set_vpp; 124 info->map[i].set_vpp = physmap_data->set_vpp;
125 info->map[i].pfow_base = physmap_data->pfow_base;
122 126
123 info->map[i].virt = devm_ioremap(&dev->dev, info->map[i].phys, 127 info->map[i].virt = devm_ioremap(&dev->dev, info->map[i].phys,
124 info->map[i].size); 128 info->map[i].size);
@@ -163,9 +167,10 @@ static int physmap_flash_probe(struct platform_device *dev)
163 goto err_out; 167 goto err_out;
164 168
165#ifdef CONFIG_MTD_PARTITIONS 169#ifdef CONFIG_MTD_PARTITIONS
166 err = parse_mtd_partitions(info->cmtd, part_probe_types, &info->parts, 0); 170 err = parse_mtd_partitions(info->cmtd, part_probe_types, &parts, 0);
167 if (err > 0) { 171 if (err > 0) {
168 add_mtd_partitions(info->cmtd, info->parts, err); 172 add_mtd_partitions(info->cmtd, parts, err);
173 kfree(parts);
169 return 0; 174 return 0;
170 } 175 }
171 176
@@ -251,14 +256,7 @@ static struct platform_driver physmap_flash_driver = {
251}; 256};
252 257
253 258
254#ifdef CONFIG_MTD_PHYSMAP_LEN 259#ifdef CONFIG_MTD_PHYSMAP_COMPAT
255#if CONFIG_MTD_PHYSMAP_LEN != 0
256#warning using PHYSMAP compat code
257#define PHYSMAP_COMPAT
258#endif
259#endif
260
261#ifdef PHYSMAP_COMPAT
262static struct physmap_flash_data physmap_flash_data = { 260static struct physmap_flash_data physmap_flash_data = {
263 .width = CONFIG_MTD_PHYSMAP_BANKWIDTH, 261 .width = CONFIG_MTD_PHYSMAP_BANKWIDTH,
264}; 262};
@@ -302,7 +300,7 @@ static int __init physmap_init(void)
302 int err; 300 int err;
303 301
304 err = platform_driver_register(&physmap_flash_driver); 302 err = platform_driver_register(&physmap_flash_driver);
305#ifdef PHYSMAP_COMPAT 303#ifdef CONFIG_MTD_PHYSMAP_COMPAT
306 if (err == 0) 304 if (err == 0)
307 platform_device_register(&physmap_flash); 305 platform_device_register(&physmap_flash);
308#endif 306#endif
@@ -312,7 +310,7 @@ static int __init physmap_init(void)
312 310
313static void __exit physmap_exit(void) 311static void __exit physmap_exit(void)
314{ 312{
315#ifdef PHYSMAP_COMPAT 313#ifdef CONFIG_MTD_PHYSMAP_COMPAT
316 platform_device_unregister(&physmap_flash); 314 platform_device_unregister(&physmap_flash);
317#endif 315#endif
318 platform_driver_unregister(&physmap_flash_driver); 316 platform_driver_unregister(&physmap_flash_driver);
@@ -326,8 +324,7 @@ MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>");
326MODULE_DESCRIPTION("Generic configurable MTD map driver"); 324MODULE_DESCRIPTION("Generic configurable MTD map driver");
327 325
328/* legacy platform drivers can't hotplug or coldplg */ 326/* legacy platform drivers can't hotplug or coldplg */
329#ifndef PHYSMAP_COMPAT 327#ifndef CONFIG_MTD_PHYSMAP_COMPAT
330/* work with hotplug and coldplug */ 328/* work with hotplug and coldplug */
331MODULE_ALIAS("platform:physmap-flash"); 329MODULE_ALIAS("platform:physmap-flash");
332#endif 330#endif
333
diff --git a/drivers/mtd/maps/pmcmsp-flash.c b/drivers/mtd/maps/pmcmsp-flash.c
index f43ba2815cbb..4768bd5459d6 100644
--- a/drivers/mtd/maps/pmcmsp-flash.c
+++ b/drivers/mtd/maps/pmcmsp-flash.c
@@ -48,7 +48,7 @@ static int fcnt;
48 48
49#define DEBUG_MARKER printk(KERN_NOTICE "%s[%d]\n", __func__, __LINE__) 49#define DEBUG_MARKER printk(KERN_NOTICE "%s[%d]\n", __func__, __LINE__)
50 50
51int __init init_msp_flash(void) 51static int __init init_msp_flash(void)
52{ 52{
53 int i, j; 53 int i, j;
54 int offset, coff; 54 int offset, coff;
diff --git a/drivers/mtd/maps/redwood.c b/drivers/mtd/maps/redwood.c
index de002eb1a7fe..933c0b63b016 100644
--- a/drivers/mtd/maps/redwood.c
+++ b/drivers/mtd/maps/redwood.c
@@ -122,7 +122,7 @@ struct map_info redwood_flash_map = {
122 122
123static struct mtd_info *redwood_mtd; 123static struct mtd_info *redwood_mtd;
124 124
125int __init init_redwood_flash(void) 125static int __init init_redwood_flash(void)
126{ 126{
127 int err; 127 int err;
128 128
diff --git a/drivers/mtd/maps/rpxlite.c b/drivers/mtd/maps/rpxlite.c
index 14d90edb4430..3e3ef53d4fd4 100644
--- a/drivers/mtd/maps/rpxlite.c
+++ b/drivers/mtd/maps/rpxlite.c
@@ -23,7 +23,7 @@ static struct map_info rpxlite_map = {
23 .phys = WINDOW_ADDR, 23 .phys = WINDOW_ADDR,
24}; 24};
25 25
26int __init init_rpxlite(void) 26static int __init init_rpxlite(void)
27{ 27{
28 printk(KERN_NOTICE "RPX Lite or CLLF flash device: %x at %x\n", WINDOW_SIZE*4, WINDOW_ADDR); 28 printk(KERN_NOTICE "RPX Lite or CLLF flash device: %x at %x\n", WINDOW_SIZE*4, WINDOW_ADDR);
29 rpxlite_map.virt = ioremap(WINDOW_ADDR, WINDOW_SIZE * 4); 29 rpxlite_map.virt = ioremap(WINDOW_ADDR, WINDOW_SIZE * 4);
diff --git a/drivers/mtd/maps/sbc8240.c b/drivers/mtd/maps/sbc8240.c
index 6e1e99cd2b59..d5374cdcb163 100644
--- a/drivers/mtd/maps/sbc8240.c
+++ b/drivers/mtd/maps/sbc8240.c
@@ -136,7 +136,7 @@ static struct mtd_part_def sbc8240_part_banks[NUM_FLASH_BANKS];
136#endif /* CONFIG_MTD_PARTITIONS */ 136#endif /* CONFIG_MTD_PARTITIONS */
137 137
138 138
139int __init init_sbc8240_mtd (void) 139static int __init init_sbc8240_mtd (void)
140{ 140{
141 static struct _cjs { 141 static struct _cjs {
142 u_long addr; 142 u_long addr;
diff --git a/drivers/mtd/maps/scb2_flash.c b/drivers/mtd/maps/scb2_flash.c
index 21169e6d646c..7e329f09a548 100644
--- a/drivers/mtd/maps/scb2_flash.c
+++ b/drivers/mtd/maps/scb2_flash.c
@@ -118,7 +118,8 @@ scb2_fixup_mtd(struct mtd_info *mtd)
118 struct mtd_erase_region_info *region = &mtd->eraseregions[i]; 118 struct mtd_erase_region_info *region = &mtd->eraseregions[i];
119 119
120 if (region->numblocks * region->erasesize > mtd->size) { 120 if (region->numblocks * region->erasesize > mtd->size) {
121 region->numblocks = (mtd->size / region->erasesize); 121 region->numblocks = ((unsigned long)mtd->size /
122 region->erasesize);
122 done = 1; 123 done = 1;
123 } else { 124 } else {
124 region->numblocks = 0; 125 region->numblocks = 0;
@@ -187,8 +188,9 @@ scb2_flash_probe(struct pci_dev *dev, const struct pci_device_id *ent)
187 return -ENODEV; 188 return -ENODEV;
188 } 189 }
189 190
190 printk(KERN_NOTICE MODNAME ": chip size 0x%x at offset 0x%x\n", 191 printk(KERN_NOTICE MODNAME ": chip size 0x%llx at offset 0x%llx\n",
191 scb2_mtd->size, SCB2_WINDOW - scb2_mtd->size); 192 (unsigned long long)scb2_mtd->size,
193 (unsigned long long)(SCB2_WINDOW - scb2_mtd->size));
192 194
193 add_mtd_device(scb2_mtd); 195 add_mtd_device(scb2_mtd);
194 196
diff --git a/drivers/mtd/maps/sharpsl-flash.c b/drivers/mtd/maps/sharpsl-flash.c
index 026eab028189..b392f096c706 100644
--- a/drivers/mtd/maps/sharpsl-flash.c
+++ b/drivers/mtd/maps/sharpsl-flash.c
@@ -47,7 +47,7 @@ static struct mtd_partition sharpsl_partitions[1] = {
47 } 47 }
48}; 48};
49 49
50int __init init_sharpsl(void) 50static int __init init_sharpsl(void)
51{ 51{
52 struct mtd_partition *parts; 52 struct mtd_partition *parts;
53 int nb_parts = 0; 53 int nb_parts = 0;
diff --git a/drivers/mtd/maps/tqm8xxl.c b/drivers/mtd/maps/tqm8xxl.c
index a5d3d8531faa..60146984f4be 100644
--- a/drivers/mtd/maps/tqm8xxl.c
+++ b/drivers/mtd/maps/tqm8xxl.c
@@ -109,7 +109,7 @@ static struct mtd_partition tqm8xxl_fs_partitions[] = {
109}; 109};
110#endif 110#endif
111 111
112int __init init_tqm_mtd(void) 112static int __init init_tqm_mtd(void)
113{ 113{
114 int idx = 0, ret = 0; 114 int idx = 0, ret = 0;
115 unsigned long flash_addr, flash_size, mtd_size = 0; 115 unsigned long flash_addr, flash_size, mtd_size = 0;
diff --git a/drivers/mtd/maps/uclinux.c b/drivers/mtd/maps/uclinux.c
index 0dc645f8152f..81756e397711 100644
--- a/drivers/mtd/maps/uclinux.c
+++ b/drivers/mtd/maps/uclinux.c
@@ -51,7 +51,7 @@ int uclinux_point(struct mtd_info *mtd, loff_t from, size_t len,
51 51
52/****************************************************************************/ 52/****************************************************************************/
53 53
54int __init uclinux_mtd_init(void) 54static int __init uclinux_mtd_init(void)
55{ 55{
56 struct mtd_info *mtd; 56 struct mtd_info *mtd;
57 struct map_info *mapp; 57 struct map_info *mapp;
@@ -94,7 +94,7 @@ int __init uclinux_mtd_init(void)
94 94
95/****************************************************************************/ 95/****************************************************************************/
96 96
97void __exit uclinux_mtd_cleanup(void) 97static void __exit uclinux_mtd_cleanup(void)
98{ 98{
99 if (uclinux_ram_mtdinfo) { 99 if (uclinux_ram_mtdinfo) {
100 del_mtd_partitions(uclinux_ram_mtdinfo); 100 del_mtd_partitions(uclinux_ram_mtdinfo);
diff --git a/drivers/mtd/maps/vmax301.c b/drivers/mtd/maps/vmax301.c
index 5a0c9a353b0f..6d452dcdfe34 100644
--- a/drivers/mtd/maps/vmax301.c
+++ b/drivers/mtd/maps/vmax301.c
@@ -146,7 +146,7 @@ static void __exit cleanup_vmax301(void)
146 iounmap((void *)vmax_map[0].map_priv_1 - WINDOW_START); 146 iounmap((void *)vmax_map[0].map_priv_1 - WINDOW_START);
147} 147}
148 148
149int __init init_vmax301(void) 149static int __init init_vmax301(void)
150{ 150{
151 int i; 151 int i;
152 unsigned long iomapadr; 152 unsigned long iomapadr;
diff --git a/drivers/mtd/maps/wr_sbc82xx_flash.c b/drivers/mtd/maps/wr_sbc82xx_flash.c
index 413b0cf9bbd2..933a2b6598b4 100644
--- a/drivers/mtd/maps/wr_sbc82xx_flash.c
+++ b/drivers/mtd/maps/wr_sbc82xx_flash.c
@@ -74,7 +74,7 @@ do { \
74 } \ 74 } \
75} while (0); 75} while (0);
76 76
77int __init init_sbc82xx_flash(void) 77static int __init init_sbc82xx_flash(void)
78{ 78{
79 volatile memctl_cpm2_t *mc = &cpm2_immr->im_memctl; 79 volatile memctl_cpm2_t *mc = &cpm2_immr->im_memctl;
80 int bigflash; 80 int bigflash;
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index bcffeda2df3d..e9ec59e9a566 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -450,16 +450,20 @@ static int mtd_ioctl(struct inode *inode, struct file *file,
450 if (!erase) 450 if (!erase)
451 ret = -ENOMEM; 451 ret = -ENOMEM;
452 else { 452 else {
453 struct erase_info_user einfo;
454
453 wait_queue_head_t waitq; 455 wait_queue_head_t waitq;
454 DECLARE_WAITQUEUE(wait, current); 456 DECLARE_WAITQUEUE(wait, current);
455 457
456 init_waitqueue_head(&waitq); 458 init_waitqueue_head(&waitq);
457 459
458 if (copy_from_user(&erase->addr, argp, 460 if (copy_from_user(&einfo, argp,
459 sizeof(struct erase_info_user))) { 461 sizeof(struct erase_info_user))) {
460 kfree(erase); 462 kfree(erase);
461 return -EFAULT; 463 return -EFAULT;
462 } 464 }
465 erase->addr = einfo.start;
466 erase->len = einfo.length;
463 erase->mtd = mtd; 467 erase->mtd = mtd;
464 erase->callback = mtdchar_erase_callback; 468 erase->callback = mtdchar_erase_callback;
465 erase->priv = (unsigned long)&waitq; 469 erase->priv = (unsigned long)&waitq;
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 1a05cf37851e..3dbb1b38db66 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -197,7 +197,7 @@ concat_writev(struct mtd_info *mtd, const struct kvec *vecs,
197 continue; 197 continue;
198 } 198 }
199 199
200 size = min(total_len, (size_t)(subdev->size - to)); 200 size = min_t(uint64_t, total_len, subdev->size - to);
201 wsize = size; /* store for future use */ 201 wsize = size; /* store for future use */
202 202
203 entry_high = entry_low; 203 entry_high = entry_low;
@@ -385,7 +385,7 @@ static int concat_erase(struct mtd_info *mtd, struct erase_info *instr)
385 struct mtd_concat *concat = CONCAT(mtd); 385 struct mtd_concat *concat = CONCAT(mtd);
386 struct mtd_info *subdev; 386 struct mtd_info *subdev;
387 int i, err; 387 int i, err;
388 u_int32_t length, offset = 0; 388 uint64_t length, offset = 0;
389 struct erase_info *erase; 389 struct erase_info *erase;
390 390
391 if (!(mtd->flags & MTD_WRITEABLE)) 391 if (!(mtd->flags & MTD_WRITEABLE))
@@ -518,7 +518,7 @@ static int concat_erase(struct mtd_info *mtd, struct erase_info *instr)
518 return 0; 518 return 0;
519} 519}
520 520
521static int concat_lock(struct mtd_info *mtd, loff_t ofs, size_t len) 521static int concat_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
522{ 522{
523 struct mtd_concat *concat = CONCAT(mtd); 523 struct mtd_concat *concat = CONCAT(mtd);
524 int i, err = -EINVAL; 524 int i, err = -EINVAL;
@@ -528,7 +528,7 @@ static int concat_lock(struct mtd_info *mtd, loff_t ofs, size_t len)
528 528
529 for (i = 0; i < concat->num_subdev; i++) { 529 for (i = 0; i < concat->num_subdev; i++) {
530 struct mtd_info *subdev = concat->subdev[i]; 530 struct mtd_info *subdev = concat->subdev[i];
531 size_t size; 531 uint64_t size;
532 532
533 if (ofs >= subdev->size) { 533 if (ofs >= subdev->size) {
534 size = 0; 534 size = 0;
@@ -556,7 +556,7 @@ static int concat_lock(struct mtd_info *mtd, loff_t ofs, size_t len)
556 return err; 556 return err;
557} 557}
558 558
559static int concat_unlock(struct mtd_info *mtd, loff_t ofs, size_t len) 559static int concat_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
560{ 560{
561 struct mtd_concat *concat = CONCAT(mtd); 561 struct mtd_concat *concat = CONCAT(mtd);
562 int i, err = 0; 562 int i, err = 0;
@@ -566,7 +566,7 @@ static int concat_unlock(struct mtd_info *mtd, loff_t ofs, size_t len)
566 566
567 for (i = 0; i < concat->num_subdev; i++) { 567 for (i = 0; i < concat->num_subdev; i++) {
568 struct mtd_info *subdev = concat->subdev[i]; 568 struct mtd_info *subdev = concat->subdev[i];
569 size_t size; 569 uint64_t size;
570 570
571 if (ofs >= subdev->size) { 571 if (ofs >= subdev->size) {
572 size = 0; 572 size = 0;
@@ -696,7 +696,7 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[], /* subdevices to c
696 int i; 696 int i;
697 size_t size; 697 size_t size;
698 struct mtd_concat *concat; 698 struct mtd_concat *concat;
699 u_int32_t max_erasesize, curr_erasesize; 699 uint32_t max_erasesize, curr_erasesize;
700 int num_erase_region; 700 int num_erase_region;
701 701
702 printk(KERN_NOTICE "Concatenating MTD devices:\n"); 702 printk(KERN_NOTICE "Concatenating MTD devices:\n");
@@ -842,12 +842,14 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[], /* subdevices to c
842 concat->mtd.erasesize = curr_erasesize; 842 concat->mtd.erasesize = curr_erasesize;
843 concat->mtd.numeraseregions = 0; 843 concat->mtd.numeraseregions = 0;
844 } else { 844 } else {
845 uint64_t tmp64;
846
845 /* 847 /*
846 * erase block size varies across the subdevices: allocate 848 * erase block size varies across the subdevices: allocate
847 * space to store the data describing the variable erase regions 849 * space to store the data describing the variable erase regions
848 */ 850 */
849 struct mtd_erase_region_info *erase_region_p; 851 struct mtd_erase_region_info *erase_region_p;
850 u_int32_t begin, position; 852 uint64_t begin, position;
851 853
852 concat->mtd.erasesize = max_erasesize; 854 concat->mtd.erasesize = max_erasesize;
853 concat->mtd.numeraseregions = num_erase_region; 855 concat->mtd.numeraseregions = num_erase_region;
@@ -879,8 +881,9 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[], /* subdevices to c
879 erase_region_p->offset = begin; 881 erase_region_p->offset = begin;
880 erase_region_p->erasesize = 882 erase_region_p->erasesize =
881 curr_erasesize; 883 curr_erasesize;
882 erase_region_p->numblocks = 884 tmp64 = position - begin;
883 (position - begin) / curr_erasesize; 885 do_div(tmp64, curr_erasesize);
886 erase_region_p->numblocks = tmp64;
884 begin = position; 887 begin = position;
885 888
886 curr_erasesize = subdev[i]->erasesize; 889 curr_erasesize = subdev[i]->erasesize;
@@ -897,9 +900,9 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[], /* subdevices to c
897 erase_region_p->offset = begin; 900 erase_region_p->offset = begin;
898 erase_region_p->erasesize = 901 erase_region_p->erasesize =
899 curr_erasesize; 902 curr_erasesize;
900 erase_region_p->numblocks = 903 tmp64 = position - begin;
901 (position - 904 do_div(tmp64, curr_erasesize);
902 begin) / curr_erasesize; 905 erase_region_p->numblocks = tmp64;
903 begin = position; 906 begin = position;
904 907
905 curr_erasesize = 908 curr_erasesize =
@@ -909,14 +912,16 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[], /* subdevices to c
909 } 912 }
910 position += 913 position +=
911 subdev[i]->eraseregions[j]. 914 subdev[i]->eraseregions[j].
912 numblocks * curr_erasesize; 915 numblocks * (uint64_t)curr_erasesize;
913 } 916 }
914 } 917 }
915 } 918 }
916 /* Now write the final entry */ 919 /* Now write the final entry */
917 erase_region_p->offset = begin; 920 erase_region_p->offset = begin;
918 erase_region_p->erasesize = curr_erasesize; 921 erase_region_p->erasesize = curr_erasesize;
919 erase_region_p->numblocks = (position - begin) / curr_erasesize; 922 tmp64 = position - begin;
923 do_div(tmp64, curr_erasesize);
924 erase_region_p->numblocks = tmp64;
920 } 925 }
921 926
922 return &concat->mtd; 927 return &concat->mtd;
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index a9d246949820..76fe0a1e7a5e 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -57,6 +57,19 @@ int add_mtd_device(struct mtd_info *mtd)
57 mtd->index = i; 57 mtd->index = i;
58 mtd->usecount = 0; 58 mtd->usecount = 0;
59 59
60 if (is_power_of_2(mtd->erasesize))
61 mtd->erasesize_shift = ffs(mtd->erasesize) - 1;
62 else
63 mtd->erasesize_shift = 0;
64
65 if (is_power_of_2(mtd->writesize))
66 mtd->writesize_shift = ffs(mtd->writesize) - 1;
67 else
68 mtd->writesize_shift = 0;
69
70 mtd->erasesize_mask = (1 << mtd->erasesize_shift) - 1;
71 mtd->writesize_mask = (1 << mtd->writesize_shift) - 1;
72
60 /* Some chips always power up locked. Unlock them now */ 73 /* Some chips always power up locked. Unlock them now */
61 if ((mtd->flags & MTD_WRITEABLE) 74 if ((mtd->flags & MTD_WRITEABLE)
62 && (mtd->flags & MTD_POWERUP_LOCK) && mtd->unlock) { 75 && (mtd->flags & MTD_POWERUP_LOCK) && mtd->unlock) {
@@ -344,7 +357,8 @@ static inline int mtd_proc_info (char *buf, int i)
344 if (!this) 357 if (!this)
345 return 0; 358 return 0;
346 359
347 return sprintf(buf, "mtd%d: %8.8x %8.8x \"%s\"\n", i, this->size, 360 return sprintf(buf, "mtd%d: %8.8llx %8.8x \"%s\"\n", i,
361 (unsigned long long)this->size,
348 this->erasesize, this->name); 362 this->erasesize, this->name);
349} 363}
350 364
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index aebb3b27edbd..1a6b3beabe8d 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -80,9 +80,9 @@ static int mtdoops_erase_block(struct mtd_info *mtd, int offset)
80 if (ret) { 80 if (ret) {
81 set_current_state(TASK_RUNNING); 81 set_current_state(TASK_RUNNING);
82 remove_wait_queue(&wait_q, &wait); 82 remove_wait_queue(&wait_q, &wait);
83 printk (KERN_WARNING "mtdoops: erase of region [0x%x, 0x%x] " 83 printk (KERN_WARNING "mtdoops: erase of region [0x%llx, 0x%llx] "
84 "on \"%s\" failed\n", 84 "on \"%s\" failed\n",
85 erase.addr, erase.len, mtd->name); 85 (unsigned long long)erase.addr, (unsigned long long)erase.len, mtd->name);
86 return ret; 86 return ret;
87 } 87 }
88 88
@@ -289,7 +289,10 @@ static void mtdoops_notify_add(struct mtd_info *mtd)
289 } 289 }
290 290
291 cxt->mtd = mtd; 291 cxt->mtd = mtd;
292 cxt->oops_pages = mtd->size / OOPS_PAGE_SIZE; 292 if (mtd->size > INT_MAX)
293 cxt->oops_pages = INT_MAX / OOPS_PAGE_SIZE;
294 else
295 cxt->oops_pages = (int)mtd->size / OOPS_PAGE_SIZE;
293 296
294 find_next_position(cxt); 297 find_next_position(cxt);
295 298
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 3728913fa5fa..144e6b613a77 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -26,7 +26,7 @@ static LIST_HEAD(mtd_partitions);
26struct mtd_part { 26struct mtd_part {
27 struct mtd_info mtd; 27 struct mtd_info mtd;
28 struct mtd_info *master; 28 struct mtd_info *master;
29 u_int32_t offset; 29 uint64_t offset;
30 int index; 30 int index;
31 struct list_head list; 31 struct list_head list;
32 int registered; 32 int registered;
@@ -235,7 +235,7 @@ void mtd_erase_callback(struct erase_info *instr)
235} 235}
236EXPORT_SYMBOL_GPL(mtd_erase_callback); 236EXPORT_SYMBOL_GPL(mtd_erase_callback);
237 237
238static int part_lock(struct mtd_info *mtd, loff_t ofs, size_t len) 238static int part_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
239{ 239{
240 struct mtd_part *part = PART(mtd); 240 struct mtd_part *part = PART(mtd);
241 if ((len + ofs) > mtd->size) 241 if ((len + ofs) > mtd->size)
@@ -243,7 +243,7 @@ static int part_lock(struct mtd_info *mtd, loff_t ofs, size_t len)
243 return part->master->lock(part->master, ofs + part->offset, len); 243 return part->master->lock(part->master, ofs + part->offset, len);
244} 244}
245 245
246static int part_unlock(struct mtd_info *mtd, loff_t ofs, size_t len) 246static int part_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
247{ 247{
248 struct mtd_part *part = PART(mtd); 248 struct mtd_part *part = PART(mtd);
249 if ((len + ofs) > mtd->size) 249 if ((len + ofs) > mtd->size)
@@ -317,7 +317,7 @@ EXPORT_SYMBOL(del_mtd_partitions);
317 317
318static struct mtd_part *add_one_partition(struct mtd_info *master, 318static struct mtd_part *add_one_partition(struct mtd_info *master,
319 const struct mtd_partition *part, int partno, 319 const struct mtd_partition *part, int partno,
320 u_int32_t cur_offset) 320 uint64_t cur_offset)
321{ 321{
322 struct mtd_part *slave; 322 struct mtd_part *slave;
323 323
@@ -395,19 +395,19 @@ static struct mtd_part *add_one_partition(struct mtd_info *master,
395 slave->offset = cur_offset; 395 slave->offset = cur_offset;
396 if (slave->offset == MTDPART_OFS_NXTBLK) { 396 if (slave->offset == MTDPART_OFS_NXTBLK) {
397 slave->offset = cur_offset; 397 slave->offset = cur_offset;
398 if ((cur_offset % master->erasesize) != 0) { 398 if (mtd_mod_by_eb(cur_offset, master) != 0) {
399 /* Round up to next erasesize */ 399 /* Round up to next erasesize */
400 slave->offset = ((cur_offset / master->erasesize) + 1) * master->erasesize; 400 slave->offset = (mtd_div_by_eb(cur_offset, master) + 1) * master->erasesize;
401 printk(KERN_NOTICE "Moving partition %d: " 401 printk(KERN_NOTICE "Moving partition %d: "
402 "0x%08x -> 0x%08x\n", partno, 402 "0x%012llx -> 0x%012llx\n", partno,
403 cur_offset, slave->offset); 403 (unsigned long long)cur_offset, (unsigned long long)slave->offset);
404 } 404 }
405 } 405 }
406 if (slave->mtd.size == MTDPART_SIZ_FULL) 406 if (slave->mtd.size == MTDPART_SIZ_FULL)
407 slave->mtd.size = master->size - slave->offset; 407 slave->mtd.size = master->size - slave->offset;
408 408
409 printk(KERN_NOTICE "0x%08x-0x%08x : \"%s\"\n", slave->offset, 409 printk(KERN_NOTICE "0x%012llx-0x%012llx : \"%s\"\n", (unsigned long long)slave->offset,
410 slave->offset + slave->mtd.size, slave->mtd.name); 410 (unsigned long long)(slave->offset + slave->mtd.size), slave->mtd.name);
411 411
412 /* let's do some sanity checks */ 412 /* let's do some sanity checks */
413 if (slave->offset >= master->size) { 413 if (slave->offset >= master->size) {
@@ -420,13 +420,13 @@ static struct mtd_part *add_one_partition(struct mtd_info *master,
420 } 420 }
421 if (slave->offset + slave->mtd.size > master->size) { 421 if (slave->offset + slave->mtd.size > master->size) {
422 slave->mtd.size = master->size - slave->offset; 422 slave->mtd.size = master->size - slave->offset;
423 printk(KERN_WARNING"mtd: partition \"%s\" extends beyond the end of device \"%s\" -- size truncated to %#x\n", 423 printk(KERN_WARNING"mtd: partition \"%s\" extends beyond the end of device \"%s\" -- size truncated to %#llx\n",
424 part->name, master->name, slave->mtd.size); 424 part->name, master->name, (unsigned long long)slave->mtd.size);
425 } 425 }
426 if (master->numeraseregions > 1) { 426 if (master->numeraseregions > 1) {
427 /* Deal with variable erase size stuff */ 427 /* Deal with variable erase size stuff */
428 int i, max = master->numeraseregions; 428 int i, max = master->numeraseregions;
429 u32 end = slave->offset + slave->mtd.size; 429 u64 end = slave->offset + slave->mtd.size;
430 struct mtd_erase_region_info *regions = master->eraseregions; 430 struct mtd_erase_region_info *regions = master->eraseregions;
431 431
432 /* Find the first erase regions which is part of this 432 /* Find the first erase regions which is part of this
@@ -449,7 +449,7 @@ static struct mtd_part *add_one_partition(struct mtd_info *master,
449 } 449 }
450 450
451 if ((slave->mtd.flags & MTD_WRITEABLE) && 451 if ((slave->mtd.flags & MTD_WRITEABLE) &&
452 (slave->offset % slave->mtd.erasesize)) { 452 mtd_mod_by_eb(slave->offset, &slave->mtd)) {
453 /* Doesn't start on a boundary of major erase size */ 453 /* Doesn't start on a boundary of major erase size */
454 /* FIXME: Let it be writable if it is on a boundary of 454 /* FIXME: Let it be writable if it is on a boundary of
455 * _minor_ erase size though */ 455 * _minor_ erase size though */
@@ -458,7 +458,7 @@ static struct mtd_part *add_one_partition(struct mtd_info *master,
458 part->name); 458 part->name);
459 } 459 }
460 if ((slave->mtd.flags & MTD_WRITEABLE) && 460 if ((slave->mtd.flags & MTD_WRITEABLE) &&
461 (slave->mtd.size % slave->mtd.erasesize)) { 461 mtd_mod_by_eb(slave->mtd.size, &slave->mtd)) {
462 slave->mtd.flags &= ~MTD_WRITEABLE; 462 slave->mtd.flags &= ~MTD_WRITEABLE;
463 printk(KERN_WARNING"mtd: partition \"%s\" doesn't end on an erase block -- force read-only\n", 463 printk(KERN_WARNING"mtd: partition \"%s\" doesn't end on an erase block -- force read-only\n",
464 part->name); 464 part->name);
@@ -466,7 +466,7 @@ static struct mtd_part *add_one_partition(struct mtd_info *master,
466 466
467 slave->mtd.ecclayout = master->ecclayout; 467 slave->mtd.ecclayout = master->ecclayout;
468 if (master->block_isbad) { 468 if (master->block_isbad) {
469 uint32_t offs = 0; 469 uint64_t offs = 0;
470 470
471 while (offs < slave->mtd.size) { 471 while (offs < slave->mtd.size) {
472 if (master->block_isbad(master, 472 if (master->block_isbad(master,
@@ -501,7 +501,7 @@ int add_mtd_partitions(struct mtd_info *master,
501 int nbparts) 501 int nbparts)
502{ 502{
503 struct mtd_part *slave; 503 struct mtd_part *slave;
504 u_int32_t cur_offset = 0; 504 uint64_t cur_offset = 0;
505 int i; 505 int i;
506 506
507 printk(KERN_NOTICE "Creating %d MTD partitions on \"%s\":\n", nbparts, master->name); 507 printk(KERN_NOTICE "Creating %d MTD partitions on \"%s\":\n", nbparts, master->name);
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index f8ae0400c49c..8b12e6e109d3 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -163,6 +163,13 @@ config MTD_NAND_S3C2410_HWECC
163 incorrect ECC generation, and if using these, the default of 163 incorrect ECC generation, and if using these, the default of
164 software ECC is preferable. 164 software ECC is preferable.
165 165
166config MTD_NAND_NDFC
167 tristate "NDFC NanD Flash Controller"
168 depends on 4xx
169 select MTD_NAND_ECC_SMC
170 help
171 NDFC Nand Flash Controllers are integrated in IBM/AMCC's 4xx SoCs
172
166config MTD_NAND_S3C2410_CLKSTOP 173config MTD_NAND_S3C2410_CLKSTOP
167 bool "S3C2410 NAND IDLE clock stop" 174 bool "S3C2410 NAND IDLE clock stop"
168 depends on MTD_NAND_S3C2410 175 depends on MTD_NAND_S3C2410
diff --git a/drivers/mtd/nand/alauda.c b/drivers/mtd/nand/alauda.c
index 962380394855..6d9649159a18 100644
--- a/drivers/mtd/nand/alauda.c
+++ b/drivers/mtd/nand/alauda.c
@@ -676,11 +676,11 @@ static int alauda_probe(struct usb_interface *interface,
676 goto error; 676 goto error;
677 677
678 al->write_out = usb_sndbulkpipe(al->dev, 678 al->write_out = usb_sndbulkpipe(al->dev,
679 ep_wr->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); 679 usb_endpoint_num(ep_wr));
680 al->bulk_in = usb_rcvbulkpipe(al->dev, 680 al->bulk_in = usb_rcvbulkpipe(al->dev,
681 ep_in->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); 681 usb_endpoint_num(ep_in));
682 al->bulk_out = usb_sndbulkpipe(al->dev, 682 al->bulk_out = usb_sndbulkpipe(al->dev,
683 ep_out->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); 683 usb_endpoint_num(ep_out));
684 684
685 /* second device is identical up to now */ 685 /* second device is identical up to now */
686 memcpy(al+1, al, sizeof(*al)); 686 memcpy(al+1, al, sizeof(*al));
diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
index b8064bf3aee4..22a6b2e50e91 100644
--- a/drivers/mtd/nand/cafe_nand.c
+++ b/drivers/mtd/nand/cafe_nand.c
@@ -90,7 +90,7 @@ static int timing[3];
90module_param_array(timing, int, &numtimings, 0644); 90module_param_array(timing, int, &numtimings, 0644);
91 91
92#ifdef CONFIG_MTD_PARTITIONS 92#ifdef CONFIG_MTD_PARTITIONS
93static const char *part_probes[] = { "RedBoot", NULL }; 93static const char *part_probes[] = { "cmdlinepart", "RedBoot", NULL };
94#endif 94#endif
95 95
96/* Hrm. Why isn't this already conditional on something in the struct device? */ 96/* Hrm. Why isn't this already conditional on something in the struct device? */
@@ -805,10 +805,13 @@ static int __devinit cafe_nand_probe(struct pci_dev *pdev,
805 add_mtd_device(mtd); 805 add_mtd_device(mtd);
806 806
807#ifdef CONFIG_MTD_PARTITIONS 807#ifdef CONFIG_MTD_PARTITIONS
808#ifdef CONFIG_MTD_CMDLINE_PARTS
809 mtd->name = "cafe_nand";
810#endif
808 nr_parts = parse_mtd_partitions(mtd, part_probes, &parts, 0); 811 nr_parts = parse_mtd_partitions(mtd, part_probes, &parts, 0);
809 if (nr_parts > 0) { 812 if (nr_parts > 0) {
810 cafe->parts = parts; 813 cafe->parts = parts;
811 dev_info(&cafe->pdev->dev, "%d RedBoot partitions found\n", nr_parts); 814 dev_info(&cafe->pdev->dev, "%d partitions found\n", nr_parts);
812 add_mtd_partitions(mtd, parts, nr_parts); 815 add_mtd_partitions(mtd, parts, nr_parts);
813 } 816 }
814#endif 817#endif
diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index 4aa5bd6158da..65929db29446 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c
@@ -777,7 +777,9 @@ static int fsl_elbc_chip_init(struct fsl_elbc_mtd *priv)
777 /* Fill in fsl_elbc_mtd structure */ 777 /* Fill in fsl_elbc_mtd structure */
778 priv->mtd.priv = chip; 778 priv->mtd.priv = chip;
779 priv->mtd.owner = THIS_MODULE; 779 priv->mtd.owner = THIS_MODULE;
780 priv->fmr = 0; /* rest filled in later */ 780
781 /* Set the ECCM according to the settings in bootloader.*/
782 priv->fmr = in_be32(&lbc->fmr) & FMR_ECCM;
781 783
782 /* fill in nand_chip structure */ 784 /* fill in nand_chip structure */
783 /* set up function call table */ 785 /* set up function call table */
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 0a9c9cd33f96..0c3afccde8a2 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2014,13 +2014,14 @@ static int nand_erase(struct mtd_info *mtd, struct erase_info *instr)
2014int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr, 2014int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
2015 int allowbbt) 2015 int allowbbt)
2016{ 2016{
2017 int page, len, status, pages_per_block, ret, chipnr; 2017 int page, status, pages_per_block, ret, chipnr;
2018 struct nand_chip *chip = mtd->priv; 2018 struct nand_chip *chip = mtd->priv;
2019 int rewrite_bbt[NAND_MAX_CHIPS]={0}; 2019 loff_t rewrite_bbt[NAND_MAX_CHIPS]={0};
2020 unsigned int bbt_masked_page = 0xffffffff; 2020 unsigned int bbt_masked_page = 0xffffffff;
2021 loff_t len;
2021 2022
2022 DEBUG(MTD_DEBUG_LEVEL3, "nand_erase: start = 0x%08x, len = %i\n", 2023 DEBUG(MTD_DEBUG_LEVEL3, "nand_erase: start = 0x%012llx, len = %llu\n",
2023 (unsigned int)instr->addr, (unsigned int)instr->len); 2024 (unsigned long long)instr->addr, (unsigned long long)instr->len);
2024 2025
2025 /* Start address must align on block boundary */ 2026 /* Start address must align on block boundary */
2026 if (instr->addr & ((1 << chip->phys_erase_shift) - 1)) { 2027 if (instr->addr & ((1 << chip->phys_erase_shift) - 1)) {
@@ -2116,7 +2117,8 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
2116 DEBUG(MTD_DEBUG_LEVEL0, "nand_erase: " 2117 DEBUG(MTD_DEBUG_LEVEL0, "nand_erase: "
2117 "Failed erase, page 0x%08x\n", page); 2118 "Failed erase, page 0x%08x\n", page);
2118 instr->state = MTD_ERASE_FAILED; 2119 instr->state = MTD_ERASE_FAILED;
2119 instr->fail_addr = (page << chip->page_shift); 2120 instr->fail_addr =
2121 ((loff_t)page << chip->page_shift);
2120 goto erase_exit; 2122 goto erase_exit;
2121 } 2123 }
2122 2124
@@ -2126,7 +2128,8 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
2126 */ 2128 */
2127 if (bbt_masked_page != 0xffffffff && 2129 if (bbt_masked_page != 0xffffffff &&
2128 (page & BBT_PAGE_MASK) == bbt_masked_page) 2130 (page & BBT_PAGE_MASK) == bbt_masked_page)
2129 rewrite_bbt[chipnr] = (page << chip->page_shift); 2131 rewrite_bbt[chipnr] =
2132 ((loff_t)page << chip->page_shift);
2130 2133
2131 /* Increment page address and decrement length */ 2134 /* Increment page address and decrement length */
2132 len -= (1 << chip->phys_erase_shift); 2135 len -= (1 << chip->phys_erase_shift);
@@ -2173,7 +2176,7 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
2173 continue; 2176 continue;
2174 /* update the BBT for chip */ 2177 /* update the BBT for chip */
2175 DEBUG(MTD_DEBUG_LEVEL0, "nand_erase_nand: nand_update_bbt " 2178 DEBUG(MTD_DEBUG_LEVEL0, "nand_erase_nand: nand_update_bbt "
2176 "(%d:0x%0x 0x%0x)\n", chipnr, rewrite_bbt[chipnr], 2179 "(%d:0x%0llx 0x%0x)\n", chipnr, rewrite_bbt[chipnr],
2177 chip->bbt_td->pages[chipnr]); 2180 chip->bbt_td->pages[chipnr]);
2178 nand_update_bbt(mtd, rewrite_bbt[chipnr]); 2181 nand_update_bbt(mtd, rewrite_bbt[chipnr]);
2179 } 2182 }
@@ -2365,7 +2368,7 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
2365 if (!mtd->name) 2368 if (!mtd->name)
2366 mtd->name = type->name; 2369 mtd->name = type->name;
2367 2370
2368 chip->chipsize = type->chipsize << 20; 2371 chip->chipsize = (uint64_t)type->chipsize << 20;
2369 2372
2370 /* Newer devices have all the information in additional id bytes */ 2373 /* Newer devices have all the information in additional id bytes */
2371 if (!type->pagesize) { 2374 if (!type->pagesize) {
@@ -2423,7 +2426,10 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
2423 2426
2424 chip->bbt_erase_shift = chip->phys_erase_shift = 2427 chip->bbt_erase_shift = chip->phys_erase_shift =
2425 ffs(mtd->erasesize) - 1; 2428 ffs(mtd->erasesize) - 1;
2426 chip->chip_shift = ffs(chip->chipsize) - 1; 2429 if (chip->chipsize & 0xffffffff)
2430 chip->chip_shift = ffs((unsigned)chip->chipsize) - 1;
2431 else
2432 chip->chip_shift = ffs((unsigned)(chip->chipsize >> 32)) + 32 - 1;
2427 2433
2428 /* Set the bad block position */ 2434 /* Set the bad block position */
2429 chip->badblockpos = mtd->writesize > 512 ? 2435 chip->badblockpos = mtd->writesize > 512 ?
@@ -2517,7 +2523,6 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips)
2517/** 2523/**
2518 * nand_scan_tail - [NAND Interface] Scan for the NAND device 2524 * nand_scan_tail - [NAND Interface] Scan for the NAND device
2519 * @mtd: MTD device structure 2525 * @mtd: MTD device structure
2520 * @maxchips: Number of chips to scan for
2521 * 2526 *
2522 * This is the second phase of the normal nand_scan() function. It 2527 * This is the second phase of the normal nand_scan() function. It
2523 * fills out all the uninitialized function pointers with the defaults 2528 * fills out all the uninitialized function pointers with the defaults
diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c
index 0b1c48595f12..55c23e5cd210 100644
--- a/drivers/mtd/nand/nand_bbt.c
+++ b/drivers/mtd/nand/nand_bbt.c
@@ -171,16 +171,16 @@ static int read_bbt(struct mtd_info *mtd, uint8_t *buf, int page, int num,
171 if (tmp == msk) 171 if (tmp == msk)
172 continue; 172 continue;
173 if (reserved_block_code && (tmp == reserved_block_code)) { 173 if (reserved_block_code && (tmp == reserved_block_code)) {
174 printk(KERN_DEBUG "nand_read_bbt: Reserved block at 0x%08x\n", 174 printk(KERN_DEBUG "nand_read_bbt: Reserved block at 0x%012llx\n",
175 ((offs << 2) + (act >> 1)) << this->bbt_erase_shift); 175 (loff_t)((offs << 2) + (act >> 1)) << this->bbt_erase_shift);
176 this->bbt[offs + (act >> 3)] |= 0x2 << (act & 0x06); 176 this->bbt[offs + (act >> 3)] |= 0x2 << (act & 0x06);
177 mtd->ecc_stats.bbtblocks++; 177 mtd->ecc_stats.bbtblocks++;
178 continue; 178 continue;
179 } 179 }
180 /* Leave it for now, if its matured we can move this 180 /* Leave it for now, if its matured we can move this
181 * message to MTD_DEBUG_LEVEL0 */ 181 * message to MTD_DEBUG_LEVEL0 */
182 printk(KERN_DEBUG "nand_read_bbt: Bad block at 0x%08x\n", 182 printk(KERN_DEBUG "nand_read_bbt: Bad block at 0x%012llx\n",
183 ((offs << 2) + (act >> 1)) << this->bbt_erase_shift); 183 (loff_t)((offs << 2) + (act >> 1)) << this->bbt_erase_shift);
184 /* Factory marked bad or worn out ? */ 184 /* Factory marked bad or worn out ? */
185 if (tmp == 0) 185 if (tmp == 0)
186 this->bbt[offs + (act >> 3)] |= 0x3 << (act & 0x06); 186 this->bbt[offs + (act >> 3)] |= 0x3 << (act & 0x06);
@@ -284,7 +284,7 @@ static int read_abs_bbts(struct mtd_info *mtd, uint8_t *buf,
284 284
285 /* Read the primary version, if available */ 285 /* Read the primary version, if available */
286 if (td->options & NAND_BBT_VERSION) { 286 if (td->options & NAND_BBT_VERSION) {
287 scan_read_raw(mtd, buf, td->pages[0] << this->page_shift, 287 scan_read_raw(mtd, buf, (loff_t)td->pages[0] << this->page_shift,
288 mtd->writesize); 288 mtd->writesize);
289 td->version[0] = buf[mtd->writesize + td->veroffs]; 289 td->version[0] = buf[mtd->writesize + td->veroffs];
290 printk(KERN_DEBUG "Bad block table at page %d, version 0x%02X\n", 290 printk(KERN_DEBUG "Bad block table at page %d, version 0x%02X\n",
@@ -293,7 +293,7 @@ static int read_abs_bbts(struct mtd_info *mtd, uint8_t *buf,
293 293
294 /* Read the mirror version, if available */ 294 /* Read the mirror version, if available */
295 if (md && (md->options & NAND_BBT_VERSION)) { 295 if (md && (md->options & NAND_BBT_VERSION)) {
296 scan_read_raw(mtd, buf, md->pages[0] << this->page_shift, 296 scan_read_raw(mtd, buf, (loff_t)md->pages[0] << this->page_shift,
297 mtd->writesize); 297 mtd->writesize);
298 md->version[0] = buf[mtd->writesize + md->veroffs]; 298 md->version[0] = buf[mtd->writesize + md->veroffs];
299 printk(KERN_DEBUG "Bad block table at page %d, version 0x%02X\n", 299 printk(KERN_DEBUG "Bad block table at page %d, version 0x%02X\n",
@@ -411,7 +411,7 @@ static int create_bbt(struct mtd_info *mtd, uint8_t *buf,
411 numblocks = this->chipsize >> (this->bbt_erase_shift - 1); 411 numblocks = this->chipsize >> (this->bbt_erase_shift - 1);
412 startblock = chip * numblocks; 412 startblock = chip * numblocks;
413 numblocks += startblock; 413 numblocks += startblock;
414 from = startblock << (this->bbt_erase_shift - 1); 414 from = (loff_t)startblock << (this->bbt_erase_shift - 1);
415 } 415 }
416 416
417 for (i = startblock; i < numblocks;) { 417 for (i = startblock; i < numblocks;) {
@@ -428,8 +428,8 @@ static int create_bbt(struct mtd_info *mtd, uint8_t *buf,
428 428
429 if (ret) { 429 if (ret) {
430 this->bbt[i >> 3] |= 0x03 << (i & 0x6); 430 this->bbt[i >> 3] |= 0x03 << (i & 0x6);
431 printk(KERN_WARNING "Bad eraseblock %d at 0x%08x\n", 431 printk(KERN_WARNING "Bad eraseblock %d at 0x%012llx\n",
432 i >> 1, (unsigned int)from); 432 i >> 1, (unsigned long long)from);
433 mtd->ecc_stats.badblocks++; 433 mtd->ecc_stats.badblocks++;
434 } 434 }
435 435
@@ -495,7 +495,7 @@ static int search_bbt(struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr
495 for (block = 0; block < td->maxblocks; block++) { 495 for (block = 0; block < td->maxblocks; block++) {
496 496
497 int actblock = startblock + dir * block; 497 int actblock = startblock + dir * block;
498 loff_t offs = actblock << this->bbt_erase_shift; 498 loff_t offs = (loff_t)actblock << this->bbt_erase_shift;
499 499
500 /* Read first page */ 500 /* Read first page */
501 scan_read_raw(mtd, buf, offs, mtd->writesize); 501 scan_read_raw(mtd, buf, offs, mtd->writesize);
@@ -719,7 +719,7 @@ static int write_bbt(struct mtd_info *mtd, uint8_t *buf,
719 719
720 memset(&einfo, 0, sizeof(einfo)); 720 memset(&einfo, 0, sizeof(einfo));
721 einfo.mtd = mtd; 721 einfo.mtd = mtd;
722 einfo.addr = (unsigned long)to; 722 einfo.addr = to;
723 einfo.len = 1 << this->bbt_erase_shift; 723 einfo.len = 1 << this->bbt_erase_shift;
724 res = nand_erase_nand(mtd, &einfo, 1); 724 res = nand_erase_nand(mtd, &einfo, 1);
725 if (res < 0) 725 if (res < 0)
@@ -729,8 +729,8 @@ static int write_bbt(struct mtd_info *mtd, uint8_t *buf,
729 if (res < 0) 729 if (res < 0)
730 goto outerr; 730 goto outerr;
731 731
732 printk(KERN_DEBUG "Bad block table written to 0x%08x, version " 732 printk(KERN_DEBUG "Bad block table written to 0x%012llx, version "
733 "0x%02X\n", (unsigned int)to, td->version[chip]); 733 "0x%02X\n", (unsigned long long)to, td->version[chip]);
734 734
735 /* Mark it as used */ 735 /* Mark it as used */
736 td->pages[chip] = page; 736 td->pages[chip] = page;
@@ -910,7 +910,7 @@ static void mark_bbt_region(struct mtd_info *mtd, struct nand_bbt_descr *td)
910 newval = oldval | (0x2 << (block & 0x06)); 910 newval = oldval | (0x2 << (block & 0x06));
911 this->bbt[(block >> 3)] = newval; 911 this->bbt[(block >> 3)] = newval;
912 if ((oldval != newval) && td->reserved_block_code) 912 if ((oldval != newval) && td->reserved_block_code)
913 nand_update_bbt(mtd, block << (this->bbt_erase_shift - 1)); 913 nand_update_bbt(mtd, (loff_t)block << (this->bbt_erase_shift - 1));
914 continue; 914 continue;
915 } 915 }
916 update = 0; 916 update = 0;
@@ -931,7 +931,7 @@ static void mark_bbt_region(struct mtd_info *mtd, struct nand_bbt_descr *td)
931 new ones have been marked, then we need to update the stored 931 new ones have been marked, then we need to update the stored
932 bbts. This should only happen once. */ 932 bbts. This should only happen once. */
933 if (update && td->reserved_block_code) 933 if (update && td->reserved_block_code)
934 nand_update_bbt(mtd, (block - 2) << (this->bbt_erase_shift - 1)); 934 nand_update_bbt(mtd, (loff_t)(block - 2) << (this->bbt_erase_shift - 1));
935 } 935 }
936} 936}
937 937
@@ -1027,7 +1027,6 @@ int nand_update_bbt(struct mtd_info *mtd, loff_t offs)
1027 if (!this->bbt || !td) 1027 if (!this->bbt || !td)
1028 return -EINVAL; 1028 return -EINVAL;
1029 1029
1030 len = mtd->size >> (this->bbt_erase_shift + 2);
1031 /* Allocate a temporary buffer for one eraseblock incl. oob */ 1030 /* Allocate a temporary buffer for one eraseblock incl. oob */
1032 len = (1 << this->bbt_erase_shift); 1031 len = (1 << this->bbt_erase_shift);
1033 len += (len >> this->page_shift) * mtd->oobsize; 1032 len += (len >> this->page_shift) * mtd->oobsize;
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index ae7c57781a68..cd0711b83ac4 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -38,6 +38,9 @@
38#include <linux/delay.h> 38#include <linux/delay.h>
39#include <linux/list.h> 39#include <linux/list.h>
40#include <linux/random.h> 40#include <linux/random.h>
41#include <linux/sched.h>
42#include <linux/fs.h>
43#include <linux/pagemap.h>
41 44
42/* Default simulator parameters values */ 45/* Default simulator parameters values */
43#if !defined(CONFIG_NANDSIM_FIRST_ID_BYTE) || \ 46#if !defined(CONFIG_NANDSIM_FIRST_ID_BYTE) || \
@@ -100,6 +103,7 @@ static unsigned int bitflips = 0;
100static char *gravepages = NULL; 103static char *gravepages = NULL;
101static unsigned int rptwear = 0; 104static unsigned int rptwear = 0;
102static unsigned int overridesize = 0; 105static unsigned int overridesize = 0;
106static char *cache_file = NULL;
103 107
104module_param(first_id_byte, uint, 0400); 108module_param(first_id_byte, uint, 0400);
105module_param(second_id_byte, uint, 0400); 109module_param(second_id_byte, uint, 0400);
@@ -122,12 +126,13 @@ module_param(bitflips, uint, 0400);
122module_param(gravepages, charp, 0400); 126module_param(gravepages, charp, 0400);
123module_param(rptwear, uint, 0400); 127module_param(rptwear, uint, 0400);
124module_param(overridesize, uint, 0400); 128module_param(overridesize, uint, 0400);
129module_param(cache_file, charp, 0400);
125 130
126MODULE_PARM_DESC(first_id_byte, "The first byte returned by NAND Flash 'read ID' command (manufacturer ID)"); 131MODULE_PARM_DESC(first_id_byte, "The first byte returned by NAND Flash 'read ID' command (manufacturer ID)");
127MODULE_PARM_DESC(second_id_byte, "The second byte returned by NAND Flash 'read ID' command (chip ID)"); 132MODULE_PARM_DESC(second_id_byte, "The second byte returned by NAND Flash 'read ID' command (chip ID)");
128MODULE_PARM_DESC(third_id_byte, "The third byte returned by NAND Flash 'read ID' command"); 133MODULE_PARM_DESC(third_id_byte, "The third byte returned by NAND Flash 'read ID' command");
129MODULE_PARM_DESC(fourth_id_byte, "The fourth byte returned by NAND Flash 'read ID' command"); 134MODULE_PARM_DESC(fourth_id_byte, "The fourth byte returned by NAND Flash 'read ID' command");
130MODULE_PARM_DESC(access_delay, "Initial page access delay (microiseconds)"); 135MODULE_PARM_DESC(access_delay, "Initial page access delay (microseconds)");
131MODULE_PARM_DESC(programm_delay, "Page programm delay (microseconds"); 136MODULE_PARM_DESC(programm_delay, "Page programm delay (microseconds");
132MODULE_PARM_DESC(erase_delay, "Sector erase delay (milliseconds)"); 137MODULE_PARM_DESC(erase_delay, "Sector erase delay (milliseconds)");
133MODULE_PARM_DESC(output_cycle, "Word output (from flash) time (nanodeconds)"); 138MODULE_PARM_DESC(output_cycle, "Word output (from flash) time (nanodeconds)");
@@ -153,6 +158,7 @@ MODULE_PARM_DESC(rptwear, "Number of erases inbetween reporting wear, if
153MODULE_PARM_DESC(overridesize, "Specifies the NAND Flash size overriding the ID bytes. " 158MODULE_PARM_DESC(overridesize, "Specifies the NAND Flash size overriding the ID bytes. "
154 "The size is specified in erase blocks and as the exponent of a power of two" 159 "The size is specified in erase blocks and as the exponent of a power of two"
155 " e.g. 5 means a size of 32 erase blocks"); 160 " e.g. 5 means a size of 32 erase blocks");
161MODULE_PARM_DESC(cache_file, "File to use to cache nand pages instead of memory");
156 162
157/* The largest possible page size */ 163/* The largest possible page size */
158#define NS_LARGEST_PAGE_SIZE 2048 164#define NS_LARGEST_PAGE_SIZE 2048
@@ -266,6 +272,9 @@ MODULE_PARM_DESC(overridesize, "Specifies the NAND Flash size overriding the I
266 */ 272 */
267#define NS_MAX_PREVSTATES 1 273#define NS_MAX_PREVSTATES 1
268 274
275/* Maximum page cache pages needed to read or write a NAND page to the cache_file */
276#define NS_MAX_HELD_PAGES 16
277
269/* 278/*
270 * A union to represent flash memory contents and flash buffer. 279 * A union to represent flash memory contents and flash buffer.
271 */ 280 */
@@ -295,6 +304,9 @@ struct nandsim {
295 /* The simulated NAND flash pages array */ 304 /* The simulated NAND flash pages array */
296 union ns_mem *pages; 305 union ns_mem *pages;
297 306
307 /* Slab allocator for nand pages */
308 struct kmem_cache *nand_pages_slab;
309
298 /* Internal buffer of page + OOB size bytes */ 310 /* Internal buffer of page + OOB size bytes */
299 union ns_mem buf; 311 union ns_mem buf;
300 312
@@ -335,6 +347,13 @@ struct nandsim {
335 int ale; /* address Latch Enable */ 347 int ale; /* address Latch Enable */
336 int wp; /* write Protect */ 348 int wp; /* write Protect */
337 } lines; 349 } lines;
350
351 /* Fields needed when using a cache file */
352 struct file *cfile; /* Open file */
353 unsigned char *pages_written; /* Which pages have been written */
354 void *file_buf;
355 struct page *held_pages[NS_MAX_HELD_PAGES];
356 int held_cnt;
338}; 357};
339 358
340/* 359/*
@@ -420,25 +439,69 @@ static struct mtd_info *nsmtd;
420static u_char ns_verify_buf[NS_LARGEST_PAGE_SIZE]; 439static u_char ns_verify_buf[NS_LARGEST_PAGE_SIZE];
421 440
422/* 441/*
423 * Allocate array of page pointers and initialize the array to NULL 442 * Allocate array of page pointers, create slab allocation for an array
424 * pointers. 443 * and initialize the array by NULL pointers.
425 * 444 *
426 * RETURNS: 0 if success, -ENOMEM if memory alloc fails. 445 * RETURNS: 0 if success, -ENOMEM if memory alloc fails.
427 */ 446 */
428static int alloc_device(struct nandsim *ns) 447static int alloc_device(struct nandsim *ns)
429{ 448{
430 int i; 449 struct file *cfile;
450 int i, err;
451
452 if (cache_file) {
453 cfile = filp_open(cache_file, O_CREAT | O_RDWR | O_LARGEFILE, 0600);
454 if (IS_ERR(cfile))
455 return PTR_ERR(cfile);
456 if (!cfile->f_op || (!cfile->f_op->read && !cfile->f_op->aio_read)) {
457 NS_ERR("alloc_device: cache file not readable\n");
458 err = -EINVAL;
459 goto err_close;
460 }
461 if (!cfile->f_op->write && !cfile->f_op->aio_write) {
462 NS_ERR("alloc_device: cache file not writeable\n");
463 err = -EINVAL;
464 goto err_close;
465 }
466 ns->pages_written = vmalloc(ns->geom.pgnum);
467 if (!ns->pages_written) {
468 NS_ERR("alloc_device: unable to allocate pages written array\n");
469 err = -ENOMEM;
470 goto err_close;
471 }
472 ns->file_buf = kmalloc(ns->geom.pgszoob, GFP_KERNEL);
473 if (!ns->file_buf) {
474 NS_ERR("alloc_device: unable to allocate file buf\n");
475 err = -ENOMEM;
476 goto err_free;
477 }
478 ns->cfile = cfile;
479 memset(ns->pages_written, 0, ns->geom.pgnum);
480 return 0;
481 }
431 482
432 ns->pages = vmalloc(ns->geom.pgnum * sizeof(union ns_mem)); 483 ns->pages = vmalloc(ns->geom.pgnum * sizeof(union ns_mem));
433 if (!ns->pages) { 484 if (!ns->pages) {
434 NS_ERR("alloc_map: unable to allocate page array\n"); 485 NS_ERR("alloc_device: unable to allocate page array\n");
435 return -ENOMEM; 486 return -ENOMEM;
436 } 487 }
437 for (i = 0; i < ns->geom.pgnum; i++) { 488 for (i = 0; i < ns->geom.pgnum; i++) {
438 ns->pages[i].byte = NULL; 489 ns->pages[i].byte = NULL;
439 } 490 }
491 ns->nand_pages_slab = kmem_cache_create("nandsim",
492 ns->geom.pgszoob, 0, 0, NULL);
493 if (!ns->nand_pages_slab) {
494 NS_ERR("cache_create: unable to create kmem_cache\n");
495 return -ENOMEM;
496 }
440 497
441 return 0; 498 return 0;
499
500err_free:
501 vfree(ns->pages_written);
502err_close:
503 filp_close(cfile, NULL);
504 return err;
442} 505}
443 506
444/* 507/*
@@ -448,11 +511,20 @@ static void free_device(struct nandsim *ns)
448{ 511{
449 int i; 512 int i;
450 513
514 if (ns->cfile) {
515 kfree(ns->file_buf);
516 vfree(ns->pages_written);
517 filp_close(ns->cfile, NULL);
518 return;
519 }
520
451 if (ns->pages) { 521 if (ns->pages) {
452 for (i = 0; i < ns->geom.pgnum; i++) { 522 for (i = 0; i < ns->geom.pgnum; i++) {
453 if (ns->pages[i].byte) 523 if (ns->pages[i].byte)
454 kfree(ns->pages[i].byte); 524 kmem_cache_free(ns->nand_pages_slab,
525 ns->pages[i].byte);
455 } 526 }
527 kmem_cache_destroy(ns->nand_pages_slab);
456 vfree(ns->pages); 528 vfree(ns->pages);
457 } 529 }
458} 530}
@@ -464,7 +536,7 @@ static char *get_partition_name(int i)
464 return kstrdup(buf, GFP_KERNEL); 536 return kstrdup(buf, GFP_KERNEL);
465} 537}
466 538
467static u_int64_t divide(u_int64_t n, u_int32_t d) 539static uint64_t divide(uint64_t n, uint32_t d)
468{ 540{
469 do_div(n, d); 541 do_div(n, d);
470 return n; 542 return n;
@@ -480,8 +552,8 @@ static int init_nandsim(struct mtd_info *mtd)
480 struct nand_chip *chip = (struct nand_chip *)mtd->priv; 552 struct nand_chip *chip = (struct nand_chip *)mtd->priv;
481 struct nandsim *ns = (struct nandsim *)(chip->priv); 553 struct nandsim *ns = (struct nandsim *)(chip->priv);
482 int i, ret = 0; 554 int i, ret = 0;
483 u_int64_t remains; 555 uint64_t remains;
484 u_int64_t next_offset; 556 uint64_t next_offset;
485 557
486 if (NS_IS_INITIALIZED(ns)) { 558 if (NS_IS_INITIALIZED(ns)) {
487 NS_ERR("init_nandsim: nandsim is already initialized\n"); 559 NS_ERR("init_nandsim: nandsim is already initialized\n");
@@ -548,7 +620,7 @@ static int init_nandsim(struct mtd_info *mtd)
548 remains = ns->geom.totsz; 620 remains = ns->geom.totsz;
549 next_offset = 0; 621 next_offset = 0;
550 for (i = 0; i < parts_num; ++i) { 622 for (i = 0; i < parts_num; ++i) {
551 u_int64_t part_sz = (u_int64_t)parts[i] * ns->geom.secsz; 623 uint64_t part_sz = (uint64_t)parts[i] * ns->geom.secsz;
552 624
553 if (!part_sz || part_sz > remains) { 625 if (!part_sz || part_sz > remains) {
554 NS_ERR("bad partition size.\n"); 626 NS_ERR("bad partition size.\n");
@@ -1211,6 +1283,97 @@ static int find_operation(struct nandsim *ns, uint32_t flag)
1211 return -1; 1283 return -1;
1212} 1284}
1213 1285
1286static void put_pages(struct nandsim *ns)
1287{
1288 int i;
1289
1290 for (i = 0; i < ns->held_cnt; i++)
1291 page_cache_release(ns->held_pages[i]);
1292}
1293
1294/* Get page cache pages in advance to provide NOFS memory allocation */
1295static int get_pages(struct nandsim *ns, struct file *file, size_t count, loff_t pos)
1296{
1297 pgoff_t index, start_index, end_index;
1298 struct page *page;
1299 struct address_space *mapping = file->f_mapping;
1300
1301 start_index = pos >> PAGE_CACHE_SHIFT;
1302 end_index = (pos + count - 1) >> PAGE_CACHE_SHIFT;
1303 if (end_index - start_index + 1 > NS_MAX_HELD_PAGES)
1304 return -EINVAL;
1305 ns->held_cnt = 0;
1306 for (index = start_index; index <= end_index; index++) {
1307 page = find_get_page(mapping, index);
1308 if (page == NULL) {
1309 page = find_or_create_page(mapping, index, GFP_NOFS);
1310 if (page == NULL) {
1311 write_inode_now(mapping->host, 1);
1312 page = find_or_create_page(mapping, index, GFP_NOFS);
1313 }
1314 if (page == NULL) {
1315 put_pages(ns);
1316 return -ENOMEM;
1317 }
1318 unlock_page(page);
1319 }
1320 ns->held_pages[ns->held_cnt++] = page;
1321 }
1322 return 0;
1323}
1324
1325static int set_memalloc(void)
1326{
1327 if (current->flags & PF_MEMALLOC)
1328 return 0;
1329 current->flags |= PF_MEMALLOC;
1330 return 1;
1331}
1332
1333static void clear_memalloc(int memalloc)
1334{
1335 if (memalloc)
1336 current->flags &= ~PF_MEMALLOC;
1337}
1338
1339static ssize_t read_file(struct nandsim *ns, struct file *file, void *buf, size_t count, loff_t *pos)
1340{
1341 mm_segment_t old_fs;
1342 ssize_t tx;
1343 int err, memalloc;
1344
1345 err = get_pages(ns, file, count, *pos);
1346 if (err)
1347 return err;
1348 old_fs = get_fs();
1349 set_fs(get_ds());
1350 memalloc = set_memalloc();
1351 tx = vfs_read(file, (char __user *)buf, count, pos);
1352 clear_memalloc(memalloc);
1353 set_fs(old_fs);
1354 put_pages(ns);
1355 return tx;
1356}
1357
1358static ssize_t write_file(struct nandsim *ns, struct file *file, void *buf, size_t count, loff_t *pos)
1359{
1360 mm_segment_t old_fs;
1361 ssize_t tx;
1362 int err, memalloc;
1363
1364 err = get_pages(ns, file, count, *pos);
1365 if (err)
1366 return err;
1367 old_fs = get_fs();
1368 set_fs(get_ds());
1369 memalloc = set_memalloc();
1370 tx = vfs_write(file, (char __user *)buf, count, pos);
1371 clear_memalloc(memalloc);
1372 set_fs(old_fs);
1373 put_pages(ns);
1374 return tx;
1375}
1376
1214/* 1377/*
1215 * Returns a pointer to the current page. 1378 * Returns a pointer to the current page.
1216 */ 1379 */
@@ -1227,6 +1390,38 @@ static inline u_char *NS_PAGE_BYTE_OFF(struct nandsim *ns)
1227 return NS_GET_PAGE(ns)->byte + ns->regs.column + ns->regs.off; 1390 return NS_GET_PAGE(ns)->byte + ns->regs.column + ns->regs.off;
1228} 1391}
1229 1392
1393int do_read_error(struct nandsim *ns, int num)
1394{
1395 unsigned int page_no = ns->regs.row;
1396
1397 if (read_error(page_no)) {
1398 int i;
1399 memset(ns->buf.byte, 0xFF, num);
1400 for (i = 0; i < num; ++i)
1401 ns->buf.byte[i] = random32();
1402 NS_WARN("simulating read error in page %u\n", page_no);
1403 return 1;
1404 }
1405 return 0;
1406}
1407
1408void do_bit_flips(struct nandsim *ns, int num)
1409{
1410 if (bitflips && random32() < (1 << 22)) {
1411 int flips = 1;
1412 if (bitflips > 1)
1413 flips = (random32() % (int) bitflips) + 1;
1414 while (flips--) {
1415 int pos = random32() % (num * 8);
1416 ns->buf.byte[pos / 8] ^= (1 << (pos % 8));
1417 NS_WARN("read_page: flipping bit %d in page %d "
1418 "reading from %d ecc: corrected=%u failed=%u\n",
1419 pos, ns->regs.row, ns->regs.column + ns->regs.off,
1420 nsmtd->ecc_stats.corrected, nsmtd->ecc_stats.failed);
1421 }
1422 }
1423}
1424
1230/* 1425/*
1231 * Fill the NAND buffer with data read from the specified page. 1426 * Fill the NAND buffer with data read from the specified page.
1232 */ 1427 */
@@ -1234,36 +1429,40 @@ static void read_page(struct nandsim *ns, int num)
1234{ 1429{
1235 union ns_mem *mypage; 1430 union ns_mem *mypage;
1236 1431
1432 if (ns->cfile) {
1433 if (!ns->pages_written[ns->regs.row]) {
1434 NS_DBG("read_page: page %d not written\n", ns->regs.row);
1435 memset(ns->buf.byte, 0xFF, num);
1436 } else {
1437 loff_t pos;
1438 ssize_t tx;
1439
1440 NS_DBG("read_page: page %d written, reading from %d\n",
1441 ns->regs.row, ns->regs.column + ns->regs.off);
1442 if (do_read_error(ns, num))
1443 return;
1444 pos = (loff_t)ns->regs.row * ns->geom.pgszoob + ns->regs.column + ns->regs.off;
1445 tx = read_file(ns, ns->cfile, ns->buf.byte, num, &pos);
1446 if (tx != num) {
1447 NS_ERR("read_page: read error for page %d ret %ld\n", ns->regs.row, (long)tx);
1448 return;
1449 }
1450 do_bit_flips(ns, num);
1451 }
1452 return;
1453 }
1454
1237 mypage = NS_GET_PAGE(ns); 1455 mypage = NS_GET_PAGE(ns);
1238 if (mypage->byte == NULL) { 1456 if (mypage->byte == NULL) {
1239 NS_DBG("read_page: page %d not allocated\n", ns->regs.row); 1457 NS_DBG("read_page: page %d not allocated\n", ns->regs.row);
1240 memset(ns->buf.byte, 0xFF, num); 1458 memset(ns->buf.byte, 0xFF, num);
1241 } else { 1459 } else {
1242 unsigned int page_no = ns->regs.row;
1243 NS_DBG("read_page: page %d allocated, reading from %d\n", 1460 NS_DBG("read_page: page %d allocated, reading from %d\n",
1244 ns->regs.row, ns->regs.column + ns->regs.off); 1461 ns->regs.row, ns->regs.column + ns->regs.off);
1245 if (read_error(page_no)) { 1462 if (do_read_error(ns, num))
1246 int i;
1247 memset(ns->buf.byte, 0xFF, num);
1248 for (i = 0; i < num; ++i)
1249 ns->buf.byte[i] = random32();
1250 NS_WARN("simulating read error in page %u\n", page_no);
1251 return; 1463 return;
1252 }
1253 memcpy(ns->buf.byte, NS_PAGE_BYTE_OFF(ns), num); 1464 memcpy(ns->buf.byte, NS_PAGE_BYTE_OFF(ns), num);
1254 if (bitflips && random32() < (1 << 22)) { 1465 do_bit_flips(ns, num);
1255 int flips = 1;
1256 if (bitflips > 1)
1257 flips = (random32() % (int) bitflips) + 1;
1258 while (flips--) {
1259 int pos = random32() % (num * 8);
1260 ns->buf.byte[pos / 8] ^= (1 << (pos % 8));
1261 NS_WARN("read_page: flipping bit %d in page %d "
1262 "reading from %d ecc: corrected=%u failed=%u\n",
1263 pos, ns->regs.row, ns->regs.column + ns->regs.off,
1264 nsmtd->ecc_stats.corrected, nsmtd->ecc_stats.failed);
1265 }
1266 }
1267 } 1466 }
1268} 1467}
1269 1468
@@ -1275,11 +1474,20 @@ static void erase_sector(struct nandsim *ns)
1275 union ns_mem *mypage; 1474 union ns_mem *mypage;
1276 int i; 1475 int i;
1277 1476
1477 if (ns->cfile) {
1478 for (i = 0; i < ns->geom.pgsec; i++)
1479 if (ns->pages_written[ns->regs.row + i]) {
1480 NS_DBG("erase_sector: freeing page %d\n", ns->regs.row + i);
1481 ns->pages_written[ns->regs.row + i] = 0;
1482 }
1483 return;
1484 }
1485
1278 mypage = NS_GET_PAGE(ns); 1486 mypage = NS_GET_PAGE(ns);
1279 for (i = 0; i < ns->geom.pgsec; i++) { 1487 for (i = 0; i < ns->geom.pgsec; i++) {
1280 if (mypage->byte != NULL) { 1488 if (mypage->byte != NULL) {
1281 NS_DBG("erase_sector: freeing page %d\n", ns->regs.row+i); 1489 NS_DBG("erase_sector: freeing page %d\n", ns->regs.row+i);
1282 kfree(mypage->byte); 1490 kmem_cache_free(ns->nand_pages_slab, mypage->byte);
1283 mypage->byte = NULL; 1491 mypage->byte = NULL;
1284 } 1492 }
1285 mypage++; 1493 mypage++;
@@ -1295,16 +1503,57 @@ static int prog_page(struct nandsim *ns, int num)
1295 union ns_mem *mypage; 1503 union ns_mem *mypage;
1296 u_char *pg_off; 1504 u_char *pg_off;
1297 1505
1506 if (ns->cfile) {
1507 loff_t off, pos;
1508 ssize_t tx;
1509 int all;
1510
1511 NS_DBG("prog_page: writing page %d\n", ns->regs.row);
1512 pg_off = ns->file_buf + ns->regs.column + ns->regs.off;
1513 off = (loff_t)ns->regs.row * ns->geom.pgszoob + ns->regs.column + ns->regs.off;
1514 if (!ns->pages_written[ns->regs.row]) {
1515 all = 1;
1516 memset(ns->file_buf, 0xff, ns->geom.pgszoob);
1517 } else {
1518 all = 0;
1519 pos = off;
1520 tx = read_file(ns, ns->cfile, pg_off, num, &pos);
1521 if (tx != num) {
1522 NS_ERR("prog_page: read error for page %d ret %ld\n", ns->regs.row, (long)tx);
1523 return -1;
1524 }
1525 }
1526 for (i = 0; i < num; i++)
1527 pg_off[i] &= ns->buf.byte[i];
1528 if (all) {
1529 pos = (loff_t)ns->regs.row * ns->geom.pgszoob;
1530 tx = write_file(ns, ns->cfile, ns->file_buf, ns->geom.pgszoob, &pos);
1531 if (tx != ns->geom.pgszoob) {
1532 NS_ERR("prog_page: write error for page %d ret %ld\n", ns->regs.row, (long)tx);
1533 return -1;
1534 }
1535 ns->pages_written[ns->regs.row] = 1;
1536 } else {
1537 pos = off;
1538 tx = write_file(ns, ns->cfile, pg_off, num, &pos);
1539 if (tx != num) {
1540 NS_ERR("prog_page: write error for page %d ret %ld\n", ns->regs.row, (long)tx);
1541 return -1;
1542 }
1543 }
1544 return 0;
1545 }
1546
1298 mypage = NS_GET_PAGE(ns); 1547 mypage = NS_GET_PAGE(ns);
1299 if (mypage->byte == NULL) { 1548 if (mypage->byte == NULL) {
1300 NS_DBG("prog_page: allocating page %d\n", ns->regs.row); 1549 NS_DBG("prog_page: allocating page %d\n", ns->regs.row);
1301 /* 1550 /*
1302 * We allocate memory with GFP_NOFS because a flash FS may 1551 * We allocate memory with GFP_NOFS because a flash FS may
1303 * utilize this. If it is holding an FS lock, then gets here, 1552 * utilize this. If it is holding an FS lock, then gets here,
1304 * then kmalloc runs writeback which goes to the FS again 1553 * then kernel memory alloc runs writeback which goes to the FS
1305 * and deadlocks. This was seen in practice. 1554 * again and deadlocks. This was seen in practice.
1306 */ 1555 */
1307 mypage->byte = kmalloc(ns->geom.pgszoob, GFP_NOFS); 1556 mypage->byte = kmem_cache_alloc(ns->nand_pages_slab, GFP_NOFS);
1308 if (mypage->byte == NULL) { 1557 if (mypage->byte == NULL) {
1309 NS_ERR("prog_page: error allocating memory for page %d\n", ns->regs.row); 1558 NS_ERR("prog_page: error allocating memory for page %d\n", ns->regs.row);
1310 return -1; 1559 return -1;
@@ -1736,13 +1985,17 @@ static void ns_nand_write_byte(struct mtd_info *mtd, u_char byte)
1736 1985
1737 /* Check if chip is expecting command */ 1986 /* Check if chip is expecting command */
1738 if (NS_STATE(ns->nxstate) != STATE_UNKNOWN && !(ns->nxstate & STATE_CMD_MASK)) { 1987 if (NS_STATE(ns->nxstate) != STATE_UNKNOWN && !(ns->nxstate & STATE_CMD_MASK)) {
1739 /* 1988 /* Do not warn if only 2 id bytes are read */
1740 * We are in situation when something else (not command) 1989 if (!(ns->regs.command == NAND_CMD_READID &&
1741 * was expected but command was input. In this case ignore 1990 NS_STATE(ns->state) == STATE_DATAOUT_ID && ns->regs.count == 2)) {
1742 * previous command(s)/state(s) and accept the last one. 1991 /*
1743 */ 1992 * We are in situation when something else (not command)
1744 NS_WARN("write_byte: command (%#x) wasn't expected, expected state is %s, " 1993 * was expected but command was input. In this case ignore
1745 "ignore previous states\n", (uint)byte, get_state_name(ns->nxstate)); 1994 * previous command(s)/state(s) and accept the last one.
1995 */
1996 NS_WARN("write_byte: command (%#x) wasn't expected, expected state is %s, "
1997 "ignore previous states\n", (uint)byte, get_state_name(ns->nxstate));
1998 }
1746 switch_to_ready_state(ns, NS_STATUS_FAILED(ns)); 1999 switch_to_ready_state(ns, NS_STATUS_FAILED(ns));
1747 } 2000 }
1748 2001
@@ -2044,7 +2297,7 @@ static int __init ns_init_module(void)
2044 } 2297 }
2045 2298
2046 if (overridesize) { 2299 if (overridesize) {
2047 u_int64_t new_size = (u_int64_t)nsmtd->erasesize << overridesize; 2300 uint64_t new_size = (uint64_t)nsmtd->erasesize << overridesize;
2048 if (new_size >> overridesize != nsmtd->erasesize) { 2301 if (new_size >> overridesize != nsmtd->erasesize) {
2049 NS_ERR("overridesize is too big\n"); 2302 NS_ERR("overridesize is too big\n");
2050 goto err_exit; 2303 goto err_exit;
diff --git a/drivers/mtd/nand/ndfc.c b/drivers/mtd/nand/ndfc.c
index 955959eb02d4..582cf80f555a 100644
--- a/drivers/mtd/nand/ndfc.c
+++ b/drivers/mtd/nand/ndfc.c
@@ -2,12 +2,20 @@
2 * drivers/mtd/ndfc.c 2 * drivers/mtd/ndfc.c
3 * 3 *
4 * Overview: 4 * Overview:
5 * Platform independend driver for NDFC (NanD Flash Controller) 5 * Platform independent driver for NDFC (NanD Flash Controller)
6 * integrated into EP440 cores 6 * integrated into EP440 cores
7 * 7 *
8 * Ported to an OF platform driver by Sean MacLennan
9 *
10 * The NDFC supports multiple chips, but this driver only supports a
11 * single chip since I do not have access to any boards with
12 * multiple chips.
13 *
8 * Author: Thomas Gleixner 14 * Author: Thomas Gleixner
9 * 15 *
10 * Copyright 2006 IBM 16 * Copyright 2006 IBM
17 * Copyright 2008 PIKA Technologies
18 * Sean MacLennan <smaclennan@pikatech.com>
11 * 19 *
12 * This program is free software; you can redistribute it and/or modify it 20 * This program is free software; you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by the 21 * under the terms of the GNU General Public License as published by the
@@ -21,27 +29,20 @@
21#include <linux/mtd/partitions.h> 29#include <linux/mtd/partitions.h>
22#include <linux/mtd/ndfc.h> 30#include <linux/mtd/ndfc.h>
23#include <linux/mtd/mtd.h> 31#include <linux/mtd/mtd.h>
24#include <linux/platform_device.h> 32#include <linux/of_platform.h>
25
26#include <asm/io.h> 33#include <asm/io.h>
27#ifdef CONFIG_40x
28#include <asm/ibm405.h>
29#else
30#include <asm/ibm44x.h>
31#endif
32
33struct ndfc_nand_mtd {
34 struct mtd_info mtd;
35 struct nand_chip chip;
36 struct platform_nand_chip *pl_chip;
37};
38 34
39static struct ndfc_nand_mtd ndfc_mtd[NDFC_MAX_BANKS];
40 35
41struct ndfc_controller { 36struct ndfc_controller {
42 void __iomem *ndfcbase; 37 struct of_device *ofdev;
43 struct nand_hw_control ndfc_control; 38 void __iomem *ndfcbase;
44 atomic_t childs_active; 39 struct mtd_info mtd;
40 struct nand_chip chip;
41 int chip_select;
42 struct nand_hw_control ndfc_control;
43#ifdef CONFIG_MTD_PARTITIONS
44 struct mtd_partition *parts;
45#endif
45}; 46};
46 47
47static struct ndfc_controller ndfc_ctrl; 48static struct ndfc_controller ndfc_ctrl;
@@ -50,17 +51,14 @@ static void ndfc_select_chip(struct mtd_info *mtd, int chip)
50{ 51{
51 uint32_t ccr; 52 uint32_t ccr;
52 struct ndfc_controller *ndfc = &ndfc_ctrl; 53 struct ndfc_controller *ndfc = &ndfc_ctrl;
53 struct nand_chip *nandchip = mtd->priv;
54 struct ndfc_nand_mtd *nandmtd = nandchip->priv;
55 struct platform_nand_chip *pchip = nandmtd->pl_chip;
56 54
57 ccr = __raw_readl(ndfc->ndfcbase + NDFC_CCR); 55 ccr = in_be32(ndfc->ndfcbase + NDFC_CCR);
58 if (chip >= 0) { 56 if (chip >= 0) {
59 ccr &= ~NDFC_CCR_BS_MASK; 57 ccr &= ~NDFC_CCR_BS_MASK;
60 ccr |= NDFC_CCR_BS(chip + pchip->chip_offset); 58 ccr |= NDFC_CCR_BS(chip + ndfc->chip_select);
61 } else 59 } else
62 ccr |= NDFC_CCR_RESET_CE; 60 ccr |= NDFC_CCR_RESET_CE;
63 __raw_writel(ccr, ndfc->ndfcbase + NDFC_CCR); 61 out_be32(ndfc->ndfcbase + NDFC_CCR, ccr);
64} 62}
65 63
66static void ndfc_hwcontrol(struct mtd_info *mtd, int cmd, unsigned int ctrl) 64static void ndfc_hwcontrol(struct mtd_info *mtd, int cmd, unsigned int ctrl)
@@ -80,7 +78,7 @@ static int ndfc_ready(struct mtd_info *mtd)
80{ 78{
81 struct ndfc_controller *ndfc = &ndfc_ctrl; 79 struct ndfc_controller *ndfc = &ndfc_ctrl;
82 80
83 return __raw_readl(ndfc->ndfcbase + NDFC_STAT) & NDFC_STAT_IS_READY; 81 return in_be32(ndfc->ndfcbase + NDFC_STAT) & NDFC_STAT_IS_READY;
84} 82}
85 83
86static void ndfc_enable_hwecc(struct mtd_info *mtd, int mode) 84static void ndfc_enable_hwecc(struct mtd_info *mtd, int mode)
@@ -88,9 +86,9 @@ static void ndfc_enable_hwecc(struct mtd_info *mtd, int mode)
88 uint32_t ccr; 86 uint32_t ccr;
89 struct ndfc_controller *ndfc = &ndfc_ctrl; 87 struct ndfc_controller *ndfc = &ndfc_ctrl;
90 88
91 ccr = __raw_readl(ndfc->ndfcbase + NDFC_CCR); 89 ccr = in_be32(ndfc->ndfcbase + NDFC_CCR);
92 ccr |= NDFC_CCR_RESET_ECC; 90 ccr |= NDFC_CCR_RESET_ECC;
93 __raw_writel(ccr, ndfc->ndfcbase + NDFC_CCR); 91 out_be32(ndfc->ndfcbase + NDFC_CCR, ccr);
94 wmb(); 92 wmb();
95} 93}
96 94
@@ -102,9 +100,10 @@ static int ndfc_calculate_ecc(struct mtd_info *mtd,
102 uint8_t *p = (uint8_t *)&ecc; 100 uint8_t *p = (uint8_t *)&ecc;
103 101
104 wmb(); 102 wmb();
105 ecc = __raw_readl(ndfc->ndfcbase + NDFC_ECC); 103 ecc = in_be32(ndfc->ndfcbase + NDFC_ECC);
106 ecc_code[0] = p[1]; 104 /* The NDFC uses Smart Media (SMC) bytes order */
107 ecc_code[1] = p[2]; 105 ecc_code[0] = p[2];
106 ecc_code[1] = p[1];
108 ecc_code[2] = p[3]; 107 ecc_code[2] = p[3];
109 108
110 return 0; 109 return 0;
@@ -123,7 +122,7 @@ static void ndfc_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
123 uint32_t *p = (uint32_t *) buf; 122 uint32_t *p = (uint32_t *) buf;
124 123
125 for(;len > 0; len -= 4) 124 for(;len > 0; len -= 4)
126 *p++ = __raw_readl(ndfc->ndfcbase + NDFC_DATA); 125 *p++ = in_be32(ndfc->ndfcbase + NDFC_DATA);
127} 126}
128 127
129static void ndfc_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len) 128static void ndfc_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
@@ -132,7 +131,7 @@ static void ndfc_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
132 uint32_t *p = (uint32_t *) buf; 131 uint32_t *p = (uint32_t *) buf;
133 132
134 for(;len > 0; len -= 4) 133 for(;len > 0; len -= 4)
135 __raw_writel(*p++, ndfc->ndfcbase + NDFC_DATA); 134 out_be32(ndfc->ndfcbase + NDFC_DATA, *p++);
136} 135}
137 136
138static int ndfc_verify_buf(struct mtd_info *mtd, const uint8_t *buf, int len) 137static int ndfc_verify_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
@@ -141,7 +140,7 @@ static int ndfc_verify_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
141 uint32_t *p = (uint32_t *) buf; 140 uint32_t *p = (uint32_t *) buf;
142 141
143 for(;len > 0; len -= 4) 142 for(;len > 0; len -= 4)
144 if (*p++ != __raw_readl(ndfc->ndfcbase + NDFC_DATA)) 143 if (*p++ != in_be32(ndfc->ndfcbase + NDFC_DATA))
145 return -EFAULT; 144 return -EFAULT;
146 return 0; 145 return 0;
147} 146}
@@ -149,10 +148,19 @@ static int ndfc_verify_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
149/* 148/*
150 * Initialize chip structure 149 * Initialize chip structure
151 */ 150 */
152static void ndfc_chip_init(struct ndfc_nand_mtd *mtd) 151static int ndfc_chip_init(struct ndfc_controller *ndfc,
152 struct device_node *node)
153{ 153{
154 struct ndfc_controller *ndfc = &ndfc_ctrl; 154#ifdef CONFIG_MTD_PARTITIONS
155 struct nand_chip *chip = &mtd->chip; 155#ifdef CONFIG_MTD_CMDLINE_PARTS
156 static const char *part_types[] = { "cmdlinepart", NULL };
157#else
158 static const char *part_types[] = { NULL };
159#endif
160#endif
161 struct device_node *flash_np;
162 struct nand_chip *chip = &ndfc->chip;
163 int ret;
156 164
157 chip->IO_ADDR_R = ndfc->ndfcbase + NDFC_DATA; 165 chip->IO_ADDR_R = ndfc->ndfcbase + NDFC_DATA;
158 chip->IO_ADDR_W = ndfc->ndfcbase + NDFC_DATA; 166 chip->IO_ADDR_W = ndfc->ndfcbase + NDFC_DATA;
@@ -160,8 +168,6 @@ static void ndfc_chip_init(struct ndfc_nand_mtd *mtd)
160 chip->dev_ready = ndfc_ready; 168 chip->dev_ready = ndfc_ready;
161 chip->select_chip = ndfc_select_chip; 169 chip->select_chip = ndfc_select_chip;
162 chip->chip_delay = 50; 170 chip->chip_delay = 50;
163 chip->priv = mtd;
164 chip->options = mtd->pl_chip->options;
165 chip->controller = &ndfc->ndfc_control; 171 chip->controller = &ndfc->ndfc_control;
166 chip->read_buf = ndfc_read_buf; 172 chip->read_buf = ndfc_read_buf;
167 chip->write_buf = ndfc_write_buf; 173 chip->write_buf = ndfc_write_buf;
@@ -172,143 +178,136 @@ static void ndfc_chip_init(struct ndfc_nand_mtd *mtd)
172 chip->ecc.mode = NAND_ECC_HW; 178 chip->ecc.mode = NAND_ECC_HW;
173 chip->ecc.size = 256; 179 chip->ecc.size = 256;
174 chip->ecc.bytes = 3; 180 chip->ecc.bytes = 3;
175 chip->ecclayout = chip->ecc.layout = mtd->pl_chip->ecclayout;
176 mtd->mtd.priv = chip;
177 mtd->mtd.owner = THIS_MODULE;
178}
179
180static int ndfc_chip_probe(struct platform_device *pdev)
181{
182 struct platform_nand_chip *nc = pdev->dev.platform_data;
183 struct ndfc_chip_settings *settings = nc->priv;
184 struct ndfc_controller *ndfc = &ndfc_ctrl;
185 struct ndfc_nand_mtd *nandmtd;
186
187 if (nc->chip_offset >= NDFC_MAX_BANKS || nc->nr_chips > NDFC_MAX_BANKS)
188 return -EINVAL;
189
190 /* Set the bank settings */
191 __raw_writel(settings->bank_settings,
192 ndfc->ndfcbase + NDFC_BCFG0 + (nc->chip_offset << 2));
193 181
194 nandmtd = &ndfc_mtd[pdev->id]; 182 ndfc->mtd.priv = chip;
195 if (nandmtd->pl_chip) 183 ndfc->mtd.owner = THIS_MODULE;
196 return -EBUSY;
197 184
198 nandmtd->pl_chip = nc; 185 flash_np = of_get_next_child(node, NULL);
199 ndfc_chip_init(nandmtd); 186 if (!flash_np)
200
201 /* Scan for chips */
202 if (nand_scan(&nandmtd->mtd, nc->nr_chips)) {
203 nandmtd->pl_chip = NULL;
204 return -ENODEV; 187 return -ENODEV;
188
189 ndfc->mtd.name = kasprintf(GFP_KERNEL, "%s.%s",
190 ndfc->ofdev->dev.bus_id, flash_np->name);
191 if (!ndfc->mtd.name) {
192 ret = -ENOMEM;
193 goto err;
205 } 194 }
206 195
207#ifdef CONFIG_MTD_PARTITIONS 196 ret = nand_scan(&ndfc->mtd, 1);
208 printk("Number of partitions %d\n", nc->nr_partitions); 197 if (ret)
209 if (nc->nr_partitions) { 198 goto err;
210 /* Add the full device, so complete dumps can be made */
211 add_mtd_device(&nandmtd->mtd);
212 add_mtd_partitions(&nandmtd->mtd, nc->partitions,
213 nc->nr_partitions);
214 199
215 } else 200#ifdef CONFIG_MTD_PARTITIONS
216#else 201 ret = parse_mtd_partitions(&ndfc->mtd, part_types, &ndfc->parts, 0);
217 add_mtd_device(&nandmtd->mtd); 202 if (ret < 0)
203 goto err;
204
205#ifdef CONFIG_MTD_OF_PARTS
206 if (ret == 0) {
207 ret = of_mtd_parse_partitions(&ndfc->ofdev->dev, flash_np,
208 &ndfc->parts);
209 if (ret < 0)
210 goto err;
211 }
218#endif 212#endif
219 213
220 atomic_inc(&ndfc->childs_active); 214 if (ret > 0)
221 return 0; 215 ret = add_mtd_partitions(&ndfc->mtd, ndfc->parts, ret);
222} 216 else
217#endif
218 ret = add_mtd_device(&ndfc->mtd);
223 219
224static int ndfc_chip_remove(struct platform_device *pdev) 220err:
225{ 221 of_node_put(flash_np);
226 return 0; 222 if (ret)
223 kfree(ndfc->mtd.name);
224 return ret;
227} 225}
228 226
229static int ndfc_nand_probe(struct platform_device *pdev) 227static int __devinit ndfc_probe(struct of_device *ofdev,
228 const struct of_device_id *match)
230{ 229{
231 struct platform_nand_ctrl *nc = pdev->dev.platform_data;
232 struct ndfc_controller_settings *settings = nc->priv;
233 struct resource *res = pdev->resource;
234 struct ndfc_controller *ndfc = &ndfc_ctrl; 230 struct ndfc_controller *ndfc = &ndfc_ctrl;
235 unsigned long long phys = settings->ndfc_erpn | res->start; 231 const u32 *reg;
232 u32 ccr;
233 int err, len;
236 234
237#ifndef CONFIG_PHYS_64BIT 235 spin_lock_init(&ndfc->ndfc_control.lock);
238 ndfc->ndfcbase = ioremap((phys_addr_t)phys, res->end - res->start + 1); 236 init_waitqueue_head(&ndfc->ndfc_control.wq);
239#else 237 ndfc->ofdev = ofdev;
240 ndfc->ndfcbase = ioremap64(phys, res->end - res->start + 1); 238 dev_set_drvdata(&ofdev->dev, ndfc);
241#endif 239
240 /* Read the reg property to get the chip select */
241 reg = of_get_property(ofdev->node, "reg", &len);
242 if (reg == NULL || len != 12) {
243 dev_err(&ofdev->dev, "unable read reg property (%d)\n", len);
244 return -ENOENT;
245 }
246 ndfc->chip_select = reg[0];
247
248 ndfc->ndfcbase = of_iomap(ofdev->node, 0);
242 if (!ndfc->ndfcbase) { 249 if (!ndfc->ndfcbase) {
243 printk(KERN_ERR "NDFC: ioremap failed\n"); 250 dev_err(&ofdev->dev, "failed to get memory\n");
244 return -EIO; 251 return -EIO;
245 } 252 }
246 253
247 __raw_writel(settings->ccr_settings, ndfc->ndfcbase + NDFC_CCR); 254 ccr = NDFC_CCR_BS(ndfc->chip_select);
248 255
249 spin_lock_init(&ndfc->ndfc_control.lock); 256 /* It is ok if ccr does not exist - just default to 0 */
250 init_waitqueue_head(&ndfc->ndfc_control.wq); 257 reg = of_get_property(ofdev->node, "ccr", NULL);
258 if (reg)
259 ccr |= *reg;
251 260
252 platform_set_drvdata(pdev, ndfc); 261 out_be32(ndfc->ndfcbase + NDFC_CCR, ccr);
253 262
254 printk("NDFC NAND Driver initialized. Chip-Rev: 0x%08x\n", 263 /* Set the bank settings if given */
255 __raw_readl(ndfc->ndfcbase + NDFC_REVID)); 264 reg = of_get_property(ofdev->node, "bank-settings", NULL);
265 if (reg) {
266 int offset = NDFC_BCFG0 + (ndfc->chip_select << 2);
267 out_be32(ndfc->ndfcbase + offset, *reg);
268 }
269
270 err = ndfc_chip_init(ndfc, ofdev->node);
271 if (err) {
272 iounmap(ndfc->ndfcbase);
273 return err;
274 }
256 275
257 return 0; 276 return 0;
258} 277}
259 278
260static int ndfc_nand_remove(struct platform_device *pdev) 279static int __devexit ndfc_remove(struct of_device *ofdev)
261{ 280{
262 struct ndfc_controller *ndfc = platform_get_drvdata(pdev); 281 struct ndfc_controller *ndfc = dev_get_drvdata(&ofdev->dev);
263 282
264 if (atomic_read(&ndfc->childs_active)) 283 nand_release(&ndfc->mtd);
265 return -EBUSY;
266 284
267 if (ndfc) {
268 platform_set_drvdata(pdev, NULL);
269 iounmap(ndfc_ctrl.ndfcbase);
270 ndfc_ctrl.ndfcbase = NULL;
271 }
272 return 0; 285 return 0;
273} 286}
274 287
275/* driver device registration */ 288static const struct of_device_id ndfc_match[] = {
276 289 { .compatible = "ibm,ndfc", },
277static struct platform_driver ndfc_chip_driver = { 290 {}
278 .probe = ndfc_chip_probe,
279 .remove = ndfc_chip_remove,
280 .driver = {
281 .name = "ndfc-chip",
282 .owner = THIS_MODULE,
283 },
284}; 291};
292MODULE_DEVICE_TABLE(of, ndfc_match);
285 293
286static struct platform_driver ndfc_nand_driver = { 294static struct of_platform_driver ndfc_driver = {
287 .probe = ndfc_nand_probe, 295 .driver = {
288 .remove = ndfc_nand_remove, 296 .name = "ndfc",
289 .driver = {
290 .name = "ndfc-nand",
291 .owner = THIS_MODULE,
292 }, 297 },
298 .match_table = ndfc_match,
299 .probe = ndfc_probe,
300 .remove = __devexit_p(ndfc_remove),
293}; 301};
294 302
295static int __init ndfc_nand_init(void) 303static int __init ndfc_nand_init(void)
296{ 304{
297 int ret; 305 return of_register_platform_driver(&ndfc_driver);
298
299 spin_lock_init(&ndfc_ctrl.ndfc_control.lock);
300 init_waitqueue_head(&ndfc_ctrl.ndfc_control.wq);
301
302 ret = platform_driver_register(&ndfc_nand_driver);
303 if (!ret)
304 ret = platform_driver_register(&ndfc_chip_driver);
305 return ret;
306} 306}
307 307
308static void __exit ndfc_nand_exit(void) 308static void __exit ndfc_nand_exit(void)
309{ 309{
310 platform_driver_unregister(&ndfc_chip_driver); 310 of_unregister_platform_driver(&ndfc_driver);
311 platform_driver_unregister(&ndfc_nand_driver);
312} 311}
313 312
314module_init(ndfc_nand_init); 313module_init(ndfc_nand_init);
@@ -316,6 +315,4 @@ module_exit(ndfc_nand_exit);
316 315
317MODULE_LICENSE("GPL"); 316MODULE_LICENSE("GPL");
318MODULE_AUTHOR("Thomas Gleixner <tglx@linutronix.de>"); 317MODULE_AUTHOR("Thomas Gleixner <tglx@linutronix.de>");
319MODULE_DESCRIPTION("Platform driver for NDFC"); 318MODULE_DESCRIPTION("OF Platform driver for NDFC");
320MODULE_ALIAS("platform:ndfc-chip");
321MODULE_ALIAS("platform:ndfc-nand");
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index fc4144495610..cc55cbc2b308 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -298,7 +298,7 @@ static struct pxa3xx_nand_flash *builtin_flash_types[] = {
298#define NDTR1_tAR(c) (min((c), 15) << 0) 298#define NDTR1_tAR(c) (min((c), 15) << 0)
299 299
300/* convert nano-seconds to nand flash controller clock cycles */ 300/* convert nano-seconds to nand flash controller clock cycles */
301#define ns2cycle(ns, clk) (int)(((ns) * (clk / 1000000) / 1000) + 1) 301#define ns2cycle(ns, clk) (int)(((ns) * (clk / 1000000) / 1000) - 1)
302 302
303static void pxa3xx_nand_set_timing(struct pxa3xx_nand_info *info, 303static void pxa3xx_nand_set_timing(struct pxa3xx_nand_info *info,
304 const struct pxa3xx_nand_timing *t) 304 const struct pxa3xx_nand_timing *t)
@@ -368,14 +368,14 @@ static int prepare_read_prog_cmd(struct pxa3xx_nand_info *info,
368 /* large block, 2 cycles for column address 368 /* large block, 2 cycles for column address
369 * row address starts from 3rd cycle 369 * row address starts from 3rd cycle
370 */ 370 */
371 info->ndcb1 |= (page_addr << 16) | (column & 0xffff); 371 info->ndcb1 |= page_addr << 16;
372 if (info->row_addr_cycles == 3) 372 if (info->row_addr_cycles == 3)
373 info->ndcb2 = (page_addr >> 16) & 0xff; 373 info->ndcb2 = (page_addr >> 16) & 0xff;
374 } else 374 } else
375 /* small block, 1 cycles for column address 375 /* small block, 1 cycles for column address
376 * row address starts from 2nd cycle 376 * row address starts from 2nd cycle
377 */ 377 */
378 info->ndcb1 = (page_addr << 8) | (column & 0xff); 378 info->ndcb1 = page_addr << 8;
379 379
380 if (cmd == cmdset->program) 380 if (cmd == cmdset->program)
381 info->ndcb0 |= NDCB0_CMD_TYPE(1) | NDCB0_AUTO_RS; 381 info->ndcb0 |= NDCB0_CMD_TYPE(1) | NDCB0_AUTO_RS;
diff --git a/drivers/mtd/nand/sharpsl.c b/drivers/mtd/nand/sharpsl.c
index 30a518e211bd..54ec7542a7b7 100644
--- a/drivers/mtd/nand/sharpsl.c
+++ b/drivers/mtd/nand/sharpsl.c
@@ -2,6 +2,7 @@
2 * drivers/mtd/nand/sharpsl.c 2 * drivers/mtd/nand/sharpsl.c
3 * 3 *
4 * Copyright (C) 2004 Richard Purdie 4 * Copyright (C) 2004 Richard Purdie
5 * Copyright (C) 2008 Dmitry Baryshkov
5 * 6 *
6 * Based on Sharp's NAND driver sharp_sl.c 7 * Based on Sharp's NAND driver sharp_sl.c
7 * 8 *
@@ -19,22 +20,31 @@
19#include <linux/mtd/nand.h> 20#include <linux/mtd/nand.h>
20#include <linux/mtd/nand_ecc.h> 21#include <linux/mtd/nand_ecc.h>
21#include <linux/mtd/partitions.h> 22#include <linux/mtd/partitions.h>
23#include <linux/mtd/sharpsl.h>
22#include <linux/interrupt.h> 24#include <linux/interrupt.h>
25#include <linux/platform_device.h>
26
23#include <asm/io.h> 27#include <asm/io.h>
24#include <mach/hardware.h> 28#include <mach/hardware.h>
25#include <asm/mach-types.h> 29#include <asm/mach-types.h>
26 30
27static void __iomem *sharpsl_io_base; 31struct sharpsl_nand {
28static int sharpsl_phys_base = 0x0C000000; 32 struct mtd_info mtd;
33 struct nand_chip chip;
34
35 void __iomem *io;
36};
37
38#define mtd_to_sharpsl(_mtd) container_of(_mtd, struct sharpsl_nand, mtd)
29 39
30/* register offset */ 40/* register offset */
31#define ECCLPLB sharpsl_io_base+0x00 /* line parity 7 - 0 bit */ 41#define ECCLPLB 0x00 /* line parity 7 - 0 bit */
32#define ECCLPUB sharpsl_io_base+0x04 /* line parity 15 - 8 bit */ 42#define ECCLPUB 0x04 /* line parity 15 - 8 bit */
33#define ECCCP sharpsl_io_base+0x08 /* column parity 5 - 0 bit */ 43#define ECCCP 0x08 /* column parity 5 - 0 bit */
34#define ECCCNTR sharpsl_io_base+0x0C /* ECC byte counter */ 44#define ECCCNTR 0x0C /* ECC byte counter */
35#define ECCCLRR sharpsl_io_base+0x10 /* cleare ECC */ 45#define ECCCLRR 0x10 /* cleare ECC */
36#define FLASHIO sharpsl_io_base+0x14 /* Flash I/O */ 46#define FLASHIO 0x14 /* Flash I/O */
37#define FLASHCTL sharpsl_io_base+0x18 /* Flash Control */ 47#define FLASHCTL 0x18 /* Flash Control */
38 48
39/* Flash control bit */ 49/* Flash control bit */
40#define FLRYBY (1 << 5) 50#define FLRYBY (1 << 5)
@@ -45,35 +55,6 @@ static int sharpsl_phys_base = 0x0C000000;
45#define FLCE0 (1 << 0) 55#define FLCE0 (1 << 0)
46 56
47/* 57/*
48 * MTD structure for SharpSL
49 */
50static struct mtd_info *sharpsl_mtd = NULL;
51
52/*
53 * Define partitions for flash device
54 */
55#define DEFAULT_NUM_PARTITIONS 3
56
57static int nr_partitions;
58static struct mtd_partition sharpsl_nand_default_partition_info[] = {
59 {
60 .name = "System Area",
61 .offset = 0,
62 .size = 7 * 1024 * 1024,
63 },
64 {
65 .name = "Root Filesystem",
66 .offset = 7 * 1024 * 1024,
67 .size = 30 * 1024 * 1024,
68 },
69 {
70 .name = "Home Filesystem",
71 .offset = MTDPART_OFS_APPEND,
72 .size = MTDPART_SIZ_FULL,
73 },
74};
75
76/*
77 * hardware specific access to control-lines 58 * hardware specific access to control-lines
78 * ctrl: 59 * ctrl:
79 * NAND_CNE: bit 0 -> ! bit 0 & 4 60 * NAND_CNE: bit 0 -> ! bit 0 & 4
@@ -84,6 +65,7 @@ static struct mtd_partition sharpsl_nand_default_partition_info[] = {
84static void sharpsl_nand_hwcontrol(struct mtd_info *mtd, int cmd, 65static void sharpsl_nand_hwcontrol(struct mtd_info *mtd, int cmd,
85 unsigned int ctrl) 66 unsigned int ctrl)
86{ 67{
68 struct sharpsl_nand *sharpsl = mtd_to_sharpsl(mtd);
87 struct nand_chip *chip = mtd->priv; 69 struct nand_chip *chip = mtd->priv;
88 70
89 if (ctrl & NAND_CTRL_CHANGE) { 71 if (ctrl & NAND_CTRL_CHANGE) {
@@ -93,103 +75,97 @@ static void sharpsl_nand_hwcontrol(struct mtd_info *mtd, int cmd,
93 75
94 bits ^= 0x11; 76 bits ^= 0x11;
95 77
96 writeb((readb(FLASHCTL) & ~0x17) | bits, FLASHCTL); 78 writeb((readb(sharpsl->io + FLASHCTL) & ~0x17) | bits, sharpsl->io + FLASHCTL);
97 } 79 }
98 80
99 if (cmd != NAND_CMD_NONE) 81 if (cmd != NAND_CMD_NONE)
100 writeb(cmd, chip->IO_ADDR_W); 82 writeb(cmd, chip->IO_ADDR_W);
101} 83}
102 84
103static uint8_t scan_ff_pattern[] = { 0xff, 0xff };
104
105static struct nand_bbt_descr sharpsl_bbt = {
106 .options = 0,
107 .offs = 4,
108 .len = 2,
109 .pattern = scan_ff_pattern
110};
111
112static struct nand_bbt_descr sharpsl_akita_bbt = {
113 .options = 0,
114 .offs = 4,
115 .len = 1,
116 .pattern = scan_ff_pattern
117};
118
119static struct nand_ecclayout akita_oobinfo = {
120 .eccbytes = 24,
121 .eccpos = {
122 0x5, 0x1, 0x2, 0x3, 0x6, 0x7, 0x15, 0x11,
123 0x12, 0x13, 0x16, 0x17, 0x25, 0x21, 0x22, 0x23,
124 0x26, 0x27, 0x35, 0x31, 0x32, 0x33, 0x36, 0x37},
125 .oobfree = {{0x08, 0x09}}
126};
127
128static int sharpsl_nand_dev_ready(struct mtd_info *mtd) 85static int sharpsl_nand_dev_ready(struct mtd_info *mtd)
129{ 86{
130 return !((readb(FLASHCTL) & FLRYBY) == 0); 87 struct sharpsl_nand *sharpsl = mtd_to_sharpsl(mtd);
88 return !((readb(sharpsl->io + FLASHCTL) & FLRYBY) == 0);
131} 89}
132 90
133static void sharpsl_nand_enable_hwecc(struct mtd_info *mtd, int mode) 91static void sharpsl_nand_enable_hwecc(struct mtd_info *mtd, int mode)
134{ 92{
135 writeb(0, ECCCLRR); 93 struct sharpsl_nand *sharpsl = mtd_to_sharpsl(mtd);
94 writeb(0, sharpsl->io + ECCCLRR);
136} 95}
137 96
138static int sharpsl_nand_calculate_ecc(struct mtd_info *mtd, const u_char * dat, u_char * ecc_code) 97static int sharpsl_nand_calculate_ecc(struct mtd_info *mtd, const u_char * dat, u_char * ecc_code)
139{ 98{
140 ecc_code[0] = ~readb(ECCLPUB); 99 struct sharpsl_nand *sharpsl = mtd_to_sharpsl(mtd);
141 ecc_code[1] = ~readb(ECCLPLB); 100 ecc_code[0] = ~readb(sharpsl->io + ECCLPUB);
142 ecc_code[2] = (~readb(ECCCP) << 2) | 0x03; 101 ecc_code[1] = ~readb(sharpsl->io + ECCLPLB);
143 return readb(ECCCNTR) != 0; 102 ecc_code[2] = (~readb(sharpsl->io + ECCCP) << 2) | 0x03;
103 return readb(sharpsl->io + ECCCNTR) != 0;
144} 104}
145 105
146#ifdef CONFIG_MTD_PARTITIONS 106#ifdef CONFIG_MTD_PARTITIONS
147const char *part_probes[] = { "cmdlinepart", NULL }; 107static const char *part_probes[] = { "cmdlinepart", NULL };
148#endif 108#endif
149 109
150/* 110/*
151 * Main initialization routine 111 * Main initialization routine
152 */ 112 */
153static int __init sharpsl_nand_init(void) 113static int __devinit sharpsl_nand_probe(struct platform_device *pdev)
154{ 114{
155 struct nand_chip *this; 115 struct nand_chip *this;
116#ifdef CONFIG_MTD_PARTITIONS
156 struct mtd_partition *sharpsl_partition_info; 117 struct mtd_partition *sharpsl_partition_info;
118 int nr_partitions;
119#endif
120 struct resource *r;
157 int err = 0; 121 int err = 0;
122 struct sharpsl_nand *sharpsl;
123 struct sharpsl_nand_platform_data *data = pdev->dev.platform_data;
124
125 if (!data) {
126 dev_err(&pdev->dev, "no platform data!\n");
127 return -EINVAL;
128 }
158 129
159 /* Allocate memory for MTD device structure and private data */ 130 /* Allocate memory for MTD device structure and private data */
160 sharpsl_mtd = kmalloc(sizeof(struct mtd_info) + sizeof(struct nand_chip), GFP_KERNEL); 131 sharpsl = kzalloc(sizeof(struct sharpsl_nand), GFP_KERNEL);
161 if (!sharpsl_mtd) { 132 if (!sharpsl) {
162 printk("Unable to allocate SharpSL NAND MTD device structure.\n"); 133 printk("Unable to allocate SharpSL NAND MTD device structure.\n");
163 return -ENOMEM; 134 return -ENOMEM;
164 } 135 }
165 136
137 r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
138 if (!r) {
139 dev_err(&pdev->dev, "no io memory resource defined!\n");
140 err = -ENODEV;
141 goto err_get_res;
142 }
143
166 /* map physical address */ 144 /* map physical address */
167 sharpsl_io_base = ioremap(sharpsl_phys_base, 0x1000); 145 sharpsl->io = ioremap(r->start, resource_size(r));
168 if (!sharpsl_io_base) { 146 if (!sharpsl->io) {
169 printk("ioremap to access Sharp SL NAND chip failed\n"); 147 printk("ioremap to access Sharp SL NAND chip failed\n");
170 kfree(sharpsl_mtd); 148 err = -EIO;
171 return -EIO; 149 goto err_ioremap;
172 } 150 }
173 151
174 /* Get pointer to private data */ 152 /* Get pointer to private data */
175 this = (struct nand_chip *)(&sharpsl_mtd[1]); 153 this = (struct nand_chip *)(&sharpsl->chip);
176
177 /* Initialize structures */
178 memset(sharpsl_mtd, 0, sizeof(struct mtd_info));
179 memset(this, 0, sizeof(struct nand_chip));
180 154
181 /* Link the private data with the MTD structure */ 155 /* Link the private data with the MTD structure */
182 sharpsl_mtd->priv = this; 156 sharpsl->mtd.priv = this;
183 sharpsl_mtd->owner = THIS_MODULE; 157 sharpsl->mtd.owner = THIS_MODULE;
158
159 platform_set_drvdata(pdev, sharpsl);
184 160
185 /* 161 /*
186 * PXA initialize 162 * PXA initialize
187 */ 163 */
188 writeb(readb(FLASHCTL) | FLWP, FLASHCTL); 164 writeb(readb(sharpsl->io + FLASHCTL) | FLWP, sharpsl->io + FLASHCTL);
189 165
190 /* Set address of NAND IO lines */ 166 /* Set address of NAND IO lines */
191 this->IO_ADDR_R = FLASHIO; 167 this->IO_ADDR_R = sharpsl->io + FLASHIO;
192 this->IO_ADDR_W = FLASHIO; 168 this->IO_ADDR_W = sharpsl->io + FLASHIO;
193 /* Set address of hardware control function */ 169 /* Set address of hardware control function */
194 this->cmd_ctrl = sharpsl_nand_hwcontrol; 170 this->cmd_ctrl = sharpsl_nand_hwcontrol;
195 this->dev_ready = sharpsl_nand_dev_ready; 171 this->dev_ready = sharpsl_nand_dev_ready;
@@ -199,68 +175,89 @@ static int __init sharpsl_nand_init(void)
199 this->ecc.mode = NAND_ECC_HW; 175 this->ecc.mode = NAND_ECC_HW;
200 this->ecc.size = 256; 176 this->ecc.size = 256;
201 this->ecc.bytes = 3; 177 this->ecc.bytes = 3;
202 this->badblock_pattern = &sharpsl_bbt; 178 this->badblock_pattern = data->badblock_pattern;
203 if (machine_is_akita() || machine_is_borzoi()) { 179 this->ecc.layout = data->ecc_layout;
204 this->badblock_pattern = &sharpsl_akita_bbt;
205 this->ecc.layout = &akita_oobinfo;
206 }
207 this->ecc.hwctl = sharpsl_nand_enable_hwecc; 180 this->ecc.hwctl = sharpsl_nand_enable_hwecc;
208 this->ecc.calculate = sharpsl_nand_calculate_ecc; 181 this->ecc.calculate = sharpsl_nand_calculate_ecc;
209 this->ecc.correct = nand_correct_data; 182 this->ecc.correct = nand_correct_data;
210 183
211 /* Scan to find existence of the device */ 184 /* Scan to find existence of the device */
212 err = nand_scan(sharpsl_mtd, 1); 185 err = nand_scan(&sharpsl->mtd, 1);
213 if (err) { 186 if (err)
214 iounmap(sharpsl_io_base); 187 goto err_scan;
215 kfree(sharpsl_mtd);
216 return err;
217 }
218 188
219 /* Register the partitions */ 189 /* Register the partitions */
220 sharpsl_mtd->name = "sharpsl-nand"; 190 sharpsl->mtd.name = "sharpsl-nand";
221 nr_partitions = parse_mtd_partitions(sharpsl_mtd, part_probes, &sharpsl_partition_info, 0); 191#ifdef CONFIG_MTD_PARTITIONS
222 192 nr_partitions = parse_mtd_partitions(&sharpsl->mtd, part_probes, &sharpsl_partition_info, 0);
223 if (nr_partitions <= 0) { 193 if (nr_partitions <= 0) {
224 nr_partitions = DEFAULT_NUM_PARTITIONS; 194 nr_partitions = data->nr_partitions;
225 sharpsl_partition_info = sharpsl_nand_default_partition_info; 195 sharpsl_partition_info = data->partitions;
226 if (machine_is_poodle()) {
227 sharpsl_partition_info[1].size = 22 * 1024 * 1024;
228 } else if (machine_is_corgi() || machine_is_shepherd()) {
229 sharpsl_partition_info[1].size = 25 * 1024 * 1024;
230 } else if (machine_is_husky()) {
231 sharpsl_partition_info[1].size = 53 * 1024 * 1024;
232 } else if (machine_is_spitz()) {
233 sharpsl_partition_info[1].size = 5 * 1024 * 1024;
234 } else if (machine_is_akita()) {
235 sharpsl_partition_info[1].size = 58 * 1024 * 1024;
236 } else if (machine_is_borzoi()) {
237 sharpsl_partition_info[1].size = 32 * 1024 * 1024;
238 }
239 } 196 }
240 197
241 add_mtd_partitions(sharpsl_mtd, sharpsl_partition_info, nr_partitions); 198 if (nr_partitions > 0)
199 err = add_mtd_partitions(&sharpsl->mtd, sharpsl_partition_info, nr_partitions);
200 else
201#endif
202 err = add_mtd_device(&sharpsl->mtd);
203 if (err)
204 goto err_add;
242 205
243 /* Return happy */ 206 /* Return happy */
244 return 0; 207 return 0;
245}
246 208
247module_init(sharpsl_nand_init); 209err_add:
210 nand_release(&sharpsl->mtd);
211
212err_scan:
213 platform_set_drvdata(pdev, NULL);
214 iounmap(sharpsl->io);
215err_ioremap:
216err_get_res:
217 kfree(sharpsl);
218 return err;
219}
248 220
249/* 221/*
250 * Clean up routine 222 * Clean up routine
251 */ 223 */
252static void __exit sharpsl_nand_cleanup(void) 224static int __devexit sharpsl_nand_remove(struct platform_device *pdev)
253{ 225{
226 struct sharpsl_nand *sharpsl = platform_get_drvdata(pdev);
227
254 /* Release resources, unregister device */ 228 /* Release resources, unregister device */
255 nand_release(sharpsl_mtd); 229 nand_release(&sharpsl->mtd);
256 230
257 iounmap(sharpsl_io_base); 231 platform_set_drvdata(pdev, NULL);
232
233 iounmap(sharpsl->io);
258 234
259 /* Free the MTD device structure */ 235 /* Free the MTD device structure */
260 kfree(sharpsl_mtd); 236 kfree(sharpsl);
237
238 return 0;
239}
240
241static struct platform_driver sharpsl_nand_driver = {
242 .driver = {
243 .name = "sharpsl-nand",
244 .owner = THIS_MODULE,
245 },
246 .probe = sharpsl_nand_probe,
247 .remove = __devexit_p(sharpsl_nand_remove),
248};
249
250static int __init sharpsl_nand_init(void)
251{
252 return platform_driver_register(&sharpsl_nand_driver);
261} 253}
254module_init(sharpsl_nand_init);
262 255
263module_exit(sharpsl_nand_cleanup); 256static void __exit sharpsl_nand_exit(void)
257{
258 platform_driver_unregister(&sharpsl_nand_driver);
259}
260module_exit(sharpsl_nand_exit);
264 261
265MODULE_LICENSE("GPL"); 262MODULE_LICENSE("GPL");
266MODULE_AUTHOR("Richard Purdie <rpurdie@rpsys.net>"); 263MODULE_AUTHOR("Richard Purdie <rpurdie@rpsys.net>");
diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c
index 320b929abe79..d1c4546513f7 100644
--- a/drivers/mtd/nftlcore.c
+++ b/drivers/mtd/nftlcore.c
@@ -39,7 +39,7 @@ static void nftl_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
39 struct NFTLrecord *nftl; 39 struct NFTLrecord *nftl;
40 unsigned long temp; 40 unsigned long temp;
41 41
42 if (mtd->type != MTD_NANDFLASH) 42 if (mtd->type != MTD_NANDFLASH || mtd->size > UINT_MAX)
43 return; 43 return;
44 /* OK, this is moderately ugly. But probably safe. Alternatives? */ 44 /* OK, this is moderately ugly. But probably safe. Alternatives? */
45 if (memcmp(mtd->name, "DiskOnChip", 10)) 45 if (memcmp(mtd->name, "DiskOnChip", 10))
diff --git a/drivers/mtd/nftlmount.c b/drivers/mtd/nftlmount.c
index ccc4f209fbb5..8b22b1836e9f 100644
--- a/drivers/mtd/nftlmount.c
+++ b/drivers/mtd/nftlmount.c
@@ -51,7 +51,7 @@ static int find_boot_record(struct NFTLrecord *nftl)
51 the mtd device accordingly. We could even get rid of 51 the mtd device accordingly. We could even get rid of
52 nftl->EraseSize if there were any point in doing so. */ 52 nftl->EraseSize if there were any point in doing so. */
53 nftl->EraseSize = nftl->mbd.mtd->erasesize; 53 nftl->EraseSize = nftl->mbd.mtd->erasesize;
54 nftl->nb_blocks = nftl->mbd.mtd->size / nftl->EraseSize; 54 nftl->nb_blocks = (u32)nftl->mbd.mtd->size / nftl->EraseSize;
55 55
56 nftl->MediaUnit = BLOCK_NIL; 56 nftl->MediaUnit = BLOCK_NIL;
57 nftl->SpareMediaUnit = BLOCK_NIL; 57 nftl->SpareMediaUnit = BLOCK_NIL;
@@ -168,7 +168,7 @@ device is already correct.
168 printk(KERN_NOTICE "WARNING: Support for NFTL with UnitSizeFactor 0x%02x is experimental\n", 168 printk(KERN_NOTICE "WARNING: Support for NFTL with UnitSizeFactor 0x%02x is experimental\n",
169 mh->UnitSizeFactor); 169 mh->UnitSizeFactor);
170 nftl->EraseSize = nftl->mbd.mtd->erasesize << (0xff - mh->UnitSizeFactor); 170 nftl->EraseSize = nftl->mbd.mtd->erasesize << (0xff - mh->UnitSizeFactor);
171 nftl->nb_blocks = nftl->mbd.mtd->size / nftl->EraseSize; 171 nftl->nb_blocks = (u32)nftl->mbd.mtd->size / nftl->EraseSize;
172 } 172 }
173#endif 173#endif
174 nftl->nb_boot_blocks = le16_to_cpu(mh->FirstPhysicalEUN); 174 nftl->nb_boot_blocks = le16_to_cpu(mh->FirstPhysicalEUN);
diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 90ed319f26e6..529af271db17 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -1772,7 +1772,7 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr)
1772 int len; 1772 int len;
1773 int ret = 0; 1773 int ret = 0;
1774 1774
1775 DEBUG(MTD_DEBUG_LEVEL3, "onenand_erase: start = 0x%08x, len = %i\n", (unsigned int) instr->addr, (unsigned int) instr->len); 1775 DEBUG(MTD_DEBUG_LEVEL3, "onenand_erase: start = 0x%012llx, len = %llu\n", (unsigned long long) instr->addr, (unsigned long long) instr->len);
1776 1776
1777 block_size = (1 << this->erase_shift); 1777 block_size = (1 << this->erase_shift);
1778 1778
@@ -1810,7 +1810,7 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr)
1810 1810
1811 /* Check if we have a bad block, we do not erase bad blocks */ 1811 /* Check if we have a bad block, we do not erase bad blocks */
1812 if (onenand_block_isbad_nolock(mtd, addr, 0)) { 1812 if (onenand_block_isbad_nolock(mtd, addr, 0)) {
1813 printk (KERN_WARNING "onenand_erase: attempt to erase a bad block at addr 0x%08x\n", (unsigned int) addr); 1813 printk (KERN_WARNING "onenand_erase: attempt to erase a bad block at addr 0x%012llx\n", (unsigned long long) addr);
1814 instr->state = MTD_ERASE_FAILED; 1814 instr->state = MTD_ERASE_FAILED;
1815 goto erase_exit; 1815 goto erase_exit;
1816 } 1816 }
@@ -2029,7 +2029,7 @@ static int onenand_do_lock_cmd(struct mtd_info *mtd, loff_t ofs, size_t len, int
2029 * 2029 *
2030 * Lock one or more blocks 2030 * Lock one or more blocks
2031 */ 2031 */
2032static int onenand_lock(struct mtd_info *mtd, loff_t ofs, size_t len) 2032static int onenand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
2033{ 2033{
2034 int ret; 2034 int ret;
2035 2035
@@ -2047,7 +2047,7 @@ static int onenand_lock(struct mtd_info *mtd, loff_t ofs, size_t len)
2047 * 2047 *
2048 * Unlock one or more blocks 2048 * Unlock one or more blocks
2049 */ 2049 */
2050static int onenand_unlock(struct mtd_info *mtd, loff_t ofs, size_t len) 2050static int onenand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
2051{ 2051{
2052 int ret; 2052 int ret;
2053 2053
diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c
index e538c0a72abb..d2aa9c46530f 100644
--- a/drivers/mtd/rfd_ftl.c
+++ b/drivers/mtd/rfd_ftl.c
@@ -21,8 +21,6 @@
21 21
22#include <asm/types.h> 22#include <asm/types.h>
23 23
24#define const_cpu_to_le16 __constant_cpu_to_le16
25
26static int block_size = 0; 24static int block_size = 0;
27module_param(block_size, int, 0); 25module_param(block_size, int, 0);
28MODULE_PARM_DESC(block_size, "Block size to use by RFD, defaults to erase unit size"); 26MODULE_PARM_DESC(block_size, "Block size to use by RFD, defaults to erase unit size");
@@ -156,7 +154,7 @@ static int scan_header(struct partition *part)
156 size_t retlen; 154 size_t retlen;
157 155
158 sectors_per_block = part->block_size / SECTOR_SIZE; 156 sectors_per_block = part->block_size / SECTOR_SIZE;
159 part->total_blocks = part->mbd.mtd->size / part->block_size; 157 part->total_blocks = (u32)part->mbd.mtd->size / part->block_size;
160 158
161 if (part->total_blocks < 2) 159 if (part->total_blocks < 2)
162 return -ENOENT; 160 return -ENOENT;
@@ -276,16 +274,17 @@ static void erase_callback(struct erase_info *erase)
276 274
277 part = (struct partition*)erase->priv; 275 part = (struct partition*)erase->priv;
278 276
279 i = erase->addr / part->block_size; 277 i = (u32)erase->addr / part->block_size;
280 if (i >= part->total_blocks || part->blocks[i].offset != erase->addr) { 278 if (i >= part->total_blocks || part->blocks[i].offset != erase->addr ||
281 printk(KERN_ERR PREFIX "erase callback for unknown offset %x " 279 erase->addr > UINT_MAX) {
282 "on '%s'\n", erase->addr, part->mbd.mtd->name); 280 printk(KERN_ERR PREFIX "erase callback for unknown offset %llx "
281 "on '%s'\n", (unsigned long long)erase->addr, part->mbd.mtd->name);
283 return; 282 return;
284 } 283 }
285 284
286 if (erase->state != MTD_ERASE_DONE) { 285 if (erase->state != MTD_ERASE_DONE) {
287 printk(KERN_WARNING PREFIX "erase failed at 0x%x on '%s', " 286 printk(KERN_WARNING PREFIX "erase failed at 0x%llx on '%s', "
288 "state %d\n", erase->addr, 287 "state %d\n", (unsigned long long)erase->addr,
289 part->mbd.mtd->name, erase->state); 288 part->mbd.mtd->name, erase->state);
290 289
291 part->blocks[i].state = BLOCK_FAILED; 290 part->blocks[i].state = BLOCK_FAILED;
@@ -297,7 +296,7 @@ static void erase_callback(struct erase_info *erase)
297 return; 296 return;
298 } 297 }
299 298
300 magic = const_cpu_to_le16(RFD_MAGIC); 299 magic = cpu_to_le16(RFD_MAGIC);
301 300
302 part->blocks[i].state = BLOCK_ERASED; 301 part->blocks[i].state = BLOCK_ERASED;
303 part->blocks[i].free_sectors = part->data_sectors_per_block; 302 part->blocks[i].free_sectors = part->data_sectors_per_block;
@@ -345,9 +344,9 @@ static int erase_block(struct partition *part, int block)
345 rc = part->mbd.mtd->erase(part->mbd.mtd, erase); 344 rc = part->mbd.mtd->erase(part->mbd.mtd, erase);
346 345
347 if (rc) { 346 if (rc) {
348 printk(KERN_ERR PREFIX "erase of region %x,%x on '%s' " 347 printk(KERN_ERR PREFIX "erase of region %llx,%llx on '%s' "
349 "failed\n", erase->addr, erase->len, 348 "failed\n", (unsigned long long)erase->addr,
350 part->mbd.mtd->name); 349 (unsigned long long)erase->len, part->mbd.mtd->name);
351 kfree(erase); 350 kfree(erase);
352 } 351 }
353 352
@@ -587,7 +586,7 @@ static int mark_sector_deleted(struct partition *part, u_long old_addr)
587 int block, offset, rc; 586 int block, offset, rc;
588 u_long addr; 587 u_long addr;
589 size_t retlen; 588 size_t retlen;
590 u16 del = const_cpu_to_le16(SECTOR_DELETED); 589 u16 del = cpu_to_le16(SECTOR_DELETED);
591 590
592 block = old_addr / part->block_size; 591 block = old_addr / part->block_size;
593 offset = (old_addr % part->block_size) / SECTOR_SIZE - 592 offset = (old_addr % part->block_size) / SECTOR_SIZE -
@@ -763,7 +762,7 @@ static void rfd_ftl_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
763{ 762{
764 struct partition *part; 763 struct partition *part;
765 764
766 if (mtd->type != MTD_NORFLASH) 765 if (mtd->type != MTD_NORFLASH || mtd->size > UINT_MAX)
767 return; 766 return;
768 767
769 part = kzalloc(sizeof(struct partition), GFP_KERNEL); 768 part = kzalloc(sizeof(struct partition), GFP_KERNEL);
diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c
index 33a5d6ed6f18..3f67e00d98e0 100644
--- a/drivers/mtd/ssfdc.c
+++ b/drivers/mtd/ssfdc.c
@@ -294,7 +294,8 @@ static void ssfdcr_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
294 int cis_sector; 294 int cis_sector;
295 295
296 /* Check for small page NAND flash */ 296 /* Check for small page NAND flash */
297 if (mtd->type != MTD_NANDFLASH || mtd->oobsize != OOB_SIZE) 297 if (mtd->type != MTD_NANDFLASH || mtd->oobsize != OOB_SIZE ||
298 mtd->size > UINT_MAX)
298 return; 299 return;
299 300
300 /* Check for SSDFC format by reading CIS/IDI sector */ 301 /* Check for SSDFC format by reading CIS/IDI sector */
@@ -316,7 +317,7 @@ static void ssfdcr_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
316 317
317 ssfdc->cis_block = cis_sector / (mtd->erasesize >> SECTOR_SHIFT); 318 ssfdc->cis_block = cis_sector / (mtd->erasesize >> SECTOR_SHIFT);
318 ssfdc->erase_size = mtd->erasesize; 319 ssfdc->erase_size = mtd->erasesize;
319 ssfdc->map_len = mtd->size / mtd->erasesize; 320 ssfdc->map_len = (u32)mtd->size / mtd->erasesize;
320 321
321 DEBUG(MTD_DEBUG_LEVEL1, 322 DEBUG(MTD_DEBUG_LEVEL1,
322 "SSFDC_RO: cis_block=%d,erase_size=%d,map_len=%d,n_zones=%d\n", 323 "SSFDC_RO: cis_block=%d,erase_size=%d,map_len=%d,n_zones=%d\n",
@@ -327,7 +328,7 @@ static void ssfdcr_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
327 ssfdc->heads = 16; 328 ssfdc->heads = 16;
328 ssfdc->sectors = 32; 329 ssfdc->sectors = 32;
329 get_chs(mtd->size, NULL, &ssfdc->heads, &ssfdc->sectors); 330 get_chs(mtd->size, NULL, &ssfdc->heads, &ssfdc->sectors);
330 ssfdc->cylinders = (unsigned short)((mtd->size >> SECTOR_SHIFT) / 331 ssfdc->cylinders = (unsigned short)(((u32)mtd->size >> SECTOR_SHIFT) /
331 ((long)ssfdc->sectors * (long)ssfdc->heads)); 332 ((long)ssfdc->sectors * (long)ssfdc->heads));
332 333
333 DEBUG(MTD_DEBUG_LEVEL1, "SSFDC_RO: using C:%d H:%d S:%d == %ld sects\n", 334 DEBUG(MTD_DEBUG_LEVEL1, "SSFDC_RO: using C:%d H:%d S:%d == %ld sects\n",
diff --git a/drivers/mtd/tests/Makefile b/drivers/mtd/tests/Makefile
new file mode 100644
index 000000000000..c1d501335006
--- /dev/null
+++ b/drivers/mtd/tests/Makefile
@@ -0,0 +1,7 @@
1obj-$(CONFIG_MTD_TESTS) += mtd_oobtest.o
2obj-$(CONFIG_MTD_TESTS) += mtd_pagetest.o
3obj-$(CONFIG_MTD_TESTS) += mtd_readtest.o
4obj-$(CONFIG_MTD_TESTS) += mtd_speedtest.o
5obj-$(CONFIG_MTD_TESTS) += mtd_stresstest.o
6obj-$(CONFIG_MTD_TESTS) += mtd_subpagetest.o
7obj-$(CONFIG_MTD_TESTS) += mtd_torturetest.o
diff --git a/drivers/mtd/tests/mtd_oobtest.c b/drivers/mtd/tests/mtd_oobtest.c
new file mode 100644
index 000000000000..afbc3f8126db
--- /dev/null
+++ b/drivers/mtd/tests/mtd_oobtest.c
@@ -0,0 +1,742 @@
1/*
2 * Copyright (C) 2006-2008 Nokia Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License version 2 as published by
6 * the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; see the file COPYING. If not, write to the Free Software
15 * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16 *
17 * Test OOB read and write on MTD device.
18 *
19 * Author: Adrian Hunter <ext-adrian.hunter@nokia.com>
20 */
21
22#include <asm/div64.h>
23#include <linux/init.h>
24#include <linux/module.h>
25#include <linux/moduleparam.h>
26#include <linux/err.h>
27#include <linux/mtd/mtd.h>
28#include <linux/sched.h>
29
30#define PRINT_PREF KERN_INFO "mtd_oobtest: "
31
32static int dev;
33module_param(dev, int, S_IRUGO);
34MODULE_PARM_DESC(dev, "MTD device number to use");
35
36static struct mtd_info *mtd;
37static unsigned char *readbuf;
38static unsigned char *writebuf;
39static unsigned char *bbt;
40
41static int ebcnt;
42static int pgcnt;
43static int errcnt;
44static int use_offset;
45static int use_len;
46static int use_len_max;
47static int vary_offset;
48static unsigned long next = 1;
49
50static inline unsigned int simple_rand(void)
51{
52 next = next * 1103515245 + 12345;
53 return (unsigned int)((next / 65536) % 32768);
54}
55
56static inline void simple_srand(unsigned long seed)
57{
58 next = seed;
59}
60
61static void set_random_data(unsigned char *buf, size_t len)
62{
63 size_t i;
64
65 for (i = 0; i < len; ++i)
66 buf[i] = simple_rand();
67}
68
69static int erase_eraseblock(int ebnum)
70{
71 int err;
72 struct erase_info ei;
73 loff_t addr = ebnum * mtd->erasesize;
74
75 memset(&ei, 0, sizeof(struct erase_info));
76 ei.mtd = mtd;
77 ei.addr = addr;
78 ei.len = mtd->erasesize;
79
80 err = mtd->erase(mtd, &ei);
81 if (err) {
82 printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
83 return err;
84 }
85
86 if (ei.state == MTD_ERASE_FAILED) {
87 printk(PRINT_PREF "some erase error occurred at EB %d\n",
88 ebnum);
89 return -EIO;
90 }
91
92 return 0;
93}
94
95static int erase_whole_device(void)
96{
97 int err;
98 unsigned int i;
99
100 printk(PRINT_PREF "erasing whole device\n");
101 for (i = 0; i < ebcnt; ++i) {
102 if (bbt[i])
103 continue;
104 err = erase_eraseblock(i);
105 if (err)
106 return err;
107 cond_resched();
108 }
109 printk(PRINT_PREF "erased %u eraseblocks\n", i);
110 return 0;
111}
112
113static void do_vary_offset(void)
114{
115 use_len -= 1;
116 if (use_len < 1) {
117 use_offset += 1;
118 if (use_offset >= use_len_max)
119 use_offset = 0;
120 use_len = use_len_max - use_offset;
121 }
122}
123
124static int write_eraseblock(int ebnum)
125{
126 int i;
127 struct mtd_oob_ops ops;
128 int err = 0;
129 loff_t addr = ebnum * mtd->erasesize;
130
131 for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) {
132 set_random_data(writebuf, use_len);
133 ops.mode = MTD_OOB_AUTO;
134 ops.len = 0;
135 ops.retlen = 0;
136 ops.ooblen = use_len;
137 ops.oobretlen = 0;
138 ops.ooboffs = use_offset;
139 ops.datbuf = 0;
140 ops.oobbuf = writebuf;
141 err = mtd->write_oob(mtd, addr, &ops);
142 if (err || ops.oobretlen != use_len) {
143 printk(PRINT_PREF "error: writeoob failed at %#llx\n",
144 (long long)addr);
145 printk(PRINT_PREF "error: use_len %d, use_offset %d\n",
146 use_len, use_offset);
147 errcnt += 1;
148 return err ? err : -1;
149 }
150 if (vary_offset)
151 do_vary_offset();
152 }
153
154 return err;
155}
156
157static int write_whole_device(void)
158{
159 int err;
160 unsigned int i;
161
162 printk(PRINT_PREF "writing OOBs of whole device\n");
163 for (i = 0; i < ebcnt; ++i) {
164 if (bbt[i])
165 continue;
166 err = write_eraseblock(i);
167 if (err)
168 return err;
169 if (i % 256 == 0)
170 printk(PRINT_PREF "written up to eraseblock %u\n", i);
171 cond_resched();
172 }
173 printk(PRINT_PREF "written %u eraseblocks\n", i);
174 return 0;
175}
176
177static int verify_eraseblock(int ebnum)
178{
179 int i;
180 struct mtd_oob_ops ops;
181 int err = 0;
182 loff_t addr = ebnum * mtd->erasesize;
183
184 for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) {
185 set_random_data(writebuf, use_len);
186 ops.mode = MTD_OOB_AUTO;
187 ops.len = 0;
188 ops.retlen = 0;
189 ops.ooblen = use_len;
190 ops.oobretlen = 0;
191 ops.ooboffs = use_offset;
192 ops.datbuf = 0;
193 ops.oobbuf = readbuf;
194 err = mtd->read_oob(mtd, addr, &ops);
195 if (err || ops.oobretlen != use_len) {
196 printk(PRINT_PREF "error: readoob failed at %#llx\n",
197 (long long)addr);
198 errcnt += 1;
199 return err ? err : -1;
200 }
201 if (memcmp(readbuf, writebuf, use_len)) {
202 printk(PRINT_PREF "error: verify failed at %#llx\n",
203 (long long)addr);
204 errcnt += 1;
205 if (errcnt > 1000) {
206 printk(PRINT_PREF "error: too many errors\n");
207 return -1;
208 }
209 }
210 if (use_offset != 0 || use_len < mtd->ecclayout->oobavail) {
211 int k;
212
213 ops.mode = MTD_OOB_AUTO;
214 ops.len = 0;
215 ops.retlen = 0;
216 ops.ooblen = mtd->ecclayout->oobavail;
217 ops.oobretlen = 0;
218 ops.ooboffs = 0;
219 ops.datbuf = 0;
220 ops.oobbuf = readbuf;
221 err = mtd->read_oob(mtd, addr, &ops);
222 if (err || ops.oobretlen != mtd->ecclayout->oobavail) {
223 printk(PRINT_PREF "error: readoob failed at "
224 "%#llx\n", (long long)addr);
225 errcnt += 1;
226 return err ? err : -1;
227 }
228 if (memcmp(readbuf + use_offset, writebuf, use_len)) {
229 printk(PRINT_PREF "error: verify failed at "
230 "%#llx\n", (long long)addr);
231 errcnt += 1;
232 if (errcnt > 1000) {
233 printk(PRINT_PREF "error: too many "
234 "errors\n");
235 return -1;
236 }
237 }
238 for (k = 0; k < use_offset; ++k)
239 if (readbuf[k] != 0xff) {
240 printk(PRINT_PREF "error: verify 0xff "
241 "failed at %#llx\n",
242 (long long)addr);
243 errcnt += 1;
244 if (errcnt > 1000) {
245 printk(PRINT_PREF "error: too "
246 "many errors\n");
247 return -1;
248 }
249 }
250 for (k = use_offset + use_len;
251 k < mtd->ecclayout->oobavail; ++k)
252 if (readbuf[k] != 0xff) {
253 printk(PRINT_PREF "error: verify 0xff "
254 "failed at %#llx\n",
255 (long long)addr);
256 errcnt += 1;
257 if (errcnt > 1000) {
258 printk(PRINT_PREF "error: too "
259 "many errors\n");
260 return -1;
261 }
262 }
263 }
264 if (vary_offset)
265 do_vary_offset();
266 }
267 return err;
268}
269
270static int verify_eraseblock_in_one_go(int ebnum)
271{
272 struct mtd_oob_ops ops;
273 int err = 0;
274 loff_t addr = ebnum * mtd->erasesize;
275 size_t len = mtd->ecclayout->oobavail * pgcnt;
276
277 set_random_data(writebuf, len);
278 ops.mode = MTD_OOB_AUTO;
279 ops.len = 0;
280 ops.retlen = 0;
281 ops.ooblen = len;
282 ops.oobretlen = 0;
283 ops.ooboffs = 0;
284 ops.datbuf = 0;
285 ops.oobbuf = readbuf;
286 err = mtd->read_oob(mtd, addr, &ops);
287 if (err || ops.oobretlen != len) {
288 printk(PRINT_PREF "error: readoob failed at %#llx\n",
289 (long long)addr);
290 errcnt += 1;
291 return err ? err : -1;
292 }
293 if (memcmp(readbuf, writebuf, len)) {
294 printk(PRINT_PREF "error: verify failed at %#llx\n",
295 (long long)addr);
296 errcnt += 1;
297 if (errcnt > 1000) {
298 printk(PRINT_PREF "error: too many errors\n");
299 return -1;
300 }
301 }
302
303 return err;
304}
305
306static int verify_all_eraseblocks(void)
307{
308 int err;
309 unsigned int i;
310
311 printk(PRINT_PREF "verifying all eraseblocks\n");
312 for (i = 0; i < ebcnt; ++i) {
313 if (bbt[i])
314 continue;
315 err = verify_eraseblock(i);
316 if (err)
317 return err;
318 if (i % 256 == 0)
319 printk(PRINT_PREF "verified up to eraseblock %u\n", i);
320 cond_resched();
321 }
322 printk(PRINT_PREF "verified %u eraseblocks\n", i);
323 return 0;
324}
325
326static int is_block_bad(int ebnum)
327{
328 int ret;
329 loff_t addr = ebnum * mtd->erasesize;
330
331 ret = mtd->block_isbad(mtd, addr);
332 if (ret)
333 printk(PRINT_PREF "block %d is bad\n", ebnum);
334 return ret;
335}
336
337static int scan_for_bad_eraseblocks(void)
338{
339 int i, bad = 0;
340
341 bbt = kmalloc(ebcnt, GFP_KERNEL);
342 if (!bbt) {
343 printk(PRINT_PREF "error: cannot allocate memory\n");
344 return -ENOMEM;
345 }
346 memset(bbt, 0 , ebcnt);
347
348 printk(PRINT_PREF "scanning for bad eraseblocks\n");
349 for (i = 0; i < ebcnt; ++i) {
350 bbt[i] = is_block_bad(i) ? 1 : 0;
351 if (bbt[i])
352 bad += 1;
353 cond_resched();
354 }
355 printk(PRINT_PREF "scanned %d eraseblocks, %d are bad\n", i, bad);
356 return 0;
357}
358
359static int __init mtd_oobtest_init(void)
360{
361 int err = 0;
362 unsigned int i;
363 uint64_t tmp;
364 struct mtd_oob_ops ops;
365 loff_t addr = 0, addr0;
366
367 printk(KERN_INFO "\n");
368 printk(KERN_INFO "=================================================\n");
369 printk(PRINT_PREF "MTD device: %d\n", dev);
370
371 mtd = get_mtd_device(NULL, dev);
372 if (IS_ERR(mtd)) {
373 err = PTR_ERR(mtd);
374 printk(PRINT_PREF "error: cannot get MTD device\n");
375 return err;
376 }
377
378 if (mtd->type != MTD_NANDFLASH) {
379 printk(PRINT_PREF "this test requires NAND flash\n");
380 goto out;
381 }
382
383 tmp = mtd->size;
384 do_div(tmp, mtd->erasesize);
385 ebcnt = tmp;
386 pgcnt = mtd->erasesize / mtd->writesize;
387
388 printk(PRINT_PREF "MTD device size %llu, eraseblock size %u, "
389 "page size %u, count of eraseblocks %u, pages per "
390 "eraseblock %u, OOB size %u\n",
391 (unsigned long long)mtd->size, mtd->erasesize,
392 mtd->writesize, ebcnt, pgcnt, mtd->oobsize);
393
394 err = -ENOMEM;
395 mtd->erasesize = mtd->erasesize;
396 readbuf = kmalloc(mtd->erasesize, GFP_KERNEL);
397 if (!readbuf) {
398 printk(PRINT_PREF "error: cannot allocate memory\n");
399 goto out;
400 }
401 writebuf = kmalloc(mtd->erasesize, GFP_KERNEL);
402 if (!writebuf) {
403 printk(PRINT_PREF "error: cannot allocate memory\n");
404 goto out;
405 }
406
407 err = scan_for_bad_eraseblocks();
408 if (err)
409 goto out;
410
411 use_offset = 0;
412 use_len = mtd->ecclayout->oobavail;
413 use_len_max = mtd->ecclayout->oobavail;
414 vary_offset = 0;
415
416 /* First test: write all OOB, read it back and verify */
417 printk(PRINT_PREF "test 1 of 5\n");
418
419 err = erase_whole_device();
420 if (err)
421 goto out;
422
423 simple_srand(1);
424 err = write_whole_device();
425 if (err)
426 goto out;
427
428 simple_srand(1);
429 err = verify_all_eraseblocks();
430 if (err)
431 goto out;
432
433 /*
434 * Second test: write all OOB, a block at a time, read it back and
435 * verify.
436 */
437 printk(PRINT_PREF "test 2 of 5\n");
438
439 err = erase_whole_device();
440 if (err)
441 goto out;
442
443 simple_srand(3);
444 err = write_whole_device();
445 if (err)
446 goto out;
447
448 /* Check all eraseblocks */
449 simple_srand(3);
450 printk(PRINT_PREF "verifying all eraseblocks\n");
451 for (i = 0; i < ebcnt; ++i) {
452 if (bbt[i])
453 continue;
454 err = verify_eraseblock_in_one_go(i);
455 if (err)
456 goto out;
457 if (i % 256 == 0)
458 printk(PRINT_PREF "verified up to eraseblock %u\n", i);
459 cond_resched();
460 }
461 printk(PRINT_PREF "verified %u eraseblocks\n", i);
462
463 /*
464 * Third test: write OOB at varying offsets and lengths, read it back
465 * and verify.
466 */
467 printk(PRINT_PREF "test 3 of 5\n");
468
469 err = erase_whole_device();
470 if (err)
471 goto out;
472
473 /* Write all eraseblocks */
474 use_offset = 0;
475 use_len = mtd->ecclayout->oobavail;
476 use_len_max = mtd->ecclayout->oobavail;
477 vary_offset = 1;
478 simple_srand(5);
479 printk(PRINT_PREF "writing OOBs of whole device\n");
480 for (i = 0; i < ebcnt; ++i) {
481 if (bbt[i])
482 continue;
483 err = write_eraseblock(i);
484 if (err)
485 goto out;
486 if (i % 256 == 0)
487 printk(PRINT_PREF "written up to eraseblock %u\n", i);
488 cond_resched();
489 }
490 printk(PRINT_PREF "written %u eraseblocks\n", i);
491
492 /* Check all eraseblocks */
493 use_offset = 0;
494 use_len = mtd->ecclayout->oobavail;
495 use_len_max = mtd->ecclayout->oobavail;
496 vary_offset = 1;
497 simple_srand(5);
498 err = verify_all_eraseblocks();
499 if (err)
500 goto out;
501
502 use_offset = 0;
503 use_len = mtd->ecclayout->oobavail;
504 use_len_max = mtd->ecclayout->oobavail;
505 vary_offset = 0;
506
507 /* Fourth test: try to write off end of device */
508 printk(PRINT_PREF "test 4 of 5\n");
509
510 err = erase_whole_device();
511 if (err)
512 goto out;
513
514 addr0 = 0;
515 for (i = 0; bbt[i] && i < ebcnt; ++i)
516 addr0 += mtd->erasesize;
517
518 /* Attempt to write off end of OOB */
519 ops.mode = MTD_OOB_AUTO;
520 ops.len = 0;
521 ops.retlen = 0;
522 ops.ooblen = 1;
523 ops.oobretlen = 0;
524 ops.ooboffs = mtd->ecclayout->oobavail;
525 ops.datbuf = 0;
526 ops.oobbuf = writebuf;
527 printk(PRINT_PREF "attempting to start write past end of OOB\n");
528 printk(PRINT_PREF "an error is expected...\n");
529 err = mtd->write_oob(mtd, addr0, &ops);
530 if (err) {
531 printk(PRINT_PREF "error occurred as expected\n");
532 err = 0;
533 } else {
534 printk(PRINT_PREF "error: can write past end of OOB\n");
535 errcnt += 1;
536 }
537
538 /* Attempt to read off end of OOB */
539 ops.mode = MTD_OOB_AUTO;
540 ops.len = 0;
541 ops.retlen = 0;
542 ops.ooblen = 1;
543 ops.oobretlen = 0;
544 ops.ooboffs = mtd->ecclayout->oobavail;
545 ops.datbuf = 0;
546 ops.oobbuf = readbuf;
547 printk(PRINT_PREF "attempting to start read past end of OOB\n");
548 printk(PRINT_PREF "an error is expected...\n");
549 err = mtd->read_oob(mtd, addr0, &ops);
550 if (err) {
551 printk(PRINT_PREF "error occurred as expected\n");
552 err = 0;
553 } else {
554 printk(PRINT_PREF "error: can read past end of OOB\n");
555 errcnt += 1;
556 }
557
558 if (bbt[ebcnt - 1])
559 printk(PRINT_PREF "skipping end of device tests because last "
560 "block is bad\n");
561 else {
562 /* Attempt to write off end of device */
563 ops.mode = MTD_OOB_AUTO;
564 ops.len = 0;
565 ops.retlen = 0;
566 ops.ooblen = mtd->ecclayout->oobavail + 1;
567 ops.oobretlen = 0;
568 ops.ooboffs = 0;
569 ops.datbuf = 0;
570 ops.oobbuf = writebuf;
571 printk(PRINT_PREF "attempting to write past end of device\n");
572 printk(PRINT_PREF "an error is expected...\n");
573 err = mtd->write_oob(mtd, mtd->size - mtd->writesize, &ops);
574 if (err) {
575 printk(PRINT_PREF "error occurred as expected\n");
576 err = 0;
577 } else {
578 printk(PRINT_PREF "error: wrote past end of device\n");
579 errcnt += 1;
580 }
581
582 /* Attempt to read off end of device */
583 ops.mode = MTD_OOB_AUTO;
584 ops.len = 0;
585 ops.retlen = 0;
586 ops.ooblen = mtd->ecclayout->oobavail + 1;
587 ops.oobretlen = 0;
588 ops.ooboffs = 0;
589 ops.datbuf = 0;
590 ops.oobbuf = readbuf;
591 printk(PRINT_PREF "attempting to read past end of device\n");
592 printk(PRINT_PREF "an error is expected...\n");
593 err = mtd->read_oob(mtd, mtd->size - mtd->writesize, &ops);
594 if (err) {
595 printk(PRINT_PREF "error occurred as expected\n");
596 err = 0;
597 } else {
598 printk(PRINT_PREF "error: read past end of device\n");
599 errcnt += 1;
600 }
601
602 err = erase_eraseblock(ebcnt - 1);
603 if (err)
604 goto out;
605
606 /* Attempt to write off end of device */
607 ops.mode = MTD_OOB_AUTO;
608 ops.len = 0;
609 ops.retlen = 0;
610 ops.ooblen = mtd->ecclayout->oobavail;
611 ops.oobretlen = 0;
612 ops.ooboffs = 1;
613 ops.datbuf = 0;
614 ops.oobbuf = writebuf;
615 printk(PRINT_PREF "attempting to write past end of device\n");
616 printk(PRINT_PREF "an error is expected...\n");
617 err = mtd->write_oob(mtd, mtd->size - mtd->writesize, &ops);
618 if (err) {
619 printk(PRINT_PREF "error occurred as expected\n");
620 err = 0;
621 } else {
622 printk(PRINT_PREF "error: wrote past end of device\n");
623 errcnt += 1;
624 }
625
626 /* Attempt to read off end of device */
627 ops.mode = MTD_OOB_AUTO;
628 ops.len = 0;
629 ops.retlen = 0;
630 ops.ooblen = mtd->ecclayout->oobavail;
631 ops.oobretlen = 0;
632 ops.ooboffs = 1;
633 ops.datbuf = 0;
634 ops.oobbuf = readbuf;
635 printk(PRINT_PREF "attempting to read past end of device\n");
636 printk(PRINT_PREF "an error is expected...\n");
637 err = mtd->read_oob(mtd, mtd->size - mtd->writesize, &ops);
638 if (err) {
639 printk(PRINT_PREF "error occurred as expected\n");
640 err = 0;
641 } else {
642 printk(PRINT_PREF "error: read past end of device\n");
643 errcnt += 1;
644 }
645 }
646
647 /* Fifth test: write / read across block boundaries */
648 printk(PRINT_PREF "test 5 of 5\n");
649
650 /* Erase all eraseblocks */
651 err = erase_whole_device();
652 if (err)
653 goto out;
654
655 /* Write all eraseblocks */
656 simple_srand(11);
657 printk(PRINT_PREF "writing OOBs of whole device\n");
658 for (i = 0; i < ebcnt - 1; ++i) {
659 int cnt = 2;
660 int pg;
661 size_t sz = mtd->ecclayout->oobavail;
662 if (bbt[i] || bbt[i + 1])
663 continue;
664 addr = (i + 1) * mtd->erasesize - mtd->writesize;
665 for (pg = 0; pg < cnt; ++pg) {
666 set_random_data(writebuf, sz);
667 ops.mode = MTD_OOB_AUTO;
668 ops.len = 0;
669 ops.retlen = 0;
670 ops.ooblen = sz;
671 ops.oobretlen = 0;
672 ops.ooboffs = 0;
673 ops.datbuf = 0;
674 ops.oobbuf = writebuf;
675 err = mtd->write_oob(mtd, addr, &ops);
676 if (err)
677 goto out;
678 if (i % 256 == 0)
679 printk(PRINT_PREF "written up to eraseblock "
680 "%u\n", i);
681 cond_resched();
682 addr += mtd->writesize;
683 }
684 }
685 printk(PRINT_PREF "written %u eraseblocks\n", i);
686
687 /* Check all eraseblocks */
688 simple_srand(11);
689 printk(PRINT_PREF "verifying all eraseblocks\n");
690 for (i = 0; i < ebcnt - 1; ++i) {
691 if (bbt[i] || bbt[i + 1])
692 continue;
693 set_random_data(writebuf, mtd->ecclayout->oobavail * 2);
694 addr = (i + 1) * mtd->erasesize - mtd->writesize;
695 ops.mode = MTD_OOB_AUTO;
696 ops.len = 0;
697 ops.retlen = 0;
698 ops.ooblen = mtd->ecclayout->oobavail * 2;
699 ops.oobretlen = 0;
700 ops.ooboffs = 0;
701 ops.datbuf = 0;
702 ops.oobbuf = readbuf;
703 err = mtd->read_oob(mtd, addr, &ops);
704 if (err)
705 goto out;
706 if (memcmp(readbuf, writebuf, mtd->ecclayout->oobavail * 2)) {
707 printk(PRINT_PREF "error: verify failed at %#llx\n",
708 (long long)addr);
709 errcnt += 1;
710 if (errcnt > 1000) {
711 printk(PRINT_PREF "error: too many errors\n");
712 goto out;
713 }
714 }
715 if (i % 256 == 0)
716 printk(PRINT_PREF "verified up to eraseblock %u\n", i);
717 cond_resched();
718 }
719 printk(PRINT_PREF "verified %u eraseblocks\n", i);
720
721 printk(PRINT_PREF "finished with %d errors\n", errcnt);
722out:
723 kfree(bbt);
724 kfree(writebuf);
725 kfree(readbuf);
726 put_mtd_device(mtd);
727 if (err)
728 printk(PRINT_PREF "error %d occurred\n", err);
729 printk(KERN_INFO "=================================================\n");
730 return err;
731}
732module_init(mtd_oobtest_init);
733
734static void __exit mtd_oobtest_exit(void)
735{
736 return;
737}
738module_exit(mtd_oobtest_exit);
739
740MODULE_DESCRIPTION("Out-of-band test module");
741MODULE_AUTHOR("Adrian Hunter");
742MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/tests/mtd_pagetest.c b/drivers/mtd/tests/mtd_pagetest.c
new file mode 100644
index 000000000000..9648818b9e2c
--- /dev/null
+++ b/drivers/mtd/tests/mtd_pagetest.c
@@ -0,0 +1,632 @@
1/*
2 * Copyright (C) 2006-2008 Nokia Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License version 2 as published by
6 * the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; see the file COPYING. If not, write to the Free Software
15 * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16 *
17 * Test page read and write on MTD device.
18 *
19 * Author: Adrian Hunter <ext-adrian.hunter@nokia.com>
20 */
21
22#include <asm/div64.h>
23#include <linux/init.h>
24#include <linux/module.h>
25#include <linux/moduleparam.h>
26#include <linux/err.h>
27#include <linux/mtd/mtd.h>
28#include <linux/sched.h>
29
30#define PRINT_PREF KERN_INFO "mtd_pagetest: "
31
32static int dev;
33module_param(dev, int, S_IRUGO);
34MODULE_PARM_DESC(dev, "MTD device number to use");
35
36static struct mtd_info *mtd;
37static unsigned char *twopages;
38static unsigned char *writebuf;
39static unsigned char *boundary;
40static unsigned char *bbt;
41
42static int pgsize;
43static int bufsize;
44static int ebcnt;
45static int pgcnt;
46static int errcnt;
47static unsigned long next = 1;
48
49static inline unsigned int simple_rand(void)
50{
51 next = next * 1103515245 + 12345;
52 return (unsigned int)((next / 65536) % 32768);
53}
54
55static inline void simple_srand(unsigned long seed)
56{
57 next = seed;
58}
59
60static void set_random_data(unsigned char *buf, size_t len)
61{
62 size_t i;
63
64 for (i = 0; i < len; ++i)
65 buf[i] = simple_rand();
66}
67
68static int erase_eraseblock(int ebnum)
69{
70 int err;
71 struct erase_info ei;
72 loff_t addr = ebnum * mtd->erasesize;
73
74 memset(&ei, 0, sizeof(struct erase_info));
75 ei.mtd = mtd;
76 ei.addr = addr;
77 ei.len = mtd->erasesize;
78
79 err = mtd->erase(mtd, &ei);
80 if (err) {
81 printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
82 return err;
83 }
84
85 if (ei.state == MTD_ERASE_FAILED) {
86 printk(PRINT_PREF "some erase error occurred at EB %d\n",
87 ebnum);
88 return -EIO;
89 }
90
91 return 0;
92}
93
94static int write_eraseblock(int ebnum)
95{
96 int err = 0;
97 size_t written = 0;
98 loff_t addr = ebnum * mtd->erasesize;
99
100 set_random_data(writebuf, mtd->erasesize);
101 cond_resched();
102 err = mtd->write(mtd, addr, mtd->erasesize, &written, writebuf);
103 if (err || written != mtd->erasesize)
104 printk(PRINT_PREF "error: write failed at %#llx\n",
105 (long long)addr);
106
107 return err;
108}
109
110static int verify_eraseblock(int ebnum)
111{
112 uint32_t j;
113 size_t read = 0;
114 int err = 0, i;
115 loff_t addr0, addrn;
116 loff_t addr = ebnum * mtd->erasesize;
117
118 addr0 = 0;
119 for (i = 0; bbt[i] && i < ebcnt; ++i)
120 addr0 += mtd->erasesize;
121
122 addrn = mtd->size;
123 for (i = 0; bbt[ebcnt - i - 1] && i < ebcnt; ++i)
124 addrn -= mtd->erasesize;
125
126 set_random_data(writebuf, mtd->erasesize);
127 for (j = 0; j < pgcnt - 1; ++j, addr += pgsize) {
128 /* Do a read to set the internal dataRAMs to different data */
129 err = mtd->read(mtd, addr0, bufsize, &read, twopages);
130 if (err == -EUCLEAN)
131 err = 0;
132 if (err || read != bufsize) {
133 printk(PRINT_PREF "error: read failed at %#llx\n",
134 (long long)addr0);
135 return err;
136 }
137 err = mtd->read(mtd, addrn - bufsize, bufsize, &read, twopages);
138 if (err == -EUCLEAN)
139 err = 0;
140 if (err || read != bufsize) {
141 printk(PRINT_PREF "error: read failed at %#llx\n",
142 (long long)(addrn - bufsize));
143 return err;
144 }
145 memset(twopages, 0, bufsize);
146 read = 0;
147 err = mtd->read(mtd, addr, bufsize, &read, twopages);
148 if (err == -EUCLEAN)
149 err = 0;
150 if (err || read != bufsize) {
151 printk(PRINT_PREF "error: read failed at %#llx\n",
152 (long long)addr);
153 break;
154 }
155 if (memcmp(twopages, writebuf + (j * pgsize), bufsize)) {
156 printk(PRINT_PREF "error: verify failed at %#llx\n",
157 (long long)addr);
158 errcnt += 1;
159 }
160 }
161 /* Check boundary between eraseblocks */
162 if (addr <= addrn - pgsize - pgsize && !bbt[ebnum + 1]) {
163 unsigned long oldnext = next;
164 /* Do a read to set the internal dataRAMs to different data */
165 err = mtd->read(mtd, addr0, bufsize, &read, twopages);
166 if (err == -EUCLEAN)
167 err = 0;
168 if (err || read != bufsize) {
169 printk(PRINT_PREF "error: read failed at %#llx\n",
170 (long long)addr0);
171 return err;
172 }
173 err = mtd->read(mtd, addrn - bufsize, bufsize, &read, twopages);
174 if (err == -EUCLEAN)
175 err = 0;
176 if (err || read != bufsize) {
177 printk(PRINT_PREF "error: read failed at %#llx\n",
178 (long long)(addrn - bufsize));
179 return err;
180 }
181 memset(twopages, 0, bufsize);
182 read = 0;
183 err = mtd->read(mtd, addr, bufsize, &read, twopages);
184 if (err == -EUCLEAN)
185 err = 0;
186 if (err || read != bufsize) {
187 printk(PRINT_PREF "error: read failed at %#llx\n",
188 (long long)addr);
189 return err;
190 }
191 memcpy(boundary, writebuf + mtd->erasesize - pgsize, pgsize);
192 set_random_data(boundary + pgsize, pgsize);
193 if (memcmp(twopages, boundary, bufsize)) {
194 printk(PRINT_PREF "error: verify failed at %#llx\n",
195 (long long)addr);
196 errcnt += 1;
197 }
198 next = oldnext;
199 }
200 return err;
201}
202
203static int crosstest(void)
204{
205 size_t read = 0;
206 int err = 0, i;
207 loff_t addr, addr0, addrn;
208 unsigned char *pp1, *pp2, *pp3, *pp4;
209
210 printk(PRINT_PREF "crosstest\n");
211 pp1 = kmalloc(pgsize * 4, GFP_KERNEL);
212 if (!pp1) {
213 printk(PRINT_PREF "error: cannot allocate memory\n");
214 return -ENOMEM;
215 }
216 pp2 = pp1 + pgsize;
217 pp3 = pp2 + pgsize;
218 pp4 = pp3 + pgsize;
219 memset(pp1, 0, pgsize * 4);
220
221 addr0 = 0;
222 for (i = 0; bbt[i] && i < ebcnt; ++i)
223 addr0 += mtd->erasesize;
224
225 addrn = mtd->size;
226 for (i = 0; bbt[ebcnt - i - 1] && i < ebcnt; ++i)
227 addrn -= mtd->erasesize;
228
229 /* Read 2nd-to-last page to pp1 */
230 read = 0;
231 addr = addrn - pgsize - pgsize;
232 err = mtd->read(mtd, addr, pgsize, &read, pp1);
233 if (err == -EUCLEAN)
234 err = 0;
235 if (err || read != pgsize) {
236 printk(PRINT_PREF "error: read failed at %#llx\n",
237 (long long)addr);
238 kfree(pp1);
239 return err;
240 }
241
242 /* Read 3rd-to-last page to pp1 */
243 read = 0;
244 addr = addrn - pgsize - pgsize - pgsize;
245 err = mtd->read(mtd, addr, pgsize, &read, pp1);
246 if (err == -EUCLEAN)
247 err = 0;
248 if (err || read != pgsize) {
249 printk(PRINT_PREF "error: read failed at %#llx\n",
250 (long long)addr);
251 kfree(pp1);
252 return err;
253 }
254
255 /* Read first page to pp2 */
256 read = 0;
257 addr = addr0;
258 printk(PRINT_PREF "reading page at %#llx\n", (long long)addr);
259 err = mtd->read(mtd, addr, pgsize, &read, pp2);
260 if (err == -EUCLEAN)
261 err = 0;
262 if (err || read != pgsize) {
263 printk(PRINT_PREF "error: read failed at %#llx\n",
264 (long long)addr);
265 kfree(pp1);
266 return err;
267 }
268
269 /* Read last page to pp3 */
270 read = 0;
271 addr = addrn - pgsize;
272 printk(PRINT_PREF "reading page at %#llx\n", (long long)addr);
273 err = mtd->read(mtd, addr, pgsize, &read, pp3);
274 if (err == -EUCLEAN)
275 err = 0;
276 if (err || read != pgsize) {
277 printk(PRINT_PREF "error: read failed at %#llx\n",
278 (long long)addr);
279 kfree(pp1);
280 return err;
281 }
282
283 /* Read first page again to pp4 */
284 read = 0;
285 addr = addr0;
286 printk(PRINT_PREF "reading page at %#llx\n", (long long)addr);
287 err = mtd->read(mtd, addr, pgsize, &read, pp4);
288 if (err == -EUCLEAN)
289 err = 0;
290 if (err || read != pgsize) {
291 printk(PRINT_PREF "error: read failed at %#llx\n",
292 (long long)addr);
293 kfree(pp1);
294 return err;
295 }
296
297 /* pp2 and pp4 should be the same */
298 printk(PRINT_PREF "verifying pages read at %#llx match\n",
299 (long long)addr0);
300 if (memcmp(pp2, pp4, pgsize)) {
301 printk(PRINT_PREF "verify failed!\n");
302 errcnt += 1;
303 } else if (!err)
304 printk(PRINT_PREF "crosstest ok\n");
305 kfree(pp1);
306 return err;
307}
308
309static int erasecrosstest(void)
310{
311 size_t read = 0, written = 0;
312 int err = 0, i, ebnum, ok = 1, ebnum2;
313 loff_t addr0;
314 char *readbuf = twopages;
315
316 printk(PRINT_PREF "erasecrosstest\n");
317
318 ebnum = 0;
319 addr0 = 0;
320 for (i = 0; bbt[i] && i < ebcnt; ++i) {
321 addr0 += mtd->erasesize;
322 ebnum += 1;
323 }
324
325 ebnum2 = ebcnt - 1;
326 while (ebnum2 && bbt[ebnum2])
327 ebnum2 -= 1;
328
329 printk(PRINT_PREF "erasing block %d\n", ebnum);
330 err = erase_eraseblock(ebnum);
331 if (err)
332 return err;
333
334 printk(PRINT_PREF "writing 1st page of block %d\n", ebnum);
335 set_random_data(writebuf, pgsize);
336 strcpy(writebuf, "There is no data like this!");
337 err = mtd->write(mtd, addr0, pgsize, &written, writebuf);
338 if (err || written != pgsize) {
339 printk(PRINT_PREF "error: write failed at %#llx\n",
340 (long long)addr0);
341 return err ? err : -1;
342 }
343
344 printk(PRINT_PREF "reading 1st page of block %d\n", ebnum);
345 memset(readbuf, 0, pgsize);
346 err = mtd->read(mtd, addr0, pgsize, &read, readbuf);
347 if (err == -EUCLEAN)
348 err = 0;
349 if (err || read != pgsize) {
350 printk(PRINT_PREF "error: read failed at %#llx\n",
351 (long long)addr0);
352 return err ? err : -1;
353 }
354
355 printk(PRINT_PREF "verifying 1st page of block %d\n", ebnum);
356 if (memcmp(writebuf, readbuf, pgsize)) {
357 printk(PRINT_PREF "verify failed!\n");
358 errcnt += 1;
359 ok = 0;
360 return err;
361 }
362
363 printk(PRINT_PREF "erasing block %d\n", ebnum);
364 err = erase_eraseblock(ebnum);
365 if (err)
366 return err;
367
368 printk(PRINT_PREF "writing 1st page of block %d\n", ebnum);
369 set_random_data(writebuf, pgsize);
370 strcpy(writebuf, "There is no data like this!");
371 err = mtd->write(mtd, addr0, pgsize, &written, writebuf);
372 if (err || written != pgsize) {
373 printk(PRINT_PREF "error: write failed at %#llx\n",
374 (long long)addr0);
375 return err ? err : -1;
376 }
377
378 printk(PRINT_PREF "erasing block %d\n", ebnum2);
379 err = erase_eraseblock(ebnum2);
380 if (err)
381 return err;
382
383 printk(PRINT_PREF "reading 1st page of block %d\n", ebnum);
384 memset(readbuf, 0, pgsize);
385 err = mtd->read(mtd, addr0, pgsize, &read, readbuf);
386 if (err == -EUCLEAN)
387 err = 0;
388 if (err || read != pgsize) {
389 printk(PRINT_PREF "error: read failed at %#llx\n",
390 (long long)addr0);
391 return err ? err : -1;
392 }
393
394 printk(PRINT_PREF "verifying 1st page of block %d\n", ebnum);
395 if (memcmp(writebuf, readbuf, pgsize)) {
396 printk(PRINT_PREF "verify failed!\n");
397 errcnt += 1;
398 ok = 0;
399 }
400
401 if (ok && !err)
402 printk(PRINT_PREF "erasecrosstest ok\n");
403 return err;
404}
405
406static int erasetest(void)
407{
408 size_t read = 0, written = 0;
409 int err = 0, i, ebnum, ok = 1;
410 loff_t addr0;
411
412 printk(PRINT_PREF "erasetest\n");
413
414 ebnum = 0;
415 addr0 = 0;
416 for (i = 0; bbt[i] && i < ebcnt; ++i) {
417 addr0 += mtd->erasesize;
418 ebnum += 1;
419 }
420
421 printk(PRINT_PREF "erasing block %d\n", ebnum);
422 err = erase_eraseblock(ebnum);
423 if (err)
424 return err;
425
426 printk(PRINT_PREF "writing 1st page of block %d\n", ebnum);
427 set_random_data(writebuf, pgsize);
428 err = mtd->write(mtd, addr0, pgsize, &written, writebuf);
429 if (err || written != pgsize) {
430 printk(PRINT_PREF "error: write failed at %#llx\n",
431 (long long)addr0);
432 return err ? err : -1;
433 }
434
435 printk(PRINT_PREF "erasing block %d\n", ebnum);
436 err = erase_eraseblock(ebnum);
437 if (err)
438 return err;
439
440 printk(PRINT_PREF "reading 1st page of block %d\n", ebnum);
441 err = mtd->read(mtd, addr0, pgsize, &read, twopages);
442 if (err == -EUCLEAN)
443 err = 0;
444 if (err || read != pgsize) {
445 printk(PRINT_PREF "error: read failed at %#llx\n",
446 (long long)addr0);
447 return err ? err : -1;
448 }
449
450 printk(PRINT_PREF "verifying 1st page of block %d is all 0xff\n",
451 ebnum);
452 for (i = 0; i < pgsize; ++i)
453 if (twopages[i] != 0xff) {
454 printk(PRINT_PREF "verifying all 0xff failed at %d\n",
455 i);
456 errcnt += 1;
457 ok = 0;
458 break;
459 }
460
461 if (ok && !err)
462 printk(PRINT_PREF "erasetest ok\n");
463
464 return err;
465}
466
467static int is_block_bad(int ebnum)
468{
469 loff_t addr = ebnum * mtd->erasesize;
470 int ret;
471
472 ret = mtd->block_isbad(mtd, addr);
473 if (ret)
474 printk(PRINT_PREF "block %d is bad\n", ebnum);
475 return ret;
476}
477
478static int scan_for_bad_eraseblocks(void)
479{
480 int i, bad = 0;
481
482 bbt = kmalloc(ebcnt, GFP_KERNEL);
483 if (!bbt) {
484 printk(PRINT_PREF "error: cannot allocate memory\n");
485 return -ENOMEM;
486 }
487 memset(bbt, 0 , ebcnt);
488
489 printk(PRINT_PREF "scanning for bad eraseblocks\n");
490 for (i = 0; i < ebcnt; ++i) {
491 bbt[i] = is_block_bad(i) ? 1 : 0;
492 if (bbt[i])
493 bad += 1;
494 cond_resched();
495 }
496 printk(PRINT_PREF "scanned %d eraseblocks, %d are bad\n", i, bad);
497 return 0;
498}
499
500static int __init mtd_pagetest_init(void)
501{
502 int err = 0;
503 uint64_t tmp;
504 uint32_t i;
505
506 printk(KERN_INFO "\n");
507 printk(KERN_INFO "=================================================\n");
508 printk(PRINT_PREF "MTD device: %d\n", dev);
509
510 mtd = get_mtd_device(NULL, dev);
511 if (IS_ERR(mtd)) {
512 err = PTR_ERR(mtd);
513 printk(PRINT_PREF "error: cannot get MTD device\n");
514 return err;
515 }
516
517 if (mtd->type != MTD_NANDFLASH) {
518 printk(PRINT_PREF "this test requires NAND flash\n");
519 goto out;
520 }
521
522 tmp = mtd->size;
523 do_div(tmp, mtd->erasesize);
524 ebcnt = tmp;
525 pgcnt = mtd->erasesize / mtd->writesize;
526
527 printk(PRINT_PREF "MTD device size %llu, eraseblock size %u, "
528 "page size %u, count of eraseblocks %u, pages per "
529 "eraseblock %u, OOB size %u\n",
530 (unsigned long long)mtd->size, mtd->erasesize,
531 pgsize, ebcnt, pgcnt, mtd->oobsize);
532
533 err = -ENOMEM;
534 bufsize = pgsize * 2;
535 writebuf = kmalloc(mtd->erasesize, GFP_KERNEL);
536 if (!writebuf) {
537 printk(PRINT_PREF "error: cannot allocate memory\n");
538 goto out;
539 }
540 twopages = kmalloc(bufsize, GFP_KERNEL);
541 if (!twopages) {
542 printk(PRINT_PREF "error: cannot allocate memory\n");
543 goto out;
544 }
545 boundary = kmalloc(bufsize, GFP_KERNEL);
546 if (!boundary) {
547 printk(PRINT_PREF "error: cannot allocate memory\n");
548 goto out;
549 }
550
551 err = scan_for_bad_eraseblocks();
552 if (err)
553 goto out;
554
555 /* Erase all eraseblocks */
556 printk(PRINT_PREF "erasing whole device\n");
557 for (i = 0; i < ebcnt; ++i) {
558 if (bbt[i])
559 continue;
560 err = erase_eraseblock(i);
561 if (err)
562 goto out;
563 cond_resched();
564 }
565 printk(PRINT_PREF "erased %u eraseblocks\n", i);
566
567 /* Write all eraseblocks */
568 simple_srand(1);
569 printk(PRINT_PREF "writing whole device\n");
570 for (i = 0; i < ebcnt; ++i) {
571 if (bbt[i])
572 continue;
573 err = write_eraseblock(i);
574 if (err)
575 goto out;
576 if (i % 256 == 0)
577 printk(PRINT_PREF "written up to eraseblock %u\n", i);
578 cond_resched();
579 }
580 printk(PRINT_PREF "written %u eraseblocks\n", i);
581
582 /* Check all eraseblocks */
583 simple_srand(1);
584 printk(PRINT_PREF "verifying all eraseblocks\n");
585 for (i = 0; i < ebcnt; ++i) {
586 if (bbt[i])
587 continue;
588 err = verify_eraseblock(i);
589 if (err)
590 goto out;
591 if (i % 256 == 0)
592 printk(PRINT_PREF "verified up to eraseblock %u\n", i);
593 cond_resched();
594 }
595 printk(PRINT_PREF "verified %u eraseblocks\n", i);
596
597 err = crosstest();
598 if (err)
599 goto out;
600
601 err = erasecrosstest();
602 if (err)
603 goto out;
604
605 err = erasetest();
606 if (err)
607 goto out;
608
609 printk(PRINT_PREF "finished with %d errors\n", errcnt);
610out:
611
612 kfree(bbt);
613 kfree(boundary);
614 kfree(twopages);
615 kfree(writebuf);
616 put_mtd_device(mtd);
617 if (err)
618 printk(PRINT_PREF "error %d occurred\n", err);
619 printk(KERN_INFO "=================================================\n");
620 return err;
621}
622module_init(mtd_pagetest_init);
623
624static void __exit mtd_pagetest_exit(void)
625{
626 return;
627}
628module_exit(mtd_pagetest_exit);
629
630MODULE_DESCRIPTION("NAND page test");
631MODULE_AUTHOR("Adrian Hunter");
632MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/tests/mtd_readtest.c b/drivers/mtd/tests/mtd_readtest.c
new file mode 100644
index 000000000000..645e77fdc63d
--- /dev/null
+++ b/drivers/mtd/tests/mtd_readtest.c
@@ -0,0 +1,253 @@
1/*
2 * Copyright (C) 2006-2008 Nokia Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License version 2 as published by
6 * the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; see the file COPYING. If not, write to the Free Software
15 * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16 *
17 * Check MTD device read.
18 *
19 * Author: Adrian Hunter <ext-adrian.hunter@nokia.com>
20 */
21
22#include <linux/init.h>
23#include <linux/module.h>
24#include <linux/moduleparam.h>
25#include <linux/err.h>
26#include <linux/mtd/mtd.h>
27#include <linux/sched.h>
28
29#define PRINT_PREF KERN_INFO "mtd_readtest: "
30
31static int dev;
32module_param(dev, int, S_IRUGO);
33MODULE_PARM_DESC(dev, "MTD device number to use");
34
35static struct mtd_info *mtd;
36static unsigned char *iobuf;
37static unsigned char *iobuf1;
38static unsigned char *bbt;
39
40static int pgsize;
41static int ebcnt;
42static int pgcnt;
43
44static int read_eraseblock_by_page(int ebnum)
45{
46 size_t read = 0;
47 int i, ret, err = 0;
48 loff_t addr = ebnum * mtd->erasesize;
49 void *buf = iobuf;
50 void *oobbuf = iobuf1;
51
52 for (i = 0; i < pgcnt; i++) {
53 memset(buf, 0 , pgcnt);
54 ret = mtd->read(mtd, addr, pgsize, &read, buf);
55 if (ret == -EUCLEAN)
56 ret = 0;
57 if (ret || read != pgsize) {
58 printk(PRINT_PREF "error: read failed at %#llx\n",
59 (long long)addr);
60 if (!err)
61 err = ret;
62 if (!err)
63 err = -EINVAL;
64 }
65 if (mtd->oobsize) {
66 struct mtd_oob_ops ops;
67
68 ops.mode = MTD_OOB_PLACE;
69 ops.len = 0;
70 ops.retlen = 0;
71 ops.ooblen = mtd->oobsize;
72 ops.oobretlen = 0;
73 ops.ooboffs = 0;
74 ops.datbuf = 0;
75 ops.oobbuf = oobbuf;
76 ret = mtd->read_oob(mtd, addr, &ops);
77 if (ret || ops.oobretlen != mtd->oobsize) {
78 printk(PRINT_PREF "error: read oob failed at "
79 "%#llx\n", (long long)addr);
80 if (!err)
81 err = ret;
82 if (!err)
83 err = -EINVAL;
84 }
85 oobbuf += mtd->oobsize;
86 }
87 addr += pgsize;
88 buf += pgsize;
89 }
90
91 return err;
92}
93
94static void dump_eraseblock(int ebnum)
95{
96 int i, j, n;
97 char line[128];
98 int pg, oob;
99
100 printk(PRINT_PREF "dumping eraseblock %d\n", ebnum);
101 n = mtd->erasesize;
102 for (i = 0; i < n;) {
103 char *p = line;
104
105 p += sprintf(p, "%05x: ", i);
106 for (j = 0; j < 32 && i < n; j++, i++)
107 p += sprintf(p, "%02x", (unsigned int)iobuf[i]);
108 printk(KERN_CRIT "%s\n", line);
109 cond_resched();
110 }
111 if (!mtd->oobsize)
112 return;
113 printk(PRINT_PREF "dumping oob from eraseblock %d\n", ebnum);
114 n = mtd->oobsize;
115 for (pg = 0, i = 0; pg < pgcnt; pg++)
116 for (oob = 0; oob < n;) {
117 char *p = line;
118
119 p += sprintf(p, "%05x: ", i);
120 for (j = 0; j < 32 && oob < n; j++, oob++, i++)
121 p += sprintf(p, "%02x",
122 (unsigned int)iobuf1[i]);
123 printk(KERN_CRIT "%s\n", line);
124 cond_resched();
125 }
126}
127
128static int is_block_bad(int ebnum)
129{
130 loff_t addr = ebnum * mtd->erasesize;
131 int ret;
132
133 ret = mtd->block_isbad(mtd, addr);
134 if (ret)
135 printk(PRINT_PREF "block %d is bad\n", ebnum);
136 return ret;
137}
138
139static int scan_for_bad_eraseblocks(void)
140{
141 int i, bad = 0;
142
143 bbt = kmalloc(ebcnt, GFP_KERNEL);
144 if (!bbt) {
145 printk(PRINT_PREF "error: cannot allocate memory\n");
146 return -ENOMEM;
147 }
148 memset(bbt, 0 , ebcnt);
149
150 printk(PRINT_PREF "scanning for bad eraseblocks\n");
151 for (i = 0; i < ebcnt; ++i) {
152 bbt[i] = is_block_bad(i) ? 1 : 0;
153 if (bbt[i])
154 bad += 1;
155 cond_resched();
156 }
157 printk(PRINT_PREF "scanned %d eraseblocks, %d are bad\n", i, bad);
158 return 0;
159}
160
161static int __init mtd_readtest_init(void)
162{
163 uint64_t tmp;
164 int err, i;
165
166 printk(KERN_INFO "\n");
167 printk(KERN_INFO "=================================================\n");
168 printk(PRINT_PREF "MTD device: %d\n", dev);
169
170 mtd = get_mtd_device(NULL, dev);
171 if (IS_ERR(mtd)) {
172 err = PTR_ERR(mtd);
173 printk(PRINT_PREF "error: Cannot get MTD device\n");
174 return err;
175 }
176
177 if (mtd->writesize == 1) {
178 printk(PRINT_PREF "not NAND flash, assume page size is 512 "
179 "bytes.\n");
180 pgsize = 512;
181 } else
182 pgsize = mtd->writesize;
183
184 tmp = mtd->size;
185 do_div(tmp, mtd->erasesize);
186 ebcnt = tmp;
187 pgcnt = mtd->erasesize / mtd->writesize;
188
189 printk(PRINT_PREF "MTD device size %llu, eraseblock size %u, "
190 "page size %u, count of eraseblocks %u, pages per "
191 "eraseblock %u, OOB size %u\n",
192 (unsigned long long)mtd->size, mtd->erasesize,
193 pgsize, ebcnt, pgcnt, mtd->oobsize);
194
195 err = -ENOMEM;
196 iobuf = kmalloc(mtd->erasesize, GFP_KERNEL);
197 if (!iobuf) {
198 printk(PRINT_PREF "error: cannot allocate memory\n");
199 goto out;
200 }
201 iobuf1 = kmalloc(mtd->erasesize, GFP_KERNEL);
202 if (!iobuf1) {
203 printk(PRINT_PREF "error: cannot allocate memory\n");
204 goto out;
205 }
206
207 err = scan_for_bad_eraseblocks();
208 if (err)
209 goto out;
210
211 /* Read all eraseblocks 1 page at a time */
212 printk(PRINT_PREF "testing page read\n");
213 for (i = 0; i < ebcnt; ++i) {
214 int ret;
215
216 if (bbt[i])
217 continue;
218 ret = read_eraseblock_by_page(i);
219 if (ret) {
220 dump_eraseblock(i);
221 if (!err)
222 err = ret;
223 }
224 cond_resched();
225 }
226
227 if (err)
228 printk(PRINT_PREF "finished with errors\n");
229 else
230 printk(PRINT_PREF "finished\n");
231
232out:
233
234 kfree(iobuf);
235 kfree(iobuf1);
236 kfree(bbt);
237 put_mtd_device(mtd);
238 if (err)
239 printk(PRINT_PREF "error %d occurred\n", err);
240 printk(KERN_INFO "=================================================\n");
241 return err;
242}
243module_init(mtd_readtest_init);
244
245static void __exit mtd_readtest_exit(void)
246{
247 return;
248}
249module_exit(mtd_readtest_exit);
250
251MODULE_DESCRIPTION("Read test module");
252MODULE_AUTHOR("Adrian Hunter");
253MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c
new file mode 100644
index 000000000000..141363a7e805
--- /dev/null
+++ b/drivers/mtd/tests/mtd_speedtest.c
@@ -0,0 +1,502 @@
1/*
2 * Copyright (C) 2007 Nokia Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License version 2 as published by
6 * the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; see the file COPYING. If not, write to the Free Software
15 * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16 *
17 * Test read and write speed of a MTD device.
18 *
19 * Author: Adrian Hunter <ext-adrian.hunter@nokia.com>
20 */
21
22#include <linux/init.h>
23#include <linux/module.h>
24#include <linux/moduleparam.h>
25#include <linux/err.h>
26#include <linux/mtd/mtd.h>
27#include <linux/sched.h>
28
29#define PRINT_PREF KERN_INFO "mtd_speedtest: "
30
31static int dev;
32module_param(dev, int, S_IRUGO);
33MODULE_PARM_DESC(dev, "MTD device number to use");
34
35static struct mtd_info *mtd;
36static unsigned char *iobuf;
37static unsigned char *bbt;
38
39static int pgsize;
40static int ebcnt;
41static int pgcnt;
42static int goodebcnt;
43static struct timeval start, finish;
44static unsigned long next = 1;
45
46static inline unsigned int simple_rand(void)
47{
48 next = next * 1103515245 + 12345;
49 return (unsigned int)((next / 65536) % 32768);
50}
51
52static inline void simple_srand(unsigned long seed)
53{
54 next = seed;
55}
56
57static void set_random_data(unsigned char *buf, size_t len)
58{
59 size_t i;
60
61 for (i = 0; i < len; ++i)
62 buf[i] = simple_rand();
63}
64
65static int erase_eraseblock(int ebnum)
66{
67 int err;
68 struct erase_info ei;
69 loff_t addr = ebnum * mtd->erasesize;
70
71 memset(&ei, 0, sizeof(struct erase_info));
72 ei.mtd = mtd;
73 ei.addr = addr;
74 ei.len = mtd->erasesize;
75
76 err = mtd->erase(mtd, &ei);
77 if (err) {
78 printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
79 return err;
80 }
81
82 if (ei.state == MTD_ERASE_FAILED) {
83 printk(PRINT_PREF "some erase error occurred at EB %d\n",
84 ebnum);
85 return -EIO;
86 }
87
88 return 0;
89}
90
91static int erase_whole_device(void)
92{
93 int err;
94 unsigned int i;
95
96 for (i = 0; i < ebcnt; ++i) {
97 if (bbt[i])
98 continue;
99 err = erase_eraseblock(i);
100 if (err)
101 return err;
102 cond_resched();
103 }
104 return 0;
105}
106
107static int write_eraseblock(int ebnum)
108{
109 size_t written = 0;
110 int err = 0;
111 loff_t addr = ebnum * mtd->erasesize;
112
113 err = mtd->write(mtd, addr, mtd->erasesize, &written, iobuf);
114 if (err || written != mtd->erasesize) {
115 printk(PRINT_PREF "error: write failed at %#llx\n", addr);
116 if (!err)
117 err = -EINVAL;
118 }
119
120 return err;
121}
122
123static int write_eraseblock_by_page(int ebnum)
124{
125 size_t written = 0;
126 int i, err = 0;
127 loff_t addr = ebnum * mtd->erasesize;
128 void *buf = iobuf;
129
130 for (i = 0; i < pgcnt; i++) {
131 err = mtd->write(mtd, addr, pgsize, &written, buf);
132 if (err || written != pgsize) {
133 printk(PRINT_PREF "error: write failed at %#llx\n",
134 addr);
135 if (!err)
136 err = -EINVAL;
137 break;
138 }
139 addr += pgsize;
140 buf += pgsize;
141 }
142
143 return err;
144}
145
146static int write_eraseblock_by_2pages(int ebnum)
147{
148 size_t written = 0, sz = pgsize * 2;
149 int i, n = pgcnt / 2, err = 0;
150 loff_t addr = ebnum * mtd->erasesize;
151 void *buf = iobuf;
152
153 for (i = 0; i < n; i++) {
154 err = mtd->write(mtd, addr, sz, &written, buf);
155 if (err || written != sz) {
156 printk(PRINT_PREF "error: write failed at %#llx\n",
157 addr);
158 if (!err)
159 err = -EINVAL;
160 return err;
161 }
162 addr += sz;
163 buf += sz;
164 }
165 if (pgcnt % 2) {
166 err = mtd->write(mtd, addr, pgsize, &written, buf);
167 if (err || written != pgsize) {
168 printk(PRINT_PREF "error: write failed at %#llx\n",
169 addr);
170 if (!err)
171 err = -EINVAL;
172 }
173 }
174
175 return err;
176}
177
178static int read_eraseblock(int ebnum)
179{
180 size_t read = 0;
181 int err = 0;
182 loff_t addr = ebnum * mtd->erasesize;
183
184 err = mtd->read(mtd, addr, mtd->erasesize, &read, iobuf);
185 /* Ignore corrected ECC errors */
186 if (err == -EUCLEAN)
187 err = 0;
188 if (err || read != mtd->erasesize) {
189 printk(PRINT_PREF "error: read failed at %#llx\n", addr);
190 if (!err)
191 err = -EINVAL;
192 }
193
194 return err;
195}
196
197static int read_eraseblock_by_page(int ebnum)
198{
199 size_t read = 0;
200 int i, err = 0;
201 loff_t addr = ebnum * mtd->erasesize;
202 void *buf = iobuf;
203
204 for (i = 0; i < pgcnt; i++) {
205 err = mtd->read(mtd, addr, pgsize, &read, buf);
206 /* Ignore corrected ECC errors */
207 if (err == -EUCLEAN)
208 err = 0;
209 if (err || read != pgsize) {
210 printk(PRINT_PREF "error: read failed at %#llx\n",
211 addr);
212 if (!err)
213 err = -EINVAL;
214 break;
215 }
216 addr += pgsize;
217 buf += pgsize;
218 }
219
220 return err;
221}
222
223static int read_eraseblock_by_2pages(int ebnum)
224{
225 size_t read = 0, sz = pgsize * 2;
226 int i, n = pgcnt / 2, err = 0;
227 loff_t addr = ebnum * mtd->erasesize;
228 void *buf = iobuf;
229
230 for (i = 0; i < n; i++) {
231 err = mtd->read(mtd, addr, sz, &read, buf);
232 /* Ignore corrected ECC errors */
233 if (err == -EUCLEAN)
234 err = 0;
235 if (err || read != sz) {
236 printk(PRINT_PREF "error: read failed at %#llx\n",
237 addr);
238 if (!err)
239 err = -EINVAL;
240 return err;
241 }
242 addr += sz;
243 buf += sz;
244 }
245 if (pgcnt % 2) {
246 err = mtd->read(mtd, addr, pgsize, &read, buf);
247 /* Ignore corrected ECC errors */
248 if (err == -EUCLEAN)
249 err = 0;
250 if (err || read != pgsize) {
251 printk(PRINT_PREF "error: read failed at %#llx\n",
252 addr);
253 if (!err)
254 err = -EINVAL;
255 }
256 }
257
258 return err;
259}
260
261static int is_block_bad(int ebnum)
262{
263 loff_t addr = ebnum * mtd->erasesize;
264 int ret;
265
266 ret = mtd->block_isbad(mtd, addr);
267 if (ret)
268 printk(PRINT_PREF "block %d is bad\n", ebnum);
269 return ret;
270}
271
272static inline void start_timing(void)
273{
274 do_gettimeofday(&start);
275}
276
277static inline void stop_timing(void)
278{
279 do_gettimeofday(&finish);
280}
281
282static long calc_speed(void)
283{
284 long ms, k, speed;
285
286 ms = (finish.tv_sec - start.tv_sec) * 1000 +
287 (finish.tv_usec - start.tv_usec) / 1000;
288 k = goodebcnt * mtd->erasesize / 1024;
289 speed = (k * 1000) / ms;
290 return speed;
291}
292
293static int scan_for_bad_eraseblocks(void)
294{
295 int i, bad = 0;
296
297 bbt = kmalloc(ebcnt, GFP_KERNEL);
298 if (!bbt) {
299 printk(PRINT_PREF "error: cannot allocate memory\n");
300 return -ENOMEM;
301 }
302 memset(bbt, 0 , ebcnt);
303
304 printk(PRINT_PREF "scanning for bad eraseblocks\n");
305 for (i = 0; i < ebcnt; ++i) {
306 bbt[i] = is_block_bad(i) ? 1 : 0;
307 if (bbt[i])
308 bad += 1;
309 cond_resched();
310 }
311 printk(PRINT_PREF "scanned %d eraseblocks, %d are bad\n", i, bad);
312 goodebcnt = ebcnt - bad;
313 return 0;
314}
315
316static int __init mtd_speedtest_init(void)
317{
318 int err, i;
319 long speed;
320 uint64_t tmp;
321
322 printk(KERN_INFO "\n");
323 printk(KERN_INFO "=================================================\n");
324 printk(PRINT_PREF "MTD device: %d\n", dev);
325
326 mtd = get_mtd_device(NULL, dev);
327 if (IS_ERR(mtd)) {
328 err = PTR_ERR(mtd);
329 printk(PRINT_PREF "error: cannot get MTD device\n");
330 return err;
331 }
332
333 if (mtd->writesize == 1) {
334 printk(PRINT_PREF "not NAND flash, assume page size is 512 "
335 "bytes.\n");
336 pgsize = 512;
337 } else
338 pgsize = mtd->writesize;
339
340 tmp = mtd->size;
341 do_div(tmp, mtd->erasesize);
342 ebcnt = tmp;
343 pgcnt = mtd->erasesize / mtd->writesize;
344
345 printk(PRINT_PREF "MTD device size %llu, eraseblock size %u, "
346 "page size %u, count of eraseblocks %u, pages per "
347 "eraseblock %u, OOB size %u\n",
348 (unsigned long long)mtd->size, mtd->erasesize,
349 pgsize, ebcnt, pgcnt, mtd->oobsize);
350
351 err = -ENOMEM;
352 iobuf = kmalloc(mtd->erasesize, GFP_KERNEL);
353 if (!iobuf) {
354 printk(PRINT_PREF "error: cannot allocate memory\n");
355 goto out;
356 }
357
358 simple_srand(1);
359 set_random_data(iobuf, mtd->erasesize);
360
361 err = scan_for_bad_eraseblocks();
362 if (err)
363 goto out;
364
365 err = erase_whole_device();
366 if (err)
367 goto out;
368
369 /* Write all eraseblocks, 1 eraseblock at a time */
370 printk(PRINT_PREF "testing eraseblock write speed\n");
371 start_timing();
372 for (i = 0; i < ebcnt; ++i) {
373 if (bbt[i])
374 continue;
375 err = write_eraseblock(i);
376 if (err)
377 goto out;
378 cond_resched();
379 }
380 stop_timing();
381 speed = calc_speed();
382 printk(PRINT_PREF "eraseblock write speed is %ld KiB/s\n", speed);
383
384 /* Read all eraseblocks, 1 eraseblock at a time */
385 printk(PRINT_PREF "testing eraseblock read speed\n");
386 start_timing();
387 for (i = 0; i < ebcnt; ++i) {
388 if (bbt[i])
389 continue;
390 err = read_eraseblock(i);
391 if (err)
392 goto out;
393 cond_resched();
394 }
395 stop_timing();
396 speed = calc_speed();
397 printk(PRINT_PREF "eraseblock read speed is %ld KiB/s\n", speed);
398
399 err = erase_whole_device();
400 if (err)
401 goto out;
402
403 /* Write all eraseblocks, 1 page at a time */
404 printk(PRINT_PREF "testing page write speed\n");
405 start_timing();
406 for (i = 0; i < ebcnt; ++i) {
407 if (bbt[i])
408 continue;
409 err = write_eraseblock_by_page(i);
410 if (err)
411 goto out;
412 cond_resched();
413 }
414 stop_timing();
415 speed = calc_speed();
416 printk(PRINT_PREF "page write speed is %ld KiB/s\n", speed);
417
418 /* Read all eraseblocks, 1 page at a time */
419 printk(PRINT_PREF "testing page read speed\n");
420 start_timing();
421 for (i = 0; i < ebcnt; ++i) {
422 if (bbt[i])
423 continue;
424 err = read_eraseblock_by_page(i);
425 if (err)
426 goto out;
427 cond_resched();
428 }
429 stop_timing();
430 speed = calc_speed();
431 printk(PRINT_PREF "page read speed is %ld KiB/s\n", speed);
432
433 err = erase_whole_device();
434 if (err)
435 goto out;
436
437 /* Write all eraseblocks, 2 pages at a time */
438 printk(PRINT_PREF "testing 2 page write speed\n");
439 start_timing();
440 for (i = 0; i < ebcnt; ++i) {
441 if (bbt[i])
442 continue;
443 err = write_eraseblock_by_2pages(i);
444 if (err)
445 goto out;
446 cond_resched();
447 }
448 stop_timing();
449 speed = calc_speed();
450 printk(PRINT_PREF "2 page write speed is %ld KiB/s\n", speed);
451
452 /* Read all eraseblocks, 2 pages at a time */
453 printk(PRINT_PREF "testing 2 page read speed\n");
454 start_timing();
455 for (i = 0; i < ebcnt; ++i) {
456 if (bbt[i])
457 continue;
458 err = read_eraseblock_by_2pages(i);
459 if (err)
460 goto out;
461 cond_resched();
462 }
463 stop_timing();
464 speed = calc_speed();
465 printk(PRINT_PREF "2 page read speed is %ld KiB/s\n", speed);
466
467 /* Erase all eraseblocks */
468 printk(PRINT_PREF "Testing erase speed\n");
469 start_timing();
470 for (i = 0; i < ebcnt; ++i) {
471 if (bbt[i])
472 continue;
473 err = erase_eraseblock(i);
474 if (err)
475 goto out;
476 cond_resched();
477 }
478 stop_timing();
479 speed = calc_speed();
480 printk(PRINT_PREF "erase speed is %ld KiB/s\n", speed);
481
482 printk(PRINT_PREF "finished\n");
483out:
484 kfree(iobuf);
485 kfree(bbt);
486 put_mtd_device(mtd);
487 if (err)
488 printk(PRINT_PREF "error %d occurred\n", err);
489 printk(KERN_INFO "=================================================\n");
490 return err;
491}
492module_init(mtd_speedtest_init);
493
494static void __exit mtd_speedtest_exit(void)
495{
496 return;
497}
498module_exit(mtd_speedtest_exit);
499
500MODULE_DESCRIPTION("Speed test module");
501MODULE_AUTHOR("Adrian Hunter");
502MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c
new file mode 100644
index 000000000000..63920476b57a
--- /dev/null
+++ b/drivers/mtd/tests/mtd_stresstest.c
@@ -0,0 +1,330 @@
1/*
2 * Copyright (C) 2006-2008 Nokia Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License version 2 as published by
6 * the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; see the file COPYING. If not, write to the Free Software
15 * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16 *
17 * Test random reads, writes and erases on MTD device.
18 *
19 * Author: Adrian Hunter <ext-adrian.hunter@nokia.com>
20 */
21
22#include <linux/init.h>
23#include <linux/module.h>
24#include <linux/moduleparam.h>
25#include <linux/err.h>
26#include <linux/mtd/mtd.h>
27#include <linux/sched.h>
28#include <linux/vmalloc.h>
29
30#define PRINT_PREF KERN_INFO "mtd_stresstest: "
31
32static int dev;
33module_param(dev, int, S_IRUGO);
34MODULE_PARM_DESC(dev, "MTD device number to use");
35
36static int count = 10000;
37module_param(count, int, S_IRUGO);
38MODULE_PARM_DESC(count, "Number of operations to do (default is 10000)");
39
40static struct mtd_info *mtd;
41static unsigned char *writebuf;
42static unsigned char *readbuf;
43static unsigned char *bbt;
44static int *offsets;
45
46static int pgsize;
47static int bufsize;
48static int ebcnt;
49static int pgcnt;
50static unsigned long next = 1;
51
52static inline unsigned int simple_rand(void)
53{
54 next = next * 1103515245 + 12345;
55 return (unsigned int)((next / 65536) % 32768);
56}
57
58static inline void simple_srand(unsigned long seed)
59{
60 next = seed;
61}
62
63static int rand_eb(void)
64{
65 int eb;
66
67again:
68 if (ebcnt < 32768)
69 eb = simple_rand();
70 else
71 eb = (simple_rand() << 15) | simple_rand();
72 /* Read or write up 2 eraseblocks at a time - hence 'ebcnt - 1' */
73 eb %= (ebcnt - 1);
74 if (bbt[eb])
75 goto again;
76 return eb;
77}
78
79static int rand_offs(void)
80{
81 int offs;
82
83 if (bufsize < 32768)
84 offs = simple_rand();
85 else
86 offs = (simple_rand() << 15) | simple_rand();
87 offs %= bufsize;
88 return offs;
89}
90
91static int rand_len(int offs)
92{
93 int len;
94
95 if (bufsize < 32768)
96 len = simple_rand();
97 else
98 len = (simple_rand() << 15) | simple_rand();
99 len %= (bufsize - offs);
100 return len;
101}
102
103static int erase_eraseblock(int ebnum)
104{
105 int err;
106 struct erase_info ei;
107 loff_t addr = ebnum * mtd->erasesize;
108
109 memset(&ei, 0, sizeof(struct erase_info));
110 ei.mtd = mtd;
111 ei.addr = addr;
112 ei.len = mtd->erasesize;
113
114 err = mtd->erase(mtd, &ei);
115 if (unlikely(err)) {
116 printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
117 return err;
118 }
119
120 if (unlikely(ei.state == MTD_ERASE_FAILED)) {
121 printk(PRINT_PREF "some erase error occurred at EB %d\n",
122 ebnum);
123 return -EIO;
124 }
125
126 return 0;
127}
128
129static int is_block_bad(int ebnum)
130{
131 loff_t addr = ebnum * mtd->erasesize;
132 int ret;
133
134 ret = mtd->block_isbad(mtd, addr);
135 if (ret)
136 printk(PRINT_PREF "block %d is bad\n", ebnum);
137 return ret;
138}
139
140static int do_read(void)
141{
142 size_t read = 0;
143 int eb = rand_eb();
144 int offs = rand_offs();
145 int len = rand_len(offs), err;
146 loff_t addr;
147
148 if (bbt[eb + 1]) {
149 if (offs >= mtd->erasesize)
150 offs -= mtd->erasesize;
151 if (offs + len > mtd->erasesize)
152 len = mtd->erasesize - offs;
153 }
154 addr = eb * mtd->erasesize + offs;
155 err = mtd->read(mtd, addr, len, &read, readbuf);
156 if (err == -EUCLEAN)
157 err = 0;
158 if (unlikely(err || read != len)) {
159 printk(PRINT_PREF "error: read failed at 0x%llx\n",
160 (long long)addr);
161 if (!err)
162 err = -EINVAL;
163 return err;
164 }
165 return 0;
166}
167
168static int do_write(void)
169{
170 int eb = rand_eb(), offs, err, len;
171 size_t written = 0;
172 loff_t addr;
173
174 offs = offsets[eb];
175 if (offs >= mtd->erasesize) {
176 err = erase_eraseblock(eb);
177 if (err)
178 return err;
179 offs = offsets[eb] = 0;
180 }
181 len = rand_len(offs);
182 len = ((len + pgsize - 1) / pgsize) * pgsize;
183 if (offs + len > mtd->erasesize) {
184 if (bbt[eb + 1])
185 len = mtd->erasesize - offs;
186 else {
187 err = erase_eraseblock(eb + 1);
188 if (err)
189 return err;
190 offsets[eb + 1] = 0;
191 }
192 }
193 addr = eb * mtd->erasesize + offs;
194 err = mtd->write(mtd, addr, len, &written, writebuf);
195 if (unlikely(err || written != len)) {
196 printk(PRINT_PREF "error: write failed at 0x%llx\n",
197 (long long)addr);
198 if (!err)
199 err = -EINVAL;
200 return err;
201 }
202 offs += len;
203 while (offs > mtd->erasesize) {
204 offsets[eb++] = mtd->erasesize;
205 offs -= mtd->erasesize;
206 }
207 offsets[eb] = offs;
208 return 0;
209}
210
211static int do_operation(void)
212{
213 if (simple_rand() & 1)
214 return do_read();
215 else
216 return do_write();
217}
218
219static int scan_for_bad_eraseblocks(void)
220{
221 int i, bad = 0;
222
223 bbt = kmalloc(ebcnt, GFP_KERNEL);
224 if (!bbt) {
225 printk(PRINT_PREF "error: cannot allocate memory\n");
226 return -ENOMEM;
227 }
228 memset(bbt, 0 , ebcnt);
229
230 printk(PRINT_PREF "scanning for bad eraseblocks\n");
231 for (i = 0; i < ebcnt; ++i) {
232 bbt[i] = is_block_bad(i) ? 1 : 0;
233 if (bbt[i])
234 bad += 1;
235 cond_resched();
236 }
237 printk(PRINT_PREF "scanned %d eraseblocks, %d are bad\n", i, bad);
238 return 0;
239}
240
241static int __init mtd_stresstest_init(void)
242{
243 int err;
244 int i, op;
245 uint64_t tmp;
246
247 printk(KERN_INFO "\n");
248 printk(KERN_INFO "=================================================\n");
249 printk(PRINT_PREF "MTD device: %d\n", dev);
250
251 mtd = get_mtd_device(NULL, dev);
252 if (IS_ERR(mtd)) {
253 err = PTR_ERR(mtd);
254 printk(PRINT_PREF "error: cannot get MTD device\n");
255 return err;
256 }
257
258 if (mtd->writesize == 1) {
259 printk(PRINT_PREF "not NAND flash, assume page size is 512 "
260 "bytes.\n");
261 pgsize = 512;
262 } else
263 pgsize = mtd->writesize;
264
265 tmp = mtd->size;
266 do_div(tmp, mtd->erasesize);
267 ebcnt = tmp;
268 pgcnt = mtd->erasesize / mtd->writesize;
269
270 printk(PRINT_PREF "MTD device size %llu, eraseblock size %u, "
271 "page size %u, count of eraseblocks %u, pages per "
272 "eraseblock %u, OOB size %u\n",
273 (unsigned long long)mtd->size, mtd->erasesize,
274 pgsize, ebcnt, pgcnt, mtd->oobsize);
275
276 /* Read or write up 2 eraseblocks at a time */
277 bufsize = mtd->erasesize * 2;
278
279 err = -ENOMEM;
280 readbuf = vmalloc(bufsize);
281 writebuf = vmalloc(bufsize);
282 offsets = kmalloc(ebcnt * sizeof(int), GFP_KERNEL);
283 if (!readbuf || !writebuf || !offsets) {
284 printk(PRINT_PREF "error: cannot allocate memory\n");
285 goto out;
286 }
287 for (i = 0; i < ebcnt; i++)
288 offsets[i] = mtd->erasesize;
289 simple_srand(current->pid);
290 for (i = 0; i < bufsize; i++)
291 writebuf[i] = simple_rand();
292
293 err = scan_for_bad_eraseblocks();
294 if (err)
295 goto out;
296
297 /* Do operations */
298 printk(PRINT_PREF "doing operations\n");
299 for (op = 0; op < count; op++) {
300 if ((op & 1023) == 0)
301 printk(PRINT_PREF "%d operations done\n", op);
302 err = do_operation();
303 if (err)
304 goto out;
305 cond_resched();
306 }
307 printk(PRINT_PREF "finished, %d operations done\n", op);
308
309out:
310 kfree(offsets);
311 kfree(bbt);
312 vfree(writebuf);
313 vfree(readbuf);
314 put_mtd_device(mtd);
315 if (err)
316 printk(PRINT_PREF "error %d occurred\n", err);
317 printk(KERN_INFO "=================================================\n");
318 return err;
319}
320module_init(mtd_stresstest_init);
321
322static void __exit mtd_stresstest_exit(void)
323{
324 return;
325}
326module_exit(mtd_stresstest_exit);
327
328MODULE_DESCRIPTION("Stress test module");
329MODULE_AUTHOR("Adrian Hunter");
330MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/mtd_subpagetest.c
new file mode 100644
index 000000000000..5b889724268e
--- /dev/null
+++ b/drivers/mtd/tests/mtd_subpagetest.c
@@ -0,0 +1,525 @@
1/*
2 * Copyright (C) 2006-2007 Nokia Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License version 2 as published by
6 * the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; see the file COPYING. If not, write to the Free Software
15 * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16 *
17 * Test sub-page read and write on MTD device.
18 * Author: Adrian Hunter <ext-adrian.hunter@nokia.com>
19 *
20 */
21
22#include <linux/init.h>
23#include <linux/module.h>
24#include <linux/moduleparam.h>
25#include <linux/err.h>
26#include <linux/mtd/mtd.h>
27#include <linux/sched.h>
28
29#define PRINT_PREF KERN_INFO "mtd_subpagetest: "
30
31static int dev;
32module_param(dev, int, S_IRUGO);
33MODULE_PARM_DESC(dev, "MTD device number to use");
34
35static struct mtd_info *mtd;
36static unsigned char *writebuf;
37static unsigned char *readbuf;
38static unsigned char *bbt;
39
40static int subpgsize;
41static int bufsize;
42static int ebcnt;
43static int pgcnt;
44static int errcnt;
45static unsigned long next = 1;
46
47static inline unsigned int simple_rand(void)
48{
49 next = next * 1103515245 + 12345;
50 return (unsigned int)((next / 65536) % 32768);
51}
52
53static inline void simple_srand(unsigned long seed)
54{
55 next = seed;
56}
57
58static void set_random_data(unsigned char *buf, size_t len)
59{
60 size_t i;
61
62 for (i = 0; i < len; ++i)
63 buf[i] = simple_rand();
64}
65
66static inline void clear_data(unsigned char *buf, size_t len)
67{
68 memset(buf, 0, len);
69}
70
71static int erase_eraseblock(int ebnum)
72{
73 int err;
74 struct erase_info ei;
75 loff_t addr = ebnum * mtd->erasesize;
76
77 memset(&ei, 0, sizeof(struct erase_info));
78 ei.mtd = mtd;
79 ei.addr = addr;
80 ei.len = mtd->erasesize;
81
82 err = mtd->erase(mtd, &ei);
83 if (err) {
84 printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
85 return err;
86 }
87
88 if (ei.state == MTD_ERASE_FAILED) {
89 printk(PRINT_PREF "some erase error occurred at EB %d\n",
90 ebnum);
91 return -EIO;
92 }
93
94 return 0;
95}
96
97static int erase_whole_device(void)
98{
99 int err;
100 unsigned int i;
101
102 printk(PRINT_PREF "erasing whole device\n");
103 for (i = 0; i < ebcnt; ++i) {
104 if (bbt[i])
105 continue;
106 err = erase_eraseblock(i);
107 if (err)
108 return err;
109 cond_resched();
110 }
111 printk(PRINT_PREF "erased %u eraseblocks\n", i);
112 return 0;
113}
114
115static int write_eraseblock(int ebnum)
116{
117 size_t written = 0;
118 int err = 0;
119 loff_t addr = ebnum * mtd->erasesize;
120
121 set_random_data(writebuf, subpgsize);
122 err = mtd->write(mtd, addr, subpgsize, &written, writebuf);
123 if (unlikely(err || written != subpgsize)) {
124 printk(PRINT_PREF "error: write failed at %#llx\n",
125 (long long)addr);
126 if (written != subpgsize) {
127 printk(PRINT_PREF " write size: %#x\n", subpgsize);
128 printk(PRINT_PREF " written: %#zx\n", written);
129 }
130 return err ? err : -1;
131 }
132
133 addr += subpgsize;
134
135 set_random_data(writebuf, subpgsize);
136 err = mtd->write(mtd, addr, subpgsize, &written, writebuf);
137 if (unlikely(err || written != subpgsize)) {
138 printk(PRINT_PREF "error: write failed at %#llx\n",
139 (long long)addr);
140 if (written != subpgsize) {
141 printk(PRINT_PREF " write size: %#x\n", subpgsize);
142 printk(PRINT_PREF " written: %#zx\n", written);
143 }
144 return err ? err : -1;
145 }
146
147 return err;
148}
149
150static int write_eraseblock2(int ebnum)
151{
152 size_t written = 0;
153 int err = 0, k;
154 loff_t addr = ebnum * mtd->erasesize;
155
156 for (k = 1; k < 33; ++k) {
157 if (addr + (subpgsize * k) > (ebnum + 1) * mtd->erasesize)
158 break;
159 set_random_data(writebuf, subpgsize * k);
160 err = mtd->write(mtd, addr, subpgsize * k, &written, writebuf);
161 if (unlikely(err || written != subpgsize * k)) {
162 printk(PRINT_PREF "error: write failed at %#llx\n",
163 (long long)addr);
164 if (written != subpgsize) {
165 printk(PRINT_PREF " write size: %#x\n",
166 subpgsize * k);
167 printk(PRINT_PREF " written: %#08zx\n",
168 written);
169 }
170 return err ? err : -1;
171 }
172 addr += subpgsize * k;
173 }
174
175 return err;
176}
177
178static void print_subpage(unsigned char *p)
179{
180 int i, j;
181
182 for (i = 0; i < subpgsize; ) {
183 for (j = 0; i < subpgsize && j < 32; ++i, ++j)
184 printk("%02x", *p++);
185 printk("\n");
186 }
187}
188
189static int verify_eraseblock(int ebnum)
190{
191 size_t read = 0;
192 int err = 0;
193 loff_t addr = ebnum * mtd->erasesize;
194
195 set_random_data(writebuf, subpgsize);
196 clear_data(readbuf, subpgsize);
197 read = 0;
198 err = mtd->read(mtd, addr, subpgsize, &read, readbuf);
199 if (unlikely(err || read != subpgsize)) {
200 if (err == -EUCLEAN && read == subpgsize) {
201 printk(PRINT_PREF "ECC correction at %#llx\n",
202 (long long)addr);
203 err = 0;
204 } else {
205 printk(PRINT_PREF "error: read failed at %#llx\n",
206 (long long)addr);
207 return err ? err : -1;
208 }
209 }
210 if (unlikely(memcmp(readbuf, writebuf, subpgsize))) {
211 printk(PRINT_PREF "error: verify failed at %#llx\n",
212 (long long)addr);
213 printk(PRINT_PREF "------------- written----------------\n");
214 print_subpage(writebuf);
215 printk(PRINT_PREF "------------- read ------------------\n");
216 print_subpage(readbuf);
217 printk(PRINT_PREF "-------------------------------------\n");
218 errcnt += 1;
219 }
220
221 addr += subpgsize;
222
223 set_random_data(writebuf, subpgsize);
224 clear_data(readbuf, subpgsize);
225 read = 0;
226 err = mtd->read(mtd, addr, subpgsize, &read, readbuf);
227 if (unlikely(err || read != subpgsize)) {
228 if (err == -EUCLEAN && read == subpgsize) {
229 printk(PRINT_PREF "ECC correction at %#llx\n",
230 (long long)addr);
231 err = 0;
232 } else {
233 printk(PRINT_PREF "error: read failed at %#llx\n",
234 (long long)addr);
235 return err ? err : -1;
236 }
237 }
238 if (unlikely(memcmp(readbuf, writebuf, subpgsize))) {
239 printk(PRINT_PREF "error: verify failed at %#llx\n",
240 (long long)addr);
241 printk(PRINT_PREF "------------- written----------------\n");
242 print_subpage(writebuf);
243 printk(PRINT_PREF "------------- read ------------------\n");
244 print_subpage(readbuf);
245 printk(PRINT_PREF "-------------------------------------\n");
246 errcnt += 1;
247 }
248
249 return err;
250}
251
252static int verify_eraseblock2(int ebnum)
253{
254 size_t read = 0;
255 int err = 0, k;
256 loff_t addr = ebnum * mtd->erasesize;
257
258 for (k = 1; k < 33; ++k) {
259 if (addr + (subpgsize * k) > (ebnum + 1) * mtd->erasesize)
260 break;
261 set_random_data(writebuf, subpgsize * k);
262 clear_data(readbuf, subpgsize * k);
263 read = 0;
264 err = mtd->read(mtd, addr, subpgsize * k, &read, readbuf);
265 if (unlikely(err || read != subpgsize * k)) {
266 if (err == -EUCLEAN && read == subpgsize * k) {
267 printk(PRINT_PREF "ECC correction at %#llx\n",
268 (long long)addr);
269 err = 0;
270 } else {
271 printk(PRINT_PREF "error: read failed at "
272 "%#llx\n", (long long)addr);
273 return err ? err : -1;
274 }
275 }
276 if (unlikely(memcmp(readbuf, writebuf, subpgsize * k))) {
277 printk(PRINT_PREF "error: verify failed at %#llx\n",
278 (long long)addr);
279 errcnt += 1;
280 }
281 addr += subpgsize * k;
282 }
283
284 return err;
285}
286
287static int verify_eraseblock_ff(int ebnum)
288{
289 uint32_t j;
290 size_t read = 0;
291 int err = 0;
292 loff_t addr = ebnum * mtd->erasesize;
293
294 memset(writebuf, 0xff, subpgsize);
295 for (j = 0; j < mtd->erasesize / subpgsize; ++j) {
296 clear_data(readbuf, subpgsize);
297 read = 0;
298 err = mtd->read(mtd, addr, subpgsize, &read, readbuf);
299 if (unlikely(err || read != subpgsize)) {
300 if (err == -EUCLEAN && read == subpgsize) {
301 printk(PRINT_PREF "ECC correction at %#llx\n",
302 (long long)addr);
303 err = 0;
304 } else {
305 printk(PRINT_PREF "error: read failed at "
306 "%#llx\n", (long long)addr);
307 return err ? err : -1;
308 }
309 }
310 if (unlikely(memcmp(readbuf, writebuf, subpgsize))) {
311 printk(PRINT_PREF "error: verify 0xff failed at "
312 "%#llx\n", (long long)addr);
313 errcnt += 1;
314 }
315 addr += subpgsize;
316 }
317
318 return err;
319}
320
321static int verify_all_eraseblocks_ff(void)
322{
323 int err;
324 unsigned int i;
325
326 printk(PRINT_PREF "verifying all eraseblocks for 0xff\n");
327 for (i = 0; i < ebcnt; ++i) {
328 if (bbt[i])
329 continue;
330 err = verify_eraseblock_ff(i);
331 if (err)
332 return err;
333 if (i % 256 == 0)
334 printk(PRINT_PREF "verified up to eraseblock %u\n", i);
335 cond_resched();
336 }
337 printk(PRINT_PREF "verified %u eraseblocks\n", i);
338 return 0;
339}
340
341static int is_block_bad(int ebnum)
342{
343 loff_t addr = ebnum * mtd->erasesize;
344 int ret;
345
346 ret = mtd->block_isbad(mtd, addr);
347 if (ret)
348 printk(PRINT_PREF "block %d is bad\n", ebnum);
349 return ret;
350}
351
352static int scan_for_bad_eraseblocks(void)
353{
354 int i, bad = 0;
355
356 bbt = kmalloc(ebcnt, GFP_KERNEL);
357 if (!bbt) {
358 printk(PRINT_PREF "error: cannot allocate memory\n");
359 return -ENOMEM;
360 }
361 memset(bbt, 0 , ebcnt);
362
363 printk(PRINT_PREF "scanning for bad eraseblocks\n");
364 for (i = 0; i < ebcnt; ++i) {
365 bbt[i] = is_block_bad(i) ? 1 : 0;
366 if (bbt[i])
367 bad += 1;
368 cond_resched();
369 }
370 printk(PRINT_PREF "scanned %d eraseblocks, %d are bad\n", i, bad);
371 return 0;
372}
373
374static int __init mtd_subpagetest_init(void)
375{
376 int err = 0;
377 uint32_t i;
378 uint64_t tmp;
379
380 printk(KERN_INFO "\n");
381 printk(KERN_INFO "=================================================\n");
382 printk(PRINT_PREF "MTD device: %d\n", dev);
383
384 mtd = get_mtd_device(NULL, dev);
385 if (IS_ERR(mtd)) {
386 err = PTR_ERR(mtd);
387 printk(PRINT_PREF "error: cannot get MTD device\n");
388 return err;
389 }
390
391 if (mtd->type != MTD_NANDFLASH) {
392 printk(PRINT_PREF "this test requires NAND flash\n");
393 goto out;
394 }
395
396 subpgsize = mtd->writesize >> mtd->subpage_sft;
397 printk(PRINT_PREF "MTD device size %llu, eraseblock size %u, "
398 "page size %u, subpage size %u, count of eraseblocks %u, "
399 "pages per eraseblock %u, OOB size %u\n",
400 (unsigned long long)mtd->size, mtd->erasesize,
401 mtd->writesize, subpgsize, ebcnt, pgcnt, mtd->oobsize);
402
403 err = -ENOMEM;
404 bufsize = subpgsize * 32;
405 writebuf = kmalloc(bufsize, GFP_KERNEL);
406 if (!writebuf) {
407 printk(PRINT_PREF "error: cannot allocate memory\n");
408 goto out;
409 }
410 readbuf = kmalloc(bufsize, GFP_KERNEL);
411 if (!readbuf) {
412 printk(PRINT_PREF "error: cannot allocate memory\n");
413 goto out;
414 }
415
416 tmp = mtd->size;
417 do_div(tmp, mtd->erasesize);
418 ebcnt = tmp;
419 pgcnt = mtd->erasesize / mtd->writesize;
420
421 err = scan_for_bad_eraseblocks();
422 if (err)
423 goto out;
424
425 err = erase_whole_device();
426 if (err)
427 goto out;
428
429 printk(PRINT_PREF "writing whole device\n");
430 simple_srand(1);
431 for (i = 0; i < ebcnt; ++i) {
432 if (bbt[i])
433 continue;
434 err = write_eraseblock(i);
435 if (unlikely(err))
436 goto out;
437 if (i % 256 == 0)
438 printk(PRINT_PREF "written up to eraseblock %u\n", i);
439 cond_resched();
440 }
441 printk(PRINT_PREF "written %u eraseblocks\n", i);
442
443 simple_srand(1);
444 printk(PRINT_PREF "verifying all eraseblocks\n");
445 for (i = 0; i < ebcnt; ++i) {
446 if (bbt[i])
447 continue;
448 err = verify_eraseblock(i);
449 if (unlikely(err))
450 goto out;
451 if (i % 256 == 0)
452 printk(PRINT_PREF "verified up to eraseblock %u\n", i);
453 cond_resched();
454 }
455 printk(PRINT_PREF "verified %u eraseblocks\n", i);
456
457 err = erase_whole_device();
458 if (err)
459 goto out;
460
461 err = verify_all_eraseblocks_ff();
462 if (err)
463 goto out;
464
465 /* Write all eraseblocks */
466 simple_srand(3);
467 printk(PRINT_PREF "writing whole device\n");
468 for (i = 0; i < ebcnt; ++i) {
469 if (bbt[i])
470 continue;
471 err = write_eraseblock2(i);
472 if (unlikely(err))
473 goto out;
474 if (i % 256 == 0)
475 printk(PRINT_PREF "written up to eraseblock %u\n", i);
476 cond_resched();
477 }
478 printk(PRINT_PREF "written %u eraseblocks\n", i);
479
480 /* Check all eraseblocks */
481 simple_srand(3);
482 printk(PRINT_PREF "verifying all eraseblocks\n");
483 for (i = 0; i < ebcnt; ++i) {
484 if (bbt[i])
485 continue;
486 err = verify_eraseblock2(i);
487 if (unlikely(err))
488 goto out;
489 if (i % 256 == 0)
490 printk(PRINT_PREF "verified up to eraseblock %u\n", i);
491 cond_resched();
492 }
493 printk(PRINT_PREF "verified %u eraseblocks\n", i);
494
495 err = erase_whole_device();
496 if (err)
497 goto out;
498
499 err = verify_all_eraseblocks_ff();
500 if (err)
501 goto out;
502
503 printk(PRINT_PREF "finished with %d errors\n", errcnt);
504
505out:
506 kfree(bbt);
507 kfree(readbuf);
508 kfree(writebuf);
509 put_mtd_device(mtd);
510 if (err)
511 printk(PRINT_PREF "error %d occurred\n", err);
512 printk(KERN_INFO "=================================================\n");
513 return err;
514}
515module_init(mtd_subpagetest_init);
516
517static void __exit mtd_subpagetest_exit(void)
518{
519 return;
520}
521module_exit(mtd_subpagetest_exit);
522
523MODULE_DESCRIPTION("Subpage test module");
524MODULE_AUTHOR("Adrian Hunter");
525MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/tests/mtd_torturetest.c b/drivers/mtd/tests/mtd_torturetest.c
new file mode 100644
index 000000000000..631a0ab3a33c
--- /dev/null
+++ b/drivers/mtd/tests/mtd_torturetest.c
@@ -0,0 +1,530 @@
1/*
2 * Copyright (C) 2006-2008 Artem Bityutskiy
3 * Copyright (C) 2006-2008 Jarkko Lavinen
4 * Copyright (C) 2006-2008 Adrian Hunter
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; see the file COPYING. If not, write to the Free Software
17 * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 *
19 * Authors: Artem Bityutskiy, Jarkko Lavinen, Adria Hunter
20 *
21 * WARNING: this test program may kill your flash and your device. Do not
22 * use it unless you know what you do. Authors are not responsible for any
23 * damage caused by this program.
24 */
25
26#include <linux/init.h>
27#include <linux/module.h>
28#include <linux/moduleparam.h>
29#include <linux/err.h>
30#include <linux/mtd/mtd.h>
31#include <linux/sched.h>
32
33#define PRINT_PREF KERN_INFO "mtd_torturetest: "
34#define RETRIES 3
35
36static int eb = 8;
37module_param(eb, int, S_IRUGO);
38MODULE_PARM_DESC(eb, "eraseblock number within the selected MTD device");
39
40static int ebcnt = 32;
41module_param(ebcnt, int, S_IRUGO);
42MODULE_PARM_DESC(ebcnt, "number of consecutive eraseblocks to torture");
43
44static int pgcnt;
45module_param(pgcnt, int, S_IRUGO);
46MODULE_PARM_DESC(pgcnt, "number of pages per eraseblock to torture (0 => all)");
47
48static int dev;
49module_param(dev, int, S_IRUGO);
50MODULE_PARM_DESC(dev, "MTD device number to use");
51
52static int gran = 512;
53module_param(gran, int, S_IRUGO);
54MODULE_PARM_DESC(gran, "how often the status information should be printed");
55
56static int check = 1;
57module_param(check, int, S_IRUGO);
58MODULE_PARM_DESC(check, "if the written data should be checked");
59
60static unsigned int cycles_count;
61module_param(cycles_count, uint, S_IRUGO);
62MODULE_PARM_DESC(cycles_count, "how many erase cycles to do "
63 "(infinite by default)");
64
65static struct mtd_info *mtd;
66
67/* This buffer contains 0x555555...0xAAAAAA... pattern */
68static unsigned char *patt_5A5;
69/* This buffer contains 0xAAAAAA...0x555555... pattern */
70static unsigned char *patt_A5A;
71/* This buffer contains all 0xFF bytes */
72static unsigned char *patt_FF;
73/* This a temporary buffer is use when checking data */
74static unsigned char *check_buf;
75/* How many erase cycles were done */
76static unsigned int erase_cycles;
77
78static int pgsize;
79static struct timeval start, finish;
80
81static void report_corrupt(unsigned char *read, unsigned char *written);
82
83static inline void start_timing(void)
84{
85 do_gettimeofday(&start);
86}
87
88static inline void stop_timing(void)
89{
90 do_gettimeofday(&finish);
91}
92
93/*
94 * Erase eraseblock number @ebnum.
95 */
96static inline int erase_eraseblock(int ebnum)
97{
98 int err;
99 struct erase_info ei;
100 loff_t addr = ebnum * mtd->erasesize;
101
102 memset(&ei, 0, sizeof(struct erase_info));
103 ei.mtd = mtd;
104 ei.addr = addr;
105 ei.len = mtd->erasesize;
106
107 err = mtd->erase(mtd, &ei);
108 if (err) {
109 printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
110 return err;
111 }
112
113 if (ei.state == MTD_ERASE_FAILED) {
114 printk(PRINT_PREF "some erase error occurred at EB %d\n",
115 ebnum);
116 return -EIO;
117 }
118
119 return 0;
120}
121
122/*
123 * Check that the contents of eraseblock number @enbum is equivalent to the
124 * @buf buffer.
125 */
126static inline int check_eraseblock(int ebnum, unsigned char *buf)
127{
128 int err, retries = 0;
129 size_t read = 0;
130 loff_t addr = ebnum * mtd->erasesize;
131 size_t len = mtd->erasesize;
132
133 if (pgcnt) {
134 addr = (ebnum + 1) * mtd->erasesize - pgcnt * pgsize;
135 len = pgcnt * pgsize;
136 }
137
138retry:
139 err = mtd->read(mtd, addr, len, &read, check_buf);
140 if (err == -EUCLEAN)
141 printk(PRINT_PREF "single bit flip occurred at EB %d "
142 "MTD reported that it was fixed.\n", ebnum);
143 else if (err) {
144 printk(PRINT_PREF "error %d while reading EB %d, "
145 "read %zd\n", err, ebnum, read);
146 return err;
147 }
148
149 if (read != len) {
150 printk(PRINT_PREF "failed to read %zd bytes from EB %d, "
151 "read only %zd, but no error reported\n",
152 len, ebnum, read);
153 return -EIO;
154 }
155
156 if (memcmp(buf, check_buf, len)) {
157 printk(PRINT_PREF "read wrong data from EB %d\n", ebnum);
158 report_corrupt(check_buf, buf);
159
160 if (retries++ < RETRIES) {
161 /* Try read again */
162 yield();
163 printk(PRINT_PREF "re-try reading data from EB %d\n",
164 ebnum);
165 goto retry;
166 } else {
167 printk(PRINT_PREF "retried %d times, still errors, "
168 "give-up\n", RETRIES);
169 return -EINVAL;
170 }
171 }
172
173 if (retries != 0)
174 printk(PRINT_PREF "only attempt number %d was OK (!!!)\n",
175 retries);
176
177 return 0;
178}
179
180static inline int write_pattern(int ebnum, void *buf)
181{
182 int err;
183 size_t written = 0;
184 loff_t addr = ebnum * mtd->erasesize;
185 size_t len = mtd->erasesize;
186
187 if (pgcnt) {
188 addr = (ebnum + 1) * mtd->erasesize - pgcnt * pgsize;
189 len = pgcnt * pgsize;
190 }
191 err = mtd->write(mtd, addr, len, &written, buf);
192 if (err) {
193 printk(PRINT_PREF "error %d while writing EB %d, written %zd"
194 " bytes\n", err, ebnum, written);
195 return err;
196 }
197 if (written != len) {
198 printk(PRINT_PREF "written only %zd bytes of %zd, but no error"
199 " reported\n", written, len);
200 return -EIO;
201 }
202
203 return 0;
204}
205
206static int __init tort_init(void)
207{
208 int err = 0, i, infinite = !cycles_count;
209 int bad_ebs[ebcnt];
210
211 printk(KERN_INFO "\n");
212 printk(KERN_INFO "=================================================\n");
213 printk(PRINT_PREF "Warning: this program is trying to wear out your "
214 "flash, stop it if this is not wanted.\n");
215 printk(PRINT_PREF "MTD device: %d\n", dev);
216 printk(PRINT_PREF "torture %d eraseblocks (%d-%d) of mtd%d\n",
217 ebcnt, eb, eb + ebcnt - 1, dev);
218 if (pgcnt)
219 printk(PRINT_PREF "torturing just %d pages per eraseblock\n",
220 pgcnt);
221 printk(PRINT_PREF "write verify %s\n", check ? "enabled" : "disabled");
222
223 mtd = get_mtd_device(NULL, dev);
224 if (IS_ERR(mtd)) {
225 err = PTR_ERR(mtd);
226 printk(PRINT_PREF "error: cannot get MTD device\n");
227 return err;
228 }
229
230 if (mtd->writesize == 1) {
231 printk(PRINT_PREF "not NAND flash, assume page size is 512 "
232 "bytes.\n");
233 pgsize = 512;
234 } else
235 pgsize = mtd->writesize;
236
237 if (pgcnt && (pgcnt > mtd->erasesize / pgsize || pgcnt < 0)) {
238 printk(PRINT_PREF "error: invalid pgcnt value %d\n", pgcnt);
239 goto out_mtd;
240 }
241
242 err = -ENOMEM;
243 patt_5A5 = kmalloc(mtd->erasesize, GFP_KERNEL);
244 if (!patt_5A5) {
245 printk(PRINT_PREF "error: cannot allocate memory\n");
246 goto out_mtd;
247 }
248
249 patt_A5A = kmalloc(mtd->erasesize, GFP_KERNEL);
250 if (!patt_A5A) {
251 printk(PRINT_PREF "error: cannot allocate memory\n");
252 goto out_patt_5A5;
253 }
254
255 patt_FF = kmalloc(mtd->erasesize, GFP_KERNEL);
256 if (!patt_FF) {
257 printk(PRINT_PREF "error: cannot allocate memory\n");
258 goto out_patt_A5A;
259 }
260
261 check_buf = kmalloc(mtd->erasesize, GFP_KERNEL);
262 if (!check_buf) {
263 printk(PRINT_PREF "error: cannot allocate memory\n");
264 goto out_patt_FF;
265 }
266
267 err = 0;
268
269 /* Initialize patterns */
270 memset(patt_FF, 0xFF, mtd->erasesize);
271 for (i = 0; i < mtd->erasesize / pgsize; i++) {
272 if (!(i & 1)) {
273 memset(patt_5A5 + i * pgsize, 0x55, pgsize);
274 memset(patt_A5A + i * pgsize, 0xAA, pgsize);
275 } else {
276 memset(patt_5A5 + i * pgsize, 0xAA, pgsize);
277 memset(patt_A5A + i * pgsize, 0x55, pgsize);
278 }
279 }
280
281 /*
282 * Check if there is a bad eraseblock among those we are going to test.
283 */
284 memset(&bad_ebs[0], 0, sizeof(int) * ebcnt);
285 if (mtd->block_isbad) {
286 for (i = eb; i < eb + ebcnt; i++) {
287 err = mtd->block_isbad(mtd,
288 (loff_t)i * mtd->erasesize);
289
290 if (err < 0) {
291 printk(PRINT_PREF "block_isbad() returned %d "
292 "for EB %d\n", err, i);
293 goto out;
294 }
295
296 if (err) {
297 printk("EB %d is bad. Skip it.\n", i);
298 bad_ebs[i - eb] = 1;
299 }
300 }
301 }
302
303 start_timing();
304 while (1) {
305 int i;
306 void *patt;
307
308 /* Erase all eraseblocks */
309 for (i = eb; i < eb + ebcnt; i++) {
310 if (bad_ebs[i - eb])
311 continue;
312 err = erase_eraseblock(i);
313 if (err)
314 goto out;
315 cond_resched();
316 }
317
318 /* Check if the eraseblocks contain only 0xFF bytes */
319 if (check) {
320 for (i = eb; i < eb + ebcnt; i++) {
321 if (bad_ebs[i - eb])
322 continue;
323 err = check_eraseblock(i, patt_FF);
324 if (err) {
325 printk(PRINT_PREF "verify failed"
326 " for 0xFF... pattern\n");
327 goto out;
328 }
329 cond_resched();
330 }
331 }
332
333 /* Write the pattern */
334 for (i = eb; i < eb + ebcnt; i++) {
335 if (bad_ebs[i - eb])
336 continue;
337 if ((eb + erase_cycles) & 1)
338 patt = patt_5A5;
339 else
340 patt = patt_A5A;
341 err = write_pattern(i, patt);
342 if (err)
343 goto out;
344 cond_resched();
345 }
346
347 /* Verify what we wrote */
348 if (check) {
349 for (i = eb; i < eb + ebcnt; i++) {
350 if (bad_ebs[i - eb])
351 continue;
352 if ((eb + erase_cycles) & 1)
353 patt = patt_5A5;
354 else
355 patt = patt_A5A;
356 err = check_eraseblock(i, patt);
357 if (err) {
358 printk(PRINT_PREF "verify failed for %s"
359 " pattern\n",
360 ((eb + erase_cycles) & 1) ?
361 "0x55AA55..." : "0xAA55AA...");
362 goto out;
363 }
364 cond_resched();
365 }
366 }
367
368 erase_cycles += 1;
369
370 if (erase_cycles % gran == 0) {
371 long ms;
372
373 stop_timing();
374 ms = (finish.tv_sec - start.tv_sec) * 1000 +
375 (finish.tv_usec - start.tv_usec) / 1000;
376 printk(PRINT_PREF "%08u erase cycles done, took %lu "
377 "milliseconds (%lu seconds)\n",
378 erase_cycles, ms, ms / 1000);
379 start_timing();
380 }
381
382 if (!infinite && --cycles_count == 0)
383 break;
384 }
385out:
386
387 printk(PRINT_PREF "finished after %u erase cycles\n",
388 erase_cycles);
389 kfree(check_buf);
390out_patt_FF:
391 kfree(patt_FF);
392out_patt_A5A:
393 kfree(patt_A5A);
394out_patt_5A5:
395 kfree(patt_5A5);
396out_mtd:
397 put_mtd_device(mtd);
398 if (err)
399 printk(PRINT_PREF "error %d occurred during torturing\n", err);
400 printk(KERN_INFO "=================================================\n");
401 return err;
402}
403module_init(tort_init);
404
405static void __exit tort_exit(void)
406{
407 return;
408}
409module_exit(tort_exit);
410
411static int countdiffs(unsigned char *buf, unsigned char *check_buf,
412 unsigned offset, unsigned len, unsigned *bytesp,
413 unsigned *bitsp);
414static void print_bufs(unsigned char *read, unsigned char *written, int start,
415 int len);
416
417/*
418 * Report the detailed information about how the read EB differs from what was
419 * written.
420 */
421static void report_corrupt(unsigned char *read, unsigned char *written)
422{
423 int i;
424 int bytes, bits, pages, first;
425 int offset, len;
426 size_t check_len = mtd->erasesize;
427
428 if (pgcnt)
429 check_len = pgcnt * pgsize;
430
431 bytes = bits = pages = 0;
432 for (i = 0; i < check_len; i += pgsize)
433 if (countdiffs(written, read, i, pgsize, &bytes,
434 &bits) >= 0)
435 pages++;
436
437 printk(PRINT_PREF "verify fails on %d pages, %d bytes/%d bits\n",
438 pages, bytes, bits);
439 printk(PRINT_PREF "The following is a list of all differences between"
440 " what was read from flash and what was expected\n");
441
442 for (i = 0; i < check_len; i += pgsize) {
443 cond_resched();
444 bytes = bits = 0;
445 first = countdiffs(written, read, i, pgsize, &bytes,
446 &bits);
447 if (first < 0)
448 continue;
449
450 printk("-------------------------------------------------------"
451 "----------------------------------\n");
452
453 printk(PRINT_PREF "Page %zd has %d bytes/%d bits failing verify,"
454 " starting at offset 0x%x\n",
455 (mtd->erasesize - check_len + i) / pgsize,
456 bytes, bits, first);
457
458 offset = first & ~0x7;
459 len = ((first + bytes) | 0x7) + 1 - offset;
460
461 print_bufs(read, written, offset, len);
462 }
463}
464
465static void print_bufs(unsigned char *read, unsigned char *written, int start,
466 int len)
467{
468 int i = 0, j1, j2;
469 char *diff;
470
471 printk("Offset Read Written\n");
472 while (i < len) {
473 printk("0x%08x: ", start + i);
474 diff = " ";
475 for (j1 = 0; j1 < 8 && i + j1 < len; j1++) {
476 printk(" %02x", read[start + i + j1]);
477 if (read[start + i + j1] != written[start + i + j1])
478 diff = "***";
479 }
480
481 while (j1 < 8) {
482 printk(" ");
483 j1 += 1;
484 }
485
486 printk(" %s ", diff);
487
488 for (j2 = 0; j2 < 8 && i + j2 < len; j2++)
489 printk(" %02x", written[start + i + j2]);
490 printk("\n");
491 i += 8;
492 }
493}
494
495/*
496 * Count the number of differing bytes and bits and return the first differing
497 * offset.
498 */
499static int countdiffs(unsigned char *buf, unsigned char *check_buf,
500 unsigned offset, unsigned len, unsigned *bytesp,
501 unsigned *bitsp)
502{
503 unsigned i, bit;
504 int first = -1;
505
506 for (i = offset; i < offset + len; i++)
507 if (buf[i] != check_buf[i]) {
508 first = i;
509 break;
510 }
511
512 while (i < offset + len) {
513 if (buf[i] != check_buf[i]) {
514 (*bytesp)++;
515 bit = 1;
516 while (bit < 256) {
517 if ((buf[i] & bit) != (check_buf[i] & bit))
518 (*bitsp)++;
519 bit <<= 1;
520 }
521 }
522 i++;
523 }
524
525 return first;
526}
527
528MODULE_DESCRIPTION("Eraseblock torturing module");
529MODULE_AUTHOR("Artem Bityutskiy, Jarkko Lavinen, Adrian Hunter");
530MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 7caf22cd5ad0..9082768cc6c3 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -561,7 +561,7 @@ static int io_init(struct ubi_device *ubi)
561 */ 561 */
562 562
563 ubi->peb_size = ubi->mtd->erasesize; 563 ubi->peb_size = ubi->mtd->erasesize;
564 ubi->peb_count = ubi->mtd->size / ubi->mtd->erasesize; 564 ubi->peb_count = mtd_div_by_eb(ubi->mtd->size, ubi->mtd);
565 ubi->flash_size = ubi->mtd->size; 565 ubi->flash_size = ubi->mtd->size;
566 566
567 if (ubi->mtd->block_isbad && ubi->mtd->block_markbad) 567 if (ubi->mtd->block_isbad && ubi->mtd->block_markbad)
diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c
index 605812bb0b1a..6dd4f5e77f82 100644
--- a/drivers/mtd/ubi/gluebi.c
+++ b/drivers/mtd/ubi/gluebi.c
@@ -215,7 +215,8 @@ static int gluebi_erase(struct mtd_info *mtd, struct erase_info *instr)
215 struct ubi_volume *vol; 215 struct ubi_volume *vol;
216 struct ubi_device *ubi; 216 struct ubi_device *ubi;
217 217
218 dbg_gen("erase %u bytes at offset %u", instr->len, instr->addr); 218 dbg_gen("erase %llu bytes at offset %llu", (unsigned long long)instr->len,
219 (unsigned long long)instr->addr);
219 220
220 if (instr->addr < 0 || instr->addr > mtd->size - mtd->erasesize) 221 if (instr->addr < 0 || instr->addr > mtd->size - mtd->erasesize)
221 return -EINVAL; 222 return -EINVAL;
@@ -223,11 +224,11 @@ static int gluebi_erase(struct mtd_info *mtd, struct erase_info *instr)
223 if (instr->len < 0 || instr->addr + instr->len > mtd->size) 224 if (instr->len < 0 || instr->addr + instr->len > mtd->size)
224 return -EINVAL; 225 return -EINVAL;
225 226
226 if (instr->addr % mtd->writesize || instr->len % mtd->writesize) 227 if (mtd_mod_by_ws(instr->addr, mtd) || mtd_mod_by_ws(instr->len, mtd))
227 return -EINVAL; 228 return -EINVAL;
228 229
229 lnum = instr->addr / mtd->erasesize; 230 lnum = mtd_div_by_eb(instr->addr, mtd);
230 count = instr->len / mtd->erasesize; 231 count = mtd_div_by_eb(instr->len, mtd);
231 232
232 vol = container_of(mtd, struct ubi_volume, gluebi_mtd); 233 vol = container_of(mtd, struct ubi_volume, gluebi_mtd);
233 ubi = vol->ubi; 234 ubi = vol->ubi;
@@ -255,7 +256,7 @@ static int gluebi_erase(struct mtd_info *mtd, struct erase_info *instr)
255 256
256out_err: 257out_err:
257 instr->state = MTD_ERASE_FAILED; 258 instr->state = MTD_ERASE_FAILED;
258 instr->fail_addr = lnum * mtd->erasesize; 259 instr->fail_addr = (long long)lnum * mtd->erasesize;
259 return err; 260 return err;
260} 261}
261 262
@@ -294,7 +295,7 @@ int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol)
294 * bytes. 295 * bytes.
295 */ 296 */
296 if (vol->vol_type == UBI_DYNAMIC_VOLUME) 297 if (vol->vol_type == UBI_DYNAMIC_VOLUME)
297 mtd->size = vol->usable_leb_size * vol->reserved_pebs; 298 mtd->size = (long long)vol->usable_leb_size * vol->reserved_pebs;
298 else 299 else
299 mtd->size = vol->used_bytes; 300 mtd->size = vol->used_bytes;
300 301
@@ -304,8 +305,8 @@ int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol)
304 return -ENFILE; 305 return -ENFILE;
305 } 306 }
306 307
307 dbg_gen("added mtd%d (\"%s\"), size %u, EB size %u", 308 dbg_gen("added mtd%d (\"%s\"), size %llu, EB size %u",
308 mtd->index, mtd->name, mtd->size, mtd->erasesize); 309 mtd->index, mtd->name, (unsigned long long)mtd->size, mtd->erasesize);
309 return 0; 310 return 0;
310} 311}
311 312
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 65e8294a9e29..9da5a4b81133 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -1,11 +1,12 @@
1/** 1/**
2 * @file buffer_sync.c 2 * @file buffer_sync.c
3 * 3 *
4 * @remark Copyright 2002 OProfile authors 4 * @remark Copyright 2002-2009 OProfile authors
5 * @remark Read the file COPYING 5 * @remark Read the file COPYING
6 * 6 *
7 * @author John Levon <levon@movementarian.org> 7 * @author John Levon <levon@movementarian.org>
8 * @author Barry Kasindorf 8 * @author Barry Kasindorf
9 * @author Robert Richter <robert.richter@amd.com>
9 * 10 *
10 * This is the core of the buffer management. Each 11 * This is the core of the buffer management. Each
11 * CPU buffer is processed and entered into the 12 * CPU buffer is processed and entered into the
@@ -315,88 +316,73 @@ static void add_trace_begin(void)
315 add_event_entry(TRACE_BEGIN_CODE); 316 add_event_entry(TRACE_BEGIN_CODE);
316} 317}
317 318
318#ifdef CONFIG_OPROFILE_IBS 319static void add_data(struct op_entry *entry, struct mm_struct *mm)
319
320#define IBS_FETCH_CODE_SIZE 2
321#define IBS_OP_CODE_SIZE 5
322
323/*
324 * Add IBS fetch and op entries to event buffer
325 */
326static void add_ibs_begin(int cpu, int code, struct mm_struct *mm)
327{ 320{
328 unsigned long rip; 321 unsigned long code, pc, val;
329 int i, count; 322 unsigned long cookie;
330 unsigned long ibs_cookie = 0;
331 off_t offset; 323 off_t offset;
332 struct op_sample *sample;
333
334 sample = cpu_buffer_read_entry(cpu);
335 if (!sample)
336 goto Error;
337 rip = sample->eip;
338 324
339#ifdef __LP64__ 325 if (!op_cpu_buffer_get_data(entry, &code))
340 rip += sample->event << 32; 326 return;
341#endif 327 if (!op_cpu_buffer_get_data(entry, &pc))
328 return;
329 if (!op_cpu_buffer_get_size(entry))
330 return;
342 331
343 if (mm) { 332 if (mm) {
344 ibs_cookie = lookup_dcookie(mm, rip, &offset); 333 cookie = lookup_dcookie(mm, pc, &offset);
345 334
346 if (ibs_cookie == NO_COOKIE) 335 if (cookie == NO_COOKIE)
347 offset = rip; 336 offset = pc;
348 if (ibs_cookie == INVALID_COOKIE) { 337 if (cookie == INVALID_COOKIE) {
349 atomic_inc(&oprofile_stats.sample_lost_no_mapping); 338 atomic_inc(&oprofile_stats.sample_lost_no_mapping);
350 offset = rip; 339 offset = pc;
351 } 340 }
352 if (ibs_cookie != last_cookie) { 341 if (cookie != last_cookie) {
353 add_cookie_switch(ibs_cookie); 342 add_cookie_switch(cookie);
354 last_cookie = ibs_cookie; 343 last_cookie = cookie;
355 } 344 }
356 } else 345 } else
357 offset = rip; 346 offset = pc;
358 347
359 add_event_entry(ESCAPE_CODE); 348 add_event_entry(ESCAPE_CODE);
360 add_event_entry(code); 349 add_event_entry(code);
361 add_event_entry(offset); /* Offset from Dcookie */ 350 add_event_entry(offset); /* Offset from Dcookie */
362 351
363 /* we send the Dcookie offset, but send the raw Linear Add also*/ 352 while (op_cpu_buffer_get_data(entry, &val))
364 add_event_entry(sample->eip); 353 add_event_entry(val);
365 add_event_entry(sample->event);
366
367 if (code == IBS_FETCH_CODE)
368 count = IBS_FETCH_CODE_SIZE; /*IBS FETCH is 2 int64s*/
369 else
370 count = IBS_OP_CODE_SIZE; /*IBS OP is 5 int64s*/
371
372 for (i = 0; i < count; i++) {
373 sample = cpu_buffer_read_entry(cpu);
374 if (!sample)
375 goto Error;
376 add_event_entry(sample->eip);
377 add_event_entry(sample->event);
378 }
379
380 return;
381
382Error:
383 return;
384} 354}
385 355
386#endif 356static inline void add_sample_entry(unsigned long offset, unsigned long event)
387
388static void add_sample_entry(unsigned long offset, unsigned long event)
389{ 357{
390 add_event_entry(offset); 358 add_event_entry(offset);
391 add_event_entry(event); 359 add_event_entry(event);
392} 360}
393 361
394 362
395static int add_us_sample(struct mm_struct *mm, struct op_sample *s) 363/*
364 * Add a sample to the global event buffer. If possible the
365 * sample is converted into a persistent dentry/offset pair
366 * for later lookup from userspace. Return 0 on failure.
367 */
368static int
369add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel)
396{ 370{
397 unsigned long cookie; 371 unsigned long cookie;
398 off_t offset; 372 off_t offset;
399 373
374 if (in_kernel) {
375 add_sample_entry(s->eip, s->event);
376 return 1;
377 }
378
379 /* add userspace sample */
380
381 if (!mm) {
382 atomic_inc(&oprofile_stats.sample_lost_no_mm);
383 return 0;
384 }
385
400 cookie = lookup_dcookie(mm, s->eip, &offset); 386 cookie = lookup_dcookie(mm, s->eip, &offset);
401 387
402 if (cookie == INVALID_COOKIE) { 388 if (cookie == INVALID_COOKIE) {
@@ -415,25 +401,6 @@ static int add_us_sample(struct mm_struct *mm, struct op_sample *s)
415} 401}
416 402
417 403
418/* Add a sample to the global event buffer. If possible the
419 * sample is converted into a persistent dentry/offset pair
420 * for later lookup from userspace.
421 */
422static int
423add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel)
424{
425 if (in_kernel) {
426 add_sample_entry(s->eip, s->event);
427 return 1;
428 } else if (mm) {
429 return add_us_sample(mm, s);
430 } else {
431 atomic_inc(&oprofile_stats.sample_lost_no_mm);
432 }
433 return 0;
434}
435
436
437static void release_mm(struct mm_struct *mm) 404static void release_mm(struct mm_struct *mm)
438{ 405{
439 if (!mm) 406 if (!mm)
@@ -526,66 +493,69 @@ void sync_buffer(int cpu)
526{ 493{
527 struct mm_struct *mm = NULL; 494 struct mm_struct *mm = NULL;
528 struct mm_struct *oldmm; 495 struct mm_struct *oldmm;
496 unsigned long val;
529 struct task_struct *new; 497 struct task_struct *new;
530 unsigned long cookie = 0; 498 unsigned long cookie = 0;
531 int in_kernel = 1; 499 int in_kernel = 1;
532 sync_buffer_state state = sb_buffer_start; 500 sync_buffer_state state = sb_buffer_start;
533 unsigned int i; 501 unsigned int i;
534 unsigned long available; 502 unsigned long available;
503 unsigned long flags;
504 struct op_entry entry;
505 struct op_sample *sample;
535 506
536 mutex_lock(&buffer_mutex); 507 mutex_lock(&buffer_mutex);
537 508
538 add_cpu_switch(cpu); 509 add_cpu_switch(cpu);
539 510
540 cpu_buffer_reset(cpu); 511 op_cpu_buffer_reset(cpu);
541 available = cpu_buffer_entries(cpu); 512 available = op_cpu_buffer_entries(cpu);
542 513
543 for (i = 0; i < available; ++i) { 514 for (i = 0; i < available; ++i) {
544 struct op_sample *s = cpu_buffer_read_entry(cpu); 515 sample = op_cpu_buffer_read_entry(&entry, cpu);
545 if (!s) 516 if (!sample)
546 break; 517 break;
547 518
548 if (is_code(s->eip)) { 519 if (is_code(sample->eip)) {
549 switch (s->event) { 520 flags = sample->event;
550 case 0: 521 if (flags & TRACE_BEGIN) {
551 case CPU_IS_KERNEL: 522 state = sb_bt_start;
523 add_trace_begin();
524 }
525 if (flags & KERNEL_CTX_SWITCH) {
552 /* kernel/userspace switch */ 526 /* kernel/userspace switch */
553 in_kernel = s->event; 527 in_kernel = flags & IS_KERNEL;
554 if (state == sb_buffer_start) 528 if (state == sb_buffer_start)
555 state = sb_sample_start; 529 state = sb_sample_start;
556 add_kernel_ctx_switch(s->event); 530 add_kernel_ctx_switch(flags & IS_KERNEL);
557 break; 531 }
558 case CPU_TRACE_BEGIN: 532 if (flags & USER_CTX_SWITCH
559 state = sb_bt_start; 533 && op_cpu_buffer_get_data(&entry, &val)) {
560 add_trace_begin();
561 break;
562#ifdef CONFIG_OPROFILE_IBS
563 case IBS_FETCH_BEGIN:
564 state = sb_bt_start;
565 add_ibs_begin(cpu, IBS_FETCH_CODE, mm);
566 break;
567 case IBS_OP_BEGIN:
568 state = sb_bt_start;
569 add_ibs_begin(cpu, IBS_OP_CODE, mm);
570 break;
571#endif
572 default:
573 /* userspace context switch */ 534 /* userspace context switch */
535 new = (struct task_struct *)val;
574 oldmm = mm; 536 oldmm = mm;
575 new = (struct task_struct *)s->event;
576 release_mm(oldmm); 537 release_mm(oldmm);
577 mm = take_tasks_mm(new); 538 mm = take_tasks_mm(new);
578 if (mm != oldmm) 539 if (mm != oldmm)
579 cookie = get_exec_dcookie(mm); 540 cookie = get_exec_dcookie(mm);
580 add_user_ctx_switch(new, cookie); 541 add_user_ctx_switch(new, cookie);
581 break;
582 }
583 } else if (state >= sb_bt_start &&
584 !add_sample(mm, s, in_kernel)) {
585 if (state == sb_bt_start) {
586 state = sb_bt_ignore;
587 atomic_inc(&oprofile_stats.bt_lost_no_mapping);
588 } 542 }
543 if (op_cpu_buffer_get_size(&entry))
544 add_data(&entry, mm);
545 continue;
546 }
547
548 if (state < sb_bt_start)
549 /* ignore sample */
550 continue;
551
552 if (add_sample(mm, sample, in_kernel))
553 continue;
554
555 /* ignore backtraces if failed to add a sample */
556 if (state == sb_bt_start) {
557 state = sb_bt_ignore;
558 atomic_inc(&oprofile_stats.bt_lost_no_mapping);
589 } 559 }
590 } 560 }
591 release_mm(mm); 561 release_mm(mm);
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 61090969158f..2e03b6d796d3 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -1,11 +1,12 @@
1/** 1/**
2 * @file cpu_buffer.c 2 * @file cpu_buffer.c
3 * 3 *
4 * @remark Copyright 2002 OProfile authors 4 * @remark Copyright 2002-2009 OProfile authors
5 * @remark Read the file COPYING 5 * @remark Read the file COPYING
6 * 6 *
7 * @author John Levon <levon@movementarian.org> 7 * @author John Levon <levon@movementarian.org>
8 * @author Barry Kasindorf <barry.kasindorf@amd.com> 8 * @author Barry Kasindorf <barry.kasindorf@amd.com>
9 * @author Robert Richter <robert.richter@amd.com>
9 * 10 *
10 * Each CPU has a local buffer that stores PC value/event 11 * Each CPU has a local buffer that stores PC value/event
11 * pairs. We also log context switches when we notice them. 12 * pairs. We also log context switches when we notice them.
@@ -45,8 +46,8 @@
45 * can be changed to a single buffer solution when the ring buffer 46 * can be changed to a single buffer solution when the ring buffer
46 * access is implemented as non-locking atomic code. 47 * access is implemented as non-locking atomic code.
47 */ 48 */
48struct ring_buffer *op_ring_buffer_read; 49static struct ring_buffer *op_ring_buffer_read;
49struct ring_buffer *op_ring_buffer_write; 50static struct ring_buffer *op_ring_buffer_write;
50DEFINE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer); 51DEFINE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer);
51 52
52static void wq_sync_buffer(struct work_struct *work); 53static void wq_sync_buffer(struct work_struct *work);
@@ -54,19 +55,9 @@ static void wq_sync_buffer(struct work_struct *work);
54#define DEFAULT_TIMER_EXPIRE (HZ / 10) 55#define DEFAULT_TIMER_EXPIRE (HZ / 10)
55static int work_enabled; 56static int work_enabled;
56 57
57void free_cpu_buffers(void)
58{
59 if (op_ring_buffer_read)
60 ring_buffer_free(op_ring_buffer_read);
61 op_ring_buffer_read = NULL;
62 if (op_ring_buffer_write)
63 ring_buffer_free(op_ring_buffer_write);
64 op_ring_buffer_write = NULL;
65}
66
67unsigned long oprofile_get_cpu_buffer_size(void) 58unsigned long oprofile_get_cpu_buffer_size(void)
68{ 59{
69 return fs_cpu_buffer_size; 60 return oprofile_cpu_buffer_size;
70} 61}
71 62
72void oprofile_cpu_buffer_inc_smpl_lost(void) 63void oprofile_cpu_buffer_inc_smpl_lost(void)
@@ -77,11 +68,21 @@ void oprofile_cpu_buffer_inc_smpl_lost(void)
77 cpu_buf->sample_lost_overflow++; 68 cpu_buf->sample_lost_overflow++;
78} 69}
79 70
71void free_cpu_buffers(void)
72{
73 if (op_ring_buffer_read)
74 ring_buffer_free(op_ring_buffer_read);
75 op_ring_buffer_read = NULL;
76 if (op_ring_buffer_write)
77 ring_buffer_free(op_ring_buffer_write);
78 op_ring_buffer_write = NULL;
79}
80
80int alloc_cpu_buffers(void) 81int alloc_cpu_buffers(void)
81{ 82{
82 int i; 83 int i;
83 84
84 unsigned long buffer_size = fs_cpu_buffer_size; 85 unsigned long buffer_size = oprofile_cpu_buffer_size;
85 86
86 op_ring_buffer_read = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS); 87 op_ring_buffer_read = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS);
87 if (!op_ring_buffer_read) 88 if (!op_ring_buffer_read)
@@ -97,8 +98,6 @@ int alloc_cpu_buffers(void)
97 b->last_is_kernel = -1; 98 b->last_is_kernel = -1;
98 b->tracing = 0; 99 b->tracing = 0;
99 b->buffer_size = buffer_size; 100 b->buffer_size = buffer_size;
100 b->tail_pos = 0;
101 b->head_pos = 0;
102 b->sample_received = 0; 101 b->sample_received = 0;
103 b->sample_lost_overflow = 0; 102 b->sample_lost_overflow = 0;
104 b->backtrace_aborted = 0; 103 b->backtrace_aborted = 0;
@@ -145,47 +144,156 @@ void end_cpu_work(void)
145 flush_scheduled_work(); 144 flush_scheduled_work();
146} 145}
147 146
148static inline int 147/*
149add_sample(struct oprofile_cpu_buffer *cpu_buf, 148 * This function prepares the cpu buffer to write a sample.
150 unsigned long pc, unsigned long event) 149 *
150 * Struct op_entry is used during operations on the ring buffer while
151 * struct op_sample contains the data that is stored in the ring
152 * buffer. Struct entry can be uninitialized. The function reserves a
153 * data array that is specified by size. Use
154 * op_cpu_buffer_write_commit() after preparing the sample. In case of
155 * errors a null pointer is returned, otherwise the pointer to the
156 * sample.
157 *
158 */
159struct op_sample
160*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size)
161{
162 entry->event = ring_buffer_lock_reserve
163 (op_ring_buffer_write, sizeof(struct op_sample) +
164 size * sizeof(entry->sample->data[0]), &entry->irq_flags);
165 if (entry->event)
166 entry->sample = ring_buffer_event_data(entry->event);
167 else
168 entry->sample = NULL;
169
170 if (!entry->sample)
171 return NULL;
172
173 entry->size = size;
174 entry->data = entry->sample->data;
175
176 return entry->sample;
177}
178
179int op_cpu_buffer_write_commit(struct op_entry *entry)
180{
181 return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event,
182 entry->irq_flags);
183}
184
185struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
186{
187 struct ring_buffer_event *e;
188 e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
189 if (e)
190 goto event;
191 if (ring_buffer_swap_cpu(op_ring_buffer_read,
192 op_ring_buffer_write,
193 cpu))
194 return NULL;
195 e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
196 if (e)
197 goto event;
198 return NULL;
199
200event:
201 entry->event = e;
202 entry->sample = ring_buffer_event_data(e);
203 entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample))
204 / sizeof(entry->sample->data[0]);
205 entry->data = entry->sample->data;
206 return entry->sample;
207}
208
209unsigned long op_cpu_buffer_entries(int cpu)
210{
211 return ring_buffer_entries_cpu(op_ring_buffer_read, cpu)
212 + ring_buffer_entries_cpu(op_ring_buffer_write, cpu);
213}
214
215static int
216op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace,
217 int is_kernel, struct task_struct *task)
151{ 218{
152 struct op_entry entry; 219 struct op_entry entry;
153 int ret; 220 struct op_sample *sample;
221 unsigned long flags;
222 int size;
223
224 flags = 0;
225
226 if (backtrace)
227 flags |= TRACE_BEGIN;
228
229 /* notice a switch from user->kernel or vice versa */
230 is_kernel = !!is_kernel;
231 if (cpu_buf->last_is_kernel != is_kernel) {
232 cpu_buf->last_is_kernel = is_kernel;
233 flags |= KERNEL_CTX_SWITCH;
234 if (is_kernel)
235 flags |= IS_KERNEL;
236 }
237
238 /* notice a task switch */
239 if (cpu_buf->last_task != task) {
240 cpu_buf->last_task = task;
241 flags |= USER_CTX_SWITCH;
242 }
243
244 if (!flags)
245 /* nothing to do */
246 return 0;
247
248 if (flags & USER_CTX_SWITCH)
249 size = 1;
250 else
251 size = 0;
252
253 sample = op_cpu_buffer_write_reserve(&entry, size);
254 if (!sample)
255 return -ENOMEM;
154 256
155 ret = cpu_buffer_write_entry(&entry); 257 sample->eip = ESCAPE_CODE;
156 if (ret) 258 sample->event = flags;
157 return ret;
158 259
159 entry.sample->eip = pc; 260 if (size)
160 entry.sample->event = event; 261 op_cpu_buffer_add_data(&entry, (unsigned long)task);
161 262
162 ret = cpu_buffer_write_commit(&entry); 263 op_cpu_buffer_write_commit(&entry);
163 if (ret)
164 return ret;
165 264
166 return 0; 265 return 0;
167} 266}
168 267
169static inline int 268static inline int
170add_code(struct oprofile_cpu_buffer *buffer, unsigned long value) 269op_add_sample(struct oprofile_cpu_buffer *cpu_buf,
270 unsigned long pc, unsigned long event)
171{ 271{
172 return add_sample(buffer, ESCAPE_CODE, value); 272 struct op_entry entry;
273 struct op_sample *sample;
274
275 sample = op_cpu_buffer_write_reserve(&entry, 0);
276 if (!sample)
277 return -ENOMEM;
278
279 sample->eip = pc;
280 sample->event = event;
281
282 return op_cpu_buffer_write_commit(&entry);
173} 283}
174 284
175/* This must be safe from any context. It's safe writing here 285/*
176 * because of the head/tail separation of the writer and reader 286 * This must be safe from any context.
177 * of the CPU buffer.
178 * 287 *
179 * is_kernel is needed because on some architectures you cannot 288 * is_kernel is needed because on some architectures you cannot
180 * tell if you are in kernel or user space simply by looking at 289 * tell if you are in kernel or user space simply by looking at
181 * pc. We tag this in the buffer by generating kernel enter/exit 290 * pc. We tag this in the buffer by generating kernel enter/exit
182 * events whenever is_kernel changes 291 * events whenever is_kernel changes
183 */ 292 */
184static int log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, 293static int
185 int is_kernel, unsigned long event) 294log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
295 unsigned long backtrace, int is_kernel, unsigned long event)
186{ 296{
187 struct task_struct *task;
188
189 cpu_buf->sample_received++; 297 cpu_buf->sample_received++;
190 298
191 if (pc == ESCAPE_CODE) { 299 if (pc == ESCAPE_CODE) {
@@ -193,25 +301,10 @@ static int log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
193 return 0; 301 return 0;
194 } 302 }
195 303
196 is_kernel = !!is_kernel; 304 if (op_add_code(cpu_buf, backtrace, is_kernel, current))
197 305 goto fail;
198 task = current;
199
200 /* notice a switch from user->kernel or vice versa */
201 if (cpu_buf->last_is_kernel != is_kernel) {
202 cpu_buf->last_is_kernel = is_kernel;
203 if (add_code(cpu_buf, is_kernel))
204 goto fail;
205 }
206
207 /* notice a task switch */
208 if (cpu_buf->last_task != task) {
209 cpu_buf->last_task = task;
210 if (add_code(cpu_buf, (unsigned long)task))
211 goto fail;
212 }
213 306
214 if (add_sample(cpu_buf, pc, event)) 307 if (op_add_sample(cpu_buf, pc, event))
215 goto fail; 308 goto fail;
216 309
217 return 1; 310 return 1;
@@ -221,109 +314,102 @@ fail:
221 return 0; 314 return 0;
222} 315}
223 316
224static int oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf) 317static inline void oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
225{ 318{
226 add_code(cpu_buf, CPU_TRACE_BEGIN);
227 cpu_buf->tracing = 1; 319 cpu_buf->tracing = 1;
228 return 1;
229} 320}
230 321
231static void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf) 322static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf)
232{ 323{
233 cpu_buf->tracing = 0; 324 cpu_buf->tracing = 0;
234} 325}
235 326
236void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, 327static inline void
237 unsigned long event, int is_kernel) 328__oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
329 unsigned long event, int is_kernel)
238{ 330{
239 struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); 331 struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
240 332 unsigned long backtrace = oprofile_backtrace_depth;
241 if (!backtrace_depth) {
242 log_sample(cpu_buf, pc, is_kernel, event);
243 return;
244 }
245
246 if (!oprofile_begin_trace(cpu_buf))
247 return;
248 333
249 /* 334 /*
250 * if log_sample() fail we can't backtrace since we lost the 335 * if log_sample() fail we can't backtrace since we lost the
251 * source of this event 336 * source of this event
252 */ 337 */
253 if (log_sample(cpu_buf, pc, is_kernel, event)) 338 if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event))
254 oprofile_ops.backtrace(regs, backtrace_depth); 339 /* failed */
340 return;
341
342 if (!backtrace)
343 return;
344
345 oprofile_begin_trace(cpu_buf);
346 oprofile_ops.backtrace(regs, backtrace);
255 oprofile_end_trace(cpu_buf); 347 oprofile_end_trace(cpu_buf);
256} 348}
257 349
350void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
351 unsigned long event, int is_kernel)
352{
353 __oprofile_add_ext_sample(pc, regs, event, is_kernel);
354}
355
258void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) 356void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
259{ 357{
260 int is_kernel = !user_mode(regs); 358 int is_kernel = !user_mode(regs);
261 unsigned long pc = profile_pc(regs); 359 unsigned long pc = profile_pc(regs);
262 360
263 oprofile_add_ext_sample(pc, regs, event, is_kernel); 361 __oprofile_add_ext_sample(pc, regs, event, is_kernel);
264} 362}
265 363
266#ifdef CONFIG_OPROFILE_IBS 364/*
267 365 * Add samples with data to the ring buffer.
268#define MAX_IBS_SAMPLE_SIZE 14 366 *
269 367 * Use oprofile_add_data(&entry, val) to add data and
270void oprofile_add_ibs_sample(struct pt_regs * const regs, 368 * oprofile_write_commit(&entry) to commit the sample.
271 unsigned int * const ibs_sample, int ibs_code) 369 */
370void
371oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs,
372 unsigned long pc, int code, int size)
272{ 373{
374 struct op_sample *sample;
273 int is_kernel = !user_mode(regs); 375 int is_kernel = !user_mode(regs);
274 struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); 376 struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
275 struct task_struct *task;
276 int fail = 0;
277 377
278 cpu_buf->sample_received++; 378 cpu_buf->sample_received++;
279 379
280 /* notice a switch from user->kernel or vice versa */ 380 /* no backtraces for samples with data */
281 if (cpu_buf->last_is_kernel != is_kernel) { 381 if (op_add_code(cpu_buf, 0, is_kernel, current))
282 if (add_code(cpu_buf, is_kernel)) 382 goto fail;
283 goto fail;
284 cpu_buf->last_is_kernel = is_kernel;
285 }
286
287 /* notice a task switch */
288 if (!is_kernel) {
289 task = current;
290 if (cpu_buf->last_task != task) {
291 if (add_code(cpu_buf, (unsigned long)task))
292 goto fail;
293 cpu_buf->last_task = task;
294 }
295 }
296
297 fail = fail || add_code(cpu_buf, ibs_code);
298 fail = fail || add_sample(cpu_buf, ibs_sample[0], ibs_sample[1]);
299 fail = fail || add_sample(cpu_buf, ibs_sample[2], ibs_sample[3]);
300 fail = fail || add_sample(cpu_buf, ibs_sample[4], ibs_sample[5]);
301
302 if (ibs_code == IBS_OP_BEGIN) {
303 fail = fail || add_sample(cpu_buf, ibs_sample[6], ibs_sample[7]);
304 fail = fail || add_sample(cpu_buf, ibs_sample[8], ibs_sample[9]);
305 fail = fail || add_sample(cpu_buf, ibs_sample[10], ibs_sample[11]);
306 }
307 383
308 if (fail) 384 sample = op_cpu_buffer_write_reserve(entry, size + 2);
385 if (!sample)
309 goto fail; 386 goto fail;
387 sample->eip = ESCAPE_CODE;
388 sample->event = 0; /* no flags */
310 389
311 if (backtrace_depth) 390 op_cpu_buffer_add_data(entry, code);
312 oprofile_ops.backtrace(regs, backtrace_depth); 391 op_cpu_buffer_add_data(entry, pc);
313 392
314 return; 393 return;
315 394
316fail: 395fail:
317 cpu_buf->sample_lost_overflow++; 396 cpu_buf->sample_lost_overflow++;
318 return;
319} 397}
320 398
321#endif 399int oprofile_add_data(struct op_entry *entry, unsigned long val)
400{
401 return op_cpu_buffer_add_data(entry, val);
402}
403
404int oprofile_write_commit(struct op_entry *entry)
405{
406 return op_cpu_buffer_write_commit(entry);
407}
322 408
323void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event) 409void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
324{ 410{
325 struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); 411 struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
326 log_sample(cpu_buf, pc, is_kernel, event); 412 log_sample(cpu_buf, pc, 0, is_kernel, event);
327} 413}
328 414
329void oprofile_add_trace(unsigned long pc) 415void oprofile_add_trace(unsigned long pc)
@@ -340,7 +426,7 @@ void oprofile_add_trace(unsigned long pc)
340 if (pc == ESCAPE_CODE) 426 if (pc == ESCAPE_CODE)
341 goto fail; 427 goto fail;
342 428
343 if (add_sample(cpu_buf, pc, 0)) 429 if (op_add_sample(cpu_buf, pc, 0))
344 goto fail; 430 goto fail;
345 431
346 return; 432 return;
diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h
index aacb0f0bc566..63f81c44846a 100644
--- a/drivers/oprofile/cpu_buffer.h
+++ b/drivers/oprofile/cpu_buffer.h
@@ -1,10 +1,11 @@
1/** 1/**
2 * @file cpu_buffer.h 2 * @file cpu_buffer.h
3 * 3 *
4 * @remark Copyright 2002 OProfile authors 4 * @remark Copyright 2002-2009 OProfile authors
5 * @remark Read the file COPYING 5 * @remark Read the file COPYING
6 * 6 *
7 * @author John Levon <levon@movementarian.org> 7 * @author John Levon <levon@movementarian.org>
8 * @author Robert Richter <robert.richter@amd.com>
8 */ 9 */
9 10
10#ifndef OPROFILE_CPU_BUFFER_H 11#ifndef OPROFILE_CPU_BUFFER_H
@@ -31,17 +32,12 @@ void end_cpu_work(void);
31struct op_sample { 32struct op_sample {
32 unsigned long eip; 33 unsigned long eip;
33 unsigned long event; 34 unsigned long event;
35 unsigned long data[0];
34}; 36};
35 37
36struct op_entry { 38struct op_entry;
37 struct ring_buffer_event *event;
38 struct op_sample *sample;
39 unsigned long irq_flags;
40};
41 39
42struct oprofile_cpu_buffer { 40struct oprofile_cpu_buffer {
43 volatile unsigned long head_pos;
44 volatile unsigned long tail_pos;
45 unsigned long buffer_size; 41 unsigned long buffer_size;
46 struct task_struct *last_task; 42 struct task_struct *last_task;
47 int last_is_kernel; 43 int last_is_kernel;
@@ -54,8 +50,6 @@ struct oprofile_cpu_buffer {
54 struct delayed_work work; 50 struct delayed_work work;
55}; 51};
56 52
57extern struct ring_buffer *op_ring_buffer_read;
58extern struct ring_buffer *op_ring_buffer_write;
59DECLARE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer); 53DECLARE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer);
60 54
61/* 55/*
@@ -64,7 +58,7 @@ DECLARE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer);
64 * reset these to invalid values; the next sample collected will 58 * reset these to invalid values; the next sample collected will
65 * populate the buffer with proper values to initialize the buffer 59 * populate the buffer with proper values to initialize the buffer
66 */ 60 */
67static inline void cpu_buffer_reset(int cpu) 61static inline void op_cpu_buffer_reset(int cpu)
68{ 62{
69 struct oprofile_cpu_buffer *cpu_buf = &per_cpu(cpu_buffer, cpu); 63 struct oprofile_cpu_buffer *cpu_buf = &per_cpu(cpu_buffer, cpu);
70 64
@@ -72,55 +66,48 @@ static inline void cpu_buffer_reset(int cpu)
72 cpu_buf->last_task = NULL; 66 cpu_buf->last_task = NULL;
73} 67}
74 68
75static inline int cpu_buffer_write_entry(struct op_entry *entry) 69struct op_sample
76{ 70*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size);
77 entry->event = ring_buffer_lock_reserve(op_ring_buffer_write, 71int op_cpu_buffer_write_commit(struct op_entry *entry);
78 sizeof(struct op_sample), 72struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu);
79 &entry->irq_flags); 73unsigned long op_cpu_buffer_entries(int cpu);
80 if (entry->event)
81 entry->sample = ring_buffer_event_data(entry->event);
82 else
83 entry->sample = NULL;
84
85 if (!entry->sample)
86 return -ENOMEM;
87
88 return 0;
89}
90 74
91static inline int cpu_buffer_write_commit(struct op_entry *entry) 75/* returns the remaining free size of data in the entry */
76static inline
77int op_cpu_buffer_add_data(struct op_entry *entry, unsigned long val)
92{ 78{
93 return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event, 79 if (!entry->size)
94 entry->irq_flags); 80 return 0;
81 *entry->data = val;
82 entry->size--;
83 entry->data++;
84 return entry->size;
95} 85}
96 86
97static inline struct op_sample *cpu_buffer_read_entry(int cpu) 87/* returns the size of data in the entry */
88static inline
89int op_cpu_buffer_get_size(struct op_entry *entry)
98{ 90{
99 struct ring_buffer_event *e; 91 return entry->size;
100 e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
101 if (e)
102 return ring_buffer_event_data(e);
103 if (ring_buffer_swap_cpu(op_ring_buffer_read,
104 op_ring_buffer_write,
105 cpu))
106 return NULL;
107 e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
108 if (e)
109 return ring_buffer_event_data(e);
110 return NULL;
111} 92}
112 93
113/* "acquire" as many cpu buffer slots as we can */ 94/* returns 0 if empty or the size of data including the current value */
114static inline unsigned long cpu_buffer_entries(int cpu) 95static inline
96int op_cpu_buffer_get_data(struct op_entry *entry, unsigned long *val)
115{ 97{
116 return ring_buffer_entries_cpu(op_ring_buffer_read, cpu) 98 int size = entry->size;
117 + ring_buffer_entries_cpu(op_ring_buffer_write, cpu); 99 if (!size)
100 return 0;
101 *val = *entry->data;
102 entry->size--;
103 entry->data++;
104 return size;
118} 105}
119 106
120/* transient events for the CPU buffer -> event buffer */ 107/* extra data flags */
121#define CPU_IS_KERNEL 1 108#define KERNEL_CTX_SWITCH (1UL << 0)
122#define CPU_TRACE_BEGIN 2 109#define IS_KERNEL (1UL << 1)
123#define IBS_FETCH_BEGIN 3 110#define TRACE_BEGIN (1UL << 2)
124#define IBS_OP_BEGIN 4 111#define USER_CTX_SWITCH (1UL << 3)
125 112
126#endif /* OPROFILE_CPU_BUFFER_H */ 113#endif /* OPROFILE_CPU_BUFFER_H */
diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c
index 191a3202cecc..2b7ae366ceb1 100644
--- a/drivers/oprofile/event_buffer.c
+++ b/drivers/oprofile/event_buffer.c
@@ -73,8 +73,8 @@ int alloc_event_buffer(void)
73 unsigned long flags; 73 unsigned long flags;
74 74
75 spin_lock_irqsave(&oprofilefs_lock, flags); 75 spin_lock_irqsave(&oprofilefs_lock, flags);
76 buffer_size = fs_buffer_size; 76 buffer_size = oprofile_buffer_size;
77 buffer_watershed = fs_buffer_watershed; 77 buffer_watershed = oprofile_buffer_watershed;
78 spin_unlock_irqrestore(&oprofilefs_lock, flags); 78 spin_unlock_irqrestore(&oprofilefs_lock, flags);
79 79
80 if (buffer_watershed >= buffer_size) 80 if (buffer_watershed >= buffer_size)
diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c
index cd375907f26f..3cffce90f82a 100644
--- a/drivers/oprofile/oprof.c
+++ b/drivers/oprofile/oprof.c
@@ -23,7 +23,7 @@
23struct oprofile_operations oprofile_ops; 23struct oprofile_operations oprofile_ops;
24 24
25unsigned long oprofile_started; 25unsigned long oprofile_started;
26unsigned long backtrace_depth; 26unsigned long oprofile_backtrace_depth;
27static unsigned long is_setup; 27static unsigned long is_setup;
28static DEFINE_MUTEX(start_mutex); 28static DEFINE_MUTEX(start_mutex);
29 29
@@ -172,7 +172,7 @@ int oprofile_set_backtrace(unsigned long val)
172 goto out; 172 goto out;
173 } 173 }
174 174
175 backtrace_depth = val; 175 oprofile_backtrace_depth = val;
176 176
177out: 177out:
178 mutex_unlock(&start_mutex); 178 mutex_unlock(&start_mutex);
diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h
index 5df0c21a608f..c288d3c24b50 100644
--- a/drivers/oprofile/oprof.h
+++ b/drivers/oprofile/oprof.h
@@ -21,12 +21,12 @@ void oprofile_stop(void);
21 21
22struct oprofile_operations; 22struct oprofile_operations;
23 23
24extern unsigned long fs_buffer_size; 24extern unsigned long oprofile_buffer_size;
25extern unsigned long fs_cpu_buffer_size; 25extern unsigned long oprofile_cpu_buffer_size;
26extern unsigned long fs_buffer_watershed; 26extern unsigned long oprofile_buffer_watershed;
27extern struct oprofile_operations oprofile_ops; 27extern struct oprofile_operations oprofile_ops;
28extern unsigned long oprofile_started; 28extern unsigned long oprofile_started;
29extern unsigned long backtrace_depth; 29extern unsigned long oprofile_backtrace_depth;
30 30
31struct super_block; 31struct super_block;
32struct dentry; 32struct dentry;
diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c
index d8201998b0b7..5d36ffc30dd5 100644
--- a/drivers/oprofile/oprofile_files.c
+++ b/drivers/oprofile/oprofile_files.c
@@ -14,17 +14,18 @@
14#include "oprofile_stats.h" 14#include "oprofile_stats.h"
15#include "oprof.h" 15#include "oprof.h"
16 16
17#define FS_BUFFER_SIZE_DEFAULT 131072 17#define BUFFER_SIZE_DEFAULT 131072
18#define FS_CPU_BUFFER_SIZE_DEFAULT 8192 18#define CPU_BUFFER_SIZE_DEFAULT 8192
19#define FS_BUFFER_WATERSHED_DEFAULT 32768 /* FIXME: tune */ 19#define BUFFER_WATERSHED_DEFAULT 32768 /* FIXME: tune */
20 20
21unsigned long fs_buffer_size; 21unsigned long oprofile_buffer_size;
22unsigned long fs_cpu_buffer_size; 22unsigned long oprofile_cpu_buffer_size;
23unsigned long fs_buffer_watershed; 23unsigned long oprofile_buffer_watershed;
24 24
25static ssize_t depth_read(struct file *file, char __user *buf, size_t count, loff_t *offset) 25static ssize_t depth_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
26{ 26{
27 return oprofilefs_ulong_to_user(backtrace_depth, buf, count, offset); 27 return oprofilefs_ulong_to_user(oprofile_backtrace_depth, buf, count,
28 offset);
28} 29}
29 30
30 31
@@ -125,16 +126,16 @@ static const struct file_operations dump_fops = {
125void oprofile_create_files(struct super_block *sb, struct dentry *root) 126void oprofile_create_files(struct super_block *sb, struct dentry *root)
126{ 127{
127 /* reinitialize default values */ 128 /* reinitialize default values */
128 fs_buffer_size = FS_BUFFER_SIZE_DEFAULT; 129 oprofile_buffer_size = BUFFER_SIZE_DEFAULT;
129 fs_cpu_buffer_size = FS_CPU_BUFFER_SIZE_DEFAULT; 130 oprofile_cpu_buffer_size = CPU_BUFFER_SIZE_DEFAULT;
130 fs_buffer_watershed = FS_BUFFER_WATERSHED_DEFAULT; 131 oprofile_buffer_watershed = BUFFER_WATERSHED_DEFAULT;
131 132
132 oprofilefs_create_file(sb, root, "enable", &enable_fops); 133 oprofilefs_create_file(sb, root, "enable", &enable_fops);
133 oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666); 134 oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
134 oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops); 135 oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
135 oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size); 136 oprofilefs_create_ulong(sb, root, "buffer_size", &oprofile_buffer_size);
136 oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed); 137 oprofilefs_create_ulong(sb, root, "buffer_watershed", &oprofile_buffer_watershed);
137 oprofilefs_create_ulong(sb, root, "cpu_buffer_size", &fs_cpu_buffer_size); 138 oprofilefs_create_ulong(sb, root, "cpu_buffer_size", &oprofile_cpu_buffer_size);
138 oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops); 139 oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops);
139 oprofilefs_create_file(sb, root, "backtrace_depth", &depth_fops); 140 oprofilefs_create_file(sb, root, "backtrace_depth", &depth_fops);
140 oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops); 141 oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops);
diff --git a/drivers/parisc/asp.c b/drivers/parisc/asp.c
index 821369135369..7931133526c4 100644
--- a/drivers/parisc/asp.c
+++ b/drivers/parisc/asp.c
@@ -71,8 +71,7 @@ static void asp_choose_irq(struct parisc_device *dev, void *ctrl)
71 */ 71 */
72#define ASP_INTERRUPT_ADDR 0xf0800000 72#define ASP_INTERRUPT_ADDR 0xf0800000
73 73
74int __init 74static int __init asp_init_chip(struct parisc_device *dev)
75asp_init_chip(struct parisc_device *dev)
76{ 75{
77 struct gsc_irq gsc_irq; 76 struct gsc_irq gsc_irq;
78 int ret; 77 int ret;
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index dcc1e9958d2f..cd4dd7ed2c06 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -555,7 +555,7 @@ static u32 hint_lookup[] = {
555 * (Load Coherence Index) instruction. The 8 bits used for the virtual 555 * (Load Coherence Index) instruction. The 8 bits used for the virtual
556 * index are bits 12:19 of the value returned by LCI. 556 * index are bits 12:19 of the value returned by LCI.
557 */ 557 */
558void CCIO_INLINE 558static void CCIO_INLINE
559ccio_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba, 559ccio_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba,
560 unsigned long hints) 560 unsigned long hints)
561{ 561{
@@ -1578,8 +1578,6 @@ static int __init ccio_probe(struct parisc_device *dev)
1578 1578
1579 ioc_count++; 1579 ioc_count++;
1580 1580
1581 parisc_vmerge_boundary = IOVP_SIZE;
1582 parisc_vmerge_max_size = BITS_PER_LONG * IOVP_SIZE;
1583 parisc_has_iommu(); 1581 parisc_has_iommu();
1584 return 0; 1582 return 0;
1585} 1583}
diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c
index 77cc8bfef8c9..d539d9df88e7 100644
--- a/drivers/parisc/dino.c
+++ b/drivers/parisc/dino.c
@@ -287,7 +287,7 @@ DINO_PORT_OUT(b, 8, 3)
287DINO_PORT_OUT(w, 16, 2) 287DINO_PORT_OUT(w, 16, 2)
288DINO_PORT_OUT(l, 32, 0) 288DINO_PORT_OUT(l, 32, 0)
289 289
290struct pci_port_ops dino_port_ops = { 290static struct pci_port_ops dino_port_ops = {
291 .inb = dino_in8, 291 .inb = dino_in8,
292 .inw = dino_in16, 292 .inw = dino_in16,
293 .inl = dino_in32, 293 .inl = dino_in32,
@@ -690,7 +690,7 @@ dino_fixup_bus(struct pci_bus *bus)
690} 690}
691 691
692 692
693struct pci_bios_ops dino_bios_ops = { 693static struct pci_bios_ops dino_bios_ops = {
694 .init = dino_bios_init, 694 .init = dino_bios_init,
695 .fixup_bus = dino_fixup_bus 695 .fixup_bus = dino_fixup_bus
696}; 696};
diff --git a/drivers/parisc/hppb.c b/drivers/parisc/hppb.c
index 65eee67aa2ae..13856415b432 100644
--- a/drivers/parisc/hppb.c
+++ b/drivers/parisc/hppb.c
@@ -29,7 +29,7 @@ struct hppb_card {
29 struct hppb_card *next; 29 struct hppb_card *next;
30}; 30};
31 31
32struct hppb_card hppb_card_head = { 32static struct hppb_card hppb_card_head = {
33 .hpa = 0, 33 .hpa = 0,
34 .next = NULL, 34 .next = NULL,
35}; 35};
diff --git a/drivers/parisc/lasi.c b/drivers/parisc/lasi.c
index bee510098ce8..e65727ca9fc0 100644
--- a/drivers/parisc/lasi.c
+++ b/drivers/parisc/lasi.c
@@ -107,7 +107,7 @@ lasi_init_irq(struct gsc_asic *this_lasi)
107 107
108#else 108#else
109 109
110void __init lasi_led_init(unsigned long lasi_hpa) 110static void __init lasi_led_init(unsigned long lasi_hpa)
111{ 111{
112 unsigned long datareg; 112 unsigned long datareg;
113 113
@@ -163,8 +163,7 @@ static void lasi_power_off(void)
163 gsc_writel(0x02, datareg); 163 gsc_writel(0x02, datareg);
164} 164}
165 165
166int __init 166static int __init lasi_init_chip(struct parisc_device *dev)
167lasi_init_chip(struct parisc_device *dev)
168{ 167{
169 extern void (*chassis_power_off)(void); 168 extern void (*chassis_power_off)(void);
170 struct gsc_asic *lasi; 169 struct gsc_asic *lasi;
diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c
index a28c8946deaa..d8233de8c75d 100644
--- a/drivers/parisc/lba_pci.c
+++ b/drivers/parisc/lba_pci.c
@@ -824,7 +824,7 @@ lba_fixup_bus(struct pci_bus *bus)
824} 824}
825 825
826 826
827struct pci_bios_ops lba_bios_ops = { 827static struct pci_bios_ops lba_bios_ops = {
828 .init = lba_bios_init, 828 .init = lba_bios_init,
829 .fixup_bus = lba_fixup_bus, 829 .fixup_bus = lba_fixup_bus,
830}; 830};
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index bc73b96346ff..3fac8f81d59d 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -561,7 +561,7 @@ typedef unsigned long space_t;
561 * IOMMU uses little endian for the pdir. 561 * IOMMU uses little endian for the pdir.
562 */ 562 */
563 563
564void SBA_INLINE 564static void SBA_INLINE
565sba_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba, 565sba_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba,
566 unsigned long hint) 566 unsigned long hint)
567{ 567{
@@ -1874,7 +1874,7 @@ static struct parisc_device_id sba_tbl[] = {
1874 { 0, } 1874 { 0, }
1875}; 1875};
1876 1876
1877int sba_driver_callback(struct parisc_device *); 1877static int sba_driver_callback(struct parisc_device *);
1878 1878
1879static struct parisc_driver sba_driver = { 1879static struct parisc_driver sba_driver = {
1880 .name = MODULE_NAME, 1880 .name = MODULE_NAME,
@@ -1887,8 +1887,7 @@ static struct parisc_driver sba_driver = {
1887** If so, initialize the chip and tell other partners in crime they 1887** If so, initialize the chip and tell other partners in crime they
1888** have work to do. 1888** have work to do.
1889*/ 1889*/
1890int 1890static int sba_driver_callback(struct parisc_device *dev)
1891sba_driver_callback(struct parisc_device *dev)
1892{ 1891{
1893 struct sba_device *sba_dev; 1892 struct sba_device *sba_dev;
1894 u32 func_class; 1893 u32 func_class;
@@ -1979,8 +1978,6 @@ sba_driver_callback(struct parisc_device *dev)
1979 proc_create("sba_iommu-bitmap", 0, root, &sba_proc_bitmap_fops); 1978 proc_create("sba_iommu-bitmap", 0, root, &sba_proc_bitmap_fops);
1980#endif 1979#endif
1981 1980
1982 parisc_vmerge_boundary = IOVP_SIZE;
1983 parisc_vmerge_max_size = IOVP_SIZE * BITS_PER_LONG;
1984 parisc_has_iommu(); 1981 parisc_has_iommu();
1985 return 0; 1982 return 0;
1986} 1983}
diff --git a/drivers/parisc/wax.c b/drivers/parisc/wax.c
index 892a83bbe73d..da9d5ad1353c 100644
--- a/drivers/parisc/wax.c
+++ b/drivers/parisc/wax.c
@@ -68,8 +68,7 @@ wax_init_irq(struct gsc_asic *wax)
68// gsc_writel(0xFFFFFFFF, base+0x2000); /* RS232-B on Wax */ 68// gsc_writel(0xFFFFFFFF, base+0x2000); /* RS232-B on Wax */
69} 69}
70 70
71int __init 71static int __init wax_init_chip(struct parisc_device *dev)
72wax_init_chip(struct parisc_device *dev)
73{ 72{
74 struct gsc_asic *wax; 73 struct gsc_asic *wax;
75 struct parisc_device *parent; 74 struct parisc_device *parent;
diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c
index c62ab8d240aa..1c1141801060 100644
--- a/drivers/pci/hotplug/acpi_pcihp.c
+++ b/drivers/pci/hotplug/acpi_pcihp.c
@@ -33,7 +33,6 @@
33#include <linux/pci-acpi.h> 33#include <linux/pci-acpi.h>
34#include <acpi/acpi.h> 34#include <acpi/acpi.h>
35#include <acpi/acpi_bus.h> 35#include <acpi/acpi_bus.h>
36#include <acpi/actypes.h>
37 36
38#define MY_NAME "acpi_pcihp" 37#define MY_NAME "acpi_pcihp"
39 38
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 27fd18f019f8..db85284ffb62 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -217,7 +217,6 @@ struct hpc_ops {
217#ifdef CONFIG_ACPI 217#ifdef CONFIG_ACPI
218#include <acpi/acpi.h> 218#include <acpi/acpi.h>
219#include <acpi/acpi_bus.h> 219#include <acpi/acpi_bus.h>
220#include <acpi/actypes.h>
221#include <linux/pci-acpi.h> 220#include <linux/pci-acpi.h>
222 221
223extern void __init pciehp_acpi_slot_detection_init(void); 222extern void __init pciehp_acpi_slot_detection_init(void);
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 3582512e7226..deea8a187eb8 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -13,8 +13,6 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/pci-aspm.h> 14#include <linux/pci-aspm.h>
15#include <acpi/acpi.h> 15#include <acpi/acpi.h>
16#include <acpi/acnamesp.h>
17#include <acpi/acresrc.h>
18#include <acpi/acpi_bus.h> 16#include <acpi/acpi_bus.h>
19 17
20#include <linux/pci-acpi.h> 18#include <linux/pci-acpi.h>
diff --git a/drivers/platform/Kconfig b/drivers/platform/Kconfig
new file mode 100644
index 000000000000..9652c3fe7f5e
--- /dev/null
+++ b/drivers/platform/Kconfig
@@ -0,0 +1,5 @@
1# drivers/platform/Kconfig
2
3if X86
4source "drivers/platform/x86/Kconfig"
5endif
diff --git a/drivers/platform/Makefile b/drivers/platform/Makefile
new file mode 100644
index 000000000000..782953ae4c03
--- /dev/null
+++ b/drivers/platform/Makefile
@@ -0,0 +1,5 @@
1#
2# Makefile for linux/drivers/platform
3#
4
5obj-$(CONFIG_X86) += x86/
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
new file mode 100644
index 000000000000..e65448e99b48
--- /dev/null
+++ b/drivers/platform/x86/Kconfig
@@ -0,0 +1,375 @@
1#
2# X86 Platform Specific Drivers
3#
4
5menuconfig X86_PLATFORM_DEVICES
6 bool "X86 Platform Specific Device Drivers"
7 default y
8 ---help---
9 Say Y here to get to see options for device drivers for various
10 x86 platforms, including vendor-specific laptop extension drivers.
11 This option alone does not add any kernel code.
12
13 If you say N, all options in this submenu will be skipped and disabled.
14
15if X86_PLATFORM_DEVICES
16
17config ACER_WMI
18 tristate "Acer WMI Laptop Extras (EXPERIMENTAL)"
19 depends on EXPERIMENTAL
20 depends on ACPI
21 depends on LEDS_CLASS
22 depends on NEW_LEDS
23 depends on BACKLIGHT_CLASS_DEVICE
24 depends on SERIO_I8042
25 depends on RFKILL
26 select ACPI_WMI
27 ---help---
28 This is a driver for newer Acer (and Wistron) laptops. It adds
29 wireless radio and bluetooth control, and on some laptops,
30 exposes the mail LED and LCD backlight.
31
32 For more information about this driver see
33 <file:Documentation/laptops/acer-wmi.txt>
34
35 If you have an ACPI-WMI compatible Acer/ Wistron laptop, say Y or M
36 here.
37
38config ASUS_LAPTOP
39 tristate "Asus Laptop Extras (EXPERIMENTAL)"
40 depends on ACPI
41 depends on EXPERIMENTAL && !ACPI_ASUS
42 depends on LEDS_CLASS
43 depends on NEW_LEDS
44 depends on BACKLIGHT_CLASS_DEVICE
45 ---help---
46 This is the new Linux driver for Asus laptops. It may also support some
47 MEDION, JVC or VICTOR laptops. It makes all the extra buttons generate
48 standard ACPI events that go through /proc/acpi/events. It also adds
49 support for video output switching, LCD backlight control, Bluetooth and
50 Wlan control, and most importantly, allows you to blink those fancy LEDs.
51
52 For more information and a userspace daemon for handling the extra
53 buttons see <http://acpi4asus.sf.net/>.
54
55 If you have an ACPI-compatible ASUS laptop, say Y or M here.
56
57config FUJITSU_LAPTOP
58 tristate "Fujitsu Laptop Extras"
59 depends on ACPI
60 depends on INPUT
61 depends on BACKLIGHT_CLASS_DEVICE
62 ---help---
63 This is a driver for laptops built by Fujitsu:
64
65 * P2xxx/P5xxx/S6xxx/S7xxx series Lifebooks
66 * Possibly other Fujitsu laptop models
67 * Tested with S6410 and S7020
68
69 It adds support for LCD brightness control and some hotkeys.
70
71 If you have a Fujitsu laptop, say Y or M here.
72
73config FUJITSU_LAPTOP_DEBUG
74 bool "Verbose debug mode for Fujitsu Laptop Extras"
75 depends on FUJITSU_LAPTOP
76 default n
77 ---help---
78 Enables extra debug output from the fujitsu extras driver, at the
79 expense of a slight increase in driver size.
80
81 If you are not sure, say N here.
82
83config TC1100_WMI
84 tristate "HP Compaq TC1100 Tablet WMI Extras (EXPERIMENTAL)"
85 depends on !X86_64
86 depends on EXPERIMENTAL
87 depends on ACPI
88 select ACPI_WMI
89 ---help---
90 This is a driver for the WMI extensions (wireless and bluetooth power
91 control) of the HP Compaq TC1100 tablet.
92
93config HP_WMI
94 tristate "HP WMI extras"
95 depends on ACPI_WMI
96 depends on INPUT
97 depends on RFKILL
98 help
99 Say Y here if you want to support WMI-based hotkeys on HP laptops and
100 to read data from WMI such as docking or ambient light sensor state.
101
102 To compile this driver as a module, choose M here: the module will
103 be called hp-wmi.
104
105config MSI_LAPTOP
106 tristate "MSI Laptop Extras"
107 depends on ACPI
108 depends on BACKLIGHT_CLASS_DEVICE
109 ---help---
110 This is a driver for laptops built by MSI (MICRO-STAR
111 INTERNATIONAL):
112
113 MSI MegaBook S270 (MS-1013)
114 Cytron/TCM/Medion/Tchibo MD96100/SAM2000
115
116 It adds support for Bluetooth, WLAN and LCD brightness control.
117
118 More information about this driver is available at
119 <http://0pointer.de/lennart/tchibo.html>.
120
121 If you have an MSI S270 laptop, say Y or M here.
122
123config PANASONIC_LAPTOP
124 tristate "Panasonic Laptop Extras"
125 depends on INPUT && ACPI
126 depends on BACKLIGHT_CLASS_DEVICE
127 ---help---
128 This driver adds support for access to backlight control and hotkeys
129 on Panasonic Let's Note laptops.
130
131 If you have a Panasonic Let's note laptop (such as the R1(N variant),
132 R2, R3, R5, T2, W2 and Y2 series), say Y.
133
134config COMPAL_LAPTOP
135 tristate "Compal Laptop Extras"
136 depends on ACPI
137 depends on BACKLIGHT_CLASS_DEVICE
138 ---help---
139 This is a driver for laptops built by Compal:
140
141 Compal FL90/IFL90
142 Compal FL91/IFL91
143 Compal FL92/JFL92
144 Compal FT00/IFT00
145
146 It adds support for Bluetooth, WLAN and LCD brightness control.
147
148 If you have an Compal FL9x/IFL9x/FT00 laptop, say Y or M here.
149
150config SONY_LAPTOP
151 tristate "Sony Laptop Extras"
152 depends on ACPI
153 select BACKLIGHT_CLASS_DEVICE
154 depends on INPUT
155 ---help---
156 This mini-driver drives the SNC and SPIC devices present in the ACPI
157 BIOS of the Sony Vaio laptops.
158
159 It gives access to some extra laptop functionalities like Bluetooth,
160 screen brightness control, Fn keys and allows powering on/off some
161 devices.
162
163 Read <file:Documentation/laptops/sony-laptop.txt> for more information.
164
165config SONYPI_COMPAT
166 bool "Sonypi compatibility"
167 depends on SONY_LAPTOP
168 ---help---
169 Build the sonypi driver compatibility code into the sony-laptop driver.
170
171config THINKPAD_ACPI
172 tristate "ThinkPad ACPI Laptop Extras"
173 depends on ACPI
174 select BACKLIGHT_LCD_SUPPORT
175 select BACKLIGHT_CLASS_DEVICE
176 select HWMON
177 select NVRAM
178 select INPUT
179 select NEW_LEDS
180 select LEDS_CLASS
181 select NET
182 select RFKILL
183 ---help---
184 This is a driver for the IBM and Lenovo ThinkPad laptops. It adds
185 support for Fn-Fx key combinations, Bluetooth control, video
186 output switching, ThinkLight control, UltraBay eject and more.
187 For more information about this driver see
188 <file:Documentation/laptops/thinkpad-acpi.txt> and
189 <http://ibm-acpi.sf.net/> .
190
191 This driver was formerly known as ibm-acpi.
192
193 If you have an IBM or Lenovo ThinkPad laptop, say Y or M here.
194
195config THINKPAD_ACPI_DEBUG
196 bool "Verbose debug mode"
197 depends on THINKPAD_ACPI
198 default n
199 ---help---
200 Enables extra debugging information, at the expense of a slightly
201 increase in driver size.
202
203 If you are not sure, say N here.
204
205config THINKPAD_ACPI_DOCK
206 bool "Legacy Docking Station Support"
207 depends on THINKPAD_ACPI
208 depends on ACPI_DOCK=n
209 default n
210 ---help---
211 Allows the thinkpad_acpi driver to handle docking station events.
212 This support was made obsolete by the generic ACPI docking station
213 support (CONFIG_ACPI_DOCK). It will allow locking and removing the
214 laptop from the docking station, but will not properly connect PCI
215 devices.
216
217 If you are not sure, say N here.
218
219config THINKPAD_ACPI_BAY
220 bool "Legacy Removable Bay Support"
221 depends on THINKPAD_ACPI
222 default y
223 ---help---
224 Allows the thinkpad_acpi driver to handle removable bays. It will
225 electrically disable the device in the bay, and also generate
226 notifications when the bay lever is ejected or inserted.
227
228 If you are not sure, say Y here.
229
230config THINKPAD_ACPI_VIDEO
231 bool "Video output control support"
232 depends on THINKPAD_ACPI
233 default y
234 ---help---
235 Allows the thinkpad_acpi driver to provide an interface to control
236 the various video output ports.
237
238 This feature often won't work well, depending on ThinkPad model,
239 display state, video output devices in use, whether there is a X
240 server running, phase of the moon, and the current mood of
241 Schroedinger's cat. If you can use X.org's RandR to control
242 your ThinkPad's video output ports instead of this feature,
243 don't think twice: do it and say N here to save some memory.
244
245 If you are not sure, say Y here.
246
247config THINKPAD_ACPI_HOTKEY_POLL
248 bool "Support NVRAM polling for hot keys"
249 depends on THINKPAD_ACPI
250 default y
251 ---help---
252 Some thinkpad models benefit from NVRAM polling to detect a few of
253 the hot key press events. If you know your ThinkPad model does not
254 need to do NVRAM polling to support any of the hot keys you use,
255 unselecting this option will save about 1kB of memory.
256
257 ThinkPads T40 and newer, R52 and newer, and X31 and newer are
258 unlikely to need NVRAM polling in their latest BIOS versions.
259
260 NVRAM polling can detect at most the following keys: ThinkPad/Access
261 IBM, Zoom, Switch Display (fn+F7), ThinkLight, Volume up/down/mute,
262 Brightness up/down, Display Expand (fn+F8), Hibernate (fn+F12).
263
264 If you are not sure, say Y here. The driver enables polling only if
265 it is strictly necessary to do so.
266
267config INTEL_MENLOW
268 tristate "Thermal Management driver for Intel menlow platform"
269 depends on ACPI_THERMAL
270 select THERMAL
271 ---help---
272 ACPI thermal management enhancement driver on
273 Intel Menlow platform.
274
275 If unsure, say N.
276
277config EEEPC_LAPTOP
278 tristate "Eee PC Hotkey Driver (EXPERIMENTAL)"
279 depends on ACPI
280 depends on EXPERIMENTAL
281 select BACKLIGHT_CLASS_DEVICE
282 select HWMON
283 select RFKILL
284 ---help---
285 This driver supports the Fn-Fx keys on Eee PC laptops.
286 It also adds the ability to switch camera/wlan on/off.
287
288 If you have an Eee PC laptop, say Y or M here.
289
290
291config ACPI_WMI
292 tristate "WMI (EXPERIMENTAL)"
293 depends on ACPI
294 depends on EXPERIMENTAL
295 help
296 This driver adds support for the ACPI-WMI (Windows Management
297 Instrumentation) mapper device (PNP0C14) found on some systems.
298
299 ACPI-WMI is a proprietary extension to ACPI to expose parts of the
300 ACPI firmware to userspace - this is done through various vendor
301 defined methods and data blocks in a PNP0C14 device, which are then
302 made available for userspace to call.
303
304 The implementation of this in Linux currently only exposes this to
305 other kernel space drivers.
306
307 This driver is a required dependency to build the firmware specific
308 drivers needed on many machines, including Acer and HP laptops.
309
310 It is safe to enable this driver even if your DSDT doesn't define
311 any ACPI-WMI devices.
312
313config ACPI_ASUS
314 tristate "ASUS/Medion Laptop Extras"
315 depends on ACPI
316 select BACKLIGHT_CLASS_DEVICE
317 ---help---
318 This driver provides support for extra features of ACPI-compatible
319 ASUS laptops. As some of Medion laptops are made by ASUS, it may also
320 support some Medion laptops (such as 9675 for example). It makes all
321 the extra buttons generate standard ACPI events that go through
322 /proc/acpi/events, and (on some models) adds support for changing the
323 display brightness and output, switching the LCD backlight on and off,
324 and most importantly, allows you to blink those fancy LEDs intended
325 for reporting mail and wireless status.
326
327 Note: display switching code is currently considered EXPERIMENTAL,
328 toying with these values may even lock your machine.
329
330 All settings are changed via /proc/acpi/asus directory entries. Owner
331 and group for these entries can be set with asus_uid and asus_gid
332 parameters.
333
334 More information and a userspace daemon for handling the extra buttons
335 at <http://sourceforge.net/projects/acpi4asus/>.
336
337 If you have an ACPI-compatible ASUS laptop, say Y or M here. This
338 driver is still under development, so if your laptop is unsupported or
339 something works not quite as expected, please use the mailing list
340 available on the above page (acpi4asus-user@lists.sourceforge.net).
341
342 NOTE: This driver is deprecated and will probably be removed soon,
343 use asus-laptop instead.
344
345config ACPI_TOSHIBA
346 tristate "Toshiba Laptop Extras"
347 depends on ACPI
348 depends on INPUT
349 select INPUT_POLLDEV
350 select NET
351 select RFKILL
352 select BACKLIGHT_CLASS_DEVICE
353 ---help---
354 This driver adds support for access to certain system settings
355 on "legacy free" Toshiba laptops. These laptops can be recognized by
356 their lack of a BIOS setup menu and APM support.
357
358 On these machines, all system configuration is handled through the
359 ACPI. This driver is required for access to controls not covered
360 by the general ACPI drivers, such as LCD brightness, video output,
361 etc.
362
363 This driver differs from the non-ACPI Toshiba laptop driver (located
364 under "Processor type and features") in several aspects.
365 Configuration is accessed by reading and writing text files in the
366 /proc tree instead of by program interface to /dev. Furthermore, no
367 power management functions are exposed, as those are handled by the
368 general ACPI drivers.
369
370 More information about this driver is available at
371 <http://memebeam.org/toys/ToshibaAcpiDriver>.
372
373 If you have a legacy free Toshiba laptop (such as the Libretto L1
374 series), say Y.
375endif # X86_PLATFORM_DEVICES
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
new file mode 100644
index 000000000000..1e9de2ae0de5
--- /dev/null
+++ b/drivers/platform/x86/Makefile
@@ -0,0 +1,19 @@
1#
2# Makefile for linux/drivers/platform/x86
3# x86 Platform-Specific Drivers
4#
5obj-$(CONFIG_ASUS_LAPTOP) += asus-laptop.o
6obj-$(CONFIG_EEEPC_LAPTOP) += eeepc-laptop.o
7obj-$(CONFIG_MSI_LAPTOP) += msi-laptop.o
8obj-$(CONFIG_COMPAL_LAPTOP) += compal-laptop.o
9obj-$(CONFIG_ACER_WMI) += acer-wmi.o
10obj-$(CONFIG_HP_WMI) += hp-wmi.o
11obj-$(CONFIG_TC1100_WMI) += tc1100-wmi.o
12obj-$(CONFIG_SONY_LAPTOP) += sony-laptop.o
13obj-$(CONFIG_THINKPAD_ACPI) += thinkpad_acpi.o
14obj-$(CONFIG_FUJITSU_LAPTOP) += fujitsu-laptop.o
15obj-$(CONFIG_PANASONIC_LAPTOP) += panasonic-laptop.o
16obj-$(CONFIG_INTEL_MENLOW) += intel_menlow.o
17obj-$(CONFIG_ACPI_WMI) += wmi.o
18obj-$(CONFIG_ACPI_ASUS) += asus_acpi.o
19obj-$(CONFIG_ACPI_TOSHIBA) += toshiba_acpi.o
diff --git a/drivers/misc/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index 94c9f911824e..94c9f911824e 100644
--- a/drivers/misc/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
diff --git a/drivers/misc/asus-laptop.c b/drivers/platform/x86/asus-laptop.c
index 8fb8b3591048..8fb8b3591048 100644
--- a/drivers/misc/asus-laptop.c
+++ b/drivers/platform/x86/asus-laptop.c
diff --git a/drivers/acpi/asus_acpi.c b/drivers/platform/x86/asus_acpi.c
index 1e74988c7b2d..1e74988c7b2d 100644
--- a/drivers/acpi/asus_acpi.c
+++ b/drivers/platform/x86/asus_acpi.c
diff --git a/drivers/misc/compal-laptop.c b/drivers/platform/x86/compal-laptop.c
index 11003bba10d3..11003bba10d3 100644
--- a/drivers/misc/compal-laptop.c
+++ b/drivers/platform/x86/compal-laptop.c
diff --git a/drivers/misc/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 02fe2b8b8939..02fe2b8b8939 100644
--- a/drivers/misc/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
diff --git a/drivers/misc/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index a7dd3e9fb79d..65dc41540c62 100644
--- a/drivers/misc/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -3,6 +3,7 @@
3/* 3/*
4 Copyright (C) 2007,2008 Jonathan Woithe <jwoithe@physics.adelaide.edu.au> 4 Copyright (C) 2007,2008 Jonathan Woithe <jwoithe@physics.adelaide.edu.au>
5 Copyright (C) 2008 Peter Gruber <nokos@gmx.net> 5 Copyright (C) 2008 Peter Gruber <nokos@gmx.net>
6 Copyright (C) 2008 Tony Vroon <tony@linx.net>
6 Based on earlier work: 7 Based on earlier work:
7 Copyright (C) 2003 Shane Spencer <shane@bogomip.com> 8 Copyright (C) 2003 Shane Spencer <shane@bogomip.com>
8 Adrian Yee <brewt-fujitsu@brewt.org> 9 Adrian Yee <brewt-fujitsu@brewt.org>
@@ -65,8 +66,11 @@
65#include <linux/kfifo.h> 66#include <linux/kfifo.h>
66#include <linux/video_output.h> 67#include <linux/video_output.h>
67#include <linux/platform_device.h> 68#include <linux/platform_device.h>
69#ifdef CONFIG_LEDS_CLASS
70#include <linux/leds.h>
71#endif
68 72
69#define FUJITSU_DRIVER_VERSION "0.4.3" 73#define FUJITSU_DRIVER_VERSION "0.5.0"
70 74
71#define FUJITSU_LCD_N_LEVELS 8 75#define FUJITSU_LCD_N_LEVELS 8
72 76
@@ -83,6 +87,24 @@
83#define ACPI_VIDEO_NOTIFY_INC_BRIGHTNESS 0x86 87#define ACPI_VIDEO_NOTIFY_INC_BRIGHTNESS 0x86
84#define ACPI_VIDEO_NOTIFY_DEC_BRIGHTNESS 0x87 88#define ACPI_VIDEO_NOTIFY_DEC_BRIGHTNESS 0x87
85 89
90/* FUNC interface - command values */
91#define FUNC_RFKILL 0x1000
92#define FUNC_LEDS 0x1001
93#define FUNC_BUTTONS 0x1002
94#define FUNC_BACKLIGHT 0x1004
95
96/* FUNC interface - responses */
97#define UNSUPPORTED_CMD 0x80000000
98
99#ifdef CONFIG_LEDS_CLASS
100/* FUNC interface - LED control */
101#define FUNC_LED_OFF 0x1
102#define FUNC_LED_ON 0x30001
103#define KEYBOARD_LAMPS 0x100
104#define LOGOLAMP_POWERON 0x2000
105#define LOGOLAMP_ALWAYS 0x4000
106#endif
107
86/* Hotkey details */ 108/* Hotkey details */
87#define KEY1_CODE 0x410 /* codes for the keys in the GIRB register */ 109#define KEY1_CODE 0x410 /* codes for the keys in the GIRB register */
88#define KEY2_CODE 0x411 110#define KEY2_CODE 0x411
@@ -133,7 +155,6 @@ struct fujitsu_t {
133 155
134static struct fujitsu_t *fujitsu; 156static struct fujitsu_t *fujitsu;
135static int use_alt_lcd_levels = -1; 157static int use_alt_lcd_levels = -1;
136static int disable_brightness_keys = -1;
137static int disable_brightness_adjust = -1; 158static int disable_brightness_adjust = -1;
138 159
139/* Device used to access other hotkeys on the laptop */ 160/* Device used to access other hotkeys on the laptop */
@@ -145,8 +166,9 @@ struct fujitsu_hotkey_t {
145 struct platform_device *pf_device; 166 struct platform_device *pf_device;
146 struct kfifo *fifo; 167 struct kfifo *fifo;
147 spinlock_t fifo_lock; 168 spinlock_t fifo_lock;
148 169 int rfkill_state;
149 unsigned int irb; /* info about the pressed buttons */ 170 int logolamp_registered;
171 int kblamps_registered;
150}; 172};
151 173
152static struct fujitsu_hotkey_t *fujitsu_hotkey; 174static struct fujitsu_hotkey_t *fujitsu_hotkey;
@@ -154,12 +176,139 @@ static struct fujitsu_hotkey_t *fujitsu_hotkey;
154static void acpi_fujitsu_hotkey_notify(acpi_handle handle, u32 event, 176static void acpi_fujitsu_hotkey_notify(acpi_handle handle, u32 event,
155 void *data); 177 void *data);
156 178
179#ifdef CONFIG_LEDS_CLASS
180static enum led_brightness logolamp_get(struct led_classdev *cdev);
181static void logolamp_set(struct led_classdev *cdev,
182 enum led_brightness brightness);
183
184struct led_classdev logolamp_led = {
185 .name = "fujitsu::logolamp",
186 .brightness_get = logolamp_get,
187 .brightness_set = logolamp_set
188};
189
190static enum led_brightness kblamps_get(struct led_classdev *cdev);
191static void kblamps_set(struct led_classdev *cdev,
192 enum led_brightness brightness);
193
194struct led_classdev kblamps_led = {
195 .name = "fujitsu::kblamps",
196 .brightness_get = kblamps_get,
197 .brightness_set = kblamps_set
198};
199#endif
200
157#ifdef CONFIG_FUJITSU_LAPTOP_DEBUG 201#ifdef CONFIG_FUJITSU_LAPTOP_DEBUG
158static u32 dbg_level = 0x03; 202static u32 dbg_level = 0x03;
159#endif 203#endif
160 204
161static void acpi_fujitsu_notify(acpi_handle handle, u32 event, void *data); 205static void acpi_fujitsu_notify(acpi_handle handle, u32 event, void *data);
162 206
207/* Fujitsu ACPI interface function */
208
209static int call_fext_func(int cmd, int arg0, int arg1, int arg2)
210{
211 acpi_status status = AE_OK;
212 union acpi_object params[4] = {
213 { .type = ACPI_TYPE_INTEGER },
214 { .type = ACPI_TYPE_INTEGER },
215 { .type = ACPI_TYPE_INTEGER },
216 { .type = ACPI_TYPE_INTEGER }
217 };
218 struct acpi_object_list arg_list = { 4, &params[0] };
219 struct acpi_buffer output;
220 union acpi_object out_obj;
221 acpi_handle handle = NULL;
222
223 status = acpi_get_handle(fujitsu_hotkey->acpi_handle, "FUNC", &handle);
224 if (ACPI_FAILURE(status)) {
225 vdbg_printk(FUJLAPTOP_DBG_ERROR,
226 "FUNC interface is not present\n");
227 return -ENODEV;
228 }
229
230 params[0].integer.value = cmd;
231 params[1].integer.value = arg0;
232 params[2].integer.value = arg1;
233 params[3].integer.value = arg2;
234
235 output.length = sizeof(out_obj);
236 output.pointer = &out_obj;
237
238 status = acpi_evaluate_object(handle, NULL, &arg_list, &output);
239 if (ACPI_FAILURE(status)) {
240 vdbg_printk(FUJLAPTOP_DBG_WARN,
241 "FUNC 0x%x (args 0x%x, 0x%x, 0x%x) call failed\n",
242 cmd, arg0, arg1, arg2);
243 return -ENODEV;
244 }
245
246 if (out_obj.type != ACPI_TYPE_INTEGER) {
247 vdbg_printk(FUJLAPTOP_DBG_WARN,
248 "FUNC 0x%x (args 0x%x, 0x%x, 0x%x) did not "
249 "return an integer\n",
250 cmd, arg0, arg1, arg2);
251 return -ENODEV;
252 }
253
254 vdbg_printk(FUJLAPTOP_DBG_TRACE,
255 "FUNC 0x%x (args 0x%x, 0x%x, 0x%x) returned 0x%x\n",
256 cmd, arg0, arg1, arg2, (int)out_obj.integer.value);
257 return out_obj.integer.value;
258}
259
260#ifdef CONFIG_LEDS_CLASS
261/* LED class callbacks */
262
263static void logolamp_set(struct led_classdev *cdev,
264 enum led_brightness brightness)
265{
266 if (brightness >= LED_FULL) {
267 call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_POWERON, FUNC_LED_ON);
268 call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_ALWAYS, FUNC_LED_ON);
269 } else if (brightness >= LED_HALF) {
270 call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_POWERON, FUNC_LED_ON);
271 call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_ALWAYS, FUNC_LED_OFF);
272 } else {
273 call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_POWERON, FUNC_LED_OFF);
274 }
275}
276
277static void kblamps_set(struct led_classdev *cdev,
278 enum led_brightness brightness)
279{
280 if (brightness >= LED_FULL)
281 call_fext_func(FUNC_LEDS, 0x1, KEYBOARD_LAMPS, FUNC_LED_ON);
282 else
283 call_fext_func(FUNC_LEDS, 0x1, KEYBOARD_LAMPS, FUNC_LED_OFF);
284}
285
286static enum led_brightness logolamp_get(struct led_classdev *cdev)
287{
288 enum led_brightness brightness = LED_OFF;
289 int poweron, always;
290
291 poweron = call_fext_func(FUNC_LEDS, 0x2, LOGOLAMP_POWERON, 0x0);
292 if (poweron == FUNC_LED_ON) {
293 brightness = LED_HALF;
294 always = call_fext_func(FUNC_LEDS, 0x2, LOGOLAMP_ALWAYS, 0x0);
295 if (always == FUNC_LED_ON)
296 brightness = LED_FULL;
297 }
298 return brightness;
299}
300
301static enum led_brightness kblamps_get(struct led_classdev *cdev)
302{
303 enum led_brightness brightness = LED_OFF;
304
305 if (call_fext_func(FUNC_LEDS, 0x2, KEYBOARD_LAMPS, 0x0) == FUNC_LED_ON)
306 brightness = LED_FULL;
307
308 return brightness;
309}
310#endif
311
163/* Hardware access for LCD brightness control */ 312/* Hardware access for LCD brightness control */
164 313
165static int set_lcd_level(int level) 314static int set_lcd_level(int level)
@@ -263,44 +412,34 @@ static int get_max_brightness(void)
263 return fujitsu->max_brightness; 412 return fujitsu->max_brightness;
264} 413}
265 414
266static int get_lcd_level_alt(void)
267{
268 unsigned long long state = 0;
269 acpi_status status = AE_OK;
270
271 vdbg_printk(FUJLAPTOP_DBG_TRACE, "get lcd level via GBLS\n");
272
273 status =
274 acpi_evaluate_integer(fujitsu->acpi_handle, "GBLS", NULL, &state);
275 if (status < 0)
276 return status;
277
278 fujitsu->brightness_level = state & 0x0fffffff;
279
280 if (state & 0x80000000)
281 fujitsu->brightness_changed = 1;
282 else
283 fujitsu->brightness_changed = 0;
284
285 return fujitsu->brightness_level;
286}
287
288/* Backlight device stuff */ 415/* Backlight device stuff */
289 416
290static int bl_get_brightness(struct backlight_device *b) 417static int bl_get_brightness(struct backlight_device *b)
291{ 418{
292 if (use_alt_lcd_levels) 419 return get_lcd_level();
293 return get_lcd_level_alt();
294 else
295 return get_lcd_level();
296} 420}
297 421
298static int bl_update_status(struct backlight_device *b) 422static int bl_update_status(struct backlight_device *b)
299{ 423{
424 int ret;
425 if (b->props.power == 4)
426 ret = call_fext_func(FUNC_BACKLIGHT, 0x1, 0x4, 0x3);
427 else
428 ret = call_fext_func(FUNC_BACKLIGHT, 0x1, 0x4, 0x0);
429 if (ret != 0)
430 vdbg_printk(FUJLAPTOP_DBG_ERROR,
431 "Unable to adjust backlight power, error code %i\n",
432 ret);
433
300 if (use_alt_lcd_levels) 434 if (use_alt_lcd_levels)
301 return set_lcd_level_alt(b->props.brightness); 435 ret = set_lcd_level_alt(b->props.brightness);
302 else 436 else
303 return set_lcd_level(b->props.brightness); 437 ret = set_lcd_level(b->props.brightness);
438 if (ret != 0)
439 vdbg_printk(FUJLAPTOP_DBG_ERROR,
440 "Unable to adjust LCD brightness, error code %i\n",
441 ret);
442 return ret;
304} 443}
305 444
306static struct backlight_ops fujitsubl_ops = { 445static struct backlight_ops fujitsubl_ops = {
@@ -344,10 +483,7 @@ static ssize_t show_lcd_level(struct device *dev,
344 483
345 int ret; 484 int ret;
346 485
347 if (use_alt_lcd_levels) 486 ret = get_lcd_level();
348 ret = get_lcd_level_alt();
349 else
350 ret = get_lcd_level();
351 if (ret < 0) 487 if (ret < 0)
352 return ret; 488 return ret;
353 489
@@ -372,52 +508,71 @@ static ssize_t store_lcd_level(struct device *dev,
372 if (ret < 0) 508 if (ret < 0)
373 return ret; 509 return ret;
374 510
375 if (use_alt_lcd_levels) 511 ret = get_lcd_level();
376 ret = get_lcd_level_alt();
377 else
378 ret = get_lcd_level();
379 if (ret < 0) 512 if (ret < 0)
380 return ret; 513 return ret;
381 514
382 return count; 515 return count;
383} 516}
384 517
385/* Hardware access for hotkey device */ 518static ssize_t
386 519ignore_store(struct device *dev,
387static int get_irb(void) 520 struct device_attribute *attr, const char *buf, size_t count)
388{ 521{
389 unsigned long long state = 0; 522 return count;
390 acpi_status status = AE_OK; 523}
391
392 vdbg_printk(FUJLAPTOP_DBG_TRACE, "Get irb\n");
393
394 status =
395 acpi_evaluate_integer(fujitsu_hotkey->acpi_handle, "GIRB", NULL,
396 &state);
397 if (status < 0)
398 return status;
399 524
400 fujitsu_hotkey->irb = state; 525static ssize_t
526show_lid_state(struct device *dev,
527 struct device_attribute *attr, char *buf)
528{
529 if (fujitsu_hotkey->rfkill_state == UNSUPPORTED_CMD)
530 return sprintf(buf, "unknown\n");
531 if (fujitsu_hotkey->rfkill_state & 0x100)
532 return sprintf(buf, "open\n");
533 else
534 return sprintf(buf, "closed\n");
535}
401 536
402 return fujitsu_hotkey->irb; 537static ssize_t
538show_dock_state(struct device *dev,
539 struct device_attribute *attr, char *buf)
540{
541 if (fujitsu_hotkey->rfkill_state == UNSUPPORTED_CMD)
542 return sprintf(buf, "unknown\n");
543 if (fujitsu_hotkey->rfkill_state & 0x200)
544 return sprintf(buf, "docked\n");
545 else
546 return sprintf(buf, "undocked\n");
403} 547}
404 548
405static ssize_t 549static ssize_t
406ignore_store(struct device *dev, 550show_radios_state(struct device *dev,
407 struct device_attribute *attr, const char *buf, size_t count) 551 struct device_attribute *attr, char *buf)
408{ 552{
409 return count; 553 if (fujitsu_hotkey->rfkill_state == UNSUPPORTED_CMD)
554 return sprintf(buf, "unknown\n");
555 if (fujitsu_hotkey->rfkill_state & 0x20)
556 return sprintf(buf, "on\n");
557 else
558 return sprintf(buf, "killed\n");
410} 559}
411 560
412static DEVICE_ATTR(max_brightness, 0444, show_max_brightness, ignore_store); 561static DEVICE_ATTR(max_brightness, 0444, show_max_brightness, ignore_store);
413static DEVICE_ATTR(brightness_changed, 0444, show_brightness_changed, 562static DEVICE_ATTR(brightness_changed, 0444, show_brightness_changed,
414 ignore_store); 563 ignore_store);
415static DEVICE_ATTR(lcd_level, 0644, show_lcd_level, store_lcd_level); 564static DEVICE_ATTR(lcd_level, 0644, show_lcd_level, store_lcd_level);
565static DEVICE_ATTR(lid, 0444, show_lid_state, ignore_store);
566static DEVICE_ATTR(dock, 0444, show_dock_state, ignore_store);
567static DEVICE_ATTR(radios, 0444, show_radios_state, ignore_store);
416 568
417static struct attribute *fujitsupf_attributes[] = { 569static struct attribute *fujitsupf_attributes[] = {
418 &dev_attr_brightness_changed.attr, 570 &dev_attr_brightness_changed.attr,
419 &dev_attr_max_brightness.attr, 571 &dev_attr_max_brightness.attr,
420 &dev_attr_lcd_level.attr, 572 &dev_attr_lcd_level.attr,
573 &dev_attr_lid.attr,
574 &dev_attr_dock.attr,
575 &dev_attr_radios.attr,
421 NULL 576 NULL
422}; 577};
423 578
@@ -435,24 +590,16 @@ static struct platform_driver fujitsupf_driver = {
435static void dmi_check_cb_common(const struct dmi_system_id *id) 590static void dmi_check_cb_common(const struct dmi_system_id *id)
436{ 591{
437 acpi_handle handle; 592 acpi_handle handle;
438 int have_blnf;
439 printk(KERN_INFO "fujitsu-laptop: Identified laptop model '%s'.\n", 593 printk(KERN_INFO "fujitsu-laptop: Identified laptop model '%s'.\n",
440 id->ident); 594 id->ident);
441 have_blnf = ACPI_SUCCESS
442 (acpi_get_handle(NULL, "\\_SB.PCI0.GFX0.LCD.BLNF", &handle));
443 if (use_alt_lcd_levels == -1) { 595 if (use_alt_lcd_levels == -1) {
444 vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detecting usealt\n"); 596 if (ACPI_SUCCESS(acpi_get_handle(NULL,
445 use_alt_lcd_levels = 1; 597 "\\_SB.PCI0.LPCB.FJEX.SBL2", &handle)))
446 } 598 use_alt_lcd_levels = 1;
447 if (disable_brightness_keys == -1) { 599 else
448 vdbg_printk(FUJLAPTOP_DBG_TRACE, 600 use_alt_lcd_levels = 0;
449 "auto-detecting disable_keys\n"); 601 vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detected usealt as "
450 disable_brightness_keys = have_blnf ? 1 : 0; 602 "%i\n", use_alt_lcd_levels);
451 }
452 if (disable_brightness_adjust == -1) {
453 vdbg_printk(FUJLAPTOP_DBG_TRACE,
454 "auto-detecting disable_adjust\n");
455 disable_brightness_adjust = have_blnf ? 0 : 1;
456 } 603 }
457} 604}
458 605
@@ -581,19 +728,14 @@ static int acpi_fujitsu_add(struct acpi_device *device)
581 728
582 /* do config (detect defaults) */ 729 /* do config (detect defaults) */
583 use_alt_lcd_levels = use_alt_lcd_levels == 1 ? 1 : 0; 730 use_alt_lcd_levels = use_alt_lcd_levels == 1 ? 1 : 0;
584 disable_brightness_keys = disable_brightness_keys == 1 ? 1 : 0;
585 disable_brightness_adjust = disable_brightness_adjust == 1 ? 1 : 0; 731 disable_brightness_adjust = disable_brightness_adjust == 1 ? 1 : 0;
586 vdbg_printk(FUJLAPTOP_DBG_INFO, 732 vdbg_printk(FUJLAPTOP_DBG_INFO,
587 "config: [alt interface: %d], [key disable: %d], [adjust disable: %d]\n", 733 "config: [alt interface: %d], [adjust disable: %d]\n",
588 use_alt_lcd_levels, disable_brightness_keys, 734 use_alt_lcd_levels, disable_brightness_adjust);
589 disable_brightness_adjust);
590 735
591 if (get_max_brightness() <= 0) 736 if (get_max_brightness() <= 0)
592 fujitsu->max_brightness = FUJITSU_LCD_N_LEVELS; 737 fujitsu->max_brightness = FUJITSU_LCD_N_LEVELS;
593 if (use_alt_lcd_levels) 738 get_lcd_level();
594 get_lcd_level_alt();
595 else
596 get_lcd_level();
597 739
598 return result; 740 return result;
599 741
@@ -644,43 +786,23 @@ static void acpi_fujitsu_notify(acpi_handle handle, u32 event, void *data)
644 case ACPI_FUJITSU_NOTIFY_CODE1: 786 case ACPI_FUJITSU_NOTIFY_CODE1:
645 keycode = 0; 787 keycode = 0;
646 oldb = fujitsu->brightness_level; 788 oldb = fujitsu->brightness_level;
647 get_lcd_level(); /* the alt version always yields changed */ 789 get_lcd_level();
648 newb = fujitsu->brightness_level; 790 newb = fujitsu->brightness_level;
649 791
650 vdbg_printk(FUJLAPTOP_DBG_TRACE, 792 vdbg_printk(FUJLAPTOP_DBG_TRACE,
651 "brightness button event [%i -> %i (%i)]\n", 793 "brightness button event [%i -> %i (%i)]\n",
652 oldb, newb, fujitsu->brightness_changed); 794 oldb, newb, fujitsu->brightness_changed);
653 795
654 if (oldb == newb && fujitsu->brightness_changed) { 796 if (oldb < newb) {
655 keycode = 0;
656 if (disable_brightness_keys != 1) {
657 if (oldb == 0) {
658 acpi_bus_generate_proc_event
659 (fujitsu->dev,
660 ACPI_VIDEO_NOTIFY_DEC_BRIGHTNESS,
661 0);
662 keycode = KEY_BRIGHTNESSDOWN;
663 } else if (oldb ==
664 (fujitsu->max_brightness) - 1) {
665 acpi_bus_generate_proc_event
666 (fujitsu->dev,
667 ACPI_VIDEO_NOTIFY_INC_BRIGHTNESS,
668 0);
669 keycode = KEY_BRIGHTNESSUP;
670 }
671 }
672 } else if (oldb < newb) {
673 if (disable_brightness_adjust != 1) { 797 if (disable_brightness_adjust != 1) {
674 if (use_alt_lcd_levels) 798 if (use_alt_lcd_levels)
675 set_lcd_level_alt(newb); 799 set_lcd_level_alt(newb);
676 else 800 else
677 set_lcd_level(newb); 801 set_lcd_level(newb);
678 } 802 }
679 if (disable_brightness_keys != 1) { 803 acpi_bus_generate_proc_event(fujitsu->dev,
680 acpi_bus_generate_proc_event(fujitsu->dev, 804 ACPI_VIDEO_NOTIFY_INC_BRIGHTNESS, 0);
681 ACPI_VIDEO_NOTIFY_INC_BRIGHTNESS, 0); 805 keycode = KEY_BRIGHTNESSUP;
682 keycode = KEY_BRIGHTNESSUP;
683 }
684 } else if (oldb > newb) { 806 } else if (oldb > newb) {
685 if (disable_brightness_adjust != 1) { 807 if (disable_brightness_adjust != 1) {
686 if (use_alt_lcd_levels) 808 if (use_alt_lcd_levels)
@@ -688,13 +810,9 @@ static void acpi_fujitsu_notify(acpi_handle handle, u32 event, void *data)
688 else 810 else
689 set_lcd_level(newb); 811 set_lcd_level(newb);
690 } 812 }
691 if (disable_brightness_keys != 1) { 813 acpi_bus_generate_proc_event(fujitsu->dev,
692 acpi_bus_generate_proc_event(fujitsu->dev, 814 ACPI_VIDEO_NOTIFY_DEC_BRIGHTNESS, 0);
693 ACPI_VIDEO_NOTIFY_DEC_BRIGHTNESS, 0); 815 keycode = KEY_BRIGHTNESSDOWN;
694 keycode = KEY_BRIGHTNESSDOWN;
695 }
696 } else {
697 keycode = KEY_UNKNOWN;
698 } 816 }
699 break; 817 break;
700 default: 818 default:
@@ -771,7 +889,8 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
771 input->id.bustype = BUS_HOST; 889 input->id.bustype = BUS_HOST;
772 input->id.product = 0x06; 890 input->id.product = 0x06;
773 input->dev.parent = &device->dev; 891 input->dev.parent = &device->dev;
774 input->evbit[0] = BIT(EV_KEY); 892
893 set_bit(EV_KEY, input->evbit);
775 set_bit(fujitsu->keycode1, input->keybit); 894 set_bit(fujitsu->keycode1, input->keybit);
776 set_bit(fujitsu->keycode2, input->keybit); 895 set_bit(fujitsu->keycode2, input->keybit);
777 set_bit(fujitsu->keycode3, input->keybit); 896 set_bit(fujitsu->keycode3, input->keybit);
@@ -803,10 +922,44 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
803 printk(KERN_ERR "_INI Method failed\n"); 922 printk(KERN_ERR "_INI Method failed\n");
804 } 923 }
805 924
806 i = 0; /* Discard hotkey ringbuffer */ 925 i = 0;
807 while (get_irb() != 0 && (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) ; 926 while (call_fext_func(FUNC_BUTTONS, 0x1, 0x0, 0x0) != 0
927 && (i++) < MAX_HOTKEY_RINGBUFFER_SIZE)
928 ; /* No action, result is discarded */
808 vdbg_printk(FUJLAPTOP_DBG_INFO, "Discarded %i ringbuffer entries\n", i); 929 vdbg_printk(FUJLAPTOP_DBG_INFO, "Discarded %i ringbuffer entries\n", i);
809 930
931 fujitsu_hotkey->rfkill_state =
932 call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0);
933
934 /* Suspect this is a keymap of the application panel, print it */
935 printk(KERN_INFO "fujitsu-laptop: BTNI: [0x%x]\n",
936 call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0));
937
938 #ifdef CONFIG_LEDS_CLASS
939 if (call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) {
940 result = led_classdev_register(&fujitsu->pf_device->dev,
941 &logolamp_led);
942 if (result == 0) {
943 fujitsu_hotkey->logolamp_registered = 1;
944 } else {
945 printk(KERN_ERR "fujitsu-laptop: Could not register "
946 "LED handler for logo lamp, error %i\n", result);
947 }
948 }
949
950 if ((call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & KEYBOARD_LAMPS) &&
951 (call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0) == 0x0)) {
952 result = led_classdev_register(&fujitsu->pf_device->dev,
953 &kblamps_led);
954 if (result == 0) {
955 fujitsu_hotkey->kblamps_registered = 1;
956 } else {
957 printk(KERN_ERR "fujitsu-laptop: Could not register "
958 "LED handler for keyboard lamps, error %i\n", result);
959 }
960 }
961 #endif
962
810 return result; 963 return result;
811 964
812end: 965end:
@@ -852,16 +1005,15 @@ static void acpi_fujitsu_hotkey_notify(acpi_handle handle, u32 event,
852 1005
853 input = fujitsu_hotkey->input; 1006 input = fujitsu_hotkey->input;
854 1007
855 vdbg_printk(FUJLAPTOP_DBG_TRACE, "Hotkey event\n"); 1008 fujitsu_hotkey->rfkill_state =
1009 call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0);
856 1010
857 switch (event) { 1011 switch (event) {
858 case ACPI_FUJITSU_NOTIFY_CODE1: 1012 case ACPI_FUJITSU_NOTIFY_CODE1:
859 i = 0; 1013 i = 0;
860 while ((irb = get_irb()) != 0 1014 while ((irb =
861 && (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) { 1015 call_fext_func(FUNC_BUTTONS, 0x1, 0x0, 0x0)) != 0
862 vdbg_printk(FUJLAPTOP_DBG_TRACE, "GIRB result [%x]\n", 1016 && (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) {
863 irb);
864
865 switch (irb & 0x4ff) { 1017 switch (irb & 0x4ff) {
866 case KEY1_CODE: 1018 case KEY1_CODE:
867 keycode = fujitsu->keycode1; 1019 keycode = fujitsu->keycode1;
@@ -1035,6 +1187,15 @@ static int __init fujitsu_init(void)
1035 goto fail_hotkey1; 1187 goto fail_hotkey1;
1036 } 1188 }
1037 1189
1190 /* Sync backlight power status (needs FUJ02E3 device, hence deferred) */
1191
1192 if (!acpi_video_backlight_support()) {
1193 if (call_fext_func(FUNC_BACKLIGHT, 0x2, 0x4, 0x0) == 3)
1194 fujitsu->bl_device->props.power = 4;
1195 else
1196 fujitsu->bl_device->props.power = 0;
1197 }
1198
1038 printk(KERN_INFO "fujitsu-laptop: driver " FUJITSU_DRIVER_VERSION 1199 printk(KERN_INFO "fujitsu-laptop: driver " FUJITSU_DRIVER_VERSION
1039 " successfully loaded.\n"); 1200 " successfully loaded.\n");
1040 1201
@@ -1074,6 +1235,14 @@ fail_acpi:
1074 1235
1075static void __exit fujitsu_cleanup(void) 1236static void __exit fujitsu_cleanup(void)
1076{ 1237{
1238 #ifdef CONFIG_LEDS_CLASS
1239 if (fujitsu_hotkey->logolamp_registered != 0)
1240 led_classdev_unregister(&logolamp_led);
1241
1242 if (fujitsu_hotkey->kblamps_registered != 0)
1243 led_classdev_unregister(&kblamps_led);
1244 #endif
1245
1077 sysfs_remove_group(&fujitsu->pf_device->dev.kobj, 1246 sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
1078 &fujitsupf_attribute_group); 1247 &fujitsupf_attribute_group);
1079 platform_device_unregister(fujitsu->pf_device); 1248 platform_device_unregister(fujitsu->pf_device);
@@ -1098,9 +1267,6 @@ module_exit(fujitsu_cleanup);
1098module_param(use_alt_lcd_levels, uint, 0644); 1267module_param(use_alt_lcd_levels, uint, 0644);
1099MODULE_PARM_DESC(use_alt_lcd_levels, 1268MODULE_PARM_DESC(use_alt_lcd_levels,
1100 "Use alternative interface for lcd_levels (needed for Lifebook s6410)."); 1269 "Use alternative interface for lcd_levels (needed for Lifebook s6410).");
1101module_param(disable_brightness_keys, uint, 0644);
1102MODULE_PARM_DESC(disable_brightness_keys,
1103 "Disable brightness keys (eg. if they are already handled by the generic ACPI_VIDEO device).");
1104module_param(disable_brightness_adjust, uint, 0644); 1270module_param(disable_brightness_adjust, uint, 0644);
1105MODULE_PARM_DESC(disable_brightness_adjust, "Disable brightness adjustment ."); 1271MODULE_PARM_DESC(disable_brightness_adjust, "Disable brightness adjustment .");
1106#ifdef CONFIG_FUJITSU_LAPTOP_DEBUG 1272#ifdef CONFIG_FUJITSU_LAPTOP_DEBUG
@@ -1108,12 +1274,13 @@ module_param_named(debug, dbg_level, uint, 0644);
1108MODULE_PARM_DESC(debug, "Sets debug level bit-mask"); 1274MODULE_PARM_DESC(debug, "Sets debug level bit-mask");
1109#endif 1275#endif
1110 1276
1111MODULE_AUTHOR("Jonathan Woithe, Peter Gruber"); 1277MODULE_AUTHOR("Jonathan Woithe, Peter Gruber, Tony Vroon");
1112MODULE_DESCRIPTION("Fujitsu laptop extras support"); 1278MODULE_DESCRIPTION("Fujitsu laptop extras support");
1113MODULE_VERSION(FUJITSU_DRIVER_VERSION); 1279MODULE_VERSION(FUJITSU_DRIVER_VERSION);
1114MODULE_LICENSE("GPL"); 1280MODULE_LICENSE("GPL");
1115 1281
1116MODULE_ALIAS("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1D3:*:cvrS6410:*"); 1282MODULE_ALIAS("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1D3:*:cvrS6410:*");
1283MODULE_ALIAS("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1E6:*:cvrS6420:*");
1117MODULE_ALIAS("dmi:*:svnFUJITSU:*:pvr:rvnFUJITSU:rnFJNB19C:*:cvrS7020:*"); 1284MODULE_ALIAS("dmi:*:svnFUJITSU:*:pvr:rvnFUJITSU:rnFJNB19C:*:cvrS7020:*");
1118 1285
1119static struct pnp_device_id pnp_ids[] = { 1286static struct pnp_device_id pnp_ids[] = {
diff --git a/drivers/misc/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
index 4b7c24c519c3..4b7c24c519c3 100644
--- a/drivers/misc/hp-wmi.c
+++ b/drivers/platform/x86/hp-wmi.c
diff --git a/drivers/misc/intel_menlow.c b/drivers/platform/x86/intel_menlow.c
index 27b7662955bb..27b7662955bb 100644
--- a/drivers/misc/intel_menlow.c
+++ b/drivers/platform/x86/intel_menlow.c
diff --git a/drivers/misc/msi-laptop.c b/drivers/platform/x86/msi-laptop.c
index 759763d18e4c..759763d18e4c 100644
--- a/drivers/misc/msi-laptop.c
+++ b/drivers/platform/x86/msi-laptop.c
diff --git a/drivers/misc/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c
index 4a1bc64485d5..f30db367c82e 100644
--- a/drivers/misc/panasonic-laptop.c
+++ b/drivers/platform/x86/panasonic-laptop.c
@@ -241,8 +241,6 @@ static int acpi_pcc_write_sset(struct pcc_acpi *pcc, int func, int val)
241 }; 241 };
242 acpi_status status = AE_OK; 242 acpi_status status = AE_OK;
243 243
244 ACPI_FUNCTION_TRACE("acpi_pcc_write_sset");
245
246 status = acpi_evaluate_object(pcc->handle, METHOD_HKEY_SSET, 244 status = acpi_evaluate_object(pcc->handle, METHOD_HKEY_SSET,
247 &params, NULL); 245 &params, NULL);
248 246
@@ -254,8 +252,6 @@ static inline int acpi_pcc_get_sqty(struct acpi_device *device)
254 unsigned long long s; 252 unsigned long long s;
255 acpi_status status; 253 acpi_status status;
256 254
257 ACPI_FUNCTION_TRACE("acpi_pcc_get_sqty");
258
259 status = acpi_evaluate_integer(device->handle, METHOD_HKEY_SQTY, 255 status = acpi_evaluate_integer(device->handle, METHOD_HKEY_SQTY,
260 NULL, &s); 256 NULL, &s);
261 if (ACPI_SUCCESS(status)) 257 if (ACPI_SUCCESS(status))
@@ -274,8 +270,6 @@ static int acpi_pcc_retrieve_biosdata(struct pcc_acpi *pcc, u32 *sinf)
274 union acpi_object *hkey = NULL; 270 union acpi_object *hkey = NULL;
275 int i; 271 int i;
276 272
277 ACPI_FUNCTION_TRACE("acpi_pcc_retrieve_biosdata");
278
279 status = acpi_evaluate_object(pcc->handle, METHOD_HKEY_SINF, 0, 273 status = acpi_evaluate_object(pcc->handle, METHOD_HKEY_SINF, 0,
280 &buffer); 274 &buffer);
281 if (ACPI_FAILURE(status)) { 275 if (ACPI_FAILURE(status)) {
@@ -501,8 +495,6 @@ static void acpi_pcc_generate_keyinput(struct pcc_acpi *pcc)
501 int key_code, hkey_num; 495 int key_code, hkey_num;
502 unsigned long long result; 496 unsigned long long result;
503 497
504 ACPI_FUNCTION_TRACE("acpi_pcc_generate_keyinput");
505
506 rc = acpi_evaluate_integer(pcc->handle, METHOD_HKEY_QUERY, 498 rc = acpi_evaluate_integer(pcc->handle, METHOD_HKEY_QUERY,
507 NULL, &result); 499 NULL, &result);
508 if (!ACPI_SUCCESS(rc)) { 500 if (!ACPI_SUCCESS(rc)) {
@@ -538,8 +530,6 @@ static void acpi_pcc_hotkey_notify(acpi_handle handle, u32 event, void *data)
538{ 530{
539 struct pcc_acpi *pcc = (struct pcc_acpi *) data; 531 struct pcc_acpi *pcc = (struct pcc_acpi *) data;
540 532
541 ACPI_FUNCTION_TRACE("acpi_pcc_hotkey_notify");
542
543 switch (event) { 533 switch (event) {
544 case HKEY_NOTIFY: 534 case HKEY_NOTIFY:
545 acpi_pcc_generate_keyinput(pcc); 535 acpi_pcc_generate_keyinput(pcc);
@@ -554,8 +544,6 @@ static int acpi_pcc_init_input(struct pcc_acpi *pcc)
554{ 544{
555 int i, rc; 545 int i, rc;
556 546
557 ACPI_FUNCTION_TRACE("acpi_pcc_init_input");
558
559 pcc->input_dev = input_allocate_device(); 547 pcc->input_dev = input_allocate_device();
560 if (!pcc->input_dev) { 548 if (!pcc->input_dev) {
561 ACPI_DEBUG_PRINT((ACPI_DB_ERROR, 549 ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
@@ -597,8 +585,6 @@ static int acpi_pcc_hotkey_resume(struct acpi_device *device)
597 struct pcc_acpi *pcc = acpi_driver_data(device); 585 struct pcc_acpi *pcc = acpi_driver_data(device);
598 acpi_status status = AE_OK; 586 acpi_status status = AE_OK;
599 587
600 ACPI_FUNCTION_TRACE("acpi_pcc_hotkey_resume");
601
602 if (device == NULL || pcc == NULL) 588 if (device == NULL || pcc == NULL)
603 return -EINVAL; 589 return -EINVAL;
604 590
@@ -616,8 +602,6 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device)
616 struct pcc_acpi *pcc; 602 struct pcc_acpi *pcc;
617 int num_sifr, result; 603 int num_sifr, result;
618 604
619 ACPI_FUNCTION_TRACE("acpi_pcc_hotkey_add");
620
621 if (!device) 605 if (!device)
622 return -EINVAL; 606 return -EINVAL;
623 607
@@ -714,8 +698,6 @@ static int __init acpi_pcc_init(void)
714{ 698{
715 int result = 0; 699 int result = 0;
716 700
717 ACPI_FUNCTION_TRACE("acpi_pcc_init");
718
719 if (acpi_disabled) 701 if (acpi_disabled)
720 return -ENODEV; 702 return -ENODEV;
721 703
@@ -733,8 +715,6 @@ static int acpi_pcc_hotkey_remove(struct acpi_device *device, int type)
733{ 715{
734 struct pcc_acpi *pcc = acpi_driver_data(device); 716 struct pcc_acpi *pcc = acpi_driver_data(device);
735 717
736 ACPI_FUNCTION_TRACE("acpi_pcc_hotkey_remove");
737
738 if (!device || !pcc) 718 if (!device || !pcc)
739 return -EINVAL; 719 return -EINVAL;
740 720
@@ -757,8 +737,6 @@ static int acpi_pcc_hotkey_remove(struct acpi_device *device, int type)
757 737
758static void __exit acpi_pcc_exit(void) 738static void __exit acpi_pcc_exit(void)
759{ 739{
760 ACPI_FUNCTION_TRACE("acpi_pcc_exit");
761
762 acpi_bus_unregister_driver(&acpi_pcc_driver); 740 acpi_bus_unregister_driver(&acpi_pcc_driver);
763} 741}
764 742
diff --git a/drivers/misc/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 571b211608d1..537959d07148 100644
--- a/drivers/misc/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -935,14 +935,17 @@ static void sony_acpi_notify(acpi_handle handle, u32 event, void *data)
935static acpi_status sony_walk_callback(acpi_handle handle, u32 level, 935static acpi_status sony_walk_callback(acpi_handle handle, u32 level,
936 void *context, void **return_value) 936 void *context, void **return_value)
937{ 937{
938 struct acpi_namespace_node *node; 938 struct acpi_device_info *info;
939 union acpi_operand_object *operand; 939 struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
940 940
941 node = (struct acpi_namespace_node *)handle; 941 if (ACPI_SUCCESS(acpi_get_object_info(handle, &buffer))) {
942 operand = (union acpi_operand_object *)node->object; 942 info = buffer.pointer;
943 943
944 printk(KERN_WARNING DRV_PFX "method: name: %4.4s, args %X\n", node->name.ascii, 944 printk(KERN_WARNING DRV_PFX "method: name: %4.4s, args %X\n",
945 (u32) operand->method.param_count); 945 (char *)&info->name, info->param_count);
946
947 kfree(buffer.pointer);
948 }
946 949
947 return AE_OK; 950 return AE_OK;
948} 951}
diff --git a/drivers/misc/tc1100-wmi.c b/drivers/platform/x86/tc1100-wmi.c
index f25e4c974dcf..b4a4aa9ee482 100644
--- a/drivers/misc/tc1100-wmi.c
+++ b/drivers/platform/x86/tc1100-wmi.c
@@ -30,7 +30,6 @@
30#include <linux/init.h> 30#include <linux/init.h>
31#include <linux/types.h> 31#include <linux/types.h>
32#include <acpi/acpi.h> 32#include <acpi/acpi.h>
33#include <acpi/actypes.h>
34#include <acpi/acpi_bus.h> 33#include <acpi/acpi_bus.h>
35#include <acpi/acpi_drivers.h> 34#include <acpi/acpi_drivers.h>
36#include <linux/platform_device.h> 35#include <linux/platform_device.h>
diff --git a/drivers/misc/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 899766e16fa8..3478453eba7a 100644
--- a/drivers/misc/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -76,7 +76,6 @@
76#include <linux/workqueue.h> 76#include <linux/workqueue.h>
77 77
78#include <acpi/acpi_drivers.h> 78#include <acpi/acpi_drivers.h>
79#include <acpi/acnamesp.h>
80 79
81#include <linux/pci_ids.h> 80#include <linux/pci_ids.h>
82 81
diff --git a/drivers/acpi/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 40e60fc2e596..40e60fc2e596 100644
--- a/drivers/acpi/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
diff --git a/drivers/acpi/wmi.c b/drivers/platform/x86/wmi.c
index 8a8b377712c9..8a8b377712c9 100644
--- a/drivers/acpi/wmi.c
+++ b/drivers/platform/x86/wmi.c
diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index 383e47c392a4..2834846a185d 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -23,7 +23,6 @@
23#include <linux/pnp.h> 23#include <linux/pnp.h>
24#include <linux/mod_devicetable.h> 24#include <linux/mod_devicetable.h>
25#include <acpi/acpi_bus.h> 25#include <acpi/acpi_bus.h>
26#include <acpi/actypes.h>
27 26
28#include "../base.h" 27#include "../base.h"
29#include "pnpacpi.h" 28#include "pnpacpi.h"
diff --git a/drivers/regulator/wm8350-regulator.c b/drivers/regulator/wm8350-regulator.c
index c68c496b2c49..7aa35248181b 100644
--- a/drivers/regulator/wm8350-regulator.c
+++ b/drivers/regulator/wm8350-regulator.c
@@ -1412,6 +1412,97 @@ int wm8350_register_regulator(struct wm8350 *wm8350, int reg,
1412} 1412}
1413EXPORT_SYMBOL_GPL(wm8350_register_regulator); 1413EXPORT_SYMBOL_GPL(wm8350_register_regulator);
1414 1414
1415/**
1416 * wm8350_register_led - Register a WM8350 LED output
1417 *
1418 * @param wm8350 The WM8350 device to configure.
1419 * @param lednum LED device index to create.
1420 * @param dcdc The DCDC to use for the LED.
1421 * @param isink The ISINK to use for the LED.
1422 * @param pdata Configuration for the LED.
1423 *
1424 * The WM8350 supports the use of an ISINK together with a DCDC to
1425 * provide a power-efficient LED driver. This function registers the
1426 * regulators and instantiates the platform device for a LED. The
1427 * operating modes for the LED regulators must be configured using
1428 * wm8350_isink_set_flash(), wm8350_dcdc25_set_mode() and
1429 * wm8350_dcdc_set_slot() prior to calling this function.
1430 */
1431int wm8350_register_led(struct wm8350 *wm8350, int lednum, int dcdc, int isink,
1432 struct wm8350_led_platform_data *pdata)
1433{
1434 struct wm8350_led *led;
1435 struct platform_device *pdev;
1436 int ret;
1437
1438 if (lednum > ARRAY_SIZE(wm8350->pmic.led) || lednum < 0) {
1439 dev_err(wm8350->dev, "Invalid LED index %d\n", lednum);
1440 return -ENODEV;
1441 }
1442
1443 led = &wm8350->pmic.led[lednum];
1444
1445 if (led->pdev) {
1446 dev_err(wm8350->dev, "LED %d already allocated\n", lednum);
1447 return -EINVAL;
1448 }
1449
1450 pdev = platform_device_alloc("wm8350-led", lednum);
1451 if (pdev == NULL) {
1452 dev_err(wm8350->dev, "Failed to allocate LED %d\n", lednum);
1453 return -ENOMEM;
1454 }
1455
1456 led->isink_consumer.dev = &pdev->dev;
1457 led->isink_consumer.supply = "led_isink";
1458 led->isink_init.num_consumer_supplies = 1;
1459 led->isink_init.consumer_supplies = &led->isink_consumer;
1460 led->isink_init.constraints.min_uA = 0;
1461 led->isink_init.constraints.max_uA = pdata->max_uA;
1462 led->isink_init.constraints.valid_ops_mask = REGULATOR_CHANGE_CURRENT;
1463 led->isink_init.constraints.valid_modes_mask = REGULATOR_MODE_NORMAL;
1464 ret = wm8350_register_regulator(wm8350, isink, &led->isink_init);
1465 if (ret != 0) {
1466 platform_device_put(pdev);
1467 return ret;
1468 }
1469
1470 led->dcdc_consumer.dev = &pdev->dev;
1471 led->dcdc_consumer.supply = "led_vcc";
1472 led->dcdc_init.num_consumer_supplies = 1;
1473 led->dcdc_init.consumer_supplies = &led->dcdc_consumer;
1474 led->dcdc_init.constraints.valid_modes_mask = REGULATOR_MODE_NORMAL;
1475 ret = wm8350_register_regulator(wm8350, dcdc, &led->dcdc_init);
1476 if (ret != 0) {
1477 platform_device_put(pdev);
1478 return ret;
1479 }
1480
1481 switch (isink) {
1482 case WM8350_ISINK_A:
1483 wm8350->pmic.isink_A_dcdc = dcdc;
1484 break;
1485 case WM8350_ISINK_B:
1486 wm8350->pmic.isink_B_dcdc = dcdc;
1487 break;
1488 }
1489
1490 pdev->dev.platform_data = pdata;
1491 pdev->dev.parent = wm8350->dev;
1492 ret = platform_device_add(pdev);
1493 if (ret != 0) {
1494 dev_err(wm8350->dev, "Failed to register LED %d: %d\n",
1495 lednum, ret);
1496 platform_device_put(pdev);
1497 return ret;
1498 }
1499
1500 led->pdev = pdev;
1501
1502 return 0;
1503}
1504EXPORT_SYMBOL_GPL(wm8350_register_led);
1505
1415static struct platform_driver wm8350_regulator_driver = { 1506static struct platform_driver wm8350_regulator_driver = {
1416 .probe = wm8350_regulator_probe, 1507 .probe = wm8350_regulator_probe,
1417 .remove = wm8350_regulator_remove, 1508 .remove = wm8350_regulator_remove,
diff --git a/drivers/rtc/rtc-parisc.c b/drivers/rtc/rtc-parisc.c
index 346d633655e7..c6bfa6fe1a2a 100644
--- a/drivers/rtc/rtc-parisc.c
+++ b/drivers/rtc/rtc-parisc.c
@@ -34,7 +34,8 @@ static int parisc_get_time(struct device *dev, struct rtc_time *tm)
34static int parisc_set_time(struct device *dev, struct rtc_time *tm) 34static int parisc_set_time(struct device *dev, struct rtc_time *tm)
35{ 35{
36 struct parisc_rtc *p = dev_get_drvdata(dev); 36 struct parisc_rtc *p = dev_get_drvdata(dev);
37 unsigned long flags, ret; 37 unsigned long flags;
38 int ret;
38 39
39 spin_lock_irqsave(&p->lock, flags); 40 spin_lock_irqsave(&p->lock, flags);
40 ret = set_rtc_time(tm); 41 ret = set_rtc_time(tm);
diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index 4a4dd9adc328..72facb9eb7db 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -52,11 +52,11 @@ config LCD_ILI9320
52 then say y to include a power driver for it. 52 then say y to include a power driver for it.
53 53
54config LCD_TDO24M 54config LCD_TDO24M
55 tristate "Toppoly TDO24M LCD Panels support" 55 tristate "Toppoly TDO24M and TDO35S LCD Panels support"
56 depends on LCD_CLASS_DEVICE && SPI_MASTER 56 depends on LCD_CLASS_DEVICE && SPI_MASTER
57 default n 57 default n
58 help 58 help
59 If you have a Toppoly TDO24M series LCD panel, say y here to 59 If you have a Toppoly TDO24M/TDO35S series LCD panel, say y here to
60 include the support for it. 60 include the support for it.
61 61
62config LCD_VGG2432A4 62config LCD_VGG2432A4
@@ -123,17 +123,14 @@ config BACKLIGHT_ATMEL_PWM
123 To compile this driver as a module, choose M here: the module will be 123 To compile this driver as a module, choose M here: the module will be
124 called atmel-pwm-bl. 124 called atmel-pwm-bl.
125 125
126config BACKLIGHT_CORGI 126config BACKLIGHT_GENERIC
127 tristate "Generic (aka Sharp Corgi) Backlight Driver (DEPRECATED)" 127 tristate "Generic (aka Sharp Corgi) Backlight Driver"
128 depends on BACKLIGHT_CLASS_DEVICE 128 depends on BACKLIGHT_CLASS_DEVICE
129 default n 129 default y
130 help 130 help
131 Say y to enable the generic platform backlight driver previously 131 Say y to enable the generic platform backlight driver previously
132 known as the Corgi backlight driver. If you have a Sharp Zaurus 132 known as the Corgi backlight driver. If you have a Sharp Zaurus
133 SL-C7xx, SL-Cxx00 or SL-6000x say y. Most users can say n. 133 SL-C7xx, SL-Cxx00 or SL-6000x say y.
134
135 Note: this driver is marked as deprecated, try enable SPI and
136 use the new corgi_lcd driver with integrated backlight control
137 134
138config BACKLIGHT_LOCOMO 135config BACKLIGHT_LOCOMO
139 tristate "Sharp LOCOMO LCD/Backlight Driver" 136 tristate "Sharp LOCOMO LCD/Backlight Driver"
diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile
index 103427de6703..363b3cb2f01b 100644
--- a/drivers/video/backlight/Makefile
+++ b/drivers/video/backlight/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_LCD_TOSA) += tosa_lcd.o
11 11
12obj-$(CONFIG_BACKLIGHT_CLASS_DEVICE) += backlight.o 12obj-$(CONFIG_BACKLIGHT_CLASS_DEVICE) += backlight.o
13obj-$(CONFIG_BACKLIGHT_ATMEL_PWM) += atmel-pwm-bl.o 13obj-$(CONFIG_BACKLIGHT_ATMEL_PWM) += atmel-pwm-bl.o
14obj-$(CONFIG_BACKLIGHT_CORGI) += corgi_bl.o 14obj-$(CONFIG_BACKLIGHT_GENERIC) += generic_bl.o
15obj-$(CONFIG_BACKLIGHT_HP680) += hp680_bl.o 15obj-$(CONFIG_BACKLIGHT_HP680) += hp680_bl.o
16obj-$(CONFIG_BACKLIGHT_LOCOMO) += locomolcd.o 16obj-$(CONFIG_BACKLIGHT_LOCOMO) += locomolcd.o
17obj-$(CONFIG_BACKLIGHT_OMAP1) += omap1_bl.o 17obj-$(CONFIG_BACKLIGHT_OMAP1) += omap1_bl.o
diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index 0664fc032235..157057c79ca3 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -40,6 +40,10 @@ static int fb_notifier_callback(struct notifier_block *self,
40 if (!bd->ops->check_fb || 40 if (!bd->ops->check_fb ||
41 bd->ops->check_fb(evdata->info)) { 41 bd->ops->check_fb(evdata->info)) {
42 bd->props.fb_blank = *(int *)evdata->data; 42 bd->props.fb_blank = *(int *)evdata->data;
43 if (bd->props.fb_blank == FB_BLANK_UNBLANK)
44 bd->props.state &= ~BL_CORE_FBBLANK;
45 else
46 bd->props.state |= BL_CORE_FBBLANK;
43 backlight_update_status(bd); 47 backlight_update_status(bd);
44 } 48 }
45 mutex_unlock(&bd->ops_lock); 49 mutex_unlock(&bd->ops_lock);
@@ -80,20 +84,18 @@ static ssize_t backlight_show_power(struct device *dev,
80static ssize_t backlight_store_power(struct device *dev, 84static ssize_t backlight_store_power(struct device *dev,
81 struct device_attribute *attr, const char *buf, size_t count) 85 struct device_attribute *attr, const char *buf, size_t count)
82{ 86{
83 int rc = -ENXIO; 87 int rc;
84 char *endp;
85 struct backlight_device *bd = to_backlight_device(dev); 88 struct backlight_device *bd = to_backlight_device(dev);
86 int power = simple_strtoul(buf, &endp, 0); 89 unsigned long power;
87 size_t size = endp - buf;
88 90
89 if (*endp && isspace(*endp)) 91 rc = strict_strtoul(buf, 0, &power);
90 size++; 92 if (rc)
91 if (size != count) 93 return rc;
92 return -EINVAL;
93 94
95 rc = -ENXIO;
94 mutex_lock(&bd->ops_lock); 96 mutex_lock(&bd->ops_lock);
95 if (bd->ops) { 97 if (bd->ops) {
96 pr_debug("backlight: set power to %d\n", power); 98 pr_debug("backlight: set power to %lu\n", power);
97 if (bd->props.power != power) { 99 if (bd->props.power != power) {
98 bd->props.power = power; 100 bd->props.power = power;
99 backlight_update_status(bd); 101 backlight_update_status(bd);
@@ -116,28 +118,25 @@ static ssize_t backlight_show_brightness(struct device *dev,
116static ssize_t backlight_store_brightness(struct device *dev, 118static ssize_t backlight_store_brightness(struct device *dev,
117 struct device_attribute *attr, const char *buf, size_t count) 119 struct device_attribute *attr, const char *buf, size_t count)
118{ 120{
119 int rc = -ENXIO; 121 int rc;
120 char *endp;
121 struct backlight_device *bd = to_backlight_device(dev); 122 struct backlight_device *bd = to_backlight_device(dev);
122 int brightness = simple_strtoul(buf, &endp, 0); 123 unsigned long brightness;
123 size_t size = endp - buf; 124
125 rc = strict_strtoul(buf, 0, &brightness);
126 if (rc)
127 return rc;
124 128
125 if (*endp && isspace(*endp)) 129 rc = -ENXIO;
126 size++;
127 if (size != count)
128 return -EINVAL;
129 130
130 mutex_lock(&bd->ops_lock); 131 mutex_lock(&bd->ops_lock);
131 if (bd->ops) { 132 if (bd->ops) {
132 if (brightness > bd->props.max_brightness) 133 if (brightness > bd->props.max_brightness)
133 rc = -EINVAL; 134 rc = -EINVAL;
134 else { 135 else {
135 pr_debug("backlight: set brightness to %d\n", 136 pr_debug("backlight: set brightness to %lu\n",
136 brightness); 137 brightness);
137 if (bd->props.brightness != brightness) { 138 bd->props.brightness = brightness;
138 bd->props.brightness = brightness; 139 backlight_update_status(bd);
139 backlight_update_status(bd);
140 }
141 rc = count; 140 rc = count;
142 } 141 }
143 } 142 }
@@ -170,6 +169,34 @@ static ssize_t backlight_show_actual_brightness(struct device *dev,
170 169
171static struct class *backlight_class; 170static struct class *backlight_class;
172 171
172static int backlight_suspend(struct device *dev, pm_message_t state)
173{
174 struct backlight_device *bd = to_backlight_device(dev);
175
176 if (bd->ops->options & BL_CORE_SUSPENDRESUME) {
177 mutex_lock(&bd->ops_lock);
178 bd->props.state |= BL_CORE_SUSPENDED;
179 backlight_update_status(bd);
180 mutex_unlock(&bd->ops_lock);
181 }
182
183 return 0;
184}
185
186static int backlight_resume(struct device *dev)
187{
188 struct backlight_device *bd = to_backlight_device(dev);
189
190 if (bd->ops->options & BL_CORE_SUSPENDRESUME) {
191 mutex_lock(&bd->ops_lock);
192 bd->props.state &= ~BL_CORE_SUSPENDED;
193 backlight_update_status(bd);
194 mutex_unlock(&bd->ops_lock);
195 }
196
197 return 0;
198}
199
173static void bl_device_release(struct device *dev) 200static void bl_device_release(struct device *dev)
174{ 201{
175 struct backlight_device *bd = to_backlight_device(dev); 202 struct backlight_device *bd = to_backlight_device(dev);
@@ -286,6 +313,8 @@ static int __init backlight_class_init(void)
286 } 313 }
287 314
288 backlight_class->dev_attrs = bl_device_attributes; 315 backlight_class->dev_attrs = bl_device_attributes;
316 backlight_class->suspend = backlight_suspend;
317 backlight_class->resume = backlight_resume;
289 return 0; 318 return 0;
290} 319}
291 320
diff --git a/drivers/video/backlight/corgi_bl.c b/drivers/video/backlight/corgi_bl.c
deleted file mode 100644
index 4d4d037e3ec9..000000000000
--- a/drivers/video/backlight/corgi_bl.c
+++ /dev/null
@@ -1,169 +0,0 @@
1/*
2 * Backlight Driver for Sharp Zaurus Handhelds (various models)
3 *
4 * Copyright (c) 2004-2006 Richard Purdie
5 *
6 * Based on Sharp's 2.4 Backlight Driver
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 *
12 */
13
14#include <linux/module.h>
15#include <linux/kernel.h>
16#include <linux/init.h>
17#include <linux/platform_device.h>
18#include <linux/mutex.h>
19#include <linux/fb.h>
20#include <linux/backlight.h>
21
22static int corgibl_intensity;
23static struct backlight_properties corgibl_data;
24static struct backlight_device *corgi_backlight_device;
25static struct generic_bl_info *bl_machinfo;
26
27static unsigned long corgibl_flags;
28#define CORGIBL_SUSPENDED 0x01
29#define CORGIBL_BATTLOW 0x02
30
31static int corgibl_send_intensity(struct backlight_device *bd)
32{
33 int intensity = bd->props.brightness;
34
35 if (bd->props.power != FB_BLANK_UNBLANK)
36 intensity = 0;
37 if (bd->props.fb_blank != FB_BLANK_UNBLANK)
38 intensity = 0;
39 if (corgibl_flags & CORGIBL_SUSPENDED)
40 intensity = 0;
41 if (corgibl_flags & CORGIBL_BATTLOW)
42 intensity &= bl_machinfo->limit_mask;
43
44 bl_machinfo->set_bl_intensity(intensity);
45
46 corgibl_intensity = intensity;
47
48 if (bl_machinfo->kick_battery)
49 bl_machinfo->kick_battery();
50
51 return 0;
52}
53
54#ifdef CONFIG_PM
55static int corgibl_suspend(struct platform_device *pdev, pm_message_t state)
56{
57 struct backlight_device *bd = platform_get_drvdata(pdev);
58
59 corgibl_flags |= CORGIBL_SUSPENDED;
60 backlight_update_status(bd);
61 return 0;
62}
63
64static int corgibl_resume(struct platform_device *pdev)
65{
66 struct backlight_device *bd = platform_get_drvdata(pdev);
67
68 corgibl_flags &= ~CORGIBL_SUSPENDED;
69 backlight_update_status(bd);
70 return 0;
71}
72#else
73#define corgibl_suspend NULL
74#define corgibl_resume NULL
75#endif
76
77static int corgibl_get_intensity(struct backlight_device *bd)
78{
79 return corgibl_intensity;
80}
81
82/*
83 * Called when the battery is low to limit the backlight intensity.
84 * If limit==0 clear any limit, otherwise limit the intensity
85 */
86void corgibl_limit_intensity(int limit)
87{
88 if (limit)
89 corgibl_flags |= CORGIBL_BATTLOW;
90 else
91 corgibl_flags &= ~CORGIBL_BATTLOW;
92 backlight_update_status(corgi_backlight_device);
93}
94EXPORT_SYMBOL(corgibl_limit_intensity);
95
96
97static struct backlight_ops corgibl_ops = {
98 .get_brightness = corgibl_get_intensity,
99 .update_status = corgibl_send_intensity,
100};
101
102static int corgibl_probe(struct platform_device *pdev)
103{
104 struct generic_bl_info *machinfo = pdev->dev.platform_data;
105 const char *name = "generic-bl";
106
107 bl_machinfo = machinfo;
108 if (!machinfo->limit_mask)
109 machinfo->limit_mask = -1;
110
111 if (machinfo->name)
112 name = machinfo->name;
113
114 corgi_backlight_device = backlight_device_register (name,
115 &pdev->dev, NULL, &corgibl_ops);
116 if (IS_ERR (corgi_backlight_device))
117 return PTR_ERR (corgi_backlight_device);
118
119 platform_set_drvdata(pdev, corgi_backlight_device);
120
121 corgi_backlight_device->props.max_brightness = machinfo->max_intensity;
122 corgi_backlight_device->props.power = FB_BLANK_UNBLANK;
123 corgi_backlight_device->props.brightness = machinfo->default_intensity;
124 backlight_update_status(corgi_backlight_device);
125
126 printk("Corgi Backlight Driver Initialized.\n");
127 return 0;
128}
129
130static int corgibl_remove(struct platform_device *pdev)
131{
132 struct backlight_device *bd = platform_get_drvdata(pdev);
133
134 corgibl_data.power = 0;
135 corgibl_data.brightness = 0;
136 backlight_update_status(bd);
137
138 backlight_device_unregister(bd);
139
140 printk("Corgi Backlight Driver Unloaded\n");
141 return 0;
142}
143
144static struct platform_driver corgibl_driver = {
145 .probe = corgibl_probe,
146 .remove = corgibl_remove,
147 .suspend = corgibl_suspend,
148 .resume = corgibl_resume,
149 .driver = {
150 .name = "generic-bl",
151 },
152};
153
154static int __init corgibl_init(void)
155{
156 return platform_driver_register(&corgibl_driver);
157}
158
159static void __exit corgibl_exit(void)
160{
161 platform_driver_unregister(&corgibl_driver);
162}
163
164module_init(corgibl_init);
165module_exit(corgibl_exit);
166
167MODULE_AUTHOR("Richard Purdie <rpurdie@rpsys.net>");
168MODULE_DESCRIPTION("Corgi Backlight Driver");
169MODULE_LICENSE("GPL");
diff --git a/drivers/video/backlight/cr_bllcd.c b/drivers/video/backlight/cr_bllcd.c
index 26add8898605..b9fe62b475c6 100644
--- a/drivers/video/backlight/cr_bllcd.c
+++ b/drivers/video/backlight/cr_bllcd.c
@@ -259,22 +259,18 @@ static int __init cr_backlight_init(void)
259{ 259{
260 int ret = platform_driver_register(&cr_backlight_driver); 260 int ret = platform_driver_register(&cr_backlight_driver);
261 261
262 if (!ret) { 262 if (ret)
263 crp = platform_device_alloc("cr_backlight", -1); 263 return ret;
264 if (!crp)
265 return -ENOMEM;
266 264
267 ret = platform_device_add(crp); 265 crp = platform_device_register_simple("cr_backlight", -1, NULL, 0);
268 266 if (IS_ERR(crp)) {
269 if (ret) { 267 platform_driver_unregister(&cr_backlight_driver);
270 platform_device_put(crp); 268 return PTR_ERR(crp);
271 platform_driver_unregister(&cr_backlight_driver);
272 }
273 } 269 }
274 270
275 printk("Carillo Ranch Backlight Driver Initialized.\n"); 271 printk("Carillo Ranch Backlight Driver Initialized.\n");
276 272
277 return ret; 273 return 0;
278} 274}
279 275
280static void __exit cr_backlight_exit(void) 276static void __exit cr_backlight_exit(void)
diff --git a/drivers/video/backlight/generic_bl.c b/drivers/video/backlight/generic_bl.c
new file mode 100644
index 000000000000..6d27f62fdcd0
--- /dev/null
+++ b/drivers/video/backlight/generic_bl.c
@@ -0,0 +1,147 @@
1/*
2 * Generic Backlight Driver
3 *
4 * Copyright (c) 2004-2008 Richard Purdie
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 */
11
12#include <linux/module.h>
13#include <linux/kernel.h>
14#include <linux/init.h>
15#include <linux/platform_device.h>
16#include <linux/mutex.h>
17#include <linux/fb.h>
18#include <linux/backlight.h>
19
20static int genericbl_intensity;
21static struct backlight_device *generic_backlight_device;
22static struct generic_bl_info *bl_machinfo;
23
24/* Flag to signal when the battery is low */
25#define GENERICBL_BATTLOW BL_CORE_DRIVER1
26
27static int genericbl_send_intensity(struct backlight_device *bd)
28{
29 int intensity = bd->props.brightness;
30
31 if (bd->props.power != FB_BLANK_UNBLANK)
32 intensity = 0;
33 if (bd->props.state & BL_CORE_FBBLANK)
34 intensity = 0;
35 if (bd->props.state & BL_CORE_SUSPENDED)
36 intensity = 0;
37 if (bd->props.state & GENERICBL_BATTLOW)
38 intensity &= bl_machinfo->limit_mask;
39
40 bl_machinfo->set_bl_intensity(intensity);
41
42 genericbl_intensity = intensity;
43
44 if (bl_machinfo->kick_battery)
45 bl_machinfo->kick_battery();
46
47 return 0;
48}
49
50static int genericbl_get_intensity(struct backlight_device *bd)
51{
52 return genericbl_intensity;
53}
54
55/*
56 * Called when the battery is low to limit the backlight intensity.
57 * If limit==0 clear any limit, otherwise limit the intensity
58 */
59void corgibl_limit_intensity(int limit)
60{
61 struct backlight_device *bd = generic_backlight_device;
62
63 mutex_lock(&bd->ops_lock);
64 if (limit)
65 bd->props.state |= GENERICBL_BATTLOW;
66 else
67 bd->props.state &= ~GENERICBL_BATTLOW;
68 backlight_update_status(generic_backlight_device);
69 mutex_unlock(&bd->ops_lock);
70}
71EXPORT_SYMBOL(corgibl_limit_intensity);
72
73static struct backlight_ops genericbl_ops = {
74 .options = BL_CORE_SUSPENDRESUME,
75 .get_brightness = genericbl_get_intensity,
76 .update_status = genericbl_send_intensity,
77};
78
79static int genericbl_probe(struct platform_device *pdev)
80{
81 struct generic_bl_info *machinfo = pdev->dev.platform_data;
82 const char *name = "generic-bl";
83 struct backlight_device *bd;
84
85 bl_machinfo = machinfo;
86 if (!machinfo->limit_mask)
87 machinfo->limit_mask = -1;
88
89 if (machinfo->name)
90 name = machinfo->name;
91
92 bd = backlight_device_register (name,
93 &pdev->dev, NULL, &genericbl_ops);
94 if (IS_ERR (bd))
95 return PTR_ERR (bd);
96
97 platform_set_drvdata(pdev, bd);
98
99 bd->props.max_brightness = machinfo->max_intensity;
100 bd->props.power = FB_BLANK_UNBLANK;
101 bd->props.brightness = machinfo->default_intensity;
102 backlight_update_status(bd);
103
104 generic_backlight_device = bd;
105
106 printk("Generic Backlight Driver Initialized.\n");
107 return 0;
108}
109
110static int genericbl_remove(struct platform_device *pdev)
111{
112 struct backlight_device *bd = platform_get_drvdata(pdev);
113
114 bd->props.power = 0;
115 bd->props.brightness = 0;
116 backlight_update_status(bd);
117
118 backlight_device_unregister(bd);
119
120 printk("Generic Backlight Driver Unloaded\n");
121 return 0;
122}
123
124static struct platform_driver genericbl_driver = {
125 .probe = genericbl_probe,
126 .remove = genericbl_remove,
127 .driver = {
128 .name = "generic-bl",
129 },
130};
131
132static int __init genericbl_init(void)
133{
134 return platform_driver_register(&genericbl_driver);
135}
136
137static void __exit genericbl_exit(void)
138{
139 platform_driver_unregister(&genericbl_driver);
140}
141
142module_init(genericbl_init);
143module_exit(genericbl_exit);
144
145MODULE_AUTHOR("Richard Purdie <rpurdie@rpsys.net>");
146MODULE_DESCRIPTION("Generic Backlight Driver");
147MODULE_LICENSE("GPL");
diff --git a/drivers/video/backlight/hp680_bl.c b/drivers/video/backlight/hp680_bl.c
index d4cfed0b26d5..5be55a20d8c7 100644
--- a/drivers/video/backlight/hp680_bl.c
+++ b/drivers/video/backlight/hp680_bl.c
@@ -151,19 +151,15 @@ static int __init hp680bl_init(void)
151 int ret; 151 int ret;
152 152
153 ret = platform_driver_register(&hp680bl_driver); 153 ret = platform_driver_register(&hp680bl_driver);
154 if (!ret) { 154 if (ret)
155 hp680bl_device = platform_device_alloc("hp680-bl", -1); 155 return ret;
156 if (!hp680bl_device) 156 hp680bl_device = platform_device_register_simple("hp680-bl", -1,
157 return -ENOMEM; 157 NULL, 0);
158 158 if (IS_ERR(hp680bl_device)) {
159 ret = platform_device_add(hp680bl_device); 159 platform_driver_unregister(&hp680bl_driver);
160 160 return PTR_ERR(hp680bl_device);
161 if (ret) {
162 platform_device_put(hp680bl_device);
163 platform_driver_unregister(&hp680bl_driver);
164 }
165 } 161 }
166 return ret; 162 return 0;
167} 163}
168 164
169static void __exit hp680bl_exit(void) 165static void __exit hp680bl_exit(void)
diff --git a/drivers/video/backlight/mbp_nvidia_bl.c b/drivers/video/backlight/mbp_nvidia_bl.c
index 06964af761c6..65864c500455 100644
--- a/drivers/video/backlight/mbp_nvidia_bl.c
+++ b/drivers/video/backlight/mbp_nvidia_bl.c
@@ -70,6 +70,7 @@ static int mbp_get_intensity(struct backlight_device *bd)
70} 70}
71 71
72static struct backlight_ops mbp_ops = { 72static struct backlight_ops mbp_ops = {
73 .options = BL_CORE_SUSPENDRESUME,
73 .get_brightness = mbp_get_intensity, 74 .get_brightness = mbp_get_intensity,
74 .update_status = mbp_send_intensity, 75 .update_status = mbp_send_intensity,
75}; 76};
diff --git a/drivers/video/backlight/progear_bl.c b/drivers/video/backlight/progear_bl.c
index 15fb4d58b5bc..9edaf24fd82d 100644
--- a/drivers/video/backlight/progear_bl.c
+++ b/drivers/video/backlight/progear_bl.c
@@ -119,20 +119,16 @@ static int __init progearbl_init(void)
119{ 119{
120 int ret = platform_driver_register(&progearbl_driver); 120 int ret = platform_driver_register(&progearbl_driver);
121 121
122 if (!ret) { 122 if (ret)
123 progearbl_device = platform_device_alloc("progear-bl", -1); 123 return ret;
124 if (!progearbl_device) 124 progearbl_device = platform_device_register_simple("progear-bl", -1,
125 return -ENOMEM; 125 NULL, 0);
126 126 if (IS_ERR(progearbl_device)) {
127 ret = platform_device_add(progearbl_device); 127 platform_driver_unregister(&progearbl_driver);
128 128 return PTR_ERR(progearbl_device);
129 if (ret) {
130 platform_device_put(progearbl_device);
131 platform_driver_unregister(&progearbl_driver);
132 }
133 } 129 }
134 130
135 return ret; 131 return 0;
136} 132}
137 133
138static void __exit progearbl_exit(void) 134static void __exit progearbl_exit(void)
diff --git a/drivers/video/backlight/tdo24m.c b/drivers/video/backlight/tdo24m.c
index 8427669162ea..1dae7f8f3c6b 100644
--- a/drivers/video/backlight/tdo24m.c
+++ b/drivers/video/backlight/tdo24m.c
@@ -14,6 +14,7 @@
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/device.h> 15#include <linux/device.h>
16#include <linux/spi/spi.h> 16#include <linux/spi/spi.h>
17#include <linux/spi/tdo24m.h>
17#include <linux/fb.h> 18#include <linux/fb.h>
18#include <linux/lcd.h> 19#include <linux/lcd.h>
19 20
@@ -31,6 +32,9 @@ struct tdo24m {
31 struct spi_transfer xfer; 32 struct spi_transfer xfer;
32 uint8_t *buf; 33 uint8_t *buf;
33 34
35 int (*adj_mode)(struct tdo24m *lcd, int mode);
36 int color_invert;
37
34 int power; 38 int power;
35 int mode; 39 int mode;
36}; 40};
@@ -66,7 +70,7 @@ static uint32_t lcd_panel_off[] = {
66 CMD_NULL, 70 CMD_NULL,
67}; 71};
68 72
69static uint32_t lcd_vga_pass_through[] = { 73static uint32_t lcd_vga_pass_through_tdo24m[] = {
70 CMD1(0xB0, 0x16), 74 CMD1(0xB0, 0x16),
71 CMD1(0xBC, 0x80), 75 CMD1(0xBC, 0x80),
72 CMD1(0xE1, 0x00), 76 CMD1(0xE1, 0x00),
@@ -75,7 +79,7 @@ static uint32_t lcd_vga_pass_through[] = {
75 CMD_NULL, 79 CMD_NULL,
76}; 80};
77 81
78static uint32_t lcd_qvga_pass_through[] = { 82static uint32_t lcd_qvga_pass_through_tdo24m[] = {
79 CMD1(0xB0, 0x16), 83 CMD1(0xB0, 0x16),
80 CMD1(0xBC, 0x81), 84 CMD1(0xBC, 0x81),
81 CMD1(0xE1, 0x00), 85 CMD1(0xE1, 0x00),
@@ -84,7 +88,7 @@ static uint32_t lcd_qvga_pass_through[] = {
84 CMD_NULL, 88 CMD_NULL,
85}; 89};
86 90
87static uint32_t lcd_vga_transfer[] = { 91static uint32_t lcd_vga_transfer_tdo24m[] = {
88 CMD1(0xcf, 0x02), /* Blanking period control (1) */ 92 CMD1(0xcf, 0x02), /* Blanking period control (1) */
89 CMD2(0xd0, 0x08, 0x04), /* Blanking period control (2) */ 93 CMD2(0xd0, 0x08, 0x04), /* Blanking period control (2) */
90 CMD1(0xd1, 0x01), /* CKV timing control on/off */ 94 CMD1(0xd1, 0x01), /* CKV timing control on/off */
@@ -110,6 +114,35 @@ static uint32_t lcd_qvga_transfer[] = {
110 CMD_NULL, 114 CMD_NULL,
111}; 115};
112 116
117static uint32_t lcd_vga_pass_through_tdo35s[] = {
118 CMD1(0xB0, 0x16),
119 CMD1(0xBC, 0x80),
120 CMD1(0xE1, 0x00),
121 CMD1(0x3B, 0x00),
122 CMD_NULL,
123};
124
125static uint32_t lcd_qvga_pass_through_tdo35s[] = {
126 CMD1(0xB0, 0x16),
127 CMD1(0xBC, 0x81),
128 CMD1(0xE1, 0x00),
129 CMD1(0x3B, 0x22),
130 CMD_NULL,
131};
132
133static uint32_t lcd_vga_transfer_tdo35s[] = {
134 CMD1(0xcf, 0x02), /* Blanking period control (1) */
135 CMD2(0xd0, 0x08, 0x04), /* Blanking period control (2) */
136 CMD1(0xd1, 0x01), /* CKV timing control on/off */
137 CMD2(0xd2, 0x00, 0x1e), /* CKV 1,2 timing control */
138 CMD2(0xd3, 0x14, 0x28), /* OEV timing control */
139 CMD2(0xd4, 0x28, 0x64), /* ASW timing control (1) */
140 CMD1(0xd5, 0x28), /* ASW timing control (2) */
141 CMD0(0x21), /* Invert for normally black display */
142 CMD0(0x29), /* Display on */
143 CMD_NULL,
144};
145
113static uint32_t lcd_panel_config[] = { 146static uint32_t lcd_panel_config[] = {
114 CMD2(0xb8, 0xff, 0xf9), /* Output control */ 147 CMD2(0xb8, 0xff, 0xf9), /* Output control */
115 CMD0(0x11), /* sleep out */ 148 CMD0(0x11), /* sleep out */
@@ -148,6 +181,8 @@ static int tdo24m_writes(struct tdo24m *lcd, uint32_t *array)
148 int nparams, err = 0; 181 int nparams, err = 0;
149 182
150 for (; *p != CMD_NULL; p++) { 183 for (; *p != CMD_NULL; p++) {
184 if (!lcd->color_invert && *p == CMD0(0x21))
185 continue;
151 186
152 nparams = (*p >> 30) & 0x3; 187 nparams = (*p >> 30) & 0x3;
153 188
@@ -184,12 +219,33 @@ static int tdo24m_adj_mode(struct tdo24m *lcd, int mode)
184{ 219{
185 switch (mode) { 220 switch (mode) {
186 case MODE_VGA: 221 case MODE_VGA:
187 tdo24m_writes(lcd, lcd_vga_pass_through); 222 tdo24m_writes(lcd, lcd_vga_pass_through_tdo24m);
188 tdo24m_writes(lcd, lcd_panel_config); 223 tdo24m_writes(lcd, lcd_panel_config);
189 tdo24m_writes(lcd, lcd_vga_transfer); 224 tdo24m_writes(lcd, lcd_vga_transfer_tdo24m);
190 break; 225 break;
191 case MODE_QVGA: 226 case MODE_QVGA:
192 tdo24m_writes(lcd, lcd_qvga_pass_through); 227 tdo24m_writes(lcd, lcd_qvga_pass_through_tdo24m);
228 tdo24m_writes(lcd, lcd_panel_config);
229 tdo24m_writes(lcd, lcd_qvga_transfer);
230 break;
231 default:
232 return -EINVAL;
233 }
234
235 lcd->mode = mode;
236 return 0;
237}
238
239static int tdo35s_adj_mode(struct tdo24m *lcd, int mode)
240{
241 switch (mode) {
242 case MODE_VGA:
243 tdo24m_writes(lcd, lcd_vga_pass_through_tdo35s);
244 tdo24m_writes(lcd, lcd_panel_config);
245 tdo24m_writes(lcd, lcd_vga_transfer_tdo35s);
246 break;
247 case MODE_QVGA:
248 tdo24m_writes(lcd, lcd_qvga_pass_through_tdo35s);
193 tdo24m_writes(lcd, lcd_panel_config); 249 tdo24m_writes(lcd, lcd_panel_config);
194 tdo24m_writes(lcd, lcd_qvga_transfer); 250 tdo24m_writes(lcd, lcd_qvga_transfer);
195 break; 251 break;
@@ -213,7 +269,7 @@ static int tdo24m_power_on(struct tdo24m *lcd)
213 if (err) 269 if (err)
214 goto out; 270 goto out;
215 271
216 err = tdo24m_adj_mode(lcd, lcd->mode); 272 err = lcd->adj_mode(lcd, lcd->mode);
217out: 273out:
218 return err; 274 return err;
219} 275}
@@ -262,7 +318,7 @@ static int tdo24m_set_mode(struct lcd_device *ld, struct fb_videomode *m)
262 if (lcd->mode == mode) 318 if (lcd->mode == mode)
263 return 0; 319 return 0;
264 320
265 return tdo24m_adj_mode(lcd, mode); 321 return lcd->adj_mode(lcd, mode);
266} 322}
267 323
268static struct lcd_ops tdo24m_ops = { 324static struct lcd_ops tdo24m_ops = {
@@ -276,8 +332,16 @@ static int __devinit tdo24m_probe(struct spi_device *spi)
276 struct tdo24m *lcd; 332 struct tdo24m *lcd;
277 struct spi_message *m; 333 struct spi_message *m;
278 struct spi_transfer *x; 334 struct spi_transfer *x;
335 struct tdo24m_platform_data *pdata;
336 enum tdo24m_model model;
279 int err; 337 int err;
280 338
339 pdata = spi->dev.platform_data;
340 if (pdata)
341 model = pdata->model;
342 else
343 model = TDO24M;
344
281 spi->bits_per_word = 8; 345 spi->bits_per_word = 8;
282 spi->mode = SPI_MODE_3; 346 spi->mode = SPI_MODE_3;
283 err = spi_setup(spi); 347 err = spi_setup(spi);
@@ -306,6 +370,20 @@ static int __devinit tdo24m_probe(struct spi_device *spi)
306 x->tx_buf = &lcd->buf[0]; 370 x->tx_buf = &lcd->buf[0];
307 spi_message_add_tail(x, m); 371 spi_message_add_tail(x, m);
308 372
373 switch (model) {
374 case TDO24M:
375 lcd->color_invert = 1;
376 lcd->adj_mode = tdo24m_adj_mode;
377 break;
378 case TDO35S:
379 lcd->adj_mode = tdo35s_adj_mode;
380 lcd->color_invert = 0;
381 break;
382 default:
383 dev_err(&spi->dev, "Unsupported model");
384 goto out_free;
385 }
386
309 lcd->lcd_dev = lcd_device_register("tdo24m", &spi->dev, 387 lcd->lcd_dev = lcd_device_register("tdo24m", &spi->dev,
310 lcd, &tdo24m_ops); 388 lcd, &tdo24m_ops);
311 if (IS_ERR(lcd->lcd_dev)) { 389 if (IS_ERR(lcd->lcd_dev)) {
diff --git a/drivers/video/backlight/tosa_lcd.c b/drivers/video/backlight/tosa_lcd.c
index 57a26649f1a5..b7fbc75a62fc 100644
--- a/drivers/video/backlight/tosa_lcd.c
+++ b/drivers/video/backlight/tosa_lcd.c
@@ -39,6 +39,7 @@ struct tosa_lcd_data {
39 struct i2c_client *i2c; 39 struct i2c_client *i2c;
40 40
41 int lcd_power; 41 int lcd_power;
42 bool is_vga;
42}; 43};
43 44
44static int tosa_tg_send(struct spi_device *spi, int adrs, uint8_t data) 45static int tosa_tg_send(struct spi_device *spi, int adrs, uint8_t data)
@@ -81,8 +82,12 @@ static void tosa_lcd_tg_init(struct tosa_lcd_data *data)
81static void tosa_lcd_tg_on(struct tosa_lcd_data *data) 82static void tosa_lcd_tg_on(struct tosa_lcd_data *data)
82{ 83{
83 struct spi_device *spi = data->spi; 84 struct spi_device *spi = data->spi;
84 const int value = TG_REG0_COLOR | TG_REG0_UD | TG_REG0_LR; 85 int value = TG_REG0_COLOR | TG_REG0_UD | TG_REG0_LR;
85 tosa_tg_send(spi, TG_PNLCTL, value | TG_REG0_VQV); /* this depends on mode */ 86
87 if (data->is_vga)
88 value |= TG_REG0_VQV;
89
90 tosa_tg_send(spi, TG_PNLCTL, value);
86 91
87 /* TG LCD pannel power up */ 92 /* TG LCD pannel power up */
88 tosa_tg_send(spi, TG_PINICTL,0x4); 93 tosa_tg_send(spi, TG_PINICTL,0x4);
@@ -142,9 +147,25 @@ static int tosa_lcd_get_power(struct lcd_device *lcd)
142 return data->lcd_power; 147 return data->lcd_power;
143} 148}
144 149
150static int tosa_lcd_set_mode(struct lcd_device *lcd, struct fb_videomode *mode)
151{
152 struct tosa_lcd_data *data = lcd_get_data(lcd);
153
154 if (mode->xres == 320 || mode->yres == 320)
155 data->is_vga = false;
156 else
157 data->is_vga = true;
158
159 if (POWER_IS_ON(data->lcd_power))
160 tosa_lcd_tg_on(data);
161
162 return 0;
163}
164
145static struct lcd_ops tosa_lcd_ops = { 165static struct lcd_ops tosa_lcd_ops = {
146 .set_power = tosa_lcd_set_power, 166 .set_power = tosa_lcd_set_power,
147 .get_power = tosa_lcd_get_power, 167 .get_power = tosa_lcd_get_power,
168 .set_mode = tosa_lcd_set_mode,
148}; 169};
149 170
150static int __devinit tosa_lcd_probe(struct spi_device *spi) 171static int __devinit tosa_lcd_probe(struct spi_device *spi)
@@ -156,6 +177,8 @@ static int __devinit tosa_lcd_probe(struct spi_device *spi)
156 if (!data) 177 if (!data)
157 return -ENOMEM; 178 return -ENOMEM;
158 179
180 data->is_vga = true; /* defaut to VGA mode */
181
159 /* 182 /*
160 * bits_per_word cannot be configured in platform data 183 * bits_per_word cannot be configured in platform data
161 */ 184 */
diff --git a/drivers/video/backlight/vgg2432a4.c b/drivers/video/backlight/vgg2432a4.c
index 593c7687d54a..8e653b8a6f17 100644
--- a/drivers/video/backlight/vgg2432a4.c
+++ b/drivers/video/backlight/vgg2432a4.c
@@ -137,7 +137,7 @@ static int vgg2432a4_lcd_init(struct ili9320 *lcd,
137 137
138 ili9320_write(lcd, ILI9320_RGB_IF1, cfg->rgb_if1); 138 ili9320_write(lcd, ILI9320_RGB_IF1, cfg->rgb_if1);
139 ili9320_write(lcd, ILI9320_FRAMEMAKER, 0x0); 139 ili9320_write(lcd, ILI9320_FRAMEMAKER, 0x0);
140 ili9320_write(lcd, ILI9320_RGB_IF2, ILI9320_RGBIF2_DPL); 140 ili9320_write(lcd, ILI9320_RGB_IF2, cfg->rgb_if2);
141 141
142 ret = ili9320_write_regs(lcd, vgg_init1, ARRAY_SIZE(vgg_init1)); 142 ret = ili9320_write_regs(lcd, vgg_init1, ARRAY_SIZE(vgg_init1));
143 if (ret != 0) 143 if (ret != 0)
diff --git a/fs/Kconfig b/fs/Kconfig
index 32883589ee54..02cff86af1b4 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -269,6 +269,25 @@ config OCFS2_FS_POSIX_ACL
269 Posix Access Control Lists (ACLs) support permissions for users and 269 Posix Access Control Lists (ACLs) support permissions for users and
270 groups beyond the owner/group/world scheme. 270 groups beyond the owner/group/world scheme.
271 271
272config BTRFS_FS
273 tristate "Btrfs filesystem (EXPERIMENTAL) Unstable disk format"
274 depends on EXPERIMENTAL
275 select LIBCRC32C
276 select ZLIB_INFLATE
277 select ZLIB_DEFLATE
278 help
279 Btrfs is a new filesystem with extents, writable snapshotting,
280 support for multiple devices and many more features.
281
282 Btrfs is highly experimental, and THE DISK FORMAT IS NOT YET
283 FINALIZED. You should say N here unless you are interested in
284 testing Btrfs with non-critical data.
285
286 To compile this file system support as a module, choose M here. The
287 module will be called btrfs.
288
289 If unsure, say N.
290
272endif # BLOCK 291endif # BLOCK
273 292
274source "fs/notify/Kconfig" 293source "fs/notify/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index c830611550d3..bc4e14df1082 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -119,4 +119,5 @@ obj-$(CONFIG_HOSTFS) += hostfs/
119obj-$(CONFIG_HPPFS) += hppfs/ 119obj-$(CONFIG_HPPFS) += hppfs/
120obj-$(CONFIG_DEBUG_FS) += debugfs/ 120obj-$(CONFIG_DEBUG_FS) += debugfs/
121obj-$(CONFIG_OCFS2_FS) += ocfs2/ 121obj-$(CONFIG_OCFS2_FS) += ocfs2/
122obj-$(CONFIG_BTRFS_FS) += btrfs/
122obj-$(CONFIG_GFS2_FS) += gfs2/ 123obj-$(CONFIG_GFS2_FS) += gfs2/
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
new file mode 100644
index 000000000000..d2cf5a54a4b8
--- /dev/null
+++ b/fs/btrfs/Makefile
@@ -0,0 +1,25 @@
1ifneq ($(KERNELRELEASE),)
2# kbuild part of makefile
3
4obj-$(CONFIG_BTRFS_FS) := btrfs.o
5btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
6 file-item.o inode-item.o inode-map.o disk-io.o \
7 transaction.o inode.o file.o tree-defrag.o \
8 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
9 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
10 ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
11 compression.o
12else
13
14# Normal Makefile
15
16KERNELDIR := /lib/modules/`uname -r`/build
17all:
18 $(MAKE) -C $(KERNELDIR) M=`pwd` CONFIG_BTRFS_FS=m modules
19
20modules_install:
21 $(MAKE) -C $(KERNELDIR) M=`pwd` modules_install
22clean:
23 $(MAKE) -C $(KERNELDIR) M=`pwd` clean
24
25endif
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
new file mode 100644
index 000000000000..1d53b62dbba5
--- /dev/null
+++ b/fs/btrfs/acl.c
@@ -0,0 +1,351 @@
1/*
2 * Copyright (C) 2007 Red Hat. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/fs.h>
20#include <linux/string.h>
21#include <linux/xattr.h>
22#include <linux/posix_acl_xattr.h>
23#include <linux/posix_acl.h>
24#include <linux/sched.h>
25
26#include "ctree.h"
27#include "btrfs_inode.h"
28#include "xattr.h"
29
30#ifdef CONFIG_FS_POSIX_ACL
31
32static void btrfs_update_cached_acl(struct inode *inode,
33 struct posix_acl **p_acl,
34 struct posix_acl *acl)
35{
36 spin_lock(&inode->i_lock);
37 if (*p_acl && *p_acl != BTRFS_ACL_NOT_CACHED)
38 posix_acl_release(*p_acl);
39 *p_acl = posix_acl_dup(acl);
40 spin_unlock(&inode->i_lock);
41}
42
43static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
44{
45 int size;
46 const char *name;
47 char *value = NULL;
48 struct posix_acl *acl = NULL, **p_acl;
49
50 switch (type) {
51 case ACL_TYPE_ACCESS:
52 name = POSIX_ACL_XATTR_ACCESS;
53 p_acl = &BTRFS_I(inode)->i_acl;
54 break;
55 case ACL_TYPE_DEFAULT:
56 name = POSIX_ACL_XATTR_DEFAULT;
57 p_acl = &BTRFS_I(inode)->i_default_acl;
58 break;
59 default:
60 return ERR_PTR(-EINVAL);
61 }
62
63 spin_lock(&inode->i_lock);
64 if (*p_acl != BTRFS_ACL_NOT_CACHED)
65 acl = posix_acl_dup(*p_acl);
66 spin_unlock(&inode->i_lock);
67
68 if (acl)
69 return acl;
70
71
72 size = __btrfs_getxattr(inode, name, "", 0);
73 if (size > 0) {
74 value = kzalloc(size, GFP_NOFS);
75 if (!value)
76 return ERR_PTR(-ENOMEM);
77 size = __btrfs_getxattr(inode, name, value, size);
78 if (size > 0) {
79 acl = posix_acl_from_xattr(value, size);
80 btrfs_update_cached_acl(inode, p_acl, acl);
81 }
82 kfree(value);
83 } else if (size == -ENOENT) {
84 acl = NULL;
85 btrfs_update_cached_acl(inode, p_acl, acl);
86 }
87
88 return acl;
89}
90
91static int btrfs_xattr_get_acl(struct inode *inode, int type,
92 void *value, size_t size)
93{
94 struct posix_acl *acl;
95 int ret = 0;
96
97 acl = btrfs_get_acl(inode, type);
98
99 if (IS_ERR(acl))
100 return PTR_ERR(acl);
101 if (acl == NULL)
102 return -ENODATA;
103 ret = posix_acl_to_xattr(acl, value, size);
104 posix_acl_release(acl);
105
106 return ret;
107}
108
109/*
110 * Needs to be called with fs_mutex held
111 */
112static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
113{
114 int ret, size = 0;
115 const char *name;
116 struct posix_acl **p_acl;
117 char *value = NULL;
118 mode_t mode;
119
120 if (acl) {
121 ret = posix_acl_valid(acl);
122 if (ret < 0)
123 return ret;
124 ret = 0;
125 }
126
127 switch (type) {
128 case ACL_TYPE_ACCESS:
129 mode = inode->i_mode;
130 ret = posix_acl_equiv_mode(acl, &mode);
131 if (ret < 0)
132 return ret;
133 ret = 0;
134 inode->i_mode = mode;
135 name = POSIX_ACL_XATTR_ACCESS;
136 p_acl = &BTRFS_I(inode)->i_acl;
137 break;
138 case ACL_TYPE_DEFAULT:
139 if (!S_ISDIR(inode->i_mode))
140 return acl ? -EINVAL : 0;
141 name = POSIX_ACL_XATTR_DEFAULT;
142 p_acl = &BTRFS_I(inode)->i_default_acl;
143 break;
144 default:
145 return -EINVAL;
146 }
147
148 if (acl) {
149 size = posix_acl_xattr_size(acl->a_count);
150 value = kmalloc(size, GFP_NOFS);
151 if (!value) {
152 ret = -ENOMEM;
153 goto out;
154 }
155
156 ret = posix_acl_to_xattr(acl, value, size);
157 if (ret < 0)
158 goto out;
159 }
160
161 ret = __btrfs_setxattr(inode, name, value, size, 0);
162
163out:
164 kfree(value);
165
166 if (!ret)
167 btrfs_update_cached_acl(inode, p_acl, acl);
168
169 return ret;
170}
171
172static int btrfs_xattr_set_acl(struct inode *inode, int type,
173 const void *value, size_t size)
174{
175 int ret = 0;
176 struct posix_acl *acl = NULL;
177
178 if (value) {
179 acl = posix_acl_from_xattr(value, size);
180 if (acl == NULL) {
181 value = NULL;
182 size = 0;
183 } else if (IS_ERR(acl)) {
184 return PTR_ERR(acl);
185 }
186 }
187
188 ret = btrfs_set_acl(inode, acl, type);
189
190 posix_acl_release(acl);
191
192 return ret;
193}
194
195
196static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name,
197 void *value, size_t size)
198{
199 return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size);
200}
201
202static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name,
203 const void *value, size_t size, int flags)
204{
205 return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
206}
207
208static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name,
209 void *value, size_t size)
210{
211 return btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size);
212}
213
214static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name,
215 const void *value, size_t size, int flags)
216{
217 return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
218}
219
220int btrfs_check_acl(struct inode *inode, int mask)
221{
222 struct posix_acl *acl;
223 int error = -EAGAIN;
224
225 acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
226
227 if (IS_ERR(acl))
228 return PTR_ERR(acl);
229 if (acl) {
230 error = posix_acl_permission(inode, acl, mask);
231 posix_acl_release(acl);
232 }
233
234 return error;
235}
236
237/*
238 * btrfs_init_acl is already generally called under fs_mutex, so the locking
239 * stuff has been fixed to work with that. If the locking stuff changes, we
240 * need to re-evaluate the acl locking stuff.
241 */
242int btrfs_init_acl(struct inode *inode, struct inode *dir)
243{
244 struct posix_acl *acl = NULL;
245 int ret = 0;
246
247 /* this happens with subvols */
248 if (!dir)
249 return 0;
250
251 if (!S_ISLNK(inode->i_mode)) {
252 if (IS_POSIXACL(dir)) {
253 acl = btrfs_get_acl(dir, ACL_TYPE_DEFAULT);
254 if (IS_ERR(acl))
255 return PTR_ERR(acl);
256 }
257
258 if (!acl)
259 inode->i_mode &= ~current->fs->umask;
260 }
261
262 if (IS_POSIXACL(dir) && acl) {
263 struct posix_acl *clone;
264 mode_t mode;
265
266 if (S_ISDIR(inode->i_mode)) {
267 ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT);
268 if (ret)
269 goto failed;
270 }
271 clone = posix_acl_clone(acl, GFP_NOFS);
272 ret = -ENOMEM;
273 if (!clone)
274 goto failed;
275
276 mode = inode->i_mode;
277 ret = posix_acl_create_masq(clone, &mode);
278 if (ret >= 0) {
279 inode->i_mode = mode;
280 if (ret > 0) {
281 /* we need an acl */
282 ret = btrfs_set_acl(inode, clone,
283 ACL_TYPE_ACCESS);
284 }
285 }
286 }
287failed:
288 posix_acl_release(acl);
289
290 return ret;
291}
292
293int btrfs_acl_chmod(struct inode *inode)
294{
295 struct posix_acl *acl, *clone;
296 int ret = 0;
297
298 if (S_ISLNK(inode->i_mode))
299 return -EOPNOTSUPP;
300
301 if (!IS_POSIXACL(inode))
302 return 0;
303
304 acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
305 if (IS_ERR(acl) || !acl)
306 return PTR_ERR(acl);
307
308 clone = posix_acl_clone(acl, GFP_KERNEL);
309 posix_acl_release(acl);
310 if (!clone)
311 return -ENOMEM;
312
313 ret = posix_acl_chmod_masq(clone, inode->i_mode);
314 if (!ret)
315 ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS);
316
317 posix_acl_release(clone);
318
319 return ret;
320}
321
322struct xattr_handler btrfs_xattr_acl_default_handler = {
323 .prefix = POSIX_ACL_XATTR_DEFAULT,
324 .get = btrfs_xattr_acl_default_get,
325 .set = btrfs_xattr_acl_default_set,
326};
327
328struct xattr_handler btrfs_xattr_acl_access_handler = {
329 .prefix = POSIX_ACL_XATTR_ACCESS,
330 .get = btrfs_xattr_acl_access_get,
331 .set = btrfs_xattr_acl_access_set,
332};
333
334#else /* CONFIG_FS_POSIX_ACL */
335
336int btrfs_acl_chmod(struct inode *inode)
337{
338 return 0;
339}
340
341int btrfs_init_acl(struct inode *inode, struct inode *dir)
342{
343 return 0;
344}
345
346int btrfs_check_acl(struct inode *inode, int mask)
347{
348 return 0;
349}
350
351#endif /* CONFIG_FS_POSIX_ACL */
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
new file mode 100644
index 000000000000..8e2fec05dbe0
--- /dev/null
+++ b/fs/btrfs/async-thread.c
@@ -0,0 +1,419 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/version.h>
20#include <linux/kthread.h>
21#include <linux/list.h>
22#include <linux/spinlock.h>
23# include <linux/freezer.h>
24#include "async-thread.h"
25
26#define WORK_QUEUED_BIT 0
27#define WORK_DONE_BIT 1
28#define WORK_ORDER_DONE_BIT 2
29
30/*
31 * container for the kthread task pointer and the list of pending work
32 * One of these is allocated per thread.
33 */
34struct btrfs_worker_thread {
35 /* pool we belong to */
36 struct btrfs_workers *workers;
37
38 /* list of struct btrfs_work that are waiting for service */
39 struct list_head pending;
40
41 /* list of worker threads from struct btrfs_workers */
42 struct list_head worker_list;
43
44 /* kthread */
45 struct task_struct *task;
46
47 /* number of things on the pending list */
48 atomic_t num_pending;
49
50 unsigned long sequence;
51
52 /* protects the pending list. */
53 spinlock_t lock;
54
55 /* set to non-zero when this thread is already awake and kicking */
56 int working;
57
58 /* are we currently idle */
59 int idle;
60};
61
62/*
63 * helper function to move a thread onto the idle list after it
64 * has finished some requests.
65 */
66static void check_idle_worker(struct btrfs_worker_thread *worker)
67{
68 if (!worker->idle && atomic_read(&worker->num_pending) <
69 worker->workers->idle_thresh / 2) {
70 unsigned long flags;
71 spin_lock_irqsave(&worker->workers->lock, flags);
72 worker->idle = 1;
73 list_move(&worker->worker_list, &worker->workers->idle_list);
74 spin_unlock_irqrestore(&worker->workers->lock, flags);
75 }
76}
77
78/*
79 * helper function to move a thread off the idle list after new
80 * pending work is added.
81 */
82static void check_busy_worker(struct btrfs_worker_thread *worker)
83{
84 if (worker->idle && atomic_read(&worker->num_pending) >=
85 worker->workers->idle_thresh) {
86 unsigned long flags;
87 spin_lock_irqsave(&worker->workers->lock, flags);
88 worker->idle = 0;
89 list_move_tail(&worker->worker_list,
90 &worker->workers->worker_list);
91 spin_unlock_irqrestore(&worker->workers->lock, flags);
92 }
93}
94
95static noinline int run_ordered_completions(struct btrfs_workers *workers,
96 struct btrfs_work *work)
97{
98 unsigned long flags;
99
100 if (!workers->ordered)
101 return 0;
102
103 set_bit(WORK_DONE_BIT, &work->flags);
104
105 spin_lock_irqsave(&workers->lock, flags);
106
107 while (!list_empty(&workers->order_list)) {
108 work = list_entry(workers->order_list.next,
109 struct btrfs_work, order_list);
110
111 if (!test_bit(WORK_DONE_BIT, &work->flags))
112 break;
113
114 /* we are going to call the ordered done function, but
115 * we leave the work item on the list as a barrier so
116 * that later work items that are done don't have their
117 * functions called before this one returns
118 */
119 if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
120 break;
121
122 spin_unlock_irqrestore(&workers->lock, flags);
123
124 work->ordered_func(work);
125
126 /* now take the lock again and call the freeing code */
127 spin_lock_irqsave(&workers->lock, flags);
128 list_del(&work->order_list);
129 work->ordered_free(work);
130 }
131
132 spin_unlock_irqrestore(&workers->lock, flags);
133 return 0;
134}
135
136/*
137 * main loop for servicing work items
138 */
139static int worker_loop(void *arg)
140{
141 struct btrfs_worker_thread *worker = arg;
142 struct list_head *cur;
143 struct btrfs_work *work;
144 do {
145 spin_lock_irq(&worker->lock);
146 while (!list_empty(&worker->pending)) {
147 cur = worker->pending.next;
148 work = list_entry(cur, struct btrfs_work, list);
149 list_del(&work->list);
150 clear_bit(WORK_QUEUED_BIT, &work->flags);
151
152 work->worker = worker;
153 spin_unlock_irq(&worker->lock);
154
155 work->func(work);
156
157 atomic_dec(&worker->num_pending);
158 /*
159 * unless this is an ordered work queue,
160 * 'work' was probably freed by func above.
161 */
162 run_ordered_completions(worker->workers, work);
163
164 spin_lock_irq(&worker->lock);
165 check_idle_worker(worker);
166
167 }
168 worker->working = 0;
169 if (freezing(current)) {
170 refrigerator();
171 } else {
172 set_current_state(TASK_INTERRUPTIBLE);
173 spin_unlock_irq(&worker->lock);
174 if (!kthread_should_stop())
175 schedule();
176 __set_current_state(TASK_RUNNING);
177 }
178 } while (!kthread_should_stop());
179 return 0;
180}
181
182/*
183 * this will wait for all the worker threads to shutdown
184 */
185int btrfs_stop_workers(struct btrfs_workers *workers)
186{
187 struct list_head *cur;
188 struct btrfs_worker_thread *worker;
189
190 list_splice_init(&workers->idle_list, &workers->worker_list);
191 while (!list_empty(&workers->worker_list)) {
192 cur = workers->worker_list.next;
193 worker = list_entry(cur, struct btrfs_worker_thread,
194 worker_list);
195 kthread_stop(worker->task);
196 list_del(&worker->worker_list);
197 kfree(worker);
198 }
199 return 0;
200}
201
202/*
203 * simple init on struct btrfs_workers
204 */
205void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
206{
207 workers->num_workers = 0;
208 INIT_LIST_HEAD(&workers->worker_list);
209 INIT_LIST_HEAD(&workers->idle_list);
210 INIT_LIST_HEAD(&workers->order_list);
211 spin_lock_init(&workers->lock);
212 workers->max_workers = max;
213 workers->idle_thresh = 32;
214 workers->name = name;
215 workers->ordered = 0;
216}
217
218/*
219 * starts new worker threads. This does not enforce the max worker
220 * count in case you need to temporarily go past it.
221 */
222int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
223{
224 struct btrfs_worker_thread *worker;
225 int ret = 0;
226 int i;
227
228 for (i = 0; i < num_workers; i++) {
229 worker = kzalloc(sizeof(*worker), GFP_NOFS);
230 if (!worker) {
231 ret = -ENOMEM;
232 goto fail;
233 }
234
235 INIT_LIST_HEAD(&worker->pending);
236 INIT_LIST_HEAD(&worker->worker_list);
237 spin_lock_init(&worker->lock);
238 atomic_set(&worker->num_pending, 0);
239 worker->task = kthread_run(worker_loop, worker,
240 "btrfs-%s-%d", workers->name,
241 workers->num_workers + i);
242 worker->workers = workers;
243 if (IS_ERR(worker->task)) {
244 kfree(worker);
245 ret = PTR_ERR(worker->task);
246 goto fail;
247 }
248
249 spin_lock_irq(&workers->lock);
250 list_add_tail(&worker->worker_list, &workers->idle_list);
251 worker->idle = 1;
252 workers->num_workers++;
253 spin_unlock_irq(&workers->lock);
254 }
255 return 0;
256fail:
257 btrfs_stop_workers(workers);
258 return ret;
259}
260
261/*
262 * run through the list and find a worker thread that doesn't have a lot
263 * to do right now. This can return null if we aren't yet at the thread
264 * count limit and all of the threads are busy.
265 */
266static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
267{
268 struct btrfs_worker_thread *worker;
269 struct list_head *next;
270 int enforce_min = workers->num_workers < workers->max_workers;
271
272 /*
273 * if we find an idle thread, don't move it to the end of the
274 * idle list. This improves the chance that the next submission
275 * will reuse the same thread, and maybe catch it while it is still
276 * working
277 */
278 if (!list_empty(&workers->idle_list)) {
279 next = workers->idle_list.next;
280 worker = list_entry(next, struct btrfs_worker_thread,
281 worker_list);
282 return worker;
283 }
284 if (enforce_min || list_empty(&workers->worker_list))
285 return NULL;
286
287 /*
288 * if we pick a busy task, move the task to the end of the list.
289 * hopefully this will keep things somewhat evenly balanced.
290 * Do the move in batches based on the sequence number. This groups
291 * requests submitted at roughly the same time onto the same worker.
292 */
293 next = workers->worker_list.next;
294 worker = list_entry(next, struct btrfs_worker_thread, worker_list);
295 atomic_inc(&worker->num_pending);
296 worker->sequence++;
297
298 if (worker->sequence % workers->idle_thresh == 0)
299 list_move_tail(next, &workers->worker_list);
300 return worker;
301}
302
303/*
304 * selects a worker thread to take the next job. This will either find
305 * an idle worker, start a new worker up to the max count, or just return
306 * one of the existing busy workers.
307 */
308static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
309{
310 struct btrfs_worker_thread *worker;
311 unsigned long flags;
312
313again:
314 spin_lock_irqsave(&workers->lock, flags);
315 worker = next_worker(workers);
316 spin_unlock_irqrestore(&workers->lock, flags);
317
318 if (!worker) {
319 spin_lock_irqsave(&workers->lock, flags);
320 if (workers->num_workers >= workers->max_workers) {
321 struct list_head *fallback = NULL;
322 /*
323 * we have failed to find any workers, just
324 * return the force one
325 */
326 if (!list_empty(&workers->worker_list))
327 fallback = workers->worker_list.next;
328 if (!list_empty(&workers->idle_list))
329 fallback = workers->idle_list.next;
330 BUG_ON(!fallback);
331 worker = list_entry(fallback,
332 struct btrfs_worker_thread, worker_list);
333 spin_unlock_irqrestore(&workers->lock, flags);
334 } else {
335 spin_unlock_irqrestore(&workers->lock, flags);
336 /* we're below the limit, start another worker */
337 btrfs_start_workers(workers, 1);
338 goto again;
339 }
340 }
341 return worker;
342}
343
344/*
345 * btrfs_requeue_work just puts the work item back on the tail of the list
346 * it was taken from. It is intended for use with long running work functions
347 * that make some progress and want to give the cpu up for others.
348 */
349int btrfs_requeue_work(struct btrfs_work *work)
350{
351 struct btrfs_worker_thread *worker = work->worker;
352 unsigned long flags;
353
354 if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
355 goto out;
356
357 spin_lock_irqsave(&worker->lock, flags);
358 atomic_inc(&worker->num_pending);
359 list_add_tail(&work->list, &worker->pending);
360
361 /* by definition we're busy, take ourselves off the idle
362 * list
363 */
364 if (worker->idle) {
365 spin_lock_irqsave(&worker->workers->lock, flags);
366 worker->idle = 0;
367 list_move_tail(&worker->worker_list,
368 &worker->workers->worker_list);
369 spin_unlock_irqrestore(&worker->workers->lock, flags);
370 }
371
372 spin_unlock_irqrestore(&worker->lock, flags);
373
374out:
375 return 0;
376}
377
378/*
379 * places a struct btrfs_work into the pending queue of one of the kthreads
380 */
381int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
382{
383 struct btrfs_worker_thread *worker;
384 unsigned long flags;
385 int wake = 0;
386
387 /* don't requeue something already on a list */
388 if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
389 goto out;
390
391 worker = find_worker(workers);
392 if (workers->ordered) {
393 spin_lock_irqsave(&workers->lock, flags);
394 list_add_tail(&work->order_list, &workers->order_list);
395 spin_unlock_irqrestore(&workers->lock, flags);
396 } else {
397 INIT_LIST_HEAD(&work->order_list);
398 }
399
400 spin_lock_irqsave(&worker->lock, flags);
401 atomic_inc(&worker->num_pending);
402 check_busy_worker(worker);
403 list_add_tail(&work->list, &worker->pending);
404
405 /*
406 * avoid calling into wake_up_process if this thread has already
407 * been kicked
408 */
409 if (!worker->working)
410 wake = 1;
411 worker->working = 1;
412
413 spin_unlock_irqrestore(&worker->lock, flags);
414
415 if (wake)
416 wake_up_process(worker->task);
417out:
418 return 0;
419}
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
new file mode 100644
index 000000000000..31be4ed8b63e
--- /dev/null
+++ b/fs/btrfs/async-thread.h
@@ -0,0 +1,101 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __BTRFS_ASYNC_THREAD_
20#define __BTRFS_ASYNC_THREAD_
21
22struct btrfs_worker_thread;
23
24/*
25 * This is similar to a workqueue, but it is meant to spread the operations
26 * across all available cpus instead of just the CPU that was used to
27 * queue the work. There is also some batching introduced to try and
28 * cut down on context switches.
29 *
30 * By default threads are added on demand up to 2 * the number of cpus.
31 * Changing struct btrfs_workers->max_workers is one way to prevent
32 * demand creation of kthreads.
33 *
34 * the basic model of these worker threads is to embed a btrfs_work
35 * structure in your own data struct, and use container_of in a
36 * work function to get back to your data struct.
37 */
38struct btrfs_work {
39 /*
40 * func should be set to the function you want called
41 * your work struct is passed as the only arg
42 *
43 * ordered_func must be set for work sent to an ordered work queue,
44 * and it is called to complete a given work item in the same
45 * order they were sent to the queue.
46 */
47 void (*func)(struct btrfs_work *work);
48 void (*ordered_func)(struct btrfs_work *work);
49 void (*ordered_free)(struct btrfs_work *work);
50
51 /*
52 * flags should be set to zero. It is used to make sure the
53 * struct is only inserted once into the list.
54 */
55 unsigned long flags;
56
57 /* don't touch these */
58 struct btrfs_worker_thread *worker;
59 struct list_head list;
60 struct list_head order_list;
61};
62
63struct btrfs_workers {
64 /* current number of running workers */
65 int num_workers;
66
67 /* max number of workers allowed. changed by btrfs_start_workers */
68 int max_workers;
69
70 /* once a worker has this many requests or fewer, it is idle */
71 int idle_thresh;
72
73 /* force completions in the order they were queued */
74 int ordered;
75
76 /* list with all the work threads. The workers on the idle thread
77 * may be actively servicing jobs, but they haven't yet hit the
78 * idle thresh limit above.
79 */
80 struct list_head worker_list;
81 struct list_head idle_list;
82
83 /*
84 * when operating in ordered mode, this maintains the list
85 * of work items waiting for completion
86 */
87 struct list_head order_list;
88
89 /* lock for finding the next worker thread to queue on */
90 spinlock_t lock;
91
92 /* extra name for this worker, used for current->name */
93 char *name;
94};
95
96int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
97int btrfs_start_workers(struct btrfs_workers *workers, int num_workers);
98int btrfs_stop_workers(struct btrfs_workers *workers);
99void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max);
100int btrfs_requeue_work(struct btrfs_work *work);
101#endif
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
new file mode 100644
index 000000000000..a8c9693b75ac
--- /dev/null
+++ b/fs/btrfs/btrfs_inode.h
@@ -0,0 +1,131 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __BTRFS_I__
20#define __BTRFS_I__
21
22#include "extent_map.h"
23#include "extent_io.h"
24#include "ordered-data.h"
25
26/* in memory btrfs inode */
27struct btrfs_inode {
28 /* which subvolume this inode belongs to */
29 struct btrfs_root *root;
30
31 /* key used to find this inode on disk. This is used by the code
32 * to read in roots of subvolumes
33 */
34 struct btrfs_key location;
35
36 /* the extent_tree has caches of all the extent mappings to disk */
37 struct extent_map_tree extent_tree;
38
39 /* the io_tree does range state (DIRTY, LOCKED etc) */
40 struct extent_io_tree io_tree;
41
42 /* special utility tree used to record which mirrors have already been
43 * tried when checksums fail for a given block
44 */
45 struct extent_io_tree io_failure_tree;
46
47 /* held while inesrting or deleting extents from files */
48 struct mutex extent_mutex;
49
50 /* held while logging the inode in tree-log.c */
51 struct mutex log_mutex;
52
53 /* used to order data wrt metadata */
54 struct btrfs_ordered_inode_tree ordered_tree;
55
56 /* standard acl pointers */
57 struct posix_acl *i_acl;
58 struct posix_acl *i_default_acl;
59
60 /* for keeping track of orphaned inodes */
61 struct list_head i_orphan;
62
63 /* list of all the delalloc inodes in the FS. There are times we need
64 * to write all the delalloc pages to disk, and this list is used
65 * to walk them all.
66 */
67 struct list_head delalloc_inodes;
68
69 /* full 64 bit generation number, struct vfs_inode doesn't have a big
70 * enough field for this.
71 */
72 u64 generation;
73
74 /* sequence number for NFS changes */
75 u64 sequence;
76
77 /*
78 * transid of the trans_handle that last modified this inode
79 */
80 u64 last_trans;
81 /*
82 * transid that last logged this inode
83 */
84 u64 logged_trans;
85
86 /*
87 * trans that last made a change that should be fully fsync'd. This
88 * gets reset to zero each time the inode is logged
89 */
90 u64 log_dirty_trans;
91
92 /* total number of bytes pending delalloc, used by stat to calc the
93 * real block usage of the file
94 */
95 u64 delalloc_bytes;
96
97 /*
98 * the size of the file stored in the metadata on disk. data=ordered
99 * means the in-memory i_size might be larger than the size on disk
100 * because not all the blocks are written yet.
101 */
102 u64 disk_i_size;
103
104 /* flags field from the on disk inode */
105 u32 flags;
106
107 /*
108 * if this is a directory then index_cnt is the counter for the index
109 * number for new files that are created
110 */
111 u64 index_cnt;
112
113 /* the start of block group preferred for allocations. */
114 u64 block_group;
115
116 struct inode vfs_inode;
117};
118
119static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
120{
121 return container_of(inode, struct btrfs_inode, vfs_inode);
122}
123
124static inline void btrfs_i_size_write(struct inode *inode, u64 size)
125{
126 inode->i_size = size;
127 BTRFS_I(inode)->disk_i_size = size;
128}
129
130
131#endif
diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h
new file mode 100644
index 000000000000..7c4503ef6efd
--- /dev/null
+++ b/fs/btrfs/compat.h
@@ -0,0 +1,7 @@
1#ifndef _COMPAT_H_
2#define _COMPAT_H_
3
4#define btrfs_drop_nlink(inode) drop_nlink(inode)
5#define btrfs_inc_nlink(inode) inc_nlink(inode)
6
7#endif /* _COMPAT_H_ */
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
new file mode 100644
index 000000000000..ee848d8585d9
--- /dev/null
+++ b/fs/btrfs/compression.c
@@ -0,0 +1,709 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/kernel.h>
20#include <linux/bio.h>
21#include <linux/buffer_head.h>
22#include <linux/file.h>
23#include <linux/fs.h>
24#include <linux/pagemap.h>
25#include <linux/highmem.h>
26#include <linux/time.h>
27#include <linux/init.h>
28#include <linux/string.h>
29#include <linux/smp_lock.h>
30#include <linux/backing-dev.h>
31#include <linux/mpage.h>
32#include <linux/swap.h>
33#include <linux/writeback.h>
34#include <linux/bit_spinlock.h>
35#include <linux/version.h>
36#include <linux/pagevec.h>
37#include "compat.h"
38#include "ctree.h"
39#include "disk-io.h"
40#include "transaction.h"
41#include "btrfs_inode.h"
42#include "volumes.h"
43#include "ordered-data.h"
44#include "compression.h"
45#include "extent_io.h"
46#include "extent_map.h"
47
48struct compressed_bio {
49 /* number of bios pending for this compressed extent */
50 atomic_t pending_bios;
51
52 /* the pages with the compressed data on them */
53 struct page **compressed_pages;
54
55 /* inode that owns this data */
56 struct inode *inode;
57
58 /* starting offset in the inode for our pages */
59 u64 start;
60
61 /* number of bytes in the inode we're working on */
62 unsigned long len;
63
64 /* number of bytes on disk */
65 unsigned long compressed_len;
66
67 /* number of compressed pages in the array */
68 unsigned long nr_pages;
69
70 /* IO errors */
71 int errors;
72 int mirror_num;
73
74 /* for reads, this is the bio we are copying the data into */
75 struct bio *orig_bio;
76
77 /*
78 * the start of a variable length array of checksums only
79 * used by reads
80 */
81 u32 sums;
82};
83
84static inline int compressed_bio_size(struct btrfs_root *root,
85 unsigned long disk_size)
86{
87 u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
88 return sizeof(struct compressed_bio) +
89 ((disk_size + root->sectorsize - 1) / root->sectorsize) *
90 csum_size;
91}
92
93static struct bio *compressed_bio_alloc(struct block_device *bdev,
94 u64 first_byte, gfp_t gfp_flags)
95{
96 struct bio *bio;
97 int nr_vecs;
98
99 nr_vecs = bio_get_nr_vecs(bdev);
100 bio = bio_alloc(gfp_flags, nr_vecs);
101
102 if (bio == NULL && (current->flags & PF_MEMALLOC)) {
103 while (!bio && (nr_vecs /= 2))
104 bio = bio_alloc(gfp_flags, nr_vecs);
105 }
106
107 if (bio) {
108 bio->bi_size = 0;
109 bio->bi_bdev = bdev;
110 bio->bi_sector = first_byte >> 9;
111 }
112 return bio;
113}
114
115static int check_compressed_csum(struct inode *inode,
116 struct compressed_bio *cb,
117 u64 disk_start)
118{
119 int ret;
120 struct btrfs_root *root = BTRFS_I(inode)->root;
121 struct page *page;
122 unsigned long i;
123 char *kaddr;
124 u32 csum;
125 u32 *cb_sum = &cb->sums;
126
127 if (btrfs_test_flag(inode, NODATASUM))
128 return 0;
129
130 for (i = 0; i < cb->nr_pages; i++) {
131 page = cb->compressed_pages[i];
132 csum = ~(u32)0;
133
134 kaddr = kmap_atomic(page, KM_USER0);
135 csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE);
136 btrfs_csum_final(csum, (char *)&csum);
137 kunmap_atomic(kaddr, KM_USER0);
138
139 if (csum != *cb_sum) {
140 printk(KERN_INFO "btrfs csum failed ino %lu "
141 "extent %llu csum %u "
142 "wanted %u mirror %d\n", inode->i_ino,
143 (unsigned long long)disk_start,
144 csum, *cb_sum, cb->mirror_num);
145 ret = -EIO;
146 goto fail;
147 }
148 cb_sum++;
149
150 }
151 ret = 0;
152fail:
153 return ret;
154}
155
156/* when we finish reading compressed pages from the disk, we
157 * decompress them and then run the bio end_io routines on the
158 * decompressed pages (in the inode address space).
159 *
160 * This allows the checksumming and other IO error handling routines
161 * to work normally
162 *
163 * The compressed pages are freed here, and it must be run
164 * in process context
165 */
166static void end_compressed_bio_read(struct bio *bio, int err)
167{
168 struct extent_io_tree *tree;
169 struct compressed_bio *cb = bio->bi_private;
170 struct inode *inode;
171 struct page *page;
172 unsigned long index;
173 int ret;
174
175 if (err)
176 cb->errors = 1;
177
178 /* if there are more bios still pending for this compressed
179 * extent, just exit
180 */
181 if (!atomic_dec_and_test(&cb->pending_bios))
182 goto out;
183
184 inode = cb->inode;
185 ret = check_compressed_csum(inode, cb, (u64)bio->bi_sector << 9);
186 if (ret)
187 goto csum_failed;
188
189 /* ok, we're the last bio for this extent, lets start
190 * the decompression.
191 */
192 tree = &BTRFS_I(inode)->io_tree;
193 ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
194 cb->start,
195 cb->orig_bio->bi_io_vec,
196 cb->orig_bio->bi_vcnt,
197 cb->compressed_len);
198csum_failed:
199 if (ret)
200 cb->errors = 1;
201
202 /* release the compressed pages */
203 index = 0;
204 for (index = 0; index < cb->nr_pages; index++) {
205 page = cb->compressed_pages[index];
206 page->mapping = NULL;
207 page_cache_release(page);
208 }
209
210 /* do io completion on the original bio */
211 if (cb->errors) {
212 bio_io_error(cb->orig_bio);
213 } else {
214 int bio_index = 0;
215 struct bio_vec *bvec = cb->orig_bio->bi_io_vec;
216
217 /*
218 * we have verified the checksum already, set page
219 * checked so the end_io handlers know about it
220 */
221 while (bio_index < cb->orig_bio->bi_vcnt) {
222 SetPageChecked(bvec->bv_page);
223 bvec++;
224 bio_index++;
225 }
226 bio_endio(cb->orig_bio, 0);
227 }
228
229 /* finally free the cb struct */
230 kfree(cb->compressed_pages);
231 kfree(cb);
232out:
233 bio_put(bio);
234}
235
236/*
237 * Clear the writeback bits on all of the file
238 * pages for a compressed write
239 */
240static noinline int end_compressed_writeback(struct inode *inode, u64 start,
241 unsigned long ram_size)
242{
243 unsigned long index = start >> PAGE_CACHE_SHIFT;
244 unsigned long end_index = (start + ram_size - 1) >> PAGE_CACHE_SHIFT;
245 struct page *pages[16];
246 unsigned long nr_pages = end_index - index + 1;
247 int i;
248 int ret;
249
250 while (nr_pages > 0) {
251 ret = find_get_pages_contig(inode->i_mapping, index,
252 min_t(unsigned long,
253 nr_pages, ARRAY_SIZE(pages)), pages);
254 if (ret == 0) {
255 nr_pages -= 1;
256 index += 1;
257 continue;
258 }
259 for (i = 0; i < ret; i++) {
260 end_page_writeback(pages[i]);
261 page_cache_release(pages[i]);
262 }
263 nr_pages -= ret;
264 index += ret;
265 }
266 /* the inode may be gone now */
267 return 0;
268}
269
270/*
271 * do the cleanup once all the compressed pages hit the disk.
272 * This will clear writeback on the file pages and free the compressed
273 * pages.
274 *
275 * This also calls the writeback end hooks for the file pages so that
276 * metadata and checksums can be updated in the file.
277 */
278static void end_compressed_bio_write(struct bio *bio, int err)
279{
280 struct extent_io_tree *tree;
281 struct compressed_bio *cb = bio->bi_private;
282 struct inode *inode;
283 struct page *page;
284 unsigned long index;
285
286 if (err)
287 cb->errors = 1;
288
289 /* if there are more bios still pending for this compressed
290 * extent, just exit
291 */
292 if (!atomic_dec_and_test(&cb->pending_bios))
293 goto out;
294
295 /* ok, we're the last bio for this extent, step one is to
296 * call back into the FS and do all the end_io operations
297 */
298 inode = cb->inode;
299 tree = &BTRFS_I(inode)->io_tree;
300 cb->compressed_pages[0]->mapping = cb->inode->i_mapping;
301 tree->ops->writepage_end_io_hook(cb->compressed_pages[0],
302 cb->start,
303 cb->start + cb->len - 1,
304 NULL, 1);
305 cb->compressed_pages[0]->mapping = NULL;
306
307 end_compressed_writeback(inode, cb->start, cb->len);
308 /* note, our inode could be gone now */
309
310 /*
311 * release the compressed pages, these came from alloc_page and
312 * are not attached to the inode at all
313 */
314 index = 0;
315 for (index = 0; index < cb->nr_pages; index++) {
316 page = cb->compressed_pages[index];
317 page->mapping = NULL;
318 page_cache_release(page);
319 }
320
321 /* finally free the cb struct */
322 kfree(cb->compressed_pages);
323 kfree(cb);
324out:
325 bio_put(bio);
326}
327
328/*
329 * worker function to build and submit bios for previously compressed pages.
330 * The corresponding pages in the inode should be marked for writeback
331 * and the compressed pages should have a reference on them for dropping
332 * when the IO is complete.
333 *
334 * This also checksums the file bytes and gets things ready for
335 * the end io hooks.
336 */
337int btrfs_submit_compressed_write(struct inode *inode, u64 start,
338 unsigned long len, u64 disk_start,
339 unsigned long compressed_len,
340 struct page **compressed_pages,
341 unsigned long nr_pages)
342{
343 struct bio *bio = NULL;
344 struct btrfs_root *root = BTRFS_I(inode)->root;
345 struct compressed_bio *cb;
346 unsigned long bytes_left;
347 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
348 int page_index = 0;
349 struct page *page;
350 u64 first_byte = disk_start;
351 struct block_device *bdev;
352 int ret;
353
354 WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
355 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
356 atomic_set(&cb->pending_bios, 0);
357 cb->errors = 0;
358 cb->inode = inode;
359 cb->start = start;
360 cb->len = len;
361 cb->mirror_num = 0;
362 cb->compressed_pages = compressed_pages;
363 cb->compressed_len = compressed_len;
364 cb->orig_bio = NULL;
365 cb->nr_pages = nr_pages;
366
367 bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
368
369 bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
370 bio->bi_private = cb;
371 bio->bi_end_io = end_compressed_bio_write;
372 atomic_inc(&cb->pending_bios);
373
374 /* create and submit bios for the compressed pages */
375 bytes_left = compressed_len;
376 for (page_index = 0; page_index < cb->nr_pages; page_index++) {
377 page = compressed_pages[page_index];
378 page->mapping = inode->i_mapping;
379 if (bio->bi_size)
380 ret = io_tree->ops->merge_bio_hook(page, 0,
381 PAGE_CACHE_SIZE,
382 bio, 0);
383 else
384 ret = 0;
385
386 page->mapping = NULL;
387 if (ret || bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) <
388 PAGE_CACHE_SIZE) {
389 bio_get(bio);
390
391 /*
392 * inc the count before we submit the bio so
393 * we know the end IO handler won't happen before
394 * we inc the count. Otherwise, the cb might get
395 * freed before we're done setting it up
396 */
397 atomic_inc(&cb->pending_bios);
398 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
399 BUG_ON(ret);
400
401 ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
402 BUG_ON(ret);
403
404 ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
405 BUG_ON(ret);
406
407 bio_put(bio);
408
409 bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
410 bio->bi_private = cb;
411 bio->bi_end_io = end_compressed_bio_write;
412 bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
413 }
414 if (bytes_left < PAGE_CACHE_SIZE) {
415 printk("bytes left %lu compress len %lu nr %lu\n",
416 bytes_left, cb->compressed_len, cb->nr_pages);
417 }
418 bytes_left -= PAGE_CACHE_SIZE;
419 first_byte += PAGE_CACHE_SIZE;
420 cond_resched();
421 }
422 bio_get(bio);
423
424 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
425 BUG_ON(ret);
426
427 ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
428 BUG_ON(ret);
429
430 ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
431 BUG_ON(ret);
432
433 bio_put(bio);
434 return 0;
435}
436
437static noinline int add_ra_bio_pages(struct inode *inode,
438 u64 compressed_end,
439 struct compressed_bio *cb)
440{
441 unsigned long end_index;
442 unsigned long page_index;
443 u64 last_offset;
444 u64 isize = i_size_read(inode);
445 int ret;
446 struct page *page;
447 unsigned long nr_pages = 0;
448 struct extent_map *em;
449 struct address_space *mapping = inode->i_mapping;
450 struct pagevec pvec;
451 struct extent_map_tree *em_tree;
452 struct extent_io_tree *tree;
453 u64 end;
454 int misses = 0;
455
456 page = cb->orig_bio->bi_io_vec[cb->orig_bio->bi_vcnt - 1].bv_page;
457 last_offset = (page_offset(page) + PAGE_CACHE_SIZE);
458 em_tree = &BTRFS_I(inode)->extent_tree;
459 tree = &BTRFS_I(inode)->io_tree;
460
461 if (isize == 0)
462 return 0;
463
464 end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
465
466 pagevec_init(&pvec, 0);
467 while (last_offset < compressed_end) {
468 page_index = last_offset >> PAGE_CACHE_SHIFT;
469
470 if (page_index > end_index)
471 break;
472
473 rcu_read_lock();
474 page = radix_tree_lookup(&mapping->page_tree, page_index);
475 rcu_read_unlock();
476 if (page) {
477 misses++;
478 if (misses > 4)
479 break;
480 goto next;
481 }
482
483 page = alloc_page(mapping_gfp_mask(mapping) | GFP_NOFS);
484 if (!page)
485 break;
486
487 page->index = page_index;
488 /*
489 * what we want to do here is call add_to_page_cache_lru,
490 * but that isn't exported, so we reproduce it here
491 */
492 if (add_to_page_cache(page, mapping,
493 page->index, GFP_NOFS)) {
494 page_cache_release(page);
495 goto next;
496 }
497
498 /* open coding of lru_cache_add, also not exported */
499 page_cache_get(page);
500 if (!pagevec_add(&pvec, page))
501 __pagevec_lru_add_file(&pvec);
502
503 end = last_offset + PAGE_CACHE_SIZE - 1;
504 /*
505 * at this point, we have a locked page in the page cache
506 * for these bytes in the file. But, we have to make
507 * sure they map to this compressed extent on disk.
508 */
509 set_page_extent_mapped(page);
510 lock_extent(tree, last_offset, end, GFP_NOFS);
511 spin_lock(&em_tree->lock);
512 em = lookup_extent_mapping(em_tree, last_offset,
513 PAGE_CACHE_SIZE);
514 spin_unlock(&em_tree->lock);
515
516 if (!em || last_offset < em->start ||
517 (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
518 (em->block_start >> 9) != cb->orig_bio->bi_sector) {
519 free_extent_map(em);
520 unlock_extent(tree, last_offset, end, GFP_NOFS);
521 unlock_page(page);
522 page_cache_release(page);
523 break;
524 }
525 free_extent_map(em);
526
527 if (page->index == end_index) {
528 char *userpage;
529 size_t zero_offset = isize & (PAGE_CACHE_SIZE - 1);
530
531 if (zero_offset) {
532 int zeros;
533 zeros = PAGE_CACHE_SIZE - zero_offset;
534 userpage = kmap_atomic(page, KM_USER0);
535 memset(userpage + zero_offset, 0, zeros);
536 flush_dcache_page(page);
537 kunmap_atomic(userpage, KM_USER0);
538 }
539 }
540
541 ret = bio_add_page(cb->orig_bio, page,
542 PAGE_CACHE_SIZE, 0);
543
544 if (ret == PAGE_CACHE_SIZE) {
545 nr_pages++;
546 page_cache_release(page);
547 } else {
548 unlock_extent(tree, last_offset, end, GFP_NOFS);
549 unlock_page(page);
550 page_cache_release(page);
551 break;
552 }
553next:
554 last_offset += PAGE_CACHE_SIZE;
555 }
556 if (pagevec_count(&pvec))
557 __pagevec_lru_add_file(&pvec);
558 return 0;
559}
560
561/*
562 * for a compressed read, the bio we get passed has all the inode pages
563 * in it. We don't actually do IO on those pages but allocate new ones
564 * to hold the compressed pages on disk.
565 *
566 * bio->bi_sector points to the compressed extent on disk
567 * bio->bi_io_vec points to all of the inode pages
568 * bio->bi_vcnt is a count of pages
569 *
570 * After the compressed pages are read, we copy the bytes into the
571 * bio we were passed and then call the bio end_io calls
572 */
573int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
574 int mirror_num, unsigned long bio_flags)
575{
576 struct extent_io_tree *tree;
577 struct extent_map_tree *em_tree;
578 struct compressed_bio *cb;
579 struct btrfs_root *root = BTRFS_I(inode)->root;
580 unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
581 unsigned long compressed_len;
582 unsigned long nr_pages;
583 unsigned long page_index;
584 struct page *page;
585 struct block_device *bdev;
586 struct bio *comp_bio;
587 u64 cur_disk_byte = (u64)bio->bi_sector << 9;
588 u64 em_len;
589 u64 em_start;
590 struct extent_map *em;
591 int ret;
592 u32 *sums;
593
594 tree = &BTRFS_I(inode)->io_tree;
595 em_tree = &BTRFS_I(inode)->extent_tree;
596
597 /* we need the actual starting offset of this extent in the file */
598 spin_lock(&em_tree->lock);
599 em = lookup_extent_mapping(em_tree,
600 page_offset(bio->bi_io_vec->bv_page),
601 PAGE_CACHE_SIZE);
602 spin_unlock(&em_tree->lock);
603
604 compressed_len = em->block_len;
605 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
606 atomic_set(&cb->pending_bios, 0);
607 cb->errors = 0;
608 cb->inode = inode;
609 cb->mirror_num = mirror_num;
610 sums = &cb->sums;
611
612 cb->start = em->orig_start;
613 em_len = em->len;
614 em_start = em->start;
615
616 free_extent_map(em);
617 em = NULL;
618
619 cb->len = uncompressed_len;
620 cb->compressed_len = compressed_len;
621 cb->orig_bio = bio;
622
623 nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
624 PAGE_CACHE_SIZE;
625 cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages,
626 GFP_NOFS);
627 bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
628
629 for (page_index = 0; page_index < nr_pages; page_index++) {
630 cb->compressed_pages[page_index] = alloc_page(GFP_NOFS |
631 __GFP_HIGHMEM);
632 }
633 cb->nr_pages = nr_pages;
634
635 add_ra_bio_pages(inode, em_start + em_len, cb);
636
637 /* include any pages we added in add_ra-bio_pages */
638 uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
639 cb->len = uncompressed_len;
640
641 comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
642 comp_bio->bi_private = cb;
643 comp_bio->bi_end_io = end_compressed_bio_read;
644 atomic_inc(&cb->pending_bios);
645
646 for (page_index = 0; page_index < nr_pages; page_index++) {
647 page = cb->compressed_pages[page_index];
648 page->mapping = inode->i_mapping;
649 page->index = em_start >> PAGE_CACHE_SHIFT;
650
651 if (comp_bio->bi_size)
652 ret = tree->ops->merge_bio_hook(page, 0,
653 PAGE_CACHE_SIZE,
654 comp_bio, 0);
655 else
656 ret = 0;
657
658 page->mapping = NULL;
659 if (ret || bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0) <
660 PAGE_CACHE_SIZE) {
661 bio_get(comp_bio);
662
663 ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
664 BUG_ON(ret);
665
666 /*
667 * inc the count before we submit the bio so
668 * we know the end IO handler won't happen before
669 * we inc the count. Otherwise, the cb might get
670 * freed before we're done setting it up
671 */
672 atomic_inc(&cb->pending_bios);
673
674 if (!btrfs_test_flag(inode, NODATASUM)) {
675 btrfs_lookup_bio_sums(root, inode, comp_bio,
676 sums);
677 }
678 sums += (comp_bio->bi_size + root->sectorsize - 1) /
679 root->sectorsize;
680
681 ret = btrfs_map_bio(root, READ, comp_bio,
682 mirror_num, 0);
683 BUG_ON(ret);
684
685 bio_put(comp_bio);
686
687 comp_bio = compressed_bio_alloc(bdev, cur_disk_byte,
688 GFP_NOFS);
689 comp_bio->bi_private = cb;
690 comp_bio->bi_end_io = end_compressed_bio_read;
691
692 bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0);
693 }
694 cur_disk_byte += PAGE_CACHE_SIZE;
695 }
696 bio_get(comp_bio);
697
698 ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
699 BUG_ON(ret);
700
701 if (!btrfs_test_flag(inode, NODATASUM))
702 btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
703
704 ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
705 BUG_ON(ret);
706
707 bio_put(comp_bio);
708 return 0;
709}
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
new file mode 100644
index 000000000000..421f5b4aa715
--- /dev/null
+++ b/fs/btrfs/compression.h
@@ -0,0 +1,47 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __BTRFS_COMPRESSION_
20#define __BTRFS_COMPRESSION_
21
22int btrfs_zlib_decompress(unsigned char *data_in,
23 struct page *dest_page,
24 unsigned long start_byte,
25 size_t srclen, size_t destlen);
26int btrfs_zlib_compress_pages(struct address_space *mapping,
27 u64 start, unsigned long len,
28 struct page **pages,
29 unsigned long nr_dest_pages,
30 unsigned long *out_pages,
31 unsigned long *total_in,
32 unsigned long *total_out,
33 unsigned long max_out);
34int btrfs_zlib_decompress_biovec(struct page **pages_in,
35 u64 disk_start,
36 struct bio_vec *bvec,
37 int vcnt,
38 size_t srclen);
39void btrfs_zlib_exit(void);
40int btrfs_submit_compressed_write(struct inode *inode, u64 start,
41 unsigned long len, u64 disk_start,
42 unsigned long compressed_len,
43 struct page **compressed_pages,
44 unsigned long nr_pages);
45int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
46 int mirror_num, unsigned long bio_flags);
47#endif
diff --git a/fs/btrfs/crc32c.h b/fs/btrfs/crc32c.h
new file mode 100644
index 000000000000..6e1b3de36700
--- /dev/null
+++ b/fs/btrfs/crc32c.h
@@ -0,0 +1,29 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __BTRFS_CRC32C__
20#define __BTRFS_CRC32C__
21#include <linux/crc32c.h>
22
23/*
24 * this file used to do more for selecting the HW version of crc32c,
25 * perhaps it will one day again soon.
26 */
27#define btrfs_crc32c(seed, data, length) crc32c(seed, data, length)
28#endif
29
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
new file mode 100644
index 000000000000..9e46c0776816
--- /dev/null
+++ b/fs/btrfs/ctree.c
@@ -0,0 +1,3953 @@
1/*
2 * Copyright (C) 2007,2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include "ctree.h"
21#include "disk-io.h"
22#include "transaction.h"
23#include "print-tree.h"
24#include "locking.h"
25
26static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
27 *root, struct btrfs_path *path, int level);
28static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
29 *root, struct btrfs_key *ins_key,
30 struct btrfs_path *path, int data_size, int extend);
31static int push_node_left(struct btrfs_trans_handle *trans,
32 struct btrfs_root *root, struct extent_buffer *dst,
33 struct extent_buffer *src, int empty);
34static int balance_node_right(struct btrfs_trans_handle *trans,
35 struct btrfs_root *root,
36 struct extent_buffer *dst_buf,
37 struct extent_buffer *src_buf);
38static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
39 struct btrfs_path *path, int level, int slot);
40
41inline void btrfs_init_path(struct btrfs_path *p)
42{
43 memset(p, 0, sizeof(*p));
44}
45
46struct btrfs_path *btrfs_alloc_path(void)
47{
48 struct btrfs_path *path;
49 path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
50 if (path) {
51 btrfs_init_path(path);
52 path->reada = 1;
53 }
54 return path;
55}
56
57/* this also releases the path */
58void btrfs_free_path(struct btrfs_path *p)
59{
60 btrfs_release_path(NULL, p);
61 kmem_cache_free(btrfs_path_cachep, p);
62}
63
64/*
65 * path release drops references on the extent buffers in the path
66 * and it drops any locks held by this path
67 *
68 * It is safe to call this on paths that no locks or extent buffers held.
69 */
70noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
71{
72 int i;
73
74 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
75 p->slots[i] = 0;
76 if (!p->nodes[i])
77 continue;
78 if (p->locks[i]) {
79 btrfs_tree_unlock(p->nodes[i]);
80 p->locks[i] = 0;
81 }
82 free_extent_buffer(p->nodes[i]);
83 p->nodes[i] = NULL;
84 }
85}
86
87/*
88 * safely gets a reference on the root node of a tree. A lock
89 * is not taken, so a concurrent writer may put a different node
90 * at the root of the tree. See btrfs_lock_root_node for the
91 * looping required.
92 *
93 * The extent buffer returned by this has a reference taken, so
94 * it won't disappear. It may stop being the root of the tree
95 * at any time because there are no locks held.
96 */
97struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
98{
99 struct extent_buffer *eb;
100 spin_lock(&root->node_lock);
101 eb = root->node;
102 extent_buffer_get(eb);
103 spin_unlock(&root->node_lock);
104 return eb;
105}
106
107/* loop around taking references on and locking the root node of the
108 * tree until you end up with a lock on the root. A locked buffer
109 * is returned, with a reference held.
110 */
111struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
112{
113 struct extent_buffer *eb;
114
115 while (1) {
116 eb = btrfs_root_node(root);
117 btrfs_tree_lock(eb);
118
119 spin_lock(&root->node_lock);
120 if (eb == root->node) {
121 spin_unlock(&root->node_lock);
122 break;
123 }
124 spin_unlock(&root->node_lock);
125
126 btrfs_tree_unlock(eb);
127 free_extent_buffer(eb);
128 }
129 return eb;
130}
131
132/* cowonly root (everything not a reference counted cow subvolume), just get
133 * put onto a simple dirty list. transaction.c walks this to make sure they
134 * get properly updated on disk.
135 */
136static void add_root_to_dirty_list(struct btrfs_root *root)
137{
138 if (root->track_dirty && list_empty(&root->dirty_list)) {
139 list_add(&root->dirty_list,
140 &root->fs_info->dirty_cowonly_roots);
141 }
142}
143
144/*
145 * used by snapshot creation to make a copy of a root for a tree with
146 * a given objectid. The buffer with the new root node is returned in
147 * cow_ret, and this func returns zero on success or a negative error code.
148 */
149int btrfs_copy_root(struct btrfs_trans_handle *trans,
150 struct btrfs_root *root,
151 struct extent_buffer *buf,
152 struct extent_buffer **cow_ret, u64 new_root_objectid)
153{
154 struct extent_buffer *cow;
155 u32 nritems;
156 int ret = 0;
157 int level;
158 struct btrfs_root *new_root;
159
160 new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
161 if (!new_root)
162 return -ENOMEM;
163
164 memcpy(new_root, root, sizeof(*new_root));
165 new_root->root_key.objectid = new_root_objectid;
166
167 WARN_ON(root->ref_cows && trans->transid !=
168 root->fs_info->running_transaction->transid);
169 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
170
171 level = btrfs_header_level(buf);
172 nritems = btrfs_header_nritems(buf);
173
174 cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0,
175 new_root_objectid, trans->transid,
176 level, buf->start, 0);
177 if (IS_ERR(cow)) {
178 kfree(new_root);
179 return PTR_ERR(cow);
180 }
181
182 copy_extent_buffer(cow, buf, 0, 0, cow->len);
183 btrfs_set_header_bytenr(cow, cow->start);
184 btrfs_set_header_generation(cow, trans->transid);
185 btrfs_set_header_owner(cow, new_root_objectid);
186 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
187
188 write_extent_buffer(cow, root->fs_info->fsid,
189 (unsigned long)btrfs_header_fsid(cow),
190 BTRFS_FSID_SIZE);
191
192 WARN_ON(btrfs_header_generation(buf) > trans->transid);
193 ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL);
194 kfree(new_root);
195
196 if (ret)
197 return ret;
198
199 btrfs_mark_buffer_dirty(cow);
200 *cow_ret = cow;
201 return 0;
202}
203
204/*
205 * does the dirty work in cow of a single block. The parent block (if
206 * supplied) is updated to point to the new cow copy. The new buffer is marked
207 * dirty and returned locked. If you modify the block it needs to be marked
208 * dirty again.
209 *
210 * search_start -- an allocation hint for the new block
211 *
212 * empty_size -- a hint that you plan on doing more cow. This is the size in
213 * bytes the allocator should try to find free next to the block it returns.
214 * This is just a hint and may be ignored by the allocator.
215 *
216 * prealloc_dest -- if you have already reserved a destination for the cow,
217 * this uses that block instead of allocating a new one.
218 * btrfs_alloc_reserved_extent is used to finish the allocation.
219 */
220static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
221 struct btrfs_root *root,
222 struct extent_buffer *buf,
223 struct extent_buffer *parent, int parent_slot,
224 struct extent_buffer **cow_ret,
225 u64 search_start, u64 empty_size,
226 u64 prealloc_dest)
227{
228 u64 parent_start;
229 struct extent_buffer *cow;
230 u32 nritems;
231 int ret = 0;
232 int level;
233 int unlock_orig = 0;
234
235 if (*cow_ret == buf)
236 unlock_orig = 1;
237
238 WARN_ON(!btrfs_tree_locked(buf));
239
240 if (parent)
241 parent_start = parent->start;
242 else
243 parent_start = 0;
244
245 WARN_ON(root->ref_cows && trans->transid !=
246 root->fs_info->running_transaction->transid);
247 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
248
249 level = btrfs_header_level(buf);
250 nritems = btrfs_header_nritems(buf);
251
252 if (prealloc_dest) {
253 struct btrfs_key ins;
254
255 ins.objectid = prealloc_dest;
256 ins.offset = buf->len;
257 ins.type = BTRFS_EXTENT_ITEM_KEY;
258
259 ret = btrfs_alloc_reserved_extent(trans, root, parent_start,
260 root->root_key.objectid,
261 trans->transid, level, &ins);
262 BUG_ON(ret);
263 cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
264 buf->len);
265 } else {
266 cow = btrfs_alloc_free_block(trans, root, buf->len,
267 parent_start,
268 root->root_key.objectid,
269 trans->transid, level,
270 search_start, empty_size);
271 }
272 if (IS_ERR(cow))
273 return PTR_ERR(cow);
274
275 copy_extent_buffer(cow, buf, 0, 0, cow->len);
276 btrfs_set_header_bytenr(cow, cow->start);
277 btrfs_set_header_generation(cow, trans->transid);
278 btrfs_set_header_owner(cow, root->root_key.objectid);
279 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
280
281 write_extent_buffer(cow, root->fs_info->fsid,
282 (unsigned long)btrfs_header_fsid(cow),
283 BTRFS_FSID_SIZE);
284
285 WARN_ON(btrfs_header_generation(buf) > trans->transid);
286 if (btrfs_header_generation(buf) != trans->transid) {
287 u32 nr_extents;
288 ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
289 if (ret)
290 return ret;
291
292 ret = btrfs_cache_ref(trans, root, buf, nr_extents);
293 WARN_ON(ret);
294 } else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) {
295 /*
296 * There are only two places that can drop reference to
297 * tree blocks owned by living reloc trees, one is here,
298 * the other place is btrfs_drop_subtree. In both places,
299 * we check reference count while tree block is locked.
300 * Furthermore, if reference count is one, it won't get
301 * increased by someone else.
302 */
303 u32 refs;
304 ret = btrfs_lookup_extent_ref(trans, root, buf->start,
305 buf->len, &refs);
306 BUG_ON(ret);
307 if (refs == 1) {
308 ret = btrfs_update_ref(trans, root, buf, cow,
309 0, nritems);
310 clean_tree_block(trans, root, buf);
311 } else {
312 ret = btrfs_inc_ref(trans, root, buf, cow, NULL);
313 }
314 BUG_ON(ret);
315 } else {
316 ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
317 if (ret)
318 return ret;
319 clean_tree_block(trans, root, buf);
320 }
321
322 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
323 ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
324 WARN_ON(ret);
325 }
326
327 if (buf == root->node) {
328 WARN_ON(parent && parent != buf);
329
330 spin_lock(&root->node_lock);
331 root->node = cow;
332 extent_buffer_get(cow);
333 spin_unlock(&root->node_lock);
334
335 if (buf != root->commit_root) {
336 btrfs_free_extent(trans, root, buf->start,
337 buf->len, buf->start,
338 root->root_key.objectid,
339 btrfs_header_generation(buf),
340 level, 1);
341 }
342 free_extent_buffer(buf);
343 add_root_to_dirty_list(root);
344 } else {
345 btrfs_set_node_blockptr(parent, parent_slot,
346 cow->start);
347 WARN_ON(trans->transid == 0);
348 btrfs_set_node_ptr_generation(parent, parent_slot,
349 trans->transid);
350 btrfs_mark_buffer_dirty(parent);
351 WARN_ON(btrfs_header_generation(parent) != trans->transid);
352 btrfs_free_extent(trans, root, buf->start, buf->len,
353 parent_start, btrfs_header_owner(parent),
354 btrfs_header_generation(parent), level, 1);
355 }
356 if (unlock_orig)
357 btrfs_tree_unlock(buf);
358 free_extent_buffer(buf);
359 btrfs_mark_buffer_dirty(cow);
360 *cow_ret = cow;
361 return 0;
362}
363
364/*
365 * cows a single block, see __btrfs_cow_block for the real work.
366 * This version of it has extra checks so that a block isn't cow'd more than
367 * once per transaction, as long as it hasn't been written yet
368 */
369noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
370 struct btrfs_root *root, struct extent_buffer *buf,
371 struct extent_buffer *parent, int parent_slot,
372 struct extent_buffer **cow_ret, u64 prealloc_dest)
373{
374 u64 search_start;
375 int ret;
376
377 if (trans->transaction != root->fs_info->running_transaction) {
378 printk(KERN_CRIT "trans %llu running %llu\n",
379 (unsigned long long)trans->transid,
380 (unsigned long long)
381 root->fs_info->running_transaction->transid);
382 WARN_ON(1);
383 }
384 if (trans->transid != root->fs_info->generation) {
385 printk(KERN_CRIT "trans %llu running %llu\n",
386 (unsigned long long)trans->transid,
387 (unsigned long long)root->fs_info->generation);
388 WARN_ON(1);
389 }
390
391 spin_lock(&root->fs_info->hash_lock);
392 if (btrfs_header_generation(buf) == trans->transid &&
393 btrfs_header_owner(buf) == root->root_key.objectid &&
394 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
395 *cow_ret = buf;
396 spin_unlock(&root->fs_info->hash_lock);
397 WARN_ON(prealloc_dest);
398 return 0;
399 }
400 spin_unlock(&root->fs_info->hash_lock);
401 search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
402 ret = __btrfs_cow_block(trans, root, buf, parent,
403 parent_slot, cow_ret, search_start, 0,
404 prealloc_dest);
405 return ret;
406}
407
408/*
409 * helper function for defrag to decide if two blocks pointed to by a
410 * node are actually close by
411 */
412static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
413{
414 if (blocknr < other && other - (blocknr + blocksize) < 32768)
415 return 1;
416 if (blocknr > other && blocknr - (other + blocksize) < 32768)
417 return 1;
418 return 0;
419}
420
421/*
422 * compare two keys in a memcmp fashion
423 */
424static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
425{
426 struct btrfs_key k1;
427
428 btrfs_disk_key_to_cpu(&k1, disk);
429
430 if (k1.objectid > k2->objectid)
431 return 1;
432 if (k1.objectid < k2->objectid)
433 return -1;
434 if (k1.type > k2->type)
435 return 1;
436 if (k1.type < k2->type)
437 return -1;
438 if (k1.offset > k2->offset)
439 return 1;
440 if (k1.offset < k2->offset)
441 return -1;
442 return 0;
443}
444
445/*
446 * same as comp_keys only with two btrfs_key's
447 */
448static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
449{
450 if (k1->objectid > k2->objectid)
451 return 1;
452 if (k1->objectid < k2->objectid)
453 return -1;
454 if (k1->type > k2->type)
455 return 1;
456 if (k1->type < k2->type)
457 return -1;
458 if (k1->offset > k2->offset)
459 return 1;
460 if (k1->offset < k2->offset)
461 return -1;
462 return 0;
463}
464
465/*
466 * this is used by the defrag code to go through all the
467 * leaves pointed to by a node and reallocate them so that
468 * disk order is close to key order
469 */
470int btrfs_realloc_node(struct btrfs_trans_handle *trans,
471 struct btrfs_root *root, struct extent_buffer *parent,
472 int start_slot, int cache_only, u64 *last_ret,
473 struct btrfs_key *progress)
474{
475 struct extent_buffer *cur;
476 u64 blocknr;
477 u64 gen;
478 u64 search_start = *last_ret;
479 u64 last_block = 0;
480 u64 other;
481 u32 parent_nritems;
482 int end_slot;
483 int i;
484 int err = 0;
485 int parent_level;
486 int uptodate;
487 u32 blocksize;
488 int progress_passed = 0;
489 struct btrfs_disk_key disk_key;
490
491 parent_level = btrfs_header_level(parent);
492 if (cache_only && parent_level != 1)
493 return 0;
494
495 if (trans->transaction != root->fs_info->running_transaction)
496 WARN_ON(1);
497 if (trans->transid != root->fs_info->generation)
498 WARN_ON(1);
499
500 parent_nritems = btrfs_header_nritems(parent);
501 blocksize = btrfs_level_size(root, parent_level - 1);
502 end_slot = parent_nritems;
503
504 if (parent_nritems == 1)
505 return 0;
506
507 for (i = start_slot; i < end_slot; i++) {
508 int close = 1;
509
510 if (!parent->map_token) {
511 map_extent_buffer(parent,
512 btrfs_node_key_ptr_offset(i),
513 sizeof(struct btrfs_key_ptr),
514 &parent->map_token, &parent->kaddr,
515 &parent->map_start, &parent->map_len,
516 KM_USER1);
517 }
518 btrfs_node_key(parent, &disk_key, i);
519 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
520 continue;
521
522 progress_passed = 1;
523 blocknr = btrfs_node_blockptr(parent, i);
524 gen = btrfs_node_ptr_generation(parent, i);
525 if (last_block == 0)
526 last_block = blocknr;
527
528 if (i > 0) {
529 other = btrfs_node_blockptr(parent, i - 1);
530 close = close_blocks(blocknr, other, blocksize);
531 }
532 if (!close && i < end_slot - 2) {
533 other = btrfs_node_blockptr(parent, i + 1);
534 close = close_blocks(blocknr, other, blocksize);
535 }
536 if (close) {
537 last_block = blocknr;
538 continue;
539 }
540 if (parent->map_token) {
541 unmap_extent_buffer(parent, parent->map_token,
542 KM_USER1);
543 parent->map_token = NULL;
544 }
545
546 cur = btrfs_find_tree_block(root, blocknr, blocksize);
547 if (cur)
548 uptodate = btrfs_buffer_uptodate(cur, gen);
549 else
550 uptodate = 0;
551 if (!cur || !uptodate) {
552 if (cache_only) {
553 free_extent_buffer(cur);
554 continue;
555 }
556 if (!cur) {
557 cur = read_tree_block(root, blocknr,
558 blocksize, gen);
559 } else if (!uptodate) {
560 btrfs_read_buffer(cur, gen);
561 }
562 }
563 if (search_start == 0)
564 search_start = last_block;
565
566 btrfs_tree_lock(cur);
567 err = __btrfs_cow_block(trans, root, cur, parent, i,
568 &cur, search_start,
569 min(16 * blocksize,
570 (end_slot - i) * blocksize), 0);
571 if (err) {
572 btrfs_tree_unlock(cur);
573 free_extent_buffer(cur);
574 break;
575 }
576 search_start = cur->start;
577 last_block = cur->start;
578 *last_ret = search_start;
579 btrfs_tree_unlock(cur);
580 free_extent_buffer(cur);
581 }
582 if (parent->map_token) {
583 unmap_extent_buffer(parent, parent->map_token,
584 KM_USER1);
585 parent->map_token = NULL;
586 }
587 return err;
588}
589
590/*
591 * The leaf data grows from end-to-front in the node.
592 * this returns the address of the start of the last item,
593 * which is the stop of the leaf data stack
594 */
595static inline unsigned int leaf_data_end(struct btrfs_root *root,
596 struct extent_buffer *leaf)
597{
598 u32 nr = btrfs_header_nritems(leaf);
599 if (nr == 0)
600 return BTRFS_LEAF_DATA_SIZE(root);
601 return btrfs_item_offset_nr(leaf, nr - 1);
602}
603
604/*
605 * extra debugging checks to make sure all the items in a key are
606 * well formed and in the proper order
607 */
608static int check_node(struct btrfs_root *root, struct btrfs_path *path,
609 int level)
610{
611 struct extent_buffer *parent = NULL;
612 struct extent_buffer *node = path->nodes[level];
613 struct btrfs_disk_key parent_key;
614 struct btrfs_disk_key node_key;
615 int parent_slot;
616 int slot;
617 struct btrfs_key cpukey;
618 u32 nritems = btrfs_header_nritems(node);
619
620 if (path->nodes[level + 1])
621 parent = path->nodes[level + 1];
622
623 slot = path->slots[level];
624 BUG_ON(nritems == 0);
625 if (parent) {
626 parent_slot = path->slots[level + 1];
627 btrfs_node_key(parent, &parent_key, parent_slot);
628 btrfs_node_key(node, &node_key, 0);
629 BUG_ON(memcmp(&parent_key, &node_key,
630 sizeof(struct btrfs_disk_key)));
631 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
632 btrfs_header_bytenr(node));
633 }
634 BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
635 if (slot != 0) {
636 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
637 btrfs_node_key(node, &node_key, slot);
638 BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
639 }
640 if (slot < nritems - 1) {
641 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
642 btrfs_node_key(node, &node_key, slot);
643 BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
644 }
645 return 0;
646}
647
648/*
649 * extra checking to make sure all the items in a leaf are
650 * well formed and in the proper order
651 */
652static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
653 int level)
654{
655 struct extent_buffer *leaf = path->nodes[level];
656 struct extent_buffer *parent = NULL;
657 int parent_slot;
658 struct btrfs_key cpukey;
659 struct btrfs_disk_key parent_key;
660 struct btrfs_disk_key leaf_key;
661 int slot = path->slots[0];
662
663 u32 nritems = btrfs_header_nritems(leaf);
664
665 if (path->nodes[level + 1])
666 parent = path->nodes[level + 1];
667
668 if (nritems == 0)
669 return 0;
670
671 if (parent) {
672 parent_slot = path->slots[level + 1];
673 btrfs_node_key(parent, &parent_key, parent_slot);
674 btrfs_item_key(leaf, &leaf_key, 0);
675
676 BUG_ON(memcmp(&parent_key, &leaf_key,
677 sizeof(struct btrfs_disk_key)));
678 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
679 btrfs_header_bytenr(leaf));
680 }
681 if (slot != 0 && slot < nritems - 1) {
682 btrfs_item_key(leaf, &leaf_key, slot);
683 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
684 if (comp_keys(&leaf_key, &cpukey) <= 0) {
685 btrfs_print_leaf(root, leaf);
686 printk(KERN_CRIT "slot %d offset bad key\n", slot);
687 BUG_ON(1);
688 }
689 if (btrfs_item_offset_nr(leaf, slot - 1) !=
690 btrfs_item_end_nr(leaf, slot)) {
691 btrfs_print_leaf(root, leaf);
692 printk(KERN_CRIT "slot %d offset bad\n", slot);
693 BUG_ON(1);
694 }
695 }
696 if (slot < nritems - 1) {
697 btrfs_item_key(leaf, &leaf_key, slot);
698 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
699 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
700 if (btrfs_item_offset_nr(leaf, slot) !=
701 btrfs_item_end_nr(leaf, slot + 1)) {
702 btrfs_print_leaf(root, leaf);
703 printk(KERN_CRIT "slot %d offset bad\n", slot);
704 BUG_ON(1);
705 }
706 }
707 BUG_ON(btrfs_item_offset_nr(leaf, 0) +
708 btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
709 return 0;
710}
711
712static noinline int check_block(struct btrfs_root *root,
713 struct btrfs_path *path, int level)
714{
715 return 0;
716 if (level == 0)
717 return check_leaf(root, path, level);
718 return check_node(root, path, level);
719}
720
721/*
722 * search for key in the extent_buffer. The items start at offset p,
723 * and they are item_size apart. There are 'max' items in p.
724 *
725 * the slot in the array is returned via slot, and it points to
726 * the place where you would insert key if it is not found in
727 * the array.
728 *
729 * slot may point to max if the key is bigger than all of the keys
730 */
731static noinline int generic_bin_search(struct extent_buffer *eb,
732 unsigned long p,
733 int item_size, struct btrfs_key *key,
734 int max, int *slot)
735{
736 int low = 0;
737 int high = max;
738 int mid;
739 int ret;
740 struct btrfs_disk_key *tmp = NULL;
741 struct btrfs_disk_key unaligned;
742 unsigned long offset;
743 char *map_token = NULL;
744 char *kaddr = NULL;
745 unsigned long map_start = 0;
746 unsigned long map_len = 0;
747 int err;
748
749 while (low < high) {
750 mid = (low + high) / 2;
751 offset = p + mid * item_size;
752
753 if (!map_token || offset < map_start ||
754 (offset + sizeof(struct btrfs_disk_key)) >
755 map_start + map_len) {
756 if (map_token) {
757 unmap_extent_buffer(eb, map_token, KM_USER0);
758 map_token = NULL;
759 }
760
761 err = map_private_extent_buffer(eb, offset,
762 sizeof(struct btrfs_disk_key),
763 &map_token, &kaddr,
764 &map_start, &map_len, KM_USER0);
765
766 if (!err) {
767 tmp = (struct btrfs_disk_key *)(kaddr + offset -
768 map_start);
769 } else {
770 read_extent_buffer(eb, &unaligned,
771 offset, sizeof(unaligned));
772 tmp = &unaligned;
773 }
774
775 } else {
776 tmp = (struct btrfs_disk_key *)(kaddr + offset -
777 map_start);
778 }
779 ret = comp_keys(tmp, key);
780
781 if (ret < 0)
782 low = mid + 1;
783 else if (ret > 0)
784 high = mid;
785 else {
786 *slot = mid;
787 if (map_token)
788 unmap_extent_buffer(eb, map_token, KM_USER0);
789 return 0;
790 }
791 }
792 *slot = low;
793 if (map_token)
794 unmap_extent_buffer(eb, map_token, KM_USER0);
795 return 1;
796}
797
798/*
799 * simple bin_search frontend that does the right thing for
800 * leaves vs nodes
801 */
802static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
803 int level, int *slot)
804{
805 if (level == 0) {
806 return generic_bin_search(eb,
807 offsetof(struct btrfs_leaf, items),
808 sizeof(struct btrfs_item),
809 key, btrfs_header_nritems(eb),
810 slot);
811 } else {
812 return generic_bin_search(eb,
813 offsetof(struct btrfs_node, ptrs),
814 sizeof(struct btrfs_key_ptr),
815 key, btrfs_header_nritems(eb),
816 slot);
817 }
818 return -1;
819}
820
821/* given a node and slot number, this reads the blocks it points to. The
822 * extent buffer is returned with a reference taken (but unlocked).
823 * NULL is returned on error.
824 */
825static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
826 struct extent_buffer *parent, int slot)
827{
828 int level = btrfs_header_level(parent);
829 if (slot < 0)
830 return NULL;
831 if (slot >= btrfs_header_nritems(parent))
832 return NULL;
833
834 BUG_ON(level == 0);
835
836 return read_tree_block(root, btrfs_node_blockptr(parent, slot),
837 btrfs_level_size(root, level - 1),
838 btrfs_node_ptr_generation(parent, slot));
839}
840
841/*
842 * node level balancing, used to make sure nodes are in proper order for
843 * item deletion. We balance from the top down, so we have to make sure
844 * that a deletion won't leave an node completely empty later on.
845 */
846static noinline int balance_level(struct btrfs_trans_handle *trans,
847 struct btrfs_root *root,
848 struct btrfs_path *path, int level)
849{
850 struct extent_buffer *right = NULL;
851 struct extent_buffer *mid;
852 struct extent_buffer *left = NULL;
853 struct extent_buffer *parent = NULL;
854 int ret = 0;
855 int wret;
856 int pslot;
857 int orig_slot = path->slots[level];
858 int err_on_enospc = 0;
859 u64 orig_ptr;
860
861 if (level == 0)
862 return 0;
863
864 mid = path->nodes[level];
865 WARN_ON(!path->locks[level]);
866 WARN_ON(btrfs_header_generation(mid) != trans->transid);
867
868 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
869
870 if (level < BTRFS_MAX_LEVEL - 1)
871 parent = path->nodes[level + 1];
872 pslot = path->slots[level + 1];
873
874 /*
875 * deal with the case where there is only one pointer in the root
876 * by promoting the node below to a root
877 */
878 if (!parent) {
879 struct extent_buffer *child;
880
881 if (btrfs_header_nritems(mid) != 1)
882 return 0;
883
884 /* promote the child to a root */
885 child = read_node_slot(root, mid, 0);
886 btrfs_tree_lock(child);
887 BUG_ON(!child);
888 ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
889 BUG_ON(ret);
890
891 spin_lock(&root->node_lock);
892 root->node = child;
893 spin_unlock(&root->node_lock);
894
895 ret = btrfs_update_extent_ref(trans, root, child->start,
896 mid->start, child->start,
897 root->root_key.objectid,
898 trans->transid, level - 1);
899 BUG_ON(ret);
900
901 add_root_to_dirty_list(root);
902 btrfs_tree_unlock(child);
903 path->locks[level] = 0;
904 path->nodes[level] = NULL;
905 clean_tree_block(trans, root, mid);
906 btrfs_tree_unlock(mid);
907 /* once for the path */
908 free_extent_buffer(mid);
909 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
910 mid->start, root->root_key.objectid,
911 btrfs_header_generation(mid),
912 level, 1);
913 /* once for the root ptr */
914 free_extent_buffer(mid);
915 return ret;
916 }
917 if (btrfs_header_nritems(mid) >
918 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
919 return 0;
920
921 if (btrfs_header_nritems(mid) < 2)
922 err_on_enospc = 1;
923
924 left = read_node_slot(root, parent, pslot - 1);
925 if (left) {
926 btrfs_tree_lock(left);
927 wret = btrfs_cow_block(trans, root, left,
928 parent, pslot - 1, &left, 0);
929 if (wret) {
930 ret = wret;
931 goto enospc;
932 }
933 }
934 right = read_node_slot(root, parent, pslot + 1);
935 if (right) {
936 btrfs_tree_lock(right);
937 wret = btrfs_cow_block(trans, root, right,
938 parent, pslot + 1, &right, 0);
939 if (wret) {
940 ret = wret;
941 goto enospc;
942 }
943 }
944
945 /* first, try to make some room in the middle buffer */
946 if (left) {
947 orig_slot += btrfs_header_nritems(left);
948 wret = push_node_left(trans, root, left, mid, 1);
949 if (wret < 0)
950 ret = wret;
951 if (btrfs_header_nritems(mid) < 2)
952 err_on_enospc = 1;
953 }
954
955 /*
956 * then try to empty the right most buffer into the middle
957 */
958 if (right) {
959 wret = push_node_left(trans, root, mid, right, 1);
960 if (wret < 0 && wret != -ENOSPC)
961 ret = wret;
962 if (btrfs_header_nritems(right) == 0) {
963 u64 bytenr = right->start;
964 u64 generation = btrfs_header_generation(parent);
965 u32 blocksize = right->len;
966
967 clean_tree_block(trans, root, right);
968 btrfs_tree_unlock(right);
969 free_extent_buffer(right);
970 right = NULL;
971 wret = del_ptr(trans, root, path, level + 1, pslot +
972 1);
973 if (wret)
974 ret = wret;
975 wret = btrfs_free_extent(trans, root, bytenr,
976 blocksize, parent->start,
977 btrfs_header_owner(parent),
978 generation, level, 1);
979 if (wret)
980 ret = wret;
981 } else {
982 struct btrfs_disk_key right_key;
983 btrfs_node_key(right, &right_key, 0);
984 btrfs_set_node_key(parent, &right_key, pslot + 1);
985 btrfs_mark_buffer_dirty(parent);
986 }
987 }
988 if (btrfs_header_nritems(mid) == 1) {
989 /*
990 * we're not allowed to leave a node with one item in the
991 * tree during a delete. A deletion from lower in the tree
992 * could try to delete the only pointer in this node.
993 * So, pull some keys from the left.
994 * There has to be a left pointer at this point because
995 * otherwise we would have pulled some pointers from the
996 * right
997 */
998 BUG_ON(!left);
999 wret = balance_node_right(trans, root, mid, left);
1000 if (wret < 0) {
1001 ret = wret;
1002 goto enospc;
1003 }
1004 if (wret == 1) {
1005 wret = push_node_left(trans, root, left, mid, 1);
1006 if (wret < 0)
1007 ret = wret;
1008 }
1009 BUG_ON(wret == 1);
1010 }
1011 if (btrfs_header_nritems(mid) == 0) {
1012 /* we've managed to empty the middle node, drop it */
1013 u64 root_gen = btrfs_header_generation(parent);
1014 u64 bytenr = mid->start;
1015 u32 blocksize = mid->len;
1016
1017 clean_tree_block(trans, root, mid);
1018 btrfs_tree_unlock(mid);
1019 free_extent_buffer(mid);
1020 mid = NULL;
1021 wret = del_ptr(trans, root, path, level + 1, pslot);
1022 if (wret)
1023 ret = wret;
1024 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
1025 parent->start,
1026 btrfs_header_owner(parent),
1027 root_gen, level, 1);
1028 if (wret)
1029 ret = wret;
1030 } else {
1031 /* update the parent key to reflect our changes */
1032 struct btrfs_disk_key mid_key;
1033 btrfs_node_key(mid, &mid_key, 0);
1034 btrfs_set_node_key(parent, &mid_key, pslot);
1035 btrfs_mark_buffer_dirty(parent);
1036 }
1037
1038 /* update the path */
1039 if (left) {
1040 if (btrfs_header_nritems(left) > orig_slot) {
1041 extent_buffer_get(left);
1042 /* left was locked after cow */
1043 path->nodes[level] = left;
1044 path->slots[level + 1] -= 1;
1045 path->slots[level] = orig_slot;
1046 if (mid) {
1047 btrfs_tree_unlock(mid);
1048 free_extent_buffer(mid);
1049 }
1050 } else {
1051 orig_slot -= btrfs_header_nritems(left);
1052 path->slots[level] = orig_slot;
1053 }
1054 }
1055 /* double check we haven't messed things up */
1056 check_block(root, path, level);
1057 if (orig_ptr !=
1058 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
1059 BUG();
1060enospc:
1061 if (right) {
1062 btrfs_tree_unlock(right);
1063 free_extent_buffer(right);
1064 }
1065 if (left) {
1066 if (path->nodes[level] != left)
1067 btrfs_tree_unlock(left);
1068 free_extent_buffer(left);
1069 }
1070 return ret;
1071}
1072
1073/* Node balancing for insertion. Here we only split or push nodes around
1074 * when they are completely full. This is also done top down, so we
1075 * have to be pessimistic.
1076 */
1077static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
1078 struct btrfs_root *root,
1079 struct btrfs_path *path, int level)
1080{
1081 struct extent_buffer *right = NULL;
1082 struct extent_buffer *mid;
1083 struct extent_buffer *left = NULL;
1084 struct extent_buffer *parent = NULL;
1085 int ret = 0;
1086 int wret;
1087 int pslot;
1088 int orig_slot = path->slots[level];
1089 u64 orig_ptr;
1090
1091 if (level == 0)
1092 return 1;
1093
1094 mid = path->nodes[level];
1095 WARN_ON(btrfs_header_generation(mid) != trans->transid);
1096 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
1097
1098 if (level < BTRFS_MAX_LEVEL - 1)
1099 parent = path->nodes[level + 1];
1100 pslot = path->slots[level + 1];
1101
1102 if (!parent)
1103 return 1;
1104
1105 left = read_node_slot(root, parent, pslot - 1);
1106
1107 /* first, try to make some room in the middle buffer */
1108 if (left) {
1109 u32 left_nr;
1110
1111 btrfs_tree_lock(left);
1112 left_nr = btrfs_header_nritems(left);
1113 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1114 wret = 1;
1115 } else {
1116 ret = btrfs_cow_block(trans, root, left, parent,
1117 pslot - 1, &left, 0);
1118 if (ret)
1119 wret = 1;
1120 else {
1121 wret = push_node_left(trans, root,
1122 left, mid, 0);
1123 }
1124 }
1125 if (wret < 0)
1126 ret = wret;
1127 if (wret == 0) {
1128 struct btrfs_disk_key disk_key;
1129 orig_slot += left_nr;
1130 btrfs_node_key(mid, &disk_key, 0);
1131 btrfs_set_node_key(parent, &disk_key, pslot);
1132 btrfs_mark_buffer_dirty(parent);
1133 if (btrfs_header_nritems(left) > orig_slot) {
1134 path->nodes[level] = left;
1135 path->slots[level + 1] -= 1;
1136 path->slots[level] = orig_slot;
1137 btrfs_tree_unlock(mid);
1138 free_extent_buffer(mid);
1139 } else {
1140 orig_slot -=
1141 btrfs_header_nritems(left);
1142 path->slots[level] = orig_slot;
1143 btrfs_tree_unlock(left);
1144 free_extent_buffer(left);
1145 }
1146 return 0;
1147 }
1148 btrfs_tree_unlock(left);
1149 free_extent_buffer(left);
1150 }
1151 right = read_node_slot(root, parent, pslot + 1);
1152
1153 /*
1154 * then try to empty the right most buffer into the middle
1155 */
1156 if (right) {
1157 u32 right_nr;
1158 btrfs_tree_lock(right);
1159 right_nr = btrfs_header_nritems(right);
1160 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1161 wret = 1;
1162 } else {
1163 ret = btrfs_cow_block(trans, root, right,
1164 parent, pslot + 1,
1165 &right, 0);
1166 if (ret)
1167 wret = 1;
1168 else {
1169 wret = balance_node_right(trans, root,
1170 right, mid);
1171 }
1172 }
1173 if (wret < 0)
1174 ret = wret;
1175 if (wret == 0) {
1176 struct btrfs_disk_key disk_key;
1177
1178 btrfs_node_key(right, &disk_key, 0);
1179 btrfs_set_node_key(parent, &disk_key, pslot + 1);
1180 btrfs_mark_buffer_dirty(parent);
1181
1182 if (btrfs_header_nritems(mid) <= orig_slot) {
1183 path->nodes[level] = right;
1184 path->slots[level + 1] += 1;
1185 path->slots[level] = orig_slot -
1186 btrfs_header_nritems(mid);
1187 btrfs_tree_unlock(mid);
1188 free_extent_buffer(mid);
1189 } else {
1190 btrfs_tree_unlock(right);
1191 free_extent_buffer(right);
1192 }
1193 return 0;
1194 }
1195 btrfs_tree_unlock(right);
1196 free_extent_buffer(right);
1197 }
1198 return 1;
1199}
1200
1201/*
1202 * readahead one full node of leaves, finding things that are close
1203 * to the block in 'slot', and triggering ra on them.
1204 */
1205static noinline void reada_for_search(struct btrfs_root *root,
1206 struct btrfs_path *path,
1207 int level, int slot, u64 objectid)
1208{
1209 struct extent_buffer *node;
1210 struct btrfs_disk_key disk_key;
1211 u32 nritems;
1212 u64 search;
1213 u64 lowest_read;
1214 u64 highest_read;
1215 u64 nread = 0;
1216 int direction = path->reada;
1217 struct extent_buffer *eb;
1218 u32 nr;
1219 u32 blocksize;
1220 u32 nscan = 0;
1221
1222 if (level != 1)
1223 return;
1224
1225 if (!path->nodes[level])
1226 return;
1227
1228 node = path->nodes[level];
1229
1230 search = btrfs_node_blockptr(node, slot);
1231 blocksize = btrfs_level_size(root, level - 1);
1232 eb = btrfs_find_tree_block(root, search, blocksize);
1233 if (eb) {
1234 free_extent_buffer(eb);
1235 return;
1236 }
1237
1238 highest_read = search;
1239 lowest_read = search;
1240
1241 nritems = btrfs_header_nritems(node);
1242 nr = slot;
1243 while (1) {
1244 if (direction < 0) {
1245 if (nr == 0)
1246 break;
1247 nr--;
1248 } else if (direction > 0) {
1249 nr++;
1250 if (nr >= nritems)
1251 break;
1252 }
1253 if (path->reada < 0 && objectid) {
1254 btrfs_node_key(node, &disk_key, nr);
1255 if (btrfs_disk_key_objectid(&disk_key) != objectid)
1256 break;
1257 }
1258 search = btrfs_node_blockptr(node, nr);
1259 if ((search >= lowest_read && search <= highest_read) ||
1260 (search < lowest_read && lowest_read - search <= 16384) ||
1261 (search > highest_read && search - highest_read <= 16384)) {
1262 readahead_tree_block(root, search, blocksize,
1263 btrfs_node_ptr_generation(node, nr));
1264 nread += blocksize;
1265 }
1266 nscan++;
1267 if (path->reada < 2 && (nread > (64 * 1024) || nscan > 32))
1268 break;
1269
1270 if (nread > (256 * 1024) || nscan > 128)
1271 break;
1272
1273 if (search < lowest_read)
1274 lowest_read = search;
1275 if (search > highest_read)
1276 highest_read = search;
1277 }
1278}
1279
1280/*
1281 * when we walk down the tree, it is usually safe to unlock the higher layers
1282 * in the tree. The exceptions are when our path goes through slot 0, because
1283 * operations on the tree might require changing key pointers higher up in the
1284 * tree.
1285 *
1286 * callers might also have set path->keep_locks, which tells this code to keep
1287 * the lock if the path points to the last slot in the block. This is part of
1288 * walking through the tree, and selecting the next slot in the higher block.
1289 *
1290 * lowest_unlock sets the lowest level in the tree we're allowed to unlock. so
1291 * if lowest_unlock is 1, level 0 won't be unlocked
1292 */
1293static noinline void unlock_up(struct btrfs_path *path, int level,
1294 int lowest_unlock)
1295{
1296 int i;
1297 int skip_level = level;
1298 int no_skips = 0;
1299 struct extent_buffer *t;
1300
1301 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1302 if (!path->nodes[i])
1303 break;
1304 if (!path->locks[i])
1305 break;
1306 if (!no_skips && path->slots[i] == 0) {
1307 skip_level = i + 1;
1308 continue;
1309 }
1310 if (!no_skips && path->keep_locks) {
1311 u32 nritems;
1312 t = path->nodes[i];
1313 nritems = btrfs_header_nritems(t);
1314 if (nritems < 1 || path->slots[i] >= nritems - 1) {
1315 skip_level = i + 1;
1316 continue;
1317 }
1318 }
1319 if (skip_level < i && i >= lowest_unlock)
1320 no_skips = 1;
1321
1322 t = path->nodes[i];
1323 if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
1324 btrfs_tree_unlock(t);
1325 path->locks[i] = 0;
1326 }
1327 }
1328}
1329
1330/*
1331 * look for key in the tree. path is filled in with nodes along the way
1332 * if key is found, we return zero and you can find the item in the leaf
1333 * level of the path (level 0)
1334 *
1335 * If the key isn't found, the path points to the slot where it should
1336 * be inserted, and 1 is returned. If there are other errors during the
1337 * search a negative error number is returned.
1338 *
1339 * if ins_len > 0, nodes and leaves will be split as we walk down the
1340 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
1341 * possible)
1342 */
1343int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1344 *root, struct btrfs_key *key, struct btrfs_path *p, int
1345 ins_len, int cow)
1346{
1347 struct extent_buffer *b;
1348 struct extent_buffer *tmp;
1349 int slot;
1350 int ret;
1351 int level;
1352 int should_reada = p->reada;
1353 int lowest_unlock = 1;
1354 int blocksize;
1355 u8 lowest_level = 0;
1356 u64 blocknr;
1357 u64 gen;
1358 struct btrfs_key prealloc_block;
1359
1360 lowest_level = p->lowest_level;
1361 WARN_ON(lowest_level && ins_len > 0);
1362 WARN_ON(p->nodes[0] != NULL);
1363
1364 if (ins_len < 0)
1365 lowest_unlock = 2;
1366
1367 prealloc_block.objectid = 0;
1368
1369again:
1370 if (p->skip_locking)
1371 b = btrfs_root_node(root);
1372 else
1373 b = btrfs_lock_root_node(root);
1374
1375 while (b) {
1376 level = btrfs_header_level(b);
1377
1378 /*
1379 * setup the path here so we can release it under lock
1380 * contention with the cow code
1381 */
1382 p->nodes[level] = b;
1383 if (!p->skip_locking)
1384 p->locks[level] = 1;
1385
1386 if (cow) {
1387 int wret;
1388
1389 /* is a cow on this block not required */
1390 spin_lock(&root->fs_info->hash_lock);
1391 if (btrfs_header_generation(b) == trans->transid &&
1392 btrfs_header_owner(b) == root->root_key.objectid &&
1393 !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
1394 spin_unlock(&root->fs_info->hash_lock);
1395 goto cow_done;
1396 }
1397 spin_unlock(&root->fs_info->hash_lock);
1398
1399 /* ok, we have to cow, is our old prealloc the right
1400 * size?
1401 */
1402 if (prealloc_block.objectid &&
1403 prealloc_block.offset != b->len) {
1404 btrfs_free_reserved_extent(root,
1405 prealloc_block.objectid,
1406 prealloc_block.offset);
1407 prealloc_block.objectid = 0;
1408 }
1409
1410 /*
1411 * for higher level blocks, try not to allocate blocks
1412 * with the block and the parent locks held.
1413 */
1414 if (level > 1 && !prealloc_block.objectid &&
1415 btrfs_path_lock_waiting(p, level)) {
1416 u32 size = b->len;
1417 u64 hint = b->start;
1418
1419 btrfs_release_path(root, p);
1420 ret = btrfs_reserve_extent(trans, root,
1421 size, size, 0,
1422 hint, (u64)-1,
1423 &prealloc_block, 0);
1424 BUG_ON(ret);
1425 goto again;
1426 }
1427
1428 wret = btrfs_cow_block(trans, root, b,
1429 p->nodes[level + 1],
1430 p->slots[level + 1],
1431 &b, prealloc_block.objectid);
1432 prealloc_block.objectid = 0;
1433 if (wret) {
1434 free_extent_buffer(b);
1435 ret = wret;
1436 goto done;
1437 }
1438 }
1439cow_done:
1440 BUG_ON(!cow && ins_len);
1441 if (level != btrfs_header_level(b))
1442 WARN_ON(1);
1443 level = btrfs_header_level(b);
1444
1445 p->nodes[level] = b;
1446 if (!p->skip_locking)
1447 p->locks[level] = 1;
1448
1449 ret = check_block(root, p, level);
1450 if (ret) {
1451 ret = -1;
1452 goto done;
1453 }
1454
1455 ret = bin_search(b, key, level, &slot);
1456 if (level != 0) {
1457 if (ret && slot > 0)
1458 slot -= 1;
1459 p->slots[level] = slot;
1460 if ((p->search_for_split || ins_len > 0) &&
1461 btrfs_header_nritems(b) >=
1462 BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
1463 int sret = split_node(trans, root, p, level);
1464 BUG_ON(sret > 0);
1465 if (sret) {
1466 ret = sret;
1467 goto done;
1468 }
1469 b = p->nodes[level];
1470 slot = p->slots[level];
1471 } else if (ins_len < 0) {
1472 int sret = balance_level(trans, root, p,
1473 level);
1474 if (sret) {
1475 ret = sret;
1476 goto done;
1477 }
1478 b = p->nodes[level];
1479 if (!b) {
1480 btrfs_release_path(NULL, p);
1481 goto again;
1482 }
1483 slot = p->slots[level];
1484 BUG_ON(btrfs_header_nritems(b) == 1);
1485 }
1486 unlock_up(p, level, lowest_unlock);
1487
1488 /* this is only true while dropping a snapshot */
1489 if (level == lowest_level) {
1490 ret = 0;
1491 goto done;
1492 }
1493
1494 blocknr = btrfs_node_blockptr(b, slot);
1495 gen = btrfs_node_ptr_generation(b, slot);
1496 blocksize = btrfs_level_size(root, level - 1);
1497
1498 tmp = btrfs_find_tree_block(root, blocknr, blocksize);
1499 if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
1500 b = tmp;
1501 } else {
1502 /*
1503 * reduce lock contention at high levels
1504 * of the btree by dropping locks before
1505 * we read.
1506 */
1507 if (level > 1) {
1508 btrfs_release_path(NULL, p);
1509 if (tmp)
1510 free_extent_buffer(tmp);
1511 if (should_reada)
1512 reada_for_search(root, p,
1513 level, slot,
1514 key->objectid);
1515
1516 tmp = read_tree_block(root, blocknr,
1517 blocksize, gen);
1518 if (tmp)
1519 free_extent_buffer(tmp);
1520 goto again;
1521 } else {
1522 if (tmp)
1523 free_extent_buffer(tmp);
1524 if (should_reada)
1525 reada_for_search(root, p,
1526 level, slot,
1527 key->objectid);
1528 b = read_node_slot(root, b, slot);
1529 }
1530 }
1531 if (!p->skip_locking)
1532 btrfs_tree_lock(b);
1533 } else {
1534 p->slots[level] = slot;
1535 if (ins_len > 0 &&
1536 btrfs_leaf_free_space(root, b) < ins_len) {
1537 int sret = split_leaf(trans, root, key,
1538 p, ins_len, ret == 0);
1539 BUG_ON(sret > 0);
1540 if (sret) {
1541 ret = sret;
1542 goto done;
1543 }
1544 }
1545 if (!p->search_for_split)
1546 unlock_up(p, level, lowest_unlock);
1547 goto done;
1548 }
1549 }
1550 ret = 1;
1551done:
1552 if (prealloc_block.objectid) {
1553 btrfs_free_reserved_extent(root,
1554 prealloc_block.objectid,
1555 prealloc_block.offset);
1556 }
1557
1558 return ret;
1559}
1560
1561int btrfs_merge_path(struct btrfs_trans_handle *trans,
1562 struct btrfs_root *root,
1563 struct btrfs_key *node_keys,
1564 u64 *nodes, int lowest_level)
1565{
1566 struct extent_buffer *eb;
1567 struct extent_buffer *parent;
1568 struct btrfs_key key;
1569 u64 bytenr;
1570 u64 generation;
1571 u32 blocksize;
1572 int level;
1573 int slot;
1574 int key_match;
1575 int ret;
1576
1577 eb = btrfs_lock_root_node(root);
1578 ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0);
1579 BUG_ON(ret);
1580
1581 parent = eb;
1582 while (1) {
1583 level = btrfs_header_level(parent);
1584 if (level == 0 || level <= lowest_level)
1585 break;
1586
1587 ret = bin_search(parent, &node_keys[lowest_level], level,
1588 &slot);
1589 if (ret && slot > 0)
1590 slot--;
1591
1592 bytenr = btrfs_node_blockptr(parent, slot);
1593 if (nodes[level - 1] == bytenr)
1594 break;
1595
1596 blocksize = btrfs_level_size(root, level - 1);
1597 generation = btrfs_node_ptr_generation(parent, slot);
1598 btrfs_node_key_to_cpu(eb, &key, slot);
1599 key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
1600
1601 if (generation == trans->transid) {
1602 eb = read_tree_block(root, bytenr, blocksize,
1603 generation);
1604 btrfs_tree_lock(eb);
1605 }
1606
1607 /*
1608 * if node keys match and node pointer hasn't been modified
1609 * in the running transaction, we can merge the path. for
1610 * blocks owened by reloc trees, the node pointer check is
1611 * skipped, this is because these blocks are fully controlled
1612 * by the space balance code, no one else can modify them.
1613 */
1614 if (!nodes[level - 1] || !key_match ||
1615 (generation == trans->transid &&
1616 btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) {
1617 if (level == 1 || level == lowest_level + 1) {
1618 if (generation == trans->transid) {
1619 btrfs_tree_unlock(eb);
1620 free_extent_buffer(eb);
1621 }
1622 break;
1623 }
1624
1625 if (generation != trans->transid) {
1626 eb = read_tree_block(root, bytenr, blocksize,
1627 generation);
1628 btrfs_tree_lock(eb);
1629 }
1630
1631 ret = btrfs_cow_block(trans, root, eb, parent, slot,
1632 &eb, 0);
1633 BUG_ON(ret);
1634
1635 if (root->root_key.objectid ==
1636 BTRFS_TREE_RELOC_OBJECTID) {
1637 if (!nodes[level - 1]) {
1638 nodes[level - 1] = eb->start;
1639 memcpy(&node_keys[level - 1], &key,
1640 sizeof(node_keys[0]));
1641 } else {
1642 WARN_ON(1);
1643 }
1644 }
1645
1646 btrfs_tree_unlock(parent);
1647 free_extent_buffer(parent);
1648 parent = eb;
1649 continue;
1650 }
1651
1652 btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
1653 btrfs_set_node_ptr_generation(parent, slot, trans->transid);
1654 btrfs_mark_buffer_dirty(parent);
1655
1656 ret = btrfs_inc_extent_ref(trans, root,
1657 nodes[level - 1],
1658 blocksize, parent->start,
1659 btrfs_header_owner(parent),
1660 btrfs_header_generation(parent),
1661 level - 1);
1662 BUG_ON(ret);
1663
1664 /*
1665 * If the block was created in the running transaction,
1666 * it's possible this is the last reference to it, so we
1667 * should drop the subtree.
1668 */
1669 if (generation == trans->transid) {
1670 ret = btrfs_drop_subtree(trans, root, eb, parent);
1671 BUG_ON(ret);
1672 btrfs_tree_unlock(eb);
1673 free_extent_buffer(eb);
1674 } else {
1675 ret = btrfs_free_extent(trans, root, bytenr,
1676 blocksize, parent->start,
1677 btrfs_header_owner(parent),
1678 btrfs_header_generation(parent),
1679 level - 1, 1);
1680 BUG_ON(ret);
1681 }
1682 break;
1683 }
1684 btrfs_tree_unlock(parent);
1685 free_extent_buffer(parent);
1686 return 0;
1687}
1688
1689/*
1690 * adjust the pointers going up the tree, starting at level
1691 * making sure the right key of each node is points to 'key'.
1692 * This is used after shifting pointers to the left, so it stops
1693 * fixing up pointers when a given leaf/node is not in slot 0 of the
1694 * higher levels
1695 *
1696 * If this fails to write a tree block, it returns -1, but continues
1697 * fixing up the blocks in ram so the tree is consistent.
1698 */
1699static int fixup_low_keys(struct btrfs_trans_handle *trans,
1700 struct btrfs_root *root, struct btrfs_path *path,
1701 struct btrfs_disk_key *key, int level)
1702{
1703 int i;
1704 int ret = 0;
1705 struct extent_buffer *t;
1706
1707 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1708 int tslot = path->slots[i];
1709 if (!path->nodes[i])
1710 break;
1711 t = path->nodes[i];
1712 btrfs_set_node_key(t, key, tslot);
1713 btrfs_mark_buffer_dirty(path->nodes[i]);
1714 if (tslot != 0)
1715 break;
1716 }
1717 return ret;
1718}
1719
1720/*
1721 * update item key.
1722 *
1723 * This function isn't completely safe. It's the caller's responsibility
1724 * that the new key won't break the order
1725 */
1726int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
1727 struct btrfs_root *root, struct btrfs_path *path,
1728 struct btrfs_key *new_key)
1729{
1730 struct btrfs_disk_key disk_key;
1731 struct extent_buffer *eb;
1732 int slot;
1733
1734 eb = path->nodes[0];
1735 slot = path->slots[0];
1736 if (slot > 0) {
1737 btrfs_item_key(eb, &disk_key, slot - 1);
1738 if (comp_keys(&disk_key, new_key) >= 0)
1739 return -1;
1740 }
1741 if (slot < btrfs_header_nritems(eb) - 1) {
1742 btrfs_item_key(eb, &disk_key, slot + 1);
1743 if (comp_keys(&disk_key, new_key) <= 0)
1744 return -1;
1745 }
1746
1747 btrfs_cpu_key_to_disk(&disk_key, new_key);
1748 btrfs_set_item_key(eb, &disk_key, slot);
1749 btrfs_mark_buffer_dirty(eb);
1750 if (slot == 0)
1751 fixup_low_keys(trans, root, path, &disk_key, 1);
1752 return 0;
1753}
1754
1755/*
1756 * try to push data from one node into the next node left in the
1757 * tree.
1758 *
1759 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1760 * error, and > 0 if there was no room in the left hand block.
1761 */
1762static int push_node_left(struct btrfs_trans_handle *trans,
1763 struct btrfs_root *root, struct extent_buffer *dst,
1764 struct extent_buffer *src, int empty)
1765{
1766 int push_items = 0;
1767 int src_nritems;
1768 int dst_nritems;
1769 int ret = 0;
1770
1771 src_nritems = btrfs_header_nritems(src);
1772 dst_nritems = btrfs_header_nritems(dst);
1773 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1774 WARN_ON(btrfs_header_generation(src) != trans->transid);
1775 WARN_ON(btrfs_header_generation(dst) != trans->transid);
1776
1777 if (!empty && src_nritems <= 8)
1778 return 1;
1779
1780 if (push_items <= 0)
1781 return 1;
1782
1783 if (empty) {
1784 push_items = min(src_nritems, push_items);
1785 if (push_items < src_nritems) {
1786 /* leave at least 8 pointers in the node if
1787 * we aren't going to empty it
1788 */
1789 if (src_nritems - push_items < 8) {
1790 if (push_items <= 8)
1791 return 1;
1792 push_items -= 8;
1793 }
1794 }
1795 } else
1796 push_items = min(src_nritems - 8, push_items);
1797
1798 copy_extent_buffer(dst, src,
1799 btrfs_node_key_ptr_offset(dst_nritems),
1800 btrfs_node_key_ptr_offset(0),
1801 push_items * sizeof(struct btrfs_key_ptr));
1802
1803 if (push_items < src_nritems) {
1804 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1805 btrfs_node_key_ptr_offset(push_items),
1806 (src_nritems - push_items) *
1807 sizeof(struct btrfs_key_ptr));
1808 }
1809 btrfs_set_header_nritems(src, src_nritems - push_items);
1810 btrfs_set_header_nritems(dst, dst_nritems + push_items);
1811 btrfs_mark_buffer_dirty(src);
1812 btrfs_mark_buffer_dirty(dst);
1813
1814 ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items);
1815 BUG_ON(ret);
1816
1817 return ret;
1818}
1819
1820/*
1821 * try to push data from one node into the next node right in the
1822 * tree.
1823 *
1824 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1825 * error, and > 0 if there was no room in the right hand block.
1826 *
1827 * this will only push up to 1/2 the contents of the left node over
1828 */
1829static int balance_node_right(struct btrfs_trans_handle *trans,
1830 struct btrfs_root *root,
1831 struct extent_buffer *dst,
1832 struct extent_buffer *src)
1833{
1834 int push_items = 0;
1835 int max_push;
1836 int src_nritems;
1837 int dst_nritems;
1838 int ret = 0;
1839
1840 WARN_ON(btrfs_header_generation(src) != trans->transid);
1841 WARN_ON(btrfs_header_generation(dst) != trans->transid);
1842
1843 src_nritems = btrfs_header_nritems(src);
1844 dst_nritems = btrfs_header_nritems(dst);
1845 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1846 if (push_items <= 0)
1847 return 1;
1848
1849 if (src_nritems < 4)
1850 return 1;
1851
1852 max_push = src_nritems / 2 + 1;
1853 /* don't try to empty the node */
1854 if (max_push >= src_nritems)
1855 return 1;
1856
1857 if (max_push < push_items)
1858 push_items = max_push;
1859
1860 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1861 btrfs_node_key_ptr_offset(0),
1862 (dst_nritems) *
1863 sizeof(struct btrfs_key_ptr));
1864
1865 copy_extent_buffer(dst, src,
1866 btrfs_node_key_ptr_offset(0),
1867 btrfs_node_key_ptr_offset(src_nritems - push_items),
1868 push_items * sizeof(struct btrfs_key_ptr));
1869
1870 btrfs_set_header_nritems(src, src_nritems - push_items);
1871 btrfs_set_header_nritems(dst, dst_nritems + push_items);
1872
1873 btrfs_mark_buffer_dirty(src);
1874 btrfs_mark_buffer_dirty(dst);
1875
1876 ret = btrfs_update_ref(trans, root, src, dst, 0, push_items);
1877 BUG_ON(ret);
1878
1879 return ret;
1880}
1881
1882/*
1883 * helper function to insert a new root level in the tree.
1884 * A new node is allocated, and a single item is inserted to
1885 * point to the existing root
1886 *
1887 * returns zero on success or < 0 on failure.
1888 */
1889static noinline int insert_new_root(struct btrfs_trans_handle *trans,
1890 struct btrfs_root *root,
1891 struct btrfs_path *path, int level)
1892{
1893 u64 lower_gen;
1894 struct extent_buffer *lower;
1895 struct extent_buffer *c;
1896 struct extent_buffer *old;
1897 struct btrfs_disk_key lower_key;
1898 int ret;
1899
1900 BUG_ON(path->nodes[level]);
1901 BUG_ON(path->nodes[level-1] != root->node);
1902
1903 lower = path->nodes[level-1];
1904 if (level == 1)
1905 btrfs_item_key(lower, &lower_key, 0);
1906 else
1907 btrfs_node_key(lower, &lower_key, 0);
1908
1909 c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
1910 root->root_key.objectid, trans->transid,
1911 level, root->node->start, 0);
1912 if (IS_ERR(c))
1913 return PTR_ERR(c);
1914
1915 memset_extent_buffer(c, 0, 0, root->nodesize);
1916 btrfs_set_header_nritems(c, 1);
1917 btrfs_set_header_level(c, level);
1918 btrfs_set_header_bytenr(c, c->start);
1919 btrfs_set_header_generation(c, trans->transid);
1920 btrfs_set_header_owner(c, root->root_key.objectid);
1921
1922 write_extent_buffer(c, root->fs_info->fsid,
1923 (unsigned long)btrfs_header_fsid(c),
1924 BTRFS_FSID_SIZE);
1925
1926 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
1927 (unsigned long)btrfs_header_chunk_tree_uuid(c),
1928 BTRFS_UUID_SIZE);
1929
1930 btrfs_set_node_key(c, &lower_key, 0);
1931 btrfs_set_node_blockptr(c, 0, lower->start);
1932 lower_gen = btrfs_header_generation(lower);
1933 WARN_ON(lower_gen != trans->transid);
1934
1935 btrfs_set_node_ptr_generation(c, 0, lower_gen);
1936
1937 btrfs_mark_buffer_dirty(c);
1938
1939 spin_lock(&root->node_lock);
1940 old = root->node;
1941 root->node = c;
1942 spin_unlock(&root->node_lock);
1943
1944 ret = btrfs_update_extent_ref(trans, root, lower->start,
1945 lower->start, c->start,
1946 root->root_key.objectid,
1947 trans->transid, level - 1);
1948 BUG_ON(ret);
1949
1950 /* the super has an extra ref to root->node */
1951 free_extent_buffer(old);
1952
1953 add_root_to_dirty_list(root);
1954 extent_buffer_get(c);
1955 path->nodes[level] = c;
1956 path->locks[level] = 1;
1957 path->slots[level] = 0;
1958 return 0;
1959}
1960
1961/*
1962 * worker function to insert a single pointer in a node.
1963 * the node should have enough room for the pointer already
1964 *
1965 * slot and level indicate where you want the key to go, and
1966 * blocknr is the block the key points to.
1967 *
1968 * returns zero on success and < 0 on any error
1969 */
1970static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1971 *root, struct btrfs_path *path, struct btrfs_disk_key
1972 *key, u64 bytenr, int slot, int level)
1973{
1974 struct extent_buffer *lower;
1975 int nritems;
1976
1977 BUG_ON(!path->nodes[level]);
1978 lower = path->nodes[level];
1979 nritems = btrfs_header_nritems(lower);
1980 if (slot > nritems)
1981 BUG();
1982 if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
1983 BUG();
1984 if (slot != nritems) {
1985 memmove_extent_buffer(lower,
1986 btrfs_node_key_ptr_offset(slot + 1),
1987 btrfs_node_key_ptr_offset(slot),
1988 (nritems - slot) * sizeof(struct btrfs_key_ptr));
1989 }
1990 btrfs_set_node_key(lower, key, slot);
1991 btrfs_set_node_blockptr(lower, slot, bytenr);
1992 WARN_ON(trans->transid == 0);
1993 btrfs_set_node_ptr_generation(lower, slot, trans->transid);
1994 btrfs_set_header_nritems(lower, nritems + 1);
1995 btrfs_mark_buffer_dirty(lower);
1996 return 0;
1997}
1998
1999/*
2000 * split the node at the specified level in path in two.
2001 * The path is corrected to point to the appropriate node after the split
2002 *
2003 * Before splitting this tries to make some room in the node by pushing
2004 * left and right, if either one works, it returns right away.
2005 *
2006 * returns 0 on success and < 0 on failure
2007 */
2008static noinline int split_node(struct btrfs_trans_handle *trans,
2009 struct btrfs_root *root,
2010 struct btrfs_path *path, int level)
2011{
2012 struct extent_buffer *c;
2013 struct extent_buffer *split;
2014 struct btrfs_disk_key disk_key;
2015 int mid;
2016 int ret;
2017 int wret;
2018 u32 c_nritems;
2019
2020 c = path->nodes[level];
2021 WARN_ON(btrfs_header_generation(c) != trans->transid);
2022 if (c == root->node) {
2023 /* trying to split the root, lets make a new one */
2024 ret = insert_new_root(trans, root, path, level + 1);
2025 if (ret)
2026 return ret;
2027 } else {
2028 ret = push_nodes_for_insert(trans, root, path, level);
2029 c = path->nodes[level];
2030 if (!ret && btrfs_header_nritems(c) <
2031 BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
2032 return 0;
2033 if (ret < 0)
2034 return ret;
2035 }
2036
2037 c_nritems = btrfs_header_nritems(c);
2038
2039 split = btrfs_alloc_free_block(trans, root, root->nodesize,
2040 path->nodes[level + 1]->start,
2041 root->root_key.objectid,
2042 trans->transid, level, c->start, 0);
2043 if (IS_ERR(split))
2044 return PTR_ERR(split);
2045
2046 btrfs_set_header_flags(split, btrfs_header_flags(c));
2047 btrfs_set_header_level(split, btrfs_header_level(c));
2048 btrfs_set_header_bytenr(split, split->start);
2049 btrfs_set_header_generation(split, trans->transid);
2050 btrfs_set_header_owner(split, root->root_key.objectid);
2051 btrfs_set_header_flags(split, 0);
2052 write_extent_buffer(split, root->fs_info->fsid,
2053 (unsigned long)btrfs_header_fsid(split),
2054 BTRFS_FSID_SIZE);
2055 write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
2056 (unsigned long)btrfs_header_chunk_tree_uuid(split),
2057 BTRFS_UUID_SIZE);
2058
2059 mid = (c_nritems + 1) / 2;
2060
2061 copy_extent_buffer(split, c,
2062 btrfs_node_key_ptr_offset(0),
2063 btrfs_node_key_ptr_offset(mid),
2064 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
2065 btrfs_set_header_nritems(split, c_nritems - mid);
2066 btrfs_set_header_nritems(c, mid);
2067 ret = 0;
2068
2069 btrfs_mark_buffer_dirty(c);
2070 btrfs_mark_buffer_dirty(split);
2071
2072 btrfs_node_key(split, &disk_key, 0);
2073 wret = insert_ptr(trans, root, path, &disk_key, split->start,
2074 path->slots[level + 1] + 1,
2075 level + 1);
2076 if (wret)
2077 ret = wret;
2078
2079 ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid);
2080 BUG_ON(ret);
2081
2082 if (path->slots[level] >= mid) {
2083 path->slots[level] -= mid;
2084 btrfs_tree_unlock(c);
2085 free_extent_buffer(c);
2086 path->nodes[level] = split;
2087 path->slots[level + 1] += 1;
2088 } else {
2089 btrfs_tree_unlock(split);
2090 free_extent_buffer(split);
2091 }
2092 return ret;
2093}
2094
2095/*
2096 * how many bytes are required to store the items in a leaf. start
2097 * and nr indicate which items in the leaf to check. This totals up the
2098 * space used both by the item structs and the item data
2099 */
2100static int leaf_space_used(struct extent_buffer *l, int start, int nr)
2101{
2102 int data_len;
2103 int nritems = btrfs_header_nritems(l);
2104 int end = min(nritems, start + nr) - 1;
2105
2106 if (!nr)
2107 return 0;
2108 data_len = btrfs_item_end_nr(l, start);
2109 data_len = data_len - btrfs_item_offset_nr(l, end);
2110 data_len += sizeof(struct btrfs_item) * nr;
2111 WARN_ON(data_len < 0);
2112 return data_len;
2113}
2114
2115/*
2116 * The space between the end of the leaf items and
2117 * the start of the leaf data. IOW, how much room
2118 * the leaf has left for both items and data
2119 */
2120noinline int btrfs_leaf_free_space(struct btrfs_root *root,
2121 struct extent_buffer *leaf)
2122{
2123 int nritems = btrfs_header_nritems(leaf);
2124 int ret;
2125 ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
2126 if (ret < 0) {
2127 printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, "
2128 "used %d nritems %d\n",
2129 ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
2130 leaf_space_used(leaf, 0, nritems), nritems);
2131 }
2132 return ret;
2133}
2134
2135/*
2136 * push some data in the path leaf to the right, trying to free up at
2137 * least data_size bytes. returns zero if the push worked, nonzero otherwise
2138 *
2139 * returns 1 if the push failed because the other node didn't have enough
2140 * room, 0 if everything worked out and < 0 if there were major errors.
2141 */
2142static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
2143 *root, struct btrfs_path *path, int data_size,
2144 int empty)
2145{
2146 struct extent_buffer *left = path->nodes[0];
2147 struct extent_buffer *right;
2148 struct extent_buffer *upper;
2149 struct btrfs_disk_key disk_key;
2150 int slot;
2151 u32 i;
2152 int free_space;
2153 int push_space = 0;
2154 int push_items = 0;
2155 struct btrfs_item *item;
2156 u32 left_nritems;
2157 u32 nr;
2158 u32 right_nritems;
2159 u32 data_end;
2160 u32 this_item_size;
2161 int ret;
2162
2163 slot = path->slots[1];
2164 if (!path->nodes[1])
2165 return 1;
2166
2167 upper = path->nodes[1];
2168 if (slot >= btrfs_header_nritems(upper) - 1)
2169 return 1;
2170
2171 WARN_ON(!btrfs_tree_locked(path->nodes[1]));
2172
2173 right = read_node_slot(root, upper, slot + 1);
2174 btrfs_tree_lock(right);
2175 free_space = btrfs_leaf_free_space(root, right);
2176 if (free_space < data_size)
2177 goto out_unlock;
2178
2179 /* cow and double check */
2180 ret = btrfs_cow_block(trans, root, right, upper,
2181 slot + 1, &right, 0);
2182 if (ret)
2183 goto out_unlock;
2184
2185 free_space = btrfs_leaf_free_space(root, right);
2186 if (free_space < data_size)
2187 goto out_unlock;
2188
2189 left_nritems = btrfs_header_nritems(left);
2190 if (left_nritems == 0)
2191 goto out_unlock;
2192
2193 if (empty)
2194 nr = 0;
2195 else
2196 nr = 1;
2197
2198 if (path->slots[0] >= left_nritems)
2199 push_space += data_size;
2200
2201 i = left_nritems - 1;
2202 while (i >= nr) {
2203 item = btrfs_item_nr(left, i);
2204
2205 if (!empty && push_items > 0) {
2206 if (path->slots[0] > i)
2207 break;
2208 if (path->slots[0] == i) {
2209 int space = btrfs_leaf_free_space(root, left);
2210 if (space + push_space * 2 > free_space)
2211 break;
2212 }
2213 }
2214
2215 if (path->slots[0] == i)
2216 push_space += data_size;
2217
2218 if (!left->map_token) {
2219 map_extent_buffer(left, (unsigned long)item,
2220 sizeof(struct btrfs_item),
2221 &left->map_token, &left->kaddr,
2222 &left->map_start, &left->map_len,
2223 KM_USER1);
2224 }
2225
2226 this_item_size = btrfs_item_size(left, item);
2227 if (this_item_size + sizeof(*item) + push_space > free_space)
2228 break;
2229
2230 push_items++;
2231 push_space += this_item_size + sizeof(*item);
2232 if (i == 0)
2233 break;
2234 i--;
2235 }
2236 if (left->map_token) {
2237 unmap_extent_buffer(left, left->map_token, KM_USER1);
2238 left->map_token = NULL;
2239 }
2240
2241 if (push_items == 0)
2242 goto out_unlock;
2243
2244 if (!empty && push_items == left_nritems)
2245 WARN_ON(1);
2246
2247 /* push left to right */
2248 right_nritems = btrfs_header_nritems(right);
2249
2250 push_space = btrfs_item_end_nr(left, left_nritems - push_items);
2251 push_space -= leaf_data_end(root, left);
2252
2253 /* make room in the right data area */
2254 data_end = leaf_data_end(root, right);
2255 memmove_extent_buffer(right,
2256 btrfs_leaf_data(right) + data_end - push_space,
2257 btrfs_leaf_data(right) + data_end,
2258 BTRFS_LEAF_DATA_SIZE(root) - data_end);
2259
2260 /* copy from the left data area */
2261 copy_extent_buffer(right, left, btrfs_leaf_data(right) +
2262 BTRFS_LEAF_DATA_SIZE(root) - push_space,
2263 btrfs_leaf_data(left) + leaf_data_end(root, left),
2264 push_space);
2265
2266 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
2267 btrfs_item_nr_offset(0),
2268 right_nritems * sizeof(struct btrfs_item));
2269
2270 /* copy the items from left to right */
2271 copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
2272 btrfs_item_nr_offset(left_nritems - push_items),
2273 push_items * sizeof(struct btrfs_item));
2274
2275 /* update the item pointers */
2276 right_nritems += push_items;
2277 btrfs_set_header_nritems(right, right_nritems);
2278 push_space = BTRFS_LEAF_DATA_SIZE(root);
2279 for (i = 0; i < right_nritems; i++) {
2280 item = btrfs_item_nr(right, i);
2281 if (!right->map_token) {
2282 map_extent_buffer(right, (unsigned long)item,
2283 sizeof(struct btrfs_item),
2284 &right->map_token, &right->kaddr,
2285 &right->map_start, &right->map_len,
2286 KM_USER1);
2287 }
2288 push_space -= btrfs_item_size(right, item);
2289 btrfs_set_item_offset(right, item, push_space);
2290 }
2291
2292 if (right->map_token) {
2293 unmap_extent_buffer(right, right->map_token, KM_USER1);
2294 right->map_token = NULL;
2295 }
2296 left_nritems -= push_items;
2297 btrfs_set_header_nritems(left, left_nritems);
2298
2299 if (left_nritems)
2300 btrfs_mark_buffer_dirty(left);
2301 btrfs_mark_buffer_dirty(right);
2302
2303 ret = btrfs_update_ref(trans, root, left, right, 0, push_items);
2304 BUG_ON(ret);
2305
2306 btrfs_item_key(right, &disk_key, 0);
2307 btrfs_set_node_key(upper, &disk_key, slot + 1);
2308 btrfs_mark_buffer_dirty(upper);
2309
2310 /* then fixup the leaf pointer in the path */
2311 if (path->slots[0] >= left_nritems) {
2312 path->slots[0] -= left_nritems;
2313 if (btrfs_header_nritems(path->nodes[0]) == 0)
2314 clean_tree_block(trans, root, path->nodes[0]);
2315 btrfs_tree_unlock(path->nodes[0]);
2316 free_extent_buffer(path->nodes[0]);
2317 path->nodes[0] = right;
2318 path->slots[1] += 1;
2319 } else {
2320 btrfs_tree_unlock(right);
2321 free_extent_buffer(right);
2322 }
2323 return 0;
2324
2325out_unlock:
2326 btrfs_tree_unlock(right);
2327 free_extent_buffer(right);
2328 return 1;
2329}
2330
2331/*
2332 * push some data in the path leaf to the left, trying to free up at
2333 * least data_size bytes. returns zero if the push worked, nonzero otherwise
2334 */
2335static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
2336 *root, struct btrfs_path *path, int data_size,
2337 int empty)
2338{
2339 struct btrfs_disk_key disk_key;
2340 struct extent_buffer *right = path->nodes[0];
2341 struct extent_buffer *left;
2342 int slot;
2343 int i;
2344 int free_space;
2345 int push_space = 0;
2346 int push_items = 0;
2347 struct btrfs_item *item;
2348 u32 old_left_nritems;
2349 u32 right_nritems;
2350 u32 nr;
2351 int ret = 0;
2352 int wret;
2353 u32 this_item_size;
2354 u32 old_left_item_size;
2355
2356 slot = path->slots[1];
2357 if (slot == 0)
2358 return 1;
2359 if (!path->nodes[1])
2360 return 1;
2361
2362 right_nritems = btrfs_header_nritems(right);
2363 if (right_nritems == 0)
2364 return 1;
2365
2366 WARN_ON(!btrfs_tree_locked(path->nodes[1]));
2367
2368 left = read_node_slot(root, path->nodes[1], slot - 1);
2369 btrfs_tree_lock(left);
2370 free_space = btrfs_leaf_free_space(root, left);
2371 if (free_space < data_size) {
2372 ret = 1;
2373 goto out;
2374 }
2375
2376 /* cow and double check */
2377 ret = btrfs_cow_block(trans, root, left,
2378 path->nodes[1], slot - 1, &left, 0);
2379 if (ret) {
2380 /* we hit -ENOSPC, but it isn't fatal here */
2381 ret = 1;
2382 goto out;
2383 }
2384
2385 free_space = btrfs_leaf_free_space(root, left);
2386 if (free_space < data_size) {
2387 ret = 1;
2388 goto out;
2389 }
2390
2391 if (empty)
2392 nr = right_nritems;
2393 else
2394 nr = right_nritems - 1;
2395
2396 for (i = 0; i < nr; i++) {
2397 item = btrfs_item_nr(right, i);
2398 if (!right->map_token) {
2399 map_extent_buffer(right, (unsigned long)item,
2400 sizeof(struct btrfs_item),
2401 &right->map_token, &right->kaddr,
2402 &right->map_start, &right->map_len,
2403 KM_USER1);
2404 }
2405
2406 if (!empty && push_items > 0) {
2407 if (path->slots[0] < i)
2408 break;
2409 if (path->slots[0] == i) {
2410 int space = btrfs_leaf_free_space(root, right);
2411 if (space + push_space * 2 > free_space)
2412 break;
2413 }
2414 }
2415
2416 if (path->slots[0] == i)
2417 push_space += data_size;
2418
2419 this_item_size = btrfs_item_size(right, item);
2420 if (this_item_size + sizeof(*item) + push_space > free_space)
2421 break;
2422
2423 push_items++;
2424 push_space += this_item_size + sizeof(*item);
2425 }
2426
2427 if (right->map_token) {
2428 unmap_extent_buffer(right, right->map_token, KM_USER1);
2429 right->map_token = NULL;
2430 }
2431
2432 if (push_items == 0) {
2433 ret = 1;
2434 goto out;
2435 }
2436 if (!empty && push_items == btrfs_header_nritems(right))
2437 WARN_ON(1);
2438
2439 /* push data from right to left */
2440 copy_extent_buffer(left, right,
2441 btrfs_item_nr_offset(btrfs_header_nritems(left)),
2442 btrfs_item_nr_offset(0),
2443 push_items * sizeof(struct btrfs_item));
2444
2445 push_space = BTRFS_LEAF_DATA_SIZE(root) -
2446 btrfs_item_offset_nr(right, push_items - 1);
2447
2448 copy_extent_buffer(left, right, btrfs_leaf_data(left) +
2449 leaf_data_end(root, left) - push_space,
2450 btrfs_leaf_data(right) +
2451 btrfs_item_offset_nr(right, push_items - 1),
2452 push_space);
2453 old_left_nritems = btrfs_header_nritems(left);
2454 BUG_ON(old_left_nritems <= 0);
2455
2456 old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
2457 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
2458 u32 ioff;
2459
2460 item = btrfs_item_nr(left, i);
2461 if (!left->map_token) {
2462 map_extent_buffer(left, (unsigned long)item,
2463 sizeof(struct btrfs_item),
2464 &left->map_token, &left->kaddr,
2465 &left->map_start, &left->map_len,
2466 KM_USER1);
2467 }
2468
2469 ioff = btrfs_item_offset(left, item);
2470 btrfs_set_item_offset(left, item,
2471 ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
2472 }
2473 btrfs_set_header_nritems(left, old_left_nritems + push_items);
2474 if (left->map_token) {
2475 unmap_extent_buffer(left, left->map_token, KM_USER1);
2476 left->map_token = NULL;
2477 }
2478
2479 /* fixup right node */
2480 if (push_items > right_nritems) {
2481 printk(KERN_CRIT "push items %d nr %u\n", push_items,
2482 right_nritems);
2483 WARN_ON(1);
2484 }
2485
2486 if (push_items < right_nritems) {
2487 push_space = btrfs_item_offset_nr(right, push_items - 1) -
2488 leaf_data_end(root, right);
2489 memmove_extent_buffer(right, btrfs_leaf_data(right) +
2490 BTRFS_LEAF_DATA_SIZE(root) - push_space,
2491 btrfs_leaf_data(right) +
2492 leaf_data_end(root, right), push_space);
2493
2494 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
2495 btrfs_item_nr_offset(push_items),
2496 (btrfs_header_nritems(right) - push_items) *
2497 sizeof(struct btrfs_item));
2498 }
2499 right_nritems -= push_items;
2500 btrfs_set_header_nritems(right, right_nritems);
2501 push_space = BTRFS_LEAF_DATA_SIZE(root);
2502 for (i = 0; i < right_nritems; i++) {
2503 item = btrfs_item_nr(right, i);
2504
2505 if (!right->map_token) {
2506 map_extent_buffer(right, (unsigned long)item,
2507 sizeof(struct btrfs_item),
2508 &right->map_token, &right->kaddr,
2509 &right->map_start, &right->map_len,
2510 KM_USER1);
2511 }
2512
2513 push_space = push_space - btrfs_item_size(right, item);
2514 btrfs_set_item_offset(right, item, push_space);
2515 }
2516 if (right->map_token) {
2517 unmap_extent_buffer(right, right->map_token, KM_USER1);
2518 right->map_token = NULL;
2519 }
2520
2521 btrfs_mark_buffer_dirty(left);
2522 if (right_nritems)
2523 btrfs_mark_buffer_dirty(right);
2524
2525 ret = btrfs_update_ref(trans, root, right, left,
2526 old_left_nritems, push_items);
2527 BUG_ON(ret);
2528
2529 btrfs_item_key(right, &disk_key, 0);
2530 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
2531 if (wret)
2532 ret = wret;
2533
2534 /* then fixup the leaf pointer in the path */
2535 if (path->slots[0] < push_items) {
2536 path->slots[0] += old_left_nritems;
2537 if (btrfs_header_nritems(path->nodes[0]) == 0)
2538 clean_tree_block(trans, root, path->nodes[0]);
2539 btrfs_tree_unlock(path->nodes[0]);
2540 free_extent_buffer(path->nodes[0]);
2541 path->nodes[0] = left;
2542 path->slots[1] -= 1;
2543 } else {
2544 btrfs_tree_unlock(left);
2545 free_extent_buffer(left);
2546 path->slots[0] -= push_items;
2547 }
2548 BUG_ON(path->slots[0] < 0);
2549 return ret;
2550out:
2551 btrfs_tree_unlock(left);
2552 free_extent_buffer(left);
2553 return ret;
2554}
2555
2556/*
2557 * split the path's leaf in two, making sure there is at least data_size
2558 * available for the resulting leaf level of the path.
2559 *
2560 * returns 0 if all went well and < 0 on failure.
2561 */
2562static noinline int split_leaf(struct btrfs_trans_handle *trans,
2563 struct btrfs_root *root,
2564 struct btrfs_key *ins_key,
2565 struct btrfs_path *path, int data_size,
2566 int extend)
2567{
2568 struct extent_buffer *l;
2569 u32 nritems;
2570 int mid;
2571 int slot;
2572 struct extent_buffer *right;
2573 int data_copy_size;
2574 int rt_data_off;
2575 int i;
2576 int ret = 0;
2577 int wret;
2578 int double_split;
2579 int num_doubles = 0;
2580 struct btrfs_disk_key disk_key;
2581
2582 /* first try to make some room by pushing left and right */
2583 if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
2584 wret = push_leaf_right(trans, root, path, data_size, 0);
2585 if (wret < 0)
2586 return wret;
2587 if (wret) {
2588 wret = push_leaf_left(trans, root, path, data_size, 0);
2589 if (wret < 0)
2590 return wret;
2591 }
2592 l = path->nodes[0];
2593
2594 /* did the pushes work? */
2595 if (btrfs_leaf_free_space(root, l) >= data_size)
2596 return 0;
2597 }
2598
2599 if (!path->nodes[1]) {
2600 ret = insert_new_root(trans, root, path, 1);
2601 if (ret)
2602 return ret;
2603 }
2604again:
2605 double_split = 0;
2606 l = path->nodes[0];
2607 slot = path->slots[0];
2608 nritems = btrfs_header_nritems(l);
2609 mid = (nritems + 1) / 2;
2610
2611 right = btrfs_alloc_free_block(trans, root, root->leafsize,
2612 path->nodes[1]->start,
2613 root->root_key.objectid,
2614 trans->transid, 0, l->start, 0);
2615 if (IS_ERR(right)) {
2616 BUG_ON(1);
2617 return PTR_ERR(right);
2618 }
2619
2620 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2621 btrfs_set_header_bytenr(right, right->start);
2622 btrfs_set_header_generation(right, trans->transid);
2623 btrfs_set_header_owner(right, root->root_key.objectid);
2624 btrfs_set_header_level(right, 0);
2625 write_extent_buffer(right, root->fs_info->fsid,
2626 (unsigned long)btrfs_header_fsid(right),
2627 BTRFS_FSID_SIZE);
2628
2629 write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
2630 (unsigned long)btrfs_header_chunk_tree_uuid(right),
2631 BTRFS_UUID_SIZE);
2632 if (mid <= slot) {
2633 if (nritems == 1 ||
2634 leaf_space_used(l, mid, nritems - mid) + data_size >
2635 BTRFS_LEAF_DATA_SIZE(root)) {
2636 if (slot >= nritems) {
2637 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2638 btrfs_set_header_nritems(right, 0);
2639 wret = insert_ptr(trans, root, path,
2640 &disk_key, right->start,
2641 path->slots[1] + 1, 1);
2642 if (wret)
2643 ret = wret;
2644
2645 btrfs_tree_unlock(path->nodes[0]);
2646 free_extent_buffer(path->nodes[0]);
2647 path->nodes[0] = right;
2648 path->slots[0] = 0;
2649 path->slots[1] += 1;
2650 btrfs_mark_buffer_dirty(right);
2651 return ret;
2652 }
2653 mid = slot;
2654 if (mid != nritems &&
2655 leaf_space_used(l, mid, nritems - mid) +
2656 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2657 double_split = 1;
2658 }
2659 }
2660 } else {
2661 if (leaf_space_used(l, 0, mid) + data_size >
2662 BTRFS_LEAF_DATA_SIZE(root)) {
2663 if (!extend && data_size && slot == 0) {
2664 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2665 btrfs_set_header_nritems(right, 0);
2666 wret = insert_ptr(trans, root, path,
2667 &disk_key,
2668 right->start,
2669 path->slots[1], 1);
2670 if (wret)
2671 ret = wret;
2672 btrfs_tree_unlock(path->nodes[0]);
2673 free_extent_buffer(path->nodes[0]);
2674 path->nodes[0] = right;
2675 path->slots[0] = 0;
2676 if (path->slots[1] == 0) {
2677 wret = fixup_low_keys(trans, root,
2678 path, &disk_key, 1);
2679 if (wret)
2680 ret = wret;
2681 }
2682 btrfs_mark_buffer_dirty(right);
2683 return ret;
2684 } else if ((extend || !data_size) && slot == 0) {
2685 mid = 1;
2686 } else {
2687 mid = slot;
2688 if (mid != nritems &&
2689 leaf_space_used(l, mid, nritems - mid) +
2690 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2691 double_split = 1;
2692 }
2693 }
2694 }
2695 }
2696 nritems = nritems - mid;
2697 btrfs_set_header_nritems(right, nritems);
2698 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
2699
2700 copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
2701 btrfs_item_nr_offset(mid),
2702 nritems * sizeof(struct btrfs_item));
2703
2704 copy_extent_buffer(right, l,
2705 btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
2706 data_copy_size, btrfs_leaf_data(l) +
2707 leaf_data_end(root, l), data_copy_size);
2708
2709 rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2710 btrfs_item_end_nr(l, mid);
2711
2712 for (i = 0; i < nritems; i++) {
2713 struct btrfs_item *item = btrfs_item_nr(right, i);
2714 u32 ioff;
2715
2716 if (!right->map_token) {
2717 map_extent_buffer(right, (unsigned long)item,
2718 sizeof(struct btrfs_item),
2719 &right->map_token, &right->kaddr,
2720 &right->map_start, &right->map_len,
2721 KM_USER1);
2722 }
2723
2724 ioff = btrfs_item_offset(right, item);
2725 btrfs_set_item_offset(right, item, ioff + rt_data_off);
2726 }
2727
2728 if (right->map_token) {
2729 unmap_extent_buffer(right, right->map_token, KM_USER1);
2730 right->map_token = NULL;
2731 }
2732
2733 btrfs_set_header_nritems(l, mid);
2734 ret = 0;
2735 btrfs_item_key(right, &disk_key, 0);
2736 wret = insert_ptr(trans, root, path, &disk_key, right->start,
2737 path->slots[1] + 1, 1);
2738 if (wret)
2739 ret = wret;
2740
2741 btrfs_mark_buffer_dirty(right);
2742 btrfs_mark_buffer_dirty(l);
2743 BUG_ON(path->slots[0] != slot);
2744
2745 ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
2746 BUG_ON(ret);
2747
2748 if (mid <= slot) {
2749 btrfs_tree_unlock(path->nodes[0]);
2750 free_extent_buffer(path->nodes[0]);
2751 path->nodes[0] = right;
2752 path->slots[0] -= mid;
2753 path->slots[1] += 1;
2754 } else {
2755 btrfs_tree_unlock(right);
2756 free_extent_buffer(right);
2757 }
2758
2759 BUG_ON(path->slots[0] < 0);
2760
2761 if (double_split) {
2762 BUG_ON(num_doubles != 0);
2763 num_doubles++;
2764 goto again;
2765 }
2766 return ret;
2767}
2768
2769/*
2770 * This function splits a single item into two items,
2771 * giving 'new_key' to the new item and splitting the
2772 * old one at split_offset (from the start of the item).
2773 *
2774 * The path may be released by this operation. After
2775 * the split, the path is pointing to the old item. The
2776 * new item is going to be in the same node as the old one.
2777 *
2778 * Note, the item being split must be smaller enough to live alone on
2779 * a tree block with room for one extra struct btrfs_item
2780 *
2781 * This allows us to split the item in place, keeping a lock on the
2782 * leaf the entire time.
2783 */
2784int btrfs_split_item(struct btrfs_trans_handle *trans,
2785 struct btrfs_root *root,
2786 struct btrfs_path *path,
2787 struct btrfs_key *new_key,
2788 unsigned long split_offset)
2789{
2790 u32 item_size;
2791 struct extent_buffer *leaf;
2792 struct btrfs_key orig_key;
2793 struct btrfs_item *item;
2794 struct btrfs_item *new_item;
2795 int ret = 0;
2796 int slot;
2797 u32 nritems;
2798 u32 orig_offset;
2799 struct btrfs_disk_key disk_key;
2800 char *buf;
2801
2802 leaf = path->nodes[0];
2803 btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]);
2804 if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item))
2805 goto split;
2806
2807 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2808 btrfs_release_path(root, path);
2809
2810 path->search_for_split = 1;
2811 path->keep_locks = 1;
2812
2813 ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1);
2814 path->search_for_split = 0;
2815
2816 /* if our item isn't there or got smaller, return now */
2817 if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0],
2818 path->slots[0])) {
2819 path->keep_locks = 0;
2820 return -EAGAIN;
2821 }
2822
2823 ret = split_leaf(trans, root, &orig_key, path,
2824 sizeof(struct btrfs_item), 1);
2825 path->keep_locks = 0;
2826 BUG_ON(ret);
2827
2828 leaf = path->nodes[0];
2829 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
2830
2831split:
2832 item = btrfs_item_nr(leaf, path->slots[0]);
2833 orig_offset = btrfs_item_offset(leaf, item);
2834 item_size = btrfs_item_size(leaf, item);
2835
2836
2837 buf = kmalloc(item_size, GFP_NOFS);
2838 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
2839 path->slots[0]), item_size);
2840 slot = path->slots[0] + 1;
2841 leaf = path->nodes[0];
2842
2843 nritems = btrfs_header_nritems(leaf);
2844
2845 if (slot != nritems) {
2846 /* shift the items */
2847 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
2848 btrfs_item_nr_offset(slot),
2849 (nritems - slot) * sizeof(struct btrfs_item));
2850
2851 }
2852
2853 btrfs_cpu_key_to_disk(&disk_key, new_key);
2854 btrfs_set_item_key(leaf, &disk_key, slot);
2855
2856 new_item = btrfs_item_nr(leaf, slot);
2857
2858 btrfs_set_item_offset(leaf, new_item, orig_offset);
2859 btrfs_set_item_size(leaf, new_item, item_size - split_offset);
2860
2861 btrfs_set_item_offset(leaf, item,
2862 orig_offset + item_size - split_offset);
2863 btrfs_set_item_size(leaf, item, split_offset);
2864
2865 btrfs_set_header_nritems(leaf, nritems + 1);
2866
2867 /* write the data for the start of the original item */
2868 write_extent_buffer(leaf, buf,
2869 btrfs_item_ptr_offset(leaf, path->slots[0]),
2870 split_offset);
2871
2872 /* write the data for the new item */
2873 write_extent_buffer(leaf, buf + split_offset,
2874 btrfs_item_ptr_offset(leaf, slot),
2875 item_size - split_offset);
2876 btrfs_mark_buffer_dirty(leaf);
2877
2878 ret = 0;
2879 if (btrfs_leaf_free_space(root, leaf) < 0) {
2880 btrfs_print_leaf(root, leaf);
2881 BUG();
2882 }
2883 kfree(buf);
2884 return ret;
2885}
2886
2887/*
2888 * make the item pointed to by the path smaller. new_size indicates
2889 * how small to make it, and from_end tells us if we just chop bytes
2890 * off the end of the item or if we shift the item to chop bytes off
2891 * the front.
2892 */
2893int btrfs_truncate_item(struct btrfs_trans_handle *trans,
2894 struct btrfs_root *root,
2895 struct btrfs_path *path,
2896 u32 new_size, int from_end)
2897{
2898 int ret = 0;
2899 int slot;
2900 int slot_orig;
2901 struct extent_buffer *leaf;
2902 struct btrfs_item *item;
2903 u32 nritems;
2904 unsigned int data_end;
2905 unsigned int old_data_start;
2906 unsigned int old_size;
2907 unsigned int size_diff;
2908 int i;
2909
2910 slot_orig = path->slots[0];
2911 leaf = path->nodes[0];
2912 slot = path->slots[0];
2913
2914 old_size = btrfs_item_size_nr(leaf, slot);
2915 if (old_size == new_size)
2916 return 0;
2917
2918 nritems = btrfs_header_nritems(leaf);
2919 data_end = leaf_data_end(root, leaf);
2920
2921 old_data_start = btrfs_item_offset_nr(leaf, slot);
2922
2923 size_diff = old_size - new_size;
2924
2925 BUG_ON(slot < 0);
2926 BUG_ON(slot >= nritems);
2927
2928 /*
2929 * item0..itemN ... dataN.offset..dataN.size .. data0.size
2930 */
2931 /* first correct the data pointers */
2932 for (i = slot; i < nritems; i++) {
2933 u32 ioff;
2934 item = btrfs_item_nr(leaf, i);
2935
2936 if (!leaf->map_token) {
2937 map_extent_buffer(leaf, (unsigned long)item,
2938 sizeof(struct btrfs_item),
2939 &leaf->map_token, &leaf->kaddr,
2940 &leaf->map_start, &leaf->map_len,
2941 KM_USER1);
2942 }
2943
2944 ioff = btrfs_item_offset(leaf, item);
2945 btrfs_set_item_offset(leaf, item, ioff + size_diff);
2946 }
2947
2948 if (leaf->map_token) {
2949 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2950 leaf->map_token = NULL;
2951 }
2952
2953 /* shift the data */
2954 if (from_end) {
2955 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2956 data_end + size_diff, btrfs_leaf_data(leaf) +
2957 data_end, old_data_start + new_size - data_end);
2958 } else {
2959 struct btrfs_disk_key disk_key;
2960 u64 offset;
2961
2962 btrfs_item_key(leaf, &disk_key, slot);
2963
2964 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
2965 unsigned long ptr;
2966 struct btrfs_file_extent_item *fi;
2967
2968 fi = btrfs_item_ptr(leaf, slot,
2969 struct btrfs_file_extent_item);
2970 fi = (struct btrfs_file_extent_item *)(
2971 (unsigned long)fi - size_diff);
2972
2973 if (btrfs_file_extent_type(leaf, fi) ==
2974 BTRFS_FILE_EXTENT_INLINE) {
2975 ptr = btrfs_item_ptr_offset(leaf, slot);
2976 memmove_extent_buffer(leaf, ptr,
2977 (unsigned long)fi,
2978 offsetof(struct btrfs_file_extent_item,
2979 disk_bytenr));
2980 }
2981 }
2982
2983 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2984 data_end + size_diff, btrfs_leaf_data(leaf) +
2985 data_end, old_data_start - data_end);
2986
2987 offset = btrfs_disk_key_offset(&disk_key);
2988 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
2989 btrfs_set_item_key(leaf, &disk_key, slot);
2990 if (slot == 0)
2991 fixup_low_keys(trans, root, path, &disk_key, 1);
2992 }
2993
2994 item = btrfs_item_nr(leaf, slot);
2995 btrfs_set_item_size(leaf, item, new_size);
2996 btrfs_mark_buffer_dirty(leaf);
2997
2998 ret = 0;
2999 if (btrfs_leaf_free_space(root, leaf) < 0) {
3000 btrfs_print_leaf(root, leaf);
3001 BUG();
3002 }
3003 return ret;
3004}
3005
3006/*
3007 * make the item pointed to by the path bigger, data_size is the new size.
3008 */
3009int btrfs_extend_item(struct btrfs_trans_handle *trans,
3010 struct btrfs_root *root, struct btrfs_path *path,
3011 u32 data_size)
3012{
3013 int ret = 0;
3014 int slot;
3015 int slot_orig;
3016 struct extent_buffer *leaf;
3017 struct btrfs_item *item;
3018 u32 nritems;
3019 unsigned int data_end;
3020 unsigned int old_data;
3021 unsigned int old_size;
3022 int i;
3023
3024 slot_orig = path->slots[0];
3025 leaf = path->nodes[0];
3026
3027 nritems = btrfs_header_nritems(leaf);
3028 data_end = leaf_data_end(root, leaf);
3029
3030 if (btrfs_leaf_free_space(root, leaf) < data_size) {
3031 btrfs_print_leaf(root, leaf);
3032 BUG();
3033 }
3034 slot = path->slots[0];
3035 old_data = btrfs_item_end_nr(leaf, slot);
3036
3037 BUG_ON(slot < 0);
3038 if (slot >= nritems) {
3039 btrfs_print_leaf(root, leaf);
3040 printk(KERN_CRIT "slot %d too large, nritems %d\n",
3041 slot, nritems);
3042 BUG_ON(1);
3043 }
3044
3045 /*
3046 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3047 */
3048 /* first correct the data pointers */
3049 for (i = slot; i < nritems; i++) {
3050 u32 ioff;
3051 item = btrfs_item_nr(leaf, i);
3052
3053 if (!leaf->map_token) {
3054 map_extent_buffer(leaf, (unsigned long)item,
3055 sizeof(struct btrfs_item),
3056 &leaf->map_token, &leaf->kaddr,
3057 &leaf->map_start, &leaf->map_len,
3058 KM_USER1);
3059 }
3060 ioff = btrfs_item_offset(leaf, item);
3061 btrfs_set_item_offset(leaf, item, ioff - data_size);
3062 }
3063
3064 if (leaf->map_token) {
3065 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3066 leaf->map_token = NULL;
3067 }
3068
3069 /* shift the data */
3070 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3071 data_end - data_size, btrfs_leaf_data(leaf) +
3072 data_end, old_data - data_end);
3073
3074 data_end = old_data;
3075 old_size = btrfs_item_size_nr(leaf, slot);
3076 item = btrfs_item_nr(leaf, slot);
3077 btrfs_set_item_size(leaf, item, old_size + data_size);
3078 btrfs_mark_buffer_dirty(leaf);
3079
3080 ret = 0;
3081 if (btrfs_leaf_free_space(root, leaf) < 0) {
3082 btrfs_print_leaf(root, leaf);
3083 BUG();
3084 }
3085 return ret;
3086}
3087
3088/*
3089 * Given a key and some data, insert items into the tree.
3090 * This does all the path init required, making room in the tree if needed.
3091 * Returns the number of keys that were inserted.
3092 */
3093int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3094 struct btrfs_root *root,
3095 struct btrfs_path *path,
3096 struct btrfs_key *cpu_key, u32 *data_size,
3097 int nr)
3098{
3099 struct extent_buffer *leaf;
3100 struct btrfs_item *item;
3101 int ret = 0;
3102 int slot;
3103 int i;
3104 u32 nritems;
3105 u32 total_data = 0;
3106 u32 total_size = 0;
3107 unsigned int data_end;
3108 struct btrfs_disk_key disk_key;
3109 struct btrfs_key found_key;
3110
3111 for (i = 0; i < nr; i++) {
3112 if (total_size + data_size[i] + sizeof(struct btrfs_item) >
3113 BTRFS_LEAF_DATA_SIZE(root)) {
3114 break;
3115 nr = i;
3116 }
3117 total_data += data_size[i];
3118 total_size += data_size[i] + sizeof(struct btrfs_item);
3119 }
3120 BUG_ON(nr == 0);
3121
3122 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3123 if (ret == 0)
3124 return -EEXIST;
3125 if (ret < 0)
3126 goto out;
3127
3128 leaf = path->nodes[0];
3129
3130 nritems = btrfs_header_nritems(leaf);
3131 data_end = leaf_data_end(root, leaf);
3132
3133 if (btrfs_leaf_free_space(root, leaf) < total_size) {
3134 for (i = nr; i >= 0; i--) {
3135 total_data -= data_size[i];
3136 total_size -= data_size[i] + sizeof(struct btrfs_item);
3137 if (total_size < btrfs_leaf_free_space(root, leaf))
3138 break;
3139 }
3140 nr = i;
3141 }
3142
3143 slot = path->slots[0];
3144 BUG_ON(slot < 0);
3145
3146 if (slot != nritems) {
3147 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
3148
3149 item = btrfs_item_nr(leaf, slot);
3150 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3151
3152 /* figure out how many keys we can insert in here */
3153 total_data = data_size[0];
3154 for (i = 1; i < nr; i++) {
3155 if (comp_cpu_keys(&found_key, cpu_key + i) <= 0)
3156 break;
3157 total_data += data_size[i];
3158 }
3159 nr = i;
3160
3161 if (old_data < data_end) {
3162 btrfs_print_leaf(root, leaf);
3163 printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
3164 slot, old_data, data_end);
3165 BUG_ON(1);
3166 }
3167 /*
3168 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3169 */
3170 /* first correct the data pointers */
3171 WARN_ON(leaf->map_token);
3172 for (i = slot; i < nritems; i++) {
3173 u32 ioff;
3174
3175 item = btrfs_item_nr(leaf, i);
3176 if (!leaf->map_token) {
3177 map_extent_buffer(leaf, (unsigned long)item,
3178 sizeof(struct btrfs_item),
3179 &leaf->map_token, &leaf->kaddr,
3180 &leaf->map_start, &leaf->map_len,
3181 KM_USER1);
3182 }
3183
3184 ioff = btrfs_item_offset(leaf, item);
3185 btrfs_set_item_offset(leaf, item, ioff - total_data);
3186 }
3187 if (leaf->map_token) {
3188 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3189 leaf->map_token = NULL;
3190 }
3191
3192 /* shift the items */
3193 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
3194 btrfs_item_nr_offset(slot),
3195 (nritems - slot) * sizeof(struct btrfs_item));
3196
3197 /* shift the data */
3198 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3199 data_end - total_data, btrfs_leaf_data(leaf) +
3200 data_end, old_data - data_end);
3201 data_end = old_data;
3202 } else {
3203 /*
3204 * this sucks but it has to be done, if we are inserting at
3205 * the end of the leaf only insert 1 of the items, since we
3206 * have no way of knowing whats on the next leaf and we'd have
3207 * to drop our current locks to figure it out
3208 */
3209 nr = 1;
3210 }
3211
3212 /* setup the item for the new data */
3213 for (i = 0; i < nr; i++) {
3214 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3215 btrfs_set_item_key(leaf, &disk_key, slot + i);
3216 item = btrfs_item_nr(leaf, slot + i);
3217 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
3218 data_end -= data_size[i];
3219 btrfs_set_item_size(leaf, item, data_size[i]);
3220 }
3221 btrfs_set_header_nritems(leaf, nritems + nr);
3222 btrfs_mark_buffer_dirty(leaf);
3223
3224 ret = 0;
3225 if (slot == 0) {
3226 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3227 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
3228 }
3229
3230 if (btrfs_leaf_free_space(root, leaf) < 0) {
3231 btrfs_print_leaf(root, leaf);
3232 BUG();
3233 }
3234out:
3235 if (!ret)
3236 ret = nr;
3237 return ret;
3238}
3239
3240/*
3241 * Given a key and some data, insert items into the tree.
3242 * This does all the path init required, making room in the tree if needed.
3243 */
3244int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3245 struct btrfs_root *root,
3246 struct btrfs_path *path,
3247 struct btrfs_key *cpu_key, u32 *data_size,
3248 int nr)
3249{
3250 struct extent_buffer *leaf;
3251 struct btrfs_item *item;
3252 int ret = 0;
3253 int slot;
3254 int slot_orig;
3255 int i;
3256 u32 nritems;
3257 u32 total_size = 0;
3258 u32 total_data = 0;
3259 unsigned int data_end;
3260 struct btrfs_disk_key disk_key;
3261
3262 for (i = 0; i < nr; i++)
3263 total_data += data_size[i];
3264
3265 total_size = total_data + (nr * sizeof(struct btrfs_item));
3266 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3267 if (ret == 0)
3268 return -EEXIST;
3269 if (ret < 0)
3270 goto out;
3271
3272 slot_orig = path->slots[0];
3273 leaf = path->nodes[0];
3274
3275 nritems = btrfs_header_nritems(leaf);
3276 data_end = leaf_data_end(root, leaf);
3277
3278 if (btrfs_leaf_free_space(root, leaf) < total_size) {
3279 btrfs_print_leaf(root, leaf);
3280 printk(KERN_CRIT "not enough freespace need %u have %d\n",
3281 total_size, btrfs_leaf_free_space(root, leaf));
3282 BUG();
3283 }
3284
3285 slot = path->slots[0];
3286 BUG_ON(slot < 0);
3287
3288 if (slot != nritems) {
3289 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
3290
3291 if (old_data < data_end) {
3292 btrfs_print_leaf(root, leaf);
3293 printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
3294 slot, old_data, data_end);
3295 BUG_ON(1);
3296 }
3297 /*
3298 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3299 */
3300 /* first correct the data pointers */
3301 WARN_ON(leaf->map_token);
3302 for (i = slot; i < nritems; i++) {
3303 u32 ioff;
3304
3305 item = btrfs_item_nr(leaf, i);
3306 if (!leaf->map_token) {
3307 map_extent_buffer(leaf, (unsigned long)item,
3308 sizeof(struct btrfs_item),
3309 &leaf->map_token, &leaf->kaddr,
3310 &leaf->map_start, &leaf->map_len,
3311 KM_USER1);
3312 }
3313
3314 ioff = btrfs_item_offset(leaf, item);
3315 btrfs_set_item_offset(leaf, item, ioff - total_data);
3316 }
3317 if (leaf->map_token) {
3318 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3319 leaf->map_token = NULL;
3320 }
3321
3322 /* shift the items */
3323 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
3324 btrfs_item_nr_offset(slot),
3325 (nritems - slot) * sizeof(struct btrfs_item));
3326
3327 /* shift the data */
3328 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3329 data_end - total_data, btrfs_leaf_data(leaf) +
3330 data_end, old_data - data_end);
3331 data_end = old_data;
3332 }
3333
3334 /* setup the item for the new data */
3335 for (i = 0; i < nr; i++) {
3336 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3337 btrfs_set_item_key(leaf, &disk_key, slot + i);
3338 item = btrfs_item_nr(leaf, slot + i);
3339 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
3340 data_end -= data_size[i];
3341 btrfs_set_item_size(leaf, item, data_size[i]);
3342 }
3343 btrfs_set_header_nritems(leaf, nritems + nr);
3344 btrfs_mark_buffer_dirty(leaf);
3345
3346 ret = 0;
3347 if (slot == 0) {
3348 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3349 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
3350 }
3351
3352 if (btrfs_leaf_free_space(root, leaf) < 0) {
3353 btrfs_print_leaf(root, leaf);
3354 BUG();
3355 }
3356out:
3357 return ret;
3358}
3359
3360/*
3361 * Given a key and some data, insert an item into the tree.
3362 * This does all the path init required, making room in the tree if needed.
3363 */
3364int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
3365 *root, struct btrfs_key *cpu_key, void *data, u32
3366 data_size)
3367{
3368 int ret = 0;
3369 struct btrfs_path *path;
3370 struct extent_buffer *leaf;
3371 unsigned long ptr;
3372
3373 path = btrfs_alloc_path();
3374 BUG_ON(!path);
3375 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
3376 if (!ret) {
3377 leaf = path->nodes[0];
3378 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
3379 write_extent_buffer(leaf, data, ptr, data_size);
3380 btrfs_mark_buffer_dirty(leaf);
3381 }
3382 btrfs_free_path(path);
3383 return ret;
3384}
3385
3386/*
3387 * delete the pointer from a given node.
3388 *
3389 * the tree should have been previously balanced so the deletion does not
3390 * empty a node.
3391 */
3392static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3393 struct btrfs_path *path, int level, int slot)
3394{
3395 struct extent_buffer *parent = path->nodes[level];
3396 u32 nritems;
3397 int ret = 0;
3398 int wret;
3399
3400 nritems = btrfs_header_nritems(parent);
3401 if (slot != nritems - 1) {
3402 memmove_extent_buffer(parent,
3403 btrfs_node_key_ptr_offset(slot),
3404 btrfs_node_key_ptr_offset(slot + 1),
3405 sizeof(struct btrfs_key_ptr) *
3406 (nritems - slot - 1));
3407 }
3408 nritems--;
3409 btrfs_set_header_nritems(parent, nritems);
3410 if (nritems == 0 && parent == root->node) {
3411 BUG_ON(btrfs_header_level(root->node) != 1);
3412 /* just turn the root into a leaf and break */
3413 btrfs_set_header_level(root->node, 0);
3414 } else if (slot == 0) {
3415 struct btrfs_disk_key disk_key;
3416
3417 btrfs_node_key(parent, &disk_key, 0);
3418 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
3419 if (wret)
3420 ret = wret;
3421 }
3422 btrfs_mark_buffer_dirty(parent);
3423 return ret;
3424}
3425
3426/*
3427 * a helper function to delete the leaf pointed to by path->slots[1] and
3428 * path->nodes[1]. bytenr is the node block pointer, but since the callers
3429 * already know it, it is faster to have them pass it down than to
3430 * read it out of the node again.
3431 *
3432 * This deletes the pointer in path->nodes[1] and frees the leaf
3433 * block extent. zero is returned if it all worked out, < 0 otherwise.
3434 *
3435 * The path must have already been setup for deleting the leaf, including
3436 * all the proper balancing. path->nodes[1] must be locked.
3437 */
3438noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3439 struct btrfs_root *root,
3440 struct btrfs_path *path, u64 bytenr)
3441{
3442 int ret;
3443 u64 root_gen = btrfs_header_generation(path->nodes[1]);
3444
3445 ret = del_ptr(trans, root, path, 1, path->slots[1]);
3446 if (ret)
3447 return ret;
3448
3449 ret = btrfs_free_extent(trans, root, bytenr,
3450 btrfs_level_size(root, 0),
3451 path->nodes[1]->start,
3452 btrfs_header_owner(path->nodes[1]),
3453 root_gen, 0, 1);
3454 return ret;
3455}
3456/*
3457 * delete the item at the leaf level in path. If that empties
3458 * the leaf, remove it from the tree
3459 */
3460int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3461 struct btrfs_path *path, int slot, int nr)
3462{
3463 struct extent_buffer *leaf;
3464 struct btrfs_item *item;
3465 int last_off;
3466 int dsize = 0;
3467 int ret = 0;
3468 int wret;
3469 int i;
3470 u32 nritems;
3471
3472 leaf = path->nodes[0];
3473 last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
3474
3475 for (i = 0; i < nr; i++)
3476 dsize += btrfs_item_size_nr(leaf, slot + i);
3477
3478 nritems = btrfs_header_nritems(leaf);
3479
3480 if (slot + nr != nritems) {
3481 int data_end = leaf_data_end(root, leaf);
3482
3483 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3484 data_end + dsize,
3485 btrfs_leaf_data(leaf) + data_end,
3486 last_off - data_end);
3487
3488 for (i = slot + nr; i < nritems; i++) {
3489 u32 ioff;
3490
3491 item = btrfs_item_nr(leaf, i);
3492 if (!leaf->map_token) {
3493 map_extent_buffer(leaf, (unsigned long)item,
3494 sizeof(struct btrfs_item),
3495 &leaf->map_token, &leaf->kaddr,
3496 &leaf->map_start, &leaf->map_len,
3497 KM_USER1);
3498 }
3499 ioff = btrfs_item_offset(leaf, item);
3500 btrfs_set_item_offset(leaf, item, ioff + dsize);
3501 }
3502
3503 if (leaf->map_token) {
3504 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3505 leaf->map_token = NULL;
3506 }
3507
3508 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
3509 btrfs_item_nr_offset(slot + nr),
3510 sizeof(struct btrfs_item) *
3511 (nritems - slot - nr));
3512 }
3513 btrfs_set_header_nritems(leaf, nritems - nr);
3514 nritems -= nr;
3515
3516 /* delete the leaf if we've emptied it */
3517 if (nritems == 0) {
3518 if (leaf == root->node) {
3519 btrfs_set_header_level(leaf, 0);
3520 } else {
3521 ret = btrfs_del_leaf(trans, root, path, leaf->start);
3522 BUG_ON(ret);
3523 }
3524 } else {
3525 int used = leaf_space_used(leaf, 0, nritems);
3526 if (slot == 0) {
3527 struct btrfs_disk_key disk_key;
3528
3529 btrfs_item_key(leaf, &disk_key, 0);
3530 wret = fixup_low_keys(trans, root, path,
3531 &disk_key, 1);
3532 if (wret)
3533 ret = wret;
3534 }
3535
3536 /* delete the leaf if it is mostly empty */
3537 if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
3538 /* push_leaf_left fixes the path.
3539 * make sure the path still points to our leaf
3540 * for possible call to del_ptr below
3541 */
3542 slot = path->slots[1];
3543 extent_buffer_get(leaf);
3544
3545 wret = push_leaf_left(trans, root, path, 1, 1);
3546 if (wret < 0 && wret != -ENOSPC)
3547 ret = wret;
3548
3549 if (path->nodes[0] == leaf &&
3550 btrfs_header_nritems(leaf)) {
3551 wret = push_leaf_right(trans, root, path, 1, 1);
3552 if (wret < 0 && wret != -ENOSPC)
3553 ret = wret;
3554 }
3555
3556 if (btrfs_header_nritems(leaf) == 0) {
3557 path->slots[1] = slot;
3558 ret = btrfs_del_leaf(trans, root, path,
3559 leaf->start);
3560 BUG_ON(ret);
3561 free_extent_buffer(leaf);
3562 } else {
3563 /* if we're still in the path, make sure
3564 * we're dirty. Otherwise, one of the
3565 * push_leaf functions must have already
3566 * dirtied this buffer
3567 */
3568 if (path->nodes[0] == leaf)
3569 btrfs_mark_buffer_dirty(leaf);
3570 free_extent_buffer(leaf);
3571 }
3572 } else {
3573 btrfs_mark_buffer_dirty(leaf);
3574 }
3575 }
3576 return ret;
3577}
3578
3579/*
3580 * search the tree again to find a leaf with lesser keys
3581 * returns 0 if it found something or 1 if there are no lesser leaves.
3582 * returns < 0 on io errors.
3583 *
3584 * This may release the path, and so you may lose any locks held at the
3585 * time you call it.
3586 */
3587int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
3588{
3589 struct btrfs_key key;
3590 struct btrfs_disk_key found_key;
3591 int ret;
3592
3593 btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
3594
3595 if (key.offset > 0)
3596 key.offset--;
3597 else if (key.type > 0)
3598 key.type--;
3599 else if (key.objectid > 0)
3600 key.objectid--;
3601 else
3602 return 1;
3603
3604 btrfs_release_path(root, path);
3605 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3606 if (ret < 0)
3607 return ret;
3608 btrfs_item_key(path->nodes[0], &found_key, 0);
3609 ret = comp_keys(&found_key, &key);
3610 if (ret < 0)
3611 return 0;
3612 return 1;
3613}
3614
3615/*
3616 * A helper function to walk down the tree starting at min_key, and looking
3617 * for nodes or leaves that are either in cache or have a minimum
3618 * transaction id. This is used by the btree defrag code, and tree logging
3619 *
3620 * This does not cow, but it does stuff the starting key it finds back
3621 * into min_key, so you can call btrfs_search_slot with cow=1 on the
3622 * key and get a writable path.
3623 *
3624 * This does lock as it descends, and path->keep_locks should be set
3625 * to 1 by the caller.
3626 *
3627 * This honors path->lowest_level to prevent descent past a given level
3628 * of the tree.
3629 *
3630 * min_trans indicates the oldest transaction that you are interested
3631 * in walking through. Any nodes or leaves older than min_trans are
3632 * skipped over (without reading them).
3633 *
3634 * returns zero if something useful was found, < 0 on error and 1 if there
3635 * was nothing in the tree that matched the search criteria.
3636 */
3637int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
3638 struct btrfs_key *max_key,
3639 struct btrfs_path *path, int cache_only,
3640 u64 min_trans)
3641{
3642 struct extent_buffer *cur;
3643 struct btrfs_key found_key;
3644 int slot;
3645 int sret;
3646 u32 nritems;
3647 int level;
3648 int ret = 1;
3649
3650 WARN_ON(!path->keep_locks);
3651again:
3652 cur = btrfs_lock_root_node(root);
3653 level = btrfs_header_level(cur);
3654 WARN_ON(path->nodes[level]);
3655 path->nodes[level] = cur;
3656 path->locks[level] = 1;
3657
3658 if (btrfs_header_generation(cur) < min_trans) {
3659 ret = 1;
3660 goto out;
3661 }
3662 while (1) {
3663 nritems = btrfs_header_nritems(cur);
3664 level = btrfs_header_level(cur);
3665 sret = bin_search(cur, min_key, level, &slot);
3666
3667 /* at the lowest level, we're done, setup the path and exit */
3668 if (level == path->lowest_level) {
3669 if (slot >= nritems)
3670 goto find_next_key;
3671 ret = 0;
3672 path->slots[level] = slot;
3673 btrfs_item_key_to_cpu(cur, &found_key, slot);
3674 goto out;
3675 }
3676 if (sret && slot > 0)
3677 slot--;
3678 /*
3679 * check this node pointer against the cache_only and
3680 * min_trans parameters. If it isn't in cache or is too
3681 * old, skip to the next one.
3682 */
3683 while (slot < nritems) {
3684 u64 blockptr;
3685 u64 gen;
3686 struct extent_buffer *tmp;
3687 struct btrfs_disk_key disk_key;
3688
3689 blockptr = btrfs_node_blockptr(cur, slot);
3690 gen = btrfs_node_ptr_generation(cur, slot);
3691 if (gen < min_trans) {
3692 slot++;
3693 continue;
3694 }
3695 if (!cache_only)
3696 break;
3697
3698 if (max_key) {
3699 btrfs_node_key(cur, &disk_key, slot);
3700 if (comp_keys(&disk_key, max_key) >= 0) {
3701 ret = 1;
3702 goto out;
3703 }
3704 }
3705
3706 tmp = btrfs_find_tree_block(root, blockptr,
3707 btrfs_level_size(root, level - 1));
3708
3709 if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
3710 free_extent_buffer(tmp);
3711 break;
3712 }
3713 if (tmp)
3714 free_extent_buffer(tmp);
3715 slot++;
3716 }
3717find_next_key:
3718 /*
3719 * we didn't find a candidate key in this node, walk forward
3720 * and find another one
3721 */
3722 if (slot >= nritems) {
3723 path->slots[level] = slot;
3724 sret = btrfs_find_next_key(root, path, min_key, level,
3725 cache_only, min_trans);
3726 if (sret == 0) {
3727 btrfs_release_path(root, path);
3728 goto again;
3729 } else {
3730 goto out;
3731 }
3732 }
3733 /* save our key for returning back */
3734 btrfs_node_key_to_cpu(cur, &found_key, slot);
3735 path->slots[level] = slot;
3736 if (level == path->lowest_level) {
3737 ret = 0;
3738 unlock_up(path, level, 1);
3739 goto out;
3740 }
3741 cur = read_node_slot(root, cur, slot);
3742
3743 btrfs_tree_lock(cur);
3744 path->locks[level - 1] = 1;
3745 path->nodes[level - 1] = cur;
3746 unlock_up(path, level, 1);
3747 }
3748out:
3749 if (ret == 0)
3750 memcpy(min_key, &found_key, sizeof(found_key));
3751 return ret;
3752}
3753
3754/*
3755 * this is similar to btrfs_next_leaf, but does not try to preserve
3756 * and fixup the path. It looks for and returns the next key in the
3757 * tree based on the current path and the cache_only and min_trans
3758 * parameters.
3759 *
3760 * 0 is returned if another key is found, < 0 if there are any errors
3761 * and 1 is returned if there are no higher keys in the tree
3762 *
3763 * path->keep_locks should be set to 1 on the search made before
3764 * calling this function.
3765 */
3766int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
3767 struct btrfs_key *key, int lowest_level,
3768 int cache_only, u64 min_trans)
3769{
3770 int level = lowest_level;
3771 int slot;
3772 struct extent_buffer *c;
3773
3774 WARN_ON(!path->keep_locks);
3775 while (level < BTRFS_MAX_LEVEL) {
3776 if (!path->nodes[level])
3777 return 1;
3778
3779 slot = path->slots[level] + 1;
3780 c = path->nodes[level];
3781next:
3782 if (slot >= btrfs_header_nritems(c)) {
3783 level++;
3784 if (level == BTRFS_MAX_LEVEL)
3785 return 1;
3786 continue;
3787 }
3788 if (level == 0)
3789 btrfs_item_key_to_cpu(c, key, slot);
3790 else {
3791 u64 blockptr = btrfs_node_blockptr(c, slot);
3792 u64 gen = btrfs_node_ptr_generation(c, slot);
3793
3794 if (cache_only) {
3795 struct extent_buffer *cur;
3796 cur = btrfs_find_tree_block(root, blockptr,
3797 btrfs_level_size(root, level - 1));
3798 if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
3799 slot++;
3800 if (cur)
3801 free_extent_buffer(cur);
3802 goto next;
3803 }
3804 free_extent_buffer(cur);
3805 }
3806 if (gen < min_trans) {
3807 slot++;
3808 goto next;
3809 }
3810 btrfs_node_key_to_cpu(c, key, slot);
3811 }
3812 return 0;
3813 }
3814 return 1;
3815}
3816
3817/*
3818 * search the tree again to find a leaf with greater keys
3819 * returns 0 if it found something or 1 if there are no greater leaves.
3820 * returns < 0 on io errors.
3821 */
3822int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
3823{
3824 int slot;
3825 int level = 1;
3826 struct extent_buffer *c;
3827 struct extent_buffer *next = NULL;
3828 struct btrfs_key key;
3829 u32 nritems;
3830 int ret;
3831
3832 nritems = btrfs_header_nritems(path->nodes[0]);
3833 if (nritems == 0)
3834 return 1;
3835
3836 btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
3837
3838 btrfs_release_path(root, path);
3839 path->keep_locks = 1;
3840 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3841 path->keep_locks = 0;
3842
3843 if (ret < 0)
3844 return ret;
3845
3846 nritems = btrfs_header_nritems(path->nodes[0]);
3847 /*
3848 * by releasing the path above we dropped all our locks. A balance
3849 * could have added more items next to the key that used to be
3850 * at the very end of the block. So, check again here and
3851 * advance the path if there are now more items available.
3852 */
3853 if (nritems > 0 && path->slots[0] < nritems - 1) {
3854 path->slots[0]++;
3855 goto done;
3856 }
3857
3858 while (level < BTRFS_MAX_LEVEL) {
3859 if (!path->nodes[level])
3860 return 1;
3861
3862 slot = path->slots[level] + 1;
3863 c = path->nodes[level];
3864 if (slot >= btrfs_header_nritems(c)) {
3865 level++;
3866 if (level == BTRFS_MAX_LEVEL)
3867 return 1;
3868 continue;
3869 }
3870
3871 if (next) {
3872 btrfs_tree_unlock(next);
3873 free_extent_buffer(next);
3874 }
3875
3876 if (level == 1 && (path->locks[1] || path->skip_locking) &&
3877 path->reada)
3878 reada_for_search(root, path, level, slot, 0);
3879
3880 next = read_node_slot(root, c, slot);
3881 if (!path->skip_locking) {
3882 WARN_ON(!btrfs_tree_locked(c));
3883 btrfs_tree_lock(next);
3884 }
3885 break;
3886 }
3887 path->slots[level] = slot;
3888 while (1) {
3889 level--;
3890 c = path->nodes[level];
3891 if (path->locks[level])
3892 btrfs_tree_unlock(c);
3893 free_extent_buffer(c);
3894 path->nodes[level] = next;
3895 path->slots[level] = 0;
3896 if (!path->skip_locking)
3897 path->locks[level] = 1;
3898 if (!level)
3899 break;
3900 if (level == 1 && path->locks[1] && path->reada)
3901 reada_for_search(root, path, level, slot, 0);
3902 next = read_node_slot(root, next, 0);
3903 if (!path->skip_locking) {
3904 WARN_ON(!btrfs_tree_locked(path->nodes[level]));
3905 btrfs_tree_lock(next);
3906 }
3907 }
3908done:
3909 unlock_up(path, 0, 1);
3910 return 0;
3911}
3912
3913/*
3914 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
3915 * searching until it gets past min_objectid or finds an item of 'type'
3916 *
3917 * returns 0 if something is found, 1 if nothing was found and < 0 on error
3918 */
3919int btrfs_previous_item(struct btrfs_root *root,
3920 struct btrfs_path *path, u64 min_objectid,
3921 int type)
3922{
3923 struct btrfs_key found_key;
3924 struct extent_buffer *leaf;
3925 u32 nritems;
3926 int ret;
3927
3928 while (1) {
3929 if (path->slots[0] == 0) {
3930 ret = btrfs_prev_leaf(root, path);
3931 if (ret != 0)
3932 return ret;
3933 } else {
3934 path->slots[0]--;
3935 }
3936 leaf = path->nodes[0];
3937 nritems = btrfs_header_nritems(leaf);
3938 if (nritems == 0)
3939 return 1;
3940 if (path->slots[0] == nritems)
3941 path->slots[0]--;
3942
3943 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3944 if (found_key.type == type)
3945 return 0;
3946 if (found_key.objectid < min_objectid)
3947 break;
3948 if (found_key.objectid == min_objectid &&
3949 found_key.type < type)
3950 break;
3951 }
3952 return 1;
3953}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
new file mode 100644
index 000000000000..eee060f88113
--- /dev/null
+++ b/fs/btrfs/ctree.h
@@ -0,0 +1,2129 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __BTRFS_CTREE__
20#define __BTRFS_CTREE__
21
22#include <linux/version.h>
23#include <linux/mm.h>
24#include <linux/highmem.h>
25#include <linux/fs.h>
26#include <linux/completion.h>
27#include <linux/backing-dev.h>
28#include <linux/wait.h>
29#include <asm/kmap_types.h>
30#include "extent_io.h"
31#include "extent_map.h"
32#include "async-thread.h"
33
34struct btrfs_trans_handle;
35struct btrfs_transaction;
36extern struct kmem_cache *btrfs_trans_handle_cachep;
37extern struct kmem_cache *btrfs_transaction_cachep;
38extern struct kmem_cache *btrfs_bit_radix_cachep;
39extern struct kmem_cache *btrfs_path_cachep;
40struct btrfs_ordered_sum;
41
42#define BTRFS_MAGIC "_BHRfS_M"
43
44#define BTRFS_ACL_NOT_CACHED ((void *)-1)
45
46#ifdef CONFIG_LOCKDEP
47# define BTRFS_MAX_LEVEL 7
48#else
49# define BTRFS_MAX_LEVEL 8
50#endif
51
52/* holds pointers to all of the tree roots */
53#define BTRFS_ROOT_TREE_OBJECTID 1ULL
54
55/* stores information about which extents are in use, and reference counts */
56#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
57
58/*
59 * chunk tree stores translations from logical -> physical block numbering
60 * the super block points to the chunk tree
61 */
62#define BTRFS_CHUNK_TREE_OBJECTID 3ULL
63
64/*
65 * stores information about which areas of a given device are in use.
66 * one per device. The tree of tree roots points to the device tree
67 */
68#define BTRFS_DEV_TREE_OBJECTID 4ULL
69
70/* one per subvolume, storing files and directories */
71#define BTRFS_FS_TREE_OBJECTID 5ULL
72
73/* directory objectid inside the root tree */
74#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
75
76/* holds checksums of all the data extents */
77#define BTRFS_CSUM_TREE_OBJECTID 7ULL
78
79/* orhpan objectid for tracking unlinked/truncated files */
80#define BTRFS_ORPHAN_OBJECTID -5ULL
81
82/* does write ahead logging to speed up fsyncs */
83#define BTRFS_TREE_LOG_OBJECTID -6ULL
84#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL
85
86/* for space balancing */
87#define BTRFS_TREE_RELOC_OBJECTID -8ULL
88#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
89
90/*
91 * extent checksums all have this objectid
92 * this allows them to share the logging tree
93 * for fsyncs
94 */
95#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
96
97/* dummy objectid represents multiple objectids */
98#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
99
100/*
101 * All files have objectids in this range.
102 */
103#define BTRFS_FIRST_FREE_OBJECTID 256ULL
104#define BTRFS_LAST_FREE_OBJECTID -256ULL
105#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
106
107
108/*
109 * the device items go into the chunk tree. The key is in the form
110 * [ 1 BTRFS_DEV_ITEM_KEY device_id ]
111 */
112#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
113
114/*
115 * we can actually store much bigger names, but lets not confuse the rest
116 * of linux
117 */
118#define BTRFS_NAME_LEN 255
119
120/* 32 bytes in various csum fields */
121#define BTRFS_CSUM_SIZE 32
122
123/* csum types */
124#define BTRFS_CSUM_TYPE_CRC32 0
125
126static int btrfs_csum_sizes[] = { 4, 0 };
127
128/* four bytes for CRC32 */
129#define BTRFS_EMPTY_DIR_SIZE 0
130
131#define BTRFS_FT_UNKNOWN 0
132#define BTRFS_FT_REG_FILE 1
133#define BTRFS_FT_DIR 2
134#define BTRFS_FT_CHRDEV 3
135#define BTRFS_FT_BLKDEV 4
136#define BTRFS_FT_FIFO 5
137#define BTRFS_FT_SOCK 6
138#define BTRFS_FT_SYMLINK 7
139#define BTRFS_FT_XATTR 8
140#define BTRFS_FT_MAX 9
141
142/*
143 * the key defines the order in the tree, and so it also defines (optimal)
144 * block layout. objectid corresonds to the inode number. The flags
145 * tells us things about the object, and is a kind of stream selector.
146 * so for a given inode, keys with flags of 1 might refer to the inode
147 * data, flags of 2 may point to file data in the btree and flags == 3
148 * may point to extents.
149 *
150 * offset is the starting byte offset for this key in the stream.
151 *
152 * btrfs_disk_key is in disk byte order. struct btrfs_key is always
153 * in cpu native order. Otherwise they are identical and their sizes
154 * should be the same (ie both packed)
155 */
156struct btrfs_disk_key {
157 __le64 objectid;
158 u8 type;
159 __le64 offset;
160} __attribute__ ((__packed__));
161
162struct btrfs_key {
163 u64 objectid;
164 u8 type;
165 u64 offset;
166} __attribute__ ((__packed__));
167
168struct btrfs_mapping_tree {
169 struct extent_map_tree map_tree;
170};
171
172#define BTRFS_UUID_SIZE 16
173struct btrfs_dev_item {
174 /* the internal btrfs device id */
175 __le64 devid;
176
177 /* size of the device */
178 __le64 total_bytes;
179
180 /* bytes used */
181 __le64 bytes_used;
182
183 /* optimal io alignment for this device */
184 __le32 io_align;
185
186 /* optimal io width for this device */
187 __le32 io_width;
188
189 /* minimal io size for this device */
190 __le32 sector_size;
191
192 /* type and info about this device */
193 __le64 type;
194
195 /* expected generation for this device */
196 __le64 generation;
197
198 /*
199 * starting byte of this partition on the device,
200 * to allowr for stripe alignment in the future
201 */
202 __le64 start_offset;
203
204 /* grouping information for allocation decisions */
205 __le32 dev_group;
206
207 /* seek speed 0-100 where 100 is fastest */
208 u8 seek_speed;
209
210 /* bandwidth 0-100 where 100 is fastest */
211 u8 bandwidth;
212
213 /* btrfs generated uuid for this device */
214 u8 uuid[BTRFS_UUID_SIZE];
215
216 /* uuid of FS who owns this device */
217 u8 fsid[BTRFS_UUID_SIZE];
218} __attribute__ ((__packed__));
219
220struct btrfs_stripe {
221 __le64 devid;
222 __le64 offset;
223 u8 dev_uuid[BTRFS_UUID_SIZE];
224} __attribute__ ((__packed__));
225
226struct btrfs_chunk {
227 /* size of this chunk in bytes */
228 __le64 length;
229
230 /* objectid of the root referencing this chunk */
231 __le64 owner;
232
233 __le64 stripe_len;
234 __le64 type;
235
236 /* optimal io alignment for this chunk */
237 __le32 io_align;
238
239 /* optimal io width for this chunk */
240 __le32 io_width;
241
242 /* minimal io size for this chunk */
243 __le32 sector_size;
244
245 /* 2^16 stripes is quite a lot, a second limit is the size of a single
246 * item in the btree
247 */
248 __le16 num_stripes;
249
250 /* sub stripes only matter for raid10 */
251 __le16 sub_stripes;
252 struct btrfs_stripe stripe;
253 /* additional stripes go here */
254} __attribute__ ((__packed__));
255
256static inline unsigned long btrfs_chunk_item_size(int num_stripes)
257{
258 BUG_ON(num_stripes == 0);
259 return sizeof(struct btrfs_chunk) +
260 sizeof(struct btrfs_stripe) * (num_stripes - 1);
261}
262
263#define BTRFS_FSID_SIZE 16
264#define BTRFS_HEADER_FLAG_WRITTEN (1 << 0)
265
266/*
267 * every tree block (leaf or node) starts with this header.
268 */
269struct btrfs_header {
270 /* these first four must match the super block */
271 u8 csum[BTRFS_CSUM_SIZE];
272 u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
273 __le64 bytenr; /* which block this node is supposed to live in */
274 __le64 flags;
275
276 /* allowed to be different from the super from here on down */
277 u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
278 __le64 generation;
279 __le64 owner;
280 __le32 nritems;
281 u8 level;
282} __attribute__ ((__packed__));
283
284#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \
285 sizeof(struct btrfs_header)) / \
286 sizeof(struct btrfs_key_ptr))
287#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header))
288#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize))
289#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
290 sizeof(struct btrfs_item) - \
291 sizeof(struct btrfs_file_extent_item))
292
293#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
294
295/*
296 * this is a very generous portion of the super block, giving us
297 * room to translate 14 chunks with 3 stripes each.
298 */
299#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048
300#define BTRFS_LABEL_SIZE 256
301
302/*
303 * the super block basically lists the main trees of the FS
304 * it currently lacks any block count etc etc
305 */
306struct btrfs_super_block {
307 u8 csum[BTRFS_CSUM_SIZE];
308 /* the first 4 fields must match struct btrfs_header */
309 u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
310 __le64 bytenr; /* this block number */
311 __le64 flags;
312
313 /* allowed to be different from the btrfs_header from here own down */
314 __le64 magic;
315 __le64 generation;
316 __le64 root;
317 __le64 chunk_root;
318 __le64 log_root;
319
320 /* this will help find the new super based on the log root */
321 __le64 log_root_transid;
322 __le64 total_bytes;
323 __le64 bytes_used;
324 __le64 root_dir_objectid;
325 __le64 num_devices;
326 __le32 sectorsize;
327 __le32 nodesize;
328 __le32 leafsize;
329 __le32 stripesize;
330 __le32 sys_chunk_array_size;
331 __le64 chunk_root_generation;
332 __le64 compat_flags;
333 __le64 compat_ro_flags;
334 __le64 incompat_flags;
335 __le16 csum_type;
336 u8 root_level;
337 u8 chunk_root_level;
338 u8 log_root_level;
339 struct btrfs_dev_item dev_item;
340
341 char label[BTRFS_LABEL_SIZE];
342
343 /* future expansion */
344 __le64 reserved[32];
345 u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
346} __attribute__ ((__packed__));
347
348/*
349 * Compat flags that we support. If any incompat flags are set other than the
350 * ones specified below then we will fail to mount
351 */
352#define BTRFS_FEATURE_COMPAT_SUPP 0x0
353#define BTRFS_FEATURE_COMPAT_RO_SUPP 0x0
354#define BTRFS_FEATURE_INCOMPAT_SUPP 0x0
355
356/*
357 * A leaf is full of items. offset and size tell us where to find
358 * the item in the leaf (relative to the start of the data area)
359 */
360struct btrfs_item {
361 struct btrfs_disk_key key;
362 __le32 offset;
363 __le32 size;
364} __attribute__ ((__packed__));
365
366/*
367 * leaves have an item area and a data area:
368 * [item0, item1....itemN] [free space] [dataN...data1, data0]
369 *
370 * The data is separate from the items to get the keys closer together
371 * during searches.
372 */
373struct btrfs_leaf {
374 struct btrfs_header header;
375 struct btrfs_item items[];
376} __attribute__ ((__packed__));
377
378/*
379 * all non-leaf blocks are nodes, they hold only keys and pointers to
380 * other blocks
381 */
382struct btrfs_key_ptr {
383 struct btrfs_disk_key key;
384 __le64 blockptr;
385 __le64 generation;
386} __attribute__ ((__packed__));
387
388struct btrfs_node {
389 struct btrfs_header header;
390 struct btrfs_key_ptr ptrs[];
391} __attribute__ ((__packed__));
392
393/*
394 * btrfs_paths remember the path taken from the root down to the leaf.
395 * level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point
396 * to any other levels that are present.
397 *
398 * The slots array records the index of the item or block pointer
399 * used while walking the tree.
400 */
401struct btrfs_path {
402 struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
403 int slots[BTRFS_MAX_LEVEL];
404 /* if there is real range locking, this locks field will change */
405 int locks[BTRFS_MAX_LEVEL];
406 int reada;
407 /* keep some upper locks as we walk down */
408 int keep_locks;
409 int skip_locking;
410 int lowest_level;
411
412 /*
413 * set by btrfs_split_item, tells search_slot to keep all locks
414 * and to force calls to keep space in the nodes
415 */
416 int search_for_split;
417};
418
419/*
420 * items in the extent btree are used to record the objectid of the
421 * owner of the block and the number of references
422 */
423struct btrfs_extent_item {
424 __le32 refs;
425} __attribute__ ((__packed__));
426
427struct btrfs_extent_ref {
428 __le64 root;
429 __le64 generation;
430 __le64 objectid;
431 __le32 num_refs;
432} __attribute__ ((__packed__));
433
434/* dev extents record free space on individual devices. The owner
435 * field points back to the chunk allocation mapping tree that allocated
436 * the extent. The chunk tree uuid field is a way to double check the owner
437 */
438struct btrfs_dev_extent {
439 __le64 chunk_tree;
440 __le64 chunk_objectid;
441 __le64 chunk_offset;
442 __le64 length;
443 u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
444} __attribute__ ((__packed__));
445
446struct btrfs_inode_ref {
447 __le64 index;
448 __le16 name_len;
449 /* name goes here */
450} __attribute__ ((__packed__));
451
452struct btrfs_timespec {
453 __le64 sec;
454 __le32 nsec;
455} __attribute__ ((__packed__));
456
457typedef enum {
458 BTRFS_COMPRESS_NONE = 0,
459 BTRFS_COMPRESS_ZLIB = 1,
460 BTRFS_COMPRESS_LAST = 2,
461} btrfs_compression_type;
462
463/* we don't understand any encryption methods right now */
464typedef enum {
465 BTRFS_ENCRYPTION_NONE = 0,
466 BTRFS_ENCRYPTION_LAST = 1,
467} btrfs_encryption_type;
468
469struct btrfs_inode_item {
470 /* nfs style generation number */
471 __le64 generation;
472 /* transid that last touched this inode */
473 __le64 transid;
474 __le64 size;
475 __le64 nbytes;
476 __le64 block_group;
477 __le32 nlink;
478 __le32 uid;
479 __le32 gid;
480 __le32 mode;
481 __le64 rdev;
482 __le64 flags;
483
484 /* modification sequence number for NFS */
485 __le64 sequence;
486
487 /*
488 * a little future expansion, for more than this we can
489 * just grow the inode item and version it
490 */
491 __le64 reserved[4];
492 struct btrfs_timespec atime;
493 struct btrfs_timespec ctime;
494 struct btrfs_timespec mtime;
495 struct btrfs_timespec otime;
496} __attribute__ ((__packed__));
497
498struct btrfs_dir_log_item {
499 __le64 end;
500} __attribute__ ((__packed__));
501
502struct btrfs_dir_item {
503 struct btrfs_disk_key location;
504 __le64 transid;
505 __le16 data_len;
506 __le16 name_len;
507 u8 type;
508} __attribute__ ((__packed__));
509
510struct btrfs_root_item {
511 struct btrfs_inode_item inode;
512 __le64 generation;
513 __le64 root_dirid;
514 __le64 bytenr;
515 __le64 byte_limit;
516 __le64 bytes_used;
517 __le64 last_snapshot;
518 __le64 flags;
519 __le32 refs;
520 struct btrfs_disk_key drop_progress;
521 u8 drop_level;
522 u8 level;
523} __attribute__ ((__packed__));
524
525/*
526 * this is used for both forward and backward root refs
527 */
528struct btrfs_root_ref {
529 __le64 dirid;
530 __le64 sequence;
531 __le16 name_len;
532} __attribute__ ((__packed__));
533
534#define BTRFS_FILE_EXTENT_INLINE 0
535#define BTRFS_FILE_EXTENT_REG 1
536#define BTRFS_FILE_EXTENT_PREALLOC 2
537
538struct btrfs_file_extent_item {
539 /*
540 * transaction id that created this extent
541 */
542 __le64 generation;
543 /*
544 * max number of bytes to hold this extent in ram
545 * when we split a compressed extent we can't know how big
546 * each of the resulting pieces will be. So, this is
547 * an upper limit on the size of the extent in ram instead of
548 * an exact limit.
549 */
550 __le64 ram_bytes;
551
552 /*
553 * 32 bits for the various ways we might encode the data,
554 * including compression and encryption. If any of these
555 * are set to something a given disk format doesn't understand
556 * it is treated like an incompat flag for reading and writing,
557 * but not for stat.
558 */
559 u8 compression;
560 u8 encryption;
561 __le16 other_encoding; /* spare for later use */
562
563 /* are we inline data or a real extent? */
564 u8 type;
565
566 /*
567 * disk space consumed by the extent, checksum blocks are included
568 * in these numbers
569 */
570 __le64 disk_bytenr;
571 __le64 disk_num_bytes;
572 /*
573 * the logical offset in file blocks (no csums)
574 * this extent record is for. This allows a file extent to point
575 * into the middle of an existing extent on disk, sharing it
576 * between two snapshots (useful if some bytes in the middle of the
577 * extent have changed
578 */
579 __le64 offset;
580 /*
581 * the logical number of file blocks (no csums included). This
582 * always reflects the size uncompressed and without encoding.
583 */
584 __le64 num_bytes;
585
586} __attribute__ ((__packed__));
587
588struct btrfs_csum_item {
589 u8 csum;
590} __attribute__ ((__packed__));
591
592/* different types of block groups (and chunks) */
593#define BTRFS_BLOCK_GROUP_DATA (1 << 0)
594#define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1)
595#define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
596#define BTRFS_BLOCK_GROUP_RAID0 (1 << 3)
597#define BTRFS_BLOCK_GROUP_RAID1 (1 << 4)
598#define BTRFS_BLOCK_GROUP_DUP (1 << 5)
599#define BTRFS_BLOCK_GROUP_RAID10 (1 << 6)
600
601struct btrfs_block_group_item {
602 __le64 used;
603 __le64 chunk_objectid;
604 __le64 flags;
605} __attribute__ ((__packed__));
606
607struct btrfs_space_info {
608 u64 flags;
609 u64 total_bytes;
610 u64 bytes_used;
611 u64 bytes_pinned;
612 u64 bytes_reserved;
613 u64 bytes_readonly;
614 int full;
615 int force_alloc;
616 struct list_head list;
617
618 /* for block groups in our same type */
619 struct list_head block_groups;
620 spinlock_t lock;
621 struct rw_semaphore groups_sem;
622};
623
624struct btrfs_free_space {
625 struct rb_node bytes_index;
626 struct rb_node offset_index;
627 u64 offset;
628 u64 bytes;
629};
630
631struct btrfs_block_group_cache {
632 struct btrfs_key key;
633 struct btrfs_block_group_item item;
634 spinlock_t lock;
635 struct mutex alloc_mutex;
636 struct mutex cache_mutex;
637 u64 pinned;
638 u64 reserved;
639 u64 flags;
640 int cached;
641 int ro;
642 int dirty;
643
644 struct btrfs_space_info *space_info;
645
646 /* free space cache stuff */
647 struct rb_root free_space_bytes;
648 struct rb_root free_space_offset;
649
650 /* block group cache stuff */
651 struct rb_node cache_node;
652
653 /* for block groups in the same raid type */
654 struct list_head list;
655
656 /* usage count */
657 atomic_t count;
658};
659
660struct btrfs_leaf_ref_tree {
661 struct rb_root root;
662 struct list_head list;
663 spinlock_t lock;
664};
665
666struct btrfs_device;
667struct btrfs_fs_devices;
668struct btrfs_fs_info {
669 u8 fsid[BTRFS_FSID_SIZE];
670 u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
671 struct btrfs_root *extent_root;
672 struct btrfs_root *tree_root;
673 struct btrfs_root *chunk_root;
674 struct btrfs_root *dev_root;
675 struct btrfs_root *fs_root;
676 struct btrfs_root *csum_root;
677
678 /* the log root tree is a directory of all the other log roots */
679 struct btrfs_root *log_root_tree;
680 struct radix_tree_root fs_roots_radix;
681
682 /* block group cache stuff */
683 spinlock_t block_group_cache_lock;
684 struct rb_root block_group_cache_tree;
685
686 struct extent_io_tree pinned_extents;
687 struct extent_io_tree pending_del;
688 struct extent_io_tree extent_ins;
689
690 /* logical->physical extent mapping */
691 struct btrfs_mapping_tree mapping_tree;
692
693 u64 generation;
694 u64 last_trans_committed;
695 u64 last_trans_new_blockgroup;
696 u64 open_ioctl_trans;
697 unsigned long mount_opt;
698 u64 max_extent;
699 u64 max_inline;
700 u64 alloc_start;
701 struct btrfs_transaction *running_transaction;
702 wait_queue_head_t transaction_throttle;
703 wait_queue_head_t transaction_wait;
704
705 wait_queue_head_t async_submit_wait;
706 wait_queue_head_t tree_log_wait;
707
708 struct btrfs_super_block super_copy;
709 struct btrfs_super_block super_for_commit;
710 struct block_device *__bdev;
711 struct super_block *sb;
712 struct inode *btree_inode;
713 struct backing_dev_info bdi;
714 spinlock_t hash_lock;
715 struct mutex trans_mutex;
716 struct mutex tree_log_mutex;
717 struct mutex transaction_kthread_mutex;
718 struct mutex cleaner_mutex;
719 struct mutex extent_ins_mutex;
720 struct mutex pinned_mutex;
721 struct mutex chunk_mutex;
722 struct mutex drop_mutex;
723 struct mutex volume_mutex;
724 struct mutex tree_reloc_mutex;
725 struct list_head trans_list;
726 struct list_head hashers;
727 struct list_head dead_roots;
728
729 atomic_t nr_async_submits;
730 atomic_t async_submit_draining;
731 atomic_t nr_async_bios;
732 atomic_t async_delalloc_pages;
733 atomic_t tree_log_writers;
734 atomic_t tree_log_commit;
735 unsigned long tree_log_batch;
736 u64 tree_log_transid;
737
738 /*
739 * this is used by the balancing code to wait for all the pending
740 * ordered extents
741 */
742 spinlock_t ordered_extent_lock;
743 struct list_head ordered_extents;
744 struct list_head delalloc_inodes;
745
746 /*
747 * there is a pool of worker threads for checksumming during writes
748 * and a pool for checksumming after reads. This is because readers
749 * can run with FS locks held, and the writers may be waiting for
750 * those locks. We don't want ordering in the pending list to cause
751 * deadlocks, and so the two are serviced separately.
752 *
753 * A third pool does submit_bio to avoid deadlocking with the other
754 * two
755 */
756 struct btrfs_workers workers;
757 struct btrfs_workers delalloc_workers;
758 struct btrfs_workers endio_workers;
759 struct btrfs_workers endio_meta_workers;
760 struct btrfs_workers endio_meta_write_workers;
761 struct btrfs_workers endio_write_workers;
762 struct btrfs_workers submit_workers;
763 /*
764 * fixup workers take dirty pages that didn't properly go through
765 * the cow mechanism and make them safe to write. It happens
766 * for the sys_munmap function call path
767 */
768 struct btrfs_workers fixup_workers;
769 struct task_struct *transaction_kthread;
770 struct task_struct *cleaner_kthread;
771 int thread_pool_size;
772
773 /* tree relocation relocated fields */
774 struct list_head dead_reloc_roots;
775 struct btrfs_leaf_ref_tree reloc_ref_tree;
776 struct btrfs_leaf_ref_tree shared_ref_tree;
777
778 struct kobject super_kobj;
779 struct completion kobj_unregister;
780 int do_barriers;
781 int closing;
782 int log_root_recovering;
783 atomic_t throttles;
784 atomic_t throttle_gen;
785
786 u64 total_pinned;
787 struct list_head dirty_cowonly_roots;
788
789 struct btrfs_fs_devices *fs_devices;
790 struct list_head space_info;
791 spinlock_t delalloc_lock;
792 spinlock_t new_trans_lock;
793 u64 delalloc_bytes;
794 u64 last_alloc;
795 u64 last_data_alloc;
796
797 spinlock_t ref_cache_lock;
798 u64 total_ref_cache_size;
799
800 u64 avail_data_alloc_bits;
801 u64 avail_metadata_alloc_bits;
802 u64 avail_system_alloc_bits;
803 u64 data_alloc_profile;
804 u64 metadata_alloc_profile;
805 u64 system_alloc_profile;
806
807 void *bdev_holder;
808};
809
810/*
811 * in ram representation of the tree. extent_root is used for all allocations
812 * and for the extent tree extent_root root.
813 */
814struct btrfs_dirty_root;
815struct btrfs_root {
816 struct extent_buffer *node;
817
818 /* the node lock is held while changing the node pointer */
819 spinlock_t node_lock;
820
821 struct extent_buffer *commit_root;
822 struct btrfs_leaf_ref_tree *ref_tree;
823 struct btrfs_leaf_ref_tree ref_tree_struct;
824 struct btrfs_dirty_root *dirty_root;
825 struct btrfs_root *log_root;
826 struct btrfs_root *reloc_root;
827
828 struct btrfs_root_item root_item;
829 struct btrfs_key root_key;
830 struct btrfs_fs_info *fs_info;
831 struct extent_io_tree dirty_log_pages;
832
833 struct kobject root_kobj;
834 struct completion kobj_unregister;
835 struct mutex objectid_mutex;
836 struct mutex log_mutex;
837
838 u64 objectid;
839 u64 last_trans;
840
841 /* data allocations are done in sectorsize units */
842 u32 sectorsize;
843
844 /* node allocations are done in nodesize units */
845 u32 nodesize;
846
847 /* leaf allocations are done in leafsize units */
848 u32 leafsize;
849
850 u32 stripesize;
851
852 u32 type;
853 u64 highest_inode;
854 u64 last_inode_alloc;
855 int ref_cows;
856 int track_dirty;
857 u64 defrag_trans_start;
858 struct btrfs_key defrag_progress;
859 struct btrfs_key defrag_max;
860 int defrag_running;
861 int defrag_level;
862 char *name;
863 int in_sysfs;
864
865 /* the dirty list is only used by non-reference counted roots */
866 struct list_head dirty_list;
867
868 spinlock_t list_lock;
869 struct list_head dead_list;
870 struct list_head orphan_list;
871
872 /*
873 * right now this just gets used so that a root has its own devid
874 * for stat. It may be used for more later
875 */
876 struct super_block anon_super;
877};
878
879/*
880
881 * inode items have the data typically returned from stat and store other
882 * info about object characteristics. There is one for every file and dir in
883 * the FS
884 */
885#define BTRFS_INODE_ITEM_KEY 1
886#define BTRFS_INODE_REF_KEY 12
887#define BTRFS_XATTR_ITEM_KEY 24
888#define BTRFS_ORPHAN_ITEM_KEY 48
889/* reserve 2-15 close to the inode for later flexibility */
890
891/*
892 * dir items are the name -> inode pointers in a directory. There is one
893 * for every name in a directory.
894 */
895#define BTRFS_DIR_LOG_ITEM_KEY 60
896#define BTRFS_DIR_LOG_INDEX_KEY 72
897#define BTRFS_DIR_ITEM_KEY 84
898#define BTRFS_DIR_INDEX_KEY 96
899/*
900 * extent data is for file data
901 */
902#define BTRFS_EXTENT_DATA_KEY 108
903
904/*
905 * extent csums are stored in a separate tree and hold csums for
906 * an entire extent on disk.
907 */
908#define BTRFS_EXTENT_CSUM_KEY 128
909
910/*
911 * root items point to tree roots. There are typically in the root
912 * tree used by the super block to find all the other trees
913 */
914#define BTRFS_ROOT_ITEM_KEY 132
915
916/*
917 * root backrefs tie subvols and snapshots to the directory entries that
918 * reference them
919 */
920#define BTRFS_ROOT_BACKREF_KEY 144
921
922/*
923 * root refs make a fast index for listing all of the snapshots and
924 * subvolumes referenced by a given root. They point directly to the
925 * directory item in the root that references the subvol
926 */
927#define BTRFS_ROOT_REF_KEY 156
928
929/*
930 * extent items are in the extent map tree. These record which blocks
931 * are used, and how many references there are to each block
932 */
933#define BTRFS_EXTENT_ITEM_KEY 168
934#define BTRFS_EXTENT_REF_KEY 180
935
936/*
937 * block groups give us hints into the extent allocation trees. Which
938 * blocks are free etc etc
939 */
940#define BTRFS_BLOCK_GROUP_ITEM_KEY 192
941
942#define BTRFS_DEV_EXTENT_KEY 204
943#define BTRFS_DEV_ITEM_KEY 216
944#define BTRFS_CHUNK_ITEM_KEY 228
945
946/*
947 * string items are for debugging. They just store a short string of
948 * data in the FS
949 */
950#define BTRFS_STRING_ITEM_KEY 253
951
952#define BTRFS_MOUNT_NODATASUM (1 << 0)
953#define BTRFS_MOUNT_NODATACOW (1 << 1)
954#define BTRFS_MOUNT_NOBARRIER (1 << 2)
955#define BTRFS_MOUNT_SSD (1 << 3)
956#define BTRFS_MOUNT_DEGRADED (1 << 4)
957#define BTRFS_MOUNT_COMPRESS (1 << 5)
958
959#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
960#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
961#define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \
962 BTRFS_MOUNT_##opt)
963/*
964 * Inode flags
965 */
966#define BTRFS_INODE_NODATASUM (1 << 0)
967#define BTRFS_INODE_NODATACOW (1 << 1)
968#define BTRFS_INODE_READONLY (1 << 2)
969#define BTRFS_INODE_NOCOMPRESS (1 << 3)
970#define BTRFS_INODE_PREALLOC (1 << 4)
971#define btrfs_clear_flag(inode, flag) (BTRFS_I(inode)->flags &= \
972 ~BTRFS_INODE_##flag)
973#define btrfs_set_flag(inode, flag) (BTRFS_I(inode)->flags |= \
974 BTRFS_INODE_##flag)
975#define btrfs_test_flag(inode, flag) (BTRFS_I(inode)->flags & \
976 BTRFS_INODE_##flag)
977/* some macros to generate set/get funcs for the struct fields. This
978 * assumes there is a lefoo_to_cpu for every type, so lets make a simple
979 * one for u8:
980 */
981#define le8_to_cpu(v) (v)
982#define cpu_to_le8(v) (v)
983#define __le8 u8
984
985#define read_eb_member(eb, ptr, type, member, result) ( \
986 read_extent_buffer(eb, (char *)(result), \
987 ((unsigned long)(ptr)) + \
988 offsetof(type, member), \
989 sizeof(((type *)0)->member)))
990
991#define write_eb_member(eb, ptr, type, member, result) ( \
992 write_extent_buffer(eb, (char *)(result), \
993 ((unsigned long)(ptr)) + \
994 offsetof(type, member), \
995 sizeof(((type *)0)->member)))
996
997#ifndef BTRFS_SETGET_FUNCS
998#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
999u##bits btrfs_##name(struct extent_buffer *eb, type *s); \
1000void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val);
1001#endif
1002
1003#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
1004static inline u##bits btrfs_##name(struct extent_buffer *eb) \
1005{ \
1006 type *p = kmap_atomic(eb->first_page, KM_USER0); \
1007 u##bits res = le##bits##_to_cpu(p->member); \
1008 kunmap_atomic(p, KM_USER0); \
1009 return res; \
1010} \
1011static inline void btrfs_set_##name(struct extent_buffer *eb, \
1012 u##bits val) \
1013{ \
1014 type *p = kmap_atomic(eb->first_page, KM_USER0); \
1015 p->member = cpu_to_le##bits(val); \
1016 kunmap_atomic(p, KM_USER0); \
1017}
1018
1019#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \
1020static inline u##bits btrfs_##name(type *s) \
1021{ \
1022 return le##bits##_to_cpu(s->member); \
1023} \
1024static inline void btrfs_set_##name(type *s, u##bits val) \
1025{ \
1026 s->member = cpu_to_le##bits(val); \
1027}
1028
1029BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64);
1030BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64);
1031BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64);
1032BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32);
1033BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32);
1034BTRFS_SETGET_FUNCS(device_start_offset, struct btrfs_dev_item,
1035 start_offset, 64);
1036BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32);
1037BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64);
1038BTRFS_SETGET_FUNCS(device_group, struct btrfs_dev_item, dev_group, 32);
1039BTRFS_SETGET_FUNCS(device_seek_speed, struct btrfs_dev_item, seek_speed, 8);
1040BTRFS_SETGET_FUNCS(device_bandwidth, struct btrfs_dev_item, bandwidth, 8);
1041BTRFS_SETGET_FUNCS(device_generation, struct btrfs_dev_item, generation, 64);
1042
1043BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64);
1044BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item,
1045 total_bytes, 64);
1046BTRFS_SETGET_STACK_FUNCS(stack_device_bytes_used, struct btrfs_dev_item,
1047 bytes_used, 64);
1048BTRFS_SETGET_STACK_FUNCS(stack_device_io_align, struct btrfs_dev_item,
1049 io_align, 32);
1050BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item,
1051 io_width, 32);
1052BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item,
1053 sector_size, 32);
1054BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64);
1055BTRFS_SETGET_STACK_FUNCS(stack_device_group, struct btrfs_dev_item,
1056 dev_group, 32);
1057BTRFS_SETGET_STACK_FUNCS(stack_device_seek_speed, struct btrfs_dev_item,
1058 seek_speed, 8);
1059BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item,
1060 bandwidth, 8);
1061BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item,
1062 generation, 64);
1063
1064static inline char *btrfs_device_uuid(struct btrfs_dev_item *d)
1065{
1066 return (char *)d + offsetof(struct btrfs_dev_item, uuid);
1067}
1068
1069static inline char *btrfs_device_fsid(struct btrfs_dev_item *d)
1070{
1071 return (char *)d + offsetof(struct btrfs_dev_item, fsid);
1072}
1073
1074BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64);
1075BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64);
1076BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64);
1077BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32);
1078BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32);
1079BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32);
1080BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64);
1081BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16);
1082BTRFS_SETGET_FUNCS(chunk_sub_stripes, struct btrfs_chunk, sub_stripes, 16);
1083BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64);
1084BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64);
1085
1086static inline char *btrfs_stripe_dev_uuid(struct btrfs_stripe *s)
1087{
1088 return (char *)s + offsetof(struct btrfs_stripe, dev_uuid);
1089}
1090
1091BTRFS_SETGET_STACK_FUNCS(stack_chunk_length, struct btrfs_chunk, length, 64);
1092BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64);
1093BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk,
1094 stripe_len, 64);
1095BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk,
1096 io_align, 32);
1097BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk,
1098 io_width, 32);
1099BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk,
1100 sector_size, 32);
1101BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64);
1102BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk,
1103 num_stripes, 16);
1104BTRFS_SETGET_STACK_FUNCS(stack_chunk_sub_stripes, struct btrfs_chunk,
1105 sub_stripes, 16);
1106BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64);
1107BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64);
1108
1109static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c,
1110 int nr)
1111{
1112 unsigned long offset = (unsigned long)c;
1113 offset += offsetof(struct btrfs_chunk, stripe);
1114 offset += nr * sizeof(struct btrfs_stripe);
1115 return (struct btrfs_stripe *)offset;
1116}
1117
1118static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr)
1119{
1120 return btrfs_stripe_dev_uuid(btrfs_stripe_nr(c, nr));
1121}
1122
1123static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb,
1124 struct btrfs_chunk *c, int nr)
1125{
1126 return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr));
1127}
1128
1129static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb,
1130 struct btrfs_chunk *c, int nr,
1131 u64 val)
1132{
1133 btrfs_set_stripe_offset(eb, btrfs_stripe_nr(c, nr), val);
1134}
1135
1136static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb,
1137 struct btrfs_chunk *c, int nr)
1138{
1139 return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr));
1140}
1141
1142static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb,
1143 struct btrfs_chunk *c, int nr,
1144 u64 val)
1145{
1146 btrfs_set_stripe_devid(eb, btrfs_stripe_nr(c, nr), val);
1147}
1148
1149/* struct btrfs_block_group_item */
1150BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item,
1151 used, 64);
1152BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item,
1153 used, 64);
1154BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid,
1155 struct btrfs_block_group_item, chunk_objectid, 64);
1156
1157BTRFS_SETGET_FUNCS(disk_block_group_chunk_objectid,
1158 struct btrfs_block_group_item, chunk_objectid, 64);
1159BTRFS_SETGET_FUNCS(disk_block_group_flags,
1160 struct btrfs_block_group_item, flags, 64);
1161BTRFS_SETGET_STACK_FUNCS(block_group_flags,
1162 struct btrfs_block_group_item, flags, 64);
1163
1164/* struct btrfs_inode_ref */
1165BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
1166BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64);
1167
1168/* struct btrfs_inode_item */
1169BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64);
1170BTRFS_SETGET_FUNCS(inode_sequence, struct btrfs_inode_item, sequence, 64);
1171BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64);
1172BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64);
1173BTRFS_SETGET_FUNCS(inode_nbytes, struct btrfs_inode_item, nbytes, 64);
1174BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64);
1175BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32);
1176BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32);
1177BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
1178BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
1179BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
1180BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64);
1181
1182static inline struct btrfs_timespec *
1183btrfs_inode_atime(struct btrfs_inode_item *inode_item)
1184{
1185 unsigned long ptr = (unsigned long)inode_item;
1186 ptr += offsetof(struct btrfs_inode_item, atime);
1187 return (struct btrfs_timespec *)ptr;
1188}
1189
1190static inline struct btrfs_timespec *
1191btrfs_inode_mtime(struct btrfs_inode_item *inode_item)
1192{
1193 unsigned long ptr = (unsigned long)inode_item;
1194 ptr += offsetof(struct btrfs_inode_item, mtime);
1195 return (struct btrfs_timespec *)ptr;
1196}
1197
1198static inline struct btrfs_timespec *
1199btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
1200{
1201 unsigned long ptr = (unsigned long)inode_item;
1202 ptr += offsetof(struct btrfs_inode_item, ctime);
1203 return (struct btrfs_timespec *)ptr;
1204}
1205
1206static inline struct btrfs_timespec *
1207btrfs_inode_otime(struct btrfs_inode_item *inode_item)
1208{
1209 unsigned long ptr = (unsigned long)inode_item;
1210 ptr += offsetof(struct btrfs_inode_item, otime);
1211 return (struct btrfs_timespec *)ptr;
1212}
1213
1214BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
1215BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
1216
1217/* struct btrfs_dev_extent */
1218BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent,
1219 chunk_tree, 64);
1220BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent,
1221 chunk_objectid, 64);
1222BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent,
1223 chunk_offset, 64);
1224BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64);
1225
1226static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
1227{
1228 unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid);
1229 return (u8 *)((unsigned long)dev + ptr);
1230}
1231
1232/* struct btrfs_extent_ref */
1233BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64);
1234BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64);
1235BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64);
1236BTRFS_SETGET_FUNCS(ref_num_refs, struct btrfs_extent_ref, num_refs, 32);
1237
1238BTRFS_SETGET_STACK_FUNCS(stack_ref_root, struct btrfs_extent_ref, root, 64);
1239BTRFS_SETGET_STACK_FUNCS(stack_ref_generation, struct btrfs_extent_ref,
1240 generation, 64);
1241BTRFS_SETGET_STACK_FUNCS(stack_ref_objectid, struct btrfs_extent_ref,
1242 objectid, 64);
1243BTRFS_SETGET_STACK_FUNCS(stack_ref_num_refs, struct btrfs_extent_ref,
1244 num_refs, 32);
1245
1246/* struct btrfs_extent_item */
1247BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32);
1248BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item,
1249 refs, 32);
1250
1251/* struct btrfs_node */
1252BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
1253BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64);
1254
1255static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr)
1256{
1257 unsigned long ptr;
1258 ptr = offsetof(struct btrfs_node, ptrs) +
1259 sizeof(struct btrfs_key_ptr) * nr;
1260 return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr);
1261}
1262
1263static inline void btrfs_set_node_blockptr(struct extent_buffer *eb,
1264 int nr, u64 val)
1265{
1266 unsigned long ptr;
1267 ptr = offsetof(struct btrfs_node, ptrs) +
1268 sizeof(struct btrfs_key_ptr) * nr;
1269 btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val);
1270}
1271
1272static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr)
1273{
1274 unsigned long ptr;
1275 ptr = offsetof(struct btrfs_node, ptrs) +
1276 sizeof(struct btrfs_key_ptr) * nr;
1277 return btrfs_key_generation(eb, (struct btrfs_key_ptr *)ptr);
1278}
1279
1280static inline void btrfs_set_node_ptr_generation(struct extent_buffer *eb,
1281 int nr, u64 val)
1282{
1283 unsigned long ptr;
1284 ptr = offsetof(struct btrfs_node, ptrs) +
1285 sizeof(struct btrfs_key_ptr) * nr;
1286 btrfs_set_key_generation(eb, (struct btrfs_key_ptr *)ptr, val);
1287}
1288
1289static inline unsigned long btrfs_node_key_ptr_offset(int nr)
1290{
1291 return offsetof(struct btrfs_node, ptrs) +
1292 sizeof(struct btrfs_key_ptr) * nr;
1293}
1294
1295void btrfs_node_key(struct extent_buffer *eb,
1296 struct btrfs_disk_key *disk_key, int nr);
1297
1298static inline void btrfs_set_node_key(struct extent_buffer *eb,
1299 struct btrfs_disk_key *disk_key, int nr)
1300{
1301 unsigned long ptr;
1302 ptr = btrfs_node_key_ptr_offset(nr);
1303 write_eb_member(eb, (struct btrfs_key_ptr *)ptr,
1304 struct btrfs_key_ptr, key, disk_key);
1305}
1306
1307/* struct btrfs_item */
1308BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32);
1309BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32);
1310
1311static inline unsigned long btrfs_item_nr_offset(int nr)
1312{
1313 return offsetof(struct btrfs_leaf, items) +
1314 sizeof(struct btrfs_item) * nr;
1315}
1316
1317static inline struct btrfs_item *btrfs_item_nr(struct extent_buffer *eb,
1318 int nr)
1319{
1320 return (struct btrfs_item *)btrfs_item_nr_offset(nr);
1321}
1322
1323static inline u32 btrfs_item_end(struct extent_buffer *eb,
1324 struct btrfs_item *item)
1325{
1326 return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item);
1327}
1328
1329static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr)
1330{
1331 return btrfs_item_end(eb, btrfs_item_nr(eb, nr));
1332}
1333
1334static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr)
1335{
1336 return btrfs_item_offset(eb, btrfs_item_nr(eb, nr));
1337}
1338
1339static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr)
1340{
1341 return btrfs_item_size(eb, btrfs_item_nr(eb, nr));
1342}
1343
1344static inline void btrfs_item_key(struct extent_buffer *eb,
1345 struct btrfs_disk_key *disk_key, int nr)
1346{
1347 struct btrfs_item *item = btrfs_item_nr(eb, nr);
1348 read_eb_member(eb, item, struct btrfs_item, key, disk_key);
1349}
1350
1351static inline void btrfs_set_item_key(struct extent_buffer *eb,
1352 struct btrfs_disk_key *disk_key, int nr)
1353{
1354 struct btrfs_item *item = btrfs_item_nr(eb, nr);
1355 write_eb_member(eb, item, struct btrfs_item, key, disk_key);
1356}
1357
1358BTRFS_SETGET_FUNCS(dir_log_end, struct btrfs_dir_log_item, end, 64);
1359
1360/*
1361 * struct btrfs_root_ref
1362 */
1363BTRFS_SETGET_FUNCS(root_ref_dirid, struct btrfs_root_ref, dirid, 64);
1364BTRFS_SETGET_FUNCS(root_ref_sequence, struct btrfs_root_ref, sequence, 64);
1365BTRFS_SETGET_FUNCS(root_ref_name_len, struct btrfs_root_ref, name_len, 16);
1366
1367/* struct btrfs_dir_item */
1368BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16);
1369BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8);
1370BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16);
1371BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64);
1372
1373static inline void btrfs_dir_item_key(struct extent_buffer *eb,
1374 struct btrfs_dir_item *item,
1375 struct btrfs_disk_key *key)
1376{
1377 read_eb_member(eb, item, struct btrfs_dir_item, location, key);
1378}
1379
1380static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
1381 struct btrfs_dir_item *item,
1382 struct btrfs_disk_key *key)
1383{
1384 write_eb_member(eb, item, struct btrfs_dir_item, location, key);
1385}
1386
1387/* struct btrfs_disk_key */
1388BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
1389 objectid, 64);
1390BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64);
1391BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8);
1392
1393static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu,
1394 struct btrfs_disk_key *disk)
1395{
1396 cpu->offset = le64_to_cpu(disk->offset);
1397 cpu->type = disk->type;
1398 cpu->objectid = le64_to_cpu(disk->objectid);
1399}
1400
1401static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk,
1402 struct btrfs_key *cpu)
1403{
1404 disk->offset = cpu_to_le64(cpu->offset);
1405 disk->type = cpu->type;
1406 disk->objectid = cpu_to_le64(cpu->objectid);
1407}
1408
1409static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb,
1410 struct btrfs_key *key, int nr)
1411{
1412 struct btrfs_disk_key disk_key;
1413 btrfs_node_key(eb, &disk_key, nr);
1414 btrfs_disk_key_to_cpu(key, &disk_key);
1415}
1416
1417static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb,
1418 struct btrfs_key *key, int nr)
1419{
1420 struct btrfs_disk_key disk_key;
1421 btrfs_item_key(eb, &disk_key, nr);
1422 btrfs_disk_key_to_cpu(key, &disk_key);
1423}
1424
1425static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb,
1426 struct btrfs_dir_item *item,
1427 struct btrfs_key *key)
1428{
1429 struct btrfs_disk_key disk_key;
1430 btrfs_dir_item_key(eb, item, &disk_key);
1431 btrfs_disk_key_to_cpu(key, &disk_key);
1432}
1433
1434
1435static inline u8 btrfs_key_type(struct btrfs_key *key)
1436{
1437 return key->type;
1438}
1439
1440static inline void btrfs_set_key_type(struct btrfs_key *key, u8 val)
1441{
1442 key->type = val;
1443}
1444
1445/* struct btrfs_header */
1446BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64);
1447BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header,
1448 generation, 64);
1449BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64);
1450BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32);
1451BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64);
1452BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8);
1453
1454static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag)
1455{
1456 return (btrfs_header_flags(eb) & flag) == flag;
1457}
1458
1459static inline int btrfs_set_header_flag(struct extent_buffer *eb, u64 flag)
1460{
1461 u64 flags = btrfs_header_flags(eb);
1462 btrfs_set_header_flags(eb, flags | flag);
1463 return (flags & flag) == flag;
1464}
1465
1466static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
1467{
1468 u64 flags = btrfs_header_flags(eb);
1469 btrfs_set_header_flags(eb, flags & ~flag);
1470 return (flags & flag) == flag;
1471}
1472
1473static inline u8 *btrfs_header_fsid(struct extent_buffer *eb)
1474{
1475 unsigned long ptr = offsetof(struct btrfs_header, fsid);
1476 return (u8 *)ptr;
1477}
1478
1479static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
1480{
1481 unsigned long ptr = offsetof(struct btrfs_header, chunk_tree_uuid);
1482 return (u8 *)ptr;
1483}
1484
1485static inline u8 *btrfs_super_fsid(struct extent_buffer *eb)
1486{
1487 unsigned long ptr = offsetof(struct btrfs_super_block, fsid);
1488 return (u8 *)ptr;
1489}
1490
1491static inline u8 *btrfs_header_csum(struct extent_buffer *eb)
1492{
1493 unsigned long ptr = offsetof(struct btrfs_header, csum);
1494 return (u8 *)ptr;
1495}
1496
1497static inline struct btrfs_node *btrfs_buffer_node(struct extent_buffer *eb)
1498{
1499 return NULL;
1500}
1501
1502static inline struct btrfs_leaf *btrfs_buffer_leaf(struct extent_buffer *eb)
1503{
1504 return NULL;
1505}
1506
1507static inline struct btrfs_header *btrfs_buffer_header(struct extent_buffer *eb)
1508{
1509 return NULL;
1510}
1511
1512static inline int btrfs_is_leaf(struct extent_buffer *eb)
1513{
1514 return btrfs_header_level(eb) == 0;
1515}
1516
1517/* struct btrfs_root_item */
1518BTRFS_SETGET_FUNCS(disk_root_generation, struct btrfs_root_item,
1519 generation, 64);
1520BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32);
1521BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64);
1522BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8);
1523
1524BTRFS_SETGET_STACK_FUNCS(root_generation, struct btrfs_root_item,
1525 generation, 64);
1526BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64);
1527BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8);
1528BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64);
1529BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32);
1530BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 64);
1531BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64);
1532BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
1533BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
1534 last_snapshot, 64);
1535
1536/* struct btrfs_super_block */
1537
1538BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
1539BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64);
1540BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block,
1541 generation, 64);
1542BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64);
1543BTRFS_SETGET_STACK_FUNCS(super_sys_array_size,
1544 struct btrfs_super_block, sys_chunk_array_size, 32);
1545BTRFS_SETGET_STACK_FUNCS(super_chunk_root_generation,
1546 struct btrfs_super_block, chunk_root_generation, 64);
1547BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block,
1548 root_level, 8);
1549BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block,
1550 chunk_root, 64);
1551BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block,
1552 chunk_root_level, 8);
1553BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block,
1554 log_root, 64);
1555BTRFS_SETGET_STACK_FUNCS(super_log_root_transid, struct btrfs_super_block,
1556 log_root_transid, 64);
1557BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block,
1558 log_root_level, 8);
1559BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block,
1560 total_bytes, 64);
1561BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block,
1562 bytes_used, 64);
1563BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block,
1564 sectorsize, 32);
1565BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block,
1566 nodesize, 32);
1567BTRFS_SETGET_STACK_FUNCS(super_leafsize, struct btrfs_super_block,
1568 leafsize, 32);
1569BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block,
1570 stripesize, 32);
1571BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block,
1572 root_dir_objectid, 64);
1573BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block,
1574 num_devices, 64);
1575BTRFS_SETGET_STACK_FUNCS(super_compat_flags, struct btrfs_super_block,
1576 compat_flags, 64);
1577BTRFS_SETGET_STACK_FUNCS(super_compat_ro_flags, struct btrfs_super_block,
1578 compat_flags, 64);
1579BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
1580 incompat_flags, 64);
1581BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
1582 csum_type, 16);
1583
1584static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
1585{
1586 int t = btrfs_super_csum_type(s);
1587 BUG_ON(t >= ARRAY_SIZE(btrfs_csum_sizes));
1588 return btrfs_csum_sizes[t];
1589}
1590
1591static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
1592{
1593 return offsetof(struct btrfs_leaf, items);
1594}
1595
1596/* struct btrfs_file_extent_item */
1597BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
1598
1599static inline unsigned long
1600btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e)
1601{
1602 unsigned long offset = (unsigned long)e;
1603 offset += offsetof(struct btrfs_file_extent_item, disk_bytenr);
1604 return offset;
1605}
1606
1607static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
1608{
1609 return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize;
1610}
1611
1612BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item,
1613 disk_bytenr, 64);
1614BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item,
1615 generation, 64);
1616BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item,
1617 disk_num_bytes, 64);
1618BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item,
1619 offset, 64);
1620BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item,
1621 num_bytes, 64);
1622BTRFS_SETGET_FUNCS(file_extent_ram_bytes, struct btrfs_file_extent_item,
1623 ram_bytes, 64);
1624BTRFS_SETGET_FUNCS(file_extent_compression, struct btrfs_file_extent_item,
1625 compression, 8);
1626BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item,
1627 encryption, 8);
1628BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item,
1629 other_encoding, 16);
1630
1631/* this returns the number of file bytes represented by the inline item.
1632 * If an item is compressed, this is the uncompressed size
1633 */
1634static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
1635 struct btrfs_file_extent_item *e)
1636{
1637 return btrfs_file_extent_ram_bytes(eb, e);
1638}
1639
1640/*
1641 * this returns the number of bytes used by the item on disk, minus the
1642 * size of any extent headers. If a file is compressed on disk, this is
1643 * the compressed size
1644 */
1645static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
1646 struct btrfs_item *e)
1647{
1648 unsigned long offset;
1649 offset = offsetof(struct btrfs_file_extent_item, disk_bytenr);
1650 return btrfs_item_size(eb, e) - offset;
1651}
1652
1653static inline struct btrfs_root *btrfs_sb(struct super_block *sb)
1654{
1655 return sb->s_fs_info;
1656}
1657
1658static inline int btrfs_set_root_name(struct btrfs_root *root,
1659 const char *name, int len)
1660{
1661 /* if we already have a name just free it */
1662 kfree(root->name);
1663
1664 root->name = kmalloc(len+1, GFP_KERNEL);
1665 if (!root->name)
1666 return -ENOMEM;
1667
1668 memcpy(root->name, name, len);
1669 root->name[len] = '\0';
1670
1671 return 0;
1672}
1673
1674static inline u32 btrfs_level_size(struct btrfs_root *root, int level)
1675{
1676 if (level == 0)
1677 return root->leafsize;
1678 return root->nodesize;
1679}
1680
1681/* helper function to cast into the data area of the leaf. */
1682#define btrfs_item_ptr(leaf, slot, type) \
1683 ((type *)(btrfs_leaf_data(leaf) + \
1684 btrfs_item_offset_nr(leaf, slot)))
1685
1686#define btrfs_item_ptr_offset(leaf, slot) \
1687 ((unsigned long)(btrfs_leaf_data(leaf) + \
1688 btrfs_item_offset_nr(leaf, slot)))
1689
1690static inline struct dentry *fdentry(struct file *file)
1691{
1692 return file->f_path.dentry;
1693}
1694
1695/* extent-tree.c */
1696int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
1697int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
1698 struct btrfs_root *root, u64 bytenr,
1699 u64 num_bytes, u32 *refs);
1700int btrfs_update_pinned_extents(struct btrfs_root *root,
1701 u64 bytenr, u64 num, int pin);
1702int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
1703 struct btrfs_root *root, struct extent_buffer *leaf);
1704int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
1705 struct btrfs_root *root, u64 objectid, u64 bytenr);
1706int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
1707 struct btrfs_root *root);
1708int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
1709struct btrfs_block_group_cache *btrfs_lookup_block_group(
1710 struct btrfs_fs_info *info,
1711 u64 bytenr);
1712u64 btrfs_find_block_group(struct btrfs_root *root,
1713 u64 search_start, u64 search_hint, int owner);
1714struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1715 struct btrfs_root *root,
1716 u32 blocksize, u64 parent,
1717 u64 root_objectid,
1718 u64 ref_generation,
1719 int level,
1720 u64 hint,
1721 u64 empty_size);
1722struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
1723 struct btrfs_root *root,
1724 u64 bytenr, u32 blocksize);
1725int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1726 struct btrfs_root *root,
1727 u64 num_bytes, u64 parent, u64 min_bytes,
1728 u64 root_objectid, u64 ref_generation,
1729 u64 owner, u64 empty_size, u64 hint_byte,
1730 u64 search_end, struct btrfs_key *ins, u64 data);
1731int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
1732 struct btrfs_root *root, u64 parent,
1733 u64 root_objectid, u64 ref_generation,
1734 u64 owner, struct btrfs_key *ins);
1735int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
1736 struct btrfs_root *root, u64 parent,
1737 u64 root_objectid, u64 ref_generation,
1738 u64 owner, struct btrfs_key *ins);
1739int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
1740 struct btrfs_root *root,
1741 u64 num_bytes, u64 min_alloc_size,
1742 u64 empty_size, u64 hint_byte,
1743 u64 search_end, struct btrfs_key *ins,
1744 u64 data);
1745int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1746 struct extent_buffer *orig_buf, struct extent_buffer *buf,
1747 u32 *nr_extents);
1748int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1749 struct extent_buffer *buf, u32 nr_extents);
1750int btrfs_update_ref(struct btrfs_trans_handle *trans,
1751 struct btrfs_root *root, struct extent_buffer *orig_buf,
1752 struct extent_buffer *buf, int start_slot, int nr);
1753int btrfs_free_extent(struct btrfs_trans_handle *trans,
1754 struct btrfs_root *root,
1755 u64 bytenr, u64 num_bytes, u64 parent,
1756 u64 root_objectid, u64 ref_generation,
1757 u64 owner_objectid, int pin);
1758int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
1759int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1760 struct btrfs_root *root,
1761 struct extent_io_tree *unpin);
1762int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1763 struct btrfs_root *root,
1764 u64 bytenr, u64 num_bytes, u64 parent,
1765 u64 root_objectid, u64 ref_generation,
1766 u64 owner_objectid);
1767int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1768 struct btrfs_root *root, u64 bytenr,
1769 u64 orig_parent, u64 parent,
1770 u64 root_objectid, u64 ref_generation,
1771 u64 owner_objectid);
1772int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1773 struct btrfs_root *root);
1774int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
1775int btrfs_free_block_groups(struct btrfs_fs_info *info);
1776int btrfs_read_block_groups(struct btrfs_root *root);
1777int btrfs_make_block_group(struct btrfs_trans_handle *trans,
1778 struct btrfs_root *root, u64 bytes_used,
1779 u64 type, u64 chunk_objectid, u64 chunk_offset,
1780 u64 size);
1781int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
1782 struct btrfs_root *root, u64 group_start);
1783int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start);
1784int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
1785 struct btrfs_root *root);
1786int btrfs_drop_dead_reloc_roots(struct btrfs_root *root);
1787int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
1788 struct btrfs_root *root,
1789 struct extent_buffer *buf, u64 orig_start);
1790int btrfs_add_dead_reloc_root(struct btrfs_root *root);
1791int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
1792int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
1793u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
1794/* ctree.c */
1795int btrfs_previous_item(struct btrfs_root *root,
1796 struct btrfs_path *path, u64 min_objectid,
1797 int type);
1798int btrfs_merge_path(struct btrfs_trans_handle *trans,
1799 struct btrfs_root *root,
1800 struct btrfs_key *node_keys,
1801 u64 *nodes, int lowest_level);
1802int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
1803 struct btrfs_root *root, struct btrfs_path *path,
1804 struct btrfs_key *new_key);
1805struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
1806struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
1807int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
1808 struct btrfs_key *key, int lowest_level,
1809 int cache_only, u64 min_trans);
1810int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
1811 struct btrfs_key *max_key,
1812 struct btrfs_path *path, int cache_only,
1813 u64 min_trans);
1814int btrfs_cow_block(struct btrfs_trans_handle *trans,
1815 struct btrfs_root *root, struct extent_buffer *buf,
1816 struct extent_buffer *parent, int parent_slot,
1817 struct extent_buffer **cow_ret, u64 prealloc_dest);
1818int btrfs_copy_root(struct btrfs_trans_handle *trans,
1819 struct btrfs_root *root,
1820 struct extent_buffer *buf,
1821 struct extent_buffer **cow_ret, u64 new_root_objectid);
1822int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
1823 *root, struct btrfs_path *path, u32 data_size);
1824int btrfs_truncate_item(struct btrfs_trans_handle *trans,
1825 struct btrfs_root *root,
1826 struct btrfs_path *path,
1827 u32 new_size, int from_end);
1828int btrfs_split_item(struct btrfs_trans_handle *trans,
1829 struct btrfs_root *root,
1830 struct btrfs_path *path,
1831 struct btrfs_key *new_key,
1832 unsigned long split_offset);
1833int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1834 *root, struct btrfs_key *key, struct btrfs_path *p, int
1835 ins_len, int cow);
1836int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1837 struct btrfs_root *root, struct extent_buffer *parent,
1838 int start_slot, int cache_only, u64 *last_ret,
1839 struct btrfs_key *progress);
1840void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p);
1841struct btrfs_path *btrfs_alloc_path(void);
1842void btrfs_free_path(struct btrfs_path *p);
1843void btrfs_init_path(struct btrfs_path *p);
1844int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1845 struct btrfs_path *path, int slot, int nr);
1846int btrfs_del_leaf(struct btrfs_trans_handle *trans,
1847 struct btrfs_root *root,
1848 struct btrfs_path *path, u64 bytenr);
1849static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
1850 struct btrfs_root *root,
1851 struct btrfs_path *path)
1852{
1853 return btrfs_del_items(trans, root, path, path->slots[0], 1);
1854}
1855
1856int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
1857 *root, struct btrfs_key *key, void *data, u32 data_size);
1858int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
1859 struct btrfs_root *root,
1860 struct btrfs_path *path,
1861 struct btrfs_key *cpu_key, u32 *data_size,
1862 int nr);
1863int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
1864 struct btrfs_root *root,
1865 struct btrfs_path *path,
1866 struct btrfs_key *cpu_key, u32 *data_size, int nr);
1867
1868static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
1869 struct btrfs_root *root,
1870 struct btrfs_path *path,
1871 struct btrfs_key *key,
1872 u32 data_size)
1873{
1874 return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1);
1875}
1876
1877int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
1878int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
1879int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
1880int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1881 *root);
1882int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
1883 struct btrfs_root *root,
1884 struct extent_buffer *node,
1885 struct extent_buffer *parent);
1886/* root-item.c */
1887int btrfs_find_root_ref(struct btrfs_root *tree_root,
1888 struct btrfs_path *path,
1889 u64 root_id, u64 ref_id);
1890int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
1891 struct btrfs_root *tree_root,
1892 u64 root_id, u8 type, u64 ref_id,
1893 u64 dirid, u64 sequence,
1894 const char *name, int name_len);
1895int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1896 struct btrfs_key *key);
1897int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
1898 *root, struct btrfs_key *key, struct btrfs_root_item
1899 *item);
1900int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
1901 *root, struct btrfs_key *key, struct btrfs_root_item
1902 *item);
1903int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
1904 btrfs_root_item *item, struct btrfs_key *key);
1905int btrfs_search_root(struct btrfs_root *root, u64 search_start,
1906 u64 *found_objectid);
1907int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid,
1908 struct btrfs_root *latest_root);
1909/* dir-item.c */
1910int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
1911 struct btrfs_root *root, const char *name,
1912 int name_len, u64 dir,
1913 struct btrfs_key *location, u8 type, u64 index);
1914struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
1915 struct btrfs_root *root,
1916 struct btrfs_path *path, u64 dir,
1917 const char *name, int name_len,
1918 int mod);
1919struct btrfs_dir_item *
1920btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
1921 struct btrfs_root *root,
1922 struct btrfs_path *path, u64 dir,
1923 u64 objectid, const char *name, int name_len,
1924 int mod);
1925struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
1926 struct btrfs_path *path,
1927 const char *name, int name_len);
1928int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
1929 struct btrfs_root *root,
1930 struct btrfs_path *path,
1931 struct btrfs_dir_item *di);
1932int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
1933 struct btrfs_root *root, const char *name,
1934 u16 name_len, const void *data, u16 data_len,
1935 u64 dir);
1936struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
1937 struct btrfs_root *root,
1938 struct btrfs_path *path, u64 dir,
1939 const char *name, u16 name_len,
1940 int mod);
1941
1942/* orphan.c */
1943int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
1944 struct btrfs_root *root, u64 offset);
1945int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
1946 struct btrfs_root *root, u64 offset);
1947
1948/* inode-map.c */
1949int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
1950 struct btrfs_root *fs_root,
1951 u64 dirid, u64 *objectid);
1952int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid);
1953
1954/* inode-item.c */
1955int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
1956 struct btrfs_root *root,
1957 const char *name, int name_len,
1958 u64 inode_objectid, u64 ref_objectid, u64 index);
1959int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
1960 struct btrfs_root *root,
1961 const char *name, int name_len,
1962 u64 inode_objectid, u64 ref_objectid, u64 *index);
1963int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
1964 struct btrfs_root *root,
1965 struct btrfs_path *path, u64 objectid);
1966int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
1967 *root, struct btrfs_path *path,
1968 struct btrfs_key *location, int mod);
1969
1970/* file-item.c */
1971int btrfs_del_csums(struct btrfs_trans_handle *trans,
1972 struct btrfs_root *root, u64 bytenr, u64 len);
1973int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
1974 struct bio *bio, u32 *dst);
1975int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
1976 struct btrfs_root *root,
1977 u64 objectid, u64 pos,
1978 u64 disk_offset, u64 disk_num_bytes,
1979 u64 num_bytes, u64 offset, u64 ram_bytes,
1980 u8 compression, u8 encryption, u16 other_encoding);
1981int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
1982 struct btrfs_root *root,
1983 struct btrfs_path *path, u64 objectid,
1984 u64 bytenr, int mod);
1985int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
1986 struct btrfs_root *root,
1987 struct btrfs_ordered_sum *sums);
1988int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
1989 struct bio *bio, u64 file_start, int contig);
1990int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode,
1991 u64 start, unsigned long len);
1992struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
1993 struct btrfs_root *root,
1994 struct btrfs_path *path,
1995 u64 bytenr, int cow);
1996int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
1997 struct btrfs_root *root, struct btrfs_path *path,
1998 u64 isize);
1999int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start,
2000 u64 end, struct list_head *list);
2001/* inode.c */
2002
2003/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
2004#if defined(ClearPageFsMisc) && !defined(ClearPageChecked)
2005#define ClearPageChecked ClearPageFsMisc
2006#define SetPageChecked SetPageFsMisc
2007#define PageChecked PageFsMisc
2008#endif
2009
2010struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
2011int btrfs_set_inode_index(struct inode *dir, u64 *index);
2012int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2013 struct btrfs_root *root,
2014 struct inode *dir, struct inode *inode,
2015 const char *name, int name_len);
2016int btrfs_add_link(struct btrfs_trans_handle *trans,
2017 struct inode *parent_inode, struct inode *inode,
2018 const char *name, int name_len, int add_backref, u64 index);
2019int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2020 struct btrfs_root *root,
2021 struct inode *inode, u64 new_size,
2022 u32 min_type);
2023
2024int btrfs_start_delalloc_inodes(struct btrfs_root *root);
2025int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
2026int btrfs_writepages(struct address_space *mapping,
2027 struct writeback_control *wbc);
2028int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
2029 struct btrfs_root *new_root, struct dentry *dentry,
2030 u64 new_dirid, u64 alloc_hint);
2031int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
2032 size_t size, struct bio *bio, unsigned long bio_flags);
2033
2034unsigned long btrfs_force_ra(struct address_space *mapping,
2035 struct file_ra_state *ra, struct file *file,
2036 pgoff_t offset, pgoff_t last_index);
2037int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
2038 int for_del);
2039int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page);
2040int btrfs_readpage(struct file *file, struct page *page);
2041void btrfs_delete_inode(struct inode *inode);
2042void btrfs_put_inode(struct inode *inode);
2043void btrfs_read_locked_inode(struct inode *inode);
2044int btrfs_write_inode(struct inode *inode, int wait);
2045void btrfs_dirty_inode(struct inode *inode);
2046struct inode *btrfs_alloc_inode(struct super_block *sb);
2047void btrfs_destroy_inode(struct inode *inode);
2048int btrfs_init_cachep(void);
2049void btrfs_destroy_cachep(void);
2050long btrfs_ioctl_trans_end(struct file *file);
2051struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
2052 struct btrfs_root *root, int wait);
2053struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
2054 struct btrfs_root *root);
2055struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
2056 struct btrfs_root *root, int *is_new);
2057int btrfs_commit_write(struct file *file, struct page *page,
2058 unsigned from, unsigned to);
2059struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
2060 size_t page_offset, u64 start, u64 end,
2061 int create);
2062int btrfs_update_inode(struct btrfs_trans_handle *trans,
2063 struct btrfs_root *root,
2064 struct inode *inode);
2065int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
2066int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
2067void btrfs_orphan_cleanup(struct btrfs_root *root);
2068int btrfs_cont_expand(struct inode *inode, loff_t size);
2069
2070/* ioctl.c */
2071long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
2072
2073/* file.c */
2074int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync);
2075int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
2076 int skip_pinned);
2077int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
2078extern struct file_operations btrfs_file_operations;
2079int btrfs_drop_extents(struct btrfs_trans_handle *trans,
2080 struct btrfs_root *root, struct inode *inode,
2081 u64 start, u64 end, u64 inline_limit, u64 *hint_block);
2082int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2083 struct btrfs_root *root,
2084 struct inode *inode, u64 start, u64 end);
2085int btrfs_release_file(struct inode *inode, struct file *file);
2086
2087/* tree-defrag.c */
2088int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
2089 struct btrfs_root *root, int cache_only);
2090
2091/* sysfs.c */
2092int btrfs_init_sysfs(void);
2093void btrfs_exit_sysfs(void);
2094int btrfs_sysfs_add_super(struct btrfs_fs_info *fs);
2095int btrfs_sysfs_add_root(struct btrfs_root *root);
2096void btrfs_sysfs_del_root(struct btrfs_root *root);
2097void btrfs_sysfs_del_super(struct btrfs_fs_info *root);
2098
2099/* xattr.c */
2100ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
2101
2102/* super.c */
2103u64 btrfs_parse_size(char *str);
2104int btrfs_parse_options(struct btrfs_root *root, char *options);
2105int btrfs_sync_fs(struct super_block *sb, int wait);
2106
2107/* acl.c */
2108int btrfs_check_acl(struct inode *inode, int mask);
2109int btrfs_init_acl(struct inode *inode, struct inode *dir);
2110int btrfs_acl_chmod(struct inode *inode);
2111
2112/* free-space-cache.c */
2113int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
2114 u64 bytenr, u64 size);
2115int btrfs_add_free_space_lock(struct btrfs_block_group_cache *block_group,
2116 u64 offset, u64 bytes);
2117int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
2118 u64 bytenr, u64 size);
2119int btrfs_remove_free_space_lock(struct btrfs_block_group_cache *block_group,
2120 u64 offset, u64 bytes);
2121void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
2122 *block_group);
2123struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache
2124 *block_group, u64 offset,
2125 u64 bytes);
2126void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
2127 u64 bytes);
2128u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
2129#endif
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
new file mode 100644
index 000000000000..926a0b287a7d
--- /dev/null
+++ b/fs/btrfs/dir-item.c
@@ -0,0 +1,386 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include "ctree.h"
20#include "disk-io.h"
21#include "hash.h"
22#include "transaction.h"
23
24/*
25 * insert a name into a directory, doing overflow properly if there is a hash
26 * collision. data_size indicates how big the item inserted should be. On
27 * success a struct btrfs_dir_item pointer is returned, otherwise it is
28 * an ERR_PTR.
29 *
30 * The name is not copied into the dir item, you have to do that yourself.
31 */
32static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
33 *trans,
34 struct btrfs_root *root,
35 struct btrfs_path *path,
36 struct btrfs_key *cpu_key,
37 u32 data_size,
38 const char *name,
39 int name_len)
40{
41 int ret;
42 char *ptr;
43 struct btrfs_item *item;
44 struct extent_buffer *leaf;
45
46 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
47 if (ret == -EEXIST) {
48 struct btrfs_dir_item *di;
49 di = btrfs_match_dir_item_name(root, path, name, name_len);
50 if (di)
51 return ERR_PTR(-EEXIST);
52 ret = btrfs_extend_item(trans, root, path, data_size);
53 WARN_ON(ret > 0);
54 }
55 if (ret < 0)
56 return ERR_PTR(ret);
57 WARN_ON(ret > 0);
58 leaf = path->nodes[0];
59 item = btrfs_item_nr(leaf, path->slots[0]);
60 ptr = btrfs_item_ptr(leaf, path->slots[0], char);
61 BUG_ON(data_size > btrfs_item_size(leaf, item));
62 ptr += btrfs_item_size(leaf, item) - data_size;
63 return (struct btrfs_dir_item *)ptr;
64}
65
66/*
67 * xattrs work a lot like directories, this inserts an xattr item
68 * into the tree
69 */
70int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
71 struct btrfs_root *root, const char *name,
72 u16 name_len, const void *data, u16 data_len,
73 u64 dir)
74{
75 int ret = 0;
76 struct btrfs_path *path;
77 struct btrfs_dir_item *dir_item;
78 unsigned long name_ptr, data_ptr;
79 struct btrfs_key key, location;
80 struct btrfs_disk_key disk_key;
81 struct extent_buffer *leaf;
82 u32 data_size;
83
84 key.objectid = dir;
85 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
86 key.offset = btrfs_name_hash(name, name_len);
87 path = btrfs_alloc_path();
88 if (!path)
89 return -ENOMEM;
90 if (name_len + data_len + sizeof(struct btrfs_dir_item) >
91 BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item))
92 return -ENOSPC;
93
94 data_size = sizeof(*dir_item) + name_len + data_len;
95 dir_item = insert_with_overflow(trans, root, path, &key, data_size,
96 name, name_len);
97 /*
98 * FIXME: at some point we should handle xattr's that are larger than
99 * what we can fit in our leaf. We set location to NULL b/c we arent
100 * pointing at anything else, that will change if we store the xattr
101 * data in a separate inode.
102 */
103 BUG_ON(IS_ERR(dir_item));
104 memset(&location, 0, sizeof(location));
105
106 leaf = path->nodes[0];
107 btrfs_cpu_key_to_disk(&disk_key, &location);
108 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
109 btrfs_set_dir_type(leaf, dir_item, BTRFS_FT_XATTR);
110 btrfs_set_dir_name_len(leaf, dir_item, name_len);
111 btrfs_set_dir_transid(leaf, dir_item, trans->transid);
112 btrfs_set_dir_data_len(leaf, dir_item, data_len);
113 name_ptr = (unsigned long)(dir_item + 1);
114 data_ptr = (unsigned long)((char *)name_ptr + name_len);
115
116 write_extent_buffer(leaf, name, name_ptr, name_len);
117 write_extent_buffer(leaf, data, data_ptr, data_len);
118 btrfs_mark_buffer_dirty(path->nodes[0]);
119
120 btrfs_free_path(path);
121 return ret;
122}
123
124/*
125 * insert a directory item in the tree, doing all the magic for
126 * both indexes. 'dir' indicates which objectid to insert it into,
127 * 'location' is the key to stuff into the directory item, 'type' is the
128 * type of the inode we're pointing to, and 'index' is the sequence number
129 * to use for the second index (if one is created).
130 */
131int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
132 *root, const char *name, int name_len, u64 dir,
133 struct btrfs_key *location, u8 type, u64 index)
134{
135 int ret = 0;
136 int ret2 = 0;
137 struct btrfs_path *path;
138 struct btrfs_dir_item *dir_item;
139 struct extent_buffer *leaf;
140 unsigned long name_ptr;
141 struct btrfs_key key;
142 struct btrfs_disk_key disk_key;
143 u32 data_size;
144
145 key.objectid = dir;
146 btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
147 key.offset = btrfs_name_hash(name, name_len);
148 path = btrfs_alloc_path();
149 data_size = sizeof(*dir_item) + name_len;
150 dir_item = insert_with_overflow(trans, root, path, &key, data_size,
151 name, name_len);
152 if (IS_ERR(dir_item)) {
153 ret = PTR_ERR(dir_item);
154 if (ret == -EEXIST)
155 goto second_insert;
156 goto out;
157 }
158
159 leaf = path->nodes[0];
160 btrfs_cpu_key_to_disk(&disk_key, location);
161 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
162 btrfs_set_dir_type(leaf, dir_item, type);
163 btrfs_set_dir_data_len(leaf, dir_item, 0);
164 btrfs_set_dir_name_len(leaf, dir_item, name_len);
165 btrfs_set_dir_transid(leaf, dir_item, trans->transid);
166 name_ptr = (unsigned long)(dir_item + 1);
167
168 write_extent_buffer(leaf, name, name_ptr, name_len);
169 btrfs_mark_buffer_dirty(leaf);
170
171second_insert:
172 /* FIXME, use some real flag for selecting the extra index */
173 if (root == root->fs_info->tree_root) {
174 ret = 0;
175 goto out;
176 }
177 btrfs_release_path(root, path);
178
179 btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
180 key.offset = index;
181 dir_item = insert_with_overflow(trans, root, path, &key, data_size,
182 name, name_len);
183 if (IS_ERR(dir_item)) {
184 ret2 = PTR_ERR(dir_item);
185 goto out;
186 }
187 leaf = path->nodes[0];
188 btrfs_cpu_key_to_disk(&disk_key, location);
189 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
190 btrfs_set_dir_type(leaf, dir_item, type);
191 btrfs_set_dir_data_len(leaf, dir_item, 0);
192 btrfs_set_dir_name_len(leaf, dir_item, name_len);
193 btrfs_set_dir_transid(leaf, dir_item, trans->transid);
194 name_ptr = (unsigned long)(dir_item + 1);
195 write_extent_buffer(leaf, name, name_ptr, name_len);
196 btrfs_mark_buffer_dirty(leaf);
197out:
198 btrfs_free_path(path);
199 if (ret)
200 return ret;
201 if (ret2)
202 return ret2;
203 return 0;
204}
205
206/*
207 * lookup a directory item based on name. 'dir' is the objectid
208 * we're searching in, and 'mod' tells us if you plan on deleting the
209 * item (use mod < 0) or changing the options (use mod > 0)
210 */
211struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
212 struct btrfs_root *root,
213 struct btrfs_path *path, u64 dir,
214 const char *name, int name_len,
215 int mod)
216{
217 int ret;
218 struct btrfs_key key;
219 int ins_len = mod < 0 ? -1 : 0;
220 int cow = mod != 0;
221 struct btrfs_key found_key;
222 struct extent_buffer *leaf;
223
224 key.objectid = dir;
225 btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
226
227 key.offset = btrfs_name_hash(name, name_len);
228
229 ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
230 if (ret < 0)
231 return ERR_PTR(ret);
232 if (ret > 0) {
233 if (path->slots[0] == 0)
234 return NULL;
235 path->slots[0]--;
236 }
237
238 leaf = path->nodes[0];
239 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
240
241 if (found_key.objectid != dir ||
242 btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY ||
243 found_key.offset != key.offset)
244 return NULL;
245
246 return btrfs_match_dir_item_name(root, path, name, name_len);
247}
248
249/*
250 * lookup a directory item based on index. 'dir' is the objectid
251 * we're searching in, and 'mod' tells us if you plan on deleting the
252 * item (use mod < 0) or changing the options (use mod > 0)
253 *
254 * The name is used to make sure the index really points to the name you were
255 * looking for.
256 */
257struct btrfs_dir_item *
258btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
259 struct btrfs_root *root,
260 struct btrfs_path *path, u64 dir,
261 u64 objectid, const char *name, int name_len,
262 int mod)
263{
264 int ret;
265 struct btrfs_key key;
266 int ins_len = mod < 0 ? -1 : 0;
267 int cow = mod != 0;
268
269 key.objectid = dir;
270 btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
271 key.offset = objectid;
272
273 ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
274 if (ret < 0)
275 return ERR_PTR(ret);
276 if (ret > 0)
277 return ERR_PTR(-ENOENT);
278 return btrfs_match_dir_item_name(root, path, name, name_len);
279}
280
281struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
282 struct btrfs_root *root,
283 struct btrfs_path *path, u64 dir,
284 const char *name, u16 name_len,
285 int mod)
286{
287 int ret;
288 struct btrfs_key key;
289 int ins_len = mod < 0 ? -1 : 0;
290 int cow = mod != 0;
291 struct btrfs_key found_key;
292 struct extent_buffer *leaf;
293
294 key.objectid = dir;
295 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
296 key.offset = btrfs_name_hash(name, name_len);
297 ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
298 if (ret < 0)
299 return ERR_PTR(ret);
300 if (ret > 0) {
301 if (path->slots[0] == 0)
302 return NULL;
303 path->slots[0]--;
304 }
305
306 leaf = path->nodes[0];
307 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
308
309 if (found_key.objectid != dir ||
310 btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY ||
311 found_key.offset != key.offset)
312 return NULL;
313
314 return btrfs_match_dir_item_name(root, path, name, name_len);
315}
316
317/*
318 * helper function to look at the directory item pointed to by 'path'
319 * this walks through all the entries in a dir item and finds one
320 * for a specific name.
321 */
322struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
323 struct btrfs_path *path,
324 const char *name, int name_len)
325{
326 struct btrfs_dir_item *dir_item;
327 unsigned long name_ptr;
328 u32 total_len;
329 u32 cur = 0;
330 u32 this_len;
331 struct extent_buffer *leaf;
332
333 leaf = path->nodes[0];
334 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
335 total_len = btrfs_item_size_nr(leaf, path->slots[0]);
336 while (cur < total_len) {
337 this_len = sizeof(*dir_item) +
338 btrfs_dir_name_len(leaf, dir_item) +
339 btrfs_dir_data_len(leaf, dir_item);
340 name_ptr = (unsigned long)(dir_item + 1);
341
342 if (btrfs_dir_name_len(leaf, dir_item) == name_len &&
343 memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)
344 return dir_item;
345
346 cur += this_len;
347 dir_item = (struct btrfs_dir_item *)((char *)dir_item +
348 this_len);
349 }
350 return NULL;
351}
352
353/*
354 * given a pointer into a directory item, delete it. This
355 * handles items that have more than one entry in them.
356 */
357int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
358 struct btrfs_root *root,
359 struct btrfs_path *path,
360 struct btrfs_dir_item *di)
361{
362
363 struct extent_buffer *leaf;
364 u32 sub_item_len;
365 u32 item_len;
366 int ret = 0;
367
368 leaf = path->nodes[0];
369 sub_item_len = sizeof(*di) + btrfs_dir_name_len(leaf, di) +
370 btrfs_dir_data_len(leaf, di);
371 item_len = btrfs_item_size_nr(leaf, path->slots[0]);
372 if (sub_item_len == item_len) {
373 ret = btrfs_del_item(trans, root, path);
374 } else {
375 /* MARKER */
376 unsigned long ptr = (unsigned long)di;
377 unsigned long start;
378
379 start = btrfs_item_ptr_offset(leaf, path->slots[0]);
380 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
381 item_len - (ptr + sub_item_len - start));
382 ret = btrfs_truncate_item(trans, root, path,
383 item_len - sub_item_len, 1);
384 }
385 return 0;
386}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
new file mode 100644
index 000000000000..81a313874ae5
--- /dev/null
+++ b/fs/btrfs/disk-io.c
@@ -0,0 +1,2343 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/version.h>
20#include <linux/fs.h>
21#include <linux/blkdev.h>
22#include <linux/scatterlist.h>
23#include <linux/swap.h>
24#include <linux/radix-tree.h>
25#include <linux/writeback.h>
26#include <linux/buffer_head.h>
27#include <linux/workqueue.h>
28#include <linux/kthread.h>
29#include <linux/freezer.h>
30#include "compat.h"
31#include "crc32c.h"
32#include "ctree.h"
33#include "disk-io.h"
34#include "transaction.h"
35#include "btrfs_inode.h"
36#include "volumes.h"
37#include "print-tree.h"
38#include "async-thread.h"
39#include "locking.h"
40#include "ref-cache.h"
41#include "tree-log.h"
42
43static struct extent_io_ops btree_extent_io_ops;
44static void end_workqueue_fn(struct btrfs_work *work);
45
46/*
47 * end_io_wq structs are used to do processing in task context when an IO is
48 * complete. This is used during reads to verify checksums, and it is used
49 * by writes to insert metadata for new file extents after IO is complete.
50 */
51struct end_io_wq {
52 struct bio *bio;
53 bio_end_io_t *end_io;
54 void *private;
55 struct btrfs_fs_info *info;
56 int error;
57 int metadata;
58 struct list_head list;
59 struct btrfs_work work;
60};
61
62/*
63 * async submit bios are used to offload expensive checksumming
64 * onto the worker threads. They checksum file and metadata bios
65 * just before they are sent down the IO stack.
66 */
67struct async_submit_bio {
68 struct inode *inode;
69 struct bio *bio;
70 struct list_head list;
71 extent_submit_bio_hook_t *submit_bio_start;
72 extent_submit_bio_hook_t *submit_bio_done;
73 int rw;
74 int mirror_num;
75 unsigned long bio_flags;
76 struct btrfs_work work;
77};
78
79/*
80 * extents on the btree inode are pretty simple, there's one extent
81 * that covers the entire device
82 */
83static struct extent_map *btree_get_extent(struct inode *inode,
84 struct page *page, size_t page_offset, u64 start, u64 len,
85 int create)
86{
87 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
88 struct extent_map *em;
89 int ret;
90
91 spin_lock(&em_tree->lock);
92 em = lookup_extent_mapping(em_tree, start, len);
93 if (em) {
94 em->bdev =
95 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
96 spin_unlock(&em_tree->lock);
97 goto out;
98 }
99 spin_unlock(&em_tree->lock);
100
101 em = alloc_extent_map(GFP_NOFS);
102 if (!em) {
103 em = ERR_PTR(-ENOMEM);
104 goto out;
105 }
106 em->start = 0;
107 em->len = (u64)-1;
108 em->block_len = (u64)-1;
109 em->block_start = 0;
110 em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
111
112 spin_lock(&em_tree->lock);
113 ret = add_extent_mapping(em_tree, em);
114 if (ret == -EEXIST) {
115 u64 failed_start = em->start;
116 u64 failed_len = em->len;
117
118 free_extent_map(em);
119 em = lookup_extent_mapping(em_tree, start, len);
120 if (em) {
121 ret = 0;
122 } else {
123 em = lookup_extent_mapping(em_tree, failed_start,
124 failed_len);
125 ret = -EIO;
126 }
127 } else if (ret) {
128 free_extent_map(em);
129 em = NULL;
130 }
131 spin_unlock(&em_tree->lock);
132
133 if (ret)
134 em = ERR_PTR(ret);
135out:
136 return em;
137}
138
139u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
140{
141 return btrfs_crc32c(seed, data, len);
142}
143
144void btrfs_csum_final(u32 crc, char *result)
145{
146 *(__le32 *)result = ~cpu_to_le32(crc);
147}
148
149/*
150 * compute the csum for a btree block, and either verify it or write it
151 * into the csum field of the block.
152 */
153static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
154 int verify)
155{
156 u16 csum_size =
157 btrfs_super_csum_size(&root->fs_info->super_copy);
158 char *result = NULL;
159 unsigned long len;
160 unsigned long cur_len;
161 unsigned long offset = BTRFS_CSUM_SIZE;
162 char *map_token = NULL;
163 char *kaddr;
164 unsigned long map_start;
165 unsigned long map_len;
166 int err;
167 u32 crc = ~(u32)0;
168 unsigned long inline_result;
169
170 len = buf->len - offset;
171 while (len > 0) {
172 err = map_private_extent_buffer(buf, offset, 32,
173 &map_token, &kaddr,
174 &map_start, &map_len, KM_USER0);
175 if (err)
176 return 1;
177 cur_len = min(len, map_len - (offset - map_start));
178 crc = btrfs_csum_data(root, kaddr + offset - map_start,
179 crc, cur_len);
180 len -= cur_len;
181 offset += cur_len;
182 unmap_extent_buffer(buf, map_token, KM_USER0);
183 }
184 if (csum_size > sizeof(inline_result)) {
185 result = kzalloc(csum_size * sizeof(char), GFP_NOFS);
186 if (!result)
187 return 1;
188 } else {
189 result = (char *)&inline_result;
190 }
191
192 btrfs_csum_final(crc, result);
193
194 if (verify) {
195 if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
196 u32 val;
197 u32 found = 0;
198 memcpy(&found, result, csum_size);
199
200 read_extent_buffer(buf, &val, 0, csum_size);
201 printk(KERN_INFO "btrfs: %s checksum verify failed "
202 "on %llu wanted %X found %X level %d\n",
203 root->fs_info->sb->s_id,
204 buf->start, val, found, btrfs_header_level(buf));
205 if (result != (char *)&inline_result)
206 kfree(result);
207 return 1;
208 }
209 } else {
210 write_extent_buffer(buf, result, 0, csum_size);
211 }
212 if (result != (char *)&inline_result)
213 kfree(result);
214 return 0;
215}
216
217/*
218 * we can't consider a given block up to date unless the transid of the
219 * block matches the transid in the parent node's pointer. This is how we
220 * detect blocks that either didn't get written at all or got written
221 * in the wrong place.
222 */
223static int verify_parent_transid(struct extent_io_tree *io_tree,
224 struct extent_buffer *eb, u64 parent_transid)
225{
226 int ret;
227
228 if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
229 return 0;
230
231 lock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS);
232 if (extent_buffer_uptodate(io_tree, eb) &&
233 btrfs_header_generation(eb) == parent_transid) {
234 ret = 0;
235 goto out;
236 }
237 printk("parent transid verify failed on %llu wanted %llu found %llu\n",
238 (unsigned long long)eb->start,
239 (unsigned long long)parent_transid,
240 (unsigned long long)btrfs_header_generation(eb));
241 ret = 1;
242 clear_extent_buffer_uptodate(io_tree, eb);
243out:
244 unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
245 GFP_NOFS);
246 return ret;
247}
248
249/*
250 * helper to read a given tree block, doing retries as required when
251 * the checksums don't match and we have alternate mirrors to try.
252 */
253static int btree_read_extent_buffer_pages(struct btrfs_root *root,
254 struct extent_buffer *eb,
255 u64 start, u64 parent_transid)
256{
257 struct extent_io_tree *io_tree;
258 int ret;
259 int num_copies = 0;
260 int mirror_num = 0;
261
262 io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
263 while (1) {
264 ret = read_extent_buffer_pages(io_tree, eb, start, 1,
265 btree_get_extent, mirror_num);
266 if (!ret &&
267 !verify_parent_transid(io_tree, eb, parent_transid))
268 return ret;
269
270 num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
271 eb->start, eb->len);
272 if (num_copies == 1)
273 return ret;
274
275 mirror_num++;
276 if (mirror_num > num_copies)
277 return ret;
278 }
279 return -EIO;
280}
281
282/*
283 * checksum a dirty tree block before IO. This has extra checks to make sure
284 * we only fill in the checksum field in the first page of a multi-page block
285 */
286
287static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
288{
289 struct extent_io_tree *tree;
290 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
291 u64 found_start;
292 int found_level;
293 unsigned long len;
294 struct extent_buffer *eb;
295 int ret;
296
297 tree = &BTRFS_I(page->mapping->host)->io_tree;
298
299 if (page->private == EXTENT_PAGE_PRIVATE)
300 goto out;
301 if (!page->private)
302 goto out;
303 len = page->private >> 2;
304 WARN_ON(len == 0);
305
306 eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
307 ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
308 btrfs_header_generation(eb));
309 BUG_ON(ret);
310 found_start = btrfs_header_bytenr(eb);
311 if (found_start != start) {
312 WARN_ON(1);
313 goto err;
314 }
315 if (eb->first_page != page) {
316 WARN_ON(1);
317 goto err;
318 }
319 if (!PageUptodate(page)) {
320 WARN_ON(1);
321 goto err;
322 }
323 found_level = btrfs_header_level(eb);
324
325 csum_tree_block(root, eb, 0);
326err:
327 free_extent_buffer(eb);
328out:
329 return 0;
330}
331
332static int check_tree_block_fsid(struct btrfs_root *root,
333 struct extent_buffer *eb)
334{
335 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
336 u8 fsid[BTRFS_UUID_SIZE];
337 int ret = 1;
338
339 read_extent_buffer(eb, fsid, (unsigned long)btrfs_header_fsid(eb),
340 BTRFS_FSID_SIZE);
341 while (fs_devices) {
342 if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) {
343 ret = 0;
344 break;
345 }
346 fs_devices = fs_devices->seed;
347 }
348 return ret;
349}
350
351static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
352 struct extent_state *state)
353{
354 struct extent_io_tree *tree;
355 u64 found_start;
356 int found_level;
357 unsigned long len;
358 struct extent_buffer *eb;
359 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
360 int ret = 0;
361
362 tree = &BTRFS_I(page->mapping->host)->io_tree;
363 if (page->private == EXTENT_PAGE_PRIVATE)
364 goto out;
365 if (!page->private)
366 goto out;
367
368 len = page->private >> 2;
369 WARN_ON(len == 0);
370
371 eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
372
373 found_start = btrfs_header_bytenr(eb);
374 if (found_start != start) {
375 printk(KERN_INFO "btrfs bad tree block start %llu %llu\n",
376 (unsigned long long)found_start,
377 (unsigned long long)eb->start);
378 ret = -EIO;
379 goto err;
380 }
381 if (eb->first_page != page) {
382 printk(KERN_INFO "btrfs bad first page %lu %lu\n",
383 eb->first_page->index, page->index);
384 WARN_ON(1);
385 ret = -EIO;
386 goto err;
387 }
388 if (check_tree_block_fsid(root, eb)) {
389 printk(KERN_INFO "btrfs bad fsid on block %llu\n",
390 (unsigned long long)eb->start);
391 ret = -EIO;
392 goto err;
393 }
394 found_level = btrfs_header_level(eb);
395
396 ret = csum_tree_block(root, eb, 1);
397 if (ret)
398 ret = -EIO;
399
400 end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
401 end = eb->start + end - 1;
402err:
403 free_extent_buffer(eb);
404out:
405 return ret;
406}
407
408static void end_workqueue_bio(struct bio *bio, int err)
409{
410 struct end_io_wq *end_io_wq = bio->bi_private;
411 struct btrfs_fs_info *fs_info;
412
413 fs_info = end_io_wq->info;
414 end_io_wq->error = err;
415 end_io_wq->work.func = end_workqueue_fn;
416 end_io_wq->work.flags = 0;
417
418 if (bio->bi_rw & (1 << BIO_RW)) {
419 if (end_io_wq->metadata)
420 btrfs_queue_worker(&fs_info->endio_meta_write_workers,
421 &end_io_wq->work);
422 else
423 btrfs_queue_worker(&fs_info->endio_write_workers,
424 &end_io_wq->work);
425 } else {
426 if (end_io_wq->metadata)
427 btrfs_queue_worker(&fs_info->endio_meta_workers,
428 &end_io_wq->work);
429 else
430 btrfs_queue_worker(&fs_info->endio_workers,
431 &end_io_wq->work);
432 }
433}
434
435int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
436 int metadata)
437{
438 struct end_io_wq *end_io_wq;
439 end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS);
440 if (!end_io_wq)
441 return -ENOMEM;
442
443 end_io_wq->private = bio->bi_private;
444 end_io_wq->end_io = bio->bi_end_io;
445 end_io_wq->info = info;
446 end_io_wq->error = 0;
447 end_io_wq->bio = bio;
448 end_io_wq->metadata = metadata;
449
450 bio->bi_private = end_io_wq;
451 bio->bi_end_io = end_workqueue_bio;
452 return 0;
453}
454
455unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
456{
457 unsigned long limit = min_t(unsigned long,
458 info->workers.max_workers,
459 info->fs_devices->open_devices);
460 return 256 * limit;
461}
462
463int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
464{
465 return atomic_read(&info->nr_async_bios) >
466 btrfs_async_submit_limit(info);
467}
468
469static void run_one_async_start(struct btrfs_work *work)
470{
471 struct btrfs_fs_info *fs_info;
472 struct async_submit_bio *async;
473
474 async = container_of(work, struct async_submit_bio, work);
475 fs_info = BTRFS_I(async->inode)->root->fs_info;
476 async->submit_bio_start(async->inode, async->rw, async->bio,
477 async->mirror_num, async->bio_flags);
478}
479
480static void run_one_async_done(struct btrfs_work *work)
481{
482 struct btrfs_fs_info *fs_info;
483 struct async_submit_bio *async;
484 int limit;
485
486 async = container_of(work, struct async_submit_bio, work);
487 fs_info = BTRFS_I(async->inode)->root->fs_info;
488
489 limit = btrfs_async_submit_limit(fs_info);
490 limit = limit * 2 / 3;
491
492 atomic_dec(&fs_info->nr_async_submits);
493
494 if (atomic_read(&fs_info->nr_async_submits) < limit &&
495 waitqueue_active(&fs_info->async_submit_wait))
496 wake_up(&fs_info->async_submit_wait);
497
498 async->submit_bio_done(async->inode, async->rw, async->bio,
499 async->mirror_num, async->bio_flags);
500}
501
502static void run_one_async_free(struct btrfs_work *work)
503{
504 struct async_submit_bio *async;
505
506 async = container_of(work, struct async_submit_bio, work);
507 kfree(async);
508}
509
510int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
511 int rw, struct bio *bio, int mirror_num,
512 unsigned long bio_flags,
513 extent_submit_bio_hook_t *submit_bio_start,
514 extent_submit_bio_hook_t *submit_bio_done)
515{
516 struct async_submit_bio *async;
517
518 async = kmalloc(sizeof(*async), GFP_NOFS);
519 if (!async)
520 return -ENOMEM;
521
522 async->inode = inode;
523 async->rw = rw;
524 async->bio = bio;
525 async->mirror_num = mirror_num;
526 async->submit_bio_start = submit_bio_start;
527 async->submit_bio_done = submit_bio_done;
528
529 async->work.func = run_one_async_start;
530 async->work.ordered_func = run_one_async_done;
531 async->work.ordered_free = run_one_async_free;
532
533 async->work.flags = 0;
534 async->bio_flags = bio_flags;
535
536 atomic_inc(&fs_info->nr_async_submits);
537 btrfs_queue_worker(&fs_info->workers, &async->work);
538#if 0
539 int limit = btrfs_async_submit_limit(fs_info);
540 if (atomic_read(&fs_info->nr_async_submits) > limit) {
541 wait_event_timeout(fs_info->async_submit_wait,
542 (atomic_read(&fs_info->nr_async_submits) < limit),
543 HZ/10);
544
545 wait_event_timeout(fs_info->async_submit_wait,
546 (atomic_read(&fs_info->nr_async_bios) < limit),
547 HZ/10);
548 }
549#endif
550 while (atomic_read(&fs_info->async_submit_draining) &&
551 atomic_read(&fs_info->nr_async_submits)) {
552 wait_event(fs_info->async_submit_wait,
553 (atomic_read(&fs_info->nr_async_submits) == 0));
554 }
555
556 return 0;
557}
558
559static int btree_csum_one_bio(struct bio *bio)
560{
561 struct bio_vec *bvec = bio->bi_io_vec;
562 int bio_index = 0;
563 struct btrfs_root *root;
564
565 WARN_ON(bio->bi_vcnt <= 0);
566 while (bio_index < bio->bi_vcnt) {
567 root = BTRFS_I(bvec->bv_page->mapping->host)->root;
568 csum_dirty_buffer(root, bvec->bv_page);
569 bio_index++;
570 bvec++;
571 }
572 return 0;
573}
574
575static int __btree_submit_bio_start(struct inode *inode, int rw,
576 struct bio *bio, int mirror_num,
577 unsigned long bio_flags)
578{
579 /*
580 * when we're called for a write, we're already in the async
581 * submission context. Just jump into btrfs_map_bio
582 */
583 btree_csum_one_bio(bio);
584 return 0;
585}
586
587static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
588 int mirror_num, unsigned long bio_flags)
589{
590 /*
591 * when we're called for a write, we're already in the async
592 * submission context. Just jump into btrfs_map_bio
593 */
594 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
595}
596
597static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
598 int mirror_num, unsigned long bio_flags)
599{
600 int ret;
601
602 ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
603 bio, 1);
604 BUG_ON(ret);
605
606 if (!(rw & (1 << BIO_RW))) {
607 /*
608 * called for a read, do the setup so that checksum validation
609 * can happen in the async kernel threads
610 */
611 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
612 mirror_num, 0);
613 }
614 /*
615 * kthread helpers are used to submit writes so that checksumming
616 * can happen in parallel across all CPUs
617 */
618 return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
619 inode, rw, bio, mirror_num, 0,
620 __btree_submit_bio_start,
621 __btree_submit_bio_done);
622}
623
624static int btree_writepage(struct page *page, struct writeback_control *wbc)
625{
626 struct extent_io_tree *tree;
627 tree = &BTRFS_I(page->mapping->host)->io_tree;
628
629 if (current->flags & PF_MEMALLOC) {
630 redirty_page_for_writepage(wbc, page);
631 unlock_page(page);
632 return 0;
633 }
634 return extent_write_full_page(tree, page, btree_get_extent, wbc);
635}
636
637static int btree_writepages(struct address_space *mapping,
638 struct writeback_control *wbc)
639{
640 struct extent_io_tree *tree;
641 tree = &BTRFS_I(mapping->host)->io_tree;
642 if (wbc->sync_mode == WB_SYNC_NONE) {
643 u64 num_dirty;
644 u64 start = 0;
645 unsigned long thresh = 32 * 1024 * 1024;
646
647 if (wbc->for_kupdate)
648 return 0;
649
650 num_dirty = count_range_bits(tree, &start, (u64)-1,
651 thresh, EXTENT_DIRTY);
652 if (num_dirty < thresh)
653 return 0;
654 }
655 return extent_writepages(tree, mapping, btree_get_extent, wbc);
656}
657
658static int btree_readpage(struct file *file, struct page *page)
659{
660 struct extent_io_tree *tree;
661 tree = &BTRFS_I(page->mapping->host)->io_tree;
662 return extent_read_full_page(tree, page, btree_get_extent);
663}
664
665static int btree_releasepage(struct page *page, gfp_t gfp_flags)
666{
667 struct extent_io_tree *tree;
668 struct extent_map_tree *map;
669 int ret;
670
671 if (PageWriteback(page) || PageDirty(page))
672 return 0;
673
674 tree = &BTRFS_I(page->mapping->host)->io_tree;
675 map = &BTRFS_I(page->mapping->host)->extent_tree;
676
677 ret = try_release_extent_state(map, tree, page, gfp_flags);
678 if (!ret)
679 return 0;
680
681 ret = try_release_extent_buffer(tree, page);
682 if (ret == 1) {
683 ClearPagePrivate(page);
684 set_page_private(page, 0);
685 page_cache_release(page);
686 }
687
688 return ret;
689}
690
691static void btree_invalidatepage(struct page *page, unsigned long offset)
692{
693 struct extent_io_tree *tree;
694 tree = &BTRFS_I(page->mapping->host)->io_tree;
695 extent_invalidatepage(tree, page, offset);
696 btree_releasepage(page, GFP_NOFS);
697 if (PagePrivate(page)) {
698 printk(KERN_WARNING "btrfs warning page private not zero "
699 "on page %llu\n", (unsigned long long)page_offset(page));
700 ClearPagePrivate(page);
701 set_page_private(page, 0);
702 page_cache_release(page);
703 }
704}
705
706#if 0
707static int btree_writepage(struct page *page, struct writeback_control *wbc)
708{
709 struct buffer_head *bh;
710 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
711 struct buffer_head *head;
712 if (!page_has_buffers(page)) {
713 create_empty_buffers(page, root->fs_info->sb->s_blocksize,
714 (1 << BH_Dirty)|(1 << BH_Uptodate));
715 }
716 head = page_buffers(page);
717 bh = head;
718 do {
719 if (buffer_dirty(bh))
720 csum_tree_block(root, bh, 0);
721 bh = bh->b_this_page;
722 } while (bh != head);
723 return block_write_full_page(page, btree_get_block, wbc);
724}
725#endif
726
727static struct address_space_operations btree_aops = {
728 .readpage = btree_readpage,
729 .writepage = btree_writepage,
730 .writepages = btree_writepages,
731 .releasepage = btree_releasepage,
732 .invalidatepage = btree_invalidatepage,
733 .sync_page = block_sync_page,
734};
735
736int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
737 u64 parent_transid)
738{
739 struct extent_buffer *buf = NULL;
740 struct inode *btree_inode = root->fs_info->btree_inode;
741 int ret = 0;
742
743 buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
744 if (!buf)
745 return 0;
746 read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
747 buf, 0, 0, btree_get_extent, 0);
748 free_extent_buffer(buf);
749 return ret;
750}
751
752struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
753 u64 bytenr, u32 blocksize)
754{
755 struct inode *btree_inode = root->fs_info->btree_inode;
756 struct extent_buffer *eb;
757 eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
758 bytenr, blocksize, GFP_NOFS);
759 return eb;
760}
761
762struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
763 u64 bytenr, u32 blocksize)
764{
765 struct inode *btree_inode = root->fs_info->btree_inode;
766 struct extent_buffer *eb;
767
768 eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
769 bytenr, blocksize, NULL, GFP_NOFS);
770 return eb;
771}
772
773
774int btrfs_write_tree_block(struct extent_buffer *buf)
775{
776 return btrfs_fdatawrite_range(buf->first_page->mapping, buf->start,
777 buf->start + buf->len - 1, WB_SYNC_ALL);
778}
779
780int btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
781{
782 return btrfs_wait_on_page_writeback_range(buf->first_page->mapping,
783 buf->start, buf->start + buf->len - 1);
784}
785
786struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
787 u32 blocksize, u64 parent_transid)
788{
789 struct extent_buffer *buf = NULL;
790 struct inode *btree_inode = root->fs_info->btree_inode;
791 struct extent_io_tree *io_tree;
792 int ret;
793
794 io_tree = &BTRFS_I(btree_inode)->io_tree;
795
796 buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
797 if (!buf)
798 return NULL;
799
800 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
801
802 if (ret == 0)
803 buf->flags |= EXTENT_UPTODATE;
804 else
805 WARN_ON(1);
806 return buf;
807
808}
809
810int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
811 struct extent_buffer *buf)
812{
813 struct inode *btree_inode = root->fs_info->btree_inode;
814 if (btrfs_header_generation(buf) ==
815 root->fs_info->running_transaction->transid) {
816 WARN_ON(!btrfs_tree_locked(buf));
817 clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
818 buf);
819 }
820 return 0;
821}
822
823static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
824 u32 stripesize, struct btrfs_root *root,
825 struct btrfs_fs_info *fs_info,
826 u64 objectid)
827{
828 root->node = NULL;
829 root->commit_root = NULL;
830 root->ref_tree = NULL;
831 root->sectorsize = sectorsize;
832 root->nodesize = nodesize;
833 root->leafsize = leafsize;
834 root->stripesize = stripesize;
835 root->ref_cows = 0;
836 root->track_dirty = 0;
837
838 root->fs_info = fs_info;
839 root->objectid = objectid;
840 root->last_trans = 0;
841 root->highest_inode = 0;
842 root->last_inode_alloc = 0;
843 root->name = NULL;
844 root->in_sysfs = 0;
845
846 INIT_LIST_HEAD(&root->dirty_list);
847 INIT_LIST_HEAD(&root->orphan_list);
848 INIT_LIST_HEAD(&root->dead_list);
849 spin_lock_init(&root->node_lock);
850 spin_lock_init(&root->list_lock);
851 mutex_init(&root->objectid_mutex);
852 mutex_init(&root->log_mutex);
853 extent_io_tree_init(&root->dirty_log_pages,
854 fs_info->btree_inode->i_mapping, GFP_NOFS);
855
856 btrfs_leaf_ref_tree_init(&root->ref_tree_struct);
857 root->ref_tree = &root->ref_tree_struct;
858
859 memset(&root->root_key, 0, sizeof(root->root_key));
860 memset(&root->root_item, 0, sizeof(root->root_item));
861 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
862 memset(&root->root_kobj, 0, sizeof(root->root_kobj));
863 root->defrag_trans_start = fs_info->generation;
864 init_completion(&root->kobj_unregister);
865 root->defrag_running = 0;
866 root->defrag_level = 0;
867 root->root_key.objectid = objectid;
868 root->anon_super.s_root = NULL;
869 root->anon_super.s_dev = 0;
870 INIT_LIST_HEAD(&root->anon_super.s_list);
871 INIT_LIST_HEAD(&root->anon_super.s_instances);
872 init_rwsem(&root->anon_super.s_umount);
873
874 return 0;
875}
876
877static int find_and_setup_root(struct btrfs_root *tree_root,
878 struct btrfs_fs_info *fs_info,
879 u64 objectid,
880 struct btrfs_root *root)
881{
882 int ret;
883 u32 blocksize;
884 u64 generation;
885
886 __setup_root(tree_root->nodesize, tree_root->leafsize,
887 tree_root->sectorsize, tree_root->stripesize,
888 root, fs_info, objectid);
889 ret = btrfs_find_last_root(tree_root, objectid,
890 &root->root_item, &root->root_key);
891 BUG_ON(ret);
892
893 generation = btrfs_root_generation(&root->root_item);
894 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
895 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
896 blocksize, generation);
897 BUG_ON(!root->node);
898 return 0;
899}
900
901int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
902 struct btrfs_fs_info *fs_info)
903{
904 struct extent_buffer *eb;
905 struct btrfs_root *log_root_tree = fs_info->log_root_tree;
906 u64 start = 0;
907 u64 end = 0;
908 int ret;
909
910 if (!log_root_tree)
911 return 0;
912
913 while (1) {
914 ret = find_first_extent_bit(&log_root_tree->dirty_log_pages,
915 0, &start, &end, EXTENT_DIRTY);
916 if (ret)
917 break;
918
919 clear_extent_dirty(&log_root_tree->dirty_log_pages,
920 start, end, GFP_NOFS);
921 }
922 eb = fs_info->log_root_tree->node;
923
924 WARN_ON(btrfs_header_level(eb) != 0);
925 WARN_ON(btrfs_header_nritems(eb) != 0);
926
927 ret = btrfs_free_reserved_extent(fs_info->tree_root,
928 eb->start, eb->len);
929 BUG_ON(ret);
930
931 free_extent_buffer(eb);
932 kfree(fs_info->log_root_tree);
933 fs_info->log_root_tree = NULL;
934 return 0;
935}
936
937int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
938 struct btrfs_fs_info *fs_info)
939{
940 struct btrfs_root *root;
941 struct btrfs_root *tree_root = fs_info->tree_root;
942
943 root = kzalloc(sizeof(*root), GFP_NOFS);
944 if (!root)
945 return -ENOMEM;
946
947 __setup_root(tree_root->nodesize, tree_root->leafsize,
948 tree_root->sectorsize, tree_root->stripesize,
949 root, fs_info, BTRFS_TREE_LOG_OBJECTID);
950
951 root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
952 root->root_key.type = BTRFS_ROOT_ITEM_KEY;
953 root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
954 root->ref_cows = 0;
955
956 root->node = btrfs_alloc_free_block(trans, root, root->leafsize,
957 0, BTRFS_TREE_LOG_OBJECTID,
958 trans->transid, 0, 0, 0);
959
960 btrfs_set_header_nritems(root->node, 0);
961 btrfs_set_header_level(root->node, 0);
962 btrfs_set_header_bytenr(root->node, root->node->start);
963 btrfs_set_header_generation(root->node, trans->transid);
964 btrfs_set_header_owner(root->node, BTRFS_TREE_LOG_OBJECTID);
965
966 write_extent_buffer(root->node, root->fs_info->fsid,
967 (unsigned long)btrfs_header_fsid(root->node),
968 BTRFS_FSID_SIZE);
969 btrfs_mark_buffer_dirty(root->node);
970 btrfs_tree_unlock(root->node);
971 fs_info->log_root_tree = root;
972 return 0;
973}
974
975struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
976 struct btrfs_key *location)
977{
978 struct btrfs_root *root;
979 struct btrfs_fs_info *fs_info = tree_root->fs_info;
980 struct btrfs_path *path;
981 struct extent_buffer *l;
982 u64 highest_inode;
983 u64 generation;
984 u32 blocksize;
985 int ret = 0;
986
987 root = kzalloc(sizeof(*root), GFP_NOFS);
988 if (!root)
989 return ERR_PTR(-ENOMEM);
990 if (location->offset == (u64)-1) {
991 ret = find_and_setup_root(tree_root, fs_info,
992 location->objectid, root);
993 if (ret) {
994 kfree(root);
995 return ERR_PTR(ret);
996 }
997 goto insert;
998 }
999
1000 __setup_root(tree_root->nodesize, tree_root->leafsize,
1001 tree_root->sectorsize, tree_root->stripesize,
1002 root, fs_info, location->objectid);
1003
1004 path = btrfs_alloc_path();
1005 BUG_ON(!path);
1006 ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
1007 if (ret != 0) {
1008 if (ret > 0)
1009 ret = -ENOENT;
1010 goto out;
1011 }
1012 l = path->nodes[0];
1013 read_extent_buffer(l, &root->root_item,
1014 btrfs_item_ptr_offset(l, path->slots[0]),
1015 sizeof(root->root_item));
1016 memcpy(&root->root_key, location, sizeof(*location));
1017 ret = 0;
1018out:
1019 btrfs_release_path(root, path);
1020 btrfs_free_path(path);
1021 if (ret) {
1022 kfree(root);
1023 return ERR_PTR(ret);
1024 }
1025 generation = btrfs_root_generation(&root->root_item);
1026 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
1027 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
1028 blocksize, generation);
1029 BUG_ON(!root->node);
1030insert:
1031 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
1032 root->ref_cows = 1;
1033 ret = btrfs_find_highest_inode(root, &highest_inode);
1034 if (ret == 0) {
1035 root->highest_inode = highest_inode;
1036 root->last_inode_alloc = highest_inode;
1037 }
1038 }
1039 return root;
1040}
1041
1042struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
1043 u64 root_objectid)
1044{
1045 struct btrfs_root *root;
1046
1047 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID)
1048 return fs_info->tree_root;
1049 if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID)
1050 return fs_info->extent_root;
1051
1052 root = radix_tree_lookup(&fs_info->fs_roots_radix,
1053 (unsigned long)root_objectid);
1054 return root;
1055}
1056
1057struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
1058 struct btrfs_key *location)
1059{
1060 struct btrfs_root *root;
1061 int ret;
1062
1063 if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
1064 return fs_info->tree_root;
1065 if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID)
1066 return fs_info->extent_root;
1067 if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID)
1068 return fs_info->chunk_root;
1069 if (location->objectid == BTRFS_DEV_TREE_OBJECTID)
1070 return fs_info->dev_root;
1071 if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
1072 return fs_info->csum_root;
1073
1074 root = radix_tree_lookup(&fs_info->fs_roots_radix,
1075 (unsigned long)location->objectid);
1076 if (root)
1077 return root;
1078
1079 root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location);
1080 if (IS_ERR(root))
1081 return root;
1082
1083 set_anon_super(&root->anon_super, NULL);
1084
1085 ret = radix_tree_insert(&fs_info->fs_roots_radix,
1086 (unsigned long)root->root_key.objectid,
1087 root);
1088 if (ret) {
1089 free_extent_buffer(root->node);
1090 kfree(root);
1091 return ERR_PTR(ret);
1092 }
1093 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
1094 ret = btrfs_find_dead_roots(fs_info->tree_root,
1095 root->root_key.objectid, root);
1096 BUG_ON(ret);
1097 btrfs_orphan_cleanup(root);
1098 }
1099 return root;
1100}
1101
1102struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
1103 struct btrfs_key *location,
1104 const char *name, int namelen)
1105{
1106 struct btrfs_root *root;
1107 int ret;
1108
1109 root = btrfs_read_fs_root_no_name(fs_info, location);
1110 if (!root)
1111 return NULL;
1112
1113 if (root->in_sysfs)
1114 return root;
1115
1116 ret = btrfs_set_root_name(root, name, namelen);
1117 if (ret) {
1118 free_extent_buffer(root->node);
1119 kfree(root);
1120 return ERR_PTR(ret);
1121 }
1122#if 0
1123 ret = btrfs_sysfs_add_root(root);
1124 if (ret) {
1125 free_extent_buffer(root->node);
1126 kfree(root->name);
1127 kfree(root);
1128 return ERR_PTR(ret);
1129 }
1130#endif
1131 root->in_sysfs = 1;
1132 return root;
1133}
1134
1135static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1136{
1137 struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;
1138 int ret = 0;
1139 struct list_head *cur;
1140 struct btrfs_device *device;
1141 struct backing_dev_info *bdi;
1142#if 0
1143 if ((bdi_bits & (1 << BDI_write_congested)) &&
1144 btrfs_congested_async(info, 0))
1145 return 1;
1146#endif
1147 list_for_each(cur, &info->fs_devices->devices) {
1148 device = list_entry(cur, struct btrfs_device, dev_list);
1149 if (!device->bdev)
1150 continue;
1151 bdi = blk_get_backing_dev_info(device->bdev);
1152 if (bdi && bdi_congested(bdi, bdi_bits)) {
1153 ret = 1;
1154 break;
1155 }
1156 }
1157 return ret;
1158}
1159
1160/*
1161 * this unplugs every device on the box, and it is only used when page
1162 * is null
1163 */
1164static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1165{
1166 struct list_head *cur;
1167 struct btrfs_device *device;
1168 struct btrfs_fs_info *info;
1169
1170 info = (struct btrfs_fs_info *)bdi->unplug_io_data;
1171 list_for_each(cur, &info->fs_devices->devices) {
1172 device = list_entry(cur, struct btrfs_device, dev_list);
1173 if (!device->bdev)
1174 continue;
1175
1176 bdi = blk_get_backing_dev_info(device->bdev);
1177 if (bdi->unplug_io_fn)
1178 bdi->unplug_io_fn(bdi, page);
1179 }
1180}
1181
1182static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1183{
1184 struct inode *inode;
1185 struct extent_map_tree *em_tree;
1186 struct extent_map *em;
1187 struct address_space *mapping;
1188 u64 offset;
1189
1190 /* the generic O_DIRECT read code does this */
1191 if (1 || !page) {
1192 __unplug_io_fn(bdi, page);
1193 return;
1194 }
1195
1196 /*
1197 * page->mapping may change at any time. Get a consistent copy
1198 * and use that for everything below
1199 */
1200 smp_mb();
1201 mapping = page->mapping;
1202 if (!mapping)
1203 return;
1204
1205 inode = mapping->host;
1206
1207 /*
1208 * don't do the expensive searching for a small number of
1209 * devices
1210 */
1211 if (BTRFS_I(inode)->root->fs_info->fs_devices->open_devices <= 2) {
1212 __unplug_io_fn(bdi, page);
1213 return;
1214 }
1215
1216 offset = page_offset(page);
1217
1218 em_tree = &BTRFS_I(inode)->extent_tree;
1219 spin_lock(&em_tree->lock);
1220 em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
1221 spin_unlock(&em_tree->lock);
1222 if (!em) {
1223 __unplug_io_fn(bdi, page);
1224 return;
1225 }
1226
1227 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
1228 free_extent_map(em);
1229 __unplug_io_fn(bdi, page);
1230 return;
1231 }
1232 offset = offset - em->start;
1233 btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree,
1234 em->block_start + offset, page);
1235 free_extent_map(em);
1236}
1237
1238static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1239{
1240 bdi_init(bdi);
1241 bdi->ra_pages = default_backing_dev_info.ra_pages;
1242 bdi->state = 0;
1243 bdi->capabilities = default_backing_dev_info.capabilities;
1244 bdi->unplug_io_fn = btrfs_unplug_io_fn;
1245 bdi->unplug_io_data = info;
1246 bdi->congested_fn = btrfs_congested_fn;
1247 bdi->congested_data = info;
1248 return 0;
1249}
1250
1251static int bio_ready_for_csum(struct bio *bio)
1252{
1253 u64 length = 0;
1254 u64 buf_len = 0;
1255 u64 start = 0;
1256 struct page *page;
1257 struct extent_io_tree *io_tree = NULL;
1258 struct btrfs_fs_info *info = NULL;
1259 struct bio_vec *bvec;
1260 int i;
1261 int ret;
1262
1263 bio_for_each_segment(bvec, bio, i) {
1264 page = bvec->bv_page;
1265 if (page->private == EXTENT_PAGE_PRIVATE) {
1266 length += bvec->bv_len;
1267 continue;
1268 }
1269 if (!page->private) {
1270 length += bvec->bv_len;
1271 continue;
1272 }
1273 length = bvec->bv_len;
1274 buf_len = page->private >> 2;
1275 start = page_offset(page) + bvec->bv_offset;
1276 io_tree = &BTRFS_I(page->mapping->host)->io_tree;
1277 info = BTRFS_I(page->mapping->host)->root->fs_info;
1278 }
1279 /* are we fully contained in this bio? */
1280 if (buf_len <= length)
1281 return 1;
1282
1283 ret = extent_range_uptodate(io_tree, start + length,
1284 start + buf_len - 1);
1285 if (ret == 1)
1286 return ret;
1287 return ret;
1288}
1289
1290/*
1291 * called by the kthread helper functions to finally call the bio end_io
1292 * functions. This is where read checksum verification actually happens
1293 */
1294static void end_workqueue_fn(struct btrfs_work *work)
1295{
1296 struct bio *bio;
1297 struct end_io_wq *end_io_wq;
1298 struct btrfs_fs_info *fs_info;
1299 int error;
1300
1301 end_io_wq = container_of(work, struct end_io_wq, work);
1302 bio = end_io_wq->bio;
1303 fs_info = end_io_wq->info;
1304
1305 /* metadata bio reads are special because the whole tree block must
1306 * be checksummed at once. This makes sure the entire block is in
1307 * ram and up to date before trying to verify things. For
1308 * blocksize <= pagesize, it is basically a noop
1309 */
1310 if (!(bio->bi_rw & (1 << BIO_RW)) && end_io_wq->metadata &&
1311 !bio_ready_for_csum(bio)) {
1312 btrfs_queue_worker(&fs_info->endio_meta_workers,
1313 &end_io_wq->work);
1314 return;
1315 }
1316 error = end_io_wq->error;
1317 bio->bi_private = end_io_wq->private;
1318 bio->bi_end_io = end_io_wq->end_io;
1319 kfree(end_io_wq);
1320 bio_endio(bio, error);
1321}
1322
1323static int cleaner_kthread(void *arg)
1324{
1325 struct btrfs_root *root = arg;
1326
1327 do {
1328 smp_mb();
1329 if (root->fs_info->closing)
1330 break;
1331
1332 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1333 mutex_lock(&root->fs_info->cleaner_mutex);
1334 btrfs_clean_old_snapshots(root);
1335 mutex_unlock(&root->fs_info->cleaner_mutex);
1336
1337 if (freezing(current)) {
1338 refrigerator();
1339 } else {
1340 smp_mb();
1341 if (root->fs_info->closing)
1342 break;
1343 set_current_state(TASK_INTERRUPTIBLE);
1344 schedule();
1345 __set_current_state(TASK_RUNNING);
1346 }
1347 } while (!kthread_should_stop());
1348 return 0;
1349}
1350
1351static int transaction_kthread(void *arg)
1352{
1353 struct btrfs_root *root = arg;
1354 struct btrfs_trans_handle *trans;
1355 struct btrfs_transaction *cur;
1356 unsigned long now;
1357 unsigned long delay;
1358 int ret;
1359
1360 do {
1361 smp_mb();
1362 if (root->fs_info->closing)
1363 break;
1364
1365 delay = HZ * 30;
1366 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1367 mutex_lock(&root->fs_info->transaction_kthread_mutex);
1368
1369 if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) {
1370 printk(KERN_INFO "btrfs: total reference cache "
1371 "size %llu\n",
1372 root->fs_info->total_ref_cache_size);
1373 }
1374
1375 mutex_lock(&root->fs_info->trans_mutex);
1376 cur = root->fs_info->running_transaction;
1377 if (!cur) {
1378 mutex_unlock(&root->fs_info->trans_mutex);
1379 goto sleep;
1380 }
1381
1382 now = get_seconds();
1383 if (now < cur->start_time || now - cur->start_time < 30) {
1384 mutex_unlock(&root->fs_info->trans_mutex);
1385 delay = HZ * 5;
1386 goto sleep;
1387 }
1388 mutex_unlock(&root->fs_info->trans_mutex);
1389 trans = btrfs_start_transaction(root, 1);
1390 ret = btrfs_commit_transaction(trans, root);
1391sleep:
1392 wake_up_process(root->fs_info->cleaner_kthread);
1393 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
1394
1395 if (freezing(current)) {
1396 refrigerator();
1397 } else {
1398 if (root->fs_info->closing)
1399 break;
1400 set_current_state(TASK_INTERRUPTIBLE);
1401 schedule_timeout(delay);
1402 __set_current_state(TASK_RUNNING);
1403 }
1404 } while (!kthread_should_stop());
1405 return 0;
1406}
1407
1408struct btrfs_root *open_ctree(struct super_block *sb,
1409 struct btrfs_fs_devices *fs_devices,
1410 char *options)
1411{
1412 u32 sectorsize;
1413 u32 nodesize;
1414 u32 leafsize;
1415 u32 blocksize;
1416 u32 stripesize;
1417 u64 generation;
1418 u64 features;
1419 struct btrfs_key location;
1420 struct buffer_head *bh;
1421 struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root),
1422 GFP_NOFS);
1423 struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
1424 GFP_NOFS);
1425 struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root),
1426 GFP_NOFS);
1427 struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info),
1428 GFP_NOFS);
1429 struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
1430 GFP_NOFS);
1431 struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),
1432 GFP_NOFS);
1433 struct btrfs_root *log_tree_root;
1434
1435 int ret;
1436 int err = -EINVAL;
1437
1438 struct btrfs_super_block *disk_super;
1439
1440 if (!extent_root || !tree_root || !fs_info ||
1441 !chunk_root || !dev_root || !csum_root) {
1442 err = -ENOMEM;
1443 goto fail;
1444 }
1445 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
1446 INIT_LIST_HEAD(&fs_info->trans_list);
1447 INIT_LIST_HEAD(&fs_info->dead_roots);
1448 INIT_LIST_HEAD(&fs_info->hashers);
1449 INIT_LIST_HEAD(&fs_info->delalloc_inodes);
1450 spin_lock_init(&fs_info->hash_lock);
1451 spin_lock_init(&fs_info->delalloc_lock);
1452 spin_lock_init(&fs_info->new_trans_lock);
1453 spin_lock_init(&fs_info->ref_cache_lock);
1454
1455 init_completion(&fs_info->kobj_unregister);
1456 fs_info->tree_root = tree_root;
1457 fs_info->extent_root = extent_root;
1458 fs_info->csum_root = csum_root;
1459 fs_info->chunk_root = chunk_root;
1460 fs_info->dev_root = dev_root;
1461 fs_info->fs_devices = fs_devices;
1462 INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
1463 INIT_LIST_HEAD(&fs_info->space_info);
1464 btrfs_mapping_init(&fs_info->mapping_tree);
1465 atomic_set(&fs_info->nr_async_submits, 0);
1466 atomic_set(&fs_info->async_delalloc_pages, 0);
1467 atomic_set(&fs_info->async_submit_draining, 0);
1468 atomic_set(&fs_info->nr_async_bios, 0);
1469 atomic_set(&fs_info->throttles, 0);
1470 atomic_set(&fs_info->throttle_gen, 0);
1471 fs_info->sb = sb;
1472 fs_info->max_extent = (u64)-1;
1473 fs_info->max_inline = 8192 * 1024;
1474 setup_bdi(fs_info, &fs_info->bdi);
1475 fs_info->btree_inode = new_inode(sb);
1476 fs_info->btree_inode->i_ino = 1;
1477 fs_info->btree_inode->i_nlink = 1;
1478
1479 fs_info->thread_pool_size = min_t(unsigned long,
1480 num_online_cpus() + 2, 8);
1481
1482 INIT_LIST_HEAD(&fs_info->ordered_extents);
1483 spin_lock_init(&fs_info->ordered_extent_lock);
1484
1485 sb->s_blocksize = 4096;
1486 sb->s_blocksize_bits = blksize_bits(4096);
1487
1488 /*
1489 * we set the i_size on the btree inode to the max possible int.
1490 * the real end of the address space is determined by all of
1491 * the devices in the system
1492 */
1493 fs_info->btree_inode->i_size = OFFSET_MAX;
1494 fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
1495 fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi;
1496
1497 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
1498 fs_info->btree_inode->i_mapping,
1499 GFP_NOFS);
1500 extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree,
1501 GFP_NOFS);
1502
1503 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
1504
1505 spin_lock_init(&fs_info->block_group_cache_lock);
1506 fs_info->block_group_cache_tree.rb_node = NULL;
1507
1508 extent_io_tree_init(&fs_info->pinned_extents,
1509 fs_info->btree_inode->i_mapping, GFP_NOFS);
1510 extent_io_tree_init(&fs_info->pending_del,
1511 fs_info->btree_inode->i_mapping, GFP_NOFS);
1512 extent_io_tree_init(&fs_info->extent_ins,
1513 fs_info->btree_inode->i_mapping, GFP_NOFS);
1514 fs_info->do_barriers = 1;
1515
1516 INIT_LIST_HEAD(&fs_info->dead_reloc_roots);
1517 btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree);
1518 btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree);
1519
1520 BTRFS_I(fs_info->btree_inode)->root = tree_root;
1521 memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
1522 sizeof(struct btrfs_key));
1523 insert_inode_hash(fs_info->btree_inode);
1524
1525 mutex_init(&fs_info->trans_mutex);
1526 mutex_init(&fs_info->tree_log_mutex);
1527 mutex_init(&fs_info->drop_mutex);
1528 mutex_init(&fs_info->extent_ins_mutex);
1529 mutex_init(&fs_info->pinned_mutex);
1530 mutex_init(&fs_info->chunk_mutex);
1531 mutex_init(&fs_info->transaction_kthread_mutex);
1532 mutex_init(&fs_info->cleaner_mutex);
1533 mutex_init(&fs_info->volume_mutex);
1534 mutex_init(&fs_info->tree_reloc_mutex);
1535 init_waitqueue_head(&fs_info->transaction_throttle);
1536 init_waitqueue_head(&fs_info->transaction_wait);
1537 init_waitqueue_head(&fs_info->async_submit_wait);
1538 init_waitqueue_head(&fs_info->tree_log_wait);
1539 atomic_set(&fs_info->tree_log_commit, 0);
1540 atomic_set(&fs_info->tree_log_writers, 0);
1541 fs_info->tree_log_transid = 0;
1542
1543 __setup_root(4096, 4096, 4096, 4096, tree_root,
1544 fs_info, BTRFS_ROOT_TREE_OBJECTID);
1545
1546
1547 bh = btrfs_read_dev_super(fs_devices->latest_bdev);
1548 if (!bh)
1549 goto fail_iput;
1550
1551 memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy));
1552 memcpy(&fs_info->super_for_commit, &fs_info->super_copy,
1553 sizeof(fs_info->super_for_commit));
1554 brelse(bh);
1555
1556 memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE);
1557
1558 disk_super = &fs_info->super_copy;
1559 if (!btrfs_super_root(disk_super))
1560 goto fail_iput;
1561
1562 ret = btrfs_parse_options(tree_root, options);
1563 if (ret) {
1564 err = ret;
1565 goto fail_iput;
1566 }
1567
1568 features = btrfs_super_incompat_flags(disk_super) &
1569 ~BTRFS_FEATURE_INCOMPAT_SUPP;
1570 if (features) {
1571 printk(KERN_ERR "BTRFS: couldn't mount because of "
1572 "unsupported optional features (%Lx).\n",
1573 features);
1574 err = -EINVAL;
1575 goto fail_iput;
1576 }
1577
1578 features = btrfs_super_compat_ro_flags(disk_super) &
1579 ~BTRFS_FEATURE_COMPAT_RO_SUPP;
1580 if (!(sb->s_flags & MS_RDONLY) && features) {
1581 printk(KERN_ERR "BTRFS: couldn't mount RDWR because of "
1582 "unsupported option features (%Lx).\n",
1583 features);
1584 err = -EINVAL;
1585 goto fail_iput;
1586 }
1587
1588 /*
1589 * we need to start all the end_io workers up front because the
1590 * queue work function gets called at interrupt time, and so it
1591 * cannot dynamically grow.
1592 */
1593 btrfs_init_workers(&fs_info->workers, "worker",
1594 fs_info->thread_pool_size);
1595
1596 btrfs_init_workers(&fs_info->delalloc_workers, "delalloc",
1597 fs_info->thread_pool_size);
1598
1599 btrfs_init_workers(&fs_info->submit_workers, "submit",
1600 min_t(u64, fs_devices->num_devices,
1601 fs_info->thread_pool_size));
1602
1603 /* a higher idle thresh on the submit workers makes it much more
1604 * likely that bios will be send down in a sane order to the
1605 * devices
1606 */
1607 fs_info->submit_workers.idle_thresh = 64;
1608
1609 fs_info->workers.idle_thresh = 16;
1610 fs_info->workers.ordered = 1;
1611
1612 fs_info->delalloc_workers.idle_thresh = 2;
1613 fs_info->delalloc_workers.ordered = 1;
1614
1615 btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1);
1616 btrfs_init_workers(&fs_info->endio_workers, "endio",
1617 fs_info->thread_pool_size);
1618 btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta",
1619 fs_info->thread_pool_size);
1620 btrfs_init_workers(&fs_info->endio_meta_write_workers,
1621 "endio-meta-write", fs_info->thread_pool_size);
1622 btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
1623 fs_info->thread_pool_size);
1624
1625 /*
1626 * endios are largely parallel and should have a very
1627 * low idle thresh
1628 */
1629 fs_info->endio_workers.idle_thresh = 4;
1630 fs_info->endio_write_workers.idle_thresh = 64;
1631 fs_info->endio_meta_write_workers.idle_thresh = 64;
1632
1633 btrfs_start_workers(&fs_info->workers, 1);
1634 btrfs_start_workers(&fs_info->submit_workers, 1);
1635 btrfs_start_workers(&fs_info->delalloc_workers, 1);
1636 btrfs_start_workers(&fs_info->fixup_workers, 1);
1637 btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
1638 btrfs_start_workers(&fs_info->endio_meta_workers,
1639 fs_info->thread_pool_size);
1640 btrfs_start_workers(&fs_info->endio_meta_write_workers,
1641 fs_info->thread_pool_size);
1642 btrfs_start_workers(&fs_info->endio_write_workers,
1643 fs_info->thread_pool_size);
1644
1645 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
1646 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
1647 4 * 1024 * 1024 / PAGE_CACHE_SIZE);
1648
1649 nodesize = btrfs_super_nodesize(disk_super);
1650 leafsize = btrfs_super_leafsize(disk_super);
1651 sectorsize = btrfs_super_sectorsize(disk_super);
1652 stripesize = btrfs_super_stripesize(disk_super);
1653 tree_root->nodesize = nodesize;
1654 tree_root->leafsize = leafsize;
1655 tree_root->sectorsize = sectorsize;
1656 tree_root->stripesize = stripesize;
1657
1658 sb->s_blocksize = sectorsize;
1659 sb->s_blocksize_bits = blksize_bits(sectorsize);
1660
1661 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
1662 sizeof(disk_super->magic))) {
1663 printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id);
1664 goto fail_sb_buffer;
1665 }
1666
1667 mutex_lock(&fs_info->chunk_mutex);
1668 ret = btrfs_read_sys_array(tree_root);
1669 mutex_unlock(&fs_info->chunk_mutex);
1670 if (ret) {
1671 printk(KERN_WARNING "btrfs: failed to read the system "
1672 "array on %s\n", sb->s_id);
1673 goto fail_sys_array;
1674 }
1675
1676 blocksize = btrfs_level_size(tree_root,
1677 btrfs_super_chunk_root_level(disk_super));
1678 generation = btrfs_super_chunk_root_generation(disk_super);
1679
1680 __setup_root(nodesize, leafsize, sectorsize, stripesize,
1681 chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
1682
1683 chunk_root->node = read_tree_block(chunk_root,
1684 btrfs_super_chunk_root(disk_super),
1685 blocksize, generation);
1686 BUG_ON(!chunk_root->node);
1687
1688 read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
1689 (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
1690 BTRFS_UUID_SIZE);
1691
1692 mutex_lock(&fs_info->chunk_mutex);
1693 ret = btrfs_read_chunk_tree(chunk_root);
1694 mutex_unlock(&fs_info->chunk_mutex);
1695 if (ret) {
1696 printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n",
1697 sb->s_id);
1698 goto fail_chunk_root;
1699 }
1700
1701 btrfs_close_extra_devices(fs_devices);
1702
1703 blocksize = btrfs_level_size(tree_root,
1704 btrfs_super_root_level(disk_super));
1705 generation = btrfs_super_generation(disk_super);
1706
1707 tree_root->node = read_tree_block(tree_root,
1708 btrfs_super_root(disk_super),
1709 blocksize, generation);
1710 if (!tree_root->node)
1711 goto fail_chunk_root;
1712
1713
1714 ret = find_and_setup_root(tree_root, fs_info,
1715 BTRFS_EXTENT_TREE_OBJECTID, extent_root);
1716 if (ret)
1717 goto fail_tree_root;
1718 extent_root->track_dirty = 1;
1719
1720 ret = find_and_setup_root(tree_root, fs_info,
1721 BTRFS_DEV_TREE_OBJECTID, dev_root);
1722 dev_root->track_dirty = 1;
1723
1724 if (ret)
1725 goto fail_extent_root;
1726
1727 ret = find_and_setup_root(tree_root, fs_info,
1728 BTRFS_CSUM_TREE_OBJECTID, csum_root);
1729 if (ret)
1730 goto fail_extent_root;
1731
1732 csum_root->track_dirty = 1;
1733
1734 btrfs_read_block_groups(extent_root);
1735
1736 fs_info->generation = generation;
1737 fs_info->last_trans_committed = generation;
1738 fs_info->data_alloc_profile = (u64)-1;
1739 fs_info->metadata_alloc_profile = (u64)-1;
1740 fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
1741 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
1742 "btrfs-cleaner");
1743 if (!fs_info->cleaner_kthread)
1744 goto fail_csum_root;
1745
1746 fs_info->transaction_kthread = kthread_run(transaction_kthread,
1747 tree_root,
1748 "btrfs-transaction");
1749 if (!fs_info->transaction_kthread)
1750 goto fail_cleaner;
1751
1752 if (btrfs_super_log_root(disk_super) != 0) {
1753 u64 bytenr = btrfs_super_log_root(disk_super);
1754
1755 if (fs_devices->rw_devices == 0) {
1756 printk(KERN_WARNING "Btrfs log replay required "
1757 "on RO media\n");
1758 err = -EIO;
1759 goto fail_trans_kthread;
1760 }
1761 blocksize =
1762 btrfs_level_size(tree_root,
1763 btrfs_super_log_root_level(disk_super));
1764
1765 log_tree_root = kzalloc(sizeof(struct btrfs_root),
1766 GFP_NOFS);
1767
1768 __setup_root(nodesize, leafsize, sectorsize, stripesize,
1769 log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
1770
1771 log_tree_root->node = read_tree_block(tree_root, bytenr,
1772 blocksize,
1773 generation + 1);
1774 ret = btrfs_recover_log_trees(log_tree_root);
1775 BUG_ON(ret);
1776
1777 if (sb->s_flags & MS_RDONLY) {
1778 ret = btrfs_commit_super(tree_root);
1779 BUG_ON(ret);
1780 }
1781 }
1782
1783 if (!(sb->s_flags & MS_RDONLY)) {
1784 ret = btrfs_cleanup_reloc_trees(tree_root);
1785 BUG_ON(ret);
1786 }
1787
1788 location.objectid = BTRFS_FS_TREE_OBJECTID;
1789 location.type = BTRFS_ROOT_ITEM_KEY;
1790 location.offset = (u64)-1;
1791
1792 fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
1793 if (!fs_info->fs_root)
1794 goto fail_trans_kthread;
1795 return tree_root;
1796
1797fail_trans_kthread:
1798 kthread_stop(fs_info->transaction_kthread);
1799fail_cleaner:
1800 kthread_stop(fs_info->cleaner_kthread);
1801
1802 /*
1803 * make sure we're done with the btree inode before we stop our
1804 * kthreads
1805 */
1806 filemap_write_and_wait(fs_info->btree_inode->i_mapping);
1807 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
1808
1809fail_csum_root:
1810 free_extent_buffer(csum_root->node);
1811fail_extent_root:
1812 free_extent_buffer(extent_root->node);
1813fail_tree_root:
1814 free_extent_buffer(tree_root->node);
1815fail_chunk_root:
1816 free_extent_buffer(chunk_root->node);
1817fail_sys_array:
1818 free_extent_buffer(dev_root->node);
1819fail_sb_buffer:
1820 btrfs_stop_workers(&fs_info->fixup_workers);
1821 btrfs_stop_workers(&fs_info->delalloc_workers);
1822 btrfs_stop_workers(&fs_info->workers);
1823 btrfs_stop_workers(&fs_info->endio_workers);
1824 btrfs_stop_workers(&fs_info->endio_meta_workers);
1825 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
1826 btrfs_stop_workers(&fs_info->endio_write_workers);
1827 btrfs_stop_workers(&fs_info->submit_workers);
1828fail_iput:
1829 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
1830 iput(fs_info->btree_inode);
1831fail:
1832 btrfs_close_devices(fs_info->fs_devices);
1833 btrfs_mapping_tree_free(&fs_info->mapping_tree);
1834
1835 kfree(extent_root);
1836 kfree(tree_root);
1837 bdi_destroy(&fs_info->bdi);
1838 kfree(fs_info);
1839 kfree(chunk_root);
1840 kfree(dev_root);
1841 kfree(csum_root);
1842 return ERR_PTR(err);
1843}
1844
1845static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
1846{
1847 char b[BDEVNAME_SIZE];
1848
1849 if (uptodate) {
1850 set_buffer_uptodate(bh);
1851 } else {
1852 if (!buffer_eopnotsupp(bh) && printk_ratelimit()) {
1853 printk(KERN_WARNING "lost page write due to "
1854 "I/O error on %s\n",
1855 bdevname(bh->b_bdev, b));
1856 }
1857 /* note, we dont' set_buffer_write_io_error because we have
1858 * our own ways of dealing with the IO errors
1859 */
1860 clear_buffer_uptodate(bh);
1861 }
1862 unlock_buffer(bh);
1863 put_bh(bh);
1864}
1865
1866struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
1867{
1868 struct buffer_head *bh;
1869 struct buffer_head *latest = NULL;
1870 struct btrfs_super_block *super;
1871 int i;
1872 u64 transid = 0;
1873 u64 bytenr;
1874
1875 /* we would like to check all the supers, but that would make
1876 * a btrfs mount succeed after a mkfs from a different FS.
1877 * So, we need to add a special mount option to scan for
1878 * later supers, using BTRFS_SUPER_MIRROR_MAX instead
1879 */
1880 for (i = 0; i < 1; i++) {
1881 bytenr = btrfs_sb_offset(i);
1882 if (bytenr + 4096 >= i_size_read(bdev->bd_inode))
1883 break;
1884 bh = __bread(bdev, bytenr / 4096, 4096);
1885 if (!bh)
1886 continue;
1887
1888 super = (struct btrfs_super_block *)bh->b_data;
1889 if (btrfs_super_bytenr(super) != bytenr ||
1890 strncmp((char *)(&super->magic), BTRFS_MAGIC,
1891 sizeof(super->magic))) {
1892 brelse(bh);
1893 continue;
1894 }
1895
1896 if (!latest || btrfs_super_generation(super) > transid) {
1897 brelse(latest);
1898 latest = bh;
1899 transid = btrfs_super_generation(super);
1900 } else {
1901 brelse(bh);
1902 }
1903 }
1904 return latest;
1905}
1906
1907static int write_dev_supers(struct btrfs_device *device,
1908 struct btrfs_super_block *sb,
1909 int do_barriers, int wait, int max_mirrors)
1910{
1911 struct buffer_head *bh;
1912 int i;
1913 int ret;
1914 int errors = 0;
1915 u32 crc;
1916 u64 bytenr;
1917 int last_barrier = 0;
1918
1919 if (max_mirrors == 0)
1920 max_mirrors = BTRFS_SUPER_MIRROR_MAX;
1921
1922 /* make sure only the last submit_bh does a barrier */
1923 if (do_barriers) {
1924 for (i = 0; i < max_mirrors; i++) {
1925 bytenr = btrfs_sb_offset(i);
1926 if (bytenr + BTRFS_SUPER_INFO_SIZE >=
1927 device->total_bytes)
1928 break;
1929 last_barrier = i;
1930 }
1931 }
1932
1933 for (i = 0; i < max_mirrors; i++) {
1934 bytenr = btrfs_sb_offset(i);
1935 if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
1936 break;
1937
1938 if (wait) {
1939 bh = __find_get_block(device->bdev, bytenr / 4096,
1940 BTRFS_SUPER_INFO_SIZE);
1941 BUG_ON(!bh);
1942 brelse(bh);
1943 wait_on_buffer(bh);
1944 if (buffer_uptodate(bh)) {
1945 brelse(bh);
1946 continue;
1947 }
1948 } else {
1949 btrfs_set_super_bytenr(sb, bytenr);
1950
1951 crc = ~(u32)0;
1952 crc = btrfs_csum_data(NULL, (char *)sb +
1953 BTRFS_CSUM_SIZE, crc,
1954 BTRFS_SUPER_INFO_SIZE -
1955 BTRFS_CSUM_SIZE);
1956 btrfs_csum_final(crc, sb->csum);
1957
1958 bh = __getblk(device->bdev, bytenr / 4096,
1959 BTRFS_SUPER_INFO_SIZE);
1960 memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
1961
1962 set_buffer_uptodate(bh);
1963 get_bh(bh);
1964 lock_buffer(bh);
1965 bh->b_end_io = btrfs_end_buffer_write_sync;
1966 }
1967
1968 if (i == last_barrier && do_barriers && device->barriers) {
1969 ret = submit_bh(WRITE_BARRIER, bh);
1970 if (ret == -EOPNOTSUPP) {
1971 printk("btrfs: disabling barriers on dev %s\n",
1972 device->name);
1973 set_buffer_uptodate(bh);
1974 device->barriers = 0;
1975 get_bh(bh);
1976 lock_buffer(bh);
1977 ret = submit_bh(WRITE, bh);
1978 }
1979 } else {
1980 ret = submit_bh(WRITE, bh);
1981 }
1982
1983 if (!ret && wait) {
1984 wait_on_buffer(bh);
1985 if (!buffer_uptodate(bh))
1986 errors++;
1987 } else if (ret) {
1988 errors++;
1989 }
1990 if (wait)
1991 brelse(bh);
1992 }
1993 return errors < i ? 0 : -1;
1994}
1995
1996int write_all_supers(struct btrfs_root *root, int max_mirrors)
1997{
1998 struct list_head *cur;
1999 struct list_head *head = &root->fs_info->fs_devices->devices;
2000 struct btrfs_device *dev;
2001 struct btrfs_super_block *sb;
2002 struct btrfs_dev_item *dev_item;
2003 int ret;
2004 int do_barriers;
2005 int max_errors;
2006 int total_errors = 0;
2007 u64 flags;
2008
2009 max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
2010 do_barriers = !btrfs_test_opt(root, NOBARRIER);
2011
2012 sb = &root->fs_info->super_for_commit;
2013 dev_item = &sb->dev_item;
2014 list_for_each(cur, head) {
2015 dev = list_entry(cur, struct btrfs_device, dev_list);
2016 if (!dev->bdev) {
2017 total_errors++;
2018 continue;
2019 }
2020 if (!dev->in_fs_metadata || !dev->writeable)
2021 continue;
2022
2023 btrfs_set_stack_device_generation(dev_item, 0);
2024 btrfs_set_stack_device_type(dev_item, dev->type);
2025 btrfs_set_stack_device_id(dev_item, dev->devid);
2026 btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes);
2027 btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used);
2028 btrfs_set_stack_device_io_align(dev_item, dev->io_align);
2029 btrfs_set_stack_device_io_width(dev_item, dev->io_width);
2030 btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
2031 memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
2032 memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE);
2033
2034 flags = btrfs_super_flags(sb);
2035 btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
2036
2037 ret = write_dev_supers(dev, sb, do_barriers, 0, max_mirrors);
2038 if (ret)
2039 total_errors++;
2040 }
2041 if (total_errors > max_errors) {
2042 printk(KERN_ERR "btrfs: %d errors while writing supers\n",
2043 total_errors);
2044 BUG();
2045 }
2046
2047 total_errors = 0;
2048 list_for_each(cur, head) {
2049 dev = list_entry(cur, struct btrfs_device, dev_list);
2050 if (!dev->bdev)
2051 continue;
2052 if (!dev->in_fs_metadata || !dev->writeable)
2053 continue;
2054
2055 ret = write_dev_supers(dev, sb, do_barriers, 1, max_mirrors);
2056 if (ret)
2057 total_errors++;
2058 }
2059 if (total_errors > max_errors) {
2060 printk(KERN_ERR "btrfs: %d errors while writing supers\n",
2061 total_errors);
2062 BUG();
2063 }
2064 return 0;
2065}
2066
2067int write_ctree_super(struct btrfs_trans_handle *trans,
2068 struct btrfs_root *root, int max_mirrors)
2069{
2070 int ret;
2071
2072 ret = write_all_supers(root, max_mirrors);
2073 return ret;
2074}
2075
2076int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2077{
2078 radix_tree_delete(&fs_info->fs_roots_radix,
2079 (unsigned long)root->root_key.objectid);
2080 if (root->anon_super.s_dev) {
2081 down_write(&root->anon_super.s_umount);
2082 kill_anon_super(&root->anon_super);
2083 }
2084 if (root->node)
2085 free_extent_buffer(root->node);
2086 if (root->commit_root)
2087 free_extent_buffer(root->commit_root);
2088 kfree(root->name);
2089 kfree(root);
2090 return 0;
2091}
2092
2093static int del_fs_roots(struct btrfs_fs_info *fs_info)
2094{
2095 int ret;
2096 struct btrfs_root *gang[8];
2097 int i;
2098
2099 while (1) {
2100 ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
2101 (void **)gang, 0,
2102 ARRAY_SIZE(gang));
2103 if (!ret)
2104 break;
2105 for (i = 0; i < ret; i++)
2106 btrfs_free_fs_root(fs_info, gang[i]);
2107 }
2108 return 0;
2109}
2110
2111int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
2112{
2113 u64 root_objectid = 0;
2114 struct btrfs_root *gang[8];
2115 int i;
2116 int ret;
2117
2118 while (1) {
2119 ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
2120 (void **)gang, root_objectid,
2121 ARRAY_SIZE(gang));
2122 if (!ret)
2123 break;
2124 for (i = 0; i < ret; i++) {
2125 root_objectid = gang[i]->root_key.objectid;
2126 ret = btrfs_find_dead_roots(fs_info->tree_root,
2127 root_objectid, gang[i]);
2128 BUG_ON(ret);
2129 btrfs_orphan_cleanup(gang[i]);
2130 }
2131 root_objectid++;
2132 }
2133 return 0;
2134}
2135
2136int btrfs_commit_super(struct btrfs_root *root)
2137{
2138 struct btrfs_trans_handle *trans;
2139 int ret;
2140
2141 mutex_lock(&root->fs_info->cleaner_mutex);
2142 btrfs_clean_old_snapshots(root);
2143 mutex_unlock(&root->fs_info->cleaner_mutex);
2144 trans = btrfs_start_transaction(root, 1);
2145 ret = btrfs_commit_transaction(trans, root);
2146 BUG_ON(ret);
2147 /* run commit again to drop the original snapshot */
2148 trans = btrfs_start_transaction(root, 1);
2149 btrfs_commit_transaction(trans, root);
2150 ret = btrfs_write_and_wait_transaction(NULL, root);
2151 BUG_ON(ret);
2152
2153 ret = write_ctree_super(NULL, root, 0);
2154 return ret;
2155}
2156
2157int close_ctree(struct btrfs_root *root)
2158{
2159 struct btrfs_fs_info *fs_info = root->fs_info;
2160 int ret;
2161
2162 fs_info->closing = 1;
2163 smp_mb();
2164
2165 kthread_stop(root->fs_info->transaction_kthread);
2166 kthread_stop(root->fs_info->cleaner_kthread);
2167
2168 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
2169 ret = btrfs_commit_super(root);
2170 if (ret)
2171 printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
2172 }
2173
2174 if (fs_info->delalloc_bytes) {
2175 printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
2176 fs_info->delalloc_bytes);
2177 }
2178 if (fs_info->total_ref_cache_size) {
2179 printk(KERN_INFO "btrfs: at umount reference cache size %llu\n",
2180 (unsigned long long)fs_info->total_ref_cache_size);
2181 }
2182
2183 if (fs_info->extent_root->node)
2184 free_extent_buffer(fs_info->extent_root->node);
2185
2186 if (fs_info->tree_root->node)
2187 free_extent_buffer(fs_info->tree_root->node);
2188
2189 if (root->fs_info->chunk_root->node)
2190 free_extent_buffer(root->fs_info->chunk_root->node);
2191
2192 if (root->fs_info->dev_root->node)
2193 free_extent_buffer(root->fs_info->dev_root->node);
2194
2195 if (root->fs_info->csum_root->node)
2196 free_extent_buffer(root->fs_info->csum_root->node);
2197
2198 btrfs_free_block_groups(root->fs_info);
2199
2200 del_fs_roots(fs_info);
2201
2202 iput(fs_info->btree_inode);
2203
2204 btrfs_stop_workers(&fs_info->fixup_workers);
2205 btrfs_stop_workers(&fs_info->delalloc_workers);
2206 btrfs_stop_workers(&fs_info->workers);
2207 btrfs_stop_workers(&fs_info->endio_workers);
2208 btrfs_stop_workers(&fs_info->endio_meta_workers);
2209 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
2210 btrfs_stop_workers(&fs_info->endio_write_workers);
2211 btrfs_stop_workers(&fs_info->submit_workers);
2212
2213#if 0
2214 while (!list_empty(&fs_info->hashers)) {
2215 struct btrfs_hasher *hasher;
2216 hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher,
2217 hashers);
2218 list_del(&hasher->hashers);
2219 crypto_free_hash(&fs_info->hash_tfm);
2220 kfree(hasher);
2221 }
2222#endif
2223 btrfs_close_devices(fs_info->fs_devices);
2224 btrfs_mapping_tree_free(&fs_info->mapping_tree);
2225
2226 bdi_destroy(&fs_info->bdi);
2227
2228 kfree(fs_info->extent_root);
2229 kfree(fs_info->tree_root);
2230 kfree(fs_info->chunk_root);
2231 kfree(fs_info->dev_root);
2232 kfree(fs_info->csum_root);
2233 return 0;
2234}
2235
2236int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
2237{
2238 int ret;
2239 struct inode *btree_inode = buf->first_page->mapping->host;
2240
2241 ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf);
2242 if (!ret)
2243 return ret;
2244
2245 ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf,
2246 parent_transid);
2247 return !ret;
2248}
2249
2250int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
2251{
2252 struct inode *btree_inode = buf->first_page->mapping->host;
2253 return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree,
2254 buf);
2255}
2256
2257void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
2258{
2259 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
2260 u64 transid = btrfs_header_generation(buf);
2261 struct inode *btree_inode = root->fs_info->btree_inode;
2262
2263 WARN_ON(!btrfs_tree_locked(buf));
2264 if (transid != root->fs_info->generation) {
2265 printk(KERN_CRIT "btrfs transid mismatch buffer %llu, "
2266 "found %llu running %llu\n",
2267 (unsigned long long)buf->start,
2268 (unsigned long long)transid,
2269 (unsigned long long)root->fs_info->generation);
2270 WARN_ON(1);
2271 }
2272 set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf);
2273}
2274
2275void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
2276{
2277 /*
2278 * looks as though older kernels can get into trouble with
2279 * this code, they end up stuck in balance_dirty_pages forever
2280 */
2281 struct extent_io_tree *tree;
2282 u64 num_dirty;
2283 u64 start = 0;
2284 unsigned long thresh = 32 * 1024 * 1024;
2285 tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
2286
2287 if (current_is_pdflush() || current->flags & PF_MEMALLOC)
2288 return;
2289
2290 num_dirty = count_range_bits(tree, &start, (u64)-1,
2291 thresh, EXTENT_DIRTY);
2292 if (num_dirty > thresh) {
2293 balance_dirty_pages_ratelimited_nr(
2294 root->fs_info->btree_inode->i_mapping, 1);
2295 }
2296 return;
2297}
2298
2299int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
2300{
2301 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
2302 int ret;
2303 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
2304 if (ret == 0)
2305 buf->flags |= EXTENT_UPTODATE;
2306 return ret;
2307}
2308
2309int btree_lock_page_hook(struct page *page)
2310{
2311 struct inode *inode = page->mapping->host;
2312 struct btrfs_root *root = BTRFS_I(inode)->root;
2313 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2314 struct extent_buffer *eb;
2315 unsigned long len;
2316 u64 bytenr = page_offset(page);
2317
2318 if (page->private == EXTENT_PAGE_PRIVATE)
2319 goto out;
2320
2321 len = page->private >> 2;
2322 eb = find_extent_buffer(io_tree, bytenr, len, GFP_NOFS);
2323 if (!eb)
2324 goto out;
2325
2326 btrfs_tree_lock(eb);
2327 spin_lock(&root->fs_info->hash_lock);
2328 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
2329 spin_unlock(&root->fs_info->hash_lock);
2330 btrfs_tree_unlock(eb);
2331 free_extent_buffer(eb);
2332out:
2333 lock_page(page);
2334 return 0;
2335}
2336
2337static struct extent_io_ops btree_extent_io_ops = {
2338 .write_cache_pages_lock_hook = btree_lock_page_hook,
2339 .readpage_end_io_hook = btree_readpage_end_io_hook,
2340 .submit_bio_hook = btree_submit_bio_hook,
2341 /* note we're sharing with inode.c for the merge bio hook */
2342 .merge_bio_hook = btrfs_merge_bio_hook,
2343};
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
new file mode 100644
index 000000000000..c0ff404c31b7
--- /dev/null
+++ b/fs/btrfs/disk-io.h
@@ -0,0 +1,102 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __DISKIO__
20#define __DISKIO__
21
22#define BTRFS_SUPER_INFO_OFFSET (64 * 1024)
23#define BTRFS_SUPER_INFO_SIZE 4096
24
25#define BTRFS_SUPER_MIRROR_MAX 3
26#define BTRFS_SUPER_MIRROR_SHIFT 12
27
28static inline u64 btrfs_sb_offset(int mirror)
29{
30 u64 start = 16 * 1024;
31 if (mirror)
32 return start << (BTRFS_SUPER_MIRROR_SHIFT * mirror);
33 return BTRFS_SUPER_INFO_OFFSET;
34}
35
36struct btrfs_device;
37struct btrfs_fs_devices;
38
39struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
40 u32 blocksize, u64 parent_transid);
41int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
42 u64 parent_transid);
43struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
44 u64 bytenr, u32 blocksize);
45int clean_tree_block(struct btrfs_trans_handle *trans,
46 struct btrfs_root *root, struct extent_buffer *buf);
47struct btrfs_root *open_ctree(struct super_block *sb,
48 struct btrfs_fs_devices *fs_devices,
49 char *options);
50int close_ctree(struct btrfs_root *root);
51int write_ctree_super(struct btrfs_trans_handle *trans,
52 struct btrfs_root *root, int max_mirrors);
53struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
54int btrfs_commit_super(struct btrfs_root *root);
55struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
56 u64 bytenr, u32 blocksize);
57struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
58 u64 root_objectid);
59struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
60 struct btrfs_key *location,
61 const char *name, int namelen);
62struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
63 struct btrfs_key *location);
64struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
65 struct btrfs_key *location);
66int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
67int btrfs_insert_dev_radix(struct btrfs_root *root,
68 struct block_device *bdev,
69 u64 device_id,
70 u64 block_start,
71 u64 num_blocks);
72void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
73int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
74void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
75int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
76int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
77int wait_on_tree_block_writeback(struct btrfs_root *root,
78 struct extent_buffer *buf);
79int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
80u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len);
81void btrfs_csum_final(u32 crc, char *result);
82int btrfs_open_device(struct btrfs_device *dev);
83int btrfs_verify_block_csum(struct btrfs_root *root,
84 struct extent_buffer *buf);
85int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
86 int metadata);
87int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
88 int rw, struct bio *bio, int mirror_num,
89 unsigned long bio_flags,
90 extent_submit_bio_hook_t *submit_bio_start,
91 extent_submit_bio_hook_t *submit_bio_done);
92
93int btrfs_congested_async(struct btrfs_fs_info *info, int iodone);
94unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
95int btrfs_write_tree_block(struct extent_buffer *buf);
96int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
97int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
98 struct btrfs_fs_info *fs_info);
99int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
100 struct btrfs_fs_info *fs_info);
101int btree_lock_page_hook(struct page *page);
102#endif
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
new file mode 100644
index 000000000000..85315d2c90de
--- /dev/null
+++ b/fs/btrfs/export.c
@@ -0,0 +1,203 @@
1#include <linux/fs.h>
2#include <linux/types.h>
3#include "ctree.h"
4#include "disk-io.h"
5#include "btrfs_inode.h"
6#include "print-tree.h"
7#include "export.h"
8#include "compat.h"
9
10#define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, \
11 parent_objectid) / 4)
12#define BTRFS_FID_SIZE_CONNECTABLE (offsetof(struct btrfs_fid, \
13 parent_root_objectid) / 4)
14#define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid) / 4)
15
16static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
17 int connectable)
18{
19 struct btrfs_fid *fid = (struct btrfs_fid *)fh;
20 struct inode *inode = dentry->d_inode;
21 int len = *max_len;
22 int type;
23
24 if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) ||
25 (connectable && len < BTRFS_FID_SIZE_CONNECTABLE))
26 return 255;
27
28 len = BTRFS_FID_SIZE_NON_CONNECTABLE;
29 type = FILEID_BTRFS_WITHOUT_PARENT;
30
31 fid->objectid = BTRFS_I(inode)->location.objectid;
32 fid->root_objectid = BTRFS_I(inode)->root->objectid;
33 fid->gen = inode->i_generation;
34
35 if (connectable && !S_ISDIR(inode->i_mode)) {
36 struct inode *parent;
37 u64 parent_root_id;
38
39 spin_lock(&dentry->d_lock);
40
41 parent = dentry->d_parent->d_inode;
42 fid->parent_objectid = BTRFS_I(parent)->location.objectid;
43 fid->parent_gen = parent->i_generation;
44 parent_root_id = BTRFS_I(parent)->root->objectid;
45
46 spin_unlock(&dentry->d_lock);
47
48 if (parent_root_id != fid->root_objectid) {
49 fid->parent_root_objectid = parent_root_id;
50 len = BTRFS_FID_SIZE_CONNECTABLE_ROOT;
51 type = FILEID_BTRFS_WITH_PARENT_ROOT;
52 } else {
53 len = BTRFS_FID_SIZE_CONNECTABLE;
54 type = FILEID_BTRFS_WITH_PARENT;
55 }
56 }
57
58 *max_len = len;
59 return type;
60}
61
62static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
63 u64 root_objectid, u32 generation)
64{
65 struct btrfs_root *root;
66 struct inode *inode;
67 struct btrfs_key key;
68
69 key.objectid = root_objectid;
70 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
71 key.offset = (u64)-1;
72
73 root = btrfs_read_fs_root_no_name(btrfs_sb(sb)->fs_info, &key);
74 if (IS_ERR(root))
75 return ERR_CAST(root);
76
77 key.objectid = objectid;
78 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
79 key.offset = 0;
80
81 inode = btrfs_iget(sb, &key, root, NULL);
82 if (IS_ERR(inode))
83 return (void *)inode;
84
85 if (generation != inode->i_generation) {
86 iput(inode);
87 return ERR_PTR(-ESTALE);
88 }
89
90 return d_obtain_alias(inode);
91}
92
93static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
94 int fh_len, int fh_type)
95{
96 struct btrfs_fid *fid = (struct btrfs_fid *) fh;
97 u64 objectid, root_objectid;
98 u32 generation;
99
100 if (fh_type == FILEID_BTRFS_WITH_PARENT) {
101 if (fh_len != BTRFS_FID_SIZE_CONNECTABLE)
102 return NULL;
103 root_objectid = fid->root_objectid;
104 } else if (fh_type == FILEID_BTRFS_WITH_PARENT_ROOT) {
105 if (fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT)
106 return NULL;
107 root_objectid = fid->parent_root_objectid;
108 } else
109 return NULL;
110
111 objectid = fid->parent_objectid;
112 generation = fid->parent_gen;
113
114 return btrfs_get_dentry(sb, objectid, root_objectid, generation);
115}
116
117static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
118 int fh_len, int fh_type)
119{
120 struct btrfs_fid *fid = (struct btrfs_fid *) fh;
121 u64 objectid, root_objectid;
122 u32 generation;
123
124 if ((fh_type != FILEID_BTRFS_WITH_PARENT ||
125 fh_len != BTRFS_FID_SIZE_CONNECTABLE) &&
126 (fh_type != FILEID_BTRFS_WITH_PARENT_ROOT ||
127 fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) &&
128 (fh_type != FILEID_BTRFS_WITHOUT_PARENT ||
129 fh_len != BTRFS_FID_SIZE_NON_CONNECTABLE))
130 return NULL;
131
132 objectid = fid->objectid;
133 root_objectid = fid->root_objectid;
134 generation = fid->gen;
135
136 return btrfs_get_dentry(sb, objectid, root_objectid, generation);
137}
138
139static struct dentry *btrfs_get_parent(struct dentry *child)
140{
141 struct inode *dir = child->d_inode;
142 struct btrfs_root *root = BTRFS_I(dir)->root;
143 struct btrfs_key key;
144 struct btrfs_path *path;
145 struct extent_buffer *leaf;
146 int slot;
147 u64 objectid;
148 int ret;
149
150 path = btrfs_alloc_path();
151
152 key.objectid = dir->i_ino;
153 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
154 key.offset = (u64)-1;
155
156 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
157 if (ret < 0) {
158 /* Error */
159 btrfs_free_path(path);
160 return ERR_PTR(ret);
161 }
162 leaf = path->nodes[0];
163 slot = path->slots[0];
164 if (ret) {
165 /* btrfs_search_slot() returns the slot where we'd want to
166 insert a backref for parent inode #0xFFFFFFFFFFFFFFFF.
167 The _real_ backref, telling us what the parent inode
168 _actually_ is, will be in the slot _before_ the one
169 that btrfs_search_slot() returns. */
170 if (!slot) {
171 /* Unless there is _no_ key in the tree before... */
172 btrfs_free_path(path);
173 return ERR_PTR(-EIO);
174 }
175 slot--;
176 }
177
178 btrfs_item_key_to_cpu(leaf, &key, slot);
179 btrfs_free_path(path);
180
181 if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY)
182 return ERR_PTR(-EINVAL);
183
184 objectid = key.offset;
185
186 /* If we are already at the root of a subvol, return the real root */
187 if (objectid == dir->i_ino)
188 return dget(dir->i_sb->s_root);
189
190 /* Build a new key for the inode item */
191 key.objectid = objectid;
192 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
193 key.offset = 0;
194
195 return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL));
196}
197
198const struct export_operations btrfs_export_ops = {
199 .encode_fh = btrfs_encode_fh,
200 .fh_to_dentry = btrfs_fh_to_dentry,
201 .fh_to_parent = btrfs_fh_to_parent,
202 .get_parent = btrfs_get_parent,
203};
diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h
new file mode 100644
index 000000000000..074348a95841
--- /dev/null
+++ b/fs/btrfs/export.h
@@ -0,0 +1,19 @@
1#ifndef BTRFS_EXPORT_H
2#define BTRFS_EXPORT_H
3
4#include <linux/exportfs.h>
5
6extern const struct export_operations btrfs_export_ops;
7
8struct btrfs_fid {
9 u64 objectid;
10 u64 root_objectid;
11 u32 gen;
12
13 u64 parent_objectid;
14 u32 parent_gen;
15
16 u64 parent_root_objectid;
17} __attribute__ ((packed));
18
19#endif
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
new file mode 100644
index 000000000000..293da650873f
--- /dev/null
+++ b/fs/btrfs/extent-tree.c
@@ -0,0 +1,5986 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18#include <linux/sched.h>
19#include <linux/pagemap.h>
20#include <linux/writeback.h>
21#include <linux/blkdev.h>
22#include <linux/version.h>
23#include "compat.h"
24#include "hash.h"
25#include "crc32c.h"
26#include "ctree.h"
27#include "disk-io.h"
28#include "print-tree.h"
29#include "transaction.h"
30#include "volumes.h"
31#include "locking.h"
32#include "ref-cache.h"
33#include "compat.h"
34
35#define PENDING_EXTENT_INSERT 0
36#define PENDING_EXTENT_DELETE 1
37#define PENDING_BACKREF_UPDATE 2
38
39struct pending_extent_op {
40 int type;
41 u64 bytenr;
42 u64 num_bytes;
43 u64 parent;
44 u64 orig_parent;
45 u64 generation;
46 u64 orig_generation;
47 int level;
48 struct list_head list;
49 int del;
50};
51
52static int finish_current_insert(struct btrfs_trans_handle *trans,
53 struct btrfs_root *extent_root, int all);
54static int del_pending_extents(struct btrfs_trans_handle *trans,
55 struct btrfs_root *extent_root, int all);
56static int pin_down_bytes(struct btrfs_trans_handle *trans,
57 struct btrfs_root *root,
58 u64 bytenr, u64 num_bytes, int is_data);
59static int update_block_group(struct btrfs_trans_handle *trans,
60 struct btrfs_root *root,
61 u64 bytenr, u64 num_bytes, int alloc,
62 int mark_free);
63
64static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
65{
66 return (cache->flags & bits) == bits;
67}
68
69/*
70 * this adds the block group to the fs_info rb tree for the block group
71 * cache
72 */
73static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
74 struct btrfs_block_group_cache *block_group)
75{
76 struct rb_node **p;
77 struct rb_node *parent = NULL;
78 struct btrfs_block_group_cache *cache;
79
80 spin_lock(&info->block_group_cache_lock);
81 p = &info->block_group_cache_tree.rb_node;
82
83 while (*p) {
84 parent = *p;
85 cache = rb_entry(parent, struct btrfs_block_group_cache,
86 cache_node);
87 if (block_group->key.objectid < cache->key.objectid) {
88 p = &(*p)->rb_left;
89 } else if (block_group->key.objectid > cache->key.objectid) {
90 p = &(*p)->rb_right;
91 } else {
92 spin_unlock(&info->block_group_cache_lock);
93 return -EEXIST;
94 }
95 }
96
97 rb_link_node(&block_group->cache_node, parent, p);
98 rb_insert_color(&block_group->cache_node,
99 &info->block_group_cache_tree);
100 spin_unlock(&info->block_group_cache_lock);
101
102 return 0;
103}
104
105/*
106 * This will return the block group at or after bytenr if contains is 0, else
107 * it will return the block group that contains the bytenr
108 */
109static struct btrfs_block_group_cache *
110block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
111 int contains)
112{
113 struct btrfs_block_group_cache *cache, *ret = NULL;
114 struct rb_node *n;
115 u64 end, start;
116
117 spin_lock(&info->block_group_cache_lock);
118 n = info->block_group_cache_tree.rb_node;
119
120 while (n) {
121 cache = rb_entry(n, struct btrfs_block_group_cache,
122 cache_node);
123 end = cache->key.objectid + cache->key.offset - 1;
124 start = cache->key.objectid;
125
126 if (bytenr < start) {
127 if (!contains && (!ret || start < ret->key.objectid))
128 ret = cache;
129 n = n->rb_left;
130 } else if (bytenr > start) {
131 if (contains && bytenr <= end) {
132 ret = cache;
133 break;
134 }
135 n = n->rb_right;
136 } else {
137 ret = cache;
138 break;
139 }
140 }
141 if (ret)
142 atomic_inc(&ret->count);
143 spin_unlock(&info->block_group_cache_lock);
144
145 return ret;
146}
147
148/*
149 * this is only called by cache_block_group, since we could have freed extents
150 * we need to check the pinned_extents for any extents that can't be used yet
151 * since their free space will be released as soon as the transaction commits.
152 */
153static int add_new_free_space(struct btrfs_block_group_cache *block_group,
154 struct btrfs_fs_info *info, u64 start, u64 end)
155{
156 u64 extent_start, extent_end, size;
157 int ret;
158
159 mutex_lock(&info->pinned_mutex);
160 while (start < end) {
161 ret = find_first_extent_bit(&info->pinned_extents, start,
162 &extent_start, &extent_end,
163 EXTENT_DIRTY);
164 if (ret)
165 break;
166
167 if (extent_start == start) {
168 start = extent_end + 1;
169 } else if (extent_start > start && extent_start < end) {
170 size = extent_start - start;
171 ret = btrfs_add_free_space(block_group, start,
172 size);
173 BUG_ON(ret);
174 start = extent_end + 1;
175 } else {
176 break;
177 }
178 }
179
180 if (start < end) {
181 size = end - start;
182 ret = btrfs_add_free_space(block_group, start, size);
183 BUG_ON(ret);
184 }
185 mutex_unlock(&info->pinned_mutex);
186
187 return 0;
188}
189
190static int remove_sb_from_cache(struct btrfs_root *root,
191 struct btrfs_block_group_cache *cache)
192{
193 u64 bytenr;
194 u64 *logical;
195 int stripe_len;
196 int i, nr, ret;
197
198 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
199 bytenr = btrfs_sb_offset(i);
200 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
201 cache->key.objectid, bytenr, 0,
202 &logical, &nr, &stripe_len);
203 BUG_ON(ret);
204 while (nr--) {
205 btrfs_remove_free_space(cache, logical[nr],
206 stripe_len);
207 }
208 kfree(logical);
209 }
210 return 0;
211}
212
213static int cache_block_group(struct btrfs_root *root,
214 struct btrfs_block_group_cache *block_group)
215{
216 struct btrfs_path *path;
217 int ret = 0;
218 struct btrfs_key key;
219 struct extent_buffer *leaf;
220 int slot;
221 u64 last;
222
223 if (!block_group)
224 return 0;
225
226 root = root->fs_info->extent_root;
227
228 if (block_group->cached)
229 return 0;
230
231 path = btrfs_alloc_path();
232 if (!path)
233 return -ENOMEM;
234
235 path->reada = 2;
236 /*
237 * we get into deadlocks with paths held by callers of this function.
238 * since the alloc_mutex is protecting things right now, just
239 * skip the locking here
240 */
241 path->skip_locking = 1;
242 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
243 key.objectid = last;
244 key.offset = 0;
245 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
246 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
247 if (ret < 0)
248 goto err;
249
250 while (1) {
251 leaf = path->nodes[0];
252 slot = path->slots[0];
253 if (slot >= btrfs_header_nritems(leaf)) {
254 ret = btrfs_next_leaf(root, path);
255 if (ret < 0)
256 goto err;
257 if (ret == 0)
258 continue;
259 else
260 break;
261 }
262 btrfs_item_key_to_cpu(leaf, &key, slot);
263 if (key.objectid < block_group->key.objectid)
264 goto next;
265
266 if (key.objectid >= block_group->key.objectid +
267 block_group->key.offset)
268 break;
269
270 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
271 add_new_free_space(block_group, root->fs_info, last,
272 key.objectid);
273
274 last = key.objectid + key.offset;
275 }
276next:
277 path->slots[0]++;
278 }
279
280 add_new_free_space(block_group, root->fs_info, last,
281 block_group->key.objectid +
282 block_group->key.offset);
283
284 remove_sb_from_cache(root, block_group);
285 block_group->cached = 1;
286 ret = 0;
287err:
288 btrfs_free_path(path);
289 return ret;
290}
291
292/*
293 * return the block group that starts at or after bytenr
294 */
295static struct btrfs_block_group_cache *
296btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
297{
298 struct btrfs_block_group_cache *cache;
299
300 cache = block_group_cache_tree_search(info, bytenr, 0);
301
302 return cache;
303}
304
305/*
306 * return the block group that contains teh given bytenr
307 */
308struct btrfs_block_group_cache *btrfs_lookup_block_group(
309 struct btrfs_fs_info *info,
310 u64 bytenr)
311{
312 struct btrfs_block_group_cache *cache;
313
314 cache = block_group_cache_tree_search(info, bytenr, 1);
315
316 return cache;
317}
318
319static inline void put_block_group(struct btrfs_block_group_cache *cache)
320{
321 if (atomic_dec_and_test(&cache->count))
322 kfree(cache);
323}
324
325static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
326 u64 flags)
327{
328 struct list_head *head = &info->space_info;
329 struct list_head *cur;
330 struct btrfs_space_info *found;
331 list_for_each(cur, head) {
332 found = list_entry(cur, struct btrfs_space_info, list);
333 if (found->flags == flags)
334 return found;
335 }
336 return NULL;
337}
338
339static u64 div_factor(u64 num, int factor)
340{
341 if (factor == 10)
342 return num;
343 num *= factor;
344 do_div(num, 10);
345 return num;
346}
347
348u64 btrfs_find_block_group(struct btrfs_root *root,
349 u64 search_start, u64 search_hint, int owner)
350{
351 struct btrfs_block_group_cache *cache;
352 u64 used;
353 u64 last = max(search_hint, search_start);
354 u64 group_start = 0;
355 int full_search = 0;
356 int factor = 9;
357 int wrapped = 0;
358again:
359 while (1) {
360 cache = btrfs_lookup_first_block_group(root->fs_info, last);
361 if (!cache)
362 break;
363
364 spin_lock(&cache->lock);
365 last = cache->key.objectid + cache->key.offset;
366 used = btrfs_block_group_used(&cache->item);
367
368 if ((full_search || !cache->ro) &&
369 block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
370 if (used + cache->pinned + cache->reserved <
371 div_factor(cache->key.offset, factor)) {
372 group_start = cache->key.objectid;
373 spin_unlock(&cache->lock);
374 put_block_group(cache);
375 goto found;
376 }
377 }
378 spin_unlock(&cache->lock);
379 put_block_group(cache);
380 cond_resched();
381 }
382 if (!wrapped) {
383 last = search_start;
384 wrapped = 1;
385 goto again;
386 }
387 if (!full_search && factor < 10) {
388 last = search_start;
389 full_search = 1;
390 factor = 10;
391 goto again;
392 }
393found:
394 return group_start;
395}
396
397/* simple helper to search for an existing extent at a given offset */
398int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
399{
400 int ret;
401 struct btrfs_key key;
402 struct btrfs_path *path;
403
404 path = btrfs_alloc_path();
405 BUG_ON(!path);
406 key.objectid = start;
407 key.offset = len;
408 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
409 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
410 0, 0);
411 btrfs_free_path(path);
412 return ret;
413}
414
415/*
416 * Back reference rules. Back refs have three main goals:
417 *
418 * 1) differentiate between all holders of references to an extent so that
419 * when a reference is dropped we can make sure it was a valid reference
420 * before freeing the extent.
421 *
422 * 2) Provide enough information to quickly find the holders of an extent
423 * if we notice a given block is corrupted or bad.
424 *
425 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
426 * maintenance. This is actually the same as #2, but with a slightly
427 * different use case.
428 *
429 * File extents can be referenced by:
430 *
431 * - multiple snapshots, subvolumes, or different generations in one subvol
432 * - different files inside a single subvolume
433 * - different offsets inside a file (bookend extents in file.c)
434 *
435 * The extent ref structure has fields for:
436 *
437 * - Objectid of the subvolume root
438 * - Generation number of the tree holding the reference
439 * - objectid of the file holding the reference
440 * - number of references holding by parent node (alway 1 for tree blocks)
441 *
442 * Btree leaf may hold multiple references to a file extent. In most cases,
443 * these references are from same file and the corresponding offsets inside
444 * the file are close together.
445 *
446 * When a file extent is allocated the fields are filled in:
447 * (root_key.objectid, trans->transid, inode objectid, 1)
448 *
449 * When a leaf is cow'd new references are added for every file extent found
450 * in the leaf. It looks similar to the create case, but trans->transid will
451 * be different when the block is cow'd.
452 *
453 * (root_key.objectid, trans->transid, inode objectid,
454 * number of references in the leaf)
455 *
456 * When a file extent is removed either during snapshot deletion or
457 * file truncation, we find the corresponding back reference and check
458 * the following fields:
459 *
460 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
461 * inode objectid)
462 *
463 * Btree extents can be referenced by:
464 *
465 * - Different subvolumes
466 * - Different generations of the same subvolume
467 *
468 * When a tree block is created, back references are inserted:
469 *
470 * (root->root_key.objectid, trans->transid, level, 1)
471 *
472 * When a tree block is cow'd, new back references are added for all the
473 * blocks it points to. If the tree block isn't in reference counted root,
474 * the old back references are removed. These new back references are of
475 * the form (trans->transid will have increased since creation):
476 *
477 * (root->root_key.objectid, trans->transid, level, 1)
478 *
479 * When a backref is in deleting, the following fields are checked:
480 *
481 * if backref was for a tree root:
482 * (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
483 * else
484 * (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
485 *
486 * Back Reference Key composing:
487 *
488 * The key objectid corresponds to the first byte in the extent, the key
489 * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
490 * byte of parent extent. If a extent is tree root, the key offset is set
491 * to the key objectid.
492 */
493
494static noinline int lookup_extent_backref(struct btrfs_trans_handle *trans,
495 struct btrfs_root *root,
496 struct btrfs_path *path,
497 u64 bytenr, u64 parent,
498 u64 ref_root, u64 ref_generation,
499 u64 owner_objectid, int del)
500{
501 struct btrfs_key key;
502 struct btrfs_extent_ref *ref;
503 struct extent_buffer *leaf;
504 u64 ref_objectid;
505 int ret;
506
507 key.objectid = bytenr;
508 key.type = BTRFS_EXTENT_REF_KEY;
509 key.offset = parent;
510
511 ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
512 if (ret < 0)
513 goto out;
514 if (ret > 0) {
515 ret = -ENOENT;
516 goto out;
517 }
518
519 leaf = path->nodes[0];
520 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
521 ref_objectid = btrfs_ref_objectid(leaf, ref);
522 if (btrfs_ref_root(leaf, ref) != ref_root ||
523 btrfs_ref_generation(leaf, ref) != ref_generation ||
524 (ref_objectid != owner_objectid &&
525 ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
526 ret = -EIO;
527 WARN_ON(1);
528 goto out;
529 }
530 ret = 0;
531out:
532 return ret;
533}
534
535/*
536 * updates all the backrefs that are pending on update_list for the
537 * extent_root
538 */
539static noinline int update_backrefs(struct btrfs_trans_handle *trans,
540 struct btrfs_root *extent_root,
541 struct btrfs_path *path,
542 struct list_head *update_list)
543{
544 struct btrfs_key key;
545 struct btrfs_extent_ref *ref;
546 struct btrfs_fs_info *info = extent_root->fs_info;
547 struct pending_extent_op *op;
548 struct extent_buffer *leaf;
549 int ret = 0;
550 struct list_head *cur = update_list->next;
551 u64 ref_objectid;
552 u64 ref_root = extent_root->root_key.objectid;
553
554 op = list_entry(cur, struct pending_extent_op, list);
555
556search:
557 key.objectid = op->bytenr;
558 key.type = BTRFS_EXTENT_REF_KEY;
559 key.offset = op->orig_parent;
560
561 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
562 BUG_ON(ret);
563
564 leaf = path->nodes[0];
565
566loop:
567 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
568
569 ref_objectid = btrfs_ref_objectid(leaf, ref);
570
571 if (btrfs_ref_root(leaf, ref) != ref_root ||
572 btrfs_ref_generation(leaf, ref) != op->orig_generation ||
573 (ref_objectid != op->level &&
574 ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
575 printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, "
576 "root %llu, owner %u\n",
577 (unsigned long long)op->bytenr,
578 (unsigned long long)op->orig_parent,
579 (unsigned long long)ref_root, op->level);
580 btrfs_print_leaf(extent_root, leaf);
581 BUG();
582 }
583
584 key.objectid = op->bytenr;
585 key.offset = op->parent;
586 key.type = BTRFS_EXTENT_REF_KEY;
587 ret = btrfs_set_item_key_safe(trans, extent_root, path, &key);
588 BUG_ON(ret);
589 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
590 btrfs_set_ref_generation(leaf, ref, op->generation);
591
592 cur = cur->next;
593
594 list_del_init(&op->list);
595 unlock_extent(&info->extent_ins, op->bytenr,
596 op->bytenr + op->num_bytes - 1, GFP_NOFS);
597 kfree(op);
598
599 if (cur == update_list) {
600 btrfs_mark_buffer_dirty(path->nodes[0]);
601 btrfs_release_path(extent_root, path);
602 goto out;
603 }
604
605 op = list_entry(cur, struct pending_extent_op, list);
606
607 path->slots[0]++;
608 while (path->slots[0] < btrfs_header_nritems(leaf)) {
609 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
610 if (key.objectid == op->bytenr &&
611 key.type == BTRFS_EXTENT_REF_KEY)
612 goto loop;
613 path->slots[0]++;
614 }
615
616 btrfs_mark_buffer_dirty(path->nodes[0]);
617 btrfs_release_path(extent_root, path);
618 goto search;
619
620out:
621 return 0;
622}
623
624static noinline int insert_extents(struct btrfs_trans_handle *trans,
625 struct btrfs_root *extent_root,
626 struct btrfs_path *path,
627 struct list_head *insert_list, int nr)
628{
629 struct btrfs_key *keys;
630 u32 *data_size;
631 struct pending_extent_op *op;
632 struct extent_buffer *leaf;
633 struct list_head *cur = insert_list->next;
634 struct btrfs_fs_info *info = extent_root->fs_info;
635 u64 ref_root = extent_root->root_key.objectid;
636 int i = 0, last = 0, ret;
637 int total = nr * 2;
638
639 if (!nr)
640 return 0;
641
642 keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS);
643 if (!keys)
644 return -ENOMEM;
645
646 data_size = kzalloc(total * sizeof(u32), GFP_NOFS);
647 if (!data_size) {
648 kfree(keys);
649 return -ENOMEM;
650 }
651
652 list_for_each_entry(op, insert_list, list) {
653 keys[i].objectid = op->bytenr;
654 keys[i].offset = op->num_bytes;
655 keys[i].type = BTRFS_EXTENT_ITEM_KEY;
656 data_size[i] = sizeof(struct btrfs_extent_item);
657 i++;
658
659 keys[i].objectid = op->bytenr;
660 keys[i].offset = op->parent;
661 keys[i].type = BTRFS_EXTENT_REF_KEY;
662 data_size[i] = sizeof(struct btrfs_extent_ref);
663 i++;
664 }
665
666 op = list_entry(cur, struct pending_extent_op, list);
667 i = 0;
668 while (i < total) {
669 int c;
670 ret = btrfs_insert_some_items(trans, extent_root, path,
671 keys+i, data_size+i, total-i);
672 BUG_ON(ret < 0);
673
674 if (last && ret > 1)
675 BUG();
676
677 leaf = path->nodes[0];
678 for (c = 0; c < ret; c++) {
679 int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY;
680
681 /*
682 * if the first item we inserted was a backref, then
683 * the EXTENT_ITEM will be the odd c's, else it will
684 * be the even c's
685 */
686 if ((ref_first && (c % 2)) ||
687 (!ref_first && !(c % 2))) {
688 struct btrfs_extent_item *itm;
689
690 itm = btrfs_item_ptr(leaf, path->slots[0] + c,
691 struct btrfs_extent_item);
692 btrfs_set_extent_refs(path->nodes[0], itm, 1);
693 op->del++;
694 } else {
695 struct btrfs_extent_ref *ref;
696
697 ref = btrfs_item_ptr(leaf, path->slots[0] + c,
698 struct btrfs_extent_ref);
699 btrfs_set_ref_root(leaf, ref, ref_root);
700 btrfs_set_ref_generation(leaf, ref,
701 op->generation);
702 btrfs_set_ref_objectid(leaf, ref, op->level);
703 btrfs_set_ref_num_refs(leaf, ref, 1);
704 op->del++;
705 }
706
707 /*
708 * using del to see when its ok to free up the
709 * pending_extent_op. In the case where we insert the
710 * last item on the list in order to help do batching
711 * we need to not free the extent op until we actually
712 * insert the extent_item
713 */
714 if (op->del == 2) {
715 unlock_extent(&info->extent_ins, op->bytenr,
716 op->bytenr + op->num_bytes - 1,
717 GFP_NOFS);
718 cur = cur->next;
719 list_del_init(&op->list);
720 kfree(op);
721 if (cur != insert_list)
722 op = list_entry(cur,
723 struct pending_extent_op,
724 list);
725 }
726 }
727 btrfs_mark_buffer_dirty(leaf);
728 btrfs_release_path(extent_root, path);
729
730 /*
731 * Ok backref's and items usually go right next to eachother,
732 * but if we could only insert 1 item that means that we
733 * inserted on the end of a leaf, and we have no idea what may
734 * be on the next leaf so we just play it safe. In order to
735 * try and help this case we insert the last thing on our
736 * insert list so hopefully it will end up being the last
737 * thing on the leaf and everything else will be before it,
738 * which will let us insert a whole bunch of items at the same
739 * time.
740 */
741 if (ret == 1 && !last && (i + ret < total)) {
742 /*
743 * last: where we will pick up the next time around
744 * i: our current key to insert, will be total - 1
745 * cur: the current op we are screwing with
746 * op: duh
747 */
748 last = i + ret;
749 i = total - 1;
750 cur = insert_list->prev;
751 op = list_entry(cur, struct pending_extent_op, list);
752 } else if (last) {
753 /*
754 * ok we successfully inserted the last item on the
755 * list, lets reset everything
756 *
757 * i: our current key to insert, so where we left off
758 * last time
759 * last: done with this
760 * cur: the op we are messing with
761 * op: duh
762 * total: since we inserted the last key, we need to
763 * decrement total so we dont overflow
764 */
765 i = last;
766 last = 0;
767 total--;
768 if (i < total) {
769 cur = insert_list->next;
770 op = list_entry(cur, struct pending_extent_op,
771 list);
772 }
773 } else {
774 i += ret;
775 }
776
777 cond_resched();
778 }
779 ret = 0;
780 kfree(keys);
781 kfree(data_size);
782 return ret;
783}
784
785static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
786 struct btrfs_root *root,
787 struct btrfs_path *path,
788 u64 bytenr, u64 parent,
789 u64 ref_root, u64 ref_generation,
790 u64 owner_objectid)
791{
792 struct btrfs_key key;
793 struct extent_buffer *leaf;
794 struct btrfs_extent_ref *ref;
795 u32 num_refs;
796 int ret;
797
798 key.objectid = bytenr;
799 key.type = BTRFS_EXTENT_REF_KEY;
800 key.offset = parent;
801
802 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
803 if (ret == 0) {
804 leaf = path->nodes[0];
805 ref = btrfs_item_ptr(leaf, path->slots[0],
806 struct btrfs_extent_ref);
807 btrfs_set_ref_root(leaf, ref, ref_root);
808 btrfs_set_ref_generation(leaf, ref, ref_generation);
809 btrfs_set_ref_objectid(leaf, ref, owner_objectid);
810 btrfs_set_ref_num_refs(leaf, ref, 1);
811 } else if (ret == -EEXIST) {
812 u64 existing_owner;
813 BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
814 leaf = path->nodes[0];
815 ref = btrfs_item_ptr(leaf, path->slots[0],
816 struct btrfs_extent_ref);
817 if (btrfs_ref_root(leaf, ref) != ref_root ||
818 btrfs_ref_generation(leaf, ref) != ref_generation) {
819 ret = -EIO;
820 WARN_ON(1);
821 goto out;
822 }
823
824 num_refs = btrfs_ref_num_refs(leaf, ref);
825 BUG_ON(num_refs == 0);
826 btrfs_set_ref_num_refs(leaf, ref, num_refs + 1);
827
828 existing_owner = btrfs_ref_objectid(leaf, ref);
829 if (existing_owner != owner_objectid &&
830 existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
831 btrfs_set_ref_objectid(leaf, ref,
832 BTRFS_MULTIPLE_OBJECTIDS);
833 }
834 ret = 0;
835 } else {
836 goto out;
837 }
838 btrfs_mark_buffer_dirty(path->nodes[0]);
839out:
840 btrfs_release_path(root, path);
841 return ret;
842}
843
844static noinline int remove_extent_backref(struct btrfs_trans_handle *trans,
845 struct btrfs_root *root,
846 struct btrfs_path *path)
847{
848 struct extent_buffer *leaf;
849 struct btrfs_extent_ref *ref;
850 u32 num_refs;
851 int ret = 0;
852
853 leaf = path->nodes[0];
854 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
855 num_refs = btrfs_ref_num_refs(leaf, ref);
856 BUG_ON(num_refs == 0);
857 num_refs -= 1;
858 if (num_refs == 0) {
859 ret = btrfs_del_item(trans, root, path);
860 } else {
861 btrfs_set_ref_num_refs(leaf, ref, num_refs);
862 btrfs_mark_buffer_dirty(leaf);
863 }
864 btrfs_release_path(root, path);
865 return ret;
866}
867
868#ifdef BIO_RW_DISCARD
869static void btrfs_issue_discard(struct block_device *bdev,
870 u64 start, u64 len)
871{
872 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
873}
874#endif
875
876static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
877 u64 num_bytes)
878{
879#ifdef BIO_RW_DISCARD
880 int ret;
881 u64 map_length = num_bytes;
882 struct btrfs_multi_bio *multi = NULL;
883
884 /* Tell the block device(s) that the sectors can be discarded */
885 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
886 bytenr, &map_length, &multi, 0);
887 if (!ret) {
888 struct btrfs_bio_stripe *stripe = multi->stripes;
889 int i;
890
891 if (map_length > num_bytes)
892 map_length = num_bytes;
893
894 for (i = 0; i < multi->num_stripes; i++, stripe++) {
895 btrfs_issue_discard(stripe->dev->bdev,
896 stripe->physical,
897 map_length);
898 }
899 kfree(multi);
900 }
901
902 return ret;
903#else
904 return 0;
905#endif
906}
907
908static noinline int free_extents(struct btrfs_trans_handle *trans,
909 struct btrfs_root *extent_root,
910 struct list_head *del_list)
911{
912 struct btrfs_fs_info *info = extent_root->fs_info;
913 struct btrfs_path *path;
914 struct btrfs_key key, found_key;
915 struct extent_buffer *leaf;
916 struct list_head *cur;
917 struct pending_extent_op *op;
918 struct btrfs_extent_item *ei;
919 int ret, num_to_del, extent_slot = 0, found_extent = 0;
920 u32 refs;
921 u64 bytes_freed = 0;
922
923 path = btrfs_alloc_path();
924 if (!path)
925 return -ENOMEM;
926 path->reada = 1;
927
928search:
929 /* search for the backref for the current ref we want to delete */
930 cur = del_list->next;
931 op = list_entry(cur, struct pending_extent_op, list);
932 ret = lookup_extent_backref(trans, extent_root, path, op->bytenr,
933 op->orig_parent,
934 extent_root->root_key.objectid,
935 op->orig_generation, op->level, 1);
936 if (ret) {
937 printk(KERN_ERR "btrfs unable to find backref byte nr %llu "
938 "root %llu gen %llu owner %u\n",
939 (unsigned long long)op->bytenr,
940 (unsigned long long)extent_root->root_key.objectid,
941 (unsigned long long)op->orig_generation, op->level);
942 btrfs_print_leaf(extent_root, path->nodes[0]);
943 WARN_ON(1);
944 goto out;
945 }
946
947 extent_slot = path->slots[0];
948 num_to_del = 1;
949 found_extent = 0;
950
951 /*
952 * if we aren't the first item on the leaf we can move back one and see
953 * if our ref is right next to our extent item
954 */
955 if (likely(extent_slot)) {
956 extent_slot--;
957 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
958 extent_slot);
959 if (found_key.objectid == op->bytenr &&
960 found_key.type == BTRFS_EXTENT_ITEM_KEY &&
961 found_key.offset == op->num_bytes) {
962 num_to_del++;
963 found_extent = 1;
964 }
965 }
966
967 /*
968 * if we didn't find the extent we need to delete the backref and then
969 * search for the extent item key so we can update its ref count
970 */
971 if (!found_extent) {
972 key.objectid = op->bytenr;
973 key.type = BTRFS_EXTENT_ITEM_KEY;
974 key.offset = op->num_bytes;
975
976 ret = remove_extent_backref(trans, extent_root, path);
977 BUG_ON(ret);
978 btrfs_release_path(extent_root, path);
979 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
980 BUG_ON(ret);
981 extent_slot = path->slots[0];
982 }
983
984 /* this is where we update the ref count for the extent */
985 leaf = path->nodes[0];
986 ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item);
987 refs = btrfs_extent_refs(leaf, ei);
988 BUG_ON(refs == 0);
989 refs--;
990 btrfs_set_extent_refs(leaf, ei, refs);
991
992 btrfs_mark_buffer_dirty(leaf);
993
994 /*
995 * This extent needs deleting. The reason cur_slot is extent_slot +
996 * num_to_del is because extent_slot points to the slot where the extent
997 * is, and if the backref was not right next to the extent we will be
998 * deleting at least 1 item, and will want to start searching at the
999 * slot directly next to extent_slot. However if we did find the
1000 * backref next to the extent item them we will be deleting at least 2
1001 * items and will want to start searching directly after the ref slot
1002 */
1003 if (!refs) {
1004 struct list_head *pos, *n, *end;
1005 int cur_slot = extent_slot+num_to_del;
1006 u64 super_used;
1007 u64 root_used;
1008
1009 path->slots[0] = extent_slot;
1010 bytes_freed = op->num_bytes;
1011
1012 mutex_lock(&info->pinned_mutex);
1013 ret = pin_down_bytes(trans, extent_root, op->bytenr,
1014 op->num_bytes, op->level >=
1015 BTRFS_FIRST_FREE_OBJECTID);
1016 mutex_unlock(&info->pinned_mutex);
1017 BUG_ON(ret < 0);
1018 op->del = ret;
1019
1020 /*
1021 * we need to see if we can delete multiple things at once, so
1022 * start looping through the list of extents we are wanting to
1023 * delete and see if their extent/backref's are right next to
1024 * eachother and the extents only have 1 ref
1025 */
1026 for (pos = cur->next; pos != del_list; pos = pos->next) {
1027 struct pending_extent_op *tmp;
1028
1029 tmp = list_entry(pos, struct pending_extent_op, list);
1030
1031 /* we only want to delete extent+ref at this stage */
1032 if (cur_slot >= btrfs_header_nritems(leaf) - 1)
1033 break;
1034
1035 btrfs_item_key_to_cpu(leaf, &found_key, cur_slot);
1036 if (found_key.objectid != tmp->bytenr ||
1037 found_key.type != BTRFS_EXTENT_ITEM_KEY ||
1038 found_key.offset != tmp->num_bytes)
1039 break;
1040
1041 /* check to make sure this extent only has one ref */
1042 ei = btrfs_item_ptr(leaf, cur_slot,
1043 struct btrfs_extent_item);
1044 if (btrfs_extent_refs(leaf, ei) != 1)
1045 break;
1046
1047 btrfs_item_key_to_cpu(leaf, &found_key, cur_slot+1);
1048 if (found_key.objectid != tmp->bytenr ||
1049 found_key.type != BTRFS_EXTENT_REF_KEY ||
1050 found_key.offset != tmp->orig_parent)
1051 break;
1052
1053 /*
1054 * the ref is right next to the extent, we can set the
1055 * ref count to 0 since we will delete them both now
1056 */
1057 btrfs_set_extent_refs(leaf, ei, 0);
1058
1059 /* pin down the bytes for this extent */
1060 mutex_lock(&info->pinned_mutex);
1061 ret = pin_down_bytes(trans, extent_root, tmp->bytenr,
1062 tmp->num_bytes, tmp->level >=
1063 BTRFS_FIRST_FREE_OBJECTID);
1064 mutex_unlock(&info->pinned_mutex);
1065 BUG_ON(ret < 0);
1066
1067 /*
1068 * use the del field to tell if we need to go ahead and
1069 * free up the extent when we delete the item or not.
1070 */
1071 tmp->del = ret;
1072 bytes_freed += tmp->num_bytes;
1073
1074 num_to_del += 2;
1075 cur_slot += 2;
1076 }
1077 end = pos;
1078
1079 /* update the free space counters */
1080 spin_lock(&info->delalloc_lock);
1081 super_used = btrfs_super_bytes_used(&info->super_copy);
1082 btrfs_set_super_bytes_used(&info->super_copy,
1083 super_used - bytes_freed);
1084
1085 root_used = btrfs_root_used(&extent_root->root_item);
1086 btrfs_set_root_used(&extent_root->root_item,
1087 root_used - bytes_freed);
1088 spin_unlock(&info->delalloc_lock);
1089
1090 /* delete the items */
1091 ret = btrfs_del_items(trans, extent_root, path,
1092 path->slots[0], num_to_del);
1093 BUG_ON(ret);
1094
1095 /*
1096 * loop through the extents we deleted and do the cleanup work
1097 * on them
1098 */
1099 for (pos = cur, n = pos->next; pos != end;
1100 pos = n, n = pos->next) {
1101 struct pending_extent_op *tmp;
1102 tmp = list_entry(pos, struct pending_extent_op, list);
1103
1104 /*
1105 * remember tmp->del tells us wether or not we pinned
1106 * down the extent
1107 */
1108 ret = update_block_group(trans, extent_root,
1109 tmp->bytenr, tmp->num_bytes, 0,
1110 tmp->del);
1111 BUG_ON(ret);
1112
1113 list_del_init(&tmp->list);
1114 unlock_extent(&info->extent_ins, tmp->bytenr,
1115 tmp->bytenr + tmp->num_bytes - 1,
1116 GFP_NOFS);
1117 kfree(tmp);
1118 }
1119 } else if (refs && found_extent) {
1120 /*
1121 * the ref and extent were right next to eachother, but the
1122 * extent still has a ref, so just free the backref and keep
1123 * going
1124 */
1125 ret = remove_extent_backref(trans, extent_root, path);
1126 BUG_ON(ret);
1127
1128 list_del_init(&op->list);
1129 unlock_extent(&info->extent_ins, op->bytenr,
1130 op->bytenr + op->num_bytes - 1, GFP_NOFS);
1131 kfree(op);
1132 } else {
1133 /*
1134 * the extent has multiple refs and the backref we were looking
1135 * for was not right next to it, so just unlock and go next,
1136 * we're good to go
1137 */
1138 list_del_init(&op->list);
1139 unlock_extent(&info->extent_ins, op->bytenr,
1140 op->bytenr + op->num_bytes - 1, GFP_NOFS);
1141 kfree(op);
1142 }
1143
1144 btrfs_release_path(extent_root, path);
1145 if (!list_empty(del_list))
1146 goto search;
1147
1148out:
1149 btrfs_free_path(path);
1150 return ret;
1151}
1152
1153static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1154 struct btrfs_root *root, u64 bytenr,
1155 u64 orig_parent, u64 parent,
1156 u64 orig_root, u64 ref_root,
1157 u64 orig_generation, u64 ref_generation,
1158 u64 owner_objectid)
1159{
1160 int ret;
1161 struct btrfs_root *extent_root = root->fs_info->extent_root;
1162 struct btrfs_path *path;
1163
1164 if (root == root->fs_info->extent_root) {
1165 struct pending_extent_op *extent_op;
1166 u64 num_bytes;
1167
1168 BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL);
1169 num_bytes = btrfs_level_size(root, (int)owner_objectid);
1170 mutex_lock(&root->fs_info->extent_ins_mutex);
1171 if (test_range_bit(&root->fs_info->extent_ins, bytenr,
1172 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) {
1173 u64 priv;
1174 ret = get_state_private(&root->fs_info->extent_ins,
1175 bytenr, &priv);
1176 BUG_ON(ret);
1177 extent_op = (struct pending_extent_op *)
1178 (unsigned long)priv;
1179 BUG_ON(extent_op->parent != orig_parent);
1180 BUG_ON(extent_op->generation != orig_generation);
1181
1182 extent_op->parent = parent;
1183 extent_op->generation = ref_generation;
1184 } else {
1185 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
1186 BUG_ON(!extent_op);
1187
1188 extent_op->type = PENDING_BACKREF_UPDATE;
1189 extent_op->bytenr = bytenr;
1190 extent_op->num_bytes = num_bytes;
1191 extent_op->parent = parent;
1192 extent_op->orig_parent = orig_parent;
1193 extent_op->generation = ref_generation;
1194 extent_op->orig_generation = orig_generation;
1195 extent_op->level = (int)owner_objectid;
1196 INIT_LIST_HEAD(&extent_op->list);
1197 extent_op->del = 0;
1198
1199 set_extent_bits(&root->fs_info->extent_ins,
1200 bytenr, bytenr + num_bytes - 1,
1201 EXTENT_WRITEBACK, GFP_NOFS);
1202 set_state_private(&root->fs_info->extent_ins,
1203 bytenr, (unsigned long)extent_op);
1204 }
1205 mutex_unlock(&root->fs_info->extent_ins_mutex);
1206 return 0;
1207 }
1208
1209 path = btrfs_alloc_path();
1210 if (!path)
1211 return -ENOMEM;
1212 ret = lookup_extent_backref(trans, extent_root, path,
1213 bytenr, orig_parent, orig_root,
1214 orig_generation, owner_objectid, 1);
1215 if (ret)
1216 goto out;
1217 ret = remove_extent_backref(trans, extent_root, path);
1218 if (ret)
1219 goto out;
1220 ret = insert_extent_backref(trans, extent_root, path, bytenr,
1221 parent, ref_root, ref_generation,
1222 owner_objectid);
1223 BUG_ON(ret);
1224 finish_current_insert(trans, extent_root, 0);
1225 del_pending_extents(trans, extent_root, 0);
1226out:
1227 btrfs_free_path(path);
1228 return ret;
1229}
1230
1231int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1232 struct btrfs_root *root, u64 bytenr,
1233 u64 orig_parent, u64 parent,
1234 u64 ref_root, u64 ref_generation,
1235 u64 owner_objectid)
1236{
1237 int ret;
1238 if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
1239 owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
1240 return 0;
1241 ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent,
1242 parent, ref_root, ref_root,
1243 ref_generation, ref_generation,
1244 owner_objectid);
1245 return ret;
1246}
1247
1248static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1249 struct btrfs_root *root, u64 bytenr,
1250 u64 orig_parent, u64 parent,
1251 u64 orig_root, u64 ref_root,
1252 u64 orig_generation, u64 ref_generation,
1253 u64 owner_objectid)
1254{
1255 struct btrfs_path *path;
1256 int ret;
1257 struct btrfs_key key;
1258 struct extent_buffer *l;
1259 struct btrfs_extent_item *item;
1260 u32 refs;
1261
1262 path = btrfs_alloc_path();
1263 if (!path)
1264 return -ENOMEM;
1265
1266 path->reada = 1;
1267 key.objectid = bytenr;
1268 key.type = BTRFS_EXTENT_ITEM_KEY;
1269 key.offset = (u64)-1;
1270
1271 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
1272 0, 1);
1273 if (ret < 0)
1274 return ret;
1275 BUG_ON(ret == 0 || path->slots[0] == 0);
1276
1277 path->slots[0]--;
1278 l = path->nodes[0];
1279
1280 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1281 if (key.objectid != bytenr) {
1282 btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]);
1283 printk(KERN_ERR "btrfs wanted %llu found %llu\n",
1284 (unsigned long long)bytenr,
1285 (unsigned long long)key.objectid);
1286 BUG();
1287 }
1288 BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
1289
1290 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
1291 refs = btrfs_extent_refs(l, item);
1292 btrfs_set_extent_refs(l, item, refs + 1);
1293 btrfs_mark_buffer_dirty(path->nodes[0]);
1294
1295 btrfs_release_path(root->fs_info->extent_root, path);
1296
1297 path->reada = 1;
1298 ret = insert_extent_backref(trans, root->fs_info->extent_root,
1299 path, bytenr, parent,
1300 ref_root, ref_generation,
1301 owner_objectid);
1302 BUG_ON(ret);
1303 finish_current_insert(trans, root->fs_info->extent_root, 0);
1304 del_pending_extents(trans, root->fs_info->extent_root, 0);
1305
1306 btrfs_free_path(path);
1307 return 0;
1308}
1309
1310int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1311 struct btrfs_root *root,
1312 u64 bytenr, u64 num_bytes, u64 parent,
1313 u64 ref_root, u64 ref_generation,
1314 u64 owner_objectid)
1315{
1316 int ret;
1317 if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
1318 owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
1319 return 0;
1320 ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent,
1321 0, ref_root, 0, ref_generation,
1322 owner_objectid);
1323 return ret;
1324}
1325
1326int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
1327 struct btrfs_root *root)
1328{
1329 finish_current_insert(trans, root->fs_info->extent_root, 1);
1330 del_pending_extents(trans, root->fs_info->extent_root, 1);
1331 return 0;
1332}
1333
1334int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
1335 struct btrfs_root *root, u64 bytenr,
1336 u64 num_bytes, u32 *refs)
1337{
1338 struct btrfs_path *path;
1339 int ret;
1340 struct btrfs_key key;
1341 struct extent_buffer *l;
1342 struct btrfs_extent_item *item;
1343
1344 WARN_ON(num_bytes < root->sectorsize);
1345 path = btrfs_alloc_path();
1346 path->reada = 1;
1347 key.objectid = bytenr;
1348 key.offset = num_bytes;
1349 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1350 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
1351 0, 0);
1352 if (ret < 0)
1353 goto out;
1354 if (ret != 0) {
1355 btrfs_print_leaf(root, path->nodes[0]);
1356 printk(KERN_INFO "btrfs failed to find block number %llu\n",
1357 (unsigned long long)bytenr);
1358 BUG();
1359 }
1360 l = path->nodes[0];
1361 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
1362 *refs = btrfs_extent_refs(l, item);
1363out:
1364 btrfs_free_path(path);
1365 return 0;
1366}
1367
1368int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
1369 struct btrfs_root *root, u64 objectid, u64 bytenr)
1370{
1371 struct btrfs_root *extent_root = root->fs_info->extent_root;
1372 struct btrfs_path *path;
1373 struct extent_buffer *leaf;
1374 struct btrfs_extent_ref *ref_item;
1375 struct btrfs_key key;
1376 struct btrfs_key found_key;
1377 u64 ref_root;
1378 u64 last_snapshot;
1379 u32 nritems;
1380 int ret;
1381
1382 key.objectid = bytenr;
1383 key.offset = (u64)-1;
1384 key.type = BTRFS_EXTENT_ITEM_KEY;
1385
1386 path = btrfs_alloc_path();
1387 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1388 if (ret < 0)
1389 goto out;
1390 BUG_ON(ret == 0);
1391
1392 ret = -ENOENT;
1393 if (path->slots[0] == 0)
1394 goto out;
1395
1396 path->slots[0]--;
1397 leaf = path->nodes[0];
1398 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1399
1400 if (found_key.objectid != bytenr ||
1401 found_key.type != BTRFS_EXTENT_ITEM_KEY)
1402 goto out;
1403
1404 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1405 while (1) {
1406 leaf = path->nodes[0];
1407 nritems = btrfs_header_nritems(leaf);
1408 if (path->slots[0] >= nritems) {
1409 ret = btrfs_next_leaf(extent_root, path);
1410 if (ret < 0)
1411 goto out;
1412 if (ret == 0)
1413 continue;
1414 break;
1415 }
1416 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1417 if (found_key.objectid != bytenr)
1418 break;
1419
1420 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
1421 path->slots[0]++;
1422 continue;
1423 }
1424
1425 ref_item = btrfs_item_ptr(leaf, path->slots[0],
1426 struct btrfs_extent_ref);
1427 ref_root = btrfs_ref_root(leaf, ref_item);
1428 if ((ref_root != root->root_key.objectid &&
1429 ref_root != BTRFS_TREE_LOG_OBJECTID) ||
1430 objectid != btrfs_ref_objectid(leaf, ref_item)) {
1431 ret = 1;
1432 goto out;
1433 }
1434 if (btrfs_ref_generation(leaf, ref_item) <= last_snapshot) {
1435 ret = 1;
1436 goto out;
1437 }
1438
1439 path->slots[0]++;
1440 }
1441 ret = 0;
1442out:
1443 btrfs_free_path(path);
1444 return ret;
1445}
1446
1447int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1448 struct extent_buffer *buf, u32 nr_extents)
1449{
1450 struct btrfs_key key;
1451 struct btrfs_file_extent_item *fi;
1452 u64 root_gen;
1453 u32 nritems;
1454 int i;
1455 int level;
1456 int ret = 0;
1457 int shared = 0;
1458
1459 if (!root->ref_cows)
1460 return 0;
1461
1462 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1463 shared = 0;
1464 root_gen = root->root_key.offset;
1465 } else {
1466 shared = 1;
1467 root_gen = trans->transid - 1;
1468 }
1469
1470 level = btrfs_header_level(buf);
1471 nritems = btrfs_header_nritems(buf);
1472
1473 if (level == 0) {
1474 struct btrfs_leaf_ref *ref;
1475 struct btrfs_extent_info *info;
1476
1477 ref = btrfs_alloc_leaf_ref(root, nr_extents);
1478 if (!ref) {
1479 ret = -ENOMEM;
1480 goto out;
1481 }
1482
1483 ref->root_gen = root_gen;
1484 ref->bytenr = buf->start;
1485 ref->owner = btrfs_header_owner(buf);
1486 ref->generation = btrfs_header_generation(buf);
1487 ref->nritems = nr_extents;
1488 info = ref->extents;
1489
1490 for (i = 0; nr_extents > 0 && i < nritems; i++) {
1491 u64 disk_bytenr;
1492 btrfs_item_key_to_cpu(buf, &key, i);
1493 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1494 continue;
1495 fi = btrfs_item_ptr(buf, i,
1496 struct btrfs_file_extent_item);
1497 if (btrfs_file_extent_type(buf, fi) ==
1498 BTRFS_FILE_EXTENT_INLINE)
1499 continue;
1500 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1501 if (disk_bytenr == 0)
1502 continue;
1503
1504 info->bytenr = disk_bytenr;
1505 info->num_bytes =
1506 btrfs_file_extent_disk_num_bytes(buf, fi);
1507 info->objectid = key.objectid;
1508 info->offset = key.offset;
1509 info++;
1510 }
1511
1512 ret = btrfs_add_leaf_ref(root, ref, shared);
1513 if (ret == -EEXIST && shared) {
1514 struct btrfs_leaf_ref *old;
1515 old = btrfs_lookup_leaf_ref(root, ref->bytenr);
1516 BUG_ON(!old);
1517 btrfs_remove_leaf_ref(root, old);
1518 btrfs_free_leaf_ref(root, old);
1519 ret = btrfs_add_leaf_ref(root, ref, shared);
1520 }
1521 WARN_ON(ret);
1522 btrfs_free_leaf_ref(root, ref);
1523 }
1524out:
1525 return ret;
1526}
1527
1528int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1529 struct extent_buffer *orig_buf, struct extent_buffer *buf,
1530 u32 *nr_extents)
1531{
1532 u64 bytenr;
1533 u64 ref_root;
1534 u64 orig_root;
1535 u64 ref_generation;
1536 u64 orig_generation;
1537 u32 nritems;
1538 u32 nr_file_extents = 0;
1539 struct btrfs_key key;
1540 struct btrfs_file_extent_item *fi;
1541 int i;
1542 int level;
1543 int ret = 0;
1544 int faili = 0;
1545 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
1546 u64, u64, u64, u64, u64, u64, u64, u64);
1547
1548 ref_root = btrfs_header_owner(buf);
1549 ref_generation = btrfs_header_generation(buf);
1550 orig_root = btrfs_header_owner(orig_buf);
1551 orig_generation = btrfs_header_generation(orig_buf);
1552
1553 nritems = btrfs_header_nritems(buf);
1554 level = btrfs_header_level(buf);
1555
1556 if (root->ref_cows) {
1557 process_func = __btrfs_inc_extent_ref;
1558 } else {
1559 if (level == 0 &&
1560 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1561 goto out;
1562 if (level != 0 &&
1563 root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1564 goto out;
1565 process_func = __btrfs_update_extent_ref;
1566 }
1567
1568 for (i = 0; i < nritems; i++) {
1569 cond_resched();
1570 if (level == 0) {
1571 btrfs_item_key_to_cpu(buf, &key, i);
1572 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1573 continue;
1574 fi = btrfs_item_ptr(buf, i,
1575 struct btrfs_file_extent_item);
1576 if (btrfs_file_extent_type(buf, fi) ==
1577 BTRFS_FILE_EXTENT_INLINE)
1578 continue;
1579 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1580 if (bytenr == 0)
1581 continue;
1582
1583 nr_file_extents++;
1584
1585 ret = process_func(trans, root, bytenr,
1586 orig_buf->start, buf->start,
1587 orig_root, ref_root,
1588 orig_generation, ref_generation,
1589 key.objectid);
1590
1591 if (ret) {
1592 faili = i;
1593 WARN_ON(1);
1594 goto fail;
1595 }
1596 } else {
1597 bytenr = btrfs_node_blockptr(buf, i);
1598 ret = process_func(trans, root, bytenr,
1599 orig_buf->start, buf->start,
1600 orig_root, ref_root,
1601 orig_generation, ref_generation,
1602 level - 1);
1603 if (ret) {
1604 faili = i;
1605 WARN_ON(1);
1606 goto fail;
1607 }
1608 }
1609 }
1610out:
1611 if (nr_extents) {
1612 if (level == 0)
1613 *nr_extents = nr_file_extents;
1614 else
1615 *nr_extents = nritems;
1616 }
1617 return 0;
1618fail:
1619 WARN_ON(1);
1620 return ret;
1621}
1622
1623int btrfs_update_ref(struct btrfs_trans_handle *trans,
1624 struct btrfs_root *root, struct extent_buffer *orig_buf,
1625 struct extent_buffer *buf, int start_slot, int nr)
1626
1627{
1628 u64 bytenr;
1629 u64 ref_root;
1630 u64 orig_root;
1631 u64 ref_generation;
1632 u64 orig_generation;
1633 struct btrfs_key key;
1634 struct btrfs_file_extent_item *fi;
1635 int i;
1636 int ret;
1637 int slot;
1638 int level;
1639
1640 BUG_ON(start_slot < 0);
1641 BUG_ON(start_slot + nr > btrfs_header_nritems(buf));
1642
1643 ref_root = btrfs_header_owner(buf);
1644 ref_generation = btrfs_header_generation(buf);
1645 orig_root = btrfs_header_owner(orig_buf);
1646 orig_generation = btrfs_header_generation(orig_buf);
1647 level = btrfs_header_level(buf);
1648
1649 if (!root->ref_cows) {
1650 if (level == 0 &&
1651 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1652 return 0;
1653 if (level != 0 &&
1654 root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1655 return 0;
1656 }
1657
1658 for (i = 0, slot = start_slot; i < nr; i++, slot++) {
1659 cond_resched();
1660 if (level == 0) {
1661 btrfs_item_key_to_cpu(buf, &key, slot);
1662 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1663 continue;
1664 fi = btrfs_item_ptr(buf, slot,
1665 struct btrfs_file_extent_item);
1666 if (btrfs_file_extent_type(buf, fi) ==
1667 BTRFS_FILE_EXTENT_INLINE)
1668 continue;
1669 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1670 if (bytenr == 0)
1671 continue;
1672 ret = __btrfs_update_extent_ref(trans, root, bytenr,
1673 orig_buf->start, buf->start,
1674 orig_root, ref_root,
1675 orig_generation, ref_generation,
1676 key.objectid);
1677 if (ret)
1678 goto fail;
1679 } else {
1680 bytenr = btrfs_node_blockptr(buf, slot);
1681 ret = __btrfs_update_extent_ref(trans, root, bytenr,
1682 orig_buf->start, buf->start,
1683 orig_root, ref_root,
1684 orig_generation, ref_generation,
1685 level - 1);
1686 if (ret)
1687 goto fail;
1688 }
1689 }
1690 return 0;
1691fail:
1692 WARN_ON(1);
1693 return -1;
1694}
1695
1696static int write_one_cache_group(struct btrfs_trans_handle *trans,
1697 struct btrfs_root *root,
1698 struct btrfs_path *path,
1699 struct btrfs_block_group_cache *cache)
1700{
1701 int ret;
1702 int pending_ret;
1703 struct btrfs_root *extent_root = root->fs_info->extent_root;
1704 unsigned long bi;
1705 struct extent_buffer *leaf;
1706
1707 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
1708 if (ret < 0)
1709 goto fail;
1710 BUG_ON(ret);
1711
1712 leaf = path->nodes[0];
1713 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1714 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1715 btrfs_mark_buffer_dirty(leaf);
1716 btrfs_release_path(extent_root, path);
1717fail:
1718 finish_current_insert(trans, extent_root, 0);
1719 pending_ret = del_pending_extents(trans, extent_root, 0);
1720 if (ret)
1721 return ret;
1722 if (pending_ret)
1723 return pending_ret;
1724 return 0;
1725
1726}
1727
1728int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1729 struct btrfs_root *root)
1730{
1731 struct btrfs_block_group_cache *cache, *entry;
1732 struct rb_node *n;
1733 int err = 0;
1734 int werr = 0;
1735 struct btrfs_path *path;
1736 u64 last = 0;
1737
1738 path = btrfs_alloc_path();
1739 if (!path)
1740 return -ENOMEM;
1741
1742 while (1) {
1743 cache = NULL;
1744 spin_lock(&root->fs_info->block_group_cache_lock);
1745 for (n = rb_first(&root->fs_info->block_group_cache_tree);
1746 n; n = rb_next(n)) {
1747 entry = rb_entry(n, struct btrfs_block_group_cache,
1748 cache_node);
1749 if (entry->dirty) {
1750 cache = entry;
1751 break;
1752 }
1753 }
1754 spin_unlock(&root->fs_info->block_group_cache_lock);
1755
1756 if (!cache)
1757 break;
1758
1759 cache->dirty = 0;
1760 last += cache->key.offset;
1761
1762 err = write_one_cache_group(trans, root,
1763 path, cache);
1764 /*
1765 * if we fail to write the cache group, we want
1766 * to keep it marked dirty in hopes that a later
1767 * write will work
1768 */
1769 if (err) {
1770 werr = err;
1771 continue;
1772 }
1773 }
1774 btrfs_free_path(path);
1775 return werr;
1776}
1777
1778int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
1779{
1780 struct btrfs_block_group_cache *block_group;
1781 int readonly = 0;
1782
1783 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
1784 if (!block_group || block_group->ro)
1785 readonly = 1;
1786 if (block_group)
1787 put_block_group(block_group);
1788 return readonly;
1789}
1790
1791static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1792 u64 total_bytes, u64 bytes_used,
1793 struct btrfs_space_info **space_info)
1794{
1795 struct btrfs_space_info *found;
1796
1797 found = __find_space_info(info, flags);
1798 if (found) {
1799 spin_lock(&found->lock);
1800 found->total_bytes += total_bytes;
1801 found->bytes_used += bytes_used;
1802 found->full = 0;
1803 spin_unlock(&found->lock);
1804 *space_info = found;
1805 return 0;
1806 }
1807 found = kzalloc(sizeof(*found), GFP_NOFS);
1808 if (!found)
1809 return -ENOMEM;
1810
1811 list_add(&found->list, &info->space_info);
1812 INIT_LIST_HEAD(&found->block_groups);
1813 init_rwsem(&found->groups_sem);
1814 spin_lock_init(&found->lock);
1815 found->flags = flags;
1816 found->total_bytes = total_bytes;
1817 found->bytes_used = bytes_used;
1818 found->bytes_pinned = 0;
1819 found->bytes_reserved = 0;
1820 found->bytes_readonly = 0;
1821 found->full = 0;
1822 found->force_alloc = 0;
1823 *space_info = found;
1824 return 0;
1825}
1826
1827static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1828{
1829 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1830 BTRFS_BLOCK_GROUP_RAID1 |
1831 BTRFS_BLOCK_GROUP_RAID10 |
1832 BTRFS_BLOCK_GROUP_DUP);
1833 if (extra_flags) {
1834 if (flags & BTRFS_BLOCK_GROUP_DATA)
1835 fs_info->avail_data_alloc_bits |= extra_flags;
1836 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1837 fs_info->avail_metadata_alloc_bits |= extra_flags;
1838 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1839 fs_info->avail_system_alloc_bits |= extra_flags;
1840 }
1841}
1842
1843static void set_block_group_readonly(struct btrfs_block_group_cache *cache)
1844{
1845 spin_lock(&cache->space_info->lock);
1846 spin_lock(&cache->lock);
1847 if (!cache->ro) {
1848 cache->space_info->bytes_readonly += cache->key.offset -
1849 btrfs_block_group_used(&cache->item);
1850 cache->ro = 1;
1851 }
1852 spin_unlock(&cache->lock);
1853 spin_unlock(&cache->space_info->lock);
1854}
1855
1856u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1857{
1858 u64 num_devices = root->fs_info->fs_devices->rw_devices;
1859
1860 if (num_devices == 1)
1861 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1862 if (num_devices < 4)
1863 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1864
1865 if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1866 (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1867 BTRFS_BLOCK_GROUP_RAID10))) {
1868 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1869 }
1870
1871 if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1872 (flags & BTRFS_BLOCK_GROUP_RAID10)) {
1873 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1874 }
1875
1876 if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1877 ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1878 (flags & BTRFS_BLOCK_GROUP_RAID10) |
1879 (flags & BTRFS_BLOCK_GROUP_DUP)))
1880 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1881 return flags;
1882}
1883
1884static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1885 struct btrfs_root *extent_root, u64 alloc_bytes,
1886 u64 flags, int force)
1887{
1888 struct btrfs_space_info *space_info;
1889 u64 thresh;
1890 int ret = 0;
1891
1892 mutex_lock(&extent_root->fs_info->chunk_mutex);
1893
1894 flags = btrfs_reduce_alloc_profile(extent_root, flags);
1895
1896 space_info = __find_space_info(extent_root->fs_info, flags);
1897 if (!space_info) {
1898 ret = update_space_info(extent_root->fs_info, flags,
1899 0, 0, &space_info);
1900 BUG_ON(ret);
1901 }
1902 BUG_ON(!space_info);
1903
1904 spin_lock(&space_info->lock);
1905 if (space_info->force_alloc) {
1906 force = 1;
1907 space_info->force_alloc = 0;
1908 }
1909 if (space_info->full) {
1910 spin_unlock(&space_info->lock);
1911 goto out;
1912 }
1913
1914 thresh = space_info->total_bytes - space_info->bytes_readonly;
1915 thresh = div_factor(thresh, 6);
1916 if (!force &&
1917 (space_info->bytes_used + space_info->bytes_pinned +
1918 space_info->bytes_reserved + alloc_bytes) < thresh) {
1919 spin_unlock(&space_info->lock);
1920 goto out;
1921 }
1922 spin_unlock(&space_info->lock);
1923
1924 ret = btrfs_alloc_chunk(trans, extent_root, flags);
1925 if (ret)
1926 space_info->full = 1;
1927out:
1928 mutex_unlock(&extent_root->fs_info->chunk_mutex);
1929 return ret;
1930}
1931
1932static int update_block_group(struct btrfs_trans_handle *trans,
1933 struct btrfs_root *root,
1934 u64 bytenr, u64 num_bytes, int alloc,
1935 int mark_free)
1936{
1937 struct btrfs_block_group_cache *cache;
1938 struct btrfs_fs_info *info = root->fs_info;
1939 u64 total = num_bytes;
1940 u64 old_val;
1941 u64 byte_in_group;
1942
1943 while (total) {
1944 cache = btrfs_lookup_block_group(info, bytenr);
1945 if (!cache)
1946 return -1;
1947 byte_in_group = bytenr - cache->key.objectid;
1948 WARN_ON(byte_in_group > cache->key.offset);
1949
1950 spin_lock(&cache->space_info->lock);
1951 spin_lock(&cache->lock);
1952 cache->dirty = 1;
1953 old_val = btrfs_block_group_used(&cache->item);
1954 num_bytes = min(total, cache->key.offset - byte_in_group);
1955 if (alloc) {
1956 old_val += num_bytes;
1957 cache->space_info->bytes_used += num_bytes;
1958 if (cache->ro)
1959 cache->space_info->bytes_readonly -= num_bytes;
1960 btrfs_set_block_group_used(&cache->item, old_val);
1961 spin_unlock(&cache->lock);
1962 spin_unlock(&cache->space_info->lock);
1963 } else {
1964 old_val -= num_bytes;
1965 cache->space_info->bytes_used -= num_bytes;
1966 if (cache->ro)
1967 cache->space_info->bytes_readonly += num_bytes;
1968 btrfs_set_block_group_used(&cache->item, old_val);
1969 spin_unlock(&cache->lock);
1970 spin_unlock(&cache->space_info->lock);
1971 if (mark_free) {
1972 int ret;
1973
1974 ret = btrfs_discard_extent(root, bytenr,
1975 num_bytes);
1976 WARN_ON(ret);
1977
1978 ret = btrfs_add_free_space(cache, bytenr,
1979 num_bytes);
1980 WARN_ON(ret);
1981 }
1982 }
1983 put_block_group(cache);
1984 total -= num_bytes;
1985 bytenr += num_bytes;
1986 }
1987 return 0;
1988}
1989
1990static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
1991{
1992 struct btrfs_block_group_cache *cache;
1993 u64 bytenr;
1994
1995 cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
1996 if (!cache)
1997 return 0;
1998
1999 bytenr = cache->key.objectid;
2000 put_block_group(cache);
2001
2002 return bytenr;
2003}
2004
2005int btrfs_update_pinned_extents(struct btrfs_root *root,
2006 u64 bytenr, u64 num, int pin)
2007{
2008 u64 len;
2009 struct btrfs_block_group_cache *cache;
2010 struct btrfs_fs_info *fs_info = root->fs_info;
2011
2012 WARN_ON(!mutex_is_locked(&root->fs_info->pinned_mutex));
2013 if (pin) {
2014 set_extent_dirty(&fs_info->pinned_extents,
2015 bytenr, bytenr + num - 1, GFP_NOFS);
2016 } else {
2017 clear_extent_dirty(&fs_info->pinned_extents,
2018 bytenr, bytenr + num - 1, GFP_NOFS);
2019 }
2020 while (num > 0) {
2021 cache = btrfs_lookup_block_group(fs_info, bytenr);
2022 BUG_ON(!cache);
2023 len = min(num, cache->key.offset -
2024 (bytenr - cache->key.objectid));
2025 if (pin) {
2026 spin_lock(&cache->space_info->lock);
2027 spin_lock(&cache->lock);
2028 cache->pinned += len;
2029 cache->space_info->bytes_pinned += len;
2030 spin_unlock(&cache->lock);
2031 spin_unlock(&cache->space_info->lock);
2032 fs_info->total_pinned += len;
2033 } else {
2034 spin_lock(&cache->space_info->lock);
2035 spin_lock(&cache->lock);
2036 cache->pinned -= len;
2037 cache->space_info->bytes_pinned -= len;
2038 spin_unlock(&cache->lock);
2039 spin_unlock(&cache->space_info->lock);
2040 fs_info->total_pinned -= len;
2041 if (cache->cached)
2042 btrfs_add_free_space(cache, bytenr, len);
2043 }
2044 put_block_group(cache);
2045 bytenr += len;
2046 num -= len;
2047 }
2048 return 0;
2049}
2050
2051static int update_reserved_extents(struct btrfs_root *root,
2052 u64 bytenr, u64 num, int reserve)
2053{
2054 u64 len;
2055 struct btrfs_block_group_cache *cache;
2056 struct btrfs_fs_info *fs_info = root->fs_info;
2057
2058 while (num > 0) {
2059 cache = btrfs_lookup_block_group(fs_info, bytenr);
2060 BUG_ON(!cache);
2061 len = min(num, cache->key.offset -
2062 (bytenr - cache->key.objectid));
2063
2064 spin_lock(&cache->space_info->lock);
2065 spin_lock(&cache->lock);
2066 if (reserve) {
2067 cache->reserved += len;
2068 cache->space_info->bytes_reserved += len;
2069 } else {
2070 cache->reserved -= len;
2071 cache->space_info->bytes_reserved -= len;
2072 }
2073 spin_unlock(&cache->lock);
2074 spin_unlock(&cache->space_info->lock);
2075 put_block_group(cache);
2076 bytenr += len;
2077 num -= len;
2078 }
2079 return 0;
2080}
2081
2082int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
2083{
2084 u64 last = 0;
2085 u64 start;
2086 u64 end;
2087 struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
2088 int ret;
2089
2090 mutex_lock(&root->fs_info->pinned_mutex);
2091 while (1) {
2092 ret = find_first_extent_bit(pinned_extents, last,
2093 &start, &end, EXTENT_DIRTY);
2094 if (ret)
2095 break;
2096 set_extent_dirty(copy, start, end, GFP_NOFS);
2097 last = end + 1;
2098 }
2099 mutex_unlock(&root->fs_info->pinned_mutex);
2100 return 0;
2101}
2102
2103int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
2104 struct btrfs_root *root,
2105 struct extent_io_tree *unpin)
2106{
2107 u64 start;
2108 u64 end;
2109 int ret;
2110
2111 mutex_lock(&root->fs_info->pinned_mutex);
2112 while (1) {
2113 ret = find_first_extent_bit(unpin, 0, &start, &end,
2114 EXTENT_DIRTY);
2115 if (ret)
2116 break;
2117
2118 ret = btrfs_discard_extent(root, start, end + 1 - start);
2119
2120 btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
2121 clear_extent_dirty(unpin, start, end, GFP_NOFS);
2122
2123 if (need_resched()) {
2124 mutex_unlock(&root->fs_info->pinned_mutex);
2125 cond_resched();
2126 mutex_lock(&root->fs_info->pinned_mutex);
2127 }
2128 }
2129 mutex_unlock(&root->fs_info->pinned_mutex);
2130 return ret;
2131}
2132
2133static int finish_current_insert(struct btrfs_trans_handle *trans,
2134 struct btrfs_root *extent_root, int all)
2135{
2136 u64 start;
2137 u64 end;
2138 u64 priv;
2139 u64 search = 0;
2140 u64 skipped = 0;
2141 struct btrfs_fs_info *info = extent_root->fs_info;
2142 struct btrfs_path *path;
2143 struct pending_extent_op *extent_op, *tmp;
2144 struct list_head insert_list, update_list;
2145 int ret;
2146 int num_inserts = 0, max_inserts;
2147
2148 path = btrfs_alloc_path();
2149 INIT_LIST_HEAD(&insert_list);
2150 INIT_LIST_HEAD(&update_list);
2151
2152 max_inserts = extent_root->leafsize /
2153 (2 * sizeof(struct btrfs_key) + 2 * sizeof(struct btrfs_item) +
2154 sizeof(struct btrfs_extent_ref) +
2155 sizeof(struct btrfs_extent_item));
2156again:
2157 mutex_lock(&info->extent_ins_mutex);
2158 while (1) {
2159 ret = find_first_extent_bit(&info->extent_ins, search, &start,
2160 &end, EXTENT_WRITEBACK);
2161 if (ret) {
2162 if (skipped && all && !num_inserts) {
2163 skipped = 0;
2164 search = 0;
2165 continue;
2166 }
2167 mutex_unlock(&info->extent_ins_mutex);
2168 break;
2169 }
2170
2171 ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS);
2172 if (!ret) {
2173 skipped = 1;
2174 search = end + 1;
2175 if (need_resched()) {
2176 mutex_unlock(&info->extent_ins_mutex);
2177 cond_resched();
2178 mutex_lock(&info->extent_ins_mutex);
2179 }
2180 continue;
2181 }
2182
2183 ret = get_state_private(&info->extent_ins, start, &priv);
2184 BUG_ON(ret);
2185 extent_op = (struct pending_extent_op *)(unsigned long) priv;
2186
2187 if (extent_op->type == PENDING_EXTENT_INSERT) {
2188 num_inserts++;
2189 list_add_tail(&extent_op->list, &insert_list);
2190 search = end + 1;
2191 if (num_inserts == max_inserts) {
2192 mutex_unlock(&info->extent_ins_mutex);
2193 break;
2194 }
2195 } else if (extent_op->type == PENDING_BACKREF_UPDATE) {
2196 list_add_tail(&extent_op->list, &update_list);
2197 search = end + 1;
2198 } else {
2199 BUG();
2200 }
2201 }
2202
2203 /*
2204 * process the update list, clear the writeback bit for it, and if
2205 * somebody marked this thing for deletion then just unlock it and be
2206 * done, the free_extents will handle it
2207 */
2208 mutex_lock(&info->extent_ins_mutex);
2209 list_for_each_entry_safe(extent_op, tmp, &update_list, list) {
2210 clear_extent_bits(&info->extent_ins, extent_op->bytenr,
2211 extent_op->bytenr + extent_op->num_bytes - 1,
2212 EXTENT_WRITEBACK, GFP_NOFS);
2213 if (extent_op->del) {
2214 list_del_init(&extent_op->list);
2215 unlock_extent(&info->extent_ins, extent_op->bytenr,
2216 extent_op->bytenr + extent_op->num_bytes
2217 - 1, GFP_NOFS);
2218 kfree(extent_op);
2219 }
2220 }
2221 mutex_unlock(&info->extent_ins_mutex);
2222
2223 /*
2224 * still have things left on the update list, go ahead an update
2225 * everything
2226 */
2227 if (!list_empty(&update_list)) {
2228 ret = update_backrefs(trans, extent_root, path, &update_list);
2229 BUG_ON(ret);
2230 }
2231
2232 /*
2233 * if no inserts need to be done, but we skipped some extents and we
2234 * need to make sure everything is cleaned then reset everything and
2235 * go back to the beginning
2236 */
2237 if (!num_inserts && all && skipped) {
2238 search = 0;
2239 skipped = 0;
2240 INIT_LIST_HEAD(&update_list);
2241 INIT_LIST_HEAD(&insert_list);
2242 goto again;
2243 } else if (!num_inserts) {
2244 goto out;
2245 }
2246
2247 /*
2248 * process the insert extents list. Again if we are deleting this
2249 * extent, then just unlock it, pin down the bytes if need be, and be
2250 * done with it. Saves us from having to actually insert the extent
2251 * into the tree and then subsequently come along and delete it
2252 */
2253 mutex_lock(&info->extent_ins_mutex);
2254 list_for_each_entry_safe(extent_op, tmp, &insert_list, list) {
2255 clear_extent_bits(&info->extent_ins, extent_op->bytenr,
2256 extent_op->bytenr + extent_op->num_bytes - 1,
2257 EXTENT_WRITEBACK, GFP_NOFS);
2258 if (extent_op->del) {
2259 u64 used;
2260 list_del_init(&extent_op->list);
2261 unlock_extent(&info->extent_ins, extent_op->bytenr,
2262 extent_op->bytenr + extent_op->num_bytes
2263 - 1, GFP_NOFS);
2264
2265 mutex_lock(&extent_root->fs_info->pinned_mutex);
2266 ret = pin_down_bytes(trans, extent_root,
2267 extent_op->bytenr,
2268 extent_op->num_bytes, 0);
2269 mutex_unlock(&extent_root->fs_info->pinned_mutex);
2270
2271 spin_lock(&info->delalloc_lock);
2272 used = btrfs_super_bytes_used(&info->super_copy);
2273 btrfs_set_super_bytes_used(&info->super_copy,
2274 used - extent_op->num_bytes);
2275 used = btrfs_root_used(&extent_root->root_item);
2276 btrfs_set_root_used(&extent_root->root_item,
2277 used - extent_op->num_bytes);
2278 spin_unlock(&info->delalloc_lock);
2279
2280 ret = update_block_group(trans, extent_root,
2281 extent_op->bytenr,
2282 extent_op->num_bytes,
2283 0, ret > 0);
2284 BUG_ON(ret);
2285 kfree(extent_op);
2286 num_inserts--;
2287 }
2288 }
2289 mutex_unlock(&info->extent_ins_mutex);
2290
2291 ret = insert_extents(trans, extent_root, path, &insert_list,
2292 num_inserts);
2293 BUG_ON(ret);
2294
2295 /*
2296 * if we broke out of the loop in order to insert stuff because we hit
2297 * the maximum number of inserts at a time we can handle, then loop
2298 * back and pick up where we left off
2299 */
2300 if (num_inserts == max_inserts) {
2301 INIT_LIST_HEAD(&insert_list);
2302 INIT_LIST_HEAD(&update_list);
2303 num_inserts = 0;
2304 goto again;
2305 }
2306
2307 /*
2308 * again, if we need to make absolutely sure there are no more pending
2309 * extent operations left and we know that we skipped some, go back to
2310 * the beginning and do it all again
2311 */
2312 if (all && skipped) {
2313 INIT_LIST_HEAD(&insert_list);
2314 INIT_LIST_HEAD(&update_list);
2315 search = 0;
2316 skipped = 0;
2317 num_inserts = 0;
2318 goto again;
2319 }
2320out:
2321 btrfs_free_path(path);
2322 return 0;
2323}
2324
2325static int pin_down_bytes(struct btrfs_trans_handle *trans,
2326 struct btrfs_root *root,
2327 u64 bytenr, u64 num_bytes, int is_data)
2328{
2329 int err = 0;
2330 struct extent_buffer *buf;
2331
2332 if (is_data)
2333 goto pinit;
2334
2335 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
2336 if (!buf)
2337 goto pinit;
2338
2339 /* we can reuse a block if it hasn't been written
2340 * and it is from this transaction. We can't
2341 * reuse anything from the tree log root because
2342 * it has tiny sub-transactions.
2343 */
2344 if (btrfs_buffer_uptodate(buf, 0) &&
2345 btrfs_try_tree_lock(buf)) {
2346 u64 header_owner = btrfs_header_owner(buf);
2347 u64 header_transid = btrfs_header_generation(buf);
2348 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
2349 header_owner != BTRFS_TREE_RELOC_OBJECTID &&
2350 header_transid == trans->transid &&
2351 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
2352 clean_tree_block(NULL, root, buf);
2353 btrfs_tree_unlock(buf);
2354 free_extent_buffer(buf);
2355 return 1;
2356 }
2357 btrfs_tree_unlock(buf);
2358 }
2359 free_extent_buffer(buf);
2360pinit:
2361 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
2362
2363 BUG_ON(err < 0);
2364 return 0;
2365}
2366
2367/*
2368 * remove an extent from the root, returns 0 on success
2369 */
2370static int __free_extent(struct btrfs_trans_handle *trans,
2371 struct btrfs_root *root,
2372 u64 bytenr, u64 num_bytes, u64 parent,
2373 u64 root_objectid, u64 ref_generation,
2374 u64 owner_objectid, int pin, int mark_free)
2375{
2376 struct btrfs_path *path;
2377 struct btrfs_key key;
2378 struct btrfs_fs_info *info = root->fs_info;
2379 struct btrfs_root *extent_root = info->extent_root;
2380 struct extent_buffer *leaf;
2381 int ret;
2382 int extent_slot = 0;
2383 int found_extent = 0;
2384 int num_to_del = 1;
2385 struct btrfs_extent_item *ei;
2386 u32 refs;
2387
2388 key.objectid = bytenr;
2389 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
2390 key.offset = num_bytes;
2391 path = btrfs_alloc_path();
2392 if (!path)
2393 return -ENOMEM;
2394
2395 path->reada = 1;
2396 ret = lookup_extent_backref(trans, extent_root, path,
2397 bytenr, parent, root_objectid,
2398 ref_generation, owner_objectid, 1);
2399 if (ret == 0) {
2400 struct btrfs_key found_key;
2401 extent_slot = path->slots[0];
2402 while (extent_slot > 0) {
2403 extent_slot--;
2404 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2405 extent_slot);
2406 if (found_key.objectid != bytenr)
2407 break;
2408 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
2409 found_key.offset == num_bytes) {
2410 found_extent = 1;
2411 break;
2412 }
2413 if (path->slots[0] - extent_slot > 5)
2414 break;
2415 }
2416 if (!found_extent) {
2417 ret = remove_extent_backref(trans, extent_root, path);
2418 BUG_ON(ret);
2419 btrfs_release_path(extent_root, path);
2420 ret = btrfs_search_slot(trans, extent_root,
2421 &key, path, -1, 1);
2422 if (ret) {
2423 printk(KERN_ERR "umm, got %d back from search"
2424 ", was looking for %llu\n", ret,
2425 (unsigned long long)bytenr);
2426 btrfs_print_leaf(extent_root, path->nodes[0]);
2427 }
2428 BUG_ON(ret);
2429 extent_slot = path->slots[0];
2430 }
2431 } else {
2432 btrfs_print_leaf(extent_root, path->nodes[0]);
2433 WARN_ON(1);
2434 printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
2435 "root %llu gen %llu owner %llu\n",
2436 (unsigned long long)bytenr,
2437 (unsigned long long)root_objectid,
2438 (unsigned long long)ref_generation,
2439 (unsigned long long)owner_objectid);
2440 }
2441
2442 leaf = path->nodes[0];
2443 ei = btrfs_item_ptr(leaf, extent_slot,
2444 struct btrfs_extent_item);
2445 refs = btrfs_extent_refs(leaf, ei);
2446 BUG_ON(refs == 0);
2447 refs -= 1;
2448 btrfs_set_extent_refs(leaf, ei, refs);
2449
2450 btrfs_mark_buffer_dirty(leaf);
2451
2452 if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
2453 struct btrfs_extent_ref *ref;
2454 ref = btrfs_item_ptr(leaf, path->slots[0],
2455 struct btrfs_extent_ref);
2456 BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1);
2457 /* if the back ref and the extent are next to each other
2458 * they get deleted below in one shot
2459 */
2460 path->slots[0] = extent_slot;
2461 num_to_del = 2;
2462 } else if (found_extent) {
2463 /* otherwise delete the extent back ref */
2464 ret = remove_extent_backref(trans, extent_root, path);
2465 BUG_ON(ret);
2466 /* if refs are 0, we need to setup the path for deletion */
2467 if (refs == 0) {
2468 btrfs_release_path(extent_root, path);
2469 ret = btrfs_search_slot(trans, extent_root, &key, path,
2470 -1, 1);
2471 BUG_ON(ret);
2472 }
2473 }
2474
2475 if (refs == 0) {
2476 u64 super_used;
2477 u64 root_used;
2478
2479 if (pin) {
2480 mutex_lock(&root->fs_info->pinned_mutex);
2481 ret = pin_down_bytes(trans, root, bytenr, num_bytes,
2482 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID);
2483 mutex_unlock(&root->fs_info->pinned_mutex);
2484 if (ret > 0)
2485 mark_free = 1;
2486 BUG_ON(ret < 0);
2487 }
2488 /* block accounting for super block */
2489 spin_lock(&info->delalloc_lock);
2490 super_used = btrfs_super_bytes_used(&info->super_copy);
2491 btrfs_set_super_bytes_used(&info->super_copy,
2492 super_used - num_bytes);
2493
2494 /* block accounting for root item */
2495 root_used = btrfs_root_used(&root->root_item);
2496 btrfs_set_root_used(&root->root_item,
2497 root_used - num_bytes);
2498 spin_unlock(&info->delalloc_lock);
2499 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
2500 num_to_del);
2501 BUG_ON(ret);
2502 btrfs_release_path(extent_root, path);
2503
2504 if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2505 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
2506 BUG_ON(ret);
2507 }
2508
2509 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
2510 mark_free);
2511 BUG_ON(ret);
2512 }
2513 btrfs_free_path(path);
2514 finish_current_insert(trans, extent_root, 0);
2515 return ret;
2516}
2517
2518/*
2519 * find all the blocks marked as pending in the radix tree and remove
2520 * them from the extent map
2521 */
2522static int del_pending_extents(struct btrfs_trans_handle *trans,
2523 struct btrfs_root *extent_root, int all)
2524{
2525 int ret;
2526 int err = 0;
2527 u64 start;
2528 u64 end;
2529 u64 priv;
2530 u64 search = 0;
2531 int nr = 0, skipped = 0;
2532 struct extent_io_tree *pending_del;
2533 struct extent_io_tree *extent_ins;
2534 struct pending_extent_op *extent_op;
2535 struct btrfs_fs_info *info = extent_root->fs_info;
2536 struct list_head delete_list;
2537
2538 INIT_LIST_HEAD(&delete_list);
2539 extent_ins = &extent_root->fs_info->extent_ins;
2540 pending_del = &extent_root->fs_info->pending_del;
2541
2542again:
2543 mutex_lock(&info->extent_ins_mutex);
2544 while (1) {
2545 ret = find_first_extent_bit(pending_del, search, &start, &end,
2546 EXTENT_WRITEBACK);
2547 if (ret) {
2548 if (all && skipped && !nr) {
2549 search = 0;
2550 continue;
2551 }
2552 mutex_unlock(&info->extent_ins_mutex);
2553 break;
2554 }
2555
2556 ret = try_lock_extent(extent_ins, start, end, GFP_NOFS);
2557 if (!ret) {
2558 search = end+1;
2559 skipped = 1;
2560
2561 if (need_resched()) {
2562 mutex_unlock(&info->extent_ins_mutex);
2563 cond_resched();
2564 mutex_lock(&info->extent_ins_mutex);
2565 }
2566
2567 continue;
2568 }
2569 BUG_ON(ret < 0);
2570
2571 ret = get_state_private(pending_del, start, &priv);
2572 BUG_ON(ret);
2573 extent_op = (struct pending_extent_op *)(unsigned long)priv;
2574
2575 clear_extent_bits(pending_del, start, end, EXTENT_WRITEBACK,
2576 GFP_NOFS);
2577 if (!test_range_bit(extent_ins, start, end,
2578 EXTENT_WRITEBACK, 0)) {
2579 list_add_tail(&extent_op->list, &delete_list);
2580 nr++;
2581 } else {
2582 kfree(extent_op);
2583
2584 ret = get_state_private(&info->extent_ins, start,
2585 &priv);
2586 BUG_ON(ret);
2587 extent_op = (struct pending_extent_op *)
2588 (unsigned long)priv;
2589
2590 clear_extent_bits(&info->extent_ins, start, end,
2591 EXTENT_WRITEBACK, GFP_NOFS);
2592
2593 if (extent_op->type == PENDING_BACKREF_UPDATE) {
2594 list_add_tail(&extent_op->list, &delete_list);
2595 search = end + 1;
2596 nr++;
2597 continue;
2598 }
2599
2600 mutex_lock(&extent_root->fs_info->pinned_mutex);
2601 ret = pin_down_bytes(trans, extent_root, start,
2602 end + 1 - start, 0);
2603 mutex_unlock(&extent_root->fs_info->pinned_mutex);
2604
2605 ret = update_block_group(trans, extent_root, start,
2606 end + 1 - start, 0, ret > 0);
2607
2608 unlock_extent(extent_ins, start, end, GFP_NOFS);
2609 BUG_ON(ret);
2610 kfree(extent_op);
2611 }
2612 if (ret)
2613 err = ret;
2614
2615 search = end + 1;
2616
2617 if (need_resched()) {
2618 mutex_unlock(&info->extent_ins_mutex);
2619 cond_resched();
2620 mutex_lock(&info->extent_ins_mutex);
2621 }
2622 }
2623
2624 if (nr) {
2625 ret = free_extents(trans, extent_root, &delete_list);
2626 BUG_ON(ret);
2627 }
2628
2629 if (all && skipped) {
2630 INIT_LIST_HEAD(&delete_list);
2631 search = 0;
2632 nr = 0;
2633 goto again;
2634 }
2635
2636 return err;
2637}
2638
2639/*
2640 * remove an extent from the root, returns 0 on success
2641 */
2642static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2643 struct btrfs_root *root,
2644 u64 bytenr, u64 num_bytes, u64 parent,
2645 u64 root_objectid, u64 ref_generation,
2646 u64 owner_objectid, int pin)
2647{
2648 struct btrfs_root *extent_root = root->fs_info->extent_root;
2649 int pending_ret;
2650 int ret;
2651
2652 WARN_ON(num_bytes < root->sectorsize);
2653 if (root == extent_root) {
2654 struct pending_extent_op *extent_op = NULL;
2655
2656 mutex_lock(&root->fs_info->extent_ins_mutex);
2657 if (test_range_bit(&root->fs_info->extent_ins, bytenr,
2658 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) {
2659 u64 priv;
2660 ret = get_state_private(&root->fs_info->extent_ins,
2661 bytenr, &priv);
2662 BUG_ON(ret);
2663 extent_op = (struct pending_extent_op *)
2664 (unsigned long)priv;
2665
2666 extent_op->del = 1;
2667 if (extent_op->type == PENDING_EXTENT_INSERT) {
2668 mutex_unlock(&root->fs_info->extent_ins_mutex);
2669 return 0;
2670 }
2671 }
2672
2673 if (extent_op) {
2674 ref_generation = extent_op->orig_generation;
2675 parent = extent_op->orig_parent;
2676 }
2677
2678 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2679 BUG_ON(!extent_op);
2680
2681 extent_op->type = PENDING_EXTENT_DELETE;
2682 extent_op->bytenr = bytenr;
2683 extent_op->num_bytes = num_bytes;
2684 extent_op->parent = parent;
2685 extent_op->orig_parent = parent;
2686 extent_op->generation = ref_generation;
2687 extent_op->orig_generation = ref_generation;
2688 extent_op->level = (int)owner_objectid;
2689 INIT_LIST_HEAD(&extent_op->list);
2690 extent_op->del = 0;
2691
2692 set_extent_bits(&root->fs_info->pending_del,
2693 bytenr, bytenr + num_bytes - 1,
2694 EXTENT_WRITEBACK, GFP_NOFS);
2695 set_state_private(&root->fs_info->pending_del,
2696 bytenr, (unsigned long)extent_op);
2697 mutex_unlock(&root->fs_info->extent_ins_mutex);
2698 return 0;
2699 }
2700 /* if metadata always pin */
2701 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
2702 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
2703 struct btrfs_block_group_cache *cache;
2704
2705 /* btrfs_free_reserved_extent */
2706 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
2707 BUG_ON(!cache);
2708 btrfs_add_free_space(cache, bytenr, num_bytes);
2709 put_block_group(cache);
2710 update_reserved_extents(root, bytenr, num_bytes, 0);
2711 return 0;
2712 }
2713 pin = 1;
2714 }
2715
2716 /* if data pin when any transaction has committed this */
2717 if (ref_generation != trans->transid)
2718 pin = 1;
2719
2720 ret = __free_extent(trans, root, bytenr, num_bytes, parent,
2721 root_objectid, ref_generation,
2722 owner_objectid, pin, pin == 0);
2723
2724 finish_current_insert(trans, root->fs_info->extent_root, 0);
2725 pending_ret = del_pending_extents(trans, root->fs_info->extent_root, 0);
2726 return ret ? ret : pending_ret;
2727}
2728
2729int btrfs_free_extent(struct btrfs_trans_handle *trans,
2730 struct btrfs_root *root,
2731 u64 bytenr, u64 num_bytes, u64 parent,
2732 u64 root_objectid, u64 ref_generation,
2733 u64 owner_objectid, int pin)
2734{
2735 int ret;
2736
2737 ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent,
2738 root_objectid, ref_generation,
2739 owner_objectid, pin);
2740 return ret;
2741}
2742
2743static u64 stripe_align(struct btrfs_root *root, u64 val)
2744{
2745 u64 mask = ((u64)root->stripesize - 1);
2746 u64 ret = (val + mask) & ~mask;
2747 return ret;
2748}
2749
2750/*
2751 * walks the btree of allocated extents and find a hole of a given size.
2752 * The key ins is changed to record the hole:
2753 * ins->objectid == block start
2754 * ins->flags = BTRFS_EXTENT_ITEM_KEY
2755 * ins->offset == number of blocks
2756 * Any available blocks before search_start are skipped.
2757 */
2758static noinline int find_free_extent(struct btrfs_trans_handle *trans,
2759 struct btrfs_root *orig_root,
2760 u64 num_bytes, u64 empty_size,
2761 u64 search_start, u64 search_end,
2762 u64 hint_byte, struct btrfs_key *ins,
2763 u64 exclude_start, u64 exclude_nr,
2764 int data)
2765{
2766 int ret = 0;
2767 struct btrfs_root *root = orig_root->fs_info->extent_root;
2768 u64 total_needed = num_bytes;
2769 u64 *last_ptr = NULL;
2770 u64 last_wanted = 0;
2771 struct btrfs_block_group_cache *block_group = NULL;
2772 int chunk_alloc_done = 0;
2773 int empty_cluster = 2 * 1024 * 1024;
2774 int allowed_chunk_alloc = 0;
2775 struct list_head *head = NULL, *cur = NULL;
2776 int loop = 0;
2777 int extra_loop = 0;
2778 struct btrfs_space_info *space_info;
2779
2780 WARN_ON(num_bytes < root->sectorsize);
2781 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
2782 ins->objectid = 0;
2783 ins->offset = 0;
2784
2785 if (orig_root->ref_cows || empty_size)
2786 allowed_chunk_alloc = 1;
2787
2788 if (data & BTRFS_BLOCK_GROUP_METADATA) {
2789 last_ptr = &root->fs_info->last_alloc;
2790 empty_cluster = 64 * 1024;
2791 }
2792
2793 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD))
2794 last_ptr = &root->fs_info->last_data_alloc;
2795
2796 if (last_ptr) {
2797 if (*last_ptr) {
2798 hint_byte = *last_ptr;
2799 last_wanted = *last_ptr;
2800 } else
2801 empty_size += empty_cluster;
2802 } else {
2803 empty_cluster = 0;
2804 }
2805 search_start = max(search_start, first_logical_byte(root, 0));
2806 search_start = max(search_start, hint_byte);
2807
2808 if (last_wanted && search_start != last_wanted) {
2809 last_wanted = 0;
2810 empty_size += empty_cluster;
2811 }
2812
2813 total_needed += empty_size;
2814 block_group = btrfs_lookup_block_group(root->fs_info, search_start);
2815 if (!block_group)
2816 block_group = btrfs_lookup_first_block_group(root->fs_info,
2817 search_start);
2818 space_info = __find_space_info(root->fs_info, data);
2819
2820 down_read(&space_info->groups_sem);
2821 while (1) {
2822 struct btrfs_free_space *free_space;
2823 /*
2824 * the only way this happens if our hint points to a block
2825 * group thats not of the proper type, while looping this
2826 * should never happen
2827 */
2828 if (empty_size)
2829 extra_loop = 1;
2830
2831 if (!block_group)
2832 goto new_group_no_lock;
2833
2834 if (unlikely(!block_group->cached)) {
2835 mutex_lock(&block_group->cache_mutex);
2836 ret = cache_block_group(root, block_group);
2837 mutex_unlock(&block_group->cache_mutex);
2838 if (ret)
2839 break;
2840 }
2841
2842 mutex_lock(&block_group->alloc_mutex);
2843 if (unlikely(!block_group_bits(block_group, data)))
2844 goto new_group;
2845
2846 if (unlikely(block_group->ro))
2847 goto new_group;
2848
2849 free_space = btrfs_find_free_space(block_group, search_start,
2850 total_needed);
2851 if (free_space) {
2852 u64 start = block_group->key.objectid;
2853 u64 end = block_group->key.objectid +
2854 block_group->key.offset;
2855
2856 search_start = stripe_align(root, free_space->offset);
2857
2858 /* move on to the next group */
2859 if (search_start + num_bytes >= search_end)
2860 goto new_group;
2861
2862 /* move on to the next group */
2863 if (search_start + num_bytes > end)
2864 goto new_group;
2865
2866 if (last_wanted && search_start != last_wanted) {
2867 total_needed += empty_cluster;
2868 empty_size += empty_cluster;
2869 last_wanted = 0;
2870 /*
2871 * if search_start is still in this block group
2872 * then we just re-search this block group
2873 */
2874 if (search_start >= start &&
2875 search_start < end) {
2876 mutex_unlock(&block_group->alloc_mutex);
2877 continue;
2878 }
2879
2880 /* else we go to the next block group */
2881 goto new_group;
2882 }
2883
2884 if (exclude_nr > 0 &&
2885 (search_start + num_bytes > exclude_start &&
2886 search_start < exclude_start + exclude_nr)) {
2887 search_start = exclude_start + exclude_nr;
2888 /*
2889 * if search_start is still in this block group
2890 * then we just re-search this block group
2891 */
2892 if (search_start >= start &&
2893 search_start < end) {
2894 mutex_unlock(&block_group->alloc_mutex);
2895 last_wanted = 0;
2896 continue;
2897 }
2898
2899 /* else we go to the next block group */
2900 goto new_group;
2901 }
2902
2903 ins->objectid = search_start;
2904 ins->offset = num_bytes;
2905
2906 btrfs_remove_free_space_lock(block_group, search_start,
2907 num_bytes);
2908 /* we are all good, lets return */
2909 mutex_unlock(&block_group->alloc_mutex);
2910 break;
2911 }
2912new_group:
2913 mutex_unlock(&block_group->alloc_mutex);
2914 put_block_group(block_group);
2915 block_group = NULL;
2916new_group_no_lock:
2917 /* don't try to compare new allocations against the
2918 * last allocation any more
2919 */
2920 last_wanted = 0;
2921
2922 /*
2923 * Here's how this works.
2924 * loop == 0: we were searching a block group via a hint
2925 * and didn't find anything, so we start at
2926 * the head of the block groups and keep searching
2927 * loop == 1: we're searching through all of the block groups
2928 * if we hit the head again we have searched
2929 * all of the block groups for this space and we
2930 * need to try and allocate, if we cant error out.
2931 * loop == 2: we allocated more space and are looping through
2932 * all of the block groups again.
2933 */
2934 if (loop == 0) {
2935 head = &space_info->block_groups;
2936 cur = head->next;
2937 loop++;
2938 } else if (loop == 1 && cur == head) {
2939 int keep_going;
2940
2941 /* at this point we give up on the empty_size
2942 * allocations and just try to allocate the min
2943 * space.
2944 *
2945 * The extra_loop field was set if an empty_size
2946 * allocation was attempted above, and if this
2947 * is try we need to try the loop again without
2948 * the additional empty_size.
2949 */
2950 total_needed -= empty_size;
2951 empty_size = 0;
2952 keep_going = extra_loop;
2953 loop++;
2954
2955 if (allowed_chunk_alloc && !chunk_alloc_done) {
2956 up_read(&space_info->groups_sem);
2957 ret = do_chunk_alloc(trans, root, num_bytes +
2958 2 * 1024 * 1024, data, 1);
2959 down_read(&space_info->groups_sem);
2960 if (ret < 0)
2961 goto loop_check;
2962 head = &space_info->block_groups;
2963 /*
2964 * we've allocated a new chunk, keep
2965 * trying
2966 */
2967 keep_going = 1;
2968 chunk_alloc_done = 1;
2969 } else if (!allowed_chunk_alloc) {
2970 space_info->force_alloc = 1;
2971 }
2972loop_check:
2973 if (keep_going) {
2974 cur = head->next;
2975 extra_loop = 0;
2976 } else {
2977 break;
2978 }
2979 } else if (cur == head) {
2980 break;
2981 }
2982
2983 block_group = list_entry(cur, struct btrfs_block_group_cache,
2984 list);
2985 atomic_inc(&block_group->count);
2986
2987 search_start = block_group->key.objectid;
2988 cur = cur->next;
2989 }
2990
2991 /* we found what we needed */
2992 if (ins->objectid) {
2993 if (!(data & BTRFS_BLOCK_GROUP_DATA))
2994 trans->block_group = block_group->key.objectid;
2995
2996 if (last_ptr)
2997 *last_ptr = ins->objectid + ins->offset;
2998 ret = 0;
2999 } else if (!ret) {
3000 printk(KERN_ERR "btrfs searching for %llu bytes, "
3001 "num_bytes %llu, loop %d, allowed_alloc %d\n",
3002 (unsigned long long)total_needed,
3003 (unsigned long long)num_bytes,
3004 loop, allowed_chunk_alloc);
3005 ret = -ENOSPC;
3006 }
3007 if (block_group)
3008 put_block_group(block_group);
3009
3010 up_read(&space_info->groups_sem);
3011 return ret;
3012}
3013
3014static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
3015{
3016 struct btrfs_block_group_cache *cache;
3017 struct list_head *l;
3018
3019 printk(KERN_INFO "space_info has %llu free, is %sfull\n",
3020 (unsigned long long)(info->total_bytes - info->bytes_used -
3021 info->bytes_pinned - info->bytes_reserved),
3022 (info->full) ? "" : "not ");
3023
3024 down_read(&info->groups_sem);
3025 list_for_each(l, &info->block_groups) {
3026 cache = list_entry(l, struct btrfs_block_group_cache, list);
3027 spin_lock(&cache->lock);
3028 printk(KERN_INFO "block group %llu has %llu bytes, %llu used "
3029 "%llu pinned %llu reserved\n",
3030 (unsigned long long)cache->key.objectid,
3031 (unsigned long long)cache->key.offset,
3032 (unsigned long long)btrfs_block_group_used(&cache->item),
3033 (unsigned long long)cache->pinned,
3034 (unsigned long long)cache->reserved);
3035 btrfs_dump_free_space(cache, bytes);
3036 spin_unlock(&cache->lock);
3037 }
3038 up_read(&info->groups_sem);
3039}
3040
3041static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3042 struct btrfs_root *root,
3043 u64 num_bytes, u64 min_alloc_size,
3044 u64 empty_size, u64 hint_byte,
3045 u64 search_end, struct btrfs_key *ins,
3046 u64 data)
3047{
3048 int ret;
3049 u64 search_start = 0;
3050 u64 alloc_profile;
3051 struct btrfs_fs_info *info = root->fs_info;
3052
3053 if (data) {
3054 alloc_profile = info->avail_data_alloc_bits &
3055 info->data_alloc_profile;
3056 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
3057 } else if (root == root->fs_info->chunk_root) {
3058 alloc_profile = info->avail_system_alloc_bits &
3059 info->system_alloc_profile;
3060 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
3061 } else {
3062 alloc_profile = info->avail_metadata_alloc_bits &
3063 info->metadata_alloc_profile;
3064 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
3065 }
3066again:
3067 data = btrfs_reduce_alloc_profile(root, data);
3068 /*
3069 * the only place that sets empty_size is btrfs_realloc_node, which
3070 * is not called recursively on allocations
3071 */
3072 if (empty_size || root->ref_cows) {
3073 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
3074 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3075 2 * 1024 * 1024,
3076 BTRFS_BLOCK_GROUP_METADATA |
3077 (info->metadata_alloc_profile &
3078 info->avail_metadata_alloc_bits), 0);
3079 }
3080 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3081 num_bytes + 2 * 1024 * 1024, data, 0);
3082 }
3083
3084 WARN_ON(num_bytes < root->sectorsize);
3085 ret = find_free_extent(trans, root, num_bytes, empty_size,
3086 search_start, search_end, hint_byte, ins,
3087 trans->alloc_exclude_start,
3088 trans->alloc_exclude_nr, data);
3089
3090 if (ret == -ENOSPC && num_bytes > min_alloc_size) {
3091 num_bytes = num_bytes >> 1;
3092 num_bytes = num_bytes & ~(root->sectorsize - 1);
3093 num_bytes = max(num_bytes, min_alloc_size);
3094 do_chunk_alloc(trans, root->fs_info->extent_root,
3095 num_bytes, data, 1);
3096 goto again;
3097 }
3098 if (ret) {
3099 struct btrfs_space_info *sinfo;
3100
3101 sinfo = __find_space_info(root->fs_info, data);
3102 printk(KERN_ERR "btrfs allocation failed flags %llu, "
3103 "wanted %llu\n", (unsigned long long)data,
3104 (unsigned long long)num_bytes);
3105 dump_space_info(sinfo, num_bytes);
3106 BUG();
3107 }
3108
3109 return ret;
3110}
3111
3112int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
3113{
3114 struct btrfs_block_group_cache *cache;
3115 int ret = 0;
3116
3117 cache = btrfs_lookup_block_group(root->fs_info, start);
3118 if (!cache) {
3119 printk(KERN_ERR "Unable to find block group for %llu\n",
3120 (unsigned long long)start);
3121 return -ENOSPC;
3122 }
3123
3124 ret = btrfs_discard_extent(root, start, len);
3125
3126 btrfs_add_free_space(cache, start, len);
3127 put_block_group(cache);
3128 update_reserved_extents(root, start, len, 0);
3129
3130 return ret;
3131}
3132
3133int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3134 struct btrfs_root *root,
3135 u64 num_bytes, u64 min_alloc_size,
3136 u64 empty_size, u64 hint_byte,
3137 u64 search_end, struct btrfs_key *ins,
3138 u64 data)
3139{
3140 int ret;
3141 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
3142 empty_size, hint_byte, search_end, ins,
3143 data);
3144 update_reserved_extents(root, ins->objectid, ins->offset, 1);
3145 return ret;
3146}
3147
3148static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3149 struct btrfs_root *root, u64 parent,
3150 u64 root_objectid, u64 ref_generation,
3151 u64 owner, struct btrfs_key *ins)
3152{
3153 int ret;
3154 int pending_ret;
3155 u64 super_used;
3156 u64 root_used;
3157 u64 num_bytes = ins->offset;
3158 u32 sizes[2];
3159 struct btrfs_fs_info *info = root->fs_info;
3160 struct btrfs_root *extent_root = info->extent_root;
3161 struct btrfs_extent_item *extent_item;
3162 struct btrfs_extent_ref *ref;
3163 struct btrfs_path *path;
3164 struct btrfs_key keys[2];
3165
3166 if (parent == 0)
3167 parent = ins->objectid;
3168
3169 /* block accounting for super block */
3170 spin_lock(&info->delalloc_lock);
3171 super_used = btrfs_super_bytes_used(&info->super_copy);
3172 btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
3173
3174 /* block accounting for root item */
3175 root_used = btrfs_root_used(&root->root_item);
3176 btrfs_set_root_used(&root->root_item, root_used + num_bytes);
3177 spin_unlock(&info->delalloc_lock);
3178
3179 if (root == extent_root) {
3180 struct pending_extent_op *extent_op;
3181
3182 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
3183 BUG_ON(!extent_op);
3184
3185 extent_op->type = PENDING_EXTENT_INSERT;
3186 extent_op->bytenr = ins->objectid;
3187 extent_op->num_bytes = ins->offset;
3188 extent_op->parent = parent;
3189 extent_op->orig_parent = 0;
3190 extent_op->generation = ref_generation;
3191 extent_op->orig_generation = 0;
3192 extent_op->level = (int)owner;
3193 INIT_LIST_HEAD(&extent_op->list);
3194 extent_op->del = 0;
3195
3196 mutex_lock(&root->fs_info->extent_ins_mutex);
3197 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
3198 ins->objectid + ins->offset - 1,
3199 EXTENT_WRITEBACK, GFP_NOFS);
3200 set_state_private(&root->fs_info->extent_ins,
3201 ins->objectid, (unsigned long)extent_op);
3202 mutex_unlock(&root->fs_info->extent_ins_mutex);
3203 goto update_block;
3204 }
3205
3206 memcpy(&keys[0], ins, sizeof(*ins));
3207 keys[1].objectid = ins->objectid;
3208 keys[1].type = BTRFS_EXTENT_REF_KEY;
3209 keys[1].offset = parent;
3210 sizes[0] = sizeof(*extent_item);
3211 sizes[1] = sizeof(*ref);
3212
3213 path = btrfs_alloc_path();
3214 BUG_ON(!path);
3215
3216 ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
3217 sizes, 2);
3218 BUG_ON(ret);
3219
3220 extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3221 struct btrfs_extent_item);
3222 btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
3223 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
3224 struct btrfs_extent_ref);
3225
3226 btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
3227 btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
3228 btrfs_set_ref_objectid(path->nodes[0], ref, owner);
3229 btrfs_set_ref_num_refs(path->nodes[0], ref, 1);
3230
3231 btrfs_mark_buffer_dirty(path->nodes[0]);
3232
3233 trans->alloc_exclude_start = 0;
3234 trans->alloc_exclude_nr = 0;
3235 btrfs_free_path(path);
3236 finish_current_insert(trans, extent_root, 0);
3237 pending_ret = del_pending_extents(trans, extent_root, 0);
3238
3239 if (ret)
3240 goto out;
3241 if (pending_ret) {
3242 ret = pending_ret;
3243 goto out;
3244 }
3245
3246update_block:
3247 ret = update_block_group(trans, root, ins->objectid,
3248 ins->offset, 1, 0);
3249 if (ret) {
3250 printk(KERN_ERR "btrfs update block group failed for %llu "
3251 "%llu\n", (unsigned long long)ins->objectid,
3252 (unsigned long long)ins->offset);
3253 BUG();
3254 }
3255out:
3256 return ret;
3257}
3258
3259int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3260 struct btrfs_root *root, u64 parent,
3261 u64 root_objectid, u64 ref_generation,
3262 u64 owner, struct btrfs_key *ins)
3263{
3264 int ret;
3265
3266 if (root_objectid == BTRFS_TREE_LOG_OBJECTID)
3267 return 0;
3268 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
3269 ref_generation, owner, ins);
3270 update_reserved_extents(root, ins->objectid, ins->offset, 0);
3271 return ret;
3272}
3273
3274/*
3275 * this is used by the tree logging recovery code. It records that
3276 * an extent has been allocated and makes sure to clear the free
3277 * space cache bits as well
3278 */
3279int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
3280 struct btrfs_root *root, u64 parent,
3281 u64 root_objectid, u64 ref_generation,
3282 u64 owner, struct btrfs_key *ins)
3283{
3284 int ret;
3285 struct btrfs_block_group_cache *block_group;
3286
3287 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
3288 mutex_lock(&block_group->cache_mutex);
3289 cache_block_group(root, block_group);
3290 mutex_unlock(&block_group->cache_mutex);
3291
3292 ret = btrfs_remove_free_space(block_group, ins->objectid,
3293 ins->offset);
3294 BUG_ON(ret);
3295 put_block_group(block_group);
3296 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
3297 ref_generation, owner, ins);
3298 return ret;
3299}
3300
3301/*
3302 * finds a free extent and does all the dirty work required for allocation
3303 * returns the key for the extent through ins, and a tree buffer for
3304 * the first block of the extent through buf.
3305 *
3306 * returns 0 if everything worked, non-zero otherwise.
3307 */
3308int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
3309 struct btrfs_root *root,
3310 u64 num_bytes, u64 parent, u64 min_alloc_size,
3311 u64 root_objectid, u64 ref_generation,
3312 u64 owner_objectid, u64 empty_size, u64 hint_byte,
3313 u64 search_end, struct btrfs_key *ins, u64 data)
3314{
3315 int ret;
3316
3317 ret = __btrfs_reserve_extent(trans, root, num_bytes,
3318 min_alloc_size, empty_size, hint_byte,
3319 search_end, ins, data);
3320 BUG_ON(ret);
3321 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
3322 ret = __btrfs_alloc_reserved_extent(trans, root, parent,
3323 root_objectid, ref_generation,
3324 owner_objectid, ins);
3325 BUG_ON(ret);
3326
3327 } else {
3328 update_reserved_extents(root, ins->objectid, ins->offset, 1);
3329 }
3330 return ret;
3331}
3332
3333struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
3334 struct btrfs_root *root,
3335 u64 bytenr, u32 blocksize)
3336{
3337 struct extent_buffer *buf;
3338
3339 buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
3340 if (!buf)
3341 return ERR_PTR(-ENOMEM);
3342 btrfs_set_header_generation(buf, trans->transid);
3343 btrfs_tree_lock(buf);
3344 clean_tree_block(trans, root, buf);
3345 btrfs_set_buffer_uptodate(buf);
3346 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
3347 set_extent_dirty(&root->dirty_log_pages, buf->start,
3348 buf->start + buf->len - 1, GFP_NOFS);
3349 } else {
3350 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
3351 buf->start + buf->len - 1, GFP_NOFS);
3352 }
3353 trans->blocks_used++;
3354 return buf;
3355}
3356
3357/*
3358 * helper function to allocate a block for a given tree
3359 * returns the tree buffer or NULL.
3360 */
3361struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
3362 struct btrfs_root *root,
3363 u32 blocksize, u64 parent,
3364 u64 root_objectid,
3365 u64 ref_generation,
3366 int level,
3367 u64 hint,
3368 u64 empty_size)
3369{
3370 struct btrfs_key ins;
3371 int ret;
3372 struct extent_buffer *buf;
3373
3374 ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize,
3375 root_objectid, ref_generation, level,
3376 empty_size, hint, (u64)-1, &ins, 0);
3377 if (ret) {
3378 BUG_ON(ret > 0);
3379 return ERR_PTR(ret);
3380 }
3381
3382 buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize);
3383 return buf;
3384}
3385
3386int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
3387 struct btrfs_root *root, struct extent_buffer *leaf)
3388{
3389 u64 leaf_owner;
3390 u64 leaf_generation;
3391 struct btrfs_key key;
3392 struct btrfs_file_extent_item *fi;
3393 int i;
3394 int nritems;
3395 int ret;
3396
3397 BUG_ON(!btrfs_is_leaf(leaf));
3398 nritems = btrfs_header_nritems(leaf);
3399 leaf_owner = btrfs_header_owner(leaf);
3400 leaf_generation = btrfs_header_generation(leaf);
3401
3402 for (i = 0; i < nritems; i++) {
3403 u64 disk_bytenr;
3404 cond_resched();
3405
3406 btrfs_item_key_to_cpu(leaf, &key, i);
3407 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
3408 continue;
3409 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
3410 if (btrfs_file_extent_type(leaf, fi) ==
3411 BTRFS_FILE_EXTENT_INLINE)
3412 continue;
3413 /*
3414 * FIXME make sure to insert a trans record that
3415 * repeats the snapshot del on crash
3416 */
3417 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
3418 if (disk_bytenr == 0)
3419 continue;
3420
3421 ret = __btrfs_free_extent(trans, root, disk_bytenr,
3422 btrfs_file_extent_disk_num_bytes(leaf, fi),
3423 leaf->start, leaf_owner, leaf_generation,
3424 key.objectid, 0);
3425 BUG_ON(ret);
3426
3427 atomic_inc(&root->fs_info->throttle_gen);
3428 wake_up(&root->fs_info->transaction_throttle);
3429 cond_resched();
3430 }
3431 return 0;
3432}
3433
3434static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
3435 struct btrfs_root *root,
3436 struct btrfs_leaf_ref *ref)
3437{
3438 int i;
3439 int ret;
3440 struct btrfs_extent_info *info = ref->extents;
3441
3442 for (i = 0; i < ref->nritems; i++) {
3443 ret = __btrfs_free_extent(trans, root, info->bytenr,
3444 info->num_bytes, ref->bytenr,
3445 ref->owner, ref->generation,
3446 info->objectid, 0);
3447
3448 atomic_inc(&root->fs_info->throttle_gen);
3449 wake_up(&root->fs_info->transaction_throttle);
3450 cond_resched();
3451
3452 BUG_ON(ret);
3453 info++;
3454 }
3455
3456 return 0;
3457}
3458
3459static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start,
3460 u64 len, u32 *refs)
3461{
3462 int ret;
3463
3464 ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs);
3465 BUG_ON(ret);
3466
3467#if 0 /* some debugging code in case we see problems here */
3468 /* if the refs count is one, it won't get increased again. But
3469 * if the ref count is > 1, someone may be decreasing it at
3470 * the same time we are.
3471 */
3472 if (*refs != 1) {
3473 struct extent_buffer *eb = NULL;
3474 eb = btrfs_find_create_tree_block(root, start, len);
3475 if (eb)
3476 btrfs_tree_lock(eb);
3477
3478 mutex_lock(&root->fs_info->alloc_mutex);
3479 ret = lookup_extent_ref(NULL, root, start, len, refs);
3480 BUG_ON(ret);
3481 mutex_unlock(&root->fs_info->alloc_mutex);
3482
3483 if (eb) {
3484 btrfs_tree_unlock(eb);
3485 free_extent_buffer(eb);
3486 }
3487 if (*refs == 1) {
3488 printk(KERN_ERR "btrfs block %llu went down to one "
3489 "during drop_snap\n", (unsigned long long)start);
3490 }
3491
3492 }
3493#endif
3494
3495 cond_resched();
3496 return ret;
3497}
3498
3499/*
3500 * helper function for drop_snapshot, this walks down the tree dropping ref
3501 * counts as it goes.
3502 */
3503static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
3504 struct btrfs_root *root,
3505 struct btrfs_path *path, int *level)
3506{
3507 u64 root_owner;
3508 u64 root_gen;
3509 u64 bytenr;
3510 u64 ptr_gen;
3511 struct extent_buffer *next;
3512 struct extent_buffer *cur;
3513 struct extent_buffer *parent;
3514 struct btrfs_leaf_ref *ref;
3515 u32 blocksize;
3516 int ret;
3517 u32 refs;
3518
3519 WARN_ON(*level < 0);
3520 WARN_ON(*level >= BTRFS_MAX_LEVEL);
3521 ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
3522 path->nodes[*level]->len, &refs);
3523 BUG_ON(ret);
3524 if (refs > 1)
3525 goto out;
3526
3527 /*
3528 * walk down to the last node level and free all the leaves
3529 */
3530 while (*level >= 0) {
3531 WARN_ON(*level < 0);
3532 WARN_ON(*level >= BTRFS_MAX_LEVEL);
3533 cur = path->nodes[*level];
3534
3535 if (btrfs_header_level(cur) != *level)
3536 WARN_ON(1);
3537
3538 if (path->slots[*level] >=
3539 btrfs_header_nritems(cur))
3540 break;
3541 if (*level == 0) {
3542 ret = btrfs_drop_leaf_ref(trans, root, cur);
3543 BUG_ON(ret);
3544 break;
3545 }
3546 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
3547 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
3548 blocksize = btrfs_level_size(root, *level - 1);
3549
3550 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
3551 BUG_ON(ret);
3552 if (refs != 1) {
3553 parent = path->nodes[*level];
3554 root_owner = btrfs_header_owner(parent);
3555 root_gen = btrfs_header_generation(parent);
3556 path->slots[*level]++;
3557
3558 ret = __btrfs_free_extent(trans, root, bytenr,
3559 blocksize, parent->start,
3560 root_owner, root_gen,
3561 *level - 1, 1);
3562 BUG_ON(ret);
3563
3564 atomic_inc(&root->fs_info->throttle_gen);
3565 wake_up(&root->fs_info->transaction_throttle);
3566 cond_resched();
3567
3568 continue;
3569 }
3570 /*
3571 * at this point, we have a single ref, and since the
3572 * only place referencing this extent is a dead root
3573 * the reference count should never go higher.
3574 * So, we don't need to check it again
3575 */
3576 if (*level == 1) {
3577 ref = btrfs_lookup_leaf_ref(root, bytenr);
3578 if (ref && ref->generation != ptr_gen) {
3579 btrfs_free_leaf_ref(root, ref);
3580 ref = NULL;
3581 }
3582 if (ref) {
3583 ret = cache_drop_leaf_ref(trans, root, ref);
3584 BUG_ON(ret);
3585 btrfs_remove_leaf_ref(root, ref);
3586 btrfs_free_leaf_ref(root, ref);
3587 *level = 0;
3588 break;
3589 }
3590 }
3591 next = btrfs_find_tree_block(root, bytenr, blocksize);
3592 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
3593 free_extent_buffer(next);
3594
3595 next = read_tree_block(root, bytenr, blocksize,
3596 ptr_gen);
3597 cond_resched();
3598#if 0
3599 /*
3600 * this is a debugging check and can go away
3601 * the ref should never go all the way down to 1
3602 * at this point
3603 */
3604 ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
3605 &refs);
3606 BUG_ON(ret);
3607 WARN_ON(refs != 1);
3608#endif
3609 }
3610 WARN_ON(*level <= 0);
3611 if (path->nodes[*level-1])
3612 free_extent_buffer(path->nodes[*level-1]);
3613 path->nodes[*level-1] = next;
3614 *level = btrfs_header_level(next);
3615 path->slots[*level] = 0;
3616 cond_resched();
3617 }
3618out:
3619 WARN_ON(*level < 0);
3620 WARN_ON(*level >= BTRFS_MAX_LEVEL);
3621
3622 if (path->nodes[*level] == root->node) {
3623 parent = path->nodes[*level];
3624 bytenr = path->nodes[*level]->start;
3625 } else {
3626 parent = path->nodes[*level + 1];
3627 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
3628 }
3629
3630 blocksize = btrfs_level_size(root, *level);
3631 root_owner = btrfs_header_owner(parent);
3632 root_gen = btrfs_header_generation(parent);
3633
3634 ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
3635 parent->start, root_owner, root_gen,
3636 *level, 1);
3637 free_extent_buffer(path->nodes[*level]);
3638 path->nodes[*level] = NULL;
3639 *level += 1;
3640 BUG_ON(ret);
3641
3642 cond_resched();
3643 return 0;
3644}
3645
3646/*
3647 * helper function for drop_subtree, this function is similar to
3648 * walk_down_tree. The main difference is that it checks reference
3649 * counts while tree blocks are locked.
3650 */
3651static noinline int walk_down_subtree(struct btrfs_trans_handle *trans,
3652 struct btrfs_root *root,
3653 struct btrfs_path *path, int *level)
3654{
3655 struct extent_buffer *next;
3656 struct extent_buffer *cur;
3657 struct extent_buffer *parent;
3658 u64 bytenr;
3659 u64 ptr_gen;
3660 u32 blocksize;
3661 u32 refs;
3662 int ret;
3663
3664 cur = path->nodes[*level];
3665 ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len,
3666 &refs);
3667 BUG_ON(ret);
3668 if (refs > 1)
3669 goto out;
3670
3671 while (*level >= 0) {
3672 cur = path->nodes[*level];
3673 if (*level == 0) {
3674 ret = btrfs_drop_leaf_ref(trans, root, cur);
3675 BUG_ON(ret);
3676 clean_tree_block(trans, root, cur);
3677 break;
3678 }
3679 if (path->slots[*level] >= btrfs_header_nritems(cur)) {
3680 clean_tree_block(trans, root, cur);
3681 break;
3682 }
3683
3684 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
3685 blocksize = btrfs_level_size(root, *level - 1);
3686 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
3687
3688 next = read_tree_block(root, bytenr, blocksize, ptr_gen);
3689 btrfs_tree_lock(next);
3690
3691 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
3692 &refs);
3693 BUG_ON(ret);
3694 if (refs > 1) {
3695 parent = path->nodes[*level];
3696 ret = btrfs_free_extent(trans, root, bytenr,
3697 blocksize, parent->start,
3698 btrfs_header_owner(parent),
3699 btrfs_header_generation(parent),
3700 *level - 1, 1);
3701 BUG_ON(ret);
3702 path->slots[*level]++;
3703 btrfs_tree_unlock(next);
3704 free_extent_buffer(next);
3705 continue;
3706 }
3707
3708 *level = btrfs_header_level(next);
3709 path->nodes[*level] = next;
3710 path->slots[*level] = 0;
3711 path->locks[*level] = 1;
3712 cond_resched();
3713 }
3714out:
3715 parent = path->nodes[*level + 1];
3716 bytenr = path->nodes[*level]->start;
3717 blocksize = path->nodes[*level]->len;
3718
3719 ret = btrfs_free_extent(trans, root, bytenr, blocksize,
3720 parent->start, btrfs_header_owner(parent),
3721 btrfs_header_generation(parent), *level, 1);
3722 BUG_ON(ret);
3723
3724 if (path->locks[*level]) {
3725 btrfs_tree_unlock(path->nodes[*level]);
3726 path->locks[*level] = 0;
3727 }
3728 free_extent_buffer(path->nodes[*level]);
3729 path->nodes[*level] = NULL;
3730 *level += 1;
3731 cond_resched();
3732 return 0;
3733}
3734
3735/*
3736 * helper for dropping snapshots. This walks back up the tree in the path
3737 * to find the first node higher up where we haven't yet gone through
3738 * all the slots
3739 */
3740static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
3741 struct btrfs_root *root,
3742 struct btrfs_path *path,
3743 int *level, int max_level)
3744{
3745 u64 root_owner;
3746 u64 root_gen;
3747 struct btrfs_root_item *root_item = &root->root_item;
3748 int i;
3749 int slot;
3750 int ret;
3751
3752 for (i = *level; i < max_level && path->nodes[i]; i++) {
3753 slot = path->slots[i];
3754 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
3755 struct extent_buffer *node;
3756 struct btrfs_disk_key disk_key;
3757 node = path->nodes[i];
3758 path->slots[i]++;
3759 *level = i;
3760 WARN_ON(*level == 0);
3761 btrfs_node_key(node, &disk_key, path->slots[i]);
3762 memcpy(&root_item->drop_progress,
3763 &disk_key, sizeof(disk_key));
3764 root_item->drop_level = i;
3765 return 0;
3766 } else {
3767 struct extent_buffer *parent;
3768 if (path->nodes[*level] == root->node)
3769 parent = path->nodes[*level];
3770 else
3771 parent = path->nodes[*level + 1];
3772
3773 root_owner = btrfs_header_owner(parent);
3774 root_gen = btrfs_header_generation(parent);
3775
3776 clean_tree_block(trans, root, path->nodes[*level]);
3777 ret = btrfs_free_extent(trans, root,
3778 path->nodes[*level]->start,
3779 path->nodes[*level]->len,
3780 parent->start, root_owner,
3781 root_gen, *level, 1);
3782 BUG_ON(ret);
3783 if (path->locks[*level]) {
3784 btrfs_tree_unlock(path->nodes[*level]);
3785 path->locks[*level] = 0;
3786 }
3787 free_extent_buffer(path->nodes[*level]);
3788 path->nodes[*level] = NULL;
3789 *level = i + 1;
3790 }
3791 }
3792 return 1;
3793}
3794
3795/*
3796 * drop the reference count on the tree rooted at 'snap'. This traverses
3797 * the tree freeing any blocks that have a ref count of zero after being
3798 * decremented.
3799 */
3800int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3801 *root)
3802{
3803 int ret = 0;
3804 int wret;
3805 int level;
3806 struct btrfs_path *path;
3807 int i;
3808 int orig_level;
3809 struct btrfs_root_item *root_item = &root->root_item;
3810
3811 WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
3812 path = btrfs_alloc_path();
3813 BUG_ON(!path);
3814
3815 level = btrfs_header_level(root->node);
3816 orig_level = level;
3817 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3818 path->nodes[level] = root->node;
3819 extent_buffer_get(root->node);
3820 path->slots[level] = 0;
3821 } else {
3822 struct btrfs_key key;
3823 struct btrfs_disk_key found_key;
3824 struct extent_buffer *node;
3825
3826 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3827 level = root_item->drop_level;
3828 path->lowest_level = level;
3829 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3830 if (wret < 0) {
3831 ret = wret;
3832 goto out;
3833 }
3834 node = path->nodes[level];
3835 btrfs_node_key(node, &found_key, path->slots[level]);
3836 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3837 sizeof(found_key)));
3838 /*
3839 * unlock our path, this is safe because only this
3840 * function is allowed to delete this snapshot
3841 */
3842 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
3843 if (path->nodes[i] && path->locks[i]) {
3844 path->locks[i] = 0;
3845 btrfs_tree_unlock(path->nodes[i]);
3846 }
3847 }
3848 }
3849 while (1) {
3850 wret = walk_down_tree(trans, root, path, &level);
3851 if (wret > 0)
3852 break;
3853 if (wret < 0)
3854 ret = wret;
3855
3856 wret = walk_up_tree(trans, root, path, &level,
3857 BTRFS_MAX_LEVEL);
3858 if (wret > 0)
3859 break;
3860 if (wret < 0)
3861 ret = wret;
3862 if (trans->transaction->in_commit) {
3863 ret = -EAGAIN;
3864 break;
3865 }
3866 atomic_inc(&root->fs_info->throttle_gen);
3867 wake_up(&root->fs_info->transaction_throttle);
3868 }
3869 for (i = 0; i <= orig_level; i++) {
3870 if (path->nodes[i]) {
3871 free_extent_buffer(path->nodes[i]);
3872 path->nodes[i] = NULL;
3873 }
3874 }
3875out:
3876 btrfs_free_path(path);
3877 return ret;
3878}
3879
3880int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
3881 struct btrfs_root *root,
3882 struct extent_buffer *node,
3883 struct extent_buffer *parent)
3884{
3885 struct btrfs_path *path;
3886 int level;
3887 int parent_level;
3888 int ret = 0;
3889 int wret;
3890
3891 path = btrfs_alloc_path();
3892 BUG_ON(!path);
3893
3894 BUG_ON(!btrfs_tree_locked(parent));
3895 parent_level = btrfs_header_level(parent);
3896 extent_buffer_get(parent);
3897 path->nodes[parent_level] = parent;
3898 path->slots[parent_level] = btrfs_header_nritems(parent);
3899
3900 BUG_ON(!btrfs_tree_locked(node));
3901 level = btrfs_header_level(node);
3902 extent_buffer_get(node);
3903 path->nodes[level] = node;
3904 path->slots[level] = 0;
3905
3906 while (1) {
3907 wret = walk_down_subtree(trans, root, path, &level);
3908 if (wret < 0)
3909 ret = wret;
3910 if (wret != 0)
3911 break;
3912
3913 wret = walk_up_tree(trans, root, path, &level, parent_level);
3914 if (wret < 0)
3915 ret = wret;
3916 if (wret != 0)
3917 break;
3918 }
3919
3920 btrfs_free_path(path);
3921 return ret;
3922}
3923
3924static unsigned long calc_ra(unsigned long start, unsigned long last,
3925 unsigned long nr)
3926{
3927 return min(last, start + nr - 1);
3928}
3929
3930static noinline int relocate_inode_pages(struct inode *inode, u64 start,
3931 u64 len)
3932{
3933 u64 page_start;
3934 u64 page_end;
3935 unsigned long first_index;
3936 unsigned long last_index;
3937 unsigned long i;
3938 struct page *page;
3939 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3940 struct file_ra_state *ra;
3941 struct btrfs_ordered_extent *ordered;
3942 unsigned int total_read = 0;
3943 unsigned int total_dirty = 0;
3944 int ret = 0;
3945
3946 ra = kzalloc(sizeof(*ra), GFP_NOFS);
3947
3948 mutex_lock(&inode->i_mutex);
3949 first_index = start >> PAGE_CACHE_SHIFT;
3950 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
3951
3952 /* make sure the dirty trick played by the caller work */
3953 ret = invalidate_inode_pages2_range(inode->i_mapping,
3954 first_index, last_index);
3955 if (ret)
3956 goto out_unlock;
3957
3958 file_ra_state_init(ra, inode->i_mapping);
3959
3960 for (i = first_index ; i <= last_index; i++) {
3961 if (total_read % ra->ra_pages == 0) {
3962 btrfs_force_ra(inode->i_mapping, ra, NULL, i,
3963 calc_ra(i, last_index, ra->ra_pages));
3964 }
3965 total_read++;
3966again:
3967 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
3968 BUG_ON(1);
3969 page = grab_cache_page(inode->i_mapping, i);
3970 if (!page) {
3971 ret = -ENOMEM;
3972 goto out_unlock;
3973 }
3974 if (!PageUptodate(page)) {
3975 btrfs_readpage(NULL, page);
3976 lock_page(page);
3977 if (!PageUptodate(page)) {
3978 unlock_page(page);
3979 page_cache_release(page);
3980 ret = -EIO;
3981 goto out_unlock;
3982 }
3983 }
3984 wait_on_page_writeback(page);
3985
3986 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
3987 page_end = page_start + PAGE_CACHE_SIZE - 1;
3988 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
3989
3990 ordered = btrfs_lookup_ordered_extent(inode, page_start);
3991 if (ordered) {
3992 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3993 unlock_page(page);
3994 page_cache_release(page);
3995 btrfs_start_ordered_extent(inode, ordered, 1);
3996 btrfs_put_ordered_extent(ordered);
3997 goto again;
3998 }
3999 set_page_extent_mapped(page);
4000
4001 if (i == first_index)
4002 set_extent_bits(io_tree, page_start, page_end,
4003 EXTENT_BOUNDARY, GFP_NOFS);
4004 btrfs_set_extent_delalloc(inode, page_start, page_end);
4005
4006 set_page_dirty(page);
4007 total_dirty++;
4008
4009 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
4010 unlock_page(page);
4011 page_cache_release(page);
4012 }
4013
4014out_unlock:
4015 kfree(ra);
4016 mutex_unlock(&inode->i_mutex);
4017 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
4018 return ret;
4019}
4020
4021static noinline int relocate_data_extent(struct inode *reloc_inode,
4022 struct btrfs_key *extent_key,
4023 u64 offset)
4024{
4025 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
4026 struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
4027 struct extent_map *em;
4028 u64 start = extent_key->objectid - offset;
4029 u64 end = start + extent_key->offset - 1;
4030
4031 em = alloc_extent_map(GFP_NOFS);
4032 BUG_ON(!em || IS_ERR(em));
4033
4034 em->start = start;
4035 em->len = extent_key->offset;
4036 em->block_len = extent_key->offset;
4037 em->block_start = extent_key->objectid;
4038 em->bdev = root->fs_info->fs_devices->latest_bdev;
4039 set_bit(EXTENT_FLAG_PINNED, &em->flags);
4040
4041 /* setup extent map to cheat btrfs_readpage */
4042 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
4043 while (1) {
4044 int ret;
4045 spin_lock(&em_tree->lock);
4046 ret = add_extent_mapping(em_tree, em);
4047 spin_unlock(&em_tree->lock);
4048 if (ret != -EEXIST) {
4049 free_extent_map(em);
4050 break;
4051 }
4052 btrfs_drop_extent_cache(reloc_inode, start, end, 0);
4053 }
4054 unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
4055
4056 return relocate_inode_pages(reloc_inode, start, extent_key->offset);
4057}
4058
4059struct btrfs_ref_path {
4060 u64 extent_start;
4061 u64 nodes[BTRFS_MAX_LEVEL];
4062 u64 root_objectid;
4063 u64 root_generation;
4064 u64 owner_objectid;
4065 u32 num_refs;
4066 int lowest_level;
4067 int current_level;
4068 int shared_level;
4069
4070 struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
4071 u64 new_nodes[BTRFS_MAX_LEVEL];
4072};
4073
4074struct disk_extent {
4075 u64 ram_bytes;
4076 u64 disk_bytenr;
4077 u64 disk_num_bytes;
4078 u64 offset;
4079 u64 num_bytes;
4080 u8 compression;
4081 u8 encryption;
4082 u16 other_encoding;
4083};
4084
4085static int is_cowonly_root(u64 root_objectid)
4086{
4087 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
4088 root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
4089 root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
4090 root_objectid == BTRFS_DEV_TREE_OBJECTID ||
4091 root_objectid == BTRFS_TREE_LOG_OBJECTID ||
4092 root_objectid == BTRFS_CSUM_TREE_OBJECTID)
4093 return 1;
4094 return 0;
4095}
4096
4097static noinline int __next_ref_path(struct btrfs_trans_handle *trans,
4098 struct btrfs_root *extent_root,
4099 struct btrfs_ref_path *ref_path,
4100 int first_time)
4101{
4102 struct extent_buffer *leaf;
4103 struct btrfs_path *path;
4104 struct btrfs_extent_ref *ref;
4105 struct btrfs_key key;
4106 struct btrfs_key found_key;
4107 u64 bytenr;
4108 u32 nritems;
4109 int level;
4110 int ret = 1;
4111
4112 path = btrfs_alloc_path();
4113 if (!path)
4114 return -ENOMEM;
4115
4116 if (first_time) {
4117 ref_path->lowest_level = -1;
4118 ref_path->current_level = -1;
4119 ref_path->shared_level = -1;
4120 goto walk_up;
4121 }
4122walk_down:
4123 level = ref_path->current_level - 1;
4124 while (level >= -1) {
4125 u64 parent;
4126 if (level < ref_path->lowest_level)
4127 break;
4128
4129 if (level >= 0)
4130 bytenr = ref_path->nodes[level];
4131 else
4132 bytenr = ref_path->extent_start;
4133 BUG_ON(bytenr == 0);
4134
4135 parent = ref_path->nodes[level + 1];
4136 ref_path->nodes[level + 1] = 0;
4137 ref_path->current_level = level;
4138 BUG_ON(parent == 0);
4139
4140 key.objectid = bytenr;
4141 key.offset = parent + 1;
4142 key.type = BTRFS_EXTENT_REF_KEY;
4143
4144 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
4145 if (ret < 0)
4146 goto out;
4147 BUG_ON(ret == 0);
4148
4149 leaf = path->nodes[0];
4150 nritems = btrfs_header_nritems(leaf);
4151 if (path->slots[0] >= nritems) {
4152 ret = btrfs_next_leaf(extent_root, path);
4153 if (ret < 0)
4154 goto out;
4155 if (ret > 0)
4156 goto next;
4157 leaf = path->nodes[0];
4158 }
4159
4160 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4161 if (found_key.objectid == bytenr &&
4162 found_key.type == BTRFS_EXTENT_REF_KEY) {
4163 if (level < ref_path->shared_level)
4164 ref_path->shared_level = level;
4165 goto found;
4166 }
4167next:
4168 level--;
4169 btrfs_release_path(extent_root, path);
4170 cond_resched();
4171 }
4172 /* reached lowest level */
4173 ret = 1;
4174 goto out;
4175walk_up:
4176 level = ref_path->current_level;
4177 while (level < BTRFS_MAX_LEVEL - 1) {
4178 u64 ref_objectid;
4179
4180 if (level >= 0)
4181 bytenr = ref_path->nodes[level];
4182 else
4183 bytenr = ref_path->extent_start;
4184
4185 BUG_ON(bytenr == 0);
4186
4187 key.objectid = bytenr;
4188 key.offset = 0;
4189 key.type = BTRFS_EXTENT_REF_KEY;
4190
4191 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
4192 if (ret < 0)
4193 goto out;
4194
4195 leaf = path->nodes[0];
4196 nritems = btrfs_header_nritems(leaf);
4197 if (path->slots[0] >= nritems) {
4198 ret = btrfs_next_leaf(extent_root, path);
4199 if (ret < 0)
4200 goto out;
4201 if (ret > 0) {
4202 /* the extent was freed by someone */
4203 if (ref_path->lowest_level == level)
4204 goto out;
4205 btrfs_release_path(extent_root, path);
4206 goto walk_down;
4207 }
4208 leaf = path->nodes[0];
4209 }
4210
4211 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4212 if (found_key.objectid != bytenr ||
4213 found_key.type != BTRFS_EXTENT_REF_KEY) {
4214 /* the extent was freed by someone */
4215 if (ref_path->lowest_level == level) {
4216 ret = 1;
4217 goto out;
4218 }
4219 btrfs_release_path(extent_root, path);
4220 goto walk_down;
4221 }
4222found:
4223 ref = btrfs_item_ptr(leaf, path->slots[0],
4224 struct btrfs_extent_ref);
4225 ref_objectid = btrfs_ref_objectid(leaf, ref);
4226 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
4227 if (first_time) {
4228 level = (int)ref_objectid;
4229 BUG_ON(level >= BTRFS_MAX_LEVEL);
4230 ref_path->lowest_level = level;
4231 ref_path->current_level = level;
4232 ref_path->nodes[level] = bytenr;
4233 } else {
4234 WARN_ON(ref_objectid != level);
4235 }
4236 } else {
4237 WARN_ON(level != -1);
4238 }
4239 first_time = 0;
4240
4241 if (ref_path->lowest_level == level) {
4242 ref_path->owner_objectid = ref_objectid;
4243 ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
4244 }
4245
4246 /*
4247 * the block is tree root or the block isn't in reference
4248 * counted tree.
4249 */
4250 if (found_key.objectid == found_key.offset ||
4251 is_cowonly_root(btrfs_ref_root(leaf, ref))) {
4252 ref_path->root_objectid = btrfs_ref_root(leaf, ref);
4253 ref_path->root_generation =
4254 btrfs_ref_generation(leaf, ref);
4255 if (level < 0) {
4256 /* special reference from the tree log */
4257 ref_path->nodes[0] = found_key.offset;
4258 ref_path->current_level = 0;
4259 }
4260 ret = 0;
4261 goto out;
4262 }
4263
4264 level++;
4265 BUG_ON(ref_path->nodes[level] != 0);
4266 ref_path->nodes[level] = found_key.offset;
4267 ref_path->current_level = level;
4268
4269 /*
4270 * the reference was created in the running transaction,
4271 * no need to continue walking up.
4272 */
4273 if (btrfs_ref_generation(leaf, ref) == trans->transid) {
4274 ref_path->root_objectid = btrfs_ref_root(leaf, ref);
4275 ref_path->root_generation =
4276 btrfs_ref_generation(leaf, ref);
4277 ret = 0;
4278 goto out;
4279 }
4280
4281 btrfs_release_path(extent_root, path);
4282 cond_resched();
4283 }
4284 /* reached max tree level, but no tree root found. */
4285 BUG();
4286out:
4287 btrfs_free_path(path);
4288 return ret;
4289}
4290
4291static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
4292 struct btrfs_root *extent_root,
4293 struct btrfs_ref_path *ref_path,
4294 u64 extent_start)
4295{
4296 memset(ref_path, 0, sizeof(*ref_path));
4297 ref_path->extent_start = extent_start;
4298
4299 return __next_ref_path(trans, extent_root, ref_path, 1);
4300}
4301
4302static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
4303 struct btrfs_root *extent_root,
4304 struct btrfs_ref_path *ref_path)
4305{
4306 return __next_ref_path(trans, extent_root, ref_path, 0);
4307}
4308
4309static noinline int get_new_locations(struct inode *reloc_inode,
4310 struct btrfs_key *extent_key,
4311 u64 offset, int no_fragment,
4312 struct disk_extent **extents,
4313 int *nr_extents)
4314{
4315 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
4316 struct btrfs_path *path;
4317 struct btrfs_file_extent_item *fi;
4318 struct extent_buffer *leaf;
4319 struct disk_extent *exts = *extents;
4320 struct btrfs_key found_key;
4321 u64 cur_pos;
4322 u64 last_byte;
4323 u32 nritems;
4324 int nr = 0;
4325 int max = *nr_extents;
4326 int ret;
4327
4328 WARN_ON(!no_fragment && *extents);
4329 if (!exts) {
4330 max = 1;
4331 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
4332 if (!exts)
4333 return -ENOMEM;
4334 }
4335
4336 path = btrfs_alloc_path();
4337 BUG_ON(!path);
4338
4339 cur_pos = extent_key->objectid - offset;
4340 last_byte = extent_key->objectid + extent_key->offset;
4341 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
4342 cur_pos, 0);
4343 if (ret < 0)
4344 goto out;
4345 if (ret > 0) {
4346 ret = -ENOENT;
4347 goto out;
4348 }
4349
4350 while (1) {
4351 leaf = path->nodes[0];
4352 nritems = btrfs_header_nritems(leaf);
4353 if (path->slots[0] >= nritems) {
4354 ret = btrfs_next_leaf(root, path);
4355 if (ret < 0)
4356 goto out;
4357 if (ret > 0)
4358 break;
4359 leaf = path->nodes[0];
4360 }
4361
4362 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4363 if (found_key.offset != cur_pos ||
4364 found_key.type != BTRFS_EXTENT_DATA_KEY ||
4365 found_key.objectid != reloc_inode->i_ino)
4366 break;
4367
4368 fi = btrfs_item_ptr(leaf, path->slots[0],
4369 struct btrfs_file_extent_item);
4370 if (btrfs_file_extent_type(leaf, fi) !=
4371 BTRFS_FILE_EXTENT_REG ||
4372 btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
4373 break;
4374
4375 if (nr == max) {
4376 struct disk_extent *old = exts;
4377 max *= 2;
4378 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
4379 memcpy(exts, old, sizeof(*exts) * nr);
4380 if (old != *extents)
4381 kfree(old);
4382 }
4383
4384 exts[nr].disk_bytenr =
4385 btrfs_file_extent_disk_bytenr(leaf, fi);
4386 exts[nr].disk_num_bytes =
4387 btrfs_file_extent_disk_num_bytes(leaf, fi);
4388 exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
4389 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
4390 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
4391 exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
4392 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
4393 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
4394 fi);
4395 BUG_ON(exts[nr].offset > 0);
4396 BUG_ON(exts[nr].compression || exts[nr].encryption);
4397 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
4398
4399 cur_pos += exts[nr].num_bytes;
4400 nr++;
4401
4402 if (cur_pos + offset >= last_byte)
4403 break;
4404
4405 if (no_fragment) {
4406 ret = 1;
4407 goto out;
4408 }
4409 path->slots[0]++;
4410 }
4411
4412 BUG_ON(cur_pos + offset > last_byte);
4413 if (cur_pos + offset < last_byte) {
4414 ret = -ENOENT;
4415 goto out;
4416 }
4417 ret = 0;
4418out:
4419 btrfs_free_path(path);
4420 if (ret) {
4421 if (exts != *extents)
4422 kfree(exts);
4423 } else {
4424 *extents = exts;
4425 *nr_extents = nr;
4426 }
4427 return ret;
4428}
4429
4430static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
4431 struct btrfs_root *root,
4432 struct btrfs_path *path,
4433 struct btrfs_key *extent_key,
4434 struct btrfs_key *leaf_key,
4435 struct btrfs_ref_path *ref_path,
4436 struct disk_extent *new_extents,
4437 int nr_extents)
4438{
4439 struct extent_buffer *leaf;
4440 struct btrfs_file_extent_item *fi;
4441 struct inode *inode = NULL;
4442 struct btrfs_key key;
4443 u64 lock_start = 0;
4444 u64 lock_end = 0;
4445 u64 num_bytes;
4446 u64 ext_offset;
4447 u64 first_pos;
4448 u32 nritems;
4449 int nr_scaned = 0;
4450 int extent_locked = 0;
4451 int extent_type;
4452 int ret;
4453
4454 memcpy(&key, leaf_key, sizeof(key));
4455 first_pos = INT_LIMIT(loff_t) - extent_key->offset;
4456 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
4457 if (key.objectid < ref_path->owner_objectid ||
4458 (key.objectid == ref_path->owner_objectid &&
4459 key.type < BTRFS_EXTENT_DATA_KEY)) {
4460 key.objectid = ref_path->owner_objectid;
4461 key.type = BTRFS_EXTENT_DATA_KEY;
4462 key.offset = 0;
4463 }
4464 }
4465
4466 while (1) {
4467 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4468 if (ret < 0)
4469 goto out;
4470
4471 leaf = path->nodes[0];
4472 nritems = btrfs_header_nritems(leaf);
4473next:
4474 if (extent_locked && ret > 0) {
4475 /*
4476 * the file extent item was modified by someone
4477 * before the extent got locked.
4478 */
4479 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
4480 lock_end, GFP_NOFS);
4481 extent_locked = 0;
4482 }
4483
4484 if (path->slots[0] >= nritems) {
4485 if (++nr_scaned > 2)
4486 break;
4487
4488 BUG_ON(extent_locked);
4489 ret = btrfs_next_leaf(root, path);
4490 if (ret < 0)
4491 goto out;
4492 if (ret > 0)
4493 break;
4494 leaf = path->nodes[0];
4495 nritems = btrfs_header_nritems(leaf);
4496 }
4497
4498 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4499
4500 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
4501 if ((key.objectid > ref_path->owner_objectid) ||
4502 (key.objectid == ref_path->owner_objectid &&
4503 key.type > BTRFS_EXTENT_DATA_KEY) ||
4504 (key.offset >= first_pos + extent_key->offset))
4505 break;
4506 }
4507
4508 if (inode && key.objectid != inode->i_ino) {
4509 BUG_ON(extent_locked);
4510 btrfs_release_path(root, path);
4511 mutex_unlock(&inode->i_mutex);
4512 iput(inode);
4513 inode = NULL;
4514 continue;
4515 }
4516
4517 if (key.type != BTRFS_EXTENT_DATA_KEY) {
4518 path->slots[0]++;
4519 ret = 1;
4520 goto next;
4521 }
4522 fi = btrfs_item_ptr(leaf, path->slots[0],
4523 struct btrfs_file_extent_item);
4524 extent_type = btrfs_file_extent_type(leaf, fi);
4525 if ((extent_type != BTRFS_FILE_EXTENT_REG &&
4526 extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
4527 (btrfs_file_extent_disk_bytenr(leaf, fi) !=
4528 extent_key->objectid)) {
4529 path->slots[0]++;
4530 ret = 1;
4531 goto next;
4532 }
4533
4534 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
4535 ext_offset = btrfs_file_extent_offset(leaf, fi);
4536
4537 if (first_pos > key.offset - ext_offset)
4538 first_pos = key.offset - ext_offset;
4539
4540 if (!extent_locked) {
4541 lock_start = key.offset;
4542 lock_end = lock_start + num_bytes - 1;
4543 } else {
4544 if (lock_start > key.offset ||
4545 lock_end + 1 < key.offset + num_bytes) {
4546 unlock_extent(&BTRFS_I(inode)->io_tree,
4547 lock_start, lock_end, GFP_NOFS);
4548 extent_locked = 0;
4549 }
4550 }
4551
4552 if (!inode) {
4553 btrfs_release_path(root, path);
4554
4555 inode = btrfs_iget_locked(root->fs_info->sb,
4556 key.objectid, root);
4557 if (inode->i_state & I_NEW) {
4558 BTRFS_I(inode)->root = root;
4559 BTRFS_I(inode)->location.objectid =
4560 key.objectid;
4561 BTRFS_I(inode)->location.type =
4562 BTRFS_INODE_ITEM_KEY;
4563 BTRFS_I(inode)->location.offset = 0;
4564 btrfs_read_locked_inode(inode);
4565 unlock_new_inode(inode);
4566 }
4567 /*
4568 * some code call btrfs_commit_transaction while
4569 * holding the i_mutex, so we can't use mutex_lock
4570 * here.
4571 */
4572 if (is_bad_inode(inode) ||
4573 !mutex_trylock(&inode->i_mutex)) {
4574 iput(inode);
4575 inode = NULL;
4576 key.offset = (u64)-1;
4577 goto skip;
4578 }
4579 }
4580
4581 if (!extent_locked) {
4582 struct btrfs_ordered_extent *ordered;
4583
4584 btrfs_release_path(root, path);
4585
4586 lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
4587 lock_end, GFP_NOFS);
4588 ordered = btrfs_lookup_first_ordered_extent(inode,
4589 lock_end);
4590 if (ordered &&
4591 ordered->file_offset <= lock_end &&
4592 ordered->file_offset + ordered->len > lock_start) {
4593 unlock_extent(&BTRFS_I(inode)->io_tree,
4594 lock_start, lock_end, GFP_NOFS);
4595 btrfs_start_ordered_extent(inode, ordered, 1);
4596 btrfs_put_ordered_extent(ordered);
4597 key.offset += num_bytes;
4598 goto skip;
4599 }
4600 if (ordered)
4601 btrfs_put_ordered_extent(ordered);
4602
4603 extent_locked = 1;
4604 continue;
4605 }
4606
4607 if (nr_extents == 1) {
4608 /* update extent pointer in place */
4609 btrfs_set_file_extent_disk_bytenr(leaf, fi,
4610 new_extents[0].disk_bytenr);
4611 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
4612 new_extents[0].disk_num_bytes);
4613 btrfs_mark_buffer_dirty(leaf);
4614
4615 btrfs_drop_extent_cache(inode, key.offset,
4616 key.offset + num_bytes - 1, 0);
4617
4618 ret = btrfs_inc_extent_ref(trans, root,
4619 new_extents[0].disk_bytenr,
4620 new_extents[0].disk_num_bytes,
4621 leaf->start,
4622 root->root_key.objectid,
4623 trans->transid,
4624 key.objectid);
4625 BUG_ON(ret);
4626
4627 ret = btrfs_free_extent(trans, root,
4628 extent_key->objectid,
4629 extent_key->offset,
4630 leaf->start,
4631 btrfs_header_owner(leaf),
4632 btrfs_header_generation(leaf),
4633 key.objectid, 0);
4634 BUG_ON(ret);
4635
4636 btrfs_release_path(root, path);
4637 key.offset += num_bytes;
4638 } else {
4639 BUG_ON(1);
4640#if 0
4641 u64 alloc_hint;
4642 u64 extent_len;
4643 int i;
4644 /*
4645 * drop old extent pointer at first, then insert the
4646 * new pointers one bye one
4647 */
4648 btrfs_release_path(root, path);
4649 ret = btrfs_drop_extents(trans, root, inode, key.offset,
4650 key.offset + num_bytes,
4651 key.offset, &alloc_hint);
4652 BUG_ON(ret);
4653
4654 for (i = 0; i < nr_extents; i++) {
4655 if (ext_offset >= new_extents[i].num_bytes) {
4656 ext_offset -= new_extents[i].num_bytes;
4657 continue;
4658 }
4659 extent_len = min(new_extents[i].num_bytes -
4660 ext_offset, num_bytes);
4661
4662 ret = btrfs_insert_empty_item(trans, root,
4663 path, &key,
4664 sizeof(*fi));
4665 BUG_ON(ret);
4666
4667 leaf = path->nodes[0];
4668 fi = btrfs_item_ptr(leaf, path->slots[0],
4669 struct btrfs_file_extent_item);
4670 btrfs_set_file_extent_generation(leaf, fi,
4671 trans->transid);
4672 btrfs_set_file_extent_type(leaf, fi,
4673 BTRFS_FILE_EXTENT_REG);
4674 btrfs_set_file_extent_disk_bytenr(leaf, fi,
4675 new_extents[i].disk_bytenr);
4676 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
4677 new_extents[i].disk_num_bytes);
4678 btrfs_set_file_extent_ram_bytes(leaf, fi,
4679 new_extents[i].ram_bytes);
4680
4681 btrfs_set_file_extent_compression(leaf, fi,
4682 new_extents[i].compression);
4683 btrfs_set_file_extent_encryption(leaf, fi,
4684 new_extents[i].encryption);
4685 btrfs_set_file_extent_other_encoding(leaf, fi,
4686 new_extents[i].other_encoding);
4687
4688 btrfs_set_file_extent_num_bytes(leaf, fi,
4689 extent_len);
4690 ext_offset += new_extents[i].offset;
4691 btrfs_set_file_extent_offset(leaf, fi,
4692 ext_offset);
4693 btrfs_mark_buffer_dirty(leaf);
4694
4695 btrfs_drop_extent_cache(inode, key.offset,
4696 key.offset + extent_len - 1, 0);
4697
4698 ret = btrfs_inc_extent_ref(trans, root,
4699 new_extents[i].disk_bytenr,
4700 new_extents[i].disk_num_bytes,
4701 leaf->start,
4702 root->root_key.objectid,
4703 trans->transid, key.objectid);
4704 BUG_ON(ret);
4705 btrfs_release_path(root, path);
4706
4707 inode_add_bytes(inode, extent_len);
4708
4709 ext_offset = 0;
4710 num_bytes -= extent_len;
4711 key.offset += extent_len;
4712
4713 if (num_bytes == 0)
4714 break;
4715 }
4716 BUG_ON(i >= nr_extents);
4717#endif
4718 }
4719
4720 if (extent_locked) {
4721 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
4722 lock_end, GFP_NOFS);
4723 extent_locked = 0;
4724 }
4725skip:
4726 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
4727 key.offset >= first_pos + extent_key->offset)
4728 break;
4729
4730 cond_resched();
4731 }
4732 ret = 0;
4733out:
4734 btrfs_release_path(root, path);
4735 if (inode) {
4736 mutex_unlock(&inode->i_mutex);
4737 if (extent_locked) {
4738 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
4739 lock_end, GFP_NOFS);
4740 }
4741 iput(inode);
4742 }
4743 return ret;
4744}
4745
4746int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
4747 struct btrfs_root *root,
4748 struct extent_buffer *buf, u64 orig_start)
4749{
4750 int level;
4751 int ret;
4752
4753 BUG_ON(btrfs_header_generation(buf) != trans->transid);
4754 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
4755
4756 level = btrfs_header_level(buf);
4757 if (level == 0) {
4758 struct btrfs_leaf_ref *ref;
4759 struct btrfs_leaf_ref *orig_ref;
4760
4761 orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
4762 if (!orig_ref)
4763 return -ENOENT;
4764
4765 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
4766 if (!ref) {
4767 btrfs_free_leaf_ref(root, orig_ref);
4768 return -ENOMEM;
4769 }
4770
4771 ref->nritems = orig_ref->nritems;
4772 memcpy(ref->extents, orig_ref->extents,
4773 sizeof(ref->extents[0]) * ref->nritems);
4774
4775 btrfs_free_leaf_ref(root, orig_ref);
4776
4777 ref->root_gen = trans->transid;
4778 ref->bytenr = buf->start;
4779 ref->owner = btrfs_header_owner(buf);
4780 ref->generation = btrfs_header_generation(buf);
4781 ret = btrfs_add_leaf_ref(root, ref, 0);
4782 WARN_ON(ret);
4783 btrfs_free_leaf_ref(root, ref);
4784 }
4785 return 0;
4786}
4787
4788static noinline int invalidate_extent_cache(struct btrfs_root *root,
4789 struct extent_buffer *leaf,
4790 struct btrfs_block_group_cache *group,
4791 struct btrfs_root *target_root)
4792{
4793 struct btrfs_key key;
4794 struct inode *inode = NULL;
4795 struct btrfs_file_extent_item *fi;
4796 u64 num_bytes;
4797 u64 skip_objectid = 0;
4798 u32 nritems;
4799 u32 i;
4800
4801 nritems = btrfs_header_nritems(leaf);
4802 for (i = 0; i < nritems; i++) {
4803 btrfs_item_key_to_cpu(leaf, &key, i);
4804 if (key.objectid == skip_objectid ||
4805 key.type != BTRFS_EXTENT_DATA_KEY)
4806 continue;
4807 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
4808 if (btrfs_file_extent_type(leaf, fi) ==
4809 BTRFS_FILE_EXTENT_INLINE)
4810 continue;
4811 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
4812 continue;
4813 if (!inode || inode->i_ino != key.objectid) {
4814 iput(inode);
4815 inode = btrfs_ilookup(target_root->fs_info->sb,
4816 key.objectid, target_root, 1);
4817 }
4818 if (!inode) {
4819 skip_objectid = key.objectid;
4820 continue;
4821 }
4822 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
4823
4824 lock_extent(&BTRFS_I(inode)->io_tree, key.offset,
4825 key.offset + num_bytes - 1, GFP_NOFS);
4826 btrfs_drop_extent_cache(inode, key.offset,
4827 key.offset + num_bytes - 1, 1);
4828 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset,
4829 key.offset + num_bytes - 1, GFP_NOFS);
4830 cond_resched();
4831 }
4832 iput(inode);
4833 return 0;
4834}
4835
4836static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
4837 struct btrfs_root *root,
4838 struct extent_buffer *leaf,
4839 struct btrfs_block_group_cache *group,
4840 struct inode *reloc_inode)
4841{
4842 struct btrfs_key key;
4843 struct btrfs_key extent_key;
4844 struct btrfs_file_extent_item *fi;
4845 struct btrfs_leaf_ref *ref;
4846 struct disk_extent *new_extent;
4847 u64 bytenr;
4848 u64 num_bytes;
4849 u32 nritems;
4850 u32 i;
4851 int ext_index;
4852 int nr_extent;
4853 int ret;
4854
4855 new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
4856 BUG_ON(!new_extent);
4857
4858 ref = btrfs_lookup_leaf_ref(root, leaf->start);
4859 BUG_ON(!ref);
4860
4861 ext_index = -1;
4862 nritems = btrfs_header_nritems(leaf);
4863 for (i = 0; i < nritems; i++) {
4864 btrfs_item_key_to_cpu(leaf, &key, i);
4865 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
4866 continue;
4867 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
4868 if (btrfs_file_extent_type(leaf, fi) ==
4869 BTRFS_FILE_EXTENT_INLINE)
4870 continue;
4871 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
4872 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
4873 if (bytenr == 0)
4874 continue;
4875
4876 ext_index++;
4877 if (bytenr >= group->key.objectid + group->key.offset ||
4878 bytenr + num_bytes <= group->key.objectid)
4879 continue;
4880
4881 extent_key.objectid = bytenr;
4882 extent_key.offset = num_bytes;
4883 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
4884 nr_extent = 1;
4885 ret = get_new_locations(reloc_inode, &extent_key,
4886 group->key.objectid, 1,
4887 &new_extent, &nr_extent);
4888 if (ret > 0)
4889 continue;
4890 BUG_ON(ret < 0);
4891
4892 BUG_ON(ref->extents[ext_index].bytenr != bytenr);
4893 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
4894 ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
4895 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;
4896
4897 btrfs_set_file_extent_disk_bytenr(leaf, fi,
4898 new_extent->disk_bytenr);
4899 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
4900 new_extent->disk_num_bytes);
4901 btrfs_mark_buffer_dirty(leaf);
4902
4903 ret = btrfs_inc_extent_ref(trans, root,
4904 new_extent->disk_bytenr,
4905 new_extent->disk_num_bytes,
4906 leaf->start,
4907 root->root_key.objectid,
4908 trans->transid, key.objectid);
4909 BUG_ON(ret);
4910 ret = btrfs_free_extent(trans, root,
4911 bytenr, num_bytes, leaf->start,
4912 btrfs_header_owner(leaf),
4913 btrfs_header_generation(leaf),
4914 key.objectid, 0);
4915 BUG_ON(ret);
4916 cond_resched();
4917 }
4918 kfree(new_extent);
4919 BUG_ON(ext_index + 1 != ref->nritems);
4920 btrfs_free_leaf_ref(root, ref);
4921 return 0;
4922}
4923
4924int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
4925 struct btrfs_root *root)
4926{
4927 struct btrfs_root *reloc_root;
4928 int ret;
4929
4930 if (root->reloc_root) {
4931 reloc_root = root->reloc_root;
4932 root->reloc_root = NULL;
4933 list_add(&reloc_root->dead_list,
4934 &root->fs_info->dead_reloc_roots);
4935
4936 btrfs_set_root_bytenr(&reloc_root->root_item,
4937 reloc_root->node->start);
4938 btrfs_set_root_level(&root->root_item,
4939 btrfs_header_level(reloc_root->node));
4940 memset(&reloc_root->root_item.drop_progress, 0,
4941 sizeof(struct btrfs_disk_key));
4942 reloc_root->root_item.drop_level = 0;
4943
4944 ret = btrfs_update_root(trans, root->fs_info->tree_root,
4945 &reloc_root->root_key,
4946 &reloc_root->root_item);
4947 BUG_ON(ret);
4948 }
4949 return 0;
4950}
4951
4952int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
4953{
4954 struct btrfs_trans_handle *trans;
4955 struct btrfs_root *reloc_root;
4956 struct btrfs_root *prev_root = NULL;
4957 struct list_head dead_roots;
4958 int ret;
4959 unsigned long nr;
4960
4961 INIT_LIST_HEAD(&dead_roots);
4962 list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);
4963
4964 while (!list_empty(&dead_roots)) {
4965 reloc_root = list_entry(dead_roots.prev,
4966 struct btrfs_root, dead_list);
4967 list_del_init(&reloc_root->dead_list);
4968
4969 BUG_ON(reloc_root->commit_root != NULL);
4970 while (1) {
4971 trans = btrfs_join_transaction(root, 1);
4972 BUG_ON(!trans);
4973
4974 mutex_lock(&root->fs_info->drop_mutex);
4975 ret = btrfs_drop_snapshot(trans, reloc_root);
4976 if (ret != -EAGAIN)
4977 break;
4978 mutex_unlock(&root->fs_info->drop_mutex);
4979
4980 nr = trans->blocks_used;
4981 ret = btrfs_end_transaction(trans, root);
4982 BUG_ON(ret);
4983 btrfs_btree_balance_dirty(root, nr);
4984 }
4985
4986 free_extent_buffer(reloc_root->node);
4987
4988 ret = btrfs_del_root(trans, root->fs_info->tree_root,
4989 &reloc_root->root_key);
4990 BUG_ON(ret);
4991 mutex_unlock(&root->fs_info->drop_mutex);
4992
4993 nr = trans->blocks_used;
4994 ret = btrfs_end_transaction(trans, root);
4995 BUG_ON(ret);
4996 btrfs_btree_balance_dirty(root, nr);
4997
4998 kfree(prev_root);
4999 prev_root = reloc_root;
5000 }
5001 if (prev_root) {
5002 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
5003 kfree(prev_root);
5004 }
5005 return 0;
5006}
5007
5008int btrfs_add_dead_reloc_root(struct btrfs_root *root)
5009{
5010 list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
5011 return 0;
5012}
5013
5014int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
5015{
5016 struct btrfs_root *reloc_root;
5017 struct btrfs_trans_handle *trans;
5018 struct btrfs_key location;
5019 int found;
5020 int ret;
5021
5022 mutex_lock(&root->fs_info->tree_reloc_mutex);
5023 ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
5024 BUG_ON(ret);
5025 found = !list_empty(&root->fs_info->dead_reloc_roots);
5026 mutex_unlock(&root->fs_info->tree_reloc_mutex);
5027
5028 if (found) {
5029 trans = btrfs_start_transaction(root, 1);
5030 BUG_ON(!trans);
5031 ret = btrfs_commit_transaction(trans, root);
5032 BUG_ON(ret);
5033 }
5034
5035 location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
5036 location.offset = (u64)-1;
5037 location.type = BTRFS_ROOT_ITEM_KEY;
5038
5039 reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
5040 BUG_ON(!reloc_root);
5041 btrfs_orphan_cleanup(reloc_root);
5042 return 0;
5043}
5044
5045static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
5046 struct btrfs_root *root)
5047{
5048 struct btrfs_root *reloc_root;
5049 struct extent_buffer *eb;
5050 struct btrfs_root_item *root_item;
5051 struct btrfs_key root_key;
5052 int ret;
5053
5054 BUG_ON(!root->ref_cows);
5055 if (root->reloc_root)
5056 return 0;
5057
5058 root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
5059 BUG_ON(!root_item);
5060
5061 ret = btrfs_copy_root(trans, root, root->commit_root,
5062 &eb, BTRFS_TREE_RELOC_OBJECTID);
5063 BUG_ON(ret);
5064
5065 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
5066 root_key.offset = root->root_key.objectid;
5067 root_key.type = BTRFS_ROOT_ITEM_KEY;
5068
5069 memcpy(root_item, &root->root_item, sizeof(root_item));
5070 btrfs_set_root_refs(root_item, 0);
5071 btrfs_set_root_bytenr(root_item, eb->start);
5072 btrfs_set_root_level(root_item, btrfs_header_level(eb));
5073 btrfs_set_root_generation(root_item, trans->transid);
5074
5075 btrfs_tree_unlock(eb);
5076 free_extent_buffer(eb);
5077
5078 ret = btrfs_insert_root(trans, root->fs_info->tree_root,
5079 &root_key, root_item);
5080 BUG_ON(ret);
5081 kfree(root_item);
5082
5083 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
5084 &root_key);
5085 BUG_ON(!reloc_root);
5086 reloc_root->last_trans = trans->transid;
5087 reloc_root->commit_root = NULL;
5088 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
5089
5090 root->reloc_root = reloc_root;
5091 return 0;
5092}
5093
5094/*
5095 * Core function of space balance.
5096 *
5097 * The idea is using reloc trees to relocate tree blocks in reference
5098 * counted roots. There is one reloc tree for each subvol, and all
5099 * reloc trees share same root key objectid. Reloc trees are snapshots
5100 * of the latest committed roots of subvols (root->commit_root).
5101 *
5102 * To relocate a tree block referenced by a subvol, there are two steps.
5103 * COW the block through subvol's reloc tree, then update block pointer
5104 * in the subvol to point to the new block. Since all reloc trees share
5105 * same root key objectid, doing special handing for tree blocks owned
5106 * by them is easy. Once a tree block has been COWed in one reloc tree,
5107 * we can use the resulting new block directly when the same block is
5108 * required to COW again through other reloc trees. By this way, relocated
5109 * tree blocks are shared between reloc trees, so they are also shared
5110 * between subvols.
5111 */
5112static noinline int relocate_one_path(struct btrfs_trans_handle *trans,
5113 struct btrfs_root *root,
5114 struct btrfs_path *path,
5115 struct btrfs_key *first_key,
5116 struct btrfs_ref_path *ref_path,
5117 struct btrfs_block_group_cache *group,
5118 struct inode *reloc_inode)
5119{
5120 struct btrfs_root *reloc_root;
5121 struct extent_buffer *eb = NULL;
5122 struct btrfs_key *keys;
5123 u64 *nodes;
5124 int level;
5125 int shared_level;
5126 int lowest_level = 0;
5127 int ret;
5128
5129 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
5130 lowest_level = ref_path->owner_objectid;
5131
5132 if (!root->ref_cows) {
5133 path->lowest_level = lowest_level;
5134 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
5135 BUG_ON(ret < 0);
5136 path->lowest_level = 0;
5137 btrfs_release_path(root, path);
5138 return 0;
5139 }
5140
5141 mutex_lock(&root->fs_info->tree_reloc_mutex);
5142 ret = init_reloc_tree(trans, root);
5143 BUG_ON(ret);
5144 reloc_root = root->reloc_root;
5145
5146 shared_level = ref_path->shared_level;
5147 ref_path->shared_level = BTRFS_MAX_LEVEL - 1;
5148
5149 keys = ref_path->node_keys;
5150 nodes = ref_path->new_nodes;
5151 memset(&keys[shared_level + 1], 0,
5152 sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
5153 memset(&nodes[shared_level + 1], 0,
5154 sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));
5155
5156 if (nodes[lowest_level] == 0) {
5157 path->lowest_level = lowest_level;
5158 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
5159 0, 1);
5160 BUG_ON(ret);
5161 for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
5162 eb = path->nodes[level];
5163 if (!eb || eb == reloc_root->node)
5164 break;
5165 nodes[level] = eb->start;
5166 if (level == 0)
5167 btrfs_item_key_to_cpu(eb, &keys[level], 0);
5168 else
5169 btrfs_node_key_to_cpu(eb, &keys[level], 0);
5170 }
5171 if (nodes[0] &&
5172 ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
5173 eb = path->nodes[0];
5174 ret = replace_extents_in_leaf(trans, reloc_root, eb,
5175 group, reloc_inode);
5176 BUG_ON(ret);
5177 }
5178 btrfs_release_path(reloc_root, path);
5179 } else {
5180 ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
5181 lowest_level);
5182 BUG_ON(ret);
5183 }
5184
5185 /*
5186 * replace tree blocks in the fs tree with tree blocks in
5187 * the reloc tree.
5188 */
5189 ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
5190 BUG_ON(ret < 0);
5191
5192 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
5193 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
5194 0, 0);
5195 BUG_ON(ret);
5196 extent_buffer_get(path->nodes[0]);
5197 eb = path->nodes[0];
5198 btrfs_release_path(reloc_root, path);
5199 ret = invalidate_extent_cache(reloc_root, eb, group, root);
5200 BUG_ON(ret);
5201 free_extent_buffer(eb);
5202 }
5203
5204 mutex_unlock(&root->fs_info->tree_reloc_mutex);
5205 path->lowest_level = 0;
5206 return 0;
5207}
5208
5209static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
5210 struct btrfs_root *root,
5211 struct btrfs_path *path,
5212 struct btrfs_key *first_key,
5213 struct btrfs_ref_path *ref_path)
5214{
5215 int ret;
5216
5217 ret = relocate_one_path(trans, root, path, first_key,
5218 ref_path, NULL, NULL);
5219 BUG_ON(ret);
5220
5221 if (root == root->fs_info->extent_root)
5222 btrfs_extent_post_op(trans, root);
5223
5224 return 0;
5225}
5226
5227static noinline int del_extent_zero(struct btrfs_trans_handle *trans,
5228 struct btrfs_root *extent_root,
5229 struct btrfs_path *path,
5230 struct btrfs_key *extent_key)
5231{
5232 int ret;
5233
5234 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
5235 if (ret)
5236 goto out;
5237 ret = btrfs_del_item(trans, extent_root, path);
5238out:
5239 btrfs_release_path(extent_root, path);
5240 return ret;
5241}
5242
5243static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info,
5244 struct btrfs_ref_path *ref_path)
5245{
5246 struct btrfs_key root_key;
5247
5248 root_key.objectid = ref_path->root_objectid;
5249 root_key.type = BTRFS_ROOT_ITEM_KEY;
5250 if (is_cowonly_root(ref_path->root_objectid))
5251 root_key.offset = 0;
5252 else
5253 root_key.offset = (u64)-1;
5254
5255 return btrfs_read_fs_root_no_name(fs_info, &root_key);
5256}
5257
5258static noinline int relocate_one_extent(struct btrfs_root *extent_root,
5259 struct btrfs_path *path,
5260 struct btrfs_key *extent_key,
5261 struct btrfs_block_group_cache *group,
5262 struct inode *reloc_inode, int pass)
5263{
5264 struct btrfs_trans_handle *trans;
5265 struct btrfs_root *found_root;
5266 struct btrfs_ref_path *ref_path = NULL;
5267 struct disk_extent *new_extents = NULL;
5268 int nr_extents = 0;
5269 int loops;
5270 int ret;
5271 int level;
5272 struct btrfs_key first_key;
5273 u64 prev_block = 0;
5274
5275
5276 trans = btrfs_start_transaction(extent_root, 1);
5277 BUG_ON(!trans);
5278
5279 if (extent_key->objectid == 0) {
5280 ret = del_extent_zero(trans, extent_root, path, extent_key);
5281 goto out;
5282 }
5283
5284 ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
5285 if (!ref_path) {
5286 ret = -ENOMEM;
5287 goto out;
5288 }
5289
5290 for (loops = 0; ; loops++) {
5291 if (loops == 0) {
5292 ret = btrfs_first_ref_path(trans, extent_root, ref_path,
5293 extent_key->objectid);
5294 } else {
5295 ret = btrfs_next_ref_path(trans, extent_root, ref_path);
5296 }
5297 if (ret < 0)
5298 goto out;
5299 if (ret > 0)
5300 break;
5301
5302 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
5303 ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
5304 continue;
5305
5306 found_root = read_ref_root(extent_root->fs_info, ref_path);
5307 BUG_ON(!found_root);
5308 /*
5309 * for reference counted tree, only process reference paths
5310 * rooted at the latest committed root.
5311 */
5312 if (found_root->ref_cows &&
5313 ref_path->root_generation != found_root->root_key.offset)
5314 continue;
5315
5316 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
5317 if (pass == 0) {
5318 /*
5319 * copy data extents to new locations
5320 */
5321 u64 group_start = group->key.objectid;
5322 ret = relocate_data_extent(reloc_inode,
5323 extent_key,
5324 group_start);
5325 if (ret < 0)
5326 goto out;
5327 break;
5328 }
5329 level = 0;
5330 } else {
5331 level = ref_path->owner_objectid;
5332 }
5333
5334 if (prev_block != ref_path->nodes[level]) {
5335 struct extent_buffer *eb;
5336 u64 block_start = ref_path->nodes[level];
5337 u64 block_size = btrfs_level_size(found_root, level);
5338
5339 eb = read_tree_block(found_root, block_start,
5340 block_size, 0);
5341 btrfs_tree_lock(eb);
5342 BUG_ON(level != btrfs_header_level(eb));
5343
5344 if (level == 0)
5345 btrfs_item_key_to_cpu(eb, &first_key, 0);
5346 else
5347 btrfs_node_key_to_cpu(eb, &first_key, 0);
5348
5349 btrfs_tree_unlock(eb);
5350 free_extent_buffer(eb);
5351 prev_block = block_start;
5352 }
5353
5354 btrfs_record_root_in_trans(found_root);
5355 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
5356 /*
5357 * try to update data extent references while
5358 * keeping metadata shared between snapshots.
5359 */
5360 if (pass == 1) {
5361 ret = relocate_one_path(trans, found_root,
5362 path, &first_key, ref_path,
5363 group, reloc_inode);
5364 if (ret < 0)
5365 goto out;
5366 continue;
5367 }
5368 /*
5369 * use fallback method to process the remaining
5370 * references.
5371 */
5372 if (!new_extents) {
5373 u64 group_start = group->key.objectid;
5374 new_extents = kmalloc(sizeof(*new_extents),
5375 GFP_NOFS);
5376 nr_extents = 1;
5377 ret = get_new_locations(reloc_inode,
5378 extent_key,
5379 group_start, 1,
5380 &new_extents,
5381 &nr_extents);
5382 if (ret)
5383 goto out;
5384 }
5385 ret = replace_one_extent(trans, found_root,
5386 path, extent_key,
5387 &first_key, ref_path,
5388 new_extents, nr_extents);
5389 } else {
5390 ret = relocate_tree_block(trans, found_root, path,
5391 &first_key, ref_path);
5392 }
5393 if (ret < 0)
5394 goto out;
5395 }
5396 ret = 0;
5397out:
5398 btrfs_end_transaction(trans, extent_root);
5399 kfree(new_extents);
5400 kfree(ref_path);
5401 return ret;
5402}
5403
5404static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
5405{
5406 u64 num_devices;
5407 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
5408 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
5409
5410 num_devices = root->fs_info->fs_devices->rw_devices;
5411 if (num_devices == 1) {
5412 stripped |= BTRFS_BLOCK_GROUP_DUP;
5413 stripped = flags & ~stripped;
5414
5415 /* turn raid0 into single device chunks */
5416 if (flags & BTRFS_BLOCK_GROUP_RAID0)
5417 return stripped;
5418
5419 /* turn mirroring into duplication */
5420 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
5421 BTRFS_BLOCK_GROUP_RAID10))
5422 return stripped | BTRFS_BLOCK_GROUP_DUP;
5423 return flags;
5424 } else {
5425 /* they already had raid on here, just return */
5426 if (flags & stripped)
5427 return flags;
5428
5429 stripped |= BTRFS_BLOCK_GROUP_DUP;
5430 stripped = flags & ~stripped;
5431
5432 /* switch duplicated blocks with raid1 */
5433 if (flags & BTRFS_BLOCK_GROUP_DUP)
5434 return stripped | BTRFS_BLOCK_GROUP_RAID1;
5435
5436 /* turn single device chunks into raid0 */
5437 return stripped | BTRFS_BLOCK_GROUP_RAID0;
5438 }
5439 return flags;
5440}
5441
5442static int __alloc_chunk_for_shrink(struct btrfs_root *root,
5443 struct btrfs_block_group_cache *shrink_block_group,
5444 int force)
5445{
5446 struct btrfs_trans_handle *trans;
5447 u64 new_alloc_flags;
5448 u64 calc;
5449
5450 spin_lock(&shrink_block_group->lock);
5451 if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
5452 spin_unlock(&shrink_block_group->lock);
5453
5454 trans = btrfs_start_transaction(root, 1);
5455 spin_lock(&shrink_block_group->lock);
5456
5457 new_alloc_flags = update_block_group_flags(root,
5458 shrink_block_group->flags);
5459 if (new_alloc_flags != shrink_block_group->flags) {
5460 calc =
5461 btrfs_block_group_used(&shrink_block_group->item);
5462 } else {
5463 calc = shrink_block_group->key.offset;
5464 }
5465 spin_unlock(&shrink_block_group->lock);
5466
5467 do_chunk_alloc(trans, root->fs_info->extent_root,
5468 calc + 2 * 1024 * 1024, new_alloc_flags, force);
5469
5470 btrfs_end_transaction(trans, root);
5471 } else
5472 spin_unlock(&shrink_block_group->lock);
5473 return 0;
5474}
5475
5476static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
5477 struct btrfs_root *root,
5478 u64 objectid, u64 size)
5479{
5480 struct btrfs_path *path;
5481 struct btrfs_inode_item *item;
5482 struct extent_buffer *leaf;
5483 int ret;
5484
5485 path = btrfs_alloc_path();
5486 if (!path)
5487 return -ENOMEM;
5488
5489 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
5490 if (ret)
5491 goto out;
5492
5493 leaf = path->nodes[0];
5494 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
5495 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
5496 btrfs_set_inode_generation(leaf, item, 1);
5497 btrfs_set_inode_size(leaf, item, size);
5498 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
5499 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
5500 btrfs_mark_buffer_dirty(leaf);
5501 btrfs_release_path(root, path);
5502out:
5503 btrfs_free_path(path);
5504 return ret;
5505}
5506
5507static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
5508 struct btrfs_block_group_cache *group)
5509{
5510 struct inode *inode = NULL;
5511 struct btrfs_trans_handle *trans;
5512 struct btrfs_root *root;
5513 struct btrfs_key root_key;
5514 u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
5515 int err = 0;
5516
5517 root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
5518 root_key.type = BTRFS_ROOT_ITEM_KEY;
5519 root_key.offset = (u64)-1;
5520 root = btrfs_read_fs_root_no_name(fs_info, &root_key);
5521 if (IS_ERR(root))
5522 return ERR_CAST(root);
5523
5524 trans = btrfs_start_transaction(root, 1);
5525 BUG_ON(!trans);
5526
5527 err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
5528 if (err)
5529 goto out;
5530
5531 err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
5532 BUG_ON(err);
5533
5534 err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
5535 group->key.offset, 0, group->key.offset,
5536 0, 0, 0);
5537 BUG_ON(err);
5538
5539 inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
5540 if (inode->i_state & I_NEW) {
5541 BTRFS_I(inode)->root = root;
5542 BTRFS_I(inode)->location.objectid = objectid;
5543 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
5544 BTRFS_I(inode)->location.offset = 0;
5545 btrfs_read_locked_inode(inode);
5546 unlock_new_inode(inode);
5547 BUG_ON(is_bad_inode(inode));
5548 } else {
5549 BUG_ON(1);
5550 }
5551 BTRFS_I(inode)->index_cnt = group->key.objectid;
5552
5553 err = btrfs_orphan_add(trans, inode);
5554out:
5555 btrfs_end_transaction(trans, root);
5556 if (err) {
5557 if (inode)
5558 iput(inode);
5559 inode = ERR_PTR(err);
5560 }
5561 return inode;
5562}
5563
5564int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
5565{
5566
5567 struct btrfs_ordered_sum *sums;
5568 struct btrfs_sector_sum *sector_sum;
5569 struct btrfs_ordered_extent *ordered;
5570 struct btrfs_root *root = BTRFS_I(inode)->root;
5571 struct list_head list;
5572 size_t offset;
5573 int ret;
5574 u64 disk_bytenr;
5575
5576 INIT_LIST_HEAD(&list);
5577
5578 ordered = btrfs_lookup_ordered_extent(inode, file_pos);
5579 BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
5580
5581 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
5582 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
5583 disk_bytenr + len - 1, &list);
5584
5585 while (!list_empty(&list)) {
5586 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
5587 list_del_init(&sums->list);
5588
5589 sector_sum = sums->sums;
5590 sums->bytenr = ordered->start;
5591
5592 offset = 0;
5593 while (offset < sums->len) {
5594 sector_sum->bytenr += ordered->start - disk_bytenr;
5595 sector_sum++;
5596 offset += root->sectorsize;
5597 }
5598
5599 btrfs_add_ordered_sum(inode, ordered, sums);
5600 }
5601 btrfs_put_ordered_extent(ordered);
5602 return 0;
5603}
5604
5605int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
5606{
5607 struct btrfs_trans_handle *trans;
5608 struct btrfs_path *path;
5609 struct btrfs_fs_info *info = root->fs_info;
5610 struct extent_buffer *leaf;
5611 struct inode *reloc_inode;
5612 struct btrfs_block_group_cache *block_group;
5613 struct btrfs_key key;
5614 u64 skipped;
5615 u64 cur_byte;
5616 u64 total_found;
5617 u32 nritems;
5618 int ret;
5619 int progress;
5620 int pass = 0;
5621
5622 root = root->fs_info->extent_root;
5623
5624 block_group = btrfs_lookup_block_group(info, group_start);
5625 BUG_ON(!block_group);
5626
5627 printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n",
5628 (unsigned long long)block_group->key.objectid,
5629 (unsigned long long)block_group->flags);
5630
5631 path = btrfs_alloc_path();
5632 BUG_ON(!path);
5633
5634 reloc_inode = create_reloc_inode(info, block_group);
5635 BUG_ON(IS_ERR(reloc_inode));
5636
5637 __alloc_chunk_for_shrink(root, block_group, 1);
5638 set_block_group_readonly(block_group);
5639
5640 btrfs_start_delalloc_inodes(info->tree_root);
5641 btrfs_wait_ordered_extents(info->tree_root, 0);
5642again:
5643 skipped = 0;
5644 total_found = 0;
5645 progress = 0;
5646 key.objectid = block_group->key.objectid;
5647 key.offset = 0;
5648 key.type = 0;
5649 cur_byte = key.objectid;
5650
5651 trans = btrfs_start_transaction(info->tree_root, 1);
5652 btrfs_commit_transaction(trans, info->tree_root);
5653
5654 mutex_lock(&root->fs_info->cleaner_mutex);
5655 btrfs_clean_old_snapshots(info->tree_root);
5656 btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
5657 mutex_unlock(&root->fs_info->cleaner_mutex);
5658
5659 while (1) {
5660 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5661 if (ret < 0)
5662 goto out;
5663next:
5664 leaf = path->nodes[0];
5665 nritems = btrfs_header_nritems(leaf);
5666 if (path->slots[0] >= nritems) {
5667 ret = btrfs_next_leaf(root, path);
5668 if (ret < 0)
5669 goto out;
5670 if (ret == 1) {
5671 ret = 0;
5672 break;
5673 }
5674 leaf = path->nodes[0];
5675 nritems = btrfs_header_nritems(leaf);
5676 }
5677
5678 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5679
5680 if (key.objectid >= block_group->key.objectid +
5681 block_group->key.offset)
5682 break;
5683
5684 if (progress && need_resched()) {
5685 btrfs_release_path(root, path);
5686 cond_resched();
5687 progress = 0;
5688 continue;
5689 }
5690 progress = 1;
5691
5692 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
5693 key.objectid + key.offset <= cur_byte) {
5694 path->slots[0]++;
5695 goto next;
5696 }
5697
5698 total_found++;
5699 cur_byte = key.objectid + key.offset;
5700 btrfs_release_path(root, path);
5701
5702 __alloc_chunk_for_shrink(root, block_group, 0);
5703 ret = relocate_one_extent(root, path, &key, block_group,
5704 reloc_inode, pass);
5705 BUG_ON(ret < 0);
5706 if (ret > 0)
5707 skipped++;
5708
5709 key.objectid = cur_byte;
5710 key.type = 0;
5711 key.offset = 0;
5712 }
5713
5714 btrfs_release_path(root, path);
5715
5716 if (pass == 0) {
5717 btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
5718 invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
5719 }
5720
5721 if (total_found > 0) {
5722 printk(KERN_INFO "btrfs found %llu extents in pass %d\n",
5723 (unsigned long long)total_found, pass);
5724 pass++;
5725 if (total_found == skipped && pass > 2) {
5726 iput(reloc_inode);
5727 reloc_inode = create_reloc_inode(info, block_group);
5728 pass = 0;
5729 }
5730 goto again;
5731 }
5732
5733 /* delete reloc_inode */
5734 iput(reloc_inode);
5735
5736 /* unpin extents in this range */
5737 trans = btrfs_start_transaction(info->tree_root, 1);
5738 btrfs_commit_transaction(trans, info->tree_root);
5739
5740 spin_lock(&block_group->lock);
5741 WARN_ON(block_group->pinned > 0);
5742 WARN_ON(block_group->reserved > 0);
5743 WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
5744 spin_unlock(&block_group->lock);
5745 put_block_group(block_group);
5746 ret = 0;
5747out:
5748 btrfs_free_path(path);
5749 return ret;
5750}
5751
5752static int find_first_block_group(struct btrfs_root *root,
5753 struct btrfs_path *path, struct btrfs_key *key)
5754{
5755 int ret = 0;
5756 struct btrfs_key found_key;
5757 struct extent_buffer *leaf;
5758 int slot;
5759
5760 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
5761 if (ret < 0)
5762 goto out;
5763
5764 while (1) {
5765 slot = path->slots[0];
5766 leaf = path->nodes[0];
5767 if (slot >= btrfs_header_nritems(leaf)) {
5768 ret = btrfs_next_leaf(root, path);
5769 if (ret == 0)
5770 continue;
5771 if (ret < 0)
5772 goto out;
5773 break;
5774 }
5775 btrfs_item_key_to_cpu(leaf, &found_key, slot);
5776
5777 if (found_key.objectid >= key->objectid &&
5778 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5779 ret = 0;
5780 goto out;
5781 }
5782 path->slots[0]++;
5783 }
5784 ret = -ENOENT;
5785out:
5786 return ret;
5787}
5788
5789int btrfs_free_block_groups(struct btrfs_fs_info *info)
5790{
5791 struct btrfs_block_group_cache *block_group;
5792 struct rb_node *n;
5793
5794 spin_lock(&info->block_group_cache_lock);
5795 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
5796 block_group = rb_entry(n, struct btrfs_block_group_cache,
5797 cache_node);
5798 rb_erase(&block_group->cache_node,
5799 &info->block_group_cache_tree);
5800 spin_unlock(&info->block_group_cache_lock);
5801
5802 btrfs_remove_free_space_cache(block_group);
5803 down_write(&block_group->space_info->groups_sem);
5804 list_del(&block_group->list);
5805 up_write(&block_group->space_info->groups_sem);
5806
5807 WARN_ON(atomic_read(&block_group->count) != 1);
5808 kfree(block_group);
5809
5810 spin_lock(&info->block_group_cache_lock);
5811 }
5812 spin_unlock(&info->block_group_cache_lock);
5813 return 0;
5814}
5815
5816int btrfs_read_block_groups(struct btrfs_root *root)
5817{
5818 struct btrfs_path *path;
5819 int ret;
5820 struct btrfs_block_group_cache *cache;
5821 struct btrfs_fs_info *info = root->fs_info;
5822 struct btrfs_space_info *space_info;
5823 struct btrfs_key key;
5824 struct btrfs_key found_key;
5825 struct extent_buffer *leaf;
5826
5827 root = info->extent_root;
5828 key.objectid = 0;
5829 key.offset = 0;
5830 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
5831 path = btrfs_alloc_path();
5832 if (!path)
5833 return -ENOMEM;
5834
5835 while (1) {
5836 ret = find_first_block_group(root, path, &key);
5837 if (ret > 0) {
5838 ret = 0;
5839 goto error;
5840 }
5841 if (ret != 0)
5842 goto error;
5843
5844 leaf = path->nodes[0];
5845 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5846 cache = kzalloc(sizeof(*cache), GFP_NOFS);
5847 if (!cache) {
5848 ret = -ENOMEM;
5849 break;
5850 }
5851
5852 atomic_set(&cache->count, 1);
5853 spin_lock_init(&cache->lock);
5854 mutex_init(&cache->alloc_mutex);
5855 mutex_init(&cache->cache_mutex);
5856 INIT_LIST_HEAD(&cache->list);
5857 read_extent_buffer(leaf, &cache->item,
5858 btrfs_item_ptr_offset(leaf, path->slots[0]),
5859 sizeof(cache->item));
5860 memcpy(&cache->key, &found_key, sizeof(found_key));
5861
5862 key.objectid = found_key.objectid + found_key.offset;
5863 btrfs_release_path(root, path);
5864 cache->flags = btrfs_block_group_flags(&cache->item);
5865
5866 ret = update_space_info(info, cache->flags, found_key.offset,
5867 btrfs_block_group_used(&cache->item),
5868 &space_info);
5869 BUG_ON(ret);
5870 cache->space_info = space_info;
5871 down_write(&space_info->groups_sem);
5872 list_add_tail(&cache->list, &space_info->block_groups);
5873 up_write(&space_info->groups_sem);
5874
5875 ret = btrfs_add_block_group_cache(root->fs_info, cache);
5876 BUG_ON(ret);
5877
5878 set_avail_alloc_bits(root->fs_info, cache->flags);
5879 if (btrfs_chunk_readonly(root, cache->key.objectid))
5880 set_block_group_readonly(cache);
5881 }
5882 ret = 0;
5883error:
5884 btrfs_free_path(path);
5885 return ret;
5886}
5887
5888int btrfs_make_block_group(struct btrfs_trans_handle *trans,
5889 struct btrfs_root *root, u64 bytes_used,
5890 u64 type, u64 chunk_objectid, u64 chunk_offset,
5891 u64 size)
5892{
5893 int ret;
5894 struct btrfs_root *extent_root;
5895 struct btrfs_block_group_cache *cache;
5896
5897 extent_root = root->fs_info->extent_root;
5898
5899 root->fs_info->last_trans_new_blockgroup = trans->transid;
5900
5901 cache = kzalloc(sizeof(*cache), GFP_NOFS);
5902 if (!cache)
5903 return -ENOMEM;
5904
5905 cache->key.objectid = chunk_offset;
5906 cache->key.offset = size;
5907 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
5908 atomic_set(&cache->count, 1);
5909 spin_lock_init(&cache->lock);
5910 mutex_init(&cache->alloc_mutex);
5911 mutex_init(&cache->cache_mutex);
5912 INIT_LIST_HEAD(&cache->list);
5913
5914 btrfs_set_block_group_used(&cache->item, bytes_used);
5915 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
5916 cache->flags = type;
5917 btrfs_set_block_group_flags(&cache->item, type);
5918
5919 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
5920 &cache->space_info);
5921 BUG_ON(ret);
5922 down_write(&cache->space_info->groups_sem);
5923 list_add_tail(&cache->list, &cache->space_info->block_groups);
5924 up_write(&cache->space_info->groups_sem);
5925
5926 ret = btrfs_add_block_group_cache(root->fs_info, cache);
5927 BUG_ON(ret);
5928
5929 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
5930 sizeof(cache->item));
5931 BUG_ON(ret);
5932
5933 finish_current_insert(trans, extent_root, 0);
5934 ret = del_pending_extents(trans, extent_root, 0);
5935 BUG_ON(ret);
5936 set_avail_alloc_bits(extent_root->fs_info, type);
5937
5938 return 0;
5939}
5940
5941int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
5942 struct btrfs_root *root, u64 group_start)
5943{
5944 struct btrfs_path *path;
5945 struct btrfs_block_group_cache *block_group;
5946 struct btrfs_key key;
5947 int ret;
5948
5949 root = root->fs_info->extent_root;
5950
5951 block_group = btrfs_lookup_block_group(root->fs_info, group_start);
5952 BUG_ON(!block_group);
5953 BUG_ON(!block_group->ro);
5954
5955 memcpy(&key, &block_group->key, sizeof(key));
5956
5957 path = btrfs_alloc_path();
5958 BUG_ON(!path);
5959
5960 btrfs_remove_free_space_cache(block_group);
5961 rb_erase(&block_group->cache_node,
5962 &root->fs_info->block_group_cache_tree);
5963 down_write(&block_group->space_info->groups_sem);
5964 list_del(&block_group->list);
5965 up_write(&block_group->space_info->groups_sem);
5966
5967 spin_lock(&block_group->space_info->lock);
5968 block_group->space_info->total_bytes -= block_group->key.offset;
5969 block_group->space_info->bytes_readonly -= block_group->key.offset;
5970 spin_unlock(&block_group->space_info->lock);
5971 block_group->space_info->full = 0;
5972
5973 put_block_group(block_group);
5974 put_block_group(block_group);
5975
5976 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
5977 if (ret > 0)
5978 ret = -EIO;
5979 if (ret < 0)
5980 goto out;
5981
5982 ret = btrfs_del_item(trans, root, path);
5983out:
5984 btrfs_free_path(path);
5985 return ret;
5986}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
new file mode 100644
index 000000000000..e086d407f1fa
--- /dev/null
+++ b/fs/btrfs/extent_io.c
@@ -0,0 +1,3717 @@
1#include <linux/bitops.h>
2#include <linux/slab.h>
3#include <linux/bio.h>
4#include <linux/mm.h>
5#include <linux/gfp.h>
6#include <linux/pagemap.h>
7#include <linux/page-flags.h>
8#include <linux/module.h>
9#include <linux/spinlock.h>
10#include <linux/blkdev.h>
11#include <linux/swap.h>
12#include <linux/version.h>
13#include <linux/writeback.h>
14#include <linux/pagevec.h>
15#include "extent_io.h"
16#include "extent_map.h"
17#include "compat.h"
18#include "ctree.h"
19#include "btrfs_inode.h"
20
21/* temporary define until extent_map moves out of btrfs */
22struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
23 unsigned long extra_flags,
24 void (*ctor)(void *, struct kmem_cache *,
25 unsigned long));
26
27static struct kmem_cache *extent_state_cache;
28static struct kmem_cache *extent_buffer_cache;
29
30static LIST_HEAD(buffers);
31static LIST_HEAD(states);
32
33#define LEAK_DEBUG 0
34#ifdef LEAK_DEBUG
35static DEFINE_SPINLOCK(leak_lock);
36#endif
37
38#define BUFFER_LRU_MAX 64
39
40struct tree_entry {
41 u64 start;
42 u64 end;
43 struct rb_node rb_node;
44};
45
46struct extent_page_data {
47 struct bio *bio;
48 struct extent_io_tree *tree;
49 get_extent_t *get_extent;
50
51 /* tells writepage not to lock the state bits for this range
52 * it still does the unlocking
53 */
54 int extent_locked;
55};
56
57int __init extent_io_init(void)
58{
59 extent_state_cache = btrfs_cache_create("extent_state",
60 sizeof(struct extent_state), 0,
61 NULL);
62 if (!extent_state_cache)
63 return -ENOMEM;
64
65 extent_buffer_cache = btrfs_cache_create("extent_buffers",
66 sizeof(struct extent_buffer), 0,
67 NULL);
68 if (!extent_buffer_cache)
69 goto free_state_cache;
70 return 0;
71
72free_state_cache:
73 kmem_cache_destroy(extent_state_cache);
74 return -ENOMEM;
75}
76
77void extent_io_exit(void)
78{
79 struct extent_state *state;
80 struct extent_buffer *eb;
81
82 while (!list_empty(&states)) {
83 state = list_entry(states.next, struct extent_state, leak_list);
84 printk(KERN_ERR "btrfs state leak: start %llu end %llu "
85 "state %lu in tree %p refs %d\n",
86 (unsigned long long)state->start,
87 (unsigned long long)state->end,
88 state->state, state->tree, atomic_read(&state->refs));
89 list_del(&state->leak_list);
90 kmem_cache_free(extent_state_cache, state);
91
92 }
93
94 while (!list_empty(&buffers)) {
95 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
96 printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
97 "refs %d\n", (unsigned long long)eb->start,
98 eb->len, atomic_read(&eb->refs));
99 list_del(&eb->leak_list);
100 kmem_cache_free(extent_buffer_cache, eb);
101 }
102 if (extent_state_cache)
103 kmem_cache_destroy(extent_state_cache);
104 if (extent_buffer_cache)
105 kmem_cache_destroy(extent_buffer_cache);
106}
107
108void extent_io_tree_init(struct extent_io_tree *tree,
109 struct address_space *mapping, gfp_t mask)
110{
111 tree->state.rb_node = NULL;
112 tree->buffer.rb_node = NULL;
113 tree->ops = NULL;
114 tree->dirty_bytes = 0;
115 spin_lock_init(&tree->lock);
116 spin_lock_init(&tree->buffer_lock);
117 tree->mapping = mapping;
118}
119
120static struct extent_state *alloc_extent_state(gfp_t mask)
121{
122 struct extent_state *state;
123#ifdef LEAK_DEBUG
124 unsigned long flags;
125#endif
126
127 state = kmem_cache_alloc(extent_state_cache, mask);
128 if (!state)
129 return state;
130 state->state = 0;
131 state->private = 0;
132 state->tree = NULL;
133#ifdef LEAK_DEBUG
134 spin_lock_irqsave(&leak_lock, flags);
135 list_add(&state->leak_list, &states);
136 spin_unlock_irqrestore(&leak_lock, flags);
137#endif
138 atomic_set(&state->refs, 1);
139 init_waitqueue_head(&state->wq);
140 return state;
141}
142
143static void free_extent_state(struct extent_state *state)
144{
145 if (!state)
146 return;
147 if (atomic_dec_and_test(&state->refs)) {
148#ifdef LEAK_DEBUG
149 unsigned long flags;
150#endif
151 WARN_ON(state->tree);
152#ifdef LEAK_DEBUG
153 spin_lock_irqsave(&leak_lock, flags);
154 list_del(&state->leak_list);
155 spin_unlock_irqrestore(&leak_lock, flags);
156#endif
157 kmem_cache_free(extent_state_cache, state);
158 }
159}
160
161static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
162 struct rb_node *node)
163{
164 struct rb_node **p = &root->rb_node;
165 struct rb_node *parent = NULL;
166 struct tree_entry *entry;
167
168 while (*p) {
169 parent = *p;
170 entry = rb_entry(parent, struct tree_entry, rb_node);
171
172 if (offset < entry->start)
173 p = &(*p)->rb_left;
174 else if (offset > entry->end)
175 p = &(*p)->rb_right;
176 else
177 return parent;
178 }
179
180 entry = rb_entry(node, struct tree_entry, rb_node);
181 rb_link_node(node, parent, p);
182 rb_insert_color(node, root);
183 return NULL;
184}
185
186static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
187 struct rb_node **prev_ret,
188 struct rb_node **next_ret)
189{
190 struct rb_root *root = &tree->state;
191 struct rb_node *n = root->rb_node;
192 struct rb_node *prev = NULL;
193 struct rb_node *orig_prev = NULL;
194 struct tree_entry *entry;
195 struct tree_entry *prev_entry = NULL;
196
197 while (n) {
198 entry = rb_entry(n, struct tree_entry, rb_node);
199 prev = n;
200 prev_entry = entry;
201
202 if (offset < entry->start)
203 n = n->rb_left;
204 else if (offset > entry->end)
205 n = n->rb_right;
206 else
207 return n;
208 }
209
210 if (prev_ret) {
211 orig_prev = prev;
212 while (prev && offset > prev_entry->end) {
213 prev = rb_next(prev);
214 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
215 }
216 *prev_ret = prev;
217 prev = orig_prev;
218 }
219
220 if (next_ret) {
221 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
222 while (prev && offset < prev_entry->start) {
223 prev = rb_prev(prev);
224 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
225 }
226 *next_ret = prev;
227 }
228 return NULL;
229}
230
231static inline struct rb_node *tree_search(struct extent_io_tree *tree,
232 u64 offset)
233{
234 struct rb_node *prev = NULL;
235 struct rb_node *ret;
236
237 ret = __etree_search(tree, offset, &prev, NULL);
238 if (!ret)
239 return prev;
240 return ret;
241}
242
243static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
244 u64 offset, struct rb_node *node)
245{
246 struct rb_root *root = &tree->buffer;
247 struct rb_node **p = &root->rb_node;
248 struct rb_node *parent = NULL;
249 struct extent_buffer *eb;
250
251 while (*p) {
252 parent = *p;
253 eb = rb_entry(parent, struct extent_buffer, rb_node);
254
255 if (offset < eb->start)
256 p = &(*p)->rb_left;
257 else if (offset > eb->start)
258 p = &(*p)->rb_right;
259 else
260 return eb;
261 }
262
263 rb_link_node(node, parent, p);
264 rb_insert_color(node, root);
265 return NULL;
266}
267
268static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
269 u64 offset)
270{
271 struct rb_root *root = &tree->buffer;
272 struct rb_node *n = root->rb_node;
273 struct extent_buffer *eb;
274
275 while (n) {
276 eb = rb_entry(n, struct extent_buffer, rb_node);
277 if (offset < eb->start)
278 n = n->rb_left;
279 else if (offset > eb->start)
280 n = n->rb_right;
281 else
282 return eb;
283 }
284 return NULL;
285}
286
287/*
288 * utility function to look for merge candidates inside a given range.
289 * Any extents with matching state are merged together into a single
290 * extent in the tree. Extents with EXTENT_IO in their state field
291 * are not merged because the end_io handlers need to be able to do
292 * operations on them without sleeping (or doing allocations/splits).
293 *
294 * This should be called with the tree lock held.
295 */
296static int merge_state(struct extent_io_tree *tree,
297 struct extent_state *state)
298{
299 struct extent_state *other;
300 struct rb_node *other_node;
301
302 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
303 return 0;
304
305 other_node = rb_prev(&state->rb_node);
306 if (other_node) {
307 other = rb_entry(other_node, struct extent_state, rb_node);
308 if (other->end == state->start - 1 &&
309 other->state == state->state) {
310 state->start = other->start;
311 other->tree = NULL;
312 rb_erase(&other->rb_node, &tree->state);
313 free_extent_state(other);
314 }
315 }
316 other_node = rb_next(&state->rb_node);
317 if (other_node) {
318 other = rb_entry(other_node, struct extent_state, rb_node);
319 if (other->start == state->end + 1 &&
320 other->state == state->state) {
321 other->start = state->start;
322 state->tree = NULL;
323 rb_erase(&state->rb_node, &tree->state);
324 free_extent_state(state);
325 }
326 }
327 return 0;
328}
329
330static void set_state_cb(struct extent_io_tree *tree,
331 struct extent_state *state,
332 unsigned long bits)
333{
334 if (tree->ops && tree->ops->set_bit_hook) {
335 tree->ops->set_bit_hook(tree->mapping->host, state->start,
336 state->end, state->state, bits);
337 }
338}
339
340static void clear_state_cb(struct extent_io_tree *tree,
341 struct extent_state *state,
342 unsigned long bits)
343{
344 if (tree->ops && tree->ops->clear_bit_hook) {
345 tree->ops->clear_bit_hook(tree->mapping->host, state->start,
346 state->end, state->state, bits);
347 }
348}
349
350/*
351 * insert an extent_state struct into the tree. 'bits' are set on the
352 * struct before it is inserted.
353 *
354 * This may return -EEXIST if the extent is already there, in which case the
355 * state struct is freed.
356 *
357 * The tree lock is not taken internally. This is a utility function and
358 * probably isn't what you want to call (see set/clear_extent_bit).
359 */
360static int insert_state(struct extent_io_tree *tree,
361 struct extent_state *state, u64 start, u64 end,
362 int bits)
363{
364 struct rb_node *node;
365
366 if (end < start) {
367 printk(KERN_ERR "btrfs end < start %llu %llu\n",
368 (unsigned long long)end,
369 (unsigned long long)start);
370 WARN_ON(1);
371 }
372 if (bits & EXTENT_DIRTY)
373 tree->dirty_bytes += end - start + 1;
374 set_state_cb(tree, state, bits);
375 state->state |= bits;
376 state->start = start;
377 state->end = end;
378 node = tree_insert(&tree->state, end, &state->rb_node);
379 if (node) {
380 struct extent_state *found;
381 found = rb_entry(node, struct extent_state, rb_node);
382 printk(KERN_ERR "btrfs found node %llu %llu on insert of "
383 "%llu %llu\n", (unsigned long long)found->start,
384 (unsigned long long)found->end,
385 (unsigned long long)start, (unsigned long long)end);
386 free_extent_state(state);
387 return -EEXIST;
388 }
389 state->tree = tree;
390 merge_state(tree, state);
391 return 0;
392}
393
394/*
395 * split a given extent state struct in two, inserting the preallocated
396 * struct 'prealloc' as the newly created second half. 'split' indicates an
397 * offset inside 'orig' where it should be split.
398 *
399 * Before calling,
400 * the tree has 'orig' at [orig->start, orig->end]. After calling, there
401 * are two extent state structs in the tree:
402 * prealloc: [orig->start, split - 1]
403 * orig: [ split, orig->end ]
404 *
405 * The tree locks are not taken by this function. They need to be held
406 * by the caller.
407 */
408static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
409 struct extent_state *prealloc, u64 split)
410{
411 struct rb_node *node;
412 prealloc->start = orig->start;
413 prealloc->end = split - 1;
414 prealloc->state = orig->state;
415 orig->start = split;
416
417 node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
418 if (node) {
419 struct extent_state *found;
420 found = rb_entry(node, struct extent_state, rb_node);
421 free_extent_state(prealloc);
422 return -EEXIST;
423 }
424 prealloc->tree = tree;
425 return 0;
426}
427
428/*
429 * utility function to clear some bits in an extent state struct.
430 * it will optionally wake up any one waiting on this state (wake == 1), or
431 * forcibly remove the state from the tree (delete == 1).
432 *
433 * If no bits are set on the state struct after clearing things, the
434 * struct is freed and removed from the tree
435 */
436static int clear_state_bit(struct extent_io_tree *tree,
437 struct extent_state *state, int bits, int wake,
438 int delete)
439{
440 int ret = state->state & bits;
441
442 if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
443 u64 range = state->end - state->start + 1;
444 WARN_ON(range > tree->dirty_bytes);
445 tree->dirty_bytes -= range;
446 }
447 clear_state_cb(tree, state, bits);
448 state->state &= ~bits;
449 if (wake)
450 wake_up(&state->wq);
451 if (delete || state->state == 0) {
452 if (state->tree) {
453 clear_state_cb(tree, state, state->state);
454 rb_erase(&state->rb_node, &tree->state);
455 state->tree = NULL;
456 free_extent_state(state);
457 } else {
458 WARN_ON(1);
459 }
460 } else {
461 merge_state(tree, state);
462 }
463 return ret;
464}
465
466/*
467 * clear some bits on a range in the tree. This may require splitting
468 * or inserting elements in the tree, so the gfp mask is used to
469 * indicate which allocations or sleeping are allowed.
470 *
471 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
472 * the given range from the tree regardless of state (ie for truncate).
473 *
474 * the range [start, end] is inclusive.
475 *
476 * This takes the tree lock, and returns < 0 on error, > 0 if any of the
477 * bits were already set, or zero if none of the bits were already set.
478 */
479int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
480 int bits, int wake, int delete, gfp_t mask)
481{
482 struct extent_state *state;
483 struct extent_state *prealloc = NULL;
484 struct rb_node *node;
485 int err;
486 int set = 0;
487
488again:
489 if (!prealloc && (mask & __GFP_WAIT)) {
490 prealloc = alloc_extent_state(mask);
491 if (!prealloc)
492 return -ENOMEM;
493 }
494
495 spin_lock(&tree->lock);
496 /*
497 * this search will find the extents that end after
498 * our range starts
499 */
500 node = tree_search(tree, start);
501 if (!node)
502 goto out;
503 state = rb_entry(node, struct extent_state, rb_node);
504 if (state->start > end)
505 goto out;
506 WARN_ON(state->end < start);
507
508 /*
509 * | ---- desired range ---- |
510 * | state | or
511 * | ------------- state -------------- |
512 *
513 * We need to split the extent we found, and may flip
514 * bits on second half.
515 *
516 * If the extent we found extends past our range, we
517 * just split and search again. It'll get split again
518 * the next time though.
519 *
520 * If the extent we found is inside our range, we clear
521 * the desired bit on it.
522 */
523
524 if (state->start < start) {
525 if (!prealloc)
526 prealloc = alloc_extent_state(GFP_ATOMIC);
527 err = split_state(tree, state, prealloc, start);
528 BUG_ON(err == -EEXIST);
529 prealloc = NULL;
530 if (err)
531 goto out;
532 if (state->end <= end) {
533 start = state->end + 1;
534 set |= clear_state_bit(tree, state, bits,
535 wake, delete);
536 } else {
537 start = state->start;
538 }
539 goto search_again;
540 }
541 /*
542 * | ---- desired range ---- |
543 * | state |
544 * We need to split the extent, and clear the bit
545 * on the first half
546 */
547 if (state->start <= end && state->end > end) {
548 if (!prealloc)
549 prealloc = alloc_extent_state(GFP_ATOMIC);
550 err = split_state(tree, state, prealloc, end + 1);
551 BUG_ON(err == -EEXIST);
552
553 if (wake)
554 wake_up(&state->wq);
555 set |= clear_state_bit(tree, prealloc, bits,
556 wake, delete);
557 prealloc = NULL;
558 goto out;
559 }
560
561 start = state->end + 1;
562 set |= clear_state_bit(tree, state, bits, wake, delete);
563 goto search_again;
564
565out:
566 spin_unlock(&tree->lock);
567 if (prealloc)
568 free_extent_state(prealloc);
569
570 return set;
571
572search_again:
573 if (start > end)
574 goto out;
575 spin_unlock(&tree->lock);
576 if (mask & __GFP_WAIT)
577 cond_resched();
578 goto again;
579}
580
581static int wait_on_state(struct extent_io_tree *tree,
582 struct extent_state *state)
583 __releases(tree->lock)
584 __acquires(tree->lock)
585{
586 DEFINE_WAIT(wait);
587 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
588 spin_unlock(&tree->lock);
589 schedule();
590 spin_lock(&tree->lock);
591 finish_wait(&state->wq, &wait);
592 return 0;
593}
594
595/*
596 * waits for one or more bits to clear on a range in the state tree.
597 * The range [start, end] is inclusive.
598 * The tree lock is taken by this function
599 */
600int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
601{
602 struct extent_state *state;
603 struct rb_node *node;
604
605 spin_lock(&tree->lock);
606again:
607 while (1) {
608 /*
609 * this search will find all the extents that end after
610 * our range starts
611 */
612 node = tree_search(tree, start);
613 if (!node)
614 break;
615
616 state = rb_entry(node, struct extent_state, rb_node);
617
618 if (state->start > end)
619 goto out;
620
621 if (state->state & bits) {
622 start = state->start;
623 atomic_inc(&state->refs);
624 wait_on_state(tree, state);
625 free_extent_state(state);
626 goto again;
627 }
628 start = state->end + 1;
629
630 if (start > end)
631 break;
632
633 if (need_resched()) {
634 spin_unlock(&tree->lock);
635 cond_resched();
636 spin_lock(&tree->lock);
637 }
638 }
639out:
640 spin_unlock(&tree->lock);
641 return 0;
642}
643
644static void set_state_bits(struct extent_io_tree *tree,
645 struct extent_state *state,
646 int bits)
647{
648 if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
649 u64 range = state->end - state->start + 1;
650 tree->dirty_bytes += range;
651 }
652 set_state_cb(tree, state, bits);
653 state->state |= bits;
654}
655
656/*
657 * set some bits on a range in the tree. This may require allocations
658 * or sleeping, so the gfp mask is used to indicate what is allowed.
659 *
660 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
661 * range already has the desired bits set. The start of the existing
662 * range is returned in failed_start in this case.
663 *
664 * [start, end] is inclusive
665 * This takes the tree lock.
666 */
667static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
668 int bits, int exclusive, u64 *failed_start,
669 gfp_t mask)
670{
671 struct extent_state *state;
672 struct extent_state *prealloc = NULL;
673 struct rb_node *node;
674 int err = 0;
675 int set;
676 u64 last_start;
677 u64 last_end;
678again:
679 if (!prealloc && (mask & __GFP_WAIT)) {
680 prealloc = alloc_extent_state(mask);
681 if (!prealloc)
682 return -ENOMEM;
683 }
684
685 spin_lock(&tree->lock);
686 /*
687 * this search will find all the extents that end after
688 * our range starts.
689 */
690 node = tree_search(tree, start);
691 if (!node) {
692 err = insert_state(tree, prealloc, start, end, bits);
693 prealloc = NULL;
694 BUG_ON(err == -EEXIST);
695 goto out;
696 }
697
698 state = rb_entry(node, struct extent_state, rb_node);
699 last_start = state->start;
700 last_end = state->end;
701
702 /*
703 * | ---- desired range ---- |
704 * | state |
705 *
706 * Just lock what we found and keep going
707 */
708 if (state->start == start && state->end <= end) {
709 set = state->state & bits;
710 if (set && exclusive) {
711 *failed_start = state->start;
712 err = -EEXIST;
713 goto out;
714 }
715 set_state_bits(tree, state, bits);
716 start = state->end + 1;
717 merge_state(tree, state);
718 goto search_again;
719 }
720
721 /*
722 * | ---- desired range ---- |
723 * | state |
724 * or
725 * | ------------- state -------------- |
726 *
727 * We need to split the extent we found, and may flip bits on
728 * second half.
729 *
730 * If the extent we found extends past our
731 * range, we just split and search again. It'll get split
732 * again the next time though.
733 *
734 * If the extent we found is inside our range, we set the
735 * desired bit on it.
736 */
737 if (state->start < start) {
738 set = state->state & bits;
739 if (exclusive && set) {
740 *failed_start = start;
741 err = -EEXIST;
742 goto out;
743 }
744 err = split_state(tree, state, prealloc, start);
745 BUG_ON(err == -EEXIST);
746 prealloc = NULL;
747 if (err)
748 goto out;
749 if (state->end <= end) {
750 set_state_bits(tree, state, bits);
751 start = state->end + 1;
752 merge_state(tree, state);
753 } else {
754 start = state->start;
755 }
756 goto search_again;
757 }
758 /*
759 * | ---- desired range ---- |
760 * | state | or | state |
761 *
762 * There's a hole, we need to insert something in it and
763 * ignore the extent we found.
764 */
765 if (state->start > start) {
766 u64 this_end;
767 if (end < last_start)
768 this_end = end;
769 else
770 this_end = last_start - 1;
771 err = insert_state(tree, prealloc, start, this_end,
772 bits);
773 prealloc = NULL;
774 BUG_ON(err == -EEXIST);
775 if (err)
776 goto out;
777 start = this_end + 1;
778 goto search_again;
779 }
780 /*
781 * | ---- desired range ---- |
782 * | state |
783 * We need to split the extent, and set the bit
784 * on the first half
785 */
786 if (state->start <= end && state->end > end) {
787 set = state->state & bits;
788 if (exclusive && set) {
789 *failed_start = start;
790 err = -EEXIST;
791 goto out;
792 }
793 err = split_state(tree, state, prealloc, end + 1);
794 BUG_ON(err == -EEXIST);
795
796 set_state_bits(tree, prealloc, bits);
797 merge_state(tree, prealloc);
798 prealloc = NULL;
799 goto out;
800 }
801
802 goto search_again;
803
804out:
805 spin_unlock(&tree->lock);
806 if (prealloc)
807 free_extent_state(prealloc);
808
809 return err;
810
811search_again:
812 if (start > end)
813 goto out;
814 spin_unlock(&tree->lock);
815 if (mask & __GFP_WAIT)
816 cond_resched();
817 goto again;
818}
819
820/* wrappers around set/clear extent bit */
821int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
822 gfp_t mask)
823{
824 return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
825 mask);
826}
827
828int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
829 gfp_t mask)
830{
831 return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
832}
833
834int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
835 int bits, gfp_t mask)
836{
837 return set_extent_bit(tree, start, end, bits, 0, NULL,
838 mask);
839}
840
841int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
842 int bits, gfp_t mask)
843{
844 return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
845}
846
847int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
848 gfp_t mask)
849{
850 return set_extent_bit(tree, start, end,
851 EXTENT_DELALLOC | EXTENT_DIRTY,
852 0, NULL, mask);
853}
854
855int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
856 gfp_t mask)
857{
858 return clear_extent_bit(tree, start, end,
859 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
860}
861
862int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
863 gfp_t mask)
864{
865 return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
866}
867
868int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
869 gfp_t mask)
870{
871 return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
872 mask);
873}
874
875static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
876 gfp_t mask)
877{
878 return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
879}
880
881int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
882 gfp_t mask)
883{
884 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
885 mask);
886}
887
888static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
889 u64 end, gfp_t mask)
890{
891 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
892}
893
894static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
895 gfp_t mask)
896{
897 return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
898 0, NULL, mask);
899}
900
901static int clear_extent_writeback(struct extent_io_tree *tree, u64 start,
902 u64 end, gfp_t mask)
903{
904 return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
905}
906
907int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
908{
909 return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
910}
911
912/*
913 * either insert or lock state struct between start and end use mask to tell
914 * us if waiting is desired.
915 */
916int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
917{
918 int err;
919 u64 failed_start;
920 while (1) {
921 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
922 &failed_start, mask);
923 if (err == -EEXIST && (mask & __GFP_WAIT)) {
924 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
925 start = failed_start;
926 } else {
927 break;
928 }
929 WARN_ON(start > end);
930 }
931 return err;
932}
933
934int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
935 gfp_t mask)
936{
937 int err;
938 u64 failed_start;
939
940 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
941 &failed_start, mask);
942 if (err == -EEXIST) {
943 if (failed_start > start)
944 clear_extent_bit(tree, start, failed_start - 1,
945 EXTENT_LOCKED, 1, 0, mask);
946 return 0;
947 }
948 return 1;
949}
950
951int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
952 gfp_t mask)
953{
954 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
955}
956
957/*
958 * helper function to set pages and extents in the tree dirty
959 */
960int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
961{
962 unsigned long index = start >> PAGE_CACHE_SHIFT;
963 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
964 struct page *page;
965
966 while (index <= end_index) {
967 page = find_get_page(tree->mapping, index);
968 BUG_ON(!page);
969 __set_page_dirty_nobuffers(page);
970 page_cache_release(page);
971 index++;
972 }
973 set_extent_dirty(tree, start, end, GFP_NOFS);
974 return 0;
975}
976
977/*
978 * helper function to set both pages and extents in the tree writeback
979 */
980static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
981{
982 unsigned long index = start >> PAGE_CACHE_SHIFT;
983 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
984 struct page *page;
985
986 while (index <= end_index) {
987 page = find_get_page(tree->mapping, index);
988 BUG_ON(!page);
989 set_page_writeback(page);
990 page_cache_release(page);
991 index++;
992 }
993 set_extent_writeback(tree, start, end, GFP_NOFS);
994 return 0;
995}
996
997/*
998 * find the first offset in the io tree with 'bits' set. zero is
999 * returned if we find something, and *start_ret and *end_ret are
1000 * set to reflect the state struct that was found.
1001 *
1002 * If nothing was found, 1 is returned, < 0 on error
1003 */
1004int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1005 u64 *start_ret, u64 *end_ret, int bits)
1006{
1007 struct rb_node *node;
1008 struct extent_state *state;
1009 int ret = 1;
1010
1011 spin_lock(&tree->lock);
1012 /*
1013 * this search will find all the extents that end after
1014 * our range starts.
1015 */
1016 node = tree_search(tree, start);
1017 if (!node)
1018 goto out;
1019
1020 while (1) {
1021 state = rb_entry(node, struct extent_state, rb_node);
1022 if (state->end >= start && (state->state & bits)) {
1023 *start_ret = state->start;
1024 *end_ret = state->end;
1025 ret = 0;
1026 break;
1027 }
1028 node = rb_next(node);
1029 if (!node)
1030 break;
1031 }
1032out:
1033 spin_unlock(&tree->lock);
1034 return ret;
1035}
1036
1037/* find the first state struct with 'bits' set after 'start', and
1038 * return it. tree->lock must be held. NULL will returned if
1039 * nothing was found after 'start'
1040 */
1041struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
1042 u64 start, int bits)
1043{
1044 struct rb_node *node;
1045 struct extent_state *state;
1046
1047 /*
1048 * this search will find all the extents that end after
1049 * our range starts.
1050 */
1051 node = tree_search(tree, start);
1052 if (!node)
1053 goto out;
1054
1055 while (1) {
1056 state = rb_entry(node, struct extent_state, rb_node);
1057 if (state->end >= start && (state->state & bits))
1058 return state;
1059
1060 node = rb_next(node);
1061 if (!node)
1062 break;
1063 }
1064out:
1065 return NULL;
1066}
1067
1068/*
1069 * find a contiguous range of bytes in the file marked as delalloc, not
1070 * more than 'max_bytes'. start and end are used to return the range,
1071 *
1072 * 1 is returned if we find something, 0 if nothing was in the tree
1073 */
1074static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
1075 u64 *start, u64 *end, u64 max_bytes)
1076{
1077 struct rb_node *node;
1078 struct extent_state *state;
1079 u64 cur_start = *start;
1080 u64 found = 0;
1081 u64 total_bytes = 0;
1082
1083 spin_lock(&tree->lock);
1084
1085 /*
1086 * this search will find all the extents that end after
1087 * our range starts.
1088 */
1089 node = tree_search(tree, cur_start);
1090 if (!node) {
1091 if (!found)
1092 *end = (u64)-1;
1093 goto out;
1094 }
1095
1096 while (1) {
1097 state = rb_entry(node, struct extent_state, rb_node);
1098 if (found && (state->start != cur_start ||
1099 (state->state & EXTENT_BOUNDARY))) {
1100 goto out;
1101 }
1102 if (!(state->state & EXTENT_DELALLOC)) {
1103 if (!found)
1104 *end = state->end;
1105 goto out;
1106 }
1107 if (!found)
1108 *start = state->start;
1109 found++;
1110 *end = state->end;
1111 cur_start = state->end + 1;
1112 node = rb_next(node);
1113 if (!node)
1114 break;
1115 total_bytes += state->end - state->start + 1;
1116 if (total_bytes >= max_bytes)
1117 break;
1118 }
1119out:
1120 spin_unlock(&tree->lock);
1121 return found;
1122}
1123
1124static noinline int __unlock_for_delalloc(struct inode *inode,
1125 struct page *locked_page,
1126 u64 start, u64 end)
1127{
1128 int ret;
1129 struct page *pages[16];
1130 unsigned long index = start >> PAGE_CACHE_SHIFT;
1131 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1132 unsigned long nr_pages = end_index - index + 1;
1133 int i;
1134
1135 if (index == locked_page->index && end_index == index)
1136 return 0;
1137
1138 while (nr_pages > 0) {
1139 ret = find_get_pages_contig(inode->i_mapping, index,
1140 min_t(unsigned long, nr_pages,
1141 ARRAY_SIZE(pages)), pages);
1142 for (i = 0; i < ret; i++) {
1143 if (pages[i] != locked_page)
1144 unlock_page(pages[i]);
1145 page_cache_release(pages[i]);
1146 }
1147 nr_pages -= ret;
1148 index += ret;
1149 cond_resched();
1150 }
1151 return 0;
1152}
1153
1154static noinline int lock_delalloc_pages(struct inode *inode,
1155 struct page *locked_page,
1156 u64 delalloc_start,
1157 u64 delalloc_end)
1158{
1159 unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
1160 unsigned long start_index = index;
1161 unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
1162 unsigned long pages_locked = 0;
1163 struct page *pages[16];
1164 unsigned long nrpages;
1165 int ret;
1166 int i;
1167
1168 /* the caller is responsible for locking the start index */
1169 if (index == locked_page->index && index == end_index)
1170 return 0;
1171
1172 /* skip the page at the start index */
1173 nrpages = end_index - index + 1;
1174 while (nrpages > 0) {
1175 ret = find_get_pages_contig(inode->i_mapping, index,
1176 min_t(unsigned long,
1177 nrpages, ARRAY_SIZE(pages)), pages);
1178 if (ret == 0) {
1179 ret = -EAGAIN;
1180 goto done;
1181 }
1182 /* now we have an array of pages, lock them all */
1183 for (i = 0; i < ret; i++) {
1184 /*
1185 * the caller is taking responsibility for
1186 * locked_page
1187 */
1188 if (pages[i] != locked_page) {
1189 lock_page(pages[i]);
1190 if (!PageDirty(pages[i]) ||
1191 pages[i]->mapping != inode->i_mapping) {
1192 ret = -EAGAIN;
1193 unlock_page(pages[i]);
1194 page_cache_release(pages[i]);
1195 goto done;
1196 }
1197 }
1198 page_cache_release(pages[i]);
1199 pages_locked++;
1200 }
1201 nrpages -= ret;
1202 index += ret;
1203 cond_resched();
1204 }
1205 ret = 0;
1206done:
1207 if (ret && pages_locked) {
1208 __unlock_for_delalloc(inode, locked_page,
1209 delalloc_start,
1210 ((u64)(start_index + pages_locked - 1)) <<
1211 PAGE_CACHE_SHIFT);
1212 }
1213 return ret;
1214}
1215
1216/*
1217 * find a contiguous range of bytes in the file marked as delalloc, not
1218 * more than 'max_bytes'. start and end are used to return the range,
1219 *
1220 * 1 is returned if we find something, 0 if nothing was in the tree
1221 */
1222static noinline u64 find_lock_delalloc_range(struct inode *inode,
1223 struct extent_io_tree *tree,
1224 struct page *locked_page,
1225 u64 *start, u64 *end,
1226 u64 max_bytes)
1227{
1228 u64 delalloc_start;
1229 u64 delalloc_end;
1230 u64 found;
1231 int ret;
1232 int loops = 0;
1233
1234again:
1235 /* step one, find a bunch of delalloc bytes starting at start */
1236 delalloc_start = *start;
1237 delalloc_end = 0;
1238 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
1239 max_bytes);
1240 if (!found || delalloc_end <= *start) {
1241 *start = delalloc_start;
1242 *end = delalloc_end;
1243 return found;
1244 }
1245
1246 /*
1247 * start comes from the offset of locked_page. We have to lock
1248 * pages in order, so we can't process delalloc bytes before
1249 * locked_page
1250 */
1251 if (delalloc_start < *start)
1252 delalloc_start = *start;
1253
1254 /*
1255 * make sure to limit the number of pages we try to lock down
1256 * if we're looping.
1257 */
1258 if (delalloc_end + 1 - delalloc_start > max_bytes && loops)
1259 delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1;
1260
1261 /* step two, lock all the pages after the page that has start */
1262 ret = lock_delalloc_pages(inode, locked_page,
1263 delalloc_start, delalloc_end);
1264 if (ret == -EAGAIN) {
1265 /* some of the pages are gone, lets avoid looping by
1266 * shortening the size of the delalloc range we're searching
1267 */
1268 if (!loops) {
1269 unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
1270 max_bytes = PAGE_CACHE_SIZE - offset;
1271 loops = 1;
1272 goto again;
1273 } else {
1274 found = 0;
1275 goto out_failed;
1276 }
1277 }
1278 BUG_ON(ret);
1279
1280 /* step three, lock the state bits for the whole range */
1281 lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
1282
1283 /* then test to make sure it is all still delalloc */
1284 ret = test_range_bit(tree, delalloc_start, delalloc_end,
1285 EXTENT_DELALLOC, 1);
1286 if (!ret) {
1287 unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
1288 __unlock_for_delalloc(inode, locked_page,
1289 delalloc_start, delalloc_end);
1290 cond_resched();
1291 goto again;
1292 }
1293 *start = delalloc_start;
1294 *end = delalloc_end;
1295out_failed:
1296 return found;
1297}
1298
1299int extent_clear_unlock_delalloc(struct inode *inode,
1300 struct extent_io_tree *tree,
1301 u64 start, u64 end, struct page *locked_page,
1302 int unlock_pages,
1303 int clear_unlock,
1304 int clear_delalloc, int clear_dirty,
1305 int set_writeback,
1306 int end_writeback)
1307{
1308 int ret;
1309 struct page *pages[16];
1310 unsigned long index = start >> PAGE_CACHE_SHIFT;
1311 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1312 unsigned long nr_pages = end_index - index + 1;
1313 int i;
1314 int clear_bits = 0;
1315
1316 if (clear_unlock)
1317 clear_bits |= EXTENT_LOCKED;
1318 if (clear_dirty)
1319 clear_bits |= EXTENT_DIRTY;
1320
1321 if (clear_delalloc)
1322 clear_bits |= EXTENT_DELALLOC;
1323
1324 clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS);
1325 if (!(unlock_pages || clear_dirty || set_writeback || end_writeback))
1326 return 0;
1327
1328 while (nr_pages > 0) {
1329 ret = find_get_pages_contig(inode->i_mapping, index,
1330 min_t(unsigned long,
1331 nr_pages, ARRAY_SIZE(pages)), pages);
1332 for (i = 0; i < ret; i++) {
1333 if (pages[i] == locked_page) {
1334 page_cache_release(pages[i]);
1335 continue;
1336 }
1337 if (clear_dirty)
1338 clear_page_dirty_for_io(pages[i]);
1339 if (set_writeback)
1340 set_page_writeback(pages[i]);
1341 if (end_writeback)
1342 end_page_writeback(pages[i]);
1343 if (unlock_pages)
1344 unlock_page(pages[i]);
1345 page_cache_release(pages[i]);
1346 }
1347 nr_pages -= ret;
1348 index += ret;
1349 cond_resched();
1350 }
1351 return 0;
1352}
1353
1354/*
1355 * count the number of bytes in the tree that have a given bit(s)
1356 * set. This can be fairly slow, except for EXTENT_DIRTY which is
1357 * cached. The total number found is returned.
1358 */
1359u64 count_range_bits(struct extent_io_tree *tree,
1360 u64 *start, u64 search_end, u64 max_bytes,
1361 unsigned long bits)
1362{
1363 struct rb_node *node;
1364 struct extent_state *state;
1365 u64 cur_start = *start;
1366 u64 total_bytes = 0;
1367 int found = 0;
1368
1369 if (search_end <= cur_start) {
1370 WARN_ON(1);
1371 return 0;
1372 }
1373
1374 spin_lock(&tree->lock);
1375 if (cur_start == 0 && bits == EXTENT_DIRTY) {
1376 total_bytes = tree->dirty_bytes;
1377 goto out;
1378 }
1379 /*
1380 * this search will find all the extents that end after
1381 * our range starts.
1382 */
1383 node = tree_search(tree, cur_start);
1384 if (!node)
1385 goto out;
1386
1387 while (1) {
1388 state = rb_entry(node, struct extent_state, rb_node);
1389 if (state->start > search_end)
1390 break;
1391 if (state->end >= cur_start && (state->state & bits)) {
1392 total_bytes += min(search_end, state->end) + 1 -
1393 max(cur_start, state->start);
1394 if (total_bytes >= max_bytes)
1395 break;
1396 if (!found) {
1397 *start = state->start;
1398 found = 1;
1399 }
1400 }
1401 node = rb_next(node);
1402 if (!node)
1403 break;
1404 }
1405out:
1406 spin_unlock(&tree->lock);
1407 return total_bytes;
1408}
1409
1410#if 0
1411/*
1412 * helper function to lock both pages and extents in the tree.
1413 * pages must be locked first.
1414 */
1415static int lock_range(struct extent_io_tree *tree, u64 start, u64 end)
1416{
1417 unsigned long index = start >> PAGE_CACHE_SHIFT;
1418 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1419 struct page *page;
1420 int err;
1421
1422 while (index <= end_index) {
1423 page = grab_cache_page(tree->mapping, index);
1424 if (!page) {
1425 err = -ENOMEM;
1426 goto failed;
1427 }
1428 if (IS_ERR(page)) {
1429 err = PTR_ERR(page);
1430 goto failed;
1431 }
1432 index++;
1433 }
1434 lock_extent(tree, start, end, GFP_NOFS);
1435 return 0;
1436
1437failed:
1438 /*
1439 * we failed above in getting the page at 'index', so we undo here
1440 * up to but not including the page at 'index'
1441 */
1442 end_index = index;
1443 index = start >> PAGE_CACHE_SHIFT;
1444 while (index < end_index) {
1445 page = find_get_page(tree->mapping, index);
1446 unlock_page(page);
1447 page_cache_release(page);
1448 index++;
1449 }
1450 return err;
1451}
1452
1453/*
1454 * helper function to unlock both pages and extents in the tree.
1455 */
1456static int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
1457{
1458 unsigned long index = start >> PAGE_CACHE_SHIFT;
1459 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1460 struct page *page;
1461
1462 while (index <= end_index) {
1463 page = find_get_page(tree->mapping, index);
1464 unlock_page(page);
1465 page_cache_release(page);
1466 index++;
1467 }
1468 unlock_extent(tree, start, end, GFP_NOFS);
1469 return 0;
1470}
1471#endif
1472
1473/*
1474 * set the private field for a given byte offset in the tree. If there isn't
1475 * an extent_state there already, this does nothing.
1476 */
1477int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
1478{
1479 struct rb_node *node;
1480 struct extent_state *state;
1481 int ret = 0;
1482
1483 spin_lock(&tree->lock);
1484 /*
1485 * this search will find all the extents that end after
1486 * our range starts.
1487 */
1488 node = tree_search(tree, start);
1489 if (!node) {
1490 ret = -ENOENT;
1491 goto out;
1492 }
1493 state = rb_entry(node, struct extent_state, rb_node);
1494 if (state->start != start) {
1495 ret = -ENOENT;
1496 goto out;
1497 }
1498 state->private = private;
1499out:
1500 spin_unlock(&tree->lock);
1501 return ret;
1502}
1503
1504int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
1505{
1506 struct rb_node *node;
1507 struct extent_state *state;
1508 int ret = 0;
1509
1510 spin_lock(&tree->lock);
1511 /*
1512 * this search will find all the extents that end after
1513 * our range starts.
1514 */
1515 node = tree_search(tree, start);
1516 if (!node) {
1517 ret = -ENOENT;
1518 goto out;
1519 }
1520 state = rb_entry(node, struct extent_state, rb_node);
1521 if (state->start != start) {
1522 ret = -ENOENT;
1523 goto out;
1524 }
1525 *private = state->private;
1526out:
1527 spin_unlock(&tree->lock);
1528 return ret;
1529}
1530
1531/*
1532 * searches a range in the state tree for a given mask.
1533 * If 'filled' == 1, this returns 1 only if every extent in the tree
1534 * has the bits set. Otherwise, 1 is returned if any bit in the
1535 * range is found set.
1536 */
1537int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1538 int bits, int filled)
1539{
1540 struct extent_state *state = NULL;
1541 struct rb_node *node;
1542 int bitset = 0;
1543
1544 spin_lock(&tree->lock);
1545 node = tree_search(tree, start);
1546 while (node && start <= end) {
1547 state = rb_entry(node, struct extent_state, rb_node);
1548
1549 if (filled && state->start > start) {
1550 bitset = 0;
1551 break;
1552 }
1553
1554 if (state->start > end)
1555 break;
1556
1557 if (state->state & bits) {
1558 bitset = 1;
1559 if (!filled)
1560 break;
1561 } else if (filled) {
1562 bitset = 0;
1563 break;
1564 }
1565 start = state->end + 1;
1566 if (start > end)
1567 break;
1568 node = rb_next(node);
1569 if (!node) {
1570 if (filled)
1571 bitset = 0;
1572 break;
1573 }
1574 }
1575 spin_unlock(&tree->lock);
1576 return bitset;
1577}
1578
1579/*
1580 * helper function to set a given page up to date if all the
1581 * extents in the tree for that page are up to date
1582 */
1583static int check_page_uptodate(struct extent_io_tree *tree,
1584 struct page *page)
1585{
1586 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1587 u64 end = start + PAGE_CACHE_SIZE - 1;
1588 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
1589 SetPageUptodate(page);
1590 return 0;
1591}
1592
1593/*
1594 * helper function to unlock a page if all the extents in the tree
1595 * for that page are unlocked
1596 */
1597static int check_page_locked(struct extent_io_tree *tree,
1598 struct page *page)
1599{
1600 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1601 u64 end = start + PAGE_CACHE_SIZE - 1;
1602 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
1603 unlock_page(page);
1604 return 0;
1605}
1606
1607/*
1608 * helper function to end page writeback if all the extents
1609 * in the tree for that page are done with writeback
1610 */
1611static int check_page_writeback(struct extent_io_tree *tree,
1612 struct page *page)
1613{
1614 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1615 u64 end = start + PAGE_CACHE_SIZE - 1;
1616 if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
1617 end_page_writeback(page);
1618 return 0;
1619}
1620
1621/* lots and lots of room for performance fixes in the end_bio funcs */
1622
1623/*
1624 * after a writepage IO is done, we need to:
1625 * clear the uptodate bits on error
1626 * clear the writeback bits in the extent tree for this IO
1627 * end_page_writeback if the page has no more pending IO
1628 *
1629 * Scheduling is not allowed, so the extent state tree is expected
1630 * to have one and only one object corresponding to this IO.
1631 */
1632static void end_bio_extent_writepage(struct bio *bio, int err)
1633{
1634 int uptodate = err == 0;
1635 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1636 struct extent_io_tree *tree;
1637 u64 start;
1638 u64 end;
1639 int whole_page;
1640 int ret;
1641
1642 do {
1643 struct page *page = bvec->bv_page;
1644 tree = &BTRFS_I(page->mapping->host)->io_tree;
1645
1646 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1647 bvec->bv_offset;
1648 end = start + bvec->bv_len - 1;
1649
1650 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1651 whole_page = 1;
1652 else
1653 whole_page = 0;
1654
1655 if (--bvec >= bio->bi_io_vec)
1656 prefetchw(&bvec->bv_page->flags);
1657 if (tree->ops && tree->ops->writepage_end_io_hook) {
1658 ret = tree->ops->writepage_end_io_hook(page, start,
1659 end, NULL, uptodate);
1660 if (ret)
1661 uptodate = 0;
1662 }
1663
1664 if (!uptodate && tree->ops &&
1665 tree->ops->writepage_io_failed_hook) {
1666 ret = tree->ops->writepage_io_failed_hook(bio, page,
1667 start, end, NULL);
1668 if (ret == 0) {
1669 uptodate = (err == 0);
1670 continue;
1671 }
1672 }
1673
1674 if (!uptodate) {
1675 clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
1676 ClearPageUptodate(page);
1677 SetPageError(page);
1678 }
1679
1680 clear_extent_writeback(tree, start, end, GFP_ATOMIC);
1681
1682 if (whole_page)
1683 end_page_writeback(page);
1684 else
1685 check_page_writeback(tree, page);
1686 } while (bvec >= bio->bi_io_vec);
1687
1688 bio_put(bio);
1689}
1690
1691/*
1692 * after a readpage IO is done, we need to:
1693 * clear the uptodate bits on error
1694 * set the uptodate bits if things worked
1695 * set the page up to date if all extents in the tree are uptodate
1696 * clear the lock bit in the extent tree
1697 * unlock the page if there are no other extents locked for it
1698 *
1699 * Scheduling is not allowed, so the extent state tree is expected
1700 * to have one and only one object corresponding to this IO.
1701 */
1702static void end_bio_extent_readpage(struct bio *bio, int err)
1703{
1704 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1705 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1706 struct extent_io_tree *tree;
1707 u64 start;
1708 u64 end;
1709 int whole_page;
1710 int ret;
1711
1712 if (err)
1713 uptodate = 0;
1714
1715 do {
1716 struct page *page = bvec->bv_page;
1717 tree = &BTRFS_I(page->mapping->host)->io_tree;
1718
1719 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1720 bvec->bv_offset;
1721 end = start + bvec->bv_len - 1;
1722
1723 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1724 whole_page = 1;
1725 else
1726 whole_page = 0;
1727
1728 if (--bvec >= bio->bi_io_vec)
1729 prefetchw(&bvec->bv_page->flags);
1730
1731 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
1732 ret = tree->ops->readpage_end_io_hook(page, start, end,
1733 NULL);
1734 if (ret)
1735 uptodate = 0;
1736 }
1737 if (!uptodate && tree->ops &&
1738 tree->ops->readpage_io_failed_hook) {
1739 ret = tree->ops->readpage_io_failed_hook(bio, page,
1740 start, end, NULL);
1741 if (ret == 0) {
1742 uptodate =
1743 test_bit(BIO_UPTODATE, &bio->bi_flags);
1744 if (err)
1745 uptodate = 0;
1746 continue;
1747 }
1748 }
1749
1750 if (uptodate) {
1751 set_extent_uptodate(tree, start, end,
1752 GFP_ATOMIC);
1753 }
1754 unlock_extent(tree, start, end, GFP_ATOMIC);
1755
1756 if (whole_page) {
1757 if (uptodate) {
1758 SetPageUptodate(page);
1759 } else {
1760 ClearPageUptodate(page);
1761 SetPageError(page);
1762 }
1763 unlock_page(page);
1764 } else {
1765 if (uptodate) {
1766 check_page_uptodate(tree, page);
1767 } else {
1768 ClearPageUptodate(page);
1769 SetPageError(page);
1770 }
1771 check_page_locked(tree, page);
1772 }
1773 } while (bvec >= bio->bi_io_vec);
1774
1775 bio_put(bio);
1776}
1777
1778/*
1779 * IO done from prepare_write is pretty simple, we just unlock
1780 * the structs in the extent tree when done, and set the uptodate bits
1781 * as appropriate.
1782 */
1783static void end_bio_extent_preparewrite(struct bio *bio, int err)
1784{
1785 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1786 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1787 struct extent_io_tree *tree;
1788 u64 start;
1789 u64 end;
1790
1791 do {
1792 struct page *page = bvec->bv_page;
1793 tree = &BTRFS_I(page->mapping->host)->io_tree;
1794
1795 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1796 bvec->bv_offset;
1797 end = start + bvec->bv_len - 1;
1798
1799 if (--bvec >= bio->bi_io_vec)
1800 prefetchw(&bvec->bv_page->flags);
1801
1802 if (uptodate) {
1803 set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1804 } else {
1805 ClearPageUptodate(page);
1806 SetPageError(page);
1807 }
1808
1809 unlock_extent(tree, start, end, GFP_ATOMIC);
1810
1811 } while (bvec >= bio->bi_io_vec);
1812
1813 bio_put(bio);
1814}
1815
1816static struct bio *
1817extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
1818 gfp_t gfp_flags)
1819{
1820 struct bio *bio;
1821
1822 bio = bio_alloc(gfp_flags, nr_vecs);
1823
1824 if (bio == NULL && (current->flags & PF_MEMALLOC)) {
1825 while (!bio && (nr_vecs /= 2))
1826 bio = bio_alloc(gfp_flags, nr_vecs);
1827 }
1828
1829 if (bio) {
1830 bio->bi_size = 0;
1831 bio->bi_bdev = bdev;
1832 bio->bi_sector = first_sector;
1833 }
1834 return bio;
1835}
1836
1837static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
1838 unsigned long bio_flags)
1839{
1840 int ret = 0;
1841 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1842 struct page *page = bvec->bv_page;
1843 struct extent_io_tree *tree = bio->bi_private;
1844 u64 start;
1845 u64 end;
1846
1847 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1848 end = start + bvec->bv_len - 1;
1849
1850 bio->bi_private = NULL;
1851
1852 bio_get(bio);
1853
1854 if (tree->ops && tree->ops->submit_bio_hook)
1855 tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
1856 mirror_num, bio_flags);
1857 else
1858 submit_bio(rw, bio);
1859 if (bio_flagged(bio, BIO_EOPNOTSUPP))
1860 ret = -EOPNOTSUPP;
1861 bio_put(bio);
1862 return ret;
1863}
1864
1865static int submit_extent_page(int rw, struct extent_io_tree *tree,
1866 struct page *page, sector_t sector,
1867 size_t size, unsigned long offset,
1868 struct block_device *bdev,
1869 struct bio **bio_ret,
1870 unsigned long max_pages,
1871 bio_end_io_t end_io_func,
1872 int mirror_num,
1873 unsigned long prev_bio_flags,
1874 unsigned long bio_flags)
1875{
1876 int ret = 0;
1877 struct bio *bio;
1878 int nr;
1879 int contig = 0;
1880 int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED;
1881 int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
1882 size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);
1883
1884 if (bio_ret && *bio_ret) {
1885 bio = *bio_ret;
1886 if (old_compressed)
1887 contig = bio->bi_sector == sector;
1888 else
1889 contig = bio->bi_sector + (bio->bi_size >> 9) ==
1890 sector;
1891
1892 if (prev_bio_flags != bio_flags || !contig ||
1893 (tree->ops && tree->ops->merge_bio_hook &&
1894 tree->ops->merge_bio_hook(page, offset, page_size, bio,
1895 bio_flags)) ||
1896 bio_add_page(bio, page, page_size, offset) < page_size) {
1897 ret = submit_one_bio(rw, bio, mirror_num,
1898 prev_bio_flags);
1899 bio = NULL;
1900 } else {
1901 return 0;
1902 }
1903 }
1904 if (this_compressed)
1905 nr = BIO_MAX_PAGES;
1906 else
1907 nr = bio_get_nr_vecs(bdev);
1908
1909 bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
1910
1911 bio_add_page(bio, page, page_size, offset);
1912 bio->bi_end_io = end_io_func;
1913 bio->bi_private = tree;
1914
1915 if (bio_ret)
1916 *bio_ret = bio;
1917 else
1918 ret = submit_one_bio(rw, bio, mirror_num, bio_flags);
1919
1920 return ret;
1921}
1922
1923void set_page_extent_mapped(struct page *page)
1924{
1925 if (!PagePrivate(page)) {
1926 SetPagePrivate(page);
1927 page_cache_get(page);
1928 set_page_private(page, EXTENT_PAGE_PRIVATE);
1929 }
1930}
1931
1932static void set_page_extent_head(struct page *page, unsigned long len)
1933{
1934 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
1935}
1936
1937/*
1938 * basic readpage implementation. Locked extent state structs are inserted
1939 * into the tree that are removed when the IO is done (by the end_io
1940 * handlers)
1941 */
1942static int __extent_read_full_page(struct extent_io_tree *tree,
1943 struct page *page,
1944 get_extent_t *get_extent,
1945 struct bio **bio, int mirror_num,
1946 unsigned long *bio_flags)
1947{
1948 struct inode *inode = page->mapping->host;
1949 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1950 u64 page_end = start + PAGE_CACHE_SIZE - 1;
1951 u64 end;
1952 u64 cur = start;
1953 u64 extent_offset;
1954 u64 last_byte = i_size_read(inode);
1955 u64 block_start;
1956 u64 cur_end;
1957 sector_t sector;
1958 struct extent_map *em;
1959 struct block_device *bdev;
1960 int ret;
1961 int nr = 0;
1962 size_t page_offset = 0;
1963 size_t iosize;
1964 size_t disk_io_size;
1965 size_t blocksize = inode->i_sb->s_blocksize;
1966 unsigned long this_bio_flag = 0;
1967
1968 set_page_extent_mapped(page);
1969
1970 end = page_end;
1971 lock_extent(tree, start, end, GFP_NOFS);
1972
1973 if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
1974 char *userpage;
1975 size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
1976
1977 if (zero_offset) {
1978 iosize = PAGE_CACHE_SIZE - zero_offset;
1979 userpage = kmap_atomic(page, KM_USER0);
1980 memset(userpage + zero_offset, 0, iosize);
1981 flush_dcache_page(page);
1982 kunmap_atomic(userpage, KM_USER0);
1983 }
1984 }
1985 while (cur <= end) {
1986 if (cur >= last_byte) {
1987 char *userpage;
1988 iosize = PAGE_CACHE_SIZE - page_offset;
1989 userpage = kmap_atomic(page, KM_USER0);
1990 memset(userpage + page_offset, 0, iosize);
1991 flush_dcache_page(page);
1992 kunmap_atomic(userpage, KM_USER0);
1993 set_extent_uptodate(tree, cur, cur + iosize - 1,
1994 GFP_NOFS);
1995 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1996 break;
1997 }
1998 em = get_extent(inode, page, page_offset, cur,
1999 end - cur + 1, 0);
2000 if (IS_ERR(em) || !em) {
2001 SetPageError(page);
2002 unlock_extent(tree, cur, end, GFP_NOFS);
2003 break;
2004 }
2005 extent_offset = cur - em->start;
2006 BUG_ON(extent_map_end(em) <= cur);
2007 BUG_ON(end < cur);
2008
2009 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
2010 this_bio_flag = EXTENT_BIO_COMPRESSED;
2011
2012 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2013 cur_end = min(extent_map_end(em) - 1, end);
2014 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
2015 if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
2016 disk_io_size = em->block_len;
2017 sector = em->block_start >> 9;
2018 } else {
2019 sector = (em->block_start + extent_offset) >> 9;
2020 disk_io_size = iosize;
2021 }
2022 bdev = em->bdev;
2023 block_start = em->block_start;
2024 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
2025 block_start = EXTENT_MAP_HOLE;
2026 free_extent_map(em);
2027 em = NULL;
2028
2029 /* we've found a hole, just zero and go on */
2030 if (block_start == EXTENT_MAP_HOLE) {
2031 char *userpage;
2032 userpage = kmap_atomic(page, KM_USER0);
2033 memset(userpage + page_offset, 0, iosize);
2034 flush_dcache_page(page);
2035 kunmap_atomic(userpage, KM_USER0);
2036
2037 set_extent_uptodate(tree, cur, cur + iosize - 1,
2038 GFP_NOFS);
2039 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
2040 cur = cur + iosize;
2041 page_offset += iosize;
2042 continue;
2043 }
2044 /* the get_extent function already copied into the page */
2045 if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
2046 check_page_uptodate(tree, page);
2047 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
2048 cur = cur + iosize;
2049 page_offset += iosize;
2050 continue;
2051 }
2052 /* we have an inline extent but it didn't get marked up
2053 * to date. Error out
2054 */
2055 if (block_start == EXTENT_MAP_INLINE) {
2056 SetPageError(page);
2057 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
2058 cur = cur + iosize;
2059 page_offset += iosize;
2060 continue;
2061 }
2062
2063 ret = 0;
2064 if (tree->ops && tree->ops->readpage_io_hook) {
2065 ret = tree->ops->readpage_io_hook(page, cur,
2066 cur + iosize - 1);
2067 }
2068 if (!ret) {
2069 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
2070 pnr -= page->index;
2071 ret = submit_extent_page(READ, tree, page,
2072 sector, disk_io_size, page_offset,
2073 bdev, bio, pnr,
2074 end_bio_extent_readpage, mirror_num,
2075 *bio_flags,
2076 this_bio_flag);
2077 nr++;
2078 *bio_flags = this_bio_flag;
2079 }
2080 if (ret)
2081 SetPageError(page);
2082 cur = cur + iosize;
2083 page_offset += iosize;
2084 }
2085 if (!nr) {
2086 if (!PageError(page))
2087 SetPageUptodate(page);
2088 unlock_page(page);
2089 }
2090 return 0;
2091}
2092
2093int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2094 get_extent_t *get_extent)
2095{
2096 struct bio *bio = NULL;
2097 unsigned long bio_flags = 0;
2098 int ret;
2099
2100 ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
2101 &bio_flags);
2102 if (bio)
2103 submit_one_bio(READ, bio, 0, bio_flags);
2104 return ret;
2105}
2106
2107/*
2108 * the writepage semantics are similar to regular writepage. extent
2109 * records are inserted to lock ranges in the tree, and as dirty areas
2110 * are found, they are marked writeback. Then the lock bits are removed
2111 * and the end_io handler clears the writeback ranges
2112 */
2113static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2114 void *data)
2115{
2116 struct inode *inode = page->mapping->host;
2117 struct extent_page_data *epd = data;
2118 struct extent_io_tree *tree = epd->tree;
2119 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2120 u64 delalloc_start;
2121 u64 page_end = start + PAGE_CACHE_SIZE - 1;
2122 u64 end;
2123 u64 cur = start;
2124 u64 extent_offset;
2125 u64 last_byte = i_size_read(inode);
2126 u64 block_start;
2127 u64 iosize;
2128 u64 unlock_start;
2129 sector_t sector;
2130 struct extent_map *em;
2131 struct block_device *bdev;
2132 int ret;
2133 int nr = 0;
2134 size_t pg_offset = 0;
2135 size_t blocksize;
2136 loff_t i_size = i_size_read(inode);
2137 unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
2138 u64 nr_delalloc;
2139 u64 delalloc_end;
2140 int page_started;
2141 int compressed;
2142 unsigned long nr_written = 0;
2143
2144 WARN_ON(!PageLocked(page));
2145 pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
2146 if (page->index > end_index ||
2147 (page->index == end_index && !pg_offset)) {
2148 page->mapping->a_ops->invalidatepage(page, 0);
2149 unlock_page(page);
2150 return 0;
2151 }
2152
2153 if (page->index == end_index) {
2154 char *userpage;
2155
2156 userpage = kmap_atomic(page, KM_USER0);
2157 memset(userpage + pg_offset, 0,
2158 PAGE_CACHE_SIZE - pg_offset);
2159 kunmap_atomic(userpage, KM_USER0);
2160 flush_dcache_page(page);
2161 }
2162 pg_offset = 0;
2163
2164 set_page_extent_mapped(page);
2165
2166 delalloc_start = start;
2167 delalloc_end = 0;
2168 page_started = 0;
2169 if (!epd->extent_locked) {
2170 while (delalloc_end < page_end) {
2171 nr_delalloc = find_lock_delalloc_range(inode, tree,
2172 page,
2173 &delalloc_start,
2174 &delalloc_end,
2175 128 * 1024 * 1024);
2176 if (nr_delalloc == 0) {
2177 delalloc_start = delalloc_end + 1;
2178 continue;
2179 }
2180 tree->ops->fill_delalloc(inode, page, delalloc_start,
2181 delalloc_end, &page_started,
2182 &nr_written);
2183 delalloc_start = delalloc_end + 1;
2184 }
2185
2186 /* did the fill delalloc function already unlock and start
2187 * the IO?
2188 */
2189 if (page_started) {
2190 ret = 0;
2191 goto update_nr_written;
2192 }
2193 }
2194 lock_extent(tree, start, page_end, GFP_NOFS);
2195
2196 unlock_start = start;
2197
2198 if (tree->ops && tree->ops->writepage_start_hook) {
2199 ret = tree->ops->writepage_start_hook(page, start,
2200 page_end);
2201 if (ret == -EAGAIN) {
2202 unlock_extent(tree, start, page_end, GFP_NOFS);
2203 redirty_page_for_writepage(wbc, page);
2204 unlock_page(page);
2205 ret = 0;
2206 goto update_nr_written;
2207 }
2208 }
2209
2210 nr_written++;
2211
2212 end = page_end;
2213 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
2214 printk(KERN_ERR "btrfs delalloc bits after lock_extent\n");
2215
2216 if (last_byte <= start) {
2217 clear_extent_dirty(tree, start, page_end, GFP_NOFS);
2218 unlock_extent(tree, start, page_end, GFP_NOFS);
2219 if (tree->ops && tree->ops->writepage_end_io_hook)
2220 tree->ops->writepage_end_io_hook(page, start,
2221 page_end, NULL, 1);
2222 unlock_start = page_end + 1;
2223 goto done;
2224 }
2225
2226 set_extent_uptodate(tree, start, page_end, GFP_NOFS);
2227 blocksize = inode->i_sb->s_blocksize;
2228
2229 while (cur <= end) {
2230 if (cur >= last_byte) {
2231 clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
2232 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2233 if (tree->ops && tree->ops->writepage_end_io_hook)
2234 tree->ops->writepage_end_io_hook(page, cur,
2235 page_end, NULL, 1);
2236 unlock_start = page_end + 1;
2237 break;
2238 }
2239 em = epd->get_extent(inode, page, pg_offset, cur,
2240 end - cur + 1, 1);
2241 if (IS_ERR(em) || !em) {
2242 SetPageError(page);
2243 break;
2244 }
2245
2246 extent_offset = cur - em->start;
2247 BUG_ON(extent_map_end(em) <= cur);
2248 BUG_ON(end < cur);
2249 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2250 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
2251 sector = (em->block_start + extent_offset) >> 9;
2252 bdev = em->bdev;
2253 block_start = em->block_start;
2254 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
2255 free_extent_map(em);
2256 em = NULL;
2257
2258 /*
2259 * compressed and inline extents are written through other
2260 * paths in the FS
2261 */
2262 if (compressed || block_start == EXTENT_MAP_HOLE ||
2263 block_start == EXTENT_MAP_INLINE) {
2264 clear_extent_dirty(tree, cur,
2265 cur + iosize - 1, GFP_NOFS);
2266
2267 unlock_extent(tree, unlock_start, cur + iosize - 1,
2268 GFP_NOFS);
2269
2270 /*
2271 * end_io notification does not happen here for
2272 * compressed extents
2273 */
2274 if (!compressed && tree->ops &&
2275 tree->ops->writepage_end_io_hook)
2276 tree->ops->writepage_end_io_hook(page, cur,
2277 cur + iosize - 1,
2278 NULL, 1);
2279 else if (compressed) {
2280 /* we don't want to end_page_writeback on
2281 * a compressed extent. this happens
2282 * elsewhere
2283 */
2284 nr++;
2285 }
2286
2287 cur += iosize;
2288 pg_offset += iosize;
2289 unlock_start = cur;
2290 continue;
2291 }
2292 /* leave this out until we have a page_mkwrite call */
2293 if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
2294 EXTENT_DIRTY, 0)) {
2295 cur = cur + iosize;
2296 pg_offset += iosize;
2297 continue;
2298 }
2299
2300 clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
2301 if (tree->ops && tree->ops->writepage_io_hook) {
2302 ret = tree->ops->writepage_io_hook(page, cur,
2303 cur + iosize - 1);
2304 } else {
2305 ret = 0;
2306 }
2307 if (ret) {
2308 SetPageError(page);
2309 } else {
2310 unsigned long max_nr = end_index + 1;
2311
2312 set_range_writeback(tree, cur, cur + iosize - 1);
2313 if (!PageWriteback(page)) {
2314 printk(KERN_ERR "btrfs warning page %lu not "
2315 "writeback, cur %llu end %llu\n",
2316 page->index, (unsigned long long)cur,
2317 (unsigned long long)end);
2318 }
2319
2320 ret = submit_extent_page(WRITE, tree, page, sector,
2321 iosize, pg_offset, bdev,
2322 &epd->bio, max_nr,
2323 end_bio_extent_writepage,
2324 0, 0, 0);
2325 if (ret)
2326 SetPageError(page);
2327 }
2328 cur = cur + iosize;
2329 pg_offset += iosize;
2330 nr++;
2331 }
2332done:
2333 if (nr == 0) {
2334 /* make sure the mapping tag for page dirty gets cleared */
2335 set_page_writeback(page);
2336 end_page_writeback(page);
2337 }
2338 if (unlock_start <= page_end)
2339 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2340 unlock_page(page);
2341
2342update_nr_written:
2343 wbc->nr_to_write -= nr_written;
2344 if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
2345 wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
2346 page->mapping->writeback_index = page->index + nr_written;
2347 return 0;
2348}
2349
2350/**
2351 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
2352 * @mapping: address space structure to write
2353 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
2354 * @writepage: function called for each page
2355 * @data: data passed to writepage function
2356 *
2357 * If a page is already under I/O, write_cache_pages() skips it, even
2358 * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
2359 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
2360 * and msync() need to guarantee that all the data which was dirty at the time
2361 * the call was made get new I/O started against them. If wbc->sync_mode is
2362 * WB_SYNC_ALL then we were called for data integrity and we must wait for
2363 * existing IO to complete.
2364 */
2365static int extent_write_cache_pages(struct extent_io_tree *tree,
2366 struct address_space *mapping,
2367 struct writeback_control *wbc,
2368 writepage_t writepage, void *data,
2369 void (*flush_fn)(void *))
2370{
2371 struct backing_dev_info *bdi = mapping->backing_dev_info;
2372 int ret = 0;
2373 int done = 0;
2374 struct pagevec pvec;
2375 int nr_pages;
2376 pgoff_t index;
2377 pgoff_t end; /* Inclusive */
2378 int scanned = 0;
2379 int range_whole = 0;
2380
2381 if (wbc->nonblocking && bdi_write_congested(bdi)) {
2382 wbc->encountered_congestion = 1;
2383 return 0;
2384 }
2385
2386 pagevec_init(&pvec, 0);
2387 if (wbc->range_cyclic) {
2388 index = mapping->writeback_index; /* Start from prev offset */
2389 end = -1;
2390 } else {
2391 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2392 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2393 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2394 range_whole = 1;
2395 scanned = 1;
2396 }
2397retry:
2398 while (!done && (index <= end) &&
2399 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
2400 PAGECACHE_TAG_DIRTY, min(end - index,
2401 (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
2402 unsigned i;
2403
2404 scanned = 1;
2405 for (i = 0; i < nr_pages; i++) {
2406 struct page *page = pvec.pages[i];
2407
2408 /*
2409 * At this point we hold neither mapping->tree_lock nor
2410 * lock on the page itself: the page may be truncated or
2411 * invalidated (changing page->mapping to NULL), or even
2412 * swizzled back from swapper_space to tmpfs file
2413 * mapping
2414 */
2415 if (tree->ops && tree->ops->write_cache_pages_lock_hook)
2416 tree->ops->write_cache_pages_lock_hook(page);
2417 else
2418 lock_page(page);
2419
2420 if (unlikely(page->mapping != mapping)) {
2421 unlock_page(page);
2422 continue;
2423 }
2424
2425 if (!wbc->range_cyclic && page->index > end) {
2426 done = 1;
2427 unlock_page(page);
2428 continue;
2429 }
2430
2431 if (wbc->sync_mode != WB_SYNC_NONE) {
2432 if (PageWriteback(page))
2433 flush_fn(data);
2434 wait_on_page_writeback(page);
2435 }
2436
2437 if (PageWriteback(page) ||
2438 !clear_page_dirty_for_io(page)) {
2439 unlock_page(page);
2440 continue;
2441 }
2442
2443 ret = (*writepage)(page, wbc, data);
2444
2445 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
2446 unlock_page(page);
2447 ret = 0;
2448 }
2449 if (ret || wbc->nr_to_write <= 0)
2450 done = 1;
2451 if (wbc->nonblocking && bdi_write_congested(bdi)) {
2452 wbc->encountered_congestion = 1;
2453 done = 1;
2454 }
2455 }
2456 pagevec_release(&pvec);
2457 cond_resched();
2458 }
2459 if (!scanned && !done) {
2460 /*
2461 * We hit the last page and there is more work to be done: wrap
2462 * back to the start of the file
2463 */
2464 scanned = 1;
2465 index = 0;
2466 goto retry;
2467 }
2468 return ret;
2469}
2470
2471static noinline void flush_write_bio(void *data)
2472{
2473 struct extent_page_data *epd = data;
2474 if (epd->bio) {
2475 submit_one_bio(WRITE, epd->bio, 0, 0);
2476 epd->bio = NULL;
2477 }
2478}
2479
2480int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2481 get_extent_t *get_extent,
2482 struct writeback_control *wbc)
2483{
2484 int ret;
2485 struct address_space *mapping = page->mapping;
2486 struct extent_page_data epd = {
2487 .bio = NULL,
2488 .tree = tree,
2489 .get_extent = get_extent,
2490 .extent_locked = 0,
2491 };
2492 struct writeback_control wbc_writepages = {
2493 .bdi = wbc->bdi,
2494 .sync_mode = WB_SYNC_NONE,
2495 .older_than_this = NULL,
2496 .nr_to_write = 64,
2497 .range_start = page_offset(page) + PAGE_CACHE_SIZE,
2498 .range_end = (loff_t)-1,
2499 };
2500
2501
2502 ret = __extent_writepage(page, wbc, &epd);
2503
2504 extent_write_cache_pages(tree, mapping, &wbc_writepages,
2505 __extent_writepage, &epd, flush_write_bio);
2506 if (epd.bio)
2507 submit_one_bio(WRITE, epd.bio, 0, 0);
2508 return ret;
2509}
2510
2511int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
2512 u64 start, u64 end, get_extent_t *get_extent,
2513 int mode)
2514{
2515 int ret = 0;
2516 struct address_space *mapping = inode->i_mapping;
2517 struct page *page;
2518 unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >>
2519 PAGE_CACHE_SHIFT;
2520
2521 struct extent_page_data epd = {
2522 .bio = NULL,
2523 .tree = tree,
2524 .get_extent = get_extent,
2525 .extent_locked = 1,
2526 };
2527 struct writeback_control wbc_writepages = {
2528 .bdi = inode->i_mapping->backing_dev_info,
2529 .sync_mode = mode,
2530 .older_than_this = NULL,
2531 .nr_to_write = nr_pages * 2,
2532 .range_start = start,
2533 .range_end = end + 1,
2534 };
2535
2536 while (start <= end) {
2537 page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
2538 if (clear_page_dirty_for_io(page))
2539 ret = __extent_writepage(page, &wbc_writepages, &epd);
2540 else {
2541 if (tree->ops && tree->ops->writepage_end_io_hook)
2542 tree->ops->writepage_end_io_hook(page, start,
2543 start + PAGE_CACHE_SIZE - 1,
2544 NULL, 1);
2545 unlock_page(page);
2546 }
2547 page_cache_release(page);
2548 start += PAGE_CACHE_SIZE;
2549 }
2550
2551 if (epd.bio)
2552 submit_one_bio(WRITE, epd.bio, 0, 0);
2553 return ret;
2554}
2555
2556int extent_writepages(struct extent_io_tree *tree,
2557 struct address_space *mapping,
2558 get_extent_t *get_extent,
2559 struct writeback_control *wbc)
2560{
2561 int ret = 0;
2562 struct extent_page_data epd = {
2563 .bio = NULL,
2564 .tree = tree,
2565 .get_extent = get_extent,
2566 .extent_locked = 0,
2567 };
2568
2569 ret = extent_write_cache_pages(tree, mapping, wbc,
2570 __extent_writepage, &epd,
2571 flush_write_bio);
2572 if (epd.bio)
2573 submit_one_bio(WRITE, epd.bio, 0, 0);
2574 return ret;
2575}
2576
2577int extent_readpages(struct extent_io_tree *tree,
2578 struct address_space *mapping,
2579 struct list_head *pages, unsigned nr_pages,
2580 get_extent_t get_extent)
2581{
2582 struct bio *bio = NULL;
2583 unsigned page_idx;
2584 struct pagevec pvec;
2585 unsigned long bio_flags = 0;
2586
2587 pagevec_init(&pvec, 0);
2588 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
2589 struct page *page = list_entry(pages->prev, struct page, lru);
2590
2591 prefetchw(&page->flags);
2592 list_del(&page->lru);
2593 /*
2594 * what we want to do here is call add_to_page_cache_lru,
2595 * but that isn't exported, so we reproduce it here
2596 */
2597 if (!add_to_page_cache(page, mapping,
2598 page->index, GFP_KERNEL)) {
2599
2600 /* open coding of lru_cache_add, also not exported */
2601 page_cache_get(page);
2602 if (!pagevec_add(&pvec, page))
2603 __pagevec_lru_add_file(&pvec);
2604 __extent_read_full_page(tree, page, get_extent,
2605 &bio, 0, &bio_flags);
2606 }
2607 page_cache_release(page);
2608 }
2609 if (pagevec_count(&pvec))
2610 __pagevec_lru_add_file(&pvec);
2611 BUG_ON(!list_empty(pages));
2612 if (bio)
2613 submit_one_bio(READ, bio, 0, bio_flags);
2614 return 0;
2615}
2616
2617/*
2618 * basic invalidatepage code, this waits on any locked or writeback
2619 * ranges corresponding to the page, and then deletes any extent state
2620 * records from the tree
2621 */
2622int extent_invalidatepage(struct extent_io_tree *tree,
2623 struct page *page, unsigned long offset)
2624{
2625 u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
2626 u64 end = start + PAGE_CACHE_SIZE - 1;
2627 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
2628
2629 start += (offset + blocksize - 1) & ~(blocksize - 1);
2630 if (start > end)
2631 return 0;
2632
2633 lock_extent(tree, start, end, GFP_NOFS);
2634 wait_on_extent_writeback(tree, start, end);
2635 clear_extent_bit(tree, start, end,
2636 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
2637 1, 1, GFP_NOFS);
2638 return 0;
2639}
2640
2641/*
2642 * simple commit_write call, set_range_dirty is used to mark both
2643 * the pages and the extent records as dirty
2644 */
2645int extent_commit_write(struct extent_io_tree *tree,
2646 struct inode *inode, struct page *page,
2647 unsigned from, unsigned to)
2648{
2649 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2650
2651 set_page_extent_mapped(page);
2652 set_page_dirty(page);
2653
2654 if (pos > inode->i_size) {
2655 i_size_write(inode, pos);
2656 mark_inode_dirty(inode);
2657 }
2658 return 0;
2659}
2660
2661int extent_prepare_write(struct extent_io_tree *tree,
2662 struct inode *inode, struct page *page,
2663 unsigned from, unsigned to, get_extent_t *get_extent)
2664{
2665 u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2666 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
2667 u64 block_start;
2668 u64 orig_block_start;
2669 u64 block_end;
2670 u64 cur_end;
2671 struct extent_map *em;
2672 unsigned blocksize = 1 << inode->i_blkbits;
2673 size_t page_offset = 0;
2674 size_t block_off_start;
2675 size_t block_off_end;
2676 int err = 0;
2677 int iocount = 0;
2678 int ret = 0;
2679 int isnew;
2680
2681 set_page_extent_mapped(page);
2682
2683 block_start = (page_start + from) & ~((u64)blocksize - 1);
2684 block_end = (page_start + to - 1) | (blocksize - 1);
2685 orig_block_start = block_start;
2686
2687 lock_extent(tree, page_start, page_end, GFP_NOFS);
2688 while (block_start <= block_end) {
2689 em = get_extent(inode, page, page_offset, block_start,
2690 block_end - block_start + 1, 1);
2691 if (IS_ERR(em) || !em)
2692 goto err;
2693
2694 cur_end = min(block_end, extent_map_end(em) - 1);
2695 block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
2696 block_off_end = block_off_start + blocksize;
2697 isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
2698
2699 if (!PageUptodate(page) && isnew &&
2700 (block_off_end > to || block_off_start < from)) {
2701 void *kaddr;
2702
2703 kaddr = kmap_atomic(page, KM_USER0);
2704 if (block_off_end > to)
2705 memset(kaddr + to, 0, block_off_end - to);
2706 if (block_off_start < from)
2707 memset(kaddr + block_off_start, 0,
2708 from - block_off_start);
2709 flush_dcache_page(page);
2710 kunmap_atomic(kaddr, KM_USER0);
2711 }
2712 if ((em->block_start != EXTENT_MAP_HOLE &&
2713 em->block_start != EXTENT_MAP_INLINE) &&
2714 !isnew && !PageUptodate(page) &&
2715 (block_off_end > to || block_off_start < from) &&
2716 !test_range_bit(tree, block_start, cur_end,
2717 EXTENT_UPTODATE, 1)) {
2718 u64 sector;
2719 u64 extent_offset = block_start - em->start;
2720 size_t iosize;
2721 sector = (em->block_start + extent_offset) >> 9;
2722 iosize = (cur_end - block_start + blocksize) &
2723 ~((u64)blocksize - 1);
2724 /*
2725 * we've already got the extent locked, but we
2726 * need to split the state such that our end_bio
2727 * handler can clear the lock.
2728 */
2729 set_extent_bit(tree, block_start,
2730 block_start + iosize - 1,
2731 EXTENT_LOCKED, 0, NULL, GFP_NOFS);
2732 ret = submit_extent_page(READ, tree, page,
2733 sector, iosize, page_offset, em->bdev,
2734 NULL, 1,
2735 end_bio_extent_preparewrite, 0,
2736 0, 0);
2737 iocount++;
2738 block_start = block_start + iosize;
2739 } else {
2740 set_extent_uptodate(tree, block_start, cur_end,
2741 GFP_NOFS);
2742 unlock_extent(tree, block_start, cur_end, GFP_NOFS);
2743 block_start = cur_end + 1;
2744 }
2745 page_offset = block_start & (PAGE_CACHE_SIZE - 1);
2746 free_extent_map(em);
2747 }
2748 if (iocount) {
2749 wait_extent_bit(tree, orig_block_start,
2750 block_end, EXTENT_LOCKED);
2751 }
2752 check_page_uptodate(tree, page);
2753err:
2754 /* FIXME, zero out newly allocated blocks on error */
2755 return err;
2756}
2757
2758/*
2759 * a helper for releasepage, this tests for areas of the page that
2760 * are locked or under IO and drops the related state bits if it is safe
2761 * to drop the page.
2762 */
2763int try_release_extent_state(struct extent_map_tree *map,
2764 struct extent_io_tree *tree, struct page *page,
2765 gfp_t mask)
2766{
2767 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2768 u64 end = start + PAGE_CACHE_SIZE - 1;
2769 int ret = 1;
2770
2771 if (test_range_bit(tree, start, end,
2772 EXTENT_IOBITS | EXTENT_ORDERED, 0))
2773 ret = 0;
2774 else {
2775 if ((mask & GFP_NOFS) == GFP_NOFS)
2776 mask = GFP_NOFS;
2777 clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
2778 1, 1, mask);
2779 }
2780 return ret;
2781}
2782
2783/*
2784 * a helper for releasepage. As long as there are no locked extents
2785 * in the range corresponding to the page, both state records and extent
2786 * map records are removed
2787 */
2788int try_release_extent_mapping(struct extent_map_tree *map,
2789 struct extent_io_tree *tree, struct page *page,
2790 gfp_t mask)
2791{
2792 struct extent_map *em;
2793 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2794 u64 end = start + PAGE_CACHE_SIZE - 1;
2795
2796 if ((mask & __GFP_WAIT) &&
2797 page->mapping->host->i_size > 16 * 1024 * 1024) {
2798 u64 len;
2799 while (start <= end) {
2800 len = end - start + 1;
2801 spin_lock(&map->lock);
2802 em = lookup_extent_mapping(map, start, len);
2803 if (!em || IS_ERR(em)) {
2804 spin_unlock(&map->lock);
2805 break;
2806 }
2807 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
2808 em->start != start) {
2809 spin_unlock(&map->lock);
2810 free_extent_map(em);
2811 break;
2812 }
2813 if (!test_range_bit(tree, em->start,
2814 extent_map_end(em) - 1,
2815 EXTENT_LOCKED | EXTENT_WRITEBACK |
2816 EXTENT_ORDERED,
2817 0)) {
2818 remove_extent_mapping(map, em);
2819 /* once for the rb tree */
2820 free_extent_map(em);
2821 }
2822 start = extent_map_end(em);
2823 spin_unlock(&map->lock);
2824
2825 /* once for us */
2826 free_extent_map(em);
2827 }
2828 }
2829 return try_release_extent_state(map, tree, page, mask);
2830}
2831
2832sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
2833 get_extent_t *get_extent)
2834{
2835 struct inode *inode = mapping->host;
2836 u64 start = iblock << inode->i_blkbits;
2837 sector_t sector = 0;
2838 size_t blksize = (1 << inode->i_blkbits);
2839 struct extent_map *em;
2840
2841 lock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
2842 GFP_NOFS);
2843 em = get_extent(inode, NULL, 0, start, blksize, 0);
2844 unlock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
2845 GFP_NOFS);
2846 if (!em || IS_ERR(em))
2847 return 0;
2848
2849 if (em->block_start > EXTENT_MAP_LAST_BYTE)
2850 goto out;
2851
2852 sector = (em->block_start + start - em->start) >> inode->i_blkbits;
2853out:
2854 free_extent_map(em);
2855 return sector;
2856}
2857
2858static inline struct page *extent_buffer_page(struct extent_buffer *eb,
2859 unsigned long i)
2860{
2861 struct page *p;
2862 struct address_space *mapping;
2863
2864 if (i == 0)
2865 return eb->first_page;
2866 i += eb->start >> PAGE_CACHE_SHIFT;
2867 mapping = eb->first_page->mapping;
2868 if (!mapping)
2869 return NULL;
2870
2871 /*
2872 * extent_buffer_page is only called after pinning the page
2873 * by increasing the reference count. So we know the page must
2874 * be in the radix tree.
2875 */
2876 rcu_read_lock();
2877 p = radix_tree_lookup(&mapping->page_tree, i);
2878 rcu_read_unlock();
2879
2880 return p;
2881}
2882
2883static inline unsigned long num_extent_pages(u64 start, u64 len)
2884{
2885 return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
2886 (start >> PAGE_CACHE_SHIFT);
2887}
2888
2889static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
2890 u64 start,
2891 unsigned long len,
2892 gfp_t mask)
2893{
2894 struct extent_buffer *eb = NULL;
2895#ifdef LEAK_DEBUG
2896 unsigned long flags;
2897#endif
2898
2899 eb = kmem_cache_zalloc(extent_buffer_cache, mask);
2900 eb->start = start;
2901 eb->len = len;
2902 mutex_init(&eb->mutex);
2903#ifdef LEAK_DEBUG
2904 spin_lock_irqsave(&leak_lock, flags);
2905 list_add(&eb->leak_list, &buffers);
2906 spin_unlock_irqrestore(&leak_lock, flags);
2907#endif
2908 atomic_set(&eb->refs, 1);
2909
2910 return eb;
2911}
2912
2913static void __free_extent_buffer(struct extent_buffer *eb)
2914{
2915#ifdef LEAK_DEBUG
2916 unsigned long flags;
2917 spin_lock_irqsave(&leak_lock, flags);
2918 list_del(&eb->leak_list);
2919 spin_unlock_irqrestore(&leak_lock, flags);
2920#endif
2921 kmem_cache_free(extent_buffer_cache, eb);
2922}
2923
2924struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
2925 u64 start, unsigned long len,
2926 struct page *page0,
2927 gfp_t mask)
2928{
2929 unsigned long num_pages = num_extent_pages(start, len);
2930 unsigned long i;
2931 unsigned long index = start >> PAGE_CACHE_SHIFT;
2932 struct extent_buffer *eb;
2933 struct extent_buffer *exists = NULL;
2934 struct page *p;
2935 struct address_space *mapping = tree->mapping;
2936 int uptodate = 1;
2937
2938 spin_lock(&tree->buffer_lock);
2939 eb = buffer_search(tree, start);
2940 if (eb) {
2941 atomic_inc(&eb->refs);
2942 spin_unlock(&tree->buffer_lock);
2943 mark_page_accessed(eb->first_page);
2944 return eb;
2945 }
2946 spin_unlock(&tree->buffer_lock);
2947
2948 eb = __alloc_extent_buffer(tree, start, len, mask);
2949 if (!eb)
2950 return NULL;
2951
2952 if (page0) {
2953 eb->first_page = page0;
2954 i = 1;
2955 index++;
2956 page_cache_get(page0);
2957 mark_page_accessed(page0);
2958 set_page_extent_mapped(page0);
2959 set_page_extent_head(page0, len);
2960 uptodate = PageUptodate(page0);
2961 } else {
2962 i = 0;
2963 }
2964 for (; i < num_pages; i++, index++) {
2965 p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
2966 if (!p) {
2967 WARN_ON(1);
2968 goto free_eb;
2969 }
2970 set_page_extent_mapped(p);
2971 mark_page_accessed(p);
2972 if (i == 0) {
2973 eb->first_page = p;
2974 set_page_extent_head(p, len);
2975 } else {
2976 set_page_private(p, EXTENT_PAGE_PRIVATE);
2977 }
2978 if (!PageUptodate(p))
2979 uptodate = 0;
2980 unlock_page(p);
2981 }
2982 if (uptodate)
2983 eb->flags |= EXTENT_UPTODATE;
2984 eb->flags |= EXTENT_BUFFER_FILLED;
2985
2986 spin_lock(&tree->buffer_lock);
2987 exists = buffer_tree_insert(tree, start, &eb->rb_node);
2988 if (exists) {
2989 /* add one reference for the caller */
2990 atomic_inc(&exists->refs);
2991 spin_unlock(&tree->buffer_lock);
2992 goto free_eb;
2993 }
2994 spin_unlock(&tree->buffer_lock);
2995
2996 /* add one reference for the tree */
2997 atomic_inc(&eb->refs);
2998 return eb;
2999
3000free_eb:
3001 if (!atomic_dec_and_test(&eb->refs))
3002 return exists;
3003 for (index = 1; index < i; index++)
3004 page_cache_release(extent_buffer_page(eb, index));
3005 page_cache_release(extent_buffer_page(eb, 0));
3006 __free_extent_buffer(eb);
3007 return exists;
3008}
3009
3010struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
3011 u64 start, unsigned long len,
3012 gfp_t mask)
3013{
3014 struct extent_buffer *eb;
3015
3016 spin_lock(&tree->buffer_lock);
3017 eb = buffer_search(tree, start);
3018 if (eb)
3019 atomic_inc(&eb->refs);
3020 spin_unlock(&tree->buffer_lock);
3021
3022 if (eb)
3023 mark_page_accessed(eb->first_page);
3024
3025 return eb;
3026}
3027
3028void free_extent_buffer(struct extent_buffer *eb)
3029{
3030 if (!eb)
3031 return;
3032
3033 if (!atomic_dec_and_test(&eb->refs))
3034 return;
3035
3036 WARN_ON(1);
3037}
3038
3039int clear_extent_buffer_dirty(struct extent_io_tree *tree,
3040 struct extent_buffer *eb)
3041{
3042 int set;
3043 unsigned long i;
3044 unsigned long num_pages;
3045 struct page *page;
3046
3047 u64 start = eb->start;
3048 u64 end = start + eb->len - 1;
3049
3050 set = clear_extent_dirty(tree, start, end, GFP_NOFS);
3051 num_pages = num_extent_pages(eb->start, eb->len);
3052
3053 for (i = 0; i < num_pages; i++) {
3054 page = extent_buffer_page(eb, i);
3055 if (!set && !PageDirty(page))
3056 continue;
3057
3058 lock_page(page);
3059 if (i == 0)
3060 set_page_extent_head(page, eb->len);
3061 else
3062 set_page_private(page, EXTENT_PAGE_PRIVATE);
3063
3064 /*
3065 * if we're on the last page or the first page and the
3066 * block isn't aligned on a page boundary, do extra checks
3067 * to make sure we don't clean page that is partially dirty
3068 */
3069 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
3070 ((i == num_pages - 1) &&
3071 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
3072 start = (u64)page->index << PAGE_CACHE_SHIFT;
3073 end = start + PAGE_CACHE_SIZE - 1;
3074 if (test_range_bit(tree, start, end,
3075 EXTENT_DIRTY, 0)) {
3076 unlock_page(page);
3077 continue;
3078 }
3079 }
3080 clear_page_dirty_for_io(page);
3081 spin_lock_irq(&page->mapping->tree_lock);
3082 if (!PageDirty(page)) {
3083 radix_tree_tag_clear(&page->mapping->page_tree,
3084 page_index(page),
3085 PAGECACHE_TAG_DIRTY);
3086 }
3087 spin_unlock_irq(&page->mapping->tree_lock);
3088 unlock_page(page);
3089 }
3090 return 0;
3091}
3092
3093int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
3094 struct extent_buffer *eb)
3095{
3096 return wait_on_extent_writeback(tree, eb->start,
3097 eb->start + eb->len - 1);
3098}
3099
3100int set_extent_buffer_dirty(struct extent_io_tree *tree,
3101 struct extent_buffer *eb)
3102{
3103 unsigned long i;
3104 unsigned long num_pages;
3105
3106 num_pages = num_extent_pages(eb->start, eb->len);
3107 for (i = 0; i < num_pages; i++) {
3108 struct page *page = extent_buffer_page(eb, i);
3109 /* writepage may need to do something special for the
3110 * first page, we have to make sure page->private is
3111 * properly set. releasepage may drop page->private
3112 * on us if the page isn't already dirty.
3113 */
3114 lock_page(page);
3115 if (i == 0) {
3116 set_page_extent_head(page, eb->len);
3117 } else if (PagePrivate(page) &&
3118 page->private != EXTENT_PAGE_PRIVATE) {
3119 set_page_extent_mapped(page);
3120 }
3121 __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
3122 set_extent_dirty(tree, page_offset(page),
3123 page_offset(page) + PAGE_CACHE_SIZE - 1,
3124 GFP_NOFS);
3125 unlock_page(page);
3126 }
3127 return 0;
3128}
3129
3130int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
3131 struct extent_buffer *eb)
3132{
3133 unsigned long i;
3134 struct page *page;
3135 unsigned long num_pages;
3136
3137 num_pages = num_extent_pages(eb->start, eb->len);
3138 eb->flags &= ~EXTENT_UPTODATE;
3139
3140 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3141 GFP_NOFS);
3142 for (i = 0; i < num_pages; i++) {
3143 page = extent_buffer_page(eb, i);
3144 if (page)
3145 ClearPageUptodate(page);
3146 }
3147 return 0;
3148}
3149
3150int set_extent_buffer_uptodate(struct extent_io_tree *tree,
3151 struct extent_buffer *eb)
3152{
3153 unsigned long i;
3154 struct page *page;
3155 unsigned long num_pages;
3156
3157 num_pages = num_extent_pages(eb->start, eb->len);
3158
3159 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3160 GFP_NOFS);
3161 for (i = 0; i < num_pages; i++) {
3162 page = extent_buffer_page(eb, i);
3163 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
3164 ((i == num_pages - 1) &&
3165 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
3166 check_page_uptodate(tree, page);
3167 continue;
3168 }
3169 SetPageUptodate(page);
3170 }
3171 return 0;
3172}
3173
3174int extent_range_uptodate(struct extent_io_tree *tree,
3175 u64 start, u64 end)
3176{
3177 struct page *page;
3178 int ret;
3179 int pg_uptodate = 1;
3180 int uptodate;
3181 unsigned long index;
3182
3183 ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1);
3184 if (ret)
3185 return 1;
3186 while (start <= end) {
3187 index = start >> PAGE_CACHE_SHIFT;
3188 page = find_get_page(tree->mapping, index);
3189 uptodate = PageUptodate(page);
3190 page_cache_release(page);
3191 if (!uptodate) {
3192 pg_uptodate = 0;
3193 break;
3194 }
3195 start += PAGE_CACHE_SIZE;
3196 }
3197 return pg_uptodate;
3198}
3199
3200int extent_buffer_uptodate(struct extent_io_tree *tree,
3201 struct extent_buffer *eb)
3202{
3203 int ret = 0;
3204 unsigned long num_pages;
3205 unsigned long i;
3206 struct page *page;
3207 int pg_uptodate = 1;
3208
3209 if (eb->flags & EXTENT_UPTODATE)
3210 return 1;
3211
3212 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3213 EXTENT_UPTODATE, 1);
3214 if (ret)
3215 return ret;
3216
3217 num_pages = num_extent_pages(eb->start, eb->len);
3218 for (i = 0; i < num_pages; i++) {
3219 page = extent_buffer_page(eb, i);
3220 if (!PageUptodate(page)) {
3221 pg_uptodate = 0;
3222 break;
3223 }
3224 }
3225 return pg_uptodate;
3226}
3227
3228int read_extent_buffer_pages(struct extent_io_tree *tree,
3229 struct extent_buffer *eb,
3230 u64 start, int wait,
3231 get_extent_t *get_extent, int mirror_num)
3232{
3233 unsigned long i;
3234 unsigned long start_i;
3235 struct page *page;
3236 int err;
3237 int ret = 0;
3238 int locked_pages = 0;
3239 int all_uptodate = 1;
3240 int inc_all_pages = 0;
3241 unsigned long num_pages;
3242 struct bio *bio = NULL;
3243 unsigned long bio_flags = 0;
3244
3245 if (eb->flags & EXTENT_UPTODATE)
3246 return 0;
3247
3248 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3249 EXTENT_UPTODATE, 1)) {
3250 return 0;
3251 }
3252
3253 if (start) {
3254 WARN_ON(start < eb->start);
3255 start_i = (start >> PAGE_CACHE_SHIFT) -
3256 (eb->start >> PAGE_CACHE_SHIFT);
3257 } else {
3258 start_i = 0;
3259 }
3260
3261 num_pages = num_extent_pages(eb->start, eb->len);
3262 for (i = start_i; i < num_pages; i++) {
3263 page = extent_buffer_page(eb, i);
3264 if (!wait) {
3265 if (!trylock_page(page))
3266 goto unlock_exit;
3267 } else {
3268 lock_page(page);
3269 }
3270 locked_pages++;
3271 if (!PageUptodate(page))
3272 all_uptodate = 0;
3273 }
3274 if (all_uptodate) {
3275 if (start_i == 0)
3276 eb->flags |= EXTENT_UPTODATE;
3277 goto unlock_exit;
3278 }
3279
3280 for (i = start_i; i < num_pages; i++) {
3281 page = extent_buffer_page(eb, i);
3282 if (inc_all_pages)
3283 page_cache_get(page);
3284 if (!PageUptodate(page)) {
3285 if (start_i == 0)
3286 inc_all_pages = 1;
3287 ClearPageError(page);
3288 err = __extent_read_full_page(tree, page,
3289 get_extent, &bio,
3290 mirror_num, &bio_flags);
3291 if (err)
3292 ret = err;
3293 } else {
3294 unlock_page(page);
3295 }
3296 }
3297
3298 if (bio)
3299 submit_one_bio(READ, bio, mirror_num, bio_flags);
3300
3301 if (ret || !wait)
3302 return ret;
3303
3304 for (i = start_i; i < num_pages; i++) {
3305 page = extent_buffer_page(eb, i);
3306 wait_on_page_locked(page);
3307 if (!PageUptodate(page))
3308 ret = -EIO;
3309 }
3310
3311 if (!ret)
3312 eb->flags |= EXTENT_UPTODATE;
3313 return ret;
3314
3315unlock_exit:
3316 i = start_i;
3317 while (locked_pages > 0) {
3318 page = extent_buffer_page(eb, i);
3319 i++;
3320 unlock_page(page);
3321 locked_pages--;
3322 }
3323 return ret;
3324}
3325
3326void read_extent_buffer(struct extent_buffer *eb, void *dstv,
3327 unsigned long start,
3328 unsigned long len)
3329{
3330 size_t cur;
3331 size_t offset;
3332 struct page *page;
3333 char *kaddr;
3334 char *dst = (char *)dstv;
3335 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3336 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3337
3338 WARN_ON(start > eb->len);
3339 WARN_ON(start + len > eb->start + eb->len);
3340
3341 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3342
3343 while (len > 0) {
3344 page = extent_buffer_page(eb, i);
3345
3346 cur = min(len, (PAGE_CACHE_SIZE - offset));
3347 kaddr = kmap_atomic(page, KM_USER1);
3348 memcpy(dst, kaddr + offset, cur);
3349 kunmap_atomic(kaddr, KM_USER1);
3350
3351 dst += cur;
3352 len -= cur;
3353 offset = 0;
3354 i++;
3355 }
3356}
3357
3358int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
3359 unsigned long min_len, char **token, char **map,
3360 unsigned long *map_start,
3361 unsigned long *map_len, int km)
3362{
3363 size_t offset = start & (PAGE_CACHE_SIZE - 1);
3364 char *kaddr;
3365 struct page *p;
3366 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3367 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3368 unsigned long end_i = (start_offset + start + min_len - 1) >>
3369 PAGE_CACHE_SHIFT;
3370
3371 if (i != end_i)
3372 return -EINVAL;
3373
3374 if (i == 0) {
3375 offset = start_offset;
3376 *map_start = 0;
3377 } else {
3378 offset = 0;
3379 *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
3380 }
3381
3382 if (start + min_len > eb->len) {
3383 printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
3384 "wanted %lu %lu\n", (unsigned long long)eb->start,
3385 eb->len, start, min_len);
3386 WARN_ON(1);
3387 }
3388
3389 p = extent_buffer_page(eb, i);
3390 kaddr = kmap_atomic(p, km);
3391 *token = kaddr;
3392 *map = kaddr + offset;
3393 *map_len = PAGE_CACHE_SIZE - offset;
3394 return 0;
3395}
3396
3397int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
3398 unsigned long min_len,
3399 char **token, char **map,
3400 unsigned long *map_start,
3401 unsigned long *map_len, int km)
3402{
3403 int err;
3404 int save = 0;
3405 if (eb->map_token) {
3406 unmap_extent_buffer(eb, eb->map_token, km);
3407 eb->map_token = NULL;
3408 save = 1;
3409 WARN_ON(!mutex_is_locked(&eb->mutex));
3410 }
3411 err = map_private_extent_buffer(eb, start, min_len, token, map,
3412 map_start, map_len, km);
3413 if (!err && save) {
3414 eb->map_token = *token;
3415 eb->kaddr = *map;
3416 eb->map_start = *map_start;
3417 eb->map_len = *map_len;
3418 }
3419 return err;
3420}
3421
3422void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
3423{
3424 kunmap_atomic(token, km);
3425}
3426
3427int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
3428 unsigned long start,
3429 unsigned long len)
3430{
3431 size_t cur;
3432 size_t offset;
3433 struct page *page;
3434 char *kaddr;
3435 char *ptr = (char *)ptrv;
3436 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3437 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3438 int ret = 0;
3439
3440 WARN_ON(start > eb->len);
3441 WARN_ON(start + len > eb->start + eb->len);
3442
3443 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3444
3445 while (len > 0) {
3446 page = extent_buffer_page(eb, i);
3447
3448 cur = min(len, (PAGE_CACHE_SIZE - offset));
3449
3450 kaddr = kmap_atomic(page, KM_USER0);
3451 ret = memcmp(ptr, kaddr + offset, cur);
3452 kunmap_atomic(kaddr, KM_USER0);
3453 if (ret)
3454 break;
3455
3456 ptr += cur;
3457 len -= cur;
3458 offset = 0;
3459 i++;
3460 }
3461 return ret;
3462}
3463
3464void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
3465 unsigned long start, unsigned long len)
3466{
3467 size_t cur;
3468 size_t offset;
3469 struct page *page;
3470 char *kaddr;
3471 char *src = (char *)srcv;
3472 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3473 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3474
3475 WARN_ON(start > eb->len);
3476 WARN_ON(start + len > eb->start + eb->len);
3477
3478 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3479
3480 while (len > 0) {
3481 page = extent_buffer_page(eb, i);
3482 WARN_ON(!PageUptodate(page));
3483
3484 cur = min(len, PAGE_CACHE_SIZE - offset);
3485 kaddr = kmap_atomic(page, KM_USER1);
3486 memcpy(kaddr + offset, src, cur);
3487 kunmap_atomic(kaddr, KM_USER1);
3488
3489 src += cur;
3490 len -= cur;
3491 offset = 0;
3492 i++;
3493 }
3494}
3495
3496void memset_extent_buffer(struct extent_buffer *eb, char c,
3497 unsigned long start, unsigned long len)
3498{
3499 size_t cur;
3500 size_t offset;
3501 struct page *page;
3502 char *kaddr;
3503 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3504 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3505
3506 WARN_ON(start > eb->len);
3507 WARN_ON(start + len > eb->start + eb->len);
3508
3509 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3510
3511 while (len > 0) {
3512 page = extent_buffer_page(eb, i);
3513 WARN_ON(!PageUptodate(page));
3514
3515 cur = min(len, PAGE_CACHE_SIZE - offset);
3516 kaddr = kmap_atomic(page, KM_USER0);
3517 memset(kaddr + offset, c, cur);
3518 kunmap_atomic(kaddr, KM_USER0);
3519
3520 len -= cur;
3521 offset = 0;
3522 i++;
3523 }
3524}
3525
3526void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
3527 unsigned long dst_offset, unsigned long src_offset,
3528 unsigned long len)
3529{
3530 u64 dst_len = dst->len;
3531 size_t cur;
3532 size_t offset;
3533 struct page *page;
3534 char *kaddr;
3535 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3536 unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
3537
3538 WARN_ON(src->len != dst_len);
3539
3540 offset = (start_offset + dst_offset) &
3541 ((unsigned long)PAGE_CACHE_SIZE - 1);
3542
3543 while (len > 0) {
3544 page = extent_buffer_page(dst, i);
3545 WARN_ON(!PageUptodate(page));
3546
3547 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
3548
3549 kaddr = kmap_atomic(page, KM_USER0);
3550 read_extent_buffer(src, kaddr + offset, src_offset, cur);
3551 kunmap_atomic(kaddr, KM_USER0);
3552
3553 src_offset += cur;
3554 len -= cur;
3555 offset = 0;
3556 i++;
3557 }
3558}
3559
3560static void move_pages(struct page *dst_page, struct page *src_page,
3561 unsigned long dst_off, unsigned long src_off,
3562 unsigned long len)
3563{
3564 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
3565 if (dst_page == src_page) {
3566 memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
3567 } else {
3568 char *src_kaddr = kmap_atomic(src_page, KM_USER1);
3569 char *p = dst_kaddr + dst_off + len;
3570 char *s = src_kaddr + src_off + len;
3571
3572 while (len--)
3573 *--p = *--s;
3574
3575 kunmap_atomic(src_kaddr, KM_USER1);
3576 }
3577 kunmap_atomic(dst_kaddr, KM_USER0);
3578}
3579
3580static void copy_pages(struct page *dst_page, struct page *src_page,
3581 unsigned long dst_off, unsigned long src_off,
3582 unsigned long len)
3583{
3584 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
3585 char *src_kaddr;
3586
3587 if (dst_page != src_page)
3588 src_kaddr = kmap_atomic(src_page, KM_USER1);
3589 else
3590 src_kaddr = dst_kaddr;
3591
3592 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
3593 kunmap_atomic(dst_kaddr, KM_USER0);
3594 if (dst_page != src_page)
3595 kunmap_atomic(src_kaddr, KM_USER1);
3596}
3597
3598void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3599 unsigned long src_offset, unsigned long len)
3600{
3601 size_t cur;
3602 size_t dst_off_in_page;
3603 size_t src_off_in_page;
3604 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3605 unsigned long dst_i;
3606 unsigned long src_i;
3607
3608 if (src_offset + len > dst->len) {
3609 printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
3610 "len %lu dst len %lu\n", src_offset, len, dst->len);
3611 BUG_ON(1);
3612 }
3613 if (dst_offset + len > dst->len) {
3614 printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
3615 "len %lu dst len %lu\n", dst_offset, len, dst->len);
3616 BUG_ON(1);
3617 }
3618
3619 while (len > 0) {
3620 dst_off_in_page = (start_offset + dst_offset) &
3621 ((unsigned long)PAGE_CACHE_SIZE - 1);
3622 src_off_in_page = (start_offset + src_offset) &
3623 ((unsigned long)PAGE_CACHE_SIZE - 1);
3624
3625 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
3626 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
3627
3628 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
3629 src_off_in_page));
3630 cur = min_t(unsigned long, cur,
3631 (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
3632
3633 copy_pages(extent_buffer_page(dst, dst_i),
3634 extent_buffer_page(dst, src_i),
3635 dst_off_in_page, src_off_in_page, cur);
3636
3637 src_offset += cur;
3638 dst_offset += cur;
3639 len -= cur;
3640 }
3641}
3642
3643void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3644 unsigned long src_offset, unsigned long len)
3645{
3646 size_t cur;
3647 size_t dst_off_in_page;
3648 size_t src_off_in_page;
3649 unsigned long dst_end = dst_offset + len - 1;
3650 unsigned long src_end = src_offset + len - 1;
3651 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3652 unsigned long dst_i;
3653 unsigned long src_i;
3654
3655 if (src_offset + len > dst->len) {
3656 printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
3657 "len %lu len %lu\n", src_offset, len, dst->len);
3658 BUG_ON(1);
3659 }
3660 if (dst_offset + len > dst->len) {
3661 printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
3662 "len %lu len %lu\n", dst_offset, len, dst->len);
3663 BUG_ON(1);
3664 }
3665 if (dst_offset < src_offset) {
3666 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
3667 return;
3668 }
3669 while (len > 0) {
3670 dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
3671 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
3672
3673 dst_off_in_page = (start_offset + dst_end) &
3674 ((unsigned long)PAGE_CACHE_SIZE - 1);
3675 src_off_in_page = (start_offset + src_end) &
3676 ((unsigned long)PAGE_CACHE_SIZE - 1);
3677
3678 cur = min_t(unsigned long, len, src_off_in_page + 1);
3679 cur = min(cur, dst_off_in_page + 1);
3680 move_pages(extent_buffer_page(dst, dst_i),
3681 extent_buffer_page(dst, src_i),
3682 dst_off_in_page - cur + 1,
3683 src_off_in_page - cur + 1, cur);
3684
3685 dst_end -= cur;
3686 src_end -= cur;
3687 len -= cur;
3688 }
3689}
3690
3691int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
3692{
3693 u64 start = page_offset(page);
3694 struct extent_buffer *eb;
3695 int ret = 1;
3696 unsigned long i;
3697 unsigned long num_pages;
3698
3699 spin_lock(&tree->buffer_lock);
3700 eb = buffer_search(tree, start);
3701 if (!eb)
3702 goto out;
3703
3704 if (atomic_read(&eb->refs) > 1) {
3705 ret = 0;
3706 goto out;
3707 }
3708 /* at this point we can safely release the extent buffer */
3709 num_pages = num_extent_pages(eb->start, eb->len);
3710 for (i = 0; i < num_pages; i++)
3711 page_cache_release(extent_buffer_page(eb, i));
3712 rb_erase(&eb->rb_node, &tree->buffer);
3713 __free_extent_buffer(eb);
3714out:
3715 spin_unlock(&tree->buffer_lock);
3716 return ret;
3717}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
new file mode 100644
index 000000000000..c5b483a79137
--- /dev/null
+++ b/fs/btrfs/extent_io.h
@@ -0,0 +1,269 @@
1#ifndef __EXTENTIO__
2#define __EXTENTIO__
3
4#include <linux/rbtree.h>
5
6/* bits for the extent state */
7#define EXTENT_DIRTY 1
8#define EXTENT_WRITEBACK (1 << 1)
9#define EXTENT_UPTODATE (1 << 2)
10#define EXTENT_LOCKED (1 << 3)
11#define EXTENT_NEW (1 << 4)
12#define EXTENT_DELALLOC (1 << 5)
13#define EXTENT_DEFRAG (1 << 6)
14#define EXTENT_DEFRAG_DONE (1 << 7)
15#define EXTENT_BUFFER_FILLED (1 << 8)
16#define EXTENT_ORDERED (1 << 9)
17#define EXTENT_ORDERED_METADATA (1 << 10)
18#define EXTENT_BOUNDARY (1 << 11)
19#define EXTENT_NODATASUM (1 << 12)
20#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
21
22/* flags for bio submission */
23#define EXTENT_BIO_COMPRESSED 1
24
25/*
26 * page->private values. Every page that is controlled by the extent
27 * map has page->private set to one.
28 */
29#define EXTENT_PAGE_PRIVATE 1
30#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3
31
32struct extent_state;
33
34typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
35 struct bio *bio, int mirror_num,
36 unsigned long bio_flags);
37struct extent_io_ops {
38 int (*fill_delalloc)(struct inode *inode, struct page *locked_page,
39 u64 start, u64 end, int *page_started,
40 unsigned long *nr_written);
41 int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
42 int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
43 extent_submit_bio_hook_t *submit_bio_hook;
44 int (*merge_bio_hook)(struct page *page, unsigned long offset,
45 size_t size, struct bio *bio,
46 unsigned long bio_flags);
47 int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
48 int (*readpage_io_failed_hook)(struct bio *bio, struct page *page,
49 u64 start, u64 end,
50 struct extent_state *state);
51 int (*writepage_io_failed_hook)(struct bio *bio, struct page *page,
52 u64 start, u64 end,
53 struct extent_state *state);
54 int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
55 struct extent_state *state);
56 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
57 struct extent_state *state, int uptodate);
58 int (*set_bit_hook)(struct inode *inode, u64 start, u64 end,
59 unsigned long old, unsigned long bits);
60 int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end,
61 unsigned long old, unsigned long bits);
62 int (*write_cache_pages_lock_hook)(struct page *page);
63};
64
65struct extent_io_tree {
66 struct rb_root state;
67 struct rb_root buffer;
68 struct address_space *mapping;
69 u64 dirty_bytes;
70 spinlock_t lock;
71 spinlock_t buffer_lock;
72 struct extent_io_ops *ops;
73};
74
75struct extent_state {
76 u64 start;
77 u64 end; /* inclusive */
78 struct rb_node rb_node;
79 struct extent_io_tree *tree;
80 wait_queue_head_t wq;
81 atomic_t refs;
82 unsigned long state;
83
84 /* for use by the FS */
85 u64 private;
86
87 struct list_head leak_list;
88};
89
90struct extent_buffer {
91 u64 start;
92 unsigned long len;
93 char *map_token;
94 char *kaddr;
95 unsigned long map_start;
96 unsigned long map_len;
97 struct page *first_page;
98 atomic_t refs;
99 int flags;
100 struct list_head leak_list;
101 struct rb_node rb_node;
102 struct mutex mutex;
103};
104
105struct extent_map_tree;
106
107static inline struct extent_state *extent_state_next(struct extent_state *state)
108{
109 struct rb_node *node;
110 node = rb_next(&state->rb_node);
111 if (!node)
112 return NULL;
113 return rb_entry(node, struct extent_state, rb_node);
114}
115
116typedef struct extent_map *(get_extent_t)(struct inode *inode,
117 struct page *page,
118 size_t page_offset,
119 u64 start, u64 len,
120 int create);
121
122void extent_io_tree_init(struct extent_io_tree *tree,
123 struct address_space *mapping, gfp_t mask);
124int try_release_extent_mapping(struct extent_map_tree *map,
125 struct extent_io_tree *tree, struct page *page,
126 gfp_t mask);
127int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page);
128int try_release_extent_state(struct extent_map_tree *map,
129 struct extent_io_tree *tree, struct page *page,
130 gfp_t mask);
131int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
132int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
133int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
134 gfp_t mask);
135int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
136 get_extent_t *get_extent);
137int __init extent_io_init(void);
138void extent_io_exit(void);
139
140u64 count_range_bits(struct extent_io_tree *tree,
141 u64 *start, u64 search_end,
142 u64 max_bytes, unsigned long bits);
143
144int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
145 int bits, int filled);
146int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
147 int bits, gfp_t mask);
148int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
149 int bits, int wake, int delete, gfp_t mask);
150int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
151 int bits, gfp_t mask);
152int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
153 gfp_t mask);
154int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
155 gfp_t mask);
156int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
157 gfp_t mask);
158int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
159 gfp_t mask);
160int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
161 gfp_t mask);
162int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start,
163 u64 end, gfp_t mask);
164int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
165 gfp_t mask);
166int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
167 gfp_t mask);
168int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
169 u64 *start_ret, u64 *end_ret, int bits);
170struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
171 u64 start, int bits);
172int extent_invalidatepage(struct extent_io_tree *tree,
173 struct page *page, unsigned long offset);
174int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
175 get_extent_t *get_extent,
176 struct writeback_control *wbc);
177int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
178 u64 start, u64 end, get_extent_t *get_extent,
179 int mode);
180int extent_writepages(struct extent_io_tree *tree,
181 struct address_space *mapping,
182 get_extent_t *get_extent,
183 struct writeback_control *wbc);
184int extent_readpages(struct extent_io_tree *tree,
185 struct address_space *mapping,
186 struct list_head *pages, unsigned nr_pages,
187 get_extent_t get_extent);
188int extent_prepare_write(struct extent_io_tree *tree,
189 struct inode *inode, struct page *page,
190 unsigned from, unsigned to, get_extent_t *get_extent);
191int extent_commit_write(struct extent_io_tree *tree,
192 struct inode *inode, struct page *page,
193 unsigned from, unsigned to);
194sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
195 get_extent_t *get_extent);
196int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end);
197int set_state_private(struct extent_io_tree *tree, u64 start, u64 private);
198int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
199void set_page_extent_mapped(struct page *page);
200
201struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
202 u64 start, unsigned long len,
203 struct page *page0,
204 gfp_t mask);
205struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
206 u64 start, unsigned long len,
207 gfp_t mask);
208void free_extent_buffer(struct extent_buffer *eb);
209int read_extent_buffer_pages(struct extent_io_tree *tree,
210 struct extent_buffer *eb, u64 start, int wait,
211 get_extent_t *get_extent, int mirror_num);
212
213static inline void extent_buffer_get(struct extent_buffer *eb)
214{
215 atomic_inc(&eb->refs);
216}
217
218int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
219 unsigned long start,
220 unsigned long len);
221void read_extent_buffer(struct extent_buffer *eb, void *dst,
222 unsigned long start,
223 unsigned long len);
224void write_extent_buffer(struct extent_buffer *eb, const void *src,
225 unsigned long start, unsigned long len);
226void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
227 unsigned long dst_offset, unsigned long src_offset,
228 unsigned long len);
229void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
230 unsigned long src_offset, unsigned long len);
231void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
232 unsigned long src_offset, unsigned long len);
233void memset_extent_buffer(struct extent_buffer *eb, char c,
234 unsigned long start, unsigned long len);
235int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
236 struct extent_buffer *eb);
237int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end);
238int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
239int clear_extent_buffer_dirty(struct extent_io_tree *tree,
240 struct extent_buffer *eb);
241int set_extent_buffer_dirty(struct extent_io_tree *tree,
242 struct extent_buffer *eb);
243int set_extent_buffer_uptodate(struct extent_io_tree *tree,
244 struct extent_buffer *eb);
245int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
246 struct extent_buffer *eb);
247int extent_buffer_uptodate(struct extent_io_tree *tree,
248 struct extent_buffer *eb);
249int map_extent_buffer(struct extent_buffer *eb, unsigned long offset,
250 unsigned long min_len, char **token, char **map,
251 unsigned long *map_start,
252 unsigned long *map_len, int km);
253int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
254 unsigned long min_len, char **token, char **map,
255 unsigned long *map_start,
256 unsigned long *map_len, int km);
257void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
258int release_extent_buffer_tail_pages(struct extent_buffer *eb);
259int extent_range_uptodate(struct extent_io_tree *tree,
260 u64 start, u64 end);
261int extent_clear_unlock_delalloc(struct inode *inode,
262 struct extent_io_tree *tree,
263 u64 start, u64 end, struct page *locked_page,
264 int unlock_page,
265 int clear_unlock,
266 int clear_delalloc, int clear_dirty,
267 int set_writeback,
268 int end_writeback);
269#endif
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
new file mode 100644
index 000000000000..4a83e33ada32
--- /dev/null
+++ b/fs/btrfs/extent_map.c
@@ -0,0 +1,351 @@
1#include <linux/err.h>
2#include <linux/gfp.h>
3#include <linux/slab.h>
4#include <linux/module.h>
5#include <linux/spinlock.h>
6#include <linux/version.h>
7#include <linux/hardirq.h>
8#include "extent_map.h"
9
10/* temporary define until extent_map moves out of btrfs */
11struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
12 unsigned long extra_flags,
13 void (*ctor)(void *, struct kmem_cache *,
14 unsigned long));
15
16static struct kmem_cache *extent_map_cache;
17
18int __init extent_map_init(void)
19{
20 extent_map_cache = btrfs_cache_create("extent_map",
21 sizeof(struct extent_map), 0,
22 NULL);
23 if (!extent_map_cache)
24 return -ENOMEM;
25 return 0;
26}
27
28void extent_map_exit(void)
29{
30 if (extent_map_cache)
31 kmem_cache_destroy(extent_map_cache);
32}
33
34/**
35 * extent_map_tree_init - initialize extent map tree
36 * @tree: tree to initialize
37 * @mask: flags for memory allocations during tree operations
38 *
39 * Initialize the extent tree @tree. Should be called for each new inode
40 * or other user of the extent_map interface.
41 */
42void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
43{
44 tree->map.rb_node = NULL;
45 spin_lock_init(&tree->lock);
46}
47EXPORT_SYMBOL(extent_map_tree_init);
48
49/**
50 * alloc_extent_map - allocate new extent map structure
51 * @mask: memory allocation flags
52 *
53 * Allocate a new extent_map structure. The new structure is
54 * returned with a reference count of one and needs to be
55 * freed using free_extent_map()
56 */
57struct extent_map *alloc_extent_map(gfp_t mask)
58{
59 struct extent_map *em;
60 em = kmem_cache_alloc(extent_map_cache, mask);
61 if (!em || IS_ERR(em))
62 return em;
63 em->in_tree = 0;
64 em->flags = 0;
65 atomic_set(&em->refs, 1);
66 return em;
67}
68EXPORT_SYMBOL(alloc_extent_map);
69
70/**
71 * free_extent_map - drop reference count of an extent_map
72 * @em: extent map beeing releasead
73 *
74 * Drops the reference out on @em by one and free the structure
75 * if the reference count hits zero.
76 */
77void free_extent_map(struct extent_map *em)
78{
79 if (!em)
80 return;
81 WARN_ON(atomic_read(&em->refs) == 0);
82 if (atomic_dec_and_test(&em->refs)) {
83 WARN_ON(em->in_tree);
84 kmem_cache_free(extent_map_cache, em);
85 }
86}
87EXPORT_SYMBOL(free_extent_map);
88
89static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
90 struct rb_node *node)
91{
92 struct rb_node **p = &root->rb_node;
93 struct rb_node *parent = NULL;
94 struct extent_map *entry;
95
96 while (*p) {
97 parent = *p;
98 entry = rb_entry(parent, struct extent_map, rb_node);
99
100 WARN_ON(!entry->in_tree);
101
102 if (offset < entry->start)
103 p = &(*p)->rb_left;
104 else if (offset >= extent_map_end(entry))
105 p = &(*p)->rb_right;
106 else
107 return parent;
108 }
109
110 entry = rb_entry(node, struct extent_map, rb_node);
111 entry->in_tree = 1;
112 rb_link_node(node, parent, p);
113 rb_insert_color(node, root);
114 return NULL;
115}
116
117/*
118 * search through the tree for an extent_map with a given offset. If
119 * it can't be found, try to find some neighboring extents
120 */
121static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
122 struct rb_node **prev_ret,
123 struct rb_node **next_ret)
124{
125 struct rb_node *n = root->rb_node;
126 struct rb_node *prev = NULL;
127 struct rb_node *orig_prev = NULL;
128 struct extent_map *entry;
129 struct extent_map *prev_entry = NULL;
130
131 while (n) {
132 entry = rb_entry(n, struct extent_map, rb_node);
133 prev = n;
134 prev_entry = entry;
135
136 WARN_ON(!entry->in_tree);
137
138 if (offset < entry->start)
139 n = n->rb_left;
140 else if (offset >= extent_map_end(entry))
141 n = n->rb_right;
142 else
143 return n;
144 }
145
146 if (prev_ret) {
147 orig_prev = prev;
148 while (prev && offset >= extent_map_end(prev_entry)) {
149 prev = rb_next(prev);
150 prev_entry = rb_entry(prev, struct extent_map, rb_node);
151 }
152 *prev_ret = prev;
153 prev = orig_prev;
154 }
155
156 if (next_ret) {
157 prev_entry = rb_entry(prev, struct extent_map, rb_node);
158 while (prev && offset < prev_entry->start) {
159 prev = rb_prev(prev);
160 prev_entry = rb_entry(prev, struct extent_map, rb_node);
161 }
162 *next_ret = prev;
163 }
164 return NULL;
165}
166
167/*
168 * look for an offset in the tree, and if it can't be found, return
169 * the first offset we can find smaller than 'offset'.
170 */
171static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
172{
173 struct rb_node *prev;
174 struct rb_node *ret;
175 ret = __tree_search(root, offset, &prev, NULL);
176 if (!ret)
177 return prev;
178 return ret;
179}
180
181/* check to see if two extent_map structs are adjacent and safe to merge */
182static int mergable_maps(struct extent_map *prev, struct extent_map *next)
183{
184 if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
185 return 0;
186
187 /*
188 * don't merge compressed extents, we need to know their
189 * actual size
190 */
191 if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
192 return 0;
193
194 if (extent_map_end(prev) == next->start &&
195 prev->flags == next->flags &&
196 prev->bdev == next->bdev &&
197 ((next->block_start == EXTENT_MAP_HOLE &&
198 prev->block_start == EXTENT_MAP_HOLE) ||
199 (next->block_start == EXTENT_MAP_INLINE &&
200 prev->block_start == EXTENT_MAP_INLINE) ||
201 (next->block_start == EXTENT_MAP_DELALLOC &&
202 prev->block_start == EXTENT_MAP_DELALLOC) ||
203 (next->block_start < EXTENT_MAP_LAST_BYTE - 1 &&
204 next->block_start == extent_map_block_end(prev)))) {
205 return 1;
206 }
207 return 0;
208}
209
210/**
211 * add_extent_mapping - add new extent map to the extent tree
212 * @tree: tree to insert new map in
213 * @em: map to insert
214 *
215 * Insert @em into @tree or perform a simple forward/backward merge with
216 * existing mappings. The extent_map struct passed in will be inserted
217 * into the tree directly, with an additional reference taken, or a
218 * reference dropped if the merge attempt was sucessfull.
219 */
220int add_extent_mapping(struct extent_map_tree *tree,
221 struct extent_map *em)
222{
223 int ret = 0;
224 struct extent_map *merge = NULL;
225 struct rb_node *rb;
226 struct extent_map *exist;
227
228 exist = lookup_extent_mapping(tree, em->start, em->len);
229 if (exist) {
230 free_extent_map(exist);
231 ret = -EEXIST;
232 goto out;
233 }
234 assert_spin_locked(&tree->lock);
235 rb = tree_insert(&tree->map, em->start, &em->rb_node);
236 if (rb) {
237 ret = -EEXIST;
238 free_extent_map(merge);
239 goto out;
240 }
241 atomic_inc(&em->refs);
242 if (em->start != 0) {
243 rb = rb_prev(&em->rb_node);
244 if (rb)
245 merge = rb_entry(rb, struct extent_map, rb_node);
246 if (rb && mergable_maps(merge, em)) {
247 em->start = merge->start;
248 em->len += merge->len;
249 em->block_len += merge->block_len;
250 em->block_start = merge->block_start;
251 merge->in_tree = 0;
252 rb_erase(&merge->rb_node, &tree->map);
253 free_extent_map(merge);
254 }
255 }
256 rb = rb_next(&em->rb_node);
257 if (rb)
258 merge = rb_entry(rb, struct extent_map, rb_node);
259 if (rb && mergable_maps(em, merge)) {
260 em->len += merge->len;
261 em->block_len += merge->len;
262 rb_erase(&merge->rb_node, &tree->map);
263 merge->in_tree = 0;
264 free_extent_map(merge);
265 }
266out:
267 return ret;
268}
269EXPORT_SYMBOL(add_extent_mapping);
270
271/* simple helper to do math around the end of an extent, handling wrap */
272static u64 range_end(u64 start, u64 len)
273{
274 if (start + len < start)
275 return (u64)-1;
276 return start + len;
277}
278
279/**
280 * lookup_extent_mapping - lookup extent_map
281 * @tree: tree to lookup in
282 * @start: byte offset to start the search
283 * @len: length of the lookup range
284 *
285 * Find and return the first extent_map struct in @tree that intersects the
286 * [start, len] range. There may be additional objects in the tree that
287 * intersect, so check the object returned carefully to make sure that no
288 * additional lookups are needed.
289 */
290struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
291 u64 start, u64 len)
292{
293 struct extent_map *em;
294 struct rb_node *rb_node;
295 struct rb_node *prev = NULL;
296 struct rb_node *next = NULL;
297 u64 end = range_end(start, len);
298
299 assert_spin_locked(&tree->lock);
300 rb_node = __tree_search(&tree->map, start, &prev, &next);
301 if (!rb_node && prev) {
302 em = rb_entry(prev, struct extent_map, rb_node);
303 if (end > em->start && start < extent_map_end(em))
304 goto found;
305 }
306 if (!rb_node && next) {
307 em = rb_entry(next, struct extent_map, rb_node);
308 if (end > em->start && start < extent_map_end(em))
309 goto found;
310 }
311 if (!rb_node) {
312 em = NULL;
313 goto out;
314 }
315 if (IS_ERR(rb_node)) {
316 em = ERR_PTR(PTR_ERR(rb_node));
317 goto out;
318 }
319 em = rb_entry(rb_node, struct extent_map, rb_node);
320 if (end > em->start && start < extent_map_end(em))
321 goto found;
322
323 em = NULL;
324 goto out;
325
326found:
327 atomic_inc(&em->refs);
328out:
329 return em;
330}
331EXPORT_SYMBOL(lookup_extent_mapping);
332
333/**
334 * remove_extent_mapping - removes an extent_map from the extent tree
335 * @tree: extent tree to remove from
336 * @em: extent map beeing removed
337 *
338 * Removes @em from @tree. No reference counts are dropped, and no checks
339 * are done to see if the range is in use
340 */
341int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
342{
343 int ret = 0;
344
345 WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
346 assert_spin_locked(&tree->lock);
347 rb_erase(&em->rb_node, &tree->map);
348 em->in_tree = 0;
349 return ret;
350}
351EXPORT_SYMBOL(remove_extent_mapping);
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
new file mode 100644
index 000000000000..fb6eeef06bb0
--- /dev/null
+++ b/fs/btrfs/extent_map.h
@@ -0,0 +1,62 @@
1#ifndef __EXTENTMAP__
2#define __EXTENTMAP__
3
4#include <linux/rbtree.h>
5
6#define EXTENT_MAP_LAST_BYTE (u64)-4
7#define EXTENT_MAP_HOLE (u64)-3
8#define EXTENT_MAP_INLINE (u64)-2
9#define EXTENT_MAP_DELALLOC (u64)-1
10
11/* bits for the flags field */
12#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */
13#define EXTENT_FLAG_COMPRESSED 1
14#define EXTENT_FLAG_VACANCY 2 /* no file extent item found */
15#define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */
16
17struct extent_map {
18 struct rb_node rb_node;
19
20 /* all of these are in bytes */
21 u64 start;
22 u64 len;
23 u64 orig_start;
24 u64 block_start;
25 u64 block_len;
26 unsigned long flags;
27 struct block_device *bdev;
28 atomic_t refs;
29 int in_tree;
30};
31
32struct extent_map_tree {
33 struct rb_root map;
34 spinlock_t lock;
35};
36
37static inline u64 extent_map_end(struct extent_map *em)
38{
39 if (em->start + em->len < em->start)
40 return (u64)-1;
41 return em->start + em->len;
42}
43
44static inline u64 extent_map_block_end(struct extent_map *em)
45{
46 if (em->block_start + em->block_len < em->block_start)
47 return (u64)-1;
48 return em->block_start + em->block_len;
49}
50
51void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask);
52struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
53 u64 start, u64 len);
54int add_extent_mapping(struct extent_map_tree *tree,
55 struct extent_map *em);
56int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
57
58struct extent_map *alloc_extent_map(gfp_t mask);
59void free_extent_map(struct extent_map *em);
60int __init extent_map_init(void);
61void extent_map_exit(void);
62#endif
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
new file mode 100644
index 000000000000..964652435fd1
--- /dev/null
+++ b/fs/btrfs/file-item.c
@@ -0,0 +1,831 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/bio.h>
20#include <linux/pagemap.h>
21#include <linux/highmem.h>
22#include "ctree.h"
23#include "disk-io.h"
24#include "transaction.h"
25#include "print-tree.h"
26
27#define MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \
28 sizeof(struct btrfs_item) * 2) / \
29 size) - 1))
30
31#define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \
32 sizeof(struct btrfs_ordered_sum)) / \
33 sizeof(struct btrfs_sector_sum) * \
34 (r)->sectorsize - (r)->sectorsize)
35
36int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
37 struct btrfs_root *root,
38 u64 objectid, u64 pos,
39 u64 disk_offset, u64 disk_num_bytes,
40 u64 num_bytes, u64 offset, u64 ram_bytes,
41 u8 compression, u8 encryption, u16 other_encoding)
42{
43 int ret = 0;
44 struct btrfs_file_extent_item *item;
45 struct btrfs_key file_key;
46 struct btrfs_path *path;
47 struct extent_buffer *leaf;
48
49 path = btrfs_alloc_path();
50 BUG_ON(!path);
51 file_key.objectid = objectid;
52 file_key.offset = pos;
53 btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
54
55 ret = btrfs_insert_empty_item(trans, root, path, &file_key,
56 sizeof(*item));
57 if (ret < 0)
58 goto out;
59 BUG_ON(ret);
60 leaf = path->nodes[0];
61 item = btrfs_item_ptr(leaf, path->slots[0],
62 struct btrfs_file_extent_item);
63 btrfs_set_file_extent_disk_bytenr(leaf, item, disk_offset);
64 btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes);
65 btrfs_set_file_extent_offset(leaf, item, offset);
66 btrfs_set_file_extent_num_bytes(leaf, item, num_bytes);
67 btrfs_set_file_extent_ram_bytes(leaf, item, ram_bytes);
68 btrfs_set_file_extent_generation(leaf, item, trans->transid);
69 btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
70 btrfs_set_file_extent_compression(leaf, item, compression);
71 btrfs_set_file_extent_encryption(leaf, item, encryption);
72 btrfs_set_file_extent_other_encoding(leaf, item, other_encoding);
73
74 btrfs_mark_buffer_dirty(leaf);
75out:
76 btrfs_free_path(path);
77 return ret;
78}
79
80struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
81 struct btrfs_root *root,
82 struct btrfs_path *path,
83 u64 bytenr, int cow)
84{
85 int ret;
86 struct btrfs_key file_key;
87 struct btrfs_key found_key;
88 struct btrfs_csum_item *item;
89 struct extent_buffer *leaf;
90 u64 csum_offset = 0;
91 u16 csum_size =
92 btrfs_super_csum_size(&root->fs_info->super_copy);
93 int csums_in_item;
94
95 file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
96 file_key.offset = bytenr;
97 btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY);
98 ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow);
99 if (ret < 0)
100 goto fail;
101 leaf = path->nodes[0];
102 if (ret > 0) {
103 ret = 1;
104 if (path->slots[0] == 0)
105 goto fail;
106 path->slots[0]--;
107 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
108 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY)
109 goto fail;
110
111 csum_offset = (bytenr - found_key.offset) >>
112 root->fs_info->sb->s_blocksize_bits;
113 csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
114 csums_in_item /= csum_size;
115
116 if (csum_offset >= csums_in_item) {
117 ret = -EFBIG;
118 goto fail;
119 }
120 }
121 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
122 item = (struct btrfs_csum_item *)((unsigned char *)item +
123 csum_offset * csum_size);
124 return item;
125fail:
126 if (ret > 0)
127 ret = -ENOENT;
128 return ERR_PTR(ret);
129}
130
131
132int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
133 struct btrfs_root *root,
134 struct btrfs_path *path, u64 objectid,
135 u64 offset, int mod)
136{
137 int ret;
138 struct btrfs_key file_key;
139 int ins_len = mod < 0 ? -1 : 0;
140 int cow = mod != 0;
141
142 file_key.objectid = objectid;
143 file_key.offset = offset;
144 btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
145 ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow);
146 return ret;
147}
148
149
150int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
151 struct bio *bio, u32 *dst)
152{
153 u32 sum;
154 struct bio_vec *bvec = bio->bi_io_vec;
155 int bio_index = 0;
156 u64 offset;
157 u64 item_start_offset = 0;
158 u64 item_last_offset = 0;
159 u64 disk_bytenr;
160 u32 diff;
161 u16 csum_size =
162 btrfs_super_csum_size(&root->fs_info->super_copy);
163 int ret;
164 struct btrfs_path *path;
165 struct btrfs_csum_item *item = NULL;
166 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
167
168 path = btrfs_alloc_path();
169 if (bio->bi_size > PAGE_CACHE_SIZE * 8)
170 path->reada = 2;
171
172 WARN_ON(bio->bi_vcnt <= 0);
173
174 disk_bytenr = (u64)bio->bi_sector << 9;
175 while (bio_index < bio->bi_vcnt) {
176 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
177 ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum);
178 if (ret == 0)
179 goto found;
180
181 if (!item || disk_bytenr < item_start_offset ||
182 disk_bytenr >= item_last_offset) {
183 struct btrfs_key found_key;
184 u32 item_size;
185
186 if (item)
187 btrfs_release_path(root, path);
188 item = btrfs_lookup_csum(NULL, root->fs_info->csum_root,
189 path, disk_bytenr, 0);
190 if (IS_ERR(item)) {
191 ret = PTR_ERR(item);
192 if (ret == -ENOENT || ret == -EFBIG)
193 ret = 0;
194 sum = 0;
195 if (BTRFS_I(inode)->root->root_key.objectid ==
196 BTRFS_DATA_RELOC_TREE_OBJECTID) {
197 set_extent_bits(io_tree, offset,
198 offset + bvec->bv_len - 1,
199 EXTENT_NODATASUM, GFP_NOFS);
200 } else {
201 printk(KERN_INFO "btrfs no csum found "
202 "for inode %lu start %llu\n",
203 inode->i_ino,
204 (unsigned long long)offset);
205 }
206 item = NULL;
207 btrfs_release_path(root, path);
208 goto found;
209 }
210 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
211 path->slots[0]);
212
213 item_start_offset = found_key.offset;
214 item_size = btrfs_item_size_nr(path->nodes[0],
215 path->slots[0]);
216 item_last_offset = item_start_offset +
217 (item_size / csum_size) *
218 root->sectorsize;
219 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
220 struct btrfs_csum_item);
221 }
222 /*
223 * this byte range must be able to fit inside
224 * a single leaf so it will also fit inside a u32
225 */
226 diff = disk_bytenr - item_start_offset;
227 diff = diff / root->sectorsize;
228 diff = diff * csum_size;
229
230 read_extent_buffer(path->nodes[0], &sum,
231 ((unsigned long)item) + diff,
232 csum_size);
233found:
234 if (dst)
235 *dst++ = sum;
236 else
237 set_state_private(io_tree, offset, sum);
238 disk_bytenr += bvec->bv_len;
239 bio_index++;
240 bvec++;
241 }
242 btrfs_free_path(path);
243 return 0;
244}
245
246int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
247 struct list_head *list)
248{
249 struct btrfs_key key;
250 struct btrfs_path *path;
251 struct extent_buffer *leaf;
252 struct btrfs_ordered_sum *sums;
253 struct btrfs_sector_sum *sector_sum;
254 struct btrfs_csum_item *item;
255 unsigned long offset;
256 int ret;
257 size_t size;
258 u64 csum_end;
259 u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
260
261 path = btrfs_alloc_path();
262 BUG_ON(!path);
263
264 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
265 key.offset = start;
266 key.type = BTRFS_EXTENT_CSUM_KEY;
267
268 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
269 if (ret < 0)
270 goto fail;
271 if (ret > 0 && path->slots[0] > 0) {
272 leaf = path->nodes[0];
273 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
274 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
275 key.type == BTRFS_EXTENT_CSUM_KEY) {
276 offset = (start - key.offset) >>
277 root->fs_info->sb->s_blocksize_bits;
278 if (offset * csum_size <
279 btrfs_item_size_nr(leaf, path->slots[0] - 1))
280 path->slots[0]--;
281 }
282 }
283
284 while (start <= end) {
285 leaf = path->nodes[0];
286 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
287 ret = btrfs_next_leaf(root, path);
288 if (ret < 0)
289 goto fail;
290 if (ret > 0)
291 break;
292 leaf = path->nodes[0];
293 }
294
295 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
296 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
297 key.type != BTRFS_EXTENT_CSUM_KEY)
298 break;
299
300 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
301 if (key.offset > end)
302 break;
303
304 if (key.offset > start)
305 start = key.offset;
306
307 size = btrfs_item_size_nr(leaf, path->slots[0]);
308 csum_end = key.offset + (size / csum_size) * root->sectorsize;
309 if (csum_end <= start) {
310 path->slots[0]++;
311 continue;
312 }
313
314 csum_end = min(csum_end, end + 1);
315 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
316 struct btrfs_csum_item);
317 while (start < csum_end) {
318 size = min_t(size_t, csum_end - start,
319 MAX_ORDERED_SUM_BYTES(root));
320 sums = kzalloc(btrfs_ordered_sum_size(root, size),
321 GFP_NOFS);
322 BUG_ON(!sums);
323
324 sector_sum = sums->sums;
325 sums->bytenr = start;
326 sums->len = size;
327
328 offset = (start - key.offset) >>
329 root->fs_info->sb->s_blocksize_bits;
330 offset *= csum_size;
331
332 while (size > 0) {
333 read_extent_buffer(path->nodes[0],
334 &sector_sum->sum,
335 ((unsigned long)item) +
336 offset, csum_size);
337 sector_sum->bytenr = start;
338
339 size -= root->sectorsize;
340 start += root->sectorsize;
341 offset += csum_size;
342 sector_sum++;
343 }
344 list_add_tail(&sums->list, list);
345 }
346 path->slots[0]++;
347 }
348 ret = 0;
349fail:
350 btrfs_free_path(path);
351 return ret;
352}
353
354int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
355 struct bio *bio, u64 file_start, int contig)
356{
357 struct btrfs_ordered_sum *sums;
358 struct btrfs_sector_sum *sector_sum;
359 struct btrfs_ordered_extent *ordered;
360 char *data;
361 struct bio_vec *bvec = bio->bi_io_vec;
362 int bio_index = 0;
363 unsigned long total_bytes = 0;
364 unsigned long this_sum_bytes = 0;
365 u64 offset;
366 u64 disk_bytenr;
367
368 WARN_ON(bio->bi_vcnt <= 0);
369 sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS);
370 if (!sums)
371 return -ENOMEM;
372
373 sector_sum = sums->sums;
374 disk_bytenr = (u64)bio->bi_sector << 9;
375 sums->len = bio->bi_size;
376 INIT_LIST_HEAD(&sums->list);
377
378 if (contig)
379 offset = file_start;
380 else
381 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
382
383 ordered = btrfs_lookup_ordered_extent(inode, offset);
384 BUG_ON(!ordered);
385 sums->bytenr = ordered->start;
386
387 while (bio_index < bio->bi_vcnt) {
388 if (!contig)
389 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
390
391 if (!contig && (offset >= ordered->file_offset + ordered->len ||
392 offset < ordered->file_offset)) {
393 unsigned long bytes_left;
394 sums->len = this_sum_bytes;
395 this_sum_bytes = 0;
396 btrfs_add_ordered_sum(inode, ordered, sums);
397 btrfs_put_ordered_extent(ordered);
398
399 bytes_left = bio->bi_size - total_bytes;
400
401 sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
402 GFP_NOFS);
403 BUG_ON(!sums);
404 sector_sum = sums->sums;
405 sums->len = bytes_left;
406 ordered = btrfs_lookup_ordered_extent(inode, offset);
407 BUG_ON(!ordered);
408 sums->bytenr = ordered->start;
409 }
410
411 data = kmap_atomic(bvec->bv_page, KM_USER0);
412 sector_sum->sum = ~(u32)0;
413 sector_sum->sum = btrfs_csum_data(root,
414 data + bvec->bv_offset,
415 sector_sum->sum,
416 bvec->bv_len);
417 kunmap_atomic(data, KM_USER0);
418 btrfs_csum_final(sector_sum->sum,
419 (char *)&sector_sum->sum);
420 sector_sum->bytenr = disk_bytenr;
421
422 sector_sum++;
423 bio_index++;
424 total_bytes += bvec->bv_len;
425 this_sum_bytes += bvec->bv_len;
426 disk_bytenr += bvec->bv_len;
427 offset += bvec->bv_len;
428 bvec++;
429 }
430 this_sum_bytes = 0;
431 btrfs_add_ordered_sum(inode, ordered, sums);
432 btrfs_put_ordered_extent(ordered);
433 return 0;
434}
435
436/*
437 * helper function for csum removal, this expects the
438 * key to describe the csum pointed to by the path, and it expects
439 * the csum to overlap the range [bytenr, len]
440 *
441 * The csum should not be entirely contained in the range and the
442 * range should not be entirely contained in the csum.
443 *
444 * This calls btrfs_truncate_item with the correct args based on the
445 * overlap, and fixes up the key as required.
446 */
447static noinline int truncate_one_csum(struct btrfs_trans_handle *trans,
448 struct btrfs_root *root,
449 struct btrfs_path *path,
450 struct btrfs_key *key,
451 u64 bytenr, u64 len)
452{
453 struct extent_buffer *leaf;
454 u16 csum_size =
455 btrfs_super_csum_size(&root->fs_info->super_copy);
456 u64 csum_end;
457 u64 end_byte = bytenr + len;
458 u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits;
459 int ret;
460
461 leaf = path->nodes[0];
462 csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
463 csum_end <<= root->fs_info->sb->s_blocksize_bits;
464 csum_end += key->offset;
465
466 if (key->offset < bytenr && csum_end <= end_byte) {
467 /*
468 * [ bytenr - len ]
469 * [ ]
470 * [csum ]
471 * A simple truncate off the end of the item
472 */
473 u32 new_size = (bytenr - key->offset) >> blocksize_bits;
474 new_size *= csum_size;
475 ret = btrfs_truncate_item(trans, root, path, new_size, 1);
476 BUG_ON(ret);
477 } else if (key->offset >= bytenr && csum_end > end_byte &&
478 end_byte > key->offset) {
479 /*
480 * [ bytenr - len ]
481 * [ ]
482 * [csum ]
483 * we need to truncate from the beginning of the csum
484 */
485 u32 new_size = (csum_end - end_byte) >> blocksize_bits;
486 new_size *= csum_size;
487
488 ret = btrfs_truncate_item(trans, root, path, new_size, 0);
489 BUG_ON(ret);
490
491 key->offset = end_byte;
492 ret = btrfs_set_item_key_safe(trans, root, path, key);
493 BUG_ON(ret);
494 } else {
495 BUG();
496 }
497 return 0;
498}
499
500/*
501 * deletes the csum items from the csum tree for a given
502 * range of bytes.
503 */
504int btrfs_del_csums(struct btrfs_trans_handle *trans,
505 struct btrfs_root *root, u64 bytenr, u64 len)
506{
507 struct btrfs_path *path;
508 struct btrfs_key key;
509 u64 end_byte = bytenr + len;
510 u64 csum_end;
511 struct extent_buffer *leaf;
512 int ret;
513 u16 csum_size =
514 btrfs_super_csum_size(&root->fs_info->super_copy);
515 int blocksize_bits = root->fs_info->sb->s_blocksize_bits;
516
517 root = root->fs_info->csum_root;
518
519 path = btrfs_alloc_path();
520
521 while (1) {
522 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
523 key.offset = end_byte - 1;
524 key.type = BTRFS_EXTENT_CSUM_KEY;
525
526 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
527 if (ret > 0) {
528 if (path->slots[0] == 0)
529 goto out;
530 path->slots[0]--;
531 }
532 leaf = path->nodes[0];
533 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
534
535 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
536 key.type != BTRFS_EXTENT_CSUM_KEY) {
537 break;
538 }
539
540 if (key.offset >= end_byte)
541 break;
542
543 csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
544 csum_end <<= blocksize_bits;
545 csum_end += key.offset;
546
547 /* this csum ends before we start, we're done */
548 if (csum_end <= bytenr)
549 break;
550
551 /* delete the entire item, it is inside our range */
552 if (key.offset >= bytenr && csum_end <= end_byte) {
553 ret = btrfs_del_item(trans, root, path);
554 BUG_ON(ret);
555 if (key.offset == bytenr)
556 break;
557 } else if (key.offset < bytenr && csum_end > end_byte) {
558 unsigned long offset;
559 unsigned long shift_len;
560 unsigned long item_offset;
561 /*
562 * [ bytenr - len ]
563 * [csum ]
564 *
565 * Our bytes are in the middle of the csum,
566 * we need to split this item and insert a new one.
567 *
568 * But we can't drop the path because the
569 * csum could change, get removed, extended etc.
570 *
571 * The trick here is the max size of a csum item leaves
572 * enough room in the tree block for a single
573 * item header. So, we split the item in place,
574 * adding a new header pointing to the existing
575 * bytes. Then we loop around again and we have
576 * a nicely formed csum item that we can neatly
577 * truncate.
578 */
579 offset = (bytenr - key.offset) >> blocksize_bits;
580 offset *= csum_size;
581
582 shift_len = (len >> blocksize_bits) * csum_size;
583
584 item_offset = btrfs_item_ptr_offset(leaf,
585 path->slots[0]);
586
587 memset_extent_buffer(leaf, 0, item_offset + offset,
588 shift_len);
589 key.offset = bytenr;
590
591 /*
592 * btrfs_split_item returns -EAGAIN when the
593 * item changed size or key
594 */
595 ret = btrfs_split_item(trans, root, path, &key, offset);
596 BUG_ON(ret && ret != -EAGAIN);
597
598 key.offset = end_byte - 1;
599 } else {
600 ret = truncate_one_csum(trans, root, path,
601 &key, bytenr, len);
602 BUG_ON(ret);
603 if (key.offset < bytenr)
604 break;
605 }
606 btrfs_release_path(root, path);
607 }
608out:
609 btrfs_free_path(path);
610 return 0;
611}
612
613int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
614 struct btrfs_root *root,
615 struct btrfs_ordered_sum *sums)
616{
617 u64 bytenr;
618 int ret;
619 struct btrfs_key file_key;
620 struct btrfs_key found_key;
621 u64 next_offset;
622 u64 total_bytes = 0;
623 int found_next;
624 struct btrfs_path *path;
625 struct btrfs_csum_item *item;
626 struct btrfs_csum_item *item_end;
627 struct extent_buffer *leaf = NULL;
628 u64 csum_offset;
629 struct btrfs_sector_sum *sector_sum;
630 u32 nritems;
631 u32 ins_size;
632 char *eb_map;
633 char *eb_token;
634 unsigned long map_len;
635 unsigned long map_start;
636 u16 csum_size =
637 btrfs_super_csum_size(&root->fs_info->super_copy);
638
639 path = btrfs_alloc_path();
640 BUG_ON(!path);
641 sector_sum = sums->sums;
642again:
643 next_offset = (u64)-1;
644 found_next = 0;
645 file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
646 file_key.offset = sector_sum->bytenr;
647 bytenr = sector_sum->bytenr;
648 btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY);
649
650 item = btrfs_lookup_csum(trans, root, path, sector_sum->bytenr, 1);
651 if (!IS_ERR(item)) {
652 leaf = path->nodes[0];
653 ret = 0;
654 goto found;
655 }
656 ret = PTR_ERR(item);
657 if (ret == -EFBIG) {
658 u32 item_size;
659 /* we found one, but it isn't big enough yet */
660 leaf = path->nodes[0];
661 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
662 if ((item_size / csum_size) >=
663 MAX_CSUM_ITEMS(root, csum_size)) {
664 /* already at max size, make a new one */
665 goto insert;
666 }
667 } else {
668 int slot = path->slots[0] + 1;
669 /* we didn't find a csum item, insert one */
670 nritems = btrfs_header_nritems(path->nodes[0]);
671 if (path->slots[0] >= nritems - 1) {
672 ret = btrfs_next_leaf(root, path);
673 if (ret == 1)
674 found_next = 1;
675 if (ret != 0)
676 goto insert;
677 slot = 0;
678 }
679 btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
680 if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
681 found_key.type != BTRFS_EXTENT_CSUM_KEY) {
682 found_next = 1;
683 goto insert;
684 }
685 next_offset = found_key.offset;
686 found_next = 1;
687 goto insert;
688 }
689
690 /*
691 * at this point, we know the tree has an item, but it isn't big
692 * enough yet to put our csum in. Grow it
693 */
694 btrfs_release_path(root, path);
695 ret = btrfs_search_slot(trans, root, &file_key, path,
696 csum_size, 1);
697 if (ret < 0)
698 goto fail_unlock;
699
700 if (ret > 0) {
701 if (path->slots[0] == 0)
702 goto insert;
703 path->slots[0]--;
704 }
705
706 leaf = path->nodes[0];
707 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
708 csum_offset = (bytenr - found_key.offset) >>
709 root->fs_info->sb->s_blocksize_bits;
710
711 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY ||
712 found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
713 csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) {
714 goto insert;
715 }
716
717 if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) /
718 csum_size) {
719 u32 diff = (csum_offset + 1) * csum_size;
720
721 /*
722 * is the item big enough already? we dropped our lock
723 * before and need to recheck
724 */
725 if (diff < btrfs_item_size_nr(leaf, path->slots[0]))
726 goto csum;
727
728 diff = diff - btrfs_item_size_nr(leaf, path->slots[0]);
729 if (diff != csum_size)
730 goto insert;
731
732 ret = btrfs_extend_item(trans, root, path, diff);
733 BUG_ON(ret);
734 goto csum;
735 }
736
737insert:
738 btrfs_release_path(root, path);
739 csum_offset = 0;
740 if (found_next) {
741 u64 tmp = total_bytes + root->sectorsize;
742 u64 next_sector = sector_sum->bytenr;
743 struct btrfs_sector_sum *next = sector_sum + 1;
744
745 while (tmp < sums->len) {
746 if (next_sector + root->sectorsize != next->bytenr)
747 break;
748 tmp += root->sectorsize;
749 next_sector = next->bytenr;
750 next++;
751 }
752 tmp = min(tmp, next_offset - file_key.offset);
753 tmp >>= root->fs_info->sb->s_blocksize_bits;
754 tmp = max((u64)1, tmp);
755 tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root, csum_size));
756 ins_size = csum_size * tmp;
757 } else {
758 ins_size = csum_size;
759 }
760 ret = btrfs_insert_empty_item(trans, root, path, &file_key,
761 ins_size);
762 if (ret < 0)
763 goto fail_unlock;
764 if (ret != 0) {
765 WARN_ON(1);
766 goto fail_unlock;
767 }
768csum:
769 leaf = path->nodes[0];
770 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
771 ret = 0;
772 item = (struct btrfs_csum_item *)((unsigned char *)item +
773 csum_offset * csum_size);
774found:
775 item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
776 item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
777 btrfs_item_size_nr(leaf, path->slots[0]));
778 eb_token = NULL;
779 cond_resched();
780next_sector:
781
782 if (!eb_token ||
783 (unsigned long)item + csum_size >= map_start + map_len) {
784 int err;
785
786 if (eb_token)
787 unmap_extent_buffer(leaf, eb_token, KM_USER1);
788 eb_token = NULL;
789 err = map_private_extent_buffer(leaf, (unsigned long)item,
790 csum_size,
791 &eb_token, &eb_map,
792 &map_start, &map_len, KM_USER1);
793 if (err)
794 eb_token = NULL;
795 }
796 if (eb_token) {
797 memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)),
798 &sector_sum->sum, csum_size);
799 } else {
800 write_extent_buffer(leaf, &sector_sum->sum,
801 (unsigned long)item, csum_size);
802 }
803
804 total_bytes += root->sectorsize;
805 sector_sum++;
806 if (total_bytes < sums->len) {
807 item = (struct btrfs_csum_item *)((char *)item +
808 csum_size);
809 if (item < item_end && bytenr + PAGE_CACHE_SIZE ==
810 sector_sum->bytenr) {
811 bytenr = sector_sum->bytenr;
812 goto next_sector;
813 }
814 }
815 if (eb_token) {
816 unmap_extent_buffer(leaf, eb_token, KM_USER1);
817 eb_token = NULL;
818 }
819 btrfs_mark_buffer_dirty(path->nodes[0]);
820 cond_resched();
821 if (total_bytes < sums->len) {
822 btrfs_release_path(root, path);
823 goto again;
824 }
825out:
826 btrfs_free_path(path);
827 return ret;
828
829fail_unlock:
830 goto out;
831}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
new file mode 100644
index 000000000000..90268334145e
--- /dev/null
+++ b/fs/btrfs/file.c
@@ -0,0 +1,1288 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/fs.h>
20#include <linux/pagemap.h>
21#include <linux/highmem.h>
22#include <linux/time.h>
23#include <linux/init.h>
24#include <linux/string.h>
25#include <linux/smp_lock.h>
26#include <linux/backing-dev.h>
27#include <linux/mpage.h>
28#include <linux/swap.h>
29#include <linux/writeback.h>
30#include <linux/statfs.h>
31#include <linux/compat.h>
32#include <linux/version.h>
33#include "ctree.h"
34#include "disk-io.h"
35#include "transaction.h"
36#include "btrfs_inode.h"
37#include "ioctl.h"
38#include "print-tree.h"
39#include "tree-log.h"
40#include "locking.h"
41#include "compat.h"
42
43
44/* simple helper to fault in pages and copy. This should go away
45 * and be replaced with calls into generic code.
46 */
47static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
48 int write_bytes,
49 struct page **prepared_pages,
50 const char __user *buf)
51{
52 long page_fault = 0;
53 int i;
54 int offset = pos & (PAGE_CACHE_SIZE - 1);
55
56 for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
57 size_t count = min_t(size_t,
58 PAGE_CACHE_SIZE - offset, write_bytes);
59 struct page *page = prepared_pages[i];
60 fault_in_pages_readable(buf, count);
61
62 /* Copy data from userspace to the current page */
63 kmap(page);
64 page_fault = __copy_from_user(page_address(page) + offset,
65 buf, count);
66 /* Flush processor's dcache for this page */
67 flush_dcache_page(page);
68 kunmap(page);
69 buf += count;
70 write_bytes -= count;
71
72 if (page_fault)
73 break;
74 }
75 return page_fault ? -EFAULT : 0;
76}
77
78/*
79 * unlocks pages after btrfs_file_write is done with them
80 */
81static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
82{
83 size_t i;
84 for (i = 0; i < num_pages; i++) {
85 if (!pages[i])
86 break;
87 /* page checked is some magic around finding pages that
88 * have been modified without going through btrfs_set_page_dirty
89 * clear it here
90 */
91 ClearPageChecked(pages[i]);
92 unlock_page(pages[i]);
93 mark_page_accessed(pages[i]);
94 page_cache_release(pages[i]);
95 }
96}
97
98/*
99 * after copy_from_user, pages need to be dirtied and we need to make
100 * sure holes are created between the current EOF and the start of
101 * any next extents (if required).
102 *
103 * this also makes the decision about creating an inline extent vs
104 * doing real data extents, marking pages dirty and delalloc as required.
105 */
106static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
107 struct btrfs_root *root,
108 struct file *file,
109 struct page **pages,
110 size_t num_pages,
111 loff_t pos,
112 size_t write_bytes)
113{
114 int err = 0;
115 int i;
116 struct inode *inode = fdentry(file)->d_inode;
117 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
118 u64 hint_byte;
119 u64 num_bytes;
120 u64 start_pos;
121 u64 end_of_last_block;
122 u64 end_pos = pos + write_bytes;
123 loff_t isize = i_size_read(inode);
124
125 start_pos = pos & ~((u64)root->sectorsize - 1);
126 num_bytes = (write_bytes + pos - start_pos +
127 root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
128
129 end_of_last_block = start_pos + num_bytes - 1;
130
131 lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
132 trans = btrfs_join_transaction(root, 1);
133 if (!trans) {
134 err = -ENOMEM;
135 goto out_unlock;
136 }
137 btrfs_set_trans_block_group(trans, inode);
138 hint_byte = 0;
139
140 set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS);
141
142 /* check for reserved extents on each page, we don't want
143 * to reset the delalloc bit on things that already have
144 * extents reserved.
145 */
146 btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
147 for (i = 0; i < num_pages; i++) {
148 struct page *p = pages[i];
149 SetPageUptodate(p);
150 ClearPageChecked(p);
151 set_page_dirty(p);
152 }
153 if (end_pos > isize) {
154 i_size_write(inode, end_pos);
155 btrfs_update_inode(trans, root, inode);
156 }
157 err = btrfs_end_transaction(trans, root);
158out_unlock:
159 unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
160 return err;
161}
162
163/*
164 * this drops all the extents in the cache that intersect the range
165 * [start, end]. Existing extents are split as required.
166 */
167int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
168 int skip_pinned)
169{
170 struct extent_map *em;
171 struct extent_map *split = NULL;
172 struct extent_map *split2 = NULL;
173 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
174 u64 len = end - start + 1;
175 int ret;
176 int testend = 1;
177 unsigned long flags;
178 int compressed = 0;
179
180 WARN_ON(end < start);
181 if (end == (u64)-1) {
182 len = (u64)-1;
183 testend = 0;
184 }
185 while (1) {
186 if (!split)
187 split = alloc_extent_map(GFP_NOFS);
188 if (!split2)
189 split2 = alloc_extent_map(GFP_NOFS);
190
191 spin_lock(&em_tree->lock);
192 em = lookup_extent_mapping(em_tree, start, len);
193 if (!em) {
194 spin_unlock(&em_tree->lock);
195 break;
196 }
197 flags = em->flags;
198 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
199 spin_unlock(&em_tree->lock);
200 if (em->start <= start &&
201 (!testend || em->start + em->len >= start + len)) {
202 free_extent_map(em);
203 break;
204 }
205 if (start < em->start) {
206 len = em->start - start;
207 } else {
208 len = start + len - (em->start + em->len);
209 start = em->start + em->len;
210 }
211 free_extent_map(em);
212 continue;
213 }
214 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
215 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
216 remove_extent_mapping(em_tree, em);
217
218 if (em->block_start < EXTENT_MAP_LAST_BYTE &&
219 em->start < start) {
220 split->start = em->start;
221 split->len = start - em->start;
222 split->orig_start = em->orig_start;
223 split->block_start = em->block_start;
224
225 if (compressed)
226 split->block_len = em->block_len;
227 else
228 split->block_len = split->len;
229
230 split->bdev = em->bdev;
231 split->flags = flags;
232 ret = add_extent_mapping(em_tree, split);
233 BUG_ON(ret);
234 free_extent_map(split);
235 split = split2;
236 split2 = NULL;
237 }
238 if (em->block_start < EXTENT_MAP_LAST_BYTE &&
239 testend && em->start + em->len > start + len) {
240 u64 diff = start + len - em->start;
241
242 split->start = start + len;
243 split->len = em->start + em->len - (start + len);
244 split->bdev = em->bdev;
245 split->flags = flags;
246
247 if (compressed) {
248 split->block_len = em->block_len;
249 split->block_start = em->block_start;
250 split->orig_start = em->orig_start;
251 } else {
252 split->block_len = split->len;
253 split->block_start = em->block_start + diff;
254 split->orig_start = split->start;
255 }
256
257 ret = add_extent_mapping(em_tree, split);
258 BUG_ON(ret);
259 free_extent_map(split);
260 split = NULL;
261 }
262 spin_unlock(&em_tree->lock);
263
264 /* once for us */
265 free_extent_map(em);
266 /* once for the tree*/
267 free_extent_map(em);
268 }
269 if (split)
270 free_extent_map(split);
271 if (split2)
272 free_extent_map(split2);
273 return 0;
274}
275
276int btrfs_check_file(struct btrfs_root *root, struct inode *inode)
277{
278 return 0;
279#if 0
280 struct btrfs_path *path;
281 struct btrfs_key found_key;
282 struct extent_buffer *leaf;
283 struct btrfs_file_extent_item *extent;
284 u64 last_offset = 0;
285 int nritems;
286 int slot;
287 int found_type;
288 int ret;
289 int err = 0;
290 u64 extent_end = 0;
291
292 path = btrfs_alloc_path();
293 ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino,
294 last_offset, 0);
295 while (1) {
296 nritems = btrfs_header_nritems(path->nodes[0]);
297 if (path->slots[0] >= nritems) {
298 ret = btrfs_next_leaf(root, path);
299 if (ret)
300 goto out;
301 nritems = btrfs_header_nritems(path->nodes[0]);
302 }
303 slot = path->slots[0];
304 leaf = path->nodes[0];
305 btrfs_item_key_to_cpu(leaf, &found_key, slot);
306 if (found_key.objectid != inode->i_ino)
307 break;
308 if (found_key.type != BTRFS_EXTENT_DATA_KEY)
309 goto out;
310
311 if (found_key.offset < last_offset) {
312 WARN_ON(1);
313 btrfs_print_leaf(root, leaf);
314 printk(KERN_ERR "inode %lu found offset %llu "
315 "expected %llu\n", inode->i_ino,
316 (unsigned long long)found_key.offset,
317 (unsigned long long)last_offset);
318 err = 1;
319 goto out;
320 }
321 extent = btrfs_item_ptr(leaf, slot,
322 struct btrfs_file_extent_item);
323 found_type = btrfs_file_extent_type(leaf, extent);
324 if (found_type == BTRFS_FILE_EXTENT_REG) {
325 extent_end = found_key.offset +
326 btrfs_file_extent_num_bytes(leaf, extent);
327 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
328 struct btrfs_item *item;
329 item = btrfs_item_nr(leaf, slot);
330 extent_end = found_key.offset +
331 btrfs_file_extent_inline_len(leaf, extent);
332 extent_end = (extent_end + root->sectorsize - 1) &
333 ~((u64)root->sectorsize - 1);
334 }
335 last_offset = extent_end;
336 path->slots[0]++;
337 }
338 if (0 && last_offset < inode->i_size) {
339 WARN_ON(1);
340 btrfs_print_leaf(root, leaf);
341 printk(KERN_ERR "inode %lu found offset %llu size %llu\n",
342 inode->i_ino, (unsigned long long)last_offset,
343 (unsigned long long)inode->i_size);
344 err = 1;
345
346 }
347out:
348 btrfs_free_path(path);
349 return err;
350#endif
351}
352
353/*
354 * this is very complex, but the basic idea is to drop all extents
355 * in the range start - end. hint_block is filled in with a block number
356 * that would be a good hint to the block allocator for this file.
357 *
358 * If an extent intersects the range but is not entirely inside the range
359 * it is either truncated or split. Anything entirely inside the range
360 * is deleted from the tree.
361 *
362 * inline_limit is used to tell this code which offsets in the file to keep
363 * if they contain inline extents.
364 */
365noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
366 struct btrfs_root *root, struct inode *inode,
367 u64 start, u64 end, u64 inline_limit, u64 *hint_byte)
368{
369 u64 extent_end = 0;
370 u64 locked_end = end;
371 u64 search_start = start;
372 u64 leaf_start;
373 u64 ram_bytes = 0;
374 u64 orig_parent = 0;
375 u64 disk_bytenr = 0;
376 u8 compression;
377 u8 encryption;
378 u16 other_encoding = 0;
379 u64 root_gen;
380 u64 root_owner;
381 struct extent_buffer *leaf;
382 struct btrfs_file_extent_item *extent;
383 struct btrfs_path *path;
384 struct btrfs_key key;
385 struct btrfs_file_extent_item old;
386 int keep;
387 int slot;
388 int bookend;
389 int found_type = 0;
390 int found_extent;
391 int found_inline;
392 int recow;
393 int ret;
394
395 inline_limit = 0;
396 btrfs_drop_extent_cache(inode, start, end - 1, 0);
397
398 path = btrfs_alloc_path();
399 if (!path)
400 return -ENOMEM;
401 while (1) {
402 recow = 0;
403 btrfs_release_path(root, path);
404 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
405 search_start, -1);
406 if (ret < 0)
407 goto out;
408 if (ret > 0) {
409 if (path->slots[0] == 0) {
410 ret = 0;
411 goto out;
412 }
413 path->slots[0]--;
414 }
415next_slot:
416 keep = 0;
417 bookend = 0;
418 found_extent = 0;
419 found_inline = 0;
420 leaf_start = 0;
421 root_gen = 0;
422 root_owner = 0;
423 compression = 0;
424 encryption = 0;
425 extent = NULL;
426 leaf = path->nodes[0];
427 slot = path->slots[0];
428 ret = 0;
429 btrfs_item_key_to_cpu(leaf, &key, slot);
430 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY &&
431 key.offset >= end) {
432 goto out;
433 }
434 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
435 key.objectid != inode->i_ino) {
436 goto out;
437 }
438 if (recow) {
439 search_start = max(key.offset, start);
440 continue;
441 }
442 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
443 extent = btrfs_item_ptr(leaf, slot,
444 struct btrfs_file_extent_item);
445 found_type = btrfs_file_extent_type(leaf, extent);
446 compression = btrfs_file_extent_compression(leaf,
447 extent);
448 encryption = btrfs_file_extent_encryption(leaf,
449 extent);
450 other_encoding = btrfs_file_extent_other_encoding(leaf,
451 extent);
452 if (found_type == BTRFS_FILE_EXTENT_REG ||
453 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
454 extent_end =
455 btrfs_file_extent_disk_bytenr(leaf,
456 extent);
457 if (extent_end)
458 *hint_byte = extent_end;
459
460 extent_end = key.offset +
461 btrfs_file_extent_num_bytes(leaf, extent);
462 ram_bytes = btrfs_file_extent_ram_bytes(leaf,
463 extent);
464 found_extent = 1;
465 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
466 found_inline = 1;
467 extent_end = key.offset +
468 btrfs_file_extent_inline_len(leaf, extent);
469 }
470 } else {
471 extent_end = search_start;
472 }
473
474 /* we found nothing we can drop */
475 if ((!found_extent && !found_inline) ||
476 search_start >= extent_end) {
477 int nextret;
478 u32 nritems;
479 nritems = btrfs_header_nritems(leaf);
480 if (slot >= nritems - 1) {
481 nextret = btrfs_next_leaf(root, path);
482 if (nextret)
483 goto out;
484 recow = 1;
485 } else {
486 path->slots[0]++;
487 }
488 goto next_slot;
489 }
490
491 if (end <= extent_end && start >= key.offset && found_inline)
492 *hint_byte = EXTENT_MAP_INLINE;
493
494 if (found_extent) {
495 read_extent_buffer(leaf, &old, (unsigned long)extent,
496 sizeof(old));
497 root_gen = btrfs_header_generation(leaf);
498 root_owner = btrfs_header_owner(leaf);
499 leaf_start = leaf->start;
500 }
501
502 if (end < extent_end && end >= key.offset) {
503 bookend = 1;
504 if (found_inline && start <= key.offset)
505 keep = 1;
506 }
507
508 if (bookend && found_extent) {
509 if (locked_end < extent_end) {
510 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
511 locked_end, extent_end - 1,
512 GFP_NOFS);
513 if (!ret) {
514 btrfs_release_path(root, path);
515 lock_extent(&BTRFS_I(inode)->io_tree,
516 locked_end, extent_end - 1,
517 GFP_NOFS);
518 locked_end = extent_end;
519 continue;
520 }
521 locked_end = extent_end;
522 }
523 orig_parent = path->nodes[0]->start;
524 disk_bytenr = le64_to_cpu(old.disk_bytenr);
525 if (disk_bytenr != 0) {
526 ret = btrfs_inc_extent_ref(trans, root,
527 disk_bytenr,
528 le64_to_cpu(old.disk_num_bytes),
529 orig_parent, root->root_key.objectid,
530 trans->transid, inode->i_ino);
531 BUG_ON(ret);
532 }
533 }
534
535 if (found_inline) {
536 u64 mask = root->sectorsize - 1;
537 search_start = (extent_end + mask) & ~mask;
538 } else
539 search_start = extent_end;
540
541 /* truncate existing extent */
542 if (start > key.offset) {
543 u64 new_num;
544 u64 old_num;
545 keep = 1;
546 WARN_ON(start & (root->sectorsize - 1));
547 if (found_extent) {
548 new_num = start - key.offset;
549 old_num = btrfs_file_extent_num_bytes(leaf,
550 extent);
551 *hint_byte =
552 btrfs_file_extent_disk_bytenr(leaf,
553 extent);
554 if (btrfs_file_extent_disk_bytenr(leaf,
555 extent)) {
556 inode_sub_bytes(inode, old_num -
557 new_num);
558 }
559 btrfs_set_file_extent_num_bytes(leaf,
560 extent, new_num);
561 btrfs_mark_buffer_dirty(leaf);
562 } else if (key.offset < inline_limit &&
563 (end > extent_end) &&
564 (inline_limit < extent_end)) {
565 u32 new_size;
566 new_size = btrfs_file_extent_calc_inline_size(
567 inline_limit - key.offset);
568 inode_sub_bytes(inode, extent_end -
569 inline_limit);
570 btrfs_set_file_extent_ram_bytes(leaf, extent,
571 new_size);
572 if (!compression && !encryption) {
573 btrfs_truncate_item(trans, root, path,
574 new_size, 1);
575 }
576 }
577 }
578 /* delete the entire extent */
579 if (!keep) {
580 if (found_inline)
581 inode_sub_bytes(inode, extent_end -
582 key.offset);
583 ret = btrfs_del_item(trans, root, path);
584 /* TODO update progress marker and return */
585 BUG_ON(ret);
586 extent = NULL;
587 btrfs_release_path(root, path);
588 /* the extent will be freed later */
589 }
590 if (bookend && found_inline && start <= key.offset) {
591 u32 new_size;
592 new_size = btrfs_file_extent_calc_inline_size(
593 extent_end - end);
594 inode_sub_bytes(inode, end - key.offset);
595 btrfs_set_file_extent_ram_bytes(leaf, extent,
596 new_size);
597 if (!compression && !encryption)
598 ret = btrfs_truncate_item(trans, root, path,
599 new_size, 0);
600 BUG_ON(ret);
601 }
602 /* create bookend, splitting the extent in two */
603 if (bookend && found_extent) {
604 struct btrfs_key ins;
605 ins.objectid = inode->i_ino;
606 ins.offset = end;
607 btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
608
609 btrfs_release_path(root, path);
610 ret = btrfs_insert_empty_item(trans, root, path, &ins,
611 sizeof(*extent));
612 BUG_ON(ret);
613
614 leaf = path->nodes[0];
615 extent = btrfs_item_ptr(leaf, path->slots[0],
616 struct btrfs_file_extent_item);
617 write_extent_buffer(leaf, &old,
618 (unsigned long)extent, sizeof(old));
619
620 btrfs_set_file_extent_compression(leaf, extent,
621 compression);
622 btrfs_set_file_extent_encryption(leaf, extent,
623 encryption);
624 btrfs_set_file_extent_other_encoding(leaf, extent,
625 other_encoding);
626 btrfs_set_file_extent_offset(leaf, extent,
627 le64_to_cpu(old.offset) + end - key.offset);
628 WARN_ON(le64_to_cpu(old.num_bytes) <
629 (extent_end - end));
630 btrfs_set_file_extent_num_bytes(leaf, extent,
631 extent_end - end);
632
633 /*
634 * set the ram bytes to the size of the full extent
635 * before splitting. This is a worst case flag,
636 * but its the best we can do because we don't know
637 * how splitting affects compression
638 */
639 btrfs_set_file_extent_ram_bytes(leaf, extent,
640 ram_bytes);
641 btrfs_set_file_extent_type(leaf, extent, found_type);
642
643 btrfs_mark_buffer_dirty(path->nodes[0]);
644
645 if (disk_bytenr != 0) {
646 ret = btrfs_update_extent_ref(trans, root,
647 disk_bytenr, orig_parent,
648 leaf->start,
649 root->root_key.objectid,
650 trans->transid, ins.objectid);
651
652 BUG_ON(ret);
653 }
654 btrfs_release_path(root, path);
655 if (disk_bytenr != 0)
656 inode_add_bytes(inode, extent_end - end);
657 }
658
659 if (found_extent && !keep) {
660 u64 old_disk_bytenr = le64_to_cpu(old.disk_bytenr);
661
662 if (old_disk_bytenr != 0) {
663 inode_sub_bytes(inode,
664 le64_to_cpu(old.num_bytes));
665 ret = btrfs_free_extent(trans, root,
666 old_disk_bytenr,
667 le64_to_cpu(old.disk_num_bytes),
668 leaf_start, root_owner,
669 root_gen, key.objectid, 0);
670 BUG_ON(ret);
671 *hint_byte = old_disk_bytenr;
672 }
673 }
674
675 if (search_start >= end) {
676 ret = 0;
677 goto out;
678 }
679 }
680out:
681 btrfs_free_path(path);
682 if (locked_end > end) {
683 unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1,
684 GFP_NOFS);
685 }
686 btrfs_check_file(root, inode);
687 return ret;
688}
689
690static int extent_mergeable(struct extent_buffer *leaf, int slot,
691 u64 objectid, u64 bytenr, u64 *start, u64 *end)
692{
693 struct btrfs_file_extent_item *fi;
694 struct btrfs_key key;
695 u64 extent_end;
696
697 if (slot < 0 || slot >= btrfs_header_nritems(leaf))
698 return 0;
699
700 btrfs_item_key_to_cpu(leaf, &key, slot);
701 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
702 return 0;
703
704 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
705 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
706 btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
707 btrfs_file_extent_compression(leaf, fi) ||
708 btrfs_file_extent_encryption(leaf, fi) ||
709 btrfs_file_extent_other_encoding(leaf, fi))
710 return 0;
711
712 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
713 if ((*start && *start != key.offset) || (*end && *end != extent_end))
714 return 0;
715
716 *start = key.offset;
717 *end = extent_end;
718 return 1;
719}
720
721/*
722 * Mark extent in the range start - end as written.
723 *
724 * This changes extent type from 'pre-allocated' to 'regular'. If only
725 * part of extent is marked as written, the extent will be split into
726 * two or three.
727 */
728int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
729 struct btrfs_root *root,
730 struct inode *inode, u64 start, u64 end)
731{
732 struct extent_buffer *leaf;
733 struct btrfs_path *path;
734 struct btrfs_file_extent_item *fi;
735 struct btrfs_key key;
736 u64 bytenr;
737 u64 num_bytes;
738 u64 extent_end;
739 u64 extent_offset;
740 u64 other_start;
741 u64 other_end;
742 u64 split = start;
743 u64 locked_end = end;
744 u64 orig_parent;
745 int extent_type;
746 int split_end = 1;
747 int ret;
748
749 btrfs_drop_extent_cache(inode, start, end - 1, 0);
750
751 path = btrfs_alloc_path();
752 BUG_ON(!path);
753again:
754 key.objectid = inode->i_ino;
755 key.type = BTRFS_EXTENT_DATA_KEY;
756 if (split == start)
757 key.offset = split;
758 else
759 key.offset = split - 1;
760
761 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
762 if (ret > 0 && path->slots[0] > 0)
763 path->slots[0]--;
764
765 leaf = path->nodes[0];
766 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
767 BUG_ON(key.objectid != inode->i_ino ||
768 key.type != BTRFS_EXTENT_DATA_KEY);
769 fi = btrfs_item_ptr(leaf, path->slots[0],
770 struct btrfs_file_extent_item);
771 extent_type = btrfs_file_extent_type(leaf, fi);
772 BUG_ON(extent_type != BTRFS_FILE_EXTENT_PREALLOC);
773 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
774 BUG_ON(key.offset > start || extent_end < end);
775
776 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
777 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
778 extent_offset = btrfs_file_extent_offset(leaf, fi);
779
780 if (key.offset == start)
781 split = end;
782
783 if (key.offset == start && extent_end == end) {
784 int del_nr = 0;
785 int del_slot = 0;
786 u64 leaf_owner = btrfs_header_owner(leaf);
787 u64 leaf_gen = btrfs_header_generation(leaf);
788 other_start = end;
789 other_end = 0;
790 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
791 bytenr, &other_start, &other_end)) {
792 extent_end = other_end;
793 del_slot = path->slots[0] + 1;
794 del_nr++;
795 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
796 leaf->start, leaf_owner,
797 leaf_gen, inode->i_ino, 0);
798 BUG_ON(ret);
799 }
800 other_start = 0;
801 other_end = start;
802 if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino,
803 bytenr, &other_start, &other_end)) {
804 key.offset = other_start;
805 del_slot = path->slots[0];
806 del_nr++;
807 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
808 leaf->start, leaf_owner,
809 leaf_gen, inode->i_ino, 0);
810 BUG_ON(ret);
811 }
812 split_end = 0;
813 if (del_nr == 0) {
814 btrfs_set_file_extent_type(leaf, fi,
815 BTRFS_FILE_EXTENT_REG);
816 goto done;
817 }
818
819 fi = btrfs_item_ptr(leaf, del_slot - 1,
820 struct btrfs_file_extent_item);
821 btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
822 btrfs_set_file_extent_num_bytes(leaf, fi,
823 extent_end - key.offset);
824 btrfs_mark_buffer_dirty(leaf);
825
826 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
827 BUG_ON(ret);
828 goto done;
829 } else if (split == start) {
830 if (locked_end < extent_end) {
831 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
832 locked_end, extent_end - 1, GFP_NOFS);
833 if (!ret) {
834 btrfs_release_path(root, path);
835 lock_extent(&BTRFS_I(inode)->io_tree,
836 locked_end, extent_end - 1, GFP_NOFS);
837 locked_end = extent_end;
838 goto again;
839 }
840 locked_end = extent_end;
841 }
842 btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset);
843 extent_offset += split - key.offset;
844 } else {
845 BUG_ON(key.offset != start);
846 btrfs_set_file_extent_offset(leaf, fi, extent_offset +
847 split - key.offset);
848 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
849 key.offset = split;
850 btrfs_set_item_key_safe(trans, root, path, &key);
851 extent_end = split;
852 }
853
854 if (extent_end == end) {
855 split_end = 0;
856 extent_type = BTRFS_FILE_EXTENT_REG;
857 }
858 if (extent_end == end && split == start) {
859 other_start = end;
860 other_end = 0;
861 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
862 bytenr, &other_start, &other_end)) {
863 path->slots[0]++;
864 fi = btrfs_item_ptr(leaf, path->slots[0],
865 struct btrfs_file_extent_item);
866 key.offset = split;
867 btrfs_set_item_key_safe(trans, root, path, &key);
868 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
869 btrfs_set_file_extent_num_bytes(leaf, fi,
870 other_end - split);
871 goto done;
872 }
873 }
874 if (extent_end == end && split == end) {
875 other_start = 0;
876 other_end = start;
877 if (extent_mergeable(leaf, path->slots[0] - 1 , inode->i_ino,
878 bytenr, &other_start, &other_end)) {
879 path->slots[0]--;
880 fi = btrfs_item_ptr(leaf, path->slots[0],
881 struct btrfs_file_extent_item);
882 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end -
883 other_start);
884 goto done;
885 }
886 }
887
888 btrfs_mark_buffer_dirty(leaf);
889
890 orig_parent = leaf->start;
891 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
892 orig_parent, root->root_key.objectid,
893 trans->transid, inode->i_ino);
894 BUG_ON(ret);
895 btrfs_release_path(root, path);
896
897 key.offset = start;
898 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*fi));
899 BUG_ON(ret);
900
901 leaf = path->nodes[0];
902 fi = btrfs_item_ptr(leaf, path->slots[0],
903 struct btrfs_file_extent_item);
904 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
905 btrfs_set_file_extent_type(leaf, fi, extent_type);
906 btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr);
907 btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
908 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
909 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset);
910 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
911 btrfs_set_file_extent_compression(leaf, fi, 0);
912 btrfs_set_file_extent_encryption(leaf, fi, 0);
913 btrfs_set_file_extent_other_encoding(leaf, fi, 0);
914
915 if (orig_parent != leaf->start) {
916 ret = btrfs_update_extent_ref(trans, root, bytenr,
917 orig_parent, leaf->start,
918 root->root_key.objectid,
919 trans->transid, inode->i_ino);
920 BUG_ON(ret);
921 }
922done:
923 btrfs_mark_buffer_dirty(leaf);
924 btrfs_release_path(root, path);
925 if (split_end && split == start) {
926 split = end;
927 goto again;
928 }
929 if (locked_end > end) {
930 unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1,
931 GFP_NOFS);
932 }
933 btrfs_free_path(path);
934 return 0;
935}
936
937/*
938 * this gets pages into the page cache and locks them down, it also properly
939 * waits for data=ordered extents to finish before allowing the pages to be
940 * modified.
941 */
942static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
943 struct page **pages, size_t num_pages,
944 loff_t pos, unsigned long first_index,
945 unsigned long last_index, size_t write_bytes)
946{
947 int i;
948 unsigned long index = pos >> PAGE_CACHE_SHIFT;
949 struct inode *inode = fdentry(file)->d_inode;
950 int err = 0;
951 u64 start_pos;
952 u64 last_pos;
953
954 start_pos = pos & ~((u64)root->sectorsize - 1);
955 last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
956
957 if (start_pos > inode->i_size) {
958 err = btrfs_cont_expand(inode, start_pos);
959 if (err)
960 return err;
961 }
962
963 memset(pages, 0, num_pages * sizeof(struct page *));
964again:
965 for (i = 0; i < num_pages; i++) {
966 pages[i] = grab_cache_page(inode->i_mapping, index + i);
967 if (!pages[i]) {
968 err = -ENOMEM;
969 BUG_ON(1);
970 }
971 wait_on_page_writeback(pages[i]);
972 }
973 if (start_pos < inode->i_size) {
974 struct btrfs_ordered_extent *ordered;
975 lock_extent(&BTRFS_I(inode)->io_tree,
976 start_pos, last_pos - 1, GFP_NOFS);
977 ordered = btrfs_lookup_first_ordered_extent(inode,
978 last_pos - 1);
979 if (ordered &&
980 ordered->file_offset + ordered->len > start_pos &&
981 ordered->file_offset < last_pos) {
982 btrfs_put_ordered_extent(ordered);
983 unlock_extent(&BTRFS_I(inode)->io_tree,
984 start_pos, last_pos - 1, GFP_NOFS);
985 for (i = 0; i < num_pages; i++) {
986 unlock_page(pages[i]);
987 page_cache_release(pages[i]);
988 }
989 btrfs_wait_ordered_range(inode, start_pos,
990 last_pos - start_pos);
991 goto again;
992 }
993 if (ordered)
994 btrfs_put_ordered_extent(ordered);
995
996 clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos,
997 last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC,
998 GFP_NOFS);
999 unlock_extent(&BTRFS_I(inode)->io_tree,
1000 start_pos, last_pos - 1, GFP_NOFS);
1001 }
1002 for (i = 0; i < num_pages; i++) {
1003 clear_page_dirty_for_io(pages[i]);
1004 set_page_extent_mapped(pages[i]);
1005 WARN_ON(!PageLocked(pages[i]));
1006 }
1007 return 0;
1008}
1009
1010static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1011 size_t count, loff_t *ppos)
1012{
1013 loff_t pos;
1014 loff_t start_pos;
1015 ssize_t num_written = 0;
1016 ssize_t err = 0;
1017 int ret = 0;
1018 struct inode *inode = fdentry(file)->d_inode;
1019 struct btrfs_root *root = BTRFS_I(inode)->root;
1020 struct page **pages = NULL;
1021 int nrptrs;
1022 struct page *pinned[2];
1023 unsigned long first_index;
1024 unsigned long last_index;
1025 int will_write;
1026
1027 will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) ||
1028 (file->f_flags & O_DIRECT));
1029
1030 nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
1031 PAGE_CACHE_SIZE / (sizeof(struct page *)));
1032 pinned[0] = NULL;
1033 pinned[1] = NULL;
1034
1035 pos = *ppos;
1036 start_pos = pos;
1037
1038 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
1039 current->backing_dev_info = inode->i_mapping->backing_dev_info;
1040 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1041 if (err)
1042 goto out_nolock;
1043 if (count == 0)
1044 goto out_nolock;
1045
1046 err = file_remove_suid(file);
1047 if (err)
1048 goto out_nolock;
1049 file_update_time(file);
1050
1051 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
1052
1053 mutex_lock(&inode->i_mutex);
1054 BTRFS_I(inode)->sequence++;
1055 first_index = pos >> PAGE_CACHE_SHIFT;
1056 last_index = (pos + count) >> PAGE_CACHE_SHIFT;
1057
1058 /*
1059 * there are lots of better ways to do this, but this code
1060 * makes sure the first and last page in the file range are
1061 * up to date and ready for cow
1062 */
1063 if ((pos & (PAGE_CACHE_SIZE - 1))) {
1064 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
1065 if (!PageUptodate(pinned[0])) {
1066 ret = btrfs_readpage(NULL, pinned[0]);
1067 BUG_ON(ret);
1068 wait_on_page_locked(pinned[0]);
1069 } else {
1070 unlock_page(pinned[0]);
1071 }
1072 }
1073 if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
1074 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
1075 if (!PageUptodate(pinned[1])) {
1076 ret = btrfs_readpage(NULL, pinned[1]);
1077 BUG_ON(ret);
1078 wait_on_page_locked(pinned[1]);
1079 } else {
1080 unlock_page(pinned[1]);
1081 }
1082 }
1083
1084 while (count > 0) {
1085 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1086 size_t write_bytes = min(count, nrptrs *
1087 (size_t)PAGE_CACHE_SIZE -
1088 offset);
1089 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
1090 PAGE_CACHE_SHIFT;
1091
1092 WARN_ON(num_pages > nrptrs);
1093 memset(pages, 0, sizeof(struct page *) * nrptrs);
1094
1095 ret = btrfs_check_free_space(root, write_bytes, 0);
1096 if (ret)
1097 goto out;
1098
1099 ret = prepare_pages(root, file, pages, num_pages,
1100 pos, first_index, last_index,
1101 write_bytes);
1102 if (ret)
1103 goto out;
1104
1105 ret = btrfs_copy_from_user(pos, num_pages,
1106 write_bytes, pages, buf);
1107 if (ret) {
1108 btrfs_drop_pages(pages, num_pages);
1109 goto out;
1110 }
1111
1112 ret = dirty_and_release_pages(NULL, root, file, pages,
1113 num_pages, pos, write_bytes);
1114 btrfs_drop_pages(pages, num_pages);
1115 if (ret)
1116 goto out;
1117
1118 if (will_write) {
1119 btrfs_fdatawrite_range(inode->i_mapping, pos,
1120 pos + write_bytes - 1,
1121 WB_SYNC_NONE);
1122 } else {
1123 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
1124 num_pages);
1125 if (num_pages <
1126 (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
1127 btrfs_btree_balance_dirty(root, 1);
1128 btrfs_throttle(root);
1129 }
1130
1131 buf += write_bytes;
1132 count -= write_bytes;
1133 pos += write_bytes;
1134 num_written += write_bytes;
1135
1136 cond_resched();
1137 }
1138out:
1139 mutex_unlock(&inode->i_mutex);
1140
1141out_nolock:
1142 kfree(pages);
1143 if (pinned[0])
1144 page_cache_release(pinned[0]);
1145 if (pinned[1])
1146 page_cache_release(pinned[1]);
1147 *ppos = pos;
1148
1149 if (num_written > 0 && will_write) {
1150 struct btrfs_trans_handle *trans;
1151
1152 err = btrfs_wait_ordered_range(inode, start_pos, num_written);
1153 if (err)
1154 num_written = err;
1155
1156 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
1157 trans = btrfs_start_transaction(root, 1);
1158 ret = btrfs_log_dentry_safe(trans, root,
1159 file->f_dentry);
1160 if (ret == 0) {
1161 btrfs_sync_log(trans, root);
1162 btrfs_end_transaction(trans, root);
1163 } else {
1164 btrfs_commit_transaction(trans, root);
1165 }
1166 }
1167 if (file->f_flags & O_DIRECT) {
1168 invalidate_mapping_pages(inode->i_mapping,
1169 start_pos >> PAGE_CACHE_SHIFT,
1170 (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
1171 }
1172 }
1173 current->backing_dev_info = NULL;
1174 return num_written ? num_written : err;
1175}
1176
1177int btrfs_release_file(struct inode *inode, struct file *filp)
1178{
1179 if (filp->private_data)
1180 btrfs_ioctl_trans_end(filp);
1181 return 0;
1182}
1183
1184/*
1185 * fsync call for both files and directories. This logs the inode into
1186 * the tree log instead of forcing full commits whenever possible.
1187 *
1188 * It needs to call filemap_fdatawait so that all ordered extent updates are
1189 * in the metadata btree are up to date for copying to the log.
1190 *
1191 * It drops the inode mutex before doing the tree log commit. This is an
1192 * important optimization for directories because holding the mutex prevents
1193 * new operations on the dir while we write to disk.
1194 */
1195int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
1196{
1197 struct inode *inode = dentry->d_inode;
1198 struct btrfs_root *root = BTRFS_I(inode)->root;
1199 int ret = 0;
1200 struct btrfs_trans_handle *trans;
1201
1202 /*
1203 * check the transaction that last modified this inode
1204 * and see if its already been committed
1205 */
1206 if (!BTRFS_I(inode)->last_trans)
1207 goto out;
1208
1209 mutex_lock(&root->fs_info->trans_mutex);
1210 if (BTRFS_I(inode)->last_trans <=
1211 root->fs_info->last_trans_committed) {
1212 BTRFS_I(inode)->last_trans = 0;
1213 mutex_unlock(&root->fs_info->trans_mutex);
1214 goto out;
1215 }
1216 mutex_unlock(&root->fs_info->trans_mutex);
1217
1218 root->fs_info->tree_log_batch++;
1219 filemap_fdatawrite(inode->i_mapping);
1220 btrfs_wait_ordered_range(inode, 0, (u64)-1);
1221 root->fs_info->tree_log_batch++;
1222
1223 /*
1224 * ok we haven't committed the transaction yet, lets do a commit
1225 */
1226 if (file->private_data)
1227 btrfs_ioctl_trans_end(file);
1228
1229 trans = btrfs_start_transaction(root, 1);
1230 if (!trans) {
1231 ret = -ENOMEM;
1232 goto out;
1233 }
1234
1235 ret = btrfs_log_dentry_safe(trans, root, file->f_dentry);
1236 if (ret < 0)
1237 goto out;
1238
1239 /* we've logged all the items and now have a consistent
1240 * version of the file in the log. It is possible that
1241 * someone will come in and modify the file, but that's
1242 * fine because the log is consistent on disk, and we
1243 * have references to all of the file's extents
1244 *
1245 * It is possible that someone will come in and log the
1246 * file again, but that will end up using the synchronization
1247 * inside btrfs_sync_log to keep things safe.
1248 */
1249 mutex_unlock(&file->f_dentry->d_inode->i_mutex);
1250
1251 if (ret > 0) {
1252 ret = btrfs_commit_transaction(trans, root);
1253 } else {
1254 btrfs_sync_log(trans, root);
1255 ret = btrfs_end_transaction(trans, root);
1256 }
1257 mutex_lock(&file->f_dentry->d_inode->i_mutex);
1258out:
1259 return ret > 0 ? EIO : ret;
1260}
1261
1262static struct vm_operations_struct btrfs_file_vm_ops = {
1263 .fault = filemap_fault,
1264 .page_mkwrite = btrfs_page_mkwrite,
1265};
1266
1267static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
1268{
1269 vma->vm_ops = &btrfs_file_vm_ops;
1270 file_accessed(filp);
1271 return 0;
1272}
1273
1274struct file_operations btrfs_file_operations = {
1275 .llseek = generic_file_llseek,
1276 .read = do_sync_read,
1277 .aio_read = generic_file_aio_read,
1278 .splice_read = generic_file_splice_read,
1279 .write = btrfs_file_write,
1280 .mmap = btrfs_file_mmap,
1281 .open = generic_file_open,
1282 .release = btrfs_release_file,
1283 .fsync = btrfs_sync_file,
1284 .unlocked_ioctl = btrfs_ioctl,
1285#ifdef CONFIG_COMPAT
1286 .compat_ioctl = btrfs_ioctl,
1287#endif
1288};
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
new file mode 100644
index 000000000000..d1e5f0e84c58
--- /dev/null
+++ b/fs/btrfs/free-space-cache.c
@@ -0,0 +1,495 @@
1/*
2 * Copyright (C) 2008 Red Hat. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include "ctree.h"
21
22static int tree_insert_offset(struct rb_root *root, u64 offset,
23 struct rb_node *node)
24{
25 struct rb_node **p = &root->rb_node;
26 struct rb_node *parent = NULL;
27 struct btrfs_free_space *info;
28
29 while (*p) {
30 parent = *p;
31 info = rb_entry(parent, struct btrfs_free_space, offset_index);
32
33 if (offset < info->offset)
34 p = &(*p)->rb_left;
35 else if (offset > info->offset)
36 p = &(*p)->rb_right;
37 else
38 return -EEXIST;
39 }
40
41 rb_link_node(node, parent, p);
42 rb_insert_color(node, root);
43
44 return 0;
45}
46
47static int tree_insert_bytes(struct rb_root *root, u64 bytes,
48 struct rb_node *node)
49{
50 struct rb_node **p = &root->rb_node;
51 struct rb_node *parent = NULL;
52 struct btrfs_free_space *info;
53
54 while (*p) {
55 parent = *p;
56 info = rb_entry(parent, struct btrfs_free_space, bytes_index);
57
58 if (bytes < info->bytes)
59 p = &(*p)->rb_left;
60 else
61 p = &(*p)->rb_right;
62 }
63
64 rb_link_node(node, parent, p);
65 rb_insert_color(node, root);
66
67 return 0;
68}
69
70/*
71 * searches the tree for the given offset. If contains is set we will return
72 * the free space that contains the given offset. If contains is not set we
73 * will return the free space that starts at or after the given offset and is
74 * at least bytes long.
75 */
76static struct btrfs_free_space *tree_search_offset(struct rb_root *root,
77 u64 offset, u64 bytes,
78 int contains)
79{
80 struct rb_node *n = root->rb_node;
81 struct btrfs_free_space *entry, *ret = NULL;
82
83 while (n) {
84 entry = rb_entry(n, struct btrfs_free_space, offset_index);
85
86 if (offset < entry->offset) {
87 if (!contains &&
88 (!ret || entry->offset < ret->offset) &&
89 (bytes <= entry->bytes))
90 ret = entry;
91 n = n->rb_left;
92 } else if (offset > entry->offset) {
93 if ((entry->offset + entry->bytes - 1) >= offset &&
94 bytes <= entry->bytes) {
95 ret = entry;
96 break;
97 }
98 n = n->rb_right;
99 } else {
100 if (bytes > entry->bytes) {
101 n = n->rb_right;
102 continue;
103 }
104 ret = entry;
105 break;
106 }
107 }
108
109 return ret;
110}
111
112/*
113 * return a chunk at least bytes size, as close to offset that we can get.
114 */
115static struct btrfs_free_space *tree_search_bytes(struct rb_root *root,
116 u64 offset, u64 bytes)
117{
118 struct rb_node *n = root->rb_node;
119 struct btrfs_free_space *entry, *ret = NULL;
120
121 while (n) {
122 entry = rb_entry(n, struct btrfs_free_space, bytes_index);
123
124 if (bytes < entry->bytes) {
125 /*
126 * We prefer to get a hole size as close to the size we
127 * are asking for so we don't take small slivers out of
128 * huge holes, but we also want to get as close to the
129 * offset as possible so we don't have a whole lot of
130 * fragmentation.
131 */
132 if (offset <= entry->offset) {
133 if (!ret)
134 ret = entry;
135 else if (entry->bytes < ret->bytes)
136 ret = entry;
137 else if (entry->offset < ret->offset)
138 ret = entry;
139 }
140 n = n->rb_left;
141 } else if (bytes > entry->bytes) {
142 n = n->rb_right;
143 } else {
144 /*
145 * Ok we may have multiple chunks of the wanted size,
146 * so we don't want to take the first one we find, we
147 * want to take the one closest to our given offset, so
148 * keep searching just in case theres a better match.
149 */
150 n = n->rb_right;
151 if (offset > entry->offset)
152 continue;
153 else if (!ret || entry->offset < ret->offset)
154 ret = entry;
155 }
156 }
157
158 return ret;
159}
160
161static void unlink_free_space(struct btrfs_block_group_cache *block_group,
162 struct btrfs_free_space *info)
163{
164 rb_erase(&info->offset_index, &block_group->free_space_offset);
165 rb_erase(&info->bytes_index, &block_group->free_space_bytes);
166}
167
168static int link_free_space(struct btrfs_block_group_cache *block_group,
169 struct btrfs_free_space *info)
170{
171 int ret = 0;
172
173
174 ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
175 &info->offset_index);
176 if (ret)
177 return ret;
178
179 ret = tree_insert_bytes(&block_group->free_space_bytes, info->bytes,
180 &info->bytes_index);
181 if (ret)
182 return ret;
183
184 return ret;
185}
186
187static int __btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
188 u64 offset, u64 bytes)
189{
190 struct btrfs_free_space *right_info;
191 struct btrfs_free_space *left_info;
192 struct btrfs_free_space *info = NULL;
193 struct btrfs_free_space *alloc_info;
194 int ret = 0;
195
196 alloc_info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
197 if (!alloc_info)
198 return -ENOMEM;
199
200 /*
201 * first we want to see if there is free space adjacent to the range we
202 * are adding, if there is remove that struct and add a new one to
203 * cover the entire range
204 */
205 right_info = tree_search_offset(&block_group->free_space_offset,
206 offset+bytes, 0, 1);
207 left_info = tree_search_offset(&block_group->free_space_offset,
208 offset-1, 0, 1);
209
210 if (right_info && right_info->offset == offset+bytes) {
211 unlink_free_space(block_group, right_info);
212 info = right_info;
213 info->offset = offset;
214 info->bytes += bytes;
215 } else if (right_info && right_info->offset != offset+bytes) {
216 printk(KERN_ERR "btrfs adding space in the middle of an "
217 "existing free space area. existing: "
218 "offset=%llu, bytes=%llu. new: offset=%llu, "
219 "bytes=%llu\n", (unsigned long long)right_info->offset,
220 (unsigned long long)right_info->bytes,
221 (unsigned long long)offset,
222 (unsigned long long)bytes);
223 BUG();
224 }
225
226 if (left_info) {
227 unlink_free_space(block_group, left_info);
228
229 if (unlikely((left_info->offset + left_info->bytes) !=
230 offset)) {
231 printk(KERN_ERR "btrfs free space to the left "
232 "of new free space isn't "
233 "quite right. existing: offset=%llu, "
234 "bytes=%llu. new: offset=%llu, bytes=%llu\n",
235 (unsigned long long)left_info->offset,
236 (unsigned long long)left_info->bytes,
237 (unsigned long long)offset,
238 (unsigned long long)bytes);
239 BUG();
240 }
241
242 if (info) {
243 info->offset = left_info->offset;
244 info->bytes += left_info->bytes;
245 kfree(left_info);
246 } else {
247 info = left_info;
248 info->bytes += bytes;
249 }
250 }
251
252 if (info) {
253 ret = link_free_space(block_group, info);
254 if (!ret)
255 info = NULL;
256 goto out;
257 }
258
259 info = alloc_info;
260 alloc_info = NULL;
261 info->offset = offset;
262 info->bytes = bytes;
263
264 ret = link_free_space(block_group, info);
265 if (ret)
266 kfree(info);
267out:
268 if (ret) {
269 printk(KERN_ERR "btrfs: unable to add free space :%d\n", ret);
270 if (ret == -EEXIST)
271 BUG();
272 }
273
274 kfree(alloc_info);
275
276 return ret;
277}
278
279static int
280__btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
281 u64 offset, u64 bytes)
282{
283 struct btrfs_free_space *info;
284 int ret = 0;
285
286 info = tree_search_offset(&block_group->free_space_offset, offset, 0,
287 1);
288
289 if (info && info->offset == offset) {
290 if (info->bytes < bytes) {
291 printk(KERN_ERR "Found free space at %llu, size %llu,"
292 "trying to use %llu\n",
293 (unsigned long long)info->offset,
294 (unsigned long long)info->bytes,
295 (unsigned long long)bytes);
296 WARN_ON(1);
297 ret = -EINVAL;
298 goto out;
299 }
300 unlink_free_space(block_group, info);
301
302 if (info->bytes == bytes) {
303 kfree(info);
304 goto out;
305 }
306
307 info->offset += bytes;
308 info->bytes -= bytes;
309
310 ret = link_free_space(block_group, info);
311 BUG_ON(ret);
312 } else if (info && info->offset < offset &&
313 info->offset + info->bytes >= offset + bytes) {
314 u64 old_start = info->offset;
315 /*
316 * we're freeing space in the middle of the info,
317 * this can happen during tree log replay
318 *
319 * first unlink the old info and then
320 * insert it again after the hole we're creating
321 */
322 unlink_free_space(block_group, info);
323 if (offset + bytes < info->offset + info->bytes) {
324 u64 old_end = info->offset + info->bytes;
325
326 info->offset = offset + bytes;
327 info->bytes = old_end - info->offset;
328 ret = link_free_space(block_group, info);
329 BUG_ON(ret);
330 } else {
331 /* the hole we're creating ends at the end
332 * of the info struct, just free the info
333 */
334 kfree(info);
335 }
336
337 /* step two, insert a new info struct to cover anything
338 * before the hole
339 */
340 ret = __btrfs_add_free_space(block_group, old_start,
341 offset - old_start);
342 BUG_ON(ret);
343 } else {
344 WARN_ON(1);
345 }
346out:
347 return ret;
348}
349
350int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
351 u64 offset, u64 bytes)
352{
353 int ret;
354 struct btrfs_free_space *sp;
355
356 mutex_lock(&block_group->alloc_mutex);
357 ret = __btrfs_add_free_space(block_group, offset, bytes);
358 sp = tree_search_offset(&block_group->free_space_offset, offset, 0, 1);
359 BUG_ON(!sp);
360 mutex_unlock(&block_group->alloc_mutex);
361
362 return ret;
363}
364
365int btrfs_add_free_space_lock(struct btrfs_block_group_cache *block_group,
366 u64 offset, u64 bytes)
367{
368 int ret;
369 struct btrfs_free_space *sp;
370
371 ret = __btrfs_add_free_space(block_group, offset, bytes);
372 sp = tree_search_offset(&block_group->free_space_offset, offset, 0, 1);
373 BUG_ON(!sp);
374
375 return ret;
376}
377
378int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
379 u64 offset, u64 bytes)
380{
381 int ret = 0;
382
383 mutex_lock(&block_group->alloc_mutex);
384 ret = __btrfs_remove_free_space(block_group, offset, bytes);
385 mutex_unlock(&block_group->alloc_mutex);
386
387 return ret;
388}
389
390int btrfs_remove_free_space_lock(struct btrfs_block_group_cache *block_group,
391 u64 offset, u64 bytes)
392{
393 int ret;
394
395 ret = __btrfs_remove_free_space(block_group, offset, bytes);
396
397 return ret;
398}
399
400void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
401 u64 bytes)
402{
403 struct btrfs_free_space *info;
404 struct rb_node *n;
405 int count = 0;
406
407 for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
408 info = rb_entry(n, struct btrfs_free_space, offset_index);
409 if (info->bytes >= bytes)
410 count++;
411 }
412 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
413 "\n", count);
414}
415
416u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
417{
418 struct btrfs_free_space *info;
419 struct rb_node *n;
420 u64 ret = 0;
421
422 for (n = rb_first(&block_group->free_space_offset); n;
423 n = rb_next(n)) {
424 info = rb_entry(n, struct btrfs_free_space, offset_index);
425 ret += info->bytes;
426 }
427
428 return ret;
429}
430
431void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
432{
433 struct btrfs_free_space *info;
434 struct rb_node *node;
435
436 mutex_lock(&block_group->alloc_mutex);
437 while ((node = rb_last(&block_group->free_space_bytes)) != NULL) {
438 info = rb_entry(node, struct btrfs_free_space, bytes_index);
439 unlink_free_space(block_group, info);
440 kfree(info);
441 if (need_resched()) {
442 mutex_unlock(&block_group->alloc_mutex);
443 cond_resched();
444 mutex_lock(&block_group->alloc_mutex);
445 }
446 }
447 mutex_unlock(&block_group->alloc_mutex);
448}
449
450#if 0
451static struct btrfs_free_space *btrfs_find_free_space_offset(struct
452 btrfs_block_group_cache
453 *block_group, u64 offset,
454 u64 bytes)
455{
456 struct btrfs_free_space *ret;
457
458 mutex_lock(&block_group->alloc_mutex);
459 ret = tree_search_offset(&block_group->free_space_offset, offset,
460 bytes, 0);
461 mutex_unlock(&block_group->alloc_mutex);
462
463 return ret;
464}
465
466static struct btrfs_free_space *btrfs_find_free_space_bytes(struct
467 btrfs_block_group_cache
468 *block_group, u64 offset,
469 u64 bytes)
470{
471 struct btrfs_free_space *ret;
472
473 mutex_lock(&block_group->alloc_mutex);
474
475 ret = tree_search_bytes(&block_group->free_space_bytes, offset, bytes);
476 mutex_unlock(&block_group->alloc_mutex);
477
478 return ret;
479}
480#endif
481
482struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache
483 *block_group, u64 offset,
484 u64 bytes)
485{
486 struct btrfs_free_space *ret = NULL;
487
488 ret = tree_search_offset(&block_group->free_space_offset, offset,
489 bytes, 0);
490 if (!ret)
491 ret = tree_search_bytes(&block_group->free_space_bytes,
492 offset, bytes);
493
494 return ret;
495}
diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h
new file mode 100644
index 000000000000..2a020b276768
--- /dev/null
+++ b/fs/btrfs/hash.h
@@ -0,0 +1,27 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __HASH__
20#define __HASH__
21
22#include "crc32c.h"
23static inline u64 btrfs_name_hash(const char *name, int len)
24{
25 return btrfs_crc32c((u32)~1, name, len);
26}
27#endif
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
new file mode 100644
index 000000000000..3d46fa1f29a4
--- /dev/null
+++ b/fs/btrfs/inode-item.c
@@ -0,0 +1,206 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include "ctree.h"
20#include "disk-io.h"
21#include "transaction.h"
22
23static int find_name_in_backref(struct btrfs_path *path, const char *name,
24 int name_len, struct btrfs_inode_ref **ref_ret)
25{
26 struct extent_buffer *leaf;
27 struct btrfs_inode_ref *ref;
28 unsigned long ptr;
29 unsigned long name_ptr;
30 u32 item_size;
31 u32 cur_offset = 0;
32 int len;
33
34 leaf = path->nodes[0];
35 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
36 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
37 while (cur_offset < item_size) {
38 ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
39 len = btrfs_inode_ref_name_len(leaf, ref);
40 name_ptr = (unsigned long)(ref + 1);
41 cur_offset += len + sizeof(*ref);
42 if (len != name_len)
43 continue;
44 if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) {
45 *ref_ret = ref;
46 return 1;
47 }
48 }
49 return 0;
50}
51
52int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
53 struct btrfs_root *root,
54 const char *name, int name_len,
55 u64 inode_objectid, u64 ref_objectid, u64 *index)
56{
57 struct btrfs_path *path;
58 struct btrfs_key key;
59 struct btrfs_inode_ref *ref;
60 struct extent_buffer *leaf;
61 unsigned long ptr;
62 unsigned long item_start;
63 u32 item_size;
64 u32 sub_item_len;
65 int ret;
66 int del_len = name_len + sizeof(*ref);
67
68 key.objectid = inode_objectid;
69 key.offset = ref_objectid;
70 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
71
72 path = btrfs_alloc_path();
73 if (!path)
74 return -ENOMEM;
75
76 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
77 if (ret > 0) {
78 ret = -ENOENT;
79 goto out;
80 } else if (ret < 0) {
81 goto out;
82 }
83 if (!find_name_in_backref(path, name, name_len, &ref)) {
84 ret = -ENOENT;
85 goto out;
86 }
87 leaf = path->nodes[0];
88 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
89
90 if (index)
91 *index = btrfs_inode_ref_index(leaf, ref);
92
93 if (del_len == item_size) {
94 ret = btrfs_del_item(trans, root, path);
95 goto out;
96 }
97 ptr = (unsigned long)ref;
98 sub_item_len = name_len + sizeof(*ref);
99 item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
100 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
101 item_size - (ptr + sub_item_len - item_start));
102 ret = btrfs_truncate_item(trans, root, path,
103 item_size - sub_item_len, 1);
104 BUG_ON(ret);
105out:
106 btrfs_free_path(path);
107 return ret;
108}
109
110int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
111 struct btrfs_root *root,
112 const char *name, int name_len,
113 u64 inode_objectid, u64 ref_objectid, u64 index)
114{
115 struct btrfs_path *path;
116 struct btrfs_key key;
117 struct btrfs_inode_ref *ref;
118 unsigned long ptr;
119 int ret;
120 int ins_len = name_len + sizeof(*ref);
121
122 key.objectid = inode_objectid;
123 key.offset = ref_objectid;
124 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
125
126 path = btrfs_alloc_path();
127 if (!path)
128 return -ENOMEM;
129
130 ret = btrfs_insert_empty_item(trans, root, path, &key,
131 ins_len);
132 if (ret == -EEXIST) {
133 u32 old_size;
134
135 if (find_name_in_backref(path, name, name_len, &ref))
136 goto out;
137
138 old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
139 ret = btrfs_extend_item(trans, root, path, ins_len);
140 BUG_ON(ret);
141 ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
142 struct btrfs_inode_ref);
143 ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
144 btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
145 btrfs_set_inode_ref_index(path->nodes[0], ref, index);
146 ptr = (unsigned long)(ref + 1);
147 ret = 0;
148 } else if (ret < 0) {
149 goto out;
150 } else {
151 ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
152 struct btrfs_inode_ref);
153 btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
154 btrfs_set_inode_ref_index(path->nodes[0], ref, index);
155 ptr = (unsigned long)(ref + 1);
156 }
157 write_extent_buffer(path->nodes[0], name, ptr, name_len);
158 btrfs_mark_buffer_dirty(path->nodes[0]);
159
160out:
161 btrfs_free_path(path);
162 return ret;
163}
164
165int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
166 struct btrfs_root *root,
167 struct btrfs_path *path, u64 objectid)
168{
169 struct btrfs_key key;
170 int ret;
171 key.objectid = objectid;
172 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
173 key.offset = 0;
174
175 ret = btrfs_insert_empty_item(trans, root, path, &key,
176 sizeof(struct btrfs_inode_item));
177 if (ret == 0 && objectid > root->highest_inode)
178 root->highest_inode = objectid;
179 return ret;
180}
181
182int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
183 *root, struct btrfs_path *path,
184 struct btrfs_key *location, int mod)
185{
186 int ins_len = mod < 0 ? -1 : 0;
187 int cow = mod != 0;
188 int ret;
189 int slot;
190 struct extent_buffer *leaf;
191 struct btrfs_key found_key;
192
193 ret = btrfs_search_slot(trans, root, location, path, ins_len, cow);
194 if (ret > 0 && btrfs_key_type(location) == BTRFS_ROOT_ITEM_KEY &&
195 location->offset == (u64)-1 && path->slots[0] != 0) {
196 slot = path->slots[0] - 1;
197 leaf = path->nodes[0];
198 btrfs_item_key_to_cpu(leaf, &found_key, slot);
199 if (found_key.objectid == location->objectid &&
200 btrfs_key_type(&found_key) == btrfs_key_type(location)) {
201 path->slots[0]--;
202 return 0;
203 }
204 }
205 return ret;
206}
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
new file mode 100644
index 000000000000..2aa79873eb46
--- /dev/null
+++ b/fs/btrfs/inode-map.c
@@ -0,0 +1,144 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include "ctree.h"
20#include "disk-io.h"
21#include "transaction.h"
22
23int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid)
24{
25 struct btrfs_path *path;
26 int ret;
27 struct extent_buffer *l;
28 struct btrfs_key search_key;
29 struct btrfs_key found_key;
30 int slot;
31
32 path = btrfs_alloc_path();
33 BUG_ON(!path);
34
35 search_key.objectid = BTRFS_LAST_FREE_OBJECTID;
36 search_key.type = -1;
37 search_key.offset = (u64)-1;
38 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
39 if (ret < 0)
40 goto error;
41 BUG_ON(ret == 0);
42 if (path->slots[0] > 0) {
43 slot = path->slots[0] - 1;
44 l = path->nodes[0];
45 btrfs_item_key_to_cpu(l, &found_key, slot);
46 *objectid = found_key.objectid;
47 } else {
48 *objectid = BTRFS_FIRST_FREE_OBJECTID;
49 }
50 ret = 0;
51error:
52 btrfs_free_path(path);
53 return ret;
54}
55
56/*
57 * walks the btree of allocated inodes and find a hole.
58 */
59int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
60 struct btrfs_root *root,
61 u64 dirid, u64 *objectid)
62{
63 struct btrfs_path *path;
64 struct btrfs_key key;
65 int ret;
66 int slot = 0;
67 u64 last_ino = 0;
68 int start_found;
69 struct extent_buffer *l;
70 struct btrfs_key search_key;
71 u64 search_start = dirid;
72
73 mutex_lock(&root->objectid_mutex);
74 if (root->last_inode_alloc >= BTRFS_FIRST_FREE_OBJECTID &&
75 root->last_inode_alloc < BTRFS_LAST_FREE_OBJECTID) {
76 *objectid = ++root->last_inode_alloc;
77 mutex_unlock(&root->objectid_mutex);
78 return 0;
79 }
80 path = btrfs_alloc_path();
81 BUG_ON(!path);
82 search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID);
83 search_key.objectid = search_start;
84 search_key.type = 0;
85 search_key.offset = 0;
86
87 btrfs_init_path(path);
88 start_found = 0;
89 ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0);
90 if (ret < 0)
91 goto error;
92
93 while (1) {
94 l = path->nodes[0];
95 slot = path->slots[0];
96 if (slot >= btrfs_header_nritems(l)) {
97 ret = btrfs_next_leaf(root, path);
98 if (ret == 0)
99 continue;
100 if (ret < 0)
101 goto error;
102 if (!start_found) {
103 *objectid = search_start;
104 start_found = 1;
105 goto found;
106 }
107 *objectid = last_ino > search_start ?
108 last_ino : search_start;
109 goto found;
110 }
111 btrfs_item_key_to_cpu(l, &key, slot);
112 if (key.objectid >= search_start) {
113 if (start_found) {
114 if (last_ino < search_start)
115 last_ino = search_start;
116 if (key.objectid > last_ino) {
117 *objectid = last_ino;
118 goto found;
119 }
120 } else if (key.objectid > search_start) {
121 *objectid = search_start;
122 goto found;
123 }
124 }
125 if (key.objectid >= BTRFS_LAST_FREE_OBJECTID)
126 break;
127
128 start_found = 1;
129 last_ino = key.objectid + 1;
130 path->slots[0]++;
131 }
132 BUG_ON(1);
133found:
134 btrfs_release_path(root, path);
135 btrfs_free_path(path);
136 BUG_ON(*objectid < search_start);
137 mutex_unlock(&root->objectid_mutex);
138 return 0;
139error:
140 btrfs_release_path(root, path);
141 btrfs_free_path(path);
142 mutex_unlock(&root->objectid_mutex);
143 return ret;
144}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
new file mode 100644
index 000000000000..8adfe059ab41
--- /dev/null
+++ b/fs/btrfs/inode.c
@@ -0,0 +1,5035 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/kernel.h>
20#include <linux/bio.h>
21#include <linux/buffer_head.h>
22#include <linux/file.h>
23#include <linux/fs.h>
24#include <linux/pagemap.h>
25#include <linux/highmem.h>
26#include <linux/time.h>
27#include <linux/init.h>
28#include <linux/string.h>
29#include <linux/smp_lock.h>
30#include <linux/backing-dev.h>
31#include <linux/mpage.h>
32#include <linux/swap.h>
33#include <linux/writeback.h>
34#include <linux/statfs.h>
35#include <linux/compat.h>
36#include <linux/bit_spinlock.h>
37#include <linux/version.h>
38#include <linux/xattr.h>
39#include <linux/posix_acl.h>
40#include <linux/falloc.h>
41#include "compat.h"
42#include "ctree.h"
43#include "disk-io.h"
44#include "transaction.h"
45#include "btrfs_inode.h"
46#include "ioctl.h"
47#include "print-tree.h"
48#include "volumes.h"
49#include "ordered-data.h"
50#include "xattr.h"
51#include "tree-log.h"
52#include "ref-cache.h"
53#include "compression.h"
54
55struct btrfs_iget_args {
56 u64 ino;
57 struct btrfs_root *root;
58};
59
60static struct inode_operations btrfs_dir_inode_operations;
61static struct inode_operations btrfs_symlink_inode_operations;
62static struct inode_operations btrfs_dir_ro_inode_operations;
63static struct inode_operations btrfs_special_inode_operations;
64static struct inode_operations btrfs_file_inode_operations;
65static struct address_space_operations btrfs_aops;
66static struct address_space_operations btrfs_symlink_aops;
67static struct file_operations btrfs_dir_file_operations;
68static struct extent_io_ops btrfs_extent_io_ops;
69
70static struct kmem_cache *btrfs_inode_cachep;
71struct kmem_cache *btrfs_trans_handle_cachep;
72struct kmem_cache *btrfs_transaction_cachep;
73struct kmem_cache *btrfs_bit_radix_cachep;
74struct kmem_cache *btrfs_path_cachep;
75
76#define S_SHIFT 12
77static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
78 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
79 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
80 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
81 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
82 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
83 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
84 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
85};
86
87static void btrfs_truncate(struct inode *inode);
88static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end);
89static noinline int cow_file_range(struct inode *inode,
90 struct page *locked_page,
91 u64 start, u64 end, int *page_started,
92 unsigned long *nr_written, int unlock);
93
94/*
95 * a very lame attempt at stopping writes when the FS is 85% full. There
96 * are countless ways this is incorrect, but it is better than nothing.
97 */
98int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
99 int for_del)
100{
101 u64 total;
102 u64 used;
103 u64 thresh;
104 int ret = 0;
105
106 spin_lock(&root->fs_info->delalloc_lock);
107 total = btrfs_super_total_bytes(&root->fs_info->super_copy);
108 used = btrfs_super_bytes_used(&root->fs_info->super_copy);
109 if (for_del)
110 thresh = total * 90;
111 else
112 thresh = total * 85;
113
114 do_div(thresh, 100);
115
116 if (used + root->fs_info->delalloc_bytes + num_required > thresh)
117 ret = -ENOSPC;
118 spin_unlock(&root->fs_info->delalloc_lock);
119 return ret;
120}
121
122/*
123 * this does all the hard work for inserting an inline extent into
124 * the btree. The caller should have done a btrfs_drop_extents so that
125 * no overlapping inline items exist in the btree
126 */
127static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
128 struct btrfs_root *root, struct inode *inode,
129 u64 start, size_t size, size_t compressed_size,
130 struct page **compressed_pages)
131{
132 struct btrfs_key key;
133 struct btrfs_path *path;
134 struct extent_buffer *leaf;
135 struct page *page = NULL;
136 char *kaddr;
137 unsigned long ptr;
138 struct btrfs_file_extent_item *ei;
139 int err = 0;
140 int ret;
141 size_t cur_size = size;
142 size_t datasize;
143 unsigned long offset;
144 int use_compress = 0;
145
146 if (compressed_size && compressed_pages) {
147 use_compress = 1;
148 cur_size = compressed_size;
149 }
150
151 path = btrfs_alloc_path();
152 if (!path)
153 return -ENOMEM;
154
155 btrfs_set_trans_block_group(trans, inode);
156
157 key.objectid = inode->i_ino;
158 key.offset = start;
159 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
160 datasize = btrfs_file_extent_calc_inline_size(cur_size);
161
162 inode_add_bytes(inode, size);
163 ret = btrfs_insert_empty_item(trans, root, path, &key,
164 datasize);
165 BUG_ON(ret);
166 if (ret) {
167 err = ret;
168 goto fail;
169 }
170 leaf = path->nodes[0];
171 ei = btrfs_item_ptr(leaf, path->slots[0],
172 struct btrfs_file_extent_item);
173 btrfs_set_file_extent_generation(leaf, ei, trans->transid);
174 btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
175 btrfs_set_file_extent_encryption(leaf, ei, 0);
176 btrfs_set_file_extent_other_encoding(leaf, ei, 0);
177 btrfs_set_file_extent_ram_bytes(leaf, ei, size);
178 ptr = btrfs_file_extent_inline_start(ei);
179
180 if (use_compress) {
181 struct page *cpage;
182 int i = 0;
183 while (compressed_size > 0) {
184 cpage = compressed_pages[i];
185 cur_size = min_t(unsigned long, compressed_size,
186 PAGE_CACHE_SIZE);
187
188 kaddr = kmap(cpage);
189 write_extent_buffer(leaf, kaddr, ptr, cur_size);
190 kunmap(cpage);
191
192 i++;
193 ptr += cur_size;
194 compressed_size -= cur_size;
195 }
196 btrfs_set_file_extent_compression(leaf, ei,
197 BTRFS_COMPRESS_ZLIB);
198 } else {
199 page = find_get_page(inode->i_mapping,
200 start >> PAGE_CACHE_SHIFT);
201 btrfs_set_file_extent_compression(leaf, ei, 0);
202 kaddr = kmap_atomic(page, KM_USER0);
203 offset = start & (PAGE_CACHE_SIZE - 1);
204 write_extent_buffer(leaf, kaddr + offset, ptr, size);
205 kunmap_atomic(kaddr, KM_USER0);
206 page_cache_release(page);
207 }
208 btrfs_mark_buffer_dirty(leaf);
209 btrfs_free_path(path);
210
211 BTRFS_I(inode)->disk_i_size = inode->i_size;
212 btrfs_update_inode(trans, root, inode);
213 return 0;
214fail:
215 btrfs_free_path(path);
216 return err;
217}
218
219
220/*
221 * conditionally insert an inline extent into the file. This
222 * does the checks required to make sure the data is small enough
223 * to fit as an inline extent.
224 */
225static int cow_file_range_inline(struct btrfs_trans_handle *trans,
226 struct btrfs_root *root,
227 struct inode *inode, u64 start, u64 end,
228 size_t compressed_size,
229 struct page **compressed_pages)
230{
231 u64 isize = i_size_read(inode);
232 u64 actual_end = min(end + 1, isize);
233 u64 inline_len = actual_end - start;
234 u64 aligned_end = (end + root->sectorsize - 1) &
235 ~((u64)root->sectorsize - 1);
236 u64 hint_byte;
237 u64 data_len = inline_len;
238 int ret;
239
240 if (compressed_size)
241 data_len = compressed_size;
242
243 if (start > 0 ||
244 actual_end >= PAGE_CACHE_SIZE ||
245 data_len >= BTRFS_MAX_INLINE_DATA_SIZE(root) ||
246 (!compressed_size &&
247 (actual_end & (root->sectorsize - 1)) == 0) ||
248 end + 1 < isize ||
249 data_len > root->fs_info->max_inline) {
250 return 1;
251 }
252
253 ret = btrfs_drop_extents(trans, root, inode, start,
254 aligned_end, start, &hint_byte);
255 BUG_ON(ret);
256
257 if (isize > actual_end)
258 inline_len = min_t(u64, isize, actual_end);
259 ret = insert_inline_extent(trans, root, inode, start,
260 inline_len, compressed_size,
261 compressed_pages);
262 BUG_ON(ret);
263 btrfs_drop_extent_cache(inode, start, aligned_end, 0);
264 return 0;
265}
266
267struct async_extent {
268 u64 start;
269 u64 ram_size;
270 u64 compressed_size;
271 struct page **pages;
272 unsigned long nr_pages;
273 struct list_head list;
274};
275
276struct async_cow {
277 struct inode *inode;
278 struct btrfs_root *root;
279 struct page *locked_page;
280 u64 start;
281 u64 end;
282 struct list_head extents;
283 struct btrfs_work work;
284};
285
286static noinline int add_async_extent(struct async_cow *cow,
287 u64 start, u64 ram_size,
288 u64 compressed_size,
289 struct page **pages,
290 unsigned long nr_pages)
291{
292 struct async_extent *async_extent;
293
294 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
295 async_extent->start = start;
296 async_extent->ram_size = ram_size;
297 async_extent->compressed_size = compressed_size;
298 async_extent->pages = pages;
299 async_extent->nr_pages = nr_pages;
300 list_add_tail(&async_extent->list, &cow->extents);
301 return 0;
302}
303
304/*
305 * we create compressed extents in two phases. The first
306 * phase compresses a range of pages that have already been
307 * locked (both pages and state bits are locked).
308 *
309 * This is done inside an ordered work queue, and the compression
310 * is spread across many cpus. The actual IO submission is step
311 * two, and the ordered work queue takes care of making sure that
312 * happens in the same order things were put onto the queue by
313 * writepages and friends.
314 *
315 * If this code finds it can't get good compression, it puts an
316 * entry onto the work queue to write the uncompressed bytes. This
317 * makes sure that both compressed inodes and uncompressed inodes
318 * are written in the same order that pdflush sent them down.
319 */
320static noinline int compress_file_range(struct inode *inode,
321 struct page *locked_page,
322 u64 start, u64 end,
323 struct async_cow *async_cow,
324 int *num_added)
325{
326 struct btrfs_root *root = BTRFS_I(inode)->root;
327 struct btrfs_trans_handle *trans;
328 u64 num_bytes;
329 u64 orig_start;
330 u64 disk_num_bytes;
331 u64 blocksize = root->sectorsize;
332 u64 actual_end;
333 u64 isize = i_size_read(inode);
334 int ret = 0;
335 struct page **pages = NULL;
336 unsigned long nr_pages;
337 unsigned long nr_pages_ret = 0;
338 unsigned long total_compressed = 0;
339 unsigned long total_in = 0;
340 unsigned long max_compressed = 128 * 1024;
341 unsigned long max_uncompressed = 128 * 1024;
342 int i;
343 int will_compress;
344
345 orig_start = start;
346
347 actual_end = min_t(u64, isize, end + 1);
348again:
349 will_compress = 0;
350 nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1;
351 nr_pages = min(nr_pages, (128 * 1024UL) / PAGE_CACHE_SIZE);
352
353 total_compressed = actual_end - start;
354
355 /* we want to make sure that amount of ram required to uncompress
356 * an extent is reasonable, so we limit the total size in ram
357 * of a compressed extent to 128k. This is a crucial number
358 * because it also controls how easily we can spread reads across
359 * cpus for decompression.
360 *
361 * We also want to make sure the amount of IO required to do
362 * a random read is reasonably small, so we limit the size of
363 * a compressed extent to 128k.
364 */
365 total_compressed = min(total_compressed, max_uncompressed);
366 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
367 num_bytes = max(blocksize, num_bytes);
368 disk_num_bytes = num_bytes;
369 total_in = 0;
370 ret = 0;
371
372 /*
373 * we do compression for mount -o compress and when the
374 * inode has not been flagged as nocompress. This flag can
375 * change at any time if we discover bad compression ratios.
376 */
377 if (!btrfs_test_flag(inode, NOCOMPRESS) &&
378 btrfs_test_opt(root, COMPRESS)) {
379 WARN_ON(pages);
380 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
381
382 ret = btrfs_zlib_compress_pages(inode->i_mapping, start,
383 total_compressed, pages,
384 nr_pages, &nr_pages_ret,
385 &total_in,
386 &total_compressed,
387 max_compressed);
388
389 if (!ret) {
390 unsigned long offset = total_compressed &
391 (PAGE_CACHE_SIZE - 1);
392 struct page *page = pages[nr_pages_ret - 1];
393 char *kaddr;
394
395 /* zero the tail end of the last page, we might be
396 * sending it down to disk
397 */
398 if (offset) {
399 kaddr = kmap_atomic(page, KM_USER0);
400 memset(kaddr + offset, 0,
401 PAGE_CACHE_SIZE - offset);
402 kunmap_atomic(kaddr, KM_USER0);
403 }
404 will_compress = 1;
405 }
406 }
407 if (start == 0) {
408 trans = btrfs_join_transaction(root, 1);
409 BUG_ON(!trans);
410 btrfs_set_trans_block_group(trans, inode);
411
412 /* lets try to make an inline extent */
413 if (ret || total_in < (actual_end - start)) {
414 /* we didn't compress the entire range, try
415 * to make an uncompressed inline extent.
416 */
417 ret = cow_file_range_inline(trans, root, inode,
418 start, end, 0, NULL);
419 } else {
420 /* try making a compressed inline extent */
421 ret = cow_file_range_inline(trans, root, inode,
422 start, end,
423 total_compressed, pages);
424 }
425 btrfs_end_transaction(trans, root);
426 if (ret == 0) {
427 /*
428 * inline extent creation worked, we don't need
429 * to create any more async work items. Unlock
430 * and free up our temp pages.
431 */
432 extent_clear_unlock_delalloc(inode,
433 &BTRFS_I(inode)->io_tree,
434 start, end, NULL, 1, 0,
435 0, 1, 1, 1);
436 ret = 0;
437 goto free_pages_out;
438 }
439 }
440
441 if (will_compress) {
442 /*
443 * we aren't doing an inline extent round the compressed size
444 * up to a block size boundary so the allocator does sane
445 * things
446 */
447 total_compressed = (total_compressed + blocksize - 1) &
448 ~(blocksize - 1);
449
450 /*
451 * one last check to make sure the compression is really a
452 * win, compare the page count read with the blocks on disk
453 */
454 total_in = (total_in + PAGE_CACHE_SIZE - 1) &
455 ~(PAGE_CACHE_SIZE - 1);
456 if (total_compressed >= total_in) {
457 will_compress = 0;
458 } else {
459 disk_num_bytes = total_compressed;
460 num_bytes = total_in;
461 }
462 }
463 if (!will_compress && pages) {
464 /*
465 * the compression code ran but failed to make things smaller,
466 * free any pages it allocated and our page pointer array
467 */
468 for (i = 0; i < nr_pages_ret; i++) {
469 WARN_ON(pages[i]->mapping);
470 page_cache_release(pages[i]);
471 }
472 kfree(pages);
473 pages = NULL;
474 total_compressed = 0;
475 nr_pages_ret = 0;
476
477 /* flag the file so we don't compress in the future */
478 btrfs_set_flag(inode, NOCOMPRESS);
479 }
480 if (will_compress) {
481 *num_added += 1;
482
483 /* the async work queues will take care of doing actual
484 * allocation on disk for these compressed pages,
485 * and will submit them to the elevator.
486 */
487 add_async_extent(async_cow, start, num_bytes,
488 total_compressed, pages, nr_pages_ret);
489
490 if (start + num_bytes < end && start + num_bytes < actual_end) {
491 start += num_bytes;
492 pages = NULL;
493 cond_resched();
494 goto again;
495 }
496 } else {
497 /*
498 * No compression, but we still need to write the pages in
499 * the file we've been given so far. redirty the locked
500 * page if it corresponds to our extent and set things up
501 * for the async work queue to run cow_file_range to do
502 * the normal delalloc dance
503 */
504 if (page_offset(locked_page) >= start &&
505 page_offset(locked_page) <= end) {
506 __set_page_dirty_nobuffers(locked_page);
507 /* unlocked later on in the async handlers */
508 }
509 add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0);
510 *num_added += 1;
511 }
512
513out:
514 return 0;
515
516free_pages_out:
517 for (i = 0; i < nr_pages_ret; i++) {
518 WARN_ON(pages[i]->mapping);
519 page_cache_release(pages[i]);
520 }
521 kfree(pages);
522
523 goto out;
524}
525
526/*
527 * phase two of compressed writeback. This is the ordered portion
528 * of the code, which only gets called in the order the work was
529 * queued. We walk all the async extents created by compress_file_range
530 * and send them down to the disk.
531 */
532static noinline int submit_compressed_extents(struct inode *inode,
533 struct async_cow *async_cow)
534{
535 struct async_extent *async_extent;
536 u64 alloc_hint = 0;
537 struct btrfs_trans_handle *trans;
538 struct btrfs_key ins;
539 struct extent_map *em;
540 struct btrfs_root *root = BTRFS_I(inode)->root;
541 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
542 struct extent_io_tree *io_tree;
543 int ret;
544
545 if (list_empty(&async_cow->extents))
546 return 0;
547
548 trans = btrfs_join_transaction(root, 1);
549
550 while (!list_empty(&async_cow->extents)) {
551 async_extent = list_entry(async_cow->extents.next,
552 struct async_extent, list);
553 list_del(&async_extent->list);
554
555 io_tree = &BTRFS_I(inode)->io_tree;
556
557 /* did the compression code fall back to uncompressed IO? */
558 if (!async_extent->pages) {
559 int page_started = 0;
560 unsigned long nr_written = 0;
561
562 lock_extent(io_tree, async_extent->start,
563 async_extent->start +
564 async_extent->ram_size - 1, GFP_NOFS);
565
566 /* allocate blocks */
567 cow_file_range(inode, async_cow->locked_page,
568 async_extent->start,
569 async_extent->start +
570 async_extent->ram_size - 1,
571 &page_started, &nr_written, 0);
572
573 /*
574 * if page_started, cow_file_range inserted an
575 * inline extent and took care of all the unlocking
576 * and IO for us. Otherwise, we need to submit
577 * all those pages down to the drive.
578 */
579 if (!page_started)
580 extent_write_locked_range(io_tree,
581 inode, async_extent->start,
582 async_extent->start +
583 async_extent->ram_size - 1,
584 btrfs_get_extent,
585 WB_SYNC_ALL);
586 kfree(async_extent);
587 cond_resched();
588 continue;
589 }
590
591 lock_extent(io_tree, async_extent->start,
592 async_extent->start + async_extent->ram_size - 1,
593 GFP_NOFS);
594 /*
595 * here we're doing allocation and writeback of the
596 * compressed pages
597 */
598 btrfs_drop_extent_cache(inode, async_extent->start,
599 async_extent->start +
600 async_extent->ram_size - 1, 0);
601
602 ret = btrfs_reserve_extent(trans, root,
603 async_extent->compressed_size,
604 async_extent->compressed_size,
605 0, alloc_hint,
606 (u64)-1, &ins, 1);
607 BUG_ON(ret);
608 em = alloc_extent_map(GFP_NOFS);
609 em->start = async_extent->start;
610 em->len = async_extent->ram_size;
611 em->orig_start = em->start;
612
613 em->block_start = ins.objectid;
614 em->block_len = ins.offset;
615 em->bdev = root->fs_info->fs_devices->latest_bdev;
616 set_bit(EXTENT_FLAG_PINNED, &em->flags);
617 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
618
619 while (1) {
620 spin_lock(&em_tree->lock);
621 ret = add_extent_mapping(em_tree, em);
622 spin_unlock(&em_tree->lock);
623 if (ret != -EEXIST) {
624 free_extent_map(em);
625 break;
626 }
627 btrfs_drop_extent_cache(inode, async_extent->start,
628 async_extent->start +
629 async_extent->ram_size - 1, 0);
630 }
631
632 ret = btrfs_add_ordered_extent(inode, async_extent->start,
633 ins.objectid,
634 async_extent->ram_size,
635 ins.offset,
636 BTRFS_ORDERED_COMPRESSED);
637 BUG_ON(ret);
638
639 btrfs_end_transaction(trans, root);
640
641 /*
642 * clear dirty, set writeback and unlock the pages.
643 */
644 extent_clear_unlock_delalloc(inode,
645 &BTRFS_I(inode)->io_tree,
646 async_extent->start,
647 async_extent->start +
648 async_extent->ram_size - 1,
649 NULL, 1, 1, 0, 1, 1, 0);
650
651 ret = btrfs_submit_compressed_write(inode,
652 async_extent->start,
653 async_extent->ram_size,
654 ins.objectid,
655 ins.offset, async_extent->pages,
656 async_extent->nr_pages);
657
658 BUG_ON(ret);
659 trans = btrfs_join_transaction(root, 1);
660 alloc_hint = ins.objectid + ins.offset;
661 kfree(async_extent);
662 cond_resched();
663 }
664
665 btrfs_end_transaction(trans, root);
666 return 0;
667}
668
669/*
670 * when extent_io.c finds a delayed allocation range in the file,
671 * the call backs end up in this code. The basic idea is to
672 * allocate extents on disk for the range, and create ordered data structs
673 * in ram to track those extents.
674 *
675 * locked_page is the page that writepage had locked already. We use
676 * it to make sure we don't do extra locks or unlocks.
677 *
678 * *page_started is set to one if we unlock locked_page and do everything
679 * required to start IO on it. It may be clean and already done with
680 * IO when we return.
681 */
682static noinline int cow_file_range(struct inode *inode,
683 struct page *locked_page,
684 u64 start, u64 end, int *page_started,
685 unsigned long *nr_written,
686 int unlock)
687{
688 struct btrfs_root *root = BTRFS_I(inode)->root;
689 struct btrfs_trans_handle *trans;
690 u64 alloc_hint = 0;
691 u64 num_bytes;
692 unsigned long ram_size;
693 u64 disk_num_bytes;
694 u64 cur_alloc_size;
695 u64 blocksize = root->sectorsize;
696 u64 actual_end;
697 u64 isize = i_size_read(inode);
698 struct btrfs_key ins;
699 struct extent_map *em;
700 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
701 int ret = 0;
702
703 trans = btrfs_join_transaction(root, 1);
704 BUG_ON(!trans);
705 btrfs_set_trans_block_group(trans, inode);
706
707 actual_end = min_t(u64, isize, end + 1);
708
709 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
710 num_bytes = max(blocksize, num_bytes);
711 disk_num_bytes = num_bytes;
712 ret = 0;
713
714 if (start == 0) {
715 /* lets try to make an inline extent */
716 ret = cow_file_range_inline(trans, root, inode,
717 start, end, 0, NULL);
718 if (ret == 0) {
719 extent_clear_unlock_delalloc(inode,
720 &BTRFS_I(inode)->io_tree,
721 start, end, NULL, 1, 1,
722 1, 1, 1, 1);
723 *nr_written = *nr_written +
724 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
725 *page_started = 1;
726 ret = 0;
727 goto out;
728 }
729 }
730
731 BUG_ON(disk_num_bytes >
732 btrfs_super_total_bytes(&root->fs_info->super_copy));
733
734 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
735
736 while (disk_num_bytes > 0) {
737 cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent);
738 ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
739 root->sectorsize, 0, alloc_hint,
740 (u64)-1, &ins, 1);
741 BUG_ON(ret);
742
743 em = alloc_extent_map(GFP_NOFS);
744 em->start = start;
745 em->orig_start = em->start;
746
747 ram_size = ins.offset;
748 em->len = ins.offset;
749
750 em->block_start = ins.objectid;
751 em->block_len = ins.offset;
752 em->bdev = root->fs_info->fs_devices->latest_bdev;
753 set_bit(EXTENT_FLAG_PINNED, &em->flags);
754
755 while (1) {
756 spin_lock(&em_tree->lock);
757 ret = add_extent_mapping(em_tree, em);
758 spin_unlock(&em_tree->lock);
759 if (ret != -EEXIST) {
760 free_extent_map(em);
761 break;
762 }
763 btrfs_drop_extent_cache(inode, start,
764 start + ram_size - 1, 0);
765 }
766
767 cur_alloc_size = ins.offset;
768 ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
769 ram_size, cur_alloc_size, 0);
770 BUG_ON(ret);
771
772 if (root->root_key.objectid ==
773 BTRFS_DATA_RELOC_TREE_OBJECTID) {
774 ret = btrfs_reloc_clone_csums(inode, start,
775 cur_alloc_size);
776 BUG_ON(ret);
777 }
778
779 if (disk_num_bytes < cur_alloc_size)
780 break;
781
782 /* we're not doing compressed IO, don't unlock the first
783 * page (which the caller expects to stay locked), don't
784 * clear any dirty bits and don't set any writeback bits
785 */
786 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
787 start, start + ram_size - 1,
788 locked_page, unlock, 1,
789 1, 0, 0, 0);
790 disk_num_bytes -= cur_alloc_size;
791 num_bytes -= cur_alloc_size;
792 alloc_hint = ins.objectid + ins.offset;
793 start += cur_alloc_size;
794 }
795out:
796 ret = 0;
797 btrfs_end_transaction(trans, root);
798
799 return ret;
800}
801
802/*
803 * work queue call back to started compression on a file and pages
804 */
805static noinline void async_cow_start(struct btrfs_work *work)
806{
807 struct async_cow *async_cow;
808 int num_added = 0;
809 async_cow = container_of(work, struct async_cow, work);
810
811 compress_file_range(async_cow->inode, async_cow->locked_page,
812 async_cow->start, async_cow->end, async_cow,
813 &num_added);
814 if (num_added == 0)
815 async_cow->inode = NULL;
816}
817
818/*
819 * work queue call back to submit previously compressed pages
820 */
821static noinline void async_cow_submit(struct btrfs_work *work)
822{
823 struct async_cow *async_cow;
824 struct btrfs_root *root;
825 unsigned long nr_pages;
826
827 async_cow = container_of(work, struct async_cow, work);
828
829 root = async_cow->root;
830 nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
831 PAGE_CACHE_SHIFT;
832
833 atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages);
834
835 if (atomic_read(&root->fs_info->async_delalloc_pages) <
836 5 * 1042 * 1024 &&
837 waitqueue_active(&root->fs_info->async_submit_wait))
838 wake_up(&root->fs_info->async_submit_wait);
839
840 if (async_cow->inode)
841 submit_compressed_extents(async_cow->inode, async_cow);
842}
843
844static noinline void async_cow_free(struct btrfs_work *work)
845{
846 struct async_cow *async_cow;
847 async_cow = container_of(work, struct async_cow, work);
848 kfree(async_cow);
849}
850
851static int cow_file_range_async(struct inode *inode, struct page *locked_page,
852 u64 start, u64 end, int *page_started,
853 unsigned long *nr_written)
854{
855 struct async_cow *async_cow;
856 struct btrfs_root *root = BTRFS_I(inode)->root;
857 unsigned long nr_pages;
858 u64 cur_end;
859 int limit = 10 * 1024 * 1042;
860
861 if (!btrfs_test_opt(root, COMPRESS)) {
862 return cow_file_range(inode, locked_page, start, end,
863 page_started, nr_written, 1);
864 }
865
866 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED |
867 EXTENT_DELALLOC, 1, 0, GFP_NOFS);
868 while (start < end) {
869 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
870 async_cow->inode = inode;
871 async_cow->root = root;
872 async_cow->locked_page = locked_page;
873 async_cow->start = start;
874
875 if (btrfs_test_flag(inode, NOCOMPRESS))
876 cur_end = end;
877 else
878 cur_end = min(end, start + 512 * 1024 - 1);
879
880 async_cow->end = cur_end;
881 INIT_LIST_HEAD(&async_cow->extents);
882
883 async_cow->work.func = async_cow_start;
884 async_cow->work.ordered_func = async_cow_submit;
885 async_cow->work.ordered_free = async_cow_free;
886 async_cow->work.flags = 0;
887
888 nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >>
889 PAGE_CACHE_SHIFT;
890 atomic_add(nr_pages, &root->fs_info->async_delalloc_pages);
891
892 btrfs_queue_worker(&root->fs_info->delalloc_workers,
893 &async_cow->work);
894
895 if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) {
896 wait_event(root->fs_info->async_submit_wait,
897 (atomic_read(&root->fs_info->async_delalloc_pages) <
898 limit));
899 }
900
901 while (atomic_read(&root->fs_info->async_submit_draining) &&
902 atomic_read(&root->fs_info->async_delalloc_pages)) {
903 wait_event(root->fs_info->async_submit_wait,
904 (atomic_read(&root->fs_info->async_delalloc_pages) ==
905 0));
906 }
907
908 *nr_written += nr_pages;
909 start = cur_end + 1;
910 }
911 *page_started = 1;
912 return 0;
913}
914
915static noinline int csum_exist_in_range(struct btrfs_root *root,
916 u64 bytenr, u64 num_bytes)
917{
918 int ret;
919 struct btrfs_ordered_sum *sums;
920 LIST_HEAD(list);
921
922 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr,
923 bytenr + num_bytes - 1, &list);
924 if (ret == 0 && list_empty(&list))
925 return 0;
926
927 while (!list_empty(&list)) {
928 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
929 list_del(&sums->list);
930 kfree(sums);
931 }
932 return 1;
933}
934
935/*
936 * when nowcow writeback call back. This checks for snapshots or COW copies
937 * of the extents that exist in the file, and COWs the file as required.
938 *
939 * If no cow copies or snapshots exist, we write directly to the existing
940 * blocks on disk
941 */
942static int run_delalloc_nocow(struct inode *inode, struct page *locked_page,
943 u64 start, u64 end, int *page_started, int force,
944 unsigned long *nr_written)
945{
946 struct btrfs_root *root = BTRFS_I(inode)->root;
947 struct btrfs_trans_handle *trans;
948 struct extent_buffer *leaf;
949 struct btrfs_path *path;
950 struct btrfs_file_extent_item *fi;
951 struct btrfs_key found_key;
952 u64 cow_start;
953 u64 cur_offset;
954 u64 extent_end;
955 u64 disk_bytenr;
956 u64 num_bytes;
957 int extent_type;
958 int ret;
959 int type;
960 int nocow;
961 int check_prev = 1;
962
963 path = btrfs_alloc_path();
964 BUG_ON(!path);
965 trans = btrfs_join_transaction(root, 1);
966 BUG_ON(!trans);
967
968 cow_start = (u64)-1;
969 cur_offset = start;
970 while (1) {
971 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
972 cur_offset, 0);
973 BUG_ON(ret < 0);
974 if (ret > 0 && path->slots[0] > 0 && check_prev) {
975 leaf = path->nodes[0];
976 btrfs_item_key_to_cpu(leaf, &found_key,
977 path->slots[0] - 1);
978 if (found_key.objectid == inode->i_ino &&
979 found_key.type == BTRFS_EXTENT_DATA_KEY)
980 path->slots[0]--;
981 }
982 check_prev = 0;
983next_slot:
984 leaf = path->nodes[0];
985 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
986 ret = btrfs_next_leaf(root, path);
987 if (ret < 0)
988 BUG_ON(1);
989 if (ret > 0)
990 break;
991 leaf = path->nodes[0];
992 }
993
994 nocow = 0;
995 disk_bytenr = 0;
996 num_bytes = 0;
997 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
998
999 if (found_key.objectid > inode->i_ino ||
1000 found_key.type > BTRFS_EXTENT_DATA_KEY ||
1001 found_key.offset > end)
1002 break;
1003
1004 if (found_key.offset > cur_offset) {
1005 extent_end = found_key.offset;
1006 goto out_check;
1007 }
1008
1009 fi = btrfs_item_ptr(leaf, path->slots[0],
1010 struct btrfs_file_extent_item);
1011 extent_type = btrfs_file_extent_type(leaf, fi);
1012
1013 if (extent_type == BTRFS_FILE_EXTENT_REG ||
1014 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1015 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1016 extent_end = found_key.offset +
1017 btrfs_file_extent_num_bytes(leaf, fi);
1018 if (extent_end <= start) {
1019 path->slots[0]++;
1020 goto next_slot;
1021 }
1022 if (disk_bytenr == 0)
1023 goto out_check;
1024 if (btrfs_file_extent_compression(leaf, fi) ||
1025 btrfs_file_extent_encryption(leaf, fi) ||
1026 btrfs_file_extent_other_encoding(leaf, fi))
1027 goto out_check;
1028 if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
1029 goto out_check;
1030 if (btrfs_extent_readonly(root, disk_bytenr))
1031 goto out_check;
1032 if (btrfs_cross_ref_exist(trans, root, inode->i_ino,
1033 disk_bytenr))
1034 goto out_check;
1035 disk_bytenr += btrfs_file_extent_offset(leaf, fi);
1036 disk_bytenr += cur_offset - found_key.offset;
1037 num_bytes = min(end + 1, extent_end) - cur_offset;
1038 /*
1039 * force cow if csum exists in the range.
1040 * this ensure that csum for a given extent are
1041 * either valid or do not exist.
1042 */
1043 if (csum_exist_in_range(root, disk_bytenr, num_bytes))
1044 goto out_check;
1045 nocow = 1;
1046 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1047 extent_end = found_key.offset +
1048 btrfs_file_extent_inline_len(leaf, fi);
1049 extent_end = ALIGN(extent_end, root->sectorsize);
1050 } else {
1051 BUG_ON(1);
1052 }
1053out_check:
1054 if (extent_end <= start) {
1055 path->slots[0]++;
1056 goto next_slot;
1057 }
1058 if (!nocow) {
1059 if (cow_start == (u64)-1)
1060 cow_start = cur_offset;
1061 cur_offset = extent_end;
1062 if (cur_offset > end)
1063 break;
1064 path->slots[0]++;
1065 goto next_slot;
1066 }
1067
1068 btrfs_release_path(root, path);
1069 if (cow_start != (u64)-1) {
1070 ret = cow_file_range(inode, locked_page, cow_start,
1071 found_key.offset - 1, page_started,
1072 nr_written, 1);
1073 BUG_ON(ret);
1074 cow_start = (u64)-1;
1075 }
1076
1077 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1078 struct extent_map *em;
1079 struct extent_map_tree *em_tree;
1080 em_tree = &BTRFS_I(inode)->extent_tree;
1081 em = alloc_extent_map(GFP_NOFS);
1082 em->start = cur_offset;
1083 em->orig_start = em->start;
1084 em->len = num_bytes;
1085 em->block_len = num_bytes;
1086 em->block_start = disk_bytenr;
1087 em->bdev = root->fs_info->fs_devices->latest_bdev;
1088 set_bit(EXTENT_FLAG_PINNED, &em->flags);
1089 while (1) {
1090 spin_lock(&em_tree->lock);
1091 ret = add_extent_mapping(em_tree, em);
1092 spin_unlock(&em_tree->lock);
1093 if (ret != -EEXIST) {
1094 free_extent_map(em);
1095 break;
1096 }
1097 btrfs_drop_extent_cache(inode, em->start,
1098 em->start + em->len - 1, 0);
1099 }
1100 type = BTRFS_ORDERED_PREALLOC;
1101 } else {
1102 type = BTRFS_ORDERED_NOCOW;
1103 }
1104
1105 ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
1106 num_bytes, num_bytes, type);
1107 BUG_ON(ret);
1108
1109 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
1110 cur_offset, cur_offset + num_bytes - 1,
1111 locked_page, 1, 1, 1, 0, 0, 0);
1112 cur_offset = extent_end;
1113 if (cur_offset > end)
1114 break;
1115 }
1116 btrfs_release_path(root, path);
1117
1118 if (cur_offset <= end && cow_start == (u64)-1)
1119 cow_start = cur_offset;
1120 if (cow_start != (u64)-1) {
1121 ret = cow_file_range(inode, locked_page, cow_start, end,
1122 page_started, nr_written, 1);
1123 BUG_ON(ret);
1124 }
1125
1126 ret = btrfs_end_transaction(trans, root);
1127 BUG_ON(ret);
1128 btrfs_free_path(path);
1129 return 0;
1130}
1131
1132/*
1133 * extent_io.c call back to do delayed allocation processing
1134 */
1135static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1136 u64 start, u64 end, int *page_started,
1137 unsigned long *nr_written)
1138{
1139 int ret;
1140
1141 if (btrfs_test_flag(inode, NODATACOW))
1142 ret = run_delalloc_nocow(inode, locked_page, start, end,
1143 page_started, 1, nr_written);
1144 else if (btrfs_test_flag(inode, PREALLOC))
1145 ret = run_delalloc_nocow(inode, locked_page, start, end,
1146 page_started, 0, nr_written);
1147 else
1148 ret = cow_file_range_async(inode, locked_page, start, end,
1149 page_started, nr_written);
1150
1151 return ret;
1152}
1153
1154/*
1155 * extent_io.c set_bit_hook, used to track delayed allocation
1156 * bytes in this file, and to maintain the list of inodes that
1157 * have pending delalloc work to be done.
1158 */
1159static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
1160 unsigned long old, unsigned long bits)
1161{
1162 /*
1163 * set_bit and clear bit hooks normally require _irqsave/restore
1164 * but in this case, we are only testeing for the DELALLOC
1165 * bit, which is only set or cleared with irqs on
1166 */
1167 if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
1168 struct btrfs_root *root = BTRFS_I(inode)->root;
1169 spin_lock(&root->fs_info->delalloc_lock);
1170 BTRFS_I(inode)->delalloc_bytes += end - start + 1;
1171 root->fs_info->delalloc_bytes += end - start + 1;
1172 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1173 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1174 &root->fs_info->delalloc_inodes);
1175 }
1176 spin_unlock(&root->fs_info->delalloc_lock);
1177 }
1178 return 0;
1179}
1180
1181/*
1182 * extent_io.c clear_bit_hook, see set_bit_hook for why
1183 */
1184static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
1185 unsigned long old, unsigned long bits)
1186{
1187 /*
1188 * set_bit and clear bit hooks normally require _irqsave/restore
1189 * but in this case, we are only testeing for the DELALLOC
1190 * bit, which is only set or cleared with irqs on
1191 */
1192 if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
1193 struct btrfs_root *root = BTRFS_I(inode)->root;
1194
1195 spin_lock(&root->fs_info->delalloc_lock);
1196 if (end - start + 1 > root->fs_info->delalloc_bytes) {
1197 printk(KERN_INFO "btrfs warning: delalloc account "
1198 "%llu %llu\n",
1199 (unsigned long long)end - start + 1,
1200 (unsigned long long)
1201 root->fs_info->delalloc_bytes);
1202 root->fs_info->delalloc_bytes = 0;
1203 BTRFS_I(inode)->delalloc_bytes = 0;
1204 } else {
1205 root->fs_info->delalloc_bytes -= end - start + 1;
1206 BTRFS_I(inode)->delalloc_bytes -= end - start + 1;
1207 }
1208 if (BTRFS_I(inode)->delalloc_bytes == 0 &&
1209 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1210 list_del_init(&BTRFS_I(inode)->delalloc_inodes);
1211 }
1212 spin_unlock(&root->fs_info->delalloc_lock);
1213 }
1214 return 0;
1215}
1216
1217/*
1218 * extent_io.c merge_bio_hook, this must check the chunk tree to make sure
1219 * we don't create bios that span stripes or chunks
1220 */
1221int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
1222 size_t size, struct bio *bio,
1223 unsigned long bio_flags)
1224{
1225 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
1226 struct btrfs_mapping_tree *map_tree;
1227 u64 logical = (u64)bio->bi_sector << 9;
1228 u64 length = 0;
1229 u64 map_length;
1230 int ret;
1231
1232 if (bio_flags & EXTENT_BIO_COMPRESSED)
1233 return 0;
1234
1235 length = bio->bi_size;
1236 map_tree = &root->fs_info->mapping_tree;
1237 map_length = length;
1238 ret = btrfs_map_block(map_tree, READ, logical,
1239 &map_length, NULL, 0);
1240
1241 if (map_length < length + size)
1242 return 1;
1243 return 0;
1244}
1245
1246/*
1247 * in order to insert checksums into the metadata in large chunks,
1248 * we wait until bio submission time. All the pages in the bio are
1249 * checksummed and sums are attached onto the ordered extent record.
1250 *
1251 * At IO completion time the cums attached on the ordered extent record
1252 * are inserted into the btree
1253 */
1254static int __btrfs_submit_bio_start(struct inode *inode, int rw,
1255 struct bio *bio, int mirror_num,
1256 unsigned long bio_flags)
1257{
1258 struct btrfs_root *root = BTRFS_I(inode)->root;
1259 int ret = 0;
1260
1261 ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
1262 BUG_ON(ret);
1263 return 0;
1264}
1265
1266/*
1267 * in order to insert checksums into the metadata in large chunks,
1268 * we wait until bio submission time. All the pages in the bio are
1269 * checksummed and sums are attached onto the ordered extent record.
1270 *
1271 * At IO completion time the cums attached on the ordered extent record
1272 * are inserted into the btree
1273 */
1274static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
1275 int mirror_num, unsigned long bio_flags)
1276{
1277 struct btrfs_root *root = BTRFS_I(inode)->root;
1278 return btrfs_map_bio(root, rw, bio, mirror_num, 1);
1279}
1280
1281/*
1282 * extent_io.c submission hook. This does the right thing for csum calculation
1283 * on write, or reading the csums from the tree before a read
1284 */
1285static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1286 int mirror_num, unsigned long bio_flags)
1287{
1288 struct btrfs_root *root = BTRFS_I(inode)->root;
1289 int ret = 0;
1290 int skip_sum;
1291
1292 skip_sum = btrfs_test_flag(inode, NODATASUM);
1293
1294 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
1295 BUG_ON(ret);
1296
1297 if (!(rw & (1 << BIO_RW))) {
1298 if (bio_flags & EXTENT_BIO_COMPRESSED) {
1299 return btrfs_submit_compressed_read(inode, bio,
1300 mirror_num, bio_flags);
1301 } else if (!skip_sum)
1302 btrfs_lookup_bio_sums(root, inode, bio, NULL);
1303 goto mapit;
1304 } else if (!skip_sum) {
1305 /* csum items have already been cloned */
1306 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
1307 goto mapit;
1308 /* we're doing a write, do the async checksumming */
1309 return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
1310 inode, rw, bio, mirror_num,
1311 bio_flags, __btrfs_submit_bio_start,
1312 __btrfs_submit_bio_done);
1313 }
1314
1315mapit:
1316 return btrfs_map_bio(root, rw, bio, mirror_num, 0);
1317}
1318
1319/*
1320 * given a list of ordered sums record them in the inode. This happens
1321 * at IO completion time based on sums calculated at bio submission time.
1322 */
1323static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
1324 struct inode *inode, u64 file_offset,
1325 struct list_head *list)
1326{
1327 struct list_head *cur;
1328 struct btrfs_ordered_sum *sum;
1329
1330 btrfs_set_trans_block_group(trans, inode);
1331 list_for_each(cur, list) {
1332 sum = list_entry(cur, struct btrfs_ordered_sum, list);
1333 btrfs_csum_file_blocks(trans,
1334 BTRFS_I(inode)->root->fs_info->csum_root, sum);
1335 }
1336 return 0;
1337}
1338
1339int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end)
1340{
1341 if ((end & (PAGE_CACHE_SIZE - 1)) == 0)
1342 WARN_ON(1);
1343 return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
1344 GFP_NOFS);
1345}
1346
1347/* see btrfs_writepage_start_hook for details on why this is required */
1348struct btrfs_writepage_fixup {
1349 struct page *page;
1350 struct btrfs_work work;
1351};
1352
1353static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
1354{
1355 struct btrfs_writepage_fixup *fixup;
1356 struct btrfs_ordered_extent *ordered;
1357 struct page *page;
1358 struct inode *inode;
1359 u64 page_start;
1360 u64 page_end;
1361
1362 fixup = container_of(work, struct btrfs_writepage_fixup, work);
1363 page = fixup->page;
1364again:
1365 lock_page(page);
1366 if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
1367 ClearPageChecked(page);
1368 goto out_page;
1369 }
1370
1371 inode = page->mapping->host;
1372 page_start = page_offset(page);
1373 page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
1374
1375 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
1376
1377 /* already ordered? We're done */
1378 if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
1379 EXTENT_ORDERED, 0)) {
1380 goto out;
1381 }
1382
1383 ordered = btrfs_lookup_ordered_extent(inode, page_start);
1384 if (ordered) {
1385 unlock_extent(&BTRFS_I(inode)->io_tree, page_start,
1386 page_end, GFP_NOFS);
1387 unlock_page(page);
1388 btrfs_start_ordered_extent(inode, ordered, 1);
1389 goto again;
1390 }
1391
1392 btrfs_set_extent_delalloc(inode, page_start, page_end);
1393 ClearPageChecked(page);
1394out:
1395 unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
1396out_page:
1397 unlock_page(page);
1398 page_cache_release(page);
1399}
1400
1401/*
1402 * There are a few paths in the higher layers of the kernel that directly
1403 * set the page dirty bit without asking the filesystem if it is a
1404 * good idea. This causes problems because we want to make sure COW
1405 * properly happens and the data=ordered rules are followed.
1406 *
1407 * In our case any range that doesn't have the ORDERED bit set
1408 * hasn't been properly setup for IO. We kick off an async process
1409 * to fix it up. The async helper will wait for ordered extents, set
1410 * the delalloc bit and make it safe to write the page.
1411 */
1412static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
1413{
1414 struct inode *inode = page->mapping->host;
1415 struct btrfs_writepage_fixup *fixup;
1416 struct btrfs_root *root = BTRFS_I(inode)->root;
1417 int ret;
1418
1419 ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
1420 EXTENT_ORDERED, 0);
1421 if (ret)
1422 return 0;
1423
1424 if (PageChecked(page))
1425 return -EAGAIN;
1426
1427 fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
1428 if (!fixup)
1429 return -EAGAIN;
1430
1431 SetPageChecked(page);
1432 page_cache_get(page);
1433 fixup->work.func = btrfs_writepage_fixup_worker;
1434 fixup->page = page;
1435 btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work);
1436 return -EAGAIN;
1437}
1438
1439static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1440 struct inode *inode, u64 file_pos,
1441 u64 disk_bytenr, u64 disk_num_bytes,
1442 u64 num_bytes, u64 ram_bytes,
1443 u8 compression, u8 encryption,
1444 u16 other_encoding, int extent_type)
1445{
1446 struct btrfs_root *root = BTRFS_I(inode)->root;
1447 struct btrfs_file_extent_item *fi;
1448 struct btrfs_path *path;
1449 struct extent_buffer *leaf;
1450 struct btrfs_key ins;
1451 u64 hint;
1452 int ret;
1453
1454 path = btrfs_alloc_path();
1455 BUG_ON(!path);
1456
1457 ret = btrfs_drop_extents(trans, root, inode, file_pos,
1458 file_pos + num_bytes, file_pos, &hint);
1459 BUG_ON(ret);
1460
1461 ins.objectid = inode->i_ino;
1462 ins.offset = file_pos;
1463 ins.type = BTRFS_EXTENT_DATA_KEY;
1464 ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi));
1465 BUG_ON(ret);
1466 leaf = path->nodes[0];
1467 fi = btrfs_item_ptr(leaf, path->slots[0],
1468 struct btrfs_file_extent_item);
1469 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
1470 btrfs_set_file_extent_type(leaf, fi, extent_type);
1471 btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
1472 btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_num_bytes);
1473 btrfs_set_file_extent_offset(leaf, fi, 0);
1474 btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
1475 btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes);
1476 btrfs_set_file_extent_compression(leaf, fi, compression);
1477 btrfs_set_file_extent_encryption(leaf, fi, encryption);
1478 btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);
1479 btrfs_mark_buffer_dirty(leaf);
1480
1481 inode_add_bytes(inode, num_bytes);
1482 btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0);
1483
1484 ins.objectid = disk_bytenr;
1485 ins.offset = disk_num_bytes;
1486 ins.type = BTRFS_EXTENT_ITEM_KEY;
1487 ret = btrfs_alloc_reserved_extent(trans, root, leaf->start,
1488 root->root_key.objectid,
1489 trans->transid, inode->i_ino, &ins);
1490 BUG_ON(ret);
1491
1492 btrfs_free_path(path);
1493 return 0;
1494}
1495
1496/* as ordered data IO finishes, this gets called so we can finish
1497 * an ordered extent if the range of bytes in the file it covers are
1498 * fully written.
1499 */
1500static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1501{
1502 struct btrfs_root *root = BTRFS_I(inode)->root;
1503 struct btrfs_trans_handle *trans;
1504 struct btrfs_ordered_extent *ordered_extent;
1505 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1506 int compressed = 0;
1507 int ret;
1508
1509 ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1);
1510 if (!ret)
1511 return 0;
1512
1513 trans = btrfs_join_transaction(root, 1);
1514
1515 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
1516 BUG_ON(!ordered_extent);
1517 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
1518 goto nocow;
1519
1520 lock_extent(io_tree, ordered_extent->file_offset,
1521 ordered_extent->file_offset + ordered_extent->len - 1,
1522 GFP_NOFS);
1523
1524 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
1525 compressed = 1;
1526 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
1527 BUG_ON(compressed);
1528 ret = btrfs_mark_extent_written(trans, root, inode,
1529 ordered_extent->file_offset,
1530 ordered_extent->file_offset +
1531 ordered_extent->len);
1532 BUG_ON(ret);
1533 } else {
1534 ret = insert_reserved_file_extent(trans, inode,
1535 ordered_extent->file_offset,
1536 ordered_extent->start,
1537 ordered_extent->disk_len,
1538 ordered_extent->len,
1539 ordered_extent->len,
1540 compressed, 0, 0,
1541 BTRFS_FILE_EXTENT_REG);
1542 BUG_ON(ret);
1543 }
1544 unlock_extent(io_tree, ordered_extent->file_offset,
1545 ordered_extent->file_offset + ordered_extent->len - 1,
1546 GFP_NOFS);
1547nocow:
1548 add_pending_csums(trans, inode, ordered_extent->file_offset,
1549 &ordered_extent->list);
1550
1551 mutex_lock(&BTRFS_I(inode)->extent_mutex);
1552 btrfs_ordered_update_i_size(inode, ordered_extent);
1553 btrfs_update_inode(trans, root, inode);
1554 btrfs_remove_ordered_extent(inode, ordered_extent);
1555 mutex_unlock(&BTRFS_I(inode)->extent_mutex);
1556
1557 /* once for us */
1558 btrfs_put_ordered_extent(ordered_extent);
1559 /* once for the tree */
1560 btrfs_put_ordered_extent(ordered_extent);
1561
1562 btrfs_end_transaction(trans, root);
1563 return 0;
1564}
1565
1566static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
1567 struct extent_state *state, int uptodate)
1568{
1569 return btrfs_finish_ordered_io(page->mapping->host, start, end);
1570}
1571
1572/*
1573 * When IO fails, either with EIO or csum verification fails, we
1574 * try other mirrors that might have a good copy of the data. This
1575 * io_failure_record is used to record state as we go through all the
1576 * mirrors. If another mirror has good data, the page is set up to date
1577 * and things continue. If a good mirror can't be found, the original
1578 * bio end_io callback is called to indicate things have failed.
1579 */
1580struct io_failure_record {
1581 struct page *page;
1582 u64 start;
1583 u64 len;
1584 u64 logical;
1585 unsigned long bio_flags;
1586 int last_mirror;
1587};
1588
1589static int btrfs_io_failed_hook(struct bio *failed_bio,
1590 struct page *page, u64 start, u64 end,
1591 struct extent_state *state)
1592{
1593 struct io_failure_record *failrec = NULL;
1594 u64 private;
1595 struct extent_map *em;
1596 struct inode *inode = page->mapping->host;
1597 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
1598 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
1599 struct bio *bio;
1600 int num_copies;
1601 int ret;
1602 int rw;
1603 u64 logical;
1604
1605 ret = get_state_private(failure_tree, start, &private);
1606 if (ret) {
1607 failrec = kmalloc(sizeof(*failrec), GFP_NOFS);
1608 if (!failrec)
1609 return -ENOMEM;
1610 failrec->start = start;
1611 failrec->len = end - start + 1;
1612 failrec->last_mirror = 0;
1613 failrec->bio_flags = 0;
1614
1615 spin_lock(&em_tree->lock);
1616 em = lookup_extent_mapping(em_tree, start, failrec->len);
1617 if (em->start > start || em->start + em->len < start) {
1618 free_extent_map(em);
1619 em = NULL;
1620 }
1621 spin_unlock(&em_tree->lock);
1622
1623 if (!em || IS_ERR(em)) {
1624 kfree(failrec);
1625 return -EIO;
1626 }
1627 logical = start - em->start;
1628 logical = em->block_start + logical;
1629 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
1630 logical = em->block_start;
1631 failrec->bio_flags = EXTENT_BIO_COMPRESSED;
1632 }
1633 failrec->logical = logical;
1634 free_extent_map(em);
1635 set_extent_bits(failure_tree, start, end, EXTENT_LOCKED |
1636 EXTENT_DIRTY, GFP_NOFS);
1637 set_state_private(failure_tree, start,
1638 (u64)(unsigned long)failrec);
1639 } else {
1640 failrec = (struct io_failure_record *)(unsigned long)private;
1641 }
1642 num_copies = btrfs_num_copies(
1643 &BTRFS_I(inode)->root->fs_info->mapping_tree,
1644 failrec->logical, failrec->len);
1645 failrec->last_mirror++;
1646 if (!state) {
1647 spin_lock(&BTRFS_I(inode)->io_tree.lock);
1648 state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
1649 failrec->start,
1650 EXTENT_LOCKED);
1651 if (state && state->start != failrec->start)
1652 state = NULL;
1653 spin_unlock(&BTRFS_I(inode)->io_tree.lock);
1654 }
1655 if (!state || failrec->last_mirror > num_copies) {
1656 set_state_private(failure_tree, failrec->start, 0);
1657 clear_extent_bits(failure_tree, failrec->start,
1658 failrec->start + failrec->len - 1,
1659 EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
1660 kfree(failrec);
1661 return -EIO;
1662 }
1663 bio = bio_alloc(GFP_NOFS, 1);
1664 bio->bi_private = state;
1665 bio->bi_end_io = failed_bio->bi_end_io;
1666 bio->bi_sector = failrec->logical >> 9;
1667 bio->bi_bdev = failed_bio->bi_bdev;
1668 bio->bi_size = 0;
1669
1670 bio_add_page(bio, page, failrec->len, start - page_offset(page));
1671 if (failed_bio->bi_rw & (1 << BIO_RW))
1672 rw = WRITE;
1673 else
1674 rw = READ;
1675
1676 BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
1677 failrec->last_mirror,
1678 failrec->bio_flags);
1679 return 0;
1680}
1681
1682/*
1683 * each time an IO finishes, we do a fast check in the IO failure tree
1684 * to see if we need to process or clean up an io_failure_record
1685 */
1686static int btrfs_clean_io_failures(struct inode *inode, u64 start)
1687{
1688 u64 private;
1689 u64 private_failure;
1690 struct io_failure_record *failure;
1691 int ret;
1692
1693 private = 0;
1694 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
1695 (u64)-1, 1, EXTENT_DIRTY)) {
1696 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
1697 start, &private_failure);
1698 if (ret == 0) {
1699 failure = (struct io_failure_record *)(unsigned long)
1700 private_failure;
1701 set_state_private(&BTRFS_I(inode)->io_failure_tree,
1702 failure->start, 0);
1703 clear_extent_bits(&BTRFS_I(inode)->io_failure_tree,
1704 failure->start,
1705 failure->start + failure->len - 1,
1706 EXTENT_DIRTY | EXTENT_LOCKED,
1707 GFP_NOFS);
1708 kfree(failure);
1709 }
1710 }
1711 return 0;
1712}
1713
1714/*
1715 * when reads are done, we need to check csums to verify the data is correct
1716 * if there's a match, we allow the bio to finish. If not, we go through
1717 * the io_failure_record routines to find good copies
1718 */
1719static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
1720 struct extent_state *state)
1721{
1722 size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
1723 struct inode *inode = page->mapping->host;
1724 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1725 char *kaddr;
1726 u64 private = ~(u32)0;
1727 int ret;
1728 struct btrfs_root *root = BTRFS_I(inode)->root;
1729 u32 csum = ~(u32)0;
1730
1731 if (PageChecked(page)) {
1732 ClearPageChecked(page);
1733 goto good;
1734 }
1735 if (btrfs_test_flag(inode, NODATASUM))
1736 return 0;
1737
1738 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
1739 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) {
1740 clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM,
1741 GFP_NOFS);
1742 return 0;
1743 }
1744
1745 if (state && state->start == start) {
1746 private = state->private;
1747 ret = 0;
1748 } else {
1749 ret = get_state_private(io_tree, start, &private);
1750 }
1751 kaddr = kmap_atomic(page, KM_USER0);
1752 if (ret)
1753 goto zeroit;
1754
1755 csum = btrfs_csum_data(root, kaddr + offset, csum, end - start + 1);
1756 btrfs_csum_final(csum, (char *)&csum);
1757 if (csum != private)
1758 goto zeroit;
1759
1760 kunmap_atomic(kaddr, KM_USER0);
1761good:
1762 /* if the io failure tree for this inode is non-empty,
1763 * check to see if we've recovered from a failed IO
1764 */
1765 btrfs_clean_io_failures(inode, start);
1766 return 0;
1767
1768zeroit:
1769 printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u "
1770 "private %llu\n", page->mapping->host->i_ino,
1771 (unsigned long long)start, csum,
1772 (unsigned long long)private);
1773 memset(kaddr + offset, 1, end - start + 1);
1774 flush_dcache_page(page);
1775 kunmap_atomic(kaddr, KM_USER0);
1776 if (private == 0)
1777 return 0;
1778 return -EIO;
1779}
1780
1781/*
1782 * This creates an orphan entry for the given inode in case something goes
1783 * wrong in the middle of an unlink/truncate.
1784 */
1785int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
1786{
1787 struct btrfs_root *root = BTRFS_I(inode)->root;
1788 int ret = 0;
1789
1790 spin_lock(&root->list_lock);
1791
1792 /* already on the orphan list, we're good */
1793 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
1794 spin_unlock(&root->list_lock);
1795 return 0;
1796 }
1797
1798 list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
1799
1800 spin_unlock(&root->list_lock);
1801
1802 /*
1803 * insert an orphan item to track this unlinked/truncated file
1804 */
1805 ret = btrfs_insert_orphan_item(trans, root, inode->i_ino);
1806
1807 return ret;
1808}
1809
1810/*
1811 * We have done the truncate/delete so we can go ahead and remove the orphan
1812 * item for this particular inode.
1813 */
1814int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
1815{
1816 struct btrfs_root *root = BTRFS_I(inode)->root;
1817 int ret = 0;
1818
1819 spin_lock(&root->list_lock);
1820
1821 if (list_empty(&BTRFS_I(inode)->i_orphan)) {
1822 spin_unlock(&root->list_lock);
1823 return 0;
1824 }
1825
1826 list_del_init(&BTRFS_I(inode)->i_orphan);
1827 if (!trans) {
1828 spin_unlock(&root->list_lock);
1829 return 0;
1830 }
1831
1832 spin_unlock(&root->list_lock);
1833
1834 ret = btrfs_del_orphan_item(trans, root, inode->i_ino);
1835
1836 return ret;
1837}
1838
1839/*
1840 * this cleans up any orphans that may be left on the list from the last use
1841 * of this root.
1842 */
1843void btrfs_orphan_cleanup(struct btrfs_root *root)
1844{
1845 struct btrfs_path *path;
1846 struct extent_buffer *leaf;
1847 struct btrfs_item *item;
1848 struct btrfs_key key, found_key;
1849 struct btrfs_trans_handle *trans;
1850 struct inode *inode;
1851 int ret = 0, nr_unlink = 0, nr_truncate = 0;
1852
1853 path = btrfs_alloc_path();
1854 if (!path)
1855 return;
1856 path->reada = -1;
1857
1858 key.objectid = BTRFS_ORPHAN_OBJECTID;
1859 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
1860 key.offset = (u64)-1;
1861
1862
1863 while (1) {
1864 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1865 if (ret < 0) {
1866 printk(KERN_ERR "Error searching slot for orphan: %d"
1867 "\n", ret);
1868 break;
1869 }
1870
1871 /*
1872 * if ret == 0 means we found what we were searching for, which
1873 * is weird, but possible, so only screw with path if we didnt
1874 * find the key and see if we have stuff that matches
1875 */
1876 if (ret > 0) {
1877 if (path->slots[0] == 0)
1878 break;
1879 path->slots[0]--;
1880 }
1881
1882 /* pull out the item */
1883 leaf = path->nodes[0];
1884 item = btrfs_item_nr(leaf, path->slots[0]);
1885 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1886
1887 /* make sure the item matches what we want */
1888 if (found_key.objectid != BTRFS_ORPHAN_OBJECTID)
1889 break;
1890 if (btrfs_key_type(&found_key) != BTRFS_ORPHAN_ITEM_KEY)
1891 break;
1892
1893 /* release the path since we're done with it */
1894 btrfs_release_path(root, path);
1895
1896 /*
1897 * this is where we are basically btrfs_lookup, without the
1898 * crossing root thing. we store the inode number in the
1899 * offset of the orphan item.
1900 */
1901 inode = btrfs_iget_locked(root->fs_info->sb,
1902 found_key.offset, root);
1903 if (!inode)
1904 break;
1905
1906 if (inode->i_state & I_NEW) {
1907 BTRFS_I(inode)->root = root;
1908
1909 /* have to set the location manually */
1910 BTRFS_I(inode)->location.objectid = inode->i_ino;
1911 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
1912 BTRFS_I(inode)->location.offset = 0;
1913
1914 btrfs_read_locked_inode(inode);
1915 unlock_new_inode(inode);
1916 }
1917
1918 /*
1919 * add this inode to the orphan list so btrfs_orphan_del does
1920 * the proper thing when we hit it
1921 */
1922 spin_lock(&root->list_lock);
1923 list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
1924 spin_unlock(&root->list_lock);
1925
1926 /*
1927 * if this is a bad inode, means we actually succeeded in
1928 * removing the inode, but not the orphan record, which means
1929 * we need to manually delete the orphan since iput will just
1930 * do a destroy_inode
1931 */
1932 if (is_bad_inode(inode)) {
1933 trans = btrfs_start_transaction(root, 1);
1934 btrfs_orphan_del(trans, inode);
1935 btrfs_end_transaction(trans, root);
1936 iput(inode);
1937 continue;
1938 }
1939
1940 /* if we have links, this was a truncate, lets do that */
1941 if (inode->i_nlink) {
1942 nr_truncate++;
1943 btrfs_truncate(inode);
1944 } else {
1945 nr_unlink++;
1946 }
1947
1948 /* this will do delete_inode and everything for us */
1949 iput(inode);
1950 }
1951
1952 if (nr_unlink)
1953 printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
1954 if (nr_truncate)
1955 printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
1956
1957 btrfs_free_path(path);
1958}
1959
1960/*
1961 * read an inode from the btree into the in-memory inode
1962 */
1963void btrfs_read_locked_inode(struct inode *inode)
1964{
1965 struct btrfs_path *path;
1966 struct extent_buffer *leaf;
1967 struct btrfs_inode_item *inode_item;
1968 struct btrfs_timespec *tspec;
1969 struct btrfs_root *root = BTRFS_I(inode)->root;
1970 struct btrfs_key location;
1971 u64 alloc_group_block;
1972 u32 rdev;
1973 int ret;
1974
1975 path = btrfs_alloc_path();
1976 BUG_ON(!path);
1977 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
1978
1979 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
1980 if (ret)
1981 goto make_bad;
1982
1983 leaf = path->nodes[0];
1984 inode_item = btrfs_item_ptr(leaf, path->slots[0],
1985 struct btrfs_inode_item);
1986
1987 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
1988 inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
1989 inode->i_uid = btrfs_inode_uid(leaf, inode_item);
1990 inode->i_gid = btrfs_inode_gid(leaf, inode_item);
1991 btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
1992
1993 tspec = btrfs_inode_atime(inode_item);
1994 inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
1995 inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
1996
1997 tspec = btrfs_inode_mtime(inode_item);
1998 inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
1999 inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
2000
2001 tspec = btrfs_inode_ctime(inode_item);
2002 inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
2003 inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
2004
2005 inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
2006 BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
2007 BTRFS_I(inode)->sequence = btrfs_inode_sequence(leaf, inode_item);
2008 inode->i_generation = BTRFS_I(inode)->generation;
2009 inode->i_rdev = 0;
2010 rdev = btrfs_inode_rdev(leaf, inode_item);
2011
2012 BTRFS_I(inode)->index_cnt = (u64)-1;
2013 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
2014
2015 alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
2016 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
2017 alloc_group_block, 0);
2018 btrfs_free_path(path);
2019 inode_item = NULL;
2020
2021 switch (inode->i_mode & S_IFMT) {
2022 case S_IFREG:
2023 inode->i_mapping->a_ops = &btrfs_aops;
2024 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
2025 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
2026 inode->i_fop = &btrfs_file_operations;
2027 inode->i_op = &btrfs_file_inode_operations;
2028 break;
2029 case S_IFDIR:
2030 inode->i_fop = &btrfs_dir_file_operations;
2031 if (root == root->fs_info->tree_root)
2032 inode->i_op = &btrfs_dir_ro_inode_operations;
2033 else
2034 inode->i_op = &btrfs_dir_inode_operations;
2035 break;
2036 case S_IFLNK:
2037 inode->i_op = &btrfs_symlink_inode_operations;
2038 inode->i_mapping->a_ops = &btrfs_symlink_aops;
2039 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
2040 break;
2041 default:
2042 init_special_inode(inode, inode->i_mode, rdev);
2043 break;
2044 }
2045 return;
2046
2047make_bad:
2048 btrfs_free_path(path);
2049 make_bad_inode(inode);
2050}
2051
2052/*
2053 * given a leaf and an inode, copy the inode fields into the leaf
2054 */
2055static void fill_inode_item(struct btrfs_trans_handle *trans,
2056 struct extent_buffer *leaf,
2057 struct btrfs_inode_item *item,
2058 struct inode *inode)
2059{
2060 btrfs_set_inode_uid(leaf, item, inode->i_uid);
2061 btrfs_set_inode_gid(leaf, item, inode->i_gid);
2062 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
2063 btrfs_set_inode_mode(leaf, item, inode->i_mode);
2064 btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
2065
2066 btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
2067 inode->i_atime.tv_sec);
2068 btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
2069 inode->i_atime.tv_nsec);
2070
2071 btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
2072 inode->i_mtime.tv_sec);
2073 btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
2074 inode->i_mtime.tv_nsec);
2075
2076 btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
2077 inode->i_ctime.tv_sec);
2078 btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
2079 inode->i_ctime.tv_nsec);
2080
2081 btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode));
2082 btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation);
2083 btrfs_set_inode_sequence(leaf, item, BTRFS_I(inode)->sequence);
2084 btrfs_set_inode_transid(leaf, item, trans->transid);
2085 btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
2086 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
2087 btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group);
2088}
2089
2090/*
2091 * copy everything in the in-memory inode into the btree.
2092 */
2093noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
2094 struct btrfs_root *root, struct inode *inode)
2095{
2096 struct btrfs_inode_item *inode_item;
2097 struct btrfs_path *path;
2098 struct extent_buffer *leaf;
2099 int ret;
2100
2101 path = btrfs_alloc_path();
2102 BUG_ON(!path);
2103 ret = btrfs_lookup_inode(trans, root, path,
2104 &BTRFS_I(inode)->location, 1);
2105 if (ret) {
2106 if (ret > 0)
2107 ret = -ENOENT;
2108 goto failed;
2109 }
2110
2111 leaf = path->nodes[0];
2112 inode_item = btrfs_item_ptr(leaf, path->slots[0],
2113 struct btrfs_inode_item);
2114
2115 fill_inode_item(trans, leaf, inode_item, inode);
2116 btrfs_mark_buffer_dirty(leaf);
2117 btrfs_set_inode_last_trans(trans, inode);
2118 ret = 0;
2119failed:
2120 btrfs_free_path(path);
2121 return ret;
2122}
2123
2124
2125/*
2126 * unlink helper that gets used here in inode.c and in the tree logging
2127 * recovery code. It remove a link in a directory with a given name, and
2128 * also drops the back refs in the inode to the directory
2129 */
2130int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2131 struct btrfs_root *root,
2132 struct inode *dir, struct inode *inode,
2133 const char *name, int name_len)
2134{
2135 struct btrfs_path *path;
2136 int ret = 0;
2137 struct extent_buffer *leaf;
2138 struct btrfs_dir_item *di;
2139 struct btrfs_key key;
2140 u64 index;
2141
2142 path = btrfs_alloc_path();
2143 if (!path) {
2144 ret = -ENOMEM;
2145 goto err;
2146 }
2147
2148 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
2149 name, name_len, -1);
2150 if (IS_ERR(di)) {
2151 ret = PTR_ERR(di);
2152 goto err;
2153 }
2154 if (!di) {
2155 ret = -ENOENT;
2156 goto err;
2157 }
2158 leaf = path->nodes[0];
2159 btrfs_dir_item_key_to_cpu(leaf, di, &key);
2160 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2161 if (ret)
2162 goto err;
2163 btrfs_release_path(root, path);
2164
2165 ret = btrfs_del_inode_ref(trans, root, name, name_len,
2166 inode->i_ino,
2167 dir->i_ino, &index);
2168 if (ret) {
2169 printk(KERN_INFO "btrfs failed to delete reference to %.*s, "
2170 "inode %lu parent %lu\n", name_len, name,
2171 inode->i_ino, dir->i_ino);
2172 goto err;
2173 }
2174
2175 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
2176 index, name, name_len, -1);
2177 if (IS_ERR(di)) {
2178 ret = PTR_ERR(di);
2179 goto err;
2180 }
2181 if (!di) {
2182 ret = -ENOENT;
2183 goto err;
2184 }
2185 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2186 btrfs_release_path(root, path);
2187
2188 ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
2189 inode, dir->i_ino);
2190 BUG_ON(ret != 0 && ret != -ENOENT);
2191 if (ret != -ENOENT)
2192 BTRFS_I(dir)->log_dirty_trans = trans->transid;
2193
2194 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
2195 dir, index);
2196 BUG_ON(ret);
2197err:
2198 btrfs_free_path(path);
2199 if (ret)
2200 goto out;
2201
2202 btrfs_i_size_write(dir, dir->i_size - name_len * 2);
2203 inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
2204 btrfs_update_inode(trans, root, dir);
2205 btrfs_drop_nlink(inode);
2206 ret = btrfs_update_inode(trans, root, inode);
2207 dir->i_sb->s_dirt = 1;
2208out:
2209 return ret;
2210}
2211
2212static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
2213{
2214 struct btrfs_root *root;
2215 struct btrfs_trans_handle *trans;
2216 struct inode *inode = dentry->d_inode;
2217 int ret;
2218 unsigned long nr = 0;
2219
2220 root = BTRFS_I(dir)->root;
2221
2222 ret = btrfs_check_free_space(root, 1, 1);
2223 if (ret)
2224 goto fail;
2225
2226 trans = btrfs_start_transaction(root, 1);
2227
2228 btrfs_set_trans_block_group(trans, dir);
2229 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
2230 dentry->d_name.name, dentry->d_name.len);
2231
2232 if (inode->i_nlink == 0)
2233 ret = btrfs_orphan_add(trans, inode);
2234
2235 nr = trans->blocks_used;
2236
2237 btrfs_end_transaction_throttle(trans, root);
2238fail:
2239 btrfs_btree_balance_dirty(root, nr);
2240 return ret;
2241}
2242
2243static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
2244{
2245 struct inode *inode = dentry->d_inode;
2246 int err = 0;
2247 int ret;
2248 struct btrfs_root *root = BTRFS_I(dir)->root;
2249 struct btrfs_trans_handle *trans;
2250 unsigned long nr = 0;
2251
2252 /*
2253 * the FIRST_FREE_OBJECTID check makes sure we don't try to rmdir
2254 * the root of a subvolume or snapshot
2255 */
2256 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE ||
2257 inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
2258 return -ENOTEMPTY;
2259 }
2260
2261 ret = btrfs_check_free_space(root, 1, 1);
2262 if (ret)
2263 goto fail;
2264
2265 trans = btrfs_start_transaction(root, 1);
2266 btrfs_set_trans_block_group(trans, dir);
2267
2268 err = btrfs_orphan_add(trans, inode);
2269 if (err)
2270 goto fail_trans;
2271
2272 /* now the directory is empty */
2273 err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
2274 dentry->d_name.name, dentry->d_name.len);
2275 if (!err)
2276 btrfs_i_size_write(inode, 0);
2277
2278fail_trans:
2279 nr = trans->blocks_used;
2280 ret = btrfs_end_transaction_throttle(trans, root);
2281fail:
2282 btrfs_btree_balance_dirty(root, nr);
2283
2284 if (ret && !err)
2285 err = ret;
2286 return err;
2287}
2288
2289#if 0
2290/*
2291 * when truncating bytes in a file, it is possible to avoid reading
2292 * the leaves that contain only checksum items. This can be the
2293 * majority of the IO required to delete a large file, but it must
2294 * be done carefully.
2295 *
2296 * The keys in the level just above the leaves are checked to make sure
2297 * the lowest key in a given leaf is a csum key, and starts at an offset
2298 * after the new size.
2299 *
2300 * Then the key for the next leaf is checked to make sure it also has
2301 * a checksum item for the same file. If it does, we know our target leaf
2302 * contains only checksum items, and it can be safely freed without reading
2303 * it.
2304 *
2305 * This is just an optimization targeted at large files. It may do
2306 * nothing. It will return 0 unless things went badly.
2307 */
2308static noinline int drop_csum_leaves(struct btrfs_trans_handle *trans,
2309 struct btrfs_root *root,
2310 struct btrfs_path *path,
2311 struct inode *inode, u64 new_size)
2312{
2313 struct btrfs_key key;
2314 int ret;
2315 int nritems;
2316 struct btrfs_key found_key;
2317 struct btrfs_key other_key;
2318 struct btrfs_leaf_ref *ref;
2319 u64 leaf_gen;
2320 u64 leaf_start;
2321
2322 path->lowest_level = 1;
2323 key.objectid = inode->i_ino;
2324 key.type = BTRFS_CSUM_ITEM_KEY;
2325 key.offset = new_size;
2326again:
2327 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2328 if (ret < 0)
2329 goto out;
2330
2331 if (path->nodes[1] == NULL) {
2332 ret = 0;
2333 goto out;
2334 }
2335 ret = 0;
2336 btrfs_node_key_to_cpu(path->nodes[1], &found_key, path->slots[1]);
2337 nritems = btrfs_header_nritems(path->nodes[1]);
2338
2339 if (!nritems)
2340 goto out;
2341
2342 if (path->slots[1] >= nritems)
2343 goto next_node;
2344
2345 /* did we find a key greater than anything we want to delete? */
2346 if (found_key.objectid > inode->i_ino ||
2347 (found_key.objectid == inode->i_ino && found_key.type > key.type))
2348 goto out;
2349
2350 /* we check the next key in the node to make sure the leave contains
2351 * only checksum items. This comparison doesn't work if our
2352 * leaf is the last one in the node
2353 */
2354 if (path->slots[1] + 1 >= nritems) {
2355next_node:
2356 /* search forward from the last key in the node, this
2357 * will bring us into the next node in the tree
2358 */
2359 btrfs_node_key_to_cpu(path->nodes[1], &found_key, nritems - 1);
2360
2361 /* unlikely, but we inc below, so check to be safe */
2362 if (found_key.offset == (u64)-1)
2363 goto out;
2364
2365 /* search_forward needs a path with locks held, do the
2366 * search again for the original key. It is possible
2367 * this will race with a balance and return a path that
2368 * we could modify, but this drop is just an optimization
2369 * and is allowed to miss some leaves.
2370 */
2371 btrfs_release_path(root, path);
2372 found_key.offset++;
2373
2374 /* setup a max key for search_forward */
2375 other_key.offset = (u64)-1;
2376 other_key.type = key.type;
2377 other_key.objectid = key.objectid;
2378
2379 path->keep_locks = 1;
2380 ret = btrfs_search_forward(root, &found_key, &other_key,
2381 path, 0, 0);
2382 path->keep_locks = 0;
2383 if (ret || found_key.objectid != key.objectid ||
2384 found_key.type != key.type) {
2385 ret = 0;
2386 goto out;
2387 }
2388
2389 key.offset = found_key.offset;
2390 btrfs_release_path(root, path);
2391 cond_resched();
2392 goto again;
2393 }
2394
2395 /* we know there's one more slot after us in the tree,
2396 * read that key so we can verify it is also a checksum item
2397 */
2398 btrfs_node_key_to_cpu(path->nodes[1], &other_key, path->slots[1] + 1);
2399
2400 if (found_key.objectid < inode->i_ino)
2401 goto next_key;
2402
2403 if (found_key.type != key.type || found_key.offset < new_size)
2404 goto next_key;
2405
2406 /*
2407 * if the key for the next leaf isn't a csum key from this objectid,
2408 * we can't be sure there aren't good items inside this leaf.
2409 * Bail out
2410 */
2411 if (other_key.objectid != inode->i_ino || other_key.type != key.type)
2412 goto out;
2413
2414 leaf_start = btrfs_node_blockptr(path->nodes[1], path->slots[1]);
2415 leaf_gen = btrfs_node_ptr_generation(path->nodes[1], path->slots[1]);
2416 /*
2417 * it is safe to delete this leaf, it contains only
2418 * csum items from this inode at an offset >= new_size
2419 */
2420 ret = btrfs_del_leaf(trans, root, path, leaf_start);
2421 BUG_ON(ret);
2422
2423 if (root->ref_cows && leaf_gen < trans->transid) {
2424 ref = btrfs_alloc_leaf_ref(root, 0);
2425 if (ref) {
2426 ref->root_gen = root->root_key.offset;
2427 ref->bytenr = leaf_start;
2428 ref->owner = 0;
2429 ref->generation = leaf_gen;
2430 ref->nritems = 0;
2431
2432 ret = btrfs_add_leaf_ref(root, ref, 0);
2433 WARN_ON(ret);
2434 btrfs_free_leaf_ref(root, ref);
2435 } else {
2436 WARN_ON(1);
2437 }
2438 }
2439next_key:
2440 btrfs_release_path(root, path);
2441
2442 if (other_key.objectid == inode->i_ino &&
2443 other_key.type == key.type && other_key.offset > key.offset) {
2444 key.offset = other_key.offset;
2445 cond_resched();
2446 goto again;
2447 }
2448 ret = 0;
2449out:
2450 /* fixup any changes we've made to the path */
2451 path->lowest_level = 0;
2452 path->keep_locks = 0;
2453 btrfs_release_path(root, path);
2454 return ret;
2455}
2456
2457#endif
2458
2459/*
2460 * this can truncate away extent items, csum items and directory items.
2461 * It starts at a high offset and removes keys until it can't find
2462 * any higher than new_size
2463 *
2464 * csum items that cross the new i_size are truncated to the new size
2465 * as well.
2466 *
2467 * min_type is the minimum key type to truncate down to. If set to 0, this
2468 * will kill all the items on this inode, including the INODE_ITEM_KEY.
2469 */
2470noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2471 struct btrfs_root *root,
2472 struct inode *inode,
2473 u64 new_size, u32 min_type)
2474{
2475 int ret;
2476 struct btrfs_path *path;
2477 struct btrfs_key key;
2478 struct btrfs_key found_key;
2479 u32 found_type;
2480 struct extent_buffer *leaf;
2481 struct btrfs_file_extent_item *fi;
2482 u64 extent_start = 0;
2483 u64 extent_num_bytes = 0;
2484 u64 item_end = 0;
2485 u64 root_gen = 0;
2486 u64 root_owner = 0;
2487 int found_extent;
2488 int del_item;
2489 int pending_del_nr = 0;
2490 int pending_del_slot = 0;
2491 int extent_type = -1;
2492 int encoding;
2493 u64 mask = root->sectorsize - 1;
2494
2495 if (root->ref_cows)
2496 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
2497 path = btrfs_alloc_path();
2498 path->reada = -1;
2499 BUG_ON(!path);
2500
2501 /* FIXME, add redo link to tree so we don't leak on crash */
2502 key.objectid = inode->i_ino;
2503 key.offset = (u64)-1;
2504 key.type = (u8)-1;
2505
2506 btrfs_init_path(path);
2507
2508search_again:
2509 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2510 if (ret < 0)
2511 goto error;
2512
2513 if (ret > 0) {
2514 /* there are no items in the tree for us to truncate, we're
2515 * done
2516 */
2517 if (path->slots[0] == 0) {
2518 ret = 0;
2519 goto error;
2520 }
2521 path->slots[0]--;
2522 }
2523
2524 while (1) {
2525 fi = NULL;
2526 leaf = path->nodes[0];
2527 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2528 found_type = btrfs_key_type(&found_key);
2529 encoding = 0;
2530
2531 if (found_key.objectid != inode->i_ino)
2532 break;
2533
2534 if (found_type < min_type)
2535 break;
2536
2537 item_end = found_key.offset;
2538 if (found_type == BTRFS_EXTENT_DATA_KEY) {
2539 fi = btrfs_item_ptr(leaf, path->slots[0],
2540 struct btrfs_file_extent_item);
2541 extent_type = btrfs_file_extent_type(leaf, fi);
2542 encoding = btrfs_file_extent_compression(leaf, fi);
2543 encoding |= btrfs_file_extent_encryption(leaf, fi);
2544 encoding |= btrfs_file_extent_other_encoding(leaf, fi);
2545
2546 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
2547 item_end +=
2548 btrfs_file_extent_num_bytes(leaf, fi);
2549 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
2550 item_end += btrfs_file_extent_inline_len(leaf,
2551 fi);
2552 }
2553 item_end--;
2554 }
2555 if (item_end < new_size) {
2556 if (found_type == BTRFS_DIR_ITEM_KEY)
2557 found_type = BTRFS_INODE_ITEM_KEY;
2558 else if (found_type == BTRFS_EXTENT_ITEM_KEY)
2559 found_type = BTRFS_EXTENT_DATA_KEY;
2560 else if (found_type == BTRFS_EXTENT_DATA_KEY)
2561 found_type = BTRFS_XATTR_ITEM_KEY;
2562 else if (found_type == BTRFS_XATTR_ITEM_KEY)
2563 found_type = BTRFS_INODE_REF_KEY;
2564 else if (found_type)
2565 found_type--;
2566 else
2567 break;
2568 btrfs_set_key_type(&key, found_type);
2569 goto next;
2570 }
2571 if (found_key.offset >= new_size)
2572 del_item = 1;
2573 else
2574 del_item = 0;
2575 found_extent = 0;
2576
2577 /* FIXME, shrink the extent if the ref count is only 1 */
2578 if (found_type != BTRFS_EXTENT_DATA_KEY)
2579 goto delete;
2580
2581 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
2582 u64 num_dec;
2583 extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
2584 if (!del_item && !encoding) {
2585 u64 orig_num_bytes =
2586 btrfs_file_extent_num_bytes(leaf, fi);
2587 extent_num_bytes = new_size -
2588 found_key.offset + root->sectorsize - 1;
2589 extent_num_bytes = extent_num_bytes &
2590 ~((u64)root->sectorsize - 1);
2591 btrfs_set_file_extent_num_bytes(leaf, fi,
2592 extent_num_bytes);
2593 num_dec = (orig_num_bytes -
2594 extent_num_bytes);
2595 if (root->ref_cows && extent_start != 0)
2596 inode_sub_bytes(inode, num_dec);
2597 btrfs_mark_buffer_dirty(leaf);
2598 } else {
2599 extent_num_bytes =
2600 btrfs_file_extent_disk_num_bytes(leaf,
2601 fi);
2602 /* FIXME blocksize != 4096 */
2603 num_dec = btrfs_file_extent_num_bytes(leaf, fi);
2604 if (extent_start != 0) {
2605 found_extent = 1;
2606 if (root->ref_cows)
2607 inode_sub_bytes(inode, num_dec);
2608 }
2609 root_gen = btrfs_header_generation(leaf);
2610 root_owner = btrfs_header_owner(leaf);
2611 }
2612 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
2613 /*
2614 * we can't truncate inline items that have had
2615 * special encodings
2616 */
2617 if (!del_item &&
2618 btrfs_file_extent_compression(leaf, fi) == 0 &&
2619 btrfs_file_extent_encryption(leaf, fi) == 0 &&
2620 btrfs_file_extent_other_encoding(leaf, fi) == 0) {
2621 u32 size = new_size - found_key.offset;
2622
2623 if (root->ref_cows) {
2624 inode_sub_bytes(inode, item_end + 1 -
2625 new_size);
2626 }
2627 size =
2628 btrfs_file_extent_calc_inline_size(size);
2629 ret = btrfs_truncate_item(trans, root, path,
2630 size, 1);
2631 BUG_ON(ret);
2632 } else if (root->ref_cows) {
2633 inode_sub_bytes(inode, item_end + 1 -
2634 found_key.offset);
2635 }
2636 }
2637delete:
2638 if (del_item) {
2639 if (!pending_del_nr) {
2640 /* no pending yet, add ourselves */
2641 pending_del_slot = path->slots[0];
2642 pending_del_nr = 1;
2643 } else if (pending_del_nr &&
2644 path->slots[0] + 1 == pending_del_slot) {
2645 /* hop on the pending chunk */
2646 pending_del_nr++;
2647 pending_del_slot = path->slots[0];
2648 } else {
2649 BUG();
2650 }
2651 } else {
2652 break;
2653 }
2654 if (found_extent) {
2655 ret = btrfs_free_extent(trans, root, extent_start,
2656 extent_num_bytes,
2657 leaf->start, root_owner,
2658 root_gen, inode->i_ino, 0);
2659 BUG_ON(ret);
2660 }
2661next:
2662 if (path->slots[0] == 0) {
2663 if (pending_del_nr)
2664 goto del_pending;
2665 btrfs_release_path(root, path);
2666 goto search_again;
2667 }
2668
2669 path->slots[0]--;
2670 if (pending_del_nr &&
2671 path->slots[0] + 1 != pending_del_slot) {
2672 struct btrfs_key debug;
2673del_pending:
2674 btrfs_item_key_to_cpu(path->nodes[0], &debug,
2675 pending_del_slot);
2676 ret = btrfs_del_items(trans, root, path,
2677 pending_del_slot,
2678 pending_del_nr);
2679 BUG_ON(ret);
2680 pending_del_nr = 0;
2681 btrfs_release_path(root, path);
2682 goto search_again;
2683 }
2684 }
2685 ret = 0;
2686error:
2687 if (pending_del_nr) {
2688 ret = btrfs_del_items(trans, root, path, pending_del_slot,
2689 pending_del_nr);
2690 }
2691 btrfs_free_path(path);
2692 inode->i_sb->s_dirt = 1;
2693 return ret;
2694}
2695
2696/*
2697 * taken from block_truncate_page, but does cow as it zeros out
2698 * any bytes left in the last page in the file.
2699 */
2700static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
2701{
2702 struct inode *inode = mapping->host;
2703 struct btrfs_root *root = BTRFS_I(inode)->root;
2704 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2705 struct btrfs_ordered_extent *ordered;
2706 char *kaddr;
2707 u32 blocksize = root->sectorsize;
2708 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2709 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2710 struct page *page;
2711 int ret = 0;
2712 u64 page_start;
2713 u64 page_end;
2714
2715 if ((offset & (blocksize - 1)) == 0)
2716 goto out;
2717
2718 ret = -ENOMEM;
2719again:
2720 page = grab_cache_page(mapping, index);
2721 if (!page)
2722 goto out;
2723
2724 page_start = page_offset(page);
2725 page_end = page_start + PAGE_CACHE_SIZE - 1;
2726
2727 if (!PageUptodate(page)) {
2728 ret = btrfs_readpage(NULL, page);
2729 lock_page(page);
2730 if (page->mapping != mapping) {
2731 unlock_page(page);
2732 page_cache_release(page);
2733 goto again;
2734 }
2735 if (!PageUptodate(page)) {
2736 ret = -EIO;
2737 goto out_unlock;
2738 }
2739 }
2740 wait_on_page_writeback(page);
2741
2742 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2743 set_page_extent_mapped(page);
2744
2745 ordered = btrfs_lookup_ordered_extent(inode, page_start);
2746 if (ordered) {
2747 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2748 unlock_page(page);
2749 page_cache_release(page);
2750 btrfs_start_ordered_extent(inode, ordered, 1);
2751 btrfs_put_ordered_extent(ordered);
2752 goto again;
2753 }
2754
2755 btrfs_set_extent_delalloc(inode, page_start, page_end);
2756 ret = 0;
2757 if (offset != PAGE_CACHE_SIZE) {
2758 kaddr = kmap(page);
2759 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
2760 flush_dcache_page(page);
2761 kunmap(page);
2762 }
2763 ClearPageChecked(page);
2764 set_page_dirty(page);
2765 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2766
2767out_unlock:
2768 unlock_page(page);
2769 page_cache_release(page);
2770out:
2771 return ret;
2772}
2773
2774int btrfs_cont_expand(struct inode *inode, loff_t size)
2775{
2776 struct btrfs_trans_handle *trans;
2777 struct btrfs_root *root = BTRFS_I(inode)->root;
2778 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2779 struct extent_map *em;
2780 u64 mask = root->sectorsize - 1;
2781 u64 hole_start = (inode->i_size + mask) & ~mask;
2782 u64 block_end = (size + mask) & ~mask;
2783 u64 last_byte;
2784 u64 cur_offset;
2785 u64 hole_size;
2786 int err;
2787
2788 if (size <= hole_start)
2789 return 0;
2790
2791 err = btrfs_check_free_space(root, 1, 0);
2792 if (err)
2793 return err;
2794
2795 btrfs_truncate_page(inode->i_mapping, inode->i_size);
2796
2797 while (1) {
2798 struct btrfs_ordered_extent *ordered;
2799 btrfs_wait_ordered_range(inode, hole_start,
2800 block_end - hole_start);
2801 lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
2802 ordered = btrfs_lookup_ordered_extent(inode, hole_start);
2803 if (!ordered)
2804 break;
2805 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
2806 btrfs_put_ordered_extent(ordered);
2807 }
2808
2809 trans = btrfs_start_transaction(root, 1);
2810 btrfs_set_trans_block_group(trans, inode);
2811
2812 cur_offset = hole_start;
2813 while (1) {
2814 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
2815 block_end - cur_offset, 0);
2816 BUG_ON(IS_ERR(em) || !em);
2817 last_byte = min(extent_map_end(em), block_end);
2818 last_byte = (last_byte + mask) & ~mask;
2819 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
2820 u64 hint_byte = 0;
2821 hole_size = last_byte - cur_offset;
2822 err = btrfs_drop_extents(trans, root, inode,
2823 cur_offset,
2824 cur_offset + hole_size,
2825 cur_offset, &hint_byte);
2826 if (err)
2827 break;
2828 err = btrfs_insert_file_extent(trans, root,
2829 inode->i_ino, cur_offset, 0,
2830 0, hole_size, 0, hole_size,
2831 0, 0, 0);
2832 btrfs_drop_extent_cache(inode, hole_start,
2833 last_byte - 1, 0);
2834 }
2835 free_extent_map(em);
2836 cur_offset = last_byte;
2837 if (err || cur_offset >= block_end)
2838 break;
2839 }
2840
2841 btrfs_end_transaction(trans, root);
2842 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
2843 return err;
2844}
2845
2846static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
2847{
2848 struct inode *inode = dentry->d_inode;
2849 int err;
2850
2851 err = inode_change_ok(inode, attr);
2852 if (err)
2853 return err;
2854
2855 if (S_ISREG(inode->i_mode) &&
2856 attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
2857 err = btrfs_cont_expand(inode, attr->ia_size);
2858 if (err)
2859 return err;
2860 }
2861
2862 err = inode_setattr(inode, attr);
2863
2864 if (!err && ((attr->ia_valid & ATTR_MODE)))
2865 err = btrfs_acl_chmod(inode);
2866 return err;
2867}
2868
2869void btrfs_delete_inode(struct inode *inode)
2870{
2871 struct btrfs_trans_handle *trans;
2872 struct btrfs_root *root = BTRFS_I(inode)->root;
2873 unsigned long nr;
2874 int ret;
2875
2876 truncate_inode_pages(&inode->i_data, 0);
2877 if (is_bad_inode(inode)) {
2878 btrfs_orphan_del(NULL, inode);
2879 goto no_delete;
2880 }
2881 btrfs_wait_ordered_range(inode, 0, (u64)-1);
2882
2883 btrfs_i_size_write(inode, 0);
2884 trans = btrfs_join_transaction(root, 1);
2885
2886 btrfs_set_trans_block_group(trans, inode);
2887 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0);
2888 if (ret) {
2889 btrfs_orphan_del(NULL, inode);
2890 goto no_delete_lock;
2891 }
2892
2893 btrfs_orphan_del(trans, inode);
2894
2895 nr = trans->blocks_used;
2896 clear_inode(inode);
2897
2898 btrfs_end_transaction(trans, root);
2899 btrfs_btree_balance_dirty(root, nr);
2900 return;
2901
2902no_delete_lock:
2903 nr = trans->blocks_used;
2904 btrfs_end_transaction(trans, root);
2905 btrfs_btree_balance_dirty(root, nr);
2906no_delete:
2907 clear_inode(inode);
2908}
2909
2910/*
2911 * this returns the key found in the dir entry in the location pointer.
2912 * If no dir entries were found, location->objectid is 0.
2913 */
2914static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
2915 struct btrfs_key *location)
2916{
2917 const char *name = dentry->d_name.name;
2918 int namelen = dentry->d_name.len;
2919 struct btrfs_dir_item *di;
2920 struct btrfs_path *path;
2921 struct btrfs_root *root = BTRFS_I(dir)->root;
2922 int ret = 0;
2923
2924 path = btrfs_alloc_path();
2925 BUG_ON(!path);
2926
2927 di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
2928 namelen, 0);
2929 if (IS_ERR(di))
2930 ret = PTR_ERR(di);
2931
2932 if (!di || IS_ERR(di))
2933 goto out_err;
2934
2935 btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
2936out:
2937 btrfs_free_path(path);
2938 return ret;
2939out_err:
2940 location->objectid = 0;
2941 goto out;
2942}
2943
2944/*
2945 * when we hit a tree root in a directory, the btrfs part of the inode
2946 * needs to be changed to reflect the root directory of the tree root. This
2947 * is kind of like crossing a mount point.
2948 */
2949static int fixup_tree_root_location(struct btrfs_root *root,
2950 struct btrfs_key *location,
2951 struct btrfs_root **sub_root,
2952 struct dentry *dentry)
2953{
2954 struct btrfs_root_item *ri;
2955
2956 if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
2957 return 0;
2958 if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
2959 return 0;
2960
2961 *sub_root = btrfs_read_fs_root(root->fs_info, location,
2962 dentry->d_name.name,
2963 dentry->d_name.len);
2964 if (IS_ERR(*sub_root))
2965 return PTR_ERR(*sub_root);
2966
2967 ri = &(*sub_root)->root_item;
2968 location->objectid = btrfs_root_dirid(ri);
2969 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
2970 location->offset = 0;
2971
2972 return 0;
2973}
2974
2975static noinline void init_btrfs_i(struct inode *inode)
2976{
2977 struct btrfs_inode *bi = BTRFS_I(inode);
2978
2979 bi->i_acl = NULL;
2980 bi->i_default_acl = NULL;
2981
2982 bi->generation = 0;
2983 bi->sequence = 0;
2984 bi->last_trans = 0;
2985 bi->logged_trans = 0;
2986 bi->delalloc_bytes = 0;
2987 bi->disk_i_size = 0;
2988 bi->flags = 0;
2989 bi->index_cnt = (u64)-1;
2990 bi->log_dirty_trans = 0;
2991 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
2992 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
2993 inode->i_mapping, GFP_NOFS);
2994 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
2995 inode->i_mapping, GFP_NOFS);
2996 INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
2997 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
2998 mutex_init(&BTRFS_I(inode)->extent_mutex);
2999 mutex_init(&BTRFS_I(inode)->log_mutex);
3000}
3001
3002static int btrfs_init_locked_inode(struct inode *inode, void *p)
3003{
3004 struct btrfs_iget_args *args = p;
3005 inode->i_ino = args->ino;
3006 init_btrfs_i(inode);
3007 BTRFS_I(inode)->root = args->root;
3008 return 0;
3009}
3010
3011static int btrfs_find_actor(struct inode *inode, void *opaque)
3012{
3013 struct btrfs_iget_args *args = opaque;
3014 return args->ino == inode->i_ino &&
3015 args->root == BTRFS_I(inode)->root;
3016}
3017
3018struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
3019 struct btrfs_root *root, int wait)
3020{
3021 struct inode *inode;
3022 struct btrfs_iget_args args;
3023 args.ino = objectid;
3024 args.root = root;
3025
3026 if (wait) {
3027 inode = ilookup5(s, objectid, btrfs_find_actor,
3028 (void *)&args);
3029 } else {
3030 inode = ilookup5_nowait(s, objectid, btrfs_find_actor,
3031 (void *)&args);
3032 }
3033 return inode;
3034}
3035
3036struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
3037 struct btrfs_root *root)
3038{
3039 struct inode *inode;
3040 struct btrfs_iget_args args;
3041 args.ino = objectid;
3042 args.root = root;
3043
3044 inode = iget5_locked(s, objectid, btrfs_find_actor,
3045 btrfs_init_locked_inode,
3046 (void *)&args);
3047 return inode;
3048}
3049
3050/* Get an inode object given its location and corresponding root.
3051 * Returns in *is_new if the inode was read from disk
3052 */
3053struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3054 struct btrfs_root *root, int *is_new)
3055{
3056 struct inode *inode;
3057
3058 inode = btrfs_iget_locked(s, location->objectid, root);
3059 if (!inode)
3060 return ERR_PTR(-EACCES);
3061
3062 if (inode->i_state & I_NEW) {
3063 BTRFS_I(inode)->root = root;
3064 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
3065 btrfs_read_locked_inode(inode);
3066 unlock_new_inode(inode);
3067 if (is_new)
3068 *is_new = 1;
3069 } else {
3070 if (is_new)
3071 *is_new = 0;
3072 }
3073
3074 return inode;
3075}
3076
3077struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3078{
3079 struct inode *inode;
3080 struct btrfs_inode *bi = BTRFS_I(dir);
3081 struct btrfs_root *root = bi->root;
3082 struct btrfs_root *sub_root = root;
3083 struct btrfs_key location;
3084 int ret, new;
3085
3086 if (dentry->d_name.len > BTRFS_NAME_LEN)
3087 return ERR_PTR(-ENAMETOOLONG);
3088
3089 ret = btrfs_inode_by_name(dir, dentry, &location);
3090
3091 if (ret < 0)
3092 return ERR_PTR(ret);
3093
3094 inode = NULL;
3095 if (location.objectid) {
3096 ret = fixup_tree_root_location(root, &location, &sub_root,
3097 dentry);
3098 if (ret < 0)
3099 return ERR_PTR(ret);
3100 if (ret > 0)
3101 return ERR_PTR(-ENOENT);
3102 inode = btrfs_iget(dir->i_sb, &location, sub_root, &new);
3103 if (IS_ERR(inode))
3104 return ERR_CAST(inode);
3105 }
3106 return inode;
3107}
3108
3109static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
3110 struct nameidata *nd)
3111{
3112 struct inode *inode;
3113
3114 if (dentry->d_name.len > BTRFS_NAME_LEN)
3115 return ERR_PTR(-ENAMETOOLONG);
3116
3117 inode = btrfs_lookup_dentry(dir, dentry);
3118 if (IS_ERR(inode))
3119 return ERR_CAST(inode);
3120
3121 return d_splice_alias(inode, dentry);
3122}
3123
3124static unsigned char btrfs_filetype_table[] = {
3125 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
3126};
3127
3128static int btrfs_real_readdir(struct file *filp, void *dirent,
3129 filldir_t filldir)
3130{
3131 struct inode *inode = filp->f_dentry->d_inode;
3132 struct btrfs_root *root = BTRFS_I(inode)->root;
3133 struct btrfs_item *item;
3134 struct btrfs_dir_item *di;
3135 struct btrfs_key key;
3136 struct btrfs_key found_key;
3137 struct btrfs_path *path;
3138 int ret;
3139 u32 nritems;
3140 struct extent_buffer *leaf;
3141 int slot;
3142 int advance;
3143 unsigned char d_type;
3144 int over = 0;
3145 u32 di_cur;
3146 u32 di_total;
3147 u32 di_len;
3148 int key_type = BTRFS_DIR_INDEX_KEY;
3149 char tmp_name[32];
3150 char *name_ptr;
3151 int name_len;
3152
3153 /* FIXME, use a real flag for deciding about the key type */
3154 if (root->fs_info->tree_root == root)
3155 key_type = BTRFS_DIR_ITEM_KEY;
3156
3157 /* special case for "." */
3158 if (filp->f_pos == 0) {
3159 over = filldir(dirent, ".", 1,
3160 1, inode->i_ino,
3161 DT_DIR);
3162 if (over)
3163 return 0;
3164 filp->f_pos = 1;
3165 }
3166 /* special case for .., just use the back ref */
3167 if (filp->f_pos == 1) {
3168 u64 pino = parent_ino(filp->f_path.dentry);
3169 over = filldir(dirent, "..", 2,
3170 2, pino, DT_DIR);
3171 if (over)
3172 return 0;
3173 filp->f_pos = 2;
3174 }
3175 path = btrfs_alloc_path();
3176 path->reada = 2;
3177
3178 btrfs_set_key_type(&key, key_type);
3179 key.offset = filp->f_pos;
3180 key.objectid = inode->i_ino;
3181
3182 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3183 if (ret < 0)
3184 goto err;
3185 advance = 0;
3186
3187 while (1) {
3188 leaf = path->nodes[0];
3189 nritems = btrfs_header_nritems(leaf);
3190 slot = path->slots[0];
3191 if (advance || slot >= nritems) {
3192 if (slot >= nritems - 1) {
3193 ret = btrfs_next_leaf(root, path);
3194 if (ret)
3195 break;
3196 leaf = path->nodes[0];
3197 nritems = btrfs_header_nritems(leaf);
3198 slot = path->slots[0];
3199 } else {
3200 slot++;
3201 path->slots[0]++;
3202 }
3203 }
3204
3205 advance = 1;
3206 item = btrfs_item_nr(leaf, slot);
3207 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3208
3209 if (found_key.objectid != key.objectid)
3210 break;
3211 if (btrfs_key_type(&found_key) != key_type)
3212 break;
3213 if (found_key.offset < filp->f_pos)
3214 continue;
3215
3216 filp->f_pos = found_key.offset;
3217
3218 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
3219 di_cur = 0;
3220 di_total = btrfs_item_size(leaf, item);
3221
3222 while (di_cur < di_total) {
3223 struct btrfs_key location;
3224
3225 name_len = btrfs_dir_name_len(leaf, di);
3226 if (name_len <= sizeof(tmp_name)) {
3227 name_ptr = tmp_name;
3228 } else {
3229 name_ptr = kmalloc(name_len, GFP_NOFS);
3230 if (!name_ptr) {
3231 ret = -ENOMEM;
3232 goto err;
3233 }
3234 }
3235 read_extent_buffer(leaf, name_ptr,
3236 (unsigned long)(di + 1), name_len);
3237
3238 d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
3239 btrfs_dir_item_key_to_cpu(leaf, di, &location);
3240
3241 /* is this a reference to our own snapshot? If so
3242 * skip it
3243 */
3244 if (location.type == BTRFS_ROOT_ITEM_KEY &&
3245 location.objectid == root->root_key.objectid) {
3246 over = 0;
3247 goto skip;
3248 }
3249 over = filldir(dirent, name_ptr, name_len,
3250 found_key.offset, location.objectid,
3251 d_type);
3252
3253skip:
3254 if (name_ptr != tmp_name)
3255 kfree(name_ptr);
3256
3257 if (over)
3258 goto nopos;
3259 di_len = btrfs_dir_name_len(leaf, di) +
3260 btrfs_dir_data_len(leaf, di) + sizeof(*di);
3261 di_cur += di_len;
3262 di = (struct btrfs_dir_item *)((char *)di + di_len);
3263 }
3264 }
3265
3266 /* Reached end of directory/root. Bump pos past the last item. */
3267 if (key_type == BTRFS_DIR_INDEX_KEY)
3268 filp->f_pos = INT_LIMIT(typeof(filp->f_pos));
3269 else
3270 filp->f_pos++;
3271nopos:
3272 ret = 0;
3273err:
3274 btrfs_free_path(path);
3275 return ret;
3276}
3277
3278int btrfs_write_inode(struct inode *inode, int wait)
3279{
3280 struct btrfs_root *root = BTRFS_I(inode)->root;
3281 struct btrfs_trans_handle *trans;
3282 int ret = 0;
3283
3284 if (root->fs_info->btree_inode == inode)
3285 return 0;
3286
3287 if (wait) {
3288 trans = btrfs_join_transaction(root, 1);
3289 btrfs_set_trans_block_group(trans, inode);
3290 ret = btrfs_commit_transaction(trans, root);
3291 }
3292 return ret;
3293}
3294
3295/*
3296 * This is somewhat expensive, updating the tree every time the
3297 * inode changes. But, it is most likely to find the inode in cache.
3298 * FIXME, needs more benchmarking...there are no reasons other than performance
3299 * to keep or drop this code.
3300 */
3301void btrfs_dirty_inode(struct inode *inode)
3302{
3303 struct btrfs_root *root = BTRFS_I(inode)->root;
3304 struct btrfs_trans_handle *trans;
3305
3306 trans = btrfs_join_transaction(root, 1);
3307 btrfs_set_trans_block_group(trans, inode);
3308 btrfs_update_inode(trans, root, inode);
3309 btrfs_end_transaction(trans, root);
3310}
3311
3312/*
3313 * find the highest existing sequence number in a directory
3314 * and then set the in-memory index_cnt variable to reflect
3315 * free sequence numbers
3316 */
3317static int btrfs_set_inode_index_count(struct inode *inode)
3318{
3319 struct btrfs_root *root = BTRFS_I(inode)->root;
3320 struct btrfs_key key, found_key;
3321 struct btrfs_path *path;
3322 struct extent_buffer *leaf;
3323 int ret;
3324
3325 key.objectid = inode->i_ino;
3326 btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
3327 key.offset = (u64)-1;
3328
3329 path = btrfs_alloc_path();
3330 if (!path)
3331 return -ENOMEM;
3332
3333 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3334 if (ret < 0)
3335 goto out;
3336 /* FIXME: we should be able to handle this */
3337 if (ret == 0)
3338 goto out;
3339 ret = 0;
3340
3341 /*
3342 * MAGIC NUMBER EXPLANATION:
3343 * since we search a directory based on f_pos we have to start at 2
3344 * since '.' and '..' have f_pos of 0 and 1 respectively, so everybody
3345 * else has to start at 2
3346 */
3347 if (path->slots[0] == 0) {
3348 BTRFS_I(inode)->index_cnt = 2;
3349 goto out;
3350 }
3351
3352 path->slots[0]--;
3353
3354 leaf = path->nodes[0];
3355 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3356
3357 if (found_key.objectid != inode->i_ino ||
3358 btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) {
3359 BTRFS_I(inode)->index_cnt = 2;
3360 goto out;
3361 }
3362
3363 BTRFS_I(inode)->index_cnt = found_key.offset + 1;
3364out:
3365 btrfs_free_path(path);
3366 return ret;
3367}
3368
3369/*
3370 * helper to find a free sequence number in a given directory. This current
3371 * code is very simple, later versions will do smarter things in the btree
3372 */
3373int btrfs_set_inode_index(struct inode *dir, u64 *index)
3374{
3375 int ret = 0;
3376
3377 if (BTRFS_I(dir)->index_cnt == (u64)-1) {
3378 ret = btrfs_set_inode_index_count(dir);
3379 if (ret)
3380 return ret;
3381 }
3382
3383 *index = BTRFS_I(dir)->index_cnt;
3384 BTRFS_I(dir)->index_cnt++;
3385
3386 return ret;
3387}
3388
3389static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
3390 struct btrfs_root *root,
3391 struct inode *dir,
3392 const char *name, int name_len,
3393 u64 ref_objectid, u64 objectid,
3394 u64 alloc_hint, int mode, u64 *index)
3395{
3396 struct inode *inode;
3397 struct btrfs_inode_item *inode_item;
3398 struct btrfs_key *location;
3399 struct btrfs_path *path;
3400 struct btrfs_inode_ref *ref;
3401 struct btrfs_key key[2];
3402 u32 sizes[2];
3403 unsigned long ptr;
3404 int ret;
3405 int owner;
3406
3407 path = btrfs_alloc_path();
3408 BUG_ON(!path);
3409
3410 inode = new_inode(root->fs_info->sb);
3411 if (!inode)
3412 return ERR_PTR(-ENOMEM);
3413
3414 if (dir) {
3415 ret = btrfs_set_inode_index(dir, index);
3416 if (ret)
3417 return ERR_PTR(ret);
3418 }
3419 /*
3420 * index_cnt is ignored for everything but a dir,
3421 * btrfs_get_inode_index_count has an explanation for the magic
3422 * number
3423 */
3424 init_btrfs_i(inode);
3425 BTRFS_I(inode)->index_cnt = 2;
3426 BTRFS_I(inode)->root = root;
3427 BTRFS_I(inode)->generation = trans->transid;
3428
3429 if (mode & S_IFDIR)
3430 owner = 0;
3431 else
3432 owner = 1;
3433 BTRFS_I(inode)->block_group =
3434 btrfs_find_block_group(root, 0, alloc_hint, owner);
3435 if ((mode & S_IFREG)) {
3436 if (btrfs_test_opt(root, NODATASUM))
3437 btrfs_set_flag(inode, NODATASUM);
3438 if (btrfs_test_opt(root, NODATACOW))
3439 btrfs_set_flag(inode, NODATACOW);
3440 }
3441
3442 key[0].objectid = objectid;
3443 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
3444 key[0].offset = 0;
3445
3446 key[1].objectid = objectid;
3447 btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
3448 key[1].offset = ref_objectid;
3449
3450 sizes[0] = sizeof(struct btrfs_inode_item);
3451 sizes[1] = name_len + sizeof(*ref);
3452
3453 ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
3454 if (ret != 0)
3455 goto fail;
3456
3457 if (objectid > root->highest_inode)
3458 root->highest_inode = objectid;
3459
3460 inode->i_uid = current_fsuid();
3461 inode->i_gid = current_fsgid();
3462 inode->i_mode = mode;
3463 inode->i_ino = objectid;
3464 inode_set_bytes(inode, 0);
3465 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
3466 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3467 struct btrfs_inode_item);
3468 fill_inode_item(trans, path->nodes[0], inode_item, inode);
3469
3470 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
3471 struct btrfs_inode_ref);
3472 btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
3473 btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
3474 ptr = (unsigned long)(ref + 1);
3475 write_extent_buffer(path->nodes[0], name, ptr, name_len);
3476
3477 btrfs_mark_buffer_dirty(path->nodes[0]);
3478 btrfs_free_path(path);
3479
3480 location = &BTRFS_I(inode)->location;
3481 location->objectid = objectid;
3482 location->offset = 0;
3483 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
3484
3485 insert_inode_hash(inode);
3486 return inode;
3487fail:
3488 if (dir)
3489 BTRFS_I(dir)->index_cnt--;
3490 btrfs_free_path(path);
3491 return ERR_PTR(ret);
3492}
3493
3494static inline u8 btrfs_inode_type(struct inode *inode)
3495{
3496 return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
3497}
3498
3499/*
3500 * utility function to add 'inode' into 'parent_inode' with
3501 * a give name and a given sequence number.
3502 * if 'add_backref' is true, also insert a backref from the
3503 * inode to the parent directory.
3504 */
3505int btrfs_add_link(struct btrfs_trans_handle *trans,
3506 struct inode *parent_inode, struct inode *inode,
3507 const char *name, int name_len, int add_backref, u64 index)
3508{
3509 int ret;
3510 struct btrfs_key key;
3511 struct btrfs_root *root = BTRFS_I(parent_inode)->root;
3512
3513 key.objectid = inode->i_ino;
3514 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
3515 key.offset = 0;
3516
3517 ret = btrfs_insert_dir_item(trans, root, name, name_len,
3518 parent_inode->i_ino,
3519 &key, btrfs_inode_type(inode),
3520 index);
3521 if (ret == 0) {
3522 if (add_backref) {
3523 ret = btrfs_insert_inode_ref(trans, root,
3524 name, name_len,
3525 inode->i_ino,
3526 parent_inode->i_ino,
3527 index);
3528 }
3529 btrfs_i_size_write(parent_inode, parent_inode->i_size +
3530 name_len * 2);
3531 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
3532 ret = btrfs_update_inode(trans, root, parent_inode);
3533 }
3534 return ret;
3535}
3536
3537static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
3538 struct dentry *dentry, struct inode *inode,
3539 int backref, u64 index)
3540{
3541 int err = btrfs_add_link(trans, dentry->d_parent->d_inode,
3542 inode, dentry->d_name.name,
3543 dentry->d_name.len, backref, index);
3544 if (!err) {
3545 d_instantiate(dentry, inode);
3546 return 0;
3547 }
3548 if (err > 0)
3549 err = -EEXIST;
3550 return err;
3551}
3552
3553static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
3554 int mode, dev_t rdev)
3555{
3556 struct btrfs_trans_handle *trans;
3557 struct btrfs_root *root = BTRFS_I(dir)->root;
3558 struct inode *inode = NULL;
3559 int err;
3560 int drop_inode = 0;
3561 u64 objectid;
3562 unsigned long nr = 0;
3563 u64 index = 0;
3564
3565 if (!new_valid_dev(rdev))
3566 return -EINVAL;
3567
3568 err = btrfs_check_free_space(root, 1, 0);
3569 if (err)
3570 goto fail;
3571
3572 trans = btrfs_start_transaction(root, 1);
3573 btrfs_set_trans_block_group(trans, dir);
3574
3575 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
3576 if (err) {
3577 err = -ENOSPC;
3578 goto out_unlock;
3579 }
3580
3581 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
3582 dentry->d_name.len,
3583 dentry->d_parent->d_inode->i_ino, objectid,
3584 BTRFS_I(dir)->block_group, mode, &index);
3585 err = PTR_ERR(inode);
3586 if (IS_ERR(inode))
3587 goto out_unlock;
3588
3589 err = btrfs_init_acl(inode, dir);
3590 if (err) {
3591 drop_inode = 1;
3592 goto out_unlock;
3593 }
3594
3595 btrfs_set_trans_block_group(trans, inode);
3596 err = btrfs_add_nondir(trans, dentry, inode, 0, index);
3597 if (err)
3598 drop_inode = 1;
3599 else {
3600 inode->i_op = &btrfs_special_inode_operations;
3601 init_special_inode(inode, inode->i_mode, rdev);
3602 btrfs_update_inode(trans, root, inode);
3603 }
3604 dir->i_sb->s_dirt = 1;
3605 btrfs_update_inode_block_group(trans, inode);
3606 btrfs_update_inode_block_group(trans, dir);
3607out_unlock:
3608 nr = trans->blocks_used;
3609 btrfs_end_transaction_throttle(trans, root);
3610fail:
3611 if (drop_inode) {
3612 inode_dec_link_count(inode);
3613 iput(inode);
3614 }
3615 btrfs_btree_balance_dirty(root, nr);
3616 return err;
3617}
3618
3619static int btrfs_create(struct inode *dir, struct dentry *dentry,
3620 int mode, struct nameidata *nd)
3621{
3622 struct btrfs_trans_handle *trans;
3623 struct btrfs_root *root = BTRFS_I(dir)->root;
3624 struct inode *inode = NULL;
3625 int err;
3626 int drop_inode = 0;
3627 unsigned long nr = 0;
3628 u64 objectid;
3629 u64 index = 0;
3630
3631 err = btrfs_check_free_space(root, 1, 0);
3632 if (err)
3633 goto fail;
3634 trans = btrfs_start_transaction(root, 1);
3635 btrfs_set_trans_block_group(trans, dir);
3636
3637 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
3638 if (err) {
3639 err = -ENOSPC;
3640 goto out_unlock;
3641 }
3642
3643 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
3644 dentry->d_name.len,
3645 dentry->d_parent->d_inode->i_ino,
3646 objectid, BTRFS_I(dir)->block_group, mode,
3647 &index);
3648 err = PTR_ERR(inode);
3649 if (IS_ERR(inode))
3650 goto out_unlock;
3651
3652 err = btrfs_init_acl(inode, dir);
3653 if (err) {
3654 drop_inode = 1;
3655 goto out_unlock;
3656 }
3657
3658 btrfs_set_trans_block_group(trans, inode);
3659 err = btrfs_add_nondir(trans, dentry, inode, 0, index);
3660 if (err)
3661 drop_inode = 1;
3662 else {
3663 inode->i_mapping->a_ops = &btrfs_aops;
3664 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3665 inode->i_fop = &btrfs_file_operations;
3666 inode->i_op = &btrfs_file_inode_operations;
3667 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
3668 }
3669 dir->i_sb->s_dirt = 1;
3670 btrfs_update_inode_block_group(trans, inode);
3671 btrfs_update_inode_block_group(trans, dir);
3672out_unlock:
3673 nr = trans->blocks_used;
3674 btrfs_end_transaction_throttle(trans, root);
3675fail:
3676 if (drop_inode) {
3677 inode_dec_link_count(inode);
3678 iput(inode);
3679 }
3680 btrfs_btree_balance_dirty(root, nr);
3681 return err;
3682}
3683
3684static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
3685 struct dentry *dentry)
3686{
3687 struct btrfs_trans_handle *trans;
3688 struct btrfs_root *root = BTRFS_I(dir)->root;
3689 struct inode *inode = old_dentry->d_inode;
3690 u64 index;
3691 unsigned long nr = 0;
3692 int err;
3693 int drop_inode = 0;
3694
3695 if (inode->i_nlink == 0)
3696 return -ENOENT;
3697
3698 btrfs_inc_nlink(inode);
3699 err = btrfs_check_free_space(root, 1, 0);
3700 if (err)
3701 goto fail;
3702 err = btrfs_set_inode_index(dir, &index);
3703 if (err)
3704 goto fail;
3705
3706 trans = btrfs_start_transaction(root, 1);
3707
3708 btrfs_set_trans_block_group(trans, dir);
3709 atomic_inc(&inode->i_count);
3710
3711 err = btrfs_add_nondir(trans, dentry, inode, 1, index);
3712
3713 if (err)
3714 drop_inode = 1;
3715
3716 dir->i_sb->s_dirt = 1;
3717 btrfs_update_inode_block_group(trans, dir);
3718 err = btrfs_update_inode(trans, root, inode);
3719
3720 if (err)
3721 drop_inode = 1;
3722
3723 nr = trans->blocks_used;
3724 btrfs_end_transaction_throttle(trans, root);
3725fail:
3726 if (drop_inode) {
3727 inode_dec_link_count(inode);
3728 iput(inode);
3729 }
3730 btrfs_btree_balance_dirty(root, nr);
3731 return err;
3732}
3733
3734static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
3735{
3736 struct inode *inode = NULL;
3737 struct btrfs_trans_handle *trans;
3738 struct btrfs_root *root = BTRFS_I(dir)->root;
3739 int err = 0;
3740 int drop_on_err = 0;
3741 u64 objectid = 0;
3742 u64 index = 0;
3743 unsigned long nr = 1;
3744
3745 err = btrfs_check_free_space(root, 1, 0);
3746 if (err)
3747 goto out_unlock;
3748
3749 trans = btrfs_start_transaction(root, 1);
3750 btrfs_set_trans_block_group(trans, dir);
3751
3752 if (IS_ERR(trans)) {
3753 err = PTR_ERR(trans);
3754 goto out_unlock;
3755 }
3756
3757 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
3758 if (err) {
3759 err = -ENOSPC;
3760 goto out_unlock;
3761 }
3762
3763 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
3764 dentry->d_name.len,
3765 dentry->d_parent->d_inode->i_ino, objectid,
3766 BTRFS_I(dir)->block_group, S_IFDIR | mode,
3767 &index);
3768 if (IS_ERR(inode)) {
3769 err = PTR_ERR(inode);
3770 goto out_fail;
3771 }
3772
3773 drop_on_err = 1;
3774
3775 err = btrfs_init_acl(inode, dir);
3776 if (err)
3777 goto out_fail;
3778
3779 inode->i_op = &btrfs_dir_inode_operations;
3780 inode->i_fop = &btrfs_dir_file_operations;
3781 btrfs_set_trans_block_group(trans, inode);
3782
3783 btrfs_i_size_write(inode, 0);
3784 err = btrfs_update_inode(trans, root, inode);
3785 if (err)
3786 goto out_fail;
3787
3788 err = btrfs_add_link(trans, dentry->d_parent->d_inode,
3789 inode, dentry->d_name.name,
3790 dentry->d_name.len, 0, index);
3791 if (err)
3792 goto out_fail;
3793
3794 d_instantiate(dentry, inode);
3795 drop_on_err = 0;
3796 dir->i_sb->s_dirt = 1;
3797 btrfs_update_inode_block_group(trans, inode);
3798 btrfs_update_inode_block_group(trans, dir);
3799
3800out_fail:
3801 nr = trans->blocks_used;
3802 btrfs_end_transaction_throttle(trans, root);
3803
3804out_unlock:
3805 if (drop_on_err)
3806 iput(inode);
3807 btrfs_btree_balance_dirty(root, nr);
3808 return err;
3809}
3810
3811/* helper for btfs_get_extent. Given an existing extent in the tree,
3812 * and an extent that you want to insert, deal with overlap and insert
3813 * the new extent into the tree.
3814 */
3815static int merge_extent_mapping(struct extent_map_tree *em_tree,
3816 struct extent_map *existing,
3817 struct extent_map *em,
3818 u64 map_start, u64 map_len)
3819{
3820 u64 start_diff;
3821
3822 BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
3823 start_diff = map_start - em->start;
3824 em->start = map_start;
3825 em->len = map_len;
3826 if (em->block_start < EXTENT_MAP_LAST_BYTE &&
3827 !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
3828 em->block_start += start_diff;
3829 em->block_len -= start_diff;
3830 }
3831 return add_extent_mapping(em_tree, em);
3832}
3833
3834static noinline int uncompress_inline(struct btrfs_path *path,
3835 struct inode *inode, struct page *page,
3836 size_t pg_offset, u64 extent_offset,
3837 struct btrfs_file_extent_item *item)
3838{
3839 int ret;
3840 struct extent_buffer *leaf = path->nodes[0];
3841 char *tmp;
3842 size_t max_size;
3843 unsigned long inline_size;
3844 unsigned long ptr;
3845
3846 WARN_ON(pg_offset != 0);
3847 max_size = btrfs_file_extent_ram_bytes(leaf, item);
3848 inline_size = btrfs_file_extent_inline_item_len(leaf,
3849 btrfs_item_nr(leaf, path->slots[0]));
3850 tmp = kmalloc(inline_size, GFP_NOFS);
3851 ptr = btrfs_file_extent_inline_start(item);
3852
3853 read_extent_buffer(leaf, tmp, ptr, inline_size);
3854
3855 max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size);
3856 ret = btrfs_zlib_decompress(tmp, page, extent_offset,
3857 inline_size, max_size);
3858 if (ret) {
3859 char *kaddr = kmap_atomic(page, KM_USER0);
3860 unsigned long copy_size = min_t(u64,
3861 PAGE_CACHE_SIZE - pg_offset,
3862 max_size - extent_offset);
3863 memset(kaddr + pg_offset, 0, copy_size);
3864 kunmap_atomic(kaddr, KM_USER0);
3865 }
3866 kfree(tmp);
3867 return 0;
3868}
3869
3870/*
3871 * a bit scary, this does extent mapping from logical file offset to the disk.
3872 * the ugly parts come from merging extents from the disk with the in-ram
3873 * representation. This gets more complex because of the data=ordered code,
3874 * where the in-ram extents might be locked pending data=ordered completion.
3875 *
3876 * This also copies inline extents directly into the page.
3877 */
3878
3879struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
3880 size_t pg_offset, u64 start, u64 len,
3881 int create)
3882{
3883 int ret;
3884 int err = 0;
3885 u64 bytenr;
3886 u64 extent_start = 0;
3887 u64 extent_end = 0;
3888 u64 objectid = inode->i_ino;
3889 u32 found_type;
3890 struct btrfs_path *path = NULL;
3891 struct btrfs_root *root = BTRFS_I(inode)->root;
3892 struct btrfs_file_extent_item *item;
3893 struct extent_buffer *leaf;
3894 struct btrfs_key found_key;
3895 struct extent_map *em = NULL;
3896 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
3897 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3898 struct btrfs_trans_handle *trans = NULL;
3899 int compressed;
3900
3901again:
3902 spin_lock(&em_tree->lock);
3903 em = lookup_extent_mapping(em_tree, start, len);
3904 if (em)
3905 em->bdev = root->fs_info->fs_devices->latest_bdev;
3906 spin_unlock(&em_tree->lock);
3907
3908 if (em) {
3909 if (em->start > start || em->start + em->len <= start)
3910 free_extent_map(em);
3911 else if (em->block_start == EXTENT_MAP_INLINE && page)
3912 free_extent_map(em);
3913 else
3914 goto out;
3915 }
3916 em = alloc_extent_map(GFP_NOFS);
3917 if (!em) {
3918 err = -ENOMEM;
3919 goto out;
3920 }
3921 em->bdev = root->fs_info->fs_devices->latest_bdev;
3922 em->start = EXTENT_MAP_HOLE;
3923 em->orig_start = EXTENT_MAP_HOLE;
3924 em->len = (u64)-1;
3925 em->block_len = (u64)-1;
3926
3927 if (!path) {
3928 path = btrfs_alloc_path();
3929 BUG_ON(!path);
3930 }
3931
3932 ret = btrfs_lookup_file_extent(trans, root, path,
3933 objectid, start, trans != NULL);
3934 if (ret < 0) {
3935 err = ret;
3936 goto out;
3937 }
3938
3939 if (ret != 0) {
3940 if (path->slots[0] == 0)
3941 goto not_found;
3942 path->slots[0]--;
3943 }
3944
3945 leaf = path->nodes[0];
3946 item = btrfs_item_ptr(leaf, path->slots[0],
3947 struct btrfs_file_extent_item);
3948 /* are we inside the extent that was found? */
3949 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3950 found_type = btrfs_key_type(&found_key);
3951 if (found_key.objectid != objectid ||
3952 found_type != BTRFS_EXTENT_DATA_KEY) {
3953 goto not_found;
3954 }
3955
3956 found_type = btrfs_file_extent_type(leaf, item);
3957 extent_start = found_key.offset;
3958 compressed = btrfs_file_extent_compression(leaf, item);
3959 if (found_type == BTRFS_FILE_EXTENT_REG ||
3960 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
3961 extent_end = extent_start +
3962 btrfs_file_extent_num_bytes(leaf, item);
3963 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
3964 size_t size;
3965 size = btrfs_file_extent_inline_len(leaf, item);
3966 extent_end = (extent_start + size + root->sectorsize - 1) &
3967 ~((u64)root->sectorsize - 1);
3968 }
3969
3970 if (start >= extent_end) {
3971 path->slots[0]++;
3972 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
3973 ret = btrfs_next_leaf(root, path);
3974 if (ret < 0) {
3975 err = ret;
3976 goto out;
3977 }
3978 if (ret > 0)
3979 goto not_found;
3980 leaf = path->nodes[0];
3981 }
3982 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3983 if (found_key.objectid != objectid ||
3984 found_key.type != BTRFS_EXTENT_DATA_KEY)
3985 goto not_found;
3986 if (start + len <= found_key.offset)
3987 goto not_found;
3988 em->start = start;
3989 em->len = found_key.offset - start;
3990 goto not_found_em;
3991 }
3992
3993 if (found_type == BTRFS_FILE_EXTENT_REG ||
3994 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
3995 em->start = extent_start;
3996 em->len = extent_end - extent_start;
3997 em->orig_start = extent_start -
3998 btrfs_file_extent_offset(leaf, item);
3999 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
4000 if (bytenr == 0) {
4001 em->block_start = EXTENT_MAP_HOLE;
4002 goto insert;
4003 }
4004 if (compressed) {
4005 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
4006 em->block_start = bytenr;
4007 em->block_len = btrfs_file_extent_disk_num_bytes(leaf,
4008 item);
4009 } else {
4010 bytenr += btrfs_file_extent_offset(leaf, item);
4011 em->block_start = bytenr;
4012 em->block_len = em->len;
4013 if (found_type == BTRFS_FILE_EXTENT_PREALLOC)
4014 set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
4015 }
4016 goto insert;
4017 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
4018 unsigned long ptr;
4019 char *map;
4020 size_t size;
4021 size_t extent_offset;
4022 size_t copy_size;
4023
4024 em->block_start = EXTENT_MAP_INLINE;
4025 if (!page || create) {
4026 em->start = extent_start;
4027 em->len = extent_end - extent_start;
4028 goto out;
4029 }
4030
4031 size = btrfs_file_extent_inline_len(leaf, item);
4032 extent_offset = page_offset(page) + pg_offset - extent_start;
4033 copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
4034 size - extent_offset);
4035 em->start = extent_start + extent_offset;
4036 em->len = (copy_size + root->sectorsize - 1) &
4037 ~((u64)root->sectorsize - 1);
4038 em->orig_start = EXTENT_MAP_INLINE;
4039 if (compressed)
4040 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
4041 ptr = btrfs_file_extent_inline_start(item) + extent_offset;
4042 if (create == 0 && !PageUptodate(page)) {
4043 if (btrfs_file_extent_compression(leaf, item) ==
4044 BTRFS_COMPRESS_ZLIB) {
4045 ret = uncompress_inline(path, inode, page,
4046 pg_offset,
4047 extent_offset, item);
4048 BUG_ON(ret);
4049 } else {
4050 map = kmap(page);
4051 read_extent_buffer(leaf, map + pg_offset, ptr,
4052 copy_size);
4053 kunmap(page);
4054 }
4055 flush_dcache_page(page);
4056 } else if (create && PageUptodate(page)) {
4057 if (!trans) {
4058 kunmap(page);
4059 free_extent_map(em);
4060 em = NULL;
4061 btrfs_release_path(root, path);
4062 trans = btrfs_join_transaction(root, 1);
4063 goto again;
4064 }
4065 map = kmap(page);
4066 write_extent_buffer(leaf, map + pg_offset, ptr,
4067 copy_size);
4068 kunmap(page);
4069 btrfs_mark_buffer_dirty(leaf);
4070 }
4071 set_extent_uptodate(io_tree, em->start,
4072 extent_map_end(em) - 1, GFP_NOFS);
4073 goto insert;
4074 } else {
4075 printk(KERN_ERR "btrfs unknown found_type %d\n", found_type);
4076 WARN_ON(1);
4077 }
4078not_found:
4079 em->start = start;
4080 em->len = len;
4081not_found_em:
4082 em->block_start = EXTENT_MAP_HOLE;
4083 set_bit(EXTENT_FLAG_VACANCY, &em->flags);
4084insert:
4085 btrfs_release_path(root, path);
4086 if (em->start > start || extent_map_end(em) <= start) {
4087 printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed "
4088 "[%llu %llu]\n", (unsigned long long)em->start,
4089 (unsigned long long)em->len,
4090 (unsigned long long)start,
4091 (unsigned long long)len);
4092 err = -EIO;
4093 goto out;
4094 }
4095
4096 err = 0;
4097 spin_lock(&em_tree->lock);
4098 ret = add_extent_mapping(em_tree, em);
4099 /* it is possible that someone inserted the extent into the tree
4100 * while we had the lock dropped. It is also possible that
4101 * an overlapping map exists in the tree
4102 */
4103 if (ret == -EEXIST) {
4104 struct extent_map *existing;
4105
4106 ret = 0;
4107
4108 existing = lookup_extent_mapping(em_tree, start, len);
4109 if (existing && (existing->start > start ||
4110 existing->start + existing->len <= start)) {
4111 free_extent_map(existing);
4112 existing = NULL;
4113 }
4114 if (!existing) {
4115 existing = lookup_extent_mapping(em_tree, em->start,
4116 em->len);
4117 if (existing) {
4118 err = merge_extent_mapping(em_tree, existing,
4119 em, start,
4120 root->sectorsize);
4121 free_extent_map(existing);
4122 if (err) {
4123 free_extent_map(em);
4124 em = NULL;
4125 }
4126 } else {
4127 err = -EIO;
4128 free_extent_map(em);
4129 em = NULL;
4130 }
4131 } else {
4132 free_extent_map(em);
4133 em = existing;
4134 err = 0;
4135 }
4136 }
4137 spin_unlock(&em_tree->lock);
4138out:
4139 if (path)
4140 btrfs_free_path(path);
4141 if (trans) {
4142 ret = btrfs_end_transaction(trans, root);
4143 if (!err)
4144 err = ret;
4145 }
4146 if (err) {
4147 free_extent_map(em);
4148 WARN_ON(1);
4149 return ERR_PTR(err);
4150 }
4151 return em;
4152}
4153
4154static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
4155 const struct iovec *iov, loff_t offset,
4156 unsigned long nr_segs)
4157{
4158 return -EINVAL;
4159}
4160
4161static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
4162{
4163 return extent_bmap(mapping, iblock, btrfs_get_extent);
4164}
4165
4166int btrfs_readpage(struct file *file, struct page *page)
4167{
4168 struct extent_io_tree *tree;
4169 tree = &BTRFS_I(page->mapping->host)->io_tree;
4170 return extent_read_full_page(tree, page, btrfs_get_extent);
4171}
4172
4173static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
4174{
4175 struct extent_io_tree *tree;
4176
4177
4178 if (current->flags & PF_MEMALLOC) {
4179 redirty_page_for_writepage(wbc, page);
4180 unlock_page(page);
4181 return 0;
4182 }
4183 tree = &BTRFS_I(page->mapping->host)->io_tree;
4184 return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
4185}
4186
4187int btrfs_writepages(struct address_space *mapping,
4188 struct writeback_control *wbc)
4189{
4190 struct extent_io_tree *tree;
4191
4192 tree = &BTRFS_I(mapping->host)->io_tree;
4193 return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
4194}
4195
4196static int
4197btrfs_readpages(struct file *file, struct address_space *mapping,
4198 struct list_head *pages, unsigned nr_pages)
4199{
4200 struct extent_io_tree *tree;
4201 tree = &BTRFS_I(mapping->host)->io_tree;
4202 return extent_readpages(tree, mapping, pages, nr_pages,
4203 btrfs_get_extent);
4204}
4205static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
4206{
4207 struct extent_io_tree *tree;
4208 struct extent_map_tree *map;
4209 int ret;
4210
4211 tree = &BTRFS_I(page->mapping->host)->io_tree;
4212 map = &BTRFS_I(page->mapping->host)->extent_tree;
4213 ret = try_release_extent_mapping(map, tree, page, gfp_flags);
4214 if (ret == 1) {
4215 ClearPagePrivate(page);
4216 set_page_private(page, 0);
4217 page_cache_release(page);
4218 }
4219 return ret;
4220}
4221
4222static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
4223{
4224 if (PageWriteback(page) || PageDirty(page))
4225 return 0;
4226 return __btrfs_releasepage(page, gfp_flags);
4227}
4228
4229static void btrfs_invalidatepage(struct page *page, unsigned long offset)
4230{
4231 struct extent_io_tree *tree;
4232 struct btrfs_ordered_extent *ordered;
4233 u64 page_start = page_offset(page);
4234 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
4235
4236 wait_on_page_writeback(page);
4237 tree = &BTRFS_I(page->mapping->host)->io_tree;
4238 if (offset) {
4239 btrfs_releasepage(page, GFP_NOFS);
4240 return;
4241 }
4242
4243 lock_extent(tree, page_start, page_end, GFP_NOFS);
4244 ordered = btrfs_lookup_ordered_extent(page->mapping->host,
4245 page_offset(page));
4246 if (ordered) {
4247 /*
4248 * IO on this page will never be started, so we need
4249 * to account for any ordered extents now
4250 */
4251 clear_extent_bit(tree, page_start, page_end,
4252 EXTENT_DIRTY | EXTENT_DELALLOC |
4253 EXTENT_LOCKED, 1, 0, GFP_NOFS);
4254 btrfs_finish_ordered_io(page->mapping->host,
4255 page_start, page_end);
4256 btrfs_put_ordered_extent(ordered);
4257 lock_extent(tree, page_start, page_end, GFP_NOFS);
4258 }
4259 clear_extent_bit(tree, page_start, page_end,
4260 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
4261 EXTENT_ORDERED,
4262 1, 1, GFP_NOFS);
4263 __btrfs_releasepage(page, GFP_NOFS);
4264
4265 ClearPageChecked(page);
4266 if (PagePrivate(page)) {
4267 ClearPagePrivate(page);
4268 set_page_private(page, 0);
4269 page_cache_release(page);
4270 }
4271}
4272
4273/*
4274 * btrfs_page_mkwrite() is not allowed to change the file size as it gets
4275 * called from a page fault handler when a page is first dirtied. Hence we must
4276 * be careful to check for EOF conditions here. We set the page up correctly
4277 * for a written page which means we get ENOSPC checking when writing into
4278 * holes and correct delalloc and unwritten extent mapping on filesystems that
4279 * support these features.
4280 *
4281 * We are not allowed to take the i_mutex here so we have to play games to
4282 * protect against truncate races as the page could now be beyond EOF. Because
4283 * vmtruncate() writes the inode size before removing pages, once we have the
4284 * page lock we can determine safely if the page is beyond EOF. If it is not
4285 * beyond EOF, then the page is guaranteed safe against truncation until we
4286 * unlock the page.
4287 */
4288int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
4289{
4290 struct inode *inode = fdentry(vma->vm_file)->d_inode;
4291 struct btrfs_root *root = BTRFS_I(inode)->root;
4292 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
4293 struct btrfs_ordered_extent *ordered;
4294 char *kaddr;
4295 unsigned long zero_start;
4296 loff_t size;
4297 int ret;
4298 u64 page_start;
4299 u64 page_end;
4300
4301 ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
4302 if (ret)
4303 goto out;
4304
4305 ret = -EINVAL;
4306again:
4307 lock_page(page);
4308 size = i_size_read(inode);
4309 page_start = page_offset(page);
4310 page_end = page_start + PAGE_CACHE_SIZE - 1;
4311
4312 if ((page->mapping != inode->i_mapping) ||
4313 (page_start >= size)) {
4314 /* page got truncated out from underneath us */
4315 goto out_unlock;
4316 }
4317 wait_on_page_writeback(page);
4318
4319 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
4320 set_page_extent_mapped(page);
4321
4322 /*
4323 * we can't set the delalloc bits if there are pending ordered
4324 * extents. Drop our locks and wait for them to finish
4325 */
4326 ordered = btrfs_lookup_ordered_extent(inode, page_start);
4327 if (ordered) {
4328 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
4329 unlock_page(page);
4330 btrfs_start_ordered_extent(inode, ordered, 1);
4331 btrfs_put_ordered_extent(ordered);
4332 goto again;
4333 }
4334
4335 btrfs_set_extent_delalloc(inode, page_start, page_end);
4336 ret = 0;
4337
4338 /* page is wholly or partially inside EOF */
4339 if (page_start + PAGE_CACHE_SIZE > size)
4340 zero_start = size & ~PAGE_CACHE_MASK;
4341 else
4342 zero_start = PAGE_CACHE_SIZE;
4343
4344 if (zero_start != PAGE_CACHE_SIZE) {
4345 kaddr = kmap(page);
4346 memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
4347 flush_dcache_page(page);
4348 kunmap(page);
4349 }
4350 ClearPageChecked(page);
4351 set_page_dirty(page);
4352 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
4353
4354out_unlock:
4355 unlock_page(page);
4356out:
4357 return ret;
4358}
4359
4360static void btrfs_truncate(struct inode *inode)
4361{
4362 struct btrfs_root *root = BTRFS_I(inode)->root;
4363 int ret;
4364 struct btrfs_trans_handle *trans;
4365 unsigned long nr;
4366 u64 mask = root->sectorsize - 1;
4367
4368 if (!S_ISREG(inode->i_mode))
4369 return;
4370 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
4371 return;
4372
4373 btrfs_truncate_page(inode->i_mapping, inode->i_size);
4374 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
4375
4376 trans = btrfs_start_transaction(root, 1);
4377 btrfs_set_trans_block_group(trans, inode);
4378 btrfs_i_size_write(inode, inode->i_size);
4379
4380 ret = btrfs_orphan_add(trans, inode);
4381 if (ret)
4382 goto out;
4383 /* FIXME, add redo link to tree so we don't leak on crash */
4384 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size,
4385 BTRFS_EXTENT_DATA_KEY);
4386 btrfs_update_inode(trans, root, inode);
4387
4388 ret = btrfs_orphan_del(trans, inode);
4389 BUG_ON(ret);
4390
4391out:
4392 nr = trans->blocks_used;
4393 ret = btrfs_end_transaction_throttle(trans, root);
4394 BUG_ON(ret);
4395 btrfs_btree_balance_dirty(root, nr);
4396}
4397
4398/*
4399 * create a new subvolume directory/inode (helper for the ioctl).
4400 */
4401int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
4402 struct btrfs_root *new_root, struct dentry *dentry,
4403 u64 new_dirid, u64 alloc_hint)
4404{
4405 struct inode *inode;
4406 int error;
4407 u64 index = 0;
4408
4409 inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
4410 new_dirid, alloc_hint, S_IFDIR | 0700, &index);
4411 if (IS_ERR(inode))
4412 return PTR_ERR(inode);
4413 inode->i_op = &btrfs_dir_inode_operations;
4414 inode->i_fop = &btrfs_dir_file_operations;
4415
4416 inode->i_nlink = 1;
4417 btrfs_i_size_write(inode, 0);
4418
4419 error = btrfs_update_inode(trans, new_root, inode);
4420 if (error)
4421 return error;
4422
4423 d_instantiate(dentry, inode);
4424 return 0;
4425}
4426
4427/* helper function for file defrag and space balancing. This
4428 * forces readahead on a given range of bytes in an inode
4429 */
4430unsigned long btrfs_force_ra(struct address_space *mapping,
4431 struct file_ra_state *ra, struct file *file,
4432 pgoff_t offset, pgoff_t last_index)
4433{
4434 pgoff_t req_size = last_index - offset + 1;
4435
4436 page_cache_sync_readahead(mapping, ra, file, offset, req_size);
4437 return offset + req_size;
4438}
4439
4440struct inode *btrfs_alloc_inode(struct super_block *sb)
4441{
4442 struct btrfs_inode *ei;
4443
4444 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
4445 if (!ei)
4446 return NULL;
4447 ei->last_trans = 0;
4448 ei->logged_trans = 0;
4449 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
4450 ei->i_acl = BTRFS_ACL_NOT_CACHED;
4451 ei->i_default_acl = BTRFS_ACL_NOT_CACHED;
4452 INIT_LIST_HEAD(&ei->i_orphan);
4453 return &ei->vfs_inode;
4454}
4455
4456void btrfs_destroy_inode(struct inode *inode)
4457{
4458 struct btrfs_ordered_extent *ordered;
4459 WARN_ON(!list_empty(&inode->i_dentry));
4460 WARN_ON(inode->i_data.nrpages);
4461
4462 if (BTRFS_I(inode)->i_acl &&
4463 BTRFS_I(inode)->i_acl != BTRFS_ACL_NOT_CACHED)
4464 posix_acl_release(BTRFS_I(inode)->i_acl);
4465 if (BTRFS_I(inode)->i_default_acl &&
4466 BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED)
4467 posix_acl_release(BTRFS_I(inode)->i_default_acl);
4468
4469 spin_lock(&BTRFS_I(inode)->root->list_lock);
4470 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
4471 printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan"
4472 " list\n", inode->i_ino);
4473 dump_stack();
4474 }
4475 spin_unlock(&BTRFS_I(inode)->root->list_lock);
4476
4477 while (1) {
4478 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
4479 if (!ordered)
4480 break;
4481 else {
4482 printk(KERN_ERR "btrfs found ordered "
4483 "extent %llu %llu on inode cleanup\n",
4484 (unsigned long long)ordered->file_offset,
4485 (unsigned long long)ordered->len);
4486 btrfs_remove_ordered_extent(inode, ordered);
4487 btrfs_put_ordered_extent(ordered);
4488 btrfs_put_ordered_extent(ordered);
4489 }
4490 }
4491 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
4492 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
4493}
4494
4495static void init_once(void *foo)
4496{
4497 struct btrfs_inode *ei = (struct btrfs_inode *) foo;
4498
4499 inode_init_once(&ei->vfs_inode);
4500}
4501
4502void btrfs_destroy_cachep(void)
4503{
4504 if (btrfs_inode_cachep)
4505 kmem_cache_destroy(btrfs_inode_cachep);
4506 if (btrfs_trans_handle_cachep)
4507 kmem_cache_destroy(btrfs_trans_handle_cachep);
4508 if (btrfs_transaction_cachep)
4509 kmem_cache_destroy(btrfs_transaction_cachep);
4510 if (btrfs_bit_radix_cachep)
4511 kmem_cache_destroy(btrfs_bit_radix_cachep);
4512 if (btrfs_path_cachep)
4513 kmem_cache_destroy(btrfs_path_cachep);
4514}
4515
4516struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
4517 unsigned long extra_flags,
4518 void (*ctor)(void *))
4519{
4520 return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
4521 SLAB_MEM_SPREAD | extra_flags), ctor);
4522}
4523
4524int btrfs_init_cachep(void)
4525{
4526 btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache",
4527 sizeof(struct btrfs_inode),
4528 0, init_once);
4529 if (!btrfs_inode_cachep)
4530 goto fail;
4531 btrfs_trans_handle_cachep =
4532 btrfs_cache_create("btrfs_trans_handle_cache",
4533 sizeof(struct btrfs_trans_handle),
4534 0, NULL);
4535 if (!btrfs_trans_handle_cachep)
4536 goto fail;
4537 btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache",
4538 sizeof(struct btrfs_transaction),
4539 0, NULL);
4540 if (!btrfs_transaction_cachep)
4541 goto fail;
4542 btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache",
4543 sizeof(struct btrfs_path),
4544 0, NULL);
4545 if (!btrfs_path_cachep)
4546 goto fail;
4547 btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256,
4548 SLAB_DESTROY_BY_RCU, NULL);
4549 if (!btrfs_bit_radix_cachep)
4550 goto fail;
4551 return 0;
4552fail:
4553 btrfs_destroy_cachep();
4554 return -ENOMEM;
4555}
4556
4557static int btrfs_getattr(struct vfsmount *mnt,
4558 struct dentry *dentry, struct kstat *stat)
4559{
4560 struct inode *inode = dentry->d_inode;
4561 generic_fillattr(inode, stat);
4562 stat->dev = BTRFS_I(inode)->root->anon_super.s_dev;
4563 stat->blksize = PAGE_CACHE_SIZE;
4564 stat->blocks = (inode_get_bytes(inode) +
4565 BTRFS_I(inode)->delalloc_bytes) >> 9;
4566 return 0;
4567}
4568
4569static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4570 struct inode *new_dir, struct dentry *new_dentry)
4571{
4572 struct btrfs_trans_handle *trans;
4573 struct btrfs_root *root = BTRFS_I(old_dir)->root;
4574 struct inode *new_inode = new_dentry->d_inode;
4575 struct inode *old_inode = old_dentry->d_inode;
4576 struct timespec ctime = CURRENT_TIME;
4577 u64 index = 0;
4578 int ret;
4579
4580 /* we're not allowed to rename between subvolumes */
4581 if (BTRFS_I(old_inode)->root->root_key.objectid !=
4582 BTRFS_I(new_dir)->root->root_key.objectid)
4583 return -EXDEV;
4584
4585 if (S_ISDIR(old_inode->i_mode) && new_inode &&
4586 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
4587 return -ENOTEMPTY;
4588 }
4589
4590 /* to rename a snapshot or subvolume, we need to juggle the
4591 * backrefs. This isn't coded yet
4592 */
4593 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
4594 return -EXDEV;
4595
4596 ret = btrfs_check_free_space(root, 1, 0);
4597 if (ret)
4598 goto out_unlock;
4599
4600 trans = btrfs_start_transaction(root, 1);
4601
4602 btrfs_set_trans_block_group(trans, new_dir);
4603
4604 btrfs_inc_nlink(old_dentry->d_inode);
4605 old_dir->i_ctime = old_dir->i_mtime = ctime;
4606 new_dir->i_ctime = new_dir->i_mtime = ctime;
4607 old_inode->i_ctime = ctime;
4608
4609 ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode,
4610 old_dentry->d_name.name,
4611 old_dentry->d_name.len);
4612 if (ret)
4613 goto out_fail;
4614
4615 if (new_inode) {
4616 new_inode->i_ctime = CURRENT_TIME;
4617 ret = btrfs_unlink_inode(trans, root, new_dir,
4618 new_dentry->d_inode,
4619 new_dentry->d_name.name,
4620 new_dentry->d_name.len);
4621 if (ret)
4622 goto out_fail;
4623 if (new_inode->i_nlink == 0) {
4624 ret = btrfs_orphan_add(trans, new_dentry->d_inode);
4625 if (ret)
4626 goto out_fail;
4627 }
4628
4629 }
4630 ret = btrfs_set_inode_index(new_dir, &index);
4631 if (ret)
4632 goto out_fail;
4633
4634 ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode,
4635 old_inode, new_dentry->d_name.name,
4636 new_dentry->d_name.len, 1, index);
4637 if (ret)
4638 goto out_fail;
4639
4640out_fail:
4641 btrfs_end_transaction_throttle(trans, root);
4642out_unlock:
4643 return ret;
4644}
4645
4646/*
4647 * some fairly slow code that needs optimization. This walks the list
4648 * of all the inodes with pending delalloc and forces them to disk.
4649 */
4650int btrfs_start_delalloc_inodes(struct btrfs_root *root)
4651{
4652 struct list_head *head = &root->fs_info->delalloc_inodes;
4653 struct btrfs_inode *binode;
4654 struct inode *inode;
4655
4656 if (root->fs_info->sb->s_flags & MS_RDONLY)
4657 return -EROFS;
4658
4659 spin_lock(&root->fs_info->delalloc_lock);
4660 while (!list_empty(head)) {
4661 binode = list_entry(head->next, struct btrfs_inode,
4662 delalloc_inodes);
4663 inode = igrab(&binode->vfs_inode);
4664 if (!inode)
4665 list_del_init(&binode->delalloc_inodes);
4666 spin_unlock(&root->fs_info->delalloc_lock);
4667 if (inode) {
4668 filemap_flush(inode->i_mapping);
4669 iput(inode);
4670 }
4671 cond_resched();
4672 spin_lock(&root->fs_info->delalloc_lock);
4673 }
4674 spin_unlock(&root->fs_info->delalloc_lock);
4675
4676 /* the filemap_flush will queue IO into the worker threads, but
4677 * we have to make sure the IO is actually started and that
4678 * ordered extents get created before we return
4679 */
4680 atomic_inc(&root->fs_info->async_submit_draining);
4681 while (atomic_read(&root->fs_info->nr_async_submits) ||
4682 atomic_read(&root->fs_info->async_delalloc_pages)) {
4683 wait_event(root->fs_info->async_submit_wait,
4684 (atomic_read(&root->fs_info->nr_async_submits) == 0 &&
4685 atomic_read(&root->fs_info->async_delalloc_pages) == 0));
4686 }
4687 atomic_dec(&root->fs_info->async_submit_draining);
4688 return 0;
4689}
4690
4691static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
4692 const char *symname)
4693{
4694 struct btrfs_trans_handle *trans;
4695 struct btrfs_root *root = BTRFS_I(dir)->root;
4696 struct btrfs_path *path;
4697 struct btrfs_key key;
4698 struct inode *inode = NULL;
4699 int err;
4700 int drop_inode = 0;
4701 u64 objectid;
4702 u64 index = 0 ;
4703 int name_len;
4704 int datasize;
4705 unsigned long ptr;
4706 struct btrfs_file_extent_item *ei;
4707 struct extent_buffer *leaf;
4708 unsigned long nr = 0;
4709
4710 name_len = strlen(symname) + 1;
4711 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
4712 return -ENAMETOOLONG;
4713
4714 err = btrfs_check_free_space(root, 1, 0);
4715 if (err)
4716 goto out_fail;
4717
4718 trans = btrfs_start_transaction(root, 1);
4719 btrfs_set_trans_block_group(trans, dir);
4720
4721 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
4722 if (err) {
4723 err = -ENOSPC;
4724 goto out_unlock;
4725 }
4726
4727 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4728 dentry->d_name.len,
4729 dentry->d_parent->d_inode->i_ino, objectid,
4730 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
4731 &index);
4732 err = PTR_ERR(inode);
4733 if (IS_ERR(inode))
4734 goto out_unlock;
4735
4736 err = btrfs_init_acl(inode, dir);
4737 if (err) {
4738 drop_inode = 1;
4739 goto out_unlock;
4740 }
4741
4742 btrfs_set_trans_block_group(trans, inode);
4743 err = btrfs_add_nondir(trans, dentry, inode, 0, index);
4744 if (err)
4745 drop_inode = 1;
4746 else {
4747 inode->i_mapping->a_ops = &btrfs_aops;
4748 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
4749 inode->i_fop = &btrfs_file_operations;
4750 inode->i_op = &btrfs_file_inode_operations;
4751 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
4752 }
4753 dir->i_sb->s_dirt = 1;
4754 btrfs_update_inode_block_group(trans, inode);
4755 btrfs_update_inode_block_group(trans, dir);
4756 if (drop_inode)
4757 goto out_unlock;
4758
4759 path = btrfs_alloc_path();
4760 BUG_ON(!path);
4761 key.objectid = inode->i_ino;
4762 key.offset = 0;
4763 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
4764 datasize = btrfs_file_extent_calc_inline_size(name_len);
4765 err = btrfs_insert_empty_item(trans, root, path, &key,
4766 datasize);
4767 if (err) {
4768 drop_inode = 1;
4769 goto out_unlock;
4770 }
4771 leaf = path->nodes[0];
4772 ei = btrfs_item_ptr(leaf, path->slots[0],
4773 struct btrfs_file_extent_item);
4774 btrfs_set_file_extent_generation(leaf, ei, trans->transid);
4775 btrfs_set_file_extent_type(leaf, ei,
4776 BTRFS_FILE_EXTENT_INLINE);
4777 btrfs_set_file_extent_encryption(leaf, ei, 0);
4778 btrfs_set_file_extent_compression(leaf, ei, 0);
4779 btrfs_set_file_extent_other_encoding(leaf, ei, 0);
4780 btrfs_set_file_extent_ram_bytes(leaf, ei, name_len);
4781
4782 ptr = btrfs_file_extent_inline_start(ei);
4783 write_extent_buffer(leaf, symname, ptr, name_len);
4784 btrfs_mark_buffer_dirty(leaf);
4785 btrfs_free_path(path);
4786
4787 inode->i_op = &btrfs_symlink_inode_operations;
4788 inode->i_mapping->a_ops = &btrfs_symlink_aops;
4789 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
4790 inode_set_bytes(inode, name_len);
4791 btrfs_i_size_write(inode, name_len - 1);
4792 err = btrfs_update_inode(trans, root, inode);
4793 if (err)
4794 drop_inode = 1;
4795
4796out_unlock:
4797 nr = trans->blocks_used;
4798 btrfs_end_transaction_throttle(trans, root);
4799out_fail:
4800 if (drop_inode) {
4801 inode_dec_link_count(inode);
4802 iput(inode);
4803 }
4804 btrfs_btree_balance_dirty(root, nr);
4805 return err;
4806}
4807
4808static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
4809 u64 alloc_hint, int mode)
4810{
4811 struct btrfs_trans_handle *trans;
4812 struct btrfs_root *root = BTRFS_I(inode)->root;
4813 struct btrfs_key ins;
4814 u64 alloc_size;
4815 u64 cur_offset = start;
4816 u64 num_bytes = end - start;
4817 int ret = 0;
4818
4819 trans = btrfs_join_transaction(root, 1);
4820 BUG_ON(!trans);
4821 btrfs_set_trans_block_group(trans, inode);
4822
4823 while (num_bytes > 0) {
4824 alloc_size = min(num_bytes, root->fs_info->max_extent);
4825 ret = btrfs_reserve_extent(trans, root, alloc_size,
4826 root->sectorsize, 0, alloc_hint,
4827 (u64)-1, &ins, 1);
4828 if (ret) {
4829 WARN_ON(1);
4830 goto out;
4831 }
4832 ret = insert_reserved_file_extent(trans, inode,
4833 cur_offset, ins.objectid,
4834 ins.offset, ins.offset,
4835 ins.offset, 0, 0, 0,
4836 BTRFS_FILE_EXTENT_PREALLOC);
4837 BUG_ON(ret);
4838 num_bytes -= ins.offset;
4839 cur_offset += ins.offset;
4840 alloc_hint = ins.objectid + ins.offset;
4841 }
4842out:
4843 if (cur_offset > start) {
4844 inode->i_ctime = CURRENT_TIME;
4845 btrfs_set_flag(inode, PREALLOC);
4846 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
4847 cur_offset > i_size_read(inode))
4848 btrfs_i_size_write(inode, cur_offset);
4849 ret = btrfs_update_inode(trans, root, inode);
4850 BUG_ON(ret);
4851 }
4852
4853 btrfs_end_transaction(trans, root);
4854 return ret;
4855}
4856
4857static long btrfs_fallocate(struct inode *inode, int mode,
4858 loff_t offset, loff_t len)
4859{
4860 u64 cur_offset;
4861 u64 last_byte;
4862 u64 alloc_start;
4863 u64 alloc_end;
4864 u64 alloc_hint = 0;
4865 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
4866 struct extent_map *em;
4867 int ret;
4868
4869 alloc_start = offset & ~mask;
4870 alloc_end = (offset + len + mask) & ~mask;
4871
4872 mutex_lock(&inode->i_mutex);
4873 if (alloc_start > inode->i_size) {
4874 ret = btrfs_cont_expand(inode, alloc_start);
4875 if (ret)
4876 goto out;
4877 }
4878
4879 while (1) {
4880 struct btrfs_ordered_extent *ordered;
4881 lock_extent(&BTRFS_I(inode)->io_tree, alloc_start,
4882 alloc_end - 1, GFP_NOFS);
4883 ordered = btrfs_lookup_first_ordered_extent(inode,
4884 alloc_end - 1);
4885 if (ordered &&
4886 ordered->file_offset + ordered->len > alloc_start &&
4887 ordered->file_offset < alloc_end) {
4888 btrfs_put_ordered_extent(ordered);
4889 unlock_extent(&BTRFS_I(inode)->io_tree,
4890 alloc_start, alloc_end - 1, GFP_NOFS);
4891 btrfs_wait_ordered_range(inode, alloc_start,
4892 alloc_end - alloc_start);
4893 } else {
4894 if (ordered)
4895 btrfs_put_ordered_extent(ordered);
4896 break;
4897 }
4898 }
4899
4900 cur_offset = alloc_start;
4901 while (1) {
4902 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
4903 alloc_end - cur_offset, 0);
4904 BUG_ON(IS_ERR(em) || !em);
4905 last_byte = min(extent_map_end(em), alloc_end);
4906 last_byte = (last_byte + mask) & ~mask;
4907 if (em->block_start == EXTENT_MAP_HOLE) {
4908 ret = prealloc_file_range(inode, cur_offset,
4909 last_byte, alloc_hint, mode);
4910 if (ret < 0) {
4911 free_extent_map(em);
4912 break;
4913 }
4914 }
4915 if (em->block_start <= EXTENT_MAP_LAST_BYTE)
4916 alloc_hint = em->block_start;
4917 free_extent_map(em);
4918
4919 cur_offset = last_byte;
4920 if (cur_offset >= alloc_end) {
4921 ret = 0;
4922 break;
4923 }
4924 }
4925 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1,
4926 GFP_NOFS);
4927out:
4928 mutex_unlock(&inode->i_mutex);
4929 return ret;
4930}
4931
4932static int btrfs_set_page_dirty(struct page *page)
4933{
4934 return __set_page_dirty_nobuffers(page);
4935}
4936
4937static int btrfs_permission(struct inode *inode, int mask)
4938{
4939 if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE))
4940 return -EACCES;
4941 return generic_permission(inode, mask, btrfs_check_acl);
4942}
4943
4944static struct inode_operations btrfs_dir_inode_operations = {
4945 .getattr = btrfs_getattr,
4946 .lookup = btrfs_lookup,
4947 .create = btrfs_create,
4948 .unlink = btrfs_unlink,
4949 .link = btrfs_link,
4950 .mkdir = btrfs_mkdir,
4951 .rmdir = btrfs_rmdir,
4952 .rename = btrfs_rename,
4953 .symlink = btrfs_symlink,
4954 .setattr = btrfs_setattr,
4955 .mknod = btrfs_mknod,
4956 .setxattr = btrfs_setxattr,
4957 .getxattr = btrfs_getxattr,
4958 .listxattr = btrfs_listxattr,
4959 .removexattr = btrfs_removexattr,
4960 .permission = btrfs_permission,
4961};
4962static struct inode_operations btrfs_dir_ro_inode_operations = {
4963 .lookup = btrfs_lookup,
4964 .permission = btrfs_permission,
4965};
4966static struct file_operations btrfs_dir_file_operations = {
4967 .llseek = generic_file_llseek,
4968 .read = generic_read_dir,
4969 .readdir = btrfs_real_readdir,
4970 .unlocked_ioctl = btrfs_ioctl,
4971#ifdef CONFIG_COMPAT
4972 .compat_ioctl = btrfs_ioctl,
4973#endif
4974 .release = btrfs_release_file,
4975 .fsync = btrfs_sync_file,
4976};
4977
4978static struct extent_io_ops btrfs_extent_io_ops = {
4979 .fill_delalloc = run_delalloc_range,
4980 .submit_bio_hook = btrfs_submit_bio_hook,
4981 .merge_bio_hook = btrfs_merge_bio_hook,
4982 .readpage_end_io_hook = btrfs_readpage_end_io_hook,
4983 .writepage_end_io_hook = btrfs_writepage_end_io_hook,
4984 .writepage_start_hook = btrfs_writepage_start_hook,
4985 .readpage_io_failed_hook = btrfs_io_failed_hook,
4986 .set_bit_hook = btrfs_set_bit_hook,
4987 .clear_bit_hook = btrfs_clear_bit_hook,
4988};
4989
4990static struct address_space_operations btrfs_aops = {
4991 .readpage = btrfs_readpage,
4992 .writepage = btrfs_writepage,
4993 .writepages = btrfs_writepages,
4994 .readpages = btrfs_readpages,
4995 .sync_page = block_sync_page,
4996 .bmap = btrfs_bmap,
4997 .direct_IO = btrfs_direct_IO,
4998 .invalidatepage = btrfs_invalidatepage,
4999 .releasepage = btrfs_releasepage,
5000 .set_page_dirty = btrfs_set_page_dirty,
5001};
5002
5003static struct address_space_operations btrfs_symlink_aops = {
5004 .readpage = btrfs_readpage,
5005 .writepage = btrfs_writepage,
5006 .invalidatepage = btrfs_invalidatepage,
5007 .releasepage = btrfs_releasepage,
5008};
5009
5010static struct inode_operations btrfs_file_inode_operations = {
5011 .truncate = btrfs_truncate,
5012 .getattr = btrfs_getattr,
5013 .setattr = btrfs_setattr,
5014 .setxattr = btrfs_setxattr,
5015 .getxattr = btrfs_getxattr,
5016 .listxattr = btrfs_listxattr,
5017 .removexattr = btrfs_removexattr,
5018 .permission = btrfs_permission,
5019 .fallocate = btrfs_fallocate,
5020};
5021static struct inode_operations btrfs_special_inode_operations = {
5022 .getattr = btrfs_getattr,
5023 .setattr = btrfs_setattr,
5024 .permission = btrfs_permission,
5025 .setxattr = btrfs_setxattr,
5026 .getxattr = btrfs_getxattr,
5027 .listxattr = btrfs_listxattr,
5028 .removexattr = btrfs_removexattr,
5029};
5030static struct inode_operations btrfs_symlink_inode_operations = {
5031 .readlink = generic_readlink,
5032 .follow_link = page_follow_link_light,
5033 .put_link = page_put_link,
5034 .permission = btrfs_permission,
5035};
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
new file mode 100644
index 000000000000..c2aa33e3feb5
--- /dev/null
+++ b/fs/btrfs/ioctl.c
@@ -0,0 +1,1132 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/kernel.h>
20#include <linux/bio.h>
21#include <linux/buffer_head.h>
22#include <linux/file.h>
23#include <linux/fs.h>
24#include <linux/fsnotify.h>
25#include <linux/pagemap.h>
26#include <linux/highmem.h>
27#include <linux/time.h>
28#include <linux/init.h>
29#include <linux/string.h>
30#include <linux/smp_lock.h>
31#include <linux/backing-dev.h>
32#include <linux/mount.h>
33#include <linux/mpage.h>
34#include <linux/namei.h>
35#include <linux/swap.h>
36#include <linux/writeback.h>
37#include <linux/statfs.h>
38#include <linux/compat.h>
39#include <linux/bit_spinlock.h>
40#include <linux/security.h>
41#include <linux/version.h>
42#include <linux/xattr.h>
43#include <linux/vmalloc.h>
44#include "compat.h"
45#include "ctree.h"
46#include "disk-io.h"
47#include "transaction.h"
48#include "btrfs_inode.h"
49#include "ioctl.h"
50#include "print-tree.h"
51#include "volumes.h"
52#include "locking.h"
53
54
55
56static noinline int create_subvol(struct btrfs_root *root,
57 struct dentry *dentry,
58 char *name, int namelen)
59{
60 struct btrfs_trans_handle *trans;
61 struct btrfs_key key;
62 struct btrfs_root_item root_item;
63 struct btrfs_inode_item *inode_item;
64 struct extent_buffer *leaf;
65 struct btrfs_root *new_root = root;
66 struct inode *dir;
67 int ret;
68 int err;
69 u64 objectid;
70 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
71 u64 index = 0;
72 unsigned long nr = 1;
73
74 ret = btrfs_check_free_space(root, 1, 0);
75 if (ret)
76 goto fail_commit;
77
78 trans = btrfs_start_transaction(root, 1);
79 BUG_ON(!trans);
80
81 ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
82 0, &objectid);
83 if (ret)
84 goto fail;
85
86 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
87 objectid, trans->transid, 0, 0, 0);
88 if (IS_ERR(leaf)) {
89 ret = PTR_ERR(leaf);
90 goto fail;
91 }
92
93 btrfs_set_header_nritems(leaf, 0);
94 btrfs_set_header_level(leaf, 0);
95 btrfs_set_header_bytenr(leaf, leaf->start);
96 btrfs_set_header_generation(leaf, trans->transid);
97 btrfs_set_header_owner(leaf, objectid);
98
99 write_extent_buffer(leaf, root->fs_info->fsid,
100 (unsigned long)btrfs_header_fsid(leaf),
101 BTRFS_FSID_SIZE);
102 btrfs_mark_buffer_dirty(leaf);
103
104 inode_item = &root_item.inode;
105 memset(inode_item, 0, sizeof(*inode_item));
106 inode_item->generation = cpu_to_le64(1);
107 inode_item->size = cpu_to_le64(3);
108 inode_item->nlink = cpu_to_le32(1);
109 inode_item->nbytes = cpu_to_le64(root->leafsize);
110 inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
111
112 btrfs_set_root_bytenr(&root_item, leaf->start);
113 btrfs_set_root_generation(&root_item, trans->transid);
114 btrfs_set_root_level(&root_item, 0);
115 btrfs_set_root_refs(&root_item, 1);
116 btrfs_set_root_used(&root_item, 0);
117 btrfs_set_root_last_snapshot(&root_item, 0);
118
119 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
120 root_item.drop_level = 0;
121
122 btrfs_tree_unlock(leaf);
123 free_extent_buffer(leaf);
124 leaf = NULL;
125
126 btrfs_set_root_dirid(&root_item, new_dirid);
127
128 key.objectid = objectid;
129 key.offset = 1;
130 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
131 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
132 &root_item);
133 if (ret)
134 goto fail;
135
136 /*
137 * insert the directory item
138 */
139 key.offset = (u64)-1;
140 dir = dentry->d_parent->d_inode;
141 ret = btrfs_set_inode_index(dir, &index);
142 BUG_ON(ret);
143
144 ret = btrfs_insert_dir_item(trans, root,
145 name, namelen, dir->i_ino, &key,
146 BTRFS_FT_DIR, index);
147 if (ret)
148 goto fail;
149
150 btrfs_i_size_write(dir, dir->i_size + namelen * 2);
151 ret = btrfs_update_inode(trans, root, dir);
152 BUG_ON(ret);
153
154 /* add the backref first */
155 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
156 objectid, BTRFS_ROOT_BACKREF_KEY,
157 root->root_key.objectid,
158 dir->i_ino, index, name, namelen);
159
160 BUG_ON(ret);
161
162 /* now add the forward ref */
163 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
164 root->root_key.objectid, BTRFS_ROOT_REF_KEY,
165 objectid,
166 dir->i_ino, index, name, namelen);
167
168 BUG_ON(ret);
169
170 ret = btrfs_commit_transaction(trans, root);
171 if (ret)
172 goto fail_commit;
173
174 new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
175 BUG_ON(!new_root);
176
177 trans = btrfs_start_transaction(new_root, 1);
178 BUG_ON(!trans);
179
180 ret = btrfs_create_subvol_root(trans, new_root, dentry, new_dirid,
181 BTRFS_I(dir)->block_group);
182 if (ret)
183 goto fail;
184
185fail:
186 nr = trans->blocks_used;
187 err = btrfs_commit_transaction(trans, new_root);
188 if (err && !ret)
189 ret = err;
190fail_commit:
191 btrfs_btree_balance_dirty(root, nr);
192 return ret;
193}
194
195static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
196 char *name, int namelen)
197{
198 struct btrfs_pending_snapshot *pending_snapshot;
199 struct btrfs_trans_handle *trans;
200 int ret = 0;
201 int err;
202 unsigned long nr = 0;
203
204 if (!root->ref_cows)
205 return -EINVAL;
206
207 ret = btrfs_check_free_space(root, 1, 0);
208 if (ret)
209 goto fail_unlock;
210
211 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
212 if (!pending_snapshot) {
213 ret = -ENOMEM;
214 goto fail_unlock;
215 }
216 pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
217 if (!pending_snapshot->name) {
218 ret = -ENOMEM;
219 kfree(pending_snapshot);
220 goto fail_unlock;
221 }
222 memcpy(pending_snapshot->name, name, namelen);
223 pending_snapshot->name[namelen] = '\0';
224 pending_snapshot->dentry = dentry;
225 trans = btrfs_start_transaction(root, 1);
226 BUG_ON(!trans);
227 pending_snapshot->root = root;
228 list_add(&pending_snapshot->list,
229 &trans->transaction->pending_snapshots);
230 err = btrfs_commit_transaction(trans, root);
231
232fail_unlock:
233 btrfs_btree_balance_dirty(root, nr);
234 return ret;
235}
236
237/* copy of may_create in fs/namei.c() */
238static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
239{
240 if (child->d_inode)
241 return -EEXIST;
242 if (IS_DEADDIR(dir))
243 return -ENOENT;
244 return inode_permission(dir, MAY_WRITE | MAY_EXEC);
245}
246
247/*
248 * Create a new subvolume below @parent. This is largely modeled after
249 * sys_mkdirat and vfs_mkdir, but we only do a single component lookup
250 * inside this filesystem so it's quite a bit simpler.
251 */
252static noinline int btrfs_mksubvol(struct path *parent, char *name,
253 int mode, int namelen,
254 struct btrfs_root *snap_src)
255{
256 struct dentry *dentry;
257 int error;
258
259 mutex_lock_nested(&parent->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
260
261 dentry = lookup_one_len(name, parent->dentry, namelen);
262 error = PTR_ERR(dentry);
263 if (IS_ERR(dentry))
264 goto out_unlock;
265
266 error = -EEXIST;
267 if (dentry->d_inode)
268 goto out_dput;
269
270 if (!IS_POSIXACL(parent->dentry->d_inode))
271 mode &= ~current->fs->umask;
272
273 error = mnt_want_write(parent->mnt);
274 if (error)
275 goto out_dput;
276
277 error = btrfs_may_create(parent->dentry->d_inode, dentry);
278 if (error)
279 goto out_drop_write;
280
281 /*
282 * Actually perform the low-level subvolume creation after all
283 * this VFS fuzz.
284 *
285 * Eventually we want to pass in an inode under which we create this
286 * subvolume, but for now all are under the filesystem root.
287 *
288 * Also we should pass on the mode eventually to allow creating new
289 * subvolume with specific mode bits.
290 */
291 if (snap_src) {
292 struct dentry *dir = dentry->d_parent;
293 struct dentry *test = dir->d_parent;
294 struct btrfs_path *path = btrfs_alloc_path();
295 int ret;
296 u64 test_oid;
297 u64 parent_oid = BTRFS_I(dir->d_inode)->root->root_key.objectid;
298
299 test_oid = snap_src->root_key.objectid;
300
301 ret = btrfs_find_root_ref(snap_src->fs_info->tree_root,
302 path, parent_oid, test_oid);
303 if (ret == 0)
304 goto create;
305 btrfs_release_path(snap_src->fs_info->tree_root, path);
306
307 /* we need to make sure we aren't creating a directory loop
308 * by taking a snapshot of something that has our current
309 * subvol in its directory tree. So, this loops through
310 * the dentries and checks the forward refs for each subvolume
311 * to see if is references the subvolume where we are
312 * placing this new snapshot.
313 */
314 while (1) {
315 if (!test ||
316 dir == snap_src->fs_info->sb->s_root ||
317 test == snap_src->fs_info->sb->s_root ||
318 test->d_inode->i_sb != snap_src->fs_info->sb) {
319 break;
320 }
321 if (S_ISLNK(test->d_inode->i_mode)) {
322 printk(KERN_INFO "Btrfs symlink in snapshot "
323 "path, failed\n");
324 error = -EMLINK;
325 btrfs_free_path(path);
326 goto out_drop_write;
327 }
328 test_oid =
329 BTRFS_I(test->d_inode)->root->root_key.objectid;
330 ret = btrfs_find_root_ref(snap_src->fs_info->tree_root,
331 path, test_oid, parent_oid);
332 if (ret == 0) {
333 printk(KERN_INFO "Btrfs snapshot creation "
334 "failed, looping\n");
335 error = -EMLINK;
336 btrfs_free_path(path);
337 goto out_drop_write;
338 }
339 btrfs_release_path(snap_src->fs_info->tree_root, path);
340 test = test->d_parent;
341 }
342create:
343 btrfs_free_path(path);
344 error = create_snapshot(snap_src, dentry, name, namelen);
345 } else {
346 error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root,
347 dentry, name, namelen);
348 }
349 if (error)
350 goto out_drop_write;
351
352 fsnotify_mkdir(parent->dentry->d_inode, dentry);
353out_drop_write:
354 mnt_drop_write(parent->mnt);
355out_dput:
356 dput(dentry);
357out_unlock:
358 mutex_unlock(&parent->dentry->d_inode->i_mutex);
359 return error;
360}
361
362
363static int btrfs_defrag_file(struct file *file)
364{
365 struct inode *inode = fdentry(file)->d_inode;
366 struct btrfs_root *root = BTRFS_I(inode)->root;
367 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
368 struct btrfs_ordered_extent *ordered;
369 struct page *page;
370 unsigned long last_index;
371 unsigned long ra_pages = root->fs_info->bdi.ra_pages;
372 unsigned long total_read = 0;
373 u64 page_start;
374 u64 page_end;
375 unsigned long i;
376 int ret;
377
378 ret = btrfs_check_free_space(root, inode->i_size, 0);
379 if (ret)
380 return -ENOSPC;
381
382 mutex_lock(&inode->i_mutex);
383 last_index = inode->i_size >> PAGE_CACHE_SHIFT;
384 for (i = 0; i <= last_index; i++) {
385 if (total_read % ra_pages == 0) {
386 btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i,
387 min(last_index, i + ra_pages - 1));
388 }
389 total_read++;
390again:
391 page = grab_cache_page(inode->i_mapping, i);
392 if (!page)
393 goto out_unlock;
394 if (!PageUptodate(page)) {
395 btrfs_readpage(NULL, page);
396 lock_page(page);
397 if (!PageUptodate(page)) {
398 unlock_page(page);
399 page_cache_release(page);
400 goto out_unlock;
401 }
402 }
403
404 wait_on_page_writeback(page);
405
406 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
407 page_end = page_start + PAGE_CACHE_SIZE - 1;
408 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
409
410 ordered = btrfs_lookup_ordered_extent(inode, page_start);
411 if (ordered) {
412 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
413 unlock_page(page);
414 page_cache_release(page);
415 btrfs_start_ordered_extent(inode, ordered, 1);
416 btrfs_put_ordered_extent(ordered);
417 goto again;
418 }
419 set_page_extent_mapped(page);
420
421 /*
422 * this makes sure page_mkwrite is called on the
423 * page if it is dirtied again later
424 */
425 clear_page_dirty_for_io(page);
426
427 btrfs_set_extent_delalloc(inode, page_start, page_end);
428
429 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
430 set_page_dirty(page);
431 unlock_page(page);
432 page_cache_release(page);
433 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
434 }
435
436out_unlock:
437 mutex_unlock(&inode->i_mutex);
438 return 0;
439}
440
441/*
442 * Called inside transaction, so use GFP_NOFS
443 */
444
445static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
446{
447 u64 new_size;
448 u64 old_size;
449 u64 devid = 1;
450 struct btrfs_ioctl_vol_args *vol_args;
451 struct btrfs_trans_handle *trans;
452 struct btrfs_device *device = NULL;
453 char *sizestr;
454 char *devstr = NULL;
455 int ret = 0;
456 int namelen;
457 int mod = 0;
458
459 if (root->fs_info->sb->s_flags & MS_RDONLY)
460 return -EROFS;
461
462 if (!capable(CAP_SYS_ADMIN))
463 return -EPERM;
464
465 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
466
467 if (!vol_args)
468 return -ENOMEM;
469
470 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
471 ret = -EFAULT;
472 goto out;
473 }
474
475 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
476 namelen = strlen(vol_args->name);
477
478 mutex_lock(&root->fs_info->volume_mutex);
479 sizestr = vol_args->name;
480 devstr = strchr(sizestr, ':');
481 if (devstr) {
482 char *end;
483 sizestr = devstr + 1;
484 *devstr = '\0';
485 devstr = vol_args->name;
486 devid = simple_strtoull(devstr, &end, 10);
487 printk(KERN_INFO "resizing devid %llu\n", devid);
488 }
489 device = btrfs_find_device(root, devid, NULL, NULL);
490 if (!device) {
491 printk(KERN_INFO "resizer unable to find device %llu\n", devid);
492 ret = -EINVAL;
493 goto out_unlock;
494 }
495 if (!strcmp(sizestr, "max"))
496 new_size = device->bdev->bd_inode->i_size;
497 else {
498 if (sizestr[0] == '-') {
499 mod = -1;
500 sizestr++;
501 } else if (sizestr[0] == '+') {
502 mod = 1;
503 sizestr++;
504 }
505 new_size = btrfs_parse_size(sizestr);
506 if (new_size == 0) {
507 ret = -EINVAL;
508 goto out_unlock;
509 }
510 }
511
512 old_size = device->total_bytes;
513
514 if (mod < 0) {
515 if (new_size > old_size) {
516 ret = -EINVAL;
517 goto out_unlock;
518 }
519 new_size = old_size - new_size;
520 } else if (mod > 0) {
521 new_size = old_size + new_size;
522 }
523
524 if (new_size < 256 * 1024 * 1024) {
525 ret = -EINVAL;
526 goto out_unlock;
527 }
528 if (new_size > device->bdev->bd_inode->i_size) {
529 ret = -EFBIG;
530 goto out_unlock;
531 }
532
533 do_div(new_size, root->sectorsize);
534 new_size *= root->sectorsize;
535
536 printk(KERN_INFO "new size for %s is %llu\n",
537 device->name, (unsigned long long)new_size);
538
539 if (new_size > old_size) {
540 trans = btrfs_start_transaction(root, 1);
541 ret = btrfs_grow_device(trans, device, new_size);
542 btrfs_commit_transaction(trans, root);
543 } else {
544 ret = btrfs_shrink_device(device, new_size);
545 }
546
547out_unlock:
548 mutex_unlock(&root->fs_info->volume_mutex);
549out:
550 kfree(vol_args);
551 return ret;
552}
553
554static noinline int btrfs_ioctl_snap_create(struct file *file,
555 void __user *arg, int subvol)
556{
557 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
558 struct btrfs_ioctl_vol_args *vol_args;
559 struct btrfs_dir_item *di;
560 struct btrfs_path *path;
561 struct file *src_file;
562 u64 root_dirid;
563 int namelen;
564 int ret = 0;
565
566 if (root->fs_info->sb->s_flags & MS_RDONLY)
567 return -EROFS;
568
569 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
570
571 if (!vol_args)
572 return -ENOMEM;
573
574 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
575 ret = -EFAULT;
576 goto out;
577 }
578
579 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
580 namelen = strlen(vol_args->name);
581 if (strchr(vol_args->name, '/')) {
582 ret = -EINVAL;
583 goto out;
584 }
585
586 path = btrfs_alloc_path();
587 if (!path) {
588 ret = -ENOMEM;
589 goto out;
590 }
591
592 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
593 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
594 path, root_dirid,
595 vol_args->name, namelen, 0);
596 btrfs_free_path(path);
597
598 if (di && !IS_ERR(di)) {
599 ret = -EEXIST;
600 goto out;
601 }
602
603 if (IS_ERR(di)) {
604 ret = PTR_ERR(di);
605 goto out;
606 }
607
608 if (subvol) {
609 ret = btrfs_mksubvol(&file->f_path, vol_args->name,
610 file->f_path.dentry->d_inode->i_mode,
611 namelen, NULL);
612 } else {
613 struct inode *src_inode;
614 src_file = fget(vol_args->fd);
615 if (!src_file) {
616 ret = -EINVAL;
617 goto out;
618 }
619
620 src_inode = src_file->f_path.dentry->d_inode;
621 if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) {
622 printk(KERN_INFO "btrfs: Snapshot src from "
623 "another FS\n");
624 ret = -EINVAL;
625 fput(src_file);
626 goto out;
627 }
628 ret = btrfs_mksubvol(&file->f_path, vol_args->name,
629 file->f_path.dentry->d_inode->i_mode,
630 namelen, BTRFS_I(src_inode)->root);
631 fput(src_file);
632 }
633
634out:
635 kfree(vol_args);
636 return ret;
637}
638
639static int btrfs_ioctl_defrag(struct file *file)
640{
641 struct inode *inode = fdentry(file)->d_inode;
642 struct btrfs_root *root = BTRFS_I(inode)->root;
643 int ret;
644
645 ret = mnt_want_write(file->f_path.mnt);
646 if (ret)
647 return ret;
648
649 switch (inode->i_mode & S_IFMT) {
650 case S_IFDIR:
651 if (!capable(CAP_SYS_ADMIN)) {
652 ret = -EPERM;
653 goto out;
654 }
655 btrfs_defrag_root(root, 0);
656 btrfs_defrag_root(root->fs_info->extent_root, 0);
657 break;
658 case S_IFREG:
659 if (!(file->f_mode & FMODE_WRITE)) {
660 ret = -EINVAL;
661 goto out;
662 }
663 btrfs_defrag_file(file);
664 break;
665 }
666out:
667 mnt_drop_write(file->f_path.mnt);
668 return ret;
669}
670
671static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
672{
673 struct btrfs_ioctl_vol_args *vol_args;
674 int ret;
675
676 if (!capable(CAP_SYS_ADMIN))
677 return -EPERM;
678
679 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
680
681 if (!vol_args)
682 return -ENOMEM;
683
684 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
685 ret = -EFAULT;
686 goto out;
687 }
688 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
689 ret = btrfs_init_new_device(root, vol_args->name);
690
691out:
692 kfree(vol_args);
693 return ret;
694}
695
696static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
697{
698 struct btrfs_ioctl_vol_args *vol_args;
699 int ret;
700
701 if (!capable(CAP_SYS_ADMIN))
702 return -EPERM;
703
704 if (root->fs_info->sb->s_flags & MS_RDONLY)
705 return -EROFS;
706
707 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
708
709 if (!vol_args)
710 return -ENOMEM;
711
712 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
713 ret = -EFAULT;
714 goto out;
715 }
716 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
717 ret = btrfs_rm_device(root, vol_args->name);
718
719out:
720 kfree(vol_args);
721 return ret;
722}
723
724static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
725 u64 off, u64 olen, u64 destoff)
726{
727 struct inode *inode = fdentry(file)->d_inode;
728 struct btrfs_root *root = BTRFS_I(inode)->root;
729 struct file *src_file;
730 struct inode *src;
731 struct btrfs_trans_handle *trans;
732 struct btrfs_path *path;
733 struct extent_buffer *leaf;
734 char *buf;
735 struct btrfs_key key;
736 u32 nritems;
737 int slot;
738 int ret;
739 u64 len = olen;
740 u64 bs = root->fs_info->sb->s_blocksize;
741 u64 hint_byte;
742
743 /*
744 * TODO:
745 * - split compressed inline extents. annoying: we need to
746 * decompress into destination's address_space (the file offset
747 * may change, so source mapping won't do), then recompress (or
748 * otherwise reinsert) a subrange.
749 * - allow ranges within the same file to be cloned (provided
750 * they don't overlap)?
751 */
752
753 /* the destination must be opened for writing */
754 if (!(file->f_mode & FMODE_WRITE))
755 return -EINVAL;
756
757 ret = mnt_want_write(file->f_path.mnt);
758 if (ret)
759 return ret;
760
761 src_file = fget(srcfd);
762 if (!src_file) {
763 ret = -EBADF;
764 goto out_drop_write;
765 }
766 src = src_file->f_dentry->d_inode;
767
768 ret = -EINVAL;
769 if (src == inode)
770 goto out_fput;
771
772 ret = -EISDIR;
773 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
774 goto out_fput;
775
776 ret = -EXDEV;
777 if (src->i_sb != inode->i_sb || BTRFS_I(src)->root != root)
778 goto out_fput;
779
780 ret = -ENOMEM;
781 buf = vmalloc(btrfs_level_size(root, 0));
782 if (!buf)
783 goto out_fput;
784
785 path = btrfs_alloc_path();
786 if (!path) {
787 vfree(buf);
788 goto out_fput;
789 }
790 path->reada = 2;
791
792 if (inode < src) {
793 mutex_lock(&inode->i_mutex);
794 mutex_lock(&src->i_mutex);
795 } else {
796 mutex_lock(&src->i_mutex);
797 mutex_lock(&inode->i_mutex);
798 }
799
800 /* determine range to clone */
801 ret = -EINVAL;
802 if (off >= src->i_size || off + len > src->i_size)
803 goto out_unlock;
804 if (len == 0)
805 olen = len = src->i_size - off;
806 /* if we extend to eof, continue to block boundary */
807 if (off + len == src->i_size)
808 len = ((src->i_size + bs-1) & ~(bs-1))
809 - off;
810
811 /* verify the end result is block aligned */
812 if ((off & (bs-1)) ||
813 ((off + len) & (bs-1)))
814 goto out_unlock;
815
816 /* do any pending delalloc/csum calc on src, one way or
817 another, and lock file content */
818 while (1) {
819 struct btrfs_ordered_extent *ordered;
820 lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
821 ordered = btrfs_lookup_first_ordered_extent(inode, off+len);
822 if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered)
823 break;
824 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
825 if (ordered)
826 btrfs_put_ordered_extent(ordered);
827 btrfs_wait_ordered_range(src, off, off+len);
828 }
829
830 trans = btrfs_start_transaction(root, 1);
831 BUG_ON(!trans);
832
833 /* punch hole in destination first */
834 btrfs_drop_extents(trans, root, inode, off, off+len, 0, &hint_byte);
835
836 /* clone data */
837 key.objectid = src->i_ino;
838 key.type = BTRFS_EXTENT_DATA_KEY;
839 key.offset = 0;
840
841 while (1) {
842 /*
843 * note the key will change type as we walk through the
844 * tree.
845 */
846 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
847 if (ret < 0)
848 goto out;
849
850 nritems = btrfs_header_nritems(path->nodes[0]);
851 if (path->slots[0] >= nritems) {
852 ret = btrfs_next_leaf(root, path);
853 if (ret < 0)
854 goto out;
855 if (ret > 0)
856 break;
857 nritems = btrfs_header_nritems(path->nodes[0]);
858 }
859 leaf = path->nodes[0];
860 slot = path->slots[0];
861
862 btrfs_item_key_to_cpu(leaf, &key, slot);
863 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
864 key.objectid != src->i_ino)
865 break;
866
867 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
868 struct btrfs_file_extent_item *extent;
869 int type;
870 u32 size;
871 struct btrfs_key new_key;
872 u64 disko = 0, diskl = 0;
873 u64 datao = 0, datal = 0;
874 u8 comp;
875
876 size = btrfs_item_size_nr(leaf, slot);
877 read_extent_buffer(leaf, buf,
878 btrfs_item_ptr_offset(leaf, slot),
879 size);
880
881 extent = btrfs_item_ptr(leaf, slot,
882 struct btrfs_file_extent_item);
883 comp = btrfs_file_extent_compression(leaf, extent);
884 type = btrfs_file_extent_type(leaf, extent);
885 if (type == BTRFS_FILE_EXTENT_REG) {
886 disko = btrfs_file_extent_disk_bytenr(leaf,
887 extent);
888 diskl = btrfs_file_extent_disk_num_bytes(leaf,
889 extent);
890 datao = btrfs_file_extent_offset(leaf, extent);
891 datal = btrfs_file_extent_num_bytes(leaf,
892 extent);
893 } else if (type == BTRFS_FILE_EXTENT_INLINE) {
894 /* take upper bound, may be compressed */
895 datal = btrfs_file_extent_ram_bytes(leaf,
896 extent);
897 }
898 btrfs_release_path(root, path);
899
900 if (key.offset + datal < off ||
901 key.offset >= off+len)
902 goto next;
903
904 memcpy(&new_key, &key, sizeof(new_key));
905 new_key.objectid = inode->i_ino;
906 new_key.offset = key.offset + destoff - off;
907
908 if (type == BTRFS_FILE_EXTENT_REG) {
909 ret = btrfs_insert_empty_item(trans, root, path,
910 &new_key, size);
911 if (ret)
912 goto out;
913
914 leaf = path->nodes[0];
915 slot = path->slots[0];
916 write_extent_buffer(leaf, buf,
917 btrfs_item_ptr_offset(leaf, slot),
918 size);
919
920 extent = btrfs_item_ptr(leaf, slot,
921 struct btrfs_file_extent_item);
922
923 if (off > key.offset) {
924 datao += off - key.offset;
925 datal -= off - key.offset;
926 }
927 if (key.offset + datao + datal + key.offset >
928 off + len)
929 datal = off + len - key.offset - datao;
930 /* disko == 0 means it's a hole */
931 if (!disko)
932 datao = 0;
933
934 btrfs_set_file_extent_offset(leaf, extent,
935 datao);
936 btrfs_set_file_extent_num_bytes(leaf, extent,
937 datal);
938 if (disko) {
939 inode_add_bytes(inode, datal);
940 ret = btrfs_inc_extent_ref(trans, root,
941 disko, diskl, leaf->start,
942 root->root_key.objectid,
943 trans->transid,
944 inode->i_ino);
945 BUG_ON(ret);
946 }
947 } else if (type == BTRFS_FILE_EXTENT_INLINE) {
948 u64 skip = 0;
949 u64 trim = 0;
950 if (off > key.offset) {
951 skip = off - key.offset;
952 new_key.offset += skip;
953 }
954
955 if (key.offset + datal > off+len)
956 trim = key.offset + datal - (off+len);
957
958 if (comp && (skip || trim)) {
959 ret = -EINVAL;
960 goto out;
961 }
962 size -= skip + trim;
963 datal -= skip + trim;
964 ret = btrfs_insert_empty_item(trans, root, path,
965 &new_key, size);
966 if (ret)
967 goto out;
968
969 if (skip) {
970 u32 start =
971 btrfs_file_extent_calc_inline_size(0);
972 memmove(buf+start, buf+start+skip,
973 datal);
974 }
975
976 leaf = path->nodes[0];
977 slot = path->slots[0];
978 write_extent_buffer(leaf, buf,
979 btrfs_item_ptr_offset(leaf, slot),
980 size);
981 inode_add_bytes(inode, datal);
982 }
983
984 btrfs_mark_buffer_dirty(leaf);
985 }
986
987next:
988 btrfs_release_path(root, path);
989 key.offset++;
990 }
991 ret = 0;
992out:
993 btrfs_release_path(root, path);
994 if (ret == 0) {
995 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
996 if (destoff + olen > inode->i_size)
997 btrfs_i_size_write(inode, destoff + olen);
998 BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
999 ret = btrfs_update_inode(trans, root, inode);
1000 }
1001 btrfs_end_transaction(trans, root);
1002 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
1003 if (ret)
1004 vmtruncate(inode, 0);
1005out_unlock:
1006 mutex_unlock(&src->i_mutex);
1007 mutex_unlock(&inode->i_mutex);
1008 vfree(buf);
1009 btrfs_free_path(path);
1010out_fput:
1011 fput(src_file);
1012out_drop_write:
1013 mnt_drop_write(file->f_path.mnt);
1014 return ret;
1015}
1016
1017static long btrfs_ioctl_clone_range(struct file *file, void __user *argp)
1018{
1019 struct btrfs_ioctl_clone_range_args args;
1020
1021 if (copy_from_user(&args, argp, sizeof(args)))
1022 return -EFAULT;
1023 return btrfs_ioctl_clone(file, args.src_fd, args.src_offset,
1024 args.src_length, args.dest_offset);
1025}
1026
1027/*
1028 * there are many ways the trans_start and trans_end ioctls can lead
1029 * to deadlocks. They should only be used by applications that
1030 * basically own the machine, and have a very in depth understanding
1031 * of all the possible deadlocks and enospc problems.
1032 */
1033static long btrfs_ioctl_trans_start(struct file *file)
1034{
1035 struct inode *inode = fdentry(file)->d_inode;
1036 struct btrfs_root *root = BTRFS_I(inode)->root;
1037 struct btrfs_trans_handle *trans;
1038 int ret = 0;
1039
1040 if (!capable(CAP_SYS_ADMIN))
1041 return -EPERM;
1042
1043 if (file->private_data) {
1044 ret = -EINPROGRESS;
1045 goto out;
1046 }
1047
1048 ret = mnt_want_write(file->f_path.mnt);
1049 if (ret)
1050 goto out;
1051
1052 mutex_lock(&root->fs_info->trans_mutex);
1053 root->fs_info->open_ioctl_trans++;
1054 mutex_unlock(&root->fs_info->trans_mutex);
1055
1056 trans = btrfs_start_ioctl_transaction(root, 0);
1057 if (trans)
1058 file->private_data = trans;
1059 else
1060 ret = -ENOMEM;
1061 /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/
1062out:
1063 return ret;
1064}
1065
1066/*
1067 * there are many ways the trans_start and trans_end ioctls can lead
1068 * to deadlocks. They should only be used by applications that
1069 * basically own the machine, and have a very in depth understanding
1070 * of all the possible deadlocks and enospc problems.
1071 */
1072long btrfs_ioctl_trans_end(struct file *file)
1073{
1074 struct inode *inode = fdentry(file)->d_inode;
1075 struct btrfs_root *root = BTRFS_I(inode)->root;
1076 struct btrfs_trans_handle *trans;
1077 int ret = 0;
1078
1079 trans = file->private_data;
1080 if (!trans) {
1081 ret = -EINVAL;
1082 goto out;
1083 }
1084 btrfs_end_transaction(trans, root);
1085 file->private_data = NULL;
1086
1087 mutex_lock(&root->fs_info->trans_mutex);
1088 root->fs_info->open_ioctl_trans--;
1089 mutex_unlock(&root->fs_info->trans_mutex);
1090
1091 mnt_drop_write(file->f_path.mnt);
1092
1093out:
1094 return ret;
1095}
1096
1097long btrfs_ioctl(struct file *file, unsigned int
1098 cmd, unsigned long arg)
1099{
1100 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
1101 void __user *argp = (void __user *)arg;
1102
1103 switch (cmd) {
1104 case BTRFS_IOC_SNAP_CREATE:
1105 return btrfs_ioctl_snap_create(file, argp, 0);
1106 case BTRFS_IOC_SUBVOL_CREATE:
1107 return btrfs_ioctl_snap_create(file, argp, 1);
1108 case BTRFS_IOC_DEFRAG:
1109 return btrfs_ioctl_defrag(file);
1110 case BTRFS_IOC_RESIZE:
1111 return btrfs_ioctl_resize(root, argp);
1112 case BTRFS_IOC_ADD_DEV:
1113 return btrfs_ioctl_add_dev(root, argp);
1114 case BTRFS_IOC_RM_DEV:
1115 return btrfs_ioctl_rm_dev(root, argp);
1116 case BTRFS_IOC_BALANCE:
1117 return btrfs_balance(root->fs_info->dev_root);
1118 case BTRFS_IOC_CLONE:
1119 return btrfs_ioctl_clone(file, arg, 0, 0, 0);
1120 case BTRFS_IOC_CLONE_RANGE:
1121 return btrfs_ioctl_clone_range(file, argp);
1122 case BTRFS_IOC_TRANS_START:
1123 return btrfs_ioctl_trans_start(file);
1124 case BTRFS_IOC_TRANS_END:
1125 return btrfs_ioctl_trans_end(file);
1126 case BTRFS_IOC_SYNC:
1127 btrfs_sync_fs(file->f_dentry->d_sb, 1);
1128 return 0;
1129 }
1130
1131 return -ENOTTY;
1132}
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
new file mode 100644
index 000000000000..78049ea208db
--- /dev/null
+++ b/fs/btrfs/ioctl.h
@@ -0,0 +1,67 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __IOCTL_
20#define __IOCTL_
21#include <linux/ioctl.h>
22
23#define BTRFS_IOCTL_MAGIC 0x94
24#define BTRFS_VOL_NAME_MAX 255
25#define BTRFS_PATH_NAME_MAX 3072
26
27struct btrfs_ioctl_vol_args {
28 __s64 fd;
29 char name[BTRFS_PATH_NAME_MAX + 1];
30};
31
32#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
33 struct btrfs_ioctl_vol_args)
34#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
35 struct btrfs_ioctl_vol_args)
36#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \
37 struct btrfs_ioctl_vol_args)
38#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \
39 struct btrfs_ioctl_vol_args)
40/* trans start and trans end are dangerous, and only for
41 * use by applications that know how to avoid the
42 * resulting deadlocks
43 */
44#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6)
45#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7)
46#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8)
47
48#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int)
49#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \
50 struct btrfs_ioctl_vol_args)
51#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \
52 struct btrfs_ioctl_vol_args)
53#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \
54 struct btrfs_ioctl_vol_args)
55struct btrfs_ioctl_clone_range_args {
56 __s64 src_fd;
57 __u64 src_offset, src_length;
58 __u64 dest_offset;
59};
60
61#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \
62 struct btrfs_ioctl_clone_range_args)
63
64#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \
65 struct btrfs_ioctl_vol_args)
66
67#endif
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
new file mode 100644
index 000000000000..39bae7761db6
--- /dev/null
+++ b/fs/btrfs/locking.c
@@ -0,0 +1,88 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18#include <linux/sched.h>
19#include <linux/gfp.h>
20#include <linux/pagemap.h>
21#include <linux/spinlock.h>
22#include <linux/page-flags.h>
23#include <asm/bug.h>
24#include "ctree.h"
25#include "extent_io.h"
26#include "locking.h"
27
28/*
29 * locks the per buffer mutex in an extent buffer. This uses adaptive locks
30 * and the spin is not tuned very extensively. The spinning does make a big
31 * difference in almost every workload, but spinning for the right amount of
32 * time needs some help.
33 *
34 * In general, we want to spin as long as the lock holder is doing btree
35 * searches, and we should give up if they are in more expensive code.
36 */
37
38int btrfs_tree_lock(struct extent_buffer *eb)
39{
40 int i;
41
42 if (mutex_trylock(&eb->mutex))
43 return 0;
44 for (i = 0; i < 512; i++) {
45 cpu_relax();
46 if (mutex_trylock(&eb->mutex))
47 return 0;
48 }
49 cpu_relax();
50 mutex_lock_nested(&eb->mutex, BTRFS_MAX_LEVEL - btrfs_header_level(eb));
51 return 0;
52}
53
54int btrfs_try_tree_lock(struct extent_buffer *eb)
55{
56 return mutex_trylock(&eb->mutex);
57}
58
59int btrfs_tree_unlock(struct extent_buffer *eb)
60{
61 mutex_unlock(&eb->mutex);
62 return 0;
63}
64
65int btrfs_tree_locked(struct extent_buffer *eb)
66{
67 return mutex_is_locked(&eb->mutex);
68}
69
70/*
71 * btrfs_search_slot uses this to decide if it should drop its locks
72 * before doing something expensive like allocating free blocks for cow.
73 */
74int btrfs_path_lock_waiting(struct btrfs_path *path, int level)
75{
76 int i;
77 struct extent_buffer *eb;
78 for (i = level; i <= level + 1 && i < BTRFS_MAX_LEVEL; i++) {
79 eb = path->nodes[i];
80 if (!eb)
81 break;
82 smp_mb();
83 if (!list_empty(&eb->mutex.wait_list))
84 return 1;
85 }
86 return 0;
87}
88
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
new file mode 100644
index 000000000000..bc1faef12519
--- /dev/null
+++ b/fs/btrfs/locking.h
@@ -0,0 +1,27 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __BTRFS_LOCKING_
20#define __BTRFS_LOCKING_
21
22int btrfs_tree_lock(struct extent_buffer *eb);
23int btrfs_tree_unlock(struct extent_buffer *eb);
24int btrfs_tree_locked(struct extent_buffer *eb);
25int btrfs_try_tree_lock(struct extent_buffer *eb);
26int btrfs_path_lock_waiting(struct btrfs_path *path, int level);
27#endif
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
new file mode 100644
index 000000000000..a20940170274
--- /dev/null
+++ b/fs/btrfs/ordered-data.c
@@ -0,0 +1,730 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/gfp.h>
20#include <linux/slab.h>
21#include <linux/blkdev.h>
22#include <linux/writeback.h>
23#include <linux/pagevec.h>
24#include "ctree.h"
25#include "transaction.h"
26#include "btrfs_inode.h"
27#include "extent_io.h"
28
29static u64 entry_end(struct btrfs_ordered_extent *entry)
30{
31 if (entry->file_offset + entry->len < entry->file_offset)
32 return (u64)-1;
33 return entry->file_offset + entry->len;
34}
35
36/* returns NULL if the insertion worked, or it returns the node it did find
37 * in the tree
38 */
39static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
40 struct rb_node *node)
41{
42 struct rb_node **p = &root->rb_node;
43 struct rb_node *parent = NULL;
44 struct btrfs_ordered_extent *entry;
45
46 while (*p) {
47 parent = *p;
48 entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node);
49
50 if (file_offset < entry->file_offset)
51 p = &(*p)->rb_left;
52 else if (file_offset >= entry_end(entry))
53 p = &(*p)->rb_right;
54 else
55 return parent;
56 }
57
58 rb_link_node(node, parent, p);
59 rb_insert_color(node, root);
60 return NULL;
61}
62
63/*
64 * look for a given offset in the tree, and if it can't be found return the
65 * first lesser offset
66 */
67static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
68 struct rb_node **prev_ret)
69{
70 struct rb_node *n = root->rb_node;
71 struct rb_node *prev = NULL;
72 struct rb_node *test;
73 struct btrfs_ordered_extent *entry;
74 struct btrfs_ordered_extent *prev_entry = NULL;
75
76 while (n) {
77 entry = rb_entry(n, struct btrfs_ordered_extent, rb_node);
78 prev = n;
79 prev_entry = entry;
80
81 if (file_offset < entry->file_offset)
82 n = n->rb_left;
83 else if (file_offset >= entry_end(entry))
84 n = n->rb_right;
85 else
86 return n;
87 }
88 if (!prev_ret)
89 return NULL;
90
91 while (prev && file_offset >= entry_end(prev_entry)) {
92 test = rb_next(prev);
93 if (!test)
94 break;
95 prev_entry = rb_entry(test, struct btrfs_ordered_extent,
96 rb_node);
97 if (file_offset < entry_end(prev_entry))
98 break;
99
100 prev = test;
101 }
102 if (prev)
103 prev_entry = rb_entry(prev, struct btrfs_ordered_extent,
104 rb_node);
105 while (prev && file_offset < entry_end(prev_entry)) {
106 test = rb_prev(prev);
107 if (!test)
108 break;
109 prev_entry = rb_entry(test, struct btrfs_ordered_extent,
110 rb_node);
111 prev = test;
112 }
113 *prev_ret = prev;
114 return NULL;
115}
116
117/*
118 * helper to check if a given offset is inside a given entry
119 */
120static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
121{
122 if (file_offset < entry->file_offset ||
123 entry->file_offset + entry->len <= file_offset)
124 return 0;
125 return 1;
126}
127
128/*
129 * look find the first ordered struct that has this offset, otherwise
130 * the first one less than this offset
131 */
132static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
133 u64 file_offset)
134{
135 struct rb_root *root = &tree->tree;
136 struct rb_node *prev;
137 struct rb_node *ret;
138 struct btrfs_ordered_extent *entry;
139
140 if (tree->last) {
141 entry = rb_entry(tree->last, struct btrfs_ordered_extent,
142 rb_node);
143 if (offset_in_entry(entry, file_offset))
144 return tree->last;
145 }
146 ret = __tree_search(root, file_offset, &prev);
147 if (!ret)
148 ret = prev;
149 if (ret)
150 tree->last = ret;
151 return ret;
152}
153
154/* allocate and add a new ordered_extent into the per-inode tree.
155 * file_offset is the logical offset in the file
156 *
157 * start is the disk block number of an extent already reserved in the
158 * extent allocation tree
159 *
160 * len is the length of the extent
161 *
162 * This also sets the EXTENT_ORDERED bit on the range in the inode.
163 *
164 * The tree is given a single reference on the ordered extent that was
165 * inserted.
166 */
167int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
168 u64 start, u64 len, u64 disk_len, int type)
169{
170 struct btrfs_ordered_inode_tree *tree;
171 struct rb_node *node;
172 struct btrfs_ordered_extent *entry;
173
174 tree = &BTRFS_I(inode)->ordered_tree;
175 entry = kzalloc(sizeof(*entry), GFP_NOFS);
176 if (!entry)
177 return -ENOMEM;
178
179 mutex_lock(&tree->mutex);
180 entry->file_offset = file_offset;
181 entry->start = start;
182 entry->len = len;
183 entry->disk_len = disk_len;
184 entry->inode = inode;
185 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
186 set_bit(type, &entry->flags);
187
188 /* one ref for the tree */
189 atomic_set(&entry->refs, 1);
190 init_waitqueue_head(&entry->wait);
191 INIT_LIST_HEAD(&entry->list);
192 INIT_LIST_HEAD(&entry->root_extent_list);
193
194 node = tree_insert(&tree->tree, file_offset,
195 &entry->rb_node);
196 BUG_ON(node);
197
198 set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
199 entry_end(entry) - 1, GFP_NOFS);
200
201 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
202 list_add_tail(&entry->root_extent_list,
203 &BTRFS_I(inode)->root->fs_info->ordered_extents);
204 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
205
206 mutex_unlock(&tree->mutex);
207 BUG_ON(node);
208 return 0;
209}
210
211/*
212 * Add a struct btrfs_ordered_sum into the list of checksums to be inserted
213 * when an ordered extent is finished. If the list covers more than one
214 * ordered extent, it is split across multiples.
215 */
216int btrfs_add_ordered_sum(struct inode *inode,
217 struct btrfs_ordered_extent *entry,
218 struct btrfs_ordered_sum *sum)
219{
220 struct btrfs_ordered_inode_tree *tree;
221
222 tree = &BTRFS_I(inode)->ordered_tree;
223 mutex_lock(&tree->mutex);
224 list_add_tail(&sum->list, &entry->list);
225 mutex_unlock(&tree->mutex);
226 return 0;
227}
228
229/*
230 * this is used to account for finished IO across a given range
231 * of the file. The IO should not span ordered extents. If
232 * a given ordered_extent is completely done, 1 is returned, otherwise
233 * 0.
234 *
235 * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
236 * to make sure this function only returns 1 once for a given ordered extent.
237 */
238int btrfs_dec_test_ordered_pending(struct inode *inode,
239 u64 file_offset, u64 io_size)
240{
241 struct btrfs_ordered_inode_tree *tree;
242 struct rb_node *node;
243 struct btrfs_ordered_extent *entry;
244 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
245 int ret;
246
247 tree = &BTRFS_I(inode)->ordered_tree;
248 mutex_lock(&tree->mutex);
249 clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1,
250 GFP_NOFS);
251 node = tree_search(tree, file_offset);
252 if (!node) {
253 ret = 1;
254 goto out;
255 }
256
257 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
258 if (!offset_in_entry(entry, file_offset)) {
259 ret = 1;
260 goto out;
261 }
262
263 ret = test_range_bit(io_tree, entry->file_offset,
264 entry->file_offset + entry->len - 1,
265 EXTENT_ORDERED, 0);
266 if (ret == 0)
267 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
268out:
269 mutex_unlock(&tree->mutex);
270 return ret == 0;
271}
272
273/*
274 * used to drop a reference on an ordered extent. This will free
275 * the extent if the last reference is dropped
276 */
277int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
278{
279 struct list_head *cur;
280 struct btrfs_ordered_sum *sum;
281
282 if (atomic_dec_and_test(&entry->refs)) {
283 while (!list_empty(&entry->list)) {
284 cur = entry->list.next;
285 sum = list_entry(cur, struct btrfs_ordered_sum, list);
286 list_del(&sum->list);
287 kfree(sum);
288 }
289 kfree(entry);
290 }
291 return 0;
292}
293
294/*
295 * remove an ordered extent from the tree. No references are dropped
296 * but, anyone waiting on this extent is woken up.
297 */
298int btrfs_remove_ordered_extent(struct inode *inode,
299 struct btrfs_ordered_extent *entry)
300{
301 struct btrfs_ordered_inode_tree *tree;
302 struct rb_node *node;
303
304 tree = &BTRFS_I(inode)->ordered_tree;
305 mutex_lock(&tree->mutex);
306 node = &entry->rb_node;
307 rb_erase(node, &tree->tree);
308 tree->last = NULL;
309 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
310
311 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
312 list_del_init(&entry->root_extent_list);
313 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
314
315 mutex_unlock(&tree->mutex);
316 wake_up(&entry->wait);
317 return 0;
318}
319
320/*
321 * wait for all the ordered extents in a root. This is done when balancing
322 * space between drives.
323 */
324int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
325{
326 struct list_head splice;
327 struct list_head *cur;
328 struct btrfs_ordered_extent *ordered;
329 struct inode *inode;
330
331 INIT_LIST_HEAD(&splice);
332
333 spin_lock(&root->fs_info->ordered_extent_lock);
334 list_splice_init(&root->fs_info->ordered_extents, &splice);
335 while (!list_empty(&splice)) {
336 cur = splice.next;
337 ordered = list_entry(cur, struct btrfs_ordered_extent,
338 root_extent_list);
339 if (nocow_only &&
340 !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) &&
341 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
342 list_move(&ordered->root_extent_list,
343 &root->fs_info->ordered_extents);
344 cond_resched_lock(&root->fs_info->ordered_extent_lock);
345 continue;
346 }
347
348 list_del_init(&ordered->root_extent_list);
349 atomic_inc(&ordered->refs);
350
351 /*
352 * the inode may be getting freed (in sys_unlink path).
353 */
354 inode = igrab(ordered->inode);
355
356 spin_unlock(&root->fs_info->ordered_extent_lock);
357
358 if (inode) {
359 btrfs_start_ordered_extent(inode, ordered, 1);
360 btrfs_put_ordered_extent(ordered);
361 iput(inode);
362 } else {
363 btrfs_put_ordered_extent(ordered);
364 }
365
366 spin_lock(&root->fs_info->ordered_extent_lock);
367 }
368 spin_unlock(&root->fs_info->ordered_extent_lock);
369 return 0;
370}
371
372/*
373 * Used to start IO or wait for a given ordered extent to finish.
374 *
375 * If wait is one, this effectively waits on page writeback for all the pages
376 * in the extent, and it waits on the io completion code to insert
377 * metadata into the btree corresponding to the extent
378 */
379void btrfs_start_ordered_extent(struct inode *inode,
380 struct btrfs_ordered_extent *entry,
381 int wait)
382{
383 u64 start = entry->file_offset;
384 u64 end = start + entry->len - 1;
385
386 /*
387 * pages in the range can be dirty, clean or writeback. We
388 * start IO on any dirty ones so the wait doesn't stall waiting
389 * for pdflush to find them
390 */
391 btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_ALL);
392 if (wait) {
393 wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
394 &entry->flags));
395 }
396}
397
398/*
399 * Used to wait on ordered extents across a large range of bytes.
400 */
401int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
402{
403 u64 end;
404 u64 orig_end;
405 u64 wait_end;
406 struct btrfs_ordered_extent *ordered;
407
408 if (start + len < start) {
409 orig_end = INT_LIMIT(loff_t);
410 } else {
411 orig_end = start + len - 1;
412 if (orig_end > INT_LIMIT(loff_t))
413 orig_end = INT_LIMIT(loff_t);
414 }
415 wait_end = orig_end;
416again:
417 /* start IO across the range first to instantiate any delalloc
418 * extents
419 */
420 btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE);
421
422 /* The compression code will leave pages locked but return from
423 * writepage without setting the page writeback. Starting again
424 * with WB_SYNC_ALL will end up waiting for the IO to actually start.
425 */
426 btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL);
427
428 btrfs_wait_on_page_writeback_range(inode->i_mapping,
429 start >> PAGE_CACHE_SHIFT,
430 orig_end >> PAGE_CACHE_SHIFT);
431
432 end = orig_end;
433 while (1) {
434 ordered = btrfs_lookup_first_ordered_extent(inode, end);
435 if (!ordered)
436 break;
437 if (ordered->file_offset > orig_end) {
438 btrfs_put_ordered_extent(ordered);
439 break;
440 }
441 if (ordered->file_offset + ordered->len < start) {
442 btrfs_put_ordered_extent(ordered);
443 break;
444 }
445 btrfs_start_ordered_extent(inode, ordered, 1);
446 end = ordered->file_offset;
447 btrfs_put_ordered_extent(ordered);
448 if (end == 0 || end == start)
449 break;
450 end--;
451 }
452 if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
453 EXTENT_ORDERED | EXTENT_DELALLOC, 0)) {
454 schedule_timeout(1);
455 goto again;
456 }
457 return 0;
458}
459
460/*
461 * find an ordered extent corresponding to file_offset. return NULL if
462 * nothing is found, otherwise take a reference on the extent and return it
463 */
464struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
465 u64 file_offset)
466{
467 struct btrfs_ordered_inode_tree *tree;
468 struct rb_node *node;
469 struct btrfs_ordered_extent *entry = NULL;
470
471 tree = &BTRFS_I(inode)->ordered_tree;
472 mutex_lock(&tree->mutex);
473 node = tree_search(tree, file_offset);
474 if (!node)
475 goto out;
476
477 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
478 if (!offset_in_entry(entry, file_offset))
479 entry = NULL;
480 if (entry)
481 atomic_inc(&entry->refs);
482out:
483 mutex_unlock(&tree->mutex);
484 return entry;
485}
486
487/*
488 * lookup and return any extent before 'file_offset'. NULL is returned
489 * if none is found
490 */
491struct btrfs_ordered_extent *
492btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
493{
494 struct btrfs_ordered_inode_tree *tree;
495 struct rb_node *node;
496 struct btrfs_ordered_extent *entry = NULL;
497
498 tree = &BTRFS_I(inode)->ordered_tree;
499 mutex_lock(&tree->mutex);
500 node = tree_search(tree, file_offset);
501 if (!node)
502 goto out;
503
504 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
505 atomic_inc(&entry->refs);
506out:
507 mutex_unlock(&tree->mutex);
508 return entry;
509}
510
511/*
512 * After an extent is done, call this to conditionally update the on disk
513 * i_size. i_size is updated to cover any fully written part of the file.
514 */
515int btrfs_ordered_update_i_size(struct inode *inode,
516 struct btrfs_ordered_extent *ordered)
517{
518 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
519 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
520 u64 disk_i_size;
521 u64 new_i_size;
522 u64 i_size_test;
523 struct rb_node *node;
524 struct btrfs_ordered_extent *test;
525
526 mutex_lock(&tree->mutex);
527 disk_i_size = BTRFS_I(inode)->disk_i_size;
528
529 /*
530 * if the disk i_size is already at the inode->i_size, or
531 * this ordered extent is inside the disk i_size, we're done
532 */
533 if (disk_i_size >= inode->i_size ||
534 ordered->file_offset + ordered->len <= disk_i_size) {
535 goto out;
536 }
537
538 /*
539 * we can't update the disk_isize if there are delalloc bytes
540 * between disk_i_size and this ordered extent
541 */
542 if (test_range_bit(io_tree, disk_i_size,
543 ordered->file_offset + ordered->len - 1,
544 EXTENT_DELALLOC, 0)) {
545 goto out;
546 }
547 /*
548 * walk backward from this ordered extent to disk_i_size.
549 * if we find an ordered extent then we can't update disk i_size
550 * yet
551 */
552 node = &ordered->rb_node;
553 while (1) {
554 node = rb_prev(node);
555 if (!node)
556 break;
557 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
558 if (test->file_offset + test->len <= disk_i_size)
559 break;
560 if (test->file_offset >= inode->i_size)
561 break;
562 if (test->file_offset >= disk_i_size)
563 goto out;
564 }
565 new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode));
566
567 /*
568 * at this point, we know we can safely update i_size to at least
569 * the offset from this ordered extent. But, we need to
570 * walk forward and see if ios from higher up in the file have
571 * finished.
572 */
573 node = rb_next(&ordered->rb_node);
574 i_size_test = 0;
575 if (node) {
576 /*
577 * do we have an area where IO might have finished
578 * between our ordered extent and the next one.
579 */
580 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
581 if (test->file_offset > entry_end(ordered))
582 i_size_test = test->file_offset;
583 } else {
584 i_size_test = i_size_read(inode);
585 }
586
587 /*
588 * i_size_test is the end of a region after this ordered
589 * extent where there are no ordered extents. As long as there
590 * are no delalloc bytes in this area, it is safe to update
591 * disk_i_size to the end of the region.
592 */
593 if (i_size_test > entry_end(ordered) &&
594 !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1,
595 EXTENT_DELALLOC, 0)) {
596 new_i_size = min_t(u64, i_size_test, i_size_read(inode));
597 }
598 BTRFS_I(inode)->disk_i_size = new_i_size;
599out:
600 mutex_unlock(&tree->mutex);
601 return 0;
602}
603
604/*
605 * search the ordered extents for one corresponding to 'offset' and
606 * try to find a checksum. This is used because we allow pages to
607 * be reclaimed before their checksum is actually put into the btree
608 */
609int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
610 u32 *sum)
611{
612 struct btrfs_ordered_sum *ordered_sum;
613 struct btrfs_sector_sum *sector_sums;
614 struct btrfs_ordered_extent *ordered;
615 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
616 struct list_head *cur;
617 unsigned long num_sectors;
618 unsigned long i;
619 u32 sectorsize = BTRFS_I(inode)->root->sectorsize;
620 int ret = 1;
621
622 ordered = btrfs_lookup_ordered_extent(inode, offset);
623 if (!ordered)
624 return 1;
625
626 mutex_lock(&tree->mutex);
627 list_for_each_prev(cur, &ordered->list) {
628 ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list);
629 if (disk_bytenr >= ordered_sum->bytenr) {
630 num_sectors = ordered_sum->len / sectorsize;
631 sector_sums = ordered_sum->sums;
632 for (i = 0; i < num_sectors; i++) {
633 if (sector_sums[i].bytenr == disk_bytenr) {
634 *sum = sector_sums[i].sum;
635 ret = 0;
636 goto out;
637 }
638 }
639 }
640 }
641out:
642 mutex_unlock(&tree->mutex);
643 btrfs_put_ordered_extent(ordered);
644 return ret;
645}
646
647
648/**
649 * taken from mm/filemap.c because it isn't exported
650 *
651 * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
652 * @mapping: address space structure to write
653 * @start: offset in bytes where the range starts
654 * @end: offset in bytes where the range ends (inclusive)
655 * @sync_mode: enable synchronous operation
656 *
657 * Start writeback against all of a mapping's dirty pages that lie
658 * within the byte offsets <start, end> inclusive.
659 *
660 * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
661 * opposed to a regular memory cleansing writeback. The difference between
662 * these two operations is that if a dirty page/buffer is encountered, it must
663 * be waited upon, and not just skipped over.
664 */
665int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
666 loff_t end, int sync_mode)
667{
668 struct writeback_control wbc = {
669 .sync_mode = sync_mode,
670 .nr_to_write = mapping->nrpages * 2,
671 .range_start = start,
672 .range_end = end,
673 .for_writepages = 1,
674 };
675 return btrfs_writepages(mapping, &wbc);
676}
677
678/**
679 * taken from mm/filemap.c because it isn't exported
680 *
681 * wait_on_page_writeback_range - wait for writeback to complete
682 * @mapping: target address_space
683 * @start: beginning page index
684 * @end: ending page index
685 *
686 * Wait for writeback to complete against pages indexed by start->end
687 * inclusive
688 */
689int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
690 pgoff_t start, pgoff_t end)
691{
692 struct pagevec pvec;
693 int nr_pages;
694 int ret = 0;
695 pgoff_t index;
696
697 if (end < start)
698 return 0;
699
700 pagevec_init(&pvec, 0);
701 index = start;
702 while ((index <= end) &&
703 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
704 PAGECACHE_TAG_WRITEBACK,
705 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
706 unsigned i;
707
708 for (i = 0; i < nr_pages; i++) {
709 struct page *page = pvec.pages[i];
710
711 /* until radix tree lookup accepts end_index */
712 if (page->index > end)
713 continue;
714
715 wait_on_page_writeback(page);
716 if (PageError(page))
717 ret = -EIO;
718 }
719 pagevec_release(&pvec);
720 cond_resched();
721 }
722
723 /* Check for outstanding write errors */
724 if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
725 ret = -ENOSPC;
726 if (test_and_clear_bit(AS_EIO, &mapping->flags))
727 ret = -EIO;
728
729 return ret;
730}
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
new file mode 100644
index 000000000000..ab66d5e8d6d6
--- /dev/null
+++ b/fs/btrfs/ordered-data.h
@@ -0,0 +1,158 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __BTRFS_ORDERED_DATA__
20#define __BTRFS_ORDERED_DATA__
21
22/* one of these per inode */
23struct btrfs_ordered_inode_tree {
24 struct mutex mutex;
25 struct rb_root tree;
26 struct rb_node *last;
27};
28
29/*
30 * these are used to collect checksums done just before bios submission.
31 * They are attached via a list into the ordered extent, and
32 * checksum items are inserted into the tree after all the blocks in
33 * the ordered extent are on disk
34 */
35struct btrfs_sector_sum {
36 /* bytenr on disk */
37 u64 bytenr;
38 u32 sum;
39};
40
41struct btrfs_ordered_sum {
42 /* bytenr is the start of this extent on disk */
43 u64 bytenr;
44
45 /*
46 * this is the length in bytes covered by the sums array below.
47 */
48 unsigned long len;
49 struct list_head list;
50 /* last field is a variable length array of btrfs_sector_sums */
51 struct btrfs_sector_sum sums[];
52};
53
54/*
55 * bits for the flags field:
56 *
57 * BTRFS_ORDERED_IO_DONE is set when all of the blocks are written.
58 * It is used to make sure metadata is inserted into the tree only once
59 * per extent.
60 *
61 * BTRFS_ORDERED_COMPLETE is set when the extent is removed from the
62 * rbtree, just before waking any waiters. It is used to indicate the
63 * IO is done and any metadata is inserted into the tree.
64 */
65#define BTRFS_ORDERED_IO_DONE 0 /* set when all the pages are written */
66
67#define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */
68
69#define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */
70
71#define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */
72
73#define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */
74
75struct btrfs_ordered_extent {
76 /* logical offset in the file */
77 u64 file_offset;
78
79 /* disk byte number */
80 u64 start;
81
82 /* ram length of the extent in bytes */
83 u64 len;
84
85 /* extent length on disk */
86 u64 disk_len;
87
88 /* flags (described above) */
89 unsigned long flags;
90
91 /* reference count */
92 atomic_t refs;
93
94 /* the inode we belong to */
95 struct inode *inode;
96
97 /* list of checksums for insertion when the extent io is done */
98 struct list_head list;
99
100 /* used to wait for the BTRFS_ORDERED_COMPLETE bit */
101 wait_queue_head_t wait;
102
103 /* our friendly rbtree entry */
104 struct rb_node rb_node;
105
106 /* a per root list of all the pending ordered extents */
107 struct list_head root_extent_list;
108};
109
110
111/*
112 * calculates the total size you need to allocate for an ordered sum
113 * structure spanning 'bytes' in the file
114 */
115static inline int btrfs_ordered_sum_size(struct btrfs_root *root,
116 unsigned long bytes)
117{
118 unsigned long num_sectors = (bytes + root->sectorsize - 1) /
119 root->sectorsize;
120 num_sectors++;
121 return sizeof(struct btrfs_ordered_sum) +
122 num_sectors * sizeof(struct btrfs_sector_sum);
123}
124
125static inline void
126btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
127{
128 mutex_init(&t->mutex);
129 t->tree.rb_node = NULL;
130 t->last = NULL;
131}
132
133int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
134int btrfs_remove_ordered_extent(struct inode *inode,
135 struct btrfs_ordered_extent *entry);
136int btrfs_dec_test_ordered_pending(struct inode *inode,
137 u64 file_offset, u64 io_size);
138int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
139 u64 start, u64 len, u64 disk_len, int tyep);
140int btrfs_add_ordered_sum(struct inode *inode,
141 struct btrfs_ordered_extent *entry,
142 struct btrfs_ordered_sum *sum);
143struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
144 u64 file_offset);
145void btrfs_start_ordered_extent(struct inode *inode,
146 struct btrfs_ordered_extent *entry, int wait);
147int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
148struct btrfs_ordered_extent *
149btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
150int btrfs_ordered_update_i_size(struct inode *inode,
151 struct btrfs_ordered_extent *ordered);
152int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
153int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
154 pgoff_t start, pgoff_t end);
155int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
156 loff_t end, int sync_mode);
157int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
158#endif
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c
new file mode 100644
index 000000000000..3c0d52af4f80
--- /dev/null
+++ b/fs/btrfs/orphan.c
@@ -0,0 +1,67 @@
1/*
2 * Copyright (C) 2008 Red Hat. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include "ctree.h"
20#include "disk-io.h"
21
22int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
23 struct btrfs_root *root, u64 offset)
24{
25 struct btrfs_path *path;
26 struct btrfs_key key;
27 int ret = 0;
28
29 key.objectid = BTRFS_ORPHAN_OBJECTID;
30 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
31 key.offset = offset;
32
33 path = btrfs_alloc_path();
34 if (!path)
35 return -ENOMEM;
36
37 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
38
39 btrfs_free_path(path);
40 return ret;
41}
42
43int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
44 struct btrfs_root *root, u64 offset)
45{
46 struct btrfs_path *path;
47 struct btrfs_key key;
48 int ret = 0;
49
50 key.objectid = BTRFS_ORPHAN_OBJECTID;
51 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
52 key.offset = offset;
53
54 path = btrfs_alloc_path();
55 if (!path)
56 return -ENOMEM;
57
58 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
59 if (ret)
60 goto out;
61
62 ret = btrfs_del_item(trans, root, path);
63
64out:
65 btrfs_free_path(path);
66 return ret;
67}
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
new file mode 100644
index 000000000000..5f8f218c1005
--- /dev/null
+++ b/fs/btrfs/print-tree.c
@@ -0,0 +1,216 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include "ctree.h"
20#include "disk-io.h"
21#include "print-tree.h"
22
23static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk)
24{
25 int num_stripes = btrfs_chunk_num_stripes(eb, chunk);
26 int i;
27 printk(KERN_INFO "\t\tchunk length %llu owner %llu type %llu "
28 "num_stripes %d\n",
29 (unsigned long long)btrfs_chunk_length(eb, chunk),
30 (unsigned long long)btrfs_chunk_owner(eb, chunk),
31 (unsigned long long)btrfs_chunk_type(eb, chunk),
32 num_stripes);
33 for (i = 0 ; i < num_stripes ; i++) {
34 printk(KERN_INFO "\t\t\tstripe %d devid %llu offset %llu\n", i,
35 (unsigned long long)btrfs_stripe_devid_nr(eb, chunk, i),
36 (unsigned long long)btrfs_stripe_offset_nr(eb, chunk, i));
37 }
38}
39static void print_dev_item(struct extent_buffer *eb,
40 struct btrfs_dev_item *dev_item)
41{
42 printk(KERN_INFO "\t\tdev item devid %llu "
43 "total_bytes %llu bytes used %llu\n",
44 (unsigned long long)btrfs_device_id(eb, dev_item),
45 (unsigned long long)btrfs_device_total_bytes(eb, dev_item),
46 (unsigned long long)btrfs_device_bytes_used(eb, dev_item));
47}
48void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
49{
50 int i;
51 u32 nr = btrfs_header_nritems(l);
52 struct btrfs_item *item;
53 struct btrfs_extent_item *ei;
54 struct btrfs_root_item *ri;
55 struct btrfs_dir_item *di;
56 struct btrfs_inode_item *ii;
57 struct btrfs_block_group_item *bi;
58 struct btrfs_file_extent_item *fi;
59 struct btrfs_key key;
60 struct btrfs_key found_key;
61 struct btrfs_extent_ref *ref;
62 struct btrfs_dev_extent *dev_extent;
63 u32 type;
64
65 printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n",
66 (unsigned long long)btrfs_header_bytenr(l), nr,
67 btrfs_leaf_free_space(root, l));
68 for (i = 0 ; i < nr ; i++) {
69 item = btrfs_item_nr(l, i);
70 btrfs_item_key_to_cpu(l, &key, i);
71 type = btrfs_key_type(&key);
72 printk(KERN_INFO "\titem %d key (%llu %x %llu) itemoff %d "
73 "itemsize %d\n",
74 i,
75 (unsigned long long)key.objectid, type,
76 (unsigned long long)key.offset,
77 btrfs_item_offset(l, item), btrfs_item_size(l, item));
78 switch (type) {
79 case BTRFS_INODE_ITEM_KEY:
80 ii = btrfs_item_ptr(l, i, struct btrfs_inode_item);
81 printk(KERN_INFO "\t\tinode generation %llu size %llu "
82 "mode %o\n",
83 (unsigned long long)
84 btrfs_inode_generation(l, ii),
85 (unsigned long long)btrfs_inode_size(l, ii),
86 btrfs_inode_mode(l, ii));
87 break;
88 case BTRFS_DIR_ITEM_KEY:
89 di = btrfs_item_ptr(l, i, struct btrfs_dir_item);
90 btrfs_dir_item_key_to_cpu(l, di, &found_key);
91 printk(KERN_INFO "\t\tdir oid %llu type %u\n",
92 (unsigned long long)found_key.objectid,
93 btrfs_dir_type(l, di));
94 break;
95 case BTRFS_ROOT_ITEM_KEY:
96 ri = btrfs_item_ptr(l, i, struct btrfs_root_item);
97 printk(KERN_INFO "\t\troot data bytenr %llu refs %u\n",
98 (unsigned long long)
99 btrfs_disk_root_bytenr(l, ri),
100 btrfs_disk_root_refs(l, ri));
101 break;
102 case BTRFS_EXTENT_ITEM_KEY:
103 ei = btrfs_item_ptr(l, i, struct btrfs_extent_item);
104 printk(KERN_INFO "\t\textent data refs %u\n",
105 btrfs_extent_refs(l, ei));
106 break;
107 case BTRFS_EXTENT_REF_KEY:
108 ref = btrfs_item_ptr(l, i, struct btrfs_extent_ref);
109 printk(KERN_INFO "\t\textent back ref root %llu "
110 "gen %llu owner %llu num_refs %lu\n",
111 (unsigned long long)btrfs_ref_root(l, ref),
112 (unsigned long long)btrfs_ref_generation(l, ref),
113 (unsigned long long)btrfs_ref_objectid(l, ref),
114 (unsigned long)btrfs_ref_num_refs(l, ref));
115 break;
116
117 case BTRFS_EXTENT_DATA_KEY:
118 fi = btrfs_item_ptr(l, i,
119 struct btrfs_file_extent_item);
120 if (btrfs_file_extent_type(l, fi) ==
121 BTRFS_FILE_EXTENT_INLINE) {
122 printk(KERN_INFO "\t\tinline extent data "
123 "size %u\n",
124 btrfs_file_extent_inline_len(l, fi));
125 break;
126 }
127 printk(KERN_INFO "\t\textent data disk bytenr %llu "
128 "nr %llu\n",
129 (unsigned long long)
130 btrfs_file_extent_disk_bytenr(l, fi),
131 (unsigned long long)
132 btrfs_file_extent_disk_num_bytes(l, fi));
133 printk(KERN_INFO "\t\textent data offset %llu "
134 "nr %llu ram %llu\n",
135 (unsigned long long)
136 btrfs_file_extent_offset(l, fi),
137 (unsigned long long)
138 btrfs_file_extent_num_bytes(l, fi),
139 (unsigned long long)
140 btrfs_file_extent_ram_bytes(l, fi));
141 break;
142 case BTRFS_BLOCK_GROUP_ITEM_KEY:
143 bi = btrfs_item_ptr(l, i,
144 struct btrfs_block_group_item);
145 printk(KERN_INFO "\t\tblock group used %llu\n",
146 (unsigned long long)
147 btrfs_disk_block_group_used(l, bi));
148 break;
149 case BTRFS_CHUNK_ITEM_KEY:
150 print_chunk(l, btrfs_item_ptr(l, i,
151 struct btrfs_chunk));
152 break;
153 case BTRFS_DEV_ITEM_KEY:
154 print_dev_item(l, btrfs_item_ptr(l, i,
155 struct btrfs_dev_item));
156 break;
157 case BTRFS_DEV_EXTENT_KEY:
158 dev_extent = btrfs_item_ptr(l, i,
159 struct btrfs_dev_extent);
160 printk(KERN_INFO "\t\tdev extent chunk_tree %llu\n"
161 "\t\tchunk objectid %llu chunk offset %llu "
162 "length %llu\n",
163 (unsigned long long)
164 btrfs_dev_extent_chunk_tree(l, dev_extent),
165 (unsigned long long)
166 btrfs_dev_extent_chunk_objectid(l, dev_extent),
167 (unsigned long long)
168 btrfs_dev_extent_chunk_offset(l, dev_extent),
169 (unsigned long long)
170 btrfs_dev_extent_length(l, dev_extent));
171 };
172 }
173}
174
175void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
176{
177 int i; u32 nr;
178 struct btrfs_key key;
179 int level;
180
181 if (!c)
182 return;
183 nr = btrfs_header_nritems(c);
184 level = btrfs_header_level(c);
185 if (level == 0) {
186 btrfs_print_leaf(root, c);
187 return;
188 }
189 printk(KERN_INFO "node %llu level %d total ptrs %d free spc %u\n",
190 (unsigned long long)btrfs_header_bytenr(c),
191 btrfs_header_level(c), nr,
192 (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
193 for (i = 0; i < nr; i++) {
194 btrfs_node_key_to_cpu(c, &key, i);
195 printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n",
196 i,
197 (unsigned long long)key.objectid,
198 key.type,
199 (unsigned long long)key.offset,
200 (unsigned long long)btrfs_node_blockptr(c, i));
201 }
202 for (i = 0; i < nr; i++) {
203 struct extent_buffer *next = read_tree_block(root,
204 btrfs_node_blockptr(c, i),
205 btrfs_level_size(root, level - 1),
206 btrfs_node_ptr_generation(c, i));
207 if (btrfs_is_leaf(next) &&
208 btrfs_header_level(c) != 1)
209 BUG();
210 if (btrfs_header_level(next) !=
211 btrfs_header_level(c) - 1)
212 BUG();
213 btrfs_print_tree(root, next);
214 free_extent_buffer(next);
215 }
216}
diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h
new file mode 100644
index 000000000000..da75efe534d5
--- /dev/null
+++ b/fs/btrfs/print-tree.h
@@ -0,0 +1,23 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __PRINT_TREE_
20#define __PRINT_TREE_
21void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l);
22void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *t);
23#endif
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c
new file mode 100644
index 000000000000..6f0acc4c9eab
--- /dev/null
+++ b/fs/btrfs/ref-cache.c
@@ -0,0 +1,230 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include "ctree.h"
21#include "ref-cache.h"
22#include "transaction.h"
23
24/*
25 * leaf refs are used to cache the information about which extents
26 * a given leaf has references on. This allows us to process that leaf
27 * in btrfs_drop_snapshot without needing to read it back from disk.
28 */
29
30/*
31 * kmalloc a leaf reference struct and update the counters for the
32 * total ref cache size
33 */
34struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root,
35 int nr_extents)
36{
37 struct btrfs_leaf_ref *ref;
38 size_t size = btrfs_leaf_ref_size(nr_extents);
39
40 ref = kmalloc(size, GFP_NOFS);
41 if (ref) {
42 spin_lock(&root->fs_info->ref_cache_lock);
43 root->fs_info->total_ref_cache_size += size;
44 spin_unlock(&root->fs_info->ref_cache_lock);
45
46 memset(ref, 0, sizeof(*ref));
47 atomic_set(&ref->usage, 1);
48 INIT_LIST_HEAD(&ref->list);
49 }
50 return ref;
51}
52
53/*
54 * free a leaf reference struct and update the counters for the
55 * total ref cache size
56 */
57void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
58{
59 if (!ref)
60 return;
61 WARN_ON(atomic_read(&ref->usage) == 0);
62 if (atomic_dec_and_test(&ref->usage)) {
63 size_t size = btrfs_leaf_ref_size(ref->nritems);
64
65 BUG_ON(ref->in_tree);
66 kfree(ref);
67
68 spin_lock(&root->fs_info->ref_cache_lock);
69 root->fs_info->total_ref_cache_size -= size;
70 spin_unlock(&root->fs_info->ref_cache_lock);
71 }
72}
73
74static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
75 struct rb_node *node)
76{
77 struct rb_node **p = &root->rb_node;
78 struct rb_node *parent = NULL;
79 struct btrfs_leaf_ref *entry;
80
81 while (*p) {
82 parent = *p;
83 entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node);
84
85 if (bytenr < entry->bytenr)
86 p = &(*p)->rb_left;
87 else if (bytenr > entry->bytenr)
88 p = &(*p)->rb_right;
89 else
90 return parent;
91 }
92
93 entry = rb_entry(node, struct btrfs_leaf_ref, rb_node);
94 rb_link_node(node, parent, p);
95 rb_insert_color(node, root);
96 return NULL;
97}
98
99static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
100{
101 struct rb_node *n = root->rb_node;
102 struct btrfs_leaf_ref *entry;
103
104 while (n) {
105 entry = rb_entry(n, struct btrfs_leaf_ref, rb_node);
106 WARN_ON(!entry->in_tree);
107
108 if (bytenr < entry->bytenr)
109 n = n->rb_left;
110 else if (bytenr > entry->bytenr)
111 n = n->rb_right;
112 else
113 return n;
114 }
115 return NULL;
116}
117
118int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen,
119 int shared)
120{
121 struct btrfs_leaf_ref *ref = NULL;
122 struct btrfs_leaf_ref_tree *tree = root->ref_tree;
123
124 if (shared)
125 tree = &root->fs_info->shared_ref_tree;
126 if (!tree)
127 return 0;
128
129 spin_lock(&tree->lock);
130 while (!list_empty(&tree->list)) {
131 ref = list_entry(tree->list.next, struct btrfs_leaf_ref, list);
132 BUG_ON(ref->tree != tree);
133 if (ref->root_gen > max_root_gen)
134 break;
135 if (!xchg(&ref->in_tree, 0)) {
136 cond_resched_lock(&tree->lock);
137 continue;
138 }
139
140 rb_erase(&ref->rb_node, &tree->root);
141 list_del_init(&ref->list);
142
143 spin_unlock(&tree->lock);
144 btrfs_free_leaf_ref(root, ref);
145 cond_resched();
146 spin_lock(&tree->lock);
147 }
148 spin_unlock(&tree->lock);
149 return 0;
150}
151
152/*
153 * find the leaf ref for a given extent. This returns the ref struct with
154 * a usage reference incremented
155 */
156struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root,
157 u64 bytenr)
158{
159 struct rb_node *rb;
160 struct btrfs_leaf_ref *ref = NULL;
161 struct btrfs_leaf_ref_tree *tree = root->ref_tree;
162again:
163 if (tree) {
164 spin_lock(&tree->lock);
165 rb = tree_search(&tree->root, bytenr);
166 if (rb)
167 ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node);
168 if (ref)
169 atomic_inc(&ref->usage);
170 spin_unlock(&tree->lock);
171 if (ref)
172 return ref;
173 }
174 if (tree != &root->fs_info->shared_ref_tree) {
175 tree = &root->fs_info->shared_ref_tree;
176 goto again;
177 }
178 return NULL;
179}
180
181/*
182 * add a fully filled in leaf ref struct
183 * remove all the refs older than a given root generation
184 */
185int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref,
186 int shared)
187{
188 int ret = 0;
189 struct rb_node *rb;
190 struct btrfs_leaf_ref_tree *tree = root->ref_tree;
191
192 if (shared)
193 tree = &root->fs_info->shared_ref_tree;
194
195 spin_lock(&tree->lock);
196 rb = tree_insert(&tree->root, ref->bytenr, &ref->rb_node);
197 if (rb) {
198 ret = -EEXIST;
199 } else {
200 atomic_inc(&ref->usage);
201 ref->tree = tree;
202 ref->in_tree = 1;
203 list_add_tail(&ref->list, &tree->list);
204 }
205 spin_unlock(&tree->lock);
206 return ret;
207}
208
209/*
210 * remove a single leaf ref from the tree. This drops the ref held by the tree
211 * only
212 */
213int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
214{
215 struct btrfs_leaf_ref_tree *tree;
216
217 if (!xchg(&ref->in_tree, 0))
218 return 0;
219
220 tree = ref->tree;
221 spin_lock(&tree->lock);
222
223 rb_erase(&ref->rb_node, &tree->root);
224 list_del_init(&ref->list);
225
226 spin_unlock(&tree->lock);
227
228 btrfs_free_leaf_ref(root, ref);
229 return 0;
230}
diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h
new file mode 100644
index 000000000000..16f3183d7c59
--- /dev/null
+++ b/fs/btrfs/ref-cache.h
@@ -0,0 +1,77 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18#ifndef __REFCACHE__
19#define __REFCACHE__
20
21struct btrfs_extent_info {
22 /* bytenr and num_bytes find the extent in the extent allocation tree */
23 u64 bytenr;
24 u64 num_bytes;
25
26 /* objectid and offset find the back reference for the file */
27 u64 objectid;
28 u64 offset;
29};
30
31struct btrfs_leaf_ref {
32 struct rb_node rb_node;
33 struct btrfs_leaf_ref_tree *tree;
34 int in_tree;
35 atomic_t usage;
36
37 u64 root_gen;
38 u64 bytenr;
39 u64 owner;
40 u64 generation;
41 int nritems;
42
43 struct list_head list;
44 struct btrfs_extent_info extents[];
45};
46
47static inline size_t btrfs_leaf_ref_size(int nr_extents)
48{
49 return sizeof(struct btrfs_leaf_ref) +
50 sizeof(struct btrfs_extent_info) * nr_extents;
51}
52
53static inline void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree)
54{
55 tree->root.rb_node = NULL;
56 INIT_LIST_HEAD(&tree->list);
57 spin_lock_init(&tree->lock);
58}
59
60static inline int btrfs_leaf_ref_tree_empty(struct btrfs_leaf_ref_tree *tree)
61{
62 return RB_EMPTY_ROOT(&tree->root);
63}
64
65void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree);
66struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root,
67 int nr_extents);
68void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref);
69struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root,
70 u64 bytenr);
71int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref,
72 int shared);
73int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen,
74 int shared);
75int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref);
76
77#endif
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
new file mode 100644
index 000000000000..b48650de4472
--- /dev/null
+++ b/fs/btrfs/root-tree.c
@@ -0,0 +1,366 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include "ctree.h"
20#include "transaction.h"
21#include "disk-io.h"
22#include "print-tree.h"
23
24/*
25 * search forward for a root, starting with objectid 'search_start'
26 * if a root key is found, the objectid we find is filled into 'found_objectid'
27 * and 0 is returned. < 0 is returned on error, 1 if there is nothing
28 * left in the tree.
29 */
30int btrfs_search_root(struct btrfs_root *root, u64 search_start,
31 u64 *found_objectid)
32{
33 struct btrfs_path *path;
34 struct btrfs_key search_key;
35 int ret;
36
37 root = root->fs_info->tree_root;
38 search_key.objectid = search_start;
39 search_key.type = (u8)-1;
40 search_key.offset = (u64)-1;
41
42 path = btrfs_alloc_path();
43 BUG_ON(!path);
44again:
45 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
46 if (ret < 0)
47 goto out;
48 if (ret == 0) {
49 ret = 1;
50 goto out;
51 }
52 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
53 ret = btrfs_next_leaf(root, path);
54 if (ret)
55 goto out;
56 }
57 btrfs_item_key_to_cpu(path->nodes[0], &search_key, path->slots[0]);
58 if (search_key.type != BTRFS_ROOT_ITEM_KEY) {
59 search_key.offset++;
60 btrfs_release_path(root, path);
61 goto again;
62 }
63 ret = 0;
64 *found_objectid = search_key.objectid;
65
66out:
67 btrfs_free_path(path);
68 return ret;
69}
70
71/*
72 * lookup the root with the highest offset for a given objectid. The key we do
73 * find is copied into 'key'. If we find something return 0, otherwise 1, < 0
74 * on error.
75 */
76int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
77 struct btrfs_root_item *item, struct btrfs_key *key)
78{
79 struct btrfs_path *path;
80 struct btrfs_key search_key;
81 struct btrfs_key found_key;
82 struct extent_buffer *l;
83 int ret;
84 int slot;
85
86 search_key.objectid = objectid;
87 search_key.type = BTRFS_ROOT_ITEM_KEY;
88 search_key.offset = (u64)-1;
89
90 path = btrfs_alloc_path();
91 BUG_ON(!path);
92 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
93 if (ret < 0)
94 goto out;
95
96 BUG_ON(ret == 0);
97 l = path->nodes[0];
98 BUG_ON(path->slots[0] == 0);
99 slot = path->slots[0] - 1;
100 btrfs_item_key_to_cpu(l, &found_key, slot);
101 if (found_key.objectid != objectid) {
102 ret = 1;
103 goto out;
104 }
105 read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot),
106 sizeof(*item));
107 memcpy(key, &found_key, sizeof(found_key));
108 ret = 0;
109out:
110 btrfs_free_path(path);
111 return ret;
112}
113
114/*
115 * copy the data in 'item' into the btree
116 */
117int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
118 *root, struct btrfs_key *key, struct btrfs_root_item
119 *item)
120{
121 struct btrfs_path *path;
122 struct extent_buffer *l;
123 int ret;
124 int slot;
125 unsigned long ptr;
126
127 path = btrfs_alloc_path();
128 BUG_ON(!path);
129 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
130 if (ret < 0)
131 goto out;
132
133 if (ret != 0) {
134 btrfs_print_leaf(root, path->nodes[0]);
135 printk(KERN_CRIT "unable to update root key %llu %u %llu\n",
136 (unsigned long long)key->objectid, key->type,
137 (unsigned long long)key->offset);
138 BUG_ON(1);
139 }
140
141 l = path->nodes[0];
142 slot = path->slots[0];
143 ptr = btrfs_item_ptr_offset(l, slot);
144 write_extent_buffer(l, item, ptr, sizeof(*item));
145 btrfs_mark_buffer_dirty(path->nodes[0]);
146out:
147 btrfs_release_path(root, path);
148 btrfs_free_path(path);
149 return ret;
150}
151
152int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
153 *root, struct btrfs_key *key, struct btrfs_root_item
154 *item)
155{
156 int ret;
157 ret = btrfs_insert_item(trans, root, key, item, sizeof(*item));
158 return ret;
159}
160
161/*
162 * at mount time we want to find all the old transaction snapshots that were in
163 * the process of being deleted if we crashed. This is any root item with an
164 * offset lower than the latest root. They need to be queued for deletion to
165 * finish what was happening when we crashed.
166 */
167int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid,
168 struct btrfs_root *latest)
169{
170 struct btrfs_root *dead_root;
171 struct btrfs_item *item;
172 struct btrfs_root_item *ri;
173 struct btrfs_key key;
174 struct btrfs_key found_key;
175 struct btrfs_path *path;
176 int ret;
177 u32 nritems;
178 struct extent_buffer *leaf;
179 int slot;
180
181 key.objectid = objectid;
182 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
183 key.offset = 0;
184 path = btrfs_alloc_path();
185 if (!path)
186 return -ENOMEM;
187
188again:
189 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
190 if (ret < 0)
191 goto err;
192 while (1) {
193 leaf = path->nodes[0];
194 nritems = btrfs_header_nritems(leaf);
195 slot = path->slots[0];
196 if (slot >= nritems) {
197 ret = btrfs_next_leaf(root, path);
198 if (ret)
199 break;
200 leaf = path->nodes[0];
201 nritems = btrfs_header_nritems(leaf);
202 slot = path->slots[0];
203 }
204 item = btrfs_item_nr(leaf, slot);
205 btrfs_item_key_to_cpu(leaf, &key, slot);
206 if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY)
207 goto next;
208
209 if (key.objectid < objectid)
210 goto next;
211
212 if (key.objectid > objectid)
213 break;
214
215 ri = btrfs_item_ptr(leaf, slot, struct btrfs_root_item);
216 if (btrfs_disk_root_refs(leaf, ri) != 0)
217 goto next;
218
219 memcpy(&found_key, &key, sizeof(key));
220 key.offset++;
221 btrfs_release_path(root, path);
222 dead_root =
223 btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
224 &found_key);
225 if (IS_ERR(dead_root)) {
226 ret = PTR_ERR(dead_root);
227 goto err;
228 }
229
230 if (objectid == BTRFS_TREE_RELOC_OBJECTID)
231 ret = btrfs_add_dead_reloc_root(dead_root);
232 else
233 ret = btrfs_add_dead_root(dead_root, latest);
234 if (ret)
235 goto err;
236 goto again;
237next:
238 slot++;
239 path->slots[0]++;
240 }
241 ret = 0;
242err:
243 btrfs_free_path(path);
244 return ret;
245}
246
247/* drop the root item for 'key' from 'root' */
248int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
249 struct btrfs_key *key)
250{
251 struct btrfs_path *path;
252 int ret;
253 u32 refs;
254 struct btrfs_root_item *ri;
255 struct extent_buffer *leaf;
256
257 path = btrfs_alloc_path();
258 BUG_ON(!path);
259 ret = btrfs_search_slot(trans, root, key, path, -1, 1);
260 if (ret < 0)
261 goto out;
262
263 BUG_ON(ret != 0);
264 leaf = path->nodes[0];
265 ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item);
266
267 refs = btrfs_disk_root_refs(leaf, ri);
268 BUG_ON(refs != 0);
269 ret = btrfs_del_item(trans, root, path);
270out:
271 btrfs_release_path(root, path);
272 btrfs_free_path(path);
273 return ret;
274}
275
276#if 0 /* this will get used when snapshot deletion is implemented */
277int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
278 struct btrfs_root *tree_root,
279 u64 root_id, u8 type, u64 ref_id)
280{
281 struct btrfs_key key;
282 int ret;
283 struct btrfs_path *path;
284
285 path = btrfs_alloc_path();
286
287 key.objectid = root_id;
288 key.type = type;
289 key.offset = ref_id;
290
291 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
292 BUG_ON(ret);
293
294 ret = btrfs_del_item(trans, tree_root, path);
295 BUG_ON(ret);
296
297 btrfs_free_path(path);
298 return ret;
299}
300#endif
301
302int btrfs_find_root_ref(struct btrfs_root *tree_root,
303 struct btrfs_path *path,
304 u64 root_id, u64 ref_id)
305{
306 struct btrfs_key key;
307 int ret;
308
309 key.objectid = root_id;
310 key.type = BTRFS_ROOT_REF_KEY;
311 key.offset = ref_id;
312
313 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
314 return ret;
315}
316
317
318/*
319 * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY
320 * or BTRFS_ROOT_BACKREF_KEY.
321 *
322 * The dirid, sequence, name and name_len refer to the directory entry
323 * that is referencing the root.
324 *
325 * For a forward ref, the root_id is the id of the tree referencing
326 * the root and ref_id is the id of the subvol or snapshot.
327 *
328 * For a back ref the root_id is the id of the subvol or snapshot and
329 * ref_id is the id of the tree referencing it.
330 */
331int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
332 struct btrfs_root *tree_root,
333 u64 root_id, u8 type, u64 ref_id,
334 u64 dirid, u64 sequence,
335 const char *name, int name_len)
336{
337 struct btrfs_key key;
338 int ret;
339 struct btrfs_path *path;
340 struct btrfs_root_ref *ref;
341 struct extent_buffer *leaf;
342 unsigned long ptr;
343
344
345 path = btrfs_alloc_path();
346
347 key.objectid = root_id;
348 key.type = type;
349 key.offset = ref_id;
350
351 ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
352 sizeof(*ref) + name_len);
353 BUG_ON(ret);
354
355 leaf = path->nodes[0];
356 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
357 btrfs_set_root_ref_dirid(leaf, ref, dirid);
358 btrfs_set_root_ref_sequence(leaf, ref, sequence);
359 btrfs_set_root_ref_name_len(leaf, ref, name_len);
360 ptr = (unsigned long)(ref + 1);
361 write_extent_buffer(leaf, name, ptr, name_len);
362 btrfs_mark_buffer_dirty(leaf);
363
364 btrfs_free_path(path);
365 return ret;
366}
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
new file mode 100644
index 000000000000..c0f7ecaf1e79
--- /dev/null
+++ b/fs/btrfs/struct-funcs.c
@@ -0,0 +1,139 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/highmem.h>
20
21/* this is some deeply nasty code. ctree.h has a different
22 * definition for this BTRFS_SETGET_FUNCS macro, behind a #ifndef
23 *
24 * The end result is that anyone who #includes ctree.h gets a
25 * declaration for the btrfs_set_foo functions and btrfs_foo functions
26 *
27 * This file declares the macros and then #includes ctree.h, which results
28 * in cpp creating the function here based on the template below.
29 *
30 * These setget functions do all the extent_buffer related mapping
31 * required to efficiently read and write specific fields in the extent
32 * buffers. Every pointer to metadata items in btrfs is really just
33 * an unsigned long offset into the extent buffer which has been
34 * cast to a specific type. This gives us all the gcc type checking.
35 *
36 * The extent buffer api is used to do all the kmapping and page
37 * spanning work required to get extent buffers in highmem and have
38 * a metadata blocksize different from the page size.
39 *
40 * The macro starts with a simple function prototype declaration so that
41 * sparse won't complain about it being static.
42 */
43
44#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
45u##bits btrfs_##name(struct extent_buffer *eb, type *s); \
46void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); \
47u##bits btrfs_##name(struct extent_buffer *eb, \
48 type *s) \
49{ \
50 unsigned long part_offset = (unsigned long)s; \
51 unsigned long offset = part_offset + offsetof(type, member); \
52 type *p; \
53 /* ugly, but we want the fast path here */ \
54 if (eb->map_token && offset >= eb->map_start && \
55 offset + sizeof(((type *)0)->member) <= eb->map_start + \
56 eb->map_len) { \
57 p = (type *)(eb->kaddr + part_offset - eb->map_start); \
58 return le##bits##_to_cpu(p->member); \
59 } \
60 { \
61 int err; \
62 char *map_token; \
63 char *kaddr; \
64 int unmap_on_exit = (eb->map_token == NULL); \
65 unsigned long map_start; \
66 unsigned long map_len; \
67 u##bits res; \
68 err = map_extent_buffer(eb, offset, \
69 sizeof(((type *)0)->member), \
70 &map_token, &kaddr, \
71 &map_start, &map_len, KM_USER1); \
72 if (err) { \
73 __le##bits leres; \
74 read_eb_member(eb, s, type, member, &leres); \
75 return le##bits##_to_cpu(leres); \
76 } \
77 p = (type *)(kaddr + part_offset - map_start); \
78 res = le##bits##_to_cpu(p->member); \
79 if (unmap_on_exit) \
80 unmap_extent_buffer(eb, map_token, KM_USER1); \
81 return res; \
82 } \
83} \
84void btrfs_set_##name(struct extent_buffer *eb, \
85 type *s, u##bits val) \
86{ \
87 unsigned long part_offset = (unsigned long)s; \
88 unsigned long offset = part_offset + offsetof(type, member); \
89 type *p; \
90 /* ugly, but we want the fast path here */ \
91 if (eb->map_token && offset >= eb->map_start && \
92 offset + sizeof(((type *)0)->member) <= eb->map_start + \
93 eb->map_len) { \
94 p = (type *)(eb->kaddr + part_offset - eb->map_start); \
95 p->member = cpu_to_le##bits(val); \
96 return; \
97 } \
98 { \
99 int err; \
100 char *map_token; \
101 char *kaddr; \
102 int unmap_on_exit = (eb->map_token == NULL); \
103 unsigned long map_start; \
104 unsigned long map_len; \
105 err = map_extent_buffer(eb, offset, \
106 sizeof(((type *)0)->member), \
107 &map_token, &kaddr, \
108 &map_start, &map_len, KM_USER1); \
109 if (err) { \
110 __le##bits val2; \
111 val2 = cpu_to_le##bits(val); \
112 write_eb_member(eb, s, type, member, &val2); \
113 return; \
114 } \
115 p = (type *)(kaddr + part_offset - map_start); \
116 p->member = cpu_to_le##bits(val); \
117 if (unmap_on_exit) \
118 unmap_extent_buffer(eb, map_token, KM_USER1); \
119 } \
120}
121
122#include "ctree.h"
123
124void btrfs_node_key(struct extent_buffer *eb,
125 struct btrfs_disk_key *disk_key, int nr)
126{
127 unsigned long ptr = btrfs_node_key_ptr_offset(nr);
128 if (eb->map_token && ptr >= eb->map_start &&
129 ptr + sizeof(*disk_key) <= eb->map_start + eb->map_len) {
130 memcpy(disk_key, eb->kaddr + ptr - eb->map_start,
131 sizeof(*disk_key));
132 return;
133 } else if (eb->map_token) {
134 unmap_extent_buffer(eb, eb->map_token, KM_USER1);
135 eb->map_token = NULL;
136 }
137 read_eb_member(eb, (struct btrfs_key_ptr *)ptr,
138 struct btrfs_key_ptr, key, disk_key);
139}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
new file mode 100644
index 000000000000..b4c101d9322c
--- /dev/null
+++ b/fs/btrfs/super.c
@@ -0,0 +1,720 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/blkdev.h>
20#include <linux/module.h>
21#include <linux/buffer_head.h>
22#include <linux/fs.h>
23#include <linux/pagemap.h>
24#include <linux/highmem.h>
25#include <linux/time.h>
26#include <linux/init.h>
27#include <linux/string.h>
28#include <linux/smp_lock.h>
29#include <linux/backing-dev.h>
30#include <linux/mount.h>
31#include <linux/mpage.h>
32#include <linux/swap.h>
33#include <linux/writeback.h>
34#include <linux/statfs.h>
35#include <linux/compat.h>
36#include <linux/parser.h>
37#include <linux/ctype.h>
38#include <linux/namei.h>
39#include <linux/miscdevice.h>
40#include <linux/version.h>
41#include "compat.h"
42#include "ctree.h"
43#include "disk-io.h"
44#include "transaction.h"
45#include "btrfs_inode.h"
46#include "ioctl.h"
47#include "print-tree.h"
48#include "xattr.h"
49#include "volumes.h"
50#include "version.h"
51#include "export.h"
52#include "compression.h"
53
54#define BTRFS_SUPER_MAGIC 0x9123683E
55
56static struct super_operations btrfs_super_ops;
57
58static void btrfs_put_super(struct super_block *sb)
59{
60 struct btrfs_root *root = btrfs_sb(sb);
61 int ret;
62
63 ret = close_ctree(root);
64 sb->s_fs_info = NULL;
65}
66
67enum {
68 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
69 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
70 Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_err,
71};
72
73static match_table_t tokens = {
74 {Opt_degraded, "degraded"},
75 {Opt_subvol, "subvol=%s"},
76 {Opt_device, "device=%s"},
77 {Opt_nodatasum, "nodatasum"},
78 {Opt_nodatacow, "nodatacow"},
79 {Opt_nobarrier, "nobarrier"},
80 {Opt_max_extent, "max_extent=%s"},
81 {Opt_max_inline, "max_inline=%s"},
82 {Opt_alloc_start, "alloc_start=%s"},
83 {Opt_thread_pool, "thread_pool=%d"},
84 {Opt_compress, "compress"},
85 {Opt_ssd, "ssd"},
86 {Opt_noacl, "noacl"},
87 {Opt_err, NULL},
88};
89
90u64 btrfs_parse_size(char *str)
91{
92 u64 res;
93 int mult = 1;
94 char *end;
95 char last;
96
97 res = simple_strtoul(str, &end, 10);
98
99 last = end[0];
100 if (isalpha(last)) {
101 last = tolower(last);
102 switch (last) {
103 case 'g':
104 mult *= 1024;
105 case 'm':
106 mult *= 1024;
107 case 'k':
108 mult *= 1024;
109 }
110 res = res * mult;
111 }
112 return res;
113}
114
115/*
116 * Regular mount options parser. Everything that is needed only when
117 * reading in a new superblock is parsed here.
118 */
119int btrfs_parse_options(struct btrfs_root *root, char *options)
120{
121 struct btrfs_fs_info *info = root->fs_info;
122 substring_t args[MAX_OPT_ARGS];
123 char *p, *num;
124 int intarg;
125
126 if (!options)
127 return 0;
128
129 /*
130 * strsep changes the string, duplicate it because parse_options
131 * gets called twice
132 */
133 options = kstrdup(options, GFP_NOFS);
134 if (!options)
135 return -ENOMEM;
136
137
138 while ((p = strsep(&options, ",")) != NULL) {
139 int token;
140 if (!*p)
141 continue;
142
143 token = match_token(p, tokens, args);
144 switch (token) {
145 case Opt_degraded:
146 printk(KERN_INFO "btrfs: allowing degraded mounts\n");
147 btrfs_set_opt(info->mount_opt, DEGRADED);
148 break;
149 case Opt_subvol:
150 case Opt_device:
151 /*
152 * These are parsed by btrfs_parse_early_options
153 * and can be happily ignored here.
154 */
155 break;
156 case Opt_nodatasum:
157 printk(KERN_INFO "btrfs: setting nodatacsum\n");
158 btrfs_set_opt(info->mount_opt, NODATASUM);
159 break;
160 case Opt_nodatacow:
161 printk(KERN_INFO "btrfs: setting nodatacow\n");
162 btrfs_set_opt(info->mount_opt, NODATACOW);
163 btrfs_set_opt(info->mount_opt, NODATASUM);
164 break;
165 case Opt_compress:
166 printk(KERN_INFO "btrfs: use compression\n");
167 btrfs_set_opt(info->mount_opt, COMPRESS);
168 break;
169 case Opt_ssd:
170 printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
171 btrfs_set_opt(info->mount_opt, SSD);
172 break;
173 case Opt_nobarrier:
174 printk(KERN_INFO "btrfs: turning off barriers\n");
175 btrfs_set_opt(info->mount_opt, NOBARRIER);
176 break;
177 case Opt_thread_pool:
178 intarg = 0;
179 match_int(&args[0], &intarg);
180 if (intarg) {
181 info->thread_pool_size = intarg;
182 printk(KERN_INFO "btrfs: thread pool %d\n",
183 info->thread_pool_size);
184 }
185 break;
186 case Opt_max_extent:
187 num = match_strdup(&args[0]);
188 if (num) {
189 info->max_extent = btrfs_parse_size(num);
190 kfree(num);
191
192 info->max_extent = max_t(u64,
193 info->max_extent, root->sectorsize);
194 printk(KERN_INFO "btrfs: max_extent at %llu\n",
195 info->max_extent);
196 }
197 break;
198 case Opt_max_inline:
199 num = match_strdup(&args[0]);
200 if (num) {
201 info->max_inline = btrfs_parse_size(num);
202 kfree(num);
203
204 if (info->max_inline) {
205 info->max_inline = max_t(u64,
206 info->max_inline,
207 root->sectorsize);
208 }
209 printk(KERN_INFO "btrfs: max_inline at %llu\n",
210 info->max_inline);
211 }
212 break;
213 case Opt_alloc_start:
214 num = match_strdup(&args[0]);
215 if (num) {
216 info->alloc_start = btrfs_parse_size(num);
217 kfree(num);
218 printk(KERN_INFO
219 "btrfs: allocations start at %llu\n",
220 info->alloc_start);
221 }
222 break;
223 case Opt_noacl:
224 root->fs_info->sb->s_flags &= ~MS_POSIXACL;
225 break;
226 default:
227 break;
228 }
229 }
230 kfree(options);
231 return 0;
232}
233
234/*
235 * Parse mount options that are required early in the mount process.
236 *
237 * All other options will be parsed on much later in the mount process and
238 * only when we need to allocate a new super block.
239 */
240static int btrfs_parse_early_options(const char *options, fmode_t flags,
241 void *holder, char **subvol_name,
242 struct btrfs_fs_devices **fs_devices)
243{
244 substring_t args[MAX_OPT_ARGS];
245 char *opts, *p;
246 int error = 0;
247
248 if (!options)
249 goto out;
250
251 /*
252 * strsep changes the string, duplicate it because parse_options
253 * gets called twice
254 */
255 opts = kstrdup(options, GFP_KERNEL);
256 if (!opts)
257 return -ENOMEM;
258
259 while ((p = strsep(&opts, ",")) != NULL) {
260 int token;
261 if (!*p)
262 continue;
263
264 token = match_token(p, tokens, args);
265 switch (token) {
266 case Opt_subvol:
267 *subvol_name = match_strdup(&args[0]);
268 break;
269 case Opt_device:
270 error = btrfs_scan_one_device(match_strdup(&args[0]),
271 flags, holder, fs_devices);
272 if (error)
273 goto out_free_opts;
274 break;
275 default:
276 break;
277 }
278 }
279
280 out_free_opts:
281 kfree(opts);
282 out:
283 /*
284 * If no subvolume name is specified we use the default one. Allocate
285 * a copy of the string "." here so that code later in the
286 * mount path doesn't care if it's the default volume or another one.
287 */
288 if (!*subvol_name) {
289 *subvol_name = kstrdup(".", GFP_KERNEL);
290 if (!*subvol_name)
291 return -ENOMEM;
292 }
293 return error;
294}
295
296static int btrfs_fill_super(struct super_block *sb,
297 struct btrfs_fs_devices *fs_devices,
298 void *data, int silent)
299{
300 struct inode *inode;
301 struct dentry *root_dentry;
302 struct btrfs_super_block *disk_super;
303 struct btrfs_root *tree_root;
304 struct btrfs_inode *bi;
305 int err;
306
307 sb->s_maxbytes = MAX_LFS_FILESIZE;
308 sb->s_magic = BTRFS_SUPER_MAGIC;
309 sb->s_op = &btrfs_super_ops;
310 sb->s_export_op = &btrfs_export_ops;
311 sb->s_xattr = btrfs_xattr_handlers;
312 sb->s_time_gran = 1;
313 sb->s_flags |= MS_POSIXACL;
314
315 tree_root = open_ctree(sb, fs_devices, (char *)data);
316
317 if (IS_ERR(tree_root)) {
318 printk("btrfs: open_ctree failed\n");
319 return PTR_ERR(tree_root);
320 }
321 sb->s_fs_info = tree_root;
322 disk_super = &tree_root->fs_info->super_copy;
323 inode = btrfs_iget_locked(sb, BTRFS_FIRST_FREE_OBJECTID,
324 tree_root->fs_info->fs_root);
325 bi = BTRFS_I(inode);
326 bi->location.objectid = inode->i_ino;
327 bi->location.offset = 0;
328 bi->root = tree_root->fs_info->fs_root;
329
330 btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
331
332 if (!inode) {
333 err = -ENOMEM;
334 goto fail_close;
335 }
336 if (inode->i_state & I_NEW) {
337 btrfs_read_locked_inode(inode);
338 unlock_new_inode(inode);
339 }
340
341 root_dentry = d_alloc_root(inode);
342 if (!root_dentry) {
343 iput(inode);
344 err = -ENOMEM;
345 goto fail_close;
346 }
347#if 0
348 /* this does the super kobj at the same time */
349 err = btrfs_sysfs_add_super(tree_root->fs_info);
350 if (err)
351 goto fail_close;
352#endif
353
354 sb->s_root = root_dentry;
355
356 save_mount_options(sb, data);
357 return 0;
358
359fail_close:
360 close_ctree(tree_root);
361 return err;
362}
363
364int btrfs_sync_fs(struct super_block *sb, int wait)
365{
366 struct btrfs_trans_handle *trans;
367 struct btrfs_root *root;
368 int ret;
369 root = btrfs_sb(sb);
370
371 if (sb->s_flags & MS_RDONLY)
372 return 0;
373
374 sb->s_dirt = 0;
375 if (!wait) {
376 filemap_flush(root->fs_info->btree_inode->i_mapping);
377 return 0;
378 }
379
380 btrfs_start_delalloc_inodes(root);
381 btrfs_wait_ordered_extents(root, 0);
382
383 btrfs_clean_old_snapshots(root);
384 trans = btrfs_start_transaction(root, 1);
385 ret = btrfs_commit_transaction(trans, root);
386 sb->s_dirt = 0;
387 return ret;
388}
389
390static void btrfs_write_super(struct super_block *sb)
391{
392 sb->s_dirt = 0;
393}
394
395static int btrfs_test_super(struct super_block *s, void *data)
396{
397 struct btrfs_fs_devices *test_fs_devices = data;
398 struct btrfs_root *root = btrfs_sb(s);
399
400 return root->fs_info->fs_devices == test_fs_devices;
401}
402
403/*
404 * Find a superblock for the given device / mount point.
405 *
406 * Note: This is based on get_sb_bdev from fs/super.c with a few additions
407 * for multiple device setup. Make sure to keep it in sync.
408 */
409static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
410 const char *dev_name, void *data, struct vfsmount *mnt)
411{
412 char *subvol_name = NULL;
413 struct block_device *bdev = NULL;
414 struct super_block *s;
415 struct dentry *root;
416 struct btrfs_fs_devices *fs_devices = NULL;
417 fmode_t mode = FMODE_READ;
418 int error = 0;
419
420 if (!(flags & MS_RDONLY))
421 mode |= FMODE_WRITE;
422
423 error = btrfs_parse_early_options(data, mode, fs_type,
424 &subvol_name, &fs_devices);
425 if (error)
426 return error;
427
428 error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices);
429 if (error)
430 goto error_free_subvol_name;
431
432 error = btrfs_open_devices(fs_devices, mode, fs_type);
433 if (error)
434 goto error_free_subvol_name;
435
436 if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) {
437 error = -EACCES;
438 goto error_close_devices;
439 }
440
441 bdev = fs_devices->latest_bdev;
442 s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices);
443 if (IS_ERR(s))
444 goto error_s;
445
446 if (s->s_root) {
447 if ((flags ^ s->s_flags) & MS_RDONLY) {
448 up_write(&s->s_umount);
449 deactivate_super(s);
450 error = -EBUSY;
451 goto error_close_devices;
452 }
453
454 btrfs_close_devices(fs_devices);
455 } else {
456 char b[BDEVNAME_SIZE];
457
458 s->s_flags = flags;
459 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
460 error = btrfs_fill_super(s, fs_devices, data,
461 flags & MS_SILENT ? 1 : 0);
462 if (error) {
463 up_write(&s->s_umount);
464 deactivate_super(s);
465 goto error_free_subvol_name;
466 }
467
468 btrfs_sb(s)->fs_info->bdev_holder = fs_type;
469 s->s_flags |= MS_ACTIVE;
470 }
471
472 if (!strcmp(subvol_name, "."))
473 root = dget(s->s_root);
474 else {
475 mutex_lock(&s->s_root->d_inode->i_mutex);
476 root = lookup_one_len(subvol_name, s->s_root,
477 strlen(subvol_name));
478 mutex_unlock(&s->s_root->d_inode->i_mutex);
479
480 if (IS_ERR(root)) {
481 up_write(&s->s_umount);
482 deactivate_super(s);
483 error = PTR_ERR(root);
484 goto error_free_subvol_name;
485 }
486 if (!root->d_inode) {
487 dput(root);
488 up_write(&s->s_umount);
489 deactivate_super(s);
490 error = -ENXIO;
491 goto error_free_subvol_name;
492 }
493 }
494
495 mnt->mnt_sb = s;
496 mnt->mnt_root = root;
497
498 kfree(subvol_name);
499 return 0;
500
501error_s:
502 error = PTR_ERR(s);
503error_close_devices:
504 btrfs_close_devices(fs_devices);
505error_free_subvol_name:
506 kfree(subvol_name);
507 return error;
508}
509
510static int btrfs_remount(struct super_block *sb, int *flags, char *data)
511{
512 struct btrfs_root *root = btrfs_sb(sb);
513 int ret;
514
515 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
516 return 0;
517
518 if (*flags & MS_RDONLY) {
519 sb->s_flags |= MS_RDONLY;
520
521 ret = btrfs_commit_super(root);
522 WARN_ON(ret);
523 } else {
524 if (root->fs_info->fs_devices->rw_devices == 0)
525 return -EACCES;
526
527 if (btrfs_super_log_root(&root->fs_info->super_copy) != 0)
528 return -EINVAL;
529
530 ret = btrfs_cleanup_reloc_trees(root);
531 WARN_ON(ret);
532
533 ret = btrfs_cleanup_fs_roots(root->fs_info);
534 WARN_ON(ret);
535
536 sb->s_flags &= ~MS_RDONLY;
537 }
538
539 return 0;
540}
541
542static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
543{
544 struct btrfs_root *root = btrfs_sb(dentry->d_sb);
545 struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
546 int bits = dentry->d_sb->s_blocksize_bits;
547 __be32 *fsid = (__be32 *)root->fs_info->fsid;
548
549 buf->f_namelen = BTRFS_NAME_LEN;
550 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
551 buf->f_bfree = buf->f_blocks -
552 (btrfs_super_bytes_used(disk_super) >> bits);
553 buf->f_bavail = buf->f_bfree;
554 buf->f_bsize = dentry->d_sb->s_blocksize;
555 buf->f_type = BTRFS_SUPER_MAGIC;
556
557 /* We treat it as constant endianness (it doesn't matter _which_)
558 because we want the fsid to come out the same whether mounted
559 on a big-endian or little-endian host */
560 buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
561 buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
562 /* Mask in the root object ID too, to disambiguate subvols */
563 buf->f_fsid.val[0] ^= BTRFS_I(dentry->d_inode)->root->objectid >> 32;
564 buf->f_fsid.val[1] ^= BTRFS_I(dentry->d_inode)->root->objectid;
565
566 return 0;
567}
568
569static struct file_system_type btrfs_fs_type = {
570 .owner = THIS_MODULE,
571 .name = "btrfs",
572 .get_sb = btrfs_get_sb,
573 .kill_sb = kill_anon_super,
574 .fs_flags = FS_REQUIRES_DEV,
575};
576
577/*
578 * used by btrfsctl to scan devices when no FS is mounted
579 */
580static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
581 unsigned long arg)
582{
583 struct btrfs_ioctl_vol_args *vol;
584 struct btrfs_fs_devices *fs_devices;
585 int ret = 0;
586 int len;
587
588 if (!capable(CAP_SYS_ADMIN))
589 return -EPERM;
590
591 vol = kmalloc(sizeof(*vol), GFP_KERNEL);
592 if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) {
593 ret = -EFAULT;
594 goto out;
595 }
596 len = strnlen(vol->name, BTRFS_PATH_NAME_MAX);
597 switch (cmd) {
598 case BTRFS_IOC_SCAN_DEV:
599 ret = btrfs_scan_one_device(vol->name, FMODE_READ,
600 &btrfs_fs_type, &fs_devices);
601 break;
602 }
603out:
604 kfree(vol);
605 return ret;
606}
607
608static void btrfs_write_super_lockfs(struct super_block *sb)
609{
610 struct btrfs_root *root = btrfs_sb(sb);
611 mutex_lock(&root->fs_info->transaction_kthread_mutex);
612 mutex_lock(&root->fs_info->cleaner_mutex);
613}
614
615static void btrfs_unlockfs(struct super_block *sb)
616{
617 struct btrfs_root *root = btrfs_sb(sb);
618 mutex_unlock(&root->fs_info->cleaner_mutex);
619 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
620}
621
622static struct super_operations btrfs_super_ops = {
623 .delete_inode = btrfs_delete_inode,
624 .put_super = btrfs_put_super,
625 .write_super = btrfs_write_super,
626 .sync_fs = btrfs_sync_fs,
627 .show_options = generic_show_options,
628 .write_inode = btrfs_write_inode,
629 .dirty_inode = btrfs_dirty_inode,
630 .alloc_inode = btrfs_alloc_inode,
631 .destroy_inode = btrfs_destroy_inode,
632 .statfs = btrfs_statfs,
633 .remount_fs = btrfs_remount,
634 .write_super_lockfs = btrfs_write_super_lockfs,
635 .unlockfs = btrfs_unlockfs,
636};
637
638static const struct file_operations btrfs_ctl_fops = {
639 .unlocked_ioctl = btrfs_control_ioctl,
640 .compat_ioctl = btrfs_control_ioctl,
641 .owner = THIS_MODULE,
642};
643
644static struct miscdevice btrfs_misc = {
645 .minor = MISC_DYNAMIC_MINOR,
646 .name = "btrfs-control",
647 .fops = &btrfs_ctl_fops
648};
649
650static int btrfs_interface_init(void)
651{
652 return misc_register(&btrfs_misc);
653}
654
655static void btrfs_interface_exit(void)
656{
657 if (misc_deregister(&btrfs_misc) < 0)
658 printk(KERN_INFO "misc_deregister failed for control device");
659}
660
661static int __init init_btrfs_fs(void)
662{
663 int err;
664
665 err = btrfs_init_sysfs();
666 if (err)
667 return err;
668
669 err = btrfs_init_cachep();
670 if (err)
671 goto free_sysfs;
672
673 err = extent_io_init();
674 if (err)
675 goto free_cachep;
676
677 err = extent_map_init();
678 if (err)
679 goto free_extent_io;
680
681 err = btrfs_interface_init();
682 if (err)
683 goto free_extent_map;
684
685 err = register_filesystem(&btrfs_fs_type);
686 if (err)
687 goto unregister_ioctl;
688
689 printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION);
690 return 0;
691
692unregister_ioctl:
693 btrfs_interface_exit();
694free_extent_map:
695 extent_map_exit();
696free_extent_io:
697 extent_io_exit();
698free_cachep:
699 btrfs_destroy_cachep();
700free_sysfs:
701 btrfs_exit_sysfs();
702 return err;
703}
704
705static void __exit exit_btrfs_fs(void)
706{
707 btrfs_destroy_cachep();
708 extent_map_exit();
709 extent_io_exit();
710 btrfs_interface_exit();
711 unregister_filesystem(&btrfs_fs_type);
712 btrfs_exit_sysfs();
713 btrfs_cleanup_fs_uuids();
714 btrfs_zlib_exit();
715}
716
717module_init(init_btrfs_fs)
718module_exit(exit_btrfs_fs)
719
720MODULE_LICENSE("GPL");
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
new file mode 100644
index 000000000000..a240b6fa81df
--- /dev/null
+++ b/fs/btrfs/sysfs.c
@@ -0,0 +1,269 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include <linux/slab.h>
21#include <linux/spinlock.h>
22#include <linux/completion.h>
23#include <linux/buffer_head.h>
24#include <linux/module.h>
25#include <linux/kobject.h>
26
27#include "ctree.h"
28#include "disk-io.h"
29#include "transaction.h"
30
31static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf)
32{
33 return snprintf(buf, PAGE_SIZE, "%llu\n",
34 (unsigned long long)btrfs_root_used(&root->root_item));
35}
36
37static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf)
38{
39 return snprintf(buf, PAGE_SIZE, "%llu\n",
40 (unsigned long long)btrfs_root_limit(&root->root_item));
41}
42
43static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf)
44{
45
46 return snprintf(buf, PAGE_SIZE, "%llu\n",
47 (unsigned long long)btrfs_super_bytes_used(&fs->super_copy));
48}
49
50static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf)
51{
52 return snprintf(buf, PAGE_SIZE, "%llu\n",
53 (unsigned long long)btrfs_super_total_bytes(&fs->super_copy));
54}
55
56static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf)
57{
58 return snprintf(buf, PAGE_SIZE, "%llu\n",
59 (unsigned long long)btrfs_super_sectorsize(&fs->super_copy));
60}
61
62/* this is for root attrs (subvols/snapshots) */
63struct btrfs_root_attr {
64 struct attribute attr;
65 ssize_t (*show)(struct btrfs_root *, char *);
66 ssize_t (*store)(struct btrfs_root *, const char *, size_t);
67};
68
69#define ROOT_ATTR(name, mode, show, store) \
70static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \
71 show, store)
72
73ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL);
74ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL);
75
76static struct attribute *btrfs_root_attrs[] = {
77 &btrfs_root_attr_blocks_used.attr,
78 &btrfs_root_attr_block_limit.attr,
79 NULL,
80};
81
82/* this is for super attrs (actual full fs) */
83struct btrfs_super_attr {
84 struct attribute attr;
85 ssize_t (*show)(struct btrfs_fs_info *, char *);
86 ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t);
87};
88
89#define SUPER_ATTR(name, mode, show, store) \
90static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \
91 show, store)
92
93SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL);
94SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL);
95SUPER_ATTR(blocksize, 0444, super_blocksize_show, NULL);
96
97static struct attribute *btrfs_super_attrs[] = {
98 &btrfs_super_attr_blocks_used.attr,
99 &btrfs_super_attr_total_blocks.attr,
100 &btrfs_super_attr_blocksize.attr,
101 NULL,
102};
103
104static ssize_t btrfs_super_attr_show(struct kobject *kobj,
105 struct attribute *attr, char *buf)
106{
107 struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
108 super_kobj);
109 struct btrfs_super_attr *a = container_of(attr,
110 struct btrfs_super_attr,
111 attr);
112
113 return a->show ? a->show(fs, buf) : 0;
114}
115
116static ssize_t btrfs_super_attr_store(struct kobject *kobj,
117 struct attribute *attr,
118 const char *buf, size_t len)
119{
120 struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
121 super_kobj);
122 struct btrfs_super_attr *a = container_of(attr,
123 struct btrfs_super_attr,
124 attr);
125
126 return a->store ? a->store(fs, buf, len) : 0;
127}
128
129static ssize_t btrfs_root_attr_show(struct kobject *kobj,
130 struct attribute *attr, char *buf)
131{
132 struct btrfs_root *root = container_of(kobj, struct btrfs_root,
133 root_kobj);
134 struct btrfs_root_attr *a = container_of(attr,
135 struct btrfs_root_attr,
136 attr);
137
138 return a->show ? a->show(root, buf) : 0;
139}
140
141static ssize_t btrfs_root_attr_store(struct kobject *kobj,
142 struct attribute *attr,
143 const char *buf, size_t len)
144{
145 struct btrfs_root *root = container_of(kobj, struct btrfs_root,
146 root_kobj);
147 struct btrfs_root_attr *a = container_of(attr,
148 struct btrfs_root_attr,
149 attr);
150 return a->store ? a->store(root, buf, len) : 0;
151}
152
153static void btrfs_super_release(struct kobject *kobj)
154{
155 struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
156 super_kobj);
157 complete(&fs->kobj_unregister);
158}
159
160static void btrfs_root_release(struct kobject *kobj)
161{
162 struct btrfs_root *root = container_of(kobj, struct btrfs_root,
163 root_kobj);
164 complete(&root->kobj_unregister);
165}
166
167static struct sysfs_ops btrfs_super_attr_ops = {
168 .show = btrfs_super_attr_show,
169 .store = btrfs_super_attr_store,
170};
171
172static struct sysfs_ops btrfs_root_attr_ops = {
173 .show = btrfs_root_attr_show,
174 .store = btrfs_root_attr_store,
175};
176
177static struct kobj_type btrfs_root_ktype = {
178 .default_attrs = btrfs_root_attrs,
179 .sysfs_ops = &btrfs_root_attr_ops,
180 .release = btrfs_root_release,
181};
182
183static struct kobj_type btrfs_super_ktype = {
184 .default_attrs = btrfs_super_attrs,
185 .sysfs_ops = &btrfs_super_attr_ops,
186 .release = btrfs_super_release,
187};
188
189/* /sys/fs/btrfs/ entry */
190static struct kset *btrfs_kset;
191
192int btrfs_sysfs_add_super(struct btrfs_fs_info *fs)
193{
194 int error;
195 char *name;
196 char c;
197 int len = strlen(fs->sb->s_id) + 1;
198 int i;
199
200 name = kmalloc(len, GFP_NOFS);
201 if (!name) {
202 error = -ENOMEM;
203 goto fail;
204 }
205
206 for (i = 0; i < len; i++) {
207 c = fs->sb->s_id[i];
208 if (c == '/' || c == '\\')
209 c = '!';
210 name[i] = c;
211 }
212 name[len] = '\0';
213
214 fs->super_kobj.kset = btrfs_kset;
215 error = kobject_init_and_add(&fs->super_kobj, &btrfs_super_ktype,
216 NULL, "%s", name);
217 kfree(name);
218 if (error)
219 goto fail;
220
221 return 0;
222
223fail:
224 printk(KERN_ERR "btrfs: sysfs creation for super failed\n");
225 return error;
226}
227
228int btrfs_sysfs_add_root(struct btrfs_root *root)
229{
230 int error;
231
232 error = kobject_init_and_add(&root->root_kobj, &btrfs_root_ktype,
233 &root->fs_info->super_kobj,
234 "%s", root->name);
235 if (error)
236 goto fail;
237
238 return 0;
239
240fail:
241 printk(KERN_ERR "btrfs: sysfs creation for root failed\n");
242 return error;
243}
244
245void btrfs_sysfs_del_root(struct btrfs_root *root)
246{
247 kobject_put(&root->root_kobj);
248 wait_for_completion(&root->kobj_unregister);
249}
250
251void btrfs_sysfs_del_super(struct btrfs_fs_info *fs)
252{
253 kobject_put(&fs->super_kobj);
254 wait_for_completion(&fs->kobj_unregister);
255}
256
257int btrfs_init_sysfs(void)
258{
259 btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj);
260 if (!btrfs_kset)
261 return -ENOMEM;
262 return 0;
263}
264
265void btrfs_exit_sysfs(void)
266{
267 kset_unregister(btrfs_kset);
268}
269
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
new file mode 100644
index 000000000000..8a08f9443340
--- /dev/null
+++ b/fs/btrfs/transaction.c
@@ -0,0 +1,1097 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/fs.h>
20#include <linux/sched.h>
21#include <linux/writeback.h>
22#include <linux/pagemap.h>
23#include <linux/blkdev.h>
24#include "ctree.h"
25#include "disk-io.h"
26#include "transaction.h"
27#include "locking.h"
28#include "ref-cache.h"
29#include "tree-log.h"
30
31#define BTRFS_ROOT_TRANS_TAG 0
32
33static noinline void put_transaction(struct btrfs_transaction *transaction)
34{
35 WARN_ON(transaction->use_count == 0);
36 transaction->use_count--;
37 if (transaction->use_count == 0) {
38 list_del_init(&transaction->list);
39 memset(transaction, 0, sizeof(*transaction));
40 kmem_cache_free(btrfs_transaction_cachep, transaction);
41 }
42}
43
44/*
45 * either allocate a new transaction or hop into the existing one
46 */
47static noinline int join_transaction(struct btrfs_root *root)
48{
49 struct btrfs_transaction *cur_trans;
50 cur_trans = root->fs_info->running_transaction;
51 if (!cur_trans) {
52 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
53 GFP_NOFS);
54 BUG_ON(!cur_trans);
55 root->fs_info->generation++;
56 root->fs_info->last_alloc = 0;
57 root->fs_info->last_data_alloc = 0;
58 cur_trans->num_writers = 1;
59 cur_trans->num_joined = 0;
60 cur_trans->transid = root->fs_info->generation;
61 init_waitqueue_head(&cur_trans->writer_wait);
62 init_waitqueue_head(&cur_trans->commit_wait);
63 cur_trans->in_commit = 0;
64 cur_trans->blocked = 0;
65 cur_trans->use_count = 1;
66 cur_trans->commit_done = 0;
67 cur_trans->start_time = get_seconds();
68 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
69 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
70 extent_io_tree_init(&cur_trans->dirty_pages,
71 root->fs_info->btree_inode->i_mapping,
72 GFP_NOFS);
73 spin_lock(&root->fs_info->new_trans_lock);
74 root->fs_info->running_transaction = cur_trans;
75 spin_unlock(&root->fs_info->new_trans_lock);
76 } else {
77 cur_trans->num_writers++;
78 cur_trans->num_joined++;
79 }
80
81 return 0;
82}
83
84/*
85 * this does all the record keeping required to make sure that a reference
86 * counted root is properly recorded in a given transaction. This is required
87 * to make sure the old root from before we joined the transaction is deleted
88 * when the transaction commits
89 */
90noinline int btrfs_record_root_in_trans(struct btrfs_root *root)
91{
92 struct btrfs_dirty_root *dirty;
93 u64 running_trans_id = root->fs_info->running_transaction->transid;
94 if (root->ref_cows && root->last_trans < running_trans_id) {
95 WARN_ON(root == root->fs_info->extent_root);
96 if (root->root_item.refs != 0) {
97 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
98 (unsigned long)root->root_key.objectid,
99 BTRFS_ROOT_TRANS_TAG);
100
101 dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
102 BUG_ON(!dirty);
103 dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS);
104 BUG_ON(!dirty->root);
105 dirty->latest_root = root;
106 INIT_LIST_HEAD(&dirty->list);
107
108 root->commit_root = btrfs_root_node(root);
109
110 memcpy(dirty->root, root, sizeof(*root));
111 spin_lock_init(&dirty->root->node_lock);
112 spin_lock_init(&dirty->root->list_lock);
113 mutex_init(&dirty->root->objectid_mutex);
114 mutex_init(&dirty->root->log_mutex);
115 INIT_LIST_HEAD(&dirty->root->dead_list);
116 dirty->root->node = root->commit_root;
117 dirty->root->commit_root = NULL;
118
119 spin_lock(&root->list_lock);
120 list_add(&dirty->root->dead_list, &root->dead_list);
121 spin_unlock(&root->list_lock);
122
123 root->dirty_root = dirty;
124 } else {
125 WARN_ON(1);
126 }
127 root->last_trans = running_trans_id;
128 }
129 return 0;
130}
131
132/* wait for commit against the current transaction to become unblocked
133 * when this is done, it is safe to start a new transaction, but the current
134 * transaction might not be fully on disk.
135 */
136static void wait_current_trans(struct btrfs_root *root)
137{
138 struct btrfs_transaction *cur_trans;
139
140 cur_trans = root->fs_info->running_transaction;
141 if (cur_trans && cur_trans->blocked) {
142 DEFINE_WAIT(wait);
143 cur_trans->use_count++;
144 while (1) {
145 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
146 TASK_UNINTERRUPTIBLE);
147 if (cur_trans->blocked) {
148 mutex_unlock(&root->fs_info->trans_mutex);
149 schedule();
150 mutex_lock(&root->fs_info->trans_mutex);
151 finish_wait(&root->fs_info->transaction_wait,
152 &wait);
153 } else {
154 finish_wait(&root->fs_info->transaction_wait,
155 &wait);
156 break;
157 }
158 }
159 put_transaction(cur_trans);
160 }
161}
162
163static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
164 int num_blocks, int wait)
165{
166 struct btrfs_trans_handle *h =
167 kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
168 int ret;
169
170 mutex_lock(&root->fs_info->trans_mutex);
171 if (!root->fs_info->log_root_recovering &&
172 ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2))
173 wait_current_trans(root);
174 ret = join_transaction(root);
175 BUG_ON(ret);
176
177 btrfs_record_root_in_trans(root);
178 h->transid = root->fs_info->running_transaction->transid;
179 h->transaction = root->fs_info->running_transaction;
180 h->blocks_reserved = num_blocks;
181 h->blocks_used = 0;
182 h->block_group = 0;
183 h->alloc_exclude_nr = 0;
184 h->alloc_exclude_start = 0;
185 root->fs_info->running_transaction->use_count++;
186 mutex_unlock(&root->fs_info->trans_mutex);
187 return h;
188}
189
190struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
191 int num_blocks)
192{
193 return start_transaction(root, num_blocks, 1);
194}
195struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
196 int num_blocks)
197{
198 return start_transaction(root, num_blocks, 0);
199}
200
201struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
202 int num_blocks)
203{
204 return start_transaction(r, num_blocks, 2);
205}
206
207/* wait for a transaction commit to be fully complete */
208static noinline int wait_for_commit(struct btrfs_root *root,
209 struct btrfs_transaction *commit)
210{
211 DEFINE_WAIT(wait);
212 mutex_lock(&root->fs_info->trans_mutex);
213 while (!commit->commit_done) {
214 prepare_to_wait(&commit->commit_wait, &wait,
215 TASK_UNINTERRUPTIBLE);
216 if (commit->commit_done)
217 break;
218 mutex_unlock(&root->fs_info->trans_mutex);
219 schedule();
220 mutex_lock(&root->fs_info->trans_mutex);
221 }
222 mutex_unlock(&root->fs_info->trans_mutex);
223 finish_wait(&commit->commit_wait, &wait);
224 return 0;
225}
226
227/*
228 * rate limit against the drop_snapshot code. This helps to slow down new
229 * operations if the drop_snapshot code isn't able to keep up.
230 */
231static void throttle_on_drops(struct btrfs_root *root)
232{
233 struct btrfs_fs_info *info = root->fs_info;
234 int harder_count = 0;
235
236harder:
237 if (atomic_read(&info->throttles)) {
238 DEFINE_WAIT(wait);
239 int thr;
240 thr = atomic_read(&info->throttle_gen);
241
242 do {
243 prepare_to_wait(&info->transaction_throttle,
244 &wait, TASK_UNINTERRUPTIBLE);
245 if (!atomic_read(&info->throttles)) {
246 finish_wait(&info->transaction_throttle, &wait);
247 break;
248 }
249 schedule();
250 finish_wait(&info->transaction_throttle, &wait);
251 } while (thr == atomic_read(&info->throttle_gen));
252 harder_count++;
253
254 if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 &&
255 harder_count < 2)
256 goto harder;
257
258 if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 &&
259 harder_count < 10)
260 goto harder;
261
262 if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 &&
263 harder_count < 20)
264 goto harder;
265 }
266}
267
268void btrfs_throttle(struct btrfs_root *root)
269{
270 mutex_lock(&root->fs_info->trans_mutex);
271 if (!root->fs_info->open_ioctl_trans)
272 wait_current_trans(root);
273 mutex_unlock(&root->fs_info->trans_mutex);
274
275 throttle_on_drops(root);
276}
277
278static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
279 struct btrfs_root *root, int throttle)
280{
281 struct btrfs_transaction *cur_trans;
282 struct btrfs_fs_info *info = root->fs_info;
283
284 mutex_lock(&info->trans_mutex);
285 cur_trans = info->running_transaction;
286 WARN_ON(cur_trans != trans->transaction);
287 WARN_ON(cur_trans->num_writers < 1);
288 cur_trans->num_writers--;
289
290 if (waitqueue_active(&cur_trans->writer_wait))
291 wake_up(&cur_trans->writer_wait);
292 put_transaction(cur_trans);
293 mutex_unlock(&info->trans_mutex);
294 memset(trans, 0, sizeof(*trans));
295 kmem_cache_free(btrfs_trans_handle_cachep, trans);
296
297 if (throttle)
298 throttle_on_drops(root);
299
300 return 0;
301}
302
303int btrfs_end_transaction(struct btrfs_trans_handle *trans,
304 struct btrfs_root *root)
305{
306 return __btrfs_end_transaction(trans, root, 0);
307}
308
309int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
310 struct btrfs_root *root)
311{
312 return __btrfs_end_transaction(trans, root, 1);
313}
314
315/*
316 * when btree blocks are allocated, they have some corresponding bits set for
317 * them in one of two extent_io trees. This is used to make sure all of
318 * those extents are on disk for transaction or log commit
319 */
320int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
321 struct extent_io_tree *dirty_pages)
322{
323 int ret;
324 int err = 0;
325 int werr = 0;
326 struct page *page;
327 struct inode *btree_inode = root->fs_info->btree_inode;
328 u64 start = 0;
329 u64 end;
330 unsigned long index;
331
332 while (1) {
333 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
334 EXTENT_DIRTY);
335 if (ret)
336 break;
337 while (start <= end) {
338 cond_resched();
339
340 index = start >> PAGE_CACHE_SHIFT;
341 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
342 page = find_get_page(btree_inode->i_mapping, index);
343 if (!page)
344 continue;
345
346 btree_lock_page_hook(page);
347 if (!page->mapping) {
348 unlock_page(page);
349 page_cache_release(page);
350 continue;
351 }
352
353 if (PageWriteback(page)) {
354 if (PageDirty(page))
355 wait_on_page_writeback(page);
356 else {
357 unlock_page(page);
358 page_cache_release(page);
359 continue;
360 }
361 }
362 err = write_one_page(page, 0);
363 if (err)
364 werr = err;
365 page_cache_release(page);
366 }
367 }
368 while (1) {
369 ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
370 EXTENT_DIRTY);
371 if (ret)
372 break;
373
374 clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
375 while (start <= end) {
376 index = start >> PAGE_CACHE_SHIFT;
377 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
378 page = find_get_page(btree_inode->i_mapping, index);
379 if (!page)
380 continue;
381 if (PageDirty(page)) {
382 btree_lock_page_hook(page);
383 wait_on_page_writeback(page);
384 err = write_one_page(page, 0);
385 if (err)
386 werr = err;
387 }
388 wait_on_page_writeback(page);
389 page_cache_release(page);
390 cond_resched();
391 }
392 }
393 if (err)
394 werr = err;
395 return werr;
396}
397
398int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
399 struct btrfs_root *root)
400{
401 if (!trans || !trans->transaction) {
402 struct inode *btree_inode;
403 btree_inode = root->fs_info->btree_inode;
404 return filemap_write_and_wait(btree_inode->i_mapping);
405 }
406 return btrfs_write_and_wait_marked_extents(root,
407 &trans->transaction->dirty_pages);
408}
409
410/*
411 * this is used to update the root pointer in the tree of tree roots.
412 *
413 * But, in the case of the extent allocation tree, updating the root
414 * pointer may allocate blocks which may change the root of the extent
415 * allocation tree.
416 *
417 * So, this loops and repeats and makes sure the cowonly root didn't
418 * change while the root pointer was being updated in the metadata.
419 */
420static int update_cowonly_root(struct btrfs_trans_handle *trans,
421 struct btrfs_root *root)
422{
423 int ret;
424 u64 old_root_bytenr;
425 struct btrfs_root *tree_root = root->fs_info->tree_root;
426
427 btrfs_extent_post_op(trans, root);
428 btrfs_write_dirty_block_groups(trans, root);
429 btrfs_extent_post_op(trans, root);
430
431 while (1) {
432 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
433 if (old_root_bytenr == root->node->start)
434 break;
435 btrfs_set_root_bytenr(&root->root_item,
436 root->node->start);
437 btrfs_set_root_level(&root->root_item,
438 btrfs_header_level(root->node));
439 btrfs_set_root_generation(&root->root_item, trans->transid);
440
441 btrfs_extent_post_op(trans, root);
442
443 ret = btrfs_update_root(trans, tree_root,
444 &root->root_key,
445 &root->root_item);
446 BUG_ON(ret);
447 btrfs_write_dirty_block_groups(trans, root);
448 btrfs_extent_post_op(trans, root);
449 }
450 return 0;
451}
452
453/*
454 * update all the cowonly tree roots on disk
455 */
456int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
457 struct btrfs_root *root)
458{
459 struct btrfs_fs_info *fs_info = root->fs_info;
460 struct list_head *next;
461 struct extent_buffer *eb;
462
463 btrfs_extent_post_op(trans, fs_info->tree_root);
464
465 eb = btrfs_lock_root_node(fs_info->tree_root);
466 btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb, 0);
467 btrfs_tree_unlock(eb);
468 free_extent_buffer(eb);
469
470 btrfs_extent_post_op(trans, fs_info->tree_root);
471
472 while (!list_empty(&fs_info->dirty_cowonly_roots)) {
473 next = fs_info->dirty_cowonly_roots.next;
474 list_del_init(next);
475 root = list_entry(next, struct btrfs_root, dirty_list);
476
477 update_cowonly_root(trans, root);
478 }
479 return 0;
480}
481
482/*
483 * dead roots are old snapshots that need to be deleted. This allocates
484 * a dirty root struct and adds it into the list of dead roots that need to
485 * be deleted
486 */
487int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest)
488{
489 struct btrfs_dirty_root *dirty;
490
491 dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
492 if (!dirty)
493 return -ENOMEM;
494 dirty->root = root;
495 dirty->latest_root = latest;
496
497 mutex_lock(&root->fs_info->trans_mutex);
498 list_add(&dirty->list, &latest->fs_info->dead_roots);
499 mutex_unlock(&root->fs_info->trans_mutex);
500 return 0;
501}
502
503/*
504 * at transaction commit time we need to schedule the old roots for
505 * deletion via btrfs_drop_snapshot. This runs through all the
506 * reference counted roots that were modified in the current
507 * transaction and puts them into the drop list
508 */
509static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
510 struct radix_tree_root *radix,
511 struct list_head *list)
512{
513 struct btrfs_dirty_root *dirty;
514 struct btrfs_root *gang[8];
515 struct btrfs_root *root;
516 int i;
517 int ret;
518 int err = 0;
519 u32 refs;
520
521 while (1) {
522 ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0,
523 ARRAY_SIZE(gang),
524 BTRFS_ROOT_TRANS_TAG);
525 if (ret == 0)
526 break;
527 for (i = 0; i < ret; i++) {
528 root = gang[i];
529 radix_tree_tag_clear(radix,
530 (unsigned long)root->root_key.objectid,
531 BTRFS_ROOT_TRANS_TAG);
532
533 BUG_ON(!root->ref_tree);
534 dirty = root->dirty_root;
535
536 btrfs_free_log(trans, root);
537 btrfs_free_reloc_root(trans, root);
538
539 if (root->commit_root == root->node) {
540 WARN_ON(root->node->start !=
541 btrfs_root_bytenr(&root->root_item));
542
543 free_extent_buffer(root->commit_root);
544 root->commit_root = NULL;
545 root->dirty_root = NULL;
546
547 spin_lock(&root->list_lock);
548 list_del_init(&dirty->root->dead_list);
549 spin_unlock(&root->list_lock);
550
551 kfree(dirty->root);
552 kfree(dirty);
553
554 /* make sure to update the root on disk
555 * so we get any updates to the block used
556 * counts
557 */
558 err = btrfs_update_root(trans,
559 root->fs_info->tree_root,
560 &root->root_key,
561 &root->root_item);
562 continue;
563 }
564
565 memset(&root->root_item.drop_progress, 0,
566 sizeof(struct btrfs_disk_key));
567 root->root_item.drop_level = 0;
568 root->commit_root = NULL;
569 root->dirty_root = NULL;
570 root->root_key.offset = root->fs_info->generation;
571 btrfs_set_root_bytenr(&root->root_item,
572 root->node->start);
573 btrfs_set_root_level(&root->root_item,
574 btrfs_header_level(root->node));
575 btrfs_set_root_generation(&root->root_item,
576 root->root_key.offset);
577
578 err = btrfs_insert_root(trans, root->fs_info->tree_root,
579 &root->root_key,
580 &root->root_item);
581 if (err)
582 break;
583
584 refs = btrfs_root_refs(&dirty->root->root_item);
585 btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
586 err = btrfs_update_root(trans, root->fs_info->tree_root,
587 &dirty->root->root_key,
588 &dirty->root->root_item);
589
590 BUG_ON(err);
591 if (refs == 1) {
592 list_add(&dirty->list, list);
593 } else {
594 WARN_ON(1);
595 free_extent_buffer(dirty->root->node);
596 kfree(dirty->root);
597 kfree(dirty);
598 }
599 }
600 }
601 return err;
602}
603
604/*
605 * defrag a given btree. If cacheonly == 1, this won't read from the disk,
606 * otherwise every leaf in the btree is read and defragged.
607 */
608int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
609{
610 struct btrfs_fs_info *info = root->fs_info;
611 int ret;
612 struct btrfs_trans_handle *trans;
613 unsigned long nr;
614
615 smp_mb();
616 if (root->defrag_running)
617 return 0;
618 trans = btrfs_start_transaction(root, 1);
619 while (1) {
620 root->defrag_running = 1;
621 ret = btrfs_defrag_leaves(trans, root, cacheonly);
622 nr = trans->blocks_used;
623 btrfs_end_transaction(trans, root);
624 btrfs_btree_balance_dirty(info->tree_root, nr);
625 cond_resched();
626
627 trans = btrfs_start_transaction(root, 1);
628 if (root->fs_info->closing || ret != -EAGAIN)
629 break;
630 }
631 root->defrag_running = 0;
632 smp_mb();
633 btrfs_end_transaction(trans, root);
634 return 0;
635}
636
637/*
638 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
639 * all of them
640 */
641static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
642 struct list_head *list)
643{
644 struct btrfs_dirty_root *dirty;
645 struct btrfs_trans_handle *trans;
646 unsigned long nr;
647 u64 num_bytes;
648 u64 bytes_used;
649 u64 max_useless;
650 int ret = 0;
651 int err;
652
653 while (!list_empty(list)) {
654 struct btrfs_root *root;
655
656 dirty = list_entry(list->prev, struct btrfs_dirty_root, list);
657 list_del_init(&dirty->list);
658
659 num_bytes = btrfs_root_used(&dirty->root->root_item);
660 root = dirty->latest_root;
661 atomic_inc(&root->fs_info->throttles);
662
663 while (1) {
664 trans = btrfs_start_transaction(tree_root, 1);
665 mutex_lock(&root->fs_info->drop_mutex);
666 ret = btrfs_drop_snapshot(trans, dirty->root);
667 if (ret != -EAGAIN)
668 break;
669 mutex_unlock(&root->fs_info->drop_mutex);
670
671 err = btrfs_update_root(trans,
672 tree_root,
673 &dirty->root->root_key,
674 &dirty->root->root_item);
675 if (err)
676 ret = err;
677 nr = trans->blocks_used;
678 ret = btrfs_end_transaction(trans, tree_root);
679 BUG_ON(ret);
680
681 btrfs_btree_balance_dirty(tree_root, nr);
682 cond_resched();
683 }
684 BUG_ON(ret);
685 atomic_dec(&root->fs_info->throttles);
686 wake_up(&root->fs_info->transaction_throttle);
687
688 num_bytes -= btrfs_root_used(&dirty->root->root_item);
689 bytes_used = btrfs_root_used(&root->root_item);
690 if (num_bytes) {
691 btrfs_record_root_in_trans(root);
692 btrfs_set_root_used(&root->root_item,
693 bytes_used - num_bytes);
694 }
695
696 ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key);
697 if (ret) {
698 BUG();
699 break;
700 }
701 mutex_unlock(&root->fs_info->drop_mutex);
702
703 spin_lock(&root->list_lock);
704 list_del_init(&dirty->root->dead_list);
705 if (!list_empty(&root->dead_list)) {
706 struct btrfs_root *oldest;
707 oldest = list_entry(root->dead_list.prev,
708 struct btrfs_root, dead_list);
709 max_useless = oldest->root_key.offset - 1;
710 } else {
711 max_useless = root->root_key.offset - 1;
712 }
713 spin_unlock(&root->list_lock);
714
715 nr = trans->blocks_used;
716 ret = btrfs_end_transaction(trans, tree_root);
717 BUG_ON(ret);
718
719 ret = btrfs_remove_leaf_refs(root, max_useless, 0);
720 BUG_ON(ret);
721
722 free_extent_buffer(dirty->root->node);
723 kfree(dirty->root);
724 kfree(dirty);
725
726 btrfs_btree_balance_dirty(tree_root, nr);
727 cond_resched();
728 }
729 return ret;
730}
731
732/*
733 * new snapshots need to be created at a very specific time in the
734 * transaction commit. This does the actual creation
735 */
736static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
737 struct btrfs_fs_info *fs_info,
738 struct btrfs_pending_snapshot *pending)
739{
740 struct btrfs_key key;
741 struct btrfs_root_item *new_root_item;
742 struct btrfs_root *tree_root = fs_info->tree_root;
743 struct btrfs_root *root = pending->root;
744 struct extent_buffer *tmp;
745 struct extent_buffer *old;
746 int ret;
747 u64 objectid;
748
749 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
750 if (!new_root_item) {
751 ret = -ENOMEM;
752 goto fail;
753 }
754 ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid);
755 if (ret)
756 goto fail;
757
758 btrfs_record_root_in_trans(root);
759 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
760 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
761
762 key.objectid = objectid;
763 key.offset = trans->transid;
764 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
765
766 old = btrfs_lock_root_node(root);
767 btrfs_cow_block(trans, root, old, NULL, 0, &old, 0);
768
769 btrfs_copy_root(trans, root, old, &tmp, objectid);
770 btrfs_tree_unlock(old);
771 free_extent_buffer(old);
772
773 btrfs_set_root_bytenr(new_root_item, tmp->start);
774 btrfs_set_root_level(new_root_item, btrfs_header_level(tmp));
775 btrfs_set_root_generation(new_root_item, trans->transid);
776 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
777 new_root_item);
778 btrfs_tree_unlock(tmp);
779 free_extent_buffer(tmp);
780 if (ret)
781 goto fail;
782
783 key.offset = (u64)-1;
784 memcpy(&pending->root_key, &key, sizeof(key));
785fail:
786 kfree(new_root_item);
787 return ret;
788}
789
790static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
791 struct btrfs_pending_snapshot *pending)
792{
793 int ret;
794 int namelen;
795 u64 index = 0;
796 struct btrfs_trans_handle *trans;
797 struct inode *parent_inode;
798 struct inode *inode;
799 struct btrfs_root *parent_root;
800
801 parent_inode = pending->dentry->d_parent->d_inode;
802 parent_root = BTRFS_I(parent_inode)->root;
803 trans = btrfs_join_transaction(parent_root, 1);
804
805 /*
806 * insert the directory item
807 */
808 namelen = strlen(pending->name);
809 ret = btrfs_set_inode_index(parent_inode, &index);
810 ret = btrfs_insert_dir_item(trans, parent_root,
811 pending->name, namelen,
812 parent_inode->i_ino,
813 &pending->root_key, BTRFS_FT_DIR, index);
814
815 if (ret)
816 goto fail;
817
818 btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2);
819 ret = btrfs_update_inode(trans, parent_root, parent_inode);
820 BUG_ON(ret);
821
822 /* add the backref first */
823 ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
824 pending->root_key.objectid,
825 BTRFS_ROOT_BACKREF_KEY,
826 parent_root->root_key.objectid,
827 parent_inode->i_ino, index, pending->name,
828 namelen);
829
830 BUG_ON(ret);
831
832 /* now add the forward ref */
833 ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
834 parent_root->root_key.objectid,
835 BTRFS_ROOT_REF_KEY,
836 pending->root_key.objectid,
837 parent_inode->i_ino, index, pending->name,
838 namelen);
839
840 inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
841 d_instantiate(pending->dentry, inode);
842fail:
843 btrfs_end_transaction(trans, fs_info->fs_root);
844 return ret;
845}
846
847/*
848 * create all the snapshots we've scheduled for creation
849 */
850static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
851 struct btrfs_fs_info *fs_info)
852{
853 struct btrfs_pending_snapshot *pending;
854 struct list_head *head = &trans->transaction->pending_snapshots;
855 struct list_head *cur;
856 int ret;
857
858 list_for_each(cur, head) {
859 pending = list_entry(cur, struct btrfs_pending_snapshot, list);
860 ret = create_pending_snapshot(trans, fs_info, pending);
861 BUG_ON(ret);
862 }
863 return 0;
864}
865
866static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans,
867 struct btrfs_fs_info *fs_info)
868{
869 struct btrfs_pending_snapshot *pending;
870 struct list_head *head = &trans->transaction->pending_snapshots;
871 int ret;
872
873 while (!list_empty(head)) {
874 pending = list_entry(head->next,
875 struct btrfs_pending_snapshot, list);
876 ret = finish_pending_snapshot(fs_info, pending);
877 BUG_ON(ret);
878 list_del(&pending->list);
879 kfree(pending->name);
880 kfree(pending);
881 }
882 return 0;
883}
884
885int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
886 struct btrfs_root *root)
887{
888 unsigned long joined = 0;
889 unsigned long timeout = 1;
890 struct btrfs_transaction *cur_trans;
891 struct btrfs_transaction *prev_trans = NULL;
892 struct btrfs_root *chunk_root = root->fs_info->chunk_root;
893 struct list_head dirty_fs_roots;
894 struct extent_io_tree *pinned_copy;
895 DEFINE_WAIT(wait);
896 int ret;
897
898 INIT_LIST_HEAD(&dirty_fs_roots);
899 mutex_lock(&root->fs_info->trans_mutex);
900 if (trans->transaction->in_commit) {
901 cur_trans = trans->transaction;
902 trans->transaction->use_count++;
903 mutex_unlock(&root->fs_info->trans_mutex);
904 btrfs_end_transaction(trans, root);
905
906 ret = wait_for_commit(root, cur_trans);
907 BUG_ON(ret);
908
909 mutex_lock(&root->fs_info->trans_mutex);
910 put_transaction(cur_trans);
911 mutex_unlock(&root->fs_info->trans_mutex);
912
913 return 0;
914 }
915
916 pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS);
917 if (!pinned_copy)
918 return -ENOMEM;
919
920 extent_io_tree_init(pinned_copy,
921 root->fs_info->btree_inode->i_mapping, GFP_NOFS);
922
923 trans->transaction->in_commit = 1;
924 trans->transaction->blocked = 1;
925 cur_trans = trans->transaction;
926 if (cur_trans->list.prev != &root->fs_info->trans_list) {
927 prev_trans = list_entry(cur_trans->list.prev,
928 struct btrfs_transaction, list);
929 if (!prev_trans->commit_done) {
930 prev_trans->use_count++;
931 mutex_unlock(&root->fs_info->trans_mutex);
932
933 wait_for_commit(root, prev_trans);
934
935 mutex_lock(&root->fs_info->trans_mutex);
936 put_transaction(prev_trans);
937 }
938 }
939
940 do {
941 int snap_pending = 0;
942 joined = cur_trans->num_joined;
943 if (!list_empty(&trans->transaction->pending_snapshots))
944 snap_pending = 1;
945
946 WARN_ON(cur_trans != trans->transaction);
947 prepare_to_wait(&cur_trans->writer_wait, &wait,
948 TASK_UNINTERRUPTIBLE);
949
950 if (cur_trans->num_writers > 1)
951 timeout = MAX_SCHEDULE_TIMEOUT;
952 else
953 timeout = 1;
954
955 mutex_unlock(&root->fs_info->trans_mutex);
956
957 if (snap_pending) {
958 ret = btrfs_wait_ordered_extents(root, 1);
959 BUG_ON(ret);
960 }
961
962 schedule_timeout(timeout);
963
964 mutex_lock(&root->fs_info->trans_mutex);
965 finish_wait(&cur_trans->writer_wait, &wait);
966 } while (cur_trans->num_writers > 1 ||
967 (cur_trans->num_joined != joined));
968
969 ret = create_pending_snapshots(trans, root->fs_info);
970 BUG_ON(ret);
971
972 WARN_ON(cur_trans != trans->transaction);
973
974 /* btrfs_commit_tree_roots is responsible for getting the
975 * various roots consistent with each other. Every pointer
976 * in the tree of tree roots has to point to the most up to date
977 * root for every subvolume and other tree. So, we have to keep
978 * the tree logging code from jumping in and changing any
979 * of the trees.
980 *
981 * At this point in the commit, there can't be any tree-log
982 * writers, but a little lower down we drop the trans mutex
983 * and let new people in. By holding the tree_log_mutex
984 * from now until after the super is written, we avoid races
985 * with the tree-log code.
986 */
987 mutex_lock(&root->fs_info->tree_log_mutex);
988 /*
989 * keep tree reloc code from adding new reloc trees
990 */
991 mutex_lock(&root->fs_info->tree_reloc_mutex);
992
993
994 ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
995 &dirty_fs_roots);
996 BUG_ON(ret);
997
998 /* add_dirty_roots gets rid of all the tree log roots, it is now
999 * safe to free the root of tree log roots
1000 */
1001 btrfs_free_log_root_tree(trans, root->fs_info);
1002
1003 ret = btrfs_commit_tree_roots(trans, root);
1004 BUG_ON(ret);
1005
1006 cur_trans = root->fs_info->running_transaction;
1007 spin_lock(&root->fs_info->new_trans_lock);
1008 root->fs_info->running_transaction = NULL;
1009 spin_unlock(&root->fs_info->new_trans_lock);
1010 btrfs_set_super_generation(&root->fs_info->super_copy,
1011 cur_trans->transid);
1012 btrfs_set_super_root(&root->fs_info->super_copy,
1013 root->fs_info->tree_root->node->start);
1014 btrfs_set_super_root_level(&root->fs_info->super_copy,
1015 btrfs_header_level(root->fs_info->tree_root->node));
1016
1017 btrfs_set_super_chunk_root(&root->fs_info->super_copy,
1018 chunk_root->node->start);
1019 btrfs_set_super_chunk_root_level(&root->fs_info->super_copy,
1020 btrfs_header_level(chunk_root->node));
1021 btrfs_set_super_chunk_root_generation(&root->fs_info->super_copy,
1022 btrfs_header_generation(chunk_root->node));
1023
1024 if (!root->fs_info->log_root_recovering) {
1025 btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
1026 btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0);
1027 }
1028
1029 memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
1030 sizeof(root->fs_info->super_copy));
1031
1032 btrfs_copy_pinned(root, pinned_copy);
1033
1034 trans->transaction->blocked = 0;
1035 wake_up(&root->fs_info->transaction_throttle);
1036 wake_up(&root->fs_info->transaction_wait);
1037
1038 mutex_unlock(&root->fs_info->trans_mutex);
1039 ret = btrfs_write_and_wait_transaction(trans, root);
1040 BUG_ON(ret);
1041 write_ctree_super(trans, root, 0);
1042
1043 /*
1044 * the super is written, we can safely allow the tree-loggers
1045 * to go about their business
1046 */
1047 mutex_unlock(&root->fs_info->tree_log_mutex);
1048
1049 btrfs_finish_extent_commit(trans, root, pinned_copy);
1050 kfree(pinned_copy);
1051
1052 btrfs_drop_dead_reloc_roots(root);
1053 mutex_unlock(&root->fs_info->tree_reloc_mutex);
1054
1055 /* do the directory inserts of any pending snapshot creations */
1056 finish_pending_snapshots(trans, root->fs_info);
1057
1058 mutex_lock(&root->fs_info->trans_mutex);
1059
1060 cur_trans->commit_done = 1;
1061 root->fs_info->last_trans_committed = cur_trans->transid;
1062 wake_up(&cur_trans->commit_wait);
1063
1064 put_transaction(cur_trans);
1065 put_transaction(cur_trans);
1066
1067 list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
1068 if (root->fs_info->closing)
1069 list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);
1070
1071 mutex_unlock(&root->fs_info->trans_mutex);
1072
1073 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1074
1075 if (root->fs_info->closing)
1076 drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
1077 return ret;
1078}
1079
1080/*
1081 * interface function to delete all the snapshots we have scheduled for deletion
1082 */
1083int btrfs_clean_old_snapshots(struct btrfs_root *root)
1084{
1085 struct list_head dirty_roots;
1086 INIT_LIST_HEAD(&dirty_roots);
1087again:
1088 mutex_lock(&root->fs_info->trans_mutex);
1089 list_splice_init(&root->fs_info->dead_roots, &dirty_roots);
1090 mutex_unlock(&root->fs_info->trans_mutex);
1091
1092 if (!list_empty(&dirty_roots)) {
1093 drop_dirty_roots(root, &dirty_roots);
1094 goto again;
1095 }
1096 return 0;
1097}
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
new file mode 100644
index 000000000000..ea292117f882
--- /dev/null
+++ b/fs/btrfs/transaction.h
@@ -0,0 +1,106 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __BTRFS_TRANSACTION__
20#define __BTRFS_TRANSACTION__
21#include "btrfs_inode.h"
22
23struct btrfs_transaction {
24 u64 transid;
25 unsigned long num_writers;
26 unsigned long num_joined;
27 int in_commit;
28 int use_count;
29 int commit_done;
30 int blocked;
31 struct list_head list;
32 struct extent_io_tree dirty_pages;
33 unsigned long start_time;
34 wait_queue_head_t writer_wait;
35 wait_queue_head_t commit_wait;
36 struct list_head pending_snapshots;
37};
38
39struct btrfs_trans_handle {
40 u64 transid;
41 unsigned long blocks_reserved;
42 unsigned long blocks_used;
43 struct btrfs_transaction *transaction;
44 u64 block_group;
45 u64 alloc_exclude_start;
46 u64 alloc_exclude_nr;
47};
48
49struct btrfs_pending_snapshot {
50 struct dentry *dentry;
51 struct btrfs_root *root;
52 char *name;
53 struct btrfs_key root_key;
54 struct list_head list;
55};
56
57struct btrfs_dirty_root {
58 struct list_head list;
59 struct btrfs_root *root;
60 struct btrfs_root *latest_root;
61};
62
63static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans,
64 struct inode *inode)
65{
66 trans->block_group = BTRFS_I(inode)->block_group;
67}
68
69static inline void btrfs_update_inode_block_group(
70 struct btrfs_trans_handle *trans,
71 struct inode *inode)
72{
73 BTRFS_I(inode)->block_group = trans->block_group;
74}
75
76static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
77 struct inode *inode)
78{
79 BTRFS_I(inode)->last_trans = trans->transaction->transid;
80}
81
82int btrfs_end_transaction(struct btrfs_trans_handle *trans,
83 struct btrfs_root *root);
84struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
85 int num_blocks);
86struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
87 int num_blocks);
88struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
89 int num_blocks);
90int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
91 struct btrfs_root *root);
92int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
93 struct btrfs_root *root);
94
95int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest);
96int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
97int btrfs_clean_old_snapshots(struct btrfs_root *root);
98int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
99 struct btrfs_root *root);
100int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
101 struct btrfs_root *root);
102void btrfs_throttle(struct btrfs_root *root);
103int btrfs_record_root_in_trans(struct btrfs_root *root);
104int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
105 struct extent_io_tree *dirty_pages);
106#endif
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
new file mode 100644
index 000000000000..3e8358c36165
--- /dev/null
+++ b/fs/btrfs/tree-defrag.c
@@ -0,0 +1,147 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include "ctree.h"
21#include "disk-io.h"
22#include "print-tree.h"
23#include "transaction.h"
24#include "locking.h"
25
26/* defrag all the leaves in a given btree. If cache_only == 1, don't read
27 * things from disk, otherwise read all the leaves and try to get key order to
28 * better reflect disk order
29 */
30
31int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
32 struct btrfs_root *root, int cache_only)
33{
34 struct btrfs_path *path = NULL;
35 struct btrfs_key key;
36 int ret = 0;
37 int wret;
38 int level;
39 int orig_level;
40 int is_extent = 0;
41 int next_key_ret = 0;
42 u64 last_ret = 0;
43 u64 min_trans = 0;
44
45 if (cache_only)
46 goto out;
47
48 if (root->fs_info->extent_root == root) {
49 /*
50 * there's recursion here right now in the tree locking,
51 * we can't defrag the extent root without deadlock
52 */
53 goto out;
54 }
55
56 if (root->ref_cows == 0 && !is_extent)
57 goto out;
58
59 if (btrfs_test_opt(root, SSD))
60 goto out;
61
62 path = btrfs_alloc_path();
63 if (!path)
64 return -ENOMEM;
65
66 level = btrfs_header_level(root->node);
67 orig_level = level;
68
69 if (level == 0)
70 goto out;
71
72 if (root->defrag_progress.objectid == 0) {
73 struct extent_buffer *root_node;
74 u32 nritems;
75
76 root_node = btrfs_lock_root_node(root);
77 nritems = btrfs_header_nritems(root_node);
78 root->defrag_max.objectid = 0;
79 /* from above we know this is not a leaf */
80 btrfs_node_key_to_cpu(root_node, &root->defrag_max,
81 nritems - 1);
82 btrfs_tree_unlock(root_node);
83 free_extent_buffer(root_node);
84 memset(&key, 0, sizeof(key));
85 } else {
86 memcpy(&key, &root->defrag_progress, sizeof(key));
87 }
88
89 path->keep_locks = 1;
90 if (cache_only)
91 min_trans = root->defrag_trans_start;
92
93 ret = btrfs_search_forward(root, &key, NULL, path,
94 cache_only, min_trans);
95 if (ret < 0)
96 goto out;
97 if (ret > 0) {
98 ret = 0;
99 goto out;
100 }
101 btrfs_release_path(root, path);
102 wret = btrfs_search_slot(trans, root, &key, path, 0, 1);
103
104 if (wret < 0) {
105 ret = wret;
106 goto out;
107 }
108 if (!path->nodes[1]) {
109 ret = 0;
110 goto out;
111 }
112 path->slots[1] = btrfs_header_nritems(path->nodes[1]);
113 next_key_ret = btrfs_find_next_key(root, path, &key, 1, cache_only,
114 min_trans);
115 ret = btrfs_realloc_node(trans, root,
116 path->nodes[1], 0,
117 cache_only, &last_ret,
118 &root->defrag_progress);
119 WARN_ON(ret && ret != -EAGAIN);
120 if (next_key_ret == 0) {
121 memcpy(&root->defrag_progress, &key, sizeof(key));
122 ret = -EAGAIN;
123 }
124
125 btrfs_release_path(root, path);
126 if (is_extent)
127 btrfs_extent_post_op(trans, root);
128out:
129 if (path)
130 btrfs_free_path(path);
131 if (ret == -EAGAIN) {
132 if (root->defrag_max.objectid > root->defrag_progress.objectid)
133 goto done;
134 if (root->defrag_max.type > root->defrag_progress.type)
135 goto done;
136 if (root->defrag_max.offset > root->defrag_progress.offset)
137 goto done;
138 ret = 0;
139 }
140done:
141 if (ret != -EAGAIN) {
142 memset(&root->defrag_progress, 0,
143 sizeof(root->defrag_progress));
144 root->defrag_trans_start = trans->transid;
145 }
146 return ret;
147}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
new file mode 100644
index 000000000000..d81cda2e077c
--- /dev/null
+++ b/fs/btrfs/tree-log.c
@@ -0,0 +1,2898 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include "ctree.h"
21#include "transaction.h"
22#include "disk-io.h"
23#include "locking.h"
24#include "print-tree.h"
25#include "compat.h"
26#include "tree-log.h"
27
28/* magic values for the inode_only field in btrfs_log_inode:
29 *
30 * LOG_INODE_ALL means to log everything
31 * LOG_INODE_EXISTS means to log just enough to recreate the inode
32 * during log replay
33 */
34#define LOG_INODE_ALL 0
35#define LOG_INODE_EXISTS 1
36
37/*
38 * stages for the tree walking. The first
39 * stage (0) is to only pin down the blocks we find
40 * the second stage (1) is to make sure that all the inodes
41 * we find in the log are created in the subvolume.
42 *
43 * The last stage is to deal with directories and links and extents
44 * and all the other fun semantics
45 */
46#define LOG_WALK_PIN_ONLY 0
47#define LOG_WALK_REPLAY_INODES 1
48#define LOG_WALK_REPLAY_ALL 2
49
50static int __btrfs_log_inode(struct btrfs_trans_handle *trans,
51 struct btrfs_root *root, struct inode *inode,
52 int inode_only);
53static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
54 struct btrfs_root *root,
55 struct btrfs_path *path, u64 objectid);
56
57/*
58 * tree logging is a special write ahead log used to make sure that
59 * fsyncs and O_SYNCs can happen without doing full tree commits.
60 *
61 * Full tree commits are expensive because they require commonly
62 * modified blocks to be recowed, creating many dirty pages in the
63 * extent tree an 4x-6x higher write load than ext3.
64 *
65 * Instead of doing a tree commit on every fsync, we use the
66 * key ranges and transaction ids to find items for a given file or directory
67 * that have changed in this transaction. Those items are copied into
68 * a special tree (one per subvolume root), that tree is written to disk
69 * and then the fsync is considered complete.
70 *
71 * After a crash, items are copied out of the log-tree back into the
72 * subvolume tree. Any file data extents found are recorded in the extent
73 * allocation tree, and the log-tree freed.
74 *
75 * The log tree is read three times, once to pin down all the extents it is
76 * using in ram and once, once to create all the inodes logged in the tree
77 * and once to do all the other items.
78 */
79
80/*
81 * btrfs_add_log_tree adds a new per-subvolume log tree into the
82 * tree of log tree roots. This must be called with a tree log transaction
83 * running (see start_log_trans).
84 */
85static int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
86 struct btrfs_root *root)
87{
88 struct btrfs_key key;
89 struct btrfs_root_item root_item;
90 struct btrfs_inode_item *inode_item;
91 struct extent_buffer *leaf;
92 struct btrfs_root *new_root = root;
93 int ret;
94 u64 objectid = root->root_key.objectid;
95
96 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
97 BTRFS_TREE_LOG_OBJECTID,
98 trans->transid, 0, 0, 0);
99 if (IS_ERR(leaf)) {
100 ret = PTR_ERR(leaf);
101 return ret;
102 }
103
104 btrfs_set_header_nritems(leaf, 0);
105 btrfs_set_header_level(leaf, 0);
106 btrfs_set_header_bytenr(leaf, leaf->start);
107 btrfs_set_header_generation(leaf, trans->transid);
108 btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID);
109
110 write_extent_buffer(leaf, root->fs_info->fsid,
111 (unsigned long)btrfs_header_fsid(leaf),
112 BTRFS_FSID_SIZE);
113 btrfs_mark_buffer_dirty(leaf);
114
115 inode_item = &root_item.inode;
116 memset(inode_item, 0, sizeof(*inode_item));
117 inode_item->generation = cpu_to_le64(1);
118 inode_item->size = cpu_to_le64(3);
119 inode_item->nlink = cpu_to_le32(1);
120 inode_item->nbytes = cpu_to_le64(root->leafsize);
121 inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
122
123 btrfs_set_root_bytenr(&root_item, leaf->start);
124 btrfs_set_root_generation(&root_item, trans->transid);
125 btrfs_set_root_level(&root_item, 0);
126 btrfs_set_root_refs(&root_item, 0);
127 btrfs_set_root_used(&root_item, 0);
128
129 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
130 root_item.drop_level = 0;
131
132 btrfs_tree_unlock(leaf);
133 free_extent_buffer(leaf);
134 leaf = NULL;
135
136 btrfs_set_root_dirid(&root_item, 0);
137
138 key.objectid = BTRFS_TREE_LOG_OBJECTID;
139 key.offset = objectid;
140 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
141 ret = btrfs_insert_root(trans, root->fs_info->log_root_tree, &key,
142 &root_item);
143 if (ret)
144 goto fail;
145
146 new_root = btrfs_read_fs_root_no_radix(root->fs_info->log_root_tree,
147 &key);
148 BUG_ON(!new_root);
149
150 WARN_ON(root->log_root);
151 root->log_root = new_root;
152
153 /*
154 * log trees do not get reference counted because they go away
155 * before a real commit is actually done. They do store pointers
156 * to file data extents, and those reference counts still get
157 * updated (along with back refs to the log tree).
158 */
159 new_root->ref_cows = 0;
160 new_root->last_trans = trans->transid;
161
162 /*
163 * we need to make sure the root block for this new tree
164 * is marked as dirty in the dirty_log_pages tree. This
165 * is how it gets flushed down to disk at tree log commit time.
166 *
167 * the tree logging mutex keeps others from coming in and changing
168 * the new_root->node, so we can safely access it here
169 */
170 set_extent_dirty(&new_root->dirty_log_pages, new_root->node->start,
171 new_root->node->start + new_root->node->len - 1,
172 GFP_NOFS);
173
174fail:
175 return ret;
176}
177
178/*
179 * start a sub transaction and setup the log tree
180 * this increments the log tree writer count to make the people
181 * syncing the tree wait for us to finish
182 */
183static int start_log_trans(struct btrfs_trans_handle *trans,
184 struct btrfs_root *root)
185{
186 int ret;
187 mutex_lock(&root->fs_info->tree_log_mutex);
188 if (!root->fs_info->log_root_tree) {
189 ret = btrfs_init_log_root_tree(trans, root->fs_info);
190 BUG_ON(ret);
191 }
192 if (!root->log_root) {
193 ret = btrfs_add_log_tree(trans, root);
194 BUG_ON(ret);
195 }
196 atomic_inc(&root->fs_info->tree_log_writers);
197 root->fs_info->tree_log_batch++;
198 mutex_unlock(&root->fs_info->tree_log_mutex);
199 return 0;
200}
201
202/*
203 * returns 0 if there was a log transaction running and we were able
204 * to join, or returns -ENOENT if there were not transactions
205 * in progress
206 */
207static int join_running_log_trans(struct btrfs_root *root)
208{
209 int ret = -ENOENT;
210
211 smp_mb();
212 if (!root->log_root)
213 return -ENOENT;
214
215 mutex_lock(&root->fs_info->tree_log_mutex);
216 if (root->log_root) {
217 ret = 0;
218 atomic_inc(&root->fs_info->tree_log_writers);
219 root->fs_info->tree_log_batch++;
220 }
221 mutex_unlock(&root->fs_info->tree_log_mutex);
222 return ret;
223}
224
225/*
226 * indicate we're done making changes to the log tree
227 * and wake up anyone waiting to do a sync
228 */
229static int end_log_trans(struct btrfs_root *root)
230{
231 atomic_dec(&root->fs_info->tree_log_writers);
232 smp_mb();
233 if (waitqueue_active(&root->fs_info->tree_log_wait))
234 wake_up(&root->fs_info->tree_log_wait);
235 return 0;
236}
237
238
239/*
240 * the walk control struct is used to pass state down the chain when
241 * processing the log tree. The stage field tells us which part
242 * of the log tree processing we are currently doing. The others
243 * are state fields used for that specific part
244 */
245struct walk_control {
246 /* should we free the extent on disk when done? This is used
247 * at transaction commit time while freeing a log tree
248 */
249 int free;
250
251 /* should we write out the extent buffer? This is used
252 * while flushing the log tree to disk during a sync
253 */
254 int write;
255
256 /* should we wait for the extent buffer io to finish? Also used
257 * while flushing the log tree to disk for a sync
258 */
259 int wait;
260
261 /* pin only walk, we record which extents on disk belong to the
262 * log trees
263 */
264 int pin;
265
266 /* what stage of the replay code we're currently in */
267 int stage;
268
269 /* the root we are currently replaying */
270 struct btrfs_root *replay_dest;
271
272 /* the trans handle for the current replay */
273 struct btrfs_trans_handle *trans;
274
275 /* the function that gets used to process blocks we find in the
276 * tree. Note the extent_buffer might not be up to date when it is
277 * passed in, and it must be checked or read if you need the data
278 * inside it
279 */
280 int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb,
281 struct walk_control *wc, u64 gen);
282};
283
284/*
285 * process_func used to pin down extents, write them or wait on them
286 */
287static int process_one_buffer(struct btrfs_root *log,
288 struct extent_buffer *eb,
289 struct walk_control *wc, u64 gen)
290{
291 if (wc->pin) {
292 mutex_lock(&log->fs_info->pinned_mutex);
293 btrfs_update_pinned_extents(log->fs_info->extent_root,
294 eb->start, eb->len, 1);
295 mutex_unlock(&log->fs_info->pinned_mutex);
296 }
297
298 if (btrfs_buffer_uptodate(eb, gen)) {
299 if (wc->write)
300 btrfs_write_tree_block(eb);
301 if (wc->wait)
302 btrfs_wait_tree_block_writeback(eb);
303 }
304 return 0;
305}
306
307/*
308 * Item overwrite used by replay and tree logging. eb, slot and key all refer
309 * to the src data we are copying out.
310 *
311 * root is the tree we are copying into, and path is a scratch
312 * path for use in this function (it should be released on entry and
313 * will be released on exit).
314 *
315 * If the key is already in the destination tree the existing item is
316 * overwritten. If the existing item isn't big enough, it is extended.
317 * If it is too large, it is truncated.
318 *
319 * If the key isn't in the destination yet, a new item is inserted.
320 */
321static noinline int overwrite_item(struct btrfs_trans_handle *trans,
322 struct btrfs_root *root,
323 struct btrfs_path *path,
324 struct extent_buffer *eb, int slot,
325 struct btrfs_key *key)
326{
327 int ret;
328 u32 item_size;
329 u64 saved_i_size = 0;
330 int save_old_i_size = 0;
331 unsigned long src_ptr;
332 unsigned long dst_ptr;
333 int overwrite_root = 0;
334
335 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
336 overwrite_root = 1;
337
338 item_size = btrfs_item_size_nr(eb, slot);
339 src_ptr = btrfs_item_ptr_offset(eb, slot);
340
341 /* look for the key in the destination tree */
342 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
343 if (ret == 0) {
344 char *src_copy;
345 char *dst_copy;
346 u32 dst_size = btrfs_item_size_nr(path->nodes[0],
347 path->slots[0]);
348 if (dst_size != item_size)
349 goto insert;
350
351 if (item_size == 0) {
352 btrfs_release_path(root, path);
353 return 0;
354 }
355 dst_copy = kmalloc(item_size, GFP_NOFS);
356 src_copy = kmalloc(item_size, GFP_NOFS);
357
358 read_extent_buffer(eb, src_copy, src_ptr, item_size);
359
360 dst_ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
361 read_extent_buffer(path->nodes[0], dst_copy, dst_ptr,
362 item_size);
363 ret = memcmp(dst_copy, src_copy, item_size);
364
365 kfree(dst_copy);
366 kfree(src_copy);
367 /*
368 * they have the same contents, just return, this saves
369 * us from cowing blocks in the destination tree and doing
370 * extra writes that may not have been done by a previous
371 * sync
372 */
373 if (ret == 0) {
374 btrfs_release_path(root, path);
375 return 0;
376 }
377
378 }
379insert:
380 btrfs_release_path(root, path);
381 /* try to insert the key into the destination tree */
382 ret = btrfs_insert_empty_item(trans, root, path,
383 key, item_size);
384
385 /* make sure any existing item is the correct size */
386 if (ret == -EEXIST) {
387 u32 found_size;
388 found_size = btrfs_item_size_nr(path->nodes[0],
389 path->slots[0]);
390 if (found_size > item_size) {
391 btrfs_truncate_item(trans, root, path, item_size, 1);
392 } else if (found_size < item_size) {
393 ret = btrfs_extend_item(trans, root, path,
394 item_size - found_size);
395 BUG_ON(ret);
396 }
397 } else if (ret) {
398 BUG();
399 }
400 dst_ptr = btrfs_item_ptr_offset(path->nodes[0],
401 path->slots[0]);
402
403 /* don't overwrite an existing inode if the generation number
404 * was logged as zero. This is done when the tree logging code
405 * is just logging an inode to make sure it exists after recovery.
406 *
407 * Also, don't overwrite i_size on directories during replay.
408 * log replay inserts and removes directory items based on the
409 * state of the tree found in the subvolume, and i_size is modified
410 * as it goes
411 */
412 if (key->type == BTRFS_INODE_ITEM_KEY && ret == -EEXIST) {
413 struct btrfs_inode_item *src_item;
414 struct btrfs_inode_item *dst_item;
415
416 src_item = (struct btrfs_inode_item *)src_ptr;
417 dst_item = (struct btrfs_inode_item *)dst_ptr;
418
419 if (btrfs_inode_generation(eb, src_item) == 0)
420 goto no_copy;
421
422 if (overwrite_root &&
423 S_ISDIR(btrfs_inode_mode(eb, src_item)) &&
424 S_ISDIR(btrfs_inode_mode(path->nodes[0], dst_item))) {
425 save_old_i_size = 1;
426 saved_i_size = btrfs_inode_size(path->nodes[0],
427 dst_item);
428 }
429 }
430
431 copy_extent_buffer(path->nodes[0], eb, dst_ptr,
432 src_ptr, item_size);
433
434 if (save_old_i_size) {
435 struct btrfs_inode_item *dst_item;
436 dst_item = (struct btrfs_inode_item *)dst_ptr;
437 btrfs_set_inode_size(path->nodes[0], dst_item, saved_i_size);
438 }
439
440 /* make sure the generation is filled in */
441 if (key->type == BTRFS_INODE_ITEM_KEY) {
442 struct btrfs_inode_item *dst_item;
443 dst_item = (struct btrfs_inode_item *)dst_ptr;
444 if (btrfs_inode_generation(path->nodes[0], dst_item) == 0) {
445 btrfs_set_inode_generation(path->nodes[0], dst_item,
446 trans->transid);
447 }
448 }
449no_copy:
450 btrfs_mark_buffer_dirty(path->nodes[0]);
451 btrfs_release_path(root, path);
452 return 0;
453}
454
455/*
456 * simple helper to read an inode off the disk from a given root
457 * This can only be called for subvolume roots and not for the log
458 */
459static noinline struct inode *read_one_inode(struct btrfs_root *root,
460 u64 objectid)
461{
462 struct inode *inode;
463 inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
464 if (inode->i_state & I_NEW) {
465 BTRFS_I(inode)->root = root;
466 BTRFS_I(inode)->location.objectid = objectid;
467 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
468 BTRFS_I(inode)->location.offset = 0;
469 btrfs_read_locked_inode(inode);
470 unlock_new_inode(inode);
471
472 }
473 if (is_bad_inode(inode)) {
474 iput(inode);
475 inode = NULL;
476 }
477 return inode;
478}
479
480/* replays a single extent in 'eb' at 'slot' with 'key' into the
481 * subvolume 'root'. path is released on entry and should be released
482 * on exit.
483 *
484 * extents in the log tree have not been allocated out of the extent
485 * tree yet. So, this completes the allocation, taking a reference
486 * as required if the extent already exists or creating a new extent
487 * if it isn't in the extent allocation tree yet.
488 *
489 * The extent is inserted into the file, dropping any existing extents
490 * from the file that overlap the new one.
491 */
492static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
493 struct btrfs_root *root,
494 struct btrfs_path *path,
495 struct extent_buffer *eb, int slot,
496 struct btrfs_key *key)
497{
498 int found_type;
499 u64 mask = root->sectorsize - 1;
500 u64 extent_end;
501 u64 alloc_hint;
502 u64 start = key->offset;
503 u64 saved_nbytes;
504 struct btrfs_file_extent_item *item;
505 struct inode *inode = NULL;
506 unsigned long size;
507 int ret = 0;
508
509 item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
510 found_type = btrfs_file_extent_type(eb, item);
511
512 if (found_type == BTRFS_FILE_EXTENT_REG ||
513 found_type == BTRFS_FILE_EXTENT_PREALLOC)
514 extent_end = start + btrfs_file_extent_num_bytes(eb, item);
515 else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
516 size = btrfs_file_extent_inline_len(eb, item);
517 extent_end = (start + size + mask) & ~mask;
518 } else {
519 ret = 0;
520 goto out;
521 }
522
523 inode = read_one_inode(root, key->objectid);
524 if (!inode) {
525 ret = -EIO;
526 goto out;
527 }
528
529 /*
530 * first check to see if we already have this extent in the
531 * file. This must be done before the btrfs_drop_extents run
532 * so we don't try to drop this extent.
533 */
534 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
535 start, 0);
536
537 if (ret == 0 &&
538 (found_type == BTRFS_FILE_EXTENT_REG ||
539 found_type == BTRFS_FILE_EXTENT_PREALLOC)) {
540 struct btrfs_file_extent_item cmp1;
541 struct btrfs_file_extent_item cmp2;
542 struct btrfs_file_extent_item *existing;
543 struct extent_buffer *leaf;
544
545 leaf = path->nodes[0];
546 existing = btrfs_item_ptr(leaf, path->slots[0],
547 struct btrfs_file_extent_item);
548
549 read_extent_buffer(eb, &cmp1, (unsigned long)item,
550 sizeof(cmp1));
551 read_extent_buffer(leaf, &cmp2, (unsigned long)existing,
552 sizeof(cmp2));
553
554 /*
555 * we already have a pointer to this exact extent,
556 * we don't have to do anything
557 */
558 if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) {
559 btrfs_release_path(root, path);
560 goto out;
561 }
562 }
563 btrfs_release_path(root, path);
564
565 saved_nbytes = inode_get_bytes(inode);
566 /* drop any overlapping extents */
567 ret = btrfs_drop_extents(trans, root, inode,
568 start, extent_end, start, &alloc_hint);
569 BUG_ON(ret);
570
571 if (found_type == BTRFS_FILE_EXTENT_REG ||
572 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
573 unsigned long dest_offset;
574 struct btrfs_key ins;
575
576 ret = btrfs_insert_empty_item(trans, root, path, key,
577 sizeof(*item));
578 BUG_ON(ret);
579 dest_offset = btrfs_item_ptr_offset(path->nodes[0],
580 path->slots[0]);
581 copy_extent_buffer(path->nodes[0], eb, dest_offset,
582 (unsigned long)item, sizeof(*item));
583
584 ins.objectid = btrfs_file_extent_disk_bytenr(eb, item);
585 ins.offset = btrfs_file_extent_disk_num_bytes(eb, item);
586 ins.type = BTRFS_EXTENT_ITEM_KEY;
587
588 if (ins.objectid > 0) {
589 u64 csum_start;
590 u64 csum_end;
591 LIST_HEAD(ordered_sums);
592 /*
593 * is this extent already allocated in the extent
594 * allocation tree? If so, just add a reference
595 */
596 ret = btrfs_lookup_extent(root, ins.objectid,
597 ins.offset);
598 if (ret == 0) {
599 ret = btrfs_inc_extent_ref(trans, root,
600 ins.objectid, ins.offset,
601 path->nodes[0]->start,
602 root->root_key.objectid,
603 trans->transid, key->objectid);
604 } else {
605 /*
606 * insert the extent pointer in the extent
607 * allocation tree
608 */
609 ret = btrfs_alloc_logged_extent(trans, root,
610 path->nodes[0]->start,
611 root->root_key.objectid,
612 trans->transid, key->objectid,
613 &ins);
614 BUG_ON(ret);
615 }
616 btrfs_release_path(root, path);
617
618 if (btrfs_file_extent_compression(eb, item)) {
619 csum_start = ins.objectid;
620 csum_end = csum_start + ins.offset;
621 } else {
622 csum_start = ins.objectid +
623 btrfs_file_extent_offset(eb, item);
624 csum_end = csum_start +
625 btrfs_file_extent_num_bytes(eb, item);
626 }
627
628 ret = btrfs_lookup_csums_range(root->log_root,
629 csum_start, csum_end - 1,
630 &ordered_sums);
631 BUG_ON(ret);
632 while (!list_empty(&ordered_sums)) {
633 struct btrfs_ordered_sum *sums;
634 sums = list_entry(ordered_sums.next,
635 struct btrfs_ordered_sum,
636 list);
637 ret = btrfs_csum_file_blocks(trans,
638 root->fs_info->csum_root,
639 sums);
640 BUG_ON(ret);
641 list_del(&sums->list);
642 kfree(sums);
643 }
644 } else {
645 btrfs_release_path(root, path);
646 }
647 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
648 /* inline extents are easy, we just overwrite them */
649 ret = overwrite_item(trans, root, path, eb, slot, key);
650 BUG_ON(ret);
651 }
652
653 inode_set_bytes(inode, saved_nbytes);
654 btrfs_update_inode(trans, root, inode);
655out:
656 if (inode)
657 iput(inode);
658 return ret;
659}
660
661/*
662 * when cleaning up conflicts between the directory names in the
663 * subvolume, directory names in the log and directory names in the
664 * inode back references, we may have to unlink inodes from directories.
665 *
666 * This is a helper function to do the unlink of a specific directory
667 * item
668 */
669static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
670 struct btrfs_root *root,
671 struct btrfs_path *path,
672 struct inode *dir,
673 struct btrfs_dir_item *di)
674{
675 struct inode *inode;
676 char *name;
677 int name_len;
678 struct extent_buffer *leaf;
679 struct btrfs_key location;
680 int ret;
681
682 leaf = path->nodes[0];
683
684 btrfs_dir_item_key_to_cpu(leaf, di, &location);
685 name_len = btrfs_dir_name_len(leaf, di);
686 name = kmalloc(name_len, GFP_NOFS);
687 read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len);
688 btrfs_release_path(root, path);
689
690 inode = read_one_inode(root, location.objectid);
691 BUG_ON(!inode);
692
693 ret = link_to_fixup_dir(trans, root, path, location.objectid);
694 BUG_ON(ret);
695 ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
696 BUG_ON(ret);
697 kfree(name);
698
699 iput(inode);
700 return ret;
701}
702
703/*
704 * helper function to see if a given name and sequence number found
705 * in an inode back reference are already in a directory and correctly
706 * point to this inode
707 */
708static noinline int inode_in_dir(struct btrfs_root *root,
709 struct btrfs_path *path,
710 u64 dirid, u64 objectid, u64 index,
711 const char *name, int name_len)
712{
713 struct btrfs_dir_item *di;
714 struct btrfs_key location;
715 int match = 0;
716
717 di = btrfs_lookup_dir_index_item(NULL, root, path, dirid,
718 index, name, name_len, 0);
719 if (di && !IS_ERR(di)) {
720 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
721 if (location.objectid != objectid)
722 goto out;
723 } else
724 goto out;
725 btrfs_release_path(root, path);
726
727 di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0);
728 if (di && !IS_ERR(di)) {
729 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
730 if (location.objectid != objectid)
731 goto out;
732 } else
733 goto out;
734 match = 1;
735out:
736 btrfs_release_path(root, path);
737 return match;
738}
739
740/*
741 * helper function to check a log tree for a named back reference in
742 * an inode. This is used to decide if a back reference that is
743 * found in the subvolume conflicts with what we find in the log.
744 *
745 * inode backreferences may have multiple refs in a single item,
746 * during replay we process one reference at a time, and we don't
747 * want to delete valid links to a file from the subvolume if that
748 * link is also in the log.
749 */
750static noinline int backref_in_log(struct btrfs_root *log,
751 struct btrfs_key *key,
752 char *name, int namelen)
753{
754 struct btrfs_path *path;
755 struct btrfs_inode_ref *ref;
756 unsigned long ptr;
757 unsigned long ptr_end;
758 unsigned long name_ptr;
759 int found_name_len;
760 int item_size;
761 int ret;
762 int match = 0;
763
764 path = btrfs_alloc_path();
765 ret = btrfs_search_slot(NULL, log, key, path, 0, 0);
766 if (ret != 0)
767 goto out;
768
769 item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
770 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
771 ptr_end = ptr + item_size;
772 while (ptr < ptr_end) {
773 ref = (struct btrfs_inode_ref *)ptr;
774 found_name_len = btrfs_inode_ref_name_len(path->nodes[0], ref);
775 if (found_name_len == namelen) {
776 name_ptr = (unsigned long)(ref + 1);
777 ret = memcmp_extent_buffer(path->nodes[0], name,
778 name_ptr, namelen);
779 if (ret == 0) {
780 match = 1;
781 goto out;
782 }
783 }
784 ptr = (unsigned long)(ref + 1) + found_name_len;
785 }
786out:
787 btrfs_free_path(path);
788 return match;
789}
790
791
792/*
793 * replay one inode back reference item found in the log tree.
794 * eb, slot and key refer to the buffer and key found in the log tree.
795 * root is the destination we are replaying into, and path is for temp
796 * use by this function. (it should be released on return).
797 */
798static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
799 struct btrfs_root *root,
800 struct btrfs_root *log,
801 struct btrfs_path *path,
802 struct extent_buffer *eb, int slot,
803 struct btrfs_key *key)
804{
805 struct inode *dir;
806 int ret;
807 struct btrfs_key location;
808 struct btrfs_inode_ref *ref;
809 struct btrfs_dir_item *di;
810 struct inode *inode;
811 char *name;
812 int namelen;
813 unsigned long ref_ptr;
814 unsigned long ref_end;
815
816 location.objectid = key->objectid;
817 location.type = BTRFS_INODE_ITEM_KEY;
818 location.offset = 0;
819
820 /*
821 * it is possible that we didn't log all the parent directories
822 * for a given inode. If we don't find the dir, just don't
823 * copy the back ref in. The link count fixup code will take
824 * care of the rest
825 */
826 dir = read_one_inode(root, key->offset);
827 if (!dir)
828 return -ENOENT;
829
830 inode = read_one_inode(root, key->objectid);
831 BUG_ON(!dir);
832
833 ref_ptr = btrfs_item_ptr_offset(eb, slot);
834 ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
835
836again:
837 ref = (struct btrfs_inode_ref *)ref_ptr;
838
839 namelen = btrfs_inode_ref_name_len(eb, ref);
840 name = kmalloc(namelen, GFP_NOFS);
841 BUG_ON(!name);
842
843 read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen);
844
845 /* if we already have a perfect match, we're done */
846 if (inode_in_dir(root, path, dir->i_ino, inode->i_ino,
847 btrfs_inode_ref_index(eb, ref),
848 name, namelen)) {
849 goto out;
850 }
851
852 /*
853 * look for a conflicting back reference in the metadata.
854 * if we find one we have to unlink that name of the file
855 * before we add our new link. Later on, we overwrite any
856 * existing back reference, and we don't want to create
857 * dangling pointers in the directory.
858 */
859conflict_again:
860 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
861 if (ret == 0) {
862 char *victim_name;
863 int victim_name_len;
864 struct btrfs_inode_ref *victim_ref;
865 unsigned long ptr;
866 unsigned long ptr_end;
867 struct extent_buffer *leaf = path->nodes[0];
868
869 /* are we trying to overwrite a back ref for the root directory
870 * if so, just jump out, we're done
871 */
872 if (key->objectid == key->offset)
873 goto out_nowrite;
874
875 /* check all the names in this back reference to see
876 * if they are in the log. if so, we allow them to stay
877 * otherwise they must be unlinked as a conflict
878 */
879 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
880 ptr_end = ptr + btrfs_item_size_nr(leaf, path->slots[0]);
881 while (ptr < ptr_end) {
882 victim_ref = (struct btrfs_inode_ref *)ptr;
883 victim_name_len = btrfs_inode_ref_name_len(leaf,
884 victim_ref);
885 victim_name = kmalloc(victim_name_len, GFP_NOFS);
886 BUG_ON(!victim_name);
887
888 read_extent_buffer(leaf, victim_name,
889 (unsigned long)(victim_ref + 1),
890 victim_name_len);
891
892 if (!backref_in_log(log, key, victim_name,
893 victim_name_len)) {
894 btrfs_inc_nlink(inode);
895 btrfs_release_path(root, path);
896 ret = btrfs_unlink_inode(trans, root, dir,
897 inode, victim_name,
898 victim_name_len);
899 kfree(victim_name);
900 btrfs_release_path(root, path);
901 goto conflict_again;
902 }
903 kfree(victim_name);
904 ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
905 }
906 BUG_ON(ret);
907 }
908 btrfs_release_path(root, path);
909
910 /* look for a conflicting sequence number */
911 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
912 btrfs_inode_ref_index(eb, ref),
913 name, namelen, 0);
914 if (di && !IS_ERR(di)) {
915 ret = drop_one_dir_item(trans, root, path, dir, di);
916 BUG_ON(ret);
917 }
918 btrfs_release_path(root, path);
919
920
921 /* look for a conflicting name */
922 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
923 name, namelen, 0);
924 if (di && !IS_ERR(di)) {
925 ret = drop_one_dir_item(trans, root, path, dir, di);
926 BUG_ON(ret);
927 }
928 btrfs_release_path(root, path);
929
930 /* insert our name */
931 ret = btrfs_add_link(trans, dir, inode, name, namelen, 0,
932 btrfs_inode_ref_index(eb, ref));
933 BUG_ON(ret);
934
935 btrfs_update_inode(trans, root, inode);
936
937out:
938 ref_ptr = (unsigned long)(ref + 1) + namelen;
939 kfree(name);
940 if (ref_ptr < ref_end)
941 goto again;
942
943 /* finally write the back reference in the inode */
944 ret = overwrite_item(trans, root, path, eb, slot, key);
945 BUG_ON(ret);
946
947out_nowrite:
948 btrfs_release_path(root, path);
949 iput(dir);
950 iput(inode);
951 return 0;
952}
953
954/*
955 * There are a few corners where the link count of the file can't
956 * be properly maintained during replay. So, instead of adding
957 * lots of complexity to the log code, we just scan the backrefs
958 * for any file that has been through replay.
959 *
960 * The scan will update the link count on the inode to reflect the
961 * number of back refs found. If it goes down to zero, the iput
962 * will free the inode.
963 */
964static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
965 struct btrfs_root *root,
966 struct inode *inode)
967{
968 struct btrfs_path *path;
969 int ret;
970 struct btrfs_key key;
971 u64 nlink = 0;
972 unsigned long ptr;
973 unsigned long ptr_end;
974 int name_len;
975
976 key.objectid = inode->i_ino;
977 key.type = BTRFS_INODE_REF_KEY;
978 key.offset = (u64)-1;
979
980 path = btrfs_alloc_path();
981
982 while (1) {
983 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
984 if (ret < 0)
985 break;
986 if (ret > 0) {
987 if (path->slots[0] == 0)
988 break;
989 path->slots[0]--;
990 }
991 btrfs_item_key_to_cpu(path->nodes[0], &key,
992 path->slots[0]);
993 if (key.objectid != inode->i_ino ||
994 key.type != BTRFS_INODE_REF_KEY)
995 break;
996 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
997 ptr_end = ptr + btrfs_item_size_nr(path->nodes[0],
998 path->slots[0]);
999 while (ptr < ptr_end) {
1000 struct btrfs_inode_ref *ref;
1001
1002 ref = (struct btrfs_inode_ref *)ptr;
1003 name_len = btrfs_inode_ref_name_len(path->nodes[0],
1004 ref);
1005 ptr = (unsigned long)(ref + 1) + name_len;
1006 nlink++;
1007 }
1008
1009 if (key.offset == 0)
1010 break;
1011 key.offset--;
1012 btrfs_release_path(root, path);
1013 }
1014 btrfs_free_path(path);
1015 if (nlink != inode->i_nlink) {
1016 inode->i_nlink = nlink;
1017 btrfs_update_inode(trans, root, inode);
1018 }
1019 BTRFS_I(inode)->index_cnt = (u64)-1;
1020
1021 return 0;
1022}
1023
1024static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
1025 struct btrfs_root *root,
1026 struct btrfs_path *path)
1027{
1028 int ret;
1029 struct btrfs_key key;
1030 struct inode *inode;
1031
1032 key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
1033 key.type = BTRFS_ORPHAN_ITEM_KEY;
1034 key.offset = (u64)-1;
1035 while (1) {
1036 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1037 if (ret < 0)
1038 break;
1039
1040 if (ret == 1) {
1041 if (path->slots[0] == 0)
1042 break;
1043 path->slots[0]--;
1044 }
1045
1046 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1047 if (key.objectid != BTRFS_TREE_LOG_FIXUP_OBJECTID ||
1048 key.type != BTRFS_ORPHAN_ITEM_KEY)
1049 break;
1050
1051 ret = btrfs_del_item(trans, root, path);
1052 BUG_ON(ret);
1053
1054 btrfs_release_path(root, path);
1055 inode = read_one_inode(root, key.offset);
1056 BUG_ON(!inode);
1057
1058 ret = fixup_inode_link_count(trans, root, inode);
1059 BUG_ON(ret);
1060
1061 iput(inode);
1062
1063 if (key.offset == 0)
1064 break;
1065 key.offset--;
1066 }
1067 btrfs_release_path(root, path);
1068 return 0;
1069}
1070
1071
1072/*
1073 * record a given inode in the fixup dir so we can check its link
1074 * count when replay is done. The link count is incremented here
1075 * so the inode won't go away until we check it
1076 */
1077static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
1078 struct btrfs_root *root,
1079 struct btrfs_path *path,
1080 u64 objectid)
1081{
1082 struct btrfs_key key;
1083 int ret = 0;
1084 struct inode *inode;
1085
1086 inode = read_one_inode(root, objectid);
1087 BUG_ON(!inode);
1088
1089 key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
1090 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
1091 key.offset = objectid;
1092
1093 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1094
1095 btrfs_release_path(root, path);
1096 if (ret == 0) {
1097 btrfs_inc_nlink(inode);
1098 btrfs_update_inode(trans, root, inode);
1099 } else if (ret == -EEXIST) {
1100 ret = 0;
1101 } else {
1102 BUG();
1103 }
1104 iput(inode);
1105
1106 return ret;
1107}
1108
1109/*
1110 * when replaying the log for a directory, we only insert names
1111 * for inodes that actually exist. This means an fsync on a directory
1112 * does not implicitly fsync all the new files in it
1113 */
1114static noinline int insert_one_name(struct btrfs_trans_handle *trans,
1115 struct btrfs_root *root,
1116 struct btrfs_path *path,
1117 u64 dirid, u64 index,
1118 char *name, int name_len, u8 type,
1119 struct btrfs_key *location)
1120{
1121 struct inode *inode;
1122 struct inode *dir;
1123 int ret;
1124
1125 inode = read_one_inode(root, location->objectid);
1126 if (!inode)
1127 return -ENOENT;
1128
1129 dir = read_one_inode(root, dirid);
1130 if (!dir) {
1131 iput(inode);
1132 return -EIO;
1133 }
1134 ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index);
1135
1136 /* FIXME, put inode into FIXUP list */
1137
1138 iput(inode);
1139 iput(dir);
1140 return ret;
1141}
1142
1143/*
1144 * take a single entry in a log directory item and replay it into
1145 * the subvolume.
1146 *
1147 * if a conflicting item exists in the subdirectory already,
1148 * the inode it points to is unlinked and put into the link count
1149 * fix up tree.
1150 *
1151 * If a name from the log points to a file or directory that does
1152 * not exist in the FS, it is skipped. fsyncs on directories
1153 * do not force down inodes inside that directory, just changes to the
1154 * names or unlinks in a directory.
1155 */
1156static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1157 struct btrfs_root *root,
1158 struct btrfs_path *path,
1159 struct extent_buffer *eb,
1160 struct btrfs_dir_item *di,
1161 struct btrfs_key *key)
1162{
1163 char *name;
1164 int name_len;
1165 struct btrfs_dir_item *dst_di;
1166 struct btrfs_key found_key;
1167 struct btrfs_key log_key;
1168 struct inode *dir;
1169 u8 log_type;
1170 int exists;
1171 int ret;
1172
1173 dir = read_one_inode(root, key->objectid);
1174 BUG_ON(!dir);
1175
1176 name_len = btrfs_dir_name_len(eb, di);
1177 name = kmalloc(name_len, GFP_NOFS);
1178 log_type = btrfs_dir_type(eb, di);
1179 read_extent_buffer(eb, name, (unsigned long)(di + 1),
1180 name_len);
1181
1182 btrfs_dir_item_key_to_cpu(eb, di, &log_key);
1183 exists = btrfs_lookup_inode(trans, root, path, &log_key, 0);
1184 if (exists == 0)
1185 exists = 1;
1186 else
1187 exists = 0;
1188 btrfs_release_path(root, path);
1189
1190 if (key->type == BTRFS_DIR_ITEM_KEY) {
1191 dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid,
1192 name, name_len, 1);
1193 } else if (key->type == BTRFS_DIR_INDEX_KEY) {
1194 dst_di = btrfs_lookup_dir_index_item(trans, root, path,
1195 key->objectid,
1196 key->offset, name,
1197 name_len, 1);
1198 } else {
1199 BUG();
1200 }
1201 if (!dst_di || IS_ERR(dst_di)) {
1202 /* we need a sequence number to insert, so we only
1203 * do inserts for the BTRFS_DIR_INDEX_KEY types
1204 */
1205 if (key->type != BTRFS_DIR_INDEX_KEY)
1206 goto out;
1207 goto insert;
1208 }
1209
1210 btrfs_dir_item_key_to_cpu(path->nodes[0], dst_di, &found_key);
1211 /* the existing item matches the logged item */
1212 if (found_key.objectid == log_key.objectid &&
1213 found_key.type == log_key.type &&
1214 found_key.offset == log_key.offset &&
1215 btrfs_dir_type(path->nodes[0], dst_di) == log_type) {
1216 goto out;
1217 }
1218
1219 /*
1220 * don't drop the conflicting directory entry if the inode
1221 * for the new entry doesn't exist
1222 */
1223 if (!exists)
1224 goto out;
1225
1226 ret = drop_one_dir_item(trans, root, path, dir, dst_di);
1227 BUG_ON(ret);
1228
1229 if (key->type == BTRFS_DIR_INDEX_KEY)
1230 goto insert;
1231out:
1232 btrfs_release_path(root, path);
1233 kfree(name);
1234 iput(dir);
1235 return 0;
1236
1237insert:
1238 btrfs_release_path(root, path);
1239 ret = insert_one_name(trans, root, path, key->objectid, key->offset,
1240 name, name_len, log_type, &log_key);
1241
1242 if (ret && ret != -ENOENT)
1243 BUG();
1244 goto out;
1245}
1246
1247/*
1248 * find all the names in a directory item and reconcile them into
1249 * the subvolume. Only BTRFS_DIR_ITEM_KEY types will have more than
1250 * one name in a directory item, but the same code gets used for
1251 * both directory index types
1252 */
1253static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
1254 struct btrfs_root *root,
1255 struct btrfs_path *path,
1256 struct extent_buffer *eb, int slot,
1257 struct btrfs_key *key)
1258{
1259 int ret;
1260 u32 item_size = btrfs_item_size_nr(eb, slot);
1261 struct btrfs_dir_item *di;
1262 int name_len;
1263 unsigned long ptr;
1264 unsigned long ptr_end;
1265
1266 ptr = btrfs_item_ptr_offset(eb, slot);
1267 ptr_end = ptr + item_size;
1268 while (ptr < ptr_end) {
1269 di = (struct btrfs_dir_item *)ptr;
1270 name_len = btrfs_dir_name_len(eb, di);
1271 ret = replay_one_name(trans, root, path, eb, di, key);
1272 BUG_ON(ret);
1273 ptr = (unsigned long)(di + 1);
1274 ptr += name_len;
1275 }
1276 return 0;
1277}
1278
1279/*
1280 * directory replay has two parts. There are the standard directory
1281 * items in the log copied from the subvolume, and range items
1282 * created in the log while the subvolume was logged.
1283 *
1284 * The range items tell us which parts of the key space the log
1285 * is authoritative for. During replay, if a key in the subvolume
1286 * directory is in a logged range item, but not actually in the log
1287 * that means it was deleted from the directory before the fsync
1288 * and should be removed.
1289 */
1290static noinline int find_dir_range(struct btrfs_root *root,
1291 struct btrfs_path *path,
1292 u64 dirid, int key_type,
1293 u64 *start_ret, u64 *end_ret)
1294{
1295 struct btrfs_key key;
1296 u64 found_end;
1297 struct btrfs_dir_log_item *item;
1298 int ret;
1299 int nritems;
1300
1301 if (*start_ret == (u64)-1)
1302 return 1;
1303
1304 key.objectid = dirid;
1305 key.type = key_type;
1306 key.offset = *start_ret;
1307
1308 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1309 if (ret < 0)
1310 goto out;
1311 if (ret > 0) {
1312 if (path->slots[0] == 0)
1313 goto out;
1314 path->slots[0]--;
1315 }
1316 if (ret != 0)
1317 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1318
1319 if (key.type != key_type || key.objectid != dirid) {
1320 ret = 1;
1321 goto next;
1322 }
1323 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1324 struct btrfs_dir_log_item);
1325 found_end = btrfs_dir_log_end(path->nodes[0], item);
1326
1327 if (*start_ret >= key.offset && *start_ret <= found_end) {
1328 ret = 0;
1329 *start_ret = key.offset;
1330 *end_ret = found_end;
1331 goto out;
1332 }
1333 ret = 1;
1334next:
1335 /* check the next slot in the tree to see if it is a valid item */
1336 nritems = btrfs_header_nritems(path->nodes[0]);
1337 if (path->slots[0] >= nritems) {
1338 ret = btrfs_next_leaf(root, path);
1339 if (ret)
1340 goto out;
1341 } else {
1342 path->slots[0]++;
1343 }
1344
1345 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1346
1347 if (key.type != key_type || key.objectid != dirid) {
1348 ret = 1;
1349 goto out;
1350 }
1351 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1352 struct btrfs_dir_log_item);
1353 found_end = btrfs_dir_log_end(path->nodes[0], item);
1354 *start_ret = key.offset;
1355 *end_ret = found_end;
1356 ret = 0;
1357out:
1358 btrfs_release_path(root, path);
1359 return ret;
1360}
1361
1362/*
1363 * this looks for a given directory item in the log. If the directory
1364 * item is not in the log, the item is removed and the inode it points
1365 * to is unlinked
1366 */
1367static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
1368 struct btrfs_root *root,
1369 struct btrfs_root *log,
1370 struct btrfs_path *path,
1371 struct btrfs_path *log_path,
1372 struct inode *dir,
1373 struct btrfs_key *dir_key)
1374{
1375 int ret;
1376 struct extent_buffer *eb;
1377 int slot;
1378 u32 item_size;
1379 struct btrfs_dir_item *di;
1380 struct btrfs_dir_item *log_di;
1381 int name_len;
1382 unsigned long ptr;
1383 unsigned long ptr_end;
1384 char *name;
1385 struct inode *inode;
1386 struct btrfs_key location;
1387
1388again:
1389 eb = path->nodes[0];
1390 slot = path->slots[0];
1391 item_size = btrfs_item_size_nr(eb, slot);
1392 ptr = btrfs_item_ptr_offset(eb, slot);
1393 ptr_end = ptr + item_size;
1394 while (ptr < ptr_end) {
1395 di = (struct btrfs_dir_item *)ptr;
1396 name_len = btrfs_dir_name_len(eb, di);
1397 name = kmalloc(name_len, GFP_NOFS);
1398 if (!name) {
1399 ret = -ENOMEM;
1400 goto out;
1401 }
1402 read_extent_buffer(eb, name, (unsigned long)(di + 1),
1403 name_len);
1404 log_di = NULL;
1405 if (dir_key->type == BTRFS_DIR_ITEM_KEY) {
1406 log_di = btrfs_lookup_dir_item(trans, log, log_path,
1407 dir_key->objectid,
1408 name, name_len, 0);
1409 } else if (dir_key->type == BTRFS_DIR_INDEX_KEY) {
1410 log_di = btrfs_lookup_dir_index_item(trans, log,
1411 log_path,
1412 dir_key->objectid,
1413 dir_key->offset,
1414 name, name_len, 0);
1415 }
1416 if (!log_di || IS_ERR(log_di)) {
1417 btrfs_dir_item_key_to_cpu(eb, di, &location);
1418 btrfs_release_path(root, path);
1419 btrfs_release_path(log, log_path);
1420 inode = read_one_inode(root, location.objectid);
1421 BUG_ON(!inode);
1422
1423 ret = link_to_fixup_dir(trans, root,
1424 path, location.objectid);
1425 BUG_ON(ret);
1426 btrfs_inc_nlink(inode);
1427 ret = btrfs_unlink_inode(trans, root, dir, inode,
1428 name, name_len);
1429 BUG_ON(ret);
1430 kfree(name);
1431 iput(inode);
1432
1433 /* there might still be more names under this key
1434 * check and repeat if required
1435 */
1436 ret = btrfs_search_slot(NULL, root, dir_key, path,
1437 0, 0);
1438 if (ret == 0)
1439 goto again;
1440 ret = 0;
1441 goto out;
1442 }
1443 btrfs_release_path(log, log_path);
1444 kfree(name);
1445
1446 ptr = (unsigned long)(di + 1);
1447 ptr += name_len;
1448 }
1449 ret = 0;
1450out:
1451 btrfs_release_path(root, path);
1452 btrfs_release_path(log, log_path);
1453 return ret;
1454}
1455
1456/*
1457 * deletion replay happens before we copy any new directory items
1458 * out of the log or out of backreferences from inodes. It
1459 * scans the log to find ranges of keys that log is authoritative for,
1460 * and then scans the directory to find items in those ranges that are
1461 * not present in the log.
1462 *
1463 * Anything we don't find in the log is unlinked and removed from the
1464 * directory.
1465 */
1466static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
1467 struct btrfs_root *root,
1468 struct btrfs_root *log,
1469 struct btrfs_path *path,
1470 u64 dirid)
1471{
1472 u64 range_start;
1473 u64 range_end;
1474 int key_type = BTRFS_DIR_LOG_ITEM_KEY;
1475 int ret = 0;
1476 struct btrfs_key dir_key;
1477 struct btrfs_key found_key;
1478 struct btrfs_path *log_path;
1479 struct inode *dir;
1480
1481 dir_key.objectid = dirid;
1482 dir_key.type = BTRFS_DIR_ITEM_KEY;
1483 log_path = btrfs_alloc_path();
1484 if (!log_path)
1485 return -ENOMEM;
1486
1487 dir = read_one_inode(root, dirid);
1488 /* it isn't an error if the inode isn't there, that can happen
1489 * because we replay the deletes before we copy in the inode item
1490 * from the log
1491 */
1492 if (!dir) {
1493 btrfs_free_path(log_path);
1494 return 0;
1495 }
1496again:
1497 range_start = 0;
1498 range_end = 0;
1499 while (1) {
1500 ret = find_dir_range(log, path, dirid, key_type,
1501 &range_start, &range_end);
1502 if (ret != 0)
1503 break;
1504
1505 dir_key.offset = range_start;
1506 while (1) {
1507 int nritems;
1508 ret = btrfs_search_slot(NULL, root, &dir_key, path,
1509 0, 0);
1510 if (ret < 0)
1511 goto out;
1512
1513 nritems = btrfs_header_nritems(path->nodes[0]);
1514 if (path->slots[0] >= nritems) {
1515 ret = btrfs_next_leaf(root, path);
1516 if (ret)
1517 break;
1518 }
1519 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1520 path->slots[0]);
1521 if (found_key.objectid != dirid ||
1522 found_key.type != dir_key.type)
1523 goto next_type;
1524
1525 if (found_key.offset > range_end)
1526 break;
1527
1528 ret = check_item_in_log(trans, root, log, path,
1529 log_path, dir, &found_key);
1530 BUG_ON(ret);
1531 if (found_key.offset == (u64)-1)
1532 break;
1533 dir_key.offset = found_key.offset + 1;
1534 }
1535 btrfs_release_path(root, path);
1536 if (range_end == (u64)-1)
1537 break;
1538 range_start = range_end + 1;
1539 }
1540
1541next_type:
1542 ret = 0;
1543 if (key_type == BTRFS_DIR_LOG_ITEM_KEY) {
1544 key_type = BTRFS_DIR_LOG_INDEX_KEY;
1545 dir_key.type = BTRFS_DIR_INDEX_KEY;
1546 btrfs_release_path(root, path);
1547 goto again;
1548 }
1549out:
1550 btrfs_release_path(root, path);
1551 btrfs_free_path(log_path);
1552 iput(dir);
1553 return ret;
1554}
1555
1556/*
1557 * the process_func used to replay items from the log tree. This
1558 * gets called in two different stages. The first stage just looks
1559 * for inodes and makes sure they are all copied into the subvolume.
1560 *
1561 * The second stage copies all the other item types from the log into
1562 * the subvolume. The two stage approach is slower, but gets rid of
1563 * lots of complexity around inodes referencing other inodes that exist
1564 * only in the log (references come from either directory items or inode
1565 * back refs).
1566 */
1567static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1568 struct walk_control *wc, u64 gen)
1569{
1570 int nritems;
1571 struct btrfs_path *path;
1572 struct btrfs_root *root = wc->replay_dest;
1573 struct btrfs_key key;
1574 u32 item_size;
1575 int level;
1576 int i;
1577 int ret;
1578
1579 btrfs_read_buffer(eb, gen);
1580
1581 level = btrfs_header_level(eb);
1582
1583 if (level != 0)
1584 return 0;
1585
1586 path = btrfs_alloc_path();
1587 BUG_ON(!path);
1588
1589 nritems = btrfs_header_nritems(eb);
1590 for (i = 0; i < nritems; i++) {
1591 btrfs_item_key_to_cpu(eb, &key, i);
1592 item_size = btrfs_item_size_nr(eb, i);
1593
1594 /* inode keys are done during the first stage */
1595 if (key.type == BTRFS_INODE_ITEM_KEY &&
1596 wc->stage == LOG_WALK_REPLAY_INODES) {
1597 struct inode *inode;
1598 struct btrfs_inode_item *inode_item;
1599 u32 mode;
1600
1601 inode_item = btrfs_item_ptr(eb, i,
1602 struct btrfs_inode_item);
1603 mode = btrfs_inode_mode(eb, inode_item);
1604 if (S_ISDIR(mode)) {
1605 ret = replay_dir_deletes(wc->trans,
1606 root, log, path, key.objectid);
1607 BUG_ON(ret);
1608 }
1609 ret = overwrite_item(wc->trans, root, path,
1610 eb, i, &key);
1611 BUG_ON(ret);
1612
1613 /* for regular files, truncate away
1614 * extents past the new EOF
1615 */
1616 if (S_ISREG(mode)) {
1617 inode = read_one_inode(root,
1618 key.objectid);
1619 BUG_ON(!inode);
1620
1621 ret = btrfs_truncate_inode_items(wc->trans,
1622 root, inode, inode->i_size,
1623 BTRFS_EXTENT_DATA_KEY);
1624 BUG_ON(ret);
1625 iput(inode);
1626 }
1627 ret = link_to_fixup_dir(wc->trans, root,
1628 path, key.objectid);
1629 BUG_ON(ret);
1630 }
1631 if (wc->stage < LOG_WALK_REPLAY_ALL)
1632 continue;
1633
1634 /* these keys are simply copied */
1635 if (key.type == BTRFS_XATTR_ITEM_KEY) {
1636 ret = overwrite_item(wc->trans, root, path,
1637 eb, i, &key);
1638 BUG_ON(ret);
1639 } else if (key.type == BTRFS_INODE_REF_KEY) {
1640 ret = add_inode_ref(wc->trans, root, log, path,
1641 eb, i, &key);
1642 BUG_ON(ret && ret != -ENOENT);
1643 } else if (key.type == BTRFS_EXTENT_DATA_KEY) {
1644 ret = replay_one_extent(wc->trans, root, path,
1645 eb, i, &key);
1646 BUG_ON(ret);
1647 } else if (key.type == BTRFS_DIR_ITEM_KEY ||
1648 key.type == BTRFS_DIR_INDEX_KEY) {
1649 ret = replay_one_dir_item(wc->trans, root, path,
1650 eb, i, &key);
1651 BUG_ON(ret);
1652 }
1653 }
1654 btrfs_free_path(path);
1655 return 0;
1656}
1657
1658static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1659 struct btrfs_root *root,
1660 struct btrfs_path *path, int *level,
1661 struct walk_control *wc)
1662{
1663 u64 root_owner;
1664 u64 root_gen;
1665 u64 bytenr;
1666 u64 ptr_gen;
1667 struct extent_buffer *next;
1668 struct extent_buffer *cur;
1669 struct extent_buffer *parent;
1670 u32 blocksize;
1671 int ret = 0;
1672
1673 WARN_ON(*level < 0);
1674 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1675
1676 while (*level > 0) {
1677 WARN_ON(*level < 0);
1678 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1679 cur = path->nodes[*level];
1680
1681 if (btrfs_header_level(cur) != *level)
1682 WARN_ON(1);
1683
1684 if (path->slots[*level] >=
1685 btrfs_header_nritems(cur))
1686 break;
1687
1688 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1689 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1690 blocksize = btrfs_level_size(root, *level - 1);
1691
1692 parent = path->nodes[*level];
1693 root_owner = btrfs_header_owner(parent);
1694 root_gen = btrfs_header_generation(parent);
1695
1696 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
1697
1698 wc->process_func(root, next, wc, ptr_gen);
1699
1700 if (*level == 1) {
1701 path->slots[*level]++;
1702 if (wc->free) {
1703 btrfs_read_buffer(next, ptr_gen);
1704
1705 btrfs_tree_lock(next);
1706 clean_tree_block(trans, root, next);
1707 btrfs_wait_tree_block_writeback(next);
1708 btrfs_tree_unlock(next);
1709
1710 ret = btrfs_drop_leaf_ref(trans, root, next);
1711 BUG_ON(ret);
1712
1713 WARN_ON(root_owner !=
1714 BTRFS_TREE_LOG_OBJECTID);
1715 ret = btrfs_free_reserved_extent(root,
1716 bytenr, blocksize);
1717 BUG_ON(ret);
1718 }
1719 free_extent_buffer(next);
1720 continue;
1721 }
1722 btrfs_read_buffer(next, ptr_gen);
1723
1724 WARN_ON(*level <= 0);
1725 if (path->nodes[*level-1])
1726 free_extent_buffer(path->nodes[*level-1]);
1727 path->nodes[*level-1] = next;
1728 *level = btrfs_header_level(next);
1729 path->slots[*level] = 0;
1730 cond_resched();
1731 }
1732 WARN_ON(*level < 0);
1733 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1734
1735 if (path->nodes[*level] == root->node)
1736 parent = path->nodes[*level];
1737 else
1738 parent = path->nodes[*level + 1];
1739
1740 bytenr = path->nodes[*level]->start;
1741
1742 blocksize = btrfs_level_size(root, *level);
1743 root_owner = btrfs_header_owner(parent);
1744 root_gen = btrfs_header_generation(parent);
1745
1746 wc->process_func(root, path->nodes[*level], wc,
1747 btrfs_header_generation(path->nodes[*level]));
1748
1749 if (wc->free) {
1750 next = path->nodes[*level];
1751 btrfs_tree_lock(next);
1752 clean_tree_block(trans, root, next);
1753 btrfs_wait_tree_block_writeback(next);
1754 btrfs_tree_unlock(next);
1755
1756 if (*level == 0) {
1757 ret = btrfs_drop_leaf_ref(trans, root, next);
1758 BUG_ON(ret);
1759 }
1760 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
1761 ret = btrfs_free_reserved_extent(root, bytenr, blocksize);
1762 BUG_ON(ret);
1763 }
1764 free_extent_buffer(path->nodes[*level]);
1765 path->nodes[*level] = NULL;
1766 *level += 1;
1767
1768 cond_resched();
1769 return 0;
1770}
1771
1772static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
1773 struct btrfs_root *root,
1774 struct btrfs_path *path, int *level,
1775 struct walk_control *wc)
1776{
1777 u64 root_owner;
1778 u64 root_gen;
1779 int i;
1780 int slot;
1781 int ret;
1782
1783 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1784 slot = path->slots[i];
1785 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
1786 struct extent_buffer *node;
1787 node = path->nodes[i];
1788 path->slots[i]++;
1789 *level = i;
1790 WARN_ON(*level == 0);
1791 return 0;
1792 } else {
1793 struct extent_buffer *parent;
1794 if (path->nodes[*level] == root->node)
1795 parent = path->nodes[*level];
1796 else
1797 parent = path->nodes[*level + 1];
1798
1799 root_owner = btrfs_header_owner(parent);
1800 root_gen = btrfs_header_generation(parent);
1801 wc->process_func(root, path->nodes[*level], wc,
1802 btrfs_header_generation(path->nodes[*level]));
1803 if (wc->free) {
1804 struct extent_buffer *next;
1805
1806 next = path->nodes[*level];
1807
1808 btrfs_tree_lock(next);
1809 clean_tree_block(trans, root, next);
1810 btrfs_wait_tree_block_writeback(next);
1811 btrfs_tree_unlock(next);
1812
1813 if (*level == 0) {
1814 ret = btrfs_drop_leaf_ref(trans, root,
1815 next);
1816 BUG_ON(ret);
1817 }
1818
1819 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
1820 ret = btrfs_free_reserved_extent(root,
1821 path->nodes[*level]->start,
1822 path->nodes[*level]->len);
1823 BUG_ON(ret);
1824 }
1825 free_extent_buffer(path->nodes[*level]);
1826 path->nodes[*level] = NULL;
1827 *level = i + 1;
1828 }
1829 }
1830 return 1;
1831}
1832
1833/*
1834 * drop the reference count on the tree rooted at 'snap'. This traverses
1835 * the tree freeing any blocks that have a ref count of zero after being
1836 * decremented.
1837 */
1838static int walk_log_tree(struct btrfs_trans_handle *trans,
1839 struct btrfs_root *log, struct walk_control *wc)
1840{
1841 int ret = 0;
1842 int wret;
1843 int level;
1844 struct btrfs_path *path;
1845 int i;
1846 int orig_level;
1847
1848 path = btrfs_alloc_path();
1849 BUG_ON(!path);
1850
1851 level = btrfs_header_level(log->node);
1852 orig_level = level;
1853 path->nodes[level] = log->node;
1854 extent_buffer_get(log->node);
1855 path->slots[level] = 0;
1856
1857 while (1) {
1858 wret = walk_down_log_tree(trans, log, path, &level, wc);
1859 if (wret > 0)
1860 break;
1861 if (wret < 0)
1862 ret = wret;
1863
1864 wret = walk_up_log_tree(trans, log, path, &level, wc);
1865 if (wret > 0)
1866 break;
1867 if (wret < 0)
1868 ret = wret;
1869 }
1870
1871 /* was the root node processed? if not, catch it here */
1872 if (path->nodes[orig_level]) {
1873 wc->process_func(log, path->nodes[orig_level], wc,
1874 btrfs_header_generation(path->nodes[orig_level]));
1875 if (wc->free) {
1876 struct extent_buffer *next;
1877
1878 next = path->nodes[orig_level];
1879
1880 btrfs_tree_lock(next);
1881 clean_tree_block(trans, log, next);
1882 btrfs_wait_tree_block_writeback(next);
1883 btrfs_tree_unlock(next);
1884
1885 if (orig_level == 0) {
1886 ret = btrfs_drop_leaf_ref(trans, log,
1887 next);
1888 BUG_ON(ret);
1889 }
1890 WARN_ON(log->root_key.objectid !=
1891 BTRFS_TREE_LOG_OBJECTID);
1892 ret = btrfs_free_reserved_extent(log, next->start,
1893 next->len);
1894 BUG_ON(ret);
1895 }
1896 }
1897
1898 for (i = 0; i <= orig_level; i++) {
1899 if (path->nodes[i]) {
1900 free_extent_buffer(path->nodes[i]);
1901 path->nodes[i] = NULL;
1902 }
1903 }
1904 btrfs_free_path(path);
1905 if (wc->free)
1906 free_extent_buffer(log->node);
1907 return ret;
1908}
1909
1910static int wait_log_commit(struct btrfs_root *log)
1911{
1912 DEFINE_WAIT(wait);
1913 u64 transid = log->fs_info->tree_log_transid;
1914
1915 do {
1916 prepare_to_wait(&log->fs_info->tree_log_wait, &wait,
1917 TASK_UNINTERRUPTIBLE);
1918 mutex_unlock(&log->fs_info->tree_log_mutex);
1919 if (atomic_read(&log->fs_info->tree_log_commit))
1920 schedule();
1921 finish_wait(&log->fs_info->tree_log_wait, &wait);
1922 mutex_lock(&log->fs_info->tree_log_mutex);
1923 } while (transid == log->fs_info->tree_log_transid &&
1924 atomic_read(&log->fs_info->tree_log_commit));
1925 return 0;
1926}
1927
1928/*
1929 * btrfs_sync_log does sends a given tree log down to the disk and
1930 * updates the super blocks to record it. When this call is done,
1931 * you know that any inodes previously logged are safely on disk
1932 */
1933int btrfs_sync_log(struct btrfs_trans_handle *trans,
1934 struct btrfs_root *root)
1935{
1936 int ret;
1937 unsigned long batch;
1938 struct btrfs_root *log = root->log_root;
1939
1940 mutex_lock(&log->fs_info->tree_log_mutex);
1941 if (atomic_read(&log->fs_info->tree_log_commit)) {
1942 wait_log_commit(log);
1943 goto out;
1944 }
1945 atomic_set(&log->fs_info->tree_log_commit, 1);
1946
1947 while (1) {
1948 batch = log->fs_info->tree_log_batch;
1949 mutex_unlock(&log->fs_info->tree_log_mutex);
1950 schedule_timeout_uninterruptible(1);
1951 mutex_lock(&log->fs_info->tree_log_mutex);
1952
1953 while (atomic_read(&log->fs_info->tree_log_writers)) {
1954 DEFINE_WAIT(wait);
1955 prepare_to_wait(&log->fs_info->tree_log_wait, &wait,
1956 TASK_UNINTERRUPTIBLE);
1957 mutex_unlock(&log->fs_info->tree_log_mutex);
1958 if (atomic_read(&log->fs_info->tree_log_writers))
1959 schedule();
1960 mutex_lock(&log->fs_info->tree_log_mutex);
1961 finish_wait(&log->fs_info->tree_log_wait, &wait);
1962 }
1963 if (batch == log->fs_info->tree_log_batch)
1964 break;
1965 }
1966
1967 ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages);
1968 BUG_ON(ret);
1969 ret = btrfs_write_and_wait_marked_extents(root->fs_info->log_root_tree,
1970 &root->fs_info->log_root_tree->dirty_log_pages);
1971 BUG_ON(ret);
1972
1973 btrfs_set_super_log_root(&root->fs_info->super_for_commit,
1974 log->fs_info->log_root_tree->node->start);
1975 btrfs_set_super_log_root_level(&root->fs_info->super_for_commit,
1976 btrfs_header_level(log->fs_info->log_root_tree->node));
1977
1978 write_ctree_super(trans, log->fs_info->tree_root, 2);
1979 log->fs_info->tree_log_transid++;
1980 log->fs_info->tree_log_batch = 0;
1981 atomic_set(&log->fs_info->tree_log_commit, 0);
1982 smp_mb();
1983 if (waitqueue_active(&log->fs_info->tree_log_wait))
1984 wake_up(&log->fs_info->tree_log_wait);
1985out:
1986 mutex_unlock(&log->fs_info->tree_log_mutex);
1987 return 0;
1988}
1989
1990/* * free all the extents used by the tree log. This should be called
1991 * at commit time of the full transaction
1992 */
1993int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
1994{
1995 int ret;
1996 struct btrfs_root *log;
1997 struct key;
1998 u64 start;
1999 u64 end;
2000 struct walk_control wc = {
2001 .free = 1,
2002 .process_func = process_one_buffer
2003 };
2004
2005 if (!root->log_root || root->fs_info->log_root_recovering)
2006 return 0;
2007
2008 log = root->log_root;
2009 ret = walk_log_tree(trans, log, &wc);
2010 BUG_ON(ret);
2011
2012 while (1) {
2013 ret = find_first_extent_bit(&log->dirty_log_pages,
2014 0, &start, &end, EXTENT_DIRTY);
2015 if (ret)
2016 break;
2017
2018 clear_extent_dirty(&log->dirty_log_pages,
2019 start, end, GFP_NOFS);
2020 }
2021
2022 log = root->log_root;
2023 ret = btrfs_del_root(trans, root->fs_info->log_root_tree,
2024 &log->root_key);
2025 BUG_ON(ret);
2026 root->log_root = NULL;
2027 kfree(root->log_root);
2028 return 0;
2029}
2030
2031/*
2032 * helper function to update the item for a given subvolumes log root
2033 * in the tree of log roots
2034 */
2035static int update_log_root(struct btrfs_trans_handle *trans,
2036 struct btrfs_root *log)
2037{
2038 u64 bytenr = btrfs_root_bytenr(&log->root_item);
2039 int ret;
2040
2041 if (log->node->start == bytenr)
2042 return 0;
2043
2044 btrfs_set_root_bytenr(&log->root_item, log->node->start);
2045 btrfs_set_root_generation(&log->root_item, trans->transid);
2046 btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node));
2047 ret = btrfs_update_root(trans, log->fs_info->log_root_tree,
2048 &log->root_key, &log->root_item);
2049 BUG_ON(ret);
2050 return ret;
2051}
2052
2053/*
2054 * If both a file and directory are logged, and unlinks or renames are
2055 * mixed in, we have a few interesting corners:
2056 *
2057 * create file X in dir Y
2058 * link file X to X.link in dir Y
2059 * fsync file X
2060 * unlink file X but leave X.link
2061 * fsync dir Y
2062 *
2063 * After a crash we would expect only X.link to exist. But file X
2064 * didn't get fsync'd again so the log has back refs for X and X.link.
2065 *
2066 * We solve this by removing directory entries and inode backrefs from the
2067 * log when a file that was logged in the current transaction is
2068 * unlinked. Any later fsync will include the updated log entries, and
2069 * we'll be able to reconstruct the proper directory items from backrefs.
2070 *
2071 * This optimizations allows us to avoid relogging the entire inode
2072 * or the entire directory.
2073 */
2074int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2075 struct btrfs_root *root,
2076 const char *name, int name_len,
2077 struct inode *dir, u64 index)
2078{
2079 struct btrfs_root *log;
2080 struct btrfs_dir_item *di;
2081 struct btrfs_path *path;
2082 int ret;
2083 int bytes_del = 0;
2084
2085 if (BTRFS_I(dir)->logged_trans < trans->transid)
2086 return 0;
2087
2088 ret = join_running_log_trans(root);
2089 if (ret)
2090 return 0;
2091
2092 mutex_lock(&BTRFS_I(dir)->log_mutex);
2093
2094 log = root->log_root;
2095 path = btrfs_alloc_path();
2096 di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino,
2097 name, name_len, -1);
2098 if (di && !IS_ERR(di)) {
2099 ret = btrfs_delete_one_dir_name(trans, log, path, di);
2100 bytes_del += name_len;
2101 BUG_ON(ret);
2102 }
2103 btrfs_release_path(log, path);
2104 di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino,
2105 index, name, name_len, -1);
2106 if (di && !IS_ERR(di)) {
2107 ret = btrfs_delete_one_dir_name(trans, log, path, di);
2108 bytes_del += name_len;
2109 BUG_ON(ret);
2110 }
2111
2112 /* update the directory size in the log to reflect the names
2113 * we have removed
2114 */
2115 if (bytes_del) {
2116 struct btrfs_key key;
2117
2118 key.objectid = dir->i_ino;
2119 key.offset = 0;
2120 key.type = BTRFS_INODE_ITEM_KEY;
2121 btrfs_release_path(log, path);
2122
2123 ret = btrfs_search_slot(trans, log, &key, path, 0, 1);
2124 if (ret == 0) {
2125 struct btrfs_inode_item *item;
2126 u64 i_size;
2127
2128 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2129 struct btrfs_inode_item);
2130 i_size = btrfs_inode_size(path->nodes[0], item);
2131 if (i_size > bytes_del)
2132 i_size -= bytes_del;
2133 else
2134 i_size = 0;
2135 btrfs_set_inode_size(path->nodes[0], item, i_size);
2136 btrfs_mark_buffer_dirty(path->nodes[0]);
2137 } else
2138 ret = 0;
2139 btrfs_release_path(log, path);
2140 }
2141
2142 btrfs_free_path(path);
2143 mutex_unlock(&BTRFS_I(dir)->log_mutex);
2144 end_log_trans(root);
2145
2146 return 0;
2147}
2148
2149/* see comments for btrfs_del_dir_entries_in_log */
2150int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
2151 struct btrfs_root *root,
2152 const char *name, int name_len,
2153 struct inode *inode, u64 dirid)
2154{
2155 struct btrfs_root *log;
2156 u64 index;
2157 int ret;
2158
2159 if (BTRFS_I(inode)->logged_trans < trans->transid)
2160 return 0;
2161
2162 ret = join_running_log_trans(root);
2163 if (ret)
2164 return 0;
2165 log = root->log_root;
2166 mutex_lock(&BTRFS_I(inode)->log_mutex);
2167
2168 ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino,
2169 dirid, &index);
2170 mutex_unlock(&BTRFS_I(inode)->log_mutex);
2171 end_log_trans(root);
2172
2173 return ret;
2174}
2175
2176/*
2177 * creates a range item in the log for 'dirid'. first_offset and
2178 * last_offset tell us which parts of the key space the log should
2179 * be considered authoritative for.
2180 */
2181static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
2182 struct btrfs_root *log,
2183 struct btrfs_path *path,
2184 int key_type, u64 dirid,
2185 u64 first_offset, u64 last_offset)
2186{
2187 int ret;
2188 struct btrfs_key key;
2189 struct btrfs_dir_log_item *item;
2190
2191 key.objectid = dirid;
2192 key.offset = first_offset;
2193 if (key_type == BTRFS_DIR_ITEM_KEY)
2194 key.type = BTRFS_DIR_LOG_ITEM_KEY;
2195 else
2196 key.type = BTRFS_DIR_LOG_INDEX_KEY;
2197 ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item));
2198 BUG_ON(ret);
2199
2200 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2201 struct btrfs_dir_log_item);
2202 btrfs_set_dir_log_end(path->nodes[0], item, last_offset);
2203 btrfs_mark_buffer_dirty(path->nodes[0]);
2204 btrfs_release_path(log, path);
2205 return 0;
2206}
2207
2208/*
2209 * log all the items included in the current transaction for a given
2210 * directory. This also creates the range items in the log tree required
2211 * to replay anything deleted before the fsync
2212 */
2213static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2214 struct btrfs_root *root, struct inode *inode,
2215 struct btrfs_path *path,
2216 struct btrfs_path *dst_path, int key_type,
2217 u64 min_offset, u64 *last_offset_ret)
2218{
2219 struct btrfs_key min_key;
2220 struct btrfs_key max_key;
2221 struct btrfs_root *log = root->log_root;
2222 struct extent_buffer *src;
2223 int ret;
2224 int i;
2225 int nritems;
2226 u64 first_offset = min_offset;
2227 u64 last_offset = (u64)-1;
2228
2229 log = root->log_root;
2230 max_key.objectid = inode->i_ino;
2231 max_key.offset = (u64)-1;
2232 max_key.type = key_type;
2233
2234 min_key.objectid = inode->i_ino;
2235 min_key.type = key_type;
2236 min_key.offset = min_offset;
2237
2238 path->keep_locks = 1;
2239
2240 ret = btrfs_search_forward(root, &min_key, &max_key,
2241 path, 0, trans->transid);
2242
2243 /*
2244 * we didn't find anything from this transaction, see if there
2245 * is anything at all
2246 */
2247 if (ret != 0 || min_key.objectid != inode->i_ino ||
2248 min_key.type != key_type) {
2249 min_key.objectid = inode->i_ino;
2250 min_key.type = key_type;
2251 min_key.offset = (u64)-1;
2252 btrfs_release_path(root, path);
2253 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
2254 if (ret < 0) {
2255 btrfs_release_path(root, path);
2256 return ret;
2257 }
2258 ret = btrfs_previous_item(root, path, inode->i_ino, key_type);
2259
2260 /* if ret == 0 there are items for this type,
2261 * create a range to tell us the last key of this type.
2262 * otherwise, there are no items in this directory after
2263 * *min_offset, and we create a range to indicate that.
2264 */
2265 if (ret == 0) {
2266 struct btrfs_key tmp;
2267 btrfs_item_key_to_cpu(path->nodes[0], &tmp,
2268 path->slots[0]);
2269 if (key_type == tmp.type)
2270 first_offset = max(min_offset, tmp.offset) + 1;
2271 }
2272 goto done;
2273 }
2274
2275 /* go backward to find any previous key */
2276 ret = btrfs_previous_item(root, path, inode->i_ino, key_type);
2277 if (ret == 0) {
2278 struct btrfs_key tmp;
2279 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
2280 if (key_type == tmp.type) {
2281 first_offset = tmp.offset;
2282 ret = overwrite_item(trans, log, dst_path,
2283 path->nodes[0], path->slots[0],
2284 &tmp);
2285 }
2286 }
2287 btrfs_release_path(root, path);
2288
2289 /* find the first key from this transaction again */
2290 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
2291 if (ret != 0) {
2292 WARN_ON(1);
2293 goto done;
2294 }
2295
2296 /*
2297 * we have a block from this transaction, log every item in it
2298 * from our directory
2299 */
2300 while (1) {
2301 struct btrfs_key tmp;
2302 src = path->nodes[0];
2303 nritems = btrfs_header_nritems(src);
2304 for (i = path->slots[0]; i < nritems; i++) {
2305 btrfs_item_key_to_cpu(src, &min_key, i);
2306
2307 if (min_key.objectid != inode->i_ino ||
2308 min_key.type != key_type)
2309 goto done;
2310 ret = overwrite_item(trans, log, dst_path, src, i,
2311 &min_key);
2312 BUG_ON(ret);
2313 }
2314 path->slots[0] = nritems;
2315
2316 /*
2317 * look ahead to the next item and see if it is also
2318 * from this directory and from this transaction
2319 */
2320 ret = btrfs_next_leaf(root, path);
2321 if (ret == 1) {
2322 last_offset = (u64)-1;
2323 goto done;
2324 }
2325 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
2326 if (tmp.objectid != inode->i_ino || tmp.type != key_type) {
2327 last_offset = (u64)-1;
2328 goto done;
2329 }
2330 if (btrfs_header_generation(path->nodes[0]) != trans->transid) {
2331 ret = overwrite_item(trans, log, dst_path,
2332 path->nodes[0], path->slots[0],
2333 &tmp);
2334
2335 BUG_ON(ret);
2336 last_offset = tmp.offset;
2337 goto done;
2338 }
2339 }
2340done:
2341 *last_offset_ret = last_offset;
2342 btrfs_release_path(root, path);
2343 btrfs_release_path(log, dst_path);
2344
2345 /* insert the log range keys to indicate where the log is valid */
2346 ret = insert_dir_log_key(trans, log, path, key_type, inode->i_ino,
2347 first_offset, last_offset);
2348 BUG_ON(ret);
2349 return 0;
2350}
2351
2352/*
2353 * logging directories is very similar to logging inodes, We find all the items
2354 * from the current transaction and write them to the log.
2355 *
2356 * The recovery code scans the directory in the subvolume, and if it finds a
2357 * key in the range logged that is not present in the log tree, then it means
2358 * that dir entry was unlinked during the transaction.
2359 *
2360 * In order for that scan to work, we must include one key smaller than
2361 * the smallest logged by this transaction and one key larger than the largest
2362 * key logged by this transaction.
2363 */
2364static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
2365 struct btrfs_root *root, struct inode *inode,
2366 struct btrfs_path *path,
2367 struct btrfs_path *dst_path)
2368{
2369 u64 min_key;
2370 u64 max_key;
2371 int ret;
2372 int key_type = BTRFS_DIR_ITEM_KEY;
2373
2374again:
2375 min_key = 0;
2376 max_key = 0;
2377 while (1) {
2378 ret = log_dir_items(trans, root, inode, path,
2379 dst_path, key_type, min_key,
2380 &max_key);
2381 BUG_ON(ret);
2382 if (max_key == (u64)-1)
2383 break;
2384 min_key = max_key + 1;
2385 }
2386
2387 if (key_type == BTRFS_DIR_ITEM_KEY) {
2388 key_type = BTRFS_DIR_INDEX_KEY;
2389 goto again;
2390 }
2391 return 0;
2392}
2393
2394/*
2395 * a helper function to drop items from the log before we relog an
2396 * inode. max_key_type indicates the highest item type to remove.
2397 * This cannot be run for file data extents because it does not
2398 * free the extents they point to.
2399 */
2400static int drop_objectid_items(struct btrfs_trans_handle *trans,
2401 struct btrfs_root *log,
2402 struct btrfs_path *path,
2403 u64 objectid, int max_key_type)
2404{
2405 int ret;
2406 struct btrfs_key key;
2407 struct btrfs_key found_key;
2408
2409 key.objectid = objectid;
2410 key.type = max_key_type;
2411 key.offset = (u64)-1;
2412
2413 while (1) {
2414 ret = btrfs_search_slot(trans, log, &key, path, -1, 1);
2415
2416 if (ret != 1)
2417 break;
2418
2419 if (path->slots[0] == 0)
2420 break;
2421
2422 path->slots[0]--;
2423 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2424 path->slots[0]);
2425
2426 if (found_key.objectid != objectid)
2427 break;
2428
2429 ret = btrfs_del_item(trans, log, path);
2430 BUG_ON(ret);
2431 btrfs_release_path(log, path);
2432 }
2433 btrfs_release_path(log, path);
2434 return 0;
2435}
2436
2437static noinline int copy_items(struct btrfs_trans_handle *trans,
2438 struct btrfs_root *log,
2439 struct btrfs_path *dst_path,
2440 struct extent_buffer *src,
2441 int start_slot, int nr, int inode_only)
2442{
2443 unsigned long src_offset;
2444 unsigned long dst_offset;
2445 struct btrfs_file_extent_item *extent;
2446 struct btrfs_inode_item *inode_item;
2447 int ret;
2448 struct btrfs_key *ins_keys;
2449 u32 *ins_sizes;
2450 char *ins_data;
2451 int i;
2452 struct list_head ordered_sums;
2453
2454 INIT_LIST_HEAD(&ordered_sums);
2455
2456 ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
2457 nr * sizeof(u32), GFP_NOFS);
2458 ins_sizes = (u32 *)ins_data;
2459 ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
2460
2461 for (i = 0; i < nr; i++) {
2462 ins_sizes[i] = btrfs_item_size_nr(src, i + start_slot);
2463 btrfs_item_key_to_cpu(src, ins_keys + i, i + start_slot);
2464 }
2465 ret = btrfs_insert_empty_items(trans, log, dst_path,
2466 ins_keys, ins_sizes, nr);
2467 BUG_ON(ret);
2468
2469 for (i = 0; i < nr; i++) {
2470 dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0],
2471 dst_path->slots[0]);
2472
2473 src_offset = btrfs_item_ptr_offset(src, start_slot + i);
2474
2475 copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
2476 src_offset, ins_sizes[i]);
2477
2478 if (inode_only == LOG_INODE_EXISTS &&
2479 ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
2480 inode_item = btrfs_item_ptr(dst_path->nodes[0],
2481 dst_path->slots[0],
2482 struct btrfs_inode_item);
2483 btrfs_set_inode_size(dst_path->nodes[0], inode_item, 0);
2484
2485 /* set the generation to zero so the recover code
2486 * can tell the difference between an logging
2487 * just to say 'this inode exists' and a logging
2488 * to say 'update this inode with these values'
2489 */
2490 btrfs_set_inode_generation(dst_path->nodes[0],
2491 inode_item, 0);
2492 }
2493 /* take a reference on file data extents so that truncates
2494 * or deletes of this inode don't have to relog the inode
2495 * again
2496 */
2497 if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY) {
2498 int found_type;
2499 extent = btrfs_item_ptr(src, start_slot + i,
2500 struct btrfs_file_extent_item);
2501
2502 found_type = btrfs_file_extent_type(src, extent);
2503 if (found_type == BTRFS_FILE_EXTENT_REG ||
2504 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
2505 u64 ds = btrfs_file_extent_disk_bytenr(src,
2506 extent);
2507 u64 dl = btrfs_file_extent_disk_num_bytes(src,
2508 extent);
2509 u64 cs = btrfs_file_extent_offset(src, extent);
2510 u64 cl = btrfs_file_extent_num_bytes(src,
2511 extent);;
2512 if (btrfs_file_extent_compression(src,
2513 extent)) {
2514 cs = 0;
2515 cl = dl;
2516 }
2517 /* ds == 0 is a hole */
2518 if (ds != 0) {
2519 ret = btrfs_inc_extent_ref(trans, log,
2520 ds, dl,
2521 dst_path->nodes[0]->start,
2522 BTRFS_TREE_LOG_OBJECTID,
2523 trans->transid,
2524 ins_keys[i].objectid);
2525 BUG_ON(ret);
2526 ret = btrfs_lookup_csums_range(
2527 log->fs_info->csum_root,
2528 ds + cs, ds + cs + cl - 1,
2529 &ordered_sums);
2530 BUG_ON(ret);
2531 }
2532 }
2533 }
2534 dst_path->slots[0]++;
2535 }
2536
2537 btrfs_mark_buffer_dirty(dst_path->nodes[0]);
2538 btrfs_release_path(log, dst_path);
2539 kfree(ins_data);
2540
2541 /*
2542 * we have to do this after the loop above to avoid changing the
2543 * log tree while trying to change the log tree.
2544 */
2545 while (!list_empty(&ordered_sums)) {
2546 struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
2547 struct btrfs_ordered_sum,
2548 list);
2549 ret = btrfs_csum_file_blocks(trans, log, sums);
2550 BUG_ON(ret);
2551 list_del(&sums->list);
2552 kfree(sums);
2553 }
2554 return 0;
2555}
2556
2557/* log a single inode in the tree log.
2558 * At least one parent directory for this inode must exist in the tree
2559 * or be logged already.
2560 *
2561 * Any items from this inode changed by the current transaction are copied
2562 * to the log tree. An extra reference is taken on any extents in this
2563 * file, allowing us to avoid a whole pile of corner cases around logging
2564 * blocks that have been removed from the tree.
2565 *
2566 * See LOG_INODE_ALL and related defines for a description of what inode_only
2567 * does.
2568 *
2569 * This handles both files and directories.
2570 */
2571static int __btrfs_log_inode(struct btrfs_trans_handle *trans,
2572 struct btrfs_root *root, struct inode *inode,
2573 int inode_only)
2574{
2575 struct btrfs_path *path;
2576 struct btrfs_path *dst_path;
2577 struct btrfs_key min_key;
2578 struct btrfs_key max_key;
2579 struct btrfs_root *log = root->log_root;
2580 struct extent_buffer *src = NULL;
2581 u32 size;
2582 int ret;
2583 int nritems;
2584 int ins_start_slot = 0;
2585 int ins_nr;
2586
2587 log = root->log_root;
2588
2589 path = btrfs_alloc_path();
2590 dst_path = btrfs_alloc_path();
2591
2592 min_key.objectid = inode->i_ino;
2593 min_key.type = BTRFS_INODE_ITEM_KEY;
2594 min_key.offset = 0;
2595
2596 max_key.objectid = inode->i_ino;
2597 if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode))
2598 max_key.type = BTRFS_XATTR_ITEM_KEY;
2599 else
2600 max_key.type = (u8)-1;
2601 max_key.offset = (u64)-1;
2602
2603 /*
2604 * if this inode has already been logged and we're in inode_only
2605 * mode, we don't want to delete the things that have already
2606 * been written to the log.
2607 *
2608 * But, if the inode has been through an inode_only log,
2609 * the logged_trans field is not set. This allows us to catch
2610 * any new names for this inode in the backrefs by logging it
2611 * again
2612 */
2613 if (inode_only == LOG_INODE_EXISTS &&
2614 BTRFS_I(inode)->logged_trans == trans->transid) {
2615 btrfs_free_path(path);
2616 btrfs_free_path(dst_path);
2617 goto out;
2618 }
2619 mutex_lock(&BTRFS_I(inode)->log_mutex);
2620
2621 /*
2622 * a brute force approach to making sure we get the most uptodate
2623 * copies of everything.
2624 */
2625 if (S_ISDIR(inode->i_mode)) {
2626 int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
2627
2628 if (inode_only == LOG_INODE_EXISTS)
2629 max_key_type = BTRFS_XATTR_ITEM_KEY;
2630 ret = drop_objectid_items(trans, log, path,
2631 inode->i_ino, max_key_type);
2632 } else {
2633 ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0);
2634 }
2635 BUG_ON(ret);
2636 path->keep_locks = 1;
2637
2638 while (1) {
2639 ins_nr = 0;
2640 ret = btrfs_search_forward(root, &min_key, &max_key,
2641 path, 0, trans->transid);
2642 if (ret != 0)
2643 break;
2644again:
2645 /* note, ins_nr might be > 0 here, cleanup outside the loop */
2646 if (min_key.objectid != inode->i_ino)
2647 break;
2648 if (min_key.type > max_key.type)
2649 break;
2650
2651 src = path->nodes[0];
2652 size = btrfs_item_size_nr(src, path->slots[0]);
2653 if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
2654 ins_nr++;
2655 goto next_slot;
2656 } else if (!ins_nr) {
2657 ins_start_slot = path->slots[0];
2658 ins_nr = 1;
2659 goto next_slot;
2660 }
2661
2662 ret = copy_items(trans, log, dst_path, src, ins_start_slot,
2663 ins_nr, inode_only);
2664 BUG_ON(ret);
2665 ins_nr = 1;
2666 ins_start_slot = path->slots[0];
2667next_slot:
2668
2669 nritems = btrfs_header_nritems(path->nodes[0]);
2670 path->slots[0]++;
2671 if (path->slots[0] < nritems) {
2672 btrfs_item_key_to_cpu(path->nodes[0], &min_key,
2673 path->slots[0]);
2674 goto again;
2675 }
2676 if (ins_nr) {
2677 ret = copy_items(trans, log, dst_path, src,
2678 ins_start_slot,
2679 ins_nr, inode_only);
2680 BUG_ON(ret);
2681 ins_nr = 0;
2682 }
2683 btrfs_release_path(root, path);
2684
2685 if (min_key.offset < (u64)-1)
2686 min_key.offset++;
2687 else if (min_key.type < (u8)-1)
2688 min_key.type++;
2689 else if (min_key.objectid < (u64)-1)
2690 min_key.objectid++;
2691 else
2692 break;
2693 }
2694 if (ins_nr) {
2695 ret = copy_items(trans, log, dst_path, src,
2696 ins_start_slot,
2697 ins_nr, inode_only);
2698 BUG_ON(ret);
2699 ins_nr = 0;
2700 }
2701 WARN_ON(ins_nr);
2702 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
2703 btrfs_release_path(root, path);
2704 btrfs_release_path(log, dst_path);
2705 BTRFS_I(inode)->log_dirty_trans = 0;
2706 ret = log_directory_changes(trans, root, inode, path, dst_path);
2707 BUG_ON(ret);
2708 }
2709 BTRFS_I(inode)->logged_trans = trans->transid;
2710 mutex_unlock(&BTRFS_I(inode)->log_mutex);
2711
2712 btrfs_free_path(path);
2713 btrfs_free_path(dst_path);
2714
2715 mutex_lock(&root->fs_info->tree_log_mutex);
2716 ret = update_log_root(trans, log);
2717 BUG_ON(ret);
2718 mutex_unlock(&root->fs_info->tree_log_mutex);
2719out:
2720 return 0;
2721}
2722
2723int btrfs_log_inode(struct btrfs_trans_handle *trans,
2724 struct btrfs_root *root, struct inode *inode,
2725 int inode_only)
2726{
2727 int ret;
2728
2729 start_log_trans(trans, root);
2730 ret = __btrfs_log_inode(trans, root, inode, inode_only);
2731 end_log_trans(root);
2732 return ret;
2733}
2734
2735/*
2736 * helper function around btrfs_log_inode to make sure newly created
2737 * parent directories also end up in the log. A minimal inode and backref
2738 * only logging is done of any parent directories that are older than
2739 * the last committed transaction
2740 */
2741int btrfs_log_dentry(struct btrfs_trans_handle *trans,
2742 struct btrfs_root *root, struct dentry *dentry)
2743{
2744 int inode_only = LOG_INODE_ALL;
2745 struct super_block *sb;
2746 int ret;
2747
2748 start_log_trans(trans, root);
2749 sb = dentry->d_inode->i_sb;
2750 while (1) {
2751 ret = __btrfs_log_inode(trans, root, dentry->d_inode,
2752 inode_only);
2753 BUG_ON(ret);
2754 inode_only = LOG_INODE_EXISTS;
2755
2756 dentry = dentry->d_parent;
2757 if (!dentry || !dentry->d_inode || sb != dentry->d_inode->i_sb)
2758 break;
2759
2760 if (BTRFS_I(dentry->d_inode)->generation <=
2761 root->fs_info->last_trans_committed)
2762 break;
2763 }
2764 end_log_trans(root);
2765 return 0;
2766}
2767
2768/*
2769 * it is not safe to log dentry if the chunk root has added new
2770 * chunks. This returns 0 if the dentry was logged, and 1 otherwise.
2771 * If this returns 1, you must commit the transaction to safely get your
2772 * data on disk.
2773 */
2774int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
2775 struct btrfs_root *root, struct dentry *dentry)
2776{
2777 u64 gen;
2778 gen = root->fs_info->last_trans_new_blockgroup;
2779 if (gen > root->fs_info->last_trans_committed)
2780 return 1;
2781 else
2782 return btrfs_log_dentry(trans, root, dentry);
2783}
2784
2785/*
2786 * should be called during mount to recover any replay any log trees
2787 * from the FS
2788 */
2789int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
2790{
2791 int ret;
2792 struct btrfs_path *path;
2793 struct btrfs_trans_handle *trans;
2794 struct btrfs_key key;
2795 struct btrfs_key found_key;
2796 struct btrfs_key tmp_key;
2797 struct btrfs_root *log;
2798 struct btrfs_fs_info *fs_info = log_root_tree->fs_info;
2799 u64 highest_inode;
2800 struct walk_control wc = {
2801 .process_func = process_one_buffer,
2802 .stage = 0,
2803 };
2804
2805 fs_info->log_root_recovering = 1;
2806 path = btrfs_alloc_path();
2807 BUG_ON(!path);
2808
2809 trans = btrfs_start_transaction(fs_info->tree_root, 1);
2810
2811 wc.trans = trans;
2812 wc.pin = 1;
2813
2814 walk_log_tree(trans, log_root_tree, &wc);
2815
2816again:
2817 key.objectid = BTRFS_TREE_LOG_OBJECTID;
2818 key.offset = (u64)-1;
2819 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2820
2821 while (1) {
2822 ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0);
2823 if (ret < 0)
2824 break;
2825 if (ret > 0) {
2826 if (path->slots[0] == 0)
2827 break;
2828 path->slots[0]--;
2829 }
2830 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2831 path->slots[0]);
2832 btrfs_release_path(log_root_tree, path);
2833 if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID)
2834 break;
2835
2836 log = btrfs_read_fs_root_no_radix(log_root_tree,
2837 &found_key);
2838 BUG_ON(!log);
2839
2840
2841 tmp_key.objectid = found_key.offset;
2842 tmp_key.type = BTRFS_ROOT_ITEM_KEY;
2843 tmp_key.offset = (u64)-1;
2844
2845 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
2846 BUG_ON(!wc.replay_dest);
2847
2848 wc.replay_dest->log_root = log;
2849 btrfs_record_root_in_trans(wc.replay_dest);
2850 ret = walk_log_tree(trans, log, &wc);
2851 BUG_ON(ret);
2852
2853 if (wc.stage == LOG_WALK_REPLAY_ALL) {
2854 ret = fixup_inode_link_counts(trans, wc.replay_dest,
2855 path);
2856 BUG_ON(ret);
2857 }
2858 ret = btrfs_find_highest_inode(wc.replay_dest, &highest_inode);
2859 if (ret == 0) {
2860 wc.replay_dest->highest_inode = highest_inode;
2861 wc.replay_dest->last_inode_alloc = highest_inode;
2862 }
2863
2864 key.offset = found_key.offset - 1;
2865 wc.replay_dest->log_root = NULL;
2866 free_extent_buffer(log->node);
2867 kfree(log);
2868
2869 if (found_key.offset == 0)
2870 break;
2871 }
2872 btrfs_release_path(log_root_tree, path);
2873
2874 /* step one is to pin it all, step two is to replay just inodes */
2875 if (wc.pin) {
2876 wc.pin = 0;
2877 wc.process_func = replay_one_buffer;
2878 wc.stage = LOG_WALK_REPLAY_INODES;
2879 goto again;
2880 }
2881 /* step three is to replay everything */
2882 if (wc.stage < LOG_WALK_REPLAY_ALL) {
2883 wc.stage++;
2884 goto again;
2885 }
2886
2887 btrfs_free_path(path);
2888
2889 free_extent_buffer(log_root_tree->node);
2890 log_root_tree->log_root = NULL;
2891 fs_info->log_root_recovering = 0;
2892
2893 /* step 4: commit the transaction, which also unpins the blocks */
2894 btrfs_commit_transaction(trans, fs_info->tree_root);
2895
2896 kfree(log_root_tree);
2897 return 0;
2898}
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
new file mode 100644
index 000000000000..b9409b32ed02
--- /dev/null
+++ b/fs/btrfs/tree-log.h
@@ -0,0 +1,41 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __TREE_LOG_
20#define __TREE_LOG_
21
22int btrfs_sync_log(struct btrfs_trans_handle *trans,
23 struct btrfs_root *root);
24int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);
25int btrfs_log_dentry(struct btrfs_trans_handle *trans,
26 struct btrfs_root *root, struct dentry *dentry);
27int btrfs_recover_log_trees(struct btrfs_root *tree_root);
28int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
29 struct btrfs_root *root, struct dentry *dentry);
30int btrfs_log_inode(struct btrfs_trans_handle *trans,
31 struct btrfs_root *root, struct inode *inode,
32 int inode_only);
33int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
34 struct btrfs_root *root,
35 const char *name, int name_len,
36 struct inode *dir, u64 index);
37int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
38 struct btrfs_root *root,
39 const char *name, int name_len,
40 struct inode *inode, u64 dirid);
41#endif
diff --git a/fs/btrfs/version.h b/fs/btrfs/version.h
new file mode 100644
index 000000000000..9bf3946d5ef2
--- /dev/null
+++ b/fs/btrfs/version.h
@@ -0,0 +1,4 @@
1#ifndef __BTRFS_VERSION_H
2#define __BTRFS_VERSION_H
3#define BTRFS_BUILD_VERSION "Btrfs"
4#endif
diff --git a/fs/btrfs/version.sh b/fs/btrfs/version.sh
new file mode 100644
index 000000000000..1ca1952fd917
--- /dev/null
+++ b/fs/btrfs/version.sh
@@ -0,0 +1,43 @@
1#!/bin/bash
2#
3# determine-version -- report a useful version for releases
4#
5# Copyright 2008, Aron Griffis <agriffis@n01se.net>
6# Copyright 2008, Oracle
7# Released under the GNU GPLv2
8
9v="v0.16"
10
11which git &> /dev/null
12if [ $? == 0 ]; then
13 git branch >& /dev/null
14 if [ $? == 0 ]; then
15 if head=`git rev-parse --verify HEAD 2>/dev/null`; then
16 if tag=`git describe --tags 2>/dev/null`; then
17 v="$tag"
18 fi
19
20 # Are there uncommitted changes?
21 git update-index --refresh --unmerged > /dev/null
22 if git diff-index --name-only HEAD | \
23 grep -v "^scripts/package" \
24 | read dummy; then
25 v="$v"-dirty
26 fi
27 fi
28 fi
29fi
30
31echo "#ifndef __BUILD_VERSION" > .build-version.h
32echo "#define __BUILD_VERSION" >> .build-version.h
33echo "#define BTRFS_BUILD_VERSION \"Btrfs $v\"" >> .build-version.h
34echo "#endif" >> .build-version.h
35
36diff -q version.h .build-version.h >& /dev/null
37
38if [ $? == 0 ]; then
39 rm .build-version.h
40 exit 0
41fi
42
43mv .build-version.h version.h
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
new file mode 100644
index 000000000000..b187b537888e
--- /dev/null
+++ b/fs/btrfs/volumes.c
@@ -0,0 +1,3218 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18#include <linux/sched.h>
19#include <linux/bio.h>
20#include <linux/buffer_head.h>
21#include <linux/blkdev.h>
22#include <linux/random.h>
23#include <linux/version.h>
24#include <asm/div64.h>
25#include "compat.h"
26#include "ctree.h"
27#include "extent_map.h"
28#include "disk-io.h"
29#include "transaction.h"
30#include "print-tree.h"
31#include "volumes.h"
32#include "async-thread.h"
33
34struct map_lookup {
35 u64 type;
36 int io_align;
37 int io_width;
38 int stripe_len;
39 int sector_size;
40 int num_stripes;
41 int sub_stripes;
42 struct btrfs_bio_stripe stripes[];
43};
44
45static int init_first_rw_device(struct btrfs_trans_handle *trans,
46 struct btrfs_root *root,
47 struct btrfs_device *device);
48static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
49
50#define map_lookup_size(n) (sizeof(struct map_lookup) + \
51 (sizeof(struct btrfs_bio_stripe) * (n)))
52
53static DEFINE_MUTEX(uuid_mutex);
54static LIST_HEAD(fs_uuids);
55
56void btrfs_lock_volumes(void)
57{
58 mutex_lock(&uuid_mutex);
59}
60
61void btrfs_unlock_volumes(void)
62{
63 mutex_unlock(&uuid_mutex);
64}
65
66static void lock_chunks(struct btrfs_root *root)
67{
68 mutex_lock(&root->fs_info->chunk_mutex);
69}
70
71static void unlock_chunks(struct btrfs_root *root)
72{
73 mutex_unlock(&root->fs_info->chunk_mutex);
74}
75
76static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
77{
78 struct btrfs_device *device;
79 WARN_ON(fs_devices->opened);
80 while (!list_empty(&fs_devices->devices)) {
81 device = list_entry(fs_devices->devices.next,
82 struct btrfs_device, dev_list);
83 list_del(&device->dev_list);
84 kfree(device->name);
85 kfree(device);
86 }
87 kfree(fs_devices);
88}
89
90int btrfs_cleanup_fs_uuids(void)
91{
92 struct btrfs_fs_devices *fs_devices;
93
94 while (!list_empty(&fs_uuids)) {
95 fs_devices = list_entry(fs_uuids.next,
96 struct btrfs_fs_devices, list);
97 list_del(&fs_devices->list);
98 free_fs_devices(fs_devices);
99 }
100 return 0;
101}
102
103static noinline struct btrfs_device *__find_device(struct list_head *head,
104 u64 devid, u8 *uuid)
105{
106 struct btrfs_device *dev;
107 struct list_head *cur;
108
109 list_for_each(cur, head) {
110 dev = list_entry(cur, struct btrfs_device, dev_list);
111 if (dev->devid == devid &&
112 (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
113 return dev;
114 }
115 }
116 return NULL;
117}
118
119static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
120{
121 struct list_head *cur;
122 struct btrfs_fs_devices *fs_devices;
123
124 list_for_each(cur, &fs_uuids) {
125 fs_devices = list_entry(cur, struct btrfs_fs_devices, list);
126 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
127 return fs_devices;
128 }
129 return NULL;
130}
131
132/*
133 * we try to collect pending bios for a device so we don't get a large
134 * number of procs sending bios down to the same device. This greatly
135 * improves the schedulers ability to collect and merge the bios.
136 *
137 * But, it also turns into a long list of bios to process and that is sure
138 * to eventually make the worker thread block. The solution here is to
139 * make some progress and then put this work struct back at the end of
140 * the list if the block device is congested. This way, multiple devices
141 * can make progress from a single worker thread.
142 */
143static noinline int run_scheduled_bios(struct btrfs_device *device)
144{
145 struct bio *pending;
146 struct backing_dev_info *bdi;
147 struct btrfs_fs_info *fs_info;
148 struct bio *tail;
149 struct bio *cur;
150 int again = 0;
151 unsigned long num_run = 0;
152 unsigned long limit;
153
154 bdi = device->bdev->bd_inode->i_mapping->backing_dev_info;
155 fs_info = device->dev_root->fs_info;
156 limit = btrfs_async_submit_limit(fs_info);
157 limit = limit * 2 / 3;
158
159loop:
160 spin_lock(&device->io_lock);
161
162 /* take all the bios off the list at once and process them
163 * later on (without the lock held). But, remember the
164 * tail and other pointers so the bios can be properly reinserted
165 * into the list if we hit congestion
166 */
167 pending = device->pending_bios;
168 tail = device->pending_bio_tail;
169 WARN_ON(pending && !tail);
170 device->pending_bios = NULL;
171 device->pending_bio_tail = NULL;
172
173 /*
174 * if pending was null this time around, no bios need processing
175 * at all and we can stop. Otherwise it'll loop back up again
176 * and do an additional check so no bios are missed.
177 *
178 * device->running_pending is used to synchronize with the
179 * schedule_bio code.
180 */
181 if (pending) {
182 again = 1;
183 device->running_pending = 1;
184 } else {
185 again = 0;
186 device->running_pending = 0;
187 }
188 spin_unlock(&device->io_lock);
189
190 while (pending) {
191 cur = pending;
192 pending = pending->bi_next;
193 cur->bi_next = NULL;
194 atomic_dec(&fs_info->nr_async_bios);
195
196 if (atomic_read(&fs_info->nr_async_bios) < limit &&
197 waitqueue_active(&fs_info->async_submit_wait))
198 wake_up(&fs_info->async_submit_wait);
199
200 BUG_ON(atomic_read(&cur->bi_cnt) == 0);
201 bio_get(cur);
202 submit_bio(cur->bi_rw, cur);
203 bio_put(cur);
204 num_run++;
205
206 /*
207 * we made progress, there is more work to do and the bdi
208 * is now congested. Back off and let other work structs
209 * run instead
210 */
211 if (pending && bdi_write_congested(bdi) &&
212 fs_info->fs_devices->open_devices > 1) {
213 struct bio *old_head;
214
215 spin_lock(&device->io_lock);
216
217 old_head = device->pending_bios;
218 device->pending_bios = pending;
219 if (device->pending_bio_tail)
220 tail->bi_next = old_head;
221 else
222 device->pending_bio_tail = tail;
223
224 spin_unlock(&device->io_lock);
225 btrfs_requeue_work(&device->work);
226 goto done;
227 }
228 }
229 if (again)
230 goto loop;
231done:
232 return 0;
233}
234
235static void pending_bios_fn(struct btrfs_work *work)
236{
237 struct btrfs_device *device;
238
239 device = container_of(work, struct btrfs_device, work);
240 run_scheduled_bios(device);
241}
242
243static noinline int device_list_add(const char *path,
244 struct btrfs_super_block *disk_super,
245 u64 devid, struct btrfs_fs_devices **fs_devices_ret)
246{
247 struct btrfs_device *device;
248 struct btrfs_fs_devices *fs_devices;
249 u64 found_transid = btrfs_super_generation(disk_super);
250
251 fs_devices = find_fsid(disk_super->fsid);
252 if (!fs_devices) {
253 fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
254 if (!fs_devices)
255 return -ENOMEM;
256 INIT_LIST_HEAD(&fs_devices->devices);
257 INIT_LIST_HEAD(&fs_devices->alloc_list);
258 list_add(&fs_devices->list, &fs_uuids);
259 memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
260 fs_devices->latest_devid = devid;
261 fs_devices->latest_trans = found_transid;
262 device = NULL;
263 } else {
264 device = __find_device(&fs_devices->devices, devid,
265 disk_super->dev_item.uuid);
266 }
267 if (!device) {
268 if (fs_devices->opened)
269 return -EBUSY;
270
271 device = kzalloc(sizeof(*device), GFP_NOFS);
272 if (!device) {
273 /* we can safely leave the fs_devices entry around */
274 return -ENOMEM;
275 }
276 device->devid = devid;
277 device->work.func = pending_bios_fn;
278 memcpy(device->uuid, disk_super->dev_item.uuid,
279 BTRFS_UUID_SIZE);
280 device->barriers = 1;
281 spin_lock_init(&device->io_lock);
282 device->name = kstrdup(path, GFP_NOFS);
283 if (!device->name) {
284 kfree(device);
285 return -ENOMEM;
286 }
287 INIT_LIST_HEAD(&device->dev_alloc_list);
288 list_add(&device->dev_list, &fs_devices->devices);
289 device->fs_devices = fs_devices;
290 fs_devices->num_devices++;
291 }
292
293 if (found_transid > fs_devices->latest_trans) {
294 fs_devices->latest_devid = devid;
295 fs_devices->latest_trans = found_transid;
296 }
297 *fs_devices_ret = fs_devices;
298 return 0;
299}
300
301static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
302{
303 struct btrfs_fs_devices *fs_devices;
304 struct btrfs_device *device;
305 struct btrfs_device *orig_dev;
306
307 fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
308 if (!fs_devices)
309 return ERR_PTR(-ENOMEM);
310
311 INIT_LIST_HEAD(&fs_devices->devices);
312 INIT_LIST_HEAD(&fs_devices->alloc_list);
313 INIT_LIST_HEAD(&fs_devices->list);
314 fs_devices->latest_devid = orig->latest_devid;
315 fs_devices->latest_trans = orig->latest_trans;
316 memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
317
318 list_for_each_entry(orig_dev, &orig->devices, dev_list) {
319 device = kzalloc(sizeof(*device), GFP_NOFS);
320 if (!device)
321 goto error;
322
323 device->name = kstrdup(orig_dev->name, GFP_NOFS);
324 if (!device->name)
325 goto error;
326
327 device->devid = orig_dev->devid;
328 device->work.func = pending_bios_fn;
329 memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid));
330 device->barriers = 1;
331 spin_lock_init(&device->io_lock);
332 INIT_LIST_HEAD(&device->dev_list);
333 INIT_LIST_HEAD(&device->dev_alloc_list);
334
335 list_add(&device->dev_list, &fs_devices->devices);
336 device->fs_devices = fs_devices;
337 fs_devices->num_devices++;
338 }
339 return fs_devices;
340error:
341 free_fs_devices(fs_devices);
342 return ERR_PTR(-ENOMEM);
343}
344
345int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
346{
347 struct list_head *tmp;
348 struct list_head *cur;
349 struct btrfs_device *device;
350
351 mutex_lock(&uuid_mutex);
352again:
353 list_for_each_safe(cur, tmp, &fs_devices->devices) {
354 device = list_entry(cur, struct btrfs_device, dev_list);
355 if (device->in_fs_metadata)
356 continue;
357
358 if (device->bdev) {
359 close_bdev_exclusive(device->bdev, device->mode);
360 device->bdev = NULL;
361 fs_devices->open_devices--;
362 }
363 if (device->writeable) {
364 list_del_init(&device->dev_alloc_list);
365 device->writeable = 0;
366 fs_devices->rw_devices--;
367 }
368 list_del_init(&device->dev_list);
369 fs_devices->num_devices--;
370 kfree(device->name);
371 kfree(device);
372 }
373
374 if (fs_devices->seed) {
375 fs_devices = fs_devices->seed;
376 goto again;
377 }
378
379 mutex_unlock(&uuid_mutex);
380 return 0;
381}
382
383static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
384{
385 struct list_head *cur;
386 struct btrfs_device *device;
387
388 if (--fs_devices->opened > 0)
389 return 0;
390
391 list_for_each(cur, &fs_devices->devices) {
392 device = list_entry(cur, struct btrfs_device, dev_list);
393 if (device->bdev) {
394 close_bdev_exclusive(device->bdev, device->mode);
395 fs_devices->open_devices--;
396 }
397 if (device->writeable) {
398 list_del_init(&device->dev_alloc_list);
399 fs_devices->rw_devices--;
400 }
401
402 device->bdev = NULL;
403 device->writeable = 0;
404 device->in_fs_metadata = 0;
405 }
406 WARN_ON(fs_devices->open_devices);
407 WARN_ON(fs_devices->rw_devices);
408 fs_devices->opened = 0;
409 fs_devices->seeding = 0;
410
411 return 0;
412}
413
414int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
415{
416 struct btrfs_fs_devices *seed_devices = NULL;
417 int ret;
418
419 mutex_lock(&uuid_mutex);
420 ret = __btrfs_close_devices(fs_devices);
421 if (!fs_devices->opened) {
422 seed_devices = fs_devices->seed;
423 fs_devices->seed = NULL;
424 }
425 mutex_unlock(&uuid_mutex);
426
427 while (seed_devices) {
428 fs_devices = seed_devices;
429 seed_devices = fs_devices->seed;
430 __btrfs_close_devices(fs_devices);
431 free_fs_devices(fs_devices);
432 }
433 return ret;
434}
435
436static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
437 fmode_t flags, void *holder)
438{
439 struct block_device *bdev;
440 struct list_head *head = &fs_devices->devices;
441 struct list_head *cur;
442 struct btrfs_device *device;
443 struct block_device *latest_bdev = NULL;
444 struct buffer_head *bh;
445 struct btrfs_super_block *disk_super;
446 u64 latest_devid = 0;
447 u64 latest_transid = 0;
448 u64 devid;
449 int seeding = 1;
450 int ret = 0;
451
452 list_for_each(cur, head) {
453 device = list_entry(cur, struct btrfs_device, dev_list);
454 if (device->bdev)
455 continue;
456 if (!device->name)
457 continue;
458
459 bdev = open_bdev_exclusive(device->name, flags, holder);
460 if (IS_ERR(bdev)) {
461 printk(KERN_INFO "open %s failed\n", device->name);
462 goto error;
463 }
464 set_blocksize(bdev, 4096);
465
466 bh = btrfs_read_dev_super(bdev);
467 if (!bh)
468 goto error_close;
469
470 disk_super = (struct btrfs_super_block *)bh->b_data;
471 devid = le64_to_cpu(disk_super->dev_item.devid);
472 if (devid != device->devid)
473 goto error_brelse;
474
475 if (memcmp(device->uuid, disk_super->dev_item.uuid,
476 BTRFS_UUID_SIZE))
477 goto error_brelse;
478
479 device->generation = btrfs_super_generation(disk_super);
480 if (!latest_transid || device->generation > latest_transid) {
481 latest_devid = devid;
482 latest_transid = device->generation;
483 latest_bdev = bdev;
484 }
485
486 if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
487 device->writeable = 0;
488 } else {
489 device->writeable = !bdev_read_only(bdev);
490 seeding = 0;
491 }
492
493 device->bdev = bdev;
494 device->in_fs_metadata = 0;
495 device->mode = flags;
496
497 fs_devices->open_devices++;
498 if (device->writeable) {
499 fs_devices->rw_devices++;
500 list_add(&device->dev_alloc_list,
501 &fs_devices->alloc_list);
502 }
503 continue;
504
505error_brelse:
506 brelse(bh);
507error_close:
508 close_bdev_exclusive(bdev, FMODE_READ);
509error:
510 continue;
511 }
512 if (fs_devices->open_devices == 0) {
513 ret = -EIO;
514 goto out;
515 }
516 fs_devices->seeding = seeding;
517 fs_devices->opened = 1;
518 fs_devices->latest_bdev = latest_bdev;
519 fs_devices->latest_devid = latest_devid;
520 fs_devices->latest_trans = latest_transid;
521 fs_devices->total_rw_bytes = 0;
522out:
523 return ret;
524}
525
526int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
527 fmode_t flags, void *holder)
528{
529 int ret;
530
531 mutex_lock(&uuid_mutex);
532 if (fs_devices->opened) {
533 fs_devices->opened++;
534 ret = 0;
535 } else {
536 ret = __btrfs_open_devices(fs_devices, flags, holder);
537 }
538 mutex_unlock(&uuid_mutex);
539 return ret;
540}
541
542int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
543 struct btrfs_fs_devices **fs_devices_ret)
544{
545 struct btrfs_super_block *disk_super;
546 struct block_device *bdev;
547 struct buffer_head *bh;
548 int ret;
549 u64 devid;
550 u64 transid;
551
552 mutex_lock(&uuid_mutex);
553
554 bdev = open_bdev_exclusive(path, flags, holder);
555
556 if (IS_ERR(bdev)) {
557 ret = PTR_ERR(bdev);
558 goto error;
559 }
560
561 ret = set_blocksize(bdev, 4096);
562 if (ret)
563 goto error_close;
564 bh = btrfs_read_dev_super(bdev);
565 if (!bh) {
566 ret = -EIO;
567 goto error_close;
568 }
569 disk_super = (struct btrfs_super_block *)bh->b_data;
570 devid = le64_to_cpu(disk_super->dev_item.devid);
571 transid = btrfs_super_generation(disk_super);
572 if (disk_super->label[0])
573 printk(KERN_INFO "device label %s ", disk_super->label);
574 else {
575 /* FIXME, make a readl uuid parser */
576 printk(KERN_INFO "device fsid %llx-%llx ",
577 *(unsigned long long *)disk_super->fsid,
578 *(unsigned long long *)(disk_super->fsid + 8));
579 }
580 printk(KERN_INFO "devid %llu transid %llu %s\n",
581 (unsigned long long)devid, (unsigned long long)transid, path);
582 ret = device_list_add(path, disk_super, devid, fs_devices_ret);
583
584 brelse(bh);
585error_close:
586 close_bdev_exclusive(bdev, flags);
587error:
588 mutex_unlock(&uuid_mutex);
589 return ret;
590}
591
592/*
593 * this uses a pretty simple search, the expectation is that it is
594 * called very infrequently and that a given device has a small number
595 * of extents
596 */
597static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
598 struct btrfs_device *device,
599 u64 num_bytes, u64 *start)
600{
601 struct btrfs_key key;
602 struct btrfs_root *root = device->dev_root;
603 struct btrfs_dev_extent *dev_extent = NULL;
604 struct btrfs_path *path;
605 u64 hole_size = 0;
606 u64 last_byte = 0;
607 u64 search_start = 0;
608 u64 search_end = device->total_bytes;
609 int ret;
610 int slot = 0;
611 int start_found;
612 struct extent_buffer *l;
613
614 path = btrfs_alloc_path();
615 if (!path)
616 return -ENOMEM;
617 path->reada = 2;
618 start_found = 0;
619
620 /* FIXME use last free of some kind */
621
622 /* we don't want to overwrite the superblock on the drive,
623 * so we make sure to start at an offset of at least 1MB
624 */
625 search_start = max((u64)1024 * 1024, search_start);
626
627 if (root->fs_info->alloc_start + num_bytes <= device->total_bytes)
628 search_start = max(root->fs_info->alloc_start, search_start);
629
630 key.objectid = device->devid;
631 key.offset = search_start;
632 key.type = BTRFS_DEV_EXTENT_KEY;
633 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
634 if (ret < 0)
635 goto error;
636 ret = btrfs_previous_item(root, path, 0, key.type);
637 if (ret < 0)
638 goto error;
639 l = path->nodes[0];
640 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
641 while (1) {
642 l = path->nodes[0];
643 slot = path->slots[0];
644 if (slot >= btrfs_header_nritems(l)) {
645 ret = btrfs_next_leaf(root, path);
646 if (ret == 0)
647 continue;
648 if (ret < 0)
649 goto error;
650no_more_items:
651 if (!start_found) {
652 if (search_start >= search_end) {
653 ret = -ENOSPC;
654 goto error;
655 }
656 *start = search_start;
657 start_found = 1;
658 goto check_pending;
659 }
660 *start = last_byte > search_start ?
661 last_byte : search_start;
662 if (search_end <= *start) {
663 ret = -ENOSPC;
664 goto error;
665 }
666 goto check_pending;
667 }
668 btrfs_item_key_to_cpu(l, &key, slot);
669
670 if (key.objectid < device->devid)
671 goto next;
672
673 if (key.objectid > device->devid)
674 goto no_more_items;
675
676 if (key.offset >= search_start && key.offset > last_byte &&
677 start_found) {
678 if (last_byte < search_start)
679 last_byte = search_start;
680 hole_size = key.offset - last_byte;
681 if (key.offset > last_byte &&
682 hole_size >= num_bytes) {
683 *start = last_byte;
684 goto check_pending;
685 }
686 }
687 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
688 goto next;
689
690 start_found = 1;
691 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
692 last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
693next:
694 path->slots[0]++;
695 cond_resched();
696 }
697check_pending:
698 /* we have to make sure we didn't find an extent that has already
699 * been allocated by the map tree or the original allocation
700 */
701 BUG_ON(*start < search_start);
702
703 if (*start + num_bytes > search_end) {
704 ret = -ENOSPC;
705 goto error;
706 }
707 /* check for pending inserts here */
708 ret = 0;
709
710error:
711 btrfs_free_path(path);
712 return ret;
713}
714
715static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
716 struct btrfs_device *device,
717 u64 start)
718{
719 int ret;
720 struct btrfs_path *path;
721 struct btrfs_root *root = device->dev_root;
722 struct btrfs_key key;
723 struct btrfs_key found_key;
724 struct extent_buffer *leaf = NULL;
725 struct btrfs_dev_extent *extent = NULL;
726
727 path = btrfs_alloc_path();
728 if (!path)
729 return -ENOMEM;
730
731 key.objectid = device->devid;
732 key.offset = start;
733 key.type = BTRFS_DEV_EXTENT_KEY;
734
735 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
736 if (ret > 0) {
737 ret = btrfs_previous_item(root, path, key.objectid,
738 BTRFS_DEV_EXTENT_KEY);
739 BUG_ON(ret);
740 leaf = path->nodes[0];
741 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
742 extent = btrfs_item_ptr(leaf, path->slots[0],
743 struct btrfs_dev_extent);
744 BUG_ON(found_key.offset > start || found_key.offset +
745 btrfs_dev_extent_length(leaf, extent) < start);
746 ret = 0;
747 } else if (ret == 0) {
748 leaf = path->nodes[0];
749 extent = btrfs_item_ptr(leaf, path->slots[0],
750 struct btrfs_dev_extent);
751 }
752 BUG_ON(ret);
753
754 if (device->bytes_used > 0)
755 device->bytes_used -= btrfs_dev_extent_length(leaf, extent);
756 ret = btrfs_del_item(trans, root, path);
757 BUG_ON(ret);
758
759 btrfs_free_path(path);
760 return ret;
761}
762
763int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
764 struct btrfs_device *device,
765 u64 chunk_tree, u64 chunk_objectid,
766 u64 chunk_offset, u64 start, u64 num_bytes)
767{
768 int ret;
769 struct btrfs_path *path;
770 struct btrfs_root *root = device->dev_root;
771 struct btrfs_dev_extent *extent;
772 struct extent_buffer *leaf;
773 struct btrfs_key key;
774
775 WARN_ON(!device->in_fs_metadata);
776 path = btrfs_alloc_path();
777 if (!path)
778 return -ENOMEM;
779
780 key.objectid = device->devid;
781 key.offset = start;
782 key.type = BTRFS_DEV_EXTENT_KEY;
783 ret = btrfs_insert_empty_item(trans, root, path, &key,
784 sizeof(*extent));
785 BUG_ON(ret);
786
787 leaf = path->nodes[0];
788 extent = btrfs_item_ptr(leaf, path->slots[0],
789 struct btrfs_dev_extent);
790 btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
791 btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid);
792 btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
793
794 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
795 (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
796 BTRFS_UUID_SIZE);
797
798 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
799 btrfs_mark_buffer_dirty(leaf);
800 btrfs_free_path(path);
801 return ret;
802}
803
804static noinline int find_next_chunk(struct btrfs_root *root,
805 u64 objectid, u64 *offset)
806{
807 struct btrfs_path *path;
808 int ret;
809 struct btrfs_key key;
810 struct btrfs_chunk *chunk;
811 struct btrfs_key found_key;
812
813 path = btrfs_alloc_path();
814 BUG_ON(!path);
815
816 key.objectid = objectid;
817 key.offset = (u64)-1;
818 key.type = BTRFS_CHUNK_ITEM_KEY;
819
820 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
821 if (ret < 0)
822 goto error;
823
824 BUG_ON(ret == 0);
825
826 ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
827 if (ret) {
828 *offset = 0;
829 } else {
830 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
831 path->slots[0]);
832 if (found_key.objectid != objectid)
833 *offset = 0;
834 else {
835 chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
836 struct btrfs_chunk);
837 *offset = found_key.offset +
838 btrfs_chunk_length(path->nodes[0], chunk);
839 }
840 }
841 ret = 0;
842error:
843 btrfs_free_path(path);
844 return ret;
845}
846
847static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
848{
849 int ret;
850 struct btrfs_key key;
851 struct btrfs_key found_key;
852 struct btrfs_path *path;
853
854 root = root->fs_info->chunk_root;
855
856 path = btrfs_alloc_path();
857 if (!path)
858 return -ENOMEM;
859
860 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
861 key.type = BTRFS_DEV_ITEM_KEY;
862 key.offset = (u64)-1;
863
864 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
865 if (ret < 0)
866 goto error;
867
868 BUG_ON(ret == 0);
869
870 ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
871 BTRFS_DEV_ITEM_KEY);
872 if (ret) {
873 *objectid = 1;
874 } else {
875 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
876 path->slots[0]);
877 *objectid = found_key.offset + 1;
878 }
879 ret = 0;
880error:
881 btrfs_free_path(path);
882 return ret;
883}
884
885/*
886 * the device information is stored in the chunk root
887 * the btrfs_device struct should be fully filled in
888 */
889int btrfs_add_device(struct btrfs_trans_handle *trans,
890 struct btrfs_root *root,
891 struct btrfs_device *device)
892{
893 int ret;
894 struct btrfs_path *path;
895 struct btrfs_dev_item *dev_item;
896 struct extent_buffer *leaf;
897 struct btrfs_key key;
898 unsigned long ptr;
899
900 root = root->fs_info->chunk_root;
901
902 path = btrfs_alloc_path();
903 if (!path)
904 return -ENOMEM;
905
906 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
907 key.type = BTRFS_DEV_ITEM_KEY;
908 key.offset = device->devid;
909
910 ret = btrfs_insert_empty_item(trans, root, path, &key,
911 sizeof(*dev_item));
912 if (ret)
913 goto out;
914
915 leaf = path->nodes[0];
916 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
917
918 btrfs_set_device_id(leaf, dev_item, device->devid);
919 btrfs_set_device_generation(leaf, dev_item, 0);
920 btrfs_set_device_type(leaf, dev_item, device->type);
921 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
922 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
923 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
924 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
925 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
926 btrfs_set_device_group(leaf, dev_item, 0);
927 btrfs_set_device_seek_speed(leaf, dev_item, 0);
928 btrfs_set_device_bandwidth(leaf, dev_item, 0);
929 btrfs_set_device_start_offset(leaf, dev_item, 0);
930
931 ptr = (unsigned long)btrfs_device_uuid(dev_item);
932 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
933 ptr = (unsigned long)btrfs_device_fsid(dev_item);
934 write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE);
935 btrfs_mark_buffer_dirty(leaf);
936
937 ret = 0;
938out:
939 btrfs_free_path(path);
940 return ret;
941}
942
943static int btrfs_rm_dev_item(struct btrfs_root *root,
944 struct btrfs_device *device)
945{
946 int ret;
947 struct btrfs_path *path;
948 struct btrfs_key key;
949 struct btrfs_trans_handle *trans;
950
951 root = root->fs_info->chunk_root;
952
953 path = btrfs_alloc_path();
954 if (!path)
955 return -ENOMEM;
956
957 trans = btrfs_start_transaction(root, 1);
958 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
959 key.type = BTRFS_DEV_ITEM_KEY;
960 key.offset = device->devid;
961 lock_chunks(root);
962
963 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
964 if (ret < 0)
965 goto out;
966
967 if (ret > 0) {
968 ret = -ENOENT;
969 goto out;
970 }
971
972 ret = btrfs_del_item(trans, root, path);
973 if (ret)
974 goto out;
975out:
976 btrfs_free_path(path);
977 unlock_chunks(root);
978 btrfs_commit_transaction(trans, root);
979 return ret;
980}
981
982int btrfs_rm_device(struct btrfs_root *root, char *device_path)
983{
984 struct btrfs_device *device;
985 struct btrfs_device *next_device;
986 struct block_device *bdev;
987 struct buffer_head *bh = NULL;
988 struct btrfs_super_block *disk_super;
989 u64 all_avail;
990 u64 devid;
991 u64 num_devices;
992 u8 *dev_uuid;
993 int ret = 0;
994
995 mutex_lock(&uuid_mutex);
996 mutex_lock(&root->fs_info->volume_mutex);
997
998 all_avail = root->fs_info->avail_data_alloc_bits |
999 root->fs_info->avail_system_alloc_bits |
1000 root->fs_info->avail_metadata_alloc_bits;
1001
1002 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
1003 root->fs_info->fs_devices->rw_devices <= 4) {
1004 printk(KERN_ERR "btrfs: unable to go below four devices "
1005 "on raid10\n");
1006 ret = -EINVAL;
1007 goto out;
1008 }
1009
1010 if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
1011 root->fs_info->fs_devices->rw_devices <= 2) {
1012 printk(KERN_ERR "btrfs: unable to go below two "
1013 "devices on raid1\n");
1014 ret = -EINVAL;
1015 goto out;
1016 }
1017
1018 if (strcmp(device_path, "missing") == 0) {
1019 struct list_head *cur;
1020 struct list_head *devices;
1021 struct btrfs_device *tmp;
1022
1023 device = NULL;
1024 devices = &root->fs_info->fs_devices->devices;
1025 list_for_each(cur, devices) {
1026 tmp = list_entry(cur, struct btrfs_device, dev_list);
1027 if (tmp->in_fs_metadata && !tmp->bdev) {
1028 device = tmp;
1029 break;
1030 }
1031 }
1032 bdev = NULL;
1033 bh = NULL;
1034 disk_super = NULL;
1035 if (!device) {
1036 printk(KERN_ERR "btrfs: no missing devices found to "
1037 "remove\n");
1038 goto out;
1039 }
1040 } else {
1041 bdev = open_bdev_exclusive(device_path, FMODE_READ,
1042 root->fs_info->bdev_holder);
1043 if (IS_ERR(bdev)) {
1044 ret = PTR_ERR(bdev);
1045 goto out;
1046 }
1047
1048 set_blocksize(bdev, 4096);
1049 bh = btrfs_read_dev_super(bdev);
1050 if (!bh) {
1051 ret = -EIO;
1052 goto error_close;
1053 }
1054 disk_super = (struct btrfs_super_block *)bh->b_data;
1055 devid = le64_to_cpu(disk_super->dev_item.devid);
1056 dev_uuid = disk_super->dev_item.uuid;
1057 device = btrfs_find_device(root, devid, dev_uuid,
1058 disk_super->fsid);
1059 if (!device) {
1060 ret = -ENOENT;
1061 goto error_brelse;
1062 }
1063 }
1064
1065 if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) {
1066 printk(KERN_ERR "btrfs: unable to remove the only writeable "
1067 "device\n");
1068 ret = -EINVAL;
1069 goto error_brelse;
1070 }
1071
1072 if (device->writeable) {
1073 list_del_init(&device->dev_alloc_list);
1074 root->fs_info->fs_devices->rw_devices--;
1075 }
1076
1077 ret = btrfs_shrink_device(device, 0);
1078 if (ret)
1079 goto error_brelse;
1080
1081 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
1082 if (ret)
1083 goto error_brelse;
1084
1085 device->in_fs_metadata = 0;
1086 list_del_init(&device->dev_list);
1087 device->fs_devices->num_devices--;
1088
1089 next_device = list_entry(root->fs_info->fs_devices->devices.next,
1090 struct btrfs_device, dev_list);
1091 if (device->bdev == root->fs_info->sb->s_bdev)
1092 root->fs_info->sb->s_bdev = next_device->bdev;
1093 if (device->bdev == root->fs_info->fs_devices->latest_bdev)
1094 root->fs_info->fs_devices->latest_bdev = next_device->bdev;
1095
1096 if (device->bdev) {
1097 close_bdev_exclusive(device->bdev, device->mode);
1098 device->bdev = NULL;
1099 device->fs_devices->open_devices--;
1100 }
1101
1102 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
1103 btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices);
1104
1105 if (device->fs_devices->open_devices == 0) {
1106 struct btrfs_fs_devices *fs_devices;
1107 fs_devices = root->fs_info->fs_devices;
1108 while (fs_devices) {
1109 if (fs_devices->seed == device->fs_devices)
1110 break;
1111 fs_devices = fs_devices->seed;
1112 }
1113 fs_devices->seed = device->fs_devices->seed;
1114 device->fs_devices->seed = NULL;
1115 __btrfs_close_devices(device->fs_devices);
1116 free_fs_devices(device->fs_devices);
1117 }
1118
1119 /*
1120 * at this point, the device is zero sized. We want to
1121 * remove it from the devices list and zero out the old super
1122 */
1123 if (device->writeable) {
1124 /* make sure this device isn't detected as part of
1125 * the FS anymore
1126 */
1127 memset(&disk_super->magic, 0, sizeof(disk_super->magic));
1128 set_buffer_dirty(bh);
1129 sync_dirty_buffer(bh);
1130 }
1131
1132 kfree(device->name);
1133 kfree(device);
1134 ret = 0;
1135
1136error_brelse:
1137 brelse(bh);
1138error_close:
1139 if (bdev)
1140 close_bdev_exclusive(bdev, FMODE_READ);
1141out:
1142 mutex_unlock(&root->fs_info->volume_mutex);
1143 mutex_unlock(&uuid_mutex);
1144 return ret;
1145}
1146
1147/*
1148 * does all the dirty work required for changing file system's UUID.
1149 */
1150static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans,
1151 struct btrfs_root *root)
1152{
1153 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
1154 struct btrfs_fs_devices *old_devices;
1155 struct btrfs_fs_devices *seed_devices;
1156 struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
1157 struct btrfs_device *device;
1158 u64 super_flags;
1159
1160 BUG_ON(!mutex_is_locked(&uuid_mutex));
1161 if (!fs_devices->seeding)
1162 return -EINVAL;
1163
1164 seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
1165 if (!seed_devices)
1166 return -ENOMEM;
1167
1168 old_devices = clone_fs_devices(fs_devices);
1169 if (IS_ERR(old_devices)) {
1170 kfree(seed_devices);
1171 return PTR_ERR(old_devices);
1172 }
1173
1174 list_add(&old_devices->list, &fs_uuids);
1175
1176 memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
1177 seed_devices->opened = 1;
1178 INIT_LIST_HEAD(&seed_devices->devices);
1179 INIT_LIST_HEAD(&seed_devices->alloc_list);
1180 list_splice_init(&fs_devices->devices, &seed_devices->devices);
1181 list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
1182 list_for_each_entry(device, &seed_devices->devices, dev_list) {
1183 device->fs_devices = seed_devices;
1184 }
1185
1186 fs_devices->seeding = 0;
1187 fs_devices->num_devices = 0;
1188 fs_devices->open_devices = 0;
1189 fs_devices->seed = seed_devices;
1190
1191 generate_random_uuid(fs_devices->fsid);
1192 memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
1193 memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
1194 super_flags = btrfs_super_flags(disk_super) &
1195 ~BTRFS_SUPER_FLAG_SEEDING;
1196 btrfs_set_super_flags(disk_super, super_flags);
1197
1198 return 0;
1199}
1200
1201/*
1202 * strore the expected generation for seed devices in device items.
1203 */
1204static int btrfs_finish_sprout(struct btrfs_trans_handle *trans,
1205 struct btrfs_root *root)
1206{
1207 struct btrfs_path *path;
1208 struct extent_buffer *leaf;
1209 struct btrfs_dev_item *dev_item;
1210 struct btrfs_device *device;
1211 struct btrfs_key key;
1212 u8 fs_uuid[BTRFS_UUID_SIZE];
1213 u8 dev_uuid[BTRFS_UUID_SIZE];
1214 u64 devid;
1215 int ret;
1216
1217 path = btrfs_alloc_path();
1218 if (!path)
1219 return -ENOMEM;
1220
1221 root = root->fs_info->chunk_root;
1222 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1223 key.offset = 0;
1224 key.type = BTRFS_DEV_ITEM_KEY;
1225
1226 while (1) {
1227 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1228 if (ret < 0)
1229 goto error;
1230
1231 leaf = path->nodes[0];
1232next_slot:
1233 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1234 ret = btrfs_next_leaf(root, path);
1235 if (ret > 0)
1236 break;
1237 if (ret < 0)
1238 goto error;
1239 leaf = path->nodes[0];
1240 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1241 btrfs_release_path(root, path);
1242 continue;
1243 }
1244
1245 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1246 if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID ||
1247 key.type != BTRFS_DEV_ITEM_KEY)
1248 break;
1249
1250 dev_item = btrfs_item_ptr(leaf, path->slots[0],
1251 struct btrfs_dev_item);
1252 devid = btrfs_device_id(leaf, dev_item);
1253 read_extent_buffer(leaf, dev_uuid,
1254 (unsigned long)btrfs_device_uuid(dev_item),
1255 BTRFS_UUID_SIZE);
1256 read_extent_buffer(leaf, fs_uuid,
1257 (unsigned long)btrfs_device_fsid(dev_item),
1258 BTRFS_UUID_SIZE);
1259 device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
1260 BUG_ON(!device);
1261
1262 if (device->fs_devices->seeding) {
1263 btrfs_set_device_generation(leaf, dev_item,
1264 device->generation);
1265 btrfs_mark_buffer_dirty(leaf);
1266 }
1267
1268 path->slots[0]++;
1269 goto next_slot;
1270 }
1271 ret = 0;
1272error:
1273 btrfs_free_path(path);
1274 return ret;
1275}
1276
1277int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1278{
1279 struct btrfs_trans_handle *trans;
1280 struct btrfs_device *device;
1281 struct block_device *bdev;
1282 struct list_head *cur;
1283 struct list_head *devices;
1284 struct super_block *sb = root->fs_info->sb;
1285 u64 total_bytes;
1286 int seeding_dev = 0;
1287 int ret = 0;
1288
1289 if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
1290 return -EINVAL;
1291
1292 bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder);
1293 if (!bdev)
1294 return -EIO;
1295
1296 if (root->fs_info->fs_devices->seeding) {
1297 seeding_dev = 1;
1298 down_write(&sb->s_umount);
1299 mutex_lock(&uuid_mutex);
1300 }
1301
1302 filemap_write_and_wait(bdev->bd_inode->i_mapping);
1303 mutex_lock(&root->fs_info->volume_mutex);
1304
1305 devices = &root->fs_info->fs_devices->devices;
1306 list_for_each(cur, devices) {
1307 device = list_entry(cur, struct btrfs_device, dev_list);
1308 if (device->bdev == bdev) {
1309 ret = -EEXIST;
1310 goto error;
1311 }
1312 }
1313
1314 device = kzalloc(sizeof(*device), GFP_NOFS);
1315 if (!device) {
1316 /* we can safely leave the fs_devices entry around */
1317 ret = -ENOMEM;
1318 goto error;
1319 }
1320
1321 device->name = kstrdup(device_path, GFP_NOFS);
1322 if (!device->name) {
1323 kfree(device);
1324 ret = -ENOMEM;
1325 goto error;
1326 }
1327
1328 ret = find_next_devid(root, &device->devid);
1329 if (ret) {
1330 kfree(device);
1331 goto error;
1332 }
1333
1334 trans = btrfs_start_transaction(root, 1);
1335 lock_chunks(root);
1336
1337 device->barriers = 1;
1338 device->writeable = 1;
1339 device->work.func = pending_bios_fn;
1340 generate_random_uuid(device->uuid);
1341 spin_lock_init(&device->io_lock);
1342 device->generation = trans->transid;
1343 device->io_width = root->sectorsize;
1344 device->io_align = root->sectorsize;
1345 device->sector_size = root->sectorsize;
1346 device->total_bytes = i_size_read(bdev->bd_inode);
1347 device->dev_root = root->fs_info->dev_root;
1348 device->bdev = bdev;
1349 device->in_fs_metadata = 1;
1350 device->mode = 0;
1351 set_blocksize(device->bdev, 4096);
1352
1353 if (seeding_dev) {
1354 sb->s_flags &= ~MS_RDONLY;
1355 ret = btrfs_prepare_sprout(trans, root);
1356 BUG_ON(ret);
1357 }
1358
1359 device->fs_devices = root->fs_info->fs_devices;
1360 list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
1361 list_add(&device->dev_alloc_list,
1362 &root->fs_info->fs_devices->alloc_list);
1363 root->fs_info->fs_devices->num_devices++;
1364 root->fs_info->fs_devices->open_devices++;
1365 root->fs_info->fs_devices->rw_devices++;
1366 root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
1367
1368 total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
1369 btrfs_set_super_total_bytes(&root->fs_info->super_copy,
1370 total_bytes + device->total_bytes);
1371
1372 total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
1373 btrfs_set_super_num_devices(&root->fs_info->super_copy,
1374 total_bytes + 1);
1375
1376 if (seeding_dev) {
1377 ret = init_first_rw_device(trans, root, device);
1378 BUG_ON(ret);
1379 ret = btrfs_finish_sprout(trans, root);
1380 BUG_ON(ret);
1381 } else {
1382 ret = btrfs_add_device(trans, root, device);
1383 }
1384
1385 unlock_chunks(root);
1386 btrfs_commit_transaction(trans, root);
1387
1388 if (seeding_dev) {
1389 mutex_unlock(&uuid_mutex);
1390 up_write(&sb->s_umount);
1391
1392 ret = btrfs_relocate_sys_chunks(root);
1393 BUG_ON(ret);
1394 }
1395out:
1396 mutex_unlock(&root->fs_info->volume_mutex);
1397 return ret;
1398error:
1399 close_bdev_exclusive(bdev, 0);
1400 if (seeding_dev) {
1401 mutex_unlock(&uuid_mutex);
1402 up_write(&sb->s_umount);
1403 }
1404 goto out;
1405}
1406
1407static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
1408 struct btrfs_device *device)
1409{
1410 int ret;
1411 struct btrfs_path *path;
1412 struct btrfs_root *root;
1413 struct btrfs_dev_item *dev_item;
1414 struct extent_buffer *leaf;
1415 struct btrfs_key key;
1416
1417 root = device->dev_root->fs_info->chunk_root;
1418
1419 path = btrfs_alloc_path();
1420 if (!path)
1421 return -ENOMEM;
1422
1423 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1424 key.type = BTRFS_DEV_ITEM_KEY;
1425 key.offset = device->devid;
1426
1427 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1428 if (ret < 0)
1429 goto out;
1430
1431 if (ret > 0) {
1432 ret = -ENOENT;
1433 goto out;
1434 }
1435
1436 leaf = path->nodes[0];
1437 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
1438
1439 btrfs_set_device_id(leaf, dev_item, device->devid);
1440 btrfs_set_device_type(leaf, dev_item, device->type);
1441 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1442 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1443 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
1444 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
1445 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
1446 btrfs_mark_buffer_dirty(leaf);
1447
1448out:
1449 btrfs_free_path(path);
1450 return ret;
1451}
1452
1453static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
1454 struct btrfs_device *device, u64 new_size)
1455{
1456 struct btrfs_super_block *super_copy =
1457 &device->dev_root->fs_info->super_copy;
1458 u64 old_total = btrfs_super_total_bytes(super_copy);
1459 u64 diff = new_size - device->total_bytes;
1460
1461 if (!device->writeable)
1462 return -EACCES;
1463 if (new_size <= device->total_bytes)
1464 return -EINVAL;
1465
1466 btrfs_set_super_total_bytes(super_copy, old_total + diff);
1467 device->fs_devices->total_rw_bytes += diff;
1468
1469 device->total_bytes = new_size;
1470 return btrfs_update_device(trans, device);
1471}
1472
1473int btrfs_grow_device(struct btrfs_trans_handle *trans,
1474 struct btrfs_device *device, u64 new_size)
1475{
1476 int ret;
1477 lock_chunks(device->dev_root);
1478 ret = __btrfs_grow_device(trans, device, new_size);
1479 unlock_chunks(device->dev_root);
1480 return ret;
1481}
1482
1483static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
1484 struct btrfs_root *root,
1485 u64 chunk_tree, u64 chunk_objectid,
1486 u64 chunk_offset)
1487{
1488 int ret;
1489 struct btrfs_path *path;
1490 struct btrfs_key key;
1491
1492 root = root->fs_info->chunk_root;
1493 path = btrfs_alloc_path();
1494 if (!path)
1495 return -ENOMEM;
1496
1497 key.objectid = chunk_objectid;
1498 key.offset = chunk_offset;
1499 key.type = BTRFS_CHUNK_ITEM_KEY;
1500
1501 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1502 BUG_ON(ret);
1503
1504 ret = btrfs_del_item(trans, root, path);
1505 BUG_ON(ret);
1506
1507 btrfs_free_path(path);
1508 return 0;
1509}
1510
1511static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
1512 chunk_offset)
1513{
1514 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1515 struct btrfs_disk_key *disk_key;
1516 struct btrfs_chunk *chunk;
1517 u8 *ptr;
1518 int ret = 0;
1519 u32 num_stripes;
1520 u32 array_size;
1521 u32 len = 0;
1522 u32 cur;
1523 struct btrfs_key key;
1524
1525 array_size = btrfs_super_sys_array_size(super_copy);
1526
1527 ptr = super_copy->sys_chunk_array;
1528 cur = 0;
1529
1530 while (cur < array_size) {
1531 disk_key = (struct btrfs_disk_key *)ptr;
1532 btrfs_disk_key_to_cpu(&key, disk_key);
1533
1534 len = sizeof(*disk_key);
1535
1536 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
1537 chunk = (struct btrfs_chunk *)(ptr + len);
1538 num_stripes = btrfs_stack_chunk_num_stripes(chunk);
1539 len += btrfs_chunk_item_size(num_stripes);
1540 } else {
1541 ret = -EIO;
1542 break;
1543 }
1544 if (key.objectid == chunk_objectid &&
1545 key.offset == chunk_offset) {
1546 memmove(ptr, ptr + len, array_size - (cur + len));
1547 array_size -= len;
1548 btrfs_set_super_sys_array_size(super_copy, array_size);
1549 } else {
1550 ptr += len;
1551 cur += len;
1552 }
1553 }
1554 return ret;
1555}
1556
1557static int btrfs_relocate_chunk(struct btrfs_root *root,
1558 u64 chunk_tree, u64 chunk_objectid,
1559 u64 chunk_offset)
1560{
1561 struct extent_map_tree *em_tree;
1562 struct btrfs_root *extent_root;
1563 struct btrfs_trans_handle *trans;
1564 struct extent_map *em;
1565 struct map_lookup *map;
1566 int ret;
1567 int i;
1568
1569 printk(KERN_INFO "btrfs relocating chunk %llu\n",
1570 (unsigned long long)chunk_offset);
1571 root = root->fs_info->chunk_root;
1572 extent_root = root->fs_info->extent_root;
1573 em_tree = &root->fs_info->mapping_tree.map_tree;
1574
1575 /* step one, relocate all the extents inside this chunk */
1576 ret = btrfs_relocate_block_group(extent_root, chunk_offset);
1577 BUG_ON(ret);
1578
1579 trans = btrfs_start_transaction(root, 1);
1580 BUG_ON(!trans);
1581
1582 lock_chunks(root);
1583
1584 /*
1585 * step two, delete the device extents and the
1586 * chunk tree entries
1587 */
1588 spin_lock(&em_tree->lock);
1589 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
1590 spin_unlock(&em_tree->lock);
1591
1592 BUG_ON(em->start > chunk_offset ||
1593 em->start + em->len < chunk_offset);
1594 map = (struct map_lookup *)em->bdev;
1595
1596 for (i = 0; i < map->num_stripes; i++) {
1597 ret = btrfs_free_dev_extent(trans, map->stripes[i].dev,
1598 map->stripes[i].physical);
1599 BUG_ON(ret);
1600
1601 if (map->stripes[i].dev) {
1602 ret = btrfs_update_device(trans, map->stripes[i].dev);
1603 BUG_ON(ret);
1604 }
1605 }
1606 ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid,
1607 chunk_offset);
1608
1609 BUG_ON(ret);
1610
1611 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
1612 ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
1613 BUG_ON(ret);
1614 }
1615
1616 ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
1617 BUG_ON(ret);
1618
1619 spin_lock(&em_tree->lock);
1620 remove_extent_mapping(em_tree, em);
1621 spin_unlock(&em_tree->lock);
1622
1623 kfree(map);
1624 em->bdev = NULL;
1625
1626 /* once for the tree */
1627 free_extent_map(em);
1628 /* once for us */
1629 free_extent_map(em);
1630
1631 unlock_chunks(root);
1632 btrfs_end_transaction(trans, root);
1633 return 0;
1634}
1635
1636static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
1637{
1638 struct btrfs_root *chunk_root = root->fs_info->chunk_root;
1639 struct btrfs_path *path;
1640 struct extent_buffer *leaf;
1641 struct btrfs_chunk *chunk;
1642 struct btrfs_key key;
1643 struct btrfs_key found_key;
1644 u64 chunk_tree = chunk_root->root_key.objectid;
1645 u64 chunk_type;
1646 int ret;
1647
1648 path = btrfs_alloc_path();
1649 if (!path)
1650 return -ENOMEM;
1651
1652 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1653 key.offset = (u64)-1;
1654 key.type = BTRFS_CHUNK_ITEM_KEY;
1655
1656 while (1) {
1657 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
1658 if (ret < 0)
1659 goto error;
1660 BUG_ON(ret == 0);
1661
1662 ret = btrfs_previous_item(chunk_root, path, key.objectid,
1663 key.type);
1664 if (ret < 0)
1665 goto error;
1666 if (ret > 0)
1667 break;
1668
1669 leaf = path->nodes[0];
1670 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1671
1672 chunk = btrfs_item_ptr(leaf, path->slots[0],
1673 struct btrfs_chunk);
1674 chunk_type = btrfs_chunk_type(leaf, chunk);
1675 btrfs_release_path(chunk_root, path);
1676
1677 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
1678 ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
1679 found_key.objectid,
1680 found_key.offset);
1681 BUG_ON(ret);
1682 }
1683
1684 if (found_key.offset == 0)
1685 break;
1686 key.offset = found_key.offset - 1;
1687 }
1688 ret = 0;
1689error:
1690 btrfs_free_path(path);
1691 return ret;
1692}
1693
1694static u64 div_factor(u64 num, int factor)
1695{
1696 if (factor == 10)
1697 return num;
1698 num *= factor;
1699 do_div(num, 10);
1700 return num;
1701}
1702
1703int btrfs_balance(struct btrfs_root *dev_root)
1704{
1705 int ret;
1706 struct list_head *cur;
1707 struct list_head *devices = &dev_root->fs_info->fs_devices->devices;
1708 struct btrfs_device *device;
1709 u64 old_size;
1710 u64 size_to_free;
1711 struct btrfs_path *path;
1712 struct btrfs_key key;
1713 struct btrfs_chunk *chunk;
1714 struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
1715 struct btrfs_trans_handle *trans;
1716 struct btrfs_key found_key;
1717
1718 if (dev_root->fs_info->sb->s_flags & MS_RDONLY)
1719 return -EROFS;
1720
1721 mutex_lock(&dev_root->fs_info->volume_mutex);
1722 dev_root = dev_root->fs_info->dev_root;
1723
1724 /* step one make some room on all the devices */
1725 list_for_each(cur, devices) {
1726 device = list_entry(cur, struct btrfs_device, dev_list);
1727 old_size = device->total_bytes;
1728 size_to_free = div_factor(old_size, 1);
1729 size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
1730 if (!device->writeable ||
1731 device->total_bytes - device->bytes_used > size_to_free)
1732 continue;
1733
1734 ret = btrfs_shrink_device(device, old_size - size_to_free);
1735 BUG_ON(ret);
1736
1737 trans = btrfs_start_transaction(dev_root, 1);
1738 BUG_ON(!trans);
1739
1740 ret = btrfs_grow_device(trans, device, old_size);
1741 BUG_ON(ret);
1742
1743 btrfs_end_transaction(trans, dev_root);
1744 }
1745
1746 /* step two, relocate all the chunks */
1747 path = btrfs_alloc_path();
1748 BUG_ON(!path);
1749
1750 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1751 key.offset = (u64)-1;
1752 key.type = BTRFS_CHUNK_ITEM_KEY;
1753
1754 while (1) {
1755 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
1756 if (ret < 0)
1757 goto error;
1758
1759 /*
1760 * this shouldn't happen, it means the last relocate
1761 * failed
1762 */
1763 if (ret == 0)
1764 break;
1765
1766 ret = btrfs_previous_item(chunk_root, path, 0,
1767 BTRFS_CHUNK_ITEM_KEY);
1768 if (ret)
1769 break;
1770
1771 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1772 path->slots[0]);
1773 if (found_key.objectid != key.objectid)
1774 break;
1775
1776 chunk = btrfs_item_ptr(path->nodes[0],
1777 path->slots[0],
1778 struct btrfs_chunk);
1779 key.offset = found_key.offset;
1780 /* chunk zero is special */
1781 if (key.offset == 0)
1782 break;
1783
1784 btrfs_release_path(chunk_root, path);
1785 ret = btrfs_relocate_chunk(chunk_root,
1786 chunk_root->root_key.objectid,
1787 found_key.objectid,
1788 found_key.offset);
1789 BUG_ON(ret);
1790 }
1791 ret = 0;
1792error:
1793 btrfs_free_path(path);
1794 mutex_unlock(&dev_root->fs_info->volume_mutex);
1795 return ret;
1796}
1797
1798/*
1799 * shrinking a device means finding all of the device extents past
1800 * the new size, and then following the back refs to the chunks.
1801 * The chunk relocation code actually frees the device extent
1802 */
1803int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1804{
1805 struct btrfs_trans_handle *trans;
1806 struct btrfs_root *root = device->dev_root;
1807 struct btrfs_dev_extent *dev_extent = NULL;
1808 struct btrfs_path *path;
1809 u64 length;
1810 u64 chunk_tree;
1811 u64 chunk_objectid;
1812 u64 chunk_offset;
1813 int ret;
1814 int slot;
1815 struct extent_buffer *l;
1816 struct btrfs_key key;
1817 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1818 u64 old_total = btrfs_super_total_bytes(super_copy);
1819 u64 diff = device->total_bytes - new_size;
1820
1821 if (new_size >= device->total_bytes)
1822 return -EINVAL;
1823
1824 path = btrfs_alloc_path();
1825 if (!path)
1826 return -ENOMEM;
1827
1828 trans = btrfs_start_transaction(root, 1);
1829 if (!trans) {
1830 ret = -ENOMEM;
1831 goto done;
1832 }
1833
1834 path->reada = 2;
1835
1836 lock_chunks(root);
1837
1838 device->total_bytes = new_size;
1839 if (device->writeable)
1840 device->fs_devices->total_rw_bytes -= diff;
1841 ret = btrfs_update_device(trans, device);
1842 if (ret) {
1843 unlock_chunks(root);
1844 btrfs_end_transaction(trans, root);
1845 goto done;
1846 }
1847 WARN_ON(diff > old_total);
1848 btrfs_set_super_total_bytes(super_copy, old_total - diff);
1849 unlock_chunks(root);
1850 btrfs_end_transaction(trans, root);
1851
1852 key.objectid = device->devid;
1853 key.offset = (u64)-1;
1854 key.type = BTRFS_DEV_EXTENT_KEY;
1855
1856 while (1) {
1857 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1858 if (ret < 0)
1859 goto done;
1860
1861 ret = btrfs_previous_item(root, path, 0, key.type);
1862 if (ret < 0)
1863 goto done;
1864 if (ret) {
1865 ret = 0;
1866 goto done;
1867 }
1868
1869 l = path->nodes[0];
1870 slot = path->slots[0];
1871 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1872
1873 if (key.objectid != device->devid)
1874 goto done;
1875
1876 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
1877 length = btrfs_dev_extent_length(l, dev_extent);
1878
1879 if (key.offset + length <= new_size)
1880 goto done;
1881
1882 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
1883 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
1884 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
1885 btrfs_release_path(root, path);
1886
1887 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
1888 chunk_offset);
1889 if (ret)
1890 goto done;
1891 }
1892
1893done:
1894 btrfs_free_path(path);
1895 return ret;
1896}
1897
1898static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
1899 struct btrfs_root *root,
1900 struct btrfs_key *key,
1901 struct btrfs_chunk *chunk, int item_size)
1902{
1903 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1904 struct btrfs_disk_key disk_key;
1905 u32 array_size;
1906 u8 *ptr;
1907
1908 array_size = btrfs_super_sys_array_size(super_copy);
1909 if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
1910 return -EFBIG;
1911
1912 ptr = super_copy->sys_chunk_array + array_size;
1913 btrfs_cpu_key_to_disk(&disk_key, key);
1914 memcpy(ptr, &disk_key, sizeof(disk_key));
1915 ptr += sizeof(disk_key);
1916 memcpy(ptr, chunk, item_size);
1917 item_size += sizeof(disk_key);
1918 btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
1919 return 0;
1920}
1921
1922static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size,
1923 int num_stripes, int sub_stripes)
1924{
1925 if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP))
1926 return calc_size;
1927 else if (type & BTRFS_BLOCK_GROUP_RAID10)
1928 return calc_size * (num_stripes / sub_stripes);
1929 else
1930 return calc_size * num_stripes;
1931}
1932
1933static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
1934 struct btrfs_root *extent_root,
1935 struct map_lookup **map_ret,
1936 u64 *num_bytes, u64 *stripe_size,
1937 u64 start, u64 type)
1938{
1939 struct btrfs_fs_info *info = extent_root->fs_info;
1940 struct btrfs_device *device = NULL;
1941 struct btrfs_fs_devices *fs_devices = info->fs_devices;
1942 struct list_head *cur;
1943 struct map_lookup *map = NULL;
1944 struct extent_map_tree *em_tree;
1945 struct extent_map *em;
1946 struct list_head private_devs;
1947 int min_stripe_size = 1 * 1024 * 1024;
1948 u64 calc_size = 1024 * 1024 * 1024;
1949 u64 max_chunk_size = calc_size;
1950 u64 min_free;
1951 u64 avail;
1952 u64 max_avail = 0;
1953 u64 dev_offset;
1954 int num_stripes = 1;
1955 int min_stripes = 1;
1956 int sub_stripes = 0;
1957 int looped = 0;
1958 int ret;
1959 int index;
1960 int stripe_len = 64 * 1024;
1961
1962 if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
1963 (type & BTRFS_BLOCK_GROUP_DUP)) {
1964 WARN_ON(1);
1965 type &= ~BTRFS_BLOCK_GROUP_DUP;
1966 }
1967 if (list_empty(&fs_devices->alloc_list))
1968 return -ENOSPC;
1969
1970 if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
1971 num_stripes = fs_devices->rw_devices;
1972 min_stripes = 2;
1973 }
1974 if (type & (BTRFS_BLOCK_GROUP_DUP)) {
1975 num_stripes = 2;
1976 min_stripes = 2;
1977 }
1978 if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
1979 num_stripes = min_t(u64, 2, fs_devices->rw_devices);
1980 if (num_stripes < 2)
1981 return -ENOSPC;
1982 min_stripes = 2;
1983 }
1984 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
1985 num_stripes = fs_devices->rw_devices;
1986 if (num_stripes < 4)
1987 return -ENOSPC;
1988 num_stripes &= ~(u32)1;
1989 sub_stripes = 2;
1990 min_stripes = 4;
1991 }
1992
1993 if (type & BTRFS_BLOCK_GROUP_DATA) {
1994 max_chunk_size = 10 * calc_size;
1995 min_stripe_size = 64 * 1024 * 1024;
1996 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
1997 max_chunk_size = 4 * calc_size;
1998 min_stripe_size = 32 * 1024 * 1024;
1999 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
2000 calc_size = 8 * 1024 * 1024;
2001 max_chunk_size = calc_size * 2;
2002 min_stripe_size = 1 * 1024 * 1024;
2003 }
2004
2005 /* we don't want a chunk larger than 10% of writeable space */
2006 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
2007 max_chunk_size);
2008
2009again:
2010 if (!map || map->num_stripes != num_stripes) {
2011 kfree(map);
2012 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
2013 if (!map)
2014 return -ENOMEM;
2015 map->num_stripes = num_stripes;
2016 }
2017
2018 if (calc_size * num_stripes > max_chunk_size) {
2019 calc_size = max_chunk_size;
2020 do_div(calc_size, num_stripes);
2021 do_div(calc_size, stripe_len);
2022 calc_size *= stripe_len;
2023 }
2024 /* we don't want tiny stripes */
2025 calc_size = max_t(u64, min_stripe_size, calc_size);
2026
2027 do_div(calc_size, stripe_len);
2028 calc_size *= stripe_len;
2029
2030 cur = fs_devices->alloc_list.next;
2031 index = 0;
2032
2033 if (type & BTRFS_BLOCK_GROUP_DUP)
2034 min_free = calc_size * 2;
2035 else
2036 min_free = calc_size;
2037
2038 /*
2039 * we add 1MB because we never use the first 1MB of the device, unless
2040 * we've looped, then we are likely allocating the maximum amount of
2041 * space left already
2042 */
2043 if (!looped)
2044 min_free += 1024 * 1024;
2045
2046 INIT_LIST_HEAD(&private_devs);
2047 while (index < num_stripes) {
2048 device = list_entry(cur, struct btrfs_device, dev_alloc_list);
2049 BUG_ON(!device->writeable);
2050 if (device->total_bytes > device->bytes_used)
2051 avail = device->total_bytes - device->bytes_used;
2052 else
2053 avail = 0;
2054 cur = cur->next;
2055
2056 if (device->in_fs_metadata && avail >= min_free) {
2057 ret = find_free_dev_extent(trans, device,
2058 min_free, &dev_offset);
2059 if (ret == 0) {
2060 list_move_tail(&device->dev_alloc_list,
2061 &private_devs);
2062 map->stripes[index].dev = device;
2063 map->stripes[index].physical = dev_offset;
2064 index++;
2065 if (type & BTRFS_BLOCK_GROUP_DUP) {
2066 map->stripes[index].dev = device;
2067 map->stripes[index].physical =
2068 dev_offset + calc_size;
2069 index++;
2070 }
2071 }
2072 } else if (device->in_fs_metadata && avail > max_avail)
2073 max_avail = avail;
2074 if (cur == &fs_devices->alloc_list)
2075 break;
2076 }
2077 list_splice(&private_devs, &fs_devices->alloc_list);
2078 if (index < num_stripes) {
2079 if (index >= min_stripes) {
2080 num_stripes = index;
2081 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
2082 num_stripes /= sub_stripes;
2083 num_stripes *= sub_stripes;
2084 }
2085 looped = 1;
2086 goto again;
2087 }
2088 if (!looped && max_avail > 0) {
2089 looped = 1;
2090 calc_size = max_avail;
2091 goto again;
2092 }
2093 kfree(map);
2094 return -ENOSPC;
2095 }
2096 map->sector_size = extent_root->sectorsize;
2097 map->stripe_len = stripe_len;
2098 map->io_align = stripe_len;
2099 map->io_width = stripe_len;
2100 map->type = type;
2101 map->num_stripes = num_stripes;
2102 map->sub_stripes = sub_stripes;
2103
2104 *map_ret = map;
2105 *stripe_size = calc_size;
2106 *num_bytes = chunk_bytes_by_type(type, calc_size,
2107 num_stripes, sub_stripes);
2108
2109 em = alloc_extent_map(GFP_NOFS);
2110 if (!em) {
2111 kfree(map);
2112 return -ENOMEM;
2113 }
2114 em->bdev = (struct block_device *)map;
2115 em->start = start;
2116 em->len = *num_bytes;
2117 em->block_start = 0;
2118 em->block_len = em->len;
2119
2120 em_tree = &extent_root->fs_info->mapping_tree.map_tree;
2121 spin_lock(&em_tree->lock);
2122 ret = add_extent_mapping(em_tree, em);
2123 spin_unlock(&em_tree->lock);
2124 BUG_ON(ret);
2125 free_extent_map(em);
2126
2127 ret = btrfs_make_block_group(trans, extent_root, 0, type,
2128 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
2129 start, *num_bytes);
2130 BUG_ON(ret);
2131
2132 index = 0;
2133 while (index < map->num_stripes) {
2134 device = map->stripes[index].dev;
2135 dev_offset = map->stripes[index].physical;
2136
2137 ret = btrfs_alloc_dev_extent(trans, device,
2138 info->chunk_root->root_key.objectid,
2139 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
2140 start, dev_offset, calc_size);
2141 BUG_ON(ret);
2142 index++;
2143 }
2144
2145 return 0;
2146}
2147
2148static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
2149 struct btrfs_root *extent_root,
2150 struct map_lookup *map, u64 chunk_offset,
2151 u64 chunk_size, u64 stripe_size)
2152{
2153 u64 dev_offset;
2154 struct btrfs_key key;
2155 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
2156 struct btrfs_device *device;
2157 struct btrfs_chunk *chunk;
2158 struct btrfs_stripe *stripe;
2159 size_t item_size = btrfs_chunk_item_size(map->num_stripes);
2160 int index = 0;
2161 int ret;
2162
2163 chunk = kzalloc(item_size, GFP_NOFS);
2164 if (!chunk)
2165 return -ENOMEM;
2166
2167 index = 0;
2168 while (index < map->num_stripes) {
2169 device = map->stripes[index].dev;
2170 device->bytes_used += stripe_size;
2171 ret = btrfs_update_device(trans, device);
2172 BUG_ON(ret);
2173 index++;
2174 }
2175
2176 index = 0;
2177 stripe = &chunk->stripe;
2178 while (index < map->num_stripes) {
2179 device = map->stripes[index].dev;
2180 dev_offset = map->stripes[index].physical;
2181
2182 btrfs_set_stack_stripe_devid(stripe, device->devid);
2183 btrfs_set_stack_stripe_offset(stripe, dev_offset);
2184 memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
2185 stripe++;
2186 index++;
2187 }
2188
2189 btrfs_set_stack_chunk_length(chunk, chunk_size);
2190 btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
2191 btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len);
2192 btrfs_set_stack_chunk_type(chunk, map->type);
2193 btrfs_set_stack_chunk_num_stripes(chunk, map->num_stripes);
2194 btrfs_set_stack_chunk_io_align(chunk, map->stripe_len);
2195 btrfs_set_stack_chunk_io_width(chunk, map->stripe_len);
2196 btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
2197 btrfs_set_stack_chunk_sub_stripes(chunk, map->sub_stripes);
2198
2199 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2200 key.type = BTRFS_CHUNK_ITEM_KEY;
2201 key.offset = chunk_offset;
2202
2203 ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
2204 BUG_ON(ret);
2205
2206 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
2207 ret = btrfs_add_system_chunk(trans, chunk_root, &key, chunk,
2208 item_size);
2209 BUG_ON(ret);
2210 }
2211 kfree(chunk);
2212 return 0;
2213}
2214
2215/*
2216 * Chunk allocation falls into two parts. The first part does works
2217 * that make the new allocated chunk useable, but not do any operation
2218 * that modifies the chunk tree. The second part does the works that
2219 * require modifying the chunk tree. This division is important for the
2220 * bootstrap process of adding storage to a seed btrfs.
2221 */
2222int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2223 struct btrfs_root *extent_root, u64 type)
2224{
2225 u64 chunk_offset;
2226 u64 chunk_size;
2227 u64 stripe_size;
2228 struct map_lookup *map;
2229 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
2230 int ret;
2231
2232 ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
2233 &chunk_offset);
2234 if (ret)
2235 return ret;
2236
2237 ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
2238 &stripe_size, chunk_offset, type);
2239 if (ret)
2240 return ret;
2241
2242 ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
2243 chunk_size, stripe_size);
2244 BUG_ON(ret);
2245 return 0;
2246}
2247
2248static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
2249 struct btrfs_root *root,
2250 struct btrfs_device *device)
2251{
2252 u64 chunk_offset;
2253 u64 sys_chunk_offset;
2254 u64 chunk_size;
2255 u64 sys_chunk_size;
2256 u64 stripe_size;
2257 u64 sys_stripe_size;
2258 u64 alloc_profile;
2259 struct map_lookup *map;
2260 struct map_lookup *sys_map;
2261 struct btrfs_fs_info *fs_info = root->fs_info;
2262 struct btrfs_root *extent_root = fs_info->extent_root;
2263 int ret;
2264
2265 ret = find_next_chunk(fs_info->chunk_root,
2266 BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset);
2267 BUG_ON(ret);
2268
2269 alloc_profile = BTRFS_BLOCK_GROUP_METADATA |
2270 (fs_info->metadata_alloc_profile &
2271 fs_info->avail_metadata_alloc_bits);
2272 alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
2273
2274 ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
2275 &stripe_size, chunk_offset, alloc_profile);
2276 BUG_ON(ret);
2277
2278 sys_chunk_offset = chunk_offset + chunk_size;
2279
2280 alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM |
2281 (fs_info->system_alloc_profile &
2282 fs_info->avail_system_alloc_bits);
2283 alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
2284
2285 ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
2286 &sys_chunk_size, &sys_stripe_size,
2287 sys_chunk_offset, alloc_profile);
2288 BUG_ON(ret);
2289
2290 ret = btrfs_add_device(trans, fs_info->chunk_root, device);
2291 BUG_ON(ret);
2292
2293 /*
2294 * Modifying chunk tree needs allocating new blocks from both
2295 * system block group and metadata block group. So we only can
2296 * do operations require modifying the chunk tree after both
2297 * block groups were created.
2298 */
2299 ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
2300 chunk_size, stripe_size);
2301 BUG_ON(ret);
2302
2303 ret = __finish_chunk_alloc(trans, extent_root, sys_map,
2304 sys_chunk_offset, sys_chunk_size,
2305 sys_stripe_size);
2306 BUG_ON(ret);
2307 return 0;
2308}
2309
2310int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
2311{
2312 struct extent_map *em;
2313 struct map_lookup *map;
2314 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
2315 int readonly = 0;
2316 int i;
2317
2318 spin_lock(&map_tree->map_tree.lock);
2319 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
2320 spin_unlock(&map_tree->map_tree.lock);
2321 if (!em)
2322 return 1;
2323
2324 map = (struct map_lookup *)em->bdev;
2325 for (i = 0; i < map->num_stripes; i++) {
2326 if (!map->stripes[i].dev->writeable) {
2327 readonly = 1;
2328 break;
2329 }
2330 }
2331 free_extent_map(em);
2332 return readonly;
2333}
2334
2335void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
2336{
2337 extent_map_tree_init(&tree->map_tree, GFP_NOFS);
2338}
2339
2340void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
2341{
2342 struct extent_map *em;
2343
2344 while (1) {
2345 spin_lock(&tree->map_tree.lock);
2346 em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
2347 if (em)
2348 remove_extent_mapping(&tree->map_tree, em);
2349 spin_unlock(&tree->map_tree.lock);
2350 if (!em)
2351 break;
2352 kfree(em->bdev);
2353 /* once for us */
2354 free_extent_map(em);
2355 /* once for the tree */
2356 free_extent_map(em);
2357 }
2358}
2359
2360int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
2361{
2362 struct extent_map *em;
2363 struct map_lookup *map;
2364 struct extent_map_tree *em_tree = &map_tree->map_tree;
2365 int ret;
2366
2367 spin_lock(&em_tree->lock);
2368 em = lookup_extent_mapping(em_tree, logical, len);
2369 spin_unlock(&em_tree->lock);
2370 BUG_ON(!em);
2371
2372 BUG_ON(em->start > logical || em->start + em->len < logical);
2373 map = (struct map_lookup *)em->bdev;
2374 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
2375 ret = map->num_stripes;
2376 else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
2377 ret = map->sub_stripes;
2378 else
2379 ret = 1;
2380 free_extent_map(em);
2381 return ret;
2382}
2383
2384static int find_live_mirror(struct map_lookup *map, int first, int num,
2385 int optimal)
2386{
2387 int i;
2388 if (map->stripes[optimal].dev->bdev)
2389 return optimal;
2390 for (i = first; i < first + num; i++) {
2391 if (map->stripes[i].dev->bdev)
2392 return i;
2393 }
2394 /* we couldn't find one that doesn't fail. Just return something
2395 * and the io error handling code will clean up eventually
2396 */
2397 return optimal;
2398}
2399
2400static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
2401 u64 logical, u64 *length,
2402 struct btrfs_multi_bio **multi_ret,
2403 int mirror_num, struct page *unplug_page)
2404{
2405 struct extent_map *em;
2406 struct map_lookup *map;
2407 struct extent_map_tree *em_tree = &map_tree->map_tree;
2408 u64 offset;
2409 u64 stripe_offset;
2410 u64 stripe_nr;
2411 int stripes_allocated = 8;
2412 int stripes_required = 1;
2413 int stripe_index;
2414 int i;
2415 int num_stripes;
2416 int max_errors = 0;
2417 struct btrfs_multi_bio *multi = NULL;
2418
2419 if (multi_ret && !(rw & (1 << BIO_RW)))
2420 stripes_allocated = 1;
2421again:
2422 if (multi_ret) {
2423 multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
2424 GFP_NOFS);
2425 if (!multi)
2426 return -ENOMEM;
2427
2428 atomic_set(&multi->error, 0);
2429 }
2430
2431 spin_lock(&em_tree->lock);
2432 em = lookup_extent_mapping(em_tree, logical, *length);
2433 spin_unlock(&em_tree->lock);
2434
2435 if (!em && unplug_page)
2436 return 0;
2437
2438 if (!em) {
2439 printk(KERN_CRIT "unable to find logical %llu len %llu\n",
2440 (unsigned long long)logical,
2441 (unsigned long long)*length);
2442 BUG();
2443 }
2444
2445 BUG_ON(em->start > logical || em->start + em->len < logical);
2446 map = (struct map_lookup *)em->bdev;
2447 offset = logical - em->start;
2448
2449 if (mirror_num > map->num_stripes)
2450 mirror_num = 0;
2451
2452 /* if our multi bio struct is too small, back off and try again */
2453 if (rw & (1 << BIO_RW)) {
2454 if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
2455 BTRFS_BLOCK_GROUP_DUP)) {
2456 stripes_required = map->num_stripes;
2457 max_errors = 1;
2458 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
2459 stripes_required = map->sub_stripes;
2460 max_errors = 1;
2461 }
2462 }
2463 if (multi_ret && rw == WRITE &&
2464 stripes_allocated < stripes_required) {
2465 stripes_allocated = map->num_stripes;
2466 free_extent_map(em);
2467 kfree(multi);
2468 goto again;
2469 }
2470 stripe_nr = offset;
2471 /*
2472 * stripe_nr counts the total number of stripes we have to stride
2473 * to get to this block
2474 */
2475 do_div(stripe_nr, map->stripe_len);
2476
2477 stripe_offset = stripe_nr * map->stripe_len;
2478 BUG_ON(offset < stripe_offset);
2479
2480 /* stripe_offset is the offset of this block in its stripe*/
2481 stripe_offset = offset - stripe_offset;
2482
2483 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
2484 BTRFS_BLOCK_GROUP_RAID10 |
2485 BTRFS_BLOCK_GROUP_DUP)) {
2486 /* we limit the length of each bio to what fits in a stripe */
2487 *length = min_t(u64, em->len - offset,
2488 map->stripe_len - stripe_offset);
2489 } else {
2490 *length = em->len - offset;
2491 }
2492
2493 if (!multi_ret && !unplug_page)
2494 goto out;
2495
2496 num_stripes = 1;
2497 stripe_index = 0;
2498 if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
2499 if (unplug_page || (rw & (1 << BIO_RW)))
2500 num_stripes = map->num_stripes;
2501 else if (mirror_num)
2502 stripe_index = mirror_num - 1;
2503 else {
2504 stripe_index = find_live_mirror(map, 0,
2505 map->num_stripes,
2506 current->pid % map->num_stripes);
2507 }
2508
2509 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
2510 if (rw & (1 << BIO_RW))
2511 num_stripes = map->num_stripes;
2512 else if (mirror_num)
2513 stripe_index = mirror_num - 1;
2514
2515 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
2516 int factor = map->num_stripes / map->sub_stripes;
2517
2518 stripe_index = do_div(stripe_nr, factor);
2519 stripe_index *= map->sub_stripes;
2520
2521 if (unplug_page || (rw & (1 << BIO_RW)))
2522 num_stripes = map->sub_stripes;
2523 else if (mirror_num)
2524 stripe_index += mirror_num - 1;
2525 else {
2526 stripe_index = find_live_mirror(map, stripe_index,
2527 map->sub_stripes, stripe_index +
2528 current->pid % map->sub_stripes);
2529 }
2530 } else {
2531 /*
2532 * after this do_div call, stripe_nr is the number of stripes
2533 * on this device we have to walk to find the data, and
2534 * stripe_index is the number of our device in the stripe array
2535 */
2536 stripe_index = do_div(stripe_nr, map->num_stripes);
2537 }
2538 BUG_ON(stripe_index >= map->num_stripes);
2539
2540 for (i = 0; i < num_stripes; i++) {
2541 if (unplug_page) {
2542 struct btrfs_device *device;
2543 struct backing_dev_info *bdi;
2544
2545 device = map->stripes[stripe_index].dev;
2546 if (device->bdev) {
2547 bdi = blk_get_backing_dev_info(device->bdev);
2548 if (bdi->unplug_io_fn)
2549 bdi->unplug_io_fn(bdi, unplug_page);
2550 }
2551 } else {
2552 multi->stripes[i].physical =
2553 map->stripes[stripe_index].physical +
2554 stripe_offset + stripe_nr * map->stripe_len;
2555 multi->stripes[i].dev = map->stripes[stripe_index].dev;
2556 }
2557 stripe_index++;
2558 }
2559 if (multi_ret) {
2560 *multi_ret = multi;
2561 multi->num_stripes = num_stripes;
2562 multi->max_errors = max_errors;
2563 }
2564out:
2565 free_extent_map(em);
2566 return 0;
2567}
2568
2569int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
2570 u64 logical, u64 *length,
2571 struct btrfs_multi_bio **multi_ret, int mirror_num)
2572{
2573 return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
2574 mirror_num, NULL);
2575}
2576
2577int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
2578 u64 chunk_start, u64 physical, u64 devid,
2579 u64 **logical, int *naddrs, int *stripe_len)
2580{
2581 struct extent_map_tree *em_tree = &map_tree->map_tree;
2582 struct extent_map *em;
2583 struct map_lookup *map;
2584 u64 *buf;
2585 u64 bytenr;
2586 u64 length;
2587 u64 stripe_nr;
2588 int i, j, nr = 0;
2589
2590 spin_lock(&em_tree->lock);
2591 em = lookup_extent_mapping(em_tree, chunk_start, 1);
2592 spin_unlock(&em_tree->lock);
2593
2594 BUG_ON(!em || em->start != chunk_start);
2595 map = (struct map_lookup *)em->bdev;
2596
2597 length = em->len;
2598 if (map->type & BTRFS_BLOCK_GROUP_RAID10)
2599 do_div(length, map->num_stripes / map->sub_stripes);
2600 else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
2601 do_div(length, map->num_stripes);
2602
2603 buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
2604 BUG_ON(!buf);
2605
2606 for (i = 0; i < map->num_stripes; i++) {
2607 if (devid && map->stripes[i].dev->devid != devid)
2608 continue;
2609 if (map->stripes[i].physical > physical ||
2610 map->stripes[i].physical + length <= physical)
2611 continue;
2612
2613 stripe_nr = physical - map->stripes[i].physical;
2614 do_div(stripe_nr, map->stripe_len);
2615
2616 if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
2617 stripe_nr = stripe_nr * map->num_stripes + i;
2618 do_div(stripe_nr, map->sub_stripes);
2619 } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
2620 stripe_nr = stripe_nr * map->num_stripes + i;
2621 }
2622 bytenr = chunk_start + stripe_nr * map->stripe_len;
2623 WARN_ON(nr >= map->num_stripes);
2624 for (j = 0; j < nr; j++) {
2625 if (buf[j] == bytenr)
2626 break;
2627 }
2628 if (j == nr) {
2629 WARN_ON(nr >= map->num_stripes);
2630 buf[nr++] = bytenr;
2631 }
2632 }
2633
2634 for (i = 0; i > nr; i++) {
2635 struct btrfs_multi_bio *multi;
2636 struct btrfs_bio_stripe *stripe;
2637 int ret;
2638
2639 length = 1;
2640 ret = btrfs_map_block(map_tree, WRITE, buf[i],
2641 &length, &multi, 0);
2642 BUG_ON(ret);
2643
2644 stripe = multi->stripes;
2645 for (j = 0; j < multi->num_stripes; j++) {
2646 if (stripe->physical >= physical &&
2647 physical < stripe->physical + length)
2648 break;
2649 }
2650 BUG_ON(j >= multi->num_stripes);
2651 kfree(multi);
2652 }
2653
2654 *logical = buf;
2655 *naddrs = nr;
2656 *stripe_len = map->stripe_len;
2657
2658 free_extent_map(em);
2659 return 0;
2660}
2661
2662int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
2663 u64 logical, struct page *page)
2664{
2665 u64 length = PAGE_CACHE_SIZE;
2666 return __btrfs_map_block(map_tree, READ, logical, &length,
2667 NULL, 0, page);
2668}
2669
2670static void end_bio_multi_stripe(struct bio *bio, int err)
2671{
2672 struct btrfs_multi_bio *multi = bio->bi_private;
2673 int is_orig_bio = 0;
2674
2675 if (err)
2676 atomic_inc(&multi->error);
2677
2678 if (bio == multi->orig_bio)
2679 is_orig_bio = 1;
2680
2681 if (atomic_dec_and_test(&multi->stripes_pending)) {
2682 if (!is_orig_bio) {
2683 bio_put(bio);
2684 bio = multi->orig_bio;
2685 }
2686 bio->bi_private = multi->private;
2687 bio->bi_end_io = multi->end_io;
2688 /* only send an error to the higher layers if it is
2689 * beyond the tolerance of the multi-bio
2690 */
2691 if (atomic_read(&multi->error) > multi->max_errors) {
2692 err = -EIO;
2693 } else if (err) {
2694 /*
2695 * this bio is actually up to date, we didn't
2696 * go over the max number of errors
2697 */
2698 set_bit(BIO_UPTODATE, &bio->bi_flags);
2699 err = 0;
2700 }
2701 kfree(multi);
2702
2703 bio_endio(bio, err);
2704 } else if (!is_orig_bio) {
2705 bio_put(bio);
2706 }
2707}
2708
2709struct async_sched {
2710 struct bio *bio;
2711 int rw;
2712 struct btrfs_fs_info *info;
2713 struct btrfs_work work;
2714};
2715
2716/*
2717 * see run_scheduled_bios for a description of why bios are collected for
2718 * async submit.
2719 *
2720 * This will add one bio to the pending list for a device and make sure
2721 * the work struct is scheduled.
2722 */
2723static noinline int schedule_bio(struct btrfs_root *root,
2724 struct btrfs_device *device,
2725 int rw, struct bio *bio)
2726{
2727 int should_queue = 1;
2728
2729 /* don't bother with additional async steps for reads, right now */
2730 if (!(rw & (1 << BIO_RW))) {
2731 bio_get(bio);
2732 submit_bio(rw, bio);
2733 bio_put(bio);
2734 return 0;
2735 }
2736
2737 /*
2738 * nr_async_bios allows us to reliably return congestion to the
2739 * higher layers. Otherwise, the async bio makes it appear we have
2740 * made progress against dirty pages when we've really just put it
2741 * on a queue for later
2742 */
2743 atomic_inc(&root->fs_info->nr_async_bios);
2744 WARN_ON(bio->bi_next);
2745 bio->bi_next = NULL;
2746 bio->bi_rw |= rw;
2747
2748 spin_lock(&device->io_lock);
2749
2750 if (device->pending_bio_tail)
2751 device->pending_bio_tail->bi_next = bio;
2752
2753 device->pending_bio_tail = bio;
2754 if (!device->pending_bios)
2755 device->pending_bios = bio;
2756 if (device->running_pending)
2757 should_queue = 0;
2758
2759 spin_unlock(&device->io_lock);
2760
2761 if (should_queue)
2762 btrfs_queue_worker(&root->fs_info->submit_workers,
2763 &device->work);
2764 return 0;
2765}
2766
2767int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
2768 int mirror_num, int async_submit)
2769{
2770 struct btrfs_mapping_tree *map_tree;
2771 struct btrfs_device *dev;
2772 struct bio *first_bio = bio;
2773 u64 logical = (u64)bio->bi_sector << 9;
2774 u64 length = 0;
2775 u64 map_length;
2776 struct btrfs_multi_bio *multi = NULL;
2777 int ret;
2778 int dev_nr = 0;
2779 int total_devs = 1;
2780
2781 length = bio->bi_size;
2782 map_tree = &root->fs_info->mapping_tree;
2783 map_length = length;
2784
2785 ret = btrfs_map_block(map_tree, rw, logical, &map_length, &multi,
2786 mirror_num);
2787 BUG_ON(ret);
2788
2789 total_devs = multi->num_stripes;
2790 if (map_length < length) {
2791 printk(KERN_CRIT "mapping failed logical %llu bio len %llu "
2792 "len %llu\n", (unsigned long long)logical,
2793 (unsigned long long)length,
2794 (unsigned long long)map_length);
2795 BUG();
2796 }
2797 multi->end_io = first_bio->bi_end_io;
2798 multi->private = first_bio->bi_private;
2799 multi->orig_bio = first_bio;
2800 atomic_set(&multi->stripes_pending, multi->num_stripes);
2801
2802 while (dev_nr < total_devs) {
2803 if (total_devs > 1) {
2804 if (dev_nr < total_devs - 1) {
2805 bio = bio_clone(first_bio, GFP_NOFS);
2806 BUG_ON(!bio);
2807 } else {
2808 bio = first_bio;
2809 }
2810 bio->bi_private = multi;
2811 bio->bi_end_io = end_bio_multi_stripe;
2812 }
2813 bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
2814 dev = multi->stripes[dev_nr].dev;
2815 BUG_ON(rw == WRITE && !dev->writeable);
2816 if (dev && dev->bdev) {
2817 bio->bi_bdev = dev->bdev;
2818 if (async_submit)
2819 schedule_bio(root, dev, rw, bio);
2820 else
2821 submit_bio(rw, bio);
2822 } else {
2823 bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
2824 bio->bi_sector = logical >> 9;
2825 bio_endio(bio, -EIO);
2826 }
2827 dev_nr++;
2828 }
2829 if (total_devs == 1)
2830 kfree(multi);
2831 return 0;
2832}
2833
2834struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
2835 u8 *uuid, u8 *fsid)
2836{
2837 struct btrfs_device *device;
2838 struct btrfs_fs_devices *cur_devices;
2839
2840 cur_devices = root->fs_info->fs_devices;
2841 while (cur_devices) {
2842 if (!fsid ||
2843 !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) {
2844 device = __find_device(&cur_devices->devices,
2845 devid, uuid);
2846 if (device)
2847 return device;
2848 }
2849 cur_devices = cur_devices->seed;
2850 }
2851 return NULL;
2852}
2853
2854static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
2855 u64 devid, u8 *dev_uuid)
2856{
2857 struct btrfs_device *device;
2858 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2859
2860 device = kzalloc(sizeof(*device), GFP_NOFS);
2861 if (!device)
2862 return NULL;
2863 list_add(&device->dev_list,
2864 &fs_devices->devices);
2865 device->barriers = 1;
2866 device->dev_root = root->fs_info->dev_root;
2867 device->devid = devid;
2868 device->work.func = pending_bios_fn;
2869 device->fs_devices = fs_devices;
2870 fs_devices->num_devices++;
2871 spin_lock_init(&device->io_lock);
2872 INIT_LIST_HEAD(&device->dev_alloc_list);
2873 memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
2874 return device;
2875}
2876
2877static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
2878 struct extent_buffer *leaf,
2879 struct btrfs_chunk *chunk)
2880{
2881 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
2882 struct map_lookup *map;
2883 struct extent_map *em;
2884 u64 logical;
2885 u64 length;
2886 u64 devid;
2887 u8 uuid[BTRFS_UUID_SIZE];
2888 int num_stripes;
2889 int ret;
2890 int i;
2891
2892 logical = key->offset;
2893 length = btrfs_chunk_length(leaf, chunk);
2894
2895 spin_lock(&map_tree->map_tree.lock);
2896 em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
2897 spin_unlock(&map_tree->map_tree.lock);
2898
2899 /* already mapped? */
2900 if (em && em->start <= logical && em->start + em->len > logical) {
2901 free_extent_map(em);
2902 return 0;
2903 } else if (em) {
2904 free_extent_map(em);
2905 }
2906
2907 map = kzalloc(sizeof(*map), GFP_NOFS);
2908 if (!map)
2909 return -ENOMEM;
2910
2911 em = alloc_extent_map(GFP_NOFS);
2912 if (!em)
2913 return -ENOMEM;
2914 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
2915 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
2916 if (!map) {
2917 free_extent_map(em);
2918 return -ENOMEM;
2919 }
2920
2921 em->bdev = (struct block_device *)map;
2922 em->start = logical;
2923 em->len = length;
2924 em->block_start = 0;
2925 em->block_len = em->len;
2926
2927 map->num_stripes = num_stripes;
2928 map->io_width = btrfs_chunk_io_width(leaf, chunk);
2929 map->io_align = btrfs_chunk_io_align(leaf, chunk);
2930 map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
2931 map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
2932 map->type = btrfs_chunk_type(leaf, chunk);
2933 map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
2934 for (i = 0; i < num_stripes; i++) {
2935 map->stripes[i].physical =
2936 btrfs_stripe_offset_nr(leaf, chunk, i);
2937 devid = btrfs_stripe_devid_nr(leaf, chunk, i);
2938 read_extent_buffer(leaf, uuid, (unsigned long)
2939 btrfs_stripe_dev_uuid_nr(chunk, i),
2940 BTRFS_UUID_SIZE);
2941 map->stripes[i].dev = btrfs_find_device(root, devid, uuid,
2942 NULL);
2943 if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) {
2944 kfree(map);
2945 free_extent_map(em);
2946 return -EIO;
2947 }
2948 if (!map->stripes[i].dev) {
2949 map->stripes[i].dev =
2950 add_missing_dev(root, devid, uuid);
2951 if (!map->stripes[i].dev) {
2952 kfree(map);
2953 free_extent_map(em);
2954 return -EIO;
2955 }
2956 }
2957 map->stripes[i].dev->in_fs_metadata = 1;
2958 }
2959
2960 spin_lock(&map_tree->map_tree.lock);
2961 ret = add_extent_mapping(&map_tree->map_tree, em);
2962 spin_unlock(&map_tree->map_tree.lock);
2963 BUG_ON(ret);
2964 free_extent_map(em);
2965
2966 return 0;
2967}
2968
2969static int fill_device_from_item(struct extent_buffer *leaf,
2970 struct btrfs_dev_item *dev_item,
2971 struct btrfs_device *device)
2972{
2973 unsigned long ptr;
2974
2975 device->devid = btrfs_device_id(leaf, dev_item);
2976 device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2977 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2978 device->type = btrfs_device_type(leaf, dev_item);
2979 device->io_align = btrfs_device_io_align(leaf, dev_item);
2980 device->io_width = btrfs_device_io_width(leaf, dev_item);
2981 device->sector_size = btrfs_device_sector_size(leaf, dev_item);
2982
2983 ptr = (unsigned long)btrfs_device_uuid(dev_item);
2984 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
2985
2986 return 0;
2987}
2988
2989static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
2990{
2991 struct btrfs_fs_devices *fs_devices;
2992 int ret;
2993
2994 mutex_lock(&uuid_mutex);
2995
2996 fs_devices = root->fs_info->fs_devices->seed;
2997 while (fs_devices) {
2998 if (!memcmp(fs_devices->fsid, fsid, BTRFS_UUID_SIZE)) {
2999 ret = 0;
3000 goto out;
3001 }
3002 fs_devices = fs_devices->seed;
3003 }
3004
3005 fs_devices = find_fsid(fsid);
3006 if (!fs_devices) {
3007 ret = -ENOENT;
3008 goto out;
3009 }
3010
3011 fs_devices = clone_fs_devices(fs_devices);
3012 if (IS_ERR(fs_devices)) {
3013 ret = PTR_ERR(fs_devices);
3014 goto out;
3015 }
3016
3017 ret = __btrfs_open_devices(fs_devices, FMODE_READ,
3018 root->fs_info->bdev_holder);
3019 if (ret)
3020 goto out;
3021
3022 if (!fs_devices->seeding) {
3023 __btrfs_close_devices(fs_devices);
3024 free_fs_devices(fs_devices);
3025 ret = -EINVAL;
3026 goto out;
3027 }
3028
3029 fs_devices->seed = root->fs_info->fs_devices->seed;
3030 root->fs_info->fs_devices->seed = fs_devices;
3031out:
3032 mutex_unlock(&uuid_mutex);
3033 return ret;
3034}
3035
3036static int read_one_dev(struct btrfs_root *root,
3037 struct extent_buffer *leaf,
3038 struct btrfs_dev_item *dev_item)
3039{
3040 struct btrfs_device *device;
3041 u64 devid;
3042 int ret;
3043 u8 fs_uuid[BTRFS_UUID_SIZE];
3044 u8 dev_uuid[BTRFS_UUID_SIZE];
3045
3046 devid = btrfs_device_id(leaf, dev_item);
3047 read_extent_buffer(leaf, dev_uuid,
3048 (unsigned long)btrfs_device_uuid(dev_item),
3049 BTRFS_UUID_SIZE);
3050 read_extent_buffer(leaf, fs_uuid,
3051 (unsigned long)btrfs_device_fsid(dev_item),
3052 BTRFS_UUID_SIZE);
3053
3054 if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) {
3055 ret = open_seed_devices(root, fs_uuid);
3056 if (ret && !btrfs_test_opt(root, DEGRADED))
3057 return ret;
3058 }
3059
3060 device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
3061 if (!device || !device->bdev) {
3062 if (!btrfs_test_opt(root, DEGRADED))
3063 return -EIO;
3064
3065 if (!device) {
3066 printk(KERN_WARNING "warning devid %llu missing\n",
3067 (unsigned long long)devid);
3068 device = add_missing_dev(root, devid, dev_uuid);
3069 if (!device)
3070 return -ENOMEM;
3071 }
3072 }
3073
3074 if (device->fs_devices != root->fs_info->fs_devices) {
3075 BUG_ON(device->writeable);
3076 if (device->generation !=
3077 btrfs_device_generation(leaf, dev_item))
3078 return -EINVAL;
3079 }
3080
3081 fill_device_from_item(leaf, dev_item, device);
3082 device->dev_root = root->fs_info->dev_root;
3083 device->in_fs_metadata = 1;
3084 if (device->writeable)
3085 device->fs_devices->total_rw_bytes += device->total_bytes;
3086 ret = 0;
3087 return ret;
3088}
3089
3090int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
3091{
3092 struct btrfs_dev_item *dev_item;
3093
3094 dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
3095 dev_item);
3096 return read_one_dev(root, buf, dev_item);
3097}
3098
3099int btrfs_read_sys_array(struct btrfs_root *root)
3100{
3101 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
3102 struct extent_buffer *sb;
3103 struct btrfs_disk_key *disk_key;
3104 struct btrfs_chunk *chunk;
3105 u8 *ptr;
3106 unsigned long sb_ptr;
3107 int ret = 0;
3108 u32 num_stripes;
3109 u32 array_size;
3110 u32 len = 0;
3111 u32 cur;
3112 struct btrfs_key key;
3113
3114 sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET,
3115 BTRFS_SUPER_INFO_SIZE);
3116 if (!sb)
3117 return -ENOMEM;
3118 btrfs_set_buffer_uptodate(sb);
3119 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
3120 array_size = btrfs_super_sys_array_size(super_copy);
3121
3122 ptr = super_copy->sys_chunk_array;
3123 sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
3124 cur = 0;
3125
3126 while (cur < array_size) {
3127 disk_key = (struct btrfs_disk_key *)ptr;
3128 btrfs_disk_key_to_cpu(&key, disk_key);
3129
3130 len = sizeof(*disk_key); ptr += len;
3131 sb_ptr += len;
3132 cur += len;
3133
3134 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
3135 chunk = (struct btrfs_chunk *)sb_ptr;
3136 ret = read_one_chunk(root, &key, sb, chunk);
3137 if (ret)
3138 break;
3139 num_stripes = btrfs_chunk_num_stripes(sb, chunk);
3140 len = btrfs_chunk_item_size(num_stripes);
3141 } else {
3142 ret = -EIO;
3143 break;
3144 }
3145 ptr += len;
3146 sb_ptr += len;
3147 cur += len;
3148 }
3149 free_extent_buffer(sb);
3150 return ret;
3151}
3152
3153int btrfs_read_chunk_tree(struct btrfs_root *root)
3154{
3155 struct btrfs_path *path;
3156 struct extent_buffer *leaf;
3157 struct btrfs_key key;
3158 struct btrfs_key found_key;
3159 int ret;
3160 int slot;
3161
3162 root = root->fs_info->chunk_root;
3163
3164 path = btrfs_alloc_path();
3165 if (!path)
3166 return -ENOMEM;
3167
3168 /* first we search for all of the device items, and then we
3169 * read in all of the chunk items. This way we can create chunk
3170 * mappings that reference all of the devices that are afound
3171 */
3172 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
3173 key.offset = 0;
3174 key.type = 0;
3175again:
3176 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3177 while (1) {
3178 leaf = path->nodes[0];
3179 slot = path->slots[0];
3180 if (slot >= btrfs_header_nritems(leaf)) {
3181 ret = btrfs_next_leaf(root, path);
3182 if (ret == 0)
3183 continue;
3184 if (ret < 0)
3185 goto error;
3186 break;
3187 }
3188 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3189 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
3190 if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
3191 break;
3192 if (found_key.type == BTRFS_DEV_ITEM_KEY) {
3193 struct btrfs_dev_item *dev_item;
3194 dev_item = btrfs_item_ptr(leaf, slot,
3195 struct btrfs_dev_item);
3196 ret = read_one_dev(root, leaf, dev_item);
3197 if (ret)
3198 goto error;
3199 }
3200 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
3201 struct btrfs_chunk *chunk;
3202 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
3203 ret = read_one_chunk(root, &found_key, leaf, chunk);
3204 if (ret)
3205 goto error;
3206 }
3207 path->slots[0]++;
3208 }
3209 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
3210 key.objectid = 0;
3211 btrfs_release_path(root, path);
3212 goto again;
3213 }
3214 ret = 0;
3215error:
3216 btrfs_free_path(path);
3217 return ret;
3218}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
new file mode 100644
index 000000000000..86c44e9ae110
--- /dev/null
+++ b/fs/btrfs/volumes.h
@@ -0,0 +1,162 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __BTRFS_VOLUMES_
20#define __BTRFS_VOLUMES_
21
22#include <linux/bio.h>
23#include "async-thread.h"
24
25struct buffer_head;
26struct btrfs_device {
27 struct list_head dev_list;
28 struct list_head dev_alloc_list;
29 struct btrfs_fs_devices *fs_devices;
30 struct btrfs_root *dev_root;
31 struct bio *pending_bios;
32 struct bio *pending_bio_tail;
33 int running_pending;
34 u64 generation;
35
36 int barriers;
37 int writeable;
38 int in_fs_metadata;
39
40 spinlock_t io_lock;
41
42 struct block_device *bdev;
43
44 /* the mode sent to open_bdev_exclusive */
45 fmode_t mode;
46
47 char *name;
48
49 /* the internal btrfs device id */
50 u64 devid;
51
52 /* size of the device */
53 u64 total_bytes;
54
55 /* bytes used */
56 u64 bytes_used;
57
58 /* optimal io alignment for this device */
59 u32 io_align;
60
61 /* optimal io width for this device */
62 u32 io_width;
63
64 /* minimal io size for this device */
65 u32 sector_size;
66
67 /* type and info about this device */
68 u64 type;
69
70 /* physical drive uuid (or lvm uuid) */
71 u8 uuid[BTRFS_UUID_SIZE];
72
73 struct btrfs_work work;
74};
75
76struct btrfs_fs_devices {
77 u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
78
79 /* the device with this id has the most recent coyp of the super */
80 u64 latest_devid;
81 u64 latest_trans;
82 u64 num_devices;
83 u64 open_devices;
84 u64 rw_devices;
85 u64 total_rw_bytes;
86 struct block_device *latest_bdev;
87 /* all of the devices in the FS */
88 struct list_head devices;
89
90 /* devices not currently being allocated */
91 struct list_head alloc_list;
92 struct list_head list;
93
94 struct btrfs_fs_devices *seed;
95 int seeding;
96
97 int opened;
98};
99
100struct btrfs_bio_stripe {
101 struct btrfs_device *dev;
102 u64 physical;
103};
104
105struct btrfs_multi_bio {
106 atomic_t stripes_pending;
107 bio_end_io_t *end_io;
108 struct bio *orig_bio;
109 void *private;
110 atomic_t error;
111 int max_errors;
112 int num_stripes;
113 struct btrfs_bio_stripe stripes[];
114};
115
116#define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \
117 (sizeof(struct btrfs_bio_stripe) * (n)))
118
119int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
120 struct btrfs_device *device,
121 u64 chunk_tree, u64 chunk_objectid,
122 u64 chunk_offset, u64 start, u64 num_bytes);
123int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
124 u64 logical, u64 *length,
125 struct btrfs_multi_bio **multi_ret, int mirror_num);
126int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
127 u64 chunk_start, u64 physical, u64 devid,
128 u64 **logical, int *naddrs, int *stripe_len);
129int btrfs_read_sys_array(struct btrfs_root *root);
130int btrfs_read_chunk_tree(struct btrfs_root *root);
131int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
132 struct btrfs_root *extent_root, u64 type);
133void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
134void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
135int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
136 int mirror_num, int async_submit);
137int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf);
138int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
139 fmode_t flags, void *holder);
140int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
141 struct btrfs_fs_devices **fs_devices_ret);
142int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
143int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices);
144int btrfs_add_device(struct btrfs_trans_handle *trans,
145 struct btrfs_root *root,
146 struct btrfs_device *device);
147int btrfs_rm_device(struct btrfs_root *root, char *device_path);
148int btrfs_cleanup_fs_uuids(void);
149int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len);
150int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
151 u64 logical, struct page *page);
152int btrfs_grow_device(struct btrfs_trans_handle *trans,
153 struct btrfs_device *device, u64 new_size);
154struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
155 u8 *uuid, u8 *fsid);
156int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
157int btrfs_init_new_device(struct btrfs_root *root, char *path);
158int btrfs_balance(struct btrfs_root *dev_root);
159void btrfs_unlock_volumes(void);
160void btrfs_lock_volumes(void);
161int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
162#endif
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
new file mode 100644
index 000000000000..7f332e270894
--- /dev/null
+++ b/fs/btrfs/xattr.c
@@ -0,0 +1,322 @@
1/*
2 * Copyright (C) 2007 Red Hat. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/init.h>
20#include <linux/fs.h>
21#include <linux/slab.h>
22#include <linux/rwsem.h>
23#include <linux/xattr.h>
24#include "ctree.h"
25#include "btrfs_inode.h"
26#include "transaction.h"
27#include "xattr.h"
28#include "disk-io.h"
29
30
31ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
32 void *buffer, size_t size)
33{
34 struct btrfs_dir_item *di;
35 struct btrfs_root *root = BTRFS_I(inode)->root;
36 struct btrfs_path *path;
37 struct extent_buffer *leaf;
38 int ret = 0;
39 unsigned long data_ptr;
40
41 path = btrfs_alloc_path();
42 if (!path)
43 return -ENOMEM;
44
45 /* lookup the xattr by name */
46 di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name,
47 strlen(name), 0);
48 if (!di || IS_ERR(di)) {
49 ret = -ENODATA;
50 goto out;
51 }
52
53 leaf = path->nodes[0];
54 /* if size is 0, that means we want the size of the attr */
55 if (!size) {
56 ret = btrfs_dir_data_len(leaf, di);
57 goto out;
58 }
59
60 /* now get the data out of our dir_item */
61 if (btrfs_dir_data_len(leaf, di) > size) {
62 ret = -ERANGE;
63 goto out;
64 }
65 data_ptr = (unsigned long)((char *)(di + 1) +
66 btrfs_dir_name_len(leaf, di));
67 read_extent_buffer(leaf, buffer, data_ptr,
68 btrfs_dir_data_len(leaf, di));
69 ret = btrfs_dir_data_len(leaf, di);
70
71out:
72 btrfs_free_path(path);
73 return ret;
74}
75
76int __btrfs_setxattr(struct inode *inode, const char *name,
77 const void *value, size_t size, int flags)
78{
79 struct btrfs_dir_item *di;
80 struct btrfs_root *root = BTRFS_I(inode)->root;
81 struct btrfs_trans_handle *trans;
82 struct btrfs_path *path;
83 int ret = 0, mod = 0;
84
85 path = btrfs_alloc_path();
86 if (!path)
87 return -ENOMEM;
88
89 trans = btrfs_start_transaction(root, 1);
90 btrfs_set_trans_block_group(trans, inode);
91
92 /* first lets see if we already have this xattr */
93 di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name,
94 strlen(name), -1);
95 if (IS_ERR(di)) {
96 ret = PTR_ERR(di);
97 goto out;
98 }
99
100 /* ok we already have this xattr, lets remove it */
101 if (di) {
102 /* if we want create only exit */
103 if (flags & XATTR_CREATE) {
104 ret = -EEXIST;
105 goto out;
106 }
107
108 ret = btrfs_delete_one_dir_name(trans, root, path, di);
109 if (ret)
110 goto out;
111 btrfs_release_path(root, path);
112
113 /* if we don't have a value then we are removing the xattr */
114 if (!value) {
115 mod = 1;
116 goto out;
117 }
118 } else {
119 btrfs_release_path(root, path);
120
121 if (flags & XATTR_REPLACE) {
122 /* we couldn't find the attr to replace */
123 ret = -ENODATA;
124 goto out;
125 }
126 }
127
128 /* ok we have to create a completely new xattr */
129 ret = btrfs_insert_xattr_item(trans, root, name, strlen(name),
130 value, size, inode->i_ino);
131 if (ret)
132 goto out;
133 mod = 1;
134
135out:
136 if (mod) {
137 inode->i_ctime = CURRENT_TIME;
138 ret = btrfs_update_inode(trans, root, inode);
139 }
140
141 btrfs_end_transaction(trans, root);
142 btrfs_free_path(path);
143 return ret;
144}
145
146ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
147{
148 struct btrfs_key key, found_key;
149 struct inode *inode = dentry->d_inode;
150 struct btrfs_root *root = BTRFS_I(inode)->root;
151 struct btrfs_path *path;
152 struct btrfs_item *item;
153 struct extent_buffer *leaf;
154 struct btrfs_dir_item *di;
155 int ret = 0, slot, advance;
156 size_t total_size = 0, size_left = size;
157 unsigned long name_ptr;
158 size_t name_len;
159 u32 nritems;
160
161 /*
162 * ok we want all objects associated with this id.
163 * NOTE: we set key.offset = 0; because we want to start with the
164 * first xattr that we find and walk forward
165 */
166 key.objectid = inode->i_ino;
167 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
168 key.offset = 0;
169
170 path = btrfs_alloc_path();
171 if (!path)
172 return -ENOMEM;
173 path->reada = 2;
174
175 /* search for our xattrs */
176 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
177 if (ret < 0)
178 goto err;
179 ret = 0;
180 advance = 0;
181 while (1) {
182 leaf = path->nodes[0];
183 nritems = btrfs_header_nritems(leaf);
184 slot = path->slots[0];
185
186 /* this is where we start walking through the path */
187 if (advance || slot >= nritems) {
188 /*
189 * if we've reached the last slot in this leaf we need
190 * to go to the next leaf and reset everything
191 */
192 if (slot >= nritems-1) {
193 ret = btrfs_next_leaf(root, path);
194 if (ret)
195 break;
196 leaf = path->nodes[0];
197 nritems = btrfs_header_nritems(leaf);
198 slot = path->slots[0];
199 } else {
200 /*
201 * just walking through the slots on this leaf
202 */
203 slot++;
204 path->slots[0]++;
205 }
206 }
207 advance = 1;
208
209 item = btrfs_item_nr(leaf, slot);
210 btrfs_item_key_to_cpu(leaf, &found_key, slot);
211
212 /* check to make sure this item is what we want */
213 if (found_key.objectid != key.objectid)
214 break;
215 if (btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY)
216 break;
217
218 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
219
220 name_len = btrfs_dir_name_len(leaf, di);
221 total_size += name_len + 1;
222
223 /* we are just looking for how big our buffer needs to be */
224 if (!size)
225 continue;
226
227 if (!buffer || (name_len + 1) > size_left) {
228 ret = -ERANGE;
229 goto err;
230 }
231
232 name_ptr = (unsigned long)(di + 1);
233 read_extent_buffer(leaf, buffer, name_ptr, name_len);
234 buffer[name_len] = '\0';
235
236 size_left -= name_len + 1;
237 buffer += name_len + 1;
238 }
239 ret = total_size;
240
241err:
242 btrfs_free_path(path);
243
244 return ret;
245}
246
247/*
248 * List of handlers for synthetic system.* attributes. All real ondisk
249 * attributes are handled directly.
250 */
251struct xattr_handler *btrfs_xattr_handlers[] = {
252#ifdef CONFIG_FS_POSIX_ACL
253 &btrfs_xattr_acl_access_handler,
254 &btrfs_xattr_acl_default_handler,
255#endif
256 NULL,
257};
258
259/*
260 * Check if the attribute is in a supported namespace.
261 *
262 * This applied after the check for the synthetic attributes in the system
263 * namespace.
264 */
265static bool btrfs_is_valid_xattr(const char *name)
266{
267 return !strncmp(name, XATTR_SECURITY_PREFIX,
268 XATTR_SECURITY_PREFIX_LEN) ||
269 !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
270 !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
271 !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
272}
273
274ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
275 void *buffer, size_t size)
276{
277 /*
278 * If this is a request for a synthetic attribute in the system.*
279 * namespace use the generic infrastructure to resolve a handler
280 * for it via sb->s_xattr.
281 */
282 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
283 return generic_getxattr(dentry, name, buffer, size);
284
285 if (!btrfs_is_valid_xattr(name))
286 return -EOPNOTSUPP;
287 return __btrfs_getxattr(dentry->d_inode, name, buffer, size);
288}
289
290int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
291 size_t size, int flags)
292{
293 /*
294 * If this is a request for a synthetic attribute in the system.*
295 * namespace use the generic infrastructure to resolve a handler
296 * for it via sb->s_xattr.
297 */
298 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
299 return generic_setxattr(dentry, name, value, size, flags);
300
301 if (!btrfs_is_valid_xattr(name))
302 return -EOPNOTSUPP;
303
304 if (size == 0)
305 value = ""; /* empty EA, do not remove */
306 return __btrfs_setxattr(dentry->d_inode, name, value, size, flags);
307}
308
309int btrfs_removexattr(struct dentry *dentry, const char *name)
310{
311 /*
312 * If this is a request for a synthetic attribute in the system.*
313 * namespace use the generic infrastructure to resolve a handler
314 * for it via sb->s_xattr.
315 */
316 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
317 return generic_removexattr(dentry, name);
318
319 if (!btrfs_is_valid_xattr(name))
320 return -EOPNOTSUPP;
321 return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
322}
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
new file mode 100644
index 000000000000..5b1d08f8e68d
--- /dev/null
+++ b/fs/btrfs/xattr.h
@@ -0,0 +1,39 @@
1/*
2 * Copyright (C) 2007 Red Hat. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __XATTR__
20#define __XATTR__
21
22#include <linux/xattr.h>
23
24extern struct xattr_handler btrfs_xattr_acl_access_handler;
25extern struct xattr_handler btrfs_xattr_acl_default_handler;
26extern struct xattr_handler *btrfs_xattr_handlers[];
27
28extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
29 void *buffer, size_t size);
30extern int __btrfs_setxattr(struct inode *inode, const char *name,
31 const void *value, size_t size, int flags);
32
33extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
34 void *buffer, size_t size);
35extern int btrfs_setxattr(struct dentry *dentry, const char *name,
36 const void *value, size_t size, int flags);
37extern int btrfs_removexattr(struct dentry *dentry, const char *name);
38
39#endif /* __XATTR__ */
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
new file mode 100644
index 000000000000..ecfbce836d32
--- /dev/null
+++ b/fs/btrfs/zlib.c
@@ -0,0 +1,632 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 *
18 * Based on jffs2 zlib code:
19 * Copyright © 2001-2007 Red Hat, Inc.
20 * Created by David Woodhouse <dwmw2@infradead.org>
21 */
22
23#include <linux/kernel.h>
24#include <linux/slab.h>
25#include <linux/zlib.h>
26#include <linux/zutil.h>
27#include <linux/vmalloc.h>
28#include <linux/init.h>
29#include <linux/err.h>
30#include <linux/sched.h>
31#include <linux/pagemap.h>
32#include <linux/bio.h>
33#include "compression.h"
34
35/* Plan: call deflate() with avail_in == *sourcelen,
36 avail_out = *dstlen - 12 and flush == Z_FINISH.
37 If it doesn't manage to finish, call it again with
38 avail_in == 0 and avail_out set to the remaining 12
39 bytes for it to clean up.
40 Q: Is 12 bytes sufficient?
41*/
42#define STREAM_END_SPACE 12
43
44struct workspace {
45 z_stream inf_strm;
46 z_stream def_strm;
47 char *buf;
48 struct list_head list;
49};
50
51static LIST_HEAD(idle_workspace);
52static DEFINE_SPINLOCK(workspace_lock);
53static unsigned long num_workspace;
54static atomic_t alloc_workspace = ATOMIC_INIT(0);
55static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
56
57/*
58 * this finds an available zlib workspace or allocates a new one
59 * NULL or an ERR_PTR is returned if things go bad.
60 */
61static struct workspace *find_zlib_workspace(void)
62{
63 struct workspace *workspace;
64 int ret;
65 int cpus = num_online_cpus();
66
67again:
68 spin_lock(&workspace_lock);
69 if (!list_empty(&idle_workspace)) {
70 workspace = list_entry(idle_workspace.next, struct workspace,
71 list);
72 list_del(&workspace->list);
73 num_workspace--;
74 spin_unlock(&workspace_lock);
75 return workspace;
76
77 }
78 spin_unlock(&workspace_lock);
79 if (atomic_read(&alloc_workspace) > cpus) {
80 DEFINE_WAIT(wait);
81 prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
82 if (atomic_read(&alloc_workspace) > cpus)
83 schedule();
84 finish_wait(&workspace_wait, &wait);
85 goto again;
86 }
87 atomic_inc(&alloc_workspace);
88 workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
89 if (!workspace) {
90 ret = -ENOMEM;
91 goto fail;
92 }
93
94 workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
95 if (!workspace->def_strm.workspace) {
96 ret = -ENOMEM;
97 goto fail;
98 }
99 workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
100 if (!workspace->inf_strm.workspace) {
101 ret = -ENOMEM;
102 goto fail_inflate;
103 }
104 workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
105 if (!workspace->buf) {
106 ret = -ENOMEM;
107 goto fail_kmalloc;
108 }
109 return workspace;
110
111fail_kmalloc:
112 vfree(workspace->inf_strm.workspace);
113fail_inflate:
114 vfree(workspace->def_strm.workspace);
115fail:
116 kfree(workspace);
117 atomic_dec(&alloc_workspace);
118 wake_up(&workspace_wait);
119 return ERR_PTR(ret);
120}
121
122/*
123 * put a workspace struct back on the list or free it if we have enough
124 * idle ones sitting around
125 */
126static int free_workspace(struct workspace *workspace)
127{
128 spin_lock(&workspace_lock);
129 if (num_workspace < num_online_cpus()) {
130 list_add_tail(&workspace->list, &idle_workspace);
131 num_workspace++;
132 spin_unlock(&workspace_lock);
133 if (waitqueue_active(&workspace_wait))
134 wake_up(&workspace_wait);
135 return 0;
136 }
137 spin_unlock(&workspace_lock);
138 vfree(workspace->def_strm.workspace);
139 vfree(workspace->inf_strm.workspace);
140 kfree(workspace->buf);
141 kfree(workspace);
142
143 atomic_dec(&alloc_workspace);
144 if (waitqueue_active(&workspace_wait))
145 wake_up(&workspace_wait);
146 return 0;
147}
148
149/*
150 * cleanup function for module exit
151 */
152static void free_workspaces(void)
153{
154 struct workspace *workspace;
155 while (!list_empty(&idle_workspace)) {
156 workspace = list_entry(idle_workspace.next, struct workspace,
157 list);
158 list_del(&workspace->list);
159 vfree(workspace->def_strm.workspace);
160 vfree(workspace->inf_strm.workspace);
161 kfree(workspace->buf);
162 kfree(workspace);
163 atomic_dec(&alloc_workspace);
164 }
165}
166
167/*
168 * given an address space and start/len, compress the bytes.
169 *
170 * pages are allocated to hold the compressed result and stored
171 * in 'pages'
172 *
173 * out_pages is used to return the number of pages allocated. There
174 * may be pages allocated even if we return an error
175 *
176 * total_in is used to return the number of bytes actually read. It
177 * may be smaller then len if we had to exit early because we
178 * ran out of room in the pages array or because we cross the
179 * max_out threshold.
180 *
181 * total_out is used to return the total number of compressed bytes
182 *
183 * max_out tells us the max number of bytes that we're allowed to
184 * stuff into pages
185 */
186int btrfs_zlib_compress_pages(struct address_space *mapping,
187 u64 start, unsigned long len,
188 struct page **pages,
189 unsigned long nr_dest_pages,
190 unsigned long *out_pages,
191 unsigned long *total_in,
192 unsigned long *total_out,
193 unsigned long max_out)
194{
195 int ret;
196 struct workspace *workspace;
197 char *data_in;
198 char *cpage_out;
199 int nr_pages = 0;
200 struct page *in_page = NULL;
201 struct page *out_page = NULL;
202 int out_written = 0;
203 int in_read = 0;
204 unsigned long bytes_left;
205
206 *out_pages = 0;
207 *total_out = 0;
208 *total_in = 0;
209
210 workspace = find_zlib_workspace();
211 if (!workspace)
212 return -1;
213
214 if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
215 printk(KERN_WARNING "deflateInit failed\n");
216 ret = -1;
217 goto out;
218 }
219
220 workspace->def_strm.total_in = 0;
221 workspace->def_strm.total_out = 0;
222
223 in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
224 data_in = kmap(in_page);
225
226 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
227 cpage_out = kmap(out_page);
228 pages[0] = out_page;
229 nr_pages = 1;
230
231 workspace->def_strm.next_in = data_in;
232 workspace->def_strm.next_out = cpage_out;
233 workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
234 workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
235
236 out_written = 0;
237 in_read = 0;
238
239 while (workspace->def_strm.total_in < len) {
240 ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
241 if (ret != Z_OK) {
242 printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
243 ret);
244 zlib_deflateEnd(&workspace->def_strm);
245 ret = -1;
246 goto out;
247 }
248
249 /* we're making it bigger, give up */
250 if (workspace->def_strm.total_in > 8192 &&
251 workspace->def_strm.total_in <
252 workspace->def_strm.total_out) {
253 ret = -1;
254 goto out;
255 }
256 /* we need another page for writing out. Test this
257 * before the total_in so we will pull in a new page for
258 * the stream end if required
259 */
260 if (workspace->def_strm.avail_out == 0) {
261 kunmap(out_page);
262 if (nr_pages == nr_dest_pages) {
263 out_page = NULL;
264 ret = -1;
265 goto out;
266 }
267 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
268 cpage_out = kmap(out_page);
269 pages[nr_pages] = out_page;
270 nr_pages++;
271 workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
272 workspace->def_strm.next_out = cpage_out;
273 }
274 /* we're all done */
275 if (workspace->def_strm.total_in >= len)
276 break;
277
278 /* we've read in a full page, get a new one */
279 if (workspace->def_strm.avail_in == 0) {
280 if (workspace->def_strm.total_out > max_out)
281 break;
282
283 bytes_left = len - workspace->def_strm.total_in;
284 kunmap(in_page);
285 page_cache_release(in_page);
286
287 start += PAGE_CACHE_SIZE;
288 in_page = find_get_page(mapping,
289 start >> PAGE_CACHE_SHIFT);
290 data_in = kmap(in_page);
291 workspace->def_strm.avail_in = min(bytes_left,
292 PAGE_CACHE_SIZE);
293 workspace->def_strm.next_in = data_in;
294 }
295 }
296 workspace->def_strm.avail_in = 0;
297 ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
298 zlib_deflateEnd(&workspace->def_strm);
299
300 if (ret != Z_STREAM_END) {
301 ret = -1;
302 goto out;
303 }
304
305 if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
306 ret = -1;
307 goto out;
308 }
309
310 ret = 0;
311 *total_out = workspace->def_strm.total_out;
312 *total_in = workspace->def_strm.total_in;
313out:
314 *out_pages = nr_pages;
315 if (out_page)
316 kunmap(out_page);
317
318 if (in_page) {
319 kunmap(in_page);
320 page_cache_release(in_page);
321 }
322 free_workspace(workspace);
323 return ret;
324}
325
326/*
327 * pages_in is an array of pages with compressed data.
328 *
329 * disk_start is the starting logical offset of this array in the file
330 *
331 * bvec is a bio_vec of pages from the file that we want to decompress into
332 *
333 * vcnt is the count of pages in the biovec
334 *
335 * srclen is the number of bytes in pages_in
336 *
337 * The basic idea is that we have a bio that was created by readpages.
338 * The pages in the bio are for the uncompressed data, and they may not
339 * be contiguous. They all correspond to the range of bytes covered by
340 * the compressed extent.
341 */
342int btrfs_zlib_decompress_biovec(struct page **pages_in,
343 u64 disk_start,
344 struct bio_vec *bvec,
345 int vcnt,
346 size_t srclen)
347{
348 int ret = 0;
349 int wbits = MAX_WBITS;
350 struct workspace *workspace;
351 char *data_in;
352 size_t total_out = 0;
353 unsigned long page_bytes_left;
354 unsigned long page_in_index = 0;
355 unsigned long page_out_index = 0;
356 struct page *page_out;
357 unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
358 PAGE_CACHE_SIZE;
359 unsigned long buf_start;
360 unsigned long buf_offset;
361 unsigned long bytes;
362 unsigned long working_bytes;
363 unsigned long pg_offset;
364 unsigned long start_byte;
365 unsigned long current_buf_start;
366 char *kaddr;
367
368 workspace = find_zlib_workspace();
369 if (!workspace)
370 return -ENOMEM;
371
372 data_in = kmap(pages_in[page_in_index]);
373 workspace->inf_strm.next_in = data_in;
374 workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
375 workspace->inf_strm.total_in = 0;
376
377 workspace->inf_strm.total_out = 0;
378 workspace->inf_strm.next_out = workspace->buf;
379 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
380 page_out = bvec[page_out_index].bv_page;
381 page_bytes_left = PAGE_CACHE_SIZE;
382 pg_offset = 0;
383
384 /* If it's deflate, and it's got no preset dictionary, then
385 we can tell zlib to skip the adler32 check. */
386 if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
387 ((data_in[0] & 0x0f) == Z_DEFLATED) &&
388 !(((data_in[0]<<8) + data_in[1]) % 31)) {
389
390 wbits = -((data_in[0] >> 4) + 8);
391 workspace->inf_strm.next_in += 2;
392 workspace->inf_strm.avail_in -= 2;
393 }
394
395 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
396 printk(KERN_WARNING "inflateInit failed\n");
397 ret = -1;
398 goto out;
399 }
400 while (workspace->inf_strm.total_in < srclen) {
401 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
402 if (ret != Z_OK && ret != Z_STREAM_END)
403 break;
404 /*
405 * buf start is the byte offset we're of the start of
406 * our workspace buffer
407 */
408 buf_start = total_out;
409
410 /* total_out is the last byte of the workspace buffer */
411 total_out = workspace->inf_strm.total_out;
412
413 working_bytes = total_out - buf_start;
414
415 /*
416 * start byte is the first byte of the page we're currently
417 * copying into relative to the start of the compressed data.
418 */
419 start_byte = page_offset(page_out) - disk_start;
420
421 if (working_bytes == 0) {
422 /* we didn't make progress in this inflate
423 * call, we're done
424 */
425 if (ret != Z_STREAM_END)
426 ret = -1;
427 break;
428 }
429
430 /* we haven't yet hit data corresponding to this page */
431 if (total_out <= start_byte)
432 goto next;
433
434 /*
435 * the start of the data we care about is offset into
436 * the middle of our working buffer
437 */
438 if (total_out > start_byte && buf_start < start_byte) {
439 buf_offset = start_byte - buf_start;
440 working_bytes -= buf_offset;
441 } else {
442 buf_offset = 0;
443 }
444 current_buf_start = buf_start;
445
446 /* copy bytes from the working buffer into the pages */
447 while (working_bytes > 0) {
448 bytes = min(PAGE_CACHE_SIZE - pg_offset,
449 PAGE_CACHE_SIZE - buf_offset);
450 bytes = min(bytes, working_bytes);
451 kaddr = kmap_atomic(page_out, KM_USER0);
452 memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
453 bytes);
454 kunmap_atomic(kaddr, KM_USER0);
455 flush_dcache_page(page_out);
456
457 pg_offset += bytes;
458 page_bytes_left -= bytes;
459 buf_offset += bytes;
460 working_bytes -= bytes;
461 current_buf_start += bytes;
462
463 /* check if we need to pick another page */
464 if (page_bytes_left == 0) {
465 page_out_index++;
466 if (page_out_index >= vcnt) {
467 ret = 0;
468 goto done;
469 }
470
471 page_out = bvec[page_out_index].bv_page;
472 pg_offset = 0;
473 page_bytes_left = PAGE_CACHE_SIZE;
474 start_byte = page_offset(page_out) - disk_start;
475
476 /*
477 * make sure our new page is covered by this
478 * working buffer
479 */
480 if (total_out <= start_byte)
481 goto next;
482
483 /* the next page in the biovec might not
484 * be adjacent to the last page, but it
485 * might still be found inside this working
486 * buffer. bump our offset pointer
487 */
488 if (total_out > start_byte &&
489 current_buf_start < start_byte) {
490 buf_offset = start_byte - buf_start;
491 working_bytes = total_out - start_byte;
492 current_buf_start = buf_start +
493 buf_offset;
494 }
495 }
496 }
497next:
498 workspace->inf_strm.next_out = workspace->buf;
499 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
500
501 if (workspace->inf_strm.avail_in == 0) {
502 unsigned long tmp;
503 kunmap(pages_in[page_in_index]);
504 page_in_index++;
505 if (page_in_index >= total_pages_in) {
506 data_in = NULL;
507 break;
508 }
509 data_in = kmap(pages_in[page_in_index]);
510 workspace->inf_strm.next_in = data_in;
511 tmp = srclen - workspace->inf_strm.total_in;
512 workspace->inf_strm.avail_in = min(tmp,
513 PAGE_CACHE_SIZE);
514 }
515 }
516 if (ret != Z_STREAM_END)
517 ret = -1;
518 else
519 ret = 0;
520done:
521 zlib_inflateEnd(&workspace->inf_strm);
522 if (data_in)
523 kunmap(pages_in[page_in_index]);
524out:
525 free_workspace(workspace);
526 return ret;
527}
528
529/*
530 * a less complex decompression routine. Our compressed data fits in a
531 * single page, and we want to read a single page out of it.
532 * start_byte tells us the offset into the compressed data we're interested in
533 */
534int btrfs_zlib_decompress(unsigned char *data_in,
535 struct page *dest_page,
536 unsigned long start_byte,
537 size_t srclen, size_t destlen)
538{
539 int ret = 0;
540 int wbits = MAX_WBITS;
541 struct workspace *workspace;
542 unsigned long bytes_left = destlen;
543 unsigned long total_out = 0;
544 char *kaddr;
545
546 if (destlen > PAGE_CACHE_SIZE)
547 return -ENOMEM;
548
549 workspace = find_zlib_workspace();
550 if (!workspace)
551 return -ENOMEM;
552
553 workspace->inf_strm.next_in = data_in;
554 workspace->inf_strm.avail_in = srclen;
555 workspace->inf_strm.total_in = 0;
556
557 workspace->inf_strm.next_out = workspace->buf;
558 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
559 workspace->inf_strm.total_out = 0;
560 /* If it's deflate, and it's got no preset dictionary, then
561 we can tell zlib to skip the adler32 check. */
562 if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
563 ((data_in[0] & 0x0f) == Z_DEFLATED) &&
564 !(((data_in[0]<<8) + data_in[1]) % 31)) {
565
566 wbits = -((data_in[0] >> 4) + 8);
567 workspace->inf_strm.next_in += 2;
568 workspace->inf_strm.avail_in -= 2;
569 }
570
571 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
572 printk(KERN_WARNING "inflateInit failed\n");
573 ret = -1;
574 goto out;
575 }
576
577 while (bytes_left > 0) {
578 unsigned long buf_start;
579 unsigned long buf_offset;
580 unsigned long bytes;
581 unsigned long pg_offset = 0;
582
583 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
584 if (ret != Z_OK && ret != Z_STREAM_END)
585 break;
586
587 buf_start = total_out;
588 total_out = workspace->inf_strm.total_out;
589
590 if (total_out == buf_start) {
591 ret = -1;
592 break;
593 }
594
595 if (total_out <= start_byte)
596 goto next;
597
598 if (total_out > start_byte && buf_start < start_byte)
599 buf_offset = start_byte - buf_start;
600 else
601 buf_offset = 0;
602
603 bytes = min(PAGE_CACHE_SIZE - pg_offset,
604 PAGE_CACHE_SIZE - buf_offset);
605 bytes = min(bytes, bytes_left);
606
607 kaddr = kmap_atomic(dest_page, KM_USER0);
608 memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
609 kunmap_atomic(kaddr, KM_USER0);
610
611 pg_offset += bytes;
612 bytes_left -= bytes;
613next:
614 workspace->inf_strm.next_out = workspace->buf;
615 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
616 }
617
618 if (ret != Z_STREAM_END && bytes_left != 0)
619 ret = -1;
620 else
621 ret = 0;
622
623 zlib_inflateEnd(&workspace->inf_strm);
624out:
625 free_workspace(workspace);
626 return ret;
627}
628
629void btrfs_zlib_exit(void)
630{
631 free_workspaces();
632}
diff --git a/fs/jffs2/compr_rubin.c b/fs/jffs2/compr_rubin.c
index c73fa89b5f8a..170d289ac785 100644
--- a/fs/jffs2/compr_rubin.c
+++ b/fs/jffs2/compr_rubin.c
@@ -22,9 +22,7 @@
22 22
23 23
24#define BIT_DIVIDER_MIPS 1043 24#define BIT_DIVIDER_MIPS 1043
25static int bits_mips[8] = { 277,249,290,267,229,341,212,241}; /* mips32 */ 25static int bits_mips[8] = { 277, 249, 290, 267, 229, 341, 212, 241};
26
27#include <linux/errno.h>
28 26
29struct pushpull { 27struct pushpull {
30 unsigned char *buf; 28 unsigned char *buf;
@@ -43,7 +41,9 @@ struct rubin_state {
43 int bits[8]; 41 int bits[8];
44}; 42};
45 43
46static inline void init_pushpull(struct pushpull *pp, char *buf, unsigned buflen, unsigned ofs, unsigned reserve) 44static inline void init_pushpull(struct pushpull *pp, char *buf,
45 unsigned buflen, unsigned ofs,
46 unsigned reserve)
47{ 47{
48 pp->buf = buf; 48 pp->buf = buf;
49 pp->buflen = buflen; 49 pp->buflen = buflen;
@@ -53,16 +53,14 @@ static inline void init_pushpull(struct pushpull *pp, char *buf, unsigned buflen
53 53
54static inline int pushbit(struct pushpull *pp, int bit, int use_reserved) 54static inline int pushbit(struct pushpull *pp, int bit, int use_reserved)
55{ 55{
56 if (pp->ofs >= pp->buflen - (use_reserved?0:pp->reserve)) { 56 if (pp->ofs >= pp->buflen - (use_reserved?0:pp->reserve))
57 return -ENOSPC; 57 return -ENOSPC;
58 }
59 58
60 if (bit) { 59 if (bit)
61 pp->buf[pp->ofs >> 3] |= (1<<(7-(pp->ofs &7))); 60 pp->buf[pp->ofs >> 3] |= (1<<(7-(pp->ofs & 7)));
62 } 61 else
63 else { 62 pp->buf[pp->ofs >> 3] &= ~(1<<(7-(pp->ofs & 7)));
64 pp->buf[pp->ofs >> 3] &= ~(1<<(7-(pp->ofs &7))); 63
65 }
66 pp->ofs++; 64 pp->ofs++;
67 65
68 return 0; 66 return 0;
@@ -97,6 +95,7 @@ static void init_rubin(struct rubin_state *rs, int div, int *bits)
97 rs->p = (long) (2 * UPPER_BIT_RUBIN); 95 rs->p = (long) (2 * UPPER_BIT_RUBIN);
98 rs->bit_number = (long) 0; 96 rs->bit_number = (long) 0;
99 rs->bit_divider = div; 97 rs->bit_divider = div;
98
100 for (c=0; c<8; c++) 99 for (c=0; c<8; c++)
101 rs->bits[c] = bits[c]; 100 rs->bits[c] = bits[c];
102} 101}
@@ -108,7 +107,8 @@ static int encode(struct rubin_state *rs, long A, long B, int symbol)
108 long i0, i1; 107 long i0, i1;
109 int ret; 108 int ret;
110 109
111 while ((rs->q >= UPPER_BIT_RUBIN) || ((rs->p + rs->q) <= UPPER_BIT_RUBIN)) { 110 while ((rs->q >= UPPER_BIT_RUBIN) ||
111 ((rs->p + rs->q) <= UPPER_BIT_RUBIN)) {
112 rs->bit_number++; 112 rs->bit_number++;
113 113
114 ret = pushbit(&rs->pp, (rs->q & UPPER_BIT_RUBIN) ? 1 : 0, 0); 114 ret = pushbit(&rs->pp, (rs->q & UPPER_BIT_RUBIN) ? 1 : 0, 0);
@@ -119,12 +119,12 @@ static int encode(struct rubin_state *rs, long A, long B, int symbol)
119 rs->p <<= 1; 119 rs->p <<= 1;
120 } 120 }
121 i0 = A * rs->p / (A + B); 121 i0 = A * rs->p / (A + B);
122 if (i0 <= 0) { 122 if (i0 <= 0)
123 i0 = 1; 123 i0 = 1;
124 } 124
125 if (i0 >= rs->p) { 125 if (i0 >= rs->p)
126 i0 = rs->p - 1; 126 i0 = rs->p - 1;
127 } 127
128 i1 = rs->p - i0; 128 i1 = rs->p - i0;
129 129
130 if (symbol == 0) 130 if (symbol == 0)
@@ -157,11 +157,13 @@ static void init_decode(struct rubin_state *rs, int div, int *bits)
157 /* behalve lower */ 157 /* behalve lower */
158 rs->rec_q = 0; 158 rs->rec_q = 0;
159 159
160 for (rs->bit_number = 0; rs->bit_number++ < RUBIN_REG_SIZE; rs->rec_q = rs->rec_q * 2 + (long) (pullbit(&rs->pp))) 160 for (rs->bit_number = 0; rs->bit_number++ < RUBIN_REG_SIZE;
161 rs->rec_q = rs->rec_q * 2 + (long) (pullbit(&rs->pp)))
161 ; 162 ;
162} 163}
163 164
164static void __do_decode(struct rubin_state *rs, unsigned long p, unsigned long q) 165static void __do_decode(struct rubin_state *rs, unsigned long p,
166 unsigned long q)
165{ 167{
166 register unsigned long lower_bits_rubin = LOWER_BITS_RUBIN; 168 register unsigned long lower_bits_rubin = LOWER_BITS_RUBIN;
167 unsigned long rec_q; 169 unsigned long rec_q;
@@ -207,12 +209,11 @@ static int decode(struct rubin_state *rs, long A, long B)
207 __do_decode(rs, p, q); 209 __do_decode(rs, p, q);
208 210
209 i0 = A * rs->p / (A + B); 211 i0 = A * rs->p / (A + B);
210 if (i0 <= 0) { 212 if (i0 <= 0)
211 i0 = 1; 213 i0 = 1;
212 } 214
213 if (i0 >= rs->p) { 215 if (i0 >= rs->p)
214 i0 = rs->p - 1; 216 i0 = rs->p - 1;
215 }
216 217
217 threshold = rs->q + i0; 218 threshold = rs->q + i0;
218 symbol = rs->rec_q >= threshold; 219 symbol = rs->rec_q >= threshold;
@@ -234,14 +235,15 @@ static int out_byte(struct rubin_state *rs, unsigned char byte)
234 struct rubin_state rs_copy; 235 struct rubin_state rs_copy;
235 rs_copy = *rs; 236 rs_copy = *rs;
236 237
237 for (i=0;i<8;i++) { 238 for (i=0; i<8; i++) {
238 ret = encode(rs, rs->bit_divider-rs->bits[i],rs->bits[i],byte&1); 239 ret = encode(rs, rs->bit_divider-rs->bits[i],
240 rs->bits[i], byte & 1);
239 if (ret) { 241 if (ret) {
240 /* Failed. Restore old state */ 242 /* Failed. Restore old state */
241 *rs = rs_copy; 243 *rs = rs_copy;
242 return ret; 244 return ret;
243 } 245 }
244 byte=byte>>1; 246 byte >>= 1 ;
245 } 247 }
246 return 0; 248 return 0;
247} 249}
@@ -251,7 +253,8 @@ static int in_byte(struct rubin_state *rs)
251 int i, result = 0, bit_divider = rs->bit_divider; 253 int i, result = 0, bit_divider = rs->bit_divider;
252 254
253 for (i = 0; i < 8; i++) 255 for (i = 0; i < 8; i++)
254 result |= decode(rs, bit_divider - rs->bits[i], rs->bits[i]) << i; 256 result |= decode(rs, bit_divider - rs->bits[i],
257 rs->bits[i]) << i;
255 258
256 return result; 259 return result;
257} 260}
@@ -259,7 +262,8 @@ static int in_byte(struct rubin_state *rs)
259 262
260 263
261static int rubin_do_compress(int bit_divider, int *bits, unsigned char *data_in, 264static int rubin_do_compress(int bit_divider, int *bits, unsigned char *data_in,
262 unsigned char *cpage_out, uint32_t *sourcelen, uint32_t *dstlen) 265 unsigned char *cpage_out, uint32_t *sourcelen,
266 uint32_t *dstlen)
263 { 267 {
264 int outpos = 0; 268 int outpos = 0;
265 int pos=0; 269 int pos=0;
@@ -295,7 +299,8 @@ static int rubin_do_compress(int bit_divider, int *bits, unsigned char *data_in,
295int jffs2_rubinmips_compress(unsigned char *data_in, unsigned char *cpage_out, 299int jffs2_rubinmips_compress(unsigned char *data_in, unsigned char *cpage_out,
296 uint32_t *sourcelen, uint32_t *dstlen, void *model) 300 uint32_t *sourcelen, uint32_t *dstlen, void *model)
297{ 301{
298 return rubin_do_compress(BIT_DIVIDER_MIPS, bits_mips, data_in, cpage_out, sourcelen, dstlen); 302 return rubin_do_compress(BIT_DIVIDER_MIPS, bits_mips, data_in,
303 cpage_out, sourcelen, dstlen);
299} 304}
300#endif 305#endif
301static int jffs2_dynrubin_compress(unsigned char *data_in, 306static int jffs2_dynrubin_compress(unsigned char *data_in,
@@ -316,9 +321,8 @@ static int jffs2_dynrubin_compress(unsigned char *data_in,
316 return -1; 321 return -1;
317 322
318 memset(histo, 0, 256); 323 memset(histo, 0, 256);
319 for (i=0; i<mysrclen; i++) { 324 for (i=0; i<mysrclen; i++)
320 histo[data_in[i]]++; 325 histo[data_in[i]]++;
321 }
322 memset(bits, 0, sizeof(int)*8); 326 memset(bits, 0, sizeof(int)*8);
323 for (i=0; i<256; i++) { 327 for (i=0; i<256; i++) {
324 if (i&128) 328 if (i&128)
@@ -346,7 +350,8 @@ static int jffs2_dynrubin_compress(unsigned char *data_in,
346 cpage_out[i] = bits[i]; 350 cpage_out[i] = bits[i];
347 } 351 }
348 352
349 ret = rubin_do_compress(256, bits, data_in, cpage_out+8, &mysrclen, &mydstlen); 353 ret = rubin_do_compress(256, bits, data_in, cpage_out+8, &mysrclen,
354 &mydstlen);
350 if (ret) 355 if (ret)
351 return ret; 356 return ret;
352 357
@@ -363,8 +368,10 @@ static int jffs2_dynrubin_compress(unsigned char *data_in,
363 return 0; 368 return 0;
364} 369}
365 370
366static void rubin_do_decompress(int bit_divider, int *bits, unsigned char *cdata_in, 371static void rubin_do_decompress(int bit_divider, int *bits,
367 unsigned char *page_out, uint32_t srclen, uint32_t destlen) 372 unsigned char *cdata_in,
373 unsigned char *page_out, uint32_t srclen,
374 uint32_t destlen)
368{ 375{
369 int outpos = 0; 376 int outpos = 0;
370 struct rubin_state rs; 377 struct rubin_state rs;
@@ -372,9 +379,8 @@ static void rubin_do_decompress(int bit_divider, int *bits, unsigned char *cdata
372 init_pushpull(&rs.pp, cdata_in, srclen, 0, 0); 379 init_pushpull(&rs.pp, cdata_in, srclen, 0, 0);
373 init_decode(&rs, bit_divider, bits); 380 init_decode(&rs, bit_divider, bits);
374 381
375 while (outpos < destlen) { 382 while (outpos < destlen)
376 page_out[outpos++] = in_byte(&rs); 383 page_out[outpos++] = in_byte(&rs);
377 }
378} 384}
379 385
380 386
@@ -383,7 +389,8 @@ static int jffs2_rubinmips_decompress(unsigned char *data_in,
383 uint32_t sourcelen, uint32_t dstlen, 389 uint32_t sourcelen, uint32_t dstlen,
384 void *model) 390 void *model)
385{ 391{
386 rubin_do_decompress(BIT_DIVIDER_MIPS, bits_mips, data_in, cpage_out, sourcelen, dstlen); 392 rubin_do_decompress(BIT_DIVIDER_MIPS, bits_mips, data_in,
393 cpage_out, sourcelen, dstlen);
387 return 0; 394 return 0;
388} 395}
389 396
@@ -398,52 +405,53 @@ static int jffs2_dynrubin_decompress(unsigned char *data_in,
398 for (c=0; c<8; c++) 405 for (c=0; c<8; c++)
399 bits[c] = data_in[c]; 406 bits[c] = data_in[c];
400 407
401 rubin_do_decompress(256, bits, data_in+8, cpage_out, sourcelen-8, dstlen); 408 rubin_do_decompress(256, bits, data_in+8, cpage_out, sourcelen-8,
409 dstlen);
402 return 0; 410 return 0;
403} 411}
404 412
405static struct jffs2_compressor jffs2_rubinmips_comp = { 413static struct jffs2_compressor jffs2_rubinmips_comp = {
406 .priority = JFFS2_RUBINMIPS_PRIORITY, 414 .priority = JFFS2_RUBINMIPS_PRIORITY,
407 .name = "rubinmips", 415 .name = "rubinmips",
408 .compr = JFFS2_COMPR_DYNRUBIN, 416 .compr = JFFS2_COMPR_DYNRUBIN,
409 .compress = NULL, /*&jffs2_rubinmips_compress,*/ 417 .compress = NULL, /*&jffs2_rubinmips_compress,*/
410 .decompress = &jffs2_rubinmips_decompress, 418 .decompress = &jffs2_rubinmips_decompress,
411#ifdef JFFS2_RUBINMIPS_DISABLED 419#ifdef JFFS2_RUBINMIPS_DISABLED
412 .disabled = 1, 420 .disabled = 1,
413#else 421#else
414 .disabled = 0, 422 .disabled = 0,
415#endif 423#endif
416}; 424};
417 425
418int jffs2_rubinmips_init(void) 426int jffs2_rubinmips_init(void)
419{ 427{
420 return jffs2_register_compressor(&jffs2_rubinmips_comp); 428 return jffs2_register_compressor(&jffs2_rubinmips_comp);
421} 429}
422 430
423void jffs2_rubinmips_exit(void) 431void jffs2_rubinmips_exit(void)
424{ 432{
425 jffs2_unregister_compressor(&jffs2_rubinmips_comp); 433 jffs2_unregister_compressor(&jffs2_rubinmips_comp);
426} 434}
427 435
428static struct jffs2_compressor jffs2_dynrubin_comp = { 436static struct jffs2_compressor jffs2_dynrubin_comp = {
429 .priority = JFFS2_DYNRUBIN_PRIORITY, 437 .priority = JFFS2_DYNRUBIN_PRIORITY,
430 .name = "dynrubin", 438 .name = "dynrubin",
431 .compr = JFFS2_COMPR_RUBINMIPS, 439 .compr = JFFS2_COMPR_RUBINMIPS,
432 .compress = jffs2_dynrubin_compress, 440 .compress = jffs2_dynrubin_compress,
433 .decompress = &jffs2_dynrubin_decompress, 441 .decompress = &jffs2_dynrubin_decompress,
434#ifdef JFFS2_DYNRUBIN_DISABLED 442#ifdef JFFS2_DYNRUBIN_DISABLED
435 .disabled = 1, 443 .disabled = 1,
436#else 444#else
437 .disabled = 0, 445 .disabled = 0,
438#endif 446#endif
439}; 447};
440 448
441int jffs2_dynrubin_init(void) 449int jffs2_dynrubin_init(void)
442{ 450{
443 return jffs2_register_compressor(&jffs2_dynrubin_comp); 451 return jffs2_register_compressor(&jffs2_dynrubin_comp);
444} 452}
445 453
446void jffs2_dynrubin_exit(void) 454void jffs2_dynrubin_exit(void)
447{ 455{
448 jffs2_unregister_compressor(&jffs2_dynrubin_comp); 456 jffs2_unregister_compressor(&jffs2_dynrubin_comp);
449} 457}
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 259461b910af..c32b4a1ad6cf 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -175,7 +175,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock
175{ 175{
176 /* For NAND, if the failure did not occur at the device level for a 176 /* For NAND, if the failure did not occur at the device level for a
177 specific physical page, don't bother updating the bad block table. */ 177 specific physical page, don't bother updating the bad block table. */
178 if (jffs2_cleanmarker_oob(c) && (bad_offset != MTD_FAIL_ADDR_UNKNOWN)) { 178 if (jffs2_cleanmarker_oob(c) && (bad_offset != (uint32_t)MTD_FAIL_ADDR_UNKNOWN)) {
179 /* We had a device-level failure to erase. Let's see if we've 179 /* We had a device-level failure to erase. Let's see if we've
180 failed too many times. */ 180 failed too many times. */
181 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) { 181 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) {
@@ -209,7 +209,8 @@ static void jffs2_erase_callback(struct erase_info *instr)
209 struct erase_priv_struct *priv = (void *)instr->priv; 209 struct erase_priv_struct *priv = (void *)instr->priv;
210 210
211 if(instr->state != MTD_ERASE_DONE) { 211 if(instr->state != MTD_ERASE_DONE) {
212 printk(KERN_WARNING "Erase at 0x%08x finished, but state != MTD_ERASE_DONE. State is 0x%x instead.\n", instr->addr, instr->state); 212 printk(KERN_WARNING "Erase at 0x%08llx finished, but state != MTD_ERASE_DONE. State is 0x%x instead.\n",
213 (unsigned long long)instr->addr, instr->state);
213 jffs2_erase_failed(priv->c, priv->jeb, instr->fail_addr); 214 jffs2_erase_failed(priv->c, priv->jeb, instr->fail_addr);
214 } else { 215 } else {
215 jffs2_erase_succeeded(priv->c, priv->jeb); 216 jffs2_erase_succeeded(priv->c, priv->jeb);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 5198ada67398..6d720243f5f4 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -334,6 +334,7 @@ void delete_partition(struct gendisk *disk, int partno)
334 334
335 blk_free_devt(part_devt(part)); 335 blk_free_devt(part_devt(part));
336 rcu_assign_pointer(ptbl->part[partno], NULL); 336 rcu_assign_pointer(ptbl->part[partno], NULL);
337 rcu_assign_pointer(ptbl->last_lookup, NULL);
337 kobject_put(part->holder_dir); 338 kobject_put(part->holder_dir);
338 device_del(part_to_dev(part)); 339 device_del(part_to_dev(part));
339 340
diff --git a/include/acpi/acdisasm.h b/include/acpi/acdisasm.h
deleted file mode 100644
index 0c1ed387073c..000000000000
--- a/include/acpi/acdisasm.h
+++ /dev/null
@@ -1,445 +0,0 @@
1/******************************************************************************
2 *
3 * Name: acdisasm.h - AML disassembler
4 *
5 *****************************************************************************/
6
7/*
8 * Copyright (C) 2000 - 2008, Intel Corp.
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions, and the following disclaimer,
16 * without modification.
17 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
18 * substantially similar to the "NO WARRANTY" disclaimer below
19 * ("Disclaimer") and any redistribution must be conditioned upon
20 * including a substantially similar Disclaimer requirement for further
21 * binary redistribution.
22 * 3. Neither the names of the above-listed copyright holders nor the names
23 * of any contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * Alternatively, this software may be distributed under the terms of the
27 * GNU General Public License ("GPL") version 2 as published by the Free
28 * Software Foundation.
29 *
30 * NO WARRANTY
31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
32 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
33 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
34 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
35 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
40 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 * POSSIBILITY OF SUCH DAMAGES.
42 */
43
44#ifndef __ACDISASM_H__
45#define __ACDISASM_H__
46
47#include "amlresrc.h"
48
49#define BLOCK_NONE 0
50#define BLOCK_PAREN 1
51#define BLOCK_BRACE 2
52#define BLOCK_COMMA_LIST 4
53#define ACPI_DEFAULT_RESNAME *(u32 *) "__RD"
54
55struct acpi_external_list {
56 char *path;
57 char *internal_path;
58 struct acpi_external_list *next;
59 u32 value;
60 u16 length;
61 u8 type;
62};
63
64extern struct acpi_external_list *acpi_gbl_external_list;
65
66typedef const struct acpi_dmtable_info {
67 u8 opcode;
68 u8 offset;
69 char *name;
70
71} acpi_dmtable_info;
72
73/*
74 * Values for Opcode above.
75 * Note: 0-7 must not change, used as a flag shift value
76 */
77#define ACPI_DMT_FLAG0 0
78#define ACPI_DMT_FLAG1 1
79#define ACPI_DMT_FLAG2 2
80#define ACPI_DMT_FLAG3 3
81#define ACPI_DMT_FLAG4 4
82#define ACPI_DMT_FLAG5 5
83#define ACPI_DMT_FLAG6 6
84#define ACPI_DMT_FLAG7 7
85#define ACPI_DMT_FLAGS0 8
86#define ACPI_DMT_FLAGS2 9
87#define ACPI_DMT_UINT8 10
88#define ACPI_DMT_UINT16 11
89#define ACPI_DMT_UINT24 12
90#define ACPI_DMT_UINT32 13
91#define ACPI_DMT_UINT56 14
92#define ACPI_DMT_UINT64 15
93#define ACPI_DMT_STRING 16
94#define ACPI_DMT_NAME4 17
95#define ACPI_DMT_NAME6 18
96#define ACPI_DMT_NAME8 19
97#define ACPI_DMT_CHKSUM 20
98#define ACPI_DMT_SPACEID 21
99#define ACPI_DMT_GAS 22
100#define ACPI_DMT_ASF 23
101#define ACPI_DMT_DMAR 24
102#define ACPI_DMT_HEST 25
103#define ACPI_DMT_HESTNTFY 26
104#define ACPI_DMT_HESTNTYP 27
105#define ACPI_DMT_MADT 28
106#define ACPI_DMT_SRAT 29
107#define ACPI_DMT_EXIT 30
108#define ACPI_DMT_SIG 31
109
110typedef
111void (*acpi_dmtable_handler) (struct acpi_table_header * table);
112
113struct acpi_dmtable_data {
114 char *signature;
115 struct acpi_dmtable_info *table_info;
116 acpi_dmtable_handler table_handler;
117 char *name;
118};
119
120struct acpi_op_walk_info {
121 u32 level;
122 u32 last_level;
123 u32 count;
124 u32 bit_offset;
125 u32 flags;
126 struct acpi_walk_state *walk_state;
127};
128
129typedef
130acpi_status(*asl_walk_callback) (union acpi_parse_object * op,
131 u32 level, void *context);
132
133struct acpi_resource_tag {
134 u32 bit_index;
135 char *tag;
136};
137
138/* Strings used for decoding flags to ASL keywords */
139
140extern const char *acpi_gbl_word_decode[];
141extern const char *acpi_gbl_irq_decode[];
142extern const char *acpi_gbl_lock_rule[];
143extern const char *acpi_gbl_access_types[];
144extern const char *acpi_gbl_update_rules[];
145extern const char *acpi_gbl_match_ops[];
146
147extern struct acpi_dmtable_info acpi_dm_table_info_asf0[];
148extern struct acpi_dmtable_info acpi_dm_table_info_asf1[];
149extern struct acpi_dmtable_info acpi_dm_table_info_asf1a[];
150extern struct acpi_dmtable_info acpi_dm_table_info_asf2[];
151extern struct acpi_dmtable_info acpi_dm_table_info_asf2a[];
152extern struct acpi_dmtable_info acpi_dm_table_info_asf3[];
153extern struct acpi_dmtable_info acpi_dm_table_info_asf4[];
154extern struct acpi_dmtable_info acpi_dm_table_info_asf_hdr[];
155extern struct acpi_dmtable_info acpi_dm_table_info_boot[];
156extern struct acpi_dmtable_info acpi_dm_table_info_bert[];
157extern struct acpi_dmtable_info acpi_dm_table_info_cpep[];
158extern struct acpi_dmtable_info acpi_dm_table_info_cpep0[];
159extern struct acpi_dmtable_info acpi_dm_table_info_dbgp[];
160extern struct acpi_dmtable_info acpi_dm_table_info_dmar[];
161extern struct acpi_dmtable_info acpi_dm_table_info_dmar_hdr[];
162extern struct acpi_dmtable_info acpi_dm_table_info_dmar_scope[];
163extern struct acpi_dmtable_info acpi_dm_table_info_dmar0[];
164extern struct acpi_dmtable_info acpi_dm_table_info_dmar1[];
165extern struct acpi_dmtable_info acpi_dm_table_info_dmar2[];
166extern struct acpi_dmtable_info acpi_dm_table_info_ecdt[];
167extern struct acpi_dmtable_info acpi_dm_table_info_einj[];
168extern struct acpi_dmtable_info acpi_dm_table_info_einj0[];
169extern struct acpi_dmtable_info acpi_dm_table_info_erst[];
170extern struct acpi_dmtable_info acpi_dm_table_info_facs[];
171extern struct acpi_dmtable_info acpi_dm_table_info_fadt1[];
172extern struct acpi_dmtable_info acpi_dm_table_info_fadt2[];
173extern struct acpi_dmtable_info acpi_dm_table_info_gas[];
174extern struct acpi_dmtable_info acpi_dm_table_info_header[];
175extern struct acpi_dmtable_info acpi_dm_table_info_hest[];
176extern struct acpi_dmtable_info acpi_dm_table_info_hest9[];
177extern struct acpi_dmtable_info acpi_dm_table_info_hest_notify[];
178extern struct acpi_dmtable_info acpi_dm_table_info_hpet[];
179extern struct acpi_dmtable_info acpi_dm_table_info_madt[];
180extern struct acpi_dmtable_info acpi_dm_table_info_madt0[];
181extern struct acpi_dmtable_info acpi_dm_table_info_madt1[];
182extern struct acpi_dmtable_info acpi_dm_table_info_madt2[];
183extern struct acpi_dmtable_info acpi_dm_table_info_madt3[];
184extern struct acpi_dmtable_info acpi_dm_table_info_madt4[];
185extern struct acpi_dmtable_info acpi_dm_table_info_madt5[];
186extern struct acpi_dmtable_info acpi_dm_table_info_madt6[];
187extern struct acpi_dmtable_info acpi_dm_table_info_madt7[];
188extern struct acpi_dmtable_info acpi_dm_table_info_madt8[];
189extern struct acpi_dmtable_info acpi_dm_table_info_madt9[];
190extern struct acpi_dmtable_info acpi_dm_table_info_madt10[];
191extern struct acpi_dmtable_info acpi_dm_table_info_madt_hdr[];
192extern struct acpi_dmtable_info acpi_dm_table_info_mcfg[];
193extern struct acpi_dmtable_info acpi_dm_table_info_mcfg0[];
194extern struct acpi_dmtable_info acpi_dm_table_info_rsdp1[];
195extern struct acpi_dmtable_info acpi_dm_table_info_rsdp2[];
196extern struct acpi_dmtable_info acpi_dm_table_info_sbst[];
197extern struct acpi_dmtable_info acpi_dm_table_info_slic[];
198extern struct acpi_dmtable_info acpi_dm_table_info_slit[];
199extern struct acpi_dmtable_info acpi_dm_table_info_spcr[];
200extern struct acpi_dmtable_info acpi_dm_table_info_spmi[];
201extern struct acpi_dmtable_info acpi_dm_table_info_srat[];
202extern struct acpi_dmtable_info acpi_dm_table_info_srat_hdr[];
203extern struct acpi_dmtable_info acpi_dm_table_info_srat0[];
204extern struct acpi_dmtable_info acpi_dm_table_info_srat1[];
205extern struct acpi_dmtable_info acpi_dm_table_info_srat2[];
206extern struct acpi_dmtable_info acpi_dm_table_info_tcpa[];
207extern struct acpi_dmtable_info acpi_dm_table_info_wdrt[];
208
209/*
210 * dmtable
211 */
212void acpi_dm_dump_data_table(struct acpi_table_header *table);
213
214acpi_status
215acpi_dm_dump_table(u32 table_length,
216 u32 table_offset,
217 void *table,
218 u32 sub_table_length, struct acpi_dmtable_info *info);
219
220void acpi_dm_line_header(u32 offset, u32 byte_length, char *name);
221
222void acpi_dm_line_header2(u32 offset, u32 byte_length, char *name, u32 value);
223
224/*
225 * dmtbdump
226 */
227void acpi_dm_dump_asf(struct acpi_table_header *table);
228
229void acpi_dm_dump_cpep(struct acpi_table_header *table);
230
231void acpi_dm_dump_dmar(struct acpi_table_header *table);
232
233void acpi_dm_dump_einj(struct acpi_table_header *table);
234
235void acpi_dm_dump_erst(struct acpi_table_header *table);
236
237void acpi_dm_dump_fadt(struct acpi_table_header *table);
238
239void acpi_dm_dump_hest(struct acpi_table_header *table);
240
241void acpi_dm_dump_mcfg(struct acpi_table_header *table);
242
243void acpi_dm_dump_madt(struct acpi_table_header *table);
244
245u32 acpi_dm_dump_rsdp(struct acpi_table_header *table);
246
247void acpi_dm_dump_rsdt(struct acpi_table_header *table);
248
249void acpi_dm_dump_slit(struct acpi_table_header *table);
250
251void acpi_dm_dump_srat(struct acpi_table_header *table);
252
253void acpi_dm_dump_xsdt(struct acpi_table_header *table);
254
255/*
256 * dmwalk
257 */
258void
259acpi_dm_disassemble(struct acpi_walk_state *walk_state,
260 union acpi_parse_object *origin, u32 num_opcodes);
261
262void
263acpi_dm_walk_parse_tree(union acpi_parse_object *op,
264 asl_walk_callback descending_callback,
265 asl_walk_callback ascending_callback, void *context);
266
267/*
268 * dmopcode
269 */
270void
271acpi_dm_disassemble_one_op(struct acpi_walk_state *walk_state,
272 struct acpi_op_walk_info *info,
273 union acpi_parse_object *op);
274
275void acpi_dm_decode_internal_object(union acpi_operand_object *obj_desc);
276
277u32 acpi_dm_list_type(union acpi_parse_object *op);
278
279void acpi_dm_method_flags(union acpi_parse_object *op);
280
281void acpi_dm_field_flags(union acpi_parse_object *op);
282
283void acpi_dm_address_space(u8 space_id);
284
285void acpi_dm_region_flags(union acpi_parse_object *op);
286
287void acpi_dm_match_op(union acpi_parse_object *op);
288
289u8 acpi_dm_comma_if_list_member(union acpi_parse_object *op);
290
291void acpi_dm_comma_if_field_member(union acpi_parse_object *op);
292
293/*
294 * dmnames
295 */
296u32 acpi_dm_dump_name(char *name);
297
298acpi_status
299acpi_ps_display_object_pathname(struct acpi_walk_state *walk_state,
300 union acpi_parse_object *op);
301
302void acpi_dm_namestring(char *name);
303
304/*
305 * dmobject
306 */
307void
308acpi_dm_display_internal_object(union acpi_operand_object *obj_desc,
309 struct acpi_walk_state *walk_state);
310
311void acpi_dm_display_arguments(struct acpi_walk_state *walk_state);
312
313void acpi_dm_display_locals(struct acpi_walk_state *walk_state);
314
315void
316acpi_dm_dump_method_info(acpi_status status,
317 struct acpi_walk_state *walk_state,
318 union acpi_parse_object *op);
319
320/*
321 * dmbuffer
322 */
323void acpi_dm_disasm_byte_list(u32 level, u8 * byte_data, u32 byte_count);
324
325void
326acpi_dm_byte_list(struct acpi_op_walk_info *info, union acpi_parse_object *op);
327
328void acpi_dm_is_eisa_id(union acpi_parse_object *op);
329
330void acpi_dm_eisa_id(u32 encoded_id);
331
332u8 acpi_dm_is_unicode_buffer(union acpi_parse_object *op);
333
334u8 acpi_dm_is_string_buffer(union acpi_parse_object *op);
335
336/*
337 * dmresrc
338 */
339void acpi_dm_dump_integer8(u8 value, char *name);
340
341void acpi_dm_dump_integer16(u16 value, char *name);
342
343void acpi_dm_dump_integer32(u32 value, char *name);
344
345void acpi_dm_dump_integer64(u64 value, char *name);
346
347void
348acpi_dm_resource_template(struct acpi_op_walk_info *info,
349 union acpi_parse_object *op,
350 u8 * byte_data, u32 byte_count);
351
352acpi_status acpi_dm_is_resource_template(union acpi_parse_object *op);
353
354void acpi_dm_indent(u32 level);
355
356void acpi_dm_bit_list(u16 mask);
357
358void acpi_dm_decode_attribute(u8 attribute);
359
360void acpi_dm_descriptor_name(void);
361
362/*
363 * dmresrcl
364 */
365void
366acpi_dm_word_descriptor(union aml_resource *resource, u32 length, u32 level);
367
368void
369acpi_dm_dword_descriptor(union aml_resource *resource, u32 length, u32 level);
370
371void
372acpi_dm_extended_descriptor(union aml_resource *resource,
373 u32 length, u32 level);
374
375void
376acpi_dm_qword_descriptor(union aml_resource *resource, u32 length, u32 level);
377
378void
379acpi_dm_memory24_descriptor(union aml_resource *resource,
380 u32 length, u32 level);
381
382void
383acpi_dm_memory32_descriptor(union aml_resource *resource,
384 u32 length, u32 level);
385
386void
387acpi_dm_fixed_memory32_descriptor(union aml_resource *resource,
388 u32 length, u32 level);
389
390void
391acpi_dm_generic_register_descriptor(union aml_resource *resource,
392 u32 length, u32 level);
393
394void
395acpi_dm_interrupt_descriptor(union aml_resource *resource,
396 u32 length, u32 level);
397
398void
399acpi_dm_vendor_large_descriptor(union aml_resource *resource,
400 u32 length, u32 level);
401
402void acpi_dm_vendor_common(char *name, u8 * byte_data, u32 length, u32 level);
403
404/*
405 * dmresrcs
406 */
407void
408acpi_dm_irq_descriptor(union aml_resource *resource, u32 length, u32 level);
409
410void
411acpi_dm_dma_descriptor(union aml_resource *resource, u32 length, u32 level);
412
413void acpi_dm_io_descriptor(union aml_resource *resource, u32 length, u32 level);
414
415void
416acpi_dm_fixed_io_descriptor(union aml_resource *resource,
417 u32 length, u32 level);
418
419void
420acpi_dm_start_dependent_descriptor(union aml_resource *resource,
421 u32 length, u32 level);
422
423void
424acpi_dm_end_dependent_descriptor(union aml_resource *resource,
425 u32 length, u32 level);
426
427void
428acpi_dm_vendor_small_descriptor(union aml_resource *resource,
429 u32 length, u32 level);
430
431/*
432 * dmutils
433 */
434void acpi_dm_add_to_external_list(char *path, u8 type, u32 value);
435
436/*
437 * dmrestag
438 */
439void acpi_dm_find_resources(union acpi_parse_object *root);
440
441void
442acpi_dm_check_resource_reference(union acpi_parse_object *op,
443 struct acpi_walk_state *walk_state);
444
445#endif /* __ACDISASM_H__ */
diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h
index 84f5cb242863..eda04546cdf6 100644
--- a/include/acpi/acexcep.h
+++ b/include/acpi/acexcep.h
@@ -153,8 +153,9 @@
153#define AE_AML_CIRCULAR_REFERENCE (acpi_status) (0x001E | AE_CODE_AML) 153#define AE_AML_CIRCULAR_REFERENCE (acpi_status) (0x001E | AE_CODE_AML)
154#define AE_AML_BAD_RESOURCE_LENGTH (acpi_status) (0x001F | AE_CODE_AML) 154#define AE_AML_BAD_RESOURCE_LENGTH (acpi_status) (0x001F | AE_CODE_AML)
155#define AE_AML_ILLEGAL_ADDRESS (acpi_status) (0x0020 | AE_CODE_AML) 155#define AE_AML_ILLEGAL_ADDRESS (acpi_status) (0x0020 | AE_CODE_AML)
156#define AE_AML_INFINITE_LOOP (acpi_status) (0x0021 | AE_CODE_AML)
156 157
157#define AE_CODE_AML_MAX 0x0020 158#define AE_CODE_AML_MAX 0x0021
158 159
159/* 160/*
160 * Internal exceptions used for control 161 * Internal exceptions used for control
@@ -175,6 +176,8 @@
175 176
176#define AE_CODE_CTRL_MAX 0x000D 177#define AE_CODE_CTRL_MAX 0x000D
177 178
179/* Exception strings for acpi_format_exception */
180
178#ifdef DEFINE_ACPI_GLOBALS 181#ifdef DEFINE_ACPI_GLOBALS
179 182
180/* 183/*
@@ -267,6 +270,7 @@ char const *acpi_gbl_exception_names_aml[] = {
267 "AE_AML_CIRCULAR_REFERENCE", 270 "AE_AML_CIRCULAR_REFERENCE",
268 "AE_AML_BAD_RESOURCE_LENGTH", 271 "AE_AML_BAD_RESOURCE_LENGTH",
269 "AE_AML_ILLEGAL_ADDRESS", 272 "AE_AML_ILLEGAL_ADDRESS",
273 "AE_AML_INFINITE_LOOP"
270}; 274};
271 275
272char const *acpi_gbl_exception_names_ctrl[] = { 276char const *acpi_gbl_exception_names_ctrl[] = {
diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h
index db8852d8bcf7..5c823d5ab783 100644
--- a/include/acpi/acoutput.h
+++ b/include/acpi/acoutput.h
@@ -45,9 +45,9 @@
45#define __ACOUTPUT_H__ 45#define __ACOUTPUT_H__
46 46
47/* 47/*
48 * Debug levels and component IDs. These are used to control the 48 * Debug levels and component IDs. These are used to control the
49 * granularity of the output of the DEBUG_PRINT macro -- on a per- 49 * granularity of the output of the ACPI_DEBUG_PRINT macro -- on a
50 * component basis and a per-exception-type basis. 50 * per-component basis and a per-exception-type basis.
51 */ 51 */
52 52
53/* Component IDs are used in the global "DebugLayer" */ 53/* Component IDs are used in the global "DebugLayer" */
@@ -69,8 +69,10 @@
69 69
70#define ACPI_COMPILER 0x00001000 70#define ACPI_COMPILER 0x00001000
71#define ACPI_TOOLS 0x00002000 71#define ACPI_TOOLS 0x00002000
72#define ACPI_EXAMPLE 0x00004000
73#define ACPI_DRIVER 0x00008000
72 74
73#define ACPI_ALL_COMPONENTS 0x00003FFF 75#define ACPI_ALL_COMPONENTS 0x0000FFFF
74#define ACPI_COMPONENT_DEFAULT (ACPI_ALL_COMPONENTS) 76#define ACPI_COMPONENT_DEFAULT (ACPI_ALL_COMPONENTS)
75 77
76/* Component IDs reserved for ACPI drivers */ 78/* Component IDs reserved for ACPI drivers */
@@ -78,7 +80,7 @@
78#define ACPI_ALL_DRIVERS 0xFFFF0000 80#define ACPI_ALL_DRIVERS 0xFFFF0000
79 81
80/* 82/*
81 * Raw debug output levels, do not use these in the DEBUG_PRINT macros 83 * Raw debug output levels, do not use these in the ACPI_DEBUG_PRINT macros
82 */ 84 */
83#define ACPI_LV_INIT 0x00000001 85#define ACPI_LV_INIT 0x00000001
84#define ACPI_LV_DEBUG_OBJECT 0x00000002 86#define ACPI_LV_DEBUG_OBJECT 0x00000002
@@ -176,4 +178,95 @@
176#define ACPI_NORMAL_DEFAULT (ACPI_LV_INIT | ACPI_LV_DEBUG_OBJECT) 178#define ACPI_NORMAL_DEFAULT (ACPI_LV_INIT | ACPI_LV_DEBUG_OBJECT)
177#define ACPI_DEBUG_ALL (ACPI_LV_AML_DISASSEMBLE | ACPI_LV_ALL_EXCEPTIONS | ACPI_LV_ALL) 179#define ACPI_DEBUG_ALL (ACPI_LV_AML_DISASSEMBLE | ACPI_LV_ALL_EXCEPTIONS | ACPI_LV_ALL)
178 180
181#if defined (ACPI_DEBUG_OUTPUT) || !defined (ACPI_NO_ERROR_MESSAGES)
182/*
183 * Module name is included in both debug and non-debug versions primarily for
184 * error messages. The __FILE__ macro is not very useful for this, because it
185 * often includes the entire pathname to the module
186 */
187#define ACPI_MODULE_NAME(name) static const char ACPI_UNUSED_VAR _acpi_module_name[] = name;
188#else
189#define ACPI_MODULE_NAME(name)
190#endif
191
192/*
193 * Ascii error messages can be configured out
194 */
195#ifndef ACPI_NO_ERROR_MESSAGES
196#define AE_INFO _acpi_module_name, __LINE__
197
198/*
199 * Error reporting. Callers module and line number are inserted by AE_INFO,
200 * the plist contains a set of parens to allow variable-length lists.
201 * These macros are used for both the debug and non-debug versions of the code.
202 */
203#define ACPI_INFO(plist) acpi_info plist
204#define ACPI_WARNING(plist) acpi_warning plist
205#define ACPI_EXCEPTION(plist) acpi_exception plist
206#define ACPI_ERROR(plist) acpi_error plist
207
208#else
209
210/* No error messages */
211
212#define ACPI_INFO(plist)
213#define ACPI_WARNING(plist)
214#define ACPI_EXCEPTION(plist)
215#define ACPI_ERROR(plist)
216
217#endif /* ACPI_NO_ERROR_MESSAGES */
218
219/*
220 * Debug macros that are conditionally compiled
221 */
222#ifdef ACPI_DEBUG_OUTPUT
223
224/*
225 * If ACPI_GET_FUNCTION_NAME was not defined in the compiler-dependent header,
226 * define it now. This is the case where there the compiler does not support
227 * a __FUNCTION__ macro or equivalent.
228 */
229#ifndef ACPI_GET_FUNCTION_NAME
230#define ACPI_GET_FUNCTION_NAME _acpi_function_name
231
232/*
233 * The Name parameter should be the procedure name as a quoted string.
234 * The function name is also used by the function exit macros below.
235 * Note: (const char) is used to be compatible with the debug interfaces
236 * and macros such as __FUNCTION__.
237 */
238#define ACPI_FUNCTION_NAME(name) static const char _acpi_function_name[] = #name;
239
240#else
241/* Compiler supports __FUNCTION__ (or equivalent) -- Ignore this macro */
242
243#define ACPI_FUNCTION_NAME(name)
244#endif /* ACPI_GET_FUNCTION_NAME */
245
246/*
247 * Common parameters used for debug output functions:
248 * line number, function name, module(file) name, component ID
249 */
250#define ACPI_DEBUG_PARAMETERS __LINE__, ACPI_GET_FUNCTION_NAME, _acpi_module_name, _COMPONENT
251
252/*
253 * Master debug print macros
254 * Print message if and only if:
255 * 1) Debug print for the current component is enabled
256 * 2) Debug error level or trace level for the print statement is enabled
257 */
258#define ACPI_DEBUG_PRINT(plist) acpi_debug_print plist
259#define ACPI_DEBUG_PRINT_RAW(plist) acpi_debug_print_raw plist
260
261#else
262/*
263 * This is the non-debug case -- make everything go away,
264 * leaving no executable debug code!
265 */
266#define ACPI_FUNCTION_NAME(a)
267#define ACPI_DEBUG_PRINT(pl)
268#define ACPI_DEBUG_PRINT_RAW(pl)
269
270#endif /* ACPI_DEBUG_OUTPUT */
271
179#endif /* __ACOUTPUT_H__ */ 272#endif /* __ACOUTPUT_H__ */
diff --git a/include/acpi/acpi.h b/include/acpi/acpi.h
index c515ef6cc89e..472b7bf0c5d4 100644
--- a/include/acpi/acpi.h
+++ b/include/acpi/acpi.h
@@ -1,6 +1,6 @@
1/****************************************************************************** 1/******************************************************************************
2 * 2 *
3 * Name: acpi.h - Master include file, Publics and external data. 3 * Name: acpi.h - Master public include file used to interface to ACPICA
4 * 4 *
5 *****************************************************************************/ 5 *****************************************************************************/
6 6
@@ -45,25 +45,22 @@
45#define __ACPI_H__ 45#define __ACPI_H__
46 46
47/* 47/*
48 * Common includes for all ACPI driver files 48 * Public include files for use by code that will interface to ACPICA.
49 * We put them here because we don't want to duplicate them 49 *
50 * in the rest of the source code again and again. 50 * Information includes the ACPICA data types, names, exceptions, and
51 * external interface prototypes. Also included are the definitions for
52 * all ACPI tables (FADT, MADT, etc.)
53 *
54 * Note: The order of these include files is important.
51 */ 55 */
52#include "acnames.h" /* Global ACPI names and strings */ 56#include "platform/acenv.h" /* Environment-specific items */
53#include "acconfig.h" /* Configuration constants */ 57#include "acnames.h" /* Common ACPI names and strings */
54#include "platform/acenv.h" /* Target environment specific items */ 58#include "actypes.h" /* ACPICA data types and structures */
55#include "actypes.h" /* Fundamental common data types */ 59#include "acexcep.h" /* ACPICA exceptions */
56#include "acexcep.h" /* ACPI exception codes */
57#include "acmacros.h" /* C macros */
58#include "actbl.h" /* ACPI table definitions */ 60#include "actbl.h" /* ACPI table definitions */
59#include "aclocal.h" /* Internal data types */
60#include "acoutput.h" /* Error output and Debug macros */ 61#include "acoutput.h" /* Error output and Debug macros */
61#include "acpiosxf.h" /* Interfaces to the ACPI-to-OS layer */ 62#include "acrestyp.h" /* Resource Descriptor structs */
63#include "acpiosxf.h" /* OSL interfaces (ACPICA-to-OS) */
62#include "acpixf.h" /* ACPI core subsystem external interfaces */ 64#include "acpixf.h" /* ACPI core subsystem external interfaces */
63#include "acobject.h" /* ACPI internal object */
64#include "acstruct.h" /* Common structures */
65#include "acglobal.h" /* All global variables */
66#include "achware.h" /* Hardware defines and interfaces */
67#include "acutils.h" /* Utility interfaces */
68 65
69#endif /* __ACPI_H__ */ 66#endif /* __ACPI_H__ */
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index b91440ac0d16..a62720a7edc0 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -121,8 +121,11 @@ acpi_os_wait_semaphore(acpi_semaphore handle, u32 units, u16 timeout);
121acpi_status acpi_os_signal_semaphore(acpi_semaphore handle, u32 units); 121acpi_status acpi_os_signal_semaphore(acpi_semaphore handle, u32 units);
122 122
123/* 123/*
124 * Mutex primitives 124 * Mutex primitives. May be configured to use semaphores instead via
125 * ACPI_MUTEX_TYPE (see platform/acenv.h)
125 */ 126 */
127#if (ACPI_MUTEX_TYPE != ACPI_BINARY_SEMAPHORE)
128
126acpi_status acpi_os_create_mutex(acpi_mutex * out_handle); 129acpi_status acpi_os_create_mutex(acpi_mutex * out_handle);
127 130
128void acpi_os_delete_mutex(acpi_mutex handle); 131void acpi_os_delete_mutex(acpi_mutex handle);
@@ -130,13 +133,7 @@ void acpi_os_delete_mutex(acpi_mutex handle);
130acpi_status acpi_os_acquire_mutex(acpi_mutex handle, u16 timeout); 133acpi_status acpi_os_acquire_mutex(acpi_mutex handle, u16 timeout);
131 134
132void acpi_os_release_mutex(acpi_mutex handle); 135void acpi_os_release_mutex(acpi_mutex handle);
133 136#endif
134/* Temporary macros for Mutex* interfaces, map to existing semaphore xfaces */
135
136#define acpi_os_create_mutex(out_handle) acpi_os_create_semaphore (1, 1, out_handle)
137#define acpi_os_delete_mutex(handle) (void) acpi_os_delete_semaphore (handle)
138#define acpi_os_acquire_mutex(handle,time) acpi_os_wait_semaphore (handle, 1, time)
139#define acpi_os_release_mutex(handle) (void) acpi_os_signal_semaphore (handle, 1)
140 137
141/* 138/*
142 * Memory allocation and mapping 139 * Memory allocation and mapping
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 33bc0e3b1954..c8e8cf45830f 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -45,9 +45,32 @@
45#ifndef __ACXFACE_H__ 45#ifndef __ACXFACE_H__
46#define __ACXFACE_H__ 46#define __ACXFACE_H__
47 47
48/* Current ACPICA subsystem version in YYYYMMDD format */
49
50#define ACPI_CA_VERSION 0x20081204
51
48#include "actypes.h" 52#include "actypes.h"
49#include "actbl.h" 53#include "actbl.h"
50 54
55extern u8 acpi_gbl_permanent_mmap;
56
57/*
58 * Globals that are publically available, allowing for
59 * run time configuration
60 */
61extern u32 acpi_dbg_level;
62extern u32 acpi_dbg_layer;
63extern u8 acpi_gbl_enable_interpreter_slack;
64extern u8 acpi_gbl_all_methods_serialized;
65extern u8 acpi_gbl_create_osi_method;
66extern u8 acpi_gbl_leave_wake_gpes_disabled;
67extern acpi_name acpi_gbl_trace_method_name;
68extern u32 acpi_gbl_trace_flags;
69
70extern u32 acpi_current_gpe_count;
71extern struct acpi_table_fadt acpi_gbl_FADT;
72
73extern u32 acpi_rsdt_forced;
51/* 74/*
52 * Global interfaces 75 * Global interfaces
53 */ 76 */
@@ -79,11 +102,6 @@ const char *acpi_format_exception(acpi_status exception);
79 102
80acpi_status acpi_purge_cached_objects(void); 103acpi_status acpi_purge_cached_objects(void);
81 104
82#ifdef ACPI_FUTURE_USAGE
83acpi_status
84acpi_install_initialization_handler(acpi_init_handler handler, u32 function);
85#endif
86
87/* 105/*
88 * ACPI Memory management 106 * ACPI Memory management
89 */ 107 */
@@ -193,9 +211,12 @@ acpi_status acpi_get_id(acpi_handle object, acpi_owner_id * out_type);
193acpi_status acpi_get_parent(acpi_handle object, acpi_handle * out_handle); 211acpi_status acpi_get_parent(acpi_handle object, acpi_handle * out_handle);
194 212
195/* 213/*
196 * Event handler interfaces 214 * Handler interfaces
197 */ 215 */
198acpi_status 216acpi_status
217acpi_install_initialization_handler(acpi_init_handler handler, u32 function);
218
219acpi_status
199acpi_install_fixed_event_handler(u32 acpi_event, 220acpi_install_fixed_event_handler(u32 acpi_event,
200 acpi_event_handler handler, void *context); 221 acpi_event_handler handler, void *context);
201 222
@@ -227,6 +248,10 @@ acpi_install_gpe_handler(acpi_handle gpe_device,
227 u32 gpe_number, 248 u32 gpe_number,
228 u32 type, acpi_event_handler address, void *context); 249 u32 type, acpi_event_handler address, void *context);
229 250
251acpi_status
252acpi_remove_gpe_handler(acpi_handle gpe_device,
253 u32 gpe_number, acpi_event_handler address);
254
230#ifdef ACPI_FUTURE_USAGE 255#ifdef ACPI_FUTURE_USAGE
231acpi_status acpi_install_exception_handler(acpi_exception_handler handler); 256acpi_status acpi_install_exception_handler(acpi_exception_handler handler);
232#endif 257#endif
@@ -238,10 +263,6 @@ acpi_status acpi_acquire_global_lock(u16 timeout, u32 * handle);
238 263
239acpi_status acpi_release_global_lock(u32 handle); 264acpi_status acpi_release_global_lock(u32 handle);
240 265
241acpi_status
242acpi_remove_gpe_handler(acpi_handle gpe_device,
243 u32 gpe_number, acpi_event_handler address);
244
245acpi_status acpi_enable_event(u32 event, u32 flags); 266acpi_status acpi_enable_event(u32 event, u32 flags);
246 267
247acpi_status acpi_disable_event(u32 event, u32 flags); 268acpi_status acpi_disable_event(u32 event, u32 flags);
@@ -250,6 +271,9 @@ acpi_status acpi_clear_event(u32 event);
250 271
251acpi_status acpi_get_event_status(u32 event, acpi_event_status * event_status); 272acpi_status acpi_get_event_status(u32 event, acpi_event_status * event_status);
252 273
274/*
275 * GPE Interfaces
276 */
253acpi_status acpi_set_gpe_type(acpi_handle gpe_device, u32 gpe_number, u8 type); 277acpi_status acpi_set_gpe_type(acpi_handle gpe_device, u32 gpe_number, u8 type);
254 278
255acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number); 279acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number);
@@ -263,6 +287,12 @@ acpi_get_gpe_status(acpi_handle gpe_device,
263 u32 gpe_number, 287 u32 gpe_number,
264 u32 flags, acpi_event_status * event_status); 288 u32 flags, acpi_event_status * event_status);
265 289
290acpi_status acpi_disable_all_gpes(void);
291
292acpi_status acpi_enable_all_runtime_gpes(void);
293
294acpi_status acpi_get_gpe_device(u32 gpe_index, acpi_handle *gpe_device);
295
266acpi_status 296acpi_status
267acpi_install_gpe_block(acpi_handle gpe_device, 297acpi_install_gpe_block(acpi_handle gpe_device,
268 struct acpi_generic_address *gpe_block_address, 298 struct acpi_generic_address *gpe_block_address,
@@ -313,6 +343,8 @@ acpi_resource_to_address64(struct acpi_resource *resource,
313/* 343/*
314 * Hardware (ACPI device) interfaces 344 * Hardware (ACPI device) interfaces
315 */ 345 */
346acpi_status acpi_reset(void);
347
316acpi_status acpi_get_register(u32 register_id, u32 * return_value); 348acpi_status acpi_get_register(u32 register_id, u32 * return_value);
317 349
318acpi_status acpi_get_register_unlocked(u32 register_id, u32 *return_value); 350acpi_status acpi_get_register_unlocked(u32 register_id, u32 *return_value);
@@ -320,12 +352,14 @@ acpi_status acpi_get_register_unlocked(u32 register_id, u32 *return_value);
320acpi_status acpi_set_register(u32 register_id, u32 value); 352acpi_status acpi_set_register(u32 register_id, u32 value);
321 353
322acpi_status 354acpi_status
323acpi_set_firmware_waking_vector(acpi_physical_address physical_address); 355acpi_set_firmware_waking_vector(u32 physical_address);
324 356
325#ifdef ACPI_FUTURE_USAGE
326acpi_status 357acpi_status
327acpi_get_firmware_waking_vector(acpi_physical_address * physical_address); 358acpi_set_firmware_waking_vector64(u64 physical_address);
328#endif 359
360acpi_status acpi_read(u32 *value, struct acpi_generic_address *reg);
361
362acpi_status acpi_write(u32 value, struct acpi_generic_address *reg);
329 363
330acpi_status 364acpi_status
331acpi_get_sleep_type_data(u8 sleep_state, u8 * slp_typ_a, u8 * slp_typ_b); 365acpi_get_sleep_type_data(u8 sleep_state, u8 * slp_typ_a, u8 * slp_typ_b);
@@ -340,4 +374,42 @@ acpi_status acpi_leave_sleep_state_prep(u8 sleep_state);
340 374
341acpi_status acpi_leave_sleep_state(u8 sleep_state); 375acpi_status acpi_leave_sleep_state(u8 sleep_state);
342 376
377/*
378 * Debug output
379 */
380void ACPI_INTERNAL_VAR_XFACE
381acpi_error(const char *module_name,
382 u32 line_number, const char *format, ...) ACPI_PRINTF_LIKE(3);
383
384void ACPI_INTERNAL_VAR_XFACE
385acpi_exception(const char *module_name,
386 u32 line_number,
387 acpi_status status, const char *format, ...) ACPI_PRINTF_LIKE(4);
388
389void ACPI_INTERNAL_VAR_XFACE
390acpi_warning(const char *module_name,
391 u32 line_number, const char *format, ...) ACPI_PRINTF_LIKE(3);
392
393void ACPI_INTERNAL_VAR_XFACE
394acpi_info(const char *module_name,
395 u32 line_number, const char *format, ...) ACPI_PRINTF_LIKE(3);
396
397#ifdef ACPI_DEBUG_OUTPUT
398
399void ACPI_INTERNAL_VAR_XFACE
400acpi_debug_print(u32 requested_debug_level,
401 u32 line_number,
402 const char *function_name,
403 const char *module_name,
404 u32 component_id, const char *format, ...) ACPI_PRINTF_LIKE(6);
405
406void ACPI_INTERNAL_VAR_XFACE
407acpi_debug_print_raw(u32 requested_debug_level,
408 u32 line_number,
409 const char *function_name,
410 const char *module_name,
411 u32 component_id,
412 const char *format, ...) ACPI_PRINTF_LIKE(6);
413#endif
414
343#endif /* __ACXFACE_H__ */ 415#endif /* __ACXFACE_H__ */
diff --git a/include/acpi/acrestyp.h b/include/acpi/acrestyp.h
new file mode 100644
index 000000000000..9ffe00feada6
--- /dev/null
+++ b/include/acpi/acrestyp.h
@@ -0,0 +1,405 @@
1/******************************************************************************
2 *
3 * Name: acrestyp.h - Defines, types, and structures for resource descriptors
4 *
5 *****************************************************************************/
6
7/*
8 * Copyright (C) 2000 - 2008, Intel Corp.
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions, and the following disclaimer,
16 * without modification.
17 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
18 * substantially similar to the "NO WARRANTY" disclaimer below
19 * ("Disclaimer") and any redistribution must be conditioned upon
20 * including a substantially similar Disclaimer requirement for further
21 * binary redistribution.
22 * 3. Neither the names of the above-listed copyright holders nor the names
23 * of any contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * Alternatively, this software may be distributed under the terms of the
27 * GNU General Public License ("GPL") version 2 as published by the Free
28 * Software Foundation.
29 *
30 * NO WARRANTY
31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
32 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
33 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
34 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
35 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
40 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 * POSSIBILITY OF SUCH DAMAGES.
42 */
43
44#ifndef __ACRESTYP_H__
45#define __ACRESTYP_H__
46
47/*
48 * Definitions for Resource Attributes
49 */
50typedef u16 acpi_rs_length; /* Resource Length field is fixed at 16 bits */
51typedef u32 acpi_rsdesc_size; /* Max Resource Descriptor size is (Length+3) = (64_k-1)+3 */
52
53/*
54 * Memory Attributes
55 */
56#define ACPI_READ_ONLY_MEMORY (u8) 0x00
57#define ACPI_READ_WRITE_MEMORY (u8) 0x01
58
59#define ACPI_NON_CACHEABLE_MEMORY (u8) 0x00
60#define ACPI_CACHABLE_MEMORY (u8) 0x01
61#define ACPI_WRITE_COMBINING_MEMORY (u8) 0x02
62#define ACPI_PREFETCHABLE_MEMORY (u8) 0x03
63
64/*
65 * IO Attributes
66 * The ISA IO ranges are: n000-n0_fFh, n400-n4_fFh, n800-n8_fFh, n_c00-n_cFFh.
67 * The non-ISA IO ranges are: n100-n3_fFh, n500-n7_fFh, n900-n_bFFh, n_cd0-n_fFFh.
68 */
69#define ACPI_NON_ISA_ONLY_RANGES (u8) 0x01
70#define ACPI_ISA_ONLY_RANGES (u8) 0x02
71#define ACPI_ENTIRE_RANGE (ACPI_NON_ISA_ONLY_RANGES | ACPI_ISA_ONLY_RANGES)
72
73/* Type of translation - 1=Sparse, 0=Dense */
74
75#define ACPI_SPARSE_TRANSLATION (u8) 0x01
76
77/*
78 * IO Port Descriptor Decode
79 */
80#define ACPI_DECODE_10 (u8) 0x00 /* 10-bit IO address decode */
81#define ACPI_DECODE_16 (u8) 0x01 /* 16-bit IO address decode */
82
83/*
84 * IRQ Attributes
85 */
86#define ACPI_LEVEL_SENSITIVE (u8) 0x00
87#define ACPI_EDGE_SENSITIVE (u8) 0x01
88
89#define ACPI_ACTIVE_HIGH (u8) 0x00
90#define ACPI_ACTIVE_LOW (u8) 0x01
91
92#define ACPI_EXCLUSIVE (u8) 0x00
93#define ACPI_SHARED (u8) 0x01
94
95/*
96 * DMA Attributes
97 */
98#define ACPI_COMPATIBILITY (u8) 0x00
99#define ACPI_TYPE_A (u8) 0x01
100#define ACPI_TYPE_B (u8) 0x02
101#define ACPI_TYPE_F (u8) 0x03
102
103#define ACPI_NOT_BUS_MASTER (u8) 0x00
104#define ACPI_BUS_MASTER (u8) 0x01
105
106#define ACPI_TRANSFER_8 (u8) 0x00
107#define ACPI_TRANSFER_8_16 (u8) 0x01
108#define ACPI_TRANSFER_16 (u8) 0x02
109
110/*
111 * Start Dependent Functions Priority definitions
112 */
113#define ACPI_GOOD_CONFIGURATION (u8) 0x00
114#define ACPI_ACCEPTABLE_CONFIGURATION (u8) 0x01
115#define ACPI_SUB_OPTIMAL_CONFIGURATION (u8) 0x02
116
117/*
118 * 16, 32 and 64-bit Address Descriptor resource types
119 */
120#define ACPI_MEMORY_RANGE (u8) 0x00
121#define ACPI_IO_RANGE (u8) 0x01
122#define ACPI_BUS_NUMBER_RANGE (u8) 0x02
123
124#define ACPI_ADDRESS_NOT_FIXED (u8) 0x00
125#define ACPI_ADDRESS_FIXED (u8) 0x01
126
127#define ACPI_POS_DECODE (u8) 0x00
128#define ACPI_SUB_DECODE (u8) 0x01
129
130#define ACPI_PRODUCER (u8) 0x00
131#define ACPI_CONSUMER (u8) 0x01
132
133/*
134 * If possible, pack the following structures to byte alignment
135 */
136#ifndef ACPI_MISALIGNMENT_NOT_SUPPORTED
137#pragma pack(1)
138#endif
139
140/* UUID data structures for use in vendor-defined resource descriptors */
141
142struct acpi_uuid {
143 u8 data[ACPI_UUID_LENGTH];
144};
145
146struct acpi_vendor_uuid {
147 u8 subtype;
148 u8 data[ACPI_UUID_LENGTH];
149};
150
151/*
152 * Structures used to describe device resources
153 */
154struct acpi_resource_irq {
155 u8 descriptor_length;
156 u8 triggering;
157 u8 polarity;
158 u8 sharable;
159 u8 interrupt_count;
160 u8 interrupts[1];
161};
162
163struct acpi_resource_dma {
164 u8 type;
165 u8 bus_master;
166 u8 transfer;
167 u8 channel_count;
168 u8 channels[1];
169};
170
171struct acpi_resource_start_dependent {
172 u8 descriptor_length;
173 u8 compatibility_priority;
174 u8 performance_robustness;
175};
176
177/*
178 * The END_DEPENDENT_FUNCTIONS_RESOURCE struct is not
179 * needed because it has no fields
180 */
181
182struct acpi_resource_io {
183 u8 io_decode;
184 u8 alignment;
185 u8 address_length;
186 u16 minimum;
187 u16 maximum;
188};
189
190struct acpi_resource_fixed_io {
191 u16 address;
192 u8 address_length;
193};
194
195struct acpi_resource_vendor {
196 u16 byte_length;
197 u8 byte_data[1];
198};
199
200/* Vendor resource with UUID info (introduced in ACPI 3.0) */
201
202struct acpi_resource_vendor_typed {
203 u16 byte_length;
204 u8 uuid_subtype;
205 u8 uuid[ACPI_UUID_LENGTH];
206 u8 byte_data[1];
207};
208
209struct acpi_resource_end_tag {
210 u8 checksum;
211};
212
213struct acpi_resource_memory24 {
214 u8 write_protect;
215 u16 minimum;
216 u16 maximum;
217 u16 alignment;
218 u16 address_length;
219};
220
221struct acpi_resource_memory32 {
222 u8 write_protect;
223 u32 minimum;
224 u32 maximum;
225 u32 alignment;
226 u32 address_length;
227};
228
229struct acpi_resource_fixed_memory32 {
230 u8 write_protect;
231 u32 address;
232 u32 address_length;
233};
234
235struct acpi_memory_attribute {
236 u8 write_protect;
237 u8 caching;
238 u8 range_type;
239 u8 translation;
240};
241
242struct acpi_io_attribute {
243 u8 range_type;
244 u8 translation;
245 u8 translation_type;
246 u8 reserved1;
247};
248
249union acpi_resource_attribute {
250 struct acpi_memory_attribute mem;
251 struct acpi_io_attribute io;
252
253 /* Used for the *word_space macros */
254
255 u8 type_specific;
256};
257
258struct acpi_resource_source {
259 u8 index;
260 u16 string_length;
261 char *string_ptr;
262};
263
264/* Fields common to all address descriptors, 16/32/64 bit */
265
266#define ACPI_RESOURCE_ADDRESS_COMMON \
267 u8 resource_type; \
268 u8 producer_consumer; \
269 u8 decode; \
270 u8 min_address_fixed; \
271 u8 max_address_fixed; \
272 union acpi_resource_attribute info;
273
274struct acpi_resource_address {
275ACPI_RESOURCE_ADDRESS_COMMON};
276
277struct acpi_resource_address16 {
278 ACPI_RESOURCE_ADDRESS_COMMON u16 granularity;
279 u16 minimum;
280 u16 maximum;
281 u16 translation_offset;
282 u16 address_length;
283 struct acpi_resource_source resource_source;
284};
285
286struct acpi_resource_address32 {
287 ACPI_RESOURCE_ADDRESS_COMMON u32 granularity;
288 u32 minimum;
289 u32 maximum;
290 u32 translation_offset;
291 u32 address_length;
292 struct acpi_resource_source resource_source;
293};
294
295struct acpi_resource_address64 {
296 ACPI_RESOURCE_ADDRESS_COMMON u64 granularity;
297 u64 minimum;
298 u64 maximum;
299 u64 translation_offset;
300 u64 address_length;
301 struct acpi_resource_source resource_source;
302};
303
304struct acpi_resource_extended_address64 {
305 ACPI_RESOURCE_ADDRESS_COMMON u8 revision_iD;
306 u64 granularity;
307 u64 minimum;
308 u64 maximum;
309 u64 translation_offset;
310 u64 address_length;
311 u64 type_specific;
312};
313
314struct acpi_resource_extended_irq {
315 u8 producer_consumer;
316 u8 triggering;
317 u8 polarity;
318 u8 sharable;
319 u8 interrupt_count;
320 struct acpi_resource_source resource_source;
321 u32 interrupts[1];
322};
323
324struct acpi_resource_generic_register {
325 u8 space_id;
326 u8 bit_width;
327 u8 bit_offset;
328 u8 access_size;
329 u64 address;
330};
331
332/* ACPI_RESOURCE_TYPEs */
333
334#define ACPI_RESOURCE_TYPE_IRQ 0
335#define ACPI_RESOURCE_TYPE_DMA 1
336#define ACPI_RESOURCE_TYPE_START_DEPENDENT 2
337#define ACPI_RESOURCE_TYPE_END_DEPENDENT 3
338#define ACPI_RESOURCE_TYPE_IO 4
339#define ACPI_RESOURCE_TYPE_FIXED_IO 5
340#define ACPI_RESOURCE_TYPE_VENDOR 6
341#define ACPI_RESOURCE_TYPE_END_TAG 7
342#define ACPI_RESOURCE_TYPE_MEMORY24 8
343#define ACPI_RESOURCE_TYPE_MEMORY32 9
344#define ACPI_RESOURCE_TYPE_FIXED_MEMORY32 10
345#define ACPI_RESOURCE_TYPE_ADDRESS16 11
346#define ACPI_RESOURCE_TYPE_ADDRESS32 12
347#define ACPI_RESOURCE_TYPE_ADDRESS64 13
348#define ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64 14 /* ACPI 3.0 */
349#define ACPI_RESOURCE_TYPE_EXTENDED_IRQ 15
350#define ACPI_RESOURCE_TYPE_GENERIC_REGISTER 16
351#define ACPI_RESOURCE_TYPE_MAX 16
352
353/* Master union for resource descriptors */
354
355union acpi_resource_data {
356 struct acpi_resource_irq irq;
357 struct acpi_resource_dma dma;
358 struct acpi_resource_start_dependent start_dpf;
359 struct acpi_resource_io io;
360 struct acpi_resource_fixed_io fixed_io;
361 struct acpi_resource_vendor vendor;
362 struct acpi_resource_vendor_typed vendor_typed;
363 struct acpi_resource_end_tag end_tag;
364 struct acpi_resource_memory24 memory24;
365 struct acpi_resource_memory32 memory32;
366 struct acpi_resource_fixed_memory32 fixed_memory32;
367 struct acpi_resource_address16 address16;
368 struct acpi_resource_address32 address32;
369 struct acpi_resource_address64 address64;
370 struct acpi_resource_extended_address64 ext_address64;
371 struct acpi_resource_extended_irq extended_irq;
372 struct acpi_resource_generic_register generic_reg;
373
374 /* Common fields */
375
376 struct acpi_resource_address address; /* Common 16/32/64 address fields */
377};
378
379/* Common resource header */
380
381struct acpi_resource {
382 u32 type;
383 u32 length;
384 union acpi_resource_data data;
385};
386
387/* restore default alignment */
388
389#pragma pack()
390
391#define ACPI_RS_SIZE_NO_DATA 8 /* Id + Length fields */
392#define ACPI_RS_SIZE_MIN (u32) ACPI_ROUND_UP_TO_NATIVE_WORD (12)
393#define ACPI_RS_SIZE(type) (u32) (ACPI_RS_SIZE_NO_DATA + sizeof (type))
394
395#define ACPI_NEXT_RESOURCE(res) (struct acpi_resource *)((u8 *) res + res->length)
396
397struct acpi_pci_routing_table {
398 u32 length;
399 u32 pin;
400 acpi_integer address; /* here for 64-bit alignment */
401 u32 source_index;
402 char source[4]; /* pad to 64 bits so sizeof() works in all cases */
403};
404
405#endif /* __ACRESTYP_H__ */
diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h
index 13a3d9ad92db..813e4b6c2c0d 100644
--- a/include/acpi/actbl.h
+++ b/include/acpi/actbl.h
@@ -288,6 +288,31 @@ enum acpi_prefered_pm_profiles {
288 288
289#define ACPI_FADT_OFFSET(f) (u8) ACPI_OFFSET (struct acpi_table_fadt, f) 289#define ACPI_FADT_OFFSET(f) (u8) ACPI_OFFSET (struct acpi_table_fadt, f)
290 290
291union acpi_name_union {
292 u32 integer;
293 char ascii[4];
294};
295
296/*
297 * Internal ACPI Table Descriptor. One per ACPI table
298 */
299struct acpi_table_desc {
300 acpi_physical_address address;
301 struct acpi_table_header *pointer;
302 u32 length; /* Length fixed at 32 bits */
303 union acpi_name_union signature;
304 acpi_owner_id owner_id;
305 u8 flags;
306};
307
308/* Flags for above */
309
310#define ACPI_TABLE_ORIGIN_UNKNOWN (0)
311#define ACPI_TABLE_ORIGIN_MAPPED (1)
312#define ACPI_TABLE_ORIGIN_ALLOCATED (2)
313#define ACPI_TABLE_ORIGIN_MASK (3)
314#define ACPI_TABLE_IS_LOADED (4)
315
291/* 316/*
292 * Get the remaining ACPI tables 317 * Get the remaining ACPI tables
293 */ 318 */
diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index 63f5b4cf4de1..18963b968114 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -627,7 +627,7 @@ struct acpi_hest_aer_common {
627 u32 uncorrectable_error_mask; 627 u32 uncorrectable_error_mask;
628 u32 uncorrectable_error_severity; 628 u32 uncorrectable_error_severity;
629 u32 correctable_error_mask; 629 u32 correctable_error_mask;
630 u32 advanced_error_cababilities; 630 u32 advanced_error_capabilities;
631}; 631};
632 632
633/* Hardware Error Notification */ 633/* Hardware Error Notification */
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 8222e8de0d1c..a20aab510173 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -204,11 +204,10 @@ typedef u32 acpi_physical_address;
204 204
205/******************************************************************************* 205/*******************************************************************************
206 * 206 *
207 * OS-dependent and compiler-dependent types 207 * OS-dependent types
208 * 208 *
209 * If the defaults below are not appropriate for the host system, they can 209 * If the defaults below are not appropriate for the host system, they can
210 * be defined in the compiler-specific or OS-specific header, and this will 210 * be defined in the OS-specific header, and this will take precedence.
211 * take precedence.
212 * 211 *
213 ******************************************************************************/ 212 ******************************************************************************/
214 213
@@ -218,12 +217,6 @@ typedef u32 acpi_physical_address;
218#define acpi_thread_id acpi_size 217#define acpi_thread_id acpi_size
219#endif 218#endif
220 219
221/* Object returned from acpi_os_create_lock */
222
223#ifndef acpi_spinlock
224#define acpi_spinlock void *
225#endif
226
227/* Flags for acpi_os_acquire_lock/acpi_os_release_lock */ 220/* Flags for acpi_os_acquire_lock/acpi_os_release_lock */
228 221
229#ifndef acpi_cpu_flags 222#ifndef acpi_cpu_flags
@@ -233,9 +226,51 @@ typedef u32 acpi_physical_address;
233/* Object returned from acpi_os_create_cache */ 226/* Object returned from acpi_os_create_cache */
234 227
235#ifndef acpi_cache_t 228#ifndef acpi_cache_t
229#ifdef ACPI_USE_LOCAL_CACHE
236#define acpi_cache_t struct acpi_memory_list 230#define acpi_cache_t struct acpi_memory_list
231#else
232#define acpi_cache_t void *
233#endif
234#endif
235
236/*
237 * Synchronization objects - Mutexes, Semaphores, and spin_locks
238 */
239#if (ACPI_MUTEX_TYPE == ACPI_BINARY_SEMAPHORE)
240/*
241 * These macros are used if the host OS does not support a mutex object.
242 * Map the OSL Mutex interfaces to binary semaphores.
243 */
244#define acpi_mutex acpi_semaphore
245#define acpi_os_create_mutex(out_handle) acpi_os_create_semaphore (1, 1, out_handle)
246#define acpi_os_delete_mutex(handle) (void) acpi_os_delete_semaphore (handle)
247#define acpi_os_acquire_mutex(handle,time) acpi_os_wait_semaphore (handle, 1, time)
248#define acpi_os_release_mutex(handle) (void) acpi_os_signal_semaphore (handle, 1)
249#endif
250
251/* Configurable types for synchronization objects */
252
253#ifndef acpi_spinlock
254#define acpi_spinlock void *
255#endif
256
257#ifndef acpi_semaphore
258#define acpi_semaphore void *
259#endif
260
261#ifndef acpi_mutex
262#define acpi_mutex void *
237#endif 263#endif
238 264
265/*******************************************************************************
266 *
267 * Compiler-dependent types
268 *
269 * If the defaults below are not appropriate for the host compiler, they can
270 * be defined in the compiler-specific header, and this will take precedence.
271 *
272 ******************************************************************************/
273
239/* Use C99 uintptr_t for pointer casting if available, "void *" otherwise */ 274/* Use C99 uintptr_t for pointer casting if available, "void *" otherwise */
240 275
241#ifndef acpi_uintptr_t 276#ifndef acpi_uintptr_t
@@ -268,6 +303,43 @@ typedef u32 acpi_physical_address;
268#define ACPI_EXPORT_SYMBOL(symbol) 303#define ACPI_EXPORT_SYMBOL(symbol)
269#endif 304#endif
270 305
306/******************************************************************************
307 *
308 * ACPI Specification constants (Do not change unless the specification changes)
309 *
310 *****************************************************************************/
311
312/* Number of distinct FADT-based GPE register blocks (GPE0 and GPE1) */
313
314#define ACPI_MAX_GPE_BLOCKS 2
315
316/* Default ACPI register widths */
317
318#define ACPI_GPE_REGISTER_WIDTH 8
319#define ACPI_PM1_REGISTER_WIDTH 16
320#define ACPI_PM2_REGISTER_WIDTH 8
321#define ACPI_PM_TIMER_WIDTH 32
322
323/* Names within the namespace are 4 bytes long */
324
325#define ACPI_NAME_SIZE 4
326#define ACPI_PATH_SEGMENT_LENGTH 5 /* 4 chars for name + 1 char for separator */
327#define ACPI_PATH_SEPARATOR '.'
328
329/* Sizes for ACPI table headers */
330
331#define ACPI_OEM_ID_SIZE 6
332#define ACPI_OEM_TABLE_ID_SIZE 8
333
334/* ACPI/PNP hardware IDs */
335
336#define PCI_ROOT_HID_STRING "PNP0A03"
337#define PCI_EXPRESS_ROOT_HID_STRING "PNP0A08"
338
339/* PM Timer ticks per second (HZ) */
340
341#define PM_TIMER_FREQUENCY 3579545
342
271/******************************************************************************* 343/*******************************************************************************
272 * 344 *
273 * Independent types 345 * Independent types
@@ -291,13 +363,18 @@ typedef u32 acpi_physical_address;
291#endif 363#endif
292 364
293/* 365/*
294 * Mescellaneous types 366 * Miscellaneous types
295 */ 367 */
296typedef u32 acpi_status; /* All ACPI Exceptions */ 368typedef u32 acpi_status; /* All ACPI Exceptions */
297typedef u32 acpi_name; /* 4-byte ACPI name */ 369typedef u32 acpi_name; /* 4-byte ACPI name */
298typedef char *acpi_string; /* Null terminated ASCII string */ 370typedef char *acpi_string; /* Null terminated ASCII string */
299typedef void *acpi_handle; /* Actually a ptr to a NS Node */ 371typedef void *acpi_handle; /* Actually a ptr to a NS Node */
300 372
373/* Owner IDs are used to track namespace nodes for selective deletion */
374
375typedef u8 acpi_owner_id;
376#define ACPI_OWNER_ID_MAX 0xFF
377
301struct uint64_struct { 378struct uint64_struct {
302 u32 lo; 379 u32 lo;
303 u32 hi; 380 u32 hi;
@@ -313,13 +390,8 @@ struct uint32_struct {
313 u32 hi; 390 u32 hi;
314}; 391};
315 392
316/* Synchronization objects */
317
318#define acpi_mutex void *
319#define acpi_semaphore void *
320
321/* 393/*
322 * Acpi integer width. In ACPI version 1, integers are 32 bits. In ACPI 394 * Acpi integer width. In ACPI version 1, integers are 32 bits. In ACPI
323 * version 2, integers are 64 bits. Note that this pertains to the ACPI integer 395 * version 2, integers are 64 bits. Note that this pertains to the ACPI integer
324 * type only, not other integers used in the implementation of the ACPI CA 396 * type only, not other integers used in the implementation of the ACPI CA
325 * subsystem. 397 * subsystem.
@@ -338,10 +410,75 @@ typedef unsigned long long acpi_integer;
338#define ACPI_MAX16_DECIMAL_DIGITS 5 410#define ACPI_MAX16_DECIMAL_DIGITS 5
339#define ACPI_MAX8_DECIMAL_DIGITS 3 411#define ACPI_MAX8_DECIMAL_DIGITS 3
340 412
413/* PM Timer ticks per second (HZ) */
414
415#define PM_TIMER_FREQUENCY 3579545
416
341/* 417/*
342 * Constants with special meanings 418 * Constants with special meanings
343 */ 419 */
344#define ACPI_ROOT_OBJECT ACPI_ADD_PTR (acpi_handle, NULL, ACPI_MAX_PTR) 420#define ACPI_ROOT_OBJECT ACPI_ADD_PTR (acpi_handle, NULL, ACPI_MAX_PTR)
421#define ACPI_WAIT_FOREVER 0xFFFF /* u16, as per ACPI spec */
422#define ACPI_DO_NOT_WAIT 0
423
424/*******************************************************************************
425 *
426 * Commonly used macros
427 *
428 ******************************************************************************/
429
430/* Data manipulation */
431
432#define ACPI_LOWORD(l) ((u16)(u32)(l))
433#define ACPI_HIWORD(l) ((u16)((((u32)(l)) >> 16) & 0xFFFF))
434#define ACPI_LOBYTE(l) ((u8)(u16)(l))
435#define ACPI_HIBYTE(l) ((u8)((((u16)(l)) >> 8) & 0xFF))
436
437/* Full 64-bit integer must be available on both 32-bit and 64-bit platforms */
438
439struct acpi_integer_overlay {
440 u32 lo_dword;
441 u32 hi_dword;
442};
443
444#define ACPI_LODWORD(integer) (ACPI_CAST_PTR (struct acpi_integer_overlay, &integer)->lo_dword)
445#define ACPI_HIDWORD(integer) (ACPI_CAST_PTR (struct acpi_integer_overlay, &integer)->hi_dword)
446
447#define ACPI_SET_BIT(target,bit) ((target) |= (bit))
448#define ACPI_CLEAR_BIT(target,bit) ((target) &= ~(bit))
449#define ACPI_MIN(a,b) (((a)<(b))?(a):(b))
450#define ACPI_MAX(a,b) (((a)>(b))?(a):(b))
451
452/* Size calculation */
453
454#define ACPI_ARRAY_LENGTH(x) (sizeof(x) / sizeof((x)[0]))
455
456/* Pointer manipulation */
457
458#define ACPI_CAST_PTR(t, p) ((t *) (acpi_uintptr_t) (p))
459#define ACPI_CAST_INDIRECT_PTR(t, p) ((t **) (acpi_uintptr_t) (p))
460#define ACPI_ADD_PTR(t, a, b) ACPI_CAST_PTR (t, (ACPI_CAST_PTR (u8, (a)) + (acpi_size)(b)))
461#define ACPI_PTR_DIFF(a, b) (acpi_size) (ACPI_CAST_PTR (u8, (a)) - ACPI_CAST_PTR (u8, (b)))
462
463/* Pointer/Integer type conversions */
464
465#define ACPI_TO_POINTER(i) ACPI_ADD_PTR (void, (void *) NULL,(acpi_size) i)
466#define ACPI_TO_INTEGER(p) ACPI_PTR_DIFF (p, (void *) NULL)
467#define ACPI_OFFSET(d, f) (acpi_size) ACPI_PTR_DIFF (&(((d *)0)->f), (void *) NULL)
468#define ACPI_PHYSADDR_TO_PTR(i) ACPI_TO_POINTER(i)
469#define ACPI_PTR_TO_PHYSADDR(i) ACPI_TO_INTEGER(i)
470
471#ifndef ACPI_MISALIGNMENT_NOT_SUPPORTED
472#define ACPI_COMPARE_NAME(a,b) (*ACPI_CAST_PTR (u32, (a)) == *ACPI_CAST_PTR (u32, (b)))
473#else
474#define ACPI_COMPARE_NAME(a,b) (!ACPI_STRNCMP (ACPI_CAST_PTR (char, (a)), ACPI_CAST_PTR (char, (b)), ACPI_NAME_SIZE))
475#endif
476
477/*******************************************************************************
478 *
479 * Miscellaneous constants
480 *
481 ******************************************************************************/
345 482
346/* 483/*
347 * Initialization sequence 484 * Initialization sequence
@@ -414,7 +551,7 @@ typedef unsigned long long acpi_integer;
414#define ACPI_NOTIFY_MAX 0x0B 551#define ACPI_NOTIFY_MAX 0x0B
415 552
416/* 553/*
417 * Types associated with ACPI names and objects. The first group of 554 * Types associated with ACPI names and objects. The first group of
418 * values (up to ACPI_TYPE_EXTERNAL_MAX) correspond to the definition 555 * values (up to ACPI_TYPE_EXTERNAL_MAX) correspond to the definition
419 * of the ACPI object_type() operator (See the ACPI Spec). Therefore, 556 * of the ACPI object_type() operator (See the ACPI Spec). Therefore,
420 * only add to the first group if the spec changes. 557 * only add to the first group if the spec changes.
@@ -732,6 +869,15 @@ struct acpi_buffer {
732#define ACPI_NAME_TYPE_MAX 1 869#define ACPI_NAME_TYPE_MAX 1
733 870
734/* 871/*
872 * Predefined Namespace items
873 */
874struct acpi_predefined_names {
875 char *name;
876 u8 type;
877 char *val;
878};
879
880/*
735 * Structure and flags for acpi_get_system_info 881 * Structure and flags for acpi_get_system_info
736 */ 882 */
737#define ACPI_SYS_MODE_UNKNOWN 0x0000 883#define ACPI_SYS_MODE_UNKNOWN 0x0000
@@ -787,7 +933,7 @@ acpi_status(*acpi_exception_handler) (acpi_status aml_status,
787 u16 opcode, 933 u16 opcode,
788 u32 aml_offset, void *context); 934 u32 aml_offset, void *context);
789 935
790/* Table Event handler (Load, load_table etc) and types */ 936/* Table Event handler (Load, load_table, etc.) and types */
791 937
792typedef 938typedef
793acpi_status(*acpi_tbl_handler) (u32 event, void *table, void *context); 939acpi_status(*acpi_tbl_handler) (u32 event, void *table, void *context);
@@ -823,6 +969,12 @@ acpi_status(*acpi_walk_callback) (acpi_handle obj_handle,
823#define ACPI_INTERRUPT_NOT_HANDLED 0x00 969#define ACPI_INTERRUPT_NOT_HANDLED 0x00
824#define ACPI_INTERRUPT_HANDLED 0x01 970#define ACPI_INTERRUPT_HANDLED 0x01
825 971
972/* Length of _HID, _UID, _CID, and UUID values */
973
974#define ACPI_DEVICE_ID_LENGTH 0x09
975#define ACPI_MAX_CID_LENGTH 48
976#define ACPI_UUID_LENGTH 16
977
826/* Common string version of device HIDs and UIDs */ 978/* Common string version of device HIDs and UIDs */
827 979
828struct acpica_device_id { 980struct acpica_device_id {
@@ -900,357 +1052,28 @@ struct acpi_mem_space_context {
900}; 1052};
901 1053
902/* 1054/*
903 * Definitions for Resource Attributes 1055 * struct acpi_memory_list is used only if the ACPICA local cache is enabled
904 */
905typedef u16 acpi_rs_length; /* Resource Length field is fixed at 16 bits */
906typedef u32 acpi_rsdesc_size; /* Max Resource Descriptor size is (Length+3) = (64_k-1)+3 */
907
908/*
909 * Memory Attributes
910 */
911#define ACPI_READ_ONLY_MEMORY (u8) 0x00
912#define ACPI_READ_WRITE_MEMORY (u8) 0x01
913
914#define ACPI_NON_CACHEABLE_MEMORY (u8) 0x00
915#define ACPI_CACHABLE_MEMORY (u8) 0x01
916#define ACPI_WRITE_COMBINING_MEMORY (u8) 0x02
917#define ACPI_PREFETCHABLE_MEMORY (u8) 0x03
918
919/*
920 * IO Attributes
921 * The ISA IO ranges are: n000-n0_fFh, n400-n4_fFh, n800-n8_fFh, n_c00-n_cFFh.
922 * The non-ISA IO ranges are: n100-n3_fFh, n500-n7_fFh, n900-n_bFFh, n_cd0-n_fFFh.
923 */ 1056 */
924#define ACPI_NON_ISA_ONLY_RANGES (u8) 0x01 1057struct acpi_memory_list {
925#define ACPI_ISA_ONLY_RANGES (u8) 0x02 1058 char *list_name;
926#define ACPI_ENTIRE_RANGE (ACPI_NON_ISA_ONLY_RANGES | ACPI_ISA_ONLY_RANGES) 1059 void *list_head;
927 1060 u16 object_size;
928/* Type of translation - 1=Sparse, 0=Dense */ 1061 u16 max_depth;
929 1062 u16 current_depth;
930#define ACPI_SPARSE_TRANSLATION (u8) 0x01 1063 u16 link_offset;
931 1064
932/* 1065#ifdef ACPI_DBG_TRACK_ALLOCATIONS
933 * IO Port Descriptor Decode 1066
934 */ 1067 /* Statistics for debug memory tracking only */
935#define ACPI_DECODE_10 (u8) 0x00 /* 10-bit IO address decode */ 1068
936#define ACPI_DECODE_16 (u8) 0x01 /* 16-bit IO address decode */ 1069 u32 total_allocated;
937 1070 u32 total_freed;
938/* 1071 u32 max_occupied;
939 * IRQ Attributes 1072 u32 total_size;
940 */ 1073 u32 current_total_size;
941#define ACPI_LEVEL_SENSITIVE (u8) 0x00 1074 u32 requests;
942#define ACPI_EDGE_SENSITIVE (u8) 0x01 1075 u32 hits;
943
944#define ACPI_ACTIVE_HIGH (u8) 0x00
945#define ACPI_ACTIVE_LOW (u8) 0x01
946
947#define ACPI_EXCLUSIVE (u8) 0x00
948#define ACPI_SHARED (u8) 0x01
949
950/*
951 * DMA Attributes
952 */
953#define ACPI_COMPATIBILITY (u8) 0x00
954#define ACPI_TYPE_A (u8) 0x01
955#define ACPI_TYPE_B (u8) 0x02
956#define ACPI_TYPE_F (u8) 0x03
957
958#define ACPI_NOT_BUS_MASTER (u8) 0x00
959#define ACPI_BUS_MASTER (u8) 0x01
960
961#define ACPI_TRANSFER_8 (u8) 0x00
962#define ACPI_TRANSFER_8_16 (u8) 0x01
963#define ACPI_TRANSFER_16 (u8) 0x02
964
965/*
966 * Start Dependent Functions Priority definitions
967 */
968#define ACPI_GOOD_CONFIGURATION (u8) 0x00
969#define ACPI_ACCEPTABLE_CONFIGURATION (u8) 0x01
970#define ACPI_SUB_OPTIMAL_CONFIGURATION (u8) 0x02
971
972/*
973 * 16, 32 and 64-bit Address Descriptor resource types
974 */
975#define ACPI_MEMORY_RANGE (u8) 0x00
976#define ACPI_IO_RANGE (u8) 0x01
977#define ACPI_BUS_NUMBER_RANGE (u8) 0x02
978
979#define ACPI_ADDRESS_NOT_FIXED (u8) 0x00
980#define ACPI_ADDRESS_FIXED (u8) 0x01
981
982#define ACPI_POS_DECODE (u8) 0x00
983#define ACPI_SUB_DECODE (u8) 0x01
984
985#define ACPI_PRODUCER (u8) 0x00
986#define ACPI_CONSUMER (u8) 0x01
987
988/*
989 * If possible, pack the following structures to byte alignment
990 */
991#ifndef ACPI_MISALIGNMENT_NOT_SUPPORTED
992#pragma pack(1)
993#endif 1076#endif
994
995/* UUID data structures for use in vendor-defined resource descriptors */
996
997struct acpi_uuid {
998 u8 data[ACPI_UUID_LENGTH];
999};
1000
1001struct acpi_vendor_uuid {
1002 u8 subtype;
1003 u8 data[ACPI_UUID_LENGTH];
1004};
1005
1006/*
1007 * Structures used to describe device resources
1008 */
1009struct acpi_resource_irq {
1010 u8 descriptor_length;
1011 u8 triggering;
1012 u8 polarity;
1013 u8 sharable;
1014 u8 interrupt_count;
1015 u8 interrupts[1];
1016};
1017
1018struct acpi_resource_dma {
1019 u8 type;
1020 u8 bus_master;
1021 u8 transfer;
1022 u8 channel_count;
1023 u8 channels[1];
1024};
1025
1026struct acpi_resource_start_dependent {
1027 u8 descriptor_length;
1028 u8 compatibility_priority;
1029 u8 performance_robustness;
1030};
1031
1032/*
1033 * END_DEPENDENT_FUNCTIONS_RESOURCE struct is not
1034 * needed because it has no fields
1035 */
1036
1037struct acpi_resource_io {
1038 u8 io_decode;
1039 u8 alignment;
1040 u8 address_length;
1041 u16 minimum;
1042 u16 maximum;
1043};
1044
1045struct acpi_resource_fixed_io {
1046 u16 address;
1047 u8 address_length;
1048};
1049
1050struct acpi_resource_vendor {
1051 u16 byte_length;
1052 u8 byte_data[1];
1053};
1054
1055/* Vendor resource with UUID info (introduced in ACPI 3.0) */
1056
1057struct acpi_resource_vendor_typed {
1058 u16 byte_length;
1059 u8 uuid_subtype;
1060 u8 uuid[ACPI_UUID_LENGTH];
1061 u8 byte_data[1];
1062};
1063
1064struct acpi_resource_end_tag {
1065 u8 checksum;
1066};
1067
1068struct acpi_resource_memory24 {
1069 u8 write_protect;
1070 u16 minimum;
1071 u16 maximum;
1072 u16 alignment;
1073 u16 address_length;
1074};
1075
1076struct acpi_resource_memory32 {
1077 u8 write_protect;
1078 u32 minimum;
1079 u32 maximum;
1080 u32 alignment;
1081 u32 address_length;
1082};
1083
1084struct acpi_resource_fixed_memory32 {
1085 u8 write_protect;
1086 u32 address;
1087 u32 address_length;
1088};
1089
1090struct acpi_memory_attribute {
1091 u8 write_protect;
1092 u8 caching;
1093 u8 range_type;
1094 u8 translation;
1095};
1096
1097struct acpi_io_attribute {
1098 u8 range_type;
1099 u8 translation;
1100 u8 translation_type;
1101 u8 reserved1;
1102};
1103
1104union acpi_resource_attribute {
1105 struct acpi_memory_attribute mem;
1106 struct acpi_io_attribute io;
1107
1108 /* Used for the *word_space macros */
1109
1110 u8 type_specific;
1111};
1112
1113struct acpi_resource_source {
1114 u8 index;
1115 u16 string_length;
1116 char *string_ptr;
1117};
1118
1119/* Fields common to all address descriptors, 16/32/64 bit */
1120
1121#define ACPI_RESOURCE_ADDRESS_COMMON \
1122 u8 resource_type; \
1123 u8 producer_consumer; \
1124 u8 decode; \
1125 u8 min_address_fixed; \
1126 u8 max_address_fixed; \
1127 union acpi_resource_attribute info;
1128
1129struct acpi_resource_address {
1130ACPI_RESOURCE_ADDRESS_COMMON};
1131
1132struct acpi_resource_address16 {
1133 ACPI_RESOURCE_ADDRESS_COMMON u16 granularity;
1134 u16 minimum;
1135 u16 maximum;
1136 u16 translation_offset;
1137 u16 address_length;
1138 struct acpi_resource_source resource_source;
1139};
1140
1141struct acpi_resource_address32 {
1142 ACPI_RESOURCE_ADDRESS_COMMON u32 granularity;
1143 u32 minimum;
1144 u32 maximum;
1145 u32 translation_offset;
1146 u32 address_length;
1147 struct acpi_resource_source resource_source;
1148};
1149
1150struct acpi_resource_address64 {
1151 ACPI_RESOURCE_ADDRESS_COMMON u64 granularity;
1152 u64 minimum;
1153 u64 maximum;
1154 u64 translation_offset;
1155 u64 address_length;
1156 struct acpi_resource_source resource_source;
1157};
1158
1159struct acpi_resource_extended_address64 {
1160 ACPI_RESOURCE_ADDRESS_COMMON u8 revision_iD;
1161 u64 granularity;
1162 u64 minimum;
1163 u64 maximum;
1164 u64 translation_offset;
1165 u64 address_length;
1166 u64 type_specific;
1167};
1168
1169struct acpi_resource_extended_irq {
1170 u8 producer_consumer;
1171 u8 triggering;
1172 u8 polarity;
1173 u8 sharable;
1174 u8 interrupt_count;
1175 struct acpi_resource_source resource_source;
1176 u32 interrupts[1];
1177};
1178
1179struct acpi_resource_generic_register {
1180 u8 space_id;
1181 u8 bit_width;
1182 u8 bit_offset;
1183 u8 access_size;
1184 u64 address;
1185};
1186
1187/* ACPI_RESOURCE_TYPEs */
1188
1189#define ACPI_RESOURCE_TYPE_IRQ 0
1190#define ACPI_RESOURCE_TYPE_DMA 1
1191#define ACPI_RESOURCE_TYPE_START_DEPENDENT 2
1192#define ACPI_RESOURCE_TYPE_END_DEPENDENT 3
1193#define ACPI_RESOURCE_TYPE_IO 4
1194#define ACPI_RESOURCE_TYPE_FIXED_IO 5
1195#define ACPI_RESOURCE_TYPE_VENDOR 6
1196#define ACPI_RESOURCE_TYPE_END_TAG 7
1197#define ACPI_RESOURCE_TYPE_MEMORY24 8
1198#define ACPI_RESOURCE_TYPE_MEMORY32 9
1199#define ACPI_RESOURCE_TYPE_FIXED_MEMORY32 10
1200#define ACPI_RESOURCE_TYPE_ADDRESS16 11
1201#define ACPI_RESOURCE_TYPE_ADDRESS32 12
1202#define ACPI_RESOURCE_TYPE_ADDRESS64 13
1203#define ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64 14 /* ACPI 3.0 */
1204#define ACPI_RESOURCE_TYPE_EXTENDED_IRQ 15
1205#define ACPI_RESOURCE_TYPE_GENERIC_REGISTER 16
1206#define ACPI_RESOURCE_TYPE_MAX 16
1207
1208union acpi_resource_data {
1209 struct acpi_resource_irq irq;
1210 struct acpi_resource_dma dma;
1211 struct acpi_resource_start_dependent start_dpf;
1212 struct acpi_resource_io io;
1213 struct acpi_resource_fixed_io fixed_io;
1214 struct acpi_resource_vendor vendor;
1215 struct acpi_resource_vendor_typed vendor_typed;
1216 struct acpi_resource_end_tag end_tag;
1217 struct acpi_resource_memory24 memory24;
1218 struct acpi_resource_memory32 memory32;
1219 struct acpi_resource_fixed_memory32 fixed_memory32;
1220 struct acpi_resource_address16 address16;
1221 struct acpi_resource_address32 address32;
1222 struct acpi_resource_address64 address64;
1223 struct acpi_resource_extended_address64 ext_address64;
1224 struct acpi_resource_extended_irq extended_irq;
1225 struct acpi_resource_generic_register generic_reg;
1226
1227 /* Common fields */
1228
1229 struct acpi_resource_address address; /* Common 16/32/64 address fields */
1230};
1231
1232struct acpi_resource {
1233 u32 type;
1234 u32 length;
1235 union acpi_resource_data data;
1236};
1237
1238/* restore default alignment */
1239
1240#pragma pack()
1241
1242#define ACPI_RS_SIZE_NO_DATA 8 /* Id + Length fields */
1243#define ACPI_RS_SIZE_MIN (u32) ACPI_ROUND_UP_TO_NATIVE_WORD (12)
1244#define ACPI_RS_SIZE(type) (u32) (ACPI_RS_SIZE_NO_DATA + sizeof (type))
1245
1246#define ACPI_NEXT_RESOURCE(res) (struct acpi_resource *)((u8 *) res + res->length)
1247
1248struct acpi_pci_routing_table {
1249 u32 length;
1250 u32 pin;
1251 acpi_integer address; /* here for 64-bit alignment */
1252 u32 source_index;
1253 char source[4]; /* pad to 64 bits so sizeof() works in all cases */
1254}; 1077};
1255 1078
1256#endif /* __ACTYPES_H__ */ 1079#endif /* __ACTYPES_H__ */
diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h
index fcd2572e428c..e62f10d9a7d8 100644
--- a/include/acpi/platform/acenv.h
+++ b/include/acpi/platform/acenv.h
@@ -44,14 +44,26 @@
44#ifndef __ACENV_H__ 44#ifndef __ACENV_H__
45#define __ACENV_H__ 45#define __ACENV_H__
46 46
47/* 47/* Types for ACPI_MUTEX_TYPE */
48
49#define ACPI_BINARY_SEMAPHORE 0
50#define ACPI_OSL_MUTEX 1
51
52/* Types for DEBUGGER_THREADING */
53
54#define DEBUGGER_SINGLE_THREADED 0
55#define DEBUGGER_MULTI_THREADED 1
56
57/******************************************************************************
58 *
48 * Configuration for ACPI tools and utilities 59 * Configuration for ACPI tools and utilities
49 */ 60 *
61 *****************************************************************************/
50 62
51#ifdef ACPI_LIBRARY 63#ifdef ACPI_LIBRARY
52/* 64/*
53 * Note: The non-debug version of the acpi_library does not contain any 65 * Note: The non-debug version of the acpi_library does not contain any
54 * debug support, for minimimal size. The debug version uses ACPI_FULL_DEBUG 66 * debug support, for minimal size. The debug version uses ACPI_FULL_DEBUG
55 */ 67 */
56#define ACPI_USE_LOCAL_CACHE 68#define ACPI_USE_LOCAL_CACHE
57#endif 69#endif
@@ -75,17 +87,6 @@
75#define ACPI_DBG_TRACK_ALLOCATIONS 87#define ACPI_DBG_TRACK_ALLOCATIONS
76#endif 88#endif
77 89
78#ifdef ACPI_DASM_APP
79#ifndef MSDOS
80#define ACPI_DEBUG_OUTPUT
81#endif
82#define ACPI_APPLICATION
83#define ACPI_DISASSEMBLER
84#define ACPI_NO_METHOD_EXECUTION
85#define ACPI_LARGE_NAMESPACE_NODE
86#define ACPI_DATA_TABLE_DISASSEMBLY
87#endif
88
89#ifdef ACPI_APPLICATION 90#ifdef ACPI_APPLICATION
90#define ACPI_USE_SYSTEM_CLIBRARY 91#define ACPI_USE_SYSTEM_CLIBRARY
91#define ACPI_USE_LOCAL_CACHE 92#define ACPI_USE_LOCAL_CACHE
@@ -179,6 +180,19 @@
179 180
180/*! [End] no source code translation !*/ 181/*! [End] no source code translation !*/
181 182
183/******************************************************************************
184 *
185 * Miscellaneous configuration
186 *
187 *****************************************************************************/
188
189/*
190 * Are mutexes supported by the host? default is no, use binary semaphores.
191 */
192#ifndef ACPI_MUTEX_TYPE
193#define ACPI_MUTEX_TYPE ACPI_BINARY_SEMAPHORE
194#endif
195
182/* 196/*
183 * Debugger threading model 197 * Debugger threading model
184 * Use single threaded if the entire subsystem is contained in an application 198 * Use single threaded if the entire subsystem is contained in an application
@@ -187,9 +201,6 @@
187 * By default the model is single threaded if ACPI_APPLICATION is set, 201 * By default the model is single threaded if ACPI_APPLICATION is set,
188 * multi-threaded if ACPI_APPLICATION is not set. 202 * multi-threaded if ACPI_APPLICATION is not set.
189 */ 203 */
190#define DEBUGGER_SINGLE_THREADED 0
191#define DEBUGGER_MULTI_THREADED 1
192
193#ifndef DEBUGGER_THREADING 204#ifndef DEBUGGER_THREADING
194#ifdef ACPI_APPLICATION 205#ifdef ACPI_APPLICATION
195#define DEBUGGER_THREADING DEBUGGER_SINGLE_THREADED 206#define DEBUGGER_THREADING DEBUGGER_SINGLE_THREADED
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index 0515e754449d..6d49b2a498c4 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -46,6 +46,7 @@
46 46
47#define ACPI_USE_SYSTEM_CLIBRARY 47#define ACPI_USE_SYSTEM_CLIBRARY
48#define ACPI_USE_DO_WHILE_0 48#define ACPI_USE_DO_WHILE_0
49#define ACPI_MUTEX_TYPE ACPI_BINARY_SEMAPHORE
49 50
50#ifdef __KERNEL__ 51#ifdef __KERNEL__
51 52
@@ -70,9 +71,6 @@
70#define ACPI_EXPORT_SYMBOL(symbol) EXPORT_SYMBOL(symbol); 71#define ACPI_EXPORT_SYMBOL(symbol) EXPORT_SYMBOL(symbol);
71#define strtoul simple_strtoul 72#define strtoul simple_strtoul
72 73
73/* Full namespace pathname length limit - arbitrary */
74#define ACPI_PATHNAME_MAX 256
75
76#else /* !__KERNEL__ */ 74#else /* !__KERNEL__ */
77 75
78#include <stdarg.h> 76#include <stdarg.h>
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index fba8051fb297..6fce2fc2d124 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -131,22 +131,6 @@ extern int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity);
131 */ 131 */
132void acpi_unregister_gsi (u32 gsi); 132void acpi_unregister_gsi (u32 gsi);
133 133
134struct acpi_prt_entry {
135 struct list_head node;
136 struct acpi_pci_id id;
137 u8 pin;
138 struct {
139 acpi_handle handle;
140 u32 index;
141 } link;
142 u32 irq;
143};
144
145struct acpi_prt_list {
146 int count;
147 struct list_head entries;
148};
149
150struct pci_dev; 134struct pci_dev;
151 135
152int acpi_pci_irq_enable (struct pci_dev *dev); 136int acpi_pci_irq_enable (struct pci_dev *dev);
@@ -270,6 +254,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n,
270#ifdef CONFIG_PM_SLEEP 254#ifdef CONFIG_PM_SLEEP
271void __init acpi_no_s4_hw_signature(void); 255void __init acpi_no_s4_hw_signature(void);
272void __init acpi_old_suspend_ordering(void); 256void __init acpi_old_suspend_ordering(void);
257void __init acpi_s4_no_nvs(void);
273#endif /* CONFIG_PM_SLEEP */ 258#endif /* CONFIG_PM_SLEEP */
274#else /* CONFIG_ACPI */ 259#else /* CONFIG_ACPI */
275 260
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 0f50d4cc4360..45f6297821bd 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -59,9 +59,7 @@ enum async_tx_flags {
59}; 59};
60 60
61#ifdef CONFIG_DMA_ENGINE 61#ifdef CONFIG_DMA_ENGINE
62void async_tx_issue_pending_all(void); 62#define async_tx_issue_pending_all dma_issue_pending_all
63enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx);
64void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx);
65#ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL 63#ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL
66#include <asm/async_tx.h> 64#include <asm/async_tx.h>
67#else 65#else
@@ -77,19 +75,6 @@ static inline void async_tx_issue_pending_all(void)
77 do { } while (0); 75 do { } while (0);
78} 76}
79 77
80static inline enum dma_status
81dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
82{
83 return DMA_SUCCESS;
84}
85
86static inline void
87async_tx_run_dependencies(struct dma_async_tx_descriptor *tx,
88 struct dma_chan *host_chan)
89{
90 do { } while (0);
91}
92
93static inline struct dma_chan * 78static inline struct dma_chan *
94async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, 79async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
95 enum dma_transaction_type tx_type, struct page **dst, int dst_count, 80 enum dma_transaction_type tx_type, struct page **dst, int dst_count,
diff --git a/include/linux/atmel-mci.h b/include/linux/atmel-mci.h
index 2a2213eefd85..2f1f95737acb 100644
--- a/include/linux/atmel-mci.h
+++ b/include/linux/atmel-mci.h
@@ -3,7 +3,7 @@
3 3
4#define ATMEL_MCI_MAX_NR_SLOTS 2 4#define ATMEL_MCI_MAX_NR_SLOTS 2
5 5
6struct dma_slave; 6#include <linux/dw_dmac.h>
7 7
8/** 8/**
9 * struct mci_slot_pdata - board-specific per-slot configuration 9 * struct mci_slot_pdata - board-specific per-slot configuration
@@ -28,11 +28,11 @@ struct mci_slot_pdata {
28 28
29/** 29/**
30 * struct mci_platform_data - board-specific MMC/SDcard configuration 30 * struct mci_platform_data - board-specific MMC/SDcard configuration
31 * @dma_slave: DMA slave interface to use in data transfers, or NULL. 31 * @dma_slave: DMA slave interface to use in data transfers.
32 * @slot: Per-slot configuration data. 32 * @slot: Per-slot configuration data.
33 */ 33 */
34struct mci_platform_data { 34struct mci_platform_data {
35 struct dma_slave *dma_slave; 35 struct dw_dma_slave dma_slave;
36 struct mci_slot_pdata slot[ATMEL_MCI_MAX_NR_SLOTS]; 36 struct mci_slot_pdata slot[ATMEL_MCI_MAX_NR_SLOTS];
37}; 37};
38 38
diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 1ee9488ca2e4..79ca2da81c87 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -31,6 +31,10 @@ struct backlight_device;
31struct fb_info; 31struct fb_info;
32 32
33struct backlight_ops { 33struct backlight_ops {
34 unsigned int options;
35
36#define BL_CORE_SUSPENDRESUME (1 << 0)
37
34 /* Notify the backlight driver some property has changed */ 38 /* Notify the backlight driver some property has changed */
35 int (*update_status)(struct backlight_device *); 39 int (*update_status)(struct backlight_device *);
36 /* Return the current backlight brightness (accounting for power, 40 /* Return the current backlight brightness (accounting for power,
@@ -51,7 +55,19 @@ struct backlight_properties {
51 modes; 4: full off), see FB_BLANK_XXX */ 55 modes; 4: full off), see FB_BLANK_XXX */
52 int power; 56 int power;
53 /* FB Blanking active? (values as for power) */ 57 /* FB Blanking active? (values as for power) */
58 /* Due to be removed, please use (state & BL_CORE_FBBLANK) */
54 int fb_blank; 59 int fb_blank;
60 /* Flags used to signal drivers of state changes */
61 /* Upper 4 bits are reserved for driver internal use */
62 unsigned int state;
63
64#define BL_CORE_SUSPENDED (1 << 0) /* backlight is suspended */
65#define BL_CORE_FBBLANK (1 << 1) /* backlight is under an fb blank event */
66#define BL_CORE_DRIVER4 (1 << 28) /* reserved for driver specific use */
67#define BL_CORE_DRIVER3 (1 << 29) /* reserved for driver specific use */
68#define BL_CORE_DRIVER2 (1 << 30) /* reserved for driver specific use */
69#define BL_CORE_DRIVER1 (1 << 31) /* reserved for driver specific use */
70
55}; 71};
56 72
57struct backlight_device { 73struct backlight_device {
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index adb0b084eb5a..64dea2ab326c 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -29,32 +29,6 @@
29#include <linux/dma-mapping.h> 29#include <linux/dma-mapping.h>
30 30
31/** 31/**
32 * enum dma_state - resource PNP/power management state
33 * @DMA_RESOURCE_SUSPEND: DMA device going into low power state
34 * @DMA_RESOURCE_RESUME: DMA device returning to full power
35 * @DMA_RESOURCE_AVAILABLE: DMA device available to the system
36 * @DMA_RESOURCE_REMOVED: DMA device removed from the system
37 */
38enum dma_state {
39 DMA_RESOURCE_SUSPEND,
40 DMA_RESOURCE_RESUME,
41 DMA_RESOURCE_AVAILABLE,
42 DMA_RESOURCE_REMOVED,
43};
44
45/**
46 * enum dma_state_client - state of the channel in the client
47 * @DMA_ACK: client would like to use, or was using this channel
48 * @DMA_DUP: client has already seen this channel, or is not using this channel
49 * @DMA_NAK: client does not want to see any more channels
50 */
51enum dma_state_client {
52 DMA_ACK,
53 DMA_DUP,
54 DMA_NAK,
55};
56
57/**
58 * typedef dma_cookie_t - an opaque DMA cookie 32 * typedef dma_cookie_t - an opaque DMA cookie
59 * 33 *
60 * if dma_cookie_t is >0 it's a DMA request cookie, <0 it's an error code 34 * if dma_cookie_t is >0 it's a DMA request cookie, <0 it's an error code
@@ -89,23 +63,13 @@ enum dma_transaction_type {
89 DMA_MEMSET, 63 DMA_MEMSET,
90 DMA_MEMCPY_CRC32C, 64 DMA_MEMCPY_CRC32C,
91 DMA_INTERRUPT, 65 DMA_INTERRUPT,
66 DMA_PRIVATE,
92 DMA_SLAVE, 67 DMA_SLAVE,
93}; 68};
94 69
95/* last transaction type for creation of the capabilities mask */ 70/* last transaction type for creation of the capabilities mask */
96#define DMA_TX_TYPE_END (DMA_SLAVE + 1) 71#define DMA_TX_TYPE_END (DMA_SLAVE + 1)
97 72
98/**
99 * enum dma_slave_width - DMA slave register access width.
100 * @DMA_SLAVE_WIDTH_8BIT: Do 8-bit slave register accesses
101 * @DMA_SLAVE_WIDTH_16BIT: Do 16-bit slave register accesses
102 * @DMA_SLAVE_WIDTH_32BIT: Do 32-bit slave register accesses
103 */
104enum dma_slave_width {
105 DMA_SLAVE_WIDTH_8BIT,
106 DMA_SLAVE_WIDTH_16BIT,
107 DMA_SLAVE_WIDTH_32BIT,
108};
109 73
110/** 74/**
111 * enum dma_ctrl_flags - DMA flags to augment operation preparation, 75 * enum dma_ctrl_flags - DMA flags to augment operation preparation,
@@ -132,32 +96,6 @@ enum dma_ctrl_flags {
132typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t; 96typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t;
133 97
134/** 98/**
135 * struct dma_slave - Information about a DMA slave
136 * @dev: device acting as DMA slave
137 * @dma_dev: required DMA master device. If non-NULL, the client can not be
138 * bound to other masters than this.
139 * @tx_reg: physical address of data register used for
140 * memory-to-peripheral transfers
141 * @rx_reg: physical address of data register used for
142 * peripheral-to-memory transfers
143 * @reg_width: peripheral register width
144 *
145 * If dma_dev is non-NULL, the client can not be bound to other DMA
146 * masters than the one corresponding to this device. The DMA master
147 * driver may use this to determine if there is controller-specific
148 * data wrapped around this struct. Drivers of platform code that sets
149 * the dma_dev field must therefore make sure to use an appropriate
150 * controller-specific dma slave structure wrapping this struct.
151 */
152struct dma_slave {
153 struct device *dev;
154 struct device *dma_dev;
155 dma_addr_t tx_reg;
156 dma_addr_t rx_reg;
157 enum dma_slave_width reg_width;
158};
159
160/**
161 * struct dma_chan_percpu - the per-CPU part of struct dma_chan 99 * struct dma_chan_percpu - the per-CPU part of struct dma_chan
162 * @refcount: local_t used for open-coded "bigref" counting 100 * @refcount: local_t used for open-coded "bigref" counting
163 * @memcpy_count: transaction counter 101 * @memcpy_count: transaction counter
@@ -165,7 +103,6 @@ struct dma_slave {
165 */ 103 */
166 104
167struct dma_chan_percpu { 105struct dma_chan_percpu {
168 local_t refcount;
169 /* stats */ 106 /* stats */
170 unsigned long memcpy_count; 107 unsigned long memcpy_count;
171 unsigned long bytes_transferred; 108 unsigned long bytes_transferred;
@@ -176,13 +113,14 @@ struct dma_chan_percpu {
176 * @device: ptr to the dma device who supplies this channel, always !%NULL 113 * @device: ptr to the dma device who supplies this channel, always !%NULL
177 * @cookie: last cookie value returned to client 114 * @cookie: last cookie value returned to client
178 * @chan_id: channel ID for sysfs 115 * @chan_id: channel ID for sysfs
179 * @class_dev: class device for sysfs 116 * @dev: class device for sysfs
180 * @refcount: kref, used in "bigref" slow-mode 117 * @refcount: kref, used in "bigref" slow-mode
181 * @slow_ref: indicates that the DMA channel is free 118 * @slow_ref: indicates that the DMA channel is free
182 * @rcu: the DMA channel's RCU head 119 * @rcu: the DMA channel's RCU head
183 * @device_node: used to add this to the device chan list 120 * @device_node: used to add this to the device chan list
184 * @local: per-cpu pointer to a struct dma_chan_percpu 121 * @local: per-cpu pointer to a struct dma_chan_percpu
185 * @client-count: how many clients are using this channel 122 * @client-count: how many clients are using this channel
123 * @table_count: number of appearances in the mem-to-mem allocation table
186 */ 124 */
187struct dma_chan { 125struct dma_chan {
188 struct dma_device *device; 126 struct dma_device *device;
@@ -190,73 +128,47 @@ struct dma_chan {
190 128
191 /* sysfs */ 129 /* sysfs */
192 int chan_id; 130 int chan_id;
193 struct device dev; 131 struct dma_chan_dev *dev;
194
195 struct kref refcount;
196 int slow_ref;
197 struct rcu_head rcu;
198 132
199 struct list_head device_node; 133 struct list_head device_node;
200 struct dma_chan_percpu *local; 134 struct dma_chan_percpu *local;
201 int client_count; 135 int client_count;
136 int table_count;
202}; 137};
203 138
204#define to_dma_chan(p) container_of(p, struct dma_chan, dev) 139/**
205 140 * struct dma_chan_dev - relate sysfs device node to backing channel device
206void dma_chan_cleanup(struct kref *kref); 141 * @chan - driver channel device
207 142 * @device - sysfs device
208static inline void dma_chan_get(struct dma_chan *chan) 143 * @dev_id - parent dma_device dev_id
209{ 144 * @idr_ref - reference count to gate release of dma_device dev_id
210 if (unlikely(chan->slow_ref)) 145 */
211 kref_get(&chan->refcount); 146struct dma_chan_dev {
212 else { 147 struct dma_chan *chan;
213 local_inc(&(per_cpu_ptr(chan->local, get_cpu())->refcount)); 148 struct device device;
214 put_cpu(); 149 int dev_id;
215 } 150 atomic_t *idr_ref;
216} 151};
217 152
218static inline void dma_chan_put(struct dma_chan *chan) 153static inline const char *dma_chan_name(struct dma_chan *chan)
219{ 154{
220 if (unlikely(chan->slow_ref)) 155 return dev_name(&chan->dev->device);
221 kref_put(&chan->refcount, dma_chan_cleanup);
222 else {
223 local_dec(&(per_cpu_ptr(chan->local, get_cpu())->refcount));
224 put_cpu();
225 }
226} 156}
227 157
228/* 158void dma_chan_cleanup(struct kref *kref);
229 * typedef dma_event_callback - function pointer to a DMA event callback
230 * For each channel added to the system this routine is called for each client.
231 * If the client would like to use the channel it returns '1' to signal (ack)
232 * the dmaengine core to take out a reference on the channel and its
233 * corresponding device. A client must not 'ack' an available channel more
234 * than once. When a channel is removed all clients are notified. If a client
235 * is using the channel it must 'ack' the removal. A client must not 'ack' a
236 * removed channel more than once.
237 * @client - 'this' pointer for the client context
238 * @chan - channel to be acted upon
239 * @state - available or removed
240 */
241struct dma_client;
242typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client,
243 struct dma_chan *chan, enum dma_state state);
244 159
245/** 160/**
246 * struct dma_client - info on the entity making use of DMA services 161 * typedef dma_filter_fn - callback filter for dma_request_channel
247 * @event_callback: func ptr to call when something happens 162 * @chan: channel to be reviewed
248 * @cap_mask: only return channels that satisfy the requested capabilities 163 * @filter_param: opaque parameter passed through dma_request_channel
249 * a value of zero corresponds to any capability 164 *
250 * @slave: data for preparing slave transfer. Must be non-NULL iff the 165 * When this optional parameter is specified in a call to dma_request_channel a
251 * DMA_SLAVE capability is requested. 166 * suitable channel is passed to this routine for further dispositioning before
252 * @global_node: list_head for global dma_client_list 167 * being returned. Where 'suitable' indicates a non-busy channel that
168 * satisfies the given capability mask. It returns 'true' to indicate that the
169 * channel is suitable.
253 */ 170 */
254struct dma_client { 171typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
255 dma_event_callback event_callback;
256 dma_cap_mask_t cap_mask;
257 struct dma_slave *slave;
258 struct list_head global_node;
259};
260 172
261typedef void (*dma_async_tx_callback)(void *dma_async_param); 173typedef void (*dma_async_tx_callback)(void *dma_async_param);
262/** 174/**
@@ -323,14 +235,10 @@ struct dma_device {
323 dma_cap_mask_t cap_mask; 235 dma_cap_mask_t cap_mask;
324 int max_xor; 236 int max_xor;
325 237
326 struct kref refcount;
327 struct completion done;
328
329 int dev_id; 238 int dev_id;
330 struct device *dev; 239 struct device *dev;
331 240
332 int (*device_alloc_chan_resources)(struct dma_chan *chan, 241 int (*device_alloc_chan_resources)(struct dma_chan *chan);
333 struct dma_client *client);
334 void (*device_free_chan_resources)(struct dma_chan *chan); 242 void (*device_free_chan_resources)(struct dma_chan *chan);
335 243
336 struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)( 244 struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)(
@@ -362,9 +270,8 @@ struct dma_device {
362 270
363/* --- public DMA engine API --- */ 271/* --- public DMA engine API --- */
364 272
365void dma_async_client_register(struct dma_client *client); 273void dmaengine_get(void);
366void dma_async_client_unregister(struct dma_client *client); 274void dmaengine_put(void);
367void dma_async_client_chan_request(struct dma_client *client);
368dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, 275dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
369 void *dest, void *src, size_t len); 276 void *dest, void *src, size_t len);
370dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, 277dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan,
@@ -406,6 +313,12 @@ __dma_cap_set(enum dma_transaction_type tx_type, dma_cap_mask_t *dstp)
406 set_bit(tx_type, dstp->bits); 313 set_bit(tx_type, dstp->bits);
407} 314}
408 315
316#define dma_cap_zero(mask) __dma_cap_zero(&(mask))
317static inline void __dma_cap_zero(dma_cap_mask_t *dstp)
318{
319 bitmap_zero(dstp->bits, DMA_TX_TYPE_END);
320}
321
409#define dma_has_cap(tx, mask) __dma_has_cap((tx), &(mask)) 322#define dma_has_cap(tx, mask) __dma_has_cap((tx), &(mask))
410static inline int 323static inline int
411__dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp) 324__dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp)
@@ -475,11 +388,25 @@ static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie,
475} 388}
476 389
477enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie); 390enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie);
391#ifdef CONFIG_DMA_ENGINE
392enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx);
393#else
394static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
395{
396 return DMA_SUCCESS;
397}
398#endif
478 399
479/* --- DMA device --- */ 400/* --- DMA device --- */
480 401
481int dma_async_device_register(struct dma_device *device); 402int dma_async_device_register(struct dma_device *device);
482void dma_async_device_unregister(struct dma_device *device); 403void dma_async_device_unregister(struct dma_device *device);
404void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
405struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type);
406void dma_issue_pending_all(void);
407#define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y)
408struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param);
409void dma_release_channel(struct dma_chan *chan);
483 410
484/* --- Helper iov-locking functions --- */ 411/* --- Helper iov-locking functions --- */
485 412
diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h
index 04d217b442bf..d797dde247f7 100644
--- a/include/linux/dw_dmac.h
+++ b/include/linux/dw_dmac.h
@@ -22,14 +22,34 @@ struct dw_dma_platform_data {
22}; 22};
23 23
24/** 24/**
25 * enum dw_dma_slave_width - DMA slave register access width.
26 * @DMA_SLAVE_WIDTH_8BIT: Do 8-bit slave register accesses
27 * @DMA_SLAVE_WIDTH_16BIT: Do 16-bit slave register accesses
28 * @DMA_SLAVE_WIDTH_32BIT: Do 32-bit slave register accesses
29 */
30enum dw_dma_slave_width {
31 DW_DMA_SLAVE_WIDTH_8BIT,
32 DW_DMA_SLAVE_WIDTH_16BIT,
33 DW_DMA_SLAVE_WIDTH_32BIT,
34};
35
36/**
25 * struct dw_dma_slave - Controller-specific information about a slave 37 * struct dw_dma_slave - Controller-specific information about a slave
26 * @slave: Generic information about the slave 38 *
27 * @ctl_lo: Platform-specific initializer for the CTL_LO register 39 * @dma_dev: required DMA master device
40 * @tx_reg: physical address of data register used for
41 * memory-to-peripheral transfers
42 * @rx_reg: physical address of data register used for
43 * peripheral-to-memory transfers
44 * @reg_width: peripheral register width
28 * @cfg_hi: Platform-specific initializer for the CFG_HI register 45 * @cfg_hi: Platform-specific initializer for the CFG_HI register
29 * @cfg_lo: Platform-specific initializer for the CFG_LO register 46 * @cfg_lo: Platform-specific initializer for the CFG_LO register
30 */ 47 */
31struct dw_dma_slave { 48struct dw_dma_slave {
32 struct dma_slave slave; 49 struct device *dma_dev;
50 dma_addr_t tx_reg;
51 dma_addr_t rx_reg;
52 enum dw_dma_slave_width reg_width;
33 u32 cfg_hi; 53 u32 cfg_hi;
34 u32 cfg_lo; 54 u32 cfg_lo;
35}; 55};
@@ -54,9 +74,4 @@ struct dw_dma_slave {
54#define DWC_CFGL_HS_DST_POL (1 << 18) /* dst handshake active low */ 74#define DWC_CFGL_HS_DST_POL (1 << 18) /* dst handshake active low */
55#define DWC_CFGL_HS_SRC_POL (1 << 19) /* src handshake active low */ 75#define DWC_CFGL_HS_SRC_POL (1 << 19) /* src handshake active low */
56 76
57static inline struct dw_dma_slave *to_dw_dma_slave(struct dma_slave *slave)
58{
59 return container_of(slave, struct dw_dma_slave, slave);
60}
61
62#endif /* DW_DMAC_H */ 77#endif /* DW_DMAC_H */
diff --git a/include/linux/leds-pca9532.h b/include/linux/leds-pca9532.h
index 81b4207deb95..96eea90f01a8 100644
--- a/include/linux/leds-pca9532.h
+++ b/include/linux/leds-pca9532.h
@@ -15,6 +15,7 @@
15#define __LINUX_PCA9532_H 15#define __LINUX_PCA9532_H
16 16
17#include <linux/leds.h> 17#include <linux/leds.h>
18#include <linux/workqueue.h>
18 19
19enum pca9532_state { 20enum pca9532_state {
20 PCA9532_OFF = 0x0, 21 PCA9532_OFF = 0x0,
@@ -31,6 +32,7 @@ struct pca9532_led {
31 struct i2c_client *client; 32 struct i2c_client *client;
32 char *name; 33 char *name;
33 struct led_classdev ldev; 34 struct led_classdev ldev;
35 struct work_struct work;
34 enum pca9532_type type; 36 enum pca9532_type type;
35 enum pca9532_state state; 37 enum pca9532_state state;
36}; 38};
diff --git a/include/linux/leds.h b/include/linux/leds.h
index d3a73f5a48c3..24489da701e3 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -32,7 +32,10 @@ struct led_classdev {
32 int brightness; 32 int brightness;
33 int flags; 33 int flags;
34 34
35 /* Lower 16 bits reflect status */
35#define LED_SUSPENDED (1 << 0) 36#define LED_SUSPENDED (1 << 0)
37 /* Upper 16 bits reflect control information */
38#define LED_CORE_SUSPENDRESUME (1 << 16)
36 39
37 /* Set LED brightness level */ 40 /* Set LED brightness level */
38 /* Must not sleep, use a workqueue if needed */ 41 /* Must not sleep, use a workqueue if needed */
@@ -62,7 +65,7 @@ struct led_classdev {
62 65
63extern int led_classdev_register(struct device *parent, 66extern int led_classdev_register(struct device *parent,
64 struct led_classdev *led_cdev); 67 struct led_classdev *led_cdev);
65extern void led_classdev_unregister(struct led_classdev *lcd); 68extern void led_classdev_unregister(struct led_classdev *led_cdev);
66extern void led_classdev_suspend(struct led_classdev *led_cdev); 69extern void led_classdev_suspend(struct led_classdev *led_cdev);
67extern void led_classdev_resume(struct led_classdev *led_cdev); 70extern void led_classdev_resume(struct led_classdev *led_cdev);
68 71
diff --git a/include/linux/mfd/wm8350/pmic.h b/include/linux/mfd/wm8350/pmic.h
index 96acbfc8aa12..be3264e286e0 100644
--- a/include/linux/mfd/wm8350/pmic.h
+++ b/include/linux/mfd/wm8350/pmic.h
@@ -13,6 +13,10 @@
13#ifndef __LINUX_MFD_WM8350_PMIC_H 13#ifndef __LINUX_MFD_WM8350_PMIC_H
14#define __LINUX_MFD_WM8350_PMIC_H 14#define __LINUX_MFD_WM8350_PMIC_H
15 15
16#include <linux/platform_device.h>
17#include <linux/leds.h>
18#include <linux/regulator/machine.h>
19
16/* 20/*
17 * Register values. 21 * Register values.
18 */ 22 */
@@ -700,6 +704,33 @@ struct wm8350;
700struct platform_device; 704struct platform_device;
701struct regulator_init_data; 705struct regulator_init_data;
702 706
707/*
708 * WM8350 LED platform data
709 */
710struct wm8350_led_platform_data {
711 const char *name;
712 const char *default_trigger;
713 int max_uA;
714};
715
716struct wm8350_led {
717 struct platform_device *pdev;
718 struct mutex mutex;
719 struct work_struct work;
720 spinlock_t value_lock;
721 enum led_brightness value;
722 struct led_classdev cdev;
723 int max_uA_index;
724 int enabled;
725
726 struct regulator *isink;
727 struct regulator_consumer_supply isink_consumer;
728 struct regulator_init_data isink_init;
729 struct regulator *dcdc;
730 struct regulator_consumer_supply dcdc_consumer;
731 struct regulator_init_data dcdc_init;
732};
733
703struct wm8350_pmic { 734struct wm8350_pmic {
704 /* Number of regulators of each type on this device */ 735 /* Number of regulators of each type on this device */
705 int max_dcdc; 736 int max_dcdc;
@@ -717,10 +748,15 @@ struct wm8350_pmic {
717 748
718 /* regulator devices */ 749 /* regulator devices */
719 struct platform_device *pdev[NUM_WM8350_REGULATORS]; 750 struct platform_device *pdev[NUM_WM8350_REGULATORS];
751
752 /* LED devices */
753 struct wm8350_led led[2];
720}; 754};
721 755
722int wm8350_register_regulator(struct wm8350 *wm8350, int reg, 756int wm8350_register_regulator(struct wm8350 *wm8350, int reg,
723 struct regulator_init_data *initdata); 757 struct regulator_init_data *initdata);
758int wm8350_register_led(struct wm8350 *wm8350, int lednum, int dcdc, int isink,
759 struct wm8350_led_platform_data *pdata);
724 760
725/* 761/*
726 * Additional DCDC control not supported via regulator API 762 * Additional DCDC control not supported via regulator API
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index 00e2b575021f..88d3d8fbf9f2 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -520,6 +520,7 @@ struct cfi_fixup {
520 520
521#define CFI_MFR_AMD 0x0001 521#define CFI_MFR_AMD 0x0001
522#define CFI_MFR_ATMEL 0x001F 522#define CFI_MFR_ATMEL 0x001F
523#define CFI_MFR_SAMSUNG 0x00EC
523#define CFI_MFR_ST 0x0020 /* STMicroelectronics */ 524#define CFI_MFR_ST 0x0020 /* STMicroelectronics */
524 525
525void cfi_fixup(struct mtd_info *mtd, struct cfi_fixup* fixups); 526void cfi_fixup(struct mtd_info *mtd, struct cfi_fixup* fixups);
diff --git a/include/linux/mtd/ftl.h b/include/linux/mtd/ftl.h
index 0be442f881dd..0555f7a0b9ed 100644
--- a/include/linux/mtd/ftl.h
+++ b/include/linux/mtd/ftl.h
@@ -32,25 +32,25 @@
32#define _LINUX_FTL_H 32#define _LINUX_FTL_H
33 33
34typedef struct erase_unit_header_t { 34typedef struct erase_unit_header_t {
35 u_int8_t LinkTargetTuple[5]; 35 uint8_t LinkTargetTuple[5];
36 u_int8_t DataOrgTuple[10]; 36 uint8_t DataOrgTuple[10];
37 u_int8_t NumTransferUnits; 37 uint8_t NumTransferUnits;
38 u_int32_t EraseCount; 38 uint32_t EraseCount;
39 u_int16_t LogicalEUN; 39 uint16_t LogicalEUN;
40 u_int8_t BlockSize; 40 uint8_t BlockSize;
41 u_int8_t EraseUnitSize; 41 uint8_t EraseUnitSize;
42 u_int16_t FirstPhysicalEUN; 42 uint16_t FirstPhysicalEUN;
43 u_int16_t NumEraseUnits; 43 uint16_t NumEraseUnits;
44 u_int32_t FormattedSize; 44 uint32_t FormattedSize;
45 u_int32_t FirstVMAddress; 45 uint32_t FirstVMAddress;
46 u_int16_t NumVMPages; 46 uint16_t NumVMPages;
47 u_int8_t Flags; 47 uint8_t Flags;
48 u_int8_t Code; 48 uint8_t Code;
49 u_int32_t SerialNumber; 49 uint32_t SerialNumber;
50 u_int32_t AltEUHOffset; 50 uint32_t AltEUHOffset;
51 u_int32_t BAMOffset; 51 uint32_t BAMOffset;
52 u_int8_t Reserved[12]; 52 uint8_t Reserved[12];
53 u_int8_t EndTuple[2]; 53 uint8_t EndTuple[2];
54} erase_unit_header_t; 54} erase_unit_header_t;
55 55
56/* Flags in erase_unit_header_t */ 56/* Flags in erase_unit_header_t */
diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index aa30244492c6..b981b8772217 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -223,6 +223,7 @@ struct map_info {
223 must leave it enabled. */ 223 must leave it enabled. */
224 void (*set_vpp)(struct map_info *, int); 224 void (*set_vpp)(struct map_info *, int);
225 225
226 unsigned long pfow_base;
226 unsigned long map_priv_1; 227 unsigned long map_priv_1;
227 unsigned long map_priv_2; 228 unsigned long map_priv_2;
228 void *fldrv_priv; 229 void *fldrv_priv;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 64433eb411d7..3aa5d77c2cdb 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -15,6 +15,8 @@
15#include <linux/mtd/compatmac.h> 15#include <linux/mtd/compatmac.h>
16#include <mtd/mtd-abi.h> 16#include <mtd/mtd-abi.h>
17 17
18#include <asm/div64.h>
19
18#define MTD_CHAR_MAJOR 90 20#define MTD_CHAR_MAJOR 90
19#define MTD_BLOCK_MAJOR 31 21#define MTD_BLOCK_MAJOR 31
20#define MAX_MTD_DEVICES 32 22#define MAX_MTD_DEVICES 32
@@ -25,20 +27,20 @@
25#define MTD_ERASE_DONE 0x08 27#define MTD_ERASE_DONE 0x08
26#define MTD_ERASE_FAILED 0x10 28#define MTD_ERASE_FAILED 0x10
27 29
28#define MTD_FAIL_ADDR_UNKNOWN 0xffffffff 30#define MTD_FAIL_ADDR_UNKNOWN -1LL
29 31
30/* If the erase fails, fail_addr might indicate exactly which block failed. If 32/* If the erase fails, fail_addr might indicate exactly which block failed. If
31 fail_addr = MTD_FAIL_ADDR_UNKNOWN, the failure was not at the device level or was not 33 fail_addr = MTD_FAIL_ADDR_UNKNOWN, the failure was not at the device level or was not
32 specific to any particular block. */ 34 specific to any particular block. */
33struct erase_info { 35struct erase_info {
34 struct mtd_info *mtd; 36 struct mtd_info *mtd;
35 u_int32_t addr; 37 uint64_t addr;
36 u_int32_t len; 38 uint64_t len;
37 u_int32_t fail_addr; 39 uint64_t fail_addr;
38 u_long time; 40 u_long time;
39 u_long retries; 41 u_long retries;
40 u_int dev; 42 unsigned dev;
41 u_int cell; 43 unsigned cell;
42 void (*callback) (struct erase_info *self); 44 void (*callback) (struct erase_info *self);
43 u_long priv; 45 u_long priv;
44 u_char state; 46 u_char state;
@@ -46,9 +48,9 @@ struct erase_info {
46}; 48};
47 49
48struct mtd_erase_region_info { 50struct mtd_erase_region_info {
49 u_int32_t offset; /* At which this region starts, from the beginning of the MTD */ 51 uint64_t offset; /* At which this region starts, from the beginning of the MTD */
50 u_int32_t erasesize; /* For this region */ 52 uint32_t erasesize; /* For this region */
51 u_int32_t numblocks; /* Number of blocks of erasesize in this region */ 53 uint32_t numblocks; /* Number of blocks of erasesize in this region */
52 unsigned long *lockmap; /* If keeping bitmap of locks */ 54 unsigned long *lockmap; /* If keeping bitmap of locks */
53}; 55};
54 56
@@ -100,14 +102,14 @@ struct mtd_oob_ops {
100 102
101struct mtd_info { 103struct mtd_info {
102 u_char type; 104 u_char type;
103 u_int32_t flags; 105 uint32_t flags;
104 u_int32_t size; // Total size of the MTD 106 uint64_t size; // Total size of the MTD
105 107
106 /* "Major" erase size for the device. Naïve users may take this 108 /* "Major" erase size for the device. Naïve users may take this
107 * to be the only erase size available, or may use the more detailed 109 * to be the only erase size available, or may use the more detailed
108 * information below if they desire 110 * information below if they desire
109 */ 111 */
110 u_int32_t erasesize; 112 uint32_t erasesize;
111 /* Minimal writable flash unit size. In case of NOR flash it is 1 (even 113 /* Minimal writable flash unit size. In case of NOR flash it is 1 (even
112 * though individual bits can be cleared), in case of NAND flash it is 114 * though individual bits can be cleared), in case of NAND flash it is
113 * one NAND page (or half, or one-fourths of it), in case of ECC-ed NOR 115 * one NAND page (or half, or one-fourths of it), in case of ECC-ed NOR
@@ -115,10 +117,20 @@ struct mtd_info {
115 * Any driver registering a struct mtd_info must ensure a writesize of 117 * Any driver registering a struct mtd_info must ensure a writesize of
116 * 1 or larger. 118 * 1 or larger.
117 */ 119 */
118 u_int32_t writesize; 120 uint32_t writesize;
121
122 uint32_t oobsize; // Amount of OOB data per block (e.g. 16)
123 uint32_t oobavail; // Available OOB bytes per block
119 124
120 u_int32_t oobsize; // Amount of OOB data per block (e.g. 16) 125 /*
121 u_int32_t oobavail; // Available OOB bytes per block 126 * If erasesize is a power of 2 then the shift is stored in
127 * erasesize_shift otherwise erasesize_shift is zero. Ditto writesize.
128 */
129 unsigned int erasesize_shift;
130 unsigned int writesize_shift;
131 /* Masks based on erasesize_shift and writesize_shift */
132 unsigned int erasesize_mask;
133 unsigned int writesize_mask;
122 134
123 // Kernel-only stuff starts here. 135 // Kernel-only stuff starts here.
124 const char *name; 136 const char *name;
@@ -190,8 +202,8 @@ struct mtd_info {
190 void (*sync) (struct mtd_info *mtd); 202 void (*sync) (struct mtd_info *mtd);
191 203
192 /* Chip-supported device locking */ 204 /* Chip-supported device locking */
193 int (*lock) (struct mtd_info *mtd, loff_t ofs, size_t len); 205 int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
194 int (*unlock) (struct mtd_info *mtd, loff_t ofs, size_t len); 206 int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
195 207
196 /* Power Management functions */ 208 /* Power Management functions */
197 int (*suspend) (struct mtd_info *mtd); 209 int (*suspend) (struct mtd_info *mtd);
@@ -221,6 +233,35 @@ struct mtd_info {
221 void (*put_device) (struct mtd_info *mtd); 233 void (*put_device) (struct mtd_info *mtd);
222}; 234};
223 235
236static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd)
237{
238 if (mtd->erasesize_shift)
239 return sz >> mtd->erasesize_shift;
240 do_div(sz, mtd->erasesize);
241 return sz;
242}
243
244static inline uint32_t mtd_mod_by_eb(uint64_t sz, struct mtd_info *mtd)
245{
246 if (mtd->erasesize_shift)
247 return sz & mtd->erasesize_mask;
248 return do_div(sz, mtd->erasesize);
249}
250
251static inline uint32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd)
252{
253 if (mtd->writesize_shift)
254 return sz >> mtd->writesize_shift;
255 do_div(sz, mtd->writesize);
256 return sz;
257}
258
259static inline uint32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd)
260{
261 if (mtd->writesize_shift)
262 return sz & mtd->writesize_mask;
263 return do_div(sz, mtd->writesize);
264}
224 265
225 /* Kernel-side ioctl definitions */ 266 /* Kernel-side ioctl definitions */
226 267
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 733d3f3b4eb8..db5b63da2a7e 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -335,17 +335,12 @@ struct nand_buffers {
335 * @erase_cmd: [INTERN] erase command write function, selectable due to AND support 335 * @erase_cmd: [INTERN] erase command write function, selectable due to AND support
336 * @scan_bbt: [REPLACEABLE] function to scan bad block table 336 * @scan_bbt: [REPLACEABLE] function to scan bad block table
337 * @chip_delay: [BOARDSPECIFIC] chip dependent delay for transfering data from array to read regs (tR) 337 * @chip_delay: [BOARDSPECIFIC] chip dependent delay for transfering data from array to read regs (tR)
338 * @wq: [INTERN] wait queue to sleep on if a NAND operation is in progress
339 * @state: [INTERN] the current state of the NAND device 338 * @state: [INTERN] the current state of the NAND device
340 * @oob_poi: poison value buffer 339 * @oob_poi: poison value buffer
341 * @page_shift: [INTERN] number of address bits in a page (column address bits) 340 * @page_shift: [INTERN] number of address bits in a page (column address bits)
342 * @phys_erase_shift: [INTERN] number of address bits in a physical eraseblock 341 * @phys_erase_shift: [INTERN] number of address bits in a physical eraseblock
343 * @bbt_erase_shift: [INTERN] number of address bits in a bbt entry 342 * @bbt_erase_shift: [INTERN] number of address bits in a bbt entry
344 * @chip_shift: [INTERN] number of address bits in one chip 343 * @chip_shift: [INTERN] number of address bits in one chip
345 * @datbuf: [INTERN] internal buffer for one page + oob
346 * @oobbuf: [INTERN] oob buffer for one eraseblock
347 * @oobdirty: [INTERN] indicates that oob_buf must be reinitialized
348 * @data_poi: [INTERN] pointer to a data buffer
349 * @options: [BOARDSPECIFIC] various chip options. They can partly be set to inform nand_scan about 344 * @options: [BOARDSPECIFIC] various chip options. They can partly be set to inform nand_scan about
350 * special functionality. See the defines for further explanation 345 * special functionality. See the defines for further explanation
351 * @badblockpos: [INTERN] position of the bad block marker in the oob area 346 * @badblockpos: [INTERN] position of the bad block marker in the oob area
@@ -399,7 +394,7 @@ struct nand_chip {
399 int bbt_erase_shift; 394 int bbt_erase_shift;
400 int chip_shift; 395 int chip_shift;
401 int numchips; 396 int numchips;
402 unsigned long chipsize; 397 uint64_t chipsize;
403 int pagemask; 398 int pagemask;
404 int pagebuf; 399 int pagebuf;
405 int subpagesize; 400 int subpagesize;
diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h
index c92b4d439609..a45dd831b3f8 100644
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h
@@ -36,9 +36,9 @@
36 36
37struct mtd_partition { 37struct mtd_partition {
38 char *name; /* identifier string */ 38 char *name; /* identifier string */
39 u_int32_t size; /* partition size */ 39 uint64_t size; /* partition size */
40 u_int32_t offset; /* offset within the master MTD space */ 40 uint64_t offset; /* offset within the master MTD space */
41 u_int32_t mask_flags; /* master MTD flags to mask out for this partition */ 41 uint32_t mask_flags; /* master MTD flags to mask out for this partition */
42 struct nand_ecclayout *ecclayout; /* out of band layout for this partition (NAND only)*/ 42 struct nand_ecclayout *ecclayout; /* out of band layout for this partition (NAND only)*/
43 struct mtd_info **mtdp; /* pointer to store the MTD object */ 43 struct mtd_info **mtdp; /* pointer to store the MTD object */
44}; 44};
diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h
new file mode 100644
index 000000000000..b730d4f84655
--- /dev/null
+++ b/include/linux/mtd/pfow.h
@@ -0,0 +1,159 @@
1/* Primary function overlay window definitions
2 * and service functions used by LPDDR chips
3 */
4#ifndef __LINUX_MTD_PFOW_H
5#define __LINUX_MTD_PFOW_H
6
7#include <linux/mtd/qinfo.h>
8
9/* PFOW registers addressing */
10/* Address of symbol "P" */
11#define PFOW_QUERY_STRING_P 0x0000
12/* Address of symbol "F" */
13#define PFOW_QUERY_STRING_F 0x0002
14/* Address of symbol "O" */
15#define PFOW_QUERY_STRING_O 0x0004
16/* Address of symbol "W" */
17#define PFOW_QUERY_STRING_W 0x0006
18/* Identification info for LPDDR chip */
19#define PFOW_MANUFACTURER_ID 0x0020
20#define PFOW_DEVICE_ID 0x0022
21/* Address in PFOW where prog buffer can can be found */
22#define PFOW_PROGRAM_BUFFER_OFFSET 0x0040
23/* Size of program buffer in words */
24#define PFOW_PROGRAM_BUFFER_SIZE 0x0042
25/* Address command code register */
26#define PFOW_COMMAND_CODE 0x0080
27/* command data register */
28#define PFOW_COMMAND_DATA 0x0084
29/* command address register lower address bits */
30#define PFOW_COMMAND_ADDRESS_L 0x0088
31/* command address register upper address bits */
32#define PFOW_COMMAND_ADDRESS_H 0x008a
33/* number of bytes to be proggrammed lower address bits */
34#define PFOW_DATA_COUNT_L 0x0090
35/* number of bytes to be proggrammed higher address bits */
36#define PFOW_DATA_COUNT_H 0x0092
37/* command execution register, the only possible value is 0x01 */
38#define PFOW_COMMAND_EXECUTE 0x00c0
39/* 0x01 should be written at this address to clear buffer */
40#define PFOW_CLEAR_PROGRAM_BUFFER 0x00c4
41/* device program/erase suspend register */
42#define PFOW_PROGRAM_ERASE_SUSPEND 0x00c8
43/* device status register */
44#define PFOW_DSR 0x00cc
45
46/* LPDDR memory device command codes */
47/* They are possible values of PFOW command code register */
48#define LPDDR_WORD_PROGRAM 0x0041
49#define LPDDR_BUFF_PROGRAM 0x00E9
50#define LPDDR_BLOCK_ERASE 0x0020
51#define LPDDR_LOCK_BLOCK 0x0061
52#define LPDDR_UNLOCK_BLOCK 0x0062
53#define LPDDR_READ_BLOCK_LOCK_STATUS 0x0065
54#define LPDDR_INFO_QUERY 0x0098
55#define LPDDR_READ_OTP 0x0097
56#define LPDDR_PROG_OTP 0x00C0
57#define LPDDR_RESUME 0x00D0
58
59/* Defines possible value of PFOW command execution register */
60#define LPDDR_START_EXECUTION 0x0001
61
62/* Defines possible value of PFOW program/erase suspend register */
63#define LPDDR_SUSPEND 0x0001
64
65/* Possible values of PFOW device status register */
66/* access R - read; RC read & clearable */
67#define DSR_DPS (1<<1) /* RC; device protect status
68 * 0 - not protected 1 - locked */
69#define DSR_PSS (1<<2) /* R; program suspend status;
70 * 0-prog in progress/completed,
71 * 1- prog suspended */
72#define DSR_VPPS (1<<3) /* RC; 0-Vpp OK, * 1-Vpp low */
73#define DSR_PROGRAM_STATUS (1<<4) /* RC; 0-successful, 1-error */
74#define DSR_ERASE_STATUS (1<<5) /* RC; erase or blank check status;
75 * 0-success erase/blank check,
76 * 1 blank check error */
77#define DSR_ESS (1<<6) /* R; erase suspend status;
78 * 0-erase in progress/complete,
79 * 1 erase suspended */
80#define DSR_READY_STATUS (1<<7) /* R; Device status
81 * 0-busy,
82 * 1-ready */
83#define DSR_RPS (0x3<<8) /* RC; region program status
84 * 00 - Success,
85 * 01-re-program attempt in region with
86 * object mode data,
87 * 10-object mode program w attempt in
88 * region with control mode data
89 * 11-attempt to program invalid half
90 * with 0x41 command */
91#define DSR_AOS (1<<12) /* RC; 1- AO related failure */
92#define DSR_AVAILABLE (1<<15) /* R; Device availbility
93 * 1 - Device available
94 * 0 - not available */
95
96/* The superset of all possible error bits in DSR */
97#define DSR_ERR 0x133A
98
99static inline void send_pfow_command(struct map_info *map,
100 unsigned long cmd_code, unsigned long adr,
101 unsigned long len, map_word *datum)
102{
103 int bits_per_chip = map_bankwidth(map) * 8;
104 int chipnum;
105 struct lpddr_private *lpddr = map->fldrv_priv;
106 chipnum = adr >> lpddr->chipshift;
107
108 map_write(map, CMD(cmd_code), map->pfow_base + PFOW_COMMAND_CODE);
109 map_write(map, CMD(adr & ((1<<bits_per_chip) - 1)),
110 map->pfow_base + PFOW_COMMAND_ADDRESS_L);
111 map_write(map, CMD(adr>>bits_per_chip),
112 map->pfow_base + PFOW_COMMAND_ADDRESS_H);
113 if (len) {
114 map_write(map, CMD(len & ((1<<bits_per_chip) - 1)),
115 map->pfow_base + PFOW_DATA_COUNT_L);
116 map_write(map, CMD(len>>bits_per_chip),
117 map->pfow_base + PFOW_DATA_COUNT_H);
118 }
119 if (datum)
120 map_write(map, *datum, map->pfow_base + PFOW_COMMAND_DATA);
121
122 /* Command execution start */
123 map_write(map, CMD(LPDDR_START_EXECUTION),
124 map->pfow_base + PFOW_COMMAND_EXECUTE);
125}
126
127static inline void print_drs_error(unsigned dsr)
128{
129 int prog_status = (dsr & DSR_RPS) >> 8;
130
131 if (!(dsr & DSR_AVAILABLE))
132 printk(KERN_NOTICE"DSR.15: (0) Device not Available\n");
133 if (prog_status & 0x03)
134 printk(KERN_NOTICE"DSR.9,8: (11) Attempt to program invalid "
135 "half with 41h command\n");
136 else if (prog_status & 0x02)
137 printk(KERN_NOTICE"DSR.9,8: (10) Object Mode Program attempt "
138 "in region with Control Mode data\n");
139 else if (prog_status & 0x01)
140 printk(KERN_NOTICE"DSR.9,8: (01) Program attempt in region "
141 "with Object Mode data\n");
142 if (!(dsr & DSR_READY_STATUS))
143 printk(KERN_NOTICE"DSR.7: (0) Device is Busy\n");
144 if (dsr & DSR_ESS)
145 printk(KERN_NOTICE"DSR.6: (1) Erase Suspended\n");
146 if (dsr & DSR_ERASE_STATUS)
147 printk(KERN_NOTICE"DSR.5: (1) Erase/Blank check error\n");
148 if (dsr & DSR_PROGRAM_STATUS)
149 printk(KERN_NOTICE"DSR.4: (1) Program Error\n");
150 if (dsr & DSR_VPPS)
151 printk(KERN_NOTICE"DSR.3: (1) Vpp low detect, operation "
152 "aborted\n");
153 if (dsr & DSR_PSS)
154 printk(KERN_NOTICE"DSR.2: (1) Program suspended\n");
155 if (dsr & DSR_DPS)
156 printk(KERN_NOTICE"DSR.1: (1) Aborted Erase/Program attempt "
157 "on locked block\n");
158}
159#endif /* __LINUX_MTD_PFOW_H */
diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h
index c8e63a5ee72e..76f7cabf07d3 100644
--- a/include/linux/mtd/physmap.h
+++ b/include/linux/mtd/physmap.h
@@ -24,6 +24,7 @@ struct physmap_flash_data {
24 unsigned int width; 24 unsigned int width;
25 void (*set_vpp)(struct map_info *, int); 25 void (*set_vpp)(struct map_info *, int);
26 unsigned int nr_parts; 26 unsigned int nr_parts;
27 unsigned int pfow_base;
27 struct mtd_partition *parts; 28 struct mtd_partition *parts;
28}; 29};
29 30
diff --git a/include/linux/mtd/qinfo.h b/include/linux/mtd/qinfo.h
new file mode 100644
index 000000000000..7b3d487d8b3f
--- /dev/null
+++ b/include/linux/mtd/qinfo.h
@@ -0,0 +1,91 @@
1#ifndef __LINUX_MTD_QINFO_H
2#define __LINUX_MTD_QINFO_H
3
4#include <linux/mtd/map.h>
5#include <linux/wait.h>
6#include <linux/spinlock.h>
7#include <linux/delay.h>
8#include <linux/mtd/mtd.h>
9#include <linux/mtd/flashchip.h>
10#include <linux/mtd/partitions.h>
11
12/* lpddr_private describes lpddr flash chip in memory map
13 * @ManufactId - Chip Manufacture ID
14 * @DevId - Chip Device ID
15 * @qinfo - pointer to qinfo records describing the chip
16 * @numchips - number of chips including virual RWW partitions
17 * @chipshift - Chip/partiton size 2^chipshift
18 * @chips - per-chip data structure
19 */
20struct lpddr_private {
21 uint16_t ManufactId;
22 uint16_t DevId;
23 struct qinfo_chip *qinfo;
24 int numchips;
25 unsigned long chipshift;
26 struct flchip chips[0];
27};
28
29/* qinfo_query_info structure contains request information for
30 * each qinfo record
31 * @major - major number of qinfo record
32 * @major - minor number of qinfo record
33 * @id_str - descriptive string to access the record
34 * @desc - detailed description for the qinfo record
35 */
36struct qinfo_query_info {
37 uint8_t major;
38 uint8_t minor;
39 char *id_str;
40 char *desc;
41};
42
43/*
44 * qinfo_chip structure contains necessary qinfo records data
45 * @DevSizeShift - Device size 2^n bytes
46 * @BufSizeShift - Program buffer size 2^n bytes
47 * @TotalBlocksNum - Total number of blocks
48 * @UniformBlockSizeShift - Uniform block size 2^UniformBlockSizeShift bytes
49 * @HWPartsNum - Number of hardware partitions
50 * @SuspEraseSupp - Suspend erase supported
51 * @SingleWordProgTime - Single word program 2^SingleWordProgTime u-sec
52 * @ProgBufferTime - Program buffer write 2^ProgBufferTime u-sec
53 * @BlockEraseTime - Block erase 2^BlockEraseTime m-sec
54 */
55struct qinfo_chip {
56 /* General device info */
57 uint16_t DevSizeShift;
58 uint16_t BufSizeShift;
59 /* Erase block information */
60 uint16_t TotalBlocksNum;
61 uint16_t UniformBlockSizeShift;
62 /* Partition information */
63 uint16_t HWPartsNum;
64 /* Optional features */
65 uint16_t SuspEraseSupp;
66 /* Operation typical time */
67 uint16_t SingleWordProgTime;
68 uint16_t ProgBufferTime;
69 uint16_t BlockEraseTime;
70};
71
72/* defines for fixup usage */
73#define LPDDR_MFR_ANY 0xffff
74#define LPDDR_ID_ANY 0xffff
75#define NUMONYX_MFGR_ID 0x0089
76#define R18_DEVICE_ID_1G 0x893c
77
78static inline map_word lpddr_build_cmd(u_long cmd, struct map_info *map)
79{
80 map_word val = { {0} };
81 val.x[0] = cmd;
82 return val;
83}
84
85#define CMD(x) lpddr_build_cmd(x, map)
86#define CMDVAL(cmd) cmd.x[0]
87
88struct mtd_info *lpddr_cmdset(struct map_info *);
89
90#endif
91
diff --git a/include/linux/mtd/sharpsl.h b/include/linux/mtd/sharpsl.h
new file mode 100644
index 000000000000..25f4d2a845c1
--- /dev/null
+++ b/include/linux/mtd/sharpsl.h
@@ -0,0 +1,20 @@
1/*
2 * SharpSL NAND support
3 *
4 * Copyright (C) 2008 Dmitry Baryshkov
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/mtd/nand.h>
12#include <linux/mtd/nand_ecc.h>
13#include <linux/mtd/partitions.h>
14
15struct sharpsl_nand_platform_data {
16 struct nand_bbt_descr *badblock_pattern;
17 struct nand_ecclayout *ecc_layout;
18 struct mtd_partition *partitions;
19 unsigned int nr_partitions;
20};
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 114091be8872..f24556813375 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1125,9 +1125,6 @@ struct softnet_data
1125 struct sk_buff *completion_queue; 1125 struct sk_buff *completion_queue;
1126 1126
1127 struct napi_struct backlog; 1127 struct napi_struct backlog;
1128#ifdef CONFIG_NET_DMA
1129 struct dma_chan *net_dma;
1130#endif
1131}; 1128};
1132 1129
1133DECLARE_PER_CPU(struct softnet_data,softnet_data); 1130DECLARE_PER_CPU(struct softnet_data,softnet_data);
diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h
index 1ce9fe572e51..1d9518bc4c58 100644
--- a/include/linux/oprofile.h
+++ b/include/linux/oprofile.h
@@ -164,4 +164,22 @@ void oprofile_put_buff(unsigned long *buf, unsigned int start,
164unsigned long oprofile_get_cpu_buffer_size(void); 164unsigned long oprofile_get_cpu_buffer_size(void);
165void oprofile_cpu_buffer_inc_smpl_lost(void); 165void oprofile_cpu_buffer_inc_smpl_lost(void);
166 166
167/* cpu buffer functions */
168
169struct op_sample;
170
171struct op_entry {
172 struct ring_buffer_event *event;
173 struct op_sample *sample;
174 unsigned long irq_flags;
175 unsigned long size;
176 unsigned long *data;
177};
178
179void oprofile_write_reserve(struct op_entry *entry,
180 struct pt_regs * const regs,
181 unsigned long pc, int code, int size);
182int oprofile_add_data(struct op_entry *entry, unsigned long val);
183int oprofile_write_commit(struct op_entry *entry);
184
167#endif /* OPROFILE_H */ 185#endif /* OPROFILE_H */
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index f7cc204fab07..20998746518e 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -223,7 +223,6 @@ struct hotplug_params {
223#ifdef CONFIG_ACPI 223#ifdef CONFIG_ACPI
224#include <acpi/acpi.h> 224#include <acpi/acpi.h>
225#include <acpi/acpi_bus.h> 225#include <acpi/acpi_bus.h>
226#include <acpi/actypes.h>
227extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus, 226extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus,
228 struct hotplug_params *hpp); 227 struct hotplug_params *hpp);
229int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags); 228int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags);
diff --git a/include/linux/spi/tdo24m.h b/include/linux/spi/tdo24m.h
new file mode 100644
index 000000000000..7572d4e1fe76
--- /dev/null
+++ b/include/linux/spi/tdo24m.h
@@ -0,0 +1,13 @@
1#ifndef __TDO24M_H__
2#define __TDO24M_H__
3
4enum tdo24m_model {
5 TDO24M,
6 TDO35S,
7};
8
9struct tdo24m_platform_data {
10 enum tdo24m_model model;
11};
12
13#endif /* __TDO24M_H__ */
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 2ce8207686e2..2b409c44db83 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -232,6 +232,11 @@ extern unsigned long get_safe_page(gfp_t gfp_mask);
232 232
233extern void hibernation_set_ops(struct platform_hibernation_ops *ops); 233extern void hibernation_set_ops(struct platform_hibernation_ops *ops);
234extern int hibernate(void); 234extern int hibernate(void);
235extern int hibernate_nvs_register(unsigned long start, unsigned long size);
236extern int hibernate_nvs_alloc(void);
237extern void hibernate_nvs_free(void);
238extern void hibernate_nvs_save(void);
239extern void hibernate_nvs_restore(void);
235#else /* CONFIG_HIBERNATION */ 240#else /* CONFIG_HIBERNATION */
236static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } 241static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
237static inline void swsusp_set_page_free(struct page *p) {} 242static inline void swsusp_set_page_free(struct page *p) {}
@@ -239,6 +244,14 @@ static inline void swsusp_unset_page_free(struct page *p) {}
239 244
240static inline void hibernation_set_ops(struct platform_hibernation_ops *ops) {} 245static inline void hibernation_set_ops(struct platform_hibernation_ops *ops) {}
241static inline int hibernate(void) { return -ENOSYS; } 246static inline int hibernate(void) { return -ENOSYS; }
247static inline int hibernate_nvs_register(unsigned long a, unsigned long b)
248{
249 return 0;
250}
251static inline int hibernate_nvs_alloc(void) { return 0; }
252static inline void hibernate_nvs_free(void) {}
253static inline void hibernate_nvs_save(void) {}
254static inline void hibernate_nvs_restore(void) {}
242#endif /* CONFIG_HIBERNATION */ 255#endif /* CONFIG_HIBERNATION */
243 256
244#ifdef CONFIG_PM_SLEEP 257#ifdef CONFIG_PM_SLEEP
diff --git a/include/net/netdma.h b/include/net/netdma.h
index f28c6e064e8f..8ba8ce284eeb 100644
--- a/include/net/netdma.h
+++ b/include/net/netdma.h
@@ -24,17 +24,6 @@
24#include <linux/dmaengine.h> 24#include <linux/dmaengine.h>
25#include <linux/skbuff.h> 25#include <linux/skbuff.h>
26 26
27static inline struct dma_chan *get_softnet_dma(void)
28{
29 struct dma_chan *chan;
30 rcu_read_lock();
31 chan = rcu_dereference(__get_cpu_var(softnet_data).net_dma);
32 if (chan)
33 dma_chan_get(chan);
34 rcu_read_unlock();
35 return chan;
36}
37
38int dma_skb_copy_datagram_iovec(struct dma_chan* chan, 27int dma_skb_copy_datagram_iovec(struct dma_chan* chan,
39 struct sk_buff *skb, int offset, struct iovec *to, 28 struct sk_buff *skb, int offset, struct iovec *to,
40 size_t len, struct dma_pinned_list *pinned_list); 29 size_t len, struct dma_pinned_list *pinned_list);
diff --git a/kernel/cred.c b/kernel/cred.c
index ff7bc071991c..043f78c133c4 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -506,6 +506,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
506 else 506 else
507 old = get_cred(&init_cred); 507 old = get_cred(&init_cred);
508 508
509 *new = *old;
509 get_uid(new->user); 510 get_uid(new->user);
510 get_group_info(new->group_info); 511 get_group_info(new->group_info);
511 512
@@ -529,6 +530,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
529 530
530error: 531error:
531 put_cred(new); 532 put_cred(new);
533 put_cred(old);
532 return NULL; 534 return NULL;
533} 535}
534EXPORT_SYMBOL(prepare_kernel_cred); 536EXPORT_SYMBOL(prepare_kernel_cred);
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index f77d3819ef57..45e8541ab7e3 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -258,12 +258,12 @@ int hibernation_snapshot(int platform_mode)
258{ 258{
259 int error; 259 int error;
260 260
261 /* Free memory before shutting down devices. */ 261 error = platform_begin(platform_mode);
262 error = swsusp_shrink_memory();
263 if (error) 262 if (error)
264 return error; 263 return error;
265 264
266 error = platform_begin(platform_mode); 265 /* Free memory before shutting down devices. */
266 error = swsusp_shrink_memory();
267 if (error) 267 if (error)
268 goto Close; 268 goto Close;
269 269
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 5d2ab836e998..f5fc2d7680f2 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -25,6 +25,7 @@
25#include <linux/syscalls.h> 25#include <linux/syscalls.h>
26#include <linux/console.h> 26#include <linux/console.h>
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/list.h>
28 29
29#include <asm/uaccess.h> 30#include <asm/uaccess.h>
30#include <asm/mmu_context.h> 31#include <asm/mmu_context.h>
@@ -192,12 +193,6 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
192 return ret; 193 return ret;
193} 194}
194 195
195static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
196{
197 free_list_of_pages(ca->chain, clear_page_nosave);
198 memset(ca, 0, sizeof(struct chain_allocator));
199}
200
201/** 196/**
202 * Data types related to memory bitmaps. 197 * Data types related to memory bitmaps.
203 * 198 *
@@ -233,7 +228,7 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
233#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3) 228#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3)
234 229
235struct bm_block { 230struct bm_block {
236 struct bm_block *next; /* next element of the list */ 231 struct list_head hook; /* hook into a list of bitmap blocks */
237 unsigned long start_pfn; /* pfn represented by the first bit */ 232 unsigned long start_pfn; /* pfn represented by the first bit */
238 unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ 233 unsigned long end_pfn; /* pfn represented by the last bit plus 1 */
239 unsigned long *data; /* bitmap representing pages */ 234 unsigned long *data; /* bitmap representing pages */
@@ -244,24 +239,15 @@ static inline unsigned long bm_block_bits(struct bm_block *bb)
244 return bb->end_pfn - bb->start_pfn; 239 return bb->end_pfn - bb->start_pfn;
245} 240}
246 241
247struct zone_bitmap {
248 struct zone_bitmap *next; /* next element of the list */
249 unsigned long start_pfn; /* minimal pfn in this zone */
250 unsigned long end_pfn; /* maximal pfn in this zone plus 1 */
251 struct bm_block *bm_blocks; /* list of bitmap blocks */
252 struct bm_block *cur_block; /* recently used bitmap block */
253};
254
255/* strcut bm_position is used for browsing memory bitmaps */ 242/* strcut bm_position is used for browsing memory bitmaps */
256 243
257struct bm_position { 244struct bm_position {
258 struct zone_bitmap *zone_bm;
259 struct bm_block *block; 245 struct bm_block *block;
260 int bit; 246 int bit;
261}; 247};
262 248
263struct memory_bitmap { 249struct memory_bitmap {
264 struct zone_bitmap *zone_bm_list; /* list of zone bitmaps */ 250 struct list_head blocks; /* list of bitmap blocks */
265 struct linked_page *p_list; /* list of pages used to store zone 251 struct linked_page *p_list; /* list of pages used to store zone
266 * bitmap objects and bitmap block 252 * bitmap objects and bitmap block
267 * objects 253 * objects
@@ -273,11 +259,7 @@ struct memory_bitmap {
273 259
274static void memory_bm_position_reset(struct memory_bitmap *bm) 260static void memory_bm_position_reset(struct memory_bitmap *bm)
275{ 261{
276 struct zone_bitmap *zone_bm; 262 bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook);
277
278 zone_bm = bm->zone_bm_list;
279 bm->cur.zone_bm = zone_bm;
280 bm->cur.block = zone_bm->bm_blocks;
281 bm->cur.bit = 0; 263 bm->cur.bit = 0;
282} 264}
283 265
@@ -285,151 +267,184 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
285 267
286/** 268/**
287 * create_bm_block_list - create a list of block bitmap objects 269 * create_bm_block_list - create a list of block bitmap objects
270 * @nr_blocks - number of blocks to allocate
271 * @list - list to put the allocated blocks into
272 * @ca - chain allocator to be used for allocating memory
288 */ 273 */
289 274static int create_bm_block_list(unsigned long pages,
290static inline struct bm_block * 275 struct list_head *list,
291create_bm_block_list(unsigned int nr_blocks, struct chain_allocator *ca) 276 struct chain_allocator *ca)
292{ 277{
293 struct bm_block *bblist = NULL; 278 unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);
294 279
295 while (nr_blocks-- > 0) { 280 while (nr_blocks-- > 0) {
296 struct bm_block *bb; 281 struct bm_block *bb;
297 282
298 bb = chain_alloc(ca, sizeof(struct bm_block)); 283 bb = chain_alloc(ca, sizeof(struct bm_block));
299 if (!bb) 284 if (!bb)
300 return NULL; 285 return -ENOMEM;
301 286 list_add(&bb->hook, list);
302 bb->next = bblist;
303 bblist = bb;
304 } 287 }
305 return bblist; 288
289 return 0;
306} 290}
307 291
292struct mem_extent {
293 struct list_head hook;
294 unsigned long start;
295 unsigned long end;
296};
297
308/** 298/**
309 * create_zone_bm_list - create a list of zone bitmap objects 299 * free_mem_extents - free a list of memory extents
300 * @list - list of extents to empty
310 */ 301 */
302static void free_mem_extents(struct list_head *list)
303{
304 struct mem_extent *ext, *aux;
311 305
312static inline struct zone_bitmap * 306 list_for_each_entry_safe(ext, aux, list, hook) {
313create_zone_bm_list(unsigned int nr_zones, struct chain_allocator *ca) 307 list_del(&ext->hook);
308 kfree(ext);
309 }
310}
311
312/**
313 * create_mem_extents - create a list of memory extents representing
314 * contiguous ranges of PFNs
315 * @list - list to put the extents into
316 * @gfp_mask - mask to use for memory allocations
317 */
318static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
314{ 319{
315 struct zone_bitmap *zbmlist = NULL; 320 struct zone *zone;
316 321
317 while (nr_zones-- > 0) { 322 INIT_LIST_HEAD(list);
318 struct zone_bitmap *zbm;
319 323
320 zbm = chain_alloc(ca, sizeof(struct zone_bitmap)); 324 for_each_zone(zone) {
321 if (!zbm) 325 unsigned long zone_start, zone_end;
322 return NULL; 326 struct mem_extent *ext, *cur, *aux;
327
328 if (!populated_zone(zone))
329 continue;
323 330
324 zbm->next = zbmlist; 331 zone_start = zone->zone_start_pfn;
325 zbmlist = zbm; 332 zone_end = zone->zone_start_pfn + zone->spanned_pages;
333
334 list_for_each_entry(ext, list, hook)
335 if (zone_start <= ext->end)
336 break;
337
338 if (&ext->hook == list || zone_end < ext->start) {
339 /* New extent is necessary */
340 struct mem_extent *new_ext;
341
342 new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
343 if (!new_ext) {
344 free_mem_extents(list);
345 return -ENOMEM;
346 }
347 new_ext->start = zone_start;
348 new_ext->end = zone_end;
349 list_add_tail(&new_ext->hook, &ext->hook);
350 continue;
351 }
352
353 /* Merge this zone's range of PFNs with the existing one */
354 if (zone_start < ext->start)
355 ext->start = zone_start;
356 if (zone_end > ext->end)
357 ext->end = zone_end;
358
359 /* More merging may be possible */
360 cur = ext;
361 list_for_each_entry_safe_continue(cur, aux, list, hook) {
362 if (zone_end < cur->start)
363 break;
364 if (zone_end < cur->end)
365 ext->end = cur->end;
366 list_del(&cur->hook);
367 kfree(cur);
368 }
326 } 369 }
327 return zbmlist; 370
371 return 0;
328} 372}
329 373
330/** 374/**
331 * memory_bm_create - allocate memory for a memory bitmap 375 * memory_bm_create - allocate memory for a memory bitmap
332 */ 376 */
333
334static int 377static int
335memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) 378memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
336{ 379{
337 struct chain_allocator ca; 380 struct chain_allocator ca;
338 struct zone *zone; 381 struct list_head mem_extents;
339 struct zone_bitmap *zone_bm; 382 struct mem_extent *ext;
340 struct bm_block *bb; 383 int error;
341 unsigned int nr;
342 384
343 chain_init(&ca, gfp_mask, safe_needed); 385 chain_init(&ca, gfp_mask, safe_needed);
386 INIT_LIST_HEAD(&bm->blocks);
344 387
345 /* Compute the number of zones */ 388 error = create_mem_extents(&mem_extents, gfp_mask);
346 nr = 0; 389 if (error)
347 for_each_zone(zone) 390 return error;
348 if (populated_zone(zone))
349 nr++;
350
351 /* Allocate the list of zones bitmap objects */
352 zone_bm = create_zone_bm_list(nr, &ca);
353 bm->zone_bm_list = zone_bm;
354 if (!zone_bm) {
355 chain_free(&ca, PG_UNSAFE_CLEAR);
356 return -ENOMEM;
357 }
358
359 /* Initialize the zone bitmap objects */
360 for_each_zone(zone) {
361 unsigned long pfn;
362 391
363 if (!populated_zone(zone)) 392 list_for_each_entry(ext, &mem_extents, hook) {
364 continue; 393 struct bm_block *bb;
394 unsigned long pfn = ext->start;
395 unsigned long pages = ext->end - ext->start;
365 396
366 zone_bm->start_pfn = zone->zone_start_pfn; 397 bb = list_entry(bm->blocks.prev, struct bm_block, hook);
367 zone_bm->end_pfn = zone->zone_start_pfn + zone->spanned_pages;
368 /* Allocate the list of bitmap block objects */
369 nr = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
370 bb = create_bm_block_list(nr, &ca);
371 zone_bm->bm_blocks = bb;
372 zone_bm->cur_block = bb;
373 if (!bb)
374 goto Free;
375 398
376 nr = zone->spanned_pages; 399 error = create_bm_block_list(pages, bm->blocks.prev, &ca);
377 pfn = zone->zone_start_pfn; 400 if (error)
378 /* Initialize the bitmap block objects */ 401 goto Error;
379 while (bb) {
380 unsigned long *ptr;
381 402
382 ptr = get_image_page(gfp_mask, safe_needed); 403 list_for_each_entry_continue(bb, &bm->blocks, hook) {
383 bb->data = ptr; 404 bb->data = get_image_page(gfp_mask, safe_needed);
384 if (!ptr) 405 if (!bb->data) {
385 goto Free; 406 error = -ENOMEM;
407 goto Error;
408 }
386 409
387 bb->start_pfn = pfn; 410 bb->start_pfn = pfn;
388 if (nr >= BM_BITS_PER_BLOCK) { 411 if (pages >= BM_BITS_PER_BLOCK) {
389 pfn += BM_BITS_PER_BLOCK; 412 pfn += BM_BITS_PER_BLOCK;
390 nr -= BM_BITS_PER_BLOCK; 413 pages -= BM_BITS_PER_BLOCK;
391 } else { 414 } else {
392 /* This is executed only once in the loop */ 415 /* This is executed only once in the loop */
393 pfn += nr; 416 pfn += pages;
394 } 417 }
395 bb->end_pfn = pfn; 418 bb->end_pfn = pfn;
396 bb = bb->next;
397 } 419 }
398 zone_bm = zone_bm->next;
399 } 420 }
421
400 bm->p_list = ca.chain; 422 bm->p_list = ca.chain;
401 memory_bm_position_reset(bm); 423 memory_bm_position_reset(bm);
402 return 0; 424 Exit:
425 free_mem_extents(&mem_extents);
426 return error;
403 427
404 Free: 428 Error:
405 bm->p_list = ca.chain; 429 bm->p_list = ca.chain;
406 memory_bm_free(bm, PG_UNSAFE_CLEAR); 430 memory_bm_free(bm, PG_UNSAFE_CLEAR);
407 return -ENOMEM; 431 goto Exit;
408} 432}
409 433
410/** 434/**
411 * memory_bm_free - free memory occupied by the memory bitmap @bm 435 * memory_bm_free - free memory occupied by the memory bitmap @bm
412 */ 436 */
413
414static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) 437static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
415{ 438{
416 struct zone_bitmap *zone_bm; 439 struct bm_block *bb;
417 440
418 /* Free the list of bit blocks for each zone_bitmap object */ 441 list_for_each_entry(bb, &bm->blocks, hook)
419 zone_bm = bm->zone_bm_list; 442 if (bb->data)
420 while (zone_bm) { 443 free_image_page(bb->data, clear_nosave_free);
421 struct bm_block *bb;
422 444
423 bb = zone_bm->bm_blocks;
424 while (bb) {
425 if (bb->data)
426 free_image_page(bb->data, clear_nosave_free);
427 bb = bb->next;
428 }
429 zone_bm = zone_bm->next;
430 }
431 free_list_of_pages(bm->p_list, clear_nosave_free); 445 free_list_of_pages(bm->p_list, clear_nosave_free);
432 bm->zone_bm_list = NULL; 446
447 INIT_LIST_HEAD(&bm->blocks);
433} 448}
434 449
435/** 450/**
@@ -437,38 +452,33 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
437 * to given pfn. The cur_zone_bm member of @bm and the cur_block member 452 * to given pfn. The cur_zone_bm member of @bm and the cur_block member
438 * of @bm->cur_zone_bm are updated. 453 * of @bm->cur_zone_bm are updated.
439 */ 454 */
440
441static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, 455static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
442 void **addr, unsigned int *bit_nr) 456 void **addr, unsigned int *bit_nr)
443{ 457{
444 struct zone_bitmap *zone_bm;
445 struct bm_block *bb; 458 struct bm_block *bb;
446 459
447 /* Check if the pfn is from the current zone */ 460 /*
448 zone_bm = bm->cur.zone_bm; 461 * Check if the pfn corresponds to the current bitmap block and find
449 if (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) { 462 * the block where it fits if this is not the case.
450 zone_bm = bm->zone_bm_list; 463 */
451 /* We don't assume that the zones are sorted by pfns */ 464 bb = bm->cur.block;
452 while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
453 zone_bm = zone_bm->next;
454
455 if (!zone_bm)
456 return -EFAULT;
457 }
458 bm->cur.zone_bm = zone_bm;
459 }
460 /* Check if the pfn corresponds to the current bitmap block */
461 bb = zone_bm->cur_block;
462 if (pfn < bb->start_pfn) 465 if (pfn < bb->start_pfn)
463 bb = zone_bm->bm_blocks; 466 list_for_each_entry_continue_reverse(bb, &bm->blocks, hook)
467 if (pfn >= bb->start_pfn)
468 break;
464 469
465 while (pfn >= bb->end_pfn) { 470 if (pfn >= bb->end_pfn)
466 bb = bb->next; 471 list_for_each_entry_continue(bb, &bm->blocks, hook)
472 if (pfn >= bb->start_pfn && pfn < bb->end_pfn)
473 break;
467 474
468 BUG_ON(!bb); 475 if (&bb->hook == &bm->blocks)
469 } 476 return -EFAULT;
470 zone_bm->cur_block = bb; 477
478 /* The block has been found */
479 bm->cur.block = bb;
471 pfn -= bb->start_pfn; 480 pfn -= bb->start_pfn;
481 bm->cur.bit = pfn + 1;
472 *bit_nr = pfn; 482 *bit_nr = pfn;
473 *addr = bb->data; 483 *addr = bb->data;
474 return 0; 484 return 0;
@@ -519,6 +529,14 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
519 return test_bit(bit, addr); 529 return test_bit(bit, addr);
520} 530}
521 531
532static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
533{
534 void *addr;
535 unsigned int bit;
536
537 return !memory_bm_find_bit(bm, pfn, &addr, &bit);
538}
539
522/** 540/**
523 * memory_bm_next_pfn - find the pfn that corresponds to the next set bit 541 * memory_bm_next_pfn - find the pfn that corresponds to the next set bit
524 * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is 542 * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is
@@ -530,29 +548,21 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
530 548
531static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) 549static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
532{ 550{
533 struct zone_bitmap *zone_bm;
534 struct bm_block *bb; 551 struct bm_block *bb;
535 int bit; 552 int bit;
536 553
554 bb = bm->cur.block;
537 do { 555 do {
538 bb = bm->cur.block; 556 bit = bm->cur.bit;
539 do { 557 bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
540 bit = bm->cur.bit; 558 if (bit < bm_block_bits(bb))
541 bit = find_next_bit(bb->data, bm_block_bits(bb), bit); 559 goto Return_pfn;
542 if (bit < bm_block_bits(bb)) 560
543 goto Return_pfn; 561 bb = list_entry(bb->hook.next, struct bm_block, hook);
544 562 bm->cur.block = bb;
545 bb = bb->next; 563 bm->cur.bit = 0;
546 bm->cur.block = bb; 564 } while (&bb->hook != &bm->blocks);
547 bm->cur.bit = 0; 565
548 } while (bb);
549 zone_bm = bm->cur.zone_bm->next;
550 if (zone_bm) {
551 bm->cur.zone_bm = zone_bm;
552 bm->cur.block = zone_bm->bm_blocks;
553 bm->cur.bit = 0;
554 }
555 } while (zone_bm);
556 memory_bm_position_reset(bm); 566 memory_bm_position_reset(bm);
557 return BM_END_OF_MAP; 567 return BM_END_OF_MAP;
558 568
@@ -808,8 +818,7 @@ static unsigned int count_free_highmem_pages(void)
808 * We should save the page if it isn't Nosave or NosaveFree, or Reserved, 818 * We should save the page if it isn't Nosave or NosaveFree, or Reserved,
809 * and it isn't a part of a free chunk of pages. 819 * and it isn't a part of a free chunk of pages.
810 */ 820 */
811 821static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
812static struct page *saveable_highmem_page(unsigned long pfn)
813{ 822{
814 struct page *page; 823 struct page *page;
815 824
@@ -817,6 +826,8 @@ static struct page *saveable_highmem_page(unsigned long pfn)
817 return NULL; 826 return NULL;
818 827
819 page = pfn_to_page(pfn); 828 page = pfn_to_page(pfn);
829 if (page_zone(page) != zone)
830 return NULL;
820 831
821 BUG_ON(!PageHighMem(page)); 832 BUG_ON(!PageHighMem(page));
822 833
@@ -846,13 +857,16 @@ unsigned int count_highmem_pages(void)
846 mark_free_pages(zone); 857 mark_free_pages(zone);
847 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; 858 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
848 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 859 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
849 if (saveable_highmem_page(pfn)) 860 if (saveable_highmem_page(zone, pfn))
850 n++; 861 n++;
851 } 862 }
852 return n; 863 return n;
853} 864}
854#else 865#else
855static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; } 866static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
867{
868 return NULL;
869}
856#endif /* CONFIG_HIGHMEM */ 870#endif /* CONFIG_HIGHMEM */
857 871
858/** 872/**
@@ -863,8 +877,7 @@ static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; }
863 * of pages statically defined as 'unsaveable', and it isn't a part of 877 * of pages statically defined as 'unsaveable', and it isn't a part of
864 * a free chunk of pages. 878 * a free chunk of pages.
865 */ 879 */
866 880static struct page *saveable_page(struct zone *zone, unsigned long pfn)
867static struct page *saveable_page(unsigned long pfn)
868{ 881{
869 struct page *page; 882 struct page *page;
870 883
@@ -872,6 +885,8 @@ static struct page *saveable_page(unsigned long pfn)
872 return NULL; 885 return NULL;
873 886
874 page = pfn_to_page(pfn); 887 page = pfn_to_page(pfn);
888 if (page_zone(page) != zone)
889 return NULL;
875 890
876 BUG_ON(PageHighMem(page)); 891 BUG_ON(PageHighMem(page));
877 892
@@ -903,7 +918,7 @@ unsigned int count_data_pages(void)
903 mark_free_pages(zone); 918 mark_free_pages(zone);
904 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; 919 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
905 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 920 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
906 if(saveable_page(pfn)) 921 if (saveable_page(zone, pfn))
907 n++; 922 n++;
908 } 923 }
909 return n; 924 return n;
@@ -944,7 +959,7 @@ static inline struct page *
944page_is_saveable(struct zone *zone, unsigned long pfn) 959page_is_saveable(struct zone *zone, unsigned long pfn)
945{ 960{
946 return is_highmem(zone) ? 961 return is_highmem(zone) ?
947 saveable_highmem_page(pfn) : saveable_page(pfn); 962 saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
948} 963}
949 964
950static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) 965static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
@@ -966,7 +981,7 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
966 * data modified by kmap_atomic() 981 * data modified by kmap_atomic()
967 */ 982 */
968 safe_copy_page(buffer, s_page); 983 safe_copy_page(buffer, s_page);
969 dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0); 984 dst = kmap_atomic(d_page, KM_USER0);
970 memcpy(dst, buffer, PAGE_SIZE); 985 memcpy(dst, buffer, PAGE_SIZE);
971 kunmap_atomic(dst, KM_USER0); 986 kunmap_atomic(dst, KM_USER0);
972 } else { 987 } else {
@@ -975,7 +990,7 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
975 } 990 }
976} 991}
977#else 992#else
978#define page_is_saveable(zone, pfn) saveable_page(pfn) 993#define page_is_saveable(zone, pfn) saveable_page(zone, pfn)
979 994
980static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) 995static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
981{ 996{
@@ -1459,9 +1474,7 @@ load_header(struct swsusp_info *info)
1459 * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set 1474 * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
1460 * the corresponding bit in the memory bitmap @bm 1475 * the corresponding bit in the memory bitmap @bm
1461 */ 1476 */
1462 1477static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
1463static inline void
1464unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
1465{ 1478{
1466 int j; 1479 int j;
1467 1480
@@ -1469,8 +1482,13 @@ unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
1469 if (unlikely(buf[j] == BM_END_OF_MAP)) 1482 if (unlikely(buf[j] == BM_END_OF_MAP))
1470 break; 1483 break;
1471 1484
1472 memory_bm_set_bit(bm, buf[j]); 1485 if (memory_bm_pfn_present(bm, buf[j]))
1486 memory_bm_set_bit(bm, buf[j]);
1487 else
1488 return -EFAULT;
1473 } 1489 }
1490
1491 return 0;
1474} 1492}
1475 1493
1476/* List of "safe" pages that may be used to store data loaded from the suspend 1494/* List of "safe" pages that may be used to store data loaded from the suspend
@@ -1608,7 +1626,7 @@ get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1608 pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); 1626 pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
1609 if (!pbe) { 1627 if (!pbe) {
1610 swsusp_free(); 1628 swsusp_free();
1611 return NULL; 1629 return ERR_PTR(-ENOMEM);
1612 } 1630 }
1613 pbe->orig_page = page; 1631 pbe->orig_page = page;
1614 if (safe_highmem_pages > 0) { 1632 if (safe_highmem_pages > 0) {
@@ -1677,7 +1695,7 @@ prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
1677static inline void * 1695static inline void *
1678get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) 1696get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1679{ 1697{
1680 return NULL; 1698 return ERR_PTR(-EINVAL);
1681} 1699}
1682 1700
1683static inline void copy_last_highmem_page(void) {} 1701static inline void copy_last_highmem_page(void) {}
@@ -1788,8 +1806,13 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
1788static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) 1806static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
1789{ 1807{
1790 struct pbe *pbe; 1808 struct pbe *pbe;
1791 struct page *page = pfn_to_page(memory_bm_next_pfn(bm)); 1809 struct page *page;
1810 unsigned long pfn = memory_bm_next_pfn(bm);
1792 1811
1812 if (pfn == BM_END_OF_MAP)
1813 return ERR_PTR(-EFAULT);
1814
1815 page = pfn_to_page(pfn);
1793 if (PageHighMem(page)) 1816 if (PageHighMem(page))
1794 return get_highmem_page_buffer(page, ca); 1817 return get_highmem_page_buffer(page, ca);
1795 1818
@@ -1805,7 +1828,7 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
1805 pbe = chain_alloc(ca, sizeof(struct pbe)); 1828 pbe = chain_alloc(ca, sizeof(struct pbe));
1806 if (!pbe) { 1829 if (!pbe) {
1807 swsusp_free(); 1830 swsusp_free();
1808 return NULL; 1831 return ERR_PTR(-ENOMEM);
1809 } 1832 }
1810 pbe->orig_address = page_address(page); 1833 pbe->orig_address = page_address(page);
1811 pbe->address = safe_pages_list; 1834 pbe->address = safe_pages_list;
@@ -1868,7 +1891,10 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
1868 return error; 1891 return error;
1869 1892
1870 } else if (handle->prev <= nr_meta_pages) { 1893 } else if (handle->prev <= nr_meta_pages) {
1871 unpack_orig_pfns(buffer, &copy_bm); 1894 error = unpack_orig_pfns(buffer, &copy_bm);
1895 if (error)
1896 return error;
1897
1872 if (handle->prev == nr_meta_pages) { 1898 if (handle->prev == nr_meta_pages) {
1873 error = prepare_image(&orig_bm, &copy_bm); 1899 error = prepare_image(&orig_bm, &copy_bm);
1874 if (error) 1900 if (error)
@@ -1879,12 +1905,14 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
1879 restore_pblist = NULL; 1905 restore_pblist = NULL;
1880 handle->buffer = get_buffer(&orig_bm, &ca); 1906 handle->buffer = get_buffer(&orig_bm, &ca);
1881 handle->sync_read = 0; 1907 handle->sync_read = 0;
1882 if (!handle->buffer) 1908 if (IS_ERR(handle->buffer))
1883 return -ENOMEM; 1909 return PTR_ERR(handle->buffer);
1884 } 1910 }
1885 } else { 1911 } else {
1886 copy_last_highmem_page(); 1912 copy_last_highmem_page();
1887 handle->buffer = get_buffer(&orig_bm, &ca); 1913 handle->buffer = get_buffer(&orig_bm, &ca);
1914 if (IS_ERR(handle->buffer))
1915 return PTR_ERR(handle->buffer);
1888 if (handle->buffer != buffer) 1916 if (handle->buffer != buffer)
1889 handle->sync_read = 0; 1917 handle->sync_read = 0;
1890 } 1918 }
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 023ff2a31d89..a92c91451559 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -262,3 +262,125 @@ int swsusp_shrink_memory(void)
262 262
263 return 0; 263 return 0;
264} 264}
265
266/*
267 * Platforms, like ACPI, may want us to save some memory used by them during
268 * hibernation and to restore the contents of this memory during the subsequent
269 * resume. The code below implements a mechanism allowing us to do that.
270 */
271
272struct nvs_page {
273 unsigned long phys_start;
274 unsigned int size;
275 void *kaddr;
276 void *data;
277 struct list_head node;
278};
279
280static LIST_HEAD(nvs_list);
281
282/**
283 * hibernate_nvs_register - register platform NVS memory region to save
284 * @start - physical address of the region
285 * @size - size of the region
286 *
287 * The NVS region need not be page-aligned (both ends) and we arrange
288 * things so that the data from page-aligned addresses in this region will
289 * be copied into separate RAM pages.
290 */
291int hibernate_nvs_register(unsigned long start, unsigned long size)
292{
293 struct nvs_page *entry, *next;
294
295 while (size > 0) {
296 unsigned int nr_bytes;
297
298 entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL);
299 if (!entry)
300 goto Error;
301
302 list_add_tail(&entry->node, &nvs_list);
303 entry->phys_start = start;
304 nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK);
305 entry->size = (size < nr_bytes) ? size : nr_bytes;
306
307 start += entry->size;
308 size -= entry->size;
309 }
310 return 0;
311
312 Error:
313 list_for_each_entry_safe(entry, next, &nvs_list, node) {
314 list_del(&entry->node);
315 kfree(entry);
316 }
317 return -ENOMEM;
318}
319
320/**
321 * hibernate_nvs_free - free data pages allocated for saving NVS regions
322 */
323void hibernate_nvs_free(void)
324{
325 struct nvs_page *entry;
326
327 list_for_each_entry(entry, &nvs_list, node)
328 if (entry->data) {
329 free_page((unsigned long)entry->data);
330 entry->data = NULL;
331 if (entry->kaddr) {
332 iounmap(entry->kaddr);
333 entry->kaddr = NULL;
334 }
335 }
336}
337
338/**
339 * hibernate_nvs_alloc - allocate memory necessary for saving NVS regions
340 */
341int hibernate_nvs_alloc(void)
342{
343 struct nvs_page *entry;
344
345 list_for_each_entry(entry, &nvs_list, node) {
346 entry->data = (void *)__get_free_page(GFP_KERNEL);
347 if (!entry->data) {
348 hibernate_nvs_free();
349 return -ENOMEM;
350 }
351 }
352 return 0;
353}
354
355/**
356 * hibernate_nvs_save - save NVS memory regions
357 */
358void hibernate_nvs_save(void)
359{
360 struct nvs_page *entry;
361
362 printk(KERN_INFO "PM: Saving platform NVS memory\n");
363
364 list_for_each_entry(entry, &nvs_list, node)
365 if (entry->data) {
366 entry->kaddr = ioremap(entry->phys_start, entry->size);
367 memcpy(entry->data, entry->kaddr, entry->size);
368 }
369}
370
371/**
372 * hibernate_nvs_restore - restore NVS memory regions
373 *
374 * This function is going to be called with interrupts disabled, so it
375 * cannot iounmap the virtual addresses used to access the NVS region.
376 */
377void hibernate_nvs_restore(void)
378{
379 struct nvs_page *entry;
380
381 printk(KERN_INFO "PM: Restoring platform NVS memory\n");
382
383 list_for_each_entry(entry, &nvs_list, node)
384 if (entry->data)
385 memcpy(entry->kaddr, entry->data, entry->size);
386}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a9d9760dc7b6..8b0daf0662ef 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -168,7 +168,13 @@ rb_event_length(struct ring_buffer_event *event)
168 */ 168 */
169unsigned ring_buffer_event_length(struct ring_buffer_event *event) 169unsigned ring_buffer_event_length(struct ring_buffer_event *event)
170{ 170{
171 return rb_event_length(event); 171 unsigned length = rb_event_length(event);
172 if (event->type != RINGBUF_TYPE_DATA)
173 return length;
174 length -= RB_EVNT_HDR_SIZE;
175 if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
176 length -= sizeof(event->array[0]);
177 return length;
172} 178}
173EXPORT_SYMBOL_GPL(ring_buffer_event_length); 179EXPORT_SYMBOL_GPL(ring_buffer_event_length);
174 180
diff --git a/net/core/dev.c b/net/core/dev.c
index bab8bcedd62e..5f736f1ceeae 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -170,25 +170,6 @@ static DEFINE_SPINLOCK(ptype_lock);
170static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; 170static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
171static struct list_head ptype_all __read_mostly; /* Taps */ 171static struct list_head ptype_all __read_mostly; /* Taps */
172 172
173#ifdef CONFIG_NET_DMA
174struct net_dma {
175 struct dma_client client;
176 spinlock_t lock;
177 cpumask_t channel_mask;
178 struct dma_chan **channels;
179};
180
181static enum dma_state_client
182netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
183 enum dma_state state);
184
185static struct net_dma net_dma = {
186 .client = {
187 .event_callback = netdev_dma_event,
188 },
189};
190#endif
191
192/* 173/*
193 * The @dev_base_head list is protected by @dev_base_lock and the rtnl 174 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
194 * semaphore. 175 * semaphore.
@@ -2754,14 +2735,7 @@ out:
2754 * There may not be any more sk_buffs coming right now, so push 2735 * There may not be any more sk_buffs coming right now, so push
2755 * any pending DMA copies to hardware 2736 * any pending DMA copies to hardware
2756 */ 2737 */
2757 if (!cpus_empty(net_dma.channel_mask)) { 2738 dma_issue_pending_all();
2758 int chan_idx;
2759 for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
2760 struct dma_chan *chan = net_dma.channels[chan_idx];
2761 if (chan)
2762 dma_async_memcpy_issue_pending(chan);
2763 }
2764 }
2765#endif 2739#endif
2766 2740
2767 return; 2741 return;
@@ -4952,122 +4926,6 @@ static int dev_cpu_callback(struct notifier_block *nfb,
4952 return NOTIFY_OK; 4926 return NOTIFY_OK;
4953} 4927}
4954 4928
4955#ifdef CONFIG_NET_DMA
4956/**
4957 * net_dma_rebalance - try to maintain one DMA channel per CPU
4958 * @net_dma: DMA client and associated data (lock, channels, channel_mask)
4959 *
4960 * This is called when the number of channels allocated to the net_dma client
4961 * changes. The net_dma client tries to have one DMA channel per CPU.
4962 */
4963
4964static void net_dma_rebalance(struct net_dma *net_dma)
4965{
4966 unsigned int cpu, i, n, chan_idx;
4967 struct dma_chan *chan;
4968
4969 if (cpus_empty(net_dma->channel_mask)) {
4970 for_each_online_cpu(cpu)
4971 rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
4972 return;
4973 }
4974
4975 i = 0;
4976 cpu = first_cpu(cpu_online_map);
4977
4978 for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
4979 chan = net_dma->channels[chan_idx];
4980
4981 n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4982 + (i < (num_online_cpus() %
4983 cpus_weight(net_dma->channel_mask)) ? 1 : 0));
4984
4985 while(n) {
4986 per_cpu(softnet_data, cpu).net_dma = chan;
4987 cpu = next_cpu(cpu, cpu_online_map);
4988 n--;
4989 }
4990 i++;
4991 }
4992}
4993
4994/**
4995 * netdev_dma_event - event callback for the net_dma_client
4996 * @client: should always be net_dma_client
4997 * @chan: DMA channel for the event
4998 * @state: DMA state to be handled
4999 */
5000static enum dma_state_client
5001netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
5002 enum dma_state state)
5003{
5004 int i, found = 0, pos = -1;
5005 struct net_dma *net_dma =
5006 container_of(client, struct net_dma, client);
5007 enum dma_state_client ack = DMA_DUP; /* default: take no action */
5008
5009 spin_lock(&net_dma->lock);
5010 switch (state) {
5011 case DMA_RESOURCE_AVAILABLE:
5012 for (i = 0; i < nr_cpu_ids; i++)
5013 if (net_dma->channels[i] == chan) {
5014 found = 1;
5015 break;
5016 } else if (net_dma->channels[i] == NULL && pos < 0)
5017 pos = i;
5018
5019 if (!found && pos >= 0) {
5020 ack = DMA_ACK;
5021 net_dma->channels[pos] = chan;
5022 cpu_set(pos, net_dma->channel_mask);
5023 net_dma_rebalance(net_dma);
5024 }
5025 break;
5026 case DMA_RESOURCE_REMOVED:
5027 for (i = 0; i < nr_cpu_ids; i++)
5028 if (net_dma->channels[i] == chan) {
5029 found = 1;
5030 pos = i;
5031 break;
5032 }
5033
5034 if (found) {
5035 ack = DMA_ACK;
5036 cpu_clear(pos, net_dma->channel_mask);
5037 net_dma->channels[i] = NULL;
5038 net_dma_rebalance(net_dma);
5039 }
5040 break;
5041 default:
5042 break;
5043 }
5044 spin_unlock(&net_dma->lock);
5045
5046 return ack;
5047}
5048
5049/**
5050 * netdev_dma_register - register the networking subsystem as a DMA client
5051 */
5052static int __init netdev_dma_register(void)
5053{
5054 net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct net_dma),
5055 GFP_KERNEL);
5056 if (unlikely(!net_dma.channels)) {
5057 printk(KERN_NOTICE
5058 "netdev_dma: no memory for net_dma.channels\n");
5059 return -ENOMEM;
5060 }
5061 spin_lock_init(&net_dma.lock);
5062 dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
5063 dma_async_client_register(&net_dma.client);
5064 dma_async_client_chan_request(&net_dma.client);
5065 return 0;
5066}
5067
5068#else
5069static int __init netdev_dma_register(void) { return -ENODEV; }
5070#endif /* CONFIG_NET_DMA */
5071 4929
5072/** 4930/**
5073 * netdev_increment_features - increment feature set by one 4931 * netdev_increment_features - increment feature set by one
@@ -5287,14 +5145,15 @@ static int __init net_dev_init(void)
5287 if (register_pernet_device(&default_device_ops)) 5145 if (register_pernet_device(&default_device_ops))
5288 goto out; 5146 goto out;
5289 5147
5290 netdev_dma_register();
5291
5292 open_softirq(NET_TX_SOFTIRQ, net_tx_action); 5148 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
5293 open_softirq(NET_RX_SOFTIRQ, net_rx_action); 5149 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
5294 5150
5295 hotcpu_notifier(dev_cpu_callback, 0); 5151 hotcpu_notifier(dev_cpu_callback, 0);
5296 dst_init(); 5152 dst_init();
5297 dev_mcast_init(); 5153 dev_mcast_init();
5154 #ifdef CONFIG_NET_DMA
5155 dmaengine_get();
5156 #endif
5298 rc = 0; 5157 rc = 0;
5299out: 5158out:
5300 return rc; 5159 return rc;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bd6ff907d9e4..ce572f9dff02 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1313,7 +1313,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1313 if ((available < target) && 1313 if ((available < target) &&
1314 (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && 1314 (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
1315 !sysctl_tcp_low_latency && 1315 !sysctl_tcp_low_latency &&
1316 __get_cpu_var(softnet_data).net_dma) { 1316 dma_find_channel(DMA_MEMCPY)) {
1317 preempt_enable_no_resched(); 1317 preempt_enable_no_resched();
1318 tp->ucopy.pinned_list = 1318 tp->ucopy.pinned_list =
1319 dma_pin_iovec_pages(msg->msg_iov, len); 1319 dma_pin_iovec_pages(msg->msg_iov, len);
@@ -1523,7 +1523,7 @@ do_prequeue:
1523 if (!(flags & MSG_TRUNC)) { 1523 if (!(flags & MSG_TRUNC)) {
1524#ifdef CONFIG_NET_DMA 1524#ifdef CONFIG_NET_DMA
1525 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) 1525 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1526 tp->ucopy.dma_chan = get_softnet_dma(); 1526 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1527 1527
1528 if (tp->ucopy.dma_chan) { 1528 if (tp->ucopy.dma_chan) {
1529 tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec( 1529 tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec(
@@ -1628,7 +1628,6 @@ skip_copy:
1628 1628
1629 /* Safe to free early-copied skbs now */ 1629 /* Safe to free early-copied skbs now */
1630 __skb_queue_purge(&sk->sk_async_wait_queue); 1630 __skb_queue_purge(&sk->sk_async_wait_queue);
1631 dma_chan_put(tp->ucopy.dma_chan);
1632 tp->ucopy.dma_chan = NULL; 1631 tp->ucopy.dma_chan = NULL;
1633 } 1632 }
1634 if (tp->ucopy.pinned_list) { 1633 if (tp->ucopy.pinned_list) {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 99b7ecbe8893..a6961d75c7ea 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5005,7 +5005,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
5005 return 0; 5005 return 0;
5006 5006
5007 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) 5007 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
5008 tp->ucopy.dma_chan = get_softnet_dma(); 5008 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
5009 5009
5010 if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) { 5010 if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
5011 5011
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9d839fa9331e..19d7b429a262 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1594,7 +1594,7 @@ process:
1594#ifdef CONFIG_NET_DMA 1594#ifdef CONFIG_NET_DMA
1595 struct tcp_sock *tp = tcp_sk(sk); 1595 struct tcp_sock *tp = tcp_sk(sk);
1596 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) 1596 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1597 tp->ucopy.dma_chan = get_softnet_dma(); 1597 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1598 if (tp->ucopy.dma_chan) 1598 if (tp->ucopy.dma_chan)
1599 ret = tcp_v4_do_rcv(sk, skb); 1599 ret = tcp_v4_do_rcv(sk, skb);
1600 else 1600 else
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1297306d729c..e5b85d45bee8 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1675,7 +1675,7 @@ process:
1675#ifdef CONFIG_NET_DMA 1675#ifdef CONFIG_NET_DMA
1676 struct tcp_sock *tp = tcp_sk(sk); 1676 struct tcp_sock *tp = tcp_sk(sk);
1677 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) 1677 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1678 tp->ucopy.dma_chan = get_softnet_dma(); 1678 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1679 if (tp->ucopy.dma_chan) 1679 if (tp->ucopy.dma_chan)
1680 ret = tcp_v6_do_rcv(sk, skb); 1680 ret = tcp_v6_do_rcv(sk, skb);
1681 else 1681 else