aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/RCU/listRCU.txt6
-rw-r--r--Documentation/RCU/rcu.txt2
-rw-r--r--Documentation/RCU/rculist_nulls.txt4
-rw-r--r--Documentation/filesystems/exofs.txt176
-rw-r--r--Documentation/filesystems/udf.txt2
-rw-r--r--Documentation/kernel-parameters.txt4
-rw-r--r--Documentation/md.txt37
-rw-r--r--MAINTAINERS3
-rw-r--r--arch/arm/configs/omap_ldp_defconfig24
-rw-r--r--arch/arm/configs/pcm037_defconfig23
-rw-r--r--arch/arm/configs/realview-smp_defconfig24
-rw-r--r--arch/arm/configs/realview_defconfig24
-rw-r--r--arch/arm/mach-at91/pm.c8
-rw-r--r--arch/arm/mach-gemini/include/mach/system.h2
-rw-r--r--arch/arm/mach-mmp/include/mach/system.h2
-rw-r--r--arch/arm/mach-mx3/pcm037.c23
-rw-r--r--arch/arm/mach-omap2/Makefile2
-rw-r--r--arch/arm/mach-omap2/board-ldp.c47
-rw-r--r--arch/arm/mach-omap2/board-overo.c65
-rw-r--r--arch/arm/mach-realview/core.c17
-rw-r--r--arch/arm/mach-realview/localtimer.c1
-rw-r--r--arch/arm/mm/abort-ev6.S4
-rw-r--r--arch/arm/mm/cache-feroceon-l2.c9
-rw-r--r--arch/arm/vfp/entry.S23
-rw-r--r--arch/arm/vfp/vfphw.S12
-rw-r--r--arch/arm/vfp/vfpmodule.c6
-rw-r--r--arch/m68k/include/asm/bootinfo.h381
-rw-r--r--arch/m68k/include/asm/bootinfo_mm.h378
-rw-r--r--arch/m68k/include/asm/bootinfo_no.h2
-rw-r--r--arch/m68k/include/asm/bug.h31
-rw-r--r--arch/m68k/include/asm/bug_mm.h29
-rw-r--r--arch/m68k/include/asm/bug_no.h4
-rw-r--r--arch/m68k/include/asm/bugs.h21
-rw-r--r--arch/m68k/include/asm/bugs_mm.h14
-rw-r--r--arch/m68k/include/asm/bugs_no.h16
-rw-r--r--arch/m68k/include/asm/cache.h14
-rw-r--r--arch/m68k/include/asm/cache_mm.h11
-rw-r--r--arch/m68k/include/asm/cache_no.h12
-rw-r--r--arch/m68k/include/asm/current.h31
-rw-r--r--arch/m68k/include/asm/current_mm.h6
-rw-r--r--arch/m68k/include/asm/current_no.h24
-rw-r--r--arch/m68k/include/asm/div64.h37
-rw-r--r--arch/m68k/include/asm/div64_mm.h28
-rw-r--r--arch/m68k/include/asm/div64_no.h1
-rw-r--r--arch/m68k/include/asm/dma-mapping.h113
-rw-r--r--arch/m68k/include/asm/dma-mapping_mm.h112
-rw-r--r--arch/m68k/include/asm/dma-mapping_no.h6
-rw-r--r--arch/m68k/include/asm/elf.h120
-rw-r--r--arch/m68k/include/asm/elf_mm.h119
-rw-r--r--arch/m68k/include/asm/elf_no.h110
-rw-r--r--arch/m68k/include/asm/fb.h41
-rw-r--r--arch/m68k/include/asm/fb_mm.h34
-rw-r--r--arch/m68k/include/asm/fb_no.h12
-rw-r--r--arch/m68k/include/asm/fpu.h22
-rw-r--r--arch/m68k/include/asm/fpu_mm.h21
-rw-r--r--arch/m68k/include/asm/fpu_no.h21
-rw-r--r--arch/m68k/include/asm/hw_irq.h9
-rw-r--r--arch/m68k/include/asm/hw_irq_mm.h6
-rw-r--r--arch/m68k/include/asm/hw_irq_no.h4
-rw-r--r--arch/m68k/include/asm/kmap_types.h26
-rw-r--r--arch/m68k/include/asm/kmap_types_mm.h21
-rw-r--r--arch/m68k/include/asm/kmap_types_no.h21
-rw-r--r--arch/m68k/include/asm/m532xsim.h1
-rw-r--r--arch/m68k/include/asm/mc146818rtc.h31
-rw-r--r--arch/m68k/include/asm/mc146818rtc_mm.h26
-rw-r--r--arch/m68k/include/asm/mc146818rtc_no.h9
-rw-r--r--arch/m68k/include/asm/mcfpci.h119
-rw-r--r--arch/m68k/include/asm/mmu.h14
-rw-r--r--arch/m68k/include/asm/mmu_context.h176
-rw-r--r--arch/m68k/include/asm/mmu_context_mm.h154
-rw-r--r--arch/m68k/include/asm/mmu_context_no.h33
-rw-r--r--arch/m68k/include/asm/mmu_mm.h7
-rw-r--r--arch/m68k/include/asm/mmu_no.h10
-rw-r--r--arch/m68k/include/asm/module.h51
-rw-r--r--arch/m68k/include/asm/module_mm.h39
-rw-r--r--arch/m68k/include/asm/module_no.h11
-rw-r--r--arch/m68k/include/asm/page_offset.h12
-rw-r--r--arch/m68k/include/asm/page_offset_mm.h8
-rw-r--r--arch/m68k/include/asm/page_offset_no.h5
-rw-r--r--arch/m68k/include/asm/pci.h17
-rw-r--r--arch/m68k/include/asm/pci_mm.h12
-rw-r--r--arch/m68k/include/asm/pci_no.h29
-rw-r--r--arch/m68k/include/asm/pgalloc.h20
-rw-r--r--arch/m68k/include/asm/pgalloc_mm.h19
-rw-r--r--arch/m68k/include/asm/pgalloc_no.h8
-rw-r--r--arch/m68k/include/asm/pgtable_no.h2
-rw-r--r--arch/m68k/include/asm/rtc.h7
-rw-r--r--arch/m68k/include/asm/scatterlist.h26
-rw-r--r--arch/m68k/include/asm/scatterlist_mm.h23
-rw-r--r--arch/m68k/include/asm/scatterlist_no.h22
-rw-r--r--arch/m68k/include/asm/segment.h64
-rw-r--r--arch/m68k/include/asm/segment_mm.h57
-rw-r--r--arch/m68k/include/asm/segment_no.h51
-rw-r--r--arch/m68k/include/asm/timex.h21
-rw-r--r--arch/m68k/include/asm/timex_mm.h18
-rw-r--r--arch/m68k/include/asm/timex_no.h23
-rw-r--r--arch/m68k/include/asm/tlbflush.h268
-rw-r--r--arch/m68k/include/asm/tlbflush_mm.h219
-rw-r--r--arch/m68k/include/asm/tlbflush_no.h55
-rw-r--r--arch/m68k/include/asm/ucontext.h33
-rw-r--r--arch/m68k/include/asm/ucontext_mm.h30
-rw-r--r--arch/m68k/include/asm/ucontext_no.h32
-rw-r--r--arch/m68k/include/asm/unaligned.h26
-rw-r--r--arch/m68k/include/asm/unaligned_mm.h13
-rw-r--r--arch/m68k/include/asm/unaligned_no.h25
-rw-r--r--arch/m68k/kernel/time.c18
-rw-r--r--arch/m68knommu/Makefile14
-rw-r--r--arch/m68knommu/kernel/dma.c8
-rw-r--r--arch/m68knommu/kernel/irq.c2
-rw-r--r--arch/m68knommu/mm/init.c6
-rw-r--r--arch/m68knommu/platform/5249/config.c11
-rw-r--r--arch/m68knommu/platform/5307/config.c8
-rw-r--r--arch/m68knommu/platform/5407/config.c8
-rw-r--r--arch/m68knommu/platform/coldfire/Makefile2
-rw-r--r--arch/m68knommu/platform/coldfire/clk.c40
-rw-r--r--arch/parisc/Kconfig12
-rw-r--r--arch/parisc/Makefile4
-rw-r--r--arch/parisc/include/asm/atomic.h18
-rw-r--r--arch/parisc/include/asm/cacheflush.h3
-rw-r--r--arch/parisc/include/asm/elf.h10
-rw-r--r--arch/parisc/include/asm/ftrace.h25
-rw-r--r--arch/parisc/include/asm/page.h13
-rw-r--r--arch/parisc/include/asm/pdc.h4
-rw-r--r--arch/parisc/include/asm/pgtable.h15
-rw-r--r--arch/parisc/include/asm/smp.h3
-rw-r--r--arch/parisc/kernel/Makefile15
-rw-r--r--arch/parisc/kernel/entry.S60
-rw-r--r--arch/parisc/kernel/firmware.c6
-rw-r--r--arch/parisc/kernel/ftrace.c185
-rw-r--r--arch/parisc/kernel/irq.c4
-rw-r--r--arch/parisc/kernel/module.c18
-rw-r--r--arch/parisc/kernel/parisc_ksyms.c5
-rw-r--r--arch/parisc/kernel/process.c29
-rw-r--r--arch/parisc/kernel/processor.c13
-rw-r--r--arch/parisc/kernel/smp.c21
-rw-r--r--arch/parisc/kernel/stacktrace.c63
-rw-r--r--arch/parisc/kernel/syscall.S58
-rw-r--r--arch/parisc/kernel/time.c9
-rw-r--r--arch/parisc/kernel/traps.c5
-rw-r--r--arch/parisc/kernel/vmlinux.lds.S2
-rw-r--r--arch/parisc/mm/init.c7
-rw-r--r--arch/powerpc/include/asm/ps3.h3
-rw-r--r--arch/powerpc/kernel/time.c16
-rw-r--r--arch/powerpc/platforms/ps3/os-area.c2
-rw-r--r--arch/powerpc/platforms/ps3/platform.h2
-rw-r--r--arch/powerpc/platforms/ps3/setup.c2
-rw-r--r--arch/powerpc/platforms/ps3/time.c26
-rw-r--r--arch/um/drivers/ubd_kern.c12
-rw-r--r--arch/x86/mm/highmem_32.c1
-rw-r--r--arch/x86/mm/iomap_32.c2
-rw-r--r--crypto/shash.c3
-rw-r--r--crypto/xor.c2
-rw-r--r--drivers/block/aoe/aoecmd.c23
-rw-r--r--drivers/block/hd.c2
-rw-r--r--drivers/block/xsysace.c30
-rw-r--r--drivers/char/hw_random/timeriomem-rng.c39
-rw-r--r--drivers/crypto/ixp4xx_crypto.c182
-rw-r--r--drivers/md/Kconfig31
-rw-r--r--drivers/md/Makefile16
-rw-r--r--drivers/md/bitmap.c49
-rw-r--r--drivers/md/bitmap.h (renamed from include/linux/raid/bitmap.h)0
-rw-r--r--drivers/md/dm-bio-list.h10
-rw-r--r--drivers/md/dm-bio-record.h26
-rw-r--r--drivers/md/dm-crypt.c6
-rw-r--r--drivers/md/dm-exception-store.c252
-rw-r--r--drivers/md/dm-exception-store.h58
-rw-r--r--drivers/md/dm-io.c5
-rw-r--r--drivers/md/dm-log.c75
-rw-r--r--drivers/md/dm-path-selector.c21
-rw-r--r--drivers/md/dm-raid1.c50
-rw-r--r--drivers/md/dm-snap-persistent.c153
-rw-r--r--drivers/md/dm-snap-transient.c86
-rw-r--r--drivers/md/dm-snap.c384
-rw-r--r--drivers/md/dm-snap.h105
-rw-r--r--drivers/md/dm-table.c26
-rw-r--r--drivers/md/dm-target.c104
-rw-r--r--drivers/md/dm.c134
-rw-r--r--drivers/md/dm.h2
-rw-r--r--drivers/md/faulty.c19
-rw-r--r--drivers/md/linear.c25
-rw-r--r--drivers/md/linear.h (renamed from include/linux/raid/linear.h)2
-rw-r--r--drivers/md/md.c615
-rw-r--r--drivers/md/md.h (renamed from include/linux/raid/md_k.h)66
-rw-r--r--drivers/md/mktables.c14
-rw-r--r--drivers/md/multipath.c17
-rw-r--r--drivers/md/multipath.h (renamed from include/linux/raid/multipath.h)2
-rw-r--r--drivers/md/raid0.c66
-rw-r--r--drivers/md/raid0.h (renamed from include/linux/raid/raid0.h)2
-rw-r--r--drivers/md/raid1.c35
-rw-r--r--drivers/md/raid1.h (renamed from include/linux/raid/raid1.h)2
-rw-r--r--drivers/md/raid10.c42
-rw-r--r--drivers/md/raid10.h (renamed from include/linux/raid/raid10.h)2
-rw-r--r--drivers/md/raid5.c1494
-rw-r--r--drivers/md/raid5.h (renamed from include/linux/raid/raid5.h)110
-rw-r--r--drivers/md/raid6algos.c21
-rw-r--r--drivers/md/raid6altivec.uc4
-rw-r--r--drivers/md/raid6int.uc4
-rw-r--r--drivers/md/raid6mmx.c4
-rw-r--r--drivers/md/raid6recov.c13
-rw-r--r--drivers/md/raid6sse1.c4
-rw-r--r--drivers/md/raid6sse2.c4
-rw-r--r--drivers/md/raid6test/Makefile2
-rw-r--r--drivers/md/raid6test/test.c2
-rw-r--r--drivers/md/raid6x86.h2
-rw-r--r--drivers/mtd/maps/pxa2xx-flash.c2
-rw-r--r--drivers/parisc/asp.c2
-rw-r--r--drivers/parisc/ccio-dma.c16
-rw-r--r--drivers/parisc/dino.c7
-rw-r--r--drivers/parisc/eisa.c2
-rw-r--r--drivers/parisc/eisa_enumerator.c4
-rw-r--r--drivers/parisc/iosapic.c2
-rw-r--r--drivers/parisc/led.c26
-rw-r--r--drivers/pcmcia/pxa2xx_cm_x255.c2
-rw-r--r--drivers/rtc/Kconfig31
-rw-r--r--drivers/rtc/Makefile4
-rw-r--r--drivers/rtc/rtc-generic.c84
-rw-r--r--drivers/rtc/rtc-parisc.c86
-rw-r--r--drivers/rtc/rtc-ppc.c69
-rw-r--r--drivers/rtc/rtc-ps3.c104
-rw-r--r--drivers/serial/mcf.c2
-rw-r--r--drivers/usb/storage/isd200.c239
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Makefile1
-rw-r--r--fs/compat_ioctl.c2
-rw-r--r--fs/exofs/BUGS3
-rw-r--r--fs/exofs/Kbuild16
-rw-r--r--fs/exofs/Kconfig13
-rw-r--r--fs/exofs/common.h184
-rw-r--r--fs/exofs/dir.c672
-rw-r--r--fs/exofs/exofs.h180
-rw-r--r--fs/exofs/file.c87
-rw-r--r--fs/exofs/inode.c1303
-rw-r--r--fs/exofs/namei.c342
-rw-r--r--fs/exofs/osd.c153
-rw-r--r--fs/exofs/super.c584
-rw-r--r--fs/exofs/symlink.c57
-rw-r--r--fs/udf/balloc.c150
-rw-r--r--fs/udf/dir.c14
-rw-r--r--fs/udf/directory.c38
-rw-r--r--fs/udf/ecma_167.h416
-rw-r--r--fs/udf/ialloc.c9
-rw-r--r--fs/udf/inode.c213
-rw-r--r--fs/udf/misc.c29
-rw-r--r--fs/udf/namei.c86
-rw-r--r--fs/udf/osta_udf.h22
-rw-r--r--fs/udf/partition.c2
-rw-r--r--fs/udf/super.c605
-rw-r--r--fs/udf/truncate.c44
-rw-r--r--fs/udf/udf_i.h6
-rw-r--r--fs/udf/udf_sb.h9
-rw-r--r--fs/udf/udfdecl.h57
-rw-r--r--fs/udf/udfend.h28
-rw-r--r--fs/udf/udftime.c6
-rw-r--r--fs/udf/unicode.c62
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/linux-2.6/mutex.h25
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c107
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c33
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h13
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c157
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c137
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h32
-rw-r--r--fs/xfs/quota/xfs_dquot.c28
-rw-r--r--fs/xfs/quota/xfs_dquot.h18
-rw-r--r--fs/xfs/quota/xfs_qm.c212
-rw-r--r--fs/xfs/quota/xfs_qm.h26
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c1
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c190
-rw-r--r--fs/xfs/quota/xfs_quota_priv.h40
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c16
-rw-r--r--fs/xfs/support/debug.c1
-rw-r--r--fs/xfs/support/uuid.c71
-rw-r--r--fs/xfs/support/uuid.h4
-rw-r--r--fs/xfs/xfs_ag.h4
-rw-r--r--fs/xfs/xfs_alloc.c26
-rw-r--r--fs/xfs/xfs_alloc.h6
-rw-r--r--fs/xfs/xfs_attr_leaf.c58
-rw-r--r--fs/xfs/xfs_bmap.c76
-rw-r--r--fs/xfs/xfs_bmap.h6
-rw-r--r--fs/xfs/xfs_btree.c4
-rw-r--r--fs/xfs/xfs_btree.h2
-rw-r--r--fs/xfs/xfs_da_btree.c2
-rw-r--r--fs/xfs/xfs_da_btree.h9
-rw-r--r--fs/xfs/xfs_dfrag.c68
-rw-r--r--fs/xfs/xfs_dinode.h4
-rw-r--r--fs/xfs/xfs_dir2.c2
-rw-r--r--fs/xfs/xfs_dir2_block.c7
-rw-r--r--fs/xfs/xfs_dir2_data.h2
-rw-r--r--fs/xfs/xfs_dir2_leaf.c17
-rw-r--r--fs/xfs/xfs_dir2_node.c2
-rw-r--r--fs/xfs/xfs_dir2_sf.c13
-rw-r--r--fs/xfs/xfs_extfree_item.h6
-rw-r--r--fs/xfs/xfs_filestream.c9
-rw-r--r--fs/xfs/xfs_fsops.c2
-rw-r--r--fs/xfs/xfs_ialloc.c12
-rw-r--r--fs/xfs/xfs_ialloc_btree.c2
-rw-r--r--fs/xfs/xfs_ialloc_btree.h22
-rw-r--r--fs/xfs/xfs_inode.h2
-rw-r--r--fs/xfs/xfs_inode_item.h2
-rw-r--r--fs/xfs/xfs_iomap.h2
-rw-r--r--fs/xfs/xfs_itable.c9
-rw-r--r--fs/xfs/xfs_log.c67
-rw-r--r--fs/xfs/xfs_log.h3
-rw-r--r--fs/xfs/xfs_log_priv.h3
-rw-r--r--fs/xfs/xfs_log_recover.c308
-rw-r--r--fs/xfs/xfs_mount.c253
-rw-r--r--fs/xfs/xfs_mount.h19
-rw-r--r--fs/xfs/xfs_qmops.c1
-rw-r--r--fs/xfs/xfs_quota.h3
-rw-r--r--fs/xfs/xfs_rtalloc.c10
-rw-r--r--fs/xfs/xfs_rtalloc.h8
-rw-r--r--fs/xfs/xfs_trans.h24
-rw-r--r--fs/xfs/xfs_trans_ail.c4
-rw-r--r--fs/xfs/xfs_trans_item.c2
-rw-r--r--fs/xfs/xfs_trans_space.h2
-rw-r--r--fs/xfs/xfs_types.h8
-rw-r--r--fs/xfs/xfs_utils.c2
-rw-r--r--fs/xfs/xfs_vnodeops.c408
-rw-r--r--fs/xfs/xfs_vnodeops.h3
-rw-r--r--include/linux/device-mapper.h3
-rw-r--r--include/linux/dm-dirty-log.h13
-rw-r--r--include/linux/hdreg.h66
-rw-r--r--include/linux/highmem.h29
-rw-r--r--include/linux/raid/md.h81
-rw-r--r--include/linux/raid/md_u.h35
-rw-r--r--include/linux/raid/pq.h (renamed from drivers/md/raid6.h)28
-rw-r--r--include/linux/raid/xor.h2
-rw-r--r--include/linux/timeriomem-rng.h2
-rw-r--r--init/do_mounts.h1
-rw-r--r--init/do_mounts_md.c5
-rw-r--r--scripts/package/buildtar4
334 files changed, 11522 insertions, 6914 deletions
diff --git a/Documentation/RCU/listRCU.txt b/Documentation/RCU/listRCU.txt
index 1fd175368a87..4349c1487e91 100644
--- a/Documentation/RCU/listRCU.txt
+++ b/Documentation/RCU/listRCU.txt
@@ -118,7 +118,7 @@ Following are the RCU equivalents for these two functions:
118 list_for_each_entry(e, list, list) { 118 list_for_each_entry(e, list, list) {
119 if (!audit_compare_rule(rule, &e->rule)) { 119 if (!audit_compare_rule(rule, &e->rule)) {
120 list_del_rcu(&e->list); 120 list_del_rcu(&e->list);
121 call_rcu(&e->rcu, audit_free_rule, e); 121 call_rcu(&e->rcu, audit_free_rule);
122 return 0; 122 return 0;
123 } 123 }
124 } 124 }
@@ -206,7 +206,7 @@ RCU ("read-copy update") its name. The RCU code is as follows:
206 ne->rule.action = newaction; 206 ne->rule.action = newaction;
207 ne->rule.file_count = newfield_count; 207 ne->rule.file_count = newfield_count;
208 list_replace_rcu(e, ne); 208 list_replace_rcu(e, ne);
209 call_rcu(&e->rcu, audit_free_rule, e); 209 call_rcu(&e->rcu, audit_free_rule);
210 return 0; 210 return 0;
211 } 211 }
212 } 212 }
@@ -283,7 +283,7 @@ flag under the spinlock as follows:
283 list_del_rcu(&e->list); 283 list_del_rcu(&e->list);
284 e->deleted = 1; 284 e->deleted = 1;
285 spin_unlock(&e->lock); 285 spin_unlock(&e->lock);
286 call_rcu(&e->rcu, audit_free_rule, e); 286 call_rcu(&e->rcu, audit_free_rule);
287 return 0; 287 return 0;
288 } 288 }
289 } 289 }
diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt
index 95821a29ae41..7aa2002ade77 100644
--- a/Documentation/RCU/rcu.txt
+++ b/Documentation/RCU/rcu.txt
@@ -81,7 +81,7 @@ o I hear that RCU needs work in order to support realtime kernels?
81 This work is largely completed. Realtime-friendly RCU can be 81 This work is largely completed. Realtime-friendly RCU can be
82 enabled via the CONFIG_PREEMPT_RCU kernel configuration parameter. 82 enabled via the CONFIG_PREEMPT_RCU kernel configuration parameter.
83 However, work is in progress for enabling priority boosting of 83 However, work is in progress for enabling priority boosting of
84 preempted RCU read-side critical sections.This is needed if you 84 preempted RCU read-side critical sections. This is needed if you
85 have CPU-bound realtime threads. 85 have CPU-bound realtime threads.
86 86
87o Where can I find more information on RCU? 87o Where can I find more information on RCU?
diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt
index 239f542d48ba..6389dec33459 100644
--- a/Documentation/RCU/rculist_nulls.txt
+++ b/Documentation/RCU/rculist_nulls.txt
@@ -21,7 +21,7 @@ if (obj) {
21 /* 21 /*
22 * Because a writer could delete object, and a writer could 22 * Because a writer could delete object, and a writer could
23 * reuse these object before the RCU grace period, we 23 * reuse these object before the RCU grace period, we
24 * must check key after geting the reference on object 24 * must check key after getting the reference on object
25 */ 25 */
26 if (obj->key != key) { // not the object we expected 26 if (obj->key != key) { // not the object we expected
27 put_ref(obj); 27 put_ref(obj);
@@ -117,7 +117,7 @@ a race (some writer did a delete and/or a move of an object
117to another chain) checking the final 'nulls' value if 117to another chain) checking the final 'nulls' value if
118the lookup met the end of chain. If final 'nulls' value 118the lookup met the end of chain. If final 'nulls' value
119is not the slot number, then we must restart the lookup at 119is not the slot number, then we must restart the lookup at
120the begining. If the object was moved to same chain, 120the beginning. If the object was moved to the same chain,
121then the reader doesnt care : It might eventually 121then the reader doesnt care : It might eventually
122scan the list again without harm. 122scan the list again without harm.
123 123
diff --git a/Documentation/filesystems/exofs.txt b/Documentation/filesystems/exofs.txt
new file mode 100644
index 000000000000..0ced74c2f73c
--- /dev/null
+++ b/Documentation/filesystems/exofs.txt
@@ -0,0 +1,176 @@
1===============================================================================
2WHAT IS EXOFS?
3===============================================================================
4
5exofs is a file system that uses an OSD and exports the API of a normal Linux
6file system. Users access exofs like any other local file system, and exofs
7will in turn issue commands to the local OSD initiator.
8
9OSD is a new T10 command set that views storage devices not as a large/flat
10array of sectors but as a container of objects, each having a length, quota,
11time attributes and more. Each object is addressed by a 64bit ID, and is
12contained in a 64bit ID partition. Each object has associated attributes
13attached to it, which are an integral part of the object and provide metadata about
14the object. The standard defines some common obligatory attributes, but user
15attributes can be added as needed.
16
17===============================================================================
18ENVIRONMENT
19===============================================================================
20
21To use this file system, you need to have an object store to run it on. You
22may download a target from:
23http://open-osd.org
24
25See Documentation/scsi/osd.txt for how to setup a working osd environment.
26
27===============================================================================
28USAGE
29===============================================================================
30
311. Download and compile exofs and open-osd initiator:
32 You need an external Kernel source tree or kernel headers from your
33 distribution. (anything based on 2.6.26 or later).
34
35 a. download open-osd including exofs source using:
36 [parent-directory]$ git clone git://git.open-osd.org/open-osd.git
37
38 b. Build the library module like this:
39 [parent-directory]$ make -C KSRC=$(KER_DIR) open-osd
40
41 This will build both the open-osd initiator as well as the exofs kernel
42 module. Use whatever parameters you compiled your Kernel with and
43 $(KER_DIR) above pointing to the Kernel you compile against. See the file
44 open-osd/top-level-Makefile for an example.
45
462. Get the OSD initiator and target set up properly, and login to the target.
47 See Documentation/scsi/osd.txt for further instructions. Also see ./do-osd
48 for an example script that does all these steps.
49
503. Insmod the exofs.ko module:
51 [exofs]$ insmod exofs.ko
52
534. Make sure the directory where you want to mount exists. If not, create it.
54 (For example, mkdir /mnt/exofs)
55
565. At first run you will need to invoke the mkfs.exofs application
57
58 As an example, this will create the file system on:
59 /dev/osd0 partition ID 65536
60
61 mkfs.exofs --pid=65536 --format /dev/osd0
62
63 The --format is optional. If not specified, no OSD_FORMAT will be
64 performed and a clean file system will be created in the specified pid,
65 in the available space of the target. (Use --format=size_in_meg to limit
66 the total LUN space available)
67
68 If pid already exists it will be deleted and a new one will be created in its
69 place. Be careful.
70
71 An exofs lives inside a single OSD partition. You can create multiple exofs
72 filesystems on the same device using multiple pids.
73
74 (run mkfs.exofs without any parameters for usage help message)
75
766. Mount the file system.
77
78 For example, to mount /dev/osd0, partition ID 0x10000 on /mnt/exofs:
79
80 mount -t exofs -o pid=65536 /dev/osd0 /mnt/exofs/
81
827. For reference (See do-exofs example script):
83 do-exofs start - an example of how to perform the above steps.
84 do-exofs stop - an example of how to unmount the file system.
85 do-exofs format - an example of how to format and mkfs a new exofs.
86
878. Extra compilation flags (uncomment in fs/exofs/Kbuild):
88 CONFIG_EXOFS_DEBUG - for debug messages and extra checks.
89
90===============================================================================
91exofs mount options
92===============================================================================
93Similar to any mount command:
94 mount -t exofs -o exofs_options /dev/osdX mount_exofs_directory
95
96Where:
97 -t exofs: specifies the exofs file system
98
99 /dev/osdX: X is a decimal number. /dev/osdX was created after a successful
100 login into an OSD target.
101
102 mount_exofs_directory: The directory to mount the file system on
103
104 exofs specific options: Options are separated by commas (,)
105 pid=<integer> - The partition number to mount/create as
106 container of the filesystem.
107 This option is mandatory
108 to=<integer> - Timeout in ticks for a single command
109 default is (60 * HZ) [for debugging only]
110
111===============================================================================
112DESIGN
113===============================================================================
114
115* The file system control block (AKA on-disk superblock) resides in an object
116 with a special ID (defined in common.h).
117 Information included in the file system control block is used to fill the
118 in-memory superblock structure at mount time. This object is created before
119 the file system is used by mkexofs.c. It contains information such as:
120 - The file system's magic number
121 - The next inode number to be allocated
122
123* Each file resides in its own object and contains the data (and it will be
124 possible to extend the file over multiple objects, though this has not been
125 implemented yet).
126
127* A directory is treated as a file, and essentially contains a list of <file
128 name, inode #> pairs for files that are found in that directory. The object
129 IDs correspond to the files' inode numbers and will be allocated according to
130 a bitmap (stored in a separate object). Now they are allocated using a
131 counter.
132
133* Each file's control block (AKA on-disk inode) is stored in its object's
134 attributes. This applies to both regular files and other types (directories,
135 device files, symlinks, etc.).
136
137* Credentials are generated per object (inode and superblock) when they are
138 created in memory (read off disk or created). The credential works for all
139 operations and is used as long as the object remains in memory.
140
141* Async OSD operations are used whenever possible, but the target may execute
142 them out of order. The operations that concern us are create, delete,
143 readpage, writepage, update_inode, and truncate. The following pairs of
144 operations should execute in the order written, and we need to prevent them
145 from executing in reverse order:
146 - The following are handled with the OBJ_CREATED and OBJ_2BCREATED
147 flags. OBJ_CREATED is set when we know the object exists on the OSD -
148 in create's callback function, and when we successfully do a read_inode.
149 OBJ_2BCREATED is set in the beginning of the create function, so we
150 know that we should wait.
151 - create/delete: delete should wait until the object is created
152 on the OSD.
153 - create/readpage: readpage should be able to return a page
154 full of zeroes in this case. If there was a write already
155 en-route (i.e. create, writepage, readpage) then the page
156 would be locked, and so it would really be the same as
157 create/writepage.
158 - create/writepage: if writepage is called for a sync write, it
159 should wait until the object is created on the OSD.
160 Otherwise, it should just return.
161 - create/truncate: truncate should wait until the object is
162 created on the OSD.
163 - create/update_inode: update_inode should wait until the
164 object is created on the OSD.
165 - Handled by VFS locks:
166 - readpage/delete: shouldn't happen because of page lock.
167 - writepage/delete: shouldn't happen because of page lock.
168 - readpage/writepage: shouldn't happen because of page lock.
169
170===============================================================================
171LICENSE/COPYRIGHT
172===============================================================================
173The exofs file system is based on ext2 v0.5b (distributed with the Linux kernel
174version 2.6.10). All files include the original copyrights, and the license
175is GPL version 2 (only version 2, as is true for the Linux kernel). The
176Linux kernel can be downloaded from www.kernel.org.
diff --git a/Documentation/filesystems/udf.txt b/Documentation/filesystems/udf.txt
index fde829a756e6..902b95d0ee51 100644
--- a/Documentation/filesystems/udf.txt
+++ b/Documentation/filesystems/udf.txt
@@ -24,6 +24,8 @@ The following mount options are supported:
24 24
25 gid= Set the default group. 25 gid= Set the default group.
26 umask= Set the default umask. 26 umask= Set the default umask.
27 mode= Set the default file permissions.
28 dmode= Set the default directory permissions.
27 uid= Set the default user. 29 uid= Set the default user.
28 bs= Set the block size. 30 bs= Set the block size.
29 unhide Show otherwise hidden files. 31 unhide Show otherwise hidden files.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 240257dd4238..bdc0c433e88c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1523,7 +1523,9 @@ and is between 256 and 4096 characters. It is defined in the file
1523 1523
1524 noclflush [BUGS=X86] Don't use the CLFLUSH instruction 1524 noclflush [BUGS=X86] Don't use the CLFLUSH instruction
1525 1525
1526 nohlt [BUGS=ARM,SH] 1526 nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
1527 wfi(ARM) instruction doesn't work correctly and not to
1528 use it. This is also useful when using JTAG debugger.
1527 1529
1528 no-hlt [BUGS=X86-32] Tells the kernel that the hlt 1530 no-hlt [BUGS=X86-32] Tells the kernel that the hlt
1529 instruction doesn't work correctly and not to 1531 instruction doesn't work correctly and not to
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 1da9d1b1793f..4edd39ec7db9 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -164,15 +164,19 @@ All md devices contain:
164 raid_disks 164 raid_disks
165 a text file with a simple number indicating the number of devices 165 a text file with a simple number indicating the number of devices
166 in a fully functional array. If this is not yet known, the file 166 in a fully functional array. If this is not yet known, the file
167 will be empty. If an array is being resized (not currently 167 will be empty. If an array is being resized this will contain
168 possible) this will contain the larger of the old and new sizes. 168 the new number of devices.
169 Some raid level (RAID1) allow this value to be set while the 169 Some raid levels allow this value to be set while the array is
170 array is active. This will reconfigure the array. Otherwise 170 active. This will reconfigure the array. Otherwise it can only
171 it can only be set while assembling an array. 171 be set while assembling an array.
172 A change to this attribute will not be permitted if it would
173 reduce the size of the array. To reduce the number of drives
174 in an e.g. raid5, the array size must first be reduced by
175 setting the 'array_size' attribute.
172 176
173 chunk_size 177 chunk_size
174 This is the size if bytes for 'chunks' and is only relevant to 178 This is the size in bytes for 'chunks' and is only relevant to
175 raid levels that involve striping (1,4,5,6,10). The address space 179 raid levels that involve striping (0,4,5,6,10). The address space
176 of the array is conceptually divided into chunks and consecutive 180 of the array is conceptually divided into chunks and consecutive
177 chunks are striped onto neighbouring devices. 181 chunks are striped onto neighbouring devices.
178 The size should be at least PAGE_SIZE (4k) and should be a power 182 The size should be at least PAGE_SIZE (4k) and should be a power
@@ -183,6 +187,20 @@ All md devices contain:
183 simply a number that is interpretted differently by different 187 simply a number that is interpretted differently by different
184 levels. It can be written while assembling an array. 188 levels. It can be written while assembling an array.
185 189
190 array_size
191 This can be used to artificially constrain the available space in
192 the array to be less than is actually available on the combined
193 devices. Writing a number (in Kilobytes) which is less than
194 the available size will set the size. Any reconfiguration of the
195 array (e.g. adding devices) will not cause the size to change.
196 Writing the word 'default' will cause the effective size of the
197 array to be whatever size is actually available based on
198 'level', 'chunk_size' and 'component_size'.
199
200 This can be used to reduce the size of the array before reducing
201 the number of devices in a raid4/5/6, or to support external
202 metadata formats which mandate such clipping.
203
186 reshape_position 204 reshape_position
187 This is either "none" or a sector number within the devices of 205 This is either "none" or a sector number within the devices of
188 the array where "reshape" is up to. If this is set, the three 206 the array where "reshape" is up to. If this is set, the three
@@ -207,6 +225,11 @@ All md devices contain:
207 about the array. It can be 0.90 (traditional format), 1.0, 1.1, 225 about the array. It can be 0.90 (traditional format), 1.0, 1.1,
208 1.2 (newer format in varying locations) or "none" indicating that 226 1.2 (newer format in varying locations) or "none" indicating that
209 the kernel isn't managing metadata at all. 227 the kernel isn't managing metadata at all.
228 Alternately it can be "external:" followed by a string which
229 is set by user-space. This indicates that metadata is managed
230 by a user-space program. Any device failure or other event that
231 requires a metadata update will cause array activity to be
232 suspended until the event is acknowledged.
210 233
211 resync_start 234 resync_start
212 The point at which resync should start. If no resync is needed, 235 The point at which resync should start. If no resync is needed,
diff --git a/MAINTAINERS b/MAINTAINERS
index ebaf77ebd8b7..908226600f16 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4969,7 +4969,8 @@ S: Supported
4969 4969
4970XFS FILESYSTEM 4970XFS FILESYSTEM
4971P: Silicon Graphics Inc 4971P: Silicon Graphics Inc
4972P: Bill O'Donnell 4972P: Felix Blyakher
4973M: felixb@sgi.com
4973M: xfs-masters@oss.sgi.com 4974M: xfs-masters@oss.sgi.com
4974L: xfs@oss.sgi.com 4975L: xfs@oss.sgi.com
4975W: http://oss.sgi.com/projects/xfs 4976W: http://oss.sgi.com/projects/xfs
diff --git a/arch/arm/configs/omap_ldp_defconfig b/arch/arm/configs/omap_ldp_defconfig
index aa9d34feddc6..679a4a3e265e 100644
--- a/arch/arm/configs/omap_ldp_defconfig
+++ b/arch/arm/configs/omap_ldp_defconfig
@@ -474,14 +474,34 @@ CONFIG_NETDEVICES=y
474# CONFIG_EQUALIZER is not set 474# CONFIG_EQUALIZER is not set
475# CONFIG_TUN is not set 475# CONFIG_TUN is not set
476# CONFIG_VETH is not set 476# CONFIG_VETH is not set
477# CONFIG_PHYLIB is not set 477CONFIG_PHYLIB=y
478
479#
480# MII PHY device drivers
481#
482# CONFIG_MARVELL_PHY is not set
483# CONFIG_DAVICOM_PHY is not set
484# CONFIG_QSEMI_PHY is not set
485# CONFIG_LXT_PHY is not set
486# CONFIG_CICADA_PHY is not set
487# CONFIG_VITESSE_PHY is not set
488CONFIG_SMSC_PHY=y
489# CONFIG_BROADCOM_PHY is not set
490# CONFIG_ICPLUS_PHY is not set
491# CONFIG_REALTEK_PHY is not set
492# CONFIG_NATIONAL_PHY is not set
493# CONFIG_STE10XP is not set
494# CONFIG_LSI_ET1011C_PHY is not set
495# CONFIG_FIXED_PHY is not set
496# CONFIG_MDIO_BITBANG is not set
478CONFIG_NET_ETHERNET=y 497CONFIG_NET_ETHERNET=y
479CONFIG_MII=y 498CONFIG_MII=y
480# CONFIG_AX88796 is not set 499# CONFIG_AX88796 is not set
481# CONFIG_SMC91X is not set 500# CONFIG_SMC91X is not set
482# CONFIG_DM9000 is not set 501# CONFIG_DM9000 is not set
483# CONFIG_ENC28J60 is not set 502# CONFIG_ENC28J60 is not set
484CONFIG_SMC911X=y 503# CONFIG_SMC911X is not set
504CONFIG_SMSC911X=y
485# CONFIG_IBM_NEW_EMAC_ZMII is not set 505# CONFIG_IBM_NEW_EMAC_ZMII is not set
486# CONFIG_IBM_NEW_EMAC_RGMII is not set 506# CONFIG_IBM_NEW_EMAC_RGMII is not set
487# CONFIG_IBM_NEW_EMAC_TAH is not set 507# CONFIG_IBM_NEW_EMAC_TAH is not set
diff --git a/arch/arm/configs/pcm037_defconfig b/arch/arm/configs/pcm037_defconfig
index 627474586470..6e37c77c4760 100644
--- a/arch/arm/configs/pcm037_defconfig
+++ b/arch/arm/configs/pcm037_defconfig
@@ -465,12 +465,33 @@ CONFIG_NETDEVICES=y
465# CONFIG_EQUALIZER is not set 465# CONFIG_EQUALIZER is not set
466# CONFIG_TUN is not set 466# CONFIG_TUN is not set
467# CONFIG_VETH is not set 467# CONFIG_VETH is not set
468# CONFIG_PHYLIB is not set 468CONFIG_PHYLIB=y
469
470#
471# MII PHY device drivers
472#
473# CONFIG_MARVELL_PHY is not set
474# CONFIG_DAVICOM_PHY is not set
475# CONFIG_QSEMI_PHY is not set
476# CONFIG_LXT_PHY is not set
477# CONFIG_CICADA_PHY is not set
478# CONFIG_VITESSE_PHY is not set
479CONFIG_SMSC_PHY=y
480# CONFIG_BROADCOM_PHY is not set
481# CONFIG_ICPLUS_PHY is not set
482# CONFIG_REALTEK_PHY is not set
483# CONFIG_NATIONAL_PHY is not set
484# CONFIG_STE10XP is not set
485# CONFIG_LSI_ET1011C_PHY is not set
486# CONFIG_FIXED_PHY is not set
487# CONFIG_MDIO_BITBANG is not set
469CONFIG_NET_ETHERNET=y 488CONFIG_NET_ETHERNET=y
470CONFIG_MII=y 489CONFIG_MII=y
471# CONFIG_AX88796 is not set 490# CONFIG_AX88796 is not set
472CONFIG_SMC91X=y 491CONFIG_SMC91X=y
473# CONFIG_DM9000 is not set 492# CONFIG_DM9000 is not set
493# CONFIG_SMC911X is not set
494CONFIG_SMSC911X=y
474# CONFIG_IBM_NEW_EMAC_ZMII is not set 495# CONFIG_IBM_NEW_EMAC_ZMII is not set
475# CONFIG_IBM_NEW_EMAC_RGMII is not set 496# CONFIG_IBM_NEW_EMAC_RGMII is not set
476# CONFIG_IBM_NEW_EMAC_TAH is not set 497# CONFIG_IBM_NEW_EMAC_TAH is not set
diff --git a/arch/arm/configs/realview-smp_defconfig b/arch/arm/configs/realview-smp_defconfig
index cd29824d791c..21db4b3ec8ff 100644
--- a/arch/arm/configs/realview-smp_defconfig
+++ b/arch/arm/configs/realview-smp_defconfig
@@ -496,13 +496,33 @@ CONFIG_NETDEVICES=y
496# CONFIG_EQUALIZER is not set 496# CONFIG_EQUALIZER is not set
497# CONFIG_TUN is not set 497# CONFIG_TUN is not set
498# CONFIG_VETH is not set 498# CONFIG_VETH is not set
499# CONFIG_PHYLIB is not set 499CONFIG_PHYLIB=y
500
501#
502# MII PHY device drivers
503#
504# CONFIG_MARVELL_PHY is not set
505# CONFIG_DAVICOM_PHY is not set
506# CONFIG_QSEMI_PHY is not set
507# CONFIG_LXT_PHY is not set
508# CONFIG_CICADA_PHY is not set
509# CONFIG_VITESSE_PHY is not set
510CONFIG_SMSC_PHY=y
511# CONFIG_BROADCOM_PHY is not set
512# CONFIG_ICPLUS_PHY is not set
513# CONFIG_REALTEK_PHY is not set
514# CONFIG_NATIONAL_PHY is not set
515# CONFIG_STE10XP is not set
516# CONFIG_LSI_ET1011C_PHY is not set
517# CONFIG_FIXED_PHY is not set
518# CONFIG_MDIO_BITBANG is not set
500CONFIG_NET_ETHERNET=y 519CONFIG_NET_ETHERNET=y
501CONFIG_MII=y 520CONFIG_MII=y
502# CONFIG_AX88796 is not set 521# CONFIG_AX88796 is not set
503CONFIG_SMC91X=y 522CONFIG_SMC91X=y
504# CONFIG_DM9000 is not set 523# CONFIG_DM9000 is not set
505CONFIG_SMC911X=y 524# CONFIG_SMC911X is not set
525CONFIG_SMSC911X=y
506# CONFIG_IBM_NEW_EMAC_ZMII is not set 526# CONFIG_IBM_NEW_EMAC_ZMII is not set
507# CONFIG_IBM_NEW_EMAC_RGMII is not set 527# CONFIG_IBM_NEW_EMAC_RGMII is not set
508# CONFIG_IBM_NEW_EMAC_TAH is not set 528# CONFIG_IBM_NEW_EMAC_TAH is not set
diff --git a/arch/arm/configs/realview_defconfig b/arch/arm/configs/realview_defconfig
index 7e253f58ed18..9a75c30b910d 100644
--- a/arch/arm/configs/realview_defconfig
+++ b/arch/arm/configs/realview_defconfig
@@ -490,13 +490,33 @@ CONFIG_NETDEVICES=y
490# CONFIG_EQUALIZER is not set 490# CONFIG_EQUALIZER is not set
491# CONFIG_TUN is not set 491# CONFIG_TUN is not set
492# CONFIG_VETH is not set 492# CONFIG_VETH is not set
493# CONFIG_PHYLIB is not set 493CONFIG_PHYLIB=y
494
495#
496# MII PHY device drivers
497#
498# CONFIG_MARVELL_PHY is not set
499# CONFIG_DAVICOM_PHY is not set
500# CONFIG_QSEMI_PHY is not set
501# CONFIG_LXT_PHY is not set
502# CONFIG_CICADA_PHY is not set
503# CONFIG_VITESSE_PHY is not set
504CONFIG_SMSC_PHY=y
505# CONFIG_BROADCOM_PHY is not set
506# CONFIG_ICPLUS_PHY is not set
507# CONFIG_REALTEK_PHY is not set
508# CONFIG_NATIONAL_PHY is not set
509# CONFIG_STE10XP is not set
510# CONFIG_LSI_ET1011C_PHY is not set
511# CONFIG_FIXED_PHY is not set
512# CONFIG_MDIO_BITBANG is not set
494CONFIG_NET_ETHERNET=y 513CONFIG_NET_ETHERNET=y
495CONFIG_MII=y 514CONFIG_MII=y
496# CONFIG_AX88796 is not set 515# CONFIG_AX88796 is not set
497CONFIG_SMC91X=y 516CONFIG_SMC91X=y
498# CONFIG_DM9000 is not set 517# CONFIG_DM9000 is not set
499CONFIG_SMC911X=y 518# CONFIG_SMC911X is not set
519CONFIG_SMSC911X=y
500# CONFIG_IBM_NEW_EMAC_ZMII is not set 520# CONFIG_IBM_NEW_EMAC_ZMII is not set
501# CONFIG_IBM_NEW_EMAC_RGMII is not set 521# CONFIG_IBM_NEW_EMAC_RGMII is not set
502# CONFIG_IBM_NEW_EMAC_TAH is not set 522# CONFIG_IBM_NEW_EMAC_TAH is not set
diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 7ac812dc055a..e26c4fe61fae 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -198,17 +198,17 @@ static int at91_pm_verify_clocks(void)
198 /* USB must not be using PLLB */ 198 /* USB must not be using PLLB */
199 if (cpu_is_at91rm9200()) { 199 if (cpu_is_at91rm9200()) {
200 if ((scsr & (AT91RM9200_PMC_UHP | AT91RM9200_PMC_UDP)) != 0) { 200 if ((scsr & (AT91RM9200_PMC_UHP | AT91RM9200_PMC_UDP)) != 0) {
201 pr_debug("AT91: PM - Suspend-to-RAM with USB still active\n"); 201 pr_err("AT91: PM - Suspend-to-RAM with USB still active\n");
202 return 0; 202 return 0;
203 } 203 }
204 } else if (cpu_is_at91sam9260() || cpu_is_at91sam9261() || cpu_is_at91sam9263() || cpu_is_at91sam9g20()) { 204 } else if (cpu_is_at91sam9260() || cpu_is_at91sam9261() || cpu_is_at91sam9263() || cpu_is_at91sam9g20()) {
205 if ((scsr & (AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP)) != 0) { 205 if ((scsr & (AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP)) != 0) {
206 pr_debug("AT91: PM - Suspend-to-RAM with USB still active\n"); 206 pr_err("AT91: PM - Suspend-to-RAM with USB still active\n");
207 return 0; 207 return 0;
208 } 208 }
209 } else if (cpu_is_at91cap9()) { 209 } else if (cpu_is_at91cap9()) {
210 if ((scsr & AT91CAP9_PMC_UHP) != 0) { 210 if ((scsr & AT91CAP9_PMC_UHP) != 0) {
211 pr_debug("AT91: PM - Suspend-to-RAM with USB still active\n"); 211 pr_err("AT91: PM - Suspend-to-RAM with USB still active\n");
212 return 0; 212 return 0;
213 } 213 }
214 } 214 }
@@ -223,7 +223,7 @@ static int at91_pm_verify_clocks(void)
223 223
224 css = at91_sys_read(AT91_PMC_PCKR(i)) & AT91_PMC_CSS; 224 css = at91_sys_read(AT91_PMC_PCKR(i)) & AT91_PMC_CSS;
225 if (css != AT91_PMC_CSS_SLOW) { 225 if (css != AT91_PMC_CSS_SLOW) {
226 pr_debug("AT91: PM - Suspend-to-RAM with PCK%d src %d\n", i, css); 226 pr_err("AT91: PM - Suspend-to-RAM with PCK%d src %d\n", i, css);
227 return 0; 227 return 0;
228 } 228 }
229 } 229 }
diff --git a/arch/arm/mach-gemini/include/mach/system.h b/arch/arm/mach-gemini/include/mach/system.h
index bbbd72767a02..4d9c1f872472 100644
--- a/arch/arm/mach-gemini/include/mach/system.h
+++ b/arch/arm/mach-gemini/include/mach/system.h
@@ -28,7 +28,7 @@ static inline void arch_idle(void)
28 cpu_do_idle(); 28 cpu_do_idle();
29} 29}
30 30
31static inline void arch_reset(char mode) 31static inline void arch_reset(char mode, const char *cmd)
32{ 32{
33 __raw_writel(RESET_GLOBAL | RESET_CPU1, 33 __raw_writel(RESET_GLOBAL | RESET_CPU1,
34 IO_ADDRESS(GEMINI_GLOBAL_BASE) + GLOBAL_RESET); 34 IO_ADDRESS(GEMINI_GLOBAL_BASE) + GLOBAL_RESET);
diff --git a/arch/arm/mach-mmp/include/mach/system.h b/arch/arm/mach-mmp/include/mach/system.h
index 001edfefec19..4f5b0e0ce6cf 100644
--- a/arch/arm/mach-mmp/include/mach/system.h
+++ b/arch/arm/mach-mmp/include/mach/system.h
@@ -14,7 +14,7 @@ static inline void arch_idle(void)
14 cpu_do_idle(); 14 cpu_do_idle();
15} 15}
16 16
17static inline void arch_reset(char mode) 17static inline void arch_reset(char mode, const char *cmd)
18{ 18{
19 cpu_reset(0); 19 cpu_reset(0);
20} 20}
diff --git a/arch/arm/mach-mx3/pcm037.c b/arch/arm/mach-mx3/pcm037.c
index 5fce022114de..c3648eff5137 100644
--- a/arch/arm/mach-mx3/pcm037.c
+++ b/arch/arm/mach-mx3/pcm037.c
@@ -24,7 +24,7 @@
24#include <linux/mtd/plat-ram.h> 24#include <linux/mtd/plat-ram.h>
25#include <linux/memory.h> 25#include <linux/memory.h>
26#include <linux/gpio.h> 26#include <linux/gpio.h>
27#include <linux/smc911x.h> 27#include <linux/smsc911x.h>
28#include <linux/interrupt.h> 28#include <linux/interrupt.h>
29#include <linux/i2c.h> 29#include <linux/i2c.h>
30#include <linux/i2c/at24.h> 30#include <linux/i2c/at24.h>
@@ -70,7 +70,7 @@ static struct imxuart_platform_data uart_pdata = {
70 .flags = IMXUART_HAVE_RTSCTS, 70 .flags = IMXUART_HAVE_RTSCTS,
71}; 71};
72 72
73static struct resource smc911x_resources[] = { 73static struct resource smsc911x_resources[] = {
74 [0] = { 74 [0] = {
75 .start = CS1_BASE_ADDR + 0x300, 75 .start = CS1_BASE_ADDR + 0x300,
76 .end = CS1_BASE_ADDR + 0x300 + SZ_64K - 1, 76 .end = CS1_BASE_ADDR + 0x300 + SZ_64K - 1,
@@ -79,22 +79,25 @@ static struct resource smc911x_resources[] = {
79 [1] = { 79 [1] = {
80 .start = IOMUX_TO_IRQ(MX31_PIN_GPIO3_1), 80 .start = IOMUX_TO_IRQ(MX31_PIN_GPIO3_1),
81 .end = IOMUX_TO_IRQ(MX31_PIN_GPIO3_1), 81 .end = IOMUX_TO_IRQ(MX31_PIN_GPIO3_1),
82 .flags = IORESOURCE_IRQ, 82 .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWLEVEL,
83 }, 83 },
84}; 84};
85 85
86static struct smc911x_platdata smc911x_info = { 86static struct smsc911x_platform_config smsc911x_info = {
87 .flags = SMC911X_USE_32BIT, 87 .flags = SMSC911X_USE_32BIT | SMSC911X_FORCE_INTERNAL_PHY |
88 .irq_flags = IRQF_SHARED | IRQF_TRIGGER_LOW, 88 SMSC911X_SAVE_MAC_ADDRESS,
89 .irq_polarity = SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
90 .irq_type = SMSC911X_IRQ_TYPE_OPEN_DRAIN,
91 .phy_interface = PHY_INTERFACE_MODE_MII,
89}; 92};
90 93
91static struct platform_device pcm037_eth = { 94static struct platform_device pcm037_eth = {
92 .name = "smc911x", 95 .name = "smsc911x",
93 .id = -1, 96 .id = -1,
94 .num_resources = ARRAY_SIZE(smc911x_resources), 97 .num_resources = ARRAY_SIZE(smsc911x_resources),
95 .resource = smc911x_resources, 98 .resource = smsc911x_resources,
96 .dev = { 99 .dev = {
97 .platform_data = &smc911x_info, 100 .platform_data = &smsc911x_info,
98 }, 101 },
99}; 102};
100 103
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index a2c3fcc27a22..c49d9bfa3abd 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -47,6 +47,8 @@ obj-$(CONFIG_MACH_OMAP_3430SDP) += board-3430sdp.o \
47 47
48obj-$(CONFIG_MACH_NOKIA_RX51) += board-rx51.o \ 48obj-$(CONFIG_MACH_NOKIA_RX51) += board-rx51.o \
49 board-rx51-peripherals.o \ 49 board-rx51-peripherals.o \
50 mmc-twl4030.o
51
50# Platform specific device init code 52# Platform specific device init code
51ifeq ($(CONFIG_USB_MUSB_SOC),y) 53ifeq ($(CONFIG_USB_MUSB_SOC),y)
52obj-y += usb-musb.o 54obj-y += usb-musb.o
diff --git a/arch/arm/mach-omap2/board-ldp.c b/arch/arm/mach-omap2/board-ldp.c
index e096f776f996..da57b0fcda14 100644
--- a/arch/arm/mach-omap2/board-ldp.c
+++ b/arch/arm/mach-omap2/board-ldp.c
@@ -23,6 +23,7 @@
23#include <linux/spi/ads7846.h> 23#include <linux/spi/ads7846.h>
24#include <linux/i2c/twl4030.h> 24#include <linux/i2c/twl4030.h>
25#include <linux/io.h> 25#include <linux/io.h>
26#include <linux/smsc911x.h>
26 27
27#include <mach/hardware.h> 28#include <mach/hardware.h>
28#include <asm/mach-types.h> 29#include <asm/mach-types.h>
@@ -41,12 +42,12 @@
41 42
42#include "mmc-twl4030.h" 43#include "mmc-twl4030.h"
43 44
44#define LDP_SMC911X_CS 1 45#define LDP_SMSC911X_CS 1
45#define LDP_SMC911X_GPIO 152 46#define LDP_SMSC911X_GPIO 152
46#define DEBUG_BASE 0x08000000 47#define DEBUG_BASE 0x08000000
47#define LDP_ETHR_START DEBUG_BASE 48#define LDP_ETHR_START DEBUG_BASE
48 49
49static struct resource ldp_smc911x_resources[] = { 50static struct resource ldp_smsc911x_resources[] = {
50 [0] = { 51 [0] = {
51 .start = LDP_ETHR_START, 52 .start = LDP_ETHR_START,
52 .end = LDP_ETHR_START + SZ_4K, 53 .end = LDP_ETHR_START + SZ_4K,
@@ -59,40 +60,50 @@ static struct resource ldp_smc911x_resources[] = {
59 }, 60 },
60}; 61};
61 62
62static struct platform_device ldp_smc911x_device = { 63static struct smsc911x_platform_config ldp_smsc911x_config = {
63 .name = "smc911x", 64 .irq_polarity = SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
65 .irq_type = SMSC911X_IRQ_TYPE_OPEN_DRAIN,
66 .flags = SMSC911X_USE_32BIT,
67 .phy_interface = PHY_INTERFACE_MODE_MII,
68};
69
70static struct platform_device ldp_smsc911x_device = {
71 .name = "smsc911x",
64 .id = -1, 72 .id = -1,
65 .num_resources = ARRAY_SIZE(ldp_smc911x_resources), 73 .num_resources = ARRAY_SIZE(ldp_smsc911x_resources),
66 .resource = ldp_smc911x_resources, 74 .resource = ldp_smsc911x_resources,
75 .dev = {
76 .platform_data = &ldp_smsc911x_config,
77 },
67}; 78};
68 79
69static struct platform_device *ldp_devices[] __initdata = { 80static struct platform_device *ldp_devices[] __initdata = {
70 &ldp_smc911x_device, 81 &ldp_smsc911x_device,
71}; 82};
72 83
73static inline void __init ldp_init_smc911x(void) 84static inline void __init ldp_init_smsc911x(void)
74{ 85{
75 int eth_cs; 86 int eth_cs;
76 unsigned long cs_mem_base; 87 unsigned long cs_mem_base;
77 int eth_gpio = 0; 88 int eth_gpio = 0;
78 89
79 eth_cs = LDP_SMC911X_CS; 90 eth_cs = LDP_SMSC911X_CS;
80 91
81 if (gpmc_cs_request(eth_cs, SZ_16M, &cs_mem_base) < 0) { 92 if (gpmc_cs_request(eth_cs, SZ_16M, &cs_mem_base) < 0) {
82 printk(KERN_ERR "Failed to request GPMC mem for smc911x\n"); 93 printk(KERN_ERR "Failed to request GPMC mem for smsc911x\n");
83 return; 94 return;
84 } 95 }
85 96
86 ldp_smc911x_resources[0].start = cs_mem_base + 0x0; 97 ldp_smsc911x_resources[0].start = cs_mem_base + 0x0;
87 ldp_smc911x_resources[0].end = cs_mem_base + 0xff; 98 ldp_smsc911x_resources[0].end = cs_mem_base + 0xff;
88 udelay(100); 99 udelay(100);
89 100
90 eth_gpio = LDP_SMC911X_GPIO; 101 eth_gpio = LDP_SMSC911X_GPIO;
91 102
92 ldp_smc911x_resources[1].start = OMAP_GPIO_IRQ(eth_gpio); 103 ldp_smsc911x_resources[1].start = OMAP_GPIO_IRQ(eth_gpio);
93 104
94 if (gpio_request(eth_gpio, "smc911x irq") < 0) { 105 if (gpio_request(eth_gpio, "smsc911x irq") < 0) {
95 printk(KERN_ERR "Failed to request GPIO%d for smc911x IRQ\n", 106 printk(KERN_ERR "Failed to request GPIO%d for smsc911x IRQ\n",
96 eth_gpio); 107 eth_gpio);
97 return; 108 return;
98 } 109 }
@@ -104,7 +115,7 @@ static void __init omap_ldp_init_irq(void)
104 omap2_init_common_hw(NULL); 115 omap2_init_common_hw(NULL);
105 omap_init_irq(); 116 omap_init_irq();
106 omap_gpio_init(); 117 omap_gpio_init();
107 ldp_init_smc911x(); 118 ldp_init_smsc911x();
108} 119}
109 120
110static struct omap_uart_config ldp_uart_config __initdata = { 121static struct omap_uart_config ldp_uart_config __initdata = {
diff --git a/arch/arm/mach-omap2/board-overo.c b/arch/arm/mach-omap2/board-overo.c
index b3f6e9d81807..b1f23bea863f 100644
--- a/arch/arm/mach-omap2/board-overo.c
+++ b/arch/arm/mach-omap2/board-overo.c
@@ -57,6 +57,9 @@
57#define GPMC_CS0_BASE 0x60 57#define GPMC_CS0_BASE 0x60
58#define GPMC_CS_SIZE 0x30 58#define GPMC_CS_SIZE 0x30
59 59
60#define OVERO_SMSC911X_CS 5
61#define OVERO_SMSC911X_GPIO 176
62
60#if defined(CONFIG_TOUCHSCREEN_ADS7846) || \ 63#if defined(CONFIG_TOUCHSCREEN_ADS7846) || \
61 defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE) 64 defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE)
62 65
@@ -116,6 +119,67 @@ static void __init overo_ads7846_init(void)
116static inline void __init overo_ads7846_init(void) { return; } 119static inline void __init overo_ads7846_init(void) { return; }
117#endif 120#endif
118 121
122#if defined(CONFIG_SMSC911X) || defined(CONFIG_SMSC911X_MODULE)
123
124#include <linux/smsc911x.h>
125
126static struct resource overo_smsc911x_resources[] = {
127 {
128 .name = "smsc911x-memory",
129 .flags = IORESOURCE_MEM,
130 },
131 {
132 .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWLEVEL,
133 },
134};
135
136static struct smsc911x_platform_config overo_smsc911x_config = {
137 .irq_polarity = SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
138 .irq_type = SMSC911X_IRQ_TYPE_OPEN_DRAIN,
139 .flags = SMSC911X_USE_32BIT ,
140 .phy_interface = PHY_INTERFACE_MODE_MII,
141};
142
143static struct platform_device overo_smsc911x_device = {
144 .name = "smsc911x",
145 .id = -1,
146 .num_resources = ARRAY_SIZE(overo_smsc911x_resources),
147 .resource = &overo_smsc911x_resources,
148 .dev = {
149 .platform_data = &overo_smsc911x_config,
150 },
151};
152
153static inline void __init overo_init_smsc911x(void)
154{
155 unsigned long cs_mem_base;
156
157 if (gpmc_cs_request(OVERO_SMSC911X_CS, SZ_16M, &cs_mem_base) < 0) {
158 printk(KERN_ERR "Failed request for GPMC mem for smsc911x\n");
159 return;
160 }
161
162 overo_smsc911x_resources[0].start = cs_mem_base + 0x0;
163 overo_smsc911x_resources[0].end = cs_mem_base + 0xff;
164
165 if ((gpio_request(OVERO_SMSC911X_GPIO, "SMSC911X IRQ") == 0) &&
166 (gpio_direction_input(OVERO_SMSC911X_GPIO) == 0)) {
167 gpio_export(OVERO_SMSC911X_GPIO, 0);
168 } else {
169 printk(KERN_ERR "could not obtain gpio for SMSC911X IRQ\n");
170 return;
171 }
172
173 overo_smsc911x_resources[1].start = OMAP_GPIO_IRQ(OVERO_SMSC911X_GPIO);
174 overo_smsc911x_resources[1].end = 0;
175
176 platform_device_register(&overo_smsc911x_device);
177}
178
179#else
180static inline void __init overo_init_smsc911x(void) { return; }
181#endif
182
119static struct mtd_partition overo_nand_partitions[] = { 183static struct mtd_partition overo_nand_partitions[] = {
120 { 184 {
121 .name = "xloader", 185 .name = "xloader",
@@ -290,6 +354,7 @@ static void __init overo_init(void)
290 overo_flash_init(); 354 overo_flash_init();
291 usb_musb_init(); 355 usb_musb_init();
292 overo_ads7846_init(); 356 overo_ads7846_init();
357 overo_init_smsc911x();
293 358
294 if ((gpio_request(OVERO_GPIO_W2W_NRESET, 359 if ((gpio_request(OVERO_GPIO_W2W_NRESET,
295 "OVERO_GPIO_W2W_NRESET") == 0) && 360 "OVERO_GPIO_W2W_NRESET") == 0) &&
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index d6766685cfc7..9ab947c14f26 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -28,7 +28,7 @@
28#include <linux/clocksource.h> 28#include <linux/clocksource.h>
29#include <linux/clockchips.h> 29#include <linux/clockchips.h>
30#include <linux/io.h> 30#include <linux/io.h>
31#include <linux/smc911x.h> 31#include <linux/smsc911x.h>
32#include <linux/ata_platform.h> 32#include <linux/ata_platform.h>
33 33
34#include <asm/clkdev.h> 34#include <asm/clkdev.h>
@@ -128,14 +128,15 @@ int realview_flash_register(struct resource *res, u32 num)
128 return platform_device_register(&realview_flash_device); 128 return platform_device_register(&realview_flash_device);
129} 129}
130 130
131static struct smc911x_platdata realview_smc911x_platdata = { 131static struct smsc911x_platform_config smsc911x_config = {
132 .flags = SMC911X_USE_32BIT, 132 .flags = SMSC911X_USE_32BIT,
133 .irq_flags = IRQF_SHARED, 133 .irq_polarity = SMSC911X_IRQ_POLARITY_ACTIVE_HIGH,
134 .irq_polarity = 1, 134 .irq_type = SMSC911X_IRQ_TYPE_PUSH_PULL,
135 .phy_interface = PHY_INTERFACE_MODE_MII,
135}; 136};
136 137
137static struct platform_device realview_eth_device = { 138static struct platform_device realview_eth_device = {
138 .name = "smc911x", 139 .name = "smsc911x",
139 .id = 0, 140 .id = 0,
140 .num_resources = 2, 141 .num_resources = 2,
141}; 142};
@@ -145,8 +146,8 @@ int realview_eth_register(const char *name, struct resource *res)
145 if (name) 146 if (name)
146 realview_eth_device.name = name; 147 realview_eth_device.name = name;
147 realview_eth_device.resource = res; 148 realview_eth_device.resource = res;
148 if (strcmp(realview_eth_device.name, "smc911x") == 0) 149 if (strcmp(realview_eth_device.name, "smsc911x") == 0)
149 realview_eth_device.dev.platform_data = &realview_smc911x_platdata; 150 realview_eth_device.dev.platform_data = &smsc911x_config;
150 151
151 return platform_device_register(&realview_eth_device); 152 return platform_device_register(&realview_eth_device);
152} 153}
diff --git a/arch/arm/mach-realview/localtimer.c b/arch/arm/mach-realview/localtimer.c
index 67d6d9cc68b2..d0d39adf6407 100644
--- a/arch/arm/mach-realview/localtimer.c
+++ b/arch/arm/mach-realview/localtimer.c
@@ -191,6 +191,7 @@ void __cpuinit local_timer_setup(void)
191 clk->name = "dummy_timer"; 191 clk->name = "dummy_timer";
192 clk->features = CLOCK_EVT_FEAT_DUMMY; 192 clk->features = CLOCK_EVT_FEAT_DUMMY;
193 clk->rating = 200; 193 clk->rating = 200;
194 clk->mult = 1;
194 clk->set_mode = dummy_timer_set_mode; 195 clk->set_mode = dummy_timer_set_mode;
195 clk->broadcast = smp_timer_broadcast; 196 clk->broadcast = smp_timer_broadcast;
196 clk->cpumask = cpumask_of(cpu); 197 clk->cpumask = cpumask_of(cpu);
diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S
index 94077fbd96b7..6f7e70907e44 100644
--- a/arch/arm/mm/abort-ev6.S
+++ b/arch/arm/mm/abort-ev6.S
@@ -29,10 +29,10 @@ ENTRY(v6_early_abort)
29 mrc p15, 0, r1, c5, c0, 0 @ get FSR 29 mrc p15, 0, r1, c5, c0, 0 @ get FSR
30 mrc p15, 0, r0, c6, c0, 0 @ get FAR 30 mrc p15, 0, r0, c6, c0, 0 @ get FAR
31/* 31/*
32 * Faulty SWP instruction on 1136 doesn't set bit 11 in DFSR. 32 * Faulty SWP instruction on 1136 doesn't set bit 11 in DFSR (erratum 326103).
33 * The test below covers all the write situations, including Java bytecodes 33 * The test below covers all the write situations, including Java bytecodes
34 */ 34 */
35 bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR 35 bic r1, r1, #1 << 11 @ clear bit 11 of FSR
36 tst r3, #PSR_J_BIT @ Java? 36 tst r3, #PSR_J_BIT @ Java?
37 movne pc, lr 37 movne pc, lr
38 do_thumb_abort 38 do_thumb_abort
diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
index d6dd83826f8a..6e77c042d8e9 100644
--- a/arch/arm/mm/cache-feroceon-l2.c
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -115,6 +115,10 @@ static inline void l2_inv_pa_range(unsigned long start, unsigned long end)
115 raw_local_irq_restore(flags); 115 raw_local_irq_restore(flags);
116} 116}
117 117
118static inline void l2_inv_all(void)
119{
120 __asm__("mcr p15, 1, %0, c15, c11, 0" : : "r" (0));
121}
118 122
119/* 123/*
120 * Linux primitives. 124 * Linux primitives.
@@ -254,9 +258,7 @@ static void __init enable_dcache(void)
254 258
255static void __init __invalidate_icache(void) 259static void __init __invalidate_icache(void)
256{ 260{
257 int dummy; 261 __asm__("mcr p15, 0, %0, c7, c5, 0" : : "r" (0));
258
259 __asm__ __volatile__("mcr p15, 0, %0, c7, c5, 0" : "=r" (dummy));
260} 262}
261 263
262static int __init invalidate_and_disable_icache(void) 264static int __init invalidate_and_disable_icache(void)
@@ -321,6 +323,7 @@ static void __init enable_l2(void)
321 323
322 d = flush_and_disable_dcache(); 324 d = flush_and_disable_dcache();
323 i = invalidate_and_disable_icache(); 325 i = invalidate_and_disable_icache();
326 l2_inv_all();
324 write_extra_features(u | 0x00400000); 327 write_extra_features(u | 0x00400000);
325 if (i) 328 if (i)
326 enable_icache(); 329 enable_icache();
diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S
index ba592a9e6fb3..a2bed62aec21 100644
--- a/arch/arm/vfp/entry.S
+++ b/arch/arm/vfp/entry.S
@@ -15,13 +15,16 @@
15 * r10 = thread_info structure 15 * r10 = thread_info structure
16 * lr = failure return 16 * lr = failure return
17 */ 17 */
18#include <linux/linkage.h> 18#include <asm/thread_info.h>
19#include <linux/init.h>
20#include <asm/asm-offsets.h>
21#include <asm/assembler.h>
22#include <asm/vfpmacros.h> 19#include <asm/vfpmacros.h>
20#include "../kernel/entry-header.S"
23 21
24ENTRY(do_vfp) 22ENTRY(do_vfp)
23#ifdef CONFIG_PREEMPT
24 ldr r4, [r10, #TI_PREEMPT] @ get preempt count
25 add r11, r4, #1 @ increment it
26 str r11, [r10, #TI_PREEMPT]
27#endif
25 enable_irq 28 enable_irq
26 ldr r4, .LCvfp 29 ldr r4, .LCvfp
27 ldr r11, [r10, #TI_CPU] @ CPU number 30 ldr r11, [r10, #TI_CPU] @ CPU number
@@ -30,6 +33,12 @@ ENTRY(do_vfp)
30ENDPROC(do_vfp) 33ENDPROC(do_vfp)
31 34
32ENTRY(vfp_null_entry) 35ENTRY(vfp_null_entry)
36#ifdef CONFIG_PREEMPT
37 get_thread_info r10
38 ldr r4, [r10, #TI_PREEMPT] @ get preempt count
39 sub r11, r4, #1 @ decrement it
40 str r11, [r10, #TI_PREEMPT]
41#endif
33 mov pc, lr 42 mov pc, lr
34ENDPROC(vfp_null_entry) 43ENDPROC(vfp_null_entry)
35 44
@@ -41,6 +50,12 @@ ENDPROC(vfp_null_entry)
41 50
42 __INIT 51 __INIT
43ENTRY(vfp_testing_entry) 52ENTRY(vfp_testing_entry)
53#ifdef CONFIG_PREEMPT
54 get_thread_info r10
55 ldr r4, [r10, #TI_PREEMPT] @ get preempt count
56 sub r11, r4, #1 @ decrement it
57 str r11, [r10, #TI_PREEMPT]
58#endif
44 ldr r0, VFP_arch_address 59 ldr r0, VFP_arch_address
45 str r5, [r0] @ known non-zero value 60 str r5, [r0] @ known non-zero value
46 mov pc, r9 @ we have handled the fault 61 mov pc, r9 @ we have handled the fault
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index a5a4e57763c3..83c4e384b16d 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -137,6 +137,12 @@ check_for_exception:
137 VFPFMXR FPEXC, r1 @ restore FPEXC last 137 VFPFMXR FPEXC, r1 @ restore FPEXC last
138 sub r2, r2, #4 138 sub r2, r2, #4
139 str r2, [sp, #S_PC] @ retry the instruction 139 str r2, [sp, #S_PC] @ retry the instruction
140#ifdef CONFIG_PREEMPT
141 get_thread_info r10
142 ldr r4, [r10, #TI_PREEMPT] @ get preempt count
143 sub r11, r4, #1 @ decrement it
144 str r11, [r10, #TI_PREEMPT]
145#endif
140 mov pc, r9 @ we think we have handled things 146 mov pc, r9 @ we think we have handled things
141 147
142 148
@@ -155,6 +161,12 @@ look_for_VFP_exceptions:
155 @ not recognised by VFP 161 @ not recognised by VFP
156 162
157 DBGSTR "not VFP" 163 DBGSTR "not VFP"
164#ifdef CONFIG_PREEMPT
165 get_thread_info r10
166 ldr r4, [r10, #TI_PREEMPT] @ get preempt count
167 sub r11, r4, #1 @ decrement it
168 str r11, [r10, #TI_PREEMPT]
169#endif
158 mov pc, lr 170 mov pc, lr
159 171
160process_exception: 172process_exception:
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 75457b30d813..01599c4ef726 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -266,7 +266,7 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
266 * on VFP subarch 1. 266 * on VFP subarch 1.
267 */ 267 */
268 vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs); 268 vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs);
269 return; 269 goto exit;
270 } 270 }
271 271
272 /* 272 /*
@@ -297,7 +297,7 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
297 * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1. 297 * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1.
298 */ 298 */
299 if (fpexc ^ (FPEXC_EX | FPEXC_FP2V)) 299 if (fpexc ^ (FPEXC_EX | FPEXC_FP2V))
300 return; 300 goto exit;
301 301
302 /* 302 /*
303 * The barrier() here prevents fpinst2 being read 303 * The barrier() here prevents fpinst2 being read
@@ -310,6 +310,8 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
310 exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs); 310 exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs);
311 if (exceptions) 311 if (exceptions)
312 vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); 312 vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
313 exit:
314 preempt_enable();
313} 315}
314 316
315static void vfp_enable(void *unused) 317static void vfp_enable(void *unused)
diff --git a/arch/m68k/include/asm/bootinfo.h b/arch/m68k/include/asm/bootinfo.h
index fedf3e326121..fb8a06b9ab6a 100644
--- a/arch/m68k/include/asm/bootinfo.h
+++ b/arch/m68k/include/asm/bootinfo.h
@@ -1,5 +1,378 @@
1#ifdef __uClinux__ 1/*
2#include "bootinfo_no.h" 2** asm/bootinfo.h -- Definition of the Linux/m68k boot information structure
3#else 3**
4#include "bootinfo_mm.h" 4** Copyright 1992 by Greg Harp
5**
6** This file is subject to the terms and conditions of the GNU General Public
7** License. See the file COPYING in the main directory of this archive
8** for more details.
9**
10** Created 09/29/92 by Greg Harp
11**
12** 5/2/94 Roman Hodek:
13** Added bi_atari part of the machine dependent union bi_un; for now it
14** contains just a model field to distinguish between TT and Falcon.
15** 26/7/96 Roman Zippel:
16** Renamed to setup.h; added some useful macros to allow gcc some
17** optimizations if possible.
18** 5/10/96 Geert Uytterhoeven:
19** Redesign of the boot information structure; renamed to bootinfo.h again
20** 27/11/96 Geert Uytterhoeven:
21** Backwards compatibility with bootinfo interface version 1.0
22*/
23
24#ifndef _M68K_BOOTINFO_H
25#define _M68K_BOOTINFO_H
26
27
28 /*
29 * Bootinfo definitions
30 *
31 * This is an easily parsable and extendable structure containing all
32 * information to be passed from the bootstrap to the kernel.
33 *
34 * This way I hope to keep all future changes back/forewards compatible.
35 * Thus, keep your fingers crossed...
36 *
37 * This structure is copied right after the kernel bss by the bootstrap
38 * routine.
39 */
40
41#ifndef __ASSEMBLY__
42
43struct bi_record {
44 unsigned short tag; /* tag ID */
45 unsigned short size; /* size of record (in bytes) */
46 unsigned long data[0]; /* data */
47};
48
49#endif /* __ASSEMBLY__ */
50
51
52 /*
53 * Tag Definitions
54 *
55 * Machine independent tags start counting from 0x0000
56 * Machine dependent tags start counting from 0x8000
57 */
58
59#define BI_LAST 0x0000 /* last record (sentinel) */
60#define BI_MACHTYPE 0x0001 /* machine type (u_long) */
61#define BI_CPUTYPE 0x0002 /* cpu type (u_long) */
62#define BI_FPUTYPE 0x0003 /* fpu type (u_long) */
63#define BI_MMUTYPE 0x0004 /* mmu type (u_long) */
64#define BI_MEMCHUNK 0x0005 /* memory chunk address and size */
65 /* (struct mem_info) */
66#define BI_RAMDISK 0x0006 /* ramdisk address and size */
67 /* (struct mem_info) */
68#define BI_COMMAND_LINE 0x0007 /* kernel command line parameters */
69 /* (string) */
70
71 /*
72 * Amiga-specific tags
73 */
74
75#define BI_AMIGA_MODEL 0x8000 /* model (u_long) */
76#define BI_AMIGA_AUTOCON 0x8001 /* AutoConfig device */
77 /* (struct ConfigDev) */
78#define BI_AMIGA_CHIP_SIZE 0x8002 /* size of Chip RAM (u_long) */
79#define BI_AMIGA_VBLANK 0x8003 /* VBLANK frequency (u_char) */
80#define BI_AMIGA_PSFREQ 0x8004 /* power supply frequency (u_char) */
81#define BI_AMIGA_ECLOCK 0x8005 /* EClock frequency (u_long) */
82#define BI_AMIGA_CHIPSET 0x8006 /* native chipset present (u_long) */
83#define BI_AMIGA_SERPER 0x8007 /* serial port period (u_short) */
84
85 /*
86 * Atari-specific tags
87 */
88
89#define BI_ATARI_MCH_COOKIE 0x8000 /* _MCH cookie from TOS (u_long) */
90#define BI_ATARI_MCH_TYPE 0x8001 /* special machine type (u_long) */
91 /* (values are ATARI_MACH_* defines */
92
93/* mch_cookie values (upper word) */
94#define ATARI_MCH_ST 0
95#define ATARI_MCH_STE 1
96#define ATARI_MCH_TT 2
97#define ATARI_MCH_FALCON 3
98
99/* mch_type values */
100#define ATARI_MACH_NORMAL 0 /* no special machine type */
101#define ATARI_MACH_MEDUSA 1 /* Medusa 040 */
102#define ATARI_MACH_HADES 2 /* Hades 040 or 060 */
103#define ATARI_MACH_AB40 3 /* Afterburner040 on Falcon */
104
105 /*
106 * VME-specific tags
107 */
108
109#define BI_VME_TYPE 0x8000 /* VME sub-architecture (u_long) */
110#define BI_VME_BRDINFO 0x8001 /* VME board information (struct) */
111
112/* BI_VME_TYPE codes */
113#define VME_TYPE_TP34V 0x0034 /* Tadpole TP34V */
114#define VME_TYPE_MVME147 0x0147 /* Motorola MVME147 */
115#define VME_TYPE_MVME162 0x0162 /* Motorola MVME162 */
116#define VME_TYPE_MVME166 0x0166 /* Motorola MVME166 */
117#define VME_TYPE_MVME167 0x0167 /* Motorola MVME167 */
118#define VME_TYPE_MVME172 0x0172 /* Motorola MVME172 */
119#define VME_TYPE_MVME177 0x0177 /* Motorola MVME177 */
120#define VME_TYPE_BVME4000 0x4000 /* BVM Ltd. BVME4000 */
121#define VME_TYPE_BVME6000 0x6000 /* BVM Ltd. BVME6000 */
122
123/* BI_VME_BRDINFO is a 32 byte struct as returned by the Bug code on
124 * Motorola VME boards. Contains board number, Bug version, board
125 * configuration options, etc. See include/asm/mvme16xhw.h for details.
126 */
127
128
129 /*
130 * Macintosh-specific tags (all u_long)
131 */
132
133#define BI_MAC_MODEL 0x8000 /* Mac Gestalt ID (model type) */
134#define BI_MAC_VADDR 0x8001 /* Mac video base address */
135#define BI_MAC_VDEPTH 0x8002 /* Mac video depth */
136#define BI_MAC_VROW 0x8003 /* Mac video rowbytes */
137#define BI_MAC_VDIM 0x8004 /* Mac video dimensions */
138#define BI_MAC_VLOGICAL 0x8005 /* Mac video logical base */
139#define BI_MAC_SCCBASE 0x8006 /* Mac SCC base address */
140#define BI_MAC_BTIME 0x8007 /* Mac boot time */
141#define BI_MAC_GMTBIAS 0x8008 /* Mac GMT timezone offset */
142#define BI_MAC_MEMSIZE 0x8009 /* Mac RAM size (sanity check) */
143#define BI_MAC_CPUID 0x800a /* Mac CPU type (sanity check) */
144#define BI_MAC_ROMBASE 0x800b /* Mac system ROM base address */
145
146 /*
147 * Macintosh hardware profile data - unused, see macintosh.h for
148 * resonable type values
149 */
150
151#define BI_MAC_VIA1BASE 0x8010 /* Mac VIA1 base address (always present) */
152#define BI_MAC_VIA2BASE 0x8011 /* Mac VIA2 base address (type varies) */
153#define BI_MAC_VIA2TYPE 0x8012 /* Mac VIA2 type (VIA, RBV, OSS) */
154#define BI_MAC_ADBTYPE 0x8013 /* Mac ADB interface type */
155#define BI_MAC_ASCBASE 0x8014 /* Mac Apple Sound Chip base address */
156#define BI_MAC_SCSI5380 0x8015 /* Mac NCR 5380 SCSI (base address, multi) */
157#define BI_MAC_SCSIDMA 0x8016 /* Mac SCSI DMA (base address) */
158#define BI_MAC_SCSI5396 0x8017 /* Mac NCR 53C96 SCSI (base address, multi) */
159#define BI_MAC_IDETYPE 0x8018 /* Mac IDE interface type */
160#define BI_MAC_IDEBASE 0x8019 /* Mac IDE interface base address */
161#define BI_MAC_NUBUS 0x801a /* Mac Nubus type (none, regular, pseudo) */
162#define BI_MAC_SLOTMASK 0x801b /* Mac Nubus slots present */
163#define BI_MAC_SCCTYPE 0x801c /* Mac SCC serial type (normal, IOP) */
164#define BI_MAC_ETHTYPE 0x801d /* Mac builtin ethernet type (Sonic, MACE */
165#define BI_MAC_ETHBASE 0x801e /* Mac builtin ethernet base address */
166#define BI_MAC_PMU 0x801f /* Mac power management / poweroff hardware */
167#define BI_MAC_IOP_SWIM 0x8020 /* Mac SWIM floppy IOP */
168#define BI_MAC_IOP_ADB 0x8021 /* Mac ADB IOP */
169
170 /*
171 * Mac: compatibility with old booter data format (temporarily)
172 * Fields unused with the new bootinfo can be deleted now; instead of
173 * adding new fields the struct might be splitted into a hardware address
174 * part and a hardware type part
175 */
176
177#ifndef __ASSEMBLY__
178
179struct mac_booter_data
180{
181 unsigned long videoaddr;
182 unsigned long videorow;
183 unsigned long videodepth;
184 unsigned long dimensions;
185 unsigned long args;
186 unsigned long boottime;
187 unsigned long gmtbias;
188 unsigned long bootver;
189 unsigned long videological;
190 unsigned long sccbase;
191 unsigned long id;
192 unsigned long memsize;
193 unsigned long serialmf;
194 unsigned long serialhsk;
195 unsigned long serialgpi;
196 unsigned long printmf;
197 unsigned long printhsk;
198 unsigned long printgpi;
199 unsigned long cpuid;
200 unsigned long rombase;
201 unsigned long adbdelay;
202 unsigned long timedbra;
203};
204
205extern struct mac_booter_data
206 mac_bi_data;
207
5#endif 208#endif
209
210 /*
211 * Apollo-specific tags
212 */
213
214#define BI_APOLLO_MODEL 0x8000 /* model (u_long) */
215
216 /*
217 * HP300-specific tags
218 */
219
220#define BI_HP300_MODEL 0x8000 /* model (u_long) */
221#define BI_HP300_UART_SCODE 0x8001 /* UART select code (u_long) */
222#define BI_HP300_UART_ADDR 0x8002 /* phys. addr of UART (u_long) */
223
224 /*
225 * Stuff for bootinfo interface versioning
226 *
227 * At the start of kernel code, a 'struct bootversion' is located.
228 * bootstrap checks for a matching version of the interface before booting
229 * a kernel, to avoid user confusion if kernel and bootstrap don't work
230 * together :-)
231 *
232 * If incompatible changes are made to the bootinfo interface, the major
233 * number below should be stepped (and the minor reset to 0) for the
234 * appropriate machine. If a change is backward-compatible, the minor
235 * should be stepped. "Backwards-compatible" means that booting will work,
236 * but certain features may not.
237 */
238
239#define BOOTINFOV_MAGIC 0x4249561A /* 'BIV^Z' */
240#define MK_BI_VERSION(major,minor) (((major)<<16)+(minor))
241#define BI_VERSION_MAJOR(v) (((v) >> 16) & 0xffff)
242#define BI_VERSION_MINOR(v) ((v) & 0xffff)
243
244#ifndef __ASSEMBLY__
245
246struct bootversion {
247 unsigned short branch;
248 unsigned long magic;
249 struct {
250 unsigned long machtype;
251 unsigned long version;
252 } machversions[0];
253};
254
255#endif /* __ASSEMBLY__ */
256
257#define AMIGA_BOOTI_VERSION MK_BI_VERSION( 2, 0 )
258#define ATARI_BOOTI_VERSION MK_BI_VERSION( 2, 1 )
259#define MAC_BOOTI_VERSION MK_BI_VERSION( 2, 0 )
260#define MVME147_BOOTI_VERSION MK_BI_VERSION( 2, 0 )
261#define MVME16x_BOOTI_VERSION MK_BI_VERSION( 2, 0 )
262#define BVME6000_BOOTI_VERSION MK_BI_VERSION( 2, 0 )
263#define Q40_BOOTI_VERSION MK_BI_VERSION( 2, 0 )
264#define HP300_BOOTI_VERSION MK_BI_VERSION( 2, 0 )
265
266#ifdef BOOTINFO_COMPAT_1_0
267
268 /*
269 * Backwards compatibility with bootinfo interface version 1.0
270 */
271
272#define COMPAT_AMIGA_BOOTI_VERSION MK_BI_VERSION( 1, 0 )
273#define COMPAT_ATARI_BOOTI_VERSION MK_BI_VERSION( 1, 0 )
274#define COMPAT_MAC_BOOTI_VERSION MK_BI_VERSION( 1, 0 )
275
276#include <linux/zorro.h>
277
278#define COMPAT_NUM_AUTO 16
279
280struct compat_bi_Amiga {
281 int model;
282 int num_autocon;
283 struct ConfigDev autocon[COMPAT_NUM_AUTO];
284 unsigned long chip_size;
285 unsigned char vblank;
286 unsigned char psfreq;
287 unsigned long eclock;
288 unsigned long chipset;
289 unsigned long hw_present;
290};
291
292struct compat_bi_Atari {
293 unsigned long hw_present;
294 unsigned long mch_cookie;
295};
296
297#ifndef __ASSEMBLY__
298
299struct compat_bi_Macintosh
300{
301 unsigned long videoaddr;
302 unsigned long videorow;
303 unsigned long videodepth;
304 unsigned long dimensions;
305 unsigned long args;
306 unsigned long boottime;
307 unsigned long gmtbias;
308 unsigned long bootver;
309 unsigned long videological;
310 unsigned long sccbase;
311 unsigned long id;
312 unsigned long memsize;
313 unsigned long serialmf;
314 unsigned long serialhsk;
315 unsigned long serialgpi;
316 unsigned long printmf;
317 unsigned long printhsk;
318 unsigned long printgpi;
319 unsigned long cpuid;
320 unsigned long rombase;
321 unsigned long adbdelay;
322 unsigned long timedbra;
323};
324
325#endif
326
327struct compat_mem_info {
328 unsigned long addr;
329 unsigned long size;
330};
331
332#define COMPAT_NUM_MEMINFO 4
333
334#define COMPAT_CPUB_68020 0
335#define COMPAT_CPUB_68030 1
336#define COMPAT_CPUB_68040 2
337#define COMPAT_CPUB_68060 3
338#define COMPAT_FPUB_68881 5
339#define COMPAT_FPUB_68882 6
340#define COMPAT_FPUB_68040 7
341#define COMPAT_FPUB_68060 8
342
343#define COMPAT_CPU_68020 (1<<COMPAT_CPUB_68020)
344#define COMPAT_CPU_68030 (1<<COMPAT_CPUB_68030)
345#define COMPAT_CPU_68040 (1<<COMPAT_CPUB_68040)
346#define COMPAT_CPU_68060 (1<<COMPAT_CPUB_68060)
347#define COMPAT_CPU_MASK (31)
348#define COMPAT_FPU_68881 (1<<COMPAT_FPUB_68881)
349#define COMPAT_FPU_68882 (1<<COMPAT_FPUB_68882)
350#define COMPAT_FPU_68040 (1<<COMPAT_FPUB_68040)
351#define COMPAT_FPU_68060 (1<<COMPAT_FPUB_68060)
352#define COMPAT_FPU_MASK (0xfe0)
353
354#define COMPAT_CL_SIZE (256)
355
356struct compat_bootinfo {
357 unsigned long machtype;
358 unsigned long cputype;
359 struct compat_mem_info memory[COMPAT_NUM_MEMINFO];
360 int num_memory;
361 unsigned long ramdisk_size;
362 unsigned long ramdisk_addr;
363 char command_line[COMPAT_CL_SIZE];
364 union {
365 struct compat_bi_Amiga bi_ami;
366 struct compat_bi_Atari bi_ata;
367 struct compat_bi_Macintosh bi_mac;
368 } bi_un;
369};
370
371#define bi_amiga bi_un.bi_ami
372#define bi_atari bi_un.bi_ata
373#define bi_mac bi_un.bi_mac
374
375#endif /* BOOTINFO_COMPAT_1_0 */
376
377
378#endif /* _M68K_BOOTINFO_H */
diff --git a/arch/m68k/include/asm/bootinfo_mm.h b/arch/m68k/include/asm/bootinfo_mm.h
deleted file mode 100644
index fb8a06b9ab6a..000000000000
--- a/arch/m68k/include/asm/bootinfo_mm.h
+++ /dev/null
@@ -1,378 +0,0 @@
1/*
2** asm/bootinfo.h -- Definition of the Linux/m68k boot information structure
3**
4** Copyright 1992 by Greg Harp
5**
6** This file is subject to the terms and conditions of the GNU General Public
7** License. See the file COPYING in the main directory of this archive
8** for more details.
9**
10** Created 09/29/92 by Greg Harp
11**
12** 5/2/94 Roman Hodek:
13** Added bi_atari part of the machine dependent union bi_un; for now it
14** contains just a model field to distinguish between TT and Falcon.
15** 26/7/96 Roman Zippel:
16** Renamed to setup.h; added some useful macros to allow gcc some
17** optimizations if possible.
18** 5/10/96 Geert Uytterhoeven:
19** Redesign of the boot information structure; renamed to bootinfo.h again
20** 27/11/96 Geert Uytterhoeven:
21** Backwards compatibility with bootinfo interface version 1.0
22*/
23
24#ifndef _M68K_BOOTINFO_H
25#define _M68K_BOOTINFO_H
26
27
28 /*
29 * Bootinfo definitions
30 *
31 * This is an easily parsable and extendable structure containing all
32 * information to be passed from the bootstrap to the kernel.
33 *
34 * This way I hope to keep all future changes back/forewards compatible.
35 * Thus, keep your fingers crossed...
36 *
37 * This structure is copied right after the kernel bss by the bootstrap
38 * routine.
39 */
40
41#ifndef __ASSEMBLY__
42
43struct bi_record {
44 unsigned short tag; /* tag ID */
45 unsigned short size; /* size of record (in bytes) */
46 unsigned long data[0]; /* data */
47};
48
49#endif /* __ASSEMBLY__ */
50
51
52 /*
53 * Tag Definitions
54 *
55 * Machine independent tags start counting from 0x0000
56 * Machine dependent tags start counting from 0x8000
57 */
58
59#define BI_LAST 0x0000 /* last record (sentinel) */
60#define BI_MACHTYPE 0x0001 /* machine type (u_long) */
61#define BI_CPUTYPE 0x0002 /* cpu type (u_long) */
62#define BI_FPUTYPE 0x0003 /* fpu type (u_long) */
63#define BI_MMUTYPE 0x0004 /* mmu type (u_long) */
64#define BI_MEMCHUNK 0x0005 /* memory chunk address and size */
65 /* (struct mem_info) */
66#define BI_RAMDISK 0x0006 /* ramdisk address and size */
67 /* (struct mem_info) */
68#define BI_COMMAND_LINE 0x0007 /* kernel command line parameters */
69 /* (string) */
70
71 /*
72 * Amiga-specific tags
73 */
74
75#define BI_AMIGA_MODEL 0x8000 /* model (u_long) */
76#define BI_AMIGA_AUTOCON 0x8001 /* AutoConfig device */
77 /* (struct ConfigDev) */
78#define BI_AMIGA_CHIP_SIZE 0x8002 /* size of Chip RAM (u_long) */
79#define BI_AMIGA_VBLANK 0x8003 /* VBLANK frequency (u_char) */
80#define BI_AMIGA_PSFREQ 0x8004 /* power supply frequency (u_char) */
81#define BI_AMIGA_ECLOCK 0x8005 /* EClock frequency (u_long) */
82#define BI_AMIGA_CHIPSET 0x8006 /* native chipset present (u_long) */
83#define BI_AMIGA_SERPER 0x8007 /* serial port period (u_short) */
84
85 /*
86 * Atari-specific tags
87 */
88
89#define BI_ATARI_MCH_COOKIE 0x8000 /* _MCH cookie from TOS (u_long) */
90#define BI_ATARI_MCH_TYPE 0x8001 /* special machine type (u_long) */
91 /* (values are ATARI_MACH_* defines */
92
93/* mch_cookie values (upper word) */
94#define ATARI_MCH_ST 0
95#define ATARI_MCH_STE 1
96#define ATARI_MCH_TT 2
97#define ATARI_MCH_FALCON 3
98
99/* mch_type values */
100#define ATARI_MACH_NORMAL 0 /* no special machine type */
101#define ATARI_MACH_MEDUSA 1 /* Medusa 040 */
102#define ATARI_MACH_HADES 2 /* Hades 040 or 060 */
103#define ATARI_MACH_AB40 3 /* Afterburner040 on Falcon */
104
105 /*
106 * VME-specific tags
107 */
108
109#define BI_VME_TYPE 0x8000 /* VME sub-architecture (u_long) */
110#define BI_VME_BRDINFO 0x8001 /* VME board information (struct) */
111
112/* BI_VME_TYPE codes */
113#define VME_TYPE_TP34V 0x0034 /* Tadpole TP34V */
114#define VME_TYPE_MVME147 0x0147 /* Motorola MVME147 */
115#define VME_TYPE_MVME162 0x0162 /* Motorola MVME162 */
116#define VME_TYPE_MVME166 0x0166 /* Motorola MVME166 */
117#define VME_TYPE_MVME167 0x0167 /* Motorola MVME167 */
118#define VME_TYPE_MVME172 0x0172 /* Motorola MVME172 */
119#define VME_TYPE_MVME177 0x0177 /* Motorola MVME177 */
120#define VME_TYPE_BVME4000 0x4000 /* BVM Ltd. BVME4000 */
121#define VME_TYPE_BVME6000 0x6000 /* BVM Ltd. BVME6000 */
122
123/* BI_VME_BRDINFO is a 32 byte struct as returned by the Bug code on
124 * Motorola VME boards. Contains board number, Bug version, board
125 * configuration options, etc. See include/asm/mvme16xhw.h for details.
126 */
127
128
129 /*
130 * Macintosh-specific tags (all u_long)
131 */
132
133#define BI_MAC_MODEL 0x8000 /* Mac Gestalt ID (model type) */
134#define BI_MAC_VADDR 0x8001 /* Mac video base address */
135#define BI_MAC_VDEPTH 0x8002 /* Mac video depth */
136#define BI_MAC_VROW 0x8003 /* Mac video rowbytes */
137#define BI_MAC_VDIM 0x8004 /* Mac video dimensions */
138#define BI_MAC_VLOGICAL 0x8005 /* Mac video logical base */
139#define BI_MAC_SCCBASE 0x8006 /* Mac SCC base address */
140#define BI_MAC_BTIME 0x8007 /* Mac boot time */
141#define BI_MAC_GMTBIAS 0x8008 /* Mac GMT timezone offset */
142#define BI_MAC_MEMSIZE 0x8009 /* Mac RAM size (sanity check) */
143#define BI_MAC_CPUID 0x800a /* Mac CPU type (sanity check) */
144#define BI_MAC_ROMBASE 0x800b /* Mac system ROM base address */
145
146 /*
147 * Macintosh hardware profile data - unused, see macintosh.h for
148 * resonable type values
149 */
150
151#define BI_MAC_VIA1BASE 0x8010 /* Mac VIA1 base address (always present) */
152#define BI_MAC_VIA2BASE 0x8011 /* Mac VIA2 base address (type varies) */
153#define BI_MAC_VIA2TYPE 0x8012 /* Mac VIA2 type (VIA, RBV, OSS) */
154#define BI_MAC_ADBTYPE 0x8013 /* Mac ADB interface type */
155#define BI_MAC_ASCBASE 0x8014 /* Mac Apple Sound Chip base address */
156#define BI_MAC_SCSI5380 0x8015 /* Mac NCR 5380 SCSI (base address, multi) */
157#define BI_MAC_SCSIDMA 0x8016 /* Mac SCSI DMA (base address) */
158#define BI_MAC_SCSI5396 0x8017 /* Mac NCR 53C96 SCSI (base address, multi) */
159#define BI_MAC_IDETYPE 0x8018 /* Mac IDE interface type */
160#define BI_MAC_IDEBASE 0x8019 /* Mac IDE interface base address */
161#define BI_MAC_NUBUS 0x801a /* Mac Nubus type (none, regular, pseudo) */
162#define BI_MAC_SLOTMASK 0x801b /* Mac Nubus slots present */
163#define BI_MAC_SCCTYPE 0x801c /* Mac SCC serial type (normal, IOP) */
164#define BI_MAC_ETHTYPE 0x801d /* Mac builtin ethernet type (Sonic, MACE */
165#define BI_MAC_ETHBASE 0x801e /* Mac builtin ethernet base address */
166#define BI_MAC_PMU 0x801f /* Mac power management / poweroff hardware */
167#define BI_MAC_IOP_SWIM 0x8020 /* Mac SWIM floppy IOP */
168#define BI_MAC_IOP_ADB 0x8021 /* Mac ADB IOP */
169
170 /*
171 * Mac: compatibility with old booter data format (temporarily)
172 * Fields unused with the new bootinfo can be deleted now; instead of
173 * adding new fields the struct might be splitted into a hardware address
174 * part and a hardware type part
175 */
176
177#ifndef __ASSEMBLY__
178
179struct mac_booter_data
180{
181 unsigned long videoaddr;
182 unsigned long videorow;
183 unsigned long videodepth;
184 unsigned long dimensions;
185 unsigned long args;
186 unsigned long boottime;
187 unsigned long gmtbias;
188 unsigned long bootver;
189 unsigned long videological;
190 unsigned long sccbase;
191 unsigned long id;
192 unsigned long memsize;
193 unsigned long serialmf;
194 unsigned long serialhsk;
195 unsigned long serialgpi;
196 unsigned long printmf;
197 unsigned long printhsk;
198 unsigned long printgpi;
199 unsigned long cpuid;
200 unsigned long rombase;
201 unsigned long adbdelay;
202 unsigned long timedbra;
203};
204
205extern struct mac_booter_data
206 mac_bi_data;
207
208#endif
209
210 /*
211 * Apollo-specific tags
212 */
213
214#define BI_APOLLO_MODEL 0x8000 /* model (u_long) */
215
216 /*
217 * HP300-specific tags
218 */
219
220#define BI_HP300_MODEL 0x8000 /* model (u_long) */
221#define BI_HP300_UART_SCODE 0x8001 /* UART select code (u_long) */
222#define BI_HP300_UART_ADDR 0x8002 /* phys. addr of UART (u_long) */
223
224 /*
225 * Stuff for bootinfo interface versioning
226 *
227 * At the start of kernel code, a 'struct bootversion' is located.
228 * bootstrap checks for a matching version of the interface before booting
229 * a kernel, to avoid user confusion if kernel and bootstrap don't work
230 * together :-)
231 *
232 * If incompatible changes are made to the bootinfo interface, the major
233 * number below should be stepped (and the minor reset to 0) for the
234 * appropriate machine. If a change is backward-compatible, the minor
235 * should be stepped. "Backwards-compatible" means that booting will work,
236 * but certain features may not.
237 */
238
239#define BOOTINFOV_MAGIC 0x4249561A /* 'BIV^Z' */
240#define MK_BI_VERSION(major,minor) (((major)<<16)+(minor))
241#define BI_VERSION_MAJOR(v) (((v) >> 16) & 0xffff)
242#define BI_VERSION_MINOR(v) ((v) & 0xffff)
243
244#ifndef __ASSEMBLY__
245
246struct bootversion {
247 unsigned short branch;
248 unsigned long magic;
249 struct {
250 unsigned long machtype;
251 unsigned long version;
252 } machversions[0];
253};
254
255#endif /* __ASSEMBLY__ */
256
257#define AMIGA_BOOTI_VERSION MK_BI_VERSION( 2, 0 )
258#define ATARI_BOOTI_VERSION MK_BI_VERSION( 2, 1 )
259#define MAC_BOOTI_VERSION MK_BI_VERSION( 2, 0 )
260#define MVME147_BOOTI_VERSION MK_BI_VERSION( 2, 0 )
261#define MVME16x_BOOTI_VERSION MK_BI_VERSION( 2, 0 )
262#define BVME6000_BOOTI_VERSION MK_BI_VERSION( 2, 0 )
263#define Q40_BOOTI_VERSION MK_BI_VERSION( 2, 0 )
264#define HP300_BOOTI_VERSION MK_BI_VERSION( 2, 0 )
265
266#ifdef BOOTINFO_COMPAT_1_0
267
268 /*
269 * Backwards compatibility with bootinfo interface version 1.0
270 */
271
272#define COMPAT_AMIGA_BOOTI_VERSION MK_BI_VERSION( 1, 0 )
273#define COMPAT_ATARI_BOOTI_VERSION MK_BI_VERSION( 1, 0 )
274#define COMPAT_MAC_BOOTI_VERSION MK_BI_VERSION( 1, 0 )
275
276#include <linux/zorro.h>
277
278#define COMPAT_NUM_AUTO 16
279
280struct compat_bi_Amiga {
281 int model;
282 int num_autocon;
283 struct ConfigDev autocon[COMPAT_NUM_AUTO];
284 unsigned long chip_size;
285 unsigned char vblank;
286 unsigned char psfreq;
287 unsigned long eclock;
288 unsigned long chipset;
289 unsigned long hw_present;
290};
291
292struct compat_bi_Atari {
293 unsigned long hw_present;
294 unsigned long mch_cookie;
295};
296
297#ifndef __ASSEMBLY__
298
299struct compat_bi_Macintosh
300{
301 unsigned long videoaddr;
302 unsigned long videorow;
303 unsigned long videodepth;
304 unsigned long dimensions;
305 unsigned long args;
306 unsigned long boottime;
307 unsigned long gmtbias;
308 unsigned long bootver;
309 unsigned long videological;
310 unsigned long sccbase;
311 unsigned long id;
312 unsigned long memsize;
313 unsigned long serialmf;
314 unsigned long serialhsk;
315 unsigned long serialgpi;
316 unsigned long printmf;
317 unsigned long printhsk;
318 unsigned long printgpi;
319 unsigned long cpuid;
320 unsigned long rombase;
321 unsigned long adbdelay;
322 unsigned long timedbra;
323};
324
325#endif
326
327struct compat_mem_info {
328 unsigned long addr;
329 unsigned long size;
330};
331
332#define COMPAT_NUM_MEMINFO 4
333
334#define COMPAT_CPUB_68020 0
335#define COMPAT_CPUB_68030 1
336#define COMPAT_CPUB_68040 2
337#define COMPAT_CPUB_68060 3
338#define COMPAT_FPUB_68881 5
339#define COMPAT_FPUB_68882 6
340#define COMPAT_FPUB_68040 7
341#define COMPAT_FPUB_68060 8
342
343#define COMPAT_CPU_68020 (1<<COMPAT_CPUB_68020)
344#define COMPAT_CPU_68030 (1<<COMPAT_CPUB_68030)
345#define COMPAT_CPU_68040 (1<<COMPAT_CPUB_68040)
346#define COMPAT_CPU_68060 (1<<COMPAT_CPUB_68060)
347#define COMPAT_CPU_MASK (31)
348#define COMPAT_FPU_68881 (1<<COMPAT_FPUB_68881)
349#define COMPAT_FPU_68882 (1<<COMPAT_FPUB_68882)
350#define COMPAT_FPU_68040 (1<<COMPAT_FPUB_68040)
351#define COMPAT_FPU_68060 (1<<COMPAT_FPUB_68060)
352#define COMPAT_FPU_MASK (0xfe0)
353
354#define COMPAT_CL_SIZE (256)
355
356struct compat_bootinfo {
357 unsigned long machtype;
358 unsigned long cputype;
359 struct compat_mem_info memory[COMPAT_NUM_MEMINFO];
360 int num_memory;
361 unsigned long ramdisk_size;
362 unsigned long ramdisk_addr;
363 char command_line[COMPAT_CL_SIZE];
364 union {
365 struct compat_bi_Amiga bi_ami;
366 struct compat_bi_Atari bi_ata;
367 struct compat_bi_Macintosh bi_mac;
368 } bi_un;
369};
370
371#define bi_amiga bi_un.bi_ami
372#define bi_atari bi_un.bi_ata
373#define bi_mac bi_un.bi_mac
374
375#endif /* BOOTINFO_COMPAT_1_0 */
376
377
378#endif /* _M68K_BOOTINFO_H */
diff --git a/arch/m68k/include/asm/bootinfo_no.h b/arch/m68k/include/asm/bootinfo_no.h
deleted file mode 100644
index c12e526f5189..000000000000
--- a/arch/m68k/include/asm/bootinfo_no.h
+++ /dev/null
@@ -1,2 +0,0 @@
1
2/* Nothing for m68knommu */
diff --git a/arch/m68k/include/asm/bug.h b/arch/m68k/include/asm/bug.h
index 997e0944ebc1..ef9a2e47352f 100644
--- a/arch/m68k/include/asm/bug.h
+++ b/arch/m68k/include/asm/bug.h
@@ -1,5 +1,30 @@
1#ifdef __uClinux__ 1#ifndef _M68K_BUG_H
2#include "bug_no.h" 2#define _M68K_BUG_H
3
4#ifdef CONFIG_MMU
5#ifdef CONFIG_BUG
6#ifdef CONFIG_DEBUG_BUGVERBOSE
7#ifndef CONFIG_SUN3
8#define BUG() do { \
9 printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
10 __builtin_trap(); \
11} while (0)
3#else 12#else
4#include "bug_mm.h" 13#define BUG() do { \
14 printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
15 panic("BUG!"); \
16} while (0)
17#endif
18#else
19#define BUG() do { \
20 __builtin_trap(); \
21} while (0)
22#endif
23
24#define HAVE_ARCH_BUG
25#endif
26#endif /* CONFIG_MMU */
27
28#include <asm-generic/bug.h>
29
5#endif 30#endif
diff --git a/arch/m68k/include/asm/bug_mm.h b/arch/m68k/include/asm/bug_mm.h
deleted file mode 100644
index e5b528deb8a8..000000000000
--- a/arch/m68k/include/asm/bug_mm.h
+++ /dev/null
@@ -1,29 +0,0 @@
1#ifndef _M68K_BUG_H
2#define _M68K_BUG_H
3
4
5#ifdef CONFIG_BUG
6#ifdef CONFIG_DEBUG_BUGVERBOSE
7#ifndef CONFIG_SUN3
8#define BUG() do { \
9 printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
10 __builtin_trap(); \
11} while (0)
12#else
13#define BUG() do { \
14 printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
15 panic("BUG!"); \
16} while (0)
17#endif
18#else
19#define BUG() do { \
20 __builtin_trap(); \
21} while (0)
22#endif
23
24#define HAVE_ARCH_BUG
25#endif
26
27#include <asm-generic/bug.h>
28
29#endif
diff --git a/arch/m68k/include/asm/bug_no.h b/arch/m68k/include/asm/bug_no.h
deleted file mode 100644
index 70e7dc0af21a..000000000000
--- a/arch/m68k/include/asm/bug_no.h
+++ /dev/null
@@ -1,4 +0,0 @@
1#ifndef _M68KNOMMU_BUG_H
2#define _M68KNOMMU_BUG_H
3#include <asm-generic/bug.h>
4#endif
diff --git a/arch/m68k/include/asm/bugs.h b/arch/m68k/include/asm/bugs.h
index 01f047d784ec..d06207b9ba5a 100644
--- a/arch/m68k/include/asm/bugs.h
+++ b/arch/m68k/include/asm/bugs.h
@@ -1,5 +1,20 @@
1#ifdef __uClinux__ 1/*
2#include "bugs_no.h" 2 * include/asm-m68k/bugs.h
3 *
4 * Copyright (C) 1994 Linus Torvalds
5 */
6
7/*
8 * This is included by init/main.c to check for architecture-dependent bugs.
9 *
10 * Needs:
11 * void check_bugs(void);
12 */
13
14#ifdef CONFIG_MMU
15extern void check_bugs(void); /* in arch/m68k/kernel/setup.c */
3#else 16#else
4#include "bugs_mm.h" 17static void check_bugs(void)
18{
19}
5#endif 20#endif
diff --git a/arch/m68k/include/asm/bugs_mm.h b/arch/m68k/include/asm/bugs_mm.h
deleted file mode 100644
index d01935592410..000000000000
--- a/arch/m68k/include/asm/bugs_mm.h
+++ /dev/null
@@ -1,14 +0,0 @@
1/*
2 * include/asm-m68k/bugs.h
3 *
4 * Copyright (C) 1994 Linus Torvalds
5 */
6
7/*
8 * This is included by init/main.c to check for architecture-dependent bugs.
9 *
10 * Needs:
11 * void check_bugs(void);
12 */
13
14extern void check_bugs(void); /* in arch/m68k/kernel/setup.c */
diff --git a/arch/m68k/include/asm/bugs_no.h b/arch/m68k/include/asm/bugs_no.h
deleted file mode 100644
index 5f382dac3a60..000000000000
--- a/arch/m68k/include/asm/bugs_no.h
+++ /dev/null
@@ -1,16 +0,0 @@
1/*
2 * include/asm-m68k/bugs.h
3 *
4 * Copyright (C) 1994 Linus Torvalds
5 */
6
7/*
8 * This is included by init/main.c to check for architecture-dependent bugs.
9 *
10 * Needs:
11 * void check_bugs(void);
12 */
13
14static void check_bugs(void)
15{
16}
diff --git a/arch/m68k/include/asm/cache.h b/arch/m68k/include/asm/cache.h
index 599c29bc8f40..fed3fd30de7e 100644
--- a/arch/m68k/include/asm/cache.h
+++ b/arch/m68k/include/asm/cache.h
@@ -1,5 +1,11 @@
1#ifdef __uClinux__ 1/*
2#include "cache_no.h" 2 * include/asm-m68k/cache.h
3#else 3 */
4#include "cache_mm.h" 4#ifndef __ARCH_M68K_CACHE_H
5#define __ARCH_M68K_CACHE_H
6
7/* bytes per L1 cache line */
8#define L1_CACHE_SHIFT 4
9#define L1_CACHE_BYTES (1<< L1_CACHE_SHIFT)
10
5#endif 11#endif
diff --git a/arch/m68k/include/asm/cache_mm.h b/arch/m68k/include/asm/cache_mm.h
deleted file mode 100644
index fed3fd30de7e..000000000000
--- a/arch/m68k/include/asm/cache_mm.h
+++ /dev/null
@@ -1,11 +0,0 @@
1/*
2 * include/asm-m68k/cache.h
3 */
4#ifndef __ARCH_M68K_CACHE_H
5#define __ARCH_M68K_CACHE_H
6
7/* bytes per L1 cache line */
8#define L1_CACHE_SHIFT 4
9#define L1_CACHE_BYTES (1<< L1_CACHE_SHIFT)
10
11#endif
diff --git a/arch/m68k/include/asm/cache_no.h b/arch/m68k/include/asm/cache_no.h
deleted file mode 100644
index 24e9eace5f8c..000000000000
--- a/arch/m68k/include/asm/cache_no.h
+++ /dev/null
@@ -1,12 +0,0 @@
1#ifndef __ARCH_M68KNOMMU_CACHE_H
2#define __ARCH_M68KNOMMU_CACHE_H
3
4/* bytes per L1 cache line */
5#define L1_CACHE_BYTES 16 /* this need to be at least 1 */
6
7/* m68k-elf-gcc 2.95.2 doesn't like these */
8
9#define __cacheline_aligned
10#define ____cacheline_aligned
11
12#endif
diff --git a/arch/m68k/include/asm/current.h b/arch/m68k/include/asm/current.h
index 51b056dfaedd..91fcc5358cfe 100644
--- a/arch/m68k/include/asm/current.h
+++ b/arch/m68k/include/asm/current.h
@@ -1,5 +1,28 @@
1#ifdef __uClinux__ 1#ifndef _M68K_CURRENT_H
2#include "current_no.h" 2#define _M68K_CURRENT_H
3
4#ifdef CONFIG_MMU
5
6register struct task_struct *current __asm__("%a2");
7
3#else 8#else
4#include "current_mm.h" 9
5#endif 10/*
11 * Rather than dedicate a register (as the m68k source does), we
12 * just keep a global, we should probably just change it all to be
13 * current and lose _current_task.
14 */
15#include <linux/thread_info.h>
16
17struct task_struct;
18
19static inline struct task_struct *get_current(void)
20{
21 return(current_thread_info()->task);
22}
23
24#define current get_current()
25
26#endif /* CONFNIG_MMU */
27
28#endif /* !(_M68K_CURRENT_H) */
diff --git a/arch/m68k/include/asm/current_mm.h b/arch/m68k/include/asm/current_mm.h
deleted file mode 100644
index 8de8f8ceda61..000000000000
--- a/arch/m68k/include/asm/current_mm.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _M68K_CURRENT_H
2#define _M68K_CURRENT_H
3
4register struct task_struct *current __asm__("%a2");
5
6#endif /* !(_M68K_CURRENT_H) */
diff --git a/arch/m68k/include/asm/current_no.h b/arch/m68k/include/asm/current_no.h
deleted file mode 100644
index 53ee0f9f7cef..000000000000
--- a/arch/m68k/include/asm/current_no.h
+++ /dev/null
@@ -1,24 +0,0 @@
1#ifndef _M68KNOMMU_CURRENT_H
2#define _M68KNOMMU_CURRENT_H
3/*
4 * current.h
5 * (C) Copyright 2000, Lineo, David McCullough <davidm@uclinux.org>
6 * (C) Copyright 2002, Greg Ungerer (gerg@snapgear.com)
7 *
8 * rather than dedicate a register (as the m68k source does), we
9 * just keep a global, we should probably just change it all to be
10 * current and lose _current_task.
11 */
12
13#include <linux/thread_info.h>
14
15struct task_struct;
16
17static inline struct task_struct *get_current(void)
18{
19 return(current_thread_info()->task);
20}
21
22#define current get_current()
23
24#endif /* _M68KNOMMU_CURRENT_H */
diff --git a/arch/m68k/include/asm/div64.h b/arch/m68k/include/asm/div64.h
index d211d9f54276..edb66148a71d 100644
--- a/arch/m68k/include/asm/div64.h
+++ b/arch/m68k/include/asm/div64.h
@@ -1,5 +1,34 @@
1#ifdef __uClinux__ 1#ifndef _M68K_DIV64_H
2#include "div64_no.h" 2#define _M68K_DIV64_H
3
4#ifdef CONFIG_MMU
5
6#include <linux/types.h>
7
8/* n = n / base; return rem; */
9
10#define do_div(n, base) ({ \
11 union { \
12 unsigned long n32[2]; \
13 unsigned long long n64; \
14 } __n; \
15 unsigned long __rem, __upper; \
16 \
17 __n.n64 = (n); \
18 if ((__upper = __n.n32[0])) { \
19 asm ("divul.l %2,%1:%0" \
20 : "=d" (__n.n32[0]), "=d" (__upper) \
21 : "d" (base), "0" (__n.n32[0])); \
22 } \
23 asm ("divu.l %2,%1:%0" \
24 : "=d" (__n.n32[1]), "=d" (__rem) \
25 : "d" (base), "1" (__upper), "0" (__n.n32[1])); \
26 (n) = __n.n64; \
27 __rem; \
28})
29
3#else 30#else
4#include "div64_mm.h" 31#include <asm-generic/div64.h>
5#endif 32#endif /* CONFIG_MMU */
33
34#endif /* _M68K_DIV64_H */
diff --git a/arch/m68k/include/asm/div64_mm.h b/arch/m68k/include/asm/div64_mm.h
deleted file mode 100644
index 8243c931b5c0..000000000000
--- a/arch/m68k/include/asm/div64_mm.h
+++ /dev/null
@@ -1,28 +0,0 @@
1#ifndef _M68K_DIV64_H
2#define _M68K_DIV64_H
3
4#include <linux/types.h>
5
6/* n = n / base; return rem; */
7
8#define do_div(n, base) ({ \
9 union { \
10 unsigned long n32[2]; \
11 unsigned long long n64; \
12 } __n; \
13 unsigned long __rem, __upper; \
14 \
15 __n.n64 = (n); \
16 if ((__upper = __n.n32[0])) { \
17 asm ("divul.l %2,%1:%0" \
18 : "=d" (__n.n32[0]), "=d" (__upper) \
19 : "d" (base), "0" (__n.n32[0])); \
20 } \
21 asm ("divu.l %2,%1:%0" \
22 : "=d" (__n.n32[1]), "=d" (__rem) \
23 : "d" (base), "1" (__upper), "0" (__n.n32[1])); \
24 (n) = __n.n64; \
25 __rem; \
26})
27
28#endif /* _M68K_DIV64_H */
diff --git a/arch/m68k/include/asm/div64_no.h b/arch/m68k/include/asm/div64_no.h
deleted file mode 100644
index 6cd978cefb28..000000000000
--- a/arch/m68k/include/asm/div64_no.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/div64.h>
diff --git a/arch/m68k/include/asm/dma-mapping.h b/arch/m68k/include/asm/dma-mapping.h
index f4a4c7638f89..26f505488c11 100644
--- a/arch/m68k/include/asm/dma-mapping.h
+++ b/arch/m68k/include/asm/dma-mapping.h
@@ -1,5 +1,112 @@
1#ifdef __uClinux__ 1#ifndef _M68K_DMA_MAPPING_H
2#include "dma-mapping_no.h" 2#define _M68K_DMA_MAPPING_H
3
4#include <asm/cache.h>
5
6struct scatterlist;
7
8#ifndef CONFIG_MMU_SUN3
9static inline int dma_supported(struct device *dev, u64 mask)
10{
11 return 1;
12}
13
14static inline int dma_set_mask(struct device *dev, u64 mask)
15{
16 return 0;
17}
18
19static inline int dma_get_cache_alignment(void)
20{
21 return 1 << L1_CACHE_SHIFT;
22}
23
24static inline int dma_is_consistent(struct device *dev, dma_addr_t dma_addr)
25{
26 return 0;
27}
28
29extern void *dma_alloc_coherent(struct device *, size_t,
30 dma_addr_t *, gfp_t);
31extern void dma_free_coherent(struct device *, size_t,
32 void *, dma_addr_t);
33
34static inline void *dma_alloc_noncoherent(struct device *dev, size_t size,
35 dma_addr_t *handle, gfp_t flag)
36{
37 return dma_alloc_coherent(dev, size, handle, flag);
38}
39static inline void dma_free_noncoherent(struct device *dev, size_t size,
40 void *addr, dma_addr_t handle)
41{
42 dma_free_coherent(dev, size, addr, handle);
43}
44static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
45 enum dma_data_direction dir)
46{
47 /* we use coherent allocation, so not much to do here. */
48}
49
50extern dma_addr_t dma_map_single(struct device *, void *, size_t,
51 enum dma_data_direction);
52static inline void dma_unmap_single(struct device *dev, dma_addr_t addr,
53 size_t size, enum dma_data_direction dir)
54{
55}
56
57extern dma_addr_t dma_map_page(struct device *, struct page *,
58 unsigned long, size_t size,
59 enum dma_data_direction);
60static inline void dma_unmap_page(struct device *dev, dma_addr_t address,
61 size_t size, enum dma_data_direction dir)
62{
63}
64
65extern int dma_map_sg(struct device *, struct scatterlist *, int,
66 enum dma_data_direction);
67static inline void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
68 int nhwentries, enum dma_data_direction dir)
69{
70}
71
72extern void dma_sync_single_for_device(struct device *, dma_addr_t, size_t,
73 enum dma_data_direction);
74extern void dma_sync_sg_for_device(struct device *, struct scatterlist *, int,
75 enum dma_data_direction);
76
77static inline void dma_sync_single_range_for_device(struct device *dev,
78 dma_addr_t dma_handle, unsigned long offset, size_t size,
79 enum dma_data_direction direction)
80{
81 /* just sync everything for now */
82 dma_sync_single_for_device(dev, dma_handle, offset + size, direction);
83}
84
85static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
86 size_t size, enum dma_data_direction dir)
87{
88}
89
90static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
91 int nents, enum dma_data_direction dir)
92{
93}
94
95static inline void dma_sync_single_range_for_cpu(struct device *dev,
96 dma_addr_t dma_handle, unsigned long offset, size_t size,
97 enum dma_data_direction direction)
98{
99 /* just sync everything for now */
100 dma_sync_single_for_cpu(dev, dma_handle, offset + size, direction);
101}
102
103static inline int dma_mapping_error(struct device *dev, dma_addr_t handle)
104{
105 return 0;
106}
107
3#else 108#else
4#include "dma-mapping_mm.h" 109#include <asm-generic/dma-mapping-broken.h>
5#endif 110#endif
111
112#endif /* _M68K_DMA_MAPPING_H */
diff --git a/arch/m68k/include/asm/dma-mapping_mm.h b/arch/m68k/include/asm/dma-mapping_mm.h
deleted file mode 100644
index 26f505488c11..000000000000
--- a/arch/m68k/include/asm/dma-mapping_mm.h
+++ /dev/null
@@ -1,112 +0,0 @@
1#ifndef _M68K_DMA_MAPPING_H
2#define _M68K_DMA_MAPPING_H
3
4#include <asm/cache.h>
5
6struct scatterlist;
7
8#ifndef CONFIG_MMU_SUN3
9static inline int dma_supported(struct device *dev, u64 mask)
10{
11 return 1;
12}
13
14static inline int dma_set_mask(struct device *dev, u64 mask)
15{
16 return 0;
17}
18
19static inline int dma_get_cache_alignment(void)
20{
21 return 1 << L1_CACHE_SHIFT;
22}
23
24static inline int dma_is_consistent(struct device *dev, dma_addr_t dma_addr)
25{
26 return 0;
27}
28
29extern void *dma_alloc_coherent(struct device *, size_t,
30 dma_addr_t *, gfp_t);
31extern void dma_free_coherent(struct device *, size_t,
32 void *, dma_addr_t);
33
34static inline void *dma_alloc_noncoherent(struct device *dev, size_t size,
35 dma_addr_t *handle, gfp_t flag)
36{
37 return dma_alloc_coherent(dev, size, handle, flag);
38}
39static inline void dma_free_noncoherent(struct device *dev, size_t size,
40 void *addr, dma_addr_t handle)
41{
42 dma_free_coherent(dev, size, addr, handle);
43}
44static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
45 enum dma_data_direction dir)
46{
47 /* we use coherent allocation, so not much to do here. */
48}
49
50extern dma_addr_t dma_map_single(struct device *, void *, size_t,
51 enum dma_data_direction);
52static inline void dma_unmap_single(struct device *dev, dma_addr_t addr,
53 size_t size, enum dma_data_direction dir)
54{
55}
56
57extern dma_addr_t dma_map_page(struct device *, struct page *,
58 unsigned long, size_t size,
59 enum dma_data_direction);
60static inline void dma_unmap_page(struct device *dev, dma_addr_t address,
61 size_t size, enum dma_data_direction dir)
62{
63}
64
65extern int dma_map_sg(struct device *, struct scatterlist *, int,
66 enum dma_data_direction);
67static inline void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
68 int nhwentries, enum dma_data_direction dir)
69{
70}
71
72extern void dma_sync_single_for_device(struct device *, dma_addr_t, size_t,
73 enum dma_data_direction);
74extern void dma_sync_sg_for_device(struct device *, struct scatterlist *, int,
75 enum dma_data_direction);
76
77static inline void dma_sync_single_range_for_device(struct device *dev,
78 dma_addr_t dma_handle, unsigned long offset, size_t size,
79 enum dma_data_direction direction)
80{
81 /* just sync everything for now */
82 dma_sync_single_for_device(dev, dma_handle, offset + size, direction);
83}
84
85static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
86 size_t size, enum dma_data_direction dir)
87{
88}
89
90static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
91 int nents, enum dma_data_direction dir)
92{
93}
94
95static inline void dma_sync_single_range_for_cpu(struct device *dev,
96 dma_addr_t dma_handle, unsigned long offset, size_t size,
97 enum dma_data_direction direction)
98{
99 /* just sync everything for now */
100 dma_sync_single_for_cpu(dev, dma_handle, offset + size, direction);
101}
102
103static inline int dma_mapping_error(struct device *dev, dma_addr_t handle)
104{
105 return 0;
106}
107
108#else
109#include <asm-generic/dma-mapping-broken.h>
110#endif
111
112#endif /* _M68K_DMA_MAPPING_H */
diff --git a/arch/m68k/include/asm/dma-mapping_no.h b/arch/m68k/include/asm/dma-mapping_no.h
deleted file mode 100644
index 1748f2bca940..000000000000
--- a/arch/m68k/include/asm/dma-mapping_no.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _M68KNOMMU_DMA_MAPPING_H
2#define _M68KNOMMU_DMA_MAPPING_H
3
4#include <asm-generic/dma-mapping-broken.h>
5
6#endif /* _M68KNOMMU_DMA_MAPPING_H */
diff --git a/arch/m68k/include/asm/elf.h b/arch/m68k/include/asm/elf.h
index 04ce488bc63f..0b0f49eb876b 100644
--- a/arch/m68k/include/asm/elf.h
+++ b/arch/m68k/include/asm/elf.h
@@ -1,5 +1,119 @@
1#ifdef __uClinux__ 1#ifndef __ASMm68k_ELF_H
2#include "elf_no.h" 2#define __ASMm68k_ELF_H
3
4/*
5 * ELF register definitions..
6 */
7
8#include <asm/ptrace.h>
9#include <asm/user.h>
10
11/*
12 * 68k ELF relocation types
13 */
14#define R_68K_NONE 0
15#define R_68K_32 1
16#define R_68K_16 2
17#define R_68K_8 3
18#define R_68K_PC32 4
19#define R_68K_PC16 5
20#define R_68K_PC8 6
21#define R_68K_GOT32 7
22#define R_68K_GOT16 8
23#define R_68K_GOT8 9
24#define R_68K_GOT32O 10
25#define R_68K_GOT16O 11
26#define R_68K_GOT8O 12
27#define R_68K_PLT32 13
28#define R_68K_PLT16 14
29#define R_68K_PLT8 15
30#define R_68K_PLT32O 16
31#define R_68K_PLT16O 17
32#define R_68K_PLT8O 18
33#define R_68K_COPY 19
34#define R_68K_GLOB_DAT 20
35#define R_68K_JMP_SLOT 21
36#define R_68K_RELATIVE 22
37
38typedef unsigned long elf_greg_t;
39
40#define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t))
41typedef elf_greg_t elf_gregset_t[ELF_NGREG];
42
43typedef struct user_m68kfp_struct elf_fpregset_t;
44
45/*
46 * This is used to ensure we don't load something for the wrong architecture.
47 */
48#define elf_check_arch(x) ((x)->e_machine == EM_68K)
49
50/*
51 * These are used to set parameters in the core dumps.
52 */
53#define ELF_CLASS ELFCLASS32
54#define ELF_DATA ELFDATA2MSB
55#define ELF_ARCH EM_68K
56
57/* For SVR4/m68k the function pointer to be registered with `atexit' is
58 passed in %a1. Although my copy of the ABI has no such statement, it
59 is actually used on ASV. */
60#define ELF_PLAT_INIT(_r, load_addr) _r->a1 = 0
61
62#define USE_ELF_CORE_DUMP
63#ifndef CONFIG_SUN3
64#define ELF_EXEC_PAGESIZE 4096
3#else 65#else
4#include "elf_mm.h" 66#define ELF_EXEC_PAGESIZE 8192
67#endif
68
69/* This is the location that an ET_DYN program is loaded if exec'ed. Typical
70 use of this is to invoke "./ld.so someprog" to test out a new version of
71 the loader. We need to make sure that it is out of the way of the program
72 that it will "exec", and that there is sufficient room for the brk. */
73
74#ifndef CONFIG_SUN3
75#define ELF_ET_DYN_BASE 0xD0000000UL
76#else
77#define ELF_ET_DYN_BASE 0x0D800000UL
78#endif
79
80#define ELF_CORE_COPY_REGS(pr_reg, regs) \
81 /* Bleech. */ \
82 pr_reg[0] = regs->d1; \
83 pr_reg[1] = regs->d2; \
84 pr_reg[2] = regs->d3; \
85 pr_reg[3] = regs->d4; \
86 pr_reg[4] = regs->d5; \
87 pr_reg[7] = regs->a0; \
88 pr_reg[8] = regs->a1; \
89 pr_reg[9] = regs->a2; \
90 pr_reg[14] = regs->d0; \
91 pr_reg[15] = rdusp(); \
92 pr_reg[16] = regs->orig_d0; \
93 pr_reg[17] = regs->sr; \
94 pr_reg[18] = regs->pc; \
95 pr_reg[19] = (regs->format << 12) | regs->vector; \
96 { \
97 struct switch_stack *sw = ((struct switch_stack *)regs) - 1; \
98 pr_reg[5] = sw->d6; \
99 pr_reg[6] = sw->d7; \
100 pr_reg[10] = sw->a3; \
101 pr_reg[11] = sw->a4; \
102 pr_reg[12] = sw->a5; \
103 pr_reg[13] = sw->a6; \
104 }
105
106/* This yields a mask that user programs can use to figure out what
107 instruction set this cpu supports. */
108
109#define ELF_HWCAP (0)
110
111/* This yields a string that ld.so will use to load implementation
112 specific libraries for optimization. This is more specific in
113 intent than poking at uname or /proc/cpuinfo. */
114
115#define ELF_PLATFORM (NULL)
116
117#define SET_PERSONALITY(ex) set_personality(PER_LINUX)
118
5#endif 119#endif
diff --git a/arch/m68k/include/asm/elf_mm.h b/arch/m68k/include/asm/elf_mm.h
deleted file mode 100644
index 0b0f49eb876b..000000000000
--- a/arch/m68k/include/asm/elf_mm.h
+++ /dev/null
@@ -1,119 +0,0 @@
1#ifndef __ASMm68k_ELF_H
2#define __ASMm68k_ELF_H
3
4/*
5 * ELF register definitions..
6 */
7
8#include <asm/ptrace.h>
9#include <asm/user.h>
10
11/*
12 * 68k ELF relocation types
13 */
14#define R_68K_NONE 0
15#define R_68K_32 1
16#define R_68K_16 2
17#define R_68K_8 3
18#define R_68K_PC32 4
19#define R_68K_PC16 5
20#define R_68K_PC8 6
21#define R_68K_GOT32 7
22#define R_68K_GOT16 8
23#define R_68K_GOT8 9
24#define R_68K_GOT32O 10
25#define R_68K_GOT16O 11
26#define R_68K_GOT8O 12
27#define R_68K_PLT32 13
28#define R_68K_PLT16 14
29#define R_68K_PLT8 15
30#define R_68K_PLT32O 16
31#define R_68K_PLT16O 17
32#define R_68K_PLT8O 18
33#define R_68K_COPY 19
34#define R_68K_GLOB_DAT 20
35#define R_68K_JMP_SLOT 21
36#define R_68K_RELATIVE 22
37
38typedef unsigned long elf_greg_t;
39
40#define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t))
41typedef elf_greg_t elf_gregset_t[ELF_NGREG];
42
43typedef struct user_m68kfp_struct elf_fpregset_t;
44
45/*
46 * This is used to ensure we don't load something for the wrong architecture.
47 */
48#define elf_check_arch(x) ((x)->e_machine == EM_68K)
49
50/*
51 * These are used to set parameters in the core dumps.
52 */
53#define ELF_CLASS ELFCLASS32
54#define ELF_DATA ELFDATA2MSB
55#define ELF_ARCH EM_68K
56
57/* For SVR4/m68k the function pointer to be registered with `atexit' is
58 passed in %a1. Although my copy of the ABI has no such statement, it
59 is actually used on ASV. */
60#define ELF_PLAT_INIT(_r, load_addr) _r->a1 = 0
61
62#define USE_ELF_CORE_DUMP
63#ifndef CONFIG_SUN3
64#define ELF_EXEC_PAGESIZE 4096
65#else
66#define ELF_EXEC_PAGESIZE 8192
67#endif
68
69/* This is the location that an ET_DYN program is loaded if exec'ed. Typical
70 use of this is to invoke "./ld.so someprog" to test out a new version of
71 the loader. We need to make sure that it is out of the way of the program
72 that it will "exec", and that there is sufficient room for the brk. */
73
74#ifndef CONFIG_SUN3
75#define ELF_ET_DYN_BASE 0xD0000000UL
76#else
77#define ELF_ET_DYN_BASE 0x0D800000UL
78#endif
79
80#define ELF_CORE_COPY_REGS(pr_reg, regs) \
81 /* Bleech. */ \
82 pr_reg[0] = regs->d1; \
83 pr_reg[1] = regs->d2; \
84 pr_reg[2] = regs->d3; \
85 pr_reg[3] = regs->d4; \
86 pr_reg[4] = regs->d5; \
87 pr_reg[7] = regs->a0; \
88 pr_reg[8] = regs->a1; \
89 pr_reg[9] = regs->a2; \
90 pr_reg[14] = regs->d0; \
91 pr_reg[15] = rdusp(); \
92 pr_reg[16] = regs->orig_d0; \
93 pr_reg[17] = regs->sr; \
94 pr_reg[18] = regs->pc; \
95 pr_reg[19] = (regs->format << 12) | regs->vector; \
96 { \
97 struct switch_stack *sw = ((struct switch_stack *)regs) - 1; \
98 pr_reg[5] = sw->d6; \
99 pr_reg[6] = sw->d7; \
100 pr_reg[10] = sw->a3; \
101 pr_reg[11] = sw->a4; \
102 pr_reg[12] = sw->a5; \
103 pr_reg[13] = sw->a6; \
104 }
105
106/* This yields a mask that user programs can use to figure out what
107 instruction set this cpu supports. */
108
109#define ELF_HWCAP (0)
110
111/* This yields a string that ld.so will use to load implementation
112 specific libraries for optimization. This is more specific in
113 intent than poking at uname or /proc/cpuinfo. */
114
115#define ELF_PLATFORM (NULL)
116
117#define SET_PERSONALITY(ex) set_personality(PER_LINUX)
118
119#endif
diff --git a/arch/m68k/include/asm/elf_no.h b/arch/m68k/include/asm/elf_no.h
deleted file mode 100644
index b8046837f384..000000000000
--- a/arch/m68k/include/asm/elf_no.h
+++ /dev/null
@@ -1,110 +0,0 @@
1#ifndef __ASMm68k_ELF_H
2#define __ASMm68k_ELF_H
3
4/*
5 * ELF register definitions..
6 */
7
8#include <asm/ptrace.h>
9#include <asm/user.h>
10
11/*
12 * 68k ELF relocation types
13 */
14#define R_68K_NONE 0
15#define R_68K_32 1
16#define R_68K_16 2
17#define R_68K_8 3
18#define R_68K_PC32 4
19#define R_68K_PC16 5
20#define R_68K_PC8 6
21#define R_68K_GOT32 7
22#define R_68K_GOT16 8
23#define R_68K_GOT8 9
24#define R_68K_GOT32O 10
25#define R_68K_GOT16O 11
26#define R_68K_GOT8O 12
27#define R_68K_PLT32 13
28#define R_68K_PLT16 14
29#define R_68K_PLT8 15
30#define R_68K_PLT32O 16
31#define R_68K_PLT16O 17
32#define R_68K_PLT8O 18
33#define R_68K_COPY 19
34#define R_68K_GLOB_DAT 20
35#define R_68K_JMP_SLOT 21
36#define R_68K_RELATIVE 22
37
38typedef unsigned long elf_greg_t;
39
40#define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t))
41typedef elf_greg_t elf_gregset_t[ELF_NGREG];
42
43typedef struct user_m68kfp_struct elf_fpregset_t;
44
45/*
46 * This is used to ensure we don't load something for the wrong architecture.
47 */
48#define elf_check_arch(x) ((x)->e_machine == EM_68K)
49
50/*
51 * These are used to set parameters in the core dumps.
52 */
53#define ELF_CLASS ELFCLASS32
54#define ELF_DATA ELFDATA2MSB
55#define ELF_ARCH EM_68K
56
57/* For SVR4/m68k the function pointer to be registered with `atexit' is
58 passed in %a1. Although my copy of the ABI has no such statement, it
59 is actually used on ASV. */
60#define ELF_PLAT_INIT(_r, load_addr) _r->a1 = 0
61
62#define USE_ELF_CORE_DUMP
63#define ELF_EXEC_PAGESIZE 4096
64
65/* This is the location that an ET_DYN program is loaded if exec'ed. Typical
66 use of this is to invoke "./ld.so someprog" to test out a new version of
67 the loader. We need to make sure that it is out of the way of the program
68 that it will "exec", and that there is sufficient room for the brk. */
69
70#define ELF_ET_DYN_BASE 0xD0000000UL
71
72#define ELF_CORE_COPY_REGS(pr_reg, regs) \
73 /* Bleech. */ \
74 pr_reg[0] = regs->d1; \
75 pr_reg[1] = regs->d2; \
76 pr_reg[2] = regs->d3; \
77 pr_reg[3] = regs->d4; \
78 pr_reg[4] = regs->d5; \
79 pr_reg[7] = regs->a0; \
80 pr_reg[8] = regs->a1; \
81 pr_reg[14] = regs->d0; \
82 pr_reg[15] = rdusp(); \
83 pr_reg[16] = 0 /* regs->orig_d0 */; \
84 pr_reg[17] = regs->sr; \
85 pr_reg[18] = regs->pc; \
86 /* pr_reg[19] = (regs->format << 12) | regs->vector; */ \
87 { \
88 struct switch_stack *sw = ((struct switch_stack *)regs) - 1; \
89 pr_reg[5] = sw->d6; \
90 pr_reg[6] = sw->d7; \
91 pr_reg[10] = sw->a3; \
92 pr_reg[11] = sw->a4; \
93 pr_reg[12] = sw->a5; \
94 pr_reg[13] = sw->a6; \
95 }
96
97/* This yields a mask that user programs can use to figure out what
98 instruction set this cpu supports. */
99
100#define ELF_HWCAP (0)
101
102/* This yields a string that ld.so will use to load implementation
103 specific libraries for optimization. This is more specific in
104 intent than poking at uname or /proc/cpuinfo. */
105
106#define ELF_PLATFORM (NULL)
107
108#define SET_PERSONALITY(ex) set_personality(PER_LINUX)
109
110#endif
diff --git a/arch/m68k/include/asm/fb.h b/arch/m68k/include/asm/fb.h
index 97bcaefd2064..be4e4c6797e8 100644
--- a/arch/m68k/include/asm/fb.h
+++ b/arch/m68k/include/asm/fb.h
@@ -1,5 +1,38 @@
1#ifdef __uClinux__ 1#ifndef _ASM_FB_H_
2#include "fb_no.h" 2#define _ASM_FB_H_
3
4#include <linux/fb.h>
5#include <linux/fs.h>
6#include <asm/page.h>
7#include <asm/setup.h>
8
9#ifdef CONFIG_MMU
10#ifdef CONFIG_SUN3
11static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
12 unsigned long off)
13{
14 pgprot_val(vma->vm_page_prot) |= SUN3_PAGE_NOCACHE;
15}
3#else 16#else
4#include "fb_mm.h" 17static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
5#endif 18 unsigned long off)
19{
20 if (CPU_IS_020_OR_030)
21 pgprot_val(vma->vm_page_prot) |= _PAGE_NOCACHE030;
22 if (CPU_IS_040_OR_060) {
23 pgprot_val(vma->vm_page_prot) &= _CACHEMASK040;
24 /* Use no-cache mode, serialized */
25 pgprot_val(vma->vm_page_prot) |= _PAGE_NOCACHE_S;
26 }
27}
28#endif /* CONFIG_SUN3 */
29#else
30#define fb_pgprotect(...) do {} while (0)
31#endif /* CONFIG_MMU */
32
33static inline int fb_is_primary_device(struct fb_info *info)
34{
35 return 0;
36}
37
38#endif /* _ASM_FB_H_ */
diff --git a/arch/m68k/include/asm/fb_mm.h b/arch/m68k/include/asm/fb_mm.h
deleted file mode 100644
index 380b97ae8157..000000000000
--- a/arch/m68k/include/asm/fb_mm.h
+++ /dev/null
@@ -1,34 +0,0 @@
1#ifndef _ASM_FB_H_
2#define _ASM_FB_H_
3
4#include <linux/fb.h>
5#include <linux/fs.h>
6#include <asm/page.h>
7#include <asm/setup.h>
8
9#ifdef CONFIG_SUN3
10static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
11 unsigned long off)
12{
13 pgprot_val(vma->vm_page_prot) |= SUN3_PAGE_NOCACHE;
14}
15#else
16static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
17 unsigned long off)
18{
19 if (CPU_IS_020_OR_030)
20 pgprot_val(vma->vm_page_prot) |= _PAGE_NOCACHE030;
21 if (CPU_IS_040_OR_060) {
22 pgprot_val(vma->vm_page_prot) &= _CACHEMASK040;
23 /* Use no-cache mode, serialized */
24 pgprot_val(vma->vm_page_prot) |= _PAGE_NOCACHE_S;
25 }
26}
27#endif /* CONFIG_SUN3 */
28
29static inline int fb_is_primary_device(struct fb_info *info)
30{
31 return 0;
32}
33
34#endif /* _ASM_FB_H_ */
diff --git a/arch/m68k/include/asm/fb_no.h b/arch/m68k/include/asm/fb_no.h
deleted file mode 100644
index c7df38030992..000000000000
--- a/arch/m68k/include/asm/fb_no.h
+++ /dev/null
@@ -1,12 +0,0 @@
1#ifndef _ASM_FB_H_
2#define _ASM_FB_H_
3#include <linux/fb.h>
4
5#define fb_pgprotect(...) do {} while (0)
6
7static inline int fb_is_primary_device(struct fb_info *info)
8{
9 return 0;
10}
11
12#endif /* _ASM_FB_H_ */
diff --git a/arch/m68k/include/asm/fpu.h b/arch/m68k/include/asm/fpu.h
index e19bc5ed9c37..ffb6b8cfc6d5 100644
--- a/arch/m68k/include/asm/fpu.h
+++ b/arch/m68k/include/asm/fpu.h
@@ -1,5 +1,21 @@
1#ifdef __uClinux__ 1#ifndef __M68K_FPU_H
2#include "fpu_no.h" 2#define __M68K_FPU_H
3
4
5/*
6 * MAX floating point unit state size (FSAVE/FRESTORE)
7 */
8
9#if defined(CONFIG_M68020) || defined(CONFIG_M68030)
10#define FPSTATESIZE (216)
11#elif defined(CONFIG_M68040)
12#define FPSTATESIZE (96)
13#elif defined(CONFIG_M68KFPU_EMU)
14#define FPSTATESIZE (28)
15#elif defined(CONFIG_M68060)
16#define FPSTATESIZE (12)
3#else 17#else
4#include "fpu_mm.h" 18#define FPSTATESIZE (0)
5#endif 19#endif
20
21#endif /* __M68K_FPU_H */
diff --git a/arch/m68k/include/asm/fpu_mm.h b/arch/m68k/include/asm/fpu_mm.h
deleted file mode 100644
index ffb6b8cfc6d5..000000000000
--- a/arch/m68k/include/asm/fpu_mm.h
+++ /dev/null
@@ -1,21 +0,0 @@
1#ifndef __M68K_FPU_H
2#define __M68K_FPU_H
3
4
5/*
6 * MAX floating point unit state size (FSAVE/FRESTORE)
7 */
8
9#if defined(CONFIG_M68020) || defined(CONFIG_M68030)
10#define FPSTATESIZE (216)
11#elif defined(CONFIG_M68040)
12#define FPSTATESIZE (96)
13#elif defined(CONFIG_M68KFPU_EMU)
14#define FPSTATESIZE (28)
15#elif defined(CONFIG_M68060)
16#define FPSTATESIZE (12)
17#else
18#define FPSTATESIZE (0)
19#endif
20
21#endif /* __M68K_FPU_H */
diff --git a/arch/m68k/include/asm/fpu_no.h b/arch/m68k/include/asm/fpu_no.h
deleted file mode 100644
index b16b2e4fca2a..000000000000
--- a/arch/m68k/include/asm/fpu_no.h
+++ /dev/null
@@ -1,21 +0,0 @@
1#ifndef __M68KNOMMU_FPU_H
2#define __M68KNOMMU_FPU_H
3
4
5/*
6 * MAX floating point unit state size (FSAVE/FRESTORE)
7 */
8#if defined(CONFIG_M68020) || defined(CONFIG_M68030)
9#define FPSTATESIZE (216/sizeof(unsigned char))
10#elif defined(CONFIG_M68040)
11#define FPSTATESIZE (96/sizeof(unsigned char))
12#elif defined(CONFIG_M68KFPU_EMU)
13#define FPSTATESIZE (28/sizeof(unsigned char))
14#elif defined(CONFIG_M68060)
15#define FPSTATESIZE (12/sizeof(unsigned char))
16#else
17/* Assume no FP unit present then... */
18#define FPSTATESIZE (2) /* dummy size */
19#endif
20
21#endif /* __M68K_FPU_H */
diff --git a/arch/m68k/include/asm/hw_irq.h b/arch/m68k/include/asm/hw_irq.h
index e19526015890..eacef0951fbf 100644
--- a/arch/m68k/include/asm/hw_irq.h
+++ b/arch/m68k/include/asm/hw_irq.h
@@ -1,5 +1,6 @@
1#ifdef __uClinux__ 1#ifndef __ASM_M68K_HW_IRQ_H
2#include "hw_irq_no.h" 2#define __ASM_M68K_HW_IRQ_H
3#else 3
4#include "hw_irq_mm.h" 4/* Dummy include. */
5
5#endif 6#endif
diff --git a/arch/m68k/include/asm/hw_irq_mm.h b/arch/m68k/include/asm/hw_irq_mm.h
deleted file mode 100644
index eacef0951fbf..000000000000
--- a/arch/m68k/include/asm/hw_irq_mm.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef __ASM_M68K_HW_IRQ_H
2#define __ASM_M68K_HW_IRQ_H
3
4/* Dummy include. */
5
6#endif
diff --git a/arch/m68k/include/asm/hw_irq_no.h b/arch/m68k/include/asm/hw_irq_no.h
deleted file mode 100644
index f3ec9e5ae049..000000000000
--- a/arch/m68k/include/asm/hw_irq_no.h
+++ /dev/null
@@ -1,4 +0,0 @@
1#ifndef __M68KNOMMU_HW_IRQ_H__
2#define __M68KNOMMU_HW_IRQ_H__
3
4#endif /* __M68KNOMMU_HW_IRQ_H__ */
diff --git a/arch/m68k/include/asm/kmap_types.h b/arch/m68k/include/asm/kmap_types.h
index 045d9fd122a2..c843c63d3801 100644
--- a/arch/m68k/include/asm/kmap_types.h
+++ b/arch/m68k/include/asm/kmap_types.h
@@ -1,5 +1,21 @@
1#ifdef __uClinux__ 1#ifndef __ASM_M68K_KMAP_TYPES_H
2#include "kmap_types_no.h" 2#define __ASM_M68K_KMAP_TYPES_H
3#else 3
4#include "kmap_types_mm.h" 4enum km_type {
5#endif 5 KM_BOUNCE_READ,
6 KM_SKB_SUNRPC_DATA,
7 KM_SKB_DATA_SOFTIRQ,
8 KM_USER0,
9 KM_USER1,
10 KM_BIO_SRC_IRQ,
11 KM_BIO_DST_IRQ,
12 KM_PTE0,
13 KM_PTE1,
14 KM_IRQ0,
15 KM_IRQ1,
16 KM_SOFTIRQ0,
17 KM_SOFTIRQ1,
18 KM_TYPE_NR
19};
20
21#endif /* __ASM_M68K_KMAP_TYPES_H */
diff --git a/arch/m68k/include/asm/kmap_types_mm.h b/arch/m68k/include/asm/kmap_types_mm.h
deleted file mode 100644
index c843c63d3801..000000000000
--- a/arch/m68k/include/asm/kmap_types_mm.h
+++ /dev/null
@@ -1,21 +0,0 @@
1#ifndef __ASM_M68K_KMAP_TYPES_H
2#define __ASM_M68K_KMAP_TYPES_H
3
4enum km_type {
5 KM_BOUNCE_READ,
6 KM_SKB_SUNRPC_DATA,
7 KM_SKB_DATA_SOFTIRQ,
8 KM_USER0,
9 KM_USER1,
10 KM_BIO_SRC_IRQ,
11 KM_BIO_DST_IRQ,
12 KM_PTE0,
13 KM_PTE1,
14 KM_IRQ0,
15 KM_IRQ1,
16 KM_SOFTIRQ0,
17 KM_SOFTIRQ1,
18 KM_TYPE_NR
19};
20
21#endif /* __ASM_M68K_KMAP_TYPES_H */
diff --git a/arch/m68k/include/asm/kmap_types_no.h b/arch/m68k/include/asm/kmap_types_no.h
deleted file mode 100644
index bfb6707575d1..000000000000
--- a/arch/m68k/include/asm/kmap_types_no.h
+++ /dev/null
@@ -1,21 +0,0 @@
1#ifndef __ASM_M68K_KMAP_TYPES_H
2#define __ASM_M68K_KMAP_TYPES_H
3
4enum km_type {
5 KM_BOUNCE_READ,
6 KM_SKB_SUNRPC_DATA,
7 KM_SKB_DATA_SOFTIRQ,
8 KM_USER0,
9 KM_USER1,
10 KM_BIO_SRC_IRQ,
11 KM_BIO_DST_IRQ,
12 KM_PTE0,
13 KM_PTE1,
14 KM_IRQ0,
15 KM_IRQ1,
16 KM_SOFTIRQ0,
17 KM_SOFTIRQ1,
18 KM_TYPE_NR
19};
20
21#endif
diff --git a/arch/m68k/include/asm/m532xsim.h b/arch/m68k/include/asm/m532xsim.h
index 1835fd20a82c..ce603451b55e 100644
--- a/arch/m68k/include/asm/m532xsim.h
+++ b/arch/m68k/include/asm/m532xsim.h
@@ -16,6 +16,7 @@
16#define MCFINT_VECBASE 64 16#define MCFINT_VECBASE 64
17#define MCFINT_UART0 26 /* Interrupt number for UART0 */ 17#define MCFINT_UART0 26 /* Interrupt number for UART0 */
18#define MCFINT_UART1 27 /* Interrupt number for UART1 */ 18#define MCFINT_UART1 27 /* Interrupt number for UART1 */
19#define MCFINT_UART2 28 /* Interrupt number for UART2 */
19 20
20#define MCF_WTM_WCR MCF_REG16(0xFC098000) 21#define MCF_WTM_WCR MCF_REG16(0xFC098000)
21 22
diff --git a/arch/m68k/include/asm/mc146818rtc.h b/arch/m68k/include/asm/mc146818rtc.h
index fb90dcf78426..9f70a01f73dc 100644
--- a/arch/m68k/include/asm/mc146818rtc.h
+++ b/arch/m68k/include/asm/mc146818rtc.h
@@ -1,5 +1,26 @@
1#ifdef __uClinux__ 1/*
2#include "mc146818rtc_no.h" 2 * Machine dependent access functions for RTC registers.
3#else 3 */
4#include "mc146818rtc_mm.h" 4#ifndef _ASM_MC146818RTC_H
5#endif 5#define _ASM_MC146818RTC_H
6
7
8#ifdef CONFIG_ATARI
9/* RTC in Atari machines */
10
11#include <asm/atarihw.h>
12
13#define RTC_PORT(x) (TT_RTC_BAS + 2*(x))
14#define RTC_ALWAYS_BCD 0
15
16#define CMOS_READ(addr) ({ \
17atari_outb_p((addr),RTC_PORT(0)); \
18atari_inb_p(RTC_PORT(1)); \
19})
20#define CMOS_WRITE(val, addr) ({ \
21atari_outb_p((addr),RTC_PORT(0)); \
22atari_outb_p((val),RTC_PORT(1)); \
23})
24#endif /* CONFIG_ATARI */
25
26#endif /* _ASM_MC146818RTC_H */
diff --git a/arch/m68k/include/asm/mc146818rtc_mm.h b/arch/m68k/include/asm/mc146818rtc_mm.h
deleted file mode 100644
index 9f70a01f73dc..000000000000
--- a/arch/m68k/include/asm/mc146818rtc_mm.h
+++ /dev/null
@@ -1,26 +0,0 @@
1/*
2 * Machine dependent access functions for RTC registers.
3 */
4#ifndef _ASM_MC146818RTC_H
5#define _ASM_MC146818RTC_H
6
7
8#ifdef CONFIG_ATARI
9/* RTC in Atari machines */
10
11#include <asm/atarihw.h>
12
13#define RTC_PORT(x) (TT_RTC_BAS + 2*(x))
14#define RTC_ALWAYS_BCD 0
15
16#define CMOS_READ(addr) ({ \
17atari_outb_p((addr),RTC_PORT(0)); \
18atari_inb_p(RTC_PORT(1)); \
19})
20#define CMOS_WRITE(val, addr) ({ \
21atari_outb_p((addr),RTC_PORT(0)); \
22atari_outb_p((val),RTC_PORT(1)); \
23})
24#endif /* CONFIG_ATARI */
25
26#endif /* _ASM_MC146818RTC_H */
diff --git a/arch/m68k/include/asm/mc146818rtc_no.h b/arch/m68k/include/asm/mc146818rtc_no.h
deleted file mode 100644
index 907a0481a140..000000000000
--- a/arch/m68k/include/asm/mc146818rtc_no.h
+++ /dev/null
@@ -1,9 +0,0 @@
1/*
2 * Machine dependent access functions for RTC registers.
3 */
4#ifndef _M68KNOMMU_MC146818RTC_H
5#define _M68KNOMMU_MC146818RTC_H
6
7/* empty include file to satisfy the include in genrtc.c/ide-geometry.c */
8
9#endif /* _M68KNOMMU_MC146818RTC_H */
diff --git a/arch/m68k/include/asm/mcfpci.h b/arch/m68k/include/asm/mcfpci.h
deleted file mode 100644
index f1507dd06ec6..000000000000
--- a/arch/m68k/include/asm/mcfpci.h
+++ /dev/null
@@ -1,119 +0,0 @@
1/****************************************************************************/
2
3/*
4 * mcfpci.h -- PCI bridge on ColdFire eval boards.
5 *
6 * (C) Copyright 2000, Greg Ungerer (gerg@snapgear.com)
7 * (C) Copyright 2000, Lineo Inc. (www.lineo.com)
8 */
9
10/****************************************************************************/
11#ifndef mcfpci_h
12#define mcfpci_h
13/****************************************************************************/
14
15
16#ifdef CONFIG_PCI
17
18/*
19 * Address regions in the PCI address space are not mapped into the
20 * normal memory space of the ColdFire. They must be accessed via
21 * handler routines. This is easy for I/O space (inb/outb/etc) but
22 * needs some code changes to support ordinary memory. Interrupts
23 * also need to be vectored through the PCI handler first, then it
24 * will call the actual driver sub-handlers.
25 */
26
27/*
28 * Un-define all the standard I/O access routines.
29 */
30#undef inb
31#undef inw
32#undef inl
33#undef inb_p
34#undef inw_p
35#undef insb
36#undef insw
37#undef insl
38#undef outb
39#undef outw
40#undef outl
41#undef outb_p
42#undef outw_p
43#undef outsb
44#undef outsw
45#undef outsl
46
47#undef request_irq
48#undef free_irq
49
50#undef bus_to_virt
51#undef virt_to_bus
52
53
54/*
55 * Re-direct all I/O memory accesses functions to PCI specific ones.
56 */
57#define inb pci_inb
58#define inw pci_inw
59#define inl pci_inl
60#define inb_p pci_inb
61#define inw_p pci_inw
62#define insb pci_insb
63#define insw pci_insw
64#define insl pci_insl
65
66#define outb pci_outb
67#define outw pci_outw
68#define outl pci_outl
69#define outb_p pci_outb
70#define outw_p pci_outw
71#define outsb pci_outsb
72#define outsw pci_outsw
73#define outsl pci_outsl
74
75#define request_irq pci_request_irq
76#define free_irq pci_free_irq
77
78#define virt_to_bus pci_virt_to_bus
79#define bus_to_virt pci_bus_to_virt
80
81#define CONFIG_COMEMPCI 1
82
83
84/*
85 * Prototypes of the real PCI functions (defined in bios32.c).
86 */
87unsigned char pci_inb(unsigned int addr);
88unsigned short pci_inw(unsigned int addr);
89unsigned int pci_inl(unsigned int addr);
90void pci_insb(void *addr, void *buf, int len);
91void pci_insw(void *addr, void *buf, int len);
92void pci_insl(void *addr, void *buf, int len);
93
94void pci_outb(unsigned char val, unsigned int addr);
95void pci_outw(unsigned short val, unsigned int addr);
96void pci_outl(unsigned int val, unsigned int addr);
97void pci_outsb(void *addr, void *buf, int len);
98void pci_outsw(void *addr, void *buf, int len);
99void pci_outsl(void *addr, void *buf, int len);
100
101int pci_request_irq(unsigned int irq,
102 void (*handler)(int, void *, struct pt_regs *),
103 unsigned long flags,
104 const char *device,
105 void *dev_id);
106void pci_free_irq(unsigned int irq, void *dev_id);
107
108void *pci_bmalloc(int size);
109void pci_bmfree(void *bmp, int len);
110void pci_copytoshmem(unsigned long bmp, void *src, int size);
111void pci_copyfromshmem(void *dst, unsigned long bmp, int size);
112unsigned long pci_virt_to_bus(volatile void *address);
113void *pci_bus_to_virt(unsigned long address);
114void pci_bmcpyto(void *dst, void *src, int len);
115void pci_bmcpyfrom(void *dst, void *src, int len);
116
117#endif /* CONFIG_PCI */
118/****************************************************************************/
119#endif /* mcfpci_h */
diff --git a/arch/m68k/include/asm/mmu.h b/arch/m68k/include/asm/mmu.h
index a81d3946675f..8a11a63ee15a 100644
--- a/arch/m68k/include/asm/mmu.h
+++ b/arch/m68k/include/asm/mmu.h
@@ -1,5 +1,13 @@
1#ifdef __uClinux__ 1#ifndef __MMU_H
2#include "mmu_no.h" 2#define __MMU_H
3
4#ifdef CONFIG_MMU
5/* Default "unsigned long" context */
6typedef unsigned long mm_context_t;
3#else 7#else
4#include "mmu_mm.h" 8typedef struct {
9 unsigned long end_brk;
10} mm_context_t;
11#endif
12
5#endif 13#endif
diff --git a/arch/m68k/include/asm/mmu_context.h b/arch/m68k/include/asm/mmu_context.h
index b440928fc6c7..7d4341e55a99 100644
--- a/arch/m68k/include/asm/mmu_context.h
+++ b/arch/m68k/include/asm/mmu_context.h
@@ -1,5 +1,175 @@
1#ifdef __uClinux__ 1#ifndef __M68K_MMU_CONTEXT_H
2#include "mmu_context_no.h" 2#define __M68K_MMU_CONTEXT_H
3
4#include <asm-generic/mm_hooks.h>
5
6static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
7{
8}
9
10#ifdef CONFIG_MMU
11#ifndef CONFIG_SUN3
12
13#include <asm/setup.h>
14#include <asm/page.h>
15#include <asm/pgalloc.h>
16
17static inline int init_new_context(struct task_struct *tsk,
18 struct mm_struct *mm)
19{
20 mm->context = virt_to_phys(mm->pgd);
21 return 0;
22}
23
24#define destroy_context(mm) do { } while(0)
25
26static inline void switch_mm_0230(struct mm_struct *mm)
27{
28 unsigned long crp[2] = {
29 0x80000000 | _PAGE_TABLE, mm->context
30 };
31 unsigned long tmp;
32
33 asm volatile (".chip 68030");
34
35 /* flush MC68030/MC68020 caches (they are virtually addressed) */
36 asm volatile (
37 "movec %%cacr,%0;"
38 "orw %1,%0; "
39 "movec %0,%%cacr"
40 : "=d" (tmp) : "di" (FLUSH_I_AND_D));
41
42 /* Switch the root pointer. For a 030-only kernel,
43 * avoid flushing the whole ATC, we only need to
44 * flush the user entries. The 68851 does this by
45 * itself. Avoid a runtime check here.
46 */
47 asm volatile (
48#ifdef CPU_M68030_ONLY
49 "pmovefd %0,%%crp; "
50 "pflush #0,#4"
3#else 51#else
4#include "mmu_context_mm.h" 52 "pmove %0,%%crp"
5#endif 53#endif
54 : : "m" (crp[0]));
55
56 asm volatile (".chip 68k");
57}
58
59static inline void switch_mm_0460(struct mm_struct *mm)
60{
61 asm volatile (".chip 68040");
62
63 /* flush address translation cache (user entries) */
64 asm volatile ("pflushan");
65
66 /* switch the root pointer */
67 asm volatile ("movec %0,%%urp" : : "r" (mm->context));
68
69 if (CPU_IS_060) {
70 unsigned long tmp;
71
72 /* clear user entries in the branch cache */
73 asm volatile (
74 "movec %%cacr,%0; "
75 "orl %1,%0; "
76 "movec %0,%%cacr"
77 : "=d" (tmp): "di" (0x00200000));
78 }
79
80 asm volatile (".chip 68k");
81}
82
83static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
84{
85 if (prev != next) {
86 if (CPU_IS_020_OR_030)
87 switch_mm_0230(next);
88 else
89 switch_mm_0460(next);
90 }
91}
92
93#define deactivate_mm(tsk,mm) do { } while (0)
94
95static inline void activate_mm(struct mm_struct *prev_mm,
96 struct mm_struct *next_mm)
97{
98 next_mm->context = virt_to_phys(next_mm->pgd);
99
100 if (CPU_IS_020_OR_030)
101 switch_mm_0230(next_mm);
102 else
103 switch_mm_0460(next_mm);
104}
105
106#else /* CONFIG_SUN3 */
107#include <asm/sun3mmu.h>
108#include <linux/sched.h>
109
110extern unsigned long get_free_context(struct mm_struct *mm);
111extern void clear_context(unsigned long context);
112
113/* set the context for a new task to unmapped */
114static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
115{
116 mm->context = SUN3_INVALID_CONTEXT;
117 return 0;
118}
119
120/* find the context given to this process, and if it hasn't already
121 got one, go get one for it. */
122static inline void get_mmu_context(struct mm_struct *mm)
123{
124 if(mm->context == SUN3_INVALID_CONTEXT)
125 mm->context = get_free_context(mm);
126}
127
128/* flush context if allocated... */
129static inline void destroy_context(struct mm_struct *mm)
130{
131 if(mm->context != SUN3_INVALID_CONTEXT)
132 clear_context(mm->context);
133}
134
135static inline void activate_context(struct mm_struct *mm)
136{
137 get_mmu_context(mm);
138 sun3_put_context(mm->context);
139}
140
141static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
142{
143 activate_context(tsk->mm);
144}
145
146#define deactivate_mm(tsk,mm) do { } while (0)
147
148static inline void activate_mm(struct mm_struct *prev_mm,
149 struct mm_struct *next_mm)
150{
151 activate_context(next_mm);
152}
153
154#endif
155#else /* !CONFIG_MMU */
156
157static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
158{
159 return 0;
160}
161
162
163static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
164{
165}
166
167#define destroy_context(mm) do { } while (0)
168#define deactivate_mm(tsk,mm) do { } while (0)
169
170static inline void activate_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm)
171{
172}
173
174#endif /* CONFIG_MMU */
175#endif /* __M68K_MMU_CONTEXT_H */
diff --git a/arch/m68k/include/asm/mmu_context_mm.h b/arch/m68k/include/asm/mmu_context_mm.h
deleted file mode 100644
index 894dacbcee14..000000000000
--- a/arch/m68k/include/asm/mmu_context_mm.h
+++ /dev/null
@@ -1,154 +0,0 @@
1#ifndef __M68K_MMU_CONTEXT_H
2#define __M68K_MMU_CONTEXT_H
3
4#include <asm-generic/mm_hooks.h>
5
6static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
7{
8}
9
10#ifndef CONFIG_SUN3
11
12#include <asm/setup.h>
13#include <asm/page.h>
14#include <asm/pgalloc.h>
15
16static inline int init_new_context(struct task_struct *tsk,
17 struct mm_struct *mm)
18{
19 mm->context = virt_to_phys(mm->pgd);
20 return 0;
21}
22
23#define destroy_context(mm) do { } while(0)
24
25static inline void switch_mm_0230(struct mm_struct *mm)
26{
27 unsigned long crp[2] = {
28 0x80000000 | _PAGE_TABLE, mm->context
29 };
30 unsigned long tmp;
31
32 asm volatile (".chip 68030");
33
34 /* flush MC68030/MC68020 caches (they are virtually addressed) */
35 asm volatile (
36 "movec %%cacr,%0;"
37 "orw %1,%0; "
38 "movec %0,%%cacr"
39 : "=d" (tmp) : "di" (FLUSH_I_AND_D));
40
41 /* Switch the root pointer. For a 030-only kernel,
42 * avoid flushing the whole ATC, we only need to
43 * flush the user entries. The 68851 does this by
44 * itself. Avoid a runtime check here.
45 */
46 asm volatile (
47#ifdef CPU_M68030_ONLY
48 "pmovefd %0,%%crp; "
49 "pflush #0,#4"
50#else
51 "pmove %0,%%crp"
52#endif
53 : : "m" (crp[0]));
54
55 asm volatile (".chip 68k");
56}
57
58static inline void switch_mm_0460(struct mm_struct *mm)
59{
60 asm volatile (".chip 68040");
61
62 /* flush address translation cache (user entries) */
63 asm volatile ("pflushan");
64
65 /* switch the root pointer */
66 asm volatile ("movec %0,%%urp" : : "r" (mm->context));
67
68 if (CPU_IS_060) {
69 unsigned long tmp;
70
71 /* clear user entries in the branch cache */
72 asm volatile (
73 "movec %%cacr,%0; "
74 "orl %1,%0; "
75 "movec %0,%%cacr"
76 : "=d" (tmp): "di" (0x00200000));
77 }
78
79 asm volatile (".chip 68k");
80}
81
82static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
83{
84 if (prev != next) {
85 if (CPU_IS_020_OR_030)
86 switch_mm_0230(next);
87 else
88 switch_mm_0460(next);
89 }
90}
91
92#define deactivate_mm(tsk,mm) do { } while (0)
93
94static inline void activate_mm(struct mm_struct *prev_mm,
95 struct mm_struct *next_mm)
96{
97 next_mm->context = virt_to_phys(next_mm->pgd);
98
99 if (CPU_IS_020_OR_030)
100 switch_mm_0230(next_mm);
101 else
102 switch_mm_0460(next_mm);
103}
104
105#else /* CONFIG_SUN3 */
106#include <asm/sun3mmu.h>
107#include <linux/sched.h>
108
109extern unsigned long get_free_context(struct mm_struct *mm);
110extern void clear_context(unsigned long context);
111
112/* set the context for a new task to unmapped */
113static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
114{
115 mm->context = SUN3_INVALID_CONTEXT;
116 return 0;
117}
118
119/* find the context given to this process, and if it hasn't already
120 got one, go get one for it. */
121static inline void get_mmu_context(struct mm_struct *mm)
122{
123 if(mm->context == SUN3_INVALID_CONTEXT)
124 mm->context = get_free_context(mm);
125}
126
127/* flush context if allocated... */
128static inline void destroy_context(struct mm_struct *mm)
129{
130 if(mm->context != SUN3_INVALID_CONTEXT)
131 clear_context(mm->context);
132}
133
134static inline void activate_context(struct mm_struct *mm)
135{
136 get_mmu_context(mm);
137 sun3_put_context(mm->context);
138}
139
140static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
141{
142 activate_context(tsk->mm);
143}
144
145#define deactivate_mm(tsk,mm) do { } while (0)
146
147static inline void activate_mm(struct mm_struct *prev_mm,
148 struct mm_struct *next_mm)
149{
150 activate_context(next_mm);
151}
152
153#endif
154#endif
diff --git a/arch/m68k/include/asm/mmu_context_no.h b/arch/m68k/include/asm/mmu_context_no.h
deleted file mode 100644
index 9ccee4278c97..000000000000
--- a/arch/m68k/include/asm/mmu_context_no.h
+++ /dev/null
@@ -1,33 +0,0 @@
1#ifndef __M68KNOMMU_MMU_CONTEXT_H
2#define __M68KNOMMU_MMU_CONTEXT_H
3
4#include <asm/setup.h>
5#include <asm/page.h>
6#include <asm/pgalloc.h>
7#include <asm-generic/mm_hooks.h>
8
9static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
10{
11}
12
13static inline int
14init_new_context(struct task_struct *tsk, struct mm_struct *mm)
15{
16 // mm->context = virt_to_phys(mm->pgd);
17 return(0);
18}
19
20#define destroy_context(mm) do { } while(0)
21
22static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
23{
24}
25
26#define deactivate_mm(tsk,mm) do { } while (0)
27
28static inline void activate_mm(struct mm_struct *prev_mm,
29 struct mm_struct *next_mm)
30{
31}
32
33#endif
diff --git a/arch/m68k/include/asm/mmu_mm.h b/arch/m68k/include/asm/mmu_mm.h
deleted file mode 100644
index ccd36d26615a..000000000000
--- a/arch/m68k/include/asm/mmu_mm.h
+++ /dev/null
@@ -1,7 +0,0 @@
1#ifndef __MMU_H
2#define __MMU_H
3
4/* Default "unsigned long" context */
5typedef unsigned long mm_context_t;
6
7#endif
diff --git a/arch/m68k/include/asm/mmu_no.h b/arch/m68k/include/asm/mmu_no.h
deleted file mode 100644
index e2da1e6f09fe..000000000000
--- a/arch/m68k/include/asm/mmu_no.h
+++ /dev/null
@@ -1,10 +0,0 @@
1#ifndef __M68KNOMMU_MMU_H
2#define __M68KNOMMU_MMU_H
3
4/* Copyright (C) 2002, David McCullough <davidm@snapgear.com> */
5
6typedef struct {
7 unsigned long end_brk;
8} mm_context_t;
9
10#endif /* __M68KNOMMU_MMU_H */
diff --git a/arch/m68k/include/asm/module.h b/arch/m68k/include/asm/module.h
index 79b59d137dd0..5f21e11071bd 100644
--- a/arch/m68k/include/asm/module.h
+++ b/arch/m68k/include/asm/module.h
@@ -1,5 +1,48 @@
1#ifdef __uClinux__ 1#ifndef _ASM_M68K_MODULE_H
2#include "module_no.h" 2#define _ASM_M68K_MODULE_H
3
4#ifdef CONFIG_MMU
5
6struct mod_arch_specific {
7 struct m68k_fixup_info *fixup_start, *fixup_end;
8};
9
10#define MODULE_ARCH_INIT { \
11 .fixup_start = __start_fixup, \
12 .fixup_end = __stop_fixup, \
13}
14
15
16enum m68k_fixup_type {
17 m68k_fixup_memoffset,
18 m68k_fixup_vnode_shift,
19};
20
21struct m68k_fixup_info {
22 enum m68k_fixup_type type;
23 void *addr;
24};
25
26#define m68k_fixup(type, addr) \
27 " .section \".m68k_fixup\",\"aw\"\n" \
28 " .long " #type "," #addr "\n" \
29 " .previous\n"
30
31extern struct m68k_fixup_info __start_fixup[], __stop_fixup[];
32
33struct module;
34extern void module_fixup(struct module *mod, struct m68k_fixup_info *start,
35 struct m68k_fixup_info *end);
36
3#else 37#else
4#include "module_mm.h" 38
5#endif 39struct mod_arch_specific {
40};
41
42#endif /* CONFIG_MMU */
43
44#define Elf_Shdr Elf32_Shdr
45#define Elf_Sym Elf32_Sym
46#define Elf_Ehdr Elf32_Ehdr
47
48#endif /* _ASM_M68K_MODULE_H */
diff --git a/arch/m68k/include/asm/module_mm.h b/arch/m68k/include/asm/module_mm.h
deleted file mode 100644
index 382d20a6fc18..000000000000
--- a/arch/m68k/include/asm/module_mm.h
+++ /dev/null
@@ -1,39 +0,0 @@
1#ifndef _ASM_M68K_MODULE_H
2#define _ASM_M68K_MODULE_H
3
4struct mod_arch_specific {
5 struct m68k_fixup_info *fixup_start, *fixup_end;
6};
7
8#define MODULE_ARCH_INIT { \
9 .fixup_start = __start_fixup, \
10 .fixup_end = __stop_fixup, \
11}
12
13#define Elf_Shdr Elf32_Shdr
14#define Elf_Sym Elf32_Sym
15#define Elf_Ehdr Elf32_Ehdr
16
17
18enum m68k_fixup_type {
19 m68k_fixup_memoffset,
20 m68k_fixup_vnode_shift,
21};
22
23struct m68k_fixup_info {
24 enum m68k_fixup_type type;
25 void *addr;
26};
27
28#define m68k_fixup(type, addr) \
29 " .section \".m68k_fixup\",\"aw\"\n" \
30 " .long " #type "," #addr "\n" \
31 " .previous\n"
32
33extern struct m68k_fixup_info __start_fixup[], __stop_fixup[];
34
35struct module;
36extern void module_fixup(struct module *mod, struct m68k_fixup_info *start,
37 struct m68k_fixup_info *end);
38
39#endif /* _ASM_M68K_MODULE_H */
diff --git a/arch/m68k/include/asm/module_no.h b/arch/m68k/include/asm/module_no.h
deleted file mode 100644
index 2e45ab50b232..000000000000
--- a/arch/m68k/include/asm/module_no.h
+++ /dev/null
@@ -1,11 +0,0 @@
1#ifndef ASM_M68KNOMMU_MODULE_H
2#define ASM_M68KNOMMU_MODULE_H
3
4struct mod_arch_specific {
5};
6
7#define Elf_Shdr Elf32_Shdr
8#define Elf_Sym Elf32_Sym
9#define Elf_Ehdr Elf32_Ehdr
10
11#endif /* ASM_M68KNOMMU_MODULE_H */
diff --git a/arch/m68k/include/asm/page_offset.h b/arch/m68k/include/asm/page_offset.h
index 66455c849fbb..1780152d81da 100644
--- a/arch/m68k/include/asm/page_offset.h
+++ b/arch/m68k/include/asm/page_offset.h
@@ -1,5 +1,11 @@
1#ifdef __uClinux__ 1/* This handles the memory map.. */
2#include "page_offset_no.h" 2
3#ifdef CONFIG_MMU
4#ifndef CONFIG_SUN3
5#define PAGE_OFFSET_RAW 0x00000000
3#else 6#else
4#include "page_offset_mm.h" 7#define PAGE_OFFSET_RAW 0x0E000000
8#endif
9#else
10#define PAGE_OFFSET_RAW CONFIG_RAMBASE
5#endif 11#endif
diff --git a/arch/m68k/include/asm/page_offset_mm.h b/arch/m68k/include/asm/page_offset_mm.h
deleted file mode 100644
index 1cbdb7f30ac2..000000000000
--- a/arch/m68k/include/asm/page_offset_mm.h
+++ /dev/null
@@ -1,8 +0,0 @@
1
2/* This handles the memory map.. */
3#ifndef CONFIG_SUN3
4#define PAGE_OFFSET_RAW 0x00000000
5#else
6#define PAGE_OFFSET_RAW 0x0E000000
7#endif
8
diff --git a/arch/m68k/include/asm/page_offset_no.h b/arch/m68k/include/asm/page_offset_no.h
deleted file mode 100644
index d4e73e0ba646..000000000000
--- a/arch/m68k/include/asm/page_offset_no.h
+++ /dev/null
@@ -1,5 +0,0 @@
1
2
3/* This handles the memory map.. */
4#define PAGE_OFFSET_RAW CONFIG_RAMBASE
5
diff --git a/arch/m68k/include/asm/pci.h b/arch/m68k/include/asm/pci.h
index dbea95373080..4ad0aea48ab4 100644
--- a/arch/m68k/include/asm/pci.h
+++ b/arch/m68k/include/asm/pci.h
@@ -1,5 +1,12 @@
1#ifdef __uClinux__ 1#ifndef _ASM_M68K_PCI_H
2#include "pci_no.h" 2#define _ASM_M68K_PCI_H
3#else 3
4#include "pci_mm.h" 4#include <asm-generic/pci-dma-compat.h>
5#endif 5
6/* The PCI address space does equal the physical memory
7 * address space. The networking and block device layers use
8 * this boolean for bounce buffer decisions.
9 */
10#define PCI_DMA_BUS_IS_PHYS (1)
11
12#endif /* _ASM_M68K_PCI_H */
diff --git a/arch/m68k/include/asm/pci_mm.h b/arch/m68k/include/asm/pci_mm.h
deleted file mode 100644
index 4ad0aea48ab4..000000000000
--- a/arch/m68k/include/asm/pci_mm.h
+++ /dev/null
@@ -1,12 +0,0 @@
1#ifndef _ASM_M68K_PCI_H
2#define _ASM_M68K_PCI_H
3
4#include <asm-generic/pci-dma-compat.h>
5
6/* The PCI address space does equal the physical memory
7 * address space. The networking and block device layers use
8 * this boolean for bounce buffer decisions.
9 */
10#define PCI_DMA_BUS_IS_PHYS (1)
11
12#endif /* _ASM_M68K_PCI_H */
diff --git a/arch/m68k/include/asm/pci_no.h b/arch/m68k/include/asm/pci_no.h
deleted file mode 100644
index 9abbc03c73ee..000000000000
--- a/arch/m68k/include/asm/pci_no.h
+++ /dev/null
@@ -1,29 +0,0 @@
1#ifndef M68KNOMMU_PCI_H
2#define M68KNOMMU_PCI_H
3
4#include <asm/pci_mm.h>
5
6#ifdef CONFIG_COMEMPCI
7/*
8 * These are pretty much arbitary with the CoMEM implementation.
9 * We have the whole address space to ourselves.
10 */
11#define PCIBIOS_MIN_IO 0x100
12#define PCIBIOS_MIN_MEM 0x00010000
13
14#define pcibios_scan_all_fns(a, b) 0
15
16/*
17 * Return whether the given PCI device DMA address mask can
18 * be supported properly. For example, if your device can
19 * only drive the low 24-bits during PCI bus mastering, then
20 * you would pass 0x00ffffff as the mask to this function.
21 */
22static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask)
23{
24 return 1;
25}
26
27#endif /* CONFIG_COMEMPCI */
28
29#endif /* M68KNOMMU_PCI_H */
diff --git a/arch/m68k/include/asm/pgalloc.h b/arch/m68k/include/asm/pgalloc.h
index 059cb73e78fc..c294aad8a900 100644
--- a/arch/m68k/include/asm/pgalloc.h
+++ b/arch/m68k/include/asm/pgalloc.h
@@ -1,5 +1,19 @@
1#ifdef __uClinux__ 1#ifndef M68K_PGALLOC_H
2#include "pgalloc_no.h" 2#define M68K_PGALLOC_H
3
4#include <linux/mm.h>
5#include <linux/highmem.h>
6#include <asm/setup.h>
7
8#ifdef CONFIG_MMU
9#include <asm/virtconvert.h>
10#ifdef CONFIG_SUN3
11#include <asm/sun3_pgalloc.h>
3#else 12#else
4#include "pgalloc_mm.h" 13#include <asm/motorola_pgalloc.h>
5#endif 14#endif
15
16extern void m68k_setup_node(int node);
17#endif
18
19#endif /* M68K_PGALLOC_H */
diff --git a/arch/m68k/include/asm/pgalloc_mm.h b/arch/m68k/include/asm/pgalloc_mm.h
deleted file mode 100644
index 4cb1a57ab763..000000000000
--- a/arch/m68k/include/asm/pgalloc_mm.h
+++ /dev/null
@@ -1,19 +0,0 @@
1
2#ifndef M68K_PGALLOC_H
3#define M68K_PGALLOC_H
4
5#include <linux/mm.h>
6#include <linux/highmem.h>
7#include <asm/setup.h>
8#include <asm/virtconvert.h>
9
10
11#ifdef CONFIG_SUN3
12#include <asm/sun3_pgalloc.h>
13#else
14#include <asm/motorola_pgalloc.h>
15#endif
16
17extern void m68k_setup_node(int node);
18
19#endif /* M68K_PGALLOC_H */
diff --git a/arch/m68k/include/asm/pgalloc_no.h b/arch/m68k/include/asm/pgalloc_no.h
deleted file mode 100644
index d6352f671ec0..000000000000
--- a/arch/m68k/include/asm/pgalloc_no.h
+++ /dev/null
@@ -1,8 +0,0 @@
1#ifndef _M68KNOMMU_PGALLOC_H
2#define _M68KNOMMU_PGALLOC_H
3
4#include <asm/setup.h>
5
6#define check_pgt_cache() do { } while (0)
7
8#endif /* _M68KNOMMU_PGALLOC_H */
diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h
index 46251016e821..bf86b29fe64a 100644
--- a/arch/m68k/include/asm/pgtable_no.h
+++ b/arch/m68k/include/asm/pgtable_no.h
@@ -67,4 +67,6 @@ extern unsigned int kobjsize(const void *objp);
67 67
68#include <asm-generic/pgtable.h> 68#include <asm-generic/pgtable.h>
69 69
70#define check_pgt_cache() do { } while (0)
71
70#endif /* _M68KNOMMU_PGTABLE_H */ 72#endif /* _M68KNOMMU_PGTABLE_H */
diff --git a/arch/m68k/include/asm/rtc.h b/arch/m68k/include/asm/rtc.h
index 5d3e03859844..a4d08ea122ee 100644
--- a/arch/m68k/include/asm/rtc.h
+++ b/arch/m68k/include/asm/rtc.h
@@ -36,13 +36,16 @@ static inline unsigned int get_rtc_time(struct rtc_time *time)
36 * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated 36 * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated
37 * by the RTC when initially set to a non-zero value. 37 * by the RTC when initially set to a non-zero value.
38 */ 38 */
39 mach_hwclk(0, time); 39 if (mach_hwclk)
40 mach_hwclk(0, time);
40 return RTC_24H; 41 return RTC_24H;
41} 42}
42 43
43static inline int set_rtc_time(struct rtc_time *time) 44static inline int set_rtc_time(struct rtc_time *time)
44{ 45{
45 return mach_hwclk(1, time); 46 if (mach_hwclk)
47 return mach_hwclk(1, time);
48 return -EINVAL;
46} 49}
47 50
48static inline unsigned int get_rtc_ss(void) 51static inline unsigned int get_rtc_ss(void)
diff --git a/arch/m68k/include/asm/scatterlist.h b/arch/m68k/include/asm/scatterlist.h
index b7e528636252..e27ad902b1cf 100644
--- a/arch/m68k/include/asm/scatterlist.h
+++ b/arch/m68k/include/asm/scatterlist.h
@@ -1,5 +1,23 @@
1#ifdef __uClinux__ 1#ifndef _M68K_SCATTERLIST_H
2#include "scatterlist_no.h" 2#define _M68K_SCATTERLIST_H
3#else 3
4#include "scatterlist_mm.h" 4#include <linux/types.h>
5
6struct scatterlist {
7#ifdef CONFIG_DEBUG_SG
8 unsigned long sg_magic;
5#endif 9#endif
10 unsigned long page_link;
11 unsigned int offset;
12 unsigned int length;
13
14 dma_addr_t dma_address; /* A place to hang host-specific addresses at. */
15};
16
17/* This is bogus and should go away. */
18#define ISA_DMA_THRESHOLD (0x00ffffff)
19
20#define sg_dma_address(sg) ((sg)->dma_address)
21#define sg_dma_len(sg) ((sg)->length)
22
23#endif /* !(_M68K_SCATTERLIST_H) */
diff --git a/arch/m68k/include/asm/scatterlist_mm.h b/arch/m68k/include/asm/scatterlist_mm.h
deleted file mode 100644
index d3a7a0edfeca..000000000000
--- a/arch/m68k/include/asm/scatterlist_mm.h
+++ /dev/null
@@ -1,23 +0,0 @@
1#ifndef _M68K_SCATTERLIST_H
2#define _M68K_SCATTERLIST_H
3
4#include <linux/types.h>
5
6struct scatterlist {
7#ifdef CONFIG_DEBUG_SG
8 unsigned long sg_magic;
9#endif
10 unsigned long page_link;
11 unsigned int offset;
12 unsigned int length;
13
14 __u32 dma_address; /* A place to hang host-specific addresses at. */
15};
16
17/* This is bogus and should go away. */
18#define ISA_DMA_THRESHOLD (0x00ffffff)
19
20#define sg_dma_address(sg) ((sg)->dma_address)
21#define sg_dma_len(sg) ((sg)->length)
22
23#endif /* !(_M68K_SCATTERLIST_H) */
diff --git a/arch/m68k/include/asm/scatterlist_no.h b/arch/m68k/include/asm/scatterlist_no.h
deleted file mode 100644
index afc4788b0d2c..000000000000
--- a/arch/m68k/include/asm/scatterlist_no.h
+++ /dev/null
@@ -1,22 +0,0 @@
1#ifndef _M68KNOMMU_SCATTERLIST_H
2#define _M68KNOMMU_SCATTERLIST_H
3
4#include <linux/mm.h>
5#include <asm/types.h>
6
7struct scatterlist {
8#ifdef CONFIG_DEBUG_SG
9 unsigned long sg_magic;
10#endif
11 unsigned long page_link;
12 unsigned int offset;
13 dma_addr_t dma_address;
14 unsigned int length;
15};
16
17#define sg_dma_address(sg) ((sg)->dma_address)
18#define sg_dma_len(sg) ((sg)->length)
19
20#define ISA_DMA_THRESHOLD (0xffffffff)
21
22#endif /* !(_M68KNOMMU_SCATTERLIST_H) */
diff --git a/arch/m68k/include/asm/segment.h b/arch/m68k/include/asm/segment.h
index 82583bc004bd..ee959219fdfe 100644
--- a/arch/m68k/include/asm/segment.h
+++ b/arch/m68k/include/asm/segment.h
@@ -1,5 +1,63 @@
1#ifdef __uClinux__ 1#ifndef _M68K_SEGMENT_H
2#include "segment_no.h" 2#define _M68K_SEGMENT_H
3
4/* define constants */
5/* Address spaces (FC0-FC2) */
6#define USER_DATA (1)
7#ifndef __USER_DS
8#define __USER_DS (USER_DATA)
9#endif
10#define USER_PROGRAM (2)
11#define SUPER_DATA (5)
12#ifndef __KERNEL_DS
13#define __KERNEL_DS (SUPER_DATA)
14#endif
15#define SUPER_PROGRAM (6)
16#define CPU_SPACE (7)
17
18#ifndef __ASSEMBLY__
19
20typedef struct {
21 unsigned long seg;
22} mm_segment_t;
23
24#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
25#define USER_DS MAKE_MM_SEG(__USER_DS)
26#define KERNEL_DS MAKE_MM_SEG(__KERNEL_DS)
27
28/*
29 * Get/set the SFC/DFC registers for MOVES instructions
30 */
31
32static inline mm_segment_t get_fs(void)
33{
34#ifdef CONFIG_MMU
35 mm_segment_t _v;
36 __asm__ ("movec %/dfc,%0":"=r" (_v.seg):);
37
38 return _v;
3#else 39#else
4#include "segment_mm.h" 40 return USER_DS;
41#endif
42}
43
44static inline mm_segment_t get_ds(void)
45{
46 /* return the supervisor data space code */
47 return KERNEL_DS;
48}
49
50static inline void set_fs(mm_segment_t val)
51{
52#ifdef CONFIG_MMU
53 __asm__ __volatile__ ("movec %0,%/sfc\n\t"
54 "movec %0,%/dfc\n\t"
55 : /* no outputs */ : "r" (val.seg) : "memory");
5#endif 56#endif
57}
58
59#define segment_eq(a,b) ((a).seg == (b).seg)
60
61#endif /* __ASSEMBLY__ */
62
63#endif /* _M68K_SEGMENT_H */
diff --git a/arch/m68k/include/asm/segment_mm.h b/arch/m68k/include/asm/segment_mm.h
deleted file mode 100644
index 7b0b2d3127f9..000000000000
--- a/arch/m68k/include/asm/segment_mm.h
+++ /dev/null
@@ -1,57 +0,0 @@
1#ifndef _M68K_SEGMENT_H
2#define _M68K_SEGMENT_H
3
4/* define constants */
5/* Address spaces (FC0-FC2) */
6#define USER_DATA (1)
7#ifndef __USER_DS
8#define __USER_DS (USER_DATA)
9#endif
10#define USER_PROGRAM (2)
11#define SUPER_DATA (5)
12#ifndef __KERNEL_DS
13#define __KERNEL_DS (SUPER_DATA)
14#endif
15#define SUPER_PROGRAM (6)
16#define CPU_SPACE (7)
17
18#ifndef __ASSEMBLY__
19
20typedef struct {
21 unsigned long seg;
22} mm_segment_t;
23
24#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
25#define USER_DS MAKE_MM_SEG(__USER_DS)
26#define KERNEL_DS MAKE_MM_SEG(__KERNEL_DS)
27
28/*
29 * Get/set the SFC/DFC registers for MOVES instructions
30 */
31
32static inline mm_segment_t get_fs(void)
33{
34 mm_segment_t _v;
35 __asm__ ("movec %/dfc,%0":"=r" (_v.seg):);
36
37 return _v;
38}
39
40static inline mm_segment_t get_ds(void)
41{
42 /* return the supervisor data space code */
43 return KERNEL_DS;
44}
45
46static inline void set_fs(mm_segment_t val)
47{
48 __asm__ __volatile__ ("movec %0,%/sfc\n\t"
49 "movec %0,%/dfc\n\t"
50 : /* no outputs */ : "r" (val.seg) : "memory");
51}
52
53#define segment_eq(a,b) ((a).seg == (b).seg)
54
55#endif /* __ASSEMBLY__ */
56
57#endif /* _M68K_SEGMENT_H */
diff --git a/arch/m68k/include/asm/segment_no.h b/arch/m68k/include/asm/segment_no.h
deleted file mode 100644
index 42318ebec7ec..000000000000
--- a/arch/m68k/include/asm/segment_no.h
+++ /dev/null
@@ -1,51 +0,0 @@
1#ifndef _M68K_SEGMENT_H
2#define _M68K_SEGMENT_H
3
4/* define constants */
5/* Address spaces (FC0-FC2) */
6#define USER_DATA (1)
7#ifndef __USER_DS
8#define __USER_DS (USER_DATA)
9#endif
10#define USER_PROGRAM (2)
11#define SUPER_DATA (5)
12#ifndef __KERNEL_DS
13#define __KERNEL_DS (SUPER_DATA)
14#endif
15#define SUPER_PROGRAM (6)
16#define CPU_SPACE (7)
17
18#ifndef __ASSEMBLY__
19
20typedef struct {
21 unsigned long seg;
22} mm_segment_t;
23
24#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
25#define USER_DS MAKE_MM_SEG(__USER_DS)
26#define KERNEL_DS MAKE_MM_SEG(__KERNEL_DS)
27
28/*
29 * Get/set the SFC/DFC registers for MOVES instructions
30 */
31
32static inline mm_segment_t get_fs(void)
33{
34 return USER_DS;
35}
36
37static inline mm_segment_t get_ds(void)
38{
39 /* return the supervisor data space code */
40 return KERNEL_DS;
41}
42
43static inline void set_fs(mm_segment_t val)
44{
45}
46
47#define segment_eq(a,b) ((a).seg == (b).seg)
48
49#endif /* __ASSEMBLY__ */
50
51#endif /* _M68K_SEGMENT_H */
diff --git a/arch/m68k/include/asm/timex.h b/arch/m68k/include/asm/timex.h
index 719762980578..b87f2f278f67 100644
--- a/arch/m68k/include/asm/timex.h
+++ b/arch/m68k/include/asm/timex.h
@@ -1,5 +1,18 @@
1#ifdef __uClinux__ 1/*
2#include "timex_no.h" 2 * linux/include/asm-m68k/timex.h
3#else 3 *
4#include "timex_mm.h" 4 * m68k architecture timex specifications
5 */
6#ifndef _ASMm68k_TIMEX_H
7#define _ASMm68k_TIMEX_H
8
9#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */
10
11typedef unsigned long cycles_t;
12
13static inline cycles_t get_cycles(void)
14{
15 return 0;
16}
17
5#endif 18#endif
diff --git a/arch/m68k/include/asm/timex_mm.h b/arch/m68k/include/asm/timex_mm.h
deleted file mode 100644
index b87f2f278f67..000000000000
--- a/arch/m68k/include/asm/timex_mm.h
+++ /dev/null
@@ -1,18 +0,0 @@
1/*
2 * linux/include/asm-m68k/timex.h
3 *
4 * m68k architecture timex specifications
5 */
6#ifndef _ASMm68k_TIMEX_H
7#define _ASMm68k_TIMEX_H
8
9#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */
10
11typedef unsigned long cycles_t;
12
13static inline cycles_t get_cycles(void)
14{
15 return 0;
16}
17
18#endif
diff --git a/arch/m68k/include/asm/timex_no.h b/arch/m68k/include/asm/timex_no.h
deleted file mode 100644
index 109050f3fe91..000000000000
--- a/arch/m68k/include/asm/timex_no.h
+++ /dev/null
@@ -1,23 +0,0 @@
1/*
2 * linux/include/asm-m68knommu/timex.h
3 *
4 * m68knommu architecture timex specifications
5 */
6#ifndef _ASM_M68KNOMMU_TIMEX_H
7#define _ASM_M68KNOMMU_TIMEX_H
8
9#ifdef CONFIG_COLDFIRE
10#include <asm/coldfire.h>
11#define CLOCK_TICK_RATE MCF_CLK
12#else
13#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */
14#endif
15
16typedef unsigned long cycles_t;
17
18static inline cycles_t get_cycles(void)
19{
20 return 0;
21}
22
23#endif
diff --git a/arch/m68k/include/asm/tlbflush.h b/arch/m68k/include/asm/tlbflush.h
index b6f93b30951e..a6b4ed4fc90f 100644
--- a/arch/m68k/include/asm/tlbflush.h
+++ b/arch/m68k/include/asm/tlbflush.h
@@ -1,5 +1,267 @@
1#ifdef __uClinux__ 1#ifndef _M68K_TLBFLUSH_H
2#include "tlbflush_no.h" 2#define _M68K_TLBFLUSH_H
3
4#ifdef CONFIG_MMU
5#ifndef CONFIG_SUN3
6
7#include <asm/current.h>
8
9static inline void flush_tlb_kernel_page(void *addr)
10{
11 if (CPU_IS_040_OR_060) {
12 mm_segment_t old_fs = get_fs();
13 set_fs(KERNEL_DS);
14 __asm__ __volatile__(".chip 68040\n\t"
15 "pflush (%0)\n\t"
16 ".chip 68k"
17 : : "a" (addr));
18 set_fs(old_fs);
19 } else if (CPU_IS_020_OR_030)
20 __asm__ __volatile__("pflush #4,#4,(%0)" : : "a" (addr));
21}
22
23/*
24 * flush all user-space atc entries.
25 */
26static inline void __flush_tlb(void)
27{
28 if (CPU_IS_040_OR_060)
29 __asm__ __volatile__(".chip 68040\n\t"
30 "pflushan\n\t"
31 ".chip 68k");
32 else if (CPU_IS_020_OR_030)
33 __asm__ __volatile__("pflush #0,#4");
34}
35
36static inline void __flush_tlb040_one(unsigned long addr)
37{
38 __asm__ __volatile__(".chip 68040\n\t"
39 "pflush (%0)\n\t"
40 ".chip 68k"
41 : : "a" (addr));
42}
43
44static inline void __flush_tlb_one(unsigned long addr)
45{
46 if (CPU_IS_040_OR_060)
47 __flush_tlb040_one(addr);
48 else if (CPU_IS_020_OR_030)
49 __asm__ __volatile__("pflush #0,#4,(%0)" : : "a" (addr));
50}
51
52#define flush_tlb() __flush_tlb()
53
54/*
55 * flush all atc entries (both kernel and user-space entries).
56 */
57static inline void flush_tlb_all(void)
58{
59 if (CPU_IS_040_OR_060)
60 __asm__ __volatile__(".chip 68040\n\t"
61 "pflusha\n\t"
62 ".chip 68k");
63 else if (CPU_IS_020_OR_030)
64 __asm__ __volatile__("pflusha");
65}
66
67static inline void flush_tlb_mm(struct mm_struct *mm)
68{
69 if (mm == current->active_mm)
70 __flush_tlb();
71}
72
73static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
74{
75 if (vma->vm_mm == current->active_mm) {
76 mm_segment_t old_fs = get_fs();
77 set_fs(USER_DS);
78 __flush_tlb_one(addr);
79 set_fs(old_fs);
80 }
81}
82
83static inline void flush_tlb_range(struct vm_area_struct *vma,
84 unsigned long start, unsigned long end)
85{
86 if (vma->vm_mm == current->active_mm)
87 __flush_tlb();
88}
89
90static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
91{
92 flush_tlb_all();
93}
94
3#else 95#else
4#include "tlbflush_mm.h" 96
97
98/* Reserved PMEGs. */
99extern char sun3_reserved_pmeg[SUN3_PMEGS_NUM];
100extern unsigned long pmeg_vaddr[SUN3_PMEGS_NUM];
101extern unsigned char pmeg_alloc[SUN3_PMEGS_NUM];
102extern unsigned char pmeg_ctx[SUN3_PMEGS_NUM];
103
104/* Flush all userspace mappings one by one... (why no flush command,
105 sun?) */
106static inline void flush_tlb_all(void)
107{
108 unsigned long addr;
109 unsigned char ctx, oldctx;
110
111 oldctx = sun3_get_context();
112 for(addr = 0x00000000; addr < TASK_SIZE; addr += SUN3_PMEG_SIZE) {
113 for(ctx = 0; ctx < 8; ctx++) {
114 sun3_put_context(ctx);
115 sun3_put_segmap(addr, SUN3_INVALID_PMEG);
116 }
117 }
118
119 sun3_put_context(oldctx);
120 /* erase all of the userspace pmeg maps, we've clobbered them
121 all anyway */
122 for(addr = 0; addr < SUN3_INVALID_PMEG; addr++) {
123 if(pmeg_alloc[addr] == 1) {
124 pmeg_alloc[addr] = 0;
125 pmeg_ctx[addr] = 0;
126 pmeg_vaddr[addr] = 0;
127 }
128 }
129
130}
131
132/* Clear user TLB entries within the context named in mm */
133static inline void flush_tlb_mm (struct mm_struct *mm)
134{
135 unsigned char oldctx;
136 unsigned char seg;
137 unsigned long i;
138
139 oldctx = sun3_get_context();
140 sun3_put_context(mm->context);
141
142 for(i = 0; i < TASK_SIZE; i += SUN3_PMEG_SIZE) {
143 seg = sun3_get_segmap(i);
144 if(seg == SUN3_INVALID_PMEG)
145 continue;
146
147 sun3_put_segmap(i, SUN3_INVALID_PMEG);
148 pmeg_alloc[seg] = 0;
149 pmeg_ctx[seg] = 0;
150 pmeg_vaddr[seg] = 0;
151 }
152
153 sun3_put_context(oldctx);
154
155}
156
157/* Flush a single TLB page. In this case, we're limited to flushing a
158 single PMEG */
159static inline void flush_tlb_page (struct vm_area_struct *vma,
160 unsigned long addr)
161{
162 unsigned char oldctx;
163 unsigned char i;
164
165 oldctx = sun3_get_context();
166 sun3_put_context(vma->vm_mm->context);
167 addr &= ~SUN3_PMEG_MASK;
168 if((i = sun3_get_segmap(addr)) != SUN3_INVALID_PMEG)
169 {
170 pmeg_alloc[i] = 0;
171 pmeg_ctx[i] = 0;
172 pmeg_vaddr[i] = 0;
173 sun3_put_segmap (addr, SUN3_INVALID_PMEG);
174 }
175 sun3_put_context(oldctx);
176
177}
178/* Flush a range of pages from TLB. */
179
180static inline void flush_tlb_range (struct vm_area_struct *vma,
181 unsigned long start, unsigned long end)
182{
183 struct mm_struct *mm = vma->vm_mm;
184 unsigned char seg, oldctx;
185
186 start &= ~SUN3_PMEG_MASK;
187
188 oldctx = sun3_get_context();
189 sun3_put_context(mm->context);
190
191 while(start < end)
192 {
193 if((seg = sun3_get_segmap(start)) == SUN3_INVALID_PMEG)
194 goto next;
195 if(pmeg_ctx[seg] == mm->context) {
196 pmeg_alloc[seg] = 0;
197 pmeg_ctx[seg] = 0;
198 pmeg_vaddr[seg] = 0;
199 }
200 sun3_put_segmap(start, SUN3_INVALID_PMEG);
201 next:
202 start += SUN3_PMEG_SIZE;
203 }
204}
205
206static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
207{
208 flush_tlb_all();
209}
210
211/* Flush kernel page from TLB. */
212static inline void flush_tlb_kernel_page (unsigned long addr)
213{
214 sun3_put_segmap (addr & ~(SUN3_PMEG_SIZE - 1), SUN3_INVALID_PMEG);
215}
216
5#endif 217#endif
218
219#else /* !CONFIG_MMU */
220
221/*
222 * flush all user-space atc entries.
223 */
224static inline void __flush_tlb(void)
225{
226 BUG();
227}
228
229static inline void __flush_tlb_one(unsigned long addr)
230{
231 BUG();
232}
233
234#define flush_tlb() __flush_tlb()
235
236/*
237 * flush all atc entries (both kernel and user-space entries).
238 */
239static inline void flush_tlb_all(void)
240{
241 BUG();
242}
243
244static inline void flush_tlb_mm(struct mm_struct *mm)
245{
246 BUG();
247}
248
249static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
250{
251 BUG();
252}
253
254static inline void flush_tlb_range(struct mm_struct *mm,
255 unsigned long start, unsigned long end)
256{
257 BUG();
258}
259
260static inline void flush_tlb_kernel_page(unsigned long addr)
261{
262 BUG();
263}
264
265#endif /* CONFIG_MMU */
266
267#endif /* _M68K_TLBFLUSH_H */
diff --git a/arch/m68k/include/asm/tlbflush_mm.h b/arch/m68k/include/asm/tlbflush_mm.h
deleted file mode 100644
index acb6bf21a321..000000000000
--- a/arch/m68k/include/asm/tlbflush_mm.h
+++ /dev/null
@@ -1,219 +0,0 @@
1#ifndef _M68K_TLBFLUSH_H
2#define _M68K_TLBFLUSH_H
3
4
5#ifndef CONFIG_SUN3
6
7#include <asm/current.h>
8
9static inline void flush_tlb_kernel_page(void *addr)
10{
11 if (CPU_IS_040_OR_060) {
12 mm_segment_t old_fs = get_fs();
13 set_fs(KERNEL_DS);
14 __asm__ __volatile__(".chip 68040\n\t"
15 "pflush (%0)\n\t"
16 ".chip 68k"
17 : : "a" (addr));
18 set_fs(old_fs);
19 } else if (CPU_IS_020_OR_030)
20 __asm__ __volatile__("pflush #4,#4,(%0)" : : "a" (addr));
21}
22
23/*
24 * flush all user-space atc entries.
25 */
26static inline void __flush_tlb(void)
27{
28 if (CPU_IS_040_OR_060)
29 __asm__ __volatile__(".chip 68040\n\t"
30 "pflushan\n\t"
31 ".chip 68k");
32 else if (CPU_IS_020_OR_030)
33 __asm__ __volatile__("pflush #0,#4");
34}
35
36static inline void __flush_tlb040_one(unsigned long addr)
37{
38 __asm__ __volatile__(".chip 68040\n\t"
39 "pflush (%0)\n\t"
40 ".chip 68k"
41 : : "a" (addr));
42}
43
44static inline void __flush_tlb_one(unsigned long addr)
45{
46 if (CPU_IS_040_OR_060)
47 __flush_tlb040_one(addr);
48 else if (CPU_IS_020_OR_030)
49 __asm__ __volatile__("pflush #0,#4,(%0)" : : "a" (addr));
50}
51
52#define flush_tlb() __flush_tlb()
53
54/*
55 * flush all atc entries (both kernel and user-space entries).
56 */
57static inline void flush_tlb_all(void)
58{
59 if (CPU_IS_040_OR_060)
60 __asm__ __volatile__(".chip 68040\n\t"
61 "pflusha\n\t"
62 ".chip 68k");
63 else if (CPU_IS_020_OR_030)
64 __asm__ __volatile__("pflusha");
65}
66
67static inline void flush_tlb_mm(struct mm_struct *mm)
68{
69 if (mm == current->active_mm)
70 __flush_tlb();
71}
72
73static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
74{
75 if (vma->vm_mm == current->active_mm) {
76 mm_segment_t old_fs = get_fs();
77 set_fs(USER_DS);
78 __flush_tlb_one(addr);
79 set_fs(old_fs);
80 }
81}
82
83static inline void flush_tlb_range(struct vm_area_struct *vma,
84 unsigned long start, unsigned long end)
85{
86 if (vma->vm_mm == current->active_mm)
87 __flush_tlb();
88}
89
90static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
91{
92 flush_tlb_all();
93}
94
95#else
96
97
98/* Reserved PMEGs. */
99extern char sun3_reserved_pmeg[SUN3_PMEGS_NUM];
100extern unsigned long pmeg_vaddr[SUN3_PMEGS_NUM];
101extern unsigned char pmeg_alloc[SUN3_PMEGS_NUM];
102extern unsigned char pmeg_ctx[SUN3_PMEGS_NUM];
103
104/* Flush all userspace mappings one by one... (why no flush command,
105 sun?) */
106static inline void flush_tlb_all(void)
107{
108 unsigned long addr;
109 unsigned char ctx, oldctx;
110
111 oldctx = sun3_get_context();
112 for(addr = 0x00000000; addr < TASK_SIZE; addr += SUN3_PMEG_SIZE) {
113 for(ctx = 0; ctx < 8; ctx++) {
114 sun3_put_context(ctx);
115 sun3_put_segmap(addr, SUN3_INVALID_PMEG);
116 }
117 }
118
119 sun3_put_context(oldctx);
120 /* erase all of the userspace pmeg maps, we've clobbered them
121 all anyway */
122 for(addr = 0; addr < SUN3_INVALID_PMEG; addr++) {
123 if(pmeg_alloc[addr] == 1) {
124 pmeg_alloc[addr] = 0;
125 pmeg_ctx[addr] = 0;
126 pmeg_vaddr[addr] = 0;
127 }
128 }
129
130}
131
132/* Clear user TLB entries within the context named in mm */
133static inline void flush_tlb_mm (struct mm_struct *mm)
134{
135 unsigned char oldctx;
136 unsigned char seg;
137 unsigned long i;
138
139 oldctx = sun3_get_context();
140 sun3_put_context(mm->context);
141
142 for(i = 0; i < TASK_SIZE; i += SUN3_PMEG_SIZE) {
143 seg = sun3_get_segmap(i);
144 if(seg == SUN3_INVALID_PMEG)
145 continue;
146
147 sun3_put_segmap(i, SUN3_INVALID_PMEG);
148 pmeg_alloc[seg] = 0;
149 pmeg_ctx[seg] = 0;
150 pmeg_vaddr[seg] = 0;
151 }
152
153 sun3_put_context(oldctx);
154
155}
156
157/* Flush a single TLB page. In this case, we're limited to flushing a
158 single PMEG */
159static inline void flush_tlb_page (struct vm_area_struct *vma,
160 unsigned long addr)
161{
162 unsigned char oldctx;
163 unsigned char i;
164
165 oldctx = sun3_get_context();
166 sun3_put_context(vma->vm_mm->context);
167 addr &= ~SUN3_PMEG_MASK;
168 if((i = sun3_get_segmap(addr)) != SUN3_INVALID_PMEG)
169 {
170 pmeg_alloc[i] = 0;
171 pmeg_ctx[i] = 0;
172 pmeg_vaddr[i] = 0;
173 sun3_put_segmap (addr, SUN3_INVALID_PMEG);
174 }
175 sun3_put_context(oldctx);
176
177}
178/* Flush a range of pages from TLB. */
179
180static inline void flush_tlb_range (struct vm_area_struct *vma,
181 unsigned long start, unsigned long end)
182{
183 struct mm_struct *mm = vma->vm_mm;
184 unsigned char seg, oldctx;
185
186 start &= ~SUN3_PMEG_MASK;
187
188 oldctx = sun3_get_context();
189 sun3_put_context(mm->context);
190
191 while(start < end)
192 {
193 if((seg = sun3_get_segmap(start)) == SUN3_INVALID_PMEG)
194 goto next;
195 if(pmeg_ctx[seg] == mm->context) {
196 pmeg_alloc[seg] = 0;
197 pmeg_ctx[seg] = 0;
198 pmeg_vaddr[seg] = 0;
199 }
200 sun3_put_segmap(start, SUN3_INVALID_PMEG);
201 next:
202 start += SUN3_PMEG_SIZE;
203 }
204}
205
206static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
207{
208 flush_tlb_all();
209}
210
211/* Flush kernel page from TLB. */
212static inline void flush_tlb_kernel_page (unsigned long addr)
213{
214 sun3_put_segmap (addr & ~(SUN3_PMEG_SIZE - 1), SUN3_INVALID_PMEG);
215}
216
217#endif
218
219#endif /* _M68K_TLBFLUSH_H */
diff --git a/arch/m68k/include/asm/tlbflush_no.h b/arch/m68k/include/asm/tlbflush_no.h
deleted file mode 100644
index a470cfb803eb..000000000000
--- a/arch/m68k/include/asm/tlbflush_no.h
+++ /dev/null
@@ -1,55 +0,0 @@
1#ifndef _M68KNOMMU_TLBFLUSH_H
2#define _M68KNOMMU_TLBFLUSH_H
3
4/*
5 * Copyright (C) 2000 Lineo, David McCullough <davidm@uclinux.org>
6 * Copyright (C) 2000-2002, Greg Ungerer <gerg@snapgear.com>
7 */
8
9#include <asm/setup.h>
10
11/*
12 * flush all user-space atc entries.
13 */
14static inline void __flush_tlb(void)
15{
16 BUG();
17}
18
19static inline void __flush_tlb_one(unsigned long addr)
20{
21 BUG();
22}
23
24#define flush_tlb() __flush_tlb()
25
26/*
27 * flush all atc entries (both kernel and user-space entries).
28 */
29static inline void flush_tlb_all(void)
30{
31 BUG();
32}
33
34static inline void flush_tlb_mm(struct mm_struct *mm)
35{
36 BUG();
37}
38
39static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
40{
41 BUG();
42}
43
44static inline void flush_tlb_range(struct mm_struct *mm,
45 unsigned long start, unsigned long end)
46{
47 BUG();
48}
49
50static inline void flush_tlb_kernel_page(unsigned long addr)
51{
52 BUG();
53}
54
55#endif /* _M68KNOMMU_TLBFLUSH_H */
diff --git a/arch/m68k/include/asm/ucontext.h b/arch/m68k/include/asm/ucontext.h
index b53cd160c0b3..e4e22669edc0 100644
--- a/arch/m68k/include/asm/ucontext.h
+++ b/arch/m68k/include/asm/ucontext.h
@@ -1,5 +1,30 @@
1#ifdef __uClinux__ 1#ifndef _M68K_UCONTEXT_H
2#include "ucontext_no.h" 2#define _M68K_UCONTEXT_H
3#else 3
4#include "ucontext_mm.h" 4typedef int greg_t;
5#define NGREG 18
6typedef greg_t gregset_t[NGREG];
7
8typedef struct fpregset {
9 int f_fpcntl[3];
10 int f_fpregs[8*3];
11} fpregset_t;
12
13struct mcontext {
14 int version;
15 gregset_t gregs;
16 fpregset_t fpregs;
17};
18
19#define MCONTEXT_VERSION 2
20
21struct ucontext {
22 unsigned long uc_flags;
23 struct ucontext *uc_link;
24 stack_t uc_stack;
25 struct mcontext uc_mcontext;
26 unsigned long uc_filler[80];
27 sigset_t uc_sigmask; /* mask last for extensibility */
28};
29
5#endif 30#endif
diff --git a/arch/m68k/include/asm/ucontext_mm.h b/arch/m68k/include/asm/ucontext_mm.h
deleted file mode 100644
index e4e22669edc0..000000000000
--- a/arch/m68k/include/asm/ucontext_mm.h
+++ /dev/null
@@ -1,30 +0,0 @@
1#ifndef _M68K_UCONTEXT_H
2#define _M68K_UCONTEXT_H
3
4typedef int greg_t;
5#define NGREG 18
6typedef greg_t gregset_t[NGREG];
7
8typedef struct fpregset {
9 int f_fpcntl[3];
10 int f_fpregs[8*3];
11} fpregset_t;
12
13struct mcontext {
14 int version;
15 gregset_t gregs;
16 fpregset_t fpregs;
17};
18
19#define MCONTEXT_VERSION 2
20
21struct ucontext {
22 unsigned long uc_flags;
23 struct ucontext *uc_link;
24 stack_t uc_stack;
25 struct mcontext uc_mcontext;
26 unsigned long uc_filler[80];
27 sigset_t uc_sigmask; /* mask last for extensibility */
28};
29
30#endif
diff --git a/arch/m68k/include/asm/ucontext_no.h b/arch/m68k/include/asm/ucontext_no.h
deleted file mode 100644
index 713a27f901cd..000000000000
--- a/arch/m68k/include/asm/ucontext_no.h
+++ /dev/null
@@ -1,32 +0,0 @@
1#ifndef _M68KNOMMU_UCONTEXT_H
2#define _M68KNOMMU_UCONTEXT_H
3
4typedef int greg_t;
5#define NGREG 18
6typedef greg_t gregset_t[NGREG];
7
8typedef struct fpregset {
9 int f_pcr;
10 int f_psr;
11 int f_fpiaddr;
12 int f_fpregs[8][3];
13} fpregset_t;
14
15struct mcontext {
16 int version;
17 gregset_t gregs;
18 fpregset_t fpregs;
19};
20
21#define MCONTEXT_VERSION 2
22
23struct ucontext {
24 unsigned long uc_flags;
25 struct ucontext *uc_link;
26 stack_t uc_stack;
27 struct mcontext uc_mcontext;
28 unsigned long uc_filler[80];
29 sigset_t uc_sigmask; /* mask last for extensibility */
30};
31
32#endif
diff --git a/arch/m68k/include/asm/unaligned.h b/arch/m68k/include/asm/unaligned.h
index c640bba3bdf4..019caa740c21 100644
--- a/arch/m68k/include/asm/unaligned.h
+++ b/arch/m68k/include/asm/unaligned.h
@@ -1,5 +1,25 @@
1#ifdef __uClinux__ 1#ifndef _ASM_M68K_UNALIGNED_H
2#include "unaligned_no.h" 2#define _ASM_M68K_UNALIGNED_H
3
4
5#ifdef CONFIG_COLDFIRE
6#include <linux/unaligned/be_struct.h>
7#include <linux/unaligned/le_byteshift.h>
8#include <linux/unaligned/generic.h>
9
10#define get_unaligned __get_unaligned_be
11#define put_unaligned __put_unaligned_be
12
3#else 13#else
4#include "unaligned_mm.h" 14/*
15 * The m68k can do unaligned accesses itself.
16 */
17#include <linux/unaligned/access_ok.h>
18#include <linux/unaligned/generic.h>
19
20#define get_unaligned __get_unaligned_be
21#define put_unaligned __put_unaligned_be
22
5#endif 23#endif
24
25#endif /* _ASM_M68K_UNALIGNED_H */
diff --git a/arch/m68k/include/asm/unaligned_mm.h b/arch/m68k/include/asm/unaligned_mm.h
deleted file mode 100644
index 77698f2dc33c..000000000000
--- a/arch/m68k/include/asm/unaligned_mm.h
+++ /dev/null
@@ -1,13 +0,0 @@
1#ifndef _ASM_M68K_UNALIGNED_H
2#define _ASM_M68K_UNALIGNED_H
3
4/*
5 * The m68k can do unaligned accesses itself.
6 */
7#include <linux/unaligned/access_ok.h>
8#include <linux/unaligned/generic.h>
9
10#define get_unaligned __get_unaligned_be
11#define put_unaligned __put_unaligned_be
12
13#endif /* _ASM_M68K_UNALIGNED_H */
diff --git a/arch/m68k/include/asm/unaligned_no.h b/arch/m68k/include/asm/unaligned_no.h
deleted file mode 100644
index eb1ea4cb9a59..000000000000
--- a/arch/m68k/include/asm/unaligned_no.h
+++ /dev/null
@@ -1,25 +0,0 @@
1#ifndef _ASM_M68KNOMMU_UNALIGNED_H
2#define _ASM_M68KNOMMU_UNALIGNED_H
3
4
5#ifdef CONFIG_COLDFIRE
6#include <linux/unaligned/be_struct.h>
7#include <linux/unaligned/le_byteshift.h>
8#include <linux/unaligned/generic.h>
9
10#define get_unaligned __get_unaligned_be
11#define put_unaligned __put_unaligned_be
12
13#else
14/*
15 * The m68k can do unaligned accesses itself.
16 */
17#include <linux/unaligned/access_ok.h>
18#include <linux/unaligned/generic.h>
19
20#define get_unaligned __get_unaligned_be
21#define put_unaligned __put_unaligned_be
22
23#endif
24
25#endif /* _ASM_M68KNOMMU_UNALIGNED_H */
diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
index 7db41594d7b6..54d980795fc4 100644
--- a/arch/m68k/kernel/time.c
+++ b/arch/m68k/kernel/time.c
@@ -18,6 +18,7 @@
18#include <linux/string.h> 18#include <linux/string.h>
19#include <linux/mm.h> 19#include <linux/mm.h>
20#include <linux/rtc.h> 20#include <linux/rtc.h>
21#include <linux/platform_device.h>
21 22
22#include <asm/machdep.h> 23#include <asm/machdep.h>
23#include <asm/io.h> 24#include <asm/io.h>
@@ -159,3 +160,20 @@ int do_settimeofday(struct timespec *tv)
159} 160}
160 161
161EXPORT_SYMBOL(do_settimeofday); 162EXPORT_SYMBOL(do_settimeofday);
163
164
165static int __init rtc_init(void)
166{
167 struct platform_device *pdev;
168
169 if (!mach_hwclk)
170 return -ENODEV;
171
172 pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
173 if (IS_ERR(pdev))
174 return PTR_ERR(pdev);
175
176 return 0;
177}
178
179module_init(rtc_init);
diff --git a/arch/m68knommu/Makefile b/arch/m68knommu/Makefile
index fd0fb303d885..ce404bc9ccbd 100644
--- a/arch/m68knommu/Makefile
+++ b/arch/m68knommu/Makefile
@@ -88,18 +88,18 @@ export PLATFORM BOARD MODEL CPUCLASS
88# 88#
89# Some CFLAG additions based on specific CPU type. 89# Some CFLAG additions based on specific CPU type.
90# 90#
91cflags-$(CONFIG_M5206) := -m5200 91cflags-$(CONFIG_M5206) := $(call cc-option,-mcpu=5206,-m5200)
92cflags-$(CONFIG_M5206e) := -m5200 92cflags-$(CONFIG_M5206e) := $(call cc-option,-m5206e,-m5200)
93cflags-$(CONFIG_M520x) := -m5307 93cflags-$(CONFIG_M520x) := $(call cc-option,-mcpu=5208,-m5200)
94cflags-$(CONFIG_M523x) := $(call cc-option,-mcpu=523x,-m5307) 94cflags-$(CONFIG_M523x) := $(call cc-option,-mcpu=523x,-m5307)
95cflags-$(CONFIG_M5249) := -m5200 95cflags-$(CONFIG_M5249) := $(call cc-option,-mcpu=5249,-m5200)
96cflags-$(CONFIG_M5271) := $(call cc-option,-mcpu=5271,-m5307) 96cflags-$(CONFIG_M5271) := $(call cc-option,-mcpu=5271,-m5307)
97cflags-$(CONFIG_M5272) := -m5307 97cflags-$(CONFIG_M5272) := $(call cc-option,-mcpu=5271,-m5200)
98cflags-$(CONFIG_M5275) := $(call cc-option,-mcpu=5275,-m5307) 98cflags-$(CONFIG_M5275) := $(call cc-option,-mcpu=5275,-m5307)
99cflags-$(CONFIG_M528x) := $(call cc-option,-m528x,-m5307) 99cflags-$(CONFIG_M528x) := $(call cc-option,-m528x,-m5307)
100cflags-$(CONFIG_M5307) := -m5307 100cflags-$(CONFIG_M5307) := $(call cc-option,-m5307,-m5200)
101cflags-$(CONFIG_M532x) := $(call cc-option,-mcpu=532x,-m5307) 101cflags-$(CONFIG_M532x) := $(call cc-option,-mcpu=532x,-m5307)
102cflags-$(CONFIG_M5407) := -m5200 102cflags-$(CONFIG_M5407) := $(call cc-option,-m5407,-m5200)
103cflags-$(CONFIG_M68328) := -m68000 103cflags-$(CONFIG_M68328) := -m68000
104cflags-$(CONFIG_M68EZ328) := -m68000 104cflags-$(CONFIG_M68EZ328) := -m68000
105cflags-$(CONFIG_M68VZ328) := -m68000 105cflags-$(CONFIG_M68VZ328) := -m68000
diff --git a/arch/m68knommu/kernel/dma.c b/arch/m68knommu/kernel/dma.c
index e10eafc52789..936125806638 100644
--- a/arch/m68knommu/kernel/dma.c
+++ b/arch/m68knommu/kernel/dma.c
@@ -9,10 +9,11 @@
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/string.h> 10#include <linux/string.h>
11#include <linux/device.h> 11#include <linux/device.h>
12#include <linux/dma-mapping.h>
12#include <asm/io.h> 13#include <asm/io.h>
13 14
14void *dma_alloc_coherent(struct device *dev, size_t size, 15void *dma_alloc_coherent(struct device *dev, size_t size,
15 dma_addr_t *dma_handle, int gfp) 16 dma_addr_t *dma_handle, gfp_t gfp)
16{ 17{
17 void *ret; 18 void *ret;
18 /* ignore region specifiers */ 19 /* ignore region specifiers */
@@ -34,3 +35,8 @@ void dma_free_coherent(struct device *dev, size_t size,
34{ 35{
35 free_pages((unsigned long)vaddr, get_order(size)); 36 free_pages((unsigned long)vaddr, get_order(size));
36} 37}
38
39void dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir)
40{
41}
42
diff --git a/arch/m68knommu/kernel/irq.c b/arch/m68knommu/kernel/irq.c
index bba1bb48a21f..56e0f4c55a67 100644
--- a/arch/m68knommu/kernel/irq.c
+++ b/arch/m68knommu/kernel/irq.c
@@ -23,7 +23,7 @@ asmlinkage void do_IRQ(int irq, struct pt_regs *regs)
23 struct pt_regs *oldregs = set_irq_regs(regs); 23 struct pt_regs *oldregs = set_irq_regs(regs);
24 24
25 irq_enter(); 25 irq_enter();
26 __do_IRQ(irq); 26 generic_handle_irq(irq);
27 irq_exit(); 27 irq_exit();
28 28
29 set_irq_regs(oldregs); 29 set_irq_regs(oldregs);
diff --git a/arch/m68knommu/mm/init.c b/arch/m68knommu/mm/init.c
index 3bf249c53e41..7befc0c357e0 100644
--- a/arch/m68knommu/mm/init.c
+++ b/arch/m68knommu/mm/init.c
@@ -111,11 +111,7 @@ void __init paging_init(void)
111 { 111 {
112 unsigned long zones_size[MAX_NR_ZONES] = {0, }; 112 unsigned long zones_size[MAX_NR_ZONES] = {0, };
113 113
114 zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT; 114 zones_size[ZONE_DMA] = (end_mem - PAGE_OFFSET) >> PAGE_SHIFT;
115 zones_size[ZONE_NORMAL] = (end_mem - PAGE_OFFSET) >> PAGE_SHIFT;
116#ifdef CONFIG_HIGHMEM
117 zones_size[ZONE_HIGHMEM] = 0;
118#endif
119 free_area_init(zones_size); 115 free_area_init(zones_size);
120 } 116 }
121} 117}
diff --git a/arch/m68knommu/platform/5249/config.c b/arch/m68knommu/platform/5249/config.c
index d299f7b8768a..9eab19d01eb1 100644
--- a/arch/m68knommu/platform/5249/config.c
+++ b/arch/m68knommu/platform/5249/config.c
@@ -32,7 +32,8 @@ static struct mcf_platform_uart m5249_uart_platform[] = {
32 { 32 {
33 .mapbase = MCF_MBAR + MCFUART_BASE2, 33 .mapbase = MCF_MBAR + MCFUART_BASE2,
34 .irq = 74, 34 .irq = 74,
35 } 35 },
36 { },
36}; 37};
37 38
38static struct platform_device m5249_uart = { 39static struct platform_device m5249_uart = {
@@ -50,12 +51,12 @@ static struct platform_device *m5249_devices[] __initdata = {
50static void __init m5249_uart_init_line(int line, int irq) 51static void __init m5249_uart_init_line(int line, int irq)
51{ 52{
52 if (line == 0) { 53 if (line == 0) {
53 writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR); 54 writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR);
54 writeb(irq, MCFUART_BASE1 + MCFUART_UIVR); 55 writeb(irq, MCF_MBAR + MCFUART_BASE1 + MCFUART_UIVR);
55 mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART1); 56 mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART1);
56 } else if (line == 1) { 57 } else if (line == 1) {
57 writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR); 58 writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR);
58 writeb(irq, MCFUART_BASE2 + MCFUART_UIVR); 59 writeb(irq, MCF_MBAR + MCFUART_BASE2 + MCFUART_UIVR);
59 mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART2); 60 mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART2);
60 } 61 }
61} 62}
diff --git a/arch/m68knommu/platform/5307/config.c b/arch/m68knommu/platform/5307/config.c
index 724faf05852a..44803bf70a6e 100644
--- a/arch/m68knommu/platform/5307/config.c
+++ b/arch/m68knommu/platform/5307/config.c
@@ -65,12 +65,12 @@ static struct platform_device *m5307_devices[] __initdata = {
65static void __init m5307_uart_init_line(int line, int irq) 65static void __init m5307_uart_init_line(int line, int irq)
66{ 66{
67 if (line == 0) { 67 if (line == 0) {
68 writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR); 68 writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR);
69 writeb(irq, MCFUART_BASE1 + MCFUART_UIVR); 69 writeb(irq, MCF_MBAR + MCFUART_BASE1 + MCFUART_UIVR);
70 mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART1); 70 mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART1);
71 } else if (line == 1) { 71 } else if (line == 1) {
72 writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR); 72 writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR);
73 writeb(irq, MCFUART_BASE2 + MCFUART_UIVR); 73 writeb(irq, MCF_MBAR + MCFUART_BASE2 + MCFUART_UIVR);
74 mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART2); 74 mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART2);
75 } 75 }
76} 76}
diff --git a/arch/m68knommu/platform/5407/config.c b/arch/m68knommu/platform/5407/config.c
index 648b8b778211..0ee8c1a200c8 100644
--- a/arch/m68knommu/platform/5407/config.c
+++ b/arch/m68knommu/platform/5407/config.c
@@ -56,12 +56,12 @@ static struct platform_device *m5407_devices[] __initdata = {
56static void __init m5407_uart_init_line(int line, int irq) 56static void __init m5407_uart_init_line(int line, int irq)
57{ 57{
58 if (line == 0) { 58 if (line == 0) {
59 writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR); 59 writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR);
60 writeb(irq, MCFUART_BASE1 + MCFUART_UIVR); 60 writeb(irq, MCF_MBAR + MCFUART_BASE1 + MCFUART_UIVR);
61 mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART1); 61 mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART1);
62 } else if (line == 1) { 62 } else if (line == 1) {
63 writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR); 63 writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR);
64 writeb(irq, MCFUART_BASE2 + MCFUART_UIVR); 64 writeb(irq, MCF_MBAR + MCFUART_BASE2 + MCFUART_UIVR);
65 mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART2); 65 mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART2);
66 } 66 }
67} 67}
diff --git a/arch/m68knommu/platform/coldfire/Makefile b/arch/m68knommu/platform/coldfire/Makefile
index 4f416a91a829..1bcb9372353f 100644
--- a/arch/m68knommu/platform/coldfire/Makefile
+++ b/arch/m68knommu/platform/coldfire/Makefile
@@ -14,7 +14,7 @@
14 14
15asflags-$(CONFIG_FULLDEBUG) := -DDEBUGGER_COMPATIBLE_CACHE=1 15asflags-$(CONFIG_FULLDEBUG) := -DDEBUGGER_COMPATIBLE_CACHE=1
16 16
17obj-$(CONFIG_COLDFIRE) += dma.o entry.o vectors.o 17obj-$(CONFIG_COLDFIRE) += clk.o dma.o entry.o vectors.o
18obj-$(CONFIG_M5206) += timers.o 18obj-$(CONFIG_M5206) += timers.o
19obj-$(CONFIG_M5206e) += timers.o 19obj-$(CONFIG_M5206e) += timers.o
20obj-$(CONFIG_M520x) += pit.o 20obj-$(CONFIG_M520x) += pit.o
diff --git a/arch/m68knommu/platform/coldfire/clk.c b/arch/m68knommu/platform/coldfire/clk.c
new file mode 100644
index 000000000000..7cdbf445b28f
--- /dev/null
+++ b/arch/m68knommu/platform/coldfire/clk.c
@@ -0,0 +1,40 @@
1/***************************************************************************/
2
3/*
4 * clk.c -- general ColdFire CPU kernel clk handling
5 *
6 * Copyright (C) 2009, Greg Ungerer (gerg@snapgear.com)
7 */
8
9/***************************************************************************/
10
11#include <linux/kernel.h>
12#include <linux/clk.h>
13#include <asm/coldfire.h>
14
15/***************************************************************************/
16
17struct clk *clk_get(struct device *dev, const char *id)
18{
19 return NULL;
20}
21
22int clk_enable(struct clk *clk)
23{
24 return 0;
25}
26
27void clk_disable(struct clk *clk)
28{
29}
30
31void clk_put(struct clk *clk)
32{
33}
34
35unsigned long clk_get_rate(struct clk *clk)
36{
37 return MCF_CLK;
38}
39
40/***************************************************************************/
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index aacf11d33723..9038f39d9d73 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -9,9 +9,13 @@ config PARISC
9 def_bool y 9 def_bool y
10 select HAVE_IDE 10 select HAVE_IDE
11 select HAVE_OPROFILE 11 select HAVE_OPROFILE
12 select HAVE_FUNCTION_TRACER if 64BIT
13 select HAVE_FUNCTION_GRAPH_TRACER if 64BIT
14 select HAVE_FUNCTION_TRACE_MCOUNT_TEST if 64BIT
12 select RTC_CLASS 15 select RTC_CLASS
13 select RTC_DRV_PARISC 16 select RTC_DRV_GENERIC
14 select INIT_ALL_POSSIBLE 17 select INIT_ALL_POSSIBLE
18 select BUG
15 help 19 help
16 The PA-RISC microprocessor is designed by Hewlett-Packard and used 20 The PA-RISC microprocessor is designed by Hewlett-Packard and used
17 in many of their workstations & servers (HP9000 700 and 800 series, 21 in many of their workstations & servers (HP9000 700 and 800 series,
@@ -75,6 +79,9 @@ config GENERIC_HARDIRQS
75config GENERIC_IRQ_PROBE 79config GENERIC_IRQ_PROBE
76 def_bool y 80 def_bool y
77 81
82config HAVE_LATENCYTOP_SUPPORT
83 def_bool y
84
78config IRQ_PER_CPU 85config IRQ_PER_CPU
79 bool 86 bool
80 default y 87 default y
@@ -83,6 +90,9 @@ config IRQ_PER_CPU
83config PM 90config PM
84 bool 91 bool
85 92
93config STACKTRACE_SUPPORT
94 def_bool y
95
86config ISA_DMA_API 96config ISA_DMA_API
87 bool 97 bool
88 98
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 0d428278356d..da6f66901c92 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -56,7 +56,9 @@ cflags-y += -mdisable-fpregs
56 56
57# Without this, "ld -r" results in .text sections that are too big 57# Without this, "ld -r" results in .text sections that are too big
58# (> 0x40000) for branches to reach stubs. 58# (> 0x40000) for branches to reach stubs.
59cflags-y += -ffunction-sections 59ifndef CONFIG_FUNCTION_TRACER
60 cflags-y += -ffunction-sections
61endif
60 62
61# select which processor to optimise for 63# select which processor to optimise for
62cflags-$(CONFIG_PA7100) += -march=1.1 -mschedule=7100 64cflags-$(CONFIG_PA7100) += -march=1.1 -mschedule=7100
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index edbfe25c5fc1..ada3e5364d82 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -25,7 +25,7 @@
25 * Since "a" is usually an address, use one spinlock per cacheline. 25 * Since "a" is usually an address, use one spinlock per cacheline.
26 */ 26 */
27# define ATOMIC_HASH_SIZE 4 27# define ATOMIC_HASH_SIZE 4
28# define ATOMIC_HASH(a) (&(__atomic_hash[ (((unsigned long) a)/L1_CACHE_BYTES) & (ATOMIC_HASH_SIZE-1) ])) 28# define ATOMIC_HASH(a) (&(__atomic_hash[ (((unsigned long) (a))/L1_CACHE_BYTES) & (ATOMIC_HASH_SIZE-1) ]))
29 29
30extern raw_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned; 30extern raw_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
31 31
@@ -222,13 +222,13 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
222 222
223#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) 223#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
224 224
225#define atomic_add(i,v) ((void)(__atomic_add_return( ((int)i),(v)))) 225#define atomic_add(i,v) ((void)(__atomic_add_return( ((int)(i)),(v))))
226#define atomic_sub(i,v) ((void)(__atomic_add_return(-((int)i),(v)))) 226#define atomic_sub(i,v) ((void)(__atomic_add_return(-((int)(i)),(v))))
227#define atomic_inc(v) ((void)(__atomic_add_return( 1,(v)))) 227#define atomic_inc(v) ((void)(__atomic_add_return( 1,(v))))
228#define atomic_dec(v) ((void)(__atomic_add_return( -1,(v)))) 228#define atomic_dec(v) ((void)(__atomic_add_return( -1,(v))))
229 229
230#define atomic_add_return(i,v) (__atomic_add_return( ((int)i),(v))) 230#define atomic_add_return(i,v) (__atomic_add_return( ((int)(i)),(v)))
231#define atomic_sub_return(i,v) (__atomic_add_return(-((int)i),(v))) 231#define atomic_sub_return(i,v) (__atomic_add_return(-((int)(i)),(v)))
232#define atomic_inc_return(v) (__atomic_add_return( 1,(v))) 232#define atomic_inc_return(v) (__atomic_add_return( 1,(v)))
233#define atomic_dec_return(v) (__atomic_add_return( -1,(v))) 233#define atomic_dec_return(v) (__atomic_add_return( -1,(v)))
234 234
@@ -289,13 +289,13 @@ atomic64_read(const atomic64_t *v)
289 return v->counter; 289 return v->counter;
290} 290}
291 291
292#define atomic64_add(i,v) ((void)(__atomic64_add_return( ((s64)i),(v)))) 292#define atomic64_add(i,v) ((void)(__atomic64_add_return( ((s64)(i)),(v))))
293#define atomic64_sub(i,v) ((void)(__atomic64_add_return(-((s64)i),(v)))) 293#define atomic64_sub(i,v) ((void)(__atomic64_add_return(-((s64)(i)),(v))))
294#define atomic64_inc(v) ((void)(__atomic64_add_return( 1,(v)))) 294#define atomic64_inc(v) ((void)(__atomic64_add_return( 1,(v))))
295#define atomic64_dec(v) ((void)(__atomic64_add_return( -1,(v)))) 295#define atomic64_dec(v) ((void)(__atomic64_add_return( -1,(v))))
296 296
297#define atomic64_add_return(i,v) (__atomic64_add_return( ((s64)i),(v))) 297#define atomic64_add_return(i,v) (__atomic64_add_return( ((s64)(i)),(v)))
298#define atomic64_sub_return(i,v) (__atomic64_add_return(-((s64)i),(v))) 298#define atomic64_sub_return(i,v) (__atomic64_add_return(-((s64)(i)),(v)))
299#define atomic64_inc_return(v) (__atomic64_add_return( 1,(v))) 299#define atomic64_inc_return(v) (__atomic64_add_return( 1,(v)))
300#define atomic64_dec_return(v) (__atomic64_add_return( -1,(v))) 300#define atomic64_dec_return(v) (__atomic64_add_return( -1,(v)))
301 301
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index b7ca6dc7fddc..724395143f26 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -97,6 +97,9 @@ void mark_rodata_ro(void);
97 97
98#ifdef CONFIG_PA8X00 98#ifdef CONFIG_PA8X00
99/* Only pa8800, pa8900 needs this */ 99/* Only pa8800, pa8900 needs this */
100
101#include <asm/kmap_types.h>
102
100#define ARCH_HAS_KMAP 103#define ARCH_HAS_KMAP
101 104
102void kunmap_parisc(void *addr); 105void kunmap_parisc(void *addr);
diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h
index 7fa675799e6d..9c802eb4be84 100644
--- a/arch/parisc/include/asm/elf.h
+++ b/arch/parisc/include/asm/elf.h
@@ -168,6 +168,16 @@ typedef struct elf64_fdesc {
168 __u64 gp; 168 __u64 gp;
169} Elf64_Fdesc; 169} Elf64_Fdesc;
170 170
171#ifdef __KERNEL__
172
173#ifdef CONFIG_64BIT
174#define Elf_Fdesc Elf64_Fdesc
175#else
176#define Elf_Fdesc Elf32_Fdesc
177#endif /*CONFIG_64BIT*/
178
179#endif /*__KERNEL__*/
180
171/* Legal values for p_type field of Elf32_Phdr/Elf64_Phdr. */ 181/* Legal values for p_type field of Elf32_Phdr/Elf64_Phdr. */
172 182
173#define PT_HP_TLS (PT_LOOS + 0x0) 183#define PT_HP_TLS (PT_LOOS + 0x0)
diff --git a/arch/parisc/include/asm/ftrace.h b/arch/parisc/include/asm/ftrace.h
new file mode 100644
index 000000000000..2fa05dd6aeee
--- /dev/null
+++ b/arch/parisc/include/asm/ftrace.h
@@ -0,0 +1,25 @@
1#ifndef _ASM_PARISC_FTRACE_H
2#define _ASM_PARISC_FTRACE_H
3
4#ifndef __ASSEMBLY__
5extern void mcount(void);
6
7/*
8 * Stack of return addresses for functions of a thread.
9 * Used in struct thread_info
10 */
11struct ftrace_ret_stack {
12 unsigned long ret;
13 unsigned long func;
14 unsigned long long calltime;
15};
16
17/*
18 * Primary handler of a function return.
19 * It relies on ftrace_return_to_handler.
20 * Defined in entry.S
21 */
22extern void return_to_handler(void);
23#endif /* __ASSEMBLY__ */
24
25#endif /* _ASM_PARISC_FTRACE_H */
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index c3941f09a878..7bc5125d7d4c 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -36,16 +36,7 @@ void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
36 */ 36 */
37#define STRICT_MM_TYPECHECKS 37#define STRICT_MM_TYPECHECKS
38#ifdef STRICT_MM_TYPECHECKS 38#ifdef STRICT_MM_TYPECHECKS
39typedef struct { unsigned long pte; 39typedef struct { unsigned long pte; } pte_t; /* either 32 or 64bit */
40#if !defined(CONFIG_64BIT)
41 unsigned long future_flags;
42 /* XXX: it's possible to remove future_flags and change BITS_PER_PTE_ENTRY
43 to 2, but then strangely the identical 32bit kernel boots on a
44 c3000(pa20), but not any longer on a 715(pa11).
45 Still investigating... HelgeD.
46 */
47#endif
48} pte_t; /* either 32 or 64bit */
49 40
50/* NOTE: even on 64 bits, these entries are __u32 because we allocate 41/* NOTE: even on 64 bits, these entries are __u32 because we allocate
51 * the pmd and pgd in ZONE_DMA (i.e. under 4GB) */ 42 * the pmd and pgd in ZONE_DMA (i.e. under 4GB) */
@@ -111,7 +102,7 @@ extern int npmem_ranges;
111#define BITS_PER_PMD_ENTRY 2 102#define BITS_PER_PMD_ENTRY 2
112#define BITS_PER_PGD_ENTRY 2 103#define BITS_PER_PGD_ENTRY 2
113#else 104#else
114#define BITS_PER_PTE_ENTRY 3 105#define BITS_PER_PTE_ENTRY 2
115#define BITS_PER_PMD_ENTRY 2 106#define BITS_PER_PMD_ENTRY 2
116#define BITS_PER_PGD_ENTRY BITS_PER_PMD_ENTRY 107#define BITS_PER_PGD_ENTRY BITS_PER_PMD_ENTRY
117#endif 108#endif
diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h
index 430f1aeea0b8..4ca510b3c6f8 100644
--- a/arch/parisc/include/asm/pdc.h
+++ b/arch/parisc/include/asm/pdc.h
@@ -49,6 +49,8 @@
49#define PDC_MODEL_CPU_ID 6 /* returns cpu-id (only newer machines!) */ 49#define PDC_MODEL_CPU_ID 6 /* returns cpu-id (only newer machines!) */
50#define PDC_MODEL_CAPABILITIES 7 /* returns OS32/OS64-flags */ 50#define PDC_MODEL_CAPABILITIES 7 /* returns OS32/OS64-flags */
51/* Values for PDC_MODEL_CAPABILITIES non-equivalent virtual aliasing support */ 51/* Values for PDC_MODEL_CAPABILITIES non-equivalent virtual aliasing support */
52#define PDC_MODEL_OS64 (1 << 0)
53#define PDC_MODEL_OS32 (1 << 1)
52#define PDC_MODEL_IOPDIR_FDC (1 << 2) 54#define PDC_MODEL_IOPDIR_FDC (1 << 2)
53#define PDC_MODEL_NVA_MASK (3 << 4) 55#define PDC_MODEL_NVA_MASK (3 << 4)
54#define PDC_MODEL_NVA_SUPPORTED (0 << 4) 56#define PDC_MODEL_NVA_SUPPORTED (0 << 4)
@@ -341,6 +343,8 @@
341 343
342#ifdef __KERNEL__ 344#ifdef __KERNEL__
343 345
346#include <asm/page.h> /* for __PAGE_OFFSET */
347
344extern int pdc_type; 348extern int pdc_type;
345 349
346/* Values for pdc_type */ 350/* Values for pdc_type */
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 470a4b88124d..a27d2e200fb2 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -50,11 +50,7 @@
50 printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, (unsigned long)pgd_val(e)) 50 printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, (unsigned long)pgd_val(e))
51 51
52/* This is the size of the initially mapped kernel memory */ 52/* This is the size of the initially mapped kernel memory */
53#ifdef CONFIG_64BIT
54#define KERNEL_INITIAL_ORDER 24 /* 0 to 1<<24 = 16MB */ 53#define KERNEL_INITIAL_ORDER 24 /* 0 to 1<<24 = 16MB */
55#else
56#define KERNEL_INITIAL_ORDER 23 /* 0 to 1<<23 = 8MB */
57#endif
58#define KERNEL_INITIAL_SIZE (1 << KERNEL_INITIAL_ORDER) 54#define KERNEL_INITIAL_SIZE (1 << KERNEL_INITIAL_ORDER)
59 55
60#if defined(CONFIG_64BIT) && defined(CONFIG_PARISC_PAGE_SIZE_4KB) 56#if defined(CONFIG_64BIT) && defined(CONFIG_PARISC_PAGE_SIZE_4KB)
@@ -91,16 +87,25 @@
91 87
92/* Definitions for 1st level */ 88/* Definitions for 1st level */
93#define PGDIR_SHIFT (PMD_SHIFT + BITS_PER_PMD) 89#define PGDIR_SHIFT (PMD_SHIFT + BITS_PER_PMD)
90#if (PGDIR_SHIFT + PAGE_SHIFT + PGD_ORDER - BITS_PER_PGD_ENTRY) > BITS_PER_LONG
91#define BITS_PER_PGD (BITS_PER_LONG - PGDIR_SHIFT)
92#else
94#define BITS_PER_PGD (PAGE_SHIFT + PGD_ORDER - BITS_PER_PGD_ENTRY) 93#define BITS_PER_PGD (PAGE_SHIFT + PGD_ORDER - BITS_PER_PGD_ENTRY)
94#endif
95#define PGDIR_SIZE (1UL << PGDIR_SHIFT) 95#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
96#define PGDIR_MASK (~(PGDIR_SIZE-1)) 96#define PGDIR_MASK (~(PGDIR_SIZE-1))
97#define PTRS_PER_PGD (1UL << BITS_PER_PGD) 97#define PTRS_PER_PGD (1UL << BITS_PER_PGD)
98#define USER_PTRS_PER_PGD PTRS_PER_PGD 98#define USER_PTRS_PER_PGD PTRS_PER_PGD
99 99
100#ifdef CONFIG_64BIT
100#define MAX_ADDRBITS (PGDIR_SHIFT + BITS_PER_PGD) 101#define MAX_ADDRBITS (PGDIR_SHIFT + BITS_PER_PGD)
101#define MAX_ADDRESS (1UL << MAX_ADDRBITS) 102#define MAX_ADDRESS (1UL << MAX_ADDRBITS)
102
103#define SPACEID_SHIFT (MAX_ADDRBITS - 32) 103#define SPACEID_SHIFT (MAX_ADDRBITS - 32)
104#else
105#define MAX_ADDRBITS (BITS_PER_LONG)
106#define MAX_ADDRESS (1UL << MAX_ADDRBITS)
107#define SPACEID_SHIFT 0
108#endif
104 109
105/* This calculates the number of initial pages we need for the initial 110/* This calculates the number of initial pages we need for the initial
106 * page tables */ 111 * page tables */
diff --git a/arch/parisc/include/asm/smp.h b/arch/parisc/include/asm/smp.h
index 6ef4b7867b1b..21eb45a52629 100644
--- a/arch/parisc/include/asm/smp.h
+++ b/arch/parisc/include/asm/smp.h
@@ -29,7 +29,8 @@ extern void smp_send_reschedule(int cpu);
29extern void smp_send_all_nop(void); 29extern void smp_send_all_nop(void);
30 30
31extern void arch_send_call_function_single_ipi(int cpu); 31extern void arch_send_call_function_single_ipi(int cpu);
32extern void arch_send_call_function_ipi(cpumask_t mask); 32extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
33#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
33 34
34#endif /* !ASSEMBLY */ 35#endif /* !ASSEMBLY */
35 36
diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile
index 016d3fc4111c..67db0722e6ca 100644
--- a/arch/parisc/kernel/Makefile
+++ b/arch/parisc/kernel/Makefile
@@ -11,10 +11,25 @@ obj-y := cache.o pacache.o setup.o traps.o time.o irq.o \
11 process.o processor.o pdc_cons.o pdc_chassis.o unwind.o \ 11 process.o processor.o pdc_cons.o pdc_chassis.o unwind.o \
12 topology.o 12 topology.o
13 13
14ifdef CONFIG_FUNCTION_TRACER
15# Do not profile debug and lowlevel utilities
16CFLAGS_REMOVE_ftrace.o = -pg
17CFLAGS_REMOVE_cache.o = -pg
18CFLAGS_REMOVE_irq.o = -pg
19CFLAGS_REMOVE_pacache.o = -pg
20CFLAGS_REMOVE_perf.o = -pg
21CFLAGS_REMOVE_traps.o = -pg
22CFLAGS_REMOVE_unaligned.o = -pg
23CFLAGS_REMOVE_unwind.o = -pg
24endif
25
14obj-$(CONFIG_SMP) += smp.o 26obj-$(CONFIG_SMP) += smp.o
15obj-$(CONFIG_PA11) += pci-dma.o 27obj-$(CONFIG_PA11) += pci-dma.o
16obj-$(CONFIG_PCI) += pci.o 28obj-$(CONFIG_PCI) += pci.o
17obj-$(CONFIG_MODULES) += module.o 29obj-$(CONFIG_MODULES) += module.o
18obj-$(CONFIG_64BIT) += binfmt_elf32.o sys_parisc32.o signal32.o 30obj-$(CONFIG_64BIT) += binfmt_elf32.o sys_parisc32.o signal32.o
31obj-$(CONFIG_STACKTRACE)+= stacktrace.o
19# only supported for PCX-W/U in 64-bit mode at the moment 32# only supported for PCX-W/U in 64-bit mode at the moment
20obj-$(CONFIG_64BIT) += perf.o perf_asm.o 33obj-$(CONFIG_64BIT) += perf.o perf_asm.o
34obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o
35obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 0db9fdcb7709..ae3e70cd1e14 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -505,6 +505,18 @@
505 STREG \pte,0(\ptep) 505 STREG \pte,0(\ptep)
506 .endm 506 .endm
507 507
508 /* bitshift difference between a PFN (based on kernel's PAGE_SIZE)
509 * and a CPU TLB 4k PFN (4k => 12 bits to shift) */
510 #define PAGE_ADD_SHIFT (PAGE_SHIFT-12)
511
512 /* Drop prot bits and convert to page addr for iitlbt and idtlbt */
513 .macro convert_for_tlb_insert20 pte
514 extrd,u \pte,(63-ASM_PFN_PTE_SHIFT)+(63-58)+PAGE_ADD_SHIFT,\
515 64-PAGE_SHIFT-PAGE_ADD_SHIFT,\pte
516 depdi _PAGE_SIZE_ENCODING_DEFAULT,63,\
517 (63-58)+PAGE_ADD_SHIFT,\pte
518 .endm
519
508 /* Convert the pte and prot to tlb insertion values. How 520 /* Convert the pte and prot to tlb insertion values. How
509 * this happens is quite subtle, read below */ 521 * this happens is quite subtle, read below */
510 .macro make_insert_tlb spc,pte,prot 522 .macro make_insert_tlb spc,pte,prot
@@ -544,8 +556,7 @@
544 depi 1,12,1,\prot 556 depi 1,12,1,\prot
545 557
546 /* Drop prot bits and convert to page addr for iitlbt and idtlbt */ 558 /* Drop prot bits and convert to page addr for iitlbt and idtlbt */
547 extrd,u \pte,(63-ASM_PFN_PTE_SHIFT)+(63-58),64-PAGE_SHIFT,\pte 559 convert_for_tlb_insert20 \pte
548 depdi _PAGE_SIZE_ENCODING_DEFAULT,63,63-58,\pte
549 .endm 560 .endm
550 561
551 /* Identical macro to make_insert_tlb above, except it 562 /* Identical macro to make_insert_tlb above, except it
@@ -563,8 +574,8 @@
563 574
564 /* Get rid of prot bits and convert to page addr for iitlba */ 575 /* Get rid of prot bits and convert to page addr for iitlba */
565 576
566 depi _PAGE_SIZE_ENCODING_DEFAULT,31,ASM_PFN_PTE_SHIFT,\pte 577 depi 0,31,ASM_PFN_PTE_SHIFT,\pte
567 extru \pte,24,25,\pte 578 SHRREG \pte,(ASM_PFN_PTE_SHIFT-(31-26)),\pte
568 .endm 579 .endm
569 580
570 /* This is for ILP32 PA2.0 only. The TLB insertion needs 581 /* This is for ILP32 PA2.0 only. The TLB insertion needs
@@ -1244,10 +1255,9 @@ nadtlb_check_flush_20w:
1244 depdi,z 7,7,3,prot 1255 depdi,z 7,7,3,prot
1245 depdi 1,10,1,prot 1256 depdi 1,10,1,prot
1246 1257
1247 /* Get rid of prot bits and convert to page addr for idtlbt */ 1258 /* Drop prot bits from pte and convert to page addr for idtlbt */
1259 convert_for_tlb_insert20 pte
1248 1260
1249 depdi 0,63,12,pte
1250 extrd,u pte,56,52,pte
1251 idtlbt pte,prot 1261 idtlbt pte,prot
1252 1262
1253 rfir 1263 rfir
@@ -1337,8 +1347,8 @@ nadtlb_check_flush_11:
1337 1347
1338 /* Get rid of prot bits and convert to page addr for idtlba */ 1348 /* Get rid of prot bits and convert to page addr for idtlba */
1339 1349
1340 depi 0,31,12,pte 1350 depi 0,31,ASM_PFN_PTE_SHIFT,pte
1341 extru pte,24,25,pte 1351 SHRREG pte,(ASM_PFN_PTE_SHIFT-(31-26)),pte
1342 1352
1343 mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ 1353 mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */
1344 mtsp spc,%sr1 1354 mtsp spc,%sr1
@@ -1403,10 +1413,9 @@ nadtlb_check_flush_20:
1403 depdi,z 7,7,3,prot 1413 depdi,z 7,7,3,prot
1404 depdi 1,10,1,prot 1414 depdi 1,10,1,prot
1405 1415
1406 /* Get rid of prot bits and convert to page addr for idtlbt */ 1416 /* Drop prot bits from pte and convert to page addr for idtlbt */
1417 convert_for_tlb_insert20 pte
1407 1418
1408 depdi 0,63,12,pte
1409 extrd,u pte,56,32,pte
1410 idtlbt pte,prot 1419 idtlbt pte,prot
1411 1420
1412 rfir 1421 rfir
@@ -2176,6 +2185,33 @@ syscall_do_resched:
2176ENDPROC(syscall_exit) 2185ENDPROC(syscall_exit)
2177 2186
2178 2187
2188#ifdef CONFIG_FUNCTION_TRACER
2189 .import ftrace_function_trampoline,code
2190ENTRY(_mcount)
2191 copy %r3, %arg2
2192 b ftrace_function_trampoline
2193 nop
2194ENDPROC(_mcount)
2195
2196ENTRY(return_to_handler)
2197 load32 return_trampoline, %rp
2198 copy %ret0, %arg0
2199 copy %ret1, %arg1
2200 b ftrace_return_to_handler
2201 nop
2202return_trampoline:
2203 copy %ret0, %rp
2204 copy %r23, %ret0
2205 copy %r24, %ret1
2206
2207.globl ftrace_stub
2208ftrace_stub:
2209 bv %r0(%rp)
2210 nop
2211ENDPROC(return_to_handler)
2212#endif /* CONFIG_FUNCTION_TRACER */
2213
2214
2179get_register: 2215get_register:
2180 /* 2216 /*
2181 * get_register is used by the non access tlb miss handlers to 2217 * get_register is used by the non access tlb miss handlers to
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index f6d241238a78..4c247e02d9b1 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -527,7 +527,11 @@ int pdc_model_capabilities(unsigned long *capabilities)
527 pdc_result[0] = 0; /* preset zero (call may not be implemented!) */ 527 pdc_result[0] = 0; /* preset zero (call may not be implemented!) */
528 retval = mem_pdc_call(PDC_MODEL, PDC_MODEL_CAPABILITIES, __pa(pdc_result), 0); 528 retval = mem_pdc_call(PDC_MODEL, PDC_MODEL_CAPABILITIES, __pa(pdc_result), 0);
529 convert_to_wide(pdc_result); 529 convert_to_wide(pdc_result);
530 *capabilities = pdc_result[0]; 530 if (retval == PDC_OK) {
531 *capabilities = pdc_result[0];
532 } else {
533 *capabilities = PDC_MODEL_OS32;
534 }
531 spin_unlock_irqrestore(&pdc_lock, flags); 535 spin_unlock_irqrestore(&pdc_lock, flags);
532 536
533 return retval; 537 return retval;
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
new file mode 100644
index 000000000000..9877372ffdba
--- /dev/null
+++ b/arch/parisc/kernel/ftrace.c
@@ -0,0 +1,185 @@
1/*
2 * Code for tracing calls in Linux kernel.
3 * Copyright (C) 2009 Helge Deller <deller@gmx.de>
4 *
5 * based on code for x86 which is:
6 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
7 *
8 * future possible enhancements:
9 * - add CONFIG_DYNAMIC_FTRACE
10 * - add CONFIG_STACK_TRACER
11 */
12
13#include <linux/init.h>
14#include <linux/ftrace.h>
15
16#include <asm/sections.h>
17#include <asm/ftrace.h>
18
19
20
21#ifdef CONFIG_FUNCTION_GRAPH_TRACER
22
23/* Add a function return address to the trace stack on thread info.*/
24static int push_return_trace(unsigned long ret, unsigned long long time,
25 unsigned long func, int *depth)
26{
27 int index;
28
29 if (!current->ret_stack)
30 return -EBUSY;
31
32 /* The return trace stack is full */
33 if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
34 atomic_inc(&current->trace_overrun);
35 return -EBUSY;
36 }
37
38 index = ++current->curr_ret_stack;
39 barrier();
40 current->ret_stack[index].ret = ret;
41 current->ret_stack[index].func = func;
42 current->ret_stack[index].calltime = time;
43 *depth = index;
44
45 return 0;
46}
47
48/* Retrieve a function return address from the trace stack on thread info.*/
49static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
50{
51 int index;
52
53 index = current->curr_ret_stack;
54
55 if (unlikely(index < 0)) {
56 ftrace_graph_stop();
57 WARN_ON(1);
58 /* Might as well panic, otherwise we have nowhere to go */
59 *ret = (unsigned long)
60 dereference_function_descriptor(&panic);
61 return;
62 }
63
64 *ret = current->ret_stack[index].ret;
65 trace->func = current->ret_stack[index].func;
66 trace->calltime = current->ret_stack[index].calltime;
67 trace->overrun = atomic_read(&current->trace_overrun);
68 trace->depth = index;
69 barrier();
70 current->curr_ret_stack--;
71
72}
73
74/*
75 * Send the trace to the ring-buffer.
76 * @return the original return address.
77 */
78unsigned long ftrace_return_to_handler(unsigned long retval0,
79 unsigned long retval1)
80{
81 struct ftrace_graph_ret trace;
82 unsigned long ret;
83
84 pop_return_trace(&trace, &ret);
85 trace.rettime = cpu_clock(raw_smp_processor_id());
86 ftrace_graph_return(&trace);
87
88 if (unlikely(!ret)) {
89 ftrace_graph_stop();
90 WARN_ON(1);
91 /* Might as well panic. What else to do? */
92 ret = (unsigned long)
93 dereference_function_descriptor(&panic);
94 }
95
96 /* HACK: we hand over the old functions' return values
97 in %r23 and %r24. Assembly in entry.S will take care
98 and move those to their final registers %ret0 and %ret1 */
99 asm( "copy %0, %%r23 \n\t"
100 "copy %1, %%r24 \n" : : "r" (retval0), "r" (retval1) );
101
102 return ret;
103}
104
105/*
106 * Hook the return address and push it in the stack of return addrs
107 * in current thread info.
108 */
109void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
110{
111 unsigned long old;
112 unsigned long long calltime;
113 struct ftrace_graph_ent trace;
114
115 if (unlikely(atomic_read(&current->tracing_graph_pause)))
116 return;
117
118 old = *parent;
119 *parent = (unsigned long)
120 dereference_function_descriptor(&return_to_handler);
121
122 if (unlikely(!__kernel_text_address(old))) {
123 ftrace_graph_stop();
124 *parent = old;
125 WARN_ON(1);
126 return;
127 }
128
129 calltime = cpu_clock(raw_smp_processor_id());
130
131 if (push_return_trace(old, calltime,
132 self_addr, &trace.depth) == -EBUSY) {
133 *parent = old;
134 return;
135 }
136
137 trace.func = self_addr;
138
139 /* Only trace if the calling function expects to */
140 if (!ftrace_graph_entry(&trace)) {
141 current->curr_ret_stack--;
142 *parent = old;
143 }
144}
145
146#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
147
148
149void ftrace_function_trampoline(unsigned long parent,
150 unsigned long self_addr,
151 unsigned long org_sp_gr3)
152{
153 extern ftrace_func_t ftrace_trace_function;
154
155 if (function_trace_stop)
156 return;
157
158 if (ftrace_trace_function != ftrace_stub) {
159 ftrace_trace_function(parent, self_addr);
160 return;
161 }
162#ifdef CONFIG_FUNCTION_GRAPH_TRACER
163 if (ftrace_graph_entry && ftrace_graph_return) {
164 unsigned long sp;
165 unsigned long *parent_rp;
166
167 asm volatile ("copy %%r30, %0" : "=r"(sp));
168 /* sanity check: is stack pointer which we got from
169 assembler function in entry.S in a reasonable
170 range compared to current stack pointer? */
171 if ((sp - org_sp_gr3) > 0x400)
172 return;
173
174 /* calculate pointer to %rp in stack */
175 parent_rp = (unsigned long *) org_sp_gr3 - 0x10;
176 /* sanity check: parent_rp should hold parent */
177 if (*parent_rp != parent)
178 return;
179
180 prepare_ftrace_return(parent_rp, self_addr);
181 return;
182 }
183#endif
184}
185
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index 1c740f5cbd63..4ea4229d765c 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -311,12 +311,12 @@ unsigned long txn_alloc_addr(unsigned int virt_irq)
311 next_cpu++; /* assign to "next" CPU we want this bugger on */ 311 next_cpu++; /* assign to "next" CPU we want this bugger on */
312 312
313 /* validate entry */ 313 /* validate entry */
314 while ((next_cpu < NR_CPUS) && 314 while ((next_cpu < nr_cpu_ids) &&
315 (!per_cpu(cpu_data, next_cpu).txn_addr || 315 (!per_cpu(cpu_data, next_cpu).txn_addr ||
316 !cpu_online(next_cpu))) 316 !cpu_online(next_cpu)))
317 next_cpu++; 317 next_cpu++;
318 318
319 if (next_cpu >= NR_CPUS) 319 if (next_cpu >= nr_cpu_ids)
320 next_cpu = 0; /* nothing else, assign monarch */ 320 next_cpu = 0; /* nothing else, assign monarch */
321 321
322 return txn_affinity_addr(virt_irq, next_cpu); 322 return txn_affinity_addr(virt_irq, next_cpu);
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index 9013243cecca..ecd1c5024447 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -61,9 +61,7 @@
61#include <linux/string.h> 61#include <linux/string.h>
62#include <linux/kernel.h> 62#include <linux/kernel.h>
63#include <linux/bug.h> 63#include <linux/bug.h>
64#include <linux/uaccess.h>
65 64
66#include <asm/sections.h>
67#include <asm/unwind.h> 65#include <asm/unwind.h>
68 66
69#if 0 67#if 0
@@ -115,8 +113,6 @@ struct got_entry {
115 Elf32_Addr addr; 113 Elf32_Addr addr;
116}; 114};
117 115
118#define Elf_Fdesc Elf32_Fdesc
119
120struct stub_entry { 116struct stub_entry {
121 Elf32_Word insns[2]; /* each stub entry has two insns */ 117 Elf32_Word insns[2]; /* each stub entry has two insns */
122}; 118};
@@ -125,8 +121,6 @@ struct got_entry {
125 Elf64_Addr addr; 121 Elf64_Addr addr;
126}; 122};
127 123
128#define Elf_Fdesc Elf64_Fdesc
129
130struct stub_entry { 124struct stub_entry {
131 Elf64_Word insns[4]; /* each stub entry has four insns */ 125 Elf64_Word insns[4]; /* each stub entry has four insns */
132}; 126};
@@ -916,15 +910,3 @@ void module_arch_cleanup(struct module *mod)
916 deregister_unwind_table(mod); 910 deregister_unwind_table(mod);
917 module_bug_cleanup(mod); 911 module_bug_cleanup(mod);
918} 912}
919
920#ifdef CONFIG_64BIT
921void *dereference_function_descriptor(void *ptr)
922{
923 Elf64_Fdesc *desc = ptr;
924 void *p;
925
926 if (!probe_kernel_address(&desc->addr, p))
927 ptr = p;
928 return ptr;
929}
930#endif
diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c
index 0eecfbbc59cd..df653663d3db 100644
--- a/arch/parisc/kernel/parisc_ksyms.c
+++ b/arch/parisc/kernel/parisc_ksyms.c
@@ -153,5 +153,10 @@ EXPORT_SYMBOL(node_data);
153EXPORT_SYMBOL(pfnnid_map); 153EXPORT_SYMBOL(pfnnid_map);
154#endif 154#endif
155 155
156#ifdef CONFIG_FUNCTION_TRACER
157extern void _mcount(void);
158EXPORT_SYMBOL(_mcount);
159#endif
160
156/* from pacache.S -- needed for copy_page */ 161/* from pacache.S -- needed for copy_page */
157EXPORT_SYMBOL(copy_user_page_asm); 162EXPORT_SYMBOL(copy_user_page_asm);
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 8aa591ed9127..6f69101f90bb 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -46,14 +46,15 @@
46#include <linux/stddef.h> 46#include <linux/stddef.h>
47#include <linux/unistd.h> 47#include <linux/unistd.h>
48#include <linux/kallsyms.h> 48#include <linux/kallsyms.h>
49#include <linux/uaccess.h>
49 50
50#include <asm/io.h> 51#include <asm/io.h>
51#include <asm/asm-offsets.h> 52#include <asm/asm-offsets.h>
52#include <asm/pdc.h> 53#include <asm/pdc.h>
53#include <asm/pdc_chassis.h> 54#include <asm/pdc_chassis.h>
54#include <asm/pgalloc.h> 55#include <asm/pgalloc.h>
55#include <asm/uaccess.h>
56#include <asm/unwind.h> 56#include <asm/unwind.h>
57#include <asm/sections.h>
57 58
58/* 59/*
59 * The idle thread. There's no useful work to be 60 * The idle thread. There's no useful work to be
@@ -231,8 +232,8 @@ sys_clone(unsigned long clone_flags, unsigned long usp,
231 232
232 However, these last 3 args are only examined 233 However, these last 3 args are only examined
233 if the proper flags are set. */ 234 if the proper flags are set. */
234 int __user *child_tidptr; 235 int __user *parent_tidptr = (int __user *)regs->gr[24];
235 int __user *parent_tidptr; 236 int __user *child_tidptr = (int __user *)regs->gr[22];
236 237
237 /* usp must be word aligned. This also prevents users from 238 /* usp must be word aligned. This also prevents users from
238 * passing in the value 1 (which is the signal for a special 239 * passing in the value 1 (which is the signal for a special
@@ -243,16 +244,6 @@ sys_clone(unsigned long clone_flags, unsigned long usp,
243 if (usp == 0) 244 if (usp == 0)
244 usp = regs->gr[30]; 245 usp = regs->gr[30];
245 246
246 if (clone_flags & CLONE_PARENT_SETTID)
247 parent_tidptr = (int __user *)regs->gr[24];
248 else
249 parent_tidptr = NULL;
250
251 if (clone_flags & (CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID))
252 child_tidptr = (int __user *)regs->gr[22];
253 else
254 child_tidptr = NULL;
255
256 return do_fork(clone_flags, usp, regs, 0, parent_tidptr, child_tidptr); 247 return do_fork(clone_flags, usp, regs, 0, parent_tidptr, child_tidptr);
257} 248}
258 249
@@ -400,3 +391,15 @@ get_wchan(struct task_struct *p)
400 } while (count++ < 16); 391 } while (count++ < 16);
401 return 0; 392 return 0;
402} 393}
394
395#ifdef CONFIG_64BIT
396void *dereference_function_descriptor(void *ptr)
397{
398 Elf64_Fdesc *desc = ptr;
399 void *p;
400
401 if (!probe_kernel_address(&desc->addr, p))
402 ptr = p;
403 return ptr;
404}
405#endif
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index ecb609342feb..e09d0f7fb6b0 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -100,8 +100,8 @@ static int __cpuinit processor_probe(struct parisc_device *dev)
100 struct cpuinfo_parisc *p; 100 struct cpuinfo_parisc *p;
101 101
102#ifdef CONFIG_SMP 102#ifdef CONFIG_SMP
103 if (num_online_cpus() >= NR_CPUS) { 103 if (num_online_cpus() >= nr_cpu_ids) {
104 printk(KERN_INFO "num_online_cpus() >= NR_CPUS\n"); 104 printk(KERN_INFO "num_online_cpus() >= nr_cpu_ids\n");
105 return 1; 105 return 1;
106 } 106 }
107#else 107#else
@@ -214,7 +214,7 @@ static int __cpuinit processor_probe(struct parisc_device *dev)
214 */ 214 */
215#ifdef CONFIG_SMP 215#ifdef CONFIG_SMP
216 if (cpuid) { 216 if (cpuid) {
217 cpu_set(cpuid, cpu_present_map); 217 set_cpu_present(cpuid, true);
218 cpu_up(cpuid); 218 cpu_up(cpuid);
219 } 219 }
220#endif 220#endif
@@ -364,6 +364,13 @@ show_cpuinfo (struct seq_file *m, void *v)
364 boot_cpu_data.cpu_hz / 1000000, 364 boot_cpu_data.cpu_hz / 1000000,
365 boot_cpu_data.cpu_hz % 1000000 ); 365 boot_cpu_data.cpu_hz % 1000000 );
366 366
367 seq_printf(m, "capabilities\t:");
368 if (boot_cpu_data.pdc.capabilities & PDC_MODEL_OS32)
369 seq_printf(m, " os32");
370 if (boot_cpu_data.pdc.capabilities & PDC_MODEL_OS64)
371 seq_printf(m, " os64");
372 seq_printf(m, "\n");
373
367 seq_printf(m, "model\t\t: %s\n" 374 seq_printf(m, "model\t\t: %s\n"
368 "model name\t: %s\n", 375 "model name\t: %s\n",
369 boot_cpu_data.pdc.sys_model_name, 376 boot_cpu_data.pdc.sys_model_name,
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 9995d7ed5819..1fd0f0cec037 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -31,6 +31,7 @@
31#include <linux/err.h> 31#include <linux/err.h>
32#include <linux/delay.h> 32#include <linux/delay.h>
33#include <linux/bitops.h> 33#include <linux/bitops.h>
34#include <linux/ftrace.h>
34 35
35#include <asm/system.h> 36#include <asm/system.h>
36#include <asm/atomic.h> 37#include <asm/atomic.h>
@@ -113,14 +114,14 @@ halt_processor(void)
113{ 114{
114 /* REVISIT : redirect I/O Interrupts to another CPU? */ 115 /* REVISIT : redirect I/O Interrupts to another CPU? */
115 /* REVISIT : does PM *know* this CPU isn't available? */ 116 /* REVISIT : does PM *know* this CPU isn't available? */
116 cpu_clear(smp_processor_id(), cpu_online_map); 117 set_cpu_online(smp_processor_id(), false);
117 local_irq_disable(); 118 local_irq_disable();
118 for (;;) 119 for (;;)
119 ; 120 ;
120} 121}
121 122
122 123
123irqreturn_t 124irqreturn_t __irq_entry
124ipi_interrupt(int irq, void *dev_id) 125ipi_interrupt(int irq, void *dev_id)
125{ 126{
126 int this_cpu = smp_processor_id(); 127 int this_cpu = smp_processor_id();
@@ -214,11 +215,11 @@ ipi_send(int cpu, enum ipi_message_type op)
214} 215}
215 216
216static void 217static void
217send_IPI_mask(cpumask_t mask, enum ipi_message_type op) 218send_IPI_mask(const struct cpumask *mask, enum ipi_message_type op)
218{ 219{
219 int cpu; 220 int cpu;
220 221
221 for_each_cpu_mask(cpu, mask) 222 for_each_cpu(cpu, mask)
222 ipi_send(cpu, op); 223 ipi_send(cpu, op);
223} 224}
224 225
@@ -257,7 +258,7 @@ smp_send_all_nop(void)
257 send_IPI_allbutself(IPI_NOP); 258 send_IPI_allbutself(IPI_NOP);
258} 259}
259 260
260void arch_send_call_function_ipi(cpumask_t mask) 261void arch_send_call_function_ipi_mask(const struct cpumask *mask)
261{ 262{
262 send_IPI_mask(mask, IPI_CALL_FUNC); 263 send_IPI_mask(mask, IPI_CALL_FUNC);
263} 264}
@@ -296,13 +297,14 @@ smp_cpu_init(int cpunum)
296 mb(); 297 mb();
297 298
298 /* Well, support 2.4 linux scheme as well. */ 299 /* Well, support 2.4 linux scheme as well. */
299 if (cpu_test_and_set(cpunum, cpu_online_map)) 300 if (cpu_isset(cpunum, cpu_online_map))
300 { 301 {
301 extern void machine_halt(void); /* arch/parisc.../process.c */ 302 extern void machine_halt(void); /* arch/parisc.../process.c */
302 303
303 printk(KERN_CRIT "CPU#%d already initialized!\n", cpunum); 304 printk(KERN_CRIT "CPU#%d already initialized!\n", cpunum);
304 machine_halt(); 305 machine_halt();
305 } 306 }
307 set_cpu_online(cpunum, true);
306 308
307 /* Initialise the idle task for this CPU */ 309 /* Initialise the idle task for this CPU */
308 atomic_inc(&init_mm.mm_count); 310 atomic_inc(&init_mm.mm_count);
@@ -424,8 +426,8 @@ void __init smp_prepare_boot_cpu(void)
424 /* Setup BSP mappings */ 426 /* Setup BSP mappings */
425 printk(KERN_INFO "SMP: bootstrap CPU ID is %d\n", bootstrap_processor); 427 printk(KERN_INFO "SMP: bootstrap CPU ID is %d\n", bootstrap_processor);
426 428
427 cpu_set(bootstrap_processor, cpu_online_map); 429 set_cpu_online(bootstrap_processor, true);
428 cpu_set(bootstrap_processor, cpu_present_map); 430 set_cpu_present(bootstrap_processor, true);
429} 431}
430 432
431 433
@@ -436,8 +438,7 @@ void __init smp_prepare_boot_cpu(void)
436*/ 438*/
437void __init smp_prepare_cpus(unsigned int max_cpus) 439void __init smp_prepare_cpus(unsigned int max_cpus)
438{ 440{
439 cpus_clear(cpu_present_map); 441 init_cpu_present(cpumask_of(0));
440 cpu_set(0, cpu_present_map);
441 442
442 parisc_max_cpus = max_cpus; 443 parisc_max_cpus = max_cpus;
443 if (!max_cpus) 444 if (!max_cpus)
diff --git a/arch/parisc/kernel/stacktrace.c b/arch/parisc/kernel/stacktrace.c
new file mode 100644
index 000000000000..2fe914c5f533
--- /dev/null
+++ b/arch/parisc/kernel/stacktrace.c
@@ -0,0 +1,63 @@
1/*
2 * Stack trace management functions
3 *
4 * Copyright (C) 2009 Helge Deller <deller@gmx.de>
5 * based on arch/x86/kernel/stacktrace.c by Ingo Molnar <mingo@redhat.com>
6 * and parisc unwind functions by Randolph Chung <tausq@debian.org>
7 *
8 * TODO: Userspace stacktrace (CONFIG_USER_STACKTRACE_SUPPORT)
9 */
10#include <linux/module.h>
11#include <linux/stacktrace.h>
12
13#include <asm/unwind.h>
14
15static void dump_trace(struct task_struct *task, struct stack_trace *trace)
16{
17 struct unwind_frame_info info;
18
19 /* initialize unwind info */
20 if (task == current) {
21 unsigned long sp;
22 struct pt_regs r;
23HERE:
24 asm volatile ("copy %%r30, %0" : "=r"(sp));
25 memset(&r, 0, sizeof(struct pt_regs));
26 r.iaoq[0] = (unsigned long)&&HERE;
27 r.gr[2] = (unsigned long)__builtin_return_address(0);
28 r.gr[30] = sp;
29 unwind_frame_init(&info, task, &r);
30 } else {
31 unwind_frame_init_from_blocked_task(&info, task);
32 }
33
34 /* unwind stack and save entries in stack_trace struct */
35 trace->nr_entries = 0;
36 while (trace->nr_entries < trace->max_entries) {
37 if (unwind_once(&info) < 0 || info.ip == 0)
38 break;
39
40 if (__kernel_text_address(info.ip))
41 trace->entries[trace->nr_entries++] = info.ip;
42 }
43}
44
45
46/*
47 * Save stack-backtrace addresses into a stack_trace buffer.
48 */
49void save_stack_trace(struct stack_trace *trace)
50{
51 dump_trace(current, trace);
52 if (trace->nr_entries < trace->max_entries)
53 trace->entries[trace->nr_entries++] = ULONG_MAX;
54}
55EXPORT_SYMBOL_GPL(save_stack_trace);
56
57void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
58{
59 dump_trace(tsk, trace);
60 if (trace->nr_entries < trace->max_entries)
61 trace->entries[trace->nr_entries++] = ULONG_MAX;
62}
63EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 69b6eebc466e..59fc1a43ec3e 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -365,17 +365,51 @@ tracesys_sigexit:
365 365
366 366
367 /********************************************************* 367 /*********************************************************
368 Light-weight-syscall code 368 32/64-bit Light-Weight-Syscall ABI
369 369
370 r20 - lws number 370 * - Indicates a hint for userspace inline asm
371 r26,r25,r24,r23,r22 - Input registers 371 implementations.
372 r28 - Function return register
373 r21 - Error code.
374 372
375 Scracth: Any of the above that aren't being 373 Syscall number (caller-saves)
376 currently used, including r1. 374 - %r20
375 * In asm clobber.
377 376
378 Return pointer: r31 (Not usable) 377 Argument registers (caller-saves)
378 - %r26, %r25, %r24, %r23, %r22
379 * In asm input.
380
381 Return registers (caller-saves)
382 - %r28 (return), %r21 (errno)
383 * In asm output.
384
385 Caller-saves registers
386 - %r1, %r27, %r29
387 - %r2 (return pointer)
388 - %r31 (ble link register)
389 * In asm clobber.
390
391 Callee-saves registers
392 - %r3-%r18
393 - %r30 (stack pointer)
394 * Not in asm clobber.
395
396 If userspace is 32-bit:
397 Callee-saves registers
398 - %r19 (32-bit PIC register)
399
400 Differences from 32-bit calling convention:
401 - Syscall number in %r20
402 - Additional argument register %r22 (arg4)
403 - Callee-saves %r19.
404
405 If userspace is 64-bit:
406 Callee-saves registers
407 - %r27 (64-bit PIC register)
408
409 Differences from 64-bit calling convention:
410 - Syscall number in %r20
411 - Additional argument register %r22 (arg4)
412 - Callee-saves %r27.
379 413
380 Error codes returned by entry path: 414 Error codes returned by entry path:
381 415
@@ -473,7 +507,8 @@ lws_compare_and_swap64:
473 b,n lws_compare_and_swap 507 b,n lws_compare_and_swap
474#else 508#else
475 /* If we are not a 64-bit kernel, then we don't 509 /* If we are not a 64-bit kernel, then we don't
476 * implement having 64-bit input registers 510 * have 64-bit input registers, and calling
511 * the 64-bit LWS CAS returns ENOSYS.
477 */ 512 */
478 b,n lws_exit_nosys 513 b,n lws_exit_nosys
479#endif 514#endif
@@ -635,12 +670,15 @@ END(sys_call_table64)
635 /* 670 /*
636 All light-weight-syscall atomic operations 671 All light-weight-syscall atomic operations
637 will use this set of locks 672 will use this set of locks
673
674 NOTE: The lws_lock_start symbol must be
675 at least 16-byte aligned for safe use
676 with ldcw.
638 */ 677 */
639 .section .data 678 .section .data
640 .align PAGE_SIZE 679 .align PAGE_SIZE
641ENTRY(lws_lock_start) 680ENTRY(lws_lock_start)
642 /* lws locks */ 681 /* lws locks */
643 .align 16
644 .rept 16 682 .rept 16
645 /* Keep locks aligned at 16-bytes */ 683 /* Keep locks aligned at 16-bytes */
646 .word 1 684 .word 1
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index e75cae6072c5..d4dd05674c62 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -24,6 +24,7 @@
24#include <linux/profile.h> 24#include <linux/profile.h>
25#include <linux/clocksource.h> 25#include <linux/clocksource.h>
26#include <linux/platform_device.h> 26#include <linux/platform_device.h>
27#include <linux/ftrace.h>
27 28
28#include <asm/uaccess.h> 29#include <asm/uaccess.h>
29#include <asm/io.h> 30#include <asm/io.h>
@@ -53,7 +54,7 @@ static unsigned long clocktick __read_mostly; /* timer cycles per tick */
53 * held off for an arbitrarily long period of time by interrupts being 54 * held off for an arbitrarily long period of time by interrupts being
54 * disabled, so we may miss one or more ticks. 55 * disabled, so we may miss one or more ticks.
55 */ 56 */
56irqreturn_t timer_interrupt(int irq, void *dev_id) 57irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id)
57{ 58{
58 unsigned long now; 59 unsigned long now;
59 unsigned long next_tick; 60 unsigned long next_tick;
@@ -216,14 +217,14 @@ void __init start_cpu_itimer(void)
216 per_cpu(cpu_data, cpu).it_value = next_tick; 217 per_cpu(cpu_data, cpu).it_value = next_tick;
217} 218}
218 219
219static struct platform_device rtc_parisc_dev = { 220static struct platform_device rtc_generic_dev = {
220 .name = "rtc-parisc", 221 .name = "rtc-generic",
221 .id = -1, 222 .id = -1,
222}; 223};
223 224
224static int __init rtc_init(void) 225static int __init rtc_init(void)
225{ 226{
226 if (platform_device_register(&rtc_parisc_dev) < 0) 227 if (platform_device_register(&rtc_generic_dev) < 0)
227 printk(KERN_ERR "unable to register rtc device...\n"); 228 printk(KERN_ERR "unable to register rtc device...\n");
228 229
229 /* not necessarily an error */ 230 /* not necessarily an error */
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index ba658d2086f7..c32f5d6d778e 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -247,6 +247,8 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err)
247 247
248 oops_in_progress = 1; 248 oops_in_progress = 1;
249 249
250 oops_enter();
251
250 /* Amuse the user in a SPARC fashion */ 252 /* Amuse the user in a SPARC fashion */
251 if (err) printk( 253 if (err) printk(
252KERN_CRIT " _______________________________ \n" 254KERN_CRIT " _______________________________ \n"
@@ -293,6 +295,7 @@ KERN_CRIT " || ||\n");
293 panic("Fatal exception"); 295 panic("Fatal exception");
294 } 296 }
295 297
298 oops_exit();
296 do_exit(SIGSEGV); 299 do_exit(SIGSEGV);
297} 300}
298 301
@@ -494,7 +497,7 @@ void parisc_terminate(char *msg, struct pt_regs *regs, int code, unsigned long o
494 panic(msg); 497 panic(msg);
495} 498}
496 499
497void handle_interruption(int code, struct pt_regs *regs) 500void notrace handle_interruption(int code, struct pt_regs *regs)
498{ 501{
499 unsigned long fault_address = 0; 502 unsigned long fault_address = 0;
500 unsigned long fault_space = 0; 503 unsigned long fault_space = 0;
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index 1a3b6ccd3620..fd2cc4fd2b65 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -54,6 +54,8 @@ SECTIONS
54 TEXT_TEXT 54 TEXT_TEXT
55 SCHED_TEXT 55 SCHED_TEXT
56 LOCK_TEXT 56 LOCK_TEXT
57 KPROBES_TEXT
58 IRQENTRY_TEXT
57 *(.text.do_softirq) 59 *(.text.do_softirq)
58 *(.text.sys_exit) 60 *(.text.sys_exit)
59 *(.text.do_sigaltstack) 61 *(.text.do_sigaltstack)
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 9d704d9831d1..4356ceb1e366 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -456,6 +456,13 @@ void __init mem_init(void)
456{ 456{
457 int codesize, reservedpages, datasize, initsize; 457 int codesize, reservedpages, datasize, initsize;
458 458
459 /* Do sanity checks on page table constants */
460 BUILD_BUG_ON(PTE_ENTRY_SIZE != sizeof(pte_t));
461 BUILD_BUG_ON(PMD_ENTRY_SIZE != sizeof(pmd_t));
462 BUILD_BUG_ON(PGD_ENTRY_SIZE != sizeof(pgd_t));
463 BUILD_BUG_ON(PAGE_SHIFT + BITS_PER_PTE + BITS_PER_PMD + BITS_PER_PGD
464 > BITS_PER_LONG);
465
459 high_memory = __va((max_pfn << PAGE_SHIFT)); 466 high_memory = __va((max_pfn << PAGE_SHIFT));
460 467
461#ifndef CONFIG_DISCONTIGMEM 468#ifndef CONFIG_DISCONTIGMEM
diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h
index 67f1812698d2..cdb6fd814de8 100644
--- a/arch/powerpc/include/asm/ps3.h
+++ b/arch/powerpc/include/asm/ps3.h
@@ -50,6 +50,9 @@ enum ps3_param_av_multi_out {
50 50
51enum ps3_param_av_multi_out ps3_os_area_get_av_multi_out(void); 51enum ps3_param_av_multi_out ps3_os_area_get_av_multi_out(void);
52 52
53extern u64 ps3_os_area_get_rtc_diff(void);
54extern void ps3_os_area_set_rtc_diff(u64 rtc_diff);
55
53/* dma routines */ 56/* dma routines */
54 57
55enum ps3_dma_page_size { 58enum ps3_dma_page_size {
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index c9564031a2a9..926ea864e34f 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -1127,3 +1127,19 @@ void div128_by_32(u64 dividend_high, u64 dividend_low,
1127 dr->result_low = ((u64)y << 32) + z; 1127 dr->result_low = ((u64)y << 32) + z;
1128 1128
1129} 1129}
1130
1131static int __init rtc_init(void)
1132{
1133 struct platform_device *pdev;
1134
1135 if (!ppc_md.get_rtc_time)
1136 return -ENODEV;
1137
1138 pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
1139 if (IS_ERR(pdev))
1140 return PTR_ERR(pdev);
1141
1142 return 0;
1143}
1144
1145module_init(rtc_init);
diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c
index e1c83c23b435..86e392b1b049 100644
--- a/arch/powerpc/platforms/ps3/os-area.c
+++ b/arch/powerpc/platforms/ps3/os-area.c
@@ -808,6 +808,7 @@ u64 ps3_os_area_get_rtc_diff(void)
808{ 808{
809 return saved_params.rtc_diff; 809 return saved_params.rtc_diff;
810} 810}
811EXPORT_SYMBOL(ps3_os_area_get_rtc_diff);
811 812
812/** 813/**
813 * ps3_os_area_set_rtc_diff - Set the rtc diff value. 814 * ps3_os_area_set_rtc_diff - Set the rtc diff value.
@@ -823,6 +824,7 @@ void ps3_os_area_set_rtc_diff(u64 rtc_diff)
823 os_area_queue_work(); 824 os_area_queue_work();
824 } 825 }
825} 826}
827EXPORT_SYMBOL(ps3_os_area_set_rtc_diff);
826 828
827/** 829/**
828 * ps3_os_area_get_av_multi_out - Returns the default video mode. 830 * ps3_os_area_get_av_multi_out - Returns the default video mode.
diff --git a/arch/powerpc/platforms/ps3/platform.h b/arch/powerpc/platforms/ps3/platform.h
index 235c13ebacd9..136aa0637d9c 100644
--- a/arch/powerpc/platforms/ps3/platform.h
+++ b/arch/powerpc/platforms/ps3/platform.h
@@ -64,8 +64,6 @@ int ps3_set_rtc_time(struct rtc_time *time);
64 64
65void __init ps3_os_area_save_params(void); 65void __init ps3_os_area_save_params(void);
66void __init ps3_os_area_init(void); 66void __init ps3_os_area_init(void);
67u64 ps3_os_area_get_rtc_diff(void);
68void ps3_os_area_set_rtc_diff(u64 rtc_diff);
69 67
70/* spu */ 68/* spu */
71 69
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index 3331ccbb8d38..66181821322a 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -270,8 +270,6 @@ define_machine(ps3) {
270 .init_IRQ = ps3_init_IRQ, 270 .init_IRQ = ps3_init_IRQ,
271 .panic = ps3_panic, 271 .panic = ps3_panic,
272 .get_boot_time = ps3_get_boot_time, 272 .get_boot_time = ps3_get_boot_time,
273 .set_rtc_time = ps3_set_rtc_time,
274 .get_rtc_time = ps3_get_rtc_time,
275 .set_dabr = ps3_set_dabr, 273 .set_dabr = ps3_set_dabr,
276 .calibrate_decr = ps3_calibrate_decr, 274 .calibrate_decr = ps3_calibrate_decr,
277 .progress = ps3_progress, 275 .progress = ps3_progress,
diff --git a/arch/powerpc/platforms/ps3/time.c b/arch/powerpc/platforms/ps3/time.c
index d0daf7d6d3b2..b178a1e66c91 100644
--- a/arch/powerpc/platforms/ps3/time.c
+++ b/arch/powerpc/platforms/ps3/time.c
@@ -19,6 +19,7 @@
19 */ 19 */
20 20
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/platform_device.h>
22 23
23#include <asm/rtc.h> 24#include <asm/rtc.h>
24#include <asm/lv1call.h> 25#include <asm/lv1call.h>
@@ -74,23 +75,20 @@ static u64 read_rtc(void)
74 return rtc_val; 75 return rtc_val;
75} 76}
76 77
77int ps3_set_rtc_time(struct rtc_time *tm) 78unsigned long __init ps3_get_boot_time(void)
78{ 79{
79 u64 now = mktime(tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, 80 return read_rtc() + ps3_os_area_get_rtc_diff();
80 tm->tm_hour, tm->tm_min, tm->tm_sec);
81
82 ps3_os_area_set_rtc_diff(now - read_rtc());
83 return 0;
84} 81}
85 82
86void ps3_get_rtc_time(struct rtc_time *tm) 83static int __init ps3_rtc_init(void)
87{ 84{
88 to_tm(read_rtc() + ps3_os_area_get_rtc_diff(), tm); 85 struct platform_device *pdev;
89 tm->tm_year -= 1900;
90 tm->tm_mon -= 1;
91}
92 86
93unsigned long __init ps3_get_boot_time(void) 87 pdev = platform_device_register_simple("rtc-ps3", -1, NULL, 0);
94{ 88 if (IS_ERR(pdev))
95 return read_rtc() + ps3_os_area_get_rtc_diff(); 89 return PTR_ERR(pdev);
90
91 return 0;
96} 92}
93
94module_init(ps3_rtc_init);
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index d42f826a8ab9..f934225fd8ef 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -22,6 +22,7 @@
22#include "linux/kernel.h" 22#include "linux/kernel.h"
23#include "linux/module.h" 23#include "linux/module.h"
24#include "linux/blkdev.h" 24#include "linux/blkdev.h"
25#include "linux/ata.h"
25#include "linux/hdreg.h" 26#include "linux/hdreg.h"
26#include "linux/init.h" 27#include "linux/init.h"
27#include "linux/cdrom.h" 28#include "linux/cdrom.h"
@@ -1308,16 +1309,15 @@ static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1308 unsigned int cmd, unsigned long arg) 1309 unsigned int cmd, unsigned long arg)
1309{ 1310{
1310 struct ubd *ubd_dev = bdev->bd_disk->private_data; 1311 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1311 struct hd_driveid ubd_id = { 1312 u16 ubd_id[ATA_ID_WORDS];
1312 .cyls = 0,
1313 .heads = 128,
1314 .sectors = 32,
1315 };
1316 1313
1317 switch (cmd) { 1314 switch (cmd) {
1318 struct cdrom_volctrl volume; 1315 struct cdrom_volctrl volume;
1319 case HDIO_GET_IDENTITY: 1316 case HDIO_GET_IDENTITY:
1320 ubd_id.cyls = ubd_dev->size / (128 * 32 * 512); 1317 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1318 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1319 ubd_id[ATA_ID_HEADS] = 128;
1320 ubd_id[ATA_ID_SECTORS] = 32;
1321 if(copy_to_user((char __user *) arg, (char *) &ubd_id, 1321 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1322 sizeof(ubd_id))) 1322 sizeof(ubd_id)))
1323 return -EFAULT; 1323 return -EFAULT;
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 5bc5d1688c1c..8126e8d1a2a4 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -40,7 +40,6 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
40 40
41 debug_kmap_atomic(type); 41 debug_kmap_atomic(type);
42 42
43 debug_kmap_atomic(type);
44 idx = type + KM_TYPE_NR*smp_processor_id(); 43 idx = type + KM_TYPE_NR*smp_processor_id();
45 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); 44 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
46 BUG_ON(!pte_none(*(kmap_pte-idx))); 45 BUG_ON(!pte_none(*(kmap_pte-idx)));
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index bff0c9032f8c..e331f77348a7 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -39,6 +39,7 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
39 39
40 pagefault_disable(); 40 pagefault_disable();
41 41
42 debug_kmap_atomic(type);
42 idx = type + KM_TYPE_NR * smp_processor_id(); 43 idx = type + KM_TYPE_NR * smp_processor_id();
43 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); 44 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
44 set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); 45 set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
@@ -72,7 +73,6 @@ iounmap_atomic(void *kvaddr, enum km_type type)
72 unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; 73 unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
73 enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); 74 enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
74 75
75 debug_kmap_atomic(type);
76 /* 76 /*
77 * Force other mappings to Oops if they'll try to access this pte 77 * Force other mappings to Oops if they'll try to access this pte
78 * without first remap it. Keeping stale mappings around is a bad idea 78 * without first remap it. Keeping stale mappings around is a bad idea
diff --git a/crypto/shash.c b/crypto/shash.c
index 7a659733f94a..2ccc8b0076ce 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -77,6 +77,9 @@ static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
77 u8 buf[shash_align_buffer_size(unaligned_len, alignmask)] 77 u8 buf[shash_align_buffer_size(unaligned_len, alignmask)]
78 __attribute__ ((aligned)); 78 __attribute__ ((aligned));
79 79
80 if (unaligned_len > len)
81 unaligned_len = len;
82
80 memcpy(buf, data, unaligned_len); 83 memcpy(buf, data, unaligned_len);
81 84
82 return shash->update(desc, buf, unaligned_len) ?: 85 return shash->update(desc, buf, unaligned_len) ?:
diff --git a/crypto/xor.c b/crypto/xor.c
index b2e6db075e49..996b6ee57d9e 100644
--- a/crypto/xor.c
+++ b/crypto/xor.c
@@ -18,8 +18,8 @@
18 18
19#define BH_TRACE 0 19#define BH_TRACE 0
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/raid/md.h>
22#include <linux/raid/xor.h> 21#include <linux/raid/xor.h>
22#include <linux/jiffies.h>
23#include <asm/xor.h> 23#include <asm/xor.h>
24 24
25/* The xor routines to use. */ 25/* The xor routines to use. */
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 45c5a33daf49..31693bc24444 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -4,6 +4,7 @@
4 * Filesystem request handling methods 4 * Filesystem request handling methods
5 */ 5 */
6 6
7#include <linux/ata.h>
7#include <linux/hdreg.h> 8#include <linux/hdreg.h>
8#include <linux/blkdev.h> 9#include <linux/blkdev.h>
9#include <linux/skbuff.h> 10#include <linux/skbuff.h>
@@ -267,7 +268,7 @@ aoecmd_ata_rw(struct aoedev *d)
267 writebit = 0; 268 writebit = 0;
268 } 269 }
269 270
270 ah->cmdstat = WIN_READ | writebit | extbit; 271 ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
271 272
272 /* mark all tracking fields and load out */ 273 /* mark all tracking fields and load out */
273 buf->nframesout += 1; 274 buf->nframesout += 1;
@@ -362,10 +363,10 @@ resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
362 switch (ah->cmdstat) { 363 switch (ah->cmdstat) {
363 default: 364 default:
364 break; 365 break;
365 case WIN_READ: 366 case ATA_CMD_PIO_READ:
366 case WIN_READ_EXT: 367 case ATA_CMD_PIO_READ_EXT:
367 case WIN_WRITE: 368 case ATA_CMD_PIO_WRITE:
368 case WIN_WRITE_EXT: 369 case ATA_CMD_PIO_WRITE_EXT:
369 put_lba(ah, f->lba); 370 put_lba(ah, f->lba);
370 371
371 n = f->bcnt; 372 n = f->bcnt;
@@ -812,8 +813,8 @@ aoecmd_ata_rsp(struct sk_buff *skb)
812 d->htgt = NULL; 813 d->htgt = NULL;
813 n = ahout->scnt << 9; 814 n = ahout->scnt << 9;
814 switch (ahout->cmdstat) { 815 switch (ahout->cmdstat) {
815 case WIN_READ: 816 case ATA_CMD_PIO_READ:
816 case WIN_READ_EXT: 817 case ATA_CMD_PIO_READ_EXT:
817 if (skb->len - sizeof *hin - sizeof *ahin < n) { 818 if (skb->len - sizeof *hin - sizeof *ahin < n) {
818 printk(KERN_ERR 819 printk(KERN_ERR
819 "aoe: %s. skb->len=%d need=%ld\n", 820 "aoe: %s. skb->len=%d need=%ld\n",
@@ -823,8 +824,8 @@ aoecmd_ata_rsp(struct sk_buff *skb)
823 return; 824 return;
824 } 825 }
825 memcpy(f->bufaddr, ahin+1, n); 826 memcpy(f->bufaddr, ahin+1, n);
826 case WIN_WRITE: 827 case ATA_CMD_PIO_WRITE:
827 case WIN_WRITE_EXT: 828 case ATA_CMD_PIO_WRITE_EXT:
828 ifp = getif(t, skb->dev); 829 ifp = getif(t, skb->dev);
829 if (ifp) { 830 if (ifp) {
830 ifp->lost = 0; 831 ifp->lost = 0;
@@ -838,7 +839,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
838 goto xmit; 839 goto xmit;
839 } 840 }
840 break; 841 break;
841 case WIN_IDENTIFY: 842 case ATA_CMD_ID_ATA:
842 if (skb->len - sizeof *hin - sizeof *ahin < 512) { 843 if (skb->len - sizeof *hin - sizeof *ahin < 512) {
843 printk(KERN_INFO 844 printk(KERN_INFO
844 "aoe: runt data size in ataid. skb->len=%d\n", 845 "aoe: runt data size in ataid. skb->len=%d\n",
@@ -914,7 +915,7 @@ aoecmd_ata_id(struct aoedev *d)
914 915
915 /* set up ata header */ 916 /* set up ata header */
916 ah->scnt = 1; 917 ah->scnt = 1;
917 ah->cmdstat = WIN_IDENTIFY; 918 ah->cmdstat = ATA_CMD_ID_ATA;
918 ah->lba3 = 0xa0; 919 ah->lba3 = 0xa0;
919 920
920 skb->dev = t->ifp->nd; 921 skb->dev = t->ifp->nd;
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index 482c0c4b964f..3c11f062a18c 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -42,6 +42,8 @@
42#include <linux/ata.h> 42#include <linux/ata.h>
43#include <linux/hdreg.h> 43#include <linux/hdreg.h>
44 44
45#define HD_IRQ 14
46
45#define REALLY_SLOW_IO 47#define REALLY_SLOW_IO
46#include <asm/system.h> 48#include <asm/system.h>
47#include <asm/io.h> 49#include <asm/io.h>
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 119be3442f28..6cccdc3f5220 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -89,6 +89,7 @@
89#include <linux/delay.h> 89#include <linux/delay.h>
90#include <linux/slab.h> 90#include <linux/slab.h>
91#include <linux/blkdev.h> 91#include <linux/blkdev.h>
92#include <linux/ata.h>
92#include <linux/hdreg.h> 93#include <linux/hdreg.h>
93#include <linux/platform_device.h> 94#include <linux/platform_device.h>
94#if defined(CONFIG_OF) 95#if defined(CONFIG_OF)
@@ -208,7 +209,7 @@ struct ace_device {
208 struct gendisk *gd; 209 struct gendisk *gd;
209 210
210 /* Inserted CF card parameters */ 211 /* Inserted CF card parameters */
211 struct hd_driveid cf_id; 212 u16 cf_id[ATA_ID_WORDS];
212}; 213};
213 214
214static int ace_major; 215static int ace_major;
@@ -402,21 +403,14 @@ static void ace_dump_regs(struct ace_device *ace)
402 ace_in32(ace, ACE_CFGLBA), ace_in(ace, ACE_FATSTAT)); 403 ace_in32(ace, ACE_CFGLBA), ace_in(ace, ACE_FATSTAT));
403} 404}
404 405
405void ace_fix_driveid(struct hd_driveid *id) 406void ace_fix_driveid(u16 *id)
406{ 407{
407#if defined(__BIG_ENDIAN) 408#if defined(__BIG_ENDIAN)
408 u16 *buf = (void *)id;
409 int i; 409 int i;
410 410
411 /* All half words have wrong byte order; swap the bytes */ 411 /* All half words have wrong byte order; swap the bytes */
412 for (i = 0; i < sizeof(struct hd_driveid); i += 2, buf++) 412 for (i = 0; i < ATA_ID_WORDS; i++, id++)
413 *buf = le16_to_cpu(*buf); 413 *id = le16_to_cpu(*id);
414
415 /* Some of the data values are 32bit; swap the half words */
416 id->lba_capacity = ((id->lba_capacity >> 16) & 0x0000FFFF) |
417 ((id->lba_capacity << 16) & 0xFFFF0000);
418 id->spg = ((id->spg >> 16) & 0x0000FFFF) |
419 ((id->spg << 16) & 0xFFFF0000);
420#endif 414#endif
421} 415}
422 416
@@ -614,7 +608,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
614 break; 608 break;
615 609
616 case ACE_FSM_STATE_IDENTIFY_COMPLETE: 610 case ACE_FSM_STATE_IDENTIFY_COMPLETE:
617 ace_fix_driveid(&ace->cf_id); 611 ace_fix_driveid(&ace->cf_id[0]);
618 ace_dump_mem(&ace->cf_id, 512); /* Debug: Dump out disk ID */ 612 ace_dump_mem(&ace->cf_id, 512); /* Debug: Dump out disk ID */
619 613
620 if (ace->data_result) { 614 if (ace->data_result) {
@@ -627,9 +621,10 @@ static void ace_fsm_dostate(struct ace_device *ace)
627 ace->media_change = 0; 621 ace->media_change = 0;
628 622
629 /* Record disk parameters */ 623 /* Record disk parameters */
630 set_capacity(ace->gd, ace->cf_id.lba_capacity); 624 set_capacity(ace->gd,
625 ata_id_u32(&ace->cf_id, ATA_ID_LBA_CAPACITY));
631 dev_info(ace->dev, "capacity: %i sectors\n", 626 dev_info(ace->dev, "capacity: %i sectors\n",
632 ace->cf_id.lba_capacity); 627 ata_id_u32(&ace->cf_id, ATA_ID_LBA_CAPACITY));
633 } 628 }
634 629
635 /* We're done, drop to IDLE state and notify waiters */ 630 /* We're done, drop to IDLE state and notify waiters */
@@ -928,12 +923,13 @@ static int ace_release(struct gendisk *disk, fmode_t mode)
928static int ace_getgeo(struct block_device *bdev, struct hd_geometry *geo) 923static int ace_getgeo(struct block_device *bdev, struct hd_geometry *geo)
929{ 924{
930 struct ace_device *ace = bdev->bd_disk->private_data; 925 struct ace_device *ace = bdev->bd_disk->private_data;
926 u16 *cf_id = &ace->cf_id[0];
931 927
932 dev_dbg(ace->dev, "ace_getgeo()\n"); 928 dev_dbg(ace->dev, "ace_getgeo()\n");
933 929
934 geo->heads = ace->cf_id.heads; 930 geo->heads = cf_id[ATA_ID_HEADS];
935 geo->sectors = ace->cf_id.sectors; 931 geo->sectors = cf_id[ATA_ID_SECTORS];
936 geo->cylinders = ace->cf_id.cyls; 932 geo->cylinders = cf_id[ATA_ID_CYLS];
937 933
938 return 0; 934 return 0;
939} 935}
diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c
index 10ad41be5897..dcd352ad0e7f 100644
--- a/drivers/char/hw_random/timeriomem-rng.c
+++ b/drivers/char/hw_random/timeriomem-rng.c
@@ -90,10 +90,30 @@ static struct hwrng timeriomem_rng_ops = {
90 90
91static int __init timeriomem_rng_probe(struct platform_device *pdev) 91static int __init timeriomem_rng_probe(struct platform_device *pdev)
92{ 92{
93 struct resource *res, *mem;
93 int ret; 94 int ret;
94 95
96 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
97
98 if (!res)
99 return -ENOENT;
100
101 mem = request_mem_region(res->start, res->end - res->start + 1,
102 pdev->name);
103 if (mem == NULL)
104 return -EBUSY;
105
106 dev_set_drvdata(&pdev->dev, mem);
107
95 timeriomem_rng_data = pdev->dev.platform_data; 108 timeriomem_rng_data = pdev->dev.platform_data;
96 109
110 timeriomem_rng_data->address = ioremap(res->start,
111 res->end - res->start + 1);
112 if (!timeriomem_rng_data->address) {
113 ret = -ENOMEM;
114 goto err_ioremap;
115 }
116
97 if (timeriomem_rng_data->period != 0 117 if (timeriomem_rng_data->period != 0
98 && usecs_to_jiffies(timeriomem_rng_data->period) > 0) { 118 && usecs_to_jiffies(timeriomem_rng_data->period) > 0) {
99 timeriomem_rng_timer.expires = jiffies; 119 timeriomem_rng_timer.expires = jiffies;
@@ -104,23 +124,34 @@ static int __init timeriomem_rng_probe(struct platform_device *pdev)
104 timeriomem_rng_data->present = 1; 124 timeriomem_rng_data->present = 1;
105 125
106 ret = hwrng_register(&timeriomem_rng_ops); 126 ret = hwrng_register(&timeriomem_rng_ops);
107 if (ret) { 127 if (ret)
108 dev_err(&pdev->dev, "problem registering\n"); 128 goto err_register;
109 return ret;
110 }
111 129
112 dev_info(&pdev->dev, "32bits from 0x%p @ %dus\n", 130 dev_info(&pdev->dev, "32bits from 0x%p @ %dus\n",
113 timeriomem_rng_data->address, 131 timeriomem_rng_data->address,
114 timeriomem_rng_data->period); 132 timeriomem_rng_data->period);
115 133
116 return 0; 134 return 0;
135
136err_register:
137 dev_err(&pdev->dev, "problem registering\n");
138 iounmap(timeriomem_rng_data->address);
139err_ioremap:
140 release_resource(mem);
141
142 return ret;
117} 143}
118 144
119static int __devexit timeriomem_rng_remove(struct platform_device *pdev) 145static int __devexit timeriomem_rng_remove(struct platform_device *pdev)
120{ 146{
147 struct resource *mem = dev_get_drvdata(&pdev->dev);
148
121 del_timer_sync(&timeriomem_rng_timer); 149 del_timer_sync(&timeriomem_rng_timer);
122 hwrng_unregister(&timeriomem_rng_ops); 150 hwrng_unregister(&timeriomem_rng_ops);
123 151
152 iounmap(timeriomem_rng_data->address);
153 release_resource(mem);
154
124 return 0; 155 return 0;
125} 156}
126 157
diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index d9e751be8c5f..af9761ccf9f1 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -101,6 +101,7 @@ struct buffer_desc {
101 u32 phys_addr; 101 u32 phys_addr;
102 u32 __reserved[4]; 102 u32 __reserved[4];
103 struct buffer_desc *next; 103 struct buffer_desc *next;
104 enum dma_data_direction dir;
104}; 105};
105 106
106struct crypt_ctl { 107struct crypt_ctl {
@@ -132,14 +133,10 @@ struct crypt_ctl {
132struct ablk_ctx { 133struct ablk_ctx {
133 struct buffer_desc *src; 134 struct buffer_desc *src;
134 struct buffer_desc *dst; 135 struct buffer_desc *dst;
135 unsigned src_nents;
136 unsigned dst_nents;
137}; 136};
138 137
139struct aead_ctx { 138struct aead_ctx {
140 struct buffer_desc *buffer; 139 struct buffer_desc *buffer;
141 unsigned short assoc_nents;
142 unsigned short src_nents;
143 struct scatterlist ivlist; 140 struct scatterlist ivlist;
144 /* used when the hmac is not on one sg entry */ 141 /* used when the hmac is not on one sg entry */
145 u8 *hmac_virt; 142 u8 *hmac_virt;
@@ -312,7 +309,7 @@ static struct crypt_ctl *get_crypt_desc_emerg(void)
312 } 309 }
313} 310}
314 311
315static void free_buf_chain(struct buffer_desc *buf, u32 phys) 312static void free_buf_chain(struct device *dev, struct buffer_desc *buf,u32 phys)
316{ 313{
317 while (buf) { 314 while (buf) {
318 struct buffer_desc *buf1; 315 struct buffer_desc *buf1;
@@ -320,6 +317,7 @@ static void free_buf_chain(struct buffer_desc *buf, u32 phys)
320 317
321 buf1 = buf->next; 318 buf1 = buf->next;
322 phys1 = buf->phys_next; 319 phys1 = buf->phys_next;
320 dma_unmap_single(dev, buf->phys_next, buf->buf_len, buf->dir);
323 dma_pool_free(buffer_pool, buf, phys); 321 dma_pool_free(buffer_pool, buf, phys);
324 buf = buf1; 322 buf = buf1;
325 phys = phys1; 323 phys = phys1;
@@ -348,7 +346,6 @@ static void one_packet(dma_addr_t phys)
348 struct crypt_ctl *crypt; 346 struct crypt_ctl *crypt;
349 struct ixp_ctx *ctx; 347 struct ixp_ctx *ctx;
350 int failed; 348 int failed;
351 enum dma_data_direction src_direction = DMA_BIDIRECTIONAL;
352 349
353 failed = phys & 0x1 ? -EBADMSG : 0; 350 failed = phys & 0x1 ? -EBADMSG : 0;
354 phys &= ~0x3; 351 phys &= ~0x3;
@@ -358,13 +355,8 @@ static void one_packet(dma_addr_t phys)
358 case CTL_FLAG_PERFORM_AEAD: { 355 case CTL_FLAG_PERFORM_AEAD: {
359 struct aead_request *req = crypt->data.aead_req; 356 struct aead_request *req = crypt->data.aead_req;
360 struct aead_ctx *req_ctx = aead_request_ctx(req); 357 struct aead_ctx *req_ctx = aead_request_ctx(req);
361 dma_unmap_sg(dev, req->assoc, req_ctx->assoc_nents,
362 DMA_TO_DEVICE);
363 dma_unmap_sg(dev, &req_ctx->ivlist, 1, DMA_BIDIRECTIONAL);
364 dma_unmap_sg(dev, req->src, req_ctx->src_nents,
365 DMA_BIDIRECTIONAL);
366 358
367 free_buf_chain(req_ctx->buffer, crypt->src_buf); 359 free_buf_chain(dev, req_ctx->buffer, crypt->src_buf);
368 if (req_ctx->hmac_virt) { 360 if (req_ctx->hmac_virt) {
369 finish_scattered_hmac(crypt); 361 finish_scattered_hmac(crypt);
370 } 362 }
@@ -374,16 +366,11 @@ static void one_packet(dma_addr_t phys)
374 case CTL_FLAG_PERFORM_ABLK: { 366 case CTL_FLAG_PERFORM_ABLK: {
375 struct ablkcipher_request *req = crypt->data.ablk_req; 367 struct ablkcipher_request *req = crypt->data.ablk_req;
376 struct ablk_ctx *req_ctx = ablkcipher_request_ctx(req); 368 struct ablk_ctx *req_ctx = ablkcipher_request_ctx(req);
377 int nents; 369
378 if (req_ctx->dst) { 370 if (req_ctx->dst) {
379 nents = req_ctx->dst_nents; 371 free_buf_chain(dev, req_ctx->dst, crypt->dst_buf);
380 dma_unmap_sg(dev, req->dst, nents, DMA_FROM_DEVICE);
381 free_buf_chain(req_ctx->dst, crypt->dst_buf);
382 src_direction = DMA_TO_DEVICE;
383 } 372 }
384 nents = req_ctx->src_nents; 373 free_buf_chain(dev, req_ctx->src, crypt->src_buf);
385 dma_unmap_sg(dev, req->src, nents, src_direction);
386 free_buf_chain(req_ctx->src, crypt->src_buf);
387 req->base.complete(&req->base, failed); 374 req->base.complete(&req->base, failed);
388 break; 375 break;
389 } 376 }
@@ -750,56 +737,35 @@ static int setup_cipher(struct crypto_tfm *tfm, int encrypt,
750 return 0; 737 return 0;
751} 738}
752 739
753static int count_sg(struct scatterlist *sg, int nbytes) 740static struct buffer_desc *chainup_buffers(struct device *dev,
741 struct scatterlist *sg, unsigned nbytes,
742 struct buffer_desc *buf, gfp_t flags,
743 enum dma_data_direction dir)
754{ 744{
755 int i; 745 for (;nbytes > 0; sg = scatterwalk_sg_next(sg)) {
756 for (i = 0; nbytes > 0; i++, sg = sg_next(sg)) 746 unsigned len = min(nbytes, sg->length);
757 nbytes -= sg->length;
758 return i;
759}
760
761static struct buffer_desc *chainup_buffers(struct scatterlist *sg,
762 unsigned nbytes, struct buffer_desc *buf, gfp_t flags)
763{
764 int nents = 0;
765
766 while (nbytes > 0) {
767 struct buffer_desc *next_buf; 747 struct buffer_desc *next_buf;
768 u32 next_buf_phys; 748 u32 next_buf_phys;
769 unsigned len = min(nbytes, sg_dma_len(sg)); 749 void *ptr;
770 750
771 nents++;
772 nbytes -= len; 751 nbytes -= len;
773 if (!buf->phys_addr) { 752 ptr = page_address(sg_page(sg)) + sg->offset;
774 buf->phys_addr = sg_dma_address(sg);
775 buf->buf_len = len;
776 buf->next = NULL;
777 buf->phys_next = 0;
778 goto next;
779 }
780 /* Two consecutive chunks on one page may be handled by the old
781 * buffer descriptor, increased by the length of the new one
782 */
783 if (sg_dma_address(sg) == buf->phys_addr + buf->buf_len) {
784 buf->buf_len += len;
785 goto next;
786 }
787 next_buf = dma_pool_alloc(buffer_pool, flags, &next_buf_phys); 753 next_buf = dma_pool_alloc(buffer_pool, flags, &next_buf_phys);
788 if (!next_buf) 754 if (!next_buf) {
789 return NULL; 755 buf = NULL;
756 break;
757 }
758 sg_dma_address(sg) = dma_map_single(dev, ptr, len, dir);
790 buf->next = next_buf; 759 buf->next = next_buf;
791 buf->phys_next = next_buf_phys; 760 buf->phys_next = next_buf_phys;
792
793 buf = next_buf; 761 buf = next_buf;
794 buf->next = NULL; 762
795 buf->phys_next = 0;
796 buf->phys_addr = sg_dma_address(sg); 763 buf->phys_addr = sg_dma_address(sg);
797 buf->buf_len = len; 764 buf->buf_len = len;
798next: 765 buf->dir = dir;
799 if (nbytes > 0) {
800 sg = sg_next(sg);
801 }
802 } 766 }
767 buf->next = NULL;
768 buf->phys_next = 0;
803 return buf; 769 return buf;
804} 770}
805 771
@@ -860,12 +826,12 @@ static int ablk_perform(struct ablkcipher_request *req, int encrypt)
860 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); 826 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
861 struct ixp_ctx *ctx = crypto_ablkcipher_ctx(tfm); 827 struct ixp_ctx *ctx = crypto_ablkcipher_ctx(tfm);
862 unsigned ivsize = crypto_ablkcipher_ivsize(tfm); 828 unsigned ivsize = crypto_ablkcipher_ivsize(tfm);
863 int ret = -ENOMEM;
864 struct ix_sa_dir *dir; 829 struct ix_sa_dir *dir;
865 struct crypt_ctl *crypt; 830 struct crypt_ctl *crypt;
866 unsigned int nbytes = req->nbytes, nents; 831 unsigned int nbytes = req->nbytes;
867 enum dma_data_direction src_direction = DMA_BIDIRECTIONAL; 832 enum dma_data_direction src_direction = DMA_BIDIRECTIONAL;
868 struct ablk_ctx *req_ctx = ablkcipher_request_ctx(req); 833 struct ablk_ctx *req_ctx = ablkcipher_request_ctx(req);
834 struct buffer_desc src_hook;
869 gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? 835 gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
870 GFP_KERNEL : GFP_ATOMIC; 836 GFP_KERNEL : GFP_ATOMIC;
871 837
@@ -878,7 +844,7 @@ static int ablk_perform(struct ablkcipher_request *req, int encrypt)
878 844
879 crypt = get_crypt_desc(); 845 crypt = get_crypt_desc();
880 if (!crypt) 846 if (!crypt)
881 return ret; 847 return -ENOMEM;
882 848
883 crypt->data.ablk_req = req; 849 crypt->data.ablk_req = req;
884 crypt->crypto_ctx = dir->npe_ctx_phys; 850 crypt->crypto_ctx = dir->npe_ctx_phys;
@@ -891,53 +857,41 @@ static int ablk_perform(struct ablkcipher_request *req, int encrypt)
891 BUG_ON(ivsize && !req->info); 857 BUG_ON(ivsize && !req->info);
892 memcpy(crypt->iv, req->info, ivsize); 858 memcpy(crypt->iv, req->info, ivsize);
893 if (req->src != req->dst) { 859 if (req->src != req->dst) {
860 struct buffer_desc dst_hook;
894 crypt->mode |= NPE_OP_NOT_IN_PLACE; 861 crypt->mode |= NPE_OP_NOT_IN_PLACE;
895 nents = count_sg(req->dst, nbytes);
896 /* This was never tested by Intel 862 /* This was never tested by Intel
897 * for more than one dst buffer, I think. */ 863 * for more than one dst buffer, I think. */
898 BUG_ON(nents != 1); 864 BUG_ON(req->dst->length < nbytes);
899 req_ctx->dst_nents = nents; 865 req_ctx->dst = NULL;
900 dma_map_sg(dev, req->dst, nents, DMA_FROM_DEVICE); 866 if (!chainup_buffers(dev, req->dst, nbytes, &dst_hook,
901 req_ctx->dst = dma_pool_alloc(buffer_pool, flags,&crypt->dst_buf); 867 flags, DMA_FROM_DEVICE))
902 if (!req_ctx->dst)
903 goto unmap_sg_dest;
904 req_ctx->dst->phys_addr = 0;
905 if (!chainup_buffers(req->dst, nbytes, req_ctx->dst, flags))
906 goto free_buf_dest; 868 goto free_buf_dest;
907 src_direction = DMA_TO_DEVICE; 869 src_direction = DMA_TO_DEVICE;
870 req_ctx->dst = dst_hook.next;
871 crypt->dst_buf = dst_hook.phys_next;
908 } else { 872 } else {
909 req_ctx->dst = NULL; 873 req_ctx->dst = NULL;
910 req_ctx->dst_nents = 0;
911 } 874 }
912 nents = count_sg(req->src, nbytes); 875 req_ctx->src = NULL;
913 req_ctx->src_nents = nents; 876 if (!chainup_buffers(dev, req->src, nbytes, &src_hook,
914 dma_map_sg(dev, req->src, nents, src_direction); 877 flags, src_direction))
915
916 req_ctx->src = dma_pool_alloc(buffer_pool, flags, &crypt->src_buf);
917 if (!req_ctx->src)
918 goto unmap_sg_src;
919 req_ctx->src->phys_addr = 0;
920 if (!chainup_buffers(req->src, nbytes, req_ctx->src, flags))
921 goto free_buf_src; 878 goto free_buf_src;
922 879
880 req_ctx->src = src_hook.next;
881 crypt->src_buf = src_hook.phys_next;
923 crypt->ctl_flags |= CTL_FLAG_PERFORM_ABLK; 882 crypt->ctl_flags |= CTL_FLAG_PERFORM_ABLK;
924 qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt)); 883 qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt));
925 BUG_ON(qmgr_stat_overflow(SEND_QID)); 884 BUG_ON(qmgr_stat_overflow(SEND_QID));
926 return -EINPROGRESS; 885 return -EINPROGRESS;
927 886
928free_buf_src: 887free_buf_src:
929 free_buf_chain(req_ctx->src, crypt->src_buf); 888 free_buf_chain(dev, req_ctx->src, crypt->src_buf);
930unmap_sg_src:
931 dma_unmap_sg(dev, req->src, req_ctx->src_nents, src_direction);
932free_buf_dest: 889free_buf_dest:
933 if (req->src != req->dst) { 890 if (req->src != req->dst) {
934 free_buf_chain(req_ctx->dst, crypt->dst_buf); 891 free_buf_chain(dev, req_ctx->dst, crypt->dst_buf);
935unmap_sg_dest:
936 dma_unmap_sg(dev, req->src, req_ctx->dst_nents,
937 DMA_FROM_DEVICE);
938 } 892 }
939 crypt->ctl_flags = CTL_FLAG_UNUSED; 893 crypt->ctl_flags = CTL_FLAG_UNUSED;
940 return ret; 894 return -ENOMEM;
941} 895}
942 896
943static int ablk_encrypt(struct ablkcipher_request *req) 897static int ablk_encrypt(struct ablkcipher_request *req)
@@ -985,7 +939,7 @@ static int hmac_inconsistent(struct scatterlist *sg, unsigned start,
985 break; 939 break;
986 940
987 offset += sg->length; 941 offset += sg->length;
988 sg = sg_next(sg); 942 sg = scatterwalk_sg_next(sg);
989 } 943 }
990 return (start + nbytes > offset + sg->length); 944 return (start + nbytes > offset + sg->length);
991} 945}
@@ -997,11 +951,10 @@ static int aead_perform(struct aead_request *req, int encrypt,
997 struct ixp_ctx *ctx = crypto_aead_ctx(tfm); 951 struct ixp_ctx *ctx = crypto_aead_ctx(tfm);
998 unsigned ivsize = crypto_aead_ivsize(tfm); 952 unsigned ivsize = crypto_aead_ivsize(tfm);
999 unsigned authsize = crypto_aead_authsize(tfm); 953 unsigned authsize = crypto_aead_authsize(tfm);
1000 int ret = -ENOMEM;
1001 struct ix_sa_dir *dir; 954 struct ix_sa_dir *dir;
1002 struct crypt_ctl *crypt; 955 struct crypt_ctl *crypt;
1003 unsigned int cryptlen, nents; 956 unsigned int cryptlen;
1004 struct buffer_desc *buf; 957 struct buffer_desc *buf, src_hook;
1005 struct aead_ctx *req_ctx = aead_request_ctx(req); 958 struct aead_ctx *req_ctx = aead_request_ctx(req);
1006 gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? 959 gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
1007 GFP_KERNEL : GFP_ATOMIC; 960 GFP_KERNEL : GFP_ATOMIC;
@@ -1022,7 +975,7 @@ static int aead_perform(struct aead_request *req, int encrypt,
1022 } 975 }
1023 crypt = get_crypt_desc(); 976 crypt = get_crypt_desc();
1024 if (!crypt) 977 if (!crypt)
1025 return ret; 978 return -ENOMEM;
1026 979
1027 crypt->data.aead_req = req; 980 crypt->data.aead_req = req;
1028 crypt->crypto_ctx = dir->npe_ctx_phys; 981 crypt->crypto_ctx = dir->npe_ctx_phys;
@@ -1041,31 +994,27 @@ static int aead_perform(struct aead_request *req, int encrypt,
1041 BUG(); /* -ENOTSUP because of my lazyness */ 994 BUG(); /* -ENOTSUP because of my lazyness */
1042 } 995 }
1043 996
1044 req_ctx->buffer = dma_pool_alloc(buffer_pool, flags, &crypt->src_buf);
1045 if (!req_ctx->buffer)
1046 goto out;
1047 req_ctx->buffer->phys_addr = 0;
1048 /* ASSOC data */ 997 /* ASSOC data */
1049 nents = count_sg(req->assoc, req->assoclen); 998 buf = chainup_buffers(dev, req->assoc, req->assoclen, &src_hook,
1050 req_ctx->assoc_nents = nents; 999 flags, DMA_TO_DEVICE);
1051 dma_map_sg(dev, req->assoc, nents, DMA_TO_DEVICE); 1000 req_ctx->buffer = src_hook.next;
1052 buf = chainup_buffers(req->assoc, req->assoclen, req_ctx->buffer,flags); 1001 crypt->src_buf = src_hook.phys_next;
1053 if (!buf) 1002 if (!buf)
1054 goto unmap_sg_assoc; 1003 goto out;
1055 /* IV */ 1004 /* IV */
1056 sg_init_table(&req_ctx->ivlist, 1); 1005 sg_init_table(&req_ctx->ivlist, 1);
1057 sg_set_buf(&req_ctx->ivlist, iv, ivsize); 1006 sg_set_buf(&req_ctx->ivlist, iv, ivsize);
1058 dma_map_sg(dev, &req_ctx->ivlist, 1, DMA_BIDIRECTIONAL); 1007 buf = chainup_buffers(dev, &req_ctx->ivlist, ivsize, buf, flags,
1059 buf = chainup_buffers(&req_ctx->ivlist, ivsize, buf, flags); 1008 DMA_BIDIRECTIONAL);
1060 if (!buf) 1009 if (!buf)
1061 goto unmap_sg_iv; 1010 goto free_chain;
1062 if (unlikely(hmac_inconsistent(req->src, cryptlen, authsize))) { 1011 if (unlikely(hmac_inconsistent(req->src, cryptlen, authsize))) {
1063 /* The 12 hmac bytes are scattered, 1012 /* The 12 hmac bytes are scattered,
1064 * we need to copy them into a safe buffer */ 1013 * we need to copy them into a safe buffer */
1065 req_ctx->hmac_virt = dma_pool_alloc(buffer_pool, flags, 1014 req_ctx->hmac_virt = dma_pool_alloc(buffer_pool, flags,
1066 &crypt->icv_rev_aes); 1015 &crypt->icv_rev_aes);
1067 if (unlikely(!req_ctx->hmac_virt)) 1016 if (unlikely(!req_ctx->hmac_virt))
1068 goto unmap_sg_iv; 1017 goto free_chain;
1069 if (!encrypt) { 1018 if (!encrypt) {
1070 scatterwalk_map_and_copy(req_ctx->hmac_virt, 1019 scatterwalk_map_and_copy(req_ctx->hmac_virt,
1071 req->src, cryptlen, authsize, 0); 1020 req->src, cryptlen, authsize, 0);
@@ -1075,33 +1024,28 @@ static int aead_perform(struct aead_request *req, int encrypt,
1075 req_ctx->hmac_virt = NULL; 1024 req_ctx->hmac_virt = NULL;
1076 } 1025 }
1077 /* Crypt */ 1026 /* Crypt */
1078 nents = count_sg(req->src, cryptlen + authsize); 1027 buf = chainup_buffers(dev, req->src, cryptlen + authsize, buf, flags,
1079 req_ctx->src_nents = nents; 1028 DMA_BIDIRECTIONAL);
1080 dma_map_sg(dev, req->src, nents, DMA_BIDIRECTIONAL);
1081 buf = chainup_buffers(req->src, cryptlen + authsize, buf, flags);
1082 if (!buf) 1029 if (!buf)
1083 goto unmap_sg_src; 1030 goto free_hmac_virt;
1084 if (!req_ctx->hmac_virt) { 1031 if (!req_ctx->hmac_virt) {
1085 crypt->icv_rev_aes = buf->phys_addr + buf->buf_len - authsize; 1032 crypt->icv_rev_aes = buf->phys_addr + buf->buf_len - authsize;
1086 } 1033 }
1034
1087 crypt->ctl_flags |= CTL_FLAG_PERFORM_AEAD; 1035 crypt->ctl_flags |= CTL_FLAG_PERFORM_AEAD;
1088 qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt)); 1036 qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt));
1089 BUG_ON(qmgr_stat_overflow(SEND_QID)); 1037 BUG_ON(qmgr_stat_overflow(SEND_QID));
1090 return -EINPROGRESS; 1038 return -EINPROGRESS;
1091unmap_sg_src: 1039free_hmac_virt:
1092 dma_unmap_sg(dev, req->src, req_ctx->src_nents, DMA_BIDIRECTIONAL);
1093 if (req_ctx->hmac_virt) { 1040 if (req_ctx->hmac_virt) {
1094 dma_pool_free(buffer_pool, req_ctx->hmac_virt, 1041 dma_pool_free(buffer_pool, req_ctx->hmac_virt,
1095 crypt->icv_rev_aes); 1042 crypt->icv_rev_aes);
1096 } 1043 }
1097unmap_sg_iv: 1044free_chain:
1098 dma_unmap_sg(dev, &req_ctx->ivlist, 1, DMA_BIDIRECTIONAL); 1045 free_buf_chain(dev, req_ctx->buffer, crypt->src_buf);
1099unmap_sg_assoc:
1100 dma_unmap_sg(dev, req->assoc, req_ctx->assoc_nents, DMA_TO_DEVICE);
1101 free_buf_chain(req_ctx->buffer, crypt->src_buf);
1102out: 1046out:
1103 crypt->ctl_flags = CTL_FLAG_UNUSED; 1047 crypt->ctl_flags = CTL_FLAG_UNUSED;
1104 return ret; 1048 return -ENOMEM;
1105} 1049}
1106 1050
1107static int aead_setup(struct crypto_aead *tfm, unsigned int authsize) 1051static int aead_setup(struct crypto_aead *tfm, unsigned int authsize)
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 2281b5098e95..36e0675be9f7 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -121,6 +121,7 @@ config MD_RAID10
121config MD_RAID456 121config MD_RAID456
122 tristate "RAID-4/RAID-5/RAID-6 mode" 122 tristate "RAID-4/RAID-5/RAID-6 mode"
123 depends on BLK_DEV_MD 123 depends on BLK_DEV_MD
124 select MD_RAID6_PQ
124 select ASYNC_MEMCPY 125 select ASYNC_MEMCPY
125 select ASYNC_XOR 126 select ASYNC_XOR
126 ---help--- 127 ---help---
@@ -151,34 +152,8 @@ config MD_RAID456
151 152
152 If unsure, say Y. 153 If unsure, say Y.
153 154
154config MD_RAID5_RESHAPE 155config MD_RAID6_PQ
155 bool "Support adding drives to a raid-5 array" 156 tristate
156 depends on MD_RAID456
157 default y
158 ---help---
159 A RAID-5 set can be expanded by adding extra drives. This
160 requires "restriping" the array which means (almost) every
161 block must be written to a different place.
162
163 This option allows such restriping to be done while the array
164 is online.
165
166 You will need mdadm version 2.4.1 or later to use this
167 feature safely. During the early stage of reshape there is
168 a critical section where live data is being over-written. A
169 crash during this time needs extra care for recovery. The
170 newer mdadm takes a copy of the data in the critical section
171 and will restore it, if necessary, after a crash.
172
173 The mdadm usage is e.g.
174 mdadm --grow /dev/md1 --raid-disks=6
175 to grow '/dev/md1' to having 6 disks.
176
177 Note: The array can only be expanded, not contracted.
178 There should be enough spares already present to make the new
179 array workable.
180
181 If unsure, say Y.
182 157
183config MD_MULTIPATH 158config MD_MULTIPATH
184 tristate "Multipath I/O support" 159 tristate "Multipath I/O support"
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 72880b7e28d9..45cc5951d928 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -2,20 +2,21 @@
2# Makefile for the kernel software RAID and LVM drivers. 2# Makefile for the kernel software RAID and LVM drivers.
3# 3#
4 4
5dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ 5dm-mod-y += dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
6 dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o 6 dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o
7dm-multipath-objs := dm-path-selector.o dm-mpath.o 7dm-multipath-y += dm-path-selector.o dm-mpath.o
8dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-snap-transient.o \ 8dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \
9 dm-snap-persistent.o 9 dm-snap-persistent.o
10dm-mirror-objs := dm-raid1.o 10dm-mirror-y += dm-raid1.o
11md-mod-objs := md.o bitmap.o 11md-mod-y += md.o bitmap.o
12raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \ 12raid456-y += raid5.o
13raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \
13 raid6int1.o raid6int2.o raid6int4.o \ 14 raid6int1.o raid6int2.o raid6int4.o \
14 raid6int8.o raid6int16.o raid6int32.o \ 15 raid6int8.o raid6int16.o raid6int32.o \
15 raid6altivec1.o raid6altivec2.o raid6altivec4.o \ 16 raid6altivec1.o raid6altivec2.o raid6altivec4.o \
16 raid6altivec8.o \ 17 raid6altivec8.o \
17 raid6mmx.o raid6sse1.o raid6sse2.o 18 raid6mmx.o raid6sse1.o raid6sse2.o
18hostprogs-y := mktables 19hostprogs-y += mktables
19 20
20# Note: link order is important. All raid personalities 21# Note: link order is important. All raid personalities
21# and must come before md.o, as they each initialise 22# and must come before md.o, as they each initialise
@@ -26,6 +27,7 @@ obj-$(CONFIG_MD_LINEAR) += linear.o
26obj-$(CONFIG_MD_RAID0) += raid0.o 27obj-$(CONFIG_MD_RAID0) += raid0.o
27obj-$(CONFIG_MD_RAID1) += raid1.o 28obj-$(CONFIG_MD_RAID1) += raid1.o
28obj-$(CONFIG_MD_RAID10) += raid10.o 29obj-$(CONFIG_MD_RAID10) += raid10.o
30obj-$(CONFIG_MD_RAID6_PQ) += raid6_pq.o
29obj-$(CONFIG_MD_RAID456) += raid456.o 31obj-$(CONFIG_MD_RAID456) += raid456.o
30obj-$(CONFIG_MD_MULTIPATH) += multipath.o 32obj-$(CONFIG_MD_MULTIPATH) += multipath.o
31obj-$(CONFIG_MD_FAULTY) += faulty.o 33obj-$(CONFIG_MD_FAULTY) += faulty.o
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 719943763391..f8a9f7ab2cb8 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -16,6 +16,7 @@
16 * wait if count gets too high, wake when it drops to half. 16 * wait if count gets too high, wake when it drops to half.
17 */ 17 */
18 18
19#include <linux/blkdev.h>
19#include <linux/module.h> 20#include <linux/module.h>
20#include <linux/errno.h> 21#include <linux/errno.h>
21#include <linux/slab.h> 22#include <linux/slab.h>
@@ -26,8 +27,8 @@
26#include <linux/file.h> 27#include <linux/file.h>
27#include <linux/mount.h> 28#include <linux/mount.h>
28#include <linux/buffer_head.h> 29#include <linux/buffer_head.h>
29#include <linux/raid/md.h> 30#include "md.h"
30#include <linux/raid/bitmap.h> 31#include "bitmap.h"
31 32
32/* debug macros */ 33/* debug macros */
33 34
@@ -111,9 +112,10 @@ static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int creat
111 unsigned char *mappage; 112 unsigned char *mappage;
112 113
113 if (page >= bitmap->pages) { 114 if (page >= bitmap->pages) {
114 printk(KERN_ALERT 115 /* This can happen if bitmap_start_sync goes beyond
115 "%s: invalid bitmap page request: %lu (> %lu)\n", 116 * End-of-device while looking for a whole page.
116 bmname(bitmap), page, bitmap->pages-1); 117 * It is harmless.
118 */
117 return -EINVAL; 119 return -EINVAL;
118 } 120 }
119 121
@@ -265,7 +267,6 @@ static mdk_rdev_t *next_active_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
265 list_for_each_continue_rcu(pos, &mddev->disks) { 267 list_for_each_continue_rcu(pos, &mddev->disks) {
266 rdev = list_entry(pos, mdk_rdev_t, same_set); 268 rdev = list_entry(pos, mdk_rdev_t, same_set);
267 if (rdev->raid_disk >= 0 && 269 if (rdev->raid_disk >= 0 &&
268 test_bit(In_sync, &rdev->flags) &&
269 !test_bit(Faulty, &rdev->flags)) { 270 !test_bit(Faulty, &rdev->flags)) {
270 /* this is a usable devices */ 271 /* this is a usable devices */
271 atomic_inc(&rdev->nr_pending); 272 atomic_inc(&rdev->nr_pending);
@@ -297,7 +298,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
297 + size/512 > 0) 298 + size/512 > 0)
298 /* bitmap runs in to metadata */ 299 /* bitmap runs in to metadata */
299 goto bad_alignment; 300 goto bad_alignment;
300 if (rdev->data_offset + mddev->size*2 301 if (rdev->data_offset + mddev->dev_sectors
301 > rdev->sb_start + bitmap->offset) 302 > rdev->sb_start + bitmap->offset)
302 /* data runs in to bitmap */ 303 /* data runs in to bitmap */
303 goto bad_alignment; 304 goto bad_alignment;
@@ -570,7 +571,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
570 else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO || 571 else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
571 le32_to_cpu(sb->version) > BITMAP_MAJOR_HI) 572 le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
572 reason = "unrecognized superblock version"; 573 reason = "unrecognized superblock version";
573 else if (chunksize < PAGE_SIZE) 574 else if (chunksize < 512)
574 reason = "bitmap chunksize too small"; 575 reason = "bitmap chunksize too small";
575 else if ((1 << ffz(~chunksize)) != chunksize) 576 else if ((1 << ffz(~chunksize)) != chunksize)
576 reason = "bitmap chunksize not a power of 2"; 577 reason = "bitmap chunksize not a power of 2";
@@ -1306,6 +1307,9 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
1306 PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n", 1307 PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n",
1307 atomic_read(&bitmap->behind_writes), bitmap->max_write_behind); 1308 atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
1308 } 1309 }
1310 if (bitmap->mddev->degraded)
1311 /* Never clear bits or update events_cleared when degraded */
1312 success = 0;
1309 1313
1310 while (sectors) { 1314 while (sectors) {
1311 int blocks; 1315 int blocks;
@@ -1345,8 +1349,8 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
1345 } 1349 }
1346} 1350}
1347 1351
1348int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, 1352static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
1349 int degraded) 1353 int degraded)
1350{ 1354{
1351 bitmap_counter_t *bmc; 1355 bitmap_counter_t *bmc;
1352 int rv; 1356 int rv;
@@ -1374,6 +1378,29 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
1374 return rv; 1378 return rv;
1375} 1379}
1376 1380
1381int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
1382 int degraded)
1383{
1384 /* bitmap_start_sync must always report on multiples of whole
1385 * pages, otherwise resync (which is very PAGE_SIZE based) will
1386 * get confused.
1387 * So call __bitmap_start_sync repeatedly (if needed) until
1388 * At least PAGE_SIZE>>9 blocks are covered.
1389 * Return the 'or' of the result.
1390 */
1391 int rv = 0;
1392 int blocks1;
1393
1394 *blocks = 0;
1395 while (*blocks < (PAGE_SIZE>>9)) {
1396 rv |= __bitmap_start_sync(bitmap, offset,
1397 &blocks1, degraded);
1398 offset += blocks1;
1399 *blocks += blocks1;
1400 }
1401 return rv;
1402}
1403
1377void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted) 1404void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted)
1378{ 1405{
1379 bitmap_counter_t *bmc; 1406 bitmap_counter_t *bmc;
@@ -1443,6 +1470,8 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
1443 wait_event(bitmap->mddev->recovery_wait, 1470 wait_event(bitmap->mddev->recovery_wait,
1444 atomic_read(&bitmap->mddev->recovery_active) == 0); 1471 atomic_read(&bitmap->mddev->recovery_active) == 0);
1445 1472
1473 bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync;
1474 set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
1446 sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1); 1475 sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1);
1447 s = 0; 1476 s = 0;
1448 while (s < sector && s < bitmap->mddev->resync_max_sectors) { 1477 while (s < sector && s < bitmap->mddev->resync_max_sectors) {
diff --git a/include/linux/raid/bitmap.h b/drivers/md/bitmap.h
index e98900671ca9..e98900671ca9 100644
--- a/include/linux/raid/bitmap.h
+++ b/drivers/md/bitmap.h
diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h
index d4509be0fe67..345098b4ca77 100644
--- a/drivers/md/dm-bio-list.h
+++ b/drivers/md/dm-bio-list.h
@@ -52,6 +52,16 @@ static inline void bio_list_add(struct bio_list *bl, struct bio *bio)
52 bl->tail = bio; 52 bl->tail = bio;
53} 53}
54 54
55static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio)
56{
57 bio->bi_next = bl->head;
58
59 bl->head = bio;
60
61 if (!bl->tail)
62 bl->tail = bio;
63}
64
55static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) 65static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
56{ 66{
57 if (!bl2->head) 67 if (!bl2->head)
diff --git a/drivers/md/dm-bio-record.h b/drivers/md/dm-bio-record.h
index d3ec217847d6..3a8cfa2645c7 100644
--- a/drivers/md/dm-bio-record.h
+++ b/drivers/md/dm-bio-record.h
@@ -16,30 +16,56 @@
16 * functions in this file help the target record and restore the 16 * functions in this file help the target record and restore the
17 * original bio state. 17 * original bio state.
18 */ 18 */
19
20struct dm_bio_vec_details {
21#if PAGE_SIZE < 65536
22 __u16 bv_len;
23 __u16 bv_offset;
24#else
25 unsigned bv_len;
26 unsigned bv_offset;
27#endif
28};
29
19struct dm_bio_details { 30struct dm_bio_details {
20 sector_t bi_sector; 31 sector_t bi_sector;
21 struct block_device *bi_bdev; 32 struct block_device *bi_bdev;
22 unsigned int bi_size; 33 unsigned int bi_size;
23 unsigned short bi_idx; 34 unsigned short bi_idx;
24 unsigned long bi_flags; 35 unsigned long bi_flags;
36 struct dm_bio_vec_details bi_io_vec[BIO_MAX_PAGES];
25}; 37};
26 38
27static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio) 39static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
28{ 40{
41 unsigned i;
42
29 bd->bi_sector = bio->bi_sector; 43 bd->bi_sector = bio->bi_sector;
30 bd->bi_bdev = bio->bi_bdev; 44 bd->bi_bdev = bio->bi_bdev;
31 bd->bi_size = bio->bi_size; 45 bd->bi_size = bio->bi_size;
32 bd->bi_idx = bio->bi_idx; 46 bd->bi_idx = bio->bi_idx;
33 bd->bi_flags = bio->bi_flags; 47 bd->bi_flags = bio->bi_flags;
48
49 for (i = 0; i < bio->bi_vcnt; i++) {
50 bd->bi_io_vec[i].bv_len = bio->bi_io_vec[i].bv_len;
51 bd->bi_io_vec[i].bv_offset = bio->bi_io_vec[i].bv_offset;
52 }
34} 53}
35 54
36static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio) 55static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
37{ 56{
57 unsigned i;
58
38 bio->bi_sector = bd->bi_sector; 59 bio->bi_sector = bd->bi_sector;
39 bio->bi_bdev = bd->bi_bdev; 60 bio->bi_bdev = bd->bi_bdev;
40 bio->bi_size = bd->bi_size; 61 bio->bi_size = bd->bi_size;
41 bio->bi_idx = bd->bi_idx; 62 bio->bi_idx = bd->bi_idx;
42 bio->bi_flags = bd->bi_flags; 63 bio->bi_flags = bd->bi_flags;
64
65 for (i = 0; i < bio->bi_vcnt; i++) {
66 bio->bi_io_vec[i].bv_len = bd->bi_io_vec[i].bv_len;
67 bio->bi_io_vec[i].bv_offset = bd->bi_io_vec[i].bv_offset;
68 }
43} 69}
44 70
45#endif 71#endif
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index bfefd079a955..53394e863c74 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1156,8 +1156,7 @@ bad_ivmode:
1156 crypto_free_ablkcipher(tfm); 1156 crypto_free_ablkcipher(tfm);
1157bad_cipher: 1157bad_cipher:
1158 /* Must zero key material before freeing */ 1158 /* Must zero key material before freeing */
1159 memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8)); 1159 kzfree(cc);
1160 kfree(cc);
1161 return -EINVAL; 1160 return -EINVAL;
1162} 1161}
1163 1162
@@ -1183,8 +1182,7 @@ static void crypt_dtr(struct dm_target *ti)
1183 dm_put_device(ti, cc->dev); 1182 dm_put_device(ti, cc->dev);
1184 1183
1185 /* Must zero key material before freeing */ 1184 /* Must zero key material before freeing */
1186 memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8)); 1185 kzfree(cc);
1187 kfree(cc);
1188} 1186}
1189 1187
1190static int crypt_map(struct dm_target *ti, struct bio *bio, 1188static int crypt_map(struct dm_target *ti, struct bio *bio,
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index dccbfb0e010f..a2e26c242141 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -7,6 +7,7 @@
7 7
8#include "dm-exception-store.h" 8#include "dm-exception-store.h"
9 9
10#include <linux/ctype.h>
10#include <linux/mm.h> 11#include <linux/mm.h>
11#include <linux/pagemap.h> 12#include <linux/pagemap.h>
12#include <linux/vmalloc.h> 13#include <linux/vmalloc.h>
@@ -14,6 +15,257 @@
14 15
15#define DM_MSG_PREFIX "snapshot exception stores" 16#define DM_MSG_PREFIX "snapshot exception stores"
16 17
18static LIST_HEAD(_exception_store_types);
19static DEFINE_SPINLOCK(_lock);
20
21static struct dm_exception_store_type *__find_exception_store_type(const char *name)
22{
23 struct dm_exception_store_type *type;
24
25 list_for_each_entry(type, &_exception_store_types, list)
26 if (!strcmp(name, type->name))
27 return type;
28
29 return NULL;
30}
31
32static struct dm_exception_store_type *_get_exception_store_type(const char *name)
33{
34 struct dm_exception_store_type *type;
35
36 spin_lock(&_lock);
37
38 type = __find_exception_store_type(name);
39
40 if (type && !try_module_get(type->module))
41 type = NULL;
42
43 spin_unlock(&_lock);
44
45 return type;
46}
47
48/*
49 * get_type
50 * @type_name
51 *
52 * Attempt to retrieve the dm_exception_store_type by name. If not already
53 * available, attempt to load the appropriate module.
54 *
55 * Exstore modules are named "dm-exstore-" followed by the 'type_name'.
56 * Modules may contain multiple types.
57 * This function will first try the module "dm-exstore-<type_name>",
58 * then truncate 'type_name' on the last '-' and try again.
59 *
60 * For example, if type_name was "clustered-shared", it would search
61 * 'dm-exstore-clustered-shared' then 'dm-exstore-clustered'.
62 *
63 * 'dm-exception-store-<type_name>' is too long of a name in my
64 * opinion, which is why I've chosen to have the files
65 * containing exception store implementations be 'dm-exstore-<type_name>'.
66 * If you want your module to be autoloaded, you will follow this
67 * naming convention.
68 *
69 * Returns: dm_exception_store_type* on success, NULL on failure
70 */
71static struct dm_exception_store_type *get_type(const char *type_name)
72{
73 char *p, *type_name_dup;
74 struct dm_exception_store_type *type;
75
76 type = _get_exception_store_type(type_name);
77 if (type)
78 return type;
79
80 type_name_dup = kstrdup(type_name, GFP_KERNEL);
81 if (!type_name_dup) {
82 DMERR("No memory left to attempt load for \"%s\"", type_name);
83 return NULL;
84 }
85
86 while (request_module("dm-exstore-%s", type_name_dup) ||
87 !(type = _get_exception_store_type(type_name))) {
88 p = strrchr(type_name_dup, '-');
89 if (!p)
90 break;
91 p[0] = '\0';
92 }
93
94 if (!type)
95 DMWARN("Module for exstore type \"%s\" not found.", type_name);
96
97 kfree(type_name_dup);
98
99 return type;
100}
101
102static void put_type(struct dm_exception_store_type *type)
103{
104 spin_lock(&_lock);
105 module_put(type->module);
106 spin_unlock(&_lock);
107}
108
109int dm_exception_store_type_register(struct dm_exception_store_type *type)
110{
111 int r = 0;
112
113 spin_lock(&_lock);
114 if (!__find_exception_store_type(type->name))
115 list_add(&type->list, &_exception_store_types);
116 else
117 r = -EEXIST;
118 spin_unlock(&_lock);
119
120 return r;
121}
122EXPORT_SYMBOL(dm_exception_store_type_register);
123
124int dm_exception_store_type_unregister(struct dm_exception_store_type *type)
125{
126 spin_lock(&_lock);
127
128 if (!__find_exception_store_type(type->name)) {
129 spin_unlock(&_lock);
130 return -EINVAL;
131 }
132
133 list_del(&type->list);
134
135 spin_unlock(&_lock);
136
137 return 0;
138}
139EXPORT_SYMBOL(dm_exception_store_type_unregister);
140
141/*
142 * Round a number up to the nearest 'size' boundary. size must
143 * be a power of 2.
144 */
145static ulong round_up(ulong n, ulong size)
146{
147 size--;
148 return (n + size) & ~size;
149}
150
151static int set_chunk_size(struct dm_exception_store *store,
152 const char *chunk_size_arg, char **error)
153{
154 unsigned long chunk_size_ulong;
155 char *value;
156
157 chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10);
158 if (*chunk_size_arg == '\0' || *value != '\0') {
159 *error = "Invalid chunk size";
160 return -EINVAL;
161 }
162
163 if (!chunk_size_ulong) {
164 store->chunk_size = store->chunk_mask = store->chunk_shift = 0;
165 return 0;
166 }
167
168 /*
169 * Chunk size must be multiple of page size. Silently
170 * round up if it's not.
171 */
172 chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9);
173
174 /* Check chunk_size is a power of 2 */
175 if (!is_power_of_2(chunk_size_ulong)) {
176 *error = "Chunk size is not a power of 2";
177 return -EINVAL;
178 }
179
180 /* Validate the chunk size against the device block size */
181 if (chunk_size_ulong % (bdev_hardsect_size(store->cow->bdev) >> 9)) {
182 *error = "Chunk size is not a multiple of device blocksize";
183 return -EINVAL;
184 }
185
186 store->chunk_size = chunk_size_ulong;
187 store->chunk_mask = chunk_size_ulong - 1;
188 store->chunk_shift = ffs(chunk_size_ulong) - 1;
189
190 return 0;
191}
192
193int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
194 unsigned *args_used,
195 struct dm_exception_store **store)
196{
197 int r = 0;
198 struct dm_exception_store_type *type;
199 struct dm_exception_store *tmp_store;
200 char persistent;
201
202 if (argc < 3) {
203 ti->error = "Insufficient exception store arguments";
204 return -EINVAL;
205 }
206
207 tmp_store = kmalloc(sizeof(*tmp_store), GFP_KERNEL);
208 if (!tmp_store) {
209 ti->error = "Exception store allocation failed";
210 return -ENOMEM;
211 }
212
213 persistent = toupper(*argv[1]);
214 if (persistent != 'P' && persistent != 'N') {
215 ti->error = "Persistent flag is not P or N";
216 return -EINVAL;
217 }
218
219 type = get_type(argv[1]);
220 if (!type) {
221 ti->error = "Exception store type not recognised";
222 r = -EINVAL;
223 goto bad_type;
224 }
225
226 tmp_store->type = type;
227 tmp_store->ti = ti;
228
229 r = dm_get_device(ti, argv[0], 0, 0,
230 FMODE_READ | FMODE_WRITE, &tmp_store->cow);
231 if (r) {
232 ti->error = "Cannot get COW device";
233 goto bad_cow;
234 }
235
236 r = set_chunk_size(tmp_store, argv[2], &ti->error);
237 if (r)
238 goto bad_cow;
239
240 r = type->ctr(tmp_store, 0, NULL);
241 if (r) {
242 ti->error = "Exception store type constructor failed";
243 goto bad_ctr;
244 }
245
246 *args_used = 3;
247 *store = tmp_store;
248 return 0;
249
250bad_ctr:
251 dm_put_device(ti, tmp_store->cow);
252bad_cow:
253 put_type(type);
254bad_type:
255 kfree(tmp_store);
256 return r;
257}
258EXPORT_SYMBOL(dm_exception_store_create);
259
260void dm_exception_store_destroy(struct dm_exception_store *store)
261{
262 store->type->dtr(store);
263 dm_put_device(store->ti, store->cow);
264 put_type(store->type);
265 kfree(store);
266}
267EXPORT_SYMBOL(dm_exception_store_destroy);
268
17int dm_exception_store_init(void) 269int dm_exception_store_init(void)
18{ 270{
19 int r; 271 int r;
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index bb9f33d5daa2..0a2e6e7f67b3 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -37,11 +37,18 @@ struct dm_snap_exception {
37 * Abstraction to handle the meta/layout of exception stores (the 37 * Abstraction to handle the meta/layout of exception stores (the
38 * COW device). 38 * COW device).
39 */ 39 */
40struct dm_exception_store { 40struct dm_exception_store;
41struct dm_exception_store_type {
42 const char *name;
43 struct module *module;
44
45 int (*ctr) (struct dm_exception_store *store,
46 unsigned argc, char **argv);
47
41 /* 48 /*
42 * Destroys this object when you've finished with it. 49 * Destroys this object when you've finished with it.
43 */ 50 */
44 void (*destroy) (struct dm_exception_store *store); 51 void (*dtr) (struct dm_exception_store *store);
45 52
46 /* 53 /*
47 * The target shouldn't read the COW device until this is 54 * The target shouldn't read the COW device until this is
@@ -72,8 +79,9 @@ struct dm_exception_store {
72 */ 79 */
73 void (*drop_snapshot) (struct dm_exception_store *store); 80 void (*drop_snapshot) (struct dm_exception_store *store);
74 81
75 int (*status) (struct dm_exception_store *store, status_type_t status, 82 unsigned (*status) (struct dm_exception_store *store,
76 char *result, unsigned int maxlen); 83 status_type_t status, char *result,
84 unsigned maxlen);
77 85
78 /* 86 /*
79 * Return how full the snapshot is. 87 * Return how full the snapshot is.
@@ -82,7 +90,21 @@ struct dm_exception_store {
82 sector_t *numerator, 90 sector_t *numerator,
83 sector_t *denominator); 91 sector_t *denominator);
84 92
85 struct dm_snapshot *snap; 93 /* For internal device-mapper use only. */
94 struct list_head list;
95};
96
97struct dm_exception_store {
98 struct dm_exception_store_type *type;
99 struct dm_target *ti;
100
101 struct dm_dev *cow;
102
103 /* Size of data blocks saved - must be a power of 2 */
104 chunk_t chunk_size;
105 chunk_t chunk_mask;
106 chunk_t chunk_shift;
107
86 void *context; 108 void *context;
87}; 109};
88 110
@@ -129,6 +151,28 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
129 151
130# endif 152# endif
131 153
154/*
155 * Return the number of sectors in the device.
156 */
157static inline sector_t get_dev_size(struct block_device *bdev)
158{
159 return bdev->bd_inode->i_size >> SECTOR_SHIFT;
160}
161
162static inline chunk_t sector_to_chunk(struct dm_exception_store *store,
163 sector_t sector)
164{
165 return (sector & ~store->chunk_mask) >> store->chunk_shift;
166}
167
168int dm_exception_store_type_register(struct dm_exception_store_type *type);
169int dm_exception_store_type_unregister(struct dm_exception_store_type *type);
170
171int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
172 unsigned *args_used,
173 struct dm_exception_store **store);
174void dm_exception_store_destroy(struct dm_exception_store *store);
175
132int dm_exception_store_init(void); 176int dm_exception_store_init(void);
133void dm_exception_store_exit(void); 177void dm_exception_store_exit(void);
134 178
@@ -141,8 +185,4 @@ void dm_persistent_snapshot_exit(void);
141int dm_transient_snapshot_init(void); 185int dm_transient_snapshot_init(void);
142void dm_transient_snapshot_exit(void); 186void dm_transient_snapshot_exit(void);
143 187
144int dm_create_persistent(struct dm_exception_store *store);
145
146int dm_create_transient(struct dm_exception_store *store);
147
148#endif /* _LINUX_DM_EXCEPTION_STORE */ 188#endif /* _LINUX_DM_EXCEPTION_STORE */
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 36e2b5e46a6b..e73aabd61cd7 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -370,16 +370,13 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
370 while (1) { 370 while (1) {
371 set_current_state(TASK_UNINTERRUPTIBLE); 371 set_current_state(TASK_UNINTERRUPTIBLE);
372 372
373 if (!atomic_read(&io.count) || signal_pending(current)) 373 if (!atomic_read(&io.count))
374 break; 374 break;
375 375
376 io_schedule(); 376 io_schedule();
377 } 377 }
378 set_current_state(TASK_RUNNING); 378 set_current_state(TASK_RUNNING);
379 379
380 if (atomic_read(&io.count))
381 return -EINTR;
382
383 if (error_bits) 380 if (error_bits)
384 *error_bits = io.error_bits; 381 *error_bits = io.error_bits;
385 382
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 737961f275c1..be233bc4d917 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -16,40 +16,29 @@
16 16
17#define DM_MSG_PREFIX "dirty region log" 17#define DM_MSG_PREFIX "dirty region log"
18 18
19struct dm_dirty_log_internal {
20 struct dm_dirty_log_type *type;
21
22 struct list_head list;
23 long use;
24};
25
26static LIST_HEAD(_log_types); 19static LIST_HEAD(_log_types);
27static DEFINE_SPINLOCK(_lock); 20static DEFINE_SPINLOCK(_lock);
28 21
29static struct dm_dirty_log_internal *__find_dirty_log_type(const char *name) 22static struct dm_dirty_log_type *__find_dirty_log_type(const char *name)
30{ 23{
31 struct dm_dirty_log_internal *log_type; 24 struct dm_dirty_log_type *log_type;
32 25
33 list_for_each_entry(log_type, &_log_types, list) 26 list_for_each_entry(log_type, &_log_types, list)
34 if (!strcmp(name, log_type->type->name)) 27 if (!strcmp(name, log_type->name))
35 return log_type; 28 return log_type;
36 29
37 return NULL; 30 return NULL;
38} 31}
39 32
40static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name) 33static struct dm_dirty_log_type *_get_dirty_log_type(const char *name)
41{ 34{
42 struct dm_dirty_log_internal *log_type; 35 struct dm_dirty_log_type *log_type;
43 36
44 spin_lock(&_lock); 37 spin_lock(&_lock);
45 38
46 log_type = __find_dirty_log_type(name); 39 log_type = __find_dirty_log_type(name);
47 if (log_type) { 40 if (log_type && !try_module_get(log_type->module))
48 if (!log_type->use && !try_module_get(log_type->type->module)) 41 log_type = NULL;
49 log_type = NULL;
50 else
51 log_type->use++;
52 }
53 42
54 spin_unlock(&_lock); 43 spin_unlock(&_lock);
55 44
@@ -76,14 +65,14 @@ static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name)
76static struct dm_dirty_log_type *get_type(const char *type_name) 65static struct dm_dirty_log_type *get_type(const char *type_name)
77{ 66{
78 char *p, *type_name_dup; 67 char *p, *type_name_dup;
79 struct dm_dirty_log_internal *log_type; 68 struct dm_dirty_log_type *log_type;
80 69
81 if (!type_name) 70 if (!type_name)
82 return NULL; 71 return NULL;
83 72
84 log_type = _get_dirty_log_type(type_name); 73 log_type = _get_dirty_log_type(type_name);
85 if (log_type) 74 if (log_type)
86 return log_type->type; 75 return log_type;
87 76
88 type_name_dup = kstrdup(type_name, GFP_KERNEL); 77 type_name_dup = kstrdup(type_name, GFP_KERNEL);
89 if (!type_name_dup) { 78 if (!type_name_dup) {
@@ -105,56 +94,33 @@ static struct dm_dirty_log_type *get_type(const char *type_name)
105 94
106 kfree(type_name_dup); 95 kfree(type_name_dup);
107 96
108 return log_type ? log_type->type : NULL; 97 return log_type;
109} 98}
110 99
111static void put_type(struct dm_dirty_log_type *type) 100static void put_type(struct dm_dirty_log_type *type)
112{ 101{
113 struct dm_dirty_log_internal *log_type;
114
115 if (!type) 102 if (!type)
116 return; 103 return;
117 104
118 spin_lock(&_lock); 105 spin_lock(&_lock);
119 log_type = __find_dirty_log_type(type->name); 106 if (!__find_dirty_log_type(type->name))
120 if (!log_type)
121 goto out; 107 goto out;
122 108
123 if (!--log_type->use) 109 module_put(type->module);
124 module_put(type->module);
125
126 BUG_ON(log_type->use < 0);
127 110
128out: 111out:
129 spin_unlock(&_lock); 112 spin_unlock(&_lock);
130} 113}
131 114
132static struct dm_dirty_log_internal *_alloc_dirty_log_type(struct dm_dirty_log_type *type)
133{
134 struct dm_dirty_log_internal *log_type = kzalloc(sizeof(*log_type),
135 GFP_KERNEL);
136
137 if (log_type)
138 log_type->type = type;
139
140 return log_type;
141}
142
143int dm_dirty_log_type_register(struct dm_dirty_log_type *type) 115int dm_dirty_log_type_register(struct dm_dirty_log_type *type)
144{ 116{
145 struct dm_dirty_log_internal *log_type = _alloc_dirty_log_type(type);
146 int r = 0; 117 int r = 0;
147 118
148 if (!log_type)
149 return -ENOMEM;
150
151 spin_lock(&_lock); 119 spin_lock(&_lock);
152 if (!__find_dirty_log_type(type->name)) 120 if (!__find_dirty_log_type(type->name))
153 list_add(&log_type->list, &_log_types); 121 list_add(&type->list, &_log_types);
154 else { 122 else
155 kfree(log_type);
156 r = -EEXIST; 123 r = -EEXIST;
157 }
158 spin_unlock(&_lock); 124 spin_unlock(&_lock);
159 125
160 return r; 126 return r;
@@ -163,25 +129,16 @@ EXPORT_SYMBOL(dm_dirty_log_type_register);
163 129
164int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type) 130int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type)
165{ 131{
166 struct dm_dirty_log_internal *log_type;
167
168 spin_lock(&_lock); 132 spin_lock(&_lock);
169 133
170 log_type = __find_dirty_log_type(type->name); 134 if (!__find_dirty_log_type(type->name)) {
171 if (!log_type) {
172 spin_unlock(&_lock); 135 spin_unlock(&_lock);
173 return -EINVAL; 136 return -EINVAL;
174 } 137 }
175 138
176 if (log_type->use) { 139 list_del(&type->list);
177 spin_unlock(&_lock);
178 return -ETXTBSY;
179 }
180
181 list_del(&log_type->list);
182 140
183 spin_unlock(&_lock); 141 spin_unlock(&_lock);
184 kfree(log_type);
185 142
186 return 0; 143 return 0;
187} 144}
diff --git a/drivers/md/dm-path-selector.c b/drivers/md/dm-path-selector.c
index 96ea226155b1..42c04f04a0c4 100644
--- a/drivers/md/dm-path-selector.c
+++ b/drivers/md/dm-path-selector.c
@@ -17,9 +17,7 @@
17 17
18struct ps_internal { 18struct ps_internal {
19 struct path_selector_type pst; 19 struct path_selector_type pst;
20
21 struct list_head list; 20 struct list_head list;
22 long use;
23}; 21};
24 22
25#define pst_to_psi(__pst) container_of((__pst), struct ps_internal, pst) 23#define pst_to_psi(__pst) container_of((__pst), struct ps_internal, pst)
@@ -45,12 +43,8 @@ static struct ps_internal *get_path_selector(const char *name)
45 43
46 down_read(&_ps_lock); 44 down_read(&_ps_lock);
47 psi = __find_path_selector_type(name); 45 psi = __find_path_selector_type(name);
48 if (psi) { 46 if (psi && !try_module_get(psi->pst.module))
49 if ((psi->use == 0) && !try_module_get(psi->pst.module)) 47 psi = NULL;
50 psi = NULL;
51 else
52 psi->use++;
53 }
54 up_read(&_ps_lock); 48 up_read(&_ps_lock);
55 49
56 return psi; 50 return psi;
@@ -84,11 +78,7 @@ void dm_put_path_selector(struct path_selector_type *pst)
84 if (!psi) 78 if (!psi)
85 goto out; 79 goto out;
86 80
87 if (--psi->use == 0) 81 module_put(psi->pst.module);
88 module_put(psi->pst.module);
89
90 BUG_ON(psi->use < 0);
91
92out: 82out:
93 up_read(&_ps_lock); 83 up_read(&_ps_lock);
94} 84}
@@ -136,11 +126,6 @@ int dm_unregister_path_selector(struct path_selector_type *pst)
136 return -EINVAL; 126 return -EINVAL;
137 } 127 }
138 128
139 if (psi->use) {
140 up_write(&_ps_lock);
141 return -ETXTBSY;
142 }
143
144 list_del(&psi->list); 129 list_del(&psi->list);
145 130
146 up_write(&_ps_lock); 131 up_write(&_ps_lock);
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 4d6bc101962e..536ef0bef154 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -145,6 +145,8 @@ struct dm_raid1_read_record {
145 struct dm_bio_details details; 145 struct dm_bio_details details;
146}; 146};
147 147
148static struct kmem_cache *_dm_raid1_read_record_cache;
149
148/* 150/*
149 * Every mirror should look like this one. 151 * Every mirror should look like this one.
150 */ 152 */
@@ -586,6 +588,9 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
586 int state; 588 int state;
587 struct bio *bio; 589 struct bio *bio;
588 struct bio_list sync, nosync, recover, *this_list = NULL; 590 struct bio_list sync, nosync, recover, *this_list = NULL;
591 struct bio_list requeue;
592 struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
593 region_t region;
589 594
590 if (!writes->head) 595 if (!writes->head)
591 return; 596 return;
@@ -596,10 +601,18 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
596 bio_list_init(&sync); 601 bio_list_init(&sync);
597 bio_list_init(&nosync); 602 bio_list_init(&nosync);
598 bio_list_init(&recover); 603 bio_list_init(&recover);
604 bio_list_init(&requeue);
599 605
600 while ((bio = bio_list_pop(writes))) { 606 while ((bio = bio_list_pop(writes))) {
601 state = dm_rh_get_state(ms->rh, 607 region = dm_rh_bio_to_region(ms->rh, bio);
602 dm_rh_bio_to_region(ms->rh, bio), 1); 608
609 if (log->type->is_remote_recovering &&
610 log->type->is_remote_recovering(log, region)) {
611 bio_list_add(&requeue, bio);
612 continue;
613 }
614
615 state = dm_rh_get_state(ms->rh, region, 1);
603 switch (state) { 616 switch (state) {
604 case DM_RH_CLEAN: 617 case DM_RH_CLEAN:
605 case DM_RH_DIRTY: 618 case DM_RH_DIRTY:
@@ -619,6 +632,16 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
619 } 632 }
620 633
621 /* 634 /*
635 * Add bios that are delayed due to remote recovery
636 * back on to the write queue
637 */
638 if (unlikely(requeue.head)) {
639 spin_lock_irq(&ms->lock);
640 bio_list_merge(&ms->writes, &requeue);
641 spin_unlock_irq(&ms->lock);
642 }
643
644 /*
622 * Increment the pending counts for any regions that will 645 * Increment the pending counts for any regions that will
623 * be written to (writes to recover regions are going to 646 * be written to (writes to recover regions are going to
624 * be delayed). 647 * be delayed).
@@ -764,9 +787,9 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
764 atomic_set(&ms->suspend, 0); 787 atomic_set(&ms->suspend, 0);
765 atomic_set(&ms->default_mirror, DEFAULT_MIRROR); 788 atomic_set(&ms->default_mirror, DEFAULT_MIRROR);
766 789
767 len = sizeof(struct dm_raid1_read_record); 790 ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS,
768 ms->read_record_pool = mempool_create_kmalloc_pool(MIN_READ_RECORDS, 791 _dm_raid1_read_record_cache);
769 len); 792
770 if (!ms->read_record_pool) { 793 if (!ms->read_record_pool) {
771 ti->error = "Error creating mirror read_record_pool"; 794 ti->error = "Error creating mirror read_record_pool";
772 kfree(ms); 795 kfree(ms);
@@ -1279,16 +1302,31 @@ static int __init dm_mirror_init(void)
1279{ 1302{
1280 int r; 1303 int r;
1281 1304
1305 _dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0);
1306 if (!_dm_raid1_read_record_cache) {
1307 DMERR("Can't allocate dm_raid1_read_record cache");
1308 r = -ENOMEM;
1309 goto bad_cache;
1310 }
1311
1282 r = dm_register_target(&mirror_target); 1312 r = dm_register_target(&mirror_target);
1283 if (r < 0) 1313 if (r < 0) {
1284 DMERR("Failed to register mirror target"); 1314 DMERR("Failed to register mirror target");
1315 goto bad_target;
1316 }
1317
1318 return 0;
1285 1319
1320bad_target:
1321 kmem_cache_destroy(_dm_raid1_read_record_cache);
1322bad_cache:
1286 return r; 1323 return r;
1287} 1324}
1288 1325
1289static void __exit dm_mirror_exit(void) 1326static void __exit dm_mirror_exit(void)
1290{ 1327{
1291 dm_unregister_target(&mirror_target); 1328 dm_unregister_target(&mirror_target);
1329 kmem_cache_destroy(_dm_raid1_read_record_cache);
1292} 1330}
1293 1331
1294/* Module hooks */ 1332/* Module hooks */
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 936b34e0959f..e75c6dd76a9a 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -6,7 +6,6 @@
6 */ 6 */
7 7
8#include "dm-exception-store.h" 8#include "dm-exception-store.h"
9#include "dm-snap.h"
10 9
11#include <linux/mm.h> 10#include <linux/mm.h>
12#include <linux/pagemap.h> 11#include <linux/pagemap.h>
@@ -89,7 +88,7 @@ struct commit_callback {
89 * The top level structure for a persistent exception store. 88 * The top level structure for a persistent exception store.
90 */ 89 */
91struct pstore { 90struct pstore {
92 struct dm_snapshot *snap; /* up pointer to my snapshot */ 91 struct dm_exception_store *store;
93 int version; 92 int version;
94 int valid; 93 int valid;
95 uint32_t exceptions_per_area; 94 uint32_t exceptions_per_area;
@@ -141,7 +140,7 @@ static int alloc_area(struct pstore *ps)
141 int r = -ENOMEM; 140 int r = -ENOMEM;
142 size_t len; 141 size_t len;
143 142
144 len = ps->snap->chunk_size << SECTOR_SHIFT; 143 len = ps->store->chunk_size << SECTOR_SHIFT;
145 144
146 /* 145 /*
147 * Allocate the chunk_size block of memory that will hold 146 * Allocate the chunk_size block of memory that will hold
@@ -163,9 +162,12 @@ static int alloc_area(struct pstore *ps)
163 162
164static void free_area(struct pstore *ps) 163static void free_area(struct pstore *ps)
165{ 164{
166 vfree(ps->area); 165 if (ps->area)
166 vfree(ps->area);
167 ps->area = NULL; 167 ps->area = NULL;
168 vfree(ps->zero_area); 168
169 if (ps->zero_area)
170 vfree(ps->zero_area);
169 ps->zero_area = NULL; 171 ps->zero_area = NULL;
170} 172}
171 173
@@ -189,9 +191,9 @@ static void do_metadata(struct work_struct *work)
189static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata) 191static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
190{ 192{
191 struct dm_io_region where = { 193 struct dm_io_region where = {
192 .bdev = ps->snap->cow->bdev, 194 .bdev = ps->store->cow->bdev,
193 .sector = ps->snap->chunk_size * chunk, 195 .sector = ps->store->chunk_size * chunk,
194 .count = ps->snap->chunk_size, 196 .count = ps->store->chunk_size,
195 }; 197 };
196 struct dm_io_request io_req = { 198 struct dm_io_request io_req = {
197 .bi_rw = rw, 199 .bi_rw = rw,
@@ -247,15 +249,15 @@ static int area_io(struct pstore *ps, int rw)
247 249
248static void zero_memory_area(struct pstore *ps) 250static void zero_memory_area(struct pstore *ps)
249{ 251{
250 memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); 252 memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
251} 253}
252 254
253static int zero_disk_area(struct pstore *ps, chunk_t area) 255static int zero_disk_area(struct pstore *ps, chunk_t area)
254{ 256{
255 struct dm_io_region where = { 257 struct dm_io_region where = {
256 .bdev = ps->snap->cow->bdev, 258 .bdev = ps->store->cow->bdev,
257 .sector = ps->snap->chunk_size * area_location(ps, area), 259 .sector = ps->store->chunk_size * area_location(ps, area),
258 .count = ps->snap->chunk_size, 260 .count = ps->store->chunk_size,
259 }; 261 };
260 struct dm_io_request io_req = { 262 struct dm_io_request io_req = {
261 .bi_rw = WRITE, 263 .bi_rw = WRITE,
@@ -278,15 +280,15 @@ static int read_header(struct pstore *ps, int *new_snapshot)
278 /* 280 /*
279 * Use default chunk size (or hardsect_size, if larger) if none supplied 281 * Use default chunk size (or hardsect_size, if larger) if none supplied
280 */ 282 */
281 if (!ps->snap->chunk_size) { 283 if (!ps->store->chunk_size) {
282 ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, 284 ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
283 bdev_hardsect_size(ps->snap->cow->bdev) >> 9); 285 bdev_hardsect_size(ps->store->cow->bdev) >> 9);
284 ps->snap->chunk_mask = ps->snap->chunk_size - 1; 286 ps->store->chunk_mask = ps->store->chunk_size - 1;
285 ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1; 287 ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1;
286 chunk_size_supplied = 0; 288 chunk_size_supplied = 0;
287 } 289 }
288 290
289 ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap-> 291 ps->io_client = dm_io_client_create(sectors_to_pages(ps->store->
290 chunk_size)); 292 chunk_size));
291 if (IS_ERR(ps->io_client)) 293 if (IS_ERR(ps->io_client))
292 return PTR_ERR(ps->io_client); 294 return PTR_ERR(ps->io_client);
@@ -317,22 +319,22 @@ static int read_header(struct pstore *ps, int *new_snapshot)
317 ps->version = le32_to_cpu(dh->version); 319 ps->version = le32_to_cpu(dh->version);
318 chunk_size = le32_to_cpu(dh->chunk_size); 320 chunk_size = le32_to_cpu(dh->chunk_size);
319 321
320 if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size) 322 if (!chunk_size_supplied || ps->store->chunk_size == chunk_size)
321 return 0; 323 return 0;
322 324
323 DMWARN("chunk size %llu in device metadata overrides " 325 DMWARN("chunk size %llu in device metadata overrides "
324 "table chunk size of %llu.", 326 "table chunk size of %llu.",
325 (unsigned long long)chunk_size, 327 (unsigned long long)chunk_size,
326 (unsigned long long)ps->snap->chunk_size); 328 (unsigned long long)ps->store->chunk_size);
327 329
328 /* We had a bogus chunk_size. Fix stuff up. */ 330 /* We had a bogus chunk_size. Fix stuff up. */
329 free_area(ps); 331 free_area(ps);
330 332
331 ps->snap->chunk_size = chunk_size; 333 ps->store->chunk_size = chunk_size;
332 ps->snap->chunk_mask = chunk_size - 1; 334 ps->store->chunk_mask = chunk_size - 1;
333 ps->snap->chunk_shift = ffs(chunk_size) - 1; 335 ps->store->chunk_shift = ffs(chunk_size) - 1;
334 336
335 r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size), 337 r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size),
336 ps->io_client); 338 ps->io_client);
337 if (r) 339 if (r)
338 return r; 340 return r;
@@ -349,13 +351,13 @@ static int write_header(struct pstore *ps)
349{ 351{
350 struct disk_header *dh; 352 struct disk_header *dh;
351 353
352 memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); 354 memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
353 355
354 dh = (struct disk_header *) ps->area; 356 dh = (struct disk_header *) ps->area;
355 dh->magic = cpu_to_le32(SNAP_MAGIC); 357 dh->magic = cpu_to_le32(SNAP_MAGIC);
356 dh->valid = cpu_to_le32(ps->valid); 358 dh->valid = cpu_to_le32(ps->valid);
357 dh->version = cpu_to_le32(ps->version); 359 dh->version = cpu_to_le32(ps->version);
358 dh->chunk_size = cpu_to_le32(ps->snap->chunk_size); 360 dh->chunk_size = cpu_to_le32(ps->store->chunk_size);
359 361
360 return chunk_io(ps, 0, WRITE, 1); 362 return chunk_io(ps, 0, WRITE, 1);
361} 363}
@@ -474,18 +476,25 @@ static struct pstore *get_info(struct dm_exception_store *store)
474static void persistent_fraction_full(struct dm_exception_store *store, 476static void persistent_fraction_full(struct dm_exception_store *store,
475 sector_t *numerator, sector_t *denominator) 477 sector_t *numerator, sector_t *denominator)
476{ 478{
477 *numerator = get_info(store)->next_free * store->snap->chunk_size; 479 *numerator = get_info(store)->next_free * store->chunk_size;
478 *denominator = get_dev_size(store->snap->cow->bdev); 480 *denominator = get_dev_size(store->cow->bdev);
479} 481}
480 482
481static void persistent_destroy(struct dm_exception_store *store) 483static void persistent_dtr(struct dm_exception_store *store)
482{ 484{
483 struct pstore *ps = get_info(store); 485 struct pstore *ps = get_info(store);
484 486
485 destroy_workqueue(ps->metadata_wq); 487 destroy_workqueue(ps->metadata_wq);
486 dm_io_client_destroy(ps->io_client); 488
487 vfree(ps->callbacks); 489 /* Created in read_header */
490 if (ps->io_client)
491 dm_io_client_destroy(ps->io_client);
488 free_area(ps); 492 free_area(ps);
493
494 /* Allocated in persistent_read_metadata */
495 if (ps->callbacks)
496 vfree(ps->callbacks);
497
489 kfree(ps); 498 kfree(ps);
490} 499}
491 500
@@ -507,7 +516,7 @@ static int persistent_read_metadata(struct dm_exception_store *store,
507 /* 516 /*
508 * Now we know correct chunk_size, complete the initialisation. 517 * Now we know correct chunk_size, complete the initialisation.
509 */ 518 */
510 ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) / 519 ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
511 sizeof(struct disk_exception); 520 sizeof(struct disk_exception);
512 ps->callbacks = dm_vcalloc(ps->exceptions_per_area, 521 ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
513 sizeof(*ps->callbacks)); 522 sizeof(*ps->callbacks));
@@ -564,10 +573,10 @@ static int persistent_prepare_exception(struct dm_exception_store *store,
564 struct pstore *ps = get_info(store); 573 struct pstore *ps = get_info(store);
565 uint32_t stride; 574 uint32_t stride;
566 chunk_t next_free; 575 chunk_t next_free;
567 sector_t size = get_dev_size(store->snap->cow->bdev); 576 sector_t size = get_dev_size(store->cow->bdev);
568 577
569 /* Is there enough room ? */ 578 /* Is there enough room ? */
570 if (size < ((ps->next_free + 1) * store->snap->chunk_size)) 579 if (size < ((ps->next_free + 1) * store->chunk_size))
571 return -ENOSPC; 580 return -ENOSPC;
572 581
573 e->new_chunk = ps->next_free; 582 e->new_chunk = ps->next_free;
@@ -656,16 +665,17 @@ static void persistent_drop_snapshot(struct dm_exception_store *store)
656 DMWARN("write header failed"); 665 DMWARN("write header failed");
657} 666}
658 667
659int dm_create_persistent(struct dm_exception_store *store) 668static int persistent_ctr(struct dm_exception_store *store,
669 unsigned argc, char **argv)
660{ 670{
661 struct pstore *ps; 671 struct pstore *ps;
662 672
663 /* allocate the pstore */ 673 /* allocate the pstore */
664 ps = kmalloc(sizeof(*ps), GFP_KERNEL); 674 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
665 if (!ps) 675 if (!ps)
666 return -ENOMEM; 676 return -ENOMEM;
667 677
668 ps->snap = store->snap; 678 ps->store = store;
669 ps->valid = 1; 679 ps->valid = 1;
670 ps->version = SNAPSHOT_DISK_VERSION; 680 ps->version = SNAPSHOT_DISK_VERSION;
671 ps->area = NULL; 681 ps->area = NULL;
@@ -683,22 +693,77 @@ int dm_create_persistent(struct dm_exception_store *store)
683 return -ENOMEM; 693 return -ENOMEM;
684 } 694 }
685 695
686 store->destroy = persistent_destroy;
687 store->read_metadata = persistent_read_metadata;
688 store->prepare_exception = persistent_prepare_exception;
689 store->commit_exception = persistent_commit_exception;
690 store->drop_snapshot = persistent_drop_snapshot;
691 store->fraction_full = persistent_fraction_full;
692 store->context = ps; 696 store->context = ps;
693 697
694 return 0; 698 return 0;
695} 699}
696 700
701static unsigned persistent_status(struct dm_exception_store *store,
702 status_type_t status, char *result,
703 unsigned maxlen)
704{
705 unsigned sz = 0;
706
707 switch (status) {
708 case STATUSTYPE_INFO:
709 break;
710 case STATUSTYPE_TABLE:
711 DMEMIT(" %s P %llu", store->cow->name,
712 (unsigned long long)store->chunk_size);
713 }
714
715 return sz;
716}
717
718static struct dm_exception_store_type _persistent_type = {
719 .name = "persistent",
720 .module = THIS_MODULE,
721 .ctr = persistent_ctr,
722 .dtr = persistent_dtr,
723 .read_metadata = persistent_read_metadata,
724 .prepare_exception = persistent_prepare_exception,
725 .commit_exception = persistent_commit_exception,
726 .drop_snapshot = persistent_drop_snapshot,
727 .fraction_full = persistent_fraction_full,
728 .status = persistent_status,
729};
730
731static struct dm_exception_store_type _persistent_compat_type = {
732 .name = "P",
733 .module = THIS_MODULE,
734 .ctr = persistent_ctr,
735 .dtr = persistent_dtr,
736 .read_metadata = persistent_read_metadata,
737 .prepare_exception = persistent_prepare_exception,
738 .commit_exception = persistent_commit_exception,
739 .drop_snapshot = persistent_drop_snapshot,
740 .fraction_full = persistent_fraction_full,
741 .status = persistent_status,
742};
743
697int dm_persistent_snapshot_init(void) 744int dm_persistent_snapshot_init(void)
698{ 745{
699 return 0; 746 int r;
747
748 r = dm_exception_store_type_register(&_persistent_type);
749 if (r) {
750 DMERR("Unable to register persistent exception store type");
751 return r;
752 }
753
754 r = dm_exception_store_type_register(&_persistent_compat_type);
755 if (r) {
756 DMERR("Unable to register old-style persistent exception "
757 "store type");
758 dm_exception_store_type_unregister(&_persistent_type);
759 return r;
760 }
761
762 return r;
700} 763}
701 764
702void dm_persistent_snapshot_exit(void) 765void dm_persistent_snapshot_exit(void)
703{ 766{
767 dm_exception_store_type_unregister(&_persistent_type);
768 dm_exception_store_type_unregister(&_persistent_compat_type);
704} 769}
diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c
index 7f6e2e6dcb0d..cde5aa558e6d 100644
--- a/drivers/md/dm-snap-transient.c
+++ b/drivers/md/dm-snap-transient.c
@@ -6,7 +6,6 @@
6 */ 6 */
7 7
8#include "dm-exception-store.h" 8#include "dm-exception-store.h"
9#include "dm-snap.h"
10 9
11#include <linux/mm.h> 10#include <linux/mm.h>
12#include <linux/pagemap.h> 11#include <linux/pagemap.h>
@@ -23,7 +22,7 @@ struct transient_c {
23 sector_t next_free; 22 sector_t next_free;
24}; 23};
25 24
26static void transient_destroy(struct dm_exception_store *store) 25static void transient_dtr(struct dm_exception_store *store)
27{ 26{
28 kfree(store->context); 27 kfree(store->context);
29} 28}
@@ -39,14 +38,14 @@ static int transient_read_metadata(struct dm_exception_store *store,
39static int transient_prepare_exception(struct dm_exception_store *store, 38static int transient_prepare_exception(struct dm_exception_store *store,
40 struct dm_snap_exception *e) 39 struct dm_snap_exception *e)
41{ 40{
42 struct transient_c *tc = (struct transient_c *) store->context; 41 struct transient_c *tc = store->context;
43 sector_t size = get_dev_size(store->snap->cow->bdev); 42 sector_t size = get_dev_size(store->cow->bdev);
44 43
45 if (size < (tc->next_free + store->snap->chunk_size)) 44 if (size < (tc->next_free + store->chunk_size))
46 return -1; 45 return -1;
47 46
48 e->new_chunk = sector_to_chunk(store->snap, tc->next_free); 47 e->new_chunk = sector_to_chunk(store, tc->next_free);
49 tc->next_free += store->snap->chunk_size; 48 tc->next_free += store->chunk_size;
50 49
51 return 0; 50 return 0;
52} 51}
@@ -64,20 +63,14 @@ static void transient_fraction_full(struct dm_exception_store *store,
64 sector_t *numerator, sector_t *denominator) 63 sector_t *numerator, sector_t *denominator)
65{ 64{
66 *numerator = ((struct transient_c *) store->context)->next_free; 65 *numerator = ((struct transient_c *) store->context)->next_free;
67 *denominator = get_dev_size(store->snap->cow->bdev); 66 *denominator = get_dev_size(store->cow->bdev);
68} 67}
69 68
70int dm_create_transient(struct dm_exception_store *store) 69static int transient_ctr(struct dm_exception_store *store,
70 unsigned argc, char **argv)
71{ 71{
72 struct transient_c *tc; 72 struct transient_c *tc;
73 73
74 store->destroy = transient_destroy;
75 store->read_metadata = transient_read_metadata;
76 store->prepare_exception = transient_prepare_exception;
77 store->commit_exception = transient_commit_exception;
78 store->drop_snapshot = NULL;
79 store->fraction_full = transient_fraction_full;
80
81 tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); 74 tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
82 if (!tc) 75 if (!tc)
83 return -ENOMEM; 76 return -ENOMEM;
@@ -88,11 +81,70 @@ int dm_create_transient(struct dm_exception_store *store)
88 return 0; 81 return 0;
89} 82}
90 83
84static unsigned transient_status(struct dm_exception_store *store,
85 status_type_t status, char *result,
86 unsigned maxlen)
87{
88 unsigned sz = 0;
89
90 switch (status) {
91 case STATUSTYPE_INFO:
92 break;
93 case STATUSTYPE_TABLE:
94 DMEMIT(" %s N %llu", store->cow->name,
95 (unsigned long long)store->chunk_size);
96 }
97
98 return sz;
99}
100
101static struct dm_exception_store_type _transient_type = {
102 .name = "transient",
103 .module = THIS_MODULE,
104 .ctr = transient_ctr,
105 .dtr = transient_dtr,
106 .read_metadata = transient_read_metadata,
107 .prepare_exception = transient_prepare_exception,
108 .commit_exception = transient_commit_exception,
109 .fraction_full = transient_fraction_full,
110 .status = transient_status,
111};
112
113static struct dm_exception_store_type _transient_compat_type = {
114 .name = "N",
115 .module = THIS_MODULE,
116 .ctr = transient_ctr,
117 .dtr = transient_dtr,
118 .read_metadata = transient_read_metadata,
119 .prepare_exception = transient_prepare_exception,
120 .commit_exception = transient_commit_exception,
121 .fraction_full = transient_fraction_full,
122 .status = transient_status,
123};
124
91int dm_transient_snapshot_init(void) 125int dm_transient_snapshot_init(void)
92{ 126{
93 return 0; 127 int r;
128
129 r = dm_exception_store_type_register(&_transient_type);
130 if (r) {
131 DMWARN("Unable to register transient exception store type");
132 return r;
133 }
134
135 r = dm_exception_store_type_register(&_transient_compat_type);
136 if (r) {
137 DMWARN("Unable to register old-style transient "
138 "exception store type");
139 dm_exception_store_type_unregister(&_transient_type);
140 return r;
141 }
142
143 return r;
94} 144}
95 145
96void dm_transient_snapshot_exit(void) 146void dm_transient_snapshot_exit(void)
97{ 147{
148 dm_exception_store_type_unregister(&_transient_type);
149 dm_exception_store_type_unregister(&_transient_compat_type);
98} 150}
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 65ff82ff124e..981a0413068f 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -7,7 +7,6 @@
7 */ 7 */
8 8
9#include <linux/blkdev.h> 9#include <linux/blkdev.h>
10#include <linux/ctype.h>
11#include <linux/device-mapper.h> 10#include <linux/device-mapper.h>
12#include <linux/delay.h> 11#include <linux/delay.h>
13#include <linux/fs.h> 12#include <linux/fs.h>
@@ -20,9 +19,9 @@
20#include <linux/vmalloc.h> 19#include <linux/vmalloc.h>
21#include <linux/log2.h> 20#include <linux/log2.h>
22#include <linux/dm-kcopyd.h> 21#include <linux/dm-kcopyd.h>
22#include <linux/workqueue.h>
23 23
24#include "dm-exception-store.h" 24#include "dm-exception-store.h"
25#include "dm-snap.h"
26#include "dm-bio-list.h" 25#include "dm-bio-list.h"
27 26
28#define DM_MSG_PREFIX "snapshots" 27#define DM_MSG_PREFIX "snapshots"
@@ -47,9 +46,76 @@
47 */ 46 */
48#define MIN_IOS 256 47#define MIN_IOS 256
49 48
49#define DM_TRACKED_CHUNK_HASH_SIZE 16
50#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \
51 (DM_TRACKED_CHUNK_HASH_SIZE - 1))
52
53struct exception_table {
54 uint32_t hash_mask;
55 unsigned hash_shift;
56 struct list_head *table;
57};
58
59struct dm_snapshot {
60 struct rw_semaphore lock;
61
62 struct dm_dev *origin;
63
64 /* List of snapshots per Origin */
65 struct list_head list;
66
67 /* You can't use a snapshot if this is 0 (e.g. if full) */
68 int valid;
69
70 /* Origin writes don't trigger exceptions until this is set */
71 int active;
72
73 mempool_t *pending_pool;
74
75 atomic_t pending_exceptions_count;
76
77 struct exception_table pending;
78 struct exception_table complete;
79
80 /*
81 * pe_lock protects all pending_exception operations and access
82 * as well as the snapshot_bios list.
83 */
84 spinlock_t pe_lock;
85
86 /* The on disk metadata handler */
87 struct dm_exception_store *store;
88
89 struct dm_kcopyd_client *kcopyd_client;
90
91 /* Queue of snapshot writes for ksnapd to flush */
92 struct bio_list queued_bios;
93 struct work_struct queued_bios_work;
94
95 /* Chunks with outstanding reads */
96 mempool_t *tracked_chunk_pool;
97 spinlock_t tracked_chunk_lock;
98 struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
99};
100
50static struct workqueue_struct *ksnapd; 101static struct workqueue_struct *ksnapd;
51static void flush_queued_bios(struct work_struct *work); 102static void flush_queued_bios(struct work_struct *work);
52 103
104static sector_t chunk_to_sector(struct dm_exception_store *store,
105 chunk_t chunk)
106{
107 return chunk << store->chunk_shift;
108}
109
110static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
111{
112 /*
113 * There is only ever one instance of a particular block
114 * device so we can compare pointers safely.
115 */
116 return lhs == rhs;
117}
118
53struct dm_snap_pending_exception { 119struct dm_snap_pending_exception {
54 struct dm_snap_exception e; 120 struct dm_snap_exception e;
55 121
@@ -476,11 +542,11 @@ static int init_hash_tables(struct dm_snapshot *s)
476 * Calculate based on the size of the original volume or 542 * Calculate based on the size of the original volume or
477 * the COW volume... 543 * the COW volume...
478 */ 544 */
479 cow_dev_size = get_dev_size(s->cow->bdev); 545 cow_dev_size = get_dev_size(s->store->cow->bdev);
480 origin_dev_size = get_dev_size(s->origin->bdev); 546 origin_dev_size = get_dev_size(s->origin->bdev);
481 max_buckets = calc_max_buckets(); 547 max_buckets = calc_max_buckets();
482 548
483 hash_size = min(origin_dev_size, cow_dev_size) >> s->chunk_shift; 549 hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift;
484 hash_size = min(hash_size, max_buckets); 550 hash_size = min(hash_size, max_buckets);
485 551
486 hash_size = rounddown_pow_of_two(hash_size); 552 hash_size = rounddown_pow_of_two(hash_size);
@@ -505,58 +571,6 @@ static int init_hash_tables(struct dm_snapshot *s)
505} 571}
506 572
507/* 573/*
508 * Round a number up to the nearest 'size' boundary. size must
509 * be a power of 2.
510 */
511static ulong round_up(ulong n, ulong size)
512{
513 size--;
514 return (n + size) & ~size;
515}
516
517static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg,
518 char **error)
519{
520 unsigned long chunk_size;
521 char *value;
522
523 chunk_size = simple_strtoul(chunk_size_arg, &value, 10);
524 if (*chunk_size_arg == '\0' || *value != '\0') {
525 *error = "Invalid chunk size";
526 return -EINVAL;
527 }
528
529 if (!chunk_size) {
530 s->chunk_size = s->chunk_mask = s->chunk_shift = 0;
531 return 0;
532 }
533
534 /*
535 * Chunk size must be multiple of page size. Silently
536 * round up if it's not.
537 */
538 chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);
539
540 /* Check chunk_size is a power of 2 */
541 if (!is_power_of_2(chunk_size)) {
542 *error = "Chunk size is not a power of 2";
543 return -EINVAL;
544 }
545
546 /* Validate the chunk size against the device block size */
547 if (chunk_size % (bdev_hardsect_size(s->cow->bdev) >> 9)) {
548 *error = "Chunk size is not a multiple of device blocksize";
549 return -EINVAL;
550 }
551
552 s->chunk_size = chunk_size;
553 s->chunk_mask = chunk_size - 1;
554 s->chunk_shift = ffs(chunk_size) - 1;
555
556 return 0;
557}
558
559/*
560 * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size> 574 * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
561 */ 575 */
562static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) 576static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
@@ -564,91 +578,68 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
564 struct dm_snapshot *s; 578 struct dm_snapshot *s;
565 int i; 579 int i;
566 int r = -EINVAL; 580 int r = -EINVAL;
567 char persistent;
568 char *origin_path; 581 char *origin_path;
569 char *cow_path; 582 struct dm_exception_store *store;
583 unsigned args_used;
570 584
571 if (argc != 4) { 585 if (argc != 4) {
572 ti->error = "requires exactly 4 arguments"; 586 ti->error = "requires exactly 4 arguments";
573 r = -EINVAL; 587 r = -EINVAL;
574 goto bad1; 588 goto bad_args;
575 } 589 }
576 590
577 origin_path = argv[0]; 591 origin_path = argv[0];
578 cow_path = argv[1]; 592 argv++;
579 persistent = toupper(*argv[2]); 593 argc--;
580 594
581 if (persistent != 'P' && persistent != 'N') { 595 r = dm_exception_store_create(ti, argc, argv, &args_used, &store);
582 ti->error = "Persistent flag is not P or N"; 596 if (r) {
597 ti->error = "Couldn't create exception store";
583 r = -EINVAL; 598 r = -EINVAL;
584 goto bad1; 599 goto bad_args;
585 } 600 }
586 601
602 argv += args_used;
603 argc -= args_used;
604
587 s = kmalloc(sizeof(*s), GFP_KERNEL); 605 s = kmalloc(sizeof(*s), GFP_KERNEL);
588 if (s == NULL) { 606 if (!s) {
589 ti->error = "Cannot allocate snapshot context private " 607 ti->error = "Cannot allocate snapshot context private "
590 "structure"; 608 "structure";
591 r = -ENOMEM; 609 r = -ENOMEM;
592 goto bad1; 610 goto bad_snap;
593 } 611 }
594 612
595 r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin); 613 r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
596 if (r) { 614 if (r) {
597 ti->error = "Cannot get origin device"; 615 ti->error = "Cannot get origin device";
598 goto bad2; 616 goto bad_origin;
599 }
600
601 r = dm_get_device(ti, cow_path, 0, 0,
602 FMODE_READ | FMODE_WRITE, &s->cow);
603 if (r) {
604 dm_put_device(ti, s->origin);
605 ti->error = "Cannot get COW device";
606 goto bad2;
607 } 617 }
608 618
609 r = set_chunk_size(s, argv[3], &ti->error); 619 s->store = store;
610 if (r)
611 goto bad3;
612
613 s->type = persistent;
614
615 s->valid = 1; 620 s->valid = 1;
616 s->active = 0; 621 s->active = 0;
617 atomic_set(&s->pending_exceptions_count, 0); 622 atomic_set(&s->pending_exceptions_count, 0);
618 init_rwsem(&s->lock); 623 init_rwsem(&s->lock);
619 spin_lock_init(&s->pe_lock); 624 spin_lock_init(&s->pe_lock);
620 s->ti = ti;
621 625
622 /* Allocate hash table for COW data */ 626 /* Allocate hash table for COW data */
623 if (init_hash_tables(s)) { 627 if (init_hash_tables(s)) {
624 ti->error = "Unable to allocate hash table space"; 628 ti->error = "Unable to allocate hash table space";
625 r = -ENOMEM; 629 r = -ENOMEM;
626 goto bad3; 630 goto bad_hash_tables;
627 }
628
629 s->store.snap = s;
630
631 if (persistent == 'P')
632 r = dm_create_persistent(&s->store);
633 else
634 r = dm_create_transient(&s->store);
635
636 if (r) {
637 ti->error = "Couldn't create exception store";
638 r = -EINVAL;
639 goto bad4;
640 } 631 }
641 632
642 r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); 633 r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
643 if (r) { 634 if (r) {
644 ti->error = "Could not create kcopyd client"; 635 ti->error = "Could not create kcopyd client";
645 goto bad5; 636 goto bad_kcopyd;
646 } 637 }
647 638
648 s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache); 639 s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache);
649 if (!s->pending_pool) { 640 if (!s->pending_pool) {
650 ti->error = "Could not allocate mempool for pending exceptions"; 641 ti->error = "Could not allocate mempool for pending exceptions";
651 goto bad6; 642 goto bad_pending_pool;
652 } 643 }
653 644
654 s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS, 645 s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS,
@@ -665,7 +656,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
665 spin_lock_init(&s->tracked_chunk_lock); 656 spin_lock_init(&s->tracked_chunk_lock);
666 657
667 /* Metadata must only be loaded into one table at once */ 658 /* Metadata must only be loaded into one table at once */
668 r = s->store.read_metadata(&s->store, dm_add_exception, (void *)s); 659 r = s->store->type->read_metadata(s->store, dm_add_exception,
660 (void *)s);
669 if (r < 0) { 661 if (r < 0) {
670 ti->error = "Failed to read snapshot metadata"; 662 ti->error = "Failed to read snapshot metadata";
671 goto bad_load_and_register; 663 goto bad_load_and_register;
@@ -686,34 +678,33 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
686 } 678 }
687 679
688 ti->private = s; 680 ti->private = s;
689 ti->split_io = s->chunk_size; 681 ti->split_io = s->store->chunk_size;
690 682
691 return 0; 683 return 0;
692 684
693 bad_load_and_register: 685bad_load_and_register:
694 mempool_destroy(s->tracked_chunk_pool); 686 mempool_destroy(s->tracked_chunk_pool);
695 687
696 bad_tracked_chunk_pool: 688bad_tracked_chunk_pool:
697 mempool_destroy(s->pending_pool); 689 mempool_destroy(s->pending_pool);
698 690
699 bad6: 691bad_pending_pool:
700 dm_kcopyd_client_destroy(s->kcopyd_client); 692 dm_kcopyd_client_destroy(s->kcopyd_client);
701 693
702 bad5: 694bad_kcopyd:
703 s->store.destroy(&s->store);
704
705 bad4:
706 exit_exception_table(&s->pending, pending_cache); 695 exit_exception_table(&s->pending, pending_cache);
707 exit_exception_table(&s->complete, exception_cache); 696 exit_exception_table(&s->complete, exception_cache);
708 697
709 bad3: 698bad_hash_tables:
710 dm_put_device(ti, s->cow);
711 dm_put_device(ti, s->origin); 699 dm_put_device(ti, s->origin);
712 700
713 bad2: 701bad_origin:
714 kfree(s); 702 kfree(s);
715 703
716 bad1: 704bad_snap:
705 dm_exception_store_destroy(store);
706
707bad_args:
717 return r; 708 return r;
718} 709}
719 710
@@ -724,8 +715,6 @@ static void __free_exceptions(struct dm_snapshot *s)
724 715
725 exit_exception_table(&s->pending, pending_cache); 716 exit_exception_table(&s->pending, pending_cache);
726 exit_exception_table(&s->complete, exception_cache); 717 exit_exception_table(&s->complete, exception_cache);
727
728 s->store.destroy(&s->store);
729} 718}
730 719
731static void snapshot_dtr(struct dm_target *ti) 720static void snapshot_dtr(struct dm_target *ti)
@@ -761,7 +750,8 @@ static void snapshot_dtr(struct dm_target *ti)
761 mempool_destroy(s->pending_pool); 750 mempool_destroy(s->pending_pool);
762 751
763 dm_put_device(ti, s->origin); 752 dm_put_device(ti, s->origin);
764 dm_put_device(ti, s->cow); 753
754 dm_exception_store_destroy(s->store);
765 755
766 kfree(s); 756 kfree(s);
767} 757}
@@ -820,12 +810,12 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err)
820 else if (err == -ENOMEM) 810 else if (err == -ENOMEM)
821 DMERR("Invalidating snapshot: Unable to allocate exception."); 811 DMERR("Invalidating snapshot: Unable to allocate exception.");
822 812
823 if (s->store.drop_snapshot) 813 if (s->store->type->drop_snapshot)
824 s->store.drop_snapshot(&s->store); 814 s->store->type->drop_snapshot(s->store);
825 815
826 s->valid = 0; 816 s->valid = 0;
827 817
828 dm_table_event(s->ti->table); 818 dm_table_event(s->store->ti->table);
829} 819}
830 820
831static void get_pending_exception(struct dm_snap_pending_exception *pe) 821static void get_pending_exception(struct dm_snap_pending_exception *pe)
@@ -943,8 +933,8 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
943 933
944 else 934 else
945 /* Update the metadata if we are persistent */ 935 /* Update the metadata if we are persistent */
946 s->store.commit_exception(&s->store, &pe->e, commit_callback, 936 s->store->type->commit_exception(s->store, &pe->e,
947 pe); 937 commit_callback, pe);
948} 938}
949 939
950/* 940/*
@@ -960,11 +950,11 @@ static void start_copy(struct dm_snap_pending_exception *pe)
960 dev_size = get_dev_size(bdev); 950 dev_size = get_dev_size(bdev);
961 951
962 src.bdev = bdev; 952 src.bdev = bdev;
963 src.sector = chunk_to_sector(s, pe->e.old_chunk); 953 src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
964 src.count = min(s->chunk_size, dev_size - src.sector); 954 src.count = min(s->store->chunk_size, dev_size - src.sector);
965 955
966 dest.bdev = s->cow->bdev; 956 dest.bdev = s->store->cow->bdev;
967 dest.sector = chunk_to_sector(s, pe->e.new_chunk); 957 dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
968 dest.count = src.count; 958 dest.count = src.count;
969 959
970 /* Hand over to kcopyd */ 960 /* Hand over to kcopyd */
@@ -972,6 +962,17 @@ static void start_copy(struct dm_snap_pending_exception *pe)
972 &src, 1, &dest, 0, copy_callback, pe); 962 &src, 1, &dest, 0, copy_callback, pe);
973} 963}
974 964
965static struct dm_snap_pending_exception *
966__lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk)
967{
968 struct dm_snap_exception *e = lookup_exception(&s->pending, chunk);
969
970 if (!e)
971 return NULL;
972
973 return container_of(e, struct dm_snap_pending_exception, e);
974}
975
975/* 976/*
976 * Looks to see if this snapshot already has a pending exception 977 * Looks to see if this snapshot already has a pending exception
977 * for this chunk, otherwise it allocates a new one and inserts 978 * for this chunk, otherwise it allocates a new one and inserts
@@ -981,40 +982,15 @@ static void start_copy(struct dm_snap_pending_exception *pe)
981 * this. 982 * this.
982 */ 983 */
983static struct dm_snap_pending_exception * 984static struct dm_snap_pending_exception *
984__find_pending_exception(struct dm_snapshot *s, struct bio *bio) 985__find_pending_exception(struct dm_snapshot *s,
986 struct dm_snap_pending_exception *pe, chunk_t chunk)
985{ 987{
986 struct dm_snap_exception *e; 988 struct dm_snap_pending_exception *pe2;
987 struct dm_snap_pending_exception *pe;
988 chunk_t chunk = sector_to_chunk(s, bio->bi_sector);
989 989
990 /* 990 pe2 = __lookup_pending_exception(s, chunk);
991 * Is there a pending exception for this already ? 991 if (pe2) {
992 */
993 e = lookup_exception(&s->pending, chunk);
994 if (e) {
995 /* cast the exception to a pending exception */
996 pe = container_of(e, struct dm_snap_pending_exception, e);
997 goto out;
998 }
999
1000 /*
1001 * Create a new pending exception, we don't want
1002 * to hold the lock while we do this.
1003 */
1004 up_write(&s->lock);
1005 pe = alloc_pending_exception(s);
1006 down_write(&s->lock);
1007
1008 if (!s->valid) {
1009 free_pending_exception(pe);
1010 return NULL;
1011 }
1012
1013 e = lookup_exception(&s->pending, chunk);
1014 if (e) {
1015 free_pending_exception(pe); 992 free_pending_exception(pe);
1016 pe = container_of(e, struct dm_snap_pending_exception, e); 993 return pe2;
1017 goto out;
1018 } 994 }
1019 995
1020 pe->e.old_chunk = chunk; 996 pe->e.old_chunk = chunk;
@@ -1024,7 +1000,7 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio)
1024 atomic_set(&pe->ref_count, 0); 1000 atomic_set(&pe->ref_count, 0);
1025 pe->started = 0; 1001 pe->started = 0;
1026 1002
1027 if (s->store.prepare_exception(&s->store, &pe->e)) { 1003 if (s->store->type->prepare_exception(s->store, &pe->e)) {
1028 free_pending_exception(pe); 1004 free_pending_exception(pe);
1029 return NULL; 1005 return NULL;
1030 } 1006 }
@@ -1032,17 +1008,18 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio)
1032 get_pending_exception(pe); 1008 get_pending_exception(pe);
1033 insert_exception(&s->pending, &pe->e); 1009 insert_exception(&s->pending, &pe->e);
1034 1010
1035 out:
1036 return pe; 1011 return pe;
1037} 1012}
1038 1013
1039static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e, 1014static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e,
1040 struct bio *bio, chunk_t chunk) 1015 struct bio *bio, chunk_t chunk)
1041{ 1016{
1042 bio->bi_bdev = s->cow->bdev; 1017 bio->bi_bdev = s->store->cow->bdev;
1043 bio->bi_sector = chunk_to_sector(s, dm_chunk_number(e->new_chunk) + 1018 bio->bi_sector = chunk_to_sector(s->store,
1044 (chunk - e->old_chunk)) + 1019 dm_chunk_number(e->new_chunk) +
1045 (bio->bi_sector & s->chunk_mask); 1020 (chunk - e->old_chunk)) +
1021 (bio->bi_sector &
1022 s->store->chunk_mask);
1046} 1023}
1047 1024
1048static int snapshot_map(struct dm_target *ti, struct bio *bio, 1025static int snapshot_map(struct dm_target *ti, struct bio *bio,
@@ -1054,7 +1031,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
1054 chunk_t chunk; 1031 chunk_t chunk;
1055 struct dm_snap_pending_exception *pe = NULL; 1032 struct dm_snap_pending_exception *pe = NULL;
1056 1033
1057 chunk = sector_to_chunk(s, bio->bi_sector); 1034 chunk = sector_to_chunk(s->store, bio->bi_sector);
1058 1035
1059 /* Full snapshots are not usable */ 1036 /* Full snapshots are not usable */
1060 /* To get here the table must be live so s->active is always set. */ 1037 /* To get here the table must be live so s->active is always set. */
@@ -1083,11 +1060,31 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
1083 * writeable. 1060 * writeable.
1084 */ 1061 */
1085 if (bio_rw(bio) == WRITE) { 1062 if (bio_rw(bio) == WRITE) {
1086 pe = __find_pending_exception(s, bio); 1063 pe = __lookup_pending_exception(s, chunk);
1087 if (!pe) { 1064 if (!pe) {
1088 __invalidate_snapshot(s, -ENOMEM); 1065 up_write(&s->lock);
1089 r = -EIO; 1066 pe = alloc_pending_exception(s);
1090 goto out_unlock; 1067 down_write(&s->lock);
1068
1069 if (!s->valid) {
1070 free_pending_exception(pe);
1071 r = -EIO;
1072 goto out_unlock;
1073 }
1074
1075 e = lookup_exception(&s->complete, chunk);
1076 if (e) {
1077 free_pending_exception(pe);
1078 remap_exception(s, e, bio, chunk);
1079 goto out_unlock;
1080 }
1081
1082 pe = __find_pending_exception(s, pe, chunk);
1083 if (!pe) {
1084 __invalidate_snapshot(s, -ENOMEM);
1085 r = -EIO;
1086 goto out_unlock;
1087 }
1091 } 1088 }
1092 1089
1093 remap_exception(s, &pe->e, bio, chunk); 1090 remap_exception(s, &pe->e, bio, chunk);
@@ -1137,24 +1134,25 @@ static void snapshot_resume(struct dm_target *ti)
1137static int snapshot_status(struct dm_target *ti, status_type_t type, 1134static int snapshot_status(struct dm_target *ti, status_type_t type,
1138 char *result, unsigned int maxlen) 1135 char *result, unsigned int maxlen)
1139{ 1136{
1137 unsigned sz = 0;
1140 struct dm_snapshot *snap = ti->private; 1138 struct dm_snapshot *snap = ti->private;
1141 1139
1142 switch (type) { 1140 switch (type) {
1143 case STATUSTYPE_INFO: 1141 case STATUSTYPE_INFO:
1144 if (!snap->valid) 1142 if (!snap->valid)
1145 snprintf(result, maxlen, "Invalid"); 1143 DMEMIT("Invalid");
1146 else { 1144 else {
1147 if (snap->store.fraction_full) { 1145 if (snap->store->type->fraction_full) {
1148 sector_t numerator, denominator; 1146 sector_t numerator, denominator;
1149 snap->store.fraction_full(&snap->store, 1147 snap->store->type->fraction_full(snap->store,
1150 &numerator, 1148 &numerator,
1151 &denominator); 1149 &denominator);
1152 snprintf(result, maxlen, "%llu/%llu", 1150 DMEMIT("%llu/%llu",
1153 (unsigned long long)numerator, 1151 (unsigned long long)numerator,
1154 (unsigned long long)denominator); 1152 (unsigned long long)denominator);
1155 } 1153 }
1156 else 1154 else
1157 snprintf(result, maxlen, "Unknown"); 1155 DMEMIT("Unknown");
1158 } 1156 }
1159 break; 1157 break;
1160 1158
@@ -1164,10 +1162,9 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
1164 * to make private copies if the output is to 1162 * to make private copies if the output is to
1165 * make sense. 1163 * make sense.
1166 */ 1164 */
1167 snprintf(result, maxlen, "%s %s %c %llu", 1165 DMEMIT("%s", snap->origin->name);
1168 snap->origin->name, snap->cow->name, 1166 snap->store->type->status(snap->store, type, result + sz,
1169 snap->type, 1167 maxlen - sz);
1170 (unsigned long long)snap->chunk_size);
1171 break; 1168 break;
1172 } 1169 }
1173 1170
@@ -1196,14 +1193,14 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
1196 goto next_snapshot; 1193 goto next_snapshot;
1197 1194
1198 /* Nothing to do if writing beyond end of snapshot */ 1195 /* Nothing to do if writing beyond end of snapshot */
1199 if (bio->bi_sector >= dm_table_get_size(snap->ti->table)) 1196 if (bio->bi_sector >= dm_table_get_size(snap->store->ti->table))
1200 goto next_snapshot; 1197 goto next_snapshot;
1201 1198
1202 /* 1199 /*
1203 * Remember, different snapshots can have 1200 * Remember, different snapshots can have
1204 * different chunk sizes. 1201 * different chunk sizes.
1205 */ 1202 */
1206 chunk = sector_to_chunk(snap, bio->bi_sector); 1203 chunk = sector_to_chunk(snap->store, bio->bi_sector);
1207 1204
1208 /* 1205 /*
1209 * Check exception table to see if block 1206 * Check exception table to see if block
@@ -1217,10 +1214,28 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
1217 if (e) 1214 if (e)
1218 goto next_snapshot; 1215 goto next_snapshot;
1219 1216
1220 pe = __find_pending_exception(snap, bio); 1217 pe = __lookup_pending_exception(snap, chunk);
1221 if (!pe) { 1218 if (!pe) {
1222 __invalidate_snapshot(snap, -ENOMEM); 1219 up_write(&snap->lock);
1223 goto next_snapshot; 1220 pe = alloc_pending_exception(snap);
1221 down_write(&snap->lock);
1222
1223 if (!snap->valid) {
1224 free_pending_exception(pe);
1225 goto next_snapshot;
1226 }
1227
1228 e = lookup_exception(&snap->complete, chunk);
1229 if (e) {
1230 free_pending_exception(pe);
1231 goto next_snapshot;
1232 }
1233
1234 pe = __find_pending_exception(snap, pe, chunk);
1235 if (!pe) {
1236 __invalidate_snapshot(snap, -ENOMEM);
1237 goto next_snapshot;
1238 }
1224 } 1239 }
1225 1240
1226 if (!primary_pe) { 1241 if (!primary_pe) {
@@ -1360,7 +1375,8 @@ static void origin_resume(struct dm_target *ti)
1360 o = __lookup_origin(dev->bdev); 1375 o = __lookup_origin(dev->bdev);
1361 if (o) 1376 if (o)
1362 list_for_each_entry (snap, &o->snapshots, list) 1377 list_for_each_entry (snap, &o->snapshots, list)
1363 chunk_size = min_not_zero(chunk_size, snap->chunk_size); 1378 chunk_size = min_not_zero(chunk_size,
1379 snap->store->chunk_size);
1364 up_read(&_origins_lock); 1380 up_read(&_origins_lock);
1365 1381
1366 ti->split_io = chunk_size; 1382 ti->split_io = chunk_size;
diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h
deleted file mode 100644
index d9e62b43cf85..000000000000
--- a/drivers/md/dm-snap.h
+++ /dev/null
@@ -1,105 +0,0 @@
1/*
2 * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
3 *
4 * This file is released under the GPL.
5 */
6
7#ifndef DM_SNAPSHOT_H
8#define DM_SNAPSHOT_H
9
10#include <linux/device-mapper.h>
11#include "dm-exception-store.h"
12#include "dm-bio-list.h"
13#include <linux/blkdev.h>
14#include <linux/workqueue.h>
15
16struct exception_table {
17 uint32_t hash_mask;
18 unsigned hash_shift;
19 struct list_head *table;
20};
21
22#define DM_TRACKED_CHUNK_HASH_SIZE 16
23#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \
24 (DM_TRACKED_CHUNK_HASH_SIZE - 1))
25
26struct dm_snapshot {
27 struct rw_semaphore lock;
28 struct dm_target *ti;
29
30 struct dm_dev *origin;
31 struct dm_dev *cow;
32
33 /* List of snapshots per Origin */
34 struct list_head list;
35
36 /* Size of data blocks saved - must be a power of 2 */
37 chunk_t chunk_size;
38 chunk_t chunk_mask;
39 chunk_t chunk_shift;
40
41 /* You can't use a snapshot if this is 0 (e.g. if full) */
42 int valid;
43
44 /* Origin writes don't trigger exceptions until this is set */
45 int active;
46
47 /* Used for display of table */
48 char type;
49
50 mempool_t *pending_pool;
51
52 atomic_t pending_exceptions_count;
53
54 struct exception_table pending;
55 struct exception_table complete;
56
57 /*
58 * pe_lock protects all pending_exception operations and access
59 * as well as the snapshot_bios list.
60 */
61 spinlock_t pe_lock;
62
63 /* The on disk metadata handler */
64 struct dm_exception_store store;
65
66 struct dm_kcopyd_client *kcopyd_client;
67
68 /* Queue of snapshot writes for ksnapd to flush */
69 struct bio_list queued_bios;
70 struct work_struct queued_bios_work;
71
72 /* Chunks with outstanding reads */
73 mempool_t *tracked_chunk_pool;
74 spinlock_t tracked_chunk_lock;
75 struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
76};
77
78/*
79 * Return the number of sectors in the device.
80 */
81static inline sector_t get_dev_size(struct block_device *bdev)
82{
83 return bdev->bd_inode->i_size >> SECTOR_SHIFT;
84}
85
86static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector)
87{
88 return (sector & ~s->chunk_mask) >> s->chunk_shift;
89}
90
91static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk)
92{
93 return chunk << s->chunk_shift;
94}
95
96static inline int bdev_equal(struct block_device *lhs, struct block_device *rhs)
97{
98 /*
99 * There is only ever one instance of a particular block
100 * device so we can compare pointers safely.
101 */
102 return lhs == rhs;
103}
104
105#endif
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 2fd66c30f7f8..e8361b191b9b 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -399,28 +399,30 @@ static int check_device_area(struct dm_dev_internal *dd, sector_t start,
399} 399}
400 400
401/* 401/*
402 * This upgrades the mode on an already open dm_dev. Being 402 * This upgrades the mode on an already open dm_dev, being
403 * careful to leave things as they were if we fail to reopen the 403 * careful to leave things as they were if we fail to reopen the
404 * device. 404 * device and not to touch the existing bdev field in case
405 * it is accessed concurrently inside dm_table_any_congested().
405 */ 406 */
406static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, 407static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
407 struct mapped_device *md) 408 struct mapped_device *md)
408{ 409{
409 int r; 410 int r;
410 struct dm_dev_internal dd_copy; 411 struct dm_dev_internal dd_new, dd_old;
411 dev_t dev = dd->dm_dev.bdev->bd_dev;
412 412
413 dd_copy = *dd; 413 dd_new = dd_old = *dd;
414
415 dd_new.dm_dev.mode |= new_mode;
416 dd_new.dm_dev.bdev = NULL;
417
418 r = open_dev(&dd_new, dd->dm_dev.bdev->bd_dev, md);
419 if (r)
420 return r;
414 421
415 dd->dm_dev.mode |= new_mode; 422 dd->dm_dev.mode |= new_mode;
416 dd->dm_dev.bdev = NULL; 423 close_dev(&dd_old, md);
417 r = open_dev(dd, dev, md);
418 if (!r)
419 close_dev(&dd_copy, md);
420 else
421 *dd = dd_copy;
422 424
423 return r; 425 return 0;
424} 426}
425 427
426/* 428/*
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 7decf10006e4..04feccf2a997 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -14,45 +14,34 @@
14 14
15#define DM_MSG_PREFIX "target" 15#define DM_MSG_PREFIX "target"
16 16
17struct tt_internal {
18 struct target_type tt;
19
20 struct list_head list;
21 long use;
22};
23
24static LIST_HEAD(_targets); 17static LIST_HEAD(_targets);
25static DECLARE_RWSEM(_lock); 18static DECLARE_RWSEM(_lock);
26 19
27#define DM_MOD_NAME_SIZE 32 20#define DM_MOD_NAME_SIZE 32
28 21
29static inline struct tt_internal *__find_target_type(const char *name) 22static inline struct target_type *__find_target_type(const char *name)
30{ 23{
31 struct tt_internal *ti; 24 struct target_type *tt;
32 25
33 list_for_each_entry (ti, &_targets, list) 26 list_for_each_entry(tt, &_targets, list)
34 if (!strcmp(name, ti->tt.name)) 27 if (!strcmp(name, tt->name))
35 return ti; 28 return tt;
36 29
37 return NULL; 30 return NULL;
38} 31}
39 32
40static struct tt_internal *get_target_type(const char *name) 33static struct target_type *get_target_type(const char *name)
41{ 34{
42 struct tt_internal *ti; 35 struct target_type *tt;
43 36
44 down_read(&_lock); 37 down_read(&_lock);
45 38
46 ti = __find_target_type(name); 39 tt = __find_target_type(name);
47 if (ti) { 40 if (tt && !try_module_get(tt->module))
48 if ((ti->use == 0) && !try_module_get(ti->tt.module)) 41 tt = NULL;
49 ti = NULL;
50 else
51 ti->use++;
52 }
53 42
54 up_read(&_lock); 43 up_read(&_lock);
55 return ti; 44 return tt;
56} 45}
57 46
58static void load_module(const char *name) 47static void load_module(const char *name)
@@ -62,92 +51,59 @@ static void load_module(const char *name)
62 51
63struct target_type *dm_get_target_type(const char *name) 52struct target_type *dm_get_target_type(const char *name)
64{ 53{
65 struct tt_internal *ti = get_target_type(name); 54 struct target_type *tt = get_target_type(name);
66 55
67 if (!ti) { 56 if (!tt) {
68 load_module(name); 57 load_module(name);
69 ti = get_target_type(name); 58 tt = get_target_type(name);
70 } 59 }
71 60
72 return ti ? &ti->tt : NULL; 61 return tt;
73} 62}
74 63
75void dm_put_target_type(struct target_type *t) 64void dm_put_target_type(struct target_type *tt)
76{ 65{
77 struct tt_internal *ti = (struct tt_internal *) t;
78
79 down_read(&_lock); 66 down_read(&_lock);
80 if (--ti->use == 0) 67 module_put(tt->module);
81 module_put(ti->tt.module);
82
83 BUG_ON(ti->use < 0);
84 up_read(&_lock); 68 up_read(&_lock);
85
86 return;
87}
88
89static struct tt_internal *alloc_target(struct target_type *t)
90{
91 struct tt_internal *ti = kzalloc(sizeof(*ti), GFP_KERNEL);
92
93 if (ti)
94 ti->tt = *t;
95
96 return ti;
97} 69}
98 70
99
100int dm_target_iterate(void (*iter_func)(struct target_type *tt, 71int dm_target_iterate(void (*iter_func)(struct target_type *tt,
101 void *param), void *param) 72 void *param), void *param)
102{ 73{
103 struct tt_internal *ti; 74 struct target_type *tt;
104 75
105 down_read(&_lock); 76 down_read(&_lock);
106 list_for_each_entry (ti, &_targets, list) 77 list_for_each_entry(tt, &_targets, list)
107 iter_func(&ti->tt, param); 78 iter_func(tt, param);
108 up_read(&_lock); 79 up_read(&_lock);
109 80
110 return 0; 81 return 0;
111} 82}
112 83
113int dm_register_target(struct target_type *t) 84int dm_register_target(struct target_type *tt)
114{ 85{
115 int rv = 0; 86 int rv = 0;
116 struct tt_internal *ti = alloc_target(t);
117
118 if (!ti)
119 return -ENOMEM;
120 87
121 down_write(&_lock); 88 down_write(&_lock);
122 if (__find_target_type(t->name)) 89 if (__find_target_type(tt->name))
123 rv = -EEXIST; 90 rv = -EEXIST;
124 else 91 else
125 list_add(&ti->list, &_targets); 92 list_add(&tt->list, &_targets);
126 93
127 up_write(&_lock); 94 up_write(&_lock);
128 if (rv)
129 kfree(ti);
130 return rv; 95 return rv;
131} 96}
132 97
133void dm_unregister_target(struct target_type *t) 98void dm_unregister_target(struct target_type *tt)
134{ 99{
135 struct tt_internal *ti;
136
137 down_write(&_lock); 100 down_write(&_lock);
138 if (!(ti = __find_target_type(t->name))) { 101 if (!__find_target_type(tt->name)) {
139 DMCRIT("Unregistering unrecognised target: %s", t->name); 102 DMCRIT("Unregistering unrecognised target: %s", tt->name);
140 BUG();
141 }
142
143 if (ti->use) {
144 DMCRIT("Attempt to unregister target still in use: %s",
145 t->name);
146 BUG(); 103 BUG();
147 } 104 }
148 105
149 list_del(&ti->list); 106 list_del(&tt->list);
150 kfree(ti);
151 107
152 up_write(&_lock); 108 up_write(&_lock);
153} 109}
@@ -156,17 +112,17 @@ void dm_unregister_target(struct target_type *t)
156 * io-err: always fails an io, useful for bringing 112 * io-err: always fails an io, useful for bringing
157 * up LVs that have holes in them. 113 * up LVs that have holes in them.
158 */ 114 */
159static int io_err_ctr(struct dm_target *ti, unsigned int argc, char **args) 115static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args)
160{ 116{
161 return 0; 117 return 0;
162} 118}
163 119
164static void io_err_dtr(struct dm_target *ti) 120static void io_err_dtr(struct dm_target *tt)
165{ 121{
166 /* empty */ 122 /* empty */
167} 123}
168 124
169static int io_err_map(struct dm_target *ti, struct bio *bio, 125static int io_err_map(struct dm_target *tt, struct bio *bio,
170 union map_info *map_context) 126 union map_info *map_context)
171{ 127{
172 return -EIO; 128 return -EIO;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8d40f27cce89..788ba96a6256 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -99,19 +99,9 @@ union map_info *dm_get_mapinfo(struct bio *bio)
99/* 99/*
100 * Work processed by per-device workqueue. 100 * Work processed by per-device workqueue.
101 */ 101 */
102struct dm_wq_req {
103 enum {
104 DM_WQ_FLUSH_DEFERRED,
105 } type;
106 struct work_struct work;
107 struct mapped_device *md;
108 void *context;
109};
110
111struct mapped_device { 102struct mapped_device {
112 struct rw_semaphore io_lock; 103 struct rw_semaphore io_lock;
113 struct mutex suspend_lock; 104 struct mutex suspend_lock;
114 spinlock_t pushback_lock;
115 rwlock_t map_lock; 105 rwlock_t map_lock;
116 atomic_t holders; 106 atomic_t holders;
117 atomic_t open_count; 107 atomic_t open_count;
@@ -129,8 +119,9 @@ struct mapped_device {
129 */ 119 */
130 atomic_t pending; 120 atomic_t pending;
131 wait_queue_head_t wait; 121 wait_queue_head_t wait;
122 struct work_struct work;
132 struct bio_list deferred; 123 struct bio_list deferred;
133 struct bio_list pushback; 124 spinlock_t deferred_lock;
134 125
135 /* 126 /*
136 * Processing queue (flush/barriers) 127 * Processing queue (flush/barriers)
@@ -453,7 +444,9 @@ static int queue_io(struct mapped_device *md, struct bio *bio)
453 return 1; 444 return 1;
454 } 445 }
455 446
447 spin_lock_irq(&md->deferred_lock);
456 bio_list_add(&md->deferred, bio); 448 bio_list_add(&md->deferred, bio);
449 spin_unlock_irq(&md->deferred_lock);
457 450
458 up_write(&md->io_lock); 451 up_write(&md->io_lock);
459 return 0; /* deferred successfully */ 452 return 0; /* deferred successfully */
@@ -537,16 +530,14 @@ static void dec_pending(struct dm_io *io, int error)
537 if (io->error == DM_ENDIO_REQUEUE) { 530 if (io->error == DM_ENDIO_REQUEUE) {
538 /* 531 /*
539 * Target requested pushing back the I/O. 532 * Target requested pushing back the I/O.
540 * This must be handled before the sleeper on
541 * suspend queue merges the pushback list.
542 */ 533 */
543 spin_lock_irqsave(&md->pushback_lock, flags); 534 spin_lock_irqsave(&md->deferred_lock, flags);
544 if (__noflush_suspending(md)) 535 if (__noflush_suspending(md))
545 bio_list_add(&md->pushback, io->bio); 536 bio_list_add(&md->deferred, io->bio);
546 else 537 else
547 /* noflush suspend was interrupted. */ 538 /* noflush suspend was interrupted. */
548 io->error = -EIO; 539 io->error = -EIO;
549 spin_unlock_irqrestore(&md->pushback_lock, flags); 540 spin_unlock_irqrestore(&md->deferred_lock, flags);
550 } 541 }
551 542
552 end_io_acct(io); 543 end_io_acct(io);
@@ -834,20 +825,22 @@ static int __clone_and_map(struct clone_info *ci)
834} 825}
835 826
836/* 827/*
837 * Split the bio into several clones. 828 * Split the bio into several clones and submit it to targets.
838 */ 829 */
839static int __split_bio(struct mapped_device *md, struct bio *bio) 830static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
840{ 831{
841 struct clone_info ci; 832 struct clone_info ci;
842 int error = 0; 833 int error = 0;
843 834
844 ci.map = dm_get_table(md); 835 ci.map = dm_get_table(md);
845 if (unlikely(!ci.map)) 836 if (unlikely(!ci.map)) {
846 return -EIO; 837 bio_io_error(bio);
838 return;
839 }
847 if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) { 840 if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) {
848 dm_table_put(ci.map); 841 dm_table_put(ci.map);
849 bio_endio(bio, -EOPNOTSUPP); 842 bio_endio(bio, -EOPNOTSUPP);
850 return 0; 843 return;
851 } 844 }
852 ci.md = md; 845 ci.md = md;
853 ci.bio = bio; 846 ci.bio = bio;
@@ -867,8 +860,6 @@ static int __split_bio(struct mapped_device *md, struct bio *bio)
867 /* drop the extra reference count */ 860 /* drop the extra reference count */
868 dec_pending(ci.io, error); 861 dec_pending(ci.io, error);
869 dm_table_put(ci.map); 862 dm_table_put(ci.map);
870
871 return 0;
872} 863}
873/*----------------------------------------------------------------- 864/*-----------------------------------------------------------------
874 * CRUD END 865 * CRUD END
@@ -959,8 +950,9 @@ static int dm_request(struct request_queue *q, struct bio *bio)
959 down_read(&md->io_lock); 950 down_read(&md->io_lock);
960 } 951 }
961 952
962 r = __split_bio(md, bio); 953 __split_and_process_bio(md, bio);
963 up_read(&md->io_lock); 954 up_read(&md->io_lock);
955 return 0;
964 956
965out_req: 957out_req:
966 if (r < 0) 958 if (r < 0)
@@ -1074,6 +1066,8 @@ out:
1074 1066
1075static struct block_device_operations dm_blk_dops; 1067static struct block_device_operations dm_blk_dops;
1076 1068
1069static void dm_wq_work(struct work_struct *work);
1070
1077/* 1071/*
1078 * Allocate and initialise a blank device with a given minor. 1072 * Allocate and initialise a blank device with a given minor.
1079 */ 1073 */
@@ -1101,7 +1095,7 @@ static struct mapped_device *alloc_dev(int minor)
1101 1095
1102 init_rwsem(&md->io_lock); 1096 init_rwsem(&md->io_lock);
1103 mutex_init(&md->suspend_lock); 1097 mutex_init(&md->suspend_lock);
1104 spin_lock_init(&md->pushback_lock); 1098 spin_lock_init(&md->deferred_lock);
1105 rwlock_init(&md->map_lock); 1099 rwlock_init(&md->map_lock);
1106 atomic_set(&md->holders, 1); 1100 atomic_set(&md->holders, 1);
1107 atomic_set(&md->open_count, 0); 1101 atomic_set(&md->open_count, 0);
@@ -1118,6 +1112,7 @@ static struct mapped_device *alloc_dev(int minor)
1118 md->queue->backing_dev_info.congested_fn = dm_any_congested; 1112 md->queue->backing_dev_info.congested_fn = dm_any_congested;
1119 md->queue->backing_dev_info.congested_data = md; 1113 md->queue->backing_dev_info.congested_data = md;
1120 blk_queue_make_request(md->queue, dm_request); 1114 blk_queue_make_request(md->queue, dm_request);
1115 blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL);
1121 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); 1116 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
1122 md->queue->unplug_fn = dm_unplug_all; 1117 md->queue->unplug_fn = dm_unplug_all;
1123 blk_queue_merge_bvec(md->queue, dm_merge_bvec); 1118 blk_queue_merge_bvec(md->queue, dm_merge_bvec);
@@ -1140,6 +1135,7 @@ static struct mapped_device *alloc_dev(int minor)
1140 1135
1141 atomic_set(&md->pending, 0); 1136 atomic_set(&md->pending, 0);
1142 init_waitqueue_head(&md->wait); 1137 init_waitqueue_head(&md->wait);
1138 INIT_WORK(&md->work, dm_wq_work);
1143 init_waitqueue_head(&md->eventq); 1139 init_waitqueue_head(&md->eventq);
1144 1140
1145 md->disk->major = _major; 1141 md->disk->major = _major;
@@ -1379,18 +1375,24 @@ void dm_put(struct mapped_device *md)
1379} 1375}
1380EXPORT_SYMBOL_GPL(dm_put); 1376EXPORT_SYMBOL_GPL(dm_put);
1381 1377
1382static int dm_wait_for_completion(struct mapped_device *md) 1378static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
1383{ 1379{
1384 int r = 0; 1380 int r = 0;
1381 DECLARE_WAITQUEUE(wait, current);
1382
1383 dm_unplug_all(md->queue);
1384
1385 add_wait_queue(&md->wait, &wait);
1385 1386
1386 while (1) { 1387 while (1) {
1387 set_current_state(TASK_INTERRUPTIBLE); 1388 set_current_state(interruptible);
1388 1389
1389 smp_mb(); 1390 smp_mb();
1390 if (!atomic_read(&md->pending)) 1391 if (!atomic_read(&md->pending))
1391 break; 1392 break;
1392 1393
1393 if (signal_pending(current)) { 1394 if (interruptible == TASK_INTERRUPTIBLE &&
1395 signal_pending(current)) {
1394 r = -EINTR; 1396 r = -EINTR;
1395 break; 1397 break;
1396 } 1398 }
@@ -1399,67 +1401,40 @@ static int dm_wait_for_completion(struct mapped_device *md)
1399 } 1401 }
1400 set_current_state(TASK_RUNNING); 1402 set_current_state(TASK_RUNNING);
1401 1403
1404 remove_wait_queue(&md->wait, &wait);
1405
1402 return r; 1406 return r;
1403} 1407}
1404 1408
1405/* 1409/*
1406 * Process the deferred bios 1410 * Process the deferred bios
1407 */ 1411 */
1408static void __flush_deferred_io(struct mapped_device *md) 1412static void dm_wq_work(struct work_struct *work)
1409{ 1413{
1414 struct mapped_device *md = container_of(work, struct mapped_device,
1415 work);
1410 struct bio *c; 1416 struct bio *c;
1411 1417
1412 while ((c = bio_list_pop(&md->deferred))) { 1418 down_write(&md->io_lock);
1413 if (__split_bio(md, c))
1414 bio_io_error(c);
1415 }
1416
1417 clear_bit(DMF_BLOCK_IO, &md->flags);
1418}
1419 1419
1420static void __merge_pushback_list(struct mapped_device *md) 1420next_bio:
1421{ 1421 spin_lock_irq(&md->deferred_lock);
1422 unsigned long flags; 1422 c = bio_list_pop(&md->deferred);
1423 spin_unlock_irq(&md->deferred_lock);
1423 1424
1424 spin_lock_irqsave(&md->pushback_lock, flags); 1425 if (c) {
1425 clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); 1426 __split_and_process_bio(md, c);
1426 bio_list_merge_head(&md->deferred, &md->pushback); 1427 goto next_bio;
1427 bio_list_init(&md->pushback); 1428 }
1428 spin_unlock_irqrestore(&md->pushback_lock, flags);
1429}
1430 1429
1431static void dm_wq_work(struct work_struct *work) 1430 clear_bit(DMF_BLOCK_IO, &md->flags);
1432{
1433 struct dm_wq_req *req = container_of(work, struct dm_wq_req, work);
1434 struct mapped_device *md = req->md;
1435 1431
1436 down_write(&md->io_lock);
1437 switch (req->type) {
1438 case DM_WQ_FLUSH_DEFERRED:
1439 __flush_deferred_io(md);
1440 break;
1441 default:
1442 DMERR("dm_wq_work: unrecognised work type %d", req->type);
1443 BUG();
1444 }
1445 up_write(&md->io_lock); 1432 up_write(&md->io_lock);
1446} 1433}
1447 1434
1448static void dm_wq_queue(struct mapped_device *md, int type, void *context, 1435static void dm_queue_flush(struct mapped_device *md)
1449 struct dm_wq_req *req)
1450{
1451 req->type = type;
1452 req->md = md;
1453 req->context = context;
1454 INIT_WORK(&req->work, dm_wq_work);
1455 queue_work(md->wq, &req->work);
1456}
1457
1458static void dm_queue_flush(struct mapped_device *md, int type, void *context)
1459{ 1436{
1460 struct dm_wq_req req; 1437 queue_work(md->wq, &md->work);
1461
1462 dm_wq_queue(md, type, context, &req);
1463 flush_workqueue(md->wq); 1438 flush_workqueue(md->wq);
1464} 1439}
1465 1440
@@ -1534,7 +1509,6 @@ static void unlock_fs(struct mapped_device *md)
1534int dm_suspend(struct mapped_device *md, unsigned suspend_flags) 1509int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
1535{ 1510{
1536 struct dm_table *map = NULL; 1511 struct dm_table *map = NULL;
1537 DECLARE_WAITQUEUE(wait, current);
1538 int r = 0; 1512 int r = 0;
1539 int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0; 1513 int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
1540 int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0; 1514 int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
@@ -1584,28 +1558,22 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
1584 down_write(&md->io_lock); 1558 down_write(&md->io_lock);
1585 set_bit(DMF_BLOCK_IO, &md->flags); 1559 set_bit(DMF_BLOCK_IO, &md->flags);
1586 1560
1587 add_wait_queue(&md->wait, &wait);
1588 up_write(&md->io_lock); 1561 up_write(&md->io_lock);
1589 1562
1590 /* unplug */
1591 if (map)
1592 dm_table_unplug_all(map);
1593
1594 /* 1563 /*
1595 * Wait for the already-mapped ios to complete. 1564 * Wait for the already-mapped ios to complete.
1596 */ 1565 */
1597 r = dm_wait_for_completion(md); 1566 r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
1598 1567
1599 down_write(&md->io_lock); 1568 down_write(&md->io_lock);
1600 remove_wait_queue(&md->wait, &wait);
1601 1569
1602 if (noflush) 1570 if (noflush)
1603 __merge_pushback_list(md); 1571 clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
1604 up_write(&md->io_lock); 1572 up_write(&md->io_lock);
1605 1573
1606 /* were we interrupted ? */ 1574 /* were we interrupted ? */
1607 if (r < 0) { 1575 if (r < 0) {
1608 dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL); 1576 dm_queue_flush(md);
1609 1577
1610 unlock_fs(md); 1578 unlock_fs(md);
1611 goto out; /* pushback list is already flushed, so skip flush */ 1579 goto out; /* pushback list is already flushed, so skip flush */
@@ -1645,7 +1613,7 @@ int dm_resume(struct mapped_device *md)
1645 if (r) 1613 if (r)
1646 goto out; 1614 goto out;
1647 1615
1648 dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL); 1616 dm_queue_flush(md);
1649 1617
1650 unlock_fs(md); 1618 unlock_fs(md);
1651 1619
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 20194e000c5a..b48397c0abbd 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -60,7 +60,7 @@ int dm_table_barrier_ok(struct dm_table *t);
60int dm_target_init(void); 60int dm_target_init(void);
61void dm_target_exit(void); 61void dm_target_exit(void);
62struct target_type *dm_get_target_type(const char *name); 62struct target_type *dm_get_target_type(const char *name);
63void dm_put_target_type(struct target_type *t); 63void dm_put_target_type(struct target_type *tt);
64int dm_target_iterate(void (*iter_func)(struct target_type *tt, 64int dm_target_iterate(void (*iter_func)(struct target_type *tt,
65 void *param), void *param); 65 void *param), void *param);
66 66
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 86d9adf90e79..8695809b24b0 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -62,7 +62,10 @@
62#define ModeShift 5 62#define ModeShift 5
63 63
64#define MaxFault 50 64#define MaxFault 50
65#include <linux/raid/md.h> 65#include <linux/blkdev.h>
66#include <linux/raid/md_u.h>
67#include "md.h"
68#include <linux/seq_file.h>
66 69
67 70
68static void faulty_fail(struct bio *bio, int error) 71static void faulty_fail(struct bio *bio, int error)
@@ -280,6 +283,17 @@ static int reconfig(mddev_t *mddev, int layout, int chunk_size)
280 return 0; 283 return 0;
281} 284}
282 285
286static sector_t faulty_size(mddev_t *mddev, sector_t sectors, int raid_disks)
287{
288 WARN_ONCE(raid_disks,
289 "%s does not support generic reshape\n", __func__);
290
291 if (sectors == 0)
292 return mddev->dev_sectors;
293
294 return sectors;
295}
296
283static int run(mddev_t *mddev) 297static int run(mddev_t *mddev)
284{ 298{
285 mdk_rdev_t *rdev; 299 mdk_rdev_t *rdev;
@@ -298,7 +312,7 @@ static int run(mddev_t *mddev)
298 list_for_each_entry(rdev, &mddev->disks, same_set) 312 list_for_each_entry(rdev, &mddev->disks, same_set)
299 conf->rdev = rdev; 313 conf->rdev = rdev;
300 314
301 mddev->array_sectors = mddev->size * 2; 315 md_set_array_sectors(mddev, faulty_size(mddev, 0, 0));
302 mddev->private = conf; 316 mddev->private = conf;
303 317
304 reconfig(mddev, mddev->layout, -1); 318 reconfig(mddev, mddev->layout, -1);
@@ -325,6 +339,7 @@ static struct mdk_personality faulty_personality =
325 .stop = stop, 339 .stop = stop,
326 .status = status, 340 .status = status,
327 .reconfig = reconfig, 341 .reconfig = reconfig,
342 .size = faulty_size,
328}; 343};
329 344
330static int __init raid_init(void) 345static int __init raid_init(void)
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 09658b218474..7a36e38393a1 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -16,7 +16,11 @@
16 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 16 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17*/ 17*/
18 18
19#include <linux/raid/linear.h> 19#include <linux/blkdev.h>
20#include <linux/raid/md_u.h>
21#include <linux/seq_file.h>
22#include "md.h"
23#include "linear.h"
20 24
21/* 25/*
22 * find which device holds a particular offset 26 * find which device holds a particular offset
@@ -97,6 +101,16 @@ static int linear_congested(void *data, int bits)
97 return ret; 101 return ret;
98} 102}
99 103
104static sector_t linear_size(mddev_t *mddev, sector_t sectors, int raid_disks)
105{
106 linear_conf_t *conf = mddev_to_conf(mddev);
107
108 WARN_ONCE(sectors || raid_disks,
109 "%s does not support generic reshape\n", __func__);
110
111 return conf->array_sectors;
112}
113
100static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) 114static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
101{ 115{
102 linear_conf_t *conf; 116 linear_conf_t *conf;
@@ -135,8 +149,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
135 mddev->queue->max_sectors > (PAGE_SIZE>>9)) 149 mddev->queue->max_sectors > (PAGE_SIZE>>9))
136 blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); 150 blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
137 151
138 disk->num_sectors = rdev->size * 2; 152 disk->num_sectors = rdev->sectors;
139 conf->array_sectors += rdev->size * 2; 153 conf->array_sectors += rdev->sectors;
140 154
141 cnt++; 155 cnt++;
142 } 156 }
@@ -249,7 +263,7 @@ static int linear_run (mddev_t *mddev)
249 if (!conf) 263 if (!conf)
250 return 1; 264 return 1;
251 mddev->private = conf; 265 mddev->private = conf;
252 mddev->array_sectors = conf->array_sectors; 266 md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
253 267
254 blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec); 268 blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
255 mddev->queue->unplug_fn = linear_unplug; 269 mddev->queue->unplug_fn = linear_unplug;
@@ -283,7 +297,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
283 newconf->prev = mddev_to_conf(mddev); 297 newconf->prev = mddev_to_conf(mddev);
284 mddev->private = newconf; 298 mddev->private = newconf;
285 mddev->raid_disks++; 299 mddev->raid_disks++;
286 mddev->array_sectors = newconf->array_sectors; 300 md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
287 set_capacity(mddev->gendisk, mddev->array_sectors); 301 set_capacity(mddev->gendisk, mddev->array_sectors);
288 return 0; 302 return 0;
289} 303}
@@ -381,6 +395,7 @@ static struct mdk_personality linear_personality =
381 .stop = linear_stop, 395 .stop = linear_stop,
382 .status = linear_status, 396 .status = linear_status,
383 .hot_add_disk = linear_add, 397 .hot_add_disk = linear_add,
398 .size = linear_size,
384}; 399};
385 400
386static int __init linear_init (void) 401static int __init linear_init (void)
diff --git a/include/linux/raid/linear.h b/drivers/md/linear.h
index f38b9c586afb..bf8179587f95 100644
--- a/include/linux/raid/linear.h
+++ b/drivers/md/linear.h
@@ -1,8 +1,6 @@
1#ifndef _LINEAR_H 1#ifndef _LINEAR_H
2#define _LINEAR_H 2#define _LINEAR_H
3 3
4#include <linux/raid/md.h>
5
6struct dev_info { 4struct dev_info {
7 mdk_rdev_t *rdev; 5 mdk_rdev_t *rdev;
8 sector_t num_sectors; 6 sector_t num_sectors;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a307f87eb90e..ed5727c089a9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -33,9 +33,9 @@
33*/ 33*/
34 34
35#include <linux/kthread.h> 35#include <linux/kthread.h>
36#include <linux/raid/md.h> 36#include <linux/blkdev.h>
37#include <linux/raid/bitmap.h>
38#include <linux/sysctl.h> 37#include <linux/sysctl.h>
38#include <linux/seq_file.h>
39#include <linux/buffer_head.h> /* for invalidate_bdev */ 39#include <linux/buffer_head.h> /* for invalidate_bdev */
40#include <linux/poll.h> 40#include <linux/poll.h>
41#include <linux/ctype.h> 41#include <linux/ctype.h>
@@ -45,11 +45,10 @@
45#include <linux/reboot.h> 45#include <linux/reboot.h>
46#include <linux/file.h> 46#include <linux/file.h>
47#include <linux/delay.h> 47#include <linux/delay.h>
48 48#include <linux/raid/md_p.h>
49#define MAJOR_NR MD_MAJOR 49#include <linux/raid/md_u.h>
50 50#include "md.h"
51/* 63 partitions with the alternate major number (mdp) */ 51#include "bitmap.h"
52#define MdpMinorShift 6
53 52
54#define DEBUG 0 53#define DEBUG 0
55#define dprintk(x...) ((void)(DEBUG && printk(x))) 54#define dprintk(x...) ((void)(DEBUG && printk(x)))
@@ -202,12 +201,68 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
202 ) 201 )
203 202
204 203
205static int md_fail_request(struct request_queue *q, struct bio *bio) 204/* Rather than calling directly into the personality make_request function,
205 * IO requests come here first so that we can check if the device is
206 * being suspended pending a reconfiguration.
207 * We hold a refcount over the call to ->make_request. By the time that
208 * call has finished, the bio has been linked into some internal structure
209 * and so is visible to ->quiesce(), so we don't need the refcount any more.
210 */
211static int md_make_request(struct request_queue *q, struct bio *bio)
206{ 212{
207 bio_io_error(bio); 213 mddev_t *mddev = q->queuedata;
208 return 0; 214 int rv;
215 if (mddev == NULL || mddev->pers == NULL) {
216 bio_io_error(bio);
217 return 0;
218 }
219 rcu_read_lock();
220 if (mddev->suspended) {
221 DEFINE_WAIT(__wait);
222 for (;;) {
223 prepare_to_wait(&mddev->sb_wait, &__wait,
224 TASK_UNINTERRUPTIBLE);
225 if (!mddev->suspended)
226 break;
227 rcu_read_unlock();
228 schedule();
229 rcu_read_lock();
230 }
231 finish_wait(&mddev->sb_wait, &__wait);
232 }
233 atomic_inc(&mddev->active_io);
234 rcu_read_unlock();
235 rv = mddev->pers->make_request(q, bio);
236 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
237 wake_up(&mddev->sb_wait);
238
239 return rv;
240}
241
242static void mddev_suspend(mddev_t *mddev)
243{
244 BUG_ON(mddev->suspended);
245 mddev->suspended = 1;
246 synchronize_rcu();
247 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
248 mddev->pers->quiesce(mddev, 1);
249 md_unregister_thread(mddev->thread);
250 mddev->thread = NULL;
251 /* we now know that no code is executing in the personality module,
252 * except possibly the tail end of a ->bi_end_io function, but that
253 * is certain to complete before the module has a chance to get
254 * unloaded
255 */
256}
257
258static void mddev_resume(mddev_t *mddev)
259{
260 mddev->suspended = 0;
261 wake_up(&mddev->sb_wait);
262 mddev->pers->quiesce(mddev, 0);
209} 263}
210 264
265
211static inline mddev_t *mddev_get(mddev_t *mddev) 266static inline mddev_t *mddev_get(mddev_t *mddev)
212{ 267{
213 atomic_inc(&mddev->active); 268 atomic_inc(&mddev->active);
@@ -310,6 +365,7 @@ static mddev_t * mddev_find(dev_t unit)
310 init_timer(&new->safemode_timer); 365 init_timer(&new->safemode_timer);
311 atomic_set(&new->active, 1); 366 atomic_set(&new->active, 1);
312 atomic_set(&new->openers, 0); 367 atomic_set(&new->openers, 0);
368 atomic_set(&new->active_io, 0);
313 spin_lock_init(&new->write_lock); 369 spin_lock_init(&new->write_lock);
314 init_waitqueue_head(&new->sb_wait); 370 init_waitqueue_head(&new->sb_wait);
315 init_waitqueue_head(&new->recovery_wait); 371 init_waitqueue_head(&new->recovery_wait);
@@ -326,6 +382,11 @@ static inline int mddev_lock(mddev_t * mddev)
326 return mutex_lock_interruptible(&mddev->reconfig_mutex); 382 return mutex_lock_interruptible(&mddev->reconfig_mutex);
327} 383}
328 384
385static inline int mddev_is_locked(mddev_t *mddev)
386{
387 return mutex_is_locked(&mddev->reconfig_mutex);
388}
389
329static inline int mddev_trylock(mddev_t * mddev) 390static inline int mddev_trylock(mddev_t * mddev)
330{ 391{
331 return mutex_trylock(&mddev->reconfig_mutex); 392 return mutex_trylock(&mddev->reconfig_mutex);
@@ -409,7 +470,7 @@ static void free_disk_sb(mdk_rdev_t * rdev)
409 rdev->sb_loaded = 0; 470 rdev->sb_loaded = 0;
410 rdev->sb_page = NULL; 471 rdev->sb_page = NULL;
411 rdev->sb_start = 0; 472 rdev->sb_start = 0;
412 rdev->size = 0; 473 rdev->sectors = 0;
413 } 474 }
414} 475}
415 476
@@ -775,9 +836,9 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
775 else 836 else
776 ret = 0; 837 ret = 0;
777 } 838 }
778 rdev->size = calc_num_sectors(rdev, sb->chunk_size) / 2; 839 rdev->sectors = calc_num_sectors(rdev, sb->chunk_size);
779 840
780 if (rdev->size < sb->size && sb->level > 1) 841 if (rdev->sectors < sb->size * 2 && sb->level > 1)
781 /* "this cannot possibly happen" ... */ 842 /* "this cannot possibly happen" ... */
782 ret = -EINVAL; 843 ret = -EINVAL;
783 844
@@ -812,7 +873,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
812 mddev->clevel[0] = 0; 873 mddev->clevel[0] = 0;
813 mddev->layout = sb->layout; 874 mddev->layout = sb->layout;
814 mddev->raid_disks = sb->raid_disks; 875 mddev->raid_disks = sb->raid_disks;
815 mddev->size = sb->size; 876 mddev->dev_sectors = sb->size * 2;
816 mddev->events = ev1; 877 mddev->events = ev1;
817 mddev->bitmap_offset = 0; 878 mddev->bitmap_offset = 0;
818 mddev->default_bitmap_offset = MD_SB_BYTES >> 9; 879 mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
@@ -926,7 +987,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
926 987
927 sb->ctime = mddev->ctime; 988 sb->ctime = mddev->ctime;
928 sb->level = mddev->level; 989 sb->level = mddev->level;
929 sb->size = mddev->size; 990 sb->size = mddev->dev_sectors / 2;
930 sb->raid_disks = mddev->raid_disks; 991 sb->raid_disks = mddev->raid_disks;
931 sb->md_minor = mddev->md_minor; 992 sb->md_minor = mddev->md_minor;
932 sb->not_persistent = 0; 993 sb->not_persistent = 0;
@@ -1024,7 +1085,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1024static unsigned long long 1085static unsigned long long
1025super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors) 1086super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
1026{ 1087{
1027 if (num_sectors && num_sectors < rdev->mddev->size * 2) 1088 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1028 return 0; /* component must fit device */ 1089 return 0; /* component must fit device */
1029 if (rdev->mddev->bitmap_offset) 1090 if (rdev->mddev->bitmap_offset)
1030 return 0; /* can't move bitmap */ 1091 return 0; /* can't move bitmap */
@@ -1180,16 +1241,17 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
1180 ret = 0; 1241 ret = 0;
1181 } 1242 }
1182 if (minor_version) 1243 if (minor_version)
1183 rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2; 1244 rdev->sectors = (rdev->bdev->bd_inode->i_size >> 9) -
1245 le64_to_cpu(sb->data_offset);
1184 else 1246 else
1185 rdev->size = rdev->sb_start / 2; 1247 rdev->sectors = rdev->sb_start;
1186 if (rdev->size < le64_to_cpu(sb->data_size)/2) 1248 if (rdev->sectors < le64_to_cpu(sb->data_size))
1187 return -EINVAL; 1249 return -EINVAL;
1188 rdev->size = le64_to_cpu(sb->data_size)/2; 1250 rdev->sectors = le64_to_cpu(sb->data_size);
1189 if (le32_to_cpu(sb->chunksize)) 1251 if (le32_to_cpu(sb->chunksize))
1190 rdev->size &= ~((sector_t)le32_to_cpu(sb->chunksize)/2 - 1); 1252 rdev->sectors &= ~((sector_t)le32_to_cpu(sb->chunksize) - 1);
1191 1253
1192 if (le64_to_cpu(sb->size) > rdev->size*2) 1254 if (le64_to_cpu(sb->size) > rdev->sectors)
1193 return -EINVAL; 1255 return -EINVAL;
1194 return ret; 1256 return ret;
1195} 1257}
@@ -1216,7 +1278,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
1216 mddev->clevel[0] = 0; 1278 mddev->clevel[0] = 0;
1217 mddev->layout = le32_to_cpu(sb->layout); 1279 mddev->layout = le32_to_cpu(sb->layout);
1218 mddev->raid_disks = le32_to_cpu(sb->raid_disks); 1280 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1219 mddev->size = le64_to_cpu(sb->size)/2; 1281 mddev->dev_sectors = le64_to_cpu(sb->size);
1220 mddev->events = ev1; 1282 mddev->events = ev1;
1221 mddev->bitmap_offset = 0; 1283 mddev->bitmap_offset = 0;
1222 mddev->default_bitmap_offset = 1024 >> 9; 1284 mddev->default_bitmap_offset = 1024 >> 9;
@@ -1312,7 +1374,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1312 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors)); 1374 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
1313 1375
1314 sb->raid_disks = cpu_to_le32(mddev->raid_disks); 1376 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
1315 sb->size = cpu_to_le64(mddev->size<<1); 1377 sb->size = cpu_to_le64(mddev->dev_sectors);
1316 1378
1317 if (mddev->bitmap && mddev->bitmap_file == NULL) { 1379 if (mddev->bitmap && mddev->bitmap_file == NULL) {
1318 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); 1380 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
@@ -1320,10 +1382,15 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1320 } 1382 }
1321 1383
1322 if (rdev->raid_disk >= 0 && 1384 if (rdev->raid_disk >= 0 &&
1323 !test_bit(In_sync, &rdev->flags) && 1385 !test_bit(In_sync, &rdev->flags)) {
1324 rdev->recovery_offset > 0) { 1386 if (mddev->curr_resync_completed > rdev->recovery_offset)
1325 sb->feature_map |= cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET); 1387 rdev->recovery_offset = mddev->curr_resync_completed;
1326 sb->recovery_offset = cpu_to_le64(rdev->recovery_offset); 1388 if (rdev->recovery_offset > 0) {
1389 sb->feature_map |=
1390 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1391 sb->recovery_offset =
1392 cpu_to_le64(rdev->recovery_offset);
1393 }
1327 } 1394 }
1328 1395
1329 if (mddev->reshape_position != MaxSector) { 1396 if (mddev->reshape_position != MaxSector) {
@@ -1365,7 +1432,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
1365{ 1432{
1366 struct mdp_superblock_1 *sb; 1433 struct mdp_superblock_1 *sb;
1367 sector_t max_sectors; 1434 sector_t max_sectors;
1368 if (num_sectors && num_sectors < rdev->mddev->size * 2) 1435 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1369 return 0; /* component must fit device */ 1436 return 0; /* component must fit device */
1370 if (rdev->sb_start < rdev->data_offset) { 1437 if (rdev->sb_start < rdev->data_offset) {
1371 /* minor versions 1 and 2; superblock before data */ 1438 /* minor versions 1 and 2; superblock before data */
@@ -1381,7 +1448,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
1381 sector_t sb_start; 1448 sector_t sb_start;
1382 sb_start = (rdev->bdev->bd_inode->i_size >> 9) - 8*2; 1449 sb_start = (rdev->bdev->bd_inode->i_size >> 9) - 8*2;
1383 sb_start &= ~(sector_t)(4*2 - 1); 1450 sb_start &= ~(sector_t)(4*2 - 1);
1384 max_sectors = rdev->size * 2 + sb_start - rdev->sb_start; 1451 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
1385 if (!num_sectors || num_sectors > max_sectors) 1452 if (!num_sectors || num_sectors > max_sectors)
1386 num_sectors = max_sectors; 1453 num_sectors = max_sectors;
1387 rdev->sb_start = sb_start; 1454 rdev->sb_start = sb_start;
@@ -1433,6 +1500,38 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
1433 1500
1434static LIST_HEAD(pending_raid_disks); 1501static LIST_HEAD(pending_raid_disks);
1435 1502
1503static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev)
1504{
1505 struct mdk_personality *pers = mddev->pers;
1506 struct gendisk *disk = mddev->gendisk;
1507 struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
1508 struct blk_integrity *bi_mddev = blk_get_integrity(disk);
1509
1510 /* Data integrity passthrough not supported on RAID 4, 5 and 6 */
1511 if (pers && pers->level >= 4 && pers->level <= 6)
1512 return;
1513
1514 /* If rdev is integrity capable, register profile for mddev */
1515 if (!bi_mddev && bi_rdev) {
1516 if (blk_integrity_register(disk, bi_rdev))
1517 printk(KERN_ERR "%s: %s Could not register integrity!\n",
1518 __func__, disk->disk_name);
1519 else
1520 printk(KERN_NOTICE "Enabling data integrity on %s\n",
1521 disk->disk_name);
1522 return;
1523 }
1524
1525 /* Check that mddev and rdev have matching profiles */
1526 if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) {
1527 printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__,
1528 disk->disk_name, rdev->bdev->bd_disk->disk_name);
1529 printk(KERN_NOTICE "Disabling data integrity on %s\n",
1530 disk->disk_name);
1531 blk_integrity_unregister(disk);
1532 }
1533}
1534
1436static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) 1535static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
1437{ 1536{
1438 char b[BDEVNAME_SIZE]; 1537 char b[BDEVNAME_SIZE];
@@ -1449,8 +1548,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
1449 if (find_rdev(mddev, rdev->bdev->bd_dev)) 1548 if (find_rdev(mddev, rdev->bdev->bd_dev))
1450 return -EEXIST; 1549 return -EEXIST;
1451 1550
1452 /* make sure rdev->size exceeds mddev->size */ 1551 /* make sure rdev->sectors exceeds mddev->dev_sectors */
1453 if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) { 1552 if (rdev->sectors && (mddev->dev_sectors == 0 ||
1553 rdev->sectors < mddev->dev_sectors)) {
1454 if (mddev->pers) { 1554 if (mddev->pers) {
1455 /* Cannot change size, so fail 1555 /* Cannot change size, so fail
1456 * If mddev->level <= 0, then we don't care 1556 * If mddev->level <= 0, then we don't care
@@ -1459,7 +1559,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
1459 if (mddev->level > 0) 1559 if (mddev->level > 0)
1460 return -ENOSPC; 1560 return -ENOSPC;
1461 } else 1561 } else
1462 mddev->size = rdev->size; 1562 mddev->dev_sectors = rdev->sectors;
1463 } 1563 }
1464 1564
1465 /* Verify rdev->desc_nr is unique. 1565 /* Verify rdev->desc_nr is unique.
@@ -1503,6 +1603,8 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
1503 1603
1504 /* May as well allow recovery to be retried once */ 1604 /* May as well allow recovery to be retried once */
1505 mddev->recovery_disabled = 0; 1605 mddev->recovery_disabled = 0;
1606
1607 md_integrity_check(rdev, mddev);
1506 return 0; 1608 return 0;
1507 1609
1508 fail: 1610 fail:
@@ -1713,8 +1815,8 @@ static void print_sb_1(struct mdp_superblock_1 *sb)
1713static void print_rdev(mdk_rdev_t *rdev, int major_version) 1815static void print_rdev(mdk_rdev_t *rdev, int major_version)
1714{ 1816{
1715 char b[BDEVNAME_SIZE]; 1817 char b[BDEVNAME_SIZE];
1716 printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n", 1818 printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n",
1717 bdevname(rdev->bdev,b), (unsigned long long)rdev->size, 1819 bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors,
1718 test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags), 1820 test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
1719 rdev->desc_nr); 1821 rdev->desc_nr);
1720 if (rdev->sb_loaded) { 1822 if (rdev->sb_loaded) {
@@ -2153,7 +2255,7 @@ offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2153 return -EINVAL; 2255 return -EINVAL;
2154 if (rdev->mddev->pers && rdev->raid_disk >= 0) 2256 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2155 return -EBUSY; 2257 return -EBUSY;
2156 if (rdev->size && rdev->mddev->external) 2258 if (rdev->sectors && rdev->mddev->external)
2157 /* Must set offset before size, so overlap checks 2259 /* Must set offset before size, so overlap checks
2158 * can be sane */ 2260 * can be sane */
2159 return -EBUSY; 2261 return -EBUSY;
@@ -2167,7 +2269,7 @@ __ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2167static ssize_t 2269static ssize_t
2168rdev_size_show(mdk_rdev_t *rdev, char *page) 2270rdev_size_show(mdk_rdev_t *rdev, char *page)
2169{ 2271{
2170 return sprintf(page, "%llu\n", (unsigned long long)rdev->size); 2272 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2171} 2273}
2172 2274
2173static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2) 2275static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
@@ -2180,34 +2282,52 @@ static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2180 return 1; 2282 return 1;
2181} 2283}
2182 2284
2285static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2286{
2287 unsigned long long blocks;
2288 sector_t new;
2289
2290 if (strict_strtoull(buf, 10, &blocks) < 0)
2291 return -EINVAL;
2292
2293 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
2294 return -EINVAL; /* sector conversion overflow */
2295
2296 new = blocks * 2;
2297 if (new != blocks * 2)
2298 return -EINVAL; /* unsigned long long to sector_t overflow */
2299
2300 *sectors = new;
2301 return 0;
2302}
2303
2183static ssize_t 2304static ssize_t
2184rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) 2305rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2185{ 2306{
2186 unsigned long long size;
2187 unsigned long long oldsize = rdev->size;
2188 mddev_t *my_mddev = rdev->mddev; 2307 mddev_t *my_mddev = rdev->mddev;
2308 sector_t oldsectors = rdev->sectors;
2309 sector_t sectors;
2189 2310
2190 if (strict_strtoull(buf, 10, &size) < 0) 2311 if (strict_blocks_to_sectors(buf, &sectors) < 0)
2191 return -EINVAL; 2312 return -EINVAL;
2192 if (my_mddev->pers && rdev->raid_disk >= 0) { 2313 if (my_mddev->pers && rdev->raid_disk >= 0) {
2193 if (my_mddev->persistent) { 2314 if (my_mddev->persistent) {
2194 size = super_types[my_mddev->major_version]. 2315 sectors = super_types[my_mddev->major_version].
2195 rdev_size_change(rdev, size * 2); 2316 rdev_size_change(rdev, sectors);
2196 if (!size) 2317 if (!sectors)
2197 return -EBUSY; 2318 return -EBUSY;
2198 } else if (!size) { 2319 } else if (!sectors)
2199 size = (rdev->bdev->bd_inode->i_size >> 10); 2320 sectors = (rdev->bdev->bd_inode->i_size >> 9) -
2200 size -= rdev->data_offset/2; 2321 rdev->data_offset;
2201 }
2202 } 2322 }
2203 if (size < my_mddev->size) 2323 if (sectors < my_mddev->dev_sectors)
2204 return -EINVAL; /* component must fit device */ 2324 return -EINVAL; /* component must fit device */
2205 2325
2206 rdev->size = size; 2326 rdev->sectors = sectors;
2207 if (size > oldsize && my_mddev->external) { 2327 if (sectors > oldsectors && my_mddev->external) {
2208 /* need to check that all other rdevs with the same ->bdev 2328 /* need to check that all other rdevs with the same ->bdev
2209 * do not overlap. We need to unlock the mddev to avoid 2329 * do not overlap. We need to unlock the mddev to avoid
2210 * a deadlock. We have already changed rdev->size, and if 2330 * a deadlock. We have already changed rdev->sectors, and if
2211 * we have to change it back, we will have the lock again. 2331 * we have to change it back, we will have the lock again.
2212 */ 2332 */
2213 mddev_t *mddev; 2333 mddev_t *mddev;
@@ -2223,9 +2343,9 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2223 if (test_bit(AllReserved, &rdev2->flags) || 2343 if (test_bit(AllReserved, &rdev2->flags) ||
2224 (rdev->bdev == rdev2->bdev && 2344 (rdev->bdev == rdev2->bdev &&
2225 rdev != rdev2 && 2345 rdev != rdev2 &&
2226 overlaps(rdev->data_offset, rdev->size * 2, 2346 overlaps(rdev->data_offset, rdev->sectors,
2227 rdev2->data_offset, 2347 rdev2->data_offset,
2228 rdev2->size * 2))) { 2348 rdev2->sectors))) {
2229 overlap = 1; 2349 overlap = 1;
2230 break; 2350 break;
2231 } 2351 }
@@ -2239,11 +2359,11 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2239 if (overlap) { 2359 if (overlap) {
2240 /* Someone else could have slipped in a size 2360 /* Someone else could have slipped in a size
2241 * change here, but doing so is just silly. 2361 * change here, but doing so is just silly.
2242 * We put oldsize back because we *know* it is 2362 * We put oldsectors back because we *know* it is
2243 * safe, and trust userspace not to race with 2363 * safe, and trust userspace not to race with
2244 * itself 2364 * itself
2245 */ 2365 */
2246 rdev->size = oldsize; 2366 rdev->sectors = oldsectors;
2247 return -EBUSY; 2367 return -EBUSY;
2248 } 2368 }
2249 } 2369 }
@@ -2547,18 +2667,101 @@ level_show(mddev_t *mddev, char *page)
2547static ssize_t 2667static ssize_t
2548level_store(mddev_t *mddev, const char *buf, size_t len) 2668level_store(mddev_t *mddev, const char *buf, size_t len)
2549{ 2669{
2670 char level[16];
2550 ssize_t rv = len; 2671 ssize_t rv = len;
2551 if (mddev->pers) 2672 struct mdk_personality *pers;
2673 void *priv;
2674
2675 if (mddev->pers == NULL) {
2676 if (len == 0)
2677 return 0;
2678 if (len >= sizeof(mddev->clevel))
2679 return -ENOSPC;
2680 strncpy(mddev->clevel, buf, len);
2681 if (mddev->clevel[len-1] == '\n')
2682 len--;
2683 mddev->clevel[len] = 0;
2684 mddev->level = LEVEL_NONE;
2685 return rv;
2686 }
2687
2688 /* request to change the personality. Need to ensure:
2689 * - array is not engaged in resync/recovery/reshape
2690 * - old personality can be suspended
2691 * - new personality will access other array.
2692 */
2693
2694 if (mddev->sync_thread || mddev->reshape_position != MaxSector)
2552 return -EBUSY; 2695 return -EBUSY;
2553 if (len == 0) 2696
2554 return 0; 2697 if (!mddev->pers->quiesce) {
2555 if (len >= sizeof(mddev->clevel)) 2698 printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
2556 return -ENOSPC; 2699 mdname(mddev), mddev->pers->name);
2557 strncpy(mddev->clevel, buf, len); 2700 return -EINVAL;
2558 if (mddev->clevel[len-1] == '\n') 2701 }
2702
2703 /* Now find the new personality */
2704 if (len == 0 || len >= sizeof(level))
2705 return -EINVAL;
2706 strncpy(level, buf, len);
2707 if (level[len-1] == '\n')
2559 len--; 2708 len--;
2560 mddev->clevel[len] = 0; 2709 level[len] = 0;
2561 mddev->level = LEVEL_NONE; 2710
2711 request_module("md-%s", level);
2712 spin_lock(&pers_lock);
2713 pers = find_pers(LEVEL_NONE, level);
2714 if (!pers || !try_module_get(pers->owner)) {
2715 spin_unlock(&pers_lock);
2716 printk(KERN_WARNING "md: personality %s not loaded\n", level);
2717 return -EINVAL;
2718 }
2719 spin_unlock(&pers_lock);
2720
2721 if (pers == mddev->pers) {
2722 /* Nothing to do! */
2723 module_put(pers->owner);
2724 return rv;
2725 }
2726 if (!pers->takeover) {
2727 module_put(pers->owner);
2728 printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
2729 mdname(mddev), level);
2730 return -EINVAL;
2731 }
2732
2733 /* ->takeover must set new_* and/or delta_disks
2734 * if it succeeds, and may set them when it fails.
2735 */
2736 priv = pers->takeover(mddev);
2737 if (IS_ERR(priv)) {
2738 mddev->new_level = mddev->level;
2739 mddev->new_layout = mddev->layout;
2740 mddev->new_chunk = mddev->chunk_size;
2741 mddev->raid_disks -= mddev->delta_disks;
2742 mddev->delta_disks = 0;
2743 module_put(pers->owner);
2744 printk(KERN_WARNING "md: %s: %s would not accept array\n",
2745 mdname(mddev), level);
2746 return PTR_ERR(priv);
2747 }
2748
2749 /* Looks like we have a winner */
2750 mddev_suspend(mddev);
2751 mddev->pers->stop(mddev);
2752 module_put(mddev->pers->owner);
2753 mddev->pers = pers;
2754 mddev->private = priv;
2755 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
2756 mddev->level = mddev->new_level;
2757 mddev->layout = mddev->new_layout;
2758 mddev->chunk_size = mddev->new_chunk;
2759 mddev->delta_disks = 0;
2760 pers->run(mddev);
2761 mddev_resume(mddev);
2762 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2763 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2764 md_wakeup_thread(mddev->thread);
2562 return rv; 2765 return rv;
2563} 2766}
2564 2767
@@ -2586,12 +2789,18 @@ layout_store(mddev_t *mddev, const char *buf, size_t len)
2586 if (!*buf || (*e && *e != '\n')) 2789 if (!*buf || (*e && *e != '\n'))
2587 return -EINVAL; 2790 return -EINVAL;
2588 2791
2589 if (mddev->pers) 2792 if (mddev->pers) {
2590 return -EBUSY; 2793 int err;
2591 if (mddev->reshape_position != MaxSector) 2794 if (mddev->pers->reconfig == NULL)
2795 return -EBUSY;
2796 err = mddev->pers->reconfig(mddev, n, -1);
2797 if (err)
2798 return err;
2799 } else {
2592 mddev->new_layout = n; 2800 mddev->new_layout = n;
2593 else 2801 if (mddev->reshape_position == MaxSector)
2594 mddev->layout = n; 2802 mddev->layout = n;
2803 }
2595 return len; 2804 return len;
2596} 2805}
2597static struct md_sysfs_entry md_layout = 2806static struct md_sysfs_entry md_layout =
@@ -2648,19 +2857,24 @@ chunk_size_show(mddev_t *mddev, char *page)
2648static ssize_t 2857static ssize_t
2649chunk_size_store(mddev_t *mddev, const char *buf, size_t len) 2858chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
2650{ 2859{
2651 /* can only set chunk_size if array is not yet active */
2652 char *e; 2860 char *e;
2653 unsigned long n = simple_strtoul(buf, &e, 10); 2861 unsigned long n = simple_strtoul(buf, &e, 10);
2654 2862
2655 if (!*buf || (*e && *e != '\n')) 2863 if (!*buf || (*e && *e != '\n'))
2656 return -EINVAL; 2864 return -EINVAL;
2657 2865
2658 if (mddev->pers) 2866 if (mddev->pers) {
2659 return -EBUSY; 2867 int err;
2660 else if (mddev->reshape_position != MaxSector) 2868 if (mddev->pers->reconfig == NULL)
2869 return -EBUSY;
2870 err = mddev->pers->reconfig(mddev, -1, n);
2871 if (err)
2872 return err;
2873 } else {
2661 mddev->new_chunk = n; 2874 mddev->new_chunk = n;
2662 else 2875 if (mddev->reshape_position == MaxSector)
2663 mddev->chunk_size = n; 2876 mddev->chunk_size = n;
2877 }
2664 return len; 2878 return len;
2665} 2879}
2666static struct md_sysfs_entry md_chunk_size = 2880static struct md_sysfs_entry md_chunk_size =
@@ -2669,6 +2883,8 @@ __ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
2669static ssize_t 2883static ssize_t
2670resync_start_show(mddev_t *mddev, char *page) 2884resync_start_show(mddev_t *mddev, char *page)
2671{ 2885{
2886 if (mddev->recovery_cp == MaxSector)
2887 return sprintf(page, "none\n");
2672 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp); 2888 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
2673} 2889}
2674 2890
@@ -2766,7 +2982,7 @@ array_state_show(mddev_t *mddev, char *page)
2766 else { 2982 else {
2767 if (list_empty(&mddev->disks) && 2983 if (list_empty(&mddev->disks) &&
2768 mddev->raid_disks == 0 && 2984 mddev->raid_disks == 0 &&
2769 mddev->size == 0) 2985 mddev->dev_sectors == 0)
2770 st = clear; 2986 st = clear;
2771 else 2987 else
2772 st = inactive; 2988 st = inactive;
@@ -2973,7 +3189,8 @@ __ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
2973static ssize_t 3189static ssize_t
2974size_show(mddev_t *mddev, char *page) 3190size_show(mddev_t *mddev, char *page)
2975{ 3191{
2976 return sprintf(page, "%llu\n", (unsigned long long)mddev->size); 3192 return sprintf(page, "%llu\n",
3193 (unsigned long long)mddev->dev_sectors / 2);
2977} 3194}
2978 3195
2979static int update_size(mddev_t *mddev, sector_t num_sectors); 3196static int update_size(mddev_t *mddev, sector_t num_sectors);
@@ -2985,20 +3202,18 @@ size_store(mddev_t *mddev, const char *buf, size_t len)
2985 * not increase it (except from 0). 3202 * not increase it (except from 0).
2986 * If array is active, we can try an on-line resize 3203 * If array is active, we can try an on-line resize
2987 */ 3204 */
2988 char *e; 3205 sector_t sectors;
2989 int err = 0; 3206 int err = strict_blocks_to_sectors(buf, &sectors);
2990 unsigned long long size = simple_strtoull(buf, &e, 10);
2991 if (!*buf || *buf == '\n' ||
2992 (*e && *e != '\n'))
2993 return -EINVAL;
2994 3207
3208 if (err < 0)
3209 return err;
2995 if (mddev->pers) { 3210 if (mddev->pers) {
2996 err = update_size(mddev, size * 2); 3211 err = update_size(mddev, sectors);
2997 md_update_sb(mddev, 1); 3212 md_update_sb(mddev, 1);
2998 } else { 3213 } else {
2999 if (mddev->size == 0 || 3214 if (mddev->dev_sectors == 0 ||
3000 mddev->size > size) 3215 mddev->dev_sectors > sectors)
3001 mddev->size = size; 3216 mddev->dev_sectors = sectors;
3002 else 3217 else
3003 err = -ENOSPC; 3218 err = -ENOSPC;
3004 } 3219 }
@@ -3251,6 +3466,8 @@ static ssize_t
3251sync_speed_show(mddev_t *mddev, char *page) 3466sync_speed_show(mddev_t *mddev, char *page)
3252{ 3467{
3253 unsigned long resync, dt, db; 3468 unsigned long resync, dt, db;
3469 if (mddev->curr_resync == 0)
3470 return sprintf(page, "none\n");
3254 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active); 3471 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
3255 dt = (jiffies - mddev->resync_mark) / HZ; 3472 dt = (jiffies - mddev->resync_mark) / HZ;
3256 if (!dt) dt++; 3473 if (!dt) dt++;
@@ -3263,15 +3480,15 @@ static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
3263static ssize_t 3480static ssize_t
3264sync_completed_show(mddev_t *mddev, char *page) 3481sync_completed_show(mddev_t *mddev, char *page)
3265{ 3482{
3266 unsigned long max_blocks, resync; 3483 unsigned long max_sectors, resync;
3267 3484
3268 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) 3485 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
3269 max_blocks = mddev->resync_max_sectors; 3486 max_sectors = mddev->resync_max_sectors;
3270 else 3487 else
3271 max_blocks = mddev->size << 1; 3488 max_sectors = mddev->dev_sectors;
3272 3489
3273 resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active)); 3490 resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active));
3274 return sprintf(page, "%lu / %lu\n", resync, max_blocks); 3491 return sprintf(page, "%lu / %lu\n", resync, max_sectors);
3275} 3492}
3276 3493
3277static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); 3494static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
@@ -3431,6 +3648,57 @@ static struct md_sysfs_entry md_reshape_position =
3431__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show, 3648__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
3432 reshape_position_store); 3649 reshape_position_store);
3433 3650
3651static ssize_t
3652array_size_show(mddev_t *mddev, char *page)
3653{
3654 if (mddev->external_size)
3655 return sprintf(page, "%llu\n",
3656 (unsigned long long)mddev->array_sectors/2);
3657 else
3658 return sprintf(page, "default\n");
3659}
3660
3661static ssize_t
3662array_size_store(mddev_t *mddev, const char *buf, size_t len)
3663{
3664 sector_t sectors;
3665
3666 if (strncmp(buf, "default", 7) == 0) {
3667 if (mddev->pers)
3668 sectors = mddev->pers->size(mddev, 0, 0);
3669 else
3670 sectors = mddev->array_sectors;
3671
3672 mddev->external_size = 0;
3673 } else {
3674 if (strict_blocks_to_sectors(buf, &sectors) < 0)
3675 return -EINVAL;
3676 if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
3677 return -EINVAL;
3678
3679 mddev->external_size = 1;
3680 }
3681
3682 mddev->array_sectors = sectors;
3683 set_capacity(mddev->gendisk, mddev->array_sectors);
3684 if (mddev->pers) {
3685 struct block_device *bdev = bdget_disk(mddev->gendisk, 0);
3686
3687 if (bdev) {
3688 mutex_lock(&bdev->bd_inode->i_mutex);
3689 i_size_write(bdev->bd_inode,
3690 (loff_t)mddev->array_sectors << 9);
3691 mutex_unlock(&bdev->bd_inode->i_mutex);
3692 bdput(bdev);
3693 }
3694 }
3695
3696 return len;
3697}
3698
3699static struct md_sysfs_entry md_array_size =
3700__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
3701 array_size_store);
3434 3702
3435static struct attribute *md_default_attrs[] = { 3703static struct attribute *md_default_attrs[] = {
3436 &md_level.attr, 3704 &md_level.attr,
@@ -3444,6 +3712,7 @@ static struct attribute *md_default_attrs[] = {
3444 &md_safe_delay.attr, 3712 &md_safe_delay.attr,
3445 &md_array_state.attr, 3713 &md_array_state.attr,
3446 &md_reshape_position.attr, 3714 &md_reshape_position.attr,
3715 &md_array_size.attr,
3447 NULL, 3716 NULL,
3448}; 3717};
3449 3718
@@ -3602,10 +3871,12 @@ static int md_alloc(dev_t dev, char *name)
3602 mddev_put(mddev); 3871 mddev_put(mddev);
3603 return -ENOMEM; 3872 return -ENOMEM;
3604 } 3873 }
3874 mddev->queue->queuedata = mddev;
3875
3605 /* Can be unlocked because the queue is new: no concurrency */ 3876 /* Can be unlocked because the queue is new: no concurrency */
3606 queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue); 3877 queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
3607 3878
3608 blk_queue_make_request(mddev->queue, md_fail_request); 3879 blk_queue_make_request(mddev->queue, md_make_request);
3609 3880
3610 disk = alloc_disk(1 << shift); 3881 disk = alloc_disk(1 << shift);
3611 if (!disk) { 3882 if (!disk) {
@@ -3731,13 +4002,13 @@ static int do_md_run(mddev_t * mddev)
3731 list_for_each_entry(rdev, &mddev->disks, same_set) { 4002 list_for_each_entry(rdev, &mddev->disks, same_set) {
3732 if (test_bit(Faulty, &rdev->flags)) 4003 if (test_bit(Faulty, &rdev->flags))
3733 continue; 4004 continue;
3734 if (rdev->size < chunk_size / 1024) { 4005 if (rdev->sectors < chunk_size / 512) {
3735 printk(KERN_WARNING 4006 printk(KERN_WARNING
3736 "md: Dev %s smaller than chunk_size:" 4007 "md: Dev %s smaller than chunk_size:"
3737 " %lluk < %dk\n", 4008 " %llu < %d\n",
3738 bdevname(rdev->bdev,b), 4009 bdevname(rdev->bdev,b),
3739 (unsigned long long)rdev->size, 4010 (unsigned long long)rdev->sectors,
3740 chunk_size / 1024); 4011 chunk_size / 512);
3741 return -EINVAL; 4012 return -EINVAL;
3742 } 4013 }
3743 } 4014 }
@@ -3761,11 +4032,11 @@ static int do_md_run(mddev_t * mddev)
3761 4032
3762 /* perform some consistency tests on the device. 4033 /* perform some consistency tests on the device.
3763 * We don't want the data to overlap the metadata, 4034 * We don't want the data to overlap the metadata,
3764 * Internal Bitmap issues has handled elsewhere. 4035 * Internal Bitmap issues have been handled elsewhere.
3765 */ 4036 */
3766 if (rdev->data_offset < rdev->sb_start) { 4037 if (rdev->data_offset < rdev->sb_start) {
3767 if (mddev->size && 4038 if (mddev->dev_sectors &&
3768 rdev->data_offset + mddev->size*2 4039 rdev->data_offset + mddev->dev_sectors
3769 > rdev->sb_start) { 4040 > rdev->sb_start) {
3770 printk("md: %s: data overlaps metadata\n", 4041 printk("md: %s: data overlaps metadata\n",
3771 mdname(mddev)); 4042 mdname(mddev));
@@ -3801,9 +4072,16 @@ static int do_md_run(mddev_t * mddev)
3801 } 4072 }
3802 mddev->pers = pers; 4073 mddev->pers = pers;
3803 spin_unlock(&pers_lock); 4074 spin_unlock(&pers_lock);
3804 mddev->level = pers->level; 4075 if (mddev->level != pers->level) {
4076 mddev->level = pers->level;
4077 mddev->new_level = pers->level;
4078 }
3805 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); 4079 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3806 4080
4081 if (pers->level >= 4 && pers->level <= 6)
4082 /* Cannot support integrity (yet) */
4083 blk_integrity_unregister(mddev->gendisk);
4084
3807 if (mddev->reshape_position != MaxSector && 4085 if (mddev->reshape_position != MaxSector &&
3808 pers->start_reshape == NULL) { 4086 pers->start_reshape == NULL) {
3809 /* This personality cannot handle reshaping... */ 4087 /* This personality cannot handle reshaping... */
@@ -3843,7 +4121,9 @@ static int do_md_run(mddev_t * mddev)
3843 } 4121 }
3844 4122
3845 mddev->recovery = 0; 4123 mddev->recovery = 0;
3846 mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ 4124 /* may be over-ridden by personality */
4125 mddev->resync_max_sectors = mddev->dev_sectors;
4126
3847 mddev->barriers_work = 1; 4127 mddev->barriers_work = 1;
3848 mddev->ok_start_degraded = start_dirty_degraded; 4128 mddev->ok_start_degraded = start_dirty_degraded;
3849 4129
@@ -3853,7 +4133,17 @@ static int do_md_run(mddev_t * mddev)
3853 err = mddev->pers->run(mddev); 4133 err = mddev->pers->run(mddev);
3854 if (err) 4134 if (err)
3855 printk(KERN_ERR "md: pers->run() failed ...\n"); 4135 printk(KERN_ERR "md: pers->run() failed ...\n");
3856 else if (mddev->pers->sync_request) { 4136 else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) {
4137 WARN_ONCE(!mddev->external_size, "%s: default size too small,"
4138 " but 'external_size' not in effect?\n", __func__);
4139 printk(KERN_ERR
4140 "md: invalid array_size %llu > default size %llu\n",
4141 (unsigned long long)mddev->array_sectors / 2,
4142 (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2);
4143 err = -EINVAL;
4144 mddev->pers->stop(mddev);
4145 }
4146 if (err == 0 && mddev->pers->sync_request) {
3857 err = bitmap_create(mddev); 4147 err = bitmap_create(mddev);
3858 if (err) { 4148 if (err) {
3859 printk(KERN_ERR "%s: failed to create bitmap (%d)\n", 4149 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
@@ -3899,16 +4189,6 @@ static int do_md_run(mddev_t * mddev)
3899 4189
3900 set_capacity(disk, mddev->array_sectors); 4190 set_capacity(disk, mddev->array_sectors);
3901 4191
3902 /* If we call blk_queue_make_request here, it will
3903 * re-initialise max_sectors etc which may have been
3904 * refined inside -> run. So just set the bits we need to set.
3905 * Most initialisation happended when we called
3906 * blk_queue_make_request(..., md_fail_request)
3907 * earlier.
3908 */
3909 mddev->queue->queuedata = mddev;
3910 mddev->queue->make_request_fn = mddev->pers->make_request;
3911
3912 /* If there is a partially-recovered drive we need to 4192 /* If there is a partially-recovered drive we need to
3913 * start recovery here. If we leave it to md_check_recovery, 4193 * start recovery here. If we leave it to md_check_recovery,
3914 * it will remove the drives and not do the right thing 4194 * it will remove the drives and not do the right thing
@@ -4038,7 +4318,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4038 md_super_wait(mddev); 4318 md_super_wait(mddev);
4039 if (mddev->ro) 4319 if (mddev->ro)
4040 set_disk_ro(disk, 0); 4320 set_disk_ro(disk, 0);
4041 blk_queue_make_request(mddev->queue, md_fail_request); 4321
4042 mddev->pers->stop(mddev); 4322 mddev->pers->stop(mddev);
4043 mddev->queue->merge_bvec_fn = NULL; 4323 mddev->queue->merge_bvec_fn = NULL;
4044 mddev->queue->unplug_fn = NULL; 4324 mddev->queue->unplug_fn = NULL;
@@ -4095,7 +4375,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4095 export_array(mddev); 4375 export_array(mddev);
4096 4376
4097 mddev->array_sectors = 0; 4377 mddev->array_sectors = 0;
4098 mddev->size = 0; 4378 mddev->external_size = 0;
4379 mddev->dev_sectors = 0;
4099 mddev->raid_disks = 0; 4380 mddev->raid_disks = 0;
4100 mddev->recovery_cp = 0; 4381 mddev->recovery_cp = 0;
4101 mddev->resync_min = 0; 4382 mddev->resync_min = 0;
@@ -4135,6 +4416,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4135 printk(KERN_INFO "md: %s switched to read-only mode.\n", 4416 printk(KERN_INFO "md: %s switched to read-only mode.\n",
4136 mdname(mddev)); 4417 mdname(mddev));
4137 err = 0; 4418 err = 0;
4419 blk_integrity_unregister(disk);
4138 md_new_event(mddev); 4420 md_new_event(mddev);
4139 sysfs_notify_dirent(mddev->sysfs_state); 4421 sysfs_notify_dirent(mddev->sysfs_state);
4140out: 4422out:
@@ -4300,8 +4582,8 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
4300 info.patch_version = MD_PATCHLEVEL_VERSION; 4582 info.patch_version = MD_PATCHLEVEL_VERSION;
4301 info.ctime = mddev->ctime; 4583 info.ctime = mddev->ctime;
4302 info.level = mddev->level; 4584 info.level = mddev->level;
4303 info.size = mddev->size; 4585 info.size = mddev->dev_sectors / 2;
4304 if (info.size != mddev->size) /* overflow */ 4586 if (info.size != mddev->dev_sectors / 2) /* overflow */
4305 info.size = -1; 4587 info.size = -1;
4306 info.nr_disks = nr; 4588 info.nr_disks = nr;
4307 info.raid_disks = mddev->raid_disks; 4589 info.raid_disks = mddev->raid_disks;
@@ -4480,6 +4762,8 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
4480 clear_bit(In_sync, &rdev->flags); /* just to be sure */ 4762 clear_bit(In_sync, &rdev->flags); /* just to be sure */
4481 if (info->state & (1<<MD_DISK_WRITEMOSTLY)) 4763 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
4482 set_bit(WriteMostly, &rdev->flags); 4764 set_bit(WriteMostly, &rdev->flags);
4765 else
4766 clear_bit(WriteMostly, &rdev->flags);
4483 4767
4484 rdev->raid_disk = -1; 4768 rdev->raid_disk = -1;
4485 err = bind_rdev_to_array(rdev, mddev); 4769 err = bind_rdev_to_array(rdev, mddev);
@@ -4543,7 +4827,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
4543 rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; 4827 rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
4544 } else 4828 } else
4545 rdev->sb_start = calc_dev_sboffset(rdev->bdev); 4829 rdev->sb_start = calc_dev_sboffset(rdev->bdev);
4546 rdev->size = calc_num_sectors(rdev, mddev->chunk_size) / 2; 4830 rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size);
4547 4831
4548 err = bind_rdev_to_array(rdev, mddev); 4832 err = bind_rdev_to_array(rdev, mddev);
4549 if (err) { 4833 if (err) {
@@ -4613,7 +4897,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
4613 else 4897 else
4614 rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; 4898 rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
4615 4899
4616 rdev->size = calc_num_sectors(rdev, mddev->chunk_size) / 2; 4900 rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size);
4617 4901
4618 if (test_bit(Faulty, &rdev->flags)) { 4902 if (test_bit(Faulty, &rdev->flags)) {
4619 printk(KERN_WARNING 4903 printk(KERN_WARNING
@@ -4749,7 +5033,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
4749 5033
4750 mddev->level = info->level; 5034 mddev->level = info->level;
4751 mddev->clevel[0] = 0; 5035 mddev->clevel[0] = 0;
4752 mddev->size = info->size; 5036 mddev->dev_sectors = 2 * (sector_t)info->size;
4753 mddev->raid_disks = info->raid_disks; 5037 mddev->raid_disks = info->raid_disks;
4754 /* don't set md_minor, it is determined by which /dev/md* was 5038 /* don't set md_minor, it is determined by which /dev/md* was
4755 * openned 5039 * openned
@@ -4788,6 +5072,17 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
4788 return 0; 5072 return 0;
4789} 5073}
4790 5074
5075void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors)
5076{
5077 WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
5078
5079 if (mddev->external_size)
5080 return;
5081
5082 mddev->array_sectors = array_sectors;
5083}
5084EXPORT_SYMBOL(md_set_array_sectors);
5085
4791static int update_size(mddev_t *mddev, sector_t num_sectors) 5086static int update_size(mddev_t *mddev, sector_t num_sectors)
4792{ 5087{
4793 mdk_rdev_t *rdev; 5088 mdk_rdev_t *rdev;
@@ -4814,8 +5109,7 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
4814 */ 5109 */
4815 return -EBUSY; 5110 return -EBUSY;
4816 list_for_each_entry(rdev, &mddev->disks, same_set) { 5111 list_for_each_entry(rdev, &mddev->disks, same_set) {
4817 sector_t avail; 5112 sector_t avail = rdev->sectors;
4818 avail = rdev->size * 2;
4819 5113
4820 if (fit && (num_sectors == 0 || num_sectors > avail)) 5114 if (fit && (num_sectors == 0 || num_sectors > avail))
4821 num_sectors = avail; 5115 num_sectors = avail;
@@ -4887,12 +5181,18 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
4887 ) 5181 )
4888 return -EINVAL; 5182 return -EINVAL;
4889 /* Check there is only one change */ 5183 /* Check there is only one change */
4890 if (info->size >= 0 && mddev->size != info->size) cnt++; 5184 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
4891 if (mddev->raid_disks != info->raid_disks) cnt++; 5185 cnt++;
4892 if (mddev->layout != info->layout) cnt++; 5186 if (mddev->raid_disks != info->raid_disks)
4893 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) cnt++; 5187 cnt++;
4894 if (cnt == 0) return 0; 5188 if (mddev->layout != info->layout)
4895 if (cnt > 1) return -EINVAL; 5189 cnt++;
5190 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
5191 cnt++;
5192 if (cnt == 0)
5193 return 0;
5194 if (cnt > 1)
5195 return -EINVAL;
4896 5196
4897 if (mddev->layout != info->layout) { 5197 if (mddev->layout != info->layout) {
4898 /* Change layout 5198 /* Change layout
@@ -4904,7 +5204,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
4904 else 5204 else
4905 return mddev->pers->reconfig(mddev, info->layout, -1); 5205 return mddev->pers->reconfig(mddev, info->layout, -1);
4906 } 5206 }
4907 if (info->size >= 0 && mddev->size != info->size) 5207 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
4908 rv = update_size(mddev, (sector_t)info->size * 2); 5208 rv = update_size(mddev, (sector_t)info->size * 2);
4909 5209
4910 if (mddev->raid_disks != info->raid_disks) 5210 if (mddev->raid_disks != info->raid_disks)
@@ -5331,6 +5631,8 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
5331 5631
5332void md_unregister_thread(mdk_thread_t *thread) 5632void md_unregister_thread(mdk_thread_t *thread)
5333{ 5633{
5634 if (!thread)
5635 return;
5334 dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk)); 5636 dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
5335 5637
5336 kthread_stop(thread->tsk); 5638 kthread_stop(thread->tsk);
@@ -5404,7 +5706,7 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
5404 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) 5706 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
5405 max_blocks = mddev->resync_max_sectors >> 1; 5707 max_blocks = mddev->resync_max_sectors >> 1;
5406 else 5708 else
5407 max_blocks = mddev->size; 5709 max_blocks = mddev->dev_sectors / 2;
5408 5710
5409 /* 5711 /*
5410 * Should not happen. 5712 * Should not happen.
@@ -5537,7 +5839,7 @@ struct mdstat_info {
5537static int md_seq_show(struct seq_file *seq, void *v) 5839static int md_seq_show(struct seq_file *seq, void *v)
5538{ 5840{
5539 mddev_t *mddev = v; 5841 mddev_t *mddev = v;
5540 sector_t size; 5842 sector_t sectors;
5541 mdk_rdev_t *rdev; 5843 mdk_rdev_t *rdev;
5542 struct mdstat_info *mi = seq->private; 5844 struct mdstat_info *mi = seq->private;
5543 struct bitmap *bitmap; 5845 struct bitmap *bitmap;
@@ -5573,7 +5875,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
5573 seq_printf(seq, " %s", mddev->pers->name); 5875 seq_printf(seq, " %s", mddev->pers->name);
5574 } 5876 }
5575 5877
5576 size = 0; 5878 sectors = 0;
5577 list_for_each_entry(rdev, &mddev->disks, same_set) { 5879 list_for_each_entry(rdev, &mddev->disks, same_set) {
5578 char b[BDEVNAME_SIZE]; 5880 char b[BDEVNAME_SIZE];
5579 seq_printf(seq, " %s[%d]", 5881 seq_printf(seq, " %s[%d]",
@@ -5585,7 +5887,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
5585 continue; 5887 continue;
5586 } else if (rdev->raid_disk < 0) 5888 } else if (rdev->raid_disk < 0)
5587 seq_printf(seq, "(S)"); /* spare */ 5889 seq_printf(seq, "(S)"); /* spare */
5588 size += rdev->size; 5890 sectors += rdev->sectors;
5589 } 5891 }
5590 5892
5591 if (!list_empty(&mddev->disks)) { 5893 if (!list_empty(&mddev->disks)) {
@@ -5595,7 +5897,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
5595 mddev->array_sectors / 2); 5897 mddev->array_sectors / 2);
5596 else 5898 else
5597 seq_printf(seq, "\n %llu blocks", 5899 seq_printf(seq, "\n %llu blocks",
5598 (unsigned long long)size); 5900 (unsigned long long)sectors / 2);
5599 } 5901 }
5600 if (mddev->persistent) { 5902 if (mddev->persistent) {
5601 if (mddev->major_version != 0 || 5903 if (mddev->major_version != 0 ||
@@ -5722,19 +6024,19 @@ int unregister_md_personality(struct mdk_personality *p)
5722 return 0; 6024 return 0;
5723} 6025}
5724 6026
5725static int is_mddev_idle(mddev_t *mddev) 6027static int is_mddev_idle(mddev_t *mddev, int init)
5726{ 6028{
5727 mdk_rdev_t * rdev; 6029 mdk_rdev_t * rdev;
5728 int idle; 6030 int idle;
5729 long curr_events; 6031 int curr_events;
5730 6032
5731 idle = 1; 6033 idle = 1;
5732 rcu_read_lock(); 6034 rcu_read_lock();
5733 rdev_for_each_rcu(rdev, mddev) { 6035 rdev_for_each_rcu(rdev, mddev) {
5734 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; 6036 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
5735 curr_events = part_stat_read(&disk->part0, sectors[0]) + 6037 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
5736 part_stat_read(&disk->part0, sectors[1]) - 6038 (int)part_stat_read(&disk->part0, sectors[1]) -
5737 atomic_read(&disk->sync_io); 6039 atomic_read(&disk->sync_io);
5738 /* sync IO will cause sync_io to increase before the disk_stats 6040 /* sync IO will cause sync_io to increase before the disk_stats
5739 * as sync_io is counted when a request starts, and 6041 * as sync_io is counted when a request starts, and
5740 * disk_stats is counted when it completes. 6042 * disk_stats is counted when it completes.
@@ -5757,7 +6059,7 @@ static int is_mddev_idle(mddev_t *mddev)
5757 * always make curr_events less than last_events. 6059 * always make curr_events less than last_events.
5758 * 6060 *
5759 */ 6061 */
5760 if (curr_events - rdev->last_events > 4096) { 6062 if (init || curr_events - rdev->last_events > 64) {
5761 rdev->last_events = curr_events; 6063 rdev->last_events = curr_events;
5762 idle = 0; 6064 idle = 0;
5763 } 6065 }
@@ -5980,10 +6282,10 @@ void md_do_sync(mddev_t *mddev)
5980 j = mddev->recovery_cp; 6282 j = mddev->recovery_cp;
5981 6283
5982 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) 6284 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
5983 max_sectors = mddev->size << 1; 6285 max_sectors = mddev->dev_sectors;
5984 else { 6286 else {
5985 /* recovery follows the physical size of devices */ 6287 /* recovery follows the physical size of devices */
5986 max_sectors = mddev->size << 1; 6288 max_sectors = mddev->dev_sectors;
5987 j = MaxSector; 6289 j = MaxSector;
5988 list_for_each_entry(rdev, &mddev->disks, same_set) 6290 list_for_each_entry(rdev, &mddev->disks, same_set)
5989 if (rdev->raid_disk >= 0 && 6291 if (rdev->raid_disk >= 0 &&
@@ -6000,7 +6302,7 @@ void md_do_sync(mddev_t *mddev)
6000 "(but not more than %d KB/sec) for %s.\n", 6302 "(but not more than %d KB/sec) for %s.\n",
6001 speed_max(mddev), desc); 6303 speed_max(mddev), desc);
6002 6304
6003 is_mddev_idle(mddev); /* this also initializes IO event counters */ 6305 is_mddev_idle(mddev, 1); /* this initializes IO event counters */
6004 6306
6005 io_sectors = 0; 6307 io_sectors = 0;
6006 for (m = 0; m < SYNC_MARKS; m++) { 6308 for (m = 0; m < SYNC_MARKS; m++) {
@@ -6040,6 +6342,18 @@ void md_do_sync(mddev_t *mddev)
6040 } 6342 }
6041 if (kthread_should_stop()) 6343 if (kthread_should_stop())
6042 goto interrupted; 6344 goto interrupted;
6345
6346 if (mddev->curr_resync > mddev->curr_resync_completed &&
6347 (mddev->curr_resync - mddev->curr_resync_completed)
6348 > (max_sectors >> 4)) {
6349 /* time to update curr_resync_completed */
6350 blk_unplug(mddev->queue);
6351 wait_event(mddev->recovery_wait,
6352 atomic_read(&mddev->recovery_active) == 0);
6353 mddev->curr_resync_completed =
6354 mddev->curr_resync;
6355 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
6356 }
6043 sectors = mddev->pers->sync_request(mddev, j, &skipped, 6357 sectors = mddev->pers->sync_request(mddev, j, &skipped,
6044 currspeed < speed_min(mddev)); 6358 currspeed < speed_min(mddev));
6045 if (sectors == 0) { 6359 if (sectors == 0) {
@@ -6102,7 +6416,7 @@ void md_do_sync(mddev_t *mddev)
6102 6416
6103 if (currspeed > speed_min(mddev)) { 6417 if (currspeed > speed_min(mddev)) {
6104 if ((currspeed > speed_max(mddev)) || 6418 if ((currspeed > speed_max(mddev)) ||
6105 !is_mddev_idle(mddev)) { 6419 !is_mddev_idle(mddev, 0)) {
6106 msleep(500); 6420 msleep(500);
6107 goto repeat; 6421 goto repeat;
6108 } 6422 }
@@ -6173,6 +6487,8 @@ static int remove_and_add_spares(mddev_t *mddev)
6173 mdk_rdev_t *rdev; 6487 mdk_rdev_t *rdev;
6174 int spares = 0; 6488 int spares = 0;
6175 6489
6490 mddev->curr_resync_completed = 0;
6491
6176 list_for_each_entry(rdev, &mddev->disks, same_set) 6492 list_for_each_entry(rdev, &mddev->disks, same_set)
6177 if (rdev->raid_disk >= 0 && 6493 if (rdev->raid_disk >= 0 &&
6178 !test_bit(Blocked, &rdev->flags) && 6494 !test_bit(Blocked, &rdev->flags) &&
@@ -6327,6 +6643,9 @@ void md_check_recovery(mddev_t *mddev)
6327 sysfs_notify(&mddev->kobj, NULL, 6643 sysfs_notify(&mddev->kobj, NULL,
6328 "degraded"); 6644 "degraded");
6329 } 6645 }
6646 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
6647 mddev->pers->finish_reshape)
6648 mddev->pers->finish_reshape(mddev);
6330 md_update_sb(mddev, 1); 6649 md_update_sb(mddev, 1);
6331 6650
6332 /* if array is no-longer degraded, then any saved_raid_disk 6651 /* if array is no-longer degraded, then any saved_raid_disk
@@ -6470,13 +6789,13 @@ static void md_geninit(void)
6470 6789
6471static int __init md_init(void) 6790static int __init md_init(void)
6472{ 6791{
6473 if (register_blkdev(MAJOR_NR, "md")) 6792 if (register_blkdev(MD_MAJOR, "md"))
6474 return -1; 6793 return -1;
6475 if ((mdp_major=register_blkdev(0, "mdp"))<=0) { 6794 if ((mdp_major=register_blkdev(0, "mdp"))<=0) {
6476 unregister_blkdev(MAJOR_NR, "md"); 6795 unregister_blkdev(MD_MAJOR, "md");
6477 return -1; 6796 return -1;
6478 } 6797 }
6479 blk_register_region(MKDEV(MAJOR_NR, 0), 1UL<<MINORBITS, THIS_MODULE, 6798 blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
6480 md_probe, NULL, NULL); 6799 md_probe, NULL, NULL);
6481 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE, 6800 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
6482 md_probe, NULL, NULL); 6801 md_probe, NULL, NULL);
@@ -6562,10 +6881,10 @@ static __exit void md_exit(void)
6562 mddev_t *mddev; 6881 mddev_t *mddev;
6563 struct list_head *tmp; 6882 struct list_head *tmp;
6564 6883
6565 blk_unregister_region(MKDEV(MAJOR_NR,0), 1U << MINORBITS); 6884 blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS);
6566 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS); 6885 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
6567 6886
6568 unregister_blkdev(MAJOR_NR,"md"); 6887 unregister_blkdev(MD_MAJOR,"md");
6569 unregister_blkdev(mdp_major, "mdp"); 6888 unregister_blkdev(mdp_major, "mdp");
6570 unregister_reboot_notifier(&md_notifier); 6889 unregister_reboot_notifier(&md_notifier);
6571 unregister_sysctl_table(raid_table_header); 6890 unregister_sysctl_table(raid_table_header);
diff --git a/include/linux/raid/md_k.h b/drivers/md/md.h
index 9743e4dbc918..e9b7f54c24d6 100644
--- a/include/linux/raid/md_k.h
+++ b/drivers/md/md.h
@@ -15,21 +15,8 @@
15#ifndef _MD_K_H 15#ifndef _MD_K_H
16#define _MD_K_H 16#define _MD_K_H
17 17
18/* and dm-bio-list.h is not under include/linux because.... ??? */
19#include "../../../drivers/md/dm-bio-list.h"
20
21#ifdef CONFIG_BLOCK 18#ifdef CONFIG_BLOCK
22 19
23#define LEVEL_MULTIPATH (-4)
24#define LEVEL_LINEAR (-1)
25#define LEVEL_FAULTY (-5)
26
27/* we need a value for 'no level specified' and 0
28 * means 'raid0', so we need something else. This is
29 * for internal use only
30 */
31#define LEVEL_NONE (-1000000)
32
33#define MaxSector (~(sector_t)0) 20#define MaxSector (~(sector_t)0)
34 21
35typedef struct mddev_s mddev_t; 22typedef struct mddev_s mddev_t;
@@ -49,9 +36,9 @@ struct mdk_rdev_s
49{ 36{
50 struct list_head same_set; /* RAID devices within the same set */ 37 struct list_head same_set; /* RAID devices within the same set */
51 38
52 sector_t size; /* Device size (in blocks) */ 39 sector_t sectors; /* Device size (in 512bytes sectors) */
53 mddev_t *mddev; /* RAID array if running */ 40 mddev_t *mddev; /* RAID array if running */
54 long last_events; /* IO event timestamp */ 41 int last_events; /* IO event timestamp */
55 42
56 struct block_device *bdev; /* block device handle */ 43 struct block_device *bdev; /* block device handle */
57 44
@@ -132,6 +119,8 @@ struct mddev_s
132#define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */ 119#define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */
133#define MD_CHANGE_PENDING 2 /* superblock update in progress */ 120#define MD_CHANGE_PENDING 2 /* superblock update in progress */
134 121
122 int suspended;
123 atomic_t active_io;
135 int ro; 124 int ro;
136 125
137 struct gendisk *gendisk; 126 struct gendisk *gendisk;
@@ -155,8 +144,11 @@ struct mddev_s
155 char clevel[16]; 144 char clevel[16];
156 int raid_disks; 145 int raid_disks;
157 int max_disks; 146 int max_disks;
158 sector_t size; /* used size of component devices */ 147 sector_t dev_sectors; /* used size of
148 * component devices */
159 sector_t array_sectors; /* exported array size */ 149 sector_t array_sectors; /* exported array size */
150 int external_size; /* size managed
151 * externally */
160 __u64 events; 152 __u64 events;
161 153
162 char uuid[16]; 154 char uuid[16];
@@ -172,6 +164,13 @@ struct mddev_s
172 struct mdk_thread_s *thread; /* management thread */ 164 struct mdk_thread_s *thread; /* management thread */
173 struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ 165 struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */
174 sector_t curr_resync; /* last block scheduled */ 166 sector_t curr_resync; /* last block scheduled */
167 /* As resync requests can complete out of order, we cannot easily track
168 * how much resync has been completed. So we occasionally pause until
169 * everything completes, then set curr_resync_completed to curr_resync.
170 * As such it may be well behind the real resync mark, but it is a value
171 * we are certain of.
172 */
173 sector_t curr_resync_completed;
175 unsigned long resync_mark; /* a recent timestamp */ 174 unsigned long resync_mark; /* a recent timestamp */
176 sector_t resync_mark_cnt;/* blocks written at resync_mark */ 175 sector_t resync_mark_cnt;/* blocks written at resync_mark */
177 sector_t curr_mark_cnt; /* blocks scheduled now */ 176 sector_t curr_mark_cnt; /* blocks scheduled now */
@@ -315,8 +314,10 @@ struct mdk_personality
315 int (*spare_active) (mddev_t *mddev); 314 int (*spare_active) (mddev_t *mddev);
316 sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster); 315 sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster);
317 int (*resize) (mddev_t *mddev, sector_t sectors); 316 int (*resize) (mddev_t *mddev, sector_t sectors);
317 sector_t (*size) (mddev_t *mddev, sector_t sectors, int raid_disks);
318 int (*check_reshape) (mddev_t *mddev); 318 int (*check_reshape) (mddev_t *mddev);
319 int (*start_reshape) (mddev_t *mddev); 319 int (*start_reshape) (mddev_t *mddev);
320 void (*finish_reshape) (mddev_t *mddev);
320 int (*reconfig) (mddev_t *mddev, int layout, int chunk_size); 321 int (*reconfig) (mddev_t *mddev, int layout, int chunk_size);
321 /* quiesce moves between quiescence states 322 /* quiesce moves between quiescence states
322 * 0 - fully active 323 * 0 - fully active
@@ -324,6 +325,16 @@ struct mdk_personality
324 * others - reserved 325 * others - reserved
325 */ 326 */
326 void (*quiesce) (mddev_t *mddev, int state); 327 void (*quiesce) (mddev_t *mddev, int state);
328 /* takeover is used to transition an array from one
329 * personality to another. The new personality must be able
330 * to handle the data in the current layout.
331 * e.g. 2drive raid1 -> 2drive raid5
332 * ndrive raid5 -> degraded n+1drive raid6 with special layout
333 * If the takeover succeeds, a new 'private' structure is returned.
334 * This needs to be installed and then ->run used to activate the
335 * array.
336 */
337 void *(*takeover) (mddev_t *mddev);
327}; 338};
328 339
329 340
@@ -400,3 +411,26 @@ static inline void safe_put_page(struct page *p)
400#endif /* CONFIG_BLOCK */ 411#endif /* CONFIG_BLOCK */
401#endif 412#endif
402 413
414
415extern int register_md_personality(struct mdk_personality *p);
416extern int unregister_md_personality(struct mdk_personality *p);
417extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev),
418 mddev_t *mddev, const char *name);
419extern void md_unregister_thread(mdk_thread_t *thread);
420extern void md_wakeup_thread(mdk_thread_t *thread);
421extern void md_check_recovery(mddev_t *mddev);
422extern void md_write_start(mddev_t *mddev, struct bio *bi);
423extern void md_write_end(mddev_t *mddev);
424extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
425extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
426
427extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
428 sector_t sector, int size, struct page *page);
429extern void md_super_wait(mddev_t *mddev);
430extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
431 struct page *page, int rw);
432extern void md_do_sync(mddev_t *mddev);
433extern void md_new_event(mddev_t *mddev);
434extern int md_allow_write(mddev_t *mddev);
435extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
436extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors);
diff --git a/drivers/md/mktables.c b/drivers/md/mktables.c
index b61d5767aae7..3b1500843bba 100644
--- a/drivers/md/mktables.c
+++ b/drivers/md/mktables.c
@@ -59,7 +59,7 @@ int main(int argc, char *argv[])
59 uint8_t v; 59 uint8_t v;
60 uint8_t exptbl[256], invtbl[256]; 60 uint8_t exptbl[256], invtbl[256];
61 61
62 printf("#include \"raid6.h\"\n"); 62 printf("#include <linux/raid/pq.h>\n");
63 63
64 /* Compute multiplication table */ 64 /* Compute multiplication table */
65 printf("\nconst u8 __attribute__((aligned(256)))\n" 65 printf("\nconst u8 __attribute__((aligned(256)))\n"
@@ -76,6 +76,9 @@ int main(int argc, char *argv[])
76 printf("\t},\n"); 76 printf("\t},\n");
77 } 77 }
78 printf("};\n"); 78 printf("};\n");
79 printf("#ifdef __KERNEL__\n");
80 printf("EXPORT_SYMBOL(raid6_gfmul);\n");
81 printf("#endif\n");
79 82
80 /* Compute power-of-2 table (exponent) */ 83 /* Compute power-of-2 table (exponent) */
81 v = 1; 84 v = 1;
@@ -92,6 +95,9 @@ int main(int argc, char *argv[])
92 } 95 }
93 } 96 }
94 printf("};\n"); 97 printf("};\n");
98 printf("#ifdef __KERNEL__\n");
99 printf("EXPORT_SYMBOL(raid6_gfexp);\n");
100 printf("#endif\n");
95 101
96 /* Compute inverse table x^-1 == x^254 */ 102 /* Compute inverse table x^-1 == x^254 */
97 printf("\nconst u8 __attribute__((aligned(256)))\n" 103 printf("\nconst u8 __attribute__((aligned(256)))\n"
@@ -104,6 +110,9 @@ int main(int argc, char *argv[])
104 } 110 }
105 } 111 }
106 printf("};\n"); 112 printf("};\n");
113 printf("#ifdef __KERNEL__\n");
114 printf("EXPORT_SYMBOL(raid6_gfinv);\n");
115 printf("#endif\n");
107 116
108 /* Compute inv(2^x + 1) (exponent-xor-inverse) table */ 117 /* Compute inv(2^x + 1) (exponent-xor-inverse) table */
109 printf("\nconst u8 __attribute__((aligned(256)))\n" 118 printf("\nconst u8 __attribute__((aligned(256)))\n"
@@ -115,6 +124,9 @@ int main(int argc, char *argv[])
115 (j == 7) ? '\n' : ' '); 124 (j == 7) ? '\n' : ' ');
116 } 125 }
117 printf("};\n"); 126 printf("};\n");
127 printf("#ifdef __KERNEL__\n");
128 printf("EXPORT_SYMBOL(raid6_gfexi);\n");
129 printf("#endif\n");
118 130
119 return 0; 131 return 0;
120} 132}
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index f6d08f241671..41ced0cbe823 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -19,7 +19,11 @@
19 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 19 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */ 20 */
21 21
22#include <linux/raid/multipath.h> 22#include <linux/blkdev.h>
23#include <linux/raid/md_u.h>
24#include <linux/seq_file.h>
25#include "md.h"
26#include "multipath.h"
23 27
24#define MAX_WORK_PER_DISK 128 28#define MAX_WORK_PER_DISK 128
25 29
@@ -402,6 +406,14 @@ static void multipathd (mddev_t *mddev)
402 spin_unlock_irqrestore(&conf->device_lock, flags); 406 spin_unlock_irqrestore(&conf->device_lock, flags);
403} 407}
404 408
409static sector_t multipath_size(mddev_t *mddev, sector_t sectors, int raid_disks)
410{
411 WARN_ONCE(sectors || raid_disks,
412 "%s does not support generic reshape\n", __func__);
413
414 return mddev->dev_sectors;
415}
416
405static int multipath_run (mddev_t *mddev) 417static int multipath_run (mddev_t *mddev)
406{ 418{
407 multipath_conf_t *conf; 419 multipath_conf_t *conf;
@@ -498,7 +510,7 @@ static int multipath_run (mddev_t *mddev)
498 /* 510 /*
499 * Ok, everything is just fine now 511 * Ok, everything is just fine now
500 */ 512 */
501 mddev->array_sectors = mddev->size * 2; 513 md_set_array_sectors(mddev, multipath_size(mddev, 0, 0));
502 514
503 mddev->queue->unplug_fn = multipath_unplug; 515 mddev->queue->unplug_fn = multipath_unplug;
504 mddev->queue->backing_dev_info.congested_fn = multipath_congested; 516 mddev->queue->backing_dev_info.congested_fn = multipath_congested;
@@ -543,6 +555,7 @@ static struct mdk_personality multipath_personality =
543 .error_handler = multipath_error, 555 .error_handler = multipath_error,
544 .hot_add_disk = multipath_add_disk, 556 .hot_add_disk = multipath_add_disk,
545 .hot_remove_disk= multipath_remove_disk, 557 .hot_remove_disk= multipath_remove_disk,
558 .size = multipath_size,
546}; 559};
547 560
548static int __init multipath_init (void) 561static int __init multipath_init (void)
diff --git a/include/linux/raid/multipath.h b/drivers/md/multipath.h
index 6f53fc177a47..6fa70b400cda 100644
--- a/include/linux/raid/multipath.h
+++ b/drivers/md/multipath.h
@@ -1,8 +1,6 @@
1#ifndef _MULTIPATH_H 1#ifndef _MULTIPATH_H
2#define _MULTIPATH_H 2#define _MULTIPATH_H
3 3
4#include <linux/raid/md.h>
5
6struct multipath_info { 4struct multipath_info {
7 mdk_rdev_t *rdev; 5 mdk_rdev_t *rdev;
8}; 6};
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index c605ba805586..c08d7559be55 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -18,7 +18,10 @@
18 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 18 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19*/ 19*/
20 20
21#include <linux/raid/raid0.h> 21#include <linux/blkdev.h>
22#include <linux/seq_file.h>
23#include "md.h"
24#include "raid0.h"
22 25
23static void raid0_unplug(struct request_queue *q) 26static void raid0_unplug(struct request_queue *q)
24{ 27{
@@ -73,16 +76,15 @@ static int create_strip_zones (mddev_t *mddev)
73 list_for_each_entry(rdev2, &mddev->disks, same_set) { 76 list_for_each_entry(rdev2, &mddev->disks, same_set) {
74 printk(KERN_INFO "raid0: comparing %s(%llu)", 77 printk(KERN_INFO "raid0: comparing %s(%llu)",
75 bdevname(rdev1->bdev,b), 78 bdevname(rdev1->bdev,b),
76 (unsigned long long)rdev1->size); 79 (unsigned long long)rdev1->sectors);
77 printk(KERN_INFO " with %s(%llu)\n", 80 printk(KERN_INFO " with %s(%llu)\n",
78 bdevname(rdev2->bdev,b), 81 bdevname(rdev2->bdev,b),
79 (unsigned long long)rdev2->size); 82 (unsigned long long)rdev2->sectors);
80 if (rdev2 == rdev1) { 83 if (rdev2 == rdev1) {
81 printk(KERN_INFO "raid0: END\n"); 84 printk(KERN_INFO "raid0: END\n");
82 break; 85 break;
83 } 86 }
84 if (rdev2->size == rdev1->size) 87 if (rdev2->sectors == rdev1->sectors) {
85 {
86 /* 88 /*
87 * Not unique, don't count it as a new 89 * Not unique, don't count it as a new
88 * group 90 * group
@@ -145,7 +147,7 @@ static int create_strip_zones (mddev_t *mddev)
145 mddev->queue->max_sectors > (PAGE_SIZE>>9)) 147 mddev->queue->max_sectors > (PAGE_SIZE>>9))
146 blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); 148 blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
147 149
148 if (!smallest || (rdev1->size <smallest->size)) 150 if (!smallest || (rdev1->sectors < smallest->sectors))
149 smallest = rdev1; 151 smallest = rdev1;
150 cnt++; 152 cnt++;
151 } 153 }
@@ -155,10 +157,10 @@ static int create_strip_zones (mddev_t *mddev)
155 goto abort; 157 goto abort;
156 } 158 }
157 zone->nb_dev = cnt; 159 zone->nb_dev = cnt;
158 zone->sectors = smallest->size * cnt * 2; 160 zone->sectors = smallest->sectors * cnt;
159 zone->zone_start = 0; 161 zone->zone_start = 0;
160 162
161 current_start = smallest->size * 2; 163 current_start = smallest->sectors;
162 curr_zone_start = zone->sectors; 164 curr_zone_start = zone->sectors;
163 165
164 /* now do the other zones */ 166 /* now do the other zones */
@@ -177,29 +179,29 @@ static int create_strip_zones (mddev_t *mddev)
177 rdev = conf->strip_zone[0].dev[j]; 179 rdev = conf->strip_zone[0].dev[j];
178 printk(KERN_INFO "raid0: checking %s ...", 180 printk(KERN_INFO "raid0: checking %s ...",
179 bdevname(rdev->bdev, b)); 181 bdevname(rdev->bdev, b));
180 if (rdev->size > current_start / 2) { 182 if (rdev->sectors <= current_start) {
181 printk(KERN_INFO " contained as device %d\n",
182 c);
183 zone->dev[c] = rdev;
184 c++;
185 if (!smallest || (rdev->size <smallest->size)) {
186 smallest = rdev;
187 printk(KERN_INFO " (%llu) is smallest!.\n",
188 (unsigned long long)rdev->size);
189 }
190 } else
191 printk(KERN_INFO " nope.\n"); 183 printk(KERN_INFO " nope.\n");
184 continue;
185 }
186 printk(KERN_INFO " contained as device %d\n", c);
187 zone->dev[c] = rdev;
188 c++;
189 if (!smallest || rdev->sectors < smallest->sectors) {
190 smallest = rdev;
191 printk(KERN_INFO " (%llu) is smallest!.\n",
192 (unsigned long long)rdev->sectors);
193 }
192 } 194 }
193 195
194 zone->nb_dev = c; 196 zone->nb_dev = c;
195 zone->sectors = (smallest->size * 2 - current_start) * c; 197 zone->sectors = (smallest->sectors - current_start) * c;
196 printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n", 198 printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n",
197 zone->nb_dev, (unsigned long long)zone->sectors); 199 zone->nb_dev, (unsigned long long)zone->sectors);
198 200
199 zone->zone_start = curr_zone_start; 201 zone->zone_start = curr_zone_start;
200 curr_zone_start += zone->sectors; 202 curr_zone_start += zone->sectors;
201 203
202 current_start = smallest->size * 2; 204 current_start = smallest->sectors;
203 printk(KERN_INFO "raid0: current zone start: %llu\n", 205 printk(KERN_INFO "raid0: current zone start: %llu\n",
204 (unsigned long long)current_start); 206 (unsigned long long)current_start);
205 } 207 }
@@ -261,12 +263,25 @@ static int raid0_mergeable_bvec(struct request_queue *q,
261 return max; 263 return max;
262} 264}
263 265
266static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks)
267{
268 sector_t array_sectors = 0;
269 mdk_rdev_t *rdev;
270
271 WARN_ONCE(sectors || raid_disks,
272 "%s does not support generic reshape\n", __func__);
273
274 list_for_each_entry(rdev, &mddev->disks, same_set)
275 array_sectors += rdev->sectors;
276
277 return array_sectors;
278}
279
264static int raid0_run (mddev_t *mddev) 280static int raid0_run (mddev_t *mddev)
265{ 281{
266 unsigned cur=0, i=0, nb_zone; 282 unsigned cur=0, i=0, nb_zone;
267 s64 sectors; 283 s64 sectors;
268 raid0_conf_t *conf; 284 raid0_conf_t *conf;
269 mdk_rdev_t *rdev;
270 285
271 if (mddev->chunk_size == 0) { 286 if (mddev->chunk_size == 0) {
272 printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); 287 printk(KERN_ERR "md/raid0: non-zero chunk size required.\n");
@@ -291,16 +306,14 @@ static int raid0_run (mddev_t *mddev)
291 goto out_free_conf; 306 goto out_free_conf;
292 307
293 /* calculate array device size */ 308 /* calculate array device size */
294 mddev->array_sectors = 0; 309 md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
295 list_for_each_entry(rdev, &mddev->disks, same_set)
296 mddev->array_sectors += rdev->size * 2;
297 310
298 printk(KERN_INFO "raid0 : md_size is %llu sectors.\n", 311 printk(KERN_INFO "raid0 : md_size is %llu sectors.\n",
299 (unsigned long long)mddev->array_sectors); 312 (unsigned long long)mddev->array_sectors);
300 printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n", 313 printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n",
301 (unsigned long long)conf->spacing); 314 (unsigned long long)conf->spacing);
302 { 315 {
303 sector_t s = mddev->array_sectors; 316 sector_t s = raid0_size(mddev, 0, 0);
304 sector_t space = conf->spacing; 317 sector_t space = conf->spacing;
305 int round; 318 int round;
306 conf->sector_shift = 0; 319 conf->sector_shift = 0;
@@ -509,6 +522,7 @@ static struct mdk_personality raid0_personality=
509 .run = raid0_run, 522 .run = raid0_run,
510 .stop = raid0_stop, 523 .stop = raid0_stop,
511 .status = raid0_status, 524 .status = raid0_status,
525 .size = raid0_size,
512}; 526};
513 527
514static int __init raid0_init (void) 528static int __init raid0_init (void)
diff --git a/include/linux/raid/raid0.h b/drivers/md/raid0.h
index fd42aa87c391..824b12eb1d4f 100644
--- a/include/linux/raid/raid0.h
+++ b/drivers/md/raid0.h
@@ -1,8 +1,6 @@
1#ifndef _RAID0_H 1#ifndef _RAID0_H
2#define _RAID0_H 2#define _RAID0_H
3 3
4#include <linux/raid/md.h>
5
6struct strip_zone 4struct strip_zone
7{ 5{
8 sector_t zone_start; /* Zone offset in md_dev (in sectors) */ 6 sector_t zone_start; /* Zone offset in md_dev (in sectors) */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e2466425d9ca..b4f4badc0068 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -31,10 +31,13 @@
31 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 31 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
32 */ 32 */
33 33
34#include "dm-bio-list.h"
35#include <linux/delay.h> 34#include <linux/delay.h>
36#include <linux/raid/raid1.h> 35#include <linux/blkdev.h>
37#include <linux/raid/bitmap.h> 36#include <linux/seq_file.h>
37#include "md.h"
38#include "dm-bio-list.h"
39#include "raid1.h"
40#include "bitmap.h"
38 41
39#define DEBUG 0 42#define DEBUG 0
40#if DEBUG 43#if DEBUG
@@ -1723,7 +1726,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1723 return 0; 1726 return 0;
1724 } 1727 }
1725 1728
1726 max_sector = mddev->size << 1; 1729 max_sector = mddev->dev_sectors;
1727 if (sector_nr >= max_sector) { 1730 if (sector_nr >= max_sector) {
1728 /* If we aborted, we need to abort the 1731 /* If we aborted, we need to abort the
1729 * sync on the 'current' bitmap chunk (there will 1732 * sync on the 'current' bitmap chunk (there will
@@ -1919,6 +1922,14 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1919 return nr_sectors; 1922 return nr_sectors;
1920} 1923}
1921 1924
1925static sector_t raid1_size(mddev_t *mddev, sector_t sectors, int raid_disks)
1926{
1927 if (sectors)
1928 return sectors;
1929
1930 return mddev->dev_sectors;
1931}
1932
1922static int run(mddev_t *mddev) 1933static int run(mddev_t *mddev)
1923{ 1934{
1924 conf_t *conf; 1935 conf_t *conf;
@@ -2048,7 +2059,7 @@ static int run(mddev_t *mddev)
2048 /* 2059 /*
2049 * Ok, everything is just fine now 2060 * Ok, everything is just fine now
2050 */ 2061 */
2051 mddev->array_sectors = mddev->size * 2; 2062 md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
2052 2063
2053 mddev->queue->unplug_fn = raid1_unplug; 2064 mddev->queue->unplug_fn = raid1_unplug;
2054 mddev->queue->backing_dev_info.congested_fn = raid1_congested; 2065 mddev->queue->backing_dev_info.congested_fn = raid1_congested;
@@ -2089,6 +2100,9 @@ static int stop(mddev_t *mddev)
2089 /* need to kick something here to make sure I/O goes? */ 2100 /* need to kick something here to make sure I/O goes? */
2090 } 2101 }
2091 2102
2103 raise_barrier(conf);
2104 lower_barrier(conf);
2105
2092 md_unregister_thread(mddev->thread); 2106 md_unregister_thread(mddev->thread);
2093 mddev->thread = NULL; 2107 mddev->thread = NULL;
2094 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ 2108 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
@@ -2110,15 +2124,17 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
2110 * any io in the removed space completes, but it hardly seems 2124 * any io in the removed space completes, but it hardly seems
2111 * worth it. 2125 * worth it.
2112 */ 2126 */
2113 mddev->array_sectors = sectors; 2127 md_set_array_sectors(mddev, raid1_size(mddev, sectors, 0));
2128 if (mddev->array_sectors > raid1_size(mddev, sectors, 0))
2129 return -EINVAL;
2114 set_capacity(mddev->gendisk, mddev->array_sectors); 2130 set_capacity(mddev->gendisk, mddev->array_sectors);
2115 mddev->changed = 1; 2131 mddev->changed = 1;
2116 if (mddev->array_sectors / 2 > mddev->size && 2132 if (sectors > mddev->dev_sectors &&
2117 mddev->recovery_cp == MaxSector) { 2133 mddev->recovery_cp == MaxSector) {
2118 mddev->recovery_cp = mddev->size << 1; 2134 mddev->recovery_cp = mddev->dev_sectors;
2119 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 2135 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2120 } 2136 }
2121 mddev->size = mddev->array_sectors / 2; 2137 mddev->dev_sectors = sectors;
2122 mddev->resync_max_sectors = sectors; 2138 mddev->resync_max_sectors = sectors;
2123 return 0; 2139 return 0;
2124} 2140}
@@ -2264,6 +2280,7 @@ static struct mdk_personality raid1_personality =
2264 .spare_active = raid1_spare_active, 2280 .spare_active = raid1_spare_active,
2265 .sync_request = sync_request, 2281 .sync_request = sync_request,
2266 .resize = raid1_resize, 2282 .resize = raid1_resize,
2283 .size = raid1_size,
2267 .check_reshape = raid1_reshape, 2284 .check_reshape = raid1_reshape,
2268 .quiesce = raid1_quiesce, 2285 .quiesce = raid1_quiesce,
2269}; 2286};
diff --git a/include/linux/raid/raid1.h b/drivers/md/raid1.h
index 0a9ba7c3302e..1620eea3d57c 100644
--- a/include/linux/raid/raid1.h
+++ b/drivers/md/raid1.h
@@ -1,8 +1,6 @@
1#ifndef _RAID1_H 1#ifndef _RAID1_H
2#define _RAID1_H 2#define _RAID1_H
3 3
4#include <linux/raid/md.h>
5
6typedef struct mirror_info mirror_info_t; 4typedef struct mirror_info mirror_info_t;
7 5
8struct mirror_info { 6struct mirror_info {
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 7301631abe04..e293d92641ac 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -18,10 +18,13 @@
18 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 18 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */ 19 */
20 20
21#include "dm-bio-list.h"
22#include <linux/delay.h> 21#include <linux/delay.h>
23#include <linux/raid/raid10.h> 22#include <linux/blkdev.h>
24#include <linux/raid/bitmap.h> 23#include <linux/seq_file.h>
24#include "md.h"
25#include "dm-bio-list.h"
26#include "raid10.h"
27#include "bitmap.h"
25 28
26/* 29/*
27 * RAID10 provides a combination of RAID0 and RAID1 functionality. 30 * RAID10 provides a combination of RAID0 and RAID1 functionality.
@@ -1695,7 +1698,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1695 return 0; 1698 return 0;
1696 1699
1697 skipped: 1700 skipped:
1698 max_sector = mddev->size << 1; 1701 max_sector = mddev->dev_sectors;
1699 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) 1702 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
1700 max_sector = mddev->resync_max_sectors; 1703 max_sector = mddev->resync_max_sectors;
1701 if (sector_nr >= max_sector) { 1704 if (sector_nr >= max_sector) {
@@ -2020,6 +2023,25 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
2020 goto skipped; 2023 goto skipped;
2021} 2024}
2022 2025
2026static sector_t
2027raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks)
2028{
2029 sector_t size;
2030 conf_t *conf = mddev_to_conf(mddev);
2031
2032 if (!raid_disks)
2033 raid_disks = mddev->raid_disks;
2034 if (!sectors)
2035 sectors = mddev->dev_sectors;
2036
2037 size = sectors >> conf->chunk_shift;
2038 sector_div(size, conf->far_copies);
2039 size = size * raid_disks;
2040 sector_div(size, conf->near_copies);
2041
2042 return size << conf->chunk_shift;
2043}
2044
2023static int run(mddev_t *mddev) 2045static int run(mddev_t *mddev)
2024{ 2046{
2025 conf_t *conf; 2047 conf_t *conf;
@@ -2076,7 +2098,7 @@ static int run(mddev_t *mddev)
2076 conf->far_offset = fo; 2098 conf->far_offset = fo;
2077 conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1; 2099 conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1;
2078 conf->chunk_shift = ffz(~mddev->chunk_size) - 9; 2100 conf->chunk_shift = ffz(~mddev->chunk_size) - 9;
2079 size = mddev->size >> (conf->chunk_shift-1); 2101 size = mddev->dev_sectors >> conf->chunk_shift;
2080 sector_div(size, fc); 2102 sector_div(size, fc);
2081 size = size * conf->raid_disks; 2103 size = size * conf->raid_disks;
2082 sector_div(size, nc); 2104 sector_div(size, nc);
@@ -2089,7 +2111,7 @@ static int run(mddev_t *mddev)
2089 */ 2111 */
2090 stride += conf->raid_disks - 1; 2112 stride += conf->raid_disks - 1;
2091 sector_div(stride, conf->raid_disks); 2113 sector_div(stride, conf->raid_disks);
2092 mddev->size = stride << (conf->chunk_shift-1); 2114 mddev->dev_sectors = stride << conf->chunk_shift;
2093 2115
2094 if (fo) 2116 if (fo)
2095 stride = 1; 2117 stride = 1;
@@ -2171,8 +2193,8 @@ static int run(mddev_t *mddev)
2171 /* 2193 /*
2172 * Ok, everything is just fine now 2194 * Ok, everything is just fine now
2173 */ 2195 */
2174 mddev->array_sectors = size << conf->chunk_shift; 2196 md_set_array_sectors(mddev, raid10_size(mddev, 0, 0));
2175 mddev->resync_max_sectors = size << conf->chunk_shift; 2197 mddev->resync_max_sectors = raid10_size(mddev, 0, 0);
2176 2198
2177 mddev->queue->unplug_fn = raid10_unplug; 2199 mddev->queue->unplug_fn = raid10_unplug;
2178 mddev->queue->backing_dev_info.congested_fn = raid10_congested; 2200 mddev->queue->backing_dev_info.congested_fn = raid10_congested;
@@ -2208,6 +2230,9 @@ static int stop(mddev_t *mddev)
2208{ 2230{
2209 conf_t *conf = mddev_to_conf(mddev); 2231 conf_t *conf = mddev_to_conf(mddev);
2210 2232
2233 raise_barrier(conf, 0);
2234 lower_barrier(conf);
2235
2211 md_unregister_thread(mddev->thread); 2236 md_unregister_thread(mddev->thread);
2212 mddev->thread = NULL; 2237 mddev->thread = NULL;
2213 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ 2238 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
@@ -2255,6 +2280,7 @@ static struct mdk_personality raid10_personality =
2255 .spare_active = raid10_spare_active, 2280 .spare_active = raid10_spare_active,
2256 .sync_request = sync_request, 2281 .sync_request = sync_request,
2257 .quiesce = raid10_quiesce, 2282 .quiesce = raid10_quiesce,
2283 .size = raid10_size,
2258}; 2284};
2259 2285
2260static int __init raid_init(void) 2286static int __init raid_init(void)
diff --git a/include/linux/raid/raid10.h b/drivers/md/raid10.h
index e9091cfeb286..244dbe507a54 100644
--- a/include/linux/raid/raid10.h
+++ b/drivers/md/raid10.h
@@ -1,8 +1,6 @@
1#ifndef _RAID10_H 1#ifndef _RAID10_H
2#define _RAID10_H 2#define _RAID10_H
3 3
4#include <linux/raid/md.h>
5
6typedef struct mirror_info mirror_info_t; 4typedef struct mirror_info mirror_info_t;
7 5
8struct mirror_info { 6struct mirror_info {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a5ba080d303b..3bbc6d647044 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -43,11 +43,14 @@
43 * miss any bits. 43 * miss any bits.
44 */ 44 */
45 45
46#include <linux/blkdev.h>
46#include <linux/kthread.h> 47#include <linux/kthread.h>
47#include "raid6.h" 48#include <linux/raid/pq.h>
48
49#include <linux/raid/bitmap.h>
50#include <linux/async_tx.h> 49#include <linux/async_tx.h>
50#include <linux/seq_file.h>
51#include "md.h"
52#include "raid5.h"
53#include "bitmap.h"
51 54
52/* 55/*
53 * Stripe cache 56 * Stripe cache
@@ -91,11 +94,6 @@
91 94
92#define printk_rl(args...) ((void) (printk_ratelimit() && printk(args))) 95#define printk_rl(args...) ((void) (printk_ratelimit() && printk(args)))
93 96
94#if !RAID6_USE_EMPTY_ZERO_PAGE
95/* In .bss so it's zeroed */
96const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
97#endif
98
99/* 97/*
100 * We maintain a biased count of active stripes in the bottom 16 bits of 98 * We maintain a biased count of active stripes in the bottom 16 bits of
101 * bi_phys_segments, and a count of processed stripes in the upper 16 bits 99 * bi_phys_segments, and a count of processed stripes in the upper 16 bits
@@ -130,12 +128,42 @@ static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
130 bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16); 128 bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16);
131} 129}
132 130
131/* Find first data disk in a raid6 stripe */
132static inline int raid6_d0(struct stripe_head *sh)
133{
134 if (sh->ddf_layout)
135 /* ddf always start from first device */
136 return 0;
137 /* md starts just after Q block */
138 if (sh->qd_idx == sh->disks - 1)
139 return 0;
140 else
141 return sh->qd_idx + 1;
142}
133static inline int raid6_next_disk(int disk, int raid_disks) 143static inline int raid6_next_disk(int disk, int raid_disks)
134{ 144{
135 disk++; 145 disk++;
136 return (disk < raid_disks) ? disk : 0; 146 return (disk < raid_disks) ? disk : 0;
137} 147}
138 148
149/* When walking through the disks in a raid5, starting at raid6_d0,
150 * We need to map each disk to a 'slot', where the data disks are slot
151 * 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk
152 * is raid_disks-1. This help does that mapping.
153 */
154static int raid6_idx_to_slot(int idx, struct stripe_head *sh,
155 int *count, int syndrome_disks)
156{
157 int slot;
158
159 if (idx == sh->pd_idx)
160 return syndrome_disks;
161 if (idx == sh->qd_idx)
162 return syndrome_disks + 1;
163 slot = (*count)++;
164 return slot;
165}
166
139static void return_io(struct bio *return_bi) 167static void return_io(struct bio *return_bi)
140{ 168{
141 struct bio *bi = return_bi; 169 struct bio *bi = return_bi;
@@ -193,6 +221,7 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
193 } 221 }
194 } 222 }
195} 223}
224
196static void release_stripe(struct stripe_head *sh) 225static void release_stripe(struct stripe_head *sh)
197{ 226{
198 raid5_conf_t *conf = sh->raid_conf; 227 raid5_conf_t *conf = sh->raid_conf;
@@ -270,9 +299,11 @@ static int grow_buffers(struct stripe_head *sh, int num)
270 return 0; 299 return 0;
271} 300}
272 301
273static void raid5_build_block(struct stripe_head *sh, int i); 302static void raid5_build_block(struct stripe_head *sh, int i, int previous);
303static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
304 struct stripe_head *sh);
274 305
275static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int disks) 306static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
276{ 307{
277 raid5_conf_t *conf = sh->raid_conf; 308 raid5_conf_t *conf = sh->raid_conf;
278 int i; 309 int i;
@@ -287,11 +318,12 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int
287 318
288 remove_hash(sh); 319 remove_hash(sh);
289 320
321 sh->generation = conf->generation - previous;
322 sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks;
290 sh->sector = sector; 323 sh->sector = sector;
291 sh->pd_idx = pd_idx; 324 stripe_set_idx(sector, conf, previous, sh);
292 sh->state = 0; 325 sh->state = 0;
293 326
294 sh->disks = disks;
295 327
296 for (i = sh->disks; i--; ) { 328 for (i = sh->disks; i--; ) {
297 struct r5dev *dev = &sh->dev[i]; 329 struct r5dev *dev = &sh->dev[i];
@@ -305,12 +337,13 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int
305 BUG(); 337 BUG();
306 } 338 }
307 dev->flags = 0; 339 dev->flags = 0;
308 raid5_build_block(sh, i); 340 raid5_build_block(sh, i, previous);
309 } 341 }
310 insert_hash(conf, sh); 342 insert_hash(conf, sh);
311} 343}
312 344
313static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, int disks) 345static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector,
346 short generation)
314{ 347{
315 struct stripe_head *sh; 348 struct stripe_head *sh;
316 struct hlist_node *hn; 349 struct hlist_node *hn;
@@ -318,7 +351,7 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, in
318 CHECK_DEVLOCK(); 351 CHECK_DEVLOCK();
319 pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector); 352 pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector);
320 hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash) 353 hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash)
321 if (sh->sector == sector && sh->disks == disks) 354 if (sh->sector == sector && sh->generation == generation)
322 return sh; 355 return sh;
323 pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector); 356 pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector);
324 return NULL; 357 return NULL;
@@ -327,8 +360,9 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, in
327static void unplug_slaves(mddev_t *mddev); 360static void unplug_slaves(mddev_t *mddev);
328static void raid5_unplug_device(struct request_queue *q); 361static void raid5_unplug_device(struct request_queue *q);
329 362
330static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector, int disks, 363static struct stripe_head *
331 int pd_idx, int noblock) 364get_active_stripe(raid5_conf_t *conf, sector_t sector,
365 int previous, int noblock)
332{ 366{
333 struct stripe_head *sh; 367 struct stripe_head *sh;
334 368
@@ -340,7 +374,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
340 wait_event_lock_irq(conf->wait_for_stripe, 374 wait_event_lock_irq(conf->wait_for_stripe,
341 conf->quiesce == 0, 375 conf->quiesce == 0,
342 conf->device_lock, /* nothing */); 376 conf->device_lock, /* nothing */);
343 sh = __find_stripe(conf, sector, disks); 377 sh = __find_stripe(conf, sector, conf->generation - previous);
344 if (!sh) { 378 if (!sh) {
345 if (!conf->inactive_blocked) 379 if (!conf->inactive_blocked)
346 sh = get_free_stripe(conf); 380 sh = get_free_stripe(conf);
@@ -358,10 +392,11 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
358 ); 392 );
359 conf->inactive_blocked = 0; 393 conf->inactive_blocked = 0;
360 } else 394 } else
361 init_stripe(sh, sector, pd_idx, disks); 395 init_stripe(sh, sector, previous);
362 } else { 396 } else {
363 if (atomic_read(&sh->count)) { 397 if (atomic_read(&sh->count)) {
364 BUG_ON(!list_empty(&sh->lru)); 398 BUG_ON(!list_empty(&sh->lru)
399 && !test_bit(STRIPE_EXPANDING, &sh->state));
365 } else { 400 } else {
366 if (!test_bit(STRIPE_HANDLE, &sh->state)) 401 if (!test_bit(STRIPE_HANDLE, &sh->state))
367 atomic_inc(&conf->active_stripes); 402 atomic_inc(&conf->active_stripes);
@@ -895,8 +930,10 @@ static int grow_stripes(raid5_conf_t *conf, int num)
895 struct kmem_cache *sc; 930 struct kmem_cache *sc;
896 int devs = conf->raid_disks; 931 int devs = conf->raid_disks;
897 932
898 sprintf(conf->cache_name[0], "raid5-%s", mdname(conf->mddev)); 933 sprintf(conf->cache_name[0],
899 sprintf(conf->cache_name[1], "raid5-%s-alt", mdname(conf->mddev)); 934 "raid%d-%s", conf->level, mdname(conf->mddev));
935 sprintf(conf->cache_name[1],
936 "raid%d-%s-alt", conf->level, mdname(conf->mddev));
900 conf->active_name = 0; 937 conf->active_name = 0;
901 sc = kmem_cache_create(conf->cache_name[conf->active_name], 938 sc = kmem_cache_create(conf->cache_name[conf->active_name],
902 sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), 939 sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
@@ -911,7 +948,6 @@ static int grow_stripes(raid5_conf_t *conf, int num)
911 return 0; 948 return 0;
912} 949}
913 950
914#ifdef CONFIG_MD_RAID5_RESHAPE
915static int resize_stripes(raid5_conf_t *conf, int newsize) 951static int resize_stripes(raid5_conf_t *conf, int newsize)
916{ 952{
917 /* Make all the stripes able to hold 'newsize' devices. 953 /* Make all the stripes able to hold 'newsize' devices.
@@ -1036,7 +1072,6 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
1036 conf->pool_size = newsize; 1072 conf->pool_size = newsize;
1037 return err; 1073 return err;
1038} 1074}
1039#endif
1040 1075
1041static int drop_one_stripe(raid5_conf_t *conf) 1076static int drop_one_stripe(raid5_conf_t *conf)
1042{ 1077{
@@ -1066,7 +1101,7 @@ static void shrink_stripes(raid5_conf_t *conf)
1066 1101
1067static void raid5_end_read_request(struct bio * bi, int error) 1102static void raid5_end_read_request(struct bio * bi, int error)
1068{ 1103{
1069 struct stripe_head *sh = bi->bi_private; 1104 struct stripe_head *sh = bi->bi_private;
1070 raid5_conf_t *conf = sh->raid_conf; 1105 raid5_conf_t *conf = sh->raid_conf;
1071 int disks = sh->disks, i; 1106 int disks = sh->disks, i;
1072 int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); 1107 int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -1148,7 +1183,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
1148 1183
1149static void raid5_end_write_request(struct bio *bi, int error) 1184static void raid5_end_write_request(struct bio *bi, int error)
1150{ 1185{
1151 struct stripe_head *sh = bi->bi_private; 1186 struct stripe_head *sh = bi->bi_private;
1152 raid5_conf_t *conf = sh->raid_conf; 1187 raid5_conf_t *conf = sh->raid_conf;
1153 int disks = sh->disks, i; 1188 int disks = sh->disks, i;
1154 int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); 1189 int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -1176,9 +1211,9 @@ static void raid5_end_write_request(struct bio *bi, int error)
1176} 1211}
1177 1212
1178 1213
1179static sector_t compute_blocknr(struct stripe_head *sh, int i); 1214static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous);
1180 1215
1181static void raid5_build_block(struct stripe_head *sh, int i) 1216static void raid5_build_block(struct stripe_head *sh, int i, int previous)
1182{ 1217{
1183 struct r5dev *dev = &sh->dev[i]; 1218 struct r5dev *dev = &sh->dev[i];
1184 1219
@@ -1194,7 +1229,7 @@ static void raid5_build_block(struct stripe_head *sh, int i)
1194 dev->req.bi_private = sh; 1229 dev->req.bi_private = sh;
1195 1230
1196 dev->flags = 0; 1231 dev->flags = 0;
1197 dev->sector = compute_blocknr(sh, i); 1232 dev->sector = compute_blocknr(sh, i, previous);
1198} 1233}
1199 1234
1200static void error(mddev_t *mddev, mdk_rdev_t *rdev) 1235static void error(mddev_t *mddev, mdk_rdev_t *rdev)
@@ -1227,15 +1262,23 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
1227 * Input: a 'big' sector number, 1262 * Input: a 'big' sector number,
1228 * Output: index of the data and parity disk, and the sector # in them. 1263 * Output: index of the data and parity disk, and the sector # in them.
1229 */ 1264 */
1230static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks, 1265static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
1231 unsigned int data_disks, unsigned int * dd_idx, 1266 int previous, int *dd_idx,
1232 unsigned int * pd_idx, raid5_conf_t *conf) 1267 struct stripe_head *sh)
1233{ 1268{
1234 long stripe; 1269 long stripe;
1235 unsigned long chunk_number; 1270 unsigned long chunk_number;
1236 unsigned int chunk_offset; 1271 unsigned int chunk_offset;
1272 int pd_idx, qd_idx;
1273 int ddf_layout = 0;
1237 sector_t new_sector; 1274 sector_t new_sector;
1238 int sectors_per_chunk = conf->chunk_size >> 9; 1275 int algorithm = previous ? conf->prev_algo
1276 : conf->algorithm;
1277 int sectors_per_chunk = previous ? (conf->prev_chunk >> 9)
1278 : (conf->chunk_size >> 9);
1279 int raid_disks = previous ? conf->previous_raid_disks
1280 : conf->raid_disks;
1281 int data_disks = raid_disks - conf->max_degraded;
1239 1282
1240 /* First compute the information on this sector */ 1283 /* First compute the information on this sector */
1241 1284
@@ -1259,68 +1302,170 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
1259 /* 1302 /*
1260 * Select the parity disk based on the user selected algorithm. 1303 * Select the parity disk based on the user selected algorithm.
1261 */ 1304 */
1305 pd_idx = qd_idx = ~0;
1262 switch(conf->level) { 1306 switch(conf->level) {
1263 case 4: 1307 case 4:
1264 *pd_idx = data_disks; 1308 pd_idx = data_disks;
1265 break; 1309 break;
1266 case 5: 1310 case 5:
1267 switch (conf->algorithm) { 1311 switch (algorithm) {
1268 case ALGORITHM_LEFT_ASYMMETRIC: 1312 case ALGORITHM_LEFT_ASYMMETRIC:
1269 *pd_idx = data_disks - stripe % raid_disks; 1313 pd_idx = data_disks - stripe % raid_disks;
1270 if (*dd_idx >= *pd_idx) 1314 if (*dd_idx >= pd_idx)
1271 (*dd_idx)++; 1315 (*dd_idx)++;
1272 break; 1316 break;
1273 case ALGORITHM_RIGHT_ASYMMETRIC: 1317 case ALGORITHM_RIGHT_ASYMMETRIC:
1274 *pd_idx = stripe % raid_disks; 1318 pd_idx = stripe % raid_disks;
1275 if (*dd_idx >= *pd_idx) 1319 if (*dd_idx >= pd_idx)
1276 (*dd_idx)++; 1320 (*dd_idx)++;
1277 break; 1321 break;
1278 case ALGORITHM_LEFT_SYMMETRIC: 1322 case ALGORITHM_LEFT_SYMMETRIC:
1279 *pd_idx = data_disks - stripe % raid_disks; 1323 pd_idx = data_disks - stripe % raid_disks;
1280 *dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks; 1324 *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
1281 break; 1325 break;
1282 case ALGORITHM_RIGHT_SYMMETRIC: 1326 case ALGORITHM_RIGHT_SYMMETRIC:
1283 *pd_idx = stripe % raid_disks; 1327 pd_idx = stripe % raid_disks;
1284 *dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks; 1328 *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
1329 break;
1330 case ALGORITHM_PARITY_0:
1331 pd_idx = 0;
1332 (*dd_idx)++;
1333 break;
1334 case ALGORITHM_PARITY_N:
1335 pd_idx = data_disks;
1285 break; 1336 break;
1286 default: 1337 default:
1287 printk(KERN_ERR "raid5: unsupported algorithm %d\n", 1338 printk(KERN_ERR "raid5: unsupported algorithm %d\n",
1288 conf->algorithm); 1339 algorithm);
1340 BUG();
1289 } 1341 }
1290 break; 1342 break;
1291 case 6: 1343 case 6:
1292 1344
1293 /**** FIX THIS ****/ 1345 switch (algorithm) {
1294 switch (conf->algorithm) {
1295 case ALGORITHM_LEFT_ASYMMETRIC: 1346 case ALGORITHM_LEFT_ASYMMETRIC:
1296 *pd_idx = raid_disks - 1 - (stripe % raid_disks); 1347 pd_idx = raid_disks - 1 - (stripe % raid_disks);
1297 if (*pd_idx == raid_disks-1) 1348 qd_idx = pd_idx + 1;
1298 (*dd_idx)++; /* Q D D D P */ 1349 if (pd_idx == raid_disks-1) {
1299 else if (*dd_idx >= *pd_idx) 1350 (*dd_idx)++; /* Q D D D P */
1351 qd_idx = 0;
1352 } else if (*dd_idx >= pd_idx)
1300 (*dd_idx) += 2; /* D D P Q D */ 1353 (*dd_idx) += 2; /* D D P Q D */
1301 break; 1354 break;
1302 case ALGORITHM_RIGHT_ASYMMETRIC: 1355 case ALGORITHM_RIGHT_ASYMMETRIC:
1303 *pd_idx = stripe % raid_disks; 1356 pd_idx = stripe % raid_disks;
1304 if (*pd_idx == raid_disks-1) 1357 qd_idx = pd_idx + 1;
1305 (*dd_idx)++; /* Q D D D P */ 1358 if (pd_idx == raid_disks-1) {
1306 else if (*dd_idx >= *pd_idx) 1359 (*dd_idx)++; /* Q D D D P */
1360 qd_idx = 0;
1361 } else if (*dd_idx >= pd_idx)
1307 (*dd_idx) += 2; /* D D P Q D */ 1362 (*dd_idx) += 2; /* D D P Q D */
1308 break; 1363 break;
1309 case ALGORITHM_LEFT_SYMMETRIC: 1364 case ALGORITHM_LEFT_SYMMETRIC:
1310 *pd_idx = raid_disks - 1 - (stripe % raid_disks); 1365 pd_idx = raid_disks - 1 - (stripe % raid_disks);
1311 *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks; 1366 qd_idx = (pd_idx + 1) % raid_disks;
1367 *dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks;
1312 break; 1368 break;
1313 case ALGORITHM_RIGHT_SYMMETRIC: 1369 case ALGORITHM_RIGHT_SYMMETRIC:
1314 *pd_idx = stripe % raid_disks; 1370 pd_idx = stripe % raid_disks;
1315 *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks; 1371 qd_idx = (pd_idx + 1) % raid_disks;
1372 *dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks;
1373 break;
1374
1375 case ALGORITHM_PARITY_0:
1376 pd_idx = 0;
1377 qd_idx = 1;
1378 (*dd_idx) += 2;
1379 break;
1380 case ALGORITHM_PARITY_N:
1381 pd_idx = data_disks;
1382 qd_idx = data_disks + 1;
1316 break; 1383 break;
1384
1385 case ALGORITHM_ROTATING_ZERO_RESTART:
1386 /* Exactly the same as RIGHT_ASYMMETRIC, but order
1387 * of blocks for computing Q is different.
1388 */
1389 pd_idx = stripe % raid_disks;
1390 qd_idx = pd_idx + 1;
1391 if (pd_idx == raid_disks-1) {
1392 (*dd_idx)++; /* Q D D D P */
1393 qd_idx = 0;
1394 } else if (*dd_idx >= pd_idx)
1395 (*dd_idx) += 2; /* D D P Q D */
1396 ddf_layout = 1;
1397 break;
1398
1399 case ALGORITHM_ROTATING_N_RESTART:
1400 /* Same as left_asymmetric, but first stripe is
1401 * D D D P Q rather than
1402 * Q D D D P
1403 */
1404 pd_idx = raid_disks - 1 - ((stripe + 1) % raid_disks);
1405 qd_idx = pd_idx + 1;
1406 if (pd_idx == raid_disks-1) {
1407 (*dd_idx)++; /* Q D D D P */
1408 qd_idx = 0;
1409 } else if (*dd_idx >= pd_idx)
1410 (*dd_idx) += 2; /* D D P Q D */
1411 ddf_layout = 1;
1412 break;
1413
1414 case ALGORITHM_ROTATING_N_CONTINUE:
1415 /* Same as left_symmetric but Q is before P */
1416 pd_idx = raid_disks - 1 - (stripe % raid_disks);
1417 qd_idx = (pd_idx + raid_disks - 1) % raid_disks;
1418 *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
1419 ddf_layout = 1;
1420 break;
1421
1422 case ALGORITHM_LEFT_ASYMMETRIC_6:
1423 /* RAID5 left_asymmetric, with Q on last device */
1424 pd_idx = data_disks - stripe % (raid_disks-1);
1425 if (*dd_idx >= pd_idx)
1426 (*dd_idx)++;
1427 qd_idx = raid_disks - 1;
1428 break;
1429
1430 case ALGORITHM_RIGHT_ASYMMETRIC_6:
1431 pd_idx = stripe % (raid_disks-1);
1432 if (*dd_idx >= pd_idx)
1433 (*dd_idx)++;
1434 qd_idx = raid_disks - 1;
1435 break;
1436
1437 case ALGORITHM_LEFT_SYMMETRIC_6:
1438 pd_idx = data_disks - stripe % (raid_disks-1);
1439 *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1);
1440 qd_idx = raid_disks - 1;
1441 break;
1442
1443 case ALGORITHM_RIGHT_SYMMETRIC_6:
1444 pd_idx = stripe % (raid_disks-1);
1445 *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1);
1446 qd_idx = raid_disks - 1;
1447 break;
1448
1449 case ALGORITHM_PARITY_0_6:
1450 pd_idx = 0;
1451 (*dd_idx)++;
1452 qd_idx = raid_disks - 1;
1453 break;
1454
1455
1317 default: 1456 default:
1318 printk(KERN_CRIT "raid6: unsupported algorithm %d\n", 1457 printk(KERN_CRIT "raid6: unsupported algorithm %d\n",
1319 conf->algorithm); 1458 algorithm);
1459 BUG();
1320 } 1460 }
1321 break; 1461 break;
1322 } 1462 }
1323 1463
1464 if (sh) {
1465 sh->pd_idx = pd_idx;
1466 sh->qd_idx = qd_idx;
1467 sh->ddf_layout = ddf_layout;
1468 }
1324 /* 1469 /*
1325 * Finally, compute the new sector number 1470 * Finally, compute the new sector number
1326 */ 1471 */
@@ -1329,17 +1474,21 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
1329} 1474}
1330 1475
1331 1476
1332static sector_t compute_blocknr(struct stripe_head *sh, int i) 1477static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous)
1333{ 1478{
1334 raid5_conf_t *conf = sh->raid_conf; 1479 raid5_conf_t *conf = sh->raid_conf;
1335 int raid_disks = sh->disks; 1480 int raid_disks = sh->disks;
1336 int data_disks = raid_disks - conf->max_degraded; 1481 int data_disks = raid_disks - conf->max_degraded;
1337 sector_t new_sector = sh->sector, check; 1482 sector_t new_sector = sh->sector, check;
1338 int sectors_per_chunk = conf->chunk_size >> 9; 1483 int sectors_per_chunk = previous ? (conf->prev_chunk >> 9)
1484 : (conf->chunk_size >> 9);
1485 int algorithm = previous ? conf->prev_algo
1486 : conf->algorithm;
1339 sector_t stripe; 1487 sector_t stripe;
1340 int chunk_offset; 1488 int chunk_offset;
1341 int chunk_number, dummy1, dummy2, dd_idx = i; 1489 int chunk_number, dummy1, dd_idx = i;
1342 sector_t r_sector; 1490 sector_t r_sector;
1491 struct stripe_head sh2;
1343 1492
1344 1493
1345 chunk_offset = sector_div(new_sector, sectors_per_chunk); 1494 chunk_offset = sector_div(new_sector, sectors_per_chunk);
@@ -1351,7 +1500,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
1351 switch(conf->level) { 1500 switch(conf->level) {
1352 case 4: break; 1501 case 4: break;
1353 case 5: 1502 case 5:
1354 switch (conf->algorithm) { 1503 switch (algorithm) {
1355 case ALGORITHM_LEFT_ASYMMETRIC: 1504 case ALGORITHM_LEFT_ASYMMETRIC:
1356 case ALGORITHM_RIGHT_ASYMMETRIC: 1505 case ALGORITHM_RIGHT_ASYMMETRIC:
1357 if (i > sh->pd_idx) 1506 if (i > sh->pd_idx)
@@ -1363,19 +1512,27 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
1363 i += raid_disks; 1512 i += raid_disks;
1364 i -= (sh->pd_idx + 1); 1513 i -= (sh->pd_idx + 1);
1365 break; 1514 break;
1515 case ALGORITHM_PARITY_0:
1516 i -= 1;
1517 break;
1518 case ALGORITHM_PARITY_N:
1519 break;
1366 default: 1520 default:
1367 printk(KERN_ERR "raid5: unsupported algorithm %d\n", 1521 printk(KERN_ERR "raid5: unsupported algorithm %d\n",
1368 conf->algorithm); 1522 algorithm);
1523 BUG();
1369 } 1524 }
1370 break; 1525 break;
1371 case 6: 1526 case 6:
1372 if (i == raid6_next_disk(sh->pd_idx, raid_disks)) 1527 if (i == sh->qd_idx)
1373 return 0; /* It is the Q disk */ 1528 return 0; /* It is the Q disk */
1374 switch (conf->algorithm) { 1529 switch (algorithm) {
1375 case ALGORITHM_LEFT_ASYMMETRIC: 1530 case ALGORITHM_LEFT_ASYMMETRIC:
1376 case ALGORITHM_RIGHT_ASYMMETRIC: 1531 case ALGORITHM_RIGHT_ASYMMETRIC:
1377 if (sh->pd_idx == raid_disks-1) 1532 case ALGORITHM_ROTATING_ZERO_RESTART:
1378 i--; /* Q D D D P */ 1533 case ALGORITHM_ROTATING_N_RESTART:
1534 if (sh->pd_idx == raid_disks-1)
1535 i--; /* Q D D D P */
1379 else if (i > sh->pd_idx) 1536 else if (i > sh->pd_idx)
1380 i -= 2; /* D D P Q D */ 1537 i -= 2; /* D D P Q D */
1381 break; 1538 break;
@@ -1390,9 +1547,35 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
1390 i -= (sh->pd_idx + 2); 1547 i -= (sh->pd_idx + 2);
1391 } 1548 }
1392 break; 1549 break;
1550 case ALGORITHM_PARITY_0:
1551 i -= 2;
1552 break;
1553 case ALGORITHM_PARITY_N:
1554 break;
1555 case ALGORITHM_ROTATING_N_CONTINUE:
1556 if (sh->pd_idx == 0)
1557 i--; /* P D D D Q */
1558 else if (i > sh->pd_idx)
1559 i -= 2; /* D D Q P D */
1560 break;
1561 case ALGORITHM_LEFT_ASYMMETRIC_6:
1562 case ALGORITHM_RIGHT_ASYMMETRIC_6:
1563 if (i > sh->pd_idx)
1564 i--;
1565 break;
1566 case ALGORITHM_LEFT_SYMMETRIC_6:
1567 case ALGORITHM_RIGHT_SYMMETRIC_6:
1568 if (i < sh->pd_idx)
1569 i += data_disks + 1;
1570 i -= (sh->pd_idx + 1);
1571 break;
1572 case ALGORITHM_PARITY_0_6:
1573 i -= 1;
1574 break;
1393 default: 1575 default:
1394 printk(KERN_CRIT "raid6: unsupported algorithm %d\n", 1576 printk(KERN_CRIT "raid6: unsupported algorithm %d\n",
1395 conf->algorithm); 1577 algorithm);
1578 BUG();
1396 } 1579 }
1397 break; 1580 break;
1398 } 1581 }
@@ -1400,8 +1583,10 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
1400 chunk_number = stripe * data_disks + i; 1583 chunk_number = stripe * data_disks + i;
1401 r_sector = (sector_t)chunk_number * sectors_per_chunk + chunk_offset; 1584 r_sector = (sector_t)chunk_number * sectors_per_chunk + chunk_offset;
1402 1585
1403 check = raid5_compute_sector(r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf); 1586 check = raid5_compute_sector(conf, r_sector,
1404 if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) { 1587 previous, &dummy1, &sh2);
1588 if (check != sh->sector || dummy1 != dd_idx || sh2.pd_idx != sh->pd_idx
1589 || sh2.qd_idx != sh->qd_idx) {
1405 printk(KERN_ERR "compute_blocknr: map not correct\n"); 1590 printk(KERN_ERR "compute_blocknr: map not correct\n");
1406 return 0; 1591 return 0;
1407 } 1592 }
@@ -1468,14 +1653,16 @@ static void copy_data(int frombio, struct bio *bio,
1468 1653
1469static void compute_parity6(struct stripe_head *sh, int method) 1654static void compute_parity6(struct stripe_head *sh, int method)
1470{ 1655{
1471 raid6_conf_t *conf = sh->raid_conf; 1656 raid5_conf_t *conf = sh->raid_conf;
1472 int i, pd_idx = sh->pd_idx, qd_idx, d0_idx, disks = sh->disks, count; 1657 int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
1658 int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
1473 struct bio *chosen; 1659 struct bio *chosen;
1474 /**** FIX THIS: This could be very bad if disks is close to 256 ****/ 1660 /**** FIX THIS: This could be very bad if disks is close to 256 ****/
1475 void *ptrs[disks]; 1661 void *ptrs[syndrome_disks+2];
1476 1662
1477 qd_idx = raid6_next_disk(pd_idx, disks); 1663 pd_idx = sh->pd_idx;
1478 d0_idx = raid6_next_disk(qd_idx, disks); 1664 qd_idx = sh->qd_idx;
1665 d0_idx = raid6_d0(sh);
1479 1666
1480 pr_debug("compute_parity, stripe %llu, method %d\n", 1667 pr_debug("compute_parity, stripe %llu, method %d\n",
1481 (unsigned long long)sh->sector, method); 1668 (unsigned long long)sh->sector, method);
@@ -1513,24 +1700,29 @@ static void compute_parity6(struct stripe_head *sh, int method)
1513 set_bit(R5_UPTODATE, &sh->dev[i].flags); 1700 set_bit(R5_UPTODATE, &sh->dev[i].flags);
1514 } 1701 }
1515 1702
1516// switch(method) { 1703 /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/
1517// case RECONSTRUCT_WRITE: 1704
1518// case CHECK_PARITY: 1705 for (i = 0; i < disks; i++)
1519// case UPDATE_PARITY: 1706 ptrs[i] = (void *)raid6_empty_zero_page;
1520 /* Note that unlike RAID-5, the ordering of the disks matters greatly. */ 1707
1521 /* FIX: Is this ordering of drives even remotely optimal? */ 1708 count = 0;
1522 count = 0; 1709 i = d0_idx;
1523 i = d0_idx; 1710 do {
1524 do { 1711 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
1525 ptrs[count++] = page_address(sh->dev[i].page); 1712
1526 if (count <= disks-2 && !test_bit(R5_UPTODATE, &sh->dev[i].flags)) 1713 ptrs[slot] = page_address(sh->dev[i].page);
1527 printk("block %d/%d not uptodate on parity calc\n", i,count); 1714 if (slot < syndrome_disks &&
1528 i = raid6_next_disk(i, disks); 1715 !test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
1529 } while ( i != d0_idx ); 1716 printk(KERN_ERR "block %d/%d not uptodate "
1530// break; 1717 "on parity calc\n", i, count);
1531// } 1718 BUG();
1532 1719 }
1533 raid6_call.gen_syndrome(disks, STRIPE_SIZE, ptrs); 1720
1721 i = raid6_next_disk(i, disks);
1722 } while (i != d0_idx);
1723 BUG_ON(count != syndrome_disks);
1724
1725 raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs);
1534 1726
1535 switch(method) { 1727 switch(method) {
1536 case RECONSTRUCT_WRITE: 1728 case RECONSTRUCT_WRITE:
@@ -1552,8 +1744,7 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
1552{ 1744{
1553 int i, count, disks = sh->disks; 1745 int i, count, disks = sh->disks;
1554 void *ptr[MAX_XOR_BLOCKS], *dest, *p; 1746 void *ptr[MAX_XOR_BLOCKS], *dest, *p;
1555 int pd_idx = sh->pd_idx; 1747 int qd_idx = sh->qd_idx;
1556 int qd_idx = raid6_next_disk(pd_idx, disks);
1557 1748
1558 pr_debug("compute_block_1, stripe %llu, idx %d\n", 1749 pr_debug("compute_block_1, stripe %llu, idx %d\n",
1559 (unsigned long long)sh->sector, dd_idx); 1750 (unsigned long long)sh->sector, dd_idx);
@@ -1589,63 +1780,65 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
1589static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) 1780static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
1590{ 1781{
1591 int i, count, disks = sh->disks; 1782 int i, count, disks = sh->disks;
1592 int pd_idx = sh->pd_idx; 1783 int syndrome_disks = sh->ddf_layout ? disks : disks-2;
1593 int qd_idx = raid6_next_disk(pd_idx, disks); 1784 int d0_idx = raid6_d0(sh);
1594 int d0_idx = raid6_next_disk(qd_idx, disks); 1785 int faila = -1, failb = -1;
1595 int faila, failb; 1786 /**** FIX THIS: This could be very bad if disks is close to 256 ****/
1787 void *ptrs[syndrome_disks+2];
1596 1788
1597 /* faila and failb are disk numbers relative to d0_idx */ 1789 for (i = 0; i < disks ; i++)
1598 /* pd_idx become disks-2 and qd_idx become disks-1 */ 1790 ptrs[i] = (void *)raid6_empty_zero_page;
1599 faila = (dd_idx1 < d0_idx) ? dd_idx1+(disks-d0_idx) : dd_idx1-d0_idx; 1791 count = 0;
1600 failb = (dd_idx2 < d0_idx) ? dd_idx2+(disks-d0_idx) : dd_idx2-d0_idx; 1792 i = d0_idx;
1793 do {
1794 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
1795
1796 ptrs[slot] = page_address(sh->dev[i].page);
1797
1798 if (i == dd_idx1)
1799 faila = slot;
1800 if (i == dd_idx2)
1801 failb = slot;
1802 i = raid6_next_disk(i, disks);
1803 } while (i != d0_idx);
1804 BUG_ON(count != syndrome_disks);
1601 1805
1602 BUG_ON(faila == failb); 1806 BUG_ON(faila == failb);
1603 if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; } 1807 if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
1604 1808
1605 pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n", 1809 pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
1606 (unsigned long long)sh->sector, dd_idx1, dd_idx2, faila, failb); 1810 (unsigned long long)sh->sector, dd_idx1, dd_idx2,
1811 faila, failb);
1607 1812
1608 if ( failb == disks-1 ) { 1813 if (failb == syndrome_disks+1) {
1609 /* Q disk is one of the missing disks */ 1814 /* Q disk is one of the missing disks */
1610 if ( faila == disks-2 ) { 1815 if (faila == syndrome_disks) {
1611 /* Missing P+Q, just recompute */ 1816 /* Missing P+Q, just recompute */
1612 compute_parity6(sh, UPDATE_PARITY); 1817 compute_parity6(sh, UPDATE_PARITY);
1613 return; 1818 return;
1614 } else { 1819 } else {
1615 /* We're missing D+Q; recompute D from P */ 1820 /* We're missing D+Q; recompute D from P */
1616 compute_block_1(sh, (dd_idx1 == qd_idx) ? dd_idx2 : dd_idx1, 0); 1821 compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ?
1822 dd_idx2 : dd_idx1),
1823 0);
1617 compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */ 1824 compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */
1618 return; 1825 return;
1619 } 1826 }
1620 } 1827 }
1621 1828
1622 /* We're missing D+P or D+D; build pointer table */ 1829 /* We're missing D+P or D+D; */
1623 { 1830 if (failb == syndrome_disks) {
1624 /**** FIX THIS: This could be very bad if disks is close to 256 ****/ 1831 /* We're missing D+P. */
1625 void *ptrs[disks]; 1832 raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs);
1626 1833 } else {
1627 count = 0; 1834 /* We're missing D+D. */
1628 i = d0_idx; 1835 raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb,
1629 do { 1836 ptrs);
1630 ptrs[count++] = page_address(sh->dev[i].page);
1631 i = raid6_next_disk(i, disks);
1632 if (i != dd_idx1 && i != dd_idx2 &&
1633 !test_bit(R5_UPTODATE, &sh->dev[i].flags))
1634 printk("compute_2 with missing block %d/%d\n", count, i);
1635 } while ( i != d0_idx );
1636
1637 if ( failb == disks-2 ) {
1638 /* We're missing D+P. */
1639 raid6_datap_recov(disks, STRIPE_SIZE, faila, ptrs);
1640 } else {
1641 /* We're missing D+D. */
1642 raid6_2data_recov(disks, STRIPE_SIZE, faila, failb, ptrs);
1643 }
1644
1645 /* Both the above update both missing blocks */
1646 set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
1647 set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
1648 } 1837 }
1838
1839 /* Both the above update both missing blocks */
1840 set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
1841 set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
1649} 1842}
1650 1843
1651static void 1844static void
@@ -1800,17 +1993,21 @@ static int page_is_zero(struct page *p)
1800 memcmp(a, a+4, STRIPE_SIZE-4)==0); 1993 memcmp(a, a+4, STRIPE_SIZE-4)==0);
1801} 1994}
1802 1995
1803static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int disks) 1996static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
1997 struct stripe_head *sh)
1804{ 1998{
1805 int sectors_per_chunk = conf->chunk_size >> 9; 1999 int sectors_per_chunk =
1806 int pd_idx, dd_idx; 2000 previous ? (conf->prev_chunk >> 9)
2001 : (conf->chunk_size >> 9);
2002 int dd_idx;
1807 int chunk_offset = sector_div(stripe, sectors_per_chunk); 2003 int chunk_offset = sector_div(stripe, sectors_per_chunk);
2004 int disks = previous ? conf->previous_raid_disks : conf->raid_disks;
1808 2005
1809 raid5_compute_sector(stripe * (disks - conf->max_degraded) 2006 raid5_compute_sector(conf,
2007 stripe * (disks - conf->max_degraded)
1810 *sectors_per_chunk + chunk_offset, 2008 *sectors_per_chunk + chunk_offset,
1811 disks, disks - conf->max_degraded, 2009 previous,
1812 &dd_idx, &pd_idx, conf); 2010 &dd_idx, sh);
1813 return pd_idx;
1814} 2011}
1815 2012
1816static void 2013static void
@@ -2181,7 +2378,7 @@ static void handle_stripe_dirtying6(raid5_conf_t *conf,
2181 struct r6_state *r6s, int disks) 2378 struct r6_state *r6s, int disks)
2182{ 2379{
2183 int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i; 2380 int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i;
2184 int qd_idx = r6s->qd_idx; 2381 int qd_idx = sh->qd_idx;
2185 for (i = disks; i--; ) { 2382 for (i = disks; i--; ) {
2186 struct r5dev *dev = &sh->dev[i]; 2383 struct r5dev *dev = &sh->dev[i];
2187 /* Would I have to read this buffer for reconstruct_write */ 2384 /* Would I have to read this buffer for reconstruct_write */
@@ -2371,7 +2568,7 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
2371 int update_p = 0, update_q = 0; 2568 int update_p = 0, update_q = 0;
2372 struct r5dev *dev; 2569 struct r5dev *dev;
2373 int pd_idx = sh->pd_idx; 2570 int pd_idx = sh->pd_idx;
2374 int qd_idx = r6s->qd_idx; 2571 int qd_idx = sh->qd_idx;
2375 2572
2376 set_bit(STRIPE_HANDLE, &sh->state); 2573 set_bit(STRIPE_HANDLE, &sh->state);
2377 2574
@@ -2467,17 +2664,14 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
2467 struct dma_async_tx_descriptor *tx = NULL; 2664 struct dma_async_tx_descriptor *tx = NULL;
2468 clear_bit(STRIPE_EXPAND_SOURCE, &sh->state); 2665 clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
2469 for (i = 0; i < sh->disks; i++) 2666 for (i = 0; i < sh->disks; i++)
2470 if (i != sh->pd_idx && (!r6s || i != r6s->qd_idx)) { 2667 if (i != sh->pd_idx && i != sh->qd_idx) {
2471 int dd_idx, pd_idx, j; 2668 int dd_idx, j;
2472 struct stripe_head *sh2; 2669 struct stripe_head *sh2;
2473 2670
2474 sector_t bn = compute_blocknr(sh, i); 2671 sector_t bn = compute_blocknr(sh, i, 1);
2475 sector_t s = raid5_compute_sector(bn, conf->raid_disks, 2672 sector_t s = raid5_compute_sector(conf, bn, 0,
2476 conf->raid_disks - 2673 &dd_idx, NULL);
2477 conf->max_degraded, &dd_idx, 2674 sh2 = get_active_stripe(conf, s, 0, 1);
2478 &pd_idx, conf);
2479 sh2 = get_active_stripe(conf, s, conf->raid_disks,
2480 pd_idx, 1);
2481 if (sh2 == NULL) 2675 if (sh2 == NULL)
2482 /* so far only the early blocks of this stripe 2676 /* so far only the early blocks of this stripe
2483 * have been requested. When later blocks 2677 * have been requested. When later blocks
@@ -2500,8 +2694,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
2500 set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); 2694 set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
2501 for (j = 0; j < conf->raid_disks; j++) 2695 for (j = 0; j < conf->raid_disks; j++)
2502 if (j != sh2->pd_idx && 2696 if (j != sh2->pd_idx &&
2503 (!r6s || j != raid6_next_disk(sh2->pd_idx, 2697 (!r6s || j != sh2->qd_idx) &&
2504 sh2->disks)) &&
2505 !test_bit(R5_Expanded, &sh2->dev[j].flags)) 2698 !test_bit(R5_Expanded, &sh2->dev[j].flags))
2506 break; 2699 break;
2507 if (j == conf->raid_disks) { 2700 if (j == conf->raid_disks) {
@@ -2750,6 +2943,23 @@ static bool handle_stripe5(struct stripe_head *sh)
2750 2943
2751 /* Finish reconstruct operations initiated by the expansion process */ 2944 /* Finish reconstruct operations initiated by the expansion process */
2752 if (sh->reconstruct_state == reconstruct_state_result) { 2945 if (sh->reconstruct_state == reconstruct_state_result) {
2946 struct stripe_head *sh2
2947 = get_active_stripe(conf, sh->sector, 1, 1);
2948 if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
2949 /* sh cannot be written until sh2 has been read.
2950 * so arrange for sh to be delayed a little
2951 */
2952 set_bit(STRIPE_DELAYED, &sh->state);
2953 set_bit(STRIPE_HANDLE, &sh->state);
2954 if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
2955 &sh2->state))
2956 atomic_inc(&conf->preread_active_stripes);
2957 release_stripe(sh2);
2958 goto unlock;
2959 }
2960 if (sh2)
2961 release_stripe(sh2);
2962
2753 sh->reconstruct_state = reconstruct_state_idle; 2963 sh->reconstruct_state = reconstruct_state_idle;
2754 clear_bit(STRIPE_EXPANDING, &sh->state); 2964 clear_bit(STRIPE_EXPANDING, &sh->state);
2755 for (i = conf->raid_disks; i--; ) { 2965 for (i = conf->raid_disks; i--; ) {
@@ -2763,8 +2973,7 @@ static bool handle_stripe5(struct stripe_head *sh)
2763 !sh->reconstruct_state) { 2973 !sh->reconstruct_state) {
2764 /* Need to write out all blocks after computing parity */ 2974 /* Need to write out all blocks after computing parity */
2765 sh->disks = conf->raid_disks; 2975 sh->disks = conf->raid_disks;
2766 sh->pd_idx = stripe_to_pdidx(sh->sector, conf, 2976 stripe_set_idx(sh->sector, conf, 0, sh);
2767 conf->raid_disks);
2768 schedule_reconstruction5(sh, &s, 1, 1); 2977 schedule_reconstruction5(sh, &s, 1, 1);
2769 } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { 2978 } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
2770 clear_bit(STRIPE_EXPAND_READY, &sh->state); 2979 clear_bit(STRIPE_EXPAND_READY, &sh->state);
@@ -2796,20 +3005,19 @@ static bool handle_stripe5(struct stripe_head *sh)
2796 3005
2797static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) 3006static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
2798{ 3007{
2799 raid6_conf_t *conf = sh->raid_conf; 3008 raid5_conf_t *conf = sh->raid_conf;
2800 int disks = sh->disks; 3009 int disks = sh->disks;
2801 struct bio *return_bi = NULL; 3010 struct bio *return_bi = NULL;
2802 int i, pd_idx = sh->pd_idx; 3011 int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx;
2803 struct stripe_head_state s; 3012 struct stripe_head_state s;
2804 struct r6_state r6s; 3013 struct r6_state r6s;
2805 struct r5dev *dev, *pdev, *qdev; 3014 struct r5dev *dev, *pdev, *qdev;
2806 mdk_rdev_t *blocked_rdev = NULL; 3015 mdk_rdev_t *blocked_rdev = NULL;
2807 3016
2808 r6s.qd_idx = raid6_next_disk(pd_idx, disks);
2809 pr_debug("handling stripe %llu, state=%#lx cnt=%d, " 3017 pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
2810 "pd_idx=%d, qd_idx=%d\n", 3018 "pd_idx=%d, qd_idx=%d\n",
2811 (unsigned long long)sh->sector, sh->state, 3019 (unsigned long long)sh->sector, sh->state,
2812 atomic_read(&sh->count), pd_idx, r6s.qd_idx); 3020 atomic_read(&sh->count), pd_idx, qd_idx);
2813 memset(&s, 0, sizeof(s)); 3021 memset(&s, 0, sizeof(s));
2814 3022
2815 spin_lock(&sh->lock); 3023 spin_lock(&sh->lock);
@@ -2920,9 +3128,9 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
2920 pdev = &sh->dev[pd_idx]; 3128 pdev = &sh->dev[pd_idx];
2921 r6s.p_failed = (s.failed >= 1 && r6s.failed_num[0] == pd_idx) 3129 r6s.p_failed = (s.failed >= 1 && r6s.failed_num[0] == pd_idx)
2922 || (s.failed >= 2 && r6s.failed_num[1] == pd_idx); 3130 || (s.failed >= 2 && r6s.failed_num[1] == pd_idx);
2923 qdev = &sh->dev[r6s.qd_idx]; 3131 qdev = &sh->dev[qd_idx];
2924 r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == r6s.qd_idx) 3132 r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == qd_idx)
2925 || (s.failed >= 2 && r6s.failed_num[1] == r6s.qd_idx); 3133 || (s.failed >= 2 && r6s.failed_num[1] == qd_idx);
2926 3134
2927 if ( s.written && 3135 if ( s.written &&
2928 ( r6s.p_failed || ((test_bit(R5_Insync, &pdev->flags) 3136 ( r6s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
@@ -2980,10 +3188,26 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
2980 } 3188 }
2981 3189
2982 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { 3190 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
3191 struct stripe_head *sh2
3192 = get_active_stripe(conf, sh->sector, 1, 1);
3193 if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
3194 /* sh cannot be written until sh2 has been read.
3195 * so arrange for sh to be delayed a little
3196 */
3197 set_bit(STRIPE_DELAYED, &sh->state);
3198 set_bit(STRIPE_HANDLE, &sh->state);
3199 if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
3200 &sh2->state))
3201 atomic_inc(&conf->preread_active_stripes);
3202 release_stripe(sh2);
3203 goto unlock;
3204 }
3205 if (sh2)
3206 release_stripe(sh2);
3207
2983 /* Need to write out all blocks after computing P&Q */ 3208 /* Need to write out all blocks after computing P&Q */
2984 sh->disks = conf->raid_disks; 3209 sh->disks = conf->raid_disks;
2985 sh->pd_idx = stripe_to_pdidx(sh->sector, conf, 3210 stripe_set_idx(sh->sector, conf, 0, sh);
2986 conf->raid_disks);
2987 compute_parity6(sh, RECONSTRUCT_WRITE); 3211 compute_parity6(sh, RECONSTRUCT_WRITE);
2988 for (i = conf->raid_disks ; i-- ; ) { 3212 for (i = conf->raid_disks ; i-- ; ) {
2989 set_bit(R5_LOCKED, &sh->dev[i].flags); 3213 set_bit(R5_LOCKED, &sh->dev[i].flags);
@@ -3134,6 +3358,8 @@ static int raid5_mergeable_bvec(struct request_queue *q,
3134 if ((bvm->bi_rw & 1) == WRITE) 3358 if ((bvm->bi_rw & 1) == WRITE)
3135 return biovec->bv_len; /* always allow writes to be mergeable */ 3359 return biovec->bv_len; /* always allow writes to be mergeable */
3136 3360
3361 if (mddev->new_chunk < mddev->chunk_size)
3362 chunk_sectors = mddev->new_chunk >> 9;
3137 max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; 3363 max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
3138 if (max < 0) max = 0; 3364 if (max < 0) max = 0;
3139 if (max <= biovec->bv_len && bio_sectors == 0) 3365 if (max <= biovec->bv_len && bio_sectors == 0)
@@ -3149,6 +3375,8 @@ static int in_chunk_boundary(mddev_t *mddev, struct bio *bio)
3149 unsigned int chunk_sectors = mddev->chunk_size >> 9; 3375 unsigned int chunk_sectors = mddev->chunk_size >> 9;
3150 unsigned int bio_sectors = bio->bi_size >> 9; 3376 unsigned int bio_sectors = bio->bi_size >> 9;
3151 3377
3378 if (mddev->new_chunk < mddev->chunk_size)
3379 chunk_sectors = mddev->new_chunk >> 9;
3152 return chunk_sectors >= 3380 return chunk_sectors >=
3153 ((sector & (chunk_sectors - 1)) + bio_sectors); 3381 ((sector & (chunk_sectors - 1)) + bio_sectors);
3154} 3382}
@@ -3255,9 +3483,7 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
3255{ 3483{
3256 mddev_t *mddev = q->queuedata; 3484 mddev_t *mddev = q->queuedata;
3257 raid5_conf_t *conf = mddev_to_conf(mddev); 3485 raid5_conf_t *conf = mddev_to_conf(mddev);
3258 const unsigned int raid_disks = conf->raid_disks; 3486 unsigned int dd_idx;
3259 const unsigned int data_disks = raid_disks - conf->max_degraded;
3260 unsigned int dd_idx, pd_idx;
3261 struct bio* align_bi; 3487 struct bio* align_bi;
3262 mdk_rdev_t *rdev; 3488 mdk_rdev_t *rdev;
3263 3489
@@ -3266,7 +3492,7 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
3266 return 0; 3492 return 0;
3267 } 3493 }
3268 /* 3494 /*
3269 * use bio_clone to make a copy of the bio 3495 * use bio_clone to make a copy of the bio
3270 */ 3496 */
3271 align_bi = bio_clone(raid_bio, GFP_NOIO); 3497 align_bi = bio_clone(raid_bio, GFP_NOIO);
3272 if (!align_bi) 3498 if (!align_bi)
@@ -3280,12 +3506,9 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
3280 /* 3506 /*
3281 * compute position 3507 * compute position
3282 */ 3508 */
3283 align_bi->bi_sector = raid5_compute_sector(raid_bio->bi_sector, 3509 align_bi->bi_sector = raid5_compute_sector(conf, raid_bio->bi_sector,
3284 raid_disks, 3510 0,
3285 data_disks, 3511 &dd_idx, NULL);
3286 &dd_idx,
3287 &pd_idx,
3288 conf);
3289 3512
3290 rcu_read_lock(); 3513 rcu_read_lock();
3291 rdev = rcu_dereference(conf->disks[dd_idx].rdev); 3514 rdev = rcu_dereference(conf->disks[dd_idx].rdev);
@@ -3377,7 +3600,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
3377{ 3600{
3378 mddev_t *mddev = q->queuedata; 3601 mddev_t *mddev = q->queuedata;
3379 raid5_conf_t *conf = mddev_to_conf(mddev); 3602 raid5_conf_t *conf = mddev_to_conf(mddev);
3380 unsigned int dd_idx, pd_idx; 3603 int dd_idx;
3381 sector_t new_sector; 3604 sector_t new_sector;
3382 sector_t logical_sector, last_sector; 3605 sector_t logical_sector, last_sector;
3383 struct stripe_head *sh; 3606 struct stripe_head *sh;
@@ -3400,7 +3623,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
3400 if (rw == READ && 3623 if (rw == READ &&
3401 mddev->reshape_position == MaxSector && 3624 mddev->reshape_position == MaxSector &&
3402 chunk_aligned_read(q,bi)) 3625 chunk_aligned_read(q,bi))
3403 return 0; 3626 return 0;
3404 3627
3405 logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1); 3628 logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
3406 last_sector = bi->bi_sector + (bi->bi_size>>9); 3629 last_sector = bi->bi_sector + (bi->bi_size>>9);
@@ -3410,26 +3633,31 @@ static int make_request(struct request_queue *q, struct bio * bi)
3410 for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { 3633 for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
3411 DEFINE_WAIT(w); 3634 DEFINE_WAIT(w);
3412 int disks, data_disks; 3635 int disks, data_disks;
3636 int previous;
3413 3637
3414 retry: 3638 retry:
3639 previous = 0;
3640 disks = conf->raid_disks;
3415 prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); 3641 prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
3416 if (likely(conf->expand_progress == MaxSector)) 3642 if (unlikely(conf->reshape_progress != MaxSector)) {
3417 disks = conf->raid_disks; 3643 /* spinlock is needed as reshape_progress may be
3418 else {
3419 /* spinlock is needed as expand_progress may be
3420 * 64bit on a 32bit platform, and so it might be 3644 * 64bit on a 32bit platform, and so it might be
3421 * possible to see a half-updated value 3645 * possible to see a half-updated value
3422 * Ofcourse expand_progress could change after 3646 * Ofcourse reshape_progress could change after
3423 * the lock is dropped, so once we get a reference 3647 * the lock is dropped, so once we get a reference
3424 * to the stripe that we think it is, we will have 3648 * to the stripe that we think it is, we will have
3425 * to check again. 3649 * to check again.
3426 */ 3650 */
3427 spin_lock_irq(&conf->device_lock); 3651 spin_lock_irq(&conf->device_lock);
3428 disks = conf->raid_disks; 3652 if (mddev->delta_disks < 0
3429 if (logical_sector >= conf->expand_progress) 3653 ? logical_sector < conf->reshape_progress
3654 : logical_sector >= conf->reshape_progress) {
3430 disks = conf->previous_raid_disks; 3655 disks = conf->previous_raid_disks;
3431 else { 3656 previous = 1;
3432 if (logical_sector >= conf->expand_lo) { 3657 } else {
3658 if (mddev->delta_disks < 0
3659 ? logical_sector < conf->reshape_safe
3660 : logical_sector >= conf->reshape_safe) {
3433 spin_unlock_irq(&conf->device_lock); 3661 spin_unlock_irq(&conf->device_lock);
3434 schedule(); 3662 schedule();
3435 goto retry; 3663 goto retry;
@@ -3439,15 +3667,17 @@ static int make_request(struct request_queue *q, struct bio * bi)
3439 } 3667 }
3440 data_disks = disks - conf->max_degraded; 3668 data_disks = disks - conf->max_degraded;
3441 3669
3442 new_sector = raid5_compute_sector(logical_sector, disks, data_disks, 3670 new_sector = raid5_compute_sector(conf, logical_sector,
3443 &dd_idx, &pd_idx, conf); 3671 previous,
3672 &dd_idx, NULL);
3444 pr_debug("raid5: make_request, sector %llu logical %llu\n", 3673 pr_debug("raid5: make_request, sector %llu logical %llu\n",
3445 (unsigned long long)new_sector, 3674 (unsigned long long)new_sector,
3446 (unsigned long long)logical_sector); 3675 (unsigned long long)logical_sector);
3447 3676
3448 sh = get_active_stripe(conf, new_sector, disks, pd_idx, (bi->bi_rw&RWA_MASK)); 3677 sh = get_active_stripe(conf, new_sector, previous,
3678 (bi->bi_rw&RWA_MASK));
3449 if (sh) { 3679 if (sh) {
3450 if (unlikely(conf->expand_progress != MaxSector)) { 3680 if (unlikely(previous)) {
3451 /* expansion might have moved on while waiting for a 3681 /* expansion might have moved on while waiting for a
3452 * stripe, so we must do the range check again. 3682 * stripe, so we must do the range check again.
3453 * Expansion could still move past after this 3683 * Expansion could still move past after this
@@ -3458,8 +3688,9 @@ static int make_request(struct request_queue *q, struct bio * bi)
3458 */ 3688 */
3459 int must_retry = 0; 3689 int must_retry = 0;
3460 spin_lock_irq(&conf->device_lock); 3690 spin_lock_irq(&conf->device_lock);
3461 if (logical_sector < conf->expand_progress && 3691 if (mddev->delta_disks < 0
3462 disks == conf->previous_raid_disks) 3692 ? logical_sector >= conf->reshape_progress
3693 : logical_sector < conf->reshape_progress)
3463 /* mismatch, need to try again */ 3694 /* mismatch, need to try again */
3464 must_retry = 1; 3695 must_retry = 1;
3465 spin_unlock_irq(&conf->device_lock); 3696 spin_unlock_irq(&conf->device_lock);
@@ -3514,6 +3745,8 @@ static int make_request(struct request_queue *q, struct bio * bi)
3514 return 0; 3745 return 0;
3515} 3746}
3516 3747
3748static sector_t raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks);
3749
3517static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped) 3750static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped)
3518{ 3751{
3519 /* reshaping is quite different to recovery/resync so it is 3752 /* reshaping is quite different to recovery/resync so it is
@@ -3527,61 +3760,118 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3527 */ 3760 */
3528 raid5_conf_t *conf = (raid5_conf_t *) mddev->private; 3761 raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
3529 struct stripe_head *sh; 3762 struct stripe_head *sh;
3530 int pd_idx;
3531 sector_t first_sector, last_sector; 3763 sector_t first_sector, last_sector;
3532 int raid_disks = conf->previous_raid_disks; 3764 int raid_disks = conf->previous_raid_disks;
3533 int data_disks = raid_disks - conf->max_degraded; 3765 int data_disks = raid_disks - conf->max_degraded;
3534 int new_data_disks = conf->raid_disks - conf->max_degraded; 3766 int new_data_disks = conf->raid_disks - conf->max_degraded;
3535 int i; 3767 int i;
3536 int dd_idx; 3768 int dd_idx;
3537 sector_t writepos, safepos, gap; 3769 sector_t writepos, readpos, safepos;
3538 3770 sector_t stripe_addr;
3539 if (sector_nr == 0 && 3771 int reshape_sectors;
3540 conf->expand_progress != 0) { 3772 struct list_head stripes;
3541 /* restarting in the middle, skip the initial sectors */ 3773
3542 sector_nr = conf->expand_progress; 3774 if (sector_nr == 0) {
3775 /* If restarting in the middle, skip the initial sectors */
3776 if (mddev->delta_disks < 0 &&
3777 conf->reshape_progress < raid5_size(mddev, 0, 0)) {
3778 sector_nr = raid5_size(mddev, 0, 0)
3779 - conf->reshape_progress;
3780 } else if (mddev->delta_disks > 0 &&
3781 conf->reshape_progress > 0)
3782 sector_nr = conf->reshape_progress;
3543 sector_div(sector_nr, new_data_disks); 3783 sector_div(sector_nr, new_data_disks);
3544 *skipped = 1; 3784 if (sector_nr) {
3545 return sector_nr; 3785 *skipped = 1;
3786 return sector_nr;
3787 }
3546 } 3788 }
3547 3789
3790 /* We need to process a full chunk at a time.
3791 * If old and new chunk sizes differ, we need to process the
3792 * largest of these
3793 */
3794 if (mddev->new_chunk > mddev->chunk_size)
3795 reshape_sectors = mddev->new_chunk / 512;
3796 else
3797 reshape_sectors = mddev->chunk_size / 512;
3798
3548 /* we update the metadata when there is more than 3Meg 3799 /* we update the metadata when there is more than 3Meg
3549 * in the block range (that is rather arbitrary, should 3800 * in the block range (that is rather arbitrary, should
3550 * probably be time based) or when the data about to be 3801 * probably be time based) or when the data about to be
3551 * copied would over-write the source of the data at 3802 * copied would over-write the source of the data at
3552 * the front of the range. 3803 * the front of the range.
3553 * i.e. one new_stripe forward from expand_progress new_maps 3804 * i.e. one new_stripe along from reshape_progress new_maps
3554 * to after where expand_lo old_maps to 3805 * to after where reshape_safe old_maps to
3555 */ 3806 */
3556 writepos = conf->expand_progress + 3807 writepos = conf->reshape_progress;
3557 conf->chunk_size/512*(new_data_disks);
3558 sector_div(writepos, new_data_disks); 3808 sector_div(writepos, new_data_disks);
3559 safepos = conf->expand_lo; 3809 readpos = conf->reshape_progress;
3810 sector_div(readpos, data_disks);
3811 safepos = conf->reshape_safe;
3560 sector_div(safepos, data_disks); 3812 sector_div(safepos, data_disks);
3561 gap = conf->expand_progress - conf->expand_lo; 3813 if (mddev->delta_disks < 0) {
3814 writepos -= reshape_sectors;
3815 readpos += reshape_sectors;
3816 safepos += reshape_sectors;
3817 } else {
3818 writepos += reshape_sectors;
3819 readpos -= reshape_sectors;
3820 safepos -= reshape_sectors;
3821 }
3562 3822
3563 if (writepos >= safepos || 3823 /* 'writepos' is the most advanced device address we might write.
3564 gap > (new_data_disks)*3000*2 /*3Meg*/) { 3824 * 'readpos' is the least advanced device address we might read.
3825 * 'safepos' is the least address recorded in the metadata as having
3826 * been reshaped.
3827 * If 'readpos' is behind 'writepos', then there is no way that we can
3828 * ensure safety in the face of a crash - that must be done by userspace
3829 * making a backup of the data. So in that case there is no particular
3830 * rush to update metadata.
3831 * Otherwise if 'safepos' is behind 'writepos', then we really need to
3832 * update the metadata to advance 'safepos' to match 'readpos' so that
3833 * we can be safe in the event of a crash.
3834 * So we insist on updating metadata if safepos is behind writepos and
3835 * readpos is beyond writepos.
3836 * In any case, update the metadata every 10 seconds.
3837 * Maybe that number should be configurable, but I'm not sure it is
3838 * worth it.... maybe it could be a multiple of safemode_delay???
3839 */
3840 if ((mddev->delta_disks < 0
3841 ? (safepos > writepos && readpos < writepos)
3842 : (safepos < writepos && readpos > writepos)) ||
3843 time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
3565 /* Cannot proceed until we've updated the superblock... */ 3844 /* Cannot proceed until we've updated the superblock... */
3566 wait_event(conf->wait_for_overlap, 3845 wait_event(conf->wait_for_overlap,
3567 atomic_read(&conf->reshape_stripes)==0); 3846 atomic_read(&conf->reshape_stripes)==0);
3568 mddev->reshape_position = conf->expand_progress; 3847 mddev->reshape_position = conf->reshape_progress;
3848 conf->reshape_checkpoint = jiffies;
3569 set_bit(MD_CHANGE_DEVS, &mddev->flags); 3849 set_bit(MD_CHANGE_DEVS, &mddev->flags);
3570 md_wakeup_thread(mddev->thread); 3850 md_wakeup_thread(mddev->thread);
3571 wait_event(mddev->sb_wait, mddev->flags == 0 || 3851 wait_event(mddev->sb_wait, mddev->flags == 0 ||
3572 kthread_should_stop()); 3852 kthread_should_stop());
3573 spin_lock_irq(&conf->device_lock); 3853 spin_lock_irq(&conf->device_lock);
3574 conf->expand_lo = mddev->reshape_position; 3854 conf->reshape_safe = mddev->reshape_position;
3575 spin_unlock_irq(&conf->device_lock); 3855 spin_unlock_irq(&conf->device_lock);
3576 wake_up(&conf->wait_for_overlap); 3856 wake_up(&conf->wait_for_overlap);
3577 } 3857 }
3578 3858
3579 for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) { 3859 if (mddev->delta_disks < 0) {
3860 BUG_ON(conf->reshape_progress == 0);
3861 stripe_addr = writepos;
3862 BUG_ON((mddev->dev_sectors &
3863 ~((sector_t)reshape_sectors - 1))
3864 - reshape_sectors - stripe_addr
3865 != sector_nr);
3866 } else {
3867 BUG_ON(writepos != sector_nr + reshape_sectors);
3868 stripe_addr = sector_nr;
3869 }
3870 INIT_LIST_HEAD(&stripes);
3871 for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
3580 int j; 3872 int j;
3581 int skipped = 0; 3873 int skipped = 0;
3582 pd_idx = stripe_to_pdidx(sector_nr+i, conf, conf->raid_disks); 3874 sh = get_active_stripe(conf, stripe_addr+i, 0, 0);
3583 sh = get_active_stripe(conf, sector_nr+i,
3584 conf->raid_disks, pd_idx, 0);
3585 set_bit(STRIPE_EXPANDING, &sh->state); 3875 set_bit(STRIPE_EXPANDING, &sh->state);
3586 atomic_inc(&conf->reshape_stripes); 3876 atomic_inc(&conf->reshape_stripes);
3587 /* If any of this stripe is beyond the end of the old 3877 /* If any of this stripe is beyond the end of the old
@@ -3592,10 +3882,10 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3592 if (j == sh->pd_idx) 3882 if (j == sh->pd_idx)
3593 continue; 3883 continue;
3594 if (conf->level == 6 && 3884 if (conf->level == 6 &&
3595 j == raid6_next_disk(sh->pd_idx, sh->disks)) 3885 j == sh->qd_idx)
3596 continue; 3886 continue;
3597 s = compute_blocknr(sh, j); 3887 s = compute_blocknr(sh, j, 0);
3598 if (s < mddev->array_sectors) { 3888 if (s < raid5_size(mddev, 0, 0)) {
3599 skipped = 1; 3889 skipped = 1;
3600 continue; 3890 continue;
3601 } 3891 }
@@ -3607,10 +3897,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3607 set_bit(STRIPE_EXPAND_READY, &sh->state); 3897 set_bit(STRIPE_EXPAND_READY, &sh->state);
3608 set_bit(STRIPE_HANDLE, &sh->state); 3898 set_bit(STRIPE_HANDLE, &sh->state);
3609 } 3899 }
3610 release_stripe(sh); 3900 list_add(&sh->lru, &stripes);
3611 } 3901 }
3612 spin_lock_irq(&conf->device_lock); 3902 spin_lock_irq(&conf->device_lock);
3613 conf->expand_progress = (sector_nr + i) * new_data_disks; 3903 if (mddev->delta_disks < 0)
3904 conf->reshape_progress -= reshape_sectors * new_data_disks;
3905 else
3906 conf->reshape_progress += reshape_sectors * new_data_disks;
3614 spin_unlock_irq(&conf->device_lock); 3907 spin_unlock_irq(&conf->device_lock);
3615 /* Ok, those stripe are ready. We can start scheduling 3908 /* Ok, those stripe are ready. We can start scheduling
3616 * reads on the source stripes. 3909 * reads on the source stripes.
@@ -3618,46 +3911,50 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3618 * block on the destination stripes. 3911 * block on the destination stripes.
3619 */ 3912 */
3620 first_sector = 3913 first_sector =
3621 raid5_compute_sector(sector_nr*(new_data_disks), 3914 raid5_compute_sector(conf, stripe_addr*(new_data_disks),
3622 raid_disks, data_disks, 3915 1, &dd_idx, NULL);
3623 &dd_idx, &pd_idx, conf);
3624 last_sector = 3916 last_sector =
3625 raid5_compute_sector((sector_nr+conf->chunk_size/512) 3917 raid5_compute_sector(conf, ((stripe_addr+conf->chunk_size/512)
3626 *(new_data_disks) -1, 3918 *(new_data_disks) - 1),
3627 raid_disks, data_disks, 3919 1, &dd_idx, NULL);
3628 &dd_idx, &pd_idx, conf); 3920 if (last_sector >= mddev->dev_sectors)
3629 if (last_sector >= (mddev->size<<1)) 3921 last_sector = mddev->dev_sectors - 1;
3630 last_sector = (mddev->size<<1)-1;
3631 while (first_sector <= last_sector) { 3922 while (first_sector <= last_sector) {
3632 pd_idx = stripe_to_pdidx(first_sector, conf, 3923 sh = get_active_stripe(conf, first_sector, 1, 0);
3633 conf->previous_raid_disks);
3634 sh = get_active_stripe(conf, first_sector,
3635 conf->previous_raid_disks, pd_idx, 0);
3636 set_bit(STRIPE_EXPAND_SOURCE, &sh->state); 3924 set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
3637 set_bit(STRIPE_HANDLE, &sh->state); 3925 set_bit(STRIPE_HANDLE, &sh->state);
3638 release_stripe(sh); 3926 release_stripe(sh);
3639 first_sector += STRIPE_SECTORS; 3927 first_sector += STRIPE_SECTORS;
3640 } 3928 }
3929 /* Now that the sources are clearly marked, we can release
3930 * the destination stripes
3931 */
3932 while (!list_empty(&stripes)) {
3933 sh = list_entry(stripes.next, struct stripe_head, lru);
3934 list_del_init(&sh->lru);
3935 release_stripe(sh);
3936 }
3641 /* If this takes us to the resync_max point where we have to pause, 3937 /* If this takes us to the resync_max point where we have to pause,
3642 * then we need to write out the superblock. 3938 * then we need to write out the superblock.
3643 */ 3939 */
3644 sector_nr += conf->chunk_size>>9; 3940 sector_nr += reshape_sectors;
3645 if (sector_nr >= mddev->resync_max) { 3941 if (sector_nr >= mddev->resync_max) {
3646 /* Cannot proceed until we've updated the superblock... */ 3942 /* Cannot proceed until we've updated the superblock... */
3647 wait_event(conf->wait_for_overlap, 3943 wait_event(conf->wait_for_overlap,
3648 atomic_read(&conf->reshape_stripes) == 0); 3944 atomic_read(&conf->reshape_stripes) == 0);
3649 mddev->reshape_position = conf->expand_progress; 3945 mddev->reshape_position = conf->reshape_progress;
3946 conf->reshape_checkpoint = jiffies;
3650 set_bit(MD_CHANGE_DEVS, &mddev->flags); 3947 set_bit(MD_CHANGE_DEVS, &mddev->flags);
3651 md_wakeup_thread(mddev->thread); 3948 md_wakeup_thread(mddev->thread);
3652 wait_event(mddev->sb_wait, 3949 wait_event(mddev->sb_wait,
3653 !test_bit(MD_CHANGE_DEVS, &mddev->flags) 3950 !test_bit(MD_CHANGE_DEVS, &mddev->flags)
3654 || kthread_should_stop()); 3951 || kthread_should_stop());
3655 spin_lock_irq(&conf->device_lock); 3952 spin_lock_irq(&conf->device_lock);
3656 conf->expand_lo = mddev->reshape_position; 3953 conf->reshape_safe = mddev->reshape_position;
3657 spin_unlock_irq(&conf->device_lock); 3954 spin_unlock_irq(&conf->device_lock);
3658 wake_up(&conf->wait_for_overlap); 3955 wake_up(&conf->wait_for_overlap);
3659 } 3956 }
3660 return conf->chunk_size>>9; 3957 return reshape_sectors;
3661} 3958}
3662 3959
3663/* FIXME go_faster isn't used */ 3960/* FIXME go_faster isn't used */
@@ -3665,9 +3962,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
3665{ 3962{
3666 raid5_conf_t *conf = (raid5_conf_t *) mddev->private; 3963 raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
3667 struct stripe_head *sh; 3964 struct stripe_head *sh;
3668 int pd_idx; 3965 sector_t max_sector = mddev->dev_sectors;
3669 int raid_disks = conf->raid_disks;
3670 sector_t max_sector = mddev->size << 1;
3671 int sync_blocks; 3966 int sync_blocks;
3672 int still_degraded = 0; 3967 int still_degraded = 0;
3673 int i; 3968 int i;
@@ -3675,6 +3970,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
3675 if (sector_nr >= max_sector) { 3970 if (sector_nr >= max_sector) {
3676 /* just being told to finish up .. nothing much to do */ 3971 /* just being told to finish up .. nothing much to do */
3677 unplug_slaves(mddev); 3972 unplug_slaves(mddev);
3973
3678 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) { 3974 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
3679 end_reshape(conf); 3975 end_reshape(conf);
3680 return 0; 3976 return 0;
@@ -3705,7 +4001,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
3705 */ 4001 */
3706 if (mddev->degraded >= conf->max_degraded && 4002 if (mddev->degraded >= conf->max_degraded &&
3707 test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { 4003 test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
3708 sector_t rv = (mddev->size << 1) - sector_nr; 4004 sector_t rv = mddev->dev_sectors - sector_nr;
3709 *skipped = 1; 4005 *skipped = 1;
3710 return rv; 4006 return rv;
3711 } 4007 }
@@ -3721,10 +4017,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
3721 4017
3722 bitmap_cond_end_sync(mddev->bitmap, sector_nr); 4018 bitmap_cond_end_sync(mddev->bitmap, sector_nr);
3723 4019
3724 pd_idx = stripe_to_pdidx(sector_nr, conf, raid_disks); 4020 sh = get_active_stripe(conf, sector_nr, 0, 1);
3725 sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 1);
3726 if (sh == NULL) { 4021 if (sh == NULL) {
3727 sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 0); 4022 sh = get_active_stripe(conf, sector_nr, 0, 0);
3728 /* make sure we don't swamp the stripe cache if someone else 4023 /* make sure we don't swamp the stripe cache if someone else
3729 * is trying to get access 4024 * is trying to get access
3730 */ 4025 */
@@ -3766,19 +4061,15 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
3766 * it will be only one 'dd_idx' and only need one call to raid5_compute_sector. 4061 * it will be only one 'dd_idx' and only need one call to raid5_compute_sector.
3767 */ 4062 */
3768 struct stripe_head *sh; 4063 struct stripe_head *sh;
3769 int dd_idx, pd_idx; 4064 int dd_idx;
3770 sector_t sector, logical_sector, last_sector; 4065 sector_t sector, logical_sector, last_sector;
3771 int scnt = 0; 4066 int scnt = 0;
3772 int remaining; 4067 int remaining;
3773 int handled = 0; 4068 int handled = 0;
3774 4069
3775 logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1); 4070 logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
3776 sector = raid5_compute_sector( logical_sector, 4071 sector = raid5_compute_sector(conf, logical_sector,
3777 conf->raid_disks, 4072 0, &dd_idx, NULL);
3778 conf->raid_disks - conf->max_degraded,
3779 &dd_idx,
3780 &pd_idx,
3781 conf);
3782 last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9); 4073 last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9);
3783 4074
3784 for (; logical_sector < last_sector; 4075 for (; logical_sector < last_sector;
@@ -3790,7 +4081,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
3790 /* already done this stripe */ 4081 /* already done this stripe */
3791 continue; 4082 continue;
3792 4083
3793 sh = get_active_stripe(conf, sector, conf->raid_disks, pd_idx, 1); 4084 sh = get_active_stripe(conf, sector, 0, 1);
3794 4085
3795 if (!sh) { 4086 if (!sh) {
3796 /* failed to get a stripe - must wait */ 4087 /* failed to get a stripe - must wait */
@@ -3992,89 +4283,69 @@ static struct attribute_group raid5_attrs_group = {
3992 .attrs = raid5_attrs, 4283 .attrs = raid5_attrs,
3993}; 4284};
3994 4285
3995static int run(mddev_t *mddev) 4286static sector_t
4287raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
4288{
4289 raid5_conf_t *conf = mddev_to_conf(mddev);
4290
4291 if (!sectors)
4292 sectors = mddev->dev_sectors;
4293 if (!raid_disks) {
4294 /* size is defined by the smallest of previous and new size */
4295 if (conf->raid_disks < conf->previous_raid_disks)
4296 raid_disks = conf->raid_disks;
4297 else
4298 raid_disks = conf->previous_raid_disks;
4299 }
4300
4301 sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
4302 sectors &= ~((sector_t)mddev->new_chunk/512 - 1);
4303 return sectors * (raid_disks - conf->max_degraded);
4304}
4305
4306static raid5_conf_t *setup_conf(mddev_t *mddev)
3996{ 4307{
3997 raid5_conf_t *conf; 4308 raid5_conf_t *conf;
3998 int raid_disk, memory; 4309 int raid_disk, memory;
3999 mdk_rdev_t *rdev; 4310 mdk_rdev_t *rdev;
4000 struct disk_info *disk; 4311 struct disk_info *disk;
4001 int working_disks = 0;
4002 4312
4003 if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) { 4313 if (mddev->new_level != 5
4314 && mddev->new_level != 4
4315 && mddev->new_level != 6) {
4004 printk(KERN_ERR "raid5: %s: raid level not set to 4/5/6 (%d)\n", 4316 printk(KERN_ERR "raid5: %s: raid level not set to 4/5/6 (%d)\n",
4005 mdname(mddev), mddev->level); 4317 mdname(mddev), mddev->new_level);
4006 return -EIO; 4318 return ERR_PTR(-EIO);
4007 } 4319 }
4008 4320 if ((mddev->new_level == 5
4009 if (mddev->chunk_size < PAGE_SIZE) { 4321 && !algorithm_valid_raid5(mddev->new_layout)) ||
4010 printk(KERN_ERR "md/raid5: chunk_size must be at least " 4322 (mddev->new_level == 6
4011 "PAGE_SIZE but %d < %ld\n", 4323 && !algorithm_valid_raid6(mddev->new_layout))) {
4012 mddev->chunk_size, PAGE_SIZE); 4324 printk(KERN_ERR "raid5: %s: layout %d not supported\n",
4013 return -EINVAL; 4325 mdname(mddev), mddev->new_layout);
4326 return ERR_PTR(-EIO);
4014 } 4327 }
4015 4328 if (mddev->new_level == 6 && mddev->raid_disks < 4) {
4016 if (mddev->reshape_position != MaxSector) { 4329 printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n",
4017 /* Check that we can continue the reshape. 4330 mdname(mddev), mddev->raid_disks);
4018 * Currently only disks can change, it must 4331 return ERR_PTR(-EINVAL);
4019 * increase, and we must be past the point where
4020 * a stripe over-writes itself
4021 */
4022 sector_t here_new, here_old;
4023 int old_disks;
4024 int max_degraded = (mddev->level == 5 ? 1 : 2);
4025
4026 if (mddev->new_level != mddev->level ||
4027 mddev->new_layout != mddev->layout ||
4028 mddev->new_chunk != mddev->chunk_size) {
4029 printk(KERN_ERR "raid5: %s: unsupported reshape "
4030 "required - aborting.\n",
4031 mdname(mddev));
4032 return -EINVAL;
4033 }
4034 if (mddev->delta_disks <= 0) {
4035 printk(KERN_ERR "raid5: %s: unsupported reshape "
4036 "(reduce disks) required - aborting.\n",
4037 mdname(mddev));
4038 return -EINVAL;
4039 }
4040 old_disks = mddev->raid_disks - mddev->delta_disks;
4041 /* reshape_position must be on a new-stripe boundary, and one
4042 * further up in new geometry must map after here in old
4043 * geometry.
4044 */
4045 here_new = mddev->reshape_position;
4046 if (sector_div(here_new, (mddev->chunk_size>>9)*
4047 (mddev->raid_disks - max_degraded))) {
4048 printk(KERN_ERR "raid5: reshape_position not "
4049 "on a stripe boundary\n");
4050 return -EINVAL;
4051 }
4052 /* here_new is the stripe we will write to */
4053 here_old = mddev->reshape_position;
4054 sector_div(here_old, (mddev->chunk_size>>9)*
4055 (old_disks-max_degraded));
4056 /* here_old is the first stripe that we might need to read
4057 * from */
4058 if (here_new >= here_old) {
4059 /* Reading from the same stripe as writing to - bad */
4060 printk(KERN_ERR "raid5: reshape_position too early for "
4061 "auto-recovery - aborting.\n");
4062 return -EINVAL;
4063 }
4064 printk(KERN_INFO "raid5: reshape will continue\n");
4065 /* OK, we should be able to continue; */
4066 } 4332 }
4067 4333
4334 if (!mddev->new_chunk || mddev->new_chunk % PAGE_SIZE) {
4335 printk(KERN_ERR "raid5: invalid chunk size %d for %s\n",
4336 mddev->new_chunk, mdname(mddev));
4337 return ERR_PTR(-EINVAL);
4338 }
4068 4339
4069 mddev->private = kzalloc(sizeof (raid5_conf_t), GFP_KERNEL); 4340 conf = kzalloc(sizeof(raid5_conf_t), GFP_KERNEL);
4070 if ((conf = mddev->private) == NULL) 4341 if (conf == NULL)
4071 goto abort; 4342 goto abort;
4072 if (mddev->reshape_position == MaxSector) { 4343
4073 conf->previous_raid_disks = conf->raid_disks = mddev->raid_disks; 4344 conf->raid_disks = mddev->raid_disks;
4074 } else { 4345 if (mddev->reshape_position == MaxSector)
4075 conf->raid_disks = mddev->raid_disks; 4346 conf->previous_raid_disks = mddev->raid_disks;
4347 else
4076 conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks; 4348 conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks;
4077 }
4078 4349
4079 conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info), 4350 conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info),
4080 GFP_KERNEL); 4351 GFP_KERNEL);
@@ -4086,13 +4357,12 @@ static int run(mddev_t *mddev)
4086 if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) 4357 if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
4087 goto abort; 4358 goto abort;
4088 4359
4089 if (mddev->level == 6) { 4360 if (mddev->new_level == 6) {
4090 conf->spare_page = alloc_page(GFP_KERNEL); 4361 conf->spare_page = alloc_page(GFP_KERNEL);
4091 if (!conf->spare_page) 4362 if (!conf->spare_page)
4092 goto abort; 4363 goto abort;
4093 } 4364 }
4094 spin_lock_init(&conf->device_lock); 4365 spin_lock_init(&conf->device_lock);
4095 mddev->queue->queue_lock = &conf->device_lock;
4096 init_waitqueue_head(&conf->wait_for_stripe); 4366 init_waitqueue_head(&conf->wait_for_stripe);
4097 init_waitqueue_head(&conf->wait_for_overlap); 4367 init_waitqueue_head(&conf->wait_for_overlap);
4098 INIT_LIST_HEAD(&conf->handle_list); 4368 INIT_LIST_HEAD(&conf->handle_list);
@@ -4121,47 +4391,134 @@ static int run(mddev_t *mddev)
4121 printk(KERN_INFO "raid5: device %s operational as raid" 4391 printk(KERN_INFO "raid5: device %s operational as raid"
4122 " disk %d\n", bdevname(rdev->bdev,b), 4392 " disk %d\n", bdevname(rdev->bdev,b),
4123 raid_disk); 4393 raid_disk);
4124 working_disks++;
4125 } else 4394 } else
4126 /* Cannot rely on bitmap to complete recovery */ 4395 /* Cannot rely on bitmap to complete recovery */
4127 conf->fullsync = 1; 4396 conf->fullsync = 1;
4128 } 4397 }
4129 4398
4130 /* 4399 conf->chunk_size = mddev->new_chunk;
4131 * 0 for a fully functional array, 1 or 2 for a degraded array. 4400 conf->level = mddev->new_level;
4132 */
4133 mddev->degraded = conf->raid_disks - working_disks;
4134 conf->mddev = mddev;
4135 conf->chunk_size = mddev->chunk_size;
4136 conf->level = mddev->level;
4137 if (conf->level == 6) 4401 if (conf->level == 6)
4138 conf->max_degraded = 2; 4402 conf->max_degraded = 2;
4139 else 4403 else
4140 conf->max_degraded = 1; 4404 conf->max_degraded = 1;
4141 conf->algorithm = mddev->layout; 4405 conf->algorithm = mddev->new_layout;
4142 conf->max_nr_stripes = NR_STRIPES; 4406 conf->max_nr_stripes = NR_STRIPES;
4143 conf->expand_progress = mddev->reshape_position; 4407 conf->reshape_progress = mddev->reshape_position;
4144 4408 if (conf->reshape_progress != MaxSector) {
4145 /* device size must be a multiple of chunk size */ 4409 conf->prev_chunk = mddev->chunk_size;
4146 mddev->size &= ~(mddev->chunk_size/1024 -1); 4410 conf->prev_algo = mddev->layout;
4147 mddev->resync_max_sectors = mddev->size << 1; 4411 }
4148 4412
4149 if (conf->level == 6 && conf->raid_disks < 4) { 4413 memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
4150 printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n", 4414 conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
4151 mdname(mddev), conf->raid_disks); 4415 if (grow_stripes(conf, conf->max_nr_stripes)) {
4416 printk(KERN_ERR
4417 "raid5: couldn't allocate %dkB for buffers\n", memory);
4152 goto abort; 4418 goto abort;
4153 } 4419 } else
4154 if (!conf->chunk_size || conf->chunk_size % 4) { 4420 printk(KERN_INFO "raid5: allocated %dkB for %s\n",
4155 printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", 4421 memory, mdname(mddev));
4156 conf->chunk_size, mdname(mddev)); 4422
4423 conf->thread = md_register_thread(raid5d, mddev, "%s_raid5");
4424 if (!conf->thread) {
4425 printk(KERN_ERR
4426 "raid5: couldn't allocate thread for %s\n",
4427 mdname(mddev));
4157 goto abort; 4428 goto abort;
4158 } 4429 }
4159 if (conf->algorithm > ALGORITHM_RIGHT_SYMMETRIC) { 4430
4160 printk(KERN_ERR 4431 return conf;
4161 "raid5: unsupported parity algorithm %d for %s\n", 4432
4162 conf->algorithm, mdname(mddev)); 4433 abort:
4163 goto abort; 4434 if (conf) {
4435 shrink_stripes(conf);
4436 safe_put_page(conf->spare_page);
4437 kfree(conf->disks);
4438 kfree(conf->stripe_hashtbl);
4439 kfree(conf);
4440 return ERR_PTR(-EIO);
4441 } else
4442 return ERR_PTR(-ENOMEM);
4443}
4444
4445static int run(mddev_t *mddev)
4446{
4447 raid5_conf_t *conf;
4448 int working_disks = 0;
4449 mdk_rdev_t *rdev;
4450
4451 if (mddev->reshape_position != MaxSector) {
4452 /* Check that we can continue the reshape.
4453 * Currently only disks can change, it must
4454 * increase, and we must be past the point where
4455 * a stripe over-writes itself
4456 */
4457 sector_t here_new, here_old;
4458 int old_disks;
4459 int max_degraded = (mddev->level == 6 ? 2 : 1);
4460
4461 if (mddev->new_level != mddev->level) {
4462 printk(KERN_ERR "raid5: %s: unsupported reshape "
4463 "required - aborting.\n",
4464 mdname(mddev));
4465 return -EINVAL;
4466 }
4467 old_disks = mddev->raid_disks - mddev->delta_disks;
4468 /* reshape_position must be on a new-stripe boundary, and one
4469 * further up in new geometry must map after here in old
4470 * geometry.
4471 */
4472 here_new = mddev->reshape_position;
4473 if (sector_div(here_new, (mddev->new_chunk>>9)*
4474 (mddev->raid_disks - max_degraded))) {
4475 printk(KERN_ERR "raid5: reshape_position not "
4476 "on a stripe boundary\n");
4477 return -EINVAL;
4478 }
4479 /* here_new is the stripe we will write to */
4480 here_old = mddev->reshape_position;
4481 sector_div(here_old, (mddev->chunk_size>>9)*
4482 (old_disks-max_degraded));
4483 /* here_old is the first stripe that we might need to read
4484 * from */
4485 if (here_new >= here_old) {
4486 /* Reading from the same stripe as writing to - bad */
4487 printk(KERN_ERR "raid5: reshape_position too early for "
4488 "auto-recovery - aborting.\n");
4489 return -EINVAL;
4490 }
4491 printk(KERN_INFO "raid5: reshape will continue\n");
4492 /* OK, we should be able to continue; */
4493 } else {
4494 BUG_ON(mddev->level != mddev->new_level);
4495 BUG_ON(mddev->layout != mddev->new_layout);
4496 BUG_ON(mddev->chunk_size != mddev->new_chunk);
4497 BUG_ON(mddev->delta_disks != 0);
4164 } 4498 }
4499
4500 if (mddev->private == NULL)
4501 conf = setup_conf(mddev);
4502 else
4503 conf = mddev->private;
4504
4505 if (IS_ERR(conf))
4506 return PTR_ERR(conf);
4507
4508 mddev->thread = conf->thread;
4509 conf->thread = NULL;
4510 mddev->private = conf;
4511
4512 /*
4513 * 0 for a fully functional array, 1 or 2 for a degraded array.
4514 */
4515 list_for_each_entry(rdev, &mddev->disks, same_set)
4516 if (rdev->raid_disk >= 0 &&
4517 test_bit(In_sync, &rdev->flags))
4518 working_disks++;
4519
4520 mddev->degraded = conf->raid_disks - working_disks;
4521
4165 if (mddev->degraded > conf->max_degraded) { 4522 if (mddev->degraded > conf->max_degraded) {
4166 printk(KERN_ERR "raid5: not enough operational devices for %s" 4523 printk(KERN_ERR "raid5: not enough operational devices for %s"
4167 " (%d/%d failed)\n", 4524 " (%d/%d failed)\n",
@@ -4169,6 +4526,10 @@ static int run(mddev_t *mddev)
4169 goto abort; 4526 goto abort;
4170 } 4527 }
4171 4528
4529 /* device size must be a multiple of chunk size */
4530 mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1);
4531 mddev->resync_max_sectors = mddev->dev_sectors;
4532
4172 if (mddev->degraded > 0 && 4533 if (mddev->degraded > 0 &&
4173 mddev->recovery_cp != MaxSector) { 4534 mddev->recovery_cp != MaxSector) {
4174 if (mddev->ok_start_degraded) 4535 if (mddev->ok_start_degraded)
@@ -4184,43 +4545,22 @@ static int run(mddev_t *mddev)
4184 } 4545 }
4185 } 4546 }
4186 4547
4187 {
4188 mddev->thread = md_register_thread(raid5d, mddev, "%s_raid5");
4189 if (!mddev->thread) {
4190 printk(KERN_ERR
4191 "raid5: couldn't allocate thread for %s\n",
4192 mdname(mddev));
4193 goto abort;
4194 }
4195 }
4196 memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
4197 conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
4198 if (grow_stripes(conf, conf->max_nr_stripes)) {
4199 printk(KERN_ERR
4200 "raid5: couldn't allocate %dkB for buffers\n", memory);
4201 shrink_stripes(conf);
4202 md_unregister_thread(mddev->thread);
4203 goto abort;
4204 } else
4205 printk(KERN_INFO "raid5: allocated %dkB for %s\n",
4206 memory, mdname(mddev));
4207
4208 if (mddev->degraded == 0) 4548 if (mddev->degraded == 0)
4209 printk("raid5: raid level %d set %s active with %d out of %d" 4549 printk("raid5: raid level %d set %s active with %d out of %d"
4210 " devices, algorithm %d\n", conf->level, mdname(mddev), 4550 " devices, algorithm %d\n", conf->level, mdname(mddev),
4211 mddev->raid_disks-mddev->degraded, mddev->raid_disks, 4551 mddev->raid_disks-mddev->degraded, mddev->raid_disks,
4212 conf->algorithm); 4552 mddev->new_layout);
4213 else 4553 else
4214 printk(KERN_ALERT "raid5: raid level %d set %s active with %d" 4554 printk(KERN_ALERT "raid5: raid level %d set %s active with %d"
4215 " out of %d devices, algorithm %d\n", conf->level, 4555 " out of %d devices, algorithm %d\n", conf->level,
4216 mdname(mddev), mddev->raid_disks - mddev->degraded, 4556 mdname(mddev), mddev->raid_disks - mddev->degraded,
4217 mddev->raid_disks, conf->algorithm); 4557 mddev->raid_disks, mddev->new_layout);
4218 4558
4219 print_raid5_conf(conf); 4559 print_raid5_conf(conf);
4220 4560
4221 if (conf->expand_progress != MaxSector) { 4561 if (conf->reshape_progress != MaxSector) {
4222 printk("...ok start reshape thread\n"); 4562 printk("...ok start reshape thread\n");
4223 conf->expand_lo = conf->expand_progress; 4563 conf->reshape_safe = conf->reshape_progress;
4224 atomic_set(&conf->reshape_stripes, 0); 4564 atomic_set(&conf->reshape_stripes, 0);
4225 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); 4565 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4226 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); 4566 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
@@ -4247,18 +4587,22 @@ static int run(mddev_t *mddev)
4247 "raid5: failed to create sysfs attributes for %s\n", 4587 "raid5: failed to create sysfs attributes for %s\n",
4248 mdname(mddev)); 4588 mdname(mddev));
4249 4589
4590 mddev->queue->queue_lock = &conf->device_lock;
4591
4250 mddev->queue->unplug_fn = raid5_unplug_device; 4592 mddev->queue->unplug_fn = raid5_unplug_device;
4251 mddev->queue->backing_dev_info.congested_data = mddev; 4593 mddev->queue->backing_dev_info.congested_data = mddev;
4252 mddev->queue->backing_dev_info.congested_fn = raid5_congested; 4594 mddev->queue->backing_dev_info.congested_fn = raid5_congested;
4253 4595
4254 mddev->array_sectors = 2 * mddev->size * (conf->previous_raid_disks - 4596 md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
4255 conf->max_degraded);
4256 4597
4257 blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); 4598 blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
4258 4599
4259 return 0; 4600 return 0;
4260abort: 4601abort:
4602 md_unregister_thread(mddev->thread);
4603 mddev->thread = NULL;
4261 if (conf) { 4604 if (conf) {
4605 shrink_stripes(conf);
4262 print_raid5_conf(conf); 4606 print_raid5_conf(conf);
4263 safe_put_page(conf->spare_page); 4607 safe_put_page(conf->spare_page);
4264 kfree(conf->disks); 4608 kfree(conf->disks);
@@ -4396,6 +4740,10 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
4396 print_raid5_conf(conf); 4740 print_raid5_conf(conf);
4397 rdev = p->rdev; 4741 rdev = p->rdev;
4398 if (rdev) { 4742 if (rdev) {
4743 if (number >= conf->raid_disks &&
4744 conf->reshape_progress == MaxSector)
4745 clear_bit(In_sync, &rdev->flags);
4746
4399 if (test_bit(In_sync, &rdev->flags) || 4747 if (test_bit(In_sync, &rdev->flags) ||
4400 atomic_read(&rdev->nr_pending)) { 4748 atomic_read(&rdev->nr_pending)) {
4401 err = -EBUSY; 4749 err = -EBUSY;
@@ -4405,7 +4753,8 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
4405 * isn't possible. 4753 * isn't possible.
4406 */ 4754 */
4407 if (!test_bit(Faulty, &rdev->flags) && 4755 if (!test_bit(Faulty, &rdev->flags) &&
4408 mddev->degraded <= conf->max_degraded) { 4756 mddev->degraded <= conf->max_degraded &&
4757 number < conf->raid_disks) {
4409 err = -EBUSY; 4758 err = -EBUSY;
4410 goto abort; 4759 goto abort;
4411 } 4760 }
@@ -4472,36 +4821,48 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
4472 * any io in the removed space completes, but it hardly seems 4821 * any io in the removed space completes, but it hardly seems
4473 * worth it. 4822 * worth it.
4474 */ 4823 */
4475 raid5_conf_t *conf = mddev_to_conf(mddev);
4476
4477 sectors &= ~((sector_t)mddev->chunk_size/512 - 1); 4824 sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
4478 mddev->array_sectors = sectors * (mddev->raid_disks 4825 md_set_array_sectors(mddev, raid5_size(mddev, sectors,
4479 - conf->max_degraded); 4826 mddev->raid_disks));
4827 if (mddev->array_sectors >
4828 raid5_size(mddev, sectors, mddev->raid_disks))
4829 return -EINVAL;
4480 set_capacity(mddev->gendisk, mddev->array_sectors); 4830 set_capacity(mddev->gendisk, mddev->array_sectors);
4481 mddev->changed = 1; 4831 mddev->changed = 1;
4482 if (sectors/2 > mddev->size && mddev->recovery_cp == MaxSector) { 4832 if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) {
4483 mddev->recovery_cp = mddev->size << 1; 4833 mddev->recovery_cp = mddev->dev_sectors;
4484 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 4834 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4485 } 4835 }
4486 mddev->size = sectors /2; 4836 mddev->dev_sectors = sectors;
4487 mddev->resync_max_sectors = sectors; 4837 mddev->resync_max_sectors = sectors;
4488 return 0; 4838 return 0;
4489} 4839}
4490 4840
4491#ifdef CONFIG_MD_RAID5_RESHAPE
4492static int raid5_check_reshape(mddev_t *mddev) 4841static int raid5_check_reshape(mddev_t *mddev)
4493{ 4842{
4494 raid5_conf_t *conf = mddev_to_conf(mddev); 4843 raid5_conf_t *conf = mddev_to_conf(mddev);
4495 int err;
4496 4844
4497 if (mddev->delta_disks < 0 || 4845 if (mddev->delta_disks == 0 &&
4498 mddev->new_level != mddev->level) 4846 mddev->new_layout == mddev->layout &&
4499 return -EINVAL; /* Cannot shrink array or change level yet */ 4847 mddev->new_chunk == mddev->chunk_size)
4500 if (mddev->delta_disks == 0) 4848 return -EINVAL; /* nothing to do */
4501 return 0; /* nothing to do */
4502 if (mddev->bitmap) 4849 if (mddev->bitmap)
4503 /* Cannot grow a bitmap yet */ 4850 /* Cannot grow a bitmap yet */
4504 return -EBUSY; 4851 return -EBUSY;
4852 if (mddev->degraded > conf->max_degraded)
4853 return -EINVAL;
4854 if (mddev->delta_disks < 0) {
4855 /* We might be able to shrink, but the devices must
4856 * be made bigger first.
4857 * For raid6, 4 is the minimum size.
4858 * Otherwise 2 is the minimum
4859 */
4860 int min = 2;
4861 if (mddev->level == 6)
4862 min = 4;
4863 if (mddev->raid_disks + mddev->delta_disks < min)
4864 return -EINVAL;
4865 }
4505 4866
4506 /* Can only proceed if there are plenty of stripe_heads. 4867 /* Can only proceed if there are plenty of stripe_heads.
4507 * We need a minimum of one full stripe,, and for sensible progress 4868 * We need a minimum of one full stripe,, and for sensible progress
@@ -4514,18 +4875,12 @@ static int raid5_check_reshape(mddev_t *mddev)
4514 if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes || 4875 if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes ||
4515 (mddev->new_chunk / STRIPE_SIZE) * 4 > conf->max_nr_stripes) { 4876 (mddev->new_chunk / STRIPE_SIZE) * 4 > conf->max_nr_stripes) {
4516 printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n", 4877 printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n",
4517 (mddev->chunk_size / STRIPE_SIZE)*4); 4878 (max(mddev->chunk_size, mddev->new_chunk)
4879 / STRIPE_SIZE)*4);
4518 return -ENOSPC; 4880 return -ENOSPC;
4519 } 4881 }
4520 4882
4521 err = resize_stripes(conf, conf->raid_disks + mddev->delta_disks); 4883 return resize_stripes(conf, conf->raid_disks + mddev->delta_disks);
4522 if (err)
4523 return err;
4524
4525 if (mddev->degraded > conf->max_degraded)
4526 return -EINVAL;
4527 /* looks like we might be able to manage this */
4528 return 0;
4529} 4884}
4530 4885
4531static int raid5_start_reshape(mddev_t *mddev) 4886static int raid5_start_reshape(mddev_t *mddev)
@@ -4550,12 +4905,31 @@ static int raid5_start_reshape(mddev_t *mddev)
4550 */ 4905 */
4551 return -EINVAL; 4906 return -EINVAL;
4552 4907
4908 /* Refuse to reduce size of the array. Any reductions in
4909 * array size must be through explicit setting of array_size
4910 * attribute.
4911 */
4912 if (raid5_size(mddev, 0, conf->raid_disks + mddev->delta_disks)
4913 < mddev->array_sectors) {
4914 printk(KERN_ERR "md: %s: array size must be reduced "
4915 "before number of disks\n", mdname(mddev));
4916 return -EINVAL;
4917 }
4918
4553 atomic_set(&conf->reshape_stripes, 0); 4919 atomic_set(&conf->reshape_stripes, 0);
4554 spin_lock_irq(&conf->device_lock); 4920 spin_lock_irq(&conf->device_lock);
4555 conf->previous_raid_disks = conf->raid_disks; 4921 conf->previous_raid_disks = conf->raid_disks;
4556 conf->raid_disks += mddev->delta_disks; 4922 conf->raid_disks += mddev->delta_disks;
4557 conf->expand_progress = 0; 4923 conf->prev_chunk = conf->chunk_size;
4558 conf->expand_lo = 0; 4924 conf->chunk_size = mddev->new_chunk;
4925 conf->prev_algo = conf->algorithm;
4926 conf->algorithm = mddev->new_layout;
4927 if (mddev->delta_disks < 0)
4928 conf->reshape_progress = raid5_size(mddev, 0, 0);
4929 else
4930 conf->reshape_progress = 0;
4931 conf->reshape_safe = conf->reshape_progress;
4932 conf->generation++;
4559 spin_unlock_irq(&conf->device_lock); 4933 spin_unlock_irq(&conf->device_lock);
4560 4934
4561 /* Add some new drives, as many as will fit. 4935 /* Add some new drives, as many as will fit.
@@ -4580,9 +4954,12 @@ static int raid5_start_reshape(mddev_t *mddev)
4580 break; 4954 break;
4581 } 4955 }
4582 4956
4583 spin_lock_irqsave(&conf->device_lock, flags); 4957 if (mddev->delta_disks > 0) {
4584 mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) - added_devices; 4958 spin_lock_irqsave(&conf->device_lock, flags);
4585 spin_unlock_irqrestore(&conf->device_lock, flags); 4959 mddev->degraded = (conf->raid_disks - conf->previous_raid_disks)
4960 - added_devices;
4961 spin_unlock_irqrestore(&conf->device_lock, flags);
4962 }
4586 mddev->raid_disks = conf->raid_disks; 4963 mddev->raid_disks = conf->raid_disks;
4587 mddev->reshape_position = 0; 4964 mddev->reshape_position = 0;
4588 set_bit(MD_CHANGE_DEVS, &mddev->flags); 4965 set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -4597,52 +4974,86 @@ static int raid5_start_reshape(mddev_t *mddev)
4597 mddev->recovery = 0; 4974 mddev->recovery = 0;
4598 spin_lock_irq(&conf->device_lock); 4975 spin_lock_irq(&conf->device_lock);
4599 mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; 4976 mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
4600 conf->expand_progress = MaxSector; 4977 conf->reshape_progress = MaxSector;
4601 spin_unlock_irq(&conf->device_lock); 4978 spin_unlock_irq(&conf->device_lock);
4602 return -EAGAIN; 4979 return -EAGAIN;
4603 } 4980 }
4981 conf->reshape_checkpoint = jiffies;
4604 md_wakeup_thread(mddev->sync_thread); 4982 md_wakeup_thread(mddev->sync_thread);
4605 md_new_event(mddev); 4983 md_new_event(mddev);
4606 return 0; 4984 return 0;
4607} 4985}
4608#endif
4609 4986
4987/* This is called from the reshape thread and should make any
4988 * changes needed in 'conf'
4989 */
4610static void end_reshape(raid5_conf_t *conf) 4990static void end_reshape(raid5_conf_t *conf)
4611{ 4991{
4612 struct block_device *bdev;
4613 4992
4614 if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { 4993 if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
4615 conf->mddev->array_sectors = 2 * conf->mddev->size * 4994
4616 (conf->raid_disks - conf->max_degraded);
4617 set_capacity(conf->mddev->gendisk, conf->mddev->array_sectors);
4618 conf->mddev->changed = 1;
4619
4620 bdev = bdget_disk(conf->mddev->gendisk, 0);
4621 if (bdev) {
4622 mutex_lock(&bdev->bd_inode->i_mutex);
4623 i_size_write(bdev->bd_inode,
4624 (loff_t)conf->mddev->array_sectors << 9);
4625 mutex_unlock(&bdev->bd_inode->i_mutex);
4626 bdput(bdev);
4627 }
4628 spin_lock_irq(&conf->device_lock); 4995 spin_lock_irq(&conf->device_lock);
4629 conf->expand_progress = MaxSector; 4996 conf->previous_raid_disks = conf->raid_disks;
4997 conf->reshape_progress = MaxSector;
4630 spin_unlock_irq(&conf->device_lock); 4998 spin_unlock_irq(&conf->device_lock);
4631 conf->mddev->reshape_position = MaxSector; 4999 wake_up(&conf->wait_for_overlap);
4632 5000
4633 /* read-ahead size must cover two whole stripes, which is 5001 /* read-ahead size must cover two whole stripes, which is
4634 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices 5002 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
4635 */ 5003 */
4636 { 5004 {
4637 int data_disks = conf->previous_raid_disks - conf->max_degraded; 5005 int data_disks = conf->raid_disks - conf->max_degraded;
4638 int stripe = data_disks * 5006 int stripe = data_disks * (conf->chunk_size
4639 (conf->mddev->chunk_size / PAGE_SIZE); 5007 / PAGE_SIZE);
4640 if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe) 5008 if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
4641 conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe; 5009 conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
4642 } 5010 }
4643 } 5011 }
4644} 5012}
4645 5013
5014/* This is called from the raid5d thread with mddev_lock held.
5015 * It makes config changes to the device.
5016 */
5017static void raid5_finish_reshape(mddev_t *mddev)
5018{
5019 struct block_device *bdev;
5020 raid5_conf_t *conf = mddev_to_conf(mddev);
5021
5022 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
5023
5024 if (mddev->delta_disks > 0) {
5025 md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
5026 set_capacity(mddev->gendisk, mddev->array_sectors);
5027 mddev->changed = 1;
5028
5029 bdev = bdget_disk(mddev->gendisk, 0);
5030 if (bdev) {
5031 mutex_lock(&bdev->bd_inode->i_mutex);
5032 i_size_write(bdev->bd_inode,
5033 (loff_t)mddev->array_sectors << 9);
5034 mutex_unlock(&bdev->bd_inode->i_mutex);
5035 bdput(bdev);
5036 }
5037 } else {
5038 int d;
5039 mddev->degraded = conf->raid_disks;
5040 for (d = 0; d < conf->raid_disks ; d++)
5041 if (conf->disks[d].rdev &&
5042 test_bit(In_sync,
5043 &conf->disks[d].rdev->flags))
5044 mddev->degraded--;
5045 for (d = conf->raid_disks ;
5046 d < conf->raid_disks - mddev->delta_disks;
5047 d++)
5048 raid5_remove_disk(mddev, d);
5049 }
5050 mddev->layout = conf->algorithm;
5051 mddev->chunk_size = conf->chunk_size;
5052 mddev->reshape_position = MaxSector;
5053 mddev->delta_disks = 0;
5054 }
5055}
5056
4646static void raid5_quiesce(mddev_t *mddev, int state) 5057static void raid5_quiesce(mddev_t *mddev, int state)
4647{ 5058{
4648 raid5_conf_t *conf = mddev_to_conf(mddev); 5059 raid5_conf_t *conf = mddev_to_conf(mddev);
@@ -4672,6 +5083,212 @@ static void raid5_quiesce(mddev_t *mddev, int state)
4672 } 5083 }
4673} 5084}
4674 5085
5086
5087static void *raid5_takeover_raid1(mddev_t *mddev)
5088{
5089 int chunksect;
5090
5091 if (mddev->raid_disks != 2 ||
5092 mddev->degraded > 1)
5093 return ERR_PTR(-EINVAL);
5094
5095 /* Should check if there are write-behind devices? */
5096
5097 chunksect = 64*2; /* 64K by default */
5098
5099 /* The array must be an exact multiple of chunksize */
5100 while (chunksect && (mddev->array_sectors & (chunksect-1)))
5101 chunksect >>= 1;
5102
5103 if ((chunksect<<9) < STRIPE_SIZE)
5104 /* array size does not allow a suitable chunk size */
5105 return ERR_PTR(-EINVAL);
5106
5107 mddev->new_level = 5;
5108 mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC;
5109 mddev->new_chunk = chunksect << 9;
5110
5111 return setup_conf(mddev);
5112}
5113
5114static void *raid5_takeover_raid6(mddev_t *mddev)
5115{
5116 int new_layout;
5117
5118 switch (mddev->layout) {
5119 case ALGORITHM_LEFT_ASYMMETRIC_6:
5120 new_layout = ALGORITHM_LEFT_ASYMMETRIC;
5121 break;
5122 case ALGORITHM_RIGHT_ASYMMETRIC_6:
5123 new_layout = ALGORITHM_RIGHT_ASYMMETRIC;
5124 break;
5125 case ALGORITHM_LEFT_SYMMETRIC_6:
5126 new_layout = ALGORITHM_LEFT_SYMMETRIC;
5127 break;
5128 case ALGORITHM_RIGHT_SYMMETRIC_6:
5129 new_layout = ALGORITHM_RIGHT_SYMMETRIC;
5130 break;
5131 case ALGORITHM_PARITY_0_6:
5132 new_layout = ALGORITHM_PARITY_0;
5133 break;
5134 case ALGORITHM_PARITY_N:
5135 new_layout = ALGORITHM_PARITY_N;
5136 break;
5137 default:
5138 return ERR_PTR(-EINVAL);
5139 }
5140 mddev->new_level = 5;
5141 mddev->new_layout = new_layout;
5142 mddev->delta_disks = -1;
5143 mddev->raid_disks -= 1;
5144 return setup_conf(mddev);
5145}
5146
5147
5148static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
5149{
5150 /* For a 2-drive array, the layout and chunk size can be changed
5151 * immediately as not restriping is needed.
5152 * For larger arrays we record the new value - after validation
5153 * to be used by a reshape pass.
5154 */
5155 raid5_conf_t *conf = mddev_to_conf(mddev);
5156
5157 if (new_layout >= 0 && !algorithm_valid_raid5(new_layout))
5158 return -EINVAL;
5159 if (new_chunk > 0) {
5160 if (new_chunk & (new_chunk-1))
5161 /* not a power of 2 */
5162 return -EINVAL;
5163 if (new_chunk < PAGE_SIZE)
5164 return -EINVAL;
5165 if (mddev->array_sectors & ((new_chunk>>9)-1))
5166 /* not factor of array size */
5167 return -EINVAL;
5168 }
5169
5170 /* They look valid */
5171
5172 if (mddev->raid_disks == 2) {
5173
5174 if (new_layout >= 0) {
5175 conf->algorithm = new_layout;
5176 mddev->layout = mddev->new_layout = new_layout;
5177 }
5178 if (new_chunk > 0) {
5179 conf->chunk_size = new_chunk;
5180 mddev->chunk_size = mddev->new_chunk = new_chunk;
5181 }
5182 set_bit(MD_CHANGE_DEVS, &mddev->flags);
5183 md_wakeup_thread(mddev->thread);
5184 } else {
5185 if (new_layout >= 0)
5186 mddev->new_layout = new_layout;
5187 if (new_chunk > 0)
5188 mddev->new_chunk = new_chunk;
5189 }
5190 return 0;
5191}
5192
5193static int raid6_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
5194{
5195 if (new_layout >= 0 && !algorithm_valid_raid6(new_layout))
5196 return -EINVAL;
5197 if (new_chunk > 0) {
5198 if (new_chunk & (new_chunk-1))
5199 /* not a power of 2 */
5200 return -EINVAL;
5201 if (new_chunk < PAGE_SIZE)
5202 return -EINVAL;
5203 if (mddev->array_sectors & ((new_chunk>>9)-1))
5204 /* not factor of array size */
5205 return -EINVAL;
5206 }
5207
5208 /* They look valid */
5209
5210 if (new_layout >= 0)
5211 mddev->new_layout = new_layout;
5212 if (new_chunk > 0)
5213 mddev->new_chunk = new_chunk;
5214
5215 return 0;
5216}
5217
5218static void *raid5_takeover(mddev_t *mddev)
5219{
5220 /* raid5 can take over:
5221 * raid0 - if all devices are the same - make it a raid4 layout
5222 * raid1 - if there are two drives. We need to know the chunk size
5223 * raid4 - trivial - just use a raid4 layout.
5224 * raid6 - Providing it is a *_6 layout
5225 *
5226 * For now, just do raid1
5227 */
5228
5229 if (mddev->level == 1)
5230 return raid5_takeover_raid1(mddev);
5231 if (mddev->level == 4) {
5232 mddev->new_layout = ALGORITHM_PARITY_N;
5233 mddev->new_level = 5;
5234 return setup_conf(mddev);
5235 }
5236 if (mddev->level == 6)
5237 return raid5_takeover_raid6(mddev);
5238
5239 return ERR_PTR(-EINVAL);
5240}
5241
5242
5243static struct mdk_personality raid5_personality;
5244
5245static void *raid6_takeover(mddev_t *mddev)
5246{
5247 /* Currently can only take over a raid5. We map the
5248 * personality to an equivalent raid6 personality
5249 * with the Q block at the end.
5250 */
5251 int new_layout;
5252
5253 if (mddev->pers != &raid5_personality)
5254 return ERR_PTR(-EINVAL);
5255 if (mddev->degraded > 1)
5256 return ERR_PTR(-EINVAL);
5257 if (mddev->raid_disks > 253)
5258 return ERR_PTR(-EINVAL);
5259 if (mddev->raid_disks < 3)
5260 return ERR_PTR(-EINVAL);
5261
5262 switch (mddev->layout) {
5263 case ALGORITHM_LEFT_ASYMMETRIC:
5264 new_layout = ALGORITHM_LEFT_ASYMMETRIC_6;
5265 break;
5266 case ALGORITHM_RIGHT_ASYMMETRIC:
5267 new_layout = ALGORITHM_RIGHT_ASYMMETRIC_6;
5268 break;
5269 case ALGORITHM_LEFT_SYMMETRIC:
5270 new_layout = ALGORITHM_LEFT_SYMMETRIC_6;
5271 break;
5272 case ALGORITHM_RIGHT_SYMMETRIC:
5273 new_layout = ALGORITHM_RIGHT_SYMMETRIC_6;
5274 break;
5275 case ALGORITHM_PARITY_0:
5276 new_layout = ALGORITHM_PARITY_0_6;
5277 break;
5278 case ALGORITHM_PARITY_N:
5279 new_layout = ALGORITHM_PARITY_N;
5280 break;
5281 default:
5282 return ERR_PTR(-EINVAL);
5283 }
5284 mddev->new_level = 6;
5285 mddev->new_layout = new_layout;
5286 mddev->delta_disks = 1;
5287 mddev->raid_disks += 1;
5288 return setup_conf(mddev);
5289}
5290
5291
4675static struct mdk_personality raid6_personality = 5292static struct mdk_personality raid6_personality =
4676{ 5293{
4677 .name = "raid6", 5294 .name = "raid6",
@@ -4687,11 +5304,13 @@ static struct mdk_personality raid6_personality =
4687 .spare_active = raid5_spare_active, 5304 .spare_active = raid5_spare_active,
4688 .sync_request = sync_request, 5305 .sync_request = sync_request,
4689 .resize = raid5_resize, 5306 .resize = raid5_resize,
4690#ifdef CONFIG_MD_RAID5_RESHAPE 5307 .size = raid5_size,
4691 .check_reshape = raid5_check_reshape, 5308 .check_reshape = raid5_check_reshape,
4692 .start_reshape = raid5_start_reshape, 5309 .start_reshape = raid5_start_reshape,
4693#endif 5310 .finish_reshape = raid5_finish_reshape,
4694 .quiesce = raid5_quiesce, 5311 .quiesce = raid5_quiesce,
5312 .takeover = raid6_takeover,
5313 .reconfig = raid6_reconfig,
4695}; 5314};
4696static struct mdk_personality raid5_personality = 5315static struct mdk_personality raid5_personality =
4697{ 5316{
@@ -4708,11 +5327,13 @@ static struct mdk_personality raid5_personality =
4708 .spare_active = raid5_spare_active, 5327 .spare_active = raid5_spare_active,
4709 .sync_request = sync_request, 5328 .sync_request = sync_request,
4710 .resize = raid5_resize, 5329 .resize = raid5_resize,
4711#ifdef CONFIG_MD_RAID5_RESHAPE 5330 .size = raid5_size,
4712 .check_reshape = raid5_check_reshape, 5331 .check_reshape = raid5_check_reshape,
4713 .start_reshape = raid5_start_reshape, 5332 .start_reshape = raid5_start_reshape,
4714#endif 5333 .finish_reshape = raid5_finish_reshape,
4715 .quiesce = raid5_quiesce, 5334 .quiesce = raid5_quiesce,
5335 .takeover = raid5_takeover,
5336 .reconfig = raid5_reconfig,
4716}; 5337};
4717 5338
4718static struct mdk_personality raid4_personality = 5339static struct mdk_personality raid4_personality =
@@ -4730,20 +5351,15 @@ static struct mdk_personality raid4_personality =
4730 .spare_active = raid5_spare_active, 5351 .spare_active = raid5_spare_active,
4731 .sync_request = sync_request, 5352 .sync_request = sync_request,
4732 .resize = raid5_resize, 5353 .resize = raid5_resize,
4733#ifdef CONFIG_MD_RAID5_RESHAPE 5354 .size = raid5_size,
4734 .check_reshape = raid5_check_reshape, 5355 .check_reshape = raid5_check_reshape,
4735 .start_reshape = raid5_start_reshape, 5356 .start_reshape = raid5_start_reshape,
4736#endif 5357 .finish_reshape = raid5_finish_reshape,
4737 .quiesce = raid5_quiesce, 5358 .quiesce = raid5_quiesce,
4738}; 5359};
4739 5360
4740static int __init raid5_init(void) 5361static int __init raid5_init(void)
4741{ 5362{
4742 int e;
4743
4744 e = raid6_select_algo();
4745 if ( e )
4746 return e;
4747 register_md_personality(&raid6_personality); 5363 register_md_personality(&raid6_personality);
4748 register_md_personality(&raid5_personality); 5364 register_md_personality(&raid5_personality);
4749 register_md_personality(&raid4_personality); 5365 register_md_personality(&raid4_personality);
diff --git a/include/linux/raid/raid5.h b/drivers/md/raid5.h
index 3b2672792457..52ba99954dec 100644
--- a/include/linux/raid/raid5.h
+++ b/drivers/md/raid5.h
@@ -1,7 +1,6 @@
1#ifndef _RAID5_H 1#ifndef _RAID5_H
2#define _RAID5_H 2#define _RAID5_H
3 3
4#include <linux/raid/md.h>
5#include <linux/raid/xor.h> 4#include <linux/raid/xor.h>
6 5
7/* 6/*
@@ -197,15 +196,19 @@ enum reconstruct_states {
197 196
198struct stripe_head { 197struct stripe_head {
199 struct hlist_node hash; 198 struct hlist_node hash;
200 struct list_head lru; /* inactive_list or handle_list */ 199 struct list_head lru; /* inactive_list or handle_list */
201 struct raid5_private_data *raid_conf; 200 struct raid5_private_data *raid_conf;
202 sector_t sector; /* sector of this row */ 201 short generation; /* increments with every
203 int pd_idx; /* parity disk index */ 202 * reshape */
204 unsigned long state; /* state flags */ 203 sector_t sector; /* sector of this row */
205 atomic_t count; /* nr of active thread/requests */ 204 short pd_idx; /* parity disk index */
205 short qd_idx; /* 'Q' disk index for raid6 */
206 short ddf_layout;/* use DDF ordering to calculate Q */
207 unsigned long state; /* state flags */
208 atomic_t count; /* nr of active thread/requests */
206 spinlock_t lock; 209 spinlock_t lock;
207 int bm_seq; /* sequence number for bitmap flushes */ 210 int bm_seq; /* sequence number for bitmap flushes */
208 int disks; /* disks in stripe */ 211 int disks; /* disks in stripe */
209 enum check_states check_state; 212 enum check_states check_state;
210 enum reconstruct_states reconstruct_state; 213 enum reconstruct_states reconstruct_state;
211 /* stripe_operations 214 /* stripe_operations
@@ -238,7 +241,7 @@ struct stripe_head_state {
238 241
239/* r6_state - extra state data only relevant to r6 */ 242/* r6_state - extra state data only relevant to r6 */
240struct r6_state { 243struct r6_state {
241 int p_failed, q_failed, qd_idx, failed_num[2]; 244 int p_failed, q_failed, failed_num[2];
242}; 245};
243 246
244/* Flags */ 247/* Flags */
@@ -268,6 +271,8 @@ struct r6_state {
268#define READ_MODIFY_WRITE 2 271#define READ_MODIFY_WRITE 2
269/* not a write method, but a compute_parity mode */ 272/* not a write method, but a compute_parity mode */
270#define CHECK_PARITY 3 273#define CHECK_PARITY 3
274/* Additional compute_parity mode -- updates the parity w/o LOCKING */
275#define UPDATE_PARITY 4
271 276
272/* 277/*
273 * Stripe state 278 * Stripe state
@@ -319,7 +324,7 @@ struct r6_state {
319 * PREREAD_ACTIVE is set, else we set DELAYED which will send it to the delayed queue. 324 * PREREAD_ACTIVE is set, else we set DELAYED which will send it to the delayed queue.
320 * HANDLE gets cleared if stripe_handle leave nothing locked. 325 * HANDLE gets cleared if stripe_handle leave nothing locked.
321 */ 326 */
322 327
323 328
324struct disk_info { 329struct disk_info {
325 mdk_rdev_t *rdev; 330 mdk_rdev_t *rdev;
@@ -334,12 +339,21 @@ struct raid5_private_data {
334 int raid_disks; 339 int raid_disks;
335 int max_nr_stripes; 340 int max_nr_stripes;
336 341
337 /* used during an expand */ 342 /* reshape_progress is the leading edge of a 'reshape'
338 sector_t expand_progress; /* MaxSector when no expand happening */ 343 * It has value MaxSector when no reshape is happening
339 sector_t expand_lo; /* from here up to expand_progress it out-of-bounds 344 * If delta_disks < 0, it is the last sector we started work on,
340 * as we haven't flushed the metadata yet 345 * else it is the next sector to work on.
341 */ 346 */
347 sector_t reshape_progress;
348 /* reshape_safe is the trailing edge of a reshape. We know that
349 * before (or after) this address, all reshape has completed.
350 */
351 sector_t reshape_safe;
342 int previous_raid_disks; 352 int previous_raid_disks;
353 int prev_chunk, prev_algo;
354 short generation; /* increments with every reshape */
355 unsigned long reshape_checkpoint; /* Time we last updated
356 * metadata */
343 357
344 struct list_head handle_list; /* stripes needing handling */ 358 struct list_head handle_list; /* stripes needing handling */
345 struct list_head hold_list; /* preread ready stripes */ 359 struct list_head hold_list; /* preread ready stripes */
@@ -385,6 +399,11 @@ struct raid5_private_data {
385 int pool_size; /* number of disks in stripeheads in pool */ 399 int pool_size; /* number of disks in stripeheads in pool */
386 spinlock_t device_lock; 400 spinlock_t device_lock;
387 struct disk_info *disks; 401 struct disk_info *disks;
402
403 /* When taking over an array from a different personality, we store
404 * the new thread here until we fully activate the array.
405 */
406 struct mdk_thread_s *thread;
388}; 407};
389 408
390typedef struct raid5_private_data raid5_conf_t; 409typedef struct raid5_private_data raid5_conf_t;
@@ -394,9 +413,62 @@ typedef struct raid5_private_data raid5_conf_t;
394/* 413/*
395 * Our supported algorithms 414 * Our supported algorithms
396 */ 415 */
397#define ALGORITHM_LEFT_ASYMMETRIC 0 416#define ALGORITHM_LEFT_ASYMMETRIC 0 /* Rotating Parity N with Data Restart */
398#define ALGORITHM_RIGHT_ASYMMETRIC 1 417#define ALGORITHM_RIGHT_ASYMMETRIC 1 /* Rotating Parity 0 with Data Restart */
399#define ALGORITHM_LEFT_SYMMETRIC 2 418#define ALGORITHM_LEFT_SYMMETRIC 2 /* Rotating Parity N with Data Continuation */
400#define ALGORITHM_RIGHT_SYMMETRIC 3 419#define ALGORITHM_RIGHT_SYMMETRIC 3 /* Rotating Parity 0 with Data Continuation */
420
421/* Define non-rotating (raid4) algorithms. These allow
422 * conversion of raid4 to raid5.
423 */
424#define ALGORITHM_PARITY_0 4 /* P or P,Q are initial devices */
425#define ALGORITHM_PARITY_N 5 /* P or P,Q are final devices. */
426
427/* DDF RAID6 layouts differ from md/raid6 layouts in two ways.
428 * Firstly, the exact positioning of the parity block is slightly
429 * different between the 'LEFT_*' modes of md and the "_N_*" modes
430 * of DDF.
431 * Secondly, the order of datablocks over which the Q syndrome is computed
432 * is different.
433 * Consequently we have different layouts for DDF/raid6 than md/raid6.
434 * These layouts are from the DDFv1.2 spec.
435 * Interestingly DDFv1.2-Errata-A does not specify N_CONTINUE but
436 * leaves RLQ=3 as 'Vendor Specific'
437 */
438
439#define ALGORITHM_ROTATING_ZERO_RESTART 8 /* DDF PRL=6 RLQ=1 */
440#define ALGORITHM_ROTATING_N_RESTART 9 /* DDF PRL=6 RLQ=2 */
441#define ALGORITHM_ROTATING_N_CONTINUE 10 /*DDF PRL=6 RLQ=3 */
442
443
444/* For every RAID5 algorithm we define a RAID6 algorithm
445 * with exactly the same layout for data and parity, and
446 * with the Q block always on the last device (N-1).
447 * This allows trivial conversion from RAID5 to RAID6
448 */
449#define ALGORITHM_LEFT_ASYMMETRIC_6 16
450#define ALGORITHM_RIGHT_ASYMMETRIC_6 17
451#define ALGORITHM_LEFT_SYMMETRIC_6 18
452#define ALGORITHM_RIGHT_SYMMETRIC_6 19
453#define ALGORITHM_PARITY_0_6 20
454#define ALGORITHM_PARITY_N_6 ALGORITHM_PARITY_N
455
/* Return 1 if 'layout' is a valid raid5 layout number (0..5,
 * i.e. ALGORITHM_LEFT_ASYMMETRIC through ALGORITHM_PARITY_N), else 0.
 */
static inline int algorithm_valid_raid5(int layout)
{
	if (layout < 0)
		return 0;
	return layout <= 5;
}
/* Return non-zero if 'layout' is a valid raid6 layout number.
 *
 * Three groups are accepted:
 *   0..5   - the layouts shared with raid5 (ALGORITHM_LEFT_ASYMMETRIC
 *            through ALGORITHM_PARITY_N)
 *   8..10  - the DDF layouts (ALGORITHM_ROTATING_ZERO_RESTART,
 *            ALGORITHM_ROTATING_N_RESTART, ALGORITHM_ROTATING_N_CONTINUE)
 *   16..20 - the "_6" layouts (ALGORITHM_LEFT_ASYMMETRIC_6 through
 *            ALGORITHM_PARITY_0_6)
 *
 * The DDF group is a contiguous range: 9 (ALGORITHM_ROTATING_N_RESTART)
 * is defined alongside 8 and 10 and is treated as a DDF layout by
 * algorithm_is_DDF(), so it must be accepted here as well.
 */
static inline int algorithm_valid_raid6(int layout)
{
	return (layout >= 0 && layout <= 5)
		||
		(layout >= 8 && layout <= 10)
		||
		(layout >= 16 && layout <= 20);
}
401 469
/* Return 1 if 'layout' is one of the DDF raid6 layouts (8..10), else 0. */
static inline int algorithm_is_DDF(int layout)
{
	if (layout < 8 || layout > 10)
		return 0;
	return 1;
}
402#endif 474#endif
diff --git a/drivers/md/raid6algos.c b/drivers/md/raid6algos.c
index 21987e3dbe6c..866215ac7f25 100644
--- a/drivers/md/raid6algos.c
+++ b/drivers/md/raid6algos.c
@@ -5,7 +5,7 @@
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330, 7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Bostom MA 02111-1307, USA; either version 2 of the License, or 8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference. 9 * (at your option) any later version; incorporated herein by reference.
10 * 10 *
11 * ----------------------------------------------------------------------- */ 11 * ----------------------------------------------------------------------- */
@@ -16,13 +16,20 @@
16 * Algorithm list and algorithm selection for RAID-6 16 * Algorithm list and algorithm selection for RAID-6
17 */ 17 */
18 18
19#include "raid6.h" 19#include <linux/raid/pq.h>
20#ifndef __KERNEL__ 20#ifndef __KERNEL__
21#include <sys/mman.h> 21#include <sys/mman.h>
22#include <stdio.h> 22#include <stdio.h>
23#else
24#if !RAID6_USE_EMPTY_ZERO_PAGE
25/* In .bss so it's zeroed */
26const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
27EXPORT_SYMBOL(raid6_empty_zero_page);
28#endif
23#endif 29#endif
24 30
25struct raid6_calls raid6_call; 31struct raid6_calls raid6_call;
32EXPORT_SYMBOL_GPL(raid6_call);
26 33
27/* Various routine sets */ 34/* Various routine sets */
28extern const struct raid6_calls raid6_intx1; 35extern const struct raid6_calls raid6_intx1;
@@ -79,6 +86,7 @@ const struct raid6_calls * const raid6_algos[] = {
79#else 86#else
80/* Need more time to be stable in userspace */ 87/* Need more time to be stable in userspace */
81#define RAID6_TIME_JIFFIES_LG2 9 88#define RAID6_TIME_JIFFIES_LG2 9
89#define time_before(x, y) ((x) < (y))
82#endif 90#endif
83 91
84/* Try to pick the best algorithm */ 92/* Try to pick the best algorithm */
@@ -152,3 +160,12 @@ int __init raid6_select_algo(void)
152 160
153 return best ? 0 : -EINVAL; 161 return best ? 0 : -EINVAL;
154} 162}
163
/* Module exit hook, registered with module_exit() below.  It performs
 * no work; presumably it exists only so the module can be unloaded
 * (TODO confirm).
 */
164static void raid6_exit(void)
165{
166 do { } while (0);
167}
168
169subsys_initcall(raid6_select_algo);
170module_exit(raid6_exit);
171MODULE_LICENSE("GPL");
diff --git a/drivers/md/raid6altivec.uc b/drivers/md/raid6altivec.uc
index b9afd35b8812..699dfeee4944 100644
--- a/drivers/md/raid6altivec.uc
+++ b/drivers/md/raid6altivec.uc
@@ -5,7 +5,7 @@
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330, 7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Bostom MA 02111-1307, USA; either version 2 of the License, or 8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference. 9 * (at your option) any later version; incorporated herein by reference.
10 * 10 *
11 * ----------------------------------------------------------------------- */ 11 * ----------------------------------------------------------------------- */
@@ -22,7 +22,7 @@
22 * bracketed this with preempt_disable/enable or in a lock) 22 * bracketed this with preempt_disable/enable or in a lock)
23 */ 23 */
24 24
25#include "raid6.h" 25#include <linux/raid/pq.h>
26 26
27#ifdef CONFIG_ALTIVEC 27#ifdef CONFIG_ALTIVEC
28 28
diff --git a/drivers/md/raid6int.uc b/drivers/md/raid6int.uc
index ad004cee0e26..f9bf9cba357f 100644
--- a/drivers/md/raid6int.uc
+++ b/drivers/md/raid6int.uc
@@ -5,7 +5,7 @@
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330, 7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Bostom MA 02111-1307, USA; either version 2 of the License, or 8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference. 9 * (at your option) any later version; incorporated herein by reference.
10 * 10 *
11 * ----------------------------------------------------------------------- */ 11 * ----------------------------------------------------------------------- */
@@ -18,7 +18,7 @@
18 * This file is postprocessed using unroll.pl 18 * This file is postprocessed using unroll.pl
19 */ 19 */
20 20
21#include "raid6.h" 21#include <linux/raid/pq.h>
22 22
23/* 23/*
24 * This is the C data type to use 24 * This is the C data type to use
diff --git a/drivers/md/raid6mmx.c b/drivers/md/raid6mmx.c
index d4e4a1bd70ad..e7f6c13132bf 100644
--- a/drivers/md/raid6mmx.c
+++ b/drivers/md/raid6mmx.c
@@ -5,7 +5,7 @@
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330, 7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Bostom MA 02111-1307, USA; either version 2 of the License, or 8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference. 9 * (at your option) any later version; incorporated herein by reference.
10 * 10 *
11 * ----------------------------------------------------------------------- */ 11 * ----------------------------------------------------------------------- */
@@ -18,7 +18,7 @@
18 18
19#if defined(__i386__) && !defined(__arch_um__) 19#if defined(__i386__) && !defined(__arch_um__)
20 20
21#include "raid6.h" 21#include <linux/raid/pq.h>
22#include "raid6x86.h" 22#include "raid6x86.h"
23 23
24/* Shared with raid6sse1.c */ 24/* Shared with raid6sse1.c */
diff --git a/drivers/md/raid6recov.c b/drivers/md/raid6recov.c
index a8c4d9451bd9..2609f00e0d61 100644
--- a/drivers/md/raid6recov.c
+++ b/drivers/md/raid6recov.c
@@ -5,7 +5,7 @@
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330, 7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Bostom MA 02111-1307, USA; either version 2 of the License, or 8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference. 9 * (at your option) any later version; incorporated herein by reference.
10 * 10 *
11 * ----------------------------------------------------------------------- */ 11 * ----------------------------------------------------------------------- */
@@ -18,7 +18,7 @@
18 * the syndrome.) 18 * the syndrome.)
19 */ 19 */
20 20
21#include "raid6.h" 21#include <linux/raid/pq.h>
22 22
23/* Recover two failed data blocks. */ 23/* Recover two failed data blocks. */
24void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, 24void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
@@ -63,9 +63,7 @@ void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
63 p++; q++; 63 p++; q++;
64 } 64 }
65} 65}
66 66EXPORT_SYMBOL_GPL(raid6_2data_recov);
67
68
69 67
70/* Recover failure of one data block plus the P block */ 68/* Recover failure of one data block plus the P block */
71void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) 69void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
@@ -97,9 +95,10 @@ void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
97 q++; dq++; 95 q++; dq++;
98 } 96 }
99} 97}
98EXPORT_SYMBOL_GPL(raid6_datap_recov);
100 99
101 100#ifndef __KERNEL__
102#ifndef __KERNEL__ /* Testing only */ 101/* Testing only */
103 102
104/* Recover two failed blocks. */ 103/* Recover two failed blocks. */
105void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs) 104void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs)
diff --git a/drivers/md/raid6sse1.c b/drivers/md/raid6sse1.c
index 0666237276ff..b274dd5eab8f 100644
--- a/drivers/md/raid6sse1.c
+++ b/drivers/md/raid6sse1.c
@@ -5,7 +5,7 @@
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330, 7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Bostom MA 02111-1307, USA; either version 2 of the License, or 8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference. 9 * (at your option) any later version; incorporated herein by reference.
10 * 10 *
11 * ----------------------------------------------------------------------- */ 11 * ----------------------------------------------------------------------- */
@@ -23,7 +23,7 @@
23 23
24#if defined(__i386__) && !defined(__arch_um__) 24#if defined(__i386__) && !defined(__arch_um__)
25 25
26#include "raid6.h" 26#include <linux/raid/pq.h>
27#include "raid6x86.h" 27#include "raid6x86.h"
28 28
29/* Defined in raid6mmx.c */ 29/* Defined in raid6mmx.c */
diff --git a/drivers/md/raid6sse2.c b/drivers/md/raid6sse2.c
index b034ad868039..6ed6c6c0389f 100644
--- a/drivers/md/raid6sse2.c
+++ b/drivers/md/raid6sse2.c
@@ -5,7 +5,7 @@
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330, 7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Bostom MA 02111-1307, USA; either version 2 of the License, or 8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference. 9 * (at your option) any later version; incorporated herein by reference.
10 * 10 *
11 * ----------------------------------------------------------------------- */ 11 * ----------------------------------------------------------------------- */
@@ -19,7 +19,7 @@
19 19
20#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) 20#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
21 21
22#include "raid6.h" 22#include <linux/raid/pq.h>
23#include "raid6x86.h" 23#include "raid6x86.h"
24 24
25static const struct raid6_sse_constants { 25static const struct raid6_sse_constants {
diff --git a/drivers/md/raid6test/Makefile b/drivers/md/raid6test/Makefile
index 78e0396adf2a..58ffdf4f5161 100644
--- a/drivers/md/raid6test/Makefile
+++ b/drivers/md/raid6test/Makefile
@@ -5,7 +5,7 @@
5 5
6CC = gcc 6CC = gcc
7OPTFLAGS = -O2 # Adjust as desired 7OPTFLAGS = -O2 # Adjust as desired
8CFLAGS = -I.. -g $(OPTFLAGS) 8CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS)
9LD = ld 9LD = ld
10PERL = perl 10PERL = perl
11AR = ar 11AR = ar
diff --git a/drivers/md/raid6test/test.c b/drivers/md/raid6test/test.c
index 559cc41b2585..7a930318b17d 100644
--- a/drivers/md/raid6test/test.c
+++ b/drivers/md/raid6test/test.c
@@ -17,7 +17,7 @@
17#include <stdlib.h> 17#include <stdlib.h>
18#include <stdio.h> 18#include <stdio.h>
19#include <string.h> 19#include <string.h>
20#include "raid6.h" 20#include <linux/raid/pq.h>
21 21
22#define NDISKS 16 /* Including P and Q */ 22#define NDISKS 16 /* Including P and Q */
23 23
diff --git a/drivers/md/raid6x86.h b/drivers/md/raid6x86.h
index 99fea7a70ca7..4c22c1568558 100644
--- a/drivers/md/raid6x86.h
+++ b/drivers/md/raid6x86.h
@@ -5,7 +5,7 @@
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330, 7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Bostom MA 02111-1307, USA; either version 2 of the License, or 8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference. 9 * (at your option) any later version; incorporated herein by reference.
10 * 10 *
11 * ----------------------------------------------------------------------- */ 11 * ----------------------------------------------------------------------- */
diff --git a/drivers/mtd/maps/pxa2xx-flash.c b/drivers/mtd/maps/pxa2xx-flash.c
index e9026cb1c5b2..572d32fdf38a 100644
--- a/drivers/mtd/maps/pxa2xx-flash.c
+++ b/drivers/mtd/maps/pxa2xx-flash.c
@@ -117,7 +117,7 @@ static int __init pxa2xx_flash_probe(struct platform_device *pdev)
117 return 0; 117 return 0;
118} 118}
119 119
120static int __exit pxa2xx_flash_remove(struct platform_device *dev) 120static int __devexit pxa2xx_flash_remove(struct platform_device *dev)
121{ 121{
122 struct pxa2xx_flash_info *info = platform_get_drvdata(dev); 122 struct pxa2xx_flash_info *info = platform_get_drvdata(dev);
123 123
diff --git a/drivers/parisc/asp.c b/drivers/parisc/asp.c
index 7931133526c4..9ca21098b146 100644
--- a/drivers/parisc/asp.c
+++ b/drivers/parisc/asp.c
@@ -81,7 +81,7 @@ static int __init asp_init_chip(struct parisc_device *dev)
81 asp.hpa = ASP_INTERRUPT_ADDR; 81 asp.hpa = ASP_INTERRUPT_ADDR;
82 82
83 printk(KERN_INFO "%s version %d at 0x%lx found.\n", 83 printk(KERN_INFO "%s version %d at 0x%lx found.\n",
84 asp.name, asp.version, dev->hpa.start); 84 asp.name, asp.version, (unsigned long)dev->hpa.start);
85 85
86 /* the IRQ ASP should use */ 86 /* the IRQ ASP should use */
87 ret = -EBUSY; 87 ret = -EBUSY;
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index cd4dd7ed2c06..5d610cbcfe80 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -406,8 +406,6 @@ resource_found:
406 } 406 }
407 ioc->avg_search[ioc->avg_idx++] = cr_start; 407 ioc->avg_search[ioc->avg_idx++] = cr_start;
408 ioc->avg_idx &= CCIO_SEARCH_SAMPLE - 1; 408 ioc->avg_idx &= CCIO_SEARCH_SAMPLE - 1;
409#endif
410#ifdef CCIO_COLLECT_STATS
411 ioc->used_pages += pages_needed; 409 ioc->used_pages += pages_needed;
412#endif 410#endif
413 /* 411 /*
@@ -453,10 +451,10 @@ ccio_free_range(struct ioc *ioc, dma_addr_t iova, unsigned long pages_mapped)
453 unsigned long mask = ~(~0UL >> pages_mapped); 451 unsigned long mask = ~(~0UL >> pages_mapped);
454 CCIO_FREE_MAPPINGS(ioc, res_idx, mask, 8); 452 CCIO_FREE_MAPPINGS(ioc, res_idx, mask, 8);
455#else 453#else
456 CCIO_FREE_MAPPINGS(ioc, res_idx, 0xff, 8); 454 CCIO_FREE_MAPPINGS(ioc, res_idx, 0xffUL, 8);
457#endif 455#endif
458 } else if(pages_mapped <= 16) { 456 } else if(pages_mapped <= 16) {
459 CCIO_FREE_MAPPINGS(ioc, res_idx, 0xffff, 16); 457 CCIO_FREE_MAPPINGS(ioc, res_idx, 0xffffUL, 16);
460 } else if(pages_mapped <= 32) { 458 } else if(pages_mapped <= 32) {
461 CCIO_FREE_MAPPINGS(ioc, res_idx, ~(unsigned int)0, 32); 459 CCIO_FREE_MAPPINGS(ioc, res_idx, ~(unsigned int)0, 32);
462#ifdef __LP64__ 460#ifdef __LP64__
@@ -1028,8 +1026,10 @@ static int ccio_proc_info(struct seq_file *m, void *p)
1028 1026
1029 while (ioc != NULL) { 1027 while (ioc != NULL) {
1030 unsigned int total_pages = ioc->res_size << 3; 1028 unsigned int total_pages = ioc->res_size << 3;
1029#ifdef CCIO_COLLECT_STATS
1031 unsigned long avg = 0, min, max; 1030 unsigned long avg = 0, min, max;
1032 int j; 1031 int j;
1032#endif
1033 1033
1034 len += seq_printf(m, "%s\n", ioc->name); 1034 len += seq_printf(m, "%s\n", ioc->name);
1035 1035
@@ -1060,8 +1060,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
1060 avg /= CCIO_SEARCH_SAMPLE; 1060 avg /= CCIO_SEARCH_SAMPLE;
1061 len += seq_printf(m, " Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles)\n", 1061 len += seq_printf(m, " Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles)\n",
1062 min, avg, max); 1062 min, avg, max);
1063#endif 1063
1064#ifdef CCIO_COLLECT_STATS
1065 len += seq_printf(m, "pci_map_single(): %8ld calls %8ld pages (avg %d/1000)\n", 1064 len += seq_printf(m, "pci_map_single(): %8ld calls %8ld pages (avg %d/1000)\n",
1066 ioc->msingle_calls, ioc->msingle_pages, 1065 ioc->msingle_calls, ioc->msingle_pages,
1067 (int)((ioc->msingle_pages * 1000)/ioc->msingle_calls)); 1066 (int)((ioc->msingle_pages * 1000)/ioc->msingle_calls));
@@ -1400,7 +1399,7 @@ ccio_init_resource(struct resource *res, char *name, void __iomem *ioaddr)
1400 result = insert_resource(&iomem_resource, res); 1399 result = insert_resource(&iomem_resource, res);
1401 if (result < 0) { 1400 if (result < 0) {
1402 printk(KERN_ERR "%s() failed to claim CCIO bus address space (%08lx,%08lx)\n", 1401 printk(KERN_ERR "%s() failed to claim CCIO bus address space (%08lx,%08lx)\n",
1403 __func__, res->start, res->end); 1402 __func__, (unsigned long)res->start, (unsigned long)res->end);
1404 } 1403 }
1405} 1404}
1406 1405
@@ -1551,7 +1550,8 @@ static int __init ccio_probe(struct parisc_device *dev)
1551 1550
1552 ioc->name = dev->id.hversion == U2_IOA_RUNWAY ? "U2" : "UTurn"; 1551 ioc->name = dev->id.hversion == U2_IOA_RUNWAY ? "U2" : "UTurn";
1553 1552
1554 printk(KERN_INFO "Found %s at 0x%lx\n", ioc->name, dev->hpa.start); 1553 printk(KERN_INFO "Found %s at 0x%lx\n", ioc->name,
1554 (unsigned long)dev->hpa.start);
1555 1555
1556 for (i = 0; i < ioc_count; i++) { 1556 for (i = 0; i < ioc_count; i++) {
1557 ioc_p = &(*ioc_p)->next; 1557 ioc_p = &(*ioc_p)->next;
diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c
index bb5a1c9597cb..52ae0b1d470c 100644
--- a/drivers/parisc/dino.c
+++ b/drivers/parisc/dino.c
@@ -819,7 +819,9 @@ dino_bridge_init(struct dino_device *dino_dev, const char *name)
819 819
820 result = ccio_request_resource(dino_dev->hba.dev, &res[i]); 820 result = ccio_request_resource(dino_dev->hba.dev, &res[i]);
821 if (result < 0) { 821 if (result < 0) {
822 printk(KERN_ERR "%s: failed to claim PCI Bus address space %d (0x%lx-0x%lx)!\n", name, i, res[i].start, res[i].end); 822 printk(KERN_ERR "%s: failed to claim PCI Bus address "
823 "space %d (0x%lx-0x%lx)!\n", name, i,
824 (unsigned long)res[i].start, (unsigned long)res[i].end);
823 return result; 825 return result;
824 } 826 }
825 } 827 }
@@ -899,7 +901,8 @@ static int __init dino_common_init(struct parisc_device *dev,
899 if (request_resource(&ioport_resource, res) < 0) { 901 if (request_resource(&ioport_resource, res) < 0) {
900 printk(KERN_ERR "%s: request I/O Port region failed " 902 printk(KERN_ERR "%s: request I/O Port region failed "
901 "0x%lx/%lx (hpa 0x%p)\n", 903 "0x%lx/%lx (hpa 0x%p)\n",
902 name, res->start, res->end, dino_dev->hba.base_addr); 904 name, (unsigned long)res->start, (unsigned long)res->end,
905 dino_dev->hba.base_addr);
903 return 1; 906 return 1;
904 } 907 }
905 908
diff --git a/drivers/parisc/eisa.c b/drivers/parisc/eisa.c
index 7891db50c483..f415fdd9a885 100644
--- a/drivers/parisc/eisa.c
+++ b/drivers/parisc/eisa.c
@@ -314,7 +314,7 @@ static int __init eisa_probe(struct parisc_device *dev)
314 char *name = is_mongoose(dev) ? "Mongoose" : "Wax"; 314 char *name = is_mongoose(dev) ? "Mongoose" : "Wax";
315 315
316 printk(KERN_INFO "%s EISA Adapter found at 0x%08lx\n", 316 printk(KERN_INFO "%s EISA Adapter found at 0x%08lx\n",
317 name, dev->hpa.start); 317 name, (unsigned long)dev->hpa.start);
318 318
319 eisa_dev.hba.dev = dev; 319 eisa_dev.hba.dev = dev;
320 eisa_dev.hba.iommu = ccio_get_iommu(dev); 320 eisa_dev.hba.iommu = ccio_get_iommu(dev);
diff --git a/drivers/parisc/eisa_enumerator.c b/drivers/parisc/eisa_enumerator.c
index 6d8aae003f6c..c709ecc2b7f7 100644
--- a/drivers/parisc/eisa_enumerator.c
+++ b/drivers/parisc/eisa_enumerator.c
@@ -98,7 +98,7 @@ static int configure_memory(const unsigned char *buf,
98 res->start = mem_parent->start + get_24(buf+len+2); 98 res->start = mem_parent->start + get_24(buf+len+2);
99 res->end = res->start + get_16(buf+len+5)*1024; 99 res->end = res->start + get_16(buf+len+5)*1024;
100 res->flags = IORESOURCE_MEM; 100 res->flags = IORESOURCE_MEM;
101 printk("memory %lx-%lx ", res->start, res->end); 101 printk("memory %lx-%lx ", (unsigned long)res->start, (unsigned long)res->end);
102 result = request_resource(mem_parent, res); 102 result = request_resource(mem_parent, res);
103 if (result < 0) { 103 if (result < 0) {
104 printk("\n" KERN_ERR "EISA Enumerator: failed to claim EISA Bus address space!\n"); 104 printk("\n" KERN_ERR "EISA Enumerator: failed to claim EISA Bus address space!\n");
@@ -188,7 +188,7 @@ static int configure_port(const unsigned char *buf, struct resource *io_parent,
188 res->start = get_16(buf+len+1); 188 res->start = get_16(buf+len+1);
189 res->end = get_16(buf+len+1)+(c&HPEE_PORT_SIZE_MASK)+1; 189 res->end = get_16(buf+len+1)+(c&HPEE_PORT_SIZE_MASK)+1;
190 res->flags = IORESOURCE_IO; 190 res->flags = IORESOURCE_IO;
191 printk("ioports %lx-%lx ", res->start, res->end); 191 printk("ioports %lx-%lx ", (unsigned long)res->start, (unsigned long)res->end);
192 result = request_resource(io_parent, res); 192 result = request_resource(io_parent, res);
193 if (result < 0) { 193 if (result < 0) {
194 printk("\n" KERN_ERR "EISA Enumerator: failed to claim EISA Bus address space!\n"); 194 printk("\n" KERN_ERR "EISA Enumerator: failed to claim EISA Bus address space!\n");
diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c
index 501aaf1f253f..73348c4047e9 100644
--- a/drivers/parisc/iosapic.c
+++ b/drivers/parisc/iosapic.c
@@ -714,7 +714,7 @@ static void iosapic_set_affinity_irq(unsigned int irq,
714 if (dest_cpu < 0) 714 if (dest_cpu < 0)
715 return; 715 return;
716 716
717 irq_desc[irq].affinity = cpumask_of_cpu(dest_cpu); 717 cpumask_copy(irq_desc[irq].affinity, cpumask_of(dest_cpu));
718 vi->txn_addr = txn_affinity_addr(irq, dest_cpu); 718 vi->txn_addr = txn_affinity_addr(irq, dest_cpu);
719 719
720 spin_lock_irqsave(&iosapic_lock, flags); 720 spin_lock_irqsave(&iosapic_lock, flags);
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index 454b6532e409..9581d3619450 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * (c) Copyright 2000 Red Hat Software 4 * (c) Copyright 2000 Red Hat Software
5 * (c) Copyright 2000 Helge Deller <hdeller@redhat.com> 5 * (c) Copyright 2000 Helge Deller <hdeller@redhat.com>
6 * (c) Copyright 2001-2005 Helge Deller <deller@gmx.de> 6 * (c) Copyright 2001-2009 Helge Deller <deller@gmx.de>
7 * (c) Copyright 2001 Randolph Chung <tausq@debian.org> 7 * (c) Copyright 2001 Randolph Chung <tausq@debian.org>
8 * 8 *
9 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
@@ -243,13 +243,11 @@ static int __init led_create_procfs(void)
243 243
244 proc_pdc_root = proc_mkdir("pdc", 0); 244 proc_pdc_root = proc_mkdir("pdc", 0);
245 if (!proc_pdc_root) return -1; 245 if (!proc_pdc_root) return -1;
246 proc_pdc_root->owner = THIS_MODULE;
247 ent = create_proc_entry("led", S_IFREG|S_IRUGO|S_IWUSR, proc_pdc_root); 246 ent = create_proc_entry("led", S_IFREG|S_IRUGO|S_IWUSR, proc_pdc_root);
248 if (!ent) return -1; 247 if (!ent) return -1;
249 ent->data = (void *)LED_NOLCD; /* LED */ 248 ent->data = (void *)LED_NOLCD; /* LED */
250 ent->read_proc = led_proc_read; 249 ent->read_proc = led_proc_read;
251 ent->write_proc = led_proc_write; 250 ent->write_proc = led_proc_write;
252 ent->owner = THIS_MODULE;
253 251
254 if (led_type == LED_HASLCD) 252 if (led_type == LED_HASLCD)
255 { 253 {
@@ -258,7 +256,6 @@ static int __init led_create_procfs(void)
258 ent->data = (void *)LED_HASLCD; /* LCD */ 256 ent->data = (void *)LED_HASLCD; /* LCD */
259 ent->read_proc = led_proc_read; 257 ent->read_proc = led_proc_read;
260 ent->write_proc = led_proc_write; 258 ent->write_proc = led_proc_write;
261 ent->owner = THIS_MODULE;
262 } 259 }
263 260
264 return 0; 261 return 0;
@@ -463,9 +460,20 @@ static void led_work_func (struct work_struct *unused)
463 if (likely(led_lanrxtx)) currentleds |= led_get_net_activity(); 460 if (likely(led_lanrxtx)) currentleds |= led_get_net_activity();
464 if (likely(led_diskio)) currentleds |= led_get_diskio_activity(); 461 if (likely(led_diskio)) currentleds |= led_get_diskio_activity();
465 462
466 /* blink all LEDs twice a second if we got an Oops (HPMC) */ 463 /* blink LEDs if we got an Oops (HPMC) */
467 if (unlikely(oops_in_progress)) 464 if (unlikely(oops_in_progress)) {
468 currentleds = (count_HZ<=(HZ/2)) ? 0 : 0xff; 465 if (boot_cpu_data.cpu_type >= pcxl2) {
466 /* newer machines don't have loadavg. LEDs, so we
467 * let all LEDs blink twice per second instead */
468 currentleds = (count_HZ <= (HZ/2)) ? 0 : 0xff;
469 } else {
470 /* old machines: blink loadavg. LEDs twice per second */
471 if (count_HZ <= (HZ/2))
472 currentleds &= ~(LED4|LED5|LED6|LED7);
473 else
474 currentleds |= (LED4|LED5|LED6|LED7);
475 }
476 }
469 477
470 if (currentleds != lastleds) 478 if (currentleds != lastleds)
471 { 479 {
@@ -511,7 +519,7 @@ static int led_halt(struct notifier_block *nb, unsigned long event, void *buf)
511 519
512 /* Cancel the work item and delete the queue */ 520 /* Cancel the work item and delete the queue */
513 if (led_wq) { 521 if (led_wq) {
514 cancel_rearming_delayed_workqueue(led_wq, &led_task); 522 cancel_delayed_work_sync(&led_task);
515 destroy_workqueue(led_wq); 523 destroy_workqueue(led_wq);
516 led_wq = NULL; 524 led_wq = NULL;
517 } 525 }
@@ -630,7 +638,7 @@ int lcd_print( const char *str )
630 638
631 /* temporarily disable the led work task */ 639 /* temporarily disable the led work task */
632 if (led_wq) 640 if (led_wq)
633 cancel_rearming_delayed_workqueue(led_wq, &led_task); 641 cancel_delayed_work_sync(&led_task);
634 642
635 /* copy display string to buffer for procfs */ 643 /* copy display string to buffer for procfs */
636 strlcpy(lcd_text, str, sizeof(lcd_text)); 644 strlcpy(lcd_text, str, sizeof(lcd_text));
diff --git a/drivers/pcmcia/pxa2xx_cm_x255.c b/drivers/pcmcia/pxa2xx_cm_x255.c
index 4ed64d8e95e7..5143a760153b 100644
--- a/drivers/pcmcia/pxa2xx_cm_x255.c
+++ b/drivers/pcmcia/pxa2xx_cm_x255.c
@@ -63,7 +63,7 @@ static void cmx255_pcmcia_socket_state(struct soc_pcmcia_socket *skt,
63 struct pcmcia_state *state) 63 struct pcmcia_state *state)
64{ 64{
65 int cd = skt->nr ? GPIO_PCMCIA_S1_CD_VALID : GPIO_PCMCIA_S0_CD_VALID; 65 int cd = skt->nr ? GPIO_PCMCIA_S1_CD_VALID : GPIO_PCMCIA_S0_CD_VALID;
66 int rdy = skt->nr ? GPIO_PCMCIA_S0_RDYINT : GPIO_PCMCIA_S1_RDYINT; 66 int rdy = skt->nr ? GPIO_PCMCIA_S1_RDYINT : GPIO_PCMCIA_S0_RDYINT;
67 67
68 state->detect = !gpio_get_value(cd); 68 state->detect = !gpio_get_value(cd);
69 state->ready = !!gpio_get_value(rdy); 69 state->ready = !!gpio_get_value(rdy);
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 56002f7d26bd..ffe34a12f446 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -688,22 +688,16 @@ config RTC_DRV_RS5C313
688 help 688 help
689 If you say yes here you get support for the Ricoh RS5C313 RTC chips. 689 If you say yes here you get support for the Ricoh RS5C313 RTC chips.
690 690
691config RTC_DRV_PARISC 691config RTC_DRV_GENERIC
692 tristate "PA-RISC firmware RTC support" 692 tristate "Generic RTC support"
693 depends on PARISC 693 # Please consider writing a new RTC driver instead of using the generic
694 help 694 # RTC abstraction
695 Say Y or M here to enable RTC support on PA-RISC systems using 695 depends on PARISC || M68K || PPC
696 firmware calls. If you do not know what you are doing, you should 696 help
697 Say Y or M here to enable RTC support on systems using the generic
698 RTC abstraction. If you do not know what you are doing, you should
697 just say Y. 699 just say Y.
698 700
699config RTC_DRV_PPC
700 tristate "PowerPC machine dependent RTC support"
701 depends on PPC
702 help
703 The PowerPC kernel has machine-specific functions for accessing
704 the RTC. This exposes that functionality through the generic RTC
705 class.
706
707config RTC_DRV_PXA 701config RTC_DRV_PXA
708 tristate "PXA27x/PXA3xx" 702 tristate "PXA27x/PXA3xx"
709 depends on ARCH_PXA 703 depends on ARCH_PXA
@@ -747,4 +741,13 @@ config RTC_DRV_MV
747 This driver can also be built as a module. If so, the module 741 This driver can also be built as a module. If so, the module
748 will be called rtc-mv. 742 will be called rtc-mv.
749 743
744config RTC_DRV_PS3
745 tristate "PS3 RTC"
746 depends on PPC_PS3
747 help
748 If you say yes here you will get support for the RTC on PS3.
749
750 This driver can also be built as a module. If so, the module
751 will be called rtc-ps3.
752
750endif # RTC_CLASS 753endif # RTC_CLASS
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index e7b09986d26e..6c0639a14f09 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -56,8 +56,7 @@ obj-$(CONFIG_RTC_DRV_PCF8563) += rtc-pcf8563.o
56obj-$(CONFIG_RTC_DRV_PCF8583) += rtc-pcf8583.o 56obj-$(CONFIG_RTC_DRV_PCF8583) += rtc-pcf8583.o
57obj-$(CONFIG_RTC_DRV_PL030) += rtc-pl030.o 57obj-$(CONFIG_RTC_DRV_PL030) += rtc-pl030.o
58obj-$(CONFIG_RTC_DRV_PL031) += rtc-pl031.o 58obj-$(CONFIG_RTC_DRV_PL031) += rtc-pl031.o
59obj-$(CONFIG_RTC_DRV_PARISC) += rtc-parisc.o 59obj-$(CONFIG_RTC_DRV_GENERIC) += rtc-generic.o
60obj-$(CONFIG_RTC_DRV_PPC) += rtc-ppc.o
61obj-$(CONFIG_RTC_DRV_PXA) += rtc-pxa.o 60obj-$(CONFIG_RTC_DRV_PXA) += rtc-pxa.o
62obj-$(CONFIG_RTC_DRV_R9701) += rtc-r9701.o 61obj-$(CONFIG_RTC_DRV_R9701) += rtc-r9701.o
63obj-$(CONFIG_RTC_DRV_RS5C313) += rtc-rs5c313.o 62obj-$(CONFIG_RTC_DRV_RS5C313) += rtc-rs5c313.o
@@ -77,3 +76,4 @@ obj-$(CONFIG_RTC_DRV_VR41XX) += rtc-vr41xx.o
77obj-$(CONFIG_RTC_DRV_WM8350) += rtc-wm8350.o 76obj-$(CONFIG_RTC_DRV_WM8350) += rtc-wm8350.o
78obj-$(CONFIG_RTC_DRV_X1205) += rtc-x1205.o 77obj-$(CONFIG_RTC_DRV_X1205) += rtc-x1205.o
79obj-$(CONFIG_RTC_DRV_PCF50633) += rtc-pcf50633.o 78obj-$(CONFIG_RTC_DRV_PCF50633) += rtc-pcf50633.o
79obj-$(CONFIG_RTC_DRV_PS3) += rtc-ps3.o
diff --git a/drivers/rtc/rtc-generic.c b/drivers/rtc/rtc-generic.c
new file mode 100644
index 000000000000..98322004ad2e
--- /dev/null
+++ b/drivers/rtc/rtc-generic.c
@@ -0,0 +1,84 @@
1/* rtc-generic: RTC driver using the generic RTC abstraction
2 *
3 * Copyright (C) 2008 Kyle McMartin <kyle@mcmartin.ca>
4 */
5
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/time.h>
9#include <linux/platform_device.h>
10#include <linux/rtc.h>
11
12#include <asm/rtc.h>
13
14static int generic_get_time(struct device *dev, struct rtc_time *tm)
15{
16 unsigned int ret = get_rtc_time(tm);
17
18 if (ret & RTC_BATT_BAD)
19 return -EOPNOTSUPP;
20
21 return rtc_valid_tm(tm);
22}
23
24static int generic_set_time(struct device *dev, struct rtc_time *tm)
25{
26 if (set_rtc_time(tm) < 0)
27 return -EOPNOTSUPP;
28
29 return 0;
30}
31
32static const struct rtc_class_ops generic_rtc_ops = {
33 .read_time = generic_get_time,
34 .set_time = generic_set_time,
35};
36
37static int __init generic_rtc_probe(struct platform_device *dev)
38{
39 struct rtc_device *rtc;
40
41 rtc = rtc_device_register("rtc-generic", &dev->dev, &generic_rtc_ops,
42 THIS_MODULE);
43 if (IS_ERR(rtc))
44 return PTR_ERR(rtc);
45
46 platform_set_drvdata(dev, rtc);
47
48 return 0;
49}
50
51static int __exit generic_rtc_remove(struct platform_device *dev)
52{
53 struct rtc_device *rtc = platform_get_drvdata(dev);
54
55 rtc_device_unregister(rtc);
56
57 return 0;
58}
59
60static struct platform_driver generic_rtc_driver = {
61 .driver = {
62 .name = "rtc-generic",
63 .owner = THIS_MODULE,
64 },
65 .remove = __exit_p(generic_rtc_remove),
66};
67
68static int __init generic_rtc_init(void)
69{
70 return platform_driver_probe(&generic_rtc_driver, generic_rtc_probe);
71}
72
73static void __exit generic_rtc_fini(void)
74{
75 platform_driver_unregister(&generic_rtc_driver);
76}
77
78module_init(generic_rtc_init);
79module_exit(generic_rtc_fini);
80
81MODULE_AUTHOR("Kyle McMartin <kyle@mcmartin.ca>");
82MODULE_LICENSE("GPL");
83MODULE_DESCRIPTION("Generic RTC driver");
84MODULE_ALIAS("platform:rtc-generic");
diff --git a/drivers/rtc/rtc-parisc.c b/drivers/rtc/rtc-parisc.c
deleted file mode 100644
index b966f56da976..000000000000
--- a/drivers/rtc/rtc-parisc.c
+++ /dev/null
@@ -1,86 +0,0 @@
1/* rtc-parisc: RTC for HP PA-RISC firmware
2 *
3 * Copyright (C) 2008 Kyle McMartin <kyle@mcmartin.ca>
4 */
5
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/time.h>
9#include <linux/platform_device.h>
10#include <linux/rtc.h>
11
12#include <asm/rtc.h>
13
14static int parisc_get_time(struct device *dev, struct rtc_time *tm)
15{
16 unsigned long ret;
17
18 ret = get_rtc_time(tm);
19
20 if (ret & RTC_BATT_BAD)
21 return -EOPNOTSUPP;
22
23 return rtc_valid_tm(tm);
24}
25
26static int parisc_set_time(struct device *dev, struct rtc_time *tm)
27{
28 if (set_rtc_time(tm) < 0)
29 return -EOPNOTSUPP;
30
31 return 0;
32}
33
34static const struct rtc_class_ops parisc_rtc_ops = {
35 .read_time = parisc_get_time,
36 .set_time = parisc_set_time,
37};
38
39static int __init parisc_rtc_probe(struct platform_device *dev)
40{
41 struct rtc_device *rtc;
42
43 rtc = rtc_device_register("rtc-parisc", &dev->dev, &parisc_rtc_ops,
44 THIS_MODULE);
45 if (IS_ERR(rtc))
46 return PTR_ERR(rtc);
47
48 platform_set_drvdata(dev, rtc);
49
50 return 0;
51}
52
53static int __exit parisc_rtc_remove(struct platform_device *dev)
54{
55 struct rtc_device *rtc = platform_get_drvdata(dev);
56
57 rtc_device_unregister(rtc);
58
59 return 0;
60}
61
62static struct platform_driver parisc_rtc_driver = {
63 .driver = {
64 .name = "rtc-parisc",
65 .owner = THIS_MODULE,
66 },
67 .probe = parisc_rtc_probe,
68 .remove = __devexit_p(parisc_rtc_remove),
69};
70
71static int __init parisc_rtc_init(void)
72{
73 return platform_driver_probe(&parisc_rtc_driver, parisc_rtc_probe);
74}
75
76static void __exit parisc_rtc_fini(void)
77{
78 platform_driver_unregister(&parisc_rtc_driver);
79}
80
81module_init(parisc_rtc_init);
82module_exit(parisc_rtc_fini);
83
84MODULE_AUTHOR("Kyle McMartin <kyle@mcmartin.ca>");
85MODULE_LICENSE("GPL");
86MODULE_DESCRIPTION("HP PA-RISC RTC driver");
diff --git a/drivers/rtc/rtc-ppc.c b/drivers/rtc/rtc-ppc.c
deleted file mode 100644
index c8e97e25ef7e..000000000000
--- a/drivers/rtc/rtc-ppc.c
+++ /dev/null
@@ -1,69 +0,0 @@
1/*
2 * RTC driver for ppc_md RTC functions
3 *
4 * © 2007 Red Hat, Inc.
5 *
6 * Author: David Woodhouse <dwmw2@infradead.org>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13
14#include <linux/module.h>
15#include <linux/err.h>
16#include <linux/rtc.h>
17#include <linux/platform_device.h>
18#include <asm/machdep.h>
19
20static int ppc_rtc_read_time(struct device *dev, struct rtc_time *tm)
21{
22 ppc_md.get_rtc_time(tm);
23 return 0;
24}
25
26static int ppc_rtc_set_time(struct device *dev, struct rtc_time *tm)
27{
28 return ppc_md.set_rtc_time(tm);
29}
30
31static const struct rtc_class_ops ppc_rtc_ops = {
32 .set_time = ppc_rtc_set_time,
33 .read_time = ppc_rtc_read_time,
34};
35
36static struct rtc_device *rtc;
37static struct platform_device *ppc_rtc_pdev;
38
39static int __init ppc_rtc_init(void)
40{
41 if (!ppc_md.get_rtc_time || !ppc_md.set_rtc_time)
42 return -ENODEV;
43
44 ppc_rtc_pdev = platform_device_register_simple("ppc-rtc", 0, NULL, 0);
45 if (IS_ERR(ppc_rtc_pdev))
46 return PTR_ERR(ppc_rtc_pdev);
47
48 rtc = rtc_device_register("ppc_md", &ppc_rtc_pdev->dev,
49 &ppc_rtc_ops, THIS_MODULE);
50 if (IS_ERR(rtc)) {
51 platform_device_unregister(ppc_rtc_pdev);
52 return PTR_ERR(rtc);
53 }
54
55 return 0;
56}
57
58static void __exit ppc_rtc_exit(void)
59{
60 rtc_device_unregister(rtc);
61 platform_device_unregister(ppc_rtc_pdev);
62}
63
64module_init(ppc_rtc_init);
65module_exit(ppc_rtc_exit);
66
67MODULE_LICENSE("GPL");
68MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>");
69MODULE_DESCRIPTION("Generic RTC class driver for PowerPC");
diff --git a/drivers/rtc/rtc-ps3.c b/drivers/rtc/rtc-ps3.c
new file mode 100644
index 000000000000..968133ce1ee8
--- /dev/null
+++ b/drivers/rtc/rtc-ps3.c
@@ -0,0 +1,104 @@
1/*
2 * PS3 RTC Driver
3 *
4 * Copyright 2009 Sony Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; version 2 of the License.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program.
17 * If not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include <linux/kernel.h>
21#include <linux/module.h>
22#include <linux/platform_device.h>
23#include <linux/rtc.h>
24
25#include <asm/lv1call.h>
26#include <asm/ps3.h>
27
28
29static u64 read_rtc(void)
30{
31 int result;
32 u64 rtc_val;
33 u64 tb_val;
34
35 result = lv1_get_rtc(&rtc_val, &tb_val);
36 BUG_ON(result);
37
38 return rtc_val;
39}
40
41static int ps3_get_time(struct device *dev, struct rtc_time *tm)
42{
43 rtc_time_to_tm(read_rtc() + ps3_os_area_get_rtc_diff(), tm);
44 return rtc_valid_tm(tm);
45}
46
47static int ps3_set_time(struct device *dev, struct rtc_time *tm)
48{
49 unsigned long now;
50
51 rtc_tm_to_time(tm, &now);
52 ps3_os_area_set_rtc_diff(now - read_rtc());
53 return 0;
54}
55
56static const struct rtc_class_ops ps3_rtc_ops = {
57 .read_time = ps3_get_time,
58 .set_time = ps3_set_time,
59};
60
61static int __init ps3_rtc_probe(struct platform_device *dev)
62{
63 struct rtc_device *rtc;
64
65 rtc = rtc_device_register("rtc-ps3", &dev->dev, &ps3_rtc_ops,
66 THIS_MODULE);
67 if (IS_ERR(rtc))
68 return PTR_ERR(rtc);
69
70 platform_set_drvdata(dev, rtc);
71 return 0;
72}
73
74static int __exit ps3_rtc_remove(struct platform_device *dev)
75{
76 rtc_device_unregister(platform_get_drvdata(dev));
77 return 0;
78}
79
80static struct platform_driver ps3_rtc_driver = {
81 .driver = {
82 .name = "rtc-ps3",
83 .owner = THIS_MODULE,
84 },
85 .remove = __exit_p(ps3_rtc_remove),
86};
87
88static int __init ps3_rtc_init(void)
89{
90 return platform_driver_probe(&ps3_rtc_driver, ps3_rtc_probe);
91}
92
93static void __exit ps3_rtc_fini(void)
94{
95 platform_driver_unregister(&ps3_rtc_driver);
96}
97
98module_init(ps3_rtc_init);
99module_exit(ps3_rtc_fini);
100
101MODULE_AUTHOR("Sony Corporation");
102MODULE_LICENSE("GPL");
103MODULE_DESCRIPTION("ps3 RTC driver");
104MODULE_ALIAS("platform:rtc-ps3");
diff --git a/drivers/serial/mcf.c b/drivers/serial/mcf.c
index 56841fe5f483..0eefb07bebaf 100644
--- a/drivers/serial/mcf.c
+++ b/drivers/serial/mcf.c
@@ -513,7 +513,7 @@ static int __init mcf_console_setup(struct console *co, char *options)
513 int parity = 'n'; 513 int parity = 'n';
514 int flow = 'n'; 514 int flow = 'n';
515 515
516 if ((co->index >= 0) && (co->index <= MCF_MAXPORTS)) 516 if ((co->index < 0) || (co->index >= MCF_MAXPORTS))
517 co->index = 0; 517 co->index = 0;
518 port = &mcf_ports[co->index].port; 518 port = &mcf_ports[co->index].port;
519 if (port->membase == 0) 519 if (port->membase == 0)
diff --git a/drivers/usb/storage/isd200.c b/drivers/usb/storage/isd200.c
index 882c57b399f7..fdba2f69d4c9 100644
--- a/drivers/usb/storage/isd200.c
+++ b/drivers/usb/storage/isd200.c
@@ -46,6 +46,7 @@
46#include <linux/errno.h> 46#include <linux/errno.h>
47#include <linux/module.h> 47#include <linux/module.h>
48#include <linux/slab.h> 48#include <linux/slab.h>
49#include <linux/ata.h>
49#include <linux/hdreg.h> 50#include <linux/hdreg.h>
50#include <linux/scatterlist.h> 51#include <linux/scatterlist.h>
51 52
@@ -328,7 +329,7 @@ struct isd200_config {
328 329
329struct isd200_info { 330struct isd200_info {
330 struct inquiry_data InquiryData; 331 struct inquiry_data InquiryData;
331 struct hd_driveid *id; 332 u16 *id;
332 struct isd200_config ConfigData; 333 struct isd200_config ConfigData;
333 unsigned char *RegsBuf; 334 unsigned char *RegsBuf;
334 unsigned char ATARegs[8]; 335 unsigned char ATARegs[8];
@@ -419,19 +420,19 @@ static void isd200_build_sense(struct us_data *us, struct scsi_cmnd *srb)
419 buf->Flags = UNIT_ATTENTION; 420 buf->Flags = UNIT_ATTENTION;
420 buf->AdditionalSenseCode = 0; 421 buf->AdditionalSenseCode = 0;
421 buf->AdditionalSenseCodeQualifier = 0; 422 buf->AdditionalSenseCodeQualifier = 0;
422 } else if(error & MCR_ERR) { 423 } else if (error & ATA_MCR) {
423 buf->ErrorCode = 0x70 | SENSE_ERRCODE_VALID; 424 buf->ErrorCode = 0x70 | SENSE_ERRCODE_VALID;
424 buf->AdditionalSenseLength = 0xb; 425 buf->AdditionalSenseLength = 0xb;
425 buf->Flags = UNIT_ATTENTION; 426 buf->Flags = UNIT_ATTENTION;
426 buf->AdditionalSenseCode = 0; 427 buf->AdditionalSenseCode = 0;
427 buf->AdditionalSenseCodeQualifier = 0; 428 buf->AdditionalSenseCodeQualifier = 0;
428 } else if(error & TRK0_ERR) { 429 } else if (error & ATA_TRK0NF) {
429 buf->ErrorCode = 0x70 | SENSE_ERRCODE_VALID; 430 buf->ErrorCode = 0x70 | SENSE_ERRCODE_VALID;
430 buf->AdditionalSenseLength = 0xb; 431 buf->AdditionalSenseLength = 0xb;
431 buf->Flags = NOT_READY; 432 buf->Flags = NOT_READY;
432 buf->AdditionalSenseCode = 0; 433 buf->AdditionalSenseCode = 0;
433 buf->AdditionalSenseCodeQualifier = 0; 434 buf->AdditionalSenseCodeQualifier = 0;
434 } else if(error & ECC_ERR) { 435 } else if (error & ATA_UNC) {
435 buf->ErrorCode = 0x70 | SENSE_ERRCODE_VALID; 436 buf->ErrorCode = 0x70 | SENSE_ERRCODE_VALID;
436 buf->AdditionalSenseLength = 0xb; 437 buf->AdditionalSenseLength = 0xb;
437 buf->Flags = DATA_PROTECT; 438 buf->Flags = DATA_PROTECT;
@@ -547,16 +548,16 @@ static int isd200_action( struct us_data *us, int action,
547 ata.generic.ActionSelect = ACTION_SELECT_1|ACTION_SELECT_5; 548 ata.generic.ActionSelect = ACTION_SELECT_1|ACTION_SELECT_5;
548 ata.generic.RegisterSelect = REG_DEVICE_HEAD | REG_COMMAND; 549 ata.generic.RegisterSelect = REG_DEVICE_HEAD | REG_COMMAND;
549 ata.write.DeviceHeadByte = info->DeviceHead; 550 ata.write.DeviceHeadByte = info->DeviceHead;
550 ata.write.CommandByte = WIN_SRST; 551 ata.write.CommandByte = ATA_CMD_DEV_RESET;
551 isd200_set_srb(info, DMA_NONE, NULL, 0); 552 isd200_set_srb(info, DMA_NONE, NULL, 0);
552 break; 553 break;
553 554
554 case ACTION_IDENTIFY: 555 case ACTION_IDENTIFY:
555 US_DEBUGP(" isd200_action(IDENTIFY)\n"); 556 US_DEBUGP(" isd200_action(IDENTIFY)\n");
556 ata.generic.RegisterSelect = REG_COMMAND; 557 ata.generic.RegisterSelect = REG_COMMAND;
557 ata.write.CommandByte = WIN_IDENTIFY; 558 ata.write.CommandByte = ATA_CMD_ID_ATA;
558 isd200_set_srb(info, DMA_FROM_DEVICE, info->id, 559 isd200_set_srb(info, DMA_FROM_DEVICE, info->id,
559 sizeof(struct hd_driveid)); 560 ATA_ID_WORDS * 2);
560 break; 561 break;
561 562
562 default: 563 default:
@@ -944,22 +945,22 @@ static int isd200_try_enum(struct us_data *us, unsigned char master_slave,
944 break; 945 break;
945 946
946 if (!detect) { 947 if (!detect) {
947 if (regs[ATA_REG_STATUS_OFFSET] & BUSY_STAT) { 948 if (regs[ATA_REG_STATUS_OFFSET] & ATA_BUSY) {
948 US_DEBUGP(" %s status is still BSY, try again...\n",mstr); 949 US_DEBUGP(" %s status is still BSY, try again...\n",mstr);
949 } else { 950 } else {
950 US_DEBUGP(" %s status !BSY, continue with next operation\n",mstr); 951 US_DEBUGP(" %s status !BSY, continue with next operation\n",mstr);
951 break; 952 break;
952 } 953 }
953 } 954 }
954 /* check for BUSY_STAT and */ 955 /* check for ATA_BUSY and */
955 /* WRERR_STAT (workaround ATA Zip drive) and */ 956 /* ATA_DF (workaround ATA Zip drive) and */
956 /* ERR_STAT (workaround for Archos CD-ROM) */ 957 /* ATA_ERR (workaround for Archos CD-ROM) */
957 else if (regs[ATA_REG_STATUS_OFFSET] & 958 else if (regs[ATA_REG_STATUS_OFFSET] &
958 (BUSY_STAT | WRERR_STAT | ERR_STAT )) { 959 (ATA_BUSY | ATA_DF | ATA_ERR)) {
959 US_DEBUGP(" Status indicates it is not ready, try again...\n"); 960 US_DEBUGP(" Status indicates it is not ready, try again...\n");
960 } 961 }
961 /* check for DRDY, ATA devices set DRDY after SRST */ 962 /* check for DRDY, ATA devices set DRDY after SRST */
962 else if (regs[ATA_REG_STATUS_OFFSET] & READY_STAT) { 963 else if (regs[ATA_REG_STATUS_OFFSET] & ATA_DRDY) {
963 US_DEBUGP(" Identified ATA device\n"); 964 US_DEBUGP(" Identified ATA device\n");
964 info->DeviceFlags |= DF_ATA_DEVICE; 965 info->DeviceFlags |= DF_ATA_DEVICE;
965 info->DeviceHead = master_slave; 966 info->DeviceHead = master_slave;
@@ -1053,103 +1054,50 @@ static int isd200_manual_enum(struct us_data *us)
1053 return(retStatus); 1054 return(retStatus);
1054} 1055}
1055 1056
1056static void isd200_fix_driveid (struct hd_driveid *id) 1057static void isd200_fix_driveid(u16 *id)
1057{ 1058{
1058#ifndef __LITTLE_ENDIAN 1059#ifndef __LITTLE_ENDIAN
1059# ifdef __BIG_ENDIAN 1060# ifdef __BIG_ENDIAN
1060 int i; 1061 int i;
1061 u16 *stringcast; 1062
1062 1063 for (i = 0; i < ATA_ID_WORDS; i++)
1063 id->config = __le16_to_cpu(id->config); 1064 id[i] = __le16_to_cpu(id[i]);
1064 id->cyls = __le16_to_cpu(id->cyls);
1065 id->reserved2 = __le16_to_cpu(id->reserved2);
1066 id->heads = __le16_to_cpu(id->heads);
1067 id->track_bytes = __le16_to_cpu(id->track_bytes);
1068 id->sector_bytes = __le16_to_cpu(id->sector_bytes);
1069 id->sectors = __le16_to_cpu(id->sectors);
1070 id->vendor0 = __le16_to_cpu(id->vendor0);
1071 id->vendor1 = __le16_to_cpu(id->vendor1);
1072 id->vendor2 = __le16_to_cpu(id->vendor2);
1073 stringcast = (u16 *)&id->serial_no[0];
1074 for (i = 0; i < (20/2); i++)
1075 stringcast[i] = __le16_to_cpu(stringcast[i]);
1076 id->buf_type = __le16_to_cpu(id->buf_type);
1077 id->buf_size = __le16_to_cpu(id->buf_size);
1078 id->ecc_bytes = __le16_to_cpu(id->ecc_bytes);
1079 stringcast = (u16 *)&id->fw_rev[0];
1080 for (i = 0; i < (8/2); i++)
1081 stringcast[i] = __le16_to_cpu(stringcast[i]);
1082 stringcast = (u16 *)&id->model[0];
1083 for (i = 0; i < (40/2); i++)
1084 stringcast[i] = __le16_to_cpu(stringcast[i]);
1085 id->dword_io = __le16_to_cpu(id->dword_io);
1086 id->reserved50 = __le16_to_cpu(id->reserved50);
1087 id->field_valid = __le16_to_cpu(id->field_valid);
1088 id->cur_cyls = __le16_to_cpu(id->cur_cyls);
1089 id->cur_heads = __le16_to_cpu(id->cur_heads);
1090 id->cur_sectors = __le16_to_cpu(id->cur_sectors);
1091 id->cur_capacity0 = __le16_to_cpu(id->cur_capacity0);
1092 id->cur_capacity1 = __le16_to_cpu(id->cur_capacity1);
1093 id->lba_capacity = __le32_to_cpu(id->lba_capacity);
1094 id->dma_1word = __le16_to_cpu(id->dma_1word);
1095 id->dma_mword = __le16_to_cpu(id->dma_mword);
1096 id->eide_pio_modes = __le16_to_cpu(id->eide_pio_modes);
1097 id->eide_dma_min = __le16_to_cpu(id->eide_dma_min);
1098 id->eide_dma_time = __le16_to_cpu(id->eide_dma_time);
1099 id->eide_pio = __le16_to_cpu(id->eide_pio);
1100 id->eide_pio_iordy = __le16_to_cpu(id->eide_pio_iordy);
1101 for (i = 0; i < 2; ++i)
1102 id->words69_70[i] = __le16_to_cpu(id->words69_70[i]);
1103 for (i = 0; i < 4; ++i)
1104 id->words71_74[i] = __le16_to_cpu(id->words71_74[i]);
1105 id->queue_depth = __le16_to_cpu(id->queue_depth);
1106 for (i = 0; i < 4; ++i)
1107 id->words76_79[i] = __le16_to_cpu(id->words76_79[i]);
1108 id->major_rev_num = __le16_to_cpu(id->major_rev_num);
1109 id->minor_rev_num = __le16_to_cpu(id->minor_rev_num);
1110 id->command_set_1 = __le16_to_cpu(id->command_set_1);
1111 id->command_set_2 = __le16_to_cpu(id->command_set_2);
1112 id->cfsse = __le16_to_cpu(id->cfsse);
1113 id->cfs_enable_1 = __le16_to_cpu(id->cfs_enable_1);
1114 id->cfs_enable_2 = __le16_to_cpu(id->cfs_enable_2);
1115 id->csf_default = __le16_to_cpu(id->csf_default);
1116 id->dma_ultra = __le16_to_cpu(id->dma_ultra);
1117 id->trseuc = __le16_to_cpu(id->trseuc);
1118 id->trsEuc = __le16_to_cpu(id->trsEuc);
1119 id->CurAPMvalues = __le16_to_cpu(id->CurAPMvalues);
1120 id->mprc = __le16_to_cpu(id->mprc);
1121 id->hw_config = __le16_to_cpu(id->hw_config);
1122 id->acoustic = __le16_to_cpu(id->acoustic);
1123 id->msrqs = __le16_to_cpu(id->msrqs);
1124 id->sxfert = __le16_to_cpu(id->sxfert);
1125 id->sal = __le16_to_cpu(id->sal);
1126 id->spg = __le32_to_cpu(id->spg);
1127 id->lba_capacity_2 = __le64_to_cpu(id->lba_capacity_2);
1128 for (i = 0; i < 22; i++)
1129 id->words104_125[i] = __le16_to_cpu(id->words104_125[i]);
1130 id->last_lun = __le16_to_cpu(id->last_lun);
1131 id->word127 = __le16_to_cpu(id->word127);
1132 id->dlf = __le16_to_cpu(id->dlf);
1133 id->csfo = __le16_to_cpu(id->csfo);
1134 for (i = 0; i < 26; i++)
1135 id->words130_155[i] = __le16_to_cpu(id->words130_155[i]);
1136 id->word156 = __le16_to_cpu(id->word156);
1137 for (i = 0; i < 3; i++)
1138 id->words157_159[i] = __le16_to_cpu(id->words157_159[i]);
1139 id->cfa_power = __le16_to_cpu(id->cfa_power);
1140 for (i = 0; i < 14; i++)
1141 id->words161_175[i] = __le16_to_cpu(id->words161_175[i]);
1142 for (i = 0; i < 31; i++)
1143 id->words176_205[i] = __le16_to_cpu(id->words176_205[i]);
1144 for (i = 0; i < 48; i++)
1145 id->words206_254[i] = __le16_to_cpu(id->words206_254[i]);
1146 id->integrity_word = __le16_to_cpu(id->integrity_word);
1147# else 1065# else
1148# error "Please fix <asm/byteorder.h>" 1066# error "Please fix <asm/byteorder.h>"
1149# endif 1067# endif
1150#endif 1068#endif
1151} 1069}
1152 1070
1071static void isd200_dump_driveid(u16 *id)
1072{
1073 US_DEBUGP(" Identify Data Structure:\n");
1074 US_DEBUGP(" config = 0x%x\n", id[ATA_ID_CONFIG]);
1075 US_DEBUGP(" cyls = 0x%x\n", id[ATA_ID_CYLS]);
1076 US_DEBUGP(" heads = 0x%x\n", id[ATA_ID_HEADS]);
1077 US_DEBUGP(" track_bytes = 0x%x\n", id[4]);
1078 US_DEBUGP(" sector_bytes = 0x%x\n", id[5]);
1079 US_DEBUGP(" sectors = 0x%x\n", id[ATA_ID_SECTORS]);
1080 US_DEBUGP(" serial_no[0] = 0x%x\n", *(char *)&id[ATA_ID_SERNO]);
1081 US_DEBUGP(" buf_type = 0x%x\n", id[20]);
1082 US_DEBUGP(" buf_size = 0x%x\n", id[ATA_ID_BUF_SIZE]);
1083 US_DEBUGP(" ecc_bytes = 0x%x\n", id[22]);
1084 US_DEBUGP(" fw_rev[0] = 0x%x\n", *(char *)&id[ATA_ID_FW_REV]);
1085 US_DEBUGP(" model[0] = 0x%x\n", *(char *)&id[ATA_ID_PROD]);
1086 US_DEBUGP(" max_multsect = 0x%x\n", id[ATA_ID_MAX_MULTSECT] & 0xff);
1087 US_DEBUGP(" dword_io = 0x%x\n", id[ATA_ID_DWORD_IO]);
1088 US_DEBUGP(" capability = 0x%x\n", id[ATA_ID_CAPABILITY] >> 8);
1089 US_DEBUGP(" tPIO = 0x%x\n", id[ATA_ID_OLD_PIO_MODES] >> 8);
1090 US_DEBUGP(" tDMA = 0x%x\n", id[ATA_ID_OLD_DMA_MODES] >> 8);
1091 US_DEBUGP(" field_valid = 0x%x\n", id[ATA_ID_FIELD_VALID]);
1092 US_DEBUGP(" cur_cyls = 0x%x\n", id[ATA_ID_CUR_CYLS]);
1093 US_DEBUGP(" cur_heads = 0x%x\n", id[ATA_ID_CUR_HEADS]);
1094 US_DEBUGP(" cur_sectors = 0x%x\n", id[ATA_ID_CUR_SECTORS]);
1095 US_DEBUGP(" cur_capacity = 0x%x\n", ata_id_u32(id, 57));
1096 US_DEBUGP(" multsect = 0x%x\n", id[ATA_ID_MULTSECT] & 0xff);
1097 US_DEBUGP(" lba_capacity = 0x%x\n", ata_id_u32(id, ATA_ID_LBA_CAPACITY));
1098 US_DEBUGP(" command_set_1 = 0x%x\n", id[ATA_ID_COMMAND_SET_1]);
1099 US_DEBUGP(" command_set_2 = 0x%x\n", id[ATA_ID_COMMAND_SET_2]);
1100}
1153 1101
1154/************************************************************************** 1102/**************************************************************************
1155 * isd200_get_inquiry_data 1103 * isd200_get_inquiry_data
@@ -1163,7 +1111,7 @@ static int isd200_get_inquiry_data( struct us_data *us )
1163{ 1111{
1164 struct isd200_info *info = (struct isd200_info *)us->extra; 1112 struct isd200_info *info = (struct isd200_info *)us->extra;
1165 int retStatus = ISD200_GOOD; 1113 int retStatus = ISD200_GOOD;
1166 struct hd_driveid *id = info->id; 1114 u16 *id = info->id;
1167 1115
1168 US_DEBUGP("Entering isd200_get_inquiry_data\n"); 1116 US_DEBUGP("Entering isd200_get_inquiry_data\n");
1169 1117
@@ -1180,8 +1128,7 @@ static int isd200_get_inquiry_data( struct us_data *us )
1180 /* this must be an ATA device */ 1128 /* this must be an ATA device */
1181 /* perform an ATA Command Identify */ 1129 /* perform an ATA Command Identify */
1182 transferStatus = isd200_action( us, ACTION_IDENTIFY, 1130 transferStatus = isd200_action( us, ACTION_IDENTIFY,
1183 id, 1131 id, ATA_ID_WORDS * 2);
1184 sizeof(struct hd_driveid) );
1185 if (transferStatus != ISD200_TRANSPORT_GOOD) { 1132 if (transferStatus != ISD200_TRANSPORT_GOOD) {
1186 /* Error issuing ATA Command Identify */ 1133 /* Error issuing ATA Command Identify */
1187 US_DEBUGP(" Error issuing ATA Command Identify\n"); 1134 US_DEBUGP(" Error issuing ATA Command Identify\n");
@@ -1191,35 +1138,9 @@ static int isd200_get_inquiry_data( struct us_data *us )
1191 int i; 1138 int i;
1192 __be16 *src; 1139 __be16 *src;
1193 __u16 *dest; 1140 __u16 *dest;
1194 isd200_fix_driveid(id);
1195 1141
1196 US_DEBUGP(" Identify Data Structure:\n"); 1142 isd200_fix_driveid(id);
1197 US_DEBUGP(" config = 0x%x\n", id->config); 1143 isd200_dump_driveid(id);
1198 US_DEBUGP(" cyls = 0x%x\n", id->cyls);
1199 US_DEBUGP(" heads = 0x%x\n", id->heads);
1200 US_DEBUGP(" track_bytes = 0x%x\n", id->track_bytes);
1201 US_DEBUGP(" sector_bytes = 0x%x\n", id->sector_bytes);
1202 US_DEBUGP(" sectors = 0x%x\n", id->sectors);
1203 US_DEBUGP(" serial_no[0] = 0x%x\n", id->serial_no[0]);
1204 US_DEBUGP(" buf_type = 0x%x\n", id->buf_type);
1205 US_DEBUGP(" buf_size = 0x%x\n", id->buf_size);
1206 US_DEBUGP(" ecc_bytes = 0x%x\n", id->ecc_bytes);
1207 US_DEBUGP(" fw_rev[0] = 0x%x\n", id->fw_rev[0]);
1208 US_DEBUGP(" model[0] = 0x%x\n", id->model[0]);
1209 US_DEBUGP(" max_multsect = 0x%x\n", id->max_multsect);
1210 US_DEBUGP(" dword_io = 0x%x\n", id->dword_io);
1211 US_DEBUGP(" capability = 0x%x\n", id->capability);
1212 US_DEBUGP(" tPIO = 0x%x\n", id->tPIO);
1213 US_DEBUGP(" tDMA = 0x%x\n", id->tDMA);
1214 US_DEBUGP(" field_valid = 0x%x\n", id->field_valid);
1215 US_DEBUGP(" cur_cyls = 0x%x\n", id->cur_cyls);
1216 US_DEBUGP(" cur_heads = 0x%x\n", id->cur_heads);
1217 US_DEBUGP(" cur_sectors = 0x%x\n", id->cur_sectors);
1218 US_DEBUGP(" cur_capacity = 0x%x\n", (id->cur_capacity1 << 16) + id->cur_capacity0 );
1219 US_DEBUGP(" multsect = 0x%x\n", id->multsect);
1220 US_DEBUGP(" lba_capacity = 0x%x\n", id->lba_capacity);
1221 US_DEBUGP(" command_set_1 = 0x%x\n", id->command_set_1);
1222 US_DEBUGP(" command_set_2 = 0x%x\n", id->command_set_2);
1223 1144
1224 memset(&info->InquiryData, 0, sizeof(info->InquiryData)); 1145 memset(&info->InquiryData, 0, sizeof(info->InquiryData));
1225 1146
@@ -1229,30 +1150,30 @@ static int isd200_get_inquiry_data( struct us_data *us )
1229 /* The length must be at least 36 (5 + 31) */ 1150 /* The length must be at least 36 (5 + 31) */
1230 info->InquiryData.AdditionalLength = 0x1F; 1151 info->InquiryData.AdditionalLength = 0x1F;
1231 1152
1232 if (id->command_set_1 & COMMANDSET_MEDIA_STATUS) { 1153 if (id[ATA_ID_COMMAND_SET_1] & COMMANDSET_MEDIA_STATUS) {
1233 /* set the removable bit */ 1154 /* set the removable bit */
1234 info->InquiryData.DeviceTypeModifier = DEVICE_REMOVABLE; 1155 info->InquiryData.DeviceTypeModifier = DEVICE_REMOVABLE;
1235 info->DeviceFlags |= DF_REMOVABLE_MEDIA; 1156 info->DeviceFlags |= DF_REMOVABLE_MEDIA;
1236 } 1157 }
1237 1158
1238 /* Fill in vendor identification fields */ 1159 /* Fill in vendor identification fields */
1239 src = (__be16*)id->model; 1160 src = (__be16 *)&id[ATA_ID_PROD];
1240 dest = (__u16*)info->InquiryData.VendorId; 1161 dest = (__u16*)info->InquiryData.VendorId;
1241 for (i=0;i<4;i++) 1162 for (i=0;i<4;i++)
1242 dest[i] = be16_to_cpu(src[i]); 1163 dest[i] = be16_to_cpu(src[i]);
1243 1164
1244 src = (__be16*)(id->model+8); 1165 src = (__be16 *)&id[ATA_ID_PROD + 8/2];
1245 dest = (__u16*)info->InquiryData.ProductId; 1166 dest = (__u16*)info->InquiryData.ProductId;
1246 for (i=0;i<8;i++) 1167 for (i=0;i<8;i++)
1247 dest[i] = be16_to_cpu(src[i]); 1168 dest[i] = be16_to_cpu(src[i]);
1248 1169
1249 src = (__be16*)id->fw_rev; 1170 src = (__be16 *)&id[ATA_ID_FW_REV];
1250 dest = (__u16*)info->InquiryData.ProductRevisionLevel; 1171 dest = (__u16*)info->InquiryData.ProductRevisionLevel;
1251 for (i=0;i<2;i++) 1172 for (i=0;i<2;i++)
1252 dest[i] = be16_to_cpu(src[i]); 1173 dest[i] = be16_to_cpu(src[i]);
1253 1174
1254 /* determine if it supports Media Status Notification */ 1175 /* determine if it supports Media Status Notification */
1255 if (id->command_set_2 & COMMANDSET_MEDIA_STATUS) { 1176 if (id[ATA_ID_COMMAND_SET_2] & COMMANDSET_MEDIA_STATUS) {
1256 US_DEBUGP(" Device supports Media Status Notification\n"); 1177 US_DEBUGP(" Device supports Media Status Notification\n");
1257 1178
1258 /* Indicate that it is enabled, even though it is not 1179 /* Indicate that it is enabled, even though it is not
@@ -1301,7 +1222,7 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
1301 union ata_cdb * ataCdb) 1222 union ata_cdb * ataCdb)
1302{ 1223{
1303 struct isd200_info *info = (struct isd200_info *)us->extra; 1224 struct isd200_info *info = (struct isd200_info *)us->extra;
1304 struct hd_driveid *id = info->id; 1225 u16 *id = info->id;
1305 int sendToTransport = 1; 1226 int sendToTransport = 1;
1306 unsigned char sectnum, head; 1227 unsigned char sectnum, head;
1307 unsigned short cylinder; 1228 unsigned short cylinder;
@@ -1369,13 +1290,12 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
1369 1290
1370 US_DEBUGP(" ATA OUT - SCSIOP_READ_CAPACITY\n"); 1291 US_DEBUGP(" ATA OUT - SCSIOP_READ_CAPACITY\n");
1371 1292
1372 if (id->capability & CAPABILITY_LBA ) { 1293 if (ata_id_has_lba(id))
1373 capacity = id->lba_capacity - 1; 1294 capacity = ata_id_u32(id, ATA_ID_LBA_CAPACITY) - 1;
1374 } else { 1295 else
1375 capacity = (id->heads * 1296 capacity = (id[ATA_ID_HEADS] * id[ATA_ID_CYLS] *
1376 id->cyls * 1297 id[ATA_ID_SECTORS]) - 1;
1377 id->sectors) - 1; 1298
1378 }
1379 readCapacityData.LogicalBlockAddress = cpu_to_be32(capacity); 1299 readCapacityData.LogicalBlockAddress = cpu_to_be32(capacity);
1380 readCapacityData.BytesPerBlock = cpu_to_be32(0x200); 1300 readCapacityData.BytesPerBlock = cpu_to_be32(0x200);
1381 1301
@@ -1392,16 +1312,16 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
1392 lba = be32_to_cpu(*(__be32 *)&srb->cmnd[2]); 1312 lba = be32_to_cpu(*(__be32 *)&srb->cmnd[2]);
1393 blockCount = (unsigned long)srb->cmnd[7]<<8 | (unsigned long)srb->cmnd[8]; 1313 blockCount = (unsigned long)srb->cmnd[7]<<8 | (unsigned long)srb->cmnd[8];
1394 1314
1395 if (id->capability & CAPABILITY_LBA) { 1315 if (ata_id_has_lba(id)) {
1396 sectnum = (unsigned char)(lba); 1316 sectnum = (unsigned char)(lba);
1397 cylinder = (unsigned short)(lba>>8); 1317 cylinder = (unsigned short)(lba>>8);
1398 head = ATA_ADDRESS_DEVHEAD_LBA_MODE | (unsigned char)(lba>>24 & 0x0F); 1318 head = ATA_ADDRESS_DEVHEAD_LBA_MODE | (unsigned char)(lba>>24 & 0x0F);
1399 } else { 1319 } else {
1400 sectnum = (unsigned char)((lba % id->sectors) + 1); 1320 sectnum = (u8)((lba % id[ATA_ID_SECTORS]) + 1);
1401 cylinder = (unsigned short)(lba / (id->sectors * 1321 cylinder = (u16)(lba / (id[ATA_ID_SECTORS] *
1402 id->heads)); 1322 id[ATA_ID_HEADS]));
1403 head = (unsigned char)((lba / id->sectors) % 1323 head = (u8)((lba / id[ATA_ID_SECTORS]) %
1404 id->heads); 1324 id[ATA_ID_HEADS]);
1405 } 1325 }
1406 ataCdb->generic.SignatureByte0 = info->ConfigData.ATAMajorCommand; 1326 ataCdb->generic.SignatureByte0 = info->ConfigData.ATAMajorCommand;
1407 ataCdb->generic.SignatureByte1 = info->ConfigData.ATAMinorCommand; 1327 ataCdb->generic.SignatureByte1 = info->ConfigData.ATAMinorCommand;
@@ -1415,7 +1335,7 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
1415 ataCdb->write.CylinderHighByte = (unsigned char)(cylinder>>8); 1335 ataCdb->write.CylinderHighByte = (unsigned char)(cylinder>>8);
1416 ataCdb->write.CylinderLowByte = (unsigned char)cylinder; 1336 ataCdb->write.CylinderLowByte = (unsigned char)cylinder;
1417 ataCdb->write.DeviceHeadByte = (head | ATA_ADDRESS_DEVHEAD_STD); 1337 ataCdb->write.DeviceHeadByte = (head | ATA_ADDRESS_DEVHEAD_STD);
1418 ataCdb->write.CommandByte = WIN_READ; 1338 ataCdb->write.CommandByte = ATA_CMD_PIO_READ;
1419 break; 1339 break;
1420 1340
1421 case WRITE_10: 1341 case WRITE_10:
@@ -1424,14 +1344,16 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
1424 lba = be32_to_cpu(*(__be32 *)&srb->cmnd[2]); 1344 lba = be32_to_cpu(*(__be32 *)&srb->cmnd[2]);
1425 blockCount = (unsigned long)srb->cmnd[7]<<8 | (unsigned long)srb->cmnd[8]; 1345 blockCount = (unsigned long)srb->cmnd[7]<<8 | (unsigned long)srb->cmnd[8];
1426 1346
1427 if (id->capability & CAPABILITY_LBA) { 1347 if (ata_id_has_lba(id)) {
1428 sectnum = (unsigned char)(lba); 1348 sectnum = (unsigned char)(lba);
1429 cylinder = (unsigned short)(lba>>8); 1349 cylinder = (unsigned short)(lba>>8);
1430 head = ATA_ADDRESS_DEVHEAD_LBA_MODE | (unsigned char)(lba>>24 & 0x0F); 1350 head = ATA_ADDRESS_DEVHEAD_LBA_MODE | (unsigned char)(lba>>24 & 0x0F);
1431 } else { 1351 } else {
1432 sectnum = (unsigned char)((lba % id->sectors) + 1); 1352 sectnum = (u8)((lba % id[ATA_ID_SECTORS]) + 1);
1433 cylinder = (unsigned short)(lba / (id->sectors * id->heads)); 1353 cylinder = (u16)(lba / (id[ATA_ID_SECTORS] *
1434 head = (unsigned char)((lba / id->sectors) % id->heads); 1354 id[ATA_ID_HEADS]));
1355 head = (u8)((lba / id[ATA_ID_SECTORS]) %
1356 id[ATA_ID_HEADS]);
1435 } 1357 }
1436 ataCdb->generic.SignatureByte0 = info->ConfigData.ATAMajorCommand; 1358 ataCdb->generic.SignatureByte0 = info->ConfigData.ATAMajorCommand;
1437 ataCdb->generic.SignatureByte1 = info->ConfigData.ATAMinorCommand; 1359 ataCdb->generic.SignatureByte1 = info->ConfigData.ATAMinorCommand;
@@ -1445,7 +1367,7 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
1445 ataCdb->write.CylinderHighByte = (unsigned char)(cylinder>>8); 1367 ataCdb->write.CylinderHighByte = (unsigned char)(cylinder>>8);
1446 ataCdb->write.CylinderLowByte = (unsigned char)cylinder; 1368 ataCdb->write.CylinderLowByte = (unsigned char)cylinder;
1447 ataCdb->write.DeviceHeadByte = (head | ATA_ADDRESS_DEVHEAD_STD); 1369 ataCdb->write.DeviceHeadByte = (head | ATA_ADDRESS_DEVHEAD_STD);
1448 ataCdb->write.CommandByte = WIN_WRITE; 1370 ataCdb->write.CommandByte = ATA_CMD_PIO_WRITE;
1449 break; 1371 break;
1450 1372
1451 case ALLOW_MEDIUM_REMOVAL: 1373 case ALLOW_MEDIUM_REMOVAL:
@@ -1459,7 +1381,7 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
1459 ataCdb->generic.TransferBlockSize = 1; 1381 ataCdb->generic.TransferBlockSize = 1;
1460 ataCdb->generic.RegisterSelect = REG_COMMAND; 1382 ataCdb->generic.RegisterSelect = REG_COMMAND;
1461 ataCdb->write.CommandByte = (srb->cmnd[4] & 0x1) ? 1383 ataCdb->write.CommandByte = (srb->cmnd[4] & 0x1) ?
1462 WIN_DOORLOCK : WIN_DOORUNLOCK; 1384 ATA_CMD_MEDIA_LOCK : ATA_CMD_MEDIA_UNLOCK;
1463 isd200_srb_set_bufflen(srb, 0); 1385 isd200_srb_set_bufflen(srb, 0);
1464 } else { 1386 } else {
1465 US_DEBUGP(" Not removeable media, just report okay\n"); 1387 US_DEBUGP(" Not removeable media, just report okay\n");
@@ -1539,8 +1461,7 @@ static int isd200_init_info(struct us_data *us)
1539 if (!info) 1461 if (!info)
1540 retStatus = ISD200_ERROR; 1462 retStatus = ISD200_ERROR;
1541 else { 1463 else {
1542 info->id = (struct hd_driveid *) 1464 info->id = kzalloc(ATA_ID_WORDS * 2, GFP_KERNEL);
1543 kzalloc(sizeof(struct hd_driveid), GFP_KERNEL);
1544 info->RegsBuf = (unsigned char *) 1465 info->RegsBuf = (unsigned char *)
1545 kmalloc(sizeof(info->ATARegs), GFP_KERNEL); 1466 kmalloc(sizeof(info->ATARegs), GFP_KERNEL);
1546 info->srb.sense_buffer = 1467 info->srb.sense_buffer =
diff --git a/fs/Kconfig b/fs/Kconfig
index c0022b1d5877..86b203fc3c56 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -176,6 +176,8 @@ source "fs/romfs/Kconfig"
176source "fs/sysv/Kconfig" 176source "fs/sysv/Kconfig"
177source "fs/ufs/Kconfig" 177source "fs/ufs/Kconfig"
178 178
179source "fs/exofs/Kconfig"
180
179endif # MISC_FILESYSTEMS 181endif # MISC_FILESYSTEMS
180 182
181menuconfig NETWORK_FILESYSTEMS 183menuconfig NETWORK_FILESYSTEMS
diff --git a/fs/Makefile b/fs/Makefile
index 055d5237b109..70b2aed87133 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -122,3 +122,4 @@ obj-$(CONFIG_DEBUG_FS) += debugfs/
122obj-$(CONFIG_OCFS2_FS) += ocfs2/ 122obj-$(CONFIG_OCFS2_FS) += ocfs2/
123obj-$(CONFIG_BTRFS_FS) += btrfs/ 123obj-$(CONFIG_BTRFS_FS) += btrfs/
124obj-$(CONFIG_GFS2_FS) += gfs2/ 124obj-$(CONFIG_GFS2_FS) += gfs2/
125obj-$(CONFIG_EXOFS_FS) += exofs/
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index ff786687e93b..3e87ce443ea2 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -23,7 +23,7 @@
23#include <linux/if.h> 23#include <linux/if.h>
24#include <linux/if_bridge.h> 24#include <linux/if_bridge.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/raid/md.h> 26#include <linux/raid/md_u.h>
27#include <linux/kd.h> 27#include <linux/kd.h>
28#include <linux/route.h> 28#include <linux/route.h>
29#include <linux/in6.h> 29#include <linux/in6.h>
diff --git a/fs/exofs/BUGS b/fs/exofs/BUGS
new file mode 100644
index 000000000000..1b2d4c63a579
--- /dev/null
+++ b/fs/exofs/BUGS
@@ -0,0 +1,3 @@
1- Out-of-space may cause a severe problem if the object (and directory entry)
2 were written, but the inode attributes failed. Then if the filesystem was
3 unmounted and mounted the kernel can get into an endless loop doing a readdir.
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
new file mode 100644
index 000000000000..cc2d22db119c
--- /dev/null
+++ b/fs/exofs/Kbuild
@@ -0,0 +1,16 @@
1#
2# Kbuild for the EXOFS module
3#
4# Copyright (C) 2008 Panasas Inc. All rights reserved.
5#
6# Authors:
7# Boaz Harrosh <bharrosh@panasas.com>
8#
9# This program is free software; you can redistribute it and/or modify
10# it under the terms of the GNU General Public License version 2
11#
12# Kbuild - Gets included from the Kernels Makefile and build system
13#
14
15exofs-y := osd.o inode.o file.o symlink.o namei.o dir.o super.o
16obj-$(CONFIG_EXOFS_FS) += exofs.o
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig
new file mode 100644
index 000000000000..86194b2f799d
--- /dev/null
+++ b/fs/exofs/Kconfig
@@ -0,0 +1,13 @@
1config EXOFS_FS
2 tristate "exofs: OSD based file system support"
3 depends on SCSI_OSD_ULD
4 help
5 EXOFS is a file system that uses an OSD storage device,
6 as its backing storage.
7
8# Debugging-related stuff
9config EXOFS_DEBUG
10 bool "Enable debugging"
11 depends on EXOFS_FS
12 help
13 This option enables EXOFS debug prints.
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
new file mode 100644
index 000000000000..b1512c4bb8c7
--- /dev/null
+++ b/fs/exofs/common.h
@@ -0,0 +1,184 @@
1/*
2 * common.h - Common definitions for both Kernel and user-mode utilities
3 *
4 * Copyright (C) 2005, 2006
5 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
6 * Copyright (C) 2005, 2006
7 * International Business Machines
8 * Copyright (C) 2008, 2009
9 * Boaz Harrosh <bharrosh@panasas.com>
10 *
11 * Copyrights for code taken from ext2:
12 * Copyright (C) 1992, 1993, 1994, 1995
13 * Remy Card (card@masi.ibp.fr)
14 * Laboratoire MASI - Institut Blaise Pascal
15 * Universite Pierre et Marie Curie (Paris VI)
16 * from
17 * linux/fs/minix/inode.c
18 * Copyright (C) 1991, 1992 Linus Torvalds
19 *
20 * This file is part of exofs.
21 *
22 * exofs is free software; you can redistribute it and/or modify
23 * it under the terms of the GNU General Public License as published by
24 * the Free Software Foundation. Since it is based on ext2, and the only
25 * valid version of GPL for the Linux kernel is version 2, the only valid
26 * version of GPL for exofs is version 2.
27 *
28 * exofs is distributed in the hope that it will be useful,
29 * but WITHOUT ANY WARRANTY; without even the implied warranty of
30 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 * GNU General Public License for more details.
32 *
33 * You should have received a copy of the GNU General Public License
34 * along with exofs; if not, write to the Free Software
35 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
36 */
37
38#ifndef __EXOFS_COM_H__
39#define __EXOFS_COM_H__
40
41#include <linux/types.h>
42
43#include <scsi/osd_attributes.h>
44#include <scsi/osd_initiator.h>
45#include <scsi/osd_sec.h>
46
47/****************************************************************************
48 * Object ID related defines
49 * NOTE: inode# = object ID - EXOFS_OBJ_OFF
50 ****************************************************************************/
51#define EXOFS_MIN_PID 0x10000 /* Smallest partition ID */
52#define EXOFS_OBJ_OFF 0x10000 /* offset for objects */
53#define EXOFS_SUPER_ID 0x10000 /* object ID for on-disk superblock */
54#define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */
55
56/* exofs Application specific page/attribute */
57# define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3)
58# define EXOFS_ATTR_INODE_DATA 1
59
60/*
61 * The maximum number of files we can have is limited by the size of the
62 * inode number. This is the largest object ID that the file system supports.
63 * Object IDs 0, 1, and 2 are always in use (see above defines).
64 */
65enum {
66 EXOFS_MAX_INO_ID = (sizeof(ino_t) * 8 == 64) ? ULLONG_MAX :
67 (1ULL << (sizeof(ino_t) * 8ULL - 1ULL)),
68 EXOFS_MAX_ID = (EXOFS_MAX_INO_ID - 1 - EXOFS_OBJ_OFF),
69};
70
71/****************************************************************************
72 * Misc.
73 ****************************************************************************/
74#define EXOFS_BLKSHIFT 12
75#define EXOFS_BLKSIZE (1UL << EXOFS_BLKSHIFT)
76
77/****************************************************************************
78 * superblock-related things
79 ****************************************************************************/
80#define EXOFS_SUPER_MAGIC 0x5DF5
81
82/*
83 * The file system control block - stored in an object's data (mainly, the one
84 * with ID EXOFS_SUPER_ID). This is where the in-memory superblock is stored
85 * on disk. Right now it just has a magic value, which is basically a sanity
86 * check on our ability to communicate with the object store.
87 */
88struct exofs_fscb {
89 __le64 s_nextid; /* Highest object ID used */
90 __le32 s_numfiles; /* Number of files on fs */
91 __le16 s_magic; /* Magic signature */
92 __le16 s_newfs; /* Non-zero if this is a new fs */
93};
94
95/****************************************************************************
96 * inode-related things
97 ****************************************************************************/
98#define EXOFS_IDATA 5
99
100/*
101 * The file control block - stored in an object's attributes. This is where
102 * the in-memory inode is stored on disk.
103 */
104struct exofs_fcb {
105 __le64 i_size; /* Size of the file */
106 __le16 i_mode; /* File mode */
107 __le16 i_links_count; /* Links count */
108 __le32 i_uid; /* Owner Uid */
109 __le32 i_gid; /* Group Id */
110 __le32 i_atime; /* Access time */
111 __le32 i_ctime; /* Creation time */
112 __le32 i_mtime; /* Modification time */
113 __le32 i_flags; /* File flags (unused for now)*/
114 __le32 i_generation; /* File version (for NFS) */
115 __le32 i_data[EXOFS_IDATA]; /* Short symlink names and device #s */
116};
117
118#define EXOFS_INO_ATTR_SIZE sizeof(struct exofs_fcb)
119
120/* This is the Attribute the fcb is stored in */
121static const struct __weak osd_attr g_attr_inode_data = ATTR_DEF(
122 EXOFS_APAGE_FS_DATA,
123 EXOFS_ATTR_INODE_DATA,
124 EXOFS_INO_ATTR_SIZE);
125
126/****************************************************************************
127 * dentry-related things
128 ****************************************************************************/
129#define EXOFS_NAME_LEN 255
130
131/*
132 * The on-disk directory entry
133 */
134struct exofs_dir_entry {
135 __le64 inode_no; /* inode number */
136 __le16 rec_len; /* directory entry length */
137 u8 name_len; /* name length */
138 u8 file_type; /* umm...file type */
139 char name[EXOFS_NAME_LEN]; /* file name */
140};
141
142enum {
143 EXOFS_FT_UNKNOWN,
144 EXOFS_FT_REG_FILE,
145 EXOFS_FT_DIR,
146 EXOFS_FT_CHRDEV,
147 EXOFS_FT_BLKDEV,
148 EXOFS_FT_FIFO,
149 EXOFS_FT_SOCK,
150 EXOFS_FT_SYMLINK,
151 EXOFS_FT_MAX
152};
153
154#define EXOFS_DIR_PAD 4
155#define EXOFS_DIR_ROUND (EXOFS_DIR_PAD - 1)
156#define EXOFS_DIR_REC_LEN(name_len) \
157 (((name_len) + offsetof(struct exofs_dir_entry, name) + \
158 EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND)
159
160/*************************
161 * function declarations *
162 *************************/
163/* osd.c */
164void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
165 const struct osd_obj_id *obj);
166
167int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid);
168static inline int exofs_check_ok(struct osd_request *or)
169{
170 return exofs_check_ok_resid(or, NULL, NULL);
171}
172int exofs_sync_op(struct osd_request *or, int timeout, u8 *cred);
173int exofs_async_op(struct osd_request *or,
174 osd_req_done_fn *async_done, void *caller_context, u8 *cred);
175
176int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr);
177
178int osd_req_read_kern(struct osd_request *or,
179 const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
180
181int osd_req_write_kern(struct osd_request *or,
182 const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
183
184#endif /*ifndef __EXOFS_COM_H__*/
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
new file mode 100644
index 000000000000..65b0c8c776a1
--- /dev/null
+++ b/fs/exofs/dir.c
@@ -0,0 +1,672 @@
1/*
2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com>
8 *
9 * Copyrights for code taken from ext2:
10 * Copyright (C) 1992, 1993, 1994, 1995
11 * Remy Card (card@masi.ibp.fr)
12 * Laboratoire MASI - Institut Blaise Pascal
13 * Universite Pierre et Marie Curie (Paris VI)
14 * from
15 * linux/fs/minix/inode.c
16 * Copyright (C) 1991, 1992 Linus Torvalds
17 *
18 * This file is part of exofs.
19 *
20 * exofs is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation. Since it is based on ext2, and the only
23 * valid version of GPL for the Linux kernel is version 2, the only valid
24 * version of GPL for exofs is version 2.
25 *
26 * exofs is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 * GNU General Public License for more details.
30 *
31 * You should have received a copy of the GNU General Public License
32 * along with exofs; if not, write to the Free Software
33 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34 */
35
36#include "exofs.h"
37
38static inline unsigned exofs_chunk_size(struct inode *inode)
39{
40 return inode->i_sb->s_blocksize;
41}
42
/*
 * Release a directory page: kunmap() it first, then drop one page-cache
 * reference.  Counterpart of the kmap() done in exofs_get_page().
 */
static inline void exofs_put_page(struct page *page)
{
	kunmap(page);
	page_cache_release(page);
}
48
49/* Accesses dir's inode->i_size must be called under inode lock */
50static inline unsigned long dir_pages(struct inode *inode)
51{
52 return (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
53}
54
55static unsigned exofs_last_byte(struct inode *inode, unsigned long page_nr)
56{
57 loff_t last_byte = inode->i_size;
58
59 last_byte -= page_nr << PAGE_CACHE_SHIFT;
60 if (last_byte > PAGE_CACHE_SIZE)
61 last_byte = PAGE_CACHE_SIZE;
62 return last_byte;
63}
64
65static int exofs_commit_chunk(struct page *page, loff_t pos, unsigned len)
66{
67 struct address_space *mapping = page->mapping;
68 struct inode *dir = mapping->host;
69 int err = 0;
70
71 dir->i_version++;
72
73 if (!PageUptodate(page))
74 SetPageUptodate(page);
75
76 if (pos+len > dir->i_size) {
77 i_size_write(dir, pos+len);
78 mark_inode_dirty(dir);
79 }
80 set_page_dirty(page);
81
82 if (IS_DIRSYNC(dir))
83 err = write_one_page(page, 1);
84 else
85 unlock_page(page);
86
87 return err;
88}
89
90static void exofs_check_page(struct page *page)
91{
92 struct inode *dir = page->mapping->host;
93 unsigned chunk_size = exofs_chunk_size(dir);
94 char *kaddr = page_address(page);
95 unsigned offs, rec_len;
96 unsigned limit = PAGE_CACHE_SIZE;
97 struct exofs_dir_entry *p;
98 char *error;
99
100 /* if the page is the last one in the directory */
101 if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
102 limit = dir->i_size & ~PAGE_CACHE_MASK;
103 if (limit & (chunk_size - 1))
104 goto Ebadsize;
105 if (!limit)
106 goto out;
107 }
108 for (offs = 0; offs <= limit - EXOFS_DIR_REC_LEN(1); offs += rec_len) {
109 p = (struct exofs_dir_entry *)(kaddr + offs);
110 rec_len = le16_to_cpu(p->rec_len);
111
112 if (rec_len < EXOFS_DIR_REC_LEN(1))
113 goto Eshort;
114 if (rec_len & 3)
115 goto Ealign;
116 if (rec_len < EXOFS_DIR_REC_LEN(p->name_len))
117 goto Enamelen;
118 if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
119 goto Espan;
120 }
121 if (offs != limit)
122 goto Eend;
123out:
124 SetPageChecked(page);
125 return;
126
127Ebadsize:
128 EXOFS_ERR("ERROR [exofs_check_page]: "
129 "size of directory #%lu is not a multiple of chunk size",
130 dir->i_ino
131 );
132 goto fail;
133Eshort:
134 error = "rec_len is smaller than minimal";
135 goto bad_entry;
136Ealign:
137 error = "unaligned directory entry";
138 goto bad_entry;
139Enamelen:
140 error = "rec_len is too small for name_len";
141 goto bad_entry;
142Espan:
143 error = "directory entry across blocks";
144 goto bad_entry;
145bad_entry:
146 EXOFS_ERR(
147 "ERROR [exofs_check_page]: bad entry in directory #%lu: %s - "
148 "offset=%lu, inode=%llu, rec_len=%d, name_len=%d",
149 dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
150 _LLU(le64_to_cpu(p->inode_no)),
151 rec_len, p->name_len);
152 goto fail;
153Eend:
154 p = (struct exofs_dir_entry *)(kaddr + offs);
155 EXOFS_ERR("ERROR [exofs_check_page]: "
156 "entry in directory #%lu spans the page boundary"
157 "offset=%lu, inode=%llu",
158 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
159 _LLU(le64_to_cpu(p->inode_no)));
160fail:
161 SetPageChecked(page);
162 SetPageError(page);
163}
164
165static struct page *exofs_get_page(struct inode *dir, unsigned long n)
166{
167 struct address_space *mapping = dir->i_mapping;
168 struct page *page = read_mapping_page(mapping, n, NULL);
169
170 if (!IS_ERR(page)) {
171 kmap(page);
172 if (!PageChecked(page))
173 exofs_check_page(page);
174 if (PageError(page))
175 goto fail;
176 }
177 return page;
178
179fail:
180 exofs_put_page(page);
181 return ERR_PTR(-EIO);
182}
183
184static inline int exofs_match(int len, const unsigned char *name,
185 struct exofs_dir_entry *de)
186{
187 if (len != de->name_len)
188 return 0;
189 if (!de->inode_no)
190 return 0;
191 return !memcmp(name, de->name, len);
192}
193
194static inline
195struct exofs_dir_entry *exofs_next_entry(struct exofs_dir_entry *p)
196{
197 return (struct exofs_dir_entry *)((char *)p + le16_to_cpu(p->rec_len));
198}
199
200static inline unsigned
201exofs_validate_entry(char *base, unsigned offset, unsigned mask)
202{
203 struct exofs_dir_entry *de = (struct exofs_dir_entry *)(base + offset);
204 struct exofs_dir_entry *p =
205 (struct exofs_dir_entry *)(base + (offset&mask));
206 while ((char *)p < (char *)de) {
207 if (p->rec_len == 0)
208 break;
209 p = exofs_next_entry(p);
210 }
211 return (char *)p - base;
212}
213
214static unsigned char exofs_filetype_table[EXOFS_FT_MAX] = {
215 [EXOFS_FT_UNKNOWN] = DT_UNKNOWN,
216 [EXOFS_FT_REG_FILE] = DT_REG,
217 [EXOFS_FT_DIR] = DT_DIR,
218 [EXOFS_FT_CHRDEV] = DT_CHR,
219 [EXOFS_FT_BLKDEV] = DT_BLK,
220 [EXOFS_FT_FIFO] = DT_FIFO,
221 [EXOFS_FT_SOCK] = DT_SOCK,
222 [EXOFS_FT_SYMLINK] = DT_LNK,
223};
224
225#define S_SHIFT 12
226static unsigned char exofs_type_by_mode[S_IFMT >> S_SHIFT] = {
227 [S_IFREG >> S_SHIFT] = EXOFS_FT_REG_FILE,
228 [S_IFDIR >> S_SHIFT] = EXOFS_FT_DIR,
229 [S_IFCHR >> S_SHIFT] = EXOFS_FT_CHRDEV,
230 [S_IFBLK >> S_SHIFT] = EXOFS_FT_BLKDEV,
231 [S_IFIFO >> S_SHIFT] = EXOFS_FT_FIFO,
232 [S_IFSOCK >> S_SHIFT] = EXOFS_FT_SOCK,
233 [S_IFLNK >> S_SHIFT] = EXOFS_FT_SYMLINK,
234};
235
236static inline
237void exofs_set_de_type(struct exofs_dir_entry *de, struct inode *inode)
238{
239 mode_t mode = inode->i_mode;
240 de->file_type = exofs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
241}
242
243static int
244exofs_readdir(struct file *filp, void *dirent, filldir_t filldir)
245{
246 loff_t pos = filp->f_pos;
247 struct inode *inode = filp->f_path.dentry->d_inode;
248 unsigned int offset = pos & ~PAGE_CACHE_MASK;
249 unsigned long n = pos >> PAGE_CACHE_SHIFT;
250 unsigned long npages = dir_pages(inode);
251 unsigned chunk_mask = ~(exofs_chunk_size(inode)-1);
252 unsigned char *types = NULL;
253 int need_revalidate = (filp->f_version != inode->i_version);
254
255 if (pos > inode->i_size - EXOFS_DIR_REC_LEN(1))
256 return 0;
257
258 types = exofs_filetype_table;
259
260 for ( ; n < npages; n++, offset = 0) {
261 char *kaddr, *limit;
262 struct exofs_dir_entry *de;
263 struct page *page = exofs_get_page(inode, n);
264
265 if (IS_ERR(page)) {
266 EXOFS_ERR("ERROR: "
267 "bad page in #%lu",
268 inode->i_ino);
269 filp->f_pos += PAGE_CACHE_SIZE - offset;
270 return PTR_ERR(page);
271 }
272 kaddr = page_address(page);
273 if (unlikely(need_revalidate)) {
274 if (offset) {
275 offset = exofs_validate_entry(kaddr, offset,
276 chunk_mask);
277 filp->f_pos = (n<<PAGE_CACHE_SHIFT) + offset;
278 }
279 filp->f_version = inode->i_version;
280 need_revalidate = 0;
281 }
282 de = (struct exofs_dir_entry *)(kaddr + offset);
283 limit = kaddr + exofs_last_byte(inode, n) -
284 EXOFS_DIR_REC_LEN(1);
285 for (; (char *)de <= limit; de = exofs_next_entry(de)) {
286 if (de->rec_len == 0) {
287 EXOFS_ERR("ERROR: "
288 "zero-length directory entry");
289 exofs_put_page(page);
290 return -EIO;
291 }
292 if (de->inode_no) {
293 int over;
294 unsigned char d_type = DT_UNKNOWN;
295
296 if (types && de->file_type < EXOFS_FT_MAX)
297 d_type = types[de->file_type];
298
299 offset = (char *)de - kaddr;
300 over = filldir(dirent, de->name, de->name_len,
301 (n<<PAGE_CACHE_SHIFT) | offset,
302 le64_to_cpu(de->inode_no),
303 d_type);
304 if (over) {
305 exofs_put_page(page);
306 return 0;
307 }
308 }
309 filp->f_pos += le16_to_cpu(de->rec_len);
310 }
311 exofs_put_page(page);
312 }
313
314 return 0;
315}
316
317struct exofs_dir_entry *exofs_find_entry(struct inode *dir,
318 struct dentry *dentry, struct page **res_page)
319{
320 const unsigned char *name = dentry->d_name.name;
321 int namelen = dentry->d_name.len;
322 unsigned reclen = EXOFS_DIR_REC_LEN(namelen);
323 unsigned long start, n;
324 unsigned long npages = dir_pages(dir);
325 struct page *page = NULL;
326 struct exofs_i_info *oi = exofs_i(dir);
327 struct exofs_dir_entry *de;
328
329 if (npages == 0)
330 goto out;
331
332 *res_page = NULL;
333
334 start = oi->i_dir_start_lookup;
335 if (start >= npages)
336 start = 0;
337 n = start;
338 do {
339 char *kaddr;
340 page = exofs_get_page(dir, n);
341 if (!IS_ERR(page)) {
342 kaddr = page_address(page);
343 de = (struct exofs_dir_entry *) kaddr;
344 kaddr += exofs_last_byte(dir, n) - reclen;
345 while ((char *) de <= kaddr) {
346 if (de->rec_len == 0) {
347 EXOFS_ERR(
348 "ERROR: exofs_find_entry: "
349 "zero-length directory entry");
350 exofs_put_page(page);
351 goto out;
352 }
353 if (exofs_match(namelen, name, de))
354 goto found;
355 de = exofs_next_entry(de);
356 }
357 exofs_put_page(page);
358 }
359 if (++n >= npages)
360 n = 0;
361 } while (n != start);
362out:
363 return NULL;
364
365found:
366 *res_page = page;
367 oi->i_dir_start_lookup = n;
368 return de;
369}
370
371struct exofs_dir_entry *exofs_dotdot(struct inode *dir, struct page **p)
372{
373 struct page *page = exofs_get_page(dir, 0);
374 struct exofs_dir_entry *de = NULL;
375
376 if (!IS_ERR(page)) {
377 de = exofs_next_entry(
378 (struct exofs_dir_entry *)page_address(page));
379 *p = page;
380 }
381 return de;
382}
383
384ino_t exofs_parent_ino(struct dentry *child)
385{
386 struct page *page;
387 struct exofs_dir_entry *de;
388 ino_t ino;
389
390 de = exofs_dotdot(child->d_inode, &page);
391 if (!de)
392 return 0;
393
394 ino = le64_to_cpu(de->inode_no);
395 exofs_put_page(page);
396 return ino;
397}
398
399ino_t exofs_inode_by_name(struct inode *dir, struct dentry *dentry)
400{
401 ino_t res = 0;
402 struct exofs_dir_entry *de;
403 struct page *page;
404
405 de = exofs_find_entry(dir, dentry, &page);
406 if (de) {
407 res = le64_to_cpu(de->inode_no);
408 exofs_put_page(page);
409 }
410 return res;
411}
412
413int exofs_set_link(struct inode *dir, struct exofs_dir_entry *de,
414 struct page *page, struct inode *inode)
415{
416 loff_t pos = page_offset(page) +
417 (char *) de - (char *) page_address(page);
418 unsigned len = le16_to_cpu(de->rec_len);
419 int err;
420
421 lock_page(page);
422 err = exofs_write_begin(NULL, page->mapping, pos, len,
423 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
424 if (err)
425 EXOFS_ERR("exofs_set_link: exofs_write_begin FAILD => %d\n",
426 err);
427
428 de->inode_no = cpu_to_le64(inode->i_ino);
429 exofs_set_de_type(de, inode);
430 if (likely(!err))
431 err = exofs_commit_chunk(page, pos, len);
432 exofs_put_page(page);
433 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
434 mark_inode_dirty(dir);
435 return err;
436}
437
438int exofs_add_link(struct dentry *dentry, struct inode *inode)
439{
440 struct inode *dir = dentry->d_parent->d_inode;
441 const unsigned char *name = dentry->d_name.name;
442 int namelen = dentry->d_name.len;
443 unsigned chunk_size = exofs_chunk_size(dir);
444 unsigned reclen = EXOFS_DIR_REC_LEN(namelen);
445 unsigned short rec_len, name_len;
446 struct page *page = NULL;
447 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
448 struct exofs_dir_entry *de;
449 unsigned long npages = dir_pages(dir);
450 unsigned long n;
451 char *kaddr;
452 loff_t pos;
453 int err;
454
455 for (n = 0; n <= npages; n++) {
456 char *dir_end;
457
458 page = exofs_get_page(dir, n);
459 err = PTR_ERR(page);
460 if (IS_ERR(page))
461 goto out;
462 lock_page(page);
463 kaddr = page_address(page);
464 dir_end = kaddr + exofs_last_byte(dir, n);
465 de = (struct exofs_dir_entry *)kaddr;
466 kaddr += PAGE_CACHE_SIZE - reclen;
467 while ((char *)de <= kaddr) {
468 if ((char *)de == dir_end) {
469 name_len = 0;
470 rec_len = chunk_size;
471 de->rec_len = cpu_to_le16(chunk_size);
472 de->inode_no = 0;
473 goto got_it;
474 }
475 if (de->rec_len == 0) {
476 EXOFS_ERR("ERROR: exofs_add_link: "
477 "zero-length directory entry");
478 err = -EIO;
479 goto out_unlock;
480 }
481 err = -EEXIST;
482 if (exofs_match(namelen, name, de))
483 goto out_unlock;
484 name_len = EXOFS_DIR_REC_LEN(de->name_len);
485 rec_len = le16_to_cpu(de->rec_len);
486 if (!de->inode_no && rec_len >= reclen)
487 goto got_it;
488 if (rec_len >= name_len + reclen)
489 goto got_it;
490 de = (struct exofs_dir_entry *) ((char *) de + rec_len);
491 }
492 unlock_page(page);
493 exofs_put_page(page);
494 }
495
496 EXOFS_ERR("exofs_add_link: BAD dentry=%p or inode=%p", dentry, inode);
497 return -EINVAL;
498
499got_it:
500 pos = page_offset(page) +
501 (char *)de - (char *)page_address(page);
502 err = exofs_write_begin(NULL, page->mapping, pos, rec_len, 0,
503 &page, NULL);
504 if (err)
505 goto out_unlock;
506 if (de->inode_no) {
507 struct exofs_dir_entry *de1 =
508 (struct exofs_dir_entry *)((char *)de + name_len);
509 de1->rec_len = cpu_to_le16(rec_len - name_len);
510 de->rec_len = cpu_to_le16(name_len);
511 de = de1;
512 }
513 de->name_len = namelen;
514 memcpy(de->name, name, namelen);
515 de->inode_no = cpu_to_le64(inode->i_ino);
516 exofs_set_de_type(de, inode);
517 err = exofs_commit_chunk(page, pos, rec_len);
518 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
519 mark_inode_dirty(dir);
520 sbi->s_numfiles++;
521
522out_put:
523 exofs_put_page(page);
524out:
525 return err;
526out_unlock:
527 unlock_page(page);
528 goto out_put;
529}
530
531int exofs_delete_entry(struct exofs_dir_entry *dir, struct page *page)
532{
533 struct address_space *mapping = page->mapping;
534 struct inode *inode = mapping->host;
535 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
536 char *kaddr = page_address(page);
537 unsigned from = ((char *)dir - kaddr) & ~(exofs_chunk_size(inode)-1);
538 unsigned to = ((char *)dir - kaddr) + le16_to_cpu(dir->rec_len);
539 loff_t pos;
540 struct exofs_dir_entry *pde = NULL;
541 struct exofs_dir_entry *de = (struct exofs_dir_entry *) (kaddr + from);
542 int err;
543
544 while (de < dir) {
545 if (de->rec_len == 0) {
546 EXOFS_ERR("ERROR: exofs_delete_entry:"
547 "zero-length directory entry");
548 err = -EIO;
549 goto out;
550 }
551 pde = de;
552 de = exofs_next_entry(de);
553 }
554 if (pde)
555 from = (char *)pde - (char *)page_address(page);
556 pos = page_offset(page) + from;
557 lock_page(page);
558 err = exofs_write_begin(NULL, page->mapping, pos, to - from, 0,
559 &page, NULL);
560 if (err)
561 EXOFS_ERR("exofs_delete_entry: exofs_write_begin FAILD => %d\n",
562 err);
563 if (pde)
564 pde->rec_len = cpu_to_le16(to - from);
565 dir->inode_no = 0;
566 if (likely(!err))
567 err = exofs_commit_chunk(page, pos, to - from);
568 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
569 mark_inode_dirty(inode);
570 sbi->s_numfiles--;
571out:
572 exofs_put_page(page);
573 return err;
574}
575
576/* kept aligned on 4 bytes */
577#define THIS_DIR ".\0\0"
578#define PARENT_DIR "..\0"
579
580int exofs_make_empty(struct inode *inode, struct inode *parent)
581{
582 struct address_space *mapping = inode->i_mapping;
583 struct page *page = grab_cache_page(mapping, 0);
584 unsigned chunk_size = exofs_chunk_size(inode);
585 struct exofs_dir_entry *de;
586 int err;
587 void *kaddr;
588
589 if (!page)
590 return -ENOMEM;
591
592 err = exofs_write_begin(NULL, page->mapping, 0, chunk_size, 0,
593 &page, NULL);
594 if (err) {
595 unlock_page(page);
596 goto fail;
597 }
598
599 kaddr = kmap_atomic(page, KM_USER0);
600 de = (struct exofs_dir_entry *)kaddr;
601 de->name_len = 1;
602 de->rec_len = cpu_to_le16(EXOFS_DIR_REC_LEN(1));
603 memcpy(de->name, THIS_DIR, sizeof(THIS_DIR));
604 de->inode_no = cpu_to_le64(inode->i_ino);
605 exofs_set_de_type(de, inode);
606
607 de = (struct exofs_dir_entry *)(kaddr + EXOFS_DIR_REC_LEN(1));
608 de->name_len = 2;
609 de->rec_len = cpu_to_le16(chunk_size - EXOFS_DIR_REC_LEN(1));
610 de->inode_no = cpu_to_le64(parent->i_ino);
611 memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR));
612 exofs_set_de_type(de, inode);
613 kunmap_atomic(page, KM_USER0);
614 err = exofs_commit_chunk(page, 0, chunk_size);
615fail:
616 page_cache_release(page);
617 return err;
618}
619
620int exofs_empty_dir(struct inode *inode)
621{
622 struct page *page = NULL;
623 unsigned long i, npages = dir_pages(inode);
624
625 for (i = 0; i < npages; i++) {
626 char *kaddr;
627 struct exofs_dir_entry *de;
628 page = exofs_get_page(inode, i);
629
630 if (IS_ERR(page))
631 continue;
632
633 kaddr = page_address(page);
634 de = (struct exofs_dir_entry *)kaddr;
635 kaddr += exofs_last_byte(inode, i) - EXOFS_DIR_REC_LEN(1);
636
637 while ((char *)de <= kaddr) {
638 if (de->rec_len == 0) {
639 EXOFS_ERR("ERROR: exofs_empty_dir: "
640 "zero-length directory entry"
641 "kaddr=%p, de=%p\n", kaddr, de);
642 goto not_empty;
643 }
644 if (de->inode_no != 0) {
645 /* check for . and .. */
646 if (de->name[0] != '.')
647 goto not_empty;
648 if (de->name_len > 2)
649 goto not_empty;
650 if (de->name_len < 2) {
651 if (le64_to_cpu(de->inode_no) !=
652 inode->i_ino)
653 goto not_empty;
654 } else if (de->name[1] != '.')
655 goto not_empty;
656 }
657 de = exofs_next_entry(de);
658 }
659 exofs_put_page(page);
660 }
661 return 1;
662
663not_empty:
664 exofs_put_page(page);
665 return 0;
666}
667
668const struct file_operations exofs_dir_operations = {
669 .llseek = generic_file_llseek,
670 .read = generic_read_dir,
671 .readdir = exofs_readdir,
672};
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
new file mode 100644
index 000000000000..0fd4c7859679
--- /dev/null
+++ b/fs/exofs/exofs.h
@@ -0,0 +1,180 @@
1/*
2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com>
8 *
9 * Copyrights for code taken from ext2:
10 * Copyright (C) 1992, 1993, 1994, 1995
11 * Remy Card (card@masi.ibp.fr)
12 * Laboratoire MASI - Institut Blaise Pascal
13 * Universite Pierre et Marie Curie (Paris VI)
14 * from
15 * linux/fs/minix/inode.c
16 * Copyright (C) 1991, 1992 Linus Torvalds
17 *
18 * This file is part of exofs.
19 *
20 * exofs is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation. Since it is based on ext2, and the only
23 * valid version of GPL for the Linux kernel is version 2, the only valid
24 * version of GPL for exofs is version 2.
25 *
26 * exofs is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 * GNU General Public License for more details.
30 *
31 * You should have received a copy of the GNU General Public License
32 * along with exofs; if not, write to the Free Software
33 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34 */
35
36#include <linux/fs.h>
37#include <linux/time.h>
38#include "common.h"
39
40#ifndef __EXOFS_H__
41#define __EXOFS_H__
42
/* Error report: always compiled in, logged at KERN_ERR with an "exofs: " tag. */
#define EXOFS_ERR(fmt, args...) printk(KERN_ERR "exofs: " fmt, ##args)

/*
 * Debug trace. With CONFIG_EXOFS_DEBUG the message is logged at KERN_NOTICE
 * together with the calling function and line. Without it the printk sits
 * behind "if (0)", so the arguments are still seen by the compiler but no
 * message is produced.
 */
#ifdef CONFIG_EXOFS_DEBUG
#define EXOFS_DBGMSG(fmt, args...) \
	printk(KERN_NOTICE "exofs @%s:%d: " fmt, __func__, __LINE__, ##args)
#else
#define EXOFS_DBGMSG(fmt, args...) \
	do { if (0) printk(fmt, ##args); } while (0)
#endif
52
/*
 * u64 values cannot be handed to printk's %ll conversions as they are;
 * _LLU() casts its argument to unsigned long long for that purpose.
 * The expansion is fully parenthesized so it behaves as a single operand
 * in any surrounding expression (e.g. under sizeof).
 */
#define _LLU(x) ((unsigned long long)(x))
55
56/*
57 * our extension to the in-memory superblock
58 */
59struct exofs_sb_info {
60 struct osd_dev *s_dev; /* OSD device handle (returned by get_osd_dev); requests are started against it */
61 osd_id s_pid; /* partition ID of file system; first member of each osd_obj_id built in inode.c */
62 int s_timeout; /* timeout for OSD operations, passed to exofs_sync_op() */
63 uint64_t s_nextid; /* highest object ID used */
64 uint32_t s_numfiles; /* number of files on fs */
65 spinlock_t s_next_gen_lock; /* spinlock for gen # update */
66 u32 s_next_generation; /* next gen # to use */
67 atomic_t s_curr_pending; /* number of pending async commands: incremented on submit, decremented by the completion callbacks */
68 uint8_t s_cred[OSD_CAP_LEN]; /* all-powerful credential */
69};
70
71/*
72 * our extension to the in-memory inode
73 */
74struct exofs_i_info {
75 unsigned long i_flags; /* atomic flag bits OBJ_2BCREATED / OBJ_CREATED, accessed with set_bit()/test_bit() */
76 uint32_t i_data[EXOFS_IDATA];/* short symlink names and device numbers; i_data[0] != 0 marks a fast symlink */
77 uint32_t i_dir_start_lookup; /* which page to start lookup */
78 wait_queue_head_t i_wq; /* wait queue for inode */
79 uint64_t i_commit_size; /* the object's written length */
80 uint8_t i_cred[OSD_CAP_LEN];/* credential for this inode's object; handed to exofs_sync_op()/exofs_async_op() */
81 struct inode vfs_inode; /* embedded VFS inode; exofs_i() maps it back to this structure */
82};
83
84/*
85 * our inode flags
86 */
87#define OBJ_2BCREATED 0 /* bit number in i_flags: object will be created soon */
88#define OBJ_CREATED 1 /* bit number in i_flags: object has been created on the osd */
89
90static inline int obj_2bcreated(struct exofs_i_info *oi)
91{
92 return test_bit(OBJ_2BCREATED, &oi->i_flags);
93}
94
95static inline void set_obj_2bcreated(struct exofs_i_info *oi)
96{
97 set_bit(OBJ_2BCREATED, &oi->i_flags);
98}
99
100static inline int obj_created(struct exofs_i_info *oi)
101{
102 return test_bit(OBJ_CREATED, &oi->i_flags);
103}
104
105static inline void set_obj_created(struct exofs_i_info *oi)
106{
107 set_bit(OBJ_CREATED, &oi->i_flags);
108}
109
/* Slow path of wait_obj_created(); defined outside this header. */
int __exofs_wait_obj_created(struct exofs_i_info *oi);

/*
 * Returns 0 right away when OBJ_CREATED is already set (the expected case);
 * otherwise hands over to __exofs_wait_obj_created() and returns its result.
 */
static inline int wait_obj_created(struct exofs_i_info *oi)
{
	if (unlikely(!obj_created(oi)))
		return __exofs_wait_obj_created(oi);

	return 0;
}
118
119/*
120 * get to our inode from the vfs inode
121 */
122static inline struct exofs_i_info *exofs_i(struct inode *inode)
123{
124 return container_of(inode, struct exofs_i_info, vfs_inode);
125}
126
127/*
128 * Maximum count of links to a file
129 */
130#define EXOFS_LINK_MAX 32000 /* upper bound on an inode's link count; presumably enforced by the namei code - TODO confirm */
131
132/*************************
133 * function declarations *
134 *************************/
135/* inode.c */
136void exofs_truncate(struct inode *inode); /* sets the object's logical length to i_size */
137int exofs_setattr(struct dentry *, struct iattr *); /* inode_change_ok() followed by inode_setattr() */
138int exofs_write_begin(struct file *file, struct address_space *mapping,
139 loff_t pos, unsigned len, unsigned flags,
140 struct page **pagep, void **fsdata); /* *pagep may be NULL or an already obtained page */
141extern struct inode *exofs_iget(struct super_block *, unsigned long);
142struct inode *exofs_new_inode(struct inode *, int);
143extern int exofs_write_inode(struct inode *, int);
144extern void exofs_delete_inode(struct inode *);
145
146/* dir.c: */
147int exofs_add_link(struct dentry *, struct inode *);
148ino_t exofs_inode_by_name(struct inode *, struct dentry *);
149int exofs_delete_entry(struct exofs_dir_entry *, struct page *);
150int exofs_make_empty(struct inode *, struct inode *);
151struct exofs_dir_entry *exofs_find_entry(struct inode *, struct dentry *,
152 struct page **);
153int exofs_empty_dir(struct inode *); /* 1 if only "." and ".." are present, 0 otherwise */
154struct exofs_dir_entry *exofs_dotdot(struct inode *, struct page **);
155ino_t exofs_parent_ino(struct dentry *child);
156int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
157 struct inode *);
158
159/*********************
160 * operation vectors *
161 *********************/
162/* dir.c: */
163extern const struct file_operations exofs_dir_operations; /* llseek/read/readdir for directories */
164
165/* file.c */
166extern const struct inode_operations exofs_file_inode_operations; /* truncate + setattr */
167extern const struct file_operations exofs_file_operations;
168
169/* inode.c */
170extern const struct address_space_operations exofs_aops; /* page cache read/write paths */
171
172/* namei.c */
173extern const struct inode_operations exofs_dir_inode_operations;
174extern const struct inode_operations exofs_special_inode_operations;
175
176/* symlink.c */
177extern const struct inode_operations exofs_symlink_inode_operations;
178extern const struct inode_operations exofs_fast_symlink_inode_operations;
179
180#endif
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
new file mode 100644
index 000000000000..6ed7fe484752
--- /dev/null
+++ b/fs/exofs/file.c
@@ -0,0 +1,87 @@
1/*
2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com>
8 *
9 * Copyrights for code taken from ext2:
10 * Copyright (C) 1992, 1993, 1994, 1995
11 * Remy Card (card@masi.ibp.fr)
12 * Laboratoire MASI - Institut Blaise Pascal
13 * Universite Pierre et Marie Curie (Paris VI)
14 * from
15 * linux/fs/minix/inode.c
16 * Copyright (C) 1991, 1992 Linus Torvalds
17 *
18 * This file is part of exofs.
19 *
20 * exofs is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation. Since it is based on ext2, and the only
23 * valid version of GPL for the Linux kernel is version 2, the only valid
24 * version of GPL for exofs is version 2.
25 *
26 * exofs is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 * GNU General Public License for more details.
30 *
31 * You should have received a copy of the GNU General Public License
32 * along with exofs; if not, write to the Free Software
33 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34 */
35
36#include <linux/buffer_head.h>
37
38#include "exofs.h"
39
/*
 * ->release() hook for regular files: nothing is done here, success is
 * always reported.
 */
static int exofs_release_file(struct inode *inode, struct file *filp)
{
	return 0;
}
44
45static int exofs_file_fsync(struct file *filp, struct dentry *dentry,
46 int datasync)
47{
48 int ret;
49 struct address_space *mapping = filp->f_mapping;
50
51 ret = filemap_write_and_wait(mapping);
52 if (ret)
53 return ret;
54
55 /*Note: file_fsync below also calles sync_blockdev, which is a no-op
56 * for exofs, but other then that it does sync_inode and
57 * sync_superblock which is what we need here.
58 */
59 return file_fsync(filp, dentry, datasync);
60}
61
62static int exofs_flush(struct file *file, fl_owner_t id)
63{
64 exofs_file_fsync(file, file->f_path.dentry, 1);
65 /* TODO: Flush the OSD target */
66 return 0;
67}
68
/* File operations for exofs regular files: generic helpers plus the three local hooks below. */
69const struct file_operations exofs_file_operations = {
70 .llseek = generic_file_llseek,
71 .read = do_sync_read,
72 .write = do_sync_write,
73 .aio_read = generic_file_aio_read,
74 .aio_write = generic_file_aio_write,
75 .mmap = generic_file_mmap,
76 .open = generic_file_open,
77 .release = exofs_release_file, /* no-op, returns 0 */
78 .fsync = exofs_file_fsync, /* filemap_write_and_wait() + file_fsync() */
79 .flush = exofs_flush, /* runs exofs_file_fsync() with datasync=1 */
80 .splice_read = generic_file_splice_read,
81 .splice_write = generic_file_splice_write,
82};
83
/* Inode operations for exofs regular files; both hooks live in inode.c. */
84const struct inode_operations exofs_file_inode_operations = {
85 .truncate = exofs_truncate,
86 .setattr = exofs_setattr,
87};
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
new file mode 100644
index 000000000000..ba8d9fab4693
--- /dev/null
+++ b/fs/exofs/inode.c
@@ -0,0 +1,1303 @@
1/*
2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com>
8 *
9 * Copyrights for code taken from ext2:
10 * Copyright (C) 1992, 1993, 1994, 1995
11 * Remy Card (card@masi.ibp.fr)
12 * Laboratoire MASI - Institut Blaise Pascal
13 * Universite Pierre et Marie Curie (Paris VI)
14 * from
15 * linux/fs/minix/inode.c
16 * Copyright (C) 1991, 1992 Linus Torvalds
17 *
18 * This file is part of exofs.
19 *
20 * exofs is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation. Since it is based on ext2, and the only
23 * valid version of GPL for the Linux kernel is version 2, the only valid
24 * version of GPL for exofs is version 2.
25 *
26 * exofs is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 * GNU General Public License for more details.
30 *
31 * You should have received a copy of the GNU General Public License
32 * along with exofs; if not, write to the Free Software
33 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34 */
35
36#include <linux/writeback.h>
37#include <linux/buffer_head.h>
38#include <scsi/scsi_device.h>
39
40#include "exofs.h"
41
/* In debug builds exofs_get_inode() additionally reads the object's logical length as a sanity value. */
42#ifdef CONFIG_EXOFS_DEBUG
43# define EXOFS_DEBUG_OBJ_ISIZE 1
44#endif
45
/*
 * Collector for a run of contiguous pages that is sent to the OSD as one
 * read or write. The first four members are set once by _pcol_init(); the
 * rest describe the segment being collected and are cleared by _pcol_reset().
 */
46struct page_collect {
47 struct exofs_sb_info *sbi; /* inode->i_sb->s_fs_info */
48 struct request_queue *req_q; /* queue of the OSD's scsi device, used by bio_add_pc_page() */
49 struct inode *inode; /* inode all collected pages belong to */
50 unsigned expected_pages; /* sizing hint for the next bio allocation */
51
52 struct bio *bio; /* bio under construction, NULL when nothing is collected */
53 unsigned nr_pages; /* pages added to bio so far */
54 unsigned long length; /* bytes added to bio so far */
55 loff_t pg_first; /* index of the first collected page, -1 when empty; keep 64bit also in 32-arches */
56};
57
58static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
59 struct inode *inode)
60{
61 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
62 struct request_queue *req_q = sbi->s_dev->scsi_device->request_queue;
63
64 pcol->sbi = sbi;
65 pcol->req_q = req_q;
66 pcol->inode = inode;
67 pcol->expected_pages = expected_pages;
68
69 pcol->bio = NULL;
70 pcol->nr_pages = 0;
71 pcol->length = 0;
72 pcol->pg_first = -1;
73
74 EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino,
75 expected_pages);
76}
77
78static void _pcol_reset(struct page_collect *pcol)
79{
80 pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages);
81
82 pcol->bio = NULL;
83 pcol->nr_pages = 0;
84 pcol->length = 0;
85 pcol->pg_first = -1;
86 EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n",
87 pcol->inode->i_ino, pcol->expected_pages);
88
89 /* this is probably the end of the loop but in writes
90 * it might not end here. don't be left with nothing
91 */
92 if (!pcol->expected_pages)
93 pcol->expected_pages = 128;
94}
95
96static int pcol_try_alloc(struct page_collect *pcol)
97{
98 int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES);
99
100 for (; pages; pages >>= 1) {
101 pcol->bio = bio_alloc(GFP_KERNEL, pages);
102 if (likely(pcol->bio))
103 return 0;
104 }
105
106 EXOFS_ERR("Failed to kcalloc expected_pages=%u\n",
107 pcol->expected_pages);
108 return -ENOMEM;
109}
110
111static void pcol_free(struct page_collect *pcol)
112{
113 bio_put(pcol->bio);
114 pcol->bio = NULL;
115}
116
117static int pcol_add_page(struct page_collect *pcol, struct page *page,
118 unsigned len)
119{
120 int added_len = bio_add_pc_page(pcol->req_q, pcol->bio, page, len, 0);
121 if (unlikely(len != added_len))
122 return -ENOMEM;
123
124 ++pcol->nr_pages;
125 pcol->length += len;
126 return 0;
127}
128
129static int update_read_page(struct page *page, int ret)
130{
131 if (ret == 0) {
132 /* Everything is OK */
133 SetPageUptodate(page);
134 if (PageError(page))
135 ClearPageError(page);
136 } else if (ret == -EFAULT) {
137 /* In this case we were trying to read something that wasn't on
138 * disk yet - return a page full of zeroes. This should be OK,
139 * because the object should be empty (if there was a write
140 * before this read, the read would be waiting with the page
141 * locked */
142 clear_highpage(page);
143
144 SetPageUptodate(page);
145 if (PageError(page))
146 ClearPageError(page);
147 ret = 0; /* recovered error */
148 EXOFS_DBGMSG("recovered read error\n");
149 } else /* Error */
150 SetPageError(page);
151
152 return ret;
153}
154
155static void update_write_page(struct page *page, int ret)
156{
157 if (ret) {
158 mapping_set_error(page->mapping, ret);
159 SetPageError(page);
160 }
161 end_page_writeback(page);
162}
163
164/* Called at the end of reads, to optionally unlock pages and update their
165 * status.
166 */
167static int __readpages_done(struct osd_request *or, struct page_collect *pcol,
168 bool do_unlock)
169{
170 struct bio_vec *bvec;
171 int i;
172 u64 resid;
173 u64 good_bytes;
174 u64 length = 0;
175 int ret = exofs_check_ok_resid(or, &resid, NULL);
176
177 osd_end_request(or);
178
179 if (likely(!ret))
180 good_bytes = pcol->length;
181 else if (!resid)
182 good_bytes = 0;
183 else
184 good_bytes = pcol->length - resid;
185
186 EXOFS_DBGMSG("readpages_done(0x%lx) good_bytes=0x%llx"
187 " length=0x%lx nr_pages=%u\n",
188 pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
189 pcol->nr_pages);
190
191 __bio_for_each_segment(bvec, pcol->bio, i, 0) {
192 struct page *page = bvec->bv_page;
193 struct inode *inode = page->mapping->host;
194 int page_stat;
195
196 if (inode != pcol->inode)
197 continue; /* osd might add more pages at end */
198
199 if (likely(length < good_bytes))
200 page_stat = 0;
201 else
202 page_stat = ret;
203
204 EXOFS_DBGMSG(" readpages_done(0x%lx, 0x%lx) %s\n",
205 inode->i_ino, page->index,
206 page_stat ? "bad_bytes" : "good_bytes");
207
208 ret = update_read_page(page, page_stat);
209 if (do_unlock)
210 unlock_page(page);
211 length += bvec->bv_len;
212 }
213
214 pcol_free(pcol);
215 EXOFS_DBGMSG("readpages_done END\n");
216 return ret;
217}
218
219/* callback of async reads */
220static void readpages_done(struct osd_request *or, void *p)
221{
222 struct page_collect *pcol = p;
223
224 __readpages_done(or, pcol, true);
225 atomic_dec(&pcol->sbi->s_curr_pending);
226 kfree(p);
227}
228
229static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
230{
231 struct bio_vec *bvec;
232 int i;
233
234 __bio_for_each_segment(bvec, pcol->bio, i, 0) {
235 struct page *page = bvec->bv_page;
236
237 if (rw == READ)
238 update_read_page(page, ret);
239 else
240 update_write_page(page, ret);
241
242 unlock_page(page);
243 }
244 pcol_free(pcol);
245}
246
247static int read_exec(struct page_collect *pcol, bool is_sync)
248{
249 struct exofs_i_info *oi = exofs_i(pcol->inode);
250 struct osd_obj_id obj = {pcol->sbi->s_pid,
251 pcol->inode->i_ino + EXOFS_OBJ_OFF};
252 struct osd_request *or = NULL;
253 struct page_collect *pcol_copy = NULL;
254 loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
255 int ret;
256
257 if (!pcol->bio)
258 return 0;
259
260 /* see comment in _readpage() about sync reads */
261 WARN_ON(is_sync && (pcol->nr_pages != 1));
262
263 or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL);
264 if (unlikely(!or)) {
265 ret = -ENOMEM;
266 goto err;
267 }
268
269 osd_req_read(or, &obj, pcol->bio, i_start);
270
271 if (is_sync) {
272 exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred);
273 return __readpages_done(or, pcol, false);
274 }
275
276 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
277 if (!pcol_copy) {
278 ret = -ENOMEM;
279 goto err;
280 }
281
282 *pcol_copy = *pcol;
283 ret = exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred);
284 if (unlikely(ret))
285 goto err;
286
287 atomic_inc(&pcol->sbi->s_curr_pending);
288
289 EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
290 obj.id, _LLU(i_start), pcol->length);
291
292 /* pages ownership was passed to pcol_copy */
293 _pcol_reset(pcol);
294 return 0;
295
296err:
297 if (!is_sync)
298 _unlock_pcol_pages(pcol, ret, READ);
299 kfree(pcol_copy);
300 if (or)
301 osd_end_request(or);
302 return ret;
303}
304
305/* readpage_strip is called either directly from readpage() or by the VFS from
306 * within read_cache_pages(), to add one more page to be read. It will try to
307 * collect as many contiguous pages as posible. If a discontinuity is
308 * encountered, or it runs out of resources, it will submit the previous segment
309 * and will start a new collection. Eventually caller must submit the last
310 * segment if present.
311 */
312static int readpage_strip(void *data, struct page *page)
313{
314 struct page_collect *pcol = data;
315 struct inode *inode = pcol->inode;
316 struct exofs_i_info *oi = exofs_i(inode);
317 loff_t i_size = i_size_read(inode);
318 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
319 size_t len;
320 int ret;
321
322 /* FIXME: Just for debugging, will be removed */
323 if (PageUptodate(page))
324 EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino,
325 page->index);
326
327 if (page->index < end_index)
328 len = PAGE_CACHE_SIZE;
329 else if (page->index == end_index)
330 len = i_size & ~PAGE_CACHE_MASK;
331 else
332 len = 0;
333
334 if (!len || !obj_created(oi)) {
335 /* this will be out of bounds, or doesn't exist yet.
336 * Current page is cleared and the request is split
337 */
338 clear_highpage(page);
339
340 SetPageUptodate(page);
341 if (PageError(page))
342 ClearPageError(page);
343
344 unlock_page(page);
345 EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page,"
346 " splitting\n", inode->i_ino, page->index);
347
348 return read_exec(pcol, false);
349 }
350
351try_again:
352
353 if (unlikely(pcol->pg_first == -1)) {
354 pcol->pg_first = page->index;
355 } else if (unlikely((pcol->pg_first + pcol->nr_pages) !=
356 page->index)) {
357 /* Discontinuity detected, split the request */
358 ret = read_exec(pcol, false);
359 if (unlikely(ret))
360 goto fail;
361 goto try_again;
362 }
363
364 if (!pcol->bio) {
365 ret = pcol_try_alloc(pcol);
366 if (unlikely(ret))
367 goto fail;
368 }
369
370 if (len != PAGE_CACHE_SIZE)
371 zero_user(page, len, PAGE_CACHE_SIZE - len);
372
373 EXOFS_DBGMSG(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n",
374 inode->i_ino, page->index, len);
375
376 ret = pcol_add_page(pcol, page, len);
377 if (ret) {
378 EXOFS_DBGMSG("Failed pcol_add_page pages[i]=%p "
379 "this_len=0x%zx nr_pages=%u length=0x%lx\n",
380 page, len, pcol->nr_pages, pcol->length);
381
382 /* split the request, and start again with current page */
383 ret = read_exec(pcol, false);
384 if (unlikely(ret))
385 goto fail;
386
387 goto try_again;
388 }
389
390 return 0;
391
392fail:
393 /* SetPageError(page); ??? */
394 unlock_page(page);
395 return ret;
396}
397
398static int exofs_readpages(struct file *file, struct address_space *mapping,
399 struct list_head *pages, unsigned nr_pages)
400{
401 struct page_collect pcol;
402 int ret;
403
404 _pcol_init(&pcol, nr_pages, mapping->host);
405
406 ret = read_cache_pages(mapping, pages, readpage_strip, &pcol);
407 if (ret) {
408 EXOFS_ERR("read_cache_pages => %d\n", ret);
409 return ret;
410 }
411
412 return read_exec(&pcol, false);
413}
414
415static int _readpage(struct page *page, bool is_sync)
416{
417 struct page_collect pcol;
418 int ret;
419
420 _pcol_init(&pcol, 1, page->mapping->host);
421
422 /* readpage_strip might call read_exec(,async) inside at several places
423 * but this is safe for is_async=0 since read_exec will not do anything
424 * when we have a single page.
425 */
426 ret = readpage_strip(&pcol, page);
427 if (ret) {
428 EXOFS_ERR("_readpage => %d\n", ret);
429 return ret;
430 }
431
432 return read_exec(&pcol, is_sync);
433}
434
435/*
436 * We don't need the file
437 */
438static int exofs_readpage(struct file *file, struct page *page)
439{
440 return _readpage(page, false);
441}
442
443/* Callback for osd_write. All writes are asynchronouse */
444static void writepages_done(struct osd_request *or, void *p)
445{
446 struct page_collect *pcol = p;
447 struct bio_vec *bvec;
448 int i;
449 u64 resid;
450 u64 good_bytes;
451 u64 length = 0;
452
453 int ret = exofs_check_ok_resid(or, NULL, &resid);
454
455 osd_end_request(or);
456 atomic_dec(&pcol->sbi->s_curr_pending);
457
458 if (likely(!ret))
459 good_bytes = pcol->length;
460 else if (!resid)
461 good_bytes = 0;
462 else
463 good_bytes = pcol->length - resid;
464
465 EXOFS_DBGMSG("writepages_done(0x%lx) good_bytes=0x%llx"
466 " length=0x%lx nr_pages=%u\n",
467 pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
468 pcol->nr_pages);
469
470 __bio_for_each_segment(bvec, pcol->bio, i, 0) {
471 struct page *page = bvec->bv_page;
472 struct inode *inode = page->mapping->host;
473 int page_stat;
474
475 if (inode != pcol->inode)
476 continue; /* osd might add more pages to a bio */
477
478 if (likely(length < good_bytes))
479 page_stat = 0;
480 else
481 page_stat = ret;
482
483 update_write_page(page, page_stat);
484 unlock_page(page);
485 EXOFS_DBGMSG(" writepages_done(0x%lx, 0x%lx) status=%d\n",
486 inode->i_ino, page->index, page_stat);
487
488 length += bvec->bv_len;
489 }
490
491 pcol_free(pcol);
492 kfree(pcol);
493 EXOFS_DBGMSG("writepages_done END\n");
494}
495
496static int write_exec(struct page_collect *pcol)
497{
498 struct exofs_i_info *oi = exofs_i(pcol->inode);
499 struct osd_obj_id obj = {pcol->sbi->s_pid,
500 pcol->inode->i_ino + EXOFS_OBJ_OFF};
501 struct osd_request *or = NULL;
502 struct page_collect *pcol_copy = NULL;
503 loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
504 int ret;
505
506 if (!pcol->bio)
507 return 0;
508
509 or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL);
510 if (unlikely(!or)) {
511 EXOFS_ERR("write_exec: Faild to osd_start_request()\n");
512 ret = -ENOMEM;
513 goto err;
514 }
515
516 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
517 if (!pcol_copy) {
518 EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n");
519 ret = -ENOMEM;
520 goto err;
521 }
522
523 *pcol_copy = *pcol;
524
525 osd_req_write(or, &obj, pcol_copy->bio, i_start);
526 ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred);
527 if (unlikely(ret)) {
528 EXOFS_ERR("write_exec: exofs_async_op() Faild\n");
529 goto err;
530 }
531
532 atomic_inc(&pcol->sbi->s_curr_pending);
533 EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n",
534 pcol->inode->i_ino, pcol->pg_first, _LLU(i_start),
535 pcol->length);
536 /* pages ownership was passed to pcol_copy */
537 _pcol_reset(pcol);
538 return 0;
539
540err:
541 _unlock_pcol_pages(pcol, ret, WRITE);
542 kfree(pcol_copy);
543 if (or)
544 osd_end_request(or);
545 return ret;
546}
547
548/* writepage_strip is called either directly from writepage() or by the VFS from
549 * within write_cache_pages(), to add one more page to be written to storage.
550 * It will try to collect as many contiguous pages as possible. If a
551 * discontinuity is encountered or it runs out of resources it will submit the
552 * previous segment and will start a new collection.
553 * Eventually caller must submit the last segment if present.
554 */
555static int writepage_strip(struct page *page,
556 struct writeback_control *wbc_unused, void *data)
557{
558 struct page_collect *pcol = data;
559 struct inode *inode = pcol->inode;
560 struct exofs_i_info *oi = exofs_i(inode);
561 loff_t i_size = i_size_read(inode);
562 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
563 size_t len;
564 int ret;
565
566 BUG_ON(!PageLocked(page));
567
568 ret = wait_obj_created(oi);
569 if (unlikely(ret))
570 goto fail;
571
572 if (page->index < end_index)
573 /* in this case, the page is within the limits of the file */
574 len = PAGE_CACHE_SIZE;
575 else {
576 len = i_size & ~PAGE_CACHE_MASK;
577
578 if (page->index > end_index || !len) {
579 /* in this case, the page is outside the limits
580 * (truncate in progress)
581 */
582 ret = write_exec(pcol);
583 if (unlikely(ret))
584 goto fail;
585 if (PageError(page))
586 ClearPageError(page);
587 unlock_page(page);
588 return 0;
589 }
590 }
591
592try_again:
593
594 if (unlikely(pcol->pg_first == -1)) {
595 pcol->pg_first = page->index;
596 } else if (unlikely((pcol->pg_first + pcol->nr_pages) !=
597 page->index)) {
598 /* Discontinuity detected, split the request */
599 ret = write_exec(pcol);
600 if (unlikely(ret))
601 goto fail;
602 goto try_again;
603 }
604
605 if (!pcol->bio) {
606 ret = pcol_try_alloc(pcol);
607 if (unlikely(ret))
608 goto fail;
609 }
610
611 EXOFS_DBGMSG(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n",
612 inode->i_ino, page->index, len);
613
614 ret = pcol_add_page(pcol, page, len);
615 if (unlikely(ret)) {
616 EXOFS_DBGMSG("Failed pcol_add_page "
617 "nr_pages=%u total_length=0x%lx\n",
618 pcol->nr_pages, pcol->length);
619
620 /* split the request, next loop will start again */
621 ret = write_exec(pcol);
622 if (unlikely(ret)) {
623 EXOFS_DBGMSG("write_exec faild => %d", ret);
624 goto fail;
625 }
626
627 goto try_again;
628 }
629
630 BUG_ON(PageWriteback(page));
631 set_page_writeback(page);
632
633 return 0;
634
635fail:
636 set_bit(AS_EIO, &page->mapping->flags);
637 unlock_page(page);
638 return ret;
639}
640
641static int exofs_writepages(struct address_space *mapping,
642 struct writeback_control *wbc)
643{
644 struct page_collect pcol;
645 long start, end, expected_pages;
646 int ret;
647
648 start = wbc->range_start >> PAGE_CACHE_SHIFT;
649 end = (wbc->range_end == LLONG_MAX) ?
650 start + mapping->nrpages :
651 wbc->range_end >> PAGE_CACHE_SHIFT;
652
653 if (start || end)
654 expected_pages = min(end - start + 1, 32L);
655 else
656 expected_pages = mapping->nrpages;
657
658 EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx"
659 " m->nrpages=%lu start=0x%lx end=0x%lx\n",
660 mapping->host->i_ino, wbc->range_start, wbc->range_end,
661 mapping->nrpages, start, end);
662
663 _pcol_init(&pcol, expected_pages, mapping->host);
664
665 ret = write_cache_pages(mapping, wbc, writepage_strip, &pcol);
666 if (ret) {
667 EXOFS_ERR("write_cache_pages => %d\n", ret);
668 return ret;
669 }
670
671 return write_exec(&pcol);
672}
673
674static int exofs_writepage(struct page *page, struct writeback_control *wbc)
675{
676 struct page_collect pcol;
677 int ret;
678
679 _pcol_init(&pcol, 1, page->mapping->host);
680
681 ret = writepage_strip(page, NULL, &pcol);
682 if (ret) {
683 EXOFS_ERR("exofs_writepage => %d\n", ret);
684 return ret;
685 }
686
687 return write_exec(&pcol);
688}
689
690int exofs_write_begin(struct file *file, struct address_space *mapping,
691 loff_t pos, unsigned len, unsigned flags,
692 struct page **pagep, void **fsdata)
693{
694 int ret = 0;
695 struct page *page;
696
697 page = *pagep;
698 if (page == NULL) {
699 ret = simple_write_begin(file, mapping, pos, len, flags, pagep,
700 fsdata);
701 if (ret) {
702 EXOFS_DBGMSG("simple_write_begin faild\n");
703 return ret;
704 }
705
706 page = *pagep;
707 }
708
709 /* read modify write */
710 if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
711 ret = _readpage(page, true);
712 if (ret) {
713 /*SetPageError was done by _readpage. Is it ok?*/
714 unlock_page(page);
715 EXOFS_DBGMSG("__readpage_filler faild\n");
716 }
717 }
718
719 return ret;
720}
721
722static int exofs_write_begin_export(struct file *file,
723 struct address_space *mapping,
724 loff_t pos, unsigned len, unsigned flags,
725 struct page **pagep, void **fsdata)
726{
727 *pagep = NULL;
728
729 return exofs_write_begin(file, mapping, pos, len, flags, pagep,
730 fsdata);
731}
732
/* Address space operations of exofs: page reads and writes go through the page_collect helpers above. */
733const struct address_space_operations exofs_aops = {
734 .readpage = exofs_readpage, /* single page, asynchronous */
735 .readpages = exofs_readpages,
736 .writepage = exofs_writepage,
737 .writepages = exofs_writepages,
738 .write_begin = exofs_write_begin_export, /* clears *pagep, then exofs_write_begin() */
739 .write_end = simple_write_end,
740};
741
742/******************************************************************************
743 * INODE OPERATIONS
744 *****************************************************************************/
745
746/*
747 * Test whether an inode is a fast symlink.
748 */
749static inline int exofs_inode_is_fast_symlink(struct inode *inode)
750{
751 struct exofs_i_info *oi = exofs_i(inode);
752
753 return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0);
754}
755
756/*
757 * get_block_t - Fill in a buffer_head
758 * An OSD takes care of block allocation so we just fake an allocation by
759 * putting in the inode's sector_t in the buffer_head.
760 * TODO: What about the case of create==0 and @iblock does not exist in the
761 * object?
762 */
763static int exofs_get_block(struct inode *inode, sector_t iblock,
764 struct buffer_head *bh_result, int create)
765{
766 map_bh(bh_result, inode->i_sb, iblock);
767 return 0;
768}
769
/*
 * Template for the object's logical-length attribute: copied and given a
 * value pointer in exofs_truncate(), read back in exofs_get_inode() in debug
 * builds. The trailing 8 is presumably the value length in bytes (a __be64
 * is stored through it) - confirm against ATTR_DEF.
 */
770const struct osd_attr g_attr_logical_length = ATTR_DEF(
771 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
772
773/*
774 * Truncate a file to the specified size - all we have to do is set the size
775 * attribute. We make sure the object exists first.
776 */
777void exofs_truncate(struct inode *inode)
778{
779 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
780 struct exofs_i_info *oi = exofs_i(inode);
781 struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
782 struct osd_request *or;
783 struct osd_attr attr;
784 loff_t isize = i_size_read(inode);
785 __be64 newsize;
786 int ret;
787
788 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
789 || S_ISLNK(inode->i_mode)))
790 return;
791 if (exofs_inode_is_fast_symlink(inode))
792 return;
793 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
794 return;
795 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
796
797 nobh_truncate_page(inode->i_mapping, isize, exofs_get_block);
798
799 or = osd_start_request(sbi->s_dev, GFP_KERNEL);
800 if (unlikely(!or)) {
801 EXOFS_ERR("ERROR: exofs_truncate: osd_start_request failed\n");
802 goto fail;
803 }
804
805 osd_req_set_attributes(or, &obj);
806
807 newsize = cpu_to_be64((u64)isize);
808 attr = g_attr_logical_length;
809 attr.val_ptr = &newsize;
810 osd_req_add_set_attr_list(or, &attr, 1);
811
812 /* if we are about to truncate an object, and it hasn't been
813 * created yet, wait
814 */
815 if (unlikely(wait_obj_created(oi)))
816 goto fail;
817
818 ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
819 osd_end_request(or);
820 if (ret)
821 goto fail;
822
823out:
824 mark_inode_dirty(inode);
825 return;
826fail:
827 make_bad_inode(inode);
828 goto out;
829}
830
831/*
832 * Set inode attributes - just call generic functions.
833 */
834int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
835{
836 struct inode *inode = dentry->d_inode;
837 int error;
838
839 error = inode_change_ok(inode, iattr);
840 if (error)
841 return error;
842
843 error = inode_setattr(inode, iattr);
844 return error;
845}
846
847/*
848 * Read an inode from the OSD, and return it as is. We also return the size
849 * attribute in the 'sanity' argument if we got compiled with debugging turned
850 * on.
851 */
852static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
853 struct exofs_fcb *inode, uint64_t *sanity)
854{
855 struct exofs_sb_info *sbi = sb->s_fs_info;
856 struct osd_request *or;
857 struct osd_attr attr;
858 struct osd_obj_id obj = {sbi->s_pid,
859 oi->vfs_inode.i_ino + EXOFS_OBJ_OFF};
860 int ret;
861
862 exofs_make_credential(oi->i_cred, &obj);
863
864 or = osd_start_request(sbi->s_dev, GFP_KERNEL);
865 if (unlikely(!or)) {
866 EXOFS_ERR("exofs_get_inode: osd_start_request failed.\n");
867 return -ENOMEM;
868 }
869 osd_req_get_attributes(or, &obj);
870
871 /* we need the inode attribute */
872 osd_req_add_get_attr_list(or, &g_attr_inode_data, 1);
873
874#ifdef EXOFS_DEBUG_OBJ_ISIZE
875 /* we get the size attributes to do a sanity check */
876 osd_req_add_get_attr_list(or, &g_attr_logical_length, 1);
877#endif
878
879 ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
880 if (ret)
881 goto out;
882
883 attr = g_attr_inode_data;
884 ret = extract_attr_from_req(or, &attr);
885 if (ret) {
886 EXOFS_ERR("exofs_get_inode: extract_attr_from_req failed\n");
887 goto out;
888 }
889
890 WARN_ON(attr.len != EXOFS_INO_ATTR_SIZE);
891 memcpy(inode, attr.val_ptr, EXOFS_INO_ATTR_SIZE);
892
893#ifdef EXOFS_DEBUG_OBJ_ISIZE
894 attr = g_attr_logical_length;
895 ret = extract_attr_from_req(or, &attr);
896 if (ret) {
897 EXOFS_ERR("ERROR: extract attr from or failed\n");
898 goto out;
899 }
900 *sanity = get_unaligned_be64(attr.val_ptr);
901#endif
902
903out:
904 osd_end_request(or);
905 return ret;
906}
907
908/*
909 * Fill in an inode read from the OSD and set it up for use
910 */
911struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
912{
913 struct exofs_i_info *oi;
914 struct exofs_fcb fcb;
915 struct inode *inode;
916 uint64_t uninitialized_var(sanity);
917 int ret;
918
919 inode = iget_locked(sb, ino);
920 if (!inode)
921 return ERR_PTR(-ENOMEM);
922 if (!(inode->i_state & I_NEW))
923 return inode;
924 oi = exofs_i(inode);
925
926 /* read the inode from the osd */
927 ret = exofs_get_inode(sb, oi, &fcb, &sanity);
928 if (ret)
929 goto bad_inode;
930
931 init_waitqueue_head(&oi->i_wq);
932 set_obj_created(oi);
933
934 /* copy stuff from on-disk struct to in-memory struct */
935 inode->i_mode = le16_to_cpu(fcb.i_mode);
936 inode->i_uid = le32_to_cpu(fcb.i_uid);
937 inode->i_gid = le32_to_cpu(fcb.i_gid);
938 inode->i_nlink = le16_to_cpu(fcb.i_links_count);
939 inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime);
940 inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime);
941 inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime);
942 inode->i_ctime.tv_nsec =
943 inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0;
944 oi->i_commit_size = le64_to_cpu(fcb.i_size);
945 i_size_write(inode, oi->i_commit_size);
946 inode->i_blkbits = EXOFS_BLKSHIFT;
947 inode->i_generation = le32_to_cpu(fcb.i_generation);
948
949#ifdef EXOFS_DEBUG_OBJ_ISIZE
950 if ((inode->i_size != sanity) &&
951 (!exofs_inode_is_fast_symlink(inode))) {
952 EXOFS_ERR("WARNING: Size of object from inode and "
953 "attributes differ (%lld != %llu)\n",
954 inode->i_size, _LLU(sanity));
955 }
956#endif
957
958 oi->i_dir_start_lookup = 0;
959
960 if ((inode->i_nlink == 0) && (inode->i_mode == 0)) {
961 ret = -ESTALE;
962 goto bad_inode;
963 }
964
965 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
966 if (fcb.i_data[0])
967 inode->i_rdev =
968 old_decode_dev(le32_to_cpu(fcb.i_data[0]));
969 else
970 inode->i_rdev =
971 new_decode_dev(le32_to_cpu(fcb.i_data[1]));
972 } else {
973 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
974 }
975
976 if (S_ISREG(inode->i_mode)) {
977 inode->i_op = &exofs_file_inode_operations;
978 inode->i_fop = &exofs_file_operations;
979 inode->i_mapping->a_ops = &exofs_aops;
980 } else if (S_ISDIR(inode->i_mode)) {
981 inode->i_op = &exofs_dir_inode_operations;
982 inode->i_fop = &exofs_dir_operations;
983 inode->i_mapping->a_ops = &exofs_aops;
984 } else if (S_ISLNK(inode->i_mode)) {
985 if (exofs_inode_is_fast_symlink(inode))
986 inode->i_op = &exofs_fast_symlink_inode_operations;
987 else {
988 inode->i_op = &exofs_symlink_inode_operations;
989 inode->i_mapping->a_ops = &exofs_aops;
990 }
991 } else {
992 inode->i_op = &exofs_special_inode_operations;
993 if (fcb.i_data[0])
994 init_special_inode(inode, inode->i_mode,
995 old_decode_dev(le32_to_cpu(fcb.i_data[0])));
996 else
997 init_special_inode(inode, inode->i_mode,
998 new_decode_dev(le32_to_cpu(fcb.i_data[1])));
999 }
1000
1001 unlock_new_inode(inode);
1002 return inode;
1003
1004bad_inode:
1005 iget_failed(inode);
1006 return ERR_PTR(ret);
1007}
1008
1009int __exofs_wait_obj_created(struct exofs_i_info *oi)
1010{
1011 if (!obj_created(oi)) {
1012 BUG_ON(!obj_2bcreated(oi));
1013 wait_event(oi->i_wq, obj_created(oi));
1014 }
1015 return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0;
1016}
1017/*
1018 * Callback function from exofs_new_inode(). The important thing is that we
1019 * set the obj_created flag so that other methods know that the object exists on
1020 * the OSD.
1021 */
1022static void create_done(struct osd_request *or, void *p)
1023{
1024 struct inode *inode = p;
1025 struct exofs_i_info *oi = exofs_i(inode);
1026 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
1027 int ret;
1028
1029 ret = exofs_check_ok(or);
1030 osd_end_request(or);
1031 atomic_dec(&sbi->s_curr_pending);
1032
1033 if (unlikely(ret)) {
1034 EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx",
1035 _LLU(sbi->s_pid), _LLU(inode->i_ino + EXOFS_OBJ_OFF));
1036 make_bad_inode(inode);
1037 } else
1038 set_obj_created(oi);
1039
1040 atomic_dec(&inode->i_count);
1041 wake_up(&oi->i_wq);
1042}
1043
1044/*
1045 * Set up a new inode and create an object for it on the OSD
1046 */
1047struct inode *exofs_new_inode(struct inode *dir, int mode)
1048{
1049 struct super_block *sb;
1050 struct inode *inode;
1051 struct exofs_i_info *oi;
1052 struct exofs_sb_info *sbi;
1053 struct osd_request *or;
1054 struct osd_obj_id obj;
1055 int ret;
1056
1057 sb = dir->i_sb;
1058 inode = new_inode(sb);
1059 if (!inode)
1060 return ERR_PTR(-ENOMEM);
1061
1062 oi = exofs_i(inode);
1063
1064 init_waitqueue_head(&oi->i_wq);
1065 set_obj_2bcreated(oi);
1066
1067 sbi = sb->s_fs_info;
1068
1069 sb->s_dirt = 1;
1070 inode->i_uid = current->cred->fsuid;
1071 if (dir->i_mode & S_ISGID) {
1072 inode->i_gid = dir->i_gid;
1073 if (S_ISDIR(mode))
1074 mode |= S_ISGID;
1075 } else {
1076 inode->i_gid = current->cred->fsgid;
1077 }
1078 inode->i_mode = mode;
1079
1080 inode->i_ino = sbi->s_nextid++;
1081 inode->i_blkbits = EXOFS_BLKSHIFT;
1082 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1083 oi->i_commit_size = inode->i_size = 0;
1084 spin_lock(&sbi->s_next_gen_lock);
1085 inode->i_generation = sbi->s_next_generation++;
1086 spin_unlock(&sbi->s_next_gen_lock);
1087 insert_inode_hash(inode);
1088
1089 mark_inode_dirty(inode);
1090
1091 obj.partition = sbi->s_pid;
1092 obj.id = inode->i_ino + EXOFS_OBJ_OFF;
1093 exofs_make_credential(oi->i_cred, &obj);
1094
1095 or = osd_start_request(sbi->s_dev, GFP_KERNEL);
1096 if (unlikely(!or)) {
1097 EXOFS_ERR("exofs_new_inode: osd_start_request failed\n");
1098 return ERR_PTR(-ENOMEM);
1099 }
1100
1101 osd_req_create_object(or, &obj);
1102
1103 /* increment the refcount so that the inode will still be around when we
1104 * reach the callback
1105 */
1106 atomic_inc(&inode->i_count);
1107
1108 ret = exofs_async_op(or, create_done, inode, oi->i_cred);
1109 if (ret) {
1110 atomic_dec(&inode->i_count);
1111 osd_end_request(or);
1112 return ERR_PTR(-EIO);
1113 }
1114 atomic_inc(&sbi->s_curr_pending);
1115
1116 return inode;
1117}
1118
1119/*
1120 * struct to pass two arguments to update_inode's callback
1121 */
1122struct updatei_args {
1123 struct exofs_sb_info *sbi;
1124 struct exofs_fcb fcb;
1125};
1126
1127/*
1128 * Callback function from exofs_update_inode().
1129 */
1130static void updatei_done(struct osd_request *or, void *p)
1131{
1132 struct updatei_args *args = p;
1133
1134 osd_end_request(or);
1135
1136 atomic_dec(&args->sbi->s_curr_pending);
1137
1138 kfree(args);
1139}
1140
1141/*
1142 * Write the inode to the OSD. Just fill up the struct, and set the attribute
1143 * synchronously or asynchronously depending on the do_sync flag.
1144 */
1145static int exofs_update_inode(struct inode *inode, int do_sync)
1146{
1147 struct exofs_i_info *oi = exofs_i(inode);
1148 struct super_block *sb = inode->i_sb;
1149 struct exofs_sb_info *sbi = sb->s_fs_info;
1150 struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
1151 struct osd_request *or;
1152 struct osd_attr attr;
1153 struct exofs_fcb *fcb;
1154 struct updatei_args *args;
1155 int ret;
1156
1157 args = kzalloc(sizeof(*args), GFP_KERNEL);
1158 if (!args)
1159 return -ENOMEM;
1160
1161 fcb = &args->fcb;
1162
1163 fcb->i_mode = cpu_to_le16(inode->i_mode);
1164 fcb->i_uid = cpu_to_le32(inode->i_uid);
1165 fcb->i_gid = cpu_to_le32(inode->i_gid);
1166 fcb->i_links_count = cpu_to_le16(inode->i_nlink);
1167 fcb->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
1168 fcb->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
1169 fcb->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
1170 oi->i_commit_size = i_size_read(inode);
1171 fcb->i_size = cpu_to_le64(oi->i_commit_size);
1172 fcb->i_generation = cpu_to_le32(inode->i_generation);
1173
1174 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
1175 if (old_valid_dev(inode->i_rdev)) {
1176 fcb->i_data[0] =
1177 cpu_to_le32(old_encode_dev(inode->i_rdev));
1178 fcb->i_data[1] = 0;
1179 } else {
1180 fcb->i_data[0] = 0;
1181 fcb->i_data[1] =
1182 cpu_to_le32(new_encode_dev(inode->i_rdev));
1183 fcb->i_data[2] = 0;
1184 }
1185 } else
1186 memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));
1187
1188 or = osd_start_request(sbi->s_dev, GFP_KERNEL);
1189 if (unlikely(!or)) {
1190 EXOFS_ERR("exofs_update_inode: osd_start_request failed.\n");
1191 ret = -ENOMEM;
1192 goto free_args;
1193 }
1194
1195 osd_req_set_attributes(or, &obj);
1196
1197 attr = g_attr_inode_data;
1198 attr.val_ptr = fcb;
1199 osd_req_add_set_attr_list(or, &attr, 1);
1200
1201 if (!obj_created(oi)) {
1202 EXOFS_DBGMSG("!obj_created\n");
1203 BUG_ON(!obj_2bcreated(oi));
1204 wait_event(oi->i_wq, obj_created(oi));
1205 EXOFS_DBGMSG("wait_event done\n");
1206 }
1207
1208 if (do_sync) {
1209 ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
1210 osd_end_request(or);
1211 goto free_args;
1212 } else {
1213 args->sbi = sbi;
1214
1215 ret = exofs_async_op(or, updatei_done, args, oi->i_cred);
1216 if (ret) {
1217 osd_end_request(or);
1218 goto free_args;
1219 }
1220 atomic_inc(&sbi->s_curr_pending);
1221 goto out; /* deallocation in updatei_done */
1222 }
1223
1224free_args:
1225 kfree(args);
1226out:
1227 EXOFS_DBGMSG("ret=>%d\n", ret);
1228 return ret;
1229}
1230
/*
 * Write an inode out by handing it to exofs_update_inode(); 'wait' is passed
 * through as that function's do_sync flag.
 */
int exofs_write_inode(struct inode *inode, int wait)
{
	int ret = exofs_update_inode(inode, wait);

	return ret;
}
1235
1236/*
1237 * Callback function from exofs_delete_inode() - don't have much cleaning up to
1238 * do.
1239 */
1240static void delete_done(struct osd_request *or, void *p)
1241{
1242 struct exofs_sb_info *sbi;
1243 osd_end_request(or);
1244 sbi = p;
1245 atomic_dec(&sbi->s_curr_pending);
1246}
1247
1248/*
1249 * Called when the refcount of an inode reaches zero. We remove the object
1250 * from the OSD here. We make sure the object was created before we try and
1251 * delete it.
1252 */
1253void exofs_delete_inode(struct inode *inode)
1254{
1255 struct exofs_i_info *oi = exofs_i(inode);
1256 struct super_block *sb = inode->i_sb;
1257 struct exofs_sb_info *sbi = sb->s_fs_info;
1258 struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
1259 struct osd_request *or;
1260 int ret;
1261
1262 truncate_inode_pages(&inode->i_data, 0);
1263
1264 if (is_bad_inode(inode))
1265 goto no_delete;
1266
1267 mark_inode_dirty(inode);
1268 exofs_update_inode(inode, inode_needs_sync(inode));
1269
1270 inode->i_size = 0;
1271 if (inode->i_blocks)
1272 exofs_truncate(inode);
1273
1274 clear_inode(inode);
1275
1276 or = osd_start_request(sbi->s_dev, GFP_KERNEL);
1277 if (unlikely(!or)) {
1278 EXOFS_ERR("exofs_delete_inode: osd_start_request failed\n");
1279 return;
1280 }
1281
1282 osd_req_remove_object(or, &obj);
1283
1284 /* if we are deleting an obj that hasn't been created yet, wait */
1285 if (!obj_created(oi)) {
1286 BUG_ON(!obj_2bcreated(oi));
1287 wait_event(oi->i_wq, obj_created(oi));
1288 }
1289
1290 ret = exofs_async_op(or, delete_done, sbi, oi->i_cred);
1291 if (ret) {
1292 EXOFS_ERR(
1293 "ERROR: @exofs_delete_inode exofs_async_op failed\n");
1294 osd_end_request(or);
1295 return;
1296 }
1297 atomic_inc(&sbi->s_curr_pending);
1298
1299 return;
1300
1301no_delete:
1302 clear_inode(inode);
1303}
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
new file mode 100644
index 000000000000..77fdd765e76d
--- /dev/null
+++ b/fs/exofs/namei.c
@@ -0,0 +1,342 @@
1/*
2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com>
8 *
9 * Copyrights for code taken from ext2:
10 * Copyright (C) 1992, 1993, 1994, 1995
11 * Remy Card (card@masi.ibp.fr)
12 * Laboratoire MASI - Institut Blaise Pascal
13 * Universite Pierre et Marie Curie (Paris VI)
14 * from
15 * linux/fs/minix/inode.c
16 * Copyright (C) 1991, 1992 Linus Torvalds
17 *
18 * This file is part of exofs.
19 *
20 * exofs is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation. Since it is based on ext2, and the only
23 * valid version of GPL for the Linux kernel is version 2, the only valid
24 * version of GPL for exofs is version 2.
25 *
26 * exofs is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 * GNU General Public License for more details.
30 *
31 * You should have received a copy of the GNU General Public License
32 * along with exofs; if not, write to the Free Software
33 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34 */
35
36#include "exofs.h"
37
/*
 * Link 'inode' into the directory under 'dentry' and instantiate the dentry.
 * If adding the link fails, the inode's link count is dropped, the inode is
 * put, and the error is returned.
 */
static inline int exofs_add_nondir(struct dentry *dentry, struct inode *inode)
{
	int err = exofs_add_link(dentry, inode);

	if (err) {
		inode_dec_link_count(inode);
		iput(inode);
		return err;
	}

	d_instantiate(dentry, inode);
	return 0;
}
49
50static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry,
51 struct nameidata *nd)
52{
53 struct inode *inode;
54 ino_t ino;
55
56 if (dentry->d_name.len > EXOFS_NAME_LEN)
57 return ERR_PTR(-ENAMETOOLONG);
58
59 ino = exofs_inode_by_name(dir, dentry);
60 inode = NULL;
61 if (ino) {
62 inode = exofs_iget(dir->i_sb, ino);
63 if (IS_ERR(inode))
64 return ERR_CAST(inode);
65 }
66 return d_splice_alias(inode, dentry);
67}
68
69static int exofs_create(struct inode *dir, struct dentry *dentry, int mode,
70 struct nameidata *nd)
71{
72 struct inode *inode = exofs_new_inode(dir, mode);
73 int err = PTR_ERR(inode);
74 if (!IS_ERR(inode)) {
75 inode->i_op = &exofs_file_inode_operations;
76 inode->i_fop = &exofs_file_operations;
77 inode->i_mapping->a_ops = &exofs_aops;
78 mark_inode_dirty(inode);
79 err = exofs_add_nondir(dentry, inode);
80 }
81 return err;
82}
83
84static int exofs_mknod(struct inode *dir, struct dentry *dentry, int mode,
85 dev_t rdev)
86{
87 struct inode *inode;
88 int err;
89
90 if (!new_valid_dev(rdev))
91 return -EINVAL;
92
93 inode = exofs_new_inode(dir, mode);
94 err = PTR_ERR(inode);
95 if (!IS_ERR(inode)) {
96 init_special_inode(inode, inode->i_mode, rdev);
97 mark_inode_dirty(inode);
98 err = exofs_add_nondir(dentry, inode);
99 }
100 return err;
101}
102
103static int exofs_symlink(struct inode *dir, struct dentry *dentry,
104 const char *symname)
105{
106 struct super_block *sb = dir->i_sb;
107 int err = -ENAMETOOLONG;
108 unsigned l = strlen(symname)+1;
109 struct inode *inode;
110 struct exofs_i_info *oi;
111
112 if (l > sb->s_blocksize)
113 goto out;
114
115 inode = exofs_new_inode(dir, S_IFLNK | S_IRWXUGO);
116 err = PTR_ERR(inode);
117 if (IS_ERR(inode))
118 goto out;
119
120 oi = exofs_i(inode);
121 if (l > sizeof(oi->i_data)) {
122 /* slow symlink */
123 inode->i_op = &exofs_symlink_inode_operations;
124 inode->i_mapping->a_ops = &exofs_aops;
125 memset(oi->i_data, 0, sizeof(oi->i_data));
126
127 err = page_symlink(inode, symname, l);
128 if (err)
129 goto out_fail;
130 } else {
131 /* fast symlink */
132 inode->i_op = &exofs_fast_symlink_inode_operations;
133 memcpy(oi->i_data, symname, l);
134 inode->i_size = l-1;
135 }
136 mark_inode_dirty(inode);
137
138 err = exofs_add_nondir(dentry, inode);
139out:
140 return err;
141
142out_fail:
143 inode_dec_link_count(inode);
144 iput(inode);
145 goto out;
146}
147
148static int exofs_link(struct dentry *old_dentry, struct inode *dir,
149 struct dentry *dentry)
150{
151 struct inode *inode = old_dentry->d_inode;
152
153 if (inode->i_nlink >= EXOFS_LINK_MAX)
154 return -EMLINK;
155
156 inode->i_ctime = CURRENT_TIME;
157 inode_inc_link_count(inode);
158 atomic_inc(&inode->i_count);
159
160 return exofs_add_nondir(dentry, inode);
161}
162
163static int exofs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
164{
165 struct inode *inode;
166 int err = -EMLINK;
167
168 if (dir->i_nlink >= EXOFS_LINK_MAX)
169 goto out;
170
171 inode_inc_link_count(dir);
172
173 inode = exofs_new_inode(dir, S_IFDIR | mode);
174 err = PTR_ERR(inode);
175 if (IS_ERR(inode))
176 goto out_dir;
177
178 inode->i_op = &exofs_dir_inode_operations;
179 inode->i_fop = &exofs_dir_operations;
180 inode->i_mapping->a_ops = &exofs_aops;
181
182 inode_inc_link_count(inode);
183
184 err = exofs_make_empty(inode, dir);
185 if (err)
186 goto out_fail;
187
188 err = exofs_add_link(dentry, inode);
189 if (err)
190 goto out_fail;
191
192 d_instantiate(dentry, inode);
193out:
194 return err;
195
196out_fail:
197 inode_dec_link_count(inode);
198 inode_dec_link_count(inode);
199 iput(inode);
200out_dir:
201 inode_dec_link_count(dir);
202 goto out;
203}
204
205static int exofs_unlink(struct inode *dir, struct dentry *dentry)
206{
207 struct inode *inode = dentry->d_inode;
208 struct exofs_dir_entry *de;
209 struct page *page;
210 int err = -ENOENT;
211
212 de = exofs_find_entry(dir, dentry, &page);
213 if (!de)
214 goto out;
215
216 err = exofs_delete_entry(de, page);
217 if (err)
218 goto out;
219
220 inode->i_ctime = dir->i_ctime;
221 inode_dec_link_count(inode);
222 err = 0;
223out:
224 return err;
225}
226
227static int exofs_rmdir(struct inode *dir, struct dentry *dentry)
228{
229 struct inode *inode = dentry->d_inode;
230 int err = -ENOTEMPTY;
231
232 if (exofs_empty_dir(inode)) {
233 err = exofs_unlink(dir, dentry);
234 if (!err) {
235 inode->i_size = 0;
236 inode_dec_link_count(inode);
237 inode_dec_link_count(dir);
238 }
239 }
240 return err;
241}
242
243static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
244 struct inode *new_dir, struct dentry *new_dentry)
245{
246 struct inode *old_inode = old_dentry->d_inode;
247 struct inode *new_inode = new_dentry->d_inode;
248 struct page *dir_page = NULL;
249 struct exofs_dir_entry *dir_de = NULL;
250 struct page *old_page;
251 struct exofs_dir_entry *old_de;
252 int err = -ENOENT;
253
254 old_de = exofs_find_entry(old_dir, old_dentry, &old_page);
255 if (!old_de)
256 goto out;
257
258 if (S_ISDIR(old_inode->i_mode)) {
259 err = -EIO;
260 dir_de = exofs_dotdot(old_inode, &dir_page);
261 if (!dir_de)
262 goto out_old;
263 }
264
265 if (new_inode) {
266 struct page *new_page;
267 struct exofs_dir_entry *new_de;
268
269 err = -ENOTEMPTY;
270 if (dir_de && !exofs_empty_dir(new_inode))
271 goto out_dir;
272
273 err = -ENOENT;
274 new_de = exofs_find_entry(new_dir, new_dentry, &new_page);
275 if (!new_de)
276 goto out_dir;
277 inode_inc_link_count(old_inode);
278 err = exofs_set_link(new_dir, new_de, new_page, old_inode);
279 new_inode->i_ctime = CURRENT_TIME;
280 if (dir_de)
281 drop_nlink(new_inode);
282 inode_dec_link_count(new_inode);
283 if (err)
284 goto out_dir;
285 } else {
286 if (dir_de) {
287 err = -EMLINK;
288 if (new_dir->i_nlink >= EXOFS_LINK_MAX)
289 goto out_dir;
290 }
291 inode_inc_link_count(old_inode);
292 err = exofs_add_link(new_dentry, old_inode);
293 if (err) {
294 inode_dec_link_count(old_inode);
295 goto out_dir;
296 }
297 if (dir_de)
298 inode_inc_link_count(new_dir);
299 }
300
301 old_inode->i_ctime = CURRENT_TIME;
302
303 exofs_delete_entry(old_de, old_page);
304 inode_dec_link_count(old_inode);
305
306 if (dir_de) {
307 err = exofs_set_link(old_inode, dir_de, dir_page, new_dir);
308 inode_dec_link_count(old_dir);
309 if (err)
310 goto out_dir;
311 }
312 return 0;
313
314
315out_dir:
316 if (dir_de) {
317 kunmap(dir_page);
318 page_cache_release(dir_page);
319 }
320out_old:
321 kunmap(old_page);
322 page_cache_release(old_page);
323out:
324 return err;
325}
326
327const struct inode_operations exofs_dir_inode_operations = {
328 .create = exofs_create,
329 .lookup = exofs_lookup,
330 .link = exofs_link,
331 .unlink = exofs_unlink,
332 .symlink = exofs_symlink,
333 .mkdir = exofs_mkdir,
334 .rmdir = exofs_rmdir,
335 .mknod = exofs_mknod,
336 .rename = exofs_rename,
337 .setattr = exofs_setattr,
338};
339
340const struct inode_operations exofs_special_inode_operations = {
341 .setattr = exofs_setattr,
342};
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c
new file mode 100644
index 000000000000..b249ae97fb15
--- /dev/null
+++ b/fs/exofs/osd.c
@@ -0,0 +1,153 @@
1/*
2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com>
8 *
9 * This file is part of exofs.
10 *
11 * exofs is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation. Since it is based on ext2, and the only
14 * valid version of GPL for the Linux kernel is version 2, the only valid
15 * version of GPL for exofs is version 2.
16 *
17 * exofs is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with exofs; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 */
26
27#include <scsi/scsi_device.h>
28#include <scsi/osd_sense.h>
29
30#include "exofs.h"
31
32int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid)
33{
34 struct osd_sense_info osi;
35 int ret = osd_req_decode_sense(or, &osi);
36
37 if (ret) { /* translate to Linux codes */
38 if (osi.additional_code == scsi_invalid_field_in_cdb) {
39 if (osi.cdb_field_offset == OSD_CFO_STARTING_BYTE)
40 ret = -EFAULT;
41 if (osi.cdb_field_offset == OSD_CFO_OBJECT_ID)
42 ret = -ENOENT;
43 else
44 ret = -EINVAL;
45 } else if (osi.additional_code == osd_quota_error)
46 ret = -ENOSPC;
47 else
48 ret = -EIO;
49 }
50
51 /* FIXME: should be include in osd_sense_info */
52 if (in_resid)
53 *in_resid = or->in.req ? or->in.req->data_len : 0;
54
55 if (out_resid)
56 *out_resid = or->out.req ? or->out.req->data_len : 0;
57
58 return ret;
59}
60
61void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
62{
63 osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
64}
65
66/*
67 * Perform a synchronous OSD operation.
68 */
69int exofs_sync_op(struct osd_request *or, int timeout, uint8_t *credential)
70{
71 int ret;
72
73 or->timeout = timeout;
74 ret = osd_finalize_request(or, 0, credential, NULL);
75 if (ret) {
76 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret);
77 return ret;
78 }
79
80 ret = osd_execute_request(or);
81
82 if (ret)
83 EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
84 /* osd_req_decode_sense(or, ret); */
85 return ret;
86}
87
88/*
89 * Perform an asynchronous OSD operation.
90 */
91int exofs_async_op(struct osd_request *or, osd_req_done_fn *async_done,
92 void *caller_context, u8 *cred)
93{
94 int ret;
95
96 ret = osd_finalize_request(or, 0, cred, NULL);
97 if (ret) {
98 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret);
99 return ret;
100 }
101
102 ret = osd_execute_request_async(or, async_done, caller_context);
103
104 if (ret)
105 EXOFS_DBGMSG("osd_execute_request_async() => %d\n", ret);
106 return ret;
107}
108
109int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
110{
111 struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
112 void *iter = NULL;
113 int nelem;
114
115 do {
116 nelem = 1;
117 osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter);
118 if ((cur_attr.attr_page == attr->attr_page) &&
119 (cur_attr.attr_id == attr->attr_id)) {
120 attr->len = cur_attr.len;
121 attr->val_ptr = cur_attr.val_ptr;
122 return 0;
123 }
124 } while (iter);
125
126 return -EIO;
127}
128
129int osd_req_read_kern(struct osd_request *or,
130 const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
131{
132 struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
133 struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
134
135 if (!bio)
136 return -ENOMEM;
137
138 osd_req_read(or, obj, bio, offset);
139 return 0;
140}
141
142int osd_req_write_kern(struct osd_request *or,
143 const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
144{
145 struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
146 struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
147
148 if (!bio)
149 return -ENOMEM;
150
151 osd_req_write(or, obj, bio, offset);
152 return 0;
153}
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
new file mode 100644
index 000000000000..9f1985e857e2
--- /dev/null
+++ b/fs/exofs/super.c
@@ -0,0 +1,584 @@
1/*
2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com>
8 *
9 * Copyrights for code taken from ext2:
10 * Copyright (C) 1992, 1993, 1994, 1995
11 * Remy Card (card@masi.ibp.fr)
12 * Laboratoire MASI - Institut Blaise Pascal
13 * Universite Pierre et Marie Curie (Paris VI)
14 * from
15 * linux/fs/minix/inode.c
16 * Copyright (C) 1991, 1992 Linus Torvalds
17 *
18 * This file is part of exofs.
19 *
20 * exofs is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation. Since it is based on ext2, and the only
23 * valid version of GPL for the Linux kernel is version 2, the only valid
24 * version of GPL for exofs is version 2.
25 *
26 * exofs is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 * GNU General Public License for more details.
30 *
31 * You should have received a copy of the GNU General Public License
32 * along with exofs; if not, write to the Free Software
33 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34 */
35
36#include <linux/string.h>
37#include <linux/parser.h>
38#include <linux/vfs.h>
39#include <linux/random.h>
40#include <linux/exportfs.h>
41
42#include "exofs.h"
43
44/******************************************************************************
45 * MOUNT OPTIONS
46 *****************************************************************************/
47
48/*
49 * struct to hold what we get from mount options
50 */
51struct exofs_mountopt {
52 const char *dev_name;
53 uint64_t pid;
54 int timeout;
55};
56
/*
 * exofs-specific mount-time options.
 */
enum {
	Opt_pid,
	Opt_to,
	Opt_mkfs,
	Opt_format,
	Opt_err
};
61
62/*
63 * Our mount-time options. These should ideally be 64-bit unsigned, but the
64 * kernel's parsing functions do not currently support that. 32-bit should be
65 * sufficient for most applications now.
66 */
67static match_table_t tokens = {
68 {Opt_pid, "pid=%u"},
69 {Opt_to, "to=%u"},
70 {Opt_err, NULL}
71};
72
73/*
74 * The main option parsing method. Also makes sure that all of the mandatory
75 * mount options were set.
76 */
77static int parse_options(char *options, struct exofs_mountopt *opts)
78{
79 char *p;
80 substring_t args[MAX_OPT_ARGS];
81 int option;
82 bool s_pid = false;
83
84 EXOFS_DBGMSG("parse_options %s\n", options);
85 /* defaults */
86 memset(opts, 0, sizeof(*opts));
87 opts->timeout = BLK_DEFAULT_SG_TIMEOUT;
88
89 while ((p = strsep(&options, ",")) != NULL) {
90 int token;
91 char str[32];
92
93 if (!*p)
94 continue;
95
96 token = match_token(p, tokens, args);
97 switch (token) {
98 case Opt_pid:
99 if (0 == match_strlcpy(str, &args[0], sizeof(str)))
100 return -EINVAL;
101 opts->pid = simple_strtoull(str, NULL, 0);
102 if (opts->pid < EXOFS_MIN_PID) {
103 EXOFS_ERR("Partition ID must be >= %u",
104 EXOFS_MIN_PID);
105 return -EINVAL;
106 }
107 s_pid = 1;
108 break;
109 case Opt_to:
110 if (match_int(&args[0], &option))
111 return -EINVAL;
112 if (option <= 0) {
113 EXOFS_ERR("Timout must be > 0");
114 return -EINVAL;
115 }
116 opts->timeout = option * HZ;
117 break;
118 }
119 }
120
121 if (!s_pid) {
122 EXOFS_ERR("Need to specify the following options:\n");
123 EXOFS_ERR(" -o pid=pid_no_to_use\n");
124 return -EINVAL;
125 }
126
127 return 0;
128}
129
130/******************************************************************************
131 * INODE CACHE
132 *****************************************************************************/
133
/*
 * Slab cache that struct exofs_i_info objects are allocated from.
 */
static struct kmem_cache *exofs_inode_cachep;
138
139/*
140 * Allocate an inode in the cache
141 */
142static struct inode *exofs_alloc_inode(struct super_block *sb)
143{
144 struct exofs_i_info *oi;
145
146 oi = kmem_cache_alloc(exofs_inode_cachep, GFP_KERNEL);
147 if (!oi)
148 return NULL;
149
150 oi->vfs_inode.i_version = 1;
151 return &oi->vfs_inode;
152}
153
154/*
155 * Remove an inode from the cache
156 */
157static void exofs_destroy_inode(struct inode *inode)
158{
159 kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
160}
161
162/*
163 * Initialize the inode
164 */
165static void exofs_init_once(void *foo)
166{
167 struct exofs_i_info *oi = foo;
168
169 inode_init_once(&oi->vfs_inode);
170}
171
172/*
173 * Create and initialize the inode cache
174 */
175static int init_inodecache(void)
176{
177 exofs_inode_cachep = kmem_cache_create("exofs_inode_cache",
178 sizeof(struct exofs_i_info), 0,
179 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
180 exofs_init_once);
181 if (exofs_inode_cachep == NULL)
182 return -ENOMEM;
183 return 0;
184}
185
186/*
187 * Destroy the inode cache
188 */
189static void destroy_inodecache(void)
190{
191 kmem_cache_destroy(exofs_inode_cachep);
192}
193
194/******************************************************************************
195 * SUPERBLOCK FUNCTIONS
196 *****************************************************************************/
197static const struct super_operations exofs_sops;
198static const struct export_operations exofs_export_ops;
199
200/*
201 * Write the superblock to the OSD
202 */
203static void exofs_write_super(struct super_block *sb)
204{
205 struct exofs_sb_info *sbi;
206 struct exofs_fscb *fscb;
207 struct osd_request *or;
208 struct osd_obj_id obj;
209 int ret;
210
211 fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL);
212 if (!fscb) {
213 EXOFS_ERR("exofs_write_super: memory allocation failed.\n");
214 return;
215 }
216
217 lock_kernel();
218 sbi = sb->s_fs_info;
219 fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
220 fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
221 fscb->s_magic = cpu_to_le16(sb->s_magic);
222 fscb->s_newfs = 0;
223
224 or = osd_start_request(sbi->s_dev, GFP_KERNEL);
225 if (unlikely(!or)) {
226 EXOFS_ERR("exofs_write_super: osd_start_request failed.\n");
227 goto out;
228 }
229
230 obj.partition = sbi->s_pid;
231 obj.id = EXOFS_SUPER_ID;
232 ret = osd_req_write_kern(or, &obj, 0, fscb, sizeof(*fscb));
233 if (unlikely(ret)) {
234 EXOFS_ERR("exofs_write_super: osd_req_write_kern failed.\n");
235 goto out;
236 }
237
238 ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred);
239 if (unlikely(ret)) {
240 EXOFS_ERR("exofs_write_super: exofs_sync_op failed.\n");
241 goto out;
242 }
243 sb->s_dirt = 0;
244
245out:
246 if (or)
247 osd_end_request(or);
248 unlock_kernel();
249 kfree(fscb);
250}
251
252/*
253 * This function is called when the vfs is freeing the superblock. We just
254 * need to free our own part.
255 */
256static void exofs_put_super(struct super_block *sb)
257{
258 int num_pend;
259 struct exofs_sb_info *sbi = sb->s_fs_info;
260
261 /* make sure there are no pending commands */
262 for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0;
263 num_pend = atomic_read(&sbi->s_curr_pending)) {
264 wait_queue_head_t wq;
265 init_waitqueue_head(&wq);
266 wait_event_timeout(wq,
267 (atomic_read(&sbi->s_curr_pending) == 0),
268 msecs_to_jiffies(100));
269 }
270
271 osduld_put_device(sbi->s_dev);
272 kfree(sb->s_fs_info);
273 sb->s_fs_info = NULL;
274}
275
276/*
277 * Read the superblock from the OSD and fill in the fields
278 */
279static int exofs_fill_super(struct super_block *sb, void *data, int silent)
280{
281 struct inode *root;
282 struct exofs_mountopt *opts = data;
283 struct exofs_sb_info *sbi; /*extended info */
284 struct exofs_fscb fscb; /*on-disk superblock info */
285 struct osd_request *or = NULL;
286 struct osd_obj_id obj;
287 int ret;
288
289 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
290 if (!sbi)
291 return -ENOMEM;
292 sb->s_fs_info = sbi;
293
294 /* use mount options to fill superblock */
295 sbi->s_dev = osduld_path_lookup(opts->dev_name);
296 if (IS_ERR(sbi->s_dev)) {
297 ret = PTR_ERR(sbi->s_dev);
298 sbi->s_dev = NULL;
299 goto free_sbi;
300 }
301
302 sbi->s_pid = opts->pid;
303 sbi->s_timeout = opts->timeout;
304
305 /* fill in some other data by hand */
306 memset(sb->s_id, 0, sizeof(sb->s_id));
307 strcpy(sb->s_id, "exofs");
308 sb->s_blocksize = EXOFS_BLKSIZE;
309 sb->s_blocksize_bits = EXOFS_BLKSHIFT;
310 sb->s_maxbytes = MAX_LFS_FILESIZE;
311 atomic_set(&sbi->s_curr_pending, 0);
312 sb->s_bdev = NULL;
313 sb->s_dev = 0;
314
315 /* read data from on-disk superblock object */
316 obj.partition = sbi->s_pid;
317 obj.id = EXOFS_SUPER_ID;
318 exofs_make_credential(sbi->s_cred, &obj);
319
320 or = osd_start_request(sbi->s_dev, GFP_KERNEL);
321 if (unlikely(!or)) {
322 if (!silent)
323 EXOFS_ERR(
324 "exofs_fill_super: osd_start_request failed.\n");
325 ret = -ENOMEM;
326 goto free_sbi;
327 }
328 ret = osd_req_read_kern(or, &obj, 0, &fscb, sizeof(fscb));
329 if (unlikely(ret)) {
330 if (!silent)
331 EXOFS_ERR(
332 "exofs_fill_super: osd_req_read_kern failed.\n");
333 ret = -ENOMEM;
334 goto free_sbi;
335 }
336
337 ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred);
338 if (unlikely(ret)) {
339 if (!silent)
340 EXOFS_ERR("exofs_fill_super: exofs_sync_op failed.\n");
341 ret = -EIO;
342 goto free_sbi;
343 }
344
345 sb->s_magic = le16_to_cpu(fscb.s_magic);
346 sbi->s_nextid = le64_to_cpu(fscb.s_nextid);
347 sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles);
348
349 /* make sure what we read from the object store is correct */
350 if (sb->s_magic != EXOFS_SUPER_MAGIC) {
351 if (!silent)
352 EXOFS_ERR("ERROR: Bad magic value\n");
353 ret = -EINVAL;
354 goto free_sbi;
355 }
356
357 /* start generation numbers from a random point */
358 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
359 spin_lock_init(&sbi->s_next_gen_lock);
360
361 /* set up operation vectors */
362 sb->s_op = &exofs_sops;
363 sb->s_export_op = &exofs_export_ops;
364 root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF);
365 if (IS_ERR(root)) {
366 EXOFS_ERR("ERROR: exofs_iget failed\n");
367 ret = PTR_ERR(root);
368 goto free_sbi;
369 }
370 sb->s_root = d_alloc_root(root);
371 if (!sb->s_root) {
372 iput(root);
373 EXOFS_ERR("ERROR: get root inode failed\n");
374 ret = -ENOMEM;
375 goto free_sbi;
376 }
377
378 if (!S_ISDIR(root->i_mode)) {
379 dput(sb->s_root);
380 sb->s_root = NULL;
381 EXOFS_ERR("ERROR: corrupt root inode (mode = %hd)\n",
382 root->i_mode);
383 ret = -EINVAL;
384 goto free_sbi;
385 }
386
387 ret = 0;
388out:
389 if (or)
390 osd_end_request(or);
391 return ret;
392
393free_sbi:
394 osduld_put_device(sbi->s_dev); /* NULL safe */
395 kfree(sbi);
396 goto out;
397}
398
399/*
400 * Set up the superblock (calls exofs_fill_super eventually)
401 */
402static int exofs_get_sb(struct file_system_type *type,
403 int flags, const char *dev_name,
404 void *data, struct vfsmount *mnt)
405{
406 struct exofs_mountopt opts;
407 int ret;
408
409 ret = parse_options(data, &opts);
410 if (ret)
411 return ret;
412
413 opts.dev_name = dev_name;
414 return get_sb_nodev(type, flags, &opts, exofs_fill_super, mnt);
415}
416
417/*
418 * Return information about the file system state in the buffer. This is used
419 * by the 'df' command, for example.
420 */
421static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
422{
423 struct super_block *sb = dentry->d_sb;
424 struct exofs_sb_info *sbi = sb->s_fs_info;
425 struct osd_obj_id obj = {sbi->s_pid, 0};
426 struct osd_attr attrs[] = {
427 ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS,
428 OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)),
429 ATTR_DEF(OSD_APAGE_PARTITION_INFORMATION,
430 OSD_ATTR_PI_USED_CAPACITY, sizeof(__be64)),
431 };
432 uint64_t capacity = ULLONG_MAX;
433 uint64_t used = ULLONG_MAX;
434 struct osd_request *or;
435 uint8_t cred_a[OSD_CAP_LEN];
436 int ret;
437
438 /* get used/capacity attributes */
439 exofs_make_credential(cred_a, &obj);
440
441 or = osd_start_request(sbi->s_dev, GFP_KERNEL);
442 if (unlikely(!or)) {
443 EXOFS_DBGMSG("exofs_statfs: osd_start_request failed.\n");
444 return -ENOMEM;
445 }
446
447 osd_req_get_attributes(or, &obj);
448 osd_req_add_get_attr_list(or, attrs, ARRAY_SIZE(attrs));
449 ret = exofs_sync_op(or, sbi->s_timeout, cred_a);
450 if (unlikely(ret))
451 goto out;
452
453 ret = extract_attr_from_req(or, &attrs[0]);
454 if (likely(!ret))
455 capacity = get_unaligned_be64(attrs[0].val_ptr);
456 else
457 EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n");
458
459 ret = extract_attr_from_req(or, &attrs[1]);
460 if (likely(!ret))
461 used = get_unaligned_be64(attrs[1].val_ptr);
462 else
463 EXOFS_DBGMSG("exofs_statfs: get used-space failed.\n");
464
465 /* fill in the stats buffer */
466 buf->f_type = EXOFS_SUPER_MAGIC;
467 buf->f_bsize = EXOFS_BLKSIZE;
468 buf->f_blocks = (capacity >> EXOFS_BLKSHIFT);
469 buf->f_bfree = ((capacity - used) >> EXOFS_BLKSHIFT);
470 buf->f_bavail = buf->f_bfree;
471 buf->f_files = sbi->s_numfiles;
472 buf->f_ffree = EXOFS_MAX_ID - sbi->s_numfiles;
473 buf->f_namelen = EXOFS_NAME_LEN;
474
475out:
476 osd_end_request(or);
477 return ret;
478}
479
480static const struct super_operations exofs_sops = {
481 .alloc_inode = exofs_alloc_inode,
482 .destroy_inode = exofs_destroy_inode,
483 .write_inode = exofs_write_inode,
484 .delete_inode = exofs_delete_inode,
485 .put_super = exofs_put_super,
486 .write_super = exofs_write_super,
487 .statfs = exofs_statfs,
488};
489
490/******************************************************************************
491 * EXPORT OPERATIONS
492 *****************************************************************************/
493
494struct dentry *exofs_get_parent(struct dentry *child)
495{
496 unsigned long ino = exofs_parent_ino(child);
497
498 if (!ino)
499 return NULL;
500
501 return d_obtain_alias(exofs_iget(child->d_inode->i_sb, ino));
502}
503
504static struct inode *exofs_nfs_get_inode(struct super_block *sb,
505 u64 ino, u32 generation)
506{
507 struct inode *inode;
508
509 inode = exofs_iget(sb, ino);
510 if (IS_ERR(inode))
511 return ERR_CAST(inode);
512 if (generation && inode->i_generation != generation) {
513 /* we didn't find the right inode.. */
514 iput(inode);
515 return ERR_PTR(-ESTALE);
516 }
517 return inode;
518}
519
520static struct dentry *exofs_fh_to_dentry(struct super_block *sb,
521 struct fid *fid, int fh_len, int fh_type)
522{
523 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
524 exofs_nfs_get_inode);
525}
526
527static struct dentry *exofs_fh_to_parent(struct super_block *sb,
528 struct fid *fid, int fh_len, int fh_type)
529{
530 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
531 exofs_nfs_get_inode);
532}
533
534static const struct export_operations exofs_export_ops = {
535 .fh_to_dentry = exofs_fh_to_dentry,
536 .fh_to_parent = exofs_fh_to_parent,
537 .get_parent = exofs_get_parent,
538};
539
540/******************************************************************************
541 * INSMOD/RMMOD
542 *****************************************************************************/
543
544/*
545 * struct that describes this file system
546 */
547static struct file_system_type exofs_type = {
548 .owner = THIS_MODULE,
549 .name = "exofs",
550 .get_sb = exofs_get_sb,
551 .kill_sb = generic_shutdown_super,
552};
553
554static int __init init_exofs(void)
555{
556 int err;
557
558 err = init_inodecache();
559 if (err)
560 goto out;
561
562 err = register_filesystem(&exofs_type);
563 if (err)
564 goto out_d;
565
566 return 0;
567out_d:
568 destroy_inodecache();
569out:
570 return err;
571}
572
573static void __exit exit_exofs(void)
574{
575 unregister_filesystem(&exofs_type);
576 destroy_inodecache();
577}
578
579MODULE_AUTHOR("Avishay Traeger <avishay@gmail.com>");
580MODULE_DESCRIPTION("exofs");
581MODULE_LICENSE("GPL");
582
583module_init(init_exofs)
584module_exit(exit_exofs)
diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c
new file mode 100644
index 000000000000..36e2d7bc7f7b
--- /dev/null
+++ b/fs/exofs/symlink.c
@@ -0,0 +1,57 @@
1/*
2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com>
8 *
9 * Copyrights for code taken from ext2:
10 * Copyright (C) 1992, 1993, 1994, 1995
11 * Remy Card (card@masi.ibp.fr)
12 * Laboratoire MASI - Institut Blaise Pascal
13 * Universite Pierre et Marie Curie (Paris VI)
14 * from
15 * linux/fs/minix/inode.c
16 * Copyright (C) 1991, 1992 Linus Torvalds
17 *
18 * This file is part of exofs.
19 *
20 * exofs is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation. Since it is based on ext2, and the only
23 * valid version of GPL for the Linux kernel is version 2, the only valid
24 * version of GPL for exofs is version 2.
25 *
26 * exofs is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 * GNU General Public License for more details.
30 *
31 * You should have received a copy of the GNU General Public License
32 * along with exofs; if not, write to the Free Software
33 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34 */
35
36#include <linux/namei.h>
37
38#include "exofs.h"
39
40static void *exofs_follow_link(struct dentry *dentry, struct nameidata *nd)
41{
42 struct exofs_i_info *oi = exofs_i(dentry->d_inode);
43
44 nd_set_link(nd, (char *)oi->i_data);
45 return NULL;
46}
47
48const struct inode_operations exofs_symlink_inode_operations = {
49 .readlink = generic_readlink,
50 .follow_link = page_follow_link_light,
51 .put_link = page_put_link,
52};
53
54const struct inode_operations exofs_fast_symlink_inode_operations = {
55 .readlink = generic_readlink,
56 .follow_link = exofs_follow_link,
57};
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 2bb788a2acb1..e48e9a3af763 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -87,12 +87,12 @@ static int read_block_bitmap(struct super_block *sb,
87{ 87{
88 struct buffer_head *bh = NULL; 88 struct buffer_head *bh = NULL;
89 int retval = 0; 89 int retval = 0;
90 kernel_lb_addr loc; 90 struct kernel_lb_addr loc;
91 91
92 loc.logicalBlockNum = bitmap->s_extPosition; 92 loc.logicalBlockNum = bitmap->s_extPosition;
93 loc.partitionReferenceNum = UDF_SB(sb)->s_partition; 93 loc.partitionReferenceNum = UDF_SB(sb)->s_partition;
94 94
95 bh = udf_tread(sb, udf_get_lb_pblock(sb, loc, block)); 95 bh = udf_tread(sb, udf_get_lb_pblock(sb, &loc, block));
96 if (!bh) 96 if (!bh)
97 retval = -EIO; 97 retval = -EIO;
98 98
@@ -140,27 +140,29 @@ static inline int load_block_bitmap(struct super_block *sb,
140 return slot; 140 return slot;
141} 141}
142 142
143static bool udf_add_free_space(struct udf_sb_info *sbi, 143static void udf_add_free_space(struct super_block *sb, u16 partition, u32 cnt)
144 u16 partition, u32 cnt)
145{ 144{
145 struct udf_sb_info *sbi = UDF_SB(sb);
146 struct logicalVolIntegrityDesc *lvid; 146 struct logicalVolIntegrityDesc *lvid;
147 147
148 if (sbi->s_lvid_bh == NULL) 148 if (!sbi->s_lvid_bh)
149 return false; 149 return;
150 150
151 lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data; 151 lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data;
152 le32_add_cpu(&lvid->freeSpaceTable[partition], cnt); 152 le32_add_cpu(&lvid->freeSpaceTable[partition], cnt);
153 return true; 153 udf_updated_lvid(sb);
154} 154}
155 155
156static void udf_bitmap_free_blocks(struct super_block *sb, 156static void udf_bitmap_free_blocks(struct super_block *sb,
157 struct inode *inode, 157 struct inode *inode,
158 struct udf_bitmap *bitmap, 158 struct udf_bitmap *bitmap,
159 kernel_lb_addr bloc, uint32_t offset, 159 struct kernel_lb_addr *bloc,
160 uint32_t offset,
160 uint32_t count) 161 uint32_t count)
161{ 162{
162 struct udf_sb_info *sbi = UDF_SB(sb); 163 struct udf_sb_info *sbi = UDF_SB(sb);
163 struct buffer_head *bh = NULL; 164 struct buffer_head *bh = NULL;
165 struct udf_part_map *partmap;
164 unsigned long block; 166 unsigned long block;
165 unsigned long block_group; 167 unsigned long block_group;
166 unsigned long bit; 168 unsigned long bit;
@@ -169,17 +171,17 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
169 unsigned long overflow; 171 unsigned long overflow;
170 172
171 mutex_lock(&sbi->s_alloc_mutex); 173 mutex_lock(&sbi->s_alloc_mutex);
172 if (bloc.logicalBlockNum < 0 || 174 partmap = &sbi->s_partmaps[bloc->partitionReferenceNum];
173 (bloc.logicalBlockNum + count) > 175 if (bloc->logicalBlockNum < 0 ||
174 sbi->s_partmaps[bloc.partitionReferenceNum].s_partition_len) { 176 (bloc->logicalBlockNum + count) >
177 partmap->s_partition_len) {
175 udf_debug("%d < %d || %d + %d > %d\n", 178 udf_debug("%d < %d || %d + %d > %d\n",
176 bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count, 179 bloc->logicalBlockNum, 0, bloc->logicalBlockNum,
177 sbi->s_partmaps[bloc.partitionReferenceNum]. 180 count, partmap->s_partition_len);
178 s_partition_len);
179 goto error_return; 181 goto error_return;
180 } 182 }
181 183
182 block = bloc.logicalBlockNum + offset + 184 block = bloc->logicalBlockNum + offset +
183 (sizeof(struct spaceBitmapDesc) << 3); 185 (sizeof(struct spaceBitmapDesc) << 3);
184 186
185 do { 187 do {
@@ -207,7 +209,7 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
207 } else { 209 } else {
208 if (inode) 210 if (inode)
209 vfs_dq_free_block(inode, 1); 211 vfs_dq_free_block(inode, 1);
210 udf_add_free_space(sbi, sbi->s_partition, 1); 212 udf_add_free_space(sb, sbi->s_partition, 1);
211 } 213 }
212 } 214 }
213 mark_buffer_dirty(bh); 215 mark_buffer_dirty(bh);
@@ -218,9 +220,6 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
218 } while (overflow); 220 } while (overflow);
219 221
220error_return: 222error_return:
221 sb->s_dirt = 1;
222 if (sbi->s_lvid_bh)
223 mark_buffer_dirty(sbi->s_lvid_bh);
224 mutex_unlock(&sbi->s_alloc_mutex); 223 mutex_unlock(&sbi->s_alloc_mutex);
225} 224}
226 225
@@ -277,9 +276,7 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb,
277 } while (block_count > 0); 276 } while (block_count > 0);
278 277
279out: 278out:
280 if (udf_add_free_space(sbi, partition, -alloc_count)) 279 udf_add_free_space(sb, partition, -alloc_count);
281 mark_buffer_dirty(sbi->s_lvid_bh);
282 sb->s_dirt = 1;
283 mutex_unlock(&sbi->s_alloc_mutex); 280 mutex_unlock(&sbi->s_alloc_mutex);
284 return alloc_count; 281 return alloc_count;
285} 282}
@@ -409,9 +406,7 @@ got_block:
409 406
410 mark_buffer_dirty(bh); 407 mark_buffer_dirty(bh);
411 408
412 if (udf_add_free_space(sbi, partition, -1)) 409 udf_add_free_space(sb, partition, -1);
413 mark_buffer_dirty(sbi->s_lvid_bh);
414 sb->s_dirt = 1;
415 mutex_unlock(&sbi->s_alloc_mutex); 410 mutex_unlock(&sbi->s_alloc_mutex);
416 *err = 0; 411 *err = 0;
417 return newblock; 412 return newblock;
@@ -425,26 +420,28 @@ error_return:
425static void udf_table_free_blocks(struct super_block *sb, 420static void udf_table_free_blocks(struct super_block *sb,
426 struct inode *inode, 421 struct inode *inode,
427 struct inode *table, 422 struct inode *table,
428 kernel_lb_addr bloc, uint32_t offset, 423 struct kernel_lb_addr *bloc,
424 uint32_t offset,
429 uint32_t count) 425 uint32_t count)
430{ 426{
431 struct udf_sb_info *sbi = UDF_SB(sb); 427 struct udf_sb_info *sbi = UDF_SB(sb);
428 struct udf_part_map *partmap;
432 uint32_t start, end; 429 uint32_t start, end;
433 uint32_t elen; 430 uint32_t elen;
434 kernel_lb_addr eloc; 431 struct kernel_lb_addr eloc;
435 struct extent_position oepos, epos; 432 struct extent_position oepos, epos;
436 int8_t etype; 433 int8_t etype;
437 int i; 434 int i;
438 struct udf_inode_info *iinfo; 435 struct udf_inode_info *iinfo;
439 436
440 mutex_lock(&sbi->s_alloc_mutex); 437 mutex_lock(&sbi->s_alloc_mutex);
441 if (bloc.logicalBlockNum < 0 || 438 partmap = &sbi->s_partmaps[bloc->partitionReferenceNum];
442 (bloc.logicalBlockNum + count) > 439 if (bloc->logicalBlockNum < 0 ||
443 sbi->s_partmaps[bloc.partitionReferenceNum].s_partition_len) { 440 (bloc->logicalBlockNum + count) >
441 partmap->s_partition_len) {
444 udf_debug("%d < %d || %d + %d > %d\n", 442 udf_debug("%d < %d || %d + %d > %d\n",
445 bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count, 443 bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count,
446 sbi->s_partmaps[bloc.partitionReferenceNum]. 444 partmap->s_partition_len);
447 s_partition_len);
448 goto error_return; 445 goto error_return;
449 } 446 }
450 447
@@ -453,11 +450,10 @@ static void udf_table_free_blocks(struct super_block *sb,
453 could occure, but.. oh well */ 450 could occure, but.. oh well */
454 if (inode) 451 if (inode)
455 vfs_dq_free_block(inode, count); 452 vfs_dq_free_block(inode, count);
456 if (udf_add_free_space(sbi, sbi->s_partition, count)) 453 udf_add_free_space(sb, sbi->s_partition, count);
457 mark_buffer_dirty(sbi->s_lvid_bh);
458 454
459 start = bloc.logicalBlockNum + offset; 455 start = bloc->logicalBlockNum + offset;
460 end = bloc.logicalBlockNum + offset + count - 1; 456 end = bloc->logicalBlockNum + offset + count - 1;
461 457
462 epos.offset = oepos.offset = sizeof(struct unallocSpaceEntry); 458 epos.offset = oepos.offset = sizeof(struct unallocSpaceEntry);
463 elen = 0; 459 elen = 0;
@@ -483,7 +479,7 @@ static void udf_table_free_blocks(struct super_block *sb,
483 start += count; 479 start += count;
484 count = 0; 480 count = 0;
485 } 481 }
486 udf_write_aext(table, &oepos, eloc, elen, 1); 482 udf_write_aext(table, &oepos, &eloc, elen, 1);
487 } else if (eloc.logicalBlockNum == (end + 1)) { 483 } else if (eloc.logicalBlockNum == (end + 1)) {
488 if ((0x3FFFFFFF - elen) < 484 if ((0x3FFFFFFF - elen) <
489 (count << sb->s_blocksize_bits)) { 485 (count << sb->s_blocksize_bits)) {
@@ -502,7 +498,7 @@ static void udf_table_free_blocks(struct super_block *sb,
502 end -= count; 498 end -= count;
503 count = 0; 499 count = 0;
504 } 500 }
505 udf_write_aext(table, &oepos, eloc, elen, 1); 501 udf_write_aext(table, &oepos, &eloc, elen, 1);
506 } 502 }
507 503
508 if (epos.bh != oepos.bh) { 504 if (epos.bh != oepos.bh) {
@@ -532,8 +528,8 @@ static void udf_table_free_blocks(struct super_block *sb,
532 */ 528 */
533 529
534 int adsize; 530 int adsize;
535 short_ad *sad = NULL; 531 struct short_ad *sad = NULL;
536 long_ad *lad = NULL; 532 struct long_ad *lad = NULL;
537 struct allocExtDesc *aed; 533 struct allocExtDesc *aed;
538 534
539 eloc.logicalBlockNum = start; 535 eloc.logicalBlockNum = start;
@@ -541,9 +537,9 @@ static void udf_table_free_blocks(struct super_block *sb,
541 (count << sb->s_blocksize_bits); 537 (count << sb->s_blocksize_bits);
542 538
543 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) 539 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
544 adsize = sizeof(short_ad); 540 adsize = sizeof(struct short_ad);
545 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) 541 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
546 adsize = sizeof(long_ad); 542 adsize = sizeof(struct long_ad);
547 else { 543 else {
548 brelse(oepos.bh); 544 brelse(oepos.bh);
549 brelse(epos.bh); 545 brelse(epos.bh);
@@ -563,7 +559,7 @@ static void udf_table_free_blocks(struct super_block *sb,
563 elen -= sb->s_blocksize; 559 elen -= sb->s_blocksize;
564 560
565 epos.bh = udf_tread(sb, 561 epos.bh = udf_tread(sb,
566 udf_get_lb_pblock(sb, epos.block, 0)); 562 udf_get_lb_pblock(sb, &epos.block, 0));
567 if (!epos.bh) { 563 if (!epos.bh) {
568 brelse(oepos.bh); 564 brelse(oepos.bh);
569 goto error_return; 565 goto error_return;
@@ -601,15 +597,15 @@ static void udf_table_free_blocks(struct super_block *sb,
601 if (sbi->s_udfrev >= 0x0200) 597 if (sbi->s_udfrev >= 0x0200)
602 udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, 598 udf_new_tag(epos.bh->b_data, TAG_IDENT_AED,
603 3, 1, epos.block.logicalBlockNum, 599 3, 1, epos.block.logicalBlockNum,
604 sizeof(tag)); 600 sizeof(struct tag));
605 else 601 else
606 udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, 602 udf_new_tag(epos.bh->b_data, TAG_IDENT_AED,
607 2, 1, epos.block.logicalBlockNum, 603 2, 1, epos.block.logicalBlockNum,
608 sizeof(tag)); 604 sizeof(struct tag));
609 605
610 switch (iinfo->i_alloc_type) { 606 switch (iinfo->i_alloc_type) {
611 case ICBTAG_FLAG_AD_SHORT: 607 case ICBTAG_FLAG_AD_SHORT:
612 sad = (short_ad *)sptr; 608 sad = (struct short_ad *)sptr;
613 sad->extLength = cpu_to_le32( 609 sad->extLength = cpu_to_le32(
614 EXT_NEXT_EXTENT_ALLOCDECS | 610 EXT_NEXT_EXTENT_ALLOCDECS |
615 sb->s_blocksize); 611 sb->s_blocksize);
@@ -617,7 +613,7 @@ static void udf_table_free_blocks(struct super_block *sb,
617 cpu_to_le32(epos.block.logicalBlockNum); 613 cpu_to_le32(epos.block.logicalBlockNum);
618 break; 614 break;
619 case ICBTAG_FLAG_AD_LONG: 615 case ICBTAG_FLAG_AD_LONG:
620 lad = (long_ad *)sptr; 616 lad = (struct long_ad *)sptr;
621 lad->extLength = cpu_to_le32( 617 lad->extLength = cpu_to_le32(
622 EXT_NEXT_EXTENT_ALLOCDECS | 618 EXT_NEXT_EXTENT_ALLOCDECS |
623 sb->s_blocksize); 619 sb->s_blocksize);
@@ -635,7 +631,7 @@ static void udf_table_free_blocks(struct super_block *sb,
635 631
636 /* It's possible that stealing the block emptied the extent */ 632 /* It's possible that stealing the block emptied the extent */
637 if (elen) { 633 if (elen) {
638 udf_write_aext(table, &epos, eloc, elen, 1); 634 udf_write_aext(table, &epos, &eloc, elen, 1);
639 635
640 if (!epos.bh) { 636 if (!epos.bh) {
641 iinfo->i_lenAlloc += adsize; 637 iinfo->i_lenAlloc += adsize;
@@ -653,7 +649,6 @@ static void udf_table_free_blocks(struct super_block *sb,
653 brelse(oepos.bh); 649 brelse(oepos.bh);
654 650
655error_return: 651error_return:
656 sb->s_dirt = 1;
657 mutex_unlock(&sbi->s_alloc_mutex); 652 mutex_unlock(&sbi->s_alloc_mutex);
658 return; 653 return;
659} 654}
@@ -666,7 +661,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
666 struct udf_sb_info *sbi = UDF_SB(sb); 661 struct udf_sb_info *sbi = UDF_SB(sb);
667 int alloc_count = 0; 662 int alloc_count = 0;
668 uint32_t elen, adsize; 663 uint32_t elen, adsize;
669 kernel_lb_addr eloc; 664 struct kernel_lb_addr eloc;
670 struct extent_position epos; 665 struct extent_position epos;
671 int8_t etype = -1; 666 int8_t etype = -1;
672 struct udf_inode_info *iinfo; 667 struct udf_inode_info *iinfo;
@@ -677,9 +672,9 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
677 672
678 iinfo = UDF_I(table); 673 iinfo = UDF_I(table);
679 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) 674 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
680 adsize = sizeof(short_ad); 675 adsize = sizeof(struct short_ad);
681 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) 676 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
682 adsize = sizeof(long_ad); 677 adsize = sizeof(struct long_ad);
683 else 678 else
684 return 0; 679 return 0;
685 680
@@ -707,7 +702,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
707 alloc_count = block_count; 702 alloc_count = block_count;
708 eloc.logicalBlockNum += alloc_count; 703 eloc.logicalBlockNum += alloc_count;
709 elen -= (alloc_count << sb->s_blocksize_bits); 704 elen -= (alloc_count << sb->s_blocksize_bits);
710 udf_write_aext(table, &epos, eloc, 705 udf_write_aext(table, &epos, &eloc,
711 (etype << 30) | elen, 1); 706 (etype << 30) | elen, 1);
712 } else 707 } else
713 udf_delete_aext(table, epos, eloc, 708 udf_delete_aext(table, epos, eloc,
@@ -718,10 +713,8 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
718 713
719 brelse(epos.bh); 714 brelse(epos.bh);
720 715
721 if (alloc_count && udf_add_free_space(sbi, partition, -alloc_count)) { 716 if (alloc_count)
722 mark_buffer_dirty(sbi->s_lvid_bh); 717 udf_add_free_space(sb, partition, -alloc_count);
723 sb->s_dirt = 1;
724 }
725 mutex_unlock(&sbi->s_alloc_mutex); 718 mutex_unlock(&sbi->s_alloc_mutex);
726 return alloc_count; 719 return alloc_count;
727} 720}
@@ -735,7 +728,7 @@ static int udf_table_new_block(struct super_block *sb,
735 uint32_t spread = 0xFFFFFFFF, nspread = 0xFFFFFFFF; 728 uint32_t spread = 0xFFFFFFFF, nspread = 0xFFFFFFFF;
736 uint32_t newblock = 0, adsize; 729 uint32_t newblock = 0, adsize;
737 uint32_t elen, goal_elen = 0; 730 uint32_t elen, goal_elen = 0;
738 kernel_lb_addr eloc, uninitialized_var(goal_eloc); 731 struct kernel_lb_addr eloc, uninitialized_var(goal_eloc);
739 struct extent_position epos, goal_epos; 732 struct extent_position epos, goal_epos;
740 int8_t etype; 733 int8_t etype;
741 struct udf_inode_info *iinfo = UDF_I(table); 734 struct udf_inode_info *iinfo = UDF_I(table);
@@ -743,9 +736,9 @@ static int udf_table_new_block(struct super_block *sb,
743 *err = -ENOSPC; 736 *err = -ENOSPC;
744 737
745 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) 738 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
746 adsize = sizeof(short_ad); 739 adsize = sizeof(struct short_ad);
747 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) 740 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
748 adsize = sizeof(long_ad); 741 adsize = sizeof(struct long_ad);
749 else 742 else
750 return newblock; 743 return newblock;
751 744
@@ -814,46 +807,37 @@ static int udf_table_new_block(struct super_block *sb,
814 } 807 }
815 808
816 if (goal_elen) 809 if (goal_elen)
817 udf_write_aext(table, &goal_epos, goal_eloc, goal_elen, 1); 810 udf_write_aext(table, &goal_epos, &goal_eloc, goal_elen, 1);
818 else 811 else
819 udf_delete_aext(table, goal_epos, goal_eloc, goal_elen); 812 udf_delete_aext(table, goal_epos, goal_eloc, goal_elen);
820 brelse(goal_epos.bh); 813 brelse(goal_epos.bh);
821 814
822 if (udf_add_free_space(sbi, partition, -1)) 815 udf_add_free_space(sb, partition, -1);
823 mark_buffer_dirty(sbi->s_lvid_bh);
824 816
825 sb->s_dirt = 1;
826 mutex_unlock(&sbi->s_alloc_mutex); 817 mutex_unlock(&sbi->s_alloc_mutex);
827 *err = 0; 818 *err = 0;
828 return newblock; 819 return newblock;
829} 820}
830 821
831inline void udf_free_blocks(struct super_block *sb, 822void udf_free_blocks(struct super_block *sb, struct inode *inode,
832 struct inode *inode, 823 struct kernel_lb_addr *bloc, uint32_t offset,
833 kernel_lb_addr bloc, uint32_t offset, 824 uint32_t count)
834 uint32_t count)
835{ 825{
836 uint16_t partition = bloc.partitionReferenceNum; 826 uint16_t partition = bloc->partitionReferenceNum;
837 struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; 827 struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition];
838 828
839 if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) { 829 if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) {
840 return udf_bitmap_free_blocks(sb, inode, 830 udf_bitmap_free_blocks(sb, inode, map->s_uspace.s_bitmap,
841 map->s_uspace.s_bitmap, 831 bloc, offset, count);
842 bloc, offset, count);
843 } else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) { 832 } else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) {
844 return udf_table_free_blocks(sb, inode, 833 udf_table_free_blocks(sb, inode, map->s_uspace.s_table,
845 map->s_uspace.s_table, 834 bloc, offset, count);
846 bloc, offset, count);
847 } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) { 835 } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) {
848 return udf_bitmap_free_blocks(sb, inode, 836 udf_bitmap_free_blocks(sb, inode, map->s_fspace.s_bitmap,
849 map->s_fspace.s_bitmap, 837 bloc, offset, count);
850 bloc, offset, count);
851 } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) { 838 } else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) {
852 return udf_table_free_blocks(sb, inode, 839 udf_table_free_blocks(sb, inode, map->s_fspace.s_table,
853 map->s_fspace.s_table, 840 bloc, offset, count);
854 bloc, offset, count);
855 } else {
856 return;
857 } 841 }
858} 842}
859 843
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 62dc270c69d1..2efd4d5291b6 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -51,7 +51,7 @@ static int do_udf_readdir(struct inode *dir, struct file *filp,
51 uint8_t lfi; 51 uint8_t lfi;
52 loff_t size = udf_ext0_offset(dir) + dir->i_size; 52 loff_t size = udf_ext0_offset(dir) + dir->i_size;
53 struct buffer_head *tmp, *bha[16]; 53 struct buffer_head *tmp, *bha[16];
54 kernel_lb_addr eloc; 54 struct kernel_lb_addr eloc;
55 uint32_t elen; 55 uint32_t elen;
56 sector_t offset; 56 sector_t offset;
57 int i, num, ret = 0; 57 int i, num, ret = 0;
@@ -80,13 +80,13 @@ static int do_udf_readdir(struct inode *dir, struct file *filp,
80 ret = -ENOENT; 80 ret = -ENOENT;
81 goto out; 81 goto out;
82 } 82 }
83 block = udf_get_lb_pblock(dir->i_sb, eloc, offset); 83 block = udf_get_lb_pblock(dir->i_sb, &eloc, offset);
84 if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { 84 if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
85 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) 85 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
86 epos.offset -= sizeof(short_ad); 86 epos.offset -= sizeof(struct short_ad);
87 else if (iinfo->i_alloc_type == 87 else if (iinfo->i_alloc_type ==
88 ICBTAG_FLAG_AD_LONG) 88 ICBTAG_FLAG_AD_LONG)
89 epos.offset -= sizeof(long_ad); 89 epos.offset -= sizeof(struct long_ad);
90 } else { 90 } else {
91 offset = 0; 91 offset = 0;
92 } 92 }
@@ -101,7 +101,7 @@ static int do_udf_readdir(struct inode *dir, struct file *filp,
101 if (i + offset > (elen >> dir->i_sb->s_blocksize_bits)) 101 if (i + offset > (elen >> dir->i_sb->s_blocksize_bits))
102 i = (elen >> dir->i_sb->s_blocksize_bits) - offset; 102 i = (elen >> dir->i_sb->s_blocksize_bits) - offset;
103 for (num = 0; i > 0; i--) { 103 for (num = 0; i > 0; i--) {
104 block = udf_get_lb_pblock(dir->i_sb, eloc, offset + i); 104 block = udf_get_lb_pblock(dir->i_sb, &eloc, offset + i);
105 tmp = udf_tgetblk(dir->i_sb, block); 105 tmp = udf_tgetblk(dir->i_sb, block);
106 if (tmp && !buffer_uptodate(tmp) && !buffer_locked(tmp)) 106 if (tmp && !buffer_uptodate(tmp) && !buffer_locked(tmp))
107 bha[num++] = tmp; 107 bha[num++] = tmp;
@@ -161,9 +161,9 @@ static int do_udf_readdir(struct inode *dir, struct file *filp,
161 memcpy(fname, "..", flen); 161 memcpy(fname, "..", flen);
162 dt_type = DT_DIR; 162 dt_type = DT_DIR;
163 } else { 163 } else {
164 kernel_lb_addr tloc = lelb_to_cpu(cfi.icb.extLocation); 164 struct kernel_lb_addr tloc = lelb_to_cpu(cfi.icb.extLocation);
165 165
166 iblock = udf_get_lb_pblock(dir->i_sb, tloc, 0); 166 iblock = udf_get_lb_pblock(dir->i_sb, &tloc, 0);
167 flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); 167 flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi);
168 dt_type = DT_UNKNOWN; 168 dt_type = DT_UNKNOWN;
169 } 169 }
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 2820f8fcf4cc..1d2c570704c8 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -20,7 +20,7 @@
20 20
21#if 0 21#if 0
22static uint8_t *udf_filead_read(struct inode *dir, uint8_t *tmpad, 22static uint8_t *udf_filead_read(struct inode *dir, uint8_t *tmpad,
23 uint8_t ad_size, kernel_lb_addr fe_loc, 23 uint8_t ad_size, struct kernel_lb_addr fe_loc,
24 int *pos, int *offset, struct buffer_head **bh, 24 int *pos, int *offset, struct buffer_head **bh,
25 int *error) 25 int *error)
26{ 26{
@@ -75,7 +75,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
75 struct udf_fileident_bh *fibh, 75 struct udf_fileident_bh *fibh,
76 struct fileIdentDesc *cfi, 76 struct fileIdentDesc *cfi,
77 struct extent_position *epos, 77 struct extent_position *epos,
78 kernel_lb_addr *eloc, uint32_t *elen, 78 struct kernel_lb_addr *eloc, uint32_t *elen,
79 sector_t *offset) 79 sector_t *offset)
80{ 80{
81 struct fileIdentDesc *fi; 81 struct fileIdentDesc *fi;
@@ -111,7 +111,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
111 (EXT_RECORDED_ALLOCATED >> 30)) 111 (EXT_RECORDED_ALLOCATED >> 30))
112 return NULL; 112 return NULL;
113 113
114 block = udf_get_lb_pblock(dir->i_sb, *eloc, *offset); 114 block = udf_get_lb_pblock(dir->i_sb, eloc, *offset);
115 115
116 (*offset)++; 116 (*offset)++;
117 117
@@ -131,7 +131,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
131 if (i + *offset > (*elen >> blocksize_bits)) 131 if (i + *offset > (*elen >> blocksize_bits))
132 i = (*elen >> blocksize_bits)-*offset; 132 i = (*elen >> blocksize_bits)-*offset;
133 for (num = 0; i > 0; i--) { 133 for (num = 0; i > 0; i--) {
134 block = udf_get_lb_pblock(dir->i_sb, *eloc, 134 block = udf_get_lb_pblock(dir->i_sb, eloc,
135 *offset + i); 135 *offset + i);
136 tmp = udf_tgetblk(dir->i_sb, block); 136 tmp = udf_tgetblk(dir->i_sb, block);
137 if (tmp && !buffer_uptodate(tmp) && 137 if (tmp && !buffer_uptodate(tmp) &&
@@ -169,7 +169,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
169 (EXT_RECORDED_ALLOCATED >> 30)) 169 (EXT_RECORDED_ALLOCATED >> 30))
170 return NULL; 170 return NULL;
171 171
172 block = udf_get_lb_pblock(dir->i_sb, *eloc, *offset); 172 block = udf_get_lb_pblock(dir->i_sb, eloc, *offset);
173 173
174 (*offset)++; 174 (*offset)++;
175 175
@@ -249,9 +249,9 @@ struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, int *offset)
249} 249}
250 250
251#if 0 251#if 0
252static extent_ad *udf_get_fileextent(void *buffer, int bufsize, int *offset) 252static struct extent_ad *udf_get_fileextent(void *buffer, int bufsize, int *offset)
253{ 253{
254 extent_ad *ext; 254 struct extent_ad *ext;
255 struct fileEntry *fe; 255 struct fileEntry *fe;
256 uint8_t *ptr; 256 uint8_t *ptr;
257 257
@@ -274,54 +274,54 @@ static extent_ad *udf_get_fileextent(void *buffer, int bufsize, int *offset)
274 if ((*offset > 0) && (*offset < le32_to_cpu(fe->lengthAllocDescs))) 274 if ((*offset > 0) && (*offset < le32_to_cpu(fe->lengthAllocDescs)))
275 ptr += *offset; 275 ptr += *offset;
276 276
277 ext = (extent_ad *)ptr; 277 ext = (struct extent_ad *)ptr;
278 278
279 *offset = *offset + sizeof(extent_ad); 279 *offset = *offset + sizeof(struct extent_ad);
280 return ext; 280 return ext;
281} 281}
282#endif 282#endif
283 283
284short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offset, 284struct short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offset,
285 int inc) 285 int inc)
286{ 286{
287 short_ad *sa; 287 struct short_ad *sa;
288 288
289 if ((!ptr) || (!offset)) { 289 if ((!ptr) || (!offset)) {
290 printk(KERN_ERR "udf: udf_get_fileshortad() invalidparms\n"); 290 printk(KERN_ERR "udf: udf_get_fileshortad() invalidparms\n");
291 return NULL; 291 return NULL;
292 } 292 }
293 293
294 if ((*offset + sizeof(short_ad)) > maxoffset) 294 if ((*offset + sizeof(struct short_ad)) > maxoffset)
295 return NULL; 295 return NULL;
296 else { 296 else {
297 sa = (short_ad *)ptr; 297 sa = (struct short_ad *)ptr;
298 if (sa->extLength == 0) 298 if (sa->extLength == 0)
299 return NULL; 299 return NULL;
300 } 300 }
301 301
302 if (inc) 302 if (inc)
303 *offset += sizeof(short_ad); 303 *offset += sizeof(struct short_ad);
304 return sa; 304 return sa;
305} 305}
306 306
307long_ad *udf_get_filelongad(uint8_t *ptr, int maxoffset, uint32_t *offset, int inc) 307struct long_ad *udf_get_filelongad(uint8_t *ptr, int maxoffset, uint32_t *offset, int inc)
308{ 308{
309 long_ad *la; 309 struct long_ad *la;
310 310
311 if ((!ptr) || (!offset)) { 311 if ((!ptr) || (!offset)) {
312 printk(KERN_ERR "udf: udf_get_filelongad() invalidparms\n"); 312 printk(KERN_ERR "udf: udf_get_filelongad() invalidparms\n");
313 return NULL; 313 return NULL;
314 } 314 }
315 315
316 if ((*offset + sizeof(long_ad)) > maxoffset) 316 if ((*offset + sizeof(struct long_ad)) > maxoffset)
317 return NULL; 317 return NULL;
318 else { 318 else {
319 la = (long_ad *)ptr; 319 la = (struct long_ad *)ptr;
320 if (la->extLength == 0) 320 if (la->extLength == 0)
321 return NULL; 321 return NULL;
322 } 322 }
323 323
324 if (inc) 324 if (inc)
325 *offset += sizeof(long_ad); 325 *offset += sizeof(struct long_ad);
326 return la; 326 return la;
327} 327}
diff --git a/fs/udf/ecma_167.h b/fs/udf/ecma_167.h
index a0974df82b31..4792b771aa80 100644
--- a/fs/udf/ecma_167.h
+++ b/fs/udf/ecma_167.h
@@ -38,10 +38,10 @@
38#define _ECMA_167_H 1 38#define _ECMA_167_H 1
39 39
40/* Character set specification (ECMA 167r3 1/7.2.1) */ 40/* Character set specification (ECMA 167r3 1/7.2.1) */
41typedef struct { 41struct charspec {
42 uint8_t charSetType; 42 uint8_t charSetType;
43 uint8_t charSetInfo[63]; 43 uint8_t charSetInfo[63];
44} __attribute__ ((packed)) charspec; 44} __attribute__ ((packed));
45 45
46/* Character Set Type (ECMA 167r3 1/7.2.1.1) */ 46/* Character Set Type (ECMA 167r3 1/7.2.1.1) */
47#define CHARSPEC_TYPE_CS0 0x00 /* (1/7.2.2) */ 47#define CHARSPEC_TYPE_CS0 0x00 /* (1/7.2.2) */
@@ -57,7 +57,7 @@ typedef struct {
57typedef uint8_t dstring; 57typedef uint8_t dstring;
58 58
59/* Timestamp (ECMA 167r3 1/7.3) */ 59/* Timestamp (ECMA 167r3 1/7.3) */
60typedef struct { 60struct timestamp {
61 __le16 typeAndTimezone; 61 __le16 typeAndTimezone;
62 __le16 year; 62 __le16 year;
63 uint8_t month; 63 uint8_t month;
@@ -68,7 +68,7 @@ typedef struct {
68 uint8_t centiseconds; 68 uint8_t centiseconds;
69 uint8_t hundredsOfMicroseconds; 69 uint8_t hundredsOfMicroseconds;
70 uint8_t microseconds; 70 uint8_t microseconds;
71} __attribute__ ((packed)) timestamp; 71} __attribute__ ((packed));
72 72
73/* Type and Time Zone (ECMA 167r3 1/7.3.1) */ 73/* Type and Time Zone (ECMA 167r3 1/7.3.1) */
74#define TIMESTAMP_TYPE_MASK 0xF000 74#define TIMESTAMP_TYPE_MASK 0xF000
@@ -78,11 +78,11 @@ typedef struct {
78#define TIMESTAMP_TIMEZONE_MASK 0x0FFF 78#define TIMESTAMP_TIMEZONE_MASK 0x0FFF
79 79
80/* Entity identifier (ECMA 167r3 1/7.4) */ 80/* Entity identifier (ECMA 167r3 1/7.4) */
81typedef struct { 81struct regid {
82 uint8_t flags; 82 uint8_t flags;
83 uint8_t ident[23]; 83 uint8_t ident[23];
84 uint8_t identSuffix[8]; 84 uint8_t identSuffix[8];
85} __attribute__ ((packed)) regid; 85} __attribute__ ((packed));
86 86
87/* Flags (ECMA 167r3 1/7.4.1) */ 87/* Flags (ECMA 167r3 1/7.4.1) */
88#define ENTITYID_FLAGS_DIRTY 0x00 88#define ENTITYID_FLAGS_DIRTY 0x00
@@ -126,38 +126,38 @@ struct terminatingExtendedAreaDesc {
126 126
127/* Boot Descriptor (ECMA 167r3 2/9.4) */ 127/* Boot Descriptor (ECMA 167r3 2/9.4) */
128struct bootDesc { 128struct bootDesc {
129 uint8_t structType; 129 uint8_t structType;
130 uint8_t stdIdent[VSD_STD_ID_LEN]; 130 uint8_t stdIdent[VSD_STD_ID_LEN];
131 uint8_t structVersion; 131 uint8_t structVersion;
132 uint8_t reserved1; 132 uint8_t reserved1;
133 regid archType; 133 struct regid archType;
134 regid bootIdent; 134 struct regid bootIdent;
135 __le32 bootExtLocation; 135 __le32 bootExtLocation;
136 __le32 bootExtLength; 136 __le32 bootExtLength;
137 __le64 loadAddress; 137 __le64 loadAddress;
138 __le64 startAddress; 138 __le64 startAddress;
139 timestamp descCreationDateAndTime; 139 struct timestamp descCreationDateAndTime;
140 __le16 flags; 140 __le16 flags;
141 uint8_t reserved2[32]; 141 uint8_t reserved2[32];
142 uint8_t bootUse[1906]; 142 uint8_t bootUse[1906];
143} __attribute__ ((packed)); 143} __attribute__ ((packed));
144 144
145/* Flags (ECMA 167r3 2/9.4.12) */ 145/* Flags (ECMA 167r3 2/9.4.12) */
146#define BOOT_FLAGS_ERASE 0x01 146#define BOOT_FLAGS_ERASE 0x01
147 147
148/* Extent Descriptor (ECMA 167r3 3/7.1) */ 148/* Extent Descriptor (ECMA 167r3 3/7.1) */
149typedef struct { 149struct extent_ad {
150 __le32 extLength; 150 __le32 extLength;
151 __le32 extLocation; 151 __le32 extLocation;
152} __attribute__ ((packed)) extent_ad; 152} __attribute__ ((packed));
153 153
154typedef struct { 154struct kernel_extent_ad {
155 uint32_t extLength; 155 uint32_t extLength;
156 uint32_t extLocation; 156 uint32_t extLocation;
157} kernel_extent_ad; 157};
158 158
159/* Descriptor Tag (ECMA 167r3 3/7.2) */ 159/* Descriptor Tag (ECMA 167r3 3/7.2) */
160typedef struct { 160struct tag {
161 __le16 tagIdent; 161 __le16 tagIdent;
162 __le16 descVersion; 162 __le16 descVersion;
163 uint8_t tagChecksum; 163 uint8_t tagChecksum;
@@ -166,7 +166,7 @@ typedef struct {
166 __le16 descCRC; 166 __le16 descCRC;
167 __le16 descCRCLength; 167 __le16 descCRCLength;
168 __le32 tagLocation; 168 __le32 tagLocation;
169} __attribute__ ((packed)) tag; 169} __attribute__ ((packed));
170 170
171/* Tag Identifier (ECMA 167r3 3/7.2.1) */ 171/* Tag Identifier (ECMA 167r3 3/7.2.1) */
172#define TAG_IDENT_PVD 0x0001 172#define TAG_IDENT_PVD 0x0001
@@ -190,28 +190,28 @@ struct NSRDesc {
190 190
191/* Primary Volume Descriptor (ECMA 167r3 3/10.1) */ 191/* Primary Volume Descriptor (ECMA 167r3 3/10.1) */
192struct primaryVolDesc { 192struct primaryVolDesc {
193 tag descTag; 193 struct tag descTag;
194 __le32 volDescSeqNum; 194 __le32 volDescSeqNum;
195 __le32 primaryVolDescNum; 195 __le32 primaryVolDescNum;
196 dstring volIdent[32]; 196 dstring volIdent[32];
197 __le16 volSeqNum; 197 __le16 volSeqNum;
198 __le16 maxVolSeqNum; 198 __le16 maxVolSeqNum;
199 __le16 interchangeLvl; 199 __le16 interchangeLvl;
200 __le16 maxInterchangeLvl; 200 __le16 maxInterchangeLvl;
201 __le32 charSetList; 201 __le32 charSetList;
202 __le32 maxCharSetList; 202 __le32 maxCharSetList;
203 dstring volSetIdent[128]; 203 dstring volSetIdent[128];
204 charspec descCharSet; 204 struct charspec descCharSet;
205 charspec explanatoryCharSet; 205 struct charspec explanatoryCharSet;
206 extent_ad volAbstract; 206 struct extent_ad volAbstract;
207 extent_ad volCopyright; 207 struct extent_ad volCopyright;
208 regid appIdent; 208 struct regid appIdent;
209 timestamp recordingDateAndTime; 209 struct timestamp recordingDateAndTime;
210 regid impIdent; 210 struct regid impIdent;
211 uint8_t impUse[64]; 211 uint8_t impUse[64];
212 __le32 predecessorVolDescSeqLocation; 212 __le32 predecessorVolDescSeqLocation;
213 __le16 flags; 213 __le16 flags;
214 uint8_t reserved[22]; 214 uint8_t reserved[22];
215} __attribute__ ((packed)); 215} __attribute__ ((packed));
216 216
217/* Flags (ECMA 167r3 3/10.1.21) */ 217/* Flags (ECMA 167r3 3/10.1.21) */
@@ -219,40 +219,40 @@ struct primaryVolDesc {
219 219
220/* Anchor Volume Descriptor Pointer (ECMA 167r3 3/10.2) */ 220/* Anchor Volume Descriptor Pointer (ECMA 167r3 3/10.2) */
221struct anchorVolDescPtr { 221struct anchorVolDescPtr {
222 tag descTag; 222 struct tag descTag;
223 extent_ad mainVolDescSeqExt; 223 struct extent_ad mainVolDescSeqExt;
224 extent_ad reserveVolDescSeqExt; 224 struct extent_ad reserveVolDescSeqExt;
225 uint8_t reserved[480]; 225 uint8_t reserved[480];
226} __attribute__ ((packed)); 226} __attribute__ ((packed));
227 227
228/* Volume Descriptor Pointer (ECMA 167r3 3/10.3) */ 228/* Volume Descriptor Pointer (ECMA 167r3 3/10.3) */
229struct volDescPtr { 229struct volDescPtr {
230 tag descTag; 230 struct tag descTag;
231 __le32 volDescSeqNum; 231 __le32 volDescSeqNum;
232 extent_ad nextVolDescSeqExt; 232 struct extent_ad nextVolDescSeqExt;
233 uint8_t reserved[484]; 233 uint8_t reserved[484];
234} __attribute__ ((packed)); 234} __attribute__ ((packed));
235 235
236/* Implementation Use Volume Descriptor (ECMA 167r3 3/10.4) */ 236/* Implementation Use Volume Descriptor (ECMA 167r3 3/10.4) */
237struct impUseVolDesc { 237struct impUseVolDesc {
238 tag descTag; 238 struct tag descTag;
239 __le32 volDescSeqNum; 239 __le32 volDescSeqNum;
240 regid impIdent; 240 struct regid impIdent;
241 uint8_t impUse[460]; 241 uint8_t impUse[460];
242} __attribute__ ((packed)); 242} __attribute__ ((packed));
243 243
244/* Partition Descriptor (ECMA 167r3 3/10.5) */ 244/* Partition Descriptor (ECMA 167r3 3/10.5) */
245struct partitionDesc { 245struct partitionDesc {
246 tag descTag; 246 struct tag descTag;
247 __le32 volDescSeqNum; 247 __le32 volDescSeqNum;
248 __le16 partitionFlags; 248 __le16 partitionFlags;
249 __le16 partitionNumber; 249 __le16 partitionNumber;
250 regid partitionContents; 250 struct regid partitionContents;
251 uint8_t partitionContentsUse[128]; 251 uint8_t partitionContentsUse[128];
252 __le32 accessType; 252 __le32 accessType;
253 __le32 partitionStartingLocation; 253 __le32 partitionStartingLocation;
254 __le32 partitionLength; 254 __le32 partitionLength;
255 regid impIdent; 255 struct regid impIdent;
256 uint8_t impUse[128]; 256 uint8_t impUse[128];
257 uint8_t reserved[156]; 257 uint8_t reserved[156];
258} __attribute__ ((packed)); 258} __attribute__ ((packed));
@@ -278,19 +278,19 @@ struct partitionDesc {
278 278
279/* Logical Volume Descriptor (ECMA 167r3 3/10.6) */ 279/* Logical Volume Descriptor (ECMA 167r3 3/10.6) */
280struct logicalVolDesc { 280struct logicalVolDesc {
281 tag descTag; 281 struct tag descTag;
282 __le32 volDescSeqNum; 282 __le32 volDescSeqNum;
283 charspec descCharSet; 283 struct charspec descCharSet;
284 dstring logicalVolIdent[128]; 284 dstring logicalVolIdent[128];
285 __le32 logicalBlockSize; 285 __le32 logicalBlockSize;
286 regid domainIdent; 286 struct regid domainIdent;
287 uint8_t logicalVolContentsUse[16]; 287 uint8_t logicalVolContentsUse[16];
288 __le32 mapTableLength; 288 __le32 mapTableLength;
289 __le32 numPartitionMaps; 289 __le32 numPartitionMaps;
290 regid impIdent; 290 struct regid impIdent;
291 uint8_t impUse[128]; 291 uint8_t impUse[128];
292 extent_ad integritySeqExt; 292 struct extent_ad integritySeqExt;
293 uint8_t partitionMaps[0]; 293 uint8_t partitionMaps[0];
294} __attribute__ ((packed)); 294} __attribute__ ((packed));
295 295
296/* Generic Partition Map (ECMA 167r3 3/10.7.1) */ 296/* Generic Partition Map (ECMA 167r3 3/10.7.1) */
@@ -322,30 +322,30 @@ struct genericPartitionMap2 {
322 322
323/* Unallocated Space Descriptor (ECMA 167r3 3/10.8) */ 323/* Unallocated Space Descriptor (ECMA 167r3 3/10.8) */
324struct unallocSpaceDesc { 324struct unallocSpaceDesc {
325 tag descTag; 325 struct tag descTag;
326 __le32 volDescSeqNum; 326 __le32 volDescSeqNum;
327 __le32 numAllocDescs; 327 __le32 numAllocDescs;
328 extent_ad allocDescs[0]; 328 struct extent_ad allocDescs[0];
329} __attribute__ ((packed)); 329} __attribute__ ((packed));
330 330
331/* Terminating Descriptor (ECMA 167r3 3/10.9) */ 331/* Terminating Descriptor (ECMA 167r3 3/10.9) */
332struct terminatingDesc { 332struct terminatingDesc {
333 tag descTag; 333 struct tag descTag;
334 uint8_t reserved[496]; 334 uint8_t reserved[496];
335} __attribute__ ((packed)); 335} __attribute__ ((packed));
336 336
337/* Logical Volume Integrity Descriptor (ECMA 167r3 3/10.10) */ 337/* Logical Volume Integrity Descriptor (ECMA 167r3 3/10.10) */
338struct logicalVolIntegrityDesc { 338struct logicalVolIntegrityDesc {
339 tag descTag; 339 struct tag descTag;
340 timestamp recordingDateAndTime; 340 struct timestamp recordingDateAndTime;
341 __le32 integrityType; 341 __le32 integrityType;
342 extent_ad nextIntegrityExt; 342 struct extent_ad nextIntegrityExt;
343 uint8_t logicalVolContentsUse[32]; 343 uint8_t logicalVolContentsUse[32];
344 __le32 numOfPartitions; 344 __le32 numOfPartitions;
345 __le32 lengthOfImpUse; 345 __le32 lengthOfImpUse;
346 __le32 freeSpaceTable[0]; 346 __le32 freeSpaceTable[0];
347 __le32 sizeTable[0]; 347 __le32 sizeTable[0];
348 uint8_t impUse[0]; 348 uint8_t impUse[0];
349} __attribute__ ((packed)); 349} __attribute__ ((packed));
350 350
351/* Integrity Type (ECMA 167r3 3/10.10.3) */ 351/* Integrity Type (ECMA 167r3 3/10.10.3) */
@@ -353,50 +353,50 @@ struct logicalVolIntegrityDesc {
353#define LVID_INTEGRITY_TYPE_CLOSE 0x00000001 353#define LVID_INTEGRITY_TYPE_CLOSE 0x00000001
354 354
355/* Recorded Address (ECMA 167r3 4/7.1) */ 355/* Recorded Address (ECMA 167r3 4/7.1) */
356typedef struct { 356struct lb_addr {
357 __le32 logicalBlockNum; 357 __le32 logicalBlockNum;
358 __le16 partitionReferenceNum; 358 __le16 partitionReferenceNum;
359} __attribute__ ((packed)) lb_addr; 359} __attribute__ ((packed));
360 360
361/* ... and its in-core analog */ 361/* ... and its in-core analog */
362typedef struct { 362struct kernel_lb_addr {
363 uint32_t logicalBlockNum; 363 uint32_t logicalBlockNum;
364 uint16_t partitionReferenceNum; 364 uint16_t partitionReferenceNum;
365} kernel_lb_addr; 365};
366 366
367/* Short Allocation Descriptor (ECMA 167r3 4/14.14.1) */ 367/* Short Allocation Descriptor (ECMA 167r3 4/14.14.1) */
368typedef struct { 368struct short_ad {
369 __le32 extLength; 369 __le32 extLength;
370 __le32 extPosition; 370 __le32 extPosition;
371} __attribute__ ((packed)) short_ad; 371} __attribute__ ((packed));
372 372
373/* Long Allocation Descriptor (ECMA 167r3 4/14.14.2) */ 373/* Long Allocation Descriptor (ECMA 167r3 4/14.14.2) */
374typedef struct { 374struct long_ad {
375 __le32 extLength; 375 __le32 extLength;
376 lb_addr extLocation; 376 struct lb_addr extLocation;
377 uint8_t impUse[6]; 377 uint8_t impUse[6];
378} __attribute__ ((packed)) long_ad; 378} __attribute__ ((packed));
379 379
380typedef struct { 380struct kernel_long_ad {
381 uint32_t extLength; 381 uint32_t extLength;
382 kernel_lb_addr extLocation; 382 struct kernel_lb_addr extLocation;
383 uint8_t impUse[6]; 383 uint8_t impUse[6];
384} kernel_long_ad; 384};
385 385
386/* Extended Allocation Descriptor (ECMA 167r3 4/14.14.3) */ 386/* Extended Allocation Descriptor (ECMA 167r3 4/14.14.3) */
387typedef struct { 387struct ext_ad {
388 __le32 extLength; 388 __le32 extLength;
389 __le32 recordedLength; 389 __le32 recordedLength;
390 __le32 informationLength; 390 __le32 informationLength;
391 lb_addr extLocation; 391 struct lb_addr extLocation;
392} __attribute__ ((packed)) ext_ad; 392} __attribute__ ((packed));
393 393
394typedef struct { 394struct kernel_ext_ad {
395 uint32_t extLength; 395 uint32_t extLength;
396 uint32_t recordedLength; 396 uint32_t recordedLength;
397 uint32_t informationLength; 397 uint32_t informationLength;
398 kernel_lb_addr extLocation; 398 struct kernel_lb_addr extLocation;
399} kernel_ext_ad; 399};
400 400
401/* Descriptor Tag (ECMA 167r3 4/7.2 - See 3/7.2) */ 401/* Descriptor Tag (ECMA 167r3 4/7.2 - See 3/7.2) */
402 402
@@ -415,44 +415,44 @@ typedef struct {
415 415
416/* File Set Descriptor (ECMA 167r3 4/14.1) */ 416/* File Set Descriptor (ECMA 167r3 4/14.1) */
417struct fileSetDesc { 417struct fileSetDesc {
418 tag descTag; 418 struct tag descTag;
419 timestamp recordingDateAndTime; 419 struct timestamp recordingDateAndTime;
420 __le16 interchangeLvl; 420 __le16 interchangeLvl;
421 __le16 maxInterchangeLvl; 421 __le16 maxInterchangeLvl;
422 __le32 charSetList; 422 __le32 charSetList;
423 __le32 maxCharSetList; 423 __le32 maxCharSetList;
424 __le32 fileSetNum; 424 __le32 fileSetNum;
425 __le32 fileSetDescNum; 425 __le32 fileSetDescNum;
426 charspec logicalVolIdentCharSet; 426 struct charspec logicalVolIdentCharSet;
427 dstring logicalVolIdent[128]; 427 dstring logicalVolIdent[128];
428 charspec fileSetCharSet; 428 struct charspec fileSetCharSet;
429 dstring fileSetIdent[32]; 429 dstring fileSetIdent[32];
430 dstring copyrightFileIdent[32]; 430 dstring copyrightFileIdent[32];
431 dstring abstractFileIdent[32]; 431 dstring abstractFileIdent[32];
432 long_ad rootDirectoryICB; 432 struct long_ad rootDirectoryICB;
433 regid domainIdent; 433 struct regid domainIdent;
434 long_ad nextExt; 434 struct long_ad nextExt;
435 long_ad streamDirectoryICB; 435 struct long_ad streamDirectoryICB;
436 uint8_t reserved[32]; 436 uint8_t reserved[32];
437} __attribute__ ((packed)); 437} __attribute__ ((packed));
438 438
439/* Partition Header Descriptor (ECMA 167r3 4/14.3) */ 439/* Partition Header Descriptor (ECMA 167r3 4/14.3) */
440struct partitionHeaderDesc { 440struct partitionHeaderDesc {
441 short_ad unallocSpaceTable; 441 struct short_ad unallocSpaceTable;
442 short_ad unallocSpaceBitmap; 442 struct short_ad unallocSpaceBitmap;
443 short_ad partitionIntegrityTable; 443 struct short_ad partitionIntegrityTable;
444 short_ad freedSpaceTable; 444 struct short_ad freedSpaceTable;
445 short_ad freedSpaceBitmap; 445 struct short_ad freedSpaceBitmap;
446 uint8_t reserved[88]; 446 uint8_t reserved[88];
447} __attribute__ ((packed)); 447} __attribute__ ((packed));
448 448
449/* File Identifier Descriptor (ECMA 167r3 4/14.4) */ 449/* File Identifier Descriptor (ECMA 167r3 4/14.4) */
450struct fileIdentDesc { 450struct fileIdentDesc {
451 tag descTag; 451 struct tag descTag;
452 __le16 fileVersionNum; 452 __le16 fileVersionNum;
453 uint8_t fileCharacteristics; 453 uint8_t fileCharacteristics;
454 uint8_t lengthFileIdent; 454 uint8_t lengthFileIdent;
455 long_ad icb; 455 struct long_ad icb;
456 __le16 lengthOfImpUse; 456 __le16 lengthOfImpUse;
457 uint8_t impUse[0]; 457 uint8_t impUse[0];
458 uint8_t fileIdent[0]; 458 uint8_t fileIdent[0];
@@ -468,22 +468,22 @@ struct fileIdentDesc {
468 468
469/* Allocation Ext Descriptor (ECMA 167r3 4/14.5) */ 469/* Allocation Ext Descriptor (ECMA 167r3 4/14.5) */
470struct allocExtDesc { 470struct allocExtDesc {
471 tag descTag; 471 struct tag descTag;
472 __le32 previousAllocExtLocation; 472 __le32 previousAllocExtLocation;
473 __le32 lengthAllocDescs; 473 __le32 lengthAllocDescs;
474} __attribute__ ((packed)); 474} __attribute__ ((packed));
475 475
476/* ICB Tag (ECMA 167r3 4/14.6) */ 476/* ICB Tag (ECMA 167r3 4/14.6) */
477typedef struct { 477struct icbtag {
478 __le32 priorRecordedNumDirectEntries; 478 __le32 priorRecordedNumDirectEntries;
479 __le16 strategyType; 479 __le16 strategyType;
480 __le16 strategyParameter; 480 __le16 strategyParameter;
481 __le16 numEntries; 481 __le16 numEntries;
482 uint8_t reserved; 482 uint8_t reserved;
483 uint8_t fileType; 483 uint8_t fileType;
484 lb_addr parentICBLocation; 484 struct lb_addr parentICBLocation;
485 __le16 flags; 485 __le16 flags;
486} __attribute__ ((packed)) icbtag; 486} __attribute__ ((packed));
487 487
488/* Strategy Type (ECMA 167r3 4/14.6.2) */ 488/* Strategy Type (ECMA 167r3 4/14.6.2) */
489#define ICBTAG_STRATEGY_TYPE_UNDEF 0x0000 489#define ICBTAG_STRATEGY_TYPE_UNDEF 0x0000
@@ -528,41 +528,41 @@ typedef struct {
528 528
529/* Indirect Entry (ECMA 167r3 4/14.7) */ 529/* Indirect Entry (ECMA 167r3 4/14.7) */
530struct indirectEntry { 530struct indirectEntry {
531 tag descTag; 531 struct tag descTag;
532 icbtag icbTag; 532 struct icbtag icbTag;
533 long_ad indirectICB; 533 struct long_ad indirectICB;
534} __attribute__ ((packed)); 534} __attribute__ ((packed));
535 535
536/* Terminal Entry (ECMA 167r3 4/14.8) */ 536/* Terminal Entry (ECMA 167r3 4/14.8) */
537struct terminalEntry { 537struct terminalEntry {
538 tag descTag; 538 struct tag descTag;
539 icbtag icbTag; 539 struct icbtag icbTag;
540} __attribute__ ((packed)); 540} __attribute__ ((packed));
541 541
542/* File Entry (ECMA 167r3 4/14.9) */ 542/* File Entry (ECMA 167r3 4/14.9) */
543struct fileEntry { 543struct fileEntry {
544 tag descTag; 544 struct tag descTag;
545 icbtag icbTag; 545 struct icbtag icbTag;
546 __le32 uid; 546 __le32 uid;
547 __le32 gid; 547 __le32 gid;
548 __le32 permissions; 548 __le32 permissions;
549 __le16 fileLinkCount; 549 __le16 fileLinkCount;
550 uint8_t recordFormat; 550 uint8_t recordFormat;
551 uint8_t recordDisplayAttr; 551 uint8_t recordDisplayAttr;
552 __le32 recordLength; 552 __le32 recordLength;
553 __le64 informationLength; 553 __le64 informationLength;
554 __le64 logicalBlocksRecorded; 554 __le64 logicalBlocksRecorded;
555 timestamp accessTime; 555 struct timestamp accessTime;
556 timestamp modificationTime; 556 struct timestamp modificationTime;
557 timestamp attrTime; 557 struct timestamp attrTime;
558 __le32 checkpoint; 558 __le32 checkpoint;
559 long_ad extendedAttrICB; 559 struct long_ad extendedAttrICB;
560 regid impIdent; 560 struct regid impIdent;
561 __le64 uniqueID; 561 __le64 uniqueID;
562 __le32 lengthExtendedAttr; 562 __le32 lengthExtendedAttr;
563 __le32 lengthAllocDescs; 563 __le32 lengthAllocDescs;
564 uint8_t extendedAttr[0]; 564 uint8_t extendedAttr[0];
565 uint8_t allocDescs[0]; 565 uint8_t allocDescs[0];
566} __attribute__ ((packed)); 566} __attribute__ ((packed));
567 567
568/* Permissions (ECMA 167r3 4/14.9.5) */ 568/* Permissions (ECMA 167r3 4/14.9.5) */
@@ -604,7 +604,7 @@ struct fileEntry {
604 604
605/* Extended Attribute Header Descriptor (ECMA 167r3 4/14.10.1) */ 605/* Extended Attribute Header Descriptor (ECMA 167r3 4/14.10.1) */
606struct extendedAttrHeaderDesc { 606struct extendedAttrHeaderDesc {
607 tag descTag; 607 struct tag descTag;
608 __le32 impAttrLocation; 608 __le32 impAttrLocation;
609 __le32 appAttrLocation; 609 __le32 appAttrLocation;
610} __attribute__ ((packed)); 610} __attribute__ ((packed));
@@ -687,7 +687,7 @@ struct impUseExtAttr {
687 uint8_t reserved[3]; 687 uint8_t reserved[3];
688 __le32 attrLength; 688 __le32 attrLength;
689 __le32 impUseLength; 689 __le32 impUseLength;
690 regid impIdent; 690 struct regid impIdent;
691 uint8_t impUse[0]; 691 uint8_t impUse[0];
692} __attribute__ ((packed)); 692} __attribute__ ((packed));
693 693
@@ -698,7 +698,7 @@ struct appUseExtAttr {
698 uint8_t reserved[3]; 698 uint8_t reserved[3];
699 __le32 attrLength; 699 __le32 attrLength;
700 __le32 appUseLength; 700 __le32 appUseLength;
701 regid appIdent; 701 struct regid appIdent;
702 uint8_t appUse[0]; 702 uint8_t appUse[0];
703} __attribute__ ((packed)); 703} __attribute__ ((packed));
704 704
@@ -712,15 +712,15 @@ struct appUseExtAttr {
712 712
713/* Unallocated Space Entry (ECMA 167r3 4/14.11) */ 713/* Unallocated Space Entry (ECMA 167r3 4/14.11) */
714struct unallocSpaceEntry { 714struct unallocSpaceEntry {
715 tag descTag; 715 struct tag descTag;
716 icbtag icbTag; 716 struct icbtag icbTag;
717 __le32 lengthAllocDescs; 717 __le32 lengthAllocDescs;
718 uint8_t allocDescs[0]; 718 uint8_t allocDescs[0];
719} __attribute__ ((packed)); 719} __attribute__ ((packed));
720 720
721/* Space Bitmap Descriptor (ECMA 167r3 4/14.12) */ 721/* Space Bitmap Descriptor (ECMA 167r3 4/14.12) */
722struct spaceBitmapDesc { 722struct spaceBitmapDesc {
723 tag descTag; 723 struct tag descTag;
724 __le32 numOfBits; 724 __le32 numOfBits;
725 __le32 numOfBytes; 725 __le32 numOfBytes;
726 uint8_t bitmap[0]; 726 uint8_t bitmap[0];
@@ -728,13 +728,13 @@ struct spaceBitmapDesc {
728 728
729/* Partition Integrity Entry (ECMA 167r3 4/14.13) */ 729/* Partition Integrity Entry (ECMA 167r3 4/14.13) */
730struct partitionIntegrityEntry { 730struct partitionIntegrityEntry {
731 tag descTag; 731 struct tag descTag;
732 icbtag icbTag; 732 struct icbtag icbTag;
733 timestamp recordingDateAndTime; 733 struct timestamp recordingDateAndTime;
734 uint8_t integrityType; 734 uint8_t integrityType;
735 uint8_t reserved[175]; 735 uint8_t reserved[175];
736 regid impIdent; 736 struct regid impIdent;
737 uint8_t impUse[256]; 737 uint8_t impUse[256];
738} __attribute__ ((packed)); 738} __attribute__ ((packed));
739 739
740/* Short Allocation Descriptor (ECMA 167r3 4/14.14.1) */ 740/* Short Allocation Descriptor (ECMA 167r3 4/14.14.1) */
@@ -765,32 +765,32 @@ struct pathComponent {
765 765
766/* File Entry (ECMA 167r3 4/14.17) */ 766/* File Entry (ECMA 167r3 4/14.17) */
767struct extendedFileEntry { 767struct extendedFileEntry {
768 tag descTag; 768 struct tag descTag;
769 icbtag icbTag; 769 struct icbtag icbTag;
770 __le32 uid; 770 __le32 uid;
771 __le32 gid; 771 __le32 gid;
772 __le32 permissions; 772 __le32 permissions;
773 __le16 fileLinkCount; 773 __le16 fileLinkCount;
774 uint8_t recordFormat; 774 uint8_t recordFormat;
775 uint8_t recordDisplayAttr; 775 uint8_t recordDisplayAttr;
776 __le32 recordLength; 776 __le32 recordLength;
777 __le64 informationLength; 777 __le64 informationLength;
778 __le64 objectSize; 778 __le64 objectSize;
779 __le64 logicalBlocksRecorded; 779 __le64 logicalBlocksRecorded;
780 timestamp accessTime; 780 struct timestamp accessTime;
781 timestamp modificationTime; 781 struct timestamp modificationTime;
782 timestamp createTime; 782 struct timestamp createTime;
783 timestamp attrTime; 783 struct timestamp attrTime;
784 __le32 checkpoint; 784 __le32 checkpoint;
785 __le32 reserved; 785 __le32 reserved;
786 long_ad extendedAttrICB; 786 struct long_ad extendedAttrICB;
787 long_ad streamDirectoryICB; 787 struct long_ad streamDirectoryICB;
788 regid impIdent; 788 struct regid impIdent;
789 __le64 uniqueID; 789 __le64 uniqueID;
790 __le32 lengthExtendedAttr; 790 __le32 lengthExtendedAttr;
791 __le32 lengthAllocDescs; 791 __le32 lengthAllocDescs;
792 uint8_t extendedAttr[0]; 792 uint8_t extendedAttr[0];
793 uint8_t allocDescs[0]; 793 uint8_t allocDescs[0];
794} __attribute__ ((packed)); 794} __attribute__ ((packed));
795 795
796#endif /* _ECMA_167_H */ 796#endif /* _ECMA_167_H */
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 47dbe5613f90..c10fa39f97e2 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -49,12 +49,11 @@ void udf_free_inode(struct inode *inode)
49 le32_add_cpu(&lvidiu->numDirs, -1); 49 le32_add_cpu(&lvidiu->numDirs, -1);
50 else 50 else
51 le32_add_cpu(&lvidiu->numFiles, -1); 51 le32_add_cpu(&lvidiu->numFiles, -1);
52 52 udf_updated_lvid(sb);
53 mark_buffer_dirty(sbi->s_lvid_bh);
54 } 53 }
55 mutex_unlock(&sbi->s_alloc_mutex); 54 mutex_unlock(&sbi->s_alloc_mutex);
56 55
57 udf_free_blocks(sb, NULL, UDF_I(inode)->i_location, 0, 1); 56 udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1);
58} 57}
59 58
60struct inode *udf_new_inode(struct inode *dir, int mode, int *err) 59struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
@@ -122,7 +121,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
122 if (!(++uniqueID & 0x00000000FFFFFFFFUL)) 121 if (!(++uniqueID & 0x00000000FFFFFFFFUL))
123 uniqueID += 16; 122 uniqueID += 16;
124 lvhd->uniqueID = cpu_to_le64(uniqueID); 123 lvhd->uniqueID = cpu_to_le64(uniqueID);
125 mark_buffer_dirty(sbi->s_lvid_bh); 124 udf_updated_lvid(sb);
126 } 125 }
127 mutex_unlock(&sbi->s_alloc_mutex); 126 mutex_unlock(&sbi->s_alloc_mutex);
128 inode->i_mode = mode; 127 inode->i_mode = mode;
@@ -138,7 +137,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
138 iinfo->i_location.logicalBlockNum = block; 137 iinfo->i_location.logicalBlockNum = block;
139 iinfo->i_location.partitionReferenceNum = 138 iinfo->i_location.partitionReferenceNum =
140 dinfo->i_location.partitionReferenceNum; 139 dinfo->i_location.partitionReferenceNum;
141 inode->i_ino = udf_get_lb_pblock(sb, iinfo->i_location, 0); 140 inode->i_ino = udf_get_lb_pblock(sb, &iinfo->i_location, 0);
142 inode->i_blocks = 0; 141 inode->i_blocks = 0;
143 iinfo->i_lenEAttr = 0; 142 iinfo->i_lenEAttr = 0;
144 iinfo->i_lenAlloc = 0; 143 iinfo->i_lenAlloc = 0;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 30ebde490f7f..e7533f785636 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -55,15 +55,15 @@ static int udf_alloc_i_data(struct inode *inode, size_t size);
55static struct buffer_head *inode_getblk(struct inode *, sector_t, int *, 55static struct buffer_head *inode_getblk(struct inode *, sector_t, int *,
56 sector_t *, int *); 56 sector_t *, int *);
57static int8_t udf_insert_aext(struct inode *, struct extent_position, 57static int8_t udf_insert_aext(struct inode *, struct extent_position,
58 kernel_lb_addr, uint32_t); 58 struct kernel_lb_addr, uint32_t);
59static void udf_split_extents(struct inode *, int *, int, int, 59static void udf_split_extents(struct inode *, int *, int, int,
60 kernel_long_ad[EXTENT_MERGE_SIZE], int *); 60 struct kernel_long_ad[EXTENT_MERGE_SIZE], int *);
61static void udf_prealloc_extents(struct inode *, int, int, 61static void udf_prealloc_extents(struct inode *, int, int,
62 kernel_long_ad[EXTENT_MERGE_SIZE], int *); 62 struct kernel_long_ad[EXTENT_MERGE_SIZE], int *);
63static void udf_merge_extents(struct inode *, 63static void udf_merge_extents(struct inode *,
64 kernel_long_ad[EXTENT_MERGE_SIZE], int *); 64 struct kernel_long_ad[EXTENT_MERGE_SIZE], int *);
65static void udf_update_extents(struct inode *, 65static void udf_update_extents(struct inode *,
66 kernel_long_ad[EXTENT_MERGE_SIZE], int, int, 66 struct kernel_long_ad[EXTENT_MERGE_SIZE], int, int,
67 struct extent_position *); 67 struct extent_position *);
68static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); 68static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
69 69
@@ -200,7 +200,7 @@ struct buffer_head *udf_expand_dir_adinicb(struct inode *inode, int *block,
200{ 200{
201 int newblock; 201 int newblock;
202 struct buffer_head *dbh = NULL; 202 struct buffer_head *dbh = NULL;
203 kernel_lb_addr eloc; 203 struct kernel_lb_addr eloc;
204 uint32_t elen; 204 uint32_t elen;
205 uint8_t alloctype; 205 uint8_t alloctype;
206 struct extent_position epos; 206 struct extent_position epos;
@@ -281,7 +281,7 @@ struct buffer_head *udf_expand_dir_adinicb(struct inode *inode, int *block,
281 epos.bh = NULL; 281 epos.bh = NULL;
282 epos.block = iinfo->i_location; 282 epos.block = iinfo->i_location;
283 epos.offset = udf_file_entry_alloc_offset(inode); 283 epos.offset = udf_file_entry_alloc_offset(inode);
284 udf_add_aext(inode, &epos, eloc, elen, 0); 284 udf_add_aext(inode, &epos, &eloc, elen, 0);
285 /* UniqueID stuff */ 285 /* UniqueID stuff */
286 286
287 brelse(epos.bh); 287 brelse(epos.bh);
@@ -359,12 +359,12 @@ static struct buffer_head *udf_getblk(struct inode *inode, long block,
359 359
360/* Extend the file by 'blocks' blocks, return the number of extents added */ 360/* Extend the file by 'blocks' blocks, return the number of extents added */
361int udf_extend_file(struct inode *inode, struct extent_position *last_pos, 361int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
362 kernel_long_ad *last_ext, sector_t blocks) 362 struct kernel_long_ad *last_ext, sector_t blocks)
363{ 363{
364 sector_t add; 364 sector_t add;
365 int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK); 365 int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
366 struct super_block *sb = inode->i_sb; 366 struct super_block *sb = inode->i_sb;
367 kernel_lb_addr prealloc_loc = {}; 367 struct kernel_lb_addr prealloc_loc = {};
368 int prealloc_len = 0; 368 int prealloc_len = 0;
369 struct udf_inode_info *iinfo; 369 struct udf_inode_info *iinfo;
370 370
@@ -411,11 +411,11 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
411 } 411 }
412 412
413 if (fake) { 413 if (fake) {
414 udf_add_aext(inode, last_pos, last_ext->extLocation, 414 udf_add_aext(inode, last_pos, &last_ext->extLocation,
415 last_ext->extLength, 1); 415 last_ext->extLength, 1);
416 count++; 416 count++;
417 } else 417 } else
418 udf_write_aext(inode, last_pos, last_ext->extLocation, 418 udf_write_aext(inode, last_pos, &last_ext->extLocation,
419 last_ext->extLength, 1); 419 last_ext->extLength, 1);
420 420
421 /* Managed to do everything necessary? */ 421 /* Managed to do everything necessary? */
@@ -432,7 +432,7 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
432 /* Create enough extents to cover the whole hole */ 432 /* Create enough extents to cover the whole hole */
433 while (blocks > add) { 433 while (blocks > add) {
434 blocks -= add; 434 blocks -= add;
435 if (udf_add_aext(inode, last_pos, last_ext->extLocation, 435 if (udf_add_aext(inode, last_pos, &last_ext->extLocation,
436 last_ext->extLength, 1) == -1) 436 last_ext->extLength, 1) == -1)
437 return -1; 437 return -1;
438 count++; 438 count++;
@@ -440,7 +440,7 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
440 if (blocks) { 440 if (blocks) {
441 last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | 441 last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
442 (blocks << sb->s_blocksize_bits); 442 (blocks << sb->s_blocksize_bits);
443 if (udf_add_aext(inode, last_pos, last_ext->extLocation, 443 if (udf_add_aext(inode, last_pos, &last_ext->extLocation,
444 last_ext->extLength, 1) == -1) 444 last_ext->extLength, 1) == -1)
445 return -1; 445 return -1;
446 count++; 446 count++;
@@ -449,7 +449,7 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
449out: 449out:
450 /* Do we have some preallocated blocks saved? */ 450 /* Do we have some preallocated blocks saved? */
451 if (prealloc_len) { 451 if (prealloc_len) {
452 if (udf_add_aext(inode, last_pos, prealloc_loc, 452 if (udf_add_aext(inode, last_pos, &prealloc_loc,
453 prealloc_len, 1) == -1) 453 prealloc_len, 1) == -1)
454 return -1; 454 return -1;
455 last_ext->extLocation = prealloc_loc; 455 last_ext->extLocation = prealloc_loc;
@@ -459,9 +459,9 @@ out:
459 459
460 /* last_pos should point to the last written extent... */ 460 /* last_pos should point to the last written extent... */
461 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) 461 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
462 last_pos->offset -= sizeof(short_ad); 462 last_pos->offset -= sizeof(struct short_ad);
463 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) 463 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
464 last_pos->offset -= sizeof(long_ad); 464 last_pos->offset -= sizeof(struct long_ad);
465 else 465 else
466 return -1; 466 return -1;
467 467
@@ -473,11 +473,11 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
473{ 473{
474 static sector_t last_block; 474 static sector_t last_block;
475 struct buffer_head *result = NULL; 475 struct buffer_head *result = NULL;
476 kernel_long_ad laarr[EXTENT_MERGE_SIZE]; 476 struct kernel_long_ad laarr[EXTENT_MERGE_SIZE];
477 struct extent_position prev_epos, cur_epos, next_epos; 477 struct extent_position prev_epos, cur_epos, next_epos;
478 int count = 0, startnum = 0, endnum = 0; 478 int count = 0, startnum = 0, endnum = 0;
479 uint32_t elen = 0, tmpelen; 479 uint32_t elen = 0, tmpelen;
480 kernel_lb_addr eloc, tmpeloc; 480 struct kernel_lb_addr eloc, tmpeloc;
481 int c = 1; 481 int c = 1;
482 loff_t lbcount = 0, b_off = 0; 482 loff_t lbcount = 0, b_off = 0;
483 uint32_t newblocknum, newblock; 483 uint32_t newblocknum, newblock;
@@ -550,12 +550,12 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
550 elen = EXT_RECORDED_ALLOCATED | 550 elen = EXT_RECORDED_ALLOCATED |
551 ((elen + inode->i_sb->s_blocksize - 1) & 551 ((elen + inode->i_sb->s_blocksize - 1) &
552 ~(inode->i_sb->s_blocksize - 1)); 552 ~(inode->i_sb->s_blocksize - 1));
553 etype = udf_write_aext(inode, &cur_epos, eloc, elen, 1); 553 etype = udf_write_aext(inode, &cur_epos, &eloc, elen, 1);
554 } 554 }
555 brelse(prev_epos.bh); 555 brelse(prev_epos.bh);
556 brelse(cur_epos.bh); 556 brelse(cur_epos.bh);
557 brelse(next_epos.bh); 557 brelse(next_epos.bh);
558 newblock = udf_get_lb_pblock(inode->i_sb, eloc, offset); 558 newblock = udf_get_lb_pblock(inode->i_sb, &eloc, offset);
559 *phys = newblock; 559 *phys = newblock;
560 return NULL; 560 return NULL;
561 } 561 }
@@ -572,7 +572,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
572 } else { 572 } else {
573 /* Create a fake extent when there's not one */ 573 /* Create a fake extent when there's not one */
574 memset(&laarr[0].extLocation, 0x00, 574 memset(&laarr[0].extLocation, 0x00,
575 sizeof(kernel_lb_addr)); 575 sizeof(struct kernel_lb_addr));
576 laarr[0].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED; 576 laarr[0].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED;
577 /* Will udf_extend_file() create real extent from 577 /* Will udf_extend_file() create real extent from
578 a fake one? */ 578 a fake one? */
@@ -602,7 +602,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
602 laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | 602 laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
603 inode->i_sb->s_blocksize; 603 inode->i_sb->s_blocksize;
604 memset(&laarr[c].extLocation, 0x00, 604 memset(&laarr[c].extLocation, 0x00,
605 sizeof(kernel_lb_addr)); 605 sizeof(struct kernel_lb_addr));
606 count++; 606 count++;
607 endnum++; 607 endnum++;
608 } 608 }
@@ -699,7 +699,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
699 699
700static void udf_split_extents(struct inode *inode, int *c, int offset, 700static void udf_split_extents(struct inode *inode, int *c, int offset,
701 int newblocknum, 701 int newblocknum,
702 kernel_long_ad laarr[EXTENT_MERGE_SIZE], 702 struct kernel_long_ad laarr[EXTENT_MERGE_SIZE],
703 int *endnum) 703 int *endnum)
704{ 704{
705 unsigned long blocksize = inode->i_sb->s_blocksize; 705 unsigned long blocksize = inode->i_sb->s_blocksize;
@@ -726,7 +726,7 @@ static void udf_split_extents(struct inode *inode, int *c, int offset,
726 if (offset) { 726 if (offset) {
727 if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { 727 if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) {
728 udf_free_blocks(inode->i_sb, inode, 728 udf_free_blocks(inode->i_sb, inode,
729 laarr[curr].extLocation, 729 &laarr[curr].extLocation,
730 0, offset); 730 0, offset);
731 laarr[curr].extLength = 731 laarr[curr].extLength =
732 EXT_NOT_RECORDED_NOT_ALLOCATED | 732 EXT_NOT_RECORDED_NOT_ALLOCATED |
@@ -763,7 +763,7 @@ static void udf_split_extents(struct inode *inode, int *c, int offset,
763} 763}
764 764
765static void udf_prealloc_extents(struct inode *inode, int c, int lastblock, 765static void udf_prealloc_extents(struct inode *inode, int c, int lastblock,
766 kernel_long_ad laarr[EXTENT_MERGE_SIZE], 766 struct kernel_long_ad laarr[EXTENT_MERGE_SIZE],
767 int *endnum) 767 int *endnum)
768{ 768{
769 int start, length = 0, currlength = 0, i; 769 int start, length = 0, currlength = 0, i;
@@ -817,7 +817,7 @@ static void udf_prealloc_extents(struct inode *inode, int c, int lastblock,
817 inode->i_sb->s_blocksize_bits); 817 inode->i_sb->s_blocksize_bits);
818 else { 818 else {
819 memmove(&laarr[c + 2], &laarr[c + 1], 819 memmove(&laarr[c + 2], &laarr[c + 1],
820 sizeof(long_ad) * (*endnum - (c + 1))); 820 sizeof(struct long_ad) * (*endnum - (c + 1)));
821 (*endnum)++; 821 (*endnum)++;
822 laarr[c + 1].extLocation.logicalBlockNum = next; 822 laarr[c + 1].extLocation.logicalBlockNum = next;
823 laarr[c + 1].extLocation.partitionReferenceNum = 823 laarr[c + 1].extLocation.partitionReferenceNum =
@@ -846,7 +846,7 @@ static void udf_prealloc_extents(struct inode *inode, int c, int lastblock,
846 if (*endnum > (i + 1)) 846 if (*endnum > (i + 1))
847 memmove(&laarr[i], 847 memmove(&laarr[i],
848 &laarr[i + 1], 848 &laarr[i + 1],
849 sizeof(long_ad) * 849 sizeof(struct long_ad) *
850 (*endnum - (i + 1))); 850 (*endnum - (i + 1)));
851 i--; 851 i--;
852 (*endnum)--; 852 (*endnum)--;
@@ -859,7 +859,7 @@ static void udf_prealloc_extents(struct inode *inode, int c, int lastblock,
859} 859}
860 860
861static void udf_merge_extents(struct inode *inode, 861static void udf_merge_extents(struct inode *inode,
862 kernel_long_ad laarr[EXTENT_MERGE_SIZE], 862 struct kernel_long_ad laarr[EXTENT_MERGE_SIZE],
863 int *endnum) 863 int *endnum)
864{ 864{
865 int i; 865 int i;
@@ -867,8 +867,8 @@ static void udf_merge_extents(struct inode *inode,
867 unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; 867 unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
868 868
869 for (i = 0; i < (*endnum - 1); i++) { 869 for (i = 0; i < (*endnum - 1); i++) {
870 kernel_long_ad *li /*l[i]*/ = &laarr[i]; 870 struct kernel_long_ad *li /*l[i]*/ = &laarr[i];
871 kernel_long_ad *lip1 /*l[i plus 1]*/ = &laarr[i + 1]; 871 struct kernel_long_ad *lip1 /*l[i plus 1]*/ = &laarr[i + 1];
872 872
873 if (((li->extLength >> 30) == (lip1->extLength >> 30)) && 873 if (((li->extLength >> 30) == (lip1->extLength >> 30)) &&
874 (((li->extLength >> 30) == 874 (((li->extLength >> 30) ==
@@ -902,7 +902,7 @@ static void udf_merge_extents(struct inode *inode,
902 blocksize - 1) & ~(blocksize - 1)); 902 blocksize - 1) & ~(blocksize - 1));
903 if (*endnum > (i + 2)) 903 if (*endnum > (i + 2))
904 memmove(&laarr[i + 1], &laarr[i + 2], 904 memmove(&laarr[i + 1], &laarr[i + 2],
905 sizeof(long_ad) * 905 sizeof(struct long_ad) *
906 (*endnum - (i + 2))); 906 (*endnum - (i + 2)));
907 i--; 907 i--;
908 (*endnum)--; 908 (*endnum)--;
@@ -911,7 +911,7 @@ static void udf_merge_extents(struct inode *inode,
911 (EXT_NOT_RECORDED_ALLOCATED >> 30)) && 911 (EXT_NOT_RECORDED_ALLOCATED >> 30)) &&
912 ((lip1->extLength >> 30) == 912 ((lip1->extLength >> 30) ==
913 (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))) { 913 (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))) {
914 udf_free_blocks(inode->i_sb, inode, li->extLocation, 0, 914 udf_free_blocks(inode->i_sb, inode, &li->extLocation, 0,
915 ((li->extLength & 915 ((li->extLength &
916 UDF_EXTENT_LENGTH_MASK) + 916 UDF_EXTENT_LENGTH_MASK) +
917 blocksize - 1) >> blocksize_bits); 917 blocksize - 1) >> blocksize_bits);
@@ -937,7 +937,7 @@ static void udf_merge_extents(struct inode *inode,
937 blocksize - 1) & ~(blocksize - 1)); 937 blocksize - 1) & ~(blocksize - 1));
938 if (*endnum > (i + 2)) 938 if (*endnum > (i + 2))
939 memmove(&laarr[i + 1], &laarr[i + 2], 939 memmove(&laarr[i + 1], &laarr[i + 2],
940 sizeof(long_ad) * 940 sizeof(struct long_ad) *
941 (*endnum - (i + 2))); 941 (*endnum - (i + 2)));
942 i--; 942 i--;
943 (*endnum)--; 943 (*endnum)--;
@@ -945,7 +945,7 @@ static void udf_merge_extents(struct inode *inode,
945 } else if ((li->extLength >> 30) == 945 } else if ((li->extLength >> 30) ==
946 (EXT_NOT_RECORDED_ALLOCATED >> 30)) { 946 (EXT_NOT_RECORDED_ALLOCATED >> 30)) {
947 udf_free_blocks(inode->i_sb, inode, 947 udf_free_blocks(inode->i_sb, inode,
948 li->extLocation, 0, 948 &li->extLocation, 0,
949 ((li->extLength & 949 ((li->extLength &
950 UDF_EXTENT_LENGTH_MASK) + 950 UDF_EXTENT_LENGTH_MASK) +
951 blocksize - 1) >> blocksize_bits); 951 blocksize - 1) >> blocksize_bits);
@@ -959,12 +959,12 @@ static void udf_merge_extents(struct inode *inode,
959} 959}
960 960
961static void udf_update_extents(struct inode *inode, 961static void udf_update_extents(struct inode *inode,
962 kernel_long_ad laarr[EXTENT_MERGE_SIZE], 962 struct kernel_long_ad laarr[EXTENT_MERGE_SIZE],
963 int startnum, int endnum, 963 int startnum, int endnum,
964 struct extent_position *epos) 964 struct extent_position *epos)
965{ 965{
966 int start = 0, i; 966 int start = 0, i;
967 kernel_lb_addr tmploc; 967 struct kernel_lb_addr tmploc;
968 uint32_t tmplen; 968 uint32_t tmplen;
969 969
970 if (startnum > endnum) { 970 if (startnum > endnum) {
@@ -983,7 +983,7 @@ static void udf_update_extents(struct inode *inode,
983 983
984 for (i = start; i < endnum; i++) { 984 for (i = start; i < endnum; i++) {
985 udf_next_aext(inode, epos, &tmploc, &tmplen, 0); 985 udf_next_aext(inode, epos, &tmploc, &tmplen, 0);
986 udf_write_aext(inode, epos, laarr[i].extLocation, 986 udf_write_aext(inode, epos, &laarr[i].extLocation,
987 laarr[i].extLength, 1); 987 laarr[i].extLength, 1);
988 } 988 }
989} 989}
@@ -1076,7 +1076,7 @@ static void __udf_read_inode(struct inode *inode)
1076 * i_nlink = 1 1076 * i_nlink = 1
1077 * i_op = NULL; 1077 * i_op = NULL;
1078 */ 1078 */
1079 bh = udf_read_ptagged(inode->i_sb, iinfo->i_location, 0, &ident); 1079 bh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 0, &ident);
1080 if (!bh) { 1080 if (!bh) {
1081 printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed !bh\n", 1081 printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed !bh\n",
1082 inode->i_ino); 1082 inode->i_ino);
@@ -1098,24 +1098,24 @@ static void __udf_read_inode(struct inode *inode)
1098 if (fe->icbTag.strategyType == cpu_to_le16(4096)) { 1098 if (fe->icbTag.strategyType == cpu_to_le16(4096)) {
1099 struct buffer_head *ibh; 1099 struct buffer_head *ibh;
1100 1100
1101 ibh = udf_read_ptagged(inode->i_sb, iinfo->i_location, 1, 1101 ibh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 1,
1102 &ident); 1102 &ident);
1103 if (ident == TAG_IDENT_IE && ibh) { 1103 if (ident == TAG_IDENT_IE && ibh) {
1104 struct buffer_head *nbh = NULL; 1104 struct buffer_head *nbh = NULL;
1105 kernel_lb_addr loc; 1105 struct kernel_lb_addr loc;
1106 struct indirectEntry *ie; 1106 struct indirectEntry *ie;
1107 1107
1108 ie = (struct indirectEntry *)ibh->b_data; 1108 ie = (struct indirectEntry *)ibh->b_data;
1109 loc = lelb_to_cpu(ie->indirectICB.extLocation); 1109 loc = lelb_to_cpu(ie->indirectICB.extLocation);
1110 1110
1111 if (ie->indirectICB.extLength && 1111 if (ie->indirectICB.extLength &&
1112 (nbh = udf_read_ptagged(inode->i_sb, loc, 0, 1112 (nbh = udf_read_ptagged(inode->i_sb, &loc, 0,
1113 &ident))) { 1113 &ident))) {
1114 if (ident == TAG_IDENT_FE || 1114 if (ident == TAG_IDENT_FE ||
1115 ident == TAG_IDENT_EFE) { 1115 ident == TAG_IDENT_EFE) {
1116 memcpy(&iinfo->i_location, 1116 memcpy(&iinfo->i_location,
1117 &loc, 1117 &loc,
1118 sizeof(kernel_lb_addr)); 1118 sizeof(struct kernel_lb_addr));
1119 brelse(bh); 1119 brelse(bh);
1120 brelse(ibh); 1120 brelse(ibh);
1121 brelse(nbh); 1121 brelse(nbh);
@@ -1222,8 +1222,15 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1222 inode->i_size = le64_to_cpu(fe->informationLength); 1222 inode->i_size = le64_to_cpu(fe->informationLength);
1223 iinfo->i_lenExtents = inode->i_size; 1223 iinfo->i_lenExtents = inode->i_size;
1224 1224
1225 inode->i_mode = udf_convert_permissions(fe); 1225 if (fe->icbTag.fileType != ICBTAG_FILE_TYPE_DIRECTORY &&
1226 inode->i_mode &= ~UDF_SB(inode->i_sb)->s_umask; 1226 sbi->s_fmode != UDF_INVALID_MODE)
1227 inode->i_mode = sbi->s_fmode;
1228 else if (fe->icbTag.fileType == ICBTAG_FILE_TYPE_DIRECTORY &&
1229 sbi->s_dmode != UDF_INVALID_MODE)
1230 inode->i_mode = sbi->s_dmode;
1231 else
1232 inode->i_mode = udf_convert_permissions(fe);
1233 inode->i_mode &= ~sbi->s_umask;
1227 1234
1228 if (iinfo->i_efe == 0) { 1235 if (iinfo->i_efe == 0) {
1229 inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) << 1236 inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) <<
@@ -1396,7 +1403,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
1396 1403
1397 bh = udf_tread(inode->i_sb, 1404 bh = udf_tread(inode->i_sb,
1398 udf_get_lb_pblock(inode->i_sb, 1405 udf_get_lb_pblock(inode->i_sb,
1399 iinfo->i_location, 0)); 1406 &iinfo->i_location, 0));
1400 if (!bh) { 1407 if (!bh) {
1401 udf_debug("bread failure\n"); 1408 udf_debug("bread failure\n");
1402 return -EIO; 1409 return -EIO;
@@ -1416,13 +1423,13 @@ static int udf_update_inode(struct inode *inode, int do_sync)
1416 iinfo->i_ext.i_data, inode->i_sb->s_blocksize - 1423 iinfo->i_ext.i_data, inode->i_sb->s_blocksize -
1417 sizeof(struct unallocSpaceEntry)); 1424 sizeof(struct unallocSpaceEntry));
1418 crclen = sizeof(struct unallocSpaceEntry) + 1425 crclen = sizeof(struct unallocSpaceEntry) +
1419 iinfo->i_lenAlloc - sizeof(tag); 1426 iinfo->i_lenAlloc - sizeof(struct tag);
1420 use->descTag.tagLocation = cpu_to_le32( 1427 use->descTag.tagLocation = cpu_to_le32(
1421 iinfo->i_location. 1428 iinfo->i_location.
1422 logicalBlockNum); 1429 logicalBlockNum);
1423 use->descTag.descCRCLength = cpu_to_le16(crclen); 1430 use->descTag.descCRCLength = cpu_to_le16(crclen);
1424 use->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)use + 1431 use->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)use +
1425 sizeof(tag), 1432 sizeof(struct tag),
1426 crclen)); 1433 crclen));
1427 use->descTag.tagChecksum = udf_tag_checksum(&use->descTag); 1434 use->descTag.tagChecksum = udf_tag_checksum(&use->descTag);
1428 1435
@@ -1459,23 +1466,23 @@ static int udf_update_inode(struct inode *inode, int do_sync)
1459 fe->informationLength = cpu_to_le64(inode->i_size); 1466 fe->informationLength = cpu_to_le64(inode->i_size);
1460 1467
1461 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 1468 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
1462 regid *eid; 1469 struct regid *eid;
1463 struct deviceSpec *dsea = 1470 struct deviceSpec *dsea =
1464 (struct deviceSpec *)udf_get_extendedattr(inode, 12, 1); 1471 (struct deviceSpec *)udf_get_extendedattr(inode, 12, 1);
1465 if (!dsea) { 1472 if (!dsea) {
1466 dsea = (struct deviceSpec *) 1473 dsea = (struct deviceSpec *)
1467 udf_add_extendedattr(inode, 1474 udf_add_extendedattr(inode,
1468 sizeof(struct deviceSpec) + 1475 sizeof(struct deviceSpec) +
1469 sizeof(regid), 12, 0x3); 1476 sizeof(struct regid), 12, 0x3);
1470 dsea->attrType = cpu_to_le32(12); 1477 dsea->attrType = cpu_to_le32(12);
1471 dsea->attrSubtype = 1; 1478 dsea->attrSubtype = 1;
1472 dsea->attrLength = cpu_to_le32( 1479 dsea->attrLength = cpu_to_le32(
1473 sizeof(struct deviceSpec) + 1480 sizeof(struct deviceSpec) +
1474 sizeof(regid)); 1481 sizeof(struct regid));
1475 dsea->impUseLength = cpu_to_le32(sizeof(regid)); 1482 dsea->impUseLength = cpu_to_le32(sizeof(struct regid));
1476 } 1483 }
1477 eid = (regid *)dsea->impUse; 1484 eid = (struct regid *)dsea->impUse;
1478 memset(eid, 0, sizeof(regid)); 1485 memset(eid, 0, sizeof(struct regid));
1479 strcpy(eid->ident, UDF_ID_DEVELOPER); 1486 strcpy(eid->ident, UDF_ID_DEVELOPER);
1480 eid->identSuffix[0] = UDF_OS_CLASS_UNIX; 1487 eid->identSuffix[0] = UDF_OS_CLASS_UNIX;
1481 eid->identSuffix[1] = UDF_OS_ID_LINUX; 1488 eid->identSuffix[1] = UDF_OS_ID_LINUX;
@@ -1494,7 +1501,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
1494 udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime); 1501 udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime);
1495 udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime); 1502 udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime);
1496 udf_time_to_disk_stamp(&fe->attrTime, inode->i_ctime); 1503 udf_time_to_disk_stamp(&fe->attrTime, inode->i_ctime);
1497 memset(&(fe->impIdent), 0, sizeof(regid)); 1504 memset(&(fe->impIdent), 0, sizeof(struct regid));
1498 strcpy(fe->impIdent.ident, UDF_ID_DEVELOPER); 1505 strcpy(fe->impIdent.ident, UDF_ID_DEVELOPER);
1499 fe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; 1506 fe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
1500 fe->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; 1507 fe->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
@@ -1533,7 +1540,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
1533 udf_time_to_disk_stamp(&efe->createTime, iinfo->i_crtime); 1540 udf_time_to_disk_stamp(&efe->createTime, iinfo->i_crtime);
1534 udf_time_to_disk_stamp(&efe->attrTime, inode->i_ctime); 1541 udf_time_to_disk_stamp(&efe->attrTime, inode->i_ctime);
1535 1542
1536 memset(&(efe->impIdent), 0, sizeof(regid)); 1543 memset(&(efe->impIdent), 0, sizeof(struct regid));
1537 strcpy(efe->impIdent.ident, UDF_ID_DEVELOPER); 1544 strcpy(efe->impIdent.ident, UDF_ID_DEVELOPER);
1538 efe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; 1545 efe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
1539 efe->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; 1546 efe->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
@@ -1584,9 +1591,9 @@ static int udf_update_inode(struct inode *inode, int do_sync)
1584 fe->descTag.tagLocation = cpu_to_le32( 1591 fe->descTag.tagLocation = cpu_to_le32(
1585 iinfo->i_location.logicalBlockNum); 1592 iinfo->i_location.logicalBlockNum);
1586 crclen += iinfo->i_lenEAttr + iinfo->i_lenAlloc - 1593 crclen += iinfo->i_lenEAttr + iinfo->i_lenAlloc -
1587 sizeof(tag); 1594 sizeof(struct tag);
1588 fe->descTag.descCRCLength = cpu_to_le16(crclen); 1595 fe->descTag.descCRCLength = cpu_to_le16(crclen);
1589 fe->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)fe + sizeof(tag), 1596 fe->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)fe + sizeof(struct tag),
1590 crclen)); 1597 crclen));
1591 fe->descTag.tagChecksum = udf_tag_checksum(&fe->descTag); 1598 fe->descTag.tagChecksum = udf_tag_checksum(&fe->descTag);
1592 1599
@@ -1606,7 +1613,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
1606 return err; 1613 return err;
1607} 1614}
1608 1615
1609struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino) 1616struct inode *udf_iget(struct super_block *sb, struct kernel_lb_addr *ino)
1610{ 1617{
1611 unsigned long block = udf_get_lb_pblock(sb, ino, 0); 1618 unsigned long block = udf_get_lb_pblock(sb, ino, 0);
1612 struct inode *inode = iget_locked(sb, block); 1619 struct inode *inode = iget_locked(sb, block);
@@ -1615,7 +1622,7 @@ struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino)
1615 return NULL; 1622 return NULL;
1616 1623
1617 if (inode->i_state & I_NEW) { 1624 if (inode->i_state & I_NEW) {
1618 memcpy(&UDF_I(inode)->i_location, &ino, sizeof(kernel_lb_addr)); 1625 memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr));
1619 __udf_read_inode(inode); 1626 __udf_read_inode(inode);
1620 unlock_new_inode(inode); 1627 unlock_new_inode(inode);
1621 } 1628 }
@@ -1623,10 +1630,10 @@ struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino)
1623 if (is_bad_inode(inode)) 1630 if (is_bad_inode(inode))
1624 goto out_iput; 1631 goto out_iput;
1625 1632
1626 if (ino.logicalBlockNum >= UDF_SB(sb)-> 1633 if (ino->logicalBlockNum >= UDF_SB(sb)->
1627 s_partmaps[ino.partitionReferenceNum].s_partition_len) { 1634 s_partmaps[ino->partitionReferenceNum].s_partition_len) {
1628 udf_debug("block=%d, partition=%d out of range\n", 1635 udf_debug("block=%d, partition=%d out of range\n",
1629 ino.logicalBlockNum, ino.partitionReferenceNum); 1636 ino->logicalBlockNum, ino->partitionReferenceNum);
1630 make_bad_inode(inode); 1637 make_bad_inode(inode);
1631 goto out_iput; 1638 goto out_iput;
1632 } 1639 }
@@ -1639,11 +1646,11 @@ struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino)
1639} 1646}
1640 1647
1641int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, 1648int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
1642 kernel_lb_addr eloc, uint32_t elen, int inc) 1649 struct kernel_lb_addr *eloc, uint32_t elen, int inc)
1643{ 1650{
1644 int adsize; 1651 int adsize;
1645 short_ad *sad = NULL; 1652 struct short_ad *sad = NULL;
1646 long_ad *lad = NULL; 1653 struct long_ad *lad = NULL;
1647 struct allocExtDesc *aed; 1654 struct allocExtDesc *aed;
1648 int8_t etype; 1655 int8_t etype;
1649 uint8_t *ptr; 1656 uint8_t *ptr;
@@ -1657,9 +1664,9 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
1657 ptr = epos->bh->b_data + epos->offset; 1664 ptr = epos->bh->b_data + epos->offset;
1658 1665
1659 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) 1666 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
1660 adsize = sizeof(short_ad); 1667 adsize = sizeof(struct short_ad);
1661 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) 1668 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
1662 adsize = sizeof(long_ad); 1669 adsize = sizeof(struct long_ad);
1663 else 1670 else
1664 return -1; 1671 return -1;
1665 1672
@@ -1667,7 +1674,7 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
1667 char *sptr, *dptr; 1674 char *sptr, *dptr;
1668 struct buffer_head *nbh; 1675 struct buffer_head *nbh;
1669 int err, loffset; 1676 int err, loffset;
1670 kernel_lb_addr obloc = epos->block; 1677 struct kernel_lb_addr obloc = epos->block;
1671 1678
1672 epos->block.logicalBlockNum = udf_new_block(inode->i_sb, NULL, 1679 epos->block.logicalBlockNum = udf_new_block(inode->i_sb, NULL,
1673 obloc.partitionReferenceNum, 1680 obloc.partitionReferenceNum,
@@ -1675,7 +1682,7 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
1675 if (!epos->block.logicalBlockNum) 1682 if (!epos->block.logicalBlockNum)
1676 return -1; 1683 return -1;
1677 nbh = udf_tgetblk(inode->i_sb, udf_get_lb_pblock(inode->i_sb, 1684 nbh = udf_tgetblk(inode->i_sb, udf_get_lb_pblock(inode->i_sb,
1678 epos->block, 1685 &epos->block,
1679 0)); 1686 0));
1680 if (!nbh) 1687 if (!nbh)
1681 return -1; 1688 return -1;
@@ -1712,20 +1719,20 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
1712 } 1719 }
1713 if (UDF_SB(inode->i_sb)->s_udfrev >= 0x0200) 1720 if (UDF_SB(inode->i_sb)->s_udfrev >= 0x0200)
1714 udf_new_tag(nbh->b_data, TAG_IDENT_AED, 3, 1, 1721 udf_new_tag(nbh->b_data, TAG_IDENT_AED, 3, 1,
1715 epos->block.logicalBlockNum, sizeof(tag)); 1722 epos->block.logicalBlockNum, sizeof(struct tag));
1716 else 1723 else
1717 udf_new_tag(nbh->b_data, TAG_IDENT_AED, 2, 1, 1724 udf_new_tag(nbh->b_data, TAG_IDENT_AED, 2, 1,
1718 epos->block.logicalBlockNum, sizeof(tag)); 1725 epos->block.logicalBlockNum, sizeof(struct tag));
1719 switch (iinfo->i_alloc_type) { 1726 switch (iinfo->i_alloc_type) {
1720 case ICBTAG_FLAG_AD_SHORT: 1727 case ICBTAG_FLAG_AD_SHORT:
1721 sad = (short_ad *)sptr; 1728 sad = (struct short_ad *)sptr;
1722 sad->extLength = cpu_to_le32(EXT_NEXT_EXTENT_ALLOCDECS | 1729 sad->extLength = cpu_to_le32(EXT_NEXT_EXTENT_ALLOCDECS |
1723 inode->i_sb->s_blocksize); 1730 inode->i_sb->s_blocksize);
1724 sad->extPosition = 1731 sad->extPosition =
1725 cpu_to_le32(epos->block.logicalBlockNum); 1732 cpu_to_le32(epos->block.logicalBlockNum);
1726 break; 1733 break;
1727 case ICBTAG_FLAG_AD_LONG: 1734 case ICBTAG_FLAG_AD_LONG:
1728 lad = (long_ad *)sptr; 1735 lad = (struct long_ad *)sptr;
1729 lad->extLength = cpu_to_le32(EXT_NEXT_EXTENT_ALLOCDECS | 1736 lad->extLength = cpu_to_le32(EXT_NEXT_EXTENT_ALLOCDECS |
1730 inode->i_sb->s_blocksize); 1737 inode->i_sb->s_blocksize);
1731 lad->extLocation = cpu_to_lelb(epos->block); 1738 lad->extLocation = cpu_to_lelb(epos->block);
@@ -1769,12 +1776,12 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
1769} 1776}
1770 1777
1771int8_t udf_write_aext(struct inode *inode, struct extent_position *epos, 1778int8_t udf_write_aext(struct inode *inode, struct extent_position *epos,
1772 kernel_lb_addr eloc, uint32_t elen, int inc) 1779 struct kernel_lb_addr *eloc, uint32_t elen, int inc)
1773{ 1780{
1774 int adsize; 1781 int adsize;
1775 uint8_t *ptr; 1782 uint8_t *ptr;
1776 short_ad *sad; 1783 struct short_ad *sad;
1777 long_ad *lad; 1784 struct long_ad *lad;
1778 struct udf_inode_info *iinfo = UDF_I(inode); 1785 struct udf_inode_info *iinfo = UDF_I(inode);
1779 1786
1780 if (!epos->bh) 1787 if (!epos->bh)
@@ -1786,17 +1793,17 @@ int8_t udf_write_aext(struct inode *inode, struct extent_position *epos,
1786 1793
1787 switch (iinfo->i_alloc_type) { 1794 switch (iinfo->i_alloc_type) {
1788 case ICBTAG_FLAG_AD_SHORT: 1795 case ICBTAG_FLAG_AD_SHORT:
1789 sad = (short_ad *)ptr; 1796 sad = (struct short_ad *)ptr;
1790 sad->extLength = cpu_to_le32(elen); 1797 sad->extLength = cpu_to_le32(elen);
1791 sad->extPosition = cpu_to_le32(eloc.logicalBlockNum); 1798 sad->extPosition = cpu_to_le32(eloc->logicalBlockNum);
1792 adsize = sizeof(short_ad); 1799 adsize = sizeof(struct short_ad);
1793 break; 1800 break;
1794 case ICBTAG_FLAG_AD_LONG: 1801 case ICBTAG_FLAG_AD_LONG:
1795 lad = (long_ad *)ptr; 1802 lad = (struct long_ad *)ptr;
1796 lad->extLength = cpu_to_le32(elen); 1803 lad->extLength = cpu_to_le32(elen);
1797 lad->extLocation = cpu_to_lelb(eloc); 1804 lad->extLocation = cpu_to_lelb(*eloc);
1798 memset(lad->impUse, 0x00, sizeof(lad->impUse)); 1805 memset(lad->impUse, 0x00, sizeof(lad->impUse));
1799 adsize = sizeof(long_ad); 1806 adsize = sizeof(struct long_ad);
1800 break; 1807 break;
1801 default: 1808 default:
1802 return -1; 1809 return -1;
@@ -1823,7 +1830,7 @@ int8_t udf_write_aext(struct inode *inode, struct extent_position *epos,
1823} 1830}
1824 1831
1825int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, 1832int8_t udf_next_aext(struct inode *inode, struct extent_position *epos,
1826 kernel_lb_addr *eloc, uint32_t *elen, int inc) 1833 struct kernel_lb_addr *eloc, uint32_t *elen, int inc)
1827{ 1834{
1828 int8_t etype; 1835 int8_t etype;
1829 1836
@@ -1833,7 +1840,7 @@ int8_t udf_next_aext(struct inode *inode, struct extent_position *epos,
1833 epos->block = *eloc; 1840 epos->block = *eloc;
1834 epos->offset = sizeof(struct allocExtDesc); 1841 epos->offset = sizeof(struct allocExtDesc);
1835 brelse(epos->bh); 1842 brelse(epos->bh);
1836 block = udf_get_lb_pblock(inode->i_sb, epos->block, 0); 1843 block = udf_get_lb_pblock(inode->i_sb, &epos->block, 0);
1837 epos->bh = udf_tread(inode->i_sb, block); 1844 epos->bh = udf_tread(inode->i_sb, block);
1838 if (!epos->bh) { 1845 if (!epos->bh) {
1839 udf_debug("reading block %d failed!\n", block); 1846 udf_debug("reading block %d failed!\n", block);
@@ -1845,13 +1852,13 @@ int8_t udf_next_aext(struct inode *inode, struct extent_position *epos,
1845} 1852}
1846 1853
1847int8_t udf_current_aext(struct inode *inode, struct extent_position *epos, 1854int8_t udf_current_aext(struct inode *inode, struct extent_position *epos,
1848 kernel_lb_addr *eloc, uint32_t *elen, int inc) 1855 struct kernel_lb_addr *eloc, uint32_t *elen, int inc)
1849{ 1856{
1850 int alen; 1857 int alen;
1851 int8_t etype; 1858 int8_t etype;
1852 uint8_t *ptr; 1859 uint8_t *ptr;
1853 short_ad *sad; 1860 struct short_ad *sad;
1854 long_ad *lad; 1861 struct long_ad *lad;
1855 struct udf_inode_info *iinfo = UDF_I(inode); 1862 struct udf_inode_info *iinfo = UDF_I(inode);
1856 1863
1857 if (!epos->bh) { 1864 if (!epos->bh) {
@@ -1900,9 +1907,9 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos,
1900} 1907}
1901 1908
1902static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos, 1909static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos,
1903 kernel_lb_addr neloc, uint32_t nelen) 1910 struct kernel_lb_addr neloc, uint32_t nelen)
1904{ 1911{
1905 kernel_lb_addr oeloc; 1912 struct kernel_lb_addr oeloc;
1906 uint32_t oelen; 1913 uint32_t oelen;
1907 int8_t etype; 1914 int8_t etype;
1908 1915
@@ -1910,18 +1917,18 @@ static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos,
1910 get_bh(epos.bh); 1917 get_bh(epos.bh);
1911 1918
1912 while ((etype = udf_next_aext(inode, &epos, &oeloc, &oelen, 0)) != -1) { 1919 while ((etype = udf_next_aext(inode, &epos, &oeloc, &oelen, 0)) != -1) {
1913 udf_write_aext(inode, &epos, neloc, nelen, 1); 1920 udf_write_aext(inode, &epos, &neloc, nelen, 1);
1914 neloc = oeloc; 1921 neloc = oeloc;
1915 nelen = (etype << 30) | oelen; 1922 nelen = (etype << 30) | oelen;
1916 } 1923 }
1917 udf_add_aext(inode, &epos, neloc, nelen, 1); 1924 udf_add_aext(inode, &epos, &neloc, nelen, 1);
1918 brelse(epos.bh); 1925 brelse(epos.bh);
1919 1926
1920 return (nelen >> 30); 1927 return (nelen >> 30);
1921} 1928}
1922 1929
1923int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, 1930int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
1924 kernel_lb_addr eloc, uint32_t elen) 1931 struct kernel_lb_addr eloc, uint32_t elen)
1925{ 1932{
1926 struct extent_position oepos; 1933 struct extent_position oepos;
1927 int adsize; 1934 int adsize;
@@ -1936,9 +1943,9 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
1936 1943
1937 iinfo = UDF_I(inode); 1944 iinfo = UDF_I(inode);
1938 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) 1945 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
1939 adsize = sizeof(short_ad); 1946 adsize = sizeof(struct short_ad);
1940 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) 1947 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
1941 adsize = sizeof(long_ad); 1948 adsize = sizeof(struct long_ad);
1942 else 1949 else
1943 adsize = 0; 1950 adsize = 0;
1944 1951
@@ -1947,7 +1954,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
1947 return -1; 1954 return -1;
1948 1955
1949 while ((etype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) { 1956 while ((etype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) {
1950 udf_write_aext(inode, &oepos, eloc, (etype << 30) | elen, 1); 1957 udf_write_aext(inode, &oepos, &eloc, (etype << 30) | elen, 1);
1951 if (oepos.bh != epos.bh) { 1958 if (oepos.bh != epos.bh) {
1952 oepos.block = epos.block; 1959 oepos.block = epos.block;
1953 brelse(oepos.bh); 1960 brelse(oepos.bh);
@@ -1956,13 +1963,13 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
1956 oepos.offset = epos.offset - adsize; 1963 oepos.offset = epos.offset - adsize;
1957 } 1964 }
1958 } 1965 }
1959 memset(&eloc, 0x00, sizeof(kernel_lb_addr)); 1966 memset(&eloc, 0x00, sizeof(struct kernel_lb_addr));
1960 elen = 0; 1967 elen = 0;
1961 1968
1962 if (epos.bh != oepos.bh) { 1969 if (epos.bh != oepos.bh) {
1963 udf_free_blocks(inode->i_sb, inode, epos.block, 0, 1); 1970 udf_free_blocks(inode->i_sb, inode, &epos.block, 0, 1);
1964 udf_write_aext(inode, &oepos, eloc, elen, 1); 1971 udf_write_aext(inode, &oepos, &eloc, elen, 1);
1965 udf_write_aext(inode, &oepos, eloc, elen, 1); 1972 udf_write_aext(inode, &oepos, &eloc, elen, 1);
1966 if (!oepos.bh) { 1973 if (!oepos.bh) {
1967 iinfo->i_lenAlloc -= (adsize * 2); 1974 iinfo->i_lenAlloc -= (adsize * 2);
1968 mark_inode_dirty(inode); 1975 mark_inode_dirty(inode);
@@ -1979,7 +1986,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
1979 mark_buffer_dirty_inode(oepos.bh, inode); 1986 mark_buffer_dirty_inode(oepos.bh, inode);
1980 } 1987 }
1981 } else { 1988 } else {
1982 udf_write_aext(inode, &oepos, eloc, elen, 1); 1989 udf_write_aext(inode, &oepos, &eloc, elen, 1);
1983 if (!oepos.bh) { 1990 if (!oepos.bh) {
1984 iinfo->i_lenAlloc -= adsize; 1991 iinfo->i_lenAlloc -= adsize;
1985 mark_inode_dirty(inode); 1992 mark_inode_dirty(inode);
@@ -2004,7 +2011,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
2004} 2011}
2005 2012
2006int8_t inode_bmap(struct inode *inode, sector_t block, 2013int8_t inode_bmap(struct inode *inode, sector_t block,
2007 struct extent_position *pos, kernel_lb_addr *eloc, 2014 struct extent_position *pos, struct kernel_lb_addr *eloc,
2008 uint32_t *elen, sector_t *offset) 2015 uint32_t *elen, sector_t *offset)
2009{ 2016{
2010 unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; 2017 unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
@@ -2036,7 +2043,7 @@ int8_t inode_bmap(struct inode *inode, sector_t block,
2036 2043
2037long udf_block_map(struct inode *inode, sector_t block) 2044long udf_block_map(struct inode *inode, sector_t block)
2038{ 2045{
2039 kernel_lb_addr eloc; 2046 struct kernel_lb_addr eloc;
2040 uint32_t elen; 2047 uint32_t elen;
2041 sector_t offset; 2048 sector_t offset;
2042 struct extent_position epos = {}; 2049 struct extent_position epos = {};
@@ -2046,7 +2053,7 @@ long udf_block_map(struct inode *inode, sector_t block)
2046 2053
2047 if (inode_bmap(inode, block, &epos, &eloc, &elen, &offset) == 2054 if (inode_bmap(inode, block, &epos, &eloc, &elen, &offset) ==
2048 (EXT_RECORDED_ALLOCATED >> 30)) 2055 (EXT_RECORDED_ALLOCATED >> 30))
2049 ret = udf_get_lb_pblock(inode->i_sb, eloc, offset); 2056 ret = udf_get_lb_pblock(inode->i_sb, &eloc, offset);
2050 else 2057 else
2051 ret = 0; 2058 ret = 0;
2052 2059
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index 84bf0fd4a4f1..9215700c00a4 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -134,10 +134,10 @@ struct genericFormat *udf_add_extendedattr(struct inode *inode, uint32_t size,
134 } 134 }
135 } 135 }
136 /* rewrite CRC + checksum of eahd */ 136 /* rewrite CRC + checksum of eahd */
137 crclen = sizeof(struct extendedAttrHeaderDesc) - sizeof(tag); 137 crclen = sizeof(struct extendedAttrHeaderDesc) - sizeof(struct tag);
138 eahd->descTag.descCRCLength = cpu_to_le16(crclen); 138 eahd->descTag.descCRCLength = cpu_to_le16(crclen);
139 eahd->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)eahd + 139 eahd->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)eahd +
140 sizeof(tag), crclen)); 140 sizeof(struct tag), crclen));
141 eahd->descTag.tagChecksum = udf_tag_checksum(&eahd->descTag); 141 eahd->descTag.tagChecksum = udf_tag_checksum(&eahd->descTag);
142 iinfo->i_lenEAttr += size; 142 iinfo->i_lenEAttr += size;
143 return (struct genericFormat *)&ea[offset]; 143 return (struct genericFormat *)&ea[offset];
@@ -202,7 +202,7 @@ struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type,
202struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, 202struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
203 uint32_t location, uint16_t *ident) 203 uint32_t location, uint16_t *ident)
204{ 204{
205 tag *tag_p; 205 struct tag *tag_p;
206 struct buffer_head *bh = NULL; 206 struct buffer_head *bh = NULL;
207 207
208 /* Read the block */ 208 /* Read the block */
@@ -216,7 +216,7 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
216 return NULL; 216 return NULL;
217 } 217 }
218 218
219 tag_p = (tag *)(bh->b_data); 219 tag_p = (struct tag *)(bh->b_data);
220 220
221 *ident = le16_to_cpu(tag_p->tagIdent); 221 *ident = le16_to_cpu(tag_p->tagIdent);
222 222
@@ -241,9 +241,9 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
241 } 241 }
242 242
243 /* Verify the descriptor CRC */ 243 /* Verify the descriptor CRC */
244 if (le16_to_cpu(tag_p->descCRCLength) + sizeof(tag) > sb->s_blocksize || 244 if (le16_to_cpu(tag_p->descCRCLength) + sizeof(struct tag) > sb->s_blocksize ||
245 le16_to_cpu(tag_p->descCRC) == crc_itu_t(0, 245 le16_to_cpu(tag_p->descCRC) == crc_itu_t(0,
246 bh->b_data + sizeof(tag), 246 bh->b_data + sizeof(struct tag),
247 le16_to_cpu(tag_p->descCRCLength))) 247 le16_to_cpu(tag_p->descCRCLength)))
248 return bh; 248 return bh;
249 249
@@ -255,27 +255,28 @@ error_out:
255 return NULL; 255 return NULL;
256} 256}
257 257
258struct buffer_head *udf_read_ptagged(struct super_block *sb, kernel_lb_addr loc, 258struct buffer_head *udf_read_ptagged(struct super_block *sb,
259 struct kernel_lb_addr *loc,
259 uint32_t offset, uint16_t *ident) 260 uint32_t offset, uint16_t *ident)
260{ 261{
261 return udf_read_tagged(sb, udf_get_lb_pblock(sb, loc, offset), 262 return udf_read_tagged(sb, udf_get_lb_pblock(sb, loc, offset),
262 loc.logicalBlockNum + offset, ident); 263 loc->logicalBlockNum + offset, ident);
263} 264}
264 265
265void udf_update_tag(char *data, int length) 266void udf_update_tag(char *data, int length)
266{ 267{
267 tag *tptr = (tag *)data; 268 struct tag *tptr = (struct tag *)data;
268 length -= sizeof(tag); 269 length -= sizeof(struct tag);
269 270
270 tptr->descCRCLength = cpu_to_le16(length); 271 tptr->descCRCLength = cpu_to_le16(length);
271 tptr->descCRC = cpu_to_le16(crc_itu_t(0, data + sizeof(tag), length)); 272 tptr->descCRC = cpu_to_le16(crc_itu_t(0, data + sizeof(struct tag), length));
272 tptr->tagChecksum = udf_tag_checksum(tptr); 273 tptr->tagChecksum = udf_tag_checksum(tptr);
273} 274}
274 275
275void udf_new_tag(char *data, uint16_t ident, uint16_t version, uint16_t snum, 276void udf_new_tag(char *data, uint16_t ident, uint16_t version, uint16_t snum,
276 uint32_t loc, int length) 277 uint32_t loc, int length)
277{ 278{
278 tag *tptr = (tag *)data; 279 struct tag *tptr = (struct tag *)data;
279 tptr->tagIdent = cpu_to_le16(ident); 280 tptr->tagIdent = cpu_to_le16(ident);
280 tptr->descVersion = cpu_to_le16(version); 281 tptr->descVersion = cpu_to_le16(version);
281 tptr->tagSerialNum = cpu_to_le16(snum); 282 tptr->tagSerialNum = cpu_to_le16(snum);
@@ -283,12 +284,12 @@ void udf_new_tag(char *data, uint16_t ident, uint16_t version, uint16_t snum,
283 udf_update_tag(data, length); 284 udf_update_tag(data, length);
284} 285}
285 286
286u8 udf_tag_checksum(const tag *t) 287u8 udf_tag_checksum(const struct tag *t)
287{ 288{
288 u8 *data = (u8 *)t; 289 u8 *data = (u8 *)t;
289 u8 checksum = 0; 290 u8 checksum = 0;
290 int i; 291 int i;
291 for (i = 0; i < sizeof(tag); ++i) 292 for (i = 0; i < sizeof(struct tag); ++i)
292 if (i != 4) /* position of checksum */ 293 if (i != 4) /* position of checksum */
293 checksum += data[i]; 294 checksum += data[i];
294 return checksum; 295 return checksum;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index f84bfaa8d941..6a29fa34c478 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -47,7 +47,7 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
47 struct fileIdentDesc *sfi, struct udf_fileident_bh *fibh, 47 struct fileIdentDesc *sfi, struct udf_fileident_bh *fibh,
48 uint8_t *impuse, uint8_t *fileident) 48 uint8_t *impuse, uint8_t *fileident)
49{ 49{
50 uint16_t crclen = fibh->eoffset - fibh->soffset - sizeof(tag); 50 uint16_t crclen = fibh->eoffset - fibh->soffset - sizeof(struct tag);
51 uint16_t crc; 51 uint16_t crc;
52 int offset; 52 int offset;
53 uint16_t liu = le16_to_cpu(cfi->lengthOfImpUse); 53 uint16_t liu = le16_to_cpu(cfi->lengthOfImpUse);
@@ -99,18 +99,18 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
99 memset(fibh->ebh->b_data, 0x00, padlen + offset); 99 memset(fibh->ebh->b_data, 0x00, padlen + offset);
100 } 100 }
101 101
102 crc = crc_itu_t(0, (uint8_t *)cfi + sizeof(tag), 102 crc = crc_itu_t(0, (uint8_t *)cfi + sizeof(struct tag),
103 sizeof(struct fileIdentDesc) - sizeof(tag)); 103 sizeof(struct fileIdentDesc) - sizeof(struct tag));
104 104
105 if (fibh->sbh == fibh->ebh) { 105 if (fibh->sbh == fibh->ebh) {
106 crc = crc_itu_t(crc, (uint8_t *)sfi->impUse, 106 crc = crc_itu_t(crc, (uint8_t *)sfi->impUse,
107 crclen + sizeof(tag) - 107 crclen + sizeof(struct tag) -
108 sizeof(struct fileIdentDesc)); 108 sizeof(struct fileIdentDesc));
109 } else if (sizeof(struct fileIdentDesc) >= -fibh->soffset) { 109 } else if (sizeof(struct fileIdentDesc) >= -fibh->soffset) {
110 crc = crc_itu_t(crc, fibh->ebh->b_data + 110 crc = crc_itu_t(crc, fibh->ebh->b_data +
111 sizeof(struct fileIdentDesc) + 111 sizeof(struct fileIdentDesc) +
112 fibh->soffset, 112 fibh->soffset,
113 crclen + sizeof(tag) - 113 crclen + sizeof(struct tag) -
114 sizeof(struct fileIdentDesc)); 114 sizeof(struct fileIdentDesc));
115 } else { 115 } else {
116 crc = crc_itu_t(crc, (uint8_t *)sfi->impUse, 116 crc = crc_itu_t(crc, (uint8_t *)sfi->impUse,
@@ -154,7 +154,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
154 uint8_t lfi; 154 uint8_t lfi;
155 uint16_t liu; 155 uint16_t liu;
156 loff_t size; 156 loff_t size;
157 kernel_lb_addr eloc; 157 struct kernel_lb_addr eloc;
158 uint32_t elen; 158 uint32_t elen;
159 sector_t offset; 159 sector_t offset;
160 struct extent_position epos = {}; 160 struct extent_position epos = {};
@@ -171,12 +171,12 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
171 if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos, 171 if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos,
172 &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30)) 172 &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30))
173 goto out_err; 173 goto out_err;
174 block = udf_get_lb_pblock(dir->i_sb, eloc, offset); 174 block = udf_get_lb_pblock(dir->i_sb, &eloc, offset);
175 if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { 175 if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
176 if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) 176 if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
177 epos.offset -= sizeof(short_ad); 177 epos.offset -= sizeof(struct short_ad);
178 else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) 178 else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
179 epos.offset -= sizeof(long_ad); 179 epos.offset -= sizeof(struct long_ad);
180 } else 180 } else
181 offset = 0; 181 offset = 0;
182 182
@@ -268,7 +268,7 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
268#ifdef UDF_RECOVERY 268#ifdef UDF_RECOVERY
269 /* temporary shorthand for specifying files by inode number */ 269 /* temporary shorthand for specifying files by inode number */
270 if (!strncmp(dentry->d_name.name, ".B=", 3)) { 270 if (!strncmp(dentry->d_name.name, ".B=", 3)) {
271 kernel_lb_addr lb = { 271 struct kernel_lb_addr lb = {
272 .logicalBlockNum = 0, 272 .logicalBlockNum = 0,
273 .partitionReferenceNum = 273 .partitionReferenceNum =
274 simple_strtoul(dentry->d_name.name + 3, 274 simple_strtoul(dentry->d_name.name + 3,
@@ -283,11 +283,14 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
283#endif /* UDF_RECOVERY */ 283#endif /* UDF_RECOVERY */
284 284
285 if (udf_find_entry(dir, &dentry->d_name, &fibh, &cfi)) { 285 if (udf_find_entry(dir, &dentry->d_name, &fibh, &cfi)) {
286 struct kernel_lb_addr loc;
287
286 if (fibh.sbh != fibh.ebh) 288 if (fibh.sbh != fibh.ebh)
287 brelse(fibh.ebh); 289 brelse(fibh.ebh);
288 brelse(fibh.sbh); 290 brelse(fibh.sbh);
289 291
290 inode = udf_iget(dir->i_sb, lelb_to_cpu(cfi.icb.extLocation)); 292 loc = lelb_to_cpu(cfi.icb.extLocation);
293 inode = udf_iget(dir->i_sb, &loc);
291 if (!inode) { 294 if (!inode) {
292 unlock_kernel(); 295 unlock_kernel();
293 return ERR_PTR(-EACCES); 296 return ERR_PTR(-EACCES);
@@ -313,7 +316,7 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
313 uint8_t lfi; 316 uint8_t lfi;
314 uint16_t liu; 317 uint16_t liu;
315 int block; 318 int block;
316 kernel_lb_addr eloc; 319 struct kernel_lb_addr eloc;
317 uint32_t elen = 0; 320 uint32_t elen = 0;
318 sector_t offset; 321 sector_t offset;
319 struct extent_position epos = {}; 322 struct extent_position epos = {};
@@ -351,16 +354,16 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
351 if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos, 354 if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos,
352 &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30)) { 355 &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30)) {
353 block = udf_get_lb_pblock(dir->i_sb, 356 block = udf_get_lb_pblock(dir->i_sb,
354 dinfo->i_location, 0); 357 &dinfo->i_location, 0);
355 fibh->soffset = fibh->eoffset = sb->s_blocksize; 358 fibh->soffset = fibh->eoffset = sb->s_blocksize;
356 goto add; 359 goto add;
357 } 360 }
358 block = udf_get_lb_pblock(dir->i_sb, eloc, offset); 361 block = udf_get_lb_pblock(dir->i_sb, &eloc, offset);
359 if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { 362 if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
360 if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) 363 if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
361 epos.offset -= sizeof(short_ad); 364 epos.offset -= sizeof(struct short_ad);
362 else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) 365 else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
363 epos.offset -= sizeof(long_ad); 366 epos.offset -= sizeof(struct long_ad);
364 } else 367 } else
365 offset = 0; 368 offset = 0;
366 369
@@ -409,10 +412,10 @@ add:
409 if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && elen) { 412 if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && elen) {
410 elen = (elen + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1); 413 elen = (elen + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1);
411 if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) 414 if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
412 epos.offset -= sizeof(short_ad); 415 epos.offset -= sizeof(struct short_ad);
413 else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) 416 else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
414 epos.offset -= sizeof(long_ad); 417 epos.offset -= sizeof(struct long_ad);
415 udf_write_aext(dir, &epos, eloc, elen, 1); 418 udf_write_aext(dir, &epos, &eloc, elen, 1);
416 } 419 }
417 f_pos += nfidlen; 420 f_pos += nfidlen;
418 421
@@ -494,10 +497,10 @@ add:
494 memset(cfi, 0, sizeof(struct fileIdentDesc)); 497 memset(cfi, 0, sizeof(struct fileIdentDesc));
495 if (UDF_SB(sb)->s_udfrev >= 0x0200) 498 if (UDF_SB(sb)->s_udfrev >= 0x0200)
496 udf_new_tag((char *)cfi, TAG_IDENT_FID, 3, 1, block, 499 udf_new_tag((char *)cfi, TAG_IDENT_FID, 3, 1, block,
497 sizeof(tag)); 500 sizeof(struct tag));
498 else 501 else
499 udf_new_tag((char *)cfi, TAG_IDENT_FID, 2, 1, block, 502 udf_new_tag((char *)cfi, TAG_IDENT_FID, 2, 1, block,
500 sizeof(tag)); 503 sizeof(struct tag));
501 cfi->fileVersionNum = cpu_to_le16(1); 504 cfi->fileVersionNum = cpu_to_le16(1);
502 cfi->lengthFileIdent = namelen; 505 cfi->lengthFileIdent = namelen;
503 cfi->lengthOfImpUse = cpu_to_le16(0); 506 cfi->lengthOfImpUse = cpu_to_le16(0);
@@ -530,7 +533,7 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi,
530 cfi->fileCharacteristics |= FID_FILE_CHAR_DELETED; 533 cfi->fileCharacteristics |= FID_FILE_CHAR_DELETED;
531 534
532 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT)) 535 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT))
533 memset(&(cfi->icb), 0x00, sizeof(long_ad)); 536 memset(&(cfi->icb), 0x00, sizeof(struct long_ad));
534 537
535 return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL); 538 return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL);
536} 539}
@@ -710,7 +713,7 @@ static int empty_dir(struct inode *dir)
710 loff_t f_pos; 713 loff_t f_pos;
711 loff_t size = udf_ext0_offset(dir) + dir->i_size; 714 loff_t size = udf_ext0_offset(dir) + dir->i_size;
712 int block; 715 int block;
713 kernel_lb_addr eloc; 716 struct kernel_lb_addr eloc;
714 uint32_t elen; 717 uint32_t elen;
715 sector_t offset; 718 sector_t offset;
716 struct extent_position epos = {}; 719 struct extent_position epos = {};
@@ -724,12 +727,12 @@ static int empty_dir(struct inode *dir)
724 else if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, 727 else if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits,
725 &epos, &eloc, &elen, &offset) == 728 &epos, &eloc, &elen, &offset) ==
726 (EXT_RECORDED_ALLOCATED >> 30)) { 729 (EXT_RECORDED_ALLOCATED >> 30)) {
727 block = udf_get_lb_pblock(dir->i_sb, eloc, offset); 730 block = udf_get_lb_pblock(dir->i_sb, &eloc, offset);
728 if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { 731 if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
729 if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) 732 if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
730 epos.offset -= sizeof(short_ad); 733 epos.offset -= sizeof(struct short_ad);
731 else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) 734 else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
732 epos.offset -= sizeof(long_ad); 735 epos.offset -= sizeof(struct long_ad);
733 } else 736 } else
734 offset = 0; 737 offset = 0;
735 738
@@ -778,7 +781,7 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
778 struct inode *inode = dentry->d_inode; 781 struct inode *inode = dentry->d_inode;
779 struct udf_fileident_bh fibh; 782 struct udf_fileident_bh fibh;
780 struct fileIdentDesc *fi, cfi; 783 struct fileIdentDesc *fi, cfi;
781 kernel_lb_addr tloc; 784 struct kernel_lb_addr tloc;
782 785
783 retval = -ENOENT; 786 retval = -ENOENT;
784 lock_kernel(); 787 lock_kernel();
@@ -788,7 +791,7 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
788 791
789 retval = -EIO; 792 retval = -EIO;
790 tloc = lelb_to_cpu(cfi.icb.extLocation); 793 tloc = lelb_to_cpu(cfi.icb.extLocation);
791 if (udf_get_lb_pblock(dir->i_sb, tloc, 0) != inode->i_ino) 794 if (udf_get_lb_pblock(dir->i_sb, &tloc, 0) != inode->i_ino)
792 goto end_rmdir; 795 goto end_rmdir;
793 retval = -ENOTEMPTY; 796 retval = -ENOTEMPTY;
794 if (!empty_dir(inode)) 797 if (!empty_dir(inode))
@@ -824,7 +827,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
824 struct udf_fileident_bh fibh; 827 struct udf_fileident_bh fibh;
825 struct fileIdentDesc *fi; 828 struct fileIdentDesc *fi;
826 struct fileIdentDesc cfi; 829 struct fileIdentDesc cfi;
827 kernel_lb_addr tloc; 830 struct kernel_lb_addr tloc;
828 831
829 retval = -ENOENT; 832 retval = -ENOENT;
830 lock_kernel(); 833 lock_kernel();
@@ -834,7 +837,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
834 837
835 retval = -EIO; 838 retval = -EIO;
836 tloc = lelb_to_cpu(cfi.icb.extLocation); 839 tloc = lelb_to_cpu(cfi.icb.extLocation);
837 if (udf_get_lb_pblock(dir->i_sb, tloc, 0) != inode->i_ino) 840 if (udf_get_lb_pblock(dir->i_sb, &tloc, 0) != inode->i_ino)
838 goto end_unlink; 841 goto end_unlink;
839 842
840 if (!inode->i_nlink) { 843 if (!inode->i_nlink) {
@@ -897,7 +900,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
897 inode->i_op = &page_symlink_inode_operations; 900 inode->i_op = &page_symlink_inode_operations;
898 901
899 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { 902 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
900 kernel_lb_addr eloc; 903 struct kernel_lb_addr eloc;
901 uint32_t bsize; 904 uint32_t bsize;
902 905
903 block = udf_new_block(inode->i_sb, inode, 906 block = udf_new_block(inode->i_sb, inode,
@@ -913,7 +916,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
913 iinfo->i_location.partitionReferenceNum; 916 iinfo->i_location.partitionReferenceNum;
914 bsize = inode->i_sb->s_blocksize; 917 bsize = inode->i_sb->s_blocksize;
915 iinfo->i_lenExtents = bsize; 918 iinfo->i_lenExtents = bsize;
916 udf_add_aext(inode, &epos, eloc, bsize, 0); 919 udf_add_aext(inode, &epos, &eloc, bsize, 0);
917 brelse(epos.bh); 920 brelse(epos.bh);
918 921
919 block = udf_get_pblock(inode->i_sb, block, 922 block = udf_get_pblock(inode->i_sb, block,
@@ -1108,7 +1111,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1108 struct fileIdentDesc ocfi, ncfi; 1111 struct fileIdentDesc ocfi, ncfi;
1109 struct buffer_head *dir_bh = NULL; 1112 struct buffer_head *dir_bh = NULL;
1110 int retval = -ENOENT; 1113 int retval = -ENOENT;
1111 kernel_lb_addr tloc; 1114 struct kernel_lb_addr tloc;
1112 struct udf_inode_info *old_iinfo = UDF_I(old_inode); 1115 struct udf_inode_info *old_iinfo = UDF_I(old_inode);
1113 1116
1114 lock_kernel(); 1117 lock_kernel();
@@ -1119,7 +1122,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1119 brelse(ofibh.sbh); 1122 brelse(ofibh.sbh);
1120 } 1123 }
1121 tloc = lelb_to_cpu(ocfi.icb.extLocation); 1124 tloc = lelb_to_cpu(ocfi.icb.extLocation);
1122 if (!ofi || udf_get_lb_pblock(old_dir->i_sb, tloc, 0) 1125 if (!ofi || udf_get_lb_pblock(old_dir->i_sb, &tloc, 0)
1123 != old_inode->i_ino) 1126 != old_inode->i_ino)
1124 goto end_rename; 1127 goto end_rename;
1125 1128
@@ -1158,7 +1161,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1158 if (!dir_fi) 1161 if (!dir_fi)
1159 goto end_rename; 1162 goto end_rename;
1160 tloc = lelb_to_cpu(dir_fi->icb.extLocation); 1163 tloc = lelb_to_cpu(dir_fi->icb.extLocation);
1161 if (udf_get_lb_pblock(old_inode->i_sb, tloc, 0) != 1164 if (udf_get_lb_pblock(old_inode->i_sb, &tloc, 0) !=
1162 old_dir->i_ino) 1165 old_dir->i_ino)
1163 goto end_rename; 1166 goto end_rename;
1164 1167
@@ -1187,7 +1190,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1187 */ 1190 */
1188 ncfi.fileVersionNum = ocfi.fileVersionNum; 1191 ncfi.fileVersionNum = ocfi.fileVersionNum;
1189 ncfi.fileCharacteristics = ocfi.fileCharacteristics; 1192 ncfi.fileCharacteristics = ocfi.fileCharacteristics;
1190 memcpy(&(ncfi.icb), &(ocfi.icb), sizeof(long_ad)); 1193 memcpy(&(ncfi.icb), &(ocfi.icb), sizeof(struct long_ad));
1191 udf_write_fi(new_dir, &ncfi, nfi, &nfibh, NULL, NULL); 1194 udf_write_fi(new_dir, &ncfi, nfi, &nfibh, NULL, NULL);
1192 1195
1193 /* The old fid may have moved - find it again */ 1196 /* The old fid may have moved - find it again */
@@ -1242,6 +1245,7 @@ end_rename:
1242 1245
1243static struct dentry *udf_get_parent(struct dentry *child) 1246static struct dentry *udf_get_parent(struct dentry *child)
1244{ 1247{
1248 struct kernel_lb_addr tloc;
1245 struct inode *inode = NULL; 1249 struct inode *inode = NULL;
1246 struct qstr dotdot = {.name = "..", .len = 2}; 1250 struct qstr dotdot = {.name = "..", .len = 2};
1247 struct fileIdentDesc cfi; 1251 struct fileIdentDesc cfi;
@@ -1255,8 +1259,8 @@ static struct dentry *udf_get_parent(struct dentry *child)
1255 brelse(fibh.ebh); 1259 brelse(fibh.ebh);
1256 brelse(fibh.sbh); 1260 brelse(fibh.sbh);
1257 1261
1258 inode = udf_iget(child->d_inode->i_sb, 1262 tloc = lelb_to_cpu(cfi.icb.extLocation);
1259 lelb_to_cpu(cfi.icb.extLocation)); 1263 inode = udf_iget(child->d_inode->i_sb, &tloc);
1260 if (!inode) 1264 if (!inode)
1261 goto out_unlock; 1265 goto out_unlock;
1262 unlock_kernel(); 1266 unlock_kernel();
@@ -1272,14 +1276,14 @@ static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block,
1272 u16 partref, __u32 generation) 1276 u16 partref, __u32 generation)
1273{ 1277{
1274 struct inode *inode; 1278 struct inode *inode;
1275 kernel_lb_addr loc; 1279 struct kernel_lb_addr loc;
1276 1280
1277 if (block == 0) 1281 if (block == 0)
1278 return ERR_PTR(-ESTALE); 1282 return ERR_PTR(-ESTALE);
1279 1283
1280 loc.logicalBlockNum = block; 1284 loc.logicalBlockNum = block;
1281 loc.partitionReferenceNum = partref; 1285 loc.partitionReferenceNum = partref;
1282 inode = udf_iget(sb, loc); 1286 inode = udf_iget(sb, &loc);
1283 1287
1284 if (inode == NULL) 1288 if (inode == NULL)
1285 return ERR_PTR(-ENOMEM); 1289 return ERR_PTR(-ENOMEM);
@@ -1318,7 +1322,7 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
1318{ 1322{
1319 int len = *lenp; 1323 int len = *lenp;
1320 struct inode *inode = de->d_inode; 1324 struct inode *inode = de->d_inode;
1321 kernel_lb_addr location = UDF_I(inode)->i_location; 1325 struct kernel_lb_addr location = UDF_I(inode)->i_location;
1322 struct fid *fid = (struct fid *)fh; 1326 struct fid *fid = (struct fid *)fh;
1323 int type = FILEID_UDF_WITHOUT_PARENT; 1327 int type = FILEID_UDF_WITHOUT_PARENT;
1324 1328
diff --git a/fs/udf/osta_udf.h b/fs/udf/osta_udf.h
index 65ff47902bd2..fbff74654df2 100644
--- a/fs/udf/osta_udf.h
+++ b/fs/udf/osta_udf.h
@@ -85,7 +85,7 @@ struct appIdentSuffix {
85/* Logical Volume Integrity Descriptor (UDF 2.50 2.2.6) */ 85/* Logical Volume Integrity Descriptor (UDF 2.50 2.2.6) */
86/* Implementation Use (UDF 2.50 2.2.6.4) */ 86/* Implementation Use (UDF 2.50 2.2.6.4) */
87struct logicalVolIntegrityDescImpUse { 87struct logicalVolIntegrityDescImpUse {
88 regid impIdent; 88 struct regid impIdent;
89 __le32 numFiles; 89 __le32 numFiles;
90 __le32 numDirs; 90 __le32 numDirs;
91 __le16 minUDFReadRev; 91 __le16 minUDFReadRev;
@@ -97,12 +97,12 @@ struct logicalVolIntegrityDescImpUse {
97/* Implementation Use Volume Descriptor (UDF 2.50 2.2.7) */ 97/* Implementation Use Volume Descriptor (UDF 2.50 2.2.7) */
98/* Implementation Use (UDF 2.50 2.2.7.2) */ 98/* Implementation Use (UDF 2.50 2.2.7.2) */
99struct impUseVolDescImpUse { 99struct impUseVolDescImpUse {
100 charspec LVICharset; 100 struct charspec LVICharset;
101 dstring logicalVolIdent[128]; 101 dstring logicalVolIdent[128];
102 dstring LVInfo1[36]; 102 dstring LVInfo1[36];
103 dstring LVInfo2[36]; 103 dstring LVInfo2[36];
104 dstring LVInfo3[36]; 104 dstring LVInfo3[36];
105 regid impIdent; 105 struct regid impIdent;
106 uint8_t impUse[128]; 106 uint8_t impUse[128];
107} __attribute__ ((packed)); 107} __attribute__ ((packed));
108 108
@@ -110,7 +110,7 @@ struct udfPartitionMap2 {
110 uint8_t partitionMapType; 110 uint8_t partitionMapType;
111 uint8_t partitionMapLength; 111 uint8_t partitionMapLength;
112 uint8_t reserved1[2]; 112 uint8_t reserved1[2];
113 regid partIdent; 113 struct regid partIdent;
114 __le16 volSeqNum; 114 __le16 volSeqNum;
115 __le16 partitionNum; 115 __le16 partitionNum;
116} __attribute__ ((packed)); 116} __attribute__ ((packed));
@@ -120,7 +120,7 @@ struct virtualPartitionMap {
120 uint8_t partitionMapType; 120 uint8_t partitionMapType;
121 uint8_t partitionMapLength; 121 uint8_t partitionMapLength;
122 uint8_t reserved1[2]; 122 uint8_t reserved1[2];
123 regid partIdent; 123 struct regid partIdent;
124 __le16 volSeqNum; 124 __le16 volSeqNum;
125 __le16 partitionNum; 125 __le16 partitionNum;
126 uint8_t reserved2[24]; 126 uint8_t reserved2[24];
@@ -131,7 +131,7 @@ struct sparablePartitionMap {
131 uint8_t partitionMapType; 131 uint8_t partitionMapType;
132 uint8_t partitionMapLength; 132 uint8_t partitionMapLength;
133 uint8_t reserved1[2]; 133 uint8_t reserved1[2];
134 regid partIdent; 134 struct regid partIdent;
135 __le16 volSeqNum; 135 __le16 volSeqNum;
136 __le16 partitionNum; 136 __le16 partitionNum;
137 __le16 packetLength; 137 __le16 packetLength;
@@ -146,7 +146,7 @@ struct metadataPartitionMap {
146 uint8_t partitionMapType; 146 uint8_t partitionMapType;
147 uint8_t partitionMapLength; 147 uint8_t partitionMapLength;
148 uint8_t reserved1[2]; 148 uint8_t reserved1[2];
149 regid partIdent; 149 struct regid partIdent;
150 __le16 volSeqNum; 150 __le16 volSeqNum;
151 __le16 partitionNum; 151 __le16 partitionNum;
152 __le32 metadataFileLoc; 152 __le32 metadataFileLoc;
@@ -161,7 +161,7 @@ struct metadataPartitionMap {
161/* Virtual Allocation Table (UDF 1.5 2.2.10) */ 161/* Virtual Allocation Table (UDF 1.5 2.2.10) */
162struct virtualAllocationTable15 { 162struct virtualAllocationTable15 {
163 __le32 VirtualSector[0]; 163 __le32 VirtualSector[0];
164 regid vatIdent; 164 struct regid vatIdent;
165 __le32 previousVATICBLoc; 165 __le32 previousVATICBLoc;
166} __attribute__ ((packed)); 166} __attribute__ ((packed));
167 167
@@ -192,8 +192,8 @@ struct sparingEntry {
192} __attribute__ ((packed)); 192} __attribute__ ((packed));
193 193
194struct sparingTable { 194struct sparingTable {
195 tag descTag; 195 struct tag descTag;
196 regid sparingIdent; 196 struct regid sparingIdent;
197 __le16 reallocationTableLen; 197 __le16 reallocationTableLen;
198 __le16 reserved; 198 __le16 reserved;
199 __le32 sequenceNum; 199 __le32 sequenceNum;
@@ -206,7 +206,7 @@ struct sparingTable {
206#define ICBTAG_FILE_TYPE_MIRROR 0xFB 206#define ICBTAG_FILE_TYPE_MIRROR 0xFB
207#define ICBTAG_FILE_TYPE_BITMAP 0xFC 207#define ICBTAG_FILE_TYPE_BITMAP 0xFC
208 208
209/* struct long_ad ICB - ADImpUse (UDF 2.50 2.2.4.3) */ 209/* struct struct long_ad ICB - ADImpUse (UDF 2.50 2.2.4.3) */
210struct allocDescImpUse { 210struct allocDescImpUse {
211 __le16 flags; 211 __le16 flags;
212 uint8_t impUse[4]; 212 uint8_t impUse[4];
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index 96dfd207c3d6..4b540ee632d5 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -273,7 +273,7 @@ static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block,
273{ 273{
274 struct super_block *sb = inode->i_sb; 274 struct super_block *sb = inode->i_sb;
275 struct udf_part_map *map; 275 struct udf_part_map *map;
276 kernel_lb_addr eloc; 276 struct kernel_lb_addr eloc;
277 uint32_t elen; 277 uint32_t elen;
278 sector_t ext_offset; 278 sector_t ext_offset;
279 struct extent_position epos = {}; 279 struct extent_position epos = {};
diff --git a/fs/udf/super.c b/fs/udf/super.c
index e25e7010627b..72348cc855a4 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -81,16 +81,13 @@ static char error_buf[1024];
81/* These are the "meat" - everything else is stuffing */ 81/* These are the "meat" - everything else is stuffing */
82static int udf_fill_super(struct super_block *, void *, int); 82static int udf_fill_super(struct super_block *, void *, int);
83static void udf_put_super(struct super_block *); 83static void udf_put_super(struct super_block *);
84static void udf_write_super(struct super_block *); 84static int udf_sync_fs(struct super_block *, int);
85static int udf_remount_fs(struct super_block *, int *, char *); 85static int udf_remount_fs(struct super_block *, int *, char *);
86static int udf_check_valid(struct super_block *, int, int); 86static void udf_load_logicalvolint(struct super_block *, struct kernel_extent_ad);
87static int udf_vrs(struct super_block *sb, int silent); 87static int udf_find_fileset(struct super_block *, struct kernel_lb_addr *,
88static void udf_load_logicalvolint(struct super_block *, kernel_extent_ad); 88 struct kernel_lb_addr *);
89static void udf_find_anchor(struct super_block *);
90static int udf_find_fileset(struct super_block *, kernel_lb_addr *,
91 kernel_lb_addr *);
92static void udf_load_fileset(struct super_block *, struct buffer_head *, 89static void udf_load_fileset(struct super_block *, struct buffer_head *,
93 kernel_lb_addr *); 90 struct kernel_lb_addr *);
94static void udf_open_lvid(struct super_block *); 91static void udf_open_lvid(struct super_block *);
95static void udf_close_lvid(struct super_block *); 92static void udf_close_lvid(struct super_block *);
96static unsigned int udf_count_free(struct super_block *); 93static unsigned int udf_count_free(struct super_block *);
@@ -181,7 +178,7 @@ static const struct super_operations udf_sb_ops = {
181 .delete_inode = udf_delete_inode, 178 .delete_inode = udf_delete_inode,
182 .clear_inode = udf_clear_inode, 179 .clear_inode = udf_clear_inode,
183 .put_super = udf_put_super, 180 .put_super = udf_put_super,
184 .write_super = udf_write_super, 181 .sync_fs = udf_sync_fs,
185 .statfs = udf_statfs, 182 .statfs = udf_statfs,
186 .remount_fs = udf_remount_fs, 183 .remount_fs = udf_remount_fs,
187 .show_options = udf_show_options, 184 .show_options = udf_show_options,
@@ -201,6 +198,8 @@ struct udf_options {
201 mode_t umask; 198 mode_t umask;
202 gid_t gid; 199 gid_t gid;
203 uid_t uid; 200 uid_t uid;
201 mode_t fmode;
202 mode_t dmode;
204 struct nls_table *nls_map; 203 struct nls_table *nls_map;
205}; 204};
206 205
@@ -258,7 +257,7 @@ static int udf_show_options(struct seq_file *seq, struct vfsmount *mnt)
258 257
259 if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT)) 258 if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT))
260 seq_puts(seq, ",nostrict"); 259 seq_puts(seq, ",nostrict");
261 if (sb->s_blocksize != UDF_DEFAULT_BLOCKSIZE) 260 if (UDF_QUERY_FLAG(sb, UDF_FLAG_BLOCKSIZE_SET))
262 seq_printf(seq, ",bs=%lu", sb->s_blocksize); 261 seq_printf(seq, ",bs=%lu", sb->s_blocksize);
263 if (UDF_QUERY_FLAG(sb, UDF_FLAG_UNHIDE)) 262 if (UDF_QUERY_FLAG(sb, UDF_FLAG_UNHIDE))
264 seq_puts(seq, ",unhide"); 263 seq_puts(seq, ",unhide");
@@ -282,18 +281,16 @@ static int udf_show_options(struct seq_file *seq, struct vfsmount *mnt)
282 seq_printf(seq, ",gid=%u", sbi->s_gid); 281 seq_printf(seq, ",gid=%u", sbi->s_gid);
283 if (sbi->s_umask != 0) 282 if (sbi->s_umask != 0)
284 seq_printf(seq, ",umask=%o", sbi->s_umask); 283 seq_printf(seq, ",umask=%o", sbi->s_umask);
284 if (sbi->s_fmode != UDF_INVALID_MODE)
285 seq_printf(seq, ",mode=%o", sbi->s_fmode);
286 if (sbi->s_dmode != UDF_INVALID_MODE)
287 seq_printf(seq, ",dmode=%o", sbi->s_dmode);
285 if (UDF_QUERY_FLAG(sb, UDF_FLAG_SESSION_SET)) 288 if (UDF_QUERY_FLAG(sb, UDF_FLAG_SESSION_SET))
286 seq_printf(seq, ",session=%u", sbi->s_session); 289 seq_printf(seq, ",session=%u", sbi->s_session);
287 if (UDF_QUERY_FLAG(sb, UDF_FLAG_LASTBLOCK_SET)) 290 if (UDF_QUERY_FLAG(sb, UDF_FLAG_LASTBLOCK_SET))
288 seq_printf(seq, ",lastblock=%u", sbi->s_last_block); 291 seq_printf(seq, ",lastblock=%u", sbi->s_last_block);
289 /* 292 if (sbi->s_anchor != 0)
290 * s_anchor[2] could be zeroed out in case there is no anchor 293 seq_printf(seq, ",anchor=%u", sbi->s_anchor);
291 * in the specified block, but then the "anchor=N" option
292 * originally given by the user wasn't effective, so it's OK
293 * if we don't show it.
294 */
295 if (sbi->s_anchor[2] != 0)
296 seq_printf(seq, ",anchor=%u", sbi->s_anchor[2]);
297 /* 294 /*
298 * volume, partition, fileset and rootdir seem to be ignored 295 * volume, partition, fileset and rootdir seem to be ignored
299 * currently 296 * currently
@@ -317,6 +314,8 @@ static int udf_show_options(struct seq_file *seq, struct vfsmount *mnt)
317 * 314 *
318 * gid= Set the default group. 315 * gid= Set the default group.
319 * umask= Set the default umask. 316 * umask= Set the default umask.
317 * mode= Set the default file permissions.
318 * dmode= Set the default directory permissions.
320 * uid= Set the default user. 319 * uid= Set the default user.
321 * bs= Set the block size. 320 * bs= Set the block size.
322 * unhide Show otherwise hidden files. 321 * unhide Show otherwise hidden files.
@@ -366,7 +365,8 @@ enum {
366 Opt_gid, Opt_uid, Opt_umask, Opt_session, Opt_lastblock, 365 Opt_gid, Opt_uid, Opt_umask, Opt_session, Opt_lastblock,
367 Opt_anchor, Opt_volume, Opt_partition, Opt_fileset, 366 Opt_anchor, Opt_volume, Opt_partition, Opt_fileset,
368 Opt_rootdir, Opt_utf8, Opt_iocharset, 367 Opt_rootdir, Opt_utf8, Opt_iocharset,
369 Opt_err, Opt_uforget, Opt_uignore, Opt_gforget, Opt_gignore 368 Opt_err, Opt_uforget, Opt_uignore, Opt_gforget, Opt_gignore,
369 Opt_fmode, Opt_dmode
370}; 370};
371 371
372static const match_table_t tokens = { 372static const match_table_t tokens = {
@@ -395,6 +395,8 @@ static const match_table_t tokens = {
395 {Opt_rootdir, "rootdir=%u"}, 395 {Opt_rootdir, "rootdir=%u"},
396 {Opt_utf8, "utf8"}, 396 {Opt_utf8, "utf8"},
397 {Opt_iocharset, "iocharset=%s"}, 397 {Opt_iocharset, "iocharset=%s"},
398 {Opt_fmode, "mode=%o"},
399 {Opt_dmode, "dmode=%o"},
398 {Opt_err, NULL} 400 {Opt_err, NULL}
399}; 401};
400 402
@@ -405,7 +407,6 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
405 int option; 407 int option;
406 408
407 uopt->novrs = 0; 409 uopt->novrs = 0;
408 uopt->blocksize = UDF_DEFAULT_BLOCKSIZE;
409 uopt->partition = 0xFFFF; 410 uopt->partition = 0xFFFF;
410 uopt->session = 0xFFFFFFFF; 411 uopt->session = 0xFFFFFFFF;
411 uopt->lastblock = 0; 412 uopt->lastblock = 0;
@@ -428,10 +429,12 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
428 switch (token) { 429 switch (token) {
429 case Opt_novrs: 430 case Opt_novrs:
430 uopt->novrs = 1; 431 uopt->novrs = 1;
432 break;
431 case Opt_bs: 433 case Opt_bs:
432 if (match_int(&args[0], &option)) 434 if (match_int(&args[0], &option))
433 return 0; 435 return 0;
434 uopt->blocksize = option; 436 uopt->blocksize = option;
437 uopt->flags |= (1 << UDF_FLAG_BLOCKSIZE_SET);
435 break; 438 break;
436 case Opt_unhide: 439 case Opt_unhide:
437 uopt->flags |= (1 << UDF_FLAG_UNHIDE); 440 uopt->flags |= (1 << UDF_FLAG_UNHIDE);
@@ -531,6 +534,16 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
531 case Opt_gforget: 534 case Opt_gforget:
532 uopt->flags |= (1 << UDF_FLAG_GID_FORGET); 535 uopt->flags |= (1 << UDF_FLAG_GID_FORGET);
533 break; 536 break;
537 case Opt_fmode:
538 if (match_octal(args, &option))
539 return 0;
540 uopt->fmode = option & 0777;
541 break;
542 case Opt_dmode:
543 if (match_octal(args, &option))
544 return 0;
545 uopt->dmode = option & 0777;
546 break;
534 default: 547 default:
535 printk(KERN_ERR "udf: bad mount option \"%s\" " 548 printk(KERN_ERR "udf: bad mount option \"%s\" "
536 "or missing value\n", p); 549 "or missing value\n", p);
@@ -540,17 +553,6 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
540 return 1; 553 return 1;
541} 554}
542 555
543static void udf_write_super(struct super_block *sb)
544{
545 lock_kernel();
546
547 if (!(sb->s_flags & MS_RDONLY))
548 udf_open_lvid(sb);
549 sb->s_dirt = 0;
550
551 unlock_kernel();
552}
553
554static int udf_remount_fs(struct super_block *sb, int *flags, char *options) 556static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
555{ 557{
556 struct udf_options uopt; 558 struct udf_options uopt;
@@ -560,6 +562,8 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
560 uopt.uid = sbi->s_uid; 562 uopt.uid = sbi->s_uid;
561 uopt.gid = sbi->s_gid; 563 uopt.gid = sbi->s_gid;
562 uopt.umask = sbi->s_umask; 564 uopt.umask = sbi->s_umask;
565 uopt.fmode = sbi->s_fmode;
566 uopt.dmode = sbi->s_dmode;
563 567
564 if (!udf_parse_options(options, &uopt, true)) 568 if (!udf_parse_options(options, &uopt, true))
565 return -EINVAL; 569 return -EINVAL;
@@ -568,6 +572,8 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
568 sbi->s_uid = uopt.uid; 572 sbi->s_uid = uopt.uid;
569 sbi->s_gid = uopt.gid; 573 sbi->s_gid = uopt.gid;
570 sbi->s_umask = uopt.umask; 574 sbi->s_umask = uopt.umask;
575 sbi->s_fmode = uopt.fmode;
576 sbi->s_dmode = uopt.dmode;
571 577
572 if (sbi->s_lvid_bh) { 578 if (sbi->s_lvid_bh) {
573 int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev); 579 int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev);
@@ -585,22 +591,19 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
585 return 0; 591 return 0;
586} 592}
587 593
588static int udf_vrs(struct super_block *sb, int silent) 594/* Check Volume Structure Descriptors (ECMA 167 2/9.1) */
595/* We also check any "CD-ROM Volume Descriptor Set" (ECMA 167 2/8.3.1) */
596static loff_t udf_check_vsd(struct super_block *sb)
589{ 597{
590 struct volStructDesc *vsd = NULL; 598 struct volStructDesc *vsd = NULL;
591 loff_t sector = 32768; 599 loff_t sector = 32768;
592 int sectorsize; 600 int sectorsize;
593 struct buffer_head *bh = NULL; 601 struct buffer_head *bh = NULL;
594 int iso9660 = 0;
595 int nsr02 = 0; 602 int nsr02 = 0;
596 int nsr03 = 0; 603 int nsr03 = 0;
597 struct udf_sb_info *sbi; 604 struct udf_sb_info *sbi;
598 605
599 /* Block size must be a multiple of 512 */
600 if (sb->s_blocksize & 511)
601 return 0;
602 sbi = UDF_SB(sb); 606 sbi = UDF_SB(sb);
603
604 if (sb->s_blocksize < sizeof(struct volStructDesc)) 607 if (sb->s_blocksize < sizeof(struct volStructDesc))
605 sectorsize = sizeof(struct volStructDesc); 608 sectorsize = sizeof(struct volStructDesc);
606 else 609 else
@@ -627,7 +630,6 @@ static int udf_vrs(struct super_block *sb, int silent)
627 break; 630 break;
628 } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001, 631 } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001,
629 VSD_STD_ID_LEN)) { 632 VSD_STD_ID_LEN)) {
630 iso9660 = sector;
631 switch (vsd->structType) { 633 switch (vsd->structType) {
632 case 0: 634 case 0:
633 udf_debug("ISO9660 Boot Record found\n"); 635 udf_debug("ISO9660 Boot Record found\n");
@@ -679,139 +681,9 @@ static int udf_vrs(struct super_block *sb, int silent)
679 return 0; 681 return 0;
680} 682}
681 683
682/*
683 * Check whether there is an anchor block in the given block
684 */
685static int udf_check_anchor_block(struct super_block *sb, sector_t block)
686{
687 struct buffer_head *bh;
688 uint16_t ident;
689
690 if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV) &&
691 udf_fixed_to_variable(block) >=
692 sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits)
693 return 0;
694
695 bh = udf_read_tagged(sb, block, block, &ident);
696 if (!bh)
697 return 0;
698 brelse(bh);
699
700 return ident == TAG_IDENT_AVDP;
701}
702
703/* Search for an anchor volume descriptor pointer */
704static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock)
705{
706 sector_t last[6];
707 int i;
708 struct udf_sb_info *sbi = UDF_SB(sb);
709
710 last[0] = lastblock;
711 last[1] = last[0] - 1;
712 last[2] = last[0] + 1;
713 last[3] = last[0] - 2;
714 last[4] = last[0] - 150;
715 last[5] = last[0] - 152;
716
717 /* according to spec, anchor is in either:
718 * block 256
719 * lastblock-256
720 * lastblock
721 * however, if the disc isn't closed, it could be 512 */
722
723 for (i = 0; i < ARRAY_SIZE(last); i++) {
724 if (last[i] < 0)
725 continue;
726 if (last[i] >= sb->s_bdev->bd_inode->i_size >>
727 sb->s_blocksize_bits)
728 continue;
729
730 if (udf_check_anchor_block(sb, last[i])) {
731 sbi->s_anchor[0] = last[i];
732 sbi->s_anchor[1] = last[i] - 256;
733 return last[i];
734 }
735
736 if (last[i] < 256)
737 continue;
738
739 if (udf_check_anchor_block(sb, last[i] - 256)) {
740 sbi->s_anchor[1] = last[i] - 256;
741 return last[i];
742 }
743 }
744
745 if (udf_check_anchor_block(sb, sbi->s_session + 256)) {
746 sbi->s_anchor[0] = sbi->s_session + 256;
747 return last[0];
748 }
749 if (udf_check_anchor_block(sb, sbi->s_session + 512)) {
750 sbi->s_anchor[0] = sbi->s_session + 512;
751 return last[0];
752 }
753 return 0;
754}
755
756/*
757 * Find an anchor volume descriptor. The function expects sbi->s_lastblock to
758 * be the last block on the media.
759 *
760 * Return 1 if not found, 0 if ok
761 *
762 */
763static void udf_find_anchor(struct super_block *sb)
764{
765 sector_t lastblock;
766 struct buffer_head *bh = NULL;
767 uint16_t ident;
768 int i;
769 struct udf_sb_info *sbi = UDF_SB(sb);
770
771 lastblock = udf_scan_anchors(sb, sbi->s_last_block);
772 if (lastblock)
773 goto check_anchor;
774
775 /* No anchor found? Try VARCONV conversion of block numbers */
776 UDF_SET_FLAG(sb, UDF_FLAG_VARCONV);
777 /* Firstly, we try to not convert number of the last block */
778 lastblock = udf_scan_anchors(sb,
779 udf_variable_to_fixed(sbi->s_last_block));
780 if (lastblock)
781 goto check_anchor;
782
783 /* Secondly, we try with converted number of the last block */
784 lastblock = udf_scan_anchors(sb, sbi->s_last_block);
785 if (!lastblock) {
786 /* VARCONV didn't help. Clear it. */
787 UDF_CLEAR_FLAG(sb, UDF_FLAG_VARCONV);
788 }
789
790check_anchor:
791 /*
792 * Check located anchors and the anchor block supplied via
793 * mount options
794 */
795 for (i = 0; i < ARRAY_SIZE(sbi->s_anchor); i++) {
796 if (!sbi->s_anchor[i])
797 continue;
798 bh = udf_read_tagged(sb, sbi->s_anchor[i],
799 sbi->s_anchor[i], &ident);
800 if (!bh)
801 sbi->s_anchor[i] = 0;
802 else {
803 brelse(bh);
804 if (ident != TAG_IDENT_AVDP)
805 sbi->s_anchor[i] = 0;
806 }
807 }
808
809 sbi->s_last_block = lastblock;
810}
811
812static int udf_find_fileset(struct super_block *sb, 684static int udf_find_fileset(struct super_block *sb,
813 kernel_lb_addr *fileset, 685 struct kernel_lb_addr *fileset,
814 kernel_lb_addr *root) 686 struct kernel_lb_addr *root)
815{ 687{
816 struct buffer_head *bh = NULL; 688 struct buffer_head *bh = NULL;
817 long lastblock; 689 long lastblock;
@@ -820,7 +692,7 @@ static int udf_find_fileset(struct super_block *sb,
820 692
821 if (fileset->logicalBlockNum != 0xFFFFFFFF || 693 if (fileset->logicalBlockNum != 0xFFFFFFFF ||
822 fileset->partitionReferenceNum != 0xFFFF) { 694 fileset->partitionReferenceNum != 0xFFFF) {
823 bh = udf_read_ptagged(sb, *fileset, 0, &ident); 695 bh = udf_read_ptagged(sb, fileset, 0, &ident);
824 696
825 if (!bh) { 697 if (!bh) {
826 return 1; 698 return 1;
@@ -834,7 +706,7 @@ static int udf_find_fileset(struct super_block *sb,
834 sbi = UDF_SB(sb); 706 sbi = UDF_SB(sb);
835 if (!bh) { 707 if (!bh) {
836 /* Search backwards through the partitions */ 708 /* Search backwards through the partitions */
837 kernel_lb_addr newfileset; 709 struct kernel_lb_addr newfileset;
838 710
839/* --> cvg: FIXME - is it reasonable? */ 711/* --> cvg: FIXME - is it reasonable? */
840 return 1; 712 return 1;
@@ -850,7 +722,7 @@ static int udf_find_fileset(struct super_block *sb,
850 newfileset.logicalBlockNum = 0; 722 newfileset.logicalBlockNum = 0;
851 723
852 do { 724 do {
853 bh = udf_read_ptagged(sb, newfileset, 0, 725 bh = udf_read_ptagged(sb, &newfileset, 0,
854 &ident); 726 &ident);
855 if (!bh) { 727 if (!bh) {
856 newfileset.logicalBlockNum++; 728 newfileset.logicalBlockNum++;
@@ -902,14 +774,23 @@ static int udf_find_fileset(struct super_block *sb,
902static int udf_load_pvoldesc(struct super_block *sb, sector_t block) 774static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
903{ 775{
904 struct primaryVolDesc *pvoldesc; 776 struct primaryVolDesc *pvoldesc;
905 struct ustr instr; 777 struct ustr *instr, *outstr;
906 struct ustr outstr;
907 struct buffer_head *bh; 778 struct buffer_head *bh;
908 uint16_t ident; 779 uint16_t ident;
780 int ret = 1;
781
782 instr = kmalloc(sizeof(struct ustr), GFP_NOFS);
783 if (!instr)
784 return 1;
785
786 outstr = kmalloc(sizeof(struct ustr), GFP_NOFS);
787 if (!outstr)
788 goto out1;
909 789
910 bh = udf_read_tagged(sb, block, block, &ident); 790 bh = udf_read_tagged(sb, block, block, &ident);
911 if (!bh) 791 if (!bh)
912 return 1; 792 goto out2;
793
913 BUG_ON(ident != TAG_IDENT_PVD); 794 BUG_ON(ident != TAG_IDENT_PVD);
914 795
915 pvoldesc = (struct primaryVolDesc *)bh->b_data; 796 pvoldesc = (struct primaryVolDesc *)bh->b_data;
@@ -917,7 +798,7 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
917 if (udf_disk_stamp_to_time(&UDF_SB(sb)->s_record_time, 798 if (udf_disk_stamp_to_time(&UDF_SB(sb)->s_record_time,
918 pvoldesc->recordingDateAndTime)) { 799 pvoldesc->recordingDateAndTime)) {
919#ifdef UDFFS_DEBUG 800#ifdef UDFFS_DEBUG
920 timestamp *ts = &pvoldesc->recordingDateAndTime; 801 struct timestamp *ts = &pvoldesc->recordingDateAndTime;
921 udf_debug("recording time %04u/%02u/%02u" 802 udf_debug("recording time %04u/%02u/%02u"
922 " %02u:%02u (%x)\n", 803 " %02u:%02u (%x)\n",
923 le16_to_cpu(ts->year), ts->month, ts->day, ts->hour, 804 le16_to_cpu(ts->year), ts->month, ts->day, ts->hour,
@@ -925,20 +806,25 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
925#endif 806#endif
926 } 807 }
927 808
928 if (!udf_build_ustr(&instr, pvoldesc->volIdent, 32)) 809 if (!udf_build_ustr(instr, pvoldesc->volIdent, 32))
929 if (udf_CS0toUTF8(&outstr, &instr)) { 810 if (udf_CS0toUTF8(outstr, instr)) {
930 strncpy(UDF_SB(sb)->s_volume_ident, outstr.u_name, 811 strncpy(UDF_SB(sb)->s_volume_ident, outstr->u_name,
931 outstr.u_len > 31 ? 31 : outstr.u_len); 812 outstr->u_len > 31 ? 31 : outstr->u_len);
932 udf_debug("volIdent[] = '%s'\n", 813 udf_debug("volIdent[] = '%s'\n",
933 UDF_SB(sb)->s_volume_ident); 814 UDF_SB(sb)->s_volume_ident);
934 } 815 }
935 816
936 if (!udf_build_ustr(&instr, pvoldesc->volSetIdent, 128)) 817 if (!udf_build_ustr(instr, pvoldesc->volSetIdent, 128))
937 if (udf_CS0toUTF8(&outstr, &instr)) 818 if (udf_CS0toUTF8(outstr, instr))
938 udf_debug("volSetIdent[] = '%s'\n", outstr.u_name); 819 udf_debug("volSetIdent[] = '%s'\n", outstr->u_name);
939 820
940 brelse(bh); 821 brelse(bh);
941 return 0; 822 ret = 0;
823out2:
824 kfree(outstr);
825out1:
826 kfree(instr);
827 return ret;
942} 828}
943 829
944static int udf_load_metadata_files(struct super_block *sb, int partition) 830static int udf_load_metadata_files(struct super_block *sb, int partition)
@@ -946,7 +832,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
946 struct udf_sb_info *sbi = UDF_SB(sb); 832 struct udf_sb_info *sbi = UDF_SB(sb);
947 struct udf_part_map *map; 833 struct udf_part_map *map;
948 struct udf_meta_data *mdata; 834 struct udf_meta_data *mdata;
949 kernel_lb_addr addr; 835 struct kernel_lb_addr addr;
950 int fe_error = 0; 836 int fe_error = 0;
951 837
952 map = &sbi->s_partmaps[partition]; 838 map = &sbi->s_partmaps[partition];
@@ -959,7 +845,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
959 udf_debug("Metadata file location: block = %d part = %d\n", 845 udf_debug("Metadata file location: block = %d part = %d\n",
960 addr.logicalBlockNum, addr.partitionReferenceNum); 846 addr.logicalBlockNum, addr.partitionReferenceNum);
961 847
962 mdata->s_metadata_fe = udf_iget(sb, addr); 848 mdata->s_metadata_fe = udf_iget(sb, &addr);
963 849
964 if (mdata->s_metadata_fe == NULL) { 850 if (mdata->s_metadata_fe == NULL) {
965 udf_warning(sb, __func__, "metadata inode efe not found, " 851 udf_warning(sb, __func__, "metadata inode efe not found, "
@@ -981,7 +867,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
981 udf_debug("Mirror metadata file location: block = %d part = %d\n", 867 udf_debug("Mirror metadata file location: block = %d part = %d\n",
982 addr.logicalBlockNum, addr.partitionReferenceNum); 868 addr.logicalBlockNum, addr.partitionReferenceNum);
983 869
984 mdata->s_mirror_fe = udf_iget(sb, addr); 870 mdata->s_mirror_fe = udf_iget(sb, &addr);
985 871
986 if (mdata->s_mirror_fe == NULL) { 872 if (mdata->s_mirror_fe == NULL) {
987 if (fe_error) { 873 if (fe_error) {
@@ -1013,7 +899,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
1013 udf_debug("Bitmap file location: block = %d part = %d\n", 899 udf_debug("Bitmap file location: block = %d part = %d\n",
1014 addr.logicalBlockNum, addr.partitionReferenceNum); 900 addr.logicalBlockNum, addr.partitionReferenceNum);
1015 901
1016 mdata->s_bitmap_fe = udf_iget(sb, addr); 902 mdata->s_bitmap_fe = udf_iget(sb, &addr);
1017 903
1018 if (mdata->s_bitmap_fe == NULL) { 904 if (mdata->s_bitmap_fe == NULL) {
1019 if (sb->s_flags & MS_RDONLY) 905 if (sb->s_flags & MS_RDONLY)
@@ -1037,7 +923,7 @@ error_exit:
1037} 923}
1038 924
1039static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh, 925static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh,
1040 kernel_lb_addr *root) 926 struct kernel_lb_addr *root)
1041{ 927{
1042 struct fileSetDesc *fset; 928 struct fileSetDesc *fset;
1043 929
@@ -1119,13 +1005,13 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1119 1005
1120 phd = (struct partitionHeaderDesc *)p->partitionContentsUse; 1006 phd = (struct partitionHeaderDesc *)p->partitionContentsUse;
1121 if (phd->unallocSpaceTable.extLength) { 1007 if (phd->unallocSpaceTable.extLength) {
1122 kernel_lb_addr loc = { 1008 struct kernel_lb_addr loc = {
1123 .logicalBlockNum = le32_to_cpu( 1009 .logicalBlockNum = le32_to_cpu(
1124 phd->unallocSpaceTable.extPosition), 1010 phd->unallocSpaceTable.extPosition),
1125 .partitionReferenceNum = p_index, 1011 .partitionReferenceNum = p_index,
1126 }; 1012 };
1127 1013
1128 map->s_uspace.s_table = udf_iget(sb, loc); 1014 map->s_uspace.s_table = udf_iget(sb, &loc);
1129 if (!map->s_uspace.s_table) { 1015 if (!map->s_uspace.s_table) {
1130 udf_debug("cannot load unallocSpaceTable (part %d)\n", 1016 udf_debug("cannot load unallocSpaceTable (part %d)\n",
1131 p_index); 1017 p_index);
@@ -1154,13 +1040,13 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1154 udf_debug("partitionIntegrityTable (part %d)\n", p_index); 1040 udf_debug("partitionIntegrityTable (part %d)\n", p_index);
1155 1041
1156 if (phd->freedSpaceTable.extLength) { 1042 if (phd->freedSpaceTable.extLength) {
1157 kernel_lb_addr loc = { 1043 struct kernel_lb_addr loc = {
1158 .logicalBlockNum = le32_to_cpu( 1044 .logicalBlockNum = le32_to_cpu(
1159 phd->freedSpaceTable.extPosition), 1045 phd->freedSpaceTable.extPosition),
1160 .partitionReferenceNum = p_index, 1046 .partitionReferenceNum = p_index,
1161 }; 1047 };
1162 1048
1163 map->s_fspace.s_table = udf_iget(sb, loc); 1049 map->s_fspace.s_table = udf_iget(sb, &loc);
1164 if (!map->s_fspace.s_table) { 1050 if (!map->s_fspace.s_table) {
1165 udf_debug("cannot load freedSpaceTable (part %d)\n", 1051 udf_debug("cannot load freedSpaceTable (part %d)\n",
1166 p_index); 1052 p_index);
@@ -1192,7 +1078,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
1192{ 1078{
1193 struct udf_sb_info *sbi = UDF_SB(sb); 1079 struct udf_sb_info *sbi = UDF_SB(sb);
1194 struct udf_part_map *map = &sbi->s_partmaps[p_index]; 1080 struct udf_part_map *map = &sbi->s_partmaps[p_index];
1195 kernel_lb_addr ino; 1081 struct kernel_lb_addr ino;
1196 struct buffer_head *bh = NULL; 1082 struct buffer_head *bh = NULL;
1197 struct udf_inode_info *vati; 1083 struct udf_inode_info *vati;
1198 uint32_t pos; 1084 uint32_t pos;
@@ -1201,7 +1087,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
1201 /* VAT file entry is in the last recorded block */ 1087 /* VAT file entry is in the last recorded block */
1202 ino.partitionReferenceNum = type1_index; 1088 ino.partitionReferenceNum = type1_index;
1203 ino.logicalBlockNum = sbi->s_last_block - map->s_partition_root; 1089 ino.logicalBlockNum = sbi->s_last_block - map->s_partition_root;
1204 sbi->s_vat_inode = udf_iget(sb, ino); 1090 sbi->s_vat_inode = udf_iget(sb, &ino);
1205 if (!sbi->s_vat_inode) 1091 if (!sbi->s_vat_inode)
1206 return 1; 1092 return 1;
1207 1093
@@ -1322,7 +1208,7 @@ out_bh:
1322} 1208}
1323 1209
1324static int udf_load_logicalvol(struct super_block *sb, sector_t block, 1210static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1325 kernel_lb_addr *fileset) 1211 struct kernel_lb_addr *fileset)
1326{ 1212{
1327 struct logicalVolDesc *lvd; 1213 struct logicalVolDesc *lvd;
1328 int i, j, offset; 1214 int i, j, offset;
@@ -1471,7 +1357,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1471 } 1357 }
1472 1358
1473 if (fileset) { 1359 if (fileset) {
1474 long_ad *la = (long_ad *)&(lvd->logicalVolContentsUse[0]); 1360 struct long_ad *la = (struct long_ad *)&(lvd->logicalVolContentsUse[0]);
1475 1361
1476 *fileset = lelb_to_cpu(la->extLocation); 1362 *fileset = lelb_to_cpu(la->extLocation);
1477 udf_debug("FileSet found in LogicalVolDesc at block=%d, " 1363 udf_debug("FileSet found in LogicalVolDesc at block=%d, "
@@ -1490,7 +1376,7 @@ out_bh:
1490 * udf_load_logicalvolint 1376 * udf_load_logicalvolint
1491 * 1377 *
1492 */ 1378 */
1493static void udf_load_logicalvolint(struct super_block *sb, kernel_extent_ad loc) 1379static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_ad loc)
1494{ 1380{
1495 struct buffer_head *bh = NULL; 1381 struct buffer_head *bh = NULL;
1496 uint16_t ident; 1382 uint16_t ident;
@@ -1533,7 +1419,7 @@ static void udf_load_logicalvolint(struct super_block *sb, kernel_extent_ad loc)
1533 * Written, tested, and released. 1419 * Written, tested, and released.
1534 */ 1420 */
1535static noinline int udf_process_sequence(struct super_block *sb, long block, 1421static noinline int udf_process_sequence(struct super_block *sb, long block,
1536 long lastblock, kernel_lb_addr *fileset) 1422 long lastblock, struct kernel_lb_addr *fileset)
1537{ 1423{
1538 struct buffer_head *bh = NULL; 1424 struct buffer_head *bh = NULL;
1539 struct udf_vds_record vds[VDS_POS_LENGTH]; 1425 struct udf_vds_record vds[VDS_POS_LENGTH];
@@ -1655,85 +1541,199 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
1655 return 0; 1541 return 0;
1656} 1542}
1657 1543
1544static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh,
1545 struct kernel_lb_addr *fileset)
1546{
1547 struct anchorVolDescPtr *anchor;
1548 long main_s, main_e, reserve_s, reserve_e;
1549 struct udf_sb_info *sbi;
1550
1551 sbi = UDF_SB(sb);
1552 anchor = (struct anchorVolDescPtr *)bh->b_data;
1553
1554 /* Locate the main sequence */
1555 main_s = le32_to_cpu(anchor->mainVolDescSeqExt.extLocation);
1556 main_e = le32_to_cpu(anchor->mainVolDescSeqExt.extLength);
1557 main_e = main_e >> sb->s_blocksize_bits;
1558 main_e += main_s;
1559
1560 /* Locate the reserve sequence */
1561 reserve_s = le32_to_cpu(anchor->reserveVolDescSeqExt.extLocation);
1562 reserve_e = le32_to_cpu(anchor->reserveVolDescSeqExt.extLength);
1563 reserve_e = reserve_e >> sb->s_blocksize_bits;
1564 reserve_e += reserve_s;
1565
1566 /* Process the main & reserve sequences */
1567 /* responsible for finding the PartitionDesc(s) */
1568 if (!udf_process_sequence(sb, main_s, main_e, fileset))
1569 return 1;
1570 return !udf_process_sequence(sb, reserve_s, reserve_e, fileset);
1571}
1572
1658/* 1573/*
1659 * udf_check_valid() 1574 * Check whether there is an anchor block in the given block and
1575 * load Volume Descriptor Sequence if so.
1660 */ 1576 */
1661static int udf_check_valid(struct super_block *sb, int novrs, int silent) 1577static int udf_check_anchor_block(struct super_block *sb, sector_t block,
1578 struct kernel_lb_addr *fileset)
1662{ 1579{
1663 long block; 1580 struct buffer_head *bh;
1664 struct udf_sb_info *sbi = UDF_SB(sb); 1581 uint16_t ident;
1582 int ret;
1665 1583
1666 if (novrs) { 1584 if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV) &&
1667 udf_debug("Validity check skipped because of novrs option\n"); 1585 udf_fixed_to_variable(block) >=
1586 sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits)
1587 return 0;
1588
1589 bh = udf_read_tagged(sb, block, block, &ident);
1590 if (!bh)
1591 return 0;
1592 if (ident != TAG_IDENT_AVDP) {
1593 brelse(bh);
1668 return 0; 1594 return 0;
1669 } 1595 }
1670 /* Check that it is NSR02 compliant */ 1596 ret = udf_load_sequence(sb, bh, fileset);
1671 /* Process any "CD-ROM Volume Descriptor Set" (ECMA 167 2/8.3.1) */ 1597 brelse(bh);
1672 block = udf_vrs(sb, silent); 1598 return ret;
1673 if (block == -1)
1674 udf_debug("Failed to read byte 32768. Assuming open "
1675 "disc. Skipping validity check\n");
1676 if (block && !sbi->s_last_block)
1677 sbi->s_last_block = udf_get_last_block(sb);
1678 return !block;
1679} 1599}
1680 1600
1681static int udf_load_sequence(struct super_block *sb, kernel_lb_addr *fileset) 1601/* Search for an anchor volume descriptor pointer */
1602static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock,
1603 struct kernel_lb_addr *fileset)
1682{ 1604{
1683 struct anchorVolDescPtr *anchor; 1605 sector_t last[6];
1684 uint16_t ident;
1685 struct buffer_head *bh;
1686 long main_s, main_e, reserve_s, reserve_e;
1687 int i; 1606 int i;
1688 struct udf_sb_info *sbi; 1607 struct udf_sb_info *sbi = UDF_SB(sb);
1689 1608 int last_count = 0;
1690 if (!sb)
1691 return 1;
1692 sbi = UDF_SB(sb);
1693 1609
1694 for (i = 0; i < ARRAY_SIZE(sbi->s_anchor); i++) { 1610 /* First try user provided anchor */
1695 if (!sbi->s_anchor[i]) 1611 if (sbi->s_anchor) {
1612 if (udf_check_anchor_block(sb, sbi->s_anchor, fileset))
1613 return lastblock;
1614 }
1615 /*
1616 * according to spec, anchor is in either:
1617 * block 256
1618 * lastblock-256
1619 * lastblock
1620 * however, if the disc isn't closed, it could be 512.
1621 */
1622 if (udf_check_anchor_block(sb, sbi->s_session + 256, fileset))
1623 return lastblock;
1624 /*
1625 * The trouble is which block is the last one. Drives often misreport
1626 * this so we try various possibilities.
1627 */
1628 last[last_count++] = lastblock;
1629 if (lastblock >= 1)
1630 last[last_count++] = lastblock - 1;
1631 last[last_count++] = lastblock + 1;
1632 if (lastblock >= 2)
1633 last[last_count++] = lastblock - 2;
1634 if (lastblock >= 150)
1635 last[last_count++] = lastblock - 150;
1636 if (lastblock >= 152)
1637 last[last_count++] = lastblock - 152;
1638
1639 for (i = 0; i < last_count; i++) {
1640 if (last[i] >= sb->s_bdev->bd_inode->i_size >>
1641 sb->s_blocksize_bits)
1696 continue; 1642 continue;
1697 1643 if (udf_check_anchor_block(sb, last[i], fileset))
1698 bh = udf_read_tagged(sb, sbi->s_anchor[i], sbi->s_anchor[i], 1644 return last[i];
1699 &ident); 1645 if (last[i] < 256)
1700 if (!bh)
1701 continue; 1646 continue;
1647 if (udf_check_anchor_block(sb, last[i] - 256, fileset))
1648 return last[i];
1649 }
1702 1650
1703 anchor = (struct anchorVolDescPtr *)bh->b_data; 1651 /* Finally try block 512 in case media is open */
1652 if (udf_check_anchor_block(sb, sbi->s_session + 512, fileset))
1653 return last[0];
1654 return 0;
1655}
1704 1656
1705 /* Locate the main sequence */ 1657/*
1706 main_s = le32_to_cpu(anchor->mainVolDescSeqExt.extLocation); 1658 * Find an anchor volume descriptor and load Volume Descriptor Sequence from
1707 main_e = le32_to_cpu(anchor->mainVolDescSeqExt.extLength); 1659 * area specified by it. The function expects sbi->s_lastblock to be the last
1708 main_e = main_e >> sb->s_blocksize_bits; 1660 * block on the media.
1709 main_e += main_s; 1661 *
1662 * Return 1 if ok, 0 if not found.
1663 *
1664 */
1665static int udf_find_anchor(struct super_block *sb,
1666 struct kernel_lb_addr *fileset)
1667{
1668 sector_t lastblock;
1669 struct udf_sb_info *sbi = UDF_SB(sb);
1710 1670
1711 /* Locate the reserve sequence */ 1671 lastblock = udf_scan_anchors(sb, sbi->s_last_block, fileset);
1712 reserve_s = le32_to_cpu( 1672 if (lastblock)
1713 anchor->reserveVolDescSeqExt.extLocation); 1673 goto out;
1714 reserve_e = le32_to_cpu(
1715 anchor->reserveVolDescSeqExt.extLength);
1716 reserve_e = reserve_e >> sb->s_blocksize_bits;
1717 reserve_e += reserve_s;
1718 1674
1719 brelse(bh); 1675 /* No anchor found? Try VARCONV conversion of block numbers */
1676 UDF_SET_FLAG(sb, UDF_FLAG_VARCONV);
1677 /* Firstly, we try to not convert number of the last block */
1678 lastblock = udf_scan_anchors(sb,
1679 udf_variable_to_fixed(sbi->s_last_block),
1680 fileset);
1681 if (lastblock)
1682 goto out;
1720 1683
1721 /* Process the main & reserve sequences */ 1684 /* Secondly, we try with converted number of the last block */
1722 /* responsible for finding the PartitionDesc(s) */ 1685 lastblock = udf_scan_anchors(sb, sbi->s_last_block, fileset);
1723 if (!(udf_process_sequence(sb, main_s, main_e, 1686 if (!lastblock) {
1724 fileset) && 1687 /* VARCONV didn't help. Clear it. */
1725 udf_process_sequence(sb, reserve_s, reserve_e, 1688 UDF_CLEAR_FLAG(sb, UDF_FLAG_VARCONV);
1726 fileset))) 1689 return 0;
1727 break;
1728 } 1690 }
1691out:
1692 sbi->s_last_block = lastblock;
1693 return 1;
1694}
1729 1695
1730 if (i == ARRAY_SIZE(sbi->s_anchor)) { 1696/*
1731 udf_debug("No Anchor block found\n"); 1697 * Check Volume Structure Descriptor, find Anchor block and load Volume
1732 return 1; 1698 * Descriptor Sequence
1699 */
1700static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
1701 int silent, struct kernel_lb_addr *fileset)
1702{
1703 struct udf_sb_info *sbi = UDF_SB(sb);
1704 loff_t nsr_off;
1705
1706 if (!sb_set_blocksize(sb, uopt->blocksize)) {
1707 if (!silent)
1708 printk(KERN_WARNING "UDF-fs: Bad block size\n");
1709 return 0;
1710 }
1711 sbi->s_last_block = uopt->lastblock;
1712 if (!uopt->novrs) {
1713 /* Check that it is NSR02 compliant */
1714 nsr_off = udf_check_vsd(sb);
1715 if (!nsr_off) {
1716 if (!silent)
1717 printk(KERN_WARNING "UDF-fs: No VRS found\n");
1718 return 0;
1719 }
1720 if (nsr_off == -1)
1721 udf_debug("Failed to read byte 32768. Assuming open "
1722 "disc. Skipping validity check\n");
1723 if (!sbi->s_last_block)
1724 sbi->s_last_block = udf_get_last_block(sb);
1725 } else {
1726 udf_debug("Validity check skipped because of novrs option\n");
1733 } 1727 }
1734 udf_debug("Using anchor in block %d\n", sbi->s_anchor[i]);
1735 1728
1736 return 0; 1729 /* Look for anchor block and load Volume Descriptor Sequence */
1730 sbi->s_anchor = uopt->anchor;
1731 if (!udf_find_anchor(sb, fileset)) {
1732 if (!silent)
1733 printk(KERN_WARNING "UDF-fs: No anchor found\n");
1734 return 0;
1735 }
1736 return 1;
1737} 1737}
1738 1738
1739static void udf_open_lvid(struct super_block *sb) 1739static void udf_open_lvid(struct super_block *sb)
@@ -1742,9 +1742,9 @@ static void udf_open_lvid(struct super_block *sb)
1742 struct buffer_head *bh = sbi->s_lvid_bh; 1742 struct buffer_head *bh = sbi->s_lvid_bh;
1743 struct logicalVolIntegrityDesc *lvid; 1743 struct logicalVolIntegrityDesc *lvid;
1744 struct logicalVolIntegrityDescImpUse *lvidiu; 1744 struct logicalVolIntegrityDescImpUse *lvidiu;
1745
1745 if (!bh) 1746 if (!bh)
1746 return; 1747 return;
1747
1748 lvid = (struct logicalVolIntegrityDesc *)bh->b_data; 1748 lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
1749 lvidiu = udf_sb_lvidiu(sbi); 1749 lvidiu = udf_sb_lvidiu(sbi);
1750 1750
@@ -1752,14 +1752,15 @@ static void udf_open_lvid(struct super_block *sb)
1752 lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; 1752 lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
1753 udf_time_to_disk_stamp(&lvid->recordingDateAndTime, 1753 udf_time_to_disk_stamp(&lvid->recordingDateAndTime,
1754 CURRENT_TIME); 1754 CURRENT_TIME);
1755 lvid->integrityType = LVID_INTEGRITY_TYPE_OPEN; 1755 lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN);
1756 1756
1757 lvid->descTag.descCRC = cpu_to_le16( 1757 lvid->descTag.descCRC = cpu_to_le16(
1758 crc_itu_t(0, (char *)lvid + sizeof(tag), 1758 crc_itu_t(0, (char *)lvid + sizeof(struct tag),
1759 le16_to_cpu(lvid->descTag.descCRCLength))); 1759 le16_to_cpu(lvid->descTag.descCRCLength)));
1760 1760
1761 lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); 1761 lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
1762 mark_buffer_dirty(bh); 1762 mark_buffer_dirty(bh);
1763 sbi->s_lvid_dirty = 0;
1763} 1764}
1764 1765
1765static void udf_close_lvid(struct super_block *sb) 1766static void udf_close_lvid(struct super_block *sb)
@@ -1773,10 +1774,6 @@ static void udf_close_lvid(struct super_block *sb)
1773 return; 1774 return;
1774 1775
1775 lvid = (struct logicalVolIntegrityDesc *)bh->b_data; 1776 lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
1776
1777 if (lvid->integrityType != LVID_INTEGRITY_TYPE_OPEN)
1778 return;
1779
1780 lvidiu = udf_sb_lvidiu(sbi); 1777 lvidiu = udf_sb_lvidiu(sbi);
1781 lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; 1778 lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
1782 lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; 1779 lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
@@ -1790,11 +1787,12 @@ static void udf_close_lvid(struct super_block *sb)
1790 lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE); 1787 lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE);
1791 1788
1792 lvid->descTag.descCRC = cpu_to_le16( 1789 lvid->descTag.descCRC = cpu_to_le16(
1793 crc_itu_t(0, (char *)lvid + sizeof(tag), 1790 crc_itu_t(0, (char *)lvid + sizeof(struct tag),
1794 le16_to_cpu(lvid->descTag.descCRCLength))); 1791 le16_to_cpu(lvid->descTag.descCRCLength)));
1795 1792
1796 lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); 1793 lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
1797 mark_buffer_dirty(bh); 1794 mark_buffer_dirty(bh);
1795 sbi->s_lvid_dirty = 0;
1798} 1796}
1799 1797
1800static void udf_sb_free_bitmap(struct udf_bitmap *bitmap) 1798static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
@@ -1846,15 +1844,18 @@ static void udf_free_partition(struct udf_part_map *map)
1846static int udf_fill_super(struct super_block *sb, void *options, int silent) 1844static int udf_fill_super(struct super_block *sb, void *options, int silent)
1847{ 1845{
1848 int i; 1846 int i;
1847 int ret;
1849 struct inode *inode = NULL; 1848 struct inode *inode = NULL;
1850 struct udf_options uopt; 1849 struct udf_options uopt;
1851 kernel_lb_addr rootdir, fileset; 1850 struct kernel_lb_addr rootdir, fileset;
1852 struct udf_sb_info *sbi; 1851 struct udf_sb_info *sbi;
1853 1852
1854 uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT); 1853 uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT);
1855 uopt.uid = -1; 1854 uopt.uid = -1;
1856 uopt.gid = -1; 1855 uopt.gid = -1;
1857 uopt.umask = 0; 1856 uopt.umask = 0;
1857 uopt.fmode = UDF_INVALID_MODE;
1858 uopt.dmode = UDF_INVALID_MODE;
1858 1859
1859 sbi = kzalloc(sizeof(struct udf_sb_info), GFP_KERNEL); 1860 sbi = kzalloc(sizeof(struct udf_sb_info), GFP_KERNEL);
1860 if (!sbi) 1861 if (!sbi)
@@ -1892,15 +1893,10 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1892 sbi->s_uid = uopt.uid; 1893 sbi->s_uid = uopt.uid;
1893 sbi->s_gid = uopt.gid; 1894 sbi->s_gid = uopt.gid;
1894 sbi->s_umask = uopt.umask; 1895 sbi->s_umask = uopt.umask;
1896 sbi->s_fmode = uopt.fmode;
1897 sbi->s_dmode = uopt.dmode;
1895 sbi->s_nls_map = uopt.nls_map; 1898 sbi->s_nls_map = uopt.nls_map;
1896 1899
1897 /* Set the block size for all transfers */
1898 if (!sb_min_blocksize(sb, uopt.blocksize)) {
1899 udf_debug("Bad block size (%d)\n", uopt.blocksize);
1900 printk(KERN_ERR "udf: bad block size (%d)\n", uopt.blocksize);
1901 goto error_out;
1902 }
1903
1904 if (uopt.session == 0xFFFFFFFF) 1900 if (uopt.session == 0xFFFFFFFF)
1905 sbi->s_session = udf_get_last_session(sb); 1901 sbi->s_session = udf_get_last_session(sb);
1906 else 1902 else
@@ -1908,18 +1904,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1908 1904
1909 udf_debug("Multi-session=%d\n", sbi->s_session); 1905 udf_debug("Multi-session=%d\n", sbi->s_session);
1910 1906
1911 sbi->s_last_block = uopt.lastblock;
1912 sbi->s_anchor[0] = sbi->s_anchor[1] = 0;
1913 sbi->s_anchor[2] = uopt.anchor;
1914
1915 if (udf_check_valid(sb, uopt.novrs, silent)) {
1916 /* read volume recognition sequences */
1917 printk(KERN_WARNING "UDF-fs: No VRS found\n");
1918 goto error_out;
1919 }
1920
1921 udf_find_anchor(sb);
1922
1923 /* Fill in the rest of the superblock */ 1907 /* Fill in the rest of the superblock */
1924 sb->s_op = &udf_sb_ops; 1908 sb->s_op = &udf_sb_ops;
1925 sb->s_export_op = &udf_export_ops; 1909 sb->s_export_op = &udf_export_ops;
@@ -1928,7 +1912,21 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1928 sb->s_magic = UDF_SUPER_MAGIC; 1912 sb->s_magic = UDF_SUPER_MAGIC;
1929 sb->s_time_gran = 1000; 1913 sb->s_time_gran = 1000;
1930 1914
1931 if (udf_load_sequence(sb, &fileset)) { 1915 if (uopt.flags & (1 << UDF_FLAG_BLOCKSIZE_SET)) {
1916 ret = udf_load_vrs(sb, &uopt, silent, &fileset);
1917 } else {
1918 uopt.blocksize = bdev_hardsect_size(sb->s_bdev);
1919 ret = udf_load_vrs(sb, &uopt, silent, &fileset);
1920 if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) {
1921 if (!silent)
1922 printk(KERN_NOTICE
1923 "UDF-fs: Rescanning with blocksize "
1924 "%d\n", UDF_DEFAULT_BLOCKSIZE);
1925 uopt.blocksize = UDF_DEFAULT_BLOCKSIZE;
1926 ret = udf_load_vrs(sb, &uopt, silent, &fileset);
1927 }
1928 }
1929 if (!ret) {
1932 printk(KERN_WARNING "UDF-fs: No partition found (1)\n"); 1930 printk(KERN_WARNING "UDF-fs: No partition found (1)\n");
1933 goto error_out; 1931 goto error_out;
1934 } 1932 }
@@ -1978,7 +1976,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1978 } 1976 }
1979 1977
1980 if (!silent) { 1978 if (!silent) {
1981 timestamp ts; 1979 struct timestamp ts;
1982 udf_time_to_disk_stamp(&ts, sbi->s_record_time); 1980 udf_time_to_disk_stamp(&ts, sbi->s_record_time);
1983 udf_info("UDF: Mounting volume '%s', " 1981 udf_info("UDF: Mounting volume '%s', "
1984 "timestamp %04u/%02u/%02u %02u:%02u (%x)\n", 1982 "timestamp %04u/%02u/%02u %02u:%02u (%x)\n",
@@ -1991,7 +1989,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1991 /* Assign the root inode */ 1989 /* Assign the root inode */
1992 /* assign inodes by physical block number */ 1990 /* assign inodes by physical block number */
1993 /* perhaps it's not extensible enough, but for now ... */ 1991 /* perhaps it's not extensible enough, but for now ... */
1994 inode = udf_iget(sb, rootdir); 1992 inode = udf_iget(sb, &rootdir);
1995 if (!inode) { 1993 if (!inode) {
1996 printk(KERN_ERR "UDF-fs: Error in udf_iget, block=%d, " 1994 printk(KERN_ERR "UDF-fs: Error in udf_iget, block=%d, "
1997 "partition=%d\n", 1995 "partition=%d\n",
@@ -2081,11 +2079,31 @@ static void udf_put_super(struct super_block *sb)
2081 sb->s_fs_info = NULL; 2079 sb->s_fs_info = NULL;
2082} 2080}
2083 2081
2082static int udf_sync_fs(struct super_block *sb, int wait)
2083{
2084 struct udf_sb_info *sbi = UDF_SB(sb);
2085
2086 mutex_lock(&sbi->s_alloc_mutex);
2087 if (sbi->s_lvid_dirty) {
2088 /*
2089 * Blockdevice will be synced later so we don't have to submit
2090 * the buffer for IO
2091 */
2092 mark_buffer_dirty(sbi->s_lvid_bh);
2093 sb->s_dirt = 0;
2094 sbi->s_lvid_dirty = 0;
2095 }
2096 mutex_unlock(&sbi->s_alloc_mutex);
2097
2098 return 0;
2099}
2100
2084static int udf_statfs(struct dentry *dentry, struct kstatfs *buf) 2101static int udf_statfs(struct dentry *dentry, struct kstatfs *buf)
2085{ 2102{
2086 struct super_block *sb = dentry->d_sb; 2103 struct super_block *sb = dentry->d_sb;
2087 struct udf_sb_info *sbi = UDF_SB(sb); 2104 struct udf_sb_info *sbi = UDF_SB(sb);
2088 struct logicalVolIntegrityDescImpUse *lvidiu; 2105 struct logicalVolIntegrityDescImpUse *lvidiu;
2106 u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
2089 2107
2090 if (sbi->s_lvid_bh != NULL) 2108 if (sbi->s_lvid_bh != NULL)
2091 lvidiu = udf_sb_lvidiu(sbi); 2109 lvidiu = udf_sb_lvidiu(sbi);
@@ -2101,8 +2119,9 @@ static int udf_statfs(struct dentry *dentry, struct kstatfs *buf)
2101 le32_to_cpu(lvidiu->numDirs)) : 0) 2119 le32_to_cpu(lvidiu->numDirs)) : 0)
2102 + buf->f_bfree; 2120 + buf->f_bfree;
2103 buf->f_ffree = buf->f_bfree; 2121 buf->f_ffree = buf->f_bfree;
2104 /* __kernel_fsid_t f_fsid */
2105 buf->f_namelen = UDF_NAME_LEN - 2; 2122 buf->f_namelen = UDF_NAME_LEN - 2;
2123 buf->f_fsid.val[0] = (u32)id;
2124 buf->f_fsid.val[1] = (u32)(id >> 32);
2106 2125
2107 return 0; 2126 return 0;
2108} 2127}
@@ -2114,7 +2133,7 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
2114 unsigned int accum = 0; 2133 unsigned int accum = 0;
2115 int index; 2134 int index;
2116 int block = 0, newblock; 2135 int block = 0, newblock;
2117 kernel_lb_addr loc; 2136 struct kernel_lb_addr loc;
2118 uint32_t bytes; 2137 uint32_t bytes;
2119 uint8_t *ptr; 2138 uint8_t *ptr;
2120 uint16_t ident; 2139 uint16_t ident;
@@ -2124,7 +2143,7 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
2124 2143
2125 loc.logicalBlockNum = bitmap->s_extPosition; 2144 loc.logicalBlockNum = bitmap->s_extPosition;
2126 loc.partitionReferenceNum = UDF_SB(sb)->s_partition; 2145 loc.partitionReferenceNum = UDF_SB(sb)->s_partition;
2127 bh = udf_read_ptagged(sb, loc, 0, &ident); 2146 bh = udf_read_ptagged(sb, &loc, 0, &ident);
2128 2147
2129 if (!bh) { 2148 if (!bh) {
2130 printk(KERN_ERR "udf: udf_count_free failed\n"); 2149 printk(KERN_ERR "udf: udf_count_free failed\n");
@@ -2147,7 +2166,7 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
2147 bytes -= cur_bytes; 2166 bytes -= cur_bytes;
2148 if (bytes) { 2167 if (bytes) {
2149 brelse(bh); 2168 brelse(bh);
2150 newblock = udf_get_lb_pblock(sb, loc, ++block); 2169 newblock = udf_get_lb_pblock(sb, &loc, ++block);
2151 bh = udf_tread(sb, newblock); 2170 bh = udf_tread(sb, newblock);
2152 if (!bh) { 2171 if (!bh) {
2153 udf_debug("read failed\n"); 2172 udf_debug("read failed\n");
@@ -2170,7 +2189,7 @@ static unsigned int udf_count_free_table(struct super_block *sb,
2170{ 2189{
2171 unsigned int accum = 0; 2190 unsigned int accum = 0;
2172 uint32_t elen; 2191 uint32_t elen;
2173 kernel_lb_addr eloc; 2192 struct kernel_lb_addr eloc;
2174 int8_t etype; 2193 int8_t etype;
2175 struct extent_position epos; 2194 struct extent_position epos;
2176 2195
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 65e19b4f9424..225527cdc885 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -28,10 +28,10 @@
28#include "udf_sb.h" 28#include "udf_sb.h"
29 29
30static void extent_trunc(struct inode *inode, struct extent_position *epos, 30static void extent_trunc(struct inode *inode, struct extent_position *epos,
31 kernel_lb_addr eloc, int8_t etype, uint32_t elen, 31 struct kernel_lb_addr *eloc, int8_t etype, uint32_t elen,
32 uint32_t nelen) 32 uint32_t nelen)
33{ 33{
34 kernel_lb_addr neloc = {}; 34 struct kernel_lb_addr neloc = {};
35 int last_block = (elen + inode->i_sb->s_blocksize - 1) >> 35 int last_block = (elen + inode->i_sb->s_blocksize - 1) >>
36 inode->i_sb->s_blocksize_bits; 36 inode->i_sb->s_blocksize_bits;
37 int first_block = (nelen + inode->i_sb->s_blocksize - 1) >> 37 int first_block = (nelen + inode->i_sb->s_blocksize - 1) >>
@@ -43,12 +43,12 @@ static void extent_trunc(struct inode *inode, struct extent_position *epos,
43 last_block); 43 last_block);
44 etype = (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30); 44 etype = (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30);
45 } else 45 } else
46 neloc = eloc; 46 neloc = *eloc;
47 nelen = (etype << 30) | nelen; 47 nelen = (etype << 30) | nelen;
48 } 48 }
49 49
50 if (elen != nelen) { 50 if (elen != nelen) {
51 udf_write_aext(inode, epos, neloc, nelen, 0); 51 udf_write_aext(inode, epos, &neloc, nelen, 0);
52 if (last_block - first_block > 0) { 52 if (last_block - first_block > 0) {
53 if (etype == (EXT_RECORDED_ALLOCATED >> 30)) 53 if (etype == (EXT_RECORDED_ALLOCATED >> 30))
54 mark_inode_dirty(inode); 54 mark_inode_dirty(inode);
@@ -68,7 +68,7 @@ static void extent_trunc(struct inode *inode, struct extent_position *epos,
68void udf_truncate_tail_extent(struct inode *inode) 68void udf_truncate_tail_extent(struct inode *inode)
69{ 69{
70 struct extent_position epos = {}; 70 struct extent_position epos = {};
71 kernel_lb_addr eloc; 71 struct kernel_lb_addr eloc;
72 uint32_t elen, nelen; 72 uint32_t elen, nelen;
73 uint64_t lbcount = 0; 73 uint64_t lbcount = 0;
74 int8_t etype = -1, netype; 74 int8_t etype = -1, netype;
@@ -83,9 +83,9 @@ void udf_truncate_tail_extent(struct inode *inode)
83 return; 83 return;
84 84
85 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) 85 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
86 adsize = sizeof(short_ad); 86 adsize = sizeof(struct short_ad);
87 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) 87 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
88 adsize = sizeof(long_ad); 88 adsize = sizeof(struct long_ad);
89 else 89 else
90 BUG(); 90 BUG();
91 91
@@ -106,7 +106,7 @@ void udf_truncate_tail_extent(struct inode *inode)
106 (unsigned)elen); 106 (unsigned)elen);
107 nelen = elen - (lbcount - inode->i_size); 107 nelen = elen - (lbcount - inode->i_size);
108 epos.offset -= adsize; 108 epos.offset -= adsize;
109 extent_trunc(inode, &epos, eloc, etype, elen, nelen); 109 extent_trunc(inode, &epos, &eloc, etype, elen, nelen);
110 epos.offset += adsize; 110 epos.offset += adsize;
111 if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1) 111 if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1)
112 printk(KERN_ERR "udf_truncate_tail_extent(): " 112 printk(KERN_ERR "udf_truncate_tail_extent(): "
@@ -124,7 +124,7 @@ void udf_truncate_tail_extent(struct inode *inode)
124void udf_discard_prealloc(struct inode *inode) 124void udf_discard_prealloc(struct inode *inode)
125{ 125{
126 struct extent_position epos = { NULL, 0, {0, 0} }; 126 struct extent_position epos = { NULL, 0, {0, 0} };
127 kernel_lb_addr eloc; 127 struct kernel_lb_addr eloc;
128 uint32_t elen; 128 uint32_t elen;
129 uint64_t lbcount = 0; 129 uint64_t lbcount = 0;
130 int8_t etype = -1, netype; 130 int8_t etype = -1, netype;
@@ -136,9 +136,9 @@ void udf_discard_prealloc(struct inode *inode)
136 return; 136 return;
137 137
138 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) 138 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
139 adsize = sizeof(short_ad); 139 adsize = sizeof(struct short_ad);
140 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) 140 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
141 adsize = sizeof(long_ad); 141 adsize = sizeof(struct long_ad);
142 else 142 else
143 adsize = 0; 143 adsize = 0;
144 144
@@ -152,7 +152,7 @@ void udf_discard_prealloc(struct inode *inode)
152 if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { 152 if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) {
153 epos.offset -= adsize; 153 epos.offset -= adsize;
154 lbcount -= elen; 154 lbcount -= elen;
155 extent_trunc(inode, &epos, eloc, etype, elen, 0); 155 extent_trunc(inode, &epos, &eloc, etype, elen, 0);
156 if (!epos.bh) { 156 if (!epos.bh) {
157 iinfo->i_lenAlloc = 157 iinfo->i_lenAlloc =
158 epos.offset - 158 epos.offset -
@@ -200,7 +200,7 @@ static void udf_update_alloc_ext_desc(struct inode *inode,
200void udf_truncate_extents(struct inode *inode) 200void udf_truncate_extents(struct inode *inode)
201{ 201{
202 struct extent_position epos; 202 struct extent_position epos;
203 kernel_lb_addr eloc, neloc = {}; 203 struct kernel_lb_addr eloc, neloc = {};
204 uint32_t elen, nelen = 0, indirect_ext_len = 0, lenalloc; 204 uint32_t elen, nelen = 0, indirect_ext_len = 0, lenalloc;
205 int8_t etype; 205 int8_t etype;
206 struct super_block *sb = inode->i_sb; 206 struct super_block *sb = inode->i_sb;
@@ -210,9 +210,9 @@ void udf_truncate_extents(struct inode *inode)
210 struct udf_inode_info *iinfo = UDF_I(inode); 210 struct udf_inode_info *iinfo = UDF_I(inode);
211 211
212 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) 212 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
213 adsize = sizeof(short_ad); 213 adsize = sizeof(struct short_ad);
214 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) 214 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
215 adsize = sizeof(long_ad); 215 adsize = sizeof(struct long_ad);
216 else 216 else
217 BUG(); 217 BUG();
218 218
@@ -221,7 +221,7 @@ void udf_truncate_extents(struct inode *inode)
221 (inode->i_size & (sb->s_blocksize - 1)); 221 (inode->i_size & (sb->s_blocksize - 1));
222 if (etype != -1) { 222 if (etype != -1) {
223 epos.offset -= adsize; 223 epos.offset -= adsize;
224 extent_trunc(inode, &epos, eloc, etype, elen, byte_offset); 224 extent_trunc(inode, &epos, &eloc, etype, elen, byte_offset);
225 epos.offset += adsize; 225 epos.offset += adsize;
226 if (byte_offset) 226 if (byte_offset)
227 lenalloc = epos.offset; 227 lenalloc = epos.offset;
@@ -236,12 +236,12 @@ void udf_truncate_extents(struct inode *inode)
236 while ((etype = udf_current_aext(inode, &epos, &eloc, 236 while ((etype = udf_current_aext(inode, &epos, &eloc,
237 &elen, 0)) != -1) { 237 &elen, 0)) != -1) {
238 if (etype == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) { 238 if (etype == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) {
239 udf_write_aext(inode, &epos, neloc, nelen, 0); 239 udf_write_aext(inode, &epos, &neloc, nelen, 0);
240 if (indirect_ext_len) { 240 if (indirect_ext_len) {
241 /* We managed to free all extents in the 241 /* We managed to free all extents in the
242 * indirect extent - free it too */ 242 * indirect extent - free it too */
243 BUG_ON(!epos.bh); 243 BUG_ON(!epos.bh);
244 udf_free_blocks(sb, inode, epos.block, 244 udf_free_blocks(sb, inode, &epos.block,
245 0, indirect_ext_len); 245 0, indirect_ext_len);
246 } else if (!epos.bh) { 246 } else if (!epos.bh) {
247 iinfo->i_lenAlloc = lenalloc; 247 iinfo->i_lenAlloc = lenalloc;
@@ -253,7 +253,7 @@ void udf_truncate_extents(struct inode *inode)
253 epos.offset = sizeof(struct allocExtDesc); 253 epos.offset = sizeof(struct allocExtDesc);
254 epos.block = eloc; 254 epos.block = eloc;
255 epos.bh = udf_tread(sb, 255 epos.bh = udf_tread(sb,
256 udf_get_lb_pblock(sb, eloc, 0)); 256 udf_get_lb_pblock(sb, &eloc, 0));
257 if (elen) 257 if (elen)
258 indirect_ext_len = 258 indirect_ext_len =
259 (elen + sb->s_blocksize - 1) >> 259 (elen + sb->s_blocksize - 1) >>
@@ -261,7 +261,7 @@ void udf_truncate_extents(struct inode *inode)
261 else 261 else
262 indirect_ext_len = 1; 262 indirect_ext_len = 1;
263 } else { 263 } else {
264 extent_trunc(inode, &epos, eloc, etype, 264 extent_trunc(inode, &epos, &eloc, etype,
265 elen, 0); 265 elen, 0);
266 epos.offset += adsize; 266 epos.offset += adsize;
267 } 267 }
@@ -269,7 +269,7 @@ void udf_truncate_extents(struct inode *inode)
269 269
270 if (indirect_ext_len) { 270 if (indirect_ext_len) {
271 BUG_ON(!epos.bh); 271 BUG_ON(!epos.bh);
272 udf_free_blocks(sb, inode, epos.block, 0, 272 udf_free_blocks(sb, inode, &epos.block, 0,
273 indirect_ext_len); 273 indirect_ext_len);
274 } else if (!epos.bh) { 274 } else if (!epos.bh) {
275 iinfo->i_lenAlloc = lenalloc; 275 iinfo->i_lenAlloc = lenalloc;
@@ -278,7 +278,7 @@ void udf_truncate_extents(struct inode *inode)
278 udf_update_alloc_ext_desc(inode, &epos, lenalloc); 278 udf_update_alloc_ext_desc(inode, &epos, lenalloc);
279 } else if (inode->i_size) { 279 } else if (inode->i_size) {
280 if (byte_offset) { 280 if (byte_offset) {
281 kernel_long_ad extent; 281 struct kernel_long_ad extent;
282 282
283 /* 283 /*
284 * OK, there is not extent covering inode->i_size and 284 * OK, there is not extent covering inode->i_size and
diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h
index 4f86b1d98a5d..e58d1de41073 100644
--- a/fs/udf/udf_i.h
+++ b/fs/udf/udf_i.h
@@ -4,7 +4,7 @@
4struct udf_inode_info { 4struct udf_inode_info {
5 struct timespec i_crtime; 5 struct timespec i_crtime;
6 /* Physical address of inode */ 6 /* Physical address of inode */
7 kernel_lb_addr i_location; 7 struct kernel_lb_addr i_location;
8 __u64 i_unique; 8 __u64 i_unique;
9 __u32 i_lenEAttr; 9 __u32 i_lenEAttr;
10 __u32 i_lenAlloc; 10 __u32 i_lenAlloc;
@@ -17,8 +17,8 @@ struct udf_inode_info {
17 unsigned i_strat4096 : 1; 17 unsigned i_strat4096 : 1;
18 unsigned reserved : 26; 18 unsigned reserved : 26;
19 union { 19 union {
20 short_ad *i_sad; 20 struct short_ad *i_sad;
21 long_ad *i_lad; 21 struct long_ad *i_lad;
22 __u8 *i_data; 22 __u8 *i_data;
23 } i_ext; 23 } i_ext;
24 struct inode vfs_inode; 24 struct inode vfs_inode;
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 1c1c514a9725..d113b72c2768 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -30,6 +30,7 @@
30#define UDF_FLAG_GID_SET 16 30#define UDF_FLAG_GID_SET 16
31#define UDF_FLAG_SESSION_SET 17 31#define UDF_FLAG_SESSION_SET 17
32#define UDF_FLAG_LASTBLOCK_SET 18 32#define UDF_FLAG_LASTBLOCK_SET 18
33#define UDF_FLAG_BLOCKSIZE_SET 19
33 34
34#define UDF_PART_FLAG_UNALLOC_BITMAP 0x0001 35#define UDF_PART_FLAG_UNALLOC_BITMAP 0x0001
35#define UDF_PART_FLAG_UNALLOC_TABLE 0x0002 36#define UDF_PART_FLAG_UNALLOC_TABLE 0x0002
@@ -48,6 +49,8 @@
48#define UDF_SPARABLE_MAP15 0x1522U 49#define UDF_SPARABLE_MAP15 0x1522U
49#define UDF_METADATA_MAP25 0x2511U 50#define UDF_METADATA_MAP25 0x2511U
50 51
52#define UDF_INVALID_MODE ((mode_t)-1)
53
51#pragma pack(1) /* XXX(hch): Why? This file just defines in-core structures */ 54#pragma pack(1) /* XXX(hch): Why? This file just defines in-core structures */
52 55
53struct udf_meta_data { 56struct udf_meta_data {
@@ -114,7 +117,7 @@ struct udf_sb_info {
114 117
115 /* Sector headers */ 118 /* Sector headers */
116 __s32 s_session; 119 __s32 s_session;
117 __u32 s_anchor[3]; 120 __u32 s_anchor;
118 __u32 s_last_block; 121 __u32 s_last_block;
119 122
120 struct buffer_head *s_lvid_bh; 123 struct buffer_head *s_lvid_bh;
@@ -123,6 +126,8 @@ struct udf_sb_info {
123 mode_t s_umask; 126 mode_t s_umask;
124 gid_t s_gid; 127 gid_t s_gid;
125 uid_t s_uid; 128 uid_t s_uid;
129 mode_t s_fmode;
130 mode_t s_dmode;
126 131
127 /* Root Info */ 132 /* Root Info */
128 struct timespec s_record_time; 133 struct timespec s_record_time;
@@ -143,6 +148,8 @@ struct udf_sb_info {
143 struct inode *s_vat_inode; 148 struct inode *s_vat_inode;
144 149
145 struct mutex s_alloc_mutex; 150 struct mutex s_alloc_mutex;
151 /* Protected by s_alloc_mutex */
152 unsigned int s_lvid_dirty;
146}; 153};
147 154
148static inline struct udf_sb_info *UDF_SB(struct super_block *sb) 155static inline struct udf_sb_info *UDF_SB(struct super_block *sb)
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 8ec865de5f13..cac51b77a5d1 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -62,10 +62,8 @@ static inline size_t udf_ext0_offset(struct inode *inode)
62 return 0; 62 return 0;
63} 63}
64 64
65#define udf_get_lb_pblock(sb,loc,offset) udf_get_pblock((sb), (loc).logicalBlockNum, (loc).partitionReferenceNum, (offset))
66
67/* computes tag checksum */ 65/* computes tag checksum */
68u8 udf_tag_checksum(const tag *t); 66u8 udf_tag_checksum(const struct tag *t);
69 67
70struct dentry; 68struct dentry;
71struct inode; 69struct inode;
@@ -95,7 +93,7 @@ struct udf_vds_record {
95}; 93};
96 94
97struct generic_desc { 95struct generic_desc {
98 tag descTag; 96 struct tag descTag;
99 __le32 volDescSeqNum; 97 __le32 volDescSeqNum;
100}; 98};
101 99
@@ -108,11 +106,22 @@ struct ustr {
108struct extent_position { 106struct extent_position {
109 struct buffer_head *bh; 107 struct buffer_head *bh;
110 uint32_t offset; 108 uint32_t offset;
111 kernel_lb_addr block; 109 struct kernel_lb_addr block;
112}; 110};
113 111
114/* super.c */ 112/* super.c */
115extern void udf_warning(struct super_block *, const char *, const char *, ...); 113extern void udf_warning(struct super_block *, const char *, const char *, ...);
114static inline void udf_updated_lvid(struct super_block *sb)
115{
116 struct buffer_head *bh = UDF_SB(sb)->s_lvid_bh;
117
118 BUG_ON(!bh);
119 WARN_ON_ONCE(((struct logicalVolIntegrityDesc *)
120 bh->b_data)->integrityType !=
121 cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN));
122 sb->s_dirt = 1;
123 UDF_SB(sb)->s_lvid_dirty = 1;
124}
116 125
117/* namei.c */ 126/* namei.c */
118extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *, 127extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
@@ -124,7 +133,7 @@ extern int udf_ioctl(struct inode *, struct file *, unsigned int,
124 unsigned long); 133 unsigned long);
125 134
126/* inode.c */ 135/* inode.c */
127extern struct inode *udf_iget(struct super_block *, kernel_lb_addr); 136extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *);
128extern int udf_sync_inode(struct inode *); 137extern int udf_sync_inode(struct inode *);
129extern void udf_expand_file_adinicb(struct inode *, int, int *); 138extern void udf_expand_file_adinicb(struct inode *, int, int *);
130extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *); 139extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *);
@@ -136,19 +145,19 @@ extern void udf_clear_inode(struct inode *);
136extern int udf_write_inode(struct inode *, int); 145extern int udf_write_inode(struct inode *, int);
137extern long udf_block_map(struct inode *, sector_t); 146extern long udf_block_map(struct inode *, sector_t);
138extern int udf_extend_file(struct inode *, struct extent_position *, 147extern int udf_extend_file(struct inode *, struct extent_position *,
139 kernel_long_ad *, sector_t); 148 struct kernel_long_ad *, sector_t);
140extern int8_t inode_bmap(struct inode *, sector_t, struct extent_position *, 149extern int8_t inode_bmap(struct inode *, sector_t, struct extent_position *,
141 kernel_lb_addr *, uint32_t *, sector_t *); 150 struct kernel_lb_addr *, uint32_t *, sector_t *);
142extern int8_t udf_add_aext(struct inode *, struct extent_position *, 151extern int8_t udf_add_aext(struct inode *, struct extent_position *,
143 kernel_lb_addr, uint32_t, int); 152 struct kernel_lb_addr *, uint32_t, int);
144extern int8_t udf_write_aext(struct inode *, struct extent_position *, 153extern int8_t udf_write_aext(struct inode *, struct extent_position *,
145 kernel_lb_addr, uint32_t, int); 154 struct kernel_lb_addr *, uint32_t, int);
146extern int8_t udf_delete_aext(struct inode *, struct extent_position, 155extern int8_t udf_delete_aext(struct inode *, struct extent_position,
147 kernel_lb_addr, uint32_t); 156 struct kernel_lb_addr, uint32_t);
148extern int8_t udf_next_aext(struct inode *, struct extent_position *, 157extern int8_t udf_next_aext(struct inode *, struct extent_position *,
149 kernel_lb_addr *, uint32_t *, int); 158 struct kernel_lb_addr *, uint32_t *, int);
150extern int8_t udf_current_aext(struct inode *, struct extent_position *, 159extern int8_t udf_current_aext(struct inode *, struct extent_position *,
151 kernel_lb_addr *, uint32_t *, int); 160 struct kernel_lb_addr *, uint32_t *, int);
152 161
153/* misc.c */ 162/* misc.c */
154extern struct buffer_head *udf_tgetblk(struct super_block *, int); 163extern struct buffer_head *udf_tgetblk(struct super_block *, int);
@@ -160,7 +169,7 @@ extern struct genericFormat *udf_get_extendedattr(struct inode *, uint32_t,
160extern struct buffer_head *udf_read_tagged(struct super_block *, uint32_t, 169extern struct buffer_head *udf_read_tagged(struct super_block *, uint32_t,
161 uint32_t, uint16_t *); 170 uint32_t, uint16_t *);
162extern struct buffer_head *udf_read_ptagged(struct super_block *, 171extern struct buffer_head *udf_read_ptagged(struct super_block *,
163 kernel_lb_addr, uint32_t, 172 struct kernel_lb_addr *, uint32_t,
164 uint16_t *); 173 uint16_t *);
165extern void udf_update_tag(char *, int); 174extern void udf_update_tag(char *, int);
166extern void udf_new_tag(char *, uint16_t, uint16_t, uint16_t, uint32_t, int); 175extern void udf_new_tag(char *, uint16_t, uint16_t, uint16_t, uint32_t, int);
@@ -182,6 +191,14 @@ extern uint32_t udf_get_pblock_meta25(struct super_block *, uint32_t, uint16_t,
182 uint32_t); 191 uint32_t);
183extern int udf_relocate_blocks(struct super_block *, long, long *); 192extern int udf_relocate_blocks(struct super_block *, long, long *);
184 193
194static inline uint32_t
195udf_get_lb_pblock(struct super_block *sb, struct kernel_lb_addr *loc,
196 uint32_t offset)
197{
198 return udf_get_pblock(sb, loc->logicalBlockNum,
199 loc->partitionReferenceNum, offset);
200}
201
185/* unicode.c */ 202/* unicode.c */
186extern int udf_get_filename(struct super_block *, uint8_t *, uint8_t *, int); 203extern int udf_get_filename(struct super_block *, uint8_t *, uint8_t *, int);
187extern int udf_put_filename(struct super_block *, const uint8_t *, uint8_t *, 204extern int udf_put_filename(struct super_block *, const uint8_t *, uint8_t *,
@@ -200,7 +217,7 @@ extern void udf_truncate_extents(struct inode *);
200 217
201/* balloc.c */ 218/* balloc.c */
202extern void udf_free_blocks(struct super_block *, struct inode *, 219extern void udf_free_blocks(struct super_block *, struct inode *,
203 kernel_lb_addr, uint32_t, uint32_t); 220 struct kernel_lb_addr *, uint32_t, uint32_t);
204extern int udf_prealloc_blocks(struct super_block *, struct inode *, uint16_t, 221extern int udf_prealloc_blocks(struct super_block *, struct inode *, uint16_t,
205 uint32_t, uint32_t); 222 uint32_t, uint32_t);
206extern int udf_new_block(struct super_block *, struct inode *, uint16_t, 223extern int udf_new_block(struct super_block *, struct inode *, uint16_t,
@@ -214,16 +231,16 @@ extern struct fileIdentDesc *udf_fileident_read(struct inode *, loff_t *,
214 struct udf_fileident_bh *, 231 struct udf_fileident_bh *,
215 struct fileIdentDesc *, 232 struct fileIdentDesc *,
216 struct extent_position *, 233 struct extent_position *,
217 kernel_lb_addr *, uint32_t *, 234 struct kernel_lb_addr *, uint32_t *,
218 sector_t *); 235 sector_t *);
219extern struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, 236extern struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize,
220 int *offset); 237 int *offset);
221extern long_ad *udf_get_filelongad(uint8_t *, int, uint32_t *, int); 238extern struct long_ad *udf_get_filelongad(uint8_t *, int, uint32_t *, int);
222extern short_ad *udf_get_fileshortad(uint8_t *, int, uint32_t *, int); 239extern struct short_ad *udf_get_fileshortad(uint8_t *, int, uint32_t *, int);
223 240
224/* udftime.c */ 241/* udftime.c */
225extern struct timespec *udf_disk_stamp_to_time(struct timespec *dest, 242extern struct timespec *udf_disk_stamp_to_time(struct timespec *dest,
226 timestamp src); 243 struct timestamp src);
227extern timestamp *udf_time_to_disk_stamp(timestamp *dest, struct timespec src); 244extern struct timestamp *udf_time_to_disk_stamp(struct timestamp *dest, struct timespec src);
228 245
229#endif /* __UDF_DECL_H */ 246#endif /* __UDF_DECL_H */
diff --git a/fs/udf/udfend.h b/fs/udf/udfend.h
index 489f52fb428c..6a9f3a9cc428 100644
--- a/fs/udf/udfend.h
+++ b/fs/udf/udfend.h
@@ -4,9 +4,9 @@
4#include <asm/byteorder.h> 4#include <asm/byteorder.h>
5#include <linux/string.h> 5#include <linux/string.h>
6 6
7static inline kernel_lb_addr lelb_to_cpu(lb_addr in) 7static inline struct kernel_lb_addr lelb_to_cpu(struct lb_addr in)
8{ 8{
9 kernel_lb_addr out; 9 struct kernel_lb_addr out;
10 10
11 out.logicalBlockNum = le32_to_cpu(in.logicalBlockNum); 11 out.logicalBlockNum = le32_to_cpu(in.logicalBlockNum);
12 out.partitionReferenceNum = le16_to_cpu(in.partitionReferenceNum); 12 out.partitionReferenceNum = le16_to_cpu(in.partitionReferenceNum);
@@ -14,9 +14,9 @@ static inline kernel_lb_addr lelb_to_cpu(lb_addr in)
14 return out; 14 return out;
15} 15}
16 16
17static inline lb_addr cpu_to_lelb(kernel_lb_addr in) 17static inline struct lb_addr cpu_to_lelb(struct kernel_lb_addr in)
18{ 18{
19 lb_addr out; 19 struct lb_addr out;
20 20
21 out.logicalBlockNum = cpu_to_le32(in.logicalBlockNum); 21 out.logicalBlockNum = cpu_to_le32(in.logicalBlockNum);
22 out.partitionReferenceNum = cpu_to_le16(in.partitionReferenceNum); 22 out.partitionReferenceNum = cpu_to_le16(in.partitionReferenceNum);
@@ -24,9 +24,9 @@ static inline lb_addr cpu_to_lelb(kernel_lb_addr in)
24 return out; 24 return out;
25} 25}
26 26
27static inline short_ad lesa_to_cpu(short_ad in) 27static inline struct short_ad lesa_to_cpu(struct short_ad in)
28{ 28{
29 short_ad out; 29 struct short_ad out;
30 30
31 out.extLength = le32_to_cpu(in.extLength); 31 out.extLength = le32_to_cpu(in.extLength);
32 out.extPosition = le32_to_cpu(in.extPosition); 32 out.extPosition = le32_to_cpu(in.extPosition);
@@ -34,9 +34,9 @@ static inline short_ad lesa_to_cpu(short_ad in)
34 return out; 34 return out;
35} 35}
36 36
37static inline short_ad cpu_to_lesa(short_ad in) 37static inline struct short_ad cpu_to_lesa(struct short_ad in)
38{ 38{
39 short_ad out; 39 struct short_ad out;
40 40
41 out.extLength = cpu_to_le32(in.extLength); 41 out.extLength = cpu_to_le32(in.extLength);
42 out.extPosition = cpu_to_le32(in.extPosition); 42 out.extPosition = cpu_to_le32(in.extPosition);
@@ -44,9 +44,9 @@ static inline short_ad cpu_to_lesa(short_ad in)
44 return out; 44 return out;
45} 45}
46 46
47static inline kernel_long_ad lela_to_cpu(long_ad in) 47static inline struct kernel_long_ad lela_to_cpu(struct long_ad in)
48{ 48{
49 kernel_long_ad out; 49 struct kernel_long_ad out;
50 50
51 out.extLength = le32_to_cpu(in.extLength); 51 out.extLength = le32_to_cpu(in.extLength);
52 out.extLocation = lelb_to_cpu(in.extLocation); 52 out.extLocation = lelb_to_cpu(in.extLocation);
@@ -54,9 +54,9 @@ static inline kernel_long_ad lela_to_cpu(long_ad in)
54 return out; 54 return out;
55} 55}
56 56
57static inline long_ad cpu_to_lela(kernel_long_ad in) 57static inline struct long_ad cpu_to_lela(struct kernel_long_ad in)
58{ 58{
59 long_ad out; 59 struct long_ad out;
60 60
61 out.extLength = cpu_to_le32(in.extLength); 61 out.extLength = cpu_to_le32(in.extLength);
62 out.extLocation = cpu_to_lelb(in.extLocation); 62 out.extLocation = cpu_to_lelb(in.extLocation);
@@ -64,9 +64,9 @@ static inline long_ad cpu_to_lela(kernel_long_ad in)
64 return out; 64 return out;
65} 65}
66 66
67static inline kernel_extent_ad leea_to_cpu(extent_ad in) 67static inline struct kernel_extent_ad leea_to_cpu(struct extent_ad in)
68{ 68{
69 kernel_extent_ad out; 69 struct kernel_extent_ad out;
70 70
71 out.extLength = le32_to_cpu(in.extLength); 71 out.extLength = le32_to_cpu(in.extLength);
72 out.extLocation = le32_to_cpu(in.extLocation); 72 out.extLocation = le32_to_cpu(in.extLocation);
diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c
index 5f811655c9b5..b8c828c4d200 100644
--- a/fs/udf/udftime.c
+++ b/fs/udf/udftime.c
@@ -85,7 +85,8 @@ extern struct timezone sys_tz;
85#define SECS_PER_HOUR (60 * 60) 85#define SECS_PER_HOUR (60 * 60)
86#define SECS_PER_DAY (SECS_PER_HOUR * 24) 86#define SECS_PER_DAY (SECS_PER_HOUR * 24)
87 87
88struct timespec *udf_disk_stamp_to_time(struct timespec *dest, timestamp src) 88struct timespec *
89udf_disk_stamp_to_time(struct timespec *dest, struct timestamp src)
89{ 90{
90 int yday; 91 int yday;
91 u16 typeAndTimezone = le16_to_cpu(src.typeAndTimezone); 92 u16 typeAndTimezone = le16_to_cpu(src.typeAndTimezone);
@@ -116,7 +117,8 @@ struct timespec *udf_disk_stamp_to_time(struct timespec *dest, timestamp src)
116 return dest; 117 return dest;
117} 118}
118 119
119timestamp *udf_time_to_disk_stamp(timestamp *dest, struct timespec ts) 120struct timestamp *
121udf_time_to_disk_stamp(struct timestamp *dest, struct timespec ts)
120{ 122{
121 long int days, rem, y; 123 long int days, rem, y;
122 const unsigned short int *ip; 124 const unsigned short int *ip;
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 9fdf8c93c58e..cefa8c8913e6 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -254,7 +254,7 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
254{ 254{
255 const uint8_t *ocu; 255 const uint8_t *ocu;
256 uint8_t cmp_id, ocu_len; 256 uint8_t cmp_id, ocu_len;
257 int i; 257 int i, len;
258 258
259 259
260 ocu_len = ocu_i->u_len; 260 ocu_len = ocu_i->u_len;
@@ -279,8 +279,13 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
279 if (cmp_id == 16) 279 if (cmp_id == 16)
280 c = (c << 8) | ocu[i++]; 280 c = (c << 8) | ocu[i++];
281 281
282 utf_o->u_len += nls->uni2char(c, &utf_o->u_name[utf_o->u_len], 282 len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len],
283 UDF_NAME_LEN - utf_o->u_len); 283 UDF_NAME_LEN - utf_o->u_len);
284 /* Valid character? */
285 if (len >= 0)
286 utf_o->u_len += len;
287 else
288 utf_o->u_name[utf_o->u_len++] = '?';
284 } 289 }
285 utf_o->u_cmpID = 8; 290 utf_o->u_cmpID = 8;
286 291
@@ -290,7 +295,8 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
290static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, 295static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni,
291 int length) 296 int length)
292{ 297{
293 unsigned len, i, max_val; 298 int len;
299 unsigned i, max_val;
294 uint16_t uni_char; 300 uint16_t uni_char;
295 int u_len; 301 int u_len;
296 302
@@ -302,8 +308,13 @@ try_again:
302 u_len = 0U; 308 u_len = 0U;
303 for (i = 0U; i < uni->u_len; i++) { 309 for (i = 0U; i < uni->u_len; i++) {
304 len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char); 310 len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char);
305 if (len <= 0) 311 if (!len)
306 continue; 312 continue;
313 /* Invalid character, deal with it */
314 if (len < 0) {
315 len = 1;
316 uni_char = '?';
317 }
307 318
308 if (uni_char > max_val) { 319 if (uni_char > max_val) {
309 max_val = 0xffffU; 320 max_val = 0xffffU;
@@ -324,34 +335,43 @@ try_again:
324int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname, 335int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname,
325 int flen) 336 int flen)
326{ 337{
327 struct ustr filename, unifilename; 338 struct ustr *filename, *unifilename;
328 int len; 339 int len = 0;
329 340
330 if (udf_build_ustr_exact(&unifilename, sname, flen)) 341 filename = kmalloc(sizeof(struct ustr), GFP_NOFS);
342 if (!filename)
331 return 0; 343 return 0;
332 344
345 unifilename = kmalloc(sizeof(struct ustr), GFP_NOFS);
346 if (!unifilename)
347 goto out1;
348
349 if (udf_build_ustr_exact(unifilename, sname, flen))
350 goto out2;
351
333 if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { 352 if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
334 if (!udf_CS0toUTF8(&filename, &unifilename)) { 353 if (!udf_CS0toUTF8(filename, unifilename)) {
335 udf_debug("Failed in udf_get_filename: sname = %s\n", 354 udf_debug("Failed in udf_get_filename: sname = %s\n",
336 sname); 355 sname);
337 return 0; 356 goto out2;
338 } 357 }
339 } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { 358 } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
340 if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, &filename, 359 if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, filename,
341 &unifilename)) { 360 unifilename)) {
342 udf_debug("Failed in udf_get_filename: sname = %s\n", 361 udf_debug("Failed in udf_get_filename: sname = %s\n",
343 sname); 362 sname);
344 return 0; 363 goto out2;
345 } 364 }
346 } else 365 } else
347 return 0; 366 goto out2;
348 367
349 len = udf_translate_to_linux(dname, filename.u_name, filename.u_len, 368 len = udf_translate_to_linux(dname, filename->u_name, filename->u_len,
350 unifilename.u_name, unifilename.u_len); 369 unifilename->u_name, unifilename->u_len);
351 if (len) 370out2:
352 return len; 371 kfree(unifilename);
353 372out1:
354 return 0; 373 kfree(filename);
374 return len;
355} 375}
356 376
357int udf_put_filename(struct super_block *sb, const uint8_t *sname, 377int udf_put_filename(struct super_block *sb, const uint8_t *sname,
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index c3dc491fff89..60f107e47fe9 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -33,6 +33,7 @@ xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \
33 xfs_qm_syscalls.o \ 33 xfs_qm_syscalls.o \
34 xfs_qm_bhv.o \ 34 xfs_qm_bhv.o \
35 xfs_qm.o) 35 xfs_qm.o)
36xfs-$(CONFIG_XFS_QUOTA) += linux-2.6/xfs_quotaops.o
36 37
37ifeq ($(CONFIG_XFS_QUOTA),y) 38ifeq ($(CONFIG_XFS_QUOTA),y)
38xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o 39xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o
diff --git a/fs/xfs/linux-2.6/mutex.h b/fs/xfs/linux-2.6/mutex.h
deleted file mode 100644
index 2a88d56c4dc2..000000000000
--- a/fs/xfs/linux-2.6/mutex.h
+++ /dev/null
@@ -1,25 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_SUPPORT_MUTEX_H__
19#define __XFS_SUPPORT_MUTEX_H__
20
21#include <linux/mutex.h>
22
23typedef struct mutex mutex_t;
24
25#endif /* __XFS_SUPPORT_MUTEX_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index de3a198f771e..c13f67300fe7 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1623,4 +1623,5 @@ const struct address_space_operations xfs_address_space_operations = {
1623 .bmap = xfs_vm_bmap, 1623 .bmap = xfs_vm_bmap,
1624 .direct_IO = xfs_vm_direct_IO, 1624 .direct_IO = xfs_vm_direct_IO,
1625 .migratepage = buffer_migrate_page, 1625 .migratepage = buffer_migrate_page,
1626 .is_partially_uptodate = block_is_partially_uptodate,
1626}; 1627};
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 4bd112313f33..d0b499418a7d 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -34,6 +34,7 @@
34#include "xfs_dir2_sf.h" 34#include "xfs_dir2_sf.h"
35#include "xfs_dinode.h" 35#include "xfs_dinode.h"
36#include "xfs_inode.h" 36#include "xfs_inode.h"
37#include "xfs_ioctl.h"
37#include "xfs_btree.h" 38#include "xfs_btree.h"
38#include "xfs_ialloc.h" 39#include "xfs_ialloc.h"
39#include "xfs_rtalloc.h" 40#include "xfs_rtalloc.h"
@@ -78,92 +79,74 @@ xfs_find_handle(
78 int hsize; 79 int hsize;
79 xfs_handle_t handle; 80 xfs_handle_t handle;
80 struct inode *inode; 81 struct inode *inode;
82 struct file *file = NULL;
83 struct path path;
84 int error;
85 struct xfs_inode *ip;
81 86
82 memset((char *)&handle, 0, sizeof(handle)); 87 if (cmd == XFS_IOC_FD_TO_HANDLE) {
83 88 file = fget(hreq->fd);
84 switch (cmd) { 89 if (!file)
85 case XFS_IOC_PATH_TO_FSHANDLE: 90 return -EBADF;
86 case XFS_IOC_PATH_TO_HANDLE: { 91 inode = file->f_path.dentry->d_inode;
87 struct path path; 92 } else {
88 int error = user_lpath((const char __user *)hreq->path, &path); 93 error = user_lpath((const char __user *)hreq->path, &path);
89 if (error) 94 if (error)
90 return error; 95 return error;
91 96 inode = path.dentry->d_inode;
92 ASSERT(path.dentry);
93 ASSERT(path.dentry->d_inode);
94 inode = igrab(path.dentry->d_inode);
95 path_put(&path);
96 break;
97 } 97 }
98 ip = XFS_I(inode);
98 99
99 case XFS_IOC_FD_TO_HANDLE: { 100 /*
100 struct file *file; 101 * We can only generate handles for inodes residing on a XFS filesystem,
101 102 * and only for regular files, directories or symbolic links.
102 file = fget(hreq->fd); 103 */
103 if (!file) 104 error = -EINVAL;
104 return -EBADF; 105 if (inode->i_sb->s_magic != XFS_SB_MAGIC)
106 goto out_put;
105 107
106 ASSERT(file->f_path.dentry); 108 error = -EBADF;
107 ASSERT(file->f_path.dentry->d_inode); 109 if (!S_ISREG(inode->i_mode) &&
108 inode = igrab(file->f_path.dentry->d_inode); 110 !S_ISDIR(inode->i_mode) &&
109 fput(file); 111 !S_ISLNK(inode->i_mode))
110 break; 112 goto out_put;
111 }
112 113
113 default:
114 ASSERT(0);
115 return -XFS_ERROR(EINVAL);
116 }
117 114
118 if (inode->i_sb->s_magic != XFS_SB_MAGIC) { 115 memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t));
119 /* we're not in XFS anymore, Toto */
120 iput(inode);
121 return -XFS_ERROR(EINVAL);
122 }
123 116
124 switch (inode->i_mode & S_IFMT) { 117 if (cmd == XFS_IOC_PATH_TO_FSHANDLE) {
125 case S_IFREG: 118 /*
126 case S_IFDIR: 119 * This handle only contains an fsid, zero the rest.
127 case S_IFLNK: 120 */
128 break; 121 memset(&handle.ha_fid, 0, sizeof(handle.ha_fid));
129 default: 122 hsize = sizeof(xfs_fsid_t);
130 iput(inode); 123 } else {
131 return -XFS_ERROR(EBADF);
132 }
133
134 /* now we can grab the fsid */
135 memcpy(&handle.ha_fsid, XFS_I(inode)->i_mount->m_fixedfsid,
136 sizeof(xfs_fsid_t));
137 hsize = sizeof(xfs_fsid_t);
138
139 if (cmd != XFS_IOC_PATH_TO_FSHANDLE) {
140 xfs_inode_t *ip = XFS_I(inode);
141 int lock_mode; 124 int lock_mode;
142 125
143 /* need to get access to the xfs_inode to read the generation */
144 lock_mode = xfs_ilock_map_shared(ip); 126 lock_mode = xfs_ilock_map_shared(ip);
145
146 /* fill in fid section of handle from inode */
147 handle.ha_fid.fid_len = sizeof(xfs_fid_t) - 127 handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
148 sizeof(handle.ha_fid.fid_len); 128 sizeof(handle.ha_fid.fid_len);
149 handle.ha_fid.fid_pad = 0; 129 handle.ha_fid.fid_pad = 0;
150 handle.ha_fid.fid_gen = ip->i_d.di_gen; 130 handle.ha_fid.fid_gen = ip->i_d.di_gen;
151 handle.ha_fid.fid_ino = ip->i_ino; 131 handle.ha_fid.fid_ino = ip->i_ino;
152
153 xfs_iunlock_map_shared(ip, lock_mode); 132 xfs_iunlock_map_shared(ip, lock_mode);
154 133
155 hsize = XFS_HSIZE(handle); 134 hsize = XFS_HSIZE(handle);
156 } 135 }
157 136
158 /* now copy our handle into the user buffer & write out the size */ 137 error = -EFAULT;
159 if (copy_to_user(hreq->ohandle, &handle, hsize) || 138 if (copy_to_user(hreq->ohandle, &handle, hsize) ||
160 copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) { 139 copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32)))
161 iput(inode); 140 goto out_put;
162 return -XFS_ERROR(EFAULT);
163 }
164 141
165 iput(inode); 142 error = 0;
166 return 0; 143
144 out_put:
145 if (cmd == XFS_IOC_FD_TO_HANDLE)
146 fput(file);
147 else
148 path_put(&path);
149 return error;
167} 150}
168 151
169/* 152/*
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 2940612e3aeb..6075382336d7 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -211,8 +211,13 @@ xfs_vn_mknod(
211 * Irix uses Missed'em'V split, but doesn't want to see 211 * Irix uses Missed'em'V split, but doesn't want to see
212 * the upper 5 bits of (14bit) major. 212 * the upper 5 bits of (14bit) major.
213 */ 213 */
214 if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) 214 if (S_ISCHR(mode) || S_ISBLK(mode)) {
215 return -EINVAL; 215 if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
216 return -EINVAL;
217 rdev = sysv_encode_dev(rdev);
218 } else {
219 rdev = 0;
220 }
216 221
217 if (test_default_acl && test_default_acl(dir)) { 222 if (test_default_acl && test_default_acl(dir)) {
218 if (!_ACL_ALLOC(default_acl)) { 223 if (!_ACL_ALLOC(default_acl)) {
@@ -224,28 +229,11 @@ xfs_vn_mknod(
224 } 229 }
225 } 230 }
226 231
227 xfs_dentry_to_name(&name, dentry);
228
229 if (IS_POSIXACL(dir) && !default_acl) 232 if (IS_POSIXACL(dir) && !default_acl)
230 mode &= ~current_umask(); 233 mode &= ~current_umask();
231 234
232 switch (mode & S_IFMT) { 235 xfs_dentry_to_name(&name, dentry);
233 case S_IFCHR: 236 error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL);
234 case S_IFBLK:
235 case S_IFIFO:
236 case S_IFSOCK:
237 rdev = sysv_encode_dev(rdev);
238 case S_IFREG:
239 error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL);
240 break;
241 case S_IFDIR:
242 error = xfs_mkdir(XFS_I(dir), &name, mode, &ip, NULL);
243 break;
244 default:
245 error = EINVAL;
246 break;
247 }
248
249 if (unlikely(error)) 237 if (unlikely(error))
250 goto out_free_acl; 238 goto out_free_acl;
251 239
@@ -553,9 +541,6 @@ xfs_vn_getattr(
553 stat->uid = ip->i_d.di_uid; 541 stat->uid = ip->i_d.di_uid;
554 stat->gid = ip->i_d.di_gid; 542 stat->gid = ip->i_d.di_gid;
555 stat->ino = ip->i_ino; 543 stat->ino = ip->i_ino;
556#if XFS_BIG_INUMS
557 stat->ino += mp->m_inoadd;
558#endif
559 stat->atime = inode->i_atime; 544 stat->atime = inode->i_atime;
560 stat->mtime.tv_sec = ip->i_d.di_mtime.t_sec; 545 stat->mtime.tv_sec = ip->i_d.di_mtime.t_sec;
561 stat->mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; 546 stat->mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 507492d6dccd..f65a53f8752f 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -38,7 +38,6 @@
38#include <kmem.h> 38#include <kmem.h>
39#include <mrlock.h> 39#include <mrlock.h>
40#include <sv.h> 40#include <sv.h>
41#include <mutex.h>
42#include <time.h> 41#include <time.h>
43 42
44#include <support/ktrace.h> 43#include <support/ktrace.h>
@@ -51,6 +50,7 @@
51#include <linux/blkdev.h> 50#include <linux/blkdev.h>
52#include <linux/slab.h> 51#include <linux/slab.h>
53#include <linux/module.h> 52#include <linux/module.h>
53#include <linux/mutex.h>
54#include <linux/file.h> 54#include <linux/file.h>
55#include <linux/swap.h> 55#include <linux/swap.h>
56#include <linux/errno.h> 56#include <linux/errno.h>
@@ -147,17 +147,6 @@
147#define SYNCHRONIZE() barrier() 147#define SYNCHRONIZE() barrier()
148#define __return_address __builtin_return_address(0) 148#define __return_address __builtin_return_address(0)
149 149
150/*
151 * IRIX (BSD) quotactl makes use of separate commands for user/group,
152 * whereas on Linux the syscall encodes this information into the cmd
153 * field (see the QCMD macro in quota.h). These macros help keep the
154 * code portable - they are not visible from the syscall interface.
155 */
156#define Q_XSETGQLIM XQM_CMD(8) /* set groups disk limits */
157#define Q_XGETGQUOTA XQM_CMD(9) /* get groups disk limits */
158#define Q_XSETPQLIM XQM_CMD(10) /* set projects disk limits */
159#define Q_XGETPQUOTA XQM_CMD(11) /* get projects disk limits */
160
161#define dfltprid 0 150#define dfltprid 0
162#define MAXPATHLEN 1024 151#define MAXPATHLEN 1024
163 152
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
new file mode 100644
index 000000000000..94d9a633d3d9
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -0,0 +1,157 @@
1/*
2 * Copyright (c) 2008, Christoph Hellwig
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_dmapi.h"
20#include "xfs_sb.h"
21#include "xfs_inum.h"
22#include "xfs_ag.h"
23#include "xfs_mount.h"
24#include "xfs_quota.h"
25#include "xfs_log.h"
26#include "xfs_trans.h"
27#include "xfs_bmap_btree.h"
28#include "xfs_inode.h"
29#include "quota/xfs_qm.h"
30#include <linux/quota.h>
31
32
33STATIC int
34xfs_quota_type(int type)
35{
36 switch (type) {
37 case USRQUOTA:
38 return XFS_DQ_USER;
39 case GRPQUOTA:
40 return XFS_DQ_GROUP;
41 default:
42 return XFS_DQ_PROJ;
43 }
44}
45
46STATIC int
47xfs_fs_quota_sync(
48 struct super_block *sb,
49 int type)
50{
51 struct xfs_mount *mp = XFS_M(sb);
52
53 if (!XFS_IS_QUOTA_RUNNING(mp))
54 return -ENOSYS;
55 return -xfs_sync_inodes(mp, SYNC_DELWRI);
56}
57
58STATIC int
59xfs_fs_get_xstate(
60 struct super_block *sb,
61 struct fs_quota_stat *fqs)
62{
63 struct xfs_mount *mp = XFS_M(sb);
64
65 if (!XFS_IS_QUOTA_RUNNING(mp))
66 return -ENOSYS;
67 return -xfs_qm_scall_getqstat(mp, fqs);
68}
69
70STATIC int
71xfs_fs_set_xstate(
72 struct super_block *sb,
73 unsigned int uflags,
74 int op)
75{
76 struct xfs_mount *mp = XFS_M(sb);
77 unsigned int flags = 0;
78
79 if (sb->s_flags & MS_RDONLY)
80 return -EROFS;
81 if (!XFS_IS_QUOTA_RUNNING(mp))
82 return -ENOSYS;
83 if (!capable(CAP_SYS_ADMIN))
84 return -EPERM;
85
86 if (uflags & XFS_QUOTA_UDQ_ACCT)
87 flags |= XFS_UQUOTA_ACCT;
88 if (uflags & XFS_QUOTA_PDQ_ACCT)
89 flags |= XFS_PQUOTA_ACCT;
90 if (uflags & XFS_QUOTA_GDQ_ACCT)
91 flags |= XFS_GQUOTA_ACCT;
92 if (uflags & XFS_QUOTA_UDQ_ENFD)
93 flags |= XFS_UQUOTA_ENFD;
94 if (uflags & (XFS_QUOTA_PDQ_ENFD|XFS_QUOTA_GDQ_ENFD))
95 flags |= XFS_OQUOTA_ENFD;
96
97 switch (op) {
98 case Q_XQUOTAON:
99 return -xfs_qm_scall_quotaon(mp, flags);
100 case Q_XQUOTAOFF:
101 if (!XFS_IS_QUOTA_ON(mp))
102 return -EINVAL;
103 return -xfs_qm_scall_quotaoff(mp, flags);
104 case Q_XQUOTARM:
105 if (XFS_IS_QUOTA_ON(mp))
106 return -EINVAL;
107 return -xfs_qm_scall_trunc_qfiles(mp, flags);
108 }
109
110 return -EINVAL;
111}
112
113STATIC int
114xfs_fs_get_xquota(
115 struct super_block *sb,
116 int type,
117 qid_t id,
118 struct fs_disk_quota *fdq)
119{
120 struct xfs_mount *mp = XFS_M(sb);
121
122 if (!XFS_IS_QUOTA_RUNNING(mp))
123 return -ENOSYS;
124 if (!XFS_IS_QUOTA_ON(mp))
125 return -ESRCH;
126
127 return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq);
128}
129
130STATIC int
131xfs_fs_set_xquota(
132 struct super_block *sb,
133 int type,
134 qid_t id,
135 struct fs_disk_quota *fdq)
136{
137 struct xfs_mount *mp = XFS_M(sb);
138
139 if (sb->s_flags & MS_RDONLY)
140 return -EROFS;
141 if (!XFS_IS_QUOTA_RUNNING(mp))
142 return -ENOSYS;
143 if (!XFS_IS_QUOTA_ON(mp))
144 return -ESRCH;
145 if (!capable(CAP_SYS_ADMIN))
146 return -EPERM;
147
148 return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq);
149}
150
151struct quotactl_ops xfs_quotactl_operations = {
152 .quota_sync = xfs_fs_quota_sync,
153 .get_xstate = xfs_fs_get_xstate,
154 .set_xstate = xfs_fs_set_xstate,
155 .get_xquota = xfs_fs_get_xquota,
156 .set_xquota = xfs_fs_set_xquota,
157};
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 32ae5028e96b..bb685269f832 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -68,7 +68,6 @@
68#include <linux/freezer.h> 68#include <linux/freezer.h>
69#include <linux/parser.h> 69#include <linux/parser.h>
70 70
71static struct quotactl_ops xfs_quotactl_operations;
72static struct super_operations xfs_super_operations; 71static struct super_operations xfs_super_operations;
73static kmem_zone_t *xfs_ioend_zone; 72static kmem_zone_t *xfs_ioend_zone;
74mempool_t *xfs_ioend_pool; 73mempool_t *xfs_ioend_pool;
@@ -79,7 +78,6 @@ mempool_t *xfs_ioend_pool;
79#define MNTOPT_RTDEV "rtdev" /* realtime I/O device */ 78#define MNTOPT_RTDEV "rtdev" /* realtime I/O device */
80#define MNTOPT_BIOSIZE "biosize" /* log2 of preferred buffered io size */ 79#define MNTOPT_BIOSIZE "biosize" /* log2 of preferred buffered io size */
81#define MNTOPT_WSYNC "wsync" /* safe-mode nfs compatible mount */ 80#define MNTOPT_WSYNC "wsync" /* safe-mode nfs compatible mount */
82#define MNTOPT_INO64 "ino64" /* force inodes into 64-bit range */
83#define MNTOPT_NOALIGN "noalign" /* turn off stripe alignment */ 81#define MNTOPT_NOALIGN "noalign" /* turn off stripe alignment */
84#define MNTOPT_SWALLOC "swalloc" /* turn on stripe width allocation */ 82#define MNTOPT_SWALLOC "swalloc" /* turn on stripe width allocation */
85#define MNTOPT_SUNIT "sunit" /* data volume stripe unit */ 83#define MNTOPT_SUNIT "sunit" /* data volume stripe unit */
@@ -180,7 +178,7 @@ xfs_parseargs(
180 int dswidth = 0; 178 int dswidth = 0;
181 int iosize = 0; 179 int iosize = 0;
182 int dmapi_implies_ikeep = 1; 180 int dmapi_implies_ikeep = 1;
183 uchar_t iosizelog = 0; 181 __uint8_t iosizelog = 0;
184 182
185 /* 183 /*
186 * Copy binary VFS mount flags we are interested in. 184 * Copy binary VFS mount flags we are interested in.
@@ -291,16 +289,6 @@ xfs_parseargs(
291 mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC; 289 mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
292 } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { 290 } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
293 mp->m_flags |= XFS_MOUNT_NORECOVERY; 291 mp->m_flags |= XFS_MOUNT_NORECOVERY;
294 } else if (!strcmp(this_char, MNTOPT_INO64)) {
295#if XFS_BIG_INUMS
296 mp->m_flags |= XFS_MOUNT_INO64;
297 mp->m_inoadd = XFS_INO64_OFFSET;
298#else
299 cmn_err(CE_WARN,
300 "XFS: %s option not allowed on this system",
301 this_char);
302 return EINVAL;
303#endif
304 } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { 292 } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
305 mp->m_flags |= XFS_MOUNT_NOALIGN; 293 mp->m_flags |= XFS_MOUNT_NOALIGN;
306 } else if (!strcmp(this_char, MNTOPT_SWALLOC)) { 294 } else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
@@ -529,7 +517,6 @@ xfs_showargs(
529 /* the few simple ones we can get from the mount struct */ 517 /* the few simple ones we can get from the mount struct */
530 { XFS_MOUNT_IKEEP, "," MNTOPT_IKEEP }, 518 { XFS_MOUNT_IKEEP, "," MNTOPT_IKEEP },
531 { XFS_MOUNT_WSYNC, "," MNTOPT_WSYNC }, 519 { XFS_MOUNT_WSYNC, "," MNTOPT_WSYNC },
532 { XFS_MOUNT_INO64, "," MNTOPT_INO64 },
533 { XFS_MOUNT_NOALIGN, "," MNTOPT_NOALIGN }, 520 { XFS_MOUNT_NOALIGN, "," MNTOPT_NOALIGN },
534 { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC }, 521 { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC },
535 { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, 522 { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID },
@@ -634,7 +621,7 @@ xfs_max_file_offset(
634 return (((__uint64_t)pagefactor) << bitshift) - 1; 621 return (((__uint64_t)pagefactor) << bitshift) - 1;
635} 622}
636 623
637int 624STATIC int
638xfs_blkdev_get( 625xfs_blkdev_get(
639 xfs_mount_t *mp, 626 xfs_mount_t *mp,
640 const char *name, 627 const char *name,
@@ -651,7 +638,7 @@ xfs_blkdev_get(
651 return -error; 638 return -error;
652} 639}
653 640
654void 641STATIC void
655xfs_blkdev_put( 642xfs_blkdev_put(
656 struct block_device *bdev) 643 struct block_device *bdev)
657{ 644{
@@ -872,7 +859,7 @@ xfsaild_wakeup(
872 wake_up_process(ailp->xa_task); 859 wake_up_process(ailp->xa_task);
873} 860}
874 861
875int 862STATIC int
876xfsaild( 863xfsaild(
877 void *data) 864 void *data)
878{ 865{
@@ -990,26 +977,57 @@ xfs_fs_write_inode(
990 int sync) 977 int sync)
991{ 978{
992 struct xfs_inode *ip = XFS_I(inode); 979 struct xfs_inode *ip = XFS_I(inode);
980 struct xfs_mount *mp = ip->i_mount;
993 int error = 0; 981 int error = 0;
994 int flags = 0;
995 982
996 xfs_itrace_entry(ip); 983 xfs_itrace_entry(ip);
984
985 if (XFS_FORCED_SHUTDOWN(mp))
986 return XFS_ERROR(EIO);
987
997 if (sync) { 988 if (sync) {
998 error = xfs_wait_on_pages(ip, 0, -1); 989 error = xfs_wait_on_pages(ip, 0, -1);
999 if (error) 990 if (error)
1000 goto out_error; 991 goto out;
1001 flags |= FLUSH_SYNC;
1002 } 992 }
1003 error = xfs_inode_flush(ip, flags);
1004 993
1005out_error: 994 /*
995 * Bypass inodes which have already been cleaned by
996 * the inode flush clustering code inside xfs_iflush
997 */
998 if (xfs_inode_clean(ip))
999 goto out;
1000
1001 /*
1002 * We make this non-blocking if the inode is contended, return
1003 * EAGAIN to indicate to the caller that they did not succeed.
1004 * This prevents the flush path from blocking on inodes inside
1005 * another operation right now, they get caught later by xfs_sync.
1006 */
1007 if (sync) {
1008 xfs_ilock(ip, XFS_ILOCK_SHARED);
1009 xfs_iflock(ip);
1010
1011 error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
1012 } else {
1013 error = EAGAIN;
1014 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
1015 goto out;
1016 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
1017 goto out_unlock;
1018
1019 error = xfs_iflush(ip, XFS_IFLUSH_ASYNC_NOBLOCK);
1020 }
1021
1022 out_unlock:
1023 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1024 out:
1006 /* 1025 /*
1007 * if we failed to write out the inode then mark 1026 * if we failed to write out the inode then mark
1008 * it dirty again so we'll try again later. 1027 * it dirty again so we'll try again later.
1009 */ 1028 */
1010 if (error) 1029 if (error)
1011 xfs_mark_inode_dirty_sync(ip); 1030 xfs_mark_inode_dirty_sync(ip);
1012
1013 return -error; 1031 return -error;
1014} 1032}
1015 1033
@@ -1169,18 +1187,12 @@ xfs_fs_statfs(
1169 statp->f_bfree = statp->f_bavail = 1187 statp->f_bfree = statp->f_bavail =
1170 sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); 1188 sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
1171 fakeinos = statp->f_bfree << sbp->sb_inopblog; 1189 fakeinos = statp->f_bfree << sbp->sb_inopblog;
1172#if XFS_BIG_INUMS
1173 fakeinos += mp->m_inoadd;
1174#endif
1175 statp->f_files = 1190 statp->f_files =
1176 MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER); 1191 MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
1177 if (mp->m_maxicount) 1192 if (mp->m_maxicount)
1178#if XFS_BIG_INUMS 1193 statp->f_files = min_t(typeof(statp->f_files),
1179 if (!mp->m_inoadd) 1194 statp->f_files,
1180#endif 1195 mp->m_maxicount);
1181 statp->f_files = min_t(typeof(statp->f_files),
1182 statp->f_files,
1183 mp->m_maxicount);
1184 statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); 1196 statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
1185 spin_unlock(&mp->m_sb_lock); 1197 spin_unlock(&mp->m_sb_lock);
1186 1198
@@ -1302,57 +1314,6 @@ xfs_fs_show_options(
1302 return -xfs_showargs(XFS_M(mnt->mnt_sb), m); 1314 return -xfs_showargs(XFS_M(mnt->mnt_sb), m);
1303} 1315}
1304 1316
1305STATIC int
1306xfs_fs_quotasync(
1307 struct super_block *sb,
1308 int type)
1309{
1310 return -XFS_QM_QUOTACTL(XFS_M(sb), Q_XQUOTASYNC, 0, NULL);
1311}
1312
1313STATIC int
1314xfs_fs_getxstate(
1315 struct super_block *sb,
1316 struct fs_quota_stat *fqs)
1317{
1318 return -XFS_QM_QUOTACTL(XFS_M(sb), Q_XGETQSTAT, 0, (caddr_t)fqs);
1319}
1320
1321STATIC int
1322xfs_fs_setxstate(
1323 struct super_block *sb,
1324 unsigned int flags,
1325 int op)
1326{
1327 return -XFS_QM_QUOTACTL(XFS_M(sb), op, 0, (caddr_t)&flags);
1328}
1329
1330STATIC int
1331xfs_fs_getxquota(
1332 struct super_block *sb,
1333 int type,
1334 qid_t id,
1335 struct fs_disk_quota *fdq)
1336{
1337 return -XFS_QM_QUOTACTL(XFS_M(sb),
1338 (type == USRQUOTA) ? Q_XGETQUOTA :
1339 ((type == GRPQUOTA) ? Q_XGETGQUOTA :
1340 Q_XGETPQUOTA), id, (caddr_t)fdq);
1341}
1342
1343STATIC int
1344xfs_fs_setxquota(
1345 struct super_block *sb,
1346 int type,
1347 qid_t id,
1348 struct fs_disk_quota *fdq)
1349{
1350 return -XFS_QM_QUOTACTL(XFS_M(sb),
1351 (type == USRQUOTA) ? Q_XSETQLIM :
1352 ((type == GRPQUOTA) ? Q_XSETGQLIM :
1353 Q_XSETPQLIM), id, (caddr_t)fdq);
1354}
1355
1356/* 1317/*
1357 * This function fills in xfs_mount_t fields based on mount args. 1318 * This function fills in xfs_mount_t fields based on mount args.
1358 * Note: the superblock _has_ now been read in. 1319 * Note: the superblock _has_ now been read in.
@@ -1435,7 +1396,9 @@ xfs_fs_fill_super(
1435 sb_min_blocksize(sb, BBSIZE); 1396 sb_min_blocksize(sb, BBSIZE);
1436 sb->s_xattr = xfs_xattr_handlers; 1397 sb->s_xattr = xfs_xattr_handlers;
1437 sb->s_export_op = &xfs_export_operations; 1398 sb->s_export_op = &xfs_export_operations;
1399#ifdef CONFIG_XFS_QUOTA
1438 sb->s_qcop = &xfs_quotactl_operations; 1400 sb->s_qcop = &xfs_quotactl_operations;
1401#endif
1439 sb->s_op = &xfs_super_operations; 1402 sb->s_op = &xfs_super_operations;
1440 1403
1441 error = xfs_dmops_get(mp); 1404 error = xfs_dmops_get(mp);
@@ -1578,14 +1541,6 @@ static struct super_operations xfs_super_operations = {
1578 .show_options = xfs_fs_show_options, 1541 .show_options = xfs_fs_show_options,
1579}; 1542};
1580 1543
1581static struct quotactl_ops xfs_quotactl_operations = {
1582 .quota_sync = xfs_fs_quotasync,
1583 .get_xstate = xfs_fs_getxstate,
1584 .set_xstate = xfs_fs_setxstate,
1585 .get_xquota = xfs_fs_getxquota,
1586 .set_xquota = xfs_fs_setxquota,
1587};
1588
1589static struct file_system_type xfs_fs_type = { 1544static struct file_system_type xfs_fs_type = {
1590 .owner = THIS_MODULE, 1545 .owner = THIS_MODULE,
1591 .name = "xfs", 1546 .name = "xfs",
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index d5d776d4cd67..5a2ea3a21781 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -93,6 +93,7 @@ extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
93 93
94extern const struct export_operations xfs_export_operations; 94extern const struct export_operations xfs_export_operations;
95extern struct xattr_handler *xfs_xattr_handlers[]; 95extern struct xattr_handler *xfs_xattr_handlers[];
96extern struct quotactl_ops xfs_quotactl_operations;
96 97
97#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) 98#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info))
98 99
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 5f6de1efe1f6..04f058c848ae 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -19,6 +19,7 @@
19#define XFS_SYNC_H 1 19#define XFS_SYNC_H 1
20 20
21struct xfs_mount; 21struct xfs_mount;
22struct xfs_perag;
22 23
23typedef struct bhv_vfs_sync_work { 24typedef struct bhv_vfs_sync_work {
24 struct list_head w_list; 25 struct list_head w_list;
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index f65983a230d3..ad7fbead4c97 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -41,11 +41,6 @@ struct attrlist_cursor_kern;
41#define IO_INVIS 0x00020 /* don't update inode timestamps */ 41#define IO_INVIS 0x00020 /* don't update inode timestamps */
42 42
43/* 43/*
44 * Flags for xfs_inode_flush
45 */
46#define FLUSH_SYNC 1 /* wait for flush to complete */
47
48/*
49 * Flush/Invalidate options for vop_toss/flush/flushinval_pages. 44 * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
50 */ 45 */
51#define FI_NONE 0 /* none */ 46#define FI_NONE 0 /* none */
@@ -55,33 +50,6 @@ struct attrlist_cursor_kern;
55 the operation completes. */ 50 the operation completes. */
56 51
57/* 52/*
58 * Dealing with bad inodes
59 */
60static inline int VN_BAD(struct inode *vp)
61{
62 return is_bad_inode(vp);
63}
64
65/*
66 * Extracting atime values in various formats
67 */
68static inline void vn_atime_to_bstime(struct inode *vp, xfs_bstime_t *bs_atime)
69{
70 bs_atime->tv_sec = vp->i_atime.tv_sec;
71 bs_atime->tv_nsec = vp->i_atime.tv_nsec;
72}
73
74static inline void vn_atime_to_timespec(struct inode *vp, struct timespec *ts)
75{
76 *ts = vp->i_atime;
77}
78
79static inline void vn_atime_to_time_t(struct inode *vp, time_t *tt)
80{
81 *tt = vp->i_atime.tv_sec;
82}
83
84/*
85 * Some useful predicates. 53 * Some useful predicates.
86 */ 54 */
87#define VN_MAPPED(vp) mapping_mapped(vp->i_mapping) 55#define VN_MAPPED(vp) mapping_mapped(vp->i_mapping)
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 6543c0b29753..e4babcc63423 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -804,7 +804,7 @@ xfs_qm_dqlookup(
804 uint flist_locked; 804 uint flist_locked;
805 xfs_dquot_t *d; 805 xfs_dquot_t *d;
806 806
807 ASSERT(XFS_DQ_IS_HASH_LOCKED(qh)); 807 ASSERT(mutex_is_locked(&qh->qh_lock));
808 808
809 flist_locked = B_FALSE; 809 flist_locked = B_FALSE;
810 810
@@ -877,7 +877,7 @@ xfs_qm_dqlookup(
877 /* 877 /*
878 * move the dquot to the front of the hashchain 878 * move the dquot to the front of the hashchain
879 */ 879 */
880 ASSERT(XFS_DQ_IS_HASH_LOCKED(qh)); 880 ASSERT(mutex_is_locked(&qh->qh_lock));
881 if (dqp->HL_PREVP != &qh->qh_next) { 881 if (dqp->HL_PREVP != &qh->qh_next) {
882 xfs_dqtrace_entry(dqp, 882 xfs_dqtrace_entry(dqp,
883 "DQLOOKUP: HASH MOVETOFRONT"); 883 "DQLOOKUP: HASH MOVETOFRONT");
@@ -892,13 +892,13 @@ xfs_qm_dqlookup(
892 } 892 }
893 xfs_dqtrace_entry(dqp, "LOOKUP END"); 893 xfs_dqtrace_entry(dqp, "LOOKUP END");
894 *O_dqpp = dqp; 894 *O_dqpp = dqp;
895 ASSERT(XFS_DQ_IS_HASH_LOCKED(qh)); 895 ASSERT(mutex_is_locked(&qh->qh_lock));
896 return (0); 896 return (0);
897 } 897 }
898 } 898 }
899 899
900 *O_dqpp = NULL; 900 *O_dqpp = NULL;
901 ASSERT(XFS_DQ_IS_HASH_LOCKED(qh)); 901 ASSERT(mutex_is_locked(&qh->qh_lock));
902 return (1); 902 return (1);
903} 903}
904 904
@@ -956,7 +956,7 @@ xfs_qm_dqget(
956 ASSERT(ip->i_gdquot == NULL); 956 ASSERT(ip->i_gdquot == NULL);
957 } 957 }
958#endif 958#endif
959 XFS_DQ_HASH_LOCK(h); 959 mutex_lock(&h->qh_lock);
960 960
961 /* 961 /*
962 * Look in the cache (hashtable). 962 * Look in the cache (hashtable).
@@ -971,7 +971,7 @@ xfs_qm_dqget(
971 */ 971 */
972 ASSERT(*O_dqpp); 972 ASSERT(*O_dqpp);
973 ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp)); 973 ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
974 XFS_DQ_HASH_UNLOCK(h); 974 mutex_unlock(&h->qh_lock);
975 xfs_dqtrace_entry(*O_dqpp, "DQGET DONE (FROM CACHE)"); 975 xfs_dqtrace_entry(*O_dqpp, "DQGET DONE (FROM CACHE)");
976 return (0); /* success */ 976 return (0); /* success */
977 } 977 }
@@ -991,7 +991,7 @@ xfs_qm_dqget(
991 * we don't keep the lock across a disk read 991 * we don't keep the lock across a disk read
992 */ 992 */
993 version = h->qh_version; 993 version = h->qh_version;
994 XFS_DQ_HASH_UNLOCK(h); 994 mutex_unlock(&h->qh_lock);
995 995
996 /* 996 /*
997 * Allocate the dquot on the kernel heap, and read the ondisk 997 * Allocate the dquot on the kernel heap, and read the ondisk
@@ -1056,7 +1056,7 @@ xfs_qm_dqget(
1056 /* 1056 /*
1057 * Hashlock comes after ilock in lock order 1057 * Hashlock comes after ilock in lock order
1058 */ 1058 */
1059 XFS_DQ_HASH_LOCK(h); 1059 mutex_lock(&h->qh_lock);
1060 if (version != h->qh_version) { 1060 if (version != h->qh_version) {
1061 xfs_dquot_t *tmpdqp; 1061 xfs_dquot_t *tmpdqp;
1062 /* 1062 /*
@@ -1072,7 +1072,7 @@ xfs_qm_dqget(
1072 * and start over. 1072 * and start over.
1073 */ 1073 */
1074 xfs_qm_dqput(tmpdqp); 1074 xfs_qm_dqput(tmpdqp);
1075 XFS_DQ_HASH_UNLOCK(h); 1075 mutex_unlock(&h->qh_lock);
1076 xfs_qm_dqdestroy(dqp); 1076 xfs_qm_dqdestroy(dqp);
1077 XQM_STATS_INC(xqmstats.xs_qm_dquot_dups); 1077 XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
1078 goto again; 1078 goto again;
@@ -1083,7 +1083,7 @@ xfs_qm_dqget(
1083 * Put the dquot at the beginning of the hash-chain and mp's list 1083 * Put the dquot at the beginning of the hash-chain and mp's list
1084 * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock .. 1084 * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
1085 */ 1085 */
1086 ASSERT(XFS_DQ_IS_HASH_LOCKED(h)); 1086 ASSERT(mutex_is_locked(&h->qh_lock));
1087 dqp->q_hash = h; 1087 dqp->q_hash = h;
1088 XQM_HASHLIST_INSERT(h, dqp); 1088 XQM_HASHLIST_INSERT(h, dqp);
1089 1089
@@ -1102,7 +1102,7 @@ xfs_qm_dqget(
1102 XQM_MPLIST_INSERT(&(XFS_QI_MPL_LIST(mp)), dqp); 1102 XQM_MPLIST_INSERT(&(XFS_QI_MPL_LIST(mp)), dqp);
1103 1103
1104 xfs_qm_mplist_unlock(mp); 1104 xfs_qm_mplist_unlock(mp);
1105 XFS_DQ_HASH_UNLOCK(h); 1105 mutex_unlock(&h->qh_lock);
1106 dqret: 1106 dqret:
1107 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); 1107 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
1108 xfs_dqtrace_entry(dqp, "DQGET DONE"); 1108 xfs_dqtrace_entry(dqp, "DQGET DONE");
@@ -1440,7 +1440,7 @@ xfs_qm_dqpurge(
1440 xfs_mount_t *mp = dqp->q_mount; 1440 xfs_mount_t *mp = dqp->q_mount;
1441 1441
1442 ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); 1442 ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
1443 ASSERT(XFS_DQ_IS_HASH_LOCKED(dqp->q_hash)); 1443 ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
1444 1444
1445 xfs_dqlock(dqp); 1445 xfs_dqlock(dqp);
1446 /* 1446 /*
@@ -1453,7 +1453,7 @@ xfs_qm_dqpurge(
1453 */ 1453 */
1454 if (dqp->q_nrefs != 0) { 1454 if (dqp->q_nrefs != 0) {
1455 xfs_dqunlock(dqp); 1455 xfs_dqunlock(dqp);
1456 XFS_DQ_HASH_UNLOCK(dqp->q_hash); 1456 mutex_unlock(&dqp->q_hash->qh_lock);
1457 return (1); 1457 return (1);
1458 } 1458 }
1459 1459
@@ -1517,7 +1517,7 @@ xfs_qm_dqpurge(
1517 memset(&dqp->q_core, 0, sizeof(dqp->q_core)); 1517 memset(&dqp->q_core, 0, sizeof(dqp->q_core));
1518 xfs_dqfunlock(dqp); 1518 xfs_dqfunlock(dqp);
1519 xfs_dqunlock(dqp); 1519 xfs_dqunlock(dqp);
1520 XFS_DQ_HASH_UNLOCK(thishash); 1520 mutex_unlock(&thishash->qh_lock);
1521 return (0); 1521 return (0);
1522} 1522}
1523 1523
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index d443e93b4331..de0f402ddb4c 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -34,7 +34,7 @@
34 */ 34 */
35typedef struct xfs_dqhash { 35typedef struct xfs_dqhash {
36 struct xfs_dquot *qh_next; 36 struct xfs_dquot *qh_next;
37 mutex_t qh_lock; 37 struct mutex qh_lock;
38 uint qh_version; /* ever increasing version */ 38 uint qh_version; /* ever increasing version */
39 uint qh_nelems; /* number of dquots on the list */ 39 uint qh_nelems; /* number of dquots on the list */
40} xfs_dqhash_t; 40} xfs_dqhash_t;
@@ -81,7 +81,7 @@ typedef struct xfs_dquot {
81 xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */ 81 xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */
82 xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */ 82 xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */
83 xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ 83 xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */
84 mutex_t q_qlock; /* quota lock */ 84 struct mutex q_qlock; /* quota lock */
85 struct completion q_flush; /* flush completion queue */ 85 struct completion q_flush; /* flush completion queue */
86 atomic_t q_pincount; /* dquot pin count */ 86 atomic_t q_pincount; /* dquot pin count */
87 wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ 87 wait_queue_head_t q_pinwait; /* dquot pinning wait queue */
@@ -109,19 +109,6 @@ enum {
109 109
110#define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++) 110#define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++)
111 111
112#ifdef DEBUG
113static inline int
114XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
115{
116 if (mutex_trylock(&dqp->q_qlock)) {
117 mutex_unlock(&dqp->q_qlock);
118 return 0;
119 }
120 return 1;
121}
122#endif
123
124
125/* 112/*
126 * Manage the q_flush completion queue embedded in the dquot. This completion 113 * Manage the q_flush completion queue embedded in the dquot. This completion
127 * queue synchronizes processes attempting to flush the in-core dquot back to 114 * queue synchronizes processes attempting to flush the in-core dquot back to
@@ -142,6 +129,7 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
142 complete(&dqp->q_flush); 129 complete(&dqp->q_flush);
143} 130}
144 131
132#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock)))
145#define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp)) 133#define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp))
146#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) 134#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
147#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) 135#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 7a2beb64314f..5b6695049e00 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -55,7 +55,7 @@
55 * quota functionality, including maintaining the freelist and hash 55 * quota functionality, including maintaining the freelist and hash
56 * tables of dquots. 56 * tables of dquots.
57 */ 57 */
58mutex_t xfs_Gqm_lock; 58struct mutex xfs_Gqm_lock;
59struct xfs_qm *xfs_Gqm; 59struct xfs_qm *xfs_Gqm;
60uint ndquot; 60uint ndquot;
61 61
@@ -69,8 +69,6 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
69 69
70STATIC void xfs_qm_freelist_init(xfs_frlist_t *); 70STATIC void xfs_qm_freelist_init(xfs_frlist_t *);
71STATIC void xfs_qm_freelist_destroy(xfs_frlist_t *); 71STATIC void xfs_qm_freelist_destroy(xfs_frlist_t *);
72STATIC int xfs_qm_mplist_nowait(xfs_mount_t *);
73STATIC int xfs_qm_dqhashlock_nowait(xfs_dquot_t *);
74 72
75STATIC int xfs_qm_init_quotainos(xfs_mount_t *); 73STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
76STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); 74STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
@@ -82,7 +80,7 @@ static struct shrinker xfs_qm_shaker = {
82}; 80};
83 81
84#ifdef DEBUG 82#ifdef DEBUG
85extern mutex_t qcheck_lock; 83extern struct mutex qcheck_lock;
86#endif 84#endif
87 85
88#ifdef QUOTADEBUG 86#ifdef QUOTADEBUG
@@ -219,7 +217,7 @@ xfs_qm_hold_quotafs_ref(
219 * the structure could disappear between the entry to this routine and 217 * the structure could disappear between the entry to this routine and
220 * a HOLD operation if not locked. 218 * a HOLD operation if not locked.
221 */ 219 */
222 XFS_QM_LOCK(xfs_Gqm); 220 mutex_lock(&xfs_Gqm_lock);
223 221
224 if (xfs_Gqm == NULL) 222 if (xfs_Gqm == NULL)
225 xfs_Gqm = xfs_Gqm_init(); 223 xfs_Gqm = xfs_Gqm_init();
@@ -228,8 +226,8 @@ xfs_qm_hold_quotafs_ref(
228 * debugging and statistical purposes, but ... 226 * debugging and statistical purposes, but ...
229 * Just take a reference and get out. 227 * Just take a reference and get out.
230 */ 228 */
231 XFS_QM_HOLD(xfs_Gqm); 229 xfs_Gqm->qm_nrefs++;
232 XFS_QM_UNLOCK(xfs_Gqm); 230 mutex_unlock(&xfs_Gqm_lock);
233 231
234 return 0; 232 return 0;
235} 233}
@@ -277,13 +275,12 @@ xfs_qm_rele_quotafs_ref(
277 * Destroy the entire XQM. If somebody mounts with quotaon, this'll 275 * Destroy the entire XQM. If somebody mounts with quotaon, this'll
278 * be restarted. 276 * be restarted.
279 */ 277 */
280 XFS_QM_LOCK(xfs_Gqm); 278 mutex_lock(&xfs_Gqm_lock);
281 XFS_QM_RELE(xfs_Gqm); 279 if (--xfs_Gqm->qm_nrefs == 0) {
282 if (xfs_Gqm->qm_nrefs == 0) {
283 xfs_qm_destroy(xfs_Gqm); 280 xfs_qm_destroy(xfs_Gqm);
284 xfs_Gqm = NULL; 281 xfs_Gqm = NULL;
285 } 282 }
286 XFS_QM_UNLOCK(xfs_Gqm); 283 mutex_unlock(&xfs_Gqm_lock);
287} 284}
288 285
289/* 286/*
@@ -577,10 +574,10 @@ xfs_qm_dqpurge_int(
577 continue; 574 continue;
578 } 575 }
579 576
580 if (! xfs_qm_dqhashlock_nowait(dqp)) { 577 if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
581 nrecl = XFS_QI_MPLRECLAIMS(mp); 578 nrecl = XFS_QI_MPLRECLAIMS(mp);
582 xfs_qm_mplist_unlock(mp); 579 xfs_qm_mplist_unlock(mp);
583 XFS_DQ_HASH_LOCK(dqp->q_hash); 580 mutex_lock(&dqp->q_hash->qh_lock);
584 xfs_qm_mplist_lock(mp); 581 xfs_qm_mplist_lock(mp);
585 582
586 /* 583 /*
@@ -590,7 +587,7 @@ xfs_qm_dqpurge_int(
590 * this point, but somebody might be taking things off. 587 * this point, but somebody might be taking things off.
591 */ 588 */
592 if (nrecl != XFS_QI_MPLRECLAIMS(mp)) { 589 if (nrecl != XFS_QI_MPLRECLAIMS(mp)) {
593 XFS_DQ_HASH_UNLOCK(dqp->q_hash); 590 mutex_unlock(&dqp->q_hash->qh_lock);
594 goto again; 591 goto again;
595 } 592 }
596 } 593 }
@@ -632,7 +629,6 @@ xfs_qm_dqattach_one(
632 xfs_dqid_t id, 629 xfs_dqid_t id,
633 uint type, 630 uint type,
634 uint doalloc, 631 uint doalloc,
635 uint dolock,
636 xfs_dquot_t *udqhint, /* hint */ 632 xfs_dquot_t *udqhint, /* hint */
637 xfs_dquot_t **IO_idqpp) 633 xfs_dquot_t **IO_idqpp)
638{ 634{
@@ -641,16 +637,16 @@ xfs_qm_dqattach_one(
641 637
642 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 638 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
643 error = 0; 639 error = 0;
640
644 /* 641 /*
645 * See if we already have it in the inode itself. IO_idqpp is 642 * See if we already have it in the inode itself. IO_idqpp is
646 * &i_udquot or &i_gdquot. This made the code look weird, but 643 * &i_udquot or &i_gdquot. This made the code look weird, but
647 * made the logic a lot simpler. 644 * made the logic a lot simpler.
648 */ 645 */
649 if ((dqp = *IO_idqpp)) { 646 dqp = *IO_idqpp;
650 if (dolock) 647 if (dqp) {
651 xfs_dqlock(dqp);
652 xfs_dqtrace_entry(dqp, "DQATTACH: found in ip"); 648 xfs_dqtrace_entry(dqp, "DQATTACH: found in ip");
653 goto done; 649 return 0;
654 } 650 }
655 651
656 /* 652 /*
@@ -659,38 +655,38 @@ xfs_qm_dqattach_one(
659 * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside 655 * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
660 * the user dquot. 656 * the user dquot.
661 */ 657 */
662 ASSERT(!udqhint || type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); 658 if (udqhint) {
663 if (udqhint && !dolock) 659 ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
664 xfs_dqlock(udqhint); 660 xfs_dqlock(udqhint);
665 661
666 /* 662 /*
667 * No need to take dqlock to look at the id. 663 * No need to take dqlock to look at the id.
668 * The ID can't change until it gets reclaimed, and it won't 664 *
669 * be reclaimed as long as we have a ref from inode and we hold 665 * The ID can't change until it gets reclaimed, and it won't
670 * the ilock. 666 * be reclaimed as long as we have a ref from inode and we
671 */ 667 * hold the ilock.
672 if (udqhint && 668 */
673 (dqp = udqhint->q_gdquot) && 669 dqp = udqhint->q_gdquot;
674 (be32_to_cpu(dqp->q_core.d_id) == id)) { 670 if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
675 ASSERT(XFS_DQ_IS_LOCKED(udqhint)); 671 xfs_dqlock(dqp);
676 xfs_dqlock(dqp); 672 XFS_DQHOLD(dqp);
677 XFS_DQHOLD(dqp); 673 ASSERT(*IO_idqpp == NULL);
678 ASSERT(*IO_idqpp == NULL); 674 *IO_idqpp = dqp;
679 *IO_idqpp = dqp; 675
680 if (!dolock) {
681 xfs_dqunlock(dqp); 676 xfs_dqunlock(dqp);
682 xfs_dqunlock(udqhint); 677 xfs_dqunlock(udqhint);
678 return 0;
683 } 679 }
684 goto done; 680
685 } 681 /*
686 /* 682 * We can't hold a dquot lock when we call the dqget code.
687 * We can't hold a dquot lock when we call the dqget code. 683 * We'll deadlock in no time, because of (not conforming to)
688 * We'll deadlock in no time, because of (not conforming to) 684 * lock ordering - the inodelock comes before any dquot lock,
689 * lock ordering - the inodelock comes before any dquot lock, 685 * and we may drop and reacquire the ilock in xfs_qm_dqget().
690 * and we may drop and reacquire the ilock in xfs_qm_dqget(). 686 */
691 */
692 if (udqhint)
693 xfs_dqunlock(udqhint); 687 xfs_dqunlock(udqhint);
688 }
689
694 /* 690 /*
695 * Find the dquot from somewhere. This bumps the 691 * Find the dquot from somewhere. This bumps the
696 * reference count of dquot and returns it locked. 692 * reference count of dquot and returns it locked.
@@ -698,48 +694,19 @@ xfs_qm_dqattach_one(
698 * disk and we didn't ask it to allocate; 694 * disk and we didn't ask it to allocate;
699 * ESRCH if quotas got turned off suddenly. 695 * ESRCH if quotas got turned off suddenly.
700 */ 696 */
701 if ((error = xfs_qm_dqget(ip->i_mount, ip, id, type, 697 error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp);
702 doalloc|XFS_QMOPT_DOWARN, &dqp))) { 698 if (error)
703 if (udqhint && dolock) 699 return error;
704 xfs_dqlock(udqhint);
705 goto done;
706 }
707 700
708 xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget"); 701 xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget");
702
709 /* 703 /*
710 * dqget may have dropped and re-acquired the ilock, but it guarantees 704 * dqget may have dropped and re-acquired the ilock, but it guarantees
711 * that the dquot returned is the one that should go in the inode. 705 * that the dquot returned is the one that should go in the inode.
712 */ 706 */
713 *IO_idqpp = dqp; 707 *IO_idqpp = dqp;
714 ASSERT(dqp); 708 xfs_dqunlock(dqp);
715 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 709 return 0;
716 if (! dolock) {
717 xfs_dqunlock(dqp);
718 goto done;
719 }
720 if (! udqhint)
721 goto done;
722
723 ASSERT(udqhint);
724 ASSERT(dolock);
725 ASSERT(XFS_DQ_IS_LOCKED(dqp));
726 if (! xfs_qm_dqlock_nowait(udqhint)) {
727 xfs_dqunlock(dqp);
728 xfs_dqlock(udqhint);
729 xfs_dqlock(dqp);
730 }
731 done:
732#ifdef QUOTADEBUG
733 if (udqhint) {
734 if (dolock)
735 ASSERT(XFS_DQ_IS_LOCKED(udqhint));
736 }
737 if (! error) {
738 if (dolock)
739 ASSERT(XFS_DQ_IS_LOCKED(dqp));
740 }
741#endif
742 return error;
743} 710}
744 711
745 712
@@ -754,24 +721,15 @@ xfs_qm_dqattach_one(
754STATIC void 721STATIC void
755xfs_qm_dqattach_grouphint( 722xfs_qm_dqattach_grouphint(
756 xfs_dquot_t *udq, 723 xfs_dquot_t *udq,
757 xfs_dquot_t *gdq, 724 xfs_dquot_t *gdq)
758 uint locked)
759{ 725{
760 xfs_dquot_t *tmp; 726 xfs_dquot_t *tmp;
761 727
762#ifdef QUOTADEBUG 728 xfs_dqlock(udq);
763 if (locked) {
764 ASSERT(XFS_DQ_IS_LOCKED(udq));
765 ASSERT(XFS_DQ_IS_LOCKED(gdq));
766 }
767#endif
768 if (! locked)
769 xfs_dqlock(udq);
770 729
771 if ((tmp = udq->q_gdquot)) { 730 if ((tmp = udq->q_gdquot)) {
772 if (tmp == gdq) { 731 if (tmp == gdq) {
773 if (! locked) 732 xfs_dqunlock(udq);
774 xfs_dqunlock(udq);
775 return; 733 return;
776 } 734 }
777 735
@@ -781,8 +739,6 @@ xfs_qm_dqattach_grouphint(
781 * because the freelist lock comes before dqlocks. 739 * because the freelist lock comes before dqlocks.
782 */ 740 */
783 xfs_dqunlock(udq); 741 xfs_dqunlock(udq);
784 if (locked)
785 xfs_dqunlock(gdq);
786 /* 742 /*
787 * we took a hard reference once upon a time in dqget, 743 * we took a hard reference once upon a time in dqget,
788 * so give it back when the udquot no longer points at it 744 * so give it back when the udquot no longer points at it
@@ -795,9 +751,7 @@ xfs_qm_dqattach_grouphint(
795 751
796 } else { 752 } else {
797 ASSERT(XFS_DQ_IS_LOCKED(udq)); 753 ASSERT(XFS_DQ_IS_LOCKED(udq));
798 if (! locked) { 754 xfs_dqlock(gdq);
799 xfs_dqlock(gdq);
800 }
801 } 755 }
802 756
803 ASSERT(XFS_DQ_IS_LOCKED(udq)); 757 ASSERT(XFS_DQ_IS_LOCKED(udq));
@@ -810,10 +764,9 @@ xfs_qm_dqattach_grouphint(
810 XFS_DQHOLD(gdq); 764 XFS_DQHOLD(gdq);
811 udq->q_gdquot = gdq; 765 udq->q_gdquot = gdq;
812 } 766 }
813 if (! locked) { 767
814 xfs_dqunlock(gdq); 768 xfs_dqunlock(gdq);
815 xfs_dqunlock(udq); 769 xfs_dqunlock(udq);
816 }
817} 770}
818 771
819 772
@@ -821,8 +774,6 @@ xfs_qm_dqattach_grouphint(
821 * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON 774 * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
822 * into account. 775 * into account.
823 * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed. 776 * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
824 * If XFS_QMOPT_DQLOCK, the dquot(s) will be returned locked. This option pretty
825 * much made this code a complete mess, but it has been pretty useful.
826 * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL. 777 * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL.
827 * Inode may get unlocked and relocked in here, and the caller must deal with 778 * Inode may get unlocked and relocked in here, and the caller must deal with
828 * the consequences. 779 * the consequences.
@@ -851,7 +802,6 @@ xfs_qm_dqattach(
851 if (XFS_IS_UQUOTA_ON(mp)) { 802 if (XFS_IS_UQUOTA_ON(mp)) {
852 error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, 803 error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
853 flags & XFS_QMOPT_DQALLOC, 804 flags & XFS_QMOPT_DQALLOC,
854 flags & XFS_QMOPT_DQLOCK,
855 NULL, &ip->i_udquot); 805 NULL, &ip->i_udquot);
856 if (error) 806 if (error)
857 goto done; 807 goto done;
@@ -863,11 +813,9 @@ xfs_qm_dqattach(
863 error = XFS_IS_GQUOTA_ON(mp) ? 813 error = XFS_IS_GQUOTA_ON(mp) ?
864 xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, 814 xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
865 flags & XFS_QMOPT_DQALLOC, 815 flags & XFS_QMOPT_DQALLOC,
866 flags & XFS_QMOPT_DQLOCK,
867 ip->i_udquot, &ip->i_gdquot) : 816 ip->i_udquot, &ip->i_gdquot) :
868 xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ, 817 xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ,
869 flags & XFS_QMOPT_DQALLOC, 818 flags & XFS_QMOPT_DQALLOC,
870 flags & XFS_QMOPT_DQLOCK,
871 ip->i_udquot, &ip->i_gdquot); 819 ip->i_udquot, &ip->i_gdquot);
872 /* 820 /*
873 * Don't worry about the udquot that we may have 821 * Don't worry about the udquot that we may have
@@ -898,22 +846,13 @@ xfs_qm_dqattach(
898 /* 846 /*
899 * Attach i_gdquot to the gdquot hint inside the i_udquot. 847 * Attach i_gdquot to the gdquot hint inside the i_udquot.
900 */ 848 */
901 xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot, 849 xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
902 flags & XFS_QMOPT_DQLOCK);
903 } 850 }
904 851
905 done: 852 done:
906 853
907#ifdef QUOTADEBUG 854#ifdef QUOTADEBUG
908 if (! error) { 855 if (! error) {
909 if (ip->i_udquot) {
910 if (flags & XFS_QMOPT_DQLOCK)
911 ASSERT(XFS_DQ_IS_LOCKED(ip->i_udquot));
912 }
913 if (ip->i_gdquot) {
914 if (flags & XFS_QMOPT_DQLOCK)
915 ASSERT(XFS_DQ_IS_LOCKED(ip->i_gdquot));
916 }
917 if (XFS_IS_UQUOTA_ON(mp)) 856 if (XFS_IS_UQUOTA_ON(mp))
918 ASSERT(ip->i_udquot); 857 ASSERT(ip->i_udquot);
919 if (XFS_IS_OQUOTA_ON(mp)) 858 if (XFS_IS_OQUOTA_ON(mp))
@@ -2086,7 +2025,7 @@ xfs_qm_shake_freelist(
2086 * a dqlookup process that holds the hashlock that is 2025 * a dqlookup process that holds the hashlock that is
2087 * waiting for the freelist lock. 2026 * waiting for the freelist lock.
2088 */ 2027 */
2089 if (! xfs_qm_dqhashlock_nowait(dqp)) { 2028 if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
2090 xfs_dqfunlock(dqp); 2029 xfs_dqfunlock(dqp);
2091 xfs_dqunlock(dqp); 2030 xfs_dqunlock(dqp);
2092 dqp = dqp->dq_flnext; 2031 dqp = dqp->dq_flnext;
@@ -2103,7 +2042,7 @@ xfs_qm_shake_freelist(
2103 /* XXX put a sentinel so that we can come back here */ 2042 /* XXX put a sentinel so that we can come back here */
2104 xfs_dqfunlock(dqp); 2043 xfs_dqfunlock(dqp);
2105 xfs_dqunlock(dqp); 2044 xfs_dqunlock(dqp);
2106 XFS_DQ_HASH_UNLOCK(hash); 2045 mutex_unlock(&hash->qh_lock);
2107 xfs_qm_freelist_unlock(xfs_Gqm); 2046 xfs_qm_freelist_unlock(xfs_Gqm);
2108 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) 2047 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2109 return nreclaimed; 2048 return nreclaimed;
@@ -2120,7 +2059,7 @@ xfs_qm_shake_freelist(
2120 XQM_HASHLIST_REMOVE(hash, dqp); 2059 XQM_HASHLIST_REMOVE(hash, dqp);
2121 xfs_dqfunlock(dqp); 2060 xfs_dqfunlock(dqp);
2122 xfs_qm_mplist_unlock(dqp->q_mount); 2061 xfs_qm_mplist_unlock(dqp->q_mount);
2123 XFS_DQ_HASH_UNLOCK(hash); 2062 mutex_unlock(&hash->qh_lock);
2124 2063
2125 off_freelist: 2064 off_freelist:
2126 XQM_FREELIST_REMOVE(dqp); 2065 XQM_FREELIST_REMOVE(dqp);
@@ -2262,7 +2201,7 @@ xfs_qm_dqreclaim_one(void)
2262 continue; 2201 continue;
2263 } 2202 }
2264 2203
2265 if (! xfs_qm_dqhashlock_nowait(dqp)) 2204 if (!mutex_trylock(&dqp->q_hash->qh_lock))
2266 goto mplistunlock; 2205 goto mplistunlock;
2267 2206
2268 ASSERT(dqp->q_nrefs == 0); 2207 ASSERT(dqp->q_nrefs == 0);
@@ -2271,7 +2210,7 @@ xfs_qm_dqreclaim_one(void)
2271 XQM_HASHLIST_REMOVE(dqp->q_hash, dqp); 2210 XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
2272 XQM_FREELIST_REMOVE(dqp); 2211 XQM_FREELIST_REMOVE(dqp);
2273 dqpout = dqp; 2212 dqpout = dqp;
2274 XFS_DQ_HASH_UNLOCK(dqp->q_hash); 2213 mutex_unlock(&dqp->q_hash->qh_lock);
2275 mplistunlock: 2214 mplistunlock:
2276 xfs_qm_mplist_unlock(dqp->q_mount); 2215 xfs_qm_mplist_unlock(dqp->q_mount);
2277 xfs_dqfunlock(dqp); 2216 xfs_dqfunlock(dqp);
@@ -2774,34 +2713,3 @@ xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq)
2774{ 2713{
2775 xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq); 2714 xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq);
2776} 2715}
2777
2778STATIC int
2779xfs_qm_dqhashlock_nowait(
2780 xfs_dquot_t *dqp)
2781{
2782 int locked;
2783
2784 locked = mutex_trylock(&((dqp)->q_hash->qh_lock));
2785 return locked;
2786}
2787
2788int
2789xfs_qm_freelist_lock_nowait(
2790 xfs_qm_t *xqm)
2791{
2792 int locked;
2793
2794 locked = mutex_trylock(&(xqm->qm_dqfreelist.qh_lock));
2795 return locked;
2796}
2797
2798STATIC int
2799xfs_qm_mplist_nowait(
2800 xfs_mount_t *mp)
2801{
2802 int locked;
2803
2804 ASSERT(mp->m_quotainfo);
2805 locked = mutex_trylock(&(XFS_QI_MPLLOCK(mp)));
2806 return locked;
2807}
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index ddf09166387c..a371954cae1b 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -27,7 +27,7 @@ struct xfs_qm;
27struct xfs_inode; 27struct xfs_inode;
28 28
29extern uint ndquot; 29extern uint ndquot;
30extern mutex_t xfs_Gqm_lock; 30extern struct mutex xfs_Gqm_lock;
31extern struct xfs_qm *xfs_Gqm; 31extern struct xfs_qm *xfs_Gqm;
32extern kmem_zone_t *qm_dqzone; 32extern kmem_zone_t *qm_dqzone;
33extern kmem_zone_t *qm_dqtrxzone; 33extern kmem_zone_t *qm_dqtrxzone;
@@ -79,7 +79,7 @@ typedef xfs_dqhash_t xfs_dqlist_t;
79typedef struct xfs_frlist { 79typedef struct xfs_frlist {
80 struct xfs_dquot *qh_next; 80 struct xfs_dquot *qh_next;
81 struct xfs_dquot *qh_prev; 81 struct xfs_dquot *qh_prev;
82 mutex_t qh_lock; 82 struct mutex qh_lock;
83 uint qh_version; 83 uint qh_version;
84 uint qh_nelems; 84 uint qh_nelems;
85} xfs_frlist_t; 85} xfs_frlist_t;
@@ -115,7 +115,7 @@ typedef struct xfs_quotainfo {
115 xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */ 115 xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */
116 xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */ 116 xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */
117 xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */ 117 xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */
118 mutex_t qi_quotaofflock;/* to serialize quotaoff */ 118 struct mutex qi_quotaofflock;/* to serialize quotaoff */
119 xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */ 119 xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */
120 uint qi_dqperchunk; /* # ondisk dqs in above chunk */ 120 uint qi_dqperchunk; /* # ondisk dqs in above chunk */
121 xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */ 121 xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */
@@ -158,11 +158,6 @@ typedef struct xfs_dquot_acct {
158#define XFS_QM_IWARNLIMIT 5 158#define XFS_QM_IWARNLIMIT 5
159#define XFS_QM_RTBWARNLIMIT 5 159#define XFS_QM_RTBWARNLIMIT 5
160 160
161#define XFS_QM_LOCK(xqm) (mutex_lock(&xqm##_lock))
162#define XFS_QM_UNLOCK(xqm) (mutex_unlock(&xqm##_lock))
163#define XFS_QM_HOLD(xqm) ((xqm)->qm_nrefs++)
164#define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--)
165
166extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); 161extern void xfs_qm_destroy_quotainfo(xfs_mount_t *);
167extern void xfs_qm_mount_quotas(xfs_mount_t *); 162extern void xfs_qm_mount_quotas(xfs_mount_t *);
168extern int xfs_qm_quotacheck(xfs_mount_t *); 163extern int xfs_qm_quotacheck(xfs_mount_t *);
@@ -178,6 +173,16 @@ extern void xfs_qm_dqdetach(xfs_inode_t *);
178extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); 173extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint);
179extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); 174extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
180 175
176/* quota ops */
177extern int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
178extern int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
179 fs_disk_quota_t *);
180extern int xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint,
181 fs_disk_quota_t *);
182extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
183extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint);
184extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
185
181/* vop stuff */ 186/* vop stuff */
182extern int xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *, 187extern int xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *,
183 uid_t, gid_t, prid_t, uint, 188 uid_t, gid_t, prid_t, uint,
@@ -194,11 +199,6 @@ extern int xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *,
194/* list stuff */ 199/* list stuff */
195extern void xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *); 200extern void xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *);
196extern void xfs_qm_freelist_unlink(xfs_dquot_t *); 201extern void xfs_qm_freelist_unlink(xfs_dquot_t *);
197extern int xfs_qm_freelist_lock_nowait(xfs_qm_t *);
198
199/* system call interface */
200extern int xfs_qm_quotactl(struct xfs_mount *, int, int,
201 xfs_caddr_t);
202 202
203#ifdef DEBUG 203#ifdef DEBUG
204extern int xfs_qm_internalqcheck(xfs_mount_t *); 204extern int xfs_qm_internalqcheck(xfs_mount_t *);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index bc6c5cca3e12..63037c689a4b 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -235,7 +235,6 @@ struct xfs_qmops xfs_qmcore_xfs = {
235 .xfs_dqvopchownresv = xfs_qm_vop_chown_reserve, 235 .xfs_dqvopchownresv = xfs_qm_vop_chown_reserve,
236 .xfs_dqstatvfs = xfs_qm_statvfs, 236 .xfs_dqstatvfs = xfs_qm_statvfs,
237 .xfs_dqsync = xfs_qm_sync, 237 .xfs_dqsync = xfs_qm_sync,
238 .xfs_quotactl = xfs_qm_quotactl,
239 .xfs_dqtrxops = &xfs_trans_dquot_ops, 238 .xfs_dqtrxops = &xfs_trans_dquot_ops,
240}; 239};
241EXPORT_SYMBOL(xfs_qmcore_xfs); 240EXPORT_SYMBOL(xfs_qmcore_xfs);
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 68139b38aede..c7b66f6506ce 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -57,135 +57,16 @@
57# define qdprintk(s, args...) do { } while (0) 57# define qdprintk(s, args...) do { } while (0)
58#endif 58#endif
59 59
60STATIC int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
61STATIC int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
62 fs_disk_quota_t *);
63STATIC int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
64STATIC int xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint,
65 fs_disk_quota_t *);
66STATIC int xfs_qm_scall_quotaon(xfs_mount_t *, uint);
67STATIC int xfs_qm_scall_quotaoff(xfs_mount_t *, uint, boolean_t);
68STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); 60STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
69STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, 61STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
70 uint); 62 uint);
71STATIC uint xfs_qm_import_flags(uint);
72STATIC uint xfs_qm_export_flags(uint); 63STATIC uint xfs_qm_export_flags(uint);
73STATIC uint xfs_qm_import_qtype_flags(uint);
74STATIC uint xfs_qm_export_qtype_flags(uint); 64STATIC uint xfs_qm_export_qtype_flags(uint);
75STATIC void xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *, 65STATIC void xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *,
76 fs_disk_quota_t *); 66 fs_disk_quota_t *);
77 67
78 68
79/* 69/*
80 * The main distribution switch of all XFS quotactl system calls.
81 */
82int
83xfs_qm_quotactl(
84 xfs_mount_t *mp,
85 int cmd,
86 int id,
87 xfs_caddr_t addr)
88{
89 int error;
90
91 ASSERT(addr != NULL || cmd == Q_XQUOTASYNC);
92
93 /*
94 * The following commands are valid even when quotaoff.
95 */
96 switch (cmd) {
97 case Q_XQUOTARM:
98 /*
99 * Truncate quota files. quota must be off.
100 */
101 if (XFS_IS_QUOTA_ON(mp))
102 return XFS_ERROR(EINVAL);
103 if (mp->m_flags & XFS_MOUNT_RDONLY)
104 return XFS_ERROR(EROFS);
105 return (xfs_qm_scall_trunc_qfiles(mp,
106 xfs_qm_import_qtype_flags(*(uint *)addr)));
107
108 case Q_XGETQSTAT:
109 /*
110 * Get quota status information.
111 */
112 return (xfs_qm_scall_getqstat(mp, (fs_quota_stat_t *)addr));
113
114 case Q_XQUOTAON:
115 /*
116 * QUOTAON - enabling quota enforcement.
117 * Quota accounting must be turned on at mount time.
118 */
119 if (mp->m_flags & XFS_MOUNT_RDONLY)
120 return XFS_ERROR(EROFS);
121 return (xfs_qm_scall_quotaon(mp,
122 xfs_qm_import_flags(*(uint *)addr)));
123
124 case Q_XQUOTAOFF:
125 if (mp->m_flags & XFS_MOUNT_RDONLY)
126 return XFS_ERROR(EROFS);
127 break;
128
129 case Q_XQUOTASYNC:
130 return xfs_sync_inodes(mp, SYNC_DELWRI);
131
132 default:
133 break;
134 }
135
136 if (! XFS_IS_QUOTA_ON(mp))
137 return XFS_ERROR(ESRCH);
138
139 switch (cmd) {
140 case Q_XQUOTAOFF:
141 if (mp->m_flags & XFS_MOUNT_RDONLY)
142 return XFS_ERROR(EROFS);
143 error = xfs_qm_scall_quotaoff(mp,
144 xfs_qm_import_flags(*(uint *)addr),
145 B_FALSE);
146 break;
147
148 case Q_XGETQUOTA:
149 error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_USER,
150 (fs_disk_quota_t *)addr);
151 break;
152 case Q_XGETGQUOTA:
153 error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_GROUP,
154 (fs_disk_quota_t *)addr);
155 break;
156 case Q_XGETPQUOTA:
157 error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_PROJ,
158 (fs_disk_quota_t *)addr);
159 break;
160
161 case Q_XSETQLIM:
162 if (mp->m_flags & XFS_MOUNT_RDONLY)
163 return XFS_ERROR(EROFS);
164 error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_USER,
165 (fs_disk_quota_t *)addr);
166 break;
167 case Q_XSETGQLIM:
168 if (mp->m_flags & XFS_MOUNT_RDONLY)
169 return XFS_ERROR(EROFS);
170 error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_GROUP,
171 (fs_disk_quota_t *)addr);
172 break;
173 case Q_XSETPQLIM:
174 if (mp->m_flags & XFS_MOUNT_RDONLY)
175 return XFS_ERROR(EROFS);
176 error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_PROJ,
177 (fs_disk_quota_t *)addr);
178 break;
179
180 default:
181 error = XFS_ERROR(EINVAL);
182 break;
183 }
184
185 return (error);
186}
187
188/*
189 * Turn off quota accounting and/or enforcement for all udquots and/or 70 * Turn off quota accounting and/or enforcement for all udquots and/or
190 * gdquots. Called only at unmount time. 71 * gdquots. Called only at unmount time.
191 * 72 *
@@ -193,11 +74,10 @@ xfs_qm_quotactl(
193 * incore, and modifies the ondisk dquot directly. Therefore, for example, 74 * incore, and modifies the ondisk dquot directly. Therefore, for example,
194 * it is an error to call this twice, without purging the cache. 75 * it is an error to call this twice, without purging the cache.
195 */ 76 */
196STATIC int 77int
197xfs_qm_scall_quotaoff( 78xfs_qm_scall_quotaoff(
198 xfs_mount_t *mp, 79 xfs_mount_t *mp,
199 uint flags, 80 uint flags)
200 boolean_t force)
201{ 81{
202 uint dqtype; 82 uint dqtype;
203 int error; 83 int error;
@@ -205,8 +85,6 @@ xfs_qm_scall_quotaoff(
205 xfs_qoff_logitem_t *qoffstart; 85 xfs_qoff_logitem_t *qoffstart;
206 int nculprits; 86 int nculprits;
207 87
208 if (!force && !capable(CAP_SYS_ADMIN))
209 return XFS_ERROR(EPERM);
210 /* 88 /*
211 * No file system can have quotas enabled on disk but not in core. 89 * No file system can have quotas enabled on disk but not in core.
212 * Note that quota utilities (like quotaoff) _expect_ 90 * Note that quota utilities (like quotaoff) _expect_
@@ -375,7 +253,7 @@ out_error:
375 return (error); 253 return (error);
376} 254}
377 255
378STATIC int 256int
379xfs_qm_scall_trunc_qfiles( 257xfs_qm_scall_trunc_qfiles(
380 xfs_mount_t *mp, 258 xfs_mount_t *mp,
381 uint flags) 259 uint flags)
@@ -383,8 +261,6 @@ xfs_qm_scall_trunc_qfiles(
383 int error = 0, error2 = 0; 261 int error = 0, error2 = 0;
384 xfs_inode_t *qip; 262 xfs_inode_t *qip;
385 263
386 if (!capable(CAP_SYS_ADMIN))
387 return XFS_ERROR(EPERM);
388 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { 264 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
389 qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); 265 qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags);
390 return XFS_ERROR(EINVAL); 266 return XFS_ERROR(EINVAL);
@@ -416,7 +292,7 @@ xfs_qm_scall_trunc_qfiles(
416 * effect immediately. 292 * effect immediately.
417 * (Switching on quota accounting must be done at mount time.) 293 * (Switching on quota accounting must be done at mount time.)
418 */ 294 */
419STATIC int 295int
420xfs_qm_scall_quotaon( 296xfs_qm_scall_quotaon(
421 xfs_mount_t *mp, 297 xfs_mount_t *mp,
422 uint flags) 298 uint flags)
@@ -426,9 +302,6 @@ xfs_qm_scall_quotaon(
426 uint accflags; 302 uint accflags;
427 __int64_t sbflags; 303 __int64_t sbflags;
428 304
429 if (!capable(CAP_SYS_ADMIN))
430 return XFS_ERROR(EPERM);
431
432 flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); 305 flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
433 /* 306 /*
434 * Switching on quota accounting must be done at mount time. 307 * Switching on quota accounting must be done at mount time.
@@ -517,7 +390,7 @@ xfs_qm_scall_quotaon(
517/* 390/*
518 * Return quota status information, such as uquota-off, enforcements, etc. 391 * Return quota status information, such as uquota-off, enforcements, etc.
519 */ 392 */
520STATIC int 393int
521xfs_qm_scall_getqstat( 394xfs_qm_scall_getqstat(
522 xfs_mount_t *mp, 395 xfs_mount_t *mp,
523 fs_quota_stat_t *out) 396 fs_quota_stat_t *out)
@@ -582,7 +455,7 @@ xfs_qm_scall_getqstat(
582/* 455/*
583 * Adjust quota limits, and start/stop timers accordingly. 456 * Adjust quota limits, and start/stop timers accordingly.
584 */ 457 */
585STATIC int 458int
586xfs_qm_scall_setqlim( 459xfs_qm_scall_setqlim(
587 xfs_mount_t *mp, 460 xfs_mount_t *mp,
588 xfs_dqid_t id, 461 xfs_dqid_t id,
@@ -595,9 +468,6 @@ xfs_qm_scall_setqlim(
595 int error; 468 int error;
596 xfs_qcnt_t hard, soft; 469 xfs_qcnt_t hard, soft;
597 470
598 if (!capable(CAP_SYS_ADMIN))
599 return XFS_ERROR(EPERM);
600
601 if ((newlim->d_fieldmask & 471 if ((newlim->d_fieldmask &
602 (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0) 472 (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0)
603 return (0); 473 return (0);
@@ -742,7 +612,7 @@ xfs_qm_scall_setqlim(
742 return error; 612 return error;
743} 613}
744 614
745STATIC int 615int
746xfs_qm_scall_getquota( 616xfs_qm_scall_getquota(
747 xfs_mount_t *mp, 617 xfs_mount_t *mp,
748 xfs_dqid_t id, 618 xfs_dqid_t id,
@@ -935,30 +805,6 @@ xfs_qm_export_dquot(
935} 805}
936 806
937STATIC uint 807STATIC uint
938xfs_qm_import_qtype_flags(
939 uint uflags)
940{
941 uint oflags = 0;
942
943 /*
944 * Can't be more than one, or none.
945 */
946 if (((uflags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) ==
947 (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) ||
948 ((uflags & (XFS_GROUP_QUOTA | XFS_PROJ_QUOTA)) ==
949 (XFS_GROUP_QUOTA | XFS_PROJ_QUOTA)) ||
950 ((uflags & (XFS_USER_QUOTA | XFS_PROJ_QUOTA)) ==
951 (XFS_USER_QUOTA | XFS_PROJ_QUOTA)) ||
952 ((uflags & (XFS_GROUP_QUOTA|XFS_USER_QUOTA|XFS_PROJ_QUOTA)) == 0))
953 return (0);
954
955 oflags |= (uflags & XFS_USER_QUOTA) ? XFS_DQ_USER : 0;
956 oflags |= (uflags & XFS_PROJ_QUOTA) ? XFS_DQ_PROJ : 0;
957 oflags |= (uflags & XFS_GROUP_QUOTA) ? XFS_DQ_GROUP: 0;
958 return oflags;
959}
960
961STATIC uint
962xfs_qm_export_qtype_flags( 808xfs_qm_export_qtype_flags(
963 uint flags) 809 uint flags)
964{ 810{
@@ -979,26 +825,6 @@ xfs_qm_export_qtype_flags(
979} 825}
980 826
981STATIC uint 827STATIC uint
982xfs_qm_import_flags(
983 uint uflags)
984{
985 uint flags = 0;
986
987 if (uflags & XFS_QUOTA_UDQ_ACCT)
988 flags |= XFS_UQUOTA_ACCT;
989 if (uflags & XFS_QUOTA_PDQ_ACCT)
990 flags |= XFS_PQUOTA_ACCT;
991 if (uflags & XFS_QUOTA_GDQ_ACCT)
992 flags |= XFS_GQUOTA_ACCT;
993 if (uflags & XFS_QUOTA_UDQ_ENFD)
994 flags |= XFS_UQUOTA_ENFD;
995 if (uflags & (XFS_QUOTA_PDQ_ENFD|XFS_QUOTA_GDQ_ENFD))
996 flags |= XFS_OQUOTA_ENFD;
997 return (flags);
998}
999
1000
1001STATIC uint
1002xfs_qm_export_flags( 828xfs_qm_export_flags(
1003 uint flags) 829 uint flags)
1004{ 830{
@@ -1134,7 +960,7 @@ xfs_dqhash_t *qmtest_udqtab;
1134xfs_dqhash_t *qmtest_gdqtab; 960xfs_dqhash_t *qmtest_gdqtab;
1135int qmtest_hashmask; 961int qmtest_hashmask;
1136int qmtest_nfails; 962int qmtest_nfails;
1137mutex_t qcheck_lock; 963struct mutex qcheck_lock;
1138 964
1139#define DQTEST_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \ 965#define DQTEST_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
1140 (__psunsigned_t)(id)) & \ 966 (__psunsigned_t)(id)) & \
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index c4fcea600bc2..8286b2842b6b 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -42,34 +42,24 @@
42#define XFS_QI_QOFFLOCK(mp) ((mp)->m_quotainfo->qi_quotaofflock) 42#define XFS_QI_QOFFLOCK(mp) ((mp)->m_quotainfo->qi_quotaofflock)
43 43
44#define XFS_QI_MPL_LIST(mp) ((mp)->m_quotainfo->qi_dqlist) 44#define XFS_QI_MPL_LIST(mp) ((mp)->m_quotainfo->qi_dqlist)
45#define XFS_QI_MPLLOCK(mp) ((mp)->m_quotainfo->qi_dqlist.qh_lock)
46#define XFS_QI_MPLNEXT(mp) ((mp)->m_quotainfo->qi_dqlist.qh_next) 45#define XFS_QI_MPLNEXT(mp) ((mp)->m_quotainfo->qi_dqlist.qh_next)
47#define XFS_QI_MPLNDQUOTS(mp) ((mp)->m_quotainfo->qi_dqlist.qh_nelems) 46#define XFS_QI_MPLNDQUOTS(mp) ((mp)->m_quotainfo->qi_dqlist.qh_nelems)
48 47
49#define XQMLCK(h) (mutex_lock(&((h)->qh_lock))) 48#define xfs_qm_mplist_lock(mp) \
50#define XQMUNLCK(h) (mutex_unlock(&((h)->qh_lock))) 49 mutex_lock(&(XFS_QI_MPL_LIST(mp).qh_lock))
51#ifdef DEBUG 50#define xfs_qm_mplist_nowait(mp) \
52struct xfs_dqhash; 51 mutex_trylock(&(XFS_QI_MPL_LIST(mp).qh_lock))
53static inline int XQMISLCKD(struct xfs_dqhash *h) 52#define xfs_qm_mplist_unlock(mp) \
54{ 53 mutex_unlock(&(XFS_QI_MPL_LIST(mp).qh_lock))
55 if (mutex_trylock(&h->qh_lock)) { 54#define XFS_QM_IS_MPLIST_LOCKED(mp) \
56 mutex_unlock(&h->qh_lock); 55 mutex_is_locked(&(XFS_QI_MPL_LIST(mp).qh_lock))
57 return 0; 56
58 } 57#define xfs_qm_freelist_lock(qm) \
59 return 1; 58 mutex_lock(&((qm)->qm_dqfreelist.qh_lock))
60} 59#define xfs_qm_freelist_lock_nowait(qm) \
61#endif 60 mutex_trylock(&((qm)->qm_dqfreelist.qh_lock))
62 61#define xfs_qm_freelist_unlock(qm) \
63#define XFS_DQ_HASH_LOCK(h) XQMLCK(h) 62 mutex_unlock(&((qm)->qm_dqfreelist.qh_lock))
64#define XFS_DQ_HASH_UNLOCK(h) XQMUNLCK(h)
65#define XFS_DQ_IS_HASH_LOCKED(h) XQMISLCKD(h)
66
67#define xfs_qm_mplist_lock(mp) XQMLCK(&(XFS_QI_MPL_LIST(mp)))
68#define xfs_qm_mplist_unlock(mp) XQMUNLCK(&(XFS_QI_MPL_LIST(mp)))
69#define XFS_QM_IS_MPLIST_LOCKED(mp) XQMISLCKD(&(XFS_QI_MPL_LIST(mp)))
70
71#define xfs_qm_freelist_lock(qm) XQMLCK(&((qm)->qm_dqfreelist))
72#define xfs_qm_freelist_unlock(qm) XQMUNLCK(&((qm)->qm_dqfreelist))
73 63
74/* 64/*
75 * Hash into a bucket in the dquot hash table, based on <mp, id>. 65 * Hash into a bucket in the dquot hash table, based on <mp, id>.
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 99611381e740..447173bcf96d 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -624,10 +624,9 @@ xfs_trans_dqresv(
624 xfs_qcnt_t *resbcountp; 624 xfs_qcnt_t *resbcountp;
625 xfs_quotainfo_t *q = mp->m_quotainfo; 625 xfs_quotainfo_t *q = mp->m_quotainfo;
626 626
627 if (! (flags & XFS_QMOPT_DQLOCK)) { 627
628 xfs_dqlock(dqp); 628 xfs_dqlock(dqp);
629 } 629
630 ASSERT(XFS_DQ_IS_LOCKED(dqp));
631 if (flags & XFS_TRANS_DQ_RES_BLKS) { 630 if (flags & XFS_TRANS_DQ_RES_BLKS) {
632 hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit); 631 hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
633 if (!hardlimit) 632 if (!hardlimit)
@@ -740,10 +739,8 @@ xfs_trans_dqresv(
740 ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount)); 739 ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount));
741 740
742error_return: 741error_return:
743 if (! (flags & XFS_QMOPT_DQLOCK)) { 742 xfs_dqunlock(dqp);
744 xfs_dqunlock(dqp); 743 return error;
745 }
746 return (error);
747} 744}
748 745
749 746
@@ -753,8 +750,7 @@ error_return:
753 * grp/prj quotas is important, because this follows a both-or-nothing 750 * grp/prj quotas is important, because this follows a both-or-nothing
754 * approach. 751 * approach.
755 * 752 *
756 * flags = XFS_QMOPT_DQLOCK indicate if dquot(s) need to be locked. 753 * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
757 * XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
758 * XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT. Used by pquota. 754 * XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT. Used by pquota.
759 * XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks 755 * XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks
760 * XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks 756 * XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index ae5482965424..3f3610a7ee05 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -24,6 +24,7 @@
24#include "xfs_ag.h" 24#include "xfs_ag.h"
25#include "xfs_dmapi.h" 25#include "xfs_dmapi.h"
26#include "xfs_mount.h" 26#include "xfs_mount.h"
27#include "xfs_error.h"
27 28
28static char message[1024]; /* keep it off the stack */ 29static char message[1024]; /* keep it off the stack */
29static DEFINE_SPINLOCK(xfs_err_lock); 30static DEFINE_SPINLOCK(xfs_err_lock);
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
index 5830c040ea7e..b83f76b6d410 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/support/uuid.c
@@ -17,10 +17,6 @@
17 */ 17 */
18#include <xfs.h> 18#include <xfs.h>
19 19
20static DEFINE_MUTEX(uuid_monitor);
21static int uuid_table_size;
22static uuid_t *uuid_table;
23
24/* IRIX interpretation of an uuid_t */ 20/* IRIX interpretation of an uuid_t */
25typedef struct { 21typedef struct {
26 __be32 uu_timelow; 22 __be32 uu_timelow;
@@ -46,12 +42,6 @@ uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
46 fsid[1] = be32_to_cpu(uup->uu_timelow); 42 fsid[1] = be32_to_cpu(uup->uu_timelow);
47} 43}
48 44
49void
50uuid_create_nil(uuid_t *uuid)
51{
52 memset(uuid, 0, sizeof(*uuid));
53}
54
55int 45int
56uuid_is_nil(uuid_t *uuid) 46uuid_is_nil(uuid_t *uuid)
57{ 47{
@@ -71,64 +61,3 @@ uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
71{ 61{
72 return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1; 62 return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1;
73} 63}
74
75/*
76 * Given a 128-bit uuid, return a 64-bit value by adding the top and bottom
77 * 64-bit words. NOTE: This function can not be changed EVER. Although
78 * brain-dead, some applications depend on this 64-bit value remaining
79 * persistent. Specifically, DMI vendors store the value as a persistent
80 * filehandle.
81 */
82__uint64_t
83uuid_hash64(uuid_t *uuid)
84{
85 __uint64_t *sp = (__uint64_t *)uuid;
86
87 return sp[0] + sp[1];
88}
89
90int
91uuid_table_insert(uuid_t *uuid)
92{
93 int i, hole;
94
95 mutex_lock(&uuid_monitor);
96 for (i = 0, hole = -1; i < uuid_table_size; i++) {
97 if (uuid_is_nil(&uuid_table[i])) {
98 hole = i;
99 continue;
100 }
101 if (uuid_equal(uuid, &uuid_table[i])) {
102 mutex_unlock(&uuid_monitor);
103 return 0;
104 }
105 }
106 if (hole < 0) {
107 uuid_table = kmem_realloc(uuid_table,
108 (uuid_table_size + 1) * sizeof(*uuid_table),
109 uuid_table_size * sizeof(*uuid_table),
110 KM_SLEEP);
111 hole = uuid_table_size++;
112 }
113 uuid_table[hole] = *uuid;
114 mutex_unlock(&uuid_monitor);
115 return 1;
116}
117
118void
119uuid_table_remove(uuid_t *uuid)
120{
121 int i;
122
123 mutex_lock(&uuid_monitor);
124 for (i = 0; i < uuid_table_size; i++) {
125 if (uuid_is_nil(&uuid_table[i]))
126 continue;
127 if (!uuid_equal(uuid, &uuid_table[i]))
128 continue;
129 uuid_create_nil(&uuid_table[i]);
130 break;
131 }
132 ASSERT(i < uuid_table_size);
133 mutex_unlock(&uuid_monitor);
134}
diff --git a/fs/xfs/support/uuid.h b/fs/xfs/support/uuid.h
index cff5b607d445..4732d71262cc 100644
--- a/fs/xfs/support/uuid.h
+++ b/fs/xfs/support/uuid.h
@@ -22,12 +22,8 @@ typedef struct {
22 unsigned char __u_bits[16]; 22 unsigned char __u_bits[16];
23} uuid_t; 23} uuid_t;
24 24
25extern void uuid_create_nil(uuid_t *uuid);
26extern int uuid_is_nil(uuid_t *uuid); 25extern int uuid_is_nil(uuid_t *uuid);
27extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2); 26extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
28extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]); 27extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
29extern __uint64_t uuid_hash64(uuid_t *uuid);
30extern int uuid_table_insert(uuid_t *uuid);
31extern void uuid_table_remove(uuid_t *uuid);
32 28
33#endif /* __XFS_SUPPORT_UUID_H__ */ 29#endif /* __XFS_SUPPORT_UUID_H__ */
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 143d63ecb20a..c8641f713caa 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -223,8 +223,8 @@ typedef struct xfs_perag
223 be32_to_cpu((a)->agf_levels[XFS_BTNUM_CNTi]), mp)) 223 be32_to_cpu((a)->agf_levels[XFS_BTNUM_CNTi]), mp))
224#define XFS_MIN_FREELIST_PAG(pag,mp) \ 224#define XFS_MIN_FREELIST_PAG(pag,mp) \
225 (XFS_MIN_FREELIST_RAW( \ 225 (XFS_MIN_FREELIST_RAW( \
226 (uint_t)(pag)->pagf_levels[XFS_BTNUM_BNOi], \ 226 (unsigned int)(pag)->pagf_levels[XFS_BTNUM_BNOi], \
227 (uint_t)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp)) 227 (unsigned int)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp))
228 228
229#define XFS_AGB_TO_FSB(mp,agno,agbno) \ 229#define XFS_AGB_TO_FSB(mp,agno,agbno) \
230 (((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno)) 230 (((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno))
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 028e44e58ea9..2cf944eb796d 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1872,6 +1872,25 @@ xfs_alloc_compute_maxlevels(
1872} 1872}
1873 1873
1874/* 1874/*
1875 * Find the length of the longest extent in an AG.
1876 */
1877xfs_extlen_t
1878xfs_alloc_longest_free_extent(
1879 struct xfs_mount *mp,
1880 struct xfs_perag *pag)
1881{
1882 xfs_extlen_t need, delta = 0;
1883
1884 need = XFS_MIN_FREELIST_PAG(pag, mp);
1885 if (need > pag->pagf_flcount)
1886 delta = need - pag->pagf_flcount;
1887
1888 if (pag->pagf_longest > delta)
1889 return pag->pagf_longest - delta;
1890 return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1891}
1892
1893/*
1875 * Decide whether to use this allocation group for this allocation. 1894 * Decide whether to use this allocation group for this allocation.
1876 * If so, fix up the btree freelist's size. 1895 * If so, fix up the btree freelist's size.
1877 */ 1896 */
@@ -1923,15 +1942,12 @@ xfs_alloc_fix_freelist(
1923 } 1942 }
1924 1943
1925 if (!(flags & XFS_ALLOC_FLAG_FREEING)) { 1944 if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1926 need = XFS_MIN_FREELIST_PAG(pag, mp);
1927 delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
1928 /* 1945 /*
1929 * If it looks like there isn't a long enough extent, or enough 1946 * If it looks like there isn't a long enough extent, or enough
1930 * total blocks, reject it. 1947 * total blocks, reject it.
1931 */ 1948 */
1932 longest = (pag->pagf_longest > delta) ? 1949 need = XFS_MIN_FREELIST_PAG(pag, mp);
1933 (pag->pagf_longest - delta) : 1950 longest = xfs_alloc_longest_free_extent(mp, pag);
1934 (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
1935 if ((args->minlen + args->alignment + args->minalignslop - 1) > 1951 if ((args->minlen + args->alignment + args->minalignslop - 1) >
1936 longest || 1952 longest ||
1937 ((int)(pag->pagf_freeblks + pag->pagf_flcount - 1953 ((int)(pag->pagf_freeblks + pag->pagf_flcount -
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 588172796f7b..e704caee10df 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -100,6 +100,12 @@ typedef struct xfs_alloc_arg {
100#define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/ 100#define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/
101#define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */ 101#define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */
102 102
103/*
104 * Find the length of the longest extent in an AG.
105 */
106xfs_extlen_t
107xfs_alloc_longest_free_extent(struct xfs_mount *mp,
108 struct xfs_perag *pag);
103 109
104#ifdef __KERNEL__ 110#ifdef __KERNEL__
105 111
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 6c323f8a4cd1..afdc8911637d 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -155,7 +155,8 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
155 * minimum offset only needs to be the space required for 155 * minimum offset only needs to be the space required for
156 * the btree root. 156 * the btree root.
157 */ 157 */
158 if (!dp->i_d.di_forkoff && dp->i_df.if_bytes > mp->m_attroffset) 158 if (!dp->i_d.di_forkoff && dp->i_df.if_bytes >
159 xfs_default_attroffset(dp))
159 dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS); 160 dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
160 break; 161 break;
161 162
@@ -298,6 +299,26 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
298} 299}
299 300
300/* 301/*
302 * After the last attribute is removed revert to original inode format,
303 * making all literal area available to the data fork once more.
304 */
305STATIC void
306xfs_attr_fork_reset(
307 struct xfs_inode *ip,
308 struct xfs_trans *tp)
309{
310 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
311 ip->i_d.di_forkoff = 0;
312 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
313
314 ASSERT(ip->i_d.di_anextents == 0);
315 ASSERT(ip->i_afp == NULL);
316
317 ip->i_df.if_ext_max = XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t);
318 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
319}
320
321/*
301 * Remove an attribute from the shortform attribute list structure. 322 * Remove an attribute from the shortform attribute list structure.
302 */ 323 */
303int 324int
@@ -344,22 +365,10 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
344 */ 365 */
345 totsize -= size; 366 totsize -= size;
346 if (totsize == sizeof(xfs_attr_sf_hdr_t) && 367 if (totsize == sizeof(xfs_attr_sf_hdr_t) &&
347 !(args->op_flags & XFS_DA_OP_ADDNAME) && 368 (mp->m_flags & XFS_MOUNT_ATTR2) &&
348 (mp->m_flags & XFS_MOUNT_ATTR2) && 369 (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
349 (dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) { 370 !(args->op_flags & XFS_DA_OP_ADDNAME)) {
350 /* 371 xfs_attr_fork_reset(dp, args->trans);
351 * Last attribute now removed, revert to original
352 * inode format making all literal area available
353 * to the data fork once more.
354 */
355 xfs_idestroy_fork(dp, XFS_ATTR_FORK);
356 dp->i_d.di_forkoff = 0;
357 dp->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
358 ASSERT(dp->i_d.di_anextents == 0);
359 ASSERT(dp->i_afp == NULL);
360 dp->i_df.if_ext_max =
361 XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
362 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
363 } else { 372 } else {
364 xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); 373 xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
365 dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize); 374 dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
@@ -786,20 +795,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
786 if (forkoff == -1) { 795 if (forkoff == -1) {
787 ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2); 796 ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
788 ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE); 797 ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE);
789 798 xfs_attr_fork_reset(dp, args->trans);
790 /*
791 * Last attribute was removed, revert to original
792 * inode format making all literal area available
793 * to the data fork once more.
794 */
795 xfs_idestroy_fork(dp, XFS_ATTR_FORK);
796 dp->i_d.di_forkoff = 0;
797 dp->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
798 ASSERT(dp->i_d.di_anextents == 0);
799 ASSERT(dp->i_afp == NULL);
800 dp->i_df.if_ext_max =
801 XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
802 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
803 goto out; 799 goto out;
804 } 800 }
805 801
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index c852cd65aaea..3a6ed426327a 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2479,7 +2479,7 @@ xfs_bmap_adjacent(
2479 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock); 2479 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
2480 /* 2480 /*
2481 * If allocating at eof, and there's a previous real block, 2481 * If allocating at eof, and there's a previous real block,
2482 * try to use it's last block as our starting point. 2482 * try to use its last block as our starting point.
2483 */ 2483 */
2484 if (ap->eof && ap->prevp->br_startoff != NULLFILEOFF && 2484 if (ap->eof && ap->prevp->br_startoff != NULLFILEOFF &&
2485 !isnullstartblock(ap->prevp->br_startblock) && 2485 !isnullstartblock(ap->prevp->br_startblock) &&
@@ -2712,9 +2712,6 @@ xfs_bmap_btalloc(
2712 xfs_agnumber_t startag; 2712 xfs_agnumber_t startag;
2713 xfs_alloc_arg_t args; 2713 xfs_alloc_arg_t args;
2714 xfs_extlen_t blen; 2714 xfs_extlen_t blen;
2715 xfs_extlen_t delta;
2716 xfs_extlen_t longest;
2717 xfs_extlen_t need;
2718 xfs_extlen_t nextminlen = 0; 2715 xfs_extlen_t nextminlen = 0;
2719 xfs_perag_t *pag; 2716 xfs_perag_t *pag;
2720 int nullfb; /* true if ap->firstblock isn't set */ 2717 int nullfb; /* true if ap->firstblock isn't set */
@@ -2796,13 +2793,8 @@ xfs_bmap_btalloc(
2796 * See xfs_alloc_fix_freelist... 2793 * See xfs_alloc_fix_freelist...
2797 */ 2794 */
2798 if (pag->pagf_init) { 2795 if (pag->pagf_init) {
2799 need = XFS_MIN_FREELIST_PAG(pag, mp); 2796 xfs_extlen_t longest;
2800 delta = need > pag->pagf_flcount ? 2797 longest = xfs_alloc_longest_free_extent(mp, pag);
2801 need - pag->pagf_flcount : 0;
2802 longest = (pag->pagf_longest > delta) ?
2803 (pag->pagf_longest - delta) :
2804 (pag->pagf_flcount > 0 ||
2805 pag->pagf_longest > 0);
2806 if (blen < longest) 2798 if (blen < longest)
2807 blen = longest; 2799 blen = longest;
2808 } else 2800 } else
@@ -3577,6 +3569,27 @@ xfs_bmap_extents_to_btree(
3577} 3569}
3578 3570
3579/* 3571/*
3572 * Calculate the default attribute fork offset for newly created inodes.
3573 */
3574uint
3575xfs_default_attroffset(
3576 struct xfs_inode *ip)
3577{
3578 struct xfs_mount *mp = ip->i_mount;
3579 uint offset;
3580
3581 if (mp->m_sb.sb_inodesize == 256) {
3582 offset = XFS_LITINO(mp) -
3583 XFS_BMDR_SPACE_CALC(MINABTPTRS);
3584 } else {
3585 offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
3586 }
3587
3588 ASSERT(offset < XFS_LITINO(mp));
3589 return offset;
3590}
3591
3592/*
3580 * Helper routine to reset inode di_forkoff field when switching 3593 * Helper routine to reset inode di_forkoff field when switching
3581 * attribute fork from local to extent format - we reset it where 3594 * attribute fork from local to extent format - we reset it where
3582 * possible to make space available for inline data fork extents. 3595 * possible to make space available for inline data fork extents.
@@ -3588,15 +3601,18 @@ xfs_bmap_forkoff_reset(
3588 int whichfork) 3601 int whichfork)
3589{ 3602{
3590 if (whichfork == XFS_ATTR_FORK && 3603 if (whichfork == XFS_ATTR_FORK &&
3591 (ip->i_d.di_format != XFS_DINODE_FMT_DEV) && 3604 ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
3592 (ip->i_d.di_format != XFS_DINODE_FMT_UUID) && 3605 ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
3593 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && 3606 ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
3594 ((mp->m_attroffset >> 3) > ip->i_d.di_forkoff)) { 3607 uint dfl_forkoff = xfs_default_attroffset(ip) >> 3;
3595 ip->i_d.di_forkoff = mp->m_attroffset >> 3; 3608
3596 ip->i_df.if_ext_max = XFS_IFORK_DSIZE(ip) / 3609 if (dfl_forkoff > ip->i_d.di_forkoff) {
3597 (uint)sizeof(xfs_bmbt_rec_t); 3610 ip->i_d.di_forkoff = dfl_forkoff;
3598 ip->i_afp->if_ext_max = XFS_IFORK_ASIZE(ip) / 3611 ip->i_df.if_ext_max =
3599 (uint)sizeof(xfs_bmbt_rec_t); 3612 XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t);
3613 ip->i_afp->if_ext_max =
3614 XFS_IFORK_ASIZE(ip) / sizeof(xfs_bmbt_rec_t);
3615 }
3600 } 3616 }
3601} 3617}
3602 3618
@@ -4065,7 +4081,7 @@ xfs_bmap_add_attrfork(
4065 case XFS_DINODE_FMT_BTREE: 4081 case XFS_DINODE_FMT_BTREE:
4066 ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size); 4082 ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
4067 if (!ip->i_d.di_forkoff) 4083 if (!ip->i_d.di_forkoff)
4068 ip->i_d.di_forkoff = mp->m_attroffset >> 3; 4084 ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
4069 else if (mp->m_flags & XFS_MOUNT_ATTR2) 4085 else if (mp->m_flags & XFS_MOUNT_ATTR2)
4070 version = 2; 4086 version = 2;
4071 break; 4087 break;
@@ -4212,12 +4228,12 @@ xfs_bmap_compute_maxlevels(
4212 * (a signed 16-bit number, xfs_aextnum_t). 4228 * (a signed 16-bit number, xfs_aextnum_t).
4213 * 4229 *
4214 * Note that we can no longer assume that if we are in ATTR1 that 4230 * Note that we can no longer assume that if we are in ATTR1 that
4215 * the fork offset of all the inodes will be (m_attroffset >> 3) 4231 * the fork offset of all the inodes will be
4216 * because we could have mounted with ATTR2 and then mounted back 4232 * (xfs_default_attroffset(ip) >> 3) because we could have mounted
4217 * with ATTR1, keeping the di_forkoff's fixed but probably at 4233 * with ATTR2 and then mounted back with ATTR1, keeping the
4218 * various positions. Therefore, for both ATTR1 and ATTR2 4234 * di_forkoff's fixed but probably at various positions. Therefore,
4219 * we have to assume the worst case scenario of a minimum size 4235 * for both ATTR1 and ATTR2 we have to assume the worst case scenario
4220 * available. 4236 * of a minimum size available.
4221 */ 4237 */
4222 if (whichfork == XFS_DATA_FORK) { 4238 if (whichfork == XFS_DATA_FORK) {
4223 maxleafents = MAXEXTNUM; 4239 maxleafents = MAXEXTNUM;
@@ -4804,7 +4820,7 @@ xfs_bmapi(
4804 xfs_extlen_t minlen; /* min allocation size */ 4820 xfs_extlen_t minlen; /* min allocation size */
4805 xfs_mount_t *mp; /* xfs mount structure */ 4821 xfs_mount_t *mp; /* xfs mount structure */
4806 int n; /* current extent index */ 4822 int n; /* current extent index */
4807 int nallocs; /* number of extents alloc\'d */ 4823 int nallocs; /* number of extents alloc'd */
4808 xfs_extnum_t nextents; /* number of extents in file */ 4824 xfs_extnum_t nextents; /* number of extents in file */
4809 xfs_fileoff_t obno; /* old block number (offset) */ 4825 xfs_fileoff_t obno; /* old block number (offset) */
4810 xfs_bmbt_irec_t prev; /* previous file extent record */ 4826 xfs_bmbt_irec_t prev; /* previous file extent record */
@@ -6204,7 +6220,7 @@ xfs_bmap_get_bp(
6204 return(bp); 6220 return(bp);
6205} 6221}
6206 6222
6207void 6223STATIC void
6208xfs_check_block( 6224xfs_check_block(
6209 struct xfs_btree_block *block, 6225 struct xfs_btree_block *block,
6210 xfs_mount_t *mp, 6226 xfs_mount_t *mp,
@@ -6494,7 +6510,7 @@ xfs_bmap_count_tree(
6494 block = XFS_BUF_TO_BLOCK(bp); 6510 block = XFS_BUF_TO_BLOCK(bp);
6495 6511
6496 if (--level) { 6512 if (--level) {
6497 /* Not at node above leafs, count this level of nodes */ 6513 /* Not at node above leaves, count this level of nodes */
6498 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); 6514 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
6499 while (nextbno != NULLFSBLOCK) { 6515 while (nextbno != NULLFSBLOCK) {
6500 if ((error = xfs_btree_read_bufl(mp, tp, nextbno, 6516 if ((error = xfs_btree_read_bufl(mp, tp, nextbno,
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index be2979d88d32..1b8ff9256bd0 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -125,7 +125,7 @@ typedef struct xfs_bmalloca {
125 struct xfs_bmbt_irec *gotp; /* extent after, or delayed */ 125 struct xfs_bmbt_irec *gotp; /* extent after, or delayed */
126 xfs_extlen_t alen; /* i/o length asked/allocated */ 126 xfs_extlen_t alen; /* i/o length asked/allocated */
127 xfs_extlen_t total; /* total blocks needed for xaction */ 127 xfs_extlen_t total; /* total blocks needed for xaction */
128 xfs_extlen_t minlen; /* mininum allocation size (blocks) */ 128 xfs_extlen_t minlen; /* minimum allocation size (blocks) */
129 xfs_extlen_t minleft; /* amount must be left after alloc */ 129 xfs_extlen_t minleft; /* amount must be left after alloc */
130 char eof; /* set if allocating past last extent */ 130 char eof; /* set if allocating past last extent */
131 char wasdel; /* replacing a delayed allocation */ 131 char wasdel; /* replacing a delayed allocation */
@@ -338,6 +338,10 @@ xfs_check_nostate_extents(
338 xfs_extnum_t idx, 338 xfs_extnum_t idx,
339 xfs_extnum_t num); 339 xfs_extnum_t num);
340 340
341uint
342xfs_default_attroffset(
343 struct xfs_inode *ip);
344
341#ifdef __KERNEL__ 345#ifdef __KERNEL__
342 346
343/* 347/*
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index e73c332eb23f..e9df99574829 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -1883,7 +1883,7 @@ xfs_btree_lshift(
1883 1883
1884 /* 1884 /*
1885 * We add one entry to the left side and remove one for the right side. 1885 * We add one entry to the left side and remove one for the right side.
1886 * Accout for it here, the changes will be updated on disk and logged 1886 * Account for it here, the changes will be updated on disk and logged
1887 * later. 1887 * later.
1888 */ 1888 */
1889 lrecs++; 1889 lrecs++;
@@ -3535,7 +3535,7 @@ xfs_btree_delrec(
3535 XFS_BTREE_STATS_INC(cur, join); 3535 XFS_BTREE_STATS_INC(cur, join);
3536 3536
3537 /* 3537 /*
3538 * Fix up the the number of records and right block pointer in the 3538 * Fix up the number of records and right block pointer in the
3539 * surviving block, and log it. 3539 * surviving block, and log it.
3540 */ 3540 */
3541 xfs_btree_set_numrecs(left, lrecs + rrecs); 3541 xfs_btree_set_numrecs(left, lrecs + rrecs);
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 789fffdf8b2f..4f852b735b96 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -41,7 +41,7 @@ extern kmem_zone_t *xfs_btree_cur_zone;
41/* 41/*
42 * Generic btree header. 42 * Generic btree header.
43 * 43 *
44 * This is a comination of the actual format used on disk for short and long 44 * This is a combination of the actual format used on disk for short and long
45 * format btrees. The first three fields are shared by both format, but 45 * format btrees. The first three fields are shared by both format, but
46 * the pointers are different and should be used with care. 46 * the pointers are different and should be used with care.
47 * 47 *
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index c45f74ff1a5b..9ff6e57a5075 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1503,7 +1503,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1503 * This is implemented with some source-level loop unrolling. 1503 * This is implemented with some source-level loop unrolling.
1504 */ 1504 */
1505xfs_dahash_t 1505xfs_dahash_t
1506xfs_da_hashname(const uchar_t *name, int namelen) 1506xfs_da_hashname(const __uint8_t *name, int namelen)
1507{ 1507{
1508 xfs_dahash_t hash; 1508 xfs_dahash_t hash;
1509 1509
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 70b710c1792d..8c536167bf75 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -91,9 +91,9 @@ enum xfs_dacmp {
91 * Structure to ease passing around component names. 91 * Structure to ease passing around component names.
92 */ 92 */
93typedef struct xfs_da_args { 93typedef struct xfs_da_args {
94 const uchar_t *name; /* string (maybe not NULL terminated) */ 94 const __uint8_t *name; /* string (maybe not NULL terminated) */
95 int namelen; /* length of string (maybe no NULL) */ 95 int namelen; /* length of string (maybe no NULL) */
96 uchar_t *value; /* set of bytes (maybe contain NULLs) */ 96 __uint8_t *value; /* set of bytes (maybe contain NULLs) */
97 int valuelen; /* length of value */ 97 int valuelen; /* length of value */
98 int flags; /* argument flags (eg: ATTR_NOCREATE) */ 98 int flags; /* argument flags (eg: ATTR_NOCREATE) */
99 xfs_dahash_t hashval; /* hash value of name */ 99 xfs_dahash_t hashval; /* hash value of name */
@@ -185,7 +185,7 @@ typedef struct xfs_da_state {
185 unsigned char inleaf; /* insert into 1->lf, 0->splf */ 185 unsigned char inleaf; /* insert into 1->lf, 0->splf */
186 unsigned char extravalid; /* T/F: extrablk is in use */ 186 unsigned char extravalid; /* T/F: extrablk is in use */
187 unsigned char extraafter; /* T/F: extrablk is after new */ 187 unsigned char extraafter; /* T/F: extrablk is after new */
188 xfs_da_state_blk_t extrablk; /* for double-splits on leafs */ 188 xfs_da_state_blk_t extrablk; /* for double-splits on leaves */
189 /* for dirv2 extrablk is data */ 189 /* for dirv2 extrablk is data */
190} xfs_da_state_t; 190} xfs_da_state_t;
191 191
@@ -251,7 +251,7 @@ xfs_daddr_t xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp,
251int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, 251int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
252 xfs_dabuf_t *dead_buf); 252 xfs_dabuf_t *dead_buf);
253 253
254uint xfs_da_hashname(const uchar_t *name_string, int name_length); 254uint xfs_da_hashname(const __uint8_t *name_string, int name_length);
255enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args, 255enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
256 const char *name, int len); 256 const char *name, int len);
257 257
@@ -268,5 +268,6 @@ xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf);
268 268
269extern struct kmem_zone *xfs_da_state_zone; 269extern struct kmem_zone *xfs_da_state_zone;
270extern struct kmem_zone *xfs_dabuf_zone; 270extern struct kmem_zone *xfs_dabuf_zone;
271extern const struct xfs_nameops xfs_default_nameops;
271 272
272#endif /* __XFS_DA_BTREE_H__ */ 273#endif /* __XFS_DA_BTREE_H__ */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index f8278cfcc1d3..e6d839bddbf0 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -79,6 +79,12 @@ xfs_swapext(
79 goto out_put_target_file; 79 goto out_put_target_file;
80 } 80 }
81 81
82 if (IS_SWAPFILE(file->f_path.dentry->d_inode) ||
83 IS_SWAPFILE(target_file->f_path.dentry->d_inode)) {
84 error = XFS_ERROR(EINVAL);
85 goto out_put_target_file;
86 }
87
82 ip = XFS_I(file->f_path.dentry->d_inode); 88 ip = XFS_I(file->f_path.dentry->d_inode);
83 tip = XFS_I(target_file->f_path.dentry->d_inode); 89 tip = XFS_I(target_file->f_path.dentry->d_inode);
84 90
@@ -118,19 +124,17 @@ xfs_swap_extents(
118 xfs_bstat_t *sbp = &sxp->sx_stat; 124 xfs_bstat_t *sbp = &sxp->sx_stat;
119 xfs_ifork_t *tempifp, *ifp, *tifp; 125 xfs_ifork_t *tempifp, *ifp, *tifp;
120 int ilf_fields, tilf_fields; 126 int ilf_fields, tilf_fields;
121 static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
122 int error = 0; 127 int error = 0;
123 int aforkblks = 0; 128 int aforkblks = 0;
124 int taforkblks = 0; 129 int taforkblks = 0;
125 __uint64_t tmp; 130 __uint64_t tmp;
126 char locked = 0;
127 131
128 mp = ip->i_mount; 132 mp = ip->i_mount;
129 133
130 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); 134 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
131 if (!tempifp) { 135 if (!tempifp) {
132 error = XFS_ERROR(ENOMEM); 136 error = XFS_ERROR(ENOMEM);
133 goto error0; 137 goto out;
134 } 138 }
135 139
136 sbp = &sxp->sx_stat; 140 sbp = &sxp->sx_stat;
@@ -143,25 +147,24 @@ xfs_swap_extents(
143 */ 147 */
144 xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); 148 xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
145 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); 149 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
146 locked = 1;
147 150
148 /* Verify that both files have the same format */ 151 /* Verify that both files have the same format */
149 if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { 152 if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
150 error = XFS_ERROR(EINVAL); 153 error = XFS_ERROR(EINVAL);
151 goto error0; 154 goto out_unlock;
152 } 155 }
153 156
154 /* Verify both files are either real-time or non-realtime */ 157 /* Verify both files are either real-time or non-realtime */
155 if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { 158 if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
156 error = XFS_ERROR(EINVAL); 159 error = XFS_ERROR(EINVAL);
157 goto error0; 160 goto out_unlock;
158 } 161 }
159 162
160 /* Should never get a local format */ 163 /* Should never get a local format */
161 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || 164 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
162 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { 165 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
163 error = XFS_ERROR(EINVAL); 166 error = XFS_ERROR(EINVAL);
164 goto error0; 167 goto out_unlock;
165 } 168 }
166 169
167 if (VN_CACHED(VFS_I(tip)) != 0) { 170 if (VN_CACHED(VFS_I(tip)) != 0) {
@@ -169,13 +172,13 @@ xfs_swap_extents(
169 error = xfs_flushinval_pages(tip, 0, -1, 172 error = xfs_flushinval_pages(tip, 0, -1,
170 FI_REMAPF_LOCKED); 173 FI_REMAPF_LOCKED);
171 if (error) 174 if (error)
172 goto error0; 175 goto out_unlock;
173 } 176 }
174 177
175 /* Verify O_DIRECT for ftmp */ 178 /* Verify O_DIRECT for ftmp */
176 if (VN_CACHED(VFS_I(tip)) != 0) { 179 if (VN_CACHED(VFS_I(tip)) != 0) {
177 error = XFS_ERROR(EINVAL); 180 error = XFS_ERROR(EINVAL);
178 goto error0; 181 goto out_unlock;
179 } 182 }
180 183
181 /* Verify all data are being swapped */ 184 /* Verify all data are being swapped */
@@ -183,7 +186,7 @@ xfs_swap_extents(
183 sxp->sx_length != ip->i_d.di_size || 186 sxp->sx_length != ip->i_d.di_size ||
184 sxp->sx_length != tip->i_d.di_size) { 187 sxp->sx_length != tip->i_d.di_size) {
185 error = XFS_ERROR(EFAULT); 188 error = XFS_ERROR(EFAULT);
186 goto error0; 189 goto out_unlock;
187 } 190 }
188 191
189 /* 192 /*
@@ -193,7 +196,7 @@ xfs_swap_extents(
193 */ 196 */
194 if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) { 197 if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) {
195 error = XFS_ERROR(EINVAL); 198 error = XFS_ERROR(EINVAL);
196 goto error0; 199 goto out_unlock;
197 } 200 }
198 201
199 /* 202 /*
@@ -208,7 +211,7 @@ xfs_swap_extents(
208 (sbp->bs_mtime.tv_sec != ip->i_d.di_mtime.t_sec) || 211 (sbp->bs_mtime.tv_sec != ip->i_d.di_mtime.t_sec) ||
209 (sbp->bs_mtime.tv_nsec != ip->i_d.di_mtime.t_nsec)) { 212 (sbp->bs_mtime.tv_nsec != ip->i_d.di_mtime.t_nsec)) {
210 error = XFS_ERROR(EBUSY); 213 error = XFS_ERROR(EBUSY);
211 goto error0; 214 goto out_unlock;
212 } 215 }
213 216
214 /* We need to fail if the file is memory mapped. Once we have tossed 217 /* We need to fail if the file is memory mapped. Once we have tossed
@@ -219,7 +222,7 @@ xfs_swap_extents(
219 */ 222 */
220 if (VN_MAPPED(VFS_I(ip))) { 223 if (VN_MAPPED(VFS_I(ip))) {
221 error = XFS_ERROR(EBUSY); 224 error = XFS_ERROR(EBUSY);
222 goto error0; 225 goto out_unlock;
223 } 226 }
224 227
225 xfs_iunlock(ip, XFS_ILOCK_EXCL); 228 xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -242,8 +245,7 @@ xfs_swap_extents(
242 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 245 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
243 xfs_iunlock(tip, XFS_IOLOCK_EXCL); 246 xfs_iunlock(tip, XFS_IOLOCK_EXCL);
244 xfs_trans_cancel(tp, 0); 247 xfs_trans_cancel(tp, 0);
245 locked = 0; 248 goto out;
246 goto error0;
247 } 249 }
248 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); 250 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
249 251
@@ -253,19 +255,15 @@ xfs_swap_extents(
253 if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && 255 if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
254 (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { 256 (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
255 error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks); 257 error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks);
256 if (error) { 258 if (error)
257 xfs_trans_cancel(tp, 0); 259 goto out_trans_cancel;
258 goto error0;
259 }
260 } 260 }
261 if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && 261 if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
262 (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { 262 (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
263 error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, 263 error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
264 &taforkblks); 264 &taforkblks);
265 if (error) { 265 if (error)
266 xfs_trans_cancel(tp, 0); 266 goto out_trans_cancel;
267 goto error0;
268 }
269 } 267 }
270 268
271 /* 269 /*
@@ -332,10 +330,10 @@ xfs_swap_extents(
332 330
333 331
334 IHOLD(ip); 332 IHOLD(ip);
335 xfs_trans_ijoin(tp, ip, lock_flags); 333 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
336 334
337 IHOLD(tip); 335 IHOLD(tip);
338 xfs_trans_ijoin(tp, tip, lock_flags); 336 xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
339 337
340 xfs_trans_log_inode(tp, ip, ilf_fields); 338 xfs_trans_log_inode(tp, ip, ilf_fields);
341 xfs_trans_log_inode(tp, tip, tilf_fields); 339 xfs_trans_log_inode(tp, tip, tilf_fields);
@@ -344,19 +342,19 @@ xfs_swap_extents(
344 * If this is a synchronous mount, make sure that the 342 * If this is a synchronous mount, make sure that the
345 * transaction goes to disk before returning to the user. 343 * transaction goes to disk before returning to the user.
346 */ 344 */
347 if (mp->m_flags & XFS_MOUNT_WSYNC) { 345 if (mp->m_flags & XFS_MOUNT_WSYNC)
348 xfs_trans_set_sync(tp); 346 xfs_trans_set_sync(tp);
349 }
350 347
351 error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); 348 error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT);
352 locked = 0;
353 349
354 error0: 350out_unlock:
355 if (locked) { 351 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
356 xfs_iunlock(ip, lock_flags); 352 xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
357 xfs_iunlock(tip, lock_flags); 353out:
358 } 354 kmem_free(tempifp);
359 if (tempifp != NULL)
360 kmem_free(tempifp);
361 return error; 355 return error;
356
357out_trans_cancel:
358 xfs_trans_cancel(tp, 0);
359 goto out_unlock;
362} 360}
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index 162e8726df5e..e5b153b2e6a3 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -103,7 +103,9 @@ typedef enum xfs_dinode_fmt {
103/* 103/*
104 * Inode size for given fs. 104 * Inode size for given fs.
105 */ 105 */
106#define XFS_LITINO(mp) ((mp)->m_litino) 106#define XFS_LITINO(mp) \
107 ((int)(((mp)->m_sb.sb_inodesize) - sizeof(struct xfs_dinode)))
108
107#define XFS_BROOT_SIZE_ADJ \ 109#define XFS_BROOT_SIZE_ADJ \
108 (XFS_BTREE_LBLOCK_LEN - sizeof(xfs_bmdr_block_t)) 110 (XFS_BTREE_LBLOCK_LEN - sizeof(xfs_bmdr_block_t))
109 111
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 1afb12278b8d..c657bec6d951 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -46,8 +46,6 @@
46 46
47struct xfs_name xfs_name_dotdot = {"..", 2}; 47struct xfs_name xfs_name_dotdot = {"..", 2};
48 48
49extern const struct xfs_nameops xfs_default_nameops;
50
51/* 49/*
52 * ASCII case-insensitive (ie. A-Z) support for directories that was 50 * ASCII case-insensitive (ie. A-Z) support for directories that was
53 * used in IRIX. 51 * used in IRIX.
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index e1f0a06aaf04..ab52e9e1c1ee 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -448,7 +448,6 @@ xfs_dir2_block_getdents(
448 xfs_mount_t *mp; /* filesystem mount point */ 448 xfs_mount_t *mp; /* filesystem mount point */
449 char *ptr; /* current data entry */ 449 char *ptr; /* current data entry */
450 int wantoff; /* starting block offset */ 450 int wantoff; /* starting block offset */
451 xfs_ino_t ino;
452 xfs_off_t cook; 451 xfs_off_t cook;
453 452
454 mp = dp->i_mount; 453 mp = dp->i_mount;
@@ -509,16 +508,12 @@ xfs_dir2_block_getdents(
509 508
510 cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, 509 cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
511 (char *)dep - (char *)block); 510 (char *)dep - (char *)block);
512 ino = be64_to_cpu(dep->inumber);
513#if XFS_BIG_INUMS
514 ino += mp->m_inoadd;
515#endif
516 511
517 /* 512 /*
518 * If it didn't fit, set the final offset to here & return. 513 * If it didn't fit, set the final offset to here & return.
519 */ 514 */
520 if (filldir(dirent, dep->name, dep->namelen, cook & 0x7fffffff, 515 if (filldir(dirent, dep->name, dep->namelen, cook & 0x7fffffff,
521 ino, DT_UNKNOWN)) { 516 be64_to_cpu(dep->inumber), DT_UNKNOWN)) {
522 *offset = cook & 0x7fffffff; 517 *offset = cook & 0x7fffffff;
523 xfs_da_brelse(NULL, bp); 518 xfs_da_brelse(NULL, bp);
524 return 0; 519 return 0;
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
index b816e0252739..efbc290c7fec 100644
--- a/fs/xfs/xfs_dir2_data.h
+++ b/fs/xfs/xfs_dir2_data.h
@@ -38,7 +38,7 @@ struct xfs_trans;
38 38
39/* 39/*
40 * Directory address space divided into sections, 40 * Directory address space divided into sections,
41 * spaces separated by 32gb. 41 * spaces separated by 32GB.
42 */ 42 */
43#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG)) 43#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
44#define XFS_DIR2_DATA_SPACE 0 44#define XFS_DIR2_DATA_SPACE 0
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index ef805a374eec..fa913e459442 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -549,7 +549,7 @@ xfs_dir2_leaf_addname(
549 * Check the internal consistency of a leaf1 block. 549 * Check the internal consistency of a leaf1 block.
550 * Pop an assert if something is wrong. 550 * Pop an assert if something is wrong.
551 */ 551 */
552void 552STATIC void
553xfs_dir2_leaf_check( 553xfs_dir2_leaf_check(
554 xfs_inode_t *dp, /* incore directory inode */ 554 xfs_inode_t *dp, /* incore directory inode */
555 xfs_dabuf_t *bp) /* leaf's buffer */ 555 xfs_dabuf_t *bp) /* leaf's buffer */
@@ -780,7 +780,6 @@ xfs_dir2_leaf_getdents(
780 int ra_index; /* *map index for read-ahead */ 780 int ra_index; /* *map index for read-ahead */
781 int ra_offset; /* map entry offset for ra */ 781 int ra_offset; /* map entry offset for ra */
782 int ra_want; /* readahead count wanted */ 782 int ra_want; /* readahead count wanted */
783 xfs_ino_t ino;
784 783
785 /* 784 /*
786 * If the offset is at or past the largest allowed value, 785 * If the offset is at or past the largest allowed value,
@@ -1076,24 +1075,12 @@ xfs_dir2_leaf_getdents(
1076 continue; 1075 continue;
1077 } 1076 }
1078 1077
1079 /*
1080 * Copy the entry into the putargs, and try formatting it.
1081 */
1082 dep = (xfs_dir2_data_entry_t *)ptr; 1078 dep = (xfs_dir2_data_entry_t *)ptr;
1083
1084 length = xfs_dir2_data_entsize(dep->namelen); 1079 length = xfs_dir2_data_entsize(dep->namelen);
1085 1080
1086 ino = be64_to_cpu(dep->inumber);
1087#if XFS_BIG_INUMS
1088 ino += mp->m_inoadd;
1089#endif
1090
1091 /*
1092 * Won't fit. Return to caller.
1093 */
1094 if (filldir(dirent, dep->name, dep->namelen, 1081 if (filldir(dirent, dep->name, dep->namelen,
1095 xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff, 1082 xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff,
1096 ino, DT_UNKNOWN)) 1083 be64_to_cpu(dep->inumber), DT_UNKNOWN))
1097 break; 1084 break;
1098 1085
1099 /* 1086 /*
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index fa6c3a5ddbc6..5a81ccd1045b 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -1104,7 +1104,7 @@ xfs_dir2_leafn_remove(
1104 } 1104 }
1105 xfs_dir2_leafn_check(dp, bp); 1105 xfs_dir2_leafn_check(dp, bp);
1106 /* 1106 /*
1107 * Return indication of whether this leaf block is emtpy enough 1107 * Return indication of whether this leaf block is empty enough
1108 * to justify trying to join it with a neighbor. 1108 * to justify trying to join it with a neighbor.
1109 */ 1109 */
1110 *rval = 1110 *rval =
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index a8a8a6efad5b..e89734e84646 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -748,11 +748,7 @@ xfs_dir2_sf_getdents(
748 * Put . entry unless we're starting past it. 748 * Put . entry unless we're starting past it.
749 */ 749 */
750 if (*offset <= dot_offset) { 750 if (*offset <= dot_offset) {
751 ino = dp->i_ino; 751 if (filldir(dirent, ".", 1, dot_offset & 0x7fffffff, dp->i_ino, DT_DIR)) {
752#if XFS_BIG_INUMS
753 ino += mp->m_inoadd;
754#endif
755 if (filldir(dirent, ".", 1, dot_offset & 0x7fffffff, ino, DT_DIR)) {
756 *offset = dot_offset & 0x7fffffff; 752 *offset = dot_offset & 0x7fffffff;
757 return 0; 753 return 0;
758 } 754 }
@@ -763,9 +759,6 @@ xfs_dir2_sf_getdents(
763 */ 759 */
764 if (*offset <= dotdot_offset) { 760 if (*offset <= dotdot_offset) {
765 ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); 761 ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
766#if XFS_BIG_INUMS
767 ino += mp->m_inoadd;
768#endif
769 if (filldir(dirent, "..", 2, dotdot_offset & 0x7fffffff, ino, DT_DIR)) { 762 if (filldir(dirent, "..", 2, dotdot_offset & 0x7fffffff, ino, DT_DIR)) {
770 *offset = dotdot_offset & 0x7fffffff; 763 *offset = dotdot_offset & 0x7fffffff;
771 return 0; 764 return 0;
@@ -786,10 +779,6 @@ xfs_dir2_sf_getdents(
786 } 779 }
787 780
788 ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); 781 ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
789#if XFS_BIG_INUMS
790 ino += mp->m_inoadd;
791#endif
792
793 if (filldir(dirent, sfep->name, sfep->namelen, 782 if (filldir(dirent, sfep->name, sfep->namelen,
794 off & 0x7fffffff, ino, DT_UNKNOWN)) { 783 off & 0x7fffffff, ino, DT_UNKNOWN)) {
795 *offset = off & 0x7fffffff; 784 *offset = off & 0x7fffffff;
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 2f049f63e85f..0d22c56fdf64 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -33,12 +33,10 @@ typedef struct xfs_extent {
33 * conversion routine. 33 * conversion routine.
34 */ 34 */
35 35
36#ifndef HAVE_FORMAT32
37typedef struct xfs_extent_32 { 36typedef struct xfs_extent_32 {
38 __uint64_t ext_start; 37 __uint64_t ext_start;
39 __uint32_t ext_len; 38 __uint32_t ext_len;
40} __attribute__((packed)) xfs_extent_32_t; 39} __attribute__((packed)) xfs_extent_32_t;
41#endif
42 40
43typedef struct xfs_extent_64 { 41typedef struct xfs_extent_64 {
44 __uint64_t ext_start; 42 __uint64_t ext_start;
@@ -59,7 +57,6 @@ typedef struct xfs_efi_log_format {
59 xfs_extent_t efi_extents[1]; /* array of extents to free */ 57 xfs_extent_t efi_extents[1]; /* array of extents to free */
60} xfs_efi_log_format_t; 58} xfs_efi_log_format_t;
61 59
62#ifndef HAVE_FORMAT32
63typedef struct xfs_efi_log_format_32 { 60typedef struct xfs_efi_log_format_32 {
64 __uint16_t efi_type; /* efi log item type */ 61 __uint16_t efi_type; /* efi log item type */
65 __uint16_t efi_size; /* size of this item */ 62 __uint16_t efi_size; /* size of this item */
@@ -67,7 +64,6 @@ typedef struct xfs_efi_log_format_32 {
67 __uint64_t efi_id; /* efi identifier */ 64 __uint64_t efi_id; /* efi identifier */
68 xfs_extent_32_t efi_extents[1]; /* array of extents to free */ 65 xfs_extent_32_t efi_extents[1]; /* array of extents to free */
69} __attribute__((packed)) xfs_efi_log_format_32_t; 66} __attribute__((packed)) xfs_efi_log_format_32_t;
70#endif
71 67
72typedef struct xfs_efi_log_format_64 { 68typedef struct xfs_efi_log_format_64 {
73 __uint16_t efi_type; /* efi log item type */ 69 __uint16_t efi_type; /* efi log item type */
@@ -90,7 +86,6 @@ typedef struct xfs_efd_log_format {
90 xfs_extent_t efd_extents[1]; /* array of extents freed */ 86 xfs_extent_t efd_extents[1]; /* array of extents freed */
91} xfs_efd_log_format_t; 87} xfs_efd_log_format_t;
92 88
93#ifndef HAVE_FORMAT32
94typedef struct xfs_efd_log_format_32 { 89typedef struct xfs_efd_log_format_32 {
95 __uint16_t efd_type; /* efd log item type */ 90 __uint16_t efd_type; /* efd log item type */
96 __uint16_t efd_size; /* size of this item */ 91 __uint16_t efd_size; /* size of this item */
@@ -98,7 +93,6 @@ typedef struct xfs_efd_log_format_32 {
98 __uint64_t efd_efi_id; /* id of corresponding efi */ 93 __uint64_t efd_efi_id; /* id of corresponding efi */
99 xfs_extent_32_t efd_extents[1]; /* array of extents freed */ 94 xfs_extent_32_t efd_extents[1]; /* array of extents freed */
100} __attribute__((packed)) xfs_efd_log_format_32_t; 95} __attribute__((packed)) xfs_efd_log_format_32_t;
101#endif
102 96
103typedef struct xfs_efd_log_format_64 { 97typedef struct xfs_efd_log_format_64 {
104 __uint16_t efd_type; /* efd log item type */ 98 __uint16_t efd_type; /* efd log item type */
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index f3bb75da384e..6c87c8f304ef 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -140,7 +140,7 @@ _xfs_filestream_pick_ag(
140 xfs_extlen_t minlen) 140 xfs_extlen_t minlen)
141{ 141{
142 int err, trylock, nscan; 142 int err, trylock, nscan;
143 xfs_extlen_t delta, longest, need, free, minfree, maxfree = 0; 143 xfs_extlen_t longest, free, minfree, maxfree = 0;
144 xfs_agnumber_t ag, max_ag = NULLAGNUMBER; 144 xfs_agnumber_t ag, max_ag = NULLAGNUMBER;
145 struct xfs_perag *pag; 145 struct xfs_perag *pag;
146 146
@@ -186,12 +186,7 @@ _xfs_filestream_pick_ag(
186 goto next_ag; 186 goto next_ag;
187 } 187 }
188 188
189 need = XFS_MIN_FREELIST_PAG(pag, mp); 189 longest = xfs_alloc_longest_free_extent(mp, pag);
190 delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
191 longest = (pag->pagf_longest > delta) ?
192 (pag->pagf_longest - delta) :
193 (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
194
195 if (((minlen && longest >= minlen) || 190 if (((minlen && longest >= minlen) ||
196 (!minlen && pag->pagf_freeblks >= minfree)) && 191 (!minlen && pag->pagf_freeblks >= minfree)) &&
197 (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) || 192 (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 680d0e0ec932..8379e3bca26c 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -576,7 +576,7 @@ out:
576 if (fdblks_delta) { 576 if (fdblks_delta) {
577 /* 577 /*
578 * If we are putting blocks back here, m_resblks_avail is 578 * If we are putting blocks back here, m_resblks_avail is
579 * already at it's max so this will put it in the free pool. 579 * already at its max so this will put it in the free pool.
580 * 580 *
581 * If we need space, we'll either succeed in getting it 581 * If we need space, we'll either succeed in getting it
582 * from the free block count or we'll get an enospc. If 582 * from the free block count or we'll get an enospc. If
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index ab016e5ae7be..3120a3a5e20f 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -230,7 +230,7 @@ xfs_ialloc_ag_alloc(
230 args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1; 230 args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1;
231 231
232 /* Allow space for the inode btree to split. */ 232 /* Allow space for the inode btree to split. */
233 args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; 233 args.minleft = args.mp->m_in_maxlevels - 1;
234 if ((error = xfs_alloc_vextent(&args))) 234 if ((error = xfs_alloc_vextent(&args)))
235 return error; 235 return error;
236 } else 236 } else
@@ -270,7 +270,7 @@ xfs_ialloc_ag_alloc(
270 /* 270 /*
271 * Allow space for the inode btree to split. 271 * Allow space for the inode btree to split.
272 */ 272 */
273 args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; 273 args.minleft = args.mp->m_in_maxlevels - 1;
274 if ((error = xfs_alloc_vextent(&args))) 274 if ((error = xfs_alloc_vextent(&args)))
275 return error; 275 return error;
276 } 276 }
@@ -349,7 +349,7 @@ xfs_ialloc_ag_alloc(
349 * Initialize all inodes in this buffer and then log them. 349 * Initialize all inodes in this buffer and then log them.
350 * 350 *
351 * XXX: It would be much better if we had just one transaction to 351 * XXX: It would be much better if we had just one transaction to
352 * log a whole cluster of inodes instead of all the indivdual 352 * log a whole cluster of inodes instead of all the individual
353 * transactions causing a lot of log traffic. 353 * transactions causing a lot of log traffic.
354 */ 354 */
355 xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog); 355 xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog);
@@ -943,7 +943,7 @@ nextag:
943 ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % 943 ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
944 XFS_INODES_PER_CHUNK) == 0); 944 XFS_INODES_PER_CHUNK) == 0);
945 ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); 945 ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
946 XFS_INOBT_CLR_FREE(&rec, offset); 946 rec.ir_free &= ~XFS_INOBT_MASK(offset);
947 rec.ir_freecount--; 947 rec.ir_freecount--;
948 if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, 948 if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount,
949 rec.ir_free))) 949 rec.ir_free)))
@@ -1105,11 +1105,11 @@ xfs_difree(
1105 */ 1105 */
1106 off = agino - rec.ir_startino; 1106 off = agino - rec.ir_startino;
1107 ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK); 1107 ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK);
1108 ASSERT(!XFS_INOBT_IS_FREE(&rec, off)); 1108 ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off)));
1109 /* 1109 /*
1110 * Mark the inode free & increment the count. 1110 * Mark the inode free & increment the count.
1111 */ 1111 */
1112 XFS_INOBT_SET_FREE(&rec, off); 1112 rec.ir_free |= XFS_INOBT_MASK(off);
1113 rec.ir_freecount++; 1113 rec.ir_freecount++;
1114 1114
1115 /* 1115 /*
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 99f2408e8d8e..c282a9af5393 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -164,7 +164,7 @@ xfs_inobt_init_rec_from_cur(
164} 164}
165 165
166/* 166/*
167 * intial value of ptr for lookup 167 * initial value of ptr for lookup
168 */ 168 */
169STATIC void 169STATIC void
170xfs_inobt_init_ptr_from_cur( 170xfs_inobt_init_ptr_from_cur(
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index 5580e255ff06..f782ad0c4769 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -32,14 +32,14 @@ struct xfs_mount;
32#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ 32#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */
33 33
34typedef __uint64_t xfs_inofree_t; 34typedef __uint64_t xfs_inofree_t;
35#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) 35#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t))
36#define XFS_INODES_PER_CHUNK_LOG (XFS_NBBYLOG + 3) 36#define XFS_INODES_PER_CHUNK_LOG (XFS_NBBYLOG + 3)
37#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1) 37#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1)
38#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i))
38 39
39static inline xfs_inofree_t xfs_inobt_maskn(int i, int n) 40static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
40{ 41{
41 return (((n) >= XFS_INODES_PER_CHUNK ? \ 42 return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i;
42 (xfs_inofree_t)0 : ((xfs_inofree_t)1 << (n))) - 1) << (i);
43} 43}
44 44
45/* 45/*
@@ -69,20 +69,6 @@ typedef struct xfs_inobt_key {
69typedef __be32 xfs_inobt_ptr_t; 69typedef __be32 xfs_inobt_ptr_t;
70 70
71/* 71/*
72 * Bit manipulations for ir_free.
73 */
74#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i))
75#define XFS_INOBT_IS_FREE(rp,i) \
76 (((rp)->ir_free & XFS_INOBT_MASK(i)) != 0)
77#define XFS_INOBT_SET_FREE(rp,i) ((rp)->ir_free |= XFS_INOBT_MASK(i))
78#define XFS_INOBT_CLR_FREE(rp,i) ((rp)->ir_free &= ~XFS_INOBT_MASK(i))
79
80/*
81 * Maximum number of inode btree levels.
82 */
83#define XFS_IN_MAXLEVELS(mp) ((mp)->m_in_maxlevels)
84
85/*
86 * block numbers in the AG. 72 * block numbers in the AG.
87 */ 73 */
88#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) 74#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1))
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 1f175fa34b22..f879c1bc4b96 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -122,7 +122,7 @@ typedef struct xfs_ictimestamp {
122 122
123/* 123/*
124 * NOTE: This structure must be kept identical to struct xfs_dinode 124 * NOTE: This structure must be kept identical to struct xfs_dinode
125 * in xfs_dinode.h except for the endianess annotations. 125 * in xfs_dinode.h except for the endianness annotations.
126 */ 126 */
127typedef struct xfs_icdinode { 127typedef struct xfs_icdinode {
128 __uint16_t di_magic; /* inode magic # = XFS_DINODE_MAGIC */ 128 __uint16_t di_magic; /* inode magic # = XFS_DINODE_MAGIC */
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 9957d0602d54..a52ac125f055 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -40,7 +40,6 @@ typedef struct xfs_inode_log_format {
40 __int32_t ilf_boffset; /* off of inode in buffer */ 40 __int32_t ilf_boffset; /* off of inode in buffer */
41} xfs_inode_log_format_t; 41} xfs_inode_log_format_t;
42 42
43#ifndef HAVE_FORMAT32
44typedef struct xfs_inode_log_format_32 { 43typedef struct xfs_inode_log_format_32 {
45 __uint16_t ilf_type; /* inode log item type */ 44 __uint16_t ilf_type; /* inode log item type */
46 __uint16_t ilf_size; /* size of this item */ 45 __uint16_t ilf_size; /* size of this item */
@@ -56,7 +55,6 @@ typedef struct xfs_inode_log_format_32 {
56 __int32_t ilf_len; /* len of inode buffer */ 55 __int32_t ilf_len; /* len of inode buffer */
57 __int32_t ilf_boffset; /* off of inode in buffer */ 56 __int32_t ilf_boffset; /* off of inode in buffer */
58} __attribute__((packed)) xfs_inode_log_format_32_t; 57} __attribute__((packed)) xfs_inode_log_format_32_t;
59#endif
60 58
61typedef struct xfs_inode_log_format_64 { 59typedef struct xfs_inode_log_format_64 {
62 __uint16_t ilf_type; /* inode log item type */ 60 __uint16_t ilf_type; /* inode log item type */
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index ee1a0c134cc2..a1cc1322fc0f 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -63,7 +63,7 @@ typedef enum {
63 */ 63 */
64 64
65typedef struct xfs_iomap { 65typedef struct xfs_iomap {
66 xfs_daddr_t iomap_bn; /* first 512b blk of mapping */ 66 xfs_daddr_t iomap_bn; /* first 512B blk of mapping */
67 xfs_buftarg_t *iomap_target; 67 xfs_buftarg_t *iomap_target;
68 xfs_off_t iomap_offset; /* offset of mapping, bytes */ 68 xfs_off_t iomap_offset; /* offset of mapping, bytes */
69 xfs_off_t iomap_bsize; /* size of mapping, bytes */ 69 xfs_off_t iomap_bsize; /* size of mapping, bytes */
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index cf98a805ec90..aeb2d2221c7d 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -83,7 +83,12 @@ xfs_bulkstat_one_iget(
83 buf->bs_uid = dic->di_uid; 83 buf->bs_uid = dic->di_uid;
84 buf->bs_gid = dic->di_gid; 84 buf->bs_gid = dic->di_gid;
85 buf->bs_size = dic->di_size; 85 buf->bs_size = dic->di_size;
86 vn_atime_to_bstime(VFS_I(ip), &buf->bs_atime); 86 /*
87 * We are reading the atime from the Linux inode because the
88 * dinode might not be uptodate.
89 */
90 buf->bs_atime.tv_sec = VFS_I(ip)->i_atime.tv_sec;
91 buf->bs_atime.tv_nsec = VFS_I(ip)->i_atime.tv_nsec;
87 buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; 92 buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
88 buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec; 93 buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
89 buf->bs_ctime.tv_sec = dic->di_ctime.t_sec; 94 buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
@@ -579,7 +584,7 @@ xfs_bulkstat(
579 * first inode of the cluster. 584 * first inode of the cluster.
580 * 585 *
581 * Careful with clustidx. There can be 586 * Careful with clustidx. There can be
582 * multple clusters per chunk, a single 587 * multiple clusters per chunk, a single
583 * cluster per chunk or a cluster that has 588 * cluster per chunk or a cluster that has
584 * inodes represented from several different 589 * inodes represented from several different
585 * chunks (if blocksize is large). 590 * chunks (if blocksize is large).
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index f4726f702a9e..f76c6d7cea21 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -574,7 +574,7 @@ xfs_log_mount(
574 error = xfs_trans_ail_init(mp); 574 error = xfs_trans_ail_init(mp);
575 if (error) { 575 if (error) {
576 cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error); 576 cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error);
577 goto error; 577 goto out_free_log;
578 } 578 }
579 mp->m_log->l_ailp = mp->m_ail; 579 mp->m_log->l_ailp = mp->m_ail;
580 580
@@ -594,20 +594,22 @@ xfs_log_mount(
594 mp->m_flags |= XFS_MOUNT_RDONLY; 594 mp->m_flags |= XFS_MOUNT_RDONLY;
595 if (error) { 595 if (error) {
596 cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error); 596 cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error);
597 goto error; 597 goto out_destroy_ail;
598 } 598 }
599 } 599 }
600 600
601 /* Normal transactions can now occur */ 601 /* Normal transactions can now occur */
602 mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY; 602 mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY;
603 603
604 /* End mounting message in xfs_log_mount_finish */
605 return 0; 604 return 0;
606error: 605
607 xfs_log_unmount_dealloc(mp); 606out_destroy_ail:
607 xfs_trans_ail_destroy(mp);
608out_free_log:
609 xlog_dealloc_log(mp->m_log);
608out: 610out:
609 return error; 611 return error;
610} /* xfs_log_mount */ 612}
611 613
612/* 614/*
613 * Finish the recovery of the file system. This is separate from 615 * Finish the recovery of the file system. This is separate from
@@ -633,19 +635,6 @@ xfs_log_mount_finish(xfs_mount_t *mp)
633} 635}
634 636
635/* 637/*
636 * Unmount processing for the log.
637 */
638int
639xfs_log_unmount(xfs_mount_t *mp)
640{
641 int error;
642
643 error = xfs_log_unmount_write(mp);
644 xfs_log_unmount_dealloc(mp);
645 return error;
646}
647
648/*
649 * Final log writes as part of unmount. 638 * Final log writes as part of unmount.
650 * 639 *
651 * Mark the filesystem clean as unmount happens. Note that during relocation 640 * Mark the filesystem clean as unmount happens. Note that during relocation
@@ -795,7 +784,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
795 * and deallocate the log as the aild references the log. 784 * and deallocate the log as the aild references the log.
796 */ 785 */
797void 786void
798xfs_log_unmount_dealloc(xfs_mount_t *mp) 787xfs_log_unmount(xfs_mount_t *mp)
799{ 788{
800 xfs_trans_ail_destroy(mp); 789 xfs_trans_ail_destroy(mp);
801 xlog_dealloc_log(mp->m_log); 790 xlog_dealloc_log(mp->m_log);
@@ -1109,7 +1098,7 @@ xlog_bdstrat_cb(struct xfs_buf *bp)
1109/* 1098/*
1110 * Return size of each in-core log record buffer. 1099 * Return size of each in-core log record buffer.
1111 * 1100 *
1112 * All machines get 8 x 32KB buffers by default, unless tuned otherwise. 1101 * All machines get 8 x 32kB buffers by default, unless tuned otherwise.
1113 * 1102 *
1114 * If the filesystem blocksize is too large, we may need to choose a 1103 * If the filesystem blocksize is too large, we may need to choose a
1115 * larger size since the directory code currently logs entire blocks. 1104 * larger size since the directory code currently logs entire blocks.
@@ -1139,8 +1128,8 @@ xlog_get_iclog_buffer_size(xfs_mount_t *mp,
1139 } 1128 }
1140 1129
1141 if (xfs_sb_version_haslogv2(&mp->m_sb)) { 1130 if (xfs_sb_version_haslogv2(&mp->m_sb)) {
1142 /* # headers = size / 32K 1131 /* # headers = size / 32k
1143 * one header holds cycles from 32K of data 1132 * one header holds cycles from 32k of data
1144 */ 1133 */
1145 1134
1146 xhdrs = mp->m_logbsize / XLOG_HEADER_CYCLE_SIZE; 1135 xhdrs = mp->m_logbsize / XLOG_HEADER_CYCLE_SIZE;
@@ -1156,7 +1145,7 @@ xlog_get_iclog_buffer_size(xfs_mount_t *mp,
1156 goto done; 1145 goto done;
1157 } 1146 }
1158 1147
1159 /* All machines use 32KB buffers by default. */ 1148 /* All machines use 32kB buffers by default. */
1160 log->l_iclog_size = XLOG_BIG_RECORD_BSIZE; 1149 log->l_iclog_size = XLOG_BIG_RECORD_BSIZE;
1161 log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT; 1150 log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT;
1162 1151
@@ -1164,32 +1153,8 @@ xlog_get_iclog_buffer_size(xfs_mount_t *mp,
1164 log->l_iclog_hsize = BBSIZE; 1153 log->l_iclog_hsize = BBSIZE;
1165 log->l_iclog_heads = 1; 1154 log->l_iclog_heads = 1;
1166 1155
1167 /* 1156done:
1168 * For 16KB, we use 3 32KB buffers. For 32KB block sizes, we use 1157 /* are we being asked to make the sizes selected above visible? */
1169 * 4 32KB buffers. For 64KB block sizes, we use 8 32KB buffers.
1170 */
1171 if (mp->m_sb.sb_blocksize >= 16*1024) {
1172 log->l_iclog_size = XLOG_BIG_RECORD_BSIZE;
1173 log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT;
1174 if (mp->m_logbufs <= 0) {
1175 switch (mp->m_sb.sb_blocksize) {
1176 case 16*1024: /* 16 KB */
1177 log->l_iclog_bufs = 3;
1178 break;
1179 case 32*1024: /* 32 KB */
1180 log->l_iclog_bufs = 4;
1181 break;
1182 case 64*1024: /* 64 KB */
1183 log->l_iclog_bufs = 8;
1184 break;
1185 default:
1186 xlog_panic("XFS: Invalid blocksize");
1187 break;
1188 }
1189 }
1190 }
1191
1192done: /* are we being asked to make the sizes selected above visible? */
1193 if (mp->m_logbufs == 0) 1158 if (mp->m_logbufs == 0)
1194 mp->m_logbufs = log->l_iclog_bufs; 1159 mp->m_logbufs = log->l_iclog_bufs;
1195 if (mp->m_logbsize == 0) 1160 if (mp->m_logbsize == 0)
@@ -3214,7 +3179,7 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
3214 */ 3179 */
3215 3180
3216/* 3181/*
3217 * Free a used ticket when it's refcount falls to zero. 3182 * Free a used ticket when its refcount falls to zero.
3218 */ 3183 */
3219void 3184void
3220xfs_log_ticket_put( 3185xfs_log_ticket_put(
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 8a3e84e900a3..d0c9baa50b1a 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -170,9 +170,8 @@ int xfs_log_write(struct xfs_mount *mp,
170 int nentries, 170 int nentries,
171 xfs_log_ticket_t ticket, 171 xfs_log_ticket_t ticket,
172 xfs_lsn_t *start_lsn); 172 xfs_lsn_t *start_lsn);
173int xfs_log_unmount(struct xfs_mount *mp);
174int xfs_log_unmount_write(struct xfs_mount *mp); 173int xfs_log_unmount_write(struct xfs_mount *mp);
175void xfs_log_unmount_dealloc(struct xfs_mount *mp); 174void xfs_log_unmount(struct xfs_mount *mp);
176int xfs_log_force_umount(struct xfs_mount *mp, int logerror); 175int xfs_log_force_umount(struct xfs_mount *mp, int logerror);
177int xfs_log_need_covered(struct xfs_mount *mp); 176int xfs_log_need_covered(struct xfs_mount *mp);
178 177
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 654167be0efb..bcad5f4c1fd1 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -359,7 +359,7 @@ typedef struct xlog_in_core {
359 int ic_size; 359 int ic_size;
360 int ic_offset; 360 int ic_offset;
361 int ic_bwritecnt; 361 int ic_bwritecnt;
362 ushort_t ic_state; 362 unsigned short ic_state;
363 char *ic_datap; /* pointer to iclog data */ 363 char *ic_datap; /* pointer to iclog data */
364#ifdef XFS_LOG_TRACE 364#ifdef XFS_LOG_TRACE
365 struct ktrace *ic_trace; 365 struct ktrace *ic_trace;
@@ -455,7 +455,6 @@ extern void xlog_recover_process_iunlinks(xlog_t *log);
455 455
456extern struct xfs_buf *xlog_get_bp(xlog_t *, int); 456extern struct xfs_buf *xlog_get_bp(xlog_t *, int);
457extern void xlog_put_bp(struct xfs_buf *); 457extern void xlog_put_bp(struct xfs_buf *);
458extern int xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *);
459 458
460extern kmem_zone_t *xfs_log_ticket_zone; 459extern kmem_zone_t *xfs_log_ticket_zone;
461 460
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 61af610d79b3..7ba450116d4f 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -94,12 +94,30 @@ xlog_put_bp(
94 xfs_buf_free(bp); 94 xfs_buf_free(bp);
95} 95}
96 96
97STATIC xfs_caddr_t
98xlog_align(
99 xlog_t *log,
100 xfs_daddr_t blk_no,
101 int nbblks,
102 xfs_buf_t *bp)
103{
104 xfs_caddr_t ptr;
105
106 if (!log->l_sectbb_log)
107 return XFS_BUF_PTR(bp);
108
109 ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask);
110 ASSERT(XFS_BUF_SIZE(bp) >=
111 BBTOB(nbblks + (blk_no & log->l_sectbb_mask)));
112 return ptr;
113}
114
97 115
98/* 116/*
99 * nbblks should be uint, but oh well. Just want to catch that 32-bit length. 117 * nbblks should be uint, but oh well. Just want to catch that 32-bit length.
100 */ 118 */
101int 119STATIC int
102xlog_bread( 120xlog_bread_noalign(
103 xlog_t *log, 121 xlog_t *log,
104 xfs_daddr_t blk_no, 122 xfs_daddr_t blk_no,
105 int nbblks, 123 int nbblks,
@@ -137,6 +155,24 @@ xlog_bread(
137 return error; 155 return error;
138} 156}
139 157
158STATIC int
159xlog_bread(
160 xlog_t *log,
161 xfs_daddr_t blk_no,
162 int nbblks,
163 xfs_buf_t *bp,
164 xfs_caddr_t *offset)
165{
166 int error;
167
168 error = xlog_bread_noalign(log, blk_no, nbblks, bp);
169 if (error)
170 return error;
171
172 *offset = xlog_align(log, blk_no, nbblks, bp);
173 return 0;
174}
175
140/* 176/*
141 * Write out the buffer at the given block for the given number of blocks. 177 * Write out the buffer at the given block for the given number of blocks.
142 * The buffer is kept locked across the write and is returned locked. 178 * The buffer is kept locked across the write and is returned locked.
@@ -180,24 +216,6 @@ xlog_bwrite(
180 return error; 216 return error;
181} 217}
182 218
183STATIC xfs_caddr_t
184xlog_align(
185 xlog_t *log,
186 xfs_daddr_t blk_no,
187 int nbblks,
188 xfs_buf_t *bp)
189{
190 xfs_caddr_t ptr;
191
192 if (!log->l_sectbb_log)
193 return XFS_BUF_PTR(bp);
194
195 ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask);
196 ASSERT(XFS_BUF_SIZE(bp) >=
197 BBTOB(nbblks + (blk_no & log->l_sectbb_mask)));
198 return ptr;
199}
200
201#ifdef DEBUG 219#ifdef DEBUG
202/* 220/*
203 * dump debug superblock and log record information 221 * dump debug superblock and log record information
@@ -211,11 +229,11 @@ xlog_header_check_dump(
211 229
212 cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__); 230 cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__);
213 for (b = 0; b < 16; b++) 231 for (b = 0; b < 16; b++)
214 cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]); 232 cmn_err(CE_DEBUG, "%02x", ((__uint8_t *)&mp->m_sb.sb_uuid)[b]);
215 cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT); 233 cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT);
216 cmn_err(CE_DEBUG, " log : uuid = "); 234 cmn_err(CE_DEBUG, " log : uuid = ");
217 for (b = 0; b < 16; b++) 235 for (b = 0; b < 16; b++)
218 cmn_err(CE_DEBUG, "%02x",((uchar_t *)&head->h_fs_uuid)[b]); 236 cmn_err(CE_DEBUG, "%02x", ((__uint8_t *)&head->h_fs_uuid)[b]);
219 cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt)); 237 cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt));
220} 238}
221#else 239#else
@@ -321,9 +339,9 @@ xlog_find_cycle_start(
321 339
322 mid_blk = BLK_AVG(first_blk, *last_blk); 340 mid_blk = BLK_AVG(first_blk, *last_blk);
323 while (mid_blk != first_blk && mid_blk != *last_blk) { 341 while (mid_blk != first_blk && mid_blk != *last_blk) {
324 if ((error = xlog_bread(log, mid_blk, 1, bp))) 342 error = xlog_bread(log, mid_blk, 1, bp, &offset);
343 if (error)
325 return error; 344 return error;
326 offset = xlog_align(log, mid_blk, 1, bp);
327 mid_cycle = xlog_get_cycle(offset); 345 mid_cycle = xlog_get_cycle(offset);
328 if (mid_cycle == cycle) { 346 if (mid_cycle == cycle) {
329 *last_blk = mid_blk; 347 *last_blk = mid_blk;
@@ -379,10 +397,10 @@ xlog_find_verify_cycle(
379 397
380 bcount = min(bufblks, (start_blk + nbblks - i)); 398 bcount = min(bufblks, (start_blk + nbblks - i));
381 399
382 if ((error = xlog_bread(log, i, bcount, bp))) 400 error = xlog_bread(log, i, bcount, bp, &buf);
401 if (error)
383 goto out; 402 goto out;
384 403
385 buf = xlog_align(log, i, bcount, bp);
386 for (j = 0; j < bcount; j++) { 404 for (j = 0; j < bcount; j++) {
387 cycle = xlog_get_cycle(buf); 405 cycle = xlog_get_cycle(buf);
388 if (cycle == stop_on_cycle_no) { 406 if (cycle == stop_on_cycle_no) {
@@ -436,9 +454,9 @@ xlog_find_verify_log_record(
436 return ENOMEM; 454 return ENOMEM;
437 smallmem = 1; 455 smallmem = 1;
438 } else { 456 } else {
439 if ((error = xlog_bread(log, start_blk, num_blks, bp))) 457 error = xlog_bread(log, start_blk, num_blks, bp, &offset);
458 if (error)
440 goto out; 459 goto out;
441 offset = xlog_align(log, start_blk, num_blks, bp);
442 offset += ((num_blks - 1) << BBSHIFT); 460 offset += ((num_blks - 1) << BBSHIFT);
443 } 461 }
444 462
@@ -453,9 +471,9 @@ xlog_find_verify_log_record(
453 } 471 }
454 472
455 if (smallmem) { 473 if (smallmem) {
456 if ((error = xlog_bread(log, i, 1, bp))) 474 error = xlog_bread(log, i, 1, bp, &offset);
475 if (error)
457 goto out; 476 goto out;
458 offset = xlog_align(log, i, 1, bp);
459 } 477 }
460 478
461 head = (xlog_rec_header_t *)offset; 479 head = (xlog_rec_header_t *)offset;
@@ -559,15 +577,18 @@ xlog_find_head(
559 bp = xlog_get_bp(log, 1); 577 bp = xlog_get_bp(log, 1);
560 if (!bp) 578 if (!bp)
561 return ENOMEM; 579 return ENOMEM;
562 if ((error = xlog_bread(log, 0, 1, bp))) 580
581 error = xlog_bread(log, 0, 1, bp, &offset);
582 if (error)
563 goto bp_err; 583 goto bp_err;
564 offset = xlog_align(log, 0, 1, bp); 584
565 first_half_cycle = xlog_get_cycle(offset); 585 first_half_cycle = xlog_get_cycle(offset);
566 586
567 last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ 587 last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */
568 if ((error = xlog_bread(log, last_blk, 1, bp))) 588 error = xlog_bread(log, last_blk, 1, bp, &offset);
589 if (error)
569 goto bp_err; 590 goto bp_err;
570 offset = xlog_align(log, last_blk, 1, bp); 591
571 last_half_cycle = xlog_get_cycle(offset); 592 last_half_cycle = xlog_get_cycle(offset);
572 ASSERT(last_half_cycle != 0); 593 ASSERT(last_half_cycle != 0);
573 594
@@ -817,9 +838,10 @@ xlog_find_tail(
817 if (!bp) 838 if (!bp)
818 return ENOMEM; 839 return ENOMEM;
819 if (*head_blk == 0) { /* special case */ 840 if (*head_blk == 0) { /* special case */
820 if ((error = xlog_bread(log, 0, 1, bp))) 841 error = xlog_bread(log, 0, 1, bp, &offset);
842 if (error)
821 goto bread_err; 843 goto bread_err;
822 offset = xlog_align(log, 0, 1, bp); 844
823 if (xlog_get_cycle(offset) == 0) { 845 if (xlog_get_cycle(offset) == 0) {
824 *tail_blk = 0; 846 *tail_blk = 0;
825 /* leave all other log inited values alone */ 847 /* leave all other log inited values alone */
@@ -832,9 +854,10 @@ xlog_find_tail(
832 */ 854 */
833 ASSERT(*head_blk < INT_MAX); 855 ASSERT(*head_blk < INT_MAX);
834 for (i = (int)(*head_blk) - 1; i >= 0; i--) { 856 for (i = (int)(*head_blk) - 1; i >= 0; i--) {
835 if ((error = xlog_bread(log, i, 1, bp))) 857 error = xlog_bread(log, i, 1, bp, &offset);
858 if (error)
836 goto bread_err; 859 goto bread_err;
837 offset = xlog_align(log, i, 1, bp); 860
838 if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { 861 if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) {
839 found = 1; 862 found = 1;
840 break; 863 break;
@@ -848,9 +871,10 @@ xlog_find_tail(
848 */ 871 */
849 if (!found) { 872 if (!found) {
850 for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { 873 for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
851 if ((error = xlog_bread(log, i, 1, bp))) 874 error = xlog_bread(log, i, 1, bp, &offset);
875 if (error)
852 goto bread_err; 876 goto bread_err;
853 offset = xlog_align(log, i, 1, bp); 877
854 if (XLOG_HEADER_MAGIC_NUM == 878 if (XLOG_HEADER_MAGIC_NUM ==
855 be32_to_cpu(*(__be32 *)offset)) { 879 be32_to_cpu(*(__be32 *)offset)) {
856 found = 2; 880 found = 2;
@@ -922,10 +946,10 @@ xlog_find_tail(
922 if (*head_blk == after_umount_blk && 946 if (*head_blk == after_umount_blk &&
923 be32_to_cpu(rhead->h_num_logops) == 1) { 947 be32_to_cpu(rhead->h_num_logops) == 1) {
924 umount_data_blk = (i + hblks) % log->l_logBBsize; 948 umount_data_blk = (i + hblks) % log->l_logBBsize;
925 if ((error = xlog_bread(log, umount_data_blk, 1, bp))) { 949 error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
950 if (error)
926 goto bread_err; 951 goto bread_err;
927 } 952
928 offset = xlog_align(log, umount_data_blk, 1, bp);
929 op_head = (xlog_op_header_t *)offset; 953 op_head = (xlog_op_header_t *)offset;
930 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { 954 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
931 /* 955 /*
@@ -1017,9 +1041,10 @@ xlog_find_zeroed(
1017 bp = xlog_get_bp(log, 1); 1041 bp = xlog_get_bp(log, 1);
1018 if (!bp) 1042 if (!bp)
1019 return ENOMEM; 1043 return ENOMEM;
1020 if ((error = xlog_bread(log, 0, 1, bp))) 1044 error = xlog_bread(log, 0, 1, bp, &offset);
1045 if (error)
1021 goto bp_err; 1046 goto bp_err;
1022 offset = xlog_align(log, 0, 1, bp); 1047
1023 first_cycle = xlog_get_cycle(offset); 1048 first_cycle = xlog_get_cycle(offset);
1024 if (first_cycle == 0) { /* completely zeroed log */ 1049 if (first_cycle == 0) { /* completely zeroed log */
1025 *blk_no = 0; 1050 *blk_no = 0;
@@ -1028,9 +1053,10 @@ xlog_find_zeroed(
1028 } 1053 }
1029 1054
1030 /* check partially zeroed log */ 1055 /* check partially zeroed log */
1031 if ((error = xlog_bread(log, log_bbnum-1, 1, bp))) 1056 error = xlog_bread(log, log_bbnum-1, 1, bp, &offset);
1057 if (error)
1032 goto bp_err; 1058 goto bp_err;
1033 offset = xlog_align(log, log_bbnum-1, 1, bp); 1059
1034 last_cycle = xlog_get_cycle(offset); 1060 last_cycle = xlog_get_cycle(offset);
1035 if (last_cycle != 0) { /* log completely written to */ 1061 if (last_cycle != 0) { /* log completely written to */
1036 xlog_put_bp(bp); 1062 xlog_put_bp(bp);
@@ -1152,10 +1178,10 @@ xlog_write_log_records(
1152 */ 1178 */
1153 balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block); 1179 balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block);
1154 if (balign != start_block) { 1180 if (balign != start_block) {
1155 if ((error = xlog_bread(log, start_block, 1, bp))) { 1181 error = xlog_bread_noalign(log, start_block, 1, bp);
1156 xlog_put_bp(bp); 1182 if (error)
1157 return error; 1183 goto out_put_bp;
1158 } 1184
1159 j = start_block - balign; 1185 j = start_block - balign;
1160 } 1186 }
1161 1187
@@ -1175,10 +1201,14 @@ xlog_write_log_records(
1175 balign = BBTOB(ealign - start_block); 1201 balign = BBTOB(ealign - start_block);
1176 error = XFS_BUF_SET_PTR(bp, offset + balign, 1202 error = XFS_BUF_SET_PTR(bp, offset + balign,
1177 BBTOB(sectbb)); 1203 BBTOB(sectbb));
1178 if (!error) 1204 if (error)
1179 error = xlog_bread(log, ealign, sectbb, bp); 1205 break;
1180 if (!error) 1206
1181 error = XFS_BUF_SET_PTR(bp, offset, bufblks); 1207 error = xlog_bread_noalign(log, ealign, sectbb, bp);
1208 if (error)
1209 break;
1210
1211 error = XFS_BUF_SET_PTR(bp, offset, bufblks);
1182 if (error) 1212 if (error)
1183 break; 1213 break;
1184 } 1214 }
@@ -1195,6 +1225,8 @@ xlog_write_log_records(
1195 start_block += endcount; 1225 start_block += endcount;
1196 j = 0; 1226 j = 0;
1197 } 1227 }
1228
1229 out_put_bp:
1198 xlog_put_bp(bp); 1230 xlog_put_bp(bp);
1199 return error; 1231 return error;
1200} 1232}
@@ -2511,16 +2543,10 @@ xlog_recover_do_inode_trans(
2511 } 2543 }
2512 2544
2513write_inode_buffer: 2545write_inode_buffer:
2514 if (ITEM_TYPE(item) == XFS_LI_INODE) { 2546 ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
2515 ASSERT(bp->b_mount == NULL || bp->b_mount == mp); 2547 bp->b_mount = mp;
2516 bp->b_mount = mp; 2548 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
2517 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2549 xfs_bdwrite(mp, bp);
2518 xfs_bdwrite(mp, bp);
2519 } else {
2520 XFS_BUF_STALE(bp);
2521 error = xfs_bwrite(mp, bp);
2522 }
2523
2524error: 2550error:
2525 if (need_free) 2551 if (need_free)
2526 kmem_free(in_f); 2552 kmem_free(in_f);
@@ -2769,51 +2795,48 @@ xlog_recover_do_trans(
2769 int error = 0; 2795 int error = 0;
2770 xlog_recover_item_t *item, *first_item; 2796 xlog_recover_item_t *item, *first_item;
2771 2797
2772 if ((error = xlog_recover_reorder_trans(trans))) 2798 error = xlog_recover_reorder_trans(trans);
2799 if (error)
2773 return error; 2800 return error;
2801
2774 first_item = item = trans->r_itemq; 2802 first_item = item = trans->r_itemq;
2775 do { 2803 do {
2776 /* 2804 switch (ITEM_TYPE(item)) {
2777 * we don't need to worry about the block number being 2805 case XFS_LI_BUF:
2778 * truncated in > 1 TB buffers because in user-land, 2806 error = xlog_recover_do_buffer_trans(log, item, pass);
2779 * we're now n32 or 64-bit so xfs_daddr_t is 64-bits so 2807 break;
2780 * the blknos will get through the user-mode buffer 2808 case XFS_LI_INODE:
2781 * cache properly. The only bad case is o32 kernels 2809 error = xlog_recover_do_inode_trans(log, item, pass);
2782 * where xfs_daddr_t is 32-bits but mount will warn us 2810 break;
2783 * off a > 1 TB filesystem before we get here. 2811 case XFS_LI_EFI:
2784 */ 2812 error = xlog_recover_do_efi_trans(log, item,
2785 if ((ITEM_TYPE(item) == XFS_LI_BUF)) { 2813 trans->r_lsn, pass);
2786 if ((error = xlog_recover_do_buffer_trans(log, item, 2814 break;
2787 pass))) 2815 case XFS_LI_EFD:
2788 break;
2789 } else if ((ITEM_TYPE(item) == XFS_LI_INODE)) {
2790 if ((error = xlog_recover_do_inode_trans(log, item,
2791 pass)))
2792 break;
2793 } else if (ITEM_TYPE(item) == XFS_LI_EFI) {
2794 if ((error = xlog_recover_do_efi_trans(log, item, trans->r_lsn,
2795 pass)))
2796 break;
2797 } else if (ITEM_TYPE(item) == XFS_LI_EFD) {
2798 xlog_recover_do_efd_trans(log, item, pass); 2816 xlog_recover_do_efd_trans(log, item, pass);
2799 } else if (ITEM_TYPE(item) == XFS_LI_DQUOT) { 2817 error = 0;
2800 if ((error = xlog_recover_do_dquot_trans(log, item, 2818 break;
2801 pass))) 2819 case XFS_LI_DQUOT:
2802 break; 2820 error = xlog_recover_do_dquot_trans(log, item, pass);
2803 } else if ((ITEM_TYPE(item) == XFS_LI_QUOTAOFF)) { 2821 break;
2804 if ((error = xlog_recover_do_quotaoff_trans(log, item, 2822 case XFS_LI_QUOTAOFF:
2805 pass))) 2823 error = xlog_recover_do_quotaoff_trans(log, item,
2806 break; 2824 pass);
2807 } else { 2825 break;
2808 xlog_warn("XFS: xlog_recover_do_trans"); 2826 default:
2827 xlog_warn(
2828 "XFS: invalid item type (%d) xlog_recover_do_trans", ITEM_TYPE(item));
2809 ASSERT(0); 2829 ASSERT(0);
2810 error = XFS_ERROR(EIO); 2830 error = XFS_ERROR(EIO);
2811 break; 2831 break;
2812 } 2832 }
2833
2834 if (error)
2835 return error;
2813 item = item->ri_next; 2836 item = item->ri_next;
2814 } while (first_item != item); 2837 } while (first_item != item);
2815 2838
2816 return error; 2839 return 0;
2817} 2840}
2818 2841
2819/* 2842/*
@@ -3490,9 +3513,11 @@ xlog_do_recovery_pass(
3490 hbp = xlog_get_bp(log, 1); 3513 hbp = xlog_get_bp(log, 1);
3491 if (!hbp) 3514 if (!hbp)
3492 return ENOMEM; 3515 return ENOMEM;
3493 if ((error = xlog_bread(log, tail_blk, 1, hbp))) 3516
3517 error = xlog_bread(log, tail_blk, 1, hbp, &offset);
3518 if (error)
3494 goto bread_err1; 3519 goto bread_err1;
3495 offset = xlog_align(log, tail_blk, 1, hbp); 3520
3496 rhead = (xlog_rec_header_t *)offset; 3521 rhead = (xlog_rec_header_t *)offset;
3497 error = xlog_valid_rec_header(log, rhead, tail_blk); 3522 error = xlog_valid_rec_header(log, rhead, tail_blk);
3498 if (error) 3523 if (error)
@@ -3526,9 +3551,10 @@ xlog_do_recovery_pass(
3526 memset(rhash, 0, sizeof(rhash)); 3551 memset(rhash, 0, sizeof(rhash));
3527 if (tail_blk <= head_blk) { 3552 if (tail_blk <= head_blk) {
3528 for (blk_no = tail_blk; blk_no < head_blk; ) { 3553 for (blk_no = tail_blk; blk_no < head_blk; ) {
3529 if ((error = xlog_bread(log, blk_no, hblks, hbp))) 3554 error = xlog_bread(log, blk_no, hblks, hbp, &offset);
3555 if (error)
3530 goto bread_err2; 3556 goto bread_err2;
3531 offset = xlog_align(log, blk_no, hblks, hbp); 3557
3532 rhead = (xlog_rec_header_t *)offset; 3558 rhead = (xlog_rec_header_t *)offset;
3533 error = xlog_valid_rec_header(log, rhead, blk_no); 3559 error = xlog_valid_rec_header(log, rhead, blk_no);
3534 if (error) 3560 if (error)
@@ -3536,10 +3562,11 @@ xlog_do_recovery_pass(
3536 3562
3537 /* blocks in data section */ 3563 /* blocks in data section */
3538 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); 3564 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
3539 error = xlog_bread(log, blk_no + hblks, bblks, dbp); 3565 error = xlog_bread(log, blk_no + hblks, bblks, dbp,
3566 &offset);
3540 if (error) 3567 if (error)
3541 goto bread_err2; 3568 goto bread_err2;
3542 offset = xlog_align(log, blk_no + hblks, bblks, dbp); 3569
3543 xlog_unpack_data(rhead, offset, log); 3570 xlog_unpack_data(rhead, offset, log);
3544 if ((error = xlog_recover_process_data(log, 3571 if ((error = xlog_recover_process_data(log,
3545 rhash, rhead, offset, pass))) 3572 rhash, rhead, offset, pass)))
@@ -3562,10 +3589,10 @@ xlog_do_recovery_pass(
3562 wrapped_hblks = 0; 3589 wrapped_hblks = 0;
3563 if (blk_no + hblks <= log->l_logBBsize) { 3590 if (blk_no + hblks <= log->l_logBBsize) {
3564 /* Read header in one read */ 3591 /* Read header in one read */
3565 error = xlog_bread(log, blk_no, hblks, hbp); 3592 error = xlog_bread(log, blk_no, hblks, hbp,
3593 &offset);
3566 if (error) 3594 if (error)
3567 goto bread_err2; 3595 goto bread_err2;
3568 offset = xlog_align(log, blk_no, hblks, hbp);
3569 } else { 3596 } else {
3570 /* This LR is split across physical log end */ 3597 /* This LR is split across physical log end */
3571 if (blk_no != log->l_logBBsize) { 3598 if (blk_no != log->l_logBBsize) {
@@ -3573,12 +3600,13 @@ xlog_do_recovery_pass(
3573 ASSERT(blk_no <= INT_MAX); 3600 ASSERT(blk_no <= INT_MAX);
3574 split_hblks = log->l_logBBsize - (int)blk_no; 3601 split_hblks = log->l_logBBsize - (int)blk_no;
3575 ASSERT(split_hblks > 0); 3602 ASSERT(split_hblks > 0);
3576 if ((error = xlog_bread(log, blk_no, 3603 error = xlog_bread(log, blk_no,
3577 split_hblks, hbp))) 3604 split_hblks, hbp,
3605 &offset);
3606 if (error)
3578 goto bread_err2; 3607 goto bread_err2;
3579 offset = xlog_align(log, blk_no,
3580 split_hblks, hbp);
3581 } 3608 }
3609
3582 /* 3610 /*
3583 * Note: this black magic still works with 3611 * Note: this black magic still works with
3584 * large sector sizes (non-512) only because: 3612 * large sector sizes (non-512) only because:
@@ -3596,14 +3624,19 @@ xlog_do_recovery_pass(
3596 error = XFS_BUF_SET_PTR(hbp, 3624 error = XFS_BUF_SET_PTR(hbp,
3597 bufaddr + BBTOB(split_hblks), 3625 bufaddr + BBTOB(split_hblks),
3598 BBTOB(hblks - split_hblks)); 3626 BBTOB(hblks - split_hblks));
3599 if (!error) 3627 if (error)
3600 error = xlog_bread(log, 0, 3628 goto bread_err2;
3601 wrapped_hblks, hbp); 3629
3602 if (!error) 3630 error = xlog_bread_noalign(log, 0,
3603 error = XFS_BUF_SET_PTR(hbp, bufaddr, 3631 wrapped_hblks, hbp);
3632 if (error)
3633 goto bread_err2;
3634
3635 error = XFS_BUF_SET_PTR(hbp, bufaddr,
3604 BBTOB(hblks)); 3636 BBTOB(hblks));
3605 if (error) 3637 if (error)
3606 goto bread_err2; 3638 goto bread_err2;
3639
3607 if (!offset) 3640 if (!offset)
3608 offset = xlog_align(log, 0, 3641 offset = xlog_align(log, 0,
3609 wrapped_hblks, hbp); 3642 wrapped_hblks, hbp);
@@ -3619,10 +3652,10 @@ xlog_do_recovery_pass(
3619 3652
3620 /* Read in data for log record */ 3653 /* Read in data for log record */
3621 if (blk_no + bblks <= log->l_logBBsize) { 3654 if (blk_no + bblks <= log->l_logBBsize) {
3622 error = xlog_bread(log, blk_no, bblks, dbp); 3655 error = xlog_bread(log, blk_no, bblks, dbp,
3656 &offset);
3623 if (error) 3657 if (error)
3624 goto bread_err2; 3658 goto bread_err2;
3625 offset = xlog_align(log, blk_no, bblks, dbp);
3626 } else { 3659 } else {
3627 /* This log record is split across the 3660 /* This log record is split across the
3628 * physical end of log */ 3661 * physical end of log */
@@ -3636,12 +3669,13 @@ xlog_do_recovery_pass(
3636 split_bblks = 3669 split_bblks =
3637 log->l_logBBsize - (int)blk_no; 3670 log->l_logBBsize - (int)blk_no;
3638 ASSERT(split_bblks > 0); 3671 ASSERT(split_bblks > 0);
3639 if ((error = xlog_bread(log, blk_no, 3672 error = xlog_bread(log, blk_no,
3640 split_bblks, dbp))) 3673 split_bblks, dbp,
3674 &offset);
3675 if (error)
3641 goto bread_err2; 3676 goto bread_err2;
3642 offset = xlog_align(log, blk_no,
3643 split_bblks, dbp);
3644 } 3677 }
3678
3645 /* 3679 /*
3646 * Note: this black magic still works with 3680 * Note: this black magic still works with
3647 * large sector sizes (non-512) only because: 3681 * large sector sizes (non-512) only because:
@@ -3658,15 +3692,19 @@ xlog_do_recovery_pass(
3658 error = XFS_BUF_SET_PTR(dbp, 3692 error = XFS_BUF_SET_PTR(dbp,
3659 bufaddr + BBTOB(split_bblks), 3693 bufaddr + BBTOB(split_bblks),
3660 BBTOB(bblks - split_bblks)); 3694 BBTOB(bblks - split_bblks));
3661 if (!error)
3662 error = xlog_bread(log, wrapped_hblks,
3663 bblks - split_bblks,
3664 dbp);
3665 if (!error)
3666 error = XFS_BUF_SET_PTR(dbp, bufaddr,
3667 h_size);
3668 if (error) 3695 if (error)
3669 goto bread_err2; 3696 goto bread_err2;
3697
3698 error = xlog_bread_noalign(log, wrapped_hblks,
3699 bblks - split_bblks,
3700 dbp);
3701 if (error)
3702 goto bread_err2;
3703
3704 error = XFS_BUF_SET_PTR(dbp, bufaddr, h_size);
3705 if (error)
3706 goto bread_err2;
3707
3670 if (!offset) 3708 if (!offset)
3671 offset = xlog_align(log, wrapped_hblks, 3709 offset = xlog_align(log, wrapped_hblks,
3672 bblks - split_bblks, dbp); 3710 bblks - split_bblks, dbp);
@@ -3683,17 +3721,21 @@ xlog_do_recovery_pass(
3683 3721
3684 /* read first part of physical log */ 3722 /* read first part of physical log */
3685 while (blk_no < head_blk) { 3723 while (blk_no < head_blk) {
3686 if ((error = xlog_bread(log, blk_no, hblks, hbp))) 3724 error = xlog_bread(log, blk_no, hblks, hbp, &offset);
3725 if (error)
3687 goto bread_err2; 3726 goto bread_err2;
3688 offset = xlog_align(log, blk_no, hblks, hbp); 3727
3689 rhead = (xlog_rec_header_t *)offset; 3728 rhead = (xlog_rec_header_t *)offset;
3690 error = xlog_valid_rec_header(log, rhead, blk_no); 3729 error = xlog_valid_rec_header(log, rhead, blk_no);
3691 if (error) 3730 if (error)
3692 goto bread_err2; 3731 goto bread_err2;
3732
3693 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); 3733 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
3694 if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp))) 3734 error = xlog_bread(log, blk_no+hblks, bblks, dbp,
3735 &offset);
3736 if (error)
3695 goto bread_err2; 3737 goto bread_err2;
3696 offset = xlog_align(log, blk_no+hblks, bblks, dbp); 3738
3697 xlog_unpack_data(rhead, offset, log); 3739 xlog_unpack_data(rhead, offset, log);
3698 if ((error = xlog_recover_process_data(log, rhash, 3740 if ((error = xlog_recover_process_data(log, rhash,
3699 rhead, offset, pass))) 3741 rhead, offset, pass)))
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 35300250e86d..b101990df027 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -45,7 +45,6 @@
45#include "xfs_fsops.h" 45#include "xfs_fsops.h"
46#include "xfs_utils.h" 46#include "xfs_utils.h"
47 47
48STATIC int xfs_uuid_mount(xfs_mount_t *);
49STATIC void xfs_unmountfs_wait(xfs_mount_t *); 48STATIC void xfs_unmountfs_wait(xfs_mount_t *);
50 49
51 50
@@ -121,6 +120,84 @@ static const struct {
121 { sizeof(xfs_sb_t), 0 } 120 { sizeof(xfs_sb_t), 0 }
122}; 121};
123 122
123static DEFINE_MUTEX(xfs_uuid_table_mutex);
124static int xfs_uuid_table_size;
125static uuid_t *xfs_uuid_table;
126
127/*
128 * See if the UUID is unique among mounted XFS filesystems.
129 * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
130 */
131STATIC int
132xfs_uuid_mount(
133 struct xfs_mount *mp)
134{
135 uuid_t *uuid = &mp->m_sb.sb_uuid;
136 int hole, i;
137
138 if (mp->m_flags & XFS_MOUNT_NOUUID)
139 return 0;
140
141 if (uuid_is_nil(uuid)) {
142 cmn_err(CE_WARN,
143 "XFS: Filesystem %s has nil UUID - can't mount",
144 mp->m_fsname);
145 return XFS_ERROR(EINVAL);
146 }
147
148 mutex_lock(&xfs_uuid_table_mutex);
149 for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
150 if (uuid_is_nil(&xfs_uuid_table[i])) {
151 hole = i;
152 continue;
153 }
154 if (uuid_equal(uuid, &xfs_uuid_table[i]))
155 goto out_duplicate;
156 }
157
158 if (hole < 0) {
159 xfs_uuid_table = kmem_realloc(xfs_uuid_table,
160 (xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
161 xfs_uuid_table_size * sizeof(*xfs_uuid_table),
162 KM_SLEEP);
163 hole = xfs_uuid_table_size++;
164 }
165 xfs_uuid_table[hole] = *uuid;
166 mutex_unlock(&xfs_uuid_table_mutex);
167
168 return 0;
169
170 out_duplicate:
171 mutex_unlock(&xfs_uuid_table_mutex);
172 cmn_err(CE_WARN, "XFS: Filesystem %s has duplicate UUID - can't mount",
173 mp->m_fsname);
174 return XFS_ERROR(EINVAL);
175}
176
177STATIC void
178xfs_uuid_unmount(
179 struct xfs_mount *mp)
180{
181 uuid_t *uuid = &mp->m_sb.sb_uuid;
182 int i;
183
184 if (mp->m_flags & XFS_MOUNT_NOUUID)
185 return;
186
187 mutex_lock(&xfs_uuid_table_mutex);
188 for (i = 0; i < xfs_uuid_table_size; i++) {
189 if (uuid_is_nil(&xfs_uuid_table[i]))
190 continue;
191 if (!uuid_equal(uuid, &xfs_uuid_table[i]))
192 continue;
193 memset(&xfs_uuid_table[i], 0, sizeof(uuid_t));
194 break;
195 }
196 ASSERT(i < xfs_uuid_table_size);
197 mutex_unlock(&xfs_uuid_table_mutex);
198}
199
200
124/* 201/*
125 * Free up the resources associated with a mount structure. Assume that 202 * Free up the resources associated with a mount structure. Assume that
126 * the structure was initially zeroed, so we can tell which fields got 203 * the structure was initially zeroed, so we can tell which fields got
@@ -256,6 +333,22 @@ xfs_mount_validate_sb(
256 return XFS_ERROR(ENOSYS); 333 return XFS_ERROR(ENOSYS);
257 } 334 }
258 335
336 /*
337 * Currently only very few inode sizes are supported.
338 */
339 switch (sbp->sb_inodesize) {
340 case 256:
341 case 512:
342 case 1024:
343 case 2048:
344 break;
345 default:
346 xfs_fs_mount_cmn_err(flags,
347 "inode size of %d bytes not supported",
348 sbp->sb_inodesize);
349 return XFS_ERROR(ENOSYS);
350 }
351
259 if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || 352 if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
260 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { 353 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
261 xfs_fs_mount_cmn_err(flags, 354 xfs_fs_mount_cmn_err(flags,
@@ -574,32 +667,10 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
574 mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; 667 mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
575 mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; 668 mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
576 mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog; 669 mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
577 mp->m_litino = sbp->sb_inodesize - sizeof(struct xfs_dinode);
578 mp->m_blockmask = sbp->sb_blocksize - 1; 670 mp->m_blockmask = sbp->sb_blocksize - 1;
579 mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; 671 mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
580 mp->m_blockwmask = mp->m_blockwsize - 1; 672 mp->m_blockwmask = mp->m_blockwsize - 1;
581 673
582 /*
583 * Setup for attributes, in case they get created.
584 * This value is for inodes getting attributes for the first time,
585 * the per-inode value is for old attribute values.
586 */
587 ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048);
588 switch (sbp->sb_inodesize) {
589 case 256:
590 mp->m_attroffset = XFS_LITINO(mp) -
591 XFS_BMDR_SPACE_CALC(MINABTPTRS);
592 break;
593 case 512:
594 case 1024:
595 case 2048:
596 mp->m_attroffset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
597 break;
598 default:
599 ASSERT(0);
600 }
601 ASSERT(mp->m_attroffset < XFS_LITINO(mp));
602
603 mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1); 674 mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1);
604 mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0); 675 mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0);
605 mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2; 676 mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2;
@@ -645,7 +716,7 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
645 for (index = 0; index < agcount; index++) { 716 for (index = 0; index < agcount; index++) {
646 /* 717 /*
647 * read the agf, then the agi. This gets us 718 * read the agf, then the agi. This gets us
648 * all the inforamtion we need and populates the 719 * all the information we need and populates the
649 * per-ag structures for us. 720 * per-ag structures for us.
650 */ 721 */
651 error = xfs_alloc_pagf_init(mp, NULL, index, 0); 722 error = xfs_alloc_pagf_init(mp, NULL, index, 0);
@@ -886,8 +957,6 @@ xfs_check_sizes(xfs_mount_t *mp)
886} 957}
887 958
888/* 959/*
889 * xfs_mountfs
890 *
891 * This function does the following on an initial mount of a file system: 960 * This function does the following on an initial mount of a file system:
892 * - reads the superblock from disk and init the mount struct 961 * - reads the superblock from disk and init the mount struct
893 * - if we're a 32-bit kernel, do a size check on the superblock 962 * - if we're a 32-bit kernel, do a size check on the superblock
@@ -905,7 +974,6 @@ xfs_mountfs(
905 xfs_inode_t *rip; 974 xfs_inode_t *rip;
906 __uint64_t resblks; 975 __uint64_t resblks;
907 uint quotamount, quotaflags; 976 uint quotamount, quotaflags;
908 int uuid_mounted = 0;
909 int error = 0; 977 int error = 0;
910 978
911 xfs_mount_common(mp, sbp); 979 xfs_mount_common(mp, sbp);
@@ -960,7 +1028,7 @@ xfs_mountfs(
960 */ 1028 */
961 error = xfs_update_alignment(mp); 1029 error = xfs_update_alignment(mp);
962 if (error) 1030 if (error)
963 goto error1; 1031 goto out;
964 1032
965 xfs_alloc_compute_maxlevels(mp); 1033 xfs_alloc_compute_maxlevels(mp);
966 xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); 1034 xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
@@ -971,19 +1039,9 @@ xfs_mountfs(
971 1039
972 mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog); 1040 mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog);
973 1041
974 /* 1042 error = xfs_uuid_mount(mp);
975 * XFS uses the uuid from the superblock as the unique 1043 if (error)
976 * identifier for fsid. We can not use the uuid from the volume 1044 goto out;
977 * since a single partition filesystem is identical to a single
978 * partition volume/filesystem.
979 */
980 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
981 if (xfs_uuid_mount(mp)) {
982 error = XFS_ERROR(EINVAL);
983 goto error1;
984 }
985 uuid_mounted=1;
986 }
987 1045
988 /* 1046 /*
989 * Set the minimum read and write sizes 1047 * Set the minimum read and write sizes
@@ -1007,7 +1065,7 @@ xfs_mountfs(
1007 */ 1065 */
1008 error = xfs_check_sizes(mp); 1066 error = xfs_check_sizes(mp);
1009 if (error) 1067 if (error)
1010 goto error1; 1068 goto out_remove_uuid;
1011 1069
1012 /* 1070 /*
1013 * Initialize realtime fields in the mount structure 1071 * Initialize realtime fields in the mount structure
@@ -1015,7 +1073,7 @@ xfs_mountfs(
1015 error = xfs_rtmount_init(mp); 1073 error = xfs_rtmount_init(mp);
1016 if (error) { 1074 if (error) {
1017 cmn_err(CE_WARN, "XFS: RT mount failed"); 1075 cmn_err(CE_WARN, "XFS: RT mount failed");
1018 goto error1; 1076 goto out_remove_uuid;
1019 } 1077 }
1020 1078
1021 /* 1079 /*
@@ -1045,26 +1103,26 @@ xfs_mountfs(
1045 mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), 1103 mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t),
1046 KM_MAYFAIL); 1104 KM_MAYFAIL);
1047 if (!mp->m_perag) 1105 if (!mp->m_perag)
1048 goto error1; 1106 goto out_remove_uuid;
1049 1107
1050 mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount); 1108 mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount);
1051 1109
1110 if (!sbp->sb_logblocks) {
1111 cmn_err(CE_WARN, "XFS: no log defined");
1112 XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp);
1113 error = XFS_ERROR(EFSCORRUPTED);
1114 goto out_free_perag;
1115 }
1116
1052 /* 1117 /*
1053 * log's mount-time initialization. Perform 1st part recovery if needed 1118 * log's mount-time initialization. Perform 1st part recovery if needed
1054 */ 1119 */
1055 if (likely(sbp->sb_logblocks > 0)) { /* check for volume case */ 1120 error = xfs_log_mount(mp, mp->m_logdev_targp,
1056 error = xfs_log_mount(mp, mp->m_logdev_targp, 1121 XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
1057 XFS_FSB_TO_DADDR(mp, sbp->sb_logstart), 1122 XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
1058 XFS_FSB_TO_BB(mp, sbp->sb_logblocks)); 1123 if (error) {
1059 if (error) { 1124 cmn_err(CE_WARN, "XFS: log mount failed");
1060 cmn_err(CE_WARN, "XFS: log mount failed"); 1125 goto out_free_perag;
1061 goto error2;
1062 }
1063 } else { /* No log has been defined */
1064 cmn_err(CE_WARN, "XFS: no log defined");
1065 XFS_ERROR_REPORT("xfs_mountfs_int(1)", XFS_ERRLEVEL_LOW, mp);
1066 error = XFS_ERROR(EFSCORRUPTED);
1067 goto error2;
1068 } 1126 }
1069 1127
1070 /* 1128 /*
@@ -1086,15 +1144,14 @@ xfs_mountfs(
1086 * If we are currently making the filesystem, the initialisation will 1144 * If we are currently making the filesystem, the initialisation will
1087 * fail as the perag data is in an undefined state. 1145 * fail as the perag data is in an undefined state.
1088 */ 1146 */
1089
1090 if (xfs_sb_version_haslazysbcount(&mp->m_sb) && 1147 if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
1091 !XFS_LAST_UNMOUNT_WAS_CLEAN(mp) && 1148 !XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
1092 !mp->m_sb.sb_inprogress) { 1149 !mp->m_sb.sb_inprogress) {
1093 error = xfs_initialize_perag_data(mp, sbp->sb_agcount); 1150 error = xfs_initialize_perag_data(mp, sbp->sb_agcount);
1094 if (error) { 1151 if (error)
1095 goto error2; 1152 goto out_free_perag;
1096 }
1097 } 1153 }
1154
1098 /* 1155 /*
1099 * Get and sanity-check the root inode. 1156 * Get and sanity-check the root inode.
1100 * Save the pointer to it in the mount structure. 1157 * Save the pointer to it in the mount structure.
@@ -1102,7 +1159,7 @@ xfs_mountfs(
1102 error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0); 1159 error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0);
1103 if (error) { 1160 if (error) {
1104 cmn_err(CE_WARN, "XFS: failed to read root inode"); 1161 cmn_err(CE_WARN, "XFS: failed to read root inode");
1105 goto error3; 1162 goto out_log_dealloc;
1106 } 1163 }
1107 1164
1108 ASSERT(rip != NULL); 1165 ASSERT(rip != NULL);
@@ -1116,7 +1173,7 @@ xfs_mountfs(
1116 XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, 1173 XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
1117 mp); 1174 mp);
1118 error = XFS_ERROR(EFSCORRUPTED); 1175 error = XFS_ERROR(EFSCORRUPTED);
1119 goto error4; 1176 goto out_rele_rip;
1120 } 1177 }
1121 mp->m_rootip = rip; /* save it */ 1178 mp->m_rootip = rip; /* save it */
1122 1179
@@ -1131,7 +1188,7 @@ xfs_mountfs(
1131 * Free up the root inode. 1188 * Free up the root inode.
1132 */ 1189 */
1133 cmn_err(CE_WARN, "XFS: failed to read RT inodes"); 1190 cmn_err(CE_WARN, "XFS: failed to read RT inodes");
1134 goto error4; 1191 goto out_rele_rip;
1135 } 1192 }
1136 1193
1137 /* 1194 /*
@@ -1143,7 +1200,7 @@ xfs_mountfs(
1143 error = xfs_mount_log_sb(mp, mp->m_update_flags); 1200 error = xfs_mount_log_sb(mp, mp->m_update_flags);
1144 if (error) { 1201 if (error) {
1145 cmn_err(CE_WARN, "XFS: failed to write sb changes"); 1202 cmn_err(CE_WARN, "XFS: failed to write sb changes");
1146 goto error4; 1203 goto out_rtunmount;
1147 } 1204 }
1148 } 1205 }
1149 1206
@@ -1152,7 +1209,7 @@ xfs_mountfs(
1152 */ 1209 */
1153 error = XFS_QM_INIT(mp, &quotamount, &quotaflags); 1210 error = XFS_QM_INIT(mp, &quotamount, &quotaflags);
1154 if (error) 1211 if (error)
1155 goto error4; 1212 goto out_rtunmount;
1156 1213
1157 /* 1214 /*
1158 * Finish recovering the file system. This part needed to be 1215 * Finish recovering the file system. This part needed to be
@@ -1162,7 +1219,7 @@ xfs_mountfs(
1162 error = xfs_log_mount_finish(mp); 1219 error = xfs_log_mount_finish(mp);
1163 if (error) { 1220 if (error) {
1164 cmn_err(CE_WARN, "XFS: log mount finish failed"); 1221 cmn_err(CE_WARN, "XFS: log mount finish failed");
1165 goto error4; 1222 goto out_rtunmount;
1166 } 1223 }
1167 1224
1168 /* 1225 /*
@@ -1170,7 +1227,7 @@ xfs_mountfs(
1170 */ 1227 */
1171 error = XFS_QM_MOUNT(mp, quotamount, quotaflags); 1228 error = XFS_QM_MOUNT(mp, quotamount, quotaflags);
1172 if (error) 1229 if (error)
1173 goto error4; 1230 goto out_rtunmount;
1174 1231
1175 /* 1232 /*
1176 * Now we are mounted, reserve a small amount of unused space for 1233 * Now we are mounted, reserve a small amount of unused space for
@@ -1194,18 +1251,17 @@ xfs_mountfs(
1194 1251
1195 return 0; 1252 return 0;
1196 1253
1197 error4: 1254 out_rtunmount:
1198 /* 1255 xfs_rtunmount_inodes(mp);
1199 * Free up the root inode. 1256 out_rele_rip:
1200 */
1201 IRELE(rip); 1257 IRELE(rip);
1202 error3: 1258 out_log_dealloc:
1203 xfs_log_unmount_dealloc(mp); 1259 xfs_log_unmount(mp);
1204 error2: 1260 out_free_perag:
1205 xfs_free_perag(mp); 1261 xfs_free_perag(mp);
1206 error1: 1262 out_remove_uuid:
1207 if (uuid_mounted) 1263 xfs_uuid_unmount(mp);
1208 uuid_table_remove(&mp->m_sb.sb_uuid); 1264 out:
1209 return error; 1265 return error;
1210} 1266}
1211 1267
@@ -1226,15 +1282,12 @@ xfs_unmountfs(
1226 */ 1282 */
1227 XFS_QM_UNMOUNT(mp); 1283 XFS_QM_UNMOUNT(mp);
1228 1284
1229 if (mp->m_rbmip) 1285 xfs_rtunmount_inodes(mp);
1230 IRELE(mp->m_rbmip);
1231 if (mp->m_rsumip)
1232 IRELE(mp->m_rsumip);
1233 IRELE(mp->m_rootip); 1286 IRELE(mp->m_rootip);
1234 1287
1235 /* 1288 /*
1236 * We can potentially deadlock here if we have an inode cluster 1289 * We can potentially deadlock here if we have an inode cluster
1237 * that has been freed has it's buffer still pinned in memory because 1290 * that has been freed has its buffer still pinned in memory because
1238 * the transaction is still sitting in a iclog. The stale inodes 1291 * the transaction is still sitting in a iclog. The stale inodes
1239 * on that buffer will have their flush locks held until the 1292 * on that buffer will have their flush locks held until the
1240 * transaction hits the disk and the callbacks run. the inode 1293 * transaction hits the disk and the callbacks run. the inode
@@ -1266,7 +1319,7 @@ xfs_unmountfs(
1266 * Unreserve any blocks we have so that when we unmount we don't account 1319 * Unreserve any blocks we have so that when we unmount we don't account
1267 * the reserved free space as used. This is really only necessary for 1320 * the reserved free space as used. This is really only necessary for
1268 * lazy superblock counting because it trusts the incore superblock 1321 * lazy superblock counting because it trusts the incore superblock
1269 * counters to be aboslutely correct on clean unmount. 1322 * counters to be absolutely correct on clean unmount.
1270 * 1323 *
1271 * We don't bother correcting this elsewhere for lazy superblock 1324 * We don't bother correcting this elsewhere for lazy superblock
1272 * counting because on mount of an unclean filesystem we reconstruct the 1325 * counting because on mount of an unclean filesystem we reconstruct the
@@ -1288,10 +1341,9 @@ xfs_unmountfs(
1288 "Freespace may not be correct on next mount."); 1341 "Freespace may not be correct on next mount.");
1289 xfs_unmountfs_writesb(mp); 1342 xfs_unmountfs_writesb(mp);
1290 xfs_unmountfs_wait(mp); /* wait for async bufs */ 1343 xfs_unmountfs_wait(mp); /* wait for async bufs */
1291 xfs_log_unmount(mp); /* Done! No more fs ops. */ 1344 xfs_log_unmount_write(mp);
1292 1345 xfs_log_unmount(mp);
1293 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) 1346 xfs_uuid_unmount(mp);
1294 uuid_table_remove(&mp->m_sb.sb_uuid);
1295 1347
1296#if defined(DEBUG) 1348#if defined(DEBUG)
1297 xfs_errortag_clearall(mp, 0); 1349 xfs_errortag_clearall(mp, 0);
@@ -1793,29 +1845,6 @@ xfs_freesb(
1793} 1845}
1794 1846
1795/* 1847/*
1796 * See if the UUID is unique among mounted XFS filesystems.
1797 * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
1798 */
1799STATIC int
1800xfs_uuid_mount(
1801 xfs_mount_t *mp)
1802{
1803 if (uuid_is_nil(&mp->m_sb.sb_uuid)) {
1804 cmn_err(CE_WARN,
1805 "XFS: Filesystem %s has nil UUID - can't mount",
1806 mp->m_fsname);
1807 return -1;
1808 }
1809 if (!uuid_table_insert(&mp->m_sb.sb_uuid)) {
1810 cmn_err(CE_WARN,
1811 "XFS: Filesystem %s has duplicate UUID - can't mount",
1812 mp->m_fsname);
1813 return -1;
1814 }
1815 return 0;
1816}
1817
1818/*
1819 * Used to log changes to the superblock unit and width fields which could 1848 * Used to log changes to the superblock unit and width fields which could
1820 * be altered by the mount options, as well as any potential sb_features2 1849 * be altered by the mount options, as well as any potential sb_features2
1821 * fixup. Only the first superblock is updated. 1850 * fixup. Only the first superblock is updated.
@@ -1868,7 +1897,7 @@ xfs_mount_log_sb(
1868 * we disable the per-cpu counter and go through the slow path. 1897 * we disable the per-cpu counter and go through the slow path.
1869 * 1898 *
1870 * The slow path is the current xfs_mod_incore_sb() function. This means that 1899 * The slow path is the current xfs_mod_incore_sb() function. This means that
1871 * when we disable a per-cpu counter, we need to drain it's resources back to 1900 * when we disable a per-cpu counter, we need to drain its resources back to
1872 * the global superblock. We do this after disabling the counter to prevent 1901 * the global superblock. We do this after disabling the counter to prevent
1873 * more threads from queueing up on the counter. 1902 * more threads from queueing up on the counter.
1874 * 1903 *
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index f5e9937f9bdb..7af44adffc8f 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -136,7 +136,6 @@ typedef int (*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *,
136 struct xfs_dquot *, struct xfs_dquot *, uint); 136 struct xfs_dquot *, struct xfs_dquot *, uint);
137typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, struct kstatfs *); 137typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, struct kstatfs *);
138typedef int (*xfs_dqsync_t)(struct xfs_mount *, int flags); 138typedef int (*xfs_dqsync_t)(struct xfs_mount *, int flags);
139typedef int (*xfs_quotactl_t)(struct xfs_mount *, int, int, xfs_caddr_t);
140 139
141typedef struct xfs_qmops { 140typedef struct xfs_qmops {
142 xfs_qminit_t xfs_qminit; 141 xfs_qminit_t xfs_qminit;
@@ -154,7 +153,6 @@ typedef struct xfs_qmops {
154 xfs_dqvopchownresv_t xfs_dqvopchownresv; 153 xfs_dqvopchownresv_t xfs_dqvopchownresv;
155 xfs_dqstatvfs_t xfs_dqstatvfs; 154 xfs_dqstatvfs_t xfs_dqstatvfs;
156 xfs_dqsync_t xfs_dqsync; 155 xfs_dqsync_t xfs_dqsync;
157 xfs_quotactl_t xfs_quotactl;
158 struct xfs_dqtrxops *xfs_dqtrxops; 156 struct xfs_dqtrxops *xfs_dqtrxops;
159} xfs_qmops_t; 157} xfs_qmops_t;
160 158
@@ -188,8 +186,6 @@ typedef struct xfs_qmops {
188 (*(ip)->i_mount->m_qm_ops->xfs_dqstatvfs)(ip, statp) 186 (*(ip)->i_mount->m_qm_ops->xfs_dqstatvfs)(ip, statp)
189#define XFS_QM_DQSYNC(mp, flags) \ 187#define XFS_QM_DQSYNC(mp, flags) \
190 (*(mp)->m_qm_ops->xfs_dqsync)(mp, flags) 188 (*(mp)->m_qm_ops->xfs_dqsync)(mp, flags)
191#define XFS_QM_QUOTACTL(mp, cmd, id, addr) \
192 (*(mp)->m_qm_ops->xfs_quotactl)(mp, cmd, id, addr)
193 189
194#ifdef HAVE_PERCPU_SB 190#ifdef HAVE_PERCPU_SB
195 191
@@ -273,19 +269,17 @@ typedef struct xfs_mount {
273 uint m_inobt_mnr[2]; /* min inobt btree records */ 269 uint m_inobt_mnr[2]; /* min inobt btree records */
274 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ 270 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
275 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ 271 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
276 uint m_in_maxlevels; /* XFS_IN_MAXLEVELS */ 272 uint m_in_maxlevels; /* max inobt btree levels. */
277 struct xfs_perag *m_perag; /* per-ag accounting info */ 273 struct xfs_perag *m_perag; /* per-ag accounting info */
278 struct rw_semaphore m_peraglock; /* lock for m_perag (pointer) */ 274 struct rw_semaphore m_peraglock; /* lock for m_perag (pointer) */
279 struct mutex m_growlock; /* growfs mutex */ 275 struct mutex m_growlock; /* growfs mutex */
280 int m_fixedfsid[2]; /* unchanged for life of FS */ 276 int m_fixedfsid[2]; /* unchanged for life of FS */
281 uint m_dmevmask; /* DMI events for this FS */ 277 uint m_dmevmask; /* DMI events for this FS */
282 __uint64_t m_flags; /* global mount flags */ 278 __uint64_t m_flags; /* global mount flags */
283 uint m_attroffset; /* inode attribute offset */
284 uint m_dir_node_ents; /* #entries in a dir danode */ 279 uint m_dir_node_ents; /* #entries in a dir danode */
285 uint m_attr_node_ents; /* #entries in attr danode */ 280 uint m_attr_node_ents; /* #entries in attr danode */
286 int m_ialloc_inos; /* inodes in inode allocation */ 281 int m_ialloc_inos; /* inodes in inode allocation */
287 int m_ialloc_blks; /* blocks in inode allocation */ 282 int m_ialloc_blks; /* blocks in inode allocation */
288 int m_litino; /* size of inode union area */
289 int m_inoalign_mask;/* mask sb_inoalignmt if used */ 283 int m_inoalign_mask;/* mask sb_inoalignmt if used */
290 uint m_qflags; /* quota status flags */ 284 uint m_qflags; /* quota status flags */
291 xfs_trans_reservations_t m_reservations;/* precomputed res values */ 285 xfs_trans_reservations_t m_reservations;/* precomputed res values */
@@ -293,9 +287,6 @@ typedef struct xfs_mount {
293 __uint64_t m_maxioffset; /* maximum inode offset */ 287 __uint64_t m_maxioffset; /* maximum inode offset */
294 __uint64_t m_resblks; /* total reserved blocks */ 288 __uint64_t m_resblks; /* total reserved blocks */
295 __uint64_t m_resblks_avail;/* available reserved blocks */ 289 __uint64_t m_resblks_avail;/* available reserved blocks */
296#if XFS_BIG_INUMS
297 xfs_ino_t m_inoadd; /* add value for ino64_offset */
298#endif
299 int m_dalign; /* stripe unit */ 290 int m_dalign; /* stripe unit */
300 int m_swidth; /* stripe width */ 291 int m_swidth; /* stripe width */
301 int m_sinoalign; /* stripe unit inode alignment */ 292 int m_sinoalign; /* stripe unit inode alignment */
@@ -337,7 +328,6 @@ typedef struct xfs_mount {
337#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops 328#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops
338 must be synchronous except 329 must be synchronous except
339 for space allocations */ 330 for space allocations */
340#define XFS_MOUNT_INO64 (1ULL << 1)
341#define XFS_MOUNT_DMAPI (1ULL << 2) /* dmapi is enabled */ 331#define XFS_MOUNT_DMAPI (1ULL << 2) /* dmapi is enabled */
342#define XFS_MOUNT_WAS_CLEAN (1ULL << 3) 332#define XFS_MOUNT_WAS_CLEAN (1ULL << 3)
343#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem 333#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem
@@ -389,8 +379,8 @@ typedef struct xfs_mount {
389 * Synchronous read and write sizes. This should be 379 * Synchronous read and write sizes. This should be
390 * better for NFSv2 wsync filesystems. 380 * better for NFSv2 wsync filesystems.
391 */ 381 */
392#define XFS_WSYNC_READIO_LOG 15 /* 32K */ 382#define XFS_WSYNC_READIO_LOG 15 /* 32k */
393#define XFS_WSYNC_WRITEIO_LOG 14 /* 16K */ 383#define XFS_WSYNC_WRITEIO_LOG 14 /* 16k */
394 384
395/* 385/*
396 * Allow large block sizes to be reported to userspace programs if the 386 * Allow large block sizes to be reported to userspace programs if the
@@ -500,9 +490,6 @@ typedef struct xfs_mod_sb {
500 int64_t msb_delta; /* Change to make to specified field */ 490 int64_t msb_delta; /* Change to make to specified field */
501} xfs_mod_sb_t; 491} xfs_mod_sb_t;
502 492
503#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock))
504#define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock))
505
506extern int xfs_log_sbcount(xfs_mount_t *, uint); 493extern int xfs_log_sbcount(xfs_mount_t *, uint);
507extern int xfs_mountfs(xfs_mount_t *mp); 494extern int xfs_mountfs(xfs_mount_t *mp);
508extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); 495extern void xfs_mountfs_check_barriers(xfs_mount_t *mp);
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
index 27f80581520a..e101790ea8e7 100644
--- a/fs/xfs/xfs_qmops.c
+++ b/fs/xfs/xfs_qmops.c
@@ -126,7 +126,6 @@ static struct xfs_qmops xfs_qmcore_stub = {
126 .xfs_dqvopchownresv = (xfs_dqvopchownresv_t) fs_noerr, 126 .xfs_dqvopchownresv = (xfs_dqvopchownresv_t) fs_noerr,
127 .xfs_dqstatvfs = (xfs_dqstatvfs_t) fs_noval, 127 .xfs_dqstatvfs = (xfs_dqstatvfs_t) fs_noval,
128 .xfs_dqsync = (xfs_dqsync_t) fs_noerr, 128 .xfs_dqsync = (xfs_dqsync_t) fs_noerr,
129 .xfs_quotactl = (xfs_quotactl_t) fs_nosys,
130}; 129};
131 130
132int 131int
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 48965ecaa155..f5d1202dde25 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -18,6 +18,8 @@
18#ifndef __XFS_QUOTA_H__ 18#ifndef __XFS_QUOTA_H__
19#define __XFS_QUOTA_H__ 19#define __XFS_QUOTA_H__
20 20
21struct xfs_trans;
22
21/* 23/*
22 * The ondisk form of a dquot structure. 24 * The ondisk form of a dquot structure.
23 */ 25 */
@@ -185,7 +187,6 @@ typedef struct xfs_qoff_logformat {
185 * to a single function. None of these XFS_QMOPT_* flags are meant to have 187 * to a single function. None of these XFS_QMOPT_* flags are meant to have
186 * persistent values (ie. their values can and will change between versions) 188 * persistent values (ie. their values can and will change between versions)
187 */ 189 */
188#define XFS_QMOPT_DQLOCK 0x0000001 /* dqlock */
189#define XFS_QMOPT_DQALLOC 0x0000002 /* alloc dquot ondisk if needed */ 190#define XFS_QMOPT_DQALLOC 0x0000002 /* alloc dquot ondisk if needed */
190#define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */ 191#define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */
191#define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ 192#define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index c5bb86f3ec05..385f6dceba5d 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -2288,6 +2288,16 @@ xfs_rtmount_inodes(
2288 return 0; 2288 return 0;
2289} 2289}
2290 2290
2291void
2292xfs_rtunmount_inodes(
2293 struct xfs_mount *mp)
2294{
2295 if (mp->m_rbmip)
2296 IRELE(mp->m_rbmip);
2297 if (mp->m_rsumip)
2298 IRELE(mp->m_rsumip);
2299}
2300
2291/* 2301/*
2292 * Pick an extent for allocation at the start of a new realtime file. 2302 * Pick an extent for allocation at the start of a new realtime file.
2293 * Use the sequence number stored in the atime field of the bitmap inode. 2303 * Use the sequence number stored in the atime field of the bitmap inode.
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 8d8dcd215716..b2d67adb6a08 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -23,8 +23,8 @@ struct xfs_trans;
23 23
24/* Min and max rt extent sizes, specified in bytes */ 24/* Min and max rt extent sizes, specified in bytes */
25#define XFS_MAX_RTEXTSIZE (1024 * 1024 * 1024) /* 1GB */ 25#define XFS_MAX_RTEXTSIZE (1024 * 1024 * 1024) /* 1GB */
26#define XFS_DFL_RTEXTSIZE (64 * 1024) /* 64KB */ 26#define XFS_DFL_RTEXTSIZE (64 * 1024) /* 64kB */
27#define XFS_MIN_RTEXTSIZE (4 * 1024) /* 4KB */ 27#define XFS_MIN_RTEXTSIZE (4 * 1024) /* 4kB */
28 28
29/* 29/*
30 * Constants for bit manipulations. 30 * Constants for bit manipulations.
@@ -108,6 +108,9 @@ xfs_rtfree_extent(
108int /* error */ 108int /* error */
109xfs_rtmount_init( 109xfs_rtmount_init(
110 struct xfs_mount *mp); /* file system mount structure */ 110 struct xfs_mount *mp); /* file system mount structure */
111void
112xfs_rtunmount_inodes(
113 struct xfs_mount *mp);
111 114
112/* 115/*
113 * Get the bitmap and summary inodes into the mount structure 116 * Get the bitmap and summary inodes into the mount structure
@@ -146,6 +149,7 @@ xfs_growfs_rt(
146# define xfs_growfs_rt(mp,in) (ENOSYS) 149# define xfs_growfs_rt(mp,in) (ENOSYS)
147# define xfs_rtmount_init(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) 150# define xfs_rtmount_init(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
148# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) 151# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
152# define xfs_rtunmount_inodes(m)
149#endif /* CONFIG_XFS_RT */ 153#endif /* CONFIG_XFS_RT */
150 154
151#endif /* __KERNEL__ */ 155#endif /* __KERNEL__ */
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index d6fe4a88d79f..775249a54f6f 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -292,7 +292,7 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
292 * In a write transaction we can allocate a maximum of 2 292 * In a write transaction we can allocate a maximum of 2
293 * extents. This gives: 293 * extents. This gives:
294 * the inode getting the new extents: inode size 294 * the inode getting the new extents: inode size
295 * the inode\'s bmap btree: max depth * block size 295 * the inode's bmap btree: max depth * block size
296 * the agfs of the ags from which the extents are allocated: 2 * sector 296 * the agfs of the ags from which the extents are allocated: 2 * sector
297 * the superblock free block counter: sector size 297 * the superblock free block counter: sector size
298 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size 298 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
@@ -321,7 +321,7 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
321/* 321/*
322 * In truncating a file we free up to two extents at once. We can modify: 322 * In truncating a file we free up to two extents at once. We can modify:
323 * the inode being truncated: inode size 323 * the inode being truncated: inode size
324 * the inode\'s bmap btree: (max depth + 1) * block size 324 * the inode's bmap btree: (max depth + 1) * block size
325 * And the bmap_finish transaction can free the blocks and bmap blocks: 325 * And the bmap_finish transaction can free the blocks and bmap blocks:
326 * the agf for each of the ags: 4 * sector size 326 * the agf for each of the ags: 4 * sector size
327 * the agfl for each of the ags: 4 * sector size 327 * the agfl for each of the ags: 4 * sector size
@@ -343,7 +343,7 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
343 (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))) + \ 343 (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))) + \
344 (128 * 5) + \ 344 (128 * 5) + \
345 XFS_ALLOCFREE_LOG_RES(mp, 1) + \ 345 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
346 (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \ 346 (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
347 XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) 347 XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
348 348
349#define XFS_ITRUNCATE_LOG_RES(mp) ((mp)->m_reservations.tr_itruncate) 349#define XFS_ITRUNCATE_LOG_RES(mp) ((mp)->m_reservations.tr_itruncate)
@@ -431,8 +431,8 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
431 * the new inode: inode size 431 * the new inode: inode size
432 * the inode btree entry: 1 block 432 * the inode btree entry: 1 block
433 * the directory btree: (max depth + v2) * dir block size 433 * the directory btree: (max depth + v2) * dir block size
434 * the directory inode\'s bmap btree: (max depth + v2) * block size 434 * the directory inode's bmap btree: (max depth + v2) * block size
435 * the blocks for the symlink: 1 KB 435 * the blocks for the symlink: 1 kB
436 * Or in the first xact we allocate some inodes giving: 436 * Or in the first xact we allocate some inodes giving:
437 * the agi and agf of the ag getting the new inodes: 2 * sectorsize 437 * the agi and agf of the ag getting the new inodes: 2 * sectorsize
438 * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize 438 * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
@@ -449,9 +449,9 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
449 (128 * (4 + XFS_DIROP_LOG_COUNT(mp)))), \ 449 (128 * (4 + XFS_DIROP_LOG_COUNT(mp)))), \
450 (2 * (mp)->m_sb.sb_sectsize + \ 450 (2 * (mp)->m_sb.sb_sectsize + \
451 XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \ 451 XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
452 XFS_FSB_TO_B((mp), XFS_IN_MAXLEVELS(mp)) + \ 452 XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
453 XFS_ALLOCFREE_LOG_RES(mp, 1) + \ 453 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
454 (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \ 454 (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
455 XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) 455 XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
456 456
457#define XFS_SYMLINK_LOG_RES(mp) ((mp)->m_reservations.tr_symlink) 457#define XFS_SYMLINK_LOG_RES(mp) ((mp)->m_reservations.tr_symlink)
@@ -463,7 +463,7 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
463 * the inode btree entry: block size 463 * the inode btree entry: block size
464 * the superblock for the nlink flag: sector size 464 * the superblock for the nlink flag: sector size
465 * the directory btree: (max depth + v2) * dir block size 465 * the directory btree: (max depth + v2) * dir block size
466 * the directory inode\'s bmap btree: (max depth + v2) * block size 466 * the directory inode's bmap btree: (max depth + v2) * block size
467 * Or in the first xact we allocate some inodes giving: 467 * Or in the first xact we allocate some inodes giving:
468 * the agi and agf of the ag getting the new inodes: 2 * sectorsize 468 * the agi and agf of the ag getting the new inodes: 2 * sectorsize
469 * the superblock for the nlink flag: sector size 469 * the superblock for the nlink flag: sector size
@@ -481,9 +481,9 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
481 (128 * (3 + XFS_DIROP_LOG_COUNT(mp)))), \ 481 (128 * (3 + XFS_DIROP_LOG_COUNT(mp)))), \
482 (3 * (mp)->m_sb.sb_sectsize + \ 482 (3 * (mp)->m_sb.sb_sectsize + \
483 XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \ 483 XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
484 XFS_FSB_TO_B((mp), XFS_IN_MAXLEVELS(mp)) + \ 484 XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
485 XFS_ALLOCFREE_LOG_RES(mp, 1) + \ 485 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
486 (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \ 486 (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
487 XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) 487 XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
488 488
489#define XFS_CREATE_LOG_RES(mp) ((mp)->m_reservations.tr_create) 489#define XFS_CREATE_LOG_RES(mp) ((mp)->m_reservations.tr_create)
@@ -513,7 +513,7 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
513 MAX((__uint16_t)XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \ 513 MAX((__uint16_t)XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \
514 (128 * 5) + \ 514 (128 * 5) + \
515 XFS_ALLOCFREE_LOG_RES(mp, 1) + \ 515 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
516 (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \ 516 (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
517 XFS_ALLOCFREE_LOG_COUNT(mp, 1)))) 517 XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
518 518
519 519
@@ -637,7 +637,7 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
637/* 637/*
638 * Removing the attribute fork of a file 638 * Removing the attribute fork of a file
639 * the inode being truncated: inode size 639 * the inode being truncated: inode size
640 * the inode\'s bmap btree: max depth * block size 640 * the inode's bmap btree: max depth * block size
641 * And the bmap_finish transaction can free the blocks and bmap blocks: 641 * And the bmap_finish transaction can free the blocks and bmap blocks:
642 * the agf for each of the ags: 4 * sector size 642 * the agf for each of the ags: 4 * sector size
643 * the agfl for each of the ags: 4 * sector size 643 * the agfl for each of the ags: 4 * sector size
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 2d47f10f8bed..f31271c30de9 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -79,7 +79,7 @@ xfs_trans_ail_tail(
79 * the push is run asynchronously in a separate thread, so we return the tail 79 * the push is run asynchronously in a separate thread, so we return the tail
80 * of the log right now instead of the tail after the push. This means we will 80 * of the log right now instead of the tail after the push. This means we will
81 * either continue right away, or we will sleep waiting on the async thread to 81 * either continue right away, or we will sleep waiting on the async thread to
82 * do it's work. 82 * do its work.
83 * 83 *
84 * We do this unlocked - we only need to know whether there is anything in the 84 * We do this unlocked - we only need to know whether there is anything in the
85 * AIL at the time we are called. We don't need to access the contents of 85 * AIL at the time we are called. We don't need to access the contents of
@@ -160,7 +160,7 @@ xfs_trans_ail_cursor_next(
160/* 160/*
161 * Now that the traversal is complete, we need to remove the cursor 161 * Now that the traversal is complete, we need to remove the cursor
162 * from the list of traversing cursors. Avoid removing the embedded 162 * from the list of traversing cursors. Avoid removing the embedded
163 * push cursor, but use the fact it is alway present to make the 163 * push cursor, but use the fact it is always present to make the
164 * list deletion simple. 164 * list deletion simple.
165 */ 165 */
166void 166void
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index e110bf57d7f4..eb3fc57f9eef 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -22,7 +22,7 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_trans_priv.h" 24#include "xfs_trans_priv.h"
25/* XXX: from here down needed until struct xfs_trans has it's own ailp */ 25/* XXX: from here down needed until struct xfs_trans has its own ailp */
26#include "xfs_bit.h" 26#include "xfs_bit.h"
27#include "xfs_buf_item.h" 27#include "xfs_buf_item.h"
28#include "xfs_sb.h" 28#include "xfs_sb.h"
diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/xfs_trans_space.h
index 4ea2e5074bdd..7d2c920dfb9c 100644
--- a/fs/xfs/xfs_trans_space.h
+++ b/fs/xfs/xfs_trans_space.h
@@ -47,7 +47,7 @@
47#define XFS_DIRREMOVE_SPACE_RES(mp) \ 47#define XFS_DIRREMOVE_SPACE_RES(mp) \
48 XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) 48 XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK)
49#define XFS_IALLOC_SPACE_RES(mp) \ 49#define XFS_IALLOC_SPACE_RES(mp) \
50 (XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp)-1) 50 (XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels - 1)
51 51
52/* 52/*
53 * Space reservation values for various transactions. 53 * Space reservation values for various transactions.
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index b2f724502f1b..d725428c9df6 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -21,14 +21,6 @@
21#ifdef __KERNEL__ 21#ifdef __KERNEL__
22 22
23/* 23/*
24 * POSIX Extensions
25 */
26typedef unsigned char uchar_t;
27typedef unsigned short ushort_t;
28typedef unsigned int uint_t;
29typedef unsigned long ulong_t;
30
31/*
32 * Additional type declarations for XFS 24 * Additional type declarations for XFS
33 */ 25 */
34typedef signed char __int8_t; 26typedef signed char __int8_t;
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index fcc2285d03ed..79b9e5ea5359 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -374,7 +374,7 @@ xfs_truncate_file(
374 374
375 /* 375 /*
376 * Follow the normal truncate locking protocol. Since we 376 * Follow the normal truncate locking protocol. Since we
377 * hold the inode in the transaction, we know that it's number 377 * hold the inode in the transaction, we know that its number
378 * of references will stay constant. 378 * of references will stay constant.
379 */ 379 */
380 xfs_ilock(ip, XFS_ILOCK_EXCL); 380 xfs_ilock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 0e55c5d7db5f..7394c7af5de5 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1136,7 +1136,7 @@ xfs_inactive(
1136 * If the inode is already free, then there can be nothing 1136 * If the inode is already free, then there can be nothing
1137 * to clean up here. 1137 * to clean up here.
1138 */ 1138 */
1139 if (ip->i_d.di_mode == 0 || VN_BAD(VFS_I(ip))) { 1139 if (ip->i_d.di_mode == 0 || is_bad_inode(VFS_I(ip))) {
1140 ASSERT(ip->i_df.if_real_bytes == 0); 1140 ASSERT(ip->i_df.if_real_bytes == 0);
1141 ASSERT(ip->i_df.if_broot_bytes == 0); 1141 ASSERT(ip->i_df.if_broot_bytes == 0);
1142 return VN_INACTIVE_CACHE; 1142 return VN_INACTIVE_CACHE;
@@ -1387,23 +1387,28 @@ xfs_create(
1387 xfs_inode_t **ipp, 1387 xfs_inode_t **ipp,
1388 cred_t *credp) 1388 cred_t *credp)
1389{ 1389{
1390 xfs_mount_t *mp = dp->i_mount; 1390 int is_dir = S_ISDIR(mode);
1391 xfs_inode_t *ip; 1391 struct xfs_mount *mp = dp->i_mount;
1392 xfs_trans_t *tp; 1392 struct xfs_inode *ip = NULL;
1393 struct xfs_trans *tp = NULL;
1393 int error; 1394 int error;
1394 xfs_bmap_free_t free_list; 1395 xfs_bmap_free_t free_list;
1395 xfs_fsblock_t first_block; 1396 xfs_fsblock_t first_block;
1396 boolean_t unlock_dp_on_error = B_FALSE; 1397 boolean_t unlock_dp_on_error = B_FALSE;
1397 int dm_event_sent = 0;
1398 uint cancel_flags; 1398 uint cancel_flags;
1399 int committed; 1399 int committed;
1400 xfs_prid_t prid; 1400 xfs_prid_t prid;
1401 struct xfs_dquot *udqp, *gdqp; 1401 struct xfs_dquot *udqp = NULL;
1402 struct xfs_dquot *gdqp = NULL;
1402 uint resblks; 1403 uint resblks;
1404 uint log_res;
1405 uint log_count;
1403 1406
1404 ASSERT(!*ipp);
1405 xfs_itrace_entry(dp); 1407 xfs_itrace_entry(dp);
1406 1408
1409 if (XFS_FORCED_SHUTDOWN(mp))
1410 return XFS_ERROR(EIO);
1411
1407 if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { 1412 if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) {
1408 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, 1413 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
1409 dp, DM_RIGHT_NULL, NULL, 1414 dp, DM_RIGHT_NULL, NULL,
@@ -1412,84 +1417,97 @@ xfs_create(
1412 1417
1413 if (error) 1418 if (error)
1414 return error; 1419 return error;
1415 dm_event_sent = 1;
1416 } 1420 }
1417 1421
1418 if (XFS_FORCED_SHUTDOWN(mp))
1419 return XFS_ERROR(EIO);
1420
1421 /* Return through std_return after this point. */
1422
1423 udqp = gdqp = NULL;
1424 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1422 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
1425 prid = dp->i_d.di_projid; 1423 prid = dp->i_d.di_projid;
1426 else 1424 else
1427 prid = (xfs_prid_t)dfltprid; 1425 prid = dfltprid;
1428 1426
1429 /* 1427 /*
1430 * Make sure that we have allocated dquot(s) on disk. 1428 * Make sure that we have allocated dquot(s) on disk.
1431 */ 1429 */
1432 error = XFS_QM_DQVOPALLOC(mp, dp, 1430 error = XFS_QM_DQVOPALLOC(mp, dp,
1433 current_fsuid(), current_fsgid(), prid, 1431 current_fsuid(), current_fsgid(), prid,
1434 XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp); 1432 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
1435 if (error) 1433 if (error)
1436 goto std_return; 1434 goto std_return;
1437 1435
1438 ip = NULL; 1436 if (is_dir) {
1437 rdev = 0;
1438 resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
1439 log_res = XFS_MKDIR_LOG_RES(mp);
1440 log_count = XFS_MKDIR_LOG_COUNT;
1441 tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
1442 } else {
1443 resblks = XFS_CREATE_SPACE_RES(mp, name->len);
1444 log_res = XFS_CREATE_LOG_RES(mp);
1445 log_count = XFS_CREATE_LOG_COUNT;
1446 tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
1447 }
1439 1448
1440 tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
1441 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 1449 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1442 resblks = XFS_CREATE_SPACE_RES(mp, name->len); 1450
1443 /* 1451 /*
1444 * Initially assume that the file does not exist and 1452 * Initially assume that the file does not exist and
1445 * reserve the resources for that case. If that is not 1453 * reserve the resources for that case. If that is not
1446 * the case we'll drop the one we have and get a more 1454 * the case we'll drop the one we have and get a more
1447 * appropriate transaction later. 1455 * appropriate transaction later.
1448 */ 1456 */
1449 error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0, 1457 error = xfs_trans_reserve(tp, resblks, log_res, 0,
1450 XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); 1458 XFS_TRANS_PERM_LOG_RES, log_count);
1451 if (error == ENOSPC) { 1459 if (error == ENOSPC) {
1452 resblks = 0; 1460 resblks = 0;
1453 error = xfs_trans_reserve(tp, 0, XFS_CREATE_LOG_RES(mp), 0, 1461 error = xfs_trans_reserve(tp, 0, log_res, 0,
1454 XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); 1462 XFS_TRANS_PERM_LOG_RES, log_count);
1455 } 1463 }
1456 if (error) { 1464 if (error) {
1457 cancel_flags = 0; 1465 cancel_flags = 0;
1458 goto error_return; 1466 goto out_trans_cancel;
1459 } 1467 }
1460 1468
1461 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 1469 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
1462 unlock_dp_on_error = B_TRUE; 1470 unlock_dp_on_error = B_TRUE;
1463 1471
1464 xfs_bmap_init(&free_list, &first_block); 1472 /*
1473 * Check for directory link count overflow.
1474 */
1475 if (is_dir && dp->i_d.di_nlink >= XFS_MAXLINK) {
1476 error = XFS_ERROR(EMLINK);
1477 goto out_trans_cancel;
1478 }
1465 1479
1466 ASSERT(ip == NULL); 1480 xfs_bmap_init(&free_list, &first_block);
1467 1481
1468 /* 1482 /*
1469 * Reserve disk quota and the inode. 1483 * Reserve disk quota and the inode.
1470 */ 1484 */
1471 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); 1485 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
1472 if (error) 1486 if (error)
1473 goto error_return; 1487 goto out_trans_cancel;
1474 1488
1475 error = xfs_dir_canenter(tp, dp, name, resblks); 1489 error = xfs_dir_canenter(tp, dp, name, resblks);
1476 if (error) 1490 if (error)
1477 goto error_return; 1491 goto out_trans_cancel;
1478 error = xfs_dir_ialloc(&tp, dp, mode, 1, 1492
1479 rdev, credp, prid, resblks > 0, 1493 /*
1480 &ip, &committed); 1494 * A newly created regular or special file just has one directory
1495 * entry pointing to them, but a directory also the "." entry
1496 * pointing to itself.
1497 */
1498 error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, credp,
1499 prid, resblks > 0, &ip, &committed);
1481 if (error) { 1500 if (error) {
1482 if (error == ENOSPC) 1501 if (error == ENOSPC)
1483 goto error_return; 1502 goto out_trans_cancel;
1484 goto abort_return; 1503 goto out_trans_abort;
1485 } 1504 }
1486 xfs_itrace_ref(ip);
1487 1505
1488 /* 1506 /*
1489 * At this point, we've gotten a newly allocated inode. 1507 * At this point, we've gotten a newly allocated inode.
1490 * It is locked (and joined to the transaction). 1508 * It is locked (and joined to the transaction).
1491 */ 1509 */
1492 1510 xfs_itrace_ref(ip);
1493 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 1511 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1494 1512
1495 /* 1513 /*
@@ -1508,19 +1526,28 @@ xfs_create(
1508 resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 1526 resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
1509 if (error) { 1527 if (error) {
1510 ASSERT(error != ENOSPC); 1528 ASSERT(error != ENOSPC);
1511 goto abort_return; 1529 goto out_trans_abort;
1512 } 1530 }
1513 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1531 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1514 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1532 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1515 1533
1534 if (is_dir) {
1535 error = xfs_dir_init(tp, ip, dp);
1536 if (error)
1537 goto out_bmap_cancel;
1538
1539 error = xfs_bumplink(tp, dp);
1540 if (error)
1541 goto out_bmap_cancel;
1542 }
1543
1516 /* 1544 /*
1517 * If this is a synchronous mount, make sure that the 1545 * If this is a synchronous mount, make sure that the
1518 * create transaction goes to disk before returning to 1546 * create transaction goes to disk before returning to
1519 * the user. 1547 * the user.
1520 */ 1548 */
1521 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 1549 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
1522 xfs_trans_set_sync(tp); 1550 xfs_trans_set_sync(tp);
1523 }
1524 1551
1525 /* 1552 /*
1526 * Attach the dquot(s) to the inodes and modify them incore. 1553 * Attach the dquot(s) to the inodes and modify them incore.
@@ -1537,16 +1564,13 @@ xfs_create(
1537 IHOLD(ip); 1564 IHOLD(ip);
1538 1565
1539 error = xfs_bmap_finish(&tp, &free_list, &committed); 1566 error = xfs_bmap_finish(&tp, &free_list, &committed);
1540 if (error) { 1567 if (error)
1541 xfs_bmap_cancel(&free_list); 1568 goto out_abort_rele;
1542 goto abort_rele;
1543 }
1544 1569
1545 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1570 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1546 if (error) { 1571 if (error) {
1547 IRELE(ip); 1572 IRELE(ip);
1548 tp = NULL; 1573 goto out_dqrele;
1549 goto error_return;
1550 } 1574 }
1551 1575
1552 XFS_QM_DQRELE(mp, udqp); 1576 XFS_QM_DQRELE(mp, udqp);
@@ -1555,26 +1579,22 @@ xfs_create(
1555 *ipp = ip; 1579 *ipp = ip;
1556 1580
1557 /* Fallthrough to std_return with error = 0 */ 1581 /* Fallthrough to std_return with error = 0 */
1558 1582 std_return:
1559std_return: 1583 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
1560 if ((*ipp || (error != 0 && dm_event_sent != 0)) && 1584 XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, dp, DM_RIGHT_NULL,
1561 DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { 1585 ip, DM_RIGHT_NULL, name->name, NULL, mode,
1562 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, 1586 error, 0);
1563 dp, DM_RIGHT_NULL,
1564 *ipp ? ip : NULL,
1565 DM_RIGHT_NULL, name->name, NULL,
1566 mode, error, 0);
1567 } 1587 }
1588
1568 return error; 1589 return error;
1569 1590
1570 abort_return: 1591 out_bmap_cancel:
1592 xfs_bmap_cancel(&free_list);
1593 out_trans_abort:
1571 cancel_flags |= XFS_TRANS_ABORT; 1594 cancel_flags |= XFS_TRANS_ABORT;
1572 /* FALLTHROUGH */ 1595 out_trans_cancel:
1573 1596 xfs_trans_cancel(tp, cancel_flags);
1574 error_return: 1597 out_dqrele:
1575 if (tp != NULL)
1576 xfs_trans_cancel(tp, cancel_flags);
1577
1578 XFS_QM_DQRELE(mp, udqp); 1598 XFS_QM_DQRELE(mp, udqp);
1579 XFS_QM_DQRELE(mp, gdqp); 1599 XFS_QM_DQRELE(mp, gdqp);
1580 1600
@@ -1583,20 +1603,18 @@ std_return:
1583 1603
1584 goto std_return; 1604 goto std_return;
1585 1605
1586 abort_rele: 1606 out_abort_rele:
1587 /* 1607 /*
1588 * Wait until after the current transaction is aborted to 1608 * Wait until after the current transaction is aborted to
1589 * release the inode. This prevents recursive transactions 1609 * release the inode. This prevents recursive transactions
1590 * and deadlocks from xfs_inactive. 1610 * and deadlocks from xfs_inactive.
1591 */ 1611 */
1612 xfs_bmap_cancel(&free_list);
1592 cancel_flags |= XFS_TRANS_ABORT; 1613 cancel_flags |= XFS_TRANS_ABORT;
1593 xfs_trans_cancel(tp, cancel_flags); 1614 xfs_trans_cancel(tp, cancel_flags);
1594 IRELE(ip); 1615 IRELE(ip);
1595 1616 unlock_dp_on_error = B_FALSE;
1596 XFS_QM_DQRELE(mp, udqp); 1617 goto out_dqrele;
1597 XFS_QM_DQRELE(mp, gdqp);
1598
1599 goto std_return;
1600} 1618}
1601 1619
1602#ifdef DEBUG 1620#ifdef DEBUG
@@ -2004,8 +2022,10 @@ xfs_link(
2004 /* Return through std_return after this point. */ 2022 /* Return through std_return after this point. */
2005 2023
2006 error = XFS_QM_DQATTACH(mp, sip, 0); 2024 error = XFS_QM_DQATTACH(mp, sip, 0);
2007 if (!error && sip != tdp) 2025 if (error)
2008 error = XFS_QM_DQATTACH(mp, tdp, 0); 2026 goto std_return;
2027
2028 error = XFS_QM_DQATTACH(mp, tdp, 0);
2009 if (error) 2029 if (error)
2010 goto std_return; 2030 goto std_return;
2011 2031
@@ -2110,209 +2130,6 @@ std_return:
2110 goto std_return; 2130 goto std_return;
2111} 2131}
2112 2132
2113
2114int
2115xfs_mkdir(
2116 xfs_inode_t *dp,
2117 struct xfs_name *dir_name,
2118 mode_t mode,
2119 xfs_inode_t **ipp,
2120 cred_t *credp)
2121{
2122 xfs_mount_t *mp = dp->i_mount;
2123 xfs_inode_t *cdp; /* inode of created dir */
2124 xfs_trans_t *tp;
2125 int cancel_flags;
2126 int error;
2127 int committed;
2128 xfs_bmap_free_t free_list;
2129 xfs_fsblock_t first_block;
2130 boolean_t unlock_dp_on_error = B_FALSE;
2131 boolean_t created = B_FALSE;
2132 int dm_event_sent = 0;
2133 xfs_prid_t prid;
2134 struct xfs_dquot *udqp, *gdqp;
2135 uint resblks;
2136
2137 if (XFS_FORCED_SHUTDOWN(mp))
2138 return XFS_ERROR(EIO);
2139
2140 tp = NULL;
2141
2142 if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) {
2143 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
2144 dp, DM_RIGHT_NULL, NULL,
2145 DM_RIGHT_NULL, dir_name->name, NULL,
2146 mode, 0, 0);
2147 if (error)
2148 return error;
2149 dm_event_sent = 1;
2150 }
2151
2152 /* Return through std_return after this point. */
2153
2154 xfs_itrace_entry(dp);
2155
2156 mp = dp->i_mount;
2157 udqp = gdqp = NULL;
2158 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
2159 prid = dp->i_d.di_projid;
2160 else
2161 prid = (xfs_prid_t)dfltprid;
2162
2163 /*
2164 * Make sure that we have allocated dquot(s) on disk.
2165 */
2166 error = XFS_QM_DQVOPALLOC(mp, dp,
2167 current_fsuid(), current_fsgid(), prid,
2168 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
2169 if (error)
2170 goto std_return;
2171
2172 tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
2173 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2174 resblks = XFS_MKDIR_SPACE_RES(mp, dir_name->len);
2175 error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0,
2176 XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT);
2177 if (error == ENOSPC) {
2178 resblks = 0;
2179 error = xfs_trans_reserve(tp, 0, XFS_MKDIR_LOG_RES(mp), 0,
2180 XFS_TRANS_PERM_LOG_RES,
2181 XFS_MKDIR_LOG_COUNT);
2182 }
2183 if (error) {
2184 cancel_flags = 0;
2185 goto error_return;
2186 }
2187
2188 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
2189 unlock_dp_on_error = B_TRUE;
2190
2191 /*
2192 * Check for directory link count overflow.
2193 */
2194 if (dp->i_d.di_nlink >= XFS_MAXLINK) {
2195 error = XFS_ERROR(EMLINK);
2196 goto error_return;
2197 }
2198
2199 /*
2200 * Reserve disk quota and the inode.
2201 */
2202 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
2203 if (error)
2204 goto error_return;
2205
2206 error = xfs_dir_canenter(tp, dp, dir_name, resblks);
2207 if (error)
2208 goto error_return;
2209 /*
2210 * create the directory inode.
2211 */
2212 error = xfs_dir_ialloc(&tp, dp, mode, 2,
2213 0, credp, prid, resblks > 0,
2214 &cdp, NULL);
2215 if (error) {
2216 if (error == ENOSPC)
2217 goto error_return;
2218 goto abort_return;
2219 }
2220 xfs_itrace_ref(cdp);
2221
2222 /*
2223 * Now we add the directory inode to the transaction.
2224 * We waited until now since xfs_dir_ialloc might start
2225 * a new transaction. Had we joined the transaction
2226 * earlier, the locks might have gotten released. An error
2227 * from here on will result in the transaction cancel
2228 * unlocking dp so don't do it explicitly in the error path.
2229 */
2230 IHOLD(dp);
2231 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2232 unlock_dp_on_error = B_FALSE;
2233
2234 xfs_bmap_init(&free_list, &first_block);
2235
2236 error = xfs_dir_createname(tp, dp, dir_name, cdp->i_ino,
2237 &first_block, &free_list, resblks ?
2238 resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
2239 if (error) {
2240 ASSERT(error != ENOSPC);
2241 goto error1;
2242 }
2243 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2244
2245 error = xfs_dir_init(tp, cdp, dp);
2246 if (error)
2247 goto error2;
2248
2249 error = xfs_bumplink(tp, dp);
2250 if (error)
2251 goto error2;
2252
2253 created = B_TRUE;
2254
2255 *ipp = cdp;
2256 IHOLD(cdp);
2257
2258 /*
2259 * Attach the dquots to the new inode and modify the icount incore.
2260 */
2261 XFS_QM_DQVOPCREATE(mp, tp, cdp, udqp, gdqp);
2262
2263 /*
2264 * If this is a synchronous mount, make sure that the
2265 * mkdir transaction goes to disk before returning to
2266 * the user.
2267 */
2268 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
2269 xfs_trans_set_sync(tp);
2270 }
2271
2272 error = xfs_bmap_finish(&tp, &free_list, &committed);
2273 if (error) {
2274 IRELE(cdp);
2275 goto error2;
2276 }
2277
2278 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2279 XFS_QM_DQRELE(mp, udqp);
2280 XFS_QM_DQRELE(mp, gdqp);
2281 if (error) {
2282 IRELE(cdp);
2283 }
2284
2285 /* Fall through to std_return with error = 0 or errno from
2286 * xfs_trans_commit. */
2287
2288std_return:
2289 if ((created || (error != 0 && dm_event_sent != 0)) &&
2290 DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
2291 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
2292 dp, DM_RIGHT_NULL,
2293 created ? cdp : NULL,
2294 DM_RIGHT_NULL,
2295 dir_name->name, NULL,
2296 mode, error, 0);
2297 }
2298 return error;
2299
2300 error2:
2301 error1:
2302 xfs_bmap_cancel(&free_list);
2303 abort_return:
2304 cancel_flags |= XFS_TRANS_ABORT;
2305 error_return:
2306 xfs_trans_cancel(tp, cancel_flags);
2307 XFS_QM_DQRELE(mp, udqp);
2308 XFS_QM_DQRELE(mp, gdqp);
2309
2310 if (unlock_dp_on_error)
2311 xfs_iunlock(dp, XFS_ILOCK_EXCL);
2312
2313 goto std_return;
2314}
2315
2316int 2133int
2317xfs_symlink( 2134xfs_symlink(
2318 xfs_inode_t *dp, 2135 xfs_inode_t *dp,
@@ -2587,51 +2404,6 @@ std_return:
2587} 2404}
2588 2405
2589int 2406int
2590xfs_inode_flush(
2591 xfs_inode_t *ip,
2592 int flags)
2593{
2594 xfs_mount_t *mp = ip->i_mount;
2595 int error = 0;
2596
2597 if (XFS_FORCED_SHUTDOWN(mp))
2598 return XFS_ERROR(EIO);
2599
2600 /*
2601 * Bypass inodes which have already been cleaned by
2602 * the inode flush clustering code inside xfs_iflush
2603 */
2604 if (xfs_inode_clean(ip))
2605 return 0;
2606
2607 /*
2608 * We make this non-blocking if the inode is contended,
2609 * return EAGAIN to indicate to the caller that they
2610 * did not succeed. This prevents the flush path from
2611 * blocking on inodes inside another operation right
2612 * now, they get caught later by xfs_sync.
2613 */
2614 if (flags & FLUSH_SYNC) {
2615 xfs_ilock(ip, XFS_ILOCK_SHARED);
2616 xfs_iflock(ip);
2617 } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
2618 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) {
2619 xfs_iunlock(ip, XFS_ILOCK_SHARED);
2620 return EAGAIN;
2621 }
2622 } else {
2623 return EAGAIN;
2624 }
2625
2626 error = xfs_iflush(ip, (flags & FLUSH_SYNC) ? XFS_IFLUSH_SYNC
2627 : XFS_IFLUSH_ASYNC_NOBLOCK);
2628 xfs_iunlock(ip, XFS_ILOCK_SHARED);
2629
2630 return error;
2631}
2632
2633
2634int
2635xfs_set_dmattrs( 2407xfs_set_dmattrs(
2636 xfs_inode_t *ip, 2408 xfs_inode_t *ip,
2637 u_int evmask, 2409 u_int evmask,
@@ -2676,7 +2448,7 @@ xfs_reclaim(
2676 ASSERT(!VN_MAPPED(VFS_I(ip))); 2448 ASSERT(!VN_MAPPED(VFS_I(ip)));
2677 2449
2678 /* bad inode, get out here ASAP */ 2450 /* bad inode, get out here ASAP */
2679 if (VN_BAD(VFS_I(ip))) { 2451 if (is_bad_inode(VFS_I(ip))) {
2680 xfs_ireclaim(ip); 2452 xfs_ireclaim(ip);
2681 return 0; 2453 return 0;
2682 } 2454 }
@@ -3090,7 +2862,7 @@ xfs_free_file_space(
3090 2862
3091 /* 2863 /*
3092 * Need to zero the stuff we're not freeing, on disk. 2864 * Need to zero the stuff we're not freeing, on disk.
3093 * If its a realtime file & can't use unwritten extents then we 2865 * If it's a realtime file & can't use unwritten extents then we
3094 * actually need to zero the extent edges. Otherwise xfs_bunmapi 2866 * actually need to zero the extent edges. Otherwise xfs_bunmapi
3095 * will take care of it for us. 2867 * will take care of it for us.
3096 */ 2868 */
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 76df328c61b4..04373c6c61ff 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -31,14 +31,11 @@ int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
31 struct xfs_inode *ip); 31 struct xfs_inode *ip);
32int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, 32int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
33 struct xfs_name *target_name); 33 struct xfs_name *target_name);
34int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name,
35 mode_t mode, struct xfs_inode **ipp, cred_t *credp);
36int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, 34int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize,
37 xfs_off_t *offset, filldir_t filldir); 35 xfs_off_t *offset, filldir_t filldir);
38int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, 36int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
39 const char *target_path, mode_t mode, struct xfs_inode **ipp, 37 const char *target_path, mode_t mode, struct xfs_inode **ipp,
40 cred_t *credp); 38 cred_t *credp);
41int xfs_inode_flush(struct xfs_inode *ip, int flags);
42int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); 39int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
43int xfs_reclaim(struct xfs_inode *ip); 40int xfs_reclaim(struct xfs_inode *ip);
44int xfs_change_file_space(struct xfs_inode *ip, int cmd, 41int xfs_change_file_space(struct xfs_inode *ip, int cmd,
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 8209e08969f9..66ec05a57955 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -139,6 +139,9 @@ struct target_type {
139 dm_ioctl_fn ioctl; 139 dm_ioctl_fn ioctl;
140 dm_merge_fn merge; 140 dm_merge_fn merge;
141 dm_busy_fn busy; 141 dm_busy_fn busy;
142
143 /* For internal device-mapper use. */
144 struct list_head list;
142}; 145};
143 146
144struct io_restrictions { 147struct io_restrictions {
diff --git a/include/linux/dm-dirty-log.h b/include/linux/dm-dirty-log.h
index 600c5fb2daad..5e8b11d88f6f 100644
--- a/include/linux/dm-dirty-log.h
+++ b/include/linux/dm-dirty-log.h
@@ -28,6 +28,9 @@ struct dm_dirty_log_type {
28 const char *name; 28 const char *name;
29 struct module *module; 29 struct module *module;
30 30
31 /* For internal device-mapper use */
32 struct list_head list;
33
31 int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti, 34 int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti,
32 unsigned argc, char **argv); 35 unsigned argc, char **argv);
33 void (*dtr)(struct dm_dirty_log *log); 36 void (*dtr)(struct dm_dirty_log *log);
@@ -113,6 +116,16 @@ struct dm_dirty_log_type {
113 */ 116 */
114 int (*status)(struct dm_dirty_log *log, status_type_t status_type, 117 int (*status)(struct dm_dirty_log *log, status_type_t status_type,
115 char *result, unsigned maxlen); 118 char *result, unsigned maxlen);
119
120 /*
121 * is_remote_recovering is necessary for cluster mirroring. It provides
122 * a way to detect recovery on another node, so we aren't writing
123 * concurrently. This function is likely to block (when a cluster log
124 * is used).
125 *
126 * Returns: 0, 1
127 */
128 int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region);
116}; 129};
117 130
118int dm_dirty_log_type_register(struct dm_dirty_log_type *type); 131int dm_dirty_log_type_register(struct dm_dirty_log_type *type);
diff --git a/include/linux/hdreg.h b/include/linux/hdreg.h
index ed21bd3dbd25..29ee2873f4a8 100644
--- a/include/linux/hdreg.h
+++ b/include/linux/hdreg.h
@@ -1,68 +1,6 @@
1#ifndef _LINUX_HDREG_H 1#ifndef _LINUX_HDREG_H
2#define _LINUX_HDREG_H 2#define _LINUX_HDREG_H
3 3
4#ifdef __KERNEL__
5#include <linux/ata.h>
6
7/*
8 * This file contains some defines for the AT-hd-controller.
9 * Various sources.
10 */
11
12/* ide.c has its own port definitions in "ide.h" */
13
14#define HD_IRQ 14
15
16/* Hd controller regs. Ref: IBM AT Bios-listing */
17#define HD_DATA 0x1f0 /* _CTL when writing */
18#define HD_ERROR 0x1f1 /* see err-bits */
19#define HD_NSECTOR 0x1f2 /* nr of sectors to read/write */
20#define HD_SECTOR 0x1f3 /* starting sector */
21#define HD_LCYL 0x1f4 /* starting cylinder */
22#define HD_HCYL 0x1f5 /* high byte of starting cyl */
23#define HD_CURRENT 0x1f6 /* 101dhhhh , d=drive, hhhh=head */
24#define HD_STATUS 0x1f7 /* see status-bits */
25#define HD_FEATURE HD_ERROR /* same io address, read=error, write=feature */
26#define HD_PRECOMP HD_FEATURE /* obsolete use of this port - predates IDE */
27#define HD_COMMAND HD_STATUS /* same io address, read=status, write=cmd */
28
29#define HD_CMD 0x3f6 /* used for resets */
30#define HD_ALTSTATUS 0x3f6 /* same as HD_STATUS but doesn't clear irq */
31
32/* remainder is shared between hd.c, ide.c, ide-cd.c, and the hdparm utility */
33
34/* Bits of HD_STATUS */
35#define ERR_STAT 0x01
36#define INDEX_STAT 0x02
37#define ECC_STAT 0x04 /* Corrected error */
38#define DRQ_STAT 0x08
39#define SEEK_STAT 0x10
40#define SRV_STAT 0x10
41#define WRERR_STAT 0x20
42#define READY_STAT 0x40
43#define BUSY_STAT 0x80
44
45/* Bits for HD_ERROR */
46#define MARK_ERR 0x01 /* Bad address mark */
47#define ILI_ERR 0x01 /* Illegal Length Indication (ATAPI) */
48#define TRK0_ERR 0x02 /* couldn't find track 0 */
49#define EOM_ERR 0x02 /* End Of Media (ATAPI) */
50#define ABRT_ERR 0x04 /* Command aborted */
51#define MCR_ERR 0x08 /* media change request */
52#define ID_ERR 0x10 /* ID field not found */
53#define MC_ERR 0x20 /* media changed */
54#define ECC_ERR 0x40 /* Uncorrectable ECC error */
55#define BBD_ERR 0x80 /* pre-EIDE meaning: block marked bad */
56#define ICRC_ERR 0x80 /* new meaning: CRC error during transfer */
57#define LFS_ERR 0xf0 /* Last Failed Sense (ATAPI) */
58
59/* Bits of HD_NSECTOR */
60#define CD 0x01
61#define IO 0x02
62#define REL 0x04
63#define TAG_MASK 0xf8
64#endif /* __KERNEL__ */
65
66#include <linux/types.h> 4#include <linux/types.h>
67 5
68/* 6/*
@@ -191,6 +129,7 @@ typedef struct hd_drive_hob_hdr {
191#define TASKFILE_INVALID 0x7fff 129#define TASKFILE_INVALID 0x7fff
192#endif 130#endif
193 131
132#ifndef __KERNEL__
194/* ATA/ATAPI Commands pre T13 Spec */ 133/* ATA/ATAPI Commands pre T13 Spec */
195#define WIN_NOP 0x00 134#define WIN_NOP 0x00
196/* 135/*
@@ -379,6 +318,7 @@ typedef struct hd_drive_hob_hdr {
379#define SECURITY_ERASE_UNIT 0xBD 318#define SECURITY_ERASE_UNIT 0xBD
380#define SECURITY_FREEZE_LOCK 0xBE 319#define SECURITY_FREEZE_LOCK 0xBE
381#define SECURITY_DISABLE_PASSWORD 0xBF 320#define SECURITY_DISABLE_PASSWORD 0xBF
321#endif /* __KERNEL__ */
382 322
383struct hd_geometry { 323struct hd_geometry {
384 unsigned char heads; 324 unsigned char heads;
@@ -448,6 +388,7 @@ enum {
448 388
449#define __NEW_HD_DRIVE_ID 389#define __NEW_HD_DRIVE_ID
450 390
391#ifndef __KERNEL__
451/* 392/*
452 * Structure returned by HDIO_GET_IDENTITY, as per ANSI NCITS ATA6 rev.1b spec. 393 * Structure returned by HDIO_GET_IDENTITY, as per ANSI NCITS ATA6 rev.1b spec.
453 * 394 *
@@ -699,6 +640,7 @@ struct hd_driveid {
699 * 7:0 Signature 640 * 7:0 Signature
700 */ 641 */
701}; 642};
643#endif /* __KERNEL__ */
702 644
703/* 645/*
704 * IDE "nice" flags. These are used on a per drive basis to determine 646 * IDE "nice" flags. These are used on a per drive basis to determine
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 7ff5c55f9b55..1fcb7126a01f 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -19,8 +19,21 @@ static inline void flush_kernel_dcache_page(struct page *page)
19} 19}
20#endif 20#endif
21 21
22#ifdef CONFIG_HIGHMEM 22#include <asm/kmap_types.h>
23
24#if defined(CONFIG_DEBUG_HIGHMEM) && defined(CONFIG_TRACE_IRQFLAGS_SUPPORT)
25
26void debug_kmap_atomic(enum km_type type);
27
28#else
23 29
30static inline void debug_kmap_atomic(enum km_type type)
31{
32}
33
34#endif
35
36#ifdef CONFIG_HIGHMEM
24#include <asm/highmem.h> 37#include <asm/highmem.h>
25 38
26/* declarations for linux/mm/highmem.c */ 39/* declarations for linux/mm/highmem.c */
@@ -44,8 +57,6 @@ static inline void *kmap(struct page *page)
44 57
45#define kunmap(page) do { (void) (page); } while (0) 58#define kunmap(page) do { (void) (page); } while (0)
46 59
47#include <asm/kmap_types.h>
48
49static inline void *kmap_atomic(struct page *page, enum km_type idx) 60static inline void *kmap_atomic(struct page *page, enum km_type idx)
50{ 61{
51 pagefault_disable(); 62 pagefault_disable();
@@ -187,16 +198,4 @@ static inline void copy_highpage(struct page *to, struct page *from)
187 kunmap_atomic(vto, KM_USER1); 198 kunmap_atomic(vto, KM_USER1);
188} 199}
189 200
190#if defined(CONFIG_DEBUG_HIGHMEM) && defined(CONFIG_TRACE_IRQFLAGS_SUPPORT)
191
192void debug_kmap_atomic(enum km_type type);
193
194#else
195
196static inline void debug_kmap_atomic(enum km_type type)
197{
198}
199
200#endif
201
202#endif /* _LINUX_HIGHMEM_H */ 201#endif /* _LINUX_HIGHMEM_H */
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
deleted file mode 100644
index 82bea14cae1a..000000000000
--- a/include/linux/raid/md.h
+++ /dev/null
@@ -1,81 +0,0 @@
1/*
2 md.h : Multiple Devices driver for Linux
3 Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman
4 Copyright (C) 1994-96 Marc ZYNGIER
5 <zyngier@ufr-info-p7.ibp.fr> or
6 <maz@gloups.fdn.fr>
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 You should have received a copy of the GNU General Public License
14 (for example /usr/src/linux/COPYING); if not, write to the Free
15 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16*/
17
18#ifndef _MD_H
19#define _MD_H
20
21#include <linux/blkdev.h>
22#include <linux/seq_file.h>
23
24/*
25 * 'md_p.h' holds the 'physical' layout of RAID devices
26 * 'md_u.h' holds the user <=> kernel API
27 *
28 * 'md_k.h' holds kernel internal definitions
29 */
30
31#include <linux/raid/md_p.h>
32#include <linux/raid/md_u.h>
33#include <linux/raid/md_k.h>
34
35#ifdef CONFIG_MD
36
37/*
38 * Different major versions are not compatible.
39 * Different minor versions are only downward compatible.
40 * Different patchlevel versions are downward and upward compatible.
41 */
42#define MD_MAJOR_VERSION 0
43#define MD_MINOR_VERSION 90
44/*
45 * MD_PATCHLEVEL_VERSION indicates kernel functionality.
46 * >=1 means different superblock formats are selectable using SET_ARRAY_INFO
47 * and major_version/minor_version accordingly
48 * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT
49 * in the super status byte
50 * >=3 means that bitmap superblock version 4 is supported, which uses
51 * little-ending representation rather than host-endian
52 */
53#define MD_PATCHLEVEL_VERSION 3
54
55extern int mdp_major;
56
57extern int register_md_personality(struct mdk_personality *p);
58extern int unregister_md_personality(struct mdk_personality *p);
59extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev),
60 mddev_t *mddev, const char *name);
61extern void md_unregister_thread(mdk_thread_t *thread);
62extern void md_wakeup_thread(mdk_thread_t *thread);
63extern void md_check_recovery(mddev_t *mddev);
64extern void md_write_start(mddev_t *mddev, struct bio *bi);
65extern void md_write_end(mddev_t *mddev);
66extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
67extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
68
69extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
70 sector_t sector, int size, struct page *page);
71extern void md_super_wait(mddev_t *mddev);
72extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
73 struct page *page, int rw);
74extern void md_do_sync(mddev_t *mddev);
75extern void md_new_event(mddev_t *mddev);
76extern int md_allow_write(mddev_t *mddev);
77extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
78
79#endif /* CONFIG_MD */
80#endif
81
diff --git a/include/linux/raid/md_u.h b/include/linux/raid/md_u.h
index 7192035fc4b0..fb1abb3367e9 100644
--- a/include/linux/raid/md_u.h
+++ b/include/linux/raid/md_u.h
@@ -15,6 +15,24 @@
15#ifndef _MD_U_H 15#ifndef _MD_U_H
16#define _MD_U_H 16#define _MD_U_H
17 17
18/*
19 * Different major versions are not compatible.
20 * Different minor versions are only downward compatible.
21 * Different patchlevel versions are downward and upward compatible.
22 */
23#define MD_MAJOR_VERSION 0
24#define MD_MINOR_VERSION 90
25/*
26 * MD_PATCHLEVEL_VERSION indicates kernel functionality.
27 * >=1 means different superblock formats are selectable using SET_ARRAY_INFO
28 * and major_version/minor_version accordingly
29 * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT
30 * in the super status byte
31 * >=3 means that bitmap superblock version 4 is supported, which uses
32 * little-ending representation rather than host-endian
33 */
34#define MD_PATCHLEVEL_VERSION 3
35
18/* ioctls */ 36/* ioctls */
19 37
20/* status */ 38/* status */
@@ -46,6 +64,12 @@
46#define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) 64#define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33)
47#define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) 65#define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34)
48 66
67/* 63 partitions with the alternate major number (mdp) */
68#define MdpMinorShift 6
69#ifdef __KERNEL__
70extern int mdp_major;
71#endif
72
49typedef struct mdu_version_s { 73typedef struct mdu_version_s {
50 int major; 74 int major;
51 int minor; 75 int minor;
@@ -85,6 +109,17 @@ typedef struct mdu_array_info_s {
85 109
86} mdu_array_info_t; 110} mdu_array_info_t;
87 111
112/* non-obvious values for 'level' */
113#define LEVEL_MULTIPATH (-4)
114#define LEVEL_LINEAR (-1)
115#define LEVEL_FAULTY (-5)
116
117/* we need a value for 'no level specified' and 0
118 * means 'raid0', so we need something else. This is
119 * for internal use only
120 */
121#define LEVEL_NONE (-1000000)
122
88typedef struct mdu_disk_info_s { 123typedef struct mdu_disk_info_s {
89 /* 124 /*
90 * configuration/status of one particular disk 125 * configuration/status of one particular disk
diff --git a/drivers/md/raid6.h b/include/linux/raid/pq.h
index 98dcde88470e..d92480f8285c 100644
--- a/drivers/md/raid6.h
+++ b/include/linux/raid/pq.h
@@ -5,7 +5,7 @@
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330, 7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Bostom MA 02111-1307, USA; either version 2 of the License, or 8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference. 9 * (at your option) any later version; incorporated herein by reference.
10 * 10 *
11 * ----------------------------------------------------------------------- */ 11 * ----------------------------------------------------------------------- */
@@ -17,14 +17,7 @@
17 17
18/* Set to 1 to use kernel-wide empty_zero_page */ 18/* Set to 1 to use kernel-wide empty_zero_page */
19#define RAID6_USE_EMPTY_ZERO_PAGE 0 19#define RAID6_USE_EMPTY_ZERO_PAGE 0
20 20#include <linux/blkdev.h>
21#include <linux/raid/md.h>
22#include <linux/raid/raid5.h>
23
24typedef raid5_conf_t raid6_conf_t; /* Same configuration */
25
26/* Additional compute_parity mode -- updates the parity w/o LOCKING */
27#define UPDATE_PARITY 4
28 21
29/* We need a pre-zeroed page... if we don't want to use the kernel-provided 22/* We need a pre-zeroed page... if we don't want to use the kernel-provided
30 one define it here */ 23 one define it here */
@@ -68,6 +61,10 @@ extern const char raid6_empty_zero_page[PAGE_SIZE];
68#define enable_kernel_altivec() 61#define enable_kernel_altivec()
69#define disable_kernel_altivec() 62#define disable_kernel_altivec()
70 63
64#define EXPORT_SYMBOL(sym)
65#define MODULE_LICENSE(licence)
66#define subsys_initcall(x)
67#define module_exit(x)
71#endif /* __KERNEL__ */ 68#endif /* __KERNEL__ */
72 69
73/* Routine choices */ 70/* Routine choices */
@@ -98,9 +95,11 @@ extern const u8 raid6_gfinv[256] __attribute__((aligned(256)));
98extern const u8 raid6_gfexi[256] __attribute__((aligned(256))); 95extern const u8 raid6_gfexi[256] __attribute__((aligned(256)));
99 96
100/* Recovery routines */ 97/* Recovery routines */
101void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, void **ptrs); 98void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
99 void **ptrs);
102void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs); 100void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs);
103void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs); 101void raid6_dual_recov(int disks, size_t bytes, int faila, int failb,
102 void **ptrs);
104 103
105/* Some definitions to allow code to be compiled for testing in userspace */ 104/* Some definitions to allow code to be compiled for testing in userspace */
106#ifndef __KERNEL__ 105#ifndef __KERNEL__
@@ -108,8 +107,11 @@ void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs
108# define jiffies raid6_jiffies() 107# define jiffies raid6_jiffies()
109# define printk printf 108# define printk printf
110# define GFP_KERNEL 0 109# define GFP_KERNEL 0
111# define __get_free_pages(x,y) ((unsigned long)mmap(NULL, PAGE_SIZE << (y), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0)) 110# define __get_free_pages(x, y) ((unsigned long)mmap(NULL, PAGE_SIZE << (y), \
112# define free_pages(x,y) munmap((void *)(x), (y)*PAGE_SIZE) 111 PROT_READ|PROT_WRITE, \
112 MAP_PRIVATE|MAP_ANONYMOUS,\
113 0, 0))
114# define free_pages(x, y) munmap((void *)(x), (y)*PAGE_SIZE)
113 115
114static inline void cpu_relax(void) 116static inline void cpu_relax(void)
115{ 117{
diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h
index 3e120587eada..5a210959e3f8 100644
--- a/include/linux/raid/xor.h
+++ b/include/linux/raid/xor.h
@@ -1,8 +1,6 @@
1#ifndef _XOR_H 1#ifndef _XOR_H
2#define _XOR_H 2#define _XOR_H
3 3
4#include <linux/raid/md.h>
5
6#define MAX_XOR_BLOCKS 4 4#define MAX_XOR_BLOCKS 4
7 5
8extern void xor_blocks(unsigned int count, unsigned int bytes, 6extern void xor_blocks(unsigned int count, unsigned int bytes,
diff --git a/include/linux/timeriomem-rng.h b/include/linux/timeriomem-rng.h
index dd253177f65f..3e08a1c86830 100644
--- a/include/linux/timeriomem-rng.h
+++ b/include/linux/timeriomem-rng.h
@@ -14,7 +14,7 @@ struct timeriomem_rng_data {
14 struct completion completion; 14 struct completion completion;
15 unsigned int present:1; 15 unsigned int present:1;
16 16
17 u32 __iomem *address; 17 void __iomem *address;
18 18
19 /* measures in usecs */ 19 /* measures in usecs */
20 unsigned int period; 20 unsigned int period;
diff --git a/init/do_mounts.h b/init/do_mounts.h
index 9aa968d54329..f5b978a9bb92 100644
--- a/init/do_mounts.h
+++ b/init/do_mounts.h
@@ -1,4 +1,5 @@
1#include <linux/kernel.h> 1#include <linux/kernel.h>
2#include <linux/blkdev.h>
2#include <linux/init.h> 3#include <linux/init.h>
3#include <linux/syscalls.h> 4#include <linux/syscalls.h>
4#include <linux/unistd.h> 5#include <linux/unistd.h>
diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c
index 9bdddbcb3d6a..69aebbf8fd2d 100644
--- a/init/do_mounts_md.c
+++ b/init/do_mounts_md.c
@@ -1,5 +1,6 @@
1#include <linux/delay.h> 1#include <linux/delay.h>
2#include <linux/raid/md.h> 2#include <linux/raid/md_u.h>
3#include <linux/raid/md_p.h>
3 4
4#include "do_mounts.h" 5#include "do_mounts.h"
5 6
@@ -112,8 +113,6 @@ static int __init md_setup(char *str)
112 return 1; 113 return 1;
113} 114}
114 115
115#define MdpMinorShift 6
116
117static void __init md_setup_drive(void) 116static void __init md_setup_drive(void)
118{ 117{
119 int minor, i, ent, partitioned; 118 int minor, i, ent, partitioned;
diff --git a/scripts/package/buildtar b/scripts/package/buildtar
index 28574ae55170..b1fd48db1640 100644
--- a/scripts/package/buildtar
+++ b/scripts/package/buildtar
@@ -75,6 +75,10 @@ case "${ARCH}" in
75 alpha) 75 alpha)
76 [ -f "${objtree}/arch/alpha/boot/vmlinux.gz" ] && cp -v -- "${objtree}/arch/alpha/boot/vmlinux.gz" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}" 76 [ -f "${objtree}/arch/alpha/boot/vmlinux.gz" ] && cp -v -- "${objtree}/arch/alpha/boot/vmlinux.gz" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}"
77 ;; 77 ;;
78 parisc*)
79 [ -f "${KBUILD_IMAGE}" ] && cp -v -- "${KBUILD_IMAGE}" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}"
80 [ -f "${objtree}/lifimage" ] && cp -v -- "${objtree}/lifimage" "${tmpdir}/boot/lifimage-${KERNELRELEASE}"
81 ;;
78 vax) 82 vax)
79 [ -f "${objtree}/vmlinux.SYS" ] && cp -v -- "${objtree}/vmlinux.SYS" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}.SYS" 83 [ -f "${objtree}/vmlinux.SYS" ] && cp -v -- "${objtree}/vmlinux.SYS" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}.SYS"
80 [ -f "${objtree}/vmlinux.dsk" ] && cp -v -- "${objtree}/vmlinux.dsk" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}.dsk" 84 [ -f "${objtree}/vmlinux.dsk" ] && cp -v -- "${objtree}/vmlinux.dsk" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}.dsk"