diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-10 18:38:19 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-10 18:38:19 -0400 |
| commit | 3d30701b58970425e1d45994d6cb82f828924fdd (patch) | |
| tree | 8b14cf462628bebf8548c1b8c205a674564052d1 | |
| parent | 8cbd84f2dd4e52a8771b191030c374ba3e56d291 (diff) | |
| parent | fd8aa2c1811bf60ccb2d5de0579c6f62aec1772d (diff) | |
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (24 commits)
md: clean up do_md_stop
md: fix another deadlock with removing sysfs attributes.
md: move revalidate_disk() back outside open_mutex
md/raid10: fix deadlock with unaligned read during resync
md/bitmap: separate out loading a bitmap from initialising the structures.
md/bitmap: prepare for storing write-intent-bitmap via dm-dirty-log.
md/bitmap: optimise scanning of empty bitmaps.
md/bitmap: clean up plugging calls.
md/bitmap: reduce dependence on sysfs.
md/bitmap: white space clean up and similar.
md/raid5: export raid5 unplugging interface.
md/plug: optionally use plugger to unplug an array during resync/recovery.
md/raid5: add simple plugging infrastructure.
md/raid5: export is_congested test
raid5: Don't set read-ahead when there is no queue
md: add support for raising dm events.
md: export various start/stop interfaces
md: split out md_rdev_init
md: be more careful setting MD_CHANGE_CLEAN
md/raid5: ensure we create a unique name for kmem_cache when mddev has no gendisk
...
| -rw-r--r-- | crypto/async_tx/Kconfig | 14 | ||||
| -rw-r--r-- | drivers/md/Kconfig | 18 | ||||
| -rw-r--r-- | drivers/md/Makefile | 77 | ||||
| -rw-r--r-- | drivers/md/bitmap.c | 508 | ||||
| -rw-r--r-- | drivers/md/bitmap.h | 6 | ||||
| -rw-r--r-- | drivers/md/md.c | 286 | ||||
| -rw-r--r-- | drivers/md/md.h | 55 | ||||
| -rw-r--r-- | drivers/md/raid10.c | 18 | ||||
| -rw-r--r-- | drivers/md/raid5.c | 168 | ||||
| -rw-r--r-- | drivers/md/raid5.h | 9 | ||||
| -rw-r--r-- | lib/Kconfig | 3 | ||||
| -rw-r--r-- | lib/Makefile | 1 | ||||
| -rw-r--r-- | lib/raid6/Makefile | 78 | ||||
| -rw-r--r-- | lib/raid6/mktables.c (renamed from drivers/md/mktables.c) | 0 | ||||
| -rw-r--r-- | lib/raid6/raid6algos.c (renamed from drivers/md/raid6algos.c) | 0 | ||||
| -rw-r--r-- | lib/raid6/raid6altivec.uc (renamed from drivers/md/raid6altivec.uc) | 0 | ||||
| -rw-r--r-- | lib/raid6/raid6int.uc (renamed from drivers/md/raid6int.uc) | 0 | ||||
| -rw-r--r-- | lib/raid6/raid6mmx.c (renamed from drivers/md/raid6mmx.c) | 0 | ||||
| -rw-r--r-- | lib/raid6/raid6recov.c (renamed from drivers/md/raid6recov.c) | 0 | ||||
| -rw-r--r-- | lib/raid6/raid6sse1.c (renamed from drivers/md/raid6sse1.c) | 0 | ||||
| -rw-r--r-- | lib/raid6/raid6sse2.c (renamed from drivers/md/raid6sse2.c) | 0 | ||||
| -rw-r--r-- | lib/raid6/raid6test/Makefile (renamed from drivers/md/raid6test/Makefile) | 0 | ||||
| -rw-r--r-- | lib/raid6/raid6test/test.c (renamed from drivers/md/raid6test/test.c) | 0 | ||||
| -rw-r--r-- | lib/raid6/raid6x86.h (renamed from drivers/md/raid6x86.h) | 0 | ||||
| -rw-r--r-- | lib/raid6/unroll.awk (renamed from drivers/md/unroll.awk) | 0 |
25 files changed, 740 insertions, 501 deletions
diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig
index e28e276ac611..5de2ed13b35d 100644
--- a/crypto/async_tx/Kconfig
+++ b/crypto/async_tx/Kconfig
| |||
| @@ -22,6 +22,20 @@ config ASYNC_RAID6_RECOV | |||
| 22 | tristate | 22 | tristate |
| 23 | select ASYNC_CORE | 23 | select ASYNC_CORE |
| 24 | select ASYNC_PQ | 24 | select ASYNC_PQ |
| 25 | select ASYNC_XOR | ||
| 26 | |||
| 27 | config ASYNC_RAID6_TEST | ||
| 28 | tristate "Self test for hardware accelerated raid6 recovery" | ||
| 29 | depends on ASYNC_RAID6_RECOV | ||
| 30 | select ASYNC_MEMCPY | ||
| 31 | ---help--- | ||
| 32 | This is a one-shot self test that permutes through the | ||
| 33 | recovery of all the possible two disk failure scenarios for a | ||
| 34 | N-disk array. Recovery is performed with the asynchronous | ||
| 35 | raid6 recovery routines, and will optionally use an offload | ||
| 36 | engine if one is available. | ||
| 37 | |||
| 38 | If unsure, say N. | ||
| 25 | 39 | ||
| 26 | config ASYNC_TX_DISABLE_PQ_VAL_DMA | 40 | config ASYNC_TX_DISABLE_PQ_VAL_DMA |
| 27 | bool | 41 | bool |
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 4a6feac8c94a..bf1a95e31559 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
| |||
| @@ -121,7 +121,7 @@ config MD_RAID10 | |||
| 121 | config MD_RAID456 | 121 | config MD_RAID456 |
| 122 | tristate "RAID-4/RAID-5/RAID-6 mode" | 122 | tristate "RAID-4/RAID-5/RAID-6 mode" |
| 123 | depends on BLK_DEV_MD | 123 | depends on BLK_DEV_MD |
| 124 | select MD_RAID6_PQ | 124 | select RAID6_PQ |
| 125 | select ASYNC_MEMCPY | 125 | select ASYNC_MEMCPY |
| 126 | select ASYNC_XOR | 126 | select ASYNC_XOR |
| 127 | select ASYNC_PQ | 127 | select ASYNC_PQ |
| @@ -165,22 +165,6 @@ config MULTICORE_RAID456 | |||
| 165 | 165 | ||
| 166 | If unsure, say N. | 166 | If unsure, say N. |
| 167 | 167 | ||
| 168 | config MD_RAID6_PQ | ||
| 169 | tristate | ||
| 170 | |||
| 171 | config ASYNC_RAID6_TEST | ||
| 172 | tristate "Self test for hardware accelerated raid6 recovery" | ||
| 173 | depends on MD_RAID6_PQ | ||
| 174 | select ASYNC_RAID6_RECOV | ||
| 175 | ---help--- | ||
| 176 | This is a one-shot self test that permutes through the | ||
| 177 | recovery of all the possible two disk failure scenarios for a | ||
| 178 | N-disk array. Recovery is performed with the asynchronous | ||
| 179 | raid6 recovery routines, and will optionally use an offload | ||
| 180 | engine if one is available. | ||
| 181 | |||
| 182 | If unsure, say N. | ||
| 183 | |||
| 184 | config MD_MULTIPATH | 168 | config MD_MULTIPATH |
| 185 | tristate "Multipath I/O support" | 169 | tristate "Multipath I/O support" |
| 186 | depends on BLK_DEV_MD | 170 | depends on BLK_DEV_MD |
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index e355e7f6a536..5e3aac41919d 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
| |||
| @@ -12,13 +12,6 @@ dm-log-userspace-y \ | |||
| 12 | += dm-log-userspace-base.o dm-log-userspace-transfer.o | 12 | += dm-log-userspace-base.o dm-log-userspace-transfer.o |
| 13 | md-mod-y += md.o bitmap.o | 13 | md-mod-y += md.o bitmap.o |
| 14 | raid456-y += raid5.o | 14 | raid456-y += raid5.o |
| 15 | raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \ | ||
| 16 | raid6int1.o raid6int2.o raid6int4.o \ | ||
| 17 | raid6int8.o raid6int16.o raid6int32.o \ | ||
| 18 | raid6altivec1.o raid6altivec2.o raid6altivec4.o \ | ||
| 19 | raid6altivec8.o \ | ||
| 20 | raid6mmx.o raid6sse1.o raid6sse2.o | ||
| 21 | hostprogs-y += mktables | ||
| 22 | 15 | ||
| 23 | # Note: link order is important. All raid personalities | 16 | # Note: link order is important. All raid personalities |
| 24 | # and must come before md.o, as they each initialise | 17 | # and must come before md.o, as they each initialise |
| @@ -29,7 +22,6 @@ obj-$(CONFIG_MD_LINEAR) += linear.o | |||
| 29 | obj-$(CONFIG_MD_RAID0) += raid0.o | 22 | obj-$(CONFIG_MD_RAID0) += raid0.o |
| 30 | obj-$(CONFIG_MD_RAID1) += raid1.o | 23 | obj-$(CONFIG_MD_RAID1) += raid1.o |
| 31 | obj-$(CONFIG_MD_RAID10) += raid10.o | 24 | obj-$(CONFIG_MD_RAID10) += raid10.o |
| 32 | obj-$(CONFIG_MD_RAID6_PQ) += raid6_pq.o | ||
| 33 | obj-$(CONFIG_MD_RAID456) += raid456.o | 25 | obj-$(CONFIG_MD_RAID456) += raid456.o |
| 34 | obj-$(CONFIG_MD_MULTIPATH) += multipath.o | 26 | obj-$(CONFIG_MD_MULTIPATH) += multipath.o |
| 35 | obj-$(CONFIG_MD_FAULTY) += faulty.o | 27 | obj-$(CONFIG_MD_FAULTY) += faulty.o |
| @@ -45,75 +37,6 @@ obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o | |||
| 45 | obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o | 37 | obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o |
| 46 | obj-$(CONFIG_DM_ZERO) += dm-zero.o | 38 | obj-$(CONFIG_DM_ZERO) += dm-zero.o |
| 47 | 39 | ||
| 48 | quiet_cmd_unroll = UNROLL $@ | ||
| 49 | cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \ | ||
| 50 | < $< > $@ || ( rm -f $@ && exit 1 ) | ||
| 51 | |||
| 52 | ifeq ($(CONFIG_ALTIVEC),y) | ||
| 53 | altivec_flags := -maltivec -mabi=altivec | ||
| 54 | endif | ||
| 55 | |||
| 56 | ifeq ($(CONFIG_DM_UEVENT),y) | 40 | ifeq ($(CONFIG_DM_UEVENT),y) |
| 57 | dm-mod-objs += dm-uevent.o | 41 | dm-mod-objs += dm-uevent.o |
| 58 | endif | 42 | endif |
| 59 | |||
| 60 | targets += raid6int1.c | ||
| 61 | $(obj)/raid6int1.c: UNROLL := 1 | ||
| 62 | $(obj)/raid6int1.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
| 63 | $(call if_changed,unroll) | ||
| 64 | |||
| 65 | targets += raid6int2.c | ||
| 66 | $(obj)/raid6int2.c: UNROLL := 2 | ||
| 67 | $(obj)/raid6int2.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
| 68 | $(call if_changed,unroll) | ||
| 69 | |||
| 70 | targets += raid6int4.c | ||
| 71 | $(obj)/raid6int4.c: UNROLL := 4 | ||
| 72 | $(obj)/raid6int4.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
| 73 | $(call if_changed,unroll) | ||
| 74 | |||
| 75 | targets += raid6int8.c | ||
| 76 | $(obj)/raid6int8.c: UNROLL := 8 | ||
| 77 | $(obj)/raid6int8.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
| 78 | $(call if_changed,unroll) | ||
| 79 | |||
| 80 | targets += raid6int16.c | ||
| 81 | $(obj)/raid6int16.c: UNROLL := 16 | ||
| 82 | $(obj)/raid6int16.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
| 83 | $(call if_changed,unroll) | ||
| 84 | |||
| 85 | targets += raid6int32.c | ||
| 86 | $(obj)/raid6int32.c: UNROLL := 32 | ||
| 87 | $(obj)/raid6int32.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
| 88 | $(call if_changed,unroll) | ||
| 89 | |||
| 90 | CFLAGS_raid6altivec1.o += $(altivec_flags) | ||
| 91 | targets += raid6altivec1.c | ||
| 92 | $(obj)/raid6altivec1.c: UNROLL := 1 | ||
| 93 | $(obj)/raid6altivec1.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE | ||
| 94 | $(call if_changed,unroll) | ||
| 95 | |||
| 96 | CFLAGS_raid6altivec2.o += $(altivec_flags) | ||
| 97 | targets += raid6altivec2.c | ||
| 98 | $(obj)/raid6altivec2.c: UNROLL := 2 | ||
| 99 | $(obj)/raid6altivec2.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE | ||
| 100 | $(call if_changed,unroll) | ||
| 101 | |||
| 102 | CFLAGS_raid6altivec4.o += $(altivec_flags) | ||
| 103 | targets += raid6altivec4.c | ||
| 104 | $(obj)/raid6altivec4.c: UNROLL := 4 | ||
| 105 | $(obj)/raid6altivec4.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE | ||
| 106 | $(call if_changed,unroll) | ||
| 107 | |||
| 108 | CFLAGS_raid6altivec8.o += $(altivec_flags) | ||
| 109 | targets += raid6altivec8.c | ||
| 110 | $(obj)/raid6altivec8.c: UNROLL := 8 | ||
| 111 | $(obj)/raid6altivec8.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE | ||
| 112 | $(call if_changed,unroll) | ||
| 113 | |||
| 114 | quiet_cmd_mktable = TABLE $@ | ||
| 115 | cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) | ||
| 116 | |||
| 117 | targets += raid6tables.c | ||
| 118 | $(obj)/raid6tables.c: $(obj)/mktables FORCE | ||
| 119 | $(call if_changed,mktable) | ||
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 1742435ce3ae..1ba1e122e948 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
| |||
| @@ -13,7 +13,6 @@ | |||
| 13 | * Still to do: | 13 | * Still to do: |
| 14 | * | 14 | * |
| 15 | * flush after percent set rather than just time based. (maybe both). | 15 | * flush after percent set rather than just time based. (maybe both). |
| 16 | * wait if count gets too high, wake when it drops to half. | ||
| 17 | */ | 16 | */ |
| 18 | 17 | ||
| 19 | #include <linux/blkdev.h> | 18 | #include <linux/blkdev.h> |
| @@ -30,6 +29,7 @@ | |||
| 30 | #include "md.h" | 29 | #include "md.h" |
| 31 | #include "bitmap.h" | 30 | #include "bitmap.h" |
| 32 | 31 | ||
| 32 | #include <linux/dm-dirty-log.h> | ||
| 33 | /* debug macros */ | 33 | /* debug macros */ |
| 34 | 34 | ||
| 35 | #define DEBUG 0 | 35 | #define DEBUG 0 |
| @@ -51,9 +51,6 @@ | |||
| 51 | #define INJECT_FATAL_FAULT_3 0 /* undef */ | 51 | #define INJECT_FATAL_FAULT_3 0 /* undef */ |
| 52 | #endif | 52 | #endif |
| 53 | 53 | ||
| 54 | //#define DPRINTK PRINTK /* set this NULL to avoid verbose debug output */ | ||
| 55 | #define DPRINTK(x...) do { } while(0) | ||
| 56 | |||
| 57 | #ifndef PRINTK | 54 | #ifndef PRINTK |
| 58 | # if DEBUG > 0 | 55 | # if DEBUG > 0 |
| 59 | # define PRINTK(x...) printk(KERN_DEBUG x) | 56 | # define PRINTK(x...) printk(KERN_DEBUG x) |
| @@ -62,12 +59,11 @@ | |||
| 62 | # endif | 59 | # endif |
| 63 | #endif | 60 | #endif |
| 64 | 61 | ||
| 65 | static inline char * bmname(struct bitmap *bitmap) | 62 | static inline char *bmname(struct bitmap *bitmap) |
| 66 | { | 63 | { |
| 67 | return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; | 64 | return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; |
| 68 | } | 65 | } |
| 69 | 66 | ||
| 70 | |||
| 71 | /* | 67 | /* |
| 72 | * just a placeholder - calls kmalloc for bitmap pages | 68 | * just a placeholder - calls kmalloc for bitmap pages |
| 73 | */ | 69 | */ |
| @@ -78,7 +74,7 @@ static unsigned char *bitmap_alloc_page(struct bitmap *bitmap) | |||
| 78 | #ifdef INJECT_FAULTS_1 | 74 | #ifdef INJECT_FAULTS_1 |
| 79 | page = NULL; | 75 | page = NULL; |
| 80 | #else | 76 | #else |
| 81 | page = kmalloc(PAGE_SIZE, GFP_NOIO); | 77 | page = kzalloc(PAGE_SIZE, GFP_NOIO); |
| 82 | #endif | 78 | #endif |
| 83 | if (!page) | 79 | if (!page) |
| 84 | printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap)); | 80 | printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap)); |
| @@ -107,7 +103,8 @@ static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page) | |||
| 107 | * if we find our page, we increment the page's refcount so that it stays | 103 | * if we find our page, we increment the page's refcount so that it stays |
| 108 | * allocated while we're using it | 104 | * allocated while we're using it |
| 109 | */ | 105 | */ |
| 110 | static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create) | 106 | static int bitmap_checkpage(struct bitmap *bitmap, |
| 107 | unsigned long page, int create) | ||
| 111 | __releases(bitmap->lock) | 108 | __releases(bitmap->lock) |
| 112 | __acquires(bitmap->lock) | 109 | __acquires(bitmap->lock) |
| 113 | { | 110 | { |
| @@ -121,7 +118,6 @@ __acquires(bitmap->lock) | |||
| 121 | return -EINVAL; | 118 | return -EINVAL; |
| 122 | } | 119 | } |
| 123 | 120 | ||
| 124 | |||
| 125 | if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ | 121 | if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ |
| 126 | return 0; | 122 | return 0; |
| 127 | 123 | ||
| @@ -131,43 +127,34 @@ __acquires(bitmap->lock) | |||
| 131 | if (!create) | 127 | if (!create) |
| 132 | return -ENOENT; | 128 | return -ENOENT; |
| 133 | 129 | ||
| 134 | spin_unlock_irq(&bitmap->lock); | ||
| 135 | |||
| 136 | /* this page has not been allocated yet */ | 130 | /* this page has not been allocated yet */ |
| 137 | 131 | ||
| 138 | if ((mappage = bitmap_alloc_page(bitmap)) == NULL) { | 132 | spin_unlock_irq(&bitmap->lock); |
| 133 | mappage = bitmap_alloc_page(bitmap); | ||
| 134 | spin_lock_irq(&bitmap->lock); | ||
| 135 | |||
| 136 | if (mappage == NULL) { | ||
| 139 | PRINTK("%s: bitmap map page allocation failed, hijacking\n", | 137 | PRINTK("%s: bitmap map page allocation failed, hijacking\n", |
| 140 | bmname(bitmap)); | 138 | bmname(bitmap)); |
| 141 | /* failed - set the hijacked flag so that we can use the | 139 | /* failed - set the hijacked flag so that we can use the |
| 142 | * pointer as a counter */ | 140 | * pointer as a counter */ |
| 143 | spin_lock_irq(&bitmap->lock); | ||
| 144 | if (!bitmap->bp[page].map) | 141 | if (!bitmap->bp[page].map) |
| 145 | bitmap->bp[page].hijacked = 1; | 142 | bitmap->bp[page].hijacked = 1; |
| 146 | goto out; | 143 | } else if (bitmap->bp[page].map || |
| 147 | } | 144 | bitmap->bp[page].hijacked) { |
| 148 | |||
| 149 | /* got a page */ | ||
| 150 | |||
| 151 | spin_lock_irq(&bitmap->lock); | ||
| 152 | |||
| 153 | /* recheck the page */ | ||
| 154 | |||
| 155 | if (bitmap->bp[page].map || bitmap->bp[page].hijacked) { | ||
| 156 | /* somebody beat us to getting the page */ | 145 | /* somebody beat us to getting the page */ |
| 157 | bitmap_free_page(bitmap, mappage); | 146 | bitmap_free_page(bitmap, mappage); |
| 158 | return 0; | 147 | return 0; |
| 159 | } | 148 | } else { |
| 160 | 149 | ||
| 161 | /* no page was in place and we have one, so install it */ | 150 | /* no page was in place and we have one, so install it */ |
| 162 | 151 | ||
| 163 | memset(mappage, 0, PAGE_SIZE); | 152 | bitmap->bp[page].map = mappage; |
| 164 | bitmap->bp[page].map = mappage; | 153 | bitmap->missing_pages--; |
| 165 | bitmap->missing_pages--; | 154 | } |
| 166 | out: | ||
| 167 | return 0; | 155 | return 0; |
| 168 | } | 156 | } |
| 169 | 157 | ||
| 170 | |||
| 171 | /* if page is completely empty, put it back on the free list, or dealloc it */ | 158 | /* if page is completely empty, put it back on the free list, or dealloc it */ |
| 172 | /* if page was hijacked, unmark the flag so it might get alloced next time */ | 159 | /* if page was hijacked, unmark the flag so it might get alloced next time */ |
| 173 | /* Note: lock should be held when calling this */ | 160 | /* Note: lock should be held when calling this */ |
| @@ -183,26 +170,15 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page) | |||
| 183 | if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */ | 170 | if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */ |
| 184 | bitmap->bp[page].hijacked = 0; | 171 | bitmap->bp[page].hijacked = 0; |
| 185 | bitmap->bp[page].map = NULL; | 172 | bitmap->bp[page].map = NULL; |
| 186 | return; | 173 | } else { |
| 174 | /* normal case, free the page */ | ||
| 175 | ptr = bitmap->bp[page].map; | ||
| 176 | bitmap->bp[page].map = NULL; | ||
| 177 | bitmap->missing_pages++; | ||
| 178 | bitmap_free_page(bitmap, ptr); | ||
| 187 | } | 179 | } |
| 188 | |||
| 189 | /* normal case, free the page */ | ||
| 190 | |||
| 191 | #if 0 | ||
| 192 | /* actually ... let's not. We will probably need the page again exactly when | ||
| 193 | * memory is tight and we are flusing to disk | ||
| 194 | */ | ||
| 195 | return; | ||
| 196 | #else | ||
| 197 | ptr = bitmap->bp[page].map; | ||
| 198 | bitmap->bp[page].map = NULL; | ||
| 199 | bitmap->missing_pages++; | ||
| 200 | bitmap_free_page(bitmap, ptr); | ||
| 201 | return; | ||
| 202 | #endif | ||
| 203 | } | 180 | } |
| 204 | 181 | ||
| 205 | |||
| 206 | /* | 182 | /* |
| 207 | * bitmap file handling - read and write the bitmap file and its superblock | 183 | * bitmap file handling - read and write the bitmap file and its superblock |
| 208 | */ | 184 | */ |
| @@ -220,11 +196,14 @@ static struct page *read_sb_page(mddev_t *mddev, loff_t offset, | |||
| 220 | 196 | ||
| 221 | mdk_rdev_t *rdev; | 197 | mdk_rdev_t *rdev; |
| 222 | sector_t target; | 198 | sector_t target; |
| 199 | int did_alloc = 0; | ||
| 223 | 200 | ||
| 224 | if (!page) | 201 | if (!page) { |
| 225 | page = alloc_page(GFP_KERNEL); | 202 | page = alloc_page(GFP_KERNEL); |
| 226 | if (!page) | 203 | if (!page) |
| 227 | return ERR_PTR(-ENOMEM); | 204 | return ERR_PTR(-ENOMEM); |
| 205 | did_alloc = 1; | ||
| 206 | } | ||
| 228 | 207 | ||
| 229 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 208 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
| 230 | if (! test_bit(In_sync, &rdev->flags) | 209 | if (! test_bit(In_sync, &rdev->flags) |
| @@ -242,6 +221,8 @@ static struct page *read_sb_page(mddev_t *mddev, loff_t offset, | |||
| 242 | return page; | 221 | return page; |
| 243 | } | 222 | } |
| 244 | } | 223 | } |
| 224 | if (did_alloc) | ||
| 225 | put_page(page); | ||
| 245 | return ERR_PTR(-EIO); | 226 | return ERR_PTR(-EIO); |
| 246 | 227 | ||
| 247 | } | 228 | } |
| @@ -286,49 +267,51 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) | |||
| 286 | mddev_t *mddev = bitmap->mddev; | 267 | mddev_t *mddev = bitmap->mddev; |
| 287 | 268 | ||
| 288 | while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { | 269 | while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { |
| 289 | int size = PAGE_SIZE; | 270 | int size = PAGE_SIZE; |
| 290 | loff_t offset = mddev->bitmap_info.offset; | 271 | loff_t offset = mddev->bitmap_info.offset; |
| 291 | if (page->index == bitmap->file_pages-1) | 272 | if (page->index == bitmap->file_pages-1) |
| 292 | size = roundup(bitmap->last_page_size, | 273 | size = roundup(bitmap->last_page_size, |
| 293 | bdev_logical_block_size(rdev->bdev)); | 274 | bdev_logical_block_size(rdev->bdev)); |
| 294 | /* Just make sure we aren't corrupting data or | 275 | /* Just make sure we aren't corrupting data or |
| 295 | * metadata | 276 | * metadata |
| 296 | */ | 277 | */ |
| 297 | if (mddev->external) { | 278 | if (mddev->external) { |
| 298 | /* Bitmap could be anywhere. */ | 279 | /* Bitmap could be anywhere. */ |
| 299 | if (rdev->sb_start + offset + (page->index *(PAGE_SIZE/512)) > | 280 | if (rdev->sb_start + offset + (page->index |
| 300 | rdev->data_offset && | 281 | * (PAGE_SIZE/512)) |
| 301 | rdev->sb_start + offset < | 282 | > rdev->data_offset |
| 302 | rdev->data_offset + mddev->dev_sectors + | 283 | && |
| 303 | (PAGE_SIZE/512)) | 284 | rdev->sb_start + offset |
| 304 | goto bad_alignment; | 285 | < (rdev->data_offset + mddev->dev_sectors |
| 305 | } else if (offset < 0) { | 286 | + (PAGE_SIZE/512))) |
| 306 | /* DATA BITMAP METADATA */ | 287 | goto bad_alignment; |
| 307 | if (offset | 288 | } else if (offset < 0) { |
| 308 | + (long)(page->index * (PAGE_SIZE/512)) | 289 | /* DATA BITMAP METADATA */ |
| 309 | + size/512 > 0) | 290 | if (offset |
| 310 | /* bitmap runs in to metadata */ | 291 | + (long)(page->index * (PAGE_SIZE/512)) |
| 311 | goto bad_alignment; | 292 | + size/512 > 0) |
| 312 | if (rdev->data_offset + mddev->dev_sectors | 293 | /* bitmap runs in to metadata */ |
| 313 | > rdev->sb_start + offset) | 294 | goto bad_alignment; |
| 314 | /* data runs in to bitmap */ | 295 | if (rdev->data_offset + mddev->dev_sectors |
| 315 | goto bad_alignment; | 296 | > rdev->sb_start + offset) |
| 316 | } else if (rdev->sb_start < rdev->data_offset) { | 297 | /* data runs in to bitmap */ |
| 317 | /* METADATA BITMAP DATA */ | 298 | goto bad_alignment; |
| 318 | if (rdev->sb_start | 299 | } else if (rdev->sb_start < rdev->data_offset) { |
| 319 | + offset | 300 | /* METADATA BITMAP DATA */ |
| 320 | + page->index*(PAGE_SIZE/512) + size/512 | 301 | if (rdev->sb_start |
| 321 | > rdev->data_offset) | 302 | + offset |
| 322 | /* bitmap runs in to data */ | 303 | + page->index*(PAGE_SIZE/512) + size/512 |
| 323 | goto bad_alignment; | 304 | > rdev->data_offset) |
| 324 | } else { | 305 | /* bitmap runs in to data */ |
| 325 | /* DATA METADATA BITMAP - no problems */ | 306 | goto bad_alignment; |
| 326 | } | 307 | } else { |
| 327 | md_super_write(mddev, rdev, | 308 | /* DATA METADATA BITMAP - no problems */ |
| 328 | rdev->sb_start + offset | 309 | } |
| 329 | + page->index * (PAGE_SIZE/512), | 310 | md_super_write(mddev, rdev, |
| 330 | size, | 311 | rdev->sb_start + offset |
| 331 | page); | 312 | + page->index * (PAGE_SIZE/512), |
| 313 | size, | ||
| 314 | page); | ||
| 332 | } | 315 | } |
| 333 | 316 | ||
| 334 | if (wait) | 317 | if (wait) |
| @@ -364,10 +347,9 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait) | |||
| 364 | bh = bh->b_this_page; | 347 | bh = bh->b_this_page; |
| 365 | } | 348 | } |
| 366 | 349 | ||
| 367 | if (wait) { | 350 | if (wait) |
| 368 | wait_event(bitmap->write_wait, | 351 | wait_event(bitmap->write_wait, |
| 369 | atomic_read(&bitmap->pending_writes)==0); | 352 | atomic_read(&bitmap->pending_writes)==0); |
| 370 | } | ||
| 371 | } | 353 | } |
| 372 | if (bitmap->flags & BITMAP_WRITE_ERROR) | 354 | if (bitmap->flags & BITMAP_WRITE_ERROR) |
| 373 | bitmap_file_kick(bitmap); | 355 | bitmap_file_kick(bitmap); |
| @@ -424,7 +406,7 @@ static struct page *read_page(struct file *file, unsigned long index, | |||
| 424 | struct buffer_head *bh; | 406 | struct buffer_head *bh; |
| 425 | sector_t block; | 407 | sector_t block; |
| 426 | 408 | ||
| 427 | PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_SIZE, | 409 | PRINTK("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE, |
| 428 | (unsigned long long)index << PAGE_SHIFT); | 410 | (unsigned long long)index << PAGE_SHIFT); |
| 429 | 411 | ||
| 430 | page = alloc_page(GFP_KERNEL); | 412 | page = alloc_page(GFP_KERNEL); |
| @@ -478,7 +460,7 @@ static struct page *read_page(struct file *file, unsigned long index, | |||
| 478 | } | 460 | } |
| 479 | out: | 461 | out: |
| 480 | if (IS_ERR(page)) | 462 | if (IS_ERR(page)) |
| 481 | printk(KERN_ALERT "md: bitmap read error: (%dB @ %Lu): %ld\n", | 463 | printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %ld\n", |
| 482 | (int)PAGE_SIZE, | 464 | (int)PAGE_SIZE, |
| 483 | (unsigned long long)index << PAGE_SHIFT, | 465 | (unsigned long long)index << PAGE_SHIFT, |
| 484 | PTR_ERR(page)); | 466 | PTR_ERR(page)); |
| @@ -664,11 +646,14 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, | |||
| 664 | sb = kmap_atomic(bitmap->sb_page, KM_USER0); | 646 | sb = kmap_atomic(bitmap->sb_page, KM_USER0); |
| 665 | old = le32_to_cpu(sb->state) & bits; | 647 | old = le32_to_cpu(sb->state) & bits; |
| 666 | switch (op) { | 648 | switch (op) { |
| 667 | case MASK_SET: sb->state |= cpu_to_le32(bits); | 649 | case MASK_SET: |
| 668 | break; | 650 | sb->state |= cpu_to_le32(bits); |
| 669 | case MASK_UNSET: sb->state &= cpu_to_le32(~bits); | 651 | break; |
| 670 | break; | 652 | case MASK_UNSET: |
| 671 | default: BUG(); | 653 | sb->state &= cpu_to_le32(~bits); |
| 654 | break; | ||
| 655 | default: | ||
| 656 | BUG(); | ||
| 672 | } | 657 | } |
| 673 | kunmap_atomic(sb, KM_USER0); | 658 | kunmap_atomic(sb, KM_USER0); |
| 674 | return old; | 659 | return old; |
| @@ -710,12 +695,14 @@ static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned lon | |||
| 710 | static inline struct page *filemap_get_page(struct bitmap *bitmap, | 695 | static inline struct page *filemap_get_page(struct bitmap *bitmap, |
| 711 | unsigned long chunk) | 696 | unsigned long chunk) |
| 712 | { | 697 | { |
| 713 | if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL; | 698 | if (bitmap->filemap == NULL) |
| 699 | return NULL; | ||
| 700 | if (file_page_index(bitmap, chunk) >= bitmap->file_pages) | ||
| 701 | return NULL; | ||
| 714 | return bitmap->filemap[file_page_index(bitmap, chunk) | 702 | return bitmap->filemap[file_page_index(bitmap, chunk) |
| 715 | - file_page_index(bitmap, 0)]; | 703 | - file_page_index(bitmap, 0)]; |
| 716 | } | 704 | } |
| 717 | 705 | ||
| 718 | |||
| 719 | static void bitmap_file_unmap(struct bitmap *bitmap) | 706 | static void bitmap_file_unmap(struct bitmap *bitmap) |
| 720 | { | 707 | { |
| 721 | struct page **map, *sb_page; | 708 | struct page **map, *sb_page; |
| @@ -766,7 +753,6 @@ static void bitmap_file_put(struct bitmap *bitmap) | |||
| 766 | } | 753 | } |
| 767 | } | 754 | } |
| 768 | 755 | ||
| 769 | |||
| 770 | /* | 756 | /* |
| 771 | * bitmap_file_kick - if an error occurs while manipulating the bitmap file | 757 | * bitmap_file_kick - if an error occurs while manipulating the bitmap file |
| 772 | * then it is no longer reliable, so we stop using it and we mark the file | 758 | * then it is no longer reliable, so we stop using it and we mark the file |
| @@ -785,7 +771,6 @@ static void bitmap_file_kick(struct bitmap *bitmap) | |||
| 785 | ptr = d_path(&bitmap->file->f_path, path, | 771 | ptr = d_path(&bitmap->file->f_path, path, |
| 786 | PAGE_SIZE); | 772 | PAGE_SIZE); |
| 787 | 773 | ||
| 788 | |||
| 789 | printk(KERN_ALERT | 774 | printk(KERN_ALERT |
| 790 | "%s: kicking failed bitmap file %s from array!\n", | 775 | "%s: kicking failed bitmap file %s from array!\n", |
| 791 | bmname(bitmap), IS_ERR(ptr) ? "" : ptr); | 776 | bmname(bitmap), IS_ERR(ptr) ? "" : ptr); |
| @@ -803,27 +788,36 @@ static void bitmap_file_kick(struct bitmap *bitmap) | |||
| 803 | } | 788 | } |
| 804 | 789 | ||
| 805 | enum bitmap_page_attr { | 790 | enum bitmap_page_attr { |
| 806 | BITMAP_PAGE_DIRTY = 0, // there are set bits that need to be synced | 791 | BITMAP_PAGE_DIRTY = 0, /* there are set bits that need to be synced */ |
| 807 | BITMAP_PAGE_CLEAN = 1, // there are bits that might need to be cleared | 792 | BITMAP_PAGE_CLEAN = 1, /* there are bits that might need to be cleared */ |
| 808 | BITMAP_PAGE_NEEDWRITE=2, // there are cleared bits that need to be synced | 793 | BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */ |
| 809 | }; | 794 | }; |
| 810 | 795 | ||
| 811 | static inline void set_page_attr(struct bitmap *bitmap, struct page *page, | 796 | static inline void set_page_attr(struct bitmap *bitmap, struct page *page, |
| 812 | enum bitmap_page_attr attr) | 797 | enum bitmap_page_attr attr) |
| 813 | { | 798 | { |
| 814 | __set_bit((page->index<<2) + attr, bitmap->filemap_attr); | 799 | if (page) |
| 800 | __set_bit((page->index<<2) + attr, bitmap->filemap_attr); | ||
| 801 | else | ||
| 802 | __set_bit(attr, &bitmap->logattrs); | ||
| 815 | } | 803 | } |
| 816 | 804 | ||
| 817 | static inline void clear_page_attr(struct bitmap *bitmap, struct page *page, | 805 | static inline void clear_page_attr(struct bitmap *bitmap, struct page *page, |
| 818 | enum bitmap_page_attr attr) | 806 | enum bitmap_page_attr attr) |
| 819 | { | 807 | { |
| 820 | __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); | 808 | if (page) |
| 809 | __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); | ||
| 810 | else | ||
| 811 | __clear_bit(attr, &bitmap->logattrs); | ||
| 821 | } | 812 | } |
| 822 | 813 | ||
| 823 | static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page, | 814 | static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page, |
| 824 | enum bitmap_page_attr attr) | 815 | enum bitmap_page_attr attr) |
| 825 | { | 816 | { |
| 826 | return test_bit((page->index<<2) + attr, bitmap->filemap_attr); | 817 | if (page) |
| 818 | return test_bit((page->index<<2) + attr, bitmap->filemap_attr); | ||
| 819 | else | ||
| 820 | return test_bit(attr, &bitmap->logattrs); | ||
| 827 | } | 821 | } |
| 828 | 822 | ||
| 829 | /* | 823 | /* |
| @@ -836,30 +830,32 @@ static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *p | |||
| 836 | static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) | 830 | static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) |
| 837 | { | 831 | { |
| 838 | unsigned long bit; | 832 | unsigned long bit; |
| 839 | struct page *page; | 833 | struct page *page = NULL; |
| 840 | void *kaddr; | 834 | void *kaddr; |
| 841 | unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap); | 835 | unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap); |
| 842 | 836 | ||
| 843 | if (!bitmap->filemap) { | 837 | if (!bitmap->filemap) { |
| 844 | return; | 838 | struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log; |
| 845 | } | 839 | if (log) |
| 846 | 840 | log->type->mark_region(log, chunk); | |
| 847 | page = filemap_get_page(bitmap, chunk); | 841 | } else { |
| 848 | if (!page) return; | ||
| 849 | bit = file_page_offset(bitmap, chunk); | ||
| 850 | 842 | ||
| 851 | /* set the bit */ | 843 | page = filemap_get_page(bitmap, chunk); |
| 852 | kaddr = kmap_atomic(page, KM_USER0); | 844 | if (!page) |
| 853 | if (bitmap->flags & BITMAP_HOSTENDIAN) | 845 | return; |
| 854 | set_bit(bit, kaddr); | 846 | bit = file_page_offset(bitmap, chunk); |
| 855 | else | ||
| 856 | ext2_set_bit(bit, kaddr); | ||
| 857 | kunmap_atomic(kaddr, KM_USER0); | ||
| 858 | PRINTK("set file bit %lu page %lu\n", bit, page->index); | ||
| 859 | 847 | ||
| 848 | /* set the bit */ | ||
| 849 | kaddr = kmap_atomic(page, KM_USER0); | ||
| 850 | if (bitmap->flags & BITMAP_HOSTENDIAN) | ||
| 851 | set_bit(bit, kaddr); | ||
| 852 | else | ||
| 853 | ext2_set_bit(bit, kaddr); | ||
| 854 | kunmap_atomic(kaddr, KM_USER0); | ||
| 855 | PRINTK("set file bit %lu page %lu\n", bit, page->index); | ||
| 856 | } | ||
| 860 | /* record page number so it gets flushed to disk when unplug occurs */ | 857 | /* record page number so it gets flushed to disk when unplug occurs */ |
| 861 | set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); | 858 | set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); |
| 862 | |||
| 863 | } | 859 | } |
| 864 | 860 | ||
| 865 | /* this gets called when the md device is ready to unplug its underlying | 861 | /* this gets called when the md device is ready to unplug its underlying |
| @@ -874,6 +870,16 @@ void bitmap_unplug(struct bitmap *bitmap) | |||
| 874 | 870 | ||
| 875 | if (!bitmap) | 871 | if (!bitmap) |
| 876 | return; | 872 | return; |
| 873 | if (!bitmap->filemap) { | ||
| 874 | /* Must be using a dirty_log */ | ||
| 875 | struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log; | ||
| 876 | dirty = test_and_clear_bit(BITMAP_PAGE_DIRTY, &bitmap->logattrs); | ||
| 877 | need_write = test_and_clear_bit(BITMAP_PAGE_NEEDWRITE, &bitmap->logattrs); | ||
| 878 | if (dirty || need_write) | ||
| 879 | if (log->type->flush(log)) | ||
| 880 | bitmap->flags |= BITMAP_WRITE_ERROR; | ||
| 881 | goto out; | ||
| 882 | } | ||
| 877 | 883 | ||
| 878 | /* look at each page to see if there are any set bits that need to be | 884 | /* look at each page to see if there are any set bits that need to be |
| 879 | * flushed out to disk */ | 885 | * flushed out to disk */ |
| @@ -892,7 +898,7 @@ void bitmap_unplug(struct bitmap *bitmap) | |||
| 892 | wait = 1; | 898 | wait = 1; |
| 893 | spin_unlock_irqrestore(&bitmap->lock, flags); | 899 | spin_unlock_irqrestore(&bitmap->lock, flags); |
| 894 | 900 | ||
| 895 | if (dirty | need_write) | 901 | if (dirty || need_write) |
| 896 | write_page(bitmap, page, 0); | 902 | write_page(bitmap, page, 0); |
| 897 | } | 903 | } |
| 898 | if (wait) { /* if any writes were performed, we need to wait on them */ | 904 | if (wait) { /* if any writes were performed, we need to wait on them */ |
| @@ -902,9 +908,11 @@ void bitmap_unplug(struct bitmap *bitmap) | |||
| 902 | else | 908 | else |
| 903 | md_super_wait(bitmap->mddev); | 909 | md_super_wait(bitmap->mddev); |
| 904 | } | 910 | } |
| 911 | out: | ||
| 905 | if (bitmap->flags & BITMAP_WRITE_ERROR) | 912 | if (bitmap->flags & BITMAP_WRITE_ERROR) |
| 906 | bitmap_file_kick(bitmap); | 913 | bitmap_file_kick(bitmap); |
| 907 | } | 914 | } |
| 915 | EXPORT_SYMBOL(bitmap_unplug); | ||
| 908 | 916 | ||
| 909 | static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); | 917 | static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); |
| 910 | /* * bitmap_init_from_disk -- called at bitmap_create time to initialize | 918 | /* * bitmap_init_from_disk -- called at bitmap_create time to initialize |
| @@ -943,12 +951,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
| 943 | printk(KERN_INFO "%s: bitmap file is out of date, doing full " | 951 | printk(KERN_INFO "%s: bitmap file is out of date, doing full " |
| 944 | "recovery\n", bmname(bitmap)); | 952 | "recovery\n", bmname(bitmap)); |
| 945 | 953 | ||
| 946 | bytes = (chunks + 7) / 8; | 954 | bytes = DIV_ROUND_UP(bitmap->chunks, 8); |
| 947 | if (!bitmap->mddev->bitmap_info.external) | 955 | if (!bitmap->mddev->bitmap_info.external) |
| 948 | bytes += sizeof(bitmap_super_t); | 956 | bytes += sizeof(bitmap_super_t); |
| 949 | 957 | ||
| 950 | 958 | num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE); | |
| 951 | num_pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE; | ||
| 952 | 959 | ||
| 953 | if (file && i_size_read(file->f_mapping->host) < bytes) { | 960 | if (file && i_size_read(file->f_mapping->host) < bytes) { |
| 954 | printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", | 961 | printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", |
| @@ -966,7 +973,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
| 966 | 973 | ||
| 967 | /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */ | 974 | /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */ |
| 968 | bitmap->filemap_attr = kzalloc( | 975 | bitmap->filemap_attr = kzalloc( |
| 969 | roundup( DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), | 976 | roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), |
| 970 | GFP_KERNEL); | 977 | GFP_KERNEL); |
| 971 | if (!bitmap->filemap_attr) | 978 | if (!bitmap->filemap_attr) |
| 972 | goto err; | 979 | goto err; |
| @@ -1021,7 +1028,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
| 1021 | if (outofdate) { | 1028 | if (outofdate) { |
| 1022 | /* | 1029 | /* |
| 1023 | * if bitmap is out of date, dirty the | 1030 | * if bitmap is out of date, dirty the |
| 1024 | * whole page and write it out | 1031 | * whole page and write it out |
| 1025 | */ | 1032 | */ |
| 1026 | paddr = kmap_atomic(page, KM_USER0); | 1033 | paddr = kmap_atomic(page, KM_USER0); |
| 1027 | memset(paddr + offset, 0xff, | 1034 | memset(paddr + offset, 0xff, |
| @@ -1052,7 +1059,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
| 1052 | } | 1059 | } |
| 1053 | } | 1060 | } |
| 1054 | 1061 | ||
| 1055 | /* everything went OK */ | 1062 | /* everything went OK */ |
| 1056 | ret = 0; | 1063 | ret = 0; |
| 1057 | bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET); | 1064 | bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET); |
| 1058 | 1065 | ||
| @@ -1080,21 +1087,16 @@ void bitmap_write_all(struct bitmap *bitmap) | |||
| 1080 | */ | 1087 | */ |
| 1081 | int i; | 1088 | int i; |
| 1082 | 1089 | ||
| 1083 | for (i=0; i < bitmap->file_pages; i++) | 1090 | for (i = 0; i < bitmap->file_pages; i++) |
| 1084 | set_page_attr(bitmap, bitmap->filemap[i], | 1091 | set_page_attr(bitmap, bitmap->filemap[i], |
| 1085 | BITMAP_PAGE_NEEDWRITE); | 1092 | BITMAP_PAGE_NEEDWRITE); |
| 1086 | } | 1093 | } |
| 1087 | 1094 | ||
| 1088 | |||
| 1089 | static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc) | 1095 | static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc) |
| 1090 | { | 1096 | { |
| 1091 | sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap); | 1097 | sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap); |
| 1092 | unsigned long page = chunk >> PAGE_COUNTER_SHIFT; | 1098 | unsigned long page = chunk >> PAGE_COUNTER_SHIFT; |
| 1093 | bitmap->bp[page].count += inc; | 1099 | bitmap->bp[page].count += inc; |
| 1094 | /* | ||
| 1095 | if (page == 0) printk("count page 0, offset %llu: %d gives %d\n", | ||
| 1096 | (unsigned long long)offset, inc, bitmap->bp[page].count); | ||
| 1097 | */ | ||
| 1098 | bitmap_checkfree(bitmap, page); | 1100 | bitmap_checkfree(bitmap, page); |
| 1099 | } | 1101 | } |
| 1100 | static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, | 1102 | static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, |
| @@ -1114,6 +1116,7 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
| 1114 | struct page *page = NULL, *lastpage = NULL; | 1116 | struct page *page = NULL, *lastpage = NULL; |
| 1115 | int blocks; | 1117 | int blocks; |
| 1116 | void *paddr; | 1118 | void *paddr; |
| 1119 | struct dm_dirty_log *log = mddev->bitmap_info.log; | ||
| 1117 | 1120 | ||
| 1118 | /* Use a mutex to guard daemon_work against | 1121 | /* Use a mutex to guard daemon_work against |
| 1119 | * bitmap_destroy. | 1122 | * bitmap_destroy. |
| @@ -1138,11 +1141,12 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
| 1138 | spin_lock_irqsave(&bitmap->lock, flags); | 1141 | spin_lock_irqsave(&bitmap->lock, flags); |
| 1139 | for (j = 0; j < bitmap->chunks; j++) { | 1142 | for (j = 0; j < bitmap->chunks; j++) { |
| 1140 | bitmap_counter_t *bmc; | 1143 | bitmap_counter_t *bmc; |
| 1141 | if (!bitmap->filemap) | 1144 | if (!bitmap->filemap) { |
| 1142 | /* error or shutdown */ | 1145 | if (!log) |
| 1143 | break; | 1146 | /* error or shutdown */ |
| 1144 | 1147 | break; | |
| 1145 | page = filemap_get_page(bitmap, j); | 1148 | } else |
| 1149 | page = filemap_get_page(bitmap, j); | ||
| 1146 | 1150 | ||
| 1147 | if (page != lastpage) { | 1151 | if (page != lastpage) { |
| 1148 | /* skip this page unless it's marked as needing cleaning */ | 1152 | /* skip this page unless it's marked as needing cleaning */ |
| @@ -1197,14 +1201,11 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
| 1197 | (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), | 1201 | (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), |
| 1198 | &blocks, 0); | 1202 | &blocks, 0); |
| 1199 | if (bmc) { | 1203 | if (bmc) { |
| 1200 | /* | ||
| 1201 | if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc); | ||
| 1202 | */ | ||
| 1203 | if (*bmc) | 1204 | if (*bmc) |
| 1204 | bitmap->allclean = 0; | 1205 | bitmap->allclean = 0; |
| 1205 | 1206 | ||
| 1206 | if (*bmc == 2) { | 1207 | if (*bmc == 2) { |
| 1207 | *bmc=1; /* maybe clear the bit next time */ | 1208 | *bmc = 1; /* maybe clear the bit next time */ |
| 1208 | set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); | 1209 | set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); |
| 1209 | } else if (*bmc == 1 && !bitmap->need_sync) { | 1210 | } else if (*bmc == 1 && !bitmap->need_sync) { |
| 1210 | /* we can clear the bit */ | 1211 | /* we can clear the bit */ |
| @@ -1214,14 +1215,17 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
| 1214 | -1); | 1215 | -1); |
| 1215 | 1216 | ||
| 1216 | /* clear the bit */ | 1217 | /* clear the bit */ |
| 1217 | paddr = kmap_atomic(page, KM_USER0); | 1218 | if (page) { |
| 1218 | if (bitmap->flags & BITMAP_HOSTENDIAN) | 1219 | paddr = kmap_atomic(page, KM_USER0); |
| 1219 | clear_bit(file_page_offset(bitmap, j), | 1220 | if (bitmap->flags & BITMAP_HOSTENDIAN) |
| 1220 | paddr); | 1221 | clear_bit(file_page_offset(bitmap, j), |
| 1221 | else | 1222 | paddr); |
| 1222 | ext2_clear_bit(file_page_offset(bitmap, j), | 1223 | else |
| 1223 | paddr); | 1224 | ext2_clear_bit(file_page_offset(bitmap, j), |
| 1224 | kunmap_atomic(paddr, KM_USER0); | 1225 | paddr); |
| 1226 | kunmap_atomic(paddr, KM_USER0); | ||
| 1227 | } else | ||
| 1228 | log->type->clear_region(log, j); | ||
| 1225 | } | 1229 | } |
| 1226 | } else | 1230 | } else |
| 1227 | j |= PAGE_COUNTER_MASK; | 1231 | j |= PAGE_COUNTER_MASK; |
| @@ -1229,12 +1233,16 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
| 1229 | spin_unlock_irqrestore(&bitmap->lock, flags); | 1233 | spin_unlock_irqrestore(&bitmap->lock, flags); |
| 1230 | 1234 | ||
| 1231 | /* now sync the final page */ | 1235 | /* now sync the final page */ |
| 1232 | if (lastpage != NULL) { | 1236 | if (lastpage != NULL || log != NULL) { |
| 1233 | spin_lock_irqsave(&bitmap->lock, flags); | 1237 | spin_lock_irqsave(&bitmap->lock, flags); |
| 1234 | if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { | 1238 | if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { |
| 1235 | clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); | 1239 | clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); |
| 1236 | spin_unlock_irqrestore(&bitmap->lock, flags); | 1240 | spin_unlock_irqrestore(&bitmap->lock, flags); |
| 1237 | write_page(bitmap, lastpage, 0); | 1241 | if (lastpage) |
| 1242 | write_page(bitmap, lastpage, 0); | ||
| 1243 | else | ||
| 1244 | if (log->type->flush(log)) | ||
| 1245 | bitmap->flags |= BITMAP_WRITE_ERROR; | ||
| 1238 | } else { | 1246 | } else { |
| 1239 | set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); | 1247 | set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); |
| 1240 | spin_unlock_irqrestore(&bitmap->lock, flags); | 1248 | spin_unlock_irqrestore(&bitmap->lock, flags); |
| @@ -1243,7 +1251,7 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
| 1243 | 1251 | ||
| 1244 | done: | 1252 | done: |
| 1245 | if (bitmap->allclean == 0) | 1253 | if (bitmap->allclean == 0) |
| 1246 | bitmap->mddev->thread->timeout = | 1254 | bitmap->mddev->thread->timeout = |
| 1247 | bitmap->mddev->bitmap_info.daemon_sleep; | 1255 | bitmap->mddev->bitmap_info.daemon_sleep; |
| 1248 | mutex_unlock(&mddev->bitmap_info.mutex); | 1256 | mutex_unlock(&mddev->bitmap_info.mutex); |
| 1249 | } | 1257 | } |
| @@ -1262,34 +1270,38 @@ __acquires(bitmap->lock) | |||
| 1262 | unsigned long page = chunk >> PAGE_COUNTER_SHIFT; | 1270 | unsigned long page = chunk >> PAGE_COUNTER_SHIFT; |
| 1263 | unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; | 1271 | unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; |
| 1264 | sector_t csize; | 1272 | sector_t csize; |
| 1273 | int err; | ||
| 1265 | 1274 | ||
| 1266 | if (bitmap_checkpage(bitmap, page, create) < 0) { | 1275 | err = bitmap_checkpage(bitmap, page, create); |
| 1276 | |||
| 1277 | if (bitmap->bp[page].hijacked || | ||
| 1278 | bitmap->bp[page].map == NULL) | ||
| 1279 | csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) + | ||
| 1280 | PAGE_COUNTER_SHIFT - 1); | ||
| 1281 | else | ||
| 1267 | csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); | 1282 | csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); |
| 1268 | *blocks = csize - (offset & (csize- 1)); | 1283 | *blocks = csize - (offset & (csize - 1)); |
| 1284 | |||
| 1285 | if (err < 0) | ||
| 1269 | return NULL; | 1286 | return NULL; |
| 1270 | } | 1287 | |
| 1271 | /* now locked ... */ | 1288 | /* now locked ... */ |
| 1272 | 1289 | ||
| 1273 | if (bitmap->bp[page].hijacked) { /* hijacked pointer */ | 1290 | if (bitmap->bp[page].hijacked) { /* hijacked pointer */ |
| 1274 | /* should we use the first or second counter field | 1291 | /* should we use the first or second counter field |
| 1275 | * of the hijacked pointer? */ | 1292 | * of the hijacked pointer? */ |
| 1276 | int hi = (pageoff > PAGE_COUNTER_MASK); | 1293 | int hi = (pageoff > PAGE_COUNTER_MASK); |
| 1277 | csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) + | ||
| 1278 | PAGE_COUNTER_SHIFT - 1); | ||
| 1279 | *blocks = csize - (offset & (csize- 1)); | ||
| 1280 | return &((bitmap_counter_t *) | 1294 | return &((bitmap_counter_t *) |
| 1281 | &bitmap->bp[page].map)[hi]; | 1295 | &bitmap->bp[page].map)[hi]; |
| 1282 | } else { /* page is allocated */ | 1296 | } else /* page is allocated */ |
| 1283 | csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); | ||
| 1284 | *blocks = csize - (offset & (csize- 1)); | ||
| 1285 | return (bitmap_counter_t *) | 1297 | return (bitmap_counter_t *) |
| 1286 | &(bitmap->bp[page].map[pageoff]); | 1298 | &(bitmap->bp[page].map[pageoff]); |
| 1287 | } | ||
| 1288 | } | 1299 | } |
| 1289 | 1300 | ||
| 1290 | int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind) | 1301 | int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind) |
| 1291 | { | 1302 | { |
| 1292 | if (!bitmap) return 0; | 1303 | if (!bitmap) |
| 1304 | return 0; | ||
| 1293 | 1305 | ||
| 1294 | if (behind) { | 1306 | if (behind) { |
| 1295 | int bw; | 1307 | int bw; |
| @@ -1322,17 +1334,16 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect | |||
| 1322 | prepare_to_wait(&bitmap->overflow_wait, &__wait, | 1334 | prepare_to_wait(&bitmap->overflow_wait, &__wait, |
| 1323 | TASK_UNINTERRUPTIBLE); | 1335 | TASK_UNINTERRUPTIBLE); |
| 1324 | spin_unlock_irq(&bitmap->lock); | 1336 | spin_unlock_irq(&bitmap->lock); |
| 1325 | blk_unplug(bitmap->mddev->queue); | 1337 | md_unplug(bitmap->mddev); |
| 1326 | schedule(); | 1338 | schedule(); |
| 1327 | finish_wait(&bitmap->overflow_wait, &__wait); | 1339 | finish_wait(&bitmap->overflow_wait, &__wait); |
| 1328 | continue; | 1340 | continue; |
| 1329 | } | 1341 | } |
| 1330 | 1342 | ||
| 1331 | switch(*bmc) { | 1343 | switch (*bmc) { |
| 1332 | case 0: | 1344 | case 0: |
| 1333 | bitmap_file_set_bit(bitmap, offset); | 1345 | bitmap_file_set_bit(bitmap, offset); |
| 1334 | bitmap_count_page(bitmap,offset, 1); | 1346 | bitmap_count_page(bitmap, offset, 1); |
| 1335 | blk_plug_device_unlocked(bitmap->mddev->queue); | ||
| 1336 | /* fall through */ | 1347 | /* fall through */ |
| 1337 | case 1: | 1348 | case 1: |
| 1338 | *bmc = 2; | 1349 | *bmc = 2; |
| @@ -1345,16 +1356,19 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect | |||
| 1345 | offset += blocks; | 1356 | offset += blocks; |
| 1346 | if (sectors > blocks) | 1357 | if (sectors > blocks) |
| 1347 | sectors -= blocks; | 1358 | sectors -= blocks; |
| 1348 | else sectors = 0; | 1359 | else |
| 1360 | sectors = 0; | ||
| 1349 | } | 1361 | } |
| 1350 | bitmap->allclean = 0; | 1362 | bitmap->allclean = 0; |
| 1351 | return 0; | 1363 | return 0; |
| 1352 | } | 1364 | } |
| 1365 | EXPORT_SYMBOL(bitmap_startwrite); | ||
| 1353 | 1366 | ||
| 1354 | void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, | 1367 | void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, |
| 1355 | int success, int behind) | 1368 | int success, int behind) |
| 1356 | { | 1369 | { |
| 1357 | if (!bitmap) return; | 1370 | if (!bitmap) |
| 1371 | return; | ||
| 1358 | if (behind) { | 1372 | if (behind) { |
| 1359 | if (atomic_dec_and_test(&bitmap->behind_writes)) | 1373 | if (atomic_dec_and_test(&bitmap->behind_writes)) |
| 1360 | wake_up(&bitmap->behind_wait); | 1374 | wake_up(&bitmap->behind_wait); |
| @@ -1381,7 +1395,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto | |||
| 1381 | bitmap->events_cleared < bitmap->mddev->events) { | 1395 | bitmap->events_cleared < bitmap->mddev->events) { |
| 1382 | bitmap->events_cleared = bitmap->mddev->events; | 1396 | bitmap->events_cleared = bitmap->mddev->events; |
| 1383 | bitmap->need_sync = 1; | 1397 | bitmap->need_sync = 1; |
| 1384 | sysfs_notify_dirent(bitmap->sysfs_can_clear); | 1398 | sysfs_notify_dirent_safe(bitmap->sysfs_can_clear); |
| 1385 | } | 1399 | } |
| 1386 | 1400 | ||
| 1387 | if (!success && ! (*bmc & NEEDED_MASK)) | 1401 | if (!success && ! (*bmc & NEEDED_MASK)) |
| @@ -1391,18 +1405,22 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto | |||
| 1391 | wake_up(&bitmap->overflow_wait); | 1405 | wake_up(&bitmap->overflow_wait); |
| 1392 | 1406 | ||
| 1393 | (*bmc)--; | 1407 | (*bmc)--; |
| 1394 | if (*bmc <= 2) { | 1408 | if (*bmc <= 2) |
| 1395 | set_page_attr(bitmap, | 1409 | set_page_attr(bitmap, |
| 1396 | filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), | 1410 | filemap_get_page( |
| 1411 | bitmap, | ||
| 1412 | offset >> CHUNK_BLOCK_SHIFT(bitmap)), | ||
| 1397 | BITMAP_PAGE_CLEAN); | 1413 | BITMAP_PAGE_CLEAN); |
| 1398 | } | 1414 | |
| 1399 | spin_unlock_irqrestore(&bitmap->lock, flags); | 1415 | spin_unlock_irqrestore(&bitmap->lock, flags); |
| 1400 | offset += blocks; | 1416 | offset += blocks; |
| 1401 | if (sectors > blocks) | 1417 | if (sectors > blocks) |
| 1402 | sectors -= blocks; | 1418 | sectors -= blocks; |
| 1403 | else sectors = 0; | 1419 | else |
| 1420 | sectors = 0; | ||
| 1404 | } | 1421 | } |
| 1405 | } | 1422 | } |
| 1423 | EXPORT_SYMBOL(bitmap_endwrite); | ||
| 1406 | 1424 | ||
| 1407 | static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, | 1425 | static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, |
| 1408 | int degraded) | 1426 | int degraded) |
| @@ -1455,14 +1473,14 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, | |||
| 1455 | } | 1473 | } |
| 1456 | return rv; | 1474 | return rv; |
| 1457 | } | 1475 | } |
| 1476 | EXPORT_SYMBOL(bitmap_start_sync); | ||
| 1458 | 1477 | ||
| 1459 | void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted) | 1478 | void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted) |
| 1460 | { | 1479 | { |
| 1461 | bitmap_counter_t *bmc; | 1480 | bitmap_counter_t *bmc; |
| 1462 | unsigned long flags; | 1481 | unsigned long flags; |
| 1463 | /* | 1482 | |
| 1464 | if (offset == 0) printk("bitmap_end_sync 0 (%d)\n", aborted); | 1483 | if (bitmap == NULL) { |
| 1465 | */ if (bitmap == NULL) { | ||
| 1466 | *blocks = 1024; | 1484 | *blocks = 1024; |
| 1467 | return; | 1485 | return; |
| 1468 | } | 1486 | } |
| @@ -1471,26 +1489,23 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int ab | |||
| 1471 | if (bmc == NULL) | 1489 | if (bmc == NULL) |
| 1472 | goto unlock; | 1490 | goto unlock; |
| 1473 | /* locked */ | 1491 | /* locked */ |
| 1474 | /* | ||
| 1475 | if (offset == 0) printk("bitmap_end sync found 0x%x, blocks %d\n", *bmc, *blocks); | ||
| 1476 | */ | ||
| 1477 | if (RESYNC(*bmc)) { | 1492 | if (RESYNC(*bmc)) { |
| 1478 | *bmc &= ~RESYNC_MASK; | 1493 | *bmc &= ~RESYNC_MASK; |
| 1479 | 1494 | ||
| 1480 | if (!NEEDED(*bmc) && aborted) | 1495 | if (!NEEDED(*bmc) && aborted) |
| 1481 | *bmc |= NEEDED_MASK; | 1496 | *bmc |= NEEDED_MASK; |
| 1482 | else { | 1497 | else { |
| 1483 | if (*bmc <= 2) { | 1498 | if (*bmc <= 2) |
| 1484 | set_page_attr(bitmap, | 1499 | set_page_attr(bitmap, |
| 1485 | filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), | 1500 | filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), |
| 1486 | BITMAP_PAGE_CLEAN); | 1501 | BITMAP_PAGE_CLEAN); |
| 1487 | } | ||
| 1488 | } | 1502 | } |
| 1489 | } | 1503 | } |
| 1490 | unlock: | 1504 | unlock: |
| 1491 | spin_unlock_irqrestore(&bitmap->lock, flags); | 1505 | spin_unlock_irqrestore(&bitmap->lock, flags); |
| 1492 | bitmap->allclean = 0; | 1506 | bitmap->allclean = 0; |
| 1493 | } | 1507 | } |
| 1508 | EXPORT_SYMBOL(bitmap_end_sync); | ||
| 1494 | 1509 | ||
| 1495 | void bitmap_close_sync(struct bitmap *bitmap) | 1510 | void bitmap_close_sync(struct bitmap *bitmap) |
| 1496 | { | 1511 | { |
| @@ -1507,6 +1522,7 @@ void bitmap_close_sync(struct bitmap *bitmap) | |||
| 1507 | sector += blocks; | 1522 | sector += blocks; |
| 1508 | } | 1523 | } |
| 1509 | } | 1524 | } |
| 1525 | EXPORT_SYMBOL(bitmap_close_sync); | ||
| 1510 | 1526 | ||
| 1511 | void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) | 1527 | void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) |
| 1512 | { | 1528 | { |
| @@ -1526,7 +1542,8 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) | |||
| 1526 | atomic_read(&bitmap->mddev->recovery_active) == 0); | 1542 | atomic_read(&bitmap->mddev->recovery_active) == 0); |
| 1527 | 1543 | ||
| 1528 | bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync; | 1544 | bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync; |
| 1529 | set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); | 1545 | if (bitmap->mddev->persistent) |
| 1546 | set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); | ||
| 1530 | sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1); | 1547 | sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1); |
| 1531 | s = 0; | 1548 | s = 0; |
| 1532 | while (s < sector && s < bitmap->mddev->resync_max_sectors) { | 1549 | while (s < sector && s < bitmap->mddev->resync_max_sectors) { |
| @@ -1536,6 +1553,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) | |||
| 1536 | bitmap->last_end_sync = jiffies; | 1553 | bitmap->last_end_sync = jiffies; |
| 1537 | sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed"); | 1554 | sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed"); |
| 1538 | } | 1555 | } |
| 1556 | EXPORT_SYMBOL(bitmap_cond_end_sync); | ||
| 1539 | 1557 | ||
| 1540 | static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) | 1558 | static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) |
| 1541 | { | 1559 | { |
| @@ -1552,9 +1570,9 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n | |||
| 1552 | spin_unlock_irq(&bitmap->lock); | 1570 | spin_unlock_irq(&bitmap->lock); |
| 1553 | return; | 1571 | return; |
| 1554 | } | 1572 | } |
| 1555 | if (! *bmc) { | 1573 | if (!*bmc) { |
| 1556 | struct page *page; | 1574 | struct page *page; |
| 1557 | *bmc = 1 | (needed?NEEDED_MASK:0); | 1575 | *bmc = 1 | (needed ? NEEDED_MASK : 0); |
| 1558 | bitmap_count_page(bitmap, offset, 1); | 1576 | bitmap_count_page(bitmap, offset, 1); |
| 1559 | page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)); | 1577 | page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)); |
| 1560 | set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); | 1578 | set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); |
| @@ -1663,15 +1681,17 @@ int bitmap_create(mddev_t *mddev) | |||
| 1663 | unsigned long pages; | 1681 | unsigned long pages; |
| 1664 | struct file *file = mddev->bitmap_info.file; | 1682 | struct file *file = mddev->bitmap_info.file; |
| 1665 | int err; | 1683 | int err; |
| 1666 | sector_t start; | 1684 | struct sysfs_dirent *bm = NULL; |
| 1667 | struct sysfs_dirent *bm; | ||
| 1668 | 1685 | ||
| 1669 | BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); | 1686 | BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); |
| 1670 | 1687 | ||
| 1671 | if (!file && !mddev->bitmap_info.offset) /* bitmap disabled, nothing to do */ | 1688 | if (!file |
| 1689 | && !mddev->bitmap_info.offset | ||
| 1690 | && !mddev->bitmap_info.log) /* bitmap disabled, nothing to do */ | ||
| 1672 | return 0; | 1691 | return 0; |
| 1673 | 1692 | ||
| 1674 | BUG_ON(file && mddev->bitmap_info.offset); | 1693 | BUG_ON(file && mddev->bitmap_info.offset); |
| 1694 | BUG_ON(mddev->bitmap_info.offset && mddev->bitmap_info.log); | ||
| 1675 | 1695 | ||
| 1676 | bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); | 1696 | bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); |
| 1677 | if (!bitmap) | 1697 | if (!bitmap) |
| @@ -1685,7 +1705,8 @@ int bitmap_create(mddev_t *mddev) | |||
| 1685 | 1705 | ||
| 1686 | bitmap->mddev = mddev; | 1706 | bitmap->mddev = mddev; |
| 1687 | 1707 | ||
| 1688 | bm = sysfs_get_dirent(mddev->kobj.sd, NULL, "bitmap"); | 1708 | if (mddev->kobj.sd) |
| 1709 | bm = sysfs_get_dirent(mddev->kobj.sd, NULL, "bitmap"); | ||
| 1689 | if (bm) { | 1710 | if (bm) { |
| 1690 | bitmap->sysfs_can_clear = sysfs_get_dirent(bm, NULL, "can_clear"); | 1711 | bitmap->sysfs_can_clear = sysfs_get_dirent(bm, NULL, "can_clear"); |
| 1691 | sysfs_put(bm); | 1712 | sysfs_put(bm); |
| @@ -1719,9 +1740,9 @@ int bitmap_create(mddev_t *mddev) | |||
| 1719 | bitmap->chunkshift = ffz(~mddev->bitmap_info.chunksize); | 1740 | bitmap->chunkshift = ffz(~mddev->bitmap_info.chunksize); |
| 1720 | 1741 | ||
| 1721 | /* now that chunksize and chunkshift are set, we can use these macros */ | 1742 | /* now that chunksize and chunkshift are set, we can use these macros */ |
| 1722 | chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >> | 1743 | chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >> |
| 1723 | CHUNK_BLOCK_SHIFT(bitmap); | 1744 | CHUNK_BLOCK_SHIFT(bitmap); |
| 1724 | pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO; | 1745 | pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO; |
| 1725 | 1746 | ||
| 1726 | BUG_ON(!pages); | 1747 | BUG_ON(!pages); |
| 1727 | 1748 | ||
| @@ -1741,27 +1762,11 @@ int bitmap_create(mddev_t *mddev) | |||
| 1741 | if (!bitmap->bp) | 1762 | if (!bitmap->bp) |
| 1742 | goto error; | 1763 | goto error; |
| 1743 | 1764 | ||
| 1744 | /* now that we have some pages available, initialize the in-memory | ||
| 1745 | * bitmap from the on-disk bitmap */ | ||
| 1746 | start = 0; | ||
| 1747 | if (mddev->degraded == 0 | ||
| 1748 | || bitmap->events_cleared == mddev->events) | ||
| 1749 | /* no need to keep dirty bits to optimise a re-add of a missing device */ | ||
| 1750 | start = mddev->recovery_cp; | ||
| 1751 | err = bitmap_init_from_disk(bitmap, start); | ||
| 1752 | |||
| 1753 | if (err) | ||
| 1754 | goto error; | ||
| 1755 | |||
| 1756 | printk(KERN_INFO "created bitmap (%lu pages) for device %s\n", | 1765 | printk(KERN_INFO "created bitmap (%lu pages) for device %s\n", |
| 1757 | pages, bmname(bitmap)); | 1766 | pages, bmname(bitmap)); |
| 1758 | 1767 | ||
| 1759 | mddev->bitmap = bitmap; | 1768 | mddev->bitmap = bitmap; |
| 1760 | 1769 | ||
| 1761 | mddev->thread->timeout = mddev->bitmap_info.daemon_sleep; | ||
| 1762 | md_wakeup_thread(mddev->thread); | ||
| 1763 | |||
| 1764 | bitmap_update_sb(bitmap); | ||
| 1765 | 1770 | ||
| 1766 | return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0; | 1771 | return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0; |
| 1767 | 1772 | ||
| @@ -1770,15 +1775,69 @@ int bitmap_create(mddev_t *mddev) | |||
| 1770 | return err; | 1775 | return err; |
| 1771 | } | 1776 | } |
| 1772 | 1777 | ||
| 1778 | int bitmap_load(mddev_t *mddev) | ||
| 1779 | { | ||
| 1780 | int err = 0; | ||
| 1781 | sector_t sector = 0; | ||
| 1782 | struct bitmap *bitmap = mddev->bitmap; | ||
| 1783 | |||
| 1784 | if (!bitmap) | ||
| 1785 | goto out; | ||
| 1786 | |||
| 1787 | /* Clear out old bitmap info first: Either there is none, or we | ||
| 1788 | * are resuming after someone else has possibly changed things, | ||
| 1789 | * so we should forget old cached info. | ||
| 1790 | * All chunks should be clean, but some might need_sync. | ||
| 1791 | */ | ||
| 1792 | while (sector < mddev->resync_max_sectors) { | ||
| 1793 | int blocks; | ||
| 1794 | bitmap_start_sync(bitmap, sector, &blocks, 0); | ||
| 1795 | sector += blocks; | ||
| 1796 | } | ||
| 1797 | bitmap_close_sync(bitmap); | ||
| 1798 | |||
| 1799 | if (mddev->bitmap_info.log) { | ||
| 1800 | unsigned long i; | ||
| 1801 | struct dm_dirty_log *log = mddev->bitmap_info.log; | ||
| 1802 | for (i = 0; i < bitmap->chunks; i++) | ||
| 1803 | if (!log->type->in_sync(log, i, 1)) | ||
| 1804 | bitmap_set_memory_bits(bitmap, | ||
| 1805 | (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap), | ||
| 1806 | 1); | ||
| 1807 | } else { | ||
| 1808 | sector_t start = 0; | ||
| 1809 | if (mddev->degraded == 0 | ||
| 1810 | || bitmap->events_cleared == mddev->events) | ||
| 1811 | /* no need to keep dirty bits to optimise a | ||
| 1812 | * re-add of a missing device */ | ||
| 1813 | start = mddev->recovery_cp; | ||
| 1814 | |||
| 1815 | err = bitmap_init_from_disk(bitmap, start); | ||
| 1816 | } | ||
| 1817 | if (err) | ||
| 1818 | goto out; | ||
| 1819 | |||
| 1820 | mddev->thread->timeout = mddev->bitmap_info.daemon_sleep; | ||
| 1821 | md_wakeup_thread(mddev->thread); | ||
| 1822 | |||
| 1823 | bitmap_update_sb(bitmap); | ||
| 1824 | |||
| 1825 | if (bitmap->flags & BITMAP_WRITE_ERROR) | ||
| 1826 | err = -EIO; | ||
| 1827 | out: | ||
| 1828 | return err; | ||
| 1829 | } | ||
| 1830 | EXPORT_SYMBOL_GPL(bitmap_load); | ||
| 1831 | |||
| 1773 | static ssize_t | 1832 | static ssize_t |
| 1774 | location_show(mddev_t *mddev, char *page) | 1833 | location_show(mddev_t *mddev, char *page) |
| 1775 | { | 1834 | { |
| 1776 | ssize_t len; | 1835 | ssize_t len; |
| 1777 | if (mddev->bitmap_info.file) { | 1836 | if (mddev->bitmap_info.file) |
| 1778 | len = sprintf(page, "file"); | 1837 | len = sprintf(page, "file"); |
| 1779 | } else if (mddev->bitmap_info.offset) { | 1838 | else if (mddev->bitmap_info.offset) |
| 1780 | len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset); | 1839 | len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset); |
| 1781 | } else | 1840 | else |
| 1782 | len = sprintf(page, "none"); | 1841 | len = sprintf(page, "none"); |
| 1783 | len += sprintf(page+len, "\n"); | 1842 | len += sprintf(page+len, "\n"); |
| 1784 | return len; | 1843 | return len; |
| @@ -1867,7 +1926,7 @@ timeout_show(mddev_t *mddev, char *page) | |||
| 1867 | ssize_t len; | 1926 | ssize_t len; |
| 1868 | unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ; | 1927 | unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ; |
| 1869 | unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ; | 1928 | unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ; |
| 1870 | 1929 | ||
| 1871 | len = sprintf(page, "%lu", secs); | 1930 | len = sprintf(page, "%lu", secs); |
| 1872 | if (jifs) | 1931 | if (jifs) |
| 1873 | len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs)); | 1932 | len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs)); |
| @@ -2049,12 +2108,3 @@ struct attribute_group md_bitmap_group = { | |||
| 2049 | .attrs = md_bitmap_attrs, | 2108 | .attrs = md_bitmap_attrs, |
| 2050 | }; | 2109 | }; |
| 2051 | 2110 | ||
| 2052 | |||
| 2053 | /* the bitmap API -- for raid personalities */ | ||
| 2054 | EXPORT_SYMBOL(bitmap_startwrite); | ||
| 2055 | EXPORT_SYMBOL(bitmap_endwrite); | ||
| 2056 | EXPORT_SYMBOL(bitmap_start_sync); | ||
| 2057 | EXPORT_SYMBOL(bitmap_end_sync); | ||
| 2058 | EXPORT_SYMBOL(bitmap_unplug); | ||
| 2059 | EXPORT_SYMBOL(bitmap_close_sync); | ||
| 2060 | EXPORT_SYMBOL(bitmap_cond_end_sync); | ||
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h index 3797dea4723a..e872a7bad6b8 100644 --- a/drivers/md/bitmap.h +++ b/drivers/md/bitmap.h | |||
| @@ -222,6 +222,10 @@ struct bitmap { | |||
| 222 | unsigned long file_pages; /* number of pages in the file */ | 222 | unsigned long file_pages; /* number of pages in the file */ |
| 223 | int last_page_size; /* bytes in the last page */ | 223 | int last_page_size; /* bytes in the last page */ |
| 224 | 224 | ||
| 225 | unsigned long logattrs; /* used when filemap_attr doesn't exist | ||
| 226 | * because we are working with a dirty_log | ||
| 227 | */ | ||
| 228 | |||
| 225 | unsigned long flags; | 229 | unsigned long flags; |
| 226 | 230 | ||
| 227 | int allclean; | 231 | int allclean; |
| @@ -243,12 +247,14 @@ struct bitmap { | |||
| 243 | wait_queue_head_t behind_wait; | 247 | wait_queue_head_t behind_wait; |
| 244 | 248 | ||
| 245 | struct sysfs_dirent *sysfs_can_clear; | 249 | struct sysfs_dirent *sysfs_can_clear; |
| 250 | |||
| 246 | }; | 251 | }; |
| 247 | 252 | ||
| 248 | /* the bitmap API */ | 253 | /* the bitmap API */ |
| 249 | 254 | ||
| 250 | /* these are used only by md/bitmap */ | 255 | /* these are used only by md/bitmap */ |
| 251 | int bitmap_create(mddev_t *mddev); | 256 | int bitmap_create(mddev_t *mddev); |
| 257 | int bitmap_load(mddev_t *mddev); | ||
| 252 | void bitmap_flush(mddev_t *mddev); | 258 | void bitmap_flush(mddev_t *mddev); |
| 253 | void bitmap_destroy(mddev_t *mddev); | 259 | void bitmap_destroy(mddev_t *mddev); |
| 254 | 260 | ||
diff --git a/drivers/md/md.c b/drivers/md/md.c index 700c96edf9b2..11567c7999a2 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
| @@ -262,7 +262,7 @@ static int md_make_request(struct request_queue *q, struct bio *bio) | |||
| 262 | * Once ->stop is called and completes, the module will be completely | 262 | * Once ->stop is called and completes, the module will be completely |
| 263 | * unused. | 263 | * unused. |
| 264 | */ | 264 | */ |
| 265 | static void mddev_suspend(mddev_t *mddev) | 265 | void mddev_suspend(mddev_t *mddev) |
| 266 | { | 266 | { |
| 267 | BUG_ON(mddev->suspended); | 267 | BUG_ON(mddev->suspended); |
| 268 | mddev->suspended = 1; | 268 | mddev->suspended = 1; |
| @@ -270,13 +270,15 @@ static void mddev_suspend(mddev_t *mddev) | |||
| 270 | wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0); | 270 | wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0); |
| 271 | mddev->pers->quiesce(mddev, 1); | 271 | mddev->pers->quiesce(mddev, 1); |
| 272 | } | 272 | } |
| 273 | EXPORT_SYMBOL_GPL(mddev_suspend); | ||
| 273 | 274 | ||
| 274 | static void mddev_resume(mddev_t *mddev) | 275 | void mddev_resume(mddev_t *mddev) |
| 275 | { | 276 | { |
| 276 | mddev->suspended = 0; | 277 | mddev->suspended = 0; |
| 277 | wake_up(&mddev->sb_wait); | 278 | wake_up(&mddev->sb_wait); |
| 278 | mddev->pers->quiesce(mddev, 0); | 279 | mddev->pers->quiesce(mddev, 0); |
| 279 | } | 280 | } |
| 281 | EXPORT_SYMBOL_GPL(mddev_resume); | ||
| 280 | 282 | ||
| 281 | int mddev_congested(mddev_t *mddev, int bits) | 283 | int mddev_congested(mddev_t *mddev, int bits) |
| 282 | { | 284 | { |
| @@ -385,6 +387,51 @@ void md_barrier_request(mddev_t *mddev, struct bio *bio) | |||
| 385 | } | 387 | } |
| 386 | EXPORT_SYMBOL(md_barrier_request); | 388 | EXPORT_SYMBOL(md_barrier_request); |
| 387 | 389 | ||
| 390 | /* Support for plugging. | ||
| 391 | * This mirrors the plugging support in request_queue, but does not | ||
| 392 | * require having a whole queue | ||
| 393 | */ | ||
| 394 | static void plugger_work(struct work_struct *work) | ||
| 395 | { | ||
| 396 | struct plug_handle *plug = | ||
| 397 | container_of(work, struct plug_handle, unplug_work); | ||
| 398 | plug->unplug_fn(plug); | ||
| 399 | } | ||
| 400 | static void plugger_timeout(unsigned long data) | ||
| 401 | { | ||
| 402 | struct plug_handle *plug = (void *)data; | ||
| 403 | kblockd_schedule_work(NULL, &plug->unplug_work); | ||
| 404 | } | ||
| 405 | void plugger_init(struct plug_handle *plug, | ||
| 406 | void (*unplug_fn)(struct plug_handle *)) | ||
| 407 | { | ||
| 408 | plug->unplug_flag = 0; | ||
| 409 | plug->unplug_fn = unplug_fn; | ||
| 410 | init_timer(&plug->unplug_timer); | ||
| 411 | plug->unplug_timer.function = plugger_timeout; | ||
| 412 | plug->unplug_timer.data = (unsigned long)plug; | ||
| 413 | INIT_WORK(&plug->unplug_work, plugger_work); | ||
| 414 | } | ||
| 415 | EXPORT_SYMBOL_GPL(plugger_init); | ||
| 416 | |||
| 417 | void plugger_set_plug(struct plug_handle *plug) | ||
| 418 | { | ||
| 419 | if (!test_and_set_bit(PLUGGED_FLAG, &plug->unplug_flag)) | ||
| 420 | mod_timer(&plug->unplug_timer, jiffies + msecs_to_jiffies(3)+1); | ||
| 421 | } | ||
| 422 | EXPORT_SYMBOL_GPL(plugger_set_plug); | ||
| 423 | |||
| 424 | int plugger_remove_plug(struct plug_handle *plug) | ||
| 425 | { | ||
| 426 | if (test_and_clear_bit(PLUGGED_FLAG, &plug->unplug_flag)) { | ||
| 427 | del_timer(&plug->unplug_timer); | ||
| 428 | return 1; | ||
| 429 | } else | ||
| 430 | return 0; | ||
| 431 | } | ||
| 432 | EXPORT_SYMBOL_GPL(plugger_remove_plug); | ||
| 433 | |||
| 434 | |||
| 388 | static inline mddev_t *mddev_get(mddev_t *mddev) | 435 | static inline mddev_t *mddev_get(mddev_t *mddev) |
| 389 | { | 436 | { |
| 390 | atomic_inc(&mddev->active); | 437 | atomic_inc(&mddev->active); |
| @@ -417,7 +464,7 @@ static void mddev_put(mddev_t *mddev) | |||
| 417 | spin_unlock(&all_mddevs_lock); | 464 | spin_unlock(&all_mddevs_lock); |
| 418 | } | 465 | } |
| 419 | 466 | ||
| 420 | static void mddev_init(mddev_t *mddev) | 467 | void mddev_init(mddev_t *mddev) |
| 421 | { | 468 | { |
| 422 | mutex_init(&mddev->open_mutex); | 469 | mutex_init(&mddev->open_mutex); |
| 423 | mutex_init(&mddev->reconfig_mutex); | 470 | mutex_init(&mddev->reconfig_mutex); |
| @@ -437,6 +484,7 @@ static void mddev_init(mddev_t *mddev) | |||
| 437 | mddev->resync_max = MaxSector; | 484 | mddev->resync_max = MaxSector; |
| 438 | mddev->level = LEVEL_NONE; | 485 | mddev->level = LEVEL_NONE; |
| 439 | } | 486 | } |
| 487 | EXPORT_SYMBOL_GPL(mddev_init); | ||
| 440 | 488 | ||
| 441 | static mddev_t * mddev_find(dev_t unit) | 489 | static mddev_t * mddev_find(dev_t unit) |
| 442 | { | 490 | { |
| @@ -533,25 +581,31 @@ static void mddev_unlock(mddev_t * mddev) | |||
| 533 | * an access to the files will try to take reconfig_mutex | 581 | * an access to the files will try to take reconfig_mutex |
| 534 | * while holding the file unremovable, which leads to | 582 | * while holding the file unremovable, which leads to |
| 535 | * a deadlock. | 583 | * a deadlock. |
| 536 | * So hold open_mutex instead - we are allowed to take | 584 | * So set sysfs_active while the remove is happening, |
| 537 | * it while holding reconfig_mutex, and md_run can | 585 | * and anything else which might set ->to_remove or may |
| 538 | * use it to wait for the remove to complete. | 586 | * otherwise change the sysfs namespace will fail with |
| 587 | * -EBUSY if sysfs_active is still set. | ||
| 588 | * We set sysfs_active under reconfig_mutex and elsewhere | ||
| 589 | * test it under the same mutex to ensure its correct value | ||
| 590 | * is seen. | ||
| 539 | */ | 591 | */ |
| 540 | struct attribute_group *to_remove = mddev->to_remove; | 592 | struct attribute_group *to_remove = mddev->to_remove; |
| 541 | mddev->to_remove = NULL; | 593 | mddev->to_remove = NULL; |
| 542 | mutex_lock(&mddev->open_mutex); | 594 | mddev->sysfs_active = 1; |
| 543 | mutex_unlock(&mddev->reconfig_mutex); | 595 | mutex_unlock(&mddev->reconfig_mutex); |
| 544 | 596 | ||
| 545 | if (to_remove != &md_redundancy_group) | 597 | if (mddev->kobj.sd) { |
| 546 | sysfs_remove_group(&mddev->kobj, to_remove); | 598 | if (to_remove != &md_redundancy_group) |
| 547 | if (mddev->pers == NULL || | 599 | sysfs_remove_group(&mddev->kobj, to_remove); |
| 548 | mddev->pers->sync_request == NULL) { | 600 | if (mddev->pers == NULL || |
| 549 | sysfs_remove_group(&mddev->kobj, &md_redundancy_group); | 601 | mddev->pers->sync_request == NULL) { |
| 550 | if (mddev->sysfs_action) | 602 | sysfs_remove_group(&mddev->kobj, &md_redundancy_group); |
| 551 | sysfs_put(mddev->sysfs_action); | 603 | if (mddev->sysfs_action) |
| 552 | mddev->sysfs_action = NULL; | 604 | sysfs_put(mddev->sysfs_action); |
| 605 | mddev->sysfs_action = NULL; | ||
| 606 | } | ||
| 553 | } | 607 | } |
| 554 | mutex_unlock(&mddev->open_mutex); | 608 | mddev->sysfs_active = 0; |
| 555 | } else | 609 | } else |
| 556 | mutex_unlock(&mddev->reconfig_mutex); | 610 | mutex_unlock(&mddev->reconfig_mutex); |
| 557 | 611 | ||
| @@ -1812,11 +1866,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
| 1812 | goto fail; | 1866 | goto fail; |
| 1813 | 1867 | ||
| 1814 | ko = &part_to_dev(rdev->bdev->bd_part)->kobj; | 1868 | ko = &part_to_dev(rdev->bdev->bd_part)->kobj; |
| 1815 | if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { | 1869 | if (sysfs_create_link(&rdev->kobj, ko, "block")) |
| 1816 | kobject_del(&rdev->kobj); | 1870 | /* failure here is OK */; |
| 1817 | goto fail; | 1871 | rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state"); |
| 1818 | } | ||
| 1819 | rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, NULL, "state"); | ||
| 1820 | 1872 | ||
| 1821 | list_add_rcu(&rdev->same_set, &mddev->disks); | 1873 | list_add_rcu(&rdev->same_set, &mddev->disks); |
| 1822 | bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); | 1874 | bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); |
| @@ -2335,8 +2387,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
| 2335 | set_bit(In_sync, &rdev->flags); | 2387 | set_bit(In_sync, &rdev->flags); |
| 2336 | err = 0; | 2388 | err = 0; |
| 2337 | } | 2389 | } |
| 2338 | if (!err && rdev->sysfs_state) | 2390 | if (!err) |
| 2339 | sysfs_notify_dirent(rdev->sysfs_state); | 2391 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
| 2340 | return err ? err : len; | 2392 | return err ? err : len; |
| 2341 | } | 2393 | } |
| 2342 | static struct rdev_sysfs_entry rdev_state = | 2394 | static struct rdev_sysfs_entry rdev_state = |
| @@ -2431,14 +2483,10 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
| 2431 | rdev->raid_disk = -1; | 2483 | rdev->raid_disk = -1; |
| 2432 | return err; | 2484 | return err; |
| 2433 | } else | 2485 | } else |
| 2434 | sysfs_notify_dirent(rdev->sysfs_state); | 2486 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
| 2435 | sprintf(nm, "rd%d", rdev->raid_disk); | 2487 | sprintf(nm, "rd%d", rdev->raid_disk); |
| 2436 | if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm)) | 2488 | if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm)) |
| 2437 | printk(KERN_WARNING | 2489 | /* failure here is OK */; |
| 2438 | "md: cannot register " | ||
| 2439 | "%s for %s\n", | ||
| 2440 | nm, mdname(rdev->mddev)); | ||
| 2441 | |||
| 2442 | /* don't wakeup anyone, leave that to userspace. */ | 2490 | /* don't wakeup anyone, leave that to userspace. */ |
| 2443 | } else { | 2491 | } else { |
| 2444 | if (slot >= rdev->mddev->raid_disks) | 2492 | if (slot >= rdev->mddev->raid_disks) |
| @@ -2448,7 +2496,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
| 2448 | clear_bit(Faulty, &rdev->flags); | 2496 | clear_bit(Faulty, &rdev->flags); |
| 2449 | clear_bit(WriteMostly, &rdev->flags); | 2497 | clear_bit(WriteMostly, &rdev->flags); |
| 2450 | set_bit(In_sync, &rdev->flags); | 2498 | set_bit(In_sync, &rdev->flags); |
| 2451 | sysfs_notify_dirent(rdev->sysfs_state); | 2499 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
| 2452 | } | 2500 | } |
| 2453 | return len; | 2501 | return len; |
| 2454 | } | 2502 | } |
| @@ -2696,6 +2744,24 @@ static struct kobj_type rdev_ktype = { | |||
| 2696 | .default_attrs = rdev_default_attrs, | 2744 | .default_attrs = rdev_default_attrs, |
| 2697 | }; | 2745 | }; |
| 2698 | 2746 | ||
| 2747 | void md_rdev_init(mdk_rdev_t *rdev) | ||
| 2748 | { | ||
| 2749 | rdev->desc_nr = -1; | ||
| 2750 | rdev->saved_raid_disk = -1; | ||
| 2751 | rdev->raid_disk = -1; | ||
| 2752 | rdev->flags = 0; | ||
| 2753 | rdev->data_offset = 0; | ||
| 2754 | rdev->sb_events = 0; | ||
| 2755 | rdev->last_read_error.tv_sec = 0; | ||
| 2756 | rdev->last_read_error.tv_nsec = 0; | ||
| 2757 | atomic_set(&rdev->nr_pending, 0); | ||
| 2758 | atomic_set(&rdev->read_errors, 0); | ||
| 2759 | atomic_set(&rdev->corrected_errors, 0); | ||
| 2760 | |||
| 2761 | INIT_LIST_HEAD(&rdev->same_set); | ||
| 2762 | init_waitqueue_head(&rdev->blocked_wait); | ||
| 2763 | } | ||
| 2764 | EXPORT_SYMBOL_GPL(md_rdev_init); | ||
| 2699 | /* | 2765 | /* |
| 2700 | * Import a device. If 'super_format' >= 0, then sanity check the superblock | 2766 | * Import a device. If 'super_format' >= 0, then sanity check the superblock |
| 2701 | * | 2767 | * |
| @@ -2719,6 +2785,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi | |||
| 2719 | return ERR_PTR(-ENOMEM); | 2785 | return ERR_PTR(-ENOMEM); |
| 2720 | } | 2786 | } |
| 2721 | 2787 | ||
| 2788 | md_rdev_init(rdev); | ||
| 2722 | if ((err = alloc_disk_sb(rdev))) | 2789 | if ((err = alloc_disk_sb(rdev))) |
| 2723 | goto abort_free; | 2790 | goto abort_free; |
| 2724 | 2791 | ||
| @@ -2728,18 +2795,6 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi | |||
| 2728 | 2795 | ||
| 2729 | kobject_init(&rdev->kobj, &rdev_ktype); | 2796 | kobject_init(&rdev->kobj, &rdev_ktype); |
| 2730 | 2797 | ||
| 2731 | rdev->desc_nr = -1; | ||
| 2732 | rdev->saved_raid_disk = -1; | ||
| 2733 | rdev->raid_disk = -1; | ||
| 2734 | rdev->flags = 0; | ||
| 2735 | rdev->data_offset = 0; | ||
| 2736 | rdev->sb_events = 0; | ||
| 2737 | rdev->last_read_error.tv_sec = 0; | ||
| 2738 | rdev->last_read_error.tv_nsec = 0; | ||
| 2739 | atomic_set(&rdev->nr_pending, 0); | ||
| 2740 | atomic_set(&rdev->read_errors, 0); | ||
| 2741 | atomic_set(&rdev->corrected_errors, 0); | ||
| 2742 | |||
| 2743 | size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; | 2798 | size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; |
| 2744 | if (!size) { | 2799 | if (!size) { |
| 2745 | printk(KERN_WARNING | 2800 | printk(KERN_WARNING |
| @@ -2768,9 +2823,6 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi | |||
| 2768 | } | 2823 | } |
| 2769 | } | 2824 | } |
| 2770 | 2825 | ||
| 2771 | INIT_LIST_HEAD(&rdev->same_set); | ||
| 2772 | init_waitqueue_head(&rdev->blocked_wait); | ||
| 2773 | |||
| 2774 | return rdev; | 2826 | return rdev; |
| 2775 | 2827 | ||
| 2776 | abort_free: | 2828 | abort_free: |
| @@ -2961,7 +3013,9 @@ level_store(mddev_t *mddev, const char *buf, size_t len) | |||
| 2961 | * - new personality will access other array. | 3013 | * - new personality will access other array. |
| 2962 | */ | 3014 | */ |
| 2963 | 3015 | ||
| 2964 | if (mddev->sync_thread || mddev->reshape_position != MaxSector) | 3016 | if (mddev->sync_thread || |
| 3017 | mddev->reshape_position != MaxSector || | ||
| 3018 | mddev->sysfs_active) | ||
| 2965 | return -EBUSY; | 3019 | return -EBUSY; |
| 2966 | 3020 | ||
| 2967 | if (!mddev->pers->quiesce) { | 3021 | if (!mddev->pers->quiesce) { |
| @@ -3438,7 +3492,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
| 3438 | if (err) | 3492 | if (err) |
| 3439 | return err; | 3493 | return err; |
| 3440 | else { | 3494 | else { |
| 3441 | sysfs_notify_dirent(mddev->sysfs_state); | 3495 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
| 3442 | return len; | 3496 | return len; |
| 3443 | } | 3497 | } |
| 3444 | } | 3498 | } |
| @@ -3736,7 +3790,7 @@ action_store(mddev_t *mddev, const char *page, size_t len) | |||
| 3736 | } | 3790 | } |
| 3737 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 3791 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
| 3738 | md_wakeup_thread(mddev->thread); | 3792 | md_wakeup_thread(mddev->thread); |
| 3739 | sysfs_notify_dirent(mddev->sysfs_action); | 3793 | sysfs_notify_dirent_safe(mddev->sysfs_action); |
| 3740 | return len; | 3794 | return len; |
| 3741 | } | 3795 | } |
| 3742 | 3796 | ||
| @@ -4282,13 +4336,14 @@ static int md_alloc(dev_t dev, char *name) | |||
| 4282 | disk->disk_name); | 4336 | disk->disk_name); |
| 4283 | error = 0; | 4337 | error = 0; |
| 4284 | } | 4338 | } |
| 4285 | if (sysfs_create_group(&mddev->kobj, &md_bitmap_group)) | 4339 | if (mddev->kobj.sd && |
| 4340 | sysfs_create_group(&mddev->kobj, &md_bitmap_group)) | ||
| 4286 | printk(KERN_DEBUG "pointless warning\n"); | 4341 | printk(KERN_DEBUG "pointless warning\n"); |
| 4287 | abort: | 4342 | abort: |
| 4288 | mutex_unlock(&disks_mutex); | 4343 | mutex_unlock(&disks_mutex); |
| 4289 | if (!error) { | 4344 | if (!error && mddev->kobj.sd) { |
| 4290 | kobject_uevent(&mddev->kobj, KOBJ_ADD); | 4345 | kobject_uevent(&mddev->kobj, KOBJ_ADD); |
| 4291 | mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, NULL, "array_state"); | 4346 | mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state"); |
| 4292 | } | 4347 | } |
| 4293 | mddev_put(mddev); | 4348 | mddev_put(mddev); |
| 4294 | return error; | 4349 | return error; |
| @@ -4326,14 +4381,14 @@ static void md_safemode_timeout(unsigned long data) | |||
| 4326 | if (!atomic_read(&mddev->writes_pending)) { | 4381 | if (!atomic_read(&mddev->writes_pending)) { |
| 4327 | mddev->safemode = 1; | 4382 | mddev->safemode = 1; |
| 4328 | if (mddev->external) | 4383 | if (mddev->external) |
| 4329 | sysfs_notify_dirent(mddev->sysfs_state); | 4384 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
| 4330 | } | 4385 | } |
| 4331 | md_wakeup_thread(mddev->thread); | 4386 | md_wakeup_thread(mddev->thread); |
| 4332 | } | 4387 | } |
| 4333 | 4388 | ||
| 4334 | static int start_dirty_degraded; | 4389 | static int start_dirty_degraded; |
| 4335 | 4390 | ||
| 4336 | static int md_run(mddev_t *mddev) | 4391 | int md_run(mddev_t *mddev) |
| 4337 | { | 4392 | { |
| 4338 | int err; | 4393 | int err; |
| 4339 | mdk_rdev_t *rdev; | 4394 | mdk_rdev_t *rdev; |
| @@ -4345,13 +4400,9 @@ static int md_run(mddev_t *mddev) | |||
| 4345 | 4400 | ||
| 4346 | if (mddev->pers) | 4401 | if (mddev->pers) |
| 4347 | return -EBUSY; | 4402 | return -EBUSY; |
| 4348 | 4403 | /* Cannot run until previous stop completes properly */ | |
| 4349 | /* These two calls synchronise us with the | 4404 | if (mddev->sysfs_active) |
| 4350 | * sysfs_remove_group calls in mddev_unlock, | 4405 | return -EBUSY; |
| 4351 | * so they must have completed. | ||
| 4352 | */ | ||
| 4353 | mutex_lock(&mddev->open_mutex); | ||
| 4354 | mutex_unlock(&mddev->open_mutex); | ||
| 4355 | 4406 | ||
| 4356 | /* | 4407 | /* |
| 4357 | * Analyze all RAID superblock(s) | 4408 | * Analyze all RAID superblock(s) |
| @@ -4398,7 +4449,7 @@ static int md_run(mddev_t *mddev) | |||
| 4398 | return -EINVAL; | 4449 | return -EINVAL; |
| 4399 | } | 4450 | } |
| 4400 | } | 4451 | } |
| 4401 | sysfs_notify_dirent(rdev->sysfs_state); | 4452 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
| 4402 | } | 4453 | } |
| 4403 | 4454 | ||
| 4404 | spin_lock(&pers_lock); | 4455 | spin_lock(&pers_lock); |
| @@ -4497,11 +4548,12 @@ static int md_run(mddev_t *mddev) | |||
| 4497 | return err; | 4548 | return err; |
| 4498 | } | 4549 | } |
| 4499 | if (mddev->pers->sync_request) { | 4550 | if (mddev->pers->sync_request) { |
| 4500 | if (sysfs_create_group(&mddev->kobj, &md_redundancy_group)) | 4551 | if (mddev->kobj.sd && |
| 4552 | sysfs_create_group(&mddev->kobj, &md_redundancy_group)) | ||
| 4501 | printk(KERN_WARNING | 4553 | printk(KERN_WARNING |
| 4502 | "md: cannot register extra attributes for %s\n", | 4554 | "md: cannot register extra attributes for %s\n", |
| 4503 | mdname(mddev)); | 4555 | mdname(mddev)); |
| 4504 | mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action"); | 4556 | mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action"); |
| 4505 | } else if (mddev->ro == 2) /* auto-readonly not meaningful */ | 4557 | } else if (mddev->ro == 2) /* auto-readonly not meaningful */ |
| 4506 | mddev->ro = 0; | 4558 | mddev->ro = 0; |
| 4507 | 4559 | ||
| @@ -4519,8 +4571,7 @@ static int md_run(mddev_t *mddev) | |||
| 4519 | char nm[20]; | 4571 | char nm[20]; |
| 4520 | sprintf(nm, "rd%d", rdev->raid_disk); | 4572 | sprintf(nm, "rd%d", rdev->raid_disk); |
| 4521 | if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) | 4573 | if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) |
| 4522 | printk("md: cannot register %s for %s\n", | 4574 | /* failure here is OK */; |
| 4523 | nm, mdname(mddev)); | ||
| 4524 | } | 4575 | } |
| 4525 | 4576 | ||
| 4526 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 4577 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
| @@ -4532,12 +4583,12 @@ static int md_run(mddev_t *mddev) | |||
| 4532 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ | 4583 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ |
| 4533 | 4584 | ||
| 4534 | md_new_event(mddev); | 4585 | md_new_event(mddev); |
| 4535 | sysfs_notify_dirent(mddev->sysfs_state); | 4586 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
| 4536 | if (mddev->sysfs_action) | 4587 | sysfs_notify_dirent_safe(mddev->sysfs_action); |
| 4537 | sysfs_notify_dirent(mddev->sysfs_action); | ||
| 4538 | sysfs_notify(&mddev->kobj, NULL, "degraded"); | 4588 | sysfs_notify(&mddev->kobj, NULL, "degraded"); |
| 4539 | return 0; | 4589 | return 0; |
| 4540 | } | 4590 | } |
| 4591 | EXPORT_SYMBOL_GPL(md_run); | ||
| 4541 | 4592 | ||
| 4542 | static int do_md_run(mddev_t *mddev) | 4593 | static int do_md_run(mddev_t *mddev) |
| 4543 | { | 4594 | { |
| @@ -4546,7 +4597,11 @@ static int do_md_run(mddev_t *mddev) | |||
| 4546 | err = md_run(mddev); | 4597 | err = md_run(mddev); |
| 4547 | if (err) | 4598 | if (err) |
| 4548 | goto out; | 4599 | goto out; |
| 4549 | 4600 | err = bitmap_load(mddev); | |
| 4601 | if (err) { | ||
| 4602 | bitmap_destroy(mddev); | ||
| 4603 | goto out; | ||
| 4604 | } | ||
| 4550 | set_capacity(mddev->gendisk, mddev->array_sectors); | 4605 | set_capacity(mddev->gendisk, mddev->array_sectors); |
| 4551 | revalidate_disk(mddev->gendisk); | 4606 | revalidate_disk(mddev->gendisk); |
| 4552 | kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); | 4607 | kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); |
| @@ -4574,7 +4629,7 @@ static int restart_array(mddev_t *mddev) | |||
| 4574 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 4629 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
| 4575 | md_wakeup_thread(mddev->thread); | 4630 | md_wakeup_thread(mddev->thread); |
| 4576 | md_wakeup_thread(mddev->sync_thread); | 4631 | md_wakeup_thread(mddev->sync_thread); |
| 4577 | sysfs_notify_dirent(mddev->sysfs_state); | 4632 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
| 4578 | return 0; | 4633 | return 0; |
| 4579 | } | 4634 | } |
| 4580 | 4635 | ||
| @@ -4645,9 +4700,10 @@ static void md_clean(mddev_t *mddev) | |||
| 4645 | mddev->bitmap_info.chunksize = 0; | 4700 | mddev->bitmap_info.chunksize = 0; |
| 4646 | mddev->bitmap_info.daemon_sleep = 0; | 4701 | mddev->bitmap_info.daemon_sleep = 0; |
| 4647 | mddev->bitmap_info.max_write_behind = 0; | 4702 | mddev->bitmap_info.max_write_behind = 0; |
| 4703 | mddev->plug = NULL; | ||
| 4648 | } | 4704 | } |
| 4649 | 4705 | ||
| 4650 | static void md_stop_writes(mddev_t *mddev) | 4706 | void md_stop_writes(mddev_t *mddev) |
| 4651 | { | 4707 | { |
| 4652 | if (mddev->sync_thread) { | 4708 | if (mddev->sync_thread) { |
| 4653 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 4709 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
| @@ -4667,11 +4723,10 @@ static void md_stop_writes(mddev_t *mddev) | |||
| 4667 | md_update_sb(mddev, 1); | 4723 | md_update_sb(mddev, 1); |
| 4668 | } | 4724 | } |
| 4669 | } | 4725 | } |
| 4726 | EXPORT_SYMBOL_GPL(md_stop_writes); | ||
| 4670 | 4727 | ||
| 4671 | static void md_stop(mddev_t *mddev) | 4728 | void md_stop(mddev_t *mddev) |
| 4672 | { | 4729 | { |
| 4673 | md_stop_writes(mddev); | ||
| 4674 | |||
| 4675 | mddev->pers->stop(mddev); | 4730 | mddev->pers->stop(mddev); |
| 4676 | if (mddev->pers->sync_request && mddev->to_remove == NULL) | 4731 | if (mddev->pers->sync_request && mddev->to_remove == NULL) |
| 4677 | mddev->to_remove = &md_redundancy_group; | 4732 | mddev->to_remove = &md_redundancy_group; |
| @@ -4679,6 +4734,7 @@ static void md_stop(mddev_t *mddev) | |||
| 4679 | mddev->pers = NULL; | 4734 | mddev->pers = NULL; |
| 4680 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 4735 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
| 4681 | } | 4736 | } |
| 4737 | EXPORT_SYMBOL_GPL(md_stop); | ||
| 4682 | 4738 | ||
| 4683 | static int md_set_readonly(mddev_t *mddev, int is_open) | 4739 | static int md_set_readonly(mddev_t *mddev, int is_open) |
| 4684 | { | 4740 | { |
| @@ -4698,7 +4754,7 @@ static int md_set_readonly(mddev_t *mddev, int is_open) | |||
| 4698 | mddev->ro = 1; | 4754 | mddev->ro = 1; |
| 4699 | set_disk_ro(mddev->gendisk, 1); | 4755 | set_disk_ro(mddev->gendisk, 1); |
| 4700 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 4756 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
| 4701 | sysfs_notify_dirent(mddev->sysfs_state); | 4757 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
| 4702 | err = 0; | 4758 | err = 0; |
| 4703 | } | 4759 | } |
| 4704 | out: | 4760 | out: |
| @@ -4712,26 +4768,29 @@ out: | |||
| 4712 | */ | 4768 | */ |
| 4713 | static int do_md_stop(mddev_t * mddev, int mode, int is_open) | 4769 | static int do_md_stop(mddev_t * mddev, int mode, int is_open) |
| 4714 | { | 4770 | { |
| 4715 | int err = 0; | ||
| 4716 | struct gendisk *disk = mddev->gendisk; | 4771 | struct gendisk *disk = mddev->gendisk; |
| 4717 | mdk_rdev_t *rdev; | 4772 | mdk_rdev_t *rdev; |
| 4718 | 4773 | ||
| 4719 | mutex_lock(&mddev->open_mutex); | 4774 | mutex_lock(&mddev->open_mutex); |
| 4720 | if (atomic_read(&mddev->openers) > is_open) { | 4775 | if (atomic_read(&mddev->openers) > is_open || |
| 4776 | mddev->sysfs_active) { | ||
| 4721 | printk("md: %s still in use.\n",mdname(mddev)); | 4777 | printk("md: %s still in use.\n",mdname(mddev)); |
| 4722 | err = -EBUSY; | 4778 | mutex_unlock(&mddev->open_mutex); |
| 4723 | } else if (mddev->pers) { | 4779 | return -EBUSY; |
| 4780 | } | ||
| 4724 | 4781 | ||
| 4782 | if (mddev->pers) { | ||
| 4725 | if (mddev->ro) | 4783 | if (mddev->ro) |
| 4726 | set_disk_ro(disk, 0); | 4784 | set_disk_ro(disk, 0); |
| 4727 | 4785 | ||
| 4786 | md_stop_writes(mddev); | ||
| 4728 | md_stop(mddev); | 4787 | md_stop(mddev); |
| 4729 | mddev->queue->merge_bvec_fn = NULL; | 4788 | mddev->queue->merge_bvec_fn = NULL; |
| 4730 | mddev->queue->unplug_fn = NULL; | 4789 | mddev->queue->unplug_fn = NULL; |
| 4731 | mddev->queue->backing_dev_info.congested_fn = NULL; | 4790 | mddev->queue->backing_dev_info.congested_fn = NULL; |
| 4732 | 4791 | ||
| 4733 | /* tell userspace to handle 'inactive' */ | 4792 | /* tell userspace to handle 'inactive' */ |
| 4734 | sysfs_notify_dirent(mddev->sysfs_state); | 4793 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
| 4735 | 4794 | ||
| 4736 | list_for_each_entry(rdev, &mddev->disks, same_set) | 4795 | list_for_each_entry(rdev, &mddev->disks, same_set) |
| 4737 | if (rdev->raid_disk >= 0) { | 4796 | if (rdev->raid_disk >= 0) { |
| @@ -4741,21 +4800,17 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
| 4741 | } | 4800 | } |
| 4742 | 4801 | ||
| 4743 | set_capacity(disk, 0); | 4802 | set_capacity(disk, 0); |
| 4803 | mutex_unlock(&mddev->open_mutex); | ||
| 4744 | revalidate_disk(disk); | 4804 | revalidate_disk(disk); |
| 4745 | 4805 | ||
| 4746 | if (mddev->ro) | 4806 | if (mddev->ro) |
| 4747 | mddev->ro = 0; | 4807 | mddev->ro = 0; |
| 4748 | 4808 | } else | |
| 4749 | err = 0; | 4809 | mutex_unlock(&mddev->open_mutex); |
| 4750 | } | ||
| 4751 | mutex_unlock(&mddev->open_mutex); | ||
| 4752 | if (err) | ||
| 4753 | return err; | ||
| 4754 | /* | 4810 | /* |
| 4755 | * Free resources if final stop | 4811 | * Free resources if final stop |
| 4756 | */ | 4812 | */ |
| 4757 | if (mode == 0) { | 4813 | if (mode == 0) { |
| 4758 | |||
| 4759 | printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); | 4814 | printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); |
| 4760 | 4815 | ||
| 4761 | bitmap_destroy(mddev); | 4816 | bitmap_destroy(mddev); |
| @@ -4772,13 +4827,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
| 4772 | kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); | 4827 | kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); |
| 4773 | if (mddev->hold_active == UNTIL_STOP) | 4828 | if (mddev->hold_active == UNTIL_STOP) |
| 4774 | mddev->hold_active = 0; | 4829 | mddev->hold_active = 0; |
| 4775 | |||
| 4776 | } | 4830 | } |
| 4777 | err = 0; | ||
| 4778 | blk_integrity_unregister(disk); | 4831 | blk_integrity_unregister(disk); |
| 4779 | md_new_event(mddev); | 4832 | md_new_event(mddev); |
| 4780 | sysfs_notify_dirent(mddev->sysfs_state); | 4833 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
| 4781 | return err; | 4834 | return 0; |
| 4782 | } | 4835 | } |
| 4783 | 4836 | ||
| 4784 | #ifndef MODULE | 4837 | #ifndef MODULE |
| @@ -5139,7 +5192,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
| 5139 | if (err) | 5192 | if (err) |
| 5140 | export_rdev(rdev); | 5193 | export_rdev(rdev); |
| 5141 | else | 5194 | else |
| 5142 | sysfs_notify_dirent(rdev->sysfs_state); | 5195 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
| 5143 | 5196 | ||
| 5144 | md_update_sb(mddev, 1); | 5197 | md_update_sb(mddev, 1); |
| 5145 | if (mddev->degraded) | 5198 | if (mddev->degraded) |
| @@ -5332,8 +5385,11 @@ static int set_bitmap_file(mddev_t *mddev, int fd) | |||
| 5332 | err = 0; | 5385 | err = 0; |
| 5333 | if (mddev->pers) { | 5386 | if (mddev->pers) { |
| 5334 | mddev->pers->quiesce(mddev, 1); | 5387 | mddev->pers->quiesce(mddev, 1); |
| 5335 | if (fd >= 0) | 5388 | if (fd >= 0) { |
| 5336 | err = bitmap_create(mddev); | 5389 | err = bitmap_create(mddev); |
| 5390 | if (!err) | ||
| 5391 | err = bitmap_load(mddev); | ||
| 5392 | } | ||
| 5337 | if (fd < 0 || err) { | 5393 | if (fd < 0 || err) { |
| 5338 | bitmap_destroy(mddev); | 5394 | bitmap_destroy(mddev); |
| 5339 | fd = -1; /* make sure to put the file */ | 5395 | fd = -1; /* make sure to put the file */ |
| @@ -5582,6 +5638,8 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
| 5582 | mddev->bitmap_info.default_offset; | 5638 | mddev->bitmap_info.default_offset; |
| 5583 | mddev->pers->quiesce(mddev, 1); | 5639 | mddev->pers->quiesce(mddev, 1); |
| 5584 | rv = bitmap_create(mddev); | 5640 | rv = bitmap_create(mddev); |
| 5641 | if (!rv) | ||
| 5642 | rv = bitmap_load(mddev); | ||
| 5585 | if (rv) | 5643 | if (rv) |
| 5586 | bitmap_destroy(mddev); | 5644 | bitmap_destroy(mddev); |
| 5587 | mddev->pers->quiesce(mddev, 0); | 5645 | mddev->pers->quiesce(mddev, 0); |
| @@ -5814,7 +5872,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, | |||
| 5814 | if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) { | 5872 | if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) { |
| 5815 | if (mddev->ro == 2) { | 5873 | if (mddev->ro == 2) { |
| 5816 | mddev->ro = 0; | 5874 | mddev->ro = 0; |
| 5817 | sysfs_notify_dirent(mddev->sysfs_state); | 5875 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
| 5818 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 5876 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
| 5819 | md_wakeup_thread(mddev->thread); | 5877 | md_wakeup_thread(mddev->thread); |
| 5820 | } else { | 5878 | } else { |
| @@ -6065,10 +6123,12 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 6065 | mddev->pers->error_handler(mddev,rdev); | 6123 | mddev->pers->error_handler(mddev,rdev); |
| 6066 | if (mddev->degraded) | 6124 | if (mddev->degraded) |
| 6067 | set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); | 6125 | set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); |
| 6068 | sysfs_notify_dirent(rdev->sysfs_state); | 6126 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
| 6069 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 6127 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
| 6070 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 6128 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
| 6071 | md_wakeup_thread(mddev->thread); | 6129 | md_wakeup_thread(mddev->thread); |
| 6130 | if (mddev->event_work.func) | ||
| 6131 | schedule_work(&mddev->event_work); | ||
| 6072 | md_new_event_inintr(mddev); | 6132 | md_new_event_inintr(mddev); |
| 6073 | } | 6133 | } |
| 6074 | 6134 | ||
| @@ -6526,7 +6586,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi) | |||
| 6526 | spin_unlock_irq(&mddev->write_lock); | 6586 | spin_unlock_irq(&mddev->write_lock); |
| 6527 | } | 6587 | } |
| 6528 | if (did_change) | 6588 | if (did_change) |
| 6529 | sysfs_notify_dirent(mddev->sysfs_state); | 6589 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
| 6530 | wait_event(mddev->sb_wait, | 6590 | wait_event(mddev->sb_wait, |
| 6531 | !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && | 6591 | !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && |
| 6532 | !test_bit(MD_CHANGE_PENDING, &mddev->flags)); | 6592 | !test_bit(MD_CHANGE_PENDING, &mddev->flags)); |
| @@ -6569,7 +6629,7 @@ int md_allow_write(mddev_t *mddev) | |||
| 6569 | mddev->safemode = 1; | 6629 | mddev->safemode = 1; |
| 6570 | spin_unlock_irq(&mddev->write_lock); | 6630 | spin_unlock_irq(&mddev->write_lock); |
| 6571 | md_update_sb(mddev, 0); | 6631 | md_update_sb(mddev, 0); |
| 6572 | sysfs_notify_dirent(mddev->sysfs_state); | 6632 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
| 6573 | } else | 6633 | } else |
| 6574 | spin_unlock_irq(&mddev->write_lock); | 6634 | spin_unlock_irq(&mddev->write_lock); |
| 6575 | 6635 | ||
| @@ -6580,6 +6640,14 @@ int md_allow_write(mddev_t *mddev) | |||
| 6580 | } | 6640 | } |
| 6581 | EXPORT_SYMBOL_GPL(md_allow_write); | 6641 | EXPORT_SYMBOL_GPL(md_allow_write); |
| 6582 | 6642 | ||
| 6643 | void md_unplug(mddev_t *mddev) | ||
| 6644 | { | ||
| 6645 | if (mddev->queue) | ||
| 6646 | blk_unplug(mddev->queue); | ||
| 6647 | if (mddev->plug) | ||
| 6648 | mddev->plug->unplug_fn(mddev->plug); | ||
| 6649 | } | ||
| 6650 | |||
| 6583 | #define SYNC_MARKS 10 | 6651 | #define SYNC_MARKS 10 |
| 6584 | #define SYNC_MARK_STEP (3*HZ) | 6652 | #define SYNC_MARK_STEP (3*HZ) |
| 6585 | void md_do_sync(mddev_t *mddev) | 6653 | void md_do_sync(mddev_t *mddev) |
| @@ -6758,12 +6826,13 @@ void md_do_sync(mddev_t *mddev) | |||
| 6758 | >= mddev->resync_max - mddev->curr_resync_completed | 6826 | >= mddev->resync_max - mddev->curr_resync_completed |
| 6759 | )) { | 6827 | )) { |
| 6760 | /* time to update curr_resync_completed */ | 6828 | /* time to update curr_resync_completed */ |
| 6761 | blk_unplug(mddev->queue); | 6829 | md_unplug(mddev); |
| 6762 | wait_event(mddev->recovery_wait, | 6830 | wait_event(mddev->recovery_wait, |
| 6763 | atomic_read(&mddev->recovery_active) == 0); | 6831 | atomic_read(&mddev->recovery_active) == 0); |
| 6764 | mddev->curr_resync_completed = | 6832 | mddev->curr_resync_completed = |
| 6765 | mddev->curr_resync; | 6833 | mddev->curr_resync; |
| 6766 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | 6834 | if (mddev->persistent) |
| 6835 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | ||
| 6767 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); | 6836 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); |
| 6768 | } | 6837 | } |
| 6769 | 6838 | ||
| @@ -6835,7 +6904,7 @@ void md_do_sync(mddev_t *mddev) | |||
| 6835 | * about not overloading the IO subsystem. (things like an | 6904 | * about not overloading the IO subsystem. (things like an |
| 6836 | * e2fsck being done on the RAID array should execute fast) | 6905 | * e2fsck being done on the RAID array should execute fast) |
| 6837 | */ | 6906 | */ |
| 6838 | blk_unplug(mddev->queue); | 6907 | md_unplug(mddev); |
| 6839 | cond_resched(); | 6908 | cond_resched(); |
| 6840 | 6909 | ||
| 6841 | currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2 | 6910 | currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2 |
| @@ -6854,7 +6923,7 @@ void md_do_sync(mddev_t *mddev) | |||
| 6854 | * this also signals 'finished resyncing' to md_stop | 6923 | * this also signals 'finished resyncing' to md_stop |
| 6855 | */ | 6924 | */ |
| 6856 | out: | 6925 | out: |
| 6857 | blk_unplug(mddev->queue); | 6926 | md_unplug(mddev); |
| 6858 | 6927 | ||
| 6859 | wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); | 6928 | wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); |
| 6860 | 6929 | ||
| @@ -6956,10 +7025,7 @@ static int remove_and_add_spares(mddev_t *mddev) | |||
| 6956 | sprintf(nm, "rd%d", rdev->raid_disk); | 7025 | sprintf(nm, "rd%d", rdev->raid_disk); |
| 6957 | if (sysfs_create_link(&mddev->kobj, | 7026 | if (sysfs_create_link(&mddev->kobj, |
| 6958 | &rdev->kobj, nm)) | 7027 | &rdev->kobj, nm)) |
| 6959 | printk(KERN_WARNING | 7028 | /* failure here is OK */; |
| 6960 | "md: cannot register " | ||
| 6961 | "%s for %s\n", | ||
| 6962 | nm, mdname(mddev)); | ||
| 6963 | spares++; | 7029 | spares++; |
| 6964 | md_new_event(mddev); | 7030 | md_new_event(mddev); |
| 6965 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 7031 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
| @@ -7052,7 +7118,7 @@ void md_check_recovery(mddev_t *mddev) | |||
| 7052 | mddev->safemode = 0; | 7118 | mddev->safemode = 0; |
| 7053 | spin_unlock_irq(&mddev->write_lock); | 7119 | spin_unlock_irq(&mddev->write_lock); |
| 7054 | if (did_change) | 7120 | if (did_change) |
| 7055 | sysfs_notify_dirent(mddev->sysfs_state); | 7121 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
| 7056 | } | 7122 | } |
| 7057 | 7123 | ||
| 7058 | if (mddev->flags) | 7124 | if (mddev->flags) |
| @@ -7091,7 +7157,7 @@ void md_check_recovery(mddev_t *mddev) | |||
| 7091 | mddev->recovery = 0; | 7157 | mddev->recovery = 0; |
| 7092 | /* flag recovery needed just to double check */ | 7158 | /* flag recovery needed just to double check */ |
| 7093 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 7159 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
| 7094 | sysfs_notify_dirent(mddev->sysfs_action); | 7160 | sysfs_notify_dirent_safe(mddev->sysfs_action); |
| 7095 | md_new_event(mddev); | 7161 | md_new_event(mddev); |
| 7096 | goto unlock; | 7162 | goto unlock; |
| 7097 | } | 7163 | } |
| @@ -7153,7 +7219,7 @@ void md_check_recovery(mddev_t *mddev) | |||
| 7153 | mddev->recovery = 0; | 7219 | mddev->recovery = 0; |
| 7154 | } else | 7220 | } else |
| 7155 | md_wakeup_thread(mddev->sync_thread); | 7221 | md_wakeup_thread(mddev->sync_thread); |
| 7156 | sysfs_notify_dirent(mddev->sysfs_action); | 7222 | sysfs_notify_dirent_safe(mddev->sysfs_action); |
| 7157 | md_new_event(mddev); | 7223 | md_new_event(mddev); |
| 7158 | } | 7224 | } |
| 7159 | unlock: | 7225 | unlock: |
| @@ -7162,7 +7228,7 @@ void md_check_recovery(mddev_t *mddev) | |||
| 7162 | if (test_and_clear_bit(MD_RECOVERY_RECOVER, | 7228 | if (test_and_clear_bit(MD_RECOVERY_RECOVER, |
| 7163 | &mddev->recovery)) | 7229 | &mddev->recovery)) |
| 7164 | if (mddev->sysfs_action) | 7230 | if (mddev->sysfs_action) |
| 7165 | sysfs_notify_dirent(mddev->sysfs_action); | 7231 | sysfs_notify_dirent_safe(mddev->sysfs_action); |
| 7166 | } | 7232 | } |
| 7167 | mddev_unlock(mddev); | 7233 | mddev_unlock(mddev); |
| 7168 | } | 7234 | } |
| @@ -7170,7 +7236,7 @@ void md_check_recovery(mddev_t *mddev) | |||
| 7170 | 7236 | ||
| 7171 | void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev) | 7237 | void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev) |
| 7172 | { | 7238 | { |
| 7173 | sysfs_notify_dirent(rdev->sysfs_state); | 7239 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
| 7174 | wait_event_timeout(rdev->blocked_wait, | 7240 | wait_event_timeout(rdev->blocked_wait, |
| 7175 | !test_bit(Blocked, &rdev->flags), | 7241 | !test_bit(Blocked, &rdev->flags), |
| 7176 | msecs_to_jiffies(5000)); | 7242 | msecs_to_jiffies(5000)); |
diff --git a/drivers/md/md.h b/drivers/md/md.h index fc56e0f21c80..a953fe2808ae 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
| @@ -29,6 +29,26 @@ | |||
| 29 | typedef struct mddev_s mddev_t; | 29 | typedef struct mddev_s mddev_t; |
| 30 | typedef struct mdk_rdev_s mdk_rdev_t; | 30 | typedef struct mdk_rdev_s mdk_rdev_t; |
| 31 | 31 | ||
| 32 | /* generic plugging support - like that provided with request_queue, | ||
| 33 | * but does not require a request_queue | ||
| 34 | */ | ||
| 35 | struct plug_handle { | ||
| 36 | void (*unplug_fn)(struct plug_handle *); | ||
| 37 | struct timer_list unplug_timer; | ||
| 38 | struct work_struct unplug_work; | ||
| 39 | unsigned long unplug_flag; | ||
| 40 | }; | ||
| 41 | #define PLUGGED_FLAG 1 | ||
| 42 | void plugger_init(struct plug_handle *plug, | ||
| 43 | void (*unplug_fn)(struct plug_handle *)); | ||
| 44 | void plugger_set_plug(struct plug_handle *plug); | ||
| 45 | int plugger_remove_plug(struct plug_handle *plug); | ||
| 46 | static inline void plugger_flush(struct plug_handle *plug) | ||
| 47 | { | ||
| 48 | del_timer_sync(&plug->unplug_timer); | ||
| 49 | cancel_work_sync(&plug->unplug_work); | ||
| 50 | } | ||
| 51 | |||
| 32 | /* | 52 | /* |
| 33 | * MD's 'extended' device | 53 | * MD's 'extended' device |
| 34 | */ | 54 | */ |
| @@ -125,6 +145,10 @@ struct mddev_s | |||
| 125 | int suspended; | 145 | int suspended; |
| 126 | atomic_t active_io; | 146 | atomic_t active_io; |
| 127 | int ro; | 147 | int ro; |
| 148 | int sysfs_active; /* set when sysfs deletes | ||
| 149 | * are happening, so run/ | ||
| 150 | * takeover/stop are not safe | ||
| 151 | */ | ||
| 128 | 152 | ||
| 129 | struct gendisk *gendisk; | 153 | struct gendisk *gendisk; |
| 130 | 154 | ||
| @@ -297,9 +321,14 @@ struct mddev_s | |||
| 297 | * hot-adding a bitmap. It should | 321 | * hot-adding a bitmap. It should |
| 298 | * eventually be settable by sysfs. | 322 | * eventually be settable by sysfs. |
| 299 | */ | 323 | */ |
| 324 | /* When md is serving under dm, it might use a | ||
| 325 | * dirty_log to store the bits. | ||
| 326 | */ | ||
| 327 | struct dm_dirty_log *log; | ||
| 328 | |||
| 300 | struct mutex mutex; | 329 | struct mutex mutex; |
| 301 | unsigned long chunksize; | 330 | unsigned long chunksize; |
| 302 | unsigned long daemon_sleep; /* how many seconds between updates? */ | 331 | unsigned long daemon_sleep; /* how many jiffies between updates? */ |
| 303 | unsigned long max_write_behind; /* write-behind mode */ | 332 | unsigned long max_write_behind; /* write-behind mode */ |
| 304 | int external; | 333 | int external; |
| 305 | } bitmap_info; | 334 | } bitmap_info; |
| @@ -308,6 +337,8 @@ struct mddev_s | |||
| 308 | struct list_head all_mddevs; | 337 | struct list_head all_mddevs; |
| 309 | 338 | ||
| 310 | struct attribute_group *to_remove; | 339 | struct attribute_group *to_remove; |
| 340 | struct plug_handle *plug; /* if used by personality */ | ||
| 341 | |||
| 311 | /* Generic barrier handling. | 342 | /* Generic barrier handling. |
| 312 | * If there is a pending barrier request, all other | 343 | * If there is a pending barrier request, all other |
| 313 | * writes are blocked while the devices are flushed. | 344 | * writes are blocked while the devices are flushed. |
| @@ -318,6 +349,7 @@ struct mddev_s | |||
| 318 | struct bio *barrier; | 349 | struct bio *barrier; |
| 319 | atomic_t flush_pending; | 350 | atomic_t flush_pending; |
| 320 | struct work_struct barrier_work; | 351 | struct work_struct barrier_work; |
| 352 | struct work_struct event_work; /* used by dm to report failure event */ | ||
| 321 | }; | 353 | }; |
| 322 | 354 | ||
| 323 | 355 | ||
| @@ -382,6 +414,18 @@ struct md_sysfs_entry { | |||
| 382 | }; | 414 | }; |
| 383 | extern struct attribute_group md_bitmap_group; | 415 | extern struct attribute_group md_bitmap_group; |
| 384 | 416 | ||
| 417 | static inline struct sysfs_dirent *sysfs_get_dirent_safe(struct sysfs_dirent *sd, char *name) | ||
| 418 | { | ||
| 419 | if (sd) | ||
| 420 | return sysfs_get_dirent(sd, NULL, name); | ||
| 421 | return sd; | ||
| 422 | } | ||
| 423 | static inline void sysfs_notify_dirent_safe(struct sysfs_dirent *sd) | ||
| 424 | { | ||
| 425 | if (sd) | ||
| 426 | sysfs_notify_dirent(sd); | ||
| 427 | } | ||
| 428 | |||
| 385 | static inline char * mdname (mddev_t * mddev) | 429 | static inline char * mdname (mddev_t * mddev) |
| 386 | { | 430 | { |
| 387 | return mddev->gendisk ? mddev->gendisk->disk_name : "mdX"; | 431 | return mddev->gendisk ? mddev->gendisk->disk_name : "mdX"; |
| @@ -474,5 +518,14 @@ extern int md_integrity_register(mddev_t *mddev); | |||
| 474 | extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); | 518 | extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); |
| 475 | extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); | 519 | extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); |
| 476 | extern void restore_bitmap_write_access(struct file *file); | 520 | extern void restore_bitmap_write_access(struct file *file); |
| 521 | extern void md_unplug(mddev_t *mddev); | ||
| 522 | |||
| 523 | extern void mddev_init(mddev_t *mddev); | ||
| 524 | extern int md_run(mddev_t *mddev); | ||
| 525 | extern void md_stop(mddev_t *mddev); | ||
| 526 | extern void md_stop_writes(mddev_t *mddev); | ||
| 527 | extern void md_rdev_init(mdk_rdev_t *rdev); | ||
| 477 | 528 | ||
| 529 | extern void mddev_suspend(mddev_t *mddev); | ||
| 530 | extern void mddev_resume(mddev_t *mddev); | ||
| 478 | #endif /* _MD_MD_H */ | 531 | #endif /* _MD_MD_H */ |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 62ecb6650fd0..a88aeb5198c7 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
| @@ -825,11 +825,29 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
| 825 | */ | 825 | */ |
| 826 | bp = bio_split(bio, | 826 | bp = bio_split(bio, |
| 827 | chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); | 827 | chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); |
| 828 | |||
| 829 | /* Each of these 'make_request' calls will call 'wait_barrier'. | ||
| 830 | * If the first succeeds but the second blocks due to the resync | ||
| 831 | * thread raising the barrier, we will deadlock because the | ||
| 832 | * IO to the underlying device will be queued in generic_make_request | ||
| 833 | * and will never complete, so will never reduce nr_pending. | ||
| 834 | * So increment nr_waiting here so no new raise_barriers will | ||
| 835 | * succeed, and so the second wait_barrier cannot block. | ||
| 836 | */ | ||
| 837 | spin_lock_irq(&conf->resync_lock); | ||
| 838 | conf->nr_waiting++; | ||
| 839 | spin_unlock_irq(&conf->resync_lock); | ||
| 840 | |||
| 828 | if (make_request(mddev, &bp->bio1)) | 841 | if (make_request(mddev, &bp->bio1)) |
| 829 | generic_make_request(&bp->bio1); | 842 | generic_make_request(&bp->bio1); |
| 830 | if (make_request(mddev, &bp->bio2)) | 843 | if (make_request(mddev, &bp->bio2)) |
| 831 | generic_make_request(&bp->bio2); | 844 | generic_make_request(&bp->bio2); |
| 832 | 845 | ||
| 846 | spin_lock_irq(&conf->resync_lock); | ||
| 847 | conf->nr_waiting--; | ||
| 848 | wake_up(&conf->wait_barrier); | ||
| 849 | spin_unlock_irq(&conf->resync_lock); | ||
| 850 | |||
| 833 | bio_pair_release(bp); | 851 | bio_pair_release(bp); |
| 834 | return 0; | 852 | return 0; |
| 835 | bad_map: | 853 | bad_map: |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 20ac2f14376a..866d4b5a144c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
| @@ -201,11 +201,11 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) | |||
| 201 | if (test_bit(STRIPE_HANDLE, &sh->state)) { | 201 | if (test_bit(STRIPE_HANDLE, &sh->state)) { |
| 202 | if (test_bit(STRIPE_DELAYED, &sh->state)) { | 202 | if (test_bit(STRIPE_DELAYED, &sh->state)) { |
| 203 | list_add_tail(&sh->lru, &conf->delayed_list); | 203 | list_add_tail(&sh->lru, &conf->delayed_list); |
| 204 | blk_plug_device(conf->mddev->queue); | 204 | plugger_set_plug(&conf->plug); |
| 205 | } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && | 205 | } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && |
| 206 | sh->bm_seq - conf->seq_write > 0) { | 206 | sh->bm_seq - conf->seq_write > 0) { |
| 207 | list_add_tail(&sh->lru, &conf->bitmap_list); | 207 | list_add_tail(&sh->lru, &conf->bitmap_list); |
| 208 | blk_plug_device(conf->mddev->queue); | 208 | plugger_set_plug(&conf->plug); |
| 209 | } else { | 209 | } else { |
| 210 | clear_bit(STRIPE_BIT_DELAY, &sh->state); | 210 | clear_bit(STRIPE_BIT_DELAY, &sh->state); |
| 211 | list_add_tail(&sh->lru, &conf->handle_list); | 211 | list_add_tail(&sh->lru, &conf->handle_list); |
| @@ -434,7 +434,6 @@ static int has_failed(raid5_conf_t *conf) | |||
| 434 | } | 434 | } |
| 435 | 435 | ||
| 436 | static void unplug_slaves(mddev_t *mddev); | 436 | static void unplug_slaves(mddev_t *mddev); |
| 437 | static void raid5_unplug_device(struct request_queue *q); | ||
| 438 | 437 | ||
| 439 | static struct stripe_head * | 438 | static struct stripe_head * |
| 440 | get_active_stripe(raid5_conf_t *conf, sector_t sector, | 439 | get_active_stripe(raid5_conf_t *conf, sector_t sector, |
| @@ -464,7 +463,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector, | |||
| 464 | < (conf->max_nr_stripes *3/4) | 463 | < (conf->max_nr_stripes *3/4) |
| 465 | || !conf->inactive_blocked), | 464 | || !conf->inactive_blocked), |
| 466 | conf->device_lock, | 465 | conf->device_lock, |
| 467 | raid5_unplug_device(conf->mddev->queue) | 466 | md_raid5_unplug_device(conf) |
| 468 | ); | 467 | ); |
| 469 | conf->inactive_blocked = 0; | 468 | conf->inactive_blocked = 0; |
| 470 | } else | 469 | } else |
| @@ -1337,10 +1336,14 @@ static int grow_stripes(raid5_conf_t *conf, int num) | |||
| 1337 | struct kmem_cache *sc; | 1336 | struct kmem_cache *sc; |
| 1338 | int devs = max(conf->raid_disks, conf->previous_raid_disks); | 1337 | int devs = max(conf->raid_disks, conf->previous_raid_disks); |
| 1339 | 1338 | ||
| 1340 | sprintf(conf->cache_name[0], | 1339 | if (conf->mddev->gendisk) |
| 1341 | "raid%d-%s", conf->level, mdname(conf->mddev)); | 1340 | sprintf(conf->cache_name[0], |
| 1342 | sprintf(conf->cache_name[1], | 1341 | "raid%d-%s", conf->level, mdname(conf->mddev)); |
| 1343 | "raid%d-%s-alt", conf->level, mdname(conf->mddev)); | 1342 | else |
| 1343 | sprintf(conf->cache_name[0], | ||
| 1344 | "raid%d-%p", conf->level, conf->mddev); | ||
| 1345 | sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]); | ||
| 1346 | |||
| 1344 | conf->active_name = 0; | 1347 | conf->active_name = 0; |
| 1345 | sc = kmem_cache_create(conf->cache_name[conf->active_name], | 1348 | sc = kmem_cache_create(conf->cache_name[conf->active_name], |
| 1346 | sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), | 1349 | sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), |
| @@ -3614,7 +3617,7 @@ static void raid5_activate_delayed(raid5_conf_t *conf) | |||
| 3614 | list_add_tail(&sh->lru, &conf->hold_list); | 3617 | list_add_tail(&sh->lru, &conf->hold_list); |
| 3615 | } | 3618 | } |
| 3616 | } else | 3619 | } else |
| 3617 | blk_plug_device(conf->mddev->queue); | 3620 | plugger_set_plug(&conf->plug); |
| 3618 | } | 3621 | } |
| 3619 | 3622 | ||
| 3620 | static void activate_bit_delay(raid5_conf_t *conf) | 3623 | static void activate_bit_delay(raid5_conf_t *conf) |
| @@ -3655,36 +3658,44 @@ static void unplug_slaves(mddev_t *mddev) | |||
| 3655 | rcu_read_unlock(); | 3658 | rcu_read_unlock(); |
| 3656 | } | 3659 | } |
| 3657 | 3660 | ||
| 3658 | static void raid5_unplug_device(struct request_queue *q) | 3661 | void md_raid5_unplug_device(raid5_conf_t *conf) |
| 3659 | { | 3662 | { |
| 3660 | mddev_t *mddev = q->queuedata; | ||
| 3661 | raid5_conf_t *conf = mddev->private; | ||
| 3662 | unsigned long flags; | 3663 | unsigned long flags; |
| 3663 | 3664 | ||
| 3664 | spin_lock_irqsave(&conf->device_lock, flags); | 3665 | spin_lock_irqsave(&conf->device_lock, flags); |
| 3665 | 3666 | ||
| 3666 | if (blk_remove_plug(q)) { | 3667 | if (plugger_remove_plug(&conf->plug)) { |
| 3667 | conf->seq_flush++; | 3668 | conf->seq_flush++; |
| 3668 | raid5_activate_delayed(conf); | 3669 | raid5_activate_delayed(conf); |
| 3669 | } | 3670 | } |
| 3670 | md_wakeup_thread(mddev->thread); | 3671 | md_wakeup_thread(conf->mddev->thread); |
| 3671 | 3672 | ||
| 3672 | spin_unlock_irqrestore(&conf->device_lock, flags); | 3673 | spin_unlock_irqrestore(&conf->device_lock, flags); |
| 3673 | 3674 | ||
| 3674 | unplug_slaves(mddev); | 3675 | unplug_slaves(conf->mddev); |
| 3675 | } | 3676 | } |
| 3677 | EXPORT_SYMBOL_GPL(md_raid5_unplug_device); | ||
| 3676 | 3678 | ||
| 3677 | static int raid5_congested(void *data, int bits) | 3679 | static void raid5_unplug(struct plug_handle *plug) |
| 3680 | { | ||
| 3681 | raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug); | ||
| 3682 | md_raid5_unplug_device(conf); | ||
| 3683 | } | ||
| 3684 | |||
| 3685 | static void raid5_unplug_queue(struct request_queue *q) | ||
| 3686 | { | ||
| 3687 | mddev_t *mddev = q->queuedata; | ||
| 3688 | md_raid5_unplug_device(mddev->private); | ||
| 3689 | } | ||
| 3690 | |||
| 3691 | int md_raid5_congested(mddev_t *mddev, int bits) | ||
| 3678 | { | 3692 | { |
| 3679 | mddev_t *mddev = data; | ||
| 3680 | raid5_conf_t *conf = mddev->private; | 3693 | raid5_conf_t *conf = mddev->private; |
| 3681 | 3694 | ||
| 3682 | /* No difference between reads and writes. Just check | 3695 | /* No difference between reads and writes. Just check |
| 3683 | * how busy the stripe_cache is | 3696 | * how busy the stripe_cache is |
| 3684 | */ | 3697 | */ |
| 3685 | 3698 | ||
| 3686 | if (mddev_congested(mddev, bits)) | ||
| 3687 | return 1; | ||
| 3688 | if (conf->inactive_blocked) | 3699 | if (conf->inactive_blocked) |
| 3689 | return 1; | 3700 | return 1; |
| 3690 | if (conf->quiesce) | 3701 | if (conf->quiesce) |
| @@ -3694,6 +3705,15 @@ static int raid5_congested(void *data, int bits) | |||
| 3694 | 3705 | ||
| 3695 | return 0; | 3706 | return 0; |
| 3696 | } | 3707 | } |
| 3708 | EXPORT_SYMBOL_GPL(md_raid5_congested); | ||
| 3709 | |||
| 3710 | static int raid5_congested(void *data, int bits) | ||
| 3711 | { | ||
| 3712 | mddev_t *mddev = data; | ||
| 3713 | |||
| 3714 | return mddev_congested(mddev, bits) || | ||
| 3715 | md_raid5_congested(mddev, bits); | ||
| 3716 | } | ||
| 3697 | 3717 | ||
| 3698 | /* We want read requests to align with chunks where possible, | 3718 | /* We want read requests to align with chunks where possible, |
| 3699 | * but write requests don't need to. | 3719 | * but write requests don't need to. |
| @@ -4075,7 +4095,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
| 4075 | * add failed due to overlap. Flush everything | 4095 | * add failed due to overlap. Flush everything |
| 4076 | * and wait a while | 4096 | * and wait a while |
| 4077 | */ | 4097 | */ |
| 4078 | raid5_unplug_device(mddev->queue); | 4098 | md_raid5_unplug_device(conf); |
| 4079 | release_stripe(sh); | 4099 | release_stripe(sh); |
| 4080 | schedule(); | 4100 | schedule(); |
| 4081 | goto retry; | 4101 | goto retry; |
| @@ -4566,23 +4586,15 @@ raid5_show_stripe_cache_size(mddev_t *mddev, char *page) | |||
| 4566 | return 0; | 4586 | return 0; |
| 4567 | } | 4587 | } |
| 4568 | 4588 | ||
| 4569 | static ssize_t | 4589 | int |
| 4570 | raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) | 4590 | raid5_set_cache_size(mddev_t *mddev, int size) |
| 4571 | { | 4591 | { |
| 4572 | raid5_conf_t *conf = mddev->private; | 4592 | raid5_conf_t *conf = mddev->private; |
| 4573 | unsigned long new; | ||
| 4574 | int err; | 4593 | int err; |
| 4575 | 4594 | ||
| 4576 | if (len >= PAGE_SIZE) | 4595 | if (size <= 16 || size > 32768) |
| 4577 | return -EINVAL; | 4596 | return -EINVAL; |
| 4578 | if (!conf) | 4597 | while (size < conf->max_nr_stripes) { |
| 4579 | return -ENODEV; | ||
| 4580 | |||
| 4581 | if (strict_strtoul(page, 10, &new)) | ||
| 4582 | return -EINVAL; | ||
| 4583 | if (new <= 16 || new > 32768) | ||
| 4584 | return -EINVAL; | ||
| 4585 | while (new < conf->max_nr_stripes) { | ||
| 4586 | if (drop_one_stripe(conf)) | 4598 | if (drop_one_stripe(conf)) |
| 4587 | conf->max_nr_stripes--; | 4599 | conf->max_nr_stripes--; |
| 4588 | else | 4600 | else |
| @@ -4591,11 +4603,32 @@ raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) | |||
| 4591 | err = md_allow_write(mddev); | 4603 | err = md_allow_write(mddev); |
| 4592 | if (err) | 4604 | if (err) |
| 4593 | return err; | 4605 | return err; |
| 4594 | while (new > conf->max_nr_stripes) { | 4606 | while (size > conf->max_nr_stripes) { |
| 4595 | if (grow_one_stripe(conf)) | 4607 | if (grow_one_stripe(conf)) |
| 4596 | conf->max_nr_stripes++; | 4608 | conf->max_nr_stripes++; |
| 4597 | else break; | 4609 | else break; |
| 4598 | } | 4610 | } |
| 4611 | return 0; | ||
| 4612 | } | ||
| 4613 | EXPORT_SYMBOL(raid5_set_cache_size); | ||
| 4614 | |||
| 4615 | static ssize_t | ||
| 4616 | raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) | ||
| 4617 | { | ||
| 4618 | raid5_conf_t *conf = mddev->private; | ||
| 4619 | unsigned long new; | ||
| 4620 | int err; | ||
| 4621 | |||
| 4622 | if (len >= PAGE_SIZE) | ||
| 4623 | return -EINVAL; | ||
| 4624 | if (!conf) | ||
| 4625 | return -ENODEV; | ||
| 4626 | |||
| 4627 | if (strict_strtoul(page, 10, &new)) | ||
| 4628 | return -EINVAL; | ||
| 4629 | err = raid5_set_cache_size(mddev, new); | ||
| 4630 | if (err) | ||
| 4631 | return err; | ||
| 4599 | return len; | 4632 | return len; |
| 4600 | } | 4633 | } |
| 4601 | 4634 | ||
| @@ -4958,7 +4991,7 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded | |||
| 4958 | static int run(mddev_t *mddev) | 4991 | static int run(mddev_t *mddev) |
| 4959 | { | 4992 | { |
| 4960 | raid5_conf_t *conf; | 4993 | raid5_conf_t *conf; |
| 4961 | int working_disks = 0, chunk_size; | 4994 | int working_disks = 0; |
| 4962 | int dirty_parity_disks = 0; | 4995 | int dirty_parity_disks = 0; |
| 4963 | mdk_rdev_t *rdev; | 4996 | mdk_rdev_t *rdev; |
| 4964 | sector_t reshape_offset = 0; | 4997 | sector_t reshape_offset = 0; |
| @@ -5144,42 +5177,47 @@ static int run(mddev_t *mddev) | |||
| 5144 | "reshape"); | 5177 | "reshape"); |
| 5145 | } | 5178 | } |
| 5146 | 5179 | ||
| 5147 | /* read-ahead size must cover two whole stripes, which is | ||
| 5148 | * 2 * (datadisks) * chunksize where 'n' is the number of raid devices | ||
| 5149 | */ | ||
| 5150 | { | ||
| 5151 | int data_disks = conf->previous_raid_disks - conf->max_degraded; | ||
| 5152 | int stripe = data_disks * | ||
| 5153 | ((mddev->chunk_sectors << 9) / PAGE_SIZE); | ||
| 5154 | if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) | ||
| 5155 | mddev->queue->backing_dev_info.ra_pages = 2 * stripe; | ||
| 5156 | } | ||
| 5157 | 5180 | ||
| 5158 | /* Ok, everything is just fine now */ | 5181 | /* Ok, everything is just fine now */ |
| 5159 | if (mddev->to_remove == &raid5_attrs_group) | 5182 | if (mddev->to_remove == &raid5_attrs_group) |
| 5160 | mddev->to_remove = NULL; | 5183 | mddev->to_remove = NULL; |
| 5161 | else if (sysfs_create_group(&mddev->kobj, &raid5_attrs_group)) | 5184 | else if (mddev->kobj.sd && |
| 5185 | sysfs_create_group(&mddev->kobj, &raid5_attrs_group)) | ||
| 5162 | printk(KERN_WARNING | 5186 | printk(KERN_WARNING |
| 5163 | "md/raid:%s: failed to create sysfs attributes.\n", | 5187 | "raid5: failed to create sysfs attributes for %s\n", |
| 5164 | mdname(mddev)); | 5188 | mdname(mddev)); |
| 5189 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | ||
| 5165 | 5190 | ||
| 5166 | mddev->queue->queue_lock = &conf->device_lock; | 5191 | plugger_init(&conf->plug, raid5_unplug); |
| 5192 | mddev->plug = &conf->plug; | ||
| 5193 | if (mddev->queue) { | ||
| 5194 | int chunk_size; | ||
| 5195 | /* read-ahead size must cover two whole stripes, which | ||
| 5196 | * is 2 * (datadisks) * chunksize where 'n' is the | ||
| 5197 | * number of raid devices | ||
| 5198 | */ | ||
| 5199 | int data_disks = conf->previous_raid_disks - conf->max_degraded; | ||
| 5200 | int stripe = data_disks * | ||
| 5201 | ((mddev->chunk_sectors << 9) / PAGE_SIZE); | ||
| 5202 | if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) | ||
| 5203 | mddev->queue->backing_dev_info.ra_pages = 2 * stripe; | ||
| 5167 | 5204 | ||
| 5168 | mddev->queue->unplug_fn = raid5_unplug_device; | 5205 | blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); |
| 5169 | mddev->queue->backing_dev_info.congested_data = mddev; | ||
| 5170 | mddev->queue->backing_dev_info.congested_fn = raid5_congested; | ||
| 5171 | 5206 | ||
| 5172 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | 5207 | mddev->queue->backing_dev_info.congested_data = mddev; |
| 5208 | mddev->queue->backing_dev_info.congested_fn = raid5_congested; | ||
| 5209 | mddev->queue->queue_lock = &conf->device_lock; | ||
| 5210 | mddev->queue->unplug_fn = raid5_unplug_queue; | ||
| 5173 | 5211 | ||
| 5174 | blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); | 5212 | chunk_size = mddev->chunk_sectors << 9; |
| 5175 | chunk_size = mddev->chunk_sectors << 9; | 5213 | blk_queue_io_min(mddev->queue, chunk_size); |
| 5176 | blk_queue_io_min(mddev->queue, chunk_size); | 5214 | blk_queue_io_opt(mddev->queue, chunk_size * |
| 5177 | blk_queue_io_opt(mddev->queue, chunk_size * | 5215 | (conf->raid_disks - conf->max_degraded)); |
| 5178 | (conf->raid_disks - conf->max_degraded)); | ||
| 5179 | 5216 | ||
| 5180 | list_for_each_entry(rdev, &mddev->disks, same_set) | 5217 | list_for_each_entry(rdev, &mddev->disks, same_set) |
| 5181 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 5218 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
| 5182 | rdev->data_offset << 9); | 5219 | rdev->data_offset << 9); |
| 5220 | } | ||
| 5183 | 5221 | ||
| 5184 | return 0; | 5222 | return 0; |
| 5185 | abort: | 5223 | abort: |
| @@ -5200,8 +5238,9 @@ static int stop(mddev_t *mddev) | |||
| 5200 | 5238 | ||
| 5201 | md_unregister_thread(mddev->thread); | 5239 | md_unregister_thread(mddev->thread); |
| 5202 | mddev->thread = NULL; | 5240 | mddev->thread = NULL; |
| 5203 | mddev->queue->backing_dev_info.congested_fn = NULL; | 5241 | if (mddev->queue) |
| 5204 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | 5242 | mddev->queue->backing_dev_info.congested_fn = NULL; |
| 5243 | plugger_flush(&conf->plug); /* the unplug fn references 'conf'*/ | ||
| 5205 | free_conf(conf); | 5244 | free_conf(conf); |
| 5206 | mddev->private = NULL; | 5245 | mddev->private = NULL; |
| 5207 | mddev->to_remove = &raid5_attrs_group; | 5246 | mddev->to_remove = &raid5_attrs_group; |
| @@ -5545,10 +5584,7 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
| 5545 | sprintf(nm, "rd%d", rdev->raid_disk); | 5584 | sprintf(nm, "rd%d", rdev->raid_disk); |
| 5546 | if (sysfs_create_link(&mddev->kobj, | 5585 | if (sysfs_create_link(&mddev->kobj, |
| 5547 | &rdev->kobj, nm)) | 5586 | &rdev->kobj, nm)) |
| 5548 | printk(KERN_WARNING | 5587 | /* Failure here is OK */; |
| 5549 | "md/raid:%s: failed to create " | ||
| 5550 | " link %s\n", | ||
| 5551 | mdname(mddev), nm); | ||
| 5552 | } else | 5588 | } else |
| 5553 | break; | 5589 | break; |
| 5554 | } | 5590 | } |
| @@ -5603,7 +5639,7 @@ static void end_reshape(raid5_conf_t *conf) | |||
| 5603 | /* read-ahead size must cover two whole stripes, which is | 5639 | /* read-ahead size must cover two whole stripes, which is |
| 5604 | * 2 * (datadisks) * chunksize where 'n' is the number of raid devices | 5640 | * 2 * (datadisks) * chunksize where 'n' is the number of raid devices |
| 5605 | */ | 5641 | */ |
| 5606 | { | 5642 | if (conf->mddev->queue) { |
| 5607 | int data_disks = conf->raid_disks - conf->max_degraded; | 5643 | int data_disks = conf->raid_disks - conf->max_degraded; |
| 5608 | int stripe = data_disks * ((conf->chunk_sectors << 9) | 5644 | int stripe = data_disks * ((conf->chunk_sectors << 9) |
| 5609 | / PAGE_SIZE); | 5645 | / PAGE_SIZE); |
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 0f86f5e36724..36eaed5dfd6e 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
| @@ -388,7 +388,7 @@ struct raid5_private_data { | |||
| 388 | * two caches. | 388 | * two caches. |
| 389 | */ | 389 | */ |
| 390 | int active_name; | 390 | int active_name; |
| 391 | char cache_name[2][20]; | 391 | char cache_name[2][32]; |
| 392 | struct kmem_cache *slab_cache; /* for allocating stripes */ | 392 | struct kmem_cache *slab_cache; /* for allocating stripes */ |
| 393 | 393 | ||
| 394 | int seq_flush, seq_write; | 394 | int seq_flush, seq_write; |
| @@ -398,6 +398,9 @@ struct raid5_private_data { | |||
| 398 | * (fresh device added). | 398 | * (fresh device added). |
| 399 | * Cleared when a sync completes. | 399 | * Cleared when a sync completes. |
| 400 | */ | 400 | */ |
| 401 | |||
| 402 | struct plug_handle plug; | ||
| 403 | |||
| 401 | /* per cpu variables */ | 404 | /* per cpu variables */ |
| 402 | struct raid5_percpu { | 405 | struct raid5_percpu { |
| 403 | struct page *spare_page; /* Used when checking P/Q in raid6 */ | 406 | struct page *spare_page; /* Used when checking P/Q in raid6 */ |
| @@ -497,4 +500,8 @@ static inline int algorithm_is_DDF(int layout) | |||
| 497 | { | 500 | { |
| 498 | return layout >= 8 && layout <= 10; | 501 | return layout >= 8 && layout <= 10; |
| 499 | } | 502 | } |
| 503 | |||
| 504 | extern int md_raid5_congested(mddev_t *mddev, int bits); | ||
| 505 | extern void md_raid5_unplug_device(raid5_conf_t *conf); | ||
| 506 | extern int raid5_set_cache_size(mddev_t *mddev, int size); | ||
| 500 | #endif | 507 | #endif |
diff --git a/lib/Kconfig b/lib/Kconfig index 5b916bc0fbae..fa9bf2c06199 100644 --- a/lib/Kconfig +++ b/lib/Kconfig | |||
| @@ -7,6 +7,9 @@ config BINARY_PRINTF | |||
| 7 | 7 | ||
| 8 | menu "Library routines" | 8 | menu "Library routines" |
| 9 | 9 | ||
| 10 | config RAID6_PQ | ||
| 11 | tristate | ||
| 12 | |||
| 10 | config BITREVERSE | 13 | config BITREVERSE |
| 11 | tristate | 14 | tristate |
| 12 | 15 | ||
diff --git a/lib/Makefile b/lib/Makefile index 0bfabba1bb32..e6a3763b8212 100644 --- a/lib/Makefile +++ b/lib/Makefile | |||
| @@ -69,6 +69,7 @@ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ | |||
| 69 | obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ | 69 | obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ |
| 70 | obj-$(CONFIG_LZO_COMPRESS) += lzo/ | 70 | obj-$(CONFIG_LZO_COMPRESS) += lzo/ |
| 71 | obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ | 71 | obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ |
| 72 | obj-$(CONFIG_RAID6_PQ) += raid6/ | ||
| 72 | 73 | ||
| 73 | lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o | 74 | lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o |
| 74 | lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o | 75 | lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o |
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile new file mode 100644 index 000000000000..19bf32da644f --- /dev/null +++ b/lib/raid6/Makefile | |||
| @@ -0,0 +1,78 @@ | |||
| 1 | obj-$(CONFIG_RAID6_PQ) += raid6_pq.o | ||
| 2 | |||
| 3 | raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \ | ||
| 4 | raid6int1.o raid6int2.o raid6int4.o \ | ||
| 5 | raid6int8.o raid6int16.o raid6int32.o \ | ||
| 6 | raid6altivec1.o raid6altivec2.o raid6altivec4.o \ | ||
| 7 | raid6altivec8.o \ | ||
| 8 | raid6mmx.o raid6sse1.o raid6sse2.o | ||
| 9 | hostprogs-y += mktables | ||
| 10 | |||
| 11 | quiet_cmd_unroll = UNROLL $@ | ||
| 12 | cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \ | ||
| 13 | < $< > $@ || ( rm -f $@ && exit 1 ) | ||
| 14 | |||
| 15 | ifeq ($(CONFIG_ALTIVEC),y) | ||
| 16 | altivec_flags := -maltivec -mabi=altivec | ||
| 17 | endif | ||
| 18 | |||
| 19 | targets += raid6int1.c | ||
| 20 | $(obj)/raid6int1.c: UNROLL := 1 | ||
| 21 | $(obj)/raid6int1.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
| 22 | $(call if_changed,unroll) | ||
| 23 | |||
| 24 | targets += raid6int2.c | ||
| 25 | $(obj)/raid6int2.c: UNROLL := 2 | ||
| 26 | $(obj)/raid6int2.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
| 27 | $(call if_changed,unroll) | ||
| 28 | |||
| 29 | targets += raid6int4.c | ||
| 30 | $(obj)/raid6int4.c: UNROLL := 4 | ||
| 31 | $(obj)/raid6int4.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
| 32 | $(call if_changed,unroll) | ||
| 33 | |||
| 34 | targets += raid6int8.c | ||
| 35 | $(obj)/raid6int8.c: UNROLL := 8 | ||
| 36 | $(obj)/raid6int8.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
| 37 | $(call if_changed,unroll) | ||
| 38 | |||
| 39 | targets += raid6int16.c | ||
| 40 | $(obj)/raid6int16.c: UNROLL := 16 | ||
| 41 | $(obj)/raid6int16.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
| 42 | $(call if_changed,unroll) | ||
| 43 | |||
| 44 | targets += raid6int32.c | ||
| 45 | $(obj)/raid6int32.c: UNROLL := 32 | ||
| 46 | $(obj)/raid6int32.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
| 47 | $(call if_changed,unroll) | ||
| 48 | |||
| 49 | CFLAGS_raid6altivec1.o += $(altivec_flags) | ||
| 50 | targets += raid6altivec1.c | ||
| 51 | $(obj)/raid6altivec1.c: UNROLL := 1 | ||
| 52 | $(obj)/raid6altivec1.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE | ||
| 53 | $(call if_changed,unroll) | ||
| 54 | |||
| 55 | CFLAGS_raid6altivec2.o += $(altivec_flags) | ||
| 56 | targets += raid6altivec2.c | ||
| 57 | $(obj)/raid6altivec2.c: UNROLL := 2 | ||
| 58 | $(obj)/raid6altivec2.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE | ||
| 59 | $(call if_changed,unroll) | ||
| 60 | |||
| 61 | CFLAGS_raid6altivec4.o += $(altivec_flags) | ||
| 62 | targets += raid6altivec4.c | ||
| 63 | $(obj)/raid6altivec4.c: UNROLL := 4 | ||
| 64 | $(obj)/raid6altivec4.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE | ||
| 65 | $(call if_changed,unroll) | ||
| 66 | |||
| 67 | CFLAGS_raid6altivec8.o += $(altivec_flags) | ||
| 68 | targets += raid6altivec8.c | ||
| 69 | $(obj)/raid6altivec8.c: UNROLL := 8 | ||
| 70 | $(obj)/raid6altivec8.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE | ||
| 71 | $(call if_changed,unroll) | ||
| 72 | |||
| 73 | quiet_cmd_mktable = TABLE $@ | ||
| 74 | cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) | ||
| 75 | |||
| 76 | targets += raid6tables.c | ||
| 77 | $(obj)/raid6tables.c: $(obj)/mktables FORCE | ||
| 78 | $(call if_changed,mktable) | ||
diff --git a/drivers/md/mktables.c b/lib/raid6/mktables.c index 3b1500843bba..3b1500843bba 100644 --- a/drivers/md/mktables.c +++ b/lib/raid6/mktables.c | |||
diff --git a/drivers/md/raid6algos.c b/lib/raid6/raid6algos.c index 1f8784bfd44d..1f8784bfd44d 100644 --- a/drivers/md/raid6algos.c +++ b/lib/raid6/raid6algos.c | |||
diff --git a/drivers/md/raid6altivec.uc b/lib/raid6/raid6altivec.uc index 2654d5c854be..2654d5c854be 100644 --- a/drivers/md/raid6altivec.uc +++ b/lib/raid6/raid6altivec.uc | |||
diff --git a/drivers/md/raid6int.uc b/lib/raid6/raid6int.uc index d1e276a14fab..d1e276a14fab 100644 --- a/drivers/md/raid6int.uc +++ b/lib/raid6/raid6int.uc | |||
diff --git a/drivers/md/raid6mmx.c b/lib/raid6/raid6mmx.c index e7f6c13132bf..e7f6c13132bf 100644 --- a/drivers/md/raid6mmx.c +++ b/lib/raid6/raid6mmx.c | |||
diff --git a/drivers/md/raid6recov.c b/lib/raid6/raid6recov.c index 2609f00e0d61..2609f00e0d61 100644 --- a/drivers/md/raid6recov.c +++ b/lib/raid6/raid6recov.c | |||
diff --git a/drivers/md/raid6sse1.c b/lib/raid6/raid6sse1.c index b274dd5eab8f..b274dd5eab8f 100644 --- a/drivers/md/raid6sse1.c +++ b/lib/raid6/raid6sse1.c | |||
diff --git a/drivers/md/raid6sse2.c b/lib/raid6/raid6sse2.c index 6ed6c6c0389f..6ed6c6c0389f 100644 --- a/drivers/md/raid6sse2.c +++ b/lib/raid6/raid6sse2.c | |||
diff --git a/drivers/md/raid6test/Makefile b/lib/raid6/raid6test/Makefile index 2874cbef529d..2874cbef529d 100644 --- a/drivers/md/raid6test/Makefile +++ b/lib/raid6/raid6test/Makefile | |||
diff --git a/drivers/md/raid6test/test.c b/lib/raid6/raid6test/test.c index 7a930318b17d..7a930318b17d 100644 --- a/drivers/md/raid6test/test.c +++ b/lib/raid6/raid6test/test.c | |||
diff --git a/drivers/md/raid6x86.h b/lib/raid6/raid6x86.h index 4c22c1568558..4c22c1568558 100644 --- a/drivers/md/raid6x86.h +++ b/lib/raid6/raid6x86.h | |||
diff --git a/drivers/md/unroll.awk b/lib/raid6/unroll.awk index c6aa03631df8..c6aa03631df8 100644 --- a/drivers/md/unroll.awk +++ b/lib/raid6/unroll.awk | |||
