about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de> 2011-07-25 21:34:20 -0400
committer: NeilBrown <neilb@suse.de> 2011-07-25 21:34:20 -0400
commit: c4c1663be46b2ab94e59d3e0c583a8f6b188ff0c (patch)
tree: 2e0b6b51c0a55c5f9edad2c832a66c9949ef496d
parent: cbe47ec559c33a68b5ee002051b848d1531a8adb (diff)
md/raid5: replace sh->lock with an 'active' flag.
sh->lock is now mainly used to ensure that two threads aren't running in the locked part of handle_stripe[56] at the same time.

That can more neatly be achieved with an 'active' flag which we set while running handle_stripe. If we find the flag is set, we simply requeue the stripe for later by setting STRIPE_HANDLE.

For safety we take ->device_lock while examining the state of the stripe and creating a summary in 'stripe_head_state / r6_state'. This possibly isn't needed but as shared fields like ->toread, ->towrite are checked it is safer for now at least.

We leave the label after the old 'unlock' called "unlock" because it will disappear in a few patches, so renaming seems pointless.

This leaves the stripe 'locked' for longer as we clear STRIPE_ACTIVE later, but that is not a problem.

Signed-off-by: NeilBrown <neilb@suse.de>
Reviewed-by: Namhyung Kim <namhyung@gmail.com>
-rw-r--r-- drivers/md/raid5.c | 26
-rw-r--r-- drivers/md/raid5.h | 35
2 files changed, 29 insertions, 32 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 9985138f4c04..f8275b5a6fbe 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1020,14 +1020,12 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
1020 if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) { 1020 if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) {
1021 struct bio *wbi; 1021 struct bio *wbi;
1022 1022
1023 spin_lock(&sh->lock);
1024 spin_lock_irq(&sh->raid_conf->device_lock); 1023 spin_lock_irq(&sh->raid_conf->device_lock);
1025 chosen = dev->towrite; 1024 chosen = dev->towrite;
1026 dev->towrite = NULL; 1025 dev->towrite = NULL;
1027 BUG_ON(dev->written); 1026 BUG_ON(dev->written);
1028 wbi = dev->written = chosen; 1027 wbi = dev->written = chosen;
1029 spin_unlock_irq(&sh->raid_conf->device_lock); 1028 spin_unlock_irq(&sh->raid_conf->device_lock);
1030 spin_unlock(&sh->lock);
1031 1029
1032 while (wbi && wbi->bi_sector < 1030 while (wbi && wbi->bi_sector <
1033 dev->sector + STRIPE_SECTORS) { 1031 dev->sector + STRIPE_SECTORS) {
@@ -1322,7 +1320,6 @@ static int grow_one_stripe(raid5_conf_t *conf)
1322 return 0; 1320 return 0;
1323 1321
1324 sh->raid_conf = conf; 1322 sh->raid_conf = conf;
1325 spin_lock_init(&sh->lock);
1326 #ifdef CONFIG_MULTICORE_RAID456 1323 #ifdef CONFIG_MULTICORE_RAID456
1327 init_waitqueue_head(&sh->ops.wait_for_ops); 1324 init_waitqueue_head(&sh->ops.wait_for_ops);
1328 #endif 1325 #endif
@@ -1442,7 +1439,6 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
1442 break; 1439 break;
1443 1440
1444 nsh->raid_conf = conf; 1441 nsh->raid_conf = conf;
1445 spin_lock_init(&nsh->lock);
1446 #ifdef CONFIG_MULTICORE_RAID456 1442 #ifdef CONFIG_MULTICORE_RAID456
1447 init_waitqueue_head(&nsh->ops.wait_for_ops); 1443 init_waitqueue_head(&nsh->ops.wait_for_ops);
1448 #endif 1444 #endif
@@ -2148,7 +2144,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
2148 (unsigned long long)sh->sector); 2144 (unsigned long long)sh->sector);
2149 2145
2150 2146
2151 spin_lock(&sh->lock);
2152 spin_lock_irq(&conf->device_lock); 2147 spin_lock_irq(&conf->device_lock);
2153 if (forwrite) { 2148 if (forwrite) {
2154 bip = &sh->dev[dd_idx].towrite; 2149 bip = &sh->dev[dd_idx].towrite;
@@ -2184,7 +2179,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
2184 set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); 2179 set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
2185 } 2180 }
2186 spin_unlock_irq(&conf->device_lock); 2181 spin_unlock_irq(&conf->device_lock);
2187 spin_unlock(&sh->lock);
2188 2182
2189 pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", 2183 pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
2190 (unsigned long long)(*bip)->bi_sector, 2184 (unsigned long long)(*bip)->bi_sector,
@@ -2201,7 +2195,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
2201 overlap: 2195 overlap:
2202 set_bit(R5_Overlap, &sh->dev[dd_idx].flags); 2196 set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
2203 spin_unlock_irq(&conf->device_lock); 2197 spin_unlock_irq(&conf->device_lock);
2204 spin_unlock(&sh->lock);
2205 return 0; 2198 return 0;
2206} 2199}
2207 2200
@@ -3023,12 +3016,10 @@ static void handle_stripe5(struct stripe_head *sh)
3023 atomic_read(&sh->count), sh->pd_idx, sh->check_state, 3016 atomic_read(&sh->count), sh->pd_idx, sh->check_state,
3024 sh->reconstruct_state); 3017 sh->reconstruct_state);
3025 3018
3026 spin_lock(&sh->lock);
3027 if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { 3019 if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
3028 set_bit(STRIPE_SYNCING, &sh->state); 3020 set_bit(STRIPE_SYNCING, &sh->state);
3029 clear_bit(STRIPE_INSYNC, &sh->state); 3021 clear_bit(STRIPE_INSYNC, &sh->state);
3030 } 3022 }
3031 clear_bit(STRIPE_HANDLE, &sh->state);
3032 clear_bit(STRIPE_DELAYED, &sh->state); 3023 clear_bit(STRIPE_DELAYED, &sh->state);
3033 3024
3034 s.syncing = test_bit(STRIPE_SYNCING, &sh->state); 3025 s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
@@ -3037,6 +3028,7 @@ static void handle_stripe5(struct stripe_head *sh)
3037 3028
3038 /* Now to look around and see what can be done */ 3029 /* Now to look around and see what can be done */
3039 rcu_read_lock(); 3030 rcu_read_lock();
3031 spin_lock_irq(&conf->device_lock);
3040 for (i=disks; i--; ) { 3032 for (i=disks; i--; ) {
3041 mdk_rdev_t *rdev; 3033 mdk_rdev_t *rdev;
3042 3034
@@ -3099,6 +3091,7 @@ static void handle_stripe5(struct stripe_head *sh)
3099 s.failed_num = i; 3091 s.failed_num = i;
3100 } 3092 }
3101 } 3093 }
3094 spin_unlock_irq(&conf->device_lock);
3102 rcu_read_unlock(); 3095 rcu_read_unlock();
3103 3096
3104 if (unlikely(blocked_rdev)) { 3097 if (unlikely(blocked_rdev)) {
@@ -3275,7 +3268,6 @@ static void handle_stripe5(struct stripe_head *sh)
3275 handle_stripe_expansion(conf, sh, NULL); 3268 handle_stripe_expansion(conf, sh, NULL);
3276 3269
3277 unlock: 3270 unlock:
3278 spin_unlock(&sh->lock);
3279 3271
3280 /* wait for this device to become unblocked */ 3272 /* wait for this device to become unblocked */
3281 if (unlikely(blocked_rdev)) 3273 if (unlikely(blocked_rdev))
@@ -3318,12 +3310,10 @@ static void handle_stripe6(struct stripe_head *sh)
3318 sh->check_state, sh->reconstruct_state); 3310 sh->check_state, sh->reconstruct_state);
3319 memset(&s, 0, sizeof(s)); 3311 memset(&s, 0, sizeof(s));
3320 3312
3321 spin_lock(&sh->lock);
3322 if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { 3313 if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
3323 set_bit(STRIPE_SYNCING, &sh->state); 3314 set_bit(STRIPE_SYNCING, &sh->state);
3324 clear_bit(STRIPE_INSYNC, &sh->state); 3315 clear_bit(STRIPE_INSYNC, &sh->state);
3325 } 3316 }
3326 clear_bit(STRIPE_HANDLE, &sh->state);
3327 clear_bit(STRIPE_DELAYED, &sh->state); 3317 clear_bit(STRIPE_DELAYED, &sh->state);
3328 3318
3329 s.syncing = test_bit(STRIPE_SYNCING, &sh->state); 3319 s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
@@ -3332,6 +3322,7 @@ static void handle_stripe6(struct stripe_head *sh)
3332 /* Now to look around and see what can be done */ 3322 /* Now to look around and see what can be done */
3333 3323
3334 rcu_read_lock(); 3324 rcu_read_lock();
3325 spin_lock_irq(&conf->device_lock);
3335 for (i=disks; i--; ) { 3326 for (i=disks; i--; ) {
3336 mdk_rdev_t *rdev; 3327 mdk_rdev_t *rdev;
3337 dev = &sh->dev[i]; 3328 dev = &sh->dev[i];
@@ -3395,6 +3386,7 @@ static void handle_stripe6(struct stripe_head *sh)
3395 s.failed++; 3386 s.failed++;
3396 } 3387 }
3397 } 3388 }
3389 spin_unlock_irq(&conf->device_lock);
3398 rcu_read_unlock(); 3390 rcu_read_unlock();
3399 3391
3400 if (unlikely(blocked_rdev)) { 3392 if (unlikely(blocked_rdev)) {
@@ -3580,7 +3572,6 @@ static void handle_stripe6(struct stripe_head *sh)
3580 handle_stripe_expansion(conf, sh, &r6s); 3572 handle_stripe_expansion(conf, sh, &r6s);
3581 3573
3582 unlock: 3574 unlock:
3583 spin_unlock(&sh->lock);
3584 3575
3585 /* wait for this device to become unblocked */ 3576 /* wait for this device to become unblocked */
3586 if (unlikely(blocked_rdev)) 3577 if (unlikely(blocked_rdev))
@@ -3608,10 +3599,19 @@ static void handle_stripe6(struct stripe_head *sh)
3608 3599
3609static void handle_stripe(struct stripe_head *sh) 3600static void handle_stripe(struct stripe_head *sh)
3610{ 3601{
3602 clear_bit(STRIPE_HANDLE, &sh->state);
3603 if (test_and_set_bit(STRIPE_ACTIVE, &sh->state)) {
3604 /* already being handled, ensure it gets handled
3605 * again when current action finishes */
3606 set_bit(STRIPE_HANDLE, &sh->state);
3607 return;
3608 }
3609
3611 if (sh->raid_conf->level == 6) 3610 if (sh->raid_conf->level == 6)
3612 handle_stripe6(sh); 3611 handle_stripe6(sh);
3613 else 3612 else
3614 handle_stripe5(sh); 3613 handle_stripe5(sh);
3614 clear_bit(STRIPE_ACTIVE, &sh->state);
3615} 3615}
3616 3616
3617static void raid5_activate_delayed(raid5_conf_t *conf) 3617static void raid5_activate_delayed(raid5_conf_t *conf)
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index a33001137bf8..bb246d9e0547 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -6,11 +6,11 @@
6 6
7/* 7/*
8 * 8 *
9 * Each stripe contains one buffer per disc. Each buffer can be in 9 * Each stripe contains one buffer per device. Each buffer can be in
10 * one of a number of states stored in "flags". Changes between 10 * one of a number of states stored in "flags". Changes between
11 * these states happen *almost* exclusively under a per-stripe 11 * these states happen *almost* exclusively under the protection of the
12 * spinlock. Some very specific changes can happen in bi_end_io, and 12 * STRIPE_ACTIVE flag. Some very specific changes can happen in bi_end_io, and
13 * these are not protected by the spin lock. 13 * these are not protected by STRIPE_ACTIVE.
14 * 14 *
15 * The flag bits that are used to represent these states are: 15 * The flag bits that are used to represent these states are:
16 * R5_UPTODATE and R5_LOCKED 16 * R5_UPTODATE and R5_LOCKED
@@ -76,12 +76,10 @@
76 * block and the cached buffer are successfully written, any buffer on 76 * block and the cached buffer are successfully written, any buffer on
77 * a written list can be returned with b_end_io. 77 * a written list can be returned with b_end_io.
78 * 78 *
79 * The write list and read list both act as fifos. The read list is 79 * The write list and read list both act as fifos. The read list,
80 * protected by the device_lock. The write and written lists are 80 * write list and written list are protected by the device_lock.
81 * protected by the stripe lock. The device_lock, which can be 81 * The device_lock is only for list manipulations and will only be
82 * claimed while the stipe lock is held, is only for list 82 * held for a very short time. It can be claimed from interrupts.
83 * manipulations and will only be held for a very short time. It can
84 * be claimed from interrupts.
85 * 83 *
86 * 84 *
87 * Stripes in the stripe cache can be on one of two lists (or on 85 * Stripes in the stripe cache can be on one of two lists (or on
@@ -96,7 +94,6 @@
96 * 94 *
97 * The inactive_list, handle_list and hash bucket lists are all protected by the 95 * The inactive_list, handle_list and hash bucket lists are all protected by the
98 * device_lock. 96 * device_lock.
99 * - stripes on the inactive_list never have their stripe_lock held.
100 * - stripes have a reference counter. If count==0, they are on a list. 97 * - stripes have a reference counter. If count==0, they are on a list.
101 * - If a stripe might need handling, STRIPE_HANDLE is set. 98 * - If a stripe might need handling, STRIPE_HANDLE is set.
102 * - When refcount reaches zero, then if STRIPE_HANDLE it is put on 99 * - When refcount reaches zero, then if STRIPE_HANDLE it is put on
@@ -116,10 +113,10 @@
116 * attach a request to an active stripe (add_stripe_bh()) 113 * attach a request to an active stripe (add_stripe_bh())
117 * lockdev attach-buffer unlockdev 114 * lockdev attach-buffer unlockdev
118 * handle a stripe (handle_stripe()) 115 * handle a stripe (handle_stripe())
119 * lockstripe clrSTRIPE_HANDLE ... 116 * setSTRIPE_ACTIVE, clrSTRIPE_HANDLE ...
120 * (lockdev check-buffers unlockdev) .. 117 * (lockdev check-buffers unlockdev) ..
121 * change-state .. 118 * change-state ..
122 * record io/ops needed unlockstripe schedule io/ops 119 * record io/ops needed clearSTRIPE_ACTIVE schedule io/ops
123 * release an active stripe (release_stripe()) 120 * release an active stripe (release_stripe())
124 * lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev 121 * lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev
125 * 122 *
@@ -128,8 +125,7 @@
128 * on a cached buffer, and plus one if the stripe is undergoing stripe 125 * on a cached buffer, and plus one if the stripe is undergoing stripe
129 * operations. 126 * operations.
130 * 127 *
131 * Stripe operations are performed outside the stripe lock, 128 * The stripe operations are:
132 * the stripe operations are:
133 * -copying data between the stripe cache and user application buffers 129 * -copying data between the stripe cache and user application buffers
134 * -computing blocks to save a disk access, or to recover a missing block 130 * -computing blocks to save a disk access, or to recover a missing block
135 * -updating the parity on a write operation (reconstruct write and 131 * -updating the parity on a write operation (reconstruct write and
@@ -159,7 +155,8 @@
159 */ 155 */
160 156
161/* 157/*
162 * Operations state - intermediate states that are visible outside of sh->lock 158 * Operations state - intermediate states that are visible outside of
159 * STRIPE_ACTIVE.
163 * In general _idle indicates nothing is running, _run indicates a data 160 * In general _idle indicates nothing is running, _run indicates a data
164 * processing operation is active, and _result means the data processing result 161 * processing operation is active, and _result means the data processing result
165 * is stable and can be acted upon. For simple operations like biofill and 162 * is stable and can be acted upon. For simple operations like biofill and
@@ -209,7 +206,6 @@ struct stripe_head {
209 short ddf_layout;/* use DDF ordering to calculate Q */ 206 short ddf_layout;/* use DDF ordering to calculate Q */
210 unsigned long state; /* state flags */ 207 unsigned long state; /* state flags */
211 atomic_t count; /* nr of active thread/requests */ 208 atomic_t count; /* nr of active thread/requests */
212 spinlock_t lock;
213 int bm_seq; /* sequence number for bitmap flushes */ 209 int bm_seq; /* sequence number for bitmap flushes */
214 int disks; /* disks in stripe */ 210 int disks; /* disks in stripe */
215 enum check_states check_state; 211 enum check_states check_state;
@@ -240,7 +236,7 @@ struct stripe_head {
240}; 236};
241 237
242/* stripe_head_state - collects and tracks the dynamic state of a stripe_head 238/* stripe_head_state - collects and tracks the dynamic state of a stripe_head
243 * for handle_stripe. It is only valid under spin_lock(sh->lock); 239 * for handle_stripe.
244 */ 240 */
245struct stripe_head_state { 241struct stripe_head_state {
246 int syncing, expanding, expanded; 242 int syncing, expanding, expanded;
@@ -290,6 +286,7 @@ struct r6_state {
290 * Stripe state 286 * Stripe state
291 */ 287 */
292enum { 288enum {
289 STRIPE_ACTIVE,
293 STRIPE_HANDLE, 290 STRIPE_HANDLE,
294 STRIPE_SYNC_REQUESTED, 291 STRIPE_SYNC_REQUESTED,
295 STRIPE_SYNCING, 292 STRIPE_SYNCING,
@@ -339,7 +336,7 @@ enum {
339 * PREREAD_ACTIVE. 336 * PREREAD_ACTIVE.
340 * In stripe_handle, if we find pre-reading is necessary, we do it if 337 * In stripe_handle, if we find pre-reading is necessary, we do it if
341 * PREREAD_ACTIVE is set, else we set DELAYED which will send it to the delayed queue. 338 * PREREAD_ACTIVE is set, else we set DELAYED which will send it to the delayed queue.
342 * HANDLE gets cleared if stripe_handle leave nothing locked. 339 * HANDLE gets cleared if stripe_handle leaves nothing locked.
343 */ 340 */
344 341
345 342