aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSong Liu <songliubraving@fb.com>2017-01-23 20:12:57 -0500
committerShaohua Li <shli@fb.com>2017-01-24 14:20:15 -0500
commita85dd7b8df52e35d8ee3794c65cac5c39128fd80 (patch)
tree3d69f1c7c7ce9c9a85c806763c12c774cd95643f
parentba02684daf7fb4a827580f909b7c7db61c05ae7d (diff)
md/r5cache: flush data only stripes in r5l_recovery_log()
For safer operation, all arrays start in write-through mode, which has been better tested and is more mature. Also, the write-through/write-back mode setting is not persistent across array restarts, so we always start the array in write-through mode. However, if recovery finds data-only stripes from before the shutdown (left over from a previous write-back session), it is not safe to start the array in write-through mode, as write-through mode cannot handle stripes with data in the write-back cache. To solve this problem, we flush all data-only stripes in r5l_recovery_log(). When r5l_recovery_log() returns, the array starts with an empty cache in write-through mode. This logic is implemented in r5c_recovery_flush_data_only_stripes(): 1. enable the write-back cache; 2. flush all stripes; 3. wake up conf->mddev->thread; 4. wait for all stripes to get flushed (reusing wait_for_quiescent); 5. disable the write-back cache. The wait in step 4 is woken up in release_inactive_stripe_list() when conf->active_stripes reaches 0. It is safe to wake up mddev->thread here because all the resources required by the thread have been initialized. Signed-off-by: Song Liu <songliubraving@fb.com> Signed-off-by: Shaohua Li <shli@fb.com>
-rw-r--r--drivers/md/md.c5
-rw-r--r--drivers/md/raid5-cache.c56
2 files changed, 45 insertions, 16 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 82821ee0d57f..01175dac0db6 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5291,6 +5291,11 @@ int md_run(struct mddev *mddev)
5291 if (start_readonly && mddev->ro == 0) 5291 if (start_readonly && mddev->ro == 0)
5292 mddev->ro = 2; /* read-only, but switch on first write */ 5292 mddev->ro = 2; /* read-only, but switch on first write */
5293 5293
5294 /*
5295 * NOTE: some pers->run(), for example r5l_recovery_log(), wakes
5296 * up mddev->thread. It is important to initialize critical
5297 * resources for mddev->thread BEFORE calling pers->run().
5298 */
5294 err = pers->run(mddev); 5299 err = pers->run(mddev);
5295 if (err) 5300 if (err)
5296 pr_warn("md: pers->run() failed ...\n"); 5301 pr_warn("md: pers->run() failed ...\n");
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 95dcaa022e1f..3d7dda85494c 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -2060,7 +2060,7 @@ static int
2060r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, 2060r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
2061 struct r5l_recovery_ctx *ctx) 2061 struct r5l_recovery_ctx *ctx)
2062{ 2062{
2063 struct stripe_head *sh, *next; 2063 struct stripe_head *sh;
2064 struct mddev *mddev = log->rdev->mddev; 2064 struct mddev *mddev = log->rdev->mddev;
2065 struct page *page; 2065 struct page *page;
2066 sector_t next_checkpoint = MaxSector; 2066 sector_t next_checkpoint = MaxSector;
@@ -2074,7 +2074,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
2074 2074
2075 WARN_ON(list_empty(&ctx->cached_list)); 2075 WARN_ON(list_empty(&ctx->cached_list));
2076 2076
2077 list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) { 2077 list_for_each_entry(sh, &ctx->cached_list, lru) {
2078 struct r5l_meta_block *mb; 2078 struct r5l_meta_block *mb;
2079 int i; 2079 int i;
2080 int offset; 2080 int offset;
@@ -2124,14 +2124,39 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
2124 ctx->pos = write_pos; 2124 ctx->pos = write_pos;
2125 ctx->seq += 1; 2125 ctx->seq += 1;
2126 next_checkpoint = sh->log_start; 2126 next_checkpoint = sh->log_start;
2127 list_del_init(&sh->lru);
2128 raid5_release_stripe(sh);
2129 } 2127 }
2130 log->next_checkpoint = next_checkpoint; 2128 log->next_checkpoint = next_checkpoint;
2131 __free_page(page); 2129 __free_page(page);
2132 return 0; 2130 return 0;
2133} 2131}
2134 2132
2133static void r5c_recovery_flush_data_only_stripes(struct r5l_log *log,
2134 struct r5l_recovery_ctx *ctx)
2135{
2136 struct mddev *mddev = log->rdev->mddev;
2137 struct r5conf *conf = mddev->private;
2138 struct stripe_head *sh, *next;
2139
2140 if (ctx->data_only_stripes == 0)
2141 return;
2142
2143 log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_BACK;
2144
2145 list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) {
2146 r5c_make_stripe_write_out(sh);
2147 set_bit(STRIPE_HANDLE, &sh->state);
2148 list_del_init(&sh->lru);
2149 raid5_release_stripe(sh);
2150 }
2151
2152 md_wakeup_thread(conf->mddev->thread);
2153 /* reuse conf->wait_for_quiescent in recovery */
2154 wait_event(conf->wait_for_quiescent,
2155 atomic_read(&conf->active_stripes) == 0);
2156
2157 log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
2158}
2159
2135static int r5l_recovery_log(struct r5l_log *log) 2160static int r5l_recovery_log(struct r5l_log *log)
2136{ 2161{
2137 struct mddev *mddev = log->rdev->mddev; 2162 struct mddev *mddev = log->rdev->mddev;
@@ -2158,32 +2183,31 @@ static int r5l_recovery_log(struct r5l_log *log)
2158 pos = ctx.pos; 2183 pos = ctx.pos;
2159 ctx.seq += 10000; 2184 ctx.seq += 10000;
2160 2185
2161 if (ctx.data_only_stripes == 0) {
2162 log->next_checkpoint = ctx.pos;
2163 r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
2164 ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
2165 }
2166 2186
2167 if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0)) 2187 if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0))
2168 pr_debug("md/raid:%s: starting from clean shutdown\n", 2188 pr_debug("md/raid:%s: starting from clean shutdown\n",
2169 mdname(mddev)); 2189 mdname(mddev));
2170 else { 2190 else
2171 pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n", 2191 pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n",
2172 mdname(mddev), ctx.data_only_stripes, 2192 mdname(mddev), ctx.data_only_stripes,
2173 ctx.data_parity_stripes); 2193 ctx.data_parity_stripes);
2174 2194
2175 if (ctx.data_only_stripes > 0) 2195 if (ctx.data_only_stripes == 0) {
2176 if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) { 2196 log->next_checkpoint = ctx.pos;
2177 pr_err("md/raid:%s: failed to rewrite stripes to journal\n", 2197 r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
2178 mdname(mddev)); 2198 ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
2179 return -EIO; 2199 } else if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) {
2180 } 2200 pr_err("md/raid:%s: failed to rewrite stripes to journal\n",
2201 mdname(mddev));
2202 return -EIO;
2181 } 2203 }
2182 2204
2183 log->log_start = ctx.pos; 2205 log->log_start = ctx.pos;
2184 log->seq = ctx.seq; 2206 log->seq = ctx.seq;
2185 log->last_checkpoint = pos; 2207 log->last_checkpoint = pos;
2186 r5l_write_super(log, pos); 2208 r5l_write_super(log, pos);
2209
2210 r5c_recovery_flush_data_only_stripes(log, &ctx);
2187 return 0; 2211 return 0;
2188} 2212}
2189 2213