aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid5-cache.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid5-cache.c')
-rw-r--r--drivers/md/raid5-cache.c143
1 files changed, 140 insertions, 3 deletions
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 33fc85015147..02a554434747 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -40,6 +40,47 @@
40 */ 40 */
41#define R5L_POOL_SIZE 4 41#define R5L_POOL_SIZE 4
42 42
43/*
44 * r5c journal modes of the array: write-back or write-through.
45 * Write-through mode behaves identically to the existing log-only
46 * implementation.
47 */
48enum r5c_journal_mode {
49 R5C_JOURNAL_MODE_WRITE_THROUGH = 0,
50 R5C_JOURNAL_MODE_WRITE_BACK = 1,
51};
52
53/*
54 * raid5 cache state machine
55 *
56 * With the RAID cache, each stripe works in two phases:
57 * - caching phase
58 * - writing-out phase
59 *
60 * These two phases are controlled by bit STRIPE_R5C_CACHING:
61 * if STRIPE_R5C_CACHING == 0, the stripe is in writing-out phase
62 * if STRIPE_R5C_CACHING == 1, the stripe is in caching phase
63 *
64 * When there is no journal, or the journal is in write-through mode,
65 * the stripe is always in writing-out phase.
66 *
67 * For write-back journal, the stripe is sent to caching phase on write
68 * (r5c_try_caching_write). r5c_make_stripe_write_out() kicks off
69 * the write-out phase by clearing STRIPE_R5C_CACHING.
70 *
71 * Stripes in caching phase do not write the raid disks. Instead, all
72 * writes are committed from the log device. Therefore, a stripe in
73 * caching phase handles writes as:
74 * - write to log device
75 * - return IO
76 *
77 * Stripes in writing-out phase handle writes as:
78 * - calculate parity
79 * - write pending data and parity to journal
80 * - write data and parity to raid disks
81 * - return IO for pending writes
82 */
83
43struct r5l_log { 84struct r5l_log {
44 struct md_rdev *rdev; 85 struct md_rdev *rdev;
45 86
@@ -96,6 +137,9 @@ struct r5l_log {
96 spinlock_t no_space_stripes_lock; 137 spinlock_t no_space_stripes_lock;
97 138
98 bool need_cache_flush; 139 bool need_cache_flush;
140
141 /* for r5c_cache */
142 enum r5c_journal_mode r5c_journal_mode;
99}; 143};
100 144
101/* 145/*
@@ -133,6 +177,12 @@ enum r5l_io_unit_state {
133 IO_UNIT_STRIPE_END = 3, /* stripes data finished writing to raid */ 177 IO_UNIT_STRIPE_END = 3, /* stripes data finished writing to raid */
134}; 178};
135 179
180bool r5c_is_writeback(struct r5l_log *log)
181{
182 return (log != NULL &&
183 log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK);
184}
185
136static sector_t r5l_ring_add(struct r5l_log *log, sector_t start, sector_t inc) 186static sector_t r5l_ring_add(struct r5l_log *log, sector_t start, sector_t inc)
137{ 187{
138 start += inc; 188 start += inc;
@@ -168,12 +218,51 @@ static void __r5l_set_io_unit_state(struct r5l_io_unit *io,
168 io->state = state; 218 io->state = state;
169} 219}
170 220
221/*
222 * Put the stripe into writing-out phase by clearing STRIPE_R5C_CACHING.
223 * This function should only be called in write-back mode.
224 */
225static void r5c_make_stripe_write_out(struct stripe_head *sh)
226{
227 struct r5conf *conf = sh->raid_conf;
228 struct r5l_log *log = conf->log;
229
230 BUG_ON(!r5c_is_writeback(log));
231
232 WARN_ON(!test_bit(STRIPE_R5C_CACHING, &sh->state));
233 clear_bit(STRIPE_R5C_CACHING, &sh->state);
234}
235
236/*
237 * Set the proper flags after writing (or flushing) data and/or parity to the
238 * log device. This is called from r5l_log_endio() or r5l_log_flush_endio().
239 */
240static void r5c_finish_cache_stripe(struct stripe_head *sh)
241{
242 struct r5l_log *log = sh->raid_conf->log;
243
244 if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) {
245 BUG_ON(test_bit(STRIPE_R5C_CACHING, &sh->state));
246 /*
247 * Set R5_InJournal for parity dev[pd_idx]. This means
248 * all data AND parity are in the journal. For RAID 6, it is
249 * NOT necessary to set the flag for dev[qd_idx], as the
250 * two parities are written out together.
251 */
252 set_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags);
253 } else
254 BUG(); /* write-back logic in next patch */
255}
256
171static void r5l_io_run_stripes(struct r5l_io_unit *io) 257static void r5l_io_run_stripes(struct r5l_io_unit *io)
172{ 258{
173 struct stripe_head *sh, *next; 259 struct stripe_head *sh, *next;
174 260
175 list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) { 261 list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
176 list_del_init(&sh->log_list); 262 list_del_init(&sh->log_list);
263
264 r5c_finish_cache_stripe(sh);
265
177 set_bit(STRIPE_HANDLE, &sh->state); 266 set_bit(STRIPE_HANDLE, &sh->state);
178 raid5_release_stripe(sh); 267 raid5_release_stripe(sh);
179 } 268 }
@@ -412,18 +501,19 @@ static int r5l_log_stripe(struct r5l_log *log, struct stripe_head *sh,
412 r5l_append_payload_page(log, sh->dev[i].page); 501 r5l_append_payload_page(log, sh->dev[i].page);
413 } 502 }
414 503
415 if (sh->qd_idx >= 0) { 504 if (parity_pages == 2) {
416 r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY, 505 r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY,
417 sh->sector, sh->dev[sh->pd_idx].log_checksum, 506 sh->sector, sh->dev[sh->pd_idx].log_checksum,
418 sh->dev[sh->qd_idx].log_checksum, true); 507 sh->dev[sh->qd_idx].log_checksum, true);
419 r5l_append_payload_page(log, sh->dev[sh->pd_idx].page); 508 r5l_append_payload_page(log, sh->dev[sh->pd_idx].page);
420 r5l_append_payload_page(log, sh->dev[sh->qd_idx].page); 509 r5l_append_payload_page(log, sh->dev[sh->qd_idx].page);
421 } else { 510 } else if (parity_pages == 1) {
422 r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY, 511 r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY,
423 sh->sector, sh->dev[sh->pd_idx].log_checksum, 512 sh->sector, sh->dev[sh->pd_idx].log_checksum,
424 0, false); 513 0, false);
425 r5l_append_payload_page(log, sh->dev[sh->pd_idx].page); 514 r5l_append_payload_page(log, sh->dev[sh->pd_idx].page);
426 } 515 } else /* Just writing data, not parity, in caching phase */
516 BUG_ON(parity_pages != 0);
427 517
428 list_add_tail(&sh->log_list, &io->stripe_list); 518 list_add_tail(&sh->log_list, &io->stripe_list);
429 atomic_inc(&io->pending_stripe); 519 atomic_inc(&io->pending_stripe);
@@ -455,6 +545,8 @@ int r5l_write_stripe(struct r5l_log *log, struct stripe_head *sh)
455 return -EAGAIN; 545 return -EAGAIN;
456 } 546 }
457 547
548 WARN_ON(test_bit(STRIPE_R5C_CACHING, &sh->state));
549
458 for (i = 0; i < sh->disks; i++) { 550 for (i = 0; i < sh->disks; i++) {
459 void *addr; 551 void *addr;
460 552
@@ -1112,6 +1204,49 @@ static void r5l_write_super(struct r5l_log *log, sector_t cp)
1112 set_bit(MD_CHANGE_DEVS, &mddev->flags); 1204 set_bit(MD_CHANGE_DEVS, &mddev->flags);
1113} 1205}
1114 1206
1207/*
1208 * Try to handle write operations in caching phase. This function should only
1209 * be called in write-back mode.
1210 *
1211 * If all outstanding writes can be handled in caching phase, return 0.
1212 * If the writes require the write-out phase, call r5c_make_stripe_write_out()
1213 * and return -EAGAIN.
1214 */
1215int r5c_try_caching_write(struct r5conf *conf,
1216 struct stripe_head *sh,
1217 struct stripe_head_state *s,
1218 int disks)
1219{
1220 struct r5l_log *log = conf->log;
1221
1222 BUG_ON(!r5c_is_writeback(log));
1223
1224 /* more write-back logic in next patches */
1225 r5c_make_stripe_write_out(sh);
1226 return -EAGAIN;
1227}
1228
1229/*
1230 * clean up the stripe (clear R5_InJournal for dev[pd_idx] etc.) after the
1231 * stripe is committed to RAID disks.
1232 */
1233void r5c_finish_stripe_write_out(struct r5conf *conf,
1234 struct stripe_head *sh,
1235 struct stripe_head_state *s)
1236{
1237 if (!conf->log ||
1238 !test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags))
1239 return;
1240
1241 WARN_ON(test_bit(STRIPE_R5C_CACHING, &sh->state));
1242 clear_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags);
1243
1244 if (conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
1245 return;
1246 BUG(); /* write-back logic in following patches */
1247}
1248
1249
1115static int r5l_load_log(struct r5l_log *log) 1250static int r5l_load_log(struct r5l_log *log)
1116{ 1251{
1117 struct md_rdev *rdev = log->rdev; 1252 struct md_rdev *rdev = log->rdev;
@@ -1249,6 +1384,8 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
1249 INIT_LIST_HEAD(&log->no_space_stripes); 1384 INIT_LIST_HEAD(&log->no_space_stripes);
1250 spin_lock_init(&log->no_space_stripes_lock); 1385 spin_lock_init(&log->no_space_stripes_lock);
1251 1386
1387 log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
1388
1252 if (r5l_load_log(log)) 1389 if (r5l_load_log(log))
1253 goto error; 1390 goto error;
1254 1391