diff options
Diffstat (limited to 'drivers/md/raid5-cache.c')
-rw-r--r-- | drivers/md/raid5-cache.c | 143 |
1 files changed, 140 insertions, 3 deletions
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 33fc85015147..02a554434747 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c | |||
@@ -40,6 +40,47 @@ | |||
40 | */ | 40 | */ |
41 | #define R5L_POOL_SIZE 4 | 41 | #define R5L_POOL_SIZE 4 |
42 | 42 | ||
43 | /* | ||
44 | * r5c journal modes of the array: write-back or write-through. | ||
45 | * write-through mode behaves identically to the existing log-only | |
46 | * implementation. | ||
47 | */ | ||
48 | enum r5c_journal_mode { | ||
49 | R5C_JOURNAL_MODE_WRITE_THROUGH = 0, | ||
50 | R5C_JOURNAL_MODE_WRITE_BACK = 1, | ||
51 | }; | ||
52 | |||
53 | /* | ||
54 | * raid5 cache state machine | ||
55 | * | ||
56 | * With the RAID cache, each stripe works in two phases: | |
57 | * - caching phase | ||
58 | * - writing-out phase | ||
59 | * | ||
60 | * These two phases are controlled by bit STRIPE_R5C_CACHING: | ||
61 | * if STRIPE_R5C_CACHING == 0, the stripe is in writing-out phase | ||
62 | * if STRIPE_R5C_CACHING == 1, the stripe is in caching phase | ||
63 | * | ||
64 | * When there is no journal, or the journal is in write-through mode, | ||
65 | * the stripe is always in writing-out phase. | ||
66 | * | ||
67 | * For write-back journal, the stripe is sent to caching phase on write | ||
68 | * (r5c_try_caching_write). r5c_make_stripe_write_out() kicks off | ||
69 | * the write-out phase by clearing STRIPE_R5C_CACHING. | ||
70 | * | ||
71 | * Stripes in caching phase do not write the raid disks. Instead, all | ||
72 | * writes are committed from the log device. Therefore, a stripe in | ||
73 | * caching phase handles writes as: | ||
74 | * - write to log device | ||
75 | * - return IO | ||
76 | * | ||
77 | * Stripes in writing-out phase handle writes as: | ||
78 | * - calculate parity | ||
79 | * - write pending data and parity to journal | ||
80 | * - write data and parity to raid disks | ||
81 | * - return IO for pending writes | ||
82 | */ | ||
83 | |||
43 | struct r5l_log { | 84 | struct r5l_log { |
44 | struct md_rdev *rdev; | 85 | struct md_rdev *rdev; |
45 | 86 | ||
@@ -96,6 +137,9 @@ struct r5l_log { | |||
96 | spinlock_t no_space_stripes_lock; | 137 | spinlock_t no_space_stripes_lock; |
97 | 138 | ||
98 | bool need_cache_flush; | 139 | bool need_cache_flush; |
140 | |||
141 | /* for r5c_cache */ | ||
142 | enum r5c_journal_mode r5c_journal_mode; | ||
99 | }; | 143 | }; |
100 | 144 | ||
101 | /* | 145 | /* |
@@ -133,6 +177,12 @@ enum r5l_io_unit_state { | |||
133 | IO_UNIT_STRIPE_END = 3, /* stripes data finished writing to raid */ | 177 | IO_UNIT_STRIPE_END = 3, /* stripes data finished writing to raid */ |
134 | }; | 178 | }; |
135 | 179 | ||
180 | bool r5c_is_writeback(struct r5l_log *log) | ||
181 | { | ||
182 | return (log != NULL && | ||
183 | log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK); | ||
184 | } | ||
185 | |||
136 | static sector_t r5l_ring_add(struct r5l_log *log, sector_t start, sector_t inc) | 186 | static sector_t r5l_ring_add(struct r5l_log *log, sector_t start, sector_t inc) |
137 | { | 187 | { |
138 | start += inc; | 188 | start += inc; |
@@ -168,12 +218,51 @@ static void __r5l_set_io_unit_state(struct r5l_io_unit *io, | |||
168 | io->state = state; | 218 | io->state = state; |
169 | } | 219 | } |
170 | 220 | ||
221 | /* | ||
222 | * Put the stripe into writing-out phase by clearing STRIPE_R5C_CACHING. | ||
223 | * This function should only be called in write-back mode. | ||
224 | */ | ||
225 | static void r5c_make_stripe_write_out(struct stripe_head *sh) | ||
226 | { | ||
227 | struct r5conf *conf = sh->raid_conf; | ||
228 | struct r5l_log *log = conf->log; | ||
229 | |||
230 | BUG_ON(!r5c_is_writeback(log)); | ||
231 | |||
232 | WARN_ON(!test_bit(STRIPE_R5C_CACHING, &sh->state)); | ||
233 | clear_bit(STRIPE_R5C_CACHING, &sh->state); | ||
234 | } | ||
235 | |||
236 | /* | ||
237 | * Set proper flags after writing (or flushing) data and/or parity to the | |
238 | * log device. This is called from r5l_log_endio() or r5l_log_flush_endio(). | ||
239 | */ | ||
240 | static void r5c_finish_cache_stripe(struct stripe_head *sh) | ||
241 | { | ||
242 | struct r5l_log *log = sh->raid_conf->log; | ||
243 | |||
244 | if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) { | ||
245 | BUG_ON(test_bit(STRIPE_R5C_CACHING, &sh->state)); | ||
246 | /* | ||
247 | * Set R5_InJournal for parity dev[pd_idx]. This means | ||
248 | * all data AND parity are in the journal. For RAID 6, it is | |
249 | * NOT necessary to set the flag for dev[qd_idx], as the | ||
250 | * two parities are written out together. | ||
251 | */ | ||
252 | set_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags); | ||
253 | } else | ||
254 | BUG(); /* write-back logic in next patch */ | ||
255 | } | ||
256 | |||
171 | static void r5l_io_run_stripes(struct r5l_io_unit *io) | 257 | static void r5l_io_run_stripes(struct r5l_io_unit *io) |
172 | { | 258 | { |
173 | struct stripe_head *sh, *next; | 259 | struct stripe_head *sh, *next; |
174 | 260 | ||
175 | list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) { | 261 | list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) { |
176 | list_del_init(&sh->log_list); | 262 | list_del_init(&sh->log_list); |
263 | |||
264 | r5c_finish_cache_stripe(sh); | ||
265 | |||
177 | set_bit(STRIPE_HANDLE, &sh->state); | 266 | set_bit(STRIPE_HANDLE, &sh->state); |
178 | raid5_release_stripe(sh); | 267 | raid5_release_stripe(sh); |
179 | } | 268 | } |
@@ -412,18 +501,19 @@ static int r5l_log_stripe(struct r5l_log *log, struct stripe_head *sh, | |||
412 | r5l_append_payload_page(log, sh->dev[i].page); | 501 | r5l_append_payload_page(log, sh->dev[i].page); |
413 | } | 502 | } |
414 | 503 | ||
415 | if (sh->qd_idx >= 0) { | 504 | if (parity_pages == 2) { |
416 | r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY, | 505 | r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY, |
417 | sh->sector, sh->dev[sh->pd_idx].log_checksum, | 506 | sh->sector, sh->dev[sh->pd_idx].log_checksum, |
418 | sh->dev[sh->qd_idx].log_checksum, true); | 507 | sh->dev[sh->qd_idx].log_checksum, true); |
419 | r5l_append_payload_page(log, sh->dev[sh->pd_idx].page); | 508 | r5l_append_payload_page(log, sh->dev[sh->pd_idx].page); |
420 | r5l_append_payload_page(log, sh->dev[sh->qd_idx].page); | 509 | r5l_append_payload_page(log, sh->dev[sh->qd_idx].page); |
421 | } else { | 510 | } else if (parity_pages == 1) { |
422 | r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY, | 511 | r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY, |
423 | sh->sector, sh->dev[sh->pd_idx].log_checksum, | 512 | sh->sector, sh->dev[sh->pd_idx].log_checksum, |
424 | 0, false); | 513 | 0, false); |
425 | r5l_append_payload_page(log, sh->dev[sh->pd_idx].page); | 514 | r5l_append_payload_page(log, sh->dev[sh->pd_idx].page); |
426 | } | 515 | } else /* Just writing data, not parity, in caching phase */ |
516 | BUG_ON(parity_pages != 0); | ||
427 | 517 | ||
428 | list_add_tail(&sh->log_list, &io->stripe_list); | 518 | list_add_tail(&sh->log_list, &io->stripe_list); |
429 | atomic_inc(&io->pending_stripe); | 519 | atomic_inc(&io->pending_stripe); |
@@ -455,6 +545,8 @@ int r5l_write_stripe(struct r5l_log *log, struct stripe_head *sh) | |||
455 | return -EAGAIN; | 545 | return -EAGAIN; |
456 | } | 546 | } |
457 | 547 | ||
548 | WARN_ON(test_bit(STRIPE_R5C_CACHING, &sh->state)); | ||
549 | |||
458 | for (i = 0; i < sh->disks; i++) { | 550 | for (i = 0; i < sh->disks; i++) { |
459 | void *addr; | 551 | void *addr; |
460 | 552 | ||
@@ -1112,6 +1204,49 @@ static void r5l_write_super(struct r5l_log *log, sector_t cp) | |||
1112 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 1204 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
1113 | } | 1205 | } |
1114 | 1206 | ||
1207 | /* | ||
1208 | * Try to handle write operations in caching phase. This function should only | |
1209 | * be called in write-back mode. | ||
1210 | * | ||
1211 | * If all outstanding writes can be handled in caching phase, returns 0 | ||
1212 | * If the writes require the write-out phase, calls r5c_make_stripe_write_out() | |
1213 | * and returns -EAGAIN | ||
1214 | */ | ||
1215 | int r5c_try_caching_write(struct r5conf *conf, | ||
1216 | struct stripe_head *sh, | ||
1217 | struct stripe_head_state *s, | ||
1218 | int disks) | ||
1219 | { | ||
1220 | struct r5l_log *log = conf->log; | ||
1221 | |||
1222 | BUG_ON(!r5c_is_writeback(log)); | ||
1223 | |||
1224 | /* more write-back logic in next patches */ | ||
1225 | r5c_make_stripe_write_out(sh); | ||
1226 | return -EAGAIN; | ||
1227 | } | ||
1228 | |||
1229 | /* | ||
1230 | * clean up the stripe (clear R5_InJournal for dev[pd_idx] etc.) after the | ||
1231 | * stripe is committed to RAID disks. | ||
1232 | */ | ||
1233 | void r5c_finish_stripe_write_out(struct r5conf *conf, | ||
1234 | struct stripe_head *sh, | ||
1235 | struct stripe_head_state *s) | ||
1236 | { | ||
1237 | if (!conf->log || | ||
1238 | !test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags)) | ||
1239 | return; | ||
1240 | |||
1241 | WARN_ON(test_bit(STRIPE_R5C_CACHING, &sh->state)); | ||
1242 | clear_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags); | ||
1243 | |||
1244 | if (conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) | ||
1245 | return; | ||
1246 | BUG(); /* write-back logic in following patches */ | ||
1247 | } | ||
1248 | |||
1249 | |||
1115 | static int r5l_load_log(struct r5l_log *log) | 1250 | static int r5l_load_log(struct r5l_log *log) |
1116 | { | 1251 | { |
1117 | struct md_rdev *rdev = log->rdev; | 1252 | struct md_rdev *rdev = log->rdev; |
@@ -1249,6 +1384,8 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) | |||
1249 | INIT_LIST_HEAD(&log->no_space_stripes); | 1384 | INIT_LIST_HEAD(&log->no_space_stripes); |
1250 | spin_lock_init(&log->no_space_stripes_lock); | 1385 | spin_lock_init(&log->no_space_stripes_lock); |
1251 | 1386 | ||
1387 | log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; | ||
1388 | |||
1252 | if (r5l_load_log(log)) | 1389 | if (r5l_load_log(log)) |
1253 | goto error; | 1390 | goto error; |
1254 | 1391 | ||