aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/raid/raid5.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/raid/raid5.h')
-rw-r--r-- include/linux/raid/raid5.h 81
1 files changed, 78 insertions, 3 deletions
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index b99d354f6128..6fb9d94e6f2e 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -116,13 +116,46 @@
116 * attach a request to an active stripe (add_stripe_bh()) 116 * attach a request to an active stripe (add_stripe_bh())
117 * lockdev attach-buffer unlockdev 117 * lockdev attach-buffer unlockdev
118 * handle a stripe (handle_stripe()) 118 * handle a stripe (handle_stripe())
119 * lockstripe clrSTRIPE_HANDLE ... (lockdev check-buffers unlockdev) .. change-state .. record io needed unlockstripe schedule io 119 * lockstripe clrSTRIPE_HANDLE ...
120 * (lockdev check-buffers unlockdev) ..
121 * change-state ..
122 * record io/ops needed unlockstripe schedule io/ops
120 * release an active stripe (release_stripe()) 123 * release an active stripe (release_stripe())
121 * lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev 124 * lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev
122 * 125 *
123 * The refcount counts each thread that has activated the stripe, 126 * The refcount counts each thread that has activated the stripe,
124 * plus raid5d if it is handling it, plus one for each active request 127 * plus raid5d if it is handling it, plus one for each active request
125 * on a cached buffer. 128 * on a cached buffer, and plus one if the stripe is undergoing stripe
129 * operations.
130 *
131 * Stripe operations are performed outside the stripe lock,
132 * the stripe operations are:
133 * -copying data between the stripe cache and user application buffers
134 * -computing blocks to save a disk access, or to recover a missing block
135 * -updating the parity on a write operation (reconstruct write and
136 * read-modify-write)
137 * -checking parity correctness
138 * -running i/o to disk
139 * These operations are carried out by raid5_run_ops which uses the async_tx
140 * api to (optionally) offload operations to dedicated hardware engines.
141 * When requesting an operation handle_stripe sets the pending bit for the
142 * operation and increments the count. raid5_run_ops is then run whenever
143 * the count is non-zero.
144 * There are some critical dependencies between the operations that prevent some
145 * from being requested while another is in flight.
146 * 1/ Parity check operations destroy the in cache version of the parity block,
147 * so we prevent parity dependent operations like writes and compute_blocks
148 * from starting while a check is in progress. Some dma engines can perform
149 * the check without damaging the parity block, in these cases the parity
150 * block is re-marked up to date (assuming the check was successful) and is
151 * not re-read from disk.
152 * 2/ When a write operation is requested we immediately lock the affected
153 * blocks, and mark them as not up to date. This causes new read requests
154 * to be held off, as well as parity checks and compute block operations.
155 * 3/ Once a compute block operation has been requested handle_stripe treats
156 * that block as if it is up to date. raid5_run_ops guarantees that any
157 * operation that is dependent on the compute block result is initiated after
158 * the compute block completes.
126 */ 159 */
127 160
128struct stripe_head { 161struct stripe_head {
@@ -136,11 +169,26 @@ struct stripe_head {
136 spinlock_t lock; 169 spinlock_t lock;
137 int bm_seq; /* sequence number for bitmap flushes */ 170 int bm_seq; /* sequence number for bitmap flushes */
138 int disks; /* disks in stripe */ 171 int disks; /* disks in stripe */
172 /* stripe_operations
173 * @pending - pending ops flags (set for request->issue->complete)
174 * @ack - submitted ops flags (set for issue->complete)
175 * @complete - completed ops flags (set for complete)
176 * @target - STRIPE_OP_COMPUTE_BLK target
177 * @count - raid5_run_ops is set to run when this is non-zero
178 */
179 struct stripe_operations {
180 unsigned long pending;
181 unsigned long ack;
182 unsigned long complete;
183 int target;
184 int count;
185 u32 zero_sum_result;
186 } ops;
139 struct r5dev { 187 struct r5dev {
140 struct bio req; 188 struct bio req;
141 struct bio_vec vec; 189 struct bio_vec vec;
142 struct page *page; 190 struct page *page;
143 struct bio *toread, *towrite, *written; 191 struct bio *toread, *read, *towrite, *written;
144 sector_t sector; /* sector of this page */ 192 sector_t sector; /* sector of this page */
145 unsigned long flags; 193 unsigned long flags;
146 } dev[1]; /* allocated with extra space depending of RAID geometry */ 194 } dev[1]; /* allocated with extra space depending of RAID geometry */
@@ -174,6 +222,15 @@ struct r6_state {
174#define R5_ReWrite 9 /* have tried to over-write the readerror */ 222#define R5_ReWrite 9 /* have tried to over-write the readerror */
175 223
176#define R5_Expanded 10 /* This block now has post-expand data */ 224#define R5_Expanded 10 /* This block now has post-expand data */
225#define R5_Wantcompute 11 /* compute_block in progress treat as
226 * uptodate
227 */
228#define R5_Wantfill 12 /* dev->toread contains a bio that needs
229 * filling
230 */
231#define R5_Wantprexor 13 /* distinguish blocks ready for rmw from
232 * other "towrites"
233 */
177/* 234/*
178 * Write method 235 * Write method
179 */ 236 */
@@ -196,6 +253,24 @@ struct r6_state {
196#define STRIPE_EXPAND_SOURCE 10 253#define STRIPE_EXPAND_SOURCE 10
197#define STRIPE_EXPAND_READY 11 254#define STRIPE_EXPAND_READY 11
198/* 255/*
256 * Operations flags (in issue order)
257 */
258#define STRIPE_OP_BIOFILL 0
259#define STRIPE_OP_COMPUTE_BLK 1
260#define STRIPE_OP_PREXOR 2
261#define STRIPE_OP_BIODRAIN 3
262#define STRIPE_OP_POSTXOR 4
263#define STRIPE_OP_CHECK 5
264#define STRIPE_OP_IO 6
265
266/* modifiers to the base operations
267 * STRIPE_OP_MOD_REPAIR_PD - compute the parity block and write it back
268 * STRIPE_OP_MOD_DMA_CHECK - parity is not corrupted by the check
269 */
270#define STRIPE_OP_MOD_REPAIR_PD 7
271#define STRIPE_OP_MOD_DMA_CHECK 8
272
273/*
199 * Plugging: 274 * Plugging:
200 * 275 *
201 * To improve write throughput, we need to delay the handling of some 276 * To improve write throughput, we need to delay the handling of some