aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfs/objlayout/objlayout.c
diff options
context:
space:
mode:
authorBoaz Harrosh <bharrosh@panasas.com>2011-05-22 12:52:19 -0400
committerBoaz Harrosh <bharrosh@panasas.com>2011-05-29 13:54:15 -0400
commit04f83450388e87d86b387cf4a27b81eb7e69de7d (patch)
tree99c10d6a995ed1e7b872abb0127fc38d6bef9982 /fs/nfs/objlayout/objlayout.c
parentd20581aa4be11407c9eeeb75992df5ef176bba0f (diff)
pnfs-obj: osd raid engine read/write implementation
With the use of the in-kernel osd library, implement read/write of data from/to osd-objects according to the information specified in the objects-layout. Support for striping over mirrors with a received stripe_unit. There are, however, a few constraints: 1. Stripe Unit must be a multiple of PAGE_SIZE 2. stripe length (stripe_unit * number_of_stripes) can not be bigger than 32bit. Also support raid-groups and partial-layout. Partial-layout is when not all the groups are received on the line, addressing only a partial range of the file. TODO: Only raid0! raid 4/5/6 support will come at a later stage. A non-supported layout will send IO through the MDS. [Important fallout from the last rebase] Signed-off-by: Boaz Harrosh <bharrosh@panasas.com> [gfp_flags] Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Diffstat (limited to 'fs/nfs/objlayout/objlayout.c')
-rw-r--r--fs/nfs/objlayout/objlayout.c254
1 files changed, 254 insertions, 0 deletions
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index f14b4da34052..5157ef6d0041 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -129,6 +129,260 @@ objlayout_free_lseg(struct pnfs_layout_segment *lseg)
129} 129}
130 130
131/* 131/*
132 * I/O Operations
133 */
134static inline u64
135end_offset(u64 start, u64 len)
136{
137 u64 end;
138
139 end = start + len;
140 return end >= start ? end : NFS4_MAX_UINT64;
141}
142
143/* last octet in a range */
144static inline u64
145last_byte_offset(u64 start, u64 len)
146{
147 u64 end;
148
149 BUG_ON(!len);
150 end = start + len;
151 return end > start ? end - 1 : NFS4_MAX_UINT64;
152}
153
154static struct objlayout_io_state *
155objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type,
156 struct page **pages,
157 unsigned pgbase,
158 loff_t offset,
159 size_t count,
160 struct pnfs_layout_segment *lseg,
161 void *rpcdata,
162 gfp_t gfp_flags)
163{
164 struct objlayout_io_state *state;
165 u64 lseg_end_offset;
166
167 dprintk("%s: allocating io_state\n", __func__);
168 if (objio_alloc_io_state(lseg, &state, gfp_flags))
169 return NULL;
170
171 BUG_ON(offset < lseg->pls_range.offset);
172 lseg_end_offset = end_offset(lseg->pls_range.offset,
173 lseg->pls_range.length);
174 BUG_ON(offset >= lseg_end_offset);
175 if (offset + count > lseg_end_offset) {
176 count = lseg->pls_range.length -
177 (offset - lseg->pls_range.offset);
178 dprintk("%s: truncated count %Zd\n", __func__, count);
179 }
180
181 if (pgbase > PAGE_SIZE) {
182 pages += pgbase >> PAGE_SHIFT;
183 pgbase &= ~PAGE_MASK;
184 }
185
186 state->lseg = lseg;
187 state->rpcdata = rpcdata;
188 state->pages = pages;
189 state->pgbase = pgbase;
190 state->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
191 state->offset = offset;
192 state->count = count;
193 state->sync = 0;
194
195 return state;
196}
197
/*
 * Hand an I/O state back to objio_free_io_state().
 * A NULL @state is tolerated and ignored (the debug message is still
 * emitted first).
 */
static void
objlayout_free_io_state(struct objlayout_io_state *state)
{
	dprintk("%s: freeing io_state\n", __func__);

	if (unlikely(!state))
		return;		/* nothing to release */

	objio_free_io_state(state);
}
207
/*
 * Common I/O completion code shared by the read and write paths:
 * logs the state pointer and frees the state.  @state must not be
 * touched by the caller once this returns.
 */
static void
objlayout_iodone(struct objlayout_io_state *state)
{
	dprintk("%s: state %p status\n", __func__, state);

	/* releases @state through objlayout_free_io_state() */
	objlayout_free_io_state(state);
}
218
219/* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
220 * This is because the osd completion is called with ints-off from
221 * the block layer
222 */
223static void _rpc_read_complete(struct work_struct *work)
224{
225 struct rpc_task *task;
226 struct nfs_read_data *rdata;
227
228 dprintk("%s enter\n", __func__);
229 task = container_of(work, struct rpc_task, u.tk_work);
230 rdata = container_of(task, struct nfs_read_data, task);
231
232 pnfs_ld_read_done(rdata);
233}
234
235void
236objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync)
237{
238 int eof = state->eof;
239 struct nfs_read_data *rdata;
240
241 state->status = status;
242 dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof);
243 rdata = state->rpcdata;
244 rdata->task.tk_status = status;
245 if (status >= 0) {
246 rdata->res.count = status;
247 rdata->res.eof = eof;
248 }
249 objlayout_iodone(state);
250 /* must not use state after this point */
251
252 if (sync)
253 pnfs_ld_read_done(rdata);
254 else {
255 INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete);
256 schedule_work(&rdata->task.u.tk_work);
257 }
258}
259
260/*
261 * Perform sync or async reads.
262 */
263enum pnfs_try_status
264objlayout_read_pagelist(struct nfs_read_data *rdata)
265{
266 loff_t offset = rdata->args.offset;
267 size_t count = rdata->args.count;
268 struct objlayout_io_state *state;
269 ssize_t status = 0;
270 loff_t eof;
271
272 dprintk("%s: Begin inode %p offset %llu count %d\n",
273 __func__, rdata->inode, offset, (int)count);
274
275 eof = i_size_read(rdata->inode);
276 if (unlikely(offset + count > eof)) {
277 if (offset >= eof) {
278 status = 0;
279 rdata->res.count = 0;
280 rdata->res.eof = 1;
281 goto out;
282 }
283 count = eof - offset;
284 }
285
286 state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout,
287 rdata->args.pages, rdata->args.pgbase,
288 offset, count,
289 rdata->lseg, rdata,
290 GFP_KERNEL);
291 if (unlikely(!state)) {
292 status = -ENOMEM;
293 goto out;
294 }
295
296 state->eof = state->offset + state->count >= eof;
297
298 status = objio_read_pagelist(state);
299 out:
300 dprintk("%s: Return status %Zd\n", __func__, status);
301 rdata->pnfs_error = status;
302 return PNFS_ATTEMPTED;
303}
304
305/* Function scheduled on rpc workqueue to call ->nfs_writelist_complete().
306 * This is because the osd completion is called with ints-off from
307 * the block layer
308 */
309static void _rpc_write_complete(struct work_struct *work)
310{
311 struct rpc_task *task;
312 struct nfs_write_data *wdata;
313
314 dprintk("%s enter\n", __func__);
315 task = container_of(work, struct rpc_task, u.tk_work);
316 wdata = container_of(task, struct nfs_write_data, task);
317
318 pnfs_ld_write_done(wdata);
319}
320
321void
322objlayout_write_done(struct objlayout_io_state *state, ssize_t status,
323 bool sync)
324{
325 struct nfs_write_data *wdata;
326
327 dprintk("%s: Begin\n", __func__);
328 wdata = state->rpcdata;
329 state->status = status;
330 wdata->task.tk_status = status;
331 if (status >= 0) {
332 wdata->res.count = status;
333 wdata->verf.committed = state->committed;
334 dprintk("%s: Return status %d committed %d\n",
335 __func__, wdata->task.tk_status,
336 wdata->verf.committed);
337 } else
338 dprintk("%s: Return status %d\n",
339 __func__, wdata->task.tk_status);
340 objlayout_iodone(state);
341 /* must not use state after this point */
342
343 if (sync)
344 pnfs_ld_write_done(wdata);
345 else {
346 INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete);
347 schedule_work(&wdata->task.u.tk_work);
348 }
349}
350
351/*
352 * Perform sync or async writes.
353 */
354enum pnfs_try_status
355objlayout_write_pagelist(struct nfs_write_data *wdata,
356 int how)
357{
358 struct objlayout_io_state *state;
359 ssize_t status;
360
361 dprintk("%s: Begin inode %p offset %llu count %u\n",
362 __func__, wdata->inode, wdata->args.offset, wdata->args.count);
363
364 state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout,
365 wdata->args.pages,
366 wdata->args.pgbase,
367 wdata->args.offset,
368 wdata->args.count,
369 wdata->lseg, wdata,
370 GFP_NOFS);
371 if (unlikely(!state)) {
372 status = -ENOMEM;
373 goto out;
374 }
375
376 state->sync = how & FLUSH_SYNC;
377
378 status = objio_write_pagelist(state, how & FLUSH_STABLE);
379 out:
380 dprintk("%s: Return status %Zd\n", __func__, status);
381 wdata->pnfs_error = status;
382 return PNFS_ATTEMPTED;
383}
384
385/*
132 * Get Device Info API for io engines 386 * Get Device Info API for io engines
133 */ 387 */
134struct objlayout_deviceinfo { 388struct objlayout_deviceinfo {