diff options
author | Fred Isaman <iisaman@citi.umich.edu> | 2011-07-30 20:52:55 -0400 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2011-07-31 12:18:17 -0400 |
commit | 31e6306a4046926b598484f1cacf69309382eac6 (patch) | |
tree | 3b71cc552cc00a37b3838c7eb0c7dd4d320fc71d | |
parent | 650e2d39bd8f6b99f39b5009dbed9fbd3bb65e54 (diff) |
pnfsblock: note written INVAL areas for layoutcommit
Signed-off-by: Peng Tao <peng_tao@emc.com>
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Benny Halevy <bhalevy@tonian.com>
Signed-off-by: Jim Rees <rees@umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r-- | fs/nfs/blocklayout/blocklayout.c | 32 | ||||
-rw-r--r-- | fs/nfs/blocklayout/blocklayout.h | 2 | ||||
-rw-r--r-- | fs/nfs/blocklayout/extents.c | 95 |
3 files changed, 129 insertions, 0 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 2e373826db80..21efef7c2fd2 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
@@ -329,6 +329,30 @@ out: | |||
329 | return PNFS_NOT_ATTEMPTED; | 329 | return PNFS_NOT_ATTEMPTED; |
330 | } | 330 | } |
331 | 331 | ||
332 | static void mark_extents_written(struct pnfs_block_layout *bl, | ||
333 | __u64 offset, __u32 count) | ||
334 | { | ||
335 | sector_t isect, end; | ||
336 | struct pnfs_block_extent *be; | ||
337 | |||
338 | dprintk("%s(%llu, %u)\n", __func__, offset, count); | ||
339 | if (count == 0) | ||
340 | return; | ||
341 | isect = (offset & (long)(PAGE_CACHE_MASK)) >> SECTOR_SHIFT; | ||
342 | end = (offset + count + PAGE_CACHE_SIZE - 1) & (long)(PAGE_CACHE_MASK); | ||
343 | end >>= SECTOR_SHIFT; | ||
344 | while (isect < end) { | ||
345 | sector_t len; | ||
346 | be = bl_find_get_extent(bl, isect, NULL); | ||
347 | BUG_ON(!be); /* FIXME */ | ||
348 | len = min(end, be->be_f_offset + be->be_length) - isect; | ||
349 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) | ||
350 | bl_mark_for_commit(be, isect, len); /* What if fails? */ | ||
351 | isect += len; | ||
352 | bl_put_extent(be); | ||
353 | } | ||
354 | } | ||
355 | |||
332 | /* This is basically copied from mpage_end_io_read */ | 356 | /* This is basically copied from mpage_end_io_read */ |
333 | static void bl_end_io_write(struct bio *bio, int err) | 357 | static void bl_end_io_write(struct bio *bio, int err) |
334 | { | 358 | { |
@@ -355,6 +379,14 @@ static void bl_write_cleanup(struct work_struct *work) | |||
355 | dprintk("%s enter\n", __func__); | 379 | dprintk("%s enter\n", __func__); |
356 | task = container_of(work, struct rpc_task, u.tk_work); | 380 | task = container_of(work, struct rpc_task, u.tk_work); |
357 | wdata = container_of(task, struct nfs_write_data, task); | 381 | wdata = container_of(task, struct nfs_write_data, task); |
382 | if (!wdata->task.tk_status) { | ||
383 | /* Marks for LAYOUTCOMMIT */ | ||
384 | /* BUG - this should be called after each bio, not after | ||
385 | * all finish, unless have some way of storing success/failure | ||
386 | */ | ||
387 | mark_extents_written(BLK_LSEG2EXT(wdata->lseg), | ||
388 | wdata->args.offset, wdata->args.count); | ||
389 | } | ||
358 | pnfs_ld_write_done(wdata); | 390 | pnfs_ld_write_done(wdata); |
359 | } | 391 | } |
360 | 392 | ||
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 6a703b79c33d..f27d827960a3 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h | |||
@@ -201,5 +201,7 @@ void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | |||
201 | int status); | 201 | int status); |
202 | int bl_add_merge_extent(struct pnfs_block_layout *bl, | 202 | int bl_add_merge_extent(struct pnfs_block_layout *bl, |
203 | struct pnfs_block_extent *new); | 203 | struct pnfs_block_extent *new); |
204 | int bl_mark_for_commit(struct pnfs_block_extent *be, | ||
205 | sector_t offset, sector_t length); | ||
204 | 206 | ||
205 | #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ | 207 | #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ |
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 7521940dcca5..19fa7b0b8c00 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c | |||
@@ -217,6 +217,48 @@ int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect) | |||
217 | return rv; | 217 | return rv; |
218 | } | 218 | } |
219 | 219 | ||
220 | /* Assume start, end already sector aligned */ | ||
221 | static int | ||
222 | _range_has_tag(struct my_tree *tree, u64 start, u64 end, int32_t tag) | ||
223 | { | ||
224 | struct pnfs_inval_tracking *pos; | ||
225 | u64 expect = 0; | ||
226 | |||
227 | dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag); | ||
228 | list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { | ||
229 | if (pos->it_sector >= end) | ||
230 | continue; | ||
231 | if (!expect) { | ||
232 | if ((pos->it_sector == end - tree->mtt_step_size) && | ||
233 | (pos->it_tags & (1 << tag))) { | ||
234 | expect = pos->it_sector - tree->mtt_step_size; | ||
235 | if (pos->it_sector < tree->mtt_step_size || expect < start) | ||
236 | return 1; | ||
237 | continue; | ||
238 | } else { | ||
239 | return 0; | ||
240 | } | ||
241 | } | ||
242 | if (pos->it_sector != expect || !(pos->it_tags & (1 << tag))) | ||
243 | return 0; | ||
244 | expect -= tree->mtt_step_size; | ||
245 | if (expect < start) | ||
246 | return 1; | ||
247 | } | ||
248 | return 0; | ||
249 | } | ||
250 | |||
251 | static int is_range_written(struct pnfs_inval_markings *marks, | ||
252 | sector_t start, sector_t end) | ||
253 | { | ||
254 | int rv; | ||
255 | |||
256 | spin_lock(&marks->im_lock); | ||
257 | rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN); | ||
258 | spin_unlock(&marks->im_lock); | ||
259 | return rv; | ||
260 | } | ||
261 | |||
220 | /* Marks sectors in [offset, offset + length) as having been initialized. | 262 | /* Marks sectors in [offset, offset + length) as having been initialized. |
221 | * All lengths are step-aligned, where step is min(pagesize, blocksize). | 263 | * All lengths are step-aligned, where step is min(pagesize, blocksize). |
222 | * Notes where partial block is initialized, and helps prepare it for | 264 | * Notes where partial block is initialized, and helps prepare it for |
@@ -396,6 +438,59 @@ static void add_to_commitlist(struct pnfs_block_layout *bl, | |||
396 | print_clist(clist, bl->bl_count); | 438 | print_clist(clist, bl->bl_count); |
397 | } | 439 | } |
398 | 440 | ||
441 | /* Note the range described by offset, length is guaranteed to be contained | ||
442 | * within be. | ||
443 | */ | ||
444 | int bl_mark_for_commit(struct pnfs_block_extent *be, | ||
445 | sector_t offset, sector_t length) | ||
446 | { | ||
447 | sector_t new_end, end = offset + length; | ||
448 | struct pnfs_block_short_extent *new; | ||
449 | struct pnfs_block_layout *bl = container_of(be->be_inval, | ||
450 | struct pnfs_block_layout, | ||
451 | bl_inval); | ||
452 | |||
453 | new = kmalloc(sizeof(*new), GFP_NOFS); | ||
454 | if (!new) | ||
455 | return -ENOMEM; | ||
456 | |||
457 | mark_written_sectors(be->be_inval, offset, length); | ||
458 | /* We want to add the range to commit list, but it must be | ||
459 | * block-normalized, and verified that the normalized range has | ||
460 | * been entirely written to disk. | ||
461 | */ | ||
462 | new->bse_f_offset = offset; | ||
463 | offset = normalize(offset, bl->bl_blocksize); | ||
464 | if (offset < new->bse_f_offset) { | ||
465 | if (is_range_written(be->be_inval, offset, new->bse_f_offset)) | ||
466 | new->bse_f_offset = offset; | ||
467 | else | ||
468 | new->bse_f_offset = offset + bl->bl_blocksize; | ||
469 | } | ||
470 | new_end = normalize_up(end, bl->bl_blocksize); | ||
471 | if (end < new_end) { | ||
472 | if (is_range_written(be->be_inval, end, new_end)) | ||
473 | end = new_end; | ||
474 | else | ||
475 | end = new_end - bl->bl_blocksize; | ||
476 | } | ||
477 | if (end <= new->bse_f_offset) { | ||
478 | kfree(new); | ||
479 | return 0; | ||
480 | } | ||
481 | new->bse_length = end - new->bse_f_offset; | ||
482 | new->bse_devid = be->be_devid; | ||
483 | new->bse_mdev = be->be_mdev; | ||
484 | |||
485 | spin_lock(&bl->bl_ext_lock); | ||
486 | /* new will be freed, either by add_to_commitlist if it decides not | ||
487 | * to use it, or after LAYOUTCOMMIT uses it in the commitlist. | ||
488 | */ | ||
489 | add_to_commitlist(bl, new); | ||
490 | spin_unlock(&bl->bl_ext_lock); | ||
491 | return 0; | ||
492 | } | ||
493 | |||
399 | static void print_bl_extent(struct pnfs_block_extent *be) | 494 | static void print_bl_extent(struct pnfs_block_extent *be) |
400 | { | 495 | { |
401 | dprintk("PRINT EXTENT extent %p\n", be); | 496 | dprintk("PRINT EXTENT extent %p\n", be); |