aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Fred Isaman <iisaman@citi.umich.edu> 2011-07-30 20:52:55 -0400
committer Trond Myklebust <Trond.Myklebust@netapp.com> 2011-07-31 12:18:17 -0400
commit 31e6306a4046926b598484f1cacf69309382eac6 (patch)
tree 3b71cc552cc00a37b3838c7eb0c7dd4d320fc71d
parent 650e2d39bd8f6b99f39b5009dbed9fbd3bb65e54 (diff)
pnfsblock: note written INVAL areas for layoutcommit
Signed-off-by: Peng Tao <peng_tao@emc.com>
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Benny Halevy <bhalevy@tonian.com>
Signed-off-by: Jim Rees <rees@umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r-- fs/nfs/blocklayout/blocklayout.c 32
-rw-r--r-- fs/nfs/blocklayout/blocklayout.h 2
-rw-r--r-- fs/nfs/blocklayout/extents.c 95
3 files changed, 129 insertions, 0 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 2e373826db8..21efef7c2fd 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -329,6 +329,30 @@ out:
329 return PNFS_NOT_ATTEMPTED; 329 return PNFS_NOT_ATTEMPTED;
330} 330}
331 331
332static void mark_extents_written(struct pnfs_block_layout *bl,
333 __u64 offset, __u32 count)
334{
335 sector_t isect, end;
336 struct pnfs_block_extent *be;
337
338 dprintk("%s(%llu, %u)\n", __func__, offset, count);
339 if (count == 0)
340 return;
341 isect = (offset & (long)(PAGE_CACHE_MASK)) >> SECTOR_SHIFT;
342 end = (offset + count + PAGE_CACHE_SIZE - 1) & (long)(PAGE_CACHE_MASK);
343 end >>= SECTOR_SHIFT;
344 while (isect < end) {
345 sector_t len;
346 be = bl_find_get_extent(bl, isect, NULL);
347 BUG_ON(!be); /* FIXME */
348 len = min(end, be->be_f_offset + be->be_length) - isect;
349 if (be->be_state == PNFS_BLOCK_INVALID_DATA)
350 bl_mark_for_commit(be, isect, len); /* What if fails? */
351 isect += len;
352 bl_put_extent(be);
353 }
354}
355
332/* This is basically copied from mpage_end_io_read */ 356/* This is basically copied from mpage_end_io_read */
333static void bl_end_io_write(struct bio *bio, int err) 357static void bl_end_io_write(struct bio *bio, int err)
334{ 358{
@@ -355,6 +379,14 @@ static void bl_write_cleanup(struct work_struct *work)
355 dprintk("%s enter\n", __func__); 379 dprintk("%s enter\n", __func__);
356 task = container_of(work, struct rpc_task, u.tk_work); 380 task = container_of(work, struct rpc_task, u.tk_work);
357 wdata = container_of(task, struct nfs_write_data, task); 381 wdata = container_of(task, struct nfs_write_data, task);
382 if (!wdata->task.tk_status) {
383 /* Marks for LAYOUTCOMMIT */
384 /* BUG - this should be called after each bio, not after
385 * all finish, unless have some way of storing success/failure
386 */
387 mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
388 wdata->args.offset, wdata->args.count);
389 }
358 pnfs_ld_write_done(wdata); 390 pnfs_ld_write_done(wdata);
359} 391}
360 392
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 6a703b79c33..f27d827960a 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -201,5 +201,7 @@ void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
201 int status); 201 int status);
202int bl_add_merge_extent(struct pnfs_block_layout *bl, 202int bl_add_merge_extent(struct pnfs_block_layout *bl,
203 struct pnfs_block_extent *new); 203 struct pnfs_block_extent *new);
204int bl_mark_for_commit(struct pnfs_block_extent *be,
205 sector_t offset, sector_t length);
204 206
205#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ 207#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 7521940dcca..19fa7b0b8c0 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -217,6 +217,48 @@ int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect)
217 return rv; 217 return rv;
218} 218}
219 219
220/* Assume start, end already sector aligned */
221static int
222_range_has_tag(struct my_tree *tree, u64 start, u64 end, int32_t tag)
223{
224 struct pnfs_inval_tracking *pos;
225 u64 expect = 0;
226
227 dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag);
228 list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
229 if (pos->it_sector >= end)
230 continue;
231 if (!expect) {
232 if ((pos->it_sector == end - tree->mtt_step_size) &&
233 (pos->it_tags & (1 << tag))) {
234 expect = pos->it_sector - tree->mtt_step_size;
235 if (pos->it_sector < tree->mtt_step_size || expect < start)
236 return 1;
237 continue;
238 } else {
239 return 0;
240 }
241 }
242 if (pos->it_sector != expect || !(pos->it_tags & (1 << tag)))
243 return 0;
244 expect -= tree->mtt_step_size;
245 if (expect < start)
246 return 1;
247 }
248 return 0;
249}
250
251static int is_range_written(struct pnfs_inval_markings *marks,
252 sector_t start, sector_t end)
253{
254 int rv;
255
256 spin_lock(&marks->im_lock);
257 rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN);
258 spin_unlock(&marks->im_lock);
259 return rv;
260}
261
220/* Marks sectors in [offset, offset+length) as having been initialized. 262/* Marks sectors in [offset, offset+length) as having been initialized.
221 * All lengths are step-aligned, where step is min(pagesize, blocksize). 263 * All lengths are step-aligned, where step is min(pagesize, blocksize).
222 * Notes where partial block is initialized, and helps prepare it for 264 * Notes where partial block is initialized, and helps prepare it for
@@ -396,6 +438,59 @@ static void add_to_commitlist(struct pnfs_block_layout *bl,
396 print_clist(clist, bl->bl_count); 438 print_clist(clist, bl->bl_count);
397} 439}
398 440
441/* Note the range described by offset, length is guaranteed to be contained
442 * within be.
443 */
444int bl_mark_for_commit(struct pnfs_block_extent *be,
445 sector_t offset, sector_t length)
446{
447 sector_t new_end, end = offset + length;
448 struct pnfs_block_short_extent *new;
449 struct pnfs_block_layout *bl = container_of(be->be_inval,
450 struct pnfs_block_layout,
451 bl_inval);
452
453 new = kmalloc(sizeof(*new), GFP_NOFS);
454 if (!new)
455 return -ENOMEM;
456
457 mark_written_sectors(be->be_inval, offset, length);
458 /* We want to add the range to commit list, but it must be
459 * block-normalized, and verified that the normalized range has
460 * been entirely written to disk.
461 */
462 new->bse_f_offset = offset;
463 offset = normalize(offset, bl->bl_blocksize);
464 if (offset < new->bse_f_offset) {
465 if (is_range_written(be->be_inval, offset, new->bse_f_offset))
466 new->bse_f_offset = offset;
467 else
468 new->bse_f_offset = offset + bl->bl_blocksize;
469 }
470 new_end = normalize_up(end, bl->bl_blocksize);
471 if (end < new_end) {
472 if (is_range_written(be->be_inval, end, new_end))
473 end = new_end;
474 else
475 end = new_end - bl->bl_blocksize;
476 }
477 if (end <= new->bse_f_offset) {
478 kfree(new);
479 return 0;
480 }
481 new->bse_length = end - new->bse_f_offset;
482 new->bse_devid = be->be_devid;
483 new->bse_mdev = be->be_mdev;
484
485 spin_lock(&bl->bl_ext_lock);
486 /* new will be freed, either by add_to_commitlist if it decides not
487 * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
488 */
489 add_to_commitlist(bl, new);
490 spin_unlock(&bl->bl_ext_lock);
491 return 0;
492}
493
399static void print_bl_extent(struct pnfs_block_extent *be) 494static void print_bl_extent(struct pnfs_block_extent *be)
400{ 495{
401 dprintk("PRINT EXTENT extent %p\n", be); 496 dprintk("PRINT EXTENT extent %p\n", be);