author     Linus Torvalds <torvalds@linux-foundation.org>   2011-03-28 18:51:02 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2011-03-28 18:51:02 -0400
commit     c5850150d0b9ae16840c5d9846f9d5a759996a15 (patch)
tree       6e3f66bff48916af728aa4173f2b7657b31c2600 /fs
parent     243b422af9ea9af4ead07a8ad54c90d4f9b6081a (diff)
parent     0e6e847ffe37436e331c132639f9f872febce82e (diff)
Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
* 'for-linus' of git://oss.sgi.com/xfs/xfs:
xfs: stop using the page cache to back the buffer cache
xfs: register the inode cache shrinker before quotachecks
xfs: xfs_trans_read_buf() should return an error on failure
xfs: introduce inode cluster buffer trylocks for xfs_iflush
vmap: flush vmap aliases when mapping fails
xfs: preallocation transactions do not need to be synchronous
Fix up trivial conflicts in fs/xfs/linux-2.6/xfs_buf.c due to plug removal.
Diffstat (limited to 'fs')
 fs/xfs/linux-2.6/xfs_buf.c   | 344
 fs/xfs/linux-2.6/xfs_buf.h   |  40
 fs/xfs/linux-2.6/xfs_file.c  |   6
 fs/xfs/linux-2.6/xfs_ioctl.c |   4
 fs/xfs/linux-2.6/xfs_super.c |  36
 fs/xfs/linux-2.6/xfs_sync.c  |  30
 fs/xfs/xfs_inode.c           |   2
 fs/xfs/xfs_inode_item.c      |   6
 fs/xfs/xfs_trans_buf.c       |   3
 fs/xfs/xfs_vnodeops.c        |   3
 fs/xfs/xfs_vnodeops.h        |   1
 11 files changed, 160 insertions, 315 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index c05324d3282c..596bb2c9de42 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -94,75 +94,6 @@ xfs_buf_vmap_len(
 }

 /*
- * Page Region interfaces.
- *
- * For pages in filesystems where the blocksize is smaller than the
- * pagesize, we use the page->private field (long) to hold a bitmap
- * of uptodate regions within the page.
- *
- * Each such region is "bytes per page / bits per long" bytes long.
- *
- * NBPPR == number-of-bytes-per-page-region
- * BTOPR == bytes-to-page-region (rounded up)
- * BTOPRT == bytes-to-page-region-truncated (rounded down)
- */
-#if (BITS_PER_LONG == 32)
-#define PRSHIFT (PAGE_CACHE_SHIFT - 5)  /* (32 == 1<<5) */
-#elif (BITS_PER_LONG == 64)
-#define PRSHIFT (PAGE_CACHE_SHIFT - 6)  /* (64 == 1<<6) */
-#else
-#error BITS_PER_LONG must be 32 or 64
-#endif
-#define NBPPR (PAGE_CACHE_SIZE/BITS_PER_LONG)
-#define BTOPR(b) (((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
-#define BTOPRT(b) (((unsigned int)(b) >> PRSHIFT))
-
-STATIC unsigned long
-page_region_mask(
-        size_t offset,
-        size_t length)
-{
-        unsigned long mask;
-        int first, final;
-
-        first = BTOPR(offset);
-        final = BTOPRT(offset + length - 1);
-        first = min(first, final);
-
-        mask = ~0UL;
-        mask <<= BITS_PER_LONG - (final - first);
-        mask >>= BITS_PER_LONG - (final);
-
-        ASSERT(offset + length <= PAGE_CACHE_SIZE);
-        ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);
-
-        return mask;
-}
-
-STATIC void
-set_page_region(
-        struct page *page,
-        size_t offset,
-        size_t length)
-{
-        set_page_private(page,
-                page_private(page) | page_region_mask(offset, length));
-        if (page_private(page) == ~0UL)
-                SetPageUptodate(page);
-}
-
-STATIC int
-test_page_region(
-        struct page *page,
-        size_t offset,
-        size_t length)
-{
-        unsigned long mask = page_region_mask(offset, length);
-
-        return (mask && (page_private(page) & mask) == mask);
-}
-
-/*
  * xfs_buf_lru_add - add a buffer to the LRU.
  *
  * The LRU takes a new reference to the buffer so that it will only be freed
@@ -332,7 +263,7 @@ xfs_buf_free(

         ASSERT(list_empty(&bp->b_lru));

-        if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
+        if (bp->b_flags & _XBF_PAGES) {
                 uint i;

                 if (xfs_buf_is_vmapped(bp))
@@ -342,25 +273,22 @@ xfs_buf_free(
                 for (i = 0; i < bp->b_page_count; i++) {
                         struct page *page = bp->b_pages[i];

-                        if (bp->b_flags & _XBF_PAGE_CACHE)
-                                ASSERT(!PagePrivate(page));
-                        page_cache_release(page);
+                        __free_page(page);
                 }
-        }
+        } else if (bp->b_flags & _XBF_KMEM)
+                kmem_free(bp->b_addr);
         _xfs_buf_free_pages(bp);
         xfs_buf_deallocate(bp);
 }

 /*
- * Finds all pages for buffer in question and builds it's page list.
+ * Allocates all the pages for buffer in question and builds it's page list.
  */
 STATIC int
-_xfs_buf_lookup_pages(
+xfs_buf_allocate_memory(
         xfs_buf_t *bp,
         uint flags)
 {
-        struct address_space *mapping = bp->b_target->bt_mapping;
-        size_t blocksize = bp->b_target->bt_bsize;
         size_t size = bp->b_count_desired;
         size_t nbytes, offset;
         gfp_t gfp_mask = xb_to_gfp(flags);
@@ -369,29 +297,55 @@ _xfs_buf_lookup_pages(
         xfs_off_t end;
         int error;

+        /*
+         * for buffers that are contained within a single page, just allocate
+         * the memory from the heap - there's no need for the complexity of
+         * page arrays to keep allocation down to order 0.
+         */
+        if (bp->b_buffer_length < PAGE_SIZE) {
+                bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
+                if (!bp->b_addr) {
+                        /* low memory - use alloc_page loop instead */
+                        goto use_alloc_page;
+                }
+
+                if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
+                                                                PAGE_MASK) !=
+                    ((unsigned long)bp->b_addr & PAGE_MASK)) {
+                        /* b_addr spans two pages - use alloc_page instead */
+                        kmem_free(bp->b_addr);
+                        bp->b_addr = NULL;
+                        goto use_alloc_page;
+                }
+                bp->b_offset = offset_in_page(bp->b_addr);
+                bp->b_pages = bp->b_page_array;
+                bp->b_pages[0] = virt_to_page(bp->b_addr);
+                bp->b_page_count = 1;
+                bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
+                return 0;
+        }
+
+use_alloc_page:
         end = bp->b_file_offset + bp->b_buffer_length;
         page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
-
         error = _xfs_buf_get_pages(bp, page_count, flags);
         if (unlikely(error))
                 return error;
-        bp->b_flags |= _XBF_PAGE_CACHE;

         offset = bp->b_offset;
-        first = bp->b_file_offset >> PAGE_CACHE_SHIFT;
+        first = bp->b_file_offset >> PAGE_SHIFT;
+        bp->b_flags |= _XBF_PAGES;

         for (i = 0; i < bp->b_page_count; i++) {
                 struct page *page;
                 uint retries = 0;
-
-        retry:
-                page = find_or_create_page(mapping, first + i, gfp_mask);
+retry:
+                page = alloc_page(gfp_mask);
                 if (unlikely(page == NULL)) {
                         if (flags & XBF_READ_AHEAD) {
                                 bp->b_page_count = i;
-                                for (i = 0; i < bp->b_page_count; i++)
-                                        unlock_page(bp->b_pages[i]);
-                                return -ENOMEM;
+                                error = ENOMEM;
+                                goto out_free_pages;
                         }

                         /*
@@ -412,33 +366,16 @@ _xfs_buf_lookup_pages(

                 XFS_STATS_INC(xb_page_found);

-                nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
+                nbytes = min_t(size_t, size, PAGE_SIZE - offset);
                 size -= nbytes;
-
-                ASSERT(!PagePrivate(page));
-                if (!PageUptodate(page)) {
-                        page_count--;
-                        if (blocksize >= PAGE_CACHE_SIZE) {
-                                if (flags & XBF_READ)
-                                        bp->b_flags |= _XBF_PAGE_LOCKED;
-                        } else if (!PagePrivate(page)) {
-                                if (test_page_region(page, offset, nbytes))
-                                        page_count++;
-                        }
-                }
-
                 bp->b_pages[i] = page;
                 offset = 0;
         }
+        return 0;

-        if (!(bp->b_flags & _XBF_PAGE_LOCKED)) {
-                for (i = 0; i < bp->b_page_count; i++)
-                        unlock_page(bp->b_pages[i]);
-        }
-
-        if (page_count == bp->b_page_count)
-                bp->b_flags |= XBF_DONE;
-
+out_free_pages:
+        for (i = 0; i < bp->b_page_count; i++)
+                __free_page(bp->b_pages[i]);
         return error;
 }

@@ -450,14 +387,23 @@ _xfs_buf_map_pages(
         xfs_buf_t *bp,
         uint flags)
 {
-        /* A single page buffer is always mappable */
+        ASSERT(bp->b_flags & _XBF_PAGES);
         if (bp->b_page_count == 1) {
+                /* A single page buffer is always mappable */
                 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
                 bp->b_flags |= XBF_MAPPED;
         } else if (flags & XBF_MAPPED) {
-                bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
-                                        -1, PAGE_KERNEL);
-                if (unlikely(bp->b_addr == NULL))
+                int retried = 0;
+
+                do {
+                        bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
+                                                -1, PAGE_KERNEL);
+                        if (bp->b_addr)
+                                break;
+                        vm_unmap_aliases();
+                } while (retried++ <= 1);
+
+                if (!bp->b_addr)
                         return -ENOMEM;
                 bp->b_addr += bp->b_offset;
                 bp->b_flags |= XBF_MAPPED;
@@ -568,9 +514,14 @@ found:
                 }
         }

+        /*
+         * if the buffer is stale, clear all the external state associated with
+         * it. We need to keep flags such as how we allocated the buffer memory
+         * intact here.
+         */
         if (bp->b_flags & XBF_STALE) {
                 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
-                bp->b_flags &= XBF_MAPPED;
+                bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES;
         }

         trace_xfs_buf_find(bp, flags, _RET_IP_);
@@ -591,7 +542,7 @@ xfs_buf_get(
         xfs_buf_flags_t flags)
 {
         xfs_buf_t *bp, *new_bp;
-        int error = 0, i;
+        int error = 0;

         new_bp = xfs_buf_allocate(flags);
         if (unlikely(!new_bp))
@@ -599,7 +550,7 @@ xfs_buf_get(

         bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
         if (bp == new_bp) {
-                error = _xfs_buf_lookup_pages(bp, flags);
+                error = xfs_buf_allocate_memory(bp, flags);
                 if (error)
                         goto no_buffer;
         } else {
@@ -608,9 +559,6 @@ xfs_buf_get(
                 return NULL;
         }

-        for (i = 0; i < bp->b_page_count; i++)
-                mark_page_accessed(bp->b_pages[i]);
-
         if (!(bp->b_flags & XBF_MAPPED)) {
                 error = _xfs_buf_map_pages(bp, flags);
                 if (unlikely(error)) {
@@ -711,8 +659,7 @@ xfs_buf_readahead(
 {
         struct backing_dev_info *bdi;

-        bdi = target->bt_mapping->backing_dev_info;
-        if (bdi_read_congested(bdi))
+        if (bdi_read_congested(target->bt_bdi))
                 return;

         xfs_buf_read(target, ioff, isize,
@@ -790,10 +737,10 @@ xfs_buf_associate_memory(
         size_t buflen;
         int page_count;

-        pageaddr = (unsigned long)mem & PAGE_CACHE_MASK;
+        pageaddr = (unsigned long)mem & PAGE_MASK;
         offset = (unsigned long)mem - pageaddr;
-        buflen = PAGE_CACHE_ALIGN(len + offset);
-        page_count = buflen >> PAGE_CACHE_SHIFT;
+        buflen = PAGE_ALIGN(len + offset);
+        page_count = buflen >> PAGE_SHIFT;

         /* Free any previous set of page pointers */
         if (bp->b_pages)
@@ -810,13 +757,12 @@ xfs_buf_associate_memory(

         for (i = 0; i < bp->b_page_count; i++) {
                 bp->b_pages[i] = mem_to_page((void *)pageaddr);
-                pageaddr += PAGE_CACHE_SIZE;
+                pageaddr += PAGE_SIZE;
         }

         bp->b_count_desired = len;
         bp->b_buffer_length = buflen;
         bp->b_flags |= XBF_MAPPED;
-        bp->b_flags &= ~_XBF_PAGE_LOCKED;

         return 0;
 }
@@ -923,20 +869,7 @@ xfs_buf_rele(


 /*
- * Mutual exclusion on buffers. Locking model:
- *
- * Buffers associated with inodes for which buffer locking
- * is not enabled are not protected by semaphores, and are
- * assumed to be exclusively owned by the caller. There is a
- * spinlock in the buffer, used by the caller when concurrent
- * access is possible.
- */
-
-/*
- * Locks a buffer object, if it is not already locked. Note that this in
- * no way locks the underlying pages, so it is only useful for
- * synchronizing concurrent use of buffer objects, not for synchronizing
- * independent access to the underlying pages.
+ * Lock a buffer object, if it is not already locked.
  *
  * If we come across a stale, pinned, locked buffer, we know that we are
  * being asked to lock a buffer that has been reallocated. Because it is
@@ -970,10 +903,7 @@ xfs_buf_lock_value(
 }

 /*
- * Locks a buffer object.
- * Note that this in no way locks the underlying pages, so it is only
- * useful for synchronizing concurrent use of buffer objects, not for
- * synchronizing independent access to the underlying pages.
+ * Lock a buffer object.
  *
  * If we come across a stale, pinned, locked buffer, we know that we
  * are being asked to lock a buffer that has been reallocated. Because
@@ -1246,10 +1176,8 @@ _xfs_buf_ioend(
         xfs_buf_t *bp,
         int schedule)
 {
-        if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
-                bp->b_flags &= ~_XBF_PAGE_LOCKED;
+        if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
                 xfs_buf_ioend(bp, schedule);
-        }
 }

 STATIC void
@@ -1258,35 +1186,12 @@ xfs_buf_bio_end_io(
         int error)
 {
         xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
-        unsigned int blocksize = bp->b_target->bt_bsize;
-        struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;

         xfs_buf_ioerror(bp, -error);

         if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
                 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));

-        do {
-                struct page *page = bvec->bv_page;
-
-                ASSERT(!PagePrivate(page));
-                if (unlikely(bp->b_error)) {
-                        if (bp->b_flags & XBF_READ)
-                                ClearPageUptodate(page);
-                } else if (blocksize >= PAGE_CACHE_SIZE) {
-                        SetPageUptodate(page);
-                } else if (!PagePrivate(page) &&
-                                (bp->b_flags & _XBF_PAGE_CACHE)) {
-                        set_page_region(page, bvec->bv_offset, bvec->bv_len);
-                }
-
-                if (--bvec >= bio->bi_io_vec)
-                        prefetchw(&bvec->bv_page->flags);
-
-                if (bp->b_flags & _XBF_PAGE_LOCKED)
-                        unlock_page(page);
-        } while (bvec >= bio->bi_io_vec);
-
         _xfs_buf_ioend(bp, 1);
         bio_put(bio);
 }
@@ -1300,7 +1205,6 @@ _xfs_buf_ioapply(
         int offset = bp->b_offset;
         int size = bp->b_count_desired;
         sector_t sector = bp->b_bn;
-        unsigned int blocksize = bp->b_target->bt_bsize;

         total_nr_pages = bp->b_page_count;
         map_i = 0;
@@ -1321,29 +1225,6 @@ _xfs_buf_ioapply(
                         (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
         }

-        /* Special code path for reading a sub page size buffer in --
-         * we populate up the whole page, and hence the other metadata
-         * in the same page. This optimization is only valid when the
-         * filesystem block size is not smaller than the page size.
-         */
-        if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
-            ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
-              (XBF_READ|_XBF_PAGE_LOCKED)) &&
-            (blocksize >= PAGE_CACHE_SIZE)) {
-                bio = bio_alloc(GFP_NOIO, 1);
-
-                bio->bi_bdev = bp->b_target->bt_bdev;
-                bio->bi_sector = sector - (offset >> BBSHIFT);
-                bio->bi_end_io = xfs_buf_bio_end_io;
-                bio->bi_private = bp;
-
-                bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0);
-                size = 0;
-
-                atomic_inc(&bp->b_io_remaining);
-
-                goto submit_io;
-        }

 next_chunk:
         atomic_inc(&bp->b_io_remaining);
@@ -1357,8 +1238,9 @@ next_chunk:
         bio->bi_end_io = xfs_buf_bio_end_io;
         bio->bi_private = bp;

+
         for (; size && nr_pages; nr_pages--, map_i++) {
-                int rbytes, nbytes = PAGE_CACHE_SIZE - offset;
+                int rbytes, nbytes = PAGE_SIZE - offset;

                 if (nbytes > size)
                         nbytes = size;
@@ -1373,7 +1255,6 @@ next_chunk:
                 total_nr_pages--;
         }

-submit_io:
         if (likely(bio->bi_size)) {
                 if (xfs_buf_is_vmapped(bp)) {
                         flush_kernel_vmap_range(bp->b_addr,
@@ -1383,18 +1264,7 @@ submit_io:
                 if (size)
                         goto next_chunk;
         } else {
-                /*
-                 * if we get here, no pages were added to the bio. However,
-                 * we can't just error out here - if the pages are locked then
-                 * we have to unlock them otherwise we can hang on a later
-                 * access to the page.
-                 */
                 xfs_buf_ioerror(bp, EIO);
-                if (bp->b_flags & _XBF_PAGE_LOCKED) {
-                        int i;
-                        for (i = 0; i < bp->b_page_count; i++)
-                                unlock_page(bp->b_pages[i]);
-                }
                 bio_put(bio);
         }
 }
@@ -1458,8 +1328,8 @@ xfs_buf_offset(
                 return XFS_BUF_PTR(bp) + offset;

         offset += bp->b_offset;
-        page = bp->b_pages[offset >> PAGE_CACHE_SHIFT];
-        return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1));
+        page = bp->b_pages[offset >> PAGE_SHIFT];
+        return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
 }

 /*
@@ -1481,9 +1351,9 @@ xfs_buf_iomove(
                 page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
                 cpoff = xfs_buf_poff(boff + bp->b_offset);
                 csize = min_t(size_t,
-                                PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff);
+                                PAGE_SIZE-cpoff, bp->b_count_desired-boff);

-                ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));
+                ASSERT(((csize + cpoff) <= PAGE_SIZE));

                 switch (mode) {
                 case XBRW_ZERO:
@@ -1596,7 +1466,6 @@ xfs_free_buftarg(
         xfs_flush_buftarg(btp, 1);
         if (mp->m_flags & XFS_MOUNT_BARRIER)
                 xfs_blkdev_issue_flush(btp);
-        iput(btp->bt_mapping->host);

         kthread_stop(btp->bt_task);
         kmem_free(btp);
@@ -1620,15 +1489,6 @@ xfs_setsize_buftarg_flags(
                 return EINVAL;
         }

-        if (verbose &&
-            (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) {
-                printk(KERN_WARNING
-                        "XFS: %u byte sectors in use on device %s. "
-                        "This is suboptimal; %u or greater is ideal.\n",
-                        sectorsize, XFS_BUFTARG_NAME(btp),
-                        (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG);
-        }
-
         return 0;
 }

@@ -1643,7 +1503,7 @@ xfs_setsize_buftarg_early(
         struct block_device *bdev)
 {
         return xfs_setsize_buftarg_flags(btp,
-                        PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0);
+                        PAGE_SIZE, bdev_logical_block_size(bdev), 0);
 }

 int
@@ -1656,40 +1516,6 @@ xfs_setsize_buftarg(
 }

 STATIC int
-xfs_mapping_buftarg(
-        xfs_buftarg_t *btp,
-        struct block_device *bdev)
-{
-        struct backing_dev_info *bdi;
-        struct inode *inode;
-        struct address_space *mapping;
-        static const struct address_space_operations mapping_aops = {
-                .migratepage = fail_migrate_page,
-        };
-
-        inode = new_inode(bdev->bd_inode->i_sb);
-        if (!inode) {
-                printk(KERN_WARNING
-                        "XFS: Cannot allocate mapping inode for device %s\n",
-                        XFS_BUFTARG_NAME(btp));
-                return ENOMEM;
-        }
-        inode->i_ino = get_next_ino();
-        inode->i_mode = S_IFBLK;
-        inode->i_bdev = bdev;
-        inode->i_rdev = bdev->bd_dev;
-        bdi = blk_get_backing_dev_info(bdev);
-        if (!bdi)
-                bdi = &default_backing_dev_info;
-        mapping = &inode->i_data;
-        mapping->a_ops = &mapping_aops;
-        mapping->backing_dev_info = bdi;
-        mapping_set_gfp_mask(mapping, GFP_NOFS);
-        btp->bt_mapping = mapping;
-        return 0;
-}
-
-STATIC int
 xfs_alloc_delwrite_queue(
         xfs_buftarg_t *btp,
         const char *fsname)
@@ -1717,12 +1543,14 @@ xfs_alloc_buftarg(
         btp->bt_mount = mp;
         btp->bt_dev = bdev->bd_dev;
         btp->bt_bdev = bdev;
+        btp->bt_bdi = blk_get_backing_dev_info(bdev);
+        if (!btp->bt_bdi)
+                goto error;
+
         INIT_LIST_HEAD(&btp->bt_lru);
         spin_lock_init(&btp->bt_lru_lock);
         if (xfs_setsize_buftarg_early(btp, bdev))
                 goto error;
-        if (xfs_mapping_buftarg(btp, bdev))
-                goto error;
         if (xfs_alloc_delwrite_queue(btp, fsname))
                 goto error;
         btp->bt_shrinker.shrink = xfs_buftarg_shrink;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index cbe65950e524..a9a1c4512645 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -61,30 +61,11 @@ typedef enum {
 #define XBF_DONT_BLOCK  (1 << 16)/* do not block in current thread */

 /* flags used only internally */
-#define _XBF_PAGE_CACHE (1 << 17)/* backed by pagecache */
 #define _XBF_PAGES      (1 << 18)/* backed by refcounted pages */
 #define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */
+#define _XBF_KMEM       (1 << 20)/* backed by heap memory */
 #define _XBF_DELWRI_Q   (1 << 21)/* buffer on delwri queue */

-/*
- * Special flag for supporting metadata blocks smaller than a FSB.
- *
- * In this case we can have multiple xfs_buf_t on a single page and
- * need to lock out concurrent xfs_buf_t readers as they only
- * serialise access to the buffer.
- *
- * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
- * between reads of the page. Hence we can have one thread read the
- * page and modify it, but then race with another thread that thinks
- * the page is not up-to-date and hence reads it again.
- *
- * The result is that the first modifcation to the page is lost.
- * This sort of AGF/AGI reading race can happen when unlinking inodes
- * that require truncation and results in the AGI unlinked list
- * modifications being lost.
- */
-#define _XBF_PAGE_LOCKED        (1 << 22)
-
 typedef unsigned int xfs_buf_flags_t;

 #define XFS_BUF_FLAGS \
@@ -100,12 +81,10 @@ typedef unsigned int xfs_buf_flags_t;
         { XBF_LOCK,             "LOCK" },       /* should never be set */\
         { XBF_TRYLOCK,          "TRYLOCK" },    /* ditto */\
         { XBF_DONT_BLOCK,       "DONT_BLOCK" }, /* ditto */\
-        { _XBF_PAGE_CACHE,      "PAGE_CACHE" }, \
         { _XBF_PAGES,           "PAGES" }, \
         { _XBF_RUN_QUEUES,      "RUN_QUEUES" }, \
-        { _XBF_DELWRI_Q,        "DELWRI_Q" }, \
-        { _XBF_PAGE_LOCKED,     "PAGE_LOCKED" }
-
+        { _XBF_KMEM,            "KMEM" }, \
+        { _XBF_DELWRI_Q,        "DELWRI_Q" }

 typedef enum {
         XBT_FORCE_SLEEP = 0,
@@ -120,7 +99,7 @@ typedef struct xfs_bufhash {
 typedef struct xfs_buftarg {
         dev_t bt_dev;
         struct block_device *bt_bdev;
-        struct address_space *bt_mapping;
+        struct backing_dev_info *bt_bdi;
         struct xfs_mount *bt_mount;
         unsigned int bt_bsize;
         unsigned int bt_sshift;
@@ -139,17 +118,6 @@ typedef struct xfs_buftarg {
         unsigned int bt_lru_nr;
 } xfs_buftarg_t;

-/*
- * xfs_buf_t: Buffer structure for pagecache-based buffers
- *
- * This buffer structure is used by the pagecache buffer management routines
- * to refer to an assembly of pages forming a logical buffer.
- *
- * The buffer structure is used on a temporary basis only, and discarded when
- * released. The real data storage is recorded in the pagecache. Buffers are
- * hashed to the block device on which the file system resides.
- */
-
 struct xfs_buf;
 typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);

diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index a55c1b46b219..52aadfbed132 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -896,6 +896,7 @@ xfs_file_fallocate(
         xfs_flock64_t bf;
         xfs_inode_t *ip = XFS_I(inode);
         int cmd = XFS_IOC_RESVSP;
+        int attr_flags = XFS_ATTR_NOLOCK;

         if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
                 return -EOPNOTSUPP;
@@ -918,7 +919,10 @@ xfs_file_fallocate(
                         goto out_unlock;
         }

-        error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK);
+        if (file->f_flags & O_DSYNC)
+                attr_flags |= XFS_ATTR_SYNC;
+
+        error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
         if (error)
                 goto out_unlock;

diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 0ca0e3c024d7..acca2c5ca3fa 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -624,6 +624,10 @@ xfs_ioc_space(

         if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
                 attr_flags |= XFS_ATTR_NONBLOCK;
+
+        if (filp->f_flags & O_DSYNC)
+                attr_flags |= XFS_ATTR_SYNC;
+
         if (ioflags & IO_INVIS)
                 attr_flags |= XFS_ATTR_DMI;

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 818c4cf2de86..1ba5c451da36 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1078,7 +1078,7 @@ xfs_fs_write_inode(
                         error = 0;
                         goto out_unlock;
                 }
-                error = xfs_iflush(ip, 0);
+                error = xfs_iflush(ip, SYNC_TRYLOCK);
         }

 out_unlock:
@@ -1539,10 +1539,14 @@ xfs_fs_fill_super(
         if (error)
                 goto out_free_sb;

-        error = xfs_mountfs(mp);
-        if (error)
-                goto out_filestream_unmount;
-
+        /*
+         * we must configure the block size in the superblock before we run the
+         * full mount process as the mount process can lookup and cache inodes.
+         * For the same reason we must also initialise the syncd and register
+         * the inode cache shrinker so that inodes can be reclaimed during
+         * operations like a quotacheck that iterate all inodes in the
+         * filesystem.
+         */
         sb->s_magic = XFS_SB_MAGIC;
         sb->s_blocksize = mp->m_sb.sb_blocksize;
         sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
@@ -1550,6 +1554,16 @@ xfs_fs_fill_super(
         sb->s_time_gran = 1;
         set_posix_acl_flag(sb);

+        error = xfs_syncd_init(mp);
+        if (error)
+                goto out_filestream_unmount;
+
+        xfs_inode_shrinker_register(mp);
+
+        error = xfs_mountfs(mp);
+        if (error)
+                goto out_syncd_stop;
+
         root = igrab(VFS_I(mp->m_rootip));
         if (!root) {
                 error = ENOENT;
@@ -1565,14 +1579,11 @@ xfs_fs_fill_super(
                         goto fail_vnrele;
         }

-        error = xfs_syncd_init(mp);
-        if (error)
-                goto fail_vnrele;
-
-        xfs_inode_shrinker_register(mp);
-
         return 0;

+out_syncd_stop:
+        xfs_inode_shrinker_unregister(mp);
+        xfs_syncd_stop(mp);
 out_filestream_unmount:
         xfs_filestream_unmount(mp);
 out_free_sb:
@@ -1596,6 +1607,9 @@ xfs_fs_fill_super(
         }

 fail_unmount:
+        xfs_inode_shrinker_unregister(mp);
+        xfs_syncd_stop(mp);
+
         /*
          * Blow away any referenced inode in the filestreams cache.
          * This can and will cause log traffic as inodes go inactive
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 6c10f1d2e3d3..594cd822d84d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -761,8 +761,10 @@ xfs_reclaim_inode(
         struct xfs_perag *pag,
         int sync_mode)
 {
-        int error = 0;
+        int error;

+restart:
+        error = 0;
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         if (!xfs_iflock_nowait(ip)) {
                 if (!(sync_mode & SYNC_WAIT))
@@ -788,9 +790,31 @@ xfs_reclaim_inode(
         if (xfs_inode_clean(ip))
                 goto reclaim;

-        /* Now we have an inode that needs flushing */
-        error = xfs_iflush(ip, sync_mode);
+        /*
+         * Now we have an inode that needs flushing.
+         *
+         * We do a nonblocking flush here even if we are doing a SYNC_WAIT
+         * reclaim as we can deadlock with inode cluster removal.
+         * xfs_ifree_cluster() can lock the inode buffer before it locks the
+         * ip->i_lock, and we are doing the exact opposite here. As a result,
+         * doing a blocking xfs_itobp() to get the cluster buffer will result
+         * in an ABBA deadlock with xfs_ifree_cluster().
+         *
+         * As xfs_ifree_cluser() must gather all inodes that are active in the
+         * cache to mark them stale, if we hit this case we don't actually want
+         * to do IO here - we want the inode marked stale so we can simply
+         * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
+         * just unlock the inode, back off and try again. Hopefully the next
+         * pass through will see the stale flag set on the inode.
+         */
+        error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
         if (sync_mode & SYNC_WAIT) {
+                if (error == EAGAIN) {
+                        xfs_iunlock(ip, XFS_ILOCK_EXCL);
+                        /* backoff longer than in xfs_ifree_cluster */
+                        delay(2);
+                        goto restart;
+                }
                 xfs_iflock(ip);
                 goto reclaim;
         }
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index da871f532236..742c8330994a 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2835,7 +2835,7 @@ xfs_iflush(
          * Get the buffer containing the on-disk inode.
          */
         error = xfs_itobp(mp, NULL, ip, &dip, &bp,
-                                (flags & SYNC_WAIT) ? XBF_LOCK : XBF_TRYLOCK);
+                                (flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK);
         if (error || !bp) {
                 xfs_ifunlock(ip);
                 return error;
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index fd4f398bd6f1..46cc40131d4a 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -760,11 +760,11 @@ xfs_inode_item_push(
          * Push the inode to it's backing buffer. This will not remove the
          * inode from the AIL - a further push will be required to trigger a
          * buffer push. However, this allows all the dirty inodes to be pushed
-         * to the buffer before it is pushed to disk. THe buffer IO completion
-         * will pull th einode from the AIL, mark it clean and unlock the flush
+         * to the buffer before it is pushed to disk. The buffer IO completion
+         * will pull the inode from the AIL, mark it clean and unlock the flush
          * lock.
          */
-        (void) xfs_iflush(ip, 0);
+        (void) xfs_iflush(ip, SYNC_TRYLOCK);
         xfs_iunlock(ip, XFS_ILOCK_SHARED);
 }

diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 3bea66132334..03b3b7f85a3b 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -383,7 +383,8 @@ xfs_trans_read_buf(
                 bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK);
                 if (bp == NULL) {
                         *bpp = NULL;
-                        return 0;
+                        return (flags & XBF_TRYLOCK) ?
+                                        0 : XFS_ERROR(ENOMEM);
                 }
                 if (XFS_BUF_GETERROR(bp) != 0) {
                         XFS_BUF_SUPER_STALE(bp);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 37d8146ee15b..c48b4217ec47 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -2831,7 +2831,8 @@ xfs_change_file_space(
                 ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;

         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-        xfs_trans_set_sync(tp);
+        if (attr_flags & XFS_ATTR_SYNC)
+                xfs_trans_set_sync(tp);

         error = xfs_trans_commit(tp, 0);

diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index f6702927eee4..3bcd23353d6c 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -18,6 +18,7 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
 #define XFS_ATTR_NONBLOCK       0x02    /* return EAGAIN if operation would block */
 #define XFS_ATTR_NOLOCK         0x04    /* Don't grab any conflicting locks */
 #define XFS_ATTR_NOACL          0x08    /* Don't call xfs_acl_chmod */
+#define XFS_ATTR_SYNC           0x10    /* synchronous operation required */

 int xfs_readlink(struct xfs_inode *ip, char *link);
 int xfs_release(struct xfs_inode *ip);