Diffstat (limited to 'fs/xfs/linux-2.6')
-rw-r--r--  fs/xfs/linux-2.6/kmem.c         |   9
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c     |  12
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c      | 392
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.h      |  40
-rw-r--r--  fs/xfs/linux-2.6/xfs_file.c     |   8
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.c    |   4
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.c     |   2
-rw-r--r--  fs/xfs/linux-2.6/xfs_linux.h    |  23
-rw-r--r--  fs/xfs/linux-2.6/xfs_message.c  | 126
-rw-r--r--  fs/xfs/linux-2.6/xfs_message.h  |  40
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c    | 293
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c     | 265
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.h     |   2
-rw-r--r--  fs/xfs/linux-2.6/xfs_sysctl.c   |   2
14 files changed, 595 insertions(+), 623 deletions(-)
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 666c9db48eb6..a907de565db3 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -23,6 +23,7 @@
 #include <linux/backing-dev.h>
 #include "time.h"
 #include "kmem.h"
+#include "xfs_message.h"
 
 /*
  * Greedy allocation. May fail and may return vmalloced memory.
@@ -56,8 +57,8 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
 		if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
 			return ptr;
 		if (!(++retries % 100))
-			printk(KERN_ERR "XFS: possible memory allocation "
-				"deadlock in %s (mode:0x%x)\n",
+			xfs_err(NULL,
+		"possible memory allocation deadlock in %s (mode:0x%x)",
 				__func__, lflags);
 		congestion_wait(BLK_RW_ASYNC, HZ/50);
 	} while (1);
@@ -112,8 +113,8 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
 		if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
 			return ptr;
 		if (!(++retries % 100))
-			printk(KERN_ERR "XFS: possible memory allocation "
-				"deadlock in %s (mode:0x%x)\n",
+			xfs_err(NULL,
+		"possible memory allocation deadlock in %s (mode:0x%x)",
 				__func__, lflags);
 		congestion_wait(BLK_RW_ASYNC, HZ/50);
 	} while (1);
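
The two kmem.c hunks above change only the logging call inside XFS's
allocation-retry loop. A minimal user-space sketch of that pattern, with
malloc(), fprintf() and usleep() standing in for the kernel's kmem_alloc(),
xfs_err() and congestion_wait() (the real calls differ), looks like this:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Retry a failable allocation forever, complaining every 100th failure
 * and backing off briefly, as kmem_alloc() does after this patch. */
static void *alloc_retry(size_t size)
{
	unsigned int retries = 0;
	void *ptr;

	do {
		ptr = malloc(size);
		if (ptr)
			return ptr;
		if (!(++retries % 100))
			fprintf(stderr,
				"possible memory allocation deadlock in %s\n",
				__func__);
		usleep(20000);	/* stand-in for congestion_wait(..., HZ/50) */
	} while (1);
}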
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index ec7bbb5645b6..79ce38be15a1 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -413,8 +413,7 @@ xfs_submit_ioend_bio(
 	if (xfs_ioend_new_eof(ioend))
 		xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
 
-	submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
-		   WRITE_SYNC_PLUG : WRITE, bio);
+	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
 }
 
 STATIC struct bio *
@@ -854,7 +853,7 @@ xfs_aops_discard_page(
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		goto out_invalidate;
 
-	xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+	xfs_alert(ip->i_mount,
 		"page discard on page %p, inode 0x%llx, offset %llu.",
 			page, ip->i_ino, offset);
 
@@ -872,7 +871,7 @@ xfs_aops_discard_page(
 		if (error) {
 			/* something screwed, just bail */
 			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-				xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+				xfs_alert(ip->i_mount,
 			"page discard unable to remove delalloc mapping.");
 			}
 			break;
@@ -1296,7 +1295,7 @@ xfs_get_blocks_direct(
  * If the private argument is non-NULL __xfs_get_blocks signals us that we
  * need to issue a transaction to convert the range from unwritten to written
  * extents.  In case this is regular synchronous I/O we just call xfs_end_io
- * to do this and we are done.  But in case this was a successfull AIO
+ * to do this and we are done.  But in case this was a successful AIO
  * request this handler is called from interrupt context, from which we
  * can't start transactions.  In that case offload the I/O completion to
  * the workqueues we also use for buffered I/O completion.
@@ -1411,7 +1410,7 @@ xfs_vm_write_failed(
 	if (error) {
 		/* something screwed, just bail */
 		if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-			xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+			xfs_alert(ip->i_mount,
 			"xfs_vm_write_failed: unable to clean up ino %lld",
 				ip->i_ino);
 		}
@@ -1495,7 +1494,6 @@ const struct address_space_operations xfs_address_space_operations = {
 	.readpages		= xfs_vm_readpages,
 	.writepage		= xfs_vm_writepage,
 	.writepages		= xfs_vm_writepages,
-	.sync_page		= block_sync_page,
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
 	.write_begin		= xfs_vm_write_begin,
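
Both xfs_aops.c changes track block-layer API removals: WRITE_SYNC_PLUG and
.sync_page went away along with implicit plugging. What remains of the submit
decision in xfs_submit_ioend_bio() can be modelled in a few lines; the enum
values below are stand-ins, not the kernel's constants:

/* Synchronous writeback (WB_SYNC_ALL) gets the synchronous bio variant,
 * everything else a plain write. */
enum model_rw { MODEL_WRITE, MODEL_WRITE_SYNC };
enum model_sync { MODEL_WB_SYNC_NONE, MODEL_WB_SYNC_ALL };

static enum model_rw ioend_rw(enum model_sync mode)
{
	return mode == MODEL_WB_SYNC_ALL ? MODEL_WRITE_SYNC : MODEL_WRITE;
}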
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index f83a4c830a65..9ef9ed2cfe2e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -94,75 +94,6 @@ xfs_buf_vmap_len(
 }
 
 /*
- * Page Region interfaces.
- *
- * For pages in filesystems where the blocksize is smaller than the
- * pagesize, we use the page->private field (long) to hold a bitmap
- * of uptodate regions within the page.
- *
- * Each such region is "bytes per page / bits per long" bytes long.
- *
- * NBPPR == number-of-bytes-per-page-region
- * BTOPR == bytes-to-page-region (rounded up)
- * BTOPRT == bytes-to-page-region-truncated (rounded down)
- */
-#if (BITS_PER_LONG == 32)
-#define PRSHIFT		(PAGE_CACHE_SHIFT - 5)	/* (32 == 1<<5) */
-#elif (BITS_PER_LONG == 64)
-#define PRSHIFT		(PAGE_CACHE_SHIFT - 6)	/* (64 == 1<<6) */
-#else
-#error BITS_PER_LONG must be 32 or 64
-#endif
-#define NBPPR		(PAGE_CACHE_SIZE/BITS_PER_LONG)
-#define BTOPR(b)	(((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
-#define BTOPRT(b)	(((unsigned int)(b) >> PRSHIFT))
-
-STATIC unsigned long
-page_region_mask(
-	size_t		offset,
-	size_t		length)
-{
-	unsigned long	mask;
-	int		first, final;
-
-	first = BTOPR(offset);
-	final = BTOPRT(offset + length - 1);
-	first = min(first, final);
-
-	mask = ~0UL;
-	mask <<= BITS_PER_LONG - (final - first);
-	mask >>= BITS_PER_LONG - (final);
-
-	ASSERT(offset + length <= PAGE_CACHE_SIZE);
-	ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);
-
-	return mask;
-}
-
-STATIC void
-set_page_region(
-	struct page	*page,
-	size_t		offset,
-	size_t		length)
-{
-	set_page_private(page,
-		page_private(page) | page_region_mask(offset, length));
-	if (page_private(page) == ~0UL)
-		SetPageUptodate(page);
-}
-
-STATIC int
-test_page_region(
-	struct page	*page,
-	size_t		offset,
-	size_t		length)
-{
-	unsigned long	mask = page_region_mask(offset, length);
-
-	return (mask && (page_private(page) & mask) == mask);
-}
-
-/*
  * xfs_buf_lru_add - add a buffer to the LRU.
  *
  * The LRU takes a new reference to the buffer so that it will only be freed
@@ -189,7 +120,7 @@ xfs_buf_lru_add(
  * The unlocked check is safe here because it only occurs when there are not
  * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
  * to optimise the shrinker removing the buffer from the LRU and calling
- * xfs_buf_free(). i.e. it removes an unneccessary round trip on the
+ * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
  * bt_lru_lock.
 */
 STATIC void
@@ -332,7 +263,7 @@ xfs_buf_free(
 
 	ASSERT(list_empty(&bp->b_lru));
 
-	if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
+	if (bp->b_flags & _XBF_PAGES) {
 		uint		i;
 
 		if (xfs_buf_is_vmapped(bp))
@@ -342,56 +273,77 @@ xfs_buf_free(
 		for (i = 0; i < bp->b_page_count; i++) {
 			struct page	*page = bp->b_pages[i];
 
-			if (bp->b_flags & _XBF_PAGE_CACHE)
-				ASSERT(!PagePrivate(page));
-			page_cache_release(page);
+			__free_page(page);
 		}
-	}
+	} else if (bp->b_flags & _XBF_KMEM)
+		kmem_free(bp->b_addr);
 	_xfs_buf_free_pages(bp);
 	xfs_buf_deallocate(bp);
 }
 
 /*
- *	Finds all pages for buffer in question and builds it's page list.
+ * Allocates all the pages for buffer in question and builds it's page list.
 */
 STATIC int
-_xfs_buf_lookup_pages(
+xfs_buf_allocate_memory(
 	xfs_buf_t		*bp,
 	uint			flags)
 {
-	struct address_space	*mapping = bp->b_target->bt_mapping;
-	size_t			blocksize = bp->b_target->bt_bsize;
 	size_t			size = bp->b_count_desired;
 	size_t			nbytes, offset;
 	gfp_t			gfp_mask = xb_to_gfp(flags);
 	unsigned short		page_count, i;
-	pgoff_t			first;
 	xfs_off_t		end;
 	int			error;
 
+	/*
+	 * for buffers that are contained within a single page, just allocate
+	 * the memory from the heap - there's no need for the complexity of
+	 * page arrays to keep allocation down to order 0.
+	 */
+	if (bp->b_buffer_length < PAGE_SIZE) {
+		bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
+		if (!bp->b_addr) {
+			/* low memory - use alloc_page loop instead */
+			goto use_alloc_page;
+		}
+
+		if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
+								PAGE_MASK) !=
+		    ((unsigned long)bp->b_addr & PAGE_MASK)) {
+			/* b_addr spans two pages - use alloc_page instead */
+			kmem_free(bp->b_addr);
+			bp->b_addr = NULL;
+			goto use_alloc_page;
+		}
+		bp->b_offset = offset_in_page(bp->b_addr);
+		bp->b_pages = bp->b_page_array;
+		bp->b_pages[0] = virt_to_page(bp->b_addr);
+		bp->b_page_count = 1;
+		bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
+		return 0;
+	}
+
+use_alloc_page:
 	end = bp->b_file_offset + bp->b_buffer_length;
 	page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
-
 	error = _xfs_buf_get_pages(bp, page_count, flags);
 	if (unlikely(error))
 		return error;
-	bp->b_flags |= _XBF_PAGE_CACHE;
 
 	offset = bp->b_offset;
-	first = bp->b_file_offset >> PAGE_CACHE_SHIFT;
+	bp->b_flags |= _XBF_PAGES;
 
 	for (i = 0; i < bp->b_page_count; i++) {
 		struct page	*page;
 		uint		retries = 0;
-
-	retry:
-		page = find_or_create_page(mapping, first + i, gfp_mask);
+retry:
+		page = alloc_page(gfp_mask);
 		if (unlikely(page == NULL)) {
 			if (flags & XBF_READ_AHEAD) {
 				bp->b_page_count = i;
-				for (i = 0; i < bp->b_page_count; i++)
-					unlock_page(bp->b_pages[i]);
-				return -ENOMEM;
+				error = ENOMEM;
+				goto out_free_pages;
 			}
 
 			/*
@@ -401,9 +353,8 @@ _xfs_buf_lookup_pages(
 			 * handle buffer allocation failures we can't do much.
 			 */
 			if (!(++retries % 100))
-				printk(KERN_ERR
-					"XFS: possible memory allocation "
-					"deadlock in %s (mode:0x%x)\n",
+				xfs_err(NULL,
+		"possible memory allocation deadlock in %s (mode:0x%x)",
 					__func__, gfp_mask);
 
 			XFS_STATS_INC(xb_page_retries);
@@ -413,52 +364,44 @@ _xfs_buf_lookup_pages(
 
 		XFS_STATS_INC(xb_page_found);
 
-		nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
+		nbytes = min_t(size_t, size, PAGE_SIZE - offset);
 		size -= nbytes;
-
-		ASSERT(!PagePrivate(page));
-		if (!PageUptodate(page)) {
-			page_count--;
-			if (blocksize >= PAGE_CACHE_SIZE) {
-				if (flags & XBF_READ)
-					bp->b_flags |= _XBF_PAGE_LOCKED;
-			} else if (!PagePrivate(page)) {
-				if (test_page_region(page, offset, nbytes))
-					page_count++;
-			}
-		}
-
 		bp->b_pages[i] = page;
 		offset = 0;
 	}
+	return 0;
 
-	if (!(bp->b_flags & _XBF_PAGE_LOCKED)) {
-		for (i = 0; i < bp->b_page_count; i++)
-			unlock_page(bp->b_pages[i]);
-	}
-
-	if (page_count == bp->b_page_count)
-		bp->b_flags |= XBF_DONE;
-
+out_free_pages:
+	for (i = 0; i < bp->b_page_count; i++)
+		__free_page(bp->b_pages[i]);
 	return error;
 }
 
 /*
- *	Map buffer into kernel address-space if nessecary.
+ *	Map buffer into kernel address-space if necessary.
 */
 STATIC int
 _xfs_buf_map_pages(
 	xfs_buf_t		*bp,
 	uint			flags)
 {
-	/* A single page buffer is always mappable */
+	ASSERT(bp->b_flags & _XBF_PAGES);
 	if (bp->b_page_count == 1) {
+		/* A single page buffer is always mappable */
 		bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
 		bp->b_flags |= XBF_MAPPED;
 	} else if (flags & XBF_MAPPED) {
-		bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
-					-1, PAGE_KERNEL);
-		if (unlikely(bp->b_addr == NULL))
+		int retried = 0;
+
+		do {
+			bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
+						-1, PAGE_KERNEL);
+			if (bp->b_addr)
+				break;
+			vm_unmap_aliases();
+		} while (retried++ <= 1);
+
+		if (!bp->b_addr)
 			return -ENOMEM;
 		bp->b_addr += bp->b_offset;
 		bp->b_flags |= XBF_MAPPED;
@@ -569,9 +512,14 @@ found:
 		}
 	}
 
+	/*
+	 * if the buffer is stale, clear all the external state associated with
+	 * it. We need to keep flags such as how we allocated the buffer memory
+	 * intact here.
+	 */
 	if (bp->b_flags & XBF_STALE) {
 		ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
-		bp->b_flags &= XBF_MAPPED;
+		bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES;
 	}
 
 	trace_xfs_buf_find(bp, flags, _RET_IP_);
@@ -592,7 +540,7 @@ xfs_buf_get(
 	xfs_buf_flags_t	flags)
 {
 	xfs_buf_t		*bp, *new_bp;
-	int			error = 0, i;
+	int			error = 0;
 
 	new_bp = xfs_buf_allocate(flags);
 	if (unlikely(!new_bp))
@@ -600,7 +548,7 @@
 
 	bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
 	if (bp == new_bp) {
-		error = _xfs_buf_lookup_pages(bp, flags);
+		error = xfs_buf_allocate_memory(bp, flags);
 		if (error)
 			goto no_buffer;
 	} else {
@@ -609,14 +557,11 @@
 		return NULL;
 	}
 
-	for (i = 0; i < bp->b_page_count; i++)
-		mark_page_accessed(bp->b_pages[i]);
-
 	if (!(bp->b_flags & XBF_MAPPED)) {
 		error = _xfs_buf_map_pages(bp, flags);
 		if (unlikely(error)) {
-			printk(KERN_WARNING "%s: failed to map pages\n",
-					__func__);
+			xfs_warn(target->bt_mount,
+				"%s: failed to map pages\n", __func__);
 			goto no_buffer;
 		}
 	}
@@ -710,10 +655,7 @@ xfs_buf_readahead(
 	xfs_off_t		ioff,
 	size_t			isize)
 {
-	struct backing_dev_info *bdi;
-
-	bdi = target->bt_mapping->backing_dev_info;
-	if (bdi_read_congested(bdi))
+	if (bdi_read_congested(target->bt_bdi))
 		return;
 
 	xfs_buf_read(target, ioff, isize,
@@ -791,10 +733,10 @@ xfs_buf_associate_memory(
 	size_t			buflen;
 	int			page_count;
 
-	pageaddr = (unsigned long)mem & PAGE_CACHE_MASK;
+	pageaddr = (unsigned long)mem & PAGE_MASK;
 	offset = (unsigned long)mem - pageaddr;
-	buflen = PAGE_CACHE_ALIGN(len + offset);
-	page_count = buflen >> PAGE_CACHE_SHIFT;
+	buflen = PAGE_ALIGN(len + offset);
+	page_count = buflen >> PAGE_SHIFT;
 
 	/* Free any previous set of page pointers */
 	if (bp->b_pages)
@@ -811,13 +753,12 @@ xfs_buf_associate_memory(
 
 	for (i = 0; i < bp->b_page_count; i++) {
 		bp->b_pages[i] = mem_to_page((void *)pageaddr);
-		pageaddr += PAGE_CACHE_SIZE;
+		pageaddr += PAGE_SIZE;
 	}
 
 	bp->b_count_desired = len;
 	bp->b_buffer_length = buflen;
 	bp->b_flags |= XBF_MAPPED;
-	bp->b_flags &= ~_XBF_PAGE_LOCKED;
 
 	return 0;
 }
@@ -850,8 +791,8 @@ xfs_buf_get_uncached(
 
 	error = _xfs_buf_map_pages(bp, XBF_MAPPED);
 	if (unlikely(error)) {
-		printk(KERN_WARNING "%s: failed to map pages\n",
-				__func__);
+		xfs_warn(target->bt_mount,
+			"%s: failed to map pages\n", __func__);
 		goto fail_free_mem;
 	}
 
@@ -924,20 +865,7 @@ xfs_buf_rele(
 
 
 /*
- *	Mutual exclusion on buffers.  Locking model:
- *
- *	Buffers associated with inodes for which buffer locking
- *	is not enabled are not protected by semaphores, and are
- *	assumed to be exclusively owned by the caller.  There is a
- *	spinlock in the buffer, used by the caller when concurrent
- *	access is possible.
- */
-
-/*
- *	Locks a buffer object, if it is not already locked.  Note that this in
- *	no way locks the underlying pages, so it is only useful for
- *	synchronizing concurrent use of buffer objects, not for synchronizing
- *	independent access to the underlying pages.
+ *	Lock a buffer object, if it is not already locked.
 *
 *	If we come across a stale, pinned, locked buffer, we know that we are
 *	being asked to lock a buffer that has been reallocated. Because it is
@@ -971,10 +899,7 @@ xfs_buf_lock_value(
 }
 
 /*
- *	Locks a buffer object.
- *	Note that this in no way locks the underlying pages, so it is only
- *	useful for synchronizing concurrent use of buffer objects, not for
- *	synchronizing independent access to the underlying pages.
+ *	Lock a buffer object.
 *
 *	If we come across a stale, pinned, locked buffer, we know that we
 *	are being asked to lock a buffer that has been reallocated. Because
@@ -990,8 +915,6 @@ xfs_buf_lock(
 
 	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
 		xfs_log_force(bp->b_target->bt_mount, 0);
-	if (atomic_read(&bp->b_io_remaining))
-		blk_run_address_space(bp->b_target->bt_mapping);
 	down(&bp->b_sema);
 	XB_SET_OWNER(bp);
 
@@ -1035,9 +958,7 @@ xfs_buf_wait_unpin(
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		if (atomic_read(&bp->b_pin_count) == 0)
 			break;
-		if (atomic_read(&bp->b_io_remaining))
-			blk_run_address_space(bp->b_target->bt_mapping);
-		schedule();
+		io_schedule();
 	}
 	remove_wait_queue(&bp->b_waiters, &wait);
 	set_current_state(TASK_RUNNING);
@@ -1249,10 +1170,8 @@ _xfs_buf_ioend(
 	xfs_buf_t		*bp,
 	int			schedule)
 {
-	if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
-		bp->b_flags &= ~_XBF_PAGE_LOCKED;
+	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
 		xfs_buf_ioend(bp, schedule);
-	}
 }
 
 STATIC void
@@ -1261,35 +1180,12 @@ xfs_buf_bio_end_io(
 	int			error)
 {
 	xfs_buf_t		*bp = (xfs_buf_t *)bio->bi_private;
-	unsigned int		blocksize = bp->b_target->bt_bsize;
-	struct bio_vec		*bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 
 	xfs_buf_ioerror(bp, -error);
 
 	if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
 		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
 
-	do {
-		struct page	*page = bvec->bv_page;
-
-		ASSERT(!PagePrivate(page));
-		if (unlikely(bp->b_error)) {
-			if (bp->b_flags & XBF_READ)
-				ClearPageUptodate(page);
-		} else if (blocksize >= PAGE_CACHE_SIZE) {
-			SetPageUptodate(page);
-		} else if (!PagePrivate(page) &&
-				(bp->b_flags & _XBF_PAGE_CACHE)) {
-			set_page_region(page, bvec->bv_offset, bvec->bv_len);
-		}
-
-		if (--bvec >= bio->bi_io_vec)
-			prefetchw(&bvec->bv_page->flags);
-
-		if (bp->b_flags & _XBF_PAGE_LOCKED)
-			unlock_page(page);
-	} while (bvec >= bio->bi_io_vec);
-
 	_xfs_buf_ioend(bp, 1);
 	bio_put(bio);
 }
@@ -1303,7 +1199,6 @@ _xfs_buf_ioapply(
 	int			offset = bp->b_offset;
 	int			size = bp->b_count_desired;
 	sector_t		sector = bp->b_bn;
-	unsigned int		blocksize = bp->b_target->bt_bsize;
 
 	total_nr_pages = bp->b_page_count;
 	map_i = 0;
@@ -1324,29 +1219,6 @@
 		      (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
 	}
 
-	/* Special code path for reading a sub page size buffer in --
-	 * we populate up the whole page, and hence the other metadata
-	 * in the same page. This optimization is only valid when the
-	 * filesystem block size is not smaller than the page size.
-	 */
-	if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
-	    ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
-	      (XBF_READ|_XBF_PAGE_LOCKED)) &&
-	    (blocksize >= PAGE_CACHE_SIZE)) {
-		bio = bio_alloc(GFP_NOIO, 1);
-
-		bio->bi_bdev = bp->b_target->bt_bdev;
-		bio->bi_sector = sector - (offset >> BBSHIFT);
-		bio->bi_end_io = xfs_buf_bio_end_io;
-		bio->bi_private = bp;
-
-		bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0);
-		size = 0;
-
-		atomic_inc(&bp->b_io_remaining);
-
-		goto submit_io;
-	}
 
 next_chunk:
 	atomic_inc(&bp->b_io_remaining);
@@ -1360,8 +1232,9 @@ next_chunk:
 	bio->bi_end_io = xfs_buf_bio_end_io;
 	bio->bi_private = bp;
 
+
 	for (; size && nr_pages; nr_pages--, map_i++) {
-		int	rbytes, nbytes = PAGE_CACHE_SIZE - offset;
+		int	rbytes, nbytes = PAGE_SIZE - offset;
 
 		if (nbytes > size)
 			nbytes = size;
@@ -1376,7 +1249,6 @@ next_chunk:
 		total_nr_pages--;
 	}
 
-submit_io:
 	if (likely(bio->bi_size)) {
 		if (xfs_buf_is_vmapped(bp)) {
 			flush_kernel_vmap_range(bp->b_addr,
@@ -1386,18 +1258,7 @@ submit_io:
 		if (size)
 			goto next_chunk;
 	} else {
-		/*
-		 * if we get here, no pages were added to the bio. However,
-		 * we can't just error out here - if the pages are locked then
-		 * we have to unlock them otherwise we can hang on a later
-		 * access to the page.
-		 */
 		xfs_buf_ioerror(bp, EIO);
-		if (bp->b_flags & _XBF_PAGE_LOCKED) {
-			int i;
-			for (i = 0; i < bp->b_page_count; i++)
-				unlock_page(bp->b_pages[i]);
-		}
 		bio_put(bio);
 	}
 }
@@ -1442,8 +1303,6 @@ xfs_buf_iowait(
 {
 	trace_xfs_buf_iowait(bp, _RET_IP_);
 
-	if (atomic_read(&bp->b_io_remaining))
-		blk_run_address_space(bp->b_target->bt_mapping);
 	wait_for_completion(&bp->b_iowait);
 
 	trace_xfs_buf_iowait_done(bp, _RET_IP_);
@@ -1461,8 +1320,8 @@ xfs_buf_offset(
 		return XFS_BUF_PTR(bp) + offset;
 
 	offset += bp->b_offset;
-	page = bp->b_pages[offset >> PAGE_CACHE_SHIFT];
-	return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1));
+	page = bp->b_pages[offset >> PAGE_SHIFT];
+	return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
 }
 
 /*
@@ -1484,9 +1343,9 @@ xfs_buf_iomove(
 		page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
 		cpoff = xfs_buf_poff(boff + bp->b_offset);
 		csize = min_t(size_t,
-			      PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff);
+			      PAGE_SIZE-cpoff, bp->b_count_desired-boff);
 
-		ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));
+		ASSERT(((csize + cpoff) <= PAGE_SIZE));
 
 		switch (mode) {
 		case XBRW_ZERO:
@@ -1599,7 +1458,6 @@ xfs_free_buftarg(
 	xfs_flush_buftarg(btp, 1);
 	if (mp->m_flags & XFS_MOUNT_BARRIER)
 		xfs_blkdev_issue_flush(btp);
-	iput(btp->bt_mapping->host);
 
 	kthread_stop(btp->bt_task);
 	kmem_free(btp);
@@ -1617,21 +1475,12 @@ xfs_setsize_buftarg_flags(
 	btp->bt_smask = sectorsize - 1;
 
 	if (set_blocksize(btp->bt_bdev, sectorsize)) {
-		printk(KERN_WARNING
-			"XFS: Cannot set_blocksize to %u on device %s\n",
+		xfs_warn(btp->bt_mount,
+			"Cannot set_blocksize to %u on device %s\n",
 			sectorsize, XFS_BUFTARG_NAME(btp));
 		return EINVAL;
 	}
 
-	if (verbose &&
-	    (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) {
-		printk(KERN_WARNING
-			"XFS: %u byte sectors in use on device %s.  "
-			"This is suboptimal; %u or greater is ideal.\n",
-			sectorsize, XFS_BUFTARG_NAME(btp),
-			(unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG);
-	}
-
 	return 0;
 }
 
@@ -1646,7 +1495,7 @@ xfs_setsize_buftarg_early(
 	struct block_device	*bdev)
 {
 	return xfs_setsize_buftarg_flags(btp,
-			PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0);
+			PAGE_SIZE, bdev_logical_block_size(bdev), 0);
 }
 
 int
@@ -1659,41 +1508,6 @@ xfs_setsize_buftarg(
 }
 
 STATIC int
-xfs_mapping_buftarg(
-	xfs_buftarg_t		*btp,
-	struct block_device	*bdev)
-{
-	struct backing_dev_info	*bdi;
-	struct inode		*inode;
-	struct address_space	*mapping;
-	static const struct address_space_operations mapping_aops = {
-		.sync_page = block_sync_page,
-		.migratepage = fail_migrate_page,
-	};
-
-	inode = new_inode(bdev->bd_inode->i_sb);
-	if (!inode) {
-		printk(KERN_WARNING
-			"XFS: Cannot allocate mapping inode for device %s\n",
-			XFS_BUFTARG_NAME(btp));
-		return ENOMEM;
-	}
-	inode->i_ino = get_next_ino();
-	inode->i_mode = S_IFBLK;
-	inode->i_bdev = bdev;
-	inode->i_rdev = bdev->bd_dev;
-	bdi = blk_get_backing_dev_info(bdev);
-	if (!bdi)
-		bdi = &default_backing_dev_info;
-	mapping = &inode->i_data;
-	mapping->a_ops = &mapping_aops;
-	mapping->backing_dev_info = bdi;
-	mapping_set_gfp_mask(mapping, GFP_NOFS);
-	btp->bt_mapping = mapping;
-	return 0;
-}
-
-STATIC int
 xfs_alloc_delwrite_queue(
 	xfs_buftarg_t		*btp,
 	const char		*fsname)
@@ -1721,12 +1535,14 @@ xfs_alloc_buftarg(
 	btp->bt_mount = mp;
 	btp->bt_dev =  bdev->bd_dev;
 	btp->bt_bdev = bdev;
+	btp->bt_bdi = blk_get_backing_dev_info(bdev);
+	if (!btp->bt_bdi)
+		goto error;
+
 	INIT_LIST_HEAD(&btp->bt_lru);
 	spin_lock_init(&btp->bt_lru_lock);
 	if (xfs_setsize_buftarg_early(btp, bdev))
 		goto error;
-	if (xfs_mapping_buftarg(btp, bdev))
-		goto error;
 	if (xfs_alloc_delwrite_queue(btp, fsname))
 		goto error;
 	btp->bt_shrinker.shrink = xfs_buftarg_shrink;
@@ -1923,8 +1739,8 @@ xfsbufd(
 	do {
 		long	age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
 		long	tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
-		int	count = 0;
 		struct list_head tmp;
+		struct blk_plug plug;
 
 		if (unlikely(freezing(current))) {
 			set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
@@ -1940,16 +1756,15 @@ xfsbufd(
 
 		xfs_buf_delwri_split(target, &tmp, age);
 		list_sort(NULL, &tmp, xfs_buf_cmp);
+
+		blk_start_plug(&plug);
 		while (!list_empty(&tmp)) {
 			struct xfs_buf *bp;
 			bp = list_first_entry(&tmp, struct xfs_buf, b_list);
 			list_del_init(&bp->b_list);
 			xfs_bdstrat_cb(bp);
-			count++;
 		}
-		if (count)
-			blk_run_address_space(target->bt_mapping);
-
+		blk_finish_plug(&plug);
 	} while (!kthread_should_stop());
 
 	return 0;
@@ -1969,6 +1784,7 @@ xfs_flush_buftarg(
 	int		pincount = 0;
 	LIST_HEAD(tmp_list);
 	LIST_HEAD(wait_list);
+	struct blk_plug plug;
 
 	xfs_buf_runall_queues(xfsconvertd_workqueue);
 	xfs_buf_runall_queues(xfsdatad_workqueue);
@@ -1983,6 +1799,8 @@
 	 * we do that after issuing all the IO.
 	 */
 	list_sort(NULL, &tmp_list, xfs_buf_cmp);
+
+	blk_start_plug(&plug);
 	while (!list_empty(&tmp_list)) {
 		bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
 		ASSERT(target == bp->b_target);
@@ -1993,10 +1811,10 @@
 		}
 		xfs_bdstrat_cb(bp);
 	}
+	blk_finish_plug(&plug);
 
 	if (wait) {
-		/* Expedite and wait for IO to complete. */
-		blk_run_address_space(target->bt_mapping);
+		/* Wait for IO to complete. */
 		while (!list_empty(&wait_list)) {
 			bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
 
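
The heart of the xfs_buf.c rework is the new allocation strategy in
xfs_buf_allocate_memory(): sub-page buffers come straight from the heap unless
the returned address straddles a page boundary, in which case the code falls
back to the alloc_page() loop. A hedged user-space model of just that boundary
test (the kernel uses kmem_alloc()/virt_to_page(); PAGE_SIZE is hard-coded
here purely for illustration):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define MODEL_PAGE_SIZE	4096UL
#define MODEL_PAGE_MASK	(~(MODEL_PAGE_SIZE - 1))

/* true if [addr, addr + len) lies within a single page, which is what
 * xfs_buf_allocate_memory() requires before keeping a heap allocation */
static bool fits_in_one_page(const void *addr, size_t len)
{
	uintptr_t start = (uintptr_t)addr;
	uintptr_t last = start + len - 1;

	return (start & MODEL_PAGE_MASK) == (last & MODEL_PAGE_MASK);
}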
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index cbe65950e524..a9a1c4512645 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -61,30 +61,11 @@ typedef enum {
 #define XBF_DONT_BLOCK	(1 << 16)/* do not block in current thread */
 
 /* flags used only internally */
-#define _XBF_PAGE_CACHE	(1 << 17)/* backed by pagecache */
 #define _XBF_PAGES	(1 << 18)/* backed by refcounted pages */
 #define	_XBF_RUN_QUEUES	(1 << 19)/* run block device task queue	*/
+#define	_XBF_KMEM	(1 << 20)/* backed by heap memory */
 #define _XBF_DELWRI_Q	(1 << 21)/* buffer on delwri queue */
 
-/*
- * Special flag for supporting metadata blocks smaller than a FSB.
- *
- * In this case we can have multiple xfs_buf_t on a single page and
- * need to lock out concurrent xfs_buf_t readers as they only
- * serialise access to the buffer.
- *
- * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
- * between reads of the page. Hence we can have one thread read the
- * page and modify it, but then race with another thread that thinks
- * the page is not up-to-date and hence reads it again.
- *
- * The result is that the first modifcation to the page is lost.
- * This sort of AGF/AGI reading race can happen when unlinking inodes
- * that require truncation and results in the AGI unlinked list
- * modifications being lost.
- */
-#define _XBF_PAGE_LOCKED	(1 << 22)
-
 typedef unsigned int xfs_buf_flags_t;
 
 #define XFS_BUF_FLAGS \
@@ -100,12 +81,10 @@ typedef unsigned int xfs_buf_flags_t;
 	{ XBF_LOCK,		"LOCK" },  	/* should never be set */\
 	{ XBF_TRYLOCK,		"TRYLOCK" }, 	/* ditto */\
 	{ XBF_DONT_BLOCK,	"DONT_BLOCK" },	/* ditto */\
-	{ _XBF_PAGE_CACHE,	"PAGE_CACHE" }, \
 	{ _XBF_PAGES,		"PAGES" }, \
 	{ _XBF_RUN_QUEUES,	"RUN_QUEUES" }, \
-	{ _XBF_DELWRI_Q,	"DELWRI_Q" }, \
-	{ _XBF_PAGE_LOCKED,	"PAGE_LOCKED" }
-
+	{ _XBF_KMEM,		"KMEM" }, \
+	{ _XBF_DELWRI_Q,	"DELWRI_Q" }
 
 typedef enum {
 	XBT_FORCE_SLEEP = 0,
@@ -120,7 +99,7 @@ typedef struct xfs_bufhash {
 typedef struct xfs_buftarg {
 	dev_t			bt_dev;
 	struct block_device	*bt_bdev;
-	struct address_space	*bt_mapping;
+	struct backing_dev_info	*bt_bdi;
 	struct xfs_mount	*bt_mount;
 	unsigned int		bt_bsize;
 	unsigned int		bt_sshift;
@@ -139,17 +118,6 @@ typedef struct xfs_buftarg {
 	unsigned int		bt_lru_nr;
 } xfs_buftarg_t;
 
-/*
- *	xfs_buf_t:  Buffer structure for pagecache-based buffers
- *
- *	This buffer structure is used by the pagecache buffer management routines
- *	to refer to an assembly of pages forming a logical buffer.
- *
- *	The buffer structure is used on a temporary basis only, and discarded when
- *	released.  The real data storage is recorded in the pagecache. Buffers are
- *	hashed to the block device on which the file system resides.
- */
-
 struct xfs_buf;
 typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
 
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index a55c1b46b219..f4213ba1ff85 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -381,7 +381,7 @@ xfs_aio_write_isize_update(
 
 /*
  * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
- * part of the I/O may have been written to disk before the error occured.  In
+ * part of the I/O may have been written to disk before the error occurred.  In
  * this case the on-disk file size may have been adjusted beyond the in-memory
  * file size and now needs to be truncated back.
 */
@@ -896,6 +896,7 @@ xfs_file_fallocate(
 	xfs_flock64_t	bf;
 	xfs_inode_t	*ip = XFS_I(inode);
 	int		cmd = XFS_IOC_RESVSP;
+	int		attr_flags = XFS_ATTR_NOLOCK;
 
 	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
 		return -EOPNOTSUPP;
@@ -918,7 +919,10 @@ xfs_file_fallocate(
 		goto out_unlock;
 	}
 
-	error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK);
+	if (file->f_flags & O_DSYNC)
+		attr_flags |= XFS_ATTR_SYNC;
+
+	error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
 	if (error)
 		goto out_unlock;
 
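
The xfs_file.c change means preallocation requested through fallocate() on an
O_DSYNC file descriptor now also asks for a synchronous transaction. The flag
mapping, sketched with stand-in constants (the kernel's O_DSYNC and XFS_ATTR_*
values differ):

#define MODEL_O_DSYNC		0x1000
#define MODEL_ATTR_NOLOCK	0x01
#define MODEL_ATTR_SYNC		0x04

/* Mirrors the new logic in xfs_file_fallocate() and xfs_ioc_space(). */
static int space_attr_flags(int f_flags)
{
	int attr_flags = MODEL_ATTR_NOLOCK;

	if (f_flags & MODEL_O_DSYNC)
		attr_flags |= MODEL_ATTR_SYNC;
	return attr_flags;
}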
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 0ca0e3c024d7..acca2c5ca3fa 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -624,6 +624,10 @@ xfs_ioc_space(
 
 	if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
 		attr_flags |= XFS_ATTR_NONBLOCK;
+
+	if (filp->f_flags & O_DSYNC)
+		attr_flags |= XFS_ATTR_SYNC;
+
 	if (ioflags & IO_INVIS)
 		attr_flags |= XFS_ATTR_DMI;
 
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 9ff7fc603d2f..dd21784525a8 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -70,7 +70,7 @@ xfs_synchronize_times(
 
 /*
  * If the linux inode is valid, mark it dirty.
- * Used when commiting a dirty inode into a transaction so that
+ * Used when committing a dirty inode into a transaction so that
  * the inode will get written back by the linux code
 */
 void
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 096494997747..244be9cbfe78 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -39,7 +39,6 @@
 #include <mrlock.h>
 #include <time.h>
 
-#include <support/debug.h>
 #include <support/uuid.h>
 
 #include <linux/semaphore.h>
@@ -86,6 +85,7 @@
 #include <xfs_aops.h>
 #include <xfs_super.h>
 #include <xfs_buf.h>
+#include <xfs_message.h>
 
 /*
  * Feature macros (disable/enable)
@@ -280,4 +280,25 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
 #define __arch_pack
 #endif
 
+#define ASSERT_ALWAYS(expr)	\
+	(unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+
+#ifndef DEBUG
+#define ASSERT(expr)	((void)0)
+
+#ifndef STATIC
+# define STATIC static noinline
+#endif
+
+#else /* DEBUG */
+
+#define ASSERT(expr)	\
+	(unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+
+#ifndef STATIC
+# define STATIC noinline
+#endif
+
+#endif /* DEBUG */
+
 #endif	/* __XFS_LINUX__ */
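
The ASSERT()/ASSERT_ALWAYS() block moved into xfs_linux.h compiles the cheap
form away unless DEBUG is set. A user-space model of the same macros
(model_assfail() is a stand-in that aborts; the kernel's assfail(), added in
xfs_message.c below, calls BUG()):

#include <stdio.h>
#include <stdlib.h>

static void model_assfail(const char *expr, const char *file, int line)
{
	fprintf(stderr, "Assertion failed: %s, file: %s, line: %d\n",
		expr, file, line);
	abort();
}

#define MODEL_ASSERT_ALWAYS(expr) \
	((expr) ? (void)0 : model_assfail(#expr, __FILE__, __LINE__))

#ifndef DEBUG
# define MODEL_ASSERT(expr)	((void)0)
#else
# define MODEL_ASSERT(expr)	MODEL_ASSERT_ALWAYS(expr)
#endif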
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
new file mode 100644
index 000000000000..9f76cceb678d
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_message.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2011 Red Hat, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+
+/*
+ * XFS logging functions
+ */
+static void
+__xfs_printk(
+	const char		*level,
+	const struct xfs_mount	*mp,
+	struct va_format	*vaf)
+{
+	if (mp && mp->m_fsname) {
+		printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
+		return;
+	}
+	printk("%sXFS: %pV\n", level, vaf);
+}
+
+void xfs_printk(
+	const char		*level,
+	const struct xfs_mount	*mp,
+	const char		*fmt, ...)
+{
+	struct va_format	vaf;
+	va_list			args;
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	__xfs_printk(level, mp, &vaf);
+	va_end(args);
+}
+
+#define define_xfs_printk_level(func, kern_level)		\
+void func(const struct xfs_mount *mp, const char *fmt, ...)	\
+{								\
+	struct va_format	vaf;				\
+	va_list			args;				\
+								\
+	va_start(args, fmt);					\
+								\
+	vaf.fmt = fmt;						\
+	vaf.va = &args;						\
+								\
+	__xfs_printk(kern_level, mp, &vaf);			\
+	va_end(args);						\
+}								\
+
+define_xfs_printk_level(xfs_emerg, KERN_EMERG);
+define_xfs_printk_level(xfs_alert, KERN_ALERT);
+define_xfs_printk_level(xfs_crit, KERN_CRIT);
+define_xfs_printk_level(xfs_err, KERN_ERR);
+define_xfs_printk_level(xfs_warn, KERN_WARNING);
+define_xfs_printk_level(xfs_notice, KERN_NOTICE);
+define_xfs_printk_level(xfs_info, KERN_INFO);
+#ifdef DEBUG
+define_xfs_printk_level(xfs_debug, KERN_DEBUG);
+#endif
+
+void
+xfs_alert_tag(
+	const struct xfs_mount	*mp,
+	int			panic_tag,
+	const char		*fmt, ...)
+{
+	struct va_format	vaf;
+	va_list			args;
+	int			do_panic = 0;
+
+	if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
+		xfs_printk(KERN_ALERT, mp,
+			"XFS: Transforming an alert into a BUG.");
+		do_panic = 1;
+	}
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	__xfs_printk(KERN_ALERT, mp, &vaf);
+	va_end(args);
+
+	BUG_ON(do_panic);
+}
+
+void
+assfail(char *expr, char *file, int line)
+{
+	xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
+		expr, file, line);
+	BUG();
+}
+
+void
+xfs_hex_dump(void *p, int length)
+{
+	print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
+}
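
The pattern in the new xfs_message.c is one core emitter plus a macro that
stamps out a helper per severity. A hedged user-space model — the kernel
routes through printk() and struct va_format's %pV, replaced here by
vfprintf() — shows the shape:

#include <stdarg.h>
#include <stdio.h>

struct model_mount { const char *m_fsname; };	/* stand-in for xfs_mount */

static void model_vprintk(const char *level, const struct model_mount *mp,
			  const char *fmt, va_list args)
{
	if (mp && mp->m_fsname)
		fprintf(stderr, "%sXFS (%s): ", level, mp->m_fsname);
	else
		fprintf(stderr, "%sXFS: ", level);
	vfprintf(stderr, fmt, args);
	fputc('\n', stderr);
}

#define define_model_level(func, level)					\
static void func(const struct model_mount *mp, const char *fmt, ...)	\
{									\
	va_list args;							\
									\
	va_start(args, fmt);						\
	model_vprintk(level, mp, fmt, args);				\
	va_end(args);							\
}

define_model_level(model_warn, "<4>")	/* KERN_WARNING */
define_model_level(model_err, "<3>")	/* KERN_ERR */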
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h
new file mode 100644
index 000000000000..f1b3fc1b6c4e
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_message.h
@@ -0,0 +1,40 @@
+#ifndef __XFS_MESSAGE_H
+#define __XFS_MESSAGE_H 1
+
+struct xfs_mount;
+
+extern void xfs_printk(const char *level, const struct xfs_mount *mp,
+			const char *fmt, ...)
+	__attribute__ ((format (printf, 3, 4)));
+extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern void xfs_alert_tag(const struct xfs_mount *mp, int tag,
+			 const char *fmt, ...)
+	__attribute__ ((format (printf, 3, 4)));
+extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+
+#ifdef DEBUG
+extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+#else
+static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+{
+}
+#endif
+
+extern void assfail(char *expr, char *f, int l);
+
+extern void xfs_hex_dump(void *p, int length);
+
+#endif	/* __XFS_MESSAGE_H */
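
The __attribute__((format(printf, ...))) annotations in xfs_message.h let the
compiler type-check every call site's arguments against its format string. A
hedged user-space illustration (log_warn() is hypothetical, not part of the
patch):

#include <stdarg.h>
#include <stdio.h>

/* Argument 1 is the format string; variadic arguments start at 2. */
static void log_warn(const char *fmt, ...)
	__attribute__ ((format (printf, 1, 2)));

static void log_warn(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
}

/* log_warn("inode %llu", 42);  -- gcc -Wformat warns here: %llu expects
 * unsigned long long but the argument is int. */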
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 9731898083ae..b38e58d02299 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -173,6 +173,15 @@ xfs_parseargs(
 	__uint8_t		iosizelog = 0;
 
 	/*
+	 * set up the mount name first so all the errors will refer to the
+	 * correct device.
+	 */
+	mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
+	if (!mp->m_fsname)
+		return ENOMEM;
+	mp->m_fsname_len = strlen(mp->m_fsname) + 1;
+
+	/*
 	 * Copy binary VFS mount flags we are interested in.
 	 */
 	if (sb->s_flags & MS_RDONLY)
@@ -189,6 +198,7 @@ xfs_parseargs(
 		mp->m_flags |= XFS_MOUNT_BARRIER;
 	mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
 	mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+	mp->m_flags |= XFS_MOUNT_DELAYLOG;
 
 	/*
 	 * These can be overridden by the mount option parsing.
@@ -207,24 +217,21 @@
 
 		if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
 			if (!value || !*value) {
-				cmn_err(CE_WARN,
-					"XFS: %s option requires an argument",
+				xfs_warn(mp, "%s option requires an argument",
 					this_char);
 				return EINVAL;
 			}
 			mp->m_logbufs = simple_strtoul(value, &eov, 10);
 		} else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
 			if (!value || !*value) {
-				cmn_err(CE_WARN,
-					"XFS: %s option requires an argument",
+				xfs_warn(mp, "%s option requires an argument",
 					this_char);
 				return EINVAL;
 			}
 			mp->m_logbsize = suffix_strtoul(value, &eov, 10);
 		} else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
 			if (!value || !*value) {
-				cmn_err(CE_WARN,
-					"XFS: %s option requires an argument",
+				xfs_warn(mp, "%s option requires an argument",
 					this_char);
 				return EINVAL;
 			}
@@ -232,14 +239,12 @@
 			if (!mp->m_logname)
 				return ENOMEM;
 		} else if (!strcmp(this_char, MNTOPT_MTPT)) {
-			cmn_err(CE_WARN,
-				"XFS: %s option not allowed on this system",
+			xfs_warn(mp, "%s option not allowed on this system",
 				this_char);
 			return EINVAL;
 		} else if (!strcmp(this_char, MNTOPT_RTDEV)) {
 			if (!value || !*value) {
-				cmn_err(CE_WARN,
-					"XFS: %s option requires an argument",
+				xfs_warn(mp, "%s option requires an argument",
 					this_char);
 				return EINVAL;
 			}
@@ -248,8 +253,7 @@
 			return ENOMEM;
 		} else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
 			if (!value || !*value) {
-				cmn_err(CE_WARN,
-					"XFS: %s option requires an argument",
+				xfs_warn(mp, "%s option requires an argument",
 					this_char);
 				return EINVAL;
 			}
@@ -257,8 +261,7 @@
 			iosizelog = ffs(iosize) - 1;
 		} else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
 			if (!value || !*value) {
-				cmn_err(CE_WARN,
-					"XFS: %s option requires an argument",
+				xfs_warn(mp, "%s option requires an argument",
 					this_char);
 				return EINVAL;
 			}
@@ -280,16 +283,14 @@
 			mp->m_flags |= XFS_MOUNT_SWALLOC;
 		} else if (!strcmp(this_char, MNTOPT_SUNIT)) {
 			if (!value || !*value) {
-				cmn_err(CE_WARN,
-					"XFS: %s option requires an argument",
+				xfs_warn(mp, "%s option requires an argument",
 					this_char);
 				return EINVAL;
 			}
 			dsunit = simple_strtoul(value, &eov, 10);
 		} else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
 			if (!value || !*value) {
-				cmn_err(CE_WARN,
-					"XFS: %s option requires an argument",
+				xfs_warn(mp, "%s option requires an argument",
 					this_char);
 				return EINVAL;
 			}
@@ -297,8 +298,7 @@
 		} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
 			mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
 #if !XFS_BIG_INUMS
-			cmn_err(CE_WARN,
-				"XFS: %s option not allowed on this system",
+			xfs_warn(mp, "%s option not allowed on this system",
 				this_char);
 			return EINVAL;
 #endif
@@ -356,20 +356,19 @@
 		} else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
 			mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
 		} else if (!strcmp(this_char, "ihashsize")) {
-			cmn_err(CE_WARN,
-	"XFS: ihashsize no longer used, option is deprecated.");
+			xfs_warn(mp,
+	"ihashsize no longer used, option is deprecated.");
 		} else if (!strcmp(this_char, "osyncisdsync")) {
-			cmn_err(CE_WARN,
-	"XFS: osyncisdsync has no effect, option is deprecated.");
+			xfs_warn(mp,
+	"osyncisdsync has no effect, option is deprecated.");
 		} else if (!strcmp(this_char, "osyncisosync")) {
-			cmn_err(CE_WARN,
-	"XFS: osyncisosync has no effect, option is deprecated.");
+			xfs_warn(mp,
+	"osyncisosync has no effect, option is deprecated.");
 		} else if (!strcmp(this_char, "irixsgid")) {
-			cmn_err(CE_WARN,
-	"XFS: irixsgid is now a sysctl(2) variable, option is deprecated.");
+			xfs_warn(mp,
+	"irixsgid is now a sysctl(2) variable, option is deprecated.");
 		} else {
-			cmn_err(CE_WARN,
-				"XFS: unknown mount option [%s].", this_char);
+			xfs_warn(mp, "unknown mount option [%s].", this_char);
 			return EINVAL;
 		}
 	}
@@ -379,40 +378,37 @@ xfs_parseargs(
379 */ 378 */
380 if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && 379 if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
381 !(mp->m_flags & XFS_MOUNT_RDONLY)) { 380 !(mp->m_flags & XFS_MOUNT_RDONLY)) {
382 cmn_err(CE_WARN, "XFS: no-recovery mounts must be read-only."); 381 xfs_warn(mp, "no-recovery mounts must be read-only.");
383 return EINVAL; 382 return EINVAL;
384 } 383 }
385 384
386 if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { 385 if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
387 cmn_err(CE_WARN, 386 xfs_warn(mp,
388 "XFS: sunit and swidth options incompatible with the noalign option"); 387 "sunit and swidth options incompatible with the noalign option");
389 return EINVAL; 388 return EINVAL;
390 } 389 }
391 390
392#ifndef CONFIG_XFS_QUOTA 391#ifndef CONFIG_XFS_QUOTA
393 if (XFS_IS_QUOTA_RUNNING(mp)) { 392 if (XFS_IS_QUOTA_RUNNING(mp)) {
394 cmn_err(CE_WARN, 393 xfs_warn(mp, "quota support not available in this kernel.");
395 "XFS: quota support not available in this kernel.");
396 return EINVAL; 394 return EINVAL;
397 } 395 }
398#endif 396#endif
399 397
400 if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && 398 if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
401 (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { 399 (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
402 cmn_err(CE_WARN, 400 xfs_warn(mp, "cannot mount with both project and group quota");
403 "XFS: cannot mount with both project and group quota");
404 return EINVAL; 401 return EINVAL;
405 } 402 }
406 403
407 if ((dsunit && !dswidth) || (!dsunit && dswidth)) { 404 if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
408 cmn_err(CE_WARN, 405 xfs_warn(mp, "sunit and swidth must be specified together");
409 "XFS: sunit and swidth must be specified together");
410 return EINVAL; 406 return EINVAL;
411 } 407 }
412 408
413 if (dsunit && (dswidth % dsunit != 0)) { 409 if (dsunit && (dswidth % dsunit != 0)) {
414 cmn_err(CE_WARN, 410 xfs_warn(mp,
415 "XFS: stripe width (%d) must be a multiple of the stripe unit (%d)", 411 "stripe width (%d) must be a multiple of the stripe unit (%d)",
416 dswidth, dsunit); 412 dswidth, dsunit);
417 return EINVAL; 413 return EINVAL;
418 } 414 }
@@ -438,8 +434,7 @@ done:
438 mp->m_logbufs != 0 && 434 mp->m_logbufs != 0 &&
439 (mp->m_logbufs < XLOG_MIN_ICLOGS || 435 (mp->m_logbufs < XLOG_MIN_ICLOGS ||
440 mp->m_logbufs > XLOG_MAX_ICLOGS)) { 436 mp->m_logbufs > XLOG_MAX_ICLOGS)) {
441 cmn_err(CE_WARN, 437 xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
442 "XFS: invalid logbufs value: %d [not %d-%d]",
443 mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); 438 mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
444 return XFS_ERROR(EINVAL); 439 return XFS_ERROR(EINVAL);
445 } 440 }
@@ -448,22 +443,16 @@ done:
448 (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || 443 (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
449 mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || 444 mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
450 !is_power_of_2(mp->m_logbsize))) { 445 !is_power_of_2(mp->m_logbsize))) {
451 cmn_err(CE_WARN, 446 xfs_warn(mp,
452 "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", 447 "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
453 mp->m_logbsize); 448 mp->m_logbsize);
454 return XFS_ERROR(EINVAL); 449 return XFS_ERROR(EINVAL);
455 } 450 }
456 451
457 mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
458 if (!mp->m_fsname)
459 return ENOMEM;
460 mp->m_fsname_len = strlen(mp->m_fsname) + 1;
461
462 if (iosizelog) { 452 if (iosizelog) {
463 if (iosizelog > XFS_MAX_IO_LOG || 453 if (iosizelog > XFS_MAX_IO_LOG ||
464 iosizelog < XFS_MIN_IO_LOG) { 454 iosizelog < XFS_MIN_IO_LOG) {
465 cmn_err(CE_WARN, 455 xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
466 "XFS: invalid log iosize: %d [not %d-%d]",
467 iosizelog, XFS_MIN_IO_LOG, 456 iosizelog, XFS_MIN_IO_LOG,
468 XFS_MAX_IO_LOG); 457 XFS_MAX_IO_LOG);
469 return XFS_ERROR(EINVAL); 458 return XFS_ERROR(EINVAL);
@@ -610,7 +599,7 @@ xfs_blkdev_get(
610 mp); 599 mp);
611 if (IS_ERR(*bdevp)) { 600 if (IS_ERR(*bdevp)) {
612 error = PTR_ERR(*bdevp); 601 error = PTR_ERR(*bdevp);
613 printk("XFS: Invalid device [%s], error=%d\n", name, error); 602 xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error);
614 } 603 }
615 604
616 return -error; 605 return -error;
@@ -664,23 +653,23 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
664 int error; 653 int error;
665 654
666 if (mp->m_logdev_targp != mp->m_ddev_targp) { 655 if (mp->m_logdev_targp != mp->m_ddev_targp) {
667 xfs_fs_cmn_err(CE_NOTE, mp, 656 xfs_notice(mp,
668 "Disabling barriers, not supported with external log device"); 657 "Disabling barriers, not supported with external log device");
669 mp->m_flags &= ~XFS_MOUNT_BARRIER; 658 mp->m_flags &= ~XFS_MOUNT_BARRIER;
670 return; 659 return;
671 } 660 }
672 661
673 if (xfs_readonly_buftarg(mp->m_ddev_targp)) { 662 if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
674 xfs_fs_cmn_err(CE_NOTE, mp, 663 xfs_notice(mp,
675 "Disabling barriers, underlying device is readonly"); 664 "Disabling barriers, underlying device is readonly");
676 mp->m_flags &= ~XFS_MOUNT_BARRIER; 665 mp->m_flags &= ~XFS_MOUNT_BARRIER;
677 return; 666 return;
678 } 667 }
679 668
680 error = xfs_barrier_test(mp); 669 error = xfs_barrier_test(mp);
681 if (error) { 670 if (error) {
682 xfs_fs_cmn_err(CE_NOTE, mp, 671 xfs_notice(mp,
683 "Disabling barriers, trial barrier write failed"); 672 "Disabling barriers, trial barrier write failed");
684 mp->m_flags &= ~XFS_MOUNT_BARRIER; 673 mp->m_flags &= ~XFS_MOUNT_BARRIER;
685 return; 674 return;
686 } 675 }
@@ -743,8 +732,8 @@ xfs_open_devices(
743 goto out_close_logdev; 732 goto out_close_logdev;
744 733
745 if (rtdev == ddev || rtdev == logdev) { 734 if (rtdev == ddev || rtdev == logdev) {
746 cmn_err(CE_WARN, 735 xfs_warn(mp,
747 "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev."); 736 "Cannot mount filesystem with identical rtdev and ddev/logdev.");
748 error = EINVAL; 737 error = EINVAL;
749 goto out_close_rtdev; 738 goto out_close_rtdev;
750 } 739 }
@@ -827,75 +816,6 @@ xfs_setup_devices(
827 return 0; 816 return 0;
828} 817}
829 818
830/*
831 * XFS AIL push thread support
832 */
833void
834xfsaild_wakeup(
835 struct xfs_ail *ailp,
836 xfs_lsn_t threshold_lsn)
837{
838 /* only ever move the target forwards */
839 if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) {
840 ailp->xa_target = threshold_lsn;
841 wake_up_process(ailp->xa_task);
842 }
843}
844
845STATIC int
846xfsaild(
847 void *data)
848{
849 struct xfs_ail *ailp = data;
850 xfs_lsn_t last_pushed_lsn = 0;
851 long tout = 0; /* milliseconds */
852
853 while (!kthread_should_stop()) {
854 /*
855 * for short sleeps indicating congestion, don't allow us to
856 * get woken early. Otherwise all we do is bang on the AIL lock
857 * without making progress.
858 */
859 if (tout && tout <= 20)
860 __set_current_state(TASK_KILLABLE);
861 else
862 __set_current_state(TASK_INTERRUPTIBLE);
863 schedule_timeout(tout ?
864 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
865
866 /* swsusp */
867 try_to_freeze();
868
869 ASSERT(ailp->xa_mount->m_log);
870 if (XFS_FORCED_SHUTDOWN(ailp->xa_mount))
871 continue;
872
873 tout = xfsaild_push(ailp, &last_pushed_lsn);
874 }
875
876 return 0;
877} /* xfsaild */
878
879int
880xfsaild_start(
881 struct xfs_ail *ailp)
882{
883 ailp->xa_target = 0;
884 ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
885 ailp->xa_mount->m_fsname);
886 if (IS_ERR(ailp->xa_task))
887 return -PTR_ERR(ailp->xa_task);
888 return 0;
889}
890
891void
892xfsaild_stop(
893 struct xfs_ail *ailp)
894{
895 kthread_stop(ailp->xa_task);
896}
897
898
899/* Catch misguided souls that try to use this interface on XFS */ 819/* Catch misguided souls that try to use this interface on XFS */
900STATIC struct inode * 820STATIC struct inode *
901xfs_fs_alloc_inode( 821xfs_fs_alloc_inode(
@@ -1089,7 +1009,7 @@ xfs_fs_write_inode(
1089 error = 0; 1009 error = 0;
1090 goto out_unlock; 1010 goto out_unlock;
1091 } 1011 }
1092 error = xfs_iflush(ip, 0); 1012 error = xfs_iflush(ip, SYNC_TRYLOCK);
1093 } 1013 }
1094 1014
1095 out_unlock: 1015 out_unlock:
@@ -1202,22 +1122,12 @@ xfs_fs_sync_fs(
1202 return -error; 1122 return -error;
1203 1123
1204 if (laptop_mode) { 1124 if (laptop_mode) {
1205 int prev_sync_seq = mp->m_sync_seq;
1206
1207 /* 1125 /*
1208 * The disk must be active because we're syncing. 1126 * The disk must be active because we're syncing.
1209 * We schedule xfssyncd now (now that the disk is 1127 * We schedule xfssyncd now (now that the disk is
1210 * active) instead of later (when it might not be). 1128 * active) instead of later (when it might not be).
1211 */ 1129 */
1212 wake_up_process(mp->m_sync_task); 1130 flush_delayed_work_sync(&mp->m_sync_work);
1213 /*
1214 * We have to wait for the sync iteration to complete.
1215 * If we don't, the disk activity caused by the sync
1216 * will come after the sync is completed, and that
1217 * triggers another sync from laptop mode.
1218 */
1219 wait_event(mp->m_wait_single_sync_task,
1220 mp->m_sync_seq != prev_sync_seq);
1221 } 1131 }
1222 1132
1223 return 0; 1133 return 0;
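The removed code had to wake the xfssyncd thread and then handshake on a sequence counter to know the sync pass had finished; flush_delayed_work_sync() provides both halves in one call. A minimal sketch of that guarantee, using hypothetical demo_* names rather than anything from the patch:

    #include <linux/workqueue.h>
    #include <linux/jiffies.h>

    static void demo_sync(struct work_struct *work)
    {
            /* stands in for xfs_sync_worker(): do the disk activity now */
    }

    static DECLARE_DELAYED_WORK(demo_work, demo_sync);

    static void demo_laptop_mode_sync(void)
    {
            /* the work may currently be queued far in the future ... */
            schedule_delayed_work(&demo_work, 30 * HZ);

            /*
             * ... flush_delayed_work_sync() pulls it forward, runs it and
             * waits for it, so by the time we return the pass has fully
             * completed and its disk activity cannot re-trigger laptop
             * mode later.
             */
            flush_delayed_work_sync(&demo_work);
    }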
@@ -1345,8 +1255,8 @@ xfs_fs_remount(
1345 * options that we can't actually change. 1255 * options that we can't actually change.
1346 */ 1256 */
1347#if 0 1257#if 0
1348 printk(KERN_INFO 1258 xfs_info(mp,
1349 "XFS: mount option \"%s\" not supported for remount\n", p); 1259 "mount option \"%s\" not supported for remount\n", p);
1350 return -EINVAL; 1260 return -EINVAL;
1351#else 1261#else
1352 break; 1262 break;
@@ -1367,8 +1277,7 @@ xfs_fs_remount(
1367 if (mp->m_update_flags) { 1277 if (mp->m_update_flags) {
1368 error = xfs_mount_log_sb(mp, mp->m_update_flags); 1278 error = xfs_mount_log_sb(mp, mp->m_update_flags);
1369 if (error) { 1279 if (error) {
1370 cmn_err(CE_WARN, 1280 xfs_warn(mp, "failed to write sb changes");
1371 "XFS: failed to write sb changes");
1372 return error; 1281 return error;
1373 } 1282 }
1374 mp->m_update_flags = 0; 1283 mp->m_update_flags = 0;
@@ -1452,15 +1361,15 @@ xfs_finish_flags(
1452 mp->m_logbsize = mp->m_sb.sb_logsunit; 1361 mp->m_logbsize = mp->m_sb.sb_logsunit;
1453 } else if (mp->m_logbsize > 0 && 1362 } else if (mp->m_logbsize > 0 &&
1454 mp->m_logbsize < mp->m_sb.sb_logsunit) { 1363 mp->m_logbsize < mp->m_sb.sb_logsunit) {
1455 cmn_err(CE_WARN, 1364 xfs_warn(mp,
1456 "XFS: logbuf size must be greater than or equal to log stripe size"); 1365 "logbuf size must be greater than or equal to log stripe size");
1457 return XFS_ERROR(EINVAL); 1366 return XFS_ERROR(EINVAL);
1458 } 1367 }
1459 } else { 1368 } else {
1460 /* Fail a mount if the logbuf is larger than 32K */ 1369 /* Fail a mount if the logbuf is larger than 32K */
1461 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { 1370 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
1462 cmn_err(CE_WARN, 1371 xfs_warn(mp,
1463 "XFS: logbuf size for version 1 logs must be 16K or 32K"); 1372 "logbuf size for version 1 logs must be 16K or 32K");
1464 return XFS_ERROR(EINVAL); 1373 return XFS_ERROR(EINVAL);
1465 } 1374 }
1466 } 1375 }
@@ -1477,8 +1386,8 @@ xfs_finish_flags(
1477 * prohibit r/w mounts of read-only filesystems 1386 * prohibit r/w mounts of read-only filesystems
1478 */ 1387 */
1479 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { 1388 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
1480 cmn_err(CE_WARN, 1389 xfs_warn(mp,
1481 "XFS: cannot mount a read-only filesystem as read-write"); 1390 "cannot mount a read-only filesystem as read-write");
1482 return XFS_ERROR(EROFS); 1391 return XFS_ERROR(EROFS);
1483 } 1392 }
1484 1393
@@ -1502,9 +1411,6 @@ xfs_fs_fill_super(
1502 spin_lock_init(&mp->m_sb_lock); 1411 spin_lock_init(&mp->m_sb_lock);
1503 mutex_init(&mp->m_growlock); 1412 mutex_init(&mp->m_growlock);
1504 atomic_set(&mp->m_active_trans, 0); 1413 atomic_set(&mp->m_active_trans, 0);
1505 INIT_LIST_HEAD(&mp->m_sync_list);
1506 spin_lock_init(&mp->m_sync_lock);
1507 init_waitqueue_head(&mp->m_wait_single_sync_task);
1508 1414
1509 mp->m_super = sb; 1415 mp->m_super = sb;
1510 sb->s_fs_info = mp; 1416 sb->s_fs_info = mp;
@@ -1551,10 +1457,14 @@ xfs_fs_fill_super(
1551 if (error) 1457 if (error)
1552 goto out_free_sb; 1458 goto out_free_sb;
1553 1459
1554 error = xfs_mountfs(mp); 1460 /*
1555 if (error) 1461 * we must configure the block size in the superblock before we run the
1556 goto out_filestream_unmount; 1462 * full mount process as the mount process can look up and cache inodes.
1557 1463 * For the same reason we must also initialise the syncd and register
1464 * the inode cache shrinker so that inodes can be reclaimed during
1465 * operations like a quotacheck that iterate all inodes in the
1466 * filesystem.
1467 */
1558 sb->s_magic = XFS_SB_MAGIC; 1468 sb->s_magic = XFS_SB_MAGIC;
1559 sb->s_blocksize = mp->m_sb.sb_blocksize; 1469 sb->s_blocksize = mp->m_sb.sb_blocksize;
1560 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; 1470 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
@@ -1562,6 +1472,16 @@ xfs_fs_fill_super(
1562 sb->s_time_gran = 1; 1472 sb->s_time_gran = 1;
1563 set_posix_acl_flag(sb); 1473 set_posix_acl_flag(sb);
1564 1474
1475 error = xfs_syncd_init(mp);
1476 if (error)
1477 goto out_filestream_unmount;
1478
1479 xfs_inode_shrinker_register(mp);
1480
1481 error = xfs_mountfs(mp);
1482 if (error)
1483 goto out_syncd_stop;
1484
1565 root = igrab(VFS_I(mp->m_rootip)); 1485 root = igrab(VFS_I(mp->m_rootip));
1566 if (!root) { 1486 if (!root) {
1567 error = ENOENT; 1487 error = ENOENT;
@@ -1577,14 +1497,11 @@ xfs_fs_fill_super(
1577 goto fail_vnrele; 1497 goto fail_vnrele;
1578 } 1498 }
1579 1499
1580 error = xfs_syncd_init(mp);
1581 if (error)
1582 goto fail_vnrele;
1583
1584 xfs_inode_shrinker_register(mp);
1585
1586 return 0; 1500 return 0;
1587 1501
1502 out_syncd_stop:
1503 xfs_inode_shrinker_unregister(mp);
1504 xfs_syncd_stop(mp);
1588 out_filestream_unmount: 1505 out_filestream_unmount:
1589 xfs_filestream_unmount(mp); 1506 xfs_filestream_unmount(mp);
1590 out_free_sb: 1507 out_free_sb:
@@ -1608,6 +1525,9 @@ xfs_fs_fill_super(
1608 } 1525 }
1609 1526
1610 fail_unmount: 1527 fail_unmount:
1528 xfs_inode_shrinker_unregister(mp);
1529 xfs_syncd_stop(mp);
1530
1611 /* 1531 /*
1612 * Blow away any referenced inode in the filestreams cache. 1532 * Blow away any referenced inode in the filestreams cache.
1613 * This can and will cause log traffic as inodes go inactive 1533 * This can and will cause log traffic as inodes go inactive
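Taken together, these hunks move xfs_syncd_init() and the shrinker registration ahead of xfs_mountfs(), and teach the failure paths to unwind them in reverse. A condensed sketch of the resulting ordering (real function bodies and the other error labels elided):

    static int fill_super_order_sketch(struct xfs_mount *mp)
    {
            int error;

            error = xfs_syncd_init(mp);             /* workqueues first */
            if (error)
                    return error;
            xfs_inode_shrinker_register(mp);        /* then the shrinker */

            /* mount may now reclaim inodes, e.g. during quotacheck */
            error = xfs_mountfs(mp);
            if (error)
                    goto out_syncd_stop;
            return 0;

    out_syncd_stop:
            xfs_inode_shrinker_unregister(mp);      /* reverse order */
            xfs_syncd_stop(mp);
            return error;
    }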
@@ -1797,6 +1717,38 @@ xfs_destroy_zones(void)
1797} 1717}
1798 1718
1799STATIC int __init 1719STATIC int __init
1720xfs_init_workqueues(void)
1721{
1722 /*
1723 * max_active is set to 8 to give enough concurrency to allow
1724 * multiple work operations on each CPU to run. This allows multiple
1725 * filesystems to be running sync work concurrently, and scales with
1726 * the number of CPUs in the system.
1727 */
1728 xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
1729 if (!xfs_syncd_wq)
1730 goto out;
1731
1732 xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
1733 if (!xfs_ail_wq)
1734 goto out_destroy_syncd;
1735
1736 return 0;
1737
1738out_destroy_syncd:
1739 destroy_workqueue(xfs_syncd_wq);
1740out:
1741 return -ENOMEM;
1742}
1743
1744STATIC void
1745xfs_destroy_workqueues(void)
1746{
1747 destroy_workqueue(xfs_ail_wq);
1748 destroy_workqueue(xfs_syncd_wq);
1749}
1750
1751STATIC int __init
1800init_xfs_fs(void) 1752init_xfs_fs(void)
1801{ 1753{
1802 int error; 1754 int error;
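For bound workqueues like these, max_active caps how many items from the queue may execute concurrently on each CPU; excess items simply wait their turn. A small sketch of the lifecycle, with hypothetical demo names (the flags and cap mirror the hunk above):

    #include <linux/errno.h>
    #include <linux/workqueue.h>

    static struct workqueue_struct *demo_wq;

    static int demo_wq_init(void)
    {
            /*
             * Up to 8 items from demo_wq run at once per CPU. The
             * WQ_CPU_INTENSIVE flag keeps long-running items out of the
             * worker pool's concurrency accounting so they do not stall
             * unrelated work sharing the per-CPU pools.
             */
            demo_wq = alloc_workqueue("demo", WQ_CPU_INTENSIVE, 8);
            if (!demo_wq)
                    return -ENOMEM;
            return 0;
    }

    static void demo_wq_exit(void)
    {
            destroy_workqueue(demo_wq);     /* drains queued items first */
    }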
@@ -1811,10 +1763,14 @@ init_xfs_fs(void)
1811 if (error) 1763 if (error)
1812 goto out; 1764 goto out;
1813 1765
1814 error = xfs_mru_cache_init(); 1766 error = xfs_init_workqueues();
1815 if (error) 1767 if (error)
1816 goto out_destroy_zones; 1768 goto out_destroy_zones;
1817 1769
1770 error = xfs_mru_cache_init();
1771 if (error)
1772 goto out_destroy_wq;
1773
1818 error = xfs_filestream_init(); 1774 error = xfs_filestream_init();
1819 if (error) 1775 if (error)
1820 goto out_mru_cache_uninit; 1776 goto out_mru_cache_uninit;
@@ -1831,6 +1787,10 @@ init_xfs_fs(void)
1831 if (error) 1787 if (error)
1832 goto out_cleanup_procfs; 1788 goto out_cleanup_procfs;
1833 1789
1790 error = xfs_init_workqueues();
1791 if (error)
1792 goto out_sysctl_unregister;
1793
1834 vfs_initquota(); 1794 vfs_initquota();
1835 1795
1836 error = register_filesystem(&xfs_fs_type); 1796 error = register_filesystem(&xfs_fs_type);
@@ -1848,6 +1808,8 @@ init_xfs_fs(void)
1848 xfs_filestream_uninit(); 1808 xfs_filestream_uninit();
1849 out_mru_cache_uninit: 1809 out_mru_cache_uninit:
1850 xfs_mru_cache_uninit(); 1810 xfs_mru_cache_uninit();
1811 out_destroy_wq:
1812 xfs_destroy_workqueues();
1851 out_destroy_zones: 1813 out_destroy_zones:
1852 xfs_destroy_zones(); 1814 xfs_destroy_zones();
1853 out: 1815 out:
@@ -1864,6 +1826,7 @@ exit_xfs_fs(void)
1864 xfs_buf_terminate(); 1826 xfs_buf_terminate();
1865 xfs_filestream_uninit(); 1827 xfs_filestream_uninit();
1866 xfs_mru_cache_uninit(); 1828 xfs_mru_cache_uninit();
1829 xfs_destroy_workqueues();
1867 xfs_destroy_zones(); 1830 xfs_destroy_zones();
1868} 1831}
1869 1832
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index e22f0057d21f..e4f9c1b0836c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -22,6 +22,7 @@
22#include "xfs_log.h" 22#include "xfs_log.h"
23#include "xfs_inum.h" 23#include "xfs_inum.h"
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_trans_priv.h"
25#include "xfs_sb.h" 26#include "xfs_sb.h"
26#include "xfs_ag.h" 27#include "xfs_ag.h"
27#include "xfs_mount.h" 28#include "xfs_mount.h"
@@ -39,6 +40,8 @@
39#include <linux/kthread.h> 40#include <linux/kthread.h>
40#include <linux/freezer.h> 41#include <linux/freezer.h>
41 42
43struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */
44
42/* 45/*
43 * The inode lookup is done in batches to keep the amount of lock traffic and 46 * The inode lookup is done in batches to keep the amount of lock traffic and
44 * radix tree lookups to a minimum. The batch size is a trade off between 47 * radix tree lookups to a minimum. The batch size is a trade off between
@@ -401,7 +404,7 @@ xfs_quiesce_fs(
401/* 404/*
402 * Second stage of a quiesce. The data is already synced, now we have to take 405 * Second stage of a quiesce. The data is already synced, now we have to take
403 * care of the metadata. New transactions are already blocked, so we need to 406 * care of the metadata. New transactions are already blocked, so we need to
404 * wait for any remaining transactions to drain out before proceding. 407 * wait for any remaining transactions to drain out before proceeding.
405 */ 408 */
406void 409void
407xfs_quiesce_attr( 410xfs_quiesce_attr(
@@ -425,69 +428,18 @@ xfs_quiesce_attr(
425 /* Push the superblock and write an unmount record */ 428 /* Push the superblock and write an unmount record */
426 error = xfs_log_sbcount(mp, 1); 429 error = xfs_log_sbcount(mp, 1);
427 if (error) 430 if (error)
428 xfs_fs_cmn_err(CE_WARN, mp, 431 xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
429 "xfs_attr_quiesce: failed to log sb changes. "
430 "Frozen image may not be consistent."); 432 "Frozen image may not be consistent.");
431 xfs_log_unmount_write(mp); 433 xfs_log_unmount_write(mp);
432 xfs_unmountfs_writesb(mp); 434 xfs_unmountfs_writesb(mp);
433} 435}
434 436
435/* 437static void
436 * Enqueue a work item to be picked up by the vfs xfssyncd thread. 438xfs_syncd_queue_sync(
437 * Doing this has two advantages: 439 struct xfs_mount *mp)
438 * - It saves on stack space, which is tight in certain situations
439 * - It can be used (with care) as a mechanism to avoid deadlocks.
440 * Flushing while allocating in a full filesystem requires both.
441 */
442STATIC void
443xfs_syncd_queue_work(
444 struct xfs_mount *mp,
445 void *data,
446 void (*syncer)(struct xfs_mount *, void *),
447 struct completion *completion)
448{
449 struct xfs_sync_work *work;
450
451 work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
452 INIT_LIST_HEAD(&work->w_list);
453 work->w_syncer = syncer;
454 work->w_data = data;
455 work->w_mount = mp;
456 work->w_completion = completion;
457 spin_lock(&mp->m_sync_lock);
458 list_add_tail(&work->w_list, &mp->m_sync_list);
459 spin_unlock(&mp->m_sync_lock);
460 wake_up_process(mp->m_sync_task);
461}
462
463/*
464 * Flush delayed allocate data, attempting to free up reserved space
465 * from existing allocations. At this point a new allocation attempt
466 * has failed with ENOSPC and we are in the process of scratching our
467 * heads, looking about for more room...
468 */
469STATIC void
470xfs_flush_inodes_work(
471 struct xfs_mount *mp,
472 void *arg)
473{
474 struct inode *inode = arg;
475 xfs_sync_data(mp, SYNC_TRYLOCK);
476 xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
477 iput(inode);
478}
479
480void
481xfs_flush_inodes(
482 xfs_inode_t *ip)
483{ 440{
484 struct inode *inode = VFS_I(ip); 441 queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
485 DECLARE_COMPLETION_ONSTACK(completion); 442 msecs_to_jiffies(xfs_syncd_centisecs * 10));
486
487 igrab(inode);
488 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
489 wait_for_completion(&completion);
490 xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
491} 443}
492 444
493/* 445/*
@@ -497,9 +449,10 @@ xfs_flush_inodes(
497 */ 449 */
498STATIC void 450STATIC void
499xfs_sync_worker( 451xfs_sync_worker(
500 struct xfs_mount *mp, 452 struct work_struct *work)
501 void *unused)
502{ 453{
454 struct xfs_mount *mp = container_of(to_delayed_work(work),
455 struct xfs_mount, m_sync_work);
503 int error; 456 int error;
504 457
505 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { 458 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
@@ -509,73 +462,106 @@ xfs_sync_worker(
509 error = xfs_fs_log_dummy(mp); 462 error = xfs_fs_log_dummy(mp);
510 else 463 else
511 xfs_log_force(mp, 0); 464 xfs_log_force(mp, 0);
512 xfs_reclaim_inodes(mp, 0);
513 error = xfs_qm_sync(mp, SYNC_TRYLOCK); 465 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
466
467 /* start pushing all the metadata that is currently dirty */
468 xfs_ail_push_all(mp->m_ail);
514 } 469 }
515 mp->m_sync_seq++; 470
516 wake_up(&mp->m_wait_single_sync_task); 471 /* queue us up again */
472 xfs_syncd_queue_sync(mp);
517} 473}
518 474
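xfs_sync_worker() now rearms itself at the end of each pass, so the period is measured from completion rather than from a fixed tick, and there is no thread to park or wake. A generic sketch of this self-requeueing pattern (hypothetical names; assume demo_wq was allocated elsewhere):

    #include <linux/workqueue.h>
    #include <linux/jiffies.h>

    static struct workqueue_struct *demo_wq;
    static struct delayed_work demo_periodic;

    static void demo_periodic_fn(struct work_struct *work)
    {
            /* ... one periodic pass ... */

            /* rearm: next pass starts a full delay after this one ends */
            queue_delayed_work(demo_wq, &demo_periodic, 30 * HZ);
    }

    static void demo_periodic_start(void)
    {
            INIT_DELAYED_WORK(&demo_periodic, demo_periodic_fn);
            queue_delayed_work(demo_wq, &demo_periodic, 30 * HZ);
    }

    static void demo_periodic_stop(void)
    {
            /*
             * Safe even though the handler requeues itself:
             * cancel_delayed_work_sync() keeps retrying the cancel until
             * it wins the race, then waits out any running instance.
             */
            cancel_delayed_work_sync(&demo_periodic);
    }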
519STATIC int 475/*
520xfssyncd( 476 * Queue a new inode reclaim pass if there are reclaimable inodes and there
521 void *arg) 477 * isn't a reclaim pass already in progress. By default it runs every 5s based
478 * on the xfs syncd work default of 30s. Perhaps this should have its own
479 * tunable, but that can be done if this method proves to be ineffective or too
480 * aggressive.
481 */
482static void
483xfs_syncd_queue_reclaim(
484 struct xfs_mount *mp)
522{ 485{
523 struct xfs_mount *mp = arg;
524 long timeleft;
525 xfs_sync_work_t *work, *n;
526 LIST_HEAD (tmp);
527
528 set_freezable();
529 timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
530 for (;;) {
531 if (list_empty(&mp->m_sync_list))
532 timeleft = schedule_timeout_interruptible(timeleft);
533 /* swsusp */
534 try_to_freeze();
535 if (kthread_should_stop() && list_empty(&mp->m_sync_list))
536 break;
537 486
538 spin_lock(&mp->m_sync_lock); 487 /*
539 /* 488 * We can have inodes enter reclaim after we've shut down the syncd
540 * We can get woken by laptop mode, to do a sync - 489 * workqueue during unmount, so don't allow reclaim work to be queued
541 * that's the (only!) case where the list would be 490 * during unmount.
542 * empty with time remaining. 491 */
543 */ 492 if (!(mp->m_super->s_flags & MS_ACTIVE))
544 if (!timeleft || list_empty(&mp->m_sync_list)) { 493 return;
545 if (!timeleft)
546 timeleft = xfs_syncd_centisecs *
547 msecs_to_jiffies(10);
548 INIT_LIST_HEAD(&mp->m_sync_work.w_list);
549 list_add_tail(&mp->m_sync_work.w_list,
550 &mp->m_sync_list);
551 }
552 list_splice_init(&mp->m_sync_list, &tmp);
553 spin_unlock(&mp->m_sync_lock);
554 494
555 list_for_each_entry_safe(work, n, &tmp, w_list) { 495 rcu_read_lock();
556 (*work->w_syncer)(mp, work->w_data); 496 if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
557 list_del(&work->w_list); 497 queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
558 if (work == &mp->m_sync_work) 498 msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
559 continue;
560 if (work->w_completion)
561 complete(work->w_completion);
562 kmem_free(work);
563 }
564 } 499 }
500 rcu_read_unlock();
501}
565 502
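The interval arithmetic: with the default xfs_syncd_centisecs of 3000 (the 30 second syncd period), xfs_syncd_centisecs / 6 * 10 is 5000 milliseconds, giving the 5 second cadence the comment mentions. A minimal sketch of the guarded-queueing shape, with a hypothetical demo_mount standing in for the real mount fields:

    #include <linux/fs.h>
    #include <linux/jiffies.h>
    #include <linux/radix-tree.h>
    #include <linux/rcupdate.h>
    #include <linux/workqueue.h>

    struct demo_mount {
            struct super_block      *m_super;
            struct radix_tree_root  m_tag_tree;     /* cf. m_perag_tree */
            struct delayed_work     m_reclaim_work;
    };

    static void demo_queue_reclaim(struct workqueue_struct *wq,
                                   struct demo_mount *mp, unsigned int tag)
    {
            /* never queue new reclaim work once unmount has begun */
            if (!(mp->m_super->s_flags & MS_ACTIVE))
                    return;

            /* the tag test only peeks at the tree, so RCU suffices */
            rcu_read_lock();
            if (radix_tree_tagged(&mp->m_tag_tree, tag))
                    queue_delayed_work(wq, &mp->m_reclaim_work,
                                       msecs_to_jiffies(5000));
            rcu_read_unlock();
    }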
566 return 0; 503/*
504 * This is a fast pass over the inode cache to try to get reclaim moving on as
505 * many inodes as possible in a short period of time. It kicks itself every few
506 * seconds, as well as being kicked by the inode cache shrinker when memory
507 * goes low. It scans as quickly as possible avoiding locked inodes or those
508 * already being flushed, and once done schedules a future pass.
509 */
510STATIC void
511xfs_reclaim_worker(
512 struct work_struct *work)
513{
514 struct xfs_mount *mp = container_of(to_delayed_work(work),
515 struct xfs_mount, m_reclaim_work);
516
517 xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
518 xfs_syncd_queue_reclaim(mp);
519}
520
521/*
522 * Flush delayed allocate data, attempting to free up reserved space
523 * from existing allocations. At this point a new allocation attempt
524 * has failed with ENOSPC and we are in the process of scratching our
525 * heads, looking about for more room.
526 *
527 * Queue a new data flush if there isn't one already in progress and
528 * wait for completion of the flush. This means that we only ever have one
529 * inode flush in progress no matter how many ENOSPC events are occurring and
530 * so will prevent the system from bogging down due to every concurrent
531 * ENOSPC event scanning all the active inodes in the system for writeback.
532 */
533void
534xfs_flush_inodes(
535 struct xfs_inode *ip)
536{
537 struct xfs_mount *mp = ip->i_mount;
538
539 queue_work(xfs_syncd_wq, &mp->m_flush_work);
540 flush_work_sync(&mp->m_flush_work);
541}
542
543STATIC void
544xfs_flush_worker(
545 struct work_struct *work)
546{
547 struct xfs_mount *mp = container_of(work,
548 struct xfs_mount, m_flush_work);
549
550 xfs_sync_data(mp, SYNC_TRYLOCK);
551 xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
567} 552}
568 553
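Because a work item can be pending only once, queue_work() from a second ENOSPC-ing task is a no-op while a flush is already queued, and flush_work_sync() then makes every caller wait on that shared pass. A short sketch of the coalescing idiom, again with hypothetical names:

    #include <linux/workqueue.h>

    static struct workqueue_struct *demo_wq;
    static struct work_struct demo_flush_work;

    static void demo_flush(struct work_struct *work)
    {
            /* write back dirty data once, on behalf of all waiters */
    }

    static void demo_flush_setup(void)
    {
            demo_wq = alloc_workqueue("demo_flush", 0, 1);
            INIT_WORK(&demo_flush_work, demo_flush);
    }

    static void demo_flush_and_wait(void)
    {
            queue_work(demo_wq, &demo_flush_work);  /* no-op if pending */
            flush_work_sync(&demo_flush_work);      /* wait for the pass */
    }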
569int 554int
570xfs_syncd_init( 555xfs_syncd_init(
571 struct xfs_mount *mp) 556 struct xfs_mount *mp)
572{ 557{
573 mp->m_sync_work.w_syncer = xfs_sync_worker; 558 INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
574 mp->m_sync_work.w_mount = mp; 559 INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
575 mp->m_sync_work.w_completion = NULL; 560 INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
576 mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname); 561
577 if (IS_ERR(mp->m_sync_task)) 562 xfs_syncd_queue_sync(mp);
578 return -PTR_ERR(mp->m_sync_task); 563 xfs_syncd_queue_reclaim(mp);
564
579 return 0; 565 return 0;
580} 566}
581 567
@@ -583,7 +569,9 @@ void
583xfs_syncd_stop( 569xfs_syncd_stop(
584 struct xfs_mount *mp) 570 struct xfs_mount *mp)
585{ 571{
586 kthread_stop(mp->m_sync_task); 572 cancel_delayed_work_sync(&mp->m_sync_work);
573 cancel_delayed_work_sync(&mp->m_reclaim_work);
574 cancel_work_sync(&mp->m_flush_work);
587} 575}
588 576
589void 577void
@@ -602,6 +590,10 @@ __xfs_inode_set_reclaim_tag(
602 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), 590 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
603 XFS_ICI_RECLAIM_TAG); 591 XFS_ICI_RECLAIM_TAG);
604 spin_unlock(&ip->i_mount->m_perag_lock); 592 spin_unlock(&ip->i_mount->m_perag_lock);
593
594 /* schedule periodic background inode reclaim */
595 xfs_syncd_queue_reclaim(ip->i_mount);
596
605 trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, 597 trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
606 -1, _RET_IP_); 598 -1, _RET_IP_);
607 } 599 }
@@ -762,8 +754,10 @@ xfs_reclaim_inode(
762 struct xfs_perag *pag, 754 struct xfs_perag *pag,
763 int sync_mode) 755 int sync_mode)
764{ 756{
765 int error = 0; 757 int error;
766 758
759restart:
760 error = 0;
767 xfs_ilock(ip, XFS_ILOCK_EXCL); 761 xfs_ilock(ip, XFS_ILOCK_EXCL);
768 if (!xfs_iflock_nowait(ip)) { 762 if (!xfs_iflock_nowait(ip)) {
769 if (!(sync_mode & SYNC_WAIT)) 763 if (!(sync_mode & SYNC_WAIT))
@@ -789,9 +783,31 @@ xfs_reclaim_inode(
789 if (xfs_inode_clean(ip)) 783 if (xfs_inode_clean(ip))
790 goto reclaim; 784 goto reclaim;
791 785
792 /* Now we have an inode that needs flushing */ 786 /*
793 error = xfs_iflush(ip, sync_mode); 787 * Now we have an inode that needs flushing.
788 *
789 * We do a nonblocking flush here even if we are doing a SYNC_WAIT
790 * reclaim as we can deadlock with inode cluster removal.
791 * xfs_ifree_cluster() can lock the inode buffer before it locks the
792 * ip->i_lock, and we are doing the exact opposite here. As a result,
793 * doing a blocking xfs_itobp() to get the cluster buffer will result
794 * in an ABBA deadlock with xfs_ifree_cluster().
795 *
796 * As xfs_ifree_cluster() must gather all inodes that are active in the
797 * cache to mark them stale, if we hit this case we don't actually want
798 * to do IO here - we want the inode marked stale so we can simply
799 * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
800 * just unlock the inode, back off and try again. Hopefully the next
801 * pass through will see the stale flag set on the inode.
802 */
803 error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
794 if (sync_mode & SYNC_WAIT) { 804 if (sync_mode & SYNC_WAIT) {
805 if (error == EAGAIN) {
806 xfs_iunlock(ip, XFS_ILOCK_EXCL);
807 /* backoff longer than in xfs_ifree_cluster */
808 delay(2);
809 goto restart;
810 }
795 xfs_iflock(ip); 811 xfs_iflock(ip);
796 goto reclaim; 812 goto reclaim;
797 } 813 }
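The deadlock described above is the classic ABBA pattern: xfs_ifree_cluster() takes the buffer lock and then the inode lock, while reclaim holds the inode lock and wants the buffer. A generic sketch of the trylock-and-back-off discipline the restart loop applies, using plain mutexes rather than the real inode and buffer locks:

    #include <linux/delay.h>
    #include <linux/mutex.h>

    /* one side locks A then B */
    static void order_ab(struct mutex *a, struct mutex *b)
    {
            mutex_lock(a);
            mutex_lock(b);
    }

    /* the other side needs the opposite order, so it never blocks on A */
    static void order_ba_safe(struct mutex *a, struct mutex *b)
    {
            for (;;) {
                    mutex_lock(b);
                    if (mutex_trylock(a))
                            return;         /* got both, no cycle */
                    mutex_unlock(b);        /* drop B to break the cycle */
                    msleep(2);              /* back off, then retry */
            }
    }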
@@ -806,7 +822,7 @@ xfs_reclaim_inode(
806 * pass on the error. 822 * pass on the error.
807 */ 823 */
808 if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { 824 if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
809 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 825 xfs_warn(ip->i_mount,
810 "inode 0x%llx background reclaim flush failed with %d", 826 "inode 0x%llx background reclaim flush failed with %d",
811 (long long)ip->i_ino, error); 827 (long long)ip->i_ino, error);
812 } 828 }
@@ -994,7 +1010,13 @@ xfs_reclaim_inodes(
994} 1010}
995 1011
996/* 1012/*
997 * Shrinker infrastructure. 1013 * Inode cache shrinker.
1014 *
1015 * When called we make sure that there is a background (fast) inode reclaim in
1016 * progress, while we will throttle the speed of reclaim via doiing synchronous
1017 * reclaim of inodes. That means if we come across dirty inodes, we wait for
1018 * them to be cleaned, which we hope will not be very long due to the
1019 * background walker having already kicked the IO off on those dirty inodes.
998 */ 1020 */
999static int 1021static int
1000xfs_reclaim_inode_shrink( 1022xfs_reclaim_inode_shrink(
@@ -1009,10 +1031,15 @@ xfs_reclaim_inode_shrink(
1009 1031
1010 mp = container_of(shrink, struct xfs_mount, m_inode_shrink); 1032 mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
1011 if (nr_to_scan) { 1033 if (nr_to_scan) {
1034 /* kick background reclaimer and push the AIL */
1035 xfs_syncd_queue_reclaim(mp);
1036 xfs_ail_push_all(mp->m_ail);
1037
1012 if (!(gfp_mask & __GFP_FS)) 1038 if (!(gfp_mask & __GFP_FS))
1013 return -1; 1039 return -1;
1014 1040
1015 xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan); 1041 xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT,
1042 &nr_to_scan);
1016 /* terminate if we don't exhaust the scan */ 1043 /* terminate if we don't exhaust the scan */
1017 if (nr_to_scan > 0) 1044 if (nr_to_scan > 0)
1018 return -1; 1045 return -1;
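A free-standing sketch of just that decision logic (hypothetical name, not wired into the shrinker registration), showing why allocations that cannot recurse into the filesystem are refused and where the synchronous scan throttles:

    #include <linux/gfp.h>

    static int demo_shrink(int nr_to_scan, gfp_t gfp_mask)
    {
            if (nr_to_scan) {
                    /* reclaim may issue fs I/O: bail if the caller can't */
                    if (!(gfp_mask & __GFP_FS))
                            return -1;

                    /*
                     * Scan with SYNC_TRYLOCK | SYNC_WAIT here: waiting on
                     * dirty inodes is what throttles allocation to the
                     * rate at which reclaim makes progress.
                     */
            }
            return 0;       /* the real code returns the reclaimable count */
    }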
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 32ba6628290c..e3a6ad27415f 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -32,6 +32,8 @@ typedef struct xfs_sync_work {
32#define SYNC_WAIT 0x0001 /* wait for i/o to complete */ 32#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
33#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ 33#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */
34 34
35extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */
36
35int xfs_syncd_init(struct xfs_mount *mp); 37int xfs_syncd_init(struct xfs_mount *mp);
36void xfs_syncd_stop(struct xfs_mount *mp); 38void xfs_syncd_stop(struct xfs_mount *mp);
37 39
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index ee3cee097e7e..ee2d2adaa438 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -37,7 +37,7 @@ xfs_stats_clear_proc_handler(
37 ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); 37 ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
38 38
39 if (!ret && write && *valp) { 39 if (!ret && write && *valp) {
40 printk("XFS Clearing xfsstats\n"); 40 xfs_notice(NULL, "Clearing xfsstats");
41 for_each_possible_cpu(c) { 41 for_each_possible_cpu(c) {
42 preempt_disable(); 42 preempt_disable();
43 /* save vn_active, it's a universal truth! */ 43 /* save vn_active, it's a universal truth! */